├── CREDITS ├── tests ├── 005.phpt ├── testdata │ ├── multimedia-demo.txt.gz │ ├── uue.exp │ ├── rfc2231.txt │ ├── rfc2231.exp │ ├── phpcvs1.exp │ ├── uue.txt │ ├── oeuue │ ├── qp.exp │ ├── mime.exp │ ├── qp.txt │ ├── mime.txt │ ├── m0001.txt │ ├── phpcvs1.txt │ └── multimedia-demo.exp ├── 001.phpt ├── 013.phpt ├── 008.phpt ├── 012.phpt ├── bug81422.phpt ├── 012-var.phpt ├── 012-stream.phpt ├── bug81403.phpt ├── 002.phpt ├── 003.phpt ├── gh29.phpt ├── 011.phpt ├── bug001.phpt ├── bug74223.phpt ├── gh24.phpt ├── gh30.phpt ├── 004.phpt ├── bug73110.phpt ├── gh19.phpt ├── 009.phpt ├── 007.phpt ├── 010.phpt ├── gh21.phpt ├── gh22.phpt ├── bug75825.phpt ├── 006.phpt └── parse_test_messages.phpt ├── Makefile.frag ├── composer.json ├── .travis.yml ├── config.w32 ├── config.m4 ├── .gitignore ├── try.php ├── .github └── workflows │ └── ci.yml ├── php_mailparse_rfc822.h ├── LICENSE ├── php_mailparse.h ├── arginfo.h ├── php_mailparse_mime.h ├── README.md ├── package.xml ├── php_mailparse_rfc822.re ├── php_mailparse_rfc822.c └── php_mailparse_mime.c /CREDITS: -------------------------------------------------------------------------------- 1 | Mailparse MIME parsing and manipulation functions 2 | Wez Furlong 3 | -------------------------------------------------------------------------------- /tests/005.phpt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/php/pecl-mail-mailparse/master/tests/005.phpt -------------------------------------------------------------------------------- /tests/testdata/multimedia-demo.txt.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/php/pecl-mail-mailparse/master/tests/testdata/multimedia-demo.txt.gz -------------------------------------------------------------------------------- /Makefile.frag: -------------------------------------------------------------------------------- 1 | 
$(top_srcdir)/ext/mailparse/php_mailparse_rfc822.c: $(top_srcdir)/ext/mailparse/php_mailparse_rfc822.re 2 | re2c -b $(top_srcdir)/ext/mailparse/php_mailparse_rfc822.re > $@ 3 | 4 | $(srcdir)/php_mailparse_rfc822.c: $(srcdir)/php_mailparse_rfc822.re 5 | re2c -b $(srcdir)/php_mailparse_rfc822.re > $@ 6 | -------------------------------------------------------------------------------- /tests/testdata/uue.exp: -------------------------------------------------------------------------------- 1 | Message: uue 2 | 3 | Part 1 4 | body-line-count => int(10) 5 | charset => string(8) "us-ascii" 6 | content-base => string(1) "/" 7 | content-type => string(10) "text/plain" 8 | ending-pos => int(945) 9 | line-count => int(27) 10 | starting-pos => int(0) 11 | starting-pos-body => int(743) 12 | transfer-encoding => string(4) "8bit" 13 | -------------------------------------------------------------------------------- /composer.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "pecl/mailparse", 3 | "type": "php-ext", 4 | "license": "PHP-3.01", 5 | "description": "Email message manipulation", 6 | "require": { 7 | "php": ">= 7.3.0", 8 | "ext-mbstring": "*" 9 | }, 10 | "php-ext": { 11 | "extension-name": "mailparse", 12 | "configure-options": [] 13 | } 14 | } 15 | -------------------------------------------------------------------------------- /tests/001.phpt: -------------------------------------------------------------------------------- 1 | --TEST-- 2 | Check for mailparse presence 3 | --SKIPIF-- 4 | 9 | --POST-- 10 | --GET-- 11 | --FILE-- 12 | 15 | --EXPECT-- 16 | mailparse extension is available 17 | -------------------------------------------------------------------------------- /tests/013.phpt: -------------------------------------------------------------------------------- 1 | --TEST-- 2 | Check mailparse_mimemessage_extract_uue 3 | --SKIPIF-- 4 | 8 | --FILE-- 9 | 14 | --EXPECTF-- 15 | resource(%d) of type 
(mailparse_mail_structure) 16 | -------------------------------------------------------------------------------- /tests/008.phpt: -------------------------------------------------------------------------------- 1 | --TEST-- 2 | OO API Segfault when opening a file is not possible 3 | --SKIPIF-- 4 | 8 | --POST-- 9 | --GET-- 10 | --FILE-- 11 | 19 | --EXPECT-- 20 | OK 21 | -------------------------------------------------------------------------------- /tests/012.phpt: -------------------------------------------------------------------------------- 1 | --TEST-- 2 | Check mailparse_mimemessage_extract_uue (file mode) 3 | --SKIPIF-- 4 | 8 | --FILE-- 9 | extract_uue(0, MAILPARSE_EXTRACT_RETURN)); 12 | ?> 13 | --EXPECT-- 14 | string(88) "FooBar - Baaaaa 15 | 16 | Requirements: 17 | o php with mailparse 18 | o virus scanner (optional) 19 | 20 | 21 | " 22 | -------------------------------------------------------------------------------- /tests/bug81422.phpt: -------------------------------------------------------------------------------- 1 | --TEST-- 2 | Bug #81422 (Potential double-free in mailparse_uudecode_all()) 3 | --SKIPIF-- 4 | 7 | --FILE-- 8 | 25 | --EXPECT-- 26 | int(3) 27 | -------------------------------------------------------------------------------- /tests/012-var.phpt: -------------------------------------------------------------------------------- 1 | --TEST-- 2 | Check mailparse_mimemessage_extract_uue (var mode) 3 | --SKIPIF-- 4 | 8 | --FILE-- 9 | extract_uue(0, MAILPARSE_EXTRACT_RETURN)); 13 | ?> 14 | --EXPECT-- 15 | string(88) "FooBar - Baaaaa 16 | 17 | Requirements: 18 | o php with mailparse 19 | o virus scanner (optional) 20 | 21 | 22 | " 23 | -------------------------------------------------------------------------------- /tests/testdata/rfc2231.txt: -------------------------------------------------------------------------------- 1 | Return-Path: 2 | From: "Wez Furlong" 3 | To: 4 | Subject: mime attach 5 | Date: Sat, 7 Sep 2002 12:41:14 +0100 6 | 
Message-ID: <000601c25663$78b7fcf0$0702a8c0@TITAN> 7 | MIME-Version: 1.0 8 | Content-Type: message/external-body; access-type=URL; 9 | URL*0="http://pecl.php.net/"; 10 | URL*1="package"; 11 | URL*2="/mailparse"; 12 | title*0*=us-ascii'en'This%20is%20even%20more%20; 13 | title*1*=%2A%2A%2Afun%2A%2A%2A%20; 14 | title*2="isn't it!" 15 | 16 | This is not the Body 17 | 18 | -------------------------------------------------------------------------------- /tests/012-stream.phpt: -------------------------------------------------------------------------------- 1 | --TEST-- 2 | Check mailparse_mimemessage_extract_uue (stream mode) 3 | --SKIPIF-- 4 | 8 | --FILE-- 9 | extract_uue(0, MAILPARSE_EXTRACT_RETURN)); 13 | fclose($fp); 14 | ?> 15 | --EXPECT-- 16 | string(88) "FooBar - Baaaaa 17 | 18 | Requirements: 19 | o php with mailparse 20 | o virus scanner (optional) 21 | 22 | 23 | " 24 | -------------------------------------------------------------------------------- /tests/testdata/rfc2231.exp: -------------------------------------------------------------------------------- 1 | Message: rfc2231 2 | 3 | Part 1 4 | body-line-count => int(2) 5 | charset => string(8) "us-ascii" 6 | content-access-type => string(3) "URL" 7 | content-base => string(1) "/" 8 | content-title => string(74) "=?us-ascii?Q?This=20is=20even=20more=20=2A=2A=2Afun=2A=2A=2A=20?=isn't it!" 
9 | content-type => string(21) "message/external-body" 10 | content-url => string(37) "http://pecl.php.net/package/mailparse" 11 | ending-pos => int(489) 12 | line-count => int(17) 13 | starting-pos => int(0) 14 | starting-pos-body => int(467) 15 | transfer-encoding => string(4) "8bit" 16 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | language: php 2 | php: 3 | - nightly 4 | sudo: false 5 | env: 6 | - REPORT_EXIT_STATUS=1 NO_INTERACTION=1 7 | install: 8 | - phpize 9 | - EXTRA_LDFLAGS="-precious-files-regex .libs/mailparse.gcno" LDFLAGS="-lgcov" CFLAGS="-Wall -fno-strict-aliasing -coverage -O0" ./configure 10 | - make all 11 | script: 12 | - TEST_PHP_EXECUTABLE=$(which php) php -n 13 | -d open_basedir= -d output_buffering=0 -d memory_limit=-1 14 | run-tests.php -n 15 | -d extension_dir=modules -d extension=mailparse.so --show-diff 16 | tests 17 | - gcov --object-directory .libs *.c 18 | - bash <(curl -s https://codecov.io/bash) 19 | -------------------------------------------------------------------------------- /tests/bug81403.phpt: -------------------------------------------------------------------------------- 1 | --TEST-- 2 | Bug #81403 (mailparse_rfc822_parse_addresses drops escaped quotes) 3 | --SKIPIF-- 4 | 7 | --XFAIL-- 8 | Fix reverted see GH-29 and GH-30 9 | --FILE-- 10 | '; 12 | var_dump(mailparse_rfc822_parse_addresses($address)); 13 | ?> 14 | --EXPECT-- 15 | array(1) { 16 | [0]=> 17 | array(3) { 18 | ["display"]=> 19 | string(21) "Smith, Robert \"Bob\"" 20 | ["address"]=> 21 | string(15) "user@domain.org" 22 | ["is_group"]=> 23 | bool(false) 24 | } 25 | } 26 | -------------------------------------------------------------------------------- /tests/002.phpt: -------------------------------------------------------------------------------- 1 | --TEST-- 2 | Check stream encoding 3 | --SKIPIF-- 4 | 9 | --POST-- 10 | --GET-- 11 
| --FILE-- 12 | 26 | --EXPECT-- 27 | hello, this is some text=3Dhello. 28 | -------------------------------------------------------------------------------- /tests/003.phpt: -------------------------------------------------------------------------------- 1 | --TEST-- 2 | Check handling of multiple To headers 3 | --SKIPIF-- 4 | 9 | --POST-- 10 | --GET-- 11 | --FILE-- 12 | 26 | --EXPECT-- 27 | fred@bloggs.com, wez@thebrainroom.com 28 | -------------------------------------------------------------------------------- /config.w32: -------------------------------------------------------------------------------- 1 | // vim:ft=javascript 2 | 3 | ARG_ENABLE("mailparse", "MAILPARSE support", "no"); 4 | 5 | if (PHP_MAILPARSE != "no") { 6 | 7 | if (PHP_MBSTRING == "no") { 8 | WARNING("mailparse requires mbstring"); 9 | } else { 10 | EXTENSION('mailparse', 'mailparse.c php_mailparse_mime.c php_mailparse_rfc822.c'); 11 | AC_DEFINE('HAVE_MAILPARSE', 1); 12 | 13 | 14 | // MFO.WriteLine(configure_module_dirname + "\\php_mailparse_rfc822.c: " + configure_module_dirname + "\\php_mailparse_rfc822.re"); 15 | // MFO.WriteLine("\t$(RE2C) -b " + configure_module_dirname + "\\php_mailparse_rfc822.re > " + configure_module_dirname + "\\php_mailparse_rfc822.c"); 16 | 17 | ADD_EXTENSION_DEP('mailparse', 'mbstring'); 18 | } 19 | } 20 | -------------------------------------------------------------------------------- /tests/gh29.phpt: -------------------------------------------------------------------------------- 1 | --TEST-- 2 | GH issue #29 Segmentation fault with ISO-2022-JP Subject header 3 | --SKIPIF-- 4 | 7 | --FILE-- 8 | e!"$h$m$7$/$*4j$$CW$7$^$9!#(B 20 | EOF; 21 | 22 | $resource = mailparse_msg_create(); 23 | 24 | $r = mailparse_msg_parse($resource, $data); 25 | echo 'ok', PHP_EOL; 26 | 27 | mailparse_msg_free($resource); 28 | 29 | exit(0); 30 | ?> 31 | --EXPECTF-- 32 | ok 33 | -------------------------------------------------------------------------------- /tests/011.phpt: 
-------------------------------------------------------------------------------- 1 | --TEST-- 2 | Check mailparse_determine_best_xfer_encoding 3 | --SKIPIF-- 4 | 8 | --FILE-- 9 | getFileName(), '.txt') !== false) { 14 | $names[] = $file->getRealPath(); 15 | } 16 | } 17 | sort($names); 18 | foreach ($names as $name) { 19 | var_dump(mailparse_determine_best_xfer_encoding(fopen($name, 'r'))); 20 | } 21 | ?> 22 | --EXPECT-- 23 | string(4) "7bit" 24 | string(4) "7bit" 25 | string(6) "BASE64" 26 | string(4) "7bit" 27 | string(4) "7bit" 28 | string(4) "7bit" 29 | string(4) "7bit" 30 | -------------------------------------------------------------------------------- /tests/bug001.phpt: -------------------------------------------------------------------------------- 1 | --TEST-- 2 | mailparse_msg_free causes double free segfault 3 | --SKIPIF-- 4 | 6 | --FILE-- 7 | 7 | --FILE-- 8 | 34 | --EXPECT-- 35 | -------------------------------------------------------------------------------- /tests/gh24.phpt: -------------------------------------------------------------------------------- 1 | --TEST-- 2 | GH issue #24 (Segmentation fault with mailparse_msg_create()) 3 | --SKIPIF-- 4 | 7 | --FILE-- 8 | 37 | --EXPECTF-- 38 | ok 39 | -------------------------------------------------------------------------------- /tests/testdata/phpcvs1.exp: -------------------------------------------------------------------------------- 1 | Message: phpcvs1 2 | 3 | Part 1 4 | body-line-count => int(105) 5 | charset => string(8) "us-ascii" 6 | content-base => string(1) "/" 7 | content-boundary => string(37) "=====================_71195359==_.ALT" 8 | content-type => string(21) "multipart/alternative" 9 | ending-pos => int(5284) 10 | line-count => int(141) 11 | starting-pos => int(0) 12 | starting-pos-body => int(1940) 13 | transfer-encoding => string(4) "8bit" 14 | 15 | Part 1.1 16 | body-line-count => int(98) 17 | charset => string(10) "iso-8859-1" 18 | content-base => string(1) "/" 19 | content-charset 
=> string(10) "iso-8859-1" 20 | content-format => string(6) "flowed" 21 | content-type => string(10) "text/plain" 22 | ending-pos => int(5240) 23 | line-count => int(101) 24 | starting-pos => int(1980) 25 | starting-pos-body => int(2087) 26 | transfer-encoding => string(16) "quoted-printable" 27 | -------------------------------------------------------------------------------- /tests/gh30.phpt: -------------------------------------------------------------------------------- 1 | --TEST-- 2 | GH issue #30 Segmentation fault with UTF-8 encoded X-MS-Iris-MetaData header 3 | --SKIPIF-- 4 | 7 | --FILE-- 8 | 33 | --EXPECTF-- 34 | ok 35 | -------------------------------------------------------------------------------- /config.m4: -------------------------------------------------------------------------------- 1 | PHP_ARG_ENABLE(mailparse, whether to enable mailparse support, 2 | [ --enable-mailparse Enable mailparse support.]) 3 | 4 | if test "$PHP_MAILPARSE" != "no"; then 5 | if test "$ext_shared" != "yes" && test "$enable_mbstring" != "yes"; then 6 | AC_MSG_WARN(Activating mbstring) 7 | enable_mbstring=yes 8 | fi 9 | 10 | AC_MSG_CHECKING(libmbfl headers) 11 | if test -f $abs_srcdir/ext/mbstring/libmbfl/mbfl/mbfilter.h; then 12 | dnl build in php-src tree 13 | AC_MSG_RESULT(found in $abs_srcdir/ext/mbstring) 14 | elif test -f $phpincludedir/ext/mbstring/libmbfl/mbfl/mbfilter.h; then 15 | dnl build alone 16 | AC_MSG_RESULT(found in $phpincludedir/ext/mbstring) 17 | else 18 | AC_MSG_ERROR(mbstring extension with libmbfl is missing) 19 | fi 20 | 21 | PHP_NEW_EXTENSION(mailparse, mailparse.c php_mailparse_mime.c php_mailparse_rfc822.c, $ext_shared) 22 | PHP_ADD_EXTENSION_DEP(mailparse, mbstring, true) 23 | PHP_ADD_MAKEFILE_FRAGMENT 24 | fi 25 | -------------------------------------------------------------------------------- /tests/004.phpt: -------------------------------------------------------------------------------- 1 | --TEST-- 2 | Check uudecode_all 3 | --SKIPIF-- 4 | 9 
| --POST-- 10 | --GET-- 11 | --FILE-- 12 | 41 | --EXPECT-- 42 | BODY 43 | To: fred@bloggs.com 44 | 45 | hello, this is some text hello. 46 | blah blah blah. 47 | 48 | UUE 49 | this is a test 50 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | .deps 2 | .libs 3 | .php-version 4 | .rbenv-version 5 | acinclude.m4 6 | aclocal.m4 7 | autom4te.cache 8 | build 9 | config.guess 10 | config.h 11 | config.h.in 12 | config.log 13 | config.nice 14 | config.status 15 | config.sub 16 | configure 17 | configure.in 18 | configure.ac 19 | extras 20 | include 21 | install-sh 22 | libtool 23 | ltmain.sh 24 | ltmain.sh.backup 25 | Makefile 26 | Makefile.fragments 27 | Makefile.global 28 | Makefile.objects 29 | missing 30 | mkinstalldirs 31 | modules 32 | run-tests.php 33 | tmp-php.ini 34 | yaml.loT 35 | 36 | # General Ignores 37 | *~ 38 | .#* 39 | *. 40 | *.slo 41 | *.dep 42 | *.mk 43 | *.mem 44 | *.gcda 45 | *.gcno 46 | *.la 47 | *.lo 48 | *.o 49 | *.a 50 | *.ncb 51 | *.opt 52 | *.plg 53 | *swp 54 | *.patch 55 | *.tgz 56 | *.tar.gz 57 | *.tar.bz2 58 | .FBCIndex 59 | .FBCLockFolder 60 | core 61 | 62 | # Test specific Ignores 63 | tests/*.diff 64 | tests/*.exp 65 | tests/*.log 66 | tests/*.out 67 | tests/*.php 68 | tests/*.sh 69 | 70 | # coverage 71 | /coverage.info 72 | /reports 73 | -------------------------------------------------------------------------------- /tests/bug73110.phpt: -------------------------------------------------------------------------------- 1 | --TEST-- 2 | Bug #73110 (Mails with unknown MIME version are treated as plain/text) 3 | --SKIPIF-- 4 | 7 | --FILE-- 8 | 31 | --EXPECT-- 32 | string(21) "multipart/alternative" 33 | -------------------------------------------------------------------------------- /tests/testdata/uue.txt: -------------------------------------------------------------------------------- 1 | Return-Path: 2 | 
Received: from TITAN (titan.brainnet.i [192.168.2.7]) 3 | by zaneeb.brainnet.i (8.10.2/8.10.2/SuSE Linux 8.10.0-0.3) with ESMTP id g87Be5721229 4 | for ; Sat, 7 Sep 2002 12:40:05 +0100 5 | X-Authentication-Warning: zaneeb.brainnet.i: Host titan.brainnet.i [192.168.2.7] claimed to be TITAN 6 | From: "Wez Furlong" 7 | To: 8 | Subject: UUEncoded attachments 9 | Date: Sat, 7 Sep 2002 12:39:55 +0100 10 | Message-ID: <000001c25663$4cd5b460$0702a8c0@TITAN> 11 | X-Priority: 3 (Normal) 12 | X-MSMail-Priority: Normal 13 | X-Mailer: Microsoft Outlook, Build 10.0.2627 14 | Importance: Normal 15 | X-MimeOLE: Produced By Microsoft MimeOLE V6.00.2600.0000 16 | X-TBR-DestBox: user.wez (auth as wez) (wez:) 17 | 18 | Hello, this is a message with UUE attachments. 19 | 20 | 21 | begin 644 README.dat 22 | M1F]O0F%R("T@0F%A86%A"@I297%U:7)E;65N=',Z(`H);R!P:'`@=VET:"!M 23 | K86EL<&%R 7 | --FILE-- 8 | 20 | 21 | 23 | 24 | 25 | 26 | TXT; 27 | 28 | $msg = new \MimeMessage("var", $original); 29 | $contents = $msg->extract_body(\MAILPARSE_EXTRACT_RETURN); 30 | 31 | var_dump($contents); 32 | 33 | exit(0); 34 | ?> 35 | --EXPECTF-- 36 | string(%d) " 8 | --FILE-- 9 | get_child_count(); 13 | $uue = $msg->enum_uue(); 14 | var_dump($n); 15 | var_dump($uue); 16 | ?> 17 | --EXPECT-- 18 | int(0) 19 | array(3) { 20 | [0]=> 21 | array(4) { 22 | ["filename"]=> 23 | string(11) "README1.dat" 24 | ["start-pos"]=> 25 | int(654) 26 | ["filesize"]=> 27 | int(88) 28 | ["end-pos"]=> 29 | int(785) 30 | } 31 | [1]=> 32 | array(4) { 33 | ["filename"]=> 34 | string(11) "README2.dat" 35 | ["start-pos"]=> 36 | int(808) 37 | ["filesize"]=> 38 | int(88) 39 | ["end-pos"]=> 40 | int(939) 41 | } 42 | [2]=> 43 | array(4) { 44 | ["filename"]=> 45 | string(11) "README3.dat" 46 | ["start-pos"]=> 47 | int(962) 48 | ["filesize"]=> 49 | int(88) 50 | ["end-pos"]=> 51 | int(1093) 52 | } 53 | } 54 | -------------------------------------------------------------------------------- /tests/testdata/oeuue: 
-------------------------------------------------------------------------------- 1 | Path: test.ru!not-for-mail 2 | From: "mig" 3 | Newsgroups: support 4 | Subject: attch & OE 5 | Date: Wed, 23 Apr 2003 17:15:48 +0500 6 | Organization: test.ru | multi-purpose news-server 7 | X-FUBAR-Lines: 22 8 | Message-ID: 9 | NNTP-Posting-Host: mik 10 | X-Trace: onlinemedia.ru 1051100148 14468 192.168.111.2 (23 Apr 2003 12:15:48 GMT) 11 | X-Complaints-To: news@test.ru 12 | NNTP-Posting-Date: Wed, 23 Apr 2003 12:15:48 +0000 (UTC) 13 | X-Priority: 3 14 | X-MSMail-Priority: Normal 15 | X-Newsreader: Microsoft Outlook Express 6.00.2800.1106 16 | X-MimeOLE: Produced By Microsoft MimeOLE V6.00.2800.1106 17 | Xref: test.ru support:65 18 | 19 | Hello! 20 | Subj. 21 | 22 | begin 644 README1.dat 23 | M1F]O0F%R("T@0F%A86%A"@I297%U:7)E;65N=',Z(`H);R!P:'`@=VET:"!M 24 | K86EL<&%R 9 | --POST-- 10 | --GET-- 11 | --FILE-- 12 | 35 | --EXPECT-- 36 | :sysmail@Some-Group.Some-Org 37 | ":sysmail"@Some-Group.Some-Org 38 | I am the greatest the 39 | Muhammed.Ali@Vegas.WBA 40 | ... 41 | strange 42 | :":sysmail"@Some-Group.Some-Org,Muhammed.Ali@Vegas.WBA; 43 | ":sysmail"@Some-Group.Some-Org 44 | Muhammed.Ali@Vegas.WBA 45 | ... 
46 | 47 | -------------------------------------------------------------------------------- /tests/testdata/qp.exp: -------------------------------------------------------------------------------- 1 | Message: qp 2 | 3 | Part 1 4 | body-line-count => int(27) 5 | charset => string(8) "us-ascii" 6 | content-base => string(1) "/" 7 | content-boundary => string(41) "----=_NextPart_000_0003_01C2566B.C7AC6A50" 8 | content-type => string(15) "multipart/mixed" 9 | ending-pos => int(1438) 10 | line-count => int(47) 11 | starting-pos => int(0) 12 | starting-pos-body => int(839) 13 | transfer-encoding => string(4) "8bit" 14 | 15 | Part 1.1 16 | body-line-count => int(1) 17 | charset => string(8) "us-ascii" 18 | content-base => string(1) "/" 19 | content-charset => string(8) "us-ascii" 20 | content-type => string(10) "text/plain" 21 | ending-pos => int(1048) 22 | line-count => int(5) 23 | starting-pos => int(929) 24 | starting-pos-body => int(1008) 25 | transfer-encoding => string(4) "7bit" 26 | 27 | Part 1.2 28 | body-line-count => int(7) 29 | charset => string(8) "us-ascii" 30 | content-base => string(1) "/" 31 | content-disposition => string(10) "attachment" 32 | content-name => string(6) "README" 33 | content-type => string(24) "application/octet-stream" 34 | disposition-filename => string(6) "README" 35 | ending-pos => int(1389) 36 | line-count => int(13) 37 | starting-pos => int(1093) 38 | starting-pos-body => int(1245) 39 | transfer-encoding => string(16) "quoted-printable" 40 | -------------------------------------------------------------------------------- /tests/testdata/mime.exp: -------------------------------------------------------------------------------- 1 | Message: mime 2 | 3 | Part 1 4 | body-line-count => int(27) 5 | charset => string(8) "us-ascii" 6 | content-base => string(1) "/" 7 | content-boundary => string(41) "----=_NextPart_000_0007_01C2566B.DA7C64F0" 8 | content-type => string(15) "multipart/mixed" 9 | ending-pos => int(1441) 10 | line-count => 
int(47) 11 | starting-pos => int(0) 12 | starting-pos-body => int(836) 13 | transfer-encoding => string(4) "8bit" 14 | 15 | Part 1.1 16 | body-line-count => int(1) 17 | charset => string(8) "us-ascii" 18 | content-base => string(1) "/" 19 | content-charset => string(8) "us-ascii" 20 | content-type => string(10) "text/plain" 21 | ending-pos => int(1053) 22 | line-count => int(5) 23 | starting-pos => int(926) 24 | starting-pos-body => int(1005) 25 | transfer-encoding => string(4) "7bit" 26 | 27 | Part 1.2 28 | body-line-count => int(7) 29 | charset => string(8) "us-ascii" 30 | content-base => string(1) "/" 31 | content-disposition => string(10) "attachment" 32 | content-name => string(6) "README" 33 | content-type => string(24) "application/octet-stream" 34 | disposition-filename => string(6) "README" 35 | ending-pos => int(1392) 36 | line-count => int(13) 37 | starting-pos => int(1098) 38 | starting-pos-body => int(1250) 39 | transfer-encoding => string(16) "quoted-printable" 40 | -------------------------------------------------------------------------------- /tests/010.phpt: -------------------------------------------------------------------------------- 1 | --TEST-- 2 | Multiple headers not parsed into array bug #6862 3 | --SKIPIF-- 4 | 8 | --FILE-- 9 | 30 | --EXPECT-- 31 | array(11) { 32 | ["headers"]=> 33 | array(1) { 34 | ["received"]=> 35 | array(2) { 36 | [0]=> 37 | string(54) "from mail pickup service by hotmail.com with Microsoft" 38 | [1]=> 39 | string(50) "from 66.178.40.49 by BAY116-DAV8.phx.gbl with DAV;" 40 | } 41 | } 42 | ["starting-pos"]=> 43 | int(0) 44 | ["starting-pos-body"]=> 45 | int(200) 46 | ["ending-pos"]=> 47 | int(200) 48 | ["ending-pos-body"]=> 49 | int(200) 50 | ["line-count"]=> 51 | int(6) 52 | ["body-line-count"]=> 53 | int(0) 54 | ["charset"]=> 55 | string(8) "us-ascii" 56 | ["transfer-encoding"]=> 57 | string(4) "8bit" 58 | ["content-type"]=> 59 | string(10) "text/plain" 60 | ["content-base"]=> 61 | string(1) "/" 62 | } 63 | 
-------------------------------------------------------------------------------- /tests/testdata/qp.txt: -------------------------------------------------------------------------------- 1 | Return-Path: 2 | Received: from TITAN (titan.brainnet.i [192.168.2.7]) 3 | by zaneeb.brainnet.i (8.10.2/8.10.2/SuSE Linux 8.10.0-0.3) with ESMTP id g87Bel721254 4 | for ; Sat, 7 Sep 2002 12:40:47 +0100 5 | X-Authentication-Warning: zaneeb.brainnet.i: Host titan.brainnet.i [192.168.2.7] claimed to be TITAN 6 | From: "Wez Furlong" 7 | To: 8 | Subject: qp attachments 9 | Date: Sat, 7 Sep 2002 12:40:37 +0100 10 | Message-ID: <000201c25663$65e80250$0702a8c0@TITAN> 11 | MIME-Version: 1.0 12 | Content-Type: multipart/mixed; 13 | boundary="----=_NextPart_000_0003_01C2566B.C7AC6A50" 14 | X-Priority: 3 (Normal) 15 | X-MSMail-Priority: Normal 16 | X-Mailer: Microsoft Outlook, Build 10.0.2627 17 | Importance: Normal 18 | X-MimeOLE: Produced By Microsoft MimeOLE V6.00.2600.0000 19 | X-TBR-DestBox: user.wez (auth as wez) (wez:) 20 | 21 | This is a multi-part message in MIME format. 22 | 23 | ------=_NextPart_000_0003_01C2566B.C7AC6A50 24 | Content-Type: text/plain; 25 | charset="us-ascii" 26 | Content-Transfer-Encoding: 7bit 27 | 28 | this is a message with a qp attachment. 
29 | 30 | ------=_NextPart_000_0003_01C2566B.C7AC6A50 31 | Content-Type: application/octet-stream; 32 | name="README" 33 | Content-Transfer-Encoding: quoted-printable 34 | Content-Disposition: attachment; 35 | filename="README" 36 | 37 | Xnti rpam and gnti yirusjkools for bendmail=0A= 38 | =0A= 39 | Requirements: =0A= 40 | o php with mailparse=0A= 41 | o virus scanner (optional)=0A= 42 | =0A= 43 | =0A= 44 | 45 | ------=_NextPart_000_0003_01C2566B.C7AC6A50-- 46 | 47 | 48 | -------------------------------------------------------------------------------- /tests/testdata/mime.txt: -------------------------------------------------------------------------------- 1 | Return-Path: 2 | Received: from TITAN (titan.brainnet.i [192.168.2.7]) 3 | by zaneeb.brainnet.i (8.10.2/8.10.2/SuSE Linux 8.10.0-0.3) with ESMTP id g87BfJ721279 4 | for ; Sat, 7 Sep 2002 12:41:19 +0100 5 | X-Authentication-Warning: zaneeb.brainnet.i: Host titan.brainnet.i [192.168.2.7] claimed to be TITAN 6 | From: "Wez Furlong" 7 | To: 8 | Subject: mime attach 9 | Date: Sat, 7 Sep 2002 12:41:14 +0100 10 | Message-ID: <000601c25663$78b7fcf0$0702a8c0@TITAN> 11 | MIME-Version: 1.0 12 | Content-Type: multipart/mixed; 13 | boundary="----=_NextPart_000_0007_01C2566B.DA7C64F0" 14 | X-Priority: 3 (Normal) 15 | X-MSMail-Priority: Normal 16 | X-Mailer: Microsoft Outlook, Build 10.0.2627 17 | Importance: Normal 18 | X-MimeOLE: Produced By Microsoft MimeOLE V6.00.2600.0000 19 | X-TBR-DestBox: user.wez (auth as wez) (wez:) 20 | 21 | This is a multi-part message in MIME format. 22 | 23 | ------=_NextPart_000_0007_01C2566B.DA7C64F0 24 | Content-Type: text/plain; 25 | charset="us-ascii" 26 | Content-Transfer-Encoding: 7bit 27 | 28 | this is a message with regular mime attachment. 
29 | 30 | ------=_NextPart_000_0007_01C2566B.DA7C64F0 31 | Content-Type: application/octet-stream; 32 | name="README" 33 | Content-Transfer-Encoding: quoted-printable 34 | Content-Disposition: attachment; 35 | filename="README" 36 | 37 | FooBar - blah blah blah foo bar bar baaaa=0A= 38 | =0A= 39 | Requirements: =0A= 40 | o php with mailparse=0A= 41 | o virus scanner (optional)=0A= 42 | =0A= 43 | =0A= 44 | 45 | ------=_NextPart_000_0007_01C2566B.DA7C64F0-- 46 | 47 | 48 | -------------------------------------------------------------------------------- /tests/gh21.phpt: -------------------------------------------------------------------------------- 1 | --TEST-- 2 | GH issue #21 (Segmentation fault with mailparse_msg_create()) 3 | --SKIPIF-- 4 | 7 | --FILE-- 8 | 39 | To: "LinkedIn" 40 | Content-Type: multipart/alternative; 41 | boundary="part2_boundary" 42 | 43 | --part2_boundary 44 | Content-Type: text/plain; charset=UTF-8 45 | Content-Transfer-Encoding: quoted-printable 46 | 47 | foo 48 | 49 | --part2_boundary 50 | Content-Type: text/html; charset=UTF-8 51 | Content-Transfer-Encoding: quoted-printable 52 | 53 | foo 54 | 55 | --part2_boundary-- 56 | 57 | --part1_boundary-- 58 | EOF; 59 | 60 | $resource = mailparse_msg_create(); 61 | 62 | $r = mailparse_msg_parse($resource, $data); 63 | echo 'ok', PHP_EOL; 64 | 65 | mailparse_msg_free($resource); 66 | 67 | exit(0); 68 | ?> 69 | --EXPECTF-- 70 | ok 71 | -------------------------------------------------------------------------------- /tests/gh22.phpt: -------------------------------------------------------------------------------- 1 | --TEST-- 2 | GH issue #22 (Segmentation fault with mailparse_msg_create()) 3 | --SKIPIF-- 4 | 7 | --FILE-- 8 | ; Mon, 30 Apr 2018 13:03:00 +0000 (UTC) 14 | To: ProtonMail Test 15 | Subject: Buggy message 16 | From: dummyaddress@domain.com 17 | Date: Tue, 02 Aug 2022 20:53:51 -0400 18 | MIME-Version: 1.0 19 | Content-Type: multipart/mixed; 20 | boundary="MCBoundary=_12208022055093421" 21 | 
22 | --MCBoundary=_12208022055093421 23 | Content-Type: multipart/related; 24 | boundary="MCBoundary=_12208022055093431" 25 | 26 | --MCBoundary=_12208022055093431 27 | Content-Transfer-Encoding: quoted-printable 28 | Content-Type: text/html; charset=UTF-8 29 | 30 | hello part 1 31 | 32 | --MCBoundary=_12208022055093431-- 33 | 34 | --MCBoundary=_12208022055093421 35 | Content-Type: message/rfc822; 36 | name="a name" 37 | Content-Disposition: inline; 38 | filename="a name" 39 | 40 | Message-Id: 41 | Date: Tue, 02 Aug 2022 20:53:51 -0400 42 | From: some@one.com 43 | To: someone@protonmail.com 44 | Subject: a subject 45 | Content-Type: multipart/alternative; 46 | boundary="MCBoundary=_12208022055093381" 47 | 48 | --MCBoundary=_12208022055093381 49 | Content-Type: text/plain; charset=UTF-8 50 | Content-Transfer-Encoding: quoted-printable 51 | 52 | hello other part 53 | 54 | --MCBoundary=_12208022055093381 55 | Content-Type: text/html; charset=UTF-8 56 | Content-Transfer-Encoding: quoted-printable 57 | 58 | hello again 59 | 60 | --MCBoundary=_12208022055093381-- 61 | 62 | --MCBoundary=_12208022055093421-- 63 | EOF; 64 | 65 | $resource = mailparse_msg_create(); 66 | 67 | $r = mailparse_msg_parse($resource, $data); 68 | echo 'ok', PHP_EOL; 69 | 70 | mailparse_msg_free($resource); 71 | 72 | exit(0); 73 | ?> 74 | --EXPECTF-- 75 | ok 76 | -------------------------------------------------------------------------------- /try.php: -------------------------------------------------------------------------------- 1 | \n"; 34 | /* print a choice of sections */ 35 | foreach($struct as $st) { 36 | echo "\n"; 37 | echo "$st\n"; 38 | /* get a handle on the message resource for a subsection */ 39 | $section = mailparse_msg_get_part($mime, $st); 40 | /* get content-type, encoding and header information for that section */ 41 | $info = mailparse_msg_get_part_data($section); 42 | print_r($info); 43 | echo "\n"; 44 | echo "" . $info["content-type"] . "\n"; 45 | echo "" . 
$info["content-disposition"] . "\n"; 46 | echo "" . $info["disposition-filename"] . "\n"; 47 | echo "" . $info["charset"] . "\n"; 48 | echo ""; 49 | } 50 | echo ""; 51 | 52 | /* if we were called to display a part, do so now */ 53 | if ($showpart) { 54 | /* get a handle on the message resource for the desired part */ 55 | $sec = mailparse_msg_get_part($mime, $showpart); 56 | 57 | echo "
Section $showpart
"; 58 | ob_start(); 59 | /* extract the part from the message file and dump it to the output buffer 60 | * */ 61 | mailparse_msg_extract_part_file($sec, $filename); 62 | $contents = ob_get_contents(); 63 | ob_end_clean(); 64 | /* quote the message for safe display in a browser */ 65 | echo nl2br(htmlentities($contents)) . "
";; 66 | } 67 | ?> 68 | -------------------------------------------------------------------------------- /.github/workflows/ci.yml: -------------------------------------------------------------------------------- 1 | name: Build and Test 2 | on: [push, pull_request] 3 | jobs: 4 | ubuntu: 5 | strategy: 6 | matrix: 7 | version: ['7.3', '7.4', '8.0', '8.1', '8.2', '8.3'] 8 | runs-on: ubuntu-latest 9 | steps: 10 | - name: Install re2c 11 | run: sudo apt-get install -y re2c 12 | - name: Checkout mailparse 13 | uses: actions/checkout@v2 14 | - name: Setup PHP 15 | uses: shivammathur/setup-php@v2 16 | with: 17 | php-version: ${{matrix.version}} 18 | extensions: mbstring 19 | tool: phpize, php-config 20 | - name: phpize 21 | run: phpize 22 | - name: configure 23 | run: ./configure --enable-mailparse 24 | - name: work around missing HAVE_MBSTRING in main/php_config.h 25 | run: echo '#define HAVE_MBSTRING 1' >> config.h 26 | - name: make 27 | run: make 28 | - name: test 29 | run: | 30 | export TEST_PHP_ARGS="-n -d extension=mbstring.so -d extension=modules/mailparse.so" 31 | php run-tests.php -P --show-diff tests 32 | windows: 33 | defaults: 34 | run: 35 | shell: cmd 36 | strategy: 37 | matrix: 38 | version: ["7.4", "8.0"] 39 | arch: [x64, x86] 40 | ts: [ts] 41 | runs-on: windows-latest 42 | steps: 43 | - name: Configure Git 44 | run: git config --global core.autocrlf false 45 | - name: Checkout mailparse 46 | uses: actions/checkout@v2 47 | - name: Setup PHP 48 | id: setup-php 49 | uses: cmb69/setup-php-sdk@v0.2 50 | with: 51 | version: ${{matrix.version}} 52 | arch: ${{matrix.arch}} 53 | ts: ${{matrix.ts}} 54 | - name: Enable Developer Command Prompt 55 | uses: ilammy/msvc-dev-cmd@v1 56 | with: 57 | arch: ${{matrix.arch}} 58 | toolset: ${{steps.setup-php.outputs.toolset}} 59 | - name: phpize 60 | run: phpize 61 | - name: configure 62 | run: configure --enable-mailparse --with-prefix=${{steps.setup-php.outputs.prefix}} 63 | - name: make 64 | run: nmake 65 | - name: test 66 | 
run: nmake test TESTS="-d extension=${{steps.setup-php.outputs.prefix}}\ext\php_mbstring.dll --show-diff tests" 67 | -------------------------------------------------------------------------------- /tests/testdata/m0001.txt: -------------------------------------------------------------------------------- 1 | From name@company.com Sun Jun 16 17:50:14 2013 2 | Received: from mail-ie0-f173.google.com (mail-ie0-f173.google.com [209.85.223.173]) 3 | by company2.com (Postfix) with ESMTPS id 8025F4681306 4 | for ; Sun, 16 Jun 2013 17:50:14 +0200 (CEST) 5 | Received: by mail-ie0-f173.google.com with SMTP id k13so5038157iea.32 6 | for ; Sun, 16 Jun 2013 08:50:12 -0700 (PDT) 7 | X-Google-DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; 8 | d=google.com; s=20120113; 9 | h=mime-version:x-originating-ip:date:message-id:subject:from:to 10 | :content-type:x-gm-message-state; 11 | bh=yZj7VZhlR/PC1Ppu6D0HAEDO6wGp8lIzn0x6tvCe3I0=; 12 | b=jDV0df6Zqrc7DP43NsjWyCKGyr8fOtvZXb0Qn91R51Q8zoALJyjEn6RBmWnc7OONJz 13 | asvLY+/JdPz8+z/TrK3x+0EHGMMSQb4vM6gtKVZc2b1M/EBt0vjyZw1d9DFhDgCQ8XnA 14 | VHAHEW9LpA8GEXEJIJzLm/ALK08jvSas/Y7FLUnI5pfsuy5cqdupQO/krfmuHP1THRGG 15 | RV/mPaLXXGmGjpmgbJGpiXwHdGvOgwXd0/beBWmNBp4DcXOocpy3Ugw64ocF35ryEmuE 16 | jdtVB+JIcrsQWLueoB4lo4lkBfeej58pv0dH4WX/6T1fZXwbyAGtYgq4cPZ1OZKlQgPa 17 | aChQ== 18 | MIME-Version: 1.0 19 | X-Received: by 10.50.129.4 with SMTP id ns4mr3026228igb.4.1371397812268; Sun, 20 | 16 Jun 2013 08:50:12 -0700 (PDT) 21 | Received: by 10.50.60.1 with HTTP; Sun, 16 Jun 2013 08:50:12 -0700 (PDT) 22 | X-Originating-IP: [81.33.22.111] 23 | Date: Sun, 16 Jun 2013 17:50:12 +0200 24 | Message-ID: 25 | Subject: =?ISO-8859-1?Q?Mail_avec_fichier_attach=E9_de_1ko?= 26 | From: Name 27 | To: name@company2.com 28 | Content-Type: multipart/mixed; boundary=047d7b1635f77236f404df476f85 29 | X-Gm-Message-State: ALoCoQnQSAw+kmVESrneMgv1tjuPZLL9itnGr0ueHbj8xt5Y1NkSMtHMT4mREA6HEEZO/aD18SJ2 30 | 31 | --047d7b1635f77236f404df476f85 32 | Content-Type: 
multipart/alternative; boundary=047d7b1635f77236f004df476f83 33 | 34 | --047d7b1635f77236f004df476f83 35 | Content-Type: text/plain; charset=ISO-8859-1 36 | 37 | 38 | 39 | --047d7b1635f77236f004df476f83 40 | Content-Type: text/html; charset=ISO-8859-1 41 | 42 |

43 | 44 | --047d7b1635f77236f004df476f83-- 45 | --047d7b1635f77236f404df476f85 46 | Content-Type: application/octet-stream; name=attach01 47 | Content-Disposition: attachment; filename=attach01 48 | Content-Transfer-Encoding: base64 49 | X-Attachment-Id: f_hi0eudw60 50 | 51 | YQo= 52 | --047d7b1635f77236f404df476f85-- 53 | -------------------------------------------------------------------------------- /tests/bug75825.phpt: -------------------------------------------------------------------------------- 1 | --TEST-- 2 | Bug #75825 (mailparse_uudecode_all doesn't parse multiple files) 3 | --SKIPIF-- 4 | 7 | --FILE-- 8 | &5C=71E9"!B>2!T:&4@8V]M;6%N9"!I;G1E2!B87-H*#$I+"!I9B!^+RYB87-H7W!R;V9I;&4@;W(@?B\N8F%S:%]L 14 | M;V=I;@HC(&5X:7-T&%M 15 | M<&QE&%M<&QE 49 | --EXPECTF-- 50 | array(4) { 51 | [0]=> 52 | array(1) { 53 | ["filename"]=> 54 | string(%d) "%s" 55 | } 56 | [1]=> 57 | array(2) { 58 | ["origfilename"]=> 59 | string(8) ".profile" 60 | ["filename"]=> 61 | string(%d) "%s" 62 | } 63 | [2]=> 64 | array(2) { 65 | ["origfilename"]=> 66 | string(12) ".bash_logout" 67 | ["filename"]=> 68 | string(%d) "%s" 69 | } 70 | [3]=> 71 | array(2) { 72 | ["origfilename"]=> 73 | string(16) ".selected_editor" 74 | ["filename"]=> 75 | string(%d) "%s" 76 | } 77 | } 78 | -------------------------------------------------------------------------------- /php_mailparse_rfc822.h: -------------------------------------------------------------------------------- 1 | /* 2 | +----------------------------------------------------------------------+ 3 | | Copyright (c) The PHP Group | 4 | +----------------------------------------------------------------------+ 5 | | This source file is subject to version 3.01 of the PHP license, | 6 | | that is bundled with this package in the file LICENSE, and is | 7 | | available at through the world-wide-web at | 8 | | http://www.php.net/license/3_01.txt. 
| 9 | | If you did not receive a copy of the PHP license and are unable to | 10 | | obtain it through the world-wide-web, please send a note to | 11 | | license@php.net so we can mail you a copy immediately. | 12 | +----------------------------------------------------------------------+ 13 | | Author: Wez Furlong | 14 | +----------------------------------------------------------------------+ 15 | */ 16 | 17 | #ifndef php_mailparse_rfc822_h 18 | #define php_mailparse_rfc822_h 19 | 20 | typedef struct _php_rfc822_token php_rfc822_token_t; 21 | typedef struct _php_rfc822_tokenized php_rfc822_tokenized_t; 22 | typedef struct _php_rfc822_address php_rfc822_address_t; 23 | typedef struct _php_rfc822_addresses php_rfc822_addresses_t; 24 | 25 | #define php_rfc822_token_is_atom(tok) ( (tok) == 0 || (tok) == '"' || (tok) == '(' ) 26 | struct _php_rfc822_token { 27 | int token; 28 | const char *value; 29 | int valuelen; 30 | }; 31 | 32 | struct _php_rfc822_tokenized { 33 | php_rfc822_token_t *tokens; 34 | int ntokens; 35 | char *buffer; 36 | }; 37 | 38 | struct _php_rfc822_address { 39 | char *name; 40 | char *address; 41 | int is_group; 42 | }; 43 | 44 | struct _php_rfc822_addresses { 45 | php_rfc822_address_t *addrs; 46 | int naddrs; 47 | }; 48 | 49 | PHP_MAILPARSE_API php_rfc822_tokenized_t *php_mailparse_rfc822_tokenize(const char *header, int report_errors); 50 | PHP_MAILPARSE_API void php_rfc822_tokenize_free(php_rfc822_tokenized_t *toks); 51 | 52 | PHP_MAILPARSE_API php_rfc822_addresses_t *php_rfc822_parse_address_tokens(php_rfc822_tokenized_t *toks); 53 | PHP_MAILPARSE_API void php_rfc822_free_addresses(php_rfc822_addresses_t *addrs); 54 | 55 | #define PHP_RFC822_RECOMBINE_IGNORE_COMMENTS 1 56 | #define PHP_RFC822_RECOMBINE_STRTOLOWER 2 57 | #define PHP_RFC822_RECOMBINE_COMMENTS_TO_QUOTES 4 58 | #define PHP_RFC822_RECOMBINE_SPACE_ATOMS 8 59 | #define PHP_RFC822_RECOMBINE_INCLUDE_QUOTES 16 60 | #define PHP_RFC822_RECOMBINE_COMMENTS_ONLY 32 61 | PHP_MAILPARSE_API char 
*php_rfc822_recombine_tokens(php_rfc822_tokenized_t *toks, int first_token, int n_tokens, int flags); 62 | 63 | void php_rfc822_print_tokens(php_rfc822_tokenized_t *toks); 64 | 65 | #endif 66 | -------------------------------------------------------------------------------- /tests/006.phpt: -------------------------------------------------------------------------------- 1 | --TEST-- 2 | Check extract_part_file 3 | --SKIPIF-- 4 | 9 | --POST-- 10 | --GET-- 11 | --FILE-- 12 | \n"; 39 | echo $result; 40 | 41 | echo "\nExtract to open file\n"; 42 | $fpdest = tmpfile(); 43 | mailparse_msg_extract_part_file($mime, $fp, $fpdest); 44 | echo "\nrewinding\n"; 45 | rewind($fpdest); 46 | fpassthru($fpdest); 47 | 48 | echo "\nExtract via user function\n"; 49 | $cbdata = ""; 50 | function callbackfunc($data) { 51 | $GLOBALS["cbdata"] .= $data; 52 | } 53 | mailparse_msg_extract_part_file($mime, $fp, "callbackfunc"); 54 | echo "callback data is:\n"; 55 | var_dump($cbdata); 56 | 57 | echo "\nExtract via Closure\n"; 58 | $cbdata = ""; 59 | $callbackfunc = function ($data) { 60 | $GLOBALS["cbdata"] .= $data; 61 | }; 62 | mailparse_msg_extract_part_file($mime, $fp, $callbackfunc); 63 | echo "callback data is:\n"; 64 | var_dump($cbdata); 65 | 66 | echo "\nExtract whole part to output\n"; 67 | mailparse_msg_extract_whole_part_file($mime, $fp); 68 | 69 | echo "\nExtract part from string to output\n"; 70 | mailparse_msg_extract_part($mime, $text); 71 | fclose($fpdest); 72 | fclose($fp); 73 | 74 | ?> 75 | --EXPECT-- 76 | Extract to output 77 | hello, this is some text hello. 78 | blah blah blah. 79 | Extract and return as string 80 | --> 81 | hello, this is some text hello. 82 | blah blah blah. 83 | 84 | Extract to open file 85 | 86 | rewinding 87 | hello, this is some text hello. 88 | blah blah blah. 89 | 90 | Extract via user function 91 | callback data is: 92 | string(48) "hello, this is some text hello. 93 | blah blah blah. 
94 | " 95 | 96 | Extract via Closure 97 | callback data is: 98 | string(48) "hello, this is some text hello. 99 | blah blah blah. 100 | " 101 | 102 | Extract whole part to output 103 | To: fred@bloggs.com 104 | Mime-Version: 1.0 105 | Content-Type: text/plain 106 | Subject: A simple MIME message 107 | 108 | hello, this is some text hello. 109 | blah blah blah. 110 | 111 | Extract part from string to output 112 | hello, this is some text hello. 113 | blah blah blah. 114 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------- 2 | The PHP License, version 3.01 3 | Copyright (c) 1999 - 2014 The PHP Group. All rights reserved. 4 | -------------------------------------------------------------------- 5 | 6 | Redistribution and use in source and binary forms, with or without 7 | modification, is permitted provided that the following conditions 8 | are met: 9 | 10 | 1. Redistributions of source code must retain the above copyright 11 | notice, this list of conditions and the following disclaimer. 12 | 13 | 2. Redistributions in binary form must reproduce the above copyright 14 | notice, this list of conditions and the following disclaimer in 15 | the documentation and/or other materials provided with the 16 | distribution. 17 | 18 | 3. The name "PHP" must not be used to endorse or promote products 19 | derived from this software without prior written permission. For 20 | written permission, please contact group@php.net. 21 | 22 | 4. Products derived from this software may not be called "PHP", nor 23 | may "PHP" appear in their name, without prior written permission 24 | from group@php.net. You may indicate that your software works in 25 | conjunction with PHP by saying "Foo for PHP" instead of calling 26 | it "PHP Foo" or "phpfoo" 27 | 28 | 5. 
The PHP Group may publish revised and/or new versions of the 29 | license from time to time. Each version will be given a 30 | distinguishing version number. 31 | Once covered code has been published under a particular version 32 | of the license, you may always continue to use it under the terms 33 | of that version. You may also choose to use such covered code 34 | under the terms of any subsequent version of the license 35 | published by the PHP Group. No one other than the PHP Group has 36 | the right to modify the terms applicable to covered code created 37 | under this License. 38 | 39 | 6. Redistributions of any form whatsoever must retain the following 40 | acknowledgment: 41 | "This product includes PHP software, freely available from 42 | ". 43 | 44 | THIS SOFTWARE IS PROVIDED BY THE PHP DEVELOPMENT TEAM ``AS IS'' AND 45 | ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, 46 | THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A 47 | PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PHP 48 | DEVELOPMENT TEAM OR ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, 49 | INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 50 | (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 51 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 52 | HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, 53 | STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 54 | ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED 55 | OF THE POSSIBILITY OF SUCH DAMAGE. 56 | 57 | -------------------------------------------------------------------- 58 | 59 | This software consists of voluntary contributions made by many 60 | individuals on behalf of the PHP Group. 61 | 62 | The PHP Group can be contacted via Email at group@php.net. 63 | 64 | For more information on the PHP Group and the PHP project, 65 | please see . 
66 | 67 | PHP includes the Zend Engine, freely available at 68 | . 69 | -------------------------------------------------------------------------------- /php_mailparse.h: -------------------------------------------------------------------------------- 1 | /* 2 | +----------------------------------------------------------------------+ 3 | | Copyright (c) The PHP Group | 4 | +----------------------------------------------------------------------+ 5 | | This source file is subject to version 3.01 of the PHP license, | 6 | | that is bundled with this package in the file LICENSE, and is | 7 | | available at through the world-wide-web at | 8 | | http://www.php.net/license/3_01.txt. | 9 | | If you did not receive a copy of the PHP license and are unable to | 10 | | obtain it through the world-wide-web, please send a note to | 11 | | license@php.net so we can mail you a copy immediately. | 12 | +----------------------------------------------------------------------+ 13 | | Author: Wez Furlong | 14 | | Credit also given to Double Precision Inc. who wrote the code that | 15 | | the support routines for this extension were based upon. 
| 16 | +----------------------------------------------------------------------+ 17 | */ 18 | 19 | #ifndef PHP_MAILPARSE_H 20 | #define PHP_MAILPARSE_H 21 | 22 | extern zend_module_entry mailparse_module_entry; 23 | #define phpext_mailparse_ptr &mailparse_module_entry 24 | 25 | #define PHP_MAILPARSE_VERSION "3.1.9" 26 | 27 | #ifdef PHP_WIN32 28 | #define PHP_MAILPARSE_API __declspec(dllexport) 29 | #else 30 | #define PHP_MAILPARSE_API 31 | #endif 32 | 33 | PHP_MINIT_FUNCTION(mailparse); 34 | PHP_MSHUTDOWN_FUNCTION(mailparse); 35 | PHP_RINIT_FUNCTION(mailparse); 36 | PHP_RSHUTDOWN_FUNCTION(mailparse); 37 | PHP_MINFO_FUNCTION(mailparse); 38 | 39 | PHP_FUNCTION(mailparse_msg_parse_file); 40 | PHP_FUNCTION(mailparse_msg_get_part); 41 | PHP_FUNCTION(mailparse_msg_get_structure); 42 | PHP_FUNCTION(mailparse_msg_get_part_data); 43 | PHP_FUNCTION(mailparse_msg_extract_part); 44 | PHP_FUNCTION(mailparse_msg_extract_part_file); 45 | PHP_FUNCTION(mailparse_msg_extract_whole_part_file); 46 | 47 | PHP_FUNCTION(mailparse_msg_create); 48 | PHP_FUNCTION(mailparse_msg_free); 49 | PHP_FUNCTION(mailparse_msg_parse); 50 | PHP_FUNCTION(mailparse_msg_parse_file); 51 | 52 | PHP_FUNCTION(mailparse_msg_find); 53 | PHP_FUNCTION(mailparse_msg_getstructure); 54 | PHP_FUNCTION(mailparse_msg_getinfo); 55 | PHP_FUNCTION(mailparse_msg_extract); 56 | PHP_FUNCTION(mailparse_msg_extract_file); 57 | PHP_FUNCTION(mailparse_rfc822_parse_addresses); 58 | PHP_FUNCTION(mailparse_determine_best_xfer_encoding); 59 | PHP_FUNCTION(mailparse_stream_encode); 60 | PHP_FUNCTION(mailparse_uudecode_all); 61 | 62 | PHP_FUNCTION(mailparse_test); 63 | 64 | PHP_MAILPARSE_API int php_mailparse_le_mime_part(void); 65 | PHP_MAILPARSE_API char* php_mailparse_msg_name(void); 66 | 67 | /* mimemessage object */ 68 | PHP_METHOD(mimemessage, __construct); 69 | PHP_METHOD(mimemessage, get_child); 70 | PHP_METHOD(mimemessage, get_child_count); 71 | PHP_METHOD(mimemessage, get_parent); 72 | PHP_METHOD(mimemessage, extract_headers); 
73 | PHP_METHOD(mimemessage, extract_body); 74 | PHP_METHOD(mimemessage, enum_uue); 75 | PHP_METHOD(mimemessage, extract_uue); 76 | PHP_METHOD(mimemessage, remove); 77 | PHP_METHOD(mimemessage, add_child); 78 | 79 | # include "ext/mbstring/libmbfl/mbfl/mbfilter.h" 80 | 81 | #include "php_mailparse_rfc822.h" 82 | #include "php_mailparse_mime.h" 83 | 84 | #define MAILPARSE_BUFSIZ 4096 85 | ZEND_BEGIN_MODULE_GLOBALS(mailparse) 86 | char * def_charset; /* default charset for use in (re)writing mail */ 87 | ZEND_END_MODULE_GLOBALS(mailparse); 88 | 89 | extern ZEND_DECLARE_MODULE_GLOBALS(mailparse); 90 | 91 | 92 | #ifdef ZTS 93 | #define MAILPARSEG(v) TSRMG(mailparse_globals_id, zend_mailparse_globals *, v) 94 | #else 95 | #define MAILPARSEG(v) (mailparse_globals.v) 96 | #endif 97 | 98 | #endif 99 | 100 | 101 | /* 102 | * Local variables: 103 | * tab-width: 4 104 | * c-basic-offset: 4 105 | * End: 106 | * vim: sw=4 ts=4 107 | */ 108 | -------------------------------------------------------------------------------- /arginfo.h: -------------------------------------------------------------------------------- 1 | /* 2 | +----------------------------------------------------------------------+ 3 | | Copyright (c) The PHP Group | 4 | +----------------------------------------------------------------------+ 5 | | This source file is subject to version 3.01 of the PHP license, | 6 | | that is bundled with this package in the file LICENSE, and is | 7 | | available at through the world-wide-web at | 8 | | http://www.php.net/license/3_01.txt. | 9 | | If you did not receive a copy of the PHP license and are unable to | 10 | | obtain it through the world-wide-web, please send a note to | 11 | | license@php.net so we can mail you a copy immediately. | 12 | +----------------------------------------------------------------------+ 13 | | Author: Wez Furlong | 14 | | Credit also given to Double Precision Inc. 
who wrote the code that | 15 | | the support routines for this extension were based upon. | 16 | +----------------------------------------------------------------------+ 17 | */ 18 | 19 | #ifndef PHP_MAILPARSE_ARGINFO_H 20 | #define PHP_MAILPARSE_ARGINFO_H 21 | 22 | ZEND_BEGIN_ARG_INFO_EX(arginfo_mailparse_void, 0, 0, 0) 23 | ZEND_END_ARG_INFO() 24 | 25 | ZEND_BEGIN_ARG_INFO_EX(arginfo_mailparse_test, 0, 0, 1) 26 | ZEND_ARG_INFO(0, header) 27 | ZEND_END_ARG_INFO() 28 | 29 | ZEND_BEGIN_ARG_INFO_EX(arginfo_mailparse_mimemessage_construct, 0, 0, 2) 30 | ZEND_ARG_INFO(0, mode) 31 | ZEND_ARG_INFO(0, source) 32 | ZEND_END_ARG_INFO() 33 | 34 | #define arginfo_mailparse_mimemessage_remove arginfo_mailparse_void 35 | 36 | #define arginfo_mailparse_mimemessage_add_child arginfo_mailparse_void 37 | 38 | #define arginfo_mailparse_mimemessage_get_child_count arginfo_mailparse_void 39 | 40 | #define arginfo_mailparse_mimemessage_get_parent arginfo_mailparse_void 41 | 42 | ZEND_BEGIN_ARG_INFO_EX(arginfo_mailparse_mimemessage_get_child, 0, 0, 1) 43 | ZEND_ARG_INFO(0, item_to_find) 44 | ZEND_END_ARG_INFO() 45 | 46 | ZEND_BEGIN_ARG_INFO_EX(arginfo_mailparse_mimemessage_extract_headers, 0, 0, 0) 47 | ZEND_ARG_INFO(0, mode) 48 | ZEND_ARG_INFO(0, arg) 49 | ZEND_END_ARG_INFO() 50 | 51 | #define arginfo_mailparse_mimemessage_extract_body arginfo_mailparse_mimemessage_extract_headers 52 | 53 | ZEND_BEGIN_ARG_INFO_EX(arginfo_mailparse_mimemessage_extract_uue, 0, 0, 1) 54 | ZEND_ARG_INFO(0, index) 55 | ZEND_ARG_INFO(0, mode) 56 | ZEND_ARG_INFO(0, arg) 57 | ZEND_END_ARG_INFO() 58 | 59 | #define arginfo_mailparse_mimemessage_enum_uue arginfo_mailparse_void 60 | 61 | ZEND_BEGIN_ARG_INFO_EX(arginfo_mailparse_fp, 0, 0, 1) 62 | ZEND_ARG_INFO(0, fp) 63 | ZEND_END_ARG_INFO() 64 | 65 | #define arginfo_mailparse_uudecode_all arginfo_mailparse_fp 66 | 67 | ZEND_BEGIN_ARG_INFO_EX(arginfo_mailparse_rfc822_parse_addresses, 0, 0, 1) 68 | ZEND_ARG_INFO(0, addresses) 69 | ZEND_END_ARG_INFO() 70 | 71 | 
#define arginfo_mailparse_determine_best_xfer_encoding arginfo_mailparse_fp 72 | 73 | ZEND_BEGIN_ARG_INFO_EX(arginfo_mailparse_stream_encode, 0, 0, 3) 74 | ZEND_ARG_INFO(0, source_fp) 75 | ZEND_ARG_INFO(0, dest_fp) 76 | ZEND_ARG_INFO(0, encoding) 77 | ZEND_END_ARG_INFO() 78 | 79 | ZEND_BEGIN_ARG_INFO_EX(arginfo_mailparse_msg_parse, 0, 0, 2) 80 | ZEND_ARG_INFO(0, fp) 81 | ZEND_ARG_INFO(0, data) 82 | ZEND_END_ARG_INFO() 83 | 84 | ZEND_BEGIN_ARG_INFO_EX(arginfo_mailparse_msg_parse_file, 0, 0, 1) 85 | ZEND_ARG_INFO(0, filename) 86 | ZEND_END_ARG_INFO() 87 | 88 | #define arginfo_mailparse_msg_free arginfo_mailparse_fp 89 | 90 | #define arginfo_mailparse_msg_create arginfo_mailparse_void 91 | 92 | #define arginfo_mailparse_msg_get_structure arginfo_mailparse_fp 93 | 94 | ZEND_BEGIN_ARG_INFO_EX(arginfo_mailparse_msg_extract_part, 0, 0, 2) 95 | ZEND_ARG_INFO(0, fp) 96 | ZEND_ARG_INFO(0, msgbody) 97 | ZEND_ARG_INFO(0, callback) 98 | ZEND_END_ARG_INFO() 99 | 100 | ZEND_BEGIN_ARG_INFO_EX(arginfo_mailparse_msg_extract_whole_part_file, 0, 0, 2) 101 | ZEND_ARG_INFO(0, fp) 102 | ZEND_ARG_INFO(0, filename) 103 | ZEND_ARG_INFO(0, callback) 104 | ZEND_END_ARG_INFO() 105 | 106 | #define arginfo_mailparse_msg_extract_part_file arginfo_mailparse_msg_extract_whole_part_file 107 | 108 | #define arginfo_mailparse_msg_get_part_data arginfo_mailparse_fp 109 | 110 | #define arginfo_mailparse_msg_get_part arginfo_mailparse_msg_parse 111 | 112 | #endif 113 | -------------------------------------------------------------------------------- /php_mailparse_mime.h: -------------------------------------------------------------------------------- 1 | /* 2 | +----------------------------------------------------------------------+ 3 | | Copyright (c) The PHP Group | 4 | +----------------------------------------------------------------------+ 5 | | This source file is subject to version 3.01 of the PHP license, | 6 | | that is bundled with this package in the file LICENSE, and is | 7 | | available at 
through the world-wide-web at | 8 | | http://www.php.net/license/3_01.txt. | 9 | | If you did not receive a copy of the PHP license and are unable to | 10 | | obtain it through the world-wide-web, please send a note to | 11 | | license@php.net so we can mail you a copy immediately. | 12 | +----------------------------------------------------------------------+ 13 | | Author: Wez Furlong | 14 | +----------------------------------------------------------------------+ 15 | */ 16 | 17 | #ifndef php_mailparse_mime_h 18 | #define php_mailparse_mime_h 19 | 20 | #include "Zend/zend_smart_string.h" 21 | 22 | typedef struct _php_mimepart php_mimepart; 23 | 24 | struct php_mimeheader_with_attributes { 25 | char *value; 26 | zval attributes; 27 | }; 28 | 29 | PHP_MAILPARSE_API char *php_mimepart_attribute_get(struct php_mimeheader_with_attributes *attr, char *attrname); 30 | 31 | typedef int (*php_mimepart_extract_func_t)(php_mimepart *part, void *context, const char *buf, size_t n); 32 | 33 | /* this is used to remember the source of a mime part. 34 | * It is used mainly for writeable mime parts. 
*/ 35 | struct php_mimepart_source { 36 | enum { mpNONE, mpSTRING, mpSTREAM } kind; 37 | zval zval; 38 | }; 39 | 40 | struct _php_mimepart { 41 | php_mimepart *parent; 42 | zend_resource *rsrc; /* for auto-cleanup */ 43 | int part_index; /* sequence number of this part */ 44 | HashTable children; /* child parts */ 45 | 46 | struct php_mimepart_source source; 47 | 48 | off_t startpos, endpos; /* offsets of this part in the message */ 49 | off_t bodystart, bodyend; /* offsets of the body content of this part */ 50 | size_t nlines, nbodylines; /* number of lines in section/body */ 51 | 52 | char *mime_version; 53 | char *content_transfer_encoding; 54 | char *content_location; 55 | char *content_base; 56 | char *boundary; 57 | char *charset; 58 | 59 | struct php_mimeheader_with_attributes *content_type, *content_disposition; 60 | 61 | zval headerhash; /* a record of all the headers */ 62 | 63 | /* these are used during part extraction */ 64 | php_mimepart_extract_func_t extract_func; 65 | mbfl_convert_filter *extract_filter; 66 | void *extract_context; 67 | 68 | /* these are used during parsing */ 69 | struct { 70 | int in_header:1; 71 | int is_dummy:1; 72 | int completed:1; 73 | 74 | smart_string workbuf; 75 | smart_string headerbuf; 76 | php_mimepart *lastpart; 77 | } parsedata; 78 | 79 | }; 80 | 81 | PHP_MAILPARSE_API php_mimepart *php_mimepart_alloc(); 82 | PHP_MAILPARSE_API void php_mimepart_free(php_mimepart *part); 83 | PHP_MAILPARSE_API int php_mimepart_parse(php_mimepart *part, const char *buf, size_t bufsize); 84 | PHP_MAILPARSE_API void php_mimepart_get_offsets(php_mimepart *part, off_t *start, off_t *end, off_t *start_body, int *nlines, int *nbodylines); 85 | 86 | PHP_MAILPARSE_API void php_mimepart_decoder_prepare(php_mimepart *part, int do_decode, php_mimepart_extract_func_t decoder, void *ptr); 87 | PHP_MAILPARSE_API void php_mimepart_decoder_finish(php_mimepart *part); 88 | PHP_MAILPARSE_API int php_mimepart_decoder_feed(php_mimepart *part, const char 
*buf, size_t bufsize); 89 | 90 | #define php_mimepart_to_zval(zval, part) ZVAL_RES(zval, part) 91 | 92 | typedef struct _php_mimepart_enumerator php_mimepart_enumerator; 93 | struct _php_mimepart_enumerator { 94 | php_mimepart_enumerator *next; 95 | int id; 96 | }; 97 | typedef int (*mimepart_enumerator_func)(php_mimepart *part, php_mimepart_enumerator *enumerator, void *ptr); 98 | typedef int (*mimepart_child_enumerator_func)(php_mimepart *parentpart, php_mimepart *child, int childindex, void *ptr); 99 | 100 | PHP_MAILPARSE_API void php_mimepart_enum_parts(php_mimepart *part, mimepart_enumerator_func callback, void *ptr); 101 | PHP_MAILPARSE_API void php_mimepart_enum_child_parts(php_mimepart *part, mimepart_child_enumerator_func callback, void *ptr); 102 | PHP_MAILPARSE_API php_mimepart *php_mimepart_find_by_name(php_mimepart *parent, const char *name); 103 | PHP_MAILPARSE_API php_mimepart *php_mimepart_find_child_by_position(php_mimepart *parent, int position); 104 | 105 | PHP_MAILPARSE_API void php_mimepart_remove_from_parent(php_mimepart *part); 106 | PHP_MAILPARSE_API void php_mimepart_add_child(php_mimepart *part, php_mimepart *child); 107 | 108 | #endif 109 | -------------------------------------------------------------------------------- /tests/testdata/phpcvs1.txt: -------------------------------------------------------------------------------- 1 | Return-Path: 2 | Received: from secure.thebrainroom.com (raq338.uk2net.com [213.239.42.171]) 3 | by zaneeb.brainnet.i (8.10.2/8.10.2/SuSE Linux 8.10.0-0.3) with ESMTP id g9SLLB208234 4 | for ; Mon, 28 Oct 2002 21:21:11 GMT 5 | X-Authentication-Warning: zaneeb.brainnet.i: Host raq338.uk2net.com [213.239.42.171] claimed to be secure.thebrainroom.com 6 | Received: from pb1.pair.com (pb1.pair.com [216.92.131.4]) 7 | by secure.thebrainroom.com (8.9.3/8.9.3) with SMTP id SAA02428 8 | for ; Mon, 28 Oct 2002 18:50:26 GMT 9 | Received: (qmail 63230 invoked by uid 1010); 28 Oct 2002 18:36:34 -0000 10 | Mailing-List: 
contact php-cvs-help@lists.php.net; run by ezmlm 11 | Precedence: bulk 12 | list-help: 13 | list-unsubscribe: 14 | list-post: 15 | Delivered-To: mailing list php-cvs@lists.php.net 16 | Received: (qmail 63215 invoked from network); 28 Oct 2002 18:36:33 -0000 17 | Reply-to: marcus.boerger@post.rwth-aachen.de 18 | Message-Id: <5.1.0.14.2.20021028193555.01d47c20@mailbox.rwth-aachen.de> 19 | X-Mailer: QUALCOMM Windows Eudora Version 5.1 20 | Date: Mon, 28 Oct 2002 19:36:10 +0100 21 | To: Melvyn Sopacua 22 | From: marcus.boerger@t-online.de (Marcus =?iso-8859-1?Q?B=F6rger?=) 23 | Cc: php-cvs@lists.php.net 24 | In-Reply-To: <5.1.0.14.2.20021028192151.039729e0@yoshimo.webtechs.idg.nl 25 | > 26 | References: <5.1.0.14.2.20021028190015.01d4d650@mailbox.rwth-aachen.de> 27 | <5.1.0.14.2.20021028183051.03c18958@yoshimo.webtechs.idg.nl> 28 | 29 | Mime-Version: 1.0 30 | Content-Type: multipart/alternative; 31 | boundary="=====================_71195359==_.ALT" 32 | X-Sender: 520072483730-0001@t-dialin.net 33 | X-Spam-Status: No, tests=bogofilter, spamicity=0.0% likelihood 34 | Subject: Re: [PHP-CVS] cvs: php4 /ext/iconv/tests 35 | X-TBR-DestBox: user.wez.php.cvs (auth as wez) (wez.php.cvs:) 36 | 37 | --=====================_71195359==_.ALT 38 | Content-Type: text/plain; charset="iso-8859-1"; format=flowed 39 | Content-Transfer-Encoding: quoted-printable 40 | 41 | Then what about: 42 | 43 | cvs -z3 -q diff skipif.inc test.inc (in directory=20 44 | S:\php4-HEAD\ext\iconv\tests\) 45 | Index: skipif.inc 46 | =3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D= 47 | =3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D= 48 | =3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D 49 | RCS file: /repository/php4/ext/iconv/tests/skipif.inc,v 50 | retrieving revision 1.2 51 | diff -u -r1.2 skipif.inc 52 | --- skipif.inc 28 Oct 2002 17:15:21 -0000 1.2 53 | +++ skipif.inc 28 Oct 2002 18:35:25 -0000 54 | @@ -1,10 +1,11 @@ 55 | 93 | 94 | At 19:30 
28.10.2002, Melvyn Sopacua wrote: 95 | >At 19:01 28-10-2002, Marcus B=F6rger wrote: 96 | > 97 | >>At 18:33 28.10.2002, Melvyn Sopacua wrote: 98 | >>>At 18:15 28-10-2002, Marcus B=F6rger wrote: 99 | >>> 100 | >>>> Log: 101 | >>>> fix this tests 102 | >>>> -they did not dl load module in test.... 103 | >>> 104 | >>>Yes, exactly as they shouldn't. 105 | >>> 106 | >>>It's been discussed. Why did you revert that? 107 | >>> 108 | >>>The main reason - to repeat it: 109 | >>>./configure --prefix=3D/previous/install 110 | >>> 111 | >>>dl('foo.so') =3D> foo.so version is previous install, not current! 112 | >> 113 | >>I did so because skipif.inc did so. Maybe we remove that code 114 | >>from both skipif.inc and test.inc now. Feel free to do that. 115 | > 116 | >Ok, then that was a left over. 117 | > 118 | >IMHO we should do a complete overhaul of */tests/* and remove any dl() 119 | >code, or come up with something, that will force the modules/ directory 120 | >on the testkit. 121 | > 122 | >This is again a good reason to setup php.ini-test. 123 | >Windows will then be a problem, which kinda makes the dl() thingy= 124 | troublesome 125 | >as well. 126 | > 127 | >The only thing I can think of to work around it, is to use a configure 128 | >option, that writes --with-test-modules-dir=3D into php.ini-test. But= 129 | that's 130 | >prolly overkill. 
131 | > 132 | > 133 | > 134 | >Met vriendelijke groeten / With kind regards, 135 | > 136 | >Webmaster IDG.nl 137 | >Melvyn Sopacua 138 | > 139 | 140 | --=====================_71195359==_.ALT-- 141 | 142 | -------------------------------------------------------------------------------- /tests/parse_test_messages.phpt: -------------------------------------------------------------------------------- 1 | --TEST-- 2 | Parse messages in testdata dir 3 | --SKIPIF-- 4 | 8 | --FILE-- 9 | $messages[$force_test]); 39 | } 40 | 41 | if (function_exists("version_compare") && version_compare(phpversion(), "4.3", "ge")) { 42 | $wrapper = "compress.zlib://"; 43 | } else { 44 | /* this section is here because it is useful to compare to the 45 | * original implementaion of mailparse for PHP 4.2 */ 46 | $wrapper = "zlib:"; 47 | 48 | function file_get_contents($filename) 49 | { 50 | $fp = fopen($filename, "rb"); 51 | $data = fread($fp, filesize($filename)); 52 | fclose($fp); 53 | return $data; 54 | } 55 | } 56 | 57 | function diff_strings($left, $right) 58 | { 59 | if (is_executable("/usr/bin/diff")) { 60 | $lf = tempnam("/tmp", "mpt"); 61 | $rf = tempnam("/tmp", "mpt"); 62 | 63 | $ok = false; 64 | 65 | $fp = fopen($lf, "wb"); 66 | if ($fp) { 67 | fwrite($fp, $left); 68 | fclose($fp); 69 | 70 | $fp = fopen($rf, "wb"); 71 | if ($fp) { 72 | fwrite($fp, $right); 73 | fclose($fp); 74 | 75 | $ok = true; 76 | } 77 | } 78 | 79 | if ($ok) { 80 | passthru("/usr/bin/diff -u $lf $rf"); 81 | } 82 | 83 | unlink($lf); 84 | unlink($rf); 85 | 86 | if ($ok) 87 | return; 88 | } 89 | 90 | 91 | $left = explode("\n", $left); 92 | $right = explode("\n", $right); 93 | 94 | $n = max(count($left), count($right)); 95 | 96 | $difflines = array(); 97 | 98 | $runstart = null; 99 | $runend = null; 100 | 101 | for ($i = 0; $i < $n; $i++) { 102 | if ($left[$i] != $right[$i]) { 103 | if ($runstart === null) { 104 | $runstart = $i; 105 | $runend = $i; 106 | } else { 107 | /* part of the run */ 108 | $runend = $i; 109 | 
} 110 | } else { 111 | if ($runstart !== null) { 112 | $difflines[] = array($runstart, $runend); 113 | $runstart = null; 114 | $runend = null; 115 | } 116 | } 117 | } 118 | if ($runstart !== null) 119 | $difflines[] = array($runstart, $runend); 120 | 121 | $lastprint = null; 122 | foreach ($difflines as $run) { 123 | list($start, $end) = $run; 124 | 125 | $startline = $start - 3; 126 | if ($startline < 0) 127 | $startline = 0; 128 | $endline = $end; 129 | 130 | if ($lastprint === null) { 131 | echo "@@ Line: " . ($startline+1) . "\n"; 132 | } else if ($startline <= $lastprint) { 133 | $startline = $lastprint+1; 134 | } 135 | 136 | if ($startline > $endline) 137 | continue; 138 | 139 | /* starting context */ 140 | for ($i = $startline; $i < $start; $i++) { 141 | echo " " . $left[$i] . "\n"; 142 | $lastprint = $i; 143 | } 144 | 145 | /* diff run */ 146 | for ($i = $start; $i <= $end; $i++) { 147 | echo "-" . $left[$i] . "\n"; 148 | } 149 | for ($i = $start; $i <= $end; $i++) { 150 | echo "+" . $right[$i] . "\n"; 151 | } 152 | $lastprint = $i; 153 | } 154 | 155 | } 156 | 157 | $skip_keys = array("headers", "ending-pos-body"); 158 | 159 | foreach ($messages as $name => $msgdata) { 160 | $testname = $testdir . "/" . $msgdata["testfile"]; 161 | if (!isset($msgdata["expectfile"])) { 162 | continue; 163 | } 164 | $expectname = $testdir . "/" . $msgdata["expectfile"]; 165 | 166 | $use_wrapper = substr($testname, -3) == ".gz" ? 
$wrapper : ""; 167 | $use_wrapper = $wrapper; 168 | $fp = fopen("$use_wrapper$testname", "rb") or die("failed to open the file!"); 169 | 170 | $mime = mailparse_msg_create(); 171 | $size = 0; 172 | while (!feof($fp)) { 173 | $data = fread($fp, 1024); 174 | //var_dump($data); 175 | if ($data !== false) { 176 | mailparse_msg_parse($mime, $data); 177 | $size += strlen($data); 178 | } 179 | } 180 | fclose($fp); 181 | //var_dump($size); 182 | 183 | $struct = mailparse_msg_get_structure($mime); 184 | 185 | ob_start(); 186 | echo "Message: $name\n"; 187 | foreach($struct as $partname) { 188 | $depth = count(explode(".", $partname)) - 1; 189 | $indent = str_repeat(" ", $depth * 2); 190 | $subpart = mailparse_msg_get_part($mime, $partname); 191 | if (!$subpart) { 192 | var_dump($partname); 193 | echo "\n"; 194 | var_dump($struct); 195 | break; 196 | } 197 | 198 | $data = mailparse_msg_get_part_data($subpart); 199 | echo "\n{$indent}Part $partname\n"; 200 | ksort($data); 201 | foreach ($data as $key => $value) { 202 | if (in_array($key, $skip_keys)) 203 | continue; 204 | echo "$indent$key => "; 205 | var_dump($value); 206 | } 207 | } 208 | $output = ob_get_contents(); 209 | 210 | if ($define_expect) { 211 | $fp = fopen($expectname, "wb"); 212 | fwrite($fp, $output); 213 | fclose($fp); 214 | } else { 215 | 216 | $expect = file_get_contents($expectname); 217 | 218 | if ($output != $expect) { 219 | ob_end_flush(); 220 | diff_strings($expect, $output); 221 | die("FAIL!"); 222 | } 223 | } 224 | 225 | ob_end_clean(); 226 | } 227 | 228 | echo "All messages parsed OK!\n"; 229 | ?> 230 | --EXPECT-- 231 | All messages parsed OK! 232 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # mailparse library for PHP 2 | 3 | Mailparse is an extension for parsing and working with email messages. 4 | 5 | It can deal with rfc822 and rfc2045 (MIME) compliant messages. 
6 | 7 | Mailparse is stream based, which means that it does not keep in-memory 8 | copies of the files it processes - so it is very resource efficient 9 | when dealing with large messages. 10 | 11 | Version 2.1.6 is for PHP 5 12 | 13 | ## OO Syntax 14 | 15 | ```php 16 | data["headers"]["subject"]; 46 | var_dump($msgpart->data); 47 | 48 | echo "The headers are:\n"; 49 | // Display the headers (in raw format) to the browser output. 50 | // You can also use: 51 | // $msgpart->extract_headers(MAILPARSE_EXTRACT_STREAM, $fp); 52 | // to write the headers to the supplied stream at its current 53 | // position. 54 | // 55 | // $var = $msgpart->extract_headers(MAILPARSE_EXTRACT_RETURN); 56 | // to return the headers in a variable. 57 | $msgpart->extract_headers(MAILPARSE_EXTRACT_OUTPUT); 58 | 59 | // Display the body if this part is intended to be displayed: 60 | $n = $msgpart->get_child_count(); 61 | 62 | if ($n == 0) { 63 | // Return the body as a string (the MAILPARSE_EXTRACT parameter 64 | // acts just as it does in the extract_headers method). 65 | $body = $msgpart->extract_body(MAILPARSE_EXTRACT_RETURN); 66 | echo htmlentities($body); 67 | 68 | // This function tells you about any uuencoded attachments 69 | // that are present in this part. 70 | $uue = $msgpart->enum_uue(); 71 | if ($uue !== false) { 72 | var_dump($uue); 73 | foreach($uue as $index => $data) { 74 | // $data => array("filename" => "original filename", 75 | // "filesize" => "size of extracted file", 76 | // ); 77 | 78 | printf("UUE[%d] %s (%d bytes)\n", 79 | $index, $data["filename"], 80 | $data["filesize"]); 81 | 82 | // Display the extracted part to the output. 
83 | $msgpart->extract_uue($index, MAILPARSE_EXTRACT_OUTPUT); 84 | 85 | } 86 | } 87 | 88 | } else { 89 | // Recurse and show children of that part 90 | for ($i = 0; $i < $n; $i++) { 91 | $part =& $msgpart->get_child($i); 92 | display_part_info("$caption child $i", $part); 93 | } 94 | } 95 | } 96 | 97 | ``` 98 | 99 | 100 | The rest of this document may be out of date! Take a look at the [mailparse section of the online manual](http://php.net/manual/en/book.mailparse.php) for more hints about this stuff. 101 | 102 | $mime = mailparse_rfc2045_parse_file($file); 103 | $ostruct = mailparse_rfc2045_getstructure($mime); 104 | foreach($ostruct as $st) { 105 | $section = mailparse_rfc2045_find($mime, $st); 106 | $struct[$st] = mailparse_rfc2045_getinfo($section); 107 | } 108 | var_dump($struct); 109 | ?> 110 | array mailparse_rfc822_parse_addresses(string addresses) 111 | parses an rfc822 compliant recipient list, such as that found in To: or From: 112 | headers. Returns an indexed array of associative arrays, one for each recipient: 113 | array(0 => array("display" => "Wez Furlong", "address" => "wez@php.net")) 114 | 115 | resource mailparse_rfc2045_create() 116 | Create a mime mail resource 117 | 118 | boolean mailparse_rfc2045_parse(resource mimemail, string data) 119 | incrementally parse data into the supplied mime mail resource. 120 | Concept: you can stream portions of a file at a time, rather than read 121 | and parse the whole thing. 122 | 123 | 124 | resource mailparse_rfc2045_parse_file(string $filename) 125 | Parse a file and return a $mime resource. 126 | The file is opened and streamed through the parser. 127 | This is the optimal way of parsing a mail file that 128 | you have on disk. 
129 | 130 | 131 | array mailparse_rfc2045_getstructure(resource mimemail) 132 | returns an array containing a list of message parts in the form: 133 | array("1", "1.1", "1.2") 134 | 135 | resource mailparse_rfc2045_find(resource mimemail, string partname) 136 | returns a mime mail resource representing the named section 137 | 138 | array mailparse_rfc2045_getinfo(resource mimemail) 139 | returns an array containing the bounds, content type and headers of the 140 | section. 141 | 142 | mailparse_rfc2045_extract_file(resource mimemail, string filename[, string 143 | callbackfunc]) 144 | Extracts/decodes a message section from the supplied filename. 145 | If no callback func is supplied, it outputs the results into the current 146 | output buffer, otherwise it calls the callback with a string parameter 147 | containing the text. 148 | The contents of the section will be decoded according to their transfer 149 | encoding - base64, quoted-printable and uuencoded text are supported. 150 | 151 | All operations are done incrementally; streaming the input and output so that 152 | memory usage is on the whole lower than something like procmail or doing this 153 | stuff in PHP space. The aim is that it stays this way to handle large 154 | quantities of email. 155 | 156 | 157 | TODO: 158 | ===== 159 | 160 | . Add support for binhex encoding? 161 | . Extracting a message part without decoding the transfer encoding so that 162 | e.g. pgp-signatures can be verified. 163 | 164 | . Work the other way around - build up an rfc2045 compliant message file from 165 | simple structure information and filenames/variables. 
166 | 167 | vim:tw=78 168 | vim600:syn=php:tw=78 169 | -------------------------------------------------------------------------------- /package.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | mailparse 4 | pecl.php.net 5 | Email message manipulation 6 | Mailparse is an extension for parsing and working with email messages. 7 | It can deal with rfc822 and rfc2045 (MIME) compliant messages. 8 | 9 | 10 | Wez Furlong 11 | wez 12 | wez@php.net 13 | yes 14 | 15 | 16 | Brian Shire 17 | shire 18 | shire@php.net 19 | yes 20 | 21 | 22 | John Jawed 23 | jawed 24 | jawed@php.net 25 | yes 26 | 27 | 28 | Sean DuBois 29 | seander 30 | seander@php.net 31 | yes 32 | 33 | 34 | Remi Collet 35 | remi 36 | remi@php.net 37 | yes 38 | 39 | 2025-09-30 40 | 41 | 3.1.9 42 | 3.0 43 | 44 | 45 | stable 46 | stable 47 | 48 | PHP-3.01 49 | 50 | - use Zend/zend_smart_string.h for PHP 8.5 51 | - Fix memory leak 52 | 53 | 54 | 55 | 56 | 57 | 58 | 59 | 60 | 61 | 62 | 63 | 64 | 65 | 66 | 67 | 68 | 69 | 70 | 71 | 72 | 73 | 74 | 75 | 76 | 77 | 78 | 79 | 80 | 81 | 82 | 83 | 84 | 85 | 86 | 87 | 88 | 89 | 90 | 91 | 92 | 93 | 94 | 95 | 96 | 97 | 98 | 99 | 100 | 101 | 102 | 103 | 104 | 105 | 106 | 107 | 108 | 109 | 110 | 111 | 112 | 113 | 114 | 115 | 116 | 117 | 118 | 119 | 120 | 121 | 122 | 7.3.0 123 | 124 | 125 | 1.10 126 | 127 | 128 | mbstring 129 | 130 | 131 | 132 | mailparse 133 | 134 | 135 | 136 | 2024-10-04 137 | 138 | 3.1.8 139 | 3.0 140 | 141 | 142 | stable 143 | stable 144 | 145 | PHP-3.01 146 | 147 | - PHP 8.4 compatibility 148 | 149 | 150 | 151 | 2024-10-04 152 | 153 | 3.1.7 154 | 3.0 155 | 156 | 157 | stable 158 | stable 159 | 160 | PHP-3.01 161 | 162 | - PHP 8.4 compatibility 163 | 164 | 165 | 166 | 2023-08-22 167 | 168 | 3.1.6 169 | 3.0 170 | 171 | 172 | stable 173 | stable 174 | 175 | PHP-3.01 176 | 177 | - fix #29 Segmentation fault with ISO-2022-JP Subject header 178 | - fix #30 Segmentation fault with UTF-8 encoded X-MS-Iris-MetaData 
header 179 | - revert fix #81403 mailparse_rfc822_parse_addresses drops escaped quotes 180 | 181 | 182 | 183 | 2023-07-27 184 | 185 | 3.1.5 186 | 3.0 187 | 188 | 189 | stable 190 | stable 191 | 192 | PHP-3.01 193 | 194 | - drop usage of removed mbfl APIs in PHP 8.3 195 | - fix GH-27 MimeMessage::__construct() throws TypeError with $mode=stream 196 | - fix GH-21, GH-22, GH-24 segfault in mailparse_msg_parse without mime-version 197 | - fix #81403 mailparse_rfc822_parse_addresses drops escaped quotes 198 | 199 | 200 | 201 | 2022-09-15 202 | 203 | 3.1.4 204 | 3.0 205 | 206 | 207 | stable 208 | stable 209 | 210 | PHP 211 | 212 | - declare mimemessage::data property 213 | - drop support for PHP older than 7.3 214 | 215 | 216 | 217 | 2022-02-21 218 | 219 | 3.1.3 220 | 3.0 221 | 222 | 223 | stable 224 | stable 225 | 226 | PHP 227 | 228 | - Fix #73110: Mails with unknown MIME version are treated as plain/text. (cmb) 229 | - Fix #74233: Parsing multi Content-Disposition causes memory leak. (cmb) 230 | - Fix #75825: mailparse_uudecode_all doesn't parse multiple files. (cmb) 231 | - Fix #81422: Potential double-free in mailparse_uudecode_all(). (cmb) 232 | - Fix gh#19 Segmentation fault with PHP 8.1 in extract_body using MAILPARSE_EXTRACT_RETURN. 
(Remi) 233 | 234 | 235 | 236 | 2021-09-01 237 | 238 | 3.1.2 239 | 3.0 240 | 241 | 242 | stable 243 | stable 244 | 245 | PHP 246 | 247 | - Fix for PHP 8.1 248 | 249 | 250 | 251 | 2020-09-16 252 | 253 | 3.1.1 254 | 3.0 255 | 256 | 257 | stable 258 | stable 259 | 260 | PHP 261 | 262 | - Fixed bug #74215: Memory leaks with mailparse (cmb) 263 | - Fixed bug #76498: Unable to use callable as callback (cmb) 264 | - Compatibility with 8.0.0beta4 265 | 266 | 267 | 268 | 2020-04-22 269 | 270 | 3.1.0 271 | 3.0 272 | 273 | 274 | stable 275 | stable 276 | 277 | PHP 278 | 279 | - add arginfo to all functions 280 | - fix MimeMessage constructor name 281 | 282 | 283 | 284 | 2019-12-19 285 | 286 | 3.0.4 287 | 3.0 288 | 289 | 290 | stable 291 | stable 292 | 293 | PHP 294 | 295 | - Replace ulong with zend_ulong, fix Windows build (cmb) 296 | 297 | 298 | 299 | 2019-03-20 300 | 301 | 3.0.3 302 | 3.0 303 | 304 | 305 | stable 306 | stable 307 | 308 | PHP 309 | 310 | QA Release: 311 | - add missing files in archive 312 | - fix -Wformat warning 313 | - add dependency on mbstring extension 314 | - PHP 7.3 compatibility 315 | 316 | 317 | 318 | 2016-12-07 319 | 320 | 3.0.2 321 | 3.0 322 | 323 | 324 | stable 325 | stable 326 | 327 | PHP 328 | 329 | - Fix segfault in getChild 330 | 331 | 332 | 333 | 2016-01-29 334 | 335 | 3.0.1 336 | 3.0 337 | 338 | 339 | stable 340 | stable 341 | 342 | PHP 343 | 344 | - Fix double free caused by mailparse_msg_free 345 | 346 | 347 | 348 | 2015-12-23 349 | 350 | 3.0.0 351 | 3.0 352 | 353 | 354 | stable 355 | stable 356 | 357 | PHP 358 | 359 | - PHP 7 Release 360 | 361 | 362 | 363 | 364 | -------------------------------------------------------------------------------- /php_mailparse_rfc822.re: -------------------------------------------------------------------------------- 1 | /* 2 | +----------------------------------------------------------------------+ 3 | | Copyright (c) The PHP Group | 4 | 
+----------------------------------------------------------------------+ 5 | | This source file is subject to version 3.01 of the PHP license, | 6 | | that is bundled with this package in the file LICENSE, and is | 7 | | available at through the world-wide-web at | 8 | | http://www.php.net/license/3_01.txt. | 9 | | If you did not receive a copy of the PHP license and are unable to | 10 | | obtain it through the world-wide-web, please send a note to | 11 | | license@php.net so we can mail you a copy immediately. | 12 | +----------------------------------------------------------------------+ 13 | | Author: Wez Furlong | 14 | +----------------------------------------------------------------------+ 15 | */ 16 | 17 | #include "php.h" 18 | #include "php_mailparse.h" 19 | #include "php_mailparse_rfc822.h" 20 | #include "ext/standard/php_string.h" 21 | #include "Zend/zend_smart_string.h" 22 | /*!re2c 23 | CHAR = [\000-\177]; 24 | ALPHA = [\101-\132]|[\141-\172]; 25 | DIGIT = [\060-\071]; 26 | CTL = [\000-\037]|[\177]; 27 | CR = [\015]; 28 | LF = [\012]; 29 | SPACE = [\040]; 30 | HTAB = [\011]; 31 | CRLF = CR LF; 32 | LWSPCHAR = SPACE|HTAB; 33 | LWSP = ( CRLF? 
LWSPCHAR)+; 34 | specials = [()<>@,;:\\".\[\]]; 35 | delimiters = (specials|LWSP); 36 | */ 37 | 38 | /*!re2c 39 | NUL = [\000]; 40 | any = [\001-\377]; 41 | space = (HTAB|SPACE|CR|LF); 42 | atom = [@,;:.%!?=/\[\]]; 43 | allspecials = (atom|[()<>"]|space); 44 | other = any\allspecials; 45 | */ 46 | 47 | #define YYFILL(n) if (YYCURSOR == YYLIMIT) goto stop 48 | #define YYCTYPE unsigned char 49 | #define YYCURSOR p 50 | #define YYLIMIT q 51 | #define YYMARKER r 52 | 53 | #define DEBUG_RFC822_SCANNER 0 54 | 55 | #if DEBUG_RFC822_SCANNER 56 | # define DBG_STATE(lbl) printf(lbl " %d:%c %d:%c\n", *YYCURSOR, *YYCURSOR, *start, *start) 57 | #else 58 | # define DBG_STATE(lbl) 59 | #endif 60 | 61 | #define ADD_ATOM_TOKEN() do { if (tokens) { tokens->token = *start; tokens->value = start; tokens->valuelen = 1; tokens++; } ++*ntokens; } while (0) 62 | #define REPORT_ERR(msg) do { if (report_errors) zend_error(E_WARNING, "input is not rfc822 compliant: %s", msg); } while(0) 63 | #define STR_FREE(ptr) if (ptr) { efree(ptr); } 64 | /* Tokenize a header. 
tokens may be NULL, in which case the number of tokens are 65 | counted, allowing the caller to allocate enough room */ 66 | static void tokenize(const char *header, php_rfc822_token_t *tokens, int *ntokens, int report_errors) 67 | { 68 | register const char *p, *q, *start; 69 | int in_bracket = 0; 70 | 71 | /* NB: parser assumes that the header has two bytes of NUL terminator */ 72 | 73 | YYCURSOR = header; 74 | YYLIMIT = YYCURSOR + strlen(YYCURSOR) + 1; 75 | 76 | *ntokens = 0; 77 | 78 | state_ground: 79 | start = YYCURSOR; 80 | 81 | #if DEBUG_RFC822_SCANNER 82 | printf("ground: start=%p limit=%p cursor=%p: [%d] %s\n", start, YYLIMIT, YYCURSOR, *YYCURSOR, YYCURSOR); 83 | #endif 84 | 85 | /*!re2c 86 | NUL { goto stop; } 87 | space+ { DBG_STATE("SPACE"); goto state_ground; } 88 | (")"|"\\") { REPORT_ERR("token not valid in ground state"); goto state_ground; } 89 | "(" { DBG_STATE("START COMMENT"); 90 | if (tokens) { 91 | tokens->token = '('; 92 | tokens->value = start; 93 | tokens->valuelen = 0; 94 | } 95 | goto state_comment; 96 | } 97 | ["] (any\["])* ["] { DBG_STATE("QUOTE STRING"); 98 | if (tokens) { 99 | tokens->token = '"'; 100 | tokens->value = start + 1; 101 | tokens->valuelen = YYCURSOR - start - 2; 102 | tokens++; 103 | } 104 | ++*ntokens; 105 | 106 | goto state_ground; 107 | } 108 | "<" ">" { DBG_STATE("NULL <>"); 109 | ADD_ATOM_TOKEN(); 110 | if (tokens) { 111 | tokens->token = 0; 112 | tokens->value = ""; 113 | tokens->valuelen = 0; 114 | tokens++; 115 | } 116 | ++*ntokens; 117 | start++; 118 | ADD_ATOM_TOKEN(); 119 | goto state_ground; 120 | } 121 | "<" { DBG_STATE("LANGLE"); 122 | if (in_bracket) { 123 | REPORT_ERR("already in < bracket"); 124 | goto state_ground; 125 | } 126 | in_bracket = 1; 127 | ADD_ATOM_TOKEN(); 128 | goto state_ground; 129 | } 130 | ">" { DBG_STATE("RANGLE"); 131 | if (!in_bracket) { 132 | REPORT_ERR("not in < bracket"); 133 | goto state_ground; 134 | } 135 | in_bracket = 0; 136 | ADD_ATOM_TOKEN(); 137 | goto state_ground; 138 | 
} 139 | atom { DBG_STATE("ATOM"); ADD_ATOM_TOKEN(); goto state_ground; } 140 | other+ { DBG_STATE("ANY"); 141 | if (tokens) { 142 | tokens->token = 0; 143 | tokens->valuelen = YYCURSOR - start; 144 | tokens->value = start; 145 | tokens++; 146 | } 147 | ++*ntokens; 148 | goto state_ground; 149 | } 150 | */ 151 | 152 | state_comment: 153 | { 154 | int comment_depth = 1; 155 | while (1) { 156 | if (*YYCURSOR == 0) { 157 | /* unexpected end of header */ 158 | REPORT_ERR("unexpected end of header"); 159 | /* fake a quoted string for this last token */ 160 | if (tokens) 161 | tokens->token = '"'; 162 | ++*ntokens; 163 | return; 164 | } else if (*YYCURSOR == '(') { 165 | comment_depth++; 166 | } else if (*YYCURSOR == ')' && --comment_depth == 0) { 167 | /* end of nested comment sequence */ 168 | YYCURSOR++; 169 | if (tokens) 170 | tokens->valuelen++; 171 | break; 172 | } else if (*YYCURSOR == '\\' && YYCURSOR[1]) { 173 | YYCURSOR++; 174 | if (tokens) 175 | tokens->valuelen++; 176 | } 177 | YYCURSOR++; 178 | } 179 | if (tokens) { 180 | tokens->valuelen = YYCURSOR - tokens->value; 181 | tokens++; 182 | } 183 | ++*ntokens; 184 | goto state_ground; 185 | } 186 | stop: 187 | #if DEBUG_RFC822_SCANNER 188 | printf("STOPing parser ntokens=%d YYCURSOR=%p YYLIMIT=%p start=%p cursor=[%d] %s start=%s\n", *ntokens, 189 | YYCURSOR, YYLIMIT, start, *YYCURSOR, YYCURSOR, start); 190 | #else 191 | ; 192 | #endif 193 | } 194 | 195 | PHP_MAILPARSE_API php_rfc822_tokenized_t *php_mailparse_rfc822_tokenize(const char *header, int report_errors) 196 | { 197 | php_rfc822_tokenized_t *toks = ecalloc(1, sizeof(php_rfc822_tokenized_t)); 198 | int len = strlen(header); 199 | 200 | toks->buffer = emalloc(len + 2); 201 | strcpy(toks->buffer, header); 202 | toks->buffer[len] = 0; 203 | toks->buffer[len+1] = 0; /* mini hack: the parser sometimes relies in this */ 204 | 205 | tokenize(toks->buffer, NULL, &toks->ntokens, report_errors); 206 | toks->tokens = toks->ntokens ? 
ecalloc(toks->ntokens, sizeof(php_rfc822_token_t)) : NULL; 207 | tokenize(toks->buffer, toks->tokens, &toks->ntokens, report_errors); 208 | return toks; 209 | } 210 | 211 | PHP_MAILPARSE_API void php_rfc822_tokenize_free(php_rfc822_tokenized_t *toks) 212 | { 213 | if (toks->tokens) 214 | efree(toks->tokens); 215 | efree(toks->buffer); 216 | efree(toks); 217 | } 218 | 219 | PHP_MAILPARSE_API char *php_rfc822_recombine_tokens(php_rfc822_tokenized_t *toks, int first_token, int n_tokens, int flags) 220 | { 221 | char *ret = NULL; 222 | int i, upper, last_was_atom = 0, this_is_atom = 0, tok_equiv; 223 | size_t len = 1; /* for the NUL terminator */ 224 | 225 | upper = first_token + n_tokens; 226 | if (upper > toks->ntokens) 227 | upper = toks->ntokens; 228 | 229 | for (i = first_token; i < upper; i++, last_was_atom = this_is_atom) { 230 | 231 | tok_equiv = toks->tokens[i].token; 232 | if (tok_equiv == '(' && flags & PHP_RFC822_RECOMBINE_COMMENTS_TO_QUOTES) 233 | tok_equiv = '"'; 234 | 235 | if (flags & PHP_RFC822_RECOMBINE_IGNORE_COMMENTS && tok_equiv == '(') 236 | continue; 237 | if (flags & PHP_RFC822_RECOMBINE_COMMENTS_ONLY && tok_equiv != '(' && !(toks->tokens[i].token == '(' && flags & PHP_RFC822_RECOMBINE_COMMENTS_TO_QUOTES)) 238 | continue; 239 | 240 | this_is_atom = php_rfc822_token_is_atom(toks->tokens[i].token); 241 | if (this_is_atom && last_was_atom && flags & PHP_RFC822_RECOMBINE_SPACE_ATOMS) 242 | len++; /* allow room for a space */ 243 | 244 | if (flags & PHP_RFC822_RECOMBINE_INCLUDE_QUOTES && tok_equiv == '"') 245 | len += 2; 246 | 247 | len += toks->tokens[i].valuelen; 248 | } 249 | 250 | last_was_atom = this_is_atom = 0; 251 | 252 | ret = emalloc(len); 253 | 254 | for (i = first_token, len = 0; i < upper; i++, last_was_atom = this_is_atom) { 255 | const char *tokvalue; 256 | int toklen; 257 | 258 | tok_equiv = toks->tokens[i].token; 259 | if (tok_equiv == '(' && flags & PHP_RFC822_RECOMBINE_COMMENTS_TO_QUOTES) 260 | tok_equiv = '"'; 261 | 262 | if 
(flags & PHP_RFC822_RECOMBINE_IGNORE_COMMENTS && tok_equiv == '(') 263 | continue; 264 | if (flags & PHP_RFC822_RECOMBINE_COMMENTS_ONLY && tok_equiv != '(' && !(toks->tokens[i].token == '(' && flags & PHP_RFC822_RECOMBINE_COMMENTS_TO_QUOTES)) 265 | continue; 266 | 267 | tokvalue = toks->tokens[i].value; 268 | toklen = toks->tokens[i].valuelen; 269 | 270 | this_is_atom = php_rfc822_token_is_atom(toks->tokens[i].token); 271 | if (this_is_atom && last_was_atom && flags & PHP_RFC822_RECOMBINE_SPACE_ATOMS) { 272 | ret[len] = ' '; 273 | len++; 274 | } 275 | if (flags & PHP_RFC822_RECOMBINE_INCLUDE_QUOTES && tok_equiv == '"') 276 | ret[len++] = '"'; 277 | 278 | if (toks->tokens[i].token == '(' && flags & PHP_RFC822_RECOMBINE_COMMENTS_TO_QUOTES) { 279 | /* don't include ( and ) in the output string */ 280 | tokvalue++; 281 | toklen -= 2; 282 | } 283 | 284 | memcpy(ret + len, tokvalue, toklen); 285 | len += toklen; 286 | 287 | if (flags & PHP_RFC822_RECOMBINE_INCLUDE_QUOTES && tok_equiv == '"') 288 | ret[len++] = '"'; 289 | 290 | } 291 | ret[len] = 0; 292 | 293 | if (flags & PHP_RFC822_RECOMBINE_STRTOLOWER) 294 | zend_str_tolower(ret, len); 295 | 296 | return ret; 297 | } 298 | /* Split the token list into addresses (single mailboxes or groups). With addrs == NULL nothing is stored and the entries are only counted into *naddrs; with an allocated addrs the name, address and is_group fields of each entry are filled in. Both modes must walk the tokens identically so the count matches the entries written. */ 299 | static void parse_address_tokens(php_rfc822_tokenized_t *toks, 300 | php_rfc822_addresses_t *addrs, int *naddrs) 301 | { 302 | int start_tok = 0, iaddr = 0, i, in_group = 0, group_lbl_start = 0, group_lbl_end = 0; 303 | int a_start, a_count; /* position and count for address part of a name */ 304 | smart_string group_addrs = { 0, }; 305 | char *address_value = NULL; 306 | 307 | address: /* mailbox / group */ 308 | 309 | if (start_tok >= toks->ntokens) { 310 | /* the end */ 311 | *naddrs = iaddr; 312 | smart_string_free(&group_addrs); 313 | return; 314 | } 315 | 316 | /* look ahead to determine if we are dealing with a group */ 317 | for (i = start_tok; i < toks->ntokens; i++) 318 | if (toks->tokens[i].token != 0 && toks->tokens[i].token != '"') 319 | break; 320 | 321 | if (i < toks->ntokens && 
toks->tokens[i].token == ':') { 322 | /* it's a group */ 323 | in_group = 1; 324 | group_lbl_start = start_tok; 325 | group_lbl_end = i; 326 | 327 | /* we want the address for the group to include the leading ":" and the trailing ";" */ 328 | start_tok = i; 329 | } 330 | 331 | mailbox: /* addr-spec / phrase route-addr */ 332 | if (start_tok >= toks->ntokens) { 333 | /* the end */ 334 | *naddrs = iaddr; 335 | smart_string_free(&group_addrs); 336 | return; 337 | } 338 | 339 | /* skip spurious commas */ 340 | while (start_tok < toks->ntokens && (toks->tokens[start_tok].token == ',' 341 | || toks->tokens[start_tok].token == ';')) 342 | start_tok++; 343 | 344 | /* look ahead: if we find a '<' before we find an '@', we are dealing with 345 | a route-addr, otherwise we have an addr-spec */ 346 | for (i = start_tok; i < toks->ntokens && toks->tokens[i].token != ';' 347 | && toks->tokens[i].token != ',' && toks->tokens[i].token != '<'; i++) 348 | ; 349 | 350 | /* the stuff from start_tok to i - 1 is the display name part */ 351 | if (addrs && !in_group && i - start_tok > 0) { 352 | int j, has_comments = 0, has_strings = 0; 353 | switch(i < toks->ntokens ? 
toks->tokens[i].token : 0) { 354 | case ';': case ',': case '<': 355 | addrs->addrs[iaddr].name = php_rfc822_recombine_tokens(toks, start_tok, i - start_tok, 356 | PHP_RFC822_RECOMBINE_SPACE_ATOMS); 357 | break; 358 | default: 359 | /* it's only the display name if there are quoted strings or comments in there */ 360 | for (j = start_tok; j < i; j++) { 361 | if (toks->tokens[j].token == '(') 362 | has_comments = 1; 363 | if (toks->tokens[j].token == '"') 364 | has_strings = 1; 365 | } 366 | if (has_comments && !has_strings) { 367 | addrs->addrs[iaddr].name = php_rfc822_recombine_tokens(toks, start_tok, 368 | i - start_tok, 369 | PHP_RFC822_RECOMBINE_SPACE_ATOMS | PHP_RFC822_RECOMBINE_COMMENTS_ONLY 370 | | PHP_RFC822_RECOMBINE_COMMENTS_TO_QUOTES 371 | ); 372 | } else if (has_strings) { 373 | addrs->addrs[iaddr].name = php_rfc822_recombine_tokens(toks, start_tok, i - start_tok, 374 | PHP_RFC822_RECOMBINE_SPACE_ATOMS); 375 | 376 | } 377 | 378 | } 379 | 380 | } 381 | 382 | if (i < toks->ntokens && toks->tokens[i].token == '<') { 383 | int j; 384 | /* RFC822: route-addr = "<" [route] addr-spec ">" */ 385 | /* look for the closing '>' and recombine as the address part */ 386 | 387 | for (j = i; j < toks->ntokens && toks->tokens[j].token != '>'; j++) 388 | ; 389 | 390 | if (addrs) { 391 | a_start = i; 392 | a_count = j-i; 393 | /* if an address is enclosed in <>, leave them out of the 394 | * address value that we return */ 395 | if (toks->tokens[a_start].token == '<') { 396 | a_start++; 397 | a_count--; 398 | } 399 | address_value = php_rfc822_recombine_tokens(toks, a_start, a_count, 400 | PHP_RFC822_RECOMBINE_SPACE_ATOMS| 401 | PHP_RFC822_RECOMBINE_IGNORE_COMMENTS| 402 | PHP_RFC822_RECOMBINE_INCLUDE_QUOTES); 403 | } 404 | 405 | start_tok = ++j; 406 | } else { 407 | /* RFC822: addr-spec = local-part "@" domain */ 408 | if (addrs) { 409 | a_start = start_tok; 410 | a_count = i - start_tok; 411 | /* if an address is enclosed in <>, leave them out of the 412 | * 
address value that we return; a_start can equal ntokens after trailing separators, so check the bound before reading the token */ 413 | if (a_start < toks->ntokens && toks->tokens[a_start].token == '<') { 414 | a_start++; 415 | a_count--; 416 | } 417 | 418 | address_value = php_rfc822_recombine_tokens(toks, a_start, a_count, 419 | PHP_RFC822_RECOMBINE_SPACE_ATOMS| 420 | PHP_RFC822_RECOMBINE_IGNORE_COMMENTS| 421 | PHP_RFC822_RECOMBINE_INCLUDE_QUOTES); 422 | } 423 | start_tok = i; 424 | } 425 | 426 | if (addrs && address_value) { 427 | 428 | /* if no display name has been given, use the address */ 429 | if (addrs->addrs[iaddr].name == NULL) { 430 | addrs->addrs[iaddr].name = estrdup(address_value); 431 | } 432 | 433 | if (in_group) { 434 | if (group_addrs.len) 435 | smart_string_appendl(&group_addrs, ",", 1); 436 | smart_string_appends(&group_addrs, address_value); 437 | efree(address_value); 438 | } else { 439 | addrs->addrs[iaddr].address = address_value; 440 | } 441 | address_value = NULL; 442 | } 443 | 444 | if (!in_group) { 445 | iaddr++; 446 | goto address; 447 | } 448 | /* still dealing with a group. A ";" ends the group, and so does running out of tokens: start_tok may already be past ntokens after an unterminated "<", and the group must still be counted so both passes agree */ 449 | if (start_tok >= toks->ntokens || toks->tokens[start_tok].token == ';') { 450 | /* end of group */ 451 | 452 | if (addrs) { 453 | smart_string_appendl(&group_addrs, ";", 1); 454 | smart_string_0(&group_addrs); 455 | addrs->addrs[iaddr].address = estrdup(group_addrs.c); 456 | group_addrs.len = 0; 457 | 458 | STR_FREE(addrs->addrs[iaddr].name); 459 | addrs->addrs[iaddr].name = php_rfc822_recombine_tokens(toks, group_lbl_start, 460 | group_lbl_end - group_lbl_start, 461 | PHP_RFC822_RECOMBINE_SPACE_ATOMS); 462 | 463 | addrs->addrs[iaddr].is_group = 1; 464 | } 465 | 466 | iaddr++; 467 | in_group = 0; 468 | start_tok++; 469 | goto address; 470 | } 471 | /* look for more mailboxes in this group */ 472 | goto mailbox; 473 | } 474 | 475 | PHP_MAILPARSE_API php_rfc822_addresses_t *php_rfc822_parse_address_tokens(php_rfc822_tokenized_t *toks) 476 | { 477 | php_rfc822_addresses_t *addrs = ecalloc(1, 
sizeof(php_rfc822_addresses_t)); 478 | 479 | parse_address_tokens(toks, NULL, &addrs->naddrs); 480 | if (addrs->naddrs) { 481 | addrs->addrs = ecalloc(addrs->naddrs, sizeof(php_rfc822_address_t)); 482 | parse_address_tokens(toks, addrs, &addrs->naddrs); 483 | } 484 | 485 | return addrs; 486 | } 487 | 488 | PHP_MAILPARSE_API void php_rfc822_free_addresses(php_rfc822_addresses_t *addrs) 489 | { 490 | int i; 491 | for (i = 0; i < addrs->naddrs; i++) { 492 | if (addrs->addrs[i].name) 493 | STR_FREE(addrs->addrs[i].name); 494 | STR_FREE(addrs->addrs[i].address); 495 | } 496 | if (addrs->addrs) 497 | efree(addrs->addrs); 498 | efree(addrs); 499 | } 500 | void php_rfc822_print_addresses(php_rfc822_addresses_t *addrs) 501 | { 502 | int i; 503 | printf("printing addresses %p\n", addrs); fflush(stdout); 504 | for (i = 0; i < addrs->naddrs; i++) { 505 | printf("addr %d: name=%s address=%s\n", i, addrs->addrs[i].name, addrs->addrs[i].address); 506 | } 507 | } 508 | 509 | 510 | void php_rfc822_print_tokens(php_rfc822_tokenized_t *toks) 511 | { 512 | int i; 513 | for (i = 0; i < toks->ntokens; i++) { 514 | printf("token %d: token=%d/%c len=%d value=%s\n", i, toks->tokens[i].token, toks->tokens[i].token, 515 | toks->tokens[i].valuelen, toks->tokens[i].value); 516 | } 517 | } 518 | 519 | PHP_FUNCTION(mailparse_test) 520 | { 521 | char *header; 522 | size_t header_len; 523 | php_rfc822_tokenized_t *toks; 524 | php_rfc822_addresses_t *addrs; 525 | 526 | if (zend_parse_parameters(ZEND_NUM_ARGS(), "s", &header, &header_len) == FAILURE) { 527 | RETURN_FALSE; 528 | } 529 | 530 | 531 | #if 0 532 | { 533 | struct rfc822t *t = mailparse_rfc822t_alloc(header, NULL); 534 | for (i = 0; i < t->ntokens; i++) { 535 | printf("token %d: token=%d/%c len=%d value=%s\n", i, t->tokens[i].token, t->tokens[i].token, 536 | t->tokens[i].len, t->tokens[i].ptr); 537 | 538 | } 539 | mailparse_rfc822t_free(t); 540 | 541 | printf("--- and now:\n"); 542 | } 543 | #endif 544 | 545 | toks = 
php_mailparse_rfc822_tokenize((const char*)header, 1); 546 | php_rfc822_print_tokens(toks); 547 | 548 | addrs = php_rfc822_parse_address_tokens(toks); 549 | php_rfc822_print_addresses(addrs); 550 | php_rfc822_free_addresses(addrs); 551 | 552 | php_rfc822_tokenize_free(toks); 553 | } 554 | 555 | /* 556 | * Local variables: 557 | * tab-width: 4 558 | * c-basic-offset: 4 559 | * End: 560 | * vim600: sw=4 ts=4 fdm=marker syn=c 561 | * vim<600: sw=4 ts=4 562 | */ 563 | -------------------------------------------------------------------------------- /php_mailparse_rfc822.c: -------------------------------------------------------------------------------- 1 | /* Generated by re2c 3.1 on Wed Jul 30 12:30:59 2025 */ 2 | #line 1 "/home/php/git/mailparse/php_mailparse_rfc822.re" 3 | /* 4 | +----------------------------------------------------------------------+ 5 | | Copyright (c) The PHP Group | 6 | +----------------------------------------------------------------------+ 7 | | This source file is subject to version 3.01 of the PHP license, | 8 | | that is bundled with this package in the file LICENSE, and is | 9 | | available at through the world-wide-web at | 10 | | http://www.php.net/license/3_01.txt. | 11 | | If you did not receive a copy of the PHP license and are unable to | 12 | | obtain it through the world-wide-web, please send a note to | 13 | | license@php.net so we can mail you a copy immediately. 
| 14 | +----------------------------------------------------------------------+ 15 | | Author: Wez Furlong | 16 | +----------------------------------------------------------------------+ 17 | */ 18 | 19 | #include "php.h" 20 | #include "php_mailparse.h" 21 | #include "php_mailparse_rfc822.h" 22 | #include "ext/standard/php_string.h" 23 | #include "Zend/zend_smart_string.h" 24 | #line 36 "/home/php/git/mailparse/php_mailparse_rfc822.re" 25 | 26 | 27 | #line 45 "/home/php/git/mailparse/php_mailparse_rfc822.re" 28 | 29 | 30 | #define YYFILL(n) if (YYCURSOR == YYLIMIT) goto stop 31 | #define YYCTYPE unsigned char 32 | #define YYCURSOR p 33 | #define YYLIMIT q 34 | #define YYMARKER r 35 | 36 | #define DEBUG_RFC822_SCANNER 0 37 | 38 | #if DEBUG_RFC822_SCANNER 39 | # define DBG_STATE(lbl) printf(lbl " %d:%c %d:%c\n", *YYCURSOR, *YYCURSOR, *start, *start) 40 | #else 41 | # define DBG_STATE(lbl) 42 | #endif 43 | 44 | #define ADD_ATOM_TOKEN() do { if (tokens) { tokens->token = *start; tokens->value = start; tokens->valuelen = 1; tokens++; } ++*ntokens; } while (0) 45 | #define REPORT_ERR(msg) do { if (report_errors) zend_error(E_WARNING, "input is not rfc822 compliant: %s", msg); } while(0) 46 | #define STR_FREE(ptr) if (ptr) { efree(ptr); } 47 | /* Tokenize a header. 
tokens may be NULL, in which case the number of tokens are 48 | counted, allowing the caller to allocate enough room */ 49 | static void tokenize(const char *header, php_rfc822_token_t *tokens, int *ntokens, int report_errors) 50 | { 51 | register const char *p, *q, *start; 52 | int in_bracket = 0; 53 | 54 | /* NB: parser assumes that the header has two bytes of NUL terminator */ 55 | 56 | YYCURSOR = header; 57 | YYLIMIT = YYCURSOR + strlen(YYCURSOR) + 1; 58 | 59 | *ntokens = 0; 60 | 61 | state_ground: 62 | start = YYCURSOR; 63 | 64 | #if DEBUG_RFC822_SCANNER 65 | printf("ground: start=%p limit=%p cursor=%p: [%d] %s\n", start, YYLIMIT, YYCURSOR, *YYCURSOR, YYCURSOR); 66 | #endif 67 | 68 | 69 | #line 70 "" 70 | { 71 | YYCTYPE yych; 72 | static const unsigned char yybm[] = { 73 | 0, 160, 160, 160, 160, 160, 160, 160, 74 | 160, 192, 192, 160, 160, 192, 160, 160, 75 | 160, 160, 160, 160, 160, 160, 160, 160, 76 | 160, 160, 160, 160, 160, 160, 160, 160, 77 | 192, 128, 0, 160, 160, 128, 160, 160, 78 | 128, 128, 160, 160, 128, 160, 128, 128, 79 | 160, 160, 160, 160, 160, 160, 160, 160, 80 | 160, 160, 128, 128, 128, 128, 128, 128, 81 | 128, 160, 160, 160, 160, 160, 160, 160, 82 | 160, 160, 160, 160, 160, 160, 160, 160, 83 | 160, 160, 160, 160, 160, 160, 160, 160, 84 | 160, 160, 160, 128, 160, 128, 160, 160, 85 | 160, 160, 160, 160, 160, 160, 160, 160, 86 | 160, 160, 160, 160, 160, 160, 160, 160, 87 | 160, 160, 160, 160, 160, 160, 160, 160, 88 | 160, 160, 160, 160, 160, 160, 160, 160, 89 | 160, 160, 160, 160, 160, 160, 160, 160, 90 | 160, 160, 160, 160, 160, 160, 160, 160, 91 | 160, 160, 160, 160, 160, 160, 160, 160, 92 | 160, 160, 160, 160, 160, 160, 160, 160, 93 | 160, 160, 160, 160, 160, 160, 160, 160, 94 | 160, 160, 160, 160, 160, 160, 160, 160, 95 | 160, 160, 160, 160, 160, 160, 160, 160, 96 | 160, 160, 160, 160, 160, 160, 160, 160, 97 | 160, 160, 160, 160, 160, 160, 160, 160, 98 | 160, 160, 160, 160, 160, 160, 160, 160, 99 | 160, 160, 160, 160, 160, 160, 160, 160, 100 
| 160, 160, 160, 160, 160, 160, 160, 160, 101 | 160, 160, 160, 160, 160, 160, 160, 160, 102 | 160, 160, 160, 160, 160, 160, 160, 160, 103 | 160, 160, 160, 160, 160, 160, 160, 160, 104 | 160, 160, 160, 160, 160, 160, 160, 160, 105 | }; 106 | if ((YYLIMIT - YYCURSOR) < 2) YYFILL(2); 107 | yych = *YYCURSOR; 108 | if (yybm[0+yych] & 64) { 109 | goto yy3; 110 | } 111 | if (yych <= '-') { 112 | if (yych <= '%') { 113 | if (yych <= '!') { 114 | if (yych <= 0x00) goto yy1; 115 | if (yych <= 0x1F) goto yy2; 116 | goto yy4; 117 | } else { 118 | if (yych <= '"') goto yy5; 119 | if (yych <= '$') goto yy2; 120 | goto yy4; 121 | } 122 | } else { 123 | if (yych <= ')') { 124 | if (yych <= '\'') goto yy2; 125 | if (yych <= '(') goto yy6; 126 | goto yy7; 127 | } else { 128 | if (yych == ',') goto yy4; 129 | goto yy2; 130 | } 131 | } 132 | } else { 133 | if (yych <= '>') { 134 | if (yych <= ';') { 135 | if (yych <= '/') goto yy4; 136 | if (yych <= '9') goto yy2; 137 | goto yy4; 138 | } else { 139 | if (yych <= '<') goto yy9; 140 | if (yych <= '=') goto yy4; 141 | goto yy10; 142 | } 143 | } else { 144 | if (yych <= '[') { 145 | if (yych <= '@') goto yy4; 146 | if (yych <= 'Z') goto yy2; 147 | goto yy4; 148 | } else { 149 | if (yych <= '\\') goto yy11; 150 | if (yych <= ']') goto yy4; 151 | goto yy2; 152 | } 153 | } 154 | } 155 | yy1: 156 | ++YYCURSOR; 157 | #line 86 "/home/php/git/mailparse/php_mailparse_rfc822.re" 158 | { goto stop; } 159 | #line 160 "" 160 | yy2: 161 | ++YYCURSOR; 162 | if (YYLIMIT <= YYCURSOR) YYFILL(1); 163 | yych = *YYCURSOR; 164 | if (yybm[0+yych] & 32) { 165 | goto yy2; 166 | } 167 | #line 140 "/home/php/git/mailparse/php_mailparse_rfc822.re" 168 | { DBG_STATE("ANY"); 169 | if (tokens) { 170 | tokens->token = 0; 171 | tokens->valuelen = YYCURSOR - start; 172 | tokens->value = start; 173 | tokens++; 174 | } 175 | ++*ntokens; 176 | goto state_ground; 177 | } 178 | #line 179 "" 179 | yy3: 180 | ++YYCURSOR; 181 | if (YYLIMIT <= YYCURSOR) YYFILL(1); 182 | yych = 
*YYCURSOR; 183 | if (yybm[0+yych] & 64) { 184 | goto yy3; 185 | } 186 | #line 87 "/home/php/git/mailparse/php_mailparse_rfc822.re" 187 | { DBG_STATE("SPACE"); goto state_ground; } 188 | #line 189 "" 189 | yy4: 190 | ++YYCURSOR; 191 | #line 139 "/home/php/git/mailparse/php_mailparse_rfc822.re" 192 | { DBG_STATE("ATOM"); ADD_ATOM_TOKEN(); goto state_ground; } 193 | #line 194 "" 194 | yy5: 195 | ++YYCURSOR; 196 | if (YYLIMIT <= YYCURSOR) YYFILL(1); 197 | yych = *YYCURSOR; 198 | if (yybm[0+yych] & 128) { 199 | goto yy5; 200 | } 201 | if (yych >= 0x01) goto yy12; 202 | yy6: 203 | ++YYCURSOR; 204 | #line 89 "/home/php/git/mailparse/php_mailparse_rfc822.re" 205 | { DBG_STATE("START COMMENT"); 206 | if (tokens) { 207 | tokens->token = '('; 208 | tokens->value = start; 209 | tokens->valuelen = 0; 210 | } 211 | goto state_comment; 212 | } 213 | #line 214 "" 214 | yy7: 215 | ++YYCURSOR; 216 | yy8: 217 | #line 88 "/home/php/git/mailparse/php_mailparse_rfc822.re" 218 | { REPORT_ERR("token not valid in ground state"); goto state_ground; } 219 | #line 220 "" 220 | yy9: 221 | yych = *++YYCURSOR; 222 | if (yych == '>') goto yy13; 223 | #line 121 "/home/php/git/mailparse/php_mailparse_rfc822.re" 224 | { DBG_STATE("LANGLE"); 225 | if (in_bracket) { 226 | REPORT_ERR("already in < bracket"); 227 | goto state_ground; 228 | } 229 | in_bracket = 1; 230 | ADD_ATOM_TOKEN(); 231 | goto state_ground; 232 | } 233 | #line 234 "" 234 | yy10: 235 | ++YYCURSOR; 236 | #line 130 "/home/php/git/mailparse/php_mailparse_rfc822.re" 237 | { DBG_STATE("RANGLE"); 238 | if (!in_bracket) { 239 | REPORT_ERR("not in < bracket"); 240 | goto state_ground; 241 | } 242 | in_bracket = 0; 243 | ADD_ATOM_TOKEN(); 244 | goto state_ground; 245 | } 246 | #line 247 "" 247 | yy11: 248 | yych = *++YYCURSOR; 249 | if (yybm[0+yych] & 32) { 250 | goto yy2; 251 | } 252 | goto yy8; 253 | yy12: 254 | ++YYCURSOR; 255 | #line 97 "/home/php/git/mailparse/php_mailparse_rfc822.re" 256 | { DBG_STATE("QUOTE STRING"); 257 | if (tokens) 
{ 258 | tokens->token = '"'; 259 | tokens->value = start + 1; 260 | tokens->valuelen = YYCURSOR - start - 2; 261 | tokens++; 262 | } 263 | ++*ntokens; 264 | 265 | goto state_ground; 266 | } 267 | #line 268 "" 268 | yy13: 269 | ++YYCURSOR; 270 | #line 108 "/home/php/git/mailparse/php_mailparse_rfc822.re" 271 | { DBG_STATE("NULL <>"); 272 | ADD_ATOM_TOKEN(); 273 | if (tokens) { 274 | tokens->token = 0; 275 | tokens->value = ""; 276 | tokens->valuelen = 0; 277 | tokens++; 278 | } 279 | ++*ntokens; 280 | start++; 281 | ADD_ATOM_TOKEN(); 282 | goto state_ground; 283 | } 284 | #line 285 "" 285 | } 286 | #line 150 "/home/php/git/mailparse/php_mailparse_rfc822.re" 287 | 288 | 289 | state_comment: 290 | { 291 | int comment_depth = 1; 292 | while (1) { 293 | if (*YYCURSOR == 0) { 294 | /* unexpected end of header */ 295 | REPORT_ERR("unexpected end of header"); 296 | /* fake a quoted string for this last token */ 297 | if (tokens) 298 | tokens->token = '"'; 299 | ++*ntokens; 300 | return; 301 | } else if (*YYCURSOR == '(') { 302 | comment_depth++; 303 | } else if (*YYCURSOR == ')' && --comment_depth == 0) { 304 | /* end of nested comment sequence */ 305 | YYCURSOR++; 306 | if (tokens) 307 | tokens->valuelen++; 308 | break; 309 | } else if (*YYCURSOR == '\\' && YYCURSOR[1]) { 310 | YYCURSOR++; 311 | if (tokens) 312 | tokens->valuelen++; 313 | } 314 | YYCURSOR++; 315 | } 316 | if (tokens) { 317 | tokens->valuelen = YYCURSOR - tokens->value; 318 | tokens++; 319 | } 320 | ++*ntokens; 321 | goto state_ground; 322 | } 323 | stop: 324 | #if DEBUG_RFC822_SCANNER 325 | printf("STOPing parser ntokens=%d YYCURSOR=%p YYLIMIT=%p start=%p cursor=[%d] %s start=%s\n", *ntokens, 326 | YYCURSOR, YYLIMIT, start, *YYCURSOR, YYCURSOR, start); 327 | #else 328 | ; 329 | #endif 330 | } 331 | 332 | PHP_MAILPARSE_API php_rfc822_tokenized_t *php_mailparse_rfc822_tokenize(const char *header, int report_errors) 333 | { 334 | php_rfc822_tokenized_t *toks = ecalloc(1, sizeof(php_rfc822_tokenized_t)); 335 
| int len = strlen(header); 336 | 337 | toks->buffer = emalloc(len + 2); 338 | strcpy(toks->buffer, header); 339 | toks->buffer[len] = 0; 340 | toks->buffer[len+1] = 0; /* mini hack: the parser sometimes relies in this */ 341 | 342 | tokenize(toks->buffer, NULL, &toks->ntokens, report_errors); 343 | toks->tokens = toks->ntokens ? ecalloc(toks->ntokens, sizeof(php_rfc822_token_t)) : NULL; 344 | tokenize(toks->buffer, toks->tokens, &toks->ntokens, report_errors); 345 | return toks; 346 | } 347 | 348 | PHP_MAILPARSE_API void php_rfc822_tokenize_free(php_rfc822_tokenized_t *toks) 349 | { 350 | if (toks->tokens) 351 | efree(toks->tokens); 352 | efree(toks->buffer); 353 | efree(toks); 354 | } 355 | 356 | PHP_MAILPARSE_API char *php_rfc822_recombine_tokens(php_rfc822_tokenized_t *toks, int first_token, int n_tokens, int flags) 357 | { 358 | char *ret = NULL; 359 | int i, upper, last_was_atom = 0, this_is_atom = 0, tok_equiv; 360 | size_t len = 1; /* for the NUL terminator */ 361 | 362 | upper = first_token + n_tokens; 363 | if (upper > toks->ntokens) 364 | upper = toks->ntokens; 365 | 366 | for (i = first_token; i < upper; i++, last_was_atom = this_is_atom) { 367 | 368 | tok_equiv = toks->tokens[i].token; 369 | if (tok_equiv == '(' && flags & PHP_RFC822_RECOMBINE_COMMENTS_TO_QUOTES) 370 | tok_equiv = '"'; 371 | 372 | if (flags & PHP_RFC822_RECOMBINE_IGNORE_COMMENTS && tok_equiv == '(') 373 | continue; 374 | if (flags & PHP_RFC822_RECOMBINE_COMMENTS_ONLY && tok_equiv != '(' && !(toks->tokens[i].token == '(' && flags & PHP_RFC822_RECOMBINE_COMMENTS_TO_QUOTES)) 375 | continue; 376 | 377 | this_is_atom = php_rfc822_token_is_atom(toks->tokens[i].token); 378 | if (this_is_atom && last_was_atom && flags & PHP_RFC822_RECOMBINE_SPACE_ATOMS) 379 | len++; /* allow room for a space */ 380 | 381 | if (flags & PHP_RFC822_RECOMBINE_INCLUDE_QUOTES && tok_equiv == '"') 382 | len += 2; 383 | 384 | len += toks->tokens[i].valuelen; 385 | } 386 | 387 | last_was_atom = this_is_atom = 0; 388 | 
389 | ret = emalloc(len); 390 | 391 | for (i = first_token, len = 0; i < upper; i++, last_was_atom = this_is_atom) { 392 | const char *tokvalue; 393 | int toklen; 394 | 395 | tok_equiv = toks->tokens[i].token; 396 | if (tok_equiv == '(' && flags & PHP_RFC822_RECOMBINE_COMMENTS_TO_QUOTES) 397 | tok_equiv = '"'; 398 | 399 | if (flags & PHP_RFC822_RECOMBINE_IGNORE_COMMENTS && tok_equiv == '(') 400 | continue; 401 | if (flags & PHP_RFC822_RECOMBINE_COMMENTS_ONLY && tok_equiv != '(' && !(toks->tokens[i].token == '(' && flags & PHP_RFC822_RECOMBINE_COMMENTS_TO_QUOTES)) 402 | continue; 403 | 404 | tokvalue = toks->tokens[i].value; 405 | toklen = toks->tokens[i].valuelen; 406 | 407 | this_is_atom = php_rfc822_token_is_atom(toks->tokens[i].token); 408 | if (this_is_atom && last_was_atom && flags & PHP_RFC822_RECOMBINE_SPACE_ATOMS) { 409 | ret[len] = ' '; 410 | len++; 411 | } 412 | if (flags & PHP_RFC822_RECOMBINE_INCLUDE_QUOTES && tok_equiv == '"') 413 | ret[len++] = '"'; 414 | 415 | if (toks->tokens[i].token == '(' && flags & PHP_RFC822_RECOMBINE_COMMENTS_TO_QUOTES) { 416 | /* don't include ( and ) in the output string */ 417 | tokvalue++; 418 | toklen -= 2; 419 | } 420 | 421 | memcpy(ret + len, tokvalue, toklen); 422 | len += toklen; 423 | 424 | if (flags & PHP_RFC822_RECOMBINE_INCLUDE_QUOTES && tok_equiv == '"') 425 | ret[len++] = '"'; 426 | 427 | } 428 | ret[len] = 0; 429 | 430 | if (flags & PHP_RFC822_RECOMBINE_STRTOLOWER) 431 | zend_str_tolower(ret, len); 432 | 433 | return ret; 434 | } 435 | 436 | static void parse_address_tokens(php_rfc822_tokenized_t *toks, 437 | php_rfc822_addresses_t *addrs, int *naddrs) 438 | { 439 | int start_tok = 0, iaddr = 0, i, in_group = 0, group_lbl_start = 0, group_lbl_end = 0; 440 | int a_start, a_count; /* position and count for address part of a name */ 441 | smart_string group_addrs = { 0, }; 442 | char *address_value = NULL; 443 | 444 | address: /* mailbox / group */ 445 | 446 | if (start_tok >= toks->ntokens) { 447 | /* the end */ 
448 | *naddrs = iaddr; 449 | smart_string_free(&group_addrs); 450 | return; 451 | } 452 | 453 | /* look ahead to determine if we are dealing with a group */ 454 | for (i = start_tok; i < toks->ntokens; i++) 455 | if (toks->tokens[i].token != 0 && toks->tokens[i].token != '"') 456 | break; 457 | 458 | if (i < toks->ntokens && toks->tokens[i].token == ':') { 459 | /* it's a group */ 460 | in_group = 1; 461 | group_lbl_start = start_tok; 462 | group_lbl_end = i; 463 | 464 | /* we want the address for the group to include the leading ":" and the trailing ";" */ 465 | start_tok = i; 466 | } 467 | 468 | mailbox: /* addr-spec / phrase route-addr */ 469 | if (start_tok >= toks->ntokens) { 470 | /* the end */ 471 | *naddrs = iaddr; 472 | smart_string_free(&group_addrs); 473 | return; 474 | } 475 | 476 | /* skip spurious commas */ 477 | while (start_tok < toks->ntokens && (toks->tokens[start_tok].token == ',' 478 | || toks->tokens[start_tok].token == ';')) 479 | start_tok++; 480 | 481 | /* look ahead: if we find a '<' before we find an '@', we are dealing with 482 | a route-addr, otherwise we have an addr-spec */ 483 | for (i = start_tok; i < toks->ntokens && toks->tokens[i].token != ';' 484 | && toks->tokens[i].token != ',' && toks->tokens[i].token != '<'; i++) 485 | ; 486 | 487 | /* the stuff from start_tok to i - 1 is the display name part */ 488 | if (addrs && !in_group && i - start_tok > 0) { 489 | int j, has_comments = 0, has_strings = 0; 490 | switch(i < toks->ntokens ? 
toks->tokens[i].token : 0) { 491 | case ';': case ',': case '<': 492 | addrs->addrs[iaddr].name = php_rfc822_recombine_tokens(toks, start_tok, i - start_tok, 493 | PHP_RFC822_RECOMBINE_SPACE_ATOMS); 494 | break; 495 | default: 496 | /* it's only the display name if there are quoted strings or comments in there */ 497 | for (j = start_tok; j < i; j++) { 498 | if (toks->tokens[j].token == '(') 499 | has_comments = 1; 500 | if (toks->tokens[j].token == '"') 501 | has_strings = 1; 502 | } 503 | if (has_comments && !has_strings) { 504 | addrs->addrs[iaddr].name = php_rfc822_recombine_tokens(toks, start_tok, 505 | i - start_tok, 506 | PHP_RFC822_RECOMBINE_SPACE_ATOMS | PHP_RFC822_RECOMBINE_COMMENTS_ONLY 507 | | PHP_RFC822_RECOMBINE_COMMENTS_TO_QUOTES 508 | ); 509 | } else if (has_strings) { 510 | addrs->addrs[iaddr].name = php_rfc822_recombine_tokens(toks, start_tok, i - start_tok, 511 | PHP_RFC822_RECOMBINE_SPACE_ATOMS); 512 | 513 | } 514 | 515 | } 516 | 517 | } 518 | 519 | if (i < toks->ntokens && toks->tokens[i].token == '<') { 520 | int j; 521 | /* RFC822: route-addr = "<" [route] addr-spec ">" */ 522 | /* look for the closing '>' and recombine as the address part */ 523 | 524 | for (j = i; j < toks->ntokens && toks->tokens[j].token != '>'; j++) 525 | ; 526 | 527 | if (addrs) { 528 | a_start = i; 529 | a_count = j-i; 530 | /* if an address is enclosed in <>, leave them out of the the 531 | * address value that we return */ 532 | if (toks->tokens[a_start].token == '<') { 533 | a_start++; 534 | a_count--; 535 | } 536 | address_value = php_rfc822_recombine_tokens(toks, a_start, a_count, 537 | PHP_RFC822_RECOMBINE_SPACE_ATOMS| 538 | PHP_RFC822_RECOMBINE_IGNORE_COMMENTS| 539 | PHP_RFC822_RECOMBINE_INCLUDE_QUOTES); 540 | } 541 | 542 | start_tok = ++j; 543 | } else { 544 | /* RFC822: addr-spec = local-part "@" domain */ 545 | if (addrs) { 546 | a_start = start_tok; 547 | a_count = i - start_tok; 548 | /* if an address is enclosed in <>, leave them out of the the 549 | * 
address value that we return */ 550 | if (toks->tokens[a_start].token == '<') { 551 | a_start++; 552 | a_count--; 553 | } 554 | 555 | address_value = php_rfc822_recombine_tokens(toks, a_start, a_count, 556 | PHP_RFC822_RECOMBINE_SPACE_ATOMS| 557 | PHP_RFC822_RECOMBINE_IGNORE_COMMENTS| 558 | PHP_RFC822_RECOMBINE_INCLUDE_QUOTES); 559 | } 560 | start_tok = i; 561 | } 562 | 563 | if (addrs && address_value) { 564 | 565 | /* if no display name has been given, use the address */ 566 | if (addrs->addrs[iaddr].name == NULL) { 567 | addrs->addrs[iaddr].name = estrdup(address_value); 568 | } 569 | 570 | if (in_group) { 571 | if (group_addrs.len) 572 | smart_string_appendl(&group_addrs, ",", 1); 573 | smart_string_appends(&group_addrs, address_value); 574 | efree(address_value); 575 | } else { 576 | addrs->addrs[iaddr].address = address_value; 577 | } 578 | address_value = NULL; 579 | } 580 | 581 | if (!in_group) { 582 | iaddr++; 583 | goto address; 584 | } 585 | /* still dealing with a group. If we find a ";", that's the end of the group */ 586 | if ((start_tok < toks->ntokens && toks->tokens[start_tok].token == ';') || start_tok == toks->ntokens) { 587 | /* end of group */ 588 | 589 | if (addrs) { 590 | smart_string_appendl(&group_addrs, ";", 1); 591 | smart_string_0(&group_addrs); 592 | addrs->addrs[iaddr].address = estrdup(group_addrs.c); 593 | group_addrs.len = 0; 594 | 595 | STR_FREE(addrs->addrs[iaddr].name); 596 | addrs->addrs[iaddr].name = php_rfc822_recombine_tokens(toks, group_lbl_start, 597 | group_lbl_end - group_lbl_start, 598 | PHP_RFC822_RECOMBINE_SPACE_ATOMS); 599 | 600 | addrs->addrs[iaddr].is_group = 1; 601 | } 602 | 603 | iaddr++; 604 | in_group = 0; 605 | start_tok++; 606 | goto address; 607 | } 608 | /* look for more mailboxes in this group */ 609 | goto mailbox; 610 | } 611 | 612 | PHP_MAILPARSE_API php_rfc822_addresses_t *php_rfc822_parse_address_tokens(php_rfc822_tokenized_t *toks) 613 | { 614 | php_rfc822_addresses_t *addrs = ecalloc(1, 
sizeof(php_rfc822_addresses_t)); 615 | 616 | parse_address_tokens(toks, NULL, &addrs->naddrs); 617 | if (addrs->naddrs) { 618 | addrs->addrs = ecalloc(addrs->naddrs, sizeof(php_rfc822_address_t)); 619 | parse_address_tokens(toks, addrs, &addrs->naddrs); 620 | } 621 | 622 | return addrs; 623 | } 624 | 625 | PHP_MAILPARSE_API void php_rfc822_free_addresses(php_rfc822_addresses_t *addrs) 626 | { 627 | int i; 628 | for (i = 0; i < addrs->naddrs; i++) { 629 | if (addrs->addrs[i].name) 630 | STR_FREE(addrs->addrs[i].name); 631 | STR_FREE(addrs->addrs[i].address); 632 | } 633 | if (addrs->addrs) 634 | efree(addrs->addrs); 635 | efree(addrs); 636 | } 637 | void php_rfc822_print_addresses(php_rfc822_addresses_t *addrs) 638 | { 639 | int i; 640 | printf("printing addresses %p\n", addrs); fflush(stdout); 641 | for (i = 0; i < addrs->naddrs; i++) { 642 | printf("addr %d: name=%s address=%s\n", i, addrs->addrs[i].name, addrs->addrs[i].address); 643 | } 644 | } 645 | 646 | 647 | void php_rfc822_print_tokens(php_rfc822_tokenized_t *toks) 648 | { 649 | int i; 650 | for (i = 0; i < toks->ntokens; i++) { 651 | printf("token %d: token=%d/%c len=%d value=%s\n", i, toks->tokens[i].token, toks->tokens[i].token, 652 | toks->tokens[i].valuelen, toks->tokens[i].value); 653 | } 654 | } 655 | 656 | PHP_FUNCTION(mailparse_test) 657 | { 658 | char *header; 659 | size_t header_len; 660 | php_rfc822_tokenized_t *toks; 661 | php_rfc822_addresses_t *addrs; 662 | 663 | if (zend_parse_parameters(ZEND_NUM_ARGS(), "s", &header, &header_len) == FAILURE) { 664 | RETURN_FALSE; 665 | } 666 | 667 | 668 | #if 0 669 | { 670 | struct rfc822t *t = mailparse_rfc822t_alloc(header, NULL); 671 | for (i = 0; i < t->ntokens; i++) { 672 | printf("token %d: token=%d/%c len=%d value=%s\n", i, t->tokens[i].token, t->tokens[i].token, 673 | t->tokens[i].len, t->tokens[i].ptr); 674 | 675 | } 676 | mailparse_rfc822t_free(t); 677 | 678 | printf("--- and now:\n"); 679 | } 680 | #endif 681 | 682 | toks = 
php_mailparse_rfc822_tokenize((const char*)header, 1); 683 | php_rfc822_print_tokens(toks); 684 | 685 | addrs = php_rfc822_parse_address_tokens(toks); 686 | php_rfc822_print_addresses(addrs); 687 | php_rfc822_free_addresses(addrs); 688 | 689 | php_rfc822_tokenize_free(toks); 690 | } 691 | 692 | /* 693 | * Local variables: 694 | * tab-width: 4 695 | * c-basic-offset: 4 696 | * End: 697 | * vim600: sw=4 ts=4 fdm=marker syn=c 698 | * vim<600: sw=4 ts=4 699 | */ 700 | -------------------------------------------------------------------------------- /tests/testdata/multimedia-demo.exp: -------------------------------------------------------------------------------- 1 | Message: multimedia-demo 2 | 3 | Part 1 4 | body-line-count => int(25863) 5 | charset => string(8) "us-ascii" 6 | content-base => string(1) "/" 7 | content-boundary => string(11) "owatagusiam" 8 | content-type => string(15) "multipart/mixed" 9 | ending-pos => int(1819395) 10 | line-count => int(25876) 11 | starting-pos => int(0) 12 | starting-pos-body => int(390) 13 | transfer-encoding => string(4) "8bit" 14 | 15 | Part 1.1 16 | body-line-count => int(3) 17 | charset => string(8) "us-ascii" 18 | content-base => string(1) "/" 19 | content-description => string(11) "Explanation" 20 | content-type => string(10) "text/plain" 21 | ending-pos => int(650) 22 | line-count => int(6) 23 | starting-pos => int(404) 24 | starting-pos-body => int(463) 25 | transfer-encoding => string(4) "8bit" 26 | 27 | Part 1.2 28 | body-line-count => int(106) 29 | charset => string(8) "us-ascii" 30 | content-base => string(1) "/" 31 | content-description => string(14) "Rich Text demo" 32 | content-type => string(14) "message/rfc822" 33 | ending-pos => int(5532) 34 | line-count => int(109) 35 | starting-pos => int(665) 36 | starting-pos-body => int(731) 37 | transfer-encoding => string(4) "8bit" 38 | 39 | Part 1.2.1 40 | body-line-count => int(83) 41 | charset => string(8) "us-ascii" 42 | content-base => string(1) "/" 43 | 
content-boundary => string(55) "Interpart_Boundary_AdJn:mu0M2YtJKaFh9AdJn:mu0M2YtJKaFk=" 44 | content-type => string(21) "multipart/alternative" 45 | ending-pos => int(5532) 46 | line-count => int(106) 47 | starting-pos => int(731) 48 | starting-pos-body => int(1913) 49 | transfer-encoding => string(4) "8bit" 50 | 51 | Part 1.2.1.1 52 | body-line-count => int(16) 53 | charset => string(8) "us-ascii" 54 | content-base => string(1) "/" 55 | content-type => string(10) "text/plain" 56 | ending-pos => int(3249) 57 | line-count => int(17) 58 | starting-pos => int(2497) 59 | starting-pos-body => int(2498) 60 | transfer-encoding => string(4) "8bit" 61 | 62 | Part 1.2.1.2 63 | body-line-count => int(19) 64 | charset => string(8) "us-ascii" 65 | content-base => string(1) "/" 66 | content-boundary => string(57) "Alternative_Boundary_8dJn:mu0M2Yt5KaFZ8AdJn:mu0M2Yt1KaFdA" 67 | content-type => string(15) "multipart/mixed" 68 | ending-pos => int(4482) 69 | line-count => int(22) 70 | starting-pos => int(3308) 71 | starting-pos-body => int(3411) 72 | transfer-encoding => string(4) "8bit" 73 | 74 | Part 1.2.1.2.1 75 | body-line-count => int(13) 76 | charset => string(8) "us-ascii" 77 | content-base => string(1) "/" 78 | content-type => string(13) "text/richtext" 79 | ending-pos => int(4418) 80 | line-count => int(16) 81 | starting-pos => int(3471) 82 | starting-pos-body => int(3544) 83 | transfer-encoding => string(16) "quoted-printable" 84 | 85 | Part 1.2.1.3 86 | body-line-count => int(27) 87 | charset => string(8) "us-ascii" 88 | content-base => string(1) "/" 89 | content-type => string(24) "application/andrew-inset" 90 | ending-pos => int(5470) 91 | line-count => int(29) 92 | starting-pos => int(4540) 93 | starting-pos-body => int(4580) 94 | transfer-encoding => string(4) "8bit" 95 | 96 | Part 1.3 97 | body-line-count => int(7605) 98 | charset => string(8) "us-ascii" 99 | content-base => string(1) "/" 100 | content-description => string(15) "Voice Mail demo" 101 | content-type 
=> string(14) "message/rfc822" 102 | ending-pos => int(560279) 103 | line-count => int(7608) 104 | starting-pos => int(5546) 105 | starting-pos-body => int(5613) 106 | transfer-encoding => string(4) "8bit" 107 | 108 | Part 1.3.1 109 | body-line-count => int(7586) 110 | charset => string(8) "us-ascii" 111 | content-base => string(1) "/" 112 | content-description => string(7) "Hi Mark" 113 | content-type => string(11) "audio/basic" 114 | ending-pos => int(560279) 115 | line-count => int(7605) 116 | starting-pos => int(5613) 117 | starting-pos-body => int(6556) 118 | transfer-encoding => string(6) "base64" 119 | 120 | Part 1.4 121 | body-line-count => int(465) 122 | charset => string(8) "us-ascii" 123 | content-base => string(1) "/" 124 | content-description => string(11) "Flint phone" 125 | content-type => string(11) "audio/basic" 126 | ending-pos => int(596156) 127 | line-count => int(469) 128 | starting-pos => int(560293) 129 | starting-pos-body => int(560387) 130 | transfer-encoding => string(6) "base64" 131 | 132 | Part 1.5 133 | body-line-count => int(23) 134 | charset => string(8) "us-ascii" 135 | content-base => string(1) "/" 136 | content-description => string(11) "MTR's photo" 137 | content-type => string(9) "image/pbm" 138 | ending-pos => int(598054) 139 | line-count => int(27) 140 | starting-pos => int(596171) 141 | starting-pos-body => int(596263) 142 | transfer-encoding => string(6) "base64" 143 | 144 | Part 1.6 145 | body-line-count => int(4565) 146 | charset => string(8) "us-ascii" 147 | content-base => string(1) "/" 148 | content-description => string(15) "Star Trek Party" 149 | content-type => string(14) "message/rfc822" 150 | ending-pos => int(776452) 151 | line-count => int(4568) 152 | starting-pos => int(598069) 153 | starting-pos-body => int(598136) 154 | transfer-encoding => string(4) "8bit" 155 | 156 | Part 1.6.1 157 | body-line-count => int(4532) 158 | charset => string(8) "us-ascii" 159 | content-base => string(1) "/" 160 | content-boundary 
=> string(14) "Outermost_Trek" 161 | content-type => string(15) "multipart/mixed" 162 | ending-pos => int(776452) 163 | line-count => int(4565) 164 | starting-pos => int(598136) 165 | starting-pos-body => int(599956) 166 | transfer-encoding => string(4) "8bit" 167 | 168 | Part 1.6.1.1 169 | body-line-count => int(451) 170 | charset => string(8) "us-ascii" 171 | content-base => string(1) "/" 172 | content-boundary => string(28) "Where_No_One_Has_Gone_Before" 173 | content-type => string(15) "multipart/mixed" 174 | ending-pos => int(631998) 175 | line-count => int(453) 176 | starting-pos => int(599973) 177 | starting-pos-body => int(600042) 178 | transfer-encoding => string(4) "8bit" 179 | 180 | Part 1.6.1.1.1 181 | body-line-count => int(16) 182 | charset => string(8) "us-ascii" 183 | content-base => string(1) "/" 184 | content-type => string(10) "text/plain" 185 | ending-pos => int(600789) 186 | line-count => int(17) 187 | starting-pos => int(600073) 188 | starting-pos-body => int(600074) 189 | transfer-encoding => string(4) "8bit" 190 | 191 | Part 1.6.1.1.2 192 | body-line-count => int(426) 193 | charset => string(8) "us-ascii" 194 | content-base => string(1) "/" 195 | content-description => string(14) "He's dead, Jim" 196 | content-type => string(11) "audio/x-sun" 197 | ending-pos => int(631964) 198 | line-count => int(430) 199 | starting-pos => int(600821) 200 | starting-pos-body => int(600918) 201 | transfer-encoding => string(6) "base64" 202 | 203 | Part 1.6.1.2 204 | body-line-count => int(3418) 205 | charset => string(8) "us-ascii" 206 | content-base => string(1) "/" 207 | content-boundary => string(28) "Where_No_Man_Has_Gone_Before" 208 | content-type => string(15) "multipart/mixed" 209 | ending-pos => int(729141) 210 | line-count => int(3420) 211 | starting-pos => int(632015) 212 | starting-pos-body => int(632084) 213 | transfer-encoding => string(4) "8bit" 214 | 215 | Part 1.6.1.2.1 216 | body-line-count => int(352) 217 | charset => string(8) "us-ascii" 
218 | content-base => string(1) "/" 219 | content-description => string(16) "Kirk/Spock/McCoy" 220 | content-type => string(9) "image/gif" 221 | ending-pos => int(657860) 222 | line-count => int(356) 223 | starting-pos => int(632115) 224 | starting-pos-body => int(632212) 225 | transfer-encoding => string(6) "base64" 226 | 227 | Part 1.6.1.2.2 228 | body-line-count => int(253) 229 | charset => string(8) "us-ascii" 230 | content-base => string(1) "/" 231 | content-description => string(25) "Star Trek Next Generation" 232 | content-type => string(9) "image/gif" 233 | ending-pos => int(676411) 234 | line-count => int(257) 235 | starting-pos => int(657892) 236 | starting-pos-body => int(657998) 237 | transfer-encoding => string(6) "base64" 238 | 239 | Part 1.6.1.2.3 240 | body-line-count => int(2436) 241 | charset => string(8) "us-ascii" 242 | content-base => string(1) "/" 243 | content-type => string(17) "application/x-be2" 244 | content-version => string(2) "12" 245 | ending-pos => int(720176) 246 | line-count => int(2438) 247 | starting-pos => int(676443) 248 | starting-pos-body => int(676487) 249 | transfer-encoding => string(4) "8bit" 250 | 251 | Part 1.6.1.2.4 252 | body-line-count => int(357) 253 | charset => string(8) "us-ascii" 254 | content-base => string(1) "/" 255 | content-type => string(22) "application/atomicmail" 256 | content-version => string(4) "1.12" 257 | ending-pos => int(729107) 258 | line-count => int(359) 259 | starting-pos => int(720208) 260 | starting-pos-body => int(720261) 261 | transfer-encoding => string(4) "8bit" 262 | 263 | Part 1.6.1.3 264 | body-line-count => int(647) 265 | charset => string(8) "us-ascii" 266 | content-base => string(1) "/" 267 | content-description => string(14) "Distress calls" 268 | content-type => string(11) "audio/x-sun" 269 | ending-pos => int(776430) 270 | line-count => int(651) 271 | starting-pos => int(729158) 272 | starting-pos-body => int(729255) 273 | transfer-encoding => string(6) "base64" 274 | 275 | 
Part 1.7 276 | body-line-count => int(483) 277 | charset => string(8) "us-ascii" 278 | content-base => string(1) "/" 279 | content-description => string(14) "Digitizer test" 280 | content-type => string(14) "message/rfc822" 281 | ending-pos => int(862163) 282 | line-count => int(486) 283 | starting-pos => int(776466) 284 | starting-pos-body => int(776532) 285 | transfer-encoding => string(4) "8bit" 286 | 287 | Part 1.7.1 288 | body-line-count => int(457) 289 | charset => string(8) "us-ascii" 290 | content-base => string(1) "/" 291 | content-boundary => string(24) "mail.sleepy.sau.144.8891" 292 | content-type => string(15) "multipart/mixed" 293 | ending-pos => int(862163) 294 | line-count => int(483) 295 | starting-pos => int(776532) 296 | starting-pos-body => int(777838) 297 | transfer-encoding => string(4) "8bit" 298 | 299 | Part 1.7.1.1 300 | body-line-count => int(0) 301 | charset => string(8) "us-ascii" 302 | content-base => string(1) "/" 303 | content-type => string(10) "text/plain" 304 | ending-pos => int(777887) 305 | line-count => int(1) 306 | starting-pos => int(777865) 307 | starting-pos-body => int(777866) 308 | transfer-encoding => string(4) "8bit" 309 | 310 | Part 1.7.1.2 311 | body-line-count => int(433) 312 | charset => string(8) "us-ascii" 313 | content-base => string(1) "/" 314 | content-description => string(12) "Bellcore mug" 315 | content-type => string(9) "image/pgm" 316 | ending-pos => int(861843) 317 | line-count => int(439) 318 | starting-pos => int(777915) 319 | starting-pos-body => int(778102) 320 | transfer-encoding => string(6) "base64" 321 | 322 | Part 1.7.1.3 323 | body-line-count => int(8) 324 | charset => string(8) "us-ascii" 325 | content-base => string(1) "/" 326 | content-type => string(10) "text/plain" 327 | ending-pos => int(862131) 328 | line-count => int(9) 329 | starting-pos => int(861871) 330 | starting-pos-body => int(861872) 331 | transfer-encoding => string(4) "8bit" 332 | 333 | Part 1.8 334 | body-line-count => int(431) 
335 | charset => string(8) "us-ascii" 336 | content-base => string(1) "/" 337 | content-description => string(12) "More Imagery" 338 | content-type => string(14) "message/rfc822" 339 | ending-pos => int(936281) 340 | line-count => int(434) 341 | starting-pos => int(862177) 342 | starting-pos-body => int(862241) 343 | transfer-encoding => string(4) "8bit" 344 | 345 | Part 1.8.1 346 | body-line-count => int(406) 347 | charset => string(8) "us-ascii" 348 | content-base => string(1) "/" 349 | content-boundary => string(23) "mail.sleepy.sau.158.532" 350 | content-type => string(15) "multipart/mixed" 351 | ending-pos => int(936281) 352 | line-count => int(431) 353 | starting-pos => int(862241) 354 | starting-pos-body => int(863504) 355 | transfer-encoding => string(4) "8bit" 356 | 357 | Part 1.8.1.1 358 | body-line-count => int(26) 359 | charset => string(8) "us-ascii" 360 | content-base => string(1) "/" 361 | content-type => string(10) "text/plain" 362 | ending-pos => int(864751) 363 | line-count => int(27) 364 | starting-pos => int(863530) 365 | starting-pos-body => int(863531) 366 | transfer-encoding => string(4) "8bit" 367 | 368 | Part 1.8.1.2 369 | body-line-count => int(369) 370 | charset => string(8) "us-ascii" 371 | content-base => string(1) "/" 372 | content-description => string(23) "Mail architecture slide" 373 | content-type => string(9) "image/pbm" 374 | ending-pos => int(936251) 375 | line-count => int(374) 376 | starting-pos => int(864778) 377 | starting-pos-body => int(864934) 378 | transfer-encoding => string(6) "base64" 379 | 380 | Part 1.9 381 | body-line-count => int(6438) 382 | charset => string(8) "us-ascii" 383 | content-base => string(1) "/" 384 | content-description => string(15) "PostScript demo" 385 | content-type => string(14) "message/rfc822" 386 | ending-pos => int(1327933) 387 | line-count => int(6441) 388 | starting-pos => int(936295) 389 | starting-pos-body => int(936362) 390 | transfer-encoding => string(4) "8bit" 391 | 392 | Part 1.9.1 
393 | body-line-count => int(6421) 394 | charset => string(8) "us-ascii" 395 | content-base => string(1) "/" 396 | content-description => string(14) "Captain Picard" 397 | content-type => string(22) "application/postscript" 398 | ending-pos => int(1327933) 399 | line-count => int(6438) 400 | starting-pos => int(936362) 401 | starting-pos-body => int(937199) 402 | transfer-encoding => string(4) "8bit" 403 | 404 | Part 1.10 405 | body-line-count => int(1015) 406 | charset => string(8) "us-ascii" 407 | content-base => string(1) "/" 408 | content-description => string(21) "Quoted-Printable test" 409 | content-type => string(9) "image/gif" 410 | ending-pos => int(1405346) 411 | line-count => int(1019) 412 | starting-pos => int(1327947) 413 | starting-pos-body => int(1328059) 414 | transfer-encoding => string(16) "quoted-printable" 415 | 416 | Part 1.11 417 | body-line-count => int(1382) 418 | charset => string(8) "us-ascii" 419 | content-base => string(1) "/" 420 | content-description => string(19) "q-p vs. 
base64 test" 421 | content-type => string(14) "message/rfc822" 422 | ending-pos => int(1507736) 423 | line-count => int(1385) 424 | starting-pos => int(1405361) 425 | starting-pos-body => int(1405432) 426 | transfer-encoding => string(4) "8bit" 427 | 428 | Part 1.11.1 429 | body-line-count => int(1368) 430 | charset => string(8) "us-ascii" 431 | content-base => string(1) "/" 432 | content-boundary => string(8) "hal_9000" 433 | content-type => string(15) "multipart/mixed" 434 | ending-pos => int(1507736) 435 | line-count => int(1382) 436 | starting-pos => int(1405432) 437 | starting-pos-body => int(1406106) 438 | transfer-encoding => string(4) "8bit" 439 | 440 | Part 1.11.1.1 441 | body-line-count => int(807) 442 | charset => string(8) "us-ascii" 443 | content-base => string(1) "/" 444 | content-description => string(21) "I'm sorry, Dave (q-p)" 445 | content-type => string(11) "audio/basic" 446 | ending-pos => int(1467518) 447 | line-count => int(811) 448 | starting-pos => int(1406117) 449 | starting-pos-body => int(1406231) 450 | transfer-encoding => string(16) "quoted-printable" 451 | 452 | Part 1.11.1.2 453 | body-line-count => int(549) 454 | charset => string(8) "us-ascii" 455 | content-base => string(1) "/" 456 | content-description => string(24) "I'm sorry, Dave (BASE64)" 457 | content-type => string(11) "audio/basic" 458 | ending-pos => int(1507722) 459 | line-count => int(553) 460 | starting-pos => int(1467530) 461 | starting-pos-body => int(1467637) 462 | transfer-encoding => string(6) "base64" 463 | 464 | Part 1.12 465 | body-line-count => int(3282) 466 | charset => string(8) "us-ascii" 467 | content-base => string(1) "/" 468 | content-description => string(22) "Multiple encapsulation" 469 | content-type => string(14) "message/rfc822" 470 | ending-pos => int(1819378) 471 | line-count => int(3285) 472 | starting-pos => int(1507750) 473 | starting-pos-body => int(1507824) 474 | transfer-encoding => string(4) "8bit" 475 | 476 | Part 1.12.1 477 | 
body-line-count => int(3271) 478 | charset => string(8) "us-ascii" 479 | content-base => string(1) "/" 480 | content-boundary => string(36) "16819560-2078917053-688350843:#11603" 481 | content-type => string(15) "multipart/mixed" 482 | ending-pos => int(1819378) 483 | line-count => int(3282) 484 | starting-pos => int(1507824) 485 | starting-pos-body => int(1508362) 486 | transfer-encoding => string(4) "8bit" 487 | 488 | Part 1.12.1.1 489 | body-line-count => int(861) 490 | charset => string(8) "us-ascii" 491 | content-base => string(1) "/" 492 | content-description => string(14) "The Simpsons!!" 493 | content-type => string(22) "application/postscript" 494 | ending-pos => int(1560994) 495 | line-count => int(865) 496 | starting-pos => int(1508401) 497 | starting-pos-body => int(1508509) 498 | transfer-encoding => string(6) "base64" 499 | 500 | Part 1.12.1.2 501 | body-line-count => int(299) 502 | charset => string(8) "us-ascii" 503 | content-base => string(1) "/" 504 | content-description => string(28) "Alice's PDP-10 w/ TECO & DDT" 505 | content-name => string(13) "Alices_PDP-10" 506 | content-type => string(6) "binary" 507 | ending-pos => int(1579392) 508 | line-count => int(303) 509 | starting-pos => int(1561034) 510 | starting-pos-body => int(1561161) 511 | transfer-encoding => string(6) "base64" 512 | 513 | Part 1.12.1.3 514 | body-line-count => int(2094) 515 | charset => string(8) "us-ascii" 516 | content-base => string(1) "/" 517 | content-description => string(12) "Going deeper" 518 | content-type => string(14) "message/rfc822" 519 | ending-pos => int(1819337) 520 | line-count => int(2097) 521 | starting-pos => int(1579432) 522 | starting-pos-body => int(1579496) 523 | transfer-encoding => string(4) "8bit" 524 | 525 | Part 1.12.1.3.1 526 | body-line-count => int(2087) 527 | charset => string(8) "us-ascii" 528 | content-base => string(1) "/" 529 | content-boundary => string(12) "foobarbazola" 530 | content-type => string(15) "multipart/mixed" 531 | 
ending-pos => int(1819337) 532 | line-count => int(2094) 533 | starting-pos => int(1579496) 534 | starting-pos-body => int(1579726) 535 | transfer-encoding => string(4) "8bit" 536 | 537 | Part 1.12.1.3.1.1 538 | body-line-count => int(7) 539 | charset => string(8) "us-ascii" 540 | content-base => string(1) "/" 541 | content-type => string(10) "text/plain" 542 | ending-pos => int(1580054) 543 | line-count => int(8) 544 | starting-pos => int(1579741) 545 | starting-pos-body => int(1579742) 546 | transfer-encoding => string(4) "8bit" 547 | 548 | Part 1.12.1.3.1.2 549 | body-line-count => int(838) 550 | charset => string(8) "us-ascii" 551 | content-base => string(1) "/" 552 | content-boundary => string(13) "seconddivider" 553 | content-type => string(18) "multipart/parallel" 554 | ending-pos => int(1739553) 555 | line-count => int(840) 556 | starting-pos => int(1580070) 557 | starting-pos-body => int(1580127) 558 | transfer-encoding => string(4) "8bit" 559 | 560 | Part 1.12.1.3.1.2.1 561 | body-line-count => int(16) 562 | charset => string(8) "us-ascii" 563 | content-base => string(1) "/" 564 | content-description => string(5) "Bunny" 565 | content-type => string(9) "image/gif" 566 | ending-pos => int(1583489) 567 | line-count => int(20) 568 | starting-pos => int(1580143) 569 | starting-pos-body => int(1580229) 570 | transfer-encoding => string(6) "base64" 571 | 572 | Part 1.12.1.3.1.2.2 573 | body-line-count => int(807) 574 | charset => string(8) "us-ascii" 575 | content-base => string(1) "/" 576 | content-description => string(14) "TV Theme songs" 577 | content-type => string(11) "audio/basic" 578 | ending-pos => int(1739502) 579 | line-count => int(811) 580 | starting-pos => int(1583506) 581 | starting-pos-body => int(1583603) 582 | transfer-encoding => string(6) "base64" 583 | 584 | Part 1.12.1.3.1.3 585 | body-line-count => int(195) 586 | charset => string(8) "us-ascii" 587 | content-base => string(1) "/" 588 | content-type => string(22) "application/atomicmail" 
589 | ending-pos => int(1744335) 590 | line-count => int(197) 591 | starting-pos => int(1739568) 592 | starting-pos-body => int(1739606) 593 | transfer-encoding => string(4) "8bit" 594 | 595 | Part 1.12.1.3.1.4 596 | body-line-count => int(1031) 597 | charset => string(8) "us-ascii" 598 | content-base => string(1) "/" 599 | content-description => string(27) "Yet another level deeper..." 600 | content-type => string(14) "message/rfc822" 601 | ending-pos => int(1819320) 602 | line-count => int(1034) 603 | starting-pos => int(1744351) 604 | starting-pos-body => int(1744430) 605 | transfer-encoding => string(4) "8bit" 606 | 607 | Part 1.12.1.3.1.4.1 608 | body-line-count => int(1023) 609 | charset => string(8) "us-ascii" 610 | content-base => string(1) "/" 611 | content-description => string(13) "I'm Twying..." 612 | content-type => string(11) "audio/x-sun" 613 | ending-pos => int(1819320) 614 | line-count => int(1031) 615 | starting-pos => int(1744430) 616 | starting-pos-body => int(1744660) 617 | transfer-encoding => string(6) "base64" 618 | -------------------------------------------------------------------------------- /php_mailparse_mime.c: -------------------------------------------------------------------------------- 1 | /* 2 | +----------------------------------------------------------------------+ 3 | | Copyright (c) The PHP Group | 4 | +----------------------------------------------------------------------+ 5 | | This source file is subject to version 3.01 of the PHP license, | 6 | | that is bundled with this package in the file LICENSE, and is | 7 | | available at through the world-wide-web at | 8 | | http://www.php.net/license/3_01.txt. | 9 | | If you did not receive a copy of the PHP license and are unable to | 10 | | obtain it through the world-wide-web, please send a note to | 11 | | license@php.net so we can mail you a copy immediately. 
| 12 | +----------------------------------------------------------------------+ 13 | | Author: Wez Furlong | 14 | +----------------------------------------------------------------------+ 15 | */ 16 | 17 | #include "php.h" 18 | #include "php_mailparse.h" 19 | #include "php_mailparse_mime.h" 20 | #include "php_mailparse_rfc822.h" 21 | 22 | #define MAXLEVELS 20 23 | #define MAXPARTS 300 24 | #define IS_MIME_1(part) (((part)->mime_version && strcmp("1.0", (part)->mime_version) == 0) || ((part)->parent)) 25 | #define CONTENT_TYPE_IS(part, contenttypevalue) ((part)->content_type && strcasecmp((part)->content_type->value, contenttypevalue) == 0) 26 | #define CONTENT_TYPE_ISL(part, contenttypevalue, len) ((part)->content_type && strncasecmp((part)->content_type->value, contenttypevalue, len) == 0) 27 | #define STR_FREE(ptr) if (ptr) { efree(ptr); } 28 | #define mailparse_fetch_mimepart_resource(rfcvar, zvalarg) rfcvar = (php_mimepart *)zend_fetch_resource(Z_RES_P(zvalarg), php_mailparse_msg_name(), php_mailparse_le_mime_part()) 29 | /* Release a header record: its value string (if any), its attributes zval and the record itself. */ 30 | static void php_mimeheader_free(struct php_mimeheader_with_attributes *attr) 31 | { 32 | STR_FREE(attr->value); 33 | zval_ptr_dtor(&attr->attributes); 34 | efree(attr); 35 | } 36 | /* Allocate a zeroed header record holding an empty attributes array and a private copy of value. */ 37 | static struct php_mimeheader_with_attributes * php_mimeheader_alloc(char *value) 38 | { 39 | struct php_mimeheader_with_attributes *attr; 40 | 41 | attr = ecalloc(1, sizeof(struct php_mimeheader_with_attributes)); 42 | 43 | array_init(&attr->attributes); 44 | 45 | attr->value = estrdup(value); 46 | 47 | return attr; 48 | } 49 | /* Append one piece of an RFC 2231 parameter value to value_buf. A charset-encoded piece (charset_p set) has its charset and language prefix, delimited by single quotes, split off and its % escapes turned into = escapes, and is emitted as the start of an =?charset?Q?... encoded word; the word is closed with ?= once a piece without charset_p follows one that had it (prevcharset_p set). Plain pieces are appended unchanged. The scan edits value in place. */ 50 | void rfc2231_to_mime(smart_string* value_buf, char* value, int charset_p, int prevcharset_p) 51 | { 52 | char *strp, *startofvalue = NULL; 53 | int quotes = 0; 54 | 55 | /* Process string, get positions and replace */ 56 | /* Set to start of buffer*/ 57 | if (charset_p) { 58 | 59 | /* Previous charset already set so only convert %nn to =nn*/ 60 | if (prevcharset_p) { 61 | quotes=2; 62 | } 63 | 64 | strp = value; 65 | while
(*strp) { 66 | 67 | /* Quote handling: the first two single quotes delimit the charset and language fields */ 68 | if (*strp == '\'') { 69 | if (quotes <= 1) { 70 | 71 | /* End of charset: terminate the string here so that value reads as the bare charset name; the second quote marks where the encoded text starts */ 72 | if (quotes == 0) { 73 | *strp=0; 74 | } else { 75 | startofvalue = strp+1; 76 | } 77 | 78 | quotes++; 79 | } 80 | } else { 81 | /* Replace % with = - quoted printable (only once both quotes have been passed) */ 82 | if (*strp == '%' && quotes==2) { 83 | *strp = '='; 84 | } 85 | } 86 | strp++; 87 | } 88 | } 89 | 90 | /* If first encoded token: open the encoded word with the charset and append the text that follows the second quote */ 91 | if (charset_p && !prevcharset_p && startofvalue) { 92 | smart_string_appends(value_buf, "=?"); 93 | smart_string_appends(value_buf, value); 94 | smart_string_appends(value_buf, "?Q?"); 95 | smart_string_appends(value_buf, startofvalue); 96 | } 97 | 98 | /* If last encoded token: close the encoded word */ 99 | if (prevcharset_p && !charset_p) { 100 | smart_string_appends(value_buf, "?="); 101 | } 102 | 103 | /* Append value (a plain piece, or a further piece of an already opened encoded word); value may be NULL when the caller only wants the word closed */ 104 | if ((!charset_p || (prevcharset_p && charset_p)) && value) { 105 | smart_string_appends(value_buf, value); 106 | } 107 | } 108 | /* Build a header record from a tokenized header line. Tokens from index 2 up to the first ; are recombined into the lower-cased main value; every following name=value parameter is stored in the attributes array, and the numbered or charset-tagged pieces of one RFC 2231 parameter are merged into a single attribute. */ 109 | static struct php_mimeheader_with_attributes *php_mimeheader_alloc_from_tok(php_rfc822_tokenized_t *toks) 110 | { 111 | struct php_mimeheader_with_attributes *attr; 112 | int i, first_semi, next_semi, comments_before_semi, netscape_bug = 0; 113 | char *name_buf = NULL; 114 | smart_string value_buf = {0}; 115 | int is_rfc2231_name = 0; 116 | char *check_name; 117 | int charset_p, prevcharset_p = 0; 118 | int namechanged, currentencoded = 0; 119 | 120 | attr = ecalloc(1, sizeof(struct php_mimeheader_with_attributes)); 121 | 122 | array_init(&attr->attributes); 123 | 124 | /* php_rfc822_print_tokens(toks); */ 125 | 126 | /* look for optional ; which separates optional attributes from the main value */ 127 | for (first_semi = 2; first_semi < toks->ntokens; first_semi++) 128 | if (toks->tokens[first_semi].token == ';') { 129 | break; 130 | } 131 | 132 | attr->value = php_rfc822_recombine_tokens(toks, 2, first_semi - 2, 133 | PHP_RFC822_RECOMBINE_STRTOLOWER | PHP_RFC822_RECOMBINE_IGNORE_COMMENTS); 134 | 135 |
if (first_semi < toks->ntokens) { 136 | first_semi++; 137 | } 138 | 139 | /* Netscape Bug: Messenger sometimes omits the semi when wrapping the 140 | * header. 141 | * That means we have to be even more clever than the spec says that 142 | * we need to :-/ 143 | * */ 144 | 145 | while (first_semi < toks->ntokens) { 146 | /* find the next ; and count the comment tokens seen on the way */ 147 | comments_before_semi = 0; 148 | for (next_semi = first_semi; next_semi < toks->ntokens; next_semi++) { 149 | if (toks->tokens[next_semi].token == ';') { 150 | break; 151 | } 152 | if (toks->tokens[next_semi].token == '(') { 153 | comments_before_semi++; 154 | } 155 | } 156 | 157 | /* first_semi now indexes the parameter name token; next_semi is the terminating ; or ntokens */ 158 | i = first_semi; 159 | if (i < next_semi) { 160 | i++; 161 | 162 | /* ignore comments */ 163 | while (i < next_semi && toks->tokens[i].token == '(') 164 | i++; 165 | 166 | if (i < next_semi && toks->tokens[i].token == '=') { 167 | char *name, *value; 168 | 169 | /* Here, first_semi --> "name" and i --> "=", so skip "=" sign */ 170 | i++; 171 | 172 | /* count those tokens; we expect "token = token" (3 tokens); if there are 173 | * more than that, then something is quite possibly wrong - Netscape Bug!
*/ 174 | if (next_semi < toks->ntokens 175 | && toks->tokens[next_semi].token != ';' 176 | && next_semi - first_semi - comments_before_semi > 3) { 177 | next_semi = i + 1; 178 | netscape_bug = 1; 179 | } 180 | 181 | name = php_rfc822_recombine_tokens(toks, first_semi, 1, 182 | PHP_RFC822_RECOMBINE_STRTOLOWER|PHP_RFC822_RECOMBINE_IGNORE_COMMENTS); 183 | value = php_rfc822_recombine_tokens(toks, i, next_semi - i, 184 | PHP_RFC822_RECOMBINE_IGNORE_COMMENTS); 185 | currentencoded = 0; /* judge every parameter afresh: without this reset the flag stays set after the first rfc2231 parameter, and a plain parameter that follows is neither stored nor freed */ 186 | /* support rfc2231 mime parameter value 187 | * 188 | * Parameter Value Continuations: 189 | * 190 | * Content-Type: message/external-body; access-type=URL; 191 | * URL*0="ftp://"; 192 | * URL*1="cs.utk.edu/pub/moore/bulk-mailer/bulk-mailer.tar" 193 | * 194 | * is semantically identical to 195 | * 196 | * Content-Type: message/external-body; access-type=URL; 197 | * URL="ftp://cs.utk.edu/pub/moore/bulk-mailer/bulk-mailer.tar" 198 | * 199 | * Original rfc2231 support by IceWarp Ltd. 200 | */ 201 | check_name = strchr(name, '*'); 202 | if (check_name) { 203 | currentencoded = 1; 204 | 205 | /* Is last char * - charset encoding */ 206 | charset_p = *(name+strlen(name)-1) == '*'; 207 | 208 | /* Leave only attribute name without * */ 209 | *check_name = 0; 210 | 211 | /* New item or continuation of the attribute already being buffered */ 212 | if (NULL == name_buf) { 213 | namechanged = 0; 214 | name_buf = name; 215 | } else { 216 | namechanged = (strcmp(name_buf, name) != 0); 217 | if (!namechanged) { 218 | efree(name); 219 | name = 0; 220 | } 221 | } 222 | 223 | /* Check if name changed*/ 224 | if (!namechanged) { 225 | 226 | /* Append string to buffer - check if to be encoded...
*/ 227 | rfc2231_to_mime(&value_buf, value, charset_p, prevcharset_p); 228 | 229 | /* Mark previous */ 230 | prevcharset_p = charset_p; 231 | } 232 | 233 | is_rfc2231_name = 1; 234 | } 235 | 236 | /* An rfc2231 attribute is being buffered in name_buf / value_buf */ 237 | if (1 == is_rfc2231_name) { 238 | /* Name not null and name differs with new name*/ 239 | if (name && strcmp(name_buf, name) != 0) { 240 | /* Finalize packet: close the encoded word if one is open, then store the buffered attribute */ 241 | rfc2231_to_mime(&value_buf, NULL, 0, prevcharset_p); 242 | 243 | add_assoc_stringl(&attr->attributes, name_buf, value_buf.c, value_buf.len); 244 | efree(name_buf); 245 | smart_string_free(&value_buf); 246 | 247 | prevcharset_p = 0; 248 | is_rfc2231_name = 0; 249 | name_buf = NULL; 250 | 251 | /* New non encoded name*/ 252 | if (!currentencoded) { 253 | /* Add string*/ 254 | add_assoc_string(&attr->attributes, name, value); 255 | efree(name); 256 | } else { /* Encoded name changed*/ 257 | if (namechanged) { 258 | /* Append string to buffer - check if to be encoded... */ 259 | rfc2231_to_mime(&value_buf, value, charset_p, prevcharset_p); 260 | 261 | /* Mark: name now belongs to the buffer and is released when the attribute is stored */ 262 | is_rfc2231_name = 1; 263 | name_buf = name; 264 | prevcharset_p = charset_p; 265 | } 266 | } 267 | 268 | namechanged = 0; 269 | } 270 | } else { 271 | add_assoc_string(&attr->attributes, name, value); 272 | efree(name); 273 | } 274 | 275 | efree(value); 276 | } 277 | } 278 | /* step past the ; separator; after the Netscape workaround next_semi was set to i + 1 instead, so nothing is skipped */ 279 | if (next_semi < toks->ntokens && !netscape_bug) { 280 | next_semi++; 281 | } 282 | 283 | first_semi = next_semi; 284 | netscape_bug = 0; 285 | } 286 | /* store an rfc2231 attribute that was still being buffered when the tokens ran out */ 287 | if (1 == is_rfc2231_name) { 288 | /* Finalize packet */ 289 | rfc2231_to_mime(&value_buf, NULL, 0, prevcharset_p); 290 | 291 | add_assoc_stringl(&attr->attributes, name_buf, value_buf.c, value_buf.len); 292 | efree(name_buf); 293 | smart_string_free(&value_buf); 294 | } 295 | 296 | 297 | return attr; 298 | } 299 | /* Allocate a zeroed mime part with part_index 1, an empty children table and header array, a NULL source zval and header-parsing mode switched on, and register it as a mime part resource. */ 300 | PHP_MAILPARSE_API php_mimepart *php_mimepart_alloc() 301 | { 302 | php_mimepart *part = ecalloc(1, sizeof(php_mimepart)); 303 | 304 | part->part_index = 1; 305 | 306 |
zend_hash_init(&part->children, 0, NULL, NULL, 0); 307 | 308 | array_init(&part->headerhash); 309 | 310 | ZVAL_NULL(&part->source.zval); 311 | 312 | /* begin in header parsing mode */ 313 | part->parsedata.in_header = 1; 314 | part->rsrc = zend_register_resource(part, php_mailparse_le_mime_part()); 315 | return part; 316 | } 317 | 318 | /* Release a mime part: destroy the zvals of its child parts and the children table, then free every owned string, header record, parse buffer and zval before the part itself. */ 319 | PHP_MAILPARSE_API void php_mimepart_free(php_mimepart *part) 320 | { 321 | zval *childpart_z; 322 | HashPosition pos; 323 | 324 | /* free contained parts (the table was created without a destructor, so each entry is released by hand) */ 325 | zend_hash_internal_pointer_reset_ex(&part->children, &pos); 326 | while ((childpart_z = zend_hash_get_current_data_ex(&part->children, &pos)) != NULL) { 327 | zval_ptr_dtor(childpart_z); 328 | zend_hash_move_forward_ex(&part->children, &pos); 329 | } 330 | 331 | zend_hash_destroy(&part->children); 332 | /* owned strings; STR_FREE skips NULL pointers */ 333 | STR_FREE(part->mime_version); 334 | STR_FREE(part->content_transfer_encoding); 335 | STR_FREE(part->charset); 336 | STR_FREE(part->boundary); 337 | STR_FREE(part->content_base); 338 | STR_FREE(part->content_location); 339 | 340 | if (part->content_type) { 341 | php_mimeheader_free(part->content_type); 342 | part->content_type = NULL; 343 | } 344 | if (part->content_disposition) { 345 | php_mimeheader_free(part->content_disposition); 346 | part->content_disposition = NULL; 347 | } 348 | 349 | smart_string_free(&part->parsedata.workbuf); 350 | smart_string_free(&part->parsedata.headerbuf); 351 | 352 | zval_ptr_dtor(&part->source.zval); 353 | 354 | zval_ptr_dtor(&part->headerhash); 355 | 356 | efree(part); 357 | } 358 | /* Record the new end positions on part and on every ancestor, and add deltanlines to their line counts; the body line count grows only for parts that have left header mode. */ 359 | static void php_mimepart_update_positions(php_mimepart *part, size_t newendpos, size_t newbodyend, size_t deltanlines) 360 | { 361 | while(part) { 362 | part->endpos = newendpos; 363 | part->bodyend = newbodyend; 364 | part->nlines += deltanlines; 365 | if (!part->parsedata.in_header) { 366 | part->nbodylines += deltanlines; 367 | } 368 | part = part->parent; 369 | } 370 | } 371 | /* Look up attrname in attr->attributes. Returns the string held by the array entry (not a copy), or NULL when the attribute is absent. */ 372 | PHP_MAILPARSE_API char *php_mimepart_attribute_get(struct
php_mimeheader_with_attributes *attr, char *attrname) 373 | { 374 | zval *attrval; 375 | zend_string *hash_key; 376 | 377 | hash_key = zend_string_init(attrname, strlen(attrname), 0); 378 | attrval = zend_hash_find(Z_ARRVAL_P(&attr->attributes), hash_key); 379 | zend_string_release(hash_key); 380 | 381 | if (attrval != NULL) { 382 | return Z_STRVAL_P(attrval); 383 | } 384 | return NULL; 385 | } 386 | /* Free the string held in ptr (if any) and store a fresh copy of newval. */ 387 | #define STR_SET_REPLACE(ptr, newval) do { STR_FREE(ptr); ptr = estrdup(newval); } while(0) 388 | /* Parse the header line accumulated in part->parsedata.headerbuf, add it to part->headerhash and keep the MIME related headers on the part itself. Returns SUCCESS when the buffer is empty or was processed and FAILURE when the line does not tokenize as a header; the buffer length is zero afterwards in every case. */ 389 | static int php_mimepart_process_header(php_mimepart *part) 390 | { 391 | php_rfc822_tokenized_t *toks; 392 | char *header_key, *header_val, *header_val_stripped; 393 | zval *zheaderval; 394 | zend_string *header_zstring; 395 | 396 | if (part->parsedata.headerbuf.len == 0) { 397 | return SUCCESS; 398 | } 399 | 400 | smart_string_0(&part->parsedata.headerbuf); 401 | 402 | /* parse the header line */ 403 | toks = php_mailparse_rfc822_tokenize((const char*)part->parsedata.headerbuf.c, 0); 404 | 405 | /* valid headers consist of at least two tokens, with the first being a string and the 406 | * second token being a ':' */ 407 | if (toks->ntokens < 2 || toks->tokens[0].token != 0 || toks->tokens[1].token != ':') { 408 | part->parsedata.headerbuf.len = 0; 409 | 410 | php_rfc822_tokenize_free(toks); 411 | return FAILURE; 412 | } 413 | 414 | /* get a lower-case version of the first token */ 415 | header_key = php_rfc822_recombine_tokens(toks, 0, 1, PHP_RFC822_RECOMBINE_IGNORE_COMMENTS|PHP_RFC822_RECOMBINE_STRTOLOWER); 416 | /* header_val is the raw text after the first colon; header_val_stripped is the lower-cased, comment-free recombination of the value tokens */ 417 | header_val = strchr(part->parsedata.headerbuf.c, ':'); 418 | header_val_stripped = php_rfc822_recombine_tokens(toks, 2, toks->ntokens-2, PHP_RFC822_RECOMBINE_IGNORE_COMMENTS|PHP_RFC822_RECOMBINE_STRTOLOWER); 419 | 420 | if (header_val) { 421 | header_val++; /* skip the colon, then any leading white space; isspace needs an unsigned char value because a raw 8-bit byte held in a plain char may be negative */ 422 | while (isspace((unsigned char)*header_val)) 423 | header_val++; 424 | 425 | /* add the header to the hash.
426 | * join multiple To: or Cc: lines together */ 427 | header_zstring = zend_string_init(header_key, strlen(header_key), 0); 428 | if ((strcmp(header_key, "to") == 0 || strcmp(header_key, "cc") == 0) && (zheaderval = zend_hash_find(Z_ARRVAL_P(&part->headerhash), header_zstring)) != NULL) { 429 | size_t newlen; 430 | char *newstr; 431 | /* room for both values, the two byte separator and the terminating NUL */ 432 | newlen = strlen(header_val) + Z_STRLEN_P(zheaderval) + 3; 433 | newstr = emalloc(newlen); 434 | 435 | strcpy(newstr, Z_STRVAL_P(zheaderval)); 436 | strcat(newstr, ", "); 437 | strcat(newstr, header_val); 438 | add_assoc_string(&part->headerhash, header_key, newstr); 439 | efree(newstr); 440 | } else { 441 | if((zheaderval = zend_hash_find(Z_ARRVAL_P(&part->headerhash), header_zstring)) != NULL) { 442 | if(Z_TYPE_P(zheaderval) == IS_ARRAY) { 443 | add_next_index_string(zheaderval, header_val); 444 | } else { 445 | /* Create a nested array if there is more than one of the same header */ 446 | zval zarr; 447 | array_init(&zarr); 448 | Z_ADDREF_P(zheaderval); 449 | /* the extra reference taken above is for the copy placed in the new array, which then replaces the old entry under the same key */ 450 | add_next_index_zval(&zarr, zheaderval); 451 | add_next_index_string(&zarr, header_val); 452 | add_assoc_zval(&part->headerhash, header_key, &zarr); 453 | } 454 | } else { 455 | add_assoc_string(&part->headerhash, header_key, header_val); 456 | } 457 | } 458 | zend_string_release(header_zstring); 459 | /* if it is useful, keep a copy of it in the mime part */ 460 | if (strcmp(header_key, "mime-version") == 0) { 461 | STR_SET_REPLACE(part->mime_version, header_val_stripped); 462 | } 463 | 464 | if (strcmp(header_key, "content-location") == 0) { 465 | STR_FREE(part->content_location); 466 | part->content_location = php_rfc822_recombine_tokens(toks, 2, toks->ntokens-2, PHP_RFC822_RECOMBINE_IGNORE_COMMENTS); 467 | } 468 | if (strcmp(header_key, "content-base") == 0) { 469 | STR_FREE(part->content_base); 470 | part->content_base = php_rfc822_recombine_tokens(toks, 2, toks->ntokens-2, PHP_RFC822_RECOMBINE_IGNORE_COMMENTS); 471 | } 472 | 473 | if (strcmp(header_key,
"content-transfer-encoding") == 0) { 474 | STR_SET_REPLACE(part->content_transfer_encoding, header_val_stripped); 475 | } 476 | if (strcmp(header_key, "content-type") == 0) { 477 | char *charset, *boundary; 478 | 479 | if (part->content_type) { 480 | php_mimeheader_free(part->content_type); 481 | part->content_type = NULL; 482 | } 483 | 484 | part->content_type = php_mimeheader_alloc_from_tok(toks); 485 | 486 | boundary = php_mimepart_attribute_get(part->content_type, "boundary"); 487 | if (boundary) { 488 | STR_SET_REPLACE(part->boundary, boundary); /* a repeated Content-Type header must release the boundary kept from the earlier one */ 489 | } 490 | 491 | charset = php_mimepart_attribute_get(part->content_type, "charset"); 492 | if (charset) { 493 | STR_SET_REPLACE(part->charset, charset); 494 | } 495 | } 496 | if (strcmp(header_key, "content-disposition") == 0) { 497 | if (part->content_disposition) { 498 | php_mimeheader_free(part->content_disposition); 499 | part->content_disposition = NULL; 500 | } 501 | part->content_disposition = php_mimeheader_alloc_from_tok(toks); 502 | } 503 | 504 | } 505 | STR_FREE(header_key); 506 | STR_FREE(header_val_stripped); 507 | 508 | php_rfc822_tokenize_free(toks); 509 | 510 | /* zero the buffer size */ 511 | part->parsedata.headerbuf.len = 0; 512 | return SUCCESS; 513 | } 514 | /* Create a child part of parentpart whose positions all start at startpos, record it as the last part of the parent and append its resource to the parent children table. The child shares the parent source kind and copies the source zval when there is one. With inherit set, the child also starts with copies of the parent transfer encoding and charset. */ 515 | static php_mimepart *alloc_new_child_part(php_mimepart *parentpart, size_t startpos, int inherit) 516 | { 517 | php_mimepart *child = php_mimepart_alloc(); 518 | zval child_z; 519 | 520 | parentpart->parsedata.lastpart = child; 521 | child->parent = parentpart; 522 | 523 | child->source.kind = parentpart->source.kind; 524 | if (parentpart->source.kind != mpNONE) { 525 | child->source.zval = parentpart->source.zval; 526 | zval_copy_ctor(&child->source.zval); 527 | } 528 | 529 | ZVAL_RES(&child_z, child->rsrc); 530 | zend_hash_next_index_insert(&parentpart->children, &child_z); 531 | child->startpos = child->endpos = child->bodystart = child->bodyend = startpos; 532 | 533 | if (inherit) { 534 | if (parentpart->content_transfer_encoding) { 535
| child->content_transfer_encoding = estrdup(parentpart->content_transfer_encoding); 536 | } 537 | if (parentpart->charset) { 538 | child->charset = estrdup(parentpart->charset); 539 | } 540 | } 541 | 542 | return child; 543 | } 544 | /* Report the byte offsets and line counts recorded for part. For a part that has a parent, the end offset reported is the body end and each non-zero line count is reduced by one. */ 545 | PHP_MAILPARSE_API void php_mimepart_get_offsets(php_mimepart *part, off_t *start, off_t *end, off_t *start_body, int *nlines, int *nbodylines) 546 | { 547 | *start = part->startpos; 548 | *end = part->endpos; 549 | *nlines = part->nlines; 550 | *nbodylines = part->nbodylines; 551 | *start_body = part->bodystart; 552 | 553 | /* Adjust for newlines in mime parts */ 554 | if (part->parent) { 555 | *end = part->bodyend; 556 | if (*nlines) { 557 | --*nlines; 558 | } 559 | if (*nbodylines) { 560 | --*nbodylines; 561 | } 562 | } 563 | } 564 | /* Process the single line held in workpart->parsedata.workbuf: find the part that is currently being parsed, react to boundary lines, collect header lines and update the recorded positions. Returns FAILURE only when workpart already has more than MAXPARTS children, SUCCESS otherwise. */ 565 | static int php_mimepart_process_line(php_mimepart *workpart) 566 | { 567 | size_t origcount, linelen; 568 | char *c; 569 | 570 | /* sanity check: refuse messages with too many child parts */ 571 | if (zend_hash_num_elements(&workpart->children) > MAXPARTS) { 572 | php_error_docref(NULL, E_WARNING, "MIME message too complex"); 573 | return FAILURE; 574 | } 575 | 576 | c = workpart->parsedata.workbuf.c; 577 | smart_string_0(&workpart->parsedata.workbuf); 578 | 579 | /* strip trailing \r\n -- we always have a trailing \n; origcount keeps the full length for position accounting */ 580 | origcount = workpart->parsedata.workbuf.len; 581 | linelen = origcount - 1; 582 | if (linelen && c[linelen-1] == '\r') { 583 | --linelen; 584 | } 585 | 586 | /* Discover which part we were last working on */ 587 | while (workpart->parsedata.lastpart) { 588 | size_t bound_len; 589 | php_mimepart *lastpart = workpart->parsedata.lastpart; 590 | /* once the last child is marked completed the line is only counted */ 591 | if (lastpart->parsedata.completed) { 592 | php_mimepart_update_positions(workpart, workpart->endpos + origcount, workpart->endpos + origcount, 1); 593 | return SUCCESS; 594 | } 595 | if (workpart->boundary == NULL || workpart->parsedata.in_header) { 596 | workpart = lastpart; 597 | continue; 598 | } 599 | bound_len = strlen(workpart->boundary); 600 | 601 | /* Look for
a boundary */ 602 | if (c[0] == '-' && c[1] == '-' && linelen >= 2+bound_len && strncasecmp(workpart->boundary, c+2, bound_len) == 0) { 603 | php_mimepart *newpart; 604 | 605 | /* is it the final boundary ? */ 606 | if (linelen >= 4 + bound_len && strncmp(c+2+bound_len, "--", 2) == 0) { 607 | lastpart->parsedata.completed = 1; 608 | php_mimepart_update_positions(workpart, workpart->endpos + origcount, workpart->endpos + origcount, 1); 609 | return SUCCESS; 610 | } 611 | /* an ordinary boundary: the next child starts right after this line and begins in header mode */ 612 | newpart = alloc_new_child_part(workpart, workpart->endpos + origcount, 1); 613 | php_mimepart_update_positions(workpart, workpart->endpos + origcount, workpart->endpos + linelen, 1); 614 | if (workpart->mime_version) { 615 | newpart->mime_version = estrdup(workpart->mime_version); 616 | } 617 | newpart->parsedata.in_header = 1; 618 | return SUCCESS; 619 | } 620 | workpart = lastpart; 621 | } 622 | /* workpart is now the part reached by following the lastpart links to their end */ 623 | if (!workpart->parsedata.in_header) { 624 | if (!workpart->parsedata.completed && !workpart->parsedata.lastpart) { 625 | /* update the body/part end positions. 626 | * For multipart messages, the final newline belongs to the boundary. 627 | * Otherwise it belongs to the body 628 | * */ 629 | if (workpart->parent && CONTENT_TYPE_ISL(workpart->parent, "multipart/", 10)) { 630 | php_mimepart_update_positions(workpart, workpart->endpos + origcount, workpart->endpos + linelen, 1); 631 | } else { 632 | php_mimepart_update_positions(workpart, workpart->endpos + origcount, workpart->endpos + origcount, 1); 633 | } 634 | } 635 | } else { 636 | 637 | if (linelen > 0) { 638 | 639 | php_mimepart_update_positions(workpart, workpart->endpos + origcount, workpart->endpos + linelen, 1); 640 | 641 | if (*c == ' ' || *c == '\t') { 642 | /* This doesn't technically conform to rfc2822, as we're replacing \t with \s, but this seems to fix 643 | * cases where clients incorrectly fold by inserting a \t character.
644 | */ 645 | smart_string_appendl(&workpart->parsedata.headerbuf, " ", 1); 646 | c++; linelen--; 647 | } else { 648 | php_mimepart_process_header(workpart); 649 | } 650 | /* save header for possible continuation */ 651 | smart_string_appendl(&workpart->parsedata.headerbuf, c, linelen); 652 | 653 | } else { 654 | /* end of headers: flush the header that is still buffered */ 655 | php_mimepart_process_header(workpart); 656 | 657 | /* start of body */ 658 | workpart->parsedata.in_header = 0; 659 | workpart->bodystart = workpart->endpos + origcount; 660 | php_mimepart_update_positions(workpart, workpart->bodystart, workpart->bodystart, 1); 661 | --workpart->nbodylines; 662 | 663 | /* some broken mailers include the content-type header but not a mime-version header. 664 | * some others may use a MIME version other than 1.0. 665 | * Let's relax and pretend they said they were mime 1.0 compatible */ 666 | if (!IS_MIME_1(workpart) && workpart->content_type != NULL) { 667 | if (workpart->mime_version != NULL) { 668 | efree(workpart->mime_version); 669 | } 670 | workpart->mime_version = estrdup("1.0"); 671 | } 672 | 673 | if (!IS_MIME_1(workpart)) { 674 | /* if we don't understand the MIME version, discard the content-disposition, 675 | * boundary and content-type, and treat the part as text/plain */ 676 | if (workpart->content_disposition) { 677 | php_mimeheader_free(workpart->content_disposition); 678 | workpart->content_disposition = NULL; 679 | } 680 | if (workpart->boundary) { 681 | efree(workpart->boundary); 682 | workpart->boundary = NULL; 683 | } 684 | if (workpart->content_type) { 685 | php_mimeheader_free(workpart->content_type); 686 | workpart->content_type = NULL; 687 | } 688 | workpart->content_type = php_mimeheader_alloc("text/plain"); 689 | } 690 | /* if there is no content type, default to text/plain, but use message/rfc822 when in 691 | * a multipart/digest message */ 692 | if (IS_MIME_1(workpart) && workpart->content_type == NULL) { 693 | char *def_type = "text/plain"; 694 | 695 | if (workpart->parent && CONTENT_TYPE_IS(workpart->parent,
"multipart/digest")) { 696 | def_type = "message/rfc822"; 697 | } 698 | 699 | workpart->content_type = php_mimeheader_alloc(def_type); 700 | } 701 | 702 | /* if no charset had previously been set, either through inheritance or by an 703 | * explicit content-type header, fall back to the configured default charset */ 704 | if (workpart->charset == NULL) { 705 | workpart->charset = estrdup(MAILPARSEG(def_charset)); 706 | } 707 | /* an embedded message gets a child part of its own that starts in header mode at the body start */ 708 | if (CONTENT_TYPE_IS(workpart, "message/rfc822")) { 709 | workpart = alloc_new_child_part(workpart, workpart->bodystart, 0); 710 | workpart->parsedata.in_header = 1; 711 | return SUCCESS; 712 | 713 | } 714 | 715 | /* create a section for the preamble that precedes the first boundary */ 716 | if (workpart->boundary) { 717 | workpart = alloc_new_child_part(workpart, workpart->bodystart, 1); 718 | workpart->parsedata.in_header = 0; 719 | workpart->parsedata.is_dummy = 1; 720 | return SUCCESS; 721 | } 722 | 723 | return SUCCESS; 724 | } 725 | 726 | } 727 | 728 | return SUCCESS; 729 | } 730 | /* Feed bufsize bytes of message text to the parser. Text is collected in part->parsedata.workbuf and every line completed by a newline is handed to php_mimepart_process_line, after which the buffer is emptied; a trailing partial line stays buffered for the next call. Returns FAILURE as soon as a line fails to process and SUCCESS otherwise. */ 731 | PHP_MAILPARSE_API int php_mimepart_parse(php_mimepart *part, const char *buf, size_t bufsize) 732 | { 733 | size_t len; 734 | 735 | while(bufsize > 0) { 736 | /* look for EOL */ 737 | for (len = 0; len < bufsize; len++) 738 | if (buf[len] == '\n') { 739 | break; 740 | } 741 | if (len < bufsize && buf[len] == '\n') { 742 | ++len; 743 | smart_string_appendl(&part->parsedata.workbuf, buf, len); 744 | if (php_mimepart_process_line(part) == FAILURE) { 745 | /* php_mimepart_process_line() only returns FAILURE when the count of children 746 | * has exceeded MAXPARTS, and it does so at the very beginning, without doing any work. 747 | * It'd do this for all of the following lines, since the exceeded state won't change. 748 | * As no additional work has been done since the last php_mimepart_process_line() call, 749 | * it is safe to break the loop now not caring about the rest of the code.
750 | * 751 | * Known issues: 752 | * - some callers aren't obeying the returned value, but that's in the mailmessage 753 | * object which is not documented and seemingly outdated/unfinished anyway 754 | */ 755 | return FAILURE; 756 | }; 757 | part->parsedata.workbuf.len = 0; 758 | } else { 759 | smart_string_appendl(&part->parsedata.workbuf, buf, len); 760 | } 761 | 762 | buf += len; 763 | bufsize -= len; 764 | } 765 | return SUCCESS; 766 | } 767 | /* Invoke callback on part and then, depth first, on its child parts. Before the callback runs *child is NULL; afterwards it points at a local enumerator node whose id is the index of the child being visited, so top heads a chain of ids leading to the current part. Ids start at 1, except under a multipart part where they start at 0 and the child with id 0 is not visited. Stops and returns FAILURE as soon as the callback does. */ 768 | static int enum_parts_recurse(php_mimepart_enumerator *top, php_mimepart_enumerator **child, 769 | php_mimepart *part, mimepart_enumerator_func callback, void *ptr) 770 | { 771 | php_mimepart_enumerator next; 772 | php_mimepart *childpart; 773 | zval *childpart_z; 774 | HashPosition pos; 775 | 776 | *child = NULL; 777 | if (FAILURE == (*callback)(part, top, ptr)) { 778 | return FAILURE; 779 | } 780 | 781 | *child = &next; 782 | next.id = 1; 783 | 784 | if (CONTENT_TYPE_ISL(part, "multipart/", 10)) { 785 | next.id = 0; 786 | } 787 | 788 | zend_hash_internal_pointer_reset_ex(&part->children, &pos); 789 | while ((childpart_z = zend_hash_get_current_data_ex(&part->children, &pos)) != NULL) { 790 | mailparse_fetch_mimepart_resource(childpart, childpart_z); 791 | if (next.id) { 792 | if (FAILURE == enum_parts_recurse(top, &next.next, childpart, callback, ptr)) { 793 | return FAILURE; 794 | } 795 | } 796 | next.id++; 797 | zend_hash_move_forward_ex(&part->children, &pos); 798 | } 799 | return SUCCESS; 800 | } 801 | /* Enumerate part and all of its descendants with callback, starting the id chain at 1; the result of the recursion is discarded. */ 802 | PHP_MAILPARSE_API void php_mimepart_enum_parts(php_mimepart *part, mimepart_enumerator_func callback, void *ptr) 803 | { 804 | php_mimepart_enumerator top; 805 | top.id = 1; 806 | 807 | enum_parts_recurse(&top, &top.next, part, callback, ptr); 808 | } 809 | 810 | PHP_MAILPARSE_API void php_mimepart_enum_child_parts(php_mimepart *part, mimepart_child_enumerator_func callback, void *ptr) 811 | { 812 | HashPosition pos; 813 | php_mimepart *childpart; 814 | zval *childpart_z; 815 | 816 | int index = 0; 817 |
818 | zend_hash_internal_pointer_reset_ex(&part->children, &pos); 819 | while ((childpart_z = zend_hash_get_current_data_ex(&part->children, &pos)) != NULL) { 820 | mailparse_fetch_mimepart_resource(childpart, childpart_z); 821 | if (FAILURE == (*callback)(part, childpart, index, ptr)) { 822 | return; 823 | } 824 | 825 | zend_hash_move_forward_ex(&part->children, &pos); 826 | index++; 827 | } 828 | } 829 | 830 | struct find_part_struct { 831 | const char *searchfor; 832 | php_mimepart *foundpart; 833 | }; 834 | 835 | static int find_part_callback(php_mimepart *part, php_mimepart_enumerator *id, void *ptr) 836 | { 837 | struct find_part_struct *find = ptr; 838 | const unsigned char *num = (const unsigned char*)find->searchfor; 839 | unsigned int n; 840 | 841 | while (id) { 842 | if (!isdigit((int)*num)) { 843 | return SUCCESS; 844 | } 845 | /* convert from decimal to int */ 846 | n = 0; 847 | while (isdigit((int)*num)) { 848 | n = (n * 10) + (*num++ - '0'); 849 | } 850 | if (*num) { 851 | if (*num != '.') { 852 | return SUCCESS; 853 | } 854 | num++; 855 | } 856 | if (n != (unsigned int)id->id) { 857 | return SUCCESS; 858 | } 859 | id = id->next; 860 | } 861 | if (*num == 0) { 862 | find->foundpart = part; 863 | } 864 | 865 | return SUCCESS; 866 | } 867 | 868 | PHP_MAILPARSE_API php_mimepart *php_mimepart_find_by_name(php_mimepart *parent, const char *name) 869 | { 870 | struct find_part_struct find; 871 | 872 | find.searchfor = name; 873 | find.foundpart = NULL; 874 | php_mimepart_enum_parts(parent, find_part_callback, &find); 875 | return find.foundpart; 876 | } 877 | 878 | PHP_MAILPARSE_API php_mimepart *php_mimepart_find_child_by_position(php_mimepart *parent, int position) 879 | { 880 | HashPosition pos; 881 | php_mimepart *childpart = NULL; 882 | zval *childpart_z; 883 | 884 | zend_hash_internal_pointer_reset_ex(&parent->children, &pos); 885 | while(position-- > 0) 886 | if (FAILURE == zend_hash_move_forward_ex(&parent->children, &pos)) { 887 | return NULL; 888 
| } 889 | 890 | if ((childpart_z = zend_hash_get_current_data_ex(&parent->children, &pos)) != NULL) { 891 | mailparse_fetch_mimepart_resource(childpart, childpart_z); 892 | if(childpart) { 893 | return childpart; 894 | } 895 | } 896 | 897 | return NULL; 898 | } 899 | 900 | static int filter_into_work_buffer(int c, void *dat) 901 | { 902 | php_mimepart *part = dat; 903 | 904 | smart_string_appendc(&part->parsedata.workbuf, c); 905 | 906 | if (part->parsedata.workbuf.len >= 4096) { 907 | 908 | part->extract_func(part, part->extract_context, part->parsedata.workbuf.c, part->parsedata.workbuf.len); 909 | part->parsedata.workbuf.len = 0; 910 | } 911 | 912 | return c; 913 | } 914 | 915 | PHP_MAILPARSE_API void php_mimepart_decoder_prepare(php_mimepart *part, int do_decode, php_mimepart_extract_func_t decoder, void *ptr) 916 | { 917 | const mbfl_encoding *encoding; 918 | enum mbfl_no_encoding from = mbfl_no_encoding_8bit; 919 | 920 | if (do_decode && part->content_transfer_encoding) { 921 | encoding = mbfl_name2encoding(part->content_transfer_encoding); 922 | if (encoding) { 923 | from = encoding->no_encoding; 924 | } else { 925 | if (strcasecmp("binary", part->content_transfer_encoding) != 0) { 926 | zend_error(E_WARNING, "%s(): mbstring doesn't know how to decode %s transfer encoding!", 927 | get_active_function_name(), 928 | part->content_transfer_encoding); 929 | } 930 | from = mbfl_no_encoding_8bit; 931 | } 932 | } 933 | 934 | part->extract_func = decoder; 935 | part->extract_context = ptr; 936 | part->parsedata.workbuf.len = 0; 937 | 938 | if (do_decode) { 939 | if (from == mbfl_no_encoding_8bit || from == mbfl_no_encoding_7bit) { 940 | part->extract_filter = NULL; 941 | } else { 942 | part->extract_filter = mbfl_convert_filter_new( 943 | mbfl_no2encoding(from), mbfl_no2encoding(mbfl_no_encoding_8bit), 944 | filter_into_work_buffer, 945 | NULL, 946 | part 947 | ); 948 | } 949 | } 950 | 951 | } 952 | 953 | PHP_MAILPARSE_API void 
php_mimepart_decoder_finish(php_mimepart *part) 954 | { 955 | if (part->extract_filter) { 956 | mbfl_convert_filter_flush(part->extract_filter); 957 | mbfl_convert_filter_delete(part->extract_filter); 958 | } 959 | if (part->extract_func && part->parsedata.workbuf.len > 0) { 960 | part->extract_func(part, part->extract_context, part->parsedata.workbuf.c, part->parsedata.workbuf.len); 961 | part->parsedata.workbuf.len = 0; 962 | } 963 | } 964 | 965 | PHP_MAILPARSE_API int php_mimepart_decoder_feed(php_mimepart *part, const char *buf, size_t bufsize) 966 | { 967 | if (buf && bufsize) { 968 | size_t i; 969 | 970 | if (part->extract_filter) { 971 | for (i = 0; i < bufsize; i++) { 972 | if (mbfl_convert_filter_feed(buf[i], part->extract_filter) < 0) { 973 | zend_error(E_WARNING, "%s() - filter conversion failed. Input message is probably incorrectly encoded\n", 974 | get_active_function_name()); 975 | return -1; 976 | } 977 | } 978 | } else { 979 | return part->extract_func(part, part->extract_context, buf, bufsize); 980 | } 981 | } 982 | return 0; 983 | } 984 | 985 | PHP_MAILPARSE_API void php_mimepart_remove_from_parent(php_mimepart *part) 986 | { 987 | php_mimepart *parent = part->parent; 988 | HashPosition pos; 989 | php_mimepart *childpart; 990 | zval *childpart_z; 991 | 992 | if (parent == NULL) { 993 | return; 994 | } 995 | 996 | part->parent = NULL; 997 | 998 | zend_hash_internal_pointer_reset_ex(&parent->children, &pos); 999 | while((childpart_z = zend_hash_get_current_data_ex(&parent->children, &pos)) != NULL) { 1000 | if ((childpart_z = zend_hash_get_current_data_ex(&parent->children, &pos)) != NULL) { 1001 | mailparse_fetch_mimepart_resource(childpart, childpart_z); 1002 | if (childpart == part) { 1003 | zend_ulong h; 1004 | zend_hash_get_current_key_ex(&parent->children, NULL, &h, &pos); 1005 | zend_hash_index_del(&parent->children, h); 1006 | break; 1007 | } 1008 | } 1009 | zend_hash_move_forward_ex(&parent->children, &pos); 1010 | } 1011 | } 1012 | 1013 | 
/* Placeholder: the body is empty, so calling this neither links `child` to `part` nor modifies either argument. NOTE(review): looks like the unimplemented counterpart of php_mimepart_remove_from_parent() - confirm before relying on it. */
PHP_MAILPARSE_API void php_mimepart_add_child(php_mimepart *part, php_mimepart *child) 1014 | { 1015 | 1016 | } 1017 | --------------------------------------------------------------------------------