├── .github └── workflows │ └── php.yml ├── LICENSE ├── README.md ├── composer.json └── src ├── Converters ├── Globals.php ├── HttpFoundation.php └── PSR7.php ├── Part.php └── StreamedPart.php /.github/workflows/php.yml: -------------------------------------------------------------------------------- 1 | name: PHP 2 | 3 | on: 4 | push: 5 | branches: [ "master" ] 6 | pull_request: 7 | branches: [ "master" ] 8 | 9 | permissions: 10 | contents: read 11 | 12 | jobs: 13 | tests: 14 | runs-on: ubuntu-latest 15 | strategy: 16 | matrix: 17 | php-versions: ['7.0', '7.1', '7.2', '7.3', '7.4', '8.0', '8.1', '8.2', '8.3', '8.4'] 18 | name: PHP ${{ matrix.php-versions }} 19 | steps: 20 | - uses: actions/checkout@v4 21 | - name: Setup PHP 22 | uses: shivammathur/setup-php@v2 23 | with: 24 | php-version: ${{ matrix.php-versions }} 25 | tools: composer 26 | - name: Install vendors 27 | run: composer install 28 | - name: Run tests 29 | run: vendor/bin/phpunit --bootstrap vendor/autoload.php tests 30 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Copyright (c) 2015-2016 Romain Cambien 2 | 3 | Permission is hereby granted, free of charge, to any person obtaining a copy 4 | of this software and associated documentation files (the "Software"), 5 | to deal in the Software without restriction, including without limitation 6 | the rights to use, copy, modify, merge, publish, distribute, sublicense, 7 | and/or sell copies of the Software, and to permit persons to whom the Software 8 | is furnished to do so, subject to the following conditions: 9 | 10 | The above copyright notice and this permission notice shall be included 11 | in all copies or substantial portions of the Software. 
12 | 13 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, 14 | INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 15 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 16 | IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, 17 | DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, 18 | ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR 19 | OTHER DEALINGS IN THE SOFTWARE. 20 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # README 2 | 3 | [![PHP](https://github.com/Riverline/multipart-parser/actions/workflows/php.yml/badge.svg)](https://github.com/Riverline/multipart-parser/actions/workflows/php.yml) 4 | 5 | ## What is Riverline\MultiPartParser 6 | 7 | ``Riverline\MultiPartParser`` is a one-class library to parse multipart documents (multipart email, multipart form, etc ...) 8 | and manage each part's encoding and charset to extract its content. 9 | 10 | ## Requirements 11 | 12 | * PHP >= 7.0 13 | 14 | ## Installation 15 | 16 | ``Riverline\MultiPartParser`` is compatible with composer and any psr-0/psr-4 autoloader. 
17 | 18 | ``` 19 | composer require riverline/multipart-parser 20 | ``` 21 | 22 | ## Usage 23 | 24 | ```php 25 | isMultiPart()) { 58 | $parts = $document->getParts(); 59 | echo $parts[0]->getBody(); // Output bar 60 | // It decodes encoded content 61 | echo $parts[1]->getBody(); // Output base64 62 | 63 | // You can also filter by part name 64 | $parts = $document->getPartsByName('foo'); 65 | echo $parts[0]->getName(); // Output foo 66 | 67 | // You can extract the headers 68 | $contentDisposition = $parts[0]->getHeader('Content-Disposition'); 69 | echo $contentDisposition; // Output form-data; name="foo" 70 | // Helpers 71 | echo StreamedPart::getHeaderValue($contentDisposition); // Output form-data 72 | echo StreamedPart::getHeaderOption($contentDisposition, 'name'); // Output foo 73 | 74 | // File helper 75 | if ($parts[2]->isFile()) { 76 | echo $parts[2]->getFileName(); // Output text.txt 77 | echo $parts[2]->getMimeType(); // Output text/plain 78 | } 79 | } 80 | ``` 81 | 82 | ## Converters 83 | 84 | The library also provides three converters to quickly parse `PSR-7`, `HttpFoundation` and native requests. 85 | 86 | ```php 87 | =7.0", 19 | "ext-mbstring": "*" 20 | }, 21 | "require-dev": { 22 | "phpunit/phpunit": "*", 23 | "psr/http-message": "*", 24 | "symfony/psr-http-message-bridge": "*", 25 | "laminas/laminas-diactoros": "*" 26 | }, 27 | "autoload": { 28 | "psr-4": { 29 | "Riverline\\MultiPartParser\\": "src/" 30 | } 31 | }, 32 | "autoload-dev": { 33 | "psr-4": { 34 | "Riverline\\MultiPartParser\\": "tests/" 35 | } 36 | } 37 | } 38 | -------------------------------------------------------------------------------- /src/Converters/Globals.php: -------------------------------------------------------------------------------- 1 | 7 | * 8 | * For the full copyright and license information, please view the LICENSE 9 | * file that was distributed with this source code. 
//  */

namespace Riverline\MultiPartParser\Converters;

use Riverline\MultiPartParser\StreamedPart;

/**
 * Class Globals
 *
 * Rebuilds a raw HTTP message (header block, blank line, body) from the
 * $_SERVER superglobal and a body stream, then hands it to StreamedPart.
 */
class Globals
{
    /**
     * Convert the current native request into a StreamedPart.
     *
     * Every `HTTP_*` entry of $_SERVER, plus `CONTENT_LENGTH`, `CONTENT_MD5`
     * and `CONTENT_TYPE`, is written as a lower-cased, dash-separated header
     * line. The content of $input is appended after the blank line that ends
     * the header block.
     *
     * @param resource|null $input Readable stream holding the request body.
     *                             When omitted, STDIN is used if that constant
     *                             is defined, otherwise `php://input` is opened
     *                             (and closed again once copied).
     *
     * @return StreamedPart
     *
     * @throws \InvalidArgumentException if $input is neither null nor a stream resource
     */
    public static function convert($input = null)
    {
        $ownsInput = false;

        if (null === $input) {
            if (defined('STDIN')) {
                $input = \STDIN;
            } else {
                // The STDIN constant is not defined here, so fall back to
                // the request body wrapper instead of failing on an undefined constant.
                $input = fopen('php://input', 'rb');
                $ownsInput = true;
            }
        }

        if (false === is_resource($input)) {
            throw new \InvalidArgumentException('Input is not a stream');
        }

        // Index by header name so that a header exposed twice (for example as
        // both CONTENT_TYPE and HTTP_CONTENT_TYPE) is written only once;
        // a duplicated Content-Type would otherwise be parsed as an array.
        $headers = [];
        foreach ($_SERVER as $key => $value) {
            $key = (string) $key;

            if (0 === strpos($key, 'HTTP_')) {
                $name = str_replace('_', '-', strtolower(substr($key, 5)));
                // Never override a value already taken from a CONTENT_* entry
                if (false === isset($headers[$name])) {
                    $headers[$name] = $value;
                }
            } elseif (in_array($key, ['CONTENT_LENGTH', 'CONTENT_MD5', 'CONTENT_TYPE'], true)) {
                $headers[str_replace('_', '-', strtolower($key))] = $value;
            }
        }

        $stream = fopen('php://temp', 'rw');

        foreach ($headers as $name => $value) {
            fwrite($stream, "$name: $value\r\n");
        }

        // Blank line: end of the header block
        fwrite($stream, "\r\n");

        stream_copy_to_stream($input, $stream);

        if ($ownsInput) {
            fclose($input);
        }

        rewind($stream);

        return new StreamedPart($stream);
    }
}

// --------------------------------------------------------------------------------
// /src/Converters/HttpFoundation.php:
// --------------------------------------------------------------------------------
//  *
//  * For the full copyright and license information, please view the LICENSE
//  * file that was distributed with this source code.
//  */

namespace Riverline\MultiPartParser\Converters;

use Riverline\MultiPartParser\StreamedPart;
use Symfony\Component\HttpFoundation\Request;

/**
 * Class HttpFoundation
 *
 * Converts a Symfony HttpFoundation request into a StreamedPart.
 */
class HttpFoundation
{
    /**
     * Serialise the request headers and content into a temporary stream and
     * parse the result.
     *
     * @param Request $request
     *
     * @return StreamedPart
     */
    public static function convert(Request $request)
    {
        $stream = fopen('php://temp', 'rw');

        // String form of the header bag, followed by the blank line that
        // separates the headers from the body.
        fwrite($stream, (string) $request->headers."\r\n");

        // getContent(true) is asked for the content as a resource so it can
        // be copied without loading it in a string first.
        stream_copy_to_stream($request->getContent(true), $stream);

        rewind($stream);

        return new StreamedPart($stream);
    }
}

// --------------------------------------------------------------------------------
// /src/Converters/PSR7.php:
// --------------------------------------------------------------------------------
//  *
//  * For the full copyright and license information, please view the LICENSE
//  * file that was distributed with this source code.
//  */

namespace Riverline\MultiPartParser\Converters;

use Psr\Http\Message\MessageInterface;
use Riverline\MultiPartParser\StreamedPart;

/**
 * Class PSR7
 *
 * Converts a PSR-7 message into a StreamedPart.
 */
class PSR7
{
    /**
     * Serialise the message headers and body into a temporary stream and
     * parse the result.
     *
     * Each value of a multi-valued header is written on its own line.
     *
     * @param MessageInterface $message
     *
     * @return StreamedPart
     */
    public static function convert(MessageInterface $message)
    {
        $stream = fopen('php://temp', 'rw');

        foreach ($message->getHeaders() as $key => $values) {
            foreach ($values as $value) {
                fwrite($stream, "$key: $value\r\n");
            }
        }
        // Blank line: end of the header block
        fwrite($stream, "\r\n");

        $body = $message->getBody();

        // rewind() throws on a stream that cannot seek; such a body is read
        // from its current position instead.
        if ($body->isSeekable()) {
            $body->rewind();
        }

        while (!$body->eof()) {
            fwrite($stream, $body->read(1024));
        }

        rewind($stream);

        return new StreamedPart($stream);
    }
}

// --------------------------------------------------------------------------------
// /src/Part.php:
// --------------------------------------------------------------------------------
//  *
//  * For the full copyright and license information, please view the LICENSE
//  * file that was distributed with this source code.
//  */

namespace Riverline\MultiPartParser;

/**
 * Class Part
 *
 * @deprecated Wrapper class, use StreamedPart
 */
class Part extends StreamedPart
{
    /**
     * Part constructor.
     *
     * Copies the given string into a temporary stream and parses it with
     * the StreamedPart constructor.
     *
     * @param string $content
     *
     * @throws \InvalidArgumentException
     */
    public function __construct($content)
    {
        $stream = fopen('php://temp', 'rw');
        fwrite($stream, $content);
        rewind($stream);

        parent::__construct($stream);
    }
}

// --------------------------------------------------------------------------------
// /src/StreamedPart.php:
// --------------------------------------------------------------------------------
//  *
//  * For the full copyright and license information, please view the LICENSE
//  * file that was distributed with this source code.
//  */

namespace Riverline\MultiPartParser;

/**
 * Class StreamedPart
 *
 * Parses a document made of a header block, a blank line and a body, read
 * from a stream. When the Content-Type is `multipart/*`, the body is split
 * on the boundary and every sub part is parsed the same way.
 */
class StreamedPart
{
    /**
     * @var resource
     */
    private $stream;

    /**
     * Headers indexed by lower-cased name; a header seen several times holds
     * an array of values.
     *
     * @var array
     */
    private $headers;

    /**
     * Position of the first body byte in the stream.
     *
     * @var int
     */
    private $bodyOffset;

    /**
     * @var StreamedPart[]
     */
    private $parts = array();

    /**
     * StreamedPart constructor.
     *
     * @param resource $stream
     *
     * @throws \InvalidArgumentException if the input is not a stream, if the header block is never
     *                                   terminated by an empty line, if a header line is too long,
     *                                   or if a multipart content type has no boundary
     * @throws \LogicException           if the content is multipart but no part or no closing
     *                                   separator is found
     */
    public function __construct($stream)
    {
        if (false === is_resource($stream)) {
            throw new \InvalidArgumentException('Input is not a stream');
        }

        $this->stream = $stream;

        // Reset the stream
        rewind($this->stream);

        // Parse headers
        $this->headers = self::indexHeaders($this->readHeaderLines());

        // The stream is now positioned right after the empty line
        $this->bodyOffset = ftell($this->stream);

        // Is MultiPart ?
        if ($this->isMultiPart()) {
            $boundary = self::getHeaderOption($this->getHeader('Content-Type'), 'boundary');

            if (null === $boundary) {
                throw new \InvalidArgumentException("Can't find boundary in content type");
            }

            $this->readParts('--'.$boundary);
        }
    }

    /**
     * @return bool
     */
    public function isMultiPart()
    {
        // Main type, i.e. what comes before the "/" of the Content-Type value
        $mainType = mb_strstr(
            self::getHeaderValue($this->getHeader('Content-Type')),
            '/',
            true
        );

        return 'multipart' === mb_strtolower((string) $mainType);
    }

    /**
     * Get the body, decoded according to Content-Transfer-Encoding and, unless
     * that encoding is `binary` or `7bit`, converted to UTF-8.
     *
     * @return string
     *
     * @throws \LogicException if is multipart
     */
    public function getBody()
    {
        if ($this->isMultiPart()) {
            throw new \LogicException("MultiPart content, there aren't body");
        }

        $body = stream_get_contents($this->stream, -1, $this->bodyOffset);

        // Decode
        $encoding = strtolower((string) $this->getHeader('Content-Transfer-Encoding', '7bit'));
        switch ($encoding) {
            case 'base64':
                $body = base64_decode($body);
                break;
            case 'quoted-printable':
                $body = quoted_printable_decode($body);
                break;
        }

        // Convert to UTF-8 ( Not if binary or 7bit ( aka Ascii ) )
        if (false === in_array($encoding, array('binary', '7bit'), true)) {
            // Charset
            $contentType = $this->getHeader('Content-Type');
            $charset = self::getHeaderOption($contentType, 'charset');
            if (null === $charset) {
                // Try to detect
                $charset = mb_detect_encoding($body) ?: 'utf-8';
            }

            // Only convert if not UTF-8
            if ('utf-8' !== strtolower($charset)) {
                $body = mb_convert_encoding($body, 'utf-8', $charset);
            }
        }

        return $body;
    }

    /**
     * @return array headers indexed by lower-cased name
     */
    public function getHeaders()
    {
        return $this->headers;
    }

    /**
     * @param string $key     header name, case-insensitive
     *
     * @param mixed  $default returned when the header is not set
     *
     * @return mixed the header value, an array of values if the header was repeated, or $default
     */
    public function getHeader($key, $default = null)
    {
        // Case-insensitive key
        $key = strtolower($key);

        if (false === isset($this->headers[$key])) {
            return $default;
        }

        return $this->headers[$key];
    }

    /**
     * Get what comes before the first option separator of a header value.
     *
     * @param string $header
     *
     * @return string
     */
    public static function getHeaderValue($header)
    {
        list($value) = self::parseHeaderContent($header);

        return $value;
    }

    /**
     * Get the `key=value` options of a header value.
     *
     * @param string $header
     *
     * @return array
     */
    public static function getHeaderOptions($header)
    {
        list(, $options) = self::parseHeaderContent($header);

        return $options;
    }

    /**
     * Get one option of a header value.
     *
     * The exact key is tried first; otherwise the option names are compared
     * without regard to case, so `Boundary=...` is found with `boundary`.
     *
     * @param string $header
     * @param string $key
     *
     * @param mixed  $default returned when the option is not set
     *
     * @return mixed
     */
    public static function getHeaderOption($header, $key, $default = null)
    {
        $options = self::getHeaderOptions($header);

        if (isset($options[$key])) {
            return $options[$key];
        }

        foreach ($options as $name => $value) {
            if (0 === strcasecmp((string) $name, (string) $key)) {
                return $value;
            }
        }

        return $default;
    }

    /**
     * @return string the Content-Type value, `application/octet-stream` if none
     */
    public function getMimeType()
    {
        // Find Content-Type
        $contentType = $this->getHeader('Content-Type');

        return self::getHeaderValue($contentType) ?: 'application/octet-stream';
    }

    /**
     * @return string|null
     */
    public function getName()
    {
        // Find Content-Disposition
        $contentDisposition = $this->getHeader('Content-Disposition');

        return self::getHeaderOption($contentDisposition, 'name');
    }

    /**
     * @return string|null
     */
    public function getFileName()
    {
        // Find Content-Disposition
        $contentDisposition = $this->getHeader('Content-Disposition');

        return self::getHeaderOption($contentDisposition, 'filename');
    }

    /**
     * @return bool
     */
    public function isFile()
    {
        return (false === is_null($this->getFileName()));
    }

    /**
     * @return StreamedPart[]
     *
     * @throws \LogicException if is not multipart
     */
    public function getParts()
    {
        if (false === $this->isMultiPart()) {
            throw new \LogicException("Not MultiPart content, there aren't any parts");
        }

        return $this->parts;
    }

    /**
     * @param string $name
     *
     * @return StreamedPart[]
     *
     * @throws \LogicException if is not multipart
     */
    public function getPartsByName($name)
    {
        $parts = array();

        foreach ($this->getParts() as $part) {
            if ($part->getName() === $name) {
                $parts[] = $part;
            }
        }

        return $parts;
    }

    /**
     * Read the header block from the current stream position, up to and
     * including the empty line that ends it, and unfold continuation lines.
     *
     * @return string[] one entry per unfolded header line
     *
     * @throws \InvalidArgumentException if the stream ends before an empty line is found
     */
    private function readHeaderLines()
    {
        $bufferSize = 8192;
        $headerLines = [];
        $buffer = '';

        while (false !== ($line = fgets($this->stream, $bufferSize))) {
            // Append to buffer
            $buffer .= rtrim($line, "\r\n");

            // fgets() returns at most $bufferSize-1 bytes. A full chunk is only
            // an unfinished line when it does not end with a line feed: a line
            // that is exactly $bufferSize-1 bytes long with its EOL is complete.
            if (strlen($line) === $bufferSize - 1 && "\n" !== substr($line, -1)) {
                // EOL not reached, continue
                continue;
            }

            if ('' === $buffer) {
                // Empty line caused by double new line, we reached the end of the headers section
                return $headerLines;
            }

            // Detect horizontal whitespaces before header
            $trimmed = ltrim($buffer);
            if (strlen($buffer) > strlen($trimmed) && count($headerLines) > 0) {
                // Multi lines header, append to previous line
                $headerLines[count($headerLines) - 1] .= "\x20".$trimmed;
            } else {
                // Regular line, or an indented first line that has no previous line to extend
                $headerLines[] = $buffer;
            }

            // Reset buffer
            $buffer = '';
        }

        throw new \InvalidArgumentException('Content is not valid');
    }

    /**
     * Split header lines into name and value, and index them by lower-cased name.
     *
     * @param string[] $headerLines
     *
     * @return array
     *
     * @throws \InvalidArgumentException if a line is longer than 8192 bytes
     */
    private static function indexHeaders(array $headerLines)
    {
        $headers = [];

        foreach ($headerLines as $line) {
            // We don't allow malformed headers that could have a very long length.
            // Indeed, in HTTP contexts these could be used for DoS/DoW attacks by slowing down the parsing.
            // Most web server allow a maximum of 8192 characters for an header line, so we'll use that value.
            if (strlen($line) > 8192) {
                throw new \InvalidArgumentException('Malformed header: header value is too long');
            }

            $lineSplit = explode(':', $line, 2);

            // Case-insensitive key
            $key = strtolower($lineSplit[0]);
            // Decode value; a line without ":" is a bogus header with an empty value
            $value = isset($lineSplit[1]) ? mb_decode_mimeheader(trim($lineSplit[1])) : '';

            if (false === array_key_exists($key, $headers)) {
                $headers[$key] = $value;
            } else {
                // Already got an header with this key, convert to array
                $headers[$key] = (array) $headers[$key];
                $headers[$key][] = $value;
            }
        }

        return $headers;
    }

    /**
     * Read the body from the current stream position and build one
     * StreamedPart for each span enclosed by two separator lines.
     *
     * @param string $separator the boundary prefixed with "--"
     *
     * @throws \LogicException if no part or no closing separator is found
     */
    private function readParts($separator)
    {
        $bufferSize = 8192;
        $partOffset = 0;
        $endOfBody = false;
        $eolLength = 0;

        // Compare with false: a last line made of "0" must not stop the loop
        while (false !== ($line = fgets($this->stream, $bufferSize))) {
            $trimmed = rtrim($line, "\r\n");

            // Search the separator
            if ($trimmed === $separator || $trimmed === $separator.'--') {
                if ($partOffset > 0) {
                    $currentOffset = ftell($this->stream);
                    // Drop the separator line and the EOL that precedes it. Clamped to zero:
                    // two consecutive separators would otherwise give a negative length.
                    $partLength = max(0, $currentOffset - $partOffset - strlen($line) - $eolLength);

                    // Copy part in a new stream
                    $partStream = fopen('php://temp', 'rw');
                    stream_copy_to_stream($this->stream, $partStream, $partLength, $partOffset);
                    $this->parts[] = new self($partStream);
                    // Reset current stream offset
                    fseek($this->stream, $currentOffset);
                }

                if ($trimmed === $separator.'--') {
                    // We reach the end separator
                    $endOfBody = true;
                    break;
                }

                // Update the part offset
                $partOffset = ftell($this->stream);
            }

            // Get end of line length (should be 2)
            $eolLength = strlen($line) - strlen($trimmed);
        }

        if (0 === count($this->parts)
            || false === $endOfBody
        ) {
            throw new \LogicException("Can't find multi-part content");
        }
    }

    /**
     * Split a header value into its main value and its options.
     *
     * @param string|array|null $content header value; for a repeated header (array) the first value is used
     *
     * @return array the main value, then the options indexed by trimmed name
     */
    private static function parseHeaderContent($content)
    {
        if (is_array($content)) {
            // Repeated header: casting the array to string would yield "Array"
            $content = reset($content);
        }

        $parts = self::splitOutsideQuotes((string) $content);
        $headerValue = array_shift($parts);
        $options = array();

        // Parse options
        foreach ($parts as $part) {
            if ('' === trim($part)) {
                // Nothing between two ";" or after the last one
                continue;
            }

            $partSplit = explode('=', $part, 2);
            if (2 !== count($partSplit)) {
                // Bogus option
                $options[trim($partSplit[0])] = '';
                continue;
            }

            list($key, $value) = $partSplit;
            if ('*' === substr($key, -1)) {
                // RFC 5987: charset'language'percent-encoded value
                $key = substr($key, 0, -1);
                if (preg_match(
                    '/(?P<charset>[\w!#$%&+^_`{}~-]+)\'(?P<language>[\w-]*)\'(?P<value>.*)$/',
                    $value,
                    $matches
                )) {
                    $value = mb_convert_encoding(
                        rawurldecode($matches['value']),
                        'utf-8',
                        $matches['charset']
                    );
                }
            }
            $options[trim($key)] = trim($value, ' "');
        }

        return array($headerValue, $options);
    }

    /**
     * Split a string on every ";" that is not enclosed in double quotes, so
     * that a quoted option value such as `filename="a;b.txt"` stays whole.
     *
     * @param string $content
     *
     * @return string[] at least one element
     */
    private static function splitOutsideQuotes($content)
    {
        $segments = [];
        $current = '';
        $inQuotes = false;
        $length = strlen($content);

        for ($i = 0; $i < $length; $i++) {
            $char = $content[$i];

            if (';' === $char && false === $inQuotes) {
                $segments[] = $current;
                $current = '';
                continue;
            }

            if ('"' === $char) {
                $inQuotes = !$inQuotes;
            }

            $current .= $char;
        }

        $segments[] = $current;

        return $segments;
    }
}

// --------------------------------------------------------------------------------