├── .travis.yml ├── LICENSE ├── README.md ├── _config.yml ├── bin ├── detector ├── self-testing └── table-generator ├── composer.json ├── phpunit.xml ├── src ├── ContentStream.php ├── Detector.php └── TerminalInfo.php └── tests ├── ContentStreamTest.php ├── DetectorTest.php └── image.jpeg /.travis.yml: -------------------------------------------------------------------------------- 1 | language: php 2 | 3 | php: 4 | - 5.4 5 | - 5.5 6 | - 5.6 7 | - 7.0 8 | - 7.1 9 | - 7.2 10 | - hhvm 11 | 12 | before_script: 13 | - composer install 14 | 15 | script: 16 | - vendor/bin/phpunit ./tests/ 17 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | The MIT License (MIT) 2 | 3 | Copyright (c) 2015 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
22 | 23 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # FileTypeDetector 2 | File type detector based on file name extension or file content (binary content). 3 | 4 | [![Latest Stable Version](https://poser.pugx.org/wapmorgan/file-type-detector/v/stable)](https://packagist.org/packages/wapmorgan/file-type-detector) 5 | [![Total Downloads](https://poser.pugx.org/wapmorgan/file-type-detector/downloads)](https://packagist.org/packages/wapmorgan/file-type-detector) 6 | [![Latest Unstable Version](https://poser.pugx.org/wapmorgan/file-type-detector/v/unstable)](https://packagist.org/packages/wapmorgan/file-type-detector) 7 | [![License](https://poser.pugx.org/wapmorgan/file-type-detector/license)](https://packagist.org/packages/wapmorgan/file-type-detector) 8 | [![Tests](https://travis-ci.org/wapmorgan/FileTypeDetector.svg?branch=master)](https://travis-ci.org/wapmorgan/FileTypeDetector) 9 | 10 | 1. Usage 11 | 2. Installation 12 | 3. Supported formats 13 | 14 | # Usage 15 | 16 | ## File Type detection 17 | 18 | - Detection by file name: `Detector::detectByFilename(...filename...): array|boolean` 19 | - Detection by file content or stream content: `Detector::detectByContent(...filename/resource...): array|boolean` 20 | 21 | Both functions will return an `array` with the following elements in case of success: 22 | 23 | - `[0]` - Type of file (`Detector::AUDIO` and so on) 24 | - `[1]` - Format of file (`Detector::MP3` and so on) 25 | - `[2]` - Mime type of file (`'audio/mpeg'` for example) 26 | 27 | In case of failure it will return `false`. 
28 | 29 | Example: 30 | 31 | ```php 32 | $type = wapmorgan\FileTypeDetector\Detector::detectByFilename($filename); 33 | // or 34 | $type = wapmorgan\FileTypeDetector\Detector::detectByContent('file-without-extension'); 35 | // or 36 | $type = wapmorgan\FileTypeDetector\Detector::detectByContent(fopen('http://somedomain/somepath', 'r')); 37 | ``` 38 | 39 | ## Mimetype generation 40 | 41 | To get only the MIME type of a file, use the `getMimeType($file)` function. 42 | 43 | ```php 44 | $mime = wapmorgan\FileTypeDetector\Detector::getMimeType($file); 45 | // or 46 | $mime = wapmorgan\FileTypeDetector\Detector::getMimeType(fopen('somefile', 'r')); 47 | ``` 48 | 49 | # Installation 50 | Install package via composer: 51 | ``` 52 | composer require wapmorgan/file-type-detector 53 | ``` 54 | 55 | # Supported formats 56 | 57 | Available types and their formats. 58 | 59 | | Application | Archive | Audio | Database | Disk_image | Document | Feed | Font | Image | Presentation | Scenario | Spreadsheet | Video | 60 | |-------------|---------|-------|----------|------------|----------|------|------|-------|--------------|----------|-------------|-------| 61 | | apk | 7z | aac | accdb | iso | doc | atom | otf | bmp | odp | reg | csv | 3gp | 62 | | com | arc | amr | mdb | nrg | docx | rss | ttf | gif | ppt | | ods | asf | 63 | | exe | arj | flac | odb | vhd | html | | | ico | pptx | | tsv | avi | 64 | | xap | bzip2 | m3u | sqlite | | json | | | jpeg | | | xls | flv | 65 | | | cab | midi | | | markdown | | | png | | | xlsx | m4v | 66 | | | dar | mp3 | | | odt | | | psd | | | | mkv | 67 | | | gzip | ogg | | | pdf | | | tiff | | | | mov | 68 | | | jar | wav | | | rtf | | | | | | | mp4 | 69 | | | lzma2 | wma | | | txt | | | | | | | mpeg | 70 | | | rar | | | | xml | | | | | | | swf | 71 | | | tar | | | | yaml | | | | | | | vob | 72 | | | zip | | | | | | | | | | | webm | 73 | | | | | | | | | | | | | | wmv | 74 | 75 | Formats support status. 
76 | 77 | | Format | Extension | Detection by content | MimeType | Signature | 78 | |----------|-----------|----------------------|---------------------------------------------------------------------------|------------------------------------------------------------------------------------------------------------------------------------------------------------------------| 79 | | 3gp | 3gp | + | video/3gpp | at [0]: (0x0001466747970336770) | 80 | | 7z | 7z | + | application/x-7z-compressed | at [0]: (0x377abcaf271c) | 81 | | Aac | aac | + | audio/x-aac | at [0]: (0xfff1) / at [0]: (0xfff9) | 82 | | Accdb | accdb | + | application/x-msaccess | at [0]: (0x01005374616e6461726420414345204442) | 83 | | Amr | amr | + | audio/amr | at [0]: (0x2321414d52) | 84 | | Apk | apk | + | application/vnd.android.package-archive | at [0]: (0x504b34) & at [30]: ('AndroidManifest.xml') | 85 | | Arc | arc | + | application/x-freearc | at [0]: (0x4172431) | 86 | | Arj | arj | + | application/arj | at [0]: (0x60ea) | 87 | | Asf | asf | - | - | | 88 | | Atom | atom | + | application/atom+xml | at [0]: (' $maxLength) 44 | return '...'.substr($string, strlen($string) - $maxLength + 3); 45 | return $string; 46 | } 47 | -------------------------------------------------------------------------------- /bin/self-testing: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env php 2 | getDefaultProperties(); 9 | 10 | $all_formats = $properties['extensions']; 11 | $all_types = $properties['types']; 12 | 13 | // check for type relation 14 | $formats_with_type = array(); 15 | foreach ($all_types as $type_formats) 16 | $formats_with_type = array_merge($formats_with_type, $type_formats); 17 | 18 | $formats_without_type = array_diff($all_formats, $formats_with_type); 19 | if (!empty($formats_without_type)) { 20 | echo '! There are formats without type:'.PHP_EOL. 
21 | '- '.implode(PHP_EOL.'- ', $formats_without_type).PHP_EOL; 22 | } 23 | 24 | $formats_without_mimetype = array_diff($all_formats, array_keys($properties['mimeTypes'])); 25 | if (!empty($formats_without_mimetype)) { 26 | echo '! There are formats without mimetype:'.PHP_EOL. 27 | '- '.implode(PHP_EOL.'- ', $formats_without_mimetype).PHP_EOL; 28 | } 29 | 30 | $formats_without_signature = array_diff($all_formats, array_keys($properties['signatures'])); 31 | if (!empty($formats_without_signature)) { 32 | echo '! There are formats without signature:'.PHP_EOL. 33 | '- '.implode(PHP_EOL.'- ', $formats_without_signature).PHP_EOL; 34 | } 35 | -------------------------------------------------------------------------------- /bin/table-generator: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env php 2 | getDefaultProperties(); 9 | 10 | $all_formats = $properties['extensions']; 11 | $all_types = $properties['types']; 12 | 13 | function bytesArray2String(array $bytes) { 14 | $output = null; 15 | foreach ($bytes as $byte) { 16 | $output .= dechex($byte); 17 | } 18 | return $output; 19 | } 20 | 21 | if (isset($argv[1]) && in_array($argv[1], array('types', 'support'))) { 22 | $table = $argv[1]; 23 | } else { 24 | echo 'Table type (write "types" or "support"): '; 25 | $input = trim(fgets(STDIN)); 26 | if (!in_array($input, array('types', 'support'))) 27 | die('Sorry, but input is invalid'.PHP_EOL); 28 | $table = $input; 29 | } 30 | 31 | switch ($table) { 32 | case 'types': 33 | 34 | ksort($all_types); 35 | 36 | $i = 0; 37 | foreach ($all_types as $type => $type_formats) { 38 | echo ($i++ > 0 ? 
',' : null).ucfirst($type); 39 | sort($type_formats); 40 | $all_types[$type] = $type_formats; 41 | } 42 | echo PHP_EOL; 43 | 44 | $max_count = max(array_map(function ($formats) { return count($formats); }, $all_types)); 45 | 46 | for ($i = 0; $i < $max_count; $i++) { 47 | $j = 0; 48 | foreach ($all_types as $type => $type_formats) { 49 | echo ($j++ > 0 ? ',' : null).(isset($type_formats[$i]) ? $type_formats[$i] : null); 50 | } 51 | echo PHP_EOL; 52 | } 53 | 54 | break; 55 | 56 | case 'support': 57 | 58 | $mimetypes = $properties['mimeTypes']; 59 | $signatures = $properties['signatures']; 60 | 61 | echo 'Format,Extension,"Detection by content",MimeType,Signature'.PHP_EOL; 62 | 63 | asort($all_formats); 64 | 65 | foreach ($all_formats as $extension => $format) { 66 | echo ucfirst($format).','.$extension.','.(isset($signatures[$format]) ? '+' : '-').','.(isset($mimetypes[$format]) ? $mimetypes[$format] : '-').','; 67 | if (isset($signatures[$format])) { 68 | foreach ($signatures[$format] as $j => $format_signature) { 69 | if ($j > 0) echo ' / '; 70 | $i = 0; 71 | foreach ($format_signature as $offset => $signature_part) { 72 | if ($i++ > 0) echo ' & '; 73 | echo 'at ['.$offset.']: ('; 74 | if (is_string($signature_part)) echo '\''.$signature_part.'\''; 75 | else { 76 | if (isset($signature_part['bytes'])) { 77 | echo 'fuzzy search \''.implode(null, $signature_part['bytes']).'\''; 78 | } 79 | else if (is_integer($signature_part[0])) echo '0x'.bytesArray2String($signature_part); 80 | else echo '\''.implode(null, $signature_part).'\''; 81 | } 82 | echo ') '; 83 | } 84 | } 85 | } 86 | echo PHP_EOL; 87 | } 88 | 89 | break; 90 | } 91 | -------------------------------------------------------------------------------- /composer.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "wapmorgan/file-type-detector", 3 | "description": "Detects file type by filename or content and generates correct mimetype.", 4 | "keywords": 
["mimetype", "files", "formats", "filetype"], 5 | "license": "MIT", 6 | "autoload": { 7 | "psr-4": { 8 | "wapmorgan\\FileTypeDetector\\": "src/" 9 | } 10 | }, 11 | "autoload-dev": { 12 | "psr-4": { 13 | "wapmorgan\\FileTypeDetector\\": "tests/" 14 | } 15 | }, 16 | "require-dev": { 17 | "phpunit/phpunit": "^4.8" 18 | } 19 | } 20 | -------------------------------------------------------------------------------- /phpunit.xml: -------------------------------------------------------------------------------- 1 | 2 | 4 | 5 | 6 | tests 7 | 8 | 9 | 10 | 11 | src 12 | 13 | 14 | 15 | -------------------------------------------------------------------------------- /src/ContentStream.php: -------------------------------------------------------------------------------- 1 | fp = fopen($source, 'rb'); 15 | } 16 | // open stream 17 | else if (is_resource($source) && get_resource_type($source) == 'stream') { 18 | $this->fp = $source; 19 | $this->openedOutside = true; 20 | // cache all data if stream is not seekable 21 | $meta = stream_get_meta_data($source); 22 | if (!$meta['seekable']) { 23 | while (!feof($source)) 24 | $this->read[] = ord(fgetc($source)); 25 | } 26 | } else { 27 | throw new \Exception('Unknown source: '.var_export($source, true).' 
('.gettype($source).')'); 28 | } 29 | } 30 | 31 | public function checkBytes($offset, $ethalon) { 32 | if ($offset < 0) { 33 | $stat = fstat($this->fp); 34 | $offset = $stat['size'] + $offset; 35 | } 36 | if (!is_array($ethalon)) $ethalon = $this->convertToBytes($ethalon); 37 | foreach ($ethalon as $i => $byte) { 38 | if (!isset($this->read[$offset+$i])) { 39 | fseek($this->fp, $offset+$i, SEEK_SET); 40 | $this->read[$offset+$i] = ord(fgetc($this->fp)); 41 | } 42 | if ($this->read[$offset+$i] !== $byte) 43 | return false; 44 | } 45 | return true; 46 | } 47 | 48 | public function convertToBytes($string) { 49 | $bytes = array(); 50 | $l = strlen($string); 51 | for ($i = 0; $i < $l; $i++) 52 | $bytes[$i] = ord($string[$i]); 53 | return $bytes; 54 | } 55 | 56 | public function find($offset, array $bytes, $maxDepth = 512, $reverse = false) { 57 | if ($offset < 0) { 58 | $stat = fstat($this->fp); 59 | $offset = $stat['size'] + $offset; 60 | } 61 | $i = 0; 62 | while (abs($i) <= $maxDepth) { 63 | $i = $reverse ? 
$i - 1 : $i + 1; 64 | 65 | if (!isset($this->read[$offset+$i])) { 66 | fseek($this->fp, $offset+$i, SEEK_SET); 67 | $this->read[$offset+$i] = ord(fgetc($this->fp)); 68 | } 69 | 70 | foreach ($bytes as $j => $byte) { 71 | if (is_string($byte)) $byte = ord($byte); 72 | if ($this->read[$offset+$i+$j] != $byte) 73 | continue(2); 74 | 75 | } 76 | return true; 77 | } 78 | return false; 79 | } 80 | 81 | public function __destruct() { 82 | if (!$this->openedOutside) 83 | fclose($this->fp); 84 | } 85 | } 86 | -------------------------------------------------------------------------------- /src/Detector.php: -------------------------------------------------------------------------------- 1 | self::JPEG, 110 | 'tif' => self::TIFF, 111 | 'mpg' => self::MPEG, 112 | 'mpe' => self::MPEG, 113 | 'm4a' => self::AAC, 114 | 'yml' => self::YAML, 115 | 'md' => self::MARKDOWN, 116 | 'mid' => self::MIDI, 117 | ); 118 | 119 | protected static $extensions = array( 120 | 'jpeg' => self::JPEG, 121 | 'bmp' => self::BMP, 122 | 'gif' => self::GIF, 123 | 'png' => self::PNG, 124 | 'tiff' => self::TIFF, 125 | 'psd' => self::PSD, 126 | 'ico' => self::ICO, 127 | 'arj' => self::ARJ, 128 | 'bz2' => self::BZIP2, 129 | 'gz' => self::GZIP, 130 | 'xz' => self::LZMA2, 131 | '7z' => self::_7ZIP, 132 | 'cab' => self::CAB, 133 | 'jar' => self::JAR, 134 | 'rar' => self::RAR, 135 | 'tar' => self::TAR, 136 | 'zip' => self::ZIP, 137 | 'arc' => self::ARC, 138 | 'dar' => self::DAR, 139 | 'iso' => self::ISO, 140 | 'nrg' => self::NRG, 141 | 'vhd' => self::VHD, 142 | 'accdb' => self::ACCDB, 143 | 'mdb' => self::MDB, 144 | 'odb' => self::ODB, 145 | 'doc' => self::DOC, 146 | 'docx' => self::DOCX, 147 | 'html' => self::HTML, 148 | 'odt' => self::ODT, 149 | 'pdf' => self::PDF, 150 | 'rtf' => self::RTF, 151 | 'txt' => self::TXT, 152 | 'md' => self::MARKDOWN, 153 | 'json' => self::JSON, 154 | 'yaml' => self::YAML, 155 | 'xml' => self::XML, 156 | 'atom' => self::ATOM, 157 | 'rss' => self::RSS, 158 | 'otf' => self::OTF, 159 | 
'ttf' => self::TTF, 160 | 'apk' => self::APK, 161 | 'com' => self::COM, 162 | 'exe' => self::EXE, 163 | 'xap' => self::XAP, 164 | 'ppt' => self::PPT, 165 | 'pptx' => self::PPTX, 166 | 'odp' => self::ODP, 167 | 'flac' => self::FLAC, 168 | 'wma' => self::WMA, 169 | 'amr' => self::AMR, 170 | 'mp3' => self::MP3, 171 | 'aac' => self::AAC, 172 | 'm3u' => self::M3U, 173 | 'ogg' => self::OGG, 174 | 'wav' => self::WAV, 175 | 'midi' => self::MIDI, 176 | 'ods' => self::ODS, 177 | 'xls' => self::XLS, 178 | 'xlsx' => self::XLSX, 179 | 'csv' => self::CSV, 180 | 'tsv' => self::TSV, 181 | '3gp' => self::_3GP, 182 | 'asf' => self::ASF, 183 | 'avi' => self::AVI, 184 | 'flv' => self::FLV, 185 | 'm4v' => self::M4V, 186 | 'mkv' => self::MKV, 187 | 'mov' => self::MOV, 188 | 'mpeg' => self::MPEG, 189 | 'mp4' => self::MP4, 190 | 'swf' => self::SWF, 191 | 'vob' => self::VOB, 192 | 'wmv' => self::WMV, 193 | 'webm' => self::WEBM, 194 | 'reg' => self::REG, 195 | ); 196 | 197 | protected static $types = array( 198 | self::IMAGE => array( 199 | self::JPEG, 200 | self::BMP, 201 | self::GIF, 202 | self::PNG, 203 | self::TIFF, 204 | self::PSD, 205 | self::ICO, 206 | ), 207 | 208 | self::ARCHIVE => array( 209 | self::ARJ, 210 | self::BZIP2, 211 | self::GZIP, 212 | self::LZMA2, 213 | self::_7ZIP, 214 | self::CAB, 215 | self::JAR, 216 | self::RAR, 217 | self::TAR, 218 | self::ZIP, 219 | self::ARC, 220 | self::DAR, 221 | ), 222 | 223 | self::DISK_IMAGE => array( 224 | self::ISO, 225 | self::NRG, 226 | self::VHD, 227 | ), 228 | 229 | self::DATABASE => array( 230 | self::ACCDB, 231 | self::MDB, 232 | self::ODB, 233 | self::SQLITE, 234 | ), 235 | 236 | self::DOCUMENT => array( 237 | self::DOC, 238 | self::DOCX, 239 | self::HTML, 240 | self::ODT, 241 | self::PDF, 242 | self::RTF, 243 | self::TXT, 244 | self::MARKDOWN, 245 | self::JSON, 246 | self::YAML, 247 | self::XML, 248 | ), 249 | 250 | self::FEED => array( 251 | self::ATOM, 252 | self::RSS, 253 | ), 254 | 255 | self::FONT => array( 256 | self::OTF, 
257 | self::TTF, 258 | ), 259 | 260 | self::APPLICATION => array( 261 | self::APK, 262 | self::COM, 263 | self::EXE, 264 | self::XAP, 265 | ), 266 | 267 | self::PRESENTATION => array( 268 | self::PPT, 269 | self::PPTX, 270 | self::ODP, 271 | ), 272 | 273 | self::AUDIO => array( 274 | self::FLAC, 275 | self::WMA, 276 | self::AMR, 277 | self::MP3, 278 | self::AAC, 279 | self::M3U, 280 | self::OGG, 281 | self::WAV, 282 | self::MIDI, 283 | ), 284 | 285 | self::SPREADSHEET => array( 286 | self::ODS, 287 | self::XLS, 288 | self::XLSX, 289 | self::CSV, 290 | self::TSV, 291 | ), 292 | 293 | self::VIDEO => array( 294 | self::_3GP, 295 | self::ASF, 296 | self::AVI, 297 | self::FLV, 298 | self::M4V, 299 | self::MKV, 300 | self::MOV, 301 | self::MPEG, 302 | self::MP4, 303 | self::SWF, 304 | self::VOB, 305 | self::WMV, 306 | self::WEBM, 307 | ), 308 | 309 | self::SCENARIO => array( 310 | self::REG, 311 | ), 312 | ); 313 | 314 | protected static $mimeTypes = array( 315 | self::JPEG => 'image/jpeg', 316 | self::BMP => 'image/bmp', 317 | self::GIF => 'image/gif', 318 | self::PNG => 'image/png', 319 | self::TIFF => 'image/tiff', 320 | self::PSD => 'image/vnd.adobe.photoshop', 321 | self::ICO => 'image/x-icon', 322 | 323 | self::ARJ => 'application/arj', 324 | self::BZIP2 => 'application/x-bzip2', 325 | self::GZIP => 'application/gzip', 326 | self::_7ZIP => 'application/x-7z-compressed', 327 | self::LZMA2 => 'application/x-xz', 328 | self::CAB => 'application/vnd.ms-cab-compressed', 329 | self::JAR => 'application/java-archive', 330 | self::RAR => 'application/x-rar-compressed', 331 | self::TAR => 'application/x-tar', 332 | self::ZIP => 'application/zip', 333 | self::ARC => 'application/x-freearc', 334 | self::DAR => 'application/x-dar', 335 | 336 | self::ISO => 'application/x-iso9660-image', 337 | 338 | self::ACCDB => 'application/x-msaccess', 339 | self::MDB => 'application/x-msaccess', 340 | self::ODB => 'application/vnd.oasis.opendocument.database', 341 | self::SQLITE => 
'application/x-sqlite3', 342 | 343 | self::DOC => 'application/msword', 344 | self::DOCX => 'application/vnd.openxmlformats-officedocument.wordprocessingml.document', 345 | self::HTML => 'text/html', 346 | self::ODT => 'application/vnd.oasis.opendocument.text', 347 | self::PDF => 'application/pdf', 348 | self::RTF => 'application/rtf', 349 | self::TXT => 'text/plain', 350 | self::MARKDOWN => 'text/markdown', 351 | self::YAML => 'text/yaml', 352 | self::JSON => 'application/json', 353 | self::XML => 'application/xml', 354 | 355 | self::ATOM => 'application/atom+xml', 356 | self::RSS => 'application/rss+xml', 357 | 358 | self::OTF => 'application/x-font-otf', 359 | self::TTF => 'application/x-font-ttf', 360 | 361 | self::APK => 'application/vnd.android.package-archive', 362 | self::COM => 'application/x-msdownload', 363 | self::EXE => 'application/x-msdownload', 364 | self::XAP => 'application/x-silverlight-app', 365 | 366 | self::PPT => 'application/vnd.ms-powerpoint', 367 | self::PPTX => 'application/vnd.openxmlformats-officedocument.presentationml.presentation', 368 | self::ODP => 'application/vnd.oasis.opendocument.presentation', 369 | 370 | self::FLAC => 'audio/x-flac', 371 | self::WMA => 'audio/x-ms-wma', 372 | self::AMR => 'audio/amr', 373 | self::MP3 => 'audio/mpeg', 374 | self::AAC => 'audio/x-aac', 375 | self::M3U => 'audio/x-mpegurl', 376 | self::OGG => 'audio/ogg', 377 | self::WAV => 'audio/x-wav', 378 | self::MIDI => 'audio/midi', 379 | 380 | self::ODS => 'application/vnd.oasis.opendocument.spreadsheet', 381 | self::XLS => 'application/vnd.ms-excel', 382 | self::XLSX => 'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet', 383 | self::CSV => 'text/csv', 384 | self::TSV => 'text/tab-separated-values', 385 | 386 | self::_3GP => 'video/3gpp', 387 | self::AVI => 'video/x-msvideo', 388 | self::FLV => 'video/x-flv', 389 | self::M4V => 'video/x-m4v', 390 | self::MKV => 'video/x-matroska', 391 | self::MOV => 'video/quicktime', 392 | self::MPEG => 
'video/mpeg', 393 | self::MP4 => 'video/mp4', 394 | self::SWF => 'application/x-shockwave-flash', 395 | self::VOB => 'video/x-ms-vob', 396 | self::WMV => 'video/x-ms-wmv', 397 | self::WEBM => 'video/webm', 398 | 399 | self::REG => 'text/plain', 400 | ); 401 | 402 | protected static $signatures = [ 403 | // Images signatures 404 | self::JPEG => [[0 => [0xFF, 0xD8, 0xFF, 0xE0]]], 405 | self::BMP => [[0 => [0x42, 0x4D]]], 406 | self::GIF => [ 407 | [0 => [0x47, 0x49, 0x46, 0x38, 0x37, 0x61]], 408 | // or 409 | [0 => [0x47, 0x49, 0x46, 0x38, 0x39, 0x61]] 410 | ], 411 | self::PNG => [[0 => [0x89, 0x50, 0x4E, 0x47, 0x0D, 0x0A, 0x1A, 0x0A]]], 412 | self::TIFF => [ 413 | [0 => [0x49, 0x20, 0x49]], 414 | // or 415 | [0 => [0x49, 0x49, 0x2A, 0x00]], 416 | // or 417 | [0 => [0x4D, 0x4D, 0x00, 0x2A]], 418 | // or 419 | [0 => [0x4D, 0x4D, 0x00, 0x2B]] 420 | ], 421 | self::PSD => [[0 => [0x38, 0x42, 0x50, 0x53]]], 422 | self::ICO => [[0 => [0x00, 0x00, 0x01, 0x00]]], 423 | 424 | // Archives signatures 425 | self::ARJ => [[0 => [0x60, 0xEA]]], 426 | self::BZIP2 => [[0 => [0x42, 0x5A, 0x68]]], 427 | self::GZIP => [[0 => [0x1F, 0x8B]]], 428 | self::_7ZIP => [[0 => [0x37, 0x7A, 0xBC, 0xAF, 0x27, 0x1C]]], 429 | self::CAB => [[0 => [0x4D, 0x53, 0x43, 0x46]]], 430 | self::JAR => [ 431 | [0 => [0x50, 0x4B, 0x03, 0x04, 0x14, 0x00, 0x08, 0x00, 0x08, 0x00]], 432 | // or 433 | [0 => [0x5F, 0x27, 0xA8, 0x89]] 434 | ], 435 | self::RAR => [ 436 | [0 => [0x52, 0x61, 0x72, 0x21, 0x1A, 0x07, 0x00]], 437 | // or 438 | [0 => [0x52, 0x61, 0x72, 0x21, 0x1A, 0x07, 0x01, 0x00]] 439 | ], 440 | self::TAR => [ 441 | [0 => [0x75, 0x73, 0x74, 0x61, 0x72, 0x00, 0x30, 0x30]], 442 | // or 443 | [0 => [0x75, 0x73, 0x74, 0x61, 0x72, 0x20, 0x20, 0x00]] 444 | ], 445 | self::ARC => [[0 => [0x41, 0x72, 0x43, 0x01]]], 446 | self::DAR => [[0 => [0x00, 0x00, 0x00, 0x7B]]], 447 | 448 | // Disk images signatures 449 | self::ISO => [[0 => [0x43, 0x44, 0x30, 0x30, 0x31]]], 450 | self::NRG => [ 451 | [-8 => ['N', 'E', 'R', 
'O']], 452 | // or 453 | [-12 => ['N', 'E', 'R', '5']] 454 | ], 455 | 456 | // Spreadsheets signatures 457 | self::ACCDB => [[0 => [0x00, 0x01, 0x00, 0x00, 0x53, 0x74, 0x61, 0x6E, 0x64, 0x61, 0x72, 0x64, 0x20, 0x41, 0x43, 0x45, 0x20, 0x44, 0x42]]], 458 | self::MDB => [[0 => [0x00, 0x01, 0x00, 0x00, 0x53, 0x74, 0x61, 0x6E, 0x64, 0x61, 0x72, 0x64, 0x20, 0x4A, 0x65, 0x74, 0x20, 0x44, 0x42]]], 459 | self::SQLITE => [[0 => [0x53, 0x51, 0x4C, 0x69, 0x74, 0x65, 0x20, 0x66, 0x6F, 0x72, 0x6D, 0x61, 0x74, 0x20, 0x33, 0x00]]], 460 | 461 | // Microsoft Office old formats (doc, xls, ppt) 462 | self::DOC => [ 463 | [ 464 | 0 => [0xD0, 0xCF, 0x11, 0xE0, 0xA1, 0xB1, 0x1A, 0xE1], 465 | // and 466 | 512 => [0xEC, 0xA5, 0xC1, 0x00], 467 | ] 468 | ], 469 | self::XLS => [ 470 | [ 471 | 0 => [0xD0, 0xCF, 0x11, 0xE0, 0xA1, 0xB1, 0x1A, 0xE1], 472 | // and 473 | 512 => [0x09, 0x08, 0x10, 0x00, 0x00, 0x06, 0x05, 0x00], 474 | ] 475 | ], 476 | self::PPT => [ 477 | [ 478 | 0 => [0xD0, 0xCF, 0x11, 0xE0, 0xA1, 0xB1, 0x1A, 0xE1], 479 | // and 480 | 512 => [0xA0, 0x46, 0x1D, 0xF0] 481 | ], 482 | // or 483 | [ 484 | 0 => [0xD0, 0xCF, 0x11, 0xE0, 0xA1, 0xB1, 0x1A, 0xE1], 485 | // and 486 | 512 => [0x00, 0x6E, 0x1E, 0xF0] 487 | ], 488 | // or 489 | [ 490 | 0 => [0xD0, 0xCF, 0x11, 0xE0, 0xA1, 0xB1, 0x1A, 0xE1], 491 | // and 492 | 512 => [0x0F, 0x00, 0xE8, 0x03] 493 | ] 494 | ], 495 | 496 | // Microsoft Office new formats (docx, xlsx, pptx) 497 | self::DOCX => [ 498 | [ 499 | 0 => [0x50, 0x4B, 0x03, 0x04, 0x14, 0x00, 0x06, 0x00], 500 | // and 501 | // search for substring at the end of file 502 | -22 => [ 503 | 'bytes' => ['w', 'o', 'r', 'd', '/'], 504 | 'depth' => 512, 505 | 'reverse' => true 506 | ] 507 | ] 508 | ], 509 | self::XLSX => [ 510 | [ 511 | 0 => [0x50, 0x4B, 0x03, 0x04, 0x14, 0x00, 0x06, 0x00], 512 | // and 513 | // search for substring at the end of file 514 | -22 => [ 515 | 'bytes' => ['x', 'l', '/'], 516 | 'depth' => 512, 517 | 'reverse' => true 518 | ] 519 | ] 520 | ], 521 | self::PPTX 
=> [ 522 | [ 523 | 0 => [0x50, 0x4B, 0x03, 0x04, 0x14, 0x00, 0x06, 0x00], 524 | // and 525 | // search for substring at the end of file 526 | -22 => [ 527 | 'bytes' => ['p', 'p', 't', '/'], 528 | 'depth' => 512, 529 | 'reverse' => true 530 | ] 531 | ] 532 | ], 533 | 534 | // Open Alliance formats 535 | self::ODT => [ 536 | [ 537 | 0 => [0x50, 0x4B, 0x03, 0x04], 538 | // and 539 | 30 => ['m', 'i', 'm', 'e', 't', 'y', 'p', 'e', 'a', 'p', 'p', 'l', 'i', 'c', 'a', 't', 'i', 'o', 'n', '/', 'v', 'n', 'd', '.', 'o', 'a', 's', 'i', 's', '.', 'o', 'p', 'e', 'n', 'd', 'o', 'c', 'u', 'm', 'e', 'n', 't', '.'], 540 | // and 541 | 73 => ['t', 'e', 'x', 't'], 542 | ] 543 | ], 544 | self::ODS => [ 545 | [ 546 | 0 => [0x50, 0x4B, 0x03, 0x04], 547 | // and 548 | 30 => ['m', 'i', 'm', 'e', 't', 'y', 'p', 'e', 'a', 'p', 'p', 'l', 'i', 'c', 'a', 't', 'i', 'o', 'n', '/', 'v', 'n', 'd', '.', 'o', 'a', 's', 'i', 's', '.', 'o', 'p', 'e', 'n', 'd', 'o', 'c', 'u', 'm', 'e', 'n', 't', '.'], 549 | // and 550 | 73 => ['s', 'p', 'r', 'e', 'a', 'd', 's', 'h', 'e', 'e', 't'], 551 | ] 552 | ], 553 | self::ODP => [ 554 | [ 555 | 0 => [0x50, 0x4B, 0x03, 0x04], 556 | // and 557 | 30 => ['m', 'i', 'm', 'e', 't', 'y', 'p', 'e', 'a', 'p', 'p', 'l', 'i', 'c', 'a', 't', 'i', 'o', 'n', '/', 'v', 'n', 'd', '.', 'o', 'a', 's', 'i', 's', '.', 'o', 'p', 'e', 'n', 'd', 'o', 'c', 'u', 'm', 'e', 'n', 't', '.'], 558 | // and 559 | 73 => ['p', 'r', 'e', 's', 'e', 'n', 't', 'a', 't', 'i', 'o', 'n'], 560 | ] 561 | ], 562 | self::ODB => [ 563 | [ 564 | 0 => [0x50, 0x4B, 0x03, 0x04], 565 | // and 566 | 30 => ['m', 'i', 'm', 'e', 't', 'y', 'p', 'e', 'a', 'p', 'p', 'l', 'i', 'c', 'a', 't', 'i', 'o', 'n', '/', 'v', 'n', 'd', '.', 'o', 'a', 's', 'i', 's', '.', 'o', 'p', 'e', 'n', 'd', 'o', 'c', 'u', 'm', 'e', 'n', 't', '.'], 567 | // and 568 | 73 => ['b', 'a', 's', 'e'], 569 | ] 570 | ], 571 | 572 | // Text formats 573 | self::HTML => [[0 => ' [[0 => [0x25, 0x50, 0x44, 0x46]]], 575 | self::RTF => [[0 => [0x7B, 0x5C, 0x72, 
0x74, 0x66, 0x31]]], 576 | self::ATOM => [[ 577 | 0 => ' [ 581 | 'bytes' => ['A', 't', 'o', 'm'], 582 | 'depth' => 100 583 | ] 584 | ]], 585 | self::RSS => [[ 586 | 0 => ' [ 589 | 'bytes' => ['<', 'r', 's', 's'], 590 | 'depth' => 100 591 | ] 592 | ]], 593 | // make sure xml at the end of Text's section 594 | self::XML => [[0 => ' [[0 => [0x4F, 0x54, 0x54, 0x4F]]], 598 | self::TTF => [[0 => [0x00, 0x01, 0x00, 0x00, 0x00]]], 599 | 600 | // Executables formats 601 | self::APK => [[ 602 | 0 => [0x50, 0x4B, 0x03, 0x04], 603 | // and 604 | 30 => ['A', 'n', 'd', 'r', 'o', 'i', 'd', 'M', 'a', 'n', 'i', 'f', 'e', 's', 't', '.', 'x', 'm', 'l'], 605 | ]], 606 | self::EXE => [[0 => [0x4D, 0x5A]]], 607 | 608 | // Audios formats 609 | self::FLAC => [[0 => [0x66, 0x4C, 0x61, 0x43, 0x00, 0x00, 0x00, 0x22]]], 610 | self::AMR => [[0 => [0x23, 0x21, 0x41, 0x4D, 0x52]]], 611 | self::MP3 => [[0 => [0x49, 0x44, 0x33]]], 612 | self::AAC => [ 613 | [0 => [0xFF, 0xF1]], 614 | // or 615 | [0 => [0xFF, 0xF9]] 616 | ], 617 | self::M3U => [[0 => ['#', 'E', 'X', 'T', 'M', '3', 'U']]], 618 | self::OGG => [[0 => ['O', 'g', 'g', 'S']]], 619 | self::MIDI => [[0 => [0x4D, 0x54, 0x68, 0x64]]], 620 | 621 | self::_3GP => [[0 => [0x00, 0x00, 0x00, 0x14, 0x66, 0x74, 0x79, 0x70, 0x33, 0x67, 0x70]]], 622 | self::AVI => [[ 623 | 0 => [0x52, 0x49, 0x46, 0x46], 624 | // and 625 | 8 => [0x41, 0x56, 0x49, 0x20, 0x4C, 0x49, 0x53, 0x54] 626 | ]], 627 | self::FLV => [[0 => [0x46, 0x4C, 0x56, 0x01]]], 628 | self::M4V => [[0 => [0x00, 0x00, 0x00, 0x18, 0x66, 0x74, 0x79, 0x70, 0x6D, 0x70, 0x34, 0x32]]], 629 | self::MKV => [[0 => [0x1A, 0x45, 0xDF, 0xA3, 0x93, 0x42, 0x82, 0x88, 0x6D, 0x61, 0x74, 0x72, 0x6F, 0x73, 0x6B, 0x61]]], 630 | self::MOV => [ 631 | [4 => [0x66, 0x74, 0x79, 0x70, 0x71, 0x74, 0x20, 0x20]], 632 | // or 633 | [4 => [0x6D, 0x6F, 0x6F, 0x76]] 634 | ], 635 | self::MP4 => [ 636 | [4 => [0x66, 0x74, 0x79, 0x70, 0x69, 0x73, 0x6F, 0x6D]], 637 | // or 638 | [4 => [0x66, 0x74, 0x79, 0x70, 0x33, 0x67, 0x70, 
0x35]], 639 | // or 640 | [4 => [0x66, 0x74, 0x79, 0x70, 0x4D, 0x53, 0x4E, 0x56]], 641 | // or 642 | [4 => [0x66, 0x74, 0x79, 0x70, 0x4D, 0x34, 0x41, 0x20]] 643 | ], 644 | self::MPEG => [[ 645 | 0 => [0x00, 0x00, 0x01], 646 | // and 647 | -4 => [0x00, 0x00, 0x01, 0xB7] 648 | ]], 649 | self::SWF => [[0 => [0x5A, 0x57, 0x53]]], 650 | self::VOB => [[ 651 | 0 => [0x00, 0x00, 0x01, 0xBA], 652 | // and 653 | -4 => [0x00, 0x00, 0x01, 0xB9] 654 | ]], 655 | self::WEBM => [[0 => [0x1A, 0x45, 0xDF, 0xA3]]], 656 | 657 | // zip is a container for a lot of formats 658 | self::ZIP => [ 659 | [0 => [0x50, 0x4B, 0x03, 0x04]], 660 | // or 661 | [0 => [0x50, 0x4B, 0x05, 0x06]], 662 | // or 663 | [0 => [0x50, 0x4B, 0x07, 0x08]] 664 | ], 665 | 666 | // Scenario formats 667 | self::REG => [ 668 | [0 => [0xFF, 0xFE]], 669 | // or 670 | [0 => [0x52, 0x45, 0x47, 0x45, 0x44, 0x49, 0x54]] 671 | ] 672 | ]; 673 | 674 | public static function detectByFilename($filename) { 675 | $ext = strtolower(pathinfo($filename, PATHINFO_EXTENSION)); 676 | if (isset(self::$aliases[$ext])) $ext = self::$aliases[$ext]; 677 | if (isset(self::$extensions[$ext])) { 678 | $format = array(null, self::$extensions[$ext]); 679 | foreach (self::$types as $type => $formats) { 680 | if (in_array($format[1], $formats)) { 681 | $format[0] = $type; 682 | break; 683 | } 684 | } 685 | $format[2] = isset(self::$mimeTypes[$format[1]]) ? self::$mimeTypes[$format[1]] : false; 686 | return $format; 687 | } 688 | return false; 689 | } 690 | 691 | public static function detectByContent($source) { 692 | $stream = new ContentStream($source); 693 | foreach (self::$signatures as $format => $signatures) { 694 | foreach ($signatures as $or_signature) { 695 | $passed = true; 696 | foreach ($or_signature as $offset => $and_signature) { 697 | // search for substring in range 698 | if (isset($and_signature['bytes'])) { 699 | if ($stream->find($offset, $and_signature['bytes'], 700 | isset($and_signature['depth']) ? 
$and_signature['depth'] : 512, 701 | isset($and_signature['reverse']) ? $and_signature['reverse'] : false 702 | ) === false) { 703 | $passed = false; 704 | break; 705 | } 706 | } 707 | // exact match 708 | else { 709 | if ($stream->checkBytes($offset, $and_signature) === false) { 710 | $passed = false; 711 | break; 712 | } 713 | } 714 | } 715 | // if earlier we did not break inner loop, then all signatures matched 716 | if ($passed) { 717 | $format = array(null, $format); 718 | foreach (self::$types as $type => $formats) { 719 | if (in_array($format[1], $formats)) { 720 | $format[0] = $type; 721 | break; 722 | } 723 | } 724 | $format[2] = isset(self::$mimeTypes[$format[1]]) ? self::$mimeTypes[$format[1]] : false; 725 | return $format; 726 | } 727 | } 728 | } 729 | return false; 730 | } 731 | 732 | public static function getMimeType($file) { 733 | $format = is_resource($file) ? self::detectByContent($file) : self::detectByFilename($file); 734 | if ($format === false) 735 | return false; 736 | return $format[2]; 737 | } 738 | } 739 | -------------------------------------------------------------------------------- /src/TerminalInfo.php: -------------------------------------------------------------------------------- 1 | assertFalse($contentStream->find(0, [128])); 21 | unset($contentStream); 22 | } 23 | 24 | public function testFindWithNegativeOffset() { 25 | $contentStream = new ContentStream(__DIR__. '/image.jpeg'); 26 | 27 | $this->assertFalse($contentStream->find(-1, [128])); 28 | unset($contentStream); 29 | } 30 | 31 | public function testFindWithReverseIsTrue() { 32 | $contentStream = new ContentStream(__DIR__. 
'/image.jpeg'); 33 | 34 | $this->assertTrue($contentStream->find(-1, [128], 512, true)); 35 | unset($contentStream); 36 | } 37 | } -------------------------------------------------------------------------------- /tests/DetectorTest.php: -------------------------------------------------------------------------------- 1 | assertEquals($expectedType, Detector::detectByFilename($filename)); 13 | } 14 | 15 | public function testDetectionByFilenameShouldReturnFalse() { 16 | $this->assertFalse(Detector::detectByFilename('invalid_file')); 17 | } 18 | 19 | public function filenamesWithTypes() { 20 | return array( 21 | array('image.jpg', array(Detector::IMAGE, Detector::JPEG, 'image/jpeg')), 22 | array('music.mp3', array(Detector::AUDIO, Detector::MP3, 'audio/mpeg')) 23 | ); 24 | } 25 | 26 | /** 27 | * @dataProvider streamsWithTypes() 28 | */ 29 | public function testDetectionByContent($binary, $expectedType) { 30 | $fp = fopen('php://temp', 'r+'); 31 | if (is_array($binary)) $binary = implode(null, array_map(function ($code) { return chr($code); }, $binary)); 32 | fwrite($fp, $binary); 33 | rewind($fp); 34 | $this->assertEquals($expectedType, Detector::detectByContent($fp)); 35 | fclose($fp); 36 | } 37 | 38 | public function testDetectionByContentShouldReturnFalse() { 39 | $fp = fopen('php://temp', 'r'); 40 | $this->assertFalse(Detector::detectByContent($fp)); 41 | fclose($fp); 42 | } 43 | 44 | public function streamsWithTypes() { 45 | return array( 46 | array(array(0x89, 0x50, 0x4E, 0x47, 0x0D, 0x0A, 0x1A, 0x0A), array(Detector::IMAGE, Detector::PNG, 'image/png')), 47 | array(array(0x1F, 0x8B), array(Detector::ARCHIVE, Detector::GZIP, 'application/gzip')) 48 | ); 49 | } 50 | 51 | /** 52 | * @dataProvider filenamesWithTypes() 53 | */ 54 | public function testMimetypeGeneration($filename, $expectedType) { 55 | $this->assertEquals($expectedType[2], Detector::getMimeType($filename)); 56 | } 57 | 58 | public function testGetMimeTypeShouldReturnFalse() { 59 | 
$this->assertFalse(Detector::getMimeType('invalid_file')); 60 | } 61 | } 62 | -------------------------------------------------------------------------------- /tests/image.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wapmorgan/FileTypeDetector/e6f7b4f9f27b1a68e0e3b3ee99535c9d4613b880/tests/image.jpeg --------------------------------------------------------------------------------