├── .gitignore ├── CHANGELOG.md ├── Cargo.toml ├── LICENSE ├── README.md ├── benches ├── application │ ├── x-7z-compressed │ └── zip ├── from_u8.rs ├── image │ ├── gif │ └── png ├── match_u8.rs └── text │ └── plain ├── src ├── basetype │ ├── check.rs │ ├── init.rs │ └── mod.rs ├── fdo_magic │ ├── builtin │ │ ├── aliases │ │ ├── check.rs │ │ ├── init.rs │ │ ├── magic │ │ ├── mod.rs │ │ └── subclasses │ ├── check.rs │ ├── mod.rs │ └── ruleset.rs ├── lib.rs └── main.rs └── tests ├── application ├── x-7z-compressed ├── x-tar └── zip ├── audio ├── flac ├── mpeg ├── ogg ├── opus └── wav ├── from_filepath.rs ├── from_u8.rs ├── image ├── bmp ├── gif ├── png ├── tiff ├── x-pcx ├── x-portable-bitmap ├── x-tga └── xbm ├── match_u8.rs └── text └── plain /.gitignore: -------------------------------------------------------------------------------- 1 | target 2 | Cargo.lock 3 | graph.dot 4 | graph.svg 5 | 6 | -------------------------------------------------------------------------------- /CHANGELOG.md: -------------------------------------------------------------------------------- 1 | # 0.2.3 2 | 3 | Upgraded package versions to latest (except nom, which is currently stuck at 4 | 3.x) and fixed the paths in the doc tests 5 | 6 | # 0.2.2 7 | 8 | Yanked due to accidental breaking API change 9 | 10 | # 0.2.1 11 | 12 | Incorporated fix by Bram Sanders to prevent panic on non-existent file. 13 | 14 | # 0.2.0 15 | 16 | Major changes, front-end and back. 17 | 18 | - Added `is_alias` function 19 | - `from_*` functions excluding `from_*_node` now return MIME, not Option 20 | - New feature flag: `staticmime`. Changes type of MIME from String to &'static str 21 | - Bundled magic file, so it works on Windows as well. 22 | - Split `fdo_magic` checker into `fdo_magic::sys` and `fdo_magic::builtin` 23 | - `len` argument removed from `*_u8` functions 24 | - Tests and benchmarks added. 
25 | - Fixed horribly broken logic in `fdo_magic` checker 26 | - Checks the most common types before obscure types 27 | - Changed hasher to `fnv`. 28 | - Added support for handling aliases in input 29 | - `tmagic` command has more features 30 | - Major speed improvements 31 | 32 | # 0.1.1 33 | 34 | - *Changed public interface*: Added `from_u8` export function 35 | - *Changed public interface*: Changed len argument for `u8` functions from `u32` to `usize` 36 | - Minor speed improvements in `fdo_magic` checker 37 | 38 | # 0.1.0 39 | 40 | Initial release 41 | -------------------------------------------------------------------------------- /Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "tree_magic" 3 | version = "0.3.0" 4 | authors = ["Allison Hancock "] 5 | description = "Determines the MIME type of a file by traversing a filetype tree." 6 | repository = "https://github.com/aahancoc/tree_magic/" 7 | documentation = "https://docs.rs/tree_magic/" 8 | readme = "README.md" 9 | keywords = ["mime", "filesystem", "media-types"] 10 | license = "MIT" 11 | exclude = ["tests/*", "benches/*"] 12 | edition = "2018" 13 | publish = false 14 | 15 | [dependencies] 16 | petgraph = "^0.5" 17 | nom = "^3" 18 | lazy_static = "^1.4" 19 | fnv = "^1" 20 | mime = "^0.3" 21 | 22 | tabwriter = { version = "^1", optional = true } 23 | clap = { version = "^2", optional = true } 24 | scoped_threadpool = { version = "^0.1", optional = true } 25 | walkdir = { version = "^2", optional = true } 26 | num_cpus = { version = "^1", optional = true } 27 | 28 | [dev-dependencies] 29 | bencher = "^0.1" 30 | 31 | [features] 32 | cli = ["clap", "tabwriter", "scoped_threadpool", "walkdir", "num_cpus"] 33 | default = [] 34 | 35 | [lib] 36 | crate-type = ["lib"] 37 | path = "src/lib.rs" 38 | 39 | [[bin]] 40 | required-features = ["cli"] 41 | name = "tmagic" 42 | path = "src/main.rs" 43 | 44 | [[bench]] 45 | name = "from_u8" 46 | harness = 
false 47 | [[bench]] 48 | name = "match_u8" 49 | harness = false 50 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2017 Aaron Hancock 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # tree_magic 2 | 3 | tree_magic is a Rust crate that determines the MIME type of a given file or byte stream. 4 | 5 | Read the documentation at https://docs.rs/tree_magic/ 6 | 7 | `stable` users: You may need to include the `cli` feature flag, even if you're using it as a library! 
(This is fixed on `nightly`) 8 | 9 | Unlike the typical approach that libmagic and file(1) use, this loads all the file types in a tree based on subclasses. (EX: `application/vnd.openxmlformats-officedocument.wordprocessingml.document` (MS Office 2007) subclasses `application/zip` which subclasses `application/octet-stream`) Then, instead of checking the file against *every* file type, it can traverse down the tree and only check the file types that make sense to check. (After all, the fastest check is the check that never gets run.) 10 | 11 | This library also provides the ability to check if a file is a certain type without going through the process of checking it against every file type. 12 | 13 | A simple command-line client `tmagic` is also provided that acts as a replacement for `file --mime-type`, excluding charset information. 14 | 15 | ## Performance 16 | 17 | This is fast. FAST. 18 | 19 | This is a test of my Downloads folder (sorry, can't find a good publicly available set of random files) on OpenSUSE Tumbleweed. `tmagic` was compiled with `cargo build --release`, and `file` came from the OpenSUSE repos. This is a warm run, which means I've run both programs through a few times. System is a dual-core Intel Core i7 640M, and results were measured with `time`. 20 | 21 | Program | real | user | sys 22 | --------|------|------|----- 23 | tmagic 0.2.0 | 0m0.063s | 0m0.052s | 0m0.004s 24 | file-5.30 --mime-type | 0m0.924s | 0.800s | 0.116s 25 | 26 | There are a couple of things that lead to this. Mainly: 27 | 28 | - Fewer types to parse due to the graph approach. 29 | 30 | - First 4K of file is loaded then passed to all parsers, instead of constantly reloading from disk. (When doing that, the time was more around ~0.130s.) 31 | 32 | - The most common types (image/png, image/jpeg, application/zip, etc.) are checked before the exotic ones. 33 | 34 | - Everything that can be processed in a lazy_static! is. 
35 | 36 | Nightly users can also run `cargo bench` for some benchmarks. For tree_magic 0.2.0 on the same hardware: 37 | 38 | test from_u8::application_zip ... bench: 17,086 ns/iter (+/- 845) 39 | test from_u8::image_gif ... bench: 5,027 ns/iter (+/- 520) 40 | test from_u8::image_png ... bench: 4,421 ns/iter (+/- 1,795) 41 | test from_u8::text_plain ... bench: 112,578 ns/iter (+/- 11,778) 42 | test match_u8::application_zip ... bench: 222 ns/iter (+/- 144) 43 | test match_u8::image_gif ... bench: 140 ns/iter (+/- 14) 44 | test match_u8::image_png ... bench: 139 ns/iter (+/- 18) 45 | test match_u8::text_plain ... bench: 44 ns/iter (+/- 3) 46 | 47 | However, it should be noted that the FreeDesktop.org magic files cover fewer filetypes than the magic files used by libmagic. (On my system tree_magic supports 400 types, while `/usr/share/misc/magic` contains 855 `!:mime` tags.) It is, however, significantly easier to parse, as it only covers magic numbers and not attributes or anything like that. See the TODO section for plans to fix this. 48 | 49 | ## Compatibility 50 | 51 | This has been tested using Rust Stable and Nightly on Windows 7 and OpenSUSE Tumbleweed Linux. 52 | 53 | All mime information and relation information is loaded from the Shared MIME-info Database as described at https://specifications.freedesktop.org/shared-mime-info-spec/shared-mime-info-spec-latest.html. If you believe that this is not present on your system, turn off the `sys_fdo_magic` feature flag. 54 | 55 | This provides the most common file types, but it's still missing some important ones, like LibreOffice or MS Office 2007+ support or ISO files. Expect this to improve, especially as the `zip` checker is added. 56 | 57 | ### Architecture 58 | 59 | `tree_magic` is split up into different "checker" modules. Each checker handles a certain set of filetypes, and only those. 
For instance, the `basetype` checker handles the `inode/*` and `text/plain` types, while the `fdo_magic` checker handles anything with a magic number. The idea here is that instead of following the `libmagic` route of having one magic descriptor format that fits every file, we can specialize and choose the checker that suits the file format best. 60 | 61 | During library initialization, each checker is queried for the types it supports and the parent->child relations between them. During this time, the checkers can load any rules, schemas, etc. into memory. A big philosophy here is that **time during the checking phase is many times more valuable than during the init phase**. The library only gets initialized once, and the library can check thousands of files during a program's lifetime. 62 | 63 | From the list of file types and relations, a directed graph is built, and each node is added to a hash map. The library user can use these directly to find parents, children, etc. of a given MIME if needed. 64 | 65 | When a file needs to be checked against a certain MIME (match_*), each checker is queried to see if it supports that type, and if so, it runs the checker. If the checker returns true, it must be that type. 66 | 67 | When a file needs its MIME type found (from_*), the library starts at the `all/all` node of the type graph (or whichever node the user specifies) and walks down the tree. If a match is found, it continues searching down that branch. If no match is found, it retrieves the deepest MIME type found. 68 | 69 | ## TODO 70 | 71 | ### Improve fdo-magic checker 72 | 73 | Right now the `fdo-magic` checker does not handle endianness. It also does not handle magic files stored in the user's home directory. 74 | 75 | ### Additional checkers 76 | 77 | It is planned to have custom file checking functions for many types. 
Here are some ideas: 78 | 79 | - `zip`: Everything that subclasses `application/zip` can be determined further by peeking at the zip's directory listing. 80 | 81 | - `grep`: Text files such as program scripts and configuration files could be parsed with a regex (or whatever works best). 82 | 83 | - `json`, `toml`, `xml`, etc: Check the given file against a schema and return true if it matches. (By this point there should be few enough potential matches that it should be okay to load the entire file) 84 | 85 | - (specialized parsers): Binary (or text) files without any sort of magic can be checked for compliance against a quick and dirty `nom` parser instead of the weird heuristics used by libmagic. 86 | 87 | To add additional checker types, add a new module exporting: 88 | 89 | - `init::get_supported() -> Vec<(String)>` 90 | 91 | - `init::get_subclasses() -> Vec` 92 | 93 | - `test::from_u8(&[u8], &str) -> bool` 94 | 95 | - `test::from_filepath(&str, &str) -> Result` 96 | 97 | and then add references to those functions into the CHECKERS lazy_static! in `lib.rs`. The bottommost entries get searched first. 98 | 99 | ### Caching 100 | 101 | Going forward, it is essential for a checker (like `basetype`'s metadata, or that json/toml/xml example) to be able to cache an in-memory representation of the file, so it doesn't have to get re-loaded and re-parsed for every new type. With the current architecture, this is rather difficult to implement. 102 | 103 | ### Multiple file types 104 | 105 | There are some weird files out there ( [Polyglot quines](https://en.wikipedia.org/wiki/Polyglot_(computing)) come to mind. ) that are multiple file types. This might be worth handling for security reasons. (It's not a huge priority, though.) 106 | 107 | ### Parallel processing 108 | 109 | Right now this is single-threaded. This is an embarrassingly parallel task (multiple files, multiple types, multiple rules for each type...), so there should be a great speed benefit. 
110 | 111 | ## TO NOT DO 112 | 113 | ### File attributes 114 | 115 | `libmagic` and `file`, by default, print descriptive strings detailing the file type and, for things like JPEG images or ELF files, a whole bunch of metadata. This is not something `tree_magic` will ever support, as it is entirely unnecessary. Support for attributes would best be handled in a separate crate that, given a MIME, can extract metadata in a predictable, machine-readable format. 116 | -------------------------------------------------------------------------------- /benches/application/x-7z-compressed: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aahancoc/tree_magic/fcbb9592de0104e7fe6b9a580839f360a2fddbc8/benches/application/x-7z-compressed -------------------------------------------------------------------------------- /benches/application/zip: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aahancoc/tree_magic/fcbb9592de0104e7fe6b9a580839f360a2fddbc8/benches/application/zip -------------------------------------------------------------------------------- /benches/from_u8.rs: -------------------------------------------------------------------------------- 1 | #[macro_use] 2 | extern crate bencher; 3 | extern crate tree_magic; 4 | use bencher::Bencher; 5 | 6 | ///Image tests 7 | fn image_gif(b: &mut Bencher) { 8 | b.iter(|| tree_magic::from_u8(include_bytes!("image/gif"))); 9 | } 10 | fn image_png(b: &mut Bencher) { 11 | b.iter(|| tree_magic::from_u8(include_bytes!("image/png"))); 12 | } 13 | 14 | /// Archive tests 15 | fn application_zip(b: &mut Bencher) { 16 | b.iter(|| tree_magic::from_u8(include_bytes!("application/zip"))); 17 | } 18 | 19 | /// Text tests 20 | fn text_plain(b: &mut Bencher) { 21 | b.iter(|| tree_magic::from_u8(include_bytes!("text/plain"))); 22 | } 23 | 24 | benchmark_group!(benches, image_gif, image_png, application_zip, 
text_plain); 25 | benchmark_main!(benches); 26 | -------------------------------------------------------------------------------- /benches/image/gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aahancoc/tree_magic/fcbb9592de0104e7fe6b9a580839f360a2fddbc8/benches/image/gif -------------------------------------------------------------------------------- /benches/image/png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aahancoc/tree_magic/fcbb9592de0104e7fe6b9a580839f360a2fddbc8/benches/image/png -------------------------------------------------------------------------------- /benches/match_u8.rs: -------------------------------------------------------------------------------- 1 | #[macro_use] 2 | extern crate bencher; 3 | extern crate tree_magic; 4 | use bencher::Bencher; 5 | 6 | ///Image benchmarks 7 | fn image_gif(b: &mut Bencher) { 8 | b.iter(|| tree_magic::match_u8("image/gif", include_bytes!("image/gif"))); 9 | } 10 | fn image_png(b: &mut Bencher) { 11 | b.iter(|| tree_magic::match_u8("image/png", include_bytes!("image/png"))); 12 | } 13 | 14 | /// Archive tests 15 | fn application_zip(b: &mut Bencher) { 16 | b.iter(|| tree_magic::match_u8("application/zip", include_bytes!("application/zip"))); 17 | } 18 | 19 | /// Text tests 20 | fn text_plain(b: &mut Bencher) { 21 | b.iter(|| tree_magic::match_u8("text/plain", include_bytes!("text/plain"))); 22 | } 23 | 24 | benchmark_group!(benches, image_gif, image_png, application_zip, text_plain); 25 | benchmark_main!(benches); 26 | -------------------------------------------------------------------------------- /benches/text/plain: -------------------------------------------------------------------------------- 1 | This is just standard text. 
2 | -------------------------------------------------------------------------------- /src/basetype/check.rs: -------------------------------------------------------------------------------- 1 | use std::path::Path; 2 | use crate::{read_bytes, MIME}; 3 | 4 | /// If there are any null bytes, return False. Otherwise return True. 5 | fn is_text_plain_from_u8(b: &[u8]) -> bool 6 | { 7 | b.iter().filter(|&x| *x == 0).count() == 0 8 | } 9 | 10 | // TODO: Hoist the main logic here somewhere else. This'll get redundant fast! 11 | fn is_text_plain_from_filepath(filepath: &Path) -> bool 12 | { 13 | let b = match read_bytes(filepath, 512) { 14 | Ok(x) => x, 15 | Err(_) => return false 16 | }; 17 | is_text_plain_from_u8(b.as_slice()) 18 | } 19 | 20 | #[allow(unused_variables)] 21 | pub fn from_u8(b: &[u8], mimetype: MIME) -> bool 22 | { 23 | if mimetype == "application/octet-stream" || 24 | mimetype == "all/allfiles" 25 | { 26 | // Both of these are the case if we have a bytestream at all 27 | return true; 28 | } if mimetype == "text/plain" { 29 | return is_text_plain_from_u8(b); 30 | } else { 31 | // ...how did we get bytes for this? 32 | return false; 33 | } 34 | } 35 | 36 | pub fn from_filepath(filepath: &Path, mimetype: MIME) -> bool 37 | { 38 | use std::fs; 39 | 40 | // Being bad with error handling here, 41 | // but if you can't open it it's probably not a file. 
42 | let meta = match fs::metadata(filepath) { 43 | Ok(x) => x, 44 | Err(_) => {return false;} 45 | }; 46 | 47 | match mimetype.to_string().as_str() { 48 | "all/all" => return true, 49 | "all/allfiles" | "application/octet-stream" => return meta.is_file(), 50 | "inode/directory" => return meta.is_dir(), 51 | "text/plain" => return is_text_plain_from_filepath(filepath), 52 | _ => return false 53 | } 54 | } -------------------------------------------------------------------------------- /src/basetype/init.rs: -------------------------------------------------------------------------------- 1 | use fnv::FnvHashMap; 2 | use crate::MIME; 3 | 4 | pub fn get_supported() -> Vec { 5 | super::TYPES.to_vec().iter().map(|x| x.parse().unwrap()).collect() 6 | } 7 | 8 | /// Returns Vec of parent->child relations 9 | pub fn get_subclasses() -> Vec<(MIME, MIME)> { 10 | let mut res = Vec::<(MIME, MIME)>::new(); 11 | 12 | // There's probably a better way to do this. 13 | res.push( ("all/all".parse().unwrap(), "all/allfiles".parse().unwrap()) ); 14 | res.push( ("all/all".parse().unwrap(), "inode/directory".parse().unwrap()) ); 15 | res.push( ("all/allfiles".parse().unwrap(), "application/octet-stream".parse().unwrap()) ); 16 | res.push( ("application/octet-stream".parse().unwrap(), "text/plain".parse().unwrap()) ); 17 | 18 | res 19 | } 20 | 21 | pub fn get_aliaslist() -> FnvHashMap { 22 | FnvHashMap::default() 23 | } -------------------------------------------------------------------------------- /src/basetype/mod.rs: -------------------------------------------------------------------------------- 1 | //! 
Handles "base types" such as inode/* and text/plain 2 | const TYPES: [&'static str; 5] = 3 | [ 4 | "all/all", 5 | "all/allfiles", 6 | "inode/directory", 7 | "text/plain", 8 | "application/octet-stream" 9 | ]; 10 | 11 | pub mod check; 12 | pub mod init; 13 | -------------------------------------------------------------------------------- /src/fdo_magic/builtin/aliases: -------------------------------------------------------------------------------- 1 | application/acrobat application/pdf 2 | application/cdr application/vnd.corel-draw 3 | application/coreldraw application/vnd.corel-draw 4 | application/dbase application/x-dbf 5 | application/dbf application/x-dbf 6 | application/docbook+xml application/x-docbook+xml 7 | application/emf image/emf 8 | application/futuresplash application/vnd.adobe.flash.movie 9 | application/gpx application/gpx+xml 10 | application/ico image/vnd.microsoft.icon 11 | application/ics text/calendar 12 | application/java application/x-java 13 | application/java-archive application/x-java-archive 14 | application/java-byte-code application/x-java 15 | application/java-vm application/x-java 16 | application/lotus123 application/vnd.lotus-1-2-3 17 | application/m3u audio/x-mpegurl 18 | application/mdb application/vnd.ms-access 19 | application/ms-tnef application/vnd.ms-tnef 20 | application/msaccess application/vnd.ms-access 21 | application/msexcel application/vnd.ms-excel 22 | application/mspowerpoint application/vnd.ms-powerpoint 23 | application/nappdf application/pdf 24 | application/pcap application/vnd.tcpdump.pcap 25 | application/pgp application/pgp-encrypted 26 | application/photoshop image/vnd.adobe.photoshop 27 | application/pkcs12 application/x-pkcs12 28 | application/pls audio/x-scpls 29 | application/powerpoint application/vnd.ms-powerpoint 30 | application/smil application/smil+xml 31 | application/stuffit application/x-stuffit 32 | application/vnd.apple.keynote application/x-iwork-keynote-sffkey 33 | application/vnd.geo+json 
application/geo+json 34 | application/vnd.haansoft-hwp application/x-hwp 35 | application/vnd.haansoft-hwt application/x-hwt 36 | application/vnd.ms-word application/msword 37 | application/vnd.ms-xpsdocument application/oxps 38 | application/vnd.msaccess application/vnd.ms-access 39 | application/vnd.oasis.docbook+xml application/x-docbook+xml 40 | application/vnd.rn-realmedia-vbr application/vnd.rn-realmedia 41 | application/vnd.sdp application/sdp 42 | application/vnd.smaf application/x-smaf 43 | application/vnd.stardivision.writer-global application/vnd.stardivision.writer 44 | application/vnd.sun.xml.base application/vnd.oasis.opendocument.database 45 | application/vnd.xdgapp application/vnd.flatpak 46 | application/wk1 application/vnd.lotus-1-2-3 47 | application/wmf image/wmf 48 | application/wordperfect application/vnd.wordperfect 49 | application/wwf application/x-wwf 50 | application/x-123 application/vnd.lotus-1-2-3 51 | application/x-annodex application/annodex 52 | application/x-bzip2 application/x-bzip 53 | application/x-cbz application/vnd.comicbook+zip 54 | application/x-cdr application/vnd.corel-draw 55 | application/x-chess-pgn application/vnd.chess-pgn 56 | application/x-chm application/vnd.ms-htmlhelp 57 | application/x-coreldraw application/vnd.corel-draw 58 | application/x-dbase application/x-dbf 59 | application/x-deb application/vnd.debian.binary-package 60 | application/x-debian-package application/vnd.debian.binary-package 61 | application/x-emf image/emf 62 | application/x-fictionbook application/x-fictionbook+xml 63 | application/x-flash-video video/x-flv 64 | application/x-frame application/vnd.framemaker 65 | application/x-gamecube-iso-image application/x-gamecube-rom 66 | application/x-gettext text/x-gettext-translation 67 | application/x-gnome-app-info application/x-desktop 68 | application/x-gpx application/gpx+xml 69 | application/x-gpx+xml application/gpx+xml 70 | application/x-gtar application/x-tar 71 | application/x-gzip 
application/gzip 72 | application/x-iso9660-image application/x-cd-image 73 | application/x-jar application/x-java-archive 74 | application/x-java-class application/x-java 75 | application/x-java-vm application/x-java 76 | application/x-javascript application/javascript 77 | application/x-kexiproject-sqlite application/x-kexiproject-sqlite3 78 | application/x-linguist text/vnd.trolltech.linguist 79 | application/x-lotus123 application/vnd.lotus-1-2-3 80 | application/x-lzh-compressed application/x-lha 81 | application/x-mathematica application/mathematica 82 | application/x-mdb application/vnd.ms-access 83 | application/x-mplayer2 video/x-ms-wmp 84 | application/x-ms-asx audio/x-ms-asx 85 | application/x-msaccess application/vnd.ms-access 86 | application/x-msexcel application/vnd.ms-excel 87 | application/x-msmetafile image/wmf 88 | application/x-mspowerpoint application/vnd.ms-powerpoint 89 | application/x-msword application/msword 90 | application/x-netscape-bookmarks application/x-mozilla-bookmarks 91 | application/x-ogg application/ogg 92 | application/x-palm-database application/vnd.palm 93 | application/x-pcap application/vnd.tcpdump.pcap 94 | application/x-pdf application/pdf 95 | application/x-photoshop image/vnd.adobe.photoshop 96 | application/x-pkcs12 application/pkcs12 97 | application/x-pkcs7-certificates application/pkcs7-mime 98 | application/x-quicktimeplayer application/x-quicktime-media-link 99 | application/x-rar application/vnd.rar 100 | application/x-rar-compressed application/vnd.rar 101 | application/x-redhat-package-manager application/x-rpm 102 | application/x-reject text/x-reject 103 | application/x-rnc application/relax-ng-compact-syntax 104 | application/x-sdp application/sdp 105 | application/x-shockwave-flash application/vnd.adobe.flash.movie 106 | application/x-sit application/x-stuffit 107 | application/x-snes-rom application/vnd.nintendo.snes.rom 108 | application/x-spss-savefile application/x-spss-sav 109 | application/x-srt 
application/x-subrip 110 | application/x-tex text/x-tex 111 | application/x-troff text/troff 112 | application/x-vnd.kde.kexi application/x-kexiproject-sqlite3 113 | application/x-wbfs application/x-wii-rom 114 | application/x-wia application/x-wii-rom 115 | application/x-wii-iso-image application/x-wii-rom 116 | application/x-win-lnk application/x-ms-shortcut 117 | application/x-wmf image/wmf 118 | application/x-wordperfect application/vnd.wordperfect 119 | application/x-x509-ca-cert application/pkix-cert 120 | application/x-x509-user-cert application/pkix-cert 121 | application/x-xspf+xml application/xspf+xml 122 | application/x-zip application/zip 123 | application/x-zip-compressed application/zip 124 | audio/3gpp video/3gpp 125 | audio/3gpp-encrypted video/3gpp 126 | audio/3gpp2 video/3gpp2 127 | audio/amr-encrypted audio/AMR 128 | audio/amr-wb-encrypted audio/AMR-WB 129 | audio/iMelody text/x-iMelody 130 | audio/m3u audio/x-mpegurl 131 | audio/m4a audio/mp4 132 | audio/mobile-xmf audio/x-xmf 133 | audio/mp3 audio/mpeg 134 | audio/mpegurl audio/x-mpegurl 135 | audio/scpls audio/x-scpls 136 | audio/tta audio/x-tta 137 | audio/vnd.m-realaudio audio/vnd.rn-realaudio 138 | audio/vnd.wave audio/x-wav 139 | audio/vorbis audio/x-vorbis+ogg 140 | audio/wav audio/x-wav 141 | audio/wma audio/x-ms-wma 142 | audio/x-aac audio/aac 143 | audio/x-aiffc audio/x-aifc 144 | audio/x-annodex audio/annodex 145 | audio/x-dts audio/vnd.dts 146 | audio/x-dtshd audio/vnd.dts.hd 147 | audio/x-flac audio/flac 148 | audio/x-iMelody text/x-iMelody 149 | audio/x-m3u audio/x-mpegurl 150 | audio/x-m4a audio/mp4 151 | audio/x-midi audio/midi 152 | audio/x-mp2 audio/mp2 153 | audio/x-mp3 audio/mpeg 154 | audio/x-mp3-playlist audio/x-mpegurl 155 | audio/x-mpeg audio/mpeg 156 | audio/x-mpg audio/mpeg 157 | audio/x-ogg audio/ogg 158 | audio/x-oggflac audio/x-flac+ogg 159 | audio/x-pn-realaudio audio/vnd.rn-realaudio 160 | audio/x-rn-3gpp-amr video/3gpp 161 | audio/x-rn-3gpp-amr-encrypted 
video/3gpp 162 | audio/x-rn-3gpp-amr-wb video/3gpp 163 | audio/x-rn-3gpp-amr-wb-encrypted video/3gpp 164 | audio/x-shorten application/x-shorten 165 | audio/x-vorbis audio/x-vorbis+ogg 166 | audio/xmf audio/x-xmf 167 | flv-application/octet-stream video/x-flv 168 | image/cdr application/vnd.corel-draw 169 | image/ico image/vnd.microsoft.icon 170 | image/icon image/vnd.microsoft.icon 171 | image/jpeg2000 image/jp2 172 | image/jpeg2000-image image/jp2 173 | image/jpx image/jp2 174 | image/pdf application/pdf 175 | image/photoshop image/vnd.adobe.photoshop 176 | image/pjpeg image/jpeg 177 | image/psd image/vnd.adobe.photoshop 178 | image/x-MS-bmp image/bmp 179 | image/x-bmp image/bmp 180 | image/x-cdr application/vnd.corel-draw 181 | image/x-djvu image/vnd.djvu 182 | image/x-emf image/emf 183 | image/x-fits image/fits 184 | image/x-icb image/x-tga 185 | image/x-ico image/vnd.microsoft.icon 186 | image/x-icon image/vnd.microsoft.icon 187 | image/x-iff image/x-ilbm 188 | image/x-jpeg2000-image image/jp2 189 | image/x-pcx image/vnd.zbrush.pcx 190 | image/x-photoshop image/vnd.adobe.photoshop 191 | image/x-psd image/vnd.adobe.photoshop 192 | image/x-win-metafile image/wmf 193 | image/x-wmf image/wmf 194 | image/x-xpm image/x-xpixmap 195 | image/x.djvu image/vnd.djvu 196 | text/directory text/vcard 197 | text/ecmascript application/ecmascript 198 | text/gedcom application/x-gedcom 199 | text/google-video-pointer text/x-google-video-pointer 200 | text/ico image/vnd.microsoft.icon 201 | text/javascript application/javascript 202 | text/mathml application/mathml+xml 203 | text/rdf application/rdf+xml 204 | text/rss application/rss+xml 205 | text/rtf application/rtf 206 | text/x-c text/x-csrc 207 | text/x-comma-separated-values text/csv 208 | text/x-csv text/csv 209 | text/x-diff text/x-patch 210 | text/x-dtd application/xml-dtd 211 | text/x-lyx application/x-lyx 212 | text/x-markdown text/markdown 213 | text/x-octave text/x-matlab 214 | text/x-opml text/x-opml+xml 215 | 
text/x-po text/x-gettext-translation 216 | text/x-pot text/x-gettext-translation-template 217 | text/x-sh application/x-shellscript 218 | text/x-sql application/sql 219 | text/x-troff text/troff 220 | text/x-vcalendar text/calendar 221 | text/x-vcard text/vcard 222 | text/x-yaml application/x-yaml 223 | text/xml application/xml 224 | text/xml-external-parsed-entity application/xml-external-parsed-entity 225 | text/yaml application/x-yaml 226 | video/3gp video/3gpp 227 | video/3gpp-encrypted video/3gpp 228 | video/avi video/x-msvideo 229 | video/divx video/x-msvideo 230 | video/fli video/x-flic 231 | video/flv video/x-flv 232 | video/mediaplayer video/x-ms-wmp 233 | video/mp4v-es video/mp4 234 | video/mpeg-system video/mpeg 235 | video/msvideo video/x-msvideo 236 | video/vivo video/vnd.vivo 237 | video/vnd.divx video/x-msvideo 238 | video/x-annodex video/annodex 239 | video/x-avi video/x-msvideo 240 | video/x-fli video/x-flic 241 | video/x-m4v video/mp4 242 | video/x-mpeg video/mpeg 243 | video/x-mpeg-system video/mpeg 244 | video/x-mpeg2 video/mpeg 245 | video/x-mpegurl video/vnd.mpegurl 246 | video/x-ms-asf application/vnd.ms-asf 247 | video/x-ms-asf-plugin application/vnd.ms-asf 248 | video/x-ms-wax audio/x-ms-asx 249 | video/x-ms-wm application/vnd.ms-asf 250 | video/x-ms-wmx audio/x-ms-asx 251 | video/x-ms-wvx audio/x-ms-asx 252 | video/x-ogg video/ogg 253 | video/x-ogm video/x-ogm+ogg 254 | video/x-real-video video/vnd.rn-realvideo 255 | video/x-theora video/x-theora+ogg 256 | x-directory/normal inode/directory 257 | zz-application/zz-winassoc-123 application/vnd.lotus-1-2-3 258 | zz-application/zz-winassoc-cab application/vnd.ms-cab-compressed 259 | zz-application/zz-winassoc-cdr application/vnd.corel-draw 260 | zz-application/zz-winassoc-doc application/msword 261 | zz-application/zz-winassoc-hlp application/winhlp 262 | zz-application/zz-winassoc-mdb application/vnd.ms-access 263 | zz-application/zz-winassoc-uu text/x-uuencode 264 | 
zz-application/zz-winassoc-xls application/vnd.ms-excel 265 | -------------------------------------------------------------------------------- /src/fdo_magic/builtin/check.rs: -------------------------------------------------------------------------------- 1 | use std::path::Path; 2 | use petgraph::prelude::*; 3 | use crate::{fdo_magic, read_bytes}; 4 | use mime::Mime as MIME; 5 | 6 | /// Test against all rules 7 | #[allow(unused_variables)] 8 | pub fn from_u8(file: &[u8], mimetype: MIME) -> bool { 9 | 10 | // Get magic ruleset 11 | let graph = match super::ALLRULES.get(&mimetype) { 12 | Some(item) => item, 13 | None => return false // No rule for this mime 14 | }; 15 | 16 | // Check all rulesets 17 | for x in graph.externals(Incoming) { 18 | if fdo_magic::check::from_u8_walker(file, mimetype, graph, x, true) { 19 | return true; 20 | } 21 | } 22 | 23 | false 24 | } 25 | 26 | /// This only exists for the case of a direct match_filepath call 27 | /// and even then we could probably get rid of this... 
28 | #[allow(unused_variables)] 29 | pub fn from_filepath(filepath: &Path, mimetype: MIME) -> bool{ 30 | // Get magic ruleset 31 | let magic_rules = match super::ALLRULES.get(&mimetype) { 32 | Some(item) => item, 33 | None => return false // No rule for this mime 34 | }; 35 | 36 | // Get # of bytes to read 37 | let mut scanlen = 0; 38 | for x in magic_rules.raw_nodes() { 39 | let ref y = x.weight; 40 | let tmplen = 41 | y.start_off as usize + 42 | y.val_len as usize + 43 | y.region_len as usize; 44 | 45 | if tmplen > scanlen { 46 | scanlen = tmplen; 47 | } 48 | } 49 | 50 | let b = match read_bytes(filepath, scanlen) { 51 | Ok(x) => x, 52 | Err(_) => return false 53 | }; 54 | 55 | from_u8(b.as_slice(), mimetype) 56 | } -------------------------------------------------------------------------------- /src/fdo_magic/builtin/init.rs: -------------------------------------------------------------------------------- 1 | use fnv::FnvHashMap; 2 | use crate::MIME; 3 | 4 | /// Read all subclass lines from file 5 | fn read_subclasses() -> Result, std::io::Error> { 6 | 7 | let r = include_str!("subclasses"); 8 | let mut subclasses = Vec::<(MIME, MIME)>::new(); 9 | 10 | for line in r.lines() { 11 | let child = line.split_whitespace().nth(0).unwrap_or("").parse().unwrap_or(mime::APPLICATION_OCTET_STREAM); 12 | let parent = line.split_whitespace().nth(1).unwrap_or("").parse().unwrap_or(mime::APPLICATION_OCTET_STREAM); 13 | 14 | subclasses.push( (parent, child) ); 15 | } 16 | 17 | Ok(subclasses) 18 | } 19 | 20 | // Get filetype aliases 21 | fn read_aliaslist() -> Result, std::io::Error> { 22 | let raliases = include_str!("aliases"); 23 | let mut aliaslist = FnvHashMap::::default(); 24 | 25 | for line in raliases.lines() { 26 | let a = line.split_whitespace().nth(0).unwrap_or("").parse().unwrap_or(mime::APPLICATION_OCTET_STREAM); 27 | let b = line.split_whitespace().nth(1).unwrap_or("").parse().unwrap_or(mime::APPLICATION_OCTET_STREAM); 28 | aliaslist.insert(a,b); 29 | } 30 | 31 | 
let aliaslist = aliaslist; 32 | Ok(aliaslist) 33 | } 34 | 35 | pub fn get_aliaslist() -> FnvHashMap { 36 | read_aliaslist().unwrap_or(FnvHashMap::default()) 37 | } 38 | 39 | /// Get list of supported MIME types 40 | pub fn get_supported() -> Vec { 41 | super::ALLRULES.keys().cloned().collect() 42 | } 43 | 44 | /// Get list of parent -> child subclass links 45 | pub fn get_subclasses() -> Vec<(MIME, MIME)> { 46 | 47 | let mut subclasses = read_subclasses().unwrap_or(Vec::<(MIME, MIME)>::new()); 48 | 49 | // If child or parent refers to an alias, change it to the real type 50 | for x in 0..subclasses.len(){ 51 | match super::ALIASES.get(&subclasses[x].0) { 52 | Some(alias) => {subclasses[x].0 = alias.clone();} 53 | None => {} 54 | } 55 | match super::ALIASES.get(&subclasses[x].1) { 56 | Some(alias) => {subclasses[x].1 = alias.clone();} 57 | None => {} 58 | } 59 | } 60 | 61 | subclasses 62 | } -------------------------------------------------------------------------------- /src/fdo_magic/builtin/magic: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aahancoc/tree_magic/fcbb9592de0104e7fe6b9a580839f360a2fddbc8/src/fdo_magic/builtin/magic -------------------------------------------------------------------------------- /src/fdo_magic/builtin/mod.rs: -------------------------------------------------------------------------------- 1 | //! Read magic file bundled in crate 2 | 3 | use petgraph::prelude::*; 4 | use fnv::FnvHashMap; 5 | use crate::MIME; 6 | use super::MagicRule; 7 | 8 | /// Preload alias list 9 | lazy_static! { 10 | static ref ALIASES: FnvHashMap = { 11 | init::get_aliaslist() 12 | }; 13 | } 14 | 15 | /// Load magic file before anything else. 16 | lazy_static! 
{ 17 | static ref ALLRULES: FnvHashMap> = { 18 | super::ruleset::from_u8(include_bytes!("magic")).unwrap_or(FnvHashMap::default()) 19 | }; 20 | } 21 | 22 | pub mod init; 23 | pub mod check; 24 | -------------------------------------------------------------------------------- /src/fdo_magic/builtin/subclasses: -------------------------------------------------------------------------------- 1 | image/x-minolta-mrw image/x-dcraw 2 | application/rss+xml application/xml 3 | text/x-chdr text/x-csrc 4 | application/x-vnd.akonadi.calendar.event text/calendar 5 | application/x-cdrdao-toc text/plain 6 | application/pkix-crl+pem application/x-pem-file 7 | application/x-ruby application/x-executable 8 | application/x-ruby text/plain 9 | application/x-cd-image application/x-raw-disk-image 10 | text/x-katefilelist text/plain 11 | text/x-katefilelist text/plain 12 | text/x-dsrc text/x-csrc 13 | application/vnd.kde.fontspackage application/zip 14 | application/vnd.kde.fontspackage application/zip 15 | application/gpx+xml application/xml 16 | application/x-kicad-pcbnew text/plain 17 | application/smil+xml application/xml 18 | text/x-ssa text/plain 19 | text/x-vnd.akonadi.socialfeeditem text/plain 20 | application/x-kommander text/plain 21 | application/x-kommander text/plain 22 | text/x-reject text/plain 23 | audio/vnd.dts.hd audio/vnd.dts 24 | application/vnd.flatpak.repo text/plain 25 | application/x-vnd.kde.alarm.active application/x-vnd.kde.alarm 26 | inode/vnd.kde.device.printer inode/directory 27 | inode/vnd.kde.device.printer inode/directory 28 | video/3gpp video/mp4 29 | application/x-source-rpm application/x-rpm 30 | inode/vnd.kde.service.afpovertcp inode/directory 31 | inode/vnd.kde.service.afpovertcp inode/directory 32 | text/html text/plain 33 | text/markdown text/plain 34 | text/calendar text/plain 35 | application/x-markaby application/x-ruby 36 | text/xmcd text/plain 37 | application/vnd.sun.xml.impress.template application/zip 38 | application/x-msi 
application/x-ole-storage 39 | image/x-sigma-x3f image/x-dcraw 40 | image/x-adobe-dng image/x-dcraw 41 | image/x-adobe-dng image/tiff 42 | application/x-smb-workgroup inode/directory 43 | application/x-smb-workgroup inode/directory 44 | application/x-ktheme application/zip 45 | application/x-ktheme application/zip 46 | text/enriched text/plain 47 | application/vnd.oasis.opendocument.text application/zip 48 | inode/vnd.kde.service.smb inode/directory 49 | inode/vnd.kde.service.smb inode/directory 50 | image/x-canon-crw image/x-dcraw 51 | text/x-readme text/plain 52 | application/jrd+json application/json 53 | application/x-font-type1 application/postscript 54 | application/x-font-otf application/x-font-ttf 55 | application/vnd.sun.xml.calc application/zip 56 | text/rfc822-headers text/plain 57 | application/vnd.ms-visio.stencil.main+xml application/zip 58 | message/partial text/plain 59 | video/x-matroska application/x-matroska 60 | application/vnd.ms-visio.template.macroEnabled.main+xml application/zip 61 | application/javascript application/ecmascript 62 | text/htmlh text/plain 63 | application/sdp text/plain 64 | application/x-kvtml application/xml 65 | application/x-kvtml application/xml 66 | application/x-xzpdf application/x-xz 67 | application/x-java-archive application/zip 68 | application/x-yaml text/plain 69 | application/x-vnd.kde.notes text/plain 70 | text/x-srecord text/plain 71 | text/x-svhdr text/x-verilog 72 | text/x-gettext-translation-template text/plain 73 | video/ogg application/ogg 74 | application/x-xbel application/xml 75 | application/x-quicktime-media-link video/quicktime 76 | text/cache-manifest text/plain 77 | text/x-matlab text/plain 78 | text/x-tcl text/plain 79 | inode/vnd.kde.service.sftp-ssh inode/directory 80 | inode/vnd.kde.service.sftp-ssh inode/directory 81 | application/x-kicad-project text/plain 82 | application/x-iwork-keynote-sffkey application/zip 83 | application/x-iwork-keynote-sffkey application/zip 84 | 
application/ecmascript text/plain 85 | application/owl+xml application/xml 86 | text/x-c++src text/x-csrc 87 | application/x-ccmx text/plain 88 | message/delivery-status text/plain 89 | text/x-genie text/plain 90 | application/vnd.openxmlformats-officedocument.wordprocessingml.document application/zip 91 | application/vnd.openxmlformats-officedocument.wordprocessingml.document application/zip 92 | application/x-shellscript application/x-executable 93 | application/x-shellscript text/plain 94 | image/x-kodak-dcr image/x-dcraw 95 | image/x-kodak-dcr image/tiff 96 | application/xhtml+xml application/xml 97 | text/x-makefile text/plain 98 | text/x-scala text/plain 99 | image/x-kodak-kdc image/x-dcraw 100 | image/x-kodak-kdc image/tiff 101 | audio/x-opus+ogg audio/ogg 102 | application/pkcs8+pem application/x-pem-file 103 | text/x-lua application/x-executable 104 | text/x-lua text/plain 105 | image/x-gzeps application/gzip 106 | application/vnd.oasis.opendocument.text-flat-xml application/xml 107 | application/vnd.oasis.opendocument.text-flat-xml application/xml 108 | application/x-mobipocket-ebook application/vnd.palm 109 | application/x-mobipocket-ebook application/x-palm-database 110 | application/x-mobipocket-ebook application/vnd.palm 111 | application/x-vnd.kde.alarm.template application/x-vnd.kde.alarm 112 | message/disposition-notification text/plain 113 | text/vnd.abc text/plain 114 | text/vnd.abc text/plain 115 | application/vnd.flatpak.ref text/plain 116 | text/x-ldif text/plain 117 | application/vnd.openxmlformats-officedocument.presentationml.presentation application/zip 118 | application/vnd.openxmlformats-officedocument.presentationml.presentation application/zip 119 | video/x-javafx video/x-flv 120 | text/x-ooc text/x-csrc 121 | application/x-glade application/xml 122 | text/vcard text/plain 123 | application/x-trig text/plain 124 | application/x-trig text/plain 125 | application/x-trig text/plain 126 | application/x-nautilus-link text/plain 127 | 
text/css text/plain 128 | application/x-vnd.kde.okteta.structure application/xml 129 | text/csv text/plain 130 | application/pkcs12+pem application/x-pem-file 131 | application/vnd.openofficeorg.extension application/zip 132 | inode/vnd.kde.bluedevil.device inode/directory 133 | image/x-eps application/postscript 134 | application/x-go-sgf text/plain 135 | text/x-xxencode text/plain 136 | application/x-bzpdf application/x-bzip 137 | text/x-modelica text/plain 138 | application/x-gzdvi application/gzip 139 | application/vnd.coffeescript text/plain 140 | application/vnd.ms-works application/x-ole-storage 141 | application/vnd.oasis.opendocument.text-web application/zip 142 | application/x-gnuplot text/plain 143 | application/vnd.ms-visio.template.main+xml application/zip 144 | inode/vnd.kde.service.webdavs inode/directory 145 | inode/vnd.kde.service.webdavs inode/directory 146 | text/x-setext text/plain 147 | x-content/unix-software x-content/software 148 | text/x-ascii85 text/plain 149 | application/x-lzpdf application/x-lzip 150 | application/x-shared-library-la text/plain 151 | inode/vnd.kde.service.ftps inode/directory 152 | inode/vnd.kde.service.ftps inode/directory 153 | application/base64 text/plain 154 | application/btoa text/plain 155 | text/x-uri text/plain 156 | inode/vnd.kde.device.router inode/directory 157 | inode/vnd.kde.device.router inode/directory 158 | application/vnd.oasis.opendocument.image application/zip 159 | message/news text/plain 160 | text/x-scons text/x-python 161 | text/vnd.trolltech.linguist application/xml 162 | application/x-m4 text/plain 163 | application/x-gzpostscript application/gzip 164 | image/x-panasonic-raw2 image/x-dcraw 165 | application/x-troff-man text/plain 166 | application/xspf+xml application/xml 167 | text/x-cobol text/plain 168 | text/spreadsheet text/plain 169 | application/x-gtk-builder application/xml 170 | application/mbox text/plain 171 | application/x-pagemaker application/x-ole-storage 172 | 
application/x-windows-themepack application/vnd.ms-cab-compressed 173 | application/vnd.kde.kpatience.savedgame application/xml 174 | text/x-tex text/plain 175 | application/vnd.kde.kpatience.savedstate application/xml 176 | application/x-desktop text/plain 177 | application/x-lyx text/plain 178 | text/x-patch text/plain 179 | text/x-objchdr text/x-csrc 180 | text/x-objchdr text/x-csrc 181 | text/x-mup text/plain 182 | application/x-vnd.akonadi.calendar.freebusy text/calendar 183 | application/vnd.chess-pgn text/plain 184 | audio/x-minipsf audio/x-psf 185 | application/vnd.oasis.opendocument.presentation application/zip 186 | audio/x-flac+ogg audio/ogg 187 | inode/mount-point inode/directory 188 | text/x-xmi application/xml 189 | application/x-vnd.akonadi.calendar.todo text/calendar 190 | application/x-brasero application/xml 191 | audio/ogg application/ogg 192 | application/mathml+xml application/xml 193 | audio/x-aifc application/x-iff 194 | text/x-java text/x-csrc 195 | audio/x-aiff application/x-iff 196 | application/x-iso9660-appimage application/x-executable 197 | application/x-iso9660-appimage application/x-iso9660-image 198 | application/x-iso9660-appimage application/x-executable 199 | application/x-iso9660-appimage application/x-cd-image 200 | application/xml text/plain 201 | application/json-patch+json application/json 202 | text/x-authors text/plain 203 | application/msword application/x-ole-storage 204 | audio/annodex application/annodex 205 | application/vnd.sun.xml.draw application/zip 206 | message/rfc822 text/plain 207 | application/pkcs10+pem application/x-pem-file 208 | inode/vnd.kde.service.webdav inode/directory 209 | inode/vnd.kde.service.webdav inode/directory 210 | application/geo+json application/json 211 | application/x-asp text/plain 212 | application/vnd.oasis.opendocument.database application/zip 213 | application/x-magicpoint text/plain 214 | application/x-vnd.akonadi.calendar.journal text/calendar 215 | application/metalink+xml 
application/xml 216 | application/metalink+xml application/xml 217 | application/metalink+xml application/xml 218 | application/x-gz-font-linux-psf application/gzip 219 | application/ld+json application/json 220 | application/vnd.ms-excel.sheet.binary.macroEnabled.12 application/vnd.openxmlformats-officedocument.spreadsheetml.sheet 221 | application/vnd.ms-excel.sheet.binary.macroEnabled.12 application/zip 222 | inode/vnd.kde.device.unknown inode/directory 223 | inode/vnd.kde.device.unknown inode/directory 224 | audio/x-mpegurl text/plain 225 | application/vnd.ms-excel.addin.macroEnabled.12 application/vnd.openxmlformats-officedocument.spreadsheetml.sheet 226 | application/x-fluid text/plain 227 | application/vnd.android.package-archive application/zip 228 | application/vnd.android.package-archive application/x-java-archive 229 | text/x-csrc text/plain 230 | application/vnd.sun.xml.calc.template application/zip 231 | image/x-nikon-nef image/x-dcraw 232 | image/x-nikon-nef image/tiff 233 | application/x-lrzip-compressed-tar application/x-lrzip 234 | image/x-sony-arw image/x-dcraw 235 | image/x-sony-arw image/tiff 236 | video/x-ms-wmv application/vnd.ms-asf 237 | application/x-kicad-schematic text/plain 238 | application/pkcs7-mime+pem application/x-pem-file 239 | text/x-vhdl text/plain 240 | application/vnd.ms-visio.drawing.macroEnabled.main+xml application/zip 241 | inode/vnd.kde.bluedevil.service inode/directory 242 | application/x-quanta text/plain 243 | application/x-quanta text/plain 244 | application/vnd.ms-powerpoint.slideshow.macroEnabled.12 application/vnd.openxmlformats-officedocument.presentationml.slideshow 245 | application/vnd.ms-powerpoint.slideshow.macroEnabled.12 application/zip 246 | image/x-portable-bitmap image/x-portable-anymap 247 | text/x-erlang text/plain 248 | text/x-ocl text/plain 249 | text/x-scheme text/plain 250 | application/x-ufraw application/xml 251 | application/x-tarz application/x-compress 252 | x-content/win32-software 
x-content/software 253 | application/x-cb7 application/x-7z-compressed 254 | text/x-changelog text/plain 255 | application/x-ica text/plain 256 | application/vnd.oasis.opendocument.graphics application/zip 257 | text/x-texinfo text/plain 258 | application/vnd.openxmlformats-officedocument.spreadsheetml.sheet application/zip 259 | application/vnd.openxmlformats-officedocument.spreadsheetml.sheet application/zip 260 | audio/x-ms-wma application/vnd.ms-asf 261 | audio/x-amzxml text/plain 262 | text/csv-schema text/plain 263 | inode/vnd.kde.service.rfb inode/symlink 264 | inode/vnd.kde.service.rfb inode/symlink 265 | application/vnd.oasis.opendocument.formula application/zip 266 | application/x-kns application/zip 267 | application/x-kns application/zip 268 | text/x-vnd.akonadi.note text/plain 269 | application/xml-external-parsed-entity application/xml 270 | video/x-ms-wmp video/x-ms-wmv 271 | video/x-ms-wmp video/x-ms-wmv 272 | application/pkcs7-signature text/plain 273 | application/x-sami text/plain 274 | text/x-go text/plain 275 | application/x-tzo application/x-lzop 276 | application/x-zip-compressed-fb2 application/zip 277 | image/x-kde-raw image/x-dcraw 278 | image/x-kde-raw image/x-dcraw 279 | application/vnd.ms-publisher application/x-ole-storage 280 | application/x-xz-compressed-tar application/x-xz 281 | application/x-bzip-compressed-tar application/x-bzip 282 | application/vnd.ms-visio.stencil.macroEnabled.main+xml application/zip 283 | audio/x-speex+ogg audio/ogg 284 | text/x-xslfo application/xml 285 | application/x-subrip text/plain 286 | application/vnd.ms-excel.sheet.macroEnabled.12 application/vnd.openxmlformats-officedocument.spreadsheetml.sheet 287 | application/vnd.ms-excel.sheet.macroEnabled.12 application/zip 288 | application/x-audacity-project text/xml 289 | text/x-nfo text/x-readme 290 | text/x-csharp text/x-csrc 291 | audio/x-psflib audio/x-psf 292 | application/vnd.ms-visio.drawing.main+xml application/zip 293 | text/x-svsrc text/x-verilog 
294 | text/x-literate-haskell text/plain 295 | text/tab-separated-values text/plain 296 | application/vnd.ms-powerpoint.presentation.macroEnabled.12 application/vnd.openxmlformats-officedocument.presentationml.presentation 297 | application/vnd.ms-powerpoint.presentation.macroEnabled.12 application/zip 298 | application/x-xpinstall application/zip 299 | application/vnd.sun.xml.impress application/zip 300 | application/x-kexiproject-sqlite2 application/x-sqlite2 301 | application/x-kexiproject-sqlite2 application/x-sqlite2 302 | application/x-kexiproject-sqlite3 application/x-sqlite3 303 | application/x-kexiproject-sqlite3 application/x-sqlite3 304 | application/vnd.visio application/x-ole-storage 305 | application/raml+yaml application/x-yaml 306 | application/x-netshow-channel application/vnd.ms-asf 307 | model/vrml text/plain 308 | application/x-turtle text/plain 309 | application/x-turtle text/plain 310 | application/x-vnd.kde.contactgroup text/xml 311 | application/x-vnd.kde.contactgroup text/xml 312 | application/metalink4+xml application/xml 313 | application/x-perl application/x-executable 314 | application/x-perl text/plain 315 | application/vnd.sun.xml.writer.template application/zip 316 | application/vnd.oasis.opendocument.text-master application/zip 317 | application/x-cbr application/vnd.rar 318 | application/x-cbt application/x-tar 319 | image/vnd.djvu+multipage image/vnd.djvu 320 | application/x-it87 text/plain 321 | application/x-superkaramba application/zip 322 | application/x-superkaramba application/zip 323 | image/svg+xml-compressed application/gzip 324 | text/x-adasrc text/plain 325 | application/vnd.oasis.opendocument.chart application/zip 326 | text/x-copying text/plain 327 | text/x-gherkin text/plain 328 | text/x-gettext-translation text/plain 329 | text/richtext text/plain 330 | inode/vnd.kde.device.server inode/directory 331 | inode/vnd.kde.device.server inode/directory 332 | text/x-fortran text/plain 333 | inode/vnd.kde.service.ssh 
inode/symlink 334 | inode/vnd.kde.service.ssh inode/symlink 335 | text/x-subviewer text/plain 336 | inode/vnd.kde.device.scanner inode/directory 337 | inode/vnd.kde.device.scanner inode/directory 338 | application/x-ipynb+json application/json 339 | video/x-theora+ogg video/ogg 340 | text/rust text/plain 341 | application/vnd.comicbook+zip application/zip 342 | image/x-pentax-pef image/x-dcraw 343 | image/x-pentax-pef image/tiff 344 | application/x-k3b application/zip 345 | text/x-hex text/plain 346 | text/x-hex text/plain 347 | text/troff text/plain 348 | text/x-iptables text/plain 349 | video/x-ogm+ogg video/ogg 350 | application/x-nzb application/xml 351 | application/x-nzb application/xml 352 | application/x-nzb application/xml 353 | text/vnd.wap.wml application/xml 354 | text/x-sass text/plain 355 | audio/x-matroska application/x-matroska 356 | application/vnd.sun.xml.math application/zip 357 | application/oxps application/zip 358 | application/x-awk application/x-executable 359 | application/x-awk text/plain 360 | text/x-verilog text/plain 361 | application/x-designer application/xml 362 | text/x-opml+xml application/xml 363 | application/x-fictionbook+xml application/xml 364 | application/x-csh application/x-shellscript 365 | application/x-csh text/plain 366 | application/pgp-encrypted text/plain 367 | application/x-lz4-compressed-tar application/x-lz4 368 | text/x-troff-me text/plain 369 | image/x-olympus-orf image/x-dcraw 370 | image/x-bzeps application/x-bzip 371 | text/x-python application/x-executable 372 | text/x-python text/plain 373 | text/x-idl text/plain 374 | application/x-plasma application/zip 375 | application/x-plasma application/zip 376 | application/x-lzma-compressed-tar application/x-lzma 377 | image/x-fuji-raf image/x-dcraw 378 | application/x-dia-shape application/xml 379 | application/vnd.apple.mpegurl text/plain 380 | application/epub+zip application/zip 381 | application/x-theme application/x-desktop 382 | text/x-mpsub text/plain 383 | 
model/iges text/plain 384 | image/x-portable-pixmap image/x-portable-anymap 385 | application/gml+xml application/xml 386 | application/xslt+xml application/xml 387 | text/x-troff-ms text/plain 388 | text/x-cmake text/plain 389 | application/x-vnd.kde.alarm.archived application/x-vnd.kde.alarm 390 | application/x-wwf application/pdf 391 | application/vnd.mozilla.xul+xml application/xml 392 | text/x-meson text/plain 393 | text/x-troff-mm text/plain 394 | text/x-uil text/plain 395 | application/x-mozilla-bookmarks text/html 396 | inode/vnd.kde.service.svn inode/directory 397 | inode/vnd.kde.service.svn inode/directory 398 | text/x-txt2tags text/plain 399 | application/vnd.snap application/vnd.squashfs 400 | inode/vnd.kde.device.workstation inode/directory 401 | inode/vnd.kde.device.workstation inode/directory 402 | inode/vnd.kde.service.ftp inode/directory 403 | inode/vnd.kde.service.ftp inode/directory 404 | application/vnd.sun.xml.draw.template application/zip 405 | application/x-bzdvi application/x-bzip 406 | application/pkix-cert+pem application/x-pem-file 407 | text/x-objcsrc text/x-csrc 408 | application/vnd.oasis.opendocument.graphics-flat-xml application/xml 409 | application/vnd.oasis.opendocument.graphics-flat-xml application/xml 410 | application/x-wais-source text/plain 411 | application/vnd.sun.xml.writer.global application/zip 412 | application/x-abiword application/xml 413 | application/atom+xml application/xml 414 | text/x-c++hdr text/x-chdr 415 | text/x-twig text/plain 416 | application/x-gzpdf application/gzip 417 | application/xsd application/xml 418 | application/xsd application/xml 419 | text/x-uuencode text/plain 420 | text/x-uuencode text/plain 421 | application/x-vnd.kde.alarm text/calendar 422 | text/x-ihex text/plain 423 | inode/vnd.kde.service.telnet inode/symlink 424 | inode/vnd.kde.service.telnet inode/symlink 425 | application/vnd.openxmlformats-officedocument.wordprocessingml.template application/zip 426 | 
application/vnd.openxmlformats-officedocument.wordprocessingml.template application/zip 427 | application/relaxng application/xml 428 | application/relaxng application/xml 429 | inode/vnd.kde.network inode/directory 430 | inode/vnd.kde.network inode/directory 431 | application/vnd.oasis.opendocument.graphics-template application/zip 432 | text/x-pascal text/plain 433 | inode/vnd.kde.service.http inode/symlink 434 | inode/vnd.kde.service.http inode/symlink 435 | application/x-php text/plain 436 | application/x-spkac+base64 text/plain 437 | text/x-eiffel text/plain 438 | application/x-mimearchive multipart/related 439 | application/x-cue text/plain 440 | text/x-haskell text/plain 441 | application/x-raw-disk-image-xz-compressed application/x-xz 442 | application/vnd.ms-word.template.macroEnabled.12 application/vnd.openxmlformats-officedocument.wordprocessingml.template 443 | application/vnd.ms-word.template.macroEnabled.12 application/zip 444 | application/sieve application/xml 445 | application/vnd.ms-excel.template.macroEnabled.12 application/vnd.openxmlformats-officedocument.spreadsheetml.template 446 | application/vnd.ms-excel.template.macroEnabled.12 application/zip 447 | text/x-log text/plain 448 | image/x-sony-sr2 image/x-dcraw 449 | image/x-sony-sr2 image/tiff 450 | image/x-portable-graymap image/x-portable-anymap 451 | text/x-lilypond text/plain 452 | application/vnd.ms-powerpoint.template.macroEnabled.12 application/vnd.openxmlformats-officedocument.presentationml.template 453 | application/vnd.ms-powerpoint.template.macroEnabled.12 application/zip 454 | application/relax-ng-compact-syntax text/plain 455 | application/x-valgrind-massif text/plain 456 | image/x-tiff-multipage image/tiff 457 | image/openraster application/zip 458 | application/x-font-ttx application/xml 459 | application/x-xliff application/xml 460 | text/x-install text/plain 461 | application/vnd.ms-word.document.macroEnabled.12 
application/vnd.openxmlformats-officedocument.wordprocessingml.document 462 | application/vnd.ms-word.document.macroEnabled.12 application/zip 463 | image/x-ilbm application/x-iff 464 | application/vnd.google-earth.kml+xml application/xml 465 | application/x-aportisdoc application/vnd.palm 466 | application/x-webarchive application/x-compressed-tar 467 | application/x-webarchive application/x-gzip 468 | application/x-webarchive application/x-compressed-tar 469 | application/x-webarchive application/x-gzip 470 | video/x-matroska-3d application/x-matroska 471 | application/vnd.sun.xml.writer application/zip 472 | application/x-pem-key application/x-pem-file 473 | application/x-docbook+xml application/xml 474 | application/x-profile text/plain 475 | video/vnd.mpegurl text/plain 476 | image/x-kodak-k25 image/x-dcraw 477 | image/x-kodak-k25 image/tiff 478 | application/vnd.oasis.opendocument.spreadsheet-flat-xml application/xml 479 | application/vnd.oasis.opendocument.spreadsheet-flat-xml application/xml 480 | audio/webm video/webm 481 | application/vnd.oasis.opendocument.presentation-flat-xml application/xml 482 | application/vnd.oasis.opendocument.presentation-flat-xml application/xml 483 | application/vnd.oasis.opendocument.spreadsheet application/zip 484 | application/json application/javascript 485 | application/msword-template application/msword 486 | application/vnd.openxmlformats-officedocument.presentationml.template application/zip 487 | application/vnd.openxmlformats-officedocument.presentationml.template application/zip 488 | text/sgml text/plain 489 | application/vnd.ms-powerpoint.slide.macroEnabled.12 application/vnd.openxmlformats-officedocument.presentationml.slide 490 | application/xml-dtd text/plain 491 | text/x-credits text/plain 492 | text/x-bibtex text/plain 493 | text/wiki text/plain 494 | application/pgp-keys text/plain 495 | video/annodex application/annodex 496 | application/vnd.oasis.opendocument.spreadsheet-template application/zip 497 | 
application/vnd.oasis.opendocument.presentation-template application/zip 498 | application/mathematica text/plain 499 | audio/x-m4b audio/mp4 500 | application/rdf+xml application/xml 501 | application/x-java-jnlp-file application/xml 502 | application/vnd.google-earth.kmz application/zip 503 | text/x-rpm-spec text/plain 504 | application/x-ptoptimizer-script text/plain 505 | application/x-qtiplot text/plain 506 | image/x-sony-srf image/x-dcraw 507 | image/x-sony-srf image/tiff 508 | text/x-emacs-lisp text/plain 509 | text/x-dcl text/plain 510 | application/x-lzip-compressed-tar application/x-lzip 511 | application/vnd.oasis.opendocument.text-template application/zip 512 | application/vnd.oasis.opendocument.chart-template application/zip 513 | application/x-cpio-compressed application/gzip 514 | text/x-microdvd text/plain 515 | application/vnd.oasis.opendocument.formula-template application/zip 516 | text/x-moc text/plain 517 | application/x-compressed-tar application/gzip 518 | application/pgp-signature text/plain 519 | text/x-mof text/x-csrc 520 | application/vnd.openxmlformats-officedocument.presentationml.slide application/zip 521 | text/vtt text/plain 522 | application/sql text/plain 523 | image/x-canon-cr2 image/x-dcraw 524 | image/x-canon-cr2 image/tiff 525 | text/x-ms-regedit text/plain 526 | image/svg+xml application/xml 527 | text/x-scss text/plain 528 | audio/x-vorbis+ogg audio/ogg 529 | text/x-dsl text/plain 530 | application/x-dia-diagram application/xml 531 | application/vnd.openxmlformats-officedocument.spreadsheetml.template application/zip 532 | application/vnd.openxmlformats-officedocument.spreadsheetml.template application/zip 533 | text/x-vala text/x-csrc 534 | application/x-bzpostscript application/x-bzip 535 | image/x-panasonic-raw image/x-dcraw 536 | application/postscript text/plain 537 | application/x-vnd.akonadi.note text/plain 538 | text/x-base32 text/plain 539 | application/vnd.openxmlformats-officedocument.presentationml.slideshow 
application/zip 540 | application/vnd.openxmlformats-officedocument.presentationml.slideshow application/zip 541 | application/x-kgetlist application/xml 542 | application/x-kgetlist application/xml 543 | application/x-pem-file text/plain 544 | video/3gpp2 video/mp4 545 | inode/vnd.kde.service.nfs inode/directory 546 | inode/vnd.kde.service.nfs inode/directory 547 | application/rtf text/plain 548 | text/turtle text/plain 549 | application/x-smb-server inode/directory 550 | application/x-smb-server inode/directory 551 | -------------------------------------------------------------------------------- /src/fdo_magic/check.rs: -------------------------------------------------------------------------------- 1 | use petgraph::prelude::*; 2 | use crate::{MIME}; 3 | 4 | fn from_u8_singlerule(file: &[u8], rule: &super::MagicRule) -> bool { 5 | 6 | // Check if we're even in bounds 7 | let bound_min = 8 | rule.start_off as usize; 9 | let bound_max = 10 | rule.start_off as usize + 11 | rule.val_len as usize + 12 | rule.region_len as usize; 13 | 14 | if (file.len()) < bound_max { 15 | return false; 16 | } 17 | 18 | if rule.region_len == 0 { 19 | 20 | //println!("Region == 0"); 21 | 22 | match rule.mask { 23 | None => { 24 | //println!("\tMask == None"); 25 | let x: Vec = file.iter().skip(bound_min).take(bound_max - bound_min).map(|&x| x).collect(); 26 | //println!("\t{:?} / {:?}", x, rule.val); 27 | //println!("\tIndent: {}, Start: {}", rule.indent_level, rule.start_off); 28 | return rule.val.iter().eq(x.iter()); 29 | }, 30 | Some(ref mask) => { 31 | //println!("\tMask == Some, len == {}", mask.len()); 32 | //println!("\tIndent: {}, Start: {}", rule.indent_level, rule.start_off); 33 | let mut x: Vec = file.iter() 34 | .skip(bound_min) // Skip to start of area 35 | .take(bound_max - bound_min) // Take until end of area - region length 36 | .map(|&x| x).collect(); // Convert to vector 37 | let mut val: Vec = rule.val.iter().map(|&x| x).collect(); 38 | //println!("\t{:?} / {:?}", 
x, rule.val); 39 | 40 | 41 | assert_eq!(x.len(), mask.len()); 42 | for i in 0..std::cmp::min(x.len(), mask.len()) { 43 | x[i] &= mask[i]; 44 | val[i] = val[i] & mask[i]; 45 | } 46 | //println!("\t & {:?} => {:?}", mask, x); 47 | 48 | return rule.val.iter().eq(x.iter()); 49 | } 50 | } 51 | 52 | } else { 53 | //println!("\tRegion == {}", rule.region_len); 54 | //println!("\tIndent: {}, Start: {}", rule.indent_level, rule.start_off); 55 | 56 | // Define our testing slice 57 | let ref x: Vec = file.iter().take(file.len()).map(|&x| x).collect(); 58 | let testarea: Vec = x.iter().skip(bound_min).take(bound_max - bound_min).map(|&x| x).collect(); 59 | //println!("{:?}, {:?}, {:?}\n", file, testarea, rule.val); 60 | 61 | // Search down until we find a hit 62 | let mut y = Vec::::with_capacity(testarea.len()); 63 | for x in testarea.windows(rule.val_len as usize) { 64 | 65 | y.clear(); 66 | 67 | // Apply mask to value 68 | let ref rule_mask = rule.mask; 69 | match *rule_mask { 70 | Some(ref mask) => { 71 | 72 | for i in 0..rule.val_len { 73 | y.push(x[i as usize] & mask[i as usize]); 74 | } 75 | }, 76 | None => y = x.to_vec(), 77 | } 78 | 79 | if y.iter().eq(rule.val.iter()) { 80 | return true; 81 | } 82 | } 83 | } 84 | 85 | false 86 | } 87 | 88 | /// Test every given rule by walking graph 89 | /// TODO: Not loving the code duplication here. 
90 | pub fn from_u8_walker( 91 | file: &[u8], 92 | mimetype: MIME, 93 | graph: &DiGraph, 94 | node: NodeIndex, 95 | isroot: bool 96 | ) -> bool { 97 | 98 | let n = graph.neighbors_directed(node, Outgoing); 99 | 100 | if isroot { 101 | let ref rule = graph[node]; 102 | 103 | // Check root 104 | if !from_u8_singlerule(&file, rule) { 105 | return false; 106 | } 107 | 108 | // Return if that was the only test 109 | if n.clone().count() == 0 { 110 | return true; 111 | } 112 | 113 | // Otherwise next indent level is lower, so continue 114 | } 115 | 116 | // Check subrules recursively 117 | for y in n { 118 | let ref rule = graph[y]; 119 | 120 | if from_u8_singlerule(&file, rule) { 121 | // Check next indent level if needed 122 | if graph.neighbors_directed(y, Outgoing).count() != 0 { 123 | return from_u8_walker(file, mimetype, graph, y, false); 124 | // Next indent level is lower, so this must be it 125 | } else { 126 | return true; 127 | } 128 | } 129 | } 130 | 131 | false 132 | } -------------------------------------------------------------------------------- /src/fdo_magic/mod.rs: -------------------------------------------------------------------------------- 1 | // Common routines for all fdo_magic parsers 2 | 3 | pub mod builtin; 4 | 5 | #[derive(Debug, Clone)] 6 | pub struct MagicRule { 7 | pub indent_level: u32, 8 | pub start_off: u32, 9 | pub val_len: u16, 10 | pub val: Vec, 11 | pub mask: Option>, 12 | pub word_len: u32, 13 | pub region_len: u32 14 | } 15 | 16 | pub mod ruleset; 17 | pub mod check; 18 | -------------------------------------------------------------------------------- /src/fdo_magic/ruleset.rs: -------------------------------------------------------------------------------- 1 | use std::str; 2 | use petgraph::prelude::*; 3 | use fnv::FnvHashMap; 4 | use crate::MIME; 5 | 6 | // Below functions from https://github.com/badboy/iso8601/blob/master/src/helper.rs 7 | // but modified to be safe and provide defaults 8 | pub fn to_string(s: &[u8]) -> 
std::result::Result<&str, std::str::Utf8Error> { 9 | str::from_utf8(s) 10 | } 11 | pub fn to_u32(s: std::result::Result<&str, std::str::Utf8Error>, def: u32) -> u32 { 12 | 13 | match s { 14 | Ok (t) => {str::FromStr::from_str(t).unwrap_or(def)}, 15 | Err (_) => def 16 | } 17 | } 18 | 19 | pub fn buf_to_u32(s: &[u8], def: u32) -> u32 { 20 | to_u32(to_string(s), def) 21 | } 22 | 23 | // Initial mime string 24 | // Format: [priority: mime] 25 | named!(mime<&str>, 26 | map_res!( 27 | delimited!( 28 | delimited!( 29 | char!('['), 30 | is_not!(":"), 31 | char!(':') 32 | ), 33 | is_not!("]"), // the mime 34 | tag!("]\n") 35 | ), 36 | str::from_utf8 37 | ) 38 | ); 39 | 40 | // Indent levels sub-parser for magic_rules 41 | // Default value 0 42 | named!(magic_rules_indent_level, 43 | do_parse!( 44 | ret: take_until!(">") >> 45 | (buf_to_u32(ret, 0)) 46 | ) 47 | ); 48 | 49 | // Start offset sub-parser for magic_rules 50 | named!(magic_rules_start_off, 51 | do_parse!( 52 | ret: take_until!("=") >> 53 | (buf_to_u32(ret, 0)) 54 | ) 55 | ); 56 | 57 | // Singular magic ruleset 58 | named!(magic_rules, 59 | do_parse!( 60 | peek!(is_a!("012345689>")) >> 61 | _indent_level: magic_rules_indent_level >> 62 | tag!(">") >> 63 | _start_off: magic_rules_start_off >> 64 | tag!("=") >> 65 | _val_len: u16!(nom::Endianness::Big) >> // length of value 66 | _val: do_parse!( 67 | ret: take!(_val_len) >> 68 | (ret.iter().map(|&x| x).collect()) 69 | ) >> // value 70 | 71 | _mask: opt!( 72 | do_parse!( 73 | char!('&') >> 74 | ret: take!(_val_len) >> // mask (default 0xFF) 75 | (ret.iter().map(|&x| x).collect()) 76 | ) 77 | ) >> 78 | 79 | // word size (default 1) 80 | _word_len: opt!( 81 | do_parse!( 82 | tag!("~") >> 83 | ret: take_until!("+") >> 84 | (buf_to_u32(ret, 1)) 85 | ) 86 | ) >> 87 | 88 | // length of region in file to check (default 1) 89 | _region_len: opt!( 90 | do_parse!( 91 | tag!("+") >> 92 | ret: take_until!("\n") >> 93 | (buf_to_u32(ret, 0)) 94 | ) 95 | ) >> 96 | 97 | 
take_until_and_consume!("\n") >> 98 | 99 | (super::MagicRule{ 100 | indent_level: _indent_level, 101 | start_off: _start_off, 102 | val: _val, 103 | val_len: _val_len, 104 | mask: _mask, 105 | word_len: _word_len.unwrap_or(1), 106 | region_len: _region_len.unwrap_or(0) 107 | }) 108 | ) 109 | 110 | ); 111 | 112 | /// Singular magic entry 113 | named!(magic_entry<(MIME, Vec)>, 114 | do_parse!( 115 | _mime: do_parse!(ret: mime >> (ret.parse().unwrap_or(mime::APPLICATION_OCTET_STREAM))) >> 116 | _rules: many0!(magic_rules) >> (_mime, _rules) 117 | ) 118 | ); 119 | 120 | /// Converts a magic file given as a &[u8] array 121 | /// to a vector of MagicEntry structs 122 | named!(from_u8_to_tuple_vec)>>, 123 | do_parse!( 124 | tag!("MIME-Magic\0\n") >> 125 | ret: many0!(magic_entry) >> 126 | (ret) 127 | ) 128 | ); 129 | 130 | fn gen_graph(magic_rules: Vec) -> DiGraph 131 | { 132 | use petgraph::prelude::*; 133 | // Whip up a graph real quick 134 | let mut graph = DiGraph::::new(); 135 | let mut rulestack = Vec::<(super::MagicRule, NodeIndex)>::new(); 136 | 137 | for x in magic_rules { 138 | let xnode = graph.add_node(x.clone()); 139 | 140 | loop { 141 | let y = rulestack.pop(); 142 | match y { 143 | None => {break;}, 144 | Some(rule) => { 145 | if rule.0.indent_level < x.indent_level { 146 | graph.add_edge(rule.1, xnode, 1); 147 | rulestack.push( rule ); 148 | break; 149 | } 150 | } 151 | }; 152 | } 153 | rulestack.push( (x, xnode) ); 154 | 155 | } 156 | 157 | let graph = graph; 158 | graph 159 | } 160 | 161 | pub fn from_u8(b: &[u8]) -> Result>, String> { 162 | let tuplevec = from_u8_to_tuple_vec(b).to_result().map_err(|e| e.to_string())?; 163 | let mut res = FnvHashMap::>::default(); 164 | 165 | for x in tuplevec { 166 | res.insert(x.0, gen_graph(x.1)); 167 | } 168 | 169 | Ok(res) 170 | 171 | } 172 | 173 | /// Loads the given magic file and outputs a vector of MagicEntry structs 174 | pub fn from_filepath(filepath: &str) -> Result>, String>{ 175 | use std::io::prelude::*; 
176 | use std::io::BufReader; 177 | use std::fs::File; 178 | 179 | let fmagic = File::open(filepath).map_err(|e| e.to_string())?; 180 | let mut rmagic = BufReader::new(fmagic); 181 | let mut bmagic = Vec::::new(); 182 | rmagic.read_to_end(&mut bmagic).map_err(|e| e.to_string())?; 183 | 184 | let magic_ruleset = from_u8( 185 | bmagic.as_slice() 186 | ).map_err(|e| e.to_string())?; 187 | 188 | Ok(magic_ruleset) 189 | } 190 | -------------------------------------------------------------------------------- /src/lib.rs: -------------------------------------------------------------------------------- 1 | //! `tree_magic` is a Rust crate that determines the MIME type a given file or byte stream. 2 | //! 3 | //! # About 4 | //! `tree_magic` is designed to be more efficient and to have less false positives compared 5 | //! to the old approach used by `libmagic`, or old-fashioned file extension comparisons. 6 | //! 7 | //! Instead, this loads all known MIME types into a tree based on subclasses. Then, instead 8 | //! of checking against *every* file type, `tree_magic` will traverse down the tree and 9 | //! only check the files that make sense to check. 10 | //! 11 | //! # Features 12 | //! - Very fast perfomance (~150ns to check one file against one type, 13 | //! between 5,000ns and 100,000ns to find a MIME type.) 14 | //! - Check if a file *is* a certain type. 15 | //! - Handles aliases (ex: `application/zip` vs `application/x-zip-compressed`) 16 | //! - Can delegate different file types to different "checkers", reducing false positives 17 | //! by choosing a different method of attack. 18 | //! 19 | //! # Feature flags 20 | //! `cli`: Enable building of `tmagic` binary 21 | //! 22 | //! # Example 23 | //! ```rust 24 | //! extern crate tree_magic; 25 | //! 26 | //! // Load a GIF file 27 | //! let input: &[u8] = include_bytes!("../tests/image/gif"); 28 | //! 29 | //! // Find the MIME type of the GIF 30 | //! let result = tree_magic::from_u8(input); 31 | //! 
assert_eq!(result, "image/gif"); 32 | //! 33 | //! // Check if the MIME and the file are a match 34 | //! let result = tree_magic::match_u8("image/gif", input); 35 | //! assert_eq!(result, true); 36 | //! ``` 37 | 38 | #![allow(unused_doc_comments)] 39 | #![allow(dead_code)] 40 | #[macro_use] extern crate nom; 41 | #[macro_use] extern crate lazy_static; 42 | 43 | use petgraph::prelude::*; 44 | use fnv::FnvHashMap; 45 | use fnv::FnvHashSet; 46 | use std::path::Path; 47 | use mime::Mime as MIME; 48 | 49 | mod fdo_magic; 50 | mod basetype; 51 | 52 | /// Check these types first 53 | /// TODO: Poll these from the checkers? Feels a bit arbitrary 54 | const TYPEORDER: [MIME; 4] = 55 | [ 56 | (mime::IMAGE_PNG), 57 | (mime::IMAGE_JPEG), 58 | (mime::IMAGE_GIF), 59 | (mime::APPLICATION_PDF) 60 | ]; 61 | 62 | /// Struct used to define checker functions for the sake of boilerplate reduction 63 | struct CheckerStruct { 64 | from_u8: fn(&[u8], MIME) -> bool, 65 | from_filepath: fn(&Path, MIME) -> bool, 66 | get_supported: fn() -> Vec, 67 | get_subclasses: fn() -> Vec<(MIME, MIME)>, 68 | get_aliaslist: fn() -> FnvHashMap 69 | } 70 | 71 | /// Maximum number of checkers supported with build config. 72 | /// TODO: Find any better way to do this! 
73 | const CHECKERCOUNT: usize = 2; 74 | 75 | /// List of checker functions 76 | const CHECKERS: [CheckerStruct; CHECKERCOUNT] = 77 | [ 78 | // fdo_magic 79 | CheckerStruct{ 80 | from_u8: fdo_magic::builtin::check::from_u8, 81 | from_filepath: fdo_magic::builtin::check::from_filepath, 82 | get_supported: fdo_magic::builtin::init::get_supported, 83 | get_subclasses: fdo_magic::builtin::init::get_subclasses, 84 | get_aliaslist: fdo_magic::builtin::init::get_aliaslist 85 | }, 86 | // basetype 87 | CheckerStruct{ 88 | from_u8: basetype::check::from_u8, 89 | from_filepath: basetype::check::from_filepath, 90 | get_supported: basetype::init::get_supported, 91 | get_subclasses: basetype::init::get_subclasses, 92 | get_aliaslist: basetype::init::get_aliaslist 93 | } 94 | ]; 95 | 96 | /// Mappings between modules and supported mimes (by index in table above) 97 | lazy_static! { 98 | static ref CHECKER_SUPPORT: FnvHashMap = { 99 | let mut out = FnvHashMap::::default(); 100 | for i in 0..CHECKERS.len() { 101 | for j in (CHECKERS[i].get_supported)() { 102 | out.insert(j, i); 103 | } 104 | } 105 | out 106 | }; 107 | } 108 | 109 | lazy_static! { 110 | static ref ALIASES: FnvHashMap = { 111 | let mut out = FnvHashMap::::default(); 112 | for i in 0..CHECKERS.len() { 113 | out.extend((CHECKERS[i].get_aliaslist)()); 114 | } 115 | out 116 | }; 117 | } 118 | 119 | /// Information about currently loaded MIME types 120 | /// 121 | /// The `graph` contains subclass relations between all given mimes. 122 | /// (EX: `application/json` -> `text/plain` -> `application/octet-stream`) 123 | /// This is a `petgraph` DiGraph, so you can walk the tree if needed. 124 | /// 125 | /// The `hash` is a mapping between MIME types and nodes on the graph. 126 | /// The root of the graph is "all/all", so start traversing there unless 127 | /// you need to jump to a particular node. 128 | pub struct TypeStruct { 129 | pub graph: DiGraph, 130 | pub hash: FnvHashMap 131 | } 132 | 133 | lazy_static! 
{ 134 | /// The TypeStruct autogenerated at library init, and used by the library. 135 | pub static ref TYPE: TypeStruct = { 136 | graph_init().unwrap_or( 137 | TypeStruct{ 138 | graph: DiGraph::new(), 139 | hash: FnvHashMap::default() 140 | } ) 141 | }; 142 | } 143 | 144 | // Initialize filetype graph 145 | fn graph_init() -> Result { 146 | 147 | let mut graph = DiGraph::::new(); 148 | let mut added_mimes = FnvHashMap::::default(); 149 | 150 | // Get list of MIME types and MIME relations 151 | let mut mimelist = Vec::::new(); 152 | let mut edgelist_raw = Vec::<(MIME, MIME)>::new(); 153 | for i in 0..CHECKERS.len() { 154 | mimelist.extend((CHECKERS[i].get_supported)()); 155 | edgelist_raw.extend((CHECKERS[i].get_subclasses)()); 156 | } 157 | mimelist.sort(); 158 | mimelist.dedup(); 159 | let mimelist = mimelist; 160 | 161 | // Create all nodes 162 | for mimetype in mimelist.iter() { 163 | let node = graph.add_node(mimetype.clone()); 164 | added_mimes.insert(mimetype.clone(), node); 165 | } 166 | 167 | let mut edge_list = FnvHashSet::<(NodeIndex, NodeIndex)>::with_capacity_and_hasher( 168 | edgelist_raw.len(), Default::default() 169 | ); 170 | for x in edgelist_raw { 171 | let child_raw = x.0; 172 | let parent_raw = x.1; 173 | 174 | let parent = match added_mimes.get(&parent_raw) { 175 | Some(node) => *node, 176 | None => {continue;} 177 | }; 178 | 179 | let child = match added_mimes.get(&child_raw) { 180 | Some(node) => *node, 181 | None => {continue;} 182 | }; 183 | 184 | edge_list.insert( (child, parent) ); 185 | } 186 | 187 | graph.extend_with_edges(&edge_list); 188 | 189 | //Add to applicaton/octet-stream, all/all, or text/plain, depending on top-level 190 | //(We'll just do it here because having the graph makes it really nice) 191 | let added_mimes_tmp = added_mimes.clone(); 192 | 193 | const text_plain: MIME = "text/plain".parse().unwrap(); 194 | let node_text = match added_mimes_tmp.get(&text_plain){ 195 | Some(x) => *x, 196 | None => { 197 | let node = 
graph.add_node(text_plain); 198 | added_mimes.insert(text_plain, node); 199 | node 200 | } 201 | }; 202 | const app_octet: MIME = "application/octet-stream".parse().unwrap(); 203 | let node_octet = match added_mimes_tmp.get(&app_octet){ 204 | Some(x) => *x, 205 | None => { 206 | let node = graph.add_node(app_octet); 207 | added_mimes.insert(app_octet, node); 208 | node 209 | } 210 | }; 211 | const all_all: MIME = "all/all".parse().unwrap(); 212 | let node_allall = match added_mimes_tmp.get(&all_all){ 213 | Some(x) => *x, 214 | None => { 215 | let node = graph.add_node(all_all); 216 | added_mimes.insert(all_all, node); 217 | node 218 | } 219 | }; 220 | const all_allfiles: MIME = "all/allfiles".parse().unwrap(); 221 | let node_allfiles = match added_mimes_tmp.get(&all_allfiles){ 222 | Some(x) => *x, 223 | None => { 224 | let node = graph.add_node(all_allfiles); 225 | added_mimes.insert(all_allfiles, node); 226 | node 227 | } 228 | }; 229 | 230 | let mut edge_list_2 = FnvHashSet::<(NodeIndex, NodeIndex)>::default(); 231 | for mimenode in graph.externals(Incoming) { 232 | 233 | let ref mimetype = graph[mimenode]; 234 | let toplevel = mimetype.type_(); 235 | 236 | if mimenode == node_text || mimenode == node_octet || 237 | mimenode == node_allfiles || mimenode == node_allall 238 | { 239 | continue; 240 | } 241 | 242 | if toplevel == "text" { 243 | edge_list_2.insert( (node_text, mimenode) ); 244 | } else if toplevel == "inode" { 245 | edge_list_2.insert( (node_allall, mimenode) ); 246 | } else { 247 | edge_list_2.insert( (node_octet, mimenode) ); 248 | } 249 | } 250 | // Don't add duplicate entries 251 | graph.extend_with_edges(edge_list_2.difference(&edge_list)); 252 | 253 | let graph = graph; 254 | let added_mimes = added_mimes; 255 | //println!("{:?}", Dot::with_config(&graph, &[Config::EdgeNoLabel])); 256 | 257 | Ok( TypeStruct{graph: graph, hash: added_mimes} ) 258 | } 259 | 260 | /// Just the part of from_*_node that walks the graph 261 | fn typegraph_walker( 262 
| parentnode: NodeIndex, 263 | input: T, 264 | matchfn: fn(MIME, T) -> bool 265 | ) -> Option { 266 | 267 | // Pull most common types towards top 268 | let mut children: Vec = TYPE.graph 269 | .neighbors_directed(parentnode, Outgoing) 270 | .collect(); 271 | 272 | for i in 0..children.len() { 273 | let x = children[i]; 274 | if TYPEORDER.contains(&TYPE.graph[x]) { 275 | children.remove(i); 276 | children.insert(0, x); 277 | } 278 | } 279 | 280 | // Walk graph 281 | for childnode in children { 282 | let mimetype = TYPE.graph[childnode]; 283 | 284 | let result = (matchfn)(mimetype, input.clone()); 285 | match result { 286 | true => { 287 | match typegraph_walker(childnode, input, matchfn) { 288 | Some(foundtype) => return Some(foundtype), 289 | None => return Some(mimetype), 290 | } 291 | } 292 | false => continue, 293 | } 294 | } 295 | 296 | None 297 | } 298 | 299 | /// Transforms an alias into it's real type 300 | fn get_alias(mimetype: MIME) -> MIME { 301 | match ALIASES.get(&mimetype) { 302 | Some(x) => *x, 303 | None => mimetype 304 | } 305 | } 306 | 307 | /// Internal function. Checks if an alias exists, and if it does, 308 | /// then runs match_u8. 309 | fn match_u8_noalias(mimetype: MIME, bytes: &[u8]) -> bool 310 | { 311 | match CHECKER_SUPPORT.get(&mimetype) { 312 | None => {false}, 313 | Some(y) => (CHECKERS[*y].from_u8)(bytes, mimetype) 314 | } 315 | } 316 | 317 | /// Checks if the given bytestream matches the given MIME type. 318 | /// 319 | /// Returns true or false if it matches or not. If the given MIME type is not known, 320 | /// the function will always return false. 321 | /// If mimetype is an alias of a known MIME, the file will be checked agains that MIME. 
322 | /// 323 | /// # Examples 324 | /// ```rust 325 | /// // Load a GIF file 326 | /// let input: &[u8] = include_bytes!("../tests/image/gif"); 327 | /// 328 | /// // Check if the MIME and the file are a match 329 | /// let result = tree_magic::match_u8("image/gif", input); 330 | /// assert_eq!(result, true); 331 | /// ``` 332 | pub fn match_u8(mimetype: MIME, bytes: &[u8]) -> bool 333 | { 334 | match_u8_noalias(get_alias(mimetype), bytes) 335 | } 336 | 337 | 338 | /// Gets the type of a file from a raw bytestream, starting at a certain node 339 | /// in the type graph. 340 | /// 341 | /// Returns MIME as string wrapped in Some if a type matches, or 342 | /// None if no match is found under the given node. 343 | /// Retreive the node from the `TYPE.hash` HashMap, using the MIME as the key. 344 | /// 345 | /// # Panics 346 | /// Will panic if the given node is not found in the graph. 347 | /// As the graph is immutable, this should not happen if the node index comes from 348 | /// TYPE.hash. 349 | /// 350 | /// # Examples 351 | /// ```rust 352 | /// /// In this example, we know we have a ZIP, but we want to see if it's something 353 | /// /// like an Office document that subclasses a ZIP. If it is not, like this example, 354 | /// /// it will return None. 355 | /// 356 | /// // Load a ZIP file 357 | /// let input: &[u8] = include_bytes!("../tests/application/zip"); 358 | /// 359 | /// // Get the graph node for ZIP 360 | /// let zipnode = tree_magic::TYPE.hash.get("application/zip").unwrap(); 361 | /// 362 | /// // Find the MIME type of the ZIP, starting from ZIP. 363 | /// let result = tree_magic::from_u8_node(*zipnode, input); 364 | /// assert_eq!(result, None); 365 | /// ``` 366 | pub fn from_u8_node(parentnode: NodeIndex, bytes: &[u8]) -> Option 367 | { 368 | typegraph_walker(parentnode, bytes, match_u8_noalias) 369 | } 370 | 371 | /// Gets the type of a file from a byte stream. 372 | /// 373 | /// Returns MIME as string. 
374 | /// 375 | /// # Examples 376 | /// ```rust 377 | /// // Load a GIF file 378 | /// let input: &[u8] = include_bytes!("../tests/image/gif"); 379 | /// 380 | /// // Find the MIME type of the GIF 381 | /// let result = tree_magic::from_u8(input); 382 | /// assert_eq!(result, "image/gif"); 383 | /// ``` 384 | pub fn from_u8(bytes: &[u8]) -> MIME 385 | { 386 | let node = match TYPE.graph.externals(Incoming).next() { 387 | Some(foundnode) => foundnode, 388 | None => panic!("No filetype definitions are loaded.") 389 | }; 390 | from_u8_node(node, bytes).unwrap() 391 | } 392 | 393 | /// Internal function. Checks if an alias exists, and if it does, 394 | /// then runs `match_u8`. 395 | fn match_filepath_noalias(mimetype: MIME, filepath: &Path) -> bool 396 | { 397 | match CHECKER_SUPPORT.get(&mimetype) { 398 | None => {false}, 399 | Some(y) => { 400 | (CHECKERS[*y].from_filepath)(filepath, mimetype) 401 | } 402 | } 403 | } 404 | 405 | /// Check if the given filepath matches the given MIME type. 406 | /// 407 | /// Returns true or false if it matches or not, or an Error if the file could 408 | /// not be read. If the given MIME type is not known, it will always return false. 409 | /// 410 | /// # Examples 411 | /// ```rust 412 | /// use std::path::Path; 413 | /// 414 | /// // Get path to a GIF file 415 | /// let path: &Path = Path::new("tests/image/gif"); 416 | /// 417 | /// // Check if the MIME and the file are a match 418 | /// let result = tree_magic::match_filepath("image/gif", path); 419 | /// assert_eq!(result, true); 420 | /// ``` 421 | pub fn match_filepath(mimetype: MIME, filepath: &Path) -> bool 422 | { 423 | // Transform alias if needed 424 | match_filepath_noalias(get_alias(mimetype), filepath) 425 | } 426 | 427 | 428 | /// Gets the type of a file from a filepath, starting at a certain node 429 | /// in the type graph. 430 | /// 431 | /// Returns MIME as string wrapped in Some if a type matches, or 432 | /// None if the file is not found or cannot be opened. 
433 | /// Retreive the node from the `TYPE.hash` FnvHashMap, using the MIME as the key. 434 | /// 435 | /// # Panics 436 | /// Will panic if the given node is not found in the graph. 437 | /// As the graph is immutable, this should not happen if the node index comes from 438 | /// `TYPE.hash`. 439 | /// 440 | /// # Examples 441 | /// ```rust 442 | /// /// In this example, we know we have a ZIP, but we want to see if it's something 443 | /// /// like an Office document that subclasses a ZIP. If it is not, like this example, 444 | /// /// it will return None. 445 | /// use std::path::Path; 446 | /// 447 | /// // Get path to a ZIP file 448 | /// let path: &Path = Path::new("tests/application/zip"); 449 | /// 450 | /// // Get the graph node for ZIP 451 | /// let zipnode = tree_magic::TYPE.hash.get("application/zip").unwrap(); 452 | /// 453 | /// // Find the MIME type of the ZIP, starting from ZIP. 454 | /// let result = tree_magic::from_filepath_node(*zipnode, path); 455 | /// assert_eq!(result, None); 456 | /// ``` 457 | pub fn from_filepath_node(parentnode: NodeIndex, filepath: &Path) -> Option 458 | { 459 | // We're actually just going to thunk this down to a u8 460 | // unless we're checking via basetype for speed reasons. 461 | 462 | // Ensure it's at least a application/octet-stream 463 | if !match_filepath(mime::APPLICATION_OCTET_STREAM, filepath){ 464 | // Check the other base types 465 | return typegraph_walker(parentnode, filepath, match_filepath_noalias); 466 | } 467 | 468 | // Load the first 2K of file and parse as u8 469 | // for batch processing like this 470 | 471 | let b = match read_bytes(filepath, 2048) { 472 | Ok(x) => x, 473 | Err(_) => return None 474 | }; 475 | 476 | from_u8_node(parentnode, b.as_slice()) 477 | } 478 | 479 | /// Gets the type of a file from a filepath. 480 | /// 481 | /// Does not look at file name or extension, just the contents. 
482 | /// Returns MIME as string wrapped in Some if a type matches, or 483 | /// None if the file is not found or cannot be opened. 484 | /// 485 | /// # Examples 486 | /// ```rust 487 | /// use std::path::Path; 488 | /// 489 | /// // Get path to a GIF file 490 | /// let path: &Path = Path::new("tests/image/gif"); 491 | /// 492 | /// // Find the MIME type of the GIF 493 | /// let result = tree_magic::from_filepath(path); 494 | /// assert_eq!(result, Some("image/gif".to_string())); 495 | /// ``` 496 | pub fn from_filepath(filepath: &Path) -> Option { 497 | 498 | let node = match TYPE.graph.externals(Incoming).next() { 499 | Some(foundnode) => foundnode, 500 | None => panic!("No filetype definitions are loaded.") 501 | }; 502 | 503 | from_filepath_node(node, filepath) 504 | } 505 | 506 | /// Determines if a MIME is an alias of another MIME 507 | /// 508 | /// If this returns true, that means the two MIME types are equivalent. 509 | /// If this returns false, either one of the MIME types are missing, or they are different. 
510 | /// 511 | /// # Examples 512 | /// ``` 513 | /// let mime1 = "application/zip".to_string(); 514 | /// let mime2 = "application/x-zip-compressed".to_string(); 515 | /// 516 | /// assert_eq!( tree_magic::is_alias(mime1, mime2), true ); 517 | pub fn is_alias(mime1: MIME, mime2: MIME) -> bool { 518 | let x = get_alias(mime1); 519 | let y = get_alias(mime2); 520 | 521 | return x == mime2 || y == mime1; 522 | } 523 | 524 | /// Reads the given number of bytes from a file 525 | fn read_bytes(filepath: &Path, bytecount: usize) -> Result, std::io::Error> { 526 | use std::io::prelude::*; 527 | use std::fs::File; 528 | 529 | let mut b = Vec::::with_capacity(bytecount); 530 | let f = File::open(filepath)?; 531 | f.take(bytecount as u64).read_to_end(&mut b)?; 532 | Ok(b) 533 | } 534 | -------------------------------------------------------------------------------- /src/main.rs: -------------------------------------------------------------------------------- 1 | //! `tmagic`: Command line client for tree_magic 2 | //! 3 | //! # Features 4 | //! - Find MIME of a file 5 | //! - Match file against a set of MIMEs (significantly faster) 6 | //! - Search a folder recursively 7 | //! 8 | //! # Usage 9 | //! 10 | //! ``` 11 | //! tmagic [FLAGS] [OPTIONS] ... 12 | //! 13 | //! FLAGS: 14 | //! -h, --help Prints help information 15 | //! -r, --recursive Search directories recursively 16 | //! --ugly Print results as they come in, at expense of tab alignment 17 | //! -V, --version Prints version information 18 | //! 19 | //! OPTIONS: 20 | //! -m, --match= Print only files that match given MIMEs, seperated by commas 21 | //! 22 | //! ARGS: 23 | //! ... List of files or folders to check. Wildcards supported. 24 | 25 | use tabwriter::TabWriter; 26 | use std::io::prelude::*; 27 | use std::sync::mpsc; 28 | use std::path::PathBuf; 29 | use walkdir::{WalkDir}; 30 | use scoped_threadpool::Pool; 31 | 32 | macro_rules! 
convmime { 33 | ($x:expr) => {$x.to_string()} 34 | } 35 | 36 | fn main() { 37 | 38 | use clap::{Arg, App}; 39 | 40 | let args = App::new("TreeMagic") 41 | .version("0.2.0") 42 | .about("Determines the MIME type of a file by traversing a filetype tree.") 43 | .arg(Arg::with_name("file") 44 | .required(true) 45 | .index(1) 46 | .multiple(true) 47 | .help("List of files or folders to check. Wildcards supported.") 48 | ) 49 | .arg(Arg::with_name("recursive") 50 | .short("r") 51 | .long("recursive") 52 | .help("Search directories recursively") 53 | ) 54 | .arg(Arg::with_name("match") 55 | .short("m") 56 | .long("match") 57 | .use_delimiter(true) 58 | .takes_value(true) 59 | .require_equals(true) 60 | .help("Print only files that match given MIMEs, seperated by commas") 61 | ) 62 | .arg(Arg::with_name("ugly") 63 | .long("ugly") 64 | .help("Print results as they come in, at expense of tab alignment") 65 | ) 66 | .get_matches(); 67 | 68 | let mut files: Vec = args.values_of("file") 69 | .unwrap() 70 | .map(|x| PathBuf::from(x)) 71 | .collect(); 72 | let is_ugly = args.is_present("ugly"); 73 | let is_recursive = args.is_present("recursive"); 74 | let check_against: Vec = match args.values_of("match") { 75 | Some(y) => {y.map(|x| x.to_string()).collect()} 76 | None => Vec::::new() 77 | }; 78 | 79 | let mut tw = TabWriter::new(vec![]); 80 | let (tx, rx) = mpsc::channel(); 81 | 82 | // Get recursive results if needed 83 | if is_recursive { 84 | for dir in files.clone() { 85 | let entries = WalkDir::new(dir).into_iter().filter_map(|e| e.ok()); 86 | for entry in entries { 87 | files.push(PathBuf::from(entry.path())); 88 | } 89 | } 90 | } 91 | let files = files; 92 | 93 | let mut pool = Pool::new(num_cpus::get() as u32); 94 | // Acquire results for non-match 95 | if check_against.is_empty(){ 96 | pool.scoped(|scope| { 97 | for file in files { 98 | //let file = file.as_str(); 99 | //let file = Path::new(file); 100 | let tx = tx.clone(); 101 | scope.execute(move || { 102 | let 
result = tree_magic::from_filepath(file.as_path()); 103 | let result = format!("{:?}:\t{:?}", file, result); 104 | if is_ugly { 105 | println!("{}", result); 106 | } else { 107 | tx.send(result + "\n").unwrap_or_default(); 108 | } 109 | }); 110 | } 111 | }); 112 | // Acquire results for check against list of MIMES 113 | } else { 114 | pool.scoped(|scope| { 115 | for file in files { 116 | //let file = file.as_str(); 117 | //let file = Path::new(file); 118 | let tx = tx.clone(); 119 | let check_against = check_against.clone(); 120 | 121 | scope.execute(move || { 122 | let mut result: Option = None; 123 | 124 | for mime in check_against { 125 | let out = tree_magic::match_filepath(mime.as_str(), file.as_path()); 126 | if out { 127 | result = Some(mime); 128 | break; 129 | } 130 | } 131 | 132 | if result.is_none() { return; } 133 | 134 | let result = result.unwrap(); 135 | let result = format!("{:?}:\t{:?}", file, result); 136 | if is_ugly { 137 | println!("{}", result); 138 | } else { 139 | tx.send(result + "\n").unwrap_or_default(); 140 | } 141 | }); 142 | } 143 | }); 144 | } 145 | drop(tx); 146 | 147 | // Pretty-print results 148 | if !is_ugly { 149 | let mut list: Vec<_> = rx.iter().collect(); 150 | list.sort(); 151 | list.dedup(); 152 | for x in list { 153 | write!(&mut tw, "{}", x).unwrap(); 154 | } 155 | 156 | tw.flush().unwrap(); 157 | let out = String::from_utf8(tw.into_inner().unwrap()).unwrap_or("".to_string()); 158 | println!("{}", out); 159 | } 160 | 161 | } 162 | -------------------------------------------------------------------------------- /tests/application/x-7z-compressed: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aahancoc/tree_magic/fcbb9592de0104e7fe6b9a580839f360a2fddbc8/tests/application/x-7z-compressed -------------------------------------------------------------------------------- /tests/application/x-tar: 
-------------------------------------------------------------------------------- 1 | plain000644 001750 000144 00000000035 13073323120 012722 0ustar00valerieusers000000 000000 This is just standard text. 2 | -------------------------------------------------------------------------------- /tests/application/zip: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aahancoc/tree_magic/fcbb9592de0104e7fe6b9a580839f360a2fddbc8/tests/application/zip -------------------------------------------------------------------------------- /tests/audio/flac: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aahancoc/tree_magic/fcbb9592de0104e7fe6b9a580839f360a2fddbc8/tests/audio/flac -------------------------------------------------------------------------------- /tests/audio/mpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aahancoc/tree_magic/fcbb9592de0104e7fe6b9a580839f360a2fddbc8/tests/audio/mpeg -------------------------------------------------------------------------------- /tests/audio/ogg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aahancoc/tree_magic/fcbb9592de0104e7fe6b9a580839f360a2fddbc8/tests/audio/ogg -------------------------------------------------------------------------------- /tests/audio/opus: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aahancoc/tree_magic/fcbb9592de0104e7fe6b9a580839f360a2fddbc8/tests/audio/opus -------------------------------------------------------------------------------- /tests/audio/wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aahancoc/tree_magic/fcbb9592de0104e7fe6b9a580839f360a2fddbc8/tests/audio/wav 
-------------------------------------------------------------------------------- /tests/from_filepath.rs: -------------------------------------------------------------------------------- 1 | mod from_filepath { 2 | 3 | extern crate tree_magic; 4 | 5 | use std::path::Path; 6 | 7 | #[test] 8 | fn nonexistent_file_returns_none() { 9 | assert_eq!( 10 | tree_magic::from_filepath(Path::new("this/file/does/not/exist")), 11 | None 12 | ); 13 | } 14 | 15 | } -------------------------------------------------------------------------------- /tests/from_u8.rs: -------------------------------------------------------------------------------- 1 | mod from_u8 { 2 | 3 | extern crate tree_magic; 4 | 5 | macro_rules! convmime { 6 | ($x:expr) => {$x.parse().unwrap()} 7 | } 8 | 9 | ///Image tests 10 | #[test] 11 | fn image_gif() { 12 | assert_eq!( 13 | tree_magic::from_u8(include_bytes!("image/gif")), 14 | convmime!("image/gif") 15 | ); 16 | } 17 | #[test] 18 | fn image_png() { 19 | assert_eq!( 20 | tree_magic::from_u8(include_bytes!("image/png")), 21 | convmime!("image/png") 22 | ); 23 | } 24 | #[test] 25 | // GNU file reports image/x-ms-bmp 26 | fn image_bmp() { 27 | assert_eq!( 28 | tree_magic::from_u8(include_bytes!("image/bmp")), 29 | convmime!("image/bmp") 30 | ); 31 | } 32 | #[test] 33 | fn image_tiff() { 34 | assert_eq!( 35 | tree_magic::from_u8(include_bytes!("image/tiff")), 36 | convmime!("image/tiff") 37 | ); 38 | } 39 | #[test] 40 | fn image_x_portable_bitmap() { 41 | assert_eq!( 42 | tree_magic::from_u8(include_bytes!("image/x-portable-bitmap")), 43 | convmime!("image/x-portable-bitmap") 44 | ); 45 | } 46 | #[test] 47 | fn image_x_pcx() { 48 | assert_eq!( 49 | tree_magic::from_u8(include_bytes!("image/x-pcx")), 50 | convmime!("image/vnd.zbrush.pcx") 51 | ); 52 | } 53 | #[test] 54 | fn image_x_tga() { 55 | assert_eq!( 56 | tree_magic::from_u8(include_bytes!("image/x-tga")), 57 | convmime!("image/x-tga") 58 | ); 59 | } 60 | 61 | 62 | /// Archive tests 63 | #[test] 64 | fn 
application_tar() { 65 | assert_eq!( 66 | tree_magic::from_u8(include_bytes!("application/x-tar")), 67 | convmime!("application/x-tar") 68 | ); 69 | } 70 | #[test] 71 | fn application_x_7z() { 72 | assert_eq!( 73 | tree_magic::from_u8(include_bytes!("application/x-7z-compressed")), 74 | convmime!("application/x-7z-compressed") 75 | ); 76 | } 77 | #[test] 78 | fn application_zip() { 79 | assert_eq!( 80 | tree_magic::from_u8(include_bytes!("application/zip")), 81 | convmime!("application/zip") 82 | ); 83 | } 84 | 85 | /// Text tests 86 | #[test] 87 | fn text_plain() { 88 | assert_eq!( 89 | tree_magic::from_u8(include_bytes!("text/plain")), 90 | convmime!("text/plain") 91 | ); 92 | } 93 | 94 | // Audio tests 95 | #[test] 96 | fn audio_flac() { 97 | assert_eq!( 98 | tree_magic::from_u8(include_bytes!("audio/flac")), 99 | convmime!("audio/flac") 100 | ); 101 | } 102 | 103 | #[test] 104 | fn audio_mpeg() { 105 | assert_eq!( 106 | tree_magic::from_u8(include_bytes!("audio/mpeg")), 107 | convmime!("audio/mpeg") 108 | ); 109 | } 110 | 111 | } 112 | -------------------------------------------------------------------------------- /tests/image/bmp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aahancoc/tree_magic/fcbb9592de0104e7fe6b9a580839f360a2fddbc8/tests/image/bmp -------------------------------------------------------------------------------- /tests/image/gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aahancoc/tree_magic/fcbb9592de0104e7fe6b9a580839f360a2fddbc8/tests/image/gif -------------------------------------------------------------------------------- /tests/image/png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aahancoc/tree_magic/fcbb9592de0104e7fe6b9a580839f360a2fddbc8/tests/image/png 
-------------------------------------------------------------------------------- /tests/image/tiff: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aahancoc/tree_magic/fcbb9592de0104e7fe6b9a580839f360a2fddbc8/tests/image/tiff -------------------------------------------------------------------------------- /tests/image/x-pcx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aahancoc/tree_magic/fcbb9592de0104e7fe6b9a580839f360a2fddbc8/tests/image/x-pcx -------------------------------------------------------------------------------- /tests/image/x-portable-bitmap: -------------------------------------------------------------------------------- 1 | P4 2 | 1 1 3 | -------------------------------------------------------------------------------- /tests/image/x-tga: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aahancoc/tree_magic/fcbb9592de0104e7fe6b9a580839f360a2fddbc8/tests/image/x-tga -------------------------------------------------------------------------------- /tests/image/xbm: -------------------------------------------------------------------------------- 1 | #define xbm_width 1 2 | #define xbm_height 1 3 | static unsigned char xbm_bits[] = { 4 | 0x00 }; 5 | -------------------------------------------------------------------------------- /tests/match_u8.rs: -------------------------------------------------------------------------------- 1 | mod match_u8 { 2 | extern crate tree_magic; 3 | 4 | ///Image tests 5 | #[test] 6 | fn image_gif() { 7 | assert!(tree_magic::match_u8("image/gif", include_bytes!("image/gif"))); 8 | } 9 | #[test] 10 | fn image_png() { 11 | assert!(tree_magic::match_u8("image/png", include_bytes!("image/png"))); 12 | } 13 | #[test] 14 | // GNU file reports as image/x-ms-bmp 15 | fn image_x_bmp() { 16 | assert!(tree_magic::match_u8("image/bmp", 
include_bytes!("image/bmp"))); 17 | } 18 | #[test] 19 | fn image_tiff() { 20 | assert!(tree_magic::match_u8("image/tiff", include_bytes!("image/tiff"))); 21 | } 22 | #[test] 23 | fn image_x_portable_bitmap() { 24 | assert!(tree_magic::match_u8("image/x-portable-bitmap", include_bytes!("image/x-portable-bitmap"))); 25 | } 26 | #[test] 27 | fn image_x_pcx() { 28 | assert!(tree_magic::match_u8("image/x-pcx", include_bytes!("image/x-pcx"))); 29 | } 30 | #[test] 31 | fn image_x_tga() { 32 | assert!(tree_magic::match_u8("image/x-tga", include_bytes!("image/x-tga"))); 33 | } 34 | 35 | 36 | /// Archive tests 37 | #[test] 38 | fn application_tar() { 39 | assert!(tree_magic::match_u8("application/x-tar", include_bytes!("application/x-tar"))); 40 | } 41 | #[test] 42 | fn application_x_7z() { 43 | assert!(tree_magic::match_u8("application/x-7z-compressed", include_bytes!("application/x-7z-compressed"))); 44 | } 45 | #[test] 46 | fn application_zip() { 47 | assert!(tree_magic::match_u8("application/zip", include_bytes!("application/zip"))); 48 | } 49 | 50 | /// Text tests 51 | #[test] 52 | fn text_plain() { 53 | assert!(tree_magic::match_u8("text/plain", include_bytes!("text/plain"))); 54 | } 55 | 56 | // Audio tests 57 | #[test] 58 | fn audio_flac() { 59 | assert!(tree_magic::match_u8("audio/flac", include_bytes!("audio/flac"))); 60 | } 61 | #[test] 62 | fn audio_mpeg() { 63 | assert!(tree_magic::match_u8("audio/mpeg", include_bytes!("audio/mpeg"))); 64 | } 65 | #[test] 66 | fn audio_ogg() { 67 | assert!(tree_magic::match_u8("audio/ogg", include_bytes!("audio/ogg"))); 68 | } 69 | #[test] 70 | fn audio_wav() { 71 | assert!(tree_magic::match_u8("audio/wav", include_bytes!("audio/wav"))); 72 | } 73 | } 74 | -------------------------------------------------------------------------------- /tests/text/plain: -------------------------------------------------------------------------------- 1 | This is just standard text. 
2 | --------------------------------------------------------------------------------