├── .travis.yml ├── .gitignore ├── .github └── workflows │ └── node.js.yml ├── lib ├── stream-mmmagic.d.ts └── stream-mmmagic.js ├── package.json ├── LICENSE ├── CHANGELOG.md ├── test.js └── README.md /.travis.yml: -------------------------------------------------------------------------------- 1 | language: node_js 2 | node_js: 3 | - "node" 4 | - "12" 5 | - "10" 6 | - "8" 7 | env: 8 | - CXX=g++-4.8 9 | addons: 10 | apt: 11 | sources: 12 | - ubuntu-toolchain-r-test 13 | packages: 14 | - g++-4.8 15 | - build-essential 16 | script: 17 | - npm test 18 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Logs 2 | logs 3 | *.log 4 | 5 | # Runtime data 6 | pids 7 | *.pid 8 | *.seed 9 | 10 | # Directory for instrumented libs generated by jscoverage/JSCover 11 | lib-cov 12 | 13 | # Coverage directory used by tools like istanbul 14 | coverage 15 | 16 | # Grunt intermediate storage (http://gruntjs.com/creating-plugins#storing-task-files) 17 | .grunt 18 | 19 | # Compiled binary addons (http://nodejs.org/api/addons.html) 20 | build/Release 21 | 22 | # Dependency directory 23 | # Commenting this out is preferred by some people, see 24 | # https://www.npmjs.org/doc/misc/npm-faq.html#should-i-check-my-node_modules-folder-into-git- 25 | node_modules 26 | 27 | # Users Environment Variables 28 | .lock-wscript 29 | -------------------------------------------------------------------------------- /.github/workflows/node.js.yml: -------------------------------------------------------------------------------- 1 | # This workflow will do a clean install of node dependencies, cache/restore them, build the source code and run tests across different versions of node 2 | # For more information see: https://help.github.com/actions/language-and-framework-guides/using-nodejs-with-github-actions 3 | 4 | name: Node.js CI 5 | 6 | on: 7 | push: 8 | branches: [ master ] 9 
| pull_request: 10 | branches: [ master ] 11 | 12 | jobs: 13 | build: 14 | 15 | runs-on: ubuntu-latest 16 | 17 | strategy: 18 | matrix: 19 | node-version: [12.x, 14.x, 16.x] 20 | # See supported Node.js release schedule at https://nodejs.org/en/about/releases/ 21 | 22 | steps: 23 | - uses: actions/checkout@v2 24 | - name: Use Node.js ${{ matrix.node-version }} 25 | uses: actions/setup-node@v2 26 | with: 27 | node-version: ${{ matrix.node-version }} 28 | cache: 'npm' 29 | - run: npm ci 30 | - run: npm test 31 | -------------------------------------------------------------------------------- /lib/stream-mmmagic.d.ts: -------------------------------------------------------------------------------- 1 | /** Type declarations for the `stream-mmmagic` package. */ declare module 'stream-mmmagic' { 2 | import ReadableStream = NodeJS.ReadableStream 3 | 4 | /** Detection result: either the raw mime string or an object of `type` and `encoding`. */ export type MimeType = string | { 5 | type: string, 6 | encoding: string 7 | } 8 | 9 | /** Options accepted by both the callback form and the promise form. */ export interface SniffStreamMimeTypeOptions { 10 | /** Path of a custom magic file. */ magicFile?: string 11 | /** When false the mime is reported as the raw string; the implementation treats an omitted value as true. */ splitMime?: boolean 12 | /** Number of bytes from the start of the stream handed to libmagic; the implementation falls back to 16384. */ peekBytes?: number 13 | } 14 | 15 | /** Call signatures of the exported function plus its `promise` helper. */ interface SniffStreamMimeType { 16 | /* NOTE(review): the implementation calls back with `null` as the error on success and `null` as the mime on failure, so `Error | null` / `MimeType | null` look like the more accurate types - confirm before changing, as it may affect callers that annotate their callback. */ ( 17 | input: ReadableStream, 18 | callback: (error: Error, mime: MimeType, output: ReadableStream) => void 19 | ): void 20 | 21 | ( 22 | input: ReadableStream, 23 | options: SniffStreamMimeTypeOptions, 24 | callback: (error: Error, mime: MimeType, output: ReadableStream) => void 25 | ): void 26 | 27 | /** Promise form: resolves with a `[mime, output]` tuple. */ promise (input: ReadableStream, options?: SniffStreamMimeTypeOptions): Promise<[MimeType, ReadableStream]> 28 | } 29 | 30 | const sniffStreamMimeType: SniffStreamMimeType 31 | 32 | /* NOTE(review): the JS file assigns the function to `module.exports`, so this default export presumably relies on the consumer's module interop settings - confirm. */ export default sniffStreamMimeType 33 | } 34 | -------------------------------------------------------------------------------- /package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "stream-mmmagic", 3 | "version": "2.3.0", 4 | "description": "sniff the start of a stream (non-destructively) to detect the file type and encoding", 5 | "main": "lib/stream-mmmagic.js", 6 | "types":
"lib/stream-mmmagic.d.ts", 7 | "scripts": { 8 | "test": "mocha test.js" 9 | }, 10 | "repository": { 11 | "type": "git", 12 | "url": "https://github.com/seangarner/node-stream-mmmagic.git" 13 | }, 14 | "keywords": [ 15 | "stream", 16 | "mmmagic", 17 | "sniff", 18 | "filetype", 19 | "mime", 20 | "detection", 21 | "encoding" 22 | ], 23 | "author": "Sean Garner", 24 | "license": "MIT", 25 | "bugs": { 26 | "url": "https://github.com/seangarner/node-stream-mmmagic/issues" 27 | }, 28 | "homepage": "https://github.com/seangarner/node-stream-mmmagic", 29 | "dependencies": { 30 | "buffer-peek-stream": "^1.1.0", 31 | "mmmagic": "^0.5.0" 32 | }, 33 | "devDependencies": { 34 | "chai": "^4.3.4", 35 | "concat-stream": "^2.0.0", 36 | "mocha": "^9.0.2" 37 | } 38 | } 39 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | The MIT License (MIT) 2 | 3 | Copyright (c) 2019 Sean Garner 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /lib/stream-mmmagic.js: -------------------------------------------------------------------------------- 1 | 'use strict'; 2 | 3 | const mmm = require('mmmagic'); 4 | const peek = require('buffer-peek-stream'); 5 | 6 | function streamMmmagic(stream, options, callback) { 7 | 8 | if (!callback) { 9 | callback = options; 10 | options = {}; 11 | } 12 | 13 | const wantSplit = options.splitMime === undefined ? true : options.splitMime; 14 | const magicFile = options.magicFile; 15 | const peekBytes = options.peekBytes || 16384; 16 | 17 | // peek 16K which is more than you need for type; more would give it better chance on encoding 18 | return peek(stream, peekBytes, (err, buf, dest) => { 19 | if (err) return callback(err, null, dest); 20 | 21 | let magic; 22 | if (magicFile) { 23 | if (magicFiles.hasOwnProperty(magicFile)) { 24 | magic = magicFiles[magicFile]; 25 | } else { 26 | magic = magicFiles[magicFile] = new mmm.Magic(magicFile, mmm.MAGIC_MIME); 27 | } 28 | } else { 29 | magic = _magic; 30 | } 31 | 32 | magic.detect(buf, (err, res) => { 33 | if (err) return callback(err, null, dest); 34 | if (wantSplit) { 35 | res = splitMime(res); 36 | } 37 | callback(null, res, dest); 38 | }); 39 | }); 40 | } 41 | 42 | const _magic = new mmm.Magic(mmm.MAGIC_MIME); 43 | const magicFiles = {}; 44 | 45 | module.exports = streamMmmagic; 46 | 47 | module.exports.promise = function streamMmmagicPromise(input, options) { 48 | return new Promise((resolve, reject) => { 49 | streamMmmagic(input, options || {}, (err, mime, output) => { 50 | if (err) return reject(err); 51 | resolve([mime, output]); 52 | }) 53 | }); 54 | } 55 | 56 | 
var _splitMime = /^(.*); charset=(.*)$/; 57 | function splitMime(s) { 58 | var p = s.match(_splitMime); 59 | return { 60 | type: p[1], 61 | encoding: p[2] 62 | }; 63 | } 64 | -------------------------------------------------------------------------------- /CHANGELOG.md: -------------------------------------------------------------------------------- 1 | # stream-mmmagic changelog 2 | 3 | ## 2.3.0 (2021/07/03) 4 | 5 | - add typescript types 6 | - bump deps 7 | - switch from travis to github actions 8 | 9 | ## 2.2.0 (2019/10/03) 10 | 11 | - add promise interface 12 | - drop support for node 4 & 6 (it might still work - but it's no longer tested) 13 | - added node 12 to test 14 | 15 | ## 2.1.0 (2018/09/19) 16 | 17 | - support node 10 18 | 19 | ## 2.0.0 (2017/01/30) 20 | **Major Changes** 21 | 22 | - removed support for setting `require('stream-mmmagic').config.magicFile` 23 | - `magicFile` option added which can be used during each call instead 24 | 25 | **Minor Changes** 26 | 27 | - add `peekBytes` option to control how many bytes of the start of the stream are sent to libmagic 28 | + default is sufficient for detecting type, but more may improve reliability of getting correct encoding 29 | - add `magicFile` option which replaces fragile module `config.magicFile` setting 30 | - add caching of `mmmagic` objects instead of recreating them for every new stream 31 | + Makes the (safe?) assumption that nobody is using this module with large numbers of unique magic files 32 | 33 | ## 1.1.0 (2017/01/09) 34 | 35 | - add `splitMime: false` option for #5 36 | - add tests 37 | 38 | ## 1.0.0 (2016/08/31) 39 | is backwards compatible with `0.2.0`; bump signifies this is now considered stable after being used 100,000s of times 40 | in a production environment. 
41 | 42 | - add support for node 5 & 6 43 | 44 | ## 0.2.0 (2014/11/12) 45 | **backwards incompatible** using semver; api not stable until 1.0.0 46 | 47 | - function now returns a stream which should be piped from instead of the input stream 48 | - fix many issues with edge case streams, especially small binary ones 49 | 50 | ## 0.1.0 (2014/11/06) 51 | *don't use; broken for streams that emit more than 1 chunk* 52 | -------------------------------------------------------------------------------- /test.js: -------------------------------------------------------------------------------- 1 | const fs = require('fs'); 2 | const magic = require('./'); 3 | const stream = require('stream'); 4 | const expect = require('chai').expect; 5 | const concat = require('concat-stream'); 6 | 7 | /* Mocha suite for the callback and promise APIs; every test sniffs this test file itself. */ describe('stream-mmmagic', () => { 8 | /* Opens a utf8 read stream of this file and registers an `after` hook that closes it. */ function getStream() { 9 | const rs = fs.createReadStream(__filename, {encoding: 'utf8'}); 10 | after(() => { 11 | rs.close(); 12 | }); 13 | return rs; 14 | } 15 | 16 | it('should callback a Readable stream', (done) => { 17 | magic(getStream(), (err, mime, output) => { 18 | if (err) return done(err); 19 | expect(output).to.be.an.instanceof(stream.Readable); 20 | done(); 21 | }); 22 | }); 23 | 24 | /* Reads the file once directly and once through magic(), then expects both reads to yield the same content. */ it('should not callback a readstream with partially read data', (done) => { 25 | getStream().pipe(concat((sansMagic) => { 26 | magic(getStream(), (err, mime, output) => { 27 | if (err) return done(err); 28 | output.setEncoding('utf8'); 29 | output.pipe(concat((withMagic) => { 30 | expect(withMagic).to.eql(sansMagic); 31 | done(); 32 | })); 33 | }); 34 | })); 35 | }); 36 | 37 | it('should callback a mime type split into type and encoding', (done) => { 38 | magic(getStream(), (err, mime, output) => { 39 | if (err) return done(err); 40 | expect(mime).to.eql({ 41 | type: 'text/plain', 42 | // 🦄 force utf8 encoding of this file for the test 43 | encoding: 'utf-8' 44 | }); 45 | done(); 46 | }); 47 | }); 48 | 49 | it('should callback a mime string if splitMime:false',
(done) => { 50 | magic(getStream(), {splitMime: false}, (err, mime, output) => { 51 | if (err) return done(err); 52 | expect(mime).to.equal('text/plain; charset=utf-8'); 53 | done(); 54 | }); 55 | }); 56 | 57 | /* Passes the magic.mgc found under node_modules/mmmagic as `magicFile` and expects the same split result. */ it('should create new Magic object if a magicFile is specified', (done) => { 58 | const magicFile = 'node_modules/mmmagic/magic/magic.mgc'; 59 | magic(getStream(), {magicFile}, (err, mime, output) => { 60 | if (err) return done(err); 61 | expect(mime).to.eql({ 62 | type: 'text/plain', 63 | encoding: 'utf-8' 64 | }); 65 | done(); 66 | }); 67 | }); 68 | 69 | /* Same checks for the promise wrapper exposed as `magic.promise`. */ describe('promise', () => { 70 | it('should return a promise', async () => { 71 | expect(magic.promise(getStream())).to.be.a('promise'); 72 | }); 73 | 74 | it('should resolve an array with the mime type & output stream', async () => { 75 | const result = await magic.promise(getStream()); 76 | expect(result).to.be.an('array').with.lengthOf(2); 77 | const [mime, output] = result; 78 | expect(mime).to.eql({ 79 | type: 'text/plain', 80 | encoding: 'utf-8' // 🦄 force utf8 encoding of this file for the test 81 | }); 82 | expect(output).to.be.an.instanceof(stream.Readable); 83 | }); 84 | }); 85 | 86 | }); 87 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # node-stream-mmmagic 2 | [![Build Status](https://travis-ci.org/seangarner/node-stream-mmmagic.svg?branch=master)](https://travis-ci.org/seangarner/node-stream-mmmagic) 3 | 4 | Node module to sniff the start of a stream (non-destructively) to detect the file type and encoding 5 | when you don't have the luxury of being able to restart the stream again. 6 | 7 | It does so by using [buffer-peek-stream](https://github.com/seangarner/node-buffer-peek-stream) to 8 | get the first 16KB of the stream then send that to mmmagic (which uses libmagic).
Before it's 9 | finished the peek stream will unshift the bytes it's received back onto the origin stream thereby 10 | making it appear as if the origin stream was new. 11 | 12 | ```bash 13 | npm install stream-mmmagic 14 | ``` 15 | 16 | ### Use 17 | ```js 18 | const magic = require('stream-mmmagic'); 19 | const input = fs.createReadStream('somefile.csv'); 20 | 21 | const [mime, output] = await magic.promise(input); 22 | console.log('TYPE:', mime.type); 23 | console.log('ENCODING:', mime.encoding); 24 | output.pipe(process.stdout); 25 | 26 | //- TYPE: text/plain 27 | //- ENCODING: us-ascii 28 | //- 29 | ``` 30 | 31 | 32 | ### Use (Callbacks) 33 | ```js 34 | var magic = require('stream-mmmagic'); 35 | 36 | var input = fs.createReadStream('somefile.csv'); 37 | 38 | magic(input, function (err, mime, output) { 39 | if (err) throw err; 40 | 41 | console.log('TYPE:', mime.type); 42 | console.log('ENCODING:', mime.encoding); 43 | 44 | // will print the *whole* file 45 | output.pipe(process.stdout); 46 | }); 47 | 48 | //- TYPE: text/plain 49 | //- ENCODING: us-ascii 50 | //- 51 | ``` 52 | 53 | ### `options.magicFile` Custom Magic File 54 | A magic file is bundled with the mmmagic npm module but if you want to use your own then set the path to the file on 55 | the `magicFile` option. 56 | 57 | ```js 58 | const magicFile = '/usr/share/magic'; 59 | magic(input, {magicFile}, callback); 60 | ``` 61 | 62 | ### `options.splitMime` Original Mime String 63 | Use `{splitMime: false}` option to get back the original mime string instead of a split object. 64 | ```js 65 | const [mime] = await magic.promise(input, {splitMime: false}); 66 | console.log(mime); 67 | //- text/plain; charset=us-ascii 68 | ``` 69 | 70 | ### `options.peekBytes` Control Bytes Used for Analysis 71 | As the input stream starts to get data the first 16KB is buffered and sent to libmagic for analysis to get file type and 72 | encoding.
1KB is more than enough for detecting file type with a standard `magicFile` but the reliability of getting the 73 | correct encoding is increased the more bytes are buffered. The tradeoff is performance and memory use. 74 | 75 | Set `peekBytes` to the number of bytes you want buffered and sent to libmagic. For best results do not set below 256 76 | bytes. 77 | 78 | ```js 79 | // somefile.txt is a utf8 file where the first doublebyte char is after the first 1KB of the file 80 | const input = fs.createReadStream('somefile.txt'); 81 | 82 | const [{encoding}, output] = await magic.promise(input, {peekBytes: 1024}); 83 | console.log(encoding); 84 | // not detected as utf8 because the first doublebyte char wasn't until later in the stream 85 | //- us-ascii 86 | 87 | const [{encoding}, output] = await magic.promise(input, {peekBytes: 16384}); 88 | console.log(encoding); 89 | // now we're peeking 16KB into the file libmagic gets that first doublebyte char and knows it's utf8 90 | //- utf-8 91 | ``` 92 | 93 | ## LICENSE 94 | MIT 95 | --------------------------------------------------------------------------------