├── test ├── example │ ├── 1.txt │ ├── 2.txt │ └── 3.txt ├── workers │ ├── error-worker.js │ └── worker.js ├── error.spec.js └── index.spec.js ├── .gitignore ├── codecov.yml ├── renovate.json ├── .eslintrc.js ├── .travis.yml ├── LICENSE ├── package.json ├── index.js ├── README.md ├── .github └── workflows │ └── codeql-analysis.yml └── CHANGELOG.md /test/example/1.txt: -------------------------------------------------------------------------------- 1 | a -------------------------------------------------------------------------------- /test/example/2.txt: -------------------------------------------------------------------------------- 1 | b -------------------------------------------------------------------------------- /test/example/3.txt: -------------------------------------------------------------------------------- 1 | c -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | node_modules 2 | coverage 3 | yarn-error.log 4 | -------------------------------------------------------------------------------- /codecov.yml: -------------------------------------------------------------------------------- 1 | coverage: 2 | parsers: 3 | javascript: 4 | enable_partials: yes 5 | -------------------------------------------------------------------------------- /test/workers/error-worker.js: -------------------------------------------------------------------------------- 1 | 'use strict'; 2 | 3 | /** Test worker that never succeeds: it ignores fileName and immediately calls callback with a new Error as the first argument. */ module.exports = function (fileName, callback) { 4 | callback(new Error('Not today')); 5 | }; 6 | -------------------------------------------------------------------------------- /renovate.json: -------------------------------------------------------------------------------- 1 | { 2 | "$schema": "https://docs.renovatebot.com/renovate-schema.json", 3 | "extends": ["@researchgate:lib"], 4 | "labels": ["dependencies"] 5 | } 6 | 
-------------------------------------------------------------------------------- /.eslintrc.js: -------------------------------------------------------------------------------- 1 | 'use strict'; 2 | // This file was created by spire-plugin-eslint for editor support 3 | module.exports = require('@researchgate/spire-config/eslint/node'); 4 | -------------------------------------------------------------------------------- /test/workers/worker.js: -------------------------------------------------------------------------------- 1 | 'use strict'; 2 | 3 | const fs = require('fs'); 4 | 5 | /** Test worker: reads fileName as utf8 text and passes callback directly to fs.readFile, so callback receives the read error or the file contents. */ module.exports = function (fileName, callback) { 6 | fs.readFile(fileName, 'utf8', callback); 7 | }; 8 | -------------------------------------------------------------------------------- /test/error.spec.js: -------------------------------------------------------------------------------- 1 | const path = require('path'); 2 | const workerPath = require.resolve('./workers/error-worker'); 3 | const FileProcessor = require('..'); 4 | 5 | /* Builds the path of a fixture file inside the example directory next to this spec. */ const examplePath = (fileName) => path.join(__dirname, 'example', fileName); 6 | const pattern = examplePath('*.txt'); 7 | 8 | /* Failure path: the error worker calls back with an Error for every file, and the processor is expected to surface it through its error event. NOTE(review): the pattern matches three fixture files, so this listener may fire more than once and call done() repeatedly; the processor is also never destroyed here - confirm both are acceptable. */ describe('fails', () => { 9 | test('reports error in worker', (done) => { 10 | const fileProcessor = new FileProcessor(pattern, workerPath); 11 | fileProcessor.on('error', (err) => { 12 | expect(err).toEqual(new Error('Not today')); 13 | done(); 14 | }); 15 | }); 16 | }); 17 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | language: node_js 2 | node_js: 3 | - "15" 4 | - "14" 5 | - "12" 6 | - "10" 7 | script: 8 | - yarn test --coverage 9 | after_success: 10 | - bash <(curl -s https://codecov.io/bash) -f coverage/coverage-final.json 11 | jobs: 12 | include: 13 | - stage: test 14 | node_js: lts/* 15 | name: "Codestyle" 16 | script: 17 | - yarn lint 18 | after_success: skip 19 | - stage: release 20 | 
node_js: lts/* 21 | script: skip 22 | deploy: 23 | provider: script 24 | skip_cleanup: true 25 | script: yarn release 26 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | The MIT License (MIT) 2 | 3 | Copyright (c) Sergey Tatarintsev 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy of 6 | this software and associated documentation files (the "Software"), to deal in 7 | the Software without restriction, including without limitation the rights to 8 | use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of 9 | the Software, and to permit persons to whom the Software is furnished to do so, 10 | subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS 17 | FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR 18 | COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER 19 | IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 20 | CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
21 | -------------------------------------------------------------------------------- /package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "@researchgate/file-processor", 3 | "version": "3.2.0", 4 | "description": "Run parallel tasks on a multiple files", 5 | "main": "index.js", 6 | "author": "Sergey Tatarintsev (https://github.com/SevInf)", 7 | "license": "MIT", 8 | "repository": "researchgate/node-file-processor", 9 | "scripts": { 10 | "test": "spire test", 11 | "lint": "spire lint", 12 | "release": "spire release" 13 | }, 14 | "engines": { 15 | "node": ">= 10.18.1" 16 | }, 17 | "files": [ 18 | "index.js" 19 | ], 20 | "dependencies": { 21 | "fast-glob": "^3.2.4", 22 | "worker-farm": "^1.4.1" 23 | }, 24 | "devDependencies": { 25 | "@researchgate/spire-config": "6.0.2", 26 | "spire": "3.2.3", 27 | "spire-plugin-semantic-release": "3.2.3" 28 | }, 29 | "spire": { 30 | "extends": [ 31 | [ 32 | "@researchgate/spire-config", 33 | { 34 | "eslint": "node", 35 | "jest": "base" 36 | } 37 | ] 38 | ], 39 | "plugins": [ 40 | "spire-plugin-semantic-release" 41 | ] 42 | }, 43 | "jest": { 44 | "preset": "@researchgate/jest-preset-base", 45 | "testEnvironment": "node" 46 | }, 47 | "prettier": "@researchgate/prettier-config" 48 | } 49 | -------------------------------------------------------------------------------- /index.js: -------------------------------------------------------------------------------- 1 | 'use strict'; 2 | 3 | const { EventEmitter } = require('events'); 4 | const fg = require('fast-glob'); 5 | const workerFarm = require('worker-farm'); 6 | 7 | /** Streams the paths produced by fg.stream(globPattern, globOptions) and hands each one to a worker-farm worker, re-emitting progress as the events queued, processed, error, allQueued and end. Constructor arguments: worker is passed to workerFarm as the worker module; options.worker is passed to workerFarm as its options; options.invokeWorker replaces the default way a worker is called; options.keepAlive skips ending the farm when processing finishes. */ class FileProcessor extends EventEmitter { 8 | constructor(globPattern, worker, options = {}, globOptions = {}) { 9 | super(); 10 | /* The = {} default only applies to undefined; this also replaces null and other falsy values. */ options = options || {}; 11 | /* NOTE(review): no error listener is attached to this glob stream anywhere in the constructor - confirm how stream errors should be reported. */ const glob = (this.glob = fg.stream(globPattern, globOptions)); 12 | this.invokeWorker = options.invokeWorker || defaultInvokeWorker; 13 | const workers = (this.workers = workerFarm(options.worker || 
 {}, worker)); 14 | 15 | /* Progress state shared by the handlers below. */ let allQueued = false; 16 | let errorHappened = false; 17 | let queuedCount = 0; 18 | let processedCount = 0; 19 | 20 | /* Called after every worker callback and once when the glob stream ends. Ends the farm (unless keepAlive is set) when an error was seen or when every queued path has been processed; the end event is emitted only when no error was seen. NOTE(review): once errorHappened is set, every later call re-enters this branch, so workerFarm.end may be called more than once - confirm that is intended. */ const checkForEnd = () => { 21 | if (errorHappened || (allQueued && queuedCount === processedCount)) { 22 | if (!options.keepAlive) { 23 | workerFarm.end(workers); 24 | } 25 | if (!errorHappened) this.emit('end'); 26 | } 27 | }; 28 | 29 | /* Each matched path is counted, announced with queued, and sent to a worker. */ glob.on('data', (path) => { 30 | queuedCount++; 31 | this.emit('queued', path); 32 | this.process(path, (err, result) => { 33 | processedCount++; 34 | if (err) { 35 | /* NOTE(review): the glob stream is not destroyed here, so later paths are still queued after an error - confirm. */ errorHappened = true; 36 | this.emit('error', err); 37 | } else { 38 | this.emit('processed', path, result); 39 | } 40 | 41 | checkForEnd(); 42 | }); 43 | }); 44 | 45 | /* The stats object carries the counters as they are when the glob stream ends; workers may still be running. */ glob.on('end', () => { 46 | allQueued = true; 47 | this.emit('allQueued', { queuedCount, processedCount }); 48 | checkForEnd(); 49 | }); 50 | } 51 | 52 | /** Sends one path to the worker farm through this.invokeWorker, forwarding callback unchanged. */ process(path, callback) { 53 | this.invokeWorker(this.workers, path, callback); 54 | } 55 | 56 | /** Destroys the glob stream and ends the worker farm; callback is forwarded to workerFarm.end. */ destroy(callback) { 57 | this.glob.destroy(); 58 | workerFarm.end(this.workers, callback); 59 | } 60 | } 61 | 62 | /** Default for options.invokeWorker: calls the farm function directly with the path and the callback. */ function defaultInvokeWorker(workers, path, callback) { 63 | workers(path, callback); 64 | } 65 | 66 | module.exports = FileProcessor; 67 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # file-processor 2 | 3 | [![Build Status](https://travis-ci.com/researchgate/node-file-processor.svg?branch=master)](https://travis-ci.com/researchgate/node-file-processor) 4 | [![codecov](https://codecov.io/gh/researchgate/node-file-processor/branch/master/graph/badge.svg)](https://codecov.io/gh/researchgate/node-file-processor) 5 | 6 | Node.js utility for mass-processing files in parallel. 7 | 8 | ## Usage 9 | 10 | Files are handled in parallel running workers. In order to use the library you 11 | need 2 modules. 
12 | 13 | ### Worker module 14 | 15 | Must export a single function, accepting `fileName` and `callback`. This function 16 | must process the file and call the `callback` when it is done. The function can be 17 | asynchronous. 18 | 19 | ```js 20 | module.exports = function (fileName, callback) { 21 | const result = doExpensiveProcessing(fileName); 22 | callback(null, result); 23 | }; 24 | ``` 25 | 26 | ### Main module 27 | 28 | Must use the `FileProcessor` class and provide it with one or more glob patterns and 29 | the path to the worker module. Each file matching the pattern will be processed by 30 | the worker module. 31 | 32 | ```js 33 | const FileProcessor = require('@researchgate/file-processor'); 34 | const processor = new FileProcessor( 35 | ['path/to/some/files/*.txt', 'some/other/path/*.js'], 36 | require.resolve('./worker') 37 | ); 38 | 39 | processor.on('processed', (fileName, result) => { 40 | console.log(`result for ${fileName}: ${result}`); 41 | }); 42 | ``` 43 | 44 | A `FileProcessor` instance emits the following events: 45 | 46 | * `queued` - file is queued for processing. 47 | 48 | Arguments: 49 | 50 | - `fileName` 51 | 52 | * `processed` - file is successfully processed by the worker. 53 | 54 | Arguments: 55 | 56 | - `fileName` 57 | - `result` - the result returned by the worker module 58 | 59 | * `error` - the worker failed to process the file 60 | 61 | Arguments: 62 | 63 | - `error` 64 | 65 | * `allQueued` - all files matching the pattern are queued for processing. 66 | 67 | Arguments: 68 | 69 | - `stats` - object with the following fields 70 | 71 | - `queuedCount` - total number of queued files 72 | - `processedCount` - total number of files which are already processed 73 | 74 | * `end` - all files are processed. Not emitted if a worker reported an error. 
75 | -------------------------------------------------------------------------------- /.github/workflows/codeql-analysis.yml: -------------------------------------------------------------------------------- 1 | # For most projects, this workflow file will not need changing; you simply need 2 | # to commit it to your repository. 3 | # 4 | # You may wish to alter this file to override the set of languages analyzed, 5 | # or to provide custom queries or build logic. 6 | # 7 | # ******** NOTE ******** 8 | # We have attempted to detect the languages in your repository. Please check 9 | # the `language` matrix defined below to confirm you have the correct set of 10 | # supported CodeQL languages. 11 | # 12 | name: "CodeQL" 13 | 14 | on: 15 | push: 16 | branches: [ master ] 17 | pull_request: 18 | # The branches below must be a subset of the branches above 19 | branches: [ master ] 20 | schedule: 21 | - cron: '19 9 * * 1' 22 | 23 | jobs: 24 | analyze: 25 | name: Analyze 26 | runs-on: ubuntu-latest 27 | 28 | strategy: 29 | fail-fast: false 30 | matrix: 31 | language: [ 'javascript' ] 32 | # CodeQL supports [ 'cpp', 'csharp', 'go', 'java', 'javascript', 'python' ] 33 | # Learn more: 34 | # https://docs.github.com/en/free-pro-team@latest/github/finding-security-vulnerabilities-and-errors-in-your-code/configuring-code-scanning#changing-the-languages-that-are-analyzed 35 | 36 | steps: 37 | - name: Checkout repository 38 | uses: actions/checkout@v2 39 | 40 | # Initializes the CodeQL tools for scanning. 41 | - name: Initialize CodeQL 42 | uses: github/codeql-action/init@v1 43 | with: 44 | languages: ${{ matrix.language }} 45 | # If you wish to specify custom queries, you can do so here or in a config file. 46 | # By default, queries listed here will override any specified in a config file. 47 | # Prefix the list here with "+" to use these queries and those in the config file. 
48 | # queries: ./path/to/local/query, your-org/your-repo/queries@main 49 | 50 | # Autobuild attempts to build any compiled languages (C/C++, C#, or Java). 51 | # If this step fails, then you should remove it and run the build manually (see below) 52 | - name: Autobuild 53 | uses: github/codeql-action/autobuild@v1 54 | 55 | # ℹ️ Command-line programs to run using the OS shell. 56 | # 📚 https://git.io/JvXDl 57 | 58 | # ✏️ If the Autobuild fails above, remove it and uncomment the following three lines 59 | # and modify them (or add more) to build your code if your project 60 | # uses a compiled language 61 | 62 | #- run: | 63 | # make bootstrap 64 | # make release 65 | 66 | - name: Perform CodeQL Analysis 67 | uses: github/codeql-action/analyze@v1 68 | -------------------------------------------------------------------------------- /CHANGELOG.md: -------------------------------------------------------------------------------- 1 | # [3.2.0](https://github.com/researchgate/node-file-processor/compare/v3.1.0...v3.2.0) (2020-12-22) 2 | 3 | 4 | ### Features 5 | 6 | * Move to `fast-glob` and expose options for it. 
([#48](https://github.com/researchgate/node-file-processor/issues/48)) ([3b5d2e5](https://github.com/researchgate/node-file-processor/commit/3b5d2e55d4ccb940ec3f7d812c0bf32256d7df09)) 7 | 8 | # [3.1.0](https://github.com/researchgate/node-file-processor/compare/v3.0.1...v3.1.0) (2020-06-03) 9 | 10 | 11 | ### Features 12 | 13 | * Support configuring how workers are invoked ([cea4464](https://github.com/researchgate/node-file-processor/commit/cea4464ca9a22ccb01e4bd4c31aa55e8cea7c4e2)) 14 | 15 | ## [3.0.1](https://github.com/researchgate/node-file-processor/compare/v3.0.0...v3.0.1) (2020-05-07) 16 | 17 | 18 | # [3.0.0](https://github.com/researchgate/node-file-processor/compare/v2.0.0...v3.0.0) (2020-05-07) 19 | 20 | 21 | ### Features 22 | 23 | * Drop support for node 8 ([fdee983](https://github.com/researchgate/node-file-processor/commit/fdee983b1299dc89dd7719db67a667233e60efc6)) 24 | 25 | 26 | ### BREAKING CHANGES 27 | 28 | * Only node 10.18.1 or newer is now supported 29 | 30 | # [2.0.0](https://github.com/researchgate/node-file-processor/compare/v1.2.0...v2.0.0) (2018-03-21) 31 | 32 | 33 | ### Features 34 | 35 | * Add keepAlive option ([f9fb05a](https://github.com/researchgate/node-file-processor/commit/f9fb05af299a411f517a848279820c5c8140315b)) 36 | 37 | 38 | ### BREAKING CHANGES 39 | 40 | * worker-farm options are moved to `options.worker` key. 41 | * abort method renamed to destroy. 
42 | 43 | 44 | 45 | # [1.2.0](https://github.com/researchgate/node-file-processor/compare/v1.1.0...v1.2.0) (2018-01-05) 46 | 47 | 48 | ### Bug Fixes 49 | 50 | * Exit immediately if an error happened ([#7](https://github.com/researchgate/node-file-processor/issues/7)) ([b41e656](https://github.com/researchgate/node-file-processor/commit/b41e65616e3ed7d328d8fc9061f45ea792fc5a3e)) 51 | 52 | 53 | ### Features 54 | 55 | * **worker-farm:** Allow to pass-through options to worker-farm ([#6](https://github.com/researchgate/node-file-processor/issues/6)) ([64fb20d](https://github.com/researchgate/node-file-processor/commit/64fb20d9de420c601681c795d6ccdceddf799d6a)) 56 | 57 | 58 | 59 | # [1.1.0](https://github.com/researchgate/node-file-processor/compare/v1.0.0...v1.1.0) (2017-08-11) 60 | 61 | 62 | ### Features 63 | 64 | * Support multiple paths ([3bc826e](https://github.com/researchgate/node-file-processor/commit/3bc826e8f729ab60bd9254f5b27dec39251362b8)) 65 | 66 | 67 | 68 | # [1.0.0](https://github.com/researchgate/node-file-processor/compare/815824ebe07c1bd6afe3e4508cc4d4ada79e4b82...v1.0.0) (2017-08-09) 69 | 70 | 71 | ### Features 72 | 73 | * Implement library ([815824e](https://github.com/researchgate/node-file-processor/commit/815824ebe07c1bd6afe3e4508cc4d4ada79e4b82)) 74 | -------------------------------------------------------------------------------- /test/index.spec.js: -------------------------------------------------------------------------------- 1 | const path = require('path'); 2 | const workerPath = require.resolve('./workers/worker'); 3 | const FileProcessor = require('..'); 4 | 5 | /* Builds the path of a fixture file inside the example directory next to this spec. */ const examplePath = (fileName) => path.join(__dirname, 'example', fileName); 6 | const pattern = examplePath('*.txt'); 7 | 8 | /* Happy-path suite: beforeEach creates a fresh FileProcessor over the fixture pattern with the file-reading worker. */ describe('success', () => { 9 | let fileProcessor = null; 10 | beforeEach(() => { 11 | fileProcessor = new FileProcessor(pattern, workerPath); 12 | }); 13 | 14 | test('queued', () => { 15 | expect.assertions(3); 16 | const handler = jest.fn(); 17 | 
fileProcessor.on('queued', handler); 18 | return new Promise((resolve) => { 19 | fileProcessor.on('end', () => { 20 | expect(handler).toHaveBeenCalledWith(examplePath('1.txt')); 21 | expect(handler).toHaveBeenCalledWith(examplePath('2.txt')); 22 | expect(handler).toHaveBeenCalledWith(examplePath('3.txt')); 23 | resolve(); 24 | }); 25 | }); 26 | }); 27 | 28 | /* The expected results a, b and c are the contents of the three fixture files as read by the worker. */ test('processed', () => { 29 | expect.assertions(3); 30 | const handler = jest.fn(); 31 | fileProcessor.on('processed', handler); 32 | return new Promise((resolve) => { 33 | fileProcessor.on('end', () => { 34 | expect(handler).toHaveBeenCalledWith(examplePath('1.txt'), 'a'); 35 | expect(handler).toHaveBeenCalledWith(examplePath('2.txt'), 'b'); 36 | expect(handler).toHaveBeenCalledWith(examplePath('3.txt'), 'c'); 37 | resolve(); 38 | }); 39 | }); 40 | }); 41 | 42 | /* processedCount is only required to be a number; presumably because its value when the glob stream ends depends on worker timing - confirm. */ test('allQueued', () => { 43 | expect.assertions(1); 44 | const handler = jest.fn(); 45 | fileProcessor.on('allQueued', handler); 46 | return new Promise((resolve) => { 47 | fileProcessor.on('end', () => { 48 | expect(handler).toHaveBeenCalledWith( 49 | expect.objectContaining({ 50 | queuedCount: 3, 51 | processedCount: expect.any(Number), 52 | }) 53 | ); 54 | resolve(); 55 | }); 56 | }); 57 | }); 58 | 59 | /* Passes only if destroy invokes its callback; nothing else is asserted. */ test('destroy', () => { 60 | return new Promise((resolve) => { 61 | fileProcessor.destroy(resolve); 62 | }); 63 | }); 64 | 65 | /* An array of patterns is accepted in place of a single one. NOTE(review): this reassigns fileProcessor, so the instance created in beforeEach is not destroyed by this test - confirm that is harmless. */ test('multiple paths', () => { 66 | fileProcessor = new FileProcessor( 67 | [examplePath('1.txt'), examplePath('3.txt')], 68 | workerPath 69 | ); 70 | 71 | expect.assertions(3); 72 | const handler = jest.fn(); 73 | fileProcessor.on('queued', handler); 74 | return new Promise((resolve) => { 75 | fileProcessor.on('end', () => { 76 | expect(handler).toHaveBeenCalledWith(examplePath('1.txt')); 77 | expect(handler).not.toHaveBeenCalledWith(examplePath('2.txt')); 78 | expect(handler).toHaveBeenCalledWith(examplePath('3.txt')); 79 | resolve(); 80 | }); 81 | }); 82 | }); 83 | 84 | /* With keepAlive set, the processor is expected to stay usable after end, so process can be called again; afterEach destroys it explicitly. */ describe('keepAlive', () => { 85 | let processor; 
86 | 87 | beforeEach(() => { 88 | processor = new FileProcessor(examplePath('1.txt'), workerPath, { 89 | keepAlive: true, 90 | }); 91 | }); 92 | 93 | test('allows to process more files after initial pass', () => { 94 | expect.assertions(2); 95 | return new Promise((resolve) => { 96 | processor.on('end', () => { 97 | processor.process(examplePath('2.txt'), (error, result) => { 98 | expect(error).toBe(null); 99 | expect(result).toEqual('b'); 100 | resolve(); 101 | }); 102 | }); 103 | }); 104 | }); 105 | 106 | afterEach(() => { 107 | return new Promise((resolve) => { 108 | processor.destroy(resolve); 109 | }); 110 | }); 111 | }); 112 | 113 | /* The custom invokeWorker wraps the farm call and appends an exclamation mark to each successful result before passing it to the callback. */ test('invokeWorker', () => { 114 | expect.assertions(3); 115 | 116 | const processor = new FileProcessor(pattern, workerPath, { 117 | invokeWorker(workers, filepath, callback) { 118 | workers(filepath, (err, result) => 119 | err ? callback(err) : callback(null, `${result}!`) 120 | ); 121 | }, 122 | }); 123 | 124 | const handler = jest.fn(); 125 | processor.on('processed', handler); 126 | 127 | return new Promise((resolve) => { 128 | processor.on('end', () => { 129 | expect(handler).toHaveBeenCalledWith(examplePath('1.txt'), 'a!'); 130 | expect(handler).toHaveBeenCalledWith(examplePath('2.txt'), 'b!'); 131 | expect(handler).toHaveBeenCalledWith(examplePath('3.txt'), 'c!'); 132 | resolve(); 133 | }); 134 | }); 135 | }); 136 | }); 137 | --------------------------------------------------------------------------------