├── .gitignore ├── .npmignore ├── LICENSE ├── README.md ├── examples ├── basic │ ├── README.md │ ├── package-lock.json │ ├── package.json │ ├── serverless.yml │ └── src │ │ ├── lambda-parallelizer.js │ │ └── parallelizer-code.js └── with-bundler │ ├── README.md │ ├── package-lock.json │ ├── package.json │ ├── serverless.yml │ └── src │ ├── lambda-parallelizer.js │ ├── parallelizer-code-min.js │ └── parallelizer-code.js ├── images ├── node-parallelizer-package.png └── node-parallelizer.png ├── package-lock.json ├── package.json ├── src ├── child-process.js ├── index.js └── worker-thread.js └── test ├── benchmark-2.js └── benchmark.js /.gitignore: -------------------------------------------------------------------------------- 1 | node_modules 2 | .serverless/ 3 | .DS_Store -------------------------------------------------------------------------------- /.npmignore: -------------------------------------------------------------------------------- 1 | images/ 2 | test/ 3 | examples/ -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2023 Eduardo Marcos 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Node Parallelizer 2 | A NodeJS package for running code in parallel. Initially created to provide multiprocessing in an **AWS Lambda function**, but it can be used in any NodeJS environment. 3 | 4 | ## Supported parallelizers 5 | - Child Process 6 | - Worker threads 7 | 8 | ### Child Process Parallelizer 9 | This parallelizer is designed specifically for creating new Node.js processes. These processes include an extra communication channel that enables message exchange between the parent and child processes. 10 | 11 | Bear in mind that the created Node.js child processes are independent from the parent process, except for the IPC communication channel connecting them. Each child process has its own memory and V8 instance. Due to the extra resources required for these allocations, it is not advisable to create a large number of child Node.js processes. 12 | 13 | It uses the NodeJS [child process module](https://nodejs.org/api/child_process.html) behind the scenes. 
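For illustration only, the underlying mechanism looks roughly like the sketch below — plain Node.js `child_process.fork` plus IPC messaging, not this package's actual source (the file names `parent.js` and `child.js` are hypothetical):

```javascript
// parent.js (hypothetical) — fork a child and exchange messages over the IPC channel
const { fork } = require('child_process');

const child = fork('./child.js'); // new Node.js process with its own memory and V8 instance

child.on('message', (result) => {
  console.log('Child finished:', result);
  child.disconnect(); // close the IPC channel so the child can exit
});

child.send({ batch: [1, 2, 3] }); // messages are serialized and delivered to the child
```

The forked module receives the batch, does its work, and replies to the parent:

```javascript
// child.js (hypothetical)
process.on('message', ({ batch }) => {
  const sum = batch.reduce((acc, n) => acc + n, 0); // CPU-bound work runs in this separate process
  process.send({ sum });
});
```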
14 | 15 | ### Worker Threads Parallelizer 16 | This parallelizer enables the use of threads that execute JavaScript in parallel. 17 | 18 | These threads are beneficial for carrying out JavaScript tasks that demand significant CPU resources. However, they offer limited advantages for I/O-intensive tasks. Node.js's native asynchronous I/O operations or the child process parallelizer are more effective than this parallelizer type in such cases. 19 | 20 | It uses the NodeJS [worker threads module](https://nodejs.org/api/worker_threads.html) behind the scenes. 21 | 22 | ### Generic details 23 | When you call the `run(records)` method on a parallelizer instance, this package splits the list of records you provide into smaller subsets, and your code is used to execute each subset in parallel. 24 | 25 | This package can detect the number of vCPU cores allocated to your execution environment and maximize their utilization. By default, it creates one child process/thread per vCPU core, but this setting can be customized to meet your specific requirements. Alternatively, you can manually specify the number of child processes/threads the library creates, regardless of the number of vCPU cores available. 26 | 27 | ## AWS Lambda & Node Parallelizer 28 | By default, it uses the Lambda function environment `/tmp` folder to create the required module that runs in the child process/thread. 29 | 30 | When you create an instance of the Child Process Parallelizer outside of the Lambda handler function, it reuses the child processes across the different invocations within a Lambda instance, minimizing the impact of creating child processes on every invocation. Furthermore, if the package detects that any of the child processes has disconnected, it recreates it automatically without affecting the execution. 31 | 32 | ## Demonstration 33 | ![Alt text describing the image](./images/node-parallelizer.png) 34 | 35 | ## Benchmark 36 | 37 | #### CPU & I/O operations (Parallelization per CPU = 1) 38 | ```bash 39 | $ node test/benchmark.js 40 | Child Parallelizer x 18.08 ops/sec 41 | Thread Parallelizer x 15.90 ops/sec 42 | Without Parallelizer x 2.79 ops/sec 43 | 44 | Result: 45 | Fastest is Child Parallelizer 46 | Slowest is Without Parallelizer 47 | ``` 48 | 49 | #### CPU & I/O operations (Parallelization per CPU = 3) 50 | ```bash 51 | $ node test/benchmark.js 52 | Child Parallelizer x 17.01 ops/sec 53 | Thread Parallelizer x 7.72 ops/sec 54 | Without Parallelizer x 2.93 ops/sec 55 | 56 | Result: 57 | Fastest is Child Parallelizer 58 | Slowest is Without Parallelizer 59 | ``` 60 | 61 | #### Child + Thread Parallelizers VS JavaScript Promise.All (Parallelization = 1) 62 | ```bash 63 | $ node test/benchmark-2.js 64 | Child + Thread Parallelizers x 8.15 ops/sec 65 | JavaSCript Promise.All x 7.21 ops/sec 66 | 67 | Result: 68 | Fastest is Child + Thread Parallelizers 69 | Slowest is JavaSCript Promise.All 70 | ``` 71 | 72 | #### Child + Thread Parallelizers VS JavaScript Promise.All (Parallelization = 3) 73 | ```bash 74 | $ node test/benchmark-2.js 75 | Child + Thread Parallelizers x 16.42 ops/sec 76 | JavaSCript Promise.All x 7.49 ops/sec 77 | 78 | Result: 79 | Fastest is Child + Thread Parallelizers 80 | Slowest is JavaSCript Promise.All 81 | ``` 82 | ## Installation 83 | ```bash 84 | npm i node-parallelizer --save 85 | ``` 86 | 87 | ## Usage 88 |
89 | Parallelizer (Basic) 90 | 91 | #### Class instantiation 92 | `Parallelizer({ type = 'child-process', tmpPath = '/tmp', filePath, processBatchFunctionName, parallelization = false, parallelizationPerCPU = 1, debug = false })` 93 | 94 | **Parameters** 95 | - `type` (String) (Default value: 'child-process') (Options: 'child-process' | 'worker-threads'): The parallelizer type to be used. 96 | - `tmpPath` (String) (Default value: '/tmp'): The path where the module that runs in the child process/thread will be created. 97 | - `filePath` (String): The absolute path to the file that contains the function that will be executed in parallel. 98 | - `processBatchFunctionName` (String): The name of the function that will be executed in parallel. 99 | - `parallelization` (Number|false) (Default value: false): The exact number of processes/threads that will be created. If false, it is based on the CPU cores available. 100 | - `parallelizationPerCPU` (Number) (Default value: 1): If `parallelization` is set to `false`, this parameter defines the number of processes/threads per CPU. 101 | - `debug` (Boolean) (Default value: false): Enables the internal logs for debugging purposes. 102 | #### Main methods 103 | `run(batch, params = null)` 104 | 105 | **Parameters** 106 | - `batch` (Array): The records you want to process in parallel. 107 | - `params` (Object) (Default value: null): Parameters that will be passed to each child/thread process. 108 | 109 | **Returns** (Array): The processes/threads' responses. 110 | #### Using the Node Parallelizer in AWS Lambda 111 | In this example, the repository structure looks like this: 112 | ``` 113 | src/ 114 | handler.js 115 | parallel.js 116 | serverless.yml 117 | package.json 118 | ``` 119 | 120 | The snippet below represents your Lambda handler: 121 | ```javascript 122 | // handler.js 123 | 124 | const { Parallelizer, PARALLELIZER_CHILD, PARALLELIZER_THREADS } = require("node-parallelizer"); 125 | 126 | // Creates a new parallelizer instance. 127 | const parallelizer = new Parallelizer({ type: PARALLELIZER_CHILD, filePath: "/var/task/src/parallel.js", processBatchFunctionName: 'batchProcessor' }); 128 | 129 | module.exports.handler = async (event) => { 130 | // Run batch in parallel 131 | const responses = await parallelizer.run(event.Records); 132 | 133 | console.log(responses); 134 | }; 135 | 136 | ``` 137 | > Make sure to provide the filePath parameter as an absolute path. In this example, we've included the '/var/task/' prefix in the path because Lambda deploys your code within that folder. 138 | 139 | The snippet below represents the code you want to run in parallel: 140 | ```javascript 141 | // parallel.js 142 | 143 | const batchProcessor = ({ batch }) => { 144 | 145 | // 146 | // YOUR CODE HERE 147 | // 148 | 149 | return { success: true, count: batch.length } 150 | } 151 | 152 | 153 | module.exports = { batchProcessor } 154 | 155 | ``` 156 | > Verify that the input signature of your function (in this case, batchProcessor) includes batch as a parameter, as it contains the subset of records that a child process/thread will handle. 157 | 158 |
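The second argument of `run` lets you share extra parameters with every child process/thread. The sketch below is hypothetical (the `tableName` value is only an example), but it follows the `run(batch, params)` signature above and the `({ batch, params })` input shape used by the functions in `examples/basic/src/parallelizer-code.js`:

```javascript
// handler.js (continuing the basic example above)
module.exports.handler = async (event) => {
  // The second argument is passed, unchanged, to every child process/thread.
  const responses = await parallelizer.run(event.Records, { tableName: 'my-table' });

  console.log(responses);
};
```

```javascript
// parallel.js
const batchProcessor = ({ batch, params }) => {
  // params is the object given as the second argument to run()
  return { success: true, count: batch.length, table: params.tableName };
};

module.exports = { batchProcessor };
```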
159 | 160 |
161 | Parallelizer (Advanced) 162 | 163 | #### Class instantiation 164 | `Parallelizer([{ id: "only-cpu", type = 'worker-threads', tmpPath = '/tmp', filePath, processBatchFunctionName, parallelization = false, parallelizationPerCPU = 1, debug = false }, { id: "only-io", type = 'child-process', tmpPath = '/tmp', filePath, processBatchFunctionName, parallelization = false, parallelizationPerCPU = 1, debug = false }])` 165 | 166 | **Parameters** 167 | - List of: 168 | - `id` (String): The unique identifier for your Child/Thread internal instance. 169 | - `type` (String) (Default value: 'child-process') (Options: 'child-process' | 'worker-threads'): The parallelizer type to be used. 170 | - `tmpPath` (String) (Default value: '/tmp'): The path where the module that runs in the child process/thread will be created. 171 | - `filePath` (String): The absolute path to the file that contains the function that will be executed in parallel. 172 | - `processBatchFunctionName` (String): The name of the function that will be executed in parallel. 173 | - `parallelization` (Number|false) (Default value: false): The exact number of processes/threads that will be created. If false, it is based on the CPU cores available. 174 | - `parallelizationPerCPU` (Number) (Default value: 1): If `parallelization` is set to `false`, this parameter defines the number of processes/threads per CPU. 175 | - `debug` (Boolean) (Default value: false): Enables the internal logs for debugging purposes. 176 | #### Main methods 177 | `run([{ id: "only-cpu", batch: batchOne, params: { var: 1 } }, { id: "only-io", batch: batchTwo }])` 178 | 179 | **Parameters** 180 | - `id` (String): The unique identifier for your Child/Thread internal instance. 181 | - `batch` (Array): The records you want to process in parallel. 182 | - `params` (Object) (Default value: null): Parameters that will be passed to each child/thread process. 183 | 184 | **Returns** (Array): A list with the processes/threads' responses. 185 | #### Using the Node Parallelizer in AWS Lambda 186 | In this example, the repository structure looks like this: 187 | ``` 188 | src/ 189 | handler.js 190 | parallel.js 191 | serverless.yml 192 | package.json 193 | ``` 194 | 195 | The snippet below represents your Lambda handler: 196 | ```javascript 197 | // handler.js 198 | 199 | const { Parallelizer, PARALLELIZER_CHILD, PARALLELIZER_THREADS } = require("node-parallelizer"); 200 | 201 | // Creates a new parallelizer instance with multiple different parallelizers. 202 | const parallelizer = new Parallelizer([ 203 | { id: "with-threads", type: PARALLELIZER_THREADS, parallelization: 2, filePath: "/var/task/src/parallel.js", processBatchFunctionName: 'batchProcessorOne' }, 204 | { id: "with-processes", type: PARALLELIZER_CHILD, parallelization: 4, filePath: "/var/task/src/parallel.js", processBatchFunctionName: 'batchProcessorTwo' }, 205 | ]); 206 | 207 | module.exports.handler = async (event) => { 208 | // Run batches in parallel 209 | const responses = await parallelizer.run([ 210 | { id: "with-threads", batch: event.batchOne }, 211 | { id: "with-processes", batch: event.batchTwo }, 212 | ]); 213 | 214 | console.log(responses); 215 | }; 216 | 217 | ``` 218 | > Make sure to provide the filePath parameter as an absolute path. In this example, we've included the '/var/task/' prefix in the path because Lambda deploys your code within that folder. 219 | 220 | > Notice that we have added a new parameter called 'id'.
This is used to distinguish between the different parallelizer instances and to pass the appropriate batch to each one. 221 | 222 | The snippet below represents the code you want to run in parallel: 223 | ```javascript 224 | // parallel.js 225 | 226 | const batchProcessorOne = ({ batch }) => { 227 | 228 | // 229 | // YOUR CODE HERE 230 | // 231 | 232 | return { success: true, count: batch.length } 233 | } 234 | 235 | const batchProcessorTwo = ({ batch }) => { 236 | 237 | // 238 | // YOUR CODE HERE 239 | // 240 | 241 | return { success: true, count: batch.length } 242 | } 243 | 244 | 245 | module.exports = { batchProcessorOne, batchProcessorTwo } 246 | 247 | ``` 248 | > Verify that the input signature of your function (in this case, batchProcessorOne and batchProcessorTwo) includes batch as a parameter, as it contains the subset of records that a child process/thread will handle. 249 | 250 |
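In long-lived environments outside of Lambda (local scripts, benchmarks, tests), you may want to release the child processes/threads and their temporary module files explicitly. A short sketch based on the `removeChildThreads(ids)` method in `src/index.js`, which is also how `test/benchmark.js` cleans up:

```javascript
// Remove every child process/thread created by this parallelizer instance.
parallelizer.removeChildThreads();

// Or, with the multi-parallelizer setup above, remove only specific internal instances by id.
parallelizer.removeChildThreads(["with-processes"]);
```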
251 | 252 | 253 | ## Examples 254 | 255 | 1. [Basic](https://github.com/Edujugon/node-parallelizer/tree/main/examples/basic) 256 | 2. [With Bundler](https://github.com/Edujugon/node-parallelizer/tree/main/examples/with-bundler) 257 | 258 | ## Contribution 259 | We welcome contributions to this project. If you are interested in contributing, please feel free to submit a pull request. 260 | -------------------------------------------------------------------------------- /examples/basic/README.md: -------------------------------------------------------------------------------- 1 | ## Node-Parallelizer example with bundler 2 | 3 | ### Instal dependencies 4 | ``` 5 | npm install 6 | ``` 7 | 8 | ### Deploy 9 | ``` 10 | sls deploy 11 | ``` 12 | 13 | 14 | -------------------------------------------------------------------------------- /examples/basic/package-lock.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "child-process", 3 | "version": "1.0.0", 4 | "lockfileVersion": 2, 5 | "requires": true, 6 | "packages": { 7 | "": { 8 | "name": "child-process", 9 | "version": "1.0.0", 10 | "license": "ISC", 11 | "dependencies": { 12 | "axios": "^1.4.0", 13 | "node-parallelizer": "^3.0.0" 14 | } 15 | }, 16 | "node_modules/asynckit": { 17 | "version": "0.4.0", 18 | "resolved": "https://registry.npmjs.org/asynckit/-/asynckit-0.4.0.tgz", 19 | "integrity": "sha512-Oei9OH4tRh0YqU3GxhX79dM/mwVgvbZJaSNaRk+bshkj0S5cfHcgYakreBjrHwatXKbz+IoIdYLxrKim2MjW0Q==" 20 | }, 21 | "node_modules/axios": { 22 | "version": "1.4.0", 23 | "resolved": "https://registry.npmjs.org/axios/-/axios-1.4.0.tgz", 24 | "integrity": "sha512-S4XCWMEmzvo64T9GfvQDOXgYRDJ/wsSZc7Jvdgx5u1sd0JwsuPLqb3SYmusag+edF6ziyMensPVqLTSc1PiSEA==", 25 | "dependencies": { 26 | "follow-redirects": "^1.15.0", 27 | "form-data": "^4.0.0", 28 | "proxy-from-env": "^1.1.0" 29 | } 30 | }, 31 | "node_modules/combined-stream": { 32 | "version": "1.0.8", 33 | "resolved": "https://registry.npmjs.org/combined-stream/-/combined-stream-1.0.8.tgz", 34 | "integrity": "sha512-FQN4MRfuJeHf7cBbBMJFXhKSDq+2kAArBlmRBvcvFE5BB1HZKXtSFASDhdlz9zOYwxh8lDdnvmMOe/+5cdoEdg==", 35 | "dependencies": { 36 | "delayed-stream": "~1.0.0" 37 | }, 38 | "engines": { 39 | "node": ">= 0.8" 40 | } 41 | }, 42 | "node_modules/delayed-stream": { 43 | "version": "1.0.0", 44 | "resolved": "https://registry.npmjs.org/delayed-stream/-/delayed-stream-1.0.0.tgz", 45 | "integrity": "sha512-ZySD7Nf91aLB0RxL4KGrKHBXl7Eds1DAmEdcoVawXnLD7SDhpNgtuII2aAkg7a7QS41jxPSZ17p4VdGnMHk3MQ==", 46 | "engines": { 47 | "node": ">=0.4.0" 48 | } 49 | }, 50 | "node_modules/follow-redirects": { 51 | "version": "1.15.2", 52 | "resolved": "https://registry.npmjs.org/follow-redirects/-/follow-redirects-1.15.2.tgz", 53 | "integrity": "sha512-VQLG33o04KaQ8uYi2tVNbdrWp1QWxNNea+nmIB4EVM28v0hmP17z7aG1+wAkNzVq4KeXTq3221ye5qTJP91JwA==", 54 | "funding": [ 55 | { 56 | "type": "individual", 57 | "url": "https://github.com/sponsors/RubenVerborgh" 58 | } 59 | ], 60 | "engines": { 61 | "node": ">=4.0" 62 | }, 63 | "peerDependenciesMeta": { 64 | "debug": { 65 | "optional": true 66 | } 67 | } 68 | }, 69 | "node_modules/form-data": { 70 | "version": "4.0.0", 71 | "resolved": "https://registry.npmjs.org/form-data/-/form-data-4.0.0.tgz", 72 | "integrity": "sha512-ETEklSGi5t0QMZuiXoA/Q6vcnxcLQP5vdugSpuAyi6SVGi2clPPp+xgEhuMaHC+zGgn31Kd235W35f7Hykkaww==", 73 | "dependencies": { 74 | "asynckit": "^0.4.0", 75 | "combined-stream": "^1.0.8", 76 | "mime-types": "^2.1.12" 77 | }, 78 | "engines": { 79 | 
"node": ">= 6" 80 | } 81 | }, 82 | "node_modules/mime-db": { 83 | "version": "1.52.0", 84 | "resolved": "https://registry.npmjs.org/mime-db/-/mime-db-1.52.0.tgz", 85 | "integrity": "sha512-sPU4uV7dYlvtWJxwwxHD0PuihVNiE7TyAbQ5SWxDCB9mUYvOgroQOwYQQOKPJ8CIbE+1ETVlOoK1UC2nU3gYvg==", 86 | "engines": { 87 | "node": ">= 0.6" 88 | } 89 | }, 90 | "node_modules/mime-types": { 91 | "version": "2.1.35", 92 | "resolved": "https://registry.npmjs.org/mime-types/-/mime-types-2.1.35.tgz", 93 | "integrity": "sha512-ZDY+bPm5zTTF+YpCrAU9nK0UgICYPT0QtT1NZWFv4s++TNkcgVaT0g6+4R2uI4MjQjzysHB1zxuWL50hzaeXiw==", 94 | "dependencies": { 95 | "mime-db": "1.52.0" 96 | }, 97 | "engines": { 98 | "node": ">= 0.6" 99 | } 100 | }, 101 | "node_modules/node-parallelizer": { 102 | "version": "3.0.0", 103 | "resolved": "https://registry.npmjs.org/node-parallelizer/-/node-parallelizer-3.0.0.tgz", 104 | "integrity": "sha512-QiR8yRsF4MTR9UnIWYPwpny81wrEL6oF7KUkEXPiwpgYxs5hJUu/64V+/RFGtoY3QUqwMFJ3eueS0gFno2FMww==" 105 | }, 106 | "node_modules/proxy-from-env": { 107 | "version": "1.1.0", 108 | "resolved": "https://registry.npmjs.org/proxy-from-env/-/proxy-from-env-1.1.0.tgz", 109 | "integrity": "sha512-D+zkORCbA9f1tdWRK0RaCR3GPv50cMxcrz4X8k5LTSUD1Dkw47mKJEZQNunItRTkWwgtaUSo1RVFRIG9ZXiFYg==" 110 | } 111 | }, 112 | "dependencies": { 113 | "asynckit": { 114 | "version": "0.4.0", 115 | "resolved": "https://registry.npmjs.org/asynckit/-/asynckit-0.4.0.tgz", 116 | "integrity": "sha512-Oei9OH4tRh0YqU3GxhX79dM/mwVgvbZJaSNaRk+bshkj0S5cfHcgYakreBjrHwatXKbz+IoIdYLxrKim2MjW0Q==" 117 | }, 118 | "axios": { 119 | "version": "1.4.0", 120 | "resolved": "https://registry.npmjs.org/axios/-/axios-1.4.0.tgz", 121 | "integrity": "sha512-S4XCWMEmzvo64T9GfvQDOXgYRDJ/wsSZc7Jvdgx5u1sd0JwsuPLqb3SYmusag+edF6ziyMensPVqLTSc1PiSEA==", 122 | "requires": { 123 | "follow-redirects": "^1.15.0", 124 | "form-data": "^4.0.0", 125 | "proxy-from-env": "^1.1.0" 126 | } 127 | }, 128 | "combined-stream": { 129 | "version": "1.0.8", 130 | "resolved": "https://registry.npmjs.org/combined-stream/-/combined-stream-1.0.8.tgz", 131 | "integrity": "sha512-FQN4MRfuJeHf7cBbBMJFXhKSDq+2kAArBlmRBvcvFE5BB1HZKXtSFASDhdlz9zOYwxh8lDdnvmMOe/+5cdoEdg==", 132 | "requires": { 133 | "delayed-stream": "~1.0.0" 134 | } 135 | }, 136 | "delayed-stream": { 137 | "version": "1.0.0", 138 | "resolved": "https://registry.npmjs.org/delayed-stream/-/delayed-stream-1.0.0.tgz", 139 | "integrity": "sha512-ZySD7Nf91aLB0RxL4KGrKHBXl7Eds1DAmEdcoVawXnLD7SDhpNgtuII2aAkg7a7QS41jxPSZ17p4VdGnMHk3MQ==" 140 | }, 141 | "follow-redirects": { 142 | "version": "1.15.2", 143 | "resolved": "https://registry.npmjs.org/follow-redirects/-/follow-redirects-1.15.2.tgz", 144 | "integrity": "sha512-VQLG33o04KaQ8uYi2tVNbdrWp1QWxNNea+nmIB4EVM28v0hmP17z7aG1+wAkNzVq4KeXTq3221ye5qTJP91JwA==" 145 | }, 146 | "form-data": { 147 | "version": "4.0.0", 148 | "resolved": "https://registry.npmjs.org/form-data/-/form-data-4.0.0.tgz", 149 | "integrity": "sha512-ETEklSGi5t0QMZuiXoA/Q6vcnxcLQP5vdugSpuAyi6SVGi2clPPp+xgEhuMaHC+zGgn31Kd235W35f7Hykkaww==", 150 | "requires": { 151 | "asynckit": "^0.4.0", 152 | "combined-stream": "^1.0.8", 153 | "mime-types": "^2.1.12" 154 | } 155 | }, 156 | "mime-db": { 157 | "version": "1.52.0", 158 | "resolved": "https://registry.npmjs.org/mime-db/-/mime-db-1.52.0.tgz", 159 | "integrity": "sha512-sPU4uV7dYlvtWJxwwxHD0PuihVNiE7TyAbQ5SWxDCB9mUYvOgroQOwYQQOKPJ8CIbE+1ETVlOoK1UC2nU3gYvg==" 160 | }, 161 | "mime-types": { 162 | "version": "2.1.35", 163 | "resolved": 
"https://registry.npmjs.org/mime-types/-/mime-types-2.1.35.tgz", 164 | "integrity": "sha512-ZDY+bPm5zTTF+YpCrAU9nK0UgICYPT0QtT1NZWFv4s++TNkcgVaT0g6+4R2uI4MjQjzysHB1zxuWL50hzaeXiw==", 165 | "requires": { 166 | "mime-db": "1.52.0" 167 | } 168 | }, 169 | "node-parallelizer": { 170 | "version": "3.0.0", 171 | "resolved": "https://registry.npmjs.org/node-parallelizer/-/node-parallelizer-3.0.0.tgz", 172 | "integrity": "sha512-QiR8yRsF4MTR9UnIWYPwpny81wrEL6oF7KUkEXPiwpgYxs5hJUu/64V+/RFGtoY3QUqwMFJ3eueS0gFno2FMww==" 173 | }, 174 | "proxy-from-env": { 175 | "version": "1.1.0", 176 | "resolved": "https://registry.npmjs.org/proxy-from-env/-/proxy-from-env-1.1.0.tgz", 177 | "integrity": "sha512-D+zkORCbA9f1tdWRK0RaCR3GPv50cMxcrz4X8k5LTSUD1Dkw47mKJEZQNunItRTkWwgtaUSo1RVFRIG9ZXiFYg==" 178 | } 179 | } 180 | } 181 | -------------------------------------------------------------------------------- /examples/basic/package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "child-process", 3 | "version": "1.0.0", 4 | "description": "", 5 | "main": "index.js", 6 | "scripts": { 7 | "test": "echo \"Error: no test specified\" && exit 1", 8 | "parallelizer": "node_modules/esbuild/bin/esbuild ./src/parallelizer-code.js --bundle --platform=node --outfile=./src/parallelizer-code-min.js" 9 | }, 10 | "keywords": [], 11 | "author": "", 12 | "license": "ISC", 13 | "dependencies": { 14 | "axios": "^1.4.0", 15 | "node-parallelizer": "^3.0.0" 16 | } 17 | } 18 | -------------------------------------------------------------------------------- /examples/basic/serverless.yml: -------------------------------------------------------------------------------- 1 | service: my-service-using-node-parallelizer 2 | 3 | provider: 4 | name: aws 5 | stage: ${opt:stage, "dev"} 6 | region: ${opt:region, 'us-east-2'} 7 | runtime: nodejs18.x 8 | 9 | functions: 10 | lambda-parallelizer: 11 | handler: src/lambda-parallelizer.handler 12 | memorySize: 2000 13 | timeout: 120 14 | environment: 15 | PROCESSESPERCPU: 1 16 | PARALLELIZER_TYPE: child-process 17 | PARALLELIZER_DEBUG_MODE_ENABLED: false -------------------------------------------------------------------------------- /examples/basic/src/lambda-parallelizer.js: -------------------------------------------------------------------------------- 1 | const { Parallelizer, PARALLELIZER_CHILD, PARALLELIZER_THREADS } = require("node-parallelizer"); 2 | 3 | const parallelizerType = process.env.PARALLELIZER_TYPE || PARALLELIZER_CHILD; 4 | const parallelizerDebug = process.env.PARALLELIZER_DEBUG_MODE_ENABLED === 'true' ? 
true : false; 5 | 6 | const parallelizer = new Parallelizer({ 7 | type: parallelizerType, 8 | debug: parallelizerDebug, 9 | parallelizationPerCPU: process.env.PROCESSESPERCPU || 1, 10 | filePath: "/var/task/src/parallelizer-code.js", 11 | processBatchFunctionName: 'batchProcessor' 12 | }); 13 | 14 | 15 | exports.handler = async (event) => { 16 | 17 | const batch = [...Array(event.number).keys()]; 18 | 19 | // Run batch in parallel 20 | const responses = await parallelizer.run(batch); 21 | 22 | console.log(JSON.stringify(responses)); 23 | }; -------------------------------------------------------------------------------- /examples/basic/src/parallelizer-code.js: -------------------------------------------------------------------------------- 1 | // const axios = require('axios'); 2 | const fs = require('fs'); 3 | const crypto = require('crypto'); 4 | 5 | const batchProcessor = async ({ batch, params }) => { 6 | let sum = 0; 7 | 8 | for (let id = 0; id <= (batch.length * 1000000); id++) { 9 | sum += id; 10 | } 11 | 12 | // const response = await axios.get('https://httpbin.org/get?key=123'); 13 | 14 | let totalFiles = 0; 15 | for (let id = 0; id <= (batch.length); id++) { 16 | try { 17 | const uniqueId = crypto.randomBytes(16).toString('hex'); 18 | const file = `/tmp/example-file-${uniqueId}.txt`; 19 | 20 | fs.writeFileSync(file, '***Random Code***'); 21 | if (fs.existsSync(file)) { 22 | fs.unlinkSync(file); 23 | } 24 | totalFiles++; 25 | } catch (err) { 26 | console.log(err.message); 27 | } 28 | } 29 | 30 | return { sum, totalFiles }; 31 | } 32 | 33 | const batchProcessor2 = async ({ batch, params }) => { 34 | let sum = 0; 35 | 36 | for (let id = 0; id <= (batch.length * 1000000); id++) { 37 | sum += id; 38 | } 39 | 40 | // const response = await axios.get('https://httpbin.org/get?key=123'); 41 | 42 | let totalFiles = 0; 43 | const promises = []; 44 | for (let id = 0; id <= (batch.length); id++) { 45 | promises.push(createAndDeleteFile()); 46 | totalFiles++; 47 | } 48 | 49 | await Promise.all(promises); 50 | 51 | return { sum, totalFiles }; 52 | } 53 | 54 | const createAndDeleteFile = () => { 55 | return new Promise((resolve, reject) => { 56 | try { 57 | const uniqueId = crypto.randomBytes(16).toString('hex'); 58 | const file = `/tmp/example-file-${uniqueId}.txt`; 59 | 60 | fs.writeFileSync(file, '***Random Code***'); 61 | if (fs.existsSync(file)) { 62 | fs.unlinkSync(file); 63 | } 64 | // console.log(file); 65 | resolve(true); 66 | } catch (err) { 67 | console.log(err.message); 68 | resolve(true); 69 | } 70 | }) 71 | } 72 | 73 | 74 | const batchProcessorOnlyCPU = async ({ batch, params }) => { 75 | return new Promise((resolve, reject) => { 76 | let sum = 0; 77 | 78 | for (let id = 0; id <= (batch.length * 1000000); id++) { 79 | sum += id; 80 | } 81 | 82 | resolve({ sum }); 83 | }) 84 | } 85 | 86 | const batchProcessorOnlyIO = async ({ batch, params }) => { 87 | 88 | let totalFiles = 0; 89 | for (let id = 1; id <= (batch.length); id++) { 90 | try { 91 | const uniqueId = crypto.randomBytes(16).toString('hex'); 92 | const file = `/tmp/example-file-${uniqueId}.txt`; 93 | 94 | fs.writeFileSync(file, '***Random Code***'); 95 | if (fs.existsSync(file)) { 96 | fs.unlinkSync(file); 97 | } 98 | totalFiles++; 99 | } catch (err) { 100 | console.log(err.message); 101 | } 102 | } 103 | 104 | return { totalFiles }; 105 | } 106 | 107 | 108 | module.exports = { batchProcessor, batchProcessor2, batchProcessorOnlyCPU, batchProcessorOnlyIO } 
-------------------------------------------------------------------------------- /examples/with-bundler/README.md: -------------------------------------------------------------------------------- 1 | ## Node-Parallelizer example with bundler 2 | 3 | ### Instal dependencies 4 | ``` 5 | npm install 6 | ``` 7 | 8 | ### Bundle up the parallelizer file 9 | ``` 10 | npm run parallelizer 11 | ``` 12 | 13 | ### Deploy 14 | ``` 15 | sls deploy 16 | ``` 17 | 18 | 19 | -------------------------------------------------------------------------------- /examples/with-bundler/package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "child-process", 3 | "version": "1.0.0", 4 | "description": "", 5 | "main": "index.js", 6 | "scripts": { 7 | "test": "echo \"Error: no test specified\" && exit 1", 8 | "parallelizer": "node_modules/esbuild/bin/esbuild ./src/parallelizer-code.js --bundle --platform=node --outfile=./src/parallelizer-code-min.js" 9 | }, 10 | "keywords": [], 11 | "author": "", 12 | "license": "ISC", 13 | "dependencies": { 14 | "axios": "^1.4.0", 15 | "esbuild": "^0.17.18", 16 | "node-parallelizer": "^3.0.0" 17 | }, 18 | "devDependencies": { 19 | "serverless-esbuild": "^1.43.0", 20 | "serverless-prune-plugin": "^2.0.2" 21 | } 22 | } 23 | -------------------------------------------------------------------------------- /examples/with-bundler/serverless.yml: -------------------------------------------------------------------------------- 1 | service: my-service-using-node-parallelizer 2 | 3 | provider: 4 | name: aws 5 | stage: ${opt:stage, "dev"} 6 | region: ${opt:region, 'us-east-2'} 7 | runtime: nodejs18.x 8 | 9 | plugins: 10 | - serverless-esbuild 11 | - serverless-prune-plugin 12 | 13 | package: 14 | individually: true 15 | exclude: 16 | - "**/*" 17 | 18 | custom: 19 | esbuild: 20 | bundle: true 21 | minify: false 22 | prune: 23 | automatic: true 24 | number: 1 25 | 26 | functions: 27 | lambda-parallelizer: 28 | handler: src/lambda-parallelizer.handler 29 | memorySize: 2000 30 | timeout: 120 31 | package: 32 | include: 33 | - src/parallelizer-code-min.js 34 | environment: 35 | PROCESSESPERCPU: 1 36 | PARALLELIZER_TYPE: child-process 37 | PARALLELIZER_DEBUG_MODE_ENABLED: false -------------------------------------------------------------------------------- /examples/with-bundler/src/lambda-parallelizer.js: -------------------------------------------------------------------------------- 1 | const { Parallelizer, PARALLELIZER_CHILD, PARALLELIZER_THREADS } = require("node-parallelizer"); 2 | 3 | const parallelizerType = process.env.PARALLELIZER_TYPE || PARALLELIZER_CHILD; 4 | const parallelizerDebug = process.env.PARALLELIZER_DEBUG_MODE_ENABLED === 'true' ? 
true : false; 5 | 6 | const parallelizer = new Parallelizer({ 7 | type: parallelizerType, 8 | debug: parallelizerDebug, 9 | parallelizationPerCPU: process.env.PROCESSESPERCPU || 1, 10 | filePath: "/var/task/src/parallelizer-code-min.js", 11 | processBatchFunctionName: 'batchProcessor' 12 | }); 13 | 14 | exports.handler = async (event) => { 15 | 16 | const batch = [...Array(event.number).keys()]; 17 | 18 | // Run batch in parallel 19 | const responses = await parallelizer.run(batch); 20 | 21 | console.log(JSON.stringify(responses)); 22 | }; -------------------------------------------------------------------------------- /examples/with-bundler/src/parallelizer-code-min.js: -------------------------------------------------------------------------------- 1 | // src/parallelizer-code.js 2 | var fs = require("fs"); 3 | var crypto = require("crypto"); 4 | var batchProcessor = async ({ batch }) => { 5 | let sum = 0; 6 | for (let id = 0; id <= batch.length * 1e6; id++) { 7 | sum += id; 8 | } 9 | let totalFiles = 0; 10 | for (let id = 0; id <= batch.length; id++) { 11 | try { 12 | const uniqueId = crypto.randomBytes(16).toString("hex"); 13 | const file = `/tmp/example-file-${uniqueId}.txt`; 14 | fs.writeFileSync(file, "***Random Code***"); 15 | if (fs.existsSync(file)) { 16 | fs.unlinkSync(file); 17 | } 18 | totalFiles++; 19 | } catch (err) { 20 | console.log(err.message); 21 | } 22 | } 23 | return { sum, totalFiles }; 24 | }; 25 | module.exports = { batchProcessor }; 26 | -------------------------------------------------------------------------------- /examples/with-bundler/src/parallelizer-code.js: -------------------------------------------------------------------------------- 1 | // const axios = require('axios'); 2 | const fs = require('fs'); 3 | const crypto = require('crypto'); 4 | 5 | const batchProcessor = async ({ batch }) => { 6 | let sum = 0; 7 | 8 | for (let id = 0; id <= (batch.length * 1000000); id++) { 9 | sum += id; 10 | } 11 | 12 | // const response = await axios.get('https://httpbin.org/get?key=123'); 13 | 14 | let totalFiles = 0; 15 | for (let id = 0; id <= (batch.length); id++) { 16 | try { 17 | const uniqueId = crypto.randomBytes(16).toString('hex'); 18 | const file = `/tmp/example-file-${uniqueId}.txt`; 19 | 20 | fs.writeFileSync(file, '***Random Code***'); 21 | if (fs.existsSync(file)) { 22 | fs.unlinkSync(file); 23 | } 24 | totalFiles++; 25 | } catch (err) { 26 | console.log(err.message); 27 | } 28 | } 29 | 30 | return { sum, totalFiles }; 31 | } 32 | 33 | 34 | module.exports = { batchProcessor } -------------------------------------------------------------------------------- /images/node-parallelizer-package.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Edujugon/node-parallelizer/f123c01daeeecd44bb000885c25580fdf10441b4/images/node-parallelizer-package.png -------------------------------------------------------------------------------- /images/node-parallelizer.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Edujugon/node-parallelizer/f123c01daeeecd44bb000885c25580fdf10441b4/images/node-parallelizer.png -------------------------------------------------------------------------------- /package-lock.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "node-parallelizer", 3 | "version": "2.2.0", 4 | "lockfileVersion": 2, 5 | "requires": true, 6 | "packages": { 7 | "": { 8 | "name": 
"node-parallelizer", 9 | "version": "2.2.0", 10 | "license": "MIT", 11 | "devDependencies": { 12 | "benchmark": "^2.1.4" 13 | } 14 | }, 15 | "node_modules/benchmark": { 16 | "version": "2.1.4", 17 | "resolved": "https://registry.npmjs.org/benchmark/-/benchmark-2.1.4.tgz", 18 | "integrity": "sha512-l9MlfN4M1K/H2fbhfMy3B7vJd6AGKJVQn2h6Sg/Yx+KckoUA7ewS5Vv6TjSq18ooE1kS9hhAlQRH3AkXIh/aOQ==", 19 | "dev": true, 20 | "dependencies": { 21 | "lodash": "^4.17.4", 22 | "platform": "^1.3.3" 23 | } 24 | }, 25 | "node_modules/lodash": { 26 | "version": "4.17.21", 27 | "resolved": "https://registry.npmjs.org/lodash/-/lodash-4.17.21.tgz", 28 | "integrity": "sha512-v2kDEe57lecTulaDIuNTPy3Ry4gLGJ6Z1O3vE1krgXZNrsQ+LFTGHVxVjcXPs17LhbZVGedAJv8XZ1tvj5FvSg==", 29 | "dev": true 30 | }, 31 | "node_modules/platform": { 32 | "version": "1.3.6", 33 | "resolved": "https://registry.npmjs.org/platform/-/platform-1.3.6.tgz", 34 | "integrity": "sha512-fnWVljUchTro6RiCFvCXBbNhJc2NijN7oIQxbwsyL0buWJPG85v81ehlHI9fXrJsMNgTofEoWIQeClKpgxFLrg==", 35 | "dev": true 36 | } 37 | }, 38 | "dependencies": { 39 | "benchmark": { 40 | "version": "2.1.4", 41 | "resolved": "https://registry.npmjs.org/benchmark/-/benchmark-2.1.4.tgz", 42 | "integrity": "sha512-l9MlfN4M1K/H2fbhfMy3B7vJd6AGKJVQn2h6Sg/Yx+KckoUA7ewS5Vv6TjSq18ooE1kS9hhAlQRH3AkXIh/aOQ==", 43 | "dev": true, 44 | "requires": { 45 | "lodash": "^4.17.4", 46 | "platform": "^1.3.3" 47 | } 48 | }, 49 | "lodash": { 50 | "version": "4.17.21", 51 | "resolved": "https://registry.npmjs.org/lodash/-/lodash-4.17.21.tgz", 52 | "integrity": "sha512-v2kDEe57lecTulaDIuNTPy3Ry4gLGJ6Z1O3vE1krgXZNrsQ+LFTGHVxVjcXPs17LhbZVGedAJv8XZ1tvj5FvSg==", 53 | "dev": true 54 | }, 55 | "platform": { 56 | "version": "1.3.6", 57 | "resolved": "https://registry.npmjs.org/platform/-/platform-1.3.6.tgz", 58 | "integrity": "sha512-fnWVljUchTro6RiCFvCXBbNhJc2NijN7oIQxbwsyL0buWJPG85v81ehlHI9fXrJsMNgTofEoWIQeClKpgxFLrg==", 59 | "dev": true 60 | } 61 | } 62 | } 63 | -------------------------------------------------------------------------------- /package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "node-parallelizer", 3 | "version": "3.1.0", 4 | "description": "A NodeJS package for running code in parallel. 
Initially created to provide multiprocessing in an AWS Lambda function, but it can be used in any NodeJS environment.", 5 | "main": "src/index.js", 6 | "scripts": { 7 | "test": "echo \"Error: no test specified\" && exit 1" 8 | }, 9 | "repository": { 10 | "type": "git", 11 | "url": "git+https://github.com/Edujugon/node-parallelizer.git" 12 | }, 13 | "keywords": [ 14 | "parallelizer", 15 | "Lambda parallelizer", 16 | "child process", 17 | "parallelism", 18 | "worker thread", 19 | "nodejs parallel" 20 | ], 21 | "author": "Eduardo Marcos ", 22 | "license": "MIT", 23 | "bugs": { 24 | "url": "https://github.com/Edujugon/node-parallelizer/issues" 25 | }, 26 | "homepage": "https://github.com/Edujugon/node-parallelizer#readme", 27 | "devDependencies": { 28 | "benchmark": "^2.1.4" 29 | } 30 | } 31 | -------------------------------------------------------------------------------- /src/child-process.js: -------------------------------------------------------------------------------- 1 | "use strict"; 2 | 3 | const { fork } = require('child_process'); 4 | const os = require("os"); 5 | const fs = require('fs'); 6 | const crypto = require('crypto'); 7 | 8 | const childFileName = "child-process-file"; 9 | 10 | class ChildProcess { 11 | constructor({ tmpPath = '/tmp', parallelization = false, parallelizationPerCPU = 1, debug = false } = {}) { 12 | const uniqueId = crypto.randomBytes(16).toString('hex'); 13 | 14 | this.tmpPath = `${tmpPath}/${childFileName}-${uniqueId}.js`; 15 | this.childFile = null; 16 | this.childProcesses = []; 17 | this.parallelization = parallelization; 18 | this.parallelizationPerCPU = parallelizationPerCPU; 19 | 20 | this.processesCount = 1; 21 | this.debug = debug; 22 | } 23 | 24 | createChildProcessFromCode({ callback, customCode = '' }) { 25 | const finalChildCode = `${customCode} ${templateChildCode} const processBatch = ${callback.toString()}` 26 | this.childFile = this._createChildFile(finalChildCode); 27 | 28 | this._createChildProcesses(); 29 | } 30 | 31 | parallelizerFunction({ filePath, processBatchFunctionName }) { 32 | const finalChildCode = `const {${processBatchFunctionName}: processBatch} = require('${filePath}'); ${templateChildCode}` 33 | this.childFile = this._createChildFile(finalChildCode); 34 | 35 | this._createChildProcesses(); 36 | } 37 | 38 | _createChildProcesses() { 39 | this.processesCount = (typeof this.parallelization === 'number') ? this.parallelization : this._getProcessesCount(); 40 | 41 | for (let id = 0; id < this.processesCount; id++) { 42 | this.childProcesses.push(this._createFork()); 43 | } 44 | } 45 | 46 | async runBatch(batch, params = null) { 47 | if (this.childProcesses.length === 0) { 48 | throw new Error('No child processes created. Please run "createChildProcesses" method before "runBatch"') 49 | } 50 | 51 | // Get the amount of messages per batch. 52 | const batchCount = (batch.length < this.processesCount) ? 1 : batch.length / this.processesCount; 53 | 54 | // Create the batches 55 | const batches = findSubsets(batch, batchCount); 56 | 57 | // Process the batches using the child processes. 
58 | return await this._processBatchesInForks(batches, params); 59 | } 60 | 61 | removeChildProcesses() { 62 | this.childProcesses.forEach(process => process.disconnect()); 63 | this.childProcesses = []; 64 | this._removeChildFile(); 65 | } 66 | 67 | removeChildThreads() { 68 | this.removeChildProcesses(); 69 | } 70 | 71 | _removeForkEvents() { 72 | this.childProcesses.forEach(child => { child.removeAllListeners('exit'); child.removeAllListeners('message') }); 73 | } 74 | 75 | async _processBatchesInForks(batches, params = null) { 76 | const batchesCount = batches.length; 77 | const childResponses = { 78 | responses: [], 79 | failures: [] 80 | }; 81 | 82 | let responsesReceived = 0; 83 | 84 | await new Promise((resolve, reject) => { 85 | for (let id = 0; id < batchesCount; id++) { 86 | // If a child has exited, then we recreate it. 87 | if (!this.childProcesses[id]?.connected) { 88 | logger({ 89 | message: `Child process #${id} no connected`, 90 | params: { 91 | child_id: id, 92 | }, 93 | debug: this.debug 94 | }) 95 | 96 | this.childProcesses[id] = this._createFork(); 97 | } 98 | 99 | this.childProcesses[id].on('exit', (code) => { 100 | logger({ 101 | message: `Child process #${id} exited with code: ${code}`, 102 | params: { 103 | child_id: id, 104 | exit_code: code 105 | }, 106 | debug: this.debug 107 | }) 108 | 109 | // In case a child process exists without sending a message. 110 | if (++responsesReceived == batchesCount) { 111 | this._removeForkEvents(); 112 | resolve('DONE'); 113 | } 114 | }); 115 | 116 | this.childProcesses[id].on('message', ({ type, logType = 'log', childLogMessage, childLogMessageParams = {}, reponse, status, errorMessage }) => { 117 | if (type == 'LOG') { 118 | logger({ 119 | message: childLogMessage, 120 | params: { 121 | child_id: id, 122 | ...childLogMessageParams 123 | }, 124 | debug: true, 125 | logType 126 | }) 127 | return; 128 | } 129 | 130 | logger({ 131 | message: `Child process #${id} status message: ${status}`, 132 | params: { 133 | child_id: id, 134 | status 135 | }, 136 | debug: this.debug 137 | }) 138 | 139 | if (status == 'FAILED') { 140 | logger({ 141 | message: `Child process #${id} error message: ${errorMessage}`, 142 | params: { 143 | child_id: id, 144 | error_message: errorMessage 145 | }, 146 | debug: this.debug 147 | }) 148 | childResponses.failures.push(errorMessage); 149 | } else if (status == 'SUCCESS') { 150 | childResponses.responses.push(reponse); 151 | } 152 | 153 | if (++responsesReceived == batchesCount) { 154 | this._removeForkEvents(); 155 | resolve('DONE'); 156 | } 157 | }); 158 | 159 | // Send message to child. 
160 | this.childProcesses[id].send({ id, batch: batches[id], params }); 161 | } 162 | }) 163 | 164 | return childResponses; 165 | } 166 | 167 | _getProcessesCount() { 168 | const cpuData = os.cpus(); 169 | return cpuData.length * this.parallelizationPerCPU; 170 | } 171 | 172 | _createFork() { 173 | const newFork = fork(this.tmpPath); 174 | 175 | newFork.on('error', (error) => { 176 | logger({ 177 | message: `Error on child process: ${error}`, 178 | params: { 179 | error 180 | }, 181 | debug: this.debug 182 | }) 183 | }) 184 | 185 | return newFork; 186 | } 187 | 188 | _createChildFile(childCode) { 189 | try { 190 | fs.writeFileSync(this.tmpPath, childCode); 191 | } catch (error) { 192 | throw new Error(`Failed to create child process file: ${error.message}`); 193 | } 194 | } 195 | 196 | _removeChildFile() { 197 | if (!fs.existsSync(this.tmpPath)) 198 | return; 199 | 200 | try { 201 | fs.unlinkSync(this.tmpPath); 202 | } catch (error) { 203 | console.error(`Failed to remove temporary child process file: ${error.message}`); 204 | } 205 | } 206 | } 207 | 208 | const findSubsets = (array, n) => { 209 | return array.reduce((all, one, i) => { 210 | const ch = Math.floor(i / n); 211 | all[ch] = [].concat((all[ch] || []), one); 212 | return all 213 | }, []) 214 | } 215 | 216 | const templateChildCode = ` 217 | const mainLogger = ({ message, params = {}, logType = 'log' }) => { 218 | process.send({ type: "LOG", logType, childLogMessage: message, childLogMessageParams: params }); 219 | } 220 | 221 | // Listening to parent's messages. 222 | process.on("message", async (message) => { 223 | try { 224 | const reponse = await processBatch({ batch: message.batch, params: message.params, mainLogger }); 225 | 226 | process.send({ type: "MESSAGE", status: "SUCCESS", reponse }); 227 | } catch (e) { 228 | process.send({ type: "MESSAGE", status: "FAILED", errorMessage: e.toString() }); 229 | } 230 | }); 231 | `; 232 | 233 | const logger = ({ message, params = {}, debug = false, logType = 'log' }) => { 234 | if (!debug) { 235 | return 236 | } 237 | 238 | const logMsg = Object.assign({}, params); 239 | logMsg.message = message; 240 | 241 | console[logType](JSON.stringify(logMsg)); 242 | } 243 | 244 | module.exports = ChildProcess; -------------------------------------------------------------------------------- /src/index.js: -------------------------------------------------------------------------------- 1 | const ChildProcess = require("./child-process"); 2 | const WorkerThreads = require("./worker-thread"); 3 | 4 | const PARALLELIZER_CHILD = 'child-process'; 5 | const PARALLELIZER_THREADS = 'worker-threads'; 6 | 7 | const SINGLE_CHILD_THREAD_ID = 'single-process'; 8 | class Parallelizer { 9 | constructor(params) { 10 | 11 | this.childThreads = {}; 12 | 13 | if (!isArray(params)) { 14 | params.id = SINGLE_CHILD_THREAD_ID; 15 | params = [params]; 16 | } 17 | 18 | this._init(params); 19 | } 20 | 21 | 22 | _init(list) { 23 | list.forEach(({ id, type, tmpPath = '/tmp', parallelization = false, parallelizationPerCPU = 1, debug = false, filePath, processBatchFunctionName }) => { 24 | if (!filePath || !processBatchFunctionName) { 25 | throw new Error('filePath and processBatchFunctionName are required'); 26 | } 27 | 28 | const parallelizer = [PARALLELIZER_CHILD, PARALLELIZER_THREADS].includes(type) ? type : PARALLELIZER_CHILD; 29 | 30 | const childThreadParams = { tmpPath, parallelization, parallelizationPerCPU, debug }; 31 | 32 | this.childThreads[id] = (parallelizer === PARALLELIZER_CHILD) ? 
33 | new ChildProcess(childThreadParams) : 34 | new WorkerThreads(childThreadParams); 35 | 36 | this.childThreads[id].parallelizerFunction({ filePath, processBatchFunctionName }); 37 | }); 38 | 39 | } 40 | 41 | 42 | async run(data, params = null) { 43 | if (Object.keys(this.childThreads).length == 1) { 44 | return this.childThreads[SINGLE_CHILD_THREAD_ID].runBatch(data, params); 45 | } 46 | 47 | if (!isArray(data)) { 48 | data.id = SINGLE_CHILD_THREAD_ID; 49 | data.params = data.params || params; 50 | data = [data]; 51 | } 52 | 53 | return await Promise.all(data.map(item => { 54 | const batch = item.batch 55 | const itemParams = item.params || params 56 | 57 | return this.childThreads[item.id].runBatch(batch, params = itemParams) 58 | })); 59 | } 60 | 61 | removeChildThreads(ids = null) { 62 | ids = (ids !== null && !isArray(ids)) ? [ids] : ids; 63 | 64 | Object.keys(this.childThreads) 65 | .filter(id => ids === null ? true : ids.includes(id)) 66 | .forEach((id) => { 67 | this.childThreads[id].removeChildThreads(); 68 | }); 69 | } 70 | } 71 | 72 | const isArray = (value) => { 73 | return Array.isArray(value); 74 | } 75 | 76 | 77 | module.exports = { ChildProcess, WorkerThreads, Parallelizer, PARALLELIZER_CHILD, PARALLELIZER_THREADS }; -------------------------------------------------------------------------------- /src/worker-thread.js: -------------------------------------------------------------------------------- 1 | "use strict"; 2 | 3 | const { Worker } = require("worker_threads"); 4 | const os = require("os"); 5 | const fs = require('fs'); 6 | const crypto = require('crypto'); 7 | 8 | const workerFileName = "worker-thread-file.js"; 9 | 10 | class WorkerThreads { 11 | constructor({ tmpPath = '/tmp', parallelization = false, parallelizationPerCPU = 1, debug = false } = {}) { 12 | const uniqueId = crypto.randomBytes(16).toString('hex'); 13 | 14 | this.tmpPath = `${tmpPath}/${workerFileName}-${uniqueId}.js`; 15 | this.workerFile = null; 16 | this.parallelization = parallelization; 17 | this.parallelizationPerCPU = parallelizationPerCPU; 18 | this.threadsCount = 1; 19 | this.debug = debug; 20 | } 21 | 22 | parallelizerFunction = ({ filePath, processBatchFunctionName }) => { 23 | const threadCode = `const {${processBatchFunctionName}: processBatch} = require('${filePath}'); ${templateThreadCode}` 24 | this.workerFile = this._createWorkerFile(threadCode); 25 | 26 | this.threadsCount = (typeof this.parallelization === 'number') ? this.parallelization : this._getThreadsCount(); 27 | } 28 | 29 | runBatch = async (batch, params = null) => { 30 | // Get the amount of messages per batch. 31 | const batchCount = (batch.length < this.threadsCount) ? 1 : batch.length / this.threadsCount; 32 | 33 | // Create the batches 34 | const batches = findSubsets(batch, batchCount); 35 | 36 | // Process the batches using the threads. 
37 | return await this._processBatchesInThreads(batches, params); 38 | } 39 | 40 | removeWorkerThreads() { 41 | this._removeThreadFile(); 42 | } 43 | removeChildThreads() { 44 | this._removeThreadFile(); 45 | } 46 | 47 | _processBatchesInThreads = async (batches, params = null) => { 48 | const batchesCount = batches.length; 49 | const threadResponses = { 50 | responses: [], 51 | failures: [] 52 | }; 53 | 54 | let responsesReceived = 0; 55 | 56 | await new Promise((resolve, reject) => { 57 | for (let id = 0; id < batchesCount; id++) { 58 | const worker = new Worker(this.tmpPath, { workerData: { id, batch: batches[id], params } }); 59 | worker.on('error', (error) => { 60 | logger({ 61 | message: `Thread #${id} error message: ${error.message}`, 62 | params: { 63 | thread_id: id, 64 | error_message: error.message 65 | }, 66 | debug: this.debug 67 | }) 68 | threadResponses.failures.push(error.message); 69 | 70 | // In case a thread exists without sending a message. 71 | if (++responsesReceived == batchesCount) { 72 | resolve('DONE'); 73 | } 74 | }); 75 | 76 | worker.on('message', ({ reponse, status, errorMessage }) => { 77 | logger({ 78 | message: `Thread #${id} status message: ${status}`, 79 | params: { 80 | thread_id: id, 81 | status 82 | }, 83 | debug: this.debug 84 | }) 85 | 86 | if (status == 'FAILED') { 87 | logger({ 88 | message: `Thread #${id} error message: ${errorMessage}`, 89 | params: { 90 | thread_id: id, 91 | error_message: errorMessage 92 | }, 93 | debug: this.debug 94 | }) 95 | threadResponses.failures.push(errorMessage); 96 | } else if (status == 'SUCCESS') { 97 | threadResponses.responses.push(reponse); 98 | } 99 | 100 | if (++responsesReceived == batchesCount) { 101 | resolve('DONE'); 102 | } 103 | }); 104 | } 105 | }) 106 | 107 | return threadResponses; 108 | } 109 | 110 | _removeThreadFile() { 111 | if (!fs.existsSync(this.tmpPath)) 112 | return; 113 | 114 | try { 115 | fs.unlinkSync(this.tmpPath); 116 | } catch (error) { 117 | console.error(`Failed to remove temporary child process file: ${error.message}`); 118 | } 119 | } 120 | 121 | _getThreadsCount = () => { 122 | const cpuData = os.cpus(); 123 | return cpuData.length * this.parallelizationPerCPU; 124 | } 125 | 126 | _createWorkerFile(childCode) { 127 | fs.writeFileSync(this.tmpPath, childCode); 128 | } 129 | } 130 | 131 | const findSubsets = (array, n) => { 132 | return array.reduce((all, one, i) => { 133 | const ch = Math.floor(i / n); 134 | all[ch] = [].concat((all[ch] || []), one); 135 | return all 136 | }, []) 137 | } 138 | 139 | const templateThreadCode = ` 140 | const { workerData, parentPort } = require("worker_threads"); 141 | 142 | (async () => { 143 | try { 144 | const reponse = await processBatch({ batch: workerData.batch, params: workerData.params }); 145 | parentPort.postMessage({ reponse, status: "SUCCESS" }); 146 | } catch (err) { 147 | parentPort.postMessage({ status: "FAILED", errorMessage: err.toString() }); 148 | } 149 | })(); 150 | `; 151 | 152 | const logger = ({ message, params = {}, debug = false, logType = 'log' }) => { 153 | if (!debug) { 154 | return 155 | } 156 | 157 | const logMsg = Object.assign({}, params); 158 | logMsg.message = message; 159 | 160 | console[logType](JSON.stringify(logMsg)); 161 | } 162 | 163 | module.exports = WorkerThreads; -------------------------------------------------------------------------------- /test/benchmark-2.js: -------------------------------------------------------------------------------- 1 | const Benchmark = require('benchmark'); 2 | const { 
Parallelizer, PARALLELIZER_CHILD, PARALLELIZER_THREADS } = require('../src/index'); 3 | const { batchProcessorOnlyCPU, batchProcessorOnlyIO } = require('../examples/basic/src/parallelizer-code'); 4 | const path = require('path'); 5 | 6 | 7 | const relativePath = '../examples/basic/src/parallelizer-code'; 8 | const absolutePath = path.resolve(__dirname, relativePath); 9 | 10 | const parallelizer = new Parallelizer([ 11 | { id: "only-cpu", type: PARALLELIZER_THREADS, parallelization: 4, filePath: absolutePath, processBatchFunctionName: 'batchProcessorOnlyCPU' }, 12 | { id: "only-io", type: PARALLELIZER_CHILD, parallelization: 4, filePath: absolutePath, processBatchFunctionName: 'batchProcessorOnlyIO' }, 13 | ]); 14 | 15 | const batch = [...Array(100).keys()]; 16 | const batch2 = [...Array(100).keys()]; 17 | 18 | 19 | const p = (fn) => { 20 | return { 21 | defer: true, 22 | async fn(deferred) { 23 | await fn(); 24 | deferred.resolve(); 25 | } 26 | } 27 | } 28 | 29 | const suite = new Benchmark.Suite; 30 | // add tests 31 | suite 32 | .add('Child + Thread Parallelizers', p(async () => { 33 | await parallelizer.run([ 34 | { id: "only-cpu", batch }, 35 | { id: "only-io", batch: batch2 }, 36 | ]) 37 | })) 38 | .add('JavaSCript Promise.All', p(async () => { 39 | await Promise.all([ 40 | batchProcessorOnlyCPU({ batch }), 41 | batchProcessorOnlyIO({ batch: batch2 }) 42 | ]) 43 | 44 | })) 45 | // add listeners 46 | .on('cycle', function (event) { 47 | parallelizer.removeChildThreads(); 48 | console.log(String(event.target)); 49 | }) 50 | .on('complete', function () { 51 | console.log('\nResult: '); 52 | console.log('Fastest is ' + this.filter('fastest').map('name')); 53 | console.log('Slowest is ' + this.filter('slowest').map('name')); 54 | }) 55 | // run async 56 | .run({ 'async': true }); -------------------------------------------------------------------------------- /test/benchmark.js: -------------------------------------------------------------------------------- 1 | const Benchmark = require('benchmark'); 2 | const { Parallelizer, PARALLELIZER_CHILD, PARALLELIZER_THREADS } = require('../src/index'); 3 | const { batchProcessor, batchProcessor2 } = require('../examples/basic/src/parallelizer-code'); 4 | const path = require('path'); 5 | 6 | 7 | const relativePath = '../examples/basic/src/parallelizer-code'; 8 | const absolutePath = path.resolve(__dirname, relativePath); 9 | 10 | const childParallelizer = new Parallelizer({ type: PARALLELIZER_CHILD, parallelizationPerCPU: 3, filePath: absolutePath, processBatchFunctionName: 'batchProcessor2' }); 11 | const threadParallelizer = new Parallelizer({ type: PARALLELIZER_THREADS, parallelizationPerCPU: 3, filePath: absolutePath, processBatchFunctionName: 'batchProcessor2' }); 12 | 13 | const batch = [...Array(100).keys()]; 14 | 15 | 16 | const p = (fn) => { 17 | return { 18 | defer: true, 19 | async fn(deferred) { 20 | await fn(); 21 | deferred.resolve(); 22 | } 23 | } 24 | } 25 | 26 | const suite = new Benchmark.Suite; 27 | // add tests 28 | suite 29 | .add('Child Parallelizer', p(async () => { 30 | await childParallelizer.run(batch); 31 | })) 32 | .add('Thread Parallelizer', p(async () => { 33 | await threadParallelizer.run(batch); 34 | })) 35 | .add('Without Parallelizer', p(async () => { 36 | await batchProcessor2({ batch }); 37 | })) 38 | // add listeners 39 | .on('cycle', function (event) { 40 | console.log(String(event.target)); 41 | }) 42 | .on('complete', function () { 43 | childParallelizer.removeChildThreads(); 44 | 
threadParallelizer.removeChildThreads(); 45 | 46 | console.log('\nResult: '); 47 | console.log('Fastest is ' + this.filter('fastest').map('name')); 48 | console.log('Slowest is ' + this.filter('slowest').map('name')); 49 | }) 50 | // run async 51 | .run({ 'async': true }); --------------------------------------------------------------------------------