├── .gitignore
├── .npmignore
├── LICENSE
├── README.md
├── examples
│   ├── basic
│   │   ├── README.md
│   │   ├── package-lock.json
│   │   ├── package.json
│   │   ├── serverless.yml
│   │   └── src
│   │       ├── lambda-parallelizer.js
│   │       └── parallelizer-code.js
│   └── with-bundler
│       ├── README.md
│       ├── package-lock.json
│       ├── package.json
│       ├── serverless.yml
│       └── src
│           ├── lambda-parallelizer.js
│           ├── parallelizer-code-min.js
│           └── parallelizer-code.js
├── images
│   ├── node-parallelizer-package.png
│   └── node-parallelizer.png
├── package-lock.json
├── package.json
├── src
│   ├── child-process.js
│   ├── index.js
│   └── worker-thread.js
└── test
    ├── benchmark-2.js
    └── benchmark.js
/.gitignore:
--------------------------------------------------------------------------------
1 | node_modules
2 | .serverless/
3 | .DS_Store
--------------------------------------------------------------------------------
/.npmignore:
--------------------------------------------------------------------------------
1 | images/
2 | test/
3 | examples/
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | MIT License
2 |
3 | Copyright (c) 2023 Eduardo Marcos
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # Node Parallelizer
2 | A NodeJS package for running code in parallel. Initially created to provide multiprocessing in an **AWS Lambda function**, but it can be used in any NodeJS environment.
3 |
4 | ## Supported parallelizers
5 | - Child Process
6 | - Worker threads
7 |
8 | ### Child Process Parallelizer
9 | This parallelizer is designed specifically for creating new Node.js processes. These processes include an extra communication channel that enables message exchange between the parent and child processes.
10 |
11 | Bear in mind that the created Node.js child processes are independent from the parent process, except for the IPC communication channel connecting them. Each child process has its own memory and V8 instance. Due to the extra resources required for these allocations, it is not advisable to create a large number of child Node.js processes.
12 |
13 | It uses the NodeJS [child process module](https://nodejs.org/api/child_process.html) behind the scenes.
14 |
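The minimal sketch below illustrates the underlying mechanism this parallelizer builds on (Node's `child_process.fork()` plus the IPC channel). It is an illustration only, not this package's internal code.

```javascript
// Illustration only: a forked Node.js process and its IPC channel.
// parent.js
const { fork } = require('child_process');

const child = fork('./child.js'); // spawns a new Node.js process with an IPC channel
child.on('message', (msg) => {
  console.log('from child:', msg); // { doubled: [ 2, 4, 6 ] }
  child.disconnect();              // close the IPC channel so the child can exit
});
child.send({ batch: [1, 2, 3] });

// child.js (separate file)
// process.on('message', ({ batch }) => {
//   process.send({ doubled: batch.map((n) => n * 2) });
// });
```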
15 | ### Worker Threads Parallelizer
16 | This parallelizer enables the use of threads that execute JavaScript in parallel.
17 |
18 | These threads are beneficial for carrying out JavaScript tasks that demand significant CPU resources. However, they offer limited advantages for I/O-intensive tasks. Node.js's native asynchronous I/O operations or the child process parallelizer are more effective than this parallelizer type in such cases.
19 |
20 | It uses the NodeJS [worker threads module](https://nodejs.org/api/worker_threads.html) behind the scenes.
21 |
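For comparison, the snippet below shows the bare `worker_threads` primitive this parallelizer builds on; again, it is an illustration, not the package's internals.

```javascript
// Illustration only: one worker thread doing CPU-bound work off the main thread.
const { Worker, isMainThread, parentPort, workerData } = require('worker_threads');

if (isMainThread) {
  // Main thread: spawn a worker that runs this same file.
  const worker = new Worker(__filename, { workerData: { batch: [1, 2, 3] } });
  worker.on('message', (msg) => console.log('from worker:', msg)); // { sum: 6 }
} else {
  // Worker thread: the CPU-bound work runs here, in parallel with the main thread.
  const sum = workerData.batch.reduce((acc, n) => acc + n, 0);
  parentPort.postMessage({ sum });
}
```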
22 | ### Generic details
23 | When you call the `run(records)` method on a parallelizer instance, this package splits the list of records you provide into smaller subsets and executes your code against each subset in parallel.
24 |
25 | This package can detect the number of vCPU cores allocated to your execution environment and maximize their utilization. By default, it generates one child process/thread per vCPU core, but this setting can be customized to meet your specific requirements. Alternatively, you can manually specify the number of child processes/threads the library creates, regardless of the number of vCPU cores available.
26 |
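As a rough sketch of the idea (not the package's exact internals), the split can be pictured like this:

```javascript
// Illustration only: splitting a batch across one worker per vCPU core
// (the default when parallelization is false and parallelizationPerCPU is 1).
const os = require('os');

const records = [...Array(10).keys()];                 // the batch passed to run(records)
const workers = os.cpus().length;                      // e.g. 2 on a 2-vCPU environment
const chunkSize = Math.ceil(records.length / workers); // records per subset

const subsets = [];
for (let i = 0; i < records.length; i += chunkSize) {
  subsets.push(records.slice(i, i + chunkSize));
}
console.log(subsets); // with 2 vCPUs: [ [ 0, 1, 2, 3, 4 ], [ 5, 6, 7, 8, 9 ] ]
```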
27 | ## AWS Lambda & Node Parallelizer
28 | By default, it uses the Lambda execution environment's `/tmp` folder to create the required module that runs in the child process/thread.
29 |
30 | When you create an instance of the Child Process Parallelizer outside of the Lambda handler function, the child processes are reused across invocations within a Lambda instance, minimizing the cost of creating child processes on every invocation. Furthermore, if the package detects that any of the child processes has disconnected, it recreates it automatically without affecting the execution.
31 |
32 | ## Demonstration
33 | 
34 |
35 | ## Benchmark
36 |
37 | #### CPU & I/O operations (Parallelization per CPU = 1)
38 | ```bash
39 | $ node test/benchmark.js
40 | Child Parallelizer x 18.08 ops/sec
41 | Thread Parallelizer x 15.90 ops/sec
42 | Without Parallelizer x 2.79 ops/sec
43 |
44 | Result:
45 | Fastest is Child Parallelizer
46 | Slowest is Without Parallelizer
47 | ```
48 |
49 | #### CPU & I/O operations (Parallelization per CPU = 3)
50 | ```bash
51 | $ node test/benchmark.js
52 | Child Parallelizer x 17.01 ops/sec
53 | Thread Parallelizer x 7.72 ops/sec
54 | Without Parallelizer x 2.93 ops/sec
55 |
56 | Result:
57 | Fastest is Child Parallelizer
58 | Slowest is Without Parallelizer
59 | ```
60 |
61 | #### Child + Thread Parallelizers vs JavaScript Promise.all (Parallelization of 1)
62 | ```bash
63 | $ node test/benchmark-2.js
64 | Child + Thread Parallelizers x 8.15 ops/sec
65 | JavaScript Promise.all x 7.21 ops/sec
66 |
67 | Result:
68 | Fastest is Child + Thread Parallelizers
69 | Slowest is JavaScript Promise.all
70 | ```
71 |
72 | #### Child + Thread Parallelizers vs JavaScript Promise.all (Parallelization of 3)
73 | ```bash
74 | $ node test/benchmark-2.js
75 | Child + Thread Parallelizers x 16.42 ops/sec
76 | JavaScript Promise.all x 7.49 ops/sec
77 |
78 | Result:
79 | Fastest is Child + Thread Parallelizers
80 | Slowest is JavaScript Promise.all
81 | ```
82 | ## Installation
83 | ```bash
84 | npm i node-parallelizer --save
85 | ```
86 |
87 | ## Usage
88 |
89 | ### Parallelizer (Basic)
90 |
91 | #### Class instantiation
92 | `Parallelizer({ type = 'child-process', tmpPath = '/tmp', filePath, processBatchFunctionName, parallelization = false, parallelizationPerCPU = 1, debug = false })`
93 |
94 | **Parameters**
95 | - `type` (String) (Default value: 'child-process') (Options: 'child-process' | 'worker-threads'): The parallelizer type to be used.
96 | - `tmpPath` (String) (Default value: '/tmp'): The path where the module that runs in the child process/thread will be created.
97 | - `filePath` (String): The absolute path to the file that contains the function that will be executed in parallel.
98 | - `processBatchFunctionName` (String): The name of the function that will be executed in parallel.
99 | - `parallelization` (Number|false) (Default value: false): The exact number of processes/threads that will be created. If false, it is based on the CPU cores available.
100 | - `parallelizationPerCPU` (Number) (Default value: 1): If `parallelization` is set to `false`, this parameter defines the number of processes/threads per vCPU core.
101 | - `debug` (Boolean) (Default value: false): Enables the internal logs for debugging purposes.
102 | #### Main methods
103 | `run(batch, params = null)`
104 |
105 | **Parameters**
106 | - `batch` (Array): The records you want to process in parallel.
107 | - `params` (Object) (Default value: null): Parameters that will be passed to each child/thread process.
108 |
109 | **Returns** (Object): An object with the processes/threads' results, in the form `{ responses: [...], failures: [...] }`.
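
As a hypothetical illustration (the table name below is made up), shared parameters are passed as the second argument and arrive in your batch function next to the subset:

```javascript
// Hypothetical example: forwarding shared parameters to every child/thread.
const responses = await parallelizer.run(event.Records, { tableName: 'my-table' });

// In the file referenced by `filePath`, the same object is received alongside the subset:
// const batchProcessor = ({ batch, params }) => {
//   console.log(params.tableName); // 'my-table'
//   return { success: true, count: batch.length };
// };
```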
110 | #### Using the Node Parallelizer in AWS Lambda
111 | In this example, the repository structure looks like this:
112 | ```
113 | src/
114 | handler.js
115 | parallel.js
116 | serverless.yml
117 | package.json
118 | ```
119 |
120 | The snippet below represents your Lambda handler:
121 | ```javascript
122 | // handler.js
123 |
124 | const { Parallelizer, PARALLELIZER_CHILD, PARALLELIZER_THREADS } = require("node-parallelizer");
125 |
126 | // Creates a new parallelizer instance.
127 | const parallelizer = new Parallelizer({ type: PARALLELIZER_CHILD, filePath: "/var/task/src/parallel.js", processBatchFunctionName: 'batchProcessor' });
128 |
129 | module.exports.handler = async (event) => {
130 |   // Run batch in parallel
131 |   const responses = await parallelizer.run(event.Records);
132 | 
133 |   console.log(responses);
134 | };
135 |
136 | ```
137 | > Make sure to provide the filePath parameter as an absolute path. In this example, we've included the '/var/task/' prefix in the path because Lambda deploys your code under that folder.
138 |
139 | The snippet below represents the code you want to run in parallel:
140 | ```javascript
141 | // parallel.js
142 |
143 | const batchProcessor = ({ batch }) => {
144 |
145 |   //
146 |   // YOUR CODE HERE
147 |   //
148 | 
149 |   return { success: true, count: batch.length };
150 | }
151 |
152 |
153 | module.exports = { batchProcessor }
154 |
155 | ```
156 | > Verify that the input signature of your function (in this case, batchProcessor) includes batch as a parameter, as it contains the subset of records that a child process will handle.
157 |
158 |
159 |
160 |
161 | ### Parallelizer (Advanced)
162 |
163 | #### Class instantiation
164 | `Parallelizer([{ id: "only-cpu", type = 'worker-threads', tmpPath = '/tmp', filePath, processBatchFunctionName, parallelization = false, parallelizationPerCPU = 1, debug = false }, { id: "only-io", type = 'child-process', tmpPath = '/tmp', filePath, processBatchFunctionName, parallelization = false, parallelizationPerCPU = 1, debug = false }])`
165 |
166 | **Parameters**
167 | - List of:
168 | - `id` (String): The unique identifier for your Child/Thread internal instance.
169 | - `type` (String) (Default value: 'child-process') (Options: 'child-process' | 'worker-threads'): The parallelizer type to be used.
170 | - `tmpPath` (String) (Default value: '/tmp'): The path where the module that runs in the child process/thread will be created.
171 | - `filePath` (String): The absolute path to the file that contains the function that will be executed in parallel.
172 | - `processBatchFunctionName` (String): The name of the function that will be executed in parallel.
173 | - `parallelization` (Number|false) (Default value: false): The exact number of processes/threads that will be created. If false, it is based on the CPU cores available.
174 | - `parallelizationPerCPU` (Number) (Default value: 1): If `parallelization` is set to `false`, this parameter defines the number of processes/threads per vCPU core.
175 | - `debug` (Boolean) (Default value: false): Enables the internal logs for debugging purposes.
176 | #### Main methods
177 | `run([{ id: "only-cpu", batch: batchOne, params: { var: 1 } }, { id: "only-io", batch: batchTwo }])`
178 |
179 | **Parameters**
180 | - `id` (String): The unique identifier for your Child/Thread internal instance.
181 | - `batch` (Array): The records you want to process in parallel.
182 | - `params` (Object) (Default value: null): Parameters that will be passed to each child/thread process.
183 |
184 | **Returns** (Array): A list with one result per parallelizer, each in the form `{ responses: [...], failures: [...] }`.
185 | #### Using the Node Parallelizer in AWS Lambda
186 | In this example, the repository structure looks like this:
187 | ```
188 | src/
189 | handler.js
190 | parallel.js
191 | serverless.yml
192 | package.json
193 | ```
194 |
195 | The snippet below represents your Lambda handler:
196 | ```javascript
197 | // handler.js
198 |
199 | const { Parallelizer, PARALLELIZER_CHILD, PARALLELIZER_THREADS } = require("node-parallelizer");
200 |
201 | // Creates a new parallelizer instance with multiple different parallelizers.
202 | const parallelizer = new Parallelizer([
203 |   { id: "with-threads", type: PARALLELIZER_THREADS, parallelization: 2, filePath: "/var/task/src/parallel.js", processBatchFunctionName: 'batchProcessorOne' },
204 |   { id: "with-processes", type: PARALLELIZER_CHILD, parallelization: 4, filePath: "/var/task/src/parallel.js", processBatchFunctionName: 'batchProcessorTwo' },
205 | ]);
206 |
207 | module.exports.handler = async (event) => {
208 |   // Run batch in parallel
209 |   const responses = await parallelizer.run([
210 |     { id: "with-threads", batch: event.batchOne },
211 |     { id: "with-processes", batch: event.batchTwo },
212 |   ]);
213 | 
214 |   console.log(responses);
215 | };
216 |
217 | ```
218 | > Make sure to provide the filePath parameter as an absolute path. In this example, we've included the '/var/task/' prefix in the path because Lambda deploys your code under that folder.
219 |
220 | > Notice that we have added a new parameter called 'id'. This is used to distinguish between the various types of parallelizers and to pass the appropriate batch.
221 |
222 | The snippet below represents the code you want to run in parallel:
223 | ```javascript
224 | // parallel.js
225 |
226 | const batchProcessorOne = ({ batch }) => {
227 |
228 |   //
229 |   // YOUR CODE HERE
230 |   //
231 | 
232 |   return { success: true, count: batch.length };
233 | }
234 | 
235 | const batchProcessorTwo = ({ batch }) => {
236 | 
237 |   //
238 |   // YOUR CODE HERE
239 |   //
240 | 
241 |   return { success: true, count: batch.length };
242 | }
243 |
244 |
245 | module.exports = { batchProcessorOne, batchProcessorTwo }
246 |
247 | ```
248 | > Verify that the input signature of your function (in this case, batchProcessorOne and batchProcessorTwo) includes batch as a parameter, as it contains the subset of records that a child process will handle.
249 |
250 |
251 |
252 |
253 | ## Examples
254 |
255 | 1. [Basic](https://github.com/Edujugon/node-parallelizer/tree/main/examples/basic)
256 | 2. [With Bundler](https://github.com/Edujugon/node-parallelizer/tree/main/examples/with-bundler)
257 |
258 | ## Contribution
259 | We welcome contributions to this project. If you are interested in contributing, please feel free to submit a pull request.
260 |
--------------------------------------------------------------------------------
/examples/basic/README.md:
--------------------------------------------------------------------------------
1 | ## Node-Parallelizer basic example
2 |
3 | ### Install dependencies
4 | ```
5 | npm install
6 | ```
7 |
8 | ### Deploy
9 | ```
10 | sls deploy
11 | ```
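
### Invoke (example)
Once deployed, you can send a test payload with the Serverless CLI; the handler reads `event.number` (function name taken from this example's `serverless.yml`):
```
sls invoke -f lambda-parallelizer -d '{"number": 100}'
```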
12 |
13 |
14 |
--------------------------------------------------------------------------------
/examples/basic/package-lock.json:
--------------------------------------------------------------------------------
1 | {
2 | "name": "child-process",
3 | "version": "1.0.0",
4 | "lockfileVersion": 2,
5 | "requires": true,
6 | "packages": {
7 | "": {
8 | "name": "child-process",
9 | "version": "1.0.0",
10 | "license": "ISC",
11 | "dependencies": {
12 | "axios": "^1.4.0",
13 | "node-parallelizer": "^3.0.0"
14 | }
15 | },
16 | "node_modules/asynckit": {
17 | "version": "0.4.0",
18 | "resolved": "https://registry.npmjs.org/asynckit/-/asynckit-0.4.0.tgz",
19 | "integrity": "sha512-Oei9OH4tRh0YqU3GxhX79dM/mwVgvbZJaSNaRk+bshkj0S5cfHcgYakreBjrHwatXKbz+IoIdYLxrKim2MjW0Q=="
20 | },
21 | "node_modules/axios": {
22 | "version": "1.4.0",
23 | "resolved": "https://registry.npmjs.org/axios/-/axios-1.4.0.tgz",
24 | "integrity": "sha512-S4XCWMEmzvo64T9GfvQDOXgYRDJ/wsSZc7Jvdgx5u1sd0JwsuPLqb3SYmusag+edF6ziyMensPVqLTSc1PiSEA==",
25 | "dependencies": {
26 | "follow-redirects": "^1.15.0",
27 | "form-data": "^4.0.0",
28 | "proxy-from-env": "^1.1.0"
29 | }
30 | },
31 | "node_modules/combined-stream": {
32 | "version": "1.0.8",
33 | "resolved": "https://registry.npmjs.org/combined-stream/-/combined-stream-1.0.8.tgz",
34 | "integrity": "sha512-FQN4MRfuJeHf7cBbBMJFXhKSDq+2kAArBlmRBvcvFE5BB1HZKXtSFASDhdlz9zOYwxh8lDdnvmMOe/+5cdoEdg==",
35 | "dependencies": {
36 | "delayed-stream": "~1.0.0"
37 | },
38 | "engines": {
39 | "node": ">= 0.8"
40 | }
41 | },
42 | "node_modules/delayed-stream": {
43 | "version": "1.0.0",
44 | "resolved": "https://registry.npmjs.org/delayed-stream/-/delayed-stream-1.0.0.tgz",
45 | "integrity": "sha512-ZySD7Nf91aLB0RxL4KGrKHBXl7Eds1DAmEdcoVawXnLD7SDhpNgtuII2aAkg7a7QS41jxPSZ17p4VdGnMHk3MQ==",
46 | "engines": {
47 | "node": ">=0.4.0"
48 | }
49 | },
50 | "node_modules/follow-redirects": {
51 | "version": "1.15.2",
52 | "resolved": "https://registry.npmjs.org/follow-redirects/-/follow-redirects-1.15.2.tgz",
53 | "integrity": "sha512-VQLG33o04KaQ8uYi2tVNbdrWp1QWxNNea+nmIB4EVM28v0hmP17z7aG1+wAkNzVq4KeXTq3221ye5qTJP91JwA==",
54 | "funding": [
55 | {
56 | "type": "individual",
57 | "url": "https://github.com/sponsors/RubenVerborgh"
58 | }
59 | ],
60 | "engines": {
61 | "node": ">=4.0"
62 | },
63 | "peerDependenciesMeta": {
64 | "debug": {
65 | "optional": true
66 | }
67 | }
68 | },
69 | "node_modules/form-data": {
70 | "version": "4.0.0",
71 | "resolved": "https://registry.npmjs.org/form-data/-/form-data-4.0.0.tgz",
72 | "integrity": "sha512-ETEklSGi5t0QMZuiXoA/Q6vcnxcLQP5vdugSpuAyi6SVGi2clPPp+xgEhuMaHC+zGgn31Kd235W35f7Hykkaww==",
73 | "dependencies": {
74 | "asynckit": "^0.4.0",
75 | "combined-stream": "^1.0.8",
76 | "mime-types": "^2.1.12"
77 | },
78 | "engines": {
79 | "node": ">= 6"
80 | }
81 | },
82 | "node_modules/mime-db": {
83 | "version": "1.52.0",
84 | "resolved": "https://registry.npmjs.org/mime-db/-/mime-db-1.52.0.tgz",
85 | "integrity": "sha512-sPU4uV7dYlvtWJxwwxHD0PuihVNiE7TyAbQ5SWxDCB9mUYvOgroQOwYQQOKPJ8CIbE+1ETVlOoK1UC2nU3gYvg==",
86 | "engines": {
87 | "node": ">= 0.6"
88 | }
89 | },
90 | "node_modules/mime-types": {
91 | "version": "2.1.35",
92 | "resolved": "https://registry.npmjs.org/mime-types/-/mime-types-2.1.35.tgz",
93 | "integrity": "sha512-ZDY+bPm5zTTF+YpCrAU9nK0UgICYPT0QtT1NZWFv4s++TNkcgVaT0g6+4R2uI4MjQjzysHB1zxuWL50hzaeXiw==",
94 | "dependencies": {
95 | "mime-db": "1.52.0"
96 | },
97 | "engines": {
98 | "node": ">= 0.6"
99 | }
100 | },
101 | "node_modules/node-parallelizer": {
102 | "version": "3.0.0",
103 | "resolved": "https://registry.npmjs.org/node-parallelizer/-/node-parallelizer-3.0.0.tgz",
104 | "integrity": "sha512-QiR8yRsF4MTR9UnIWYPwpny81wrEL6oF7KUkEXPiwpgYxs5hJUu/64V+/RFGtoY3QUqwMFJ3eueS0gFno2FMww=="
105 | },
106 | "node_modules/proxy-from-env": {
107 | "version": "1.1.0",
108 | "resolved": "https://registry.npmjs.org/proxy-from-env/-/proxy-from-env-1.1.0.tgz",
109 | "integrity": "sha512-D+zkORCbA9f1tdWRK0RaCR3GPv50cMxcrz4X8k5LTSUD1Dkw47mKJEZQNunItRTkWwgtaUSo1RVFRIG9ZXiFYg=="
110 | }
111 | },
112 | "dependencies": {
113 | "asynckit": {
114 | "version": "0.4.0",
115 | "resolved": "https://registry.npmjs.org/asynckit/-/asynckit-0.4.0.tgz",
116 | "integrity": "sha512-Oei9OH4tRh0YqU3GxhX79dM/mwVgvbZJaSNaRk+bshkj0S5cfHcgYakreBjrHwatXKbz+IoIdYLxrKim2MjW0Q=="
117 | },
118 | "axios": {
119 | "version": "1.4.0",
120 | "resolved": "https://registry.npmjs.org/axios/-/axios-1.4.0.tgz",
121 | "integrity": "sha512-S4XCWMEmzvo64T9GfvQDOXgYRDJ/wsSZc7Jvdgx5u1sd0JwsuPLqb3SYmusag+edF6ziyMensPVqLTSc1PiSEA==",
122 | "requires": {
123 | "follow-redirects": "^1.15.0",
124 | "form-data": "^4.0.0",
125 | "proxy-from-env": "^1.1.0"
126 | }
127 | },
128 | "combined-stream": {
129 | "version": "1.0.8",
130 | "resolved": "https://registry.npmjs.org/combined-stream/-/combined-stream-1.0.8.tgz",
131 | "integrity": "sha512-FQN4MRfuJeHf7cBbBMJFXhKSDq+2kAArBlmRBvcvFE5BB1HZKXtSFASDhdlz9zOYwxh8lDdnvmMOe/+5cdoEdg==",
132 | "requires": {
133 | "delayed-stream": "~1.0.0"
134 | }
135 | },
136 | "delayed-stream": {
137 | "version": "1.0.0",
138 | "resolved": "https://registry.npmjs.org/delayed-stream/-/delayed-stream-1.0.0.tgz",
139 | "integrity": "sha512-ZySD7Nf91aLB0RxL4KGrKHBXl7Eds1DAmEdcoVawXnLD7SDhpNgtuII2aAkg7a7QS41jxPSZ17p4VdGnMHk3MQ=="
140 | },
141 | "follow-redirects": {
142 | "version": "1.15.2",
143 | "resolved": "https://registry.npmjs.org/follow-redirects/-/follow-redirects-1.15.2.tgz",
144 | "integrity": "sha512-VQLG33o04KaQ8uYi2tVNbdrWp1QWxNNea+nmIB4EVM28v0hmP17z7aG1+wAkNzVq4KeXTq3221ye5qTJP91JwA=="
145 | },
146 | "form-data": {
147 | "version": "4.0.0",
148 | "resolved": "https://registry.npmjs.org/form-data/-/form-data-4.0.0.tgz",
149 | "integrity": "sha512-ETEklSGi5t0QMZuiXoA/Q6vcnxcLQP5vdugSpuAyi6SVGi2clPPp+xgEhuMaHC+zGgn31Kd235W35f7Hykkaww==",
150 | "requires": {
151 | "asynckit": "^0.4.0",
152 | "combined-stream": "^1.0.8",
153 | "mime-types": "^2.1.12"
154 | }
155 | },
156 | "mime-db": {
157 | "version": "1.52.0",
158 | "resolved": "https://registry.npmjs.org/mime-db/-/mime-db-1.52.0.tgz",
159 | "integrity": "sha512-sPU4uV7dYlvtWJxwwxHD0PuihVNiE7TyAbQ5SWxDCB9mUYvOgroQOwYQQOKPJ8CIbE+1ETVlOoK1UC2nU3gYvg=="
160 | },
161 | "mime-types": {
162 | "version": "2.1.35",
163 | "resolved": "https://registry.npmjs.org/mime-types/-/mime-types-2.1.35.tgz",
164 | "integrity": "sha512-ZDY+bPm5zTTF+YpCrAU9nK0UgICYPT0QtT1NZWFv4s++TNkcgVaT0g6+4R2uI4MjQjzysHB1zxuWL50hzaeXiw==",
165 | "requires": {
166 | "mime-db": "1.52.0"
167 | }
168 | },
169 | "node-parallelizer": {
170 | "version": "3.0.0",
171 | "resolved": "https://registry.npmjs.org/node-parallelizer/-/node-parallelizer-3.0.0.tgz",
172 | "integrity": "sha512-QiR8yRsF4MTR9UnIWYPwpny81wrEL6oF7KUkEXPiwpgYxs5hJUu/64V+/RFGtoY3QUqwMFJ3eueS0gFno2FMww=="
173 | },
174 | "proxy-from-env": {
175 | "version": "1.1.0",
176 | "resolved": "https://registry.npmjs.org/proxy-from-env/-/proxy-from-env-1.1.0.tgz",
177 | "integrity": "sha512-D+zkORCbA9f1tdWRK0RaCR3GPv50cMxcrz4X8k5LTSUD1Dkw47mKJEZQNunItRTkWwgtaUSo1RVFRIG9ZXiFYg=="
178 | }
179 | }
180 | }
181 |
--------------------------------------------------------------------------------
/examples/basic/package.json:
--------------------------------------------------------------------------------
1 | {
2 | "name": "child-process",
3 | "version": "1.0.0",
4 | "description": "",
5 | "main": "index.js",
6 | "scripts": {
7 | "test": "echo \"Error: no test specified\" && exit 1",
8 | "parallelizer": "node_modules/esbuild/bin/esbuild ./src/parallelizer-code.js --bundle --platform=node --outfile=./src/parallelizer-code-min.js"
9 | },
10 | "keywords": [],
11 | "author": "",
12 | "license": "ISC",
13 | "dependencies": {
14 | "axios": "^1.4.0",
15 | "node-parallelizer": "^3.0.0"
16 | }
17 | }
18 |
--------------------------------------------------------------------------------
/examples/basic/serverless.yml:
--------------------------------------------------------------------------------
1 | service: my-service-using-node-parallelizer
2 |
3 | provider:
4 | name: aws
5 | stage: ${opt:stage, "dev"}
6 | region: ${opt:region, 'us-east-2'}
7 | runtime: nodejs18.x
8 |
9 | functions:
10 | lambda-parallelizer:
11 | handler: src/lambda-parallelizer.handler
12 | memorySize: 2000
13 | timeout: 120
14 | environment:
15 | PROCESSESPERCPU: 1
16 | PARALLELIZER_TYPE: child-process
17 | PARALLELIZER_DEBUG_MODE_ENABLED: false
--------------------------------------------------------------------------------
/examples/basic/src/lambda-parallelizer.js:
--------------------------------------------------------------------------------
1 | const { Parallelizer, PARALLELIZER_CHILD, PARALLELIZER_THREADS } = require("node-parallelizer");
2 |
3 | const parallelizerType = process.env.PARALLELIZER_TYPE || PARALLELIZER_CHILD;
4 | const parallelizerDebug = process.env.PARALLELIZER_DEBUG_MODE_ENABLED === 'true' ? true : false;
5 |
6 | const parallelizer = new Parallelizer({
7 | type: parallelizerType,
8 | debug: parallelizerDebug,
9 | parallelizationPerCPU: process.env.PROCESSESPERCPU || 1,
10 | filePath: "/var/task/src/parallelizer-code.js",
11 | processBatchFunctionName: 'batchProcessor'
12 | });
13 |
14 |
15 | exports.handler = async (event) => {
16 |
17 | const batch = [...Array(event.number).keys()];
18 |
19 | // Run batch in parallel
20 | const responses = await parallelizer.run(batch);
21 |
22 | console.log(JSON.stringify(responses));
23 | };
--------------------------------------------------------------------------------
/examples/basic/src/parallelizer-code.js:
--------------------------------------------------------------------------------
1 | // const axios = require('axios');
2 | const fs = require('fs');
3 | const crypto = require('crypto');
4 |
5 | const batchProcessor = async ({ batch, params }) => {
6 | let sum = 0;
7 |
8 | for (let id = 0; id <= (batch.length * 1000000); id++) {
9 | sum += id;
10 | }
11 |
12 | // const response = await axios.get('https://httpbin.org/get?key=123');
13 |
14 | let totalFiles = 0;
15 | for (let id = 0; id <= (batch.length); id++) {
16 | try {
17 | const uniqueId = crypto.randomBytes(16).toString('hex');
18 | const file = `/tmp/example-file-${uniqueId}.txt`;
19 |
20 | fs.writeFileSync(file, '***Random Code***');
21 | if (fs.existsSync(file)) {
22 | fs.unlinkSync(file);
23 | }
24 | totalFiles++;
25 | } catch (err) {
26 | console.log(err.message);
27 | }
28 | }
29 |
30 | return { sum, totalFiles };
31 | }
32 |
33 | const batchProcessor2 = async ({ batch, params }) => {
34 | let sum = 0;
35 |
36 | for (let id = 0; id <= (batch.length * 1000000); id++) {
37 | sum += id;
38 | }
39 |
40 | // const response = await axios.get('https://httpbin.org/get?key=123');
41 |
42 | let totalFiles = 0;
43 | const promises = [];
44 | for (let id = 0; id <= (batch.length); id++) {
45 | promises.push(createAndDeleteFile());
46 | totalFiles++;
47 | }
48 |
49 | await Promise.all(promises);
50 |
51 | return { sum, totalFiles };
52 | }
53 |
54 | const createAndDeleteFile = () => {
55 | return new Promise((resolve, reject) => {
56 | try {
57 | const uniqueId = crypto.randomBytes(16).toString('hex');
58 | const file = `/tmp/example-file-${uniqueId}.txt`;
59 |
60 | fs.writeFileSync(file, '***Random Code***');
61 | if (fs.existsSync(file)) {
62 | fs.unlinkSync(file);
63 | }
64 | // console.log(file);
65 | resolve(true);
66 | } catch (err) {
67 | console.log(err.message);
68 | resolve(true);
69 | }
70 | })
71 | }
72 |
73 |
74 | const batchProcessorOnlyCPU = async ({ batch, params }) => {
75 | return new Promise((resolve, reject) => {
76 | let sum = 0;
77 |
78 | for (let id = 0; id <= (batch.length * 1000000); id++) {
79 | sum += id;
80 | }
81 |
82 | resolve({ sum });
83 | })
84 | }
85 |
86 | const batchProcessorOnlyIO = async ({ batch, params }) => {
87 |
88 | let totalFiles = 0;
89 | for (let id = 1; id <= (batch.length); id++) {
90 | try {
91 | const uniqueId = crypto.randomBytes(16).toString('hex');
92 | const file = `/tmp/example-file-${uniqueId}.txt`;
93 |
94 | fs.writeFileSync(file, '***Random Code***');
95 | if (fs.existsSync(file)) {
96 | fs.unlinkSync(file);
97 | }
98 | totalFiles++;
99 | } catch (err) {
100 | console.log(err.message);
101 | }
102 | }
103 |
104 | return { totalFiles };
105 | }
106 |
107 |
108 | module.exports = { batchProcessor, batchProcessor2, batchProcessorOnlyCPU, batchProcessorOnlyIO }
--------------------------------------------------------------------------------
/examples/with-bundler/README.md:
--------------------------------------------------------------------------------
1 | ## Node-Parallelizer example with bundler
2 |
3 | ### Install dependencies
4 | ```
5 | npm install
6 | ```
7 |
8 | ### Bundle up the parallelizer file
9 | ```
10 | npm run parallelizer
11 | ```
12 |
13 | ### Deploy
14 | ```
15 | sls deploy
16 | ```
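
### Invoke (example)
After deploying, you can test the function with a sample payload; the handler reads `event.number`:
```
sls invoke -f lambda-parallelizer -d '{"number": 100}'
```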
17 |
18 |
19 |
--------------------------------------------------------------------------------
/examples/with-bundler/package.json:
--------------------------------------------------------------------------------
1 | {
2 | "name": "child-process",
3 | "version": "1.0.0",
4 | "description": "",
5 | "main": "index.js",
6 | "scripts": {
7 | "test": "echo \"Error: no test specified\" && exit 1",
8 | "parallelizer": "node_modules/esbuild/bin/esbuild ./src/parallelizer-code.js --bundle --platform=node --outfile=./src/parallelizer-code-min.js"
9 | },
10 | "keywords": [],
11 | "author": "",
12 | "license": "ISC",
13 | "dependencies": {
14 | "axios": "^1.4.0",
15 | "esbuild": "^0.17.18",
16 | "node-parallelizer": "^3.0.0"
17 | },
18 | "devDependencies": {
19 | "serverless-esbuild": "^1.43.0",
20 | "serverless-prune-plugin": "^2.0.2"
21 | }
22 | }
23 |
--------------------------------------------------------------------------------
/examples/with-bundler/serverless.yml:
--------------------------------------------------------------------------------
1 | service: my-service-using-node-parallelizer
2 |
3 | provider:
4 | name: aws
5 | stage: ${opt:stage, "dev"}
6 | region: ${opt:region, 'us-east-2'}
7 | runtime: nodejs18.x
8 |
9 | plugins:
10 | - serverless-esbuild
11 | - serverless-prune-plugin
12 |
13 | package:
14 | individually: true
15 | exclude:
16 | - "**/*"
17 |
18 | custom:
19 | esbuild:
20 | bundle: true
21 | minify: false
22 | prune:
23 | automatic: true
24 | number: 1
25 |
26 | functions:
27 | lambda-parallelizer:
28 | handler: src/lambda-parallelizer.handler
29 | memorySize: 2000
30 | timeout: 120
31 | package:
32 | include:
33 | - src/parallelizer-code-min.js
34 | environment:
35 | PROCESSESPERCPU: 1
36 | PARALLELIZER_TYPE: child-process
37 | PARALLELIZER_DEBUG_MODE_ENABLED: false
--------------------------------------------------------------------------------
/examples/with-bundler/src/lambda-parallelizer.js:
--------------------------------------------------------------------------------
1 | const { Parallelizer, PARALLELIZER_CHILD, PARALLELIZER_THREADS } = require("node-parallelizer");
2 |
3 | const parallelizerType = process.env.PARALLELIZER_TYPE || PARALLELIZER_CHILD;
4 | const parallelizerDebug = process.env.PARALLELIZER_DEBUG_MODE_ENABLED === 'true' ? true : false;
5 |
6 | const parallelizer = new Parallelizer({
7 | type: parallelizerType,
8 | debug: parallelizerDebug,
9 | parallelizationPerCPU: process.env.PROCESSESPERCPU || 1,
10 | filePath: "/var/task/src/parallelizer-code-min.js",
11 | processBatchFunctionName: 'batchProcessor'
12 | });
13 |
14 | exports.handler = async (event) => {
15 |
16 | const batch = [...Array(event.number).keys()];
17 |
18 | // Run batch in parallel
19 | const responses = await parallelizer.run(batch);
20 |
21 | console.log(JSON.stringify(responses));
22 | };
--------------------------------------------------------------------------------
/examples/with-bundler/src/parallelizer-code-min.js:
--------------------------------------------------------------------------------
1 | // src/parallelizer-code.js
2 | var fs = require("fs");
3 | var crypto = require("crypto");
4 | var batchProcessor = async ({ batch }) => {
5 | let sum = 0;
6 | for (let id = 0; id <= batch.length * 1e6; id++) {
7 | sum += id;
8 | }
9 | let totalFiles = 0;
10 | for (let id = 0; id <= batch.length; id++) {
11 | try {
12 | const uniqueId = crypto.randomBytes(16).toString("hex");
13 | const file = `/tmp/example-file-${uniqueId}.txt`;
14 | fs.writeFileSync(file, "***Random Code***");
15 | if (fs.existsSync(file)) {
16 | fs.unlinkSync(file);
17 | }
18 | totalFiles++;
19 | } catch (err) {
20 | console.log(err.message);
21 | }
22 | }
23 | return { sum, totalFiles };
24 | };
25 | module.exports = { batchProcessor };
26 |
--------------------------------------------------------------------------------
/examples/with-bundler/src/parallelizer-code.js:
--------------------------------------------------------------------------------
1 | // const axios = require('axios');
2 | const fs = require('fs');
3 | const crypto = require('crypto');
4 |
5 | const batchProcessor = async ({ batch }) => {
6 | let sum = 0;
7 |
8 | for (let id = 0; id <= (batch.length * 1000000); id++) {
9 | sum += id;
10 | }
11 |
12 | // const response = await axios.get('https://httpbin.org/get?key=123');
13 |
14 | let totalFiles = 0;
15 | for (let id = 0; id <= (batch.length); id++) {
16 | try {
17 | const uniqueId = crypto.randomBytes(16).toString('hex');
18 | const file = `/tmp/example-file-${uniqueId}.txt`;
19 |
20 | fs.writeFileSync(file, '***Random Code***');
21 | if (fs.existsSync(file)) {
22 | fs.unlinkSync(file);
23 | }
24 | totalFiles++;
25 | } catch (err) {
26 | console.log(err.message);
27 | }
28 | }
29 |
30 | return { sum, totalFiles };
31 | }
32 |
33 |
34 | module.exports = { batchProcessor }
--------------------------------------------------------------------------------
/images/node-parallelizer-package.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Edujugon/node-parallelizer/f123c01daeeecd44bb000885c25580fdf10441b4/images/node-parallelizer-package.png
--------------------------------------------------------------------------------
/images/node-parallelizer.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Edujugon/node-parallelizer/f123c01daeeecd44bb000885c25580fdf10441b4/images/node-parallelizer.png
--------------------------------------------------------------------------------
/package-lock.json:
--------------------------------------------------------------------------------
1 | {
2 | "name": "node-parallelizer",
3 | "version": "2.2.0",
4 | "lockfileVersion": 2,
5 | "requires": true,
6 | "packages": {
7 | "": {
8 | "name": "node-parallelizer",
9 | "version": "2.2.0",
10 | "license": "MIT",
11 | "devDependencies": {
12 | "benchmark": "^2.1.4"
13 | }
14 | },
15 | "node_modules/benchmark": {
16 | "version": "2.1.4",
17 | "resolved": "https://registry.npmjs.org/benchmark/-/benchmark-2.1.4.tgz",
18 | "integrity": "sha512-l9MlfN4M1K/H2fbhfMy3B7vJd6AGKJVQn2h6Sg/Yx+KckoUA7ewS5Vv6TjSq18ooE1kS9hhAlQRH3AkXIh/aOQ==",
19 | "dev": true,
20 | "dependencies": {
21 | "lodash": "^4.17.4",
22 | "platform": "^1.3.3"
23 | }
24 | },
25 | "node_modules/lodash": {
26 | "version": "4.17.21",
27 | "resolved": "https://registry.npmjs.org/lodash/-/lodash-4.17.21.tgz",
28 | "integrity": "sha512-v2kDEe57lecTulaDIuNTPy3Ry4gLGJ6Z1O3vE1krgXZNrsQ+LFTGHVxVjcXPs17LhbZVGedAJv8XZ1tvj5FvSg==",
29 | "dev": true
30 | },
31 | "node_modules/platform": {
32 | "version": "1.3.6",
33 | "resolved": "https://registry.npmjs.org/platform/-/platform-1.3.6.tgz",
34 | "integrity": "sha512-fnWVljUchTro6RiCFvCXBbNhJc2NijN7oIQxbwsyL0buWJPG85v81ehlHI9fXrJsMNgTofEoWIQeClKpgxFLrg==",
35 | "dev": true
36 | }
37 | },
38 | "dependencies": {
39 | "benchmark": {
40 | "version": "2.1.4",
41 | "resolved": "https://registry.npmjs.org/benchmark/-/benchmark-2.1.4.tgz",
42 | "integrity": "sha512-l9MlfN4M1K/H2fbhfMy3B7vJd6AGKJVQn2h6Sg/Yx+KckoUA7ewS5Vv6TjSq18ooE1kS9hhAlQRH3AkXIh/aOQ==",
43 | "dev": true,
44 | "requires": {
45 | "lodash": "^4.17.4",
46 | "platform": "^1.3.3"
47 | }
48 | },
49 | "lodash": {
50 | "version": "4.17.21",
51 | "resolved": "https://registry.npmjs.org/lodash/-/lodash-4.17.21.tgz",
52 | "integrity": "sha512-v2kDEe57lecTulaDIuNTPy3Ry4gLGJ6Z1O3vE1krgXZNrsQ+LFTGHVxVjcXPs17LhbZVGedAJv8XZ1tvj5FvSg==",
53 | "dev": true
54 | },
55 | "platform": {
56 | "version": "1.3.6",
57 | "resolved": "https://registry.npmjs.org/platform/-/platform-1.3.6.tgz",
58 | "integrity": "sha512-fnWVljUchTro6RiCFvCXBbNhJc2NijN7oIQxbwsyL0buWJPG85v81ehlHI9fXrJsMNgTofEoWIQeClKpgxFLrg==",
59 | "dev": true
60 | }
61 | }
62 | }
63 |
--------------------------------------------------------------------------------
/package.json:
--------------------------------------------------------------------------------
1 | {
2 | "name": "node-parallelizer",
3 | "version": "3.1.0",
4 | "description": "A NodeJS package for running code in parallel. Initially created to provide multiprocessing in an AWS Lambda function, but it can be used in any NodeJS environment.",
5 | "main": "src/index.js",
6 | "scripts": {
7 | "test": "echo \"Error: no test specified\" && exit 1"
8 | },
9 | "repository": {
10 | "type": "git",
11 | "url": "git+https://github.com/Edujugon/node-parallelizer.git"
12 | },
13 | "keywords": [
14 | "parallelizer",
15 | "Lambda parallelizer",
16 | "child process",
17 | "parallelism",
18 | "worker thread",
19 | "nodejs parallel"
20 | ],
21 | "author": "Eduardo Marcos ",
22 | "license": "MIT",
23 | "bugs": {
24 | "url": "https://github.com/Edujugon/node-parallelizer/issues"
25 | },
26 | "homepage": "https://github.com/Edujugon/node-parallelizer#readme",
27 | "devDependencies": {
28 | "benchmark": "^2.1.4"
29 | }
30 | }
31 |
--------------------------------------------------------------------------------
/src/child-process.js:
--------------------------------------------------------------------------------
1 | "use strict";
2 |
3 | const { fork } = require('child_process');
4 | const os = require("os");
5 | const fs = require('fs');
6 | const crypto = require('crypto');
7 |
8 | const childFileName = "child-process-file";
9 |
10 | class ChildProcess {
11 | constructor({ tmpPath = '/tmp', parallelization = false, parallelizationPerCPU = 1, debug = false } = {}) {
12 | const uniqueId = crypto.randomBytes(16).toString('hex');
13 |
14 | this.tmpPath = `${tmpPath}/${childFileName}-${uniqueId}.js`;
15 | this.childFile = null;
16 | this.childProcesses = [];
17 | this.parallelization = parallelization;
18 | this.parallelizationPerCPU = parallelizationPerCPU;
19 |
20 | this.processesCount = 1;
21 | this.debug = debug;
22 | }
23 |
24 | createChildProcessFromCode({ callback, customCode = '' }) {
25 | const finalChildCode = `${customCode} ${templateChildCode} const processBatch = ${callback.toString()}`
26 | this.childFile = this._createChildFile(finalChildCode);
27 |
28 | this._createChildProcesses();
29 | }
30 |
31 | parallelizerFunction({ filePath, processBatchFunctionName }) {
32 | const finalChildCode = `const {${processBatchFunctionName}: processBatch} = require('${filePath}'); ${templateChildCode}`
33 | this.childFile = this._createChildFile(finalChildCode);
34 |
35 | this._createChildProcesses();
36 | }
37 |
38 | _createChildProcesses() {
39 | this.processesCount = (typeof this.parallelization === 'number') ? this.parallelization : this._getProcessesCount();
40 |
41 | for (let id = 0; id < this.processesCount; id++) {
42 | this.childProcesses.push(this._createFork());
43 | }
44 | }
45 |
46 | async runBatch(batch, params = null) {
47 | if (this.childProcesses.length === 0) {
48 | throw new Error('No child processes created. Please run "parallelizerFunction" or "createChildProcessFromCode" before "runBatch"')
49 | }
50 |
51 | // Get the amount of messages per batch.
52 | const batchCount = (batch.length < this.processesCount) ? 1 : batch.length / this.processesCount;
53 |
54 | // Create the batches
55 | const batches = findSubsets(batch, batchCount);
56 |
57 | // Process the batches using the child processes.
58 | return await this._processBatchesInForks(batches, params);
59 | }
60 |
61 | removeChildProcesses() {
62 | this.childProcesses.forEach(process => process.disconnect());
63 | this.childProcesses = [];
64 | this._removeChildFile();
65 | }
66 |
67 | removeChildThreads() {
68 | this.removeChildProcesses();
69 | }
70 |
71 | _removeForkEvents() {
72 | this.childProcesses.forEach(child => { child.removeAllListeners('exit'); child.removeAllListeners('message') });
73 | }
74 |
75 | async _processBatchesInForks(batches, params = null) {
76 | const batchesCount = batches.length;
77 | const childResponses = {
78 | responses: [],
79 | failures: []
80 | };
81 |
82 | let responsesReceived = 0;
83 |
84 | await new Promise((resolve, reject) => {
85 | for (let id = 0; id < batchesCount; id++) {
86 | // If a child has exited, then we recreate it.
87 | if (!this.childProcesses[id]?.connected) {
88 | logger({
89 | message: `Child process #${id} not connected`,
90 | params: {
91 | child_id: id,
92 | },
93 | debug: this.debug
94 | })
95 |
96 | this.childProcesses[id] = this._createFork();
97 | }
98 |
99 | this.childProcesses[id].on('exit', (code) => {
100 | logger({
101 | message: `Child process #${id} exited with code: ${code}`,
102 | params: {
103 | child_id: id,
104 | exit_code: code
105 | },
106 | debug: this.debug
107 | })
108 |
109 | // In case a child process exits without sending a message.
110 | if (++responsesReceived == batchesCount) {
111 | this._removeForkEvents();
112 | resolve('DONE');
113 | }
114 | });
115 |
116 | this.childProcesses[id].on('message', ({ type, logType = 'log', childLogMessage, childLogMessageParams = {}, reponse, status, errorMessage }) => {
117 | if (type == 'LOG') {
118 | logger({
119 | message: childLogMessage,
120 | params: {
121 | child_id: id,
122 | ...childLogMessageParams
123 | },
124 | debug: true,
125 | logType
126 | })
127 | return;
128 | }
129 |
130 | logger({
131 | message: `Child process #${id} status message: ${status}`,
132 | params: {
133 | child_id: id,
134 | status
135 | },
136 | debug: this.debug
137 | })
138 |
139 | if (status == 'FAILED') {
140 | logger({
141 | message: `Child process #${id} error message: ${errorMessage}`,
142 | params: {
143 | child_id: id,
144 | error_message: errorMessage
145 | },
146 | debug: this.debug
147 | })
148 | childResponses.failures.push(errorMessage);
149 | } else if (status == 'SUCCESS') {
150 | childResponses.responses.push(reponse);
151 | }
152 |
153 | if (++responsesReceived == batchesCount) {
154 | this._removeForkEvents();
155 | resolve('DONE');
156 | }
157 | });
158 |
159 | // Send message to child.
160 | this.childProcesses[id].send({ id, batch: batches[id], params });
161 | }
162 | })
163 |
164 | return childResponses;
165 | }
166 |
167 | _getProcessesCount() {
168 | const cpuData = os.cpus();
169 | return cpuData.length * this.parallelizationPerCPU;
170 | }
171 |
172 | _createFork() {
173 | const newFork = fork(this.tmpPath);
174 |
175 | newFork.on('error', (error) => {
176 | logger({
177 | message: `Error on child process: ${error}`,
178 | params: {
179 | error
180 | },
181 | debug: this.debug
182 | })
183 | })
184 |
185 | return newFork;
186 | }
187 |
188 | _createChildFile(childCode) {
189 | try {
190 | fs.writeFileSync(this.tmpPath, childCode);
191 | } catch (error) {
192 | throw new Error(`Failed to create child process file: ${error.message}`);
193 | }
194 | }
195 |
196 | _removeChildFile() {
197 | if (!fs.existsSync(this.tmpPath))
198 | return;
199 |
200 | try {
201 | fs.unlinkSync(this.tmpPath);
202 | } catch (error) {
203 | console.error(`Failed to remove temporary child process file: ${error.message}`);
204 | }
205 | }
206 | }
207 |
208 | const findSubsets = (array, n) => {
209 | return array.reduce((all, one, i) => {
210 | const ch = Math.floor(i / n);
211 | all[ch] = [].concat((all[ch] || []), one);
212 | return all
213 | }, [])
214 | }
215 |
216 | const templateChildCode = `
217 | const mainLogger = ({ message, params = {}, logType = 'log' }) => {
218 | process.send({ type: "LOG", logType, childLogMessage: message, childLogMessageParams: params });
219 | }
220 |
221 | // Listening to parent's messages.
222 | process.on("message", async (message) => {
223 | try {
224 | const reponse = await processBatch({ batch: message.batch, params: message.params, mainLogger });
225 |
226 | process.send({ type: "MESSAGE", status: "SUCCESS", reponse });
227 | } catch (e) {
228 | process.send({ type: "MESSAGE", status: "FAILED", errorMessage: e.toString() });
229 | }
230 | });
231 | `;
232 |
233 | const logger = ({ message, params = {}, debug = false, logType = 'log' }) => {
234 | if (!debug) {
235 | return
236 | }
237 |
238 | const logMsg = Object.assign({}, params);
239 | logMsg.message = message;
240 |
241 | console[logType](JSON.stringify(logMsg));
242 | }
243 |
244 | module.exports = ChildProcess;
--------------------------------------------------------------------------------
/src/index.js:
--------------------------------------------------------------------------------
1 | const ChildProcess = require("./child-process");
2 | const WorkerThreads = require("./worker-thread");
3 |
4 | const PARALLELIZER_CHILD = 'child-process';
5 | const PARALLELIZER_THREADS = 'worker-threads';
6 |
7 | const SINGLE_CHILD_THREAD_ID = 'single-process';
8 | class Parallelizer {
9 | constructor(params) {
10 |
11 | this.childThreads = {};
12 |
13 | if (!isArray(params)) {
14 | params.id = SINGLE_CHILD_THREAD_ID;
15 | params = [params];
16 | }
17 |
18 | this._init(params);
19 | }
20 |
21 |
22 | _init(list) {
23 | list.forEach(({ id, type, tmpPath = '/tmp', parallelization = false, parallelizationPerCPU = 1, debug = false, filePath, processBatchFunctionName }) => {
24 | if (!filePath || !processBatchFunctionName) {
25 | throw new Error('filePath and processBatchFunctionName are required');
26 | }
27 |
28 | const parallelizer = [PARALLELIZER_CHILD, PARALLELIZER_THREADS].includes(type) ? type : PARALLELIZER_CHILD;
29 |
30 | const childThreadParams = { tmpPath, parallelization, parallelizationPerCPU, debug };
31 |
32 | this.childThreads[id] = (parallelizer === PARALLELIZER_CHILD) ?
33 | new ChildProcess(childThreadParams) :
34 | new WorkerThreads(childThreadParams);
35 |
36 | this.childThreads[id].parallelizerFunction({ filePath, processBatchFunctionName });
37 | });
38 |
39 | }
40 |
41 |
42 | async run(data, params = null) {
43 | if (Object.keys(this.childThreads).length == 1) {
44 | return this.childThreads[SINGLE_CHILD_THREAD_ID].runBatch(data, params);
45 | }
46 |
47 | if (!isArray(data)) {
48 | data.id = SINGLE_CHILD_THREAD_ID;
49 | data.params = data.params || params;
50 | data = [data];
51 | }
52 |
53 | return await Promise.all(data.map(item => {
54 | const batch = item.batch
55 | const itemParams = item.params || params
56 |
57 | return this.childThreads[item.id].runBatch(batch, itemParams);
58 | }));
59 | }
60 |
61 | removeChildThreads(ids = null) {
62 | ids = (ids !== null && !isArray(ids)) ? [ids] : ids;
63 |
64 | Object.keys(this.childThreads)
65 | .filter(id => ids === null ? true : ids.includes(id))
66 | .forEach((id) => {
67 | this.childThreads[id].removeChildThreads();
68 | });
69 | }
70 | }
71 |
72 | const isArray = (value) => {
73 | return Array.isArray(value);
74 | }
75 |
76 |
77 | module.exports = { ChildProcess, WorkerThreads, Parallelizer, PARALLELIZER_CHILD, PARALLELIZER_THREADS };
--------------------------------------------------------------------------------
/src/worker-thread.js:
--------------------------------------------------------------------------------
1 | "use strict";
2 |
3 | const { Worker } = require("worker_threads");
4 | const os = require("os");
5 | const fs = require('fs');
6 | const crypto = require('crypto');
7 |
8 | const workerFileName = "worker-thread-file";
9 |
10 | class WorkerThreads {
11 | constructor({ tmpPath = '/tmp', parallelization = false, parallelizationPerCPU = 1, debug = false } = {}) {
12 | const uniqueId = crypto.randomBytes(16).toString('hex');
13 |
14 | this.tmpPath = `${tmpPath}/${workerFileName}-${uniqueId}.js`;
15 | this.workerFile = null;
16 | this.parallelization = parallelization;
17 | this.parallelizationPerCPU = parallelizationPerCPU;
18 | this.threadsCount = 1;
19 | this.debug = debug;
20 | }
21 |
22 | parallelizerFunction = ({ filePath, processBatchFunctionName }) => {
23 | const threadCode = `const {${processBatchFunctionName}: processBatch} = require('${filePath}'); ${templateThreadCode}`
24 | this.workerFile = this._createWorkerFile(threadCode);
25 |
26 | this.threadsCount = (typeof this.parallelization === 'number') ? this.parallelization : this._getThreadsCount();
27 | }
28 |
29 | runBatch = async (batch, params = null) => {
30 | // Get the amount of messages per batch.
31 | const batchCount = (batch.length < this.threadsCount) ? 1 : batch.length / this.threadsCount;
32 |
33 | // Create the batches
34 | const batches = findSubsets(batch, batchCount);
35 |
36 | // Process the batches using the threads.
37 | return await this._processBatchesInThreads(batches, params);
38 | }
39 |
40 | removeWorkerThreads() {
41 | this._removeThreadFile();
42 | }
43 | removeChildThreads() {
44 | this._removeThreadFile();
45 | }
46 |
47 | _processBatchesInThreads = async (batches, params = null) => {
48 | const batchesCount = batches.length;
49 | const threadResponses = {
50 | responses: [],
51 | failures: []
52 | };
53 |
54 | let responsesReceived = 0;
55 |
56 | await new Promise((resolve, reject) => {
57 | for (let id = 0; id < batchesCount; id++) {
58 | const worker = new Worker(this.tmpPath, { workerData: { id, batch: batches[id], params } });
59 | worker.on('error', (error) => {
60 | logger({
61 | message: `Thread #${id} error message: ${error.message}`,
62 | params: {
63 | thread_id: id,
64 | error_message: error.message
65 | },
66 | debug: this.debug
67 | })
68 | threadResponses.failures.push(error.message);
69 |
70 | // In case a thread exits without sending a message.
71 | if (++responsesReceived == batchesCount) {
72 | resolve('DONE');
73 | }
74 | });
75 |
76 | worker.on('message', ({ reponse, status, errorMessage }) => {
77 | logger({
78 | message: `Thread #${id} status message: ${status}`,
79 | params: {
80 | thread_id: id,
81 | status
82 | },
83 | debug: this.debug
84 | })
85 |
86 | if (status == 'FAILED') {
87 | logger({
88 | message: `Thread #${id} error message: ${errorMessage}`,
89 | params: {
90 | thread_id: id,
91 | error_message: errorMessage
92 | },
93 | debug: this.debug
94 | })
95 | threadResponses.failures.push(errorMessage);
96 | } else if (status == 'SUCCESS') {
97 | threadResponses.responses.push(reponse);
98 | }
99 |
100 | if (++responsesReceived == batchesCount) {
101 | resolve('DONE');
102 | }
103 | });
104 | }
105 | })
106 |
107 | return threadResponses;
108 | }
109 |
110 | _removeThreadFile() {
111 | if (!fs.existsSync(this.tmpPath))
112 | return;
113 |
114 | try {
115 | fs.unlinkSync(this.tmpPath);
116 | } catch (error) {
117 | console.error(`Failed to remove temporary worker thread file: ${error.message}`);
118 | }
119 | }
120 |
121 | _getThreadsCount = () => {
122 | const cpuData = os.cpus();
123 | return cpuData.length * this.parallelizationPerCPU;
124 | }
125 |
126 | _createWorkerFile(childCode) {
127 | fs.writeFileSync(this.tmpPath, childCode);
128 | }
129 | }
130 |
131 | const findSubsets = (array, n) => {
132 | return array.reduce((all, one, i) => {
133 | const ch = Math.floor(i / n);
134 | all[ch] = [].concat((all[ch] || []), one);
135 | return all
136 | }, [])
137 | }
138 |
139 | const templateThreadCode = `
140 | const { workerData, parentPort } = require("worker_threads");
141 |
142 | (async () => {
143 | try {
144 | const reponse = await processBatch({ batch: workerData.batch, params: workerData.params });
145 | parentPort.postMessage({ reponse, status: "SUCCESS" });
146 | } catch (err) {
147 | parentPort.postMessage({ status: "FAILED", errorMessage: err.toString() });
148 | }
149 | })();
150 | `;
151 |
152 | const logger = ({ message, params = {}, debug = false, logType = 'log' }) => {
153 | if (!debug) {
154 | return
155 | }
156 |
157 | const logMsg = Object.assign({}, params);
158 | logMsg.message = message;
159 |
160 | console[logType](JSON.stringify(logMsg));
161 | }
162 |
163 | module.exports = WorkerThreads;
--------------------------------------------------------------------------------
/test/benchmark-2.js:
--------------------------------------------------------------------------------
1 | const Benchmark = require('benchmark');
2 | const { Parallelizer, PARALLELIZER_CHILD, PARALLELIZER_THREADS } = require('../src/index');
3 | const { batchProcessorOnlyCPU, batchProcessorOnlyIO } = require('../examples/basic/src/parallelizer-code');
4 | const path = require('path');
5 |
6 |
7 | const relativePath = '../examples/basic/src/parallelizer-code';
8 | const absolutePath = path.resolve(__dirname, relativePath);
9 |
10 | const parallelizer = new Parallelizer([
11 | { id: "only-cpu", type: PARALLELIZER_THREADS, parallelization: 4, filePath: absolutePath, processBatchFunctionName: 'batchProcessorOnlyCPU' },
12 | { id: "only-io", type: PARALLELIZER_CHILD, parallelization: 4, filePath: absolutePath, processBatchFunctionName: 'batchProcessorOnlyIO' },
13 | ]);
14 |
15 | const batch = [...Array(100).keys()];
16 | const batch2 = [...Array(100).keys()];
17 |
18 |
19 | const p = (fn) => {
20 | return {
21 | defer: true,
22 | async fn(deferred) {
23 | await fn();
24 | deferred.resolve();
25 | }
26 | }
27 | }
28 |
29 | const suite = new Benchmark.Suite;
30 | // add tests
31 | suite
32 | .add('Child + Thread Parallelizers', p(async () => {
33 | await parallelizer.run([
34 | { id: "only-cpu", batch },
35 | { id: "only-io", batch: batch2 },
36 | ])
37 | }))
38 | .add('JavaScript Promise.all', p(async () => {
39 | await Promise.all([
40 | batchProcessorOnlyCPU({ batch }),
41 | batchProcessorOnlyIO({ batch: batch2 })
42 | ])
43 |
44 | }))
45 | // add listeners
46 | .on('cycle', function (event) {
47 | parallelizer.removeChildThreads();
48 | console.log(String(event.target));
49 | })
50 | .on('complete', function () {
51 | console.log('\nResult: ');
52 | console.log('Fastest is ' + this.filter('fastest').map('name'));
53 | console.log('Slowest is ' + this.filter('slowest').map('name'));
54 | })
55 | // run async
56 | .run({ 'async': true });
--------------------------------------------------------------------------------
/test/benchmark.js:
--------------------------------------------------------------------------------
1 | const Benchmark = require('benchmark');
2 | const { Parallelizer, PARALLELIZER_CHILD, PARALLELIZER_THREADS } = require('../src/index');
3 | const { batchProcessor, batchProcessor2 } = require('../examples/basic/src/parallelizer-code');
4 | const path = require('path');
5 |
6 |
7 | const relativePath = '../examples/basic/src/parallelizer-code';
8 | const absolutePath = path.resolve(__dirname, relativePath);
9 |
10 | const childParallelizer = new Parallelizer({ type: PARALLELIZER_CHILD, parallelizationPerCPU: 3, filePath: absolutePath, processBatchFunctionName: 'batchProcessor2' });
11 | const threadParallelizer = new Parallelizer({ type: PARALLELIZER_THREADS, parallelizationPerCPU: 3, filePath: absolutePath, processBatchFunctionName: 'batchProcessor2' });
12 |
13 | const batch = [...Array(100).keys()];
14 |
15 |
16 | const p = (fn) => {
17 | return {
18 | defer: true,
19 | async fn(deferred) {
20 | await fn();
21 | deferred.resolve();
22 | }
23 | }
24 | }
25 |
26 | const suite = new Benchmark.Suite;
27 | // add tests
28 | suite
29 | .add('Child Parallelizer', p(async () => {
30 | await childParallelizer.run(batch);
31 | }))
32 | .add('Thread Parallelizer', p(async () => {
33 | await threadParallelizer.run(batch);
34 | }))
35 | .add('Without Parallelizer', p(async () => {
36 | await batchProcessor2({ batch });
37 | }))
38 | // add listeners
39 | .on('cycle', function (event) {
40 | console.log(String(event.target));
41 | })
42 | .on('complete', function () {
43 | childParallelizer.removeChildThreads();
44 | threadParallelizer.removeChildThreads();
45 |
46 | console.log('\nResult: ');
47 | console.log('Fastest is ' + this.filter('fastest').map('name'));
48 | console.log('Slowest is ' + this.filter('slowest').map('name'));
49 | })
50 | // run async
51 | .run({ 'async': true });
--------------------------------------------------------------------------------