├── .mocharc.yml ├── src ├── index.ts └── map_reduce │ ├── index.ts │ ├── cluster.ts │ └── utils.ts ├── .gitignore ├── test ├── fixtures │ ├── skeletons.txt │ ├── dogs.txt │ ├── racoons.txt │ └── orchads.txt └── word_count.spec.ts ├── tsconfig.json ├── .github ├── ISSUE_TEMPLATE │ └── feature_request.md ├── FUNDING.yml └── workflows │ ├── regression-test.yml │ ├── npm-publish.yml │ └── codeql-analysis.yml ├── .eslintrc.js ├── package.json └── README.md /.mocharc.yml: -------------------------------------------------------------------------------- 1 | timeout: 40000 2 | -------------------------------------------------------------------------------- /src/index.ts: -------------------------------------------------------------------------------- 1 | export * from "./map_reduce"; 2 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | node_modules/ 2 | build/ 3 | .idea 4 | .nyc_output 5 | -------------------------------------------------------------------------------- /test/fixtures/skeletons.txt: -------------------------------------------------------------------------------- 1 | The skeleton had skeletons of his own in the closet -------------------------------------------------------------------------------- /src/map_reduce/index.ts: -------------------------------------------------------------------------------- 1 | export * from "./cluster"; 2 | export * from "./utils"; 3 | -------------------------------------------------------------------------------- /test/fixtures/dogs.txt: -------------------------------------------------------------------------------- 1 | The golden retriever loved the fireworks each Fourth of July -------------------------------------------------------------------------------- /test/fixtures/racoons.txt: -------------------------------------------------------------------------------- 1 | They did nothing as the raccoon attacked the lady’s bag of food -------------------------------------------------------------------------------- /test/fixtures/orchads.txt: -------------------------------------------------------------------------------- 1 | Orchards seemed like a frivolous crop when so many people needed food -------------------------------------------------------------------------------- /tsconfig.json: -------------------------------------------------------------------------------- 1 | { 2 | "compilerOptions": { 3 | "baseUrl": "./", 4 | "declaration": true, 5 | "emitDecoratorMetadata": true, 6 | "esModuleInterop": true, 7 | "experimentalDecorators": true, 8 | "importHelpers": true, 9 | "outDir": "build", 10 | "removeComments": true, 11 | "target": "es6", 12 | "module": "commonjs", 13 | "moduleResolution": "node", 14 | "lib": ["es2019", "dom"], 15 | "types": ["node", "mocha"], 16 | "typeRoots": ["node_modules/@types"], 17 | "noUnusedLocals": true, 18 | "noUnusedParameters": true, 19 | "resolveJsonModule": true 20 | }, 21 | "include": [ 22 | "./src/**/*" 23 | ] 24 | } 25 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/feature_request.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Feature request 3 | about: Suggest an idea for this project 4 | title: '' 5 | labels: '' 6 | assignees: '' 7 | 8 | --- 9 | 10 | **Is your feature request related to a problem? 
Please describe.** 11 | A clear and concise description of what the problem is. Ex. I'm always frustrated when [...] 12 | 13 | **Describe the solution you'd like** 14 | A clear and concise description of what you want to happen. 15 | 16 | **Describe alternatives you've considered** 17 | A clear and concise description of any alternative solutions or features you've considered. 18 | 19 | **Additional context** 20 | Add any other context or screenshots about the feature request here. 21 | -------------------------------------------------------------------------------- /.github/FUNDING.yml: -------------------------------------------------------------------------------- 1 | # These are supported funding model platforms 2 | 3 | github: # Replace with up to 4 GitHub Sponsors-enabled usernames e.g., [user1, user2] 4 | patreon: # Replace with a single Patreon username 5 | open_collective: # Replace with a single Open Collective username 6 | ko_fi: # Replace with a single Ko-fi username 7 | tidelift: # Replace with a single Tidelift platform-name/package-name e.g., npm/babel 8 | community_bridge: # Replace with a single Community Bridge project-name e.g., cloud-foundry 9 | liberapay: # Replace with a single Liberapay username 10 | issuehunt: # Replace with a single IssueHunt username 11 | otechie: # Replace with a single Otechie username 12 | custom: # Replace with up to 4 custom sponsorship URLs e.g., ['link1', 'link2'] 13 | -------------------------------------------------------------------------------- /.eslintrc.js: -------------------------------------------------------------------------------- 1 | module.exports = { 2 | "env": { 3 | "es6": true, 4 | "node": true 5 | }, 6 | "parser": "@typescript-eslint/parser", 7 | "parserOptions": { 8 | "project": "tsconfig.json", 9 | "sourceType": "module" 10 | }, 11 | "plugins": [ 12 | "eslint-plugin-import", 13 | '@typescript-eslint', 14 | ], 15 | "extends": ["plugin:prettier/recommended"], 16 | "rules": { 17 | "@typescript-eslint/no-inferrable-types": "error", 18 | "eqeqeq": [ 19 | "error", 20 | "always" 21 | ], 22 | "import/order": "error", 23 | "no-console": "error", 24 | "prefer-const": "error", 25 | "newline-before-return": "error" 26 | } 27 | }; 28 | -------------------------------------------------------------------------------- /.github/workflows/regression-test.yml: -------------------------------------------------------------------------------- 1 | # This workflow will do a clean install of node dependencies, build the source code and run tests across different versions of node 2 | # For more information see: https://help.github.com/actions/language-and-framework-guides/using-nodejs-with-github-actions 3 | 4 | name: Node.js CI 5 | 6 | on: 7 | push: 8 | branches: [ master ] 9 | pull_request: 10 | branches: [ master ] 11 | 12 | jobs: 13 | build: 14 | 15 | runs-on: ubuntu-latest 16 | 17 | strategy: 18 | matrix: 19 | node-version: [12.x, 14.x] 20 | 21 | steps: 22 | - uses: actions/checkout@v2 23 | - name: Use Node.js ${{ matrix.node-version }} 24 | uses: actions/setup-node@v1 25 | with: 26 | node-version: ${{ matrix.node-version }} 27 | - run: npm ci 28 | - run: npm run build --if-present 29 | - run: npm test 30 | -------------------------------------------------------------------------------- /src/map_reduce/cluster.ts: -------------------------------------------------------------------------------- 1 | import cluster from "cluster"; 2 | import { 3 | Delay, 4 | initMaster, 5 | initWorkers, 6 | isMaster, 7 | MasterFn, 8 | NUM_CPUS, 9 | ReduceFn, 10 
| shutdown, 11 | WorkerFn, 12 | } from "./utils"; 13 | 14 | const MapReduce = async ( 15 | masterFn: MasterFn, 16 | workerFns: WorkerFn[], 17 | reduceFn: ReduceFn, 18 | args: any 19 | ) => { 20 | if (isMaster()) { 21 | const { numWorkers = NUM_CPUS } = args; 22 | const { workerQueue, processOrder, failedOrder } = await initMaster( 23 | numWorkers 24 | ); 25 | 26 | await masterFn(workerQueue, args); 27 | while (workerQueue.getElements().length !== numWorkers) { 28 | await Delay(1000); 29 | } 30 | 31 | await shutdown(numWorkers); 32 | 33 | return reduceFn(processOrder, failedOrder); 34 | } else if (cluster.isWorker) { 35 | await initWorkers(workerFns, args); 36 | } 37 | }; 38 | 39 | export const sonicDistribute = MapReduce; 40 | -------------------------------------------------------------------------------- /package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "@tiemma/sonic-distribute", 3 | "version": "1.0.3", 4 | "main": "build/index.js", 5 | "engines": { 6 | "node": "<16" 7 | }, 8 | "description": "Accelerate your DB backup and restore processes", 9 | "scripts": { 10 | "lint-fix": "eslint -c .eslintrc.js --fix --ext .ts --ext .js src/", 11 | "test": "nyc --reporter=text-summary mocha -r ts-node/register test/*.spec.ts" 12 | }, 13 | "keywords": [ 14 | "db", 15 | "relational", 16 | "graphs", 17 | "logical dumps" 18 | ], 19 | "author": "Emmanuel Bakare", 20 | "license": "ISC", 21 | "dependencies": { 22 | "@tiemma/sonic-core": "^1.0.12" 23 | }, 24 | "repository": { 25 | "type": "git", 26 | "url": "https://github.com/tiemma/sonic-distribute" 27 | }, 28 | "devDependencies": { 29 | "@types/mocha": "^9.0.0", 30 | "@types/sequelize": "^4.28.10", 31 | "@typescript-eslint/eslint-plugin": "^4.29.1", 32 | "@typescript-eslint/parser": "^4.29.1", 33 | "chai": "^4.3.4", 34 | "deep-equal-in-any-order": "^1.1.10", 35 | "eslint": "^7.32.0", 36 | "eslint-config-prettier": "^8.3.0", 37 | "eslint-plugin-import": "^2.24.0", 38 | "eslint-plugin-prettier": "^3.4.0", 39 | "mocha": "^9.0.3", 40 | "nyc": "^15.1.0", 41 | "openapi-types": "^7.2.3", 42 | "sequelize-cli": "^6.2.0", 43 | "ts-node": "^10.1.0", 44 | "tsconfig-paths": "^3.10.1", 45 | "tslib": "^2.3.0", 46 | "typescript": "^4.3.5" 47 | } 48 | } 49 | -------------------------------------------------------------------------------- /.github/workflows/npm-publish.yml: -------------------------------------------------------------------------------- 1 | # This workflow will run tests using node and then publish a package to GitHub Packages when a release is created 2 | # For more information see: https://help.github.com/actions/language-and-framework-guides/publishing-nodejs-packages 3 | 4 | name: Node.js Package 5 | 6 | on: 7 | release: 8 | types: [created] 9 | 10 | jobs: 11 | build: 12 | runs-on: ubuntu-latest 13 | steps: 14 | - uses: actions/checkout@v2 15 | - uses: actions/setup-node@v1 16 | with: 17 | node-version: 12 18 | - run: npm ci 19 | - run: npm test 20 | 21 | publish-npm: 22 | needs: build 23 | runs-on: ubuntu-latest 24 | steps: 25 | - uses: actions/checkout@v2 26 | - uses: actions/setup-node@v1 27 | with: 28 | node-version: 12 29 | registry-url: https://registry.npmjs.org/ 30 | - run: npm ci 31 | - run: npx tsc 32 | - run: npm publish --access=public 33 | env: 34 | NODE_AUTH_TOKEN: ${{secrets.npm_token}} 35 | 36 | publish-gpr: 37 | needs: build 38 | runs-on: ubuntu-latest 39 | steps: 40 | - uses: actions/checkout@v2 41 | - uses: actions/setup-node@v1 42 | with: 43 | node-version: 12 44 | 
registry-url: https://npm.pkg.github.com/ 45 | repo-token: ${{ secrets.GITHUB_TOKEN }} 46 | - run: npm ci 47 | - run: npx tsc 48 | - run: npm publish --access=public 49 | env: 50 | NODE_AUTH_TOKEN: ${{secrets.GITHUB_TOKEN}} 51 | -------------------------------------------------------------------------------- /.github/workflows/codeql-analysis.yml: -------------------------------------------------------------------------------- 1 | # For most projects, this workflow file will not need changing; you simply need 2 | # to commit it to your repository. 3 | # 4 | # You may wish to alter this file to override the set of languages analyzed, 5 | # or to provide custom queries or build logic. 6 | # 7 | # ******** NOTE ******** 8 | # We have attempted to detect the languages in your repository. Please check 9 | # the `language` matrix defined below to confirm you have the correct set of 10 | # supported CodeQL languages. 11 | # ******** NOTE ******** 12 | 13 | name: "CodeQL" 14 | 15 | on: 16 | push: 17 | branches: [ master, contributor-rules ] 18 | pull_request: 19 | # The branches below must be a subset of the branches above 20 | branches: [ master ] 21 | schedule: 22 | - cron: '26 8 * * 2' 23 | 24 | jobs: 25 | analyze: 26 | name: Analyze 27 | runs-on: ubuntu-latest 28 | 29 | strategy: 30 | fail-fast: false 31 | matrix: 32 | language: [ 'javascript' ] 33 | # CodeQL supports [ 'cpp', 'csharp', 'go', 'java', 'javascript', 'python' ] 34 | # Learn more: 35 | # https://docs.github.com/github/finding-security-vulnerabilities-and-errors-in-your-code/configuring-code-scanning#changing-the-languages-that-are-analyzed 36 | 37 | steps: 38 | - name: Checkout repository 39 | uses: actions/checkout@v2 40 | 41 | # Initializes the CodeQL tools for scanning. 42 | - name: Initialize CodeQL 43 | uses: github/codeql-action/init@v1 44 | with: 45 | languages: ${{ matrix.language }} 46 | # If you wish to specify custom queries, you can do so here or in a config file. 47 | # By default, queries listed here will override any specified in a config file. 48 | # Prefix the list here with "+" to use these queries and those in the config file. 49 | # queries: ./path/to/local/query, your-org/your-repo/queries@main 50 | 51 | # Autobuild attempts to build any compiled languages (C/C++, C#, or Java). 52 | # If this step fails, then you should remove it and run the build manually (see below) 53 | - name: Autobuild 54 | uses: github/codeql-action/autobuild@v1 55 | 56 | # ℹ️ Command-line programs to run using the OS shell. 
57 | # 📚 https://git.io/JvXDl 58 | 59 | # ✏️ If the Autobuild fails above, remove it and uncomment the following three lines 60 | # and modify them (or add more) to build your code if your project 61 | # uses a compiled language 62 | 63 | #- run: | 64 | # make bootstrap 65 | # make release 66 | 67 | - name: Perform CodeQL Analysis 68 | uses: github/codeql-action/analyze@v1 69 | -------------------------------------------------------------------------------- /test/word_count.spec.ts: -------------------------------------------------------------------------------- 1 | import { opendirSync, readdirSync, readFileSync, Dirent } from "fs"; 2 | import { expect, use } from "chai"; 3 | import deepEqualInAnyOrder from "deep-equal-in-any-order"; 4 | import { Queue } from "@tiemma/sonic-core"; 5 | import { 6 | getWorkerName, 7 | isMaster, 8 | Map, 9 | sonicDistribute, 10 | MapReduceEvent, 11 | } from "../src"; 12 | 13 | use(deepEqualInAnyOrder); 14 | 15 | const response = { 16 | Orchards: 2, 17 | seemed: 2, 18 | like: 2, 19 | a: 2, 20 | frivolous: 2, 21 | crop: 2, 22 | when: 2, 23 | so: 2, 24 | many: 2, 25 | people: 2, 26 | needed: 2, 27 | food: 4, 28 | The: 4, 29 | golden: 2, 30 | retriever: 2, 31 | loved: 2, 32 | the: 8, 33 | fireworks: 2, 34 | each: 2, 35 | Fourth: 2, 36 | of: 6, 37 | July: 2, 38 | skeleton: 2, 39 | had: 2, 40 | skeletons: 2, 41 | his: 2, 42 | own: 2, 43 | in: 2, 44 | closet: 2, 45 | They: 2, 46 | did: 2, 47 | nothing: 2, 48 | as: 2, 49 | raccoon: 2, 50 | attacked: 2, 51 | "lady’s": 2, 52 | bag: 2, 53 | }; 54 | 55 | const masterFn = async (workerQueue: Queue, args: any) => { 56 | const { dirPath } = args; 57 | // Work around opendirSync not being in node 10 for regression tests 58 | const dir = (opendirSync || readdirSync)(dirPath); 59 | for await (const file of dir) { 60 | await Map(workerQueue, { data: (file as Dirent).name }); 61 | } 62 | }; 63 | 64 | const workerFn1 = async (event: MapReduceEvent, args: any) => { 65 | const { dirPath } = args; 66 | const fileName = event.data; 67 | const file = await readFileSync(`${dirPath}/${fileName}`, { 68 | encoding: "utf-8", 69 | }); 70 | 71 | const wordCount: Record<string, number> = {}; 72 | for (const word of file.split(" ")) { 73 | if (!wordCount[word]) { 74 | wordCount[word] = 0; 75 | } 76 | wordCount[word] += 1; 77 | } 78 | 79 | return wordCount; 80 | }; 81 | 82 | const workerFn2 = (event: MapReduceEvent, _: any) => { 83 | // double everything 84 | const wordCount: Record<string, number> = event.data; 85 | for (const key of Object.keys(wordCount)) { 86 | wordCount[key] *= 2; 87 | } 88 | 89 | return wordCount; 90 | }; 91 | 92 | const reduceFn = (queue: Queue) => { 93 | const wordCounts: Record<string, number> = {}; 94 | while (!queue.isEmpty()) { 95 | const wordCount = queue.dequeue(); 96 | for (const [word, count] of Object.entries(wordCount.response)) { 97 | if (!wordCounts[word]) { 98 | wordCounts[word] = 0; 99 | } 100 | wordCounts[word] += count as number; 101 | } 102 | } 103 | 104 | return wordCounts; 105 | }; 106 | 107 | describe(`Map reduce - ${getWorkerName()}`, () => { 108 | it("map reduce works as expected", async () => { 109 | // suppress logs 110 | process.env["QUIET"] = "true"; 111 | 112 | const data = await sonicDistribute( 113 | masterFn, 114 | [workerFn1, workerFn2], 115 | reduceFn, 116 | { 117 | dirPath: `${process.cwd()}/test/fixtures`, 118 | numWorkers: 1, 119 | } 120 | ); 121 | 122 | if (isMaster()) { 123 | expect(data).deep.equal(response); 124 | process.exit(0); 125 | } 126 | }); 127 | }); 128 |
-------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # sonic < distribute > 2 | 3 | ![image](https://raw.githubusercontent.com/Tiemma/sonic-core/master/image.png) 4 | 5 | Accelerate your linear processes with MapReduce. 6 | 7 | ![CodeQL](https://github.com/Tiemma/sonic-distribute/workflows/CodeQL/badge.svg) 8 | ![Node.js CI](https://github.com/Tiemma/sonic-distribute/workflows/Node.js%20CI/badge.svg) 9 | 10 | # What does this do? 11 | 12 | It sets up a cluster of workers with a single master node, dispatches each task to a worker, adds the results to a queue and processes that queue according to your reduce operation. 13 | 14 | In simple terms, it's a framework for MapReduce built on Node's cluster module. 15 | 16 | 17 | # How to use it 18 | 19 | - Install the package 20 | - Create the masterFn, workerFn and reduceFn methods 21 | - Call the MapReduce method with the functions and a set of args 22 | - Process the final output as needed 23 | 24 | ## Install the package 25 | 26 | To install the package, run 27 | ```bash 28 | npm install --save @tiemma/sonic-distribute 29 | ``` 30 | 31 | ## Create the masterFn, workerFn and reduceFn methods 32 | 33 | Create a set of methods which implement the Master, Worker and Reduce tasks. 34 | 35 | Note the signatures of the methods during your implementation: 36 | 37 | ```typescript 38 | export type MasterFn = (workerQueue: Queue, args: any) => any; 39 | export type WorkerFn = (event: MapReduceEvent, args: any) => any; 40 | export type ReduceFn = (resultQueue: Queue, failedQueue: Queue) => any; 41 | ``` 42 | 43 | ## Call the MapReduce method with the functions and a set of args 44 | Then proceed to import and call the MapReduce method, exported as `sonicDistribute`, as desired 45 | 46 | ```typescript 47 | import { sonicDistribute } from "@tiemma/sonic-distribute"; 48 | 49 | //...describe masterFn, workerFn and reduceFn methods 50 | // the workerFns are pipelined ... event.data -> workerFn[0] -> workerFn[1] ... workerFn[n-1] -> response 51 | const response = await sonicDistribute(masterFn, [workerFn], reduceFn, { response: ....anything, numWorkers: desired_number_of_workers}) 52 | ``` 53 | 54 | > NOTE: The arg numWorkers is reserved to specify the desired number of workers to deploy 55 | 56 | ## Process the final output as needed 57 | Due to the nature of the fork, you are required to access the output of the MapReduce operation on the master process, as follows: 58 | 59 | ```typescript 60 | import { sonicDistribute, isMaster } from "@tiemma/sonic-distribute"; 61 | 62 | //...describe masterFn, workerFn and reduceFn methods 63 | const response = await sonicDistribute(masterFn, [workerFn], reduceFn, { response: ....anything, numWorkers: desired_number_of_workers}) 64 | if (isMaster()) { 65 | //....process response output of map reduce 66 | } 67 | ``` 68 | 69 | # Environment variables 70 | 71 | Set the `QUIET` environment variable if you do not want to see any logs. 72 | 73 | # Why did I do this? 74 | 75 | I was working on another package to assist with logical dumps of database tables in the required foreign key order. 76 | 77 | This package was born out of the need to optimise the performance of the linear dump process in a configurable way. 78 | 79 | # Best Practices 80 | 81 | ## Synchronization is your job 82 | 83 | The entire framework serves to make it easy to just focus on your distributed processing tasks. 84 | 85 | There is a deliberate focus on preserving processing order, which is why results are queued rather than sorted. 86 | 87 | Any synchronization primitives required across workers need to be implemented by you. 88 | 89 | Since the workers are forked processes, I advise using an external system capable of locking over some shared medium, e.g. an SQLite file DB or a ZooKeeper instance.
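For illustration only, a minimal sketch of such a lock built on an exclusive lock file could look like the following (the `withFileLock` helper and its signature are hypothetical and not part of this package):

```typescript
import { closeSync, openSync, unlinkSync } from "fs";
import { Delay } from "@tiemma/sonic-distribute";

// Hypothetical example: a crude advisory lock shared by worker processes.
// Each worker spins until it creates the lock file exclusively, runs its
// critical section, then releases the lock by deleting the file.
export const withFileLock = async (
  lockPath: string,
  criticalSection: () => Promise<void>
) => {
  for (;;) {
    try {
      // The "wx" flag fails with EEXIST if another process holds the lock
      closeSync(openSync(lockPath, "wx"));
      break;
    } catch {
      await Delay(50);
    }
  }

  try {
    await criticalSection();
  } finally {
    unlinkSync(lockPath);
  }
};
```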
90 | 91 | All of this is left to you to implement. 92 | 93 | If you have some method by which it can be simply implemented here, do create a PR using the ISSUE TEMPLATE [here](./.github/ISSUE_TEMPLATE/feature_request.md). 94 | 95 | 96 | # Future Plans 97 | 98 | ## Multi-master and tagged worker setups 99 | 100 | There might be a case for running multiple masters and for pushing jobs to specific tagged workers with different workerFns, building on the pipelined workerFns feature described above. 101 | 102 | This is an async pipeline and would be effectively represented by DAGs, with logic for traversing across jobs within the worker or reduce stage and across those stages respectively. 103 | 104 | If you have some method by which it can be simply implemented here, do create a PR using the ISSUE TEMPLATE [here](./.github/ISSUE_TEMPLATE/feature_request.md). 105 | 106 | 107 | # Debugging 108 | 109 | By default, logs are shown. 110 | 111 | If you prefer no logs, kindly set the QUIET env variable. 112 | 113 | ```bash 114 | export QUIET=true 115 | ``` 116 | 117 | # I found a bug, how can I contribute? 118 | Open up a PR using the ISSUE TEMPLATE [here](./.github/ISSUE_TEMPLATE/feature_request.md) 119 | -------------------------------------------------------------------------------- /src/map_reduce/utils.ts: -------------------------------------------------------------------------------- 1 | import { cpus } from "os"; 2 | import cluster, { ClusterSettings } from "cluster"; 3 | import { pid } from "process"; 4 | import { Queue } from "@tiemma/sonic-core"; 5 | 6 | export type MasterFn = (workerQueue: Queue, args: any) => any; 7 | export type WorkerFn = (event: MapReduceEvent, args: any) => any; 8 | export type ReduceFn = (resultQueue: Queue, failedQueue: Queue) => any; 9 | 10 | export const NUM_CPUS = cpus().length; 11 | 12 | export const isMaster = () => { 13 | return cluster.isMaster || cluster.isPrimary; 14 | }; 15 | 16 | export const getWorkerName = () => { 17 | if (isMaster()) { 18 | return "MASTER"; 19 | } 20 | 21 | return `WORKER-${cluster.worker.id}`; 22 | }; 23 | 24 | export const Delay = (time = Math.random() * 50) => 25 | new Promise((resolve) => setTimeout(resolve, time)); 26 | export const getLogger = 27 | (loggerID: string) => 28 | (message: any, date = new Date().toISOString()) => { 29 | if (process.env["QUIET"]) return; 30 | // eslint-disable-next-line no-console 31 | console.log(`${date}: ${loggerID}: ${message}`); 32 | }; 33 | const logger = getLogger(getWorkerName()); 34 | 35 | export const clusterEvents = { 36 | MESSAGE: "message", 37 | DISCONNECT: "disconnect", 38 | }; 39 | 40 | export const getWorkerID = async (workerQueue: Queue) => { 41 | while (workerQueue.isEmpty()) { 42 | await Delay(); 43 | } 44 | 45 | let workerID = workerQueue.dequeue(); 46 | while (!cluster.workers[workerID]) { 47 | workerID = workerQueue.dequeue(); 48 | } 49 | 50 | return workerID; 51 | }; 52 | 53 | export interface MapReduceEvent { 54 | data: any; 55 | response?: any; 56 | failed?: boolean; 57 | 58 | // Internal, not to be directly used 59 | id?: number; 60 | SYN?: boolean; 61 | ACK?: boolean; 62 | SYN_ACK?: boolean; 63 | }
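// Master/worker registration handshake implemented by initWorkers and
// configureWorkers below, using the SYN/ACK/SYN_ACK flags on MapReduceEvent:
//   1. a freshly forked worker sends { SYN: true } to the master
//   2. the master replies with { ACK: true }
//   3. the worker answers { SYN_ACK: true } and is enqueued as available
// After that, the master sends events carrying `data`, and the worker replies
// with its `id` plus either a `response` or `failed: true`, which re-enqueues
// it as available for the next job.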
64 | 65 | export const configureWorkers = async (numWorkers: number) => { 66 | const workerQueue = new Queue(); 67 | const processOrder = new Queue(); 68 | const failedOrder = new Queue(); 69 | 70 | for (let i = 0; i < numWorkers; i++) { 71 | const worker = cluster.fork(); 72 | 73 | worker.on(clusterEvents.MESSAGE, (message: MapReduceEvent) => { 74 | const { id, SYN, SYN_ACK, response, failed } = message; 75 | 76 | if (SYN) { 77 | // Return signal to worker to start processing 78 | worker.send({ ACK: true }); 79 | } else if (SYN_ACK || id) { 80 | workerQueue.enqueue(worker.id); 81 | logger(`Worker ${worker.id} now available`); 82 | } 83 | 84 | if (failed) { 85 | failedOrder.enqueue(message); 86 | } else if (response) { 87 | processOrder.enqueue(message); 88 | } 89 | }); 90 | 91 | worker.on(clusterEvents.DISCONNECT, () => { 92 | logger(`Gracefully shutting down worker #${worker.id}`); 93 | }); 94 | } 95 | 96 | logger("Worker queues initializing"); 97 | while (workerQueue.getElements().length !== numWorkers) { 98 | await Delay(); 99 | } 100 | logger(`Workers queue populated`); 101 | 102 | return { workerQueue, processOrder, failedOrder }; 103 | }; 104 | 105 | export const initMaster = async (numWorkers: number) => { 106 | (cluster.setupMaster || cluster.setupPrimary)({ 107 | execArgv: ["-r", "tsconfig-paths/register", "-r", "ts-node/register"], 108 | } as ClusterSettings); 109 | 110 | logger("Running Map reduce"); 111 | logger(`Process running on pid ${pid}`); 112 | 113 | return configureWorkers(numWorkers); 114 | }; 115 | 116 | export const initWorkers = async (workerFns: WorkerFn[], args: any) => { 117 | const logger = getLogger(getWorkerName()); 118 | 119 | // Register worker on startup 120 | process.send({ SYN: true }); 121 | 122 | process.on(clusterEvents.MESSAGE, async (event: MapReduceEvent) => { 123 | // Establish master-node communication with 3-way handshake 124 | if (event.ACK) { 125 | logger(`Worker ${cluster.worker.id} now active and processing requests`); 126 | 127 | process.send({ SYN_ACK: true }); 128 | 129 | return; 130 | } 131 | 132 | const res = { id: cluster.worker.id, data: event.data }; 133 | try { 134 | let response = event.data; 135 | 136 | for (let i = 0; i < workerFns.length; i++) { 137 | response = await workerFns[i]( 138 | { data: response }, 139 | { 140 | ...args, 141 | workerID: cluster.worker.id, 142 | } 143 | ); 144 | } 145 | res["response"] = response; 146 | } catch (e) { 147 | logger(`Error occurred: ${e}`); 148 | res["failed"] = true; 149 | } 150 | 151 | process.send(res); 152 | }); 153 | }; 154 | 155 | export const shutdown = async (numWorkers: number) => { 156 | for (let i = 0; i <= numWorkers; i++) { 157 | if (cluster.workers[i]) { 158 | await Delay(100); 159 | cluster.workers[i].disconnect(); 160 | } 161 | } 162 | 163 | logger(`Shutting down master`); 164 | }; 165 | 166 | export const Map = async (workerQueue: Queue, event: MapReduceEvent) => { 167 | const workerID = await getWorkerID(workerQueue); 168 | 169 | cluster.workers[workerID].send(event); 170 | }; 171 | --------------------------------------------------------------------------------