├── badges └── npm-audit-badge.svg ├── .eslintrc ├── .gitignore ├── CHANGELOG.md ├── worker.js ├── .github └── workflows │ └── test.yml ├── LICENSE.md ├── package.json ├── test └── test.js ├── README.md └── index.js /badges/npm-audit-badge.svg: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /.eslintrc: -------------------------------------------------------------------------------- 1 | { 2 | "extends": "apostrophe" 3 | } -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | package-lock.json 2 | node_modules 3 | -------------------------------------------------------------------------------- /CHANGELOG.md: -------------------------------------------------------------------------------- 1 | # Changelog 2 | 3 | ## 1.0.1 4 | 5 | Added explicit MIT license file. 6 | 7 | ## 1.0.0 8 | 9 | Initial release, with tests. 10 | -------------------------------------------------------------------------------- /worker.js: -------------------------------------------------------------------------------- 1 | process.on('message', ({ 2 | regExp, flags, string 3 | }) => { 4 | const r = new RegExp(regExp, flags); 5 | process.send({ 6 | result: string.match(r) 7 | }); 8 | }); 9 | -------------------------------------------------------------------------------- /.github/workflows/test.yml: -------------------------------------------------------------------------------- 1 | name: Tests 2 | 3 | on: 4 | push: 5 | branches: ["main"] 6 | pull_request: 7 | branches: ["*"] 8 | 9 | workflow_dispatch: 10 | 11 | jobs: 12 | test: 13 | runs-on: ubuntu-latest 14 | strategy: 15 | matrix: 16 | node-version: [18, 20] 17 | mongodb-version: [6.0, 7.0] 18 | 19 | steps: 20 | - name: Git checkout 21 | uses: actions/checkout@v4 22 | 23 | - name: Use Node.js ${{ matrix.node-version }} 24 | uses: actions/setup-node@v4 25 | with: 26 | node-version: ${{ matrix.node-version }} 27 | 28 | - name: Start MongoDB 29 | uses: supercharge/mongodb-github-action@1.11.0 30 | with: 31 | mongodb-version: ${{ matrix.mongodb-version }} 32 | 33 | - run: npm install 34 | 35 | - run: npm test 36 | env: 37 | CI: true 38 | -------------------------------------------------------------------------------- /LICENSE.md: -------------------------------------------------------------------------------- 1 | Copyright (c) 2020, 2021 Apostrophe Technologies, Inc. 2 | 3 | Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: 4 | 5 | The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. 6 | 7 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 8 | -------------------------------------------------------------------------------- /package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "time-limited-regular-expressions", 3 | "version": "1.0.1", 4 | "description": "Evaluates regular expressions with a time limit to mitigate DOS attacks based on catastrophic backtracking.", 5 | "main": "index.js", 6 | "scripts": { 7 | "test": "npm run lint && npm run mocha", 8 | "lint": "eslint .", 9 | "mocha": "mocha" 10 | }, 11 | "repository": { 12 | "type": "git", 13 | "url": "git+https://github.com/apostrophecms/time-limited-regular-expressions.git" 14 | }, 15 | "keywords": [ 16 | "regular", 17 | "expressions", 18 | "time", 19 | "limit", 20 | "regexp", 21 | "regex" 22 | ], 23 | "author": "Apostrophe Technologies", 24 | "license": "MIT", 25 | "bugs": { 26 | "url": "https://github.com/apostrophecms/time-limited-regular-expressions/issues" 27 | }, 28 | "homepage": "https://github.com/apostrophecms/time-limited-regular-expressions#readme", 29 | "devDependencies": { 30 | "eslint-config-apostrophe": "^5.0.0", 31 | "mocha": "^8.0.1", 32 | "wtfnode": "^0.8.1" 33 | } 34 | } 35 | -------------------------------------------------------------------------------- /test/test.js: -------------------------------------------------------------------------------- 1 | const assert = require('assert'); 2 | const regExp = require('../index.js')({ limit: 0.25 }); 3 | // Email address validator with evil characteristics (catastrophic backtracking) 4 | const evil = /^([a-zA-Z0-9])(([-.]|[_]+)?([a-zA-Z0-9]+))*(@){1}[a-z0-9]+[.]{1}(([a-z]{2,3})|([a-z]{2,3}[.]{1}[a-z]{2,3}))$/; 5 | 6 | describe('time limited regular expressions', () => { 7 | it('should process a matching regular expression', async () => { 8 | const result = await regExp.match(/^foo$/, 'foo'); 9 | assert(result); 10 | }); 11 | it('should process a nonmatching regular expression', async () => { 12 | const result = await regExp.match(/^foo$/, 'foo '); 13 | assert(!result); 14 | }); 15 | it('should support the global flag', async () => { 16 | const result = await regExp.match(/foo/g, 'foo bfoo wafoogle wafoom'); 17 | assert(result.length === 4); 18 | for (const r of result) { 19 | assert(r === 'foo'); 20 | } 21 | }); 22 | it('should run a problematic regular expression on short nonmatching input', async () => { 23 | const userDefinedEmail = 'AA'; 24 | const isValid = await regExp.match(evil, userDefinedEmail); 25 | assert(!isValid); 26 | }); 27 | it('should run a problematic regular expression on short matching input', async () => { 28 | const userDefinedEmail = 'test@test.com'; 29 | const isValid = await regExp.match(evil, userDefinedEmail); 30 | assert(isValid); 31 | }); 32 | it('should flunk a problematic regular expression on long input', async () => { 33 | const userDefinedEmail = 'AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA'; 34 | try { 35 | await regExp.match(evil, userDefinedEmail); 36 | // We should not get here at all, we should throw an timeout error 37 | assert(false); 38 | } catch (e) { 39 | assert(e.name === 'timeout'); 40 | } 41 | }); 42 | it('should run the problematic regular expression again on short input', async () => { 43 | const userDefinedEmail = 'AA'; 44 | const isValid = await regExp.match(evil, userDefinedEmail); 45 | assert(!isValid); 46 | }); 47 | it('should resolve four concurrent requests for a reasonable regular expression', async () => { 48 | const names = [ 'Bob', 'Jane', 'Sue', 'George' ]; 49 | const results = await Promise.all(names.map(name => regExp.match(/^\\w+$/, name))); 50 | assert(results.length === 4); 51 | assert(!results.find(result => !result)); 52 | }); 53 | }); 54 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # time-limited-regular-expressions 2 | 3 | [![CircleCI](https://circleci.com/gh/apostrophecms/time-limited-regular-expressions/tree/main.svg?style=svg)](https://circleci.com/gh/apostrophecms/time-limited-regular-expressions/tree/main) 4 | 5 | ## Why? 6 | 7 | You want to let end users enter their own regular expressions. But regular expressions can lead to [catastrophic backtracking](https://medium.com/@nitinpatel_20236/what-are-evil-regexes-7b21058c747e). This can take up hours of CPU time. In Node.js this means no other code can execute. It is a Denial of Service (DOS) attack vector, whether intentionally or by accident. 8 | 9 | This module lets you test regular expressions with a time limit to mitigate the pain. 10 | 11 | ## Usage 12 | 13 | ```javascript 14 | // Set a 1-second limit. Default is 0.25 seconds 15 | const regExp = require('time-limited-regular-expressions')({ limit: 1 }); 16 | 17 | // A common email address validator with potentially evil characteristics 18 | // (catastrophic backtracking) 19 | const evil = /^([a-zA-Z0-9])(([\-.]|[_]+)?([a-zA-Z0-9]+))*(@){1}[a-z0-9]+[.]{1}(([a-z]{2,3})|([a-z]{2,3}[.]{1}[a-z]{2,3}))$/; 20 | 21 | (async () => { 22 | // Run a potentially slow regular expression on short, matching input 23 | const realEmail = 'test@test.com'; 24 | const realEmailResult = await regExp.match(evil, realEmail); 25 | // Normal behavior, may be truthy or falsy according to match, 26 | // returns the same array result as regular regexp match() calls 27 | console.log(realEmailResult); 28 | // This input is long enough to trigger catastrophic backtracking and 29 | // could take hours to evaluate 30 | const evilEmail = 'AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA'; 31 | try { 32 | const evilEmailResult = await regExp.match(evil, evilEmail); 33 | // We will not get here, exception will be thrown 34 | } catch (e) { 35 | console.log(e.name); // Will be 'timeout' 36 | } 37 | })(); 38 | ``` 39 | 40 | ## Notes 41 | 42 | "Why is `match` an async function?" It runs in a separate process because that is the only way to avoid starving the Node.js application and implement a portable timeout on the regular expression. 43 | 44 | "How bad is the performance overhead?" Communication with a separate worker process makes it slower of course, but the process is reused by later calls, so the hit is not serious. 45 | 46 | Flags, for instance the `g` flag, are supported. 47 | 48 | You can pass the regular expression as a string, but regular expression literals (what you are used to typing) are easier to get right because you don't have to double-escape anything. 49 | -------------------------------------------------------------------------------- /index.js: -------------------------------------------------------------------------------- 1 | const cp = require('child_process'); 2 | 3 | module.exports = ({ limit = 0.25 } = {}) => { 4 | let worker = null; 5 | let working = false; 6 | const queue = []; 7 | process.on('exit', cleanup); 8 | return { 9 | match(regExp, string) { 10 | let flags; 11 | regExp = regExp.toString(); 12 | // A regexp literal was typically passed and when it went through toString, 13 | // it became /regexp-goes-here/flags-go-here. Parse that into a form we 14 | // can feed to the RegExp constructor in the other process 15 | const matches = regExp.match(/^\/(.*?)\/([a-z]*)$/); 16 | if (matches) { 17 | regExp = matches[1]; 18 | flags = matches[2]; 19 | } 20 | return new Promise((resolve, reject) => { 21 | if (!worker) { 22 | worker = createWorker(); 23 | } 24 | queue.push({ 25 | regExp, 26 | flags, 27 | string, 28 | resolve, 29 | reject 30 | }); 31 | if (!working) { 32 | matchOneViaWorker(); 33 | } 34 | function createWorker() { 35 | const worker = cp.fork(`${__dirname}/worker.js`, { 36 | stdio: 'ignore' 37 | }); 38 | // So the parent process can exit due to a lack of work to do, 39 | // without explicitly closing the child 40 | worker.unref(); 41 | worker.channel.unref(); 42 | return worker; 43 | } 44 | function matchOneViaWorker() { 45 | let settled = false; 46 | if (!queue.length) { 47 | return; 48 | } 49 | working = true; 50 | const { 51 | regExp, string, resolve, reject 52 | } = queue.shift(); 53 | worker.once('message', receive); 54 | const timeout = setTimeout(function() { 55 | if (!settled) { 56 | worker.kill(); 57 | worker = createWorker(); 58 | const error = new Error(`A user-supplied regular expression took more than ${limit} seconds to evaluate.`); 59 | error.name = 'timeout'; 60 | reject(error); 61 | settled = true; 62 | working = false; 63 | matchOneViaWorker(); 64 | } 65 | }, limit * 1000); 66 | worker.send({ 67 | regExp, 68 | flags, 69 | string 70 | }); 71 | function receive(message) { 72 | clearTimeout(timeout); 73 | if (!settled) { 74 | settled = true; 75 | working = false; 76 | resolve(message.result); 77 | matchOneViaWorker(); 78 | } 79 | } 80 | } 81 | }); 82 | } 83 | }; 84 | function cleanup() { 85 | if (worker) { 86 | worker.kill(); 87 | } 88 | } 89 | }; 90 | --------------------------------------------------------------------------------