├── .appveyor.yml ├── .github └── workflows │ └── rust.yml ├── .gitignore ├── CONTRIBUTING.md ├── Cargo.toml ├── README.md ├── assets ├── arc_lookbehind.svg ├── lookBehind.js ├── look_behind.svg └── special_punct.svg ├── benches ├── chars_vs_jsbuffer.rs ├── major_libs.rs └── ref_perf_vs.rs ├── code_of_conduct.md ├── codecov.yml ├── examples ├── clear-comments │ └── src │ │ └── main.rs ├── count_tokens.rs ├── find_regexes.rs ├── instruments │ ├── bools.rs │ ├── comments.rs │ ├── idents.rs │ ├── keywords.rs │ ├── null.rs │ ├── numbers.rs │ ├── puncts.rs │ ├── regexes.rs │ ├── strings.rs │ └── templates.rs ├── major_libs │ └── src │ │ └── main.rs ├── semi_finder │ └── src │ │ └── main.rs ├── tokenize.rs └── tokens.js ├── license.txt ├── package.json ├── proptest-regressions ├── comments.txt ├── keywords.txt ├── numeric.txt ├── punct.txt ├── regex.txt └── strings.txt ├── regex.md ├── rustfmt.toml ├── src ├── error.rs ├── lib.rs ├── look_behind.rs ├── manual_scanner.rs ├── tokenizer │ ├── buffer.rs │ ├── keyword_trie.rs │ ├── mod.rs │ ├── tokens.rs │ └── unicode.rs └── tokens │ ├── boolean.rs │ ├── comment.rs │ ├── ident.rs │ ├── keyword.rs │ ├── mod.rs │ ├── number.rs │ ├── regex.rs │ ├── string.rs │ └── template.rs └── tests ├── ecma262 ├── es2015m.rs ├── es2015s.rs ├── es5.rs └── main.rs ├── moz_central └── main.rs ├── prop └── main.rs ├── proptest-regressions └── main.txt ├── readme ├── index.js └── main.rs └── snippets └── main.rs /.appveyor.yml: -------------------------------------------------------------------------------- 1 | environment: 2 | global: 3 | RUSTFLAGS: -Zunstable-options -Ctarget-feature=+crt-static 4 | RUST_BACKTRACE: 1 5 | CARGO_INCREMENTAL: 0 # should turn this back on when fixed! 
6 | matrix: 7 | - TARGET: x86_64-pc-windows-msvc 8 | 9 | install: 10 | - ps: Install-Product node 10 11 | - appveyor-retry appveyor DownloadFile https://win.rustup.rs/ -FileName rustup-init.exe 12 | - rustup-init.exe -y --default-host x86_64-pc-windows-msvc --default-toolchain nightly 13 | - set PATH=%PATH%;C:\Users\appveyor\.cargo\bin 14 | - rustc -V 15 | - cargo -V 16 | 17 | build: false 18 | 19 | test_script: 20 | - npm i 21 | - cargo test 22 | - cargo run --example major_libs --release 23 | 24 | branches: 25 | only: 26 | - master -------------------------------------------------------------------------------- /.github/workflows/rust.yml: -------------------------------------------------------------------------------- 1 | name: Rust 2 | 3 | on: [push] 4 | 5 | jobs: 6 | build: 7 | 8 | runs-on: ubuntu-latest 9 | 10 | steps: 11 | - uses: actions/checkout@v1 12 | - name: Setup Node.js for use with actions 13 | uses: actions/setup-node@v2.1.5 14 | - name: install js test libs from npm 15 | run: npm install 16 | - name: Build 17 | run: cargo build 18 | - name: get moz_central files 19 | run: curl https://hg.mozilla.org/mozilla-central/archive/tip.zip/js/src/jit-test/tests/ --output moz_central.zip 20 | - name: unzip moz_central 21 | run: unzip -qq moz_central -d moz_central 22 | - name: Run tests 23 | run: cargo test --features moz_central 24 | if: success() 25 | - name: Run Major Libs example 26 | run: cargo run --example major_libs 27 | if: success() 28 | - name: Check syntax 29 | run: cargo fmt --all -- --check 30 | if: success() 31 | - name: Get tarpaulin install script 32 | run: cargo install cargo-tarpaulin 33 | if: success() 34 | - name: Run tarpaulin and upload to CodeCov.io 35 | run: cargo tarpaulin --out Xml && bash <(curl -s https://codecov.io/bash) 36 | env: 37 | CODECOV_TOKEN: ${{ secrets.CODECOV_TOKEN }} 38 | RUST_LOG: trace 39 | if: success() 40 | - name: Cache node_modules 41 | uses: actions/cache@v1.0.3 42 | with: 43 | path: ./node_modules 44 | key: 
${{ runner.os }}.node_modules 45 | - name: before cargo cache 46 | run: rm -rf ~/.cargo/registry 47 | - name: Cache cargo directory 48 | uses: actions/cache@v2.1.4 49 | with: 50 | key: ${{ runner.os }}.cargo 51 | path: ~/.cargo 52 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | .vscode 2 | /target 3 | **/*.rs.bk 4 | **/.DS_Store 5 | **/node_modules 6 | Cargo.lock 7 | *.log 8 | /*.js 9 | package-lock.json 10 | moz_central 11 | moz-central 12 | -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # Contributing to RESS 2 | 3 | If you are interested in contributing to RESS know that your help would be appreciated! 4 | 5 | Feel free to open issues and/or pull requests for anything that you see that might be an improvement. 6 | Please note that [ressa](https://github.com/freemasen/ressa) and [resast](https://github.com/freemasen/resast) may already have an issue opened. 7 | 8 | I do not work on this full time, please be patient if I am not able to respond quickly. 9 | 10 | The primary development branch is the `next` branch. It would be ideal to create any pull requests against that branch over `master` or one of the other feature branches that might have been missed when cleaning up. 11 | 12 | For any PRs know that the code must pass CI tests before they will be reviewed/merged. These tests include the following commands you could use to check your version. 13 | ```sh 14 | $ npm i 15 | $ cargo test 16 | $ cargo run --example major_libs 17 | ``` 18 | When run with the `--release` flag, this example acts as a naive benchmark to validate that changes haven't completely ruined the performance. Feel free to leave this flag off when you are testing for a PR. 
19 | 20 | This will run all of the project's unit tests as well as a test against some major js libraries, namely [Angular-js](angularjs.org), [Jquery](jquery.com), [React/React-Dom](reactjs.org), [Vue](vuejs.org), [Moment.js](momentjs.com) and [Dexie](dexie.org). 21 | 22 | If you are interested in becoming a maintainer send me an email and we can talk more about what that looks like. 23 | 24 | 25 | # Getting Started 26 | There are a few things you might need to know to get started. First, the tests and benchmarks require that `npm` is installed to pull down the javascript they evaluate so you'll need [node.js](https://nodejs.org/en/) installed. 27 | 28 | Because the benchmarks use Criterion, it can be difficult to use them with profiling so each of the single token benchmarks is extracted out as an example (you can find these in the examples/instruments folder). For the major_libs benchmark, you can use the example with the same name. These are helpful for working with tools like [`cargo instruments`](https://crates.io/crates/cargo-instruments). 29 | 30 | The overall code layout works like this. 31 | 32 | - lib.rs 33 | - `Scanner`: The primary interface for this crate 34 | - Mostly this is a wrapper around Tokenizer that handles detecting regexes and calculating line/column numbers 35 | - `ScannerState`: This is used for caching the state and resetting it. See the `Scanner::get_state` and `Scanner::set_state` methods 36 | - error.rs 37 | - This is where the error structs live. If you add a new error type to the `Tokenizer` you will need to add an Into/From implementation here 38 | - look_behind.rs: 39 | - `LookBehind`: This is a ring-like structure that is used to keep the look behind tokens. 40 | - For regex detection we only care about the last token we have seen and the three tokens before an open parentheses, so the Scanner keeps two of these on hand. 
41 | - The basic idea here is to just use a 3 element array and keep track of where we last put an element to be able to calculate which is `last`, `two` or `three`. 42 | - `MetaToken`: a cheaper token variant which only holds the bare minimum of information for regex detection 43 | - tokenizer 44 | - mod.rs 45 | - `RawItem`: a cheaper version of the `Item` struct from above, it has only as much information as the `Tokenizer` can determine; a `RawToken` and the byte index of the start and end. 46 | - `Tokenizer`: This is the primary export of this module. This struct will perform the actual separation and classification of tokens 47 | - One note about the matching logic, matching on the length of a byte array or string a bunch of times with an if clause is cheaper than matching on the strings directly. Until [phf](https://github.com/sfackler/rust-phf) can handle byte slices, this is the fastest method available 48 | - buffer.rs 49 | - `JSBuffer`: Mostly a reimplementation of [std::Chars](https://doc.rust-lang.org/std/str/struct.Chars.html) 50 | - For most look_ahead operations there is `look_ahead_matches` which takes a byte slice, however if you are looking for a single byte character the `look_ahead_byte_matches` is slightly faster 51 | - `at_new_line` the `cmp` operation on u8 is faster than matching or `eq` so checking if something is smaller than a target is faster than doing bounds checks between `||`s 52 | - tokens.rs 53 | - `RawToken`: This is a token more tailored to directing the Scanner about how to construct a `tokens::Token` 54 | - The three cases that can have new lines carry some extra information with them, the `new_line_count` and the `last_len` (length of the last line) 55 | - `CommentKind`: empty version of `tokens::Comment` 56 | - `StringKind`: empty version of `tokens::StringLit` 57 | - `TemplateKind`: empty version of `tokens::Template` 58 | - unicode.rs 59 | - bounds checks on `char`s is more effective than binary search (which the two unicode 
implementations I could find use) so these function bodies are generated using the appropriate table 60 | - The generation code may become available in the future but right now it isn't very effective 61 | - `is_ident_start`: check if a `char` has the attribute of ident_start 62 | - `is_id_continue`: check if a `char` has the attribute of ident_continue 63 | - `is_other_whitespace`: the ECMA spec says that any Zs category character is valid whitespace. This function will test any exotic whitespaces 64 | 65 | # Testing 66 | There are a few sets of JavaScript files that are required to run the tests in this repository. The first set can be easily acquired by running `npm install` in the root of this project. An additional test is also available behind a feature flag `moz_central` that requires the JIT Test files from the Firefox repository; the expectation is that these will exist in the folder `moz-central` in the root of this project. To get these files you can either manually download and unzip them by following [this link](https://hg.mozilla.org/mozilla-central/archive/tip.zip/js/src/jit-test/tests/) or you can execute the following command. 67 | 68 | ```sh 69 | curl https://hg.mozilla.org/mozilla-central/archive/tip.zip/js/src/jit-test/tests/ --output moz-central.zip 70 | unzip -q moz-central.zip -d moz-central 71 | ``` 72 | 73 | To run these tests simply execute the following command. 
74 | 75 | ```sh 76 | cargo test --features moz_central -- moz_central 77 | ``` -------------------------------------------------------------------------------- /Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "ress" 3 | version = "0.11.7" 4 | authors = ["Robert Masen "] 5 | description = "A scanner/tokenizer for JS files" 6 | keywords = ["JavaScript", "parsing", "JS", "ES", "ECMA"] 7 | categories = ["parsing", "text-processing", "web-programming"] 8 | license = "MIT" 9 | repository = "https://github.com/rusty-ecma/ress" 10 | readme = "./README.md" 11 | edition = "2018" 12 | 13 | [dependencies] 14 | log = "0.4" 15 | unicode-xid = "0.2" 16 | 17 | [dev-dependencies] 18 | walkdir = "2" 19 | docopt = "1" 20 | serde = "1" 21 | serde_derive = "1" 22 | proptest = "0.10" 23 | pretty_env_logger = "0.4" 24 | regex_generate = "0.2" 25 | criterion = "0.3" 26 | lazy_static = "1" 27 | res-regex = "0.1" 28 | 29 | [features] 30 | default = [] 31 | moz_central = [] 32 | 33 | [[example]] 34 | name = "major_libs" 35 | path = "examples/major_libs/src/main.rs" 36 | 37 | [[example]] 38 | name = "clear-comments" 39 | path = "examples/clear-comments/src/main.rs" 40 | 41 | [[example]] 42 | name = "semi_finder" 43 | path = "examples/semi_finder/src/main.rs" 44 | 45 | #instrument's examples 46 | [[example]] 47 | name = "keywords" 48 | path = "examples/instruments/keywords.rs" 49 | [[example]] 50 | name = "puncts" 51 | path = "examples/instruments/puncts.rs" 52 | [[example]] 53 | name = "idents" 54 | path = "examples/instruments/idents.rs" 55 | [[example]] 56 | name = "strings" 57 | path = "examples/instruments/strings.rs" 58 | [[example]] 59 | name = "templates" 60 | path = "examples/instruments/templates.rs" 61 | [[example]] 62 | name = "regexes" 63 | path = "examples/instruments/regexes.rs" 64 | [[example]] 65 | name = "numbers" 66 | path = "examples/instruments/numbers.rs" 67 | [[example]] 68 | name = "bools" 69 | path = 
"examples/instruments/bools.rs" 70 | [[example]] 71 | name = "null" 72 | path = "examples/instruments/null.rs" 73 | [[example]] 74 | name = "comments" 75 | path = "examples/instruments/comments.rs" 76 | 77 | [[bench]] 78 | name = "major_libs" 79 | harness = false 80 | 81 | [[bench]] 82 | name = "ref_perf_vs" 83 | harness = false 84 | 85 | [[bench]] 86 | name = "chars_vs_jsbuffer" 87 | harness = false 88 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # RESS 2 | 3 | > Rusty EcmaScript Scanner 4 | 5 | [![Github Actions](https://img.shields.io/github/workflow/status/rusty-ecma/RESS/Rust)](https://travis-ci.org/FreeMasen/RESS) 6 | [![crates.io](https://img.shields.io/crates/v/ress.svg)](https://crates.io/crates/ress) 7 | [![last commit master](https://img.shields.io/github/last-commit/FreeMasen/RESS.svg)](https://github.com/FreeMasen/RESS/commits/master) 8 | 9 | A scanner/tokenizer for JS written in Rust 10 | 11 | ## Usage 12 | 13 | The primary way to interact with ress is through the `Scanner` struct which implements `Iterator` over the `Item` struct. `Item` has three fields `token` for the `Token` found, `span` which represents the start and end of the byte position in the original string and `location` which represents start and end character position with a line and column. Its definition looks like this. 14 | 15 | ```rust 16 | Item { 17 | token: Token::Punct(Punct::Bang), 18 | span: Span { 19 | start: 0, 20 | end: 1, 21 | }, 22 | location: SourceLocation { 23 | start: Position { 24 | line: 1, 25 | column: 1, 26 | }, 27 | end: Position { 28 | line: 1, 29 | column: 2, 30 | } 31 | } 32 | } 33 | ``` 34 | 35 | Note: the EcmaScript spec allows for 4 new line characters, only two of which are normally rendered by modern text editors; the location line numbers will count these un-rendered lines. 
36 | 37 | Here is an example that will check some JS text for the existence of a semicolon and panics if one 38 | is found. 39 | 40 | ```rust 41 | use ress::Scanner; 42 | 43 | static JS: &str = include_str!("index.js"); 44 | 45 | fn main() { 46 | let s = Scanner::new(JS); 47 | for item in s { 48 | let token = item.unwrap().token; 49 | if token.matches_punct_str(";") { 50 | panic!("A semi-colon!? Heathen!"); 51 | } 52 | } 53 | println!("Good show! Why use something that's optional?") 54 | } 55 | ``` 56 | 57 | By far the most important part of `Item` is the `Token` enum, which will represent the 11 different types of tokens supported by the [ECMAScript specification](https://tc39.es/ecma262/#sec-ecmascript-language-lexical-grammar). 58 | 59 | In Javascript [it is hard to know if a forward slash means divide or is the start of a regular expression](https://github.com/rusty-ecma/RESS/blob/master/regex.md). 60 | The above `Scanner` will detect RegEx automatically by keeping track of the previously 61 | parsed tokens, this makes things very convenient, however if you are parsing Javascript 62 | into an AST, you likely already need to keep track of the same information. In that 63 | case, you may not want to pay the performance cost of that automatic RegEx detection, 64 | you would want to reach for the `ManualScanner`. Instead of exposing 65 | the basic `Iterator` interface, it exposes two primary methods for driving the scanner 66 | `next_token` and `next_regex`. The first of those will always return a `/` or `/=` when 67 | encountering a regular expression, the latter will fail if the next token isn't 68 | a regular expression. 
69 | 70 | ```rust 71 | use ress::{ManualScanner, prelude::*}; 72 | 73 | fn main() { 74 | let mut s = ManualScanner::new("let x = /[a-z]+/g"); 75 | while let Some(Ok(item)) = s.next_token() { 76 | if item.token.matches_punct(Punct::ForwardSlash) 77 | || item.token.matches_punct(Punct::ForwardSlashEqual) { 78 | // it could be a 1 or 2 length prefix 79 | let regex = s.next_regex(1).unwrap().unwrap(); 80 | println!("{:?}", regex); 81 | } else { 82 | println!("{:?}", item); 83 | } 84 | } 85 | } 86 | ``` 87 | 88 | ### ES Tokens 89 | 90 | - Boolean Literal 91 | - End of File 92 | - Identifier 93 | - Keyword 94 | - Null Literal 95 | - Numeric Literal 96 | - Punctuation 97 | - String Literal 98 | - Regular Expression Literal 99 | - Template String 100 | - Comment 101 | 102 | Keep in mind that keywords have been moving around a lot in JS between ES3 through ES2019 so you might find some items parsed as keywords in the ES2019 context that are not in the ES3 context, this should be dealt with at a higher level. A good example of this is `yield` which is sometimes a keyword and sometimes an identifier, this package will always parse this as a Keyword. As of the writing of this readme `ress` supports all tokens in the [Stage 2 and Stage 3 ECMAScript Proposals](https://github.com/tc39/proposals) with the exception of the `#!` comments and number separators. 103 | 104 | For each of the token cases there is either a struct or enum to provide additional information with the exception of `NullLiteral` and `EoF` which should be self-explanatory. The more complicated items do implement `ToString` which should get you back to the original js text for that token. The `Token` enum also provides a number of helper functions for building that picture without pulling the inner data out of the enum. Using the `Punct` case as an example the helper functions look like this. 
105 | 106 | ```rust 107 | fn is_punct(&self) -> bool; 108 | fn matches_punct(&self, p: Punct) -> bool; 109 | fn matches_punct_str(&self, s: &str) -> bool; 110 | ``` 111 | 112 | A similar set of functions are available for each case. 113 | 114 | Like all `Iterators` the `Scanner` has a `next` method. It also has a `look_ahead` method that will allow you to parse the next value without advancing. Using this method can be a convenient way to get the next token without performing a mutable borrow, however you will be incurring the cost of parsing that token twice. All `Iterators` can be converted into a `Peekable` Iterator with a `peek` method, this will allow you to look ahead while only paying the cost once however `peek` performs a mutable borrow which means it needs to be in a different scope than a call to `next`. 115 | 116 | ```rust 117 | // look_ahead 118 | let js = "function() { return; }"; 119 | let mut s = Scanner::new(js); 120 | let current = s.next(); 121 | let next = s.look_ahead(); 122 | let new_current = s.next(); 123 | assert_eq!(next, new_current); 124 | // peekable (fails to compile) 125 | let p = Scanner::new(js).peekable(); 126 | let current = s.next(); // <-- first mutable borrow 127 | let next = p.peek(); // <-- second mutable borrow 128 | ``` 129 | 130 | For more intense lookahead scenarios `Scanner` makes available the `get_state` and `set_state` methods. These methods will allow you to capture a snapshot of the current position and any context, and then later reset to that position and context. 
131 | 132 | ```rust 133 | let js = "function() { 134 | return 0; 135 | };"; 136 | let mut s = Scanner::new(js); 137 | let start = s.get_state(); 138 | assert_eq!(s.next().unwrap().unwrap().token, Token::Keyword(Keyword::Function)); 139 | assert_eq!(s.next().unwrap().unwrap().token, Token::Punct(Punct::OpenParen)); 140 | assert_eq!(s.next().unwrap().unwrap().token, Token::Punct(Punct::CloseParen)); 141 | s.set_state(start); 142 | assert_eq!(s.next().unwrap().unwrap().token, Token::Keyword(Keyword::Function)); 143 | ``` 144 | 145 | ## Why? 146 | 147 | Wouldn't it be nice to write new JS development tools in Rust? The [clear-comments](https://github.com/FreeMasen/RESS/blob/master/examples/clear-comments/src/main.rs) example is a proof of concept on how you might use this crate to do just that. This example will take in a JS file and output a version with all of the comments removed. An example of how you might see it in action is below (assuming you have a file called in.js in the project root). 148 | 149 | ```sh 150 | cargo run --example clear-comments -- ./in.js ./out.js 151 | ``` 152 | 153 | ## Performance 154 | 155 | The below stats are from running `cargo +nightly bench` on an MBP (2.9 GHz i9-8850H & 16gb RAM). 156 | 157 | | Lib | Size | Time | +/- | 158 | | ----------- | -------- | --------- | ---------- | 159 | | Angular 1.5 | 1.16mb | 18.991 ms | 4.393 ms | 160 | | jquery | 271.75kb | 7.218 ms | 577.236 μs | 161 | | React | 59.09kb | 1.976 ms | 116.139 μs | 162 | | React-dom | 641.51kb | 16.880 ms | 3.614 ms | 163 | | Vue | 289.30kb | 9.675 ms | 1.402 ms | 164 | 165 | If you are interested in getting an idea about performance without waiting for `cargo bench` to complete you can run the following command. 
166 | 167 | ```sh 168 | cargo run --example major_libs 169 | ``` 170 | 171 | ## Contributing 172 | 173 | [see contributing.md](https://github.com/FreeMasen/RESS/blob/master/CONTRIBUTING.md) 174 | -------------------------------------------------------------------------------- /assets/lookBehind.js: -------------------------------------------------------------------------------- 1 | var runLookBehindAnimation = (function () { 2 | /** 3 | * Flag to avoid running more 4 | * than once 5 | */ 6 | let running = false; 7 | /** 8 | * Set the arrow's fill to "black" at the 9 | * provided index 10 | * @param {number} current index 11 | */ 12 | function setBlack(idx) { 13 | if (idx < 0 || idx > 11) { 14 | return; 15 | } 16 | const arrow = document.getElementById(`index-${idx}`); 17 | arrow.style.fill = 'black'; 18 | } 19 | /** 20 | * Set the arrow's fill to "none" at the 21 | * provided index 22 | * @param {number} idx currentIndex 23 | */ 24 | function setNone(idx) { 25 | if (idx < 0 || idx > 11) { 26 | return; 27 | } 28 | const arrow = document.getElementById(`index-${idx}`); 29 | arrow.style.fill = 'none'; 30 | } 31 | /** 32 | * Perform the fill setting correctly 33 | * - the last 3 are "black" 34 | * - all others are "none" 35 | * @param {number} idx current token index 36 | */ 37 | function updateArrowColors(idx) { 38 | for (let i = 0; i < 11; i++) { 39 | if (i < idx - 2 || i > idx) { 40 | setNone(i); 41 | } else { 42 | setBlack(i); 43 | } 44 | } 45 | } 46 | /** 47 | * Set all arrow's fill to "none" 48 | */ 49 | function clearAll() { 50 | for (let i = 0; i < 11; i++) { 51 | setNone(i); 52 | } 53 | running = false; 54 | } 55 | /** 56 | * Perform one step in the animation 57 | * 58 | * Calling this once will start an async loop 59 | * for 10 counts finally clearing all arrows 60 | * @param {number} idx Current iteration count 61 | */ 62 | function oneTick(idx) { 63 | if (!idx) idx = 0; 64 | if (idx > 10) { 65 | return clearAll(); 66 | } 67 | updateArrowColors(idx); 68 | 
setTimeout(run, 1000, idx + 1) 69 | } 70 | /** 71 | * Exported member, starts the async loop 72 | * but checks if we are already running 73 | * and short-circuits if we are 74 | */ 75 | return function run() { 76 | if (running) { 77 | return; 78 | } 79 | running = true; 80 | oneTick(); 81 | } 82 | })(); -------------------------------------------------------------------------------- /benches/chars_vs_jsbuffer.rs: -------------------------------------------------------------------------------- 1 | #![cfg(test)] 2 | extern crate ress; 3 | 4 | #[macro_use] 5 | extern crate criterion; 6 | 7 | use criterion::black_box; 8 | use criterion::Criterion; 9 | 10 | fn ascii_string() -> String { 11 | string_from_range(0..256) 12 | } 13 | fn non_ascii_string() -> String { 14 | string_from_range(0x7FF..0x110000) 15 | } 16 | fn string_from_range(r: std::ops::Range) -> String { 17 | let mut ret = String::new(); 18 | for i in r { 19 | if let Some(ch) = std::char::from_u32(i) { 20 | ret.push(ch); 21 | } 22 | } 23 | ret 24 | } 25 | fn chars_ascii_chars(c: &mut Criterion) { 26 | let s = ascii_string(); 27 | chars(c, &s, "chars_ascii_chars"); 28 | } 29 | fn chars_non_ascii_chars(c: &mut Criterion) { 30 | let mut s = non_ascii_string(); 31 | chars(c, &s, "chars_non_ascii_chars"); 32 | } 33 | fn jsb_ascii_chars(c: &mut Criterion) { 34 | let s = ascii_string(); 35 | js_buffer(c, &s, "jsb_ascii_chars"); 36 | } 37 | fn jsb_non_ascii_chars(c: &mut Criterion) { 38 | let s = non_ascii_string(); 39 | js_buffer(c, &s, "jsb_non_ascii_chars") 40 | } 41 | fn chars(c: &mut Criterion, s: &str, name: &str) { 42 | c.bench_function(name, |b| { 43 | b.iter(|| { 44 | let mut chs = s.chars(); 45 | while let Some(ch) = chs.next() { 46 | black_box(ch); 47 | } 48 | }); 49 | }); 50 | } 51 | fn js_buffer(c: &mut Criterion, s: &str, name: &str) { 52 | c.bench_function(name, |b| { 53 | b.iter(|| { 54 | let mut chs = ress::JSBuffer::new(s.as_bytes()); 55 | while let Some(ch) = chs.next_char() { 56 | black_box(ch); 
57 | } 58 | }); 59 | }); 60 | } 61 | 62 | criterion_group!( 63 | benches, 64 | chars_ascii_chars, 65 | chars_non_ascii_chars, 66 | jsb_ascii_chars, 67 | jsb_non_ascii_chars, 68 | ); 69 | criterion_main!(benches); 70 | -------------------------------------------------------------------------------- /benches/major_libs.rs: -------------------------------------------------------------------------------- 1 | #![cfg(test)] 2 | extern crate ress; 3 | 4 | #[macro_use] 5 | extern crate criterion; 6 | 7 | use criterion::black_box; 8 | use criterion::Criterion; 9 | 10 | use ress::Scanner; 11 | use std::fs::read_to_string; 12 | use std::path::PathBuf; 13 | 14 | fn angular(c: &mut Criterion) { 15 | run_bench(c, Lib::Angular, "angular", false); 16 | } 17 | 18 | fn angular_min(c: &mut Criterion) { 19 | run_bench(c, Lib::Angular, "angular_min", true); 20 | } 21 | 22 | fn jq(c: &mut Criterion) { 23 | run_bench(c, Lib::Jquery, "jq", false); 24 | } 25 | 26 | fn jq_min(c: &mut Criterion) { 27 | run_bench(c, Lib::Jquery, "jq_min", true); 28 | } 29 | 30 | fn react(c: &mut Criterion) { 31 | run_bench(c, Lib::React, "react", false); 32 | } 33 | 34 | fn react_min(c: &mut Criterion) { 35 | run_bench(c, Lib::React, "react_min", true); 36 | } 37 | 38 | fn react_dom(c: &mut Criterion) { 39 | run_bench(c, Lib::ReactDom, "react_dom", false); 40 | } 41 | 42 | fn react_dom_min(c: &mut Criterion) { 43 | run_bench(c, Lib::ReactDom, "react_dom_min", true); 44 | } 45 | 46 | fn vue(c: &mut Criterion) { 47 | run_bench(c, Lib::Vue, "vue", false); 48 | } 49 | 50 | fn vue_min(c: &mut Criterion) { 51 | run_bench(c, Lib::Vue, "vue_min", true); 52 | } 53 | 54 | fn everything_es5(c: &mut Criterion) { 55 | run_bench(c, Lib::EveryEs5, "everything_es5", false); 56 | } 57 | 58 | fn everything_es2015_s(c: &mut Criterion) { 59 | run_bench(c, Lib::EveryEs2015Script, "everything_es2015_s", false); 60 | } 61 | 62 | fn everything_es2015_m(c: &mut Criterion) { 63 | run_bench(c, Lib::EveryEs2015Mod, 
"everything_es2015_m", false); 64 | } 65 | 66 | enum Lib { 67 | Jquery, 68 | Angular, 69 | React, 70 | ReactDom, 71 | Vue, 72 | EveryEs5, 73 | EveryEs2015Script, 74 | EveryEs2015Mod, 75 | } 76 | 77 | fn get_js(l: Lib) -> Result { 78 | let path = PathBuf::from(l.path()); 79 | if !path.exists() { 80 | npm_install(); 81 | if !path.exists() { 82 | panic!("npm install failed to make {} available", path.display()); 83 | } 84 | } 85 | read_to_string(path) 86 | } 87 | 88 | fn get_min_js(l: Lib) -> Result { 89 | let path = PathBuf::from(l.min_path()); 90 | if !path.exists() { 91 | npm_install(); 92 | if !path.exists() { 93 | panic!("npm install failed to make {} available", path.display()); 94 | } 95 | } 96 | read_to_string(path) 97 | } 98 | 99 | impl Lib { 100 | pub fn path(&self) -> String { 101 | match self { 102 | Lib::Jquery => "node_modules/jquery/dist/jquery.js", 103 | Lib::Angular => "node_modules/angular/angular.js", 104 | Lib::React => "node_modules/react/umd/react.development.js", 105 | Lib::ReactDom => "node_modules/react-dom/umd/react-dom.development.js", 106 | Lib::Vue => "node_modules/vue/dist/vue.js", 107 | Lib::EveryEs5 => "node_modules/everything.js/es5.js", 108 | Lib::EveryEs2015Script => "node_modules/everything.js/es2015-script.js", 109 | Lib::EveryEs2015Mod => "node_modules/everything.js/es2015-module.js", 110 | } 111 | .into() 112 | } 113 | 114 | pub fn min_path(&self) -> String { 115 | match self { 116 | &Lib::Jquery => "node_modules/jquery/dist/jquery.min.js".into(), 117 | &Lib::Angular => "node_modules/angular/angular.min.js".into(), 118 | &Lib::React => "node_modules/react/umd/react.production.min.js".into(), 119 | &Lib::ReactDom => "node_modules/react-dom/umd/react-dom.production.min.js".into(), 120 | &Lib::Vue => "node_modules/vue/dist/vue.min.js".into(), 121 | _ => String::new(), 122 | } 123 | } 124 | } 125 | 126 | fn npm_install() { 127 | eprintln!("Downloading required js dependencies"); 128 | let mut c = ::std::process::Command::new("npm"); 
129 | c.arg("i"); 130 | let out = c.output().expect("Failed to read output from npm"); 131 | if !out.status.success() { 132 | panic!( 133 | "{}", 134 | format!( 135 | "Failed to run npm i\n{:?}", 136 | String::from_utf8_lossy(&out.stderr) 137 | ) 138 | ); 139 | } 140 | } 141 | 142 | #[inline(always)] 143 | fn run_bench(c: &mut Criterion, lib: Lib, name: &str, min: bool) { 144 | let js = if min { 145 | get_min_js(lib).unwrap() 146 | } else { 147 | get_js(lib).unwrap() 148 | }; 149 | run_bench_(c, &js, name) 150 | } 151 | 152 | #[inline(always)] 153 | fn run_bench_(c: &mut Criterion, js: &str, name: &str) { 154 | let mut group = c.benchmark_group(name); 155 | group.throughput(criterion::Throughput::Bytes(js.len() as u64)); 156 | group.bench_function(name, |b| { 157 | b.iter(|| { 158 | for i in Scanner::new(&js) { 159 | black_box(i.unwrap()); 160 | } 161 | }) 162 | }); 163 | group.finish(); 164 | } 165 | 166 | criterion_group!( 167 | benches, 168 | angular, 169 | angular_min, 170 | jq, 171 | jq_min, 172 | react, 173 | react_min, 174 | react_dom, 175 | react_dom_min, 176 | vue, 177 | vue_min, 178 | everything_es5, 179 | everything_es2015_s, 180 | everything_es2015_m 181 | ); 182 | criterion_main!(benches); 183 | -------------------------------------------------------------------------------- /benches/ref_perf_vs.rs: -------------------------------------------------------------------------------- 1 | #![cfg(test)] 2 | extern crate ress; 3 | #[macro_use] 4 | extern crate lazy_static; 5 | #[macro_use] 6 | extern crate criterion; 7 | 8 | use criterion::{black_box, Criterion}; 9 | use ress::{Scanner, Tokenizer}; 10 | 11 | static KEYWORDS: &[&str] = &[ 12 | "implements", 13 | "interface", 14 | "package", 15 | "private", 16 | "protected", 17 | "public", 18 | "static", 19 | "yield", 20 | "let", 21 | "enum", 22 | "export", 23 | "import", 24 | "super", 25 | "break", 26 | "case", 27 | "catch", 28 | "continue", 29 | "debugger", 30 | "default", 31 | "delete", 32 | "do", 33 | 
"else", 34 | "finally", 35 | "for", 36 | "function", 37 | "if", 38 | "instanceof", 39 | "in", 40 | "new", 41 | "return", 42 | "switch", 43 | "this", 44 | "throw", 45 | "try", 46 | "typeof", 47 | "var", 48 | "void", 49 | "while", 50 | "with", 51 | ]; 52 | static PUNCTS: &[&str] = &[ 53 | "{", "}", "(", ")", ".", ";", ",", "[", "]", ":", "?", "~", ">", "<", "=", "!", "+", "-", "/", 54 | "*", "%", "&", "|", "^", "#", "@", ">>>=", "...", "===", "!==", ">>>", "<<=", ">>=", "**=", 55 | "&&", "||", "==", "!=", "+=", "-=", "*=", "/=", "++", "--", "<<", ">>", "&=", "|=", "^=", "%=", 56 | "<=", ">=", "=>", "**", 57 | ]; 58 | 59 | static STRINGS: &[&str] = &[ 60 | r#""things and stuff""#, 61 | r#"'people and places'"#, 62 | r#""with and escaped \"""#, 63 | r#"'another escaped \''"#, 64 | r#""with a new \ 65 | line""#, 66 | r#"'another new line \ 67 | hahaha'"#, 68 | "\"sequence double quoted\\\r\nis hard\"", 69 | "'new line sequence\\\r\nmight be harder'", 70 | ]; 71 | 72 | static COMMENTS: &[&str] = &[ 73 | "//this is a comment", 74 | "/*this is a 75 | multi-line comment*/", 76 | "", 77 | " with a trailer", 78 | ]; 79 | 80 | static NUMBERS: &[&str] = &[ 81 | "0", 82 | "00", 83 | "1234567890", 84 | "01234567", 85 | "0.", 86 | "0.00", 87 | "10.00", 88 | ".0", 89 | "0e0", 90 | "0E0", 91 | "0.e0", 92 | "0.00e+0", 93 | ".00e-0", 94 | "0x0", 95 | "0X0", 96 | "0x0123456789abcdefABCDEF", 97 | "0b0", 98 | "0b0100101", 99 | "0o0", 100 | "0o01234567", 101 | "2e308", 102 | ]; 103 | static REGEX: &[&str] = &[ 104 | r#"x/"#, 105 | r#"|/"#, 106 | r#"|||/"#, 107 | r#"^$\b\B/"#, 108 | r#"(?=(?!(?:(.))))/"#, 109 | r#"a.\f\n\r\t\v\0\[\-\/\\\x00\u0000/"#, 110 | r#"\d\D\s\S\w\W/"#, 111 | r#"\ca\cb\cc\cd\ce\cf\cg\ch\ci\cj\ck\cl\cm\cn\co\cp\cq\cr\cs\ct\cu\cv\cw\cx\cy\cz/"#, 112 | r#"\cA\cB\cC\cD\cE\cF\cG\cH\cI\cJ\cK\cL\cM\cN\cO\cP\cQ\cR\cS\cT\cU\cV\cW\cX\cY\cZ/"#, 113 | r#"[a-z-]/"#, 114 | r#"[^\b\-^]/"#, 115 | r#"[/\]\\]/"#, 116 | r#"./i"#, 117 | r#"./g"#, 118 | r#"./m"#, 119 | r#"./igm"#, 120 | 
r#".*/"#, 121 | r#".*?/"#, 122 | r#".+/"#, 123 | r#".+?/"#, 124 | r#".?/"#, 125 | r#".??/"#, 126 | r#".{0}/"#, 127 | r#".{0,}/"#, 128 | r#".{0,0}/"#, 129 | ]; 130 | 131 | static TEMPLATE_STARTS: &[&str] = &[ 132 | "`things and stuff times ${", 133 | "`things and stuff`", 134 | r#"`a\${b`"#, 135 | r#"`\0\n\x0A\u000A\u{A}${"#, 136 | ]; 137 | 138 | static TEMPLATE_CONTINUATIONS: &[&str] = &[ 139 | "`${} and animals and minerals`", 140 | "`${}`", 141 | "`${} and animals and minerals`", 142 | "`${} and places and people ${", 143 | ]; 144 | 145 | static IDENTS: &[&str] = &[ 146 | r#"$"#, 147 | r#"_"#, 148 | r#"\u0078"#, 149 | r#"x$"#, 150 | r#"x_"#, 151 | r#"x\u0030"#, 152 | r#"xa"#, 153 | r#"x0"#, 154 | r#"x0a"#, 155 | r#"x0123456789"#, 156 | r#"qwertyuiopasdfghjklzxcvbnm"#, 157 | r#"QWERTYUIOPASDFGHJKLZXCVBNM"#, 158 | r#"œ一"#, 159 | r#"ǻ둘"#, 160 | r#"ɤ〩"#, 161 | r#"φ"#, 162 | r#"fiⅷ"#, 163 | r#"ユニコード"#, 164 | r#"x‌‍"#, 165 | ]; 166 | 167 | static BOOLS: &[&str] = &["true", "false"]; 168 | 169 | static NULL: &[&str] = &["null"]; 170 | 171 | lazy_static! 
{ 172 | static ref TOKENS: Vec<&'static str> = COMMENTS 173 | .into_iter() 174 | .chain(KEYWORDS.into_iter()) 175 | .chain(NUMBERS.into_iter()) 176 | .chain(PUNCTS.into_iter()) 177 | .chain(IDENTS.into_iter()) 178 | .chain(BOOLS.into_iter()) 179 | .chain(NULL.into_iter()) 180 | .chain(TEMPLATE_STARTS.into_iter()) 181 | .map(|s| *s) 182 | .collect(); 183 | static ref JS: String = TOKENS.join("\n"); 184 | } 185 | 186 | fn keywords(c: &mut Criterion) { 187 | c.bench_function("keywords", |b| { 188 | b.iter(|| { 189 | for key in KEYWORDS { 190 | black_box(Tokenizer::new(key).next(true).unwrap()); 191 | } 192 | }) 193 | }); 194 | } 195 | 196 | fn punct(c: &mut Criterion) { 197 | c.bench_function("punct", |b| { 198 | b.iter(|| { 199 | for punct in PUNCTS { 200 | black_box(Tokenizer::new(punct).next(true).unwrap()); 201 | } 202 | }) 203 | }); 204 | } 205 | 206 | fn strings(c: &mut Criterion) { 207 | c.bench_function("strings", |b| { 208 | b.iter(|| { 209 | for s in STRINGS { 210 | black_box(Tokenizer::new(s).next(true).unwrap()); 211 | } 212 | }) 213 | }); 214 | } 215 | 216 | fn comments(c: &mut Criterion) { 217 | c.bench_function("comments", |b| { 218 | b.iter(|| { 219 | for c in COMMENTS { 220 | black_box(Tokenizer::new(c).next(true).unwrap()); 221 | } 222 | }) 223 | }); 224 | } 225 | 226 | fn numbers(c: &mut Criterion) { 227 | c.bench_function("numbers", |b| { 228 | b.iter(|| { 229 | for n in NUMBERS { 230 | black_box(Tokenizer::new(n).next(true).unwrap()); 231 | } 232 | }) 233 | }); 234 | } 235 | 236 | fn regex(c: &mut Criterion) { 237 | c.bench_function("regex", |b| { 238 | b.iter(|| { 239 | for r in REGEX { 240 | black_box(Tokenizer::new(r).next_regex(1).unwrap()); 241 | } 242 | }) 243 | }); 244 | } 245 | 246 | fn templates(c: &mut Criterion) { 247 | c.bench_function("TEMPLATE_CONTINUATIONS", |b| { 248 | b.iter(|| { 249 | for s in TEMPLATE_CONTINUATIONS { 250 | let mut t = Tokenizer::new(&s); 251 | let _ = t.next(true).unwrap(); 252 | 
black_box(t.next(true).unwrap()); 253 | } 254 | }) 255 | }); 256 | c.bench_function("TEMPLATE_STARTS", |b| { 257 | b.iter(|| { 258 | for s in TEMPLATE_STARTS { 259 | black_box(Tokenizer::new(s).next(true).unwrap()); 260 | } 261 | }) 262 | }); 263 | } 264 | 265 | fn bools(c: &mut Criterion) { 266 | c.bench_function("bools", |b| { 267 | b.iter(|| { 268 | for b in BOOLS { 269 | black_box(Tokenizer::new(b).next(true).unwrap()); 270 | } 271 | }) 272 | }); 273 | } 274 | 275 | fn null(c: &mut Criterion) { 276 | c.bench_function("null", |b| { 277 | b.iter(|| { 278 | for b in NULL { 279 | black_box(Tokenizer::new(b).next(true).unwrap()); 280 | } 281 | }) 282 | }); 283 | } 284 | 285 | fn idents(c: &mut Criterion) { 286 | c.bench_function("idents", |b| { 287 | b.iter(|| { 288 | for i in IDENTS { 289 | black_box(Tokenizer::new(i).next(true).unwrap()); 290 | } 291 | }) 292 | }); 293 | } 294 | 295 | pub fn token(c: &mut Criterion) { 296 | c.bench_function("token", |b| { 297 | b.iter(|| { 298 | for s in TOKENS.iter() { 299 | black_box(Tokenizer::new(s).next(true).unwrap()); 300 | } 301 | }) 302 | }); 303 | } 304 | 305 | fn scanner(c: &mut Criterion) { 306 | c.bench_function("scanner", |b| { 307 | b.iter(|| { 308 | let s = Scanner::new(&JS); 309 | black_box(s.collect::>()) 310 | }) 311 | }); 312 | } 313 | 314 | criterion_group!( 315 | benches, punct, keywords, idents, strings, comments, numbers, regex, templates, bools, null, 316 | token, scanner 317 | ); 318 | criterion_main!(benches); 319 | -------------------------------------------------------------------------------- /code_of_conduct.md: -------------------------------------------------------------------------------- 1 | # Contributor Covenant Code of Conduct 2 | 3 | ## Our Pledge 4 | 5 | We as members, contributors, and leaders pledge to make participation in our community a harassment-free experience for everyone, regardless of age, body size, visible or invisible disability, ethnicity, sex characteristics, gender identity 
and expression, level of experience, education, socio-economic status, nationality, personal appearance, race, religion, or sexual identity and orientation. 6 | 7 | We pledge to act and interact in ways that contribute to an open, welcoming, diverse, inclusive, and healthy community. 8 | 9 | ## Our Standards 10 | 11 | Examples of behavior that contributes to a positive environment for our community include: 12 | 13 | * Demonstrating empathy and kindness toward other people 14 | * Being respectful of differing opinions, viewpoints, and experiences 15 | * Giving and gracefully accepting constructive feedback 16 | * Accepting responsibility and apologizing to those affected by our mistakes, and learning from the experience 17 | * Focusing on what is best not just for us as individuals, but for the overall community 18 | 19 | Examples of unacceptable behavior include: 20 | 21 | * The use of sexualized language or imagery, and sexual attention or 22 | advances of any kind 23 | * Trolling, insulting or derogatory comments, and personal or political attacks 24 | * Public or private harassment 25 | * Publishing others' private information, such as a physical or email 26 | address, without their explicit permission 27 | * Other conduct which could reasonably be considered inappropriate in a 28 | professional setting 29 | 30 | ## Enforcement Responsibilities 31 | 32 | Community leaders are responsible for clarifying and enforcing our standards of acceptable behavior and will take appropriate and fair corrective action in response to any behavior that they deem inappropriate, threatening, offensive, or harmful. 33 | 34 | Community leaders have the right and responsibility to remove, edit, or reject comments, commits, code, wiki edits, issues, and other contributions that are not aligned to this Code of Conduct, and will communicate reasons for moderation decisions when appropriate. 
35 | 36 | ## Scope 37 | 38 | This Code of Conduct applies within all community spaces, and also applies when an individual is officially representing the community in public spaces. Examples of representing our community include using an official e-mail address, posting via an official social media account, or acting as an appointed representative at an online or offline event. 39 | 40 | ## Enforcement 41 | 42 | Instances of abusive, harassing, or otherwise unacceptable behavior may be reported to the community leaders responsible for enforcement at [INSERT CONTACT METHOD]. All complaints will be reviewed and investigated promptly and fairly. 43 | 44 | All community leaders are obligated to respect the privacy and security of the reporter of any incident. 45 | 46 | ## Enforcement Guidelines 47 | 48 | Community leaders will follow these Community Impact Guidelines in determining the consequences for any action they deem in violation of this Code of Conduct: 49 | 50 | ### 1. Correction 51 | 52 | **Community Impact**: Use of inappropriate language or other behavior deemed unprofessional or unwelcome in the community. 53 | 54 | **Consequence**: A private, written warning from community leaders, providing clarity around the nature of the violation and an explanation of why the behavior was inappropriate. A public apology may be requested. 55 | 56 | ### 2. Warning 57 | 58 | **Community Impact**: A violation through a single incident or series of actions. 59 | 60 | **Consequence**: A warning with consequences for continued behavior. No interaction with the people involved, including unsolicited interaction with those enforcing the Code of Conduct, for a specified period of time. This includes avoiding interactions in community spaces as well as external channels like social media. Violating these terms may lead to a temporary or permanent ban. 61 | 62 | ### 3. 
Temporary Ban 63 | 64 | **Community Impact**: A serious violation of community standards, including sustained inappropriate behavior. 65 | 66 | **Consequence**: A temporary ban from any sort of interaction or public communication with the community for a specified period of time. No public or private interaction with the people involved, including unsolicited interaction with those enforcing the Code of Conduct, is allowed during this period. Violating these terms may lead to a permanent ban. 67 | 68 | ### 4. Permanent Ban 69 | 70 | **Community Impact**: Demonstrating a pattern of violation of community standards, including sustained inappropriate behavior, harassment of an individual, or aggression toward or disparagement of classes of individuals. 71 | 72 | **Consequence**: A permanent ban from any sort of public interaction within the project community. 73 | 74 | ## Attribution 75 | 76 | This Code of Conduct is adapted from the [Contributor Covenant][homepage], version 2.0, 77 | available at https://www.contributor-covenant.org/version/2/0/code-of-conduct.html. 78 | 79 | Community Impact Guidelines were inspired by [Mozilla's code of conduct enforcement ladder](https://github.com/mozilla/diversity). 80 | 81 | [homepage]: https://www.contributor-covenant.org 82 | 83 | For answers to common questions about this code of conduct, see the FAQ at 84 | https://www.contributor-covenant.org/faq. Translations are available at https://www.contributor-covenant.org/translations. 
85 | 86 | -------------------------------------------------------------------------------- /codecov.yml: -------------------------------------------------------------------------------- 1 | coverage: 2 | status: 3 | project: 4 | default: 5 | informational: true 6 | patch: 7 | default: 8 | enabled: no 9 | if_not_found: success 10 | -------------------------------------------------------------------------------- /examples/clear-comments/src/main.rs: -------------------------------------------------------------------------------- 1 | //! This example is a quick and dirty example of 2 | //! what someone might want to do with a JS token stream. 3 | //! Essentially this is reading in the file and writing it out 4 | //! with no comments. It successfully stripped all of the comments 5 | //! out of a webpack output file though it cannot handle object literals 6 | //! very well. It does a pretty good job of showing how you might use the Scanner. 7 | extern crate docopt; 8 | extern crate ress; 9 | extern crate serde; 10 | #[macro_use] 11 | extern crate serde_derive; 12 | 13 | use std::{ 14 | fs::{read_to_string, File}, 15 | io::{BufWriter, Write}, 16 | path::PathBuf, 17 | string::ToString, 18 | }; 19 | 20 | use docopt::Docopt; 21 | 22 | use ress::prelude::*; 23 | type RefToken<'a> = Token<&'a str>; 24 | 25 | const USAGE: &str = " 26 | clear-comments 27 | 28 | Usage: 29 | clear-comments 30 | "; 31 | 32 | fn main() { 33 | let opts: Opts = Docopt::new(USAGE) 34 | .and_then(|d| d.deserialize()) 35 | .unwrap_or_else(|e| { 36 | println!("error: {:?}", e); 37 | e.exit() 38 | }); 39 | let js = if let Ok(s) = read_to_string(opts.arg_in_path) { 40 | s 41 | } else { 42 | eprintln!("Unable to read in-path"); 43 | ::std::process::exit(1); 44 | }; 45 | let s = Scanner::new(&js); 46 | let mut indent = 0; 47 | let f = File::create(&opts.arg_out_path).expect("Error opening outfile"); 48 | let mut out = BufWriter::new(f); 49 | let mut last_token = Token::EoF; 50 | let mut new_line = false; 51 
| let mut in_loop = false; 52 | let mut in_case = false; 53 | let mut in_if = false; 54 | let mut if_parens = 0; 55 | let mut unbraced_if = false; 56 | for item in s { 57 | let item = item.unwrap(); 58 | println!("{:?}", item); 59 | let token = item.token; 60 | if token.matches_keyword(Keyword::If(())) { 61 | in_if = true; 62 | } 63 | if in_if && token.matches_punct(Punct::OpenParen) { 64 | if_parens += 1; 65 | } 66 | if in_if && token.matches_punct(Punct::CloseParen) { 67 | if_parens -= 1; 68 | } 69 | if last_token.matches_keyword(Keyword::For(())) { 70 | in_loop = true; 71 | } 72 | if last_token.matches_keyword(Keyword::Case(())) 73 | || last_token.matches_keyword(Keyword::Default(())) 74 | { 75 | in_case = true; 76 | } 77 | if last_token.matches_punct(Punct::Colon) && in_case { 78 | new_line = true; 79 | } 80 | if in_loop && last_token.matches_punct(Punct::CloseParen) { 81 | in_loop = false; 82 | } 83 | if token.is_comment() { 84 | continue; 85 | } 86 | if last_token.matches_punct(Punct::OpenBrace) { 87 | indent += 1; 88 | new_line = true; 89 | } 90 | if in_if 91 | && if_parens == 0 92 | && last_token.matches_punct(Punct::CloseParen) 93 | && !token.is_punct() 94 | { 95 | unbraced_if = true; 96 | new_line = true; 97 | indent += 1; 98 | } 99 | if last_token.matches_punct(Punct::CloseParen) && !token.is_punct() { 100 | new_line = true; 101 | } 102 | if last_token.matches_punct(Punct::SemiColon) && !in_loop { 103 | new_line = true; 104 | } 105 | if last_token.matches_punct(Punct::CloseBrace) && !token.is_punct() { 106 | new_line = true; 107 | } 108 | if token.matches_punct(Punct::CloseBrace) { 109 | indent -= 1; 110 | new_line = !last_token.matches_punct(Punct::OpenBrace); 111 | } 112 | if last_token.is_comment() { 113 | new_line = true; 114 | } 115 | if new_line { 116 | out.write_all(format!("\n{}", " ".repeat(indent)).as_bytes()) 117 | .expect("error writing indent"); 118 | new_line = false; 119 | in_if = false; 120 | if_parens = 0; 121 | if unbraced_if { 122 | 
/// Decides whether a space should be emitted between `last_token` and `token`
/// when re-printing the comment-stripped source.
///
/// NOTE: this is an ordered chain of guard clauses — the FIRST matching rule
/// wins, and several puncts (e.g. `Punct::Colon`) appear in more than one
/// rule with opposite answers, so the statement order is load-bearing.
/// Returns `false` (no space) when no rule matches.
fn space_before(last_token: &RefToken, token: &RefToken) -> bool {
    if last_token.matches_punct(Punct::Equal) || token.matches_punct(Punct::DoubleEqual) {
        return true;
    }
    // No space around member access: `obj.prop` / `obj.this`.
    if last_token.matches_punct(Punct::Period)
        && (token.is_ident() || token.matches_keyword(Keyword::This(())))
    {
        return false;
    }
    if (last_token.is_ident() || last_token.matches_keyword(Keyword::This(())))
        && token.matches_punct(Punct::Period)
    {
        return false;
    }
    if token.matches_keyword(Keyword::If(())) {
        return false;
    }
    if last_token.matches_keyword(Keyword::If(())) {
        return true;
    }
    // `return x` needs a space, `return;` does not.
    if last_token.matches_keyword(Keyword::Return(())) && !token.is_punct() {
        return true;
    }
    if last_token.matches_keyword(Keyword::For(())) {
        return true;
    }
    if last_token.matches_keyword(Keyword::Switch(())) {
        return true;
    }
    // A colon as the PREVIOUS token gets a trailing space (object literals,
    // case labels); a colon as the NEXT token is handled further down and
    // gets none — order between these two rules matters.
    if last_token.matches_punct(Punct::Colon) {
        return true;
    }
    if token.matches_keyword(Keyword::This(())) {
        return false;
    }
    if token.matches_punct(Punct::OpenParen) {
        return false;
    }
    if token.matches_punct(Punct::CloseParen) {
        return false;
    }
    if token.matches_punct(Punct::CloseBracket) {
        return false;
    }
    if token.matches_punct(Punct::OpenBracket) {
        return false;
    }
    if token.matches_punct(Punct::CloseBrace) {
        return false;
    }
    if last_token.matches_punct(Punct::OpenBrace) {
        return false;
    }
    if last_token.matches_punct(Punct::CloseBrace) {
        return false;
    }
    // `) {` — e.g. between a condition and its block.
    if last_token.matches_punct(Punct::CloseParen) && token.matches_punct(Punct::OpenBrace) {
        return true;
    }
    if last_token.matches_punct(Punct::OpenBracket) {
        return false;
    }
    if last_token.matches_punct(Punct::OpenParen) {
        return false;
    }
    if token.matches_punct(Punct::SemiColon) {
        return false;
    }
    if token.matches_punct(Punct::Period) {
        return false;
    }
    if last_token.matches_punct(Punct::Period) {
        return false;
    }
    if token.matches_punct(Punct::Comma) {
        return false;
    }
    if token.matches_punct(Punct::Colon) {
        return false;
    }
    if last_token.matches_punct(Punct::Bang) {
        return false;
    }
    if last_token.matches_punct(Punct::Comma) {
        return true;
    }
    if token.matches_punct(Punct::Bang) {
        return false;
    }
    if last_token.matches_keyword(Keyword::Function(())) && token.matches_punct(Punct::OpenBrace) {
        return false;
    }
    // `for (x in y)` / `for (x of y)` need spaces on both sides of the
    // in/of keyword; `of` is contextual so it arrives as an ident.
    if last_token.matches_keyword(Keyword::In(()))
        || last_token.matches_ident_str("of")
        || last_token.matches_keyword(Keyword::For(()))
    {
        return true;
    }
    if token.matches_keyword(Keyword::In(())) || token.matches_ident_str("of") {
        return true;
    }
    if last_token.is_keyword() {
        return true;
    }
    if last_token.matches_punct(Punct::SemiColon) {
        return false;
    }
    if token.is_punct() || last_token.is_punct() {
        return true;
    }
    false
}
key.to_string(), 261 | Token::Null => "null".to_string(), 262 | Token::Number(ref number) => number.to_string(), 263 | Token::Punct(ref p) => p.to_string(), 264 | Token::RegEx(ref regex) => match regex.flags { 265 | Some(ref f) => format!("/{}/{}", regex.body, f), 266 | None => format!("/{}/", regex.body), 267 | }, 268 | Token::String(ref s) => s.to_string(), 269 | _ => String::new(), 270 | } 271 | } 272 | 273 | #[derive(Deserialize)] 274 | struct Opts { 275 | arg_in_path: PathBuf, 276 | arg_out_path: PathBuf, 277 | } 278 | -------------------------------------------------------------------------------- /examples/count_tokens.rs: -------------------------------------------------------------------------------- 1 | use docopt::Docopt; 2 | use ress::prelude::*; 3 | #[macro_use] 4 | extern crate serde_derive; 5 | 6 | use std::{collections::HashMap, fs::read_to_string, path::PathBuf}; 7 | 8 | static USAGE: &str = " 9 | count-tokens 10 | 11 | Usage: 12 | count-tokens 13 | "; 14 | 15 | #[derive(Deserialize)] 16 | struct Opts { 17 | arg_in_path: PathBuf, 18 | } 19 | 20 | fn main() { 21 | let _ = pretty_env_logger::try_init(); 22 | let opts: Opts = Docopt::new(USAGE) 23 | .and_then(|d| d.deserialize()) 24 | .unwrap_or_else(|e| { 25 | println!("error: {:?}", e); 26 | e.exit() 27 | }); 28 | let js = read_to_string(opts.arg_in_path).expect("Failed to read file"); 29 | let mut counts = get_initial_counts(); 30 | let mut total = 0; 31 | 32 | for maybe in Scanner::new(&js) { 33 | let item = maybe.expect("failed to scan token"); 34 | let key = token_type_str(&item.token); 35 | counts.entry(key).and_modify(|c| *c += 1); 36 | total += 1; 37 | } 38 | for (key, value) in counts { 39 | println!("{}: {}", key, value); 40 | } 41 | println!("total: {}", total); 42 | } 43 | 44 | fn token_type_str(tok: &Token<&str>) -> &'static str { 45 | match tok { 46 | Token::Null => "null", 47 | Token::Boolean(_) => "bool", 48 | Token::Ident(_) => "ident", 49 | Token::Number(_) => "number", 50 | 
Token::String(_) => "string", 51 | Token::Keyword(_) => "keyword", 52 | Token::Punct(_) => "punct", 53 | Token::RegEx(_) => "regex", 54 | Token::Template(_) => "template", 55 | Token::Comment(_) => "comment", 56 | Token::EoF => "eof", 57 | } 58 | } 59 | 60 | fn get_initial_counts() -> HashMap<&'static str, usize> { 61 | let mut counts = HashMap::new(); 62 | counts.insert("regex", 0); 63 | counts.insert("ident", 0); 64 | counts.insert("template", 0); 65 | counts.insert("bool", 0); 66 | counts.insert("string", 0); 67 | counts.insert("number", 0); 68 | counts.insert("keyword", 0); 69 | counts.insert("punct", 0); 70 | counts.insert("comment", 0); 71 | counts.insert("null", 0); 72 | counts.insert("eof", 0); 73 | counts 74 | } 75 | -------------------------------------------------------------------------------- /examples/find_regexes.rs: -------------------------------------------------------------------------------- 1 | use ress::prelude::*; 2 | use walkdir::WalkDir; 3 | 4 | use std::{env::args, fs::read_to_string}; 5 | 6 | fn main() { 7 | let mut args = args(); 8 | let _ = args.next(); 9 | let start = args 10 | .next() 11 | .expect("No directory provided as starting location."); 12 | println!("static REGEXES: &[&str] = &["); 13 | let mut set = std::collections::HashSet::new(); 14 | for path in WalkDir::new(start) { 15 | if let Ok(entry) = path { 16 | let path = entry.path(); 17 | if path.is_file() { 18 | if let Some(ext) = path.extension() { 19 | if ext == "js" { 20 | if let Ok(js) = read_to_string(path) { 21 | let s = Scanner::new(&js); 22 | for item in s { 23 | if let Ok(item) = item { 24 | if item.token.is_regex() { 25 | let s = js[item.span.start..item.span.end].to_string(); 26 | if set.insert(s) { 27 | println!( 28 | " r#\"{}\"#,", 29 | &js[item.span.start..item.span.end] 30 | ); 31 | } 32 | } 33 | } 34 | } 35 | } 36 | } 37 | } 38 | } 39 | } 40 | } 41 | println!("];"); 42 | } 43 | -------------------------------------------------------------------------------- 
/examples/instruments/bools.rs: -------------------------------------------------------------------------------- 1 | #![allow(clippy::forget_non_drop)] 2 | // This example exists to allow for profiling 3 | // applications to provide details about 4 | // the criterion benchmarks 5 | use ress::Tokenizer; 6 | 7 | fn main() { 8 | for _ in 0..1000 { 9 | let t = Tokenizer::new("true").next(true).unwrap(); 10 | core::mem::forget(t); 11 | let f = Tokenizer::new("false").next(true).unwrap(); 12 | core::mem::forget(f); 13 | } 14 | } 15 | -------------------------------------------------------------------------------- /examples/instruments/comments.rs: -------------------------------------------------------------------------------- 1 | #![allow(clippy::forget_non_drop)] 2 | // This example exists to allow for profiling 3 | // applications to provide details about 4 | // the criterion benchmarks 5 | use ress::Tokenizer; 6 | 7 | static COMMENTS: &[&str] = &[ 8 | "//this is a comment", 9 | "/*this is a 10 | multi-line comment*/", 11 | "", 12 | " with a trailer", 13 | ]; 14 | 15 | fn main() { 16 | for _ in 0..1000 { 17 | for c in COMMENTS { 18 | let d = Tokenizer::new(c).next(true).unwrap(); 19 | core::mem::forget(d); 20 | } 21 | } 22 | } 23 | -------------------------------------------------------------------------------- /examples/instruments/idents.rs: -------------------------------------------------------------------------------- 1 | #![allow(clippy::forget_non_drop)] 2 | // This example exists to allow for profiling 3 | // applications to provide details about 4 | // the criterion benchmarks 5 | use ress::Tokenizer; 6 | 7 | static IDENTS: &[&str] = &[ 8 | r#"$"#, 9 | r#"_"#, 10 | r#"\u0078"#, 11 | r#"x$"#, 12 | r#"x_"#, 13 | r#"x\u0030"#, 14 | r#"xa"#, 15 | r#"x0"#, 16 | r#"x0a"#, 17 | r#"x0123456789"#, 18 | r#"qwertyuiopasdfghjklzxcvbnm"#, 19 | r#"QWERTYUIOPASDFGHJKLZXCVBNM"#, 20 | r#"œ一"#, 21 | r#"ǻ둘"#, 22 | r#"ɤ〩"#, 23 | r#"φ"#, 24 | r#"fiⅷ"#, 25 | r#"ユニコード"#, 26 | 
r#"x‌‍"#, 27 | ]; 28 | 29 | fn main() { 30 | for _ in 0..1000 { 31 | for i in IDENTS { 32 | let d = Tokenizer::new(i).next(true).unwrap(); 33 | core::mem::forget(d); 34 | } 35 | } 36 | } 37 | -------------------------------------------------------------------------------- /examples/instruments/keywords.rs: -------------------------------------------------------------------------------- 1 | #![allow(clippy::forget_non_drop)] 2 | // This example exists to allow for profiling 3 | // applications to provide details about 4 | // the criterion benchmarks 5 | use ress::Tokenizer; 6 | 7 | static KEYWORDS: &[&str] = &[ 8 | "implements", 9 | "interface", 10 | "package", 11 | "private", 12 | "protected", 13 | "public", 14 | "static", 15 | "yield", 16 | "let", 17 | "enum", 18 | "export", 19 | "import", 20 | "super", 21 | "break", 22 | "case", 23 | "catch", 24 | "continue", 25 | "debugger", 26 | "default", 27 | "delete", 28 | "do", 29 | "else", 30 | "finally", 31 | "for", 32 | "function", 33 | "if", 34 | "instanceof", 35 | "in", 36 | "new", 37 | "return", 38 | "switch", 39 | "this", 40 | "throw", 41 | "try", 42 | "typeof", 43 | "var", 44 | "void", 45 | "while", 46 | "with", 47 | ]; 48 | 49 | fn main() { 50 | for _ in 0..1000 { 51 | for key in KEYWORDS { 52 | let d = Tokenizer::new(key).next(true).unwrap(); 53 | core::mem::forget(d); 54 | } 55 | } 56 | } 57 | -------------------------------------------------------------------------------- /examples/instruments/null.rs: -------------------------------------------------------------------------------- 1 | #![allow(clippy::forget_non_drop)] 2 | // This example exists to allow for profiling 3 | // applications to provide details about 4 | // the criterion benchmarks 5 | use ress::Tokenizer; 6 | 7 | fn main() { 8 | for _ in 0..1000 { 9 | let null = Tokenizer::new("null").next(true).unwrap(); 10 | core::mem::forget(null); 11 | } 12 | } 13 | -------------------------------------------------------------------------------- 
/examples/instruments/numbers.rs: -------------------------------------------------------------------------------- 1 | #![allow(clippy::forget_non_drop)] 2 | // This example exists to allow for profiling 3 | // applications to provide details about 4 | // the criterion benchmarks 5 | use ress::Tokenizer; 6 | static NUMBERS: &[&str] = &[ 7 | "0", 8 | "00", 9 | "1234567890", 10 | "01234567", 11 | "0.", 12 | "0.00", 13 | "10.00", 14 | ".0", 15 | "0e0", 16 | "0E0", 17 | "0.e0", 18 | "0.00e+0", 19 | ".00e-0", 20 | "0x0", 21 | "0X0", 22 | "0x0123456789abcdefABCDEF", 23 | "0b0", 24 | "0b0100101", 25 | "0o0", 26 | "0o01234567", 27 | "2e308", 28 | ]; 29 | 30 | fn main() { 31 | for _ in 0..1000 { 32 | for n in NUMBERS { 33 | let d = Tokenizer::new(n).next(true).unwrap(); 34 | core::mem::forget(d); 35 | } 36 | } 37 | } 38 | -------------------------------------------------------------------------------- /examples/instruments/puncts.rs: -------------------------------------------------------------------------------- 1 | #![allow(clippy::forget_non_drop)] 2 | // This example exists to allow for profiling 3 | // applications to provide details about 4 | // the criterion benchmarks 5 | use ress::Tokenizer; 6 | 7 | static PUNCTS: &[&str] = &[ 8 | "{", "}", "(", ")", ".", ";", ",", "[", "]", ":", "?", "~", ">", "<", "=", "!", "+", "-", "/", 9 | "*", "%", "&", "|", "^", ">>>=", //3 char 10 | "...", "===", "!==", ">>>", "<<=", ">>=", "**=", //2 char 11 | "&&", "||", "==", "!=", "+=", "-=", "*=", "/=", "++", "--", "<<", ">>", "&=", "|=", "^=", "%=", 12 | "<=", ">=", "=>", "**", 13 | ]; 14 | 15 | fn main() { 16 | for _ in 0..1000 { 17 | for punct in PUNCTS { 18 | let d = Tokenizer::new(punct).next(true).unwrap(); 19 | core::mem::forget(d); 20 | } 21 | } 22 | } 23 | -------------------------------------------------------------------------------- /examples/instruments/regexes.rs: -------------------------------------------------------------------------------- 1 | 
#![allow(clippy::forget_non_drop)] 2 | // This example exists to allow for profiling 3 | // applications to provide details about 4 | // the criterion benchmarks 5 | use ress::Tokenizer; 6 | 7 | static REGEX: &[&str] = &[ 8 | r#"x/"#, 9 | r#"|/"#, 10 | r#"|||/"#, 11 | r#"^$\b\B/"#, 12 | r#"(?=(?!(?:(.))))/"#, 13 | r#"a.\f\n\r\t\v\0\[\-\/\\\x00\u0000/"#, 14 | r#"\d\D\s\S\w\W/"#, 15 | r#"\ca\cb\cc\cd\ce\cf\cg\ch\ci\cj\ck\cl\cm\cn\co\cp\cq\cr\cs\ct\cu\cv\cw\cx\cy\cz/"#, 16 | r#"\cA\cB\cC\cD\cE\cF\cG\cH\cI\cJ\cK\cL\cM\cN\cO\cP\cQ\cR\cS\cT\cU\cV\cW\cX\cY\cZ/"#, 17 | r#"[a-z-]/"#, 18 | r#"[^\b\-^]/"#, 19 | r#"[/\]\\]/"#, 20 | r#"./i"#, 21 | r#"./g"#, 22 | r#"./m"#, 23 | r#"./igm"#, 24 | r#".*/"#, 25 | r#".*?/"#, 26 | r#".+/"#, 27 | r#".+?/"#, 28 | r#".?/"#, 29 | r#".??/"#, 30 | r#".{0}/"#, 31 | r#".{0,}/"#, 32 | r#".{0,0}/"#, 33 | ]; 34 | 35 | fn main() { 36 | for _ in 0..1000 { 37 | for r in REGEX { 38 | let d = Tokenizer::new(r).next_regex(1).unwrap(); 39 | core::mem::forget(d); 40 | } 41 | } 42 | } 43 | -------------------------------------------------------------------------------- /examples/instruments/strings.rs: -------------------------------------------------------------------------------- 1 | #![allow(clippy::forget_non_drop)] 2 | // This example exists to allow for profiling 3 | // applications to provide details about 4 | // the criterion benchmarks 5 | use ress::Tokenizer; 6 | 7 | static STRINGS: &[&str] = &[ 8 | r#""things and stuff""#, 9 | r#"'people and places'"#, 10 | r#""with and escaped \"""#, 11 | r#"'another escaped \''"#, 12 | r#""with a new \ 13 | line""#, 14 | r#"'another new line \ 15 | hahaha'"#, 16 | "\"sequence double quoted\\\r\nis hard\"", 17 | "'new line sequence\\\r\nmight be harder'", 18 | ]; 19 | 20 | fn main() { 21 | for _ in 0..1000 { 22 | for s in STRINGS { 23 | let d = Tokenizer::new(s).next(true).unwrap(); 24 | core::mem::forget(d); 25 | } 26 | } 27 | } 28 | 
-------------------------------------------------------------------------------- /examples/instruments/templates.rs: -------------------------------------------------------------------------------- 1 | #![allow(clippy::forget_non_drop)] 2 | // This example exists to allow for profiling 3 | // applications to provide details about 4 | // the criterion benchmarks 5 | use ress::Tokenizer; 6 | 7 | static TEMPLATE_STARTS: &[&str] = &[ 8 | "`things and stuff times ${", 9 | "`things and stuff`", 10 | r#"`a\${b`"#, 11 | r#"`\0\n\x0A\u000A\u{A}${"#, 12 | ]; 13 | 14 | static TEMPLATE_CONTINUATIONS: &[&str] = &[ 15 | "`${} and animals and minerals`", 16 | "`${}`", 17 | "`${} and animals and minerals`", 18 | "`${} and places and people ${", 19 | ]; 20 | 21 | fn main() { 22 | for _ in 0..1000 { 23 | for s in TEMPLATE_CONTINUATIONS { 24 | parse_two(s); 25 | } 26 | for s in TEMPLATE_STARTS { 27 | parse(s); 28 | } 29 | } 30 | } 31 | #[inline] 32 | fn parse_two(s: &str) { 33 | let mut t = Tokenizer::new(s); 34 | let _ = t.next(true).unwrap(); 35 | let d = t.next(true).unwrap(); 36 | core::mem::forget(d); 37 | } 38 | #[inline] 39 | fn parse(s: &str) { 40 | let e = Tokenizer::new(s).next(true).unwrap(); 41 | core::mem::forget(e); 42 | } 43 | -------------------------------------------------------------------------------- /examples/major_libs/src/main.rs: -------------------------------------------------------------------------------- 1 | //! This example is primarily for illustrating the 2 | //! project's performance w/o waiting for the current 3 | //! set of benches. It simply pulls down some major 4 | //! JS libraries and attempts to tokenize them with 5 | //! both methods and then reports the size, time and method 6 | //! for each lib. 
/// Command-line selection flags: one per library that can be benchmarked.
/// Every flag defaults to `false` (via `#[derive(Default)]`, which replaces
/// the original hand-written all-`false` `Default` impl); `pristine` uses
/// that default state to detect that no library was requested, in which case
/// `main` runs all of them.
#[derive(Default)]
struct Args {
    pub angular: bool,
    pub jquery: bool,
    pub react: bool,
    pub react_dom: bool,
    pub vue: bool,
    pub moment: bool,
    pub dexie: bool,
}

impl Args {
    /// `true` when no library flag has been set, i.e. the struct is still in
    /// its default state.
    fn pristine(&self) -> bool {
        !self.angular
            && !self.jquery
            && !self.react
            && !self.react_dom
            && !self.vue
            && !self.moment
            && !self.dexie
    }
}
get_js(Lib::Jquery) { 108 | test_js(js, "jquery"); 109 | } 110 | } 111 | 112 | fn angular1() { 113 | println!("trying angular1"); 114 | if let Ok(ref js) = get_js(Lib::Angular) { 115 | test_js(js, "angular"); 116 | } 117 | } 118 | 119 | fn react() { 120 | println!("trying react"); 121 | if let Ok(ref js) = get_js(Lib::React) { 122 | test_js(js, "react"); 123 | } 124 | } 125 | 126 | fn react_dom() { 127 | println!("trying react_dom"); 128 | if let Ok(ref js) = get_js(Lib::ReactDom) { 129 | test_js(js, "react-dom"); 130 | } 131 | } 132 | 133 | fn vue() { 134 | println!("trying vue"); 135 | if let Ok(ref js) = get_js(Lib::Vue) { 136 | test_js(js, "vue"); 137 | } 138 | } 139 | 140 | fn moment() { 141 | println!("trying moment"); 142 | if let Ok(ref js) = get_js(Lib::Moment) { 143 | test_js(js, "moment") 144 | } 145 | } 146 | 147 | fn dexie() { 148 | println!("trying dexie"); 149 | if let Ok(ref js) = get_js(Lib::Dexie) { 150 | test_js(js, "dexie"); 151 | } 152 | } 153 | 154 | fn test_js(text: &str, name: &str) { 155 | let size = text.len(); 156 | let now = SystemTime::now(); 157 | test(text); 158 | if let Ok(e) = now.elapsed() { 159 | report(size, e, "scanner", name) 160 | } else { 161 | println!("error capturing scanner duration for {}", name); 162 | } 163 | } 164 | 165 | fn test(text: &str) { 166 | let s = ress::Scanner::new(text); 167 | let _: Vec<_> = s.collect(); 168 | } 169 | 170 | fn report(bytes: usize, elapsed: Duration, method: &str, name: &str) { 171 | let size = get_size(bytes); 172 | println!( 173 | "{} ({}) using {} in {}s {:.2}ms", 174 | name, 175 | size, 176 | method, 177 | elapsed.as_secs(), 178 | elapsed.subsec_millis() 179 | ) 180 | } 181 | 182 | fn get_size(b: usize) -> String { 183 | let mut size = b as f32; 184 | let mut i = 0; 185 | while size > 1000.0 { 186 | if i > 4 { 187 | break; 188 | } 189 | size /= 1000.0; 190 | i += 1; 191 | } 192 | let bytes = match i { 193 | 0 => "b", 194 | 1 => "kb", 195 | 2 => "mb", 196 | 3 => "gb", 197 | _ => "tb", 
198 | }; 199 | format!("{:.2}{}", size, bytes) 200 | } 201 | 202 | fn npm_install() -> Result<(), ::std::io::Error> { 203 | let mut c = ::std::process::Command::new("npm"); 204 | c.arg("i"); 205 | c.output()?; 206 | Ok(()) 207 | } 208 | 209 | enum Lib { 210 | Jquery, 211 | Angular, 212 | React, 213 | ReactDom, 214 | Vue, 215 | Moment, 216 | Dexie, 217 | } 218 | 219 | impl Lib { 220 | fn path(&self) -> String { 221 | match self { 222 | Lib::Jquery => "node_modules/jquery/dist/jquery.js".into(), 223 | Lib::Angular => "node_modules/angular/angular.js".into(), 224 | Lib::React => "node_modules/react/umd/react.development.js".into(), 225 | Lib::ReactDom => "node_modules/react-dom/umd/react-dom.development.js".into(), 226 | Lib::Vue => "node_modules/vue/dist/vue.js".into(), 227 | Lib::Moment => "node_modules/moment/moment.js".into(), 228 | Lib::Dexie => "node_modules/dexie/dist/dexie.js".into(), 229 | } 230 | } 231 | } 232 | 233 | fn get_js(l: Lib) -> Result { 234 | let path = PathBuf::from(l.path()); 235 | if !path.exists() { 236 | npm_install()?; 237 | if !path.exists() { 238 | println!("cannot find {:?}", path); 239 | } 240 | } 241 | read_to_string(path) 242 | } 243 | -------------------------------------------------------------------------------- /examples/semi_finder/src/main.rs: -------------------------------------------------------------------------------- 1 | extern crate ress; 2 | extern crate walkdir; 3 | 4 | use ress::prelude::*; 5 | use walkdir::WalkDir; 6 | 7 | use std::{collections::HashMap, env::args, fs::read_to_string, path::PathBuf}; 8 | 9 | fn main() { 10 | // get the command line arguments that started this process 11 | let mut args = args(); 12 | // discard the first argument, this will be the path to our 13 | // executable 14 | let _ = args.next(); 15 | // The next argument will be the path to check 16 | // panic and display an error to the user if no path 17 | // was provided 18 | let start = args 19 | .next() 20 | .expect("No directory provided 
as starting location."); 21 | // Pass the argument off to our `check_files` function 22 | let issues = check_files(start); 23 | // If no issues were found 24 | if issues.is_empty() { 25 | // Print the success message 26 | println!("Good to go, no semicolons found"); 27 | } else { 28 | // Otherwise loop over the hashmap and 29 | // tell the user where we found semi-colons that need to be 30 | // removed 31 | for (path, indexes) in issues { 32 | println!("Issues found in {:?} at indexes:", path); 33 | println!("\t{:?}\n", indexes) 34 | } 35 | } 36 | } 37 | 38 | fn check_files(start: String) -> HashMap> { 39 | // We are going to store the location of any semi-colons we have found 40 | let mut ret: HashMap> = HashMap::new(); 41 | // loop over the directories in our path 42 | // set the min_depth to 1, so we will skip the 43 | // path passed in as `start` 44 | for entry in WalkDir::new(start).min_depth(1) { 45 | match entry { 46 | Ok(entry) => { 47 | // If the entry doesn't error 48 | // capture the path of this entry 49 | let path = entry.path(); 50 | //if the path ends with js, we want to check for semicolons 51 | if path.extension() == Some(::std::ffi::OsStr::new("js")) { 52 | // if we can read the file to a string 53 | // pass the text off to our check_js fn 54 | // if we can't we'll just skip it for now 55 | if let Ok(js) = read_to_string(path) { 56 | let indexes = check_js(&js); 57 | // if we found any semicolons, add them to our hashmap 58 | if !indexes.is_empty() { 59 | ret.insert(path.to_path_buf(), indexes); 60 | } 61 | } 62 | } 63 | } 64 | Err(e) => eprintln!("failed to get a directory entry: {:?}", e), 65 | } 66 | } 67 | ret 68 | } 69 | 70 | fn check_js(js: &str) -> Vec { 71 | // Create a scanner with the text then 72 | // filter out any tokens that are not semi-colons 73 | // then collect them all into a `Vec` of the start indexes 74 | Scanner::new(js) 75 | .filter_map(|item| { 76 | let item = item.unwrap(); 77 | // If this token matches the 
`Punct::SemiColon` 78 | if let Token::Punct(ref inner) = item.token { 79 | match inner { 80 | // we want to return the first position of this token 81 | // since semi-colons are only 1 character wide we would 82 | // only need this part of the `Span` 83 | Punct::SemiColon => Some(item.span.start), 84 | _ => None, 85 | } 86 | } else { 87 | None 88 | } 89 | }) 90 | .collect() 91 | } 92 | -------------------------------------------------------------------------------- /examples/tokenize.rs: -------------------------------------------------------------------------------- 1 | fn main() { 2 | let mut args = std::env::args(); 3 | let _ = args.next(); 4 | let path = args.next().expect("First argument must be a file path"); 5 | let path = std::path::Path::new(&path); 6 | if !path.exists() { 7 | panic!("First argument must be a file path"); 8 | } 9 | let js = std::fs::read_to_string(path).expect("Couldn't read the path provide"); 10 | for item in ress::Scanner::new(&js) { 11 | println!("{:?}", item.expect("failed to lex token")); 12 | } 13 | } 14 | -------------------------------------------------------------------------------- /examples/tokens.js: -------------------------------------------------------------------------------- 1 | const cp = require('child_process'); 2 | const fs = require('fs'); 3 | const prog = require('progress'); 4 | 5 | 6 | function dd(infile, outfile, bytesize) { 7 | console.log('Getting started'); 8 | var bar; 9 | var currentBytes; 10 | 11 | fs.stat(infile, function(err, stat) { 12 | if (err) return console.error('Unable to get infile stats', err.message); 13 | console.log(`moving \n\t${infile}`); 14 | console.log(`to \n\t${outfile}`); 15 | var inFileSize = stat.size; 16 | bar = new prog('Progress [:bar] :percent :current :total', 17 | { 18 | total: inFileSize, 19 | complete: '‡', 20 | incomplete: ' ' 21 | }); 22 | 23 | var dd = cp.spawn('dd', [`if=${infile}`, `of=${outfile}`, `bs=${bytesize || '1m'}`]); 24 | var interval = setInterval(function() { 
25 | if (bar.complete) { 26 | clearInterval(interval) 27 | console.log('Finishing up'); 28 | } else { 29 | dd.kill('SIGINFO'); 30 | } 31 | }, 100); 32 | dd.addListener('exit', function(code, sig) { 33 | if (code == 0) { 34 | bar.tick(bar.total - bar.curr); 35 | console.log('Complete'); 36 | process.exit(); 37 | } else { 38 | console.log(`Exit with code ${code}: ${sig}`); 39 | process.exit(); 40 | } 41 | }); 42 | // TODO: Add color formatting 43 | dd.stderr.on('data', function(data) { 44 | console.log('dd.stderr.on("data", ' + data); 45 | if (typeof data != 'string') data = data.toString('utf8'); 46 | var status = parse(data); 47 | var update; 48 | if (status) { 49 | update = status - currentBytes; 50 | currentBytes = status; 51 | if (!bar.complete) bar.tick(update); 52 | } 53 | }); 54 | }); 55 | } 56 | 57 | function parse(text) { 58 | var lines = text.split('\n') 59 | var line = lines[2] 60 | if (!line) { 61 | line = lines[0] 62 | } 63 | var words = line.split(' ') 64 | return Number.parseInt(words[0]) 65 | } 66 | 67 | var ifile; 68 | var ofile; 69 | var bs; 70 | 71 | if (process.argv[2]) { 72 | ifile = process.argv[2] 73 | } else { 74 | console.error('no ifile'); 75 | process.exit(); 76 | } 77 | if (process.argv[3]) { 78 | ofile = process.argv[3] 79 | } else { 80 | console.error('no ofile'); 81 | process.exit(); 82 | } 83 | 84 | if (process.argv[4]) { 85 | bs = process.argv[4] 86 | } 87 | 88 | dd(ifile, ofile, bs); 89 | 90 | //FIXME nothing used after this 91 | var gen = function*() { 92 | yield 'one'; 93 | yield 'two'; 94 | yield 'three'; 95 | } 96 | let generator = gen(); 97 | let current = generator.next(); 98 | while (!current.done) { 99 | console.log('current value:', current.value); 100 | current = generator.next(); 101 | } 102 | 103 | var {a, b, c} = {a: 1, b: 2, c: 3}; -------------------------------------------------------------------------------- /license.txt: -------------------------------------------------------------------------------- 1 | Copyright 
2018 Robert F. Masen 2 | 3 | Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: 4 | 5 | The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. 6 | 7 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. -------------------------------------------------------------------------------- /package.json: -------------------------------------------------------------------------------- 1 | { 2 | "devDependencies": { 3 | "angular": "^1.5.6", 4 | "dexie": "^2.0.4", 5 | "everything.js": "^1.0.3", 6 | "jquery": "^3.3.1", 7 | "moment": "^2.22.2", 8 | "react": "^16.4.1", 9 | "react-dom": "^16.4.1", 10 | "vue": "^2.5.16" 11 | }, 12 | "dependencies": { 13 | "esprima": "^4.0.1" 14 | } 15 | } 16 | -------------------------------------------------------------------------------- /proptest-regressions/comments.txt: -------------------------------------------------------------------------------- 1 | # Seeds for failure cases proptest has generated in the past. It is 2 | # automatically read and these particular cases re-run before any 3 | # novel cases are generated. 
4 | # 5 | # It is recommended to check this file in to source control so that 6 | # everyone who runs the test benefits from these saved cases. 7 | xs 823165937 3524042579 1300144645 3460888313 # shrinks to s = "//\r¡" 8 | xs 2546364303 4055620366 3887015968 2037831009 # shrinks to s = "" 9 | -------------------------------------------------------------------------------- /proptest-regressions/keywords.txt: -------------------------------------------------------------------------------- 1 | # Seeds for failure cases proptest has generated in the past. It is 2 | # automatically read and these particular cases re-run before any 3 | # novel cases are generated. 4 | # 5 | # It is recommended to check this file in to source control so that 6 | # everyone who runs the test benefits from these saved cases. 7 | xs 1831916767 2446038119 372885449 39983890 # shrinks to s = "class" 8 | xs 3620432093 1598325935 2776960468 1839814061 # shrinks to s = "of" 9 | xs 715549365 4242199435 3354376143 888258416 # shrinks to s = "await" 10 | xs 1617284926 3538474885 2036666429 3668690609 # shrinks to s = "arguments" 11 | -------------------------------------------------------------------------------- /proptest-regressions/numeric.txt: -------------------------------------------------------------------------------- 1 | # Seeds for failure cases proptest has generated in the past. It is 2 | # automatically read and these particular cases re-run before any 3 | # novel cases are generated. 4 | # 5 | # It is recommended to check this file in to source control so that 6 | # everyone who runs the test benefits from these saved cases. 
7 | xs 3219093276 2773970703 4154894776 3021806892 # shrinks to s = "0O8" 8 | xs 891558002 1584292879 3558343646 3847314476 # shrinks to s = "+0" 9 | xs 2600770942 2224153764 1401590777 3215062306 # shrinks to s = "a" 10 | xs 1886057689 3756146993 2034981319 1909346210 # shrinks to s = "0e-0" 11 | -------------------------------------------------------------------------------- /proptest-regressions/punct.txt: -------------------------------------------------------------------------------- 1 | # Seeds for failure cases proptest has generated in the past. It is 2 | # automatically read and these particular cases re-run before any 3 | # novel cases are generated. 4 | # 5 | # It is recommended to check this file in to source control so that 6 | # everyone who runs the test benefits from these saved cases. 7 | xs 2545902955 1260655647 3767056268 2752144796 # shrinks to s = "" 8 | -------------------------------------------------------------------------------- /proptest-regressions/regex.txt: -------------------------------------------------------------------------------- 1 | # Seeds for failure cases proptest has generated in the past. It is 2 | # automatically read and these particular cases re-run before any 3 | # novel cases are generated. 4 | # 5 | # It is recommended to check this file in to source control so that 6 | # everyone who runs the test benefits from these saved cases. 
This library implements an algorithm to detect if any given forward slash is the beginning of a regular expression literal or should be considered a single forward slash
Even if we were to pare down the data to an un-nested enum that would be 1 byte per token
23 | 24 | When we find a forward slash, the first thing we need to do is look backwards 1 token. If the token 1 before the `/` is a punctuation but not `}` or `)` or a keyword but not `this`, we found a regular expression. `}` and `)` are special cases we will get into next but all other previous tokens would mean it is not a regular expression. Now we have just two cases left, first is `)`. If the token before the `/` is a `)`, we need to jump backwards to the token before the `(` that would be paired with this `)`, if that is `if`, `while`, `for`, or `with`, we found a regex otherwise not. If the token one before the `/` is `}`, we need to determine if the pair of `{` and `}` is a "block" ([see below](#is-a-block)). If the `}` isn't part of a "block", we are not at a regex, if it is a block we need to check if that block is the body of a function expression ([see below](#is-a-function-expression-body)). If the block is the body of a function expression it is not a regular expression otherwise it is a regular expression. 25 | 26 | #### Is a Block 27 | To determine if a pair of curly braces is a block we first look 1 before the `{`, if it is a `(`, `[`, an _operator_ ([see below](#punctuation-or-keyword-represents-operation)), or the keyword `case` it is not a block. If the token 1 before the `{` is the keyword `return` or `yield`, we need to compare the line number of the keyword and the `{`, if they match then it is not a block otherwise it is a block. if the token 1 before the `{` is a `:`, we need to look at the possible parent `{`. If there is a parent we run the same test on that `{`, if that is a block, this `{` is also a block, otherwise it is not a block. If the token 1 before the `{` is anything else, it is a block. 28 | 29 | #### Is a Function Expression Body 30 | if the token 1 before the `{` is `)`, we need to look at the two tokens before the paired `(`, if either of them are the keyword `function`, we need to look 1 token before _that_. 
If the token one before `function` is `(`, `[`, an _operator_ ([see below](#punctuation-or-keyword-represents-operation)), or the keyword `case` or `return` the block is the body of a function expression, in all other cases it is not. 31 | 32 |
- check the token before its `(`
68 | 69 | #### _Operators_ 70 | > `=`, `+=`, `-=`, `*=`, `/=`, `%=`, `<<=`, `>>=`, `>>>=`, `&=`, `|=`, `^=`, `,`, `+`, `-`, `*`, `/`, `%`, `<<`, `>>`, `>>>`, `&`, `|`, `^`, `&&`, `||`, `?`, `:`, `instanceof`, `in`, `===`, `==`, `>=`, `<=`, `<`, `>`, `!=`, `!==`, `++`, `--`, `~`, `!`, `delete`, `void`, `typeof`, `throw`, `new` 71 | 72 | With all of that in mind, let's look at an example: 73 | 74 |
75 | types of tokens 76 |
77 | 78 | As you can see, each of the tokens has a type, the key describes how we think about tokens when checking for a regular expression. There are 4 types of token we care about the rest get lumped into `other`, we can refer to this set as `MetaToken`s. Because of how the detecting a block works, we need each of these to know what line it was on, so all of the `MetaToken`s will carry their line number. Looking through the above description of our algorithm, the furthest we need to look backwards from an `(` is 3 tokens, so our scanner should always keep track of the last 3 tokens we have seen. 79 | 80 | You may have noticed that one of the variants of `MetaToken` is "special punctuation", this is because we need to treat `(`, `)`, `{`, and `}` in a special way. 81 | 82 | Using the same example, this is what special means: 83 |
84 | special punctuation 85 |
86 | 87 | Every `)` or `}` needs to point to their paired `(` or `{` and every `{` needs to point to a parent `{` if one exists. In addition both the `(` and `{` need to point to the 3 tokens before them, which might look something like this: 88 | 89 |
90 | opens with lookbehind 91 |
92 | 93 | First we encounter the red `(`, it would need to hold the `things` ident at position 1 and `function` keyword at position 2, position 3 would be empty. Next we would encounter the orange `{`, this would hold the `)` at 1, `(` at 2 and `things` at 3. Finally we would encounter the blue `{`, this would hold the orange `{` at 1, the `)` at 2 and the red `(` at 3, it also hold the orange `{` as its _parent_. 94 | 95 | This means our scanner needs to keep 3 book keeping lists. The first is the last 3 tokens when scanning the next token, as covered above. This essentially needs to act like a queue with a fixed size where the `enqueue` action would `dequeue` when full. Here is an example of how this would look for the first 4 tokens in our example. 96 | 97 | ```rust 98 | // 3 2 1 99 | // step 1 100 | [ None, None, "function"] 101 | // step 2 102 | [ None, "function", "thing"] 103 | // step 3 104 | ["function", "thing", "("] 105 | // step 4 106 | [ "thing", "(", ")"] 107 | ``` 108 | 109 | The next two are going to be one stack for opening parentheses and one for opening curly braces. They are stacks because once we find a close, we don't need that open any more. With these three book keeping constructs we can build our chain of parentheses and curly brace pairs. 110 | 111 | When we encounter an `(`, we attach the last three tokens to it and push that into both the last three queue and the parentheses stack. When we find a `)`, we pop the last `(` and attach it to the `)` and then push the `)` into the last three queue. When we find an `{` we attach the last 3 tokens we have seen and if the curly brace stack is not empty we attach the top of that stack to this `{` as the _parent_. With all that done we can push the `{` into both the open curly stack and the last three queue. Now when we find a `}` we can pop the open curly off it's stack and link it to the `}`, with the `{` and `}` connected we can push the `}` onto the last three queue. 
we can pop the open curly off its stack and link it to the `}`
118 | 119 | ```rust 120 | [ 121 | // 3 122 | MetaToken::CloseParen(MetaToken::OpenParen([ 123 | None, 124 | MetaToken::Keyword(Keyword::Function), 125 | MetaToken::Ident, 126 | ])), 127 | // 2 128 | MetaToken::OpenBrace { 129 | look_behind: [ 130 | MetaToken::Ident, 131 | MetaToken::OpenParen([ 132 | None, 133 | MetaToken::Keyword(Keyword::Function), 134 | MetaToken::Ident, 135 | ]), 136 | MetaToken::CloseParen(MetaToken::OpenParen([ 137 | None, 138 | MetaToken::Keyword(Keyword::Function), 139 | MetaToken::Ident, 140 | ])), 141 | ], 142 | parent: None, 143 | }, 144 | // 1 145 | MetaToken::OpenBrace { 146 | look_behind: [ 147 | MetaToken::OpenParen([ 148 | None, 149 | MetaToken::Keyword(Keyword::Function), 150 | MetaToken::Ident, 151 | ]), 152 | MetaToken::CloseParen(MetaToken::OpenParen([ 153 | None, 154 | MetaToken::Keyword(Keyword::Function), 155 | MetaToken::Ident, 156 | ])), 157 | MetaToken::OpenBrace { 158 | look_behind: [ 159 | MetaToken::Ident, 160 | MetaToken::OpenParen([ 161 | None, 162 | MetaToken::Keyword(Keyword::Function), 163 | MetaToken::Ident, 164 | ]), 165 | MetaToken::CloseParen(MetaToken::OpenParen([ 166 | None, 167 | MetaToken::Keyword(Keyword::Function), 168 | MetaToken::Ident, 169 | ])), 170 | ], 171 | parent: None, 172 | }, 173 | ], 174 | parent: Some(MetaToken::OpenBrace { 175 | look_behind: [ 176 | MetaToken::Ident, 177 | MetaToken::OpenParen([ 178 | None, 179 | MetaToken::Keyword(Keyword::Function), 180 | MetaToken::Ident, 181 | ]), 182 | MetaToken::CloseParen(MetaToken::OpenParen([ 183 | None, 184 | MetaToken::Keyword(Keyword::Function), 185 | MetaToken::Ident, 186 | ])), 187 | ], 188 | parent: None, 189 | }), 190 | }, 191 | ] 192 | ``` 193 | 194 | We have essentially created a list of linked lists and they can get pretty big too! 
This means that each time we move 3 past a `}`, we might have a lot of things to `drop` and by default rust does that in a recursive manner ([which can get expensive](https://rust-unofficial.github.io/too-many-lists/first-drop.html)). If we look at our example JS from above, there are a total of 9 tokens, and when we reach the end of this block, 8 of them are still hanging around in memory. We could try and use some of Rust's smart pointers to make sure we don't have any clones lying around come drop time but picking apart when things should be `Rc`'d and when they cannot be is a pretty challenging problem. Another solution would be to re-write the drop implementation but that just seems like it might get messy. A third option is to try and find a way to capture this information with a `Copy` type. 195 | 196 | If we look over the logic tree above, we can gather most of the information we need when we encounter any `(`, is the token before it `if`, `while`, `for` or `with` or is the token 1 or 2 before it the keyword `function` and is that an expression? Those are really the two key pieces of information we need. What if we just attached those two booleans to the `(` instead of always attaching the last 3 tokens to it? Then when we pop the `(` off its stack, we can transfer the same two booleans to the `)`. 197 | 198 | Now when we find an `{` we can see if it is a block, if the token before is a `)`, we can also attach the paren flags into our `{`, finally we can copy that information over to the `}` when we pop the open off the curly brace stack. While this means we need to do the computation eagerly, it also means we don't have as much to clean up when we move past a `}`. 
We could capture all of the information we need in a couple of `struct`s that might look like this: 199 | 200 | ```rust 201 | struct Paren { 202 | is_conditional: bool, 203 | is_func_expr: bool, 204 | }
5 | /// A 3 element buffer of 6 | /// MetaTokens, this will use a 7 | /// "ring buffer"-esque scheme 8 | /// for automatically overwriting 9 | /// the oldest element after 3
wrapping_add(self.pointer, 1, 2); 27 | self.list[self.pointer as usize] = Some(token) 28 | } 29 | #[inline] 30 | pub fn one(&self) -> &Option { 31 | &self.list[self.pointer as usize] 32 | } 33 | #[inline] 34 | pub fn two(&self) -> &Option { 35 | let idx = wrapping_sub(self.pointer, 1, 2) as usize; 36 | &self.list[idx] 37 | } 38 | #[inline] 39 | pub fn three(&self) -> &Option { 40 | let idx = wrapping_sub(self.pointer, 2, 2) as usize; 41 | &self.list[idx] 42 | } 43 | } 44 | 45 | #[inline] 46 | pub fn wrapping_sub(lhs: u8, rhs: u8, max: u8) -> u8 { 47 | if lhs >= rhs { 48 | lhs - rhs 49 | } else { 50 | let diff = rhs - lhs; 51 | (max + 1) - diff 52 | } 53 | } 54 | #[inline] 55 | pub fn wrapping_add(lhs: u8, rhs: u8, max: u8) -> u8 { 56 | let maybe = lhs + rhs; 57 | if maybe > max { 58 | let diff = maybe - max; 59 | diff.saturating_sub(1) 60 | } else { 61 | maybe 62 | } 63 | } 64 | 65 | /// Token classes needed for look behind 66 | /// 67 | /// All variants will carry their line number 68 | /// 69 | #[derive(Debug, Clone, Copy)] 70 | pub enum MetaToken { 71 | Keyword(RawKeyword, u32), 72 | Punct(Punct), 73 | OpenParen(Paren), 74 | CloseParen(Paren), 75 | OpenBrace(Brace, u32), 76 | CloseBrace(Brace), 77 | Ident, 78 | Other, 79 | } 80 | #[derive(Debug, Clone, Copy)] 81 | pub struct Paren { 82 | pub func_expr: bool, 83 | pub conditional: bool, 84 | } 85 | #[derive(Debug, Clone, Copy)] 86 | pub struct Brace { 87 | pub is_block: bool, 88 | pub paren: Option, 89 | } 90 | 91 | impl MetaToken { 92 | pub fn line_number(self) -> u32 { 93 | match self { 94 | MetaToken::Keyword(_, line) | MetaToken::OpenBrace(_, line) => line, 95 | _ => 0, 96 | } 97 | } 98 | } 99 | 100 | impl PartialEq for MetaToken { 101 | fn eq(&self, other: &MetaToken) -> bool { 102 | match (self, other) { 103 | (MetaToken::Keyword(lhs, _), MetaToken::Keyword(rhs, _)) => lhs == rhs, 104 | (MetaToken::Punct(lhs), MetaToken::Punct(rhs)) => lhs == rhs, 105 | (MetaToken::Ident, MetaToken::Ident) | 
(MetaToken::Other, MetaToken::Other) => true, 106 | _ => false, 107 | } 108 | } 109 | } 110 | 111 | impl From<(&crate::Token, u32)> for MetaToken { 112 | fn from((other, line): (&crate::Token, u32)) -> Self { 113 | match other { 114 | crate::Token::Keyword(k) => MetaToken::Keyword(k.into(), line), 115 | crate::Token::Punct(p) => MetaToken::Punct(*p), 116 | crate::Token::Ident(_) => MetaToken::Ident, 117 | _ => MetaToken::Other, 118 | } 119 | } 120 | } 121 | 122 | #[derive(Debug, Clone)] 123 | pub struct OpenBrace { 124 | pub look_behind: LookBehind, 125 | pub parent: Option>, 126 | } 127 | 128 | #[derive(Debug, Clone)] 129 | pub struct CloseBrace { 130 | pub open: Rc, 131 | } 132 | 133 | #[derive(Debug, Clone)] 134 | pub struct CloseParen { 135 | pub open: LookBehind, 136 | } 137 | 138 | impl std::ops::Deref for OpenBrace { 139 | type Target = LookBehind; 140 | fn deref(&self) -> &Self::Target { 141 | &self.look_behind 142 | } 143 | } 144 | 145 | #[cfg(test)] 146 | mod test { 147 | use super::*; 148 | use crate::tokens::Punct; 149 | 150 | #[test] 151 | fn wrapping_collection() { 152 | let first = MetaToken::Other; 153 | let second = MetaToken::Ident; 154 | let third = MetaToken::Keyword(RawKeyword::Function, 1); 155 | let fourth = MetaToken::Punct(Punct::Ampersand); 156 | let fifth = MetaToken::Punct(Punct::Bang); 157 | let sixth = MetaToken::Punct(Punct::Caret); 158 | let seventh = MetaToken::Punct(Punct::Pipe); 159 | let eighth = MetaToken::Punct(Punct::Tilde); 160 | let mut l = LookBehind::new(); 161 | l.push(first); 162 | test(&l, Some(first), None, None); 163 | l.push(second); 164 | test(&l, Some(second), Some(first), None); 165 | l.push(third); 166 | test(&l, Some(third), Some(second), Some(first)); 167 | l.push(fourth); 168 | test(&l, Some(fourth), Some(third), Some(second)); 169 | l.push(fifth); 170 | test(&l, Some(fifth), Some(fourth), Some(third)); 171 | l.push(sixth); 172 | test(&l, Some(sixth), Some(fifth), Some(fourth)); 173 | l.push(seventh); 174 | 
test(&l, Some(seventh), Some(sixth), Some(fifth)); 175 | l.push(eighth); 176 | test(&l, Some(eighth), Some(seventh), Some(sixth)); 177 | } 178 | 179 | fn test( 180 | l: &LookBehind, 181 | first: Option, 182 | second: Option, 183 | third: Option, 184 | ) { 185 | println!("{:?}", l); 186 | assert_eq!(l.one(), &first, "one didn't match"); 187 | assert_eq!(l.two(), &second, "two didn't match"); 188 | assert_eq!(l.three(), &third, "three didn't match"); 189 | } 190 | 191 | #[test] 192 | fn wrapping() { 193 | assert_eq!(wrapping_sub(4, 1, 4), 3); 194 | assert_eq!(wrapping_sub(1, 1, 4), 0); 195 | assert_eq!(wrapping_sub(0, 1, 4), 4); 196 | assert_eq!(wrapping_add(0, 1, 4), 1); 197 | assert_eq!(wrapping_add(4, 1, 4), 0); 198 | assert_eq!(wrapping_add(0, 6, 4), 1) 199 | } 200 | } 201 | -------------------------------------------------------------------------------- /src/tokenizer/buffer.rs: -------------------------------------------------------------------------------- 1 | use std::char; 2 | #[derive(Clone)] 3 | pub struct JSBuffer<'a> { 4 | pub buffer: &'a [u8], 5 | pub idx: usize, 6 | pub len: usize, 7 | } 8 | const CONT_MASK: u8 = 0b0011_1111; 9 | const TAG_CONT_U8: u8 = 0b1000_0000; 10 | /// Re-implementation of 11 | /// the std::str::Chars logic 12 | impl<'a> JSBuffer<'a> { 13 | #[inline] 14 | pub fn next_char(&mut self) -> Option { 15 | if self.at_end() { 16 | return None; 17 | } 18 | let x = self.next_or_zero(); 19 | if x < 128 { 20 | return Some(x as char); 21 | } 22 | 23 | // Multibyte case follows 24 | // Decode from a byte combination out of: [[[x y] z] w] 25 | // NOTE: Performance is sensitive to the exact formulation here 26 | let init = (x & (0x7F >> 2)) as u32; 27 | let y = self.next_or_zero(); 28 | let mut ch = Self::utf8_acc_cont_byte(init, y); 29 | if x < 0xE0 { 30 | return char::from_u32(ch); 31 | } 32 | // [[x y z] w] case 33 | // 5th bit in 0xE0 .. 
0xEF is always clear, so `init` is still valid 34 | let z = self.next_or_zero(); 35 | let y_z = Self::utf8_acc_cont_byte((y & CONT_MASK) as u32, z); 36 | ch = init << 12 | y_z; 37 | if x < 0xF0 { 38 | return char::from_u32(ch); 39 | } 40 | // [x y z w] case 41 | // use only the lower 3 bits of `init` 42 | let w = self.next_or_zero(); 43 | ch = (init & 7) << 18 | Self::utf8_acc_cont_byte(y_z, w); 44 | char::from_u32(ch) 45 | } 46 | #[inline] 47 | pub fn prev_char(&mut self) -> Option { 48 | // Decode UTF-8 49 | if self.idx == 0 { 50 | return None; 51 | } 52 | let w = self.prev_or_zero(); 53 | if w < 128 { 54 | return char::from_u32(w as u32); 55 | } 56 | 57 | // Multibyte case follows 58 | // Decode from a byte combination out of: [x [y [z w]]] 59 | let mut ch; 60 | let z = self.prev_or_zero(); 61 | ch = Self::utf8_first_byte(z, 2); 62 | if Self::utf8_is_cont_byte(z) { 63 | let y = self.prev_or_zero(); 64 | ch = Self::utf8_first_byte(y, 3); 65 | if Self::utf8_is_cont_byte(y) { 66 | let x = self.prev_or_zero(); 67 | ch = Self::utf8_first_byte(x, 4); 68 | ch = Self::utf8_acc_cont_byte(ch, y); 69 | } 70 | ch = Self::utf8_acc_cont_byte(ch, z); 71 | } 72 | ch = Self::utf8_acc_cont_byte(ch, w); 73 | 74 | char::from_u32(ch) 75 | } 76 | #[inline] 77 | fn next_or_zero(&mut self) -> u8 { 78 | if self.at_end() { 79 | 0 80 | } else { 81 | let old = self.idx; 82 | self.idx += 1; 83 | self.buffer[old] 84 | } 85 | } 86 | #[inline] 87 | fn prev_or_zero(&mut self) -> u8 { 88 | if self.idx < 1 { 89 | return 0; 90 | } 91 | self.idx = self.idx.saturating_sub(1); 92 | self.buffer[self.idx] 93 | } 94 | #[inline] 95 | #[allow(clippy::all)] 96 | fn utf8_acc_cont_byte(ch: u32, byte: u8) -> u32 { 97 | (ch << 6) | (byte & CONT_MASK) as u32 98 | } 99 | #[inline] 100 | #[allow(clippy::all)] 101 | fn utf8_first_byte(byte: u8, width: u32) -> u32 { 102 | (byte & (0x7F >> width)) as u32 103 | } 104 | #[inline] 105 | fn utf8_is_cont_byte(byte: u8) -> bool { 106 | (byte & !CONT_MASK) == TAG_CONT_U8 
107 | } 108 | } 109 | 110 | impl<'a> JSBuffer<'a> { 111 | pub fn new(buffer: &'a [u8]) -> Self { 112 | Self { 113 | buffer, 114 | idx: 0, 115 | len: buffer.len(), 116 | } 117 | } 118 | /// Check if the buffer is at or past the 119 | /// end of the bytes provided 120 | #[inline] 121 | pub fn at_end(&self) -> bool { 122 | self.idx >= self.len 123 | } 124 | 125 | /// Check if the next few bytes match the provided bytes 126 | #[inline] 127 | pub fn look_ahead_matches(&self, s: &[u8]) -> bool { 128 | let len = s.len(); 129 | let end = self.idx + len; 130 | if end > self.len { 131 | return false; 132 | } 133 | end <= self.len && &self.buffer[self.idx..end] == s 134 | } 135 | /// Check if the next byte matches a single byte provided 136 | #[inline] 137 | pub fn look_ahead_byte_matches(&self, b: u8) -> bool { 138 | if self.at_end() { 139 | false 140 | } else { 141 | self.buffer[self.idx] == b 142 | } 143 | } 144 | 145 | /// Skip the number of characters provided returning the number of bytes skipped 146 | /// note: these are full unicode characters, not just bytes 147 | #[inline] 148 | pub fn skip(&mut self, count: usize) { 149 | for _ in 0..count { 150 | self.next_char(); 151 | } 152 | } 153 | #[inline] 154 | pub fn skip_back(&mut self, count: usize) { 155 | for _ in 0..count { 156 | self.prev_char(); 157 | } 158 | } 159 | /// Skip a single byte 160 | /// note: this can cause the buffer to become unaligned 161 | /// be sure to always know the character you are skipping 162 | /// is 1 byte wide or use `skip` instead when unsure 163 | #[inline] 164 | pub fn skip_bytes(&mut self, count: usize) { 165 | self.idx += count; 166 | } 167 | 168 | /// check if current char is a valid 169 | /// js whitespace character 170 | pub fn at_whitespace(&mut self) -> bool { 171 | if self.at_end() { 172 | return false; 173 | } 174 | self.buffer[self.idx] == 9 //\t 175 | || self.buffer[self.idx] == 10 // \n 176 | || self.buffer[self.idx] == 11 // \u{000b} 177 | || self.buffer[self.idx] == 12 // 
\f 178 | || self.buffer[self.idx] == 13 // \r 179 | || self.buffer[self.idx] == 32 // ' ' 180 | || (self.buffer[self.idx] == 194 && self.idx + 1 < self.len && self.buffer[self.idx+1] == 160) 181 | || (self.buffer[self.idx] >= 226 && self.buffer[self.idx] <= 239 && self.len > self.idx + 2 && { 182 | match &self.buffer[self.idx..self.idx+3] { 183 | [239, 187, 191] //"\u{feff}", 184 | | [226, 128, 168] //"\u{2028}", 185 | | [226, 128, 169] //"\u{2029}", 186 | | [226, 128, 128] //"\u{2000}", 187 | | [226, 128, 129] //"\u{2001}", 188 | | [226, 128, 130] //"\u{2002}", 189 | | [226, 128, 131] //"\u{2003}", 190 | | [226, 128, 132] //"\u{2004}", 191 | | [226, 128, 133] //"\u{2005}", 192 | | [226, 128, 134] //"\u{2006}", 193 | | [226, 128, 135] //"\u{2007}", 194 | | [226, 128, 136] //"\u{2008}", 195 | | [226, 128, 137] //"\u{2009}", 196 | | [226, 128, 138] //"\u{200a}", 197 | | [226, 128, 175] //"\u{202f}", 198 | | [226, 129, 159] //"\u{205f}", 199 | | [227, 128, 128] => true, //"\u{3000}", 200 | _ => false, 201 | } 202 | } ) 203 | } 204 | /// Check of the look ahead character is 205 | /// a valid js new line character 206 | #[inline] 207 | pub fn at_new_line(&mut self) -> bool { 208 | if self.at_end() { 209 | return false; 210 | } 211 | let byte = self.buffer[self.idx]; 212 | if byte < 10 { 213 | false 214 | } else if byte == 10 { 215 | true 216 | } else if byte < 13 { 217 | false 218 | } else if byte == 13 { 219 | true 220 | } else if byte < 226 { 221 | false 222 | } else if byte == 226 { 223 | self.look_ahead_matches("\u{2028}".as_bytes()) 224 | || self.look_ahead_matches("\u{2029}".as_bytes()) 225 | } else { 226 | false 227 | } 228 | } 229 | /// check if the look ahead character is `0` or `1` 230 | #[inline] 231 | pub fn at_binary(&self) -> bool { 232 | if self.at_end() { 233 | return false; 234 | } 235 | self.buffer[self.idx] >= b'0' && self.buffer[self.idx] <= b'1' 236 | } 237 | /// check if the look ahead character is a number 238 | /// between `0` and `9`, inclusive 
239 | #[inline] 240 | pub fn at_decimal(&self) -> bool { 241 | if self.at_end() { 242 | return false; 243 | } 244 | self.buffer[self.idx] >= b'0' && self.buffer[self.idx] <= b'9' 245 | } 246 | /// check if the look ahead character is a number 247 | /// between `0` and `7`, inclusive 248 | #[inline] 249 | pub fn at_octal(&self) -> bool { 250 | if self.at_end() { 251 | return false; 252 | } 253 | self.buffer[self.idx] >= b'0' && self.buffer[self.idx] <= b'7' 254 | } 255 | /// check if the look ahead character is a number 256 | /// between `0` and `9` or `a` and `f` or `A` and `F`, inclusive 257 | #[inline] 258 | pub fn at_hex(&self) -> bool { 259 | if self.at_end() { 260 | return false; 261 | } 262 | (self.buffer[self.idx] >= b'0' && self.buffer[self.idx] <= b'9') 263 | || (self.buffer[self.idx] >= b'a' && self.buffer[self.idx] <= b'f') 264 | || (self.buffer[self.idx] >= b'A' && self.buffer[self.idx] <= b'F') 265 | } 266 | /// Peek forward 1 char with out updating the 267 | /// `idx` to this new position. 
268 | /// 269 | /// note: this will still cost the same amount 270 | /// of work as `next_char` but cleans up the 271 | /// book keeping for you 272 | #[inline] 273 | pub fn peek_char(&mut self) -> Option { 274 | let ch = self.next_char()?; 275 | self.skip_back_bytes(ch.len_utf8()); 276 | Some(ch) 277 | } 278 | /// Skip backwards a number of bytes 279 | /// note: this can cause the buffer to become unaligned 280 | /// be sure to always know the character you are skipping 281 | /// is [count] bytes wide or use `skip` instead when unsure 282 | /// the right width is skipped 283 | #[inline] 284 | pub fn skip_back_bytes(&mut self, count: usize) { 285 | self.idx -= count; 286 | } 287 | } 288 | 289 | impl<'a> From<&'a str> for JSBuffer<'a> { 290 | fn from(s: &'a str) -> JSBuffer { 291 | Self::new(s.as_bytes()) 292 | } 293 | } 294 | 295 | #[cfg(test)] 296 | mod test { 297 | use super::*; 298 | 299 | #[test] 300 | fn ascii_chars() { 301 | let mut bytes = Vec::new(); 302 | for i in 0..=255u8 { 303 | if i.is_ascii() { 304 | bytes.push(i); 305 | } 306 | } 307 | let mut buf = JSBuffer::new(&bytes); 308 | for &byte in &bytes { 309 | let ch = buf.next_char().unwrap(); 310 | assert_eq!(ch, byte as char); 311 | } 312 | } 313 | #[test] 314 | fn non_ascii_chars() { 315 | let mut s = String::new(); 316 | eprintln!("collecting u32 chars"); 317 | for (i, v) in (0x7FF..=0x10FFFF).enumerate() { 318 | if let Some(ch) = char::from_u32(v) { 319 | s.push(ch); 320 | } 321 | if i % 100 == 0 { 322 | eprintln!("{}", (v as f32 / (0x10FFFF - 0x7FF) as f32) * 100.0); 323 | } 324 | } 325 | eprintln!("creating buffer"); 326 | let mut buf = JSBuffer::new(s.as_bytes()); 327 | for (i, c1) in s.char_indices() { 328 | let c2 = buf.next_char().unwrap(); 329 | assert_eq!( 330 | c1, c2, 331 | "failed at character {}:\n{} vs {}\n{:08b}\n{:08b}", 332 | i, c1 as u32, c2 as u32, c1 as u32, c2 as u32 333 | ); 334 | } 335 | } 336 | #[test] 337 | fn at_whitespace() { 338 | let whitespaces = &[ 339 | 9, // \t 340 | 
10, // \n 341 | 11, // \u{000b} 342 | 12, // \f 343 | 13, // \r 344 | 32, // ' ' 345 | 194, 160, //\u{00A0} 346 | 239, 187, 191, // \u{FEFF} 347 | 226, 128, 168, // \u{2028} 348 | 226, 128, 169, // \u{2029} 349 | 226, 128, 128, // \u{2000} 350 | 226, 128, 129, // \u{2001} 351 | 226, 128, 130, // \u{2002} 352 | 226, 128, 131, // \u{2003} 353 | 226, 128, 132, // \u{2004} 354 | 226, 128, 133, // \u{2005} 355 | 226, 128, 134, // \u{2006} 356 | 226, 128, 135, // \u{2007} 357 | 226, 128, 136, // \u{2008} 358 | 226, 128, 137, // \u{2009} 359 | 226, 128, 138, // \u{200A} 360 | 226, 128, 175, // \u{202F} 361 | 226, 129, 159, // \u{205F} 362 | 227, 128, 128, // \u{3000} 363 | ]; 364 | let mut buf = JSBuffer::new(whitespaces); 365 | while !buf.at_end() { 366 | assert!( 367 | buf.at_whitespace(), 368 | "buffer was not at whitespace {}", 369 | buf.idx 370 | ); 371 | buf.skip(1); 372 | } 373 | } 374 | #[test] 375 | fn at_oct_number() { 376 | let s = "012345678"; 377 | let mut buf = JSBuffer::from(s); 378 | for _ in 0..8 { 379 | assert!(buf.at_octal()); 380 | let _ = buf.next_char(); 381 | } 382 | assert!(!buf.at_octal()); 383 | } 384 | #[test] 385 | fn at_dec_number() { 386 | let s = "0123456789a"; 387 | 388 | let mut buf = JSBuffer::from(s); 389 | for _ in 0..10 { 390 | assert!(buf.at_decimal()); 391 | let _ = buf.next_char(); 392 | } 393 | assert!(!buf.at_decimal()); 394 | } 395 | #[test] 396 | fn check() { 397 | let s = "🦜🦡🐁kł둘"; 398 | let mut b = JSBuffer::from(s); 399 | assert!(b.next_char().unwrap() == '🦜'); 400 | assert!(b.next_char().unwrap() == '🦡'); 401 | assert!(b.next_char().unwrap() == '🐁'); 402 | assert!(b.next_char().unwrap() == 'k'); 403 | assert!(b.next_char().unwrap() == 'ł'); 404 | assert!(b.next_char().unwrap() == '둘'); 405 | assert!(b.next_char().is_none()); 406 | assert!(b.prev_char().unwrap() == '둘'); 407 | assert!(b.prev_char().unwrap() == 'ł'); 408 | assert!(b.prev_char().unwrap() == 'k'); 409 | assert!(b.prev_char().unwrap() == '🐁'); 410 | 
assert!(b.prev_char().unwrap() == '🦡'); 411 | assert!(b.prev_char().unwrap() == '🦜'); 412 | assert!(b.prev_char().is_none()); 413 | } 414 | 415 | #[test] 416 | fn at_end() { 417 | let js = "'things and stuff'"; 418 | let mut buf = JSBuffer::from(js); 419 | for (i, c) in js.char_indices() { 420 | assert!(c == buf.next_char().unwrap()); 421 | if i < js.len() - 1 { 422 | assert!(!buf.at_end()); 423 | } 424 | } 425 | assert!(buf.at_end()); 426 | } 427 | 428 | #[test] 429 | fn look_ahead_matches() { 430 | let js = r#""things and stuff""#; 431 | let mut buf = JSBuffer::from(js); 432 | for i in 0..js.len() { 433 | let c = &js[i..i + 1]; 434 | assert!(buf.look_ahead_matches(c.as_bytes())); 435 | let _ = buf.next_char(); 436 | } 437 | } 438 | } 439 | -------------------------------------------------------------------------------- /src/tokenizer/keyword_trie.rs: -------------------------------------------------------------------------------- 1 | use crate::tokenizer::{RawKeyword, RawToken, Res, Tokenizer}; 2 | 3 | type MaybeKeyword = Res>; 4 | 5 | impl<'a> Tokenizer<'a> { 6 | /// Detect if an ident is a keyword starting from and id_start 7 | /// character 8 | /// 9 | /// note: the expectation of the start char is that if it were a 10 | /// unicode escape, it would already have been parsed to its approprate 11 | /// character 12 | pub(crate) fn keyword(&mut self, start: char) -> MaybeKeyword { 13 | match start { 14 | 'a' => self.a_keywords(), 15 | 'b' => self.b_keywords(), 16 | 'c' => self.c_keywords(), 17 | 'd' => self.d_keywords(), 18 | 'e' => self.e_keywords(), 19 | 'f' => self.f_keywords(), 20 | 'i' => self.i_keywords(), 21 | 'l' => self.l_keywords(), 22 | 'n' => self.n_keywords(), 23 | 'p' => self.p_keywords(), 24 | 'r' => self.r_keywords(), 25 | 's' => self.s_keywords(), 26 | 't' => self.t_keywords(), 27 | 'v' => self.v_keywords(), 28 | 'w' => self.w_keywords(), 29 | 'y' => self.y_keywords(), 30 | _ => Ok(None), 31 | } 32 | } 33 | /// attempt to parse `await` 34 | fn 
a_keywords(&mut self) -> MaybeKeyword { 35 | self.suffix_for_token("wait", RawToken::Keyword(RawKeyword::Await)) 36 | } 37 | /// attempt to parse `break` 38 | fn b_keywords(&mut self) -> MaybeKeyword { 39 | self.suffix_for_token("reak", RawToken::Keyword(RawKeyword::Break)) 40 | } 41 | /// attempt to parse `case`, `catch`, `class`, `const` or 42 | /// `continue` 43 | fn c_keywords(&mut self) -> MaybeKeyword { 44 | if self.eat_ch_or_escaped('a')? { 45 | if self.eat_ch_or_escaped('s')? { 46 | self.suffix_for_token("e", RawToken::Keyword(RawKeyword::Case)) 47 | } else if self.eat_ch_or_escaped('t')? { 48 | self.suffix_for_token("ch", RawToken::Keyword(RawKeyword::Catch)) 49 | } else { 50 | Ok(None) 51 | } 52 | } else if self.eat_ch_or_escaped('l')? { 53 | self.suffix_for_token("ass", RawToken::Keyword(RawKeyword::Class)) 54 | } else if self.eat_ch_or_escaped('o')? && self.eat_ch_or_escaped('n')? { 55 | if self.eat_ch_or_escaped('s')? { 56 | self.suffix_for_token("t", RawToken::Keyword(RawKeyword::Const)) 57 | } else if self.eat_ch_or_escaped('t')? { 58 | self.suffix_for_token("inue", RawToken::Keyword(RawKeyword::Continue)) 59 | } else { 60 | Ok(None) 61 | } 62 | } else { 63 | Ok(None) 64 | } 65 | } 66 | /// attempt to parse `debugger`, `default`, `delete` or `do` 67 | fn d_keywords(&mut self) -> MaybeKeyword { 68 | if self.eat_ch_or_escaped('e')? { 69 | if self.eat_ch_or_escaped('b')? { 70 | self.suffix_for_token("ugger", RawToken::Keyword(RawKeyword::Debugger)) 71 | } else if self.eat_ch_or_escaped('f')? { 72 | self.suffix_for_token("ault", RawToken::Keyword(RawKeyword::Default)) 73 | } else if self.eat_ch_or_escaped('l')? { 74 | self.suffix_for_token("ete", RawToken::Keyword(RawKeyword::Delete)) 75 | } else { 76 | Ok(None) 77 | } 78 | } else if self.eat_ch_or_escaped('o')? 
&& self.at_ident_end() { 79 | Ok(Some(RawToken::Keyword(RawKeyword::Do))) 80 | } else { 81 | Ok(None) 82 | } 83 | } 84 | /// attempt to parse `else`, `enum`, `export`, or `extends` 85 | fn e_keywords(&mut self) -> MaybeKeyword { 86 | if self.eat_ch_or_escaped('l')? { 87 | self.suffix_for_token("se", RawToken::Keyword(RawKeyword::Else)) 88 | } else if self.eat_ch_or_escaped('n')? { 89 | self.suffix_for_token("um", RawToken::Keyword(RawKeyword::Enum)) 90 | } else if self.eat_ch_or_escaped('x')? { 91 | if self.eat_ch_or_escaped('p')? { 92 | self.suffix_for_token("ort", RawToken::Keyword(RawKeyword::Export)) 93 | } else if self.eat_ch_or_escaped('t')? { 94 | self.suffix_for_token("ends", RawToken::Keyword(RawKeyword::Extends)) 95 | } else { 96 | Ok(None) 97 | } 98 | } else { 99 | Ok(None) 100 | } 101 | } 102 | /// attempt to parse `false`, `finally`, `for` or `function` 103 | fn f_keywords(&mut self) -> MaybeKeyword { 104 | if self.eat_ch_or_escaped('a')? { 105 | self.suffix_for_token("lse", RawToken::Boolean(false)) 106 | } else if self.eat_ch_or_escaped('i')? { 107 | self.suffix_for_token("nally", RawToken::Keyword(RawKeyword::Finally)) 108 | } else if self.eat_ch_or_escaped('o')? { 109 | self.suffix_for_token("r", RawToken::Keyword(RawKeyword::For)) 110 | } else if self.eat_ch_or_escaped('u')? { 111 | self.suffix_for_token("nction", RawToken::Keyword(RawKeyword::Function)) 112 | } else { 113 | Ok(None) 114 | } 115 | } 116 | /// attempt to parse `if`, `implements`, `import`, `in`, `instanceof`, 117 | /// or `interface` 118 | fn i_keywords(&mut self) -> MaybeKeyword { 119 | if self.eat_ch_or_escaped('f')? && self.at_ident_end() { 120 | Ok(Some(RawToken::Keyword(RawKeyword::If))) 121 | } else if self.eat_ch_or_escaped('m')? && self.eat_ch_or_escaped('p')? { 122 | if self.eat_ch_or_escaped('l')? { 123 | self.suffix_for_token("ements", RawToken::Keyword(RawKeyword::Implements)) 124 | } else if self.eat_ch_or_escaped('o')? 
{ 125 | self.suffix_for_token("rt", RawToken::Keyword(RawKeyword::Import)) 126 | } else { 127 | Ok(None) 128 | } 129 | } else if self.eat_ch_or_escaped('n')? { 130 | if self.eat_ch_or_escaped('s')? { 131 | self.suffix_for_token("tanceof", RawToken::Keyword(RawKeyword::InstanceOf)) 132 | } else if self.eat_ch_or_escaped('t')? { 133 | self.suffix_for_token("erface", RawToken::Keyword(RawKeyword::Interface)) 134 | } else if self.at_ident_end() { 135 | Ok(Some(RawToken::Keyword(RawKeyword::In))) 136 | } else { 137 | Ok(None) 138 | } 139 | } else { 140 | Ok(None) 141 | } 142 | } 143 | /// attempt to parse `let` 144 | fn l_keywords(&mut self) -> MaybeKeyword { 145 | self.suffix_for_token("et", RawToken::Keyword(RawKeyword::Let)) 146 | } 147 | /// attempt to parse `new` or `null` 148 | fn n_keywords(&mut self) -> MaybeKeyword { 149 | if self.eat_ch_or_escaped('e')? { 150 | self.suffix_for_token("w", RawToken::Keyword(RawKeyword::New)) 151 | } else if self.eat_ch_or_escaped('u')? { 152 | self.suffix_for_token("ll", RawToken::Null) 153 | } else { 154 | Ok(None) 155 | } 156 | } 157 | /// attempt to parse `package`, `private`, `protected`, or 158 | /// `public`, 159 | fn p_keywords(&mut self) -> MaybeKeyword { 160 | if self.eat_ch_or_escaped('a')? { 161 | self.suffix_for_token("ckage", RawToken::Keyword(RawKeyword::Package)) 162 | } else if self.eat_ch_or_escaped('r')? { 163 | if self.eat_ch_or_escaped('i')? { 164 | self.suffix_for_token("vate", RawToken::Keyword(RawKeyword::Private)) 165 | } else if self.eat_ch_or_escaped('o')? { 166 | self.suffix_for_token("tected", RawToken::Keyword(RawKeyword::Protected)) 167 | } else { 168 | Ok(None) 169 | } 170 | } else if self.eat_ch_or_escaped('u')? 
{ 171 | self.suffix_for_token("blic", RawToken::Keyword(RawKeyword::Public)) 172 | } else { 173 | Ok(None) 174 | } 175 | } 176 | 177 | fn r_keywords(&mut self) -> MaybeKeyword { 178 | self.suffix_for_token("eturn", RawToken::Keyword(RawKeyword::Return)) 179 | } 180 | /// attempt to parse `static`, `super`, or `switch` 181 | fn s_keywords(&mut self) -> MaybeKeyword { 182 | if self.eat_ch_or_escaped('t')? { 183 | self.suffix_for_token("atic", RawToken::Keyword(RawKeyword::Static)) 184 | } else if self.eat_ch_or_escaped('u')? { 185 | self.suffix_for_token("per", RawToken::Keyword(RawKeyword::Super)) 186 | } else if self.eat_ch_or_escaped('w')? { 187 | self.suffix_for_token("itch", RawToken::Keyword(RawKeyword::Switch)) 188 | } else { 189 | Ok(None) 190 | } 191 | } 192 | /// attempt to parse `this`, `throw`, `true`, 193 | /// `try`, or `typeof` 194 | fn t_keywords(&mut self) -> MaybeKeyword { 195 | if self.eat_ch_or_escaped('h')? { 196 | if self.eat_ch_or_escaped('i')? { 197 | self.suffix_for_token("s", RawToken::Keyword(RawKeyword::This)) 198 | } else if self.eat_ch_or_escaped('r')? { 199 | self.suffix_for_token("ow", RawToken::Keyword(RawKeyword::Throw)) 200 | } else { 201 | Ok(None) 202 | } 203 | } else if self.eat_ch_or_escaped('r')? { 204 | if self.eat_ch_or_escaped('u')? { 205 | self.suffix_for_token("e", RawToken::Boolean(true)) 206 | } else if self.eat_ch_or_escaped('y')? && self.at_ident_end() { 207 | Ok(Some(RawToken::Keyword(RawKeyword::Try))) 208 | } else { 209 | Ok(None) 210 | } 211 | } else if self.eat_ch_or_escaped('y')? { 212 | self.suffix_for_token("peof", RawToken::Keyword(RawKeyword::TypeOf)) 213 | } else { 214 | Ok(None) 215 | } 216 | } 217 | /// ttempt to parse `var` or `void`, 218 | fn v_keywords(&mut self) -> MaybeKeyword { 219 | if self.eat_ch_or_escaped('a')? { 220 | self.suffix_for_token("r", RawToken::Keyword(RawKeyword::Var)) 221 | } else if self.eat_ch_or_escaped('o')? 
{ 222 | self.suffix_for_token("id", RawToken::Keyword(RawKeyword::Void)) 223 | } else { 224 | Ok(None) 225 | } 226 | } 227 | /// attempt to parse `while` or `with` 228 | fn w_keywords(&mut self) -> MaybeKeyword { 229 | if self.eat_ch_or_escaped('h')? { 230 | self.suffix_for_token("ile", RawToken::Keyword(RawKeyword::While)) 231 | } else if self.eat_ch_or_escaped('i')? { 232 | self.suffix_for_token("th", RawToken::Keyword(RawKeyword::With)) 233 | } else { 234 | Ok(None) 235 | } 236 | } 237 | /// attempt to parse `yield` 238 | fn y_keywords(&mut self) -> MaybeKeyword { 239 | self.suffix_for_token("ield", RawToken::Keyword(RawKeyword::Yield)) 240 | } 241 | /// This will attempt to consumer the suffix, if successful and 242 | /// the stream is at the end of an identifier, it will return 243 | /// the `tok` provided 244 | /// 245 | /// This is useful for when we have reached a leaf on a trie 246 | fn suffix_for_token(&mut self, suffix: &str, tok: RawToken) -> MaybeKeyword { 247 | if self.eat_chs_or_escaped(suffix)? 
{ 248 | if self.at_ident_end() { 249 | Ok(Some(tok)) 250 | } else { 251 | Ok(None) 252 | } 253 | } else { 254 | Ok(None) 255 | } 256 | } 257 | /// Test if the stream has moved past the end of an identifier 258 | fn at_ident_end(&mut self) -> bool { 259 | if self.look_ahead_matches(r"\u") { 260 | false 261 | } else if let Some(c) = self.stream.next_char() { 262 | if !Self::is_id_continue(c) && c != '\u{200C}' && c != '\u{200D}' { 263 | let _ = self.stream.prev_char(); 264 | true 265 | } else { 266 | false 267 | } 268 | } else { 269 | true 270 | } 271 | } 272 | /// If the characters in the provided &str matche the look ahead _bytes_ 273 | /// or a unicode escape of the characters, it will move 274 | /// the stream's index forward to the approrate position 275 | /// it will stop moving forward after at the first failed 276 | /// match (this means it will consume any leading positive matches) 277 | /// 278 | /// note: the character provided must be an ascii character 279 | /// to get a positive match 280 | fn eat_chs_or_escaped(&mut self, chars: &str) -> Res { 281 | for c in chars.chars() { 282 | if !self.eat_ch_or_escaped(c)? 
{
                return Ok(false);
            }
        }
        Ok(true)
    }
    /// If the character provided matches the look ahead _byte_
    /// or a unicode escape of the character, it will move
    /// the stream's index forward to the appropriate position
    ///
    /// note: the character provided must be an ascii character
    /// to get a positive match
    pub(crate) fn eat_ch_or_escaped(&mut self, ch: char) -> Res<bool> {
        debug_assert!(
            ch.len_utf8() == 1,
            "cannot use eat_ch_or_escaped with characters larger than 1 byte wide"
        );
        Ok(if self.look_ahead_byte_matches(ch) {
            // plain (unescaped) match, consume the single byte
            self.stream.skip_bytes(1);
            true
        } else if self.look_ahead_matches("\\u") {
            let start = self.stream.idx;
            // skip the slash only, `escaped_ident_part` consumes from the `u`
            self.stream.skip_bytes(1);
            let c = self.escaped_ident_part()?;
            if c != ch {
                // not the character we were looking for,
                // rewind to before the escape sequence
                self.stream.idx = start;
                false
            } else {
                true
            }
        } else {
            false
        })
    }
}

#[cfg(test)]
mod test {
    use super::*;

    #[test]
    fn keyword_await() {
        test_with_escapes("await", RawToken::Keyword(RawKeyword::Await));
    }

    #[test]
    fn keyword_break() {
        test_with_escapes("break", RawToken::Keyword(RawKeyword::Break));
    }

    #[test]
    fn keyword_case() {
        test_with_escapes("case", RawToken::Keyword(RawKeyword::Case));
    }

    #[test]
    fn keyword_catch() {
        test_with_escapes("catch", RawToken::Keyword(RawKeyword::Catch));
    }
    #[test]
    fn keyword_const() {
        test_with_escapes("const", RawToken::Keyword(RawKeyword::Const));
    }
    #[test]
    fn keyword_continue() {
        test_with_escapes("continue", RawToken::Keyword(RawKeyword::Continue));
    }
    #[test]
    fn keyword_class() {
        test_with_escapes("class", RawToken::Keyword(RawKeyword::Class));
    }
    #[test]
    fn keyword_debugger() {
        test_with_escapes("debugger", RawToken::Keyword(RawKeyword::Debugger));
    }
    #[test]
    fn keyword_default() {
        test_with_escapes("default", RawToken::Keyword(RawKeyword::Default));
    }
    #[test]
    fn keyword_delete() {
        test_with_escapes("delete", RawToken::Keyword(RawKeyword::Delete));
    }
    #[test]
    fn keyword_do() {
        test_with_escapes("do", RawToken::Keyword(RawKeyword::Do));
    }

    #[test]
    fn keyword_else() {
        test_with_escapes("else", RawToken::Keyword(RawKeyword::Else));
    }
    #[test]
    fn keyword_enum() {
        test_with_escapes("enum", RawToken::Keyword(RawKeyword::Enum));
    }
    #[test]
    fn keyword_export() {
        test_with_escapes("export", RawToken::Keyword(RawKeyword::Export));
    }
    #[test]
    fn keyword_extends() {
        test_with_escapes("extends", RawToken::Keyword(RawKeyword::Extends));
    }
    #[test]
    fn keyword_false() {
        test_with_escapes("false", RawToken::Boolean(false));
    }
    #[test]
    fn keyword_finally() {
        test_with_escapes("finally", RawToken::Keyword(RawKeyword::Finally));
    }
    #[test]
    fn keyword_for() {
        test_with_escapes("for", RawToken::Keyword(RawKeyword::For));
    }
    #[test]
    fn keyword_function() {
        test_with_escapes("function", RawToken::Keyword(RawKeyword::Function));
    }
    #[test]
    fn keyword_if() {
        test_with_escapes("if", RawToken::Keyword(RawKeyword::If));
    }
    #[test]
    fn keyword_implements() {
        test_with_escapes("implements", RawToken::Keyword(RawKeyword::Implements));
    }
    #[test]
    fn keyword_import() {
        test_with_escapes("import", RawToken::Keyword(RawKeyword::Import));
    }
    #[test]
    fn keyword_in() {
        test_with_escapes("in", RawToken::Keyword(RawKeyword::In));
    }
    #[test]
    fn keyword_instance_of() {
        test_with_escapes("instanceof", RawToken::Keyword(RawKeyword::InstanceOf));
    }
    #[test]
    fn keyword_interface() {
        test_with_escapes("interface", RawToken::Keyword(RawKeyword::Interface));
    }
    #[test]
    fn keyword_let() {
        test_with_escapes("let", RawToken::Keyword(RawKeyword::Let));
    }
    #[test]
    fn keyword_new() {
        test_with_escapes("new", RawToken::Keyword(RawKeyword::New));
    }
    #[test]
    fn keyword_null() {
        test_with_escapes("null", RawToken::Null);
    }
    #[test]
    fn keyword_package() {
        test_with_escapes("package", RawToken::Keyword(RawKeyword::Package));
    }
    #[test]
    fn keyword_private() {
        test_with_escapes("private", RawToken::Keyword(RawKeyword::Private));
    }
    #[test]
    fn keyword_protected() {
        test_with_escapes("protected", RawToken::Keyword(RawKeyword::Protected));
    }
    #[test]
    fn keyword_public() {
        test_with_escapes("public", RawToken::Keyword(RawKeyword::Public));
    }
    #[test]
    fn keyword_return() {
        test_with_escapes("return", RawToken::Keyword(RawKeyword::Return));
    }
    #[test]
    fn keyword_static() {
        test_with_escapes("static", RawToken::Keyword(RawKeyword::Static));
    }
    #[test]
    fn keyword_super() {
        test_with_escapes("super", RawToken::Keyword(RawKeyword::Super));
    }
    #[test]
    fn keyword_switch() {
        test_with_escapes("switch", RawToken::Keyword(RawKeyword::Switch));
    }
    #[test]
    fn keyword_this() {
        test_with_escapes("this", RawToken::Keyword(RawKeyword::This));
    }
    #[test]
    fn keyword_throw() {
        test_with_escapes("throw", RawToken::Keyword(RawKeyword::Throw));
    }
    #[test]
    fn keyword_true() {
        test_with_escapes("true", RawToken::Boolean(true));
    }
    #[test]
    fn keyword_try() {
        test_with_escapes("try", RawToken::Keyword(RawKeyword::Try));
    }
    #[test]
    fn keyword_type_of() {
        test_with_escapes("typeof", RawToken::Keyword(RawKeyword::TypeOf));
    }
    #[test]
    fn keyword_var() {
        test_with_escapes("var", RawToken::Keyword(RawKeyword::Var));
    }
    #[test]
    fn keyword_void() {
        test_with_escapes("void", RawToken::Keyword(RawKeyword::Void));
    }
    #[test]
    fn keyword_while() {
        test_with_escapes("while", RawToken::Keyword(RawKeyword::While));
    }
    #[test]
    fn keyword_with() {
        test_with_escapes("with", RawToken::Keyword(RawKeyword::With));
    }
    #[test]
    fn keyword_yield() {
        test_with_escapes("yield", RawToken::Keyword(RawKeyword::Yield));
    }

    /// Tokenize `k` in its plain form and then with each character
    /// replaced (one position at a time) by both the `\uXXXX` and the
    /// `\u{XXXXXX}` escape forms, asserting every variant yields
    /// `expect`. Finally assert that `{k}_not` is *not* a keyword.
    fn test_with_escapes(k: &str, expect: RawToken) {
        let start = k.chars().next().expect("empty keyword");
        let first = test_keyword(start, k)
            .unwrap_or_else(|e| panic!("failed to parse {}: {}", k, e))
            .unwrap_or_else(|| panic!("failed to parse {}", k));
        assert_eq!(first, expect);
        let mut escape_char_code;
        let mut escape_code_points;
        for i in 0..k.chars().count() {
            escape_char_code = String::new();
            escape_code_points = String::new();
            for (j, c) in k.chars().enumerate() {
                if j == i {
                    // keywords are all ascii so `c as u8` is lossless here
                    escape_char_code.push_str(&format!(r#"\u{:04X}"#, c as u8));
                    escape_code_points.push_str(&format!(r#"\u{{{:06X}}}"#, c as u8));
                } else {
                    escape_char_code.push(c);
                    escape_code_points.push(c);
                }
            }
            let second = test_keyword(start, &escape_char_code)
                .unwrap_or_else(|e| {
                    panic!(
                        "failed to parse escaped keyword {}: {}",
                        escape_char_code, e
                    )
                })
                .unwrap_or_else(|| panic!("failed to parse escaped keyword {}", escape_char_code));
            assert_eq!(
                second, expect,
                "{} doesn't match expected keyword",
                escape_char_code
            );
            let third = test_keyword(start, &escape_code_points)
                .unwrap_or_else(|e| {
                    panic!(
                        "failed to parse escaped keyword {}: {}",
                        escape_code_points, e
                    )
                })
                .unwrap_or_else(|| {
                    panic!("failed to parse escaped keyword {}", escape_code_points)
                });
            assert_eq!(
                third, expect,
                "{} doesn't match expected keyword",
                escape_code_points
            );
        }
        let not = format!("{}_not", k);
        assert_eq!(
            test_keyword(start, &not)
                .unwrap_or_else(|e| panic!("Failed to parse not keyword {}: {}", not, e)),
            None
        );
    }

    /// Drive the tokenizer far enough into `k` to produce a keyword
    /// token, starting with its first (possibly escaped) character.
    fn test_keyword(start: char, k: &str) -> MaybeKeyword {
        let mut t = Tokenizer::new(k);
        assert!(
            t.eat_ch_or_escaped(start)?,
            "start didn't match first character {}, {}",
            start,
            k
        );
        t.keyword(start)
    }
}

// ---------------------------------------------------------------------------
// src/tokenizer/tokens.rs
// ---------------------------------------------------------------------------
use crate::tokens::{CommentKind, Keyword, NumberKind, Punct};

#[derive(PartialEq, Eq, Debug, Clone, Copy)]
pub enum RawToken {
    /// `true` or `false`
    Boolean(bool),
    /// The end of the file
    EoF,
    /// An identifier this will be either a variable name
    /// or a function/method name
    Ident,
    /// A word that has been reserved to not be used as an identifier
    Keyword(RawKeyword),
    /// A `null` literal value
    Null,
    /// A number, this includes integers (`1`), decimals (`0.1`),
    /// hex (`0x8f`), binary (`0b010011010`), and octal (`0o273`)
    Number(NumberKind),
    /// A punctuation mark, this includes all mathematical operators
    /// logical operators and general syntax punctuation
    Punct(Punct),
    /// A string literal, either double or single quoted, the associated
    /// value will be the unquoted string
    String {
        kind: StringKind,
        new_line_count: usize,
        last_len:
usize,
        found_octal_escape: bool,
    },
    /// A regular expression literal.
    /// ```js
    /// let regex = /[a-zA-Z]+/g;
    /// ```
    RegEx(usize),
    /// The string parts of a template string
    /// ```js
    /// `things and stuff times ${10}`
    /// // ^^^^^^^^^^^^^^^^^^^^^^ ^
    /// ```
    Template {
        kind: TemplateKind,
        new_line_count: usize,
        last_len: usize,
        has_octal_escape: bool,
        found_invalid_unicode_escape: bool,
        found_invalid_hex_escape: bool,
    },
    /// A comment, the associated value will contain the raw comment
    /// This will capture both inline comments `// I am an inline comment`
    /// and multi-line comments
    /// ```js
    /// /*multi lines
    /// * comments
    /// */
    /// ```
    Comment {
        kind: CommentKind,
        new_line_count: usize,
        last_len: usize,
        end_index: usize,
    },
}

// `Keyword<()>` carries no data, so a bitwise copy is always valid
impl Copy for Keyword<()> {}

impl RawToken {
    /// Is this token any punctuation mark?
    pub fn is_punct(&self) -> bool {
        matches!(self, RawToken::Punct(_))
    }

    /// Is this token a comment (any kind)?
    pub fn is_comment(&self) -> bool {
        matches!(self, RawToken::Comment { .. })
    }
    /// Is this token `/` or `/=`? These are the puncts that matter
    /// when deciding between division and a regex literal.
    pub fn is_div_punct(&self) -> bool {
        matches!(
            self,
            RawToken::Punct(Punct::ForwardSlash | Punct::ForwardSlashEqual)
        )
    }
}

#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum StringKind {
    Double,
    Single,
}

#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum TemplateKind {
    NoSub,
    Head,
    Body,
    Tail,
}
/// A keyword token with no associated source text, the data-free
/// counterpart of [`crate::tokens::Keyword`]
#[derive(PartialEq, Eq, Debug, Clone, Copy)]
pub enum RawKeyword {
    Await,
    Break,
    Case,
    Catch,
    Class,
    Const,
    Continue,
    Debugger,
    Default,
    Delete,
    Do,
    Else,
    Enum,
    Export,
    Extends,
    Finally,
    For,
    Function,
    If,
    Implements,
    Import,
    In,
    InstanceOf,
    Interface,
    Let,
    New,
    Package,
    Private,
    Protected,
    Public,
    Return,
    Static,
    Super,
    Switch,
    This,
    Throw,
    Try,
    TypeOf,
    Var,
    Void,
    While,
    With,
    Yield,
}

impl RawKeyword {
    /// Pair this raw keyword back up with its source text slice
    pub fn with_str(self, s: &str) -> crate::tokens::Keyword<&str> {
        match self {
            RawKeyword::Await => Keyword::Await(s),
            RawKeyword::Break => Keyword::Break(s),
            RawKeyword::Case => Keyword::Case(s),
            RawKeyword::Catch => Keyword::Catch(s),
            RawKeyword::Class => Keyword::Class(s),
            RawKeyword::Const => Keyword::Const(s),
            RawKeyword::Continue => Keyword::Continue(s),
            RawKeyword::Debugger => Keyword::Debugger(s),
            RawKeyword::Default => Keyword::Default(s),
            RawKeyword::Delete => Keyword::Delete(s),
            RawKeyword::Do => Keyword::Do(s),
            RawKeyword::Else => Keyword::Else(s),
            RawKeyword::Enum => Keyword::Enum(s),
            RawKeyword::Export => Keyword::Export(s),
            RawKeyword::Extends => Keyword::Extends(s),
            RawKeyword::Finally => Keyword::Finally(s),
            RawKeyword::For => Keyword::For(s),
            RawKeyword::Function => Keyword::Function(s),
            RawKeyword::If => Keyword::If(s),
            RawKeyword::Implements => Keyword::Implements(s),
            RawKeyword::Import => Keyword::Import(s),
            RawKeyword::In => Keyword::In(s),
            RawKeyword::InstanceOf => Keyword::InstanceOf(s),
            RawKeyword::Interface => Keyword::Interface(s),
            RawKeyword::Let => Keyword::Let(s),
            RawKeyword::New => Keyword::New(s),
            RawKeyword::Package => Keyword::Package(s),
            RawKeyword::Private => Keyword::Private(s),
            RawKeyword::Protected => Keyword::Protected(s),
            RawKeyword::Public => Keyword::Public(s),
            RawKeyword::Return => Keyword::Return(s),
            RawKeyword::Static => Keyword::Static(s),
            RawKeyword::Super => Keyword::Super(s),
            RawKeyword::Switch => Keyword::Switch(s),
            RawKeyword::This => Keyword::This(s),
            RawKeyword::Throw => Keyword::Throw(s),
            RawKeyword::Try => Keyword::Try(s),
            RawKeyword::TypeOf => Keyword::TypeOf(s),
            RawKeyword::Var => Keyword::Var(s),
            RawKeyword::Void => Keyword::Void(s),
            RawKeyword::While => Keyword::While(s),
            RawKeyword::With => Keyword::With(s),
            RawKeyword::Yield => Keyword::Yield(s),
        }
    }
}

impl<T> From<&Keyword<T>> for RawKeyword {
    fn from(k: &Keyword<T>) -> Self {
        match k {
            Keyword::Await(_) => RawKeyword::Await,
            Keyword::Break(_) => RawKeyword::Break,
            Keyword::Case(_) => RawKeyword::Case,
            Keyword::Catch(_) => RawKeyword::Catch,
            Keyword::Class(_) => RawKeyword::Class,
            Keyword::Const(_) => RawKeyword::Const,
            Keyword::Continue(_) => RawKeyword::Continue,
            Keyword::Debugger(_) => RawKeyword::Debugger,
            Keyword::Default(_) => RawKeyword::Default,
            Keyword::Delete(_) => RawKeyword::Delete,
            Keyword::Do(_) => RawKeyword::Do,
            Keyword::Else(_) => RawKeyword::Else,
            Keyword::Enum(_) => RawKeyword::Enum,
            Keyword::Export(_) => RawKeyword::Export,
            Keyword::Extends(_) => RawKeyword::Extends,
            Keyword::Finally(_) => RawKeyword::Finally,
            Keyword::For(_) => RawKeyword::For,
            Keyword::Function(_) => RawKeyword::Function,
            Keyword::If(_) => RawKeyword::If,
            Keyword::Implements(_) => RawKeyword::Implements,
            Keyword::Import(_) => RawKeyword::Import,
            Keyword::In(_) => RawKeyword::In,
            Keyword::InstanceOf(_) => RawKeyword::InstanceOf,
            Keyword::Interface(_) => RawKeyword::Interface,
            Keyword::Let(_) => RawKeyword::Let,
            Keyword::New(_) => RawKeyword::New,
            Keyword::Package(_) => RawKeyword::Package,
            Keyword::Private(_) => RawKeyword::Private,
            Keyword::Protected(_) => RawKeyword::Protected,
            Keyword::Public(_) => RawKeyword::Public,
            Keyword::Return(_) => RawKeyword::Return,
            Keyword::Static(_) => RawKeyword::Static,
            Keyword::Super(_) => RawKeyword::Super,
            Keyword::Switch(_) => RawKeyword::Switch,
            Keyword::This(_) => RawKeyword::This,
            Keyword::Throw(_) => RawKeyword::Throw,
            Keyword::Try(_) => RawKeyword::Try,
            Keyword::TypeOf(_) => RawKeyword::TypeOf,
            Keyword::Var(_) => RawKeyword::Var,
            Keyword::Void(_) => RawKeyword::Void,
            Keyword::While(_) => RawKeyword::While,
            Keyword::With(_) => RawKeyword::With,
            Keyword::Yield(_) => RawKeyword::Yield,
        }
    }
}

// ---------------------------------------------------------------------------
// src/tokenizer/unicode.rs
// ---------------------------------------------------------------------------
#![allow(clippy::all)]
use unicode_xid::UnicodeXID;

/// wrap the `unicode_xid` crate's `is_xid_start`,
/// first short-circuiting around the ascii
/// and other non `CJK` characters
#[inline]
pub(crate) fn is_id_start(c: char) -> bool {
    if c >= 'a' && c <= 'z' {
        true
    } else if c >= 'A' && c <= 'Z' {
        true
    } else if c == '\\' || c == '_' || c == '$' {
        true
    }
else if c < '\u{AA}' {
        // everything below U+00AA that wasn't caught above is not a start
        false
    } else if c == '\u{2118}'
        || c == '\u{212E}'
        || c == '\u{309B}'
        || c == '\u{309C}'
        || c == '\u{1885}'
        || c == '\u{1886}'
    {
        // the ecma262 Other_ID_Start exceptions
        true
    } else {
        UnicodeXID::is_xid_start(c)
    }
}
/// wrap the `unicode_xid` crate's `is_xid_continue`,
/// first short-circuiting around the ascii
/// and other non `CJK` characters
#[inline]
pub(crate) fn is_id_continue(c: char) -> bool {
    if c >= 'a' && c <= 'z' {
        true
    } else if c >= 'A' && c <= 'Z' {
        true
    } else if c >= '0' && c <= '9' {
        true
    } else if c == '\\' || c == '_' || c == '$' {
        true
    } else if c < '\u{AA}' {
        false
    } else if c == '\u{200C}'
        || c == '\u{200D}'
        || c == '\u{2118}'
        || c == '\u{212E}'
        || c == '\u{309B}'
        || c == '\u{309C}'
        || c == '\u{1885}'
        || c == '\u{1886}'
        || c == '\u{1369}'
        || c == '\u{136A}'
        || c == '\u{136B}'
        || c == '\u{136C}'
        || c == '\u{136D}'
        || c == '\u{136E}'
        || c == '\u{136F}'
        || c == '\u{1370}'
        || c == '\u{1371}'
        || c == '\u{B7}'
        || c == '\u{387}'
        || c == '\u{19DA}'
    {
        // ZWNJ/ZWJ plus the Other_ID_Start/Other_ID_Continue exceptions
        true
    } else {
        UnicodeXID::is_xid_continue(c)
    }
}

// ---------------------------------------------------------------------------
// src/tokens/boolean.rs
// ---------------------------------------------------------------------------
#[derive(Debug, PartialEq, Eq, Clone, Copy)]
/// The tokenized representation of `true` or `false`
pub enum Boolean {
    True,
    False,
}
impl PartialEq<bool> for Boolean {
    fn eq(&self, other: &bool) -> bool {
        matches!(
            (self, other),
            (Boolean::True, true) | (Boolean::False, false)
        )
    }
}
impl PartialEq<str> for Boolean {
    fn eq(&self, other: &str) -> bool {
        matches!(
            (self, other),
            (Boolean::True, "true") | (Boolean::False, "false")
        )
    }
}
impl Boolean {
    /// Test if this instance represents `true`
    pub fn is_true(self) -> bool {
        matches!(self, Boolean::True)
    }
}

impl Boolean {
    /// Create a Boolean from raw text, `None` for anything
    /// other than exactly `"true"` or `"false"`
    pub fn from(s: &str) -> Option<Boolean> {
        if s == "true" {
            Some(Boolean::True)
        } else if s == "false" {
            Some(Boolean::False)
        } else {
            None
        }
    }
}

impl From<bool> for Boolean {
    /// Creates a JS Bool for a rust bool
    fn from(b: bool) -> Self {
        if b {
            Boolean::True
        } else {
            Boolean::False
        }
    }
}

impl From<Boolean> for String {
    /// Return this Boolean to the text
    /// that was parsed to create it
    fn from(b: Boolean) -> String {
        match b {
            Boolean::True => "true".into(),
            Boolean::False => "false".into(),
        }
    }
}

impl ToString for Boolean {
    /// Return this Boolean to the text
    /// that was parsed to create it
    fn to_string(&self) -> String {
        match self {
            Boolean::True => "true".into(),
            Boolean::False => "false".into(),
        }
    }
}

impl From<Boolean> for bool {
    /// Creates a Rust bool for a js bool
    fn from(b: Boolean) -> bool {
        match b {
            Boolean::True => true,
            Boolean::False => false,
        }
    }
}

impl From<&Boolean> for bool {
    /// Creates a rust bool for a borrowed js bool
    fn from(b: &Boolean) -> bool {
        match b {
            Boolean::True => true,
            Boolean::False => false,
        }
    }
}

// ---------------------------------------------------------------------------
// src/tokens/comment.rs
// ---------------------------------------------------------------------------
#[derive(Debug, PartialEq, Eq, Clone)]
/// A comment, effectively should be treated
/// as white space. There are 3 kinds of comments
/// according to the specification.
///
/// - Single line comments: `//comment`
/// - Multi line comments: `/* comment */`
/// - HTML comments: `<!-- comment -->` plus more!
pub struct Comment<T> {
    pub kind: CommentKind,
    pub content: T,
    // only used by HTML comments: any trailing text after `-->`
    pub tail_content: Option<T>,
}

impl<T> Comment<T> {
    pub fn from_parts(content: T, kind: CommentKind, tail_content: Option<T>) -> Self {
        Comment {
            content,
            kind,
            tail_content,
        }
    }
    pub fn new_single_line(content: T) -> Self {
        Comment::from_parts(content, CommentKind::Single, None)
    }

    pub fn new_multi_line(content: T) -> Self {
        Comment::from_parts(content, CommentKind::Multi, None)
    }

    pub fn new_html(content: T, tail_content: Option<T>) -> Self {
        Comment::from_parts(content, CommentKind::Html, tail_content)
    }

    pub fn new_html_no_tail(content: T) -> Self {
        Comment::new_html(content, None)
    }

    pub fn new_html_with_tail(content: T, tail: T) -> Self {
        Comment::new_html(content, Some(tail))
    }

    pub fn new_hashbang(content: T) -> Self {
        Comment::from_parts(content, CommentKind::Hashbang, None)
    }
    pub fn is_multi_line(&self) -> bool {
        self.kind == CommentKind::Multi
    }

    pub fn is_single_line(&self) -> bool {
        self.kind == CommentKind::Single
    }

    pub fn is_html(&self) -> bool {
        self.kind == CommentKind::Html
    }

    pub fn is_hashbang(&self) -> bool {
        self.kind == CommentKind::Hashbang
    }
}

impl<T> ToString for Comment<T>
where
    T: AsRef<str>,
{
    /// Re-wrap the content in the delimiters it was parsed from
    fn to_string(&self) -> String {
        match self.kind {
            CommentKind::Single => format!("//{}", self.content.as_ref()),
            CommentKind::Multi => format!("/*{}*/", self.content.as_ref()),
            CommentKind::Html => format!("<!--{}-->", self.content.as_ref()),
            CommentKind::Hashbang => format!("#!{}", self.content.as_ref()),
        }
    }
}

#[derive(Debug, PartialEq, Eq, Clone, Copy)]
/// The 4 kinds of comments
pub enum CommentKind {
    Single,
    Multi,
    Html,
    Hashbang,
}

// ---------------------------------------------------------------------------
// src/tokens/ident.rs
// ---------------------------------------------------------------------------
#[derive(Debug, PartialEq, Eq, Clone)]
/// An identifier
pub struct Ident<T>(T);

impl<T> AsRef<str> for Ident<T>
where
    T: AsRef<str>,
{
    fn as_ref(&self) -> &str {
        self.0.as_ref()
    }
}

impl<T> PartialEq<str> for &Ident<T>
where
    T: AsRef<str>,
{
    fn eq(&self, other: &str) -> bool {
        self.0.as_ref().eq(other)
    }
}

impl<'a> From<&'a str> for Ident<&'a str> {
    fn from(s: &'a str) -> Self {
        Ident(s)
    }
}

impl<T> ToString for Ident<T>
where
    T: AsRef<str>,
{
    fn to_string(&self) -> String {
        self.0.as_ref().to_string()
    }
}

impl<T> From<Ident<T>> for String
where
    T: ToString,
{
    fn from(id: Ident<T>) -> Self {
        id.0.to_string()
    }
}

// ---------------------------------------------------------------------------
// src/tokens/keyword.rs
// ---------------------------------------------------------------------------
#[derive(Debug)]
/// A JS Keyword
///
/// # Standard
/// await
/// break
/// case
/// catch
/// class
/// const
/// continue
/// debugger
/// default
/// delete (10)
/// do
/// else
/// export
/// extends
/// finally
/// for
/// function
/// if
/// import
/// in (20)
/// instanceof
/// new
/// return
/// super
/// switch
/// this
/// throw
/// try
/// typeof
/// var (30)
/// void
/// while
/// with
/// yield
/// # Future Reserved
/// enum
/// # Strict Mode Future Reserved
/// implements
/// package
/// protected
/// interface 46 | /// private (40) 47 | /// public 48 | pub enum Keyword { 49 | Await(T), 50 | Break(T), 51 | Case(T), 52 | Catch(T), 53 | Class(T), 54 | Const(T), 55 | Continue(T), 56 | Debugger(T), 57 | Default(T), 58 | Delete(T), 59 | Do(T), 60 | Else(T), 61 | Enum(T), 62 | Export(T), 63 | Extends(T), 64 | Finally(T), 65 | For(T), 66 | Function(T), 67 | If(T), 68 | Implements(T), 69 | Import(T), 70 | In(T), 71 | InstanceOf(T), 72 | Interface(T), 73 | Let(T), 74 | New(T), 75 | Package(T), 76 | Private(T), 77 | Protected(T), 78 | Public(T), 79 | Return(T), 80 | Static(T), 81 | Super(T), 82 | Switch(T), 83 | This(T), 84 | Throw(T), 85 | Try(T), 86 | TypeOf(T), 87 | Var(T), 88 | Void(T), 89 | While(T), 90 | With(T), 91 | Yield(T), 92 | } 93 | 94 | impl Clone for Keyword 95 | where 96 | T: Clone, 97 | { 98 | fn clone(&self) -> Self { 99 | match self { 100 | Self::Await(i) => Self::Await(i.clone()), 101 | Self::Break(i) => Self::Break(i.clone()), 102 | Self::Case(i) => Self::Case(i.clone()), 103 | Self::Catch(i) => Self::Catch(i.clone()), 104 | Self::Class(i) => Self::Class(i.clone()), 105 | Self::Const(i) => Self::Const(i.clone()), 106 | Self::Continue(i) => Self::Continue(i.clone()), 107 | Self::Debugger(i) => Self::Debugger(i.clone()), 108 | Self::Default(i) => Self::Default(i.clone()), 109 | Self::Delete(i) => Self::Delete(i.clone()), 110 | Self::Do(i) => Self::Do(i.clone()), 111 | Self::Else(i) => Self::Else(i.clone()), 112 | Self::Enum(i) => Self::Enum(i.clone()), 113 | Self::Export(i) => Self::Export(i.clone()), 114 | Self::Extends(i) => Self::Extends(i.clone()), 115 | Self::Finally(i) => Self::Finally(i.clone()), 116 | Self::For(i) => Self::For(i.clone()), 117 | Self::Function(i) => Self::Function(i.clone()), 118 | Self::If(i) => Self::If(i.clone()), 119 | Self::Implements(i) => Self::Implements(i.clone()), 120 | Self::Import(i) => Self::Import(i.clone()), 121 | Self::In(i) => Self::In(i.clone()), 122 | Self::InstanceOf(i) => Self::InstanceOf(i.clone()), 123 
| Self::Interface(i) => Self::Interface(i.clone()), 124 | Self::Let(i) => Self::Let(i.clone()), 125 | Self::New(i) => Self::New(i.clone()), 126 | Self::Package(i) => Self::Package(i.clone()), 127 | Self::Private(i) => Self::Private(i.clone()), 128 | Self::Protected(i) => Self::Protected(i.clone()), 129 | Self::Public(i) => Self::Public(i.clone()), 130 | Self::Return(i) => Self::Return(i.clone()), 131 | Self::Static(i) => Self::Static(i.clone()), 132 | Self::Super(i) => Self::Super(i.clone()), 133 | Self::Switch(i) => Self::Switch(i.clone()), 134 | Self::This(i) => Self::This(i.clone()), 135 | Self::Throw(i) => Self::Throw(i.clone()), 136 | Self::Try(i) => Self::Try(i.clone()), 137 | Self::TypeOf(i) => Self::TypeOf(i.clone()), 138 | Self::Var(i) => Self::Var(i.clone()), 139 | Self::Void(i) => Self::Void(i.clone()), 140 | Self::While(i) => Self::While(i.clone()), 141 | Self::With(i) => Self::With(i.clone()), 142 | Self::Yield(i) => Self::Yield(i.clone()), 143 | } 144 | } 145 | } 146 | 147 | impl PartialEq> for Keyword { 148 | fn eq(&self, other: &Keyword) -> bool { 149 | use Keyword::*; 150 | matches!( 151 | (self, other), 152 | (Await(_), Await(_)) 153 | | (Break(_), Break(_)) 154 | | (Case(_), Case(_)) 155 | | (Catch(_), Catch(_)) 156 | | (Class(_), Class(_)) 157 | | (Const(_), Const(_)) 158 | | (Continue(_), Continue(_)) 159 | | (Debugger(_), Debugger(_)) 160 | | (Default(_), Default(_)) 161 | | (Delete(_), Delete(_)) 162 | | (Do(_), Do(_)) 163 | | (Else(_), Else(_)) 164 | | (Enum(_), Enum(_)) 165 | | (Export(_), Export(_)) 166 | | (Extends(_), Extends(_)) 167 | | (Finally(_), Finally(_)) 168 | | (For(_), For(_)) 169 | | (Function(_), Function(_)) 170 | | (If(_), If(_)) 171 | | (Implements(_), Implements(_)) 172 | | (Import(_), Import(_)) 173 | | (In(_), In(_)) 174 | | (InstanceOf(_), InstanceOf(_)) 175 | | (Interface(_), Interface(_)) 176 | | (Let(_), Let(_)) 177 | | (New(_), New(_)) 178 | | (Package(_), Package(_)) 179 | | (Private(_), Private(_)) 180 | | 
(Protected(_), Protected(_)) 181 | | (Public(_), Public(_)) 182 | | (Return(_), Return(_)) 183 | | (Static(_), Static(_)) 184 | | (Super(_), Super(_)) 185 | | (Switch(_), Switch(_)) 186 | | (This(_), This(_)) 187 | | (Throw(_), Throw(_)) 188 | | (Try(_), Try(_)) 189 | | (TypeOf(_), TypeOf(_)) 190 | | (Var(_), Var(_)) 191 | | (Void(_), Void(_)) 192 | | (While(_), While(_)) 193 | | (With(_), With(_)) 194 | | (Yield(_), Yield(_)) 195 | ) 196 | } 197 | } 198 | 199 | impl Keyword<()> { 200 | pub fn with_str(self, s: &str) -> Keyword<&str> { 201 | match self { 202 | Keyword::Await(_) => Keyword::Await(s), 203 | Keyword::Break(_) => Keyword::Break(s), 204 | Keyword::Case(_) => Keyword::Case(s), 205 | Keyword::Catch(_) => Keyword::Catch(s), 206 | Keyword::Class(_) => Keyword::Class(s), 207 | Keyword::Const(_) => Keyword::Const(s), 208 | Keyword::Continue(_) => Keyword::Continue(s), 209 | Keyword::Debugger(_) => Keyword::Debugger(s), 210 | Keyword::Default(_) => Keyword::Default(s), 211 | Keyword::Delete(_) => Keyword::Delete(s), 212 | Keyword::Do(_) => Keyword::Do(s), 213 | Keyword::Else(_) => Keyword::Else(s), 214 | Keyword::Enum(_) => Keyword::Enum(s), 215 | Keyword::Export(_) => Keyword::Export(s), 216 | Keyword::Extends(_) => Keyword::Extends(s), 217 | Keyword::Finally(_) => Keyword::Finally(s), 218 | Keyword::For(_) => Keyword::For(s), 219 | Keyword::Function(_) => Keyword::Function(s), 220 | Keyword::If(_) => Keyword::If(s), 221 | Keyword::Implements(_) => Keyword::Implements(s), 222 | Keyword::Import(_) => Keyword::Import(s), 223 | Keyword::In(_) => Keyword::In(s), 224 | Keyword::InstanceOf(_) => Keyword::InstanceOf(s), 225 | Keyword::Interface(_) => Keyword::Interface(s), 226 | Keyword::Let(_) => Keyword::Let(s), 227 | Keyword::New(_) => Keyword::New(s), 228 | Keyword::Package(_) => Keyword::Package(s), 229 | Keyword::Private(_) => Keyword::Private(s), 230 | Keyword::Protected(_) => Keyword::Protected(s), 231 | Keyword::Public(_) => Keyword::Public(s), 232 | 
Keyword::Return(_) => Keyword::Return(s), 233 | Keyword::Static(_) => Keyword::Static(s), 234 | Keyword::Super(_) => Keyword::Super(s), 235 | Keyword::Switch(_) => Keyword::Switch(s), 236 | Keyword::This(_) => Keyword::This(s), 237 | Keyword::Throw(_) => Keyword::Throw(s), 238 | Keyword::Try(_) => Keyword::Try(s), 239 | Keyword::TypeOf(_) => Keyword::TypeOf(s), 240 | Keyword::Var(_) => Keyword::Var(s), 241 | Keyword::Void(_) => Keyword::Void(s), 242 | Keyword::While(_) => Keyword::While(s), 243 | Keyword::With(_) => Keyword::With(s), 244 | Keyword::Yield(_) => Keyword::Yield(s), 245 | } 246 | } 247 | } 248 | 249 | impl ToString for Keyword { 250 | /// Convert a keyword into a string 251 | fn to_string(&self) -> String { 252 | self.as_str().into() 253 | } 254 | } 255 | 256 | impl PartialEq for Keyword { 257 | fn eq(&self, other: &str) -> bool { 258 | self.as_str() == other 259 | } 260 | } 261 | 262 | impl Keyword { 263 | /// Is this keyword one of the future reserved words 264 | /// 265 | /// - enum 266 | /// - export 267 | /// - implements 268 | /// - super 269 | pub fn is_future_reserved(&self) -> bool { 270 | matches!( 271 | self, 272 | Keyword::Enum(_) | Keyword::Export(_) | Keyword::Implements(_) | Keyword::Super(_) 273 | ) 274 | } 275 | /// Is this keyword a reserved word when the context 276 | /// has a 'use strict' directive. 
277 | /// 278 | /// ## Keywords 279 | /// - implements 280 | /// - interface 281 | /// - package 282 | /// - private 283 | /// - protected 284 | /// - public 285 | /// - static 286 | /// - yield 287 | /// - let 288 | pub fn is_strict_reserved(&self) -> bool { 289 | matches!( 290 | self, 291 | Keyword::Implements(_) 292 | | Keyword::Interface(_) 293 | | Keyword::Package(_) 294 | | Keyword::Private(_) 295 | | Keyword::Protected(_) 296 | | Keyword::Public(_) 297 | | Keyword::Static(_) 298 | | Keyword::Yield(_) 299 | | Keyword::Let(_) 300 | ) 301 | } 302 | /// Is this keyword a reserved word 303 | /// 304 | /// ## Keywords 305 | /// - break 306 | /// - case 307 | /// - catch 308 | /// - continue 309 | /// - debugger 310 | /// - default 311 | /// - delete 312 | /// - do 313 | /// - else 314 | /// - for 315 | /// - function 316 | /// - if 317 | /// - instanceof 318 | /// - in 319 | /// - new 320 | /// - return 321 | /// - switch 322 | /// - this 323 | /// - throw 324 | /// - try 325 | /// - typeof 326 | /// - var 327 | /// - void 328 | /// - while 329 | /// - with 330 | pub fn is_reserved(&self) -> bool { 331 | matches!( 332 | self, 333 | Keyword::Break(_) 334 | | Keyword::Case(_) 335 | | Keyword::Catch(_) 336 | | Keyword::Class(_) 337 | | Keyword::Continue(_) 338 | | Keyword::Debugger(_) 339 | | Keyword::Default(_) 340 | | Keyword::Delete(_) 341 | | Keyword::Do(_) 342 | | Keyword::Else(_) 343 | | Keyword::Export(_) 344 | | Keyword::Extends(_) 345 | | Keyword::Finally(_) 346 | | Keyword::For(_) 347 | | Keyword::Function(_) 348 | | Keyword::If(_) 349 | | Keyword::Import(_) 350 | | Keyword::In(_) 351 | | Keyword::InstanceOf(_) 352 | | Keyword::New(_) 353 | | Keyword::Return(_) 354 | | Keyword::Switch(_) 355 | | Keyword::Super(_) 356 | | Keyword::This(_) 357 | | Keyword::Throw(_) 358 | | Keyword::Try(_) 359 | | Keyword::TypeOf(_) 360 | | Keyword::Var(_) 361 | | Keyword::Void(_) 362 | | Keyword::While(_) 363 | | Keyword::With(_) 364 | ) 365 | } 366 | 367 | pub fn 
as_str(&self) -> &str { 368 | match self { 369 | Keyword::Await(_) => "await", 370 | Keyword::Break(_) => "break", 371 | Keyword::Case(_) => "case", 372 | Keyword::Catch(_) => "catch", 373 | Keyword::Class(_) => "class", 374 | Keyword::Const(_) => "const", 375 | Keyword::Continue(_) => "continue", 376 | Keyword::Debugger(_) => "debugger", 377 | Keyword::Default(_) => "default", 378 | Keyword::Import(_) => "import", 379 | Keyword::Delete(_) => "delete", 380 | Keyword::Do(_) => "do", 381 | Keyword::Else(_) => "else", 382 | Keyword::Enum(_) => "enum", 383 | Keyword::Export(_) => "export", 384 | Keyword::Extends(_) => "extends", 385 | Keyword::Finally(_) => "finally", 386 | Keyword::For(_) => "for", 387 | Keyword::Function(_) => "function", 388 | Keyword::If(_) => "if", 389 | Keyword::In(_) => "in", 390 | Keyword::Implements(_) => "implements", 391 | Keyword::InstanceOf(_) => "instanceof", 392 | Keyword::Interface(_) => "interface", 393 | Keyword::Let(_) => "let", 394 | Keyword::New(_) => "new", 395 | Keyword::Package(_) => "package", 396 | Keyword::Private(_) => "private", 397 | Keyword::Protected(_) => "protected", 398 | Keyword::Public(_) => "public", 399 | Keyword::Static(_) => "static", 400 | Keyword::Return(_) => "return", 401 | Keyword::Super(_) => "super", 402 | Keyword::Switch(_) => "switch", 403 | Keyword::This(_) => "this", 404 | Keyword::Throw(_) => "throw", 405 | Keyword::Try(_) => "try", 406 | Keyword::TypeOf(_) => "typeof", 407 | Keyword::Var(_) => "var", 408 | Keyword::Void(_) => "void", 409 | Keyword::While(_) => "while", 410 | Keyword::With(_) => "with", 411 | Keyword::Yield(_) => "yield", 412 | } 413 | } 414 | 415 | pub fn to_empty(&self) -> Keyword<()> { 416 | match self { 417 | Keyword::Await(_) => Keyword::Await(()), 418 | Keyword::Break(_) => Keyword::Break(()), 419 | Keyword::Case(_) => Keyword::Case(()), 420 | Keyword::Catch(_) => Keyword::Catch(()), 421 | Keyword::Class(_) => Keyword::Class(()), 422 | Keyword::Const(_) => Keyword::Const(()), 
423 | Keyword::Continue(_) => Keyword::Continue(()), 424 | Keyword::Debugger(_) => Keyword::Debugger(()), 425 | Keyword::Default(_) => Keyword::Default(()), 426 | Keyword::Import(_) => Keyword::Import(()), 427 | Keyword::Delete(_) => Keyword::Delete(()), 428 | Keyword::Do(_) => Keyword::Do(()), 429 | Keyword::Else(_) => Keyword::Else(()), 430 | Keyword::Enum(_) => Keyword::Enum(()), 431 | Keyword::Export(_) => Keyword::Export(()), 432 | Keyword::Extends(_) => Keyword::Extends(()), 433 | Keyword::Finally(_) => Keyword::Finally(()), 434 | Keyword::For(_) => Keyword::For(()), 435 | Keyword::Function(_) => Keyword::Function(()), 436 | Keyword::If(_) => Keyword::If(()), 437 | Keyword::In(_) => Keyword::In(()), 438 | Keyword::Implements(_) => Keyword::Implements(()), 439 | Keyword::InstanceOf(_) => Keyword::InstanceOf(()), 440 | Keyword::Interface(_) => Keyword::Interface(()), 441 | Keyword::Let(_) => Keyword::Let(()), 442 | Keyword::New(_) => Keyword::New(()), 443 | Keyword::Package(_) => Keyword::Package(()), 444 | Keyword::Private(_) => Keyword::Private(()), 445 | Keyword::Protected(_) => Keyword::Protected(()), 446 | Keyword::Public(_) => Keyword::Public(()), 447 | Keyword::Static(_) => Keyword::Static(()), 448 | Keyword::Return(_) => Keyword::Return(()), 449 | Keyword::Super(_) => Keyword::Super(()), 450 | Keyword::Switch(_) => Keyword::Switch(()), 451 | Keyword::This(_) => Keyword::This(()), 452 | Keyword::Throw(_) => Keyword::Throw(()), 453 | Keyword::Try(_) => Keyword::Try(()), 454 | Keyword::TypeOf(_) => Keyword::TypeOf(()), 455 | Keyword::Var(_) => Keyword::Var(()), 456 | Keyword::Void(_) => Keyword::Void(()), 457 | Keyword::While(_) => Keyword::While(()), 458 | Keyword::With(_) => Keyword::With(()), 459 | Keyword::Yield(_) => Keyword::Yield(()), 460 | } 461 | } 462 | } 463 | 464 | impl<'a> Keyword<&'a str> { 465 | #[cfg(test)] 466 | pub fn new(s: &str) -> Self { 467 | match s { 468 | "await" => Keyword::Await("await"), 469 | "break" => Keyword::Break("break"), 
470 | "case" => Keyword::Case("case"), 471 | "catch" => Keyword::Catch("catch"), 472 | "class" => Keyword::Class("class"), 473 | "const" => Keyword::Const("const"), 474 | "continue" => Keyword::Continue("continue"), 475 | "debugger" => Keyword::Debugger("debugger"), 476 | "default" => Keyword::Default("default"), 477 | "import" => Keyword::Import("import"), 478 | "delete" => Keyword::Delete("delete"), 479 | "do" => Keyword::Do("do"), 480 | "else" => Keyword::Else("else"), 481 | "enum" => Keyword::Enum("enum"), 482 | "export" => Keyword::Export("export"), 483 | "extends" => Keyword::Extends("extends"), 484 | "finally" => Keyword::Finally("finally"), 485 | "for" => Keyword::For("for"), 486 | "function" => Keyword::Function("function"), 487 | "if" => Keyword::If("if"), 488 | "in" => Keyword::In("in"), 489 | "implements" => Keyword::Implements("implements"), 490 | "instanceof" => Keyword::InstanceOf("instanceof"), 491 | "interface" => Keyword::Interface("interface"), 492 | "let" => Keyword::Let("let"), 493 | "new" => Keyword::New("new"), 494 | "package" => Keyword::Package("package"), 495 | "private" => Keyword::Private("private"), 496 | "protected" => Keyword::Protected("protected"), 497 | "public" => Keyword::Public("public"), 498 | "static" => Keyword::Static("static"), 499 | "return" => Keyword::Return("return"), 500 | "super" => Keyword::Super("super"), 501 | "switch" => Keyword::Switch("switch"), 502 | "this" => Keyword::This("this"), 503 | "throw" => Keyword::Throw("throw"), 504 | "try" => Keyword::Try("try"), 505 | "typeof" => Keyword::TypeOf("typeof"), 506 | "var" => Keyword::Var("var"), 507 | "void" => Keyword::Void("void"), 508 | "while" => Keyword::While("while"), 509 | "with" => Keyword::With("with"), 510 | "yield" => Keyword::Yield("yield"), 511 | _ => panic!("Invalid keyword..."), 512 | } 513 | } 514 | pub fn has_unicode_escape(&self) -> bool { 515 | match self { 516 | Keyword::Await(s) => s, 517 | Keyword::Break(s) => s, 518 | Keyword::Case(s) => s, 519 
| Keyword::Catch(s) => s, 520 | Keyword::Class(s) => s, 521 | Keyword::Const(s) => s, 522 | Keyword::Continue(s) => s, 523 | Keyword::Debugger(s) => s, 524 | Keyword::Default(s) => s, 525 | Keyword::Import(s) => s, 526 | Keyword::Delete(s) => s, 527 | Keyword::Do(s) => s, 528 | Keyword::Else(s) => s, 529 | Keyword::Enum(s) => s, 530 | Keyword::Export(s) => s, 531 | Keyword::Extends(s) => s, 532 | Keyword::Finally(s) => s, 533 | Keyword::For(s) => s, 534 | Keyword::Function(s) => s, 535 | Keyword::If(s) => s, 536 | Keyword::In(s) => s, 537 | Keyword::Implements(s) => s, 538 | Keyword::InstanceOf(s) => s, 539 | Keyword::Interface(s) => s, 540 | Keyword::Let(s) => s, 541 | Keyword::New(s) => s, 542 | Keyword::Package(s) => s, 543 | Keyword::Private(s) => s, 544 | Keyword::Protected(s) => s, 545 | Keyword::Public(s) => s, 546 | Keyword::Static(s) => s, 547 | Keyword::Return(s) => s, 548 | Keyword::Super(s) => s, 549 | Keyword::Switch(s) => s, 550 | Keyword::This(s) => s, 551 | Keyword::Throw(s) => s, 552 | Keyword::Try(s) => s, 553 | Keyword::TypeOf(s) => s, 554 | Keyword::Var(s) => s, 555 | Keyword::Void(s) => s, 556 | Keyword::While(s) => s, 557 | Keyword::With(s) => s, 558 | Keyword::Yield(s) => s, 559 | } 560 | .contains("\\u") 561 | } 562 | } 563 | -------------------------------------------------------------------------------- /src/tokens/number.rs: -------------------------------------------------------------------------------- 1 | #[derive(Debug, PartialEq, Eq, Clone)] 2 | /// A JS number literal. There are 4 kinds of number 3 | /// literals allowed in JS. 
4 | /// 5 | /// - Decimal Literals - This includes integers and decimals with 6 | /// optional exponent notation 7 | /// - Hexadecimal Literals - These begin with 0x and consist of numbers 8 | /// 0-9 and letters A-F (case insensitive) 9 | /// - Octal Literals - These being with 0o and consist of numbers 10 | /// 0-7 11 | /// - Binary Literals - These begin with 0b and consist of numbers 0 and 1 12 | pub struct Number(T); 13 | 14 | /// Extension methods for allowing Number 15 | /// to work with both &str and String 16 | pub trait NumberExt { 17 | fn kind(&self) -> NumberKind; 18 | fn is_hex(&self) -> bool; 19 | fn is_bin(&self) -> bool; 20 | fn is_oct(&self) -> bool; 21 | fn is_dec(&self) -> bool; 22 | fn has_exponent(&self) -> bool; 23 | fn is_big_int(&self) -> bool; 24 | } 25 | 26 | impl Number 27 | where 28 | T: AsRef, 29 | { 30 | pub fn kind(&self) -> NumberKind { 31 | let s = self.0.as_ref(); 32 | match self.0.as_ref().get(0..2) { 33 | Some("0x") | Some("0X") => NumberKind::Hex, 34 | Some("0b") | Some("0B") => NumberKind::Bin, 35 | Some("0o") | Some("0O") => NumberKind::Oct, 36 | _ => { 37 | if s.ends_with('n') { 38 | NumberKind::BigInt 39 | } else { 40 | NumberKind::Dec 41 | } 42 | } 43 | } 44 | } 45 | 46 | pub fn is_hex(&self) -> bool { 47 | self.kind() == NumberKind::Hex 48 | } 49 | pub fn is_bin(&self) -> bool { 50 | self.kind() == NumberKind::Bin 51 | } 52 | pub fn is_oct(&self) -> bool { 53 | self.kind() == NumberKind::Oct 54 | } 55 | pub fn is_dec(&self) -> bool { 56 | self.kind() == NumberKind::Dec 57 | } 58 | pub fn has_exponent(&self) -> bool { 59 | match self.kind() { 60 | NumberKind::Dec => self.0.as_ref().contains(|c| c == 'e' || c == 'E'), 61 | _ => false, 62 | } 63 | } 64 | pub fn is_big_int(&self) -> bool { 65 | self.kind() == NumberKind::BigInt 66 | } 67 | } 68 | 69 | impl<'a> From<&'a str> for Number<&'a str> { 70 | fn from(s: &'a str) -> Self { 71 | Number(s) 72 | } 73 | } 74 | 75 | impl ToString for Number 76 | where 77 | T: AsRef, 78 | { 
79 | fn to_string(&self) -> String { 80 | self.0.as_ref().to_string() 81 | } 82 | } 83 | 84 | impl PartialEq for &Number 85 | where 86 | T: AsRef, 87 | { 88 | fn eq(&self, other: &str) -> bool { 89 | self.0.as_ref().eq(other) 90 | } 91 | } 92 | 93 | #[derive(Debug, PartialEq, Eq, Clone, Copy)] 94 | /// The 5 kinds of numbers 95 | pub enum NumberKind { 96 | Dec, 97 | Hex, 98 | Bin, 99 | Oct, 100 | BigInt, 101 | } 102 | -------------------------------------------------------------------------------- /src/tokens/regex.rs: -------------------------------------------------------------------------------- 1 | #[derive(Debug, PartialEq, Eq, Clone)] 2 | /// A Regular Expression Literal 3 | /// 4 | /// These being with a `/` and the 5 | /// body ends with another `/` 6 | /// optionally a series of one letter 7 | /// flags can be included after the `/` 8 | pub struct RegEx { 9 | pub body: T, 10 | pub flags: Option, 11 | } 12 | 13 | impl RegEx { 14 | pub fn from_parts(body: T, flags: Option) -> Self { 15 | RegEx { body, flags } 16 | } 17 | } 18 | 19 | impl ToString for RegEx 20 | where 21 | T: AsRef, 22 | { 23 | fn to_string(&self) -> String { 24 | let f = if let Some(f) = &self.flags { 25 | f.as_ref().to_string() 26 | } else { 27 | String::new() 28 | }; 29 | format!("/{}/{}", self.body.as_ref(), f) 30 | } 31 | } 32 | -------------------------------------------------------------------------------- /src/tokens/string.rs: -------------------------------------------------------------------------------- 1 | #[derive(Debug, PartialEq, Eq, Clone)] 2 | /// A single or double quoted string 3 | /// literal 4 | pub enum StringLit { 5 | Single(InnerString), 6 | Double(InnerString), 7 | } 8 | #[derive(Debug, PartialEq, Eq, Clone)] 9 | pub struct InnerString { 10 | pub content: T, 11 | pub contains_octal_escape: bool, 12 | } 13 | 14 | impl ToString for StringLit 15 | where 16 | T: AsRef, 17 | { 18 | fn to_string(&self) -> String { 19 | match self { 20 | StringLit::Single(ref s) => 
format!(r#"'{}'"#, s.content.as_ref()), 21 | StringLit::Double(ref s) => format!(r#""{}""#, s.content.as_ref()), 22 | } 23 | } 24 | } 25 | 26 | impl AsRef for StringLit 27 | where 28 | T: AsRef, 29 | { 30 | fn as_ref(&self) -> &str { 31 | match self { 32 | StringLit::Single(s) | StringLit::Double(s) => s.as_ref(), 33 | } 34 | } 35 | } 36 | 37 | impl AsRef for InnerString 38 | where 39 | T: AsRef, 40 | { 41 | fn as_ref(&self) -> &str { 42 | self.content.as_ref() 43 | } 44 | } 45 | 46 | impl StringLit { 47 | pub fn single(content: T, oct: bool) -> Self { 48 | StringLit::Single(InnerString { 49 | content, 50 | contains_octal_escape: oct, 51 | }) 52 | } 53 | pub fn double(content: T, oct: bool) -> Self { 54 | StringLit::Double(InnerString { 55 | content, 56 | contains_octal_escape: oct, 57 | }) 58 | } 59 | pub fn is_single(&self) -> bool { 60 | matches!(self, StringLit::Single(_)) 61 | } 62 | pub fn is_double(&self) -> bool { 63 | matches!(self, StringLit::Double(_)) 64 | } 65 | pub fn has_octal_escape(&self) -> bool { 66 | match self { 67 | StringLit::Single(ref inner) | StringLit::Double(ref inner) => { 68 | inner.contains_octal_escape 69 | } 70 | } 71 | } 72 | } 73 | -------------------------------------------------------------------------------- /src/tokens/template.rs: -------------------------------------------------------------------------------- 1 | #[derive(Debug, PartialEq, Eq, Clone)] 2 | /// A template string 3 | /// 4 | /// These include strings that are wrapped in back ticks (`) 5 | /// which allows for interpolating any js expression between `${` 6 | /// and `}` 7 | pub enum Template { 8 | NoSub(TemplateLiteral), 9 | Head(TemplateLiteral), 10 | Middle(TemplateLiteral), 11 | Tail(TemplateLiteral), 12 | } 13 | 14 | #[derive(Debug, PartialEq, Eq, Clone)] 15 | pub struct TemplateLiteral { 16 | pub content: T, 17 | pub contains_octal_escape: bool, 18 | pub contains_invalid_unicode_escape: bool, 19 | pub contains_invalid_hex_escape: bool, 20 | } 21 | impl 
TemplateLiteral { 22 | pub fn new( 23 | content: T, 24 | contains_octal_escape: bool, 25 | contains_invalid_unicode_escape: bool, 26 | contains_invalid_hex_escape: bool, 27 | ) -> Self { 28 | Self { 29 | content, 30 | contains_octal_escape, 31 | contains_invalid_unicode_escape, 32 | contains_invalid_hex_escape, 33 | } 34 | } 35 | } 36 | 37 | impl Template { 38 | pub fn no_sub_template(content: T, oct: bool, uni: bool, hex: bool) -> Self { 39 | Template::NoSub(TemplateLiteral::new(content, oct, uni, hex)) 40 | } 41 | pub fn template_head(content: T, oct: bool, uni: bool, hex: bool) -> Self { 42 | Template::Head(TemplateLiteral::new(content, oct, uni, hex)) 43 | } 44 | pub fn template_middle(content: T, oct: bool, uni: bool, hex: bool) -> Self { 45 | Template::Middle(TemplateLiteral::new(content, oct, uni, hex)) 46 | } 47 | pub fn template_tail(content: T, oct: bool, uni: bool, hex: bool) -> Self { 48 | Template::Tail(TemplateLiteral::new(content, oct, uni, hex)) 49 | } 50 | pub fn is_head(&self) -> bool { 51 | matches!(self, Template::Head(_)) 52 | } 53 | pub fn is_middle(&self) -> bool { 54 | matches!(self, Template::Middle(_)) 55 | } 56 | pub fn is_tail(&self) -> bool { 57 | matches!(self, Template::Tail(_)) 58 | } 59 | pub fn is_no_sub(&self) -> bool { 60 | matches!(self, Template::NoSub(_)) 61 | } 62 | } 63 | 64 | impl ToString for Template 65 | where 66 | T: AsRef, 67 | { 68 | fn to_string(&self) -> String { 69 | match self { 70 | Template::NoSub(ref t) => format!("`{}`", t.content.as_ref()), 71 | Template::Head(ref t) => format!("`{}${{", t.content.as_ref()), 72 | Template::Middle(ref t) => format!("}}{}${{", t.content.as_ref()), 73 | Template::Tail(ref t) => format!("}}{}`", t.content.as_ref()), 74 | } 75 | } 76 | } 77 | -------------------------------------------------------------------------------- /tests/ecma262/main.rs: -------------------------------------------------------------------------------- 1 | #![cfg(test)] 2 | extern crate pretty_env_logger; 3 
| extern crate ress; 4 | #[macro_use] 5 | extern crate log; 6 | #[macro_use] 7 | extern crate lazy_static; 8 | 9 | use std::{fs::read_to_string, path::Path, process::Command}; 10 | 11 | use ress::Scanner; 12 | mod es2015m; 13 | mod es2015s; 14 | mod es5; 15 | 16 | #[test] 17 | fn es5_test() { 18 | println!("testing es5"); 19 | ensure_logging(); 20 | let js = get_js(EsVersion::Es5); 21 | for (i, (lhs, rhs)) in Scanner::new(&js).zip(es5::ES5.iter()).enumerate() { 22 | let lhs = lhs.unwrap(); 23 | debug!("{:?}:{:?}", lhs.token, rhs); 24 | assert_eq!( 25 | (i, &lhs.token), 26 | (i, rhs), 27 | "{}:{}\n{}", 28 | EsVersion::Es5.path(), 29 | lhs.location.start, 30 | &js[lhs.span.start..lhs.span.end] 31 | ); 32 | } 33 | } 34 | 35 | #[test] 36 | fn es2015_script_test() { 37 | println!("testing es2015 script"); 38 | ensure_logging(); 39 | let js = get_js(EsVersion::Es2015Script); 40 | for (i, (lhs, rhs)) in Scanner::new(&js).zip(es2015s::TOKENS.iter()).enumerate() { 41 | let lhs = lhs.unwrap(); 42 | debug!("{:?}:{:?}", lhs.token, rhs); 43 | assert_eq!( 44 | (i, &lhs.token), 45 | (i, rhs), 46 | "{}:{}\n{}", 47 | EsVersion::Es2015Script.path(), 48 | lhs.location.start, 49 | &js[lhs.span.start..lhs.span.end] 50 | ); 51 | } 52 | } 53 | 54 | #[test] 55 | fn es2015_module_test() { 56 | ensure_logging(); 57 | debug!("testing es2015 module"); 58 | let js = get_js(EsVersion::Es2015Module); 59 | for (i, (lhs, rhs)) in Scanner::new(&js).zip(es2015m::TOKENS.iter()).enumerate() { 60 | let lhs = lhs.unwrap(); 61 | debug!("{:?}:{:?}", lhs.token, rhs); 62 | assert_eq!( 63 | (i, &lhs.token), 64 | (i, rhs), 65 | "{}:{}\n{}", 66 | EsVersion::Es2015Module.path(), 67 | lhs.location.start, 68 | &js[lhs.span.start..lhs.span.end] 69 | ); 70 | } 71 | } 72 | 73 | fn ensure_logging() { 74 | let _ = pretty_env_logger::try_init(); 75 | } 76 | 77 | enum EsVersion { 78 | Es5, 79 | Es2015Module, 80 | Es2015Script, 81 | } 82 | 83 | impl EsVersion { 84 | pub fn path(&self) -> String { 85 | format!( 86 | 
"node_modules/everything.js/{}", 87 | match self { 88 | EsVersion::Es5 => "es5.js", 89 | EsVersion::Es2015Module => "es2015-module.js", 90 | EsVersion::Es2015Script => "es2015-script.js", 91 | } 92 | ) 93 | } 94 | } 95 | 96 | fn get_js(version: EsVersion) -> String { 97 | get_file(version.path()) 98 | } 99 | 100 | fn get_file(path: impl AsRef) -> String { 101 | let path = path.as_ref(); 102 | if !path.exists() { 103 | npm_install(); 104 | if !path.exists() { 105 | panic!("npm install failed to make {:?} available", path) 106 | } 107 | } 108 | read_to_string(path).unwrap_or_else(|e| panic!("Failed to read {:?} to a string {}", path, e)) 109 | } 110 | 111 | fn npm_install() { 112 | Command::new("npm") 113 | .arg("install") 114 | .output() 115 | .expect("Failed to npm install"); 116 | } 117 | -------------------------------------------------------------------------------- /tests/moz_central/main.rs: -------------------------------------------------------------------------------- 1 | #![cfg(all(test, feature = "moz_central"))] 2 | 3 | use ress::*; 4 | use std::fs::read_to_string; 5 | use std::path::{Path, PathBuf}; 6 | 7 | #[test] 8 | fn moz_central() { 9 | let _ = pretty_env_logger::try_init(); 10 | let moz_central_path = Path::new("moz_central"); 11 | if !moz_central_path.exists() { 12 | panic!("please download the JIT tests from the firefox repository. 
see CONTRIBUTING.md for more info"); 13 | } 14 | let paths = get_paths(&moz_central_path); 15 | let (failures, total) = walk(&paths); 16 | eprintln!("completed {:?} tests", total); 17 | if !failures.is_empty() { 18 | panic!( 19 | "{:?} tests failed\n{:?}", 20 | failures.len(), 21 | failures.join("\n") 22 | ); 23 | } 24 | } 25 | 26 | fn get_paths(root: &Path) -> Vec { 27 | walkdir::WalkDir::new(root) 28 | .min_depth(1) 29 | .into_iter() 30 | .filter_map(|e| { 31 | let entry = e.expect("bad entry"); 32 | let path = entry.into_path(); 33 | if path.is_file() { 34 | if let Some(ext) = path.extension() { 35 | if ext == "js" { 36 | Some(path) 37 | } else { 38 | None 39 | } 40 | } else { 41 | None 42 | } 43 | } else { 44 | None 45 | } 46 | }) 47 | .collect() 48 | } 49 | 50 | fn walk(paths: &[PathBuf]) -> (Vec, usize) { 51 | let mut ret = Vec::new(); 52 | let mut ct = 0; 53 | for path in paths { 54 | ct += 1; 55 | let js = read_to_string(&path).unwrap(); 56 | let s = Scanner::new(js.as_str()); 57 | for item in s { 58 | if let Err(e) = item { 59 | ret.push(format!("{:?}, path: {:?}", e, path.display())); 60 | } 61 | } 62 | } 63 | (ret, ct) 64 | } 65 | -------------------------------------------------------------------------------- /tests/prop/main.rs: -------------------------------------------------------------------------------- 1 | extern crate ress; 2 | #[macro_use] 3 | extern crate proptest; 4 | 5 | proptest! { 6 | #[test] 7 | fn function_idents(s in r#"function [a-zA-Z_$\u2118\u212E\u309B\u309C\u1885\u1886][a-zA-Z_]+"#) { 8 | ress::tokenize(&s).unwrap(); 9 | } 10 | } 11 | -------------------------------------------------------------------------------- /tests/proptest-regressions/main.txt: -------------------------------------------------------------------------------- 1 | # Seeds for failure cases proptest has generated in the past. It is 2 | # automatically read and these particular cases re-run before any 3 | # novel cases are generated. 
4 | # 5 | # It is recommended to check this file in to source control so that 6 | # everyone who runs the test benefits from these saved cases. 7 | xs 3761395854 1747442289 2023256964 1719391646 # shrinks to s = "𑜰" 8 | -------------------------------------------------------------------------------- /tests/readme/index.js: -------------------------------------------------------------------------------- 1 | (function() { 2 | console.log('hello world!') 3 | })() -------------------------------------------------------------------------------- /tests/readme/main.rs: -------------------------------------------------------------------------------- 1 | #![cfg(test)] 2 | 3 | use ress::prelude::*; 4 | 5 | #[test] 6 | fn semi_example() { 7 | static JS: &str = include_str!("index.js"); 8 | let s = Scanner::new(JS); 9 | for token in s { 10 | let token = token.unwrap().token; 11 | if token.matches_punct_str(";") { 12 | panic!("A semi-colon!? Heathen!"); 13 | } 14 | } 15 | println!("Good show! Why use something that's optional?") 16 | } 17 | 18 | #[test] 19 | #[allow(unused_variables)] 20 | fn failed_compile_borrow() { 21 | // look_ahead 22 | let js = "function() { return; }"; 23 | let mut s = Scanner::new(js); 24 | let current = s.next(); 25 | let next = s.look_ahead(); 26 | let new_current = s.next(); 27 | assert_eq!(next, new_current); 28 | // peekable (fails to compile) 29 | let p = Scanner::new(js).peekable(); 30 | let current = s.next(); // <-- first mutable borrow 31 | // let next = p.peek(); // <-- second mutable borrow 32 | } 33 | 34 | #[test] 35 | fn get_set_state() { 36 | let js = "function() { 37 | return 0; 38 | };"; 39 | let mut s = Scanner::new(js); 40 | let start = s.get_state(); 41 | assert_eq!( 42 | s.next().unwrap().unwrap().token, 43 | Token::Keyword(Keyword::Function("Function")) 44 | ); 45 | assert_eq!( 46 | s.next().unwrap().unwrap().token, 47 | Token::Punct(Punct::OpenParen) 48 | ); 49 | assert_eq!( 50 | s.next().unwrap().unwrap().token, 51 | 
Token::Punct(Punct::CloseParen) 52 | ); 53 | s.set_state(start); 54 | assert_eq!( 55 | s.next().unwrap().unwrap().token, 56 | Token::Keyword(Keyword::Function("Function")) 57 | ); 58 | } 59 | -------------------------------------------------------------------------------- /tests/snippets/main.rs: -------------------------------------------------------------------------------- 1 | use ress::prelude::*; 2 | 3 | #[test] 4 | fn vue_number_error() { 5 | let js = "refElm = isUndef(newCh[newEndIdx + 1]) ? null : newCh[newEndIdx + 1].elm;"; 6 | for item in Scanner::new(js) { 7 | println!("{:?}", item); 8 | } 9 | } 10 | #[test] 11 | fn moment_regex_error() { 12 | let js = r"function removeFormattingTokens(input) { 13 | if (input.match(/\[[\s\S]/)) { 14 | return input.replace(/^\[|\]$/g, ''); 15 | } 16 | return input.replace(/\\/g, ''); 17 | }"; 18 | for item in Scanner::new(js) { 19 | println!("{:?}", item); 20 | } 21 | } 22 | 23 | #[test] 24 | fn number_member() { 25 | compare( 26 | "20..toString()", 27 | &[ 28 | Token::Number("20.".into()), 29 | Token::Punct(Punct::Period), 30 | Token::Ident("toString".into()), 31 | Token::Punct(Punct::OpenParen), 32 | Token::Punct(Punct::CloseParen), 33 | ], 34 | ); 35 | } 36 | #[test] 37 | fn if_then_regex() { 38 | compare( 39 | "if (1) /a/", 40 | &[ 41 | Token::Keyword(Keyword::If("If")), 42 | Token::Punct(Punct::OpenParen), 43 | Token::Number("1".into()), 44 | Token::Punct(Punct::CloseParen), 45 | Token::RegEx(RegEx { 46 | body: "a", 47 | flags: None, 48 | }), 49 | ], 50 | ); 51 | } 52 | 53 | #[test] 54 | fn line_terminator_in_string_literal() { 55 | let js = "'
'"; 56 | for _ in Scanner::new(js) { 57 | // just testing for panics on the byte index 58 | // for now 59 | //TODO: Allow this character in string literals 60 | // as per spec under feature "json superset" 61 | } 62 | } 63 | 64 | #[test] 65 | fn lots_of_arcs() { 66 | let mut top = "".to_string(); 67 | let mut bottom = "[".to_string(); 68 | let ascii_start = 97; 69 | for i in 0..26 { 70 | let id = std::char::from_u32(ascii_start + i).unwrap(); 71 | let obj = format!("{{{}:{}}}", id, i); 72 | top.push_str(&format!("({})", obj)); 73 | if i != 25 { 74 | top.push_str(", "); 75 | } 76 | bottom.push_str(&format!("{},", obj)); 77 | } 78 | bottom.push(']'); 79 | let js = format!("{}\n\n{}", top, bottom); 80 | 81 | let s = Scanner::new(&js); 82 | for item in s { 83 | println!("{:?}", item.unwrap()); 84 | } 85 | } 86 | 87 | #[test] 88 | fn div_over_regex() { 89 | let js = "if (true) { 90 | ({} / function(){return 1}); 91 | } 92 | "; 93 | for tok in panicking_scanner(js) { 94 | eprintln!("{:?}", tok) 95 | } 96 | } 97 | #[test] 98 | fn regex_over_div() { 99 | let js = "{}/\\d/g;;"; 100 | compare( 101 | js, 102 | &[ 103 | Token::Punct(Punct::OpenBrace), 104 | Token::Punct(Punct::CloseBrace), 105 | Token::RegEx(RegEx::from_parts("\\d", Some("g"))), 106 | Token::Punct(Punct::SemiColon), 107 | Token::Punct(Punct::SemiColon), 108 | ], 109 | ); 110 | } 111 | #[test] 112 | fn regex_over_div2() { 113 | let js = "function(){}/\\d/g;;"; 114 | compare( 115 | js, 116 | &[ 117 | Token::Keyword(Keyword::Function("function")), 118 | Token::Punct(Punct::OpenParen), 119 | Token::Punct(Punct::CloseParen), 120 | Token::Punct(Punct::OpenBrace), 121 | Token::Punct(Punct::CloseBrace), 122 | Token::RegEx(RegEx::from_parts("\\d", Some("g"))), 123 | Token::Punct(Punct::SemiColon), 124 | Token::Punct(Punct::SemiColon), 125 | ], 126 | ); 127 | } 128 | #[test] 129 | fn regex_over_div3() { 130 | let js = "function name(){}/\\d/g;;"; 131 | compare( 132 | js, 133 | &[ 134 | 
/// A regex after a function declaration preceded by a directive prologue.
#[test]
fn regex_over_div4() {
    let _ = pretty_env_logger::try_init();
    let js = "'use strict';function name(){}/\\d/g;;";
    compare(
        js,
        &[
            Token::String(StringLit::single("use strict", false)),
            Token::Punct(Punct::SemiColon),
            Token::Keyword(Keyword::Function("function")),
            Token::Ident("name".into()),
            Token::Punct(Punct::OpenParen),
            Token::Punct(Punct::CloseParen),
            Token::Punct(Punct::OpenBrace),
            Token::Punct(Punct::CloseBrace),
            Token::RegEx(RegEx::from_parts("\\d", Some("g"))),
            Token::Punct(Punct::SemiColon),
            Token::Punct(Punct::SemiColon),
        ],
    );
}

/// `-->` at the start of a line (or after only comments) is an
/// HTML-close comment; the rest of the line is its trailer.
#[test]
fn html_comment_close() {
    let js = "
--> stuff is in a comment
 --> also a comment
/*multi-comment*/--> with trailer
/*---*/
let a;
/*first comment*/ /*second comment*/--> with trailer";
    compare(
        js,
        &[
            Token::Comment(Comment {
                kind: ress::tokens::CommentKind::Html,
                content: "",
                tail_content: Some(" stuff is in a comment"),
            }),
            Token::Comment(Comment {
                kind: ress::tokens::CommentKind::Html,
                content: "",
                tail_content: Some(" also a comment"),
            }),
            Token::Comment(Comment {
                kind: ress::tokens::CommentKind::Multi,
                content: "multi-comment",
                tail_content: Some(" with trailer"),
            }),
            Token::Comment(Comment {
                kind: ress::tokens::CommentKind::Multi,
                content: "---",
                tail_content: None,
            }),
            Token::Keyword(Keyword::Let("let")),
            Token::Ident("a".into()),
            Token::Punct(Punct::SemiColon),
            Token::Comment(Comment {
                kind: ress::tokens::CommentKind::Multi,
                content: "first comment",
                tail_content: None,
            }),
            Token::Comment(Comment {
                kind: ress::tokens::CommentKind::Multi,
                content: "second comment",
                tail_content: Some(" with trailer"),
            }),
        ],
    );
}
/// `x --> 0` mid-expression is decrement + greater-than, not a comment.
#[test]
fn decrement_greater_than() {
    compare(
        "for (var x = 0; x --> 0;);",
        &[
            Token::Keyword(Keyword::For("for")),
            Token::Punct(Punct::OpenParen),
            Token::Keyword(Keyword::Var("var")),
            Token::Ident("x".into()),
            Token::Punct(Punct::Equal),
            Token::Number("0".into()),
            Token::Punct(Punct::SemiColon),
            Token::Ident("x".into()),
            Token::Punct(Punct::DoubleDash),
            Token::Punct(Punct::GreaterThan),
            Token::Number("0".into()),
            Token::Punct(Punct::SemiColon),
            Token::Punct(Punct::CloseParen),
            Token::Punct(Punct::SemiColon),
        ],
    )
}
/// Same as above with an empty multi-line comment wedged in between.
#[test]
fn decrement_greater_than_inline_multi() {
    compare(
        "for (var x = 0; x /**/--> 0;);",
        &[
            Token::Keyword(Keyword::For("for")),
            Token::Punct(Punct::OpenParen),
            Token::Keyword(Keyword::Var("var")),
            Token::Ident("x".into()),
            Token::Punct(Punct::Equal),
            Token::Number("0".into()),
            Token::Punct(Punct::SemiColon),
            Token::Ident("x".into()),
            Token::Comment(Comment::new_multi_line("")),
            Token::Punct(Punct::DoubleDash),
            Token::Punct(Punct::GreaterThan),
            Token::Number("0".into()),
            Token::Punct(Punct::SemiColon),
            Token::Punct(Punct::CloseParen),
            Token::Punct(Punct::SemiColon),
        ],
    )
}

/// `/*/` opens a multi-line comment that never closes.
#[test]
#[should_panic = "unterminated multi-line comment"]
fn star_only_regex() {
    run_failure("/*/");
}

/// A regex body may begin with a space.
#[test]
fn leading_space_regex() {
    let js = r"/ \{[\s\S]*$/";
    compare(
        js,
        &[Token::RegEx(RegEx {
            body: r" \{[\s\S]*$",
            flags: None,
        })],
    )
}
leading_space_regex() { 270 | let js = r"/ \{[\s\S]*$/"; 271 | compare( 272 | js, 273 | &[Token::RegEx(RegEx { 274 | body: r" \{[\s\S]*$", 275 | flags: None, 276 | })], 277 | ) 278 | } 279 | 280 | #[test] 281 | #[should_panic] 282 | fn var_escaped_cr() { 283 | let js = r"var\u000Dx;"; 284 | run_failure(js); 285 | } 286 | 287 | #[test] 288 | fn long_comment() { 289 | let _ = pretty_env_logger::try_init(); 290 | let inner = "\n* \n*\n"; 291 | let js = format!("/*{}*/", inner); 292 | compare( 293 | &js, 294 | &[Token::Comment(Comment { 295 | kind: ress::tokens::CommentKind::Multi, 296 | content: inner, 297 | tail_content: None, 298 | })], 299 | ) 300 | } 301 | 302 | #[test] 303 | fn regex_column() { 304 | compare_with_position( 305 | "'abc'.match(/abc/);", 306 | &[ 307 | (Token::String(StringLit::single("abc", false)), 1, 1), 308 | (Token::Punct(Punct::Period), 1, 6), 309 | (Token::Ident("match".into()), 1, 7), 310 | (Token::Punct(Punct::OpenParen), 1, 12), 311 | (Token::RegEx(RegEx::from_parts("abc", None)), 1, 13), 312 | (Token::Punct(Punct::CloseParen), 1, 18), 313 | (Token::Punct(Punct::SemiColon), 1, 19), 314 | ], 315 | ); 316 | } 317 | 318 | #[test] 319 | fn regex_spaces() { 320 | let scanner = Scanner::new("var = / a /"); 321 | let mut last_end = 0; 322 | for (i, item) in scanner.enumerate() { 323 | let item = item.unwrap(); 324 | if item.token.is_eof() { 325 | break; 326 | } 327 | 328 | assert_eq!( 329 | 1, 330 | item.location.start.column - last_end, 331 | "{} for {:?}", 332 | i, 333 | item 334 | ); 335 | last_end = item.location.end.column; 336 | } 337 | } 338 | 339 | #[test] 340 | fn regex_out_of_order() { 341 | pretty_env_logger::try_init().ok(); 342 | let regex = r#"((?:[^BEGHLMOSWYZabcdhmswyz']+)|(?:'(?:[^']|'')*')|(?:G{1,5}|y{1,4}|Y{1,4}|M{1,5}|L{1,5}|w{1,2}|W{1}|d{1,2}|E{1,6}|c{1,6}|a{1,5}|b{1,5}|B{1,5}|h{1,2}|H{1,2}|m{1,2}|s{1,2}|S{1,3}|z{1,4}|Z{1,5}|O{1,4}))([\s\S]*)"#; 343 | let js = format!("var DATE_FORMATS_SPLIT = /{}/", ®ex); 344 | 
compare_with_position( 345 | js.as_str(), 346 | &[ 347 | (Token::Keyword(Keyword::Var("var")), 1, 1), 348 | (Token::Ident("DATE_FORMATS_SPLIT".into()), 1, 5), 349 | (Token::Punct(Punct::Equal), 1, 24), 350 | (Token::RegEx(RegEx::from_parts(regex, None)), 1, 26), 351 | ], 352 | ); 353 | } 354 | 355 | #[test] 356 | fn regex_pattern() { 357 | pretty_env_logger::try_init().ok(); 358 | let re = r#" \{[\s\S]*$"#; 359 | let js = format!("/{re}/"); 360 | 361 | let mut scanner = Scanner::new(&js); 362 | let Item { 363 | location, 364 | token: Token::RegEx(re2), 365 | .. 366 | } = scanner.next().unwrap().unwrap() 367 | else { 368 | panic!("Expected regex"); 369 | }; 370 | assert_eq!(location.start.line, 1); 371 | assert_eq!(location.end.line, 1); 372 | assert_eq!(location.start.column, 1); 373 | assert_eq!(re2.body, re); 374 | assert_eq!(location.end.column, re.len() + 3); 375 | } 376 | 377 | #[test] 378 | fn regex_over_a0() { 379 | let js = r#"val = / /"#; 380 | compare( 381 | js, 382 | &[ 383 | Token::Ident("val".into()), 384 | Token::Punct(Punct::Equal), 385 | Token::RegEx(RegEx { 386 | body: "\u{a0}", 387 | flags: None, 388 | }), 389 | ], 390 | ) 391 | } 392 | 393 | #[test] 394 | fn regex_over_a0_manual() { 395 | use ress::ManualScanner; 396 | let js = r#"val = / /"#; 397 | let mut scanner = ManualScanner::new(js); 398 | assert_eq!( 399 | scanner.next_token().unwrap().unwrap().token, 400 | Token::Ident("val".into()) 401 | ); 402 | assert_eq!( 403 | scanner.next_token().unwrap().unwrap().token, 404 | Token::Punct(Punct::Equal) 405 | ); 406 | assert_eq!( 407 | scanner.next_token().unwrap().unwrap().token, 408 | Token::Punct(Punct::ForwardSlash) 409 | ); 410 | assert_eq!( 411 | scanner.next_regex(1).unwrap().unwrap().token, 412 | Token::RegEx(RegEx { 413 | body: "\u{a0}", 414 | flags: None 415 | }) 416 | ); 417 | } 418 | 419 | #[test] 420 | fn regex_all_whitespaces() { 421 | let re: String = [ 422 | '\t', '\u{000b}', '\u{000c}', ' ', '\u{feff}', '\u{2000}', '\u{2001}', 
'\u{2002}', 423 | '\u{2003}', '\u{2004}', '\u{2005}', '\u{2006}', '\u{2007}', '\u{2008}', '\u{2009}', 424 | '\u{200a}', '\u{202f}', '\u{205f}', '\u{3000}', 425 | ] 426 | .iter() 427 | .collect(); 428 | run_failure(&format!("var = /{re}/")); 429 | } 430 | 431 | fn compare(js: &str, expectation: &[Token<&str>]) { 432 | for (i, (par, ex)) in panicking_scanner(js).zip(expectation.iter()).enumerate() { 433 | assert_eq!((i, &par), (i, ex)); 434 | } 435 | } 436 | 437 | fn compare_with_position(js: &str, expectation: &[(Token<&str>, usize, usize)]) { 438 | let scanner = Scanner::new(js); 439 | let mut i = 0; 440 | let mut expectation = expectation.iter(); 441 | for r in scanner { 442 | let r = r.unwrap(); 443 | if r.is_eof() { 444 | return; 445 | } 446 | i += 1; 447 | let ex = expectation 448 | .next() 449 | .ok_or_else(|| { 450 | panic!("expectations too short for {:?}", r); 451 | }) 452 | .unwrap(); 453 | assert_eq!((i, &r.token), (i, &ex.0), "{:?} vs {:?}", r, ex.0); 454 | assert_eq!( 455 | (i, r.location.start.line), 456 | (i, ex.1), 457 | "{:?} vs {:?}", 458 | r, 459 | ex.0 460 | ); 461 | assert_eq!( 462 | (i, r.location.start.column), 463 | (i, ex.2), 464 | "{:?} vs {:?}", 465 | r, 466 | ex.0 467 | ); 468 | } 469 | } 470 | 471 | fn run_failure(js: &str) { 472 | for _ in panicking_scanner(js) {} 473 | } 474 | 475 | fn panicking_scanner(js: &str) -> impl Iterator> { 476 | Scanner::new(js).map(|r| r.unwrap().token) 477 | } 478 | --------------------------------------------------------------------------------