├── .editorconfig
├── .eslintrc.json
├── .gitattributes
├── .github
└── contributing.md
├── .gitignore
├── .npmrc
├── .travis.yml
├── .verb.md
├── CHANGELOG.md
├── LICENSE
├── README.md
├── examples
├── ast.js
├── lex.js
├── match.js
└── skipType.js
├── index.js
├── lib
├── location.js
├── state.js
└── token.js
├── package.json
└── test
├── api.advance.js
├── api.append.js
├── api.bos.js
├── api.capture.js
├── api.consume.js
├── api.current.js
├── api.eos.js
├── api.error.js
├── api.fail.js
├── api.handle.js
├── api.handlers.js
├── api.integration.js
├── api.isInside.js
├── api.lex.js
├── api.lookahead.js
├── api.lookbehind.js
├── api.match.js
├── api.peek.js
├── api.prev.js
├── api.push.js
├── api.scan.js
├── api.set.js
├── api.skip.js
├── api.skipTo.js
├── api.skipType.js
├── api.skipWhile.js
├── api.token.js
├── api.use.js
├── fixtures
└── file.txt
├── lexer.js
├── lexer.static.js
├── regressions.js
└── static.isToken.js
/.editorconfig:
--------------------------------------------------------------------------------
1 | # http://editorconfig.org/
2 | root = true
3 |
4 | [*]
5 | charset = utf-8
6 | end_of_line = lf
7 | indent_size = 2
8 | indent_style = space
9 | insert_final_newline = true
10 | trim_trailing_whitespace = true
11 |
12 | [{**/{actual,fixtures,expected,templates}/**,*.md}]
13 | trim_trailing_whitespace = false
14 | insert_final_newline = false
15 |
--------------------------------------------------------------------------------
/.eslintrc.json:
--------------------------------------------------------------------------------
1 | {
2 | "extends": [
3 | "eslint:recommended"
4 | ],
5 |
6 | "env": {
7 | "browser": false,
8 | "es6": true,
9 | "node": true,
10 | "mocha": true
11 | },
12 |
13 | "parserOptions":{
14 | "ecmaVersion": 9,
15 | "sourceType": "module",
16 | "ecmaFeatures": {
17 | "modules": true,
18 | "experimentalObjectRestSpread": true
19 | }
20 | },
21 |
22 | "globals": {
23 | "document": false,
24 | "navigator": false,
25 | "window": false
26 | },
27 |
28 | "rules": {
29 | "accessor-pairs": 2,
30 | "arrow-spacing": [2, { "before": true, "after": true }],
31 | "block-spacing": [2, "always"],
32 | "brace-style": [2, "1tbs", { "allowSingleLine": true }],
33 | "comma-dangle": [2, "never"],
34 | "comma-spacing": [2, { "before": false, "after": true }],
35 | "comma-style": [2, "last"],
36 | "constructor-super": 2,
37 | "curly": [2, "multi-line"],
38 | "dot-location": [2, "property"],
39 | "eol-last": 2,
40 | "eqeqeq": [2, "allow-null"],
41 | "generator-star-spacing": [2, { "before": true, "after": true }],
42 | "handle-callback-err": [2, "^(err|error)$" ],
43 | "indent": [2, 2, { "SwitchCase": 1 }],
44 | "key-spacing": [2, { "beforeColon": false, "afterColon": true }],
45 | "keyword-spacing": [2, { "before": true, "after": true }],
46 | "new-cap": [2, { "newIsCap": true, "capIsNew": false }],
47 | "new-parens": 2,
48 | "no-array-constructor": 2,
49 | "no-caller": 2,
50 | "no-class-assign": 2,
51 | "no-cond-assign": 2,
52 | "no-const-assign": 2,
53 | "no-control-regex": 2,
54 | "no-debugger": 2,
55 | "no-delete-var": 2,
56 | "no-dupe-args": 2,
57 | "no-dupe-class-members": 2,
58 | "no-dupe-keys": 2,
59 | "no-duplicate-case": 2,
60 | "no-empty-character-class": 2,
61 | "no-eval": 2,
62 | "no-ex-assign": 2,
63 | "no-extend-native": 2,
64 | "no-extra-bind": 2,
65 | "no-extra-boolean-cast": 2,
66 | "no-extra-parens": [2, "functions"],
67 | "no-fallthrough": 2,
68 | "no-floating-decimal": 2,
69 | "no-func-assign": 2,
70 | "no-implied-eval": 2,
71 | "no-inner-declarations": [2, "functions"],
72 | "no-invalid-regexp": 2,
73 | "no-irregular-whitespace": 2,
74 | "no-iterator": 2,
75 | "no-label-var": 2,
76 | "no-labels": 2,
77 | "no-lone-blocks": 2,
78 | "no-mixed-spaces-and-tabs": 2,
79 | "no-multi-spaces": 2,
80 | "no-multi-str": 2,
81 | "no-multiple-empty-lines": [2, { "max": 1 }],
82 | "no-native-reassign": 0,
83 | "no-negated-in-lhs": 2,
84 | "no-new": 2,
85 | "no-new-func": 2,
86 | "no-new-object": 2,
87 | "no-new-require": 2,
88 | "no-new-wrappers": 2,
89 | "no-obj-calls": 2,
90 | "no-octal": 2,
91 | "no-octal-escape": 2,
92 | "no-proto": 0,
93 | "no-redeclare": 2,
94 | "no-regex-spaces": 2,
95 | "no-return-assign": 2,
96 | "no-self-compare": 2,
97 | "no-sequences": 2,
98 | "no-shadow-restricted-names": 2,
99 | "no-spaced-func": 2,
100 | "no-sparse-arrays": 2,
101 | "no-this-before-super": 2,
102 | "no-throw-literal": 2,
103 | "no-trailing-spaces": 0,
104 | "no-undef": 2,
105 | "no-undef-init": 2,
106 | "no-unexpected-multiline": 2,
107 | "no-unneeded-ternary": [2, { "defaultAssignment": false }],
108 | "no-unreachable": 2,
109 | "no-unused-vars": [2, { "vars": "all", "args": "none" }],
110 | "no-useless-call": 0,
111 | "no-with": 2,
112 | "one-var": [0, { "initialized": "never" }],
113 | "operator-linebreak": [0, "after", { "overrides": { "?": "before", ":": "before" } }],
114 | "padded-blocks": [0, "never"],
115 | "quotes": [2, "single", "avoid-escape"],
116 | "radix": 2,
117 | "semi": [2, "always"],
118 | "semi-spacing": [2, { "before": false, "after": true }],
119 | "space-before-blocks": [2, "always"],
120 | "space-before-function-paren": [2, "never"],
121 | "space-in-parens": [2, "never"],
122 | "space-infix-ops": 2,
123 | "space-unary-ops": [2, { "words": true, "nonwords": false }],
124 | "spaced-comment": [0, "always", { "markers": ["global", "globals", "eslint", "eslint-disable", "*package", "!", ","] }],
125 | "use-isnan": 2,
126 | "valid-typeof": 2,
127 | "wrap-iife": [2, "any"],
128 | "yoda": [2, "never"]
129 | }
130 | }
131 |
--------------------------------------------------------------------------------
/.gitattributes:
--------------------------------------------------------------------------------
1 | # Enforce Unix newlines
2 | * text eol=lf
3 |
4 | # binaries
5 | *.ai binary
6 | *.psd binary
7 | *.jpg binary
8 | *.gif binary
9 | *.png binary
10 | *.jpeg binary
11 |
--------------------------------------------------------------------------------
/.github/contributing.md:
--------------------------------------------------------------------------------
1 | # Contributing to snapdragon-lexer
2 |
3 | First and foremost, thank you! We appreciate that you want to contribute to snapdragon-lexer, your time is valuable, and your contributions mean a lot to us.
4 |
5 | ## Important!
6 |
7 | By contributing to this project, you:
8 |
9 | * Agree that you have authored 100% of the content
10 | * Agree that you have the necessary rights to the content
11 | * Agree that you have received the necessary permissions from your employer to make the contributions (if applicable)
12 | * Agree that the content you contribute may be provided under the Project license(s)
13 |
14 | ## Getting started
15 |
16 | **What does "contributing" mean?**
17 |
18 | Creating an issue is the simplest form of contributing to a project. But there are many ways to contribute, including the following:
19 |
20 | - Updating or correcting documentation
21 | - Feature requests
22 | - Bug reports
23 |
24 | If you'd like to learn more about contributing in general, the [Guide to Idiomatic Contributing](https://github.com/jonschlinkert/idiomatic-contributing) has a lot of useful information.
25 |
26 | **Showing support for snapdragon-lexer**
27 |
28 | Please keep in mind that open source software is built by people like you, who spend their free time creating things the rest of the community can use.
29 |
30 | Don't have time to contribute? No worries, here are some other ways to show your support for snapdragon-lexer:
31 |
32 | - star the [project](https://github.com/here-be/snapdragon-lexer)
33 | - tweet your support for snapdragon-lexer
34 |
35 | ## Issues
36 |
37 | ### Before creating an issue
38 |
39 | Please try to determine if the issue is caused by an underlying library, and if so, create the issue there. Sometimes this is difficult to know. We only ask that you attempt to give a reasonable attempt to find out. Oftentimes the readme will have advice about where to go to create issues.
40 |
41 | Try to follow these guidelines
42 |
43 | - **Avoid creating issues for implementation help**. It's much better for discoverability, SEO, and semantics - to keep the issue tracker focused on bugs and feature requests - to ask implementation-related questions on [stackoverflow.com][so]
44 | - **Investigate the issue**:
45 | - **Check the readme** - oftentimes you will find notes about creating issues, and where to go depending on the type of issue.
46 | - Create the issue in the appropriate repository.
47 |
48 | ### Creating an issue
49 |
50 | Please be as descriptive as possible when creating an issue. Give us the information we need to successfully answer your question or address your issue by answering the following in your issue:
51 |
52 | - **version**: please note the version of snapdragon-lexer you are using
53 | - **extensions, plugins, helpers, etc** (if applicable): please list any extensions you're using
54 | - **error messages**: please paste any error messages into the issue, or a [gist](https://gist.github.com/)
55 |
56 | ### Closing issues
57 |
58 | The original poster or the maintainers of snapdragon-lexer may close an issue at any time. Typically, but not exclusively, issues are closed when:
59 |
60 | - The issue is resolved
61 | - The project's maintainers have determined the issue is out of scope
62 | - An issue is clearly a duplicate of another issue, in which case the duplicate issue will be linked.
63 | - A discussion has clearly run its course
64 |
65 |
66 | ## Next steps
67 |
68 | **Tips for creating idiomatic issues**
69 |
70 | Spending just a little extra time to review best practices and brush up on your contributing skills will, at minimum, make your issue easier to read, easier to resolve, and more likely to be found by others who have the same or similar issue in the future. At best, it will open up doors and potential career opportunities by helping you be at your best.
71 |
72 | The following resources were hand-picked to help you be the most effective contributor you can be:
73 |
74 | - The [Guide to Idiomatic Contributing](https://github.com/jonschlinkert/idiomatic-contributing) is a great place for newcomers to start, but there is also information for experienced contributors there.
75 | - Take some time to learn basic markdown. We can't stress this enough. Don't start pasting code into GitHub issues before you've taken a moment to review this [markdown cheatsheet](https://gist.github.com/jonschlinkert/5854601)
76 | - The GitHub guide to [basic markdown](https://help.github.com/articles/markdown-basics/) is another great markdown resource.
77 | - Learn about [GitHub Flavored Markdown](https://help.github.com/articles/github-flavored-markdown/). And if you want to really go above and beyond, read [mastering markdown](https://guides.github.com/features/mastering-markdown/).
78 |
79 | At the very least, please try to:
80 |
81 | - Use backticks to wrap code. This ensures that it retains its formatting and isn't modified when it's rendered by GitHub, and makes the code more readable to others
82 | - When applicable, use syntax highlighting by adding the correct language name after the first "code fence"
83 |
84 |
85 | [so]: http://stackoverflow.com/questions/tagged/snapdragon-lexer
86 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | # always ignore files
2 | *.DS_Store
3 | .idea
4 | .vscode
5 | *.sublime-*
6 |
7 | # test related, or directories generated by tests
8 | test/actual
9 | actual
10 | coverage
11 | .nyc*
12 |
13 | # npm
14 | node_modules
15 | npm-debug.log
16 |
17 | # yarn
18 | yarn.lock
19 | yarn-error.log
20 |
21 | # misc
22 | _gh_pages
23 | _draft
24 | _drafts
25 | bower_components
26 | vendor
27 | temp
28 | tmp
29 | TODO.md
30 | package-lock.json
--------------------------------------------------------------------------------
/.npmrc:
--------------------------------------------------------------------------------
1 | package-lock=false
2 |
--------------------------------------------------------------------------------
/.travis.yml:
--------------------------------------------------------------------------------
1 | sudo: false
2 | os:
3 | - linux
4 | - osx
5 | - windows
6 | language: node_js
7 | node_js:
8 | - node
9 | - '11'
10 | - '10'
11 | - '9'
12 | - '8'
13 |
--------------------------------------------------------------------------------
/.verb.md:
--------------------------------------------------------------------------------
1 | ## Breaking changes in v2.0!
2 |
3 | Please see the [changelog](CHANGELOG.md) for details!
4 |
5 | ## Usage
6 |
7 | ```js
8 | const Lexer = require('snapdragon-lexer');
9 | const lexer = new Lexer();
10 |
11 | lexer.capture('slash', /^\//);
12 | lexer.capture('text', /^\w+/);
13 | lexer.capture('star', /^\*/);
14 |
15 | console.log(lexer.tokenize('foo/*'));
16 | ```
17 |
18 | ## API
19 | {%= apidocs("index.js") %}
20 |
21 |
22 | ### .set
23 |
24 | Register a handler function.
25 |
26 | **Params**
27 |
28 | * `type` **{String}**
29 | * `fn` **{Function}**: The handler function to register.
30 |
31 | **Example**
32 |
33 | ```js
34 | lexer.set('star', function(token) {
35 | // do parser, lexer, or compiler stuff
36 | });
37 | ```
38 |
39 | As an alternative to `.set`, the [.capture](#capture) method will automatically register a handler when a function is passed as the last argument.
40 |
41 | ### .get
42 |
43 | Get a registered handler function.
44 |
45 | **Params**
46 |
47 | * `type` **{String}**
48 | * `fn` **{Function}**: The handler function to register.
49 |
50 | **Example**
51 |
52 | ```js
53 | lexer.set('star', function() {
54 | // do parser, lexer, or compiler stuff
55 | });
56 | const star = lexer.get('star');
57 | ```
58 |
59 | ## Properties
60 |
61 | ### lexer.isLexer
62 |
63 | Type: **{boolean}**
64 |
65 | Default: `true` (constant)
66 |
67 | This property is defined as a convenience, to make it easy for plugins to check for an instance of Lexer.
68 |
69 | ### lexer.input
70 |
71 | Type: **{string}**
72 |
73 | Default: `''`
74 |
75 | The unmodified source string provided by the user.
76 |
77 | ### lexer.string
78 |
79 | Type: **{string}**
80 |
81 | Default: `''`
82 |
83 | The source string minus the part of the string that has already been [consumed](#consume).
84 |
85 | ### lexer.consumed
86 |
87 | Type: **{string}**
88 |
89 | Default: `''`
90 |
91 | The part of the source string that has been consumed.
92 |
93 | ### lexer.tokens
94 |
95 | Type: **{array}**
96 |
97 | Default: `[]` (instance of [snapdragon-stack][])
98 |
99 | Array of lexed tokens.
100 |
101 | ### lexer.stash
102 |
103 | Type: **{array}**
104 |
105 | Default: `['']` (instance of [snapdragon-stack][])
106 |
107 | Array of captured strings. Similar to the [lexer.tokens](#lexertokens) array, but stores strings instead of token objects.
108 |
109 | ### lexer.stack
110 |
111 | Type: **{array}**
112 |
113 | Default: `[]` (instance of [snapdragon-stack][])
114 |
115 | LIFO (last in, first out) array. A token is pushed onto the stack when an "opening" character or character sequence needs to be tracked. When the (matching) "closing" character or character sequence is encountered, the (opening) token is popped off of the stack.
116 |
117 | The stack is not used by any lexer methods, it's reserved for the user. Stacks are necessary for creating Abstract Syntax Trees (ASTs), but if you require this functionality it would be better to use a parser such as [snapdragon-parser][snapdragon-parser], with methods and other conveniences for creating an AST.
118 |
119 | ### lexer.queue
120 |
121 | Type: **{array}**
122 |
123 | Default: `[]`
124 |
125 | FIFO (first in, first out) array, for temporarily storing tokens that are created when [.lookahead()](#lookahead) is called (or a method that calls `.lookahead()`, such as [.peek()](#peek)).
126 |
127 | Tokens are [dequeued](#dequeue) when [.next()](#next) is called.
128 |
129 | ### lexer.loc
130 |
131 | Type: **{Object}**
132 |
133 | Default: `{ index: 0, column: 0, line: 1 }`
134 |
135 | The updated source string location with the following properties.
136 |
137 | - `index` - 0-index
138 | - `column` - 0-index
139 | - `line` - 1-index
140 |
141 | The following plugins are available for automatically updating tokens with the location:
142 |
143 | - [snapdragon-location][]
144 | - [snapdragon-position][]
145 |
146 | ## Options
147 |
148 | ### options.source
149 |
150 | Type: **{string}**
151 |
152 | Default: `undefined`
153 |
154 | The source of the input string. This is typically a filename or file path, but can also be `'string'` if a string or buffer is provided directly.
155 |
156 | If `lexer.input` is undefined, and `options.source` is a string, the lexer will attempt to set `lexer.input` by calling `fs.readFileSync()` on the value provided on `options.source`.
157 |
158 | ### options.mode
159 |
160 | Type: **{string}**
161 |
162 | Default: `undefined`
163 |
164 | If `options.mode` is `character`, instead of calling handlers (which match using regex) the [.advance()](#advance) method will [consume](#consume) and return one character at a time.
165 |
166 | ### options.value
167 |
168 | Type: **{string}**
169 |
170 | Default: `undefined`
171 |
172 | Specify the token property to use when the [.push](#push) method pushes a value onto [lexer.stash](#lexerstash). The logic works something like this:
173 |
174 | ```js
175 | lexer.append(token[lexer.options.value || 'value']);
176 | ```
177 |
178 | ## Tokens
179 |
180 | See the [snapdragon-token][] documentation for more details.
181 |
182 | ## Plugins
183 |
184 | Plugins are registered with the `lexer.use()` method and use the following conventions.
185 |
186 | ### Plugin Conventions
187 |
188 | Plugins are functions that take an instance of snapdragon-lexer.
189 |
190 | However, it's recommended that you always wrap your plugin function in another function that takes an options object. This allows users to pass options when using the plugin. _Even if your plugin doesn't take options, it's a best practice for users to always be able to use the same signature_.
191 |
192 | **Example**
193 |
194 | ```js
195 | function plugin(options) {
196 | return function(lexer) {
197 | // do stuff
198 | };
199 | }
200 |
201 | lexer.use(plugin());
202 | ```
203 |
--------------------------------------------------------------------------------
/CHANGELOG.md:
--------------------------------------------------------------------------------
1 | # Release history
2 |
3 | All notable changes to this project will be documented in this file.
4 |
5 | The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/)
6 | and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0.html).
7 |
8 |
9 | Guiding Principles
10 |
11 | - Changelogs are for humans, not machines.
12 | - There should be an entry for every single version.
13 | - The same types of changes should be grouped.
14 | - Versions and sections should be linkable.
15 | - The latest version comes first.
16 | - The release date of each version is displayed.
17 | - Mention whether you follow Semantic Versioning.
18 |
19 |
20 |
21 |
22 | Types of changes
23 |
24 | Changelog entries are classified using the following labels _(from [keep-a-changelog](http://keepachangelog.com/)_):
25 |
26 | - `Added` for new features.
27 | - `Changed` for changes in existing functionality.
28 | - `Deprecated` for soon-to-be removed features.
29 | - `Removed` for now removed features.
30 | - `Fixed` for any bug fixes.
31 | - `Security` in case of vulnerabilities.
32 |
33 |
34 |
35 |
36 | ## [3.0.0] - 2018-01-11
37 |
38 | ### Breaking changes
39 |
40 | - removed `lexer.last()`
41 | - bumped [snapdragon-stack](https://github.com/here-be/snapdragon-stack), which has replaced all getters with methods that must be called.
42 |
43 | ## [2.0.0] - 2018-01-08
44 |
45 | ### Breaking changes
46 |
47 | The following changes were made in an effort to make the API closer to other popular parsing libraries, such as babel and acorn.
48 |
49 | - Renamed `token.val` to `token.value`
50 | - `lexer.loc.column` was changed from a 1-index number to a 0-index number
51 | - `.current` is now a property set by the `.handle()` method. The value of `lexer.current` is whatever is returned by a handler.
52 | - `.prev()` now returns the previously lexed token
53 | - `.push()`
54 |
55 | ### Added
56 |
57 | - If `lexer.options.mode` is set to `character`, `lexer.advance()` will consume and return a single character each time it's called, instead of iterating over the handlers.
58 | - the `token.match` array is now decorated with a `.consumed` property, which is the value of `lexer.consumed` _before_ the match was created.
59 | - adds `lexer.stack` for tracking opening/closing structures
60 | - adds `lexer.stash` for storing an array of strings (in addition to `lexer.tokens`, which stores objects)
61 | - adds `.append`
62 | - adds `.skipWhile`
63 | - adds `.skipSpaces`
64 |
65 | ## [1.0.0] - 2017-11-30
66 |
67 | - run update
68 | - update code comments, add `.skipType` method
69 | - add examples
70 | - update metadata and urls
71 |
72 | [3.0.0]: https://github.com/here-be/snapdragon-lexer/compare/2.0.0...3.0.0
73 | [2.0.0]: https://github.com/here-be/snapdragon-lexer/compare/1.0.0...2.0.0
74 | [1.0.0]: https://github.com/here-be/snapdragon-lexer/compare/0.1.0...1.0.0
75 | [keep-a-changelog]: https://github.com/olivierlacan/keep-a-changelog
76 |
77 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | The MIT License (MIT)
2 |
3 | Copyright (c) 2017-present, Jon Schlinkert.
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in
13 | all copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
21 | THE SOFTWARE.
22 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # snapdragon-lexer [](https://www.npmjs.com/package/snapdragon-lexer) [](https://npmjs.org/package/snapdragon-lexer) [](https://npmjs.org/package/snapdragon-lexer) [](https://travis-ci.org/here-be/snapdragon-lexer)
2 |
3 | > Converts a string into an array of tokens, with useful methods for looking ahead and behind, capturing, matching, et cetera.
4 |
5 | Please consider following this project's author, [Jon Schlinkert](https://github.com/jonschlinkert), and consider starring the project to show your :heart: and support.
6 |
7 | ## Table of Contents
8 |
9 |
10 | Details
11 |
12 | - [Install](#install)
13 | - [Breaking changes in v2.0!](#breaking-changes-in-v20)
14 | - [Usage](#usage)
15 | - [API](#api)
16 | * [.set](#set)
17 | * [.get](#get)
18 | - [Properties](#properties)
19 | * [lexer.isLexer](#lexerislexer)
20 | * [lexer.input](#lexerinput)
21 | * [lexer.string](#lexerstring)
22 | * [lexer.consumed](#lexerconsumed)
23 | * [lexer.tokens](#lexertokens)
24 | * [lexer.stash](#lexerstash)
25 | * [lexer.stack](#lexerstack)
26 | * [lexer.queue](#lexerqueue)
27 | * [lexer.loc](#lexerloc)
28 | - [Options](#options)
29 | * [options.source](#optionssource)
30 | * [options.mode](#optionsmode)
31 | * [options.value](#optionsvalue)
32 | - [Tokens](#tokens)
33 | - [Plugins](#plugins)
34 | * [Plugin Conventions](#plugin-conventions)
35 | - [About](#about)
36 |
37 |
38 |
39 | ## Install
40 |
41 | Install with [npm](https://www.npmjs.com/):
42 |
43 | ```sh
44 | $ npm install --save snapdragon-lexer
45 | ```
46 |
47 | ## Breaking changes in v2.0!
48 |
49 | Please see the [changelog](CHANGELOG.md) for details!
50 |
51 | ## Usage
52 |
53 | ```js
54 | const Lexer = require('snapdragon-lexer');
55 | const lexer = new Lexer();
56 |
57 | lexer.capture('slash', /^\//);
58 | lexer.capture('text', /^\w+/);
59 | lexer.capture('star', /^\*/);
60 |
61 | console.log(lexer.tokenize('foo/*'));
62 | ```
63 |
64 | ## API
65 |
66 | ### [Lexer](index.js#L23)
67 |
68 | Create a new `Lexer` with the given `options`.
69 |
70 | **Params**
71 |
72 | * `input` **{string|Object}**: (optional) Input string or options. You can also set input directly on `lexer.input` after initializing.
73 | * `options` **{object}**
74 |
75 | **Example**
76 |
77 | ```js
78 | const Lexer = require('snapdragon-lexer');
79 | const lexer = new Lexer('foo/bar');
80 | ```
81 |
82 | ### [.bos](index.js#L53)
83 |
84 | Returns true if we are still at the beginning-of-string, and
85 | no part of the string has been consumed.
86 |
87 | * `returns` **{boolean}**
88 |
89 | ### [.eos](index.js#L65)
90 |
91 | Returns true if `lexer.string` and `lexer.queue` are empty.
92 |
93 | * `returns` **{boolean}**
94 |
95 | ### [.set](index.js#L83)
96 |
97 | Register a handler function.
98 |
99 | **Params**
100 |
101 | * `type` **{string}**
102 | * `fn` **{function}**: The handler function to register.
103 |
104 | **Example**
105 |
106 | ```js
107 | lexer.set('star', function() {
108 | // do parser, lexer, or compiler stuff
109 | });
110 | ```
111 |
112 | ### [.get](index.js#L119)
113 |
114 | Get a registered handler function.
115 |
116 | **Params**
117 |
118 | * `type` **{string}**
119 | * `fn` **{function}**: The handler function to register.
120 |
121 | **Example**
122 |
123 | ```js
124 | lexer.set('star', function() {
125 | // do lexer stuff
126 | });
127 | const star = lexer.get('star');
128 | ```
129 |
130 | ### [.has](index.js#L138)
131 |
132 | Returns true if the lexer has a registered handler of the given `type`.
133 |
134 | **Params**
135 |
136 | * **{string}**: type
137 | * `returns` **{boolean}**
138 |
139 | **Example**
140 |
141 | ```js
142 | lexer.set('star', function() {});
143 | console.log(lexer.has('star')); // true
144 | ```
145 |
146 | ### [.token](index.js#L159)
147 |
148 | Create a new [Token](https://github.com/here-be/snapdragon-token) with the given `type` and `value`.
149 |
150 | **Params**
151 |
152 | * `type` **{string|Object}**: (required) The type of token to create
153 | * `value` **{string}**: (optional) The captured string
154 | * `match` **{array}**: (optional) Match results from `String.match()` or `RegExp.exec()`
155 | * `returns` **{Object}**: Returns an instance of [snapdragon-token](https://github.com/here-be/snapdragon-token)
156 |
157 | **Events**
158 |
159 | * `emits`: token
160 |
161 | **Example**
162 |
163 | ```js
164 | console.log(lexer.token({type: 'star', value: '*'}));
165 | console.log(lexer.token('star', '*'));
166 | console.log(lexer.token('star'));
167 | ```
168 |
169 | ### [.isToken](index.js#L179)
170 |
171 | Returns true if the given value is a [snapdragon-token](https://github.com/here-be/snapdragon-token) instance.
172 |
173 | **Params**
174 |
175 | * `token` **{object}**
176 | * `returns` **{boolean}**
177 |
178 | **Example**
179 |
180 | ```js
181 | const Token = require('snapdragon-token');
182 | lexer.isToken({}); // false
183 | lexer.isToken(new Token({type: 'star', value: '*'})); // true
184 | ```
185 |
186 | ### [.consume](index.js#L198)
187 |
188 | Consume the given length from `lexer.string`. The consumed value is used to update `lexer.state.consumed`, as well as the current position.
189 |
190 | **Params**
191 |
192 | * `len` **{number}**
193 | * `value` **{string}**: Optionally pass the value being consumed.
194 | * `returns` **{String}**: Returns the consumed value
195 |
196 | **Example**
197 |
198 | ```js
199 | lexer.consume(1);
200 | lexer.consume(1, '*');
201 | ```
202 |
203 | Returns a function for updating a token with lexer
204 | location information.
205 |
206 | * `returns` **{function}**
207 |
208 | ### [.match](index.js#L255)
209 |
210 | Use the given `regex` to match a substring from `lexer.string`. Also validates the regex to ensure that it starts with `^` since matching should always be against the beginning of the string, and throws if the regex matches an empty string, which can cause catastrophic backtracking.
211 |
212 | **Params**
213 |
214 | * `regex` **{regExp}**: (required)
215 | * `returns` **{Array|null}**: Returns the match array from `RegExp.exec` or null.
216 |
217 | **Example**
218 |
219 | ```js
220 | const lexer = new Lexer('foo/bar');
221 | const match = lexer.match(/^\w+/);
222 | console.log(match);
223 | //=> [ 'foo', index: 0, input: 'foo/bar' ]
224 | ```
225 |
226 | ### [.scan](index.js#L301)
227 |
228 | Scan for a matching substring by calling [.match()](#match) with the given `regex`. If a match is found, 1) a token of the specified `type` is created, 2) `match[0]` is used as `token.value`, and 3) the length of `match[0]` is sliced from `lexer.string` (by calling [.consume()](#consume)).
229 |
230 | **Params**
231 |
232 | * `type` **{string}**
233 | * `regex` **{regExp}**
234 | * `returns` **{Object}**: Returns a token if a match is found, otherwise undefined.
235 |
236 | **Events**
237 |
238 | * `emits`: scan
239 |
240 | **Example**
241 |
242 | ```js
243 | lexer.string = '/foo/';
244 | console.log(lexer.scan(/^\//, 'slash'));
245 | //=> Token { type: 'slash', value: '/' }
246 | console.log(lexer.scan(/^\w+/, 'text'));
247 | //=> Token { type: 'text', value: 'foo' }
248 | console.log(lexer.scan(/^\//, 'slash'));
249 | //=> Token { type: 'slash', value: '/' }
250 | ```
251 |
252 | ### [.capture](index.js#L338)
253 |
254 | Capture a token of the specified `type` using the provide `regex` for scanning and matching substrings. Automatically registers a handler when a function is passed as the last argument.
255 |
256 | **Params**
257 |
258 | * `type` **{string}**: (required) The type of token being captured.
259 | * `regex` **{regExp}**: (required) The regex for matching substrings.
260 | * `fn` **{function}**: (optional) If supplied, the function will be called on the token before pushing it onto `lexer.tokens`.
261 | * `returns` **{Object}**
262 |
263 | **Example**
264 |
265 | ```js
266 | lexer.capture('text', /^\w+/);
267 | lexer.capture('text', /^\w+/, token => {
268 | if (token.value === 'foo') {
269 | // do stuff
270 | }
271 | return token;
272 | });
273 | ```
274 |
275 | ### [.handle](index.js#L370)
276 |
277 | Calls handler `type` on `lexer.string`.
278 |
279 | **Params**
280 |
281 | * `type` **{string}**: The handler type to call on `lexer.string`
282 | * `returns` **{Object}**: Returns a token of the given `type` or undefined.
283 |
284 | **Events**
285 |
286 | * `emits`: handle
287 |
288 | **Example**
289 |
290 | ```js
291 | const lexer = new Lexer('/a/b');
292 | lexer.capture('slash', /^\//);
293 | lexer.capture('text', /^\w+/);
294 | console.log(lexer.handle('text'));
295 | //=> undefined
296 | console.log(lexer.handle('slash'));
297 | //=> { type: 'slash', value: '/' }
298 | console.log(lexer.handle('text'));
299 | //=> { type: 'text', value: 'a' }
300 | ```
301 |
302 | ### [.advance](index.js#L393)
303 |
304 | Get the next token by iterating over `lexer.handlers` and calling each handler on `lexer.string` until a handler returns a token. If no handlers return a token, an error is thrown with the substring that couldn't be lexed.
305 |
306 | * `returns` **{Object}**: Returns the first token returned by a handler, or the first character in the remaining string if `options.mode` is set to `character`.
307 |
308 | **Example**
309 |
310 | ```js
311 | const token = lexer.advance();
312 | ```
313 |
314 | ### [.lex](index.js#L429)
315 |
316 | Tokenizes a string and returns an array of tokens.
317 |
318 | **Params**
319 |
320 | * `input` **{string}**: The string to lex.
321 | * `returns` **{Array}**: Returns an array of tokens.
322 |
323 | **Example**
324 |
325 | ```js
326 | let lexer = new Lexer({ handlers: otherLexer.handlers })
327 | lexer.capture('slash', /^\//);
328 | lexer.capture('text', /^\w+/);
329 | const tokens = lexer.lex('a/b/c');
330 | console.log(tokens);
331 | // Results in:
332 | // [ Token { type: 'text', value: 'a' },
333 | // Token { type: 'slash', value: '/' },
334 | // Token { type: 'text', value: 'b' },
335 | // Token { type: 'slash', value: '/' },
336 | // Token { type: 'text', value: 'c' } ]
337 | ```
338 |
339 | ### [.enqueue](index.js#L454)
340 |
341 | Push a token onto the `lexer.queue` array.
342 |
343 | **Params**
344 |
345 | * `token` **{object}**
346 | * `returns` **{Object}**: Returns the given token with updated `token.index`.
347 |
348 | **Example**
349 |
350 | ```js
351 | console.log(lexer.queue.length); // 0
352 | lexer.enqueue(new Token('star', '*'));
353 | console.log(lexer.queue.length); // 1
354 | ```
355 |
356 | ### [.dequeue](index.js#L472)
357 |
358 | Shift a token from `lexer.queue`.
359 |
360 | * `returns` **{Object}**: Returns the given token with updated `token.index`.
361 |
362 | **Example**
363 |
364 | ```js
365 | console.log(lexer.queue.length); // 1
366 | lexer.dequeue();
367 | console.log(lexer.queue.length); // 0
368 | ```
369 |
370 | ### [.lookbehind](index.js#L488)
371 |
372 | Lookbehind `n` tokens.
373 |
374 | **Params**
375 |
376 | * `n` **{number}**
377 | * `returns` **{Object}**
378 |
379 | **Example**
380 |
381 | ```js
382 | const token = lexer.lookbehind(2);
383 | ```
384 |
385 | ### [.prev](index.js#L504)
386 |
387 | Get the previously lexed token.
388 |
389 | * `returns` **{Object|undefined}**: Returns a token or undefined.
390 |
391 | **Example**
392 |
393 | ```js
394 | const token = lexer.prev();
395 | ```
396 |
397 | ### [.lookahead](index.js#L522)
398 |
399 | Lookahead `n` tokens and return the last token. Pushes any intermediate tokens onto `lexer.tokens`. To lookahead a single token, use [.peek()](#peek).
400 |
401 | **Params**
402 |
403 | * `n` **{number}**
404 | * `returns` **{Object}**
405 |
406 | **Example**
407 |
408 | ```js
409 | const token = lexer.lookahead(2);
410 | ```
411 |
412 | ### [.peek](index.js#L540)
413 |
414 | Lookahead a single token.
415 |
416 | * `returns` **{Object}**: Returns a token.
417 |
418 | **Example**
419 |
420 | ```js
421 | const token = lexer.peek();
422 | ```
423 |
424 | ### [.next](index.js#L555)
425 |
426 | Get the next token, either from the `queue` or by [advancing](#advance).
427 |
428 | * `returns` **{Object|String}**: Returns a token, or (when `options.mode` is set to `character`) either gets the next character from `lexer.queue`, or consumes the next character in the string.
429 |
430 | **Example**
431 |
432 | ```js
433 | const token = lexer.next();
434 | ```
435 |
436 | ### [.skip](index.js#L571)
437 |
438 | Skip `n` tokens or characters in the string. Skipped values are not enqueued.
439 |
440 | **Params**
441 |
442 | * `n` **{number}**
443 | * `returns` **{Object}**: returns an array of skipped tokens.
444 |
445 | **Example**
446 |
447 | ```js
448 | const token = lexer.skip(1);
449 | ```
450 |
451 | ### [.skipWhile](index.js#L588)
452 |
453 | Skip tokens while the given `fn` returns true.
454 |
455 | **Params**
456 |
457 | * `fn` **{function}**: Return true if a token should be skipped.
458 | * `returns` **{Array}**: Returns an array of skipped tokens.
459 |
460 | **Example**
461 |
462 | ```js
463 | lexer.skipWhile(tok => tok.type !== 'space');
464 | ```
465 |
466 | ### [.skipType](index.js#L606)
467 |
468 | Skip the given token `types`.
469 |
470 | **Params**
471 |
472 | * `types` **{string|Array}**: One or more token types to skip.
473 | * `returns` **{Array}**: Returns an array of skipped tokens.
474 |
475 | **Example**
476 |
477 | ```js
478 | lexer.skipType('space');
479 | ```
480 |
481 | ### [.skipType](index.js#L623)
482 |
483 | Skip the given token `types`.
484 |
485 | **Params**
486 |
487 | * `types` **{string|Array}**: One or more token types to skip.
488 | * `returns` **{Array}**: Returns an array of skipped tokens
489 |
490 | **Example**
491 |
492 | ```js
493 | lexer.skipType('space');
494 | lexer.skipType(['newline', 'space']);
495 | ```
496 |
497 | ### [.push](index.js#L645)
498 |
499 | Pushes the given `token` onto `lexer.tokens` and calls [.append()](#append) to push `token.value` onto `lexer.stash`. Disable pushing onto the stash by setting `lexer.options.append` or `token.append` to `false`.
500 |
501 | **Params**
502 |
503 | * `token` **{object|String}**
504 | * `returns` **{Object}**: Returns the given `token`.
505 |
506 | **Events**
507 |
508 | * `emits`: push
509 |
510 | **Example**
511 |
512 | ```js
513 | console.log(lexer.tokens.length); // 0
514 | lexer.push(new Token('star', '*'));
515 | console.log(lexer.tokens.length); // 1
516 | console.log(lexer.stash) // ['*']
517 | ```
518 |
519 | ### [.append](index.js#L686)
520 |
521 | Append a string to the last element on `lexer.stash`, or push the string onto the stash if no elements exist.
522 |
523 | **Params**
524 |
525 | * `value` **{String}**
526 | * `returns` **{String}**: Returns the last value in the array.
527 |
528 | **Example**
529 |
530 | ```js
531 | const stack = new Stack();
532 | stack.push('a');
533 | stack.push('b');
534 | stack.push('c');
535 | stack.append('_foo');
536 | stack.append('_bar');
537 | console.log(stack);
538 | //=> Stack ['a', 'b', 'c_foo_bar']
539 | ```
540 |
541 | ### [.isInside](index.js#L712)
542 |
543 | Returns true if a token with the given `type` is on the stack.
544 |
545 | **Params**
546 |
547 | * `type` **{string}**: The type to check for.
548 | * `returns` **{boolean}**
549 |
550 | **Example**
551 |
552 | ```js
553 | if (lexer.isInside('bracket') || lexer.isInside('brace')) {
554 | // do stuff
555 | }
556 | ```
557 |
558 | ### [.error](index.js#L733)
559 |
560 | Throw a formatted error message with details including the cursor position.
561 |
562 | **Params**
563 |
564 | * `msg` **{string}**: Message to use in the Error.
565 | * `node` **{object}**
566 | * `returns` **{undefined}**
567 |
568 | **Example**
569 |
570 | ```js
571 | lexer.set('foo', function(tok) {
572 | if (tok.value !== 'foo') {
573 | throw this.state.error('expected token.value to be "foo"', tok);
574 | }
575 | });
576 | ```
577 |
578 | ### [.use](index.js#L774)
579 |
580 | Call a plugin function on the lexer instance.
581 |
582 | **Params**
583 |
584 | * `fn` **{function}**
585 | * `returns` **{object}**: Returns the lexer instance.
586 |
587 | **Example**
588 |
589 | ```js
590 | lexer.use(function(lexer) {
591 | // do stuff to lexer
592 | });
593 | ```
594 |
595 | ### [Lexer#isLexer](index.js#L796)
596 |
597 | Static method that returns true if the given value is an instance of `snapdragon-lexer`.
598 |
599 | **Params**
600 |
601 | * `lexer` **{object}**
602 | * `returns` **{Boolean}**
603 |
604 | **Example**
605 |
606 | ```js
607 | const Lexer = require('snapdragon-lexer');
608 | const lexer = new Lexer();
609 | console.log(Lexer.isLexer(lexer)); //=> true
610 | console.log(Lexer.isLexer({})); //=> false
611 | ```
612 |
613 | ### [Lexer#isToken](index.js#L817)
614 |
615 | Static method that returns true if the given value is an instance of `snapdragon-token`. This is a proxy to `Token#isToken`.
616 |
617 | **Params**
618 |
619 | * `lexer` **{object}**
620 | * `returns` **{Boolean}**
621 |
622 | **Example**
623 |
624 | ```js
625 | const Token = require('snapdragon-token');
626 | const Lexer = require('snapdragon-lexer');
627 | console.log(Lexer.isToken(new Token({type: 'foo'}))); //=> true
628 | console.log(Lexer.isToken({})); //=> false
629 | ```
630 |
631 | ### [Lexer#State](index.js#L828)
632 |
633 | The State class, exposed as a static property.
634 |
635 | ### [Lexer#Token](index.js#L839)
636 |
637 | The Token class, exposed as a static property.
638 |
639 | ### .set
640 |
641 | Register a handler function.
642 |
643 | **Params**
644 |
645 | * `type` **{String}**
646 | * `fn` **{Function}**: The handler function to register.
647 |
648 | **Example**
649 |
650 | ```js
651 | lexer.set('star', function(token) {
652 | // do parser, lexer, or compiler stuff
653 | });
654 | ```
655 |
656 | As an alternative to `.set`, the [.capture](#capture) method will automatically register a handler when a function is passed as the last argument.
657 |
658 | ### .get
659 |
660 | Get a registered handler function.
661 |
662 | **Params**
663 |
664 | * `type` **{String}**
665 | * `fn` **{Function}**: The handler function to register.
666 |
667 | **Example**
668 |
669 | ```js
670 | lexer.set('star', function() {
671 | // do parser, lexer, or compiler stuff
672 | });
673 | const star = handlers.get('star');
674 | ```
675 |
676 | ## Properties
677 |
678 | ### lexer.isLexer
679 |
680 | Type: **{boolean}**
681 |
682 | Default: `true` (constant)
683 |
684 | This property is defined as a convenience, to make it easy for plugins to check for an instance of Lexer.
685 |
686 | ### lexer.input
687 |
688 | Type: **{string}**
689 |
690 | Default: `''`
691 |
692 | The unmodified source string provided by the user.
693 |
694 | ### lexer.string
695 |
696 | Type: **{string}**
697 |
698 | Default: `''`
699 |
700 | The source string minus the part of the string that has already been [consumed](#consume).
701 |
702 | ### lexer.consumed
703 |
704 | Type: **{string}**
705 |
706 | Default: `''`
707 |
708 | The part of the source string that has been consumed.
709 |
710 | ### lexer.tokens
711 |
712 | Type: **{array}**
713 |
714 | Default: `[]`
715 |
716 | Array of lexed tokens.
717 |
718 | ### lexer.stash
719 |
720 | Type: **{array}**
721 |
722 | Default: `['']` (instance of [snapdragon-stack](https://github.com/here-be/snapdragon-stack))
723 |
724 | Array of captured strings. Similar to the [lexer.tokens](#lexertokens) array, but stores strings instead of token objects.
725 |
726 | ### lexer.stack
727 |
728 | Type: **{array}**
729 |
730 | Default: `[]`
731 |
732 | LIFO (last in, first out) array. A token is pushed onto the stack when an "opening" character or character sequence needs to be tracked. When the (matching) "closing" character or character sequence is encountered, the (opening) token is popped off of the stack.
733 |
734 | The stack is not used by any lexer methods, it's reserved for the user. Stacks are necessary for creating Abstract Syntax Trees (ASTs), but if you require this functionality it would be better to use a parser such as [snapdragon-parser][snapdragon-parser], with methods and other conveniences for creating an AST.
735 |
736 | ### lexer.queue
737 |
738 | Type: **{array}**
739 |
740 | Default: `[]`
741 |
742 | FIFO (first in, first out) array, for temporarily storing tokens that are created when [.lookahead()](#lookahead) is called (or a method that calls `.lookahead()`, such as [.peek()](#peek)).
743 |
744 | Tokens are [dequeued](#dequeue) when [.next()](#next) is called.
745 |
746 | ### lexer.loc
747 |
748 | Type: **{Object}**
749 |
750 | Default: `{ index: 0, column: 0, line: 1 }`
751 |
752 | The updated source string location with the following properties.
753 |
754 | * `index` - 0-index
755 | * `column` - 0-index
756 | * `line` - 1-index
757 |
758 | The following plugins are available for automatically updating tokens with the location:
759 |
760 | * [snapdragon-location](https://github.com/here-be/snapdragon-location)
761 | * [snapdragon-position](https://github.com/here-be/snapdragon-position)
762 |
763 | ## Options
764 |
765 | ### options.source
766 |
767 | Type: **{string}**
768 |
769 | Default: `undefined`
770 |
771 | The source of the input string. This is typically a filename or file path, but can also be `'string'` if a string or buffer is provided directly.
772 |
773 | If `lexer.input` is undefined, and `options.source` is a string, the lexer will attempt to set `lexer.input` by calling `fs.readFileSync()` on the value provided on `options.source`.
774 |
775 | ### options.mode
776 |
777 | Type: **{string}**
778 |
779 | Default: `undefined`
780 |
781 | If `options.mode` is `character`, instead of calling handlers (which match using regex) the [.advance()](#advance) method will [consume](#consume) and return one character at a time.
782 |
783 | ### options.value
784 |
785 | Type: **{string}**
786 |
787 | Default: `undefined`
788 |
789 | Specify the token property to use when the [.push](#push) method pushes a value onto [lexer.stash](#lexerstash). The logic works something like this:
790 |
791 | ```js
792 | lexer.append(token[lexer.options.value || 'value']);
793 | ```
794 |
795 | ## Tokens
796 |
797 | See the [snapdragon-token](https://github.com/here-be/snapdragon-token) documentation for more details.
798 |
799 | ## Plugins
800 |
801 | Plugins are registered with the `lexer.use()` method and use the following conventions.
802 |
803 | ### Plugin Conventions
804 |
805 | Plugins are functions that take an instance of snapdragon-lexer.
806 |
807 | However, it's recommended that you always wrap your plugin function in another function that takes an options object. This allow users to pass options when using the plugin. _Even if your plugin doesn't take options, it's a best practice for users to always be able to use the same signature_.
808 |
809 | **Example**
810 |
811 | ```js
812 | function plugin(options) {
813 | return function(lexer) {
814 | // do stuff
815 | };
816 | }
817 |
818 | lexer.use(plugin());
819 | ```
820 |
821 | ## About
822 |
823 |
824 | Contributing
825 |
826 | Pull requests and stars are always welcome. For bugs and feature requests, [please create an issue](../../issues/new).
827 |
828 | Please read the [contributing guide](.github/contributing.md) for advice on opening issues, pull requests, and coding standards.
829 |
830 |
831 |
832 |
833 | Running Tests
834 |
835 | Running and reviewing unit tests is a great way to get familiarized with a library and its API. You can install dependencies and run tests with the following command:
836 |
837 | ```sh
838 | $ npm install && npm test
839 | ```
840 |
841 |
842 |
843 |
844 | Building docs
845 |
846 | _(This project's readme.md is generated by [verb](https://github.com/verbose/verb-generate-readme), please don't edit the readme directly. Any changes to the readme must be made in the [.verb.md](.verb.md) readme template.)_
847 |
848 | To generate the readme, run the following command:
849 |
850 | ```sh
851 | $ npm install -g verbose/verb#dev verb-generate-readme && verb
852 | ```
853 |
854 |
855 |
856 | ### Related projects
857 |
858 | You might also be interested in these projects:
859 |
860 | * [snapdragon-scanner](https://www.npmjs.com/package/snapdragon-scanner): Easily scan a string with an object of regex patterns to produce an array of… [more](https://github.com/here-be/snapdragon-scanner) | [homepage](https://github.com/here-be/snapdragon-scanner "Easily scan a string with an object of regex patterns to produce an array of tokens. ~100 sloc.")
861 |
862 | ### Author
863 |
864 | **Jon Schlinkert**
865 |
866 | * [GitHub Profile](https://github.com/jonschlinkert)
867 | * [Twitter Profile](https://twitter.com/jonschlinkert)
868 | * [LinkedIn Profile](https://linkedin.com/in/jonschlinkert)
869 |
870 | ### License
871 |
872 | Copyright © 2018, [Jon Schlinkert](https://github.com/jonschlinkert).
873 | Released under the [MIT License](LICENSE).
874 |
875 | ***
876 |
877 | _This file was generated by [verb-generate-readme](https://github.com/verbose/verb-generate-readme), v0.8.0, on November 19, 2018._
--------------------------------------------------------------------------------
/examples/ast.js:
--------------------------------------------------------------------------------
/**
 * Example of creating an AST by pairing capture handlers with a
 * LIFO stack: "brace.open" pushes a new container node onto the
 * stack, "brace.close" pops it, and all other tokens are appended
 * to whichever node is currently on top of the stack.
 */

const ast = { type: 'root', nodes: [] };
const stack = [ast];

const Tokenizer = require('..');
const tokenizer = new Tokenizer()
  .capture('text', /^\w+/)
  .capture('brace.open', /^\{/, tok => {
    // create a node to hold the contents of our brace pattern
    const brace = { type: 'brace', nodes: [tok] };

    // push the "brace" node onto the nodes array
    // of the last node on the stack
    stack[stack.length - 1].nodes.push(brace);

    // next, we also need to push the brace itself onto the stack
    // so that nodes can be pushed onto brace.nodes until
    // we get to the closing (right) brace
    stack.push(brace);

    // return the token to push it onto `tokenizer.tokens`
    return tok;
  })
  .capture('brace.close', /^\}/, tok => {
    // get the parent "brace" node by popping it from the stack
    // (assumes braces are balanced — an unmatched "}" would pop
    // the root node; confirm inputs are well-formed)
    const brace = stack.pop();

    // push the closing brace onto brace.nodes
    brace.nodes.push(tok);

    // return the token to push it onto `tokenizer.tokens`
    return tok;
  })
  .capture('comma', /^,/)
  .capture('slash', /^\//)
  .capture('star', /^\*/)
  .capture('dot', /^\./)
  .on('token', token => {
    // push all non-brace tokens onto the nodes array of
    // the "current" node on the stack (since we already
    // handle brace nodes above, to ensure they are pushed
    // on in the correct order)
    if (token.type.slice(0, 5) !== 'brace') {
      stack[stack.length - 1].nodes.push(token);
    }
  });

tokenizer.tokenize('{foo,bar,{baz,qux}}/*.txt');
console.log(JSON.stringify(ast, null, 2));
53 |
54 | /**
55 | * Results in an AST that looks something like this:
56 | */
57 |
58 | // var res = {
59 | // type: 'root',
60 | // nodes: [
61 | // {
62 | // type: 'brace',
63 | // nodes: [
64 | // {
65 | // type: 'brace.open',
66 | // val: '{'
67 | // },
68 | // {
69 | // type: 'text',
70 | // val: 'foo'
71 | // },
72 | // {
73 | // type: 'comma',
74 | // val: ','
75 | // },
76 | // {
77 | // type: 'text',
78 | // val: 'bar'
79 | // },
80 | // {
81 | // type: 'comma',
82 | // val: ','
83 | // },
84 | // {
85 | // type: 'brace',
86 | // nodes: [
87 | // {
88 | // type: 'brace.open',
89 | // val: '{'
90 | // },
91 | // {
92 | // type: 'text',
93 | // val: 'baz'
94 | // },
95 | // {
96 | // type: 'comma',
97 | // val: ','
98 | // },
99 | // {
100 | // type: 'text',
101 | // val: 'qux'
102 | // },
103 | // {
104 | // type: 'brace.close',
105 | // val: '}'
106 | // }
107 | // ]
108 | // },
109 | // {
110 | // type: 'brace.close',
111 | // val: '}'
112 | // }
113 | // ]
114 | // },
115 | // {
116 | // type: 'slash',
117 | // val: '/'
118 | // },
119 | // {
120 | // type: 'star',
121 | // val: '*'
122 | // },
123 | // {
124 | // type: 'dot',
125 |
126 | // val: '.'
127 | // },
128 | // {
129 | // type: 'text',
130 | // val: 'txt'
131 | // }
132 | // ]
133 | // };
134 |
--------------------------------------------------------------------------------
/examples/lex.js:
--------------------------------------------------------------------------------
/**
 * Example of using the .lex method.
 *
 * Fixes two defects in the original example:
 * 1. `quote_single` matched `"` and `quote_double` matched `'` —
 *    the regexes were swapped relative to the type names.
 * 2. The header comment referred to ".tokenize" although the
 *    script calls `.lex()`.
 */

const Tokenizer = require('..');
const tokenizer = new Tokenizer()
  .capture('space', /^ +/)
  .capture('equal', /^=/)
  .capture('text', /^\w+/)
  .capture('quote_single', /^'/)
  .capture('quote_double', /^"/)
  .capture('semi', /^;/);

const tokens = tokenizer.lex('const foo = "bar";');
console.log(tokens);
--------------------------------------------------------------------------------
/examples/match.js:
--------------------------------------------------------------------------------
/**
 * Example of using the .match method.
 *
 * Fix: the original example called `tokenizer.use(position())`, but
 * `position` was never required or defined, so the script crashed
 * with a ReferenceError before reaching `.match()`. Location/position
 * plugins (e.g. snapdragon-position) are optional and not needed to
 * demonstrate `.match()`, so the call was removed.
 */

const Tokenizer = require('..');
const tokenizer = new Tokenizer('foo/bar');

// .match() consumes the matched substring and returns the RegExp
// match array (or null when nothing matches).
const match = tokenizer.match(/^\w+/);
const tok = tokenizer.token('text', match[0], match);
console.log(tok);
--------------------------------------------------------------------------------
/examples/skipType.js:
--------------------------------------------------------------------------------
/**
 * Example of using the .skipType method: register three capture
 * handlers, then skip every "slash" and "text" token so only the
 * "star" remains to be lexed.
 */

const Tokenizer = require('..');
const tokenizer = new Tokenizer('foo/*');

tokenizer
  .capture('slash', /^\//)
  .capture('text', /^\w+/)
  .capture('star', /^\*/);

tokenizer.skipType(['slash', 'text']);
console.log(tokenizer);
--------------------------------------------------------------------------------
/index.js:
--------------------------------------------------------------------------------
1 | 'use strict';
2 |
3 | const Events = require('events');
4 | const assert = require('assert');
5 | const State = require('./lib/state');
6 | const Token = require('./lib/token');
7 | const Location = require('./lib/location');
8 | const { Position } = Location;
9 |
10 | /**
11 | * Create a new `Lexer` with the given `options`.
12 | *
13 | * ```js
14 | * const Lexer = require('snapdragon-lexer');
15 | * const lexer = new Lexer('foo/bar');
16 | * ```
17 | * @name Lexer
18 | * @param {string|Object} `input` (optional) Input string or options. You can also set input directly on `lexer.input` after initializing.
19 | * @param {object} `options`
20 | * @api public
21 | */
22 |
23 | class Lexer extends Events {
  /**
   * Initialize a new Lexer.
   *
   * @param {string|Object} `input` Input string, or the options object
   *   when called with a single non-string argument.
   * @param {object} `options` (optional) May include a `handlers` Map
   *   (e.g. `otherLexer.handlers`) whose entries are re-registered on
   *   this instance.
   */
  constructor(input, options = {}) {
    super();

    // single-argument form: `new Lexer(options)`
    if (typeof input !== 'string') {
      options = input || {};
      input = '';
    }

    // shallow-copy so caller mutations don't leak into the instance
    this.options = { ...options };
    this.handlers = new Map();
    this.types = new Set();
    this.state = new State(input);

    if (options.handlers) {
      for (const [type, handler] of options.handlers) {
        // NOTE(review): no `.handler()` method is visible in this file —
        // this may need to be `this.set(type, handler)`; confirm.
        this.handler(type, handler);
      }
    }
  }
43 |
44 | /**
45 | * Returns true if we are still at the beginning-of-string, and
46 | * no part of the string has been consumed.
47 | *
48 | * @name .bos
49 | * @return {boolean}
50 | * @api public
51 | */
52 |
53 | bos() {
54 | return !this.state.consumed;
55 | }
56 |
57 | /**
58 | * Returns true if `lexer.string` and `lexer.queue` are empty.
59 | *
60 | * @name .eos
61 | * @return {boolean}
62 | * @api public
63 | */
64 |
65 | eos() {
66 | return this.state.string === '' && this.state.queue.length === 0;
67 | }
68 |
  /**
   * Register a handler function for the given `type`. The handler is
   * wrapped so that: its return value is normalized to a Token, the
   * token's `type` defaults to the registered type, and location info
   * spanning the handler call is attached to the token.
   *
   * ```js
   * lexer.set('star', function() {
   *   // do parser, lexer, or compiler stuff
   * });
   * ```
   * @name .set
   * @param {string} `type`
   * @param {function} `fn` The handler function to register. Defaults
   *   to an identity function.
   * @return {Object} Returns the lexer instance for chaining.
   * @api public
   */

  set(type, handler = tok => tok) {
    this.types.add(type);
    const lexer = this;
    // can't do fat arrow here, we need to ensure that the handler
    // context is always correct whether handlers are called directly
    // or re-registered on a new instance, etc.
    this.handlers.set(type, function(...args) {
      let ctx = this || lexer;
      // capture start position before the handler consumes input
      let loc = ctx.location();
      let tok = handler.call(ctx, ...args);
      // normalize plain-object results into Token instances
      // NOTE(review): `isObject` is not defined in the visible portion
      // of this file — assumed to be a module-level helper; confirm.
      if (tok && isObject(tok) && !Token.isToken(tok)) {
        tok = ctx.token(tok);
      }
      // default the token type to the registered type
      if (Token.isToken(tok) && !tok.type) {
        tok.type = type;
      }
      // attach start/end location only to real tokens
      return Token.isToken(tok) ? loc(tok) : tok;
    });
    return this;
  }
103 |
104 | /**
105 | * Get a registered handler function.
106 | *
107 | * ```js
108 | * lexer.set('star', function() {
109 | * // do lexer stuff
110 | * });
111 | * const star = lexer.get('star');
112 | * ```
113 | * @name .get
114 | * @param {string} `type`
115 | * @param {function} `fn` The handler function to register.
116 | * @api public
117 | */
118 |
119 | get(type) {
120 | let handler = this.handlers.get(type) || this.handlers.get('unknown');
121 | assert(handler, `expected handler "${type}" to be a function`);
122 | return handler;
123 | }
124 |
125 | /**
126 | * Returns true if the lexer has a registered handler of the given `type`.
127 | *
128 | * ```js
129 | * lexer.set('star', function() {});
130 | * console.log(lexer.has('star')); // true
131 | * ```
132 | * @name .has
133 | * @param {string} type
134 | * @return {boolean}
135 | * @api public
136 | */
137 |
138 | has(type) {
139 | return this.handlers.has(type);
140 | }
141 |
142 | /**
143 | * Create a new [Token][snapdragon-token] with the given `type` and `value`.
144 | *
145 | * ```js
146 | * console.log(lexer.token({type: 'star', value: '*'}));
147 | * console.log(lexer.token('star', '*'));
148 | * console.log(lexer.token('star'));
149 | * ```
150 | * @name .token
151 | * @emits token
152 | * @param {string|Object} `type` (required) The type of token to create
153 | * @param {string} `value` (optional) The captured string
154 | * @param {array} `match` (optional) Match results from `String.match()` or `RegExp.exec()`
155 | * @return {Object} Returns an instance of [snapdragon-token][]
156 | * @api public
157 | */
158 |
159 | token(type, value, match) {
160 | let token = new Token(type, value, match);
161 | this.emit('token', token);
162 | return token;
163 | }
164 |
165 | /**
166 | * Returns true if the given value is a [snapdragon-token][] instance.
167 | *
168 | * ```js
169 | * const Token = require('snapdragon-token');
170 | * lexer.isToken({}); // false
171 | * lexer.isToken(new Token({type: 'star', value: '*'})); // true
172 | * ```
173 | * @name .isToken
174 | * @param {object} `token`
175 | * @return {boolean}
176 | * @api public
177 | */
178 |
179 | isToken(token) {
180 | return Token.isToken(token);
181 | }
182 |
  /**
   * Consume the given length from `lexer.string`. The consumed value is
   * appended to `lexer.state.consumed`, removed from the front of
   * `lexer.state.string`, and used to advance the cursor position.
   *
   * ```js
   * lexer.consume(1);
   * lexer.consume(1, '*');
   * ```
   * @name .consume
   * @param {number} `len` Number of characters to remove from the string.
   * @param {string} `value` Optionally pass the value being consumed;
   *   defaults to the first `len` characters of the remaining string.
   * @return {String} Returns the consumed value
   * @api public
   */

  consume(len, value = this.state.string.slice(0, len)) {
    this.state.consumed += value;
    // drop the consumed prefix from the remaining input
    this.state.string = this.state.string.slice(len);
    // keep line/column/index in sync with what was consumed
    this.updateLocation(value, len);
    return value;
  }
204 |
  /**
   * Update `state.loc` (column, line, index) based on the consumed
   * `value`.
   *
   * @param {string} `value` The string that was just consumed.
   * @param {number} `len` Length of the consumed value (defaults to
   *   `value.length`).
   * @return {undefined}
   */

  updateLocation(value, len = value.length) {
    let i = value.lastIndexOf('\n');
    // if value contains a newline, column restarts relative to the last
    // newline; otherwise it advances by len.
    // NOTE(review): `len - i` counts the '\n' itself — with the
    // documented 0-indexed column this looks off by one; confirm the
    // intended convention before changing.
    this.state.loc.column = ~i ? len - i : this.state.loc.column + len;
    // one line per newline in the consumed value
    this.state.loc.line += Math.max(0, value.split('\n').length - 1);
    this.state.loc.index += len;
  }
218 |
219 | /**
220 | * Returns a function for updating a token with lexer
221 | * location information.
222 | *
223 | * @return {function}
224 | * @api public
225 | */
226 |
227 | location() {
228 | let start = new Position(this);
229 |
230 | return token => {
231 | let end = new Position(this);
232 | define(token, 'loc', new Location(start, end, this));
233 | return token;
234 | };
235 | }
236 |
237 | /**
238 | * Use the given `regex` to match a substring from `lexer.string`. Also validates
239 | * the regex to ensure that it starts with `^` since matching should always be
240 | * against the beginning of the string, and throws if the regex matches an empty
241 | * string, which can cause catastrophic backtracking.
242 | *
243 | * ```js
244 | * const lexer = new Lexer('foo/bar');
245 | * const match = lexer.match(/^\w+/);
246 | * console.log(match);
247 | * //=> [ 'foo', index: 0, input: 'foo/bar' ]
248 | * ```
249 | * @name .match
250 | * @param {regExp} `regex` (required)
251 | * @return {Array|null} Returns the match array from `RegExp.exec` or null.
252 | * @api public
253 | */
254 |
255 | match(regex) {
256 | assert(regex instanceof RegExp, 'expected a regular expression');
257 |
258 | if (regex.validated !== true) {
259 | assert(regex.source[0] === '^', 'expected regex to start with "^"');
260 | regex.validated = true;
261 | }
262 |
263 | let consumed = this.state.consumed;
264 | let match = regex.exec(this.state.string);
265 | if (!match) return null;
266 |
267 | if (match[0] === '') {
268 | throw new SyntaxError('regex should not match an empty string');
269 | }
270 |
271 | this.emit('match', match);
272 | define(match, 'consumed', consumed);
273 | this.consume(match[0].length, match[0]);
274 | return match;
275 | }
276 |
277 | /**
278 | * Scan for a matching substring by calling [.match()](#match)
279 | * with the given `regex`. If a match is found, 1) a token of the
280 | * specified `type` is created, 2) `match[0]` is used as `token.value`,
281 | * and 3) the length of `match[0]` is sliced from `lexer.string`
282 | * (by calling [.consume()](#consume)).
283 | *
284 | * ```js
285 | * lexer.string = '/foo/';
286 | * console.log(lexer.scan(/^\//, 'slash'));
287 | * //=> Token { type: 'slash', value: '/' }
288 | * console.log(lexer.scan(/^\w+/, 'text'));
289 | * //=> Token { type: 'text', value: 'foo' }
290 | * console.log(lexer.scan(/^\//, 'slash'));
291 | * //=> Token { type: 'slash', value: '/' }
292 | * ```
293 | * @name .scan
294 | * @emits scan
295 | * @param {string} `type`
296 | * @param {regExp} `regex`
297 | * @return {Object} Returns a token if a match is found, otherwise undefined.
298 | * @api public
299 | */
300 |
301 | scan(regex, type) {
302 | try {
303 | let match = this.match(regex);
304 | if (match) {
305 | let tok = this.token(type, match[0], match);
306 | this.emit('scan', tok);
307 | return tok;
308 | }
309 | } catch (err) {
310 | err.regex = regex;
311 | err.type = type;
312 | throw err;
313 | }
314 | }
315 |
316 | /**
317 | * Capture a token of the specified `type` using the provide `regex`
318 | * for scanning and matching substrings. Automatically registers a handler
319 | * when a function is passed as the last argument.
320 | *
321 | * ```js
322 | * lexer.capture('text', /^\w+/);
323 | * lexer.capture('text', /^\w+/, token => {
324 | * if (token.value === 'foo') {
325 | * // do stuff
326 | * }
327 | * return token;
328 | * });
329 | * ```
330 | * @name .capture
331 | * @param {string} `type` (required) The type of token being captured.
332 | * @param {regExp} `regex` (required) The regex for matching substrings.
333 | * @param {function} `fn` (optional) If supplied, the function will be called on the token before pushing it onto `lexer.tokens`.
334 | * @return {Object}
335 | * @api public
336 | */
337 |
338 | capture(type, regex, fn) {
339 | let handler = function() {
340 | let token = this.scan(regex, type);
341 | if (token) {
342 | return fn ? fn.call(this, token) : token;
343 | }
344 | };
345 | this.set(type, handler);
346 | return this;
347 | }
348 |
349 | /**
350 | * Calls handler `type` on `lexer.string`.
351 | *
352 | * ```js
353 | * const lexer = new Lexer('/a/b');
354 | * lexer.capture('slash', /^\//);
355 | * lexer.capture('text', /^\w+/);
356 | * console.log(lexer.handle('text'));
357 | * //=> undefined
358 | * console.log(lexer.handle('slash'));
359 | * //=> { type: 'slash', value: '/' }
360 | * console.log(lexer.handle('text'));
361 | * //=> { type: 'text', value: 'a' }
362 | * ```
363 | * @name .handle
364 | * @emits handle
365 | * @param {string} `type` The handler type to call on `lexer.string`
366 | * @return {Object} Returns a token of the given `type` or undefined.
367 | * @api public
368 | */
369 |
370 | handle(type) {
371 | let token = this.get(type).call(this);
372 | if (token) {
373 | this.current = token;
374 | this.emit('handle', token);
375 | return token;
376 | }
377 | }
378 |
379 | /**
380 | * Get the next token by iterating over `lexer.handlers` and
381 | * calling each handler on `lexer.string` until a handler returns
382 | * a token. If no handlers return a token, an error is thrown
383 | * with the substring that couldn't be lexed.
384 | *
385 | * ```js
386 | * const token = lexer.advance();
387 | * ```
388 | * @name .advance
389 | * @return {Object} Returns the first token returned by a handler, or the first character in the remaining string if `options.mode` is set to `character`.
390 | * @api public
391 | */
392 |
393 | advance() {
394 | if (this.eos()) return;
395 | if (this.options.mode === 'character') {
396 | return (this.current = this.consume(1));
397 | }
398 | for (let type of this.types) {
399 | let token = this.handle(type);
400 | if (token) {
401 | return token;
402 | }
403 | }
404 | this.fail();
405 | }
406 |
407 | /**
408 | * Tokenizes a string and returns an array of tokens.
409 | *
410 | * ```js
411 | * let lexer = new Lexer({ handlers: otherLexer.handlers })
412 | * lexer.capture('slash', /^\//);
413 | * lexer.capture('text', /^\w+/);
414 | * const tokens = lexer.lex('a/b/c');
415 | * console.log(tokens);
416 | * // Results in:
417 | * // [ Token { type: 'text', value: 'a' },
418 | * // Token { type: 'slash', value: '/' },
419 | * // Token { type: 'text', value: 'b' },
420 | * // Token { type: 'slash', value: '/' },
421 | * // Token { type: 'text', value: 'c' } ]
422 | * ```
423 | * @name .lex
424 | * @param {string} `input` The string to lex.
425 | * @return {Array} Returns an array of tokens.
426 | * @api public
427 | */
428 |
429 | lex(input, options) {
430 | if (options) this.options = { ...options };
431 | if (input) this.state = new State(input);
432 | while (this.push(this.next()));
433 | return this.state.tokens;
434 | }
435 |
436 | tokenize(...args) {
437 | return this.lex(...args);
438 | }
439 |
440 | /**
441 | * Push a token onto the `lexer.queue` array.
442 | *
443 | * ```js
444 | * console.log(lexer.queue.length); // 0
445 | * lexer.enqueue(new Token('star', '*'));
446 | * console.log(lexer.queue.length); // 1
447 | * ```
448 | * @name .enqueue
449 | * @param {object} `token`
 * @return {Object} Returns the given token.
451 | * @api public
452 | */
453 |
454 | enqueue(token) {
455 | token && this.state.queue.push(token);
456 | return token;
457 | }
458 |
459 | /**
460 | * Shift a token from `lexer.queue`.
461 | *
462 | * ```js
463 | * console.log(lexer.queue.length); // 1
464 | * lexer.dequeue();
465 | * console.log(lexer.queue.length); // 0
466 | * ```
467 | * @name .dequeue
 * @return {Object|Number} Returns the first token in the queue, or `0` when the queue is empty.
469 | * @api public
470 | */
471 |
472 | dequeue() {
473 | return this.state.queue.length && this.state.queue.shift();
474 | }
475 |
476 | /**
477 | * Lookbehind `n` tokens.
478 | *
479 | * ```js
480 | * const token = lexer.lookbehind(2);
481 | * ```
482 | * @name .lookbehind
483 | * @param {number} `n`
484 | * @return {Object}
485 | * @api public
486 | */
487 |
488 | lookbehind(n) {
489 | assert(Number.isInteger(n), 'expected a positive integer');
490 | return this.state.tokens[this.state.tokens.length - n];
491 | }
492 |
493 | /**
494 | * Get the previously lexed token.
495 | *
496 | * ```js
497 | * const token = lexer.prev();
498 | * ```
499 | * @name .prev
500 | * @returns {Object|undefined} Returns a token or undefined.
501 | * @api public
502 | */
503 |
  // Convenience wrapper: the most recently pushed token, i.e. the top of
  // lexer.state.tokens (undefined when nothing has been lexed yet).
  prev() {
    return this.lookbehind(1);
  }
507 |
508 | /**
509 | * Lookahead `n` tokens and return the last token. Pushes any
510 | * intermediate tokens onto `lexer.tokens.` To lookahead a single
511 | * token, use [.peek()](#peek).
512 | *
513 | * ```js
514 | * const token = lexer.lookahead(2);
515 | * ```
516 | * @name .lookahead
517 | * @param {number} `n`
518 | * @return {Object}
519 | * @api public
520 | */
521 |
522 | lookahead(n) {
523 | assert(Number.isInteger(n), 'expected a positive integer');
524 | let fetch = n - this.state.queue.length;
525 | while (fetch-- > 0 && this.enqueue(this.advance()));
526 | return this.state.queue[--n];
527 | }
528 |
529 | /**
530 | * Lookahead a single token.
531 | *
532 | * ```js
533 | * const token = lexer.peek();
534 | * ```
535 | * @name .peek
536 | * @return {Object} Returns a token.
537 | * @api public
538 | */
539 |
  // Convenience wrapper for lookahead(1): returns the next token without
  // consuming it from the queue (dequeue()/next() will yield it later).
  peek() {
    return this.lookahead(1);
  }
543 |
544 | /**
545 | * Get the next token, either from the `queue` or by [advancing](#advance).
546 | *
547 | * ```js
548 | * const token = lexer.next();
549 | * ```
550 | * @name .next
551 | * @returns {Object|String} Returns a token, or (when `options.mode` is set to `character`) either gets the next character from `lexer.queue`, or consumes the next charcter in the string.
552 | * @api public
553 | */
554 |
555 | next() {
556 | return this.dequeue() || this.advance();
557 | }
558 |
559 | /**
560 | * Skip `n` tokens or characters in the string. Skipped values are not enqueued.
561 | *
562 | * ```js
563 | * const token = lexer.skip(1);
564 | * ```
565 | * @name .skip
566 | * @param {number} `n`
567 | * @returns {Object} returns an array of skipped tokens.
568 | * @api public
569 | */
570 |
571 | skip(n) {
572 | assert.equal(typeof n, 'number', 'expected a number');
573 | return this.skipWhile(() => n-- > 0);
574 | }
575 |
576 | /**
577 | * Skip tokens while the given `fn` returns true.
578 | *
579 | * ```js
580 | * lexer.skipWhile(tok => tok.type !== 'space');
581 | * ```
582 | * @name .skipWhile
583 | * @param {function} `fn` Return true if a token should be skipped.
 * @returns {Array} Returns an array of skipped tokens.
585 | * @api public
586 | */
587 |
588 | skipWhile(fn = !this.eos()) {
589 | const skipped = [];
590 | while (fn.call(this, this.peek())) skipped.push(this.next());
591 | return skipped;
592 | }
593 |
594 | /**
 * Skip tokens until a token of the given `type` is reached. The matching
 * token is consumed and included in the returned array.
 *
 * ```js
 * lexer.skipTo('space');
 * ```
 * @name .skipTo
 * @param {string} `type` The token type to skip to.
 * @returns {Array} Returns an array of skipped tokens, including the matching token.
603 | * @api public
604 | */
605 |
606 | skipTo(type) {
607 | return this.skipWhile(tok => tok && tok.type !== type).concat(this.next());
608 | }
609 |
610 | /**
611 | * Skip the given token `types`.
612 | *
613 | * ```js
614 | * lexer.skipType('space');
615 | * lexer.skipType(['newline', 'space']);
616 | * ```
617 | * @name .skipType
618 | * @param {string|Array} `types` One or more token types to skip.
 * @returns {Array} Returns an array of skipped tokens
620 | * @api public
621 | */
622 |
623 | skipType(types) {
624 | return this.skipWhile(tok => [].concat(types).includes(tok.type));
625 | }
626 |
627 | /**
628 | * Pushes the given `token` onto `lexer.tokens` and calls [.append()](#append) to push
 * `token.value` onto `lexer.stash`. Disable pushing onto the stash by setting
 * `lexer.options.stash` or `token.stash` to `false`.
631 | *
632 | * ```js
633 | * console.log(lexer.tokens.length); // 0
634 | * lexer.push(new Token('star', '*'));
635 | * console.log(lexer.tokens.length); // 1
636 | * console.log(lexer.stash) // ['*']
637 | * ```
638 | * @name .push
639 | * @emits push
640 | * @param {object|String} `token`
641 | * @return {Object} Returns the given `token`.
642 | * @api public
643 | */
644 |
645 | push(token) {
646 | if (!token && token !== '') return;
647 | if (this.options.mode !== 'character') {
648 | assert(this.isToken(token), 'expected token to be an instance of Token');
649 | }
650 |
651 | this.emit('push', token);
652 | this.state.tokens.push(token);
653 |
654 | if (this.options.stash === false || token.stash === false) {
655 | return token;
656 | }
657 |
658 | if (this.options.mode === 'character') {
659 | this.append(token);
660 | } else {
661 | this.append(token.value);
662 | }
663 | return token;
664 | }
665 |
666 | /**
 * Append the given string to the last element on `lexer.stash` when that
 * element is an empty string, otherwise push the string onto the stash.
 * Non-string values are ignored.
 *
 * ```js
 * lexer.append('foo');
 * lexer.append('');
 * lexer.append('bar');
 * console.log(lexer.state.stash);
 * //=> ['foo', 'bar']
679 | * ```
680 | * @name .append
681 | * @param {String} `value`
682 | * @return {String} Returns the last value in the array.
683 | * @api public
684 | */
685 |
686 | append(value) {
687 | if (typeof value !== 'string') return;
688 | let n = this.state.stash.length - 1;
689 | if (this.state.stash[n] === '') {
690 | this.state.stash[n] += value;
691 | } else {
692 | this.state.stash.push(value);
693 | }
694 | this.emit('append', value);
695 | return this;
696 | }
697 |
698 | /**
699 | * Returns true if a token with the given `type` is on the stack.
700 | *
701 | * ```js
702 | * if (lexer.isInside('bracket') || lexer.isInside('brace')) {
703 | * // do stuff
704 | * }
705 | * ```
706 | * @name .isInside
707 | * @param {string} `type` The type to check for.
708 | * @return {boolean}
709 | * @api public
710 | */
711 |
712 | isInside(type) {
713 | return this.state.stack.some(tok => tok.type === type);
714 | }
715 |
716 | /**
 * Throw the given error, or emit an `error` event when a listener is registered.
718 | *
719 | * ```js
720 | * lexer.set('foo', function(tok) {
721 | * if (tok.value !== 'foo') {
722 | * throw this.state.error('expected token.value to be "foo"', tok);
723 | * }
724 | * });
725 | * ```
726 | * @name .error
 * @param {string|Error} `err` An Error instance, or a message used to create one.
 * @return {undefined} Emits when an "error" listener exists, otherwise throws.
730 | * @api public
731 | */
732 |
733 | error(err) {
734 | if (typeof err === 'string') err = new Error(err);
735 | if (this.listenerCount('error') > 0) {
736 | this.emit('error', err);
737 | } else {
738 | throw err;
739 | }
740 | }
741 |
742 | /**
743 | * Throw an error if `lexer.stack` is not empty, or when the remaining string
744 | * cannot be lexed by the currently registered handlers.
745 | * @api private
746 | */
747 |
748 | fail() {
749 | let token = this.state.stack.pop();
750 | if (token) {
751 | const match = token && token.match;
752 | const value = match ? match[0] : token[this.options.value || 'value'];
753 | throw new Error(`unclosed: "${value}"`);
754 | }
755 | if (this.state.string) {
756 | throw new Error(`unmatched input: "${this.state.string.slice(0, 10)}"`);
757 | }
758 | }
759 |
760 | /**
761 | * Call a plugin function on the lexer instance.
762 | *
763 | * ```js
764 | * lexer.use(function(lexer) {
765 | * // do stuff to lexer
766 | * });
767 | * ```
768 | * @name .use
769 | * @param {function} `fn`
770 | * @return {object} Returns the lexer instance.
771 | * @api public
772 | */
773 |
  // Invoke the plugin with the lexer as both `this` and the first argument,
  // then return the lexer so plugin registration can be chained.
  use(fn) {
    fn.call(this, this);
    return this;
  }
778 |
779 | /**
780 | * Static method that returns true if the given value is an
781 | * instance of `snapdragon-lexer`.
782 | *
783 | * ```js
784 | * const Lexer = require('snapdragon-lexer');
785 | * const lexer = new Lexer();
786 | * console.log(Lexer.isLexer(lexer)); //=> true
787 | * console.log(Lexer.isLexer({})); //=> false
788 | * ```
789 | * @name Lexer#isLexer
790 | * @param {object} `lexer`
791 | * @returns {Boolean}
792 | * @api public
793 | * @static
794 | */
795 |
  // True only for actual Lexer instances (duck-typed objects do not qualify).
  static isLexer(lexer) {
    return lexer instanceof Lexer;
  }
799 |
800 | /**
801 | * Static method that returns true if the given value is an
802 | * instance of `snapdragon-token`. This is a proxy to `Token#isToken`.
803 | *
804 | * ```js
805 | * const Token = require('snapdragon-token');
806 | * const Lexer = require('snapdragon-lexer');
807 | * console.log(Lexer.isToken(new Token({type: 'foo'}))); //=> true
808 | * console.log(Lexer.isToken({})); //=> false
809 | * ```
810 | * @name Lexer#isToken
811 | * @param {object} `lexer`
812 | * @returns {Boolean}
813 | * @api public
814 | * @static
815 | */
816 |
817 | static isToken(token) {
818 | return token instanceof Token;
819 | }
820 |
821 | /**
822 | * The State class, exposed as a static property.
823 | * @name Lexer#State
824 | * @api public
825 | * @static
826 | */
827 |
  // Expose the State class so consumers can construct or subclass lexer state.
  static get State() {
    return State;
  }
831 |
832 | /**
833 | * The Token class, exposed as a static property.
834 | * @name Lexer#Token
835 | * @api public
836 | * @static
837 | */
838 |
  // Expose the Token class so consumers can construct tokens directly.
  static get Token() {
    return Token;
  }
842 | }
843 |
844 | /**
845 | * Returns true if value is an object
846 | */
847 |
/**
 * Returns a truthy value only for plain (non-array) objects. Note: falsy
 * inputs are returned as-is, matching short-circuit `&&` semantics.
 */
function isObject(val) {
  return val && typeof val === 'object' && Array.isArray(val) === false;
}
851 |
852 | /**
853 | * Define a non-enumerable property on `obj`
854 | */
855 |
/**
 * Define `key` on `obj` as a non-enumerable, non-writable, non-configurable
 * property. Reflect (rather than Object) is used so a failed definition
 * returns false instead of throwing.
 */
function define(obj, key, value) {
  const descriptor = { value };
  Reflect.defineProperty(obj, key, descriptor);
}
859 |
860 | /**
861 | * Expose `Lexer`
862 | * @type {Class}
863 | */
864 |
865 | module.exports = Lexer;
866 |
--------------------------------------------------------------------------------
/lib/location.js:
--------------------------------------------------------------------------------
1 | 'use strict';
2 |
/**
 * Snapshot of the lexer's current cursor location (index/column/line),
 * copied from `lexer.state.loc` at construction time.
 */
class Position {
  constructor(lexer) {
    const { index, column, line } = lexer.state.loc;
    this.index = index;
    this.column = column;
    this.line = line;
  }
}
10 |
/**
 * A start/end pair of positions, plus the source name from lexer options.
 */
class Location {
  constructor(start, end, lexer) {
    this.start = start;
    this.end = end;
    this.source = lexer.options.source;
  }

  // [startIndex, endIndex] tuple, computed lazily from the two positions.
  get range() {
    const { start, end } = this;
    return [start.index, end.index];
  }

  // Expose the Position class for consumers of Location.
  static get Position() {
    return Position;
  }
}
24 |
25 | module.exports = Location;
26 |
--------------------------------------------------------------------------------
/lib/state.js:
--------------------------------------------------------------------------------
1 | 'use strict';
2 |
3 | module.exports = class State {
4 | constructor(input) {
5 | this.indent = [''];
6 | this.queue = [];
7 | this.stack = [];
8 | this.stash = [''];
9 | this.output = [''];
10 | this.tokens = [];
11 | this.input = input; // unmodified user-defined input string
12 | this.string = input; // input string, minus consumed
13 | this.consumed = ''; // consumed part of the input string
14 |
15 | this.loc = {
16 | index: 0,
17 | column: 0,
18 | line: 1
19 | };
20 | }
21 | };
22 |
--------------------------------------------------------------------------------
/lib/token.js:
--------------------------------------------------------------------------------
1 | 'use strict';
2 |
// Truthy only for plain (non-array) objects; falsy inputs pass through.
const isObject = value => value && typeof value === 'object' && !Array.isArray(value);

/**
 * A lexed token. Accepts either (type, value, match) or an options object,
 * or a regex match array in place of `value` (value becomes the first
 * capture group, falling back to the full match). The `match` array is
 * stored as a non-enumerable property.
 */
class Token {
  constructor(type, value, match) {
    let tokenValue = value;
    let tokenMatch = match;

    // A regex exec/match result passed as `value`: unpack it.
    if (Array.isArray(tokenValue)) {
      tokenMatch = tokenValue;
      tokenValue = tokenMatch[1] || tokenMatch[0];
    }

    if (isObject(type)) {
      Object.assign(this, type);
    } else {
      this.type = type;
      this.value = tokenValue;
    }

    // Keep `match` off JSON/enumeration output; prefer an already-assigned
    // match (from the options-object path) over the argument.
    Reflect.defineProperty(this, 'match', {
      value: this.match || tokenMatch
    });
  }

  static isToken(val) {
    return val instanceof this;
  }
}
28 |
29 | module.exports = Token;
30 |
--------------------------------------------------------------------------------
/package.json:
--------------------------------------------------------------------------------
1 | {
2 | "name": "snapdragon-lexer",
3 | "description": "Converts a string into an array of tokens, with useful methods for looking ahead and behind, capturing, matching, et cetera.",
4 | "version": "4.0.0",
5 | "homepage": "https://github.com/here-be/snapdragon-lexer",
6 | "author": "Jon Schlinkert (https://github.com/jonschlinkert)",
7 | "repository": "here-be/snapdragon-lexer",
8 | "bugs": {
9 | "url": "https://github.com/here-be/snapdragon-lexer/issues"
10 | },
11 | "license": "MIT",
12 | "files": [
13 | "index.js",
14 | "lib"
15 | ],
16 | "main": "index.js",
17 | "engines": {
18 | "node": ">=8"
19 | },
20 | "scripts": {
21 | "test": "mocha",
22 | "cover": "nyc --reporter=text --reporter=html mocha"
23 | },
24 | "devDependencies": {
25 | "define-property": "^2.0.2",
26 | "gulp-format-md": "^2.0.0",
27 | "mocha": "^5.2.0",
28 | "nyc": "^13.1.0"
29 | },
30 | "keywords": [
31 | "compile",
32 | "compiler",
33 | "convert",
34 | "lexer",
35 | "parse",
36 | "parser",
37 | "render",
38 | "scan",
39 | "scanner",
40 | "snapdragon",
41 | "token",
42 | "tokenize",
43 | "tokenizer",
44 | "transform"
45 | ],
46 | "verb": {
47 | "toc": "collapsible",
48 | "layout": "default",
49 | "tasks": [
50 | "readme"
51 | ],
52 | "plugins": [
53 | "gulp-format-md"
54 | ],
55 | "lint": {
56 | "reflinks": true
57 | },
58 | "related": {
59 | "list": [
60 | "snapdragon-parser",
61 | "snapdragon-scanner"
62 | ]
63 | },
64 | "reflinks": [
65 | "snapdragon-location",
66 | "snapdragon-parser",
67 | "snapdragon-position",
68 | "snapdragon-token"
69 | ]
70 | }
71 | }
72 |
--------------------------------------------------------------------------------
/test/api.advance.js:
--------------------------------------------------------------------------------
'use strict';

require('mocha');
const assert = require('assert');
const Lexer = require('..');
let lexer;

// Tests for Lexer#advance(): handler matching, input consumption, failure on
// unmatched input, and character mode. A fresh lexer is built per test.
describe('api.advance', () => {
  beforeEach(() => {
    lexer = new Lexer();
  });

  it('should advance to the next match and return a token', () => {
    lexer.capture('slash', /^\//);
    lexer.capture('text', /^\w+/);
    lexer.capture('star', /^\*/);
    lexer.state.string = 'foo/*';

    const tok = lexer.advance();
    assert.equal(tok.value, 'foo');
  });

  it('should consume the matched substring', () => {
    lexer.capture('slash', /^\//);
    lexer.capture('text', /^\w+/);
    lexer.capture('star', /^\*/);
    lexer.state.string = 'foo/*';
    lexer.advance();

    // The matched prefix moves from state.string to state.consumed.
    assert.equal(lexer.state.consumed, 'foo');
    assert.equal(lexer.state.string, '/*');
  });

  it('should fail when a match is not found', () => {
    // No handlers registered, so fail() throws "unmatched input".
    lexer.state.string = 'foo/*';
    assert.throws(() => lexer.advance(), /unmatched/);
  });

  it('should advance in character mode', () => {
    // Character mode returns one raw character per advance(), no handlers.
    lexer.options.mode = 'character';
    lexer.state.string = 'foo/*';

    assert.equal(lexer.advance(), 'f');
    assert.equal(lexer.advance(), 'o');
    assert.equal(lexer.advance(), 'o');
    assert.equal(lexer.advance(), '/');
    assert.equal(lexer.advance(), '*');
  });
});
50 |
--------------------------------------------------------------------------------
/test/api.append.js:
--------------------------------------------------------------------------------
'use strict';

require('mocha');
const assert = require('assert');
const Lexer = require('..');
let lexer;

// Tests for Lexer#append(): non-strings are ignored, empty strings are
// absorbed by the empty trailing slot, and non-empty strings each land in
// their own stash slot (the initial '' slot takes the first value).
describe('api.append', () => {
  beforeEach(() => {
    lexer = new Lexer();
  });

  it('should do nothing when the value is not a string', () => {
    lexer.append();
    lexer.append(null);
    lexer.append(false);
    assert.deepEqual(lexer.state.stash, ['']);
  });

  it('should not push empty strings onto the stash', () => {
    lexer.append('');
    lexer.append('');
    lexer.append('');
    assert.deepEqual(lexer.state.stash, ['']);
  });

  it('should append non-empty strings to the last value on the stash', () => {
    lexer.append('foo');
    lexer.append('');
    lexer.append('/');
    lexer.append('');
    lexer.append('*');
    lexer.append('.');
    lexer.append('js');
    // Interleaved '' appends leave no trace in the final stash.
    assert.deepEqual(lexer.state.stash, ['foo', '/', '*', '.', 'js']);
  });
});
38 |
--------------------------------------------------------------------------------
/test/api.bos.js:
--------------------------------------------------------------------------------
'use strict';

require('mocha');
const assert = require('assert');
const Lexer = require('..');
let lexer;

// Tests for Lexer#bos() (beginning-of-string).
describe('api.bos', () => {
  beforeEach(() => {
    lexer = new Lexer();
  });

  it('should be true when lexer.state.string is empty', () => {
    assert(lexer.bos());
  });

  it('should be false when lexer.state.string is not empty', () => {
    lexer.state.string = 'foo';
    // NOTE(review): despite the test title, this asserts bos() is still TRUE.
    // Presumably bos() reflects that nothing has been consumed yet, not the
    // contents of state.string — confirm against bos() in index.js.
    assert(lexer.bos());
  });
});
22 |
--------------------------------------------------------------------------------
/test/api.capture.js:
--------------------------------------------------------------------------------
'use strict';

require('mocha');
const assert = require('assert');
const Lexer = require('..');
let lexer;

// Tests for Lexer#capture(): handler registration, the optional transform
// callback, token.match exposure, and the lexer as `this` inside handlers.
describe('api.capture', () => {
  beforeEach(() => {
    lexer = new Lexer();
  });

  it('should register a handler with type and regex only', () => {
    lexer.capture('text', /^\w+/);
    lexer.capture('star', /^\*/);

    assert.equal(typeof lexer.handlers.get('text'), 'function');
    assert.equal(typeof lexer.handlers.get('star'), 'function');
  });

  it('should register a handler with type, regex and handler function', () => {
    lexer.capture('text', /^\w+/, () => {});
    lexer.capture('star', /^\*/, () => {});

    assert.equal(typeof lexer.handlers.get('text'), 'function');
    assert.equal(typeof lexer.handlers.get('star'), 'function');
  });

  it('should expose the captured token to the given function', () => {
    let count = 0;
    lexer.capture('dot', /^\./, function(tok) {
      assert.equal(tok.type, 'dot');
      assert.equal(tok.value, '.');
      count++;
      return tok;
    });

    lexer.lex('...');
    assert.equal(count, 3);
    assert.equal(lexer.state.tokens.length, 3);
  });

  it('should expose the match on the token', () => {
    let count = 0;
    lexer.capture('dot', /^\.([a-z])\./, function(tok) {
      // match[0] is the full match, match[1] the first capture group.
      assert.equal(tok.match[0], '.a.');
      assert.equal(tok.match[1], 'a');
      count++;
      return tok;
    });

    lexer.lex('.a.');
    assert.equal(count, 1);
    assert.equal(lexer.state.tokens.length, 1);
  });

  it('should not call the function unless the regex matches', () => {
    let count = 0;
    lexer.capture('text', /^\w/);
    lexer.capture('dot', /^\./, function(tok) {
      count++;
      return tok;
    });

    // '.a.b.' has three dots and two word characters.
    lexer.lex('.a.b.');
    assert.equal(count, 3);
    assert.equal(lexer.state.tokens.length, 5);
  });

  it('should expose the lexer instance to handler', () => {
    let count = 0;
    lexer.capture('dot', /^\./, function(tok) {
      // `this` inside the transform is the lexer itself.
      assert(Array.isArray(this.state.tokens));
      assert.equal(this.state.tokens.length, count);
      count++;
      return tok;
    });

    lexer.lex('.....');
    assert.equal(count, 5);
    assert.equal(lexer.state.tokens.length, 5);
  });

  it('should expose the lexer instance to handler', () => {
    let count = 0;
    lexer.capture('word', /^([a-y])/);
    lexer.capture('z', /^(z)/);
    lexer.capture('slash', /^(\/)/, function(tok) {
      if (tok) {
        assert(Array.isArray(this.state.tokens));
        count++;
        return tok;
      }
    });

    lexer.lex('a/z');
    assert.equal(count, 1);
  });
});
--------------------------------------------------------------------------------
/test/api.consume.js:
--------------------------------------------------------------------------------
'use strict';

require('mocha');
const assert = require('assert');
const Lexer = require('..');
let lexer;

// Tests for Lexer#consume(n): removes n characters from the front of
// lexer.state.string.
describe('api.consume', () => {
  beforeEach(() => {
    lexer = new Lexer('abcdefghi');
  });

  it('should remove the given length from lexer.state.string', () => {
    lexer.consume(1);
    assert.equal(lexer.state.string, 'bcdefghi');
    lexer.consume(3);
    assert.equal(lexer.state.string, 'efghi');
    lexer.consume(3);
    assert.equal(lexer.state.string, 'hi');
  });
});
22 |
--------------------------------------------------------------------------------
/test/api.current.js:
--------------------------------------------------------------------------------
'use strict';

require('mocha');
const assert = require('assert');
const Lexer = require('..');
let lexer;

// Tests for lexer.current: after a full tokenize(), it holds the last
// token produced by a handler.
describe('api.current', () => {
  beforeEach(() => {
    lexer = new Lexer();
    lexer.capture('dot', /^\./);
    lexer.capture('star', /^\*/);
    lexer.capture('slash', /^\//);
    lexer.capture('text', /^\w+/);
    lexer.state.string = '//foo/bar.com';
  });

  it('should get the previous token', () => {
    lexer.tokenize('//foo/bar.com');
    // The final token lexed from the input is 'com'.
    var text = lexer.current;
    assert(text);
    assert.equal(text.type, 'text');
    assert.equal(text.value, 'com');
  });
});
26 |
--------------------------------------------------------------------------------
/test/api.eos.js:
--------------------------------------------------------------------------------
'use strict';

require('mocha');
const assert = require('assert');
const Lexer = require('..');
let lexer;

// Tests for Lexer#eos() (end-of-string): true only once state.string is empty.
describe('api.eos', () => {
  beforeEach(() => {
    lexer = new Lexer();
  });

  it('should be true when lexer.state.string is empty', () => {
    assert(lexer.eos());
  });

  it('should be false when lexer.state.string is not empty', () => {
    lexer.state.string = 'foo';
    assert(!lexer.eos());
  });
});
22 |
--------------------------------------------------------------------------------
/test/api.error.js:
--------------------------------------------------------------------------------
'use strict';

require('mocha');
const assert = require('assert');
const Lexer = require('..');
let lexer;

// Tests for Lexer#error(): throws by default, converts strings to Errors,
// and emits (instead of throwing) when an "error" listener is registered.
describe('api.error', () => {
  beforeEach(() => {
    lexer = new Lexer();
  });

  it('should throw an error', () => {
    assert.throws(() => {
      lexer.error(new Error('foo'));
    }, /foo/);
  });

  it('should convert a string to an error', () => {
    assert.throws(() => {
      lexer.error('foo');
    }, /foo/);
  });

  it('should emit an error', function(cb) {
    lexer.on('error', () => cb());
    lexer.error(new Error('foo'));
  });

  it('should not throw an error when listening for error', function(cb) {
    lexer.on('error', () => cb());
    assert.doesNotThrow(() => {
      lexer.error(new Error('foo'));
    });
  });
});
37 |
--------------------------------------------------------------------------------
/test/api.fail.js:
--------------------------------------------------------------------------------
'use strict';

require('mocha');
const assert = require('assert');
const Lexer = require('..');
let lexer;

// Tests for Lexer#fail(): "unclosed" errors for leftover stack tokens and
// "unmatched input" errors for leftover input.
describe('api.fail', () => {
  beforeEach(() => {
    lexer = new Lexer();
  });

  it('should fail when lexer.state.stack is not empty', () => {
    lexer.state.stack.push(lexer.token('brace.open', '{'));
    assert.throws(() => lexer.fail(), /unclosed: "\{"/);
  });

  it('should show token.match[0] in error message, when defined', () => {
    // Third argument is the match array; fail() prefers match[0] for the message.
    lexer.state.stack.push(lexer.token('brace.open', '{', ['{']));
    assert.throws(() => lexer.fail(), /unclosed: "\{"/);
  });

  it('should fail when lexer.state.string is not empty', () => {
    lexer.state.string = 'foo';
    assert.throws(() => lexer.fail(), /unmatched input: "foo"/);
  });

  it('should not fail when lexer.state.string is empty', () => {
    lexer.state.string = '';
    assert.doesNotThrow(() => lexer.fail());
  });
});
33 |
--------------------------------------------------------------------------------
/test/api.handle.js:
--------------------------------------------------------------------------------
'use strict';

require('mocha');
const assert = require('assert');
const define = require('define-property');
const Lexer = require('..');
let lexer;

// Tests for Lexer#handle(): calling a single named handler against the
// remaining input string and consuming on match.
describe('api.handle', () => {
  beforeEach(() => {
    lexer = new Lexer('//foo/bar.com');
    // Make token.match non-enumerable so deepEqual compares type/value only.
    lexer.on('token', tok => define(tok, 'match', tok.match));
    lexer.capture('slash', /^\//);
    lexer.capture('text', /^\w+/);
    lexer.capture('dot', /^\./);
  });

  it('should return undefined if the handler does not match a substring', () => {
    assert.equal(typeof lexer.handle('text'), 'undefined');
  });

  it('should return a token if the handler matches a substring', () => {
    assert.deepEqual(lexer.handle('slash'), { type: 'slash', value: '/' });
  });

  it('should return a string when options.mode is "character"', () => {
    // Note: this test replaces the outer `lexer` with a character-mode one.
    lexer = new Lexer('abcd', { mode: 'character' });
    lexer.lex();
    assert.deepEqual(lexer.state.tokens, ['a', 'b', 'c', 'd']);
  });

  it('should update lexer.state.string', () => {
    // Each successful handle() consumes its match from the front of the input.
    assert.equal(lexer.state.string, '//foo/bar.com');
    assert.deepEqual(lexer.handle('slash'), { type: 'slash', value: '/' });

    assert.equal(lexer.state.string, '/foo/bar.com');
    assert.deepEqual(lexer.handle('slash'), { type: 'slash', value: '/' });

    assert.equal(lexer.state.string, 'foo/bar.com');
    assert.deepEqual(lexer.handle('text'), { type: 'text', value: 'foo' });

    assert.equal(lexer.state.string, '/bar.com');
    assert.deepEqual(lexer.handle('slash'), { type: 'slash', value: '/' });

    assert.equal(lexer.state.string, 'bar.com');
    assert.deepEqual(lexer.handle('text'), { type: 'text', value: 'bar' });

    assert.equal(lexer.state.string, '.com');
    assert.deepEqual(lexer.handle('dot'), { type: 'dot', value: '.' });

    assert.equal(lexer.state.string, 'com');
    assert.deepEqual(lexer.handle('text'), { type: 'text', value: 'com' });

    assert.equal(lexer.state.string, '');
  });
});
57 |
--------------------------------------------------------------------------------
/test/api.handlers.js:
--------------------------------------------------------------------------------
'use strict';

require('mocha');
const assert = require('assert');
const Lexer = require('..');
let lexer;

// Tests for the handler registry: .set(), .get() and .has().
describe('lexer.handlers', () => {
  beforeEach(() => {
    lexer = new Lexer();
  });

  describe('.set', () => {
    it('should register handlers on the lexer.handlers object', () => {
      lexer.set('word', () => {});
      lexer.set('star', () => {});

      assert.equal(typeof lexer.handlers.get('word'), 'function');
      assert.equal(typeof lexer.handlers.get('star'), 'function');
    });

    it('should expose the lexer instance to registered handler', () => {
      var count = 0;

      lexer.set('word', function() {
        count++;
        // Handlers are bound so `this` is the lexer even on direct invocation.
        assert(lexer === this, 'expected "this" to be an instance of Lexer');
      });

      lexer.handlers.get('word')();
      assert.equal(count, 1);
    });
  });

  describe('.get', () => {
    it('should get registered handlers from lexer.handlers', () => {
      lexer.set('word', () => {});
      lexer.set('star', () => {});

      assert.equal(typeof lexer.get('word'), 'function');
      assert.equal(typeof lexer.get('star'), 'function');
    });

    it('should throw an error when getting an unregistered handler', () => {
      assert.throws(() => {
        lexer.get('flfofofofofofo');
      });
    });
  });

  describe('.has', () => {
    it('should be true when a handler is registered', () => {
      lexer.set('word', () => {});
      assert(lexer.has('word'));
    });

    it('should be false when a handler is not registered', () => {
      assert(!lexer.has('slsllslsls'));
    });
  });
});
62 |
--------------------------------------------------------------------------------
/test/api.integration.js:
--------------------------------------------------------------------------------
'use strict';

require('mocha');
const assert = require('assert');
const Lexer = require('..');
let lexer;

// Integration tests for `lexer.capture` / `lexer.set` handlers.
describe('api.integration', () => {
  beforeEach(() => {
    lexer = new Lexer();
  });

  it('should register a handler when a function is not passed', () => {
    lexer.capture('text', /^\w+/);
    lexer.capture('star', /^\*/);

    assert.equal(typeof lexer.handlers.get('text'), 'function');
    assert.equal(typeof lexer.handlers.get('star'), 'function');
  });

  it('should register a handler when a function is passed', () => {
    lexer.capture('text', /^\w+/, () => {});
    lexer.capture('star', /^\*/, () => {});

    assert.equal(typeof lexer.handlers.get('text'), 'function');
    assert.equal(typeof lexer.handlers.get('star'), 'function');
  });

  it('should expose the captured token to the given function', () => {
    let count = 0;
    lexer.capture('dot', /^\./, function(tok) {
      assert.equal(tok.type, 'dot');
      assert.equal(tok.value, '.');
      count++;
      return tok;
    });

    lexer.tokenize('...');
    assert.equal(count, 3);
    assert.equal(lexer.state.tokens.length, 3);
  });

  it('should expose the match on the token', () => {
    let count = 0;
    lexer.capture('dot', /^\.([a-z])\./, function(tok) {
      assert.equal(tok.match[0], '.a.');
      assert.equal(tok.match[1], 'a');
      count++;
      return tok;
    });

    lexer.tokenize('.a.');
    assert.equal(count, 1);
    assert.equal(lexer.state.tokens.length, 1);
  });

  it('should not call the function unless the regex matches', () => {
    let count = 0;
    lexer.capture('text', /^\w/);
    lexer.capture('dot', /^\./, function(tok) {
      count++;
      return tok;
    });

    lexer.tokenize('.a.b.');
    assert.equal(count, 3);
    assert.equal(lexer.state.tokens.length, 5);
  });

  it('should expose the lexer instance to handler', () => {
    let count = 0;
    // Regular function: `this` inside a capture handler is the lexer.
    lexer.capture('dot', /^\./, function(tok) {
      assert(Array.isArray(this.state.tokens));
      assert.equal(this.state.tokens.length, count);
      count++;
      return tok;
    });

    lexer.tokenize('.....');
    assert.equal(count, 5);
    assert.equal(lexer.state.tokens.length, 5);
  });

  // Renamed: the title previously duplicated the test above verbatim,
  // which made mocha's failure output ambiguous.
  it('should only invoke the handler when its pattern matches', () => {
    let count = 0;
    lexer.capture('word', /^([a-y])/);
    lexer.capture('z', /^(z)/);
    lexer.capture('slash', /^(\/)/, function(tok) {
      if (tok) {
        assert(Array.isArray(this.state.tokens));
        count++;
        return tok;
      }
    });

    lexer.tokenize('a/z');
    assert.equal(count, 1);
  });

  // Renamed for the same reason as above.
  it('should support scanning and peeking from inside a handler', () => {
    let count = 0;

    lexer.set('word', function() {
      return this.scan(/^(\w)/, 'word');
    });

    lexer.set('slash', function() {
      const tok = this.scan(/^(\/)/, 'slash');
      if (tok) {
        const next = this.peek();
        if (next && next.type === 'word') {
          count++;
        }
        return tok;
      }
    });

    lexer.tokenize('a/b/c/d/e/');
    assert.equal(lexer.state.tokens.length, 10);
    assert.equal(count, 4);
  });
});
123 |
--------------------------------------------------------------------------------
/test/api.isInside.js:
--------------------------------------------------------------------------------
'use strict';

require('mocha');
const assert = require('assert');
const Lexer = require('..');
let lexer;

// `lexer.isInside(type)` checks the token on top of `lexer.state.stack`.
describe('api.isInside', () => {
  beforeEach(() => {
    lexer = new Lexer();
  });

  it('should be true when type matches the last token on lexer.state.stack', () => {
    const token = lexer.token('foo');
    lexer.state.stack.push(token);
    assert(lexer.isInside('foo'));
  });

  it('should be false when type does not match last token on lexer.state.stack', () => {
    const token = lexer.token('foo');
    lexer.state.stack.push(token);
    assert(!lexer.isInside('bar'));
  });
});
23 |
--------------------------------------------------------------------------------
/test/api.lex.js:
--------------------------------------------------------------------------------
'use strict';

require('mocha');
const assert = require('assert');
const Token = require('../lib/token');
const Lexer = require('..');
let lexer;

// Tests for `lexer.lex` and the `.loc` (location) data patched onto tokens.
describe('lexer.lex', () => {
  beforeEach(() => {
    lexer = new Lexer();
    lexer.isLexer = true;
    lexer.capture('text', /^\w/);
  });

  it('should lex the given string', () => {
    const tokens = lexer.lex('abc');
    assert(Array.isArray(tokens));
    assert.equal(tokens.length, 3);
  });

  it('should lex the string passed to the constructor', () => {
    lexer = new Lexer('abcd');
    lexer.capture('text', /^\w/);
    assert.equal(lexer.state.input, 'abcd');

    const tokens = lexer.lex();
    assert(Array.isArray(tokens));
    // Bug fix: `assert(tokens.length, 4)` never compared against 4 — the
    // second argument to `assert()` is a failure message, not an expected
    // value. Use `assert.equal` so the token count is actually verified.
    assert.equal(tokens.length, 4);
  });

  it('should patch token with .loc', () => {
    const tokens = lexer.lex('abc');
    assert(Array.isArray(tokens));
    assert(tokens.length);

    const tok = tokens[0];
    assert.equal(tok.type, 'text');

    assert(tok.loc);
    assert(tok.loc.start);
    assert(tok.loc.end);

    // a
    assert.equal(tok.loc.start.line, 1);
    assert.equal(tok.loc.start.column, 0);
    assert.equal(tok.loc.end.line, 1);
    assert.equal(tok.loc.end.column, 1);

    // b
    assert.equal(tokens[1].loc.start.line, 1);
    assert.equal(tokens[1].loc.start.column, 1);
    assert.equal(tokens[1].loc.end.line, 1);
    assert.equal(tokens[1].loc.end.column, 2);

    // c
    assert.equal(tokens[2].loc.start.line, 1);
    assert.equal(tokens[2].loc.start.column, 2);
    assert.equal(tokens[2].loc.end.line, 1);
    assert.equal(tokens[2].loc.end.column, 3);
  });

  it('should create a new Token with the given loc', () => {
    const loc = lexer.location();
    const token = loc(new Token());

    assert(token.loc);
    assert(token.loc.start);
    assert(token.loc.start.line);
    assert.equal(token.loc.start.column, 0);

    assert(token.loc.end);
    assert(token.loc.end.line);
    assert.equal(token.loc.end.column, 0);
  });

  it('should set/get location range', () => {
    lexer = new Lexer('foo/**/*.js');
    lexer.capture('stars', /^\*+/);
    lexer.capture('slash', /^\//);
    lexer.capture('dot', /^\./);
    lexer.capture('text', /^\w+/);
    lexer.lex();

    const stars = lexer.state.tokens.find(tok => tok.type === 'stars');
    assert.deepEqual(stars.loc.range, [4, 6]);
  });

  it('should create a new Token with the given loc and type', () => {
    const loc = lexer.location();
    const token = loc(new Token('*'));

    assert.equal(token.type, '*');

    assert(token.loc);
    assert(token.loc.start);
    assert(token.loc.start.line);
    assert.equal(token.loc.start.column, 0);

    assert(token.loc.end);
    assert(token.loc.end.line);
    assert.equal(token.loc.end.column, 0);
  });

  it('should create a new Token with the given loc, type, and val', () => {
    const loc = lexer.location();
    const token = loc(new Token('star', '*'));

    assert.equal(token.type, 'star');
    assert.equal(token.value, '*');

    assert(token.loc);
    assert(token.loc.start);
    assert(token.loc.start.line);
    assert.equal(token.loc.start.column, 0);

    assert(token.loc.end);
    assert(token.loc.end.line);
    assert.equal(token.loc.end.column, 0);
  });

  it('should create a new Token with the given loc and object', () => {
    const loc = lexer.location();
    const token = loc(new Token('star', '*'));

    assert.equal(token.value, '*');
    assert.equal(token.type, 'star');

    assert(token.loc);
    assert(token.loc.start);
    assert(token.loc.start.line);
    assert.equal(token.loc.start.column, 0);

    assert(token.loc.end);
    assert(token.loc.end.line);
    assert.equal(token.loc.end.column, 0);
  });

  it('should patch line number onto token.loc', () => {
    lexer.capture('slash', /^\//);
    lexer.capture('star', /^\*/);
    lexer.capture('text', /^\w+/);
    lexer.capture('dot', /^\./);
    lexer.capture('newline', /^\n/);

    lexer.lex('abc\nmno\nxyx');

    const tokens = lexer.state.tokens;
    assert.equal(tokens[0].type, 'text');
    assert.equal(tokens[1].type, 'newline');
    assert.equal(tokens[2].type, 'text');

    assert.deepEqual(tokens[0].loc, {
      source: undefined,
      start: {
        index: 0,
        column: 0,
        line: 1
      },
      end: {
        index: 3,
        column: 3,
        line: 1
      }
    });

    assert.deepEqual(tokens[1].loc, {
      source: undefined,
      start: {
        index: 3,
        column: 3,
        line: 1
      },
      end: {
        index: 4,
        column: 1,
        line: 2
      }
    });

    assert.deepEqual(tokens[2].loc, {
      source: undefined,
      start: {
        index: 4,
        column: 1,
        line: 2
      },
      end: {
        index: 7,
        column: 4,
        line: 2
      }
    });

    assert.deepEqual(tokens[3].loc, {
      source: undefined,
      start: {
        index: 7,
        column: 4,
        line: 2
      },
      end: {
        index: 8,
        column: 1,
        line: 3
      }
    });

    assert.deepEqual(tokens[4].loc, {
      source: undefined,
      start: {
        index: 8,
        column: 1,
        line: 3
      },
      end: {
        index: 11,
        column: 4,
        line: 3
      }
    });
  });
});
224 |
--------------------------------------------------------------------------------
/test/api.lookahead.js:
--------------------------------------------------------------------------------
'use strict';

require('mocha');
const assert = require('assert');
const Lexer = require('..');
let lexer;

// `lexer.lookahead(n)` captures up to `n` tokens onto `lexer.state.queue`
// and returns the last one, consuming the matched substring as it goes.
describe('api.lookahead', () => {
  beforeEach(() => {
    lexer = new Lexer();
    lexer.capture('dot', /^\./);
    lexer.capture('star', /^\*/);
    lexer.capture('slash', /^\//);
    lexer.capture('text', /^\w+/);
    lexer.state.string = '//foo/bar.com';
  });

  it('should throw an error when the first argument is not a number', () => {
    assert.throws(() => lexer.lookahead(), /expected/);
  });

  it('should get the next "n" tokens and return the last one', () => {
    const token = lexer.lookahead(3);
    assert(token);
    assert.equal(token.type, 'text');
    assert.equal(token.value, 'foo');
  });

  it('should consume the captured substring', () => {
    lexer.lookahead(3);
    assert.equal(lexer.state.consumed, '//foo');
  });

  it('should add the captured tokens to lexer.state.queue', () => {
    const token = lexer.lookahead(3);
    assert.equal(lexer.state.queue.length, 3);
    assert.equal(lexer.state.queue[2], token);
  });

  it('should use enqueued tokens before capturing more', () => {
    lexer.lookahead(3);
    assert.equal(lexer.state.consumed, '//foo');
    assert.equal(lexer.state.queue.length, 3);

    lexer.lookahead(4);
    assert.equal(lexer.state.consumed, '//foo/');
    assert.equal(lexer.state.queue.length, 4);
  });

  it('should get the next token when lexer.state.queue is empty', () => {
    // Each iteration drains the queue, forcing a fresh capture.
    for (const consumed of ['/', '//', '//foo', '//foo/']) {
      lexer.lookahead(1);
      assert.equal(lexer.state.consumed, consumed);
      assert.equal(lexer.state.queue.length, 1);
      lexer.state.queue = [];
    }
  });
});
72 |
--------------------------------------------------------------------------------
/test/api.lookbehind.js:
--------------------------------------------------------------------------------
'use strict';

require('mocha');
const assert = require('assert');
const Lexer = require('..');
let lexer;

// `lexer.lookbehind(n)` returns the nth most recently lexed token.
describe('api.lookbehind', () => {
  beforeEach(() => {
    lexer = new Lexer();
    lexer.capture('dot', /^\./);
    lexer.capture('star', /^\*/);
    lexer.capture('slash', /^\//);
    lexer.capture('text', /^\w+/);
    lexer.state.string = '//foo/bar.com';
  });

  it('should throw an error when the first argument is not a number', () => {
    assert.throws(() => lexer.lookbehind(), /expected/);
  });

  it('should look behind "n" tokens', () => {
    lexer.tokenize('//foo/bar.com');

    const text = lexer.lookbehind(1);
    assert(text);
    assert.equal(text.type, 'text');
    assert.equal(text.value, 'com');

    const dot = lexer.lookbehind(2);
    assert(dot);
    assert.equal(dot.type, 'dot');
    assert.equal(dot.value, '.');
  });
});
35 |
--------------------------------------------------------------------------------
/test/api.match.js:
--------------------------------------------------------------------------------
'use strict';

require('mocha');
const assert = require('assert');
const State = require('../lib/state');
const Lexer = require('..');
let lexer;

// Tests for `lexer.match(regex)`, the low-level regex matcher.
describe('api.match', () => {
  beforeEach(() => {
    lexer = new Lexer('foo');
    lexer.capture('text', /^\w+/);
    lexer.capture('newline', /^\n+/);
  });

  it('should throw when arguments are invalid', () => {
    for (const invalid of [null, [], {}]) {
      assert.throws(() => lexer.match(invalid), /expected/);
    }
  });

  it('should throw when regex matches an empty string', () => {
    assert.throws(() => lexer.match(/^(?=.)/), /empty/);
  });

  it('should match with regex', () => {
    const m = lexer.match(/^\w/);
    assert(m);
    assert.equal(m[0], 'f');
    assert.equal(m.index, 0);
    assert.equal(m.input, 'foo');
  });

  it('should throw an error when regex does not have a boundary', () => {
    lexer = new Lexer();
    lexer.capture('slash', /\//);
    assert.throws(() => lexer.tokenize('a/b/c/d/e/f/g'));
  });

  it('should skip spaces', () => {
    lexer.state = new State('foo bar');
    assert.equal(lexer.advance().type, 'text');
    assert.equal(lexer.match(/^[\t ]+/), ' ');
    assert.equal(lexer.advance().type, 'text');
  });

  it('should skip tabs and spaces', () => {
    lexer.state = new State('foo \t \t bar');
    assert.equal(lexer.advance().type, 'text');
    assert.equal(lexer.match(/^[\t ]+/), ' \t \t ');
    assert.equal(lexer.advance().type, 'text');
  });

  it('should not skip newlines', () => {
    lexer.state = new State('foo \t \n bar');
    assert.equal(lexer.advance().type, 'text');
    assert.equal(lexer.match(/^[\t ]+/), ' \t ');
    assert.equal(lexer.advance().type, 'newline');
  });
});
61 |
--------------------------------------------------------------------------------
/test/api.peek.js:
--------------------------------------------------------------------------------
'use strict';

require('mocha');
const assert = require('assert');
const Lexer = require('..');
let lexer;

// `lexer.peek()` captures the next token onto `lexer.state.queue`
// without removing it, consuming the matched substring once.
describe('api.peek', () => {
  beforeEach(() => {
    lexer = new Lexer();
    lexer.capture('dot', /^\./);
    lexer.capture('star', /^\*/);
    lexer.capture('slash', /^\//);
    lexer.capture('text', /^\w+/);
    lexer.state.string = '//foo/bar.com';
  });

  it('should get the next token', () => {
    const token = lexer.peek();
    assert(token);
    assert.equal(token.type, 'slash');
    assert.equal(token.value, '/');
  });

  it('should consume the captured substring', () => {
    lexer.peek();
    assert.equal(lexer.state.consumed, '/');
  });

  it('should add the captured token to lexer.state.queue', () => {
    const token = lexer.peek();
    assert.equal(lexer.state.queue.length, 1);
    assert.equal(lexer.state.queue[0], token);
  });

  it('should not consume more input if a token is enqueued', () => {
    // Repeated peeks return the enqueued token; nothing else is consumed.
    for (let i = 0; i < 3; i++) {
      lexer.peek();
      assert.equal(lexer.state.consumed, '/');
      assert.equal(lexer.state.queue.length, 1);
    }
  });

  it('should get the next token when lexer.state.queue is empty', () => {
    // Draining the queue between peeks forces a fresh capture each time.
    for (const consumed of ['/', '//', '//foo', '//foo/']) {
      lexer.peek();
      assert.equal(lexer.state.consumed, consumed);
      assert.equal(lexer.state.queue.length, 1);
      lexer.state.queue = [];
    }
  });
});
72 |
--------------------------------------------------------------------------------
/test/api.prev.js:
--------------------------------------------------------------------------------
'use strict';

require('mocha');
const assert = require('assert');
const Lexer = require('..');
let lexer;

// `lexer.prev()` returns the most recent token on `lexer.state.tokens`.
describe('api.prev', () => {
  beforeEach(() => {
    lexer = new Lexer();
    lexer.capture('dot', /^\./);
    lexer.capture('star', /^\*/);
    lexer.capture('slash', /^\//);
    lexer.capture('text', /^\w+/);
    lexer.state.string = '//foo/bar.com';
  });

  it('should get the prev token on the tokens array', () => {
    lexer.tokenize('//foo/bar.com');
    const prev = lexer.prev();
    assert(prev);
    assert.equal(prev.type, 'text');
    assert.equal(prev.value, 'com');
  });
});
26 |
--------------------------------------------------------------------------------
/test/api.push.js:
--------------------------------------------------------------------------------
'use strict';

require('mocha');
const assert = require('assert');
const Lexer = require('..');
let lexer;

// `lexer.push(token)` records token values on `lexer.state.stash`.
describe('api.push', () => {
  beforeEach(() => {
    lexer = new Lexer();
  });

  it('should throw when value is not a token', () => {
    assert.throws(() => lexer.push('foo'), /expected/);
  });

  it('should accept any value when options.mode is "character"', () => {
    lexer.options.mode = 'character';
    lexer.push('foo');
  });

  it('should push token values onto `lexer.state.stash`', () => {
    for (const [type, value] of [['star', '*'], ['dot', '.'], ['text', 'js']]) {
      lexer.push(lexer.token(type, value));
    }
    assert.deepEqual(lexer.state.stash, ['*', '.', 'js']);
  });

  it('should not stash when options.stash is false', () => {
    lexer.options.stash = false;
    lexer.push(lexer.token('star', '*'));
    lexer.push(lexer.token('dot', '.'));
    lexer.push(lexer.token('text', 'js'));
    assert.deepEqual(lexer.state.stash, ['']);
  });

  it('should not add value when token.stash is false', () => {
    lexer.options.stash = false;
    lexer.push(lexer.token({ type: 'star', value: '*', stash: false }));
    lexer.push(lexer.token({ type: 'dot', value: '.', stash: false }));
    lexer.push(lexer.token({ type: 'text', value: 'js', stash: false }));
    assert.deepEqual(lexer.state.stash, ['']);
  });
});
45 |
--------------------------------------------------------------------------------
/test/api.scan.js:
--------------------------------------------------------------------------------
'use strict';

require('mocha');
const assert = require('assert');
const define = require('define-property');
const Lexer = require('..');
let lexer;

// `lexer.scan(regex, type)` captures one token of the given type.
describe('api.scan', () => {
  beforeEach(() => {
    lexer = new Lexer();
    lexer.state.string = '//foo/bar.com';
    // Hide `match` so deepEqual comparisons only see { type, value }.
    lexer.on('token', tok => define(tok, 'match', tok.match));
  });

  it('should throw when regex matches an empty string', () => {
    assert.throws(() => lexer.scan(/^(?=.)/, 'foo'), /empty/);
  });

  it('should add type to error object', function(cb) {
    try {
      lexer.scan(/^(?=.)/, 'foo');
    } catch (err) {
      assert.equal(err.type, 'foo');
      cb();
    }
  });

  it('should get the next token from the given regex', () => {
    const steps = [
      [/^\//, 'slash', '/'],
      [/^\//, 'slash', '/'],
      [/^\w+/, 'text', 'foo'],
      [/^\//, 'slash', '/'],
      [/^\w+/, 'text', 'bar'],
      [/^\./, 'dot', '.'],
      [/^\w+/, 'text', 'com']
    ];
    for (const [regex, type, value] of steps) {
      assert.deepEqual(lexer.scan(regex, type), { type, value });
    }
  });

  it('should emit "scan"', () => {
    let count = 0;
    const expected = [
      { type: 'slash', value: '/' },
      { type: 'slash', value: '/' },
      { type: 'text', value: 'foo' },
      { type: 'slash', value: '/' },
      { type: 'text', value: 'bar' },
      { type: 'dot', value: '.' },
      { type: 'text', value: 'com' }
    ];

    lexer.on('scan', tok => {
      assert.deepEqual(expected[count++], tok);
    });

    const steps = [
      [/^\//, 'slash'],
      [/^\//, 'slash'],
      [/^\w+/, 'text'],
      [/^\//, 'slash'],
      [/^\w+/, 'text'],
      [/^\./, 'dot'],
      [/^\w+/, 'text']
    ];
    for (const [regex, type] of steps) {
      lexer.scan(regex, type);
    }

    assert.equal(count, expected.length);
  });
});
66 |
--------------------------------------------------------------------------------
/test/api.set.js:
--------------------------------------------------------------------------------
'use strict';

require('mocha');
const assert = require('assert');
const Lexer = require('..');
let lexer;

// Tests for `lexer.set(type, handler)` and how handler results become tokens.
describe('api.set', () => {
  beforeEach(() => {
    lexer = new Lexer();
  });

  it('should set a handler function on lexer.handlers', () => {
    lexer.set('star', () => {});
    assert.equal(typeof lexer.handlers.get('star'), 'function');
  });

  it('should create a noop by default', () => {
    lexer.set('star');
    assert.equal(typeof lexer.handlers.get('star'), 'function');
  });

  it('should call the registered handler function', () => {
    lexer.state.string = '*/';
    // Regular function: handlers are invoked with the lexer as `this`.
    lexer.set('star', function() {
      const m = this.match(/^\*/, 'star');
      if (m) {
        return this.token('star', m);
      }
    });

    const token = lexer.advance();
    assert(token);
    assert.equal(token.type, 'star');
    assert.equal(token.value, '*');
  });

  it('should convert returned objects into Token instances', () => {
    lexer.state.string = '*/';
    lexer.set('star', function() {
      const m = this.match(/^\*/, 'star');
      if (m) {
        return { type: 'star', match: m };
      }
    });

    const token = lexer.advance();
    assert(token);
    assert(Lexer.isToken(token));
  });

  it('should use handler type to set token.type when not defined', () => {
    lexer.state.string = '*/';
    lexer.set('star', function() {
      const m = this.match(/^\*/, 'star');
      if (m) {
        return { match: m };
      }
    });

    const token = lexer.advance();
    assert(token);
    assert.equal(token.type, 'star');
    assert(Lexer.isToken(token));
  });
});
67 |
--------------------------------------------------------------------------------
/test/api.skip.js:
--------------------------------------------------------------------------------
'use strict';

require('mocha');
const assert = require('assert');
const Lexer = require('..');
let lexer;

// `lexer.skip(n)` discards the next `n` tokens.
describe('api.skip', () => {
  beforeEach(() => {
    lexer = new Lexer('//foo/bar.com');
    lexer.capture('dot', /^\./);
    lexer.capture('star', /^\*/);
    lexer.capture('slash', /^\//);
    lexer.capture('text', /^\w+/);
  });

  it('should skip the specified number of tokens', () => {
    lexer.skip(2);
    assert.equal(lexer.peek().type, 'text');
  });

  it('should not add the next (peeked) token to the queue', () => {
    lexer.skip(2);
    assert.equal(lexer.state.queue.length, 1);
  });
});
27 |
--------------------------------------------------------------------------------
/test/api.skipTo.js:
--------------------------------------------------------------------------------
'use strict';

require('mocha');
const assert = require('assert');
const Lexer = require('..');
let lexer;

// `lexer.skipTo(type)` consumes tokens up to and including the given type.
describe('api.skipTo', () => {
  beforeEach(() => {
    lexer = new Lexer('//foo/bar.com');
    lexer.capture('dot', /^\./);
    lexer.capture('star', /^\*/);
    lexer.capture('slash', /^\//);
    lexer.capture('text', /^\w+/);
  });

  it('should skip to the specified type', () => {
    const skipped = lexer.skipTo('dot');
    // The returned array ends with the matching token.
    assert.equal(skipped.pop().type, 'dot');
    assert.equal(lexer.state.string, 'com');
  });
});
23 |
--------------------------------------------------------------------------------
/test/api.skipType.js:
--------------------------------------------------------------------------------
'use strict';

require('mocha');
const assert = require('assert');
const Lexer = require('..');
let lexer;

// `lexer.skipType(types)` consumes tokens while their type is listed.
describe('api.skipType', () => {
  beforeEach(() => {
    lexer = new Lexer('//foo/bar.com');
    lexer.capture('dot', /^\./);
    lexer.capture('star', /^\*/);
    lexer.capture('slash', /^\//);
    lexer.capture('text', /^\w+/);
  });

  it('should skip the specified types', () => {
    lexer.skipType(['slash', 'text']);
    assert.equal(lexer.peek().type, 'dot');
  });

  it('should skip the specified type', () => {
    lexer.skipType('slash');
    assert.equal(lexer.peek().type, 'text');
  });
});
27 |
--------------------------------------------------------------------------------
/test/api.skipWhile.js:
--------------------------------------------------------------------------------
'use strict';

require('mocha');
const assert = require('assert');
const Lexer = require('..');
let lexer;

// `lexer.skipWhile(fn)` consumes tokens while the predicate returns true.
describe('api.skipWhile', () => {
  beforeEach(() => {
    lexer = new Lexer('//foo/bar.com');
    lexer.capture('dot', /^\./);
    lexer.capture('star', /^\*/);
    lexer.capture('slash', /^\//);
    lexer.capture('text', /^\w+/);
  });

  it('should skip while token.type does not match', () => {
    lexer.skipWhile(tok => tok.type !== 'dot');
    assert.equal(lexer.peek().type, 'dot');
    assert.equal(lexer.peek().value, '.');
  });

  it('should skip while lexer.state.string does not match', () => {
    // Fix: the predicate never used its token argument, so the unused
    // `tok` parameter was misleading; the condition reads lexer state only.
    lexer.skipWhile(() => lexer.state.string[0] !== '.');
    assert.equal(lexer.peek().type, 'text');
    assert.equal(lexer.peek().value, 'bar');
  });
});
29 |
--------------------------------------------------------------------------------
/test/api.token.js:
--------------------------------------------------------------------------------
'use strict';

require('mocha');
const assert = require('assert');
const Lexer = require('..');
let lexer;

// Tests for `lexer.token(...)`, the Token factory.
describe('api.token', () => {
  beforeEach(() => {
    lexer = new Lexer();
  });

  it('should return an instance of lexer.Token', () => {
    assert(lexer.token('foo') instanceof Lexer.Token);
  });

  it('should create a token from a string', () => {
    const tok = lexer.token('foo');
    assert.equal(tok.type, 'foo');
    assert.equal(tok.value, undefined);
  });

  it('should create a token from a string and value', () => {
    const tok = lexer.token('foo', 'bar');
    assert.equal(tok.type, 'foo');
    assert.equal(tok.value, 'bar');
  });

  it('should create a token from an object', () => {
    const tok = lexer.token({ type: 'foo', value: 'bar' });
    assert.equal(tok.type, 'foo');
    assert.equal(tok.value, 'bar');
  });

  it('should create a token from an object and match array', () => {
    const tok = lexer.token({ type: 'foo', value: 'bar' }, ['bar']);
    assert.equal(tok.type, 'foo');
    assert.equal(tok.value, 'bar');
    assert.deepEqual(tok.match, ['bar']);
  });

  it('should create a token from type and match array', () => {
    const tok = lexer.token('foo', ['bar']);
    assert.equal(tok.type, 'foo');
    assert.equal(tok.value, 'bar');
    assert.deepEqual(tok.match, ['bar']);
  });

  it('should emit "token"', () => {
    const emitted = [];

    lexer.capture('slash', /^\//);
    lexer.capture('text', /^\w+/);
    lexer.capture('star', /^\*/);
    lexer.on('token', tok => emitted.push(tok));

    lexer.lex('a/*/b');
    assert.equal(emitted.length, 5);
  });
});
61 |
--------------------------------------------------------------------------------
/test/api.use.js:
--------------------------------------------------------------------------------
'use strict';

require('mocha');
const assert = require('assert');
const Lexer = require('..');
let lexer;

// `lexer.use(plugin)` invokes the plugin with the lexer as `this`.
describe('api.use', () => {
  beforeEach(() => {
    lexer = new Lexer();
  });

  it('should call a plugin function with the lexer instance', cb => {
    // Regular function: plugins receive the lexer as `this`.
    lexer.use(function() {
      assert(this instanceof Lexer);
      cb();
    });
  });
});
20 |
--------------------------------------------------------------------------------
/test/fixtures/file.txt:
--------------------------------------------------------------------------------
1 | This is an input string.
--------------------------------------------------------------------------------
/test/lexer.js:
--------------------------------------------------------------------------------
'use strict';

require('mocha');
const assert = require('assert');
const Lexer = require('..');
let lexer;

// Basic state accessors. Each assertion is repeated on purpose to
// confirm that reading a property twice yields the same value.
describe('lexer', () => {
  beforeEach(() => {
    lexer = new Lexer();
  });

  it('should set and get state.options', () => {
    lexer.options = { foo: 'bar' };
    assert.equal(lexer.options.foo, 'bar');
    assert.equal(lexer.options.foo, 'bar');
  });

  it('should set state.string', () => {
    lexer.state.string = 'foo';
    assert.equal(lexer.state.string, 'foo');
    assert.equal(lexer.state.string, 'foo');
  });

  it('should set state.input', () => {
    lexer.state.input = 'foo';
    assert.equal(lexer.state.input, 'foo');
    assert.equal(lexer.state.input, 'foo');
  });

  it('should set state.consumed', () => {
    lexer.state.consumed = 'foo';
    assert.equal(lexer.state.consumed, 'foo');
    assert.equal(lexer.state.consumed, 'foo');
  });
});
37 |
--------------------------------------------------------------------------------
/test/lexer.static.js:
--------------------------------------------------------------------------------
1 | 'use strict';
2 |
3 | require('mocha');
4 | const assert = require('assert');
5 | const Lexer = require('..');
6 | let lexer;
7 |
// Static members exposed on the Lexer constructor itself.
describe('Lexer static methods', () => {
  beforeEach(() => {
    lexer = new Lexer();
  });

  it('should be true if value is an instance of Lexer', () => {
    assert.ok(Lexer.isLexer(lexer));
  });

  it('should expose State class', () => {
    const { State } = Lexer;
    assert(typeof State === 'function');
  });
});
21 |
--------------------------------------------------------------------------------
/test/regressions.js:
--------------------------------------------------------------------------------
1 | 'use strict';
2 |
3 | require('mocha');
4 | const assert = require('assert');
5 | const Lexer = require('..');
6 | let lexer;
7 |
// Regression guard: the public surface of Lexer must not shrink.
describe('api.regressions', () => {
  beforeEach(() => {
    lexer = new Lexer();
  });

  describe('constructor:', () => {
    it('should return an instance of Lexer:', () => {
      assert(lexer instanceof Lexer);
    });
  });

  describe('prototype methods:', () => {
    // `const` instead of legacy `var` — the list is never reassigned,
    // matching the const/let style used throughout this suite.
    const methods = [
      'advance',
      'get',
      'match',
      'next',
      'location',
      'prev',
      'set',
      'skip',
      'token',
      'tokenize',
      'updateLocation',
      'use',
    ];

    methods.forEach(method => {
      it('should expose the `' + method + '` method', () => {
        // strictEqual avoids `==` coercion; third arg names the method
        // in the failure message.
        assert.strictEqual(typeof lexer[method], 'function', method);
      });
    });
  });
});
42 |
--------------------------------------------------------------------------------
/test/static.isToken.js:
--------------------------------------------------------------------------------
1 | 'use strict';
2 |
3 | require('mocha');
4 | const assert = require('assert');
5 | const Lexer = require('..');
6 | let lexer;
7 |
// Lexer.isToken: positive and negative type guard checks.
describe('static.isToken', () => {
  beforeEach(() => {
    lexer = new Lexer();
  });

  it('should be true when the value is a token', () => {
    const token = lexer.token('foo');
    assert.ok(Lexer.isToken(token));
  });

  it('should be false when the value is not a token', () => {
    // Called with no argument: undefined is not a token.
    assert.ok(!Lexer.isToken());
  });
});
21 |
--------------------------------------------------------------------------------