├── .github └── workflows │ └── ci.yml ├── .gitignore ├── .travis.yml ├── Cargo.toml ├── LICENSE-APACHE ├── LICENSE-MIT ├── README.md ├── benches ├── bench_c.rs ├── bench_rust_grammar_subset.rs ├── benches.rs └── part_gcc_test.i ├── src ├── binary_heap.rs ├── debug.rs ├── events.rs ├── forest │ ├── bocage │ │ ├── mod.rs │ │ ├── node.rs │ │ ├── order.rs │ │ └── traverse.rs │ ├── compact_bocage │ │ ├── mod.rs │ │ ├── node.rs │ │ ├── order.rs │ │ └── traverse.rs │ ├── mod.rs │ ├── node_handle.rs │ └── null_forest.rs ├── grammar.rs ├── item.rs ├── lib.rs ├── memory_use.rs └── recognizer.rs └── tests ├── grammars ├── ambiguous_arith.rs ├── mod.rs └── precedenced_arith.rs ├── helpers ├── cartesian_product.rs ├── mod.rs ├── parse.rs ├── simple_compact_evaluator.rs └── simple_evaluator.rs ├── test_c.rs ├── test_nulling.rs ├── test_recognizer.rs ├── test_sequence.rs ├── test_serde.rs └── tests.rs /.github/workflows/ci.yml: -------------------------------------------------------------------------------- 1 | name: CI 2 | 3 | on: 4 | push: 5 | pull_request: 6 | schedule: [cron: "40 1 * * *"] 7 | 8 | permissions: 9 | contents: read 10 | 11 | env: 12 | RUSTFLAGS: -Dwarnings 13 | 14 | jobs: 15 | test: 16 | name: Test suite 17 | runs-on: ubuntu-latest 18 | timeout-minutes: 45 19 | steps: 20 | - uses: actions/checkout@v3 21 | - uses: dtolnay/rust-toolchain@nightly 22 | - run: cargo test 23 | 24 | windows: 25 | name: Test suite (windows) 26 | runs-on: windows-latest 27 | timeout-minutes: 45 28 | steps: 29 | - uses: actions/checkout@v3 30 | - uses: dtolnay/rust-toolchain@nightly 31 | - run: cargo test 32 | 33 | stable: 34 | name: Rust ${{matrix.rust}} 35 | runs-on: ubuntu-latest 36 | strategy: 37 | fail-fast: false 38 | matrix: 39 | rust: [stable, beta] 40 | timeout-minutes: 45 41 | steps: 42 | - uses: actions/checkout@v3 43 | - uses: dtolnay/rust-toolchain@master 44 | with: 45 | toolchain: ${{matrix.rust}} 46 | - run: cargo test 47 | 48 | nightly: 49 | name: Rust nightly 
${{matrix.os == 'windows' && '(windows)' || ''}} 50 | runs-on: ${{matrix.os}}-latest 51 | strategy: 52 | fail-fast: false 53 | matrix: 54 | os: [ubuntu, windows] 55 | timeout-minutes: 45 56 | steps: 57 | - uses: actions/checkout@v3 58 | - uses: dtolnay/rust-toolchain@nightly 59 | - run: cargo build 60 | 61 | msrv: 62 | name: Rust ${{matrix.rust}} 63 | runs-on: ubuntu-latest 64 | strategy: 65 | fail-fast: false 66 | matrix: 67 | rust: [1.65.0, 1.66.0, 1.67.0] 68 | timeout-minutes: 45 69 | steps: 70 | - uses: actions/checkout@v3 71 | - uses: dtolnay/rust-toolchain@master 72 | with: 73 | toolchain: ${{matrix.rust}} 74 | - run: cargo test 75 | - run: cargo build 76 | # clippy: 77 | # name: Clippy 78 | # runs-on: ubuntu-latest 79 | # timeout-minutes: 45 80 | # steps: 81 | # - uses: actions/checkout@v3 82 | # - uses: dtolnay/rust-toolchain@clippy 83 | # - run: cargo clippy --features generation -- -Dclippy::all -Dclippy::pedantic 84 | 85 | fmt: 86 | name: rustfmt-check 87 | runs-on: ubuntu-latest 88 | if: github.event_name == 'pull_request' 89 | timeout-minutes: 45 90 | steps: 91 | - uses: actions/checkout@v2 92 | - uses: actions-rs/toolchain@v1 93 | with: 94 | toolchain: nightly 95 | components: rustfmt 96 | override: true 97 | - uses: LoliGothick/rustfmt-check@master 98 | with: 99 | token: ${{ secrets.GITHUB_TOKEN }} 100 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Compiled files 2 | *.o 3 | *.so 4 | *.rlib 5 | *.dll 6 | 7 | # Executables 8 | *.exe 9 | 10 | # Generated by Cargo 11 | /target/ 12 | 13 | # This project is a library. 
14 | /Cargo.lock 15 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | sudo: false 2 | language: rust 3 | # necessary for `travis-cargo coveralls --no-sudo` 4 | addons: 5 | apt: 6 | packages: 7 | - libcurl4-openssl-dev 8 | - libelf-dev 9 | - libdw-dev 10 | - binutils-dev # optional, only required for the --verify flag of coveralls 11 | 12 | rust: 13 | - nightly 14 | - beta 15 | - stable 16 | # load travis-cargo 17 | before_script: 18 | - | 19 | pip install 'travis-cargo<0.2' --user && 20 | export PATH=$HOME/.local/bin:$PATH 21 | script: 22 | - | 23 | travis-cargo build && 24 | travis-cargo test && 25 | travis-cargo bench && 26 | travis-cargo --only stable doc 27 | after_success: 28 | - travis-cargo --only stable doc-upload 29 | - travis-cargo coveralls --no-sudo --verify 30 | notifications: 31 | email: 32 | on_success: never 33 | env: 34 | global: 35 | # override the default `--features unstable` used for the nightly branch 36 | - TRAVIS_CARGO_NIGHTLY_FEATURE="" 37 | -------------------------------------------------------------------------------- /Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "gearley" 3 | version = "0.0.5" 4 | 5 | authors = [ "Piotr Czarnecki " ] 6 | description = "An Earley parser engine." 
7 | keywords = ["grammar", "parsing", "language", "forest", "intersection"] 8 | documentation = "http://pczarn.github.io/gearley/" 9 | repository = "https://github.com/pczarn/gearley" 10 | license = "MIT/Apache-2.0" 11 | 12 | [profile.release] 13 | debug = true 14 | 15 | [lib] 16 | name = "gearley" 17 | 18 | [dependencies] 19 | cfg = { version = "0.6.1", features = ["serialize"] } 20 | bit-matrix = { version = "0.6", features = ["serialize"] } 21 | bit-vec = "0.6" 22 | optional = { version = "0.5", features = ["serde"] } 23 | ref_slice = "1.2" 24 | num = "0.2" 25 | num-traits = "0.2" 26 | num-derive = "0.3" 27 | log = "0.4" 28 | env_logger = "0.7" 29 | serde = "1.0" 30 | serde_derive = "1.0" 31 | 32 | [dev-dependencies] 33 | c_lexer_logos = "0.1.1" 34 | -------------------------------------------------------------------------------- /LICENSE-APACHE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 
22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. 
For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. 
If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. 
You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. 
You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "[]" 182 | replaced with your own identifying information. 
(Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright [yyyy] [name of copyright owner] 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 202 | -------------------------------------------------------------------------------- /LICENSE-MIT: -------------------------------------------------------------------------------- 1 | Copyright (c) 2016 Piotr Czarnecki 2 | 3 | Permission is hereby granted, free of charge, to any 4 | person obtaining a copy of this software and associated 5 | documentation files (the "Software"), to deal in the 6 | Software without restriction, including without 7 | limitation the rights to use, copy, modify, merge, 8 | publish, distribute, sublicense, and/or sell copies of 9 | the Software, and to permit persons to whom the Software 10 | is furnished to do so, subject to the following 11 | conditions: 12 | 13 | The above copyright notice and this permission notice 14 | shall be included in all copies or substantial portions 15 | of the Software. 
16 | 17 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF 18 | ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED 19 | TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A 20 | PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT 21 | SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY 22 | CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION 23 | OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR 24 | IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 25 | DEALINGS IN THE SOFTWARE. 26 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 |
2 |

gearley

3 |

4 | An Earley parser engine. 5 |

6 |

7 | 8 | [![crates.io][crates.io shield]][crates.io link] 9 | [![Documentation][docs.rs badge]][docs.rs link] 10 | ![Rust CI][github ci badge] 11 | ![MSRV][rustc 1.65+] 12 |
13 |
14 | [![Dependency Status][deps.rs status]][deps.rs link] 15 | [![Download Status][shields.io download count]][crates.io link] 16 | 17 |

18 |
19 | 20 | [crates.io shield]: https://img.shields.io/crates/v/gearley?label=latest 21 | [crates.io link]: https://crates.io/crates/gearley 22 | [docs.rs badge]: https://docs.rs/gearley/badge.svg?version=0.0.5 23 | [docs.rs link]: https://docs.rs/gearley/0.0.5/gearley/ 24 | [github ci badge]: https://github.com/pczarn/gearley/workflows/CI/badge.svg?branch=master 25 | [rustc 1.65+]: https://img.shields.io/badge/rustc-1.65%2B-blue.svg 26 | [deps.rs status]: https://deps.rs/crate/gearley/0.0.5/status.svg 27 | [deps.rs link]: https://deps.rs/crate/gearley/0.0.5 28 | [shields.io download count]: https://img.shields.io/crates/d/gearley.svg 29 | 30 | Work in progress. 31 | [You can check the documentation here](https://docs.rs/gearley/latest/gearley/). 32 | 33 | This engine is meant to be a foundation of an optimized parser generator. 34 | 35 | Gearley is inspired by the [Marpa parser](http://jeffreykegler.github.io/Marpa-web-site/) 36 | by Jeffrey Kegler. 37 | 38 | ## Properties 39 | 40 | * blazing fast 41 | * as fast as YAEP 42 | * much faster than Marpa 43 | * memory efficient 44 | * new algorithm which uses online sorting 45 | * TODO: new hybrid algorithm 46 | * TODO: LALR 47 | * TODO: LL(1) 48 | * TODO: LR(1) 49 | * both time and memory complexity are small for simple grammars 50 | * time complexity: `O(n log n)` (n = input length) for `LR(1)` grammars 51 | * memory complexity: linear in input length for `LR(1)` grammars 52 | * lookahead 53 | * 1 token of lookahead 54 | * TODO: multithreaded parsing 55 | * TODO: fearless right-recursion 56 | * TODO: Leo's algorithm 57 | * general-purpose 58 | * accepts all context-free grammars 59 | * may be extended to accept any grammar with Pāṇini 60 | * TODO: data-dependent grammars 61 | * TODO: PEG 62 | * TODO: negation 63 | * TODO: boolean grammars 64 | * interop with any parsing algorithm 65 | * safe 66 | * TODO: pure safe Rust 67 | * elegant 68 | * the recognizer has a simple design 69 | * tiny core 70 | only 470 lines of
code implementing the core algorithm 71 | * mathematically elegant 72 | * uses simple data structures 73 | * three separate per-token passes 74 | * just like Marpa 75 | * highly preprocessed grammar 76 | * less complexity in the recognizer and parse forest makes up for heavy grammar transformations 77 | * naming 78 | * Pāṇini is named after an ancient grammarian and Indian scholar 79 | * parse forest naming is inspired by algebra 80 | * good error reporting 81 | * perfect parse progress information 82 | * tracing debugging 83 | * customizable 84 | * extensible on every level 85 | * customizable recognizer 86 | * optional control over bottom-up parse fragment completion 87 | * you control which fragments are admitted into the forest 88 | * optional custom parse events 89 | * optional initialization with given memory capacity 90 | * generic over optional Performance Policy 91 | * customizable parse forest 92 | * optional control over ambiguous node ordering 93 | * write your own parse forest 94 | * two official parse forest impls and a null forest 95 | * choose between a faster forest and a memory efficient forest 96 | * optionally ignore parse result and get only parse success or failure 97 | * open source 98 | * free is a fair price 99 | 100 | ## Extending gearley 101 | 102 | The grammar is stored in a byte string. You may [serialize or deserialize it](https://docs.rs/gearley/0.0.5/gearley/grammar/struct.InternalGrammar.html) 103 | yourself. Grammar construction is implemented in the 104 | [cfg library](https://github.com/pczarn/cfg). 105 | 106 | The recognizer provides [an interface](https://docs.rs/gearley/0.0.5/gearley/forest/trait.Forest.html) for writing a custom parse forest. Or you 107 | may reuse the default parse forest algorithm, but write your own code for [controlling 108 | rule order](https://docs.rs/gearley/0.0.5/gearley/forest/order/trait.Order.html), and for storing evaluated values within each tree node. 
109 | 110 | Yet another interface gives [control over rule completion](https://docs.rs/gearley/0.0.5/gearley/recognizer/struct.CompleteSum.html). You may reject certain 111 | completed rules or modify their parse forests as the parse progresses. 112 | 113 | Gearley is perfectly extensible on every level. 114 | 115 | ## Glossary 116 | 117 | ### Recognizer 118 | 119 | | Gearley term | Marpa term | Alternative term | 120 | |--------------------|------------------------|----------------------------| 121 | | dot | dotted rule | -- | 122 | | earleme | earleme | input location | 123 | | item | Earley item | situation | 124 | | origin | origin | distance | 125 | | rule history | rule semantics | -- | 126 | | complete | complete | accept | 127 | 128 | Dot — a position in the grammar, which is an integer. 129 | 130 | Earleme — scalar position, currently equivalent to the input location index. 131 | 132 | Item — a value that consists of a dot, an origin and a bocage node. 133 | 134 | Origin — the Earley set number where a rule was predicted. Always smaller than 135 | the current Earley set ID for non-predicted items. 136 | 137 | Rule history — a rule summary that contains an action number and other information 138 | about semantics and the rule's journey through transformations. Each rule carries 139 | its own history. 140 | 141 | ### Parse forest 142 | 143 | | Gearley term | Marpa term | Alternative term | 144 | |--------------------|------------------------|----------------------------| 145 | | bocage | bocage | Shared Packed Parse Forest | 146 | | depth-first bocage | Abstract Syntax Forest | -- | 147 | | sum node | glade | OR node | 148 | | product node | factoring | AND node | 149 | | leaf node | bocage symbol | leaf node | 150 | | root node | peak glade | top node | 151 | 152 | Bocage — a parse forest in the form of a Directed Acyclic Graph. 153 | 154 | Depth-first bocage — a bocage that is traversed by evaluating one whole bocage 155 | node at a time. 
156 | 157 | Sum node — a node that sums the number of trees in the forest. 158 | 159 | Product node — a node that may multiply the number of trees in the forest. 160 | 161 | Leaf node — a terminal node that begins a single tree in the forest. 162 | 163 | Root node — a node that is used as a parse result. 164 | 165 | ## Related work 166 | 167 | ### In Rust 168 | 169 | * [LALRPOP](https://github.com/nikomatsakis/lalrpop) — a LR(1) parser generator focused on ease of use. 170 | * [rust-lang's GLL](https://github.com/rust-lang/gll/) — a parsing framework. 171 | * [grammer with an E](https://github.com/lykenware/grammer/) — a grammar framework. 172 | * [Oak](https://github.com/ptal/oak/) — a PEG parser generator with typed expressions. 173 | 174 | ### In other languages 175 | 176 | * [Marpa](https://jeffreykegler.github.io/Marpa-web-site/) — an Earley parser (not a generator) 177 | that has advanced features. Written in literate C and in Perl. 178 | * [YAEP](https://github.com/vnmakarov/yaep) — an Earley parser engine that currently has 179 | the best speed and small memory use. Written in C. 180 | 181 | ### In academia 182 | 183 | * OMeta — a PEG parser with advanced features that go beyond parsing. 184 | * [SPPF-Style Parsing From Earley Recognisers](https://www.researchgate.net/publication/220367479_SPPF-Style_Parsing_From_Earley_Recognisers) — Elizabeth Scott. 185 | 186 | ## Quotes 187 | 188 | > I'd be very happy to have a superfast general parser out there but some extremely bright minds have been unable to solve it for 40 years. 189 | 190 | — Terence Parr, author of ANTLR 191 | 192 | > I would be very eager to see this. 193 | 194 | — mydoghasticks 195 | 196 | ## Thanks 197 | 198 | Thanks to Jay Earley, John Aycock, R. Nigel Horspool, and Elizabeth Scott who pioneered Earley parsing. 
199 | 200 | Big thanks to [mr Jeffrey Kegler](https://github.com/jeffreykegler) who brought my attention to parsing and made this project possible through his work on Marpa/Earley and Kollos. 201 | 202 | Special thanks to CD PROJEKT RED, HAEVN, Kaśka Sochacka, sanah, Kwiat Jabłoni, Alex Rainbird, Beth Paterson, Carbon Based Lifeforms, and Solar Fields for providing amazing music, which made coding even more enjoyable. 203 | 204 | ## License 205 | 206 | Dual-licensed for compatibility with the Rust project. 207 | 208 | Licensed under the Apache License Version 2.0: 209 | http://www.apache.org/licenses/LICENSE-2.0, or the MIT license: 210 | http://opensource.org/licenses/MIT, at your option. 211 | -------------------------------------------------------------------------------- /benches/bench_rust_grammar_subset.rs: -------------------------------------------------------------------------------- 1 | #![feature(test)] 2 | 3 | extern crate test; 4 | extern crate cfg; 5 | extern crate gearley; 6 | 7 | macro_rules! trace(($($tt:tt)*) => ()); 8 | 9 | #[path = "../tests/helpers/mod.rs"] 10 | mod helpers; 11 | 12 | use cfg::sequence::Separator::Proper; 13 | use cfg::earley::Grammar; 14 | use gearley::forest::{Bocage, NullForest}; 15 | use gearley::grammar::InternalGrammar; 16 | use gearley::recognizer::Recognizer; 17 | use gearley::memory_use::MemoryUse; 18 | 19 | use helpers::Parse; 20 | 21 | macro_rules! rhs_elem { 22 | (use) => (0); 23 | (as) => (1); 24 | (::) => (2); 25 | (*) => (3); 26 | (,) => (4); 27 | (;) => (5); 28 | ('{') => (6); 29 | ('}') => (7); 30 | (pub) => (8); 31 | ($i:ident) => (9); 32 | } 33 | 34 | macro_rules! 
rhs { 35 | ($($e:tt)+) => ( 36 | &[$(rhs_elem!($e) + 9,)+] 37 | ) 38 | } 39 | 40 | const TOKENS: &'static [u32] = rhs!( 41 | use gearley::events::'{' PredictionEvents, MedialEvents, CompletionEvents '}'; 42 | use gearley::util::slice_builder::SliceBuilder; 43 | use gearley::forest::depth_first::'{' 44 | NullOrder, FastEvaluator, ArrayStore, ClosureActionEvaluator 45 | '}'; 46 | pub use self::PathParsingMode::*; 47 | 48 | use abi :: '{' self, Abi '}'; 49 | use ast::BareFnTy; 50 | use ast :: '{' RegionTyParamBound, TraitTyParamBound, TraitBoundModifier '}'; 51 | use ast::Unsafety; 52 | use ast :: '{' Mod, Arg, Arm, Attribute, BindingMode, TraitItemKind '}'; 53 | use ast::Block; 54 | use ast :: '{' BlockCheckMode, CaptureBy '}'; 55 | use ast :: '{' Constness, Crate, CrateConfig '}'; 56 | use ast :: '{' Decl, DeclKind '}'; 57 | use ast :: '{' EMPTY_CTXT, EnumDef, ExplicitSelf '}'; 58 | use ast :: '{' Expr, ExprKind '}'; 59 | use ast :: '{' Field, FnDecl '}'; 60 | use ast :: '{' ForeignItem, ForeignItemKind, FunctionRetTy '}'; 61 | use ast :: '{' Ident, ImplItem, Item, ItemKind '}'; 62 | use ast :: '{' Lit, LitKind, UintTy '}'; 63 | use ast::Local; 64 | use ast::MacStmtStyle; 65 | use ast::Mac_; 66 | use ast :: '{' MutTy, Mutability '}'; 67 | use ast::NamedField; 68 | use ast :: '{' Pat, PatKind '}'; 69 | use ast :: '{' PolyTraitRef, QSelf '}'; 70 | use ast :: '{' Stmt, StmtKind '}'; 71 | use ast :: '{' VariantData, StructField '}'; 72 | use ast::StrStyle; 73 | use ast::SelfKind; 74 | use ast :: '{' Delimited, SequenceRepetition, TokenTree, TraitItem, TraitRef '}'; 75 | use ast :: '{' Ty, TyKind, TypeBinding, TyParam, TyParamBounds '}'; 76 | use ast::UnnamedField; 77 | use ast :: '{' ViewPath, ViewPathGlob, ViewPathList, ViewPathSimple '}'; 78 | use ast :: '{' Visibility, WhereClause '}'; 79 | use attr :: '{' ThinAttributes, ThinAttributesExt, AttributesExt '}'; 80 | use ast :: '{' BinOpKind, UnOp '}'; 81 | use ast; 82 | use ast_util :: '{' self, ident_to_path '}'; 83 | 
use codemap :: '{' self, Span, BytePos, Spanned, spanned, mk_sp, CodeMap '}'; 84 | use errors :: '{' self, DiagnosticBuilder '}'; 85 | use ext::tt::macro_parser; 86 | use parse; 87 | use parse::classify; 88 | use parse::common::SeqSep; 89 | use parse::lexer :: '{' Reader, TokenAndSpan '}'; 90 | use parse::obsolete :: '{' ParserObsoleteMethods, ObsoleteSyntax '}'; 91 | use parse::token :: '{' self, intern, MatchNt, SubstNt, SpecialVarNt, InternedString '}'; 92 | use parse::token :: '{' keywords, special_idents, SpecialMacroVar '}'; 93 | use parse :: '{' new_sub_parser_from_file, ParseSess '}'; 94 | use util::parser :: '{' AssocOp, Fixity '}'; 95 | use print::pprust; 96 | use ptr::P; 97 | use parse::PResult; 98 | 99 | use std::collections::HashSet; 100 | use std::io::prelude::*; 101 | use std::mem; 102 | use std::path :: '{' Path, PathBuf '}'; 103 | use std::rc::Rc; 104 | use std::slice; 105 | ); 106 | 107 | const _TOKEN_NAMES: &'static [&'static str] = &[ 108 | "start", "use_decls", "use_decl", "segments", "segment", "import_mod", "import_seq", "import", 109 | "pub_opt", 110 | "use_tok", "as_tok", "mod_sep", "star", "comma", "semi", "lbrace", "rbrace", "pub_tok", "ident" 111 | ]; 112 | 113 | fn grammar() -> Grammar { 114 | let mut external = Grammar::new(); 115 | let (start, use_decls, use_decl, segments, segment, import_mod, import_seq, import, pub_opt) = external.sym(); 116 | let (use_tok, as_tok, mod_sep, star, comma, semi, lbrace, rbrace, pub_tok, ident) = external.sym(); 117 | external 118 | .sequence(segments).inclusive(0, None).rhs(segment) 119 | .sequence(import_seq).separator(Proper(comma)).inclusive(1, None).rhs(import) 120 | .sequence(use_decls).inclusive(0, None).rhs(use_decl) 121 | ; 122 | external.rule(start).rhs([use_decls]) 123 | .rule(use_decl).rhs([pub_opt, use_tok, segments, import_mod, semi]) 124 | .rule(segment).rhs([ident, mod_sep]) 125 | .rule(import_mod).rhs([lbrace, import_seq, rbrace]) 126 | .rhs([import]) 127 | .rhs([star]) 128 | 
.rule(import).rhs([ident]) 129 | .rhs([ident, as_tok, ident]) 130 | .rule(pub_opt).rhs([pub_tok]) 131 | .rhs([]) 132 | ; 133 | external.set_start(start); 134 | external 135 | } 136 | 137 | #[bench] 138 | fn bench_recognize_decl_use(b: &mut test::Bencher) { 139 | let external = grammar(); 140 | let cfg = InternalGrammar::from_grammar(&external); 141 | 142 | b.iter(|| { 143 | let mut rec: Recognizer = Recognizer::new_with_limit(&cfg, 2_000_000); 144 | rec.parse(TOKENS); 145 | test::black_box(&rec); 146 | }) 147 | } 148 | 149 | #[bench] 150 | fn bench_parse_decl_use(b: &mut test::Bencher) { 151 | let external = grammar(); 152 | let cfg = InternalGrammar::from_grammar(&external); 153 | 154 | b.iter(|| { 155 | let mut rec: Recognizer> = Recognizer::new_with_limit(&cfg, 2_000_000); 156 | let finished = rec.parse(TOKENS); 157 | assert!(finished); 158 | test::black_box(&rec.forest); 159 | }) 160 | } 161 | -------------------------------------------------------------------------------- /benches/benches.rs: -------------------------------------------------------------------------------- 1 | #![feature(test)] 2 | 3 | extern crate test; 4 | extern crate cfg; 5 | extern crate gearley; 6 | 7 | macro_rules! 
trace(($($tt:tt)*) => ()); 8 | 9 | #[macro_use] 10 | #[path = "../tests/grammars/mod.rs"] 11 | mod grammars; 12 | #[path = "../tests/helpers/mod.rs"] 13 | mod helpers; 14 | 15 | use gearley::grammar::InternalGrammar; 16 | use gearley::forest::{Bocage, NullForest}; 17 | use gearley::recognizer::Recognizer; 18 | use gearley::memory_use::MemoryUse; 19 | 20 | use grammars::*; 21 | use helpers::{SimpleEvaluator, Parse}; 22 | 23 | const SUM_TOKENS: &'static [u32] = precedenced_arith!( 24 | '1' '+' '(' '2' '*' '3' '-' '4' ')' '/' 25 | '(' '5' '5' ')' '-' '(' '5' '4' ')' '*' 26 | '5' '5' '+' '6' '2' '-' '1' '3' '-' '(' 27 | '(' '3' '6' ')' ')' 28 | ); 29 | 30 | #[bench] 31 | fn bench_ambiguous_arithmetic(b: &mut test::Bencher) { 32 | let tokens = ambiguous_arith!('2' '-' '0' '*' '3' '+' '1' '/' '2' '+' '8' '8' '+' '1' '/' '2'); 33 | let external = ambiguous_arith::grammar(); 34 | let cfg = InternalGrammar::from_grammar(&external); 35 | 36 | b.iter(|| { 37 | let mut evaluator = SimpleEvaluator::new( 38 | ambiguous_arith::leaf, 39 | ambiguous_arith::rule, 40 | |_, _: &mut _| unreachable!() 41 | ); 42 | let mut rec: Recognizer> = Recognizer::new_with_hint(&cfg, tokens.len()); 43 | assert!(rec.parse(tokens)); 44 | let mut traversal = rec.forest.traverse(); 45 | let results = evaluator.traverse(&mut traversal, rec.finished_node().unwrap()); 46 | test::black_box(results); 47 | }) 48 | } 49 | 50 | #[bench] 51 | fn bench_evaluate_precedenced_arith(b: &mut test::Bencher) { 52 | let external = precedenced_arith::grammar(); 53 | let cfg = InternalGrammar::from_grammar(&external); 54 | let sum_tokens = test::black_box(SUM_TOKENS); 55 | 56 | b.iter(|| { 57 | let mut evaluator = SimpleEvaluator::new( 58 | precedenced_arith::leaf, 59 | precedenced_arith::rule, 60 | |_, _: &mut _| unreachable!(), 61 | ); 62 | let bocage = Bocage::new(&cfg); 63 | let mut recognizer = Recognizer::new(&cfg, bocage); 64 | recognizer.parse(sum_tokens); 65 | let mut traversal = recognizer.forest.traverse(); 66 
| let results = evaluator.traverse(&mut traversal, recognizer.finished_node().unwrap()); 67 | test::black_box(results); 68 | }) 69 | } 70 | 71 | #[bench] 72 | fn bench_process_grammar_for_precedenced_arith(b: &mut test::Bencher) { 73 | let external = precedenced_arith::grammar(); 74 | 75 | b.iter(|| { 76 | test::black_box(InternalGrammar::from_grammar(&external)); 77 | }) 78 | } 79 | 80 | #[bench] 81 | fn bench_recognize_precedenced_arith(b: &mut test::Bencher) { 82 | let grammar = precedenced_arith::grammar(); 83 | let cfg = InternalGrammar::from_grammar(&grammar); 84 | let sum_tokens = test::black_box(SUM_TOKENS); 85 | 86 | b.iter(|| { 87 | let mut recognizer = Recognizer::new(&cfg, NullForest); 88 | test::black_box(&recognizer.parse(sum_tokens)); 89 | }) 90 | } 91 | -------------------------------------------------------------------------------- /src/binary_heap.rs: -------------------------------------------------------------------------------- 1 | // Copyright 2019 The Rust Project Developers, Piotr Czarnecki. 2 | // See the COPYRIGHT 3 | // file at the top-level directory of this distribution and at 4 | // http://rust-lang.org/COPYRIGHT. 5 | // 6 | // Licensed under the Apache License, Version 2.0 or the MIT license 8 | // , at your 9 | // option. This file may not be copied, modified, or distributed 10 | // except according to those terms. 11 | 12 | //! A priority queue implemented with a binary heap. 13 | 14 | #![allow(missing_docs)] 15 | #![cfg_attr(feature = "cargo-clippy", allow(nonminimal_bool))] 16 | 17 | use std::mem::swap; 18 | use std::u32; 19 | 20 | use forest::Forest; 21 | use item::{CompletedItem, CompletedItemLinked, Item}; 22 | use recognizer::Recognizer; 23 | 24 | impl<'g, F> Recognizer<'g, F> 25 | where F: Forest, 26 | { 27 | /// Returns the greatest item in the binary heap, or `None` if it is empty. 
28 | #[inline] 29 | pub fn heap_peek(&self) -> Option> { 30 | self.complete.get(0).and_then(|&right_item| 31 | self.medial.get(right_item.idx as usize).map(|left_item| 32 | CompletedItem { 33 | origin: left_item.origin, 34 | dot: left_item.dot, 35 | left_node: left_item.node, 36 | right_node: right_item.node, 37 | } 38 | ) 39 | ) 40 | } 41 | 42 | #[inline(always)] 43 | fn heap_get(&self, idx_idx: usize) -> Option<&Item> { 44 | self.complete.get(idx_idx).and_then(|&item| self.medial.get(item.idx as usize)) 45 | } 46 | 47 | /// Removes the greatest item from the binary heap and returns it, or `None` if it 48 | /// is empty. 49 | pub fn heap_pop(&mut self) -> Option> { 50 | self.complete.pop().and_then(move |mut right_item| { 51 | if !self.complete.is_empty() { 52 | swap(&mut right_item, &mut self.complete[0]); 53 | self.sift_down(0); 54 | } 55 | self.medial.get(right_item.idx as usize).map(|left_item| 56 | CompletedItem { 57 | origin: left_item.origin, 58 | dot: left_item.dot, 59 | left_node: left_item.node, 60 | right_node: right_item.node, 61 | } 62 | ) 63 | }) 64 | } 65 | 66 | /// Pushes an item onto the binary heap. 67 | pub fn heap_push(&mut self, item: CompletedItem) { 68 | let old_indices_len = self.complete.len(); 69 | let old_medial_len = self.medial.len(); 70 | assert!(old_medial_len as u64 <= u32::MAX.into()); 71 | self.medial.push(item.into()); 72 | self.complete.push(CompletedItemLinked { 73 | idx: old_medial_len as u32, 74 | node: item.right_node, 75 | }); 76 | self.sift_up(0, old_indices_len); 77 | } 78 | 79 | /// Pushes an item onto the binary heap. 80 | pub fn heap_push_linked(&mut self, item: CompletedItemLinked) { 81 | let old_indices_len = self.complete.len(); 82 | self.complete.push(item); 83 | self.sift_up(0, old_indices_len); 84 | } 85 | 86 | /// Consumes the `BinaryHeap` and returns a vector in sorted 87 | /// (ascending) order. 
    fn sift_up(&mut self, start: usize, mut pos: usize) {
        // `complete` holds indices into `medial`; comparisons are made on the
        // medial items those indices refer to.
        let element_idx = self.complete[pos];
        let element = &self.medial[element_idx.idx as usize];
        while pos > start {
            let parent = (pos - 1) / 2;
            let parent_idx = self.complete[parent];
            // Max-heap order: stop once the element is no greater than its parent.
            if *element <= self.medial[parent_idx.idx as usize] {
                break;
            }
            // The parent is smaller: shift it down one level and continue from
            // its slot. The element itself is written once, after the loop.
            self.complete[pos] = parent_idx;
            pos = parent;
        }
        self.complete[pos] = element_idx;
    }

    /// Take an element at `pos` and move it down the heap,
    /// while its children are larger.
    fn sift_down_range(&mut self, mut pos: usize, end: usize) {
        let element_idx = self.complete[pos];
        let element = &self.medial[element_idx.idx as usize];
        // First child of `pos` in the implicit binary tree.
        let mut child = 2 * pos + 1;
        while child < end {
            let right = child + 1;
            // compare with the greater of the two children
            if right < end && !(self.heap_get(child).unwrap() > self.heap_get(right).unwrap()) {
                child = right;
            }
            // if we are already in order, stop.
116 | if element >= self.heap_get(child).unwrap() { 117 | break; 118 | } 119 | self.complete[pos] = self.complete[child]; 120 | pos = child; 121 | child = 2 * pos + 1; 122 | } 123 | self.complete[pos] = element_idx; 124 | } 125 | 126 | fn sift_down(&mut self, pos: usize) { 127 | let len = self.complete.len(); 128 | self.sift_down_range(pos, len); 129 | } 130 | } 131 | -------------------------------------------------------------------------------- /src/debug.rs: -------------------------------------------------------------------------------- 1 | use std::fmt; 2 | 3 | use forest::Forest; 4 | use recognizer::Recognizer; 5 | 6 | impl<'g, F: Forest> fmt::Debug for Recognizer<'g, F> { 7 | fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { 8 | write!(f, 9 | "Recognizer {{ grammar: {:?}, \ 10 | predicted: {:?}, medial: {:?}, \ 11 | complete: {:?}, indices: {:?}, \ 12 | current_medial_start: {:?}, earleme: {:?} }}", 13 | self.grammar, 14 | &self.predicted, 15 | &self.medial, 16 | &self.complete, 17 | &self.indices, 18 | &self.current_medial_start, 19 | &self.earleme 20 | ) 21 | } 22 | } 23 | -------------------------------------------------------------------------------- /src/events.rs: -------------------------------------------------------------------------------- 1 | use std::iter::{Zip, Chain}; 2 | use std::slice; 3 | 4 | use bit_matrix; 5 | use cfg::symbol::Symbol; 6 | 7 | use forest::Forest; 8 | use grammar::{ExternalDottedRule, Event}; 9 | use item::Item; 10 | use recognizer::Recognizer; 11 | 12 | type IterPredictionBitfield<'a> = bit_matrix::row::Iter<'a>; 13 | 14 | pub struct PredictedSymbols<'a> { 15 | pub(in super) iter: IterPredictionBitfield<'a>, 16 | pub(in super) idx: usize, 17 | } 18 | 19 | pub struct MedialItems<'a, N: 'a> { 20 | pub(in super) iter: slice::Iter<'a, Item>, 21 | } 22 | 23 | pub struct Prediction<'a, T: 'a> { 24 | iter: Zip, slice::Iter<'a, T>>, 25 | origin: usize, 26 | } 27 | 28 | pub struct Medial<'a, T: 'a, N: 'a> { 29 | events: &'a 
[T], 30 | items: MedialItems<'a, N>, 31 | } 32 | 33 | pub struct Events<'a, N: 'a> { 34 | iter: Chain< 35 | Prediction<'a, Event>, 36 | Medial<'a, Event, N> 37 | > 38 | } 39 | 40 | pub struct Distances<'a, N: 'a> { 41 | iter: Chain< 42 | Prediction<'a, Event>, 43 | Medial<'a, Event, N> 44 | > 45 | } 46 | 47 | pub struct Trace<'a, N: 'a> { 48 | iter: Chain< 49 | Prediction<'a, Option>, 50 | Medial<'a, Option, N> 51 | > 52 | } 53 | 54 | pub struct ExpectedTerminals<'a, N: 'a> { 55 | prev_scan_iter: MedialItems<'a, N>, 56 | rhs1: &'a [Option], 57 | } 58 | 59 | impl<'a> Iterator for PredictedSymbols<'a> { 60 | type Item = Symbol; 61 | 62 | fn next(&mut self) -> Option { 63 | for is_present in &mut self.iter { 64 | let symbol = Symbol::from(self.idx); 65 | self.idx += 1; 66 | if is_present { 67 | return Some(symbol); 68 | } 69 | } 70 | None 71 | } 72 | } 73 | 74 | impl<'a, N> Iterator for MedialItems<'a, N> { 75 | type Item = &'a Item; 76 | 77 | fn next(&mut self) -> Option { 78 | self.iter.next() 79 | } 80 | } 81 | 82 | impl<'a, T> Iterator for Prediction<'a, T> { 83 | type Item = (&'a T, usize); 84 | 85 | fn next(&mut self) -> Option { 86 | for (is_present, elem) in &mut self.iter { 87 | if is_present { 88 | return Some((elem, self.origin)); 89 | } 90 | } 91 | None 92 | } 93 | } 94 | 95 | impl<'a, T, L> Iterator for Medial<'a, T, L> { 96 | type Item = (&'a T, usize); 97 | 98 | fn next(&mut self) -> Option { 99 | let events = &self.events; 100 | self.items.next().map(|ei| { 101 | (&events[ei.dot as usize], ei.origin as usize) 102 | }) 103 | } 104 | } 105 | 106 | impl<'a, L> Iterator for Events<'a, L> { 107 | type Item = u32; 108 | 109 | fn next(&mut self) -> Option { 110 | for (&(event_id, _distance), _origin) in &mut self.iter { 111 | if event_id.is_some() { 112 | return event_id.into(); 113 | } 114 | } 115 | None 116 | } 117 | } 118 | 119 | impl<'a, L> Iterator for Distances<'a, L> { 120 | type Item = u32; 121 | 122 | fn next(&mut self) -> Option { 123 | for 
(&(_event_id, distance), _origin) in &mut self.iter { 124 | if distance.is_some() { 125 | return distance.into(); 126 | } 127 | } 128 | None 129 | } 130 | } 131 | 132 | impl<'a, N> Iterator for Trace<'a, N> { 133 | type Item = (ExternalDottedRule, usize); 134 | 135 | fn next(&mut self) -> Option<(ExternalDottedRule, usize)> { 136 | for (&external_dr_opt, origin) in &mut self.iter { 137 | if let Some(external_dotted_rule) = external_dr_opt { 138 | return Some((external_dotted_rule, origin)); 139 | } 140 | } 141 | None 142 | } 143 | } 144 | 145 | impl<'a, N> Iterator for ExpectedTerminals<'a, N> { 146 | type Item = Symbol; 147 | 148 | fn next(&mut self) -> Option { 149 | self.prev_scan_iter.next().map(|item| { 150 | self.rhs1[item.dot as usize].unwrap() 151 | }) 152 | } 153 | } 154 | 155 | impl<'g, F> Recognizer<'g, F> 156 | where F: Forest, 157 | { 158 | pub fn trace(&self) -> Trace { 159 | let trace = self.grammar.trace(); 160 | let prediction = Prediction { 161 | iter: self.predicted_symbols().iter.zip(trace[0].iter()), 162 | origin: self.earleme(), 163 | }; 164 | let medial = Medial { 165 | events: trace[1], 166 | items: self.medial_items(), 167 | }; 168 | Trace { 169 | iter: prediction.chain(medial), 170 | } 171 | } 172 | 173 | pub fn events(&self) -> Events { 174 | let (events_predict, events_flat) = self.grammar.events(); 175 | let prediction = Prediction { 176 | iter: self.predicted_symbols().iter.zip(events_predict.iter()), 177 | origin: self.earleme(), 178 | }; 179 | let medial = Medial { 180 | events: events_flat, 181 | items: self.medial_items(), 182 | }; 183 | Events { 184 | iter: prediction.chain(medial), 185 | } 186 | } 187 | 188 | pub fn minimal_distances(&self) -> Distances { 189 | Distances { 190 | iter: self.events().iter, 191 | } 192 | } 193 | 194 | pub fn expected_terminals(&self) -> ExpectedTerminals { 195 | ExpectedTerminals { 196 | prev_scan_iter: self.medial_items(), 197 | rhs1: self.grammar.rhs1(), 198 | } 199 | } 200 | } 201 | 202 | #[test] 
203 | fn test_prediction_events() { 204 | use bit_matrix::BitMatrix; 205 | let mut bit_m = BitMatrix::new(1, 5); 206 | bit_m.set(0, 2, true); 207 | let mut row = bit_m.iter_row(0); 208 | assert_eq!(row.next(), Some(false)); 209 | assert_eq!(row.next(), Some(false)); 210 | assert_eq!(row.next(), Some(true)); 211 | assert_eq!(row.next(), Some(false)); 212 | assert_eq!(row.next(), Some(false)); 213 | assert_eq!(row.next(), None); 214 | let ev = [0, 1, 2, 3, 4]; 215 | let mut pred = Prediction { 216 | iter: bit_m.iter_row(0).zip(&ev[0..5]), 217 | origin: 123, 218 | }; 219 | assert_eq!(pred.next(), Some((&2, 123))); 220 | assert_eq!(pred.next(), None); 221 | } 222 | -------------------------------------------------------------------------------- /src/forest/bocage/mod.rs: -------------------------------------------------------------------------------- 1 | pub mod node; 2 | pub mod order; 3 | pub mod traverse; 4 | 5 | use std::borrow::Borrow; 6 | use std::hint; 7 | 8 | use bit_vec::BitVec; 9 | use cfg::symbol::Symbol; 10 | use ref_slice::ref_slice; 11 | 12 | use forest::node_handle::NodeHandle; 13 | use forest::Forest; 14 | use grammar::InternalGrammar; 15 | use item::CompletedItem; 16 | 17 | use self::node::Node::*; 18 | use self::node::{CompactNode, Node, NULL_ACTION}; 19 | use self::order::Order; 20 | 21 | pub struct Bocage { 22 | pub(crate) graph: Vec, 23 | pub(crate) gc: MarkAndSweep, 24 | pub(crate) grammar: G, 25 | pub(crate) summand_count: u32, 26 | } 27 | 28 | pub(crate) struct MarkAndSweep { 29 | pub(crate) liveness: BitVec, 30 | // List for DFS and/or maybe relocation of stuff in the future. 
31 | pub(crate) dfs: Vec, 32 | } 33 | 34 | impl Bocage 35 | where 36 | G: Borrow, 37 | { 38 | pub fn new(grammar: G) -> Self { 39 | Self::with_capacities(grammar, 1024, 32) 40 | } 41 | 42 | pub fn with_capacities(grammar: G, graph_cap: usize, dfs_cap: usize) -> Self { 43 | let mut result = Bocage { 44 | graph: Vec::with_capacity(graph_cap), 45 | gc: MarkAndSweep { 46 | liveness: BitVec::with_capacity(graph_cap), 47 | dfs: Vec::with_capacity(dfs_cap), 48 | }, 49 | grammar, 50 | summand_count: 0, 51 | }; 52 | result.initialize_nulling(); 53 | result 54 | } 55 | 56 | pub(crate) fn initialize_nulling(&mut self) { 57 | // TODO trivial grammar check 58 | // self.nulling_leaf_count = self.nulling_symbol_count(); 59 | let nulling_leaf_count = self.nulling_symbol_count(); 60 | // Ensure that `max` is not ridiculously large. 61 | assert!(nulling_leaf_count < (1 << 20), "invalid nullable symbol"); 62 | self.graph.extend((0..=nulling_leaf_count).map(|i| { 63 | NullingLeaf { 64 | symbol: Symbol::from(i), 65 | } 66 | .compact() 67 | })); 68 | for &(lhs, rhs0, rhs1) in self.grammar.borrow().eliminated_nulling_intermediate() { 69 | self.set( 70 | NodeHandle::nulling(lhs), 71 | Product { 72 | left_factor: NodeHandle::nulling(rhs0), 73 | right_factor: Some(NodeHandle::nulling(rhs1)), 74 | action: NULL_ACTION, 75 | }, 76 | ); 77 | } 78 | } 79 | 80 | fn nulling_symbol_count(&self) -> usize { 81 | self.grammar.borrow().max_nulling_symbol().unwrap_or(0) 82 | } 83 | 84 | #[inline] 85 | pub fn mark_alive(&mut self, root: NodeHandle, mut order: O) { 86 | self.gc.liveness.clear(); 87 | self.gc.liveness.grow(self.graph.len(), false); 88 | self.gc.dfs.push(root); 89 | while let Some(node) = self.gc.dfs.pop() { 90 | self.gc.liveness.set(node.usize(), true); 91 | let summands = Bocage::::summands(&self.graph, node); 92 | let summands = order.sum(summands); 93 | for summand in summands { 94 | self.postprocess_product_tree_node(summand); 95 | // TODO: use order for products. 
96 | self.gc.dfs_queue_factors(summand); 97 | } 98 | } 99 | } 100 | 101 | #[inline] 102 | fn summands(graph: &Vec, node: NodeHandle) -> &[CompactNode] { 103 | unsafe { 104 | match graph.get_unchecked(node.usize()).expand() { 105 | Sum { count, .. } => { 106 | // back 107 | // let start = node.usize() - count as usize - 1; 108 | // let end = node.usize() - 1; 109 | let start = node.usize() + 1; 110 | let end = node.usize() + count as usize + 1; 111 | graph.get_unchecked(start..end) 112 | } 113 | _ => ref_slice(graph.get_unchecked(node.usize())), 114 | } 115 | } 116 | } 117 | 118 | #[inline] 119 | fn postprocess_product_tree_node(&self, node: &CompactNode) { 120 | if let Product { 121 | left_factor: factor, 122 | right_factor: None, 123 | action, 124 | } = node.expand() 125 | { 126 | // Add omitted phantom syms here. 127 | if let Some((sym, dir)) = self.grammar.borrow().nulling(action) { 128 | let (left, right) = if dir { 129 | (factor, NodeHandle::nulling(sym)) 130 | } else { 131 | (NodeHandle::nulling(sym), factor) 132 | }; 133 | node.set(Product { 134 | left_factor: left, 135 | right_factor: Some(right), 136 | action, 137 | }); 138 | } 139 | } 140 | } 141 | 142 | #[inline] 143 | fn set(&self, idx: NodeHandle, node: Node) { 144 | self.graph[idx.usize()].set(node); 145 | } 146 | 147 | #[inline] 148 | pub(super) fn is_transparent(&self, action: u32) -> bool { 149 | action == NULL_ACTION || self.grammar.borrow().external_origin(action).is_none() 150 | } 151 | 152 | // fn mark_and_sweep(&mut self, root: NodeHandle) { 153 | // self.mark_alive(root); 154 | // self.sweep_garbage(); 155 | // self.update_nulling_leaf_count(); 156 | // } 157 | 158 | // fn sweep_garbage(&mut self) { 159 | // let count = self.relocate_marked(); 160 | // self.graph.truncate(count); 161 | // } 162 | 163 | // fn update_nulling_leaf_count(&mut self) { 164 | // let prev_count = self.nulling_leaf_count; 165 | // self.nulling_leaf_count = self.gc.liveness.iter().take(prev_count).filter(|x| 
x).count(); 166 | // } 167 | 168 | // fn relocate_marked(&mut self) -> usize { 169 | // let mut destination = self.graph.iter(); 170 | // let mut count = 0; 171 | // // ... TODO: relocate 172 | // for (alive, source) in self.gc.liveness.iter().zip(self.graph.iter()) { 173 | // if alive { 174 | // destination.next().unwrap().cell.set(*source); 175 | // count += 1; 176 | // } 177 | // } 178 | // count 179 | // } 180 | } 181 | 182 | impl MarkAndSweep { 183 | #[inline] 184 | fn dfs_queue_factors(&mut self, summand: &CompactNode) { 185 | match summand.expand() { 186 | Product { 187 | left_factor, 188 | right_factor, 189 | .. 190 | } => { 191 | if let Some(factor) = right_factor { 192 | if let Some(false) = self.liveness.get(factor.usize()) { 193 | self.dfs.push(factor); 194 | } 195 | } 196 | if let Some(false) = self.liveness.get(left_factor.usize()) { 197 | self.dfs.push(left_factor); 198 | } 199 | } 200 | NullingLeaf { .. } | Evaluated { .. } => {} 201 | Sum { .. } => unreachable!(), 202 | } 203 | } 204 | } 205 | 206 | impl Forest for Bocage { 207 | type NodeRef = NodeHandle; 208 | type LeafValue = u32; 209 | 210 | const FOREST_BYTES_PER_RECOGNIZER_BYTE: usize = 2; 211 | 212 | #[inline] 213 | fn begin_sum(&mut self) { 214 | // nothing to do 215 | } 216 | 217 | #[inline] 218 | fn push_summand(&mut self, item: CompletedItem) { 219 | self.graph.push( 220 | Product { 221 | action: item.dot, 222 | left_factor: item.left_node, 223 | right_factor: item.right_node, 224 | } 225 | .compact(), 226 | ); 227 | self.summand_count += 1; 228 | } 229 | 230 | #[inline] 231 | fn sum(&mut self, lhs_sym: Symbol, _origin: u32) -> Self::NodeRef { 232 | let result = unsafe { 233 | match self.summand_count { 234 | 0 => hint::unreachable_unchecked(), 235 | 1 => NodeHandle(self.graph.len() as u32 - 1), 236 | summand_count => { 237 | // Slower case: ambiguous node. 
238 | let first_summand_idx = self.graph.len() - summand_count as usize; 239 | let first_summand = self.graph.get_unchecked(first_summand_idx).clone(); 240 | self.graph.push(first_summand); 241 | *self.graph.get_unchecked_mut(first_summand_idx) = Sum { 242 | nonterminal: lhs_sym, 243 | count: self.summand_count as u32, 244 | } 245 | .compact(); 246 | NodeHandle(first_summand_idx as u32) 247 | } 248 | } 249 | }; 250 | self.summand_count = 0; 251 | result 252 | } 253 | 254 | #[inline] 255 | fn leaf(&mut self, token: Symbol, _pos: u32, value: Self::LeafValue) -> Self::NodeRef { 256 | let result = NodeHandle(self.graph.len() as u32); 257 | self.graph.push( 258 | Evaluated { 259 | symbol: token, 260 | values: value, 261 | } 262 | .compact(), 263 | ); 264 | result 265 | } 266 | 267 | #[inline] 268 | fn nulling(&self, token: Symbol) -> Self::NodeRef { 269 | NodeHandle::nulling(token) 270 | } 271 | } 272 | -------------------------------------------------------------------------------- /src/forest/bocage/node.rs: -------------------------------------------------------------------------------- 1 | use std::cell::Cell; 2 | use std::hint; 3 | 4 | use cfg::symbol::Symbol; 5 | 6 | pub use self::Node::*; 7 | use self::Tag::*; 8 | use forest::node_handle::{NodeHandle, NULL_HANDLE}; 9 | 10 | // Node variants `Sum`/`Product` are better known in literature as `OR`/`AND`. 11 | #[derive(Copy, Clone, Debug)] 12 | pub enum Node { 13 | Sum { 14 | /// 8 bytes. 15 | /// Invariant: count > 1. 16 | /// Invariant: This node can only be directly followed by `Product`. 17 | nonterminal: Symbol, 18 | count: u32, 19 | }, 20 | Product { 21 | /// 12+ bytes. 22 | action: u32, 23 | left_factor: NodeHandle, 24 | right_factor: Option, 25 | }, 26 | NullingLeaf { 27 | /// 4 bytes. 28 | symbol: Symbol, 29 | }, 30 | Evaluated { 31 | /// 8 bytes. 
32 | symbol: Symbol, 33 | values: u32, 34 | }, 35 | } 36 | 37 | #[derive(Clone)] 38 | pub struct CompactNode { 39 | cell: Cell<[CompactField; 3]>, 40 | } 41 | 42 | // Node variants `Sum`/`Product` are better known in literature as `OR`/`AND`. 43 | #[derive(Copy, Clone)] 44 | union CompactField { 45 | // sum 46 | nonterminal: Symbol, 47 | count: u32, 48 | 49 | // product 50 | action: u32, 51 | factor: NodeHandle, 52 | // right_factor: NodeHandle, 53 | 54 | // leaf 55 | symbol: Symbol, 56 | values: u32, 57 | 58 | // tag 59 | tag: u32, 60 | } 61 | 62 | #[derive(Copy, Clone)] 63 | enum Tag { 64 | LeafTag = 0b00 << TAG_BIT, 65 | SumTag = 0b01 << TAG_BIT, 66 | ProductTag = 0b10 << TAG_BIT, 67 | } 68 | 69 | impl Tag { 70 | #[inline] 71 | fn from_u32(n: u32) -> Option { 72 | let n = n & TAG_MASK; 73 | if n == LeafTag.to_u32() { 74 | Some(LeafTag) 75 | } else if n == SumTag.to_u32() { 76 | Some(SumTag) 77 | } else if n == ProductTag.to_u32() { 78 | Some(ProductTag) 79 | } else { 80 | None 81 | } 82 | } 83 | 84 | #[inline] 85 | fn to_u32(&self) -> u32 { 86 | match *self { 87 | LeafTag => 0b00 << TAG_BIT, 88 | SumTag => 0b01 << TAG_BIT, 89 | ProductTag => 0b10 << TAG_BIT, 90 | } 91 | } 92 | } 93 | 94 | const TAG_BIT: usize = 30; 95 | const TAG_MASK: u32 = 0b11 << TAG_BIT; 96 | const NULL_VALUES: u32 = 0xFFFF_FFFF; 97 | pub(super) const NULL_ACTION: u32 = !TAG_MASK; 98 | 99 | impl Node { 100 | #[inline] 101 | pub(super) fn compact(self) -> CompactNode { 102 | let mut fields = match self { 103 | Product { 104 | left_factor, 105 | right_factor, 106 | action, 107 | } => { 108 | let right_factor = right_factor.unwrap_or(NULL_HANDLE); 109 | [ 110 | CompactField { action }, 111 | CompactField { 112 | factor: left_factor, 113 | }, 114 | CompactField { 115 | factor: right_factor, 116 | }, 117 | ] 118 | } 119 | Sum { nonterminal, count } => [ 120 | CompactField { nonterminal }, 121 | CompactField { count }, 122 | CompactField { tag: 0 }, 123 | ], 124 | NullingLeaf { symbol } => [ 125 | 
CompactField { symbol }, 126 | CompactField { 127 | values: NULL_VALUES, 128 | }, 129 | CompactField { tag: 0 }, 130 | ], 131 | Evaluated { symbol, values } => [ 132 | CompactField { symbol }, 133 | CompactField { values }, 134 | CompactField { tag: 0 }, 135 | ], 136 | }; 137 | unsafe { 138 | set_tag(&mut fields, self.tag()); 139 | } 140 | CompactNode { 141 | cell: Cell::new(fields), 142 | } 143 | } 144 | 145 | #[inline] 146 | fn tag(&self) -> Tag { 147 | match self { 148 | Product { .. } => ProductTag, 149 | Sum { .. } => SumTag, 150 | NullingLeaf { .. } | Evaluated { .. } => LeafTag, 151 | } 152 | } 153 | } 154 | 155 | impl CompactNode { 156 | #[inline] 157 | pub(super) fn set(&self, node: Node) { 158 | self.cell.set(node.compact().cell.get()); 159 | } 160 | 161 | #[inline] 162 | pub(super) fn expand(&self) -> Node { 163 | let mut fields = self.cell.get(); 164 | unsafe { 165 | let tag = get_and_erase_tag(&mut fields); 166 | match tag { 167 | LeafTag => { 168 | if fields[1].values == NULL_VALUES { 169 | NullingLeaf { 170 | symbol: fields[0].symbol, 171 | } 172 | } else { 173 | Evaluated { 174 | symbol: fields[0].symbol, 175 | values: fields[1].values, 176 | } 177 | } 178 | } 179 | ProductTag => Product { 180 | action: fields[0].action, 181 | left_factor: fields[1].factor, 182 | right_factor: fields[2].factor.to_option(), 183 | }, 184 | SumTag => Sum { 185 | nonterminal: fields[0].nonterminal, 186 | count: fields[1].count, 187 | }, 188 | } 189 | } 190 | } 191 | } 192 | 193 | #[inline] 194 | unsafe fn unwrap_unchecked(opt: Option) -> T { 195 | match opt { 196 | Some(val) => val, 197 | None => hint::unreachable_unchecked(), 198 | } 199 | } 200 | 201 | #[inline] 202 | unsafe fn set_tag(fields: &mut [CompactField; 3], tag: Tag) { 203 | fields[0].tag |= tag.to_u32(); 204 | } 205 | 206 | #[inline] 207 | unsafe fn get_and_erase_tag(fields: &mut [CompactField; 3]) -> Tag { 208 | let &mut CompactField { ref mut tag } = &mut fields[0]; 209 | let extract_tag = *tag; 210 | 
    // Clear the tag bits in place, leaving the untagged field value behind.
    *tag = *tag & !TAG_MASK;
    // SAFETY: every CompactNode is written via `compact()`, which always stores
    // a valid tag, so `Tag::from_u32` cannot return `None` here.
    unwrap_unchecked(Tag::from_u32(extract_tag))
}

--------------------------------------------------------------------------------
/src/forest/bocage/order.rs:
--------------------------------------------------------------------------------

use cfg::symbol::Symbol;

use super::node::CompactNode;

/// Ordering policy applied while walking a bocage: decides which summands of an
/// ambiguous (sum) node are kept, and in which order.
pub trait Order {
    /// Apply the order to sum node alternatives.
    /// The default implementation keeps all alternatives unchanged.
    fn sum<'b>(&mut self, alternatives: &'b [CompactNode]) -> &'b [CompactNode] {
        alternatives
    }

    /// Apply the order to product node factors.
    /// The default implementation makes no choice.
    // NOTE(review): the `Option` payload type was lost in this dump
    // (presumably `Option<usize>`) — confirm against the original source.
    fn product(&mut self, _factors: &[(Symbol, u32)]) -> Option {
        None
    }
}

/// An `Order` that imposes no ordering: every alternative is kept as-is.
#[derive(Default)]
pub struct NullOrder;

impl Order for NullOrder {}

impl NullOrder {
    /// Creates a new `NullOrder`.
    pub fn new() -> Self {
        NullOrder
    }
}

--------------------------------------------------------------------------------
/src/forest/bocage/traverse.rs:
--------------------------------------------------------------------------------

use std::borrow::Borrow;
use std::slice;

use bit_vec;
use cfg::symbol::Symbol;
use ref_slice::ref_slice;

use forest::bocage::node::Node::*;
use forest::bocage::node::{CompactNode, Node};
use forest::node_handle::NodeHandle;
use forest::Bocage;
use grammar::InternalGrammar;

pub use self::HandleVariant::*;

// NOTE(review): the impl's generic parameter appears stripped in this dump.
impl Bocage {
    // Once node liveness is marked, you may traverse the nodes.
18 | pub fn traverse(&self) -> Traverse { 19 | Traverse { 20 | bocage: self, 21 | graph_iter: self.graph.iter(), 22 | liveness_iter: self.gc.liveness.iter(), 23 | factor_stack: vec![], 24 | factor_traversal: vec![], 25 | } 26 | } 27 | } 28 | 29 | pub struct Traverse<'f, G> { 30 | bocage: &'f Bocage, 31 | // main iterators 32 | graph_iter: slice::Iter<'f, CompactNode>, 33 | liveness_iter: bit_vec::Iter<'f>, 34 | // Space for unrolling factors 35 | factor_stack: Vec<(Symbol, u32)>, 36 | // Scratch space for traversal 37 | factor_traversal: Vec, 38 | } 39 | 40 | impl<'f, G> Traverse<'f, G> 41 | where 42 | G: Borrow, 43 | { 44 | pub fn next_node<'t>(&'t mut self) -> Option> { 45 | while let (Some(node), Some(alive)) = (self.graph_iter.next(), self.liveness_iter.next()) { 46 | if !alive { 47 | continue; 48 | } 49 | match node.expand() { 50 | Product { action, .. } => { 51 | if self.bocage.is_transparent(action) { 52 | continue; 53 | } 54 | return Some(TraversalHandle { 55 | node, 56 | symbol: self.bocage.grammar.borrow().get_lhs(action), 57 | item: SumHandle(Products { 58 | products: ref_slice(node).iter(), 59 | traverse: self, 60 | }), 61 | }); 62 | } 63 | Sum { 64 | nonterminal: symbol, 65 | count, 66 | } => { 67 | let products = self.graph_iter.as_slice()[..count as usize].iter(); 68 | for _ in 0..count { 69 | self.graph_iter.next(); 70 | self.liveness_iter.next(); 71 | } 72 | return Some(TraversalHandle { 73 | node, 74 | symbol, 75 | item: SumHandle(Products { 76 | products, 77 | traverse: self, 78 | }), 79 | }); 80 | } 81 | NullingLeaf { symbol } => { 82 | return Some(TraversalHandle { 83 | node, 84 | symbol, 85 | item: NullingHandle, 86 | }); 87 | } 88 | Evaluated { symbol, values } => { 89 | return Some(TraversalHandle { 90 | node, 91 | symbol, 92 | item: LeafHandle(values), 93 | }); 94 | } 95 | } 96 | } 97 | None 98 | } 99 | 100 | fn unfold_factors(&mut self, left: NodeHandle, right: Option) { 101 | self.factor_stack.clear(); 102 | self.enqueue_for_unfold(left, 
right); 103 | while let Some(node) = self.pop_for_unfold() { 104 | match node { 105 | Product { 106 | left_factor, 107 | right_factor, 108 | .. 109 | } => { 110 | self.enqueue_for_unfold(left_factor, right_factor); 111 | } 112 | Evaluated { symbol, values } => { 113 | self.factor_stack.push((symbol, values)); 114 | } 115 | _ => unreachable!(), 116 | } 117 | } 118 | } 119 | 120 | fn enqueue_for_unfold(&mut self, left: NodeHandle, right: Option) { 121 | if let Some(right) = right { 122 | self.factor_traversal.push(right); 123 | } 124 | self.factor_traversal.push(left); 125 | } 126 | 127 | fn pop_for_unfold(&mut self) -> Option { 128 | self.factor_traversal.pop().map(|handle| { 129 | let node = self.bocage.graph[handle.usize()].clone(); 130 | node.expand() 131 | }) 132 | } 133 | } 134 | 135 | pub struct TraversalHandle<'f, 't, G> { 136 | pub node: &'f CompactNode, 137 | pub symbol: Symbol, 138 | pub item: HandleVariant<'f, 't, G>, 139 | } 140 | 141 | pub enum HandleVariant<'f, 't, G> { 142 | SumHandle(Products<'f, 't, G>), 143 | NullingHandle, 144 | LeafHandle(u32), 145 | } 146 | 147 | pub struct Products<'f, 't, G> { 148 | products: slice::Iter<'f, CompactNode>, 149 | traverse: &'t mut Traverse<'f, G>, 150 | } 151 | 152 | pub struct ProductHandle<'t> { 153 | pub action: u32, 154 | pub factors: &'t [(Symbol, u32)], 155 | } 156 | 157 | impl<'f, 't, G> Products<'f, 't, G> 158 | where 159 | G: Borrow, 160 | { 161 | pub fn next_product<'p>(&'p mut self) -> Option { 162 | while let Some(node) = self.products.next() { 163 | match node.expand() { 164 | Product { 165 | left_factor, 166 | right_factor, 167 | action, 168 | } => { 169 | let origin = self 170 | .traverse 171 | .bocage 172 | .grammar 173 | .borrow() 174 | .external_origin(action); 175 | if let Some(action) = origin { 176 | self.traverse.unfold_factors(left_factor, right_factor); 177 | return Some(ProductHandle { 178 | action, 179 | factors: &self.traverse.factor_stack[..], 180 | }); 181 | } 182 | } 183 | _ => 
unreachable!(), 184 | } 185 | } 186 | None 187 | } 188 | } 189 | 190 | impl<'f, 't, G> TraversalHandle<'f, 't, G> { 191 | pub fn set_evaluation_result(&self, values: u32) { 192 | self.node.set(Evaluated { 193 | symbol: self.symbol, 194 | values, 195 | }); 196 | } 197 | } 198 | -------------------------------------------------------------------------------- /src/forest/compact_bocage/mod.rs: -------------------------------------------------------------------------------- 1 | pub mod node; 2 | pub mod order; 3 | pub mod traverse; 4 | 5 | use std::borrow::Borrow; 6 | use std::hint; 7 | 8 | use bit_vec::BitVec; 9 | use cfg::symbol::Symbol; 10 | 11 | use forest::node_handle::NodeHandle; 12 | use forest::Forest; 13 | use grammar::InternalGrammar; 14 | use item::CompletedItem; 15 | 16 | use self::node::Node::*; 17 | use self::node::{Graph, Node, NULL_ACTION}; 18 | use self::order::Order; 19 | 20 | pub struct CompactBocage { 21 | pub(crate) graph: Graph, 22 | pub(crate) gc: MarkAndSweep, 23 | pub(crate) grammar: G, 24 | pub(crate) first_summand: NodeHandle, 25 | pub(crate) summand_count: u32, 26 | } 27 | 28 | pub(crate) struct MarkAndSweep { 29 | pub(crate) liveness: BitVec, 30 | // List for DFS and/or maybe relocation of stuff in the future. 
31 | pub(crate) dfs: Vec, 32 | } 33 | 34 | impl CompactBocage 35 | where 36 | G: Borrow, 37 | { 38 | pub fn new(grammar: G) -> Self { 39 | Self::with_capacities(grammar, 1024, 32) 40 | } 41 | 42 | pub fn with_capacities(grammar: G, graph_cap: usize, dfs_cap: usize) -> Self { 43 | let mut result = CompactBocage { 44 | graph: Graph::with_capacity(graph_cap), 45 | gc: MarkAndSweep { 46 | liveness: BitVec::with_capacity(graph_cap), 47 | dfs: Vec::with_capacity(dfs_cap), 48 | }, 49 | grammar, 50 | summand_count: 0, 51 | first_summand: NodeHandle(0), 52 | }; 53 | result.initialize_nulling(); 54 | result 55 | } 56 | 57 | pub(crate) fn initialize_nulling(&mut self) { 58 | // TODO trivial grammar check 59 | // self.nulling_leaf_count = self.nulling_symbol_count(); 60 | let nulling_leaf_count = self.nulling_symbol_count(); 61 | // Ensure that `max` is not ridiculously large. 62 | assert!(nulling_leaf_count < (1 << 20), "invalid nullable symbol"); 63 | let mut graph: Vec = (0..nulling_leaf_count) 64 | .map(|i| NullingLeaf { 65 | symbol: Symbol::from(i), 66 | }) 67 | .collect(); 68 | for &(lhs, rhs0, rhs1) in self.grammar.borrow().eliminated_nulling_intermediate() { 69 | graph[lhs.usize()] = Product { 70 | left_factor: NodeHandle::nulling(rhs0), 71 | right_factor: Some(NodeHandle::nulling(rhs1)), 72 | action: NULL_ACTION, 73 | }; 74 | } 75 | let mut pos = 0; 76 | let mut relocation = vec![]; 77 | for node in &graph { 78 | relocation.push(NodeHandle(pos)); 79 | pos += node.classify(pos).size() as u32; 80 | } 81 | for node in graph { 82 | match node { 83 | Product { 84 | action, 85 | left_factor, 86 | right_factor, 87 | } => { 88 | self.graph.push(Product { 89 | action, 90 | left_factor: relocation[left_factor.usize()], 91 | right_factor: right_factor.map(|f| relocation[f.usize()]), 92 | }); 93 | } 94 | other => { 95 | self.graph.push(other); 96 | } 97 | } 98 | } 99 | } 100 | 101 | fn nulling_symbol_count(&self) -> usize { 102 | // why 1? 
103 | self.grammar 104 | .borrow() 105 | .max_nulling_symbol() 106 | .map_or(1, |m| m + 1) 107 | } 108 | 109 | #[inline] 110 | pub fn mark_alive(&mut self, root: NodeHandle, _order: O) { 111 | self.gc.liveness.clear(); 112 | self.gc.liveness.grow(self.graph.vec.len(), false); 113 | self.gc.dfs.push(root); 114 | while let Some(node) = self.gc.dfs.pop() { 115 | self.gc.liveness.set(node.usize(), true); 116 | let summands = CompactBocage::::summands(&self.graph, node); 117 | // let summands = order.sum(summands); 118 | for summand in summands { 119 | // TODO: use order for products. 120 | self.gc.dfs_queue_factors(summand); 121 | } 122 | } 123 | } 124 | 125 | #[inline] 126 | fn summands<'a>(graph: &'a Graph, node: NodeHandle) -> impl Iterator + 'a { 127 | let mut iter = graph.iter_from(node); 128 | match iter.peek() { 129 | Some(Sum { count, .. }) => { 130 | iter.next(); 131 | iter.take(count as usize) 132 | } 133 | _ => iter.take(1), 134 | } 135 | } 136 | 137 | #[inline] 138 | fn process_product_tree_node(&self, mut node: Node) -> Node { 139 | match node { 140 | Product { 141 | ref mut left_factor, 142 | ref mut right_factor, 143 | action, 144 | } => { 145 | if right_factor.is_none() { 146 | // Add omitted phantom syms here. 
147 | if let Some((sym, dir)) = self.grammar.borrow().nulling(action) { 148 | let (left, right) = if dir { 149 | (*left_factor, NodeHandle::nulling(sym)) 150 | } else { 151 | (NodeHandle::nulling(sym), *left_factor) 152 | }; 153 | *left_factor = left; 154 | *right_factor = Some(right); 155 | } 156 | } 157 | } 158 | _ => {} 159 | } 160 | node 161 | } 162 | 163 | #[inline] 164 | pub(super) fn is_transparent(&self, action: u32) -> bool { 165 | action == NULL_ACTION || self.grammar.borrow().external_origin(action).is_none() 166 | } 167 | 168 | // fn mark_and_sweep(&mut self, root: NodeHandle) { 169 | // self.mark_alive(root); 170 | // self.sweep_garbage(); 171 | // self.update_nulling_leaf_count(); 172 | // } 173 | 174 | // fn sweep_garbage(&mut self) { 175 | // let count = self.relocate_marked(); 176 | // self.graph.truncate(count); 177 | // } 178 | 179 | // fn update_nulling_leaf_count(&mut self) { 180 | // let prev_count = self.nulling_leaf_count; 181 | // self.nulling_leaf_count = self.gc.liveness.iter().take(prev_count).filter(|x| x).count(); 182 | // } 183 | 184 | // fn relocate_marked(&mut self) -> usize { 185 | // let mut destination = self.graph.iter(); 186 | // let mut count = 0; 187 | // // ... TODO: relocate 188 | // for (alive, source) in self.gc.liveness.iter().zip(self.graph.iter()) { 189 | // if alive { 190 | // destination.next().unwrap().cell.set(*source); 191 | // count += 1; 192 | // } 193 | // } 194 | // count 195 | // } 196 | } 197 | 198 | impl MarkAndSweep { 199 | #[inline] 200 | fn dfs_queue_factors(&mut self, summand: Node) { 201 | match summand { 202 | Product { 203 | left_factor, 204 | right_factor, 205 | .. 206 | } => { 207 | if let Some(factor) = right_factor { 208 | if let Some(false) = self.liveness.get(factor.usize()) { 209 | self.dfs.push(factor); 210 | } 211 | } 212 | if let Some(false) = self.liveness.get(left_factor.usize()) { 213 | self.dfs.push(left_factor); 214 | } 215 | } 216 | NullingLeaf { .. } | Evaluated { .. 
} => {} 217 | Sum { .. } => unreachable!(), 218 | } 219 | } 220 | } 221 | 222 | impl Forest for CompactBocage 223 | where 224 | G: Borrow, 225 | { 226 | type NodeRef = NodeHandle; 227 | type LeafValue = u32; 228 | 229 | const FOREST_BYTES_PER_RECOGNIZER_BYTE: usize = 2; 230 | 231 | #[inline] 232 | fn begin_sum(&mut self) { 233 | self.first_summand = NodeHandle(self.graph.vec.len() as u32); 234 | } 235 | 236 | #[inline] 237 | fn push_summand(&mut self, item: CompletedItem) { 238 | self.graph.push(self.process_product_tree_node(Product { 239 | action: item.dot, 240 | left_factor: item.left_node, 241 | right_factor: item.right_node, 242 | })); 243 | self.summand_count += 1; 244 | } 245 | 246 | #[inline] 247 | fn sum(&mut self, lhs_sym: Symbol, _origin: u32) -> Self::NodeRef { 248 | unsafe { 249 | match self.summand_count { 250 | 0 => hint::unreachable_unchecked(), 251 | 1 => {} 252 | summand_count => { 253 | // Slower case: ambiguous node. 254 | let sum = Sum { 255 | nonterminal: lhs_sym, 256 | count: summand_count, 257 | }; 258 | self.graph.set_up(self.first_summand, sum); 259 | } 260 | } 261 | }; 262 | let result = self.first_summand; 263 | self.summand_count = 0; 264 | result 265 | } 266 | 267 | #[inline] 268 | fn leaf(&mut self, token: Symbol, _pos: u32, _value: Self::LeafValue) -> Self::NodeRef { 269 | self.graph.push(Evaluated { symbol: token }) 270 | } 271 | 272 | #[inline] 273 | fn nulling(&self, token: Symbol) -> Self::NodeRef { 274 | NodeHandle::nulling(token) 275 | } 276 | } 277 | -------------------------------------------------------------------------------- /src/forest/compact_bocage/node.rs: -------------------------------------------------------------------------------- 1 | use std::cell::Cell; 2 | use std::hint; 3 | 4 | use cfg::symbol::Symbol; 5 | 6 | pub use self::Node::*; 7 | use self::Tag::*; 8 | use forest::node_handle::{NodeHandle, NULL_HANDLE}; 9 | 10 | pub struct Graph { 11 | pub(crate) vec: Vec>, 12 | } 13 | 14 | impl Graph { 15 | pub(crate) 
fn with_capacity(capacity: usize) -> Self { 16 | Graph { 17 | vec: Vec::with_capacity(capacity), 18 | } 19 | } 20 | 21 | pub(crate) fn push(&mut self, node: Node) -> NodeHandle { 22 | let position = self.vec.len() as u32; 23 | let (node_repr, size) = node.to_repr(position); 24 | unsafe { 25 | self.vec 26 | .extend(node_repr.fields[..size].iter().cloned().map(Cell::new)); 27 | } 28 | NodeHandle(position) 29 | } 30 | 31 | pub(crate) fn set_up(&mut self, mut handle: NodeHandle, node: Node) { 32 | let (node_repr, size) = node.to_repr(handle.0); 33 | let mut current_handle = handle; 34 | while current_handle.usize() < handle.usize() + size { 35 | let current_node = self.get(current_handle); 36 | self.push(current_node); 37 | current_handle.0 += current_node.classify(current_handle.0).size() as u32; 38 | } 39 | for i in 0..size { 40 | unsafe { 41 | self.vec[handle.usize() + i].set(node_repr.fields[i]); 42 | } 43 | } 44 | handle.0 += size as u32; 45 | while handle.0 < current_handle.0 { 46 | self.vec[handle.usize()].set(NopTag.to_u16()); 47 | handle.0 += 1; 48 | } 49 | } 50 | 51 | pub(crate) fn get(&self, handle: NodeHandle) -> Node { 52 | self.iter_from(handle).next().unwrap() 53 | } 54 | 55 | pub(crate) fn iter_from(&self, handle: NodeHandle) -> Iter { 56 | Iter { 57 | vec: &self.vec[..], 58 | handle, 59 | } 60 | } 61 | } 62 | 63 | #[derive(Clone, Copy)] 64 | pub(crate) struct Iter<'a> { 65 | pub(crate) vec: &'a [Cell], 66 | pub(crate) handle: NodeHandle, 67 | } 68 | 69 | impl<'a> Iterator for Iter<'a> { 70 | type Item = Node; 71 | 72 | fn next(&mut self) -> Option { 73 | unsafe { 74 | let head = if let Some(head) = self.vec.get(self.handle.usize()).cloned() { 75 | head.get() 76 | } else { 77 | return None; 78 | }; 79 | let (tag, head) = get_and_erase_tag(head); 80 | if let NopTag = tag { 81 | self.handle.0 += 1; 82 | self.next() 83 | } else { 84 | let mut node_repr = NodeRepr { fields: [0; 6] }; 85 | node_repr.fields[0] = head; 86 | let slice = 
&self.vec[self.handle.usize() + 1..self.handle.usize() + tag.size()]; 87 | for (i, val) in slice.iter().enumerate() { 88 | node_repr.fields[1 + i] = val.get(); 89 | } 90 | let result = node_repr.expand(tag, self.handle.0); 91 | self.handle.0 += tag.size() as u32; 92 | Some(result) 93 | } 94 | } 95 | } 96 | } 97 | 98 | impl<'a> Iter<'a> { 99 | #[inline] 100 | pub(crate) fn peek(&mut self) -> Option { 101 | self.clone().next() 102 | } 103 | } 104 | 105 | // Node variants `Sum`/`Product` are better known in literature as `OR`/`AND`. 106 | #[derive(Copy, Clone, Debug, Eq, PartialEq)] 107 | pub enum Node { 108 | Sum { 109 | /// 8 bytes. 110 | /// Invariant: count > 1. 111 | /// Invariant: This node can only be directly followed by `Product`. 112 | count: u32, 113 | nonterminal: Symbol, 114 | }, 115 | Product { 116 | /// 12+ bytes. 117 | action: u32, 118 | left_factor: NodeHandle, 119 | right_factor: Option, 120 | }, 121 | NullingLeaf { 122 | /// 4 bytes. 123 | symbol: Symbol, 124 | }, 125 | Evaluated { 126 | /// 4 bytes. 
127 | symbol: Symbol, 128 | }, 129 | } 130 | 131 | #[derive(Clone, Copy)] 132 | union NodeRepr { 133 | fields: [u16; 6], 134 | small_sum: SmallSumRepr, 135 | small_link: SmallLinkRepr, 136 | medium_link: MediumLinkRepr, 137 | small_product: SmallProductRepr, 138 | small_leaf: SmallLeafRepr, 139 | small_nulling_leaf: SmallNullingLeafRepr, 140 | sum: SumRepr, 141 | product: ProductRepr, 142 | leaf: LeafRepr, 143 | nop: NopRepr, 144 | } 145 | 146 | #[derive(Clone, Copy)] 147 | struct SmallSumRepr { 148 | nonterminal: u8, 149 | // smaller (big end position) 150 | count: u8, 151 | } 152 | 153 | #[derive(Clone, Copy)] 154 | struct SumRepr { 155 | count: u32, 156 | nonterminal: Symbol, 157 | } 158 | 159 | #[derive(Clone, Copy)] 160 | struct SmallLinkRepr { 161 | action: u8, 162 | // smaller (big end position) 163 | distance: u8, 164 | } 165 | 166 | #[derive(Clone, Copy)] 167 | struct MediumLinkRepr { 168 | distance: u16, 169 | action: u16, 170 | } 171 | 172 | #[derive(Clone, Copy)] 173 | struct SmallProductRepr { 174 | left_distance: u8, 175 | // smaller (big end position) 176 | right_distance: u8, 177 | action: u16, 178 | } 179 | 180 | #[derive(Clone, Copy)] 181 | #[repr(packed)] 182 | struct ProductRepr { 183 | upper_action: u16, 184 | lower_action: u16, 185 | left_factor: NodeHandle, 186 | right_factor: NodeHandle, 187 | } 188 | 189 | #[derive(Clone, Copy)] 190 | struct SmallNullingLeafRepr { 191 | symbol: u16, 192 | } 193 | 194 | #[derive(Clone, Copy)] 195 | struct LeafRepr { 196 | symbol: Symbol, 197 | } 198 | 199 | #[derive(Clone, Copy)] 200 | struct SmallLeafRepr { 201 | symbol: u16, 202 | } 203 | 204 | #[derive(Clone, Copy)] 205 | struct NopRepr { 206 | nop: u16, 207 | } 208 | 209 | #[derive(Copy, Clone, Eq, PartialEq, Debug)] 210 | pub(super) enum Tag { 211 | SmallSumTag = 0b000 << TAG_BIT, 212 | SmallLinkTag = 0b001 << TAG_BIT, 213 | MediumLinkTag = 0b010 << TAG_BIT, 214 | SmallProductTag = 0b011 << TAG_BIT, 215 | SmallLeafTag = 0b100 << TAG_BIT, 216 | // 
SmallNonnullingLeaf = 0b1000 << (TAG_BIT - 1), 217 | SmallNullingLeafTag = 0b1001 << (TAG_BIT - 1), 218 | LeafTag = 0b101 << TAG_BIT, 219 | SumTag = 0b111 << TAG_BIT, 220 | ProductTag = 0b110 << TAG_BIT, 221 | NopTag = 0b1111_1111_1111_1111, 222 | } 223 | 224 | impl Tag { 225 | #[inline] 226 | fn from_u16(num: u16) -> Option { 227 | let n = num & TAG_MASK; 228 | if num == NopTag.to_u16() { 229 | Some(NopTag) 230 | } else if n == LeafTag.to_u16() { 231 | Some(LeafTag) 232 | } else if n == SumTag.to_u16() { 233 | Some(SumTag) 234 | } else if n == ProductTag.to_u16() { 235 | Some(ProductTag) 236 | } else if n == SmallSumTag.to_u16() { 237 | Some(SmallSumTag) 238 | } else if n == SmallLinkTag.to_u16() { 239 | Some(SmallLinkTag) 240 | } else if n == MediumLinkTag.to_u16() { 241 | Some(MediumLinkTag) 242 | } else if n == SmallProductTag.to_u16() { 243 | Some(SmallProductTag) 244 | } else if n == SmallLeafTag.to_u16() { 245 | let n = num & SMALL_LEAF_TAG_MASK; 246 | if n == SmallLeafTag.to_u16() { 247 | Some(SmallLeafTag) 248 | } else if n == SmallNullingLeafTag.to_u16() { 249 | Some(SmallNullingLeafTag) 250 | } else { 251 | None 252 | } 253 | } else { 254 | None 255 | } 256 | } 257 | 258 | #[inline] 259 | pub(super) fn to_u16(self) -> u16 { 260 | match self { 261 | SmallSumTag => 0b000 << TAG_BIT, 262 | SmallLinkTag => 0b001 << TAG_BIT, 263 | MediumLinkTag => 0b010 << TAG_BIT, 264 | SmallProductTag => 0b011 << TAG_BIT, 265 | SmallLeafTag => 0b100 << TAG_BIT, 266 | // SmallNonnullingLeaf = 0b1000 << (TAG_BIT - 1), 267 | SmallNullingLeafTag => 0b1001 << (TAG_BIT - 1), 268 | LeafTag => 0b101 << TAG_BIT, 269 | SumTag => 0b111 << TAG_BIT, 270 | ProductTag => 0b110 << TAG_BIT, 271 | NopTag => 0b1111_1111_1111_1111, 272 | } 273 | } 274 | 275 | #[inline] 276 | fn mask(self) -> u16 { 277 | match self { 278 | SmallSumTag => TAG_MASK, 279 | SmallLinkTag => TAG_MASK, 280 | MediumLinkTag => TAG_MASK, 281 | SmallProductTag => TAG_MASK, 282 | SmallLeafTag => SMALL_LEAF_TAG_MASK, 283 | 
// SmallNonnullingLeaf = 0b1000 << (TAG_BIT - 1), 284 | SmallNullingLeafTag => SMALL_LEAF_TAG_MASK, 285 | LeafTag => TAG_MASK, 286 | SumTag => TAG_MASK, 287 | ProductTag => TAG_MASK, 288 | NopTag => 0b1111_1111_1111_1111, 289 | } 290 | } 291 | 292 | #[inline] 293 | pub(super) fn size(self) -> usize { 294 | match self { 295 | SmallSumTag => 1, 296 | SmallLinkTag => 1, 297 | MediumLinkTag => 2, 298 | SmallProductTag => 2, 299 | SmallLeafTag => 1, 300 | SmallNullingLeafTag => 1, 301 | LeafTag => 4, 302 | SumTag => 4, 303 | ProductTag => 6, 304 | NopTag => 1, 305 | } 306 | } 307 | } 308 | 309 | const TAG_BIT: usize = 5 + 8; 310 | const TAG_MASK: u16 = 0b111 << TAG_BIT; 311 | const SMALL_LEAF_TAG_MASK: u16 = 0b1111 << (TAG_BIT - 1); 312 | pub(super) const NULL_ACTION: u32 = !((TAG_MASK as u32) << 16); 313 | 314 | impl NodeRepr { 315 | fn expand(self, tag: Tag, position: u32) -> Node { 316 | unsafe { 317 | match (self, tag) { 318 | ( 319 | NodeRepr { 320 | small_sum: SmallSumRepr { nonterminal, count }, 321 | }, 322 | SmallSumTag, 323 | ) => Sum { 324 | nonterminal: Symbol::from(nonterminal as u32), 325 | count: count as u32, 326 | }, 327 | ( 328 | NodeRepr { 329 | sum: SumRepr { nonterminal, count }, 330 | }, 331 | SumTag, 332 | ) => Sum { nonterminal, count }, 333 | ( 334 | NodeRepr { 335 | small_link: SmallLinkRepr { distance, action }, 336 | }, 337 | SmallLinkTag, 338 | ) => Product { 339 | action: action as u32, 340 | left_factor: NodeHandle(position - distance as u32), 341 | right_factor: None, 342 | }, 343 | ( 344 | NodeRepr { 345 | medium_link: MediumLinkRepr { distance, action }, 346 | }, 347 | MediumLinkTag, 348 | ) => Product { 349 | action: action as u32, 350 | left_factor: NodeHandle(position - distance as u32), 351 | right_factor: None, 352 | }, 353 | ( 354 | NodeRepr { 355 | small_product: 356 | SmallProductRepr { 357 | right_distance, 358 | left_distance, 359 | action, 360 | }, 361 | }, 362 | SmallProductTag, 363 | ) => Product { 364 | action: action as 
u32, 365 | left_factor: NodeHandle(position - left_distance as u32), 366 | right_factor: Some(NodeHandle(position - right_distance as u32)), 367 | }, 368 | ( 369 | NodeRepr { 370 | product: 371 | ProductRepr { 372 | upper_action, 373 | lower_action, 374 | left_factor, 375 | right_factor, 376 | }, 377 | }, 378 | ProductTag, 379 | ) => Product { 380 | action: (upper_action as u32) << 16 | (lower_action as u32), 381 | left_factor, 382 | right_factor: right_factor.to_option(), 383 | }, 384 | ( 385 | NodeRepr { 386 | small_nulling_leaf: SmallNullingLeafRepr { symbol }, 387 | }, 388 | SmallNullingLeafTag, 389 | ) => NullingLeaf { 390 | symbol: Symbol::from(symbol as u32), 391 | }, 392 | ( 393 | NodeRepr { 394 | small_leaf: SmallLeafRepr { symbol }, 395 | }, 396 | SmallLeafTag, 397 | ) => Evaluated { 398 | symbol: Symbol::from(symbol as u32), 399 | }, 400 | ( 401 | NodeRepr { 402 | leaf: LeafRepr { symbol }, 403 | }, 404 | LeafTag, 405 | ) => Evaluated { symbol }, 406 | _ => unreachable!(), 407 | } 408 | } 409 | } 410 | } 411 | 412 | impl Node { 413 | #[inline] 414 | fn to_repr(self, position: u32) -> (NodeRepr, usize) { 415 | let tag = self.classify(position); 416 | unsafe { 417 | let mut result = match (self, tag) { 418 | (Sum { nonterminal, count }, SmallSumTag) => NodeRepr { 419 | small_sum: SmallSumRepr { 420 | nonterminal: nonterminal.usize() as u8, 421 | count: count as u8, 422 | }, 423 | }, 424 | (Sum { nonterminal, count }, SumTag) => NodeRepr { 425 | sum: SumRepr { nonterminal, count }, 426 | }, 427 | ( 428 | Product { 429 | left_factor, 430 | right_factor: None, 431 | action, 432 | }, 433 | SmallLinkTag, 434 | ) => NodeRepr { 435 | small_link: SmallLinkRepr { 436 | distance: (position - left_factor.0) as u8, 437 | action: action as u8, 438 | }, 439 | }, 440 | ( 441 | Product { 442 | left_factor, 443 | right_factor: None, 444 | action, 445 | }, 446 | MediumLinkTag, 447 | ) => NodeRepr { 448 | medium_link: MediumLinkRepr { 449 | distance: (position - 
left_factor.0) as u16, 450 | action: action as u16, 451 | }, 452 | }, 453 | ( 454 | Product { 455 | left_factor, 456 | right_factor: Some(right), 457 | action, 458 | }, 459 | SmallProductTag, 460 | ) => NodeRepr { 461 | small_product: SmallProductRepr { 462 | right_distance: (position - right.0) as u8, 463 | left_distance: (position - left_factor.0) as u8, 464 | action: action as u16, 465 | }, 466 | }, 467 | ( 468 | Product { 469 | left_factor, 470 | right_factor, 471 | action, 472 | }, 473 | ProductTag, 474 | ) => NodeRepr { 475 | product: ProductRepr { 476 | upper_action: (action >> 16) as u16, 477 | lower_action: action as u16, 478 | left_factor, 479 | right_factor: right_factor.unwrap_or(NULL_HANDLE), 480 | }, 481 | }, 482 | (NullingLeaf { symbol }, SmallNullingLeafTag) => NodeRepr { 483 | small_nulling_leaf: SmallNullingLeafRepr { 484 | symbol: symbol.usize() as u16, 485 | }, 486 | }, 487 | (NullingLeaf { symbol }, LeafTag) => NodeRepr { 488 | leaf: LeafRepr { symbol }, 489 | }, 490 | (Evaluated { symbol }, SmallLeafTag) => NodeRepr { 491 | small_leaf: SmallLeafRepr { 492 | symbol: symbol.usize() as u16, 493 | }, 494 | }, 495 | (Evaluated { symbol }, LeafTag) => NodeRepr { 496 | leaf: LeafRepr { symbol }, 497 | }, 498 | _ => unreachable!(), 499 | }; 500 | result.fields[0] |= tag.to_u16(); 501 | (result, tag.size()) 502 | } 503 | } 504 | 505 | #[inline] 506 | pub(super) fn classify(self, position: u32) -> Tag { 507 | match self { 508 | Product { 509 | left_factor, 510 | right_factor, 511 | action, 512 | } => match right_factor { 513 | Some(handle) => { 514 | if position >= handle.0 515 | && position >= left_factor.0 516 | && position - handle.0 < (1 << 5) 517 | && position - left_factor.0 < (1 << 8) 518 | && action < (1 << 16) 519 | { 520 | SmallProductTag 521 | } else { 522 | ProductTag 523 | } 524 | } 525 | None => { 526 | if position >= left_factor.0 527 | && position - left_factor.0 < (1 << 5) 528 | && action < (1 << 8) 529 | { 530 | SmallLinkTag 531 | } 
else if position >= left_factor.0 532 | && position - left_factor.0 < (1 << (5 + 8)) 533 | && action < (1 << 16) 534 | { 535 | MediumLinkTag 536 | } else { 537 | ProductTag 538 | } 539 | } 540 | }, 541 | NullingLeaf { symbol } => { 542 | if symbol.usize() < (1 << (4 + 8)) { 543 | SmallNullingLeafTag 544 | } else { 545 | LeafTag 546 | } 547 | } 548 | Evaluated { symbol } => { 549 | if symbol.usize() < (1 << (4 + 8)) { 550 | SmallLeafTag 551 | } else { 552 | LeafTag 553 | } 554 | } 555 | Sum { nonterminal, count } => { 556 | if count < (1 << 5) && nonterminal.usize() < (1 << 8) { 557 | SmallSumTag 558 | } else { 559 | SumTag 560 | } 561 | } 562 | } 563 | } 564 | } 565 | 566 | #[inline] 567 | unsafe fn unwrap_unchecked(opt: Option) -> T { 568 | match opt { 569 | Some(val) => val, 570 | None => hint::unreachable_unchecked(), 571 | } 572 | } 573 | 574 | #[inline] 575 | unsafe fn get_and_erase_tag(field: u16) -> (Tag, u16) { 576 | let tag = unwrap_unchecked(Tag::from_u16(field)); 577 | (tag, field & !tag.mask()) 578 | } 579 | -------------------------------------------------------------------------------- /src/forest/compact_bocage/order.rs: -------------------------------------------------------------------------------- 1 | use std::cell::Cell; 2 | 3 | use cfg::symbol::Symbol; 4 | 5 | pub trait Order { 6 | /// Apply the order to sum node alternatives. 7 | fn sum<'b>(&mut self, alternatives: &'b [Cell]) -> &'b [Cell] { 8 | alternatives 9 | } 10 | 11 | /// Apply the order to product node factors. 
12 | fn product(&mut self, _factors: &[(Symbol, u32)]) -> Option { 13 | None 14 | } 15 | } 16 | 17 | #[derive(Default)] 18 | pub struct NullOrder; 19 | 20 | impl Order for NullOrder {} 21 | 22 | impl NullOrder { 23 | pub fn new() -> Self { 24 | NullOrder 25 | } 26 | } 27 | -------------------------------------------------------------------------------- /src/forest/compact_bocage/traverse.rs: -------------------------------------------------------------------------------- 1 | use std::borrow::Borrow; 2 | use std::iter; 3 | 4 | use bit_vec::BitVec; 5 | use cfg::symbol::Symbol; 6 | 7 | use forest::compact_bocage::node::Node::*; 8 | use forest::compact_bocage::node::{Iter, Node, Tag}; 9 | use forest::node_handle::NodeHandle; 10 | use forest::CompactBocage; 11 | use grammar::InternalGrammar; 12 | 13 | pub use self::HandleVariant::*; 14 | 15 | impl CompactBocage { 16 | // Once node liveness is marked, you may traverse the nodes. 17 | pub fn traverse(&self) -> Traverse { 18 | Traverse { 19 | bocage: self, 20 | graph_iter: self.graph.iter_from(NodeHandle(0)), 21 | liveness: &self.gc.liveness, 22 | factor_stack: vec![], 23 | factor_traversal: vec![], 24 | } 25 | } 26 | } 27 | 28 | pub struct Traverse<'f, G> { 29 | bocage: &'f CompactBocage, 30 | // main iterators 31 | graph_iter: Iter<'f>, 32 | liveness: &'f BitVec, 33 | // Space for unrolling factors 34 | factor_stack: Vec<(Symbol, NodeHandle)>, 35 | // Scratch space for traversal 36 | factor_traversal: Vec, 37 | } 38 | 39 | impl<'f, G> Traverse<'f, G> 40 | where 41 | G: Borrow, 42 | { 43 | pub fn next_node<'t>(&'t mut self) -> Option> { 44 | while let Some(node) = self.graph_iter.peek() { 45 | let iter = self.graph_iter; 46 | let alive = self.liveness[self.graph_iter.handle.usize()]; 47 | println!( 48 | "next_node @{:?} {:?} {}", 49 | self.graph_iter.handle, node, alive 50 | ); 51 | self.graph_iter.next(); 52 | if !alive { 53 | continue; 54 | } 55 | match node { 56 | Product { action, .. 
} => { 57 | if self.bocage.is_transparent(action) { 58 | continue; 59 | } 60 | let products = iter.take(1); 61 | return Some(TraversalHandle { 62 | iter, 63 | symbol: self.bocage.grammar.borrow().get_lhs(action), 64 | item: SumHandle(Products { 65 | products, 66 | traverse: self, 67 | }), 68 | }); 69 | } 70 | Sum { 71 | nonterminal: symbol, 72 | count, 73 | } => { 74 | let products = self.graph_iter.take(count as usize); 75 | for _ in 0..count { 76 | let p = self.graph_iter.handle; 77 | let n = self.graph_iter.next(); 78 | println!("next_node product @{:?} {:?}", p, n); 79 | } 80 | return Some(TraversalHandle { 81 | iter, 82 | symbol, 83 | item: SumHandle(Products { 84 | products, 85 | traverse: self, 86 | }), 87 | }); 88 | } 89 | NullingLeaf { symbol } => { 90 | return Some(TraversalHandle { 91 | iter, 92 | symbol, 93 | item: NullingHandle, 94 | }); 95 | } 96 | Evaluated { symbol, .. } => { 97 | return Some(TraversalHandle { 98 | iter, 99 | symbol, 100 | item: LeafHandle, 101 | }); 102 | } 103 | } 104 | } 105 | None 106 | } 107 | 108 | fn unfold_factors(&mut self, left: NodeHandle, right: Option) { 109 | self.factor_stack.clear(); 110 | self.enqueue_for_unfold(left, right); 111 | while let Some(node) = self.pop_for_unfold() { 112 | match node { 113 | ( 114 | Product { 115 | left_factor, 116 | right_factor, 117 | .. 
118 | }, 119 | _, 120 | ) => { 121 | self.enqueue_for_unfold(left_factor, right_factor); 122 | } 123 | (Evaluated { symbol }, handle) => { 124 | self.factor_stack.push((symbol, handle)); 125 | } 126 | _ => unreachable!(), 127 | } 128 | } 129 | } 130 | 131 | fn enqueue_for_unfold(&mut self, left: NodeHandle, right: Option) { 132 | if let Some(right) = right { 133 | self.factor_traversal.push(right); 134 | } 135 | self.factor_traversal.push(left); 136 | } 137 | 138 | fn pop_for_unfold(&mut self) -> Option<(Node, NodeHandle)> { 139 | self.factor_traversal 140 | .pop() 141 | .map(|handle| (self.bocage.graph.get(handle), handle)) 142 | } 143 | } 144 | 145 | pub struct TraversalHandle<'f, 't, G> { 146 | pub(crate) iter: Iter<'f>, 147 | pub symbol: Symbol, 148 | pub item: HandleVariant<'f, 't, G>, 149 | } 150 | 151 | pub enum HandleVariant<'f, 't, G> { 152 | SumHandle(Products<'f, 't, G>), 153 | NullingHandle, 154 | LeafHandle, 155 | } 156 | 157 | pub struct Products<'f, 't, G> { 158 | products: iter::Take>, 159 | traverse: &'t mut Traverse<'f, G>, 160 | } 161 | 162 | pub struct ProductHandle<'t> { 163 | pub action: u32, 164 | pub factors: &'t [(Symbol, NodeHandle)], 165 | } 166 | 167 | impl<'f, 't, G> Products<'f, 't, G> 168 | where 169 | G: Borrow, 170 | { 171 | pub fn next_product<'p>(&'p mut self) -> Option { 172 | while let Some(node) = self.products.next() { 173 | match node { 174 | Product { 175 | left_factor, 176 | right_factor, 177 | action, 178 | } => { 179 | let origin = self 180 | .traverse 181 | .bocage 182 | .grammar 183 | .borrow() 184 | .external_origin(action); 185 | if let Some(action) = origin { 186 | self.traverse.unfold_factors(left_factor, right_factor); 187 | return Some(ProductHandle { 188 | action, 189 | factors: &self.traverse.factor_stack[..], 190 | }); 191 | } 192 | } 193 | _ => unreachable!(), 194 | } 195 | } 196 | None 197 | } 198 | } 199 | 200 | impl<'f, 't, G> TraversalHandle<'f, 't, G> { 201 | pub fn end_evaluation(&self) { 202 | 
self.iter.vec[self.iter.handle.usize()].set(Tag::SmallLeafTag.to_u16()); 203 | } 204 | 205 | pub fn handle(&self) -> NodeHandle { 206 | self.iter.handle 207 | } 208 | } 209 | -------------------------------------------------------------------------------- /src/forest/mod.rs: -------------------------------------------------------------------------------- 1 | pub mod bocage; 2 | pub mod compact_bocage; 3 | pub mod node_handle; 4 | pub mod null_forest; 5 | 6 | pub use self::bocage::Bocage; 7 | pub use self::compact_bocage::CompactBocage; 8 | pub use self::null_forest::NullForest; 9 | 10 | use cfg::Symbol; 11 | use std::fmt; 12 | 13 | use item::CompletedItem; 14 | 15 | pub trait Forest { 16 | /// Reference to a node. 17 | type NodeRef: Copy + fmt::Debug; 18 | type LeafValue; 19 | 20 | const FOREST_BYTES_PER_RECOGNIZER_BYTE: usize; 21 | 22 | fn begin_sum(&mut self); 23 | 24 | fn push_summand(&mut self, item: CompletedItem); 25 | 26 | fn sum(&mut self, lhs_sym: Symbol, origin: u32) -> Self::NodeRef; 27 | 28 | fn leaf(&mut self, token: Symbol, pos: u32, value: Self::LeafValue) -> Self::NodeRef; 29 | 30 | fn nulling(&self, token: Symbol) -> Self::NodeRef; 31 | } 32 | -------------------------------------------------------------------------------- /src/forest/node_handle.rs: -------------------------------------------------------------------------------- 1 | use cfg::Symbol; 2 | 3 | #[derive(Copy, Clone, Eq, PartialEq, Ord, PartialOrd, Debug)] 4 | pub struct NodeHandle(pub(crate) u32); 5 | 6 | pub(super) const NULL_HANDLE: NodeHandle = NodeHandle(0xFFFF_FFFF); 7 | 8 | impl NodeHandle { 9 | #[inline] 10 | pub(super) fn nulling(symbol: Symbol) -> Self { 11 | NodeHandle(symbol.usize() as u32) 12 | } 13 | 14 | #[inline] 15 | pub(super) fn usize(self) -> usize { 16 | self.0 as usize 17 | } 18 | 19 | #[inline] 20 | pub(super) fn to_option(self) -> Option { 21 | if self == NULL_HANDLE { 22 | None 23 | } else { 24 | Some(self) 25 | } 26 | } 27 | } 28 | 
-------------------------------------------------------------------------------- /src/forest/null_forest.rs: -------------------------------------------------------------------------------- 1 | use cfg::symbol::Symbol; 2 | 3 | use forest::Forest; 4 | use item::CompletedItem; 5 | 6 | /// An empty forest. 7 | pub struct NullForest; 8 | 9 | impl Forest for NullForest { 10 | type NodeRef = (); 11 | type LeafValue = (); 12 | 13 | const FOREST_BYTES_PER_RECOGNIZER_BYTE: usize = 0; 14 | 15 | #[inline(always)] 16 | fn leaf(&mut self, _: Symbol, _: u32, _: ()) {} 17 | #[inline(always)] 18 | fn nulling(&self, _: Symbol) {} 19 | #[inline(always)] 20 | fn begin_sum(&mut self) {} 21 | #[inline(always)] 22 | fn push_summand(&mut self, _item: CompletedItem) {} 23 | #[inline(always)] 24 | fn sum(&mut self, _lhs_sym: Symbol, _origin: u32) -> Self::NodeRef { 25 | () 26 | } 27 | } 28 | -------------------------------------------------------------------------------- /src/grammar.rs: -------------------------------------------------------------------------------- 1 | //! # Grammar transforms 2 | //! 3 | //! For efficiency, the recognizer works on processed grammars. Grammars described 4 | //! by the user are transformed to meet the following properties: 5 | //! 6 | //! ## Property 1: Right-hand-sides of all rules have one symbol or two symbols. 7 | //! 8 | //! That is, all rules are of the form 9 | //! `A ::= B C` 10 | //! or 11 | //! `D ::= E`. 12 | //! 13 | //! ### a) Right-hand-sides of all rules have at least one symbol. 14 | //! 15 | //! ### b) Right-hand-sides of all rules have at most two symbols. 16 | //! 17 | //! ## Property 2: There are no cycles among unit rules. 18 | //! 19 | //! That is, for any nonterminals `A`…`Z`, the set of rules doesn't have a subset 20 | //! such as {`A ::= B`, `B ::= C`, …, `Y ::= Z`, `Z ::= A`}. 21 | //! 22 | //! In other words, for any nonterminal `A`, `A` doesn't derive `A` in two or more steps. 23 | //! 24 | //! 
## Property 3: Dot numbers for pre-RHS0 dots are ordered by the LHS symbol IDs. 25 | //! 26 | //! ## Property 4: Dot numbers for pre-RHS1 dots are ordered by their RHS1 symbol IDs. 27 | //! 28 | //! ## Property 5: IDs of unit rules are smaller than IDs of rules which they predict. 29 | //! 30 | //! Internal symbols must be remapped, because this property may interfere with (4). 31 | //! This property also requires (3). 32 | //! 33 | //! # Similarities to other parsers 34 | //! 35 | //! * 1.a) is required by some Earley parsers, including Marpa. 36 | //! * 1.b) is required for recognition in CYK parsers, and in a roundabout way for construction 37 | //! of bocages. 38 | //! * 2 is required by PEG and some other parsers. 39 | //! * 3, 4 and 5 are specific to gearley. 40 | //! 41 | //! # Motivation for grammar transforms 42 | //! 43 | //! ## Property 1.a), one RHS symbol. 44 | //! 45 | //! Handling nullable rules is notoriously difficult in Earley parsers. Even the original Earley's 46 | //! PhD paper contained an algorithm bug in handling nullable rules. We avoid nullability completely 47 | //! by remembering all about our null removal and fixing the parse forest post-parse. 48 | //! 49 | //! ## Property 1.b), two RHS symbols. 50 | //! 51 | //! Think about it: if a rule has three right-hand side symbols, and all of them are nullable, 52 | //! then property a) would produce 2*2*2 = 8 rules for each combination of missing null and present symbol. 53 | //! We avoid exponential blowup not only here in grammar preprocessing, but also in the bocage by restricting 54 | //! ourselves to no more than two symbols at a time. 55 | //! 56 | //! ## Property 2, no cycles among unit rules. 57 | //! 58 | //! ... 59 | //! 60 | //! ## Property 3, dot numbers for pre-RHS0 dots are ordered by the LHS symbol IDs. 61 | //! 62 | //! ... 63 | //! 64 | //! ## Property 4, dot numbers for pre-RHS1 dots are ordered by their RHS1 symbol IDs. 65 | //! 66 | //! ... 67 | //! 68 | //! 
## Property 5, IDs of unit rules are smaller than IDs of rules which they predict. 69 | //! 70 | //! ... 71 | 72 | use std::convert::TryInto; 73 | use std::iter; 74 | 75 | use bit_matrix::BitMatrix; 76 | use bit_matrix::row::BitVecSlice; 77 | use cfg::{ContextFreeRef, GrammarRule, Symbol}; 78 | use cfg::rule::container::RuleContainer; 79 | use cfg::remap::Mapping; 80 | use cfg::prediction::{FirstSetsCollector, FollowSets}; 81 | use optional::Optioned; 82 | 83 | use item::Dot; 84 | 85 | pub use cfg::earley::{Grammar, BinarizedGrammar}; 86 | pub use cfg::earley::history::History; 87 | 88 | // # Future optimizations 89 | // 90 | // Store RHS1 and LHS in row-major instead of column-major order, so that the least significant bit 91 | // tells us whether a dot is medial or completed. Or don't. 92 | // 93 | // Parameterize the representation over symbol type (u32, u16, u8). 94 | 95 | #[derive(Serialize, Deserialize, Copy, Clone, Debug)] 96 | pub(in super) struct PredictionTransition { 97 | pub symbol: Symbol, 98 | pub dot: Dot, 99 | } 100 | 101 | #[derive(Eq, PartialEq, Ord, PartialOrd)] 102 | pub(in super) enum MaybePostdot { 103 | Binary(Symbol), 104 | Unary, 105 | } 106 | 107 | #[derive(Serialize, Deserialize, Clone, Default, Debug)] 108 | pub struct InternalGrammar { 109 | start_sym: Symbol, 110 | original_start_sym: Symbol, 111 | has_trivial_derivation: bool, 112 | eof_sym: Symbol, 113 | dot_before_eof: Dot, 114 | size: InternalGrammarSize, 115 | 116 | prediction_matrix: BitMatrix, 117 | // Inverse prediction lookup. 118 | unary_completions: Vec, 119 | unary_completion_index: Vec, 120 | 121 | binary_completions: Vec, 122 | binary_completion_index: Vec, 123 | 124 | follow_sets: BitMatrix, 125 | first_sets: BitMatrix, 126 | 127 | // array of events 128 | events_rhs: [Vec; 3], 129 | // 2-dimensional arrays for tracing 130 | trace_rhs: [Vec>; 3], 131 | // Each rule can have only one eliminated nulling symbol. 
132 | nulling_eliminated: Vec, 133 | // Rules stored in column-major order. 134 | lhs: Vec>, 135 | rhs0: Vec>, 136 | rhs1: Vec>, 137 | // Rule origin preserved for post-parse actions. 138 | eval: Vec, 139 | // Mapping between external and internal symbols. 140 | sym_maps: Mapping, 141 | nulling_intermediate_rules: Vec, 142 | } 143 | 144 | #[derive(Serialize, Deserialize, Clone, Default, Debug)] 145 | pub struct InternalGrammarSize { 146 | pub syms: usize, 147 | pub rules: usize, 148 | pub internal_syms: usize, 149 | pub external_syms: usize, 150 | } 151 | 152 | pub(in super) type ExternalDottedRule = (u32, u32); 153 | type ExternalOrigin = Option; 154 | type EventId = Optioned; 155 | type MinimalDistance = Optioned; 156 | pub(in super) type Event = (EventId, MinimalDistance); 157 | type NullingEliminated = Option<(Symbol, bool)>; 158 | type NullingIntermediateRule = (Symbol, Symbol, Symbol); 159 | type CompletionTable = Vec>; 160 | 161 | impl InternalGrammar { 162 | fn new() -> Self { 163 | Self::default() 164 | } 165 | 166 | pub fn from_grammar(grammar: &Grammar) -> Self { 167 | Self::from_binarized_grammar(grammar.binarize()) 168 | } 169 | 170 | pub fn from_binarized_grammar(grammar: BinarizedGrammar) -> Self { 171 | let grammar = grammar.make_proper(); 172 | Self::from_proper_binarized_grammar(grammar) 173 | } 174 | 175 | pub fn from_proper_binarized_grammar(grammar: BinarizedGrammar) -> Self { 176 | let (mut grammar, nulling) = grammar.eliminate_nulling(); 177 | grammar.wrap_start(); 178 | Self::from_processed_grammar(grammar, &nulling) 179 | } 180 | 181 | pub fn from_processed_grammar(grammar: BinarizedGrammar, nulling: &BinarizedGrammar) -> Self { 182 | let (grammar, maps) = grammar.remap_symbols(); 183 | Self::from_processed_grammar_with_maps(grammar, maps, nulling) 184 | } 185 | 186 | pub fn from_processed_grammar_with_maps( 187 | mut grammar: BinarizedGrammar, 188 | maps: Mapping, 189 | nulling: &BinarizedGrammar) 190 | -> Self 191 | { 192 | 
grammar.sort_by(|a, b| a.lhs().cmp(&b.lhs())); 193 | let mut result = InternalGrammar::new(); 194 | result.populate_sizes(&grammar, &maps); 195 | result.populate_maps(maps); 196 | result.populate_grammar(&grammar); 197 | result.populate_nulling(nulling); 198 | trace!("populated grammar {:?}", &result); 199 | result 200 | } 201 | 202 | fn populate_sizes(&mut self, grammar: &BinarizedGrammar, maps: &Mapping) { 203 | self.size = InternalGrammarSize { 204 | rules: grammar.rules().count(), 205 | syms: grammar.sym_source().num_syms(), 206 | external_syms: maps.to_internal.len(), 207 | internal_syms: maps.to_external.len(), 208 | } 209 | } 210 | 211 | fn populate_grammar(&mut self, grammar: &BinarizedGrammar) { 212 | self.populate_start_sym(grammar); 213 | self.populate_grammar_with_lhs(grammar); 214 | self.populate_grammar_with_rhs(grammar); 215 | self.populate_grammar_with_history(grammar); 216 | self.populate_predictions(grammar); 217 | } 218 | 219 | fn populate_start_sym(&mut self, grammar: &BinarizedGrammar) { 220 | let start = grammar.start(); 221 | self.start_sym = start; 222 | self.eof_sym = grammar.eof().unwrap(); 223 | self.dot_before_eof = grammar.dot_before_eof().unwrap(); 224 | self.original_start_sym = grammar.original_start().unwrap(); 225 | } 226 | 227 | fn populate_grammar_with_lhs(&mut self, grammar: &BinarizedGrammar) { 228 | self.lhs.extend(grammar.rules().map(|rule| Some(rule.lhs()))); 229 | } 230 | 231 | fn populate_grammar_with_rhs(&mut self, grammar: &BinarizedGrammar) { 232 | self.rhs0.extend(grammar.rules().map(|rule| rule.rhs().get(0).cloned())); 233 | self.rhs1.extend(grammar.rules().map(|rule| rule.rhs().get(1).cloned())); 234 | } 235 | 236 | fn populate_grammar_with_history(&mut self, grammar: &BinarizedGrammar) { 237 | self.eval.extend( 238 | grammar.rules().map(|rule| rule.history().origin()) 239 | ); 240 | self.nulling_eliminated.extend( 241 | grammar.rules().map(|rule| rule.history().nullable()) 242 | ); 243 | 244 | 
self.populate_grammar_with_events_rhs(grammar); 245 | self.populate_grammar_with_trace_rhs(grammar); 246 | } 247 | 248 | fn populate_grammar_with_events_rhs(&mut self, grammar: &BinarizedGrammar) { 249 | self.events_rhs[1].extend( 250 | grammar.rules().map(|rule| rule.history().dot(1).event_without_tracing()) 251 | ); 252 | self.events_rhs[2].extend( 253 | grammar.rules().map(|rule| rule.history().dot(2).event_without_tracing()) 254 | ); 255 | } 256 | 257 | fn populate_grammar_with_trace_rhs(&mut self, grammar: &BinarizedGrammar) { 258 | self.trace_rhs[1].extend( 259 | grammar.rules().map(|rule| rule.history().dot(1).trace()) 260 | ); 261 | self.trace_rhs[2].extend( 262 | grammar.rules().map(|rule| rule.history().dot(2).trace()) 263 | ); 264 | } 265 | 266 | fn populate_maps(&mut self, maps: Mapping) { 267 | self.sym_maps = maps; 268 | } 269 | 270 | fn populate_predictions(&mut self, grammar: &BinarizedGrammar) { 271 | self.populate_prediction_matrix(grammar); 272 | self.populate_prediction_events(grammar); 273 | self.populate_completion_tables(grammar); 274 | self.populate_follow_sets(grammar); 275 | } 276 | 277 | fn populate_prediction_matrix(&mut self, grammar: &BinarizedGrammar) { 278 | self.prediction_matrix = BitMatrix::new(self.size.syms, self.size.syms); 279 | // Precompute DFA. 280 | for rule in grammar.rules() { 281 | self.prediction_matrix.set(rule.lhs().usize(), rule.rhs()[0].usize(), true); 282 | } 283 | self.prediction_matrix.transitive_closure(); 284 | // Prediction relation is reflexive. 
285 | for i in 0..self.size.syms { 286 | self.prediction_matrix.set(i, i, true); 287 | } 288 | } 289 | 290 | fn populate_follow_sets(&mut self, grammar: &BinarizedGrammar) { 291 | self.follow_sets = BitMatrix::new(self.size.syms, self.size.syms); 292 | self.first_sets = BitMatrix::new(self.size.syms, self.size.syms); 293 | let first_sets = FirstSetsCollector::new(grammar); 294 | for (outer, inner) in first_sets.first_sets() { 295 | for elem_inner in inner.into_iter() { 296 | if let Some(inner_sym) = elem_inner { 297 | self.first_sets.set(outer.usize(), inner_sym.usize(), true); 298 | } 299 | } 300 | } 301 | self.first_sets.reflexive_closure(); 302 | let follow_sets = FollowSets::new(grammar, grammar.start(), first_sets.first_sets()); 303 | for (before, after) in follow_sets.follow_sets().into_iter() { 304 | for elem_after in after.into_iter() { 305 | if let Some(after_sym) = elem_after { 306 | self.follow_sets.set(before.usize(), after_sym.usize(), true); 307 | } 308 | } 309 | } 310 | } 311 | 312 | fn populate_completion_tables(&mut self, grammar: &BinarizedGrammar) { 313 | self.populate_unary_completion_table(grammar); 314 | self.populate_binary_completion_table(grammar); 315 | } 316 | 317 | fn populate_unary_completion_table(&mut self, grammar: &BinarizedGrammar) { 318 | let table = self.compute_unary_completion_table(grammar); 319 | self.populate_unary_completion_index(&table); 320 | self.populate_unary_completions(&table); 321 | } 322 | 323 | fn compute_unary_completion_table(&self, grammar: &BinarizedGrammar) -> CompletionTable { 324 | let mut table = iter::repeat(vec![]).take(self.size.syms).collect::>(); 325 | 326 | let mut unary_rules = vec![]; 327 | // check for ordering same as self.rules 328 | for (dot, rule) in grammar.rules().enumerate() { 329 | let is_unary = rule.rhs().get(1).is_none(); 330 | if is_unary { 331 | let rhs0_sym = rule.rhs()[0].usize(); 332 | unary_rules.push((rhs0_sym, rule.lhs, dot)); 333 | } 334 | } 335 | for (rhs0_sym, lhs_sym, dot) 
in unary_rules.into_iter() { 336 | table[rhs0_sym].push(PredictionTransition { 337 | symbol: lhs_sym, 338 | dot: dot as u32 339 | }); 340 | } 341 | table 342 | } 343 | 344 | fn populate_unary_completion_index(&mut self, table: &CompletionTable) { 345 | let mut current_idx = 0u32; 346 | self.unary_completion_index.push(0u32); 347 | self.unary_completion_index.extend(table.iter().map(|run| { 348 | current_idx = current_idx.checked_add(run.len() as u32).unwrap(); 349 | current_idx 350 | })); 351 | } 352 | 353 | fn populate_unary_completions(&mut self, table: &CompletionTable) { 354 | let iter_table = table.into_iter().flat_map(|v| v.into_iter()); 355 | self.unary_completions.extend(iter_table); 356 | } 357 | 358 | fn populate_binary_completion_table(&mut self, grammar: &BinarizedGrammar) { 359 | let table = self.compute_binary_completion_table(grammar); 360 | self.populate_binary_completion_index(&table); 361 | self.populate_binary_completions(&table); 362 | } 363 | 364 | fn compute_binary_completion_table(&self, grammar: &BinarizedGrammar) -> CompletionTable { 365 | let mut table = iter::repeat(vec![]).take(self.size.syms).collect::>(); 366 | 367 | let mut binary_rules = vec![]; 368 | // check for ordering same as self.rules 369 | for (dot, rule) in grammar.rules().enumerate() { 370 | let is_binary = rule.rhs().get(1).is_some(); 371 | if is_binary { 372 | let rhs0_sym = rule.rhs()[0].usize(); 373 | binary_rules.push((rhs0_sym, rule.lhs, dot)); 374 | } 375 | } 376 | for (rhs0_sym, lhs_sym, dot) in binary_rules.into_iter() { 377 | table[rhs0_sym].push(PredictionTransition { 378 | symbol: lhs_sym, 379 | dot: dot as u32 380 | }); 381 | } 382 | table 383 | } 384 | 385 | fn populate_binary_completion_index(&mut self, table: &CompletionTable) { 386 | let mut current_idx = 0u32; 387 | self.binary_completion_index.push(0u32); 388 | self.binary_completion_index.extend(table.iter().map(|run| { 389 | current_idx = current_idx.checked_add(run.len() as u32).unwrap(); 390 | 
current_idx 391 | })); 392 | } 393 | 394 | fn populate_binary_completions(&mut self, table: &CompletionTable) { 395 | let iter_table = table.into_iter().flat_map(|v| v.into_iter()); 396 | self.binary_completions.extend(iter_table); 397 | } 398 | 399 | fn populate_prediction_events(&mut self, grammar: &BinarizedGrammar) { 400 | let iter_events_pred = iter::repeat((Optioned::none(), Optioned::none())).take(self.size.syms); 401 | self.events_rhs[0].extend(iter_events_pred); 402 | let iter_trace_pred = iter::repeat(None).take(self.size.syms); 403 | self.trace_rhs[0].extend(iter_trace_pred); 404 | for rule in grammar.rules() { 405 | if let Some(&(pred_event, pred_tracing)) = rule.history().dot(0).event().as_ref() { 406 | // Prediction event and tracing. 407 | self.events_rhs[0][rule.lhs().usize()] = ( 408 | pred_event, 409 | rule.history().dot(0).distance() 410 | ); 411 | self.trace_rhs[0][rule.lhs().usize()] = Some(pred_tracing); 412 | } 413 | } 414 | } 415 | 416 | fn populate_nulling(&mut self, nulling: &BinarizedGrammar) { 417 | self.has_trivial_derivation = !nulling.is_empty(); 418 | let iter_nulling_intermediate = nulling.rules().filter_map(|rule| { 419 | if rule.history().origin().is_none() && rule.rhs().len() == 2 { 420 | Some((rule.lhs(), rule.rhs()[0], rule.rhs()[1])) 421 | } else { 422 | None 423 | } 424 | }); 425 | self.nulling_intermediate_rules.extend(iter_nulling_intermediate); 426 | } 427 | 428 | #[inline] 429 | pub(in super) fn eof(&self) -> Symbol { 430 | self.eof_sym 431 | } 432 | 433 | #[inline] 434 | pub(in super) fn can_follow(&self, before: Symbol, after: Option) -> bool { 435 | let after = after.unwrap_or(self.eof()).usize(); 436 | self.follow_sets[(before.usize(), after)] 437 | } 438 | 439 | #[inline] 440 | pub(in super) fn first(&self, outer: Symbol, maybe_inner: Option) -> bool { 441 | let inner = if let Some(inner) = maybe_inner { 442 | inner 443 | } else { 444 | return outer == self.eof() 445 | }; 446 | self.first_sets[(outer.usize(), 
inner.usize())] 447 | } 448 | 449 | #[inline] 450 | pub(in super) fn prediction_matrix(&self) -> &BitMatrix { 451 | &self.prediction_matrix 452 | } 453 | 454 | #[inline] 455 | pub(in super) fn predict(&self, sym: Symbol) -> &BitVecSlice { 456 | &self.prediction_matrix[sym.usize()] 457 | } 458 | 459 | #[inline] 460 | pub(in super) fn num_syms(&self) -> usize { 461 | self.size.syms 462 | } 463 | 464 | #[inline] 465 | pub(in super) fn num_rules(&self) -> usize { 466 | self.size.rules 467 | } 468 | 469 | #[inline] 470 | pub fn start_sym(&self) -> Symbol { 471 | self.start_sym 472 | } 473 | 474 | pub fn externalized_start_sym(&self) -> Symbol { 475 | self.to_external(self.original_start_sym) 476 | } 477 | 478 | #[inline] 479 | pub(in super) fn has_trivial_derivation(&self) -> bool { 480 | self.has_trivial_derivation 481 | } 482 | 483 | #[inline] 484 | pub(in super) fn nulling(&self, pos: u32) -> NullingEliminated { 485 | self.nulling_eliminated.get(pos as usize).and_then(|&ne| ne) 486 | } 487 | 488 | #[inline] 489 | pub(in super) fn events(&self) -> (&[Event], &[Event]) { 490 | (&self.events_rhs[1][..], &self.events_rhs[2][..]) 491 | } 492 | 493 | #[inline] 494 | pub(in super) fn trace(&self) -> [&[Option]; 3] { 495 | [&self.trace_rhs[0][..], &self.trace_rhs[1][..], &self.trace_rhs[2][..]] 496 | } 497 | 498 | #[inline] 499 | pub(in super) fn get_rhs1(&self, dot: Dot) -> Option { 500 | self.rhs1[dot as usize] 501 | } 502 | 503 | #[inline] 504 | pub(in super) fn get_rhs1_cmp(&self, dot: Dot) -> MaybePostdot { 505 | match self.rhs1[dot as usize] { 506 | None => MaybePostdot::Unary, 507 | Some(rhs1) => MaybePostdot::Binary(rhs1), 508 | } 509 | } 510 | 511 | #[inline] 512 | pub(in super) fn rhs1(&self) -> &[Option] { 513 | &self.rhs1[..] 
514 | } 515 | 516 | #[inline] 517 | pub(in super) fn get_lhs(&self, dot: Dot) -> Symbol { 518 | self.lhs[dot as usize].unwrap() 519 | } 520 | 521 | #[inline] 522 | pub(in super) fn external_origin(&self, dot: Dot) -> ExternalOrigin { 523 | self.eval.get(dot as usize).cloned().unwrap() 524 | } 525 | 526 | pub(in super) fn eliminated_nulling_intermediate(&self) -> &[NullingIntermediateRule] { 527 | &*self.nulling_intermediate_rules 528 | } 529 | 530 | #[inline(always)] 531 | pub(in super) fn unary_completions(&self, sym: Symbol) -> &[PredictionTransition] { 532 | let idxs = &self.unary_completion_index[sym.usize() .. sym.usize() + 2]; 533 | let range = idxs[0] as usize .. idxs[1] as usize; 534 | &self.unary_completions[range] 535 | } 536 | 537 | #[inline(always)] 538 | pub(in super) fn binary_completions(&self, sym: Symbol) -> &[PredictionTransition] { 539 | let idxs = &self.binary_completion_index[sym.usize() .. sym.usize() + 2]; 540 | let range = idxs[0] as usize .. idxs[1] as usize; 541 | &self.binary_completions[range] 542 | } 543 | 544 | #[inline(always)] 545 | pub(in super) fn to_internal(&self, symbol: Symbol) -> Option { 546 | if self.sym_maps.to_internal.is_empty() { 547 | Some(symbol) 548 | } else { 549 | self.sym_maps.to_internal[symbol.usize()] 550 | } 551 | } 552 | 553 | #[inline] 554 | pub fn to_external(&self, symbol: Symbol) -> Symbol { 555 | if self.sym_maps.to_external.is_empty() { 556 | symbol 557 | } else { 558 | self.sym_maps.to_external[symbol.usize()] 559 | } 560 | } 561 | 562 | pub(in super) fn max_nulling_symbol(&self) -> Option { 563 | (0 .. 
self.num_rules()).filter_map(|action| { 564 | self.nulling(action as u32).map(|(sym, _dir)| sym.usize()) 565 | }).chain( 566 | self.eliminated_nulling_intermediate().iter().map(|&(_lhs, rhs0, _rhs1)| { 567 | rhs0.usize() 568 | }) 569 | ).max() 570 | } 571 | 572 | pub(in super) fn dot_before_eof(&self) -> Dot { 573 | self.dot_before_eof 574 | } 575 | } 576 | -------------------------------------------------------------------------------- /src/item.rs: -------------------------------------------------------------------------------- 1 | use std::cmp::Ordering; 2 | 3 | pub type Dot = u32; 4 | pub type Origin = u32; 5 | 6 | #[derive(Clone, Copy, Debug)] 7 | pub struct Item { 8 | pub(in super) origin: Origin, 9 | pub(in super) dot: Dot, 10 | pub node: N, 11 | } 12 | 13 | #[derive(Clone, Copy, Debug)] 14 | pub struct CompletedItem { 15 | /// The dot position. 16 | pub(in super) dot: Dot, 17 | /// The origin location. 18 | /// It comes after `dot`, so that (origin, dot) can be compared in a single instruction 19 | /// on little-endian systems. 20 | pub(in super) origin: Origin, 21 | /// Left bocage node. 22 | pub left_node: N, 23 | /// Right bocage node. 24 | pub right_node: Option, 25 | } 26 | 27 | #[derive(Clone, Copy, Debug)] 28 | pub struct CompletedItemLinked { 29 | /// Left item idx. 30 | pub idx: u32, 31 | /// Right bocage node. 
32 | pub node: Option, 33 | } 34 | 35 | impl PartialEq for Item { 36 | fn eq(&self, other: &Self) -> bool { 37 | (self.origin, self.dot) == (other.origin, other.dot) 38 | } 39 | } 40 | 41 | impl Eq for Item {} 42 | 43 | impl PartialOrd for Item { 44 | fn partial_cmp(&self, other: &Self) -> Option { 45 | Some(self.cmp(other)) 46 | } 47 | } 48 | 49 | impl Ord for Item { 50 | fn cmp(&self, other: &Self) -> Ordering { 51 | (self.origin, self.dot).cmp(&(other.origin, other.dot)) 52 | } 53 | } 54 | 55 | impl PartialEq for CompletedItem { 56 | fn eq(&self, other: &Self) -> bool { 57 | (self.origin, self.dot) == (other.origin, other.dot) 58 | } 59 | } 60 | 61 | impl Eq for CompletedItem {} 62 | 63 | impl PartialOrd for CompletedItem { 64 | fn partial_cmp(&self, other: &Self) -> Option { 65 | Some((self.origin, self.dot).cmp(&(other.origin, other.dot))) 66 | } 67 | } 68 | 69 | impl Ord for CompletedItem { 70 | fn cmp(&self, other: &Self) -> Ordering { 71 | (self.origin, self.dot).cmp(&(other.origin, other.dot)) 72 | } 73 | } 74 | 75 | impl Into> for CompletedItem { 76 | fn into(self) -> Item { 77 | Item { 78 | origin: self.origin, 79 | dot: self.dot, 80 | node: self.left_node, 81 | } 82 | } 83 | } 84 | -------------------------------------------------------------------------------- /src/lib.rs: -------------------------------------------------------------------------------- 1 | #![cfg_attr(feature = "cargo-clippy", allow(new_without_default_derive))] 2 | 3 | #[macro_use] 4 | extern crate log; 5 | extern crate env_logger; 6 | extern crate optional; 7 | extern crate ref_slice; 8 | extern crate bit_matrix; 9 | extern crate bit_vec; 10 | extern crate cfg; 11 | extern crate serde; 12 | #[macro_use] 13 | extern crate serde_derive; 14 | extern crate num; 15 | extern crate num_derive; 16 | 17 | pub mod debug; 18 | pub mod events; 19 | pub mod forest; 20 | pub mod grammar; 21 | pub mod item; 22 | pub mod memory_use; 23 | pub mod recognizer; 24 | pub mod binary_heap; 25 | 
--------------------------------------------------------------------------------
/src/memory_use.rs:
--------------------------------------------------------------------------------
use std::mem;

use bit_matrix::BitMatrix;
use bit_vec::BitVec;

use forest::node_handle::NodeHandle;
use forest::{Bocage, CompactBocage, Forest, NullForest};
use grammar::InternalGrammar;
use item::{CompletedItem, Item};
use recognizer::Recognizer;

/// Assumed average number of medial items per Earley set when budgeting.
const ITEMS_PER_SET: usize = 16;

/// Memory accounting and budget-driven construction.
pub trait MemoryUse {
    /// Extra argument needed for construction (e.g. a grammar reference).
    type Arg;

    /// Estimated number of bytes currently reserved by this value.
    fn memory_use(&self) -> usize;
    /// Creates a value whose reservations aim to stay within `memory_limit` bytes.
    fn new_with_limit(arg: Self::Arg, memory_limit: usize) -> Self;
}

impl<'g, F> MemoryUse for Recognizer<'g, F>
where
    F: Forest + MemoryUse<Arg = &'g InternalGrammar>,
{
    type Arg = &'g InternalGrammar;

    fn memory_use(&self) -> usize {
        self.forest.memory_use()
            + self.predicted.memory_use()
            + self.medial.memory_use()
            + self.complete.memory_use()
            + self.indices.memory_use()
    }

    fn new_with_limit(grammar: &'g InternalGrammar, memory_limit: usize) -> Self {
        // Split the budget between the forest and the recognizer proper
        // according to the forest's declared byte ratio.
        let forest_use_bytes = memory_limit * F::FOREST_BYTES_PER_RECOGNIZER_BYTE
            / (F::FOREST_BYTES_PER_RECOGNIZER_BYTE + 1);
        // Capacity for the completed-item heap, scaled with the budget.
        // The arms use disjoint ranges: repeating the previous endpoint
        // (`0..=1000`, `1000..=500_000`, ...) tripped the warn-by-default
        // `overlapping_range_endpoints` lint — fatal under CI's
        // `RUSTFLAGS: -Dwarnings` — while behaving identically, since the
        // first matching arm always won.
        let complete_use = match memory_limit {
            0..=1000 => 16,
            1001..=500_000 => 32,
            500_001..=2_000_000 => 64,
            _ => 128,
        };
        // NOTE(review): this subtraction can underflow (panic in debug) for
        // very small `memory_limit`; confirm callers always pass a limit
        // larger than the fixed overhead.
        let recognizer_use_bytes = memory_limit
            - forest_use_bytes
            - complete_use * mem::size_of::<CompletedItem<F::NodeRef>>();
        // Bytes per Earley set: one chart index, one bit-matrix row
        // (rounded up to whole 32-bit words), plus the assumed medial items.
        let bytes_per_set = mem::size_of::<usize>()
            + (grammar.num_syms() + 31) / 32 * 4
            + ITEMS_PER_SET * mem::size_of::<Item<F::NodeRef>>();
        let sets_use = recognizer_use_bytes / bytes_per_set;
        let mut recognizer = Recognizer {
            forest: F::new_with_limit(grammar, forest_use_bytes),
            grammar,
            // The initial location is 0.
            earleme: 0,
            // The first Earley set begins at 0 and ends at 0. The second Earley set begins at 0.
57 | indices: Vec::with_capacity(sets_use), 58 | current_medial_start: 0, 59 | // Reserve some capacity for vectors. 60 | predicted: BitMatrix::new(sets_use, grammar.num_syms()), 61 | medial: Vec::with_capacity(sets_use * ITEMS_PER_SET), 62 | complete: Vec::with_capacity(complete_use), 63 | lookahead_hint: None, 64 | }; 65 | recognizer.indices.push(0); 66 | recognizer.indices.push(0); 67 | recognizer.predict(grammar.start_sym()); 68 | recognizer 69 | } 70 | } 71 | 72 | impl<'g, F> Recognizer<'g, F> 73 | where 74 | F: MemoryUse + Forest, 75 | { 76 | #[inline] 77 | pub fn new_with_hint(grammar: &'g InternalGrammar, tokens: usize) -> Self { 78 | let forest_use_bytes = tokens * 16; 79 | let complete_use = match tokens { 80 | 0..=200 => 16, 81 | 200..=10_000 => 32, 82 | 10_000..=100_000 => 64, 83 | _ => 128, 84 | }; 85 | let mut recognizer = Recognizer { 86 | forest: F::new_with_limit(grammar, forest_use_bytes), 87 | grammar, 88 | // The initial location is 0. 89 | earleme: 0, 90 | // The first Earley set begins at 0 and ends at 0. The second Earley set begins at 0. 91 | indices: Vec::with_capacity(tokens + 1), 92 | current_medial_start: 0, 93 | // Reserve some capacity for vectors. 
94 | predicted: BitMatrix::new(tokens + 1, grammar.num_syms()), 95 | medial: Vec::with_capacity(tokens * ITEMS_PER_SET), 96 | complete: Vec::with_capacity(complete_use), 97 | lookahead_hint: None, 98 | }; 99 | recognizer.indices.push(0); 100 | recognizer.indices.push(0); 101 | recognizer.predict(grammar.start_sym()); 102 | recognizer 103 | } 104 | } 105 | 106 | impl<'g> MemoryUse for Recognizer<'g, NullForest> { 107 | type Arg = &'g InternalGrammar; 108 | 109 | fn memory_use(&self) -> usize { 110 | self.forest.memory_use() 111 | + self.predicted.memory_use() 112 | + self.medial.memory_use() 113 | + self.complete.memory_use() 114 | + self.indices.memory_use() 115 | } 116 | 117 | fn new_with_limit(grammar: &'g InternalGrammar, memory_limit: usize) -> Self { 118 | let complete_use = match memory_limit { 119 | 0..=1000 => 16, 120 | 1000..=500_000 => 32, 121 | 500_000..=2_000_000 => 64, 122 | _ => 128, 123 | }; 124 | let recognizer_use_bytes = 125 | memory_limit - complete_use * mem::size_of::>(); 126 | let bytes_per_set = mem::size_of::() 127 | + (grammar.num_syms() + 31) / 32 * 4 128 | + ITEMS_PER_SET * mem::size_of::>(); 129 | let sets_use = recognizer_use_bytes / bytes_per_set; 130 | let mut recognizer = Recognizer { 131 | forest: NullForest, 132 | grammar, 133 | // The initial location is 0. 134 | earleme: 0, 135 | // The first Earley set begins at 0 and ends at 0. The second Earley set begins at 0. 136 | indices: Vec::with_capacity(sets_use), 137 | current_medial_start: 0, 138 | // Reserve some capacity for vectors. 
139 | predicted: BitMatrix::new(sets_use, grammar.num_syms()), 140 | medial: Vec::with_capacity(sets_use * ITEMS_PER_SET), 141 | complete: Vec::with_capacity(complete_use), 142 | lookahead_hint: None, 143 | }; 144 | recognizer.indices.push(0); 145 | recognizer.indices.push(0); 146 | recognizer.predict(grammar.start_sym()); 147 | recognizer 148 | } 149 | } 150 | 151 | impl MemoryUse for Vec { 152 | type Arg = (); 153 | 154 | fn memory_use(&self) -> usize { 155 | self.capacity() * mem::size_of::() 156 | } 157 | 158 | fn new_with_limit(_arg: (), memory_limit: usize) -> Self { 159 | let capacity = memory_limit / mem::size_of::(); 160 | Self::with_capacity(capacity) 161 | } 162 | } 163 | 164 | impl MemoryUse for BitMatrix { 165 | type Arg = usize; 166 | 167 | fn memory_use(&self) -> usize { 168 | let (rows, columns) = self.size(); 169 | rows * ((columns + 31) / 32 * 4) 170 | } 171 | 172 | fn new_with_limit(num_columns: usize, memory_limit: usize) -> Self { 173 | let row_size = (num_columns + 31) / 32 * 4; 174 | let capacity = memory_limit / row_size; 175 | Self::new(capacity, num_columns) 176 | } 177 | } 178 | 179 | impl MemoryUse for BitVec { 180 | type Arg = (); 181 | 182 | fn memory_use(&self) -> usize { 183 | (self.capacity() + 31) / 32 * 4 184 | } 185 | 186 | fn new_with_limit(_arg: (), memory_limit: usize) -> Self { 187 | let capacity = memory_limit * 8; 188 | Self::with_capacity(capacity) 189 | } 190 | } 191 | 192 | impl MemoryUse for NullForest { 193 | type Arg = (); 194 | 195 | fn memory_use(&self) -> usize { 196 | 0 197 | } 198 | 199 | fn new_with_limit(_arg: (), _memory_limit: usize) -> Self { 200 | NullForest 201 | } 202 | } 203 | 204 | impl<'g> MemoryUse for Bocage<&'g InternalGrammar> { 205 | type Arg = &'g InternalGrammar; 206 | 207 | fn memory_use(&self) -> usize { 208 | self.graph.memory_use() + self.gc.liveness.memory_use() + self.gc.dfs.memory_use() 209 | } 210 | 211 | fn new_with_limit(grammar: &'g InternalGrammar, memory_limit: usize) -> Self { 212 
| let dfs_size = match memory_limit { 213 | 0..=1000 => 8, 214 | 1000..=100_000 => 32, 215 | _ => 64, 216 | }; 217 | let remaining_use = memory_limit - dfs_size * std::mem::size_of::(); 218 | let bytes_per_node = mem::size_of::() as f32 + 1.0 / 8.0; 219 | let graph_size = (remaining_use as f32 / bytes_per_node) as usize; 220 | Bocage::with_capacities(grammar, graph_size, dfs_size) 221 | } 222 | } 223 | 224 | impl<'g> MemoryUse for CompactBocage<&'g InternalGrammar> { 225 | type Arg = &'g InternalGrammar; 226 | 227 | fn memory_use(&self) -> usize { 228 | self.graph.vec.memory_use() + self.gc.liveness.memory_use() + self.gc.dfs.memory_use() 229 | } 230 | 231 | fn new_with_limit(grammar: &'g InternalGrammar, memory_limit: usize) -> Self { 232 | let dfs_size = match memory_limit { 233 | 0..=1000 => 8, 234 | 1000..=100_000 => 32, 235 | _ => 64, 236 | }; 237 | let remaining_use = memory_limit - dfs_size * std::mem::size_of::(); 238 | let bytes_per_node = mem::size_of::() as f32 + 1.0 / 8.0; 239 | let graph_size = (remaining_use as f32 / bytes_per_node) as usize; 240 | CompactBocage::with_capacities(grammar, graph_size, dfs_size) 241 | } 242 | } 243 | -------------------------------------------------------------------------------- /src/recognizer.rs: -------------------------------------------------------------------------------- 1 | use std::cmp::Ordering; 2 | use std::ops::Range; 3 | 4 | use bit_matrix::BitMatrix; 5 | use bit_matrix::row::BitVecSlice; 6 | use cfg::*; 7 | 8 | use events::{MedialItems, PredictedSymbols}; 9 | use forest::{Forest, NullForest}; 10 | use grammar::InternalGrammar; 11 | use item::{CompletedItem, CompletedItemLinked, Item, Origin}; 12 | // use policy::{PerformancePolicy, NullPerformancePolicy}; 13 | 14 | /// The recognizer implements the Earley algorithm. It parses the given input according 15 | /// to the `grammar`. The parse result is constructed inside the `forest`. 
///
/// To save memory, it only retains those parts of the Earley table that may be useful
/// in the future.
pub struct Recognizer<'g, F = NullForest>
where
    F: Forest,
{
    // The forest, which records the parse result as it is built.
    pub forest: F,
    // The grammar.
    pub grammar: &'g InternalGrammar,
    // The policy.
    // policy: P,

    // Chart's items.

    // Predicted items are stored in a bit matrix. The bit matrix has a row for every Earley set.
    //
    // Length of `predicted` is earleme + 1, so that earleme points to the last row.
    pub(super) predicted: BitMatrix,

    // Medial items.
    //
    // N.B. This structure could be moved into its own module.
    pub(super) medial: Vec<Item<F::NodeRef>>,
    // Gearley's secret sauce: we have a binary heap for online sorting.
    //
    // Completed items are stored for the latest Earley set.
    // They are ordered by (origin, dot), starting with highest
    // origin and dot. The creation of a completed item can only be caused
    // by a scan or a completion of an item that has a higher (origin, dot)
    // pair value.
    pub(super) complete: Vec<CompletedItemLinked<F::NodeRef>>,

    // Chart's indices. They point to the beginning of each Earley set.
    //
    // Length of `indices` is `earleme` + 2, so that earleme points to
    // the beginning of the range of indices for the last range.
    pub(super) indices: Vec<usize>,
    // Index that points to the beginning of the latest set. Equivalent to
    // the last element of `indices`.
    pub(super) current_medial_start: usize,

    // The input location.
    pub(super) earleme: usize,

    // Lookahead used to prune completions. `None` means no hint was given for
    // the current earleme; `Some(None)` means the hint is "end of input".
    pub(super) lookahead_hint: Option<Option<Symbol>>,
}

impl<'g, F> Recognizer<'g, F>
where
    F: Forest,
{
    /// Creates a new recognizer for a given grammar and forest. The recognizer has an initial
    /// Earley set that predicts the grammar's start symbol.
    pub fn new(grammar: &'g InternalGrammar, forest: F) -> Recognizer<'g, F> {
        let mut recognizer = Recognizer {
            forest,
            grammar,
            // The initial location is 0.
            earleme: 0,
            // The first Earley set begins at 0 and ends at 0. The second Earley set begins at 0.
            indices: vec![0, 0],
            current_medial_start: 0,
            // Reserve some capacity for vectors.
            predicted: BitMatrix::new(8, grammar.num_syms()),
            medial: Vec::with_capacity(256),
            complete: Vec::with_capacity(32),
            lookahead_hint: None,
        };
        recognizer.predict(grammar.start_sym());
        recognizer
    }

    /// Makes the current Earley set predict a given symbol.
    pub fn predict(&mut self, symbol: Symbol) {
        // OR the precomputed prediction row for `symbol` into the current set's row.
        self.predicted[self.earleme].predict(symbol, self.grammar.predict(symbol));
    }

    /// Begins an earleme. Currently a no-op, kept for API symmetry with `end_earleme`.
    pub fn begin_earleme(&mut self) {
        // nothing to do
    }

    /// Reads a token. Creates a leaf bocage node with the given value. After reading one or more
    /// tokens, the parse can be advanced.
    pub fn scan(&mut self, symbol: Symbol, value: F::LeafValue) {
        // This method is a part of the scan pass.
        if let Some(internal) = self.grammar.to_internal(symbol) {
            let earleme = self.earleme as Origin;
            // Add a leaf node to the forest with the given value.
            // The leaf ends at `earleme + 1`, the position after this token.
            let node = self.forest.leaf(symbol, earleme + 1, value);
            self.complete(earleme, internal, node);
        }
    }

    /// Provides a hint about the next token (`None` means end of input), letting the
    /// completion pass prune items the lookahead cannot follow.
    ///
    /// # Panics
    ///
    /// Panics if `lookahead` contains a symbol unknown to the grammar.
    #[inline]
    pub fn lookahead_hint(&mut self, lookahead: Option<Symbol>) {
        let to_internal = |sym| self.grammar.to_internal(sym).unwrap();
        self.lookahead_hint = Some(lookahead.map(to_internal));
    }

    /// Advances the parse. Calling this method may set the finished node, which can be accessed
    /// through the `finished_node` method.
    pub fn end_earleme(&mut self) -> bool {
        if self.is_exhausted() {
            false
        } else {
            // Completion pass, which saves successful parses.
            self.complete_all_sums_entirely();
            // Do the rest.
            self.advance_without_completion();
            true
        }
    }

    /// Advances the parse. Omits the completion pass, which should be done through
    /// the `completions` method. Keep in mind that calling this method may not set
    /// the finished node, which should be tracked externally.
    pub fn advance_without_completion(&mut self) {
        self.sort_medial_items();
        self.remove_unary_medial_items();
        self.remove_unreachable_sets();
        self.earleme += 1;
        // `earleme` is now at least 1.
        // Prediction pass.
        self.prediction_pass();
        // Store the index.
        self.current_medial_start = self.medial.len();
        self.indices.push(self.current_medial_start);
    }

    /// Checks whether the recognizer is exhausted. The recognizer is exhausted when it can't accept
    /// more input.
    #[inline]
    pub fn is_exhausted(&self) -> bool {
        // Exhausted when the current set gained no medial items and nothing awaits completion.
        self.medial.len() == self.current_medial_start && self.complete.is_empty()
    }

    /// Sorts medial items with deduplication.
    fn sort_medial_items(&mut self) {
        let grammar = &self.grammar;
        // Build index by postdot
        // These medial positions themselves are sorted by postdot symbol.
        self.medial[self.current_medial_start..].sort_unstable_by(|a, b| {
            (grammar.get_rhs1_cmp(a.dot), a.dot, a.origin).cmp(&(
                grammar.get_rhs1_cmp(b.dot),
                b.dot,
                b.origin,
            ))
        });
    }

    /// Pops medial items whose dot has no postdot symbol off the end of the current
    /// set. They sort last (see `sort_medial_items`), so popping from the back suffices.
    fn remove_unary_medial_items(&mut self) {
        while let Some(&item) = self.medial.last() {
            if self.grammar.get_rhs1(item.dot).is_some() {
                break;
            }
            self.medial.pop();
        }
    }

    /// Drops trailing Earley sets that no current medial item reaches back to,
    /// rewinding `earleme` accordingly. This bounds the chart's memory growth.
    fn remove_unreachable_sets(&mut self) {
        let origin = |item: &Item<F::NodeRef>| item.origin as usize;
        let max_origin = self.medial[self.current_medial_start..]
            .iter()
            .map(origin)
            .max()
            .unwrap_or(self.earleme);
        let diff = self.earleme - max_origin;
        if diff <= 1 {
            return;
        }
        // | 0 | 1 | 2 | 3 |
        //               ^ current_medial_start
        //   _________diff = 2
        //   ____drop = 1
        //       ^ self.earleme = 2
        // ^ m = 0
        // | 0 | 1 | 2 |
        let drop = diff - 1;
        let new_medial_start = self.indices[self.indices.len() - 1 - drop];
        self.indices.truncate(self.indices.len() - drop);
        let current_medial_length = self.medial.len() - self.current_medial_start;
        // Shift the current set's items down over the dropped sets.
        for i in 0..current_medial_length {
            self.medial[new_medial_start as usize + i] = self.medial[self.current_medial_start + i];
        }
        self.medial
            .truncate(new_medial_start as usize + current_medial_length);
        self.current_medial_start = new_medial_start as usize;
        self.earleme -= drop;
        self.predicted.truncate(self.earleme + 1);
        // Clear the last remaining prediction row; it is rebuilt by the next pass.
        for dst in self.predicted[self.earleme].iter_mut() {
            *dst = 0;
        }
    }

    /// Performs the prediction pass.
    fn prediction_pass(&mut self) {
        // Add a row to the matrix.
        self.predicted.grow(1, false);
        // Iterate through medial items in the current set.
        let iter = self.medial[self.current_medial_start..].iter();
        // For each medial item in the current set, predict its postdot symbol.
        let row = &mut self.predicted[self.earleme];
        for ei in iter {
            let postdot = self.grammar.get_rhs1(ei.dot).unwrap();
            row.predict(postdot, self.grammar.predict(postdot));
        }
    }

    /// Complete items.
    pub fn complete(&mut self, set_id: Origin, sym: Symbol, rhs_link: F::NodeRef) {
        debug_assert!(sym != self.grammar.eof());
        // Only complete if `sym` was predicted in the origin set — otherwise no
        // item there can consume it.
        if self.predicted[set_id as usize].get(sym.usize()) {
            self.complete_medial_items(set_id, sym, rhs_link);
            self.complete_predictions(set_id, sym, rhs_link);
        }
    }

    /// Complete medial items in a given Earley set.
    fn complete_medial_items(&mut self, set_id: Origin, sym: Symbol, rhs_link: F::NodeRef) {
        // Iterate through medial items to complete them.
        let set_range = self.medial_item_set_range(set_id, sym);
        if let Some(hint) = self.lookahead_hint {
            for idx in set_range {
                // New completed item.
                // from A ::= B • C
                // to   A ::= B C •
                //
                // We might link to medial items by index, here.
                let dot = self.medial[idx].dot;
                // Prune completions whose LHS cannot be followed by the lookahead.
                if !self.grammar.can_follow(self.grammar.get_lhs(dot), hint) {
                    continue;
                }
                self.heap_push_linked(CompletedItemLinked {
                    idx: idx as u32,
                    node: Some(rhs_link),
                });
            }
        } else {
            for idx in set_range {
                // New completed item.
                // from A ::= B • C
                // to   A ::= B C •
                //
                // We might link to medial items by index, here.
                self.heap_push_linked(CompletedItemLinked {
                    idx: idx as u32,
                    node: Some(rhs_link),
                });
            }
        }
    }

    /// Returns the contiguous range of medial items in set `set_id` whose postdot
    /// symbol is `sym`. Relies on the per-set ordering from `sort_medial_items`.
    fn medial_item_set_range(&mut self, set_id: Origin, sym: Symbol) -> Range<usize> {
        // Huh, can we reduce complexity here?
        let outer_start = self.indices[set_id as usize];
        let outer_end = self.indices[set_id as usize + 1];
        let specific_set = &self.medial[outer_start..outer_end];

        let inner_start = if specific_set.len() >= 16 {
            // When the set has 16 or more items, we use binary search to narrow down the range of
            // items.
            let set_idx = specific_set.binary_search_by(|ei| {
                (self.grammar.get_rhs1(ei.dot), Ordering::Greater).cmp(&(Some(sym), Ordering::Less))
            });
            match set_idx {
                Ok(idx) | Err(idx) => idx,
            }
        } else {
            specific_set
                .iter()
                .take_while(|ei| self.grammar.get_rhs1(ei.dot).unwrap() < sym)
                .count()
        };

        // The range contains items that have the same RHS1 symbol.
        let inner_end = specific_set[inner_start..]
            .iter()
            .take_while(|ei| self.grammar.get_rhs1(ei.dot) == Some(sym))
            .count();
        outer_start + inner_start..outer_start + inner_start + inner_end
    }

    /// Complete predicted items that have a common postdot symbol.
    fn complete_predictions(&mut self, set_id: Origin, sym: Symbol, rhs_link: F::NodeRef) {
        // New item, either completed or pre-terminal. Ensure uniqueness.
        // from A ::= • B c
        // to   A ::= B • c
        self.complete_unary_predictions(set_id, sym, rhs_link);
        self.complete_binary_predictions(set_id, sym, rhs_link);
    }

    /// Complete an item if predicted at rhs0.
    fn complete_unary_predictions(&mut self, set_id: Origin, sym: Symbol, rhs_link: F::NodeRef) {
        for trans in self.grammar.unary_completions(sym) {
            if self.predicted[set_id as usize].get(trans.symbol.usize()) {
                // No checks for uniqueness, because `medial` will be deduplicated.
                // from A ::= • B
                // to   A ::= B •
                // ---
                // We could push to `medial` as well and link from `complete` to `medial`.

                if let Some(hint) = self.lookahead_hint {
                    // Prune completions whose LHS cannot be followed by the lookahead.
                    if !self
                        .grammar
                        .can_follow(self.grammar.get_lhs(trans.dot), hint)
                    {
                        continue;
                    }
                }
                self.heap_push(CompletedItem {
                    origin: set_id,
                    dot: trans.dot,
                    left_node: rhs_link,
                    right_node: None,
                });
            }
        }
    }

    /// Complete an item if predicted at rhs1.
    fn complete_binary_predictions(&mut self, set_id: Origin, sym: Symbol, rhs_link: F::NodeRef) {
        for trans in self.grammar.binary_completions(sym) {
            if self.predicted[set_id as usize].get(trans.symbol.usize()) {
                if let Some(hint) = self.lookahead_hint {
                    // Prune items whose next expected symbol cannot start with the lookahead.
                    if !self
                        .grammar
                        .first(self.grammar.get_rhs1(trans.dot).unwrap(), hint)
                    {
                        continue;
                    }
                }
                // No checks for uniqueness, because `medial` will be deduplicated.
                // from A ::= • B C
                // to   A ::= B • C
                // Where C is terminal or nonterminal.

                self.medial.push(Item {
                    origin: set_id,
                    dot: trans.dot,
                    node: rhs_link,
                });
            }
        }
    }

    /// Resets the recognizer to its initial state by removing all contents.
    pub fn reset(&mut self) {
        self.earleme = 0;
        // NOTE(review): this re-predicts the start symbol into row 0 before the other
        // structures are cleared, and rows beyond 0 are not truncated here — confirm
        // that `predicted` holds no stale bits after a reset mid-parse.
        self.predict(self.grammar.start_sym());
        // Indices reset to [0, 0].
        self.indices.clear();
        self.indices.push(0);
        self.indices.push(0);
        // Current medial start reset to 0.
        self.current_medial_start = 0;
        // Remove items.
        self.medial.clear();
        self.complete.clear();
    }

    // Finished node access.

    /// Checks whether there is a valid parse that ends at the current
    /// position.
    pub fn is_finished(&self) -> bool {
        self.finished_node().is_some()
    }

    /// Retrieves the bocage node that represents the parse that has finished at the current
    /// location.
    ///
    /// # Panics
    ///
    /// Panics when the parse has not finished at the current location.
    pub fn finished_node(&self) -> Option<F::NodeRef> {
        if self.grammar.has_trivial_derivation() && self.earleme == 0 {
            // The empty parse: the grammar derives the empty string.
            Some(self.forest.nulling(self.grammar.externalized_start_sym()))
        } else {
            let has_dot_before_eof = |item: &&Item<_>| item.dot == self.grammar.dot_before_eof();
            let item_node = |item: &Item<_>| item.node;
            self.medial.last().filter(has_dot_before_eof).map(item_node)
        }
    }

    // Event access.

    /// Accesses predicted symbols.
    pub fn predicted_symbols(&self) -> PredictedSymbols {
        let earleme = self.earleme();
        PredictedSymbols {
            iter: self.predicted.iter_row(earleme),
            idx: 0,
        }
    }

    /// Accesses medial items.
    pub fn medial_items(&self) -> MedialItems<F::NodeRef> {
        let indices_len = self.indices.len();
        // Next-to-last index, which points to the beginning of the set before the current set.
        // The current set is empty.
        let items_start = self.indices[indices_len - 2];
        MedialItems {
            iter: self.medial[items_start..].iter(),
        }
    }

    // Accessors.

    /// Returns the current location number.
    pub fn earleme(&self) -> usize {
        self.earleme
    }

    // Completion

    /// Performs the completion pass.
    pub fn complete_all_sums_entirely(&mut self) {
        while let Some(mut completion) = self.next_sum() {
            // Include all items in the completion.
            completion.complete_entire_sum();
        }
        // The hint only applies to the earleme it was given for.
        self.lookahead_hint = None;
    }

    /// Allows iteration through groups of completions that have unique symbol and origin.
    pub fn next_sum<'r>(&'r mut self) -> Option<CompleteSum<'g, 'r, F>> {
        if let Some(ei) = self.heap_peek() {
            let lhs_sym = self.grammar.get_lhs(ei.dot);
            Some(CompleteSum {
                origin: ei.origin,
                lhs_sym,
                recognizer: self,
            })
        } else {
            None
        }
    }
}

/// A group of completed items.
pub struct CompleteSum<'g, 'r, F>
where
    F: Forest,
{
    /// The origin location of this completion.
    origin: Origin,
    /// The symbol of this completion.
    lhs_sym: Symbol,
    /// The recognizer.
    recognizer: &'r mut Recognizer<'g, F>,
}

impl<'g, 'r, F> CompleteSum<'g, 'r, F>
where
    F: Forest,
    'g: 'r,
{
    /// Completes all items.
    pub fn complete_entire_sum(&mut self) {
        self.recognizer.forest.begin_sum();
        // For each item, include it in the completion.
        while let Some(item) = self.next_summand() {
            self.push_summand(item);
        }
        // Use all items for completion.
        self.complete_sum();
    }

    /// Skips all items.
    pub fn skip_entire_sum(&mut self) {
        // Drain every item of this sum without adding anything to the forest.
        while let Some(_) = self.next_summand() {}
    }

    /// Allows iteration through completed items.
    ///
    /// Pops and returns the next completed item belonging to this sum — one sharing
    /// this sum's origin and LHS symbol. Returns `None` once the next heap item
    /// belongs to a different sum, or the heap is empty.
    #[inline]
    pub fn next_summand(&mut self) -> Option<CompletedItem<F::NodeRef>> {
        if let Some(completion) = self.recognizer.heap_peek() {
            let completion_lhs_sym = self.recognizer.grammar.get_lhs(completion.dot);
            if self.origin == completion.origin && self.lhs_sym == completion_lhs_sym {
                self.recognizer.heap_pop();
                Some(completion)
            } else {
                None
            }
        } else {
            None
        }
    }

    /// Includes an item in the completion.
    #[inline]
    pub fn push_summand(&mut self, completed_item: CompletedItem<F::NodeRef>) {
        self.recognizer.forest.push_summand(completed_item);
    }

    /// Uses the completion to complete items in the recognizer.
    #[inline]
    pub fn complete_sum(&mut self) -> F::NodeRef {
        let node = self.recognizer.forest.sum(self.lhs_sym, self.origin);
        self.recognizer.complete(self.origin, self.lhs_sym, node);
        node
    }

    /// Returns the origin location of this completion.
    #[inline]
    pub fn origin(&self) -> Origin {
        self.origin
    }

    /// Returns the symbol of this completion.
    #[inline]
    pub fn symbol(&self) -> Symbol {
        self.lhs_sym
    }
}

/// Bulk prediction: ORs a precomputed prediction row into a destination row.
trait Predict {
    fn predict(&mut self, sym: Symbol, source: &BitVecSlice);
}

impl Predict for BitVecSlice {
    fn predict(&mut self, sym: Symbol, source: &BitVecSlice) {
        // Do the work only once per symbol: if `sym` is already set here, its
        // prediction row was merged in earlier.
        if !self[sym.usize()] {
            // The source in the prediction matrix is the row that corresponds to the predicted
            // symbol.
            //
            // The destination in `predicted` is now the `self` that corresponds to the current
            // location.
            for (dst, &src) in self.iter_mut().zip(source.iter()) {
                *dst |= src;
            }
        }
    }
}
--------------------------------------------------------------------------------
/tests/grammars/ambiguous_arith.rs:
--------------------------------------------------------------------------------
use cfg::Symbol;
use cfg::earley::Grammar;

/// Builds an ambiguous arithmetic grammar:
/// `expr ::= expr op expr | num`, `op ::= + | - | * | /`,
/// and for each of ten digit symbols, `num ::= digit num | digit`.
pub fn grammar() -> Grammar {
    let mut bnf = Grammar::new();
    let (expr, op, num, plus, minus, mul, div) = bnf.sym();
    bnf.rule(expr).rhs([expr, op, expr])
        .rhs([num]);
    bnf.rule(op).rhs([plus])
        .rhs([minus])
        .rhs([mul])
        .rhs([div]);

    // Ten digit terminals, each with its own pair of `num` rules.
    for _ in 0..10 {
        let sym = bnf.sym();
        bnf.rule(num).rhs([sym, num])
            .rhs([sym]);
    }
    bnf.set_start(expr);
    bnf
}

/// Maps a terminal symbol to its semantic value: operator code for +,-,*,/
/// (indices 3..=6) and digit value for the digit symbols (indices 7..=16).
pub fn leaf(sym: Symbol) -> i32 {
    [0, 0, 0, 0, 1, 2, 3, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9][sym.usize()]
}

/// Evaluates a rule by its index; `args` holds the values of the RHS symbols.
/// Missing arguments default to `!0`.
pub fn rule(rule: u32, args: &[&i32]) -> i32 {
    let a0 = args.get(0).map(|f| **f).unwrap_or(!0);
    let a1 = args.get(1).map(|f| **f).unwrap_or(!0);
    let a2 = args.get(2).map(|f| **f).unwrap_or(!0);

    match rule {
        0 => {
            // expr ::= expr op expr — `a1` is the operator code from `leaf`.
            match a1 {
                0 => a0 + a2,
                1 => a0 - a2,
                2 => a0 * a2,
                3 => a0 / a2,
                _ => unreachable!(),
            }
        }
        1 => a0,

        2 => 0,
        3 => 1,
        4 => 2,
        5 => 3,

        // num ::= digit num (even indices) and num ::= digit (odd indices).
        6 | 8 | 10 | 12 | 14 | 16 | 18 | 20 | 22 | 24 => a0 * 10 + a1,
        7 | 9 | 11 | 13 | 15 | 17 | 19 | 21 | 23 | 25 => a0,
        _ => unreachable!(),
    }
}

// Maps a character token to its terminal index (before the `+ 3` offset below).
#[macro_export]
macro_rules! ambiguous_arith_rhs_elem {
    ('+') => (0);
    ('-') => (1);
    ('*') => (2);
    ('/') => (3);
    ('0') => (4);
    ('1') => (5);
    ('2') => (6);
    ('3') => (7);
    ('4') => (8);
    ('5') => (9);
    ('6') => (10);
    ('7') => (11);
    ('8') => (12);
    ('9') => (13);
    ($e:expr) => ($e);
}

// Builds a token slice; `+ 3` skips the three nonterminals (expr, op, num).
#[macro_export]
macro_rules! ambiguous_arith {
    ($($e:tt)+) => (
        &[$(ambiguous_arith_rhs_elem!($e) + 3,)+]
    )
}
--------------------------------------------------------------------------------
/tests/grammars/mod.rs:
--------------------------------------------------------------------------------
#![allow(dead_code)]

#[macro_use]
pub mod ambiguous_arith;
#[macro_use]
pub mod precedenced_arith;
--------------------------------------------------------------------------------
/tests/grammars/precedenced_arith.rs:
--------------------------------------------------------------------------------
use cfg::Symbol;
use cfg::earley::Grammar;

/// Builds an unambiguous arithmetic grammar with operator precedence encoded
/// in the sum / product / factor nonterminal hierarchy.
pub fn grammar() -> Grammar {
    let mut bnf = Grammar::new();
    let (sum, product, factor, number, plus, minus, mul, div, lparen, rparen) = bnf.sym();
    bnf.rule(sum).rhs([sum, plus, product])
        .rhs([sum, minus, product])
        .rhs([product])
        .rule(product).rhs([product, mul, factor])
        .rhs([product, div, factor])
        .rhs([factor])
        .rule(factor).rhs([lparen, sum, rparen])
        .rhs([number]);
    // Ten digit terminals, each with rules number ::= digit number | digit.
    for _ in 0..10 {
        let sym = bnf.sym();
        bnf.rule(number).rhs(&[sym, number])
            .rhs(&[sym]);
    }
    bnf.set_start(sum);
    bnf
}

/// Maps a terminal symbol to its semantic value: digit value for digit symbols
/// (indices 10..=19); operators and parentheses map to 0.
pub fn leaf(sym: Symbol) -> i32 {
    [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9][sym.usize()]
}

/// Evaluates a rule by its index; `args` holds the values of the RHS symbols.
/// Missing arguments default to `!0`.
pub fn rule(rule: u32, args: &[&i32]) -> i32 {
    let a0 = args.get(0).map(|f| **f).unwrap_or(!0);
    let a1 = args.get(1).map(|f| **f).unwrap_or(!0);
    let a2 = args.get(2).map(|f| **f).unwrap_or(!0);
    match rule {
        0 => a0 + a2,
        1 => a0 - a2,
        2 => a0,

        3 => a0 * a2,
        4 => a0 / a2,
        5 => a0,

        // factor ::= lparen sum rparen — the value is the inner sum.
        6 => a1,
        7 => a0,

        // number ::= digit number (even indices) and number ::= digit (odd).
        8 | 10 | 12 | 14 | 16 | 18 | 20 | 22 | 24 | 26 => a0 * 10 + a1,
        9 | 11 | 13 | 15 | 17 | 19 | 21 | 23 | 25 | 27 => a0,
        _ => unreachable!(),
    }
}

// Maps a character token to its terminal index (before the `+ 4` offset below).
#[macro_export]
macro_rules! precedenced_arith_rhs_elem {
    ('+') => (0);
    ('-') => (1);
    ('*') => (2);
    ('/') => (3);
    ('(') => (4);
    (')') => (5);
    ('0') => (6);
    ('1') => (7);
    ('2') => (8);
    ('3') => (9);
    ('4') => (10);
    ('5') => (11);
    ('6') => (12);
    ('7') => (13);
    ('8') => (14);
    ('9') => (15);
    ($e:expr) => ($e);
}

// Builds a token slice; `+ 4` skips the four nonterminals.
#[macro_export]
macro_rules!
 precedenced_arith {
    ($($e:tt)+) => (
        &[$(precedenced_arith_rhs_elem!($e) + 4,)+]
    )
}
--------------------------------------------------------------------------------
/tests/helpers/cartesian_product.rs:
--------------------------------------------------------------------------------
use std::marker::PhantomData;

/// One factor of the cartesian product: a raw-pointer view over a borrowed slice.
pub struct Factor<'a, V: 'a> {
    // First element of the slice.
    start: *const V,
    // One past the last element of the slice.
    end: *const V,
    // Ties the pointers to the slice's lifetime.
    marker: PhantomData<&'a V>,
}

impl<'a, V> Factor<'a, V> {
    fn new(slice: &'a [V]) -> Self {
        let start = slice.as_ptr();
        unsafe {
            Factor {
                start,
                // SAFETY: offsetting by the slice length yields the one-past-the-end
                // pointer, which is valid to form (but not dereference).
                end: start.offset(slice.len() as isize),
                marker: PhantomData,
            }
        }
    }

    /// Steps `ptr` to the next element; wraps back to the start and returns
    /// `true` (a "carry") when it runs off the end of this factor.
    fn advance(&mut self, ptr: &mut &'a V) -> bool {
        unsafe {
            *ptr = &*(*ptr as *const V).offset(1);
            if *ptr as *const _ == self.end {
                // NOTE(review): dereferences `start`; relies on the factor being non-empty.
                *ptr = &*self.start;
                true
            } else {
                false
            }
        }
    }
}

/// Odometer-style cartesian product over a list of slices. `as_slice` yields
/// the current combination; `advance` steps to the next one.
pub struct CartesianProduct<'a, V: 'a> {
    // Current position within each factor.
    ptrs: Vec<&'a V>,
    // The factors themselves.
    ranges: Vec<Factor<'a, V>>,
}

impl<'a, V> CartesianProduct<'a, V> {
    pub fn new() -> Self {
        CartesianProduct {
            ptrs: Vec::with_capacity(8),
            ranges: Vec::with_capacity(8),
        }
    }

    pub fn clear(&mut self) {
        self.ranges.clear();
        self.ptrs.clear();
    }

    /// Multiplies the cartesian product by a slice.
    ///
    /// NOTE(review): dereferences the slice's start pointer — passing an empty
    /// slice here is undefined behavior; confirm callers never do.
    pub fn push(&mut self, slice: &'a [V]) {
        self.ranges.push(Factor::new(slice));
        unsafe {
            self.ptrs
                .push(self.ranges.last().map(|factor| &*factor.start).unwrap());
        }
    }

    /// Multiplies the cartesian product by an iterator.
    pub fn extend<I>(&mut self, product: I)
    where
        I: Iterator<Item = &'a [V]>,
    {
        self.ranges.extend(product.map(|slice| Factor::new(slice)));
        unsafe {
            // FIXME wrong range: this re-pushes a pointer for EVERY range, not just
            // the newly added ones, so it is only correct when the product was empty
            // beforehand (as in `test_cartesian_product` below).
            self.ptrs
                .extend(self.ranges.iter().map(|factor| &*factor.start));
        }
    }

    /// Returns the current combination, one element per factor.
    pub fn as_slice(&self) -> &[&'a V] {
        &self.ptrs[..]
    }

    /// Steps to the next combination, carrying across factors like an odometer.
    /// Returns `false` once every combination has been produced.
    pub fn advance(&mut self) -> bool {
        for (ptr, factor) in self.ptrs.iter_mut().zip(&mut self.ranges) {
            if !factor.advance(ptr) {
                return true;
            }
        }
        false
    }
}

#[test]
fn test_cartesian_product() {
    let (a, b, c) = ([1, 2, 3], [1, 2], [1, 2, 3]);
    let factors: &[&[u32]] = &[&a[..], &b[..], &c[..]];
    let mut cartesian_product = CartesianProduct::new();
    cartesian_product.clear();
    cartesian_product.extend(factors.iter().cloned());
    let mut result = vec![];
    loop {
        {
            let val = cartesian_product.as_slice();
            result.push(*val[0] * 100 + *val[1] * 10 + *val[2]);
        };
        if !cartesian_product.advance() {
            break;
        }
    }
    assert_eq!(
        &result[..],
        &[
            111, 211, 311, 121, 221, 321, 112, 212, 312, 122, 222, 322, 113, 213, 313, 123, 223,
            323,
        ]
    );
}
--------------------------------------------------------------------------------
/tests/helpers/mod.rs:
--------------------------------------------------------------------------------
#![allow(dead_code)]

mod cartesian_product;
pub mod parse;
mod simple_compact_evaluator;
mod simple_evaluator;

pub use self::parse::Parse;
pub use self::simple_compact_evaluator::SimpleCompactEvaluator;
pub use self::simple_evaluator::SimpleEvaluator;
--------------------------------------------------------------------------------
/tests/helpers/parse.rs:
--------------------------------------------------------------------------------
use std::borrow::Borrow;
use std::fmt::Debug;

use cfg::Symbol;
use gearley::forest::bocage::order::NullOrder;
use gearley::forest::compact_bocage::order::NullOrder as CompactNullOrder;
use gearley::forest::{Bocage, CompactBocage, NullForest};
use gearley::grammar::InternalGrammar;
use gearley::recognizer::Recognizer;

/// Drives a recognizer over a token sequence and reports whether it accepts.
pub trait Parse {
    fn parse(&mut self, tokens: &[u32]) -> bool;
}

impl<'g, G> Parse for Recognizer<'g, Bocage<G>>
where
    Self: Debug,
    G: Borrow<InternalGrammar>,
{
    #[inline]
    fn parse(&mut self, tokens: &[u32]) -> bool {
        let mut iter = tokens.iter().enumerate().peekable();
        while let Some((i, &token)) = iter.next() {
            self.begin_earleme();
            trace!("before pass 1 {:?}", &*self);
            // The token's input position doubles as the leaf value.
            self.scan(Symbol::from(token), i as u32);
            trace!("before pass 2 {:?}", &*self);
            self.lookahead_hint(iter.peek().map(|(_i, &t)| Symbol::from(t)));
            assert!(self.end_earleme(), "failed to parse after {}@{}", token, i);
        }
        trace!("finished {:?}", &*self);

        if self.is_finished() {
            self.forest
                .mark_alive(self.finished_node().unwrap(), NullOrder::new());
        }
        self.is_finished()
    }
}

impl<'g, G> Parse for Recognizer<'g, CompactBocage<G>>
where
    Self: Debug,
    G: Borrow<InternalGrammar>,
{
    #[inline]
    fn parse(&mut self, tokens: &[u32]) -> bool {
        let mut iter = tokens.iter().enumerate().peekable();
        while let Some((i, &token)) = iter.next() {
            self.begin_earleme();
            trace!("before pass 1 {:?}", &*self);
            self.scan(Symbol::from(token), i as u32);
            trace!("before pass 2 {:?}", &*self);
            self.lookahead_hint(iter.peek().map(|(_i, &t)| Symbol::from(t)));
            assert!(self.end_earleme(), "failed to parse after {}@{}", token, i);
        }
        trace!("finished {:?}", &*self);

        if self.is_finished() {
            self.forest
                .mark_alive(self.finished_node().unwrap(), CompactNullOrder::new());
        }
        self.is_finished()
    }
}

impl<'g> Parse for Recognizer<'g, NullForest>
where
    Self: Debug,
{
    #[inline]
    fn parse(&mut self, tokens: &[u32]) -> bool {
        for &token in tokens.iter() {
            self.begin_earleme();
            trace!("before pass 1 {:?}", &*self);
            // The null forest carries no values; scan with the unit value.
            self.scan(Symbol::from(token), ());
            trace!("before pass 2 {:?}", &*self);
            assert!(self.end_earleme());
        }
        trace!("finished {:?}", &*self);

        self.is_finished()
    }
}
--------------------------------------------------------------------------------
/tests/helpers/simple_compact_evaluator.rs:
--------------------------------------------------------------------------------
use std::borrow::Borrow;
use std::collections::BTreeMap;
use std::fmt;
use std::mem;

use cfg::Symbol;

use gearley::forest::compact_bocage::traverse::{LeafHandle, NullingHandle, SumHandle, Traverse};
use gearley::forest::node_handle::NodeHandle;

use gearley::grammar::InternalGrammar;

use super::cartesian_product::CartesianProduct;

/// Evaluates all parses in a compact bocage by bottom-up traversal.
/// `leaf` computes terminal values, `rule` folds one rule application,
/// and `null` produces values for nullable symbols.
pub struct SimpleCompactEvaluator<V, F, G, H> {
    // Values computed for the node currently being evaluated.
    values: Vec<V>,
    // Finished per-node value sets, keyed by node handle.
    evaluated: BTreeMap<NodeHandle, Vec<V>>,
    leaf: F,
    rule: G,
    null: H,
}

impl<V, FLeaf, FRule, FNull> SimpleCompactEvaluator<V, FLeaf, FRule, FNull>
where
    FLeaf: FnMut(Symbol) -> V,
    FRule: FnMut(u32, &[&V]) -> V,
    FNull: for<'r> FnMut(Symbol, &'r mut Vec<V>),
    V: fmt::Debug + Clone,
{
    pub fn new(leaf: FLeaf, rule: FRule, null: FNull) -> Self {
        SimpleCompactEvaluator {
            values: vec![],
            evaluated: BTreeMap::new(),
            leaf,
            rule,
            null,
        }
    }

    /// Walks the forest bottom-up, computing every node's value set, and
    /// returns the value set of `root`.
    pub fn traverse<'f, G>(&mut self, traverse: &mut Traverse<'f, G>, root: NodeHandle) -> Vec<V>
    where
        G: Borrow<InternalGrammar>,
    {
        while let Some(mut item) = traverse.next_node() {
            match &mut item.item {
                &mut SumHandle(ref mut products) => {
                    while let Some(product) = products.next_product() {
                        // Every combination of child values yields one value —
                        // ambiguity multiplies out through the cartesian product.
                        let mut cartesian_product = CartesianProduct::new();
                        for &(_sym, handle) in product.factors {
                            cartesian_product.push(&self.evaluated[&handle][..]);
                        }
                        loop {
                            let v = (self.rule)(product.action, cartesian_product.as_slice());
                            self.values.push(v);
                            if !cartesian_product.advance() {
                                break;
                            }
                        }
                    }
                }
                &mut NullingHandle => {
                    (self.null)(item.symbol, &mut self.values);
                }
                &mut LeafHandle => {
                    let v = (self.leaf)(item.symbol);
                    self.values.push(v);
                }
            }
            // Hand the accumulated values over to this node and reset the scratch vec.
            self.evaluated
                .insert(item.handle(), mem::replace(&mut self.values, vec![]));
            item.end_evaluation();
        }
        self.evaluated[&root].clone()
    }
}
--------------------------------------------------------------------------------
/tests/helpers/simple_evaluator.rs:
--------------------------------------------------------------------------------
use std::borrow::Borrow;
use std::fmt;
use std::mem;

use cfg::Symbol;

use gearley::forest::bocage::traverse::{LeafHandle, NullingHandle, SumHandle, Traverse};
use gearley::forest::node_handle::NodeHandle;

use gearley::grammar::InternalGrammar;

use super::cartesian_product::CartesianProduct;

/// Like `SimpleCompactEvaluator`, but for the regular bocage: per-node value
/// sets are stored in a vector indexed by evaluation order instead of a map.
pub struct SimpleEvaluator<V, F, G, H> {
    // Values computed for the node currently being evaluated.
    values: Vec<V>,
    // Finished per-node value sets, indexed by evaluation result id.
    evaluated: Vec<Vec<V>>,
    leaf: F,
    rule: G,
    null: H,
}

impl<V, FLeaf, FRule, FNull> SimpleEvaluator<V, FLeaf, FRule, FNull>
where
    FLeaf: FnMut(Symbol) -> V,
    FRule: FnMut(u32, &[&V]) -> V,
    FNull: for<'r> FnMut(Symbol, &'r mut Vec<V>),
    V: fmt::Debug,
{
    pub fn new(leaf: FLeaf, rule: FRule, null: FNull) -> Self {
        SimpleEvaluator {
            values: vec![],
            evaluated: vec![],
            leaf,
            rule,
            null,
        }
    }

    /// Walks the forest bottom-up; the last evaluated node's value set is the result.
    pub fn traverse<'f, G>(&mut self, traverse: &mut Traverse<'f, G>, _root: NodeHandle) -> Vec<V>
    where
        G: Borrow<InternalGrammar>,
    {
        while let Some(mut item) = traverse.next_node() {
            match &mut item.item {
                &mut SumHandle(ref mut products) => {
                    while let Some(product) = products.next_product() {
                        // Every combination of child values yields one value.
                        let mut cartesian_product = CartesianProduct::new();
                        for &(_sym, values_idx) in product.factors {
                            cartesian_product.push(&self.evaluated[values_idx as usize][..]);
                        }
                        loop {
                            let v = (self.rule)(product.action, cartesian_product.as_slice());
                            self.values.push(v);
                            if !cartesian_product.advance() {
break; 56 | } 57 | } 58 | } 59 | } 60 | &mut NullingHandle => { 61 | (self.null)(item.symbol, &mut self.values); 62 | } 63 | &mut LeafHandle(_) => { 64 | let v = (self.leaf)(item.symbol); 65 | self.values.push(v); 66 | } 67 | } 68 | let result = self.evaluated.len() as u32; 69 | self.evaluated.push(mem::replace(&mut self.values, vec![])); 70 | item.set_evaluation_result(result); 71 | } 72 | self.evaluated.pop().unwrap() 73 | } 74 | } 75 | -------------------------------------------------------------------------------- /tests/test_c.rs: -------------------------------------------------------------------------------- 1 | extern crate cfg; 2 | extern crate gearley; 3 | extern crate c_lexer_logos; 4 | 5 | macro_rules! trace(($($tt:tt)*) => ()); 6 | 7 | mod helpers; 8 | 9 | use cfg::earley::Grammar; 10 | use gearley::forest::Bocage; 11 | use gearley::grammar::InternalGrammar; 12 | use gearley::recognizer::Recognizer; 13 | use gearley::memory_use::MemoryUse; 14 | 15 | use helpers::Parse; 16 | 17 | const _SYM_NAMES: &'static [&'static str] = &[ 18 | "term", "identifier", "signed", "const_", "inline", "auto", "break_", "case", "char_", "continue_", "default", 19 | "do_", "double", "else_", "enum_", "extern_", "float", "for_", "goto", "if_", "int", "long", "register", "return_", 20 | "short", "sizeof_", "static_", "struct_", "switch", "typedef", "union", "unsigned", "void", "volatile", "while_", 21 | "constant", "string_literal", "right_assign", "left_assign", "add_assign", "sub_assign", "mul_assign", 22 | "div_assign", "mod_assign", "and_assign", "xor_assign", "or_assign", "right_op", "left_op", "inc_op", "dec_op", 23 | "ptr_op", "and_op", "or_op", "le_op", "ge_op", "eq_op", "ne_op", "elipsis", "restrict", "bool_", "complex", "imaginary", 24 | "lparen", "rparen", "lbracket", "rbracket", "lbrace", "rbrace", "dot", "colon", "semicolon", "comma", "ampersand", 25 | "star", "plus", "minus", "tilde", "exclamation", "slash", "percent", "langle", "rangle", "xor", "pipe", 
"question", 26 | "equal", 27 | "start", "primary_expression", "postfix_expression", 28 | "argument_expression_list_opt", "argument_expression_list", "unary_expression", "unary_operator", 29 | "cast_expression", "multiplicative_expression", "additive_expression", "shift_expression", 30 | "relational_expression", "equality_expression", "AND_expression", "exclusive_OR_expression", 31 | "inclusive_OR_expression", "logical_AND_expression", "logical_OR_expression", 32 | "conditional_expression", "assignment_expression", "assignment_operator", "expression", 33 | "constant_expression", "declaration", "init_declarator_list_opt", "declaration_specifiers", 34 | "declaration_specifiers_opt", "init_declarator_list", "init_declarator", "storage_class_specifier", 35 | "type_specifier", "struct_or_union_specifier", "identifier_opt", "struct_or_union", 36 | "struct_declaration_list", "struct_declaration", "specifier_qualifier_list", 37 | "specifier_qualifier_list_opt", "struct_declarator_list", "struct_declarator", "declarator_opt", 38 | "enum_specifier", "enumerator_list", "enumerator", "type_qualifier", "function_specifier", "declarator", 39 | "pointer_opt", "direct_declarator", "type_qualifier_list_opt", "identifier_list_opt", "pointer", 40 | "type_qualifier_list", "parameter_type_list", "parameter_list", "parameter_declaration", 41 | "abstract_declarator_opt", "identifier_list", "abstract_declarator", "direct_abstract_declarator", 42 | "direct_abstract_declarator_opt", "assignment_expression_opt", "parameter_type_list_opt", 43 | "typedef_name", "initializer", "initializer_list", "designation_opt", "designation", "designator_list", 44 | "designator", "statement", "labeled_statement", "compound_statement", "block_item_list_opt", 45 | "block_item_list", "block_item", "expression_statement", "expression_opt", "selection_statement", 46 | "iteration_statement", "jump_statement", "translation_unit", "external_declaration", 47 | "function_definition", "declaration_list_opt", 
"declaration_list", "enumeration_constant", 48 | "type_name", "error", 49 | ]; 50 | 51 | #[allow(non_snake_case)] 52 | fn grammar() -> Grammar { 53 | let mut grammar = Grammar::new(); 54 | let ( 55 | _term, identifier, signed, const_, inline, auto, break_, case, char_, continue_, default, 56 | do_, double, else_, enum_, extern_, float, for_, goto, if_, int, long, register, return_, 57 | short, sizeof_, static_, struct_, switch, typedef, union, unsigned, void, volatile, while_, 58 | constant, string_literal, right_assign, left_assign, add_assign, sub_assign, mul_assign, 59 | div_assign, mod_assign, and_assign, xor_assign, or_assign, right_op, left_op, inc_op, dec_op, 60 | ptr_op, and_op, or_op, le_op, ge_op, eq_op, ne_op, elipsis, restrict, bool_, complex, imaginary, 61 | lparen, rparen, lbracket, rbracket, lbrace, rbrace, dot, colon, semicolon, comma, ampersand, 62 | star, plus, minus, tilde, exclamation, slash, percent, langle, rangle, xor, pipe, question, 63 | equal 64 | ) = grammar.sym(); 65 | 66 | let ( 67 | start, primary_expression, postfix_expression, 68 | argument_expression_list_opt, argument_expression_list, unary_expression, unary_operator, 69 | cast_expression, multiplicative_expression, additive_expression, shift_expression, 70 | relational_expression, equality_expression, AND_expression, exclusive_OR_expression, 71 | inclusive_OR_expression, logical_AND_expression, logical_OR_expression, 72 | conditional_expression, assignment_expression, assignment_operator, expression, 73 | constant_expression, declaration, init_declarator_list_opt, declaration_specifiers, 74 | declaration_specifiers_opt, init_declarator_list, init_declarator, storage_class_specifier, 75 | type_specifier, struct_or_union_specifier, identifier_opt, struct_or_union, 76 | struct_declaration_list, struct_declaration, specifier_qualifier_list, 77 | specifier_qualifier_list_opt, struct_declarator_list, struct_declarator, declarator_opt, 78 | enum_specifier, enumerator_list, enumerator, 
type_qualifier, function_specifier, declarator, 79 | pointer_opt, direct_declarator, type_qualifier_list_opt, identifier_list_opt, pointer, 80 | type_qualifier_list, parameter_type_list, parameter_list, parameter_declaration, 81 | abstract_declarator_opt, identifier_list, abstract_declarator, direct_abstract_declarator, 82 | direct_abstract_declarator_opt, assignment_expression_opt, parameter_type_list_opt, 83 | typedef_name, initializer, initializer_list, designation_opt, designation, designator_list, 84 | designator, statement, labeled_statement, compound_statement, block_item_list_opt, 85 | block_item_list, block_item, expression_statement, expression_opt, selection_statement, 86 | iteration_statement, jump_statement, translation_unit, external_declaration, 87 | function_definition, declaration_list_opt, declaration_list, enumeration_constant, 88 | type_name, error, 89 | ) = grammar.sym(); 90 | 91 | grammar.rule(start).rhs([translation_unit]); 92 | grammar.rule(primary_expression).rhs([identifier]) 93 | .rhs([constant]) 94 | .rhs([string_literal]) 95 | .rhs([lparen, expression, rparen]); 96 | grammar.rule(postfix_expression).rhs([primary_expression]) 97 | .rhs([postfix_expression, lbracket, expression, rbracket]) 98 | .rhs([postfix_expression, lparen, argument_expression_list_opt, rparen]) 99 | .rhs([postfix_expression, dot, identifier]) 100 | .rhs([postfix_expression, ptr_op, identifier]) 101 | .rhs([postfix_expression, inc_op]) 102 | .rhs([postfix_expression, dec_op]) 103 | .rhs([lparen, type_name, rparen, lbrace, initializer_list, rbrace]) 104 | .rhs([lparen, type_name, rparen, lbrace, initializer_list, comma, rbrace]); 105 | grammar.rule(argument_expression_list_opt).rhs([]) 106 | .rhs([argument_expression_list]); 107 | grammar.rule(argument_expression_list).rhs([assignment_expression]) 108 | .rhs([argument_expression_list, comma, assignment_expression]); 109 | grammar.rule(unary_expression).rhs([postfix_expression]) 110 | .rhs([inc_op, unary_expression]) 
111 | .rhs([dec_op, unary_expression]) 112 | .rhs([unary_operator, cast_expression]) 113 | .rhs([sizeof_, unary_expression]) 114 | .rhs([sizeof_, lparen, type_name, rparen]); 115 | grammar.rule(unary_operator).rhs([ampersand]) 116 | .rhs([star]) 117 | .rhs([plus]) 118 | .rhs([minus]) 119 | .rhs([tilde]) 120 | .rhs([exclamation]); 121 | grammar.rule(cast_expression).rhs([unary_expression]) 122 | .rhs([lparen, type_name, rparen, cast_expression]); 123 | grammar.rule(multiplicative_expression).rhs([cast_expression]) 124 | .rhs([multiplicative_expression, star, cast_expression]) 125 | .rhs([multiplicative_expression, slash, cast_expression]) 126 | .rhs([multiplicative_expression, percent, cast_expression]); 127 | grammar.rule(additive_expression).rhs([multiplicative_expression]) 128 | .rhs([additive_expression, plus, multiplicative_expression]) 129 | .rhs([additive_expression, minus, multiplicative_expression]); 130 | grammar.rule(shift_expression).rhs([additive_expression]) 131 | .rhs([shift_expression, left_op, additive_expression]) 132 | .rhs([shift_expression, right_op, additive_expression]); 133 | grammar.rule(relational_expression).rhs([shift_expression]) 134 | .rhs([relational_expression, langle, shift_expression]) 135 | .rhs([relational_expression, rangle, shift_expression]) 136 | .rhs([relational_expression, le_op, shift_expression]) 137 | .rhs([relational_expression, ge_op, shift_expression]); 138 | grammar.rule(equality_expression).rhs([relational_expression]) 139 | .rhs([equality_expression, eq_op, relational_expression]) 140 | .rhs([equality_expression, ne_op, relational_expression]); 141 | grammar.rule(AND_expression).rhs([equality_expression]) 142 | .rhs([AND_expression, ampersand, equality_expression]); 143 | grammar.rule(exclusive_OR_expression).rhs([AND_expression]) 144 | .rhs([exclusive_OR_expression, xor, AND_expression]); 145 | grammar.rule(inclusive_OR_expression).rhs([exclusive_OR_expression]) 146 | .rhs([inclusive_OR_expression, pipe, 
exclusive_OR_expression]); 147 | grammar.rule(logical_AND_expression).rhs([inclusive_OR_expression]) 148 | .rhs([logical_AND_expression, and_op, inclusive_OR_expression]); 149 | grammar.rule(logical_OR_expression).rhs([logical_AND_expression]) 150 | .rhs([logical_OR_expression, or_op, logical_AND_expression]); 151 | grammar.rule(conditional_expression).rhs([logical_OR_expression]) 152 | .rhs([logical_OR_expression, question, expression, colon, conditional_expression]); 153 | grammar.rule(assignment_expression).rhs([conditional_expression]) 154 | .rhs([unary_expression, assignment_operator, assignment_expression]); 155 | grammar.rule(assignment_operator).rhs([equal]) 156 | .rhs([mul_assign]) 157 | .rhs([div_assign]) 158 | .rhs([mod_assign]) 159 | .rhs([add_assign]) 160 | .rhs([sub_assign]) 161 | .rhs([left_assign]) 162 | .rhs([right_assign]) 163 | .rhs([and_assign]) 164 | .rhs([xor_assign]) 165 | .rhs([or_assign]); 166 | grammar.rule(expression).rhs([assignment_expression]) 167 | .rhs([expression, comma, assignment_expression]) 168 | .rhs([error]); 169 | grammar.rule(constant_expression).rhs([conditional_expression]); 170 | 171 | grammar.rule(declaration).rhs([declaration_specifiers, init_declarator_list_opt, semicolon]) 172 | .rhs([error]); 173 | grammar.rule(init_declarator_list_opt).rhs([]) 174 | .rhs([init_declarator_list]); 175 | grammar.rule(declaration_specifiers).rhs([storage_class_specifier, declaration_specifiers_opt]) 176 | .rhs([type_specifier, declaration_specifiers_opt]) 177 | .rhs([type_qualifier, declaration_specifiers_opt]) 178 | .rhs([function_specifier, declaration_specifiers_opt]); 179 | grammar.rule(declaration_specifiers_opt).rhs([]) 180 | .rhs([declaration_specifiers]); 181 | grammar.rule(init_declarator_list).rhs([init_declarator]) 182 | .rhs([init_declarator_list, comma, init_declarator]); 183 | grammar.rule(init_declarator).rhs([declarator]) 184 | .rhs([declarator, equal, initializer]); 185 | 
grammar.rule(storage_class_specifier).rhs([typedef]) 186 | .rhs([extern_]) 187 | .rhs([static_]) 188 | .rhs([auto]) 189 | .rhs([register]); 190 | grammar.rule(type_specifier).rhs([void]) 191 | .rhs([char_]) 192 | .rhs([short]) 193 | .rhs([int]) 194 | .rhs([long]) 195 | .rhs([float]) 196 | .rhs([double]) 197 | .rhs([signed]) 198 | .rhs([unsigned]) 199 | .rhs([bool_]) 200 | .rhs([complex]) 201 | .rhs([imaginary]) 202 | .rhs([struct_or_union_specifier]) 203 | .rhs([enum_specifier]) 204 | .rhs([typedef_name]); 205 | grammar.rule(struct_or_union_specifier).rhs([struct_or_union, identifier_opt, lbrace, struct_declaration_list, rbrace]) 206 | .rhs([struct_or_union, identifier]); 207 | grammar.rule(identifier_opt).rhs([]) 208 | .rhs([identifier]); 209 | grammar.rule(struct_or_union).rhs([struct_]) 210 | .rhs([union]); 211 | grammar.rule(struct_declaration_list).rhs([struct_declaration]) 212 | .rhs([struct_declaration_list, struct_declaration]); 213 | grammar.rule(struct_declaration).rhs([specifier_qualifier_list, struct_declarator_list, semicolon]); 214 | grammar.rule(specifier_qualifier_list).rhs([type_specifier, specifier_qualifier_list_opt]) 215 | .rhs([type_qualifier, specifier_qualifier_list_opt]); 216 | grammar.rule(specifier_qualifier_list_opt).rhs([]) 217 | .rhs([specifier_qualifier_list]); 218 | grammar.rule(struct_declarator_list).rhs([struct_declarator]) 219 | .rhs([struct_declarator_list, comma, struct_declarator]); 220 | grammar.rule(struct_declarator).rhs([declarator]) 221 | .rhs([declarator_opt, colon, constant_expression]); 222 | grammar.rule(declarator_opt).rhs([]) 223 | .rhs([declarator]); 224 | grammar.rule(enum_specifier).rhs([enum_, identifier_opt, lbrace, enumerator_list, rbrace]) 225 | .rhs([enum_, identifier_opt, lbrace, enumerator_list, comma, rbrace]) 226 | .rhs([enum_, identifier]); 227 | grammar.rule(enumerator_list).rhs([enumerator]) 228 | .rhs([enumerator_list, comma, enumerator]); 229 | grammar.rule(enumerator).rhs([enumeration_constant]) 230 
| .rhs([enumeration_constant, equal, constant_expression]); 231 | grammar.rule(type_qualifier).rhs([const_]) 232 | .rhs([restrict]) 233 | .rhs([volatile]); 234 | grammar.rule(function_specifier).rhs([inline]); 235 | grammar.rule(declarator).rhs([pointer_opt, direct_declarator]); 236 | grammar.rule(pointer_opt).rhs([]) 237 | .rhs([pointer]); 238 | grammar.rule(direct_declarator).rhs([identifier]) 239 | .rhs([lparen, declarator, rparen]) 240 | .rhs([direct_declarator, lbracket, type_qualifier_list_opt, assignment_expression_opt, rbracket]) 241 | .rhs([direct_declarator, lbracket, static_, type_qualifier_list_opt, assignment_expression, rbracket]) 242 | .rhs([direct_declarator, lbracket, type_qualifier_list, static_, assignment_expression, rbracket]) 243 | .rhs([direct_declarator, lbracket, type_qualifier_list_opt, star, rbracket]) 244 | .rhs([direct_declarator, lparen, parameter_type_list, rparen]) 245 | .rhs([direct_declarator, lparen, identifier_list_opt, rparen]); 246 | grammar.rule(type_qualifier_list_opt).rhs([]) 247 | .rhs([type_qualifier_list]); 248 | grammar.rule(identifier_list_opt).rhs([]) 249 | .rhs([identifier_list]); 250 | grammar.rule(pointer).rhs([star, type_qualifier_list_opt]) 251 | .rhs([star, type_qualifier_list_opt, pointer]); 252 | grammar.rule(type_qualifier_list).rhs([type_qualifier]) 253 | .rhs([type_qualifier_list, type_qualifier]); 254 | grammar.rule(parameter_type_list).rhs([parameter_list]) 255 | .rhs([parameter_list, comma, elipsis]); 256 | grammar.rule(parameter_list).rhs([parameter_declaration]) 257 | .rhs([parameter_list, comma, parameter_declaration]); 258 | grammar.rule(parameter_declaration).rhs([declaration_specifiers, declarator]) 259 | .rhs([declaration_specifiers, abstract_declarator_opt]); 260 | grammar.rule(abstract_declarator_opt).rhs([]) 261 | .rhs([abstract_declarator]); 262 | grammar.rule(identifier_list).rhs([identifier]) 263 | .rhs([identifier_list, comma, identifier]); 264 | 
grammar.rule(type_name).rhs([specifier_qualifier_list, abstract_declarator_opt]);
265 |     grammar.rule(abstract_declarator).rhs([pointer])
266 |         .rhs([pointer_opt, direct_abstract_declarator]);
267 |     grammar.rule(direct_abstract_declarator).rhs([lparen, abstract_declarator, rparen])
268 |         .rhs([direct_abstract_declarator_opt, lbracket, assignment_expression_opt, rbracket])
269 |         .rhs([direct_abstract_declarator_opt, lbracket, star, rbracket])
270 |         .rhs([direct_abstract_declarator_opt, lparen, parameter_type_list_opt, rparen]);
271 |     grammar.rule(direct_abstract_declarator_opt).rhs([])
272 |         .rhs([direct_abstract_declarator]);
273 |     grammar.rule(assignment_expression_opt).rhs([])
274 |         .rhs([assignment_expression]);
275 |     grammar.rule(parameter_type_list_opt).rhs([])
276 |         .rhs([parameter_type_list]);
277 |     grammar.rule(typedef_name).rhs([identifier]);
278 |     grammar.rule(initializer).rhs([assignment_expression])
279 |         .rhs([lbrace, initializer_list, rbrace])
280 |         .rhs([lbrace, initializer_list, comma, rbrace]);
281 |     grammar.rule(initializer_list).rhs([designation_opt, initializer])
282 |         .rhs([initializer_list, comma, designation_opt, initializer]);
283 |     grammar.rule(designation_opt).rhs([])
284 |         .rhs([designation]);
285 |     grammar.rule(designation).rhs([designator_list, equal]);
286 |     grammar.rule(designator_list).rhs([designator])
287 |         .rhs([designator_list, designator]);
288 |     grammar.rule(designator).rhs([lbracket, constant_expression, rbracket]) // C11 6.7.9: designator is '[' constant-expression ']' — opening bracket first (was rbracket twice)
289 |         .rhs([dot, identifier]);
290 |     grammar.rule(statement).rhs([labeled_statement])
291 |         .rhs([compound_statement])
292 |         .rhs([expression_statement])
293 |         .rhs([selection_statement])
294 |         .rhs([iteration_statement])
295 |         .rhs([jump_statement])
296 |         .rhs([error]);
297 |     grammar.rule(labeled_statement).rhs([identifier, colon, statement])
298 |         .rhs([case, constant_expression, colon, statement])
299 |         .rhs([default, colon, statement]);
300 |     grammar.rule(compound_statement).rhs([lbrace, block_item_list_opt,
rbrace]); 301 | grammar.rule(block_item_list_opt).rhs([]) 302 | .rhs([block_item_list]); 303 | grammar.rule(block_item_list).rhs([block_item]) 304 | .rhs([block_item_list, block_item]); 305 | grammar.rule(block_item).rhs([declaration]) 306 | .rhs([statement]); 307 | grammar.rule(expression_statement).rhs([expression_opt, semicolon]); 308 | grammar.rule(expression_opt).rhs([]) 309 | .rhs([expression]); 310 | grammar.rule(selection_statement).rhs([if_, lparen, expression, rparen, statement]) 311 | .rhs([if_, lparen, expression, rparen, statement, else_, statement]) 312 | .rhs([switch, lparen, expression, rparen, statement]); 313 | grammar.rule(iteration_statement).rhs([while_, lparen, expression, rparen, statement]) 314 | .rhs([do_, statement, while_, lparen, expression, rparen, semicolon]) 315 | .rhs([for_, lparen, expression_opt, semicolon, expression_opt, semicolon, expression_opt, rparen, statement]) 316 | .rhs([for_, lparen, declaration, expression_opt, semicolon, expression_opt, rparen, statement]); 317 | grammar.rule(jump_statement).rhs([goto, identifier, semicolon]) 318 | .rhs([continue_, semicolon]) 319 | .rhs([break_, semicolon]) 320 | .rhs([return_, expression_opt, semicolon]); 321 | grammar.rule(translation_unit).rhs([external_declaration]) 322 | .rhs([translation_unit, external_declaration]); 323 | grammar.rule(external_declaration).rhs([function_definition]) 324 | .rhs([declaration]); 325 | grammar.rule(function_definition).rhs([declaration_specifiers, declarator, declaration_list_opt, compound_statement]); 326 | grammar.rule(declaration_list_opt).rhs([]) 327 | .rhs([declaration_list]); 328 | grammar.rule(declaration_list).rhs([declaration]) 329 | .rhs([declaration_list, declaration]); 330 | grammar.rule(enumeration_constant).rhs([identifier]); 331 | 332 | grammar.set_start(start); 333 | grammar 334 | } 335 | 336 | #[test] 337 | fn test_parse_c() { 338 | use c_lexer_logos::Lexer; 339 | use c_lexer_logos::token::Token::*; 340 | let external = grammar(); 
341 | let mut grammar = Grammar::new(); 342 | let ( 343 | _term, identifier, signed, const_, inline, _auto, break_, case, char_, continue_, default, 344 | do_, double, else_, enum_, extern_, float, for_, goto, if_, int, long, register, return_, 345 | short, sizeof_, static_, struct_, switch, typedef, union, unsigned, void, volatile, while_, 346 | constant, string_literal, right_assign, left_assign, add_assign, sub_assign, mul_assign, 347 | div_assign, mod_assign, and_assign, xor_assign, or_assign, right_op, left_op, inc_op, dec_op, 348 | ptr_op, and_op, or_op, le_op, ge_op, eq_op, ne_op, elipsis, restrict, bool_, complex, imaginary, 349 | lparen, rparen, lbracket, rbracket, lbrace, rbrace, dot, colon, semicolon, comma, ampersand, 350 | star, plus, minus, tilde, exclamation, slash, percent, langle, rangle, xor, pipe, question, 351 | equal 352 | ) = grammar.sym(); 353 | 354 | let contents = include_str!("../benches/part_gcc_test.i"); 355 | let tokens: Vec<_> = Lexer::lex(&contents[..]).unwrap().into_iter().filter_map(|token| { 356 | // println!("{:?}", token); 357 | let tok = match token { 358 | LBrace => Some(lbrace), 359 | RBrace => Some(rbrace), 360 | LParen => Some(lparen), 361 | RParen => Some(rparen), 362 | LBracket => Some(lbracket), 363 | RBracket => Some(rbracket), 364 | Semicolon => Some(semicolon), 365 | Assign => Some(equal), 366 | Lt => Some(langle), 367 | Gt => Some(rangle), 368 | Minus => Some(minus), 369 | Tilde => Some(tilde), 370 | Exclamation => Some(exclamation), 371 | Plus => Some(plus), 372 | Multi => Some(star), 373 | Slash => Some(slash), 374 | Colon => Some(colon), 375 | QuestionMark => Some(question), 376 | Comma => Some(comma), 377 | Dot => Some(dot), 378 | SingleAnd => Some(ampersand), 379 | InclusiveOr => Some(pipe), 380 | ExclusiveOr => Some(xor), 381 | Mod => Some(percent), 382 | Identifier(_i_str) => Some(identifier), 383 | NumericLiteral(_num) => Some(constant), 384 | StringLiteral(_s) => Some(string_literal), 385 | FuncName => None, 
386 | SIZEOF => Some(sizeof_), 387 | PtrOp => Some(ptr_op), 388 | IncOp => Some(inc_op), 389 | DecOp => Some(dec_op), 390 | LeftOp => Some(left_op), 391 | RightOp => Some(right_op), 392 | LeOp => Some(le_op), 393 | GeOp => Some(ge_op), 394 | EqOp => Some(eq_op), 395 | NeOp => Some(ne_op), 396 | AndOp => Some(and_op), 397 | OrOp => Some(or_op), 398 | MulAssign => Some(mul_assign), 399 | DivAssign => Some(div_assign), 400 | ModAssign => Some(mod_assign), 401 | AddAssign => Some(add_assign), 402 | SubAssign => Some(sub_assign), 403 | LeftAssign => Some(left_assign), 404 | RightAssign => Some(right_assign), 405 | AndAssign => Some(and_assign), 406 | XorAssign => Some(xor_assign), 407 | OrAssign => Some(or_assign), 408 | // TODO: this should be done when we found this is a typedef name, 409 | // typedef LL int, then LL is typedef_name 410 | TypedefName => Some(identifier), 411 | ELLIPSIS => Some(elipsis), // ... 412 | EnumerationConstant(..) => None, // TODO: add check 413 | LineTerminator => None, 414 | EOF => None, 415 | 416 | TYPEDEF => Some(typedef), 417 | EXTERN => Some(extern_), 418 | STATIC => Some(static_), 419 | // AUTO => Some(auto_), 420 | REGISTER => Some(register), 421 | INLINE => Some(inline), 422 | CONST => Some(const_), 423 | RESTRICT => Some(restrict), 424 | VOLATILE => Some(volatile), 425 | BOOL => Some(bool_), 426 | CHAR => Some(char_), 427 | SHORT => Some(short), 428 | INT => Some(int), 429 | LONG => Some(long), 430 | SIGNED => Some(signed), 431 | UNSIGNED => Some(unsigned), 432 | FLOAT => Some(float), 433 | DOUBLE => Some(double), 434 | VOID => Some(void), 435 | COMPLEX => Some(complex), 436 | IMAGINARY => Some(imaginary), 437 | STRUCT => Some(struct_), 438 | UNION => Some(union), 439 | ENUM => Some(enum_), 440 | CASE => Some(case), 441 | DEFAULT => Some(default), 442 | IF => Some(if_), 443 | ELSE => Some(else_), 444 | SWITCH => Some(switch), 445 | WHILE => Some(while_), 446 | DO => Some(do_), 447 | FOR => Some(for_), 448 | GOTO => Some(goto), 449 | 
CONTINUE => Some(continue_), 450 | BREAK => Some(break_), 451 | RETURN => Some(return_), 452 | // ALIGNAS => Some(alignas), 453 | // ALIGNOF => Some(alignof), 454 | // ATOMIC => Some(atomic), 455 | // GENERIC => Some(generic), 456 | // NORETURN, 457 | // StaticAssert, 458 | // ThreadLocal, 459 | _ => None, 460 | }; 461 | // tok.map(|t| (t.usize() as u32, start, end)) 462 | tok.map(|t| t.usize() as u32) 463 | }).collect(); 464 | let cfg = InternalGrammar::from_grammar(&external); 465 | let bocage = Bocage::new(&cfg); 466 | let mut rec: Recognizer> = Recognizer::new_with_limit(&cfg, 2_00_000); 467 | rec.forest = bocage; 468 | let finished = rec.parse(&tokens[..]); 469 | assert!(finished); 470 | println!("memory use: all:{} forest:{}", rec.memory_use(), rec.forest.memory_use()); 471 | } 472 | -------------------------------------------------------------------------------- /tests/test_nulling.rs: -------------------------------------------------------------------------------- 1 | #[macro_use] 2 | extern crate log; 3 | extern crate cfg; 4 | extern crate env_logger; 5 | extern crate gearley; 6 | 7 | mod helpers; 8 | 9 | use cfg::earley::Grammar; 10 | use cfg::Symbol; 11 | use gearley::forest::{Bocage, CompactBocage}; 12 | use gearley::grammar::InternalGrammar; 13 | use gearley::recognizer::Recognizer; 14 | 15 | use helpers::{Parse, SimpleCompactEvaluator, SimpleEvaluator}; 16 | 17 | macro_rules! 
test_trivial_grammar { 18 | ($Bocage:ident, $SimpleEvaluator:ident) => { 19 | let _ = env_logger::try_init(); 20 | let mut external = Grammar::new(); 21 | let start = external.sym(); 22 | external.rule(start).rhs([]); 23 | external.set_start(start); 24 | let cfg = InternalGrammar::from_grammar(&external); 25 | let mut evaluator = $SimpleEvaluator::new( 26 | |_: Symbol| unreachable!(), 27 | |_: u32, _: &[&bool]| unreachable!(), 28 | |sym, builder: &mut Vec| { 29 | builder.reserve(1); 30 | if sym == start { 31 | builder.push(true); 32 | } else { 33 | builder.push(false); 34 | } 35 | }, 36 | ); 37 | let bocage = $Bocage::new(&cfg); 38 | let mut rec = Recognizer::new(&cfg, bocage); 39 | assert!(rec.parse(&[])); 40 | let mut traversal = rec.forest.traverse(); 41 | let results = evaluator.traverse(&mut traversal, rec.finished_node().unwrap()); 42 | assert_eq!(results, &[true]); 43 | }; 44 | } 45 | 46 | #[test] 47 | fn test_trivial_grammar() { 48 | test_trivial_grammar!(Bocage, SimpleEvaluator); 49 | } 50 | 51 | #[test] 52 | fn test_trivial_grammar_compact() { 53 | test_trivial_grammar!(CompactBocage, SimpleCompactEvaluator); 54 | } 55 | 56 | macro_rules! 
test_grammar_with_nulling_intermediate { 57 | ($Bocage:ident, $SimpleEvaluator:ident) => { 58 | let _ = env_logger::try_init(); 59 | let mut external = Grammar::new(); 60 | let (start, a, b, c, d, foo) = external.sym(); 61 | external 62 | .rule(start) 63 | .rhs([a, b, c, d, foo]) 64 | .rule(a) 65 | .rhs([]) 66 | .rule(b) 67 | .rhs([]) 68 | .rule(c) 69 | .rhs([]) 70 | .rule(d) 71 | .rhs([]); 72 | external.set_start(start); 73 | let cfg = InternalGrammar::from_grammar(&external); 74 | let mut evaluator = $SimpleEvaluator::new( 75 | |sym: Symbol| { 76 | if sym == foo { 77 | 3 78 | } else { 79 | unreachable!() 80 | } 81 | }, 82 | |rule: u32, arg: &[&i32]| { 83 | if rule == 0 { 84 | arg.iter().cloned().fold(0, |a, e| a + e) 85 | } else { 86 | unreachable!() 87 | } 88 | }, 89 | |sym, builder: &mut Vec| { 90 | builder.reserve(1); 91 | if sym == a { 92 | builder.push(1); 93 | } else { 94 | builder.push(2); 95 | } 96 | }, 97 | ); 98 | let bocage = $Bocage::new(&cfg); 99 | let mut rec = Recognizer::new(&cfg, bocage); 100 | assert!(rec.parse(&[foo.usize() as u32])); 101 | let mut traversal = rec.forest.traverse(); 102 | let results = evaluator.traverse(&mut traversal, rec.finished_node().unwrap()); 103 | assert_eq!(results, &[10]); 104 | }; 105 | } 106 | 107 | #[test] 108 | fn test_grammar_with_nulling_intermediate() { 109 | test_grammar_with_nulling_intermediate!(Bocage, SimpleEvaluator); 110 | } 111 | 112 | #[test] 113 | fn test_grammar_with_nulling_intermediate_compact() { 114 | test_grammar_with_nulling_intermediate!(CompactBocage, SimpleCompactEvaluator); 115 | } 116 | -------------------------------------------------------------------------------- /tests/test_recognizer.rs: -------------------------------------------------------------------------------- 1 | #[macro_use] 2 | extern crate log; 3 | extern crate env_logger; 4 | extern crate cfg; 5 | extern crate gearley; 6 | 7 | mod helpers; 8 | 9 | use cfg::earley::Grammar; 10 | 11 | use gearley::forest::NullForest; 12 | 
use gearley::grammar::InternalGrammar; 13 | use gearley::recognizer::Recognizer; 14 | 15 | use helpers::Parse; 16 | 17 | #[test] 18 | fn test_recognize_nested() { 19 | let _ = env_logger::try_init(); 20 | let mut external = Grammar::new(); 21 | let (start, nested, terminal) = external.sym(); 22 | external.rule(start).rhs([nested, terminal]) 23 | .rule(nested).rhs([terminal, terminal]); 24 | external.set_start(start); 25 | let cfg = InternalGrammar::from_grammar(&external); 26 | let mut rec = Recognizer::new(&cfg, NullForest); 27 | let finished = rec.parse(&[terminal.usize() as u32; 3]); 28 | assert!(finished); 29 | } 30 | 31 | #[test] 32 | fn test_recognize_reset() { 33 | let _ = env_logger::try_init(); 34 | let mut external = Grammar::new(); 35 | let (start, nested, terminal) = external.sym(); 36 | external.rule(start).rhs([nested, terminal]) 37 | .rule(nested).rhs([terminal, terminal]); 38 | external.set_start(start); 39 | let cfg = InternalGrammar::from_grammar(&external); 40 | let mut rec = Recognizer::new(&cfg, NullForest); 41 | for _ in 0..100 { 42 | let finished = rec.parse(&[terminal.usize() as u32; 3]); 43 | assert!(finished); 44 | rec.reset(); 45 | } 46 | } 47 | -------------------------------------------------------------------------------- /tests/test_sequence.rs: -------------------------------------------------------------------------------- 1 | #[macro_use] 2 | extern crate log; 3 | extern crate env_logger; 4 | extern crate cfg; 5 | extern crate gearley; 6 | 7 | mod helpers; 8 | 9 | use cfg::Symbol; 10 | use cfg::sequence::Separator::Trailing; 11 | use cfg::earley::Grammar; 12 | 13 | use gearley::forest::Bocage; 14 | use gearley::grammar::InternalGrammar; 15 | use gearley::recognizer::Recognizer; 16 | 17 | use helpers::{SimpleEvaluator, Parse}; 18 | 19 | #[test] 20 | fn test_sequence() { 21 | let _ = env_logger::try_init(); 22 | let (plus, minus) = (1, 2); 23 | let tokens = &[plus, minus, plus, minus, plus, minus]; 24 | let mut external = 
Grammar::new(); 25 | let (start, plus, minus) = external.sym(); 26 | external.sequence(start).separator(Trailing(minus)).inclusive(3, Some(3)).rhs(plus); 27 | external.set_start(start); 28 | 29 | let cfg = InternalGrammar::from_grammar(&external); 30 | let mut evaluator = SimpleEvaluator::new( 31 | |sym: Symbol| { 32 | match sym.usize() { 33 | 1 => 1, 34 | 2 => -1, 35 | _ => unreachable!() 36 | } 37 | }, 38 | |rule: u32, args: &[&i32]| { 39 | if rule == 0 { 40 | args.len() as i32 41 | } else { 42 | unreachable!() 43 | } 44 | }, 45 | |_, _: &mut Vec| unreachable!() 46 | ); 47 | let bocage = Bocage::new(&cfg); 48 | let mut recognizer = Recognizer::new(&cfg, bocage); 49 | assert!(recognizer.parse(tokens)); 50 | 51 | let mut traversal = recognizer.forest.traverse(); 52 | 53 | let results = evaluator.traverse(&mut traversal, recognizer.finished_node().unwrap()); 54 | 55 | assert_eq!(results, vec![6]); 56 | } 57 | -------------------------------------------------------------------------------- /tests/test_serde.rs: -------------------------------------------------------------------------------- 1 | #![cfg(feature = "serde")] 2 | #[macro_use] 3 | extern crate log; 4 | extern crate env_logger; 5 | extern crate cfg; 6 | extern crate gearley; 7 | extern crate serde; 8 | 9 | mod grammars; 10 | 11 | use gearley::forest::NullForest; 12 | use gearley::grammar::Grammar; 13 | use gearley::recognizer::Recognizer; 14 | 15 | use grammars::*; 16 | 17 | use serde::de::value::StringDeserializer; 18 | use serde::de::IntoDeserializer; 19 | 20 | #[test] 21 | fn test_serde() { 22 | let x = InternalGrammar::deserialize(String::into_deserializer("")); 23 | assert!(true); 24 | } 25 | -------------------------------------------------------------------------------- /tests/tests.rs: -------------------------------------------------------------------------------- 1 | #[macro_use] 2 | extern crate log; 3 | extern crate cfg; 4 | extern crate gearley; 5 | 6 | #[macro_use] 7 | mod grammars; 8 | mod 
helpers; 9 | 10 | use gearley::grammar::InternalGrammar; 11 | use gearley::forest::{Bocage, NullForest}; 12 | use gearley::recognizer::Recognizer; 13 | 14 | use grammars::*; 15 | use helpers::{SimpleEvaluator, Parse}; 16 | 17 | const SUM_TOKENS: &'static [u32] = precedenced_arith!( 18 | '1' '+' '(' '2' '*' '3' '-' '4' ')' '/' 19 | '(' '5' '5' ')' '-' '(' '5' '4' ')' '*' 20 | '5' '5' '+' '6' '2' '-' '1' '3' '-' '(' 21 | '(' '3' '6' ')' ')' 22 | ); 23 | 24 | #[test] 25 | fn test_precedenced_arith() { 26 | let external = precedenced_arith::grammar(); 27 | let cfg = InternalGrammar::from_grammar(&external); 28 | let mut rec = Recognizer::new(&cfg, NullForest); 29 | assert!(rec.parse(SUM_TOKENS)); 30 | } 31 | 32 | #[test] 33 | fn test_ambiguous_arithmetic() { 34 | let tokens = ambiguous_arith!('2' '-' '0' '*' '3' '+' '1'); 35 | let external = ambiguous_arith::grammar(); 36 | let cfg = InternalGrammar::from_grammar(&external); 37 | let mut evaluator = SimpleEvaluator::new( 38 | ambiguous_arith::leaf, 39 | ambiguous_arith::rule, 40 | |_, _: &mut Vec| unreachable!() 41 | ); 42 | let bocage = Bocage::new(&cfg); 43 | let mut rec = Recognizer::new(&cfg, bocage); 44 | assert!(rec.parse(tokens)); 45 | let mut traverse = rec.forest.traverse(); 46 | let results = evaluator.traverse(&mut traverse, rec.finished_node().unwrap()); 47 | 48 | // The result is currently ordered by rule ID: 49 | assert_eq!(results, vec![2, 1, 3, 7, 8]); 50 | 51 | // A result ordered by structure would be: [2, 1, 8, 3, 7] 52 | // where 53 | 54 | // 2 = 2 - (0 * (3 + 1)) 55 | // 1 = 2 - ((0 * 3) + 1) 56 | // 8 = (2 - 0) * (3 + 1) 57 | // 3 = (2 - (0 * 3)) + 1 58 | // 7 = ((2 - 0) * 3) + 1 59 | } 60 | --------------------------------------------------------------------------------