├── .github └── workflows │ └── ci.yml ├── .gitignore ├── .travis.yml ├── Cargo.toml ├── LICENSE-APACHE ├── LICENSE-MIT ├── README.md ├── benches ├── bench_c.rs ├── bench_rust_grammar_subset.rs ├── benches.rs └── part_gcc_test.i ├── src ├── binary_heap.rs ├── debug.rs ├── events.rs ├── forest │ ├── bocage │ │ ├── mod.rs │ │ ├── node.rs │ │ ├── order.rs │ │ └── traverse.rs │ ├── compact_bocage │ │ ├── mod.rs │ │ ├── node.rs │ │ ├── order.rs │ │ └── traverse.rs │ ├── mod.rs │ ├── node_handle.rs │ └── null_forest.rs ├── grammar.rs ├── item.rs ├── lib.rs ├── memory_use.rs └── recognizer.rs └── tests ├── grammars ├── ambiguous_arith.rs ├── mod.rs └── precedenced_arith.rs ├── helpers ├── cartesian_product.rs ├── mod.rs ├── parse.rs ├── simple_compact_evaluator.rs └── simple_evaluator.rs ├── test_c.rs ├── test_nulling.rs ├── test_recognizer.rs ├── test_sequence.rs ├── test_serde.rs └── tests.rs /.github/workflows/ci.yml: -------------------------------------------------------------------------------- 1 | name: CI 2 | 3 | on: 4 | push: 5 | pull_request: 6 | schedule: [cron: "40 1 * * *"] 7 | 8 | permissions: 9 | contents: read 10 | 11 | env: 12 | RUSTFLAGS: -Dwarnings 13 | 14 | jobs: 15 | test: 16 | name: Test suite 17 | runs-on: ubuntu-latest 18 | timeout-minutes: 45 19 | steps: 20 | - uses: actions/checkout@v3 21 | - uses: dtolnay/rust-toolchain@nightly 22 | - run: cargo test 23 | 24 | windows: 25 | name: Test suite (windows) 26 | runs-on: windows-latest 27 | timeout-minutes: 45 28 | steps: 29 | - uses: actions/checkout@v3 30 | - uses: dtolnay/rust-toolchain@nightly 31 | - run: cargo test 32 | 33 | stable: 34 | name: Rust ${{matrix.rust}} 35 | runs-on: ubuntu-latest 36 | strategy: 37 | fail-fast: false 38 | matrix: 39 | rust: [stable, beta] 40 | timeout-minutes: 45 41 | steps: 42 | - uses: actions/checkout@v3 43 | - uses: dtolnay/rust-toolchain@master 44 | with: 45 | toolchain: ${{matrix.rust}} 46 | - run: cargo test 47 | 48 | nightly: 49 | name: Rust nightly 
${{matrix.os == 'windows' && '(windows)' || ''}} 50 | runs-on: ${{matrix.os}}-latest 51 | strategy: 52 | fail-fast: false 53 | matrix: 54 | os: [ubuntu, windows] 55 | timeout-minutes: 45 56 | steps: 57 | - uses: actions/checkout@v3 58 | - uses: dtolnay/rust-toolchain@nightly 59 | - run: cargo build 60 | 61 | msrv: 62 | name: Rust ${{matrix.rust}} 63 | runs-on: ubuntu-latest 64 | strategy: 65 | fail-fast: false 66 | matrix: 67 | rust: [1.65.0, 1.66.0, 1.67.0] 68 | timeout-minutes: 45 69 | steps: 70 | - uses: actions/checkout@v3 71 | - uses: dtolnay/rust-toolchain@master 72 | with: 73 | toolchain: ${{matrix.rust}} 74 | - run: cargo test 75 | - run: cargo build 76 | # clippy: 77 | # name: Clippy 78 | # runs-on: ubuntu-latest 79 | # timeout-minutes: 45 80 | # steps: 81 | # - uses: actions/checkout@v3 82 | # - uses: dtolnay/rust-toolchain@clippy 83 | # - run: cargo clippy --features generation -- -Dclippy::all -Dclippy::pedantic 84 | 85 | fmt: 86 | name: rustfmt-check 87 | runs-on: ubuntu-latest 88 | if: github.event_name == 'pull_request' 89 | timeout-minutes: 45 90 | steps: 91 | - uses: actions/checkout@v2 92 | - uses: actions-rs/toolchain@v1 93 | with: 94 | toolchain: nightly 95 | components: rustfmt 96 | override: true 97 | - uses: LoliGothick/rustfmt-check@master 98 | with: 99 | token: ${{ secrets.GITHUB_TOKEN }} 100 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Compiled files 2 | *.o 3 | *.so 4 | *.rlib 5 | *.dll 6 | 7 | # Executables 8 | *.exe 9 | 10 | # Generated by Cargo 11 | /target/ 12 | 13 | # This project is a library. 
14 | /Cargo.lock 15 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | sudo: false 2 | language: rust 3 | # necessary for `travis-cargo coveralls --no-sudo` 4 | addons: 5 | apt: 6 | packages: 7 | - libcurl4-openssl-dev 8 | - libelf-dev 9 | - libdw-dev 10 | - binutils-dev # optional, only required for the --verify flag of coveralls 11 | 12 | rust: 13 | - nightly 14 | - beta 15 | - stable 16 | # load travis-cargo 17 | before_script: 18 | - | 19 | pip install 'travis-cargo<0.2' --user && 20 | export PATH=$HOME/.local/bin:$PATH 21 | script: 22 | - | 23 | travis-cargo build && 24 | travis-cargo test && 25 | travis-cargo bench && 26 | travis-cargo --only stable doc 27 | after_success: 28 | - travis-cargo --only stable doc-upload 29 | - travis-cargo coveralls --no-sudo --verify 30 | notifications: 31 | email: 32 | on_success: never 33 | env: 34 | global: 35 | # override the default `--features unstable` used for the nightly branch 36 | - TRAVIS_CARGO_NIGHTLY_FEATURE="" 37 | -------------------------------------------------------------------------------- /Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "gearley" 3 | version = "0.0.5" 4 | 5 | authors = [ "Piotr Czarnecki " ] 6 | description = "An Earley parser engine." 
7 | keywords = ["grammar", "parsing", "language", "forest", "intersection"] 8 | documentation = "http://pczarn.github.io/gearley/" 9 | repository = "https://github.com/pczarn/gearley" 10 | license = "MIT/Apache-2.0" 11 | 12 | [profile.release] 13 | debug = true 14 | 15 | [lib] 16 | name = "gearley" 17 | 18 | [dependencies] 19 | cfg = { version = "0.6.1", features = ["serialize"] } 20 | bit-matrix = { version = "0.6", features = ["serialize"] } 21 | bit-vec = "0.6" 22 | optional = { version = "0.5", features = ["serde"] } 23 | ref_slice = "1.2" 24 | num = "0.2" 25 | num-traits = "0.2" 26 | num-derive = "0.3" 27 | log = "0.4" 28 | env_logger = "0.7" 29 | serde = "1.0" 30 | serde_derive = "1.0" 31 | 32 | [dev-dependencies] 33 | c_lexer_logos = "0.1.1" 34 | -------------------------------------------------------------------------------- /LICENSE-APACHE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 
22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. 
For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. 
If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. 
You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. 
You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "[]" 182 | replaced with your own identifying information. 
(Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright [yyyy] [name of copyright owner] 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 202 | -------------------------------------------------------------------------------- /LICENSE-MIT: -------------------------------------------------------------------------------- 1 | Copyright (c) 2016 Piotr Czarnecki 2 | 3 | Permission is hereby granted, free of charge, to any 4 | person obtaining a copy of this software and associated 5 | documentation files (the "Software"), to deal in the 6 | Software without restriction, including without 7 | limitation the rights to use, copy, modify, merge, 8 | publish, distribute, sublicense, and/or sell copies of 9 | the Software, and to permit persons to whom the Software 10 | is furnished to do so, subject to the following 11 | conditions: 12 | 13 | The above copyright notice and this permission notice 14 | shall be included in all copies or substantial portions 15 | of the Software. 
16 | 17 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF 18 | ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED 19 | TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A 20 | PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT 21 | SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY 22 | CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION 23 | OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR 24 | IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 25 | DEALINGS IN THE SOFTWARE. 26 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 |
2 |

gearley

3 |

4 | An Earley parser engine. 5 |

6 |

7 | 8 | [![crates.io][crates.io shield]][crates.io link] 9 | [![Documentation][docs.rs badge]][docs.rs link] 10 | ![Rust CI][github ci badge] 11 | ![MSRV][rustc 1.65+] 12 |
13 |
14 | [![Dependency Status][deps.rs status]][deps.rs link] 15 | [![Download Status][shields.io download count]][crates.io link] 16 | 17 |

18 |
19 | 20 | [crates.io shield]: https://img.shields.io/crates/v/gearley?label=latest 21 | [crates.io link]: https://crates.io/crates/gearley 22 | [docs.rs badge]: https://docs.rs/gearley/badge.svg?version=0.0.5 23 | [docs.rs link]: https://docs.rs/gearley/0.0.5/gearley/ 24 | [github ci badge]: https://github.com/pczarn/gearley/workflows/CI/badge.svg?branch=master 25 | [rustc 1.65+]: https://img.shields.io/badge/rustc-1.65%2B-blue.svg 26 | [deps.rs status]: https://deps.rs/crate/gearley/0.0.5/status.svg 27 | [deps.rs link]: https://deps.rs/crate/gearley/0.0.5 28 | [shields.io download count]: https://img.shields.io/crates/d/gearley.svg 29 | 30 | Work in progress. 31 | [You can check the documentation here](https://docs.rs/gearley/latest/gearley/). 32 | 33 | This engine is meant to be a foundation of an optimized parser generator. 34 | 35 | Gearley is inspired by the [Marpa parser](http://jeffreykegler.github.io/Marpa-web-site/) 36 | by Jeffrey Kegler. 37 | 38 | ## Properties 39 | 40 | * blazing fast 41 | * as fast as YAEP 42 | * much faster than Marpa 43 | * memory efficient 44 | * new algorithm which uses online sorting 45 | * TODO: new hybrid algorithm 46 | * TODO: LALR 47 | * TODO: LL(1) 48 | * TODO: LR(1) 49 | * both time and memory complexity are small for simple grammars 50 | * time complexity: `O(n log n)` (n = input length) for `LR(1)` grammars 51 | * memory complexity: linear in input length for `LR(1)` grammars 52 | * lookahead 53 | * 1 token of lookahead 54 | * TODO: multithreaded parsing 55 | * TODO: fearless right-recursion 56 | * TODO: Leo's algorithm 57 | * general-purpose 58 | * accepts all context-free grammars 59 | * may be extended to accept any grammar with Pāṇini 60 | * TODO: data-dependent grammars 61 | * TODO: PEG 62 | * TODO: negation 63 | * TODO: boolean grammars 64 | * interop with any parsing algorithm 65 | * safe 66 | * TODO: pure safe Rust 67 | * elegant 68 | * the recognizer has a simple design 69 | * tiny core 70 | only 470 lines of
code implementing the core algorithm 71 | * mathematically elegant 72 | * uses simple data structures 73 | * three separate per-token passes 74 | * just like Marpa 75 | * highly preprocessed grammar 76 | * less complexity in the recognizer and parse forest makes up for heavy grammar transformations 77 | * naming 78 | * Pāṇini is named after an ancient grammarian and Indian scholar 79 | * parse forest naming is inspired by algebra 80 | * good error reporting 81 | * perfect parse progress information 82 | * tracing debugging 83 | * customizable 84 | * extensible on every level 85 | * customizable recognizer 86 | * optional control over bottom-up parse fragment completion 87 | * you control which fragments are admitted into the forest 88 | * optional custom parse events 89 | * optional initialization with given memory capacity 90 | * generic over optional Performance Policy 91 | * customizable parse forest 92 | * optional control over ambiguous node ordering 93 | * write your own parse forest 94 | * two official parse forest impls and a null forest 95 | * choose between a faster forest and a memory efficient forest 96 | * optionally ignore parse result and get only parse success or failure 97 | * open source 98 | * free is a fair price 99 | 100 | ## Extending gearley 101 | 102 | The grammar is stored in a byte string. You may [serialize or deserialize it](https://docs.rs/gearley/0.0.5/gearley/grammar/struct.InternalGrammar.html) 103 | yourself. Grammar construction is implemented in the 104 | [cfg library](https://github.com/pczarn/cfg). 105 | 106 | The recognizer provides [an interface](https://docs.rs/gearley/0.0.5/gearley/forest/trait.Forest.html) for writing a custom parse forest. Or you 107 | may reuse the default parse forest algorithm, but write your own code for [controlling 108 | rule order](https://docs.rs/gearley/0.0.5/gearley/forest/order/trait.Order.html), and for storing evaluated values within each tree node. 
109 | 110 | Yet another interface gives [control over rule completion](https://docs.rs/gearley/0.0.5/gearley/recognizer/struct.CompleteSum.html). You may reject certain 111 | completed rules or modify their parse forests as the parse progresses. 112 | 113 | Gearley is perfectly extensible on every level. 114 | 115 | ## Glossary 116 | 117 | ### Recognizer 118 | 119 | | Gearley term | Marpa term | Alternative term | 120 | |--------------------|------------------------|----------------------------| 121 | | dot | dotted rule | -- | 122 | | earleme | earleme | input location | 123 | | item | Earley item | situation | 124 | | origin | origin | distance | 125 | | rule history | rule semantics | -- | 126 | | complete | complete | accept | 127 | 128 | Dot — a position in the grammar, which is an integer. 129 | 130 | Earleme — scalar position, currently equivalent to the input location index. 131 | 132 | Item — a value that consists of a dot, an origin and a bocage node. 133 | 134 | Origin — the Earley set number where a rule was predicted. Always smaller than 135 | the current Earley set ID for non-predicted items. 136 | 137 | Rule history — a rule summary that contains an action number and other information 138 | about semantics and the rule's journey through transformations. Each rule carries 139 | its own history. 140 | 141 | ### Parse forest 142 | 143 | | Gearley term | Marpa term | Alternative term | 144 | |--------------------|------------------------|----------------------------| 145 | | bocage | bocage | Shared Packed Parse Forest | 146 | | depth-first bocage | Abstract Syntax Forest | -- | 147 | | sum node | glade | OR node | 148 | | product node | factoring | AND node | 149 | | leaf node | bocage symbol | leaf node | 150 | | root node | peak glade | top node | 151 | 152 | Bocage — a parse forest in the form of a Directed Acyclic Graph. 153 | 154 | Depth-first bocage — a bocage that is traversed by evaluating one whole bocage 155 | node at a time. 
156 | 157 | Sum node — a node that sums the number of trees in the forest. 158 | 159 | Product node — a node that may multiply the number of trees in the forest. 160 | 161 | Leaf node — a terminal node that begins a single tree in the forest. 162 | 163 | Root node — a node that is used as a parse result. 164 | 165 | ## Related work 166 | 167 | ### In Rust 168 | 169 | * [LALRPOP](https://github.com/nikomatsakis/lalrpop) — a LR(1) parser generator focused on ease of use. 170 | * [rust-lang's GLL](https://github.com/rust-lang/gll/) — a parsing framework. 171 | * [grammer with an E](https://github.com/lykenware/grammer/) — a grammar framework. 172 | * [Oak](https://github.com/ptal/oak/) — a PEG parser generator with typed expressions. 173 | 174 | ### In other languages 175 | 176 | * [Marpa](https://jeffreykegler.github.io/Marpa-web-site/) — an Earley parser (not a generator) 177 | that has advanced features. Written in literate C and in Perl. 178 | * [YAEP](https://github.com/vnmakarov/yaep) — an Earley parser engine that currently has 179 | the best speed and small memory use. Written in C. 180 | 181 | ### In academia 182 | 183 | * OMeta — a PEG parser with advanced features that go beyond parsing. 184 | * [SPPF-Style Parsing From Earley Recognisers](https://www.researchgate.net/publication/220367479_SPPF-Style_Parsing_From_Earley_Recognisers) — Elizabeth Scott. 185 | 186 | ## Quotes 187 | 188 | > I'd be very happy to have a superfast general parser out there but some extremely bright minds have been unable to solve it for 40 years. 189 | 190 | — Terence Parr, author of ANTLR 191 | 192 | > I would be very eager to see this. 193 | 194 | — mydoghasticks 195 | 196 | ## Thanks 197 | 198 | Thanks to Jay Earley, John Aycock, R. Nigel Horspool, and Elizabeth Scott who pioneered Earley parsing. 
199 | 200 | Big thanks to [mr Jeffrey Kegler](https://github.com/jeffreykegler) who brought my attention to parsing and made this project possible through his work on Marpa/Earley and Kollos. 201 | 202 | Special thanks to CD PROJEKT RED, HAEVN, Kaśka Sochacka, sanah, Kwiat Jabłoni, Alex Rainbird, Beth Paterson, Carbon Based Lifeforms, and Solar Fields for providing amazing music, which made coding even more enjoyable. 203 | 204 | ## License 205 | 206 | Dual-licensed for compatibility with the Rust project. 207 | 208 | Licensed under the Apache License Version 2.0: 209 | http://www.apache.org/licenses/LICENSE-2.0, or the MIT license: 210 | http://opensource.org/licenses/MIT, at your option. 211 | -------------------------------------------------------------------------------- /benches/bench_rust_grammar_subset.rs: -------------------------------------------------------------------------------- 1 | #![feature(test)] 2 | 3 | extern crate test; 4 | extern crate cfg; 5 | extern crate gearley; 6 | 7 | macro_rules! trace(($($tt:tt)*) => ()); 8 | 9 | #[path = "../tests/helpers/mod.rs"] 10 | mod helpers; 11 | 12 | use cfg::sequence::Separator::Proper; 13 | use cfg::earley::Grammar; 14 | use gearley::forest::{Bocage, NullForest}; 15 | use gearley::grammar::InternalGrammar; 16 | use gearley::recognizer::Recognizer; 17 | use gearley::memory_use::MemoryUse; 18 | 19 | use helpers::Parse; 20 | 21 | macro_rules! rhs_elem { 22 | (use) => (0); 23 | (as) => (1); 24 | (::) => (2); 25 | (*) => (3); 26 | (,) => (4); 27 | (;) => (5); 28 | ('{') => (6); 29 | ('}') => (7); 30 | (pub) => (8); 31 | ($i:ident) => (9); 32 | } 33 | 34 | macro_rules! 
rhs { 35 | ($($e:tt)+) => ( 36 | &[$(rhs_elem!($e) + 9,)+] 37 | ) 38 | } 39 | 40 | const TOKENS: &'static [u32] = rhs!( 41 | use gearley::events::'{' PredictionEvents, MedialEvents, CompletionEvents '}'; 42 | use gearley::util::slice_builder::SliceBuilder; 43 | use gearley::forest::depth_first::'{' 44 | NullOrder, FastEvaluator, ArrayStore, ClosureActionEvaluator 45 | '}'; 46 | pub use self::PathParsingMode::*; 47 | 48 | use abi :: '{' self, Abi '}'; 49 | use ast::BareFnTy; 50 | use ast :: '{' RegionTyParamBound, TraitTyParamBound, TraitBoundModifier '}'; 51 | use ast::Unsafety; 52 | use ast :: '{' Mod, Arg, Arm, Attribute, BindingMode, TraitItemKind '}'; 53 | use ast::Block; 54 | use ast :: '{' BlockCheckMode, CaptureBy '}'; 55 | use ast :: '{' Constness, Crate, CrateConfig '}'; 56 | use ast :: '{' Decl, DeclKind '}'; 57 | use ast :: '{' EMPTY_CTXT, EnumDef, ExplicitSelf '}'; 58 | use ast :: '{' Expr, ExprKind '}'; 59 | use ast :: '{' Field, FnDecl '}'; 60 | use ast :: '{' ForeignItem, ForeignItemKind, FunctionRetTy '}'; 61 | use ast :: '{' Ident, ImplItem, Item, ItemKind '}'; 62 | use ast :: '{' Lit, LitKind, UintTy '}'; 63 | use ast::Local; 64 | use ast::MacStmtStyle; 65 | use ast::Mac_; 66 | use ast :: '{' MutTy, Mutability '}'; 67 | use ast::NamedField; 68 | use ast :: '{' Pat, PatKind '}'; 69 | use ast :: '{' PolyTraitRef, QSelf '}'; 70 | use ast :: '{' Stmt, StmtKind '}'; 71 | use ast :: '{' VariantData, StructField '}'; 72 | use ast::StrStyle; 73 | use ast::SelfKind; 74 | use ast :: '{' Delimited, SequenceRepetition, TokenTree, TraitItem, TraitRef '}'; 75 | use ast :: '{' Ty, TyKind, TypeBinding, TyParam, TyParamBounds '}'; 76 | use ast::UnnamedField; 77 | use ast :: '{' ViewPath, ViewPathGlob, ViewPathList, ViewPathSimple '}'; 78 | use ast :: '{' Visibility, WhereClause '}'; 79 | use attr :: '{' ThinAttributes, ThinAttributesExt, AttributesExt '}'; 80 | use ast :: '{' BinOpKind, UnOp '}'; 81 | use ast; 82 | use ast_util :: '{' self, ident_to_path '}'; 83 | 
use codemap :: '{' self, Span, BytePos, Spanned, spanned, mk_sp, CodeMap '}'; 84 | use errors :: '{' self, DiagnosticBuilder '}'; 85 | use ext::tt::macro_parser; 86 | use parse; 87 | use parse::classify; 88 | use parse::common::SeqSep; 89 | use parse::lexer :: '{' Reader, TokenAndSpan '}'; 90 | use parse::obsolete :: '{' ParserObsoleteMethods, ObsoleteSyntax '}'; 91 | use parse::token :: '{' self, intern, MatchNt, SubstNt, SpecialVarNt, InternedString '}'; 92 | use parse::token :: '{' keywords, special_idents, SpecialMacroVar '}'; 93 | use parse :: '{' new_sub_parser_from_file, ParseSess '}'; 94 | use util::parser :: '{' AssocOp, Fixity '}'; 95 | use print::pprust; 96 | use ptr::P; 97 | use parse::PResult; 98 | 99 | use std::collections::HashSet; 100 | use std::io::prelude::*; 101 | use std::mem; 102 | use std::path :: '{' Path, PathBuf '}'; 103 | use std::rc::Rc; 104 | use std::slice; 105 | ); 106 | 107 | const _TOKEN_NAMES: &'static [&'static str] = &[ 108 | "start", "use_decls", "use_decl", "segments", "segment", "import_mod", "import_seq", "import", 109 | "pub_opt", 110 | "use_tok", "as_tok", "mod_sep", "star", "comma", "semi", "lbrace", "rbrace", "pub_tok", "ident" 111 | ]; 112 | 113 | fn grammar() -> Grammar { 114 | let mut external = Grammar::new(); 115 | let (start, use_decls, use_decl, segments, segment, import_mod, import_seq, import, pub_opt) = external.sym(); 116 | let (use_tok, as_tok, mod_sep, star, comma, semi, lbrace, rbrace, pub_tok, ident) = external.sym(); 117 | external 118 | .sequence(segments).inclusive(0, None).rhs(segment) 119 | .sequence(import_seq).separator(Proper(comma)).inclusive(1, None).rhs(import) 120 | .sequence(use_decls).inclusive(0, None).rhs(use_decl) 121 | ; 122 | external.rule(start).rhs([use_decls]) 123 | .rule(use_decl).rhs([pub_opt, use_tok, segments, import_mod, semi]) 124 | .rule(segment).rhs([ident, mod_sep]) 125 | .rule(import_mod).rhs([lbrace, import_seq, rbrace]) 126 | .rhs([import]) 127 | .rhs([star]) 128 | 
.rule(import).rhs([ident]) 129 | .rhs([ident, as_tok, ident]) 130 | .rule(pub_opt).rhs([pub_tok]) 131 | .rhs([]) 132 | ; 133 | external.set_start(start); 134 | external 135 | } 136 | 137 | #[bench] 138 | fn bench_recognize_decl_use(b: &mut test::Bencher) { 139 | let external = grammar(); 140 | let cfg = InternalGrammar::from_grammar(&external); 141 | 142 | b.iter(|| { 143 | let mut rec: Recognizer = Recognizer::new_with_limit(&cfg, 2_000_000); 144 | rec.parse(TOKENS); 145 | test::black_box(&rec); 146 | }) 147 | } 148 | 149 | #[bench] 150 | fn bench_parse_decl_use(b: &mut test::Bencher) { 151 | let external = grammar(); 152 | let cfg = InternalGrammar::from_grammar(&external); 153 | 154 | b.iter(|| { 155 | let mut rec: Recognizer> = Recognizer::new_with_limit(&cfg, 2_000_000); 156 | let finished = rec.parse(TOKENS); 157 | assert!(finished); 158 | test::black_box(&rec.forest); 159 | }) 160 | } 161 | -------------------------------------------------------------------------------- /benches/benches.rs: -------------------------------------------------------------------------------- 1 | #![feature(test)] 2 | 3 | extern crate test; 4 | extern crate cfg; 5 | extern crate gearley; 6 | 7 | macro_rules! 
trace(($($tt:tt)*) => ()); 8 | 9 | #[macro_use] 10 | #[path = "../tests/grammars/mod.rs"] 11 | mod grammars; 12 | #[path = "../tests/helpers/mod.rs"] 13 | mod helpers; 14 | 15 | use gearley::grammar::InternalGrammar; 16 | use gearley::forest::{Bocage, NullForest}; 17 | use gearley::recognizer::Recognizer; 18 | use gearley::memory_use::MemoryUse; 19 | 20 | use grammars::*; 21 | use helpers::{SimpleEvaluator, Parse}; 22 | 23 | const SUM_TOKENS: &'static [u32] = precedenced_arith!( 24 | '1' '+' '(' '2' '*' '3' '-' '4' ')' '/' 25 | '(' '5' '5' ')' '-' '(' '5' '4' ')' '*' 26 | '5' '5' '+' '6' '2' '-' '1' '3' '-' '(' 27 | '(' '3' '6' ')' ')' 28 | ); 29 | 30 | #[bench] 31 | fn bench_ambiguous_arithmetic(b: &mut test::Bencher) { 32 | let tokens = ambiguous_arith!('2' '-' '0' '*' '3' '+' '1' '/' '2' '+' '8' '8' '+' '1' '/' '2'); 33 | let external = ambiguous_arith::grammar(); 34 | let cfg = InternalGrammar::from_grammar(&external); 35 | 36 | b.iter(|| { 37 | let mut evaluator = SimpleEvaluator::new( 38 | ambiguous_arith::leaf, 39 | ambiguous_arith::rule, 40 | |_, _: &mut _| unreachable!() 41 | ); 42 | let mut rec: Recognizer> = Recognizer::new_with_hint(&cfg, tokens.len()); 43 | assert!(rec.parse(tokens)); 44 | let mut traversal = rec.forest.traverse(); 45 | let results = evaluator.traverse(&mut traversal, rec.finished_node().unwrap()); 46 | test::black_box(results); 47 | }) 48 | } 49 | 50 | #[bench] 51 | fn bench_evaluate_precedenced_arith(b: &mut test::Bencher) { 52 | let external = precedenced_arith::grammar(); 53 | let cfg = InternalGrammar::from_grammar(&external); 54 | let sum_tokens = test::black_box(SUM_TOKENS); 55 | 56 | b.iter(|| { 57 | let mut evaluator = SimpleEvaluator::new( 58 | precedenced_arith::leaf, 59 | precedenced_arith::rule, 60 | |_, _: &mut _| unreachable!(), 61 | ); 62 | let bocage = Bocage::new(&cfg); 63 | let mut recognizer = Recognizer::new(&cfg, bocage); 64 | recognizer.parse(sum_tokens); 65 | let mut traversal = recognizer.forest.traverse(); 66 
| let results = evaluator.traverse(&mut traversal, recognizer.finished_node().unwrap()); 67 | test::black_box(results); 68 | }) 69 | } 70 | 71 | #[bench] 72 | fn bench_process_grammar_for_precedenced_arith(b: &mut test::Bencher) { 73 | let external = precedenced_arith::grammar(); 74 | 75 | b.iter(|| { 76 | test::black_box(InternalGrammar::from_grammar(&external)); 77 | }) 78 | } 79 | 80 | #[bench] 81 | fn bench_recognize_precedenced_arith(b: &mut test::Bencher) { 82 | let grammar = precedenced_arith::grammar(); 83 | let cfg = InternalGrammar::from_grammar(&grammar); 84 | let sum_tokens = test::black_box(SUM_TOKENS); 85 | 86 | b.iter(|| { 87 | let mut recognizer = Recognizer::new(&cfg, NullForest); 88 | test::black_box(&recognizer.parse(sum_tokens)); 89 | }) 90 | } 91 | -------------------------------------------------------------------------------- /src/binary_heap.rs: -------------------------------------------------------------------------------- 1 | // Copyright 2019 The Rust Project Developers, Piotr Czarnecki. 2 | // See the COPYRIGHT 3 | // file at the top-level directory of this distribution and at 4 | // http://rust-lang.org/COPYRIGHT. 5 | // 6 | // Licensed under the Apache License, Version 2.0 or the MIT license 8 | // , at your 9 | // option. This file may not be copied, modified, or distributed 10 | // except according to those terms. 11 | 12 | //! A priority queue implemented with a binary heap. 13 | 14 | #![allow(missing_docs)] 15 | #![cfg_attr(feature = "cargo-clippy", allow(nonminimal_bool))] 16 | 17 | use std::mem::swap; 18 | use std::u32; 19 | 20 | use forest::Forest; 21 | use item::{CompletedItem, CompletedItemLinked, Item}; 22 | use recognizer::Recognizer; 23 | 24 | impl<'g, F> Recognizer<'g, F> 25 | where F: Forest, 26 | { 27 | /// Returns the greatest item in the binary heap, or `None` if it is empty. 
28 | #[inline] 29 | pub fn heap_peek(&self) -> Option> { 30 | self.complete.get(0).and_then(|&right_item| 31 | self.medial.get(right_item.idx as usize).map(|left_item| 32 | CompletedItem { 33 | origin: left_item.origin, 34 | dot: left_item.dot, 35 | left_node: left_item.node, 36 | right_node: right_item.node, 37 | } 38 | ) 39 | ) 40 | } 41 | 42 | #[inline(always)] 43 | fn heap_get(&self, idx_idx: usize) -> Option<&Item> { 44 | self.complete.get(idx_idx).and_then(|&item| self.medial.get(item.idx as usize)) 45 | } 46 | 47 | /// Removes the greatest item from the binary heap and returns it, or `None` if it 48 | /// is empty. 49 | pub fn heap_pop(&mut self) -> Option> { 50 | self.complete.pop().and_then(move |mut right_item| { 51 | if !self.complete.is_empty() { 52 | swap(&mut right_item, &mut self.complete[0]); 53 | self.sift_down(0); 54 | } 55 | self.medial.get(right_item.idx as usize).map(|left_item| 56 | CompletedItem { 57 | origin: left_item.origin, 58 | dot: left_item.dot, 59 | left_node: left_item.node, 60 | right_node: right_item.node, 61 | } 62 | ) 63 | }) 64 | } 65 | 66 | /// Pushes an item onto the binary heap. 67 | pub fn heap_push(&mut self, item: CompletedItem) { 68 | let old_indices_len = self.complete.len(); 69 | let old_medial_len = self.medial.len(); 70 | assert!(old_medial_len as u64 <= u32::MAX.into()); 71 | self.medial.push(item.into()); 72 | self.complete.push(CompletedItemLinked { 73 | idx: old_medial_len as u32, 74 | node: item.right_node, 75 | }); 76 | self.sift_up(0, old_indices_len); 77 | } 78 | 79 | /// Pushes an item onto the binary heap. 80 | pub fn heap_push_linked(&mut self, item: CompletedItemLinked) { 81 | let old_indices_len = self.complete.len(); 82 | self.complete.push(item); 83 | self.sift_up(0, old_indices_len); 84 | } 85 | 86 | /// Consumes the `BinaryHeap` and returns a vector in sorted 87 | /// (ascending) order. 
    fn sift_up(&mut self, start: usize, mut pos: usize) {
        // `complete` holds indices into `medial`; comparisons are made on the
        // medial items those indices refer to.
        let element_idx = self.complete[pos];
        let element = &self.medial[element_idx.idx as usize];
        while pos > start {
            let parent = (pos - 1) / 2;
            let parent_idx = self.complete[parent];
            // Max-heap order: stop once the element is no greater than its parent.
            if *element <= self.medial[parent_idx.idx as usize] {
                break;
            }
            // The parent is smaller: shift it down one level and continue from
            // its slot. The element itself is written once, after the loop.
            self.complete[pos] = parent_idx;
            pos = parent;
        }
        self.complete[pos] = element_idx;
    }

    /// Take an element at `pos` and move it down the heap,
    /// while its children are larger.
    fn sift_down_range(&mut self, mut pos: usize, end: usize) {
        let element_idx = self.complete[pos];
        let element = &self.medial[element_idx.idx as usize];
        // First child of `pos` in the implicit binary tree.
        let mut child = 2 * pos + 1;
        while child < end {
            let right = child + 1;
            // compare with the greater of the two children
            if right < end && !(self.heap_get(child).unwrap() > self.heap_get(right).unwrap()) {
                child = right;
            }
            // if we are already in order, stop.
116 | if element >= self.heap_get(child).unwrap() { 117 | break; 118 | } 119 | self.complete[pos] = self.complete[child]; 120 | pos = child; 121 | child = 2 * pos + 1; 122 | } 123 | self.complete[pos] = element_idx; 124 | } 125 | 126 | fn sift_down(&mut self, pos: usize) { 127 | let len = self.complete.len(); 128 | self.sift_down_range(pos, len); 129 | } 130 | } 131 | -------------------------------------------------------------------------------- /src/debug.rs: -------------------------------------------------------------------------------- 1 | use std::fmt; 2 | 3 | use forest::Forest; 4 | use recognizer::Recognizer; 5 | 6 | impl<'g, F: Forest> fmt::Debug for Recognizer<'g, F> { 7 | fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { 8 | write!(f, 9 | "Recognizer {{ grammar: {:?}, \ 10 | predicted: {:?}, medial: {:?}, \ 11 | complete: {:?}, indices: {:?}, \ 12 | current_medial_start: {:?}, earleme: {:?} }}", 13 | self.grammar, 14 | &self.predicted, 15 | &self.medial, 16 | &self.complete, 17 | &self.indices, 18 | &self.current_medial_start, 19 | &self.earleme 20 | ) 21 | } 22 | } 23 | -------------------------------------------------------------------------------- /src/events.rs: -------------------------------------------------------------------------------- 1 | use std::iter::{Zip, Chain}; 2 | use std::slice; 3 | 4 | use bit_matrix; 5 | use cfg::symbol::Symbol; 6 | 7 | use forest::Forest; 8 | use grammar::{ExternalDottedRule, Event}; 9 | use item::Item; 10 | use recognizer::Recognizer; 11 | 12 | type IterPredictionBitfield<'a> = bit_matrix::row::Iter<'a>; 13 | 14 | pub struct PredictedSymbols<'a> { 15 | pub(in super) iter: IterPredictionBitfield<'a>, 16 | pub(in super) idx: usize, 17 | } 18 | 19 | pub struct MedialItems<'a, N: 'a> { 20 | pub(in super) iter: slice::Iter<'a, Item>, 21 | } 22 | 23 | pub struct Prediction<'a, T: 'a> { 24 | iter: Zip, slice::Iter<'a, T>>, 25 | origin: usize, 26 | } 27 | 28 | pub struct Medial<'a, T: 'a, N: 'a> { 29 | events: &'a 
[T], 30 | items: MedialItems<'a, N>, 31 | } 32 | 33 | pub struct Events<'a, N: 'a> { 34 | iter: Chain< 35 | Prediction<'a, Event>, 36 | Medial<'a, Event, N> 37 | > 38 | } 39 | 40 | pub struct Distances<'a, N: 'a> { 41 | iter: Chain< 42 | Prediction<'a, Event>, 43 | Medial<'a, Event, N> 44 | > 45 | } 46 | 47 | pub struct Trace<'a, N: 'a> { 48 | iter: Chain< 49 | Prediction<'a, Option>, 50 | Medial<'a, Option, N> 51 | > 52 | } 53 | 54 | pub struct ExpectedTerminals<'a, N: 'a> { 55 | prev_scan_iter: MedialItems<'a, N>, 56 | rhs1: &'a [Option], 57 | } 58 | 59 | impl<'a> Iterator for PredictedSymbols<'a> { 60 | type Item = Symbol; 61 | 62 | fn next(&mut self) -> Option { 63 | for is_present in &mut self.iter { 64 | let symbol = Symbol::from(self.idx); 65 | self.idx += 1; 66 | if is_present { 67 | return Some(symbol); 68 | } 69 | } 70 | None 71 | } 72 | } 73 | 74 | impl<'a, N> Iterator for MedialItems<'a, N> { 75 | type Item = &'a Item; 76 | 77 | fn next(&mut self) -> Option { 78 | self.iter.next() 79 | } 80 | } 81 | 82 | impl<'a, T> Iterator for Prediction<'a, T> { 83 | type Item = (&'a T, usize); 84 | 85 | fn next(&mut self) -> Option { 86 | for (is_present, elem) in &mut self.iter { 87 | if is_present { 88 | return Some((elem, self.origin)); 89 | } 90 | } 91 | None 92 | } 93 | } 94 | 95 | impl<'a, T, L> Iterator for Medial<'a, T, L> { 96 | type Item = (&'a T, usize); 97 | 98 | fn next(&mut self) -> Option { 99 | let events = &self.events; 100 | self.items.next().map(|ei| { 101 | (&events[ei.dot as usize], ei.origin as usize) 102 | }) 103 | } 104 | } 105 | 106 | impl<'a, L> Iterator for Events<'a, L> { 107 | type Item = u32; 108 | 109 | fn next(&mut self) -> Option { 110 | for (&(event_id, _distance), _origin) in &mut self.iter { 111 | if event_id.is_some() { 112 | return event_id.into(); 113 | } 114 | } 115 | None 116 | } 117 | } 118 | 119 | impl<'a, L> Iterator for Distances<'a, L> { 120 | type Item = u32; 121 | 122 | fn next(&mut self) -> Option { 123 | for 
(&(_event_id, distance), _origin) in &mut self.iter { 124 | if distance.is_some() { 125 | return distance.into(); 126 | } 127 | } 128 | None 129 | } 130 | } 131 | 132 | impl<'a, N> Iterator for Trace<'a, N> { 133 | type Item = (ExternalDottedRule, usize); 134 | 135 | fn next(&mut self) -> Option<(ExternalDottedRule, usize)> { 136 | for (&external_dr_opt, origin) in &mut self.iter { 137 | if let Some(external_dotted_rule) = external_dr_opt { 138 | return Some((external_dotted_rule, origin)); 139 | } 140 | } 141 | None 142 | } 143 | } 144 | 145 | impl<'a, N> Iterator for ExpectedTerminals<'a, N> { 146 | type Item = Symbol; 147 | 148 | fn next(&mut self) -> Option { 149 | self.prev_scan_iter.next().map(|item| { 150 | self.rhs1[item.dot as usize].unwrap() 151 | }) 152 | } 153 | } 154 | 155 | impl<'g, F> Recognizer<'g, F> 156 | where F: Forest, 157 | { 158 | pub fn trace(&self) -> Trace { 159 | let trace = self.grammar.trace(); 160 | let prediction = Prediction { 161 | iter: self.predicted_symbols().iter.zip(trace[0].iter()), 162 | origin: self.earleme(), 163 | }; 164 | let medial = Medial { 165 | events: trace[1], 166 | items: self.medial_items(), 167 | }; 168 | Trace { 169 | iter: prediction.chain(medial), 170 | } 171 | } 172 | 173 | pub fn events(&self) -> Events { 174 | let (events_predict, events_flat) = self.grammar.events(); 175 | let prediction = Prediction { 176 | iter: self.predicted_symbols().iter.zip(events_predict.iter()), 177 | origin: self.earleme(), 178 | }; 179 | let medial = Medial { 180 | events: events_flat, 181 | items: self.medial_items(), 182 | }; 183 | Events { 184 | iter: prediction.chain(medial), 185 | } 186 | } 187 | 188 | pub fn minimal_distances(&self) -> Distances { 189 | Distances { 190 | iter: self.events().iter, 191 | } 192 | } 193 | 194 | pub fn expected_terminals(&self) -> ExpectedTerminals { 195 | ExpectedTerminals { 196 | prev_scan_iter: self.medial_items(), 197 | rhs1: self.grammar.rhs1(), 198 | } 199 | } 200 | } 201 | 202 | #[test] 
203 | fn test_prediction_events() { 204 | use bit_matrix::BitMatrix; 205 | let mut bit_m = BitMatrix::new(1, 5); 206 | bit_m.set(0, 2, true); 207 | let mut row = bit_m.iter_row(0); 208 | assert_eq!(row.next(), Some(false)); 209 | assert_eq!(row.next(), Some(false)); 210 | assert_eq!(row.next(), Some(true)); 211 | assert_eq!(row.next(), Some(false)); 212 | assert_eq!(row.next(), Some(false)); 213 | assert_eq!(row.next(), None); 214 | let ev = [0, 1, 2, 3, 4]; 215 | let mut pred = Prediction { 216 | iter: bit_m.iter_row(0).zip(&ev[0..5]), 217 | origin: 123, 218 | }; 219 | assert_eq!(pred.next(), Some((&2, 123))); 220 | assert_eq!(pred.next(), None); 221 | } 222 | -------------------------------------------------------------------------------- /src/forest/bocage/mod.rs: -------------------------------------------------------------------------------- 1 | pub mod node; 2 | pub mod order; 3 | pub mod traverse; 4 | 5 | use std::borrow::Borrow; 6 | use std::hint; 7 | 8 | use bit_vec::BitVec; 9 | use cfg::symbol::Symbol; 10 | use ref_slice::ref_slice; 11 | 12 | use forest::node_handle::NodeHandle; 13 | use forest::Forest; 14 | use grammar::InternalGrammar; 15 | use item::CompletedItem; 16 | 17 | use self::node::Node::*; 18 | use self::node::{CompactNode, Node, NULL_ACTION}; 19 | use self::order::Order; 20 | 21 | pub struct Bocage { 22 | pub(crate) graph: Vec, 23 | pub(crate) gc: MarkAndSweep, 24 | pub(crate) grammar: G, 25 | pub(crate) summand_count: u32, 26 | } 27 | 28 | pub(crate) struct MarkAndSweep { 29 | pub(crate) liveness: BitVec, 30 | // List for DFS and/or maybe relocation of stuff in the future. 
31 | pub(crate) dfs: Vec, 32 | } 33 | 34 | impl Bocage 35 | where 36 | G: Borrow, 37 | { 38 | pub fn new(grammar: G) -> Self { 39 | Self::with_capacities(grammar, 1024, 32) 40 | } 41 | 42 | pub fn with_capacities(grammar: G, graph_cap: usize, dfs_cap: usize) -> Self { 43 | let mut result = Bocage { 44 | graph: Vec::with_capacity(graph_cap), 45 | gc: MarkAndSweep { 46 | liveness: BitVec::with_capacity(graph_cap), 47 | dfs: Vec::with_capacity(dfs_cap), 48 | }, 49 | grammar, 50 | summand_count: 0, 51 | }; 52 | result.initialize_nulling(); 53 | result 54 | } 55 | 56 | pub(crate) fn initialize_nulling(&mut self) { 57 | // TODO trivial grammar check 58 | // self.nulling_leaf_count = self.nulling_symbol_count(); 59 | let nulling_leaf_count = self.nulling_symbol_count(); 60 | // Ensure that `max` is not ridiculously large. 61 | assert!(nulling_leaf_count < (1 << 20), "invalid nullable symbol"); 62 | self.graph.extend((0..=nulling_leaf_count).map(|i| { 63 | NullingLeaf { 64 | symbol: Symbol::from(i), 65 | } 66 | .compact() 67 | })); 68 | for &(lhs, rhs0, rhs1) in self.grammar.borrow().eliminated_nulling_intermediate() { 69 | self.set( 70 | NodeHandle::nulling(lhs), 71 | Product { 72 | left_factor: NodeHandle::nulling(rhs0), 73 | right_factor: Some(NodeHandle::nulling(rhs1)), 74 | action: NULL_ACTION, 75 | }, 76 | ); 77 | } 78 | } 79 | 80 | fn nulling_symbol_count(&self) -> usize { 81 | self.grammar.borrow().max_nulling_symbol().unwrap_or(0) 82 | } 83 | 84 | #[inline] 85 | pub fn mark_alive(&mut self, root: NodeHandle, mut order: O) { 86 | self.gc.liveness.clear(); 87 | self.gc.liveness.grow(self.graph.len(), false); 88 | self.gc.dfs.push(root); 89 | while let Some(node) = self.gc.dfs.pop() { 90 | self.gc.liveness.set(node.usize(), true); 91 | let summands = Bocage::::summands(&self.graph, node); 92 | let summands = order.sum(summands); 93 | for summand in summands { 94 | self.postprocess_product_tree_node(summand); 95 | // TODO: use order for products. 
96 | self.gc.dfs_queue_factors(summand); 97 | } 98 | } 99 | } 100 | 101 | #[inline] 102 | fn summands(graph: &Vec, node: NodeHandle) -> &[CompactNode] { 103 | unsafe { 104 | match graph.get_unchecked(node.usize()).expand() { 105 | Sum { count, .. } => { 106 | // back 107 | // let start = node.usize() - count as usize - 1; 108 | // let end = node.usize() - 1; 109 | let start = node.usize() + 1; 110 | let end = node.usize() + count as usize + 1; 111 | graph.get_unchecked(start..end) 112 | } 113 | _ => ref_slice(graph.get_unchecked(node.usize())), 114 | } 115 | } 116 | } 117 | 118 | #[inline] 119 | fn postprocess_product_tree_node(&self, node: &CompactNode) { 120 | if let Product { 121 | left_factor: factor, 122 | right_factor: None, 123 | action, 124 | } = node.expand() 125 | { 126 | // Add omitted phantom syms here. 127 | if let Some((sym, dir)) = self.grammar.borrow().nulling(action) { 128 | let (left, right) = if dir { 129 | (factor, NodeHandle::nulling(sym)) 130 | } else { 131 | (NodeHandle::nulling(sym), factor) 132 | }; 133 | node.set(Product { 134 | left_factor: left, 135 | right_factor: Some(right), 136 | action, 137 | }); 138 | } 139 | } 140 | } 141 | 142 | #[inline] 143 | fn set(&self, idx: NodeHandle, node: Node) { 144 | self.graph[idx.usize()].set(node); 145 | } 146 | 147 | #[inline] 148 | pub(super) fn is_transparent(&self, action: u32) -> bool { 149 | action == NULL_ACTION || self.grammar.borrow().external_origin(action).is_none() 150 | } 151 | 152 | // fn mark_and_sweep(&mut self, root: NodeHandle) { 153 | // self.mark_alive(root); 154 | // self.sweep_garbage(); 155 | // self.update_nulling_leaf_count(); 156 | // } 157 | 158 | // fn sweep_garbage(&mut self) { 159 | // let count = self.relocate_marked(); 160 | // self.graph.truncate(count); 161 | // } 162 | 163 | // fn update_nulling_leaf_count(&mut self) { 164 | // let prev_count = self.nulling_leaf_count; 165 | // self.nulling_leaf_count = self.gc.liveness.iter().take(prev_count).filter(|x| 
x).count(); 166 | // } 167 | 168 | // fn relocate_marked(&mut self) -> usize { 169 | // let mut destination = self.graph.iter(); 170 | // let mut count = 0; 171 | // // ... TODO: relocate 172 | // for (alive, source) in self.gc.liveness.iter().zip(self.graph.iter()) { 173 | // if alive { 174 | // destination.next().unwrap().cell.set(*source); 175 | // count += 1; 176 | // } 177 | // } 178 | // count 179 | // } 180 | } 181 | 182 | impl MarkAndSweep { 183 | #[inline] 184 | fn dfs_queue_factors(&mut self, summand: &CompactNode) { 185 | match summand.expand() { 186 | Product { 187 | left_factor, 188 | right_factor, 189 | .. 190 | } => { 191 | if let Some(factor) = right_factor { 192 | if let Some(false) = self.liveness.get(factor.usize()) { 193 | self.dfs.push(factor); 194 | } 195 | } 196 | if let Some(false) = self.liveness.get(left_factor.usize()) { 197 | self.dfs.push(left_factor); 198 | } 199 | } 200 | NullingLeaf { .. } | Evaluated { .. } => {} 201 | Sum { .. } => unreachable!(), 202 | } 203 | } 204 | } 205 | 206 | impl Forest for Bocage { 207 | type NodeRef = NodeHandle; 208 | type LeafValue = u32; 209 | 210 | const FOREST_BYTES_PER_RECOGNIZER_BYTE: usize = 2; 211 | 212 | #[inline] 213 | fn begin_sum(&mut self) { 214 | // nothing to do 215 | } 216 | 217 | #[inline] 218 | fn push_summand(&mut self, item: CompletedItem) { 219 | self.graph.push( 220 | Product { 221 | action: item.dot, 222 | left_factor: item.left_node, 223 | right_factor: item.right_node, 224 | } 225 | .compact(), 226 | ); 227 | self.summand_count += 1; 228 | } 229 | 230 | #[inline] 231 | fn sum(&mut self, lhs_sym: Symbol, _origin: u32) -> Self::NodeRef { 232 | let result = unsafe { 233 | match self.summand_count { 234 | 0 => hint::unreachable_unchecked(), 235 | 1 => NodeHandle(self.graph.len() as u32 - 1), 236 | summand_count => { 237 | // Slower case: ambiguous node. 
238 | let first_summand_idx = self.graph.len() - summand_count as usize; 239 | let first_summand = self.graph.get_unchecked(first_summand_idx).clone(); 240 | self.graph.push(first_summand); 241 | *self.graph.get_unchecked_mut(first_summand_idx) = Sum { 242 | nonterminal: lhs_sym, 243 | count: self.summand_count as u32, 244 | } 245 | .compact(); 246 | NodeHandle(first_summand_idx as u32) 247 | } 248 | } 249 | }; 250 | self.summand_count = 0; 251 | result 252 | } 253 | 254 | #[inline] 255 | fn leaf(&mut self, token: Symbol, _pos: u32, value: Self::LeafValue) -> Self::NodeRef { 256 | let result = NodeHandle(self.graph.len() as u32); 257 | self.graph.push( 258 | Evaluated { 259 | symbol: token, 260 | values: value, 261 | } 262 | .compact(), 263 | ); 264 | result 265 | } 266 | 267 | #[inline] 268 | fn nulling(&self, token: Symbol) -> Self::NodeRef { 269 | NodeHandle::nulling(token) 270 | } 271 | } 272 | -------------------------------------------------------------------------------- /src/forest/bocage/node.rs: -------------------------------------------------------------------------------- 1 | use std::cell::Cell; 2 | use std::hint; 3 | 4 | use cfg::symbol::Symbol; 5 | 6 | pub use self::Node::*; 7 | use self::Tag::*; 8 | use forest::node_handle::{NodeHandle, NULL_HANDLE}; 9 | 10 | // Node variants `Sum`/`Product` are better known in literature as `OR`/`AND`. 11 | #[derive(Copy, Clone, Debug)] 12 | pub enum Node { 13 | Sum { 14 | /// 8 bytes. 15 | /// Invariant: count > 1. 16 | /// Invariant: This node can only be directly followed by `Product`. 17 | nonterminal: Symbol, 18 | count: u32, 19 | }, 20 | Product { 21 | /// 12+ bytes. 22 | action: u32, 23 | left_factor: NodeHandle, 24 | right_factor: Option, 25 | }, 26 | NullingLeaf { 27 | /// 4 bytes. 28 | symbol: Symbol, 29 | }, 30 | Evaluated { 31 | /// 8 bytes. 
32 | symbol: Symbol, 33 | values: u32, 34 | }, 35 | } 36 | 37 | #[derive(Clone)] 38 | pub struct CompactNode { 39 | cell: Cell<[CompactField; 3]>, 40 | } 41 | 42 | // Node variants `Sum`/`Product` are better known in literature as `OR`/`AND`. 43 | #[derive(Copy, Clone)] 44 | union CompactField { 45 | // sum 46 | nonterminal: Symbol, 47 | count: u32, 48 | 49 | // product 50 | action: u32, 51 | factor: NodeHandle, 52 | // right_factor: NodeHandle, 53 | 54 | // leaf 55 | symbol: Symbol, 56 | values: u32, 57 | 58 | // tag 59 | tag: u32, 60 | } 61 | 62 | #[derive(Copy, Clone)] 63 | enum Tag { 64 | LeafTag = 0b00 << TAG_BIT, 65 | SumTag = 0b01 << TAG_BIT, 66 | ProductTag = 0b10 << TAG_BIT, 67 | } 68 | 69 | impl Tag { 70 | #[inline] 71 | fn from_u32(n: u32) -> Option { 72 | let n = n & TAG_MASK; 73 | if n == LeafTag.to_u32() { 74 | Some(LeafTag) 75 | } else if n == SumTag.to_u32() { 76 | Some(SumTag) 77 | } else if n == ProductTag.to_u32() { 78 | Some(ProductTag) 79 | } else { 80 | None 81 | } 82 | } 83 | 84 | #[inline] 85 | fn to_u32(&self) -> u32 { 86 | match *self { 87 | LeafTag => 0b00 << TAG_BIT, 88 | SumTag => 0b01 << TAG_BIT, 89 | ProductTag => 0b10 << TAG_BIT, 90 | } 91 | } 92 | } 93 | 94 | const TAG_BIT: usize = 30; 95 | const TAG_MASK: u32 = 0b11 << TAG_BIT; 96 | const NULL_VALUES: u32 = 0xFFFF_FFFF; 97 | pub(super) const NULL_ACTION: u32 = !TAG_MASK; 98 | 99 | impl Node { 100 | #[inline] 101 | pub(super) fn compact(self) -> CompactNode { 102 | let mut fields = match self { 103 | Product { 104 | left_factor, 105 | right_factor, 106 | action, 107 | } => { 108 | let right_factor = right_factor.unwrap_or(NULL_HANDLE); 109 | [ 110 | CompactField { action }, 111 | CompactField { 112 | factor: left_factor, 113 | }, 114 | CompactField { 115 | factor: right_factor, 116 | }, 117 | ] 118 | } 119 | Sum { nonterminal, count } => [ 120 | CompactField { nonterminal }, 121 | CompactField { count }, 122 | CompactField { tag: 0 }, 123 | ], 124 | NullingLeaf { symbol } => [ 125 | 
CompactField { symbol }, 126 | CompactField { 127 | values: NULL_VALUES, 128 | }, 129 | CompactField { tag: 0 }, 130 | ], 131 | Evaluated { symbol, values } => [ 132 | CompactField { symbol }, 133 | CompactField { values }, 134 | CompactField { tag: 0 }, 135 | ], 136 | }; 137 | unsafe { 138 | set_tag(&mut fields, self.tag()); 139 | } 140 | CompactNode { 141 | cell: Cell::new(fields), 142 | } 143 | } 144 | 145 | #[inline] 146 | fn tag(&self) -> Tag { 147 | match self { 148 | Product { .. } => ProductTag, 149 | Sum { .. } => SumTag, 150 | NullingLeaf { .. } | Evaluated { .. } => LeafTag, 151 | } 152 | } 153 | } 154 | 155 | impl CompactNode { 156 | #[inline] 157 | pub(super) fn set(&self, node: Node) { 158 | self.cell.set(node.compact().cell.get()); 159 | } 160 | 161 | #[inline] 162 | pub(super) fn expand(&self) -> Node { 163 | let mut fields = self.cell.get(); 164 | unsafe { 165 | let tag = get_and_erase_tag(&mut fields); 166 | match tag { 167 | LeafTag => { 168 | if fields[1].values == NULL_VALUES { 169 | NullingLeaf { 170 | symbol: fields[0].symbol, 171 | } 172 | } else { 173 | Evaluated { 174 | symbol: fields[0].symbol, 175 | values: fields[1].values, 176 | } 177 | } 178 | } 179 | ProductTag => Product { 180 | action: fields[0].action, 181 | left_factor: fields[1].factor, 182 | right_factor: fields[2].factor.to_option(), 183 | }, 184 | SumTag => Sum { 185 | nonterminal: fields[0].nonterminal, 186 | count: fields[1].count, 187 | }, 188 | } 189 | } 190 | } 191 | } 192 | 193 | #[inline] 194 | unsafe fn unwrap_unchecked(opt: Option) -> T { 195 | match opt { 196 | Some(val) => val, 197 | None => hint::unreachable_unchecked(), 198 | } 199 | } 200 | 201 | #[inline] 202 | unsafe fn set_tag(fields: &mut [CompactField; 3], tag: Tag) { 203 | fields[0].tag |= tag.to_u32(); 204 | } 205 | 206 | #[inline] 207 | unsafe fn get_and_erase_tag(fields: &mut [CompactField; 3]) -> Tag { 208 | let &mut CompactField { ref mut tag } = &mut fields[0]; 209 | let extract_tag = *tag; 210 | 
    // Clear the tag bits in place, leaving the untagged field value behind.
    *tag = *tag & !TAG_MASK;
    // SAFETY: every CompactNode is written via `compact()`, which always stores
    // a valid tag, so `Tag::from_u32` cannot return `None` here.
    unwrap_unchecked(Tag::from_u32(extract_tag))
}

--------------------------------------------------------------------------------
/src/forest/bocage/order.rs:
--------------------------------------------------------------------------------

use cfg::symbol::Symbol;

use super::node::CompactNode;

/// Ordering policy applied while walking a bocage: decides which summands of an
/// ambiguous (sum) node are kept, and in which order.
pub trait Order {
    /// Apply the order to sum node alternatives.
    /// The default implementation keeps all alternatives unchanged.
    fn sum<'b>(&mut self, alternatives: &'b [CompactNode]) -> &'b [CompactNode] {
        alternatives
    }

    /// Apply the order to product node factors.
    /// The default implementation makes no choice.
    // NOTE(review): the `Option` payload type was lost in this dump
    // (presumably `Option<usize>`) — confirm against the original source.
    fn product(&mut self, _factors: &[(Symbol, u32)]) -> Option {
        None
    }
}

/// An `Order` that imposes no ordering: every alternative is kept as-is.
#[derive(Default)]
pub struct NullOrder;

impl Order for NullOrder {}

impl NullOrder {
    /// Creates a new `NullOrder`.
    pub fn new() -> Self {
        NullOrder
    }
}

--------------------------------------------------------------------------------
/src/forest/bocage/traverse.rs:
--------------------------------------------------------------------------------

use std::borrow::Borrow;
use std::slice;

use bit_vec;
use cfg::symbol::Symbol;
use ref_slice::ref_slice;

use forest::bocage::node::Node::*;
use forest::bocage::node::{CompactNode, Node};
use forest::node_handle::NodeHandle;
use forest::Bocage;
use grammar::InternalGrammar;

pub use self::HandleVariant::*;

// NOTE(review): the impl's generic parameter appears stripped in this dump.
impl Bocage {
    // Once node liveness is marked, you may traverse the nodes.
18 | pub fn traverse(&self) -> Traverse { 19 | Traverse { 20 | bocage: self, 21 | graph_iter: self.graph.iter(), 22 | liveness_iter: self.gc.liveness.iter(), 23 | factor_stack: vec![], 24 | factor_traversal: vec![], 25 | } 26 | } 27 | } 28 | 29 | pub struct Traverse<'f, G> { 30 | bocage: &'f Bocage, 31 | // main iterators 32 | graph_iter: slice::Iter<'f, CompactNode>, 33 | liveness_iter: bit_vec::Iter<'f>, 34 | // Space for unrolling factors 35 | factor_stack: Vec<(Symbol, u32)>, 36 | // Scratch space for traversal 37 | factor_traversal: Vec, 38 | } 39 | 40 | impl<'f, G> Traverse<'f, G> 41 | where 42 | G: Borrow, 43 | { 44 | pub fn next_node<'t>(&'t mut self) -> Option> { 45 | while let (Some(node), Some(alive)) = (self.graph_iter.next(), self.liveness_iter.next()) { 46 | if !alive { 47 | continue; 48 | } 49 | match node.expand() { 50 | Product { action, .. } => { 51 | if self.bocage.is_transparent(action) { 52 | continue; 53 | } 54 | return Some(TraversalHandle { 55 | node, 56 | symbol: self.bocage.grammar.borrow().get_lhs(action), 57 | item: SumHandle(Products { 58 | products: ref_slice(node).iter(), 59 | traverse: self, 60 | }), 61 | }); 62 | } 63 | Sum { 64 | nonterminal: symbol, 65 | count, 66 | } => { 67 | let products = self.graph_iter.as_slice()[..count as usize].iter(); 68 | for _ in 0..count { 69 | self.graph_iter.next(); 70 | self.liveness_iter.next(); 71 | } 72 | return Some(TraversalHandle { 73 | node, 74 | symbol, 75 | item: SumHandle(Products { 76 | products, 77 | traverse: self, 78 | }), 79 | }); 80 | } 81 | NullingLeaf { symbol } => { 82 | return Some(TraversalHandle { 83 | node, 84 | symbol, 85 | item: NullingHandle, 86 | }); 87 | } 88 | Evaluated { symbol, values } => { 89 | return Some(TraversalHandle { 90 | node, 91 | symbol, 92 | item: LeafHandle(values), 93 | }); 94 | } 95 | } 96 | } 97 | None 98 | } 99 | 100 | fn unfold_factors(&mut self, left: NodeHandle, right: Option) { 101 | self.factor_stack.clear(); 102 | self.enqueue_for_unfold(left, 
right); 103 | while let Some(node) = self.pop_for_unfold() { 104 | match node { 105 | Product { 106 | left_factor, 107 | right_factor, 108 | .. 109 | } => { 110 | self.enqueue_for_unfold(left_factor, right_factor); 111 | } 112 | Evaluated { symbol, values } => { 113 | self.factor_stack.push((symbol, values)); 114 | } 115 | _ => unreachable!(), 116 | } 117 | } 118 | } 119 | 120 | fn enqueue_for_unfold(&mut self, left: NodeHandle, right: Option) { 121 | if let Some(right) = right { 122 | self.factor_traversal.push(right); 123 | } 124 | self.factor_traversal.push(left); 125 | } 126 | 127 | fn pop_for_unfold(&mut self) -> Option { 128 | self.factor_traversal.pop().map(|handle| { 129 | let node = self.bocage.graph[handle.usize()].clone(); 130 | node.expand() 131 | }) 132 | } 133 | } 134 | 135 | pub struct TraversalHandle<'f, 't, G> { 136 | pub node: &'f CompactNode, 137 | pub symbol: Symbol, 138 | pub item: HandleVariant<'f, 't, G>, 139 | } 140 | 141 | pub enum HandleVariant<'f, 't, G> { 142 | SumHandle(Products<'f, 't, G>), 143 | NullingHandle, 144 | LeafHandle(u32), 145 | } 146 | 147 | pub struct Products<'f, 't, G> { 148 | products: slice::Iter<'f, CompactNode>, 149 | traverse: &'t mut Traverse<'f, G>, 150 | } 151 | 152 | pub struct ProductHandle<'t> { 153 | pub action: u32, 154 | pub factors: &'t [(Symbol, u32)], 155 | } 156 | 157 | impl<'f, 't, G> Products<'f, 't, G> 158 | where 159 | G: Borrow, 160 | { 161 | pub fn next_product<'p>(&'p mut self) -> Option { 162 | while let Some(node) = self.products.next() { 163 | match node.expand() { 164 | Product { 165 | left_factor, 166 | right_factor, 167 | action, 168 | } => { 169 | let origin = self 170 | .traverse 171 | .bocage 172 | .grammar 173 | .borrow() 174 | .external_origin(action); 175 | if let Some(action) = origin { 176 | self.traverse.unfold_factors(left_factor, right_factor); 177 | return Some(ProductHandle { 178 | action, 179 | factors: &self.traverse.factor_stack[..], 180 | }); 181 | } 182 | } 183 | _ => 
unreachable!(), 184 | } 185 | } 186 | None 187 | } 188 | } 189 | 190 | impl<'f, 't, G> TraversalHandle<'f, 't, G> { 191 | pub fn set_evaluation_result(&self, values: u32) { 192 | self.node.set(Evaluated { 193 | symbol: self.symbol, 194 | values, 195 | }); 196 | } 197 | } 198 | -------------------------------------------------------------------------------- /src/forest/compact_bocage/mod.rs: -------------------------------------------------------------------------------- 1 | pub mod node; 2 | pub mod order; 3 | pub mod traverse; 4 | 5 | use std::borrow::Borrow; 6 | use std::hint; 7 | 8 | use bit_vec::BitVec; 9 | use cfg::symbol::Symbol; 10 | 11 | use forest::node_handle::NodeHandle; 12 | use forest::Forest; 13 | use grammar::InternalGrammar; 14 | use item::CompletedItem; 15 | 16 | use self::node::Node::*; 17 | use self::node::{Graph, Node, NULL_ACTION}; 18 | use self::order::Order; 19 | 20 | pub struct CompactBocage { 21 | pub(crate) graph: Graph, 22 | pub(crate) gc: MarkAndSweep, 23 | pub(crate) grammar: G, 24 | pub(crate) first_summand: NodeHandle, 25 | pub(crate) summand_count: u32, 26 | } 27 | 28 | pub(crate) struct MarkAndSweep { 29 | pub(crate) liveness: BitVec, 30 | // List for DFS and/or maybe relocation of stuff in the future. 
31 | pub(crate) dfs: Vec, 32 | } 33 | 34 | impl CompactBocage 35 | where 36 | G: Borrow, 37 | { 38 | pub fn new(grammar: G) -> Self { 39 | Self::with_capacities(grammar, 1024, 32) 40 | } 41 | 42 | pub fn with_capacities(grammar: G, graph_cap: usize, dfs_cap: usize) -> Self { 43 | let mut result = CompactBocage { 44 | graph: Graph::with_capacity(graph_cap), 45 | gc: MarkAndSweep { 46 | liveness: BitVec::with_capacity(graph_cap), 47 | dfs: Vec::with_capacity(dfs_cap), 48 | }, 49 | grammar, 50 | summand_count: 0, 51 | first_summand: NodeHandle(0), 52 | }; 53 | result.initialize_nulling(); 54 | result 55 | } 56 | 57 | pub(crate) fn initialize_nulling(&mut self) { 58 | // TODO trivial grammar check 59 | // self.nulling_leaf_count = self.nulling_symbol_count(); 60 | let nulling_leaf_count = self.nulling_symbol_count(); 61 | // Ensure that `max` is not ridiculously large. 62 | assert!(nulling_leaf_count < (1 << 20), "invalid nullable symbol"); 63 | let mut graph: Vec = (0..nulling_leaf_count) 64 | .map(|i| NullingLeaf { 65 | symbol: Symbol::from(i), 66 | }) 67 | .collect(); 68 | for &(lhs, rhs0, rhs1) in self.grammar.borrow().eliminated_nulling_intermediate() { 69 | graph[lhs.usize()] = Product { 70 | left_factor: NodeHandle::nulling(rhs0), 71 | right_factor: Some(NodeHandle::nulling(rhs1)), 72 | action: NULL_ACTION, 73 | }; 74 | } 75 | let mut pos = 0; 76 | let mut relocation = vec![]; 77 | for node in &graph { 78 | relocation.push(NodeHandle(pos)); 79 | pos += node.classify(pos).size() as u32; 80 | } 81 | for node in graph { 82 | match node { 83 | Product { 84 | action, 85 | left_factor, 86 | right_factor, 87 | } => { 88 | self.graph.push(Product { 89 | action, 90 | left_factor: relocation[left_factor.usize()], 91 | right_factor: right_factor.map(|f| relocation[f.usize()]), 92 | }); 93 | } 94 | other => { 95 | self.graph.push(other); 96 | } 97 | } 98 | } 99 | } 100 | 101 | fn nulling_symbol_count(&self) -> usize { 102 | // why 1? 
103 | self.grammar 104 | .borrow() 105 | .max_nulling_symbol() 106 | .map_or(1, |m| m + 1) 107 | } 108 | 109 | #[inline] 110 | pub fn mark_alive(&mut self, root: NodeHandle, _order: O) { 111 | self.gc.liveness.clear(); 112 | self.gc.liveness.grow(self.graph.vec.len(), false); 113 | self.gc.dfs.push(root); 114 | while let Some(node) = self.gc.dfs.pop() { 115 | self.gc.liveness.set(node.usize(), true); 116 | let summands = CompactBocage::::summands(&self.graph, node); 117 | // let summands = order.sum(summands); 118 | for summand in summands { 119 | // TODO: use order for products. 120 | self.gc.dfs_queue_factors(summand); 121 | } 122 | } 123 | } 124 | 125 | #[inline] 126 | fn summands<'a>(graph: &'a Graph, node: NodeHandle) -> impl Iterator + 'a { 127 | let mut iter = graph.iter_from(node); 128 | match iter.peek() { 129 | Some(Sum { count, .. }) => { 130 | iter.next(); 131 | iter.take(count as usize) 132 | } 133 | _ => iter.take(1), 134 | } 135 | } 136 | 137 | #[inline] 138 | fn process_product_tree_node(&self, mut node: Node) -> Node { 139 | match node { 140 | Product { 141 | ref mut left_factor, 142 | ref mut right_factor, 143 | action, 144 | } => { 145 | if right_factor.is_none() { 146 | // Add omitted phantom syms here. 
147 | if let Some((sym, dir)) = self.grammar.borrow().nulling(action) { 148 | let (left, right) = if dir { 149 | (*left_factor, NodeHandle::nulling(sym)) 150 | } else { 151 | (NodeHandle::nulling(sym), *left_factor) 152 | }; 153 | *left_factor = left; 154 | *right_factor = Some(right); 155 | } 156 | } 157 | } 158 | _ => {} 159 | } 160 | node 161 | } 162 | 163 | #[inline] 164 | pub(super) fn is_transparent(&self, action: u32) -> bool { 165 | action == NULL_ACTION || self.grammar.borrow().external_origin(action).is_none() 166 | } 167 | 168 | // fn mark_and_sweep(&mut self, root: NodeHandle) { 169 | // self.mark_alive(root); 170 | // self.sweep_garbage(); 171 | // self.update_nulling_leaf_count(); 172 | // } 173 | 174 | // fn sweep_garbage(&mut self) { 175 | // let count = self.relocate_marked(); 176 | // self.graph.truncate(count); 177 | // } 178 | 179 | // fn update_nulling_leaf_count(&mut self) { 180 | // let prev_count = self.nulling_leaf_count; 181 | // self.nulling_leaf_count = self.gc.liveness.iter().take(prev_count).filter(|x| x).count(); 182 | // } 183 | 184 | // fn relocate_marked(&mut self) -> usize { 185 | // let mut destination = self.graph.iter(); 186 | // let mut count = 0; 187 | // // ... TODO: relocate 188 | // for (alive, source) in self.gc.liveness.iter().zip(self.graph.iter()) { 189 | // if alive { 190 | // destination.next().unwrap().cell.set(*source); 191 | // count += 1; 192 | // } 193 | // } 194 | // count 195 | // } 196 | } 197 | 198 | impl MarkAndSweep { 199 | #[inline] 200 | fn dfs_queue_factors(&mut self, summand: Node) { 201 | match summand { 202 | Product { 203 | left_factor, 204 | right_factor, 205 | .. 206 | } => { 207 | if let Some(factor) = right_factor { 208 | if let Some(false) = self.liveness.get(factor.usize()) { 209 | self.dfs.push(factor); 210 | } 211 | } 212 | if let Some(false) = self.liveness.get(left_factor.usize()) { 213 | self.dfs.push(left_factor); 214 | } 215 | } 216 | NullingLeaf { .. } | Evaluated { .. 
} => {} 217 | Sum { .. } => unreachable!(), 218 | } 219 | } 220 | } 221 | 222 | impl Forest for CompactBocage 223 | where 224 | G: Borrow, 225 | { 226 | type NodeRef = NodeHandle; 227 | type LeafValue = u32; 228 | 229 | const FOREST_BYTES_PER_RECOGNIZER_BYTE: usize = 2; 230 | 231 | #[inline] 232 | fn begin_sum(&mut self) { 233 | self.first_summand = NodeHandle(self.graph.vec.len() as u32); 234 | } 235 | 236 | #[inline] 237 | fn push_summand(&mut self, item: CompletedItem) { 238 | self.graph.push(self.process_product_tree_node(Product { 239 | action: item.dot, 240 | left_factor: item.left_node, 241 | right_factor: item.right_node, 242 | })); 243 | self.summand_count += 1; 244 | } 245 | 246 | #[inline] 247 | fn sum(&mut self, lhs_sym: Symbol, _origin: u32) -> Self::NodeRef { 248 | unsafe { 249 | match self.summand_count { 250 | 0 => hint::unreachable_unchecked(), 251 | 1 => {} 252 | summand_count => { 253 | // Slower case: ambiguous node. 254 | let sum = Sum { 255 | nonterminal: lhs_sym, 256 | count: summand_count, 257 | }; 258 | self.graph.set_up(self.first_summand, sum); 259 | } 260 | } 261 | }; 262 | let result = self.first_summand; 263 | self.summand_count = 0; 264 | result 265 | } 266 | 267 | #[inline] 268 | fn leaf(&mut self, token: Symbol, _pos: u32, _value: Self::LeafValue) -> Self::NodeRef { 269 | self.graph.push(Evaluated { symbol: token }) 270 | } 271 | 272 | #[inline] 273 | fn nulling(&self, token: Symbol) -> Self::NodeRef { 274 | NodeHandle::nulling(token) 275 | } 276 | } 277 | -------------------------------------------------------------------------------- /src/forest/compact_bocage/node.rs: -------------------------------------------------------------------------------- 1 | use std::cell::Cell; 2 | use std::hint; 3 | 4 | use cfg::symbol::Symbol; 5 | 6 | pub use self::Node::*; 7 | use self::Tag::*; 8 | use forest::node_handle::{NodeHandle, NULL_HANDLE}; 9 | 10 | pub struct Graph { 11 | pub(crate) vec: Vec>, 12 | } 13 | 14 | impl Graph { 15 | pub(crate) 
fn with_capacity(capacity: usize) -> Self { 16 | Graph { 17 | vec: Vec::with_capacity(capacity), 18 | } 19 | } 20 | 21 | pub(crate) fn push(&mut self, node: Node) -> NodeHandle { 22 | let position = self.vec.len() as u32; 23 | let (node_repr, size) = node.to_repr(position); 24 | unsafe { 25 | self.vec 26 | .extend(node_repr.fields[..size].iter().cloned().map(Cell::new)); 27 | } 28 | NodeHandle(position) 29 | } 30 | 31 | pub(crate) fn set_up(&mut self, mut handle: NodeHandle, node: Node) { 32 | let (node_repr, size) = node.to_repr(handle.0); 33 | let mut current_handle = handle; 34 | while current_handle.usize() < handle.usize() + size { 35 | let current_node = self.get(current_handle); 36 | self.push(current_node); 37 | current_handle.0 += current_node.classify(current_handle.0).size() as u32; 38 | } 39 | for i in 0..size { 40 | unsafe { 41 | self.vec[handle.usize() + i].set(node_repr.fields[i]); 42 | } 43 | } 44 | handle.0 += size as u32; 45 | while handle.0 < current_handle.0 { 46 | self.vec[handle.usize()].set(NopTag.to_u16()); 47 | handle.0 += 1; 48 | } 49 | } 50 | 51 | pub(crate) fn get(&self, handle: NodeHandle) -> Node { 52 | self.iter_from(handle).next().unwrap() 53 | } 54 | 55 | pub(crate) fn iter_from(&self, handle: NodeHandle) -> Iter { 56 | Iter { 57 | vec: &self.vec[..], 58 | handle, 59 | } 60 | } 61 | } 62 | 63 | #[derive(Clone, Copy)] 64 | pub(crate) struct Iter<'a> { 65 | pub(crate) vec: &'a [Cell], 66 | pub(crate) handle: NodeHandle, 67 | } 68 | 69 | impl<'a> Iterator for Iter<'a> { 70 | type Item = Node; 71 | 72 | fn next(&mut self) -> Option { 73 | unsafe { 74 | let head = if let Some(head) = self.vec.get(self.handle.usize()).cloned() { 75 | head.get() 76 | } else { 77 | return None; 78 | }; 79 | let (tag, head) = get_and_erase_tag(head); 80 | if let NopTag = tag { 81 | self.handle.0 += 1; 82 | self.next() 83 | } else { 84 | let mut node_repr = NodeRepr { fields: [0; 6] }; 85 | node_repr.fields[0] = head; 86 | let slice = 
&self.vec[self.handle.usize() + 1..self.handle.usize() + tag.size()]; 87 | for (i, val) in slice.iter().enumerate() { 88 | node_repr.fields[1 + i] = val.get(); 89 | } 90 | let result = node_repr.expand(tag, self.handle.0); 91 | self.handle.0 += tag.size() as u32; 92 | Some(result) 93 | } 94 | } 95 | } 96 | } 97 | 98 | impl<'a> Iter<'a> { 99 | #[inline] 100 | pub(crate) fn peek(&mut self) -> Option { 101 | self.clone().next() 102 | } 103 | } 104 | 105 | // Node variants `Sum`/`Product` are better known in literature as `OR`/`AND`. 106 | #[derive(Copy, Clone, Debug, Eq, PartialEq)] 107 | pub enum Node { 108 | Sum { 109 | /// 8 bytes. 110 | /// Invariant: count > 1. 111 | /// Invariant: This node can only be directly followed by `Product`. 112 | count: u32, 113 | nonterminal: Symbol, 114 | }, 115 | Product { 116 | /// 12+ bytes. 117 | action: u32, 118 | left_factor: NodeHandle, 119 | right_factor: Option, 120 | }, 121 | NullingLeaf { 122 | /// 4 bytes. 123 | symbol: Symbol, 124 | }, 125 | Evaluated { 126 | /// 4 bytes. 
127 | symbol: Symbol, 128 | }, 129 | } 130 | 131 | #[derive(Clone, Copy)] 132 | union NodeRepr { 133 | fields: [u16; 6], 134 | small_sum: SmallSumRepr, 135 | small_link: SmallLinkRepr, 136 | medium_link: MediumLinkRepr, 137 | small_product: SmallProductRepr, 138 | small_leaf: SmallLeafRepr, 139 | small_nulling_leaf: SmallNullingLeafRepr, 140 | sum: SumRepr, 141 | product: ProductRepr, 142 | leaf: LeafRepr, 143 | nop: NopRepr, 144 | } 145 | 146 | #[derive(Clone, Copy)] 147 | struct SmallSumRepr { 148 | nonterminal: u8, 149 | // smaller (big end position) 150 | count: u8, 151 | } 152 | 153 | #[derive(Clone, Copy)] 154 | struct SumRepr { 155 | count: u32, 156 | nonterminal: Symbol, 157 | } 158 | 159 | #[derive(Clone, Copy)] 160 | struct SmallLinkRepr { 161 | action: u8, 162 | // smaller (big end position) 163 | distance: u8, 164 | } 165 | 166 | #[derive(Clone, Copy)] 167 | struct MediumLinkRepr { 168 | distance: u16, 169 | action: u16, 170 | } 171 | 172 | #[derive(Clone, Copy)] 173 | struct SmallProductRepr { 174 | left_distance: u8, 175 | // smaller (big end position) 176 | right_distance: u8, 177 | action: u16, 178 | } 179 | 180 | #[derive(Clone, Copy)] 181 | #[repr(packed)] 182 | struct ProductRepr { 183 | upper_action: u16, 184 | lower_action: u16, 185 | left_factor: NodeHandle, 186 | right_factor: NodeHandle, 187 | } 188 | 189 | #[derive(Clone, Copy)] 190 | struct SmallNullingLeafRepr { 191 | symbol: u16, 192 | } 193 | 194 | #[derive(Clone, Copy)] 195 | struct LeafRepr { 196 | symbol: Symbol, 197 | } 198 | 199 | #[derive(Clone, Copy)] 200 | struct SmallLeafRepr { 201 | symbol: u16, 202 | } 203 | 204 | #[derive(Clone, Copy)] 205 | struct NopRepr { 206 | nop: u16, 207 | } 208 | 209 | #[derive(Copy, Clone, Eq, PartialEq, Debug)] 210 | pub(super) enum Tag { 211 | SmallSumTag = 0b000 << TAG_BIT, 212 | SmallLinkTag = 0b001 << TAG_BIT, 213 | MediumLinkTag = 0b010 << TAG_BIT, 214 | SmallProductTag = 0b011 << TAG_BIT, 215 | SmallLeafTag = 0b100 << TAG_BIT, 216 | // 
SmallNonnullingLeaf = 0b1000 << (TAG_BIT - 1), 217 | SmallNullingLeafTag = 0b1001 << (TAG_BIT - 1), 218 | LeafTag = 0b101 << TAG_BIT, 219 | SumTag = 0b111 << TAG_BIT, 220 | ProductTag = 0b110 << TAG_BIT, 221 | NopTag = 0b1111_1111_1111_1111, 222 | } 223 | 224 | impl Tag { 225 | #[inline] 226 | fn from_u16(num: u16) -> Option { 227 | let n = num & TAG_MASK; 228 | if num == NopTag.to_u16() { 229 | Some(NopTag) 230 | } else if n == LeafTag.to_u16() { 231 | Some(LeafTag) 232 | } else if n == SumTag.to_u16() { 233 | Some(SumTag) 234 | } else if n == ProductTag.to_u16() { 235 | Some(ProductTag) 236 | } else if n == SmallSumTag.to_u16() { 237 | Some(SmallSumTag) 238 | } else if n == SmallLinkTag.to_u16() { 239 | Some(SmallLinkTag) 240 | } else if n == MediumLinkTag.to_u16() { 241 | Some(MediumLinkTag) 242 | } else if n == SmallProductTag.to_u16() { 243 | Some(SmallProductTag) 244 | } else if n == SmallLeafTag.to_u16() { 245 | let n = num & SMALL_LEAF_TAG_MASK; 246 | if n == SmallLeafTag.to_u16() { 247 | Some(SmallLeafTag) 248 | } else if n == SmallNullingLeafTag.to_u16() { 249 | Some(SmallNullingLeafTag) 250 | } else { 251 | None 252 | } 253 | } else { 254 | None 255 | } 256 | } 257 | 258 | #[inline] 259 | pub(super) fn to_u16(self) -> u16 { 260 | match self { 261 | SmallSumTag => 0b000 << TAG_BIT, 262 | SmallLinkTag => 0b001 << TAG_BIT, 263 | MediumLinkTag => 0b010 << TAG_BIT, 264 | SmallProductTag => 0b011 << TAG_BIT, 265 | SmallLeafTag => 0b100 << TAG_BIT, 266 | // SmallNonnullingLeaf = 0b1000 << (TAG_BIT - 1), 267 | SmallNullingLeafTag => 0b1001 << (TAG_BIT - 1), 268 | LeafTag => 0b101 << TAG_BIT, 269 | SumTag => 0b111 << TAG_BIT, 270 | ProductTag => 0b110 << TAG_BIT, 271 | NopTag => 0b1111_1111_1111_1111, 272 | } 273 | } 274 | 275 | #[inline] 276 | fn mask(self) -> u16 { 277 | match self { 278 | SmallSumTag => TAG_MASK, 279 | SmallLinkTag => TAG_MASK, 280 | MediumLinkTag => TAG_MASK, 281 | SmallProductTag => TAG_MASK, 282 | SmallLeafTag => SMALL_LEAF_TAG_MASK, 283 | 
// SmallNonnullingLeaf = 0b1000 << (TAG_BIT - 1), 284 | SmallNullingLeafTag => SMALL_LEAF_TAG_MASK, 285 | LeafTag => TAG_MASK, 286 | SumTag => TAG_MASK, 287 | ProductTag => TAG_MASK, 288 | NopTag => 0b1111_1111_1111_1111, 289 | } 290 | } 291 | 292 | #[inline] 293 | pub(super) fn size(self) -> usize { 294 | match self { 295 | SmallSumTag => 1, 296 | SmallLinkTag => 1, 297 | MediumLinkTag => 2, 298 | SmallProductTag => 2, 299 | SmallLeafTag => 1, 300 | SmallNullingLeafTag => 1, 301 | LeafTag => 4, 302 | SumTag => 4, 303 | ProductTag => 6, 304 | NopTag => 1, 305 | } 306 | } 307 | } 308 | 309 | const TAG_BIT: usize = 5 + 8; 310 | const TAG_MASK: u16 = 0b111 << TAG_BIT; 311 | const SMALL_LEAF_TAG_MASK: u16 = 0b1111 << (TAG_BIT - 1); 312 | pub(super) const NULL_ACTION: u32 = !((TAG_MASK as u32) << 16); 313 | 314 | impl NodeRepr { 315 | fn expand(self, tag: Tag, position: u32) -> Node { 316 | unsafe { 317 | match (self, tag) { 318 | ( 319 | NodeRepr { 320 | small_sum: SmallSumRepr { nonterminal, count }, 321 | }, 322 | SmallSumTag, 323 | ) => Sum { 324 | nonterminal: Symbol::from(nonterminal as u32), 325 | count: count as u32, 326 | }, 327 | ( 328 | NodeRepr { 329 | sum: SumRepr { nonterminal, count }, 330 | }, 331 | SumTag, 332 | ) => Sum { nonterminal, count }, 333 | ( 334 | NodeRepr { 335 | small_link: SmallLinkRepr { distance, action }, 336 | }, 337 | SmallLinkTag, 338 | ) => Product { 339 | action: action as u32, 340 | left_factor: NodeHandle(position - distance as u32), 341 | right_factor: None, 342 | }, 343 | ( 344 | NodeRepr { 345 | medium_link: MediumLinkRepr { distance, action }, 346 | }, 347 | MediumLinkTag, 348 | ) => Product { 349 | action: action as u32, 350 | left_factor: NodeHandle(position - distance as u32), 351 | right_factor: None, 352 | }, 353 | ( 354 | NodeRepr { 355 | small_product: 356 | SmallProductRepr { 357 | right_distance, 358 | left_distance, 359 | action, 360 | }, 361 | }, 362 | SmallProductTag, 363 | ) => Product { 364 | action: action as 
u32, 365 | left_factor: NodeHandle(position - left_distance as u32), 366 | right_factor: Some(NodeHandle(position - right_distance as u32)), 367 | }, 368 | ( 369 | NodeRepr { 370 | product: 371 | ProductRepr { 372 | upper_action, 373 | lower_action, 374 | left_factor, 375 | right_factor, 376 | }, 377 | }, 378 | ProductTag, 379 | ) => Product { 380 | action: (upper_action as u32) << 16 | (lower_action as u32), 381 | left_factor, 382 | right_factor: right_factor.to_option(), 383 | }, 384 | ( 385 | NodeRepr { 386 | small_nulling_leaf: SmallNullingLeafRepr { symbol }, 387 | }, 388 | SmallNullingLeafTag, 389 | ) => NullingLeaf { 390 | symbol: Symbol::from(symbol as u32), 391 | }, 392 | ( 393 | NodeRepr { 394 | small_leaf: SmallLeafRepr { symbol }, 395 | }, 396 | SmallLeafTag, 397 | ) => Evaluated { 398 | symbol: Symbol::from(symbol as u32), 399 | }, 400 | ( 401 | NodeRepr { 402 | leaf: LeafRepr { symbol }, 403 | }, 404 | LeafTag, 405 | ) => Evaluated { symbol }, 406 | _ => unreachable!(), 407 | } 408 | } 409 | } 410 | } 411 | 412 | impl Node { 413 | #[inline] 414 | fn to_repr(self, position: u32) -> (NodeRepr, usize) { 415 | let tag = self.classify(position); 416 | unsafe { 417 | let mut result = match (self, tag) { 418 | (Sum { nonterminal, count }, SmallSumTag) => NodeRepr { 419 | small_sum: SmallSumRepr { 420 | nonterminal: nonterminal.usize() as u8, 421 | count: count as u8, 422 | }, 423 | }, 424 | (Sum { nonterminal, count }, SumTag) => NodeRepr { 425 | sum: SumRepr { nonterminal, count }, 426 | }, 427 | ( 428 | Product { 429 | left_factor, 430 | right_factor: None, 431 | action, 432 | }, 433 | SmallLinkTag, 434 | ) => NodeRepr { 435 | small_link: SmallLinkRepr { 436 | distance: (position - left_factor.0) as u8, 437 | action: action as u8, 438 | }, 439 | }, 440 | ( 441 | Product { 442 | left_factor, 443 | right_factor: None, 444 | action, 445 | }, 446 | MediumLinkTag, 447 | ) => NodeRepr { 448 | medium_link: MediumLinkRepr { 449 | distance: (position - 
left_factor.0) as u16, 450 | action: action as u16, 451 | }, 452 | }, 453 | ( 454 | Product { 455 | left_factor, 456 | right_factor: Some(right), 457 | action, 458 | }, 459 | SmallProductTag, 460 | ) => NodeRepr { 461 | small_product: SmallProductRepr { 462 | right_distance: (position - right.0) as u8, 463 | left_distance: (position - left_factor.0) as u8, 464 | action: action as u16, 465 | }, 466 | }, 467 | ( 468 | Product { 469 | left_factor, 470 | right_factor, 471 | action, 472 | }, 473 | ProductTag, 474 | ) => NodeRepr { 475 | product: ProductRepr { 476 | upper_action: (action >> 16) as u16, 477 | lower_action: action as u16, 478 | left_factor, 479 | right_factor: right_factor.unwrap_or(NULL_HANDLE), 480 | }, 481 | }, 482 | (NullingLeaf { symbol }, SmallNullingLeafTag) => NodeRepr { 483 | small_nulling_leaf: SmallNullingLeafRepr { 484 | symbol: symbol.usize() as u16, 485 | }, 486 | }, 487 | (NullingLeaf { symbol }, LeafTag) => NodeRepr { 488 | leaf: LeafRepr { symbol }, 489 | }, 490 | (Evaluated { symbol }, SmallLeafTag) => NodeRepr { 491 | small_leaf: SmallLeafRepr { 492 | symbol: symbol.usize() as u16, 493 | }, 494 | }, 495 | (Evaluated { symbol }, LeafTag) => NodeRepr { 496 | leaf: LeafRepr { symbol }, 497 | }, 498 | _ => unreachable!(), 499 | }; 500 | result.fields[0] |= tag.to_u16(); 501 | (result, tag.size()) 502 | } 503 | } 504 | 505 | #[inline] 506 | pub(super) fn classify(self, position: u32) -> Tag { 507 | match self { 508 | Product { 509 | left_factor, 510 | right_factor, 511 | action, 512 | } => match right_factor { 513 | Some(handle) => { 514 | if position >= handle.0 515 | && position >= left_factor.0 516 | && position - handle.0 < (1 << 5) 517 | && position - left_factor.0 < (1 << 8) 518 | && action < (1 << 16) 519 | { 520 | SmallProductTag 521 | } else { 522 | ProductTag 523 | } 524 | } 525 | None => { 526 | if position >= left_factor.0 527 | && position - left_factor.0 < (1 << 5) 528 | && action < (1 << 8) 529 | { 530 | SmallLinkTag 531 | } 
else if position >= left_factor.0 532 | && position - left_factor.0 < (1 << (5 + 8)) 533 | && action < (1 << 16) 534 | { 535 | MediumLinkTag 536 | } else { 537 | ProductTag 538 | } 539 | } 540 | }, 541 | NullingLeaf { symbol } => { 542 | if symbol.usize() < (1 << (4 + 8)) { 543 | SmallNullingLeafTag 544 | } else { 545 | LeafTag 546 | } 547 | } 548 | Evaluated { symbol } => { 549 | if symbol.usize() < (1 << (4 + 8)) { 550 | SmallLeafTag 551 | } else { 552 | LeafTag 553 | } 554 | } 555 | Sum { nonterminal, count } => { 556 | if count < (1 << 5) && nonterminal.usize() < (1 << 8) { 557 | SmallSumTag 558 | } else { 559 | SumTag 560 | } 561 | } 562 | } 563 | } 564 | } 565 | 566 | #[inline] 567 | unsafe fn unwrap_unchecked(opt: Option) -> T { 568 | match opt { 569 | Some(val) => val, 570 | None => hint::unreachable_unchecked(), 571 | } 572 | } 573 | 574 | #[inline] 575 | unsafe fn get_and_erase_tag(field: u16) -> (Tag, u16) { 576 | let tag = unwrap_unchecked(Tag::from_u16(field)); 577 | (tag, field & !tag.mask()) 578 | } 579 | -------------------------------------------------------------------------------- /src/forest/compact_bocage/order.rs: -------------------------------------------------------------------------------- 1 | use std::cell::Cell; 2 | 3 | use cfg::symbol::Symbol; 4 | 5 | pub trait Order { 6 | /// Apply the order to sum node alternatives. 7 | fn sum<'b>(&mut self, alternatives: &'b [Cell]) -> &'b [Cell] { 8 | alternatives 9 | } 10 | 11 | /// Apply the order to product node factors. 
12 | fn product(&mut self, _factors: &[(Symbol, u32)]) -> Option { 13 | None 14 | } 15 | } 16 | 17 | #[derive(Default)] 18 | pub struct NullOrder; 19 | 20 | impl Order for NullOrder {} 21 | 22 | impl NullOrder { 23 | pub fn new() -> Self { 24 | NullOrder 25 | } 26 | } 27 | -------------------------------------------------------------------------------- /src/forest/compact_bocage/traverse.rs: -------------------------------------------------------------------------------- 1 | use std::borrow::Borrow; 2 | use std::iter; 3 | 4 | use bit_vec::BitVec; 5 | use cfg::symbol::Symbol; 6 | 7 | use forest::compact_bocage::node::Node::*; 8 | use forest::compact_bocage::node::{Iter, Node, Tag}; 9 | use forest::node_handle::NodeHandle; 10 | use forest::CompactBocage; 11 | use grammar::InternalGrammar; 12 | 13 | pub use self::HandleVariant::*; 14 | 15 | impl CompactBocage { 16 | // Once node liveness is marked, you may traverse the nodes. 17 | pub fn traverse(&self) -> Traverse { 18 | Traverse { 19 | bocage: self, 20 | graph_iter: self.graph.iter_from(NodeHandle(0)), 21 | liveness: &self.gc.liveness, 22 | factor_stack: vec![], 23 | factor_traversal: vec![], 24 | } 25 | } 26 | } 27 | 28 | pub struct Traverse<'f, G> { 29 | bocage: &'f CompactBocage, 30 | // main iterators 31 | graph_iter: Iter<'f>, 32 | liveness: &'f BitVec, 33 | // Space for unrolling factors 34 | factor_stack: Vec<(Symbol, NodeHandle)>, 35 | // Scratch space for traversal 36 | factor_traversal: Vec, 37 | } 38 | 39 | impl<'f, G> Traverse<'f, G> 40 | where 41 | G: Borrow, 42 | { 43 | pub fn next_node<'t>(&'t mut self) -> Option> { 44 | while let Some(node) = self.graph_iter.peek() { 45 | let iter = self.graph_iter; 46 | let alive = self.liveness[self.graph_iter.handle.usize()]; 47 | println!( 48 | "next_node @{:?} {:?} {}", 49 | self.graph_iter.handle, node, alive 50 | ); 51 | self.graph_iter.next(); 52 | if !alive { 53 | continue; 54 | } 55 | match node { 56 | Product { action, .. 
} => { 57 | if self.bocage.is_transparent(action) { 58 | continue; 59 | } 60 | let products = iter.take(1); 61 | return Some(TraversalHandle { 62 | iter, 63 | symbol: self.bocage.grammar.borrow().get_lhs(action), 64 | item: SumHandle(Products { 65 | products, 66 | traverse: self, 67 | }), 68 | }); 69 | } 70 | Sum { 71 | nonterminal: symbol, 72 | count, 73 | } => { 74 | let products = self.graph_iter.take(count as usize); 75 | for _ in 0..count { 76 | let p = self.graph_iter.handle; 77 | let n = self.graph_iter.next(); 78 | println!("next_node product @{:?} {:?}", p, n); 79 | } 80 | return Some(TraversalHandle { 81 | iter, 82 | symbol, 83 | item: SumHandle(Products { 84 | products, 85 | traverse: self, 86 | }), 87 | }); 88 | } 89 | NullingLeaf { symbol } => { 90 | return Some(TraversalHandle { 91 | iter, 92 | symbol, 93 | item: NullingHandle, 94 | }); 95 | } 96 | Evaluated { symbol, .. } => { 97 | return Some(TraversalHandle { 98 | iter, 99 | symbol, 100 | item: LeafHandle, 101 | }); 102 | } 103 | } 104 | } 105 | None 106 | } 107 | 108 | fn unfold_factors(&mut self, left: NodeHandle, right: Option) { 109 | self.factor_stack.clear(); 110 | self.enqueue_for_unfold(left, right); 111 | while let Some(node) = self.pop_for_unfold() { 112 | match node { 113 | ( 114 | Product { 115 | left_factor, 116 | right_factor, 117 | .. 
118 | }, 119 | _, 120 | ) => { 121 | self.enqueue_for_unfold(left_factor, right_factor); 122 | } 123 | (Evaluated { symbol }, handle) => { 124 | self.factor_stack.push((symbol, handle)); 125 | } 126 | _ => unreachable!(), 127 | } 128 | } 129 | } 130 | 131 | fn enqueue_for_unfold(&mut self, left: NodeHandle, right: Option) { 132 | if let Some(right) = right { 133 | self.factor_traversal.push(right); 134 | } 135 | self.factor_traversal.push(left); 136 | } 137 | 138 | fn pop_for_unfold(&mut self) -> Option<(Node, NodeHandle)> { 139 | self.factor_traversal 140 | .pop() 141 | .map(|handle| (self.bocage.graph.get(handle), handle)) 142 | } 143 | } 144 | 145 | pub struct TraversalHandle<'f, 't, G> { 146 | pub(crate) iter: Iter<'f>, 147 | pub symbol: Symbol, 148 | pub item: HandleVariant<'f, 't, G>, 149 | } 150 | 151 | pub enum HandleVariant<'f, 't, G> { 152 | SumHandle(Products<'f, 't, G>), 153 | NullingHandle, 154 | LeafHandle, 155 | } 156 | 157 | pub struct Products<'f, 't, G> { 158 | products: iter::Take>, 159 | traverse: &'t mut Traverse<'f, G>, 160 | } 161 | 162 | pub struct ProductHandle<'t> { 163 | pub action: u32, 164 | pub factors: &'t [(Symbol, NodeHandle)], 165 | } 166 | 167 | impl<'f, 't, G> Products<'f, 't, G> 168 | where 169 | G: Borrow, 170 | { 171 | pub fn next_product<'p>(&'p mut self) -> Option { 172 | while let Some(node) = self.products.next() { 173 | match node { 174 | Product { 175 | left_factor, 176 | right_factor, 177 | action, 178 | } => { 179 | let origin = self 180 | .traverse 181 | .bocage 182 | .grammar 183 | .borrow() 184 | .external_origin(action); 185 | if let Some(action) = origin { 186 | self.traverse.unfold_factors(left_factor, right_factor); 187 | return Some(ProductHandle { 188 | action, 189 | factors: &self.traverse.factor_stack[..], 190 | }); 191 | } 192 | } 193 | _ => unreachable!(), 194 | } 195 | } 196 | None 197 | } 198 | } 199 | 200 | impl<'f, 't, G> TraversalHandle<'f, 't, G> { 201 | pub fn end_evaluation(&self) { 202 | 
self.iter.vec[self.iter.handle.usize()].set(Tag::SmallLeafTag.to_u16()); 203 | } 204 | 205 | pub fn handle(&self) -> NodeHandle { 206 | self.iter.handle 207 | } 208 | } 209 | -------------------------------------------------------------------------------- /src/forest/mod.rs: -------------------------------------------------------------------------------- 1 | pub mod bocage; 2 | pub mod compact_bocage; 3 | pub mod node_handle; 4 | pub mod null_forest; 5 | 6 | pub use self::bocage::Bocage; 7 | pub use self::compact_bocage::CompactBocage; 8 | pub use self::null_forest::NullForest; 9 | 10 | use cfg::Symbol; 11 | use std::fmt; 12 | 13 | use item::CompletedItem; 14 | 15 | pub trait Forest { 16 | /// Reference to a node. 17 | type NodeRef: Copy + fmt::Debug; 18 | type LeafValue; 19 | 20 | const FOREST_BYTES_PER_RECOGNIZER_BYTE: usize; 21 | 22 | fn begin_sum(&mut self); 23 | 24 | fn push_summand(&mut self, item: CompletedItem); 25 | 26 | fn sum(&mut self, lhs_sym: Symbol, origin: u32) -> Self::NodeRef; 27 | 28 | fn leaf(&mut self, token: Symbol, pos: u32, value: Self::LeafValue) -> Self::NodeRef; 29 | 30 | fn nulling(&self, token: Symbol) -> Self::NodeRef; 31 | } 32 | -------------------------------------------------------------------------------- /src/forest/node_handle.rs: -------------------------------------------------------------------------------- 1 | use cfg::Symbol; 2 | 3 | #[derive(Copy, Clone, Eq, PartialEq, Ord, PartialOrd, Debug)] 4 | pub struct NodeHandle(pub(crate) u32); 5 | 6 | pub(super) const NULL_HANDLE: NodeHandle = NodeHandle(0xFFFF_FFFF); 7 | 8 | impl NodeHandle { 9 | #[inline] 10 | pub(super) fn nulling(symbol: Symbol) -> Self { 11 | NodeHandle(symbol.usize() as u32) 12 | } 13 | 14 | #[inline] 15 | pub(super) fn usize(self) -> usize { 16 | self.0 as usize 17 | } 18 | 19 | #[inline] 20 | pub(super) fn to_option(self) -> Option { 21 | if self == NULL_HANDLE { 22 | None 23 | } else { 24 | Some(self) 25 | } 26 | } 27 | } 28 | 
-------------------------------------------------------------------------------- /src/forest/null_forest.rs: -------------------------------------------------------------------------------- 1 | use cfg::symbol::Symbol; 2 | 3 | use forest::Forest; 4 | use item::CompletedItem; 5 | 6 | /// An empty forest. 7 | pub struct NullForest; 8 | 9 | impl Forest for NullForest { 10 | type NodeRef = (); 11 | type LeafValue = (); 12 | 13 | const FOREST_BYTES_PER_RECOGNIZER_BYTE: usize = 0; 14 | 15 | #[inline(always)] 16 | fn leaf(&mut self, _: Symbol, _: u32, _: ()) {} 17 | #[inline(always)] 18 | fn nulling(&self, _: Symbol) {} 19 | #[inline(always)] 20 | fn begin_sum(&mut self) {} 21 | #[inline(always)] 22 | fn push_summand(&mut self, _item: CompletedItem) {} 23 | #[inline(always)] 24 | fn sum(&mut self, _lhs_sym: Symbol, _origin: u32) -> Self::NodeRef { 25 | () 26 | } 27 | } 28 | -------------------------------------------------------------------------------- /src/grammar.rs: -------------------------------------------------------------------------------- 1 | //! # Grammar transforms 2 | //! 3 | //! For efficiency, the recognizer works on processed grammars. Grammars described 4 | //! by the user are transformed to meet the following properties: 5 | //! 6 | //! ## Property 1: Right-hand-sides of all rules have one symbol or two symbols. 7 | //! 8 | //! That is, all rules are of the form 9 | //! `A ::= B C` 10 | //! or 11 | //! `D ::= E`. 12 | //! 13 | //! ### a) Right-hand-sides of all rules have at least one symbol. 14 | //! 15 | //! ### b) Right-hand-sides of all rules have at most two symbols. 16 | //! 17 | //! ## Property 2: There are no cycles among unit rules. 18 | //! 19 | //! That is, for any nonterminals `A`…`Z`, the set of rules doesn't have a subset 20 | //! such as {`A ::= B`, `B ::= C`, …, `Y ::= Z`, `Z ::= A`}. 21 | //! 22 | //! In other words, for any nonterminal `A`, `A` doesn't derive `A` in two or more steps. 23 | //! 24 | //! 
## Property 3: Dot numbers for pre-RHS0 dots are ordered by the LHS symbol IDs. 25 | //! 26 | //! ## Property 4: Dot numbers for pre-RHS1 dots are ordered by their RHS1 symbol IDs. 27 | //! 28 | //! ## Property 5: IDs of unit rules are smaller than IDs of rules which they predict. 29 | //! 30 | //! Internal symbols must be remapped, because this property may interfere with (4). 31 | //! This property also requires (3). 32 | //! 33 | //! # Similarities to other parsers 34 | //! 35 | //! * 1.a) is required by some Earley parsers, including Marpa. 36 | //! * 1.b) is required for recognition in CYK parsers, and in a roundabout way for construction 37 | //! of bocages. 38 | //! * 2 is required by PEG and some other parsers. 39 | //! * 3, 4 and 5 are specific to gearley. 40 | //! 41 | //! # Motivation for grammar transforms 42 | //! 43 | //! ## Property 1.a), one RHS symbol. 44 | //! 45 | //! Handling nullable rules is notoriously difficult in Earley parsers. Even the original Earley's 46 | //! PhD paper contained an algorithm bug in handling nullable rules. We avoid nullability completely 47 | //! by remembering all about our null removal and fixing the parse forest post-parse. 48 | //! 49 | //! ## Property 1.b), two RHS symbols. 50 | //! 51 | //! Think about it: if a rule has three right-hand side symbols, and all of them are nullable, 52 | //! then property a) would produce 2*2*2 = 8 rules for each combination of missing null and present symbol. 53 | //! We avoid exponential blowup not only here in grammar preprocessing, but also in the bocage by restricting 54 | //! ourselves to no more than two symbols at a time. 55 | //! 56 | //! ## Property 2, no cycles among unit rules. 57 | //! 58 | //! ... 59 | //! 60 | //! ## Property 3, dot numbers for pre-RHS0 dots are ordered by the LHS symbol IDs. 61 | //! 62 | //! ... 63 | //! 64 | //! ## Property 4, dot numbers for pre-RHS1 dots are ordered by their RHS1 symbol IDs. 65 | //! 66 | //! ... 67 | //! 68 | //! 
## Property 5, IDs of unit rules are smaller than IDs of rules which they predict. 69 | //! 70 | //! ... 71 | 72 | use std::convert::TryInto; 73 | use std::iter; 74 | 75 | use bit_matrix::BitMatrix; 76 | use bit_matrix::row::BitVecSlice; 77 | use cfg::{ContextFreeRef, GrammarRule, Symbol}; 78 | use cfg::rule::container::RuleContainer; 79 | use cfg::remap::Mapping; 80 | use cfg::prediction::{FirstSetsCollector, FollowSets}; 81 | use optional::Optioned; 82 | 83 | use item::Dot; 84 | 85 | pub use cfg::earley::{Grammar, BinarizedGrammar}; 86 | pub use cfg::earley::history::History; 87 | 88 | // # Future optimizations 89 | // 90 | // Store RHS1 and LHS in row-major instead of column-major order, so that the least significant bit 91 | // tells us whether a dot is medial or completed. Or don't. 92 | // 93 | // Parameterize the representation over symbol type (u32, u16, u8). 94 | 95 | #[derive(Serialize, Deserialize, Copy, Clone, Debug)] 96 | pub(in super) struct PredictionTransition { 97 | pub symbol: Symbol, 98 | pub dot: Dot, 99 | } 100 | 101 | #[derive(Eq, PartialEq, Ord, PartialOrd)] 102 | pub(in super) enum MaybePostdot { 103 | Binary(Symbol), 104 | Unary, 105 | } 106 | 107 | #[derive(Serialize, Deserialize, Clone, Default, Debug)] 108 | pub struct InternalGrammar { 109 | start_sym: Symbol, 110 | original_start_sym: Symbol, 111 | has_trivial_derivation: bool, 112 | eof_sym: Symbol, 113 | dot_before_eof: Dot, 114 | size: InternalGrammarSize, 115 | 116 | prediction_matrix: BitMatrix, 117 | // Inverse prediction lookup. 118 | unary_completions: Vec, 119 | unary_completion_index: Vec, 120 | 121 | binary_completions: Vec, 122 | binary_completion_index: Vec, 123 | 124 | follow_sets: BitMatrix, 125 | first_sets: BitMatrix, 126 | 127 | // array of events 128 | events_rhs: [Vec; 3], 129 | // 2-dimensional arrays for tracing 130 | trace_rhs: [Vec>; 3], 131 | // Each rule can have only one eliminated nulling symbol. 
132 | nulling_eliminated: Vec, 133 | // Rules stored in column-major order. 134 | lhs: Vec>, 135 | rhs0: Vec>, 136 | rhs1: Vec>, 137 | // Rule origin preserved for post-parse actions. 138 | eval: Vec, 139 | // Mapping between external and internal symbols. 140 | sym_maps: Mapping, 141 | nulling_intermediate_rules: Vec, 142 | } 143 | 144 | #[derive(Serialize, Deserialize, Clone, Default, Debug)] 145 | pub struct InternalGrammarSize { 146 | pub syms: usize, 147 | pub rules: usize, 148 | pub internal_syms: usize, 149 | pub external_syms: usize, 150 | } 151 | 152 | pub(in super) type ExternalDottedRule = (u32, u32); 153 | type ExternalOrigin = Option; 154 | type EventId = Optioned; 155 | type MinimalDistance = Optioned; 156 | pub(in super) type Event = (EventId, MinimalDistance); 157 | type NullingEliminated = Option<(Symbol, bool)>; 158 | type NullingIntermediateRule = (Symbol, Symbol, Symbol); 159 | type CompletionTable = Vec>; 160 | 161 | impl InternalGrammar { 162 | fn new() -> Self { 163 | Self::default() 164 | } 165 | 166 | pub fn from_grammar(grammar: &Grammar) -> Self { 167 | Self::from_binarized_grammar(grammar.binarize()) 168 | } 169 | 170 | pub fn from_binarized_grammar(grammar: BinarizedGrammar) -> Self { 171 | let grammar = grammar.make_proper(); 172 | Self::from_proper_binarized_grammar(grammar) 173 | } 174 | 175 | pub fn from_proper_binarized_grammar(grammar: BinarizedGrammar) -> Self { 176 | let (mut grammar, nulling) = grammar.eliminate_nulling(); 177 | grammar.wrap_start(); 178 | Self::from_processed_grammar(grammar, &nulling) 179 | } 180 | 181 | pub fn from_processed_grammar(grammar: BinarizedGrammar, nulling: &BinarizedGrammar) -> Self { 182 | let (grammar, maps) = grammar.remap_symbols(); 183 | Self::from_processed_grammar_with_maps(grammar, maps, nulling) 184 | } 185 | 186 | pub fn from_processed_grammar_with_maps( 187 | mut grammar: BinarizedGrammar, 188 | maps: Mapping, 189 | nulling: &BinarizedGrammar) 190 | -> Self 191 | { 192 | 
grammar.sort_by(|a, b| a.lhs().cmp(&b.lhs())); 193 | let mut result = InternalGrammar::new(); 194 | result.populate_sizes(&grammar, &maps); 195 | result.populate_maps(maps); 196 | result.populate_grammar(&grammar); 197 | result.populate_nulling(nulling); 198 | trace!("populated grammar {:?}", &result); 199 | result 200 | } 201 | 202 | fn populate_sizes(&mut self, grammar: &BinarizedGrammar, maps: &Mapping) { 203 | self.size = InternalGrammarSize { 204 | rules: grammar.rules().count(), 205 | syms: grammar.sym_source().num_syms(), 206 | external_syms: maps.to_internal.len(), 207 | internal_syms: maps.to_external.len(), 208 | } 209 | } 210 | 211 | fn populate_grammar(&mut self, grammar: &BinarizedGrammar) { 212 | self.populate_start_sym(grammar); 213 | self.populate_grammar_with_lhs(grammar); 214 | self.populate_grammar_with_rhs(grammar); 215 | self.populate_grammar_with_history(grammar); 216 | self.populate_predictions(grammar); 217 | } 218 | 219 | fn populate_start_sym(&mut self, grammar: &BinarizedGrammar) { 220 | let start = grammar.start(); 221 | self.start_sym = start; 222 | self.eof_sym = grammar.eof().unwrap(); 223 | self.dot_before_eof = grammar.dot_before_eof().unwrap(); 224 | self.original_start_sym = grammar.original_start().unwrap(); 225 | } 226 | 227 | fn populate_grammar_with_lhs(&mut self, grammar: &BinarizedGrammar) { 228 | self.lhs.extend(grammar.rules().map(|rule| Some(rule.lhs()))); 229 | } 230 | 231 | fn populate_grammar_with_rhs(&mut self, grammar: &BinarizedGrammar) { 232 | self.rhs0.extend(grammar.rules().map(|rule| rule.rhs().get(0).cloned())); 233 | self.rhs1.extend(grammar.rules().map(|rule| rule.rhs().get(1).cloned())); 234 | } 235 | 236 | fn populate_grammar_with_history(&mut self, grammar: &BinarizedGrammar) { 237 | self.eval.extend( 238 | grammar.rules().map(|rule| rule.history().origin()) 239 | ); 240 | self.nulling_eliminated.extend( 241 | grammar.rules().map(|rule| rule.history().nullable()) 242 | ); 243 | 244 | 
self.populate_grammar_with_events_rhs(grammar); 245 | self.populate_grammar_with_trace_rhs(grammar); 246 | } 247 | 248 | fn populate_grammar_with_events_rhs(&mut self, grammar: &BinarizedGrammar) { 249 | self.events_rhs[1].extend( 250 | grammar.rules().map(|rule| rule.history().dot(1).event_without_tracing()) 251 | ); 252 | self.events_rhs[2].extend( 253 | grammar.rules().map(|rule| rule.history().dot(2).event_without_tracing()) 254 | ); 255 | } 256 | 257 | fn populate_grammar_with_trace_rhs(&mut self, grammar: &BinarizedGrammar) { 258 | self.trace_rhs[1].extend( 259 | grammar.rules().map(|rule| rule.history().dot(1).trace()) 260 | ); 261 | self.trace_rhs[2].extend( 262 | grammar.rules().map(|rule| rule.history().dot(2).trace()) 263 | ); 264 | } 265 | 266 | fn populate_maps(&mut self, maps: Mapping) { 267 | self.sym_maps = maps; 268 | } 269 | 270 | fn populate_predictions(&mut self, grammar: &BinarizedGrammar) { 271 | self.populate_prediction_matrix(grammar); 272 | self.populate_prediction_events(grammar); 273 | self.populate_completion_tables(grammar); 274 | self.populate_follow_sets(grammar); 275 | } 276 | 277 | fn populate_prediction_matrix(&mut self, grammar: &BinarizedGrammar) { 278 | self.prediction_matrix = BitMatrix::new(self.size.syms, self.size.syms); 279 | // Precompute DFA. 280 | for rule in grammar.rules() { 281 | self.prediction_matrix.set(rule.lhs().usize(), rule.rhs()[0].usize(), true); 282 | } 283 | self.prediction_matrix.transitive_closure(); 284 | // Prediction relation is reflexive. 
285 | for i in 0..self.size.syms { 286 | self.prediction_matrix.set(i, i, true); 287 | } 288 | } 289 | 290 | fn populate_follow_sets(&mut self, grammar: &BinarizedGrammar) { 291 | self.follow_sets = BitMatrix::new(self.size.syms, self.size.syms); 292 | self.first_sets = BitMatrix::new(self.size.syms, self.size.syms); 293 | let first_sets = FirstSetsCollector::new(grammar); 294 | for (outer, inner) in first_sets.first_sets() { 295 | for elem_inner in inner.into_iter() { 296 | if let Some(inner_sym) = elem_inner { 297 | self.first_sets.set(outer.usize(), inner_sym.usize(), true); 298 | } 299 | } 300 | } 301 | self.first_sets.reflexive_closure(); 302 | let follow_sets = FollowSets::new(grammar, grammar.start(), first_sets.first_sets()); 303 | for (before, after) in follow_sets.follow_sets().into_iter() { 304 | for elem_after in after.into_iter() { 305 | if let Some(after_sym) = elem_after { 306 | self.follow_sets.set(before.usize(), after_sym.usize(), true); 307 | } 308 | } 309 | } 310 | } 311 | 312 | fn populate_completion_tables(&mut self, grammar: &BinarizedGrammar) { 313 | self.populate_unary_completion_table(grammar); 314 | self.populate_binary_completion_table(grammar); 315 | } 316 | 317 | fn populate_unary_completion_table(&mut self, grammar: &BinarizedGrammar) { 318 | let table = self.compute_unary_completion_table(grammar); 319 | self.populate_unary_completion_index(&table); 320 | self.populate_unary_completions(&table); 321 | } 322 | 323 | fn compute_unary_completion_table(&self, grammar: &BinarizedGrammar) -> CompletionTable { 324 | let mut table = iter::repeat(vec![]).take(self.size.syms).collect::>(); 325 | 326 | let mut unary_rules = vec![]; 327 | // check for ordering same as self.rules 328 | for (dot, rule) in grammar.rules().enumerate() { 329 | let is_unary = rule.rhs().get(1).is_none(); 330 | if is_unary { 331 | let rhs0_sym = rule.rhs()[0].usize(); 332 | unary_rules.push((rhs0_sym, rule.lhs, dot)); 333 | } 334 | } 335 | for (rhs0_sym, lhs_sym, dot) 
in unary_rules.into_iter() { 336 | table[rhs0_sym].push(PredictionTransition { 337 | symbol: lhs_sym, 338 | dot: dot as u32 339 | }); 340 | } 341 | table 342 | } 343 | 344 | fn populate_unary_completion_index(&mut self, table: &CompletionTable) { 345 | let mut current_idx = 0u32; 346 | self.unary_completion_index.push(0u32); 347 | self.unary_completion_index.extend(table.iter().map(|run| { 348 | current_idx = current_idx.checked_add(run.len() as u32).unwrap(); 349 | current_idx 350 | })); 351 | } 352 | 353 | fn populate_unary_completions(&mut self, table: &CompletionTable) { 354 | let iter_table = table.into_iter().flat_map(|v| v.into_iter()); 355 | self.unary_completions.extend(iter_table); 356 | } 357 | 358 | fn populate_binary_completion_table(&mut self, grammar: &BinarizedGrammar) { 359 | let table = self.compute_binary_completion_table(grammar); 360 | self.populate_binary_completion_index(&table); 361 | self.populate_binary_completions(&table); 362 | } 363 | 364 | fn compute_binary_completion_table(&self, grammar: &BinarizedGrammar) -> CompletionTable { 365 | let mut table = iter::repeat(vec![]).take(self.size.syms).collect::>(); 366 | 367 | let mut binary_rules = vec![]; 368 | // check for ordering same as self.rules 369 | for (dot, rule) in grammar.rules().enumerate() { 370 | let is_binary = rule.rhs().get(1).is_some(); 371 | if is_binary { 372 | let rhs0_sym = rule.rhs()[0].usize(); 373 | binary_rules.push((rhs0_sym, rule.lhs, dot)); 374 | } 375 | } 376 | for (rhs0_sym, lhs_sym, dot) in binary_rules.into_iter() { 377 | table[rhs0_sym].push(PredictionTransition { 378 | symbol: lhs_sym, 379 | dot: dot as u32 380 | }); 381 | } 382 | table 383 | } 384 | 385 | fn populate_binary_completion_index(&mut self, table: &CompletionTable) { 386 | let mut current_idx = 0u32; 387 | self.binary_completion_index.push(0u32); 388 | self.binary_completion_index.extend(table.iter().map(|run| { 389 | current_idx = current_idx.checked_add(run.len() as u32).unwrap(); 390 | 
current_idx 391 | })); 392 | } 393 | 394 | fn populate_binary_completions(&mut self, table: &CompletionTable) { 395 | let iter_table = table.into_iter().flat_map(|v| v.into_iter()); 396 | self.binary_completions.extend(iter_table); 397 | } 398 | 399 | fn populate_prediction_events(&mut self, grammar: &BinarizedGrammar) { 400 | let iter_events_pred = iter::repeat((Optioned::none(), Optioned::none())).take(self.size.syms); 401 | self.events_rhs[0].extend(iter_events_pred); 402 | let iter_trace_pred = iter::repeat(None).take(self.size.syms); 403 | self.trace_rhs[0].extend(iter_trace_pred); 404 | for rule in grammar.rules() { 405 | if let Some(&(pred_event, pred_tracing)) = rule.history().dot(0).event().as_ref() { 406 | // Prediction event and tracing. 407 | self.events_rhs[0][rule.lhs().usize()] = ( 408 | pred_event, 409 | rule.history().dot(0).distance() 410 | ); 411 | self.trace_rhs[0][rule.lhs().usize()] = Some(pred_tracing); 412 | } 413 | } 414 | } 415 | 416 | fn populate_nulling(&mut self, nulling: &BinarizedGrammar) { 417 | self.has_trivial_derivation = !nulling.is_empty(); 418 | let iter_nulling_intermediate = nulling.rules().filter_map(|rule| { 419 | if rule.history().origin().is_none() && rule.rhs().len() == 2 { 420 | Some((rule.lhs(), rule.rhs()[0], rule.rhs()[1])) 421 | } else { 422 | None 423 | } 424 | }); 425 | self.nulling_intermediate_rules.extend(iter_nulling_intermediate); 426 | } 427 | 428 | #[inline] 429 | pub(in super) fn eof(&self) -> Symbol { 430 | self.eof_sym 431 | } 432 | 433 | #[inline] 434 | pub(in super) fn can_follow(&self, before: Symbol, after: Option) -> bool { 435 | let after = after.unwrap_or(self.eof()).usize(); 436 | self.follow_sets[(before.usize(), after)] 437 | } 438 | 439 | #[inline] 440 | pub(in super) fn first(&self, outer: Symbol, maybe_inner: Option) -> bool { 441 | let inner = if let Some(inner) = maybe_inner { 442 | inner 443 | } else { 444 | return outer == self.eof() 445 | }; 446 | self.first_sets[(outer.usize(), 
inner.usize())] 447 | } 448 | 449 | #[inline] 450 | pub(in super) fn prediction_matrix(&self) -> &BitMatrix { 451 | &self.prediction_matrix 452 | } 453 | 454 | #[inline] 455 | pub(in super) fn predict(&self, sym: Symbol) -> &BitVecSlice { 456 | &self.prediction_matrix[sym.usize()] 457 | } 458 | 459 | #[inline] 460 | pub(in super) fn num_syms(&self) -> usize { 461 | self.size.syms 462 | } 463 | 464 | #[inline] 465 | pub(in super) fn num_rules(&self) -> usize { 466 | self.size.rules 467 | } 468 | 469 | #[inline] 470 | pub fn start_sym(&self) -> Symbol { 471 | self.start_sym 472 | } 473 | 474 | pub fn externalized_start_sym(&self) -> Symbol { 475 | self.to_external(self.original_start_sym) 476 | } 477 | 478 | #[inline] 479 | pub(in super) fn has_trivial_derivation(&self) -> bool { 480 | self.has_trivial_derivation 481 | } 482 | 483 | #[inline] 484 | pub(in super) fn nulling(&self, pos: u32) -> NullingEliminated { 485 | self.nulling_eliminated.get(pos as usize).and_then(|&ne| ne) 486 | } 487 | 488 | #[inline] 489 | pub(in super) fn events(&self) -> (&[Event], &[Event]) { 490 | (&self.events_rhs[1][..], &self.events_rhs[2][..]) 491 | } 492 | 493 | #[inline] 494 | pub(in super) fn trace(&self) -> [&[Option]; 3] { 495 | [&self.trace_rhs[0][..], &self.trace_rhs[1][..], &self.trace_rhs[2][..]] 496 | } 497 | 498 | #[inline] 499 | pub(in super) fn get_rhs1(&self, dot: Dot) -> Option { 500 | self.rhs1[dot as usize] 501 | } 502 | 503 | #[inline] 504 | pub(in super) fn get_rhs1_cmp(&self, dot: Dot) -> MaybePostdot { 505 | match self.rhs1[dot as usize] { 506 | None => MaybePostdot::Unary, 507 | Some(rhs1) => MaybePostdot::Binary(rhs1), 508 | } 509 | } 510 | 511 | #[inline] 512 | pub(in super) fn rhs1(&self) -> &[Option] { 513 | &self.rhs1[..] 
514 | } 515 | 516 | #[inline] 517 | pub(in super) fn get_lhs(&self, dot: Dot) -> Symbol { 518 | self.lhs[dot as usize].unwrap() 519 | } 520 | 521 | #[inline] 522 | pub(in super) fn external_origin(&self, dot: Dot) -> ExternalOrigin { 523 | self.eval.get(dot as usize).cloned().unwrap() 524 | } 525 | 526 | pub(in super) fn eliminated_nulling_intermediate(&self) -> &[NullingIntermediateRule] { 527 | &*self.nulling_intermediate_rules 528 | } 529 | 530 | #[inline(always)] 531 | pub(in super) fn unary_completions(&self, sym: Symbol) -> &[PredictionTransition] { 532 | let idxs = &self.unary_completion_index[sym.usize() .. sym.usize() + 2]; 533 | let range = idxs[0] as usize .. idxs[1] as usize; 534 | &self.unary_completions[range] 535 | } 536 | 537 | #[inline(always)] 538 | pub(in super) fn binary_completions(&self, sym: Symbol) -> &[PredictionTransition] { 539 | let idxs = &self.binary_completion_index[sym.usize() .. sym.usize() + 2]; 540 | let range = idxs[0] as usize .. idxs[1] as usize; 541 | &self.binary_completions[range] 542 | } 543 | 544 | #[inline(always)] 545 | pub(in super) fn to_internal(&self, symbol: Symbol) -> Option { 546 | if self.sym_maps.to_internal.is_empty() { 547 | Some(symbol) 548 | } else { 549 | self.sym_maps.to_internal[symbol.usize()] 550 | } 551 | } 552 | 553 | #[inline] 554 | pub fn to_external(&self, symbol: Symbol) -> Symbol { 555 | if self.sym_maps.to_external.is_empty() { 556 | symbol 557 | } else { 558 | self.sym_maps.to_external[symbol.usize()] 559 | } 560 | } 561 | 562 | pub(in super) fn max_nulling_symbol(&self) -> Option { 563 | (0 .. 
self.num_rules()).filter_map(|action| { 564 | self.nulling(action as u32).map(|(sym, _dir)| sym.usize()) 565 | }).chain( 566 | self.eliminated_nulling_intermediate().iter().map(|&(_lhs, rhs0, _rhs1)| { 567 | rhs0.usize() 568 | }) 569 | ).max() 570 | } 571 | 572 | pub(in super) fn dot_before_eof(&self) -> Dot { 573 | self.dot_before_eof 574 | } 575 | } 576 | -------------------------------------------------------------------------------- /src/item.rs: -------------------------------------------------------------------------------- 1 | use std::cmp::Ordering; 2 | 3 | pub type Dot = u32; 4 | pub type Origin = u32; 5 | 6 | #[derive(Clone, Copy, Debug)] 7 | pub struct Item { 8 | pub(in super) origin: Origin, 9 | pub(in super) dot: Dot, 10 | pub node: N, 11 | } 12 | 13 | #[derive(Clone, Copy, Debug)] 14 | pub struct CompletedItem { 15 | /// The dot position. 16 | pub(in super) dot: Dot, 17 | /// The origin location. 18 | /// It comes after `dot`, so that (origin, dot) can be compared in a single instruction 19 | /// on little-endian systems. 20 | pub(in super) origin: Origin, 21 | /// Left bocage node. 22 | pub left_node: N, 23 | /// Right bocage node. 24 | pub right_node: Option, 25 | } 26 | 27 | #[derive(Clone, Copy, Debug)] 28 | pub struct CompletedItemLinked { 29 | /// Left item idx. 30 | pub idx: u32, 31 | /// Right bocage node. 
32 | pub node: Option, 33 | } 34 | 35 | impl PartialEq for Item { 36 | fn eq(&self, other: &Self) -> bool { 37 | (self.origin, self.dot) == (other.origin, other.dot) 38 | } 39 | } 40 | 41 | impl Eq for Item {} 42 | 43 | impl PartialOrd for Item { 44 | fn partial_cmp(&self, other: &Self) -> Option { 45 | Some(self.cmp(other)) 46 | } 47 | } 48 | 49 | impl Ord for Item { 50 | fn cmp(&self, other: &Self) -> Ordering { 51 | (self.origin, self.dot).cmp(&(other.origin, other.dot)) 52 | } 53 | } 54 | 55 | impl PartialEq for CompletedItem { 56 | fn eq(&self, other: &Self) -> bool { 57 | (self.origin, self.dot) == (other.origin, other.dot) 58 | } 59 | } 60 | 61 | impl Eq for CompletedItem {} 62 | 63 | impl PartialOrd for CompletedItem { 64 | fn partial_cmp(&self, other: &Self) -> Option { 65 | Some((self.origin, self.dot).cmp(&(other.origin, other.dot))) 66 | } 67 | } 68 | 69 | impl Ord for CompletedItem { 70 | fn cmp(&self, other: &Self) -> Ordering { 71 | (self.origin, self.dot).cmp(&(other.origin, other.dot)) 72 | } 73 | } 74 | 75 | impl Into> for CompletedItem { 76 | fn into(self) -> Item { 77 | Item { 78 | origin: self.origin, 79 | dot: self.dot, 80 | node: self.left_node, 81 | } 82 | } 83 | } 84 | -------------------------------------------------------------------------------- /src/lib.rs: -------------------------------------------------------------------------------- 1 | #![cfg_attr(feature = "cargo-clippy", allow(new_without_default_derive))] 2 | 3 | #[macro_use] 4 | extern crate log; 5 | extern crate env_logger; 6 | extern crate optional; 7 | extern crate ref_slice; 8 | extern crate bit_matrix; 9 | extern crate bit_vec; 10 | extern crate cfg; 11 | extern crate serde; 12 | #[macro_use] 13 | extern crate serde_derive; 14 | extern crate num; 15 | extern crate num_derive; 16 | 17 | pub mod debug; 18 | pub mod events; 19 | pub mod forest; 20 | pub mod grammar; 21 | pub mod item; 22 | pub mod memory_use; 23 | pub mod recognizer; 24 | pub mod binary_heap; 25 | 
--------------------------------------------------------------------------------
/src/memory_use.rs:
--------------------------------------------------------------------------------
use std::mem;

use bit_matrix::BitMatrix;
use bit_vec::BitVec;

use forest::node_handle::NodeHandle;
use forest::{Bocage, CompactBocage, Forest, NullForest};
use grammar::InternalGrammar;
use item::{CompletedItem, Item};
use recognizer::Recognizer;

/// Assumed average number of medial items per Earley set when budgeting.
const ITEMS_PER_SET: usize = 16;

/// Memory accounting and budget-driven construction.
pub trait MemoryUse {
    /// Extra argument needed for construction (e.g. a grammar reference).
    type Arg;

    /// Estimated number of bytes currently reserved by this value.
    fn memory_use(&self) -> usize;
    /// Creates a value whose reservations aim to stay within `memory_limit` bytes.
    fn new_with_limit(arg: Self::Arg, memory_limit: usize) -> Self;
}

impl<'g, F> MemoryUse for Recognizer<'g, F>
where
    F: Forest + MemoryUse<Arg = &'g InternalGrammar>,
{
    type Arg = &'g InternalGrammar;

    fn memory_use(&self) -> usize {
        self.forest.memory_use()
            + self.predicted.memory_use()
            + self.medial.memory_use()
            + self.complete.memory_use()
            + self.indices.memory_use()
    }

    fn new_with_limit(grammar: &'g InternalGrammar, memory_limit: usize) -> Self {
        // Split the budget between the forest and the recognizer proper
        // according to the forest's declared byte ratio.
        let forest_use_bytes = memory_limit * F::FOREST_BYTES_PER_RECOGNIZER_BYTE
            / (F::FOREST_BYTES_PER_RECOGNIZER_BYTE + 1);
        // Capacity for the completed-item heap, scaled with the budget.
        // The arms use disjoint ranges: repeating the previous endpoint
        // (`0..=1000`, `1000..=500_000`, ...) tripped the warn-by-default
        // `overlapping_range_endpoints` lint — fatal under CI's
        // `RUSTFLAGS: -Dwarnings` — while behaving identically, since the
        // first matching arm always won.
        let complete_use = match memory_limit {
            0..=1000 => 16,
            1001..=500_000 => 32,
            500_001..=2_000_000 => 64,
            _ => 128,
        };
        // NOTE(review): this subtraction can underflow (panic in debug) for
        // very small `memory_limit`; confirm callers always pass a limit
        // larger than the fixed overhead.
        let recognizer_use_bytes = memory_limit
            - forest_use_bytes
            - complete_use * mem::size_of::<CompletedItem<F::NodeRef>>();
        // Bytes per Earley set: one chart index, one bit-matrix row
        // (rounded up to whole 32-bit words), plus the assumed medial items.
        let bytes_per_set = mem::size_of::<usize>()
            + (grammar.num_syms() + 31) / 32 * 4
            + ITEMS_PER_SET * mem::size_of::<Item<F::NodeRef>>();
        let sets_use = recognizer_use_bytes / bytes_per_set;
        let mut recognizer = Recognizer {
            forest: F::new_with_limit(grammar, forest_use_bytes),
            grammar,
            // The initial location is 0.
            earleme: 0,
            // The first Earley set begins at 0 and ends at 0. The second Earley set begins at 0.
57 | indices: Vec::with_capacity(sets_use), 58 | current_medial_start: 0, 59 | // Reserve some capacity for vectors. 60 | predicted: BitMatrix::new(sets_use, grammar.num_syms()), 61 | medial: Vec::with_capacity(sets_use * ITEMS_PER_SET), 62 | complete: Vec::with_capacity(complete_use), 63 | lookahead_hint: None, 64 | }; 65 | recognizer.indices.push(0); 66 | recognizer.indices.push(0); 67 | recognizer.predict(grammar.start_sym()); 68 | recognizer 69 | } 70 | } 71 | 72 | impl<'g, F> Recognizer<'g, F> 73 | where 74 | F: MemoryUse + Forest, 75 | { 76 | #[inline] 77 | pub fn new_with_hint(grammar: &'g InternalGrammar, tokens: usize) -> Self { 78 | let forest_use_bytes = tokens * 16; 79 | let complete_use = match tokens { 80 | 0..=200 => 16, 81 | 200..=10_000 => 32, 82 | 10_000..=100_000 => 64, 83 | _ => 128, 84 | }; 85 | let mut recognizer = Recognizer { 86 | forest: F::new_with_limit(grammar, forest_use_bytes), 87 | grammar, 88 | // The initial location is 0. 89 | earleme: 0, 90 | // The first Earley set begins at 0 and ends at 0. The second Earley set begins at 0. 91 | indices: Vec::with_capacity(tokens + 1), 92 | current_medial_start: 0, 93 | // Reserve some capacity for vectors. 
94 | predicted: BitMatrix::new(tokens + 1, grammar.num_syms()), 95 | medial: Vec::with_capacity(tokens * ITEMS_PER_SET), 96 | complete: Vec::with_capacity(complete_use), 97 | lookahead_hint: None, 98 | }; 99 | recognizer.indices.push(0); 100 | recognizer.indices.push(0); 101 | recognizer.predict(grammar.start_sym()); 102 | recognizer 103 | } 104 | } 105 | 106 | impl<'g> MemoryUse for Recognizer<'g, NullForest> { 107 | type Arg = &'g InternalGrammar; 108 | 109 | fn memory_use(&self) -> usize { 110 | self.forest.memory_use() 111 | + self.predicted.memory_use() 112 | + self.medial.memory_use() 113 | + self.complete.memory_use() 114 | + self.indices.memory_use() 115 | } 116 | 117 | fn new_with_limit(grammar: &'g InternalGrammar, memory_limit: usize) -> Self { 118 | let complete_use = match memory_limit { 119 | 0..=1000 => 16, 120 | 1000..=500_000 => 32, 121 | 500_000..=2_000_000 => 64, 122 | _ => 128, 123 | }; 124 | let recognizer_use_bytes = 125 | memory_limit - complete_use * mem::size_of::>(); 126 | let bytes_per_set = mem::size_of::() 127 | + (grammar.num_syms() + 31) / 32 * 4 128 | + ITEMS_PER_SET * mem::size_of::>(); 129 | let sets_use = recognizer_use_bytes / bytes_per_set; 130 | let mut recognizer = Recognizer { 131 | forest: NullForest, 132 | grammar, 133 | // The initial location is 0. 134 | earleme: 0, 135 | // The first Earley set begins at 0 and ends at 0. The second Earley set begins at 0. 136 | indices: Vec::with_capacity(sets_use), 137 | current_medial_start: 0, 138 | // Reserve some capacity for vectors. 
139 | predicted: BitMatrix::new(sets_use, grammar.num_syms()), 140 | medial: Vec::with_capacity(sets_use * ITEMS_PER_SET), 141 | complete: Vec::with_capacity(complete_use), 142 | lookahead_hint: None, 143 | }; 144 | recognizer.indices.push(0); 145 | recognizer.indices.push(0); 146 | recognizer.predict(grammar.start_sym()); 147 | recognizer 148 | } 149 | } 150 | 151 | impl MemoryUse for Vec { 152 | type Arg = (); 153 | 154 | fn memory_use(&self) -> usize { 155 | self.capacity() * mem::size_of::() 156 | } 157 | 158 | fn new_with_limit(_arg: (), memory_limit: usize) -> Self { 159 | let capacity = memory_limit / mem::size_of::(); 160 | Self::with_capacity(capacity) 161 | } 162 | } 163 | 164 | impl MemoryUse for BitMatrix { 165 | type Arg = usize; 166 | 167 | fn memory_use(&self) -> usize { 168 | let (rows, columns) = self.size(); 169 | rows * ((columns + 31) / 32 * 4) 170 | } 171 | 172 | fn new_with_limit(num_columns: usize, memory_limit: usize) -> Self { 173 | let row_size = (num_columns + 31) / 32 * 4; 174 | let capacity = memory_limit / row_size; 175 | Self::new(capacity, num_columns) 176 | } 177 | } 178 | 179 | impl MemoryUse for BitVec { 180 | type Arg = (); 181 | 182 | fn memory_use(&self) -> usize { 183 | (self.capacity() + 31) / 32 * 4 184 | } 185 | 186 | fn new_with_limit(_arg: (), memory_limit: usize) -> Self { 187 | let capacity = memory_limit * 8; 188 | Self::with_capacity(capacity) 189 | } 190 | } 191 | 192 | impl MemoryUse for NullForest { 193 | type Arg = (); 194 | 195 | fn memory_use(&self) -> usize { 196 | 0 197 | } 198 | 199 | fn new_with_limit(_arg: (), _memory_limit: usize) -> Self { 200 | NullForest 201 | } 202 | } 203 | 204 | impl<'g> MemoryUse for Bocage<&'g InternalGrammar> { 205 | type Arg = &'g InternalGrammar; 206 | 207 | fn memory_use(&self) -> usize { 208 | self.graph.memory_use() + self.gc.liveness.memory_use() + self.gc.dfs.memory_use() 209 | } 210 | 211 | fn new_with_limit(grammar: &'g InternalGrammar, memory_limit: usize) -> Self { 212 
| let dfs_size = match memory_limit { 213 | 0..=1000 => 8, 214 | 1000..=100_000 => 32, 215 | _ => 64, 216 | }; 217 | let remaining_use = memory_limit - dfs_size * std::mem::size_of::(); 218 | let bytes_per_node = mem::size_of::() as f32 + 1.0 / 8.0; 219 | let graph_size = (remaining_use as f32 / bytes_per_node) as usize; 220 | Bocage::with_capacities(grammar, graph_size, dfs_size) 221 | } 222 | } 223 | 224 | impl<'g> MemoryUse for CompactBocage<&'g InternalGrammar> { 225 | type Arg = &'g InternalGrammar; 226 | 227 | fn memory_use(&self) -> usize { 228 | self.graph.vec.memory_use() + self.gc.liveness.memory_use() + self.gc.dfs.memory_use() 229 | } 230 | 231 | fn new_with_limit(grammar: &'g InternalGrammar, memory_limit: usize) -> Self { 232 | let dfs_size = match memory_limit { 233 | 0..=1000 => 8, 234 | 1000..=100_000 => 32, 235 | _ => 64, 236 | }; 237 | let remaining_use = memory_limit - dfs_size * std::mem::size_of::(); 238 | let bytes_per_node = mem::size_of::() as f32 + 1.0 / 8.0; 239 | let graph_size = (remaining_use as f32 / bytes_per_node) as usize; 240 | CompactBocage::with_capacities(grammar, graph_size, dfs_size) 241 | } 242 | } 243 | -------------------------------------------------------------------------------- /src/recognizer.rs: -------------------------------------------------------------------------------- 1 | use std::cmp::Ordering; 2 | use std::ops::Range; 3 | 4 | use bit_matrix::BitMatrix; 5 | use bit_matrix::row::BitVecSlice; 6 | use cfg::*; 7 | 8 | use events::{MedialItems, PredictedSymbols}; 9 | use forest::{Forest, NullForest}; 10 | use grammar::InternalGrammar; 11 | use item::{CompletedItem, CompletedItemLinked, Item, Origin}; 12 | // use policy::{PerformancePolicy, NullPerformancePolicy}; 13 | 14 | /// The recognizer implements the Earley algorithm. It parses the given input according 15 | /// to the `grammar`. The parse result is constructed inside the `forest`. 
///
/// To save memory, it only retains those parts of the Earley table that may be useful
/// in the future.
pub struct Recognizer<'g, F = NullForest>
where
    F: Forest,
{
    // The forest, which records the parse result as it is built.
    pub forest: F,
    // The grammar.
    pub grammar: &'g InternalGrammar,
    // The policy.
    // policy: P,

    // Chart's items.

    // Predicted items are stored in a bit matrix. The bit matrix has a row for every Earley set.
    //
    // Length of `predicted` is earleme + 1, so that earleme points to the last row.
    pub(super) predicted: BitMatrix,

    // Medial items.
    //
    // N.B. This structure could be moved into its own module.
    pub(super) medial: Vec<Item<F::NodeRef>>,
    // Gearley's secret sauce: we have a binary heap for online sorting.
    //
    // Completed items are stored for the latest Earley set.
    // They are ordered by (origin, dot), starting with highest
    // origin and dot. The creation of a completed item can only be caused
    // by a scan or a completion of an item that has a higher (origin, dot)
    // pair value.
    pub(super) complete: Vec<CompletedItemLinked<F::NodeRef>>,

    // Chart's indices. They point to the beginning of each Earley set.
    //
    // Length of `indices` is `earleme` + 2, so that earleme points to
    // the beginning of the range of indices for the last range.
    pub(super) indices: Vec<usize>,
    // Index that points to the beginning of the latest set. Equivalent to
    // the last element of `indices`.
    pub(super) current_medial_start: usize,

    // The input location.
    pub(super) earleme: usize,

    // Lookahead used to prune completions. `None` means no hint was given for
    // the current earleme; `Some(None)` means the hint is "end of input".
    pub(super) lookahead_hint: Option<Option<Symbol>>,
}

impl<'g, F> Recognizer<'g, F>
where
    F: Forest,
{
    /// Creates a new recognizer for a given grammar and forest. The recognizer has an initial
    /// Earley set that predicts the grammar's start symbol.
    pub fn new(grammar: &'g InternalGrammar, forest: F) -> Recognizer<'g, F> {
        let mut recognizer = Recognizer {
            forest,
            grammar,
            // The initial location is 0.
            earleme: 0,
            // The first Earley set begins at 0 and ends at 0. The second Earley set begins at 0.
            indices: vec![0, 0],
            current_medial_start: 0,
            // Reserve some capacity for vectors.
            predicted: BitMatrix::new(8, grammar.num_syms()),
            medial: Vec::with_capacity(256),
            complete: Vec::with_capacity(32),
            lookahead_hint: None,
        };
        recognizer.predict(grammar.start_sym());
        recognizer
    }

    /// Makes the current Earley set predict a given symbol.
    pub fn predict(&mut self, symbol: Symbol) {
        // OR the precomputed prediction row for `symbol` into the current set's row.
        self.predicted[self.earleme].predict(symbol, self.grammar.predict(symbol));
    }

    /// Begins an earleme. Currently a no-op, kept for API symmetry with `end_earleme`.
    pub fn begin_earleme(&mut self) {
        // nothing to do
    }

    /// Reads a token. Creates a leaf bocage node with the given value. After reading one or more
    /// tokens, the parse can be advanced.
    pub fn scan(&mut self, symbol: Symbol, value: F::LeafValue) {
        // This method is a part of the scan pass.
        if let Some(internal) = self.grammar.to_internal(symbol) {
            let earleme = self.earleme as Origin;
            // Add a leaf node to the forest with the given value.
            // The leaf ends at `earleme + 1`, the position after this token.
            let node = self.forest.leaf(symbol, earleme + 1, value);
            self.complete(earleme, internal, node);
        }
    }

    /// Provides a hint about the next token (`None` means end of input), letting the
    /// completion pass prune items the lookahead cannot follow.
    ///
    /// # Panics
    ///
    /// Panics if `lookahead` contains a symbol unknown to the grammar.
    #[inline]
    pub fn lookahead_hint(&mut self, lookahead: Option<Symbol>) {
        let to_internal = |sym| self.grammar.to_internal(sym).unwrap();
        self.lookahead_hint = Some(lookahead.map(to_internal));
    }

    /// Advances the parse. Calling this method may set the finished node, which can be accessed
    /// through the `finished_node` method.
    pub fn end_earleme(&mut self) -> bool {
        if self.is_exhausted() {
            false
        } else {
            // Completion pass, which saves successful parses.
            self.complete_all_sums_entirely();
            // Do the rest.
            self.advance_without_completion();
            true
        }
    }

    /// Advances the parse. Omits the completion pass, which should be done through
    /// the `completions` method. Keep in mind that calling this method may not set
    /// the finished node, which should be tracked externally.
    pub fn advance_without_completion(&mut self) {
        self.sort_medial_items();
        self.remove_unary_medial_items();
        self.remove_unreachable_sets();
        self.earleme += 1;
        // `earleme` is now at least 1.
        // Prediction pass.
        self.prediction_pass();
        // Store the index.
        self.current_medial_start = self.medial.len();
        self.indices.push(self.current_medial_start);
    }

    /// Checks whether the recognizer is exhausted. The recognizer is exhausted when it can't accept
    /// more input.
    #[inline]
    pub fn is_exhausted(&self) -> bool {
        // Exhausted when the current set gained no medial items and nothing awaits completion.
        self.medial.len() == self.current_medial_start && self.complete.is_empty()
    }

    /// Sorts medial items with deduplication.
    fn sort_medial_items(&mut self) {
        let grammar = &self.grammar;
        // Build index by postdot
        // These medial positions themselves are sorted by postdot symbol.
        self.medial[self.current_medial_start..].sort_unstable_by(|a, b| {
            (grammar.get_rhs1_cmp(a.dot), a.dot, a.origin).cmp(&(
                grammar.get_rhs1_cmp(b.dot),
                b.dot,
                b.origin,
            ))
        });
    }

    /// Pops medial items whose dot has no postdot symbol off the end of the current
    /// set. They sort last (see `sort_medial_items`), so popping from the back suffices.
    fn remove_unary_medial_items(&mut self) {
        while let Some(&item) = self.medial.last() {
            if self.grammar.get_rhs1(item.dot).is_some() {
                break;
            }
            self.medial.pop();
        }
    }

    /// Drops trailing Earley sets that no current medial item reaches back to,
    /// rewinding `earleme` accordingly. This bounds the chart's memory growth.
    fn remove_unreachable_sets(&mut self) {
        let origin = |item: &Item<F::NodeRef>| item.origin as usize;
        let max_origin = self.medial[self.current_medial_start..]
            .iter()
            .map(origin)
            .max()
            .unwrap_or(self.earleme);
        let diff = self.earleme - max_origin;
        if diff <= 1 {
            return;
        }
        // | 0 | 1 | 2 | 3 |
        //               ^ current_medial_start
        //   _________diff = 2
        //   ____drop = 1
        //       ^ self.earleme = 2
        // ^ m = 0
        // | 0 | 1 | 2 |
        let drop = diff - 1;
        let new_medial_start = self.indices[self.indices.len() - 1 - drop];
        self.indices.truncate(self.indices.len() - drop);
        let current_medial_length = self.medial.len() - self.current_medial_start;
        // Shift the current set's items down over the dropped sets.
        for i in 0..current_medial_length {
            self.medial[new_medial_start as usize + i] = self.medial[self.current_medial_start + i];
        }
        self.medial
            .truncate(new_medial_start as usize + current_medial_length);
        self.current_medial_start = new_medial_start as usize;
        self.earleme -= drop;
        self.predicted.truncate(self.earleme + 1);
        // Clear the last remaining prediction row; it is rebuilt by the next pass.
        for dst in self.predicted[self.earleme].iter_mut() {
            *dst = 0;
        }
    }

    /// Performs the prediction pass.
    fn prediction_pass(&mut self) {
        // Add a row to the matrix.
        self.predicted.grow(1, false);
        // Iterate through medial items in the current set.
        let iter = self.medial[self.current_medial_start..].iter();
        // For each medial item in the current set, predict its postdot symbol.
        let row = &mut self.predicted[self.earleme];
        for ei in iter {
            let postdot = self.grammar.get_rhs1(ei.dot).unwrap();
            row.predict(postdot, self.grammar.predict(postdot));
        }
    }

    /// Complete items.
    pub fn complete(&mut self, set_id: Origin, sym: Symbol, rhs_link: F::NodeRef) {
        debug_assert!(sym != self.grammar.eof());
        // Only complete if `sym` was predicted in the origin set — otherwise no
        // item there can consume it.
        if self.predicted[set_id as usize].get(sym.usize()) {
            self.complete_medial_items(set_id, sym, rhs_link);
            self.complete_predictions(set_id, sym, rhs_link);
        }
    }

    /// Complete medial items in a given Earley set.
    fn complete_medial_items(&mut self, set_id: Origin, sym: Symbol, rhs_link: F::NodeRef) {
        // Iterate through medial items to complete them.
        let set_range = self.medial_item_set_range(set_id, sym);
        if let Some(hint) = self.lookahead_hint {
            for idx in set_range {
                // New completed item.
                // from A ::= B • C
                // to   A ::= B C •
                //
                // We might link to medial items by index, here.
                let dot = self.medial[idx].dot;
                // Prune completions whose LHS cannot be followed by the lookahead.
                if !self.grammar.can_follow(self.grammar.get_lhs(dot), hint) {
                    continue;
                }
                self.heap_push_linked(CompletedItemLinked {
                    idx: idx as u32,
                    node: Some(rhs_link),
                });
            }
        } else {
            for idx in set_range {
                // New completed item.
                // from A ::= B • C
                // to   A ::= B C •
                //
                // We might link to medial items by index, here.
                self.heap_push_linked(CompletedItemLinked {
                    idx: idx as u32,
                    node: Some(rhs_link),
                });
            }
        }
    }

    /// Returns the contiguous range of medial items in set `set_id` whose postdot
    /// symbol is `sym`. Relies on the per-set ordering from `sort_medial_items`.
    fn medial_item_set_range(&mut self, set_id: Origin, sym: Symbol) -> Range<usize> {
        // Huh, can we reduce complexity here?
        let outer_start = self.indices[set_id as usize];
        let outer_end = self.indices[set_id as usize + 1];
        let specific_set = &self.medial[outer_start..outer_end];

        let inner_start = if specific_set.len() >= 16 {
            // When the set has 16 or more items, we use binary search to narrow down the range of
            // items.
            let set_idx = specific_set.binary_search_by(|ei| {
                (self.grammar.get_rhs1(ei.dot), Ordering::Greater).cmp(&(Some(sym), Ordering::Less))
            });
            match set_idx {
                Ok(idx) | Err(idx) => idx,
            }
        } else {
            specific_set
                .iter()
                .take_while(|ei| self.grammar.get_rhs1(ei.dot).unwrap() < sym)
                .count()
        };

        // The range contains items that have the same RHS1 symbol.
        let inner_end = specific_set[inner_start..]
            .iter()
            .take_while(|ei| self.grammar.get_rhs1(ei.dot) == Some(sym))
            .count();
        outer_start + inner_start..outer_start + inner_start + inner_end
    }

    /// Complete predicted items that have a common postdot symbol.
    fn complete_predictions(&mut self, set_id: Origin, sym: Symbol, rhs_link: F::NodeRef) {
        // New item, either completed or pre-terminal. Ensure uniqueness.
        // from A ::= • B c
        // to   A ::= B • c
        self.complete_unary_predictions(set_id, sym, rhs_link);
        self.complete_binary_predictions(set_id, sym, rhs_link);
    }

    /// Complete an item if predicted at rhs0.
    fn complete_unary_predictions(&mut self, set_id: Origin, sym: Symbol, rhs_link: F::NodeRef) {
        for trans in self.grammar.unary_completions(sym) {
            if self.predicted[set_id as usize].get(trans.symbol.usize()) {
                // No checks for uniqueness, because `medial` will be deduplicated.
                // from A ::= • B
                // to   A ::= B •
                // ---
                // We could push to `medial` as well and link from `complete` to `medial`.

                if let Some(hint) = self.lookahead_hint {
                    // Prune completions whose LHS cannot be followed by the lookahead.
                    if !self
                        .grammar
                        .can_follow(self.grammar.get_lhs(trans.dot), hint)
                    {
                        continue;
                    }
                }
                self.heap_push(CompletedItem {
                    origin: set_id,
                    dot: trans.dot,
                    left_node: rhs_link,
                    right_node: None,
                });
            }
        }
    }

    /// Complete an item if predicted at rhs1.
    fn complete_binary_predictions(&mut self, set_id: Origin, sym: Symbol, rhs_link: F::NodeRef) {
        for trans in self.grammar.binary_completions(sym) {
            if self.predicted[set_id as usize].get(trans.symbol.usize()) {
                if let Some(hint) = self.lookahead_hint {
                    // Prune items whose next expected symbol cannot start with the lookahead.
                    if !self
                        .grammar
                        .first(self.grammar.get_rhs1(trans.dot).unwrap(), hint)
                    {
                        continue;
                    }
                }
                // No checks for uniqueness, because `medial` will be deduplicated.
                // from A ::= • B C
                // to   A ::= B • C
                // Where C is terminal or nonterminal.

                self.medial.push(Item {
                    origin: set_id,
                    dot: trans.dot,
                    node: rhs_link,
                });
            }
        }
    }

    /// Resets the recognizer to its initial state by removing all contents.
    pub fn reset(&mut self) {
        self.earleme = 0;
        // NOTE(review): this re-predicts the start symbol into row 0 before the other
        // structures are cleared, and rows beyond 0 are not truncated here — confirm
        // that `predicted` holds no stale bits after a reset mid-parse.
        self.predict(self.grammar.start_sym());
        // Indices reset to [0, 0].
        self.indices.clear();
        self.indices.push(0);
        self.indices.push(0);
        // Current medial start reset to 0.
        self.current_medial_start = 0;
        // Remove items.
        self.medial.clear();
        self.complete.clear();
    }

    // Finished node access.

    /// Checks whether there is a valid parse that ends at the current
    /// position.
    pub fn is_finished(&self) -> bool {
        self.finished_node().is_some()
    }

    /// Retrieves the bocage node that represents the parse that has finished at the current
    /// location.
    ///
    /// # Panics
    ///
    /// Panics when the parse has not finished at the current location.
    pub fn finished_node(&self) -> Option<F::NodeRef> {
        if self.grammar.has_trivial_derivation() && self.earleme == 0 {
            // The empty parse: the grammar derives the empty string.
            Some(self.forest.nulling(self.grammar.externalized_start_sym()))
        } else {
            let has_dot_before_eof = |item: &&Item<_>| item.dot == self.grammar.dot_before_eof();
            let item_node = |item: &Item<_>| item.node;
            self.medial.last().filter(has_dot_before_eof).map(item_node)
        }
    }

    // Event access.

    /// Accesses predicted symbols.
    pub fn predicted_symbols(&self) -> PredictedSymbols {
        let earleme = self.earleme();
        PredictedSymbols {
            iter: self.predicted.iter_row(earleme),
            idx: 0,
        }
    }

    /// Accesses medial items.
    pub fn medial_items(&self) -> MedialItems<F::NodeRef> {
        let indices_len = self.indices.len();
        // Next-to-last index, which points to the beginning of the set before the current set.
        // The current set is empty.
        let items_start = self.indices[indices_len - 2];
        MedialItems {
            iter: self.medial[items_start..].iter(),
        }
    }

    // Accessors.

    /// Returns the current location number.
    pub fn earleme(&self) -> usize {
        self.earleme
    }

    // Completion

    /// Performs the completion pass.
    pub fn complete_all_sums_entirely(&mut self) {
        while let Some(mut completion) = self.next_sum() {
            // Include all items in the completion.
            completion.complete_entire_sum();
        }
        // The hint only applies to the earleme it was given for.
        self.lookahead_hint = None;
    }

    /// Allows iteration through groups of completions that have unique symbol and origin.
    pub fn next_sum<'r>(&'r mut self) -> Option<CompleteSum<'g, 'r, F>> {
        if let Some(ei) = self.heap_peek() {
            let lhs_sym = self.grammar.get_lhs(ei.dot);
            Some(CompleteSum {
                origin: ei.origin,
                lhs_sym,
                recognizer: self,
            })
        } else {
            None
        }
    }
}

/// A group of completed items.
pub struct CompleteSum<'g, 'r, F>
where
    F: Forest,
{
    /// The origin location of this completion.
    origin: Origin,
    /// The symbol of this completion.
    lhs_sym: Symbol,
    /// The recognizer.
    recognizer: &'r mut Recognizer<'g, F>,
}

impl<'g, 'r, F> CompleteSum<'g, 'r, F>
where
    F: Forest,
    'g: 'r,
{
    /// Completes all items.
    pub fn complete_entire_sum(&mut self) {
        self.recognizer.forest.begin_sum();
        // For each item, include it in the completion.
        while let Some(item) = self.next_summand() {
            self.push_summand(item);
        }
        // Use all items for completion.
        self.complete_sum();
    }

    /// Skips all items.
    pub fn skip_entire_sum(&mut self) {
        // Drain every item of this sum without adding anything to the forest.
        while let Some(_) = self.next_summand() {}
    }

    /// Allows iteration through completed items.
    ///
    /// Pops and returns the next completed item belonging to this sum — one sharing
    /// this sum's origin and LHS symbol. Returns `None` once the next heap item
    /// belongs to a different sum, or the heap is empty.
    #[inline]
    pub fn next_summand(&mut self) -> Option<CompletedItem<F::NodeRef>> {
        if let Some(completion) = self.recognizer.heap_peek() {
            let completion_lhs_sym = self.recognizer.grammar.get_lhs(completion.dot);
            if self.origin == completion.origin && self.lhs_sym == completion_lhs_sym {
                self.recognizer.heap_pop();
                Some(completion)
            } else {
                None
            }
        } else {
            None
        }
    }

    /// Includes an item in the completion.
    #[inline]
    pub fn push_summand(&mut self, completed_item: CompletedItem<F::NodeRef>) {
        self.recognizer.forest.push_summand(completed_item);
    }

    /// Uses the completion to complete items in the recognizer.
    #[inline]
    pub fn complete_sum(&mut self) -> F::NodeRef {
        let node = self.recognizer.forest.sum(self.lhs_sym, self.origin);
        self.recognizer.complete(self.origin, self.lhs_sym, node);
        node
    }

    /// Returns the origin location of this completion.
    #[inline]
    pub fn origin(&self) -> Origin {
        self.origin
    }

    /// Returns the symbol of this completion.
    #[inline]
    pub fn symbol(&self) -> Symbol {
        self.lhs_sym
    }
}

/// Bulk prediction: ORs a precomputed prediction row into a destination row.
trait Predict {
    fn predict(&mut self, sym: Symbol, source: &BitVecSlice);
}

impl Predict for BitVecSlice {
    fn predict(&mut self, sym: Symbol, source: &BitVecSlice) {
        // Do the work only once per symbol: if `sym` is already set here, its
        // prediction row was merged in earlier.
        if !self[sym.usize()] {
            // The source in the prediction matrix is the row that corresponds to the predicted
            // symbol.
            //
            // The destination in `predicted` is now the `self` that corresponds to the current
            // location.
            for (dst, &src) in self.iter_mut().zip(source.iter()) {
                *dst |= src;
            }
        }
    }
}
--------------------------------------------------------------------------------
/tests/grammars/ambiguous_arith.rs:
--------------------------------------------------------------------------------
use cfg::Symbol;
use cfg::earley::Grammar;

/// Builds an ambiguous arithmetic grammar:
/// `expr ::= expr op expr | num`, `op ::= + | - | * | /`,
/// and for each of ten digit symbols, `num ::= digit num | digit`.
pub fn grammar() -> Grammar {
    let mut bnf = Grammar::new();
    let (expr, op, num, plus, minus, mul, div) = bnf.sym();
    bnf.rule(expr).rhs([expr, op, expr])
        .rhs([num]);
    bnf.rule(op).rhs([plus])
        .rhs([minus])
        .rhs([mul])
        .rhs([div]);

    // Ten digit terminals, each with its own pair of `num` rules.
    for _ in 0..10 {
        let sym = bnf.sym();
        bnf.rule(num).rhs([sym, num])
            .rhs([sym]);
    }
    bnf.set_start(expr);
    bnf
}

/// Maps a terminal symbol to its semantic value: operator code for +,-,*,/
/// (indices 3..=6) and digit value for the digit symbols (indices 7..=16).
pub fn leaf(sym: Symbol) -> i32 {
    [0, 0, 0, 0, 1, 2, 3, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9][sym.usize()]
}

/// Evaluates a rule by its index; `args` holds the values of the RHS symbols.
/// Missing arguments default to `!0`.
pub fn rule(rule: u32, args: &[&i32]) -> i32 {
    let a0 = args.get(0).map(|f| **f).unwrap_or(!0);
    let a1 = args.get(1).map(|f| **f).unwrap_or(!0);
    let a2 = args.get(2).map(|f| **f).unwrap_or(!0);

    match rule {
        0 => {
            // expr ::= expr op expr — `a1` is the operator code from `leaf`.
            match a1 {
                0 => a0 + a2,
                1 => a0 - a2,
                2 => a0 * a2,
                3 => a0 / a2,
                _ => unreachable!(),
            }
        }
        1 => a0,

        2 => 0,
        3 => 1,
        4 => 2,
        5 => 3,

        // num ::= digit num (even indices) and num ::= digit (odd indices).
        6 | 8 | 10 | 12 | 14 | 16 | 18 | 20 | 22 | 24 => a0 * 10 + a1,
        7 | 9 | 11 | 13 | 15 | 17 | 19 | 21 | 23 | 25 => a0,
        _ => unreachable!(),
    }
}

// Maps a character token to its terminal index (before the `+ 3` offset below).
#[macro_export]
macro_rules! ambiguous_arith_rhs_elem {
    ('+') => (0);
    ('-') => (1);
    ('*') => (2);
    ('/') => (3);
    ('0') => (4);
    ('1') => (5);
    ('2') => (6);
    ('3') => (7);
    ('4') => (8);
    ('5') => (9);
    ('6') => (10);
    ('7') => (11);
    ('8') => (12);
    ('9') => (13);
    ($e:expr) => ($e);
}

// Builds a token slice; `+ 3` skips the three nonterminals (expr, op, num).
#[macro_export]
macro_rules! ambiguous_arith {
    ($($e:tt)+) => (
        &[$(ambiguous_arith_rhs_elem!($e) + 3,)+]
    )
}
--------------------------------------------------------------------------------
/tests/grammars/mod.rs:
--------------------------------------------------------------------------------
#![allow(dead_code)]

#[macro_use]
pub mod ambiguous_arith;
#[macro_use]
pub mod precedenced_arith;
--------------------------------------------------------------------------------
/tests/grammars/precedenced_arith.rs:
--------------------------------------------------------------------------------
use cfg::Symbol;
use cfg::earley::Grammar;

/// Builds an unambiguous arithmetic grammar with operator precedence encoded
/// in the sum / product / factor nonterminal hierarchy.
pub fn grammar() -> Grammar {
    let mut bnf = Grammar::new();
    let (sum, product, factor, number, plus, minus, mul, div, lparen, rparen) = bnf.sym();
    bnf.rule(sum).rhs([sum, plus, product])
        .rhs([sum, minus, product])
        .rhs([product])
        .rule(product).rhs([product, mul, factor])
        .rhs([product, div, factor])
        .rhs([factor])
        .rule(factor).rhs([lparen, sum, rparen])
        .rhs([number]);
    // Ten digit terminals, each with rules number ::= digit number | digit.
    for _ in 0..10 {
        let sym = bnf.sym();
        bnf.rule(number).rhs(&[sym, number])
            .rhs(&[sym]);
    }
    bnf.set_start(sum);
    bnf
}

/// Maps a terminal symbol to its semantic value: digit value for digit symbols
/// (indices 10..=19); operators and parentheses map to 0.
pub fn leaf(sym: Symbol) -> i32 {
    [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9][sym.usize()]
}

/// Evaluates a rule by its index; `args` holds the values of the RHS symbols.
/// Missing arguments default to `!0`.
pub fn rule(rule: u32, args: &[&i32]) -> i32 {
    let a0 = args.get(0).map(|f| **f).unwrap_or(!0);
    let a1 = args.get(1).map(|f| **f).unwrap_or(!0);
    let a2 = args.get(2).map(|f| **f).unwrap_or(!0);
    match rule {
        0 => a0 + a2,
        1 => a0 - a2,
        2 => a0,

        3 => a0 * a2,
        4 => a0 / a2,
        5 => a0,

        // factor ::= lparen sum rparen — the value is the inner sum.
        6 => a1,
        7 => a0,

        // number ::= digit number (even indices) and number ::= digit (odd).
        8 | 10 | 12 | 14 | 16 | 18 | 20 | 22 | 24 | 26 => a0 * 10 + a1,
        9 | 11 | 13 | 15 | 17 | 19 | 21 | 23 | 25 | 27 => a0,
        _ => unreachable!(),
    }
}

// Maps a character token to its terminal index (before the `+ 4` offset below).
#[macro_export]
macro_rules! precedenced_arith_rhs_elem {
    ('+') => (0);
    ('-') => (1);
    ('*') => (2);
    ('/') => (3);
    ('(') => (4);
    (')') => (5);
    ('0') => (6);
    ('1') => (7);
    ('2') => (8);
    ('3') => (9);
    ('4') => (10);
    ('5') => (11);
    ('6') => (12);
    ('7') => (13);
    ('8') => (14);
    ('9') => (15);
    ($e:expr) => ($e);
}

// Builds a token slice; `+ 4` skips the four nonterminals.
#[macro_export]
macro_rules!
 precedenced_arith {
    ($($e:tt)+) => (
        &[$(precedenced_arith_rhs_elem!($e) + 4,)+]
    )
}
--------------------------------------------------------------------------------
/tests/helpers/cartesian_product.rs:
--------------------------------------------------------------------------------
use std::marker::PhantomData;

/// One factor of the cartesian product: a raw-pointer view over a borrowed slice.
pub struct Factor<'a, V: 'a> {
    // First element of the slice.
    start: *const V,
    // One past the last element of the slice.
    end: *const V,
    // Ties the pointers to the slice's lifetime.
    marker: PhantomData<&'a V>,
}

impl<'a, V> Factor<'a, V> {
    fn new(slice: &'a [V]) -> Self {
        let start = slice.as_ptr();
        unsafe {
            Factor {
                start,
                // SAFETY: offsetting by the slice length yields the one-past-the-end
                // pointer, which is valid to form (but not dereference).
                end: start.offset(slice.len() as isize),
                marker: PhantomData,
            }
        }
    }

    /// Steps `ptr` to the next element; wraps back to the start and returns
    /// `true` (a "carry") when it runs off the end of this factor.
    fn advance(&mut self, ptr: &mut &'a V) -> bool {
        unsafe {
            *ptr = &*(*ptr as *const V).offset(1);
            if *ptr as *const _ == self.end {
                // NOTE(review): dereferences `start`; relies on the factor being non-empty.
                *ptr = &*self.start;
                true
            } else {
                false
            }
        }
    }
}

/// Odometer-style cartesian product over a list of slices. `as_slice` yields
/// the current combination; `advance` steps to the next one.
pub struct CartesianProduct<'a, V: 'a> {
    // Current position within each factor.
    ptrs: Vec<&'a V>,
    // The factors themselves.
    ranges: Vec<Factor<'a, V>>,
}

impl<'a, V> CartesianProduct<'a, V> {
    pub fn new() -> Self {
        CartesianProduct {
            ptrs: Vec::with_capacity(8),
            ranges: Vec::with_capacity(8),
        }
    }

    pub fn clear(&mut self) {
        self.ranges.clear();
        self.ptrs.clear();
    }

    /// Multiplies the cartesian product by a slice.
    ///
    /// NOTE(review): dereferences the slice's start pointer — passing an empty
    /// slice here is undefined behavior; confirm callers never do.
    pub fn push(&mut self, slice: &'a [V]) {
        self.ranges.push(Factor::new(slice));
        unsafe {
            self.ptrs
                .push(self.ranges.last().map(|factor| &*factor.start).unwrap());
        }
    }

    /// Multiplies the cartesian product by an iterator.
    pub fn extend<I>(&mut self, product: I)
    where
        I: Iterator<Item = &'a [V]>,
    {
        self.ranges.extend(product.map(|slice| Factor::new(slice)));
        unsafe {
            // FIXME wrong range: this re-pushes a pointer for EVERY range, not just
            // the newly added ones, so it is only correct when the product was empty
            // beforehand (as in `test_cartesian_product` below).
            self.ptrs
                .extend(self.ranges.iter().map(|factor| &*factor.start));
        }
    }

    /// Returns the current combination, one element per factor.
    pub fn as_slice(&self) -> &[&'a V] {
        &self.ptrs[..]
    }

    /// Steps to the next combination, carrying across factors like an odometer.
    /// Returns `false` once every combination has been produced.
    pub fn advance(&mut self) -> bool {
        for (ptr, factor) in self.ptrs.iter_mut().zip(&mut self.ranges) {
            if !factor.advance(ptr) {
                return true;
            }
        }
        false
    }
}

#[test]
fn test_cartesian_product() {
    let (a, b, c) = ([1, 2, 3], [1, 2], [1, 2, 3]);
    let factors: &[&[u32]] = &[&a[..], &b[..], &c[..]];
    let mut cartesian_product = CartesianProduct::new();
    cartesian_product.clear();
    cartesian_product.extend(factors.iter().cloned());
    let mut result = vec![];
    loop {
        {
            let val = cartesian_product.as_slice();
            result.push(*val[0] * 100 + *val[1] * 10 + *val[2]);
        };
        if !cartesian_product.advance() {
            break;
        }
    }
    assert_eq!(
        &result[..],
        &[
            111, 211, 311, 121, 221, 321, 112, 212, 312, 122, 222, 322, 113, 213, 313, 123, 223,
            323,
        ]
    );
}
--------------------------------------------------------------------------------
/tests/helpers/mod.rs:
--------------------------------------------------------------------------------
#![allow(dead_code)]

mod cartesian_product;
pub mod parse;
mod simple_compact_evaluator;
mod simple_evaluator;

pub use self::parse::Parse;
pub use self::simple_compact_evaluator::SimpleCompactEvaluator;
pub use self::simple_evaluator::SimpleEvaluator;
--------------------------------------------------------------------------------
/tests/helpers/parse.rs:
--------------------------------------------------------------------------------
use std::borrow::Borrow;
use std::fmt::Debug;

use cfg::Symbol;
use gearley::forest::bocage::order::NullOrder;
use gearley::forest::compact_bocage::order::NullOrder as CompactNullOrder;
use gearley::forest::{Bocage, CompactBocage, NullForest};
use gearley::grammar::InternalGrammar;
use gearley::recognizer::Recognizer;

/// Drives a recognizer over a token sequence and reports whether it accepts.
pub trait Parse {
    fn parse(&mut self, tokens: &[u32]) -> bool;
}

impl<'g, G> Parse for Recognizer<'g, Bocage<G>>
where
    Self: Debug,
    G: Borrow<InternalGrammar>,
{
    #[inline]
    fn parse(&mut self, tokens: &[u32]) -> bool {
        let mut iter = tokens.iter().enumerate().peekable();
        while let Some((i, &token)) = iter.next() {
            self.begin_earleme();
            trace!("before pass 1 {:?}", &*self);
            // The token's input position doubles as the leaf value.
            self.scan(Symbol::from(token), i as u32);
            trace!("before pass 2 {:?}", &*self);
            self.lookahead_hint(iter.peek().map(|(_i, &t)| Symbol::from(t)));
            assert!(self.end_earleme(), "failed to parse after {}@{}", token, i);
        }
        trace!("finished {:?}", &*self);

        if self.is_finished() {
            self.forest
                .mark_alive(self.finished_node().unwrap(), NullOrder::new());
        }
        self.is_finished()
    }
}

impl<'g, G> Parse for Recognizer<'g, CompactBocage<G>>
where
    Self: Debug,
    G: Borrow<InternalGrammar>,
{
    #[inline]
    fn parse(&mut self, tokens: &[u32]) -> bool {
        let mut iter = tokens.iter().enumerate().peekable();
        while let Some((i, &token)) = iter.next() {
            self.begin_earleme();
            trace!("before pass 1 {:?}", &*self);
            self.scan(Symbol::from(token), i as u32);
            trace!("before pass 2 {:?}", &*self);
            self.lookahead_hint(iter.peek().map(|(_i, &t)| Symbol::from(t)));
            assert!(self.end_earleme(), "failed to parse after {}@{}", token, i);
        }
        trace!("finished {:?}", &*self);

        if self.is_finished() {
            self.forest
                .mark_alive(self.finished_node().unwrap(), CompactNullOrder::new());
        }
        self.is_finished()
    }
}

impl<'g> Parse for Recognizer<'g, NullForest>
where
    Self: Debug,
{
    #[inline]
    fn parse(&mut self, tokens: &[u32]) -> bool {
        for &token in tokens.iter() {
            self.begin_earleme();
            trace!("before pass 1 {:?}", &*self);
            // The null forest carries no values; scan with the unit value.
            self.scan(Symbol::from(token), ());
            trace!("before pass 2 {:?}", &*self);
            assert!(self.end_earleme());
        }
        trace!("finished {:?}", &*self);

        self.is_finished()
    }
}
--------------------------------------------------------------------------------
/tests/helpers/simple_compact_evaluator.rs:
--------------------------------------------------------------------------------
use std::borrow::Borrow;
use std::collections::BTreeMap;
use std::fmt;
use std::mem;

use cfg::Symbol;

use gearley::forest::compact_bocage::traverse::{LeafHandle, NullingHandle, SumHandle, Traverse};
use gearley::forest::node_handle::NodeHandle;

use gearley::grammar::InternalGrammar;

use super::cartesian_product::CartesianProduct;

/// Evaluates all parses in a compact bocage by bottom-up traversal.
/// `leaf` computes terminal values, `rule` folds one rule application,
/// and `null` produces values for nullable symbols.
pub struct SimpleCompactEvaluator<V, F, G, H> {
    // Values computed for the node currently being evaluated.
    values: Vec<V>,
    // Finished per-node value sets, keyed by node handle.
    evaluated: BTreeMap<NodeHandle, Vec<V>>,
    leaf: F,
    rule: G,
    null: H,
}

impl<V, FLeaf, FRule, FNull> SimpleCompactEvaluator<V, FLeaf, FRule, FNull>
where
    FLeaf: FnMut(Symbol) -> V,
    FRule: FnMut(u32, &[&V]) -> V,
    FNull: for<'r> FnMut(Symbol, &'r mut Vec<V>),
    V: fmt::Debug + Clone,
{
    pub fn new(leaf: FLeaf, rule: FRule, null: FNull) -> Self {
        SimpleCompactEvaluator {
            values: vec![],
            evaluated: BTreeMap::new(),
            leaf,
            rule,
            null,
        }
    }

    /// Walks the forest bottom-up, computing every node's value set, and
    /// returns the value set of `root`.
    pub fn traverse<'f, G>(&mut self, traverse: &mut Traverse<'f, G>, root: NodeHandle) -> Vec<V>
    where
        G: Borrow<InternalGrammar>,
    {
        while let Some(mut item) = traverse.next_node() {
            match &mut item.item {
                &mut SumHandle(ref mut products) => {
                    while let Some(product) = products.next_product() {
                        // Every combination of child values yields one value —
                        // ambiguity multiplies out through the cartesian product.
                        let mut cartesian_product = CartesianProduct::new();
                        for &(_sym, handle) in product.factors {
                            cartesian_product.push(&self.evaluated[&handle][..]);
                        }
                        loop {
                            let v = (self.rule)(product.action, cartesian_product.as_slice());
                            self.values.push(v);
                            if !cartesian_product.advance() {
                                break;
                            }
                        }
                    }
                }
                &mut NullingHandle => {
                    (self.null)(item.symbol, &mut self.values);
                }
                &mut LeafHandle => {
                    let v = (self.leaf)(item.symbol);
                    self.values.push(v);
                }
            }
            // Hand the accumulated values over to this node and reset the scratch vec.
            self.evaluated
                .insert(item.handle(), mem::replace(&mut self.values, vec![]));
            item.end_evaluation();
        }
        self.evaluated[&root].clone()
    }
}
--------------------------------------------------------------------------------
/tests/helpers/simple_evaluator.rs:
--------------------------------------------------------------------------------
use std::borrow::Borrow;
use std::fmt;
use std::mem;

use cfg::Symbol;

use gearley::forest::bocage::traverse::{LeafHandle, NullingHandle, SumHandle, Traverse};
use gearley::forest::node_handle::NodeHandle;

use gearley::grammar::InternalGrammar;

use super::cartesian_product::CartesianProduct;

/// Like `SimpleCompactEvaluator`, but for the regular bocage: per-node value
/// sets are stored in a vector indexed by evaluation order instead of a map.
pub struct SimpleEvaluator<V, F, G, H> {
    // Values computed for the node currently being evaluated.
    values: Vec<V>,
    // Finished per-node value sets, indexed by evaluation result id.
    evaluated: Vec<Vec<V>>,
    leaf: F,
    rule: G,
    null: H,
}

impl<V, FLeaf, FRule, FNull> SimpleEvaluator<V, FLeaf, FRule, FNull>
where
    FLeaf: FnMut(Symbol) -> V,
    FRule: FnMut(u32, &[&V]) -> V,
    FNull: for<'r> FnMut(Symbol, &'r mut Vec<V>),
    V: fmt::Debug,
{
    pub fn new(leaf: FLeaf, rule: FRule, null: FNull) -> Self {
        SimpleEvaluator {
            values: vec![],
            evaluated: vec![],
            leaf,
            rule,
            null,
        }
    }

    /// Walks the forest bottom-up; the last evaluated node's value set is the result.
    pub fn traverse<'f, G>(&mut self, traverse: &mut Traverse<'f, G>, _root: NodeHandle) -> Vec<V>
    where
        G: Borrow<InternalGrammar>,
    {
        while let Some(mut item) = traverse.next_node() {
            match &mut item.item {
                &mut SumHandle(ref mut products) => {
                    while let Some(product) = products.next_product() {
                        // Every combination of child values yields one value.
                        let mut cartesian_product = CartesianProduct::new();
                        for &(_sym, values_idx) in product.factors {
                            cartesian_product.push(&self.evaluated[values_idx as usize][..]);
                        }
                        loop {
                            let v = (self.rule)(product.action, cartesian_product.as_slice());
                            self.values.push(v);
                            if !cartesian_product.advance() {
break; 56 | } 57 | } 58 | } 59 | } 60 | &mut NullingHandle => { 61 | (self.null)(item.symbol, &mut self.values); 62 | } 63 | &mut LeafHandle(_) => { 64 | let v = (self.leaf)(item.symbol); 65 | self.values.push(v); 66 | } 67 | } 68 | let result = self.evaluated.len() as u32; 69 | self.evaluated.push(mem::replace(&mut self.values, vec![])); 70 | item.set_evaluation_result(result); 71 | } 72 | self.evaluated.pop().unwrap() 73 | } 74 | } 75 | -------------------------------------------------------------------------------- /tests/test_c.rs: -------------------------------------------------------------------------------- 1 | extern crate cfg; 2 | extern crate gearley; 3 | extern crate c_lexer_logos; 4 | 5 | macro_rules! trace(($($tt:tt)*) => ()); 6 | 7 | mod helpers; 8 | 9 | use cfg::earley::Grammar; 10 | use gearley::forest::Bocage; 11 | use gearley::grammar::InternalGrammar; 12 | use gearley::recognizer::Recognizer; 13 | use gearley::memory_use::MemoryUse; 14 | 15 | use helpers::Parse; 16 | 17 | const _SYM_NAMES: &'static [&'static str] = &[ 18 | "term", "identifier", "signed", "const_", "inline", "auto", "break_", "case", "char_", "continue_", "default", 19 | "do_", "double", "else_", "enum_", "extern_", "float", "for_", "goto", "if_", "int", "long", "register", "return_", 20 | "short", "sizeof_", "static_", "struct_", "switch", "typedef", "union", "unsigned", "void", "volatile", "while_", 21 | "constant", "string_literal", "right_assign", "left_assign", "add_assign", "sub_assign", "mul_assign", 22 | "div_assign", "mod_assign", "and_assign", "xor_assign", "or_assign", "right_op", "left_op", "inc_op", "dec_op", 23 | "ptr_op", "and_op", "or_op", "le_op", "ge_op", "eq_op", "ne_op", "elipsis", "restrict", "bool_", "complex", "imaginary", 24 | "lparen", "rparen", "lbracket", "rbracket", "lbrace", "rbrace", "dot", "colon", "semicolon", "comma", "ampersand", 25 | "star", "plus", "minus", "tilde", "exclamation", "slash", "percent", "langle", "rangle", "xor", "pipe", 
"question", 26 | "equal", 27 | "start", "primary_expression", "postfix_expression", 28 | "argument_expression_list_opt", "argument_expression_list", "unary_expression", "unary_operator", 29 | "cast_expression", "multiplicative_expression", "additive_expression", "shift_expression", 30 | "relational_expression", "equality_expression", "AND_expression", "exclusive_OR_expression", 31 | "inclusive_OR_expression", "logical_AND_expression", "logical_OR_expression", 32 | "conditional_expression", "assignment_expression", "assignment_operator", "expression", 33 | "constant_expression", "declaration", "init_declarator_list_opt", "declaration_specifiers", 34 | "declaration_specifiers_opt", "init_declarator_list", "init_declarator", "storage_class_specifier", 35 | "type_specifier", "struct_or_union_specifier", "identifier_opt", "struct_or_union", 36 | "struct_declaration_list", "struct_declaration", "specifier_qualifier_list", 37 | "specifier_qualifier_list_opt", "struct_declarator_list", "struct_declarator", "declarator_opt", 38 | "enum_specifier", "enumerator_list", "enumerator", "type_qualifier", "function_specifier", "declarator", 39 | "pointer_opt", "direct_declarator", "type_qualifier_list_opt", "identifier_list_opt", "pointer", 40 | "type_qualifier_list", "parameter_type_list", "parameter_list", "parameter_declaration", 41 | "abstract_declarator_opt", "identifier_list", "abstract_declarator", "direct_abstract_declarator", 42 | "direct_abstract_declarator_opt", "assignment_expression_opt", "parameter_type_list_opt", 43 | "typedef_name", "initializer", "initializer_list", "designation_opt", "designation", "designator_list", 44 | "designator", "statement", "labeled_statement", "compound_statement", "block_item_list_opt", 45 | "block_item_list", "block_item", "expression_statement", "expression_opt", "selection_statement", 46 | "iteration_statement", "jump_statement", "translation_unit", "external_declaration", 47 | "function_definition", "declaration_list_opt", 
"declaration_list", "enumeration_constant", 48 | "type_name", "error", 49 | ]; 50 | 51 | #[allow(non_snake_case)] 52 | fn grammar() -> Grammar { 53 | let mut grammar = Grammar::new(); 54 | let ( 55 | _term, identifier, signed, const_, inline, auto, break_, case, char_, continue_, default, 56 | do_, double, else_, enum_, extern_, float, for_, goto, if_, int, long, register, return_, 57 | short, sizeof_, static_, struct_, switch, typedef, union, unsigned, void, volatile, while_, 58 | constant, string_literal, right_assign, left_assign, add_assign, sub_assign, mul_assign, 59 | div_assign, mod_assign, and_assign, xor_assign, or_assign, right_op, left_op, inc_op, dec_op, 60 | ptr_op, and_op, or_op, le_op, ge_op, eq_op, ne_op, elipsis, restrict, bool_, complex, imaginary, 61 | lparen, rparen, lbracket, rbracket, lbrace, rbrace, dot, colon, semicolon, comma, ampersand, 62 | star, plus, minus, tilde, exclamation, slash, percent, langle, rangle, xor, pipe, question, 63 | equal 64 | ) = grammar.sym(); 65 | 66 | let ( 67 | start, primary_expression, postfix_expression, 68 | argument_expression_list_opt, argument_expression_list, unary_expression, unary_operator, 69 | cast_expression, multiplicative_expression, additive_expression, shift_expression, 70 | relational_expression, equality_expression, AND_expression, exclusive_OR_expression, 71 | inclusive_OR_expression, logical_AND_expression, logical_OR_expression, 72 | conditional_expression, assignment_expression, assignment_operator, expression, 73 | constant_expression, declaration, init_declarator_list_opt, declaration_specifiers, 74 | declaration_specifiers_opt, init_declarator_list, init_declarator, storage_class_specifier, 75 | type_specifier, struct_or_union_specifier, identifier_opt, struct_or_union, 76 | struct_declaration_list, struct_declaration, specifier_qualifier_list, 77 | specifier_qualifier_list_opt, struct_declarator_list, struct_declarator, declarator_opt, 78 | enum_specifier, enumerator_list, enumerator, 
type_qualifier, function_specifier, declarator, 79 | pointer_opt, direct_declarator, type_qualifier_list_opt, identifier_list_opt, pointer, 80 | type_qualifier_list, parameter_type_list, parameter_list, parameter_declaration, 81 | abstract_declarator_opt, identifier_list, abstract_declarator, direct_abstract_declarator, 82 | direct_abstract_declarator_opt, assignment_expression_opt, parameter_type_list_opt, 83 | typedef_name, initializer, initializer_list, designation_opt, designation, designator_list, 84 | designator, statement, labeled_statement, compound_statement, block_item_list_opt, 85 | block_item_list, block_item, expression_statement, expression_opt, selection_statement, 86 | iteration_statement, jump_statement, translation_unit, external_declaration, 87 | function_definition, declaration_list_opt, declaration_list, enumeration_constant, 88 | type_name, error, 89 | ) = grammar.sym(); 90 | 91 | grammar.rule(start).rhs([translation_unit]); 92 | grammar.rule(primary_expression).rhs([identifier]) 93 | .rhs([constant]) 94 | .rhs([string_literal]) 95 | .rhs([lparen, expression, rparen]); 96 | grammar.rule(postfix_expression).rhs([primary_expression]) 97 | .rhs([postfix_expression, lbracket, expression, rbracket]) 98 | .rhs([postfix_expression, lparen, argument_expression_list_opt, rparen]) 99 | .rhs([postfix_expression, dot, identifier]) 100 | .rhs([postfix_expression, ptr_op, identifier]) 101 | .rhs([postfix_expression, inc_op]) 102 | .rhs([postfix_expression, dec_op]) 103 | .rhs([lparen, type_name, rparen, lbrace, initializer_list, rbrace]) 104 | .rhs([lparen, type_name, rparen, lbrace, initializer_list, comma, rbrace]); 105 | grammar.rule(argument_expression_list_opt).rhs([]) 106 | .rhs([argument_expression_list]); 107 | grammar.rule(argument_expression_list).rhs([assignment_expression]) 108 | .rhs([argument_expression_list, comma, assignment_expression]); 109 | grammar.rule(unary_expression).rhs([postfix_expression]) 110 | .rhs([inc_op, unary_expression]) 
111 | .rhs([dec_op, unary_expression]) 112 | .rhs([unary_operator, cast_expression]) 113 | .rhs([sizeof_, unary_expression]) 114 | .rhs([sizeof_, lparen, type_name, rparen]); 115 | grammar.rule(unary_operator).rhs([ampersand]) 116 | .rhs([star]) 117 | .rhs([plus]) 118 | .rhs([minus]) 119 | .rhs([tilde]) 120 | .rhs([exclamation]); 121 | grammar.rule(cast_expression).rhs([unary_expression]) 122 | .rhs([lparen, type_name, rparen, cast_expression]); 123 | grammar.rule(multiplicative_expression).rhs([cast_expression]) 124 | .rhs([multiplicative_expression, star, cast_expression]) 125 | .rhs([multiplicative_expression, slash, cast_expression]) 126 | .rhs([multiplicative_expression, percent, cast_expression]); 127 | grammar.rule(additive_expression).rhs([multiplicative_expression]) 128 | .rhs([additive_expression, plus, multiplicative_expression]) 129 | .rhs([additive_expression, minus, multiplicative_expression]); 130 | grammar.rule(shift_expression).rhs([additive_expression]) 131 | .rhs([shift_expression, left_op, additive_expression]) 132 | .rhs([shift_expression, right_op, additive_expression]); 133 | grammar.rule(relational_expression).rhs([shift_expression]) 134 | .rhs([relational_expression, langle, shift_expression]) 135 | .rhs([relational_expression, rangle, shift_expression]) 136 | .rhs([relational_expression, le_op, shift_expression]) 137 | .rhs([relational_expression, ge_op, shift_expression]); 138 | grammar.rule(equality_expression).rhs([relational_expression]) 139 | .rhs([equality_expression, eq_op, relational_expression]) 140 | .rhs([equality_expression, ne_op, relational_expression]); 141 | grammar.rule(AND_expression).rhs([equality_expression]) 142 | .rhs([AND_expression, ampersand, equality_expression]); 143 | grammar.rule(exclusive_OR_expression).rhs([AND_expression]) 144 | .rhs([exclusive_OR_expression, xor, AND_expression]); 145 | grammar.rule(inclusive_OR_expression).rhs([exclusive_OR_expression]) 146 | .rhs([inclusive_OR_expression, pipe, 
exclusive_OR_expression]); 147 | grammar.rule(logical_AND_expression).rhs([inclusive_OR_expression]) 148 | .rhs([logical_AND_expression, and_op, inclusive_OR_expression]); 149 | grammar.rule(logical_OR_expression).rhs([logical_AND_expression]) 150 | .rhs([logical_OR_expression, or_op, logical_AND_expression]); 151 | grammar.rule(conditional_expression).rhs([logical_OR_expression]) 152 | .rhs([logical_OR_expression, question, expression, colon, conditional_expression]); 153 | grammar.rule(assignment_expression).rhs([conditional_expression]) 154 | .rhs([unary_expression, assignment_operator, assignment_expression]); 155 | grammar.rule(assignment_operator).rhs([equal]) 156 | .rhs([mul_assign]) 157 | .rhs([div_assign]) 158 | .rhs([mod_assign]) 159 | .rhs([add_assign]) 160 | .rhs([sub_assign]) 161 | .rhs([left_assign]) 162 | .rhs([right_assign]) 163 | .rhs([and_assign]) 164 | .rhs([xor_assign]) 165 | .rhs([or_assign]); 166 | grammar.rule(expression).rhs([assignment_expression]) 167 | .rhs([expression, comma, assignment_expression]) 168 | .rhs([error]); 169 | grammar.rule(constant_expression).rhs([conditional_expression]); 170 | 171 | grammar.rule(declaration).rhs([declaration_specifiers, init_declarator_list_opt, semicolon]) 172 | .rhs([error]); 173 | grammar.rule(init_declarator_list_opt).rhs([]) 174 | .rhs([init_declarator_list]); 175 | grammar.rule(declaration_specifiers).rhs([storage_class_specifier, declaration_specifiers_opt]) 176 | .rhs([type_specifier, declaration_specifiers_opt]) 177 | .rhs([type_qualifier, declaration_specifiers_opt]) 178 | .rhs([function_specifier, declaration_specifiers_opt]); 179 | grammar.rule(declaration_specifiers_opt).rhs([]) 180 | .rhs([declaration_specifiers]); 181 | grammar.rule(init_declarator_list).rhs([init_declarator]) 182 | .rhs([init_declarator_list, comma, init_declarator]); 183 | grammar.rule(init_declarator).rhs([declarator]) 184 | .rhs([declarator, equal, initializer]); 185 | 
grammar.rule(storage_class_specifier).rhs([typedef]) 186 | .rhs([extern_]) 187 | .rhs([static_]) 188 | .rhs([auto]) 189 | .rhs([register]); 190 | grammar.rule(type_specifier).rhs([void]) 191 | .rhs([char_]) 192 | .rhs([short]) 193 | .rhs([int]) 194 | .rhs([long]) 195 | .rhs([float]) 196 | .rhs([double]) 197 | .rhs([signed]) 198 | .rhs([unsigned]) 199 | .rhs([bool_]) 200 | .rhs([complex]) 201 | .rhs([imaginary]) 202 | .rhs([struct_or_union_specifier]) 203 | .rhs([enum_specifier]) 204 | .rhs([typedef_name]); 205 | grammar.rule(struct_or_union_specifier).rhs([struct_or_union, identifier_opt, lbrace, struct_declaration_list, rbrace]) 206 | .rhs([struct_or_union, identifier]); 207 | grammar.rule(identifier_opt).rhs([]) 208 | .rhs([identifier]); 209 | grammar.rule(struct_or_union).rhs([struct_]) 210 | .rhs([union]); 211 | grammar.rule(struct_declaration_list).rhs([struct_declaration]) 212 | .rhs([struct_declaration_list, struct_declaration]); 213 | grammar.rule(struct_declaration).rhs([specifier_qualifier_list, struct_declarator_list, semicolon]); 214 | grammar.rule(specifier_qualifier_list).rhs([type_specifier, specifier_qualifier_list_opt]) 215 | .rhs([type_qualifier, specifier_qualifier_list_opt]); 216 | grammar.rule(specifier_qualifier_list_opt).rhs([]) 217 | .rhs([specifier_qualifier_list]); 218 | grammar.rule(struct_declarator_list).rhs([struct_declarator]) 219 | .rhs([struct_declarator_list, comma, struct_declarator]); 220 | grammar.rule(struct_declarator).rhs([declarator]) 221 | .rhs([declarator_opt, colon, constant_expression]); 222 | grammar.rule(declarator_opt).rhs([]) 223 | .rhs([declarator]); 224 | grammar.rule(enum_specifier).rhs([enum_, identifier_opt, lbrace, enumerator_list, rbrace]) 225 | .rhs([enum_, identifier_opt, lbrace, enumerator_list, comma, rbrace]) 226 | .rhs([enum_, identifier]); 227 | grammar.rule(enumerator_list).rhs([enumerator]) 228 | .rhs([enumerator_list, comma, enumerator]); 229 | grammar.rule(enumerator).rhs([enumeration_constant]) 230 
| .rhs([enumeration_constant, equal, constant_expression]); 231 | grammar.rule(type_qualifier).rhs([const_]) 232 | .rhs([restrict]) 233 | .rhs([volatile]); 234 | grammar.rule(function_specifier).rhs([inline]); 235 | grammar.rule(declarator).rhs([pointer_opt, direct_declarator]); 236 | grammar.rule(pointer_opt).rhs([]) 237 | .rhs([pointer]); 238 | grammar.rule(direct_declarator).rhs([identifier]) 239 | .rhs([lparen, declarator, rparen]) 240 | .rhs([direct_declarator, lbracket, type_qualifier_list_opt, assignment_expression_opt, rbracket]) 241 | .rhs([direct_declarator, lbracket, static_, type_qualifier_list_opt, assignment_expression, rbracket]) 242 | .rhs([direct_declarator, lbracket, type_qualifier_list, static_, assignment_expression, rbracket]) 243 | .rhs([direct_declarator, lbracket, type_qualifier_list_opt, star, rbracket]) 244 | .rhs([direct_declarator, lparen, parameter_type_list, rparen]) 245 | .rhs([direct_declarator, lparen, identifier_list_opt, rparen]); 246 | grammar.rule(type_qualifier_list_opt).rhs([]) 247 | .rhs([type_qualifier_list]); 248 | grammar.rule(identifier_list_opt).rhs([]) 249 | .rhs([identifier_list]); 250 | grammar.rule(pointer).rhs([star, type_qualifier_list_opt]) 251 | .rhs([star, type_qualifier_list_opt, pointer]); 252 | grammar.rule(type_qualifier_list).rhs([type_qualifier]) 253 | .rhs([type_qualifier_list, type_qualifier]); 254 | grammar.rule(parameter_type_list).rhs([parameter_list]) 255 | .rhs([parameter_list, comma, elipsis]); 256 | grammar.rule(parameter_list).rhs([parameter_declaration]) 257 | .rhs([parameter_list, comma, parameter_declaration]); 258 | grammar.rule(parameter_declaration).rhs([declaration_specifiers, declarator]) 259 | .rhs([declaration_specifiers, abstract_declarator_opt]); 260 | grammar.rule(abstract_declarator_opt).rhs([]) 261 | .rhs([abstract_declarator]); 262 | grammar.rule(identifier_list).rhs([identifier]) 263 | .rhs([identifier_list, comma, identifier]); 264 | 
grammar.rule(type_name).rhs([specifier_qualifier_list, abstract_declarator_opt]);
265 |     grammar.rule(abstract_declarator).rhs([pointer])
266 |         .rhs([pointer_opt, direct_abstract_declarator]);
267 |     grammar.rule(direct_abstract_declarator).rhs([lparen, abstract_declarator, rparen])
268 |         .rhs([direct_abstract_declarator_opt, lbracket, assignment_expression_opt, rbracket])
269 |         .rhs([direct_abstract_declarator_opt, lbracket, star, rbracket])
270 |         .rhs([direct_abstract_declarator_opt, lparen, parameter_type_list_opt, rparen]);
271 |     grammar.rule(direct_abstract_declarator_opt).rhs([])
272 |         .rhs([direct_abstract_declarator]);
273 |     grammar.rule(assignment_expression_opt).rhs([])
274 |         .rhs([assignment_expression]);
275 |     grammar.rule(parameter_type_list_opt).rhs([])
276 |         .rhs([parameter_type_list]);
277 |     grammar.rule(typedef_name).rhs([identifier]);
278 |     grammar.rule(initializer).rhs([assignment_expression])
279 |         .rhs([lbrace, initializer_list, rbrace])
280 |         .rhs([lbrace, initializer_list, comma, rbrace]);
281 |     grammar.rule(initializer_list).rhs([designation_opt, initializer])
282 |         .rhs([initializer_list, comma, designation_opt, initializer]);
283 |     grammar.rule(designation_opt).rhs([])
284 |         .rhs([designation]);
285 |     grammar.rule(designation).rhs([designator_list, equal]);
286 |     grammar.rule(designator_list).rhs([designator])
287 |         .rhs([designator_list, designator]);
288 |     grammar.rule(designator).rhs([lbracket, constant_expression, rbracket]) // C11 6.7.9: designator is '[' constant-expression ']' — opening bracket first (was rbracket twice)
289 |         .rhs([dot, identifier]);
290 |     grammar.rule(statement).rhs([labeled_statement])
291 |         .rhs([compound_statement])
292 |         .rhs([expression_statement])
293 |         .rhs([selection_statement])
294 |         .rhs([iteration_statement])
295 |         .rhs([jump_statement])
296 |         .rhs([error]);
297 |     grammar.rule(labeled_statement).rhs([identifier, colon, statement])
298 |         .rhs([case, constant_expression, colon, statement])
299 |         .rhs([default, colon, statement]);
300 |     grammar.rule(compound_statement).rhs([lbrace, block_item_list_opt,
rbrace]); 301 | grammar.rule(block_item_list_opt).rhs([]) 302 | .rhs([block_item_list]); 303 | grammar.rule(block_item_list).rhs([block_item]) 304 | .rhs([block_item_list, block_item]); 305 | grammar.rule(block_item).rhs([declaration]) 306 | .rhs([statement]); 307 | grammar.rule(expression_statement).rhs([expression_opt, semicolon]); 308 | grammar.rule(expression_opt).rhs([]) 309 | .rhs([expression]); 310 | grammar.rule(selection_statement).rhs([if_, lparen, expression, rparen, statement]) 311 | .rhs([if_, lparen, expression, rparen, statement, else_, statement]) 312 | .rhs([switch, lparen, expression, rparen, statement]); 313 | grammar.rule(iteration_statement).rhs([while_, lparen, expression, rparen, statement]) 314 | .rhs([do_, statement, while_, lparen, expression, rparen, semicolon]) 315 | .rhs([for_, lparen, expression_opt, semicolon, expression_opt, semicolon, expression_opt, rparen, statement]) 316 | .rhs([for_, lparen, declaration, expression_opt, semicolon, expression_opt, rparen, statement]); 317 | grammar.rule(jump_statement).rhs([goto, identifier, semicolon]) 318 | .rhs([continue_, semicolon]) 319 | .rhs([break_, semicolon]) 320 | .rhs([return_, expression_opt, semicolon]); 321 | grammar.rule(translation_unit).rhs([external_declaration]) 322 | .rhs([translation_unit, external_declaration]); 323 | grammar.rule(external_declaration).rhs([function_definition]) 324 | .rhs([declaration]); 325 | grammar.rule(function_definition).rhs([declaration_specifiers, declarator, declaration_list_opt, compound_statement]); 326 | grammar.rule(declaration_list_opt).rhs([]) 327 | .rhs([declaration_list]); 328 | grammar.rule(declaration_list).rhs([declaration]) 329 | .rhs([declaration_list, declaration]); 330 | grammar.rule(enumeration_constant).rhs([identifier]); 331 | 332 | grammar.set_start(start); 333 | grammar 334 | } 335 | 336 | #[test] 337 | fn test_parse_c() { 338 | use c_lexer_logos::Lexer; 339 | use c_lexer_logos::token::Token::*; 340 | let external = grammar(); 
341 | let mut grammar = Grammar::new(); 342 | let ( 343 | _term, identifier, signed, const_, inline, _auto, break_, case, char_, continue_, default, 344 | do_, double, else_, enum_, extern_, float, for_, goto, if_, int, long, register, return_, 345 | short, sizeof_, static_, struct_, switch, typedef, union, unsigned, void, volatile, while_, 346 | constant, string_literal, right_assign, left_assign, add_assign, sub_assign, mul_assign, 347 | div_assign, mod_assign, and_assign, xor_assign, or_assign, right_op, left_op, inc_op, dec_op, 348 | ptr_op, and_op, or_op, le_op, ge_op, eq_op, ne_op, elipsis, restrict, bool_, complex, imaginary, 349 | lparen, rparen, lbracket, rbracket, lbrace, rbrace, dot, colon, semicolon, comma, ampersand, 350 | star, plus, minus, tilde, exclamation, slash, percent, langle, rangle, xor, pipe, question, 351 | equal 352 | ) = grammar.sym(); 353 | 354 | let contents = include_str!("../benches/part_gcc_test.i"); 355 | let tokens: Vec<_> = Lexer::lex(&contents[..]).unwrap().into_iter().filter_map(|token| { 356 | // println!("{:?}", token); 357 | let tok = match token { 358 | LBrace => Some(lbrace), 359 | RBrace => Some(rbrace), 360 | LParen => Some(lparen), 361 | RParen => Some(rparen), 362 | LBracket => Some(lbracket), 363 | RBracket => Some(rbracket), 364 | Semicolon => Some(semicolon), 365 | Assign => Some(equal), 366 | Lt => Some(langle), 367 | Gt => Some(rangle), 368 | Minus => Some(minus), 369 | Tilde => Some(tilde), 370 | Exclamation => Some(exclamation), 371 | Plus => Some(plus), 372 | Multi => Some(star), 373 | Slash => Some(slash), 374 | Colon => Some(colon), 375 | QuestionMark => Some(question), 376 | Comma => Some(comma), 377 | Dot => Some(dot), 378 | SingleAnd => Some(ampersand), 379 | InclusiveOr => Some(pipe), 380 | ExclusiveOr => Some(xor), 381 | Mod => Some(percent), 382 | Identifier(_i_str) => Some(identifier), 383 | NumericLiteral(_num) => Some(constant), 384 | StringLiteral(_s) => Some(string_literal), 385 | FuncName => None, 
386 | SIZEOF => Some(sizeof_), 387 | PtrOp => Some(ptr_op), 388 | IncOp => Some(inc_op), 389 | DecOp => Some(dec_op), 390 | LeftOp => Some(left_op), 391 | RightOp => Some(right_op), 392 | LeOp => Some(le_op), 393 | GeOp => Some(ge_op), 394 | EqOp => Some(eq_op), 395 | NeOp => Some(ne_op), 396 | AndOp => Some(and_op), 397 | OrOp => Some(or_op), 398 | MulAssign => Some(mul_assign), 399 | DivAssign => Some(div_assign), 400 | ModAssign => Some(mod_assign), 401 | AddAssign => Some(add_assign), 402 | SubAssign => Some(sub_assign), 403 | LeftAssign => Some(left_assign), 404 | RightAssign => Some(right_assign), 405 | AndAssign => Some(and_assign), 406 | XorAssign => Some(xor_assign), 407 | OrAssign => Some(or_assign), 408 | // TODO: this should be done when we found this is a typedef name, 409 | // typedef LL int, then LL is typedef_name 410 | TypedefName => Some(identifier), 411 | ELLIPSIS => Some(elipsis), // ... 412 | EnumerationConstant(..) => None, // TODO: add check 413 | LineTerminator => None, 414 | EOF => None, 415 | 416 | TYPEDEF => Some(typedef), 417 | EXTERN => Some(extern_), 418 | STATIC => Some(static_), 419 | // AUTO => Some(auto_), 420 | REGISTER => Some(register), 421 | INLINE => Some(inline), 422 | CONST => Some(const_), 423 | RESTRICT => Some(restrict), 424 | VOLATILE => Some(volatile), 425 | BOOL => Some(bool_), 426 | CHAR => Some(char_), 427 | SHORT => Some(short), 428 | INT => Some(int), 429 | LONG => Some(long), 430 | SIGNED => Some(signed), 431 | UNSIGNED => Some(unsigned), 432 | FLOAT => Some(float), 433 | DOUBLE => Some(double), 434 | VOID => Some(void), 435 | COMPLEX => Some(complex), 436 | IMAGINARY => Some(imaginary), 437 | STRUCT => Some(struct_), 438 | UNION => Some(union), 439 | ENUM => Some(enum_), 440 | CASE => Some(case), 441 | DEFAULT => Some(default), 442 | IF => Some(if_), 443 | ELSE => Some(else_), 444 | SWITCH => Some(switch), 445 | WHILE => Some(while_), 446 | DO => Some(do_), 447 | FOR => Some(for_), 448 | GOTO => Some(goto), 449 | 
CONTINUE => Some(continue_), 450 | BREAK => Some(break_), 451 | RETURN => Some(return_), 452 | // ALIGNAS => Some(alignas), 453 | // ALIGNOF => Some(alignof), 454 | // ATOMIC => Some(atomic), 455 | // GENERIC => Some(generic), 456 | // NORETURN, 457 | // StaticAssert, 458 | // ThreadLocal, 459 | _ => None, 460 | }; 461 | // tok.map(|t| (t.usize() as u32, start, end)) 462 | tok.map(|t| t.usize() as u32) 463 | }).collect(); 464 | let cfg = InternalGrammar::from_grammar(&external); 465 | let bocage = Bocage::new(&cfg); 466 | let mut rec: Recognizer> = Recognizer::new_with_limit(&cfg, 2_00_000); 467 | rec.forest = bocage; 468 | let finished = rec.parse(&tokens[..]); 469 | assert!(finished); 470 | println!("memory use: all:{} forest:{}", rec.memory_use(), rec.forest.memory_use()); 471 | } 472 | -------------------------------------------------------------------------------- /tests/test_nulling.rs: -------------------------------------------------------------------------------- 1 | #[macro_use] 2 | extern crate log; 3 | extern crate cfg; 4 | extern crate env_logger; 5 | extern crate gearley; 6 | 7 | mod helpers; 8 | 9 | use cfg::earley::Grammar; 10 | use cfg::Symbol; 11 | use gearley::forest::{Bocage, CompactBocage}; 12 | use gearley::grammar::InternalGrammar; 13 | use gearley::recognizer::Recognizer; 14 | 15 | use helpers::{Parse, SimpleCompactEvaluator, SimpleEvaluator}; 16 | 17 | macro_rules! 
test_trivial_grammar { 18 | ($Bocage:ident, $SimpleEvaluator:ident) => { 19 | let _ = env_logger::try_init(); 20 | let mut external = Grammar::new(); 21 | let start = external.sym(); 22 | external.rule(start).rhs([]); 23 | external.set_start(start); 24 | let cfg = InternalGrammar::from_grammar(&external); 25 | let mut evaluator = $SimpleEvaluator::new( 26 | |_: Symbol| unreachable!(), 27 | |_: u32, _: &[&bool]| unreachable!(), 28 | |sym, builder: &mut Vec| { 29 | builder.reserve(1); 30 | if sym == start { 31 | builder.push(true); 32 | } else { 33 | builder.push(false); 34 | } 35 | }, 36 | ); 37 | let bocage = $Bocage::new(&cfg); 38 | let mut rec = Recognizer::new(&cfg, bocage); 39 | assert!(rec.parse(&[])); 40 | let mut traversal = rec.forest.traverse(); 41 | let results = evaluator.traverse(&mut traversal, rec.finished_node().unwrap()); 42 | assert_eq!(results, &[true]); 43 | }; 44 | } 45 | 46 | #[test] 47 | fn test_trivial_grammar() { 48 | test_trivial_grammar!(Bocage, SimpleEvaluator); 49 | } 50 | 51 | #[test] 52 | fn test_trivial_grammar_compact() { 53 | test_trivial_grammar!(CompactBocage, SimpleCompactEvaluator); 54 | } 55 | 56 | macro_rules! 
test_grammar_with_nulling_intermediate { 57 | ($Bocage:ident, $SimpleEvaluator:ident) => { 58 | let _ = env_logger::try_init(); 59 | let mut external = Grammar::new(); 60 | let (start, a, b, c, d, foo) = external.sym(); 61 | external 62 | .rule(start) 63 | .rhs([a, b, c, d, foo]) 64 | .rule(a) 65 | .rhs([]) 66 | .rule(b) 67 | .rhs([]) 68 | .rule(c) 69 | .rhs([]) 70 | .rule(d) 71 | .rhs([]); 72 | external.set_start(start); 73 | let cfg = InternalGrammar::from_grammar(&external); 74 | let mut evaluator = $SimpleEvaluator::new( 75 | |sym: Symbol| { 76 | if sym == foo { 77 | 3 78 | } else { 79 | unreachable!() 80 | } 81 | }, 82 | |rule: u32, arg: &[&i32]| { 83 | if rule == 0 { 84 | arg.iter().cloned().fold(0, |a, e| a + e) 85 | } else { 86 | unreachable!() 87 | } 88 | }, 89 | |sym, builder: &mut Vec| { 90 | builder.reserve(1); 91 | if sym == a { 92 | builder.push(1); 93 | } else { 94 | builder.push(2); 95 | } 96 | }, 97 | ); 98 | let bocage = $Bocage::new(&cfg); 99 | let mut rec = Recognizer::new(&cfg, bocage); 100 | assert!(rec.parse(&[foo.usize() as u32])); 101 | let mut traversal = rec.forest.traverse(); 102 | let results = evaluator.traverse(&mut traversal, rec.finished_node().unwrap()); 103 | assert_eq!(results, &[10]); 104 | }; 105 | } 106 | 107 | #[test] 108 | fn test_grammar_with_nulling_intermediate() { 109 | test_grammar_with_nulling_intermediate!(Bocage, SimpleEvaluator); 110 | } 111 | 112 | #[test] 113 | fn test_grammar_with_nulling_intermediate_compact() { 114 | test_grammar_with_nulling_intermediate!(CompactBocage, SimpleCompactEvaluator); 115 | } 116 | -------------------------------------------------------------------------------- /tests/test_recognizer.rs: -------------------------------------------------------------------------------- 1 | #[macro_use] 2 | extern crate log; 3 | extern crate env_logger; 4 | extern crate cfg; 5 | extern crate gearley; 6 | 7 | mod helpers; 8 | 9 | use cfg::earley::Grammar; 10 | 11 | use gearley::forest::NullForest; 12 | 
use gearley::grammar::InternalGrammar; 13 | use gearley::recognizer::Recognizer; 14 | 15 | use helpers::Parse; 16 | 17 | #[test] 18 | fn test_recognize_nested() { 19 | let _ = env_logger::try_init(); 20 | let mut external = Grammar::new(); 21 | let (start, nested, terminal) = external.sym(); 22 | external.rule(start).rhs([nested, terminal]) 23 | .rule(nested).rhs([terminal, terminal]); 24 | external.set_start(start); 25 | let cfg = InternalGrammar::from_grammar(&external); 26 | let mut rec = Recognizer::new(&cfg, NullForest); 27 | let finished = rec.parse(&[terminal.usize() as u32; 3]); 28 | assert!(finished); 29 | } 30 | 31 | #[test] 32 | fn test_recognize_reset() { 33 | let _ = env_logger::try_init(); 34 | let mut external = Grammar::new(); 35 | let (start, nested, terminal) = external.sym(); 36 | external.rule(start).rhs([nested, terminal]) 37 | .rule(nested).rhs([terminal, terminal]); 38 | external.set_start(start); 39 | let cfg = InternalGrammar::from_grammar(&external); 40 | let mut rec = Recognizer::new(&cfg, NullForest); 41 | for _ in 0..100 { 42 | let finished = rec.parse(&[terminal.usize() as u32; 3]); 43 | assert!(finished); 44 | rec.reset(); 45 | } 46 | } 47 | -------------------------------------------------------------------------------- /tests/test_sequence.rs: -------------------------------------------------------------------------------- 1 | #[macro_use] 2 | extern crate log; 3 | extern crate env_logger; 4 | extern crate cfg; 5 | extern crate gearley; 6 | 7 | mod helpers; 8 | 9 | use cfg::Symbol; 10 | use cfg::sequence::Separator::Trailing; 11 | use cfg::earley::Grammar; 12 | 13 | use gearley::forest::Bocage; 14 | use gearley::grammar::InternalGrammar; 15 | use gearley::recognizer::Recognizer; 16 | 17 | use helpers::{SimpleEvaluator, Parse}; 18 | 19 | #[test] 20 | fn test_sequence() { 21 | let _ = env_logger::try_init(); 22 | let (plus, minus) = (1, 2); 23 | let tokens = &[plus, minus, plus, minus, plus, minus]; 24 | let mut external = 
Grammar::new(); 25 | let (start, plus, minus) = external.sym(); 26 | external.sequence(start).separator(Trailing(minus)).inclusive(3, Some(3)).rhs(plus); 27 | external.set_start(start); 28 | 29 | let cfg = InternalGrammar::from_grammar(&external); 30 | let mut evaluator = SimpleEvaluator::new( 31 | |sym: Symbol| { 32 | match sym.usize() { 33 | 1 => 1, 34 | 2 => -1, 35 | _ => unreachable!() 36 | } 37 | }, 38 | |rule: u32, args: &[&i32]| { 39 | if rule == 0 { 40 | args.len() as i32 41 | } else { 42 | unreachable!() 43 | } 44 | }, 45 | |_, _: &mut Vec| unreachable!() 46 | ); 47 | let bocage = Bocage::new(&cfg); 48 | let mut recognizer = Recognizer::new(&cfg, bocage); 49 | assert!(recognizer.parse(tokens)); 50 | 51 | let mut traversal = recognizer.forest.traverse(); 52 | 53 | let results = evaluator.traverse(&mut traversal, recognizer.finished_node().unwrap()); 54 | 55 | assert_eq!(results, vec![6]); 56 | } 57 | -------------------------------------------------------------------------------- /tests/test_serde.rs: -------------------------------------------------------------------------------- 1 | #![cfg(feature = "serde")] 2 | #[macro_use] 3 | extern crate log; 4 | extern crate env_logger; 5 | extern crate cfg; 6 | extern crate gearley; 7 | extern crate serde; 8 | 9 | mod grammars; 10 | 11 | use gearley::forest::NullForest; 12 | use gearley::grammar::Grammar; 13 | use gearley::recognizer::Recognizer; 14 | 15 | use grammars::*; 16 | 17 | use serde::de::value::StringDeserializer; 18 | use serde::de::IntoDeserializer; 19 | 20 | #[test] 21 | fn test_serde() { 22 | let x = InternalGrammar::deserialize(String::into_deserializer("")); 23 | assert!(true); 24 | } 25 | -------------------------------------------------------------------------------- /tests/tests.rs: -------------------------------------------------------------------------------- 1 | #[macro_use] 2 | extern crate log; 3 | extern crate cfg; 4 | extern crate gearley; 5 | 6 | #[macro_use] 7 | mod grammars; 8 | mod 
helpers; 9 | 10 | use gearley::grammar::InternalGrammar; 11 | use gearley::forest::{Bocage, NullForest}; 12 | use gearley::recognizer::Recognizer; 13 | 14 | use grammars::*; 15 | use helpers::{SimpleEvaluator, Parse}; 16 | 17 | const SUM_TOKENS: &'static [u32] = precedenced_arith!( 18 | '1' '+' '(' '2' '*' '3' '-' '4' ')' '/' 19 | '(' '5' '5' ')' '-' '(' '5' '4' ')' '*' 20 | '5' '5' '+' '6' '2' '-' '1' '3' '-' '(' 21 | '(' '3' '6' ')' ')' 22 | ); 23 | 24 | #[test] 25 | fn test_precedenced_arith() { 26 | let external = precedenced_arith::grammar(); 27 | let cfg = InternalGrammar::from_grammar(&external); 28 | let mut rec = Recognizer::new(&cfg, NullForest); 29 | assert!(rec.parse(SUM_TOKENS)); 30 | } 31 | 32 | #[test] 33 | fn test_ambiguous_arithmetic() { 34 | let tokens = ambiguous_arith!('2' '-' '0' '*' '3' '+' '1'); 35 | let external = ambiguous_arith::grammar(); 36 | let cfg = InternalGrammar::from_grammar(&external); 37 | let mut evaluator = SimpleEvaluator::new( 38 | ambiguous_arith::leaf, 39 | ambiguous_arith::rule, 40 | |_, _: &mut Vec| unreachable!() 41 | ); 42 | let bocage = Bocage::new(&cfg); 43 | let mut rec = Recognizer::new(&cfg, bocage); 44 | assert!(rec.parse(tokens)); 45 | let mut traverse = rec.forest.traverse(); 46 | let results = evaluator.traverse(&mut traverse, rec.finished_node().unwrap()); 47 | 48 | // The result is currently ordered by rule ID: 49 | assert_eq!(results, vec![2, 1, 3, 7, 8]); 50 | 51 | // A result ordered by structure would be: [2, 1, 8, 3, 7] 52 | // where 53 | 54 | // 2 = 2 - (0 * (3 + 1)) 55 | // 1 = 2 - ((0 * 3) + 1) 56 | // 8 = (2 - 0) * (3 + 1) 57 | // 3 = (2 - (0 * 3)) + 1 58 | // 7 = ((2 - 0) * 3) + 1 59 | } 60 | --------------------------------------------------------------------------------