├── .gitignore ├── .travis.yml ├── Cargo.toml ├── LICENSE-APACHE ├── LICENSE-MIT ├── README.md └── src ├── context.rs ├── forest.rs ├── high.rs ├── indexing_str.rs ├── input.rs ├── lib.rs ├── parser.rs ├── proc_macro.rs ├── proc_macro_input.rs ├── rule.rs └── scannerless.rs /.gitignore: -------------------------------------------------------------------------------- 1 | /target 2 | **/*.rs.bk 3 | Cargo.lock 4 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | language: rust 2 | rust: 3 | - stable 4 | - beta 5 | - nightly 6 | cache: cargo 7 | before_script: rustup component add rustfmt 8 | script: 9 | - cargo fmt --all -- --check 10 | - cargo test --all 11 | 12 | branches: 13 | only: 14 | - master 15 | -------------------------------------------------------------------------------- /Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "grammer" 3 | version = "0.0.1" 4 | authors = [ 5 | "Ana-Maria ", 6 | "Eduard-Mihai Burtescu " 7 | ] 8 | edition = "2018" 9 | repository = "https://github.com/LykenSol/grammer" 10 | license = "MIT/Apache-2.0" 11 | keywords = ["grammar"] 12 | readme = "README.md" 13 | description = "Grammar framework." 14 | 15 | [dependencies] 16 | indexmap = "1" 17 | indexing = "0.3.2" 18 | proc-macro2 = "1.0.6" 19 | elsa = "1.3.2" 20 | flat-token = "0.0.1" 21 | 22 | [lib] 23 | doctest = false 24 | test = false 25 | -------------------------------------------------------------------------------- /LICENSE-APACHE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 
8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. 
For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. 
Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. 
You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 
122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. 
In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "[]" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. 
We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright [yyyy] [name of copyright owner] 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 202 | -------------------------------------------------------------------------------- /LICENSE-MIT: -------------------------------------------------------------------------------- 1 | Permission is hereby granted, free of charge, to any 2 | person obtaining a copy of this software and associated 3 | documentation files (the "Software"), to deal in the 4 | Software without restriction, including without 5 | limitation the rights to use, copy, modify, merge, 6 | publish, distribute, sublicense, and/or sell copies of 7 | the Software, and to permit persons to whom the Software 8 | is furnished to do so, subject to the following 9 | conditions: 10 | 11 | The above copyright notice and this permission notice 12 | shall be included in all copies or substantial portions 13 | of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF 16 | ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED 17 | TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A 18 | PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT 19 | SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY 20 | CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION 21 | OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR 22 | IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 23 | DEALINGS IN THE SOFTWARE. 24 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Grammar framework 2 | 3 | [![Build Status](https://travis-ci.com/LykenSol/grammer.svg?branch=master)](https://travis-ci.com/LykenSol/grammer) 4 | [![Latest Version](https://img.shields.io/crates/v/grammer.svg)](https://crates.io/crates/grammer) 5 | [![Rust Documentation](https://img.shields.io/badge/api-rustdoc-blue.svg)](https://docs.rs/grammer) 6 | 7 | ## License 8 | 9 | Licensed under either of 10 | 11 | * Apache License, Version 2.0 ([LICENSE-APACHE](LICENSE-APACHE) or http://www.apache.org/licenses/LICENSE-2.0) 12 | * MIT license ([LICENSE-MIT](LICENSE-MIT) or http://opensource.org/licenses/MIT) 13 | 14 | at your option. 15 | 16 | ### Contribution 17 | 18 | Unless you explicitly state otherwise, any contribution intentionally submitted 19 | for inclusion in this crate by you, as defined in the Apache-2.0 license, shall 20 | be dual licensed as above, without any additional terms or conditions. 21 | -------------------------------------------------------------------------------- /src/context.rs: -------------------------------------------------------------------------------- 1 | use crate::rule::{Fields, Rule}; 2 | use elsa::FrozenVec; 3 | use std::cell::RefCell; 4 | use std::collections::HashMap; 5 | use std::convert::TryInto; 6 | use std::hash::Hash; 7 | use std::rc::Rc; 8 | 9 | /// Context object with global resources for working with grammar, 10 | /// such as interners. 
11 | pub struct Context { 12 | interners: Interners, 13 | } 14 | 15 | /// Dispatch helper, to allow implementing interning logic on 16 | /// the type passed to `cx.intern(...)`. 17 | pub trait InternInCx { 18 | type Interned; 19 | 20 | fn intern_in_cx(self, cx: &Context) -> Self::Interned; 21 | } 22 | 23 | impl Context { 24 | pub fn new() -> Self { 25 | Context { 26 | interners: Interners::default(), 27 | } 28 | } 29 | 30 | pub fn intern>(&self, x: T) -> T::Interned { 31 | x.intern_in_cx(self) 32 | } 33 | } 34 | 35 | struct Interner { 36 | // FIXME(Manishearth/elsa#6) switch to `FrozenIndexSet` when available. 37 | map: RefCell, u32>>, 38 | vec: FrozenVec>, 39 | } 40 | 41 | impl Default for Interner { 42 | fn default() -> Self { 43 | Interner { 44 | map: RefCell::new(HashMap::default()), 45 | vec: FrozenVec::new(), 46 | } 47 | } 48 | } 49 | 50 | impl Interner { 51 | fn intern(&self, value: impl AsRef + Into>) -> u32 { 52 | if let Some(&i) = self.map.borrow().get(value.as_ref()) { 53 | return i; 54 | } 55 | let value = value.into(); 56 | let next = self.vec.len().try_into().unwrap(); 57 | self.map.borrow_mut().insert(value.clone(), next); 58 | self.vec.push(value); 59 | next 60 | } 61 | } 62 | 63 | macro_rules! interners { 64 | ($($name:ident => $ty:ty),* $(,)?) => { 65 | #[allow(non_snake_case)] 66 | struct Interners { 67 | $($name: Interner<$ty>),* 68 | } 69 | 70 | impl Default for Interners { 71 | fn default() -> Self { 72 | Interners { 73 | $($name: Default::default()),* 74 | } 75 | } 76 | } 77 | 78 | $( 79 | #[derive(Copy, Clone, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)] 80 | pub struct $name(u32); 81 | 82 | impl std::ops::Index<$name> for Context { 83 | type Output = $ty; 84 | 85 | fn index(&self, interned: $name) -> &Self::Output { 86 | &self.interners.$name.vec[interned.0 as usize] 87 | } 88 | } 89 | )* 90 | }; 91 | } 92 | 93 | interners! 
{ 94 | IStr => str, 95 | IRule => Rule, 96 | IFields => Fields, 97 | } 98 | 99 | impl InternInCx for &'_ str { 100 | type Interned = IStr; 101 | 102 | fn intern_in_cx(self, cx: &Context) -> IStr { 103 | IStr(cx.interners.IStr.intern(self)) 104 | } 105 | } 106 | 107 | // FIXME(eddyb) automate this away somehow. 108 | impl AsRef for Rule { 109 | fn as_ref(&self) -> &Self { 110 | self 111 | } 112 | } 113 | 114 | impl InternInCx for Rule { 115 | type Interned = IRule; 116 | 117 | fn intern_in_cx(self, cx: &Context) -> Self::Interned { 118 | IRule(cx.interners.IRule.intern(self)) 119 | } 120 | } 121 | 122 | // FIXME(eddyb) automate this away somehow. 123 | impl AsRef for Fields { 124 | fn as_ref(&self) -> &Self { 125 | self 126 | } 127 | } 128 | 129 | impl InternInCx for Fields { 130 | type Interned = IFields; 131 | 132 | fn intern_in_cx(self, cx: &Context) -> Self::Interned { 133 | IFields(cx.interners.IFields.intern(self)) 134 | } 135 | } 136 | -------------------------------------------------------------------------------- /src/forest.rs: -------------------------------------------------------------------------------- 1 | use crate::high::{type_lambda, ExistsL, PairL}; 2 | use crate::input::{Input, Range}; 3 | use indexing::{self, Container}; 4 | use std::cmp::Ordering; 5 | use std::collections::{BTreeSet, HashMap, HashSet, VecDeque}; 6 | use std::fmt; 7 | use std::hash::{Hash, Hasher}; 8 | use std::io::{self, Write}; 9 | use std::str; 10 | 11 | #[derive(Copy, Clone, Debug, PartialEq, Eq, PartialOrd, Ord)] 12 | pub enum NodeShape { 13 | Opaque, 14 | Alias(T), 15 | Choice(usize), 16 | Opt(T), 17 | Split(T, T), 18 | } 19 | 20 | impl fmt::Display for NodeShape { 21 | fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { 22 | match self { 23 | NodeShape::Opaque => write!(f, "Opaque"), 24 | NodeShape::Alias(inner) => write!(f, "Alias({})", inner), 25 | NodeShape::Choice(count) => write!(f, "Choice({})", count), 26 | NodeShape::Opt(inner) => write!(f, "Opt({})", 
inner), 27 | NodeShape::Split(left, right) => write!(f, "Split({}, {})", left, right), 28 | } 29 | } 30 | } 31 | 32 | impl NodeShape { 33 | pub fn map(self, mut f: impl FnMut(T) -> U) -> NodeShape { 34 | match self { 35 | NodeShape::Opaque => NodeShape::Opaque, 36 | NodeShape::Alias(inner) => NodeShape::Alias(f(inner)), 37 | NodeShape::Choice(count) => NodeShape::Choice(count), 38 | NodeShape::Opt(inner) => NodeShape::Opt(f(inner)), 39 | NodeShape::Split(left, right) => NodeShape::Split(f(left), f(right)), 40 | } 41 | } 42 | } 43 | 44 | /// Objects capable of providing information about various parts of the grammar 45 | /// (mostly parse nodes and their substructure). 46 | /// 47 | /// For code generation, this doesn't need to be more than an unit struct, as 48 | /// all the information can be hardcoded, but in more dynamic settings, this 49 | /// might contain e.g. a reference to a context. 50 | pub trait GrammarReflector { 51 | type NodeKind: fmt::Debug + Eq + Hash + Copy; 52 | 53 | fn node_shape(&self, kind: Self::NodeKind) -> NodeShape; 54 | fn node_shape_choice_get(&self, kind: Self::NodeKind, i: usize) -> Self::NodeKind; 55 | fn node_desc(&self, kind: Self::NodeKind) -> String; 56 | } 57 | 58 | pub struct Node<'i, G: GrammarReflector> { 59 | pub kind: G::NodeKind, 60 | pub range: Range<'i>, 61 | } 62 | 63 | // FIXME(eddyb) can't derive these on `Node` because that puts bounds on `G`. 
64 | impl Copy for Node<'_, G> {} 65 | impl Clone for Node<'_, G> { 66 | fn clone(&self) -> Self { 67 | *self 68 | } 69 | } 70 | impl PartialEq for Node<'_, G> { 71 | fn eq(&self, other: &Self) -> bool { 72 | (self.kind, self.range) == (other.kind, other.range) 73 | } 74 | } 75 | impl Eq for Node<'_, G> {} 76 | impl PartialOrd for Node<'_, G> 77 | where 78 | G::NodeKind: PartialOrd, 79 | { 80 | fn partial_cmp(&self, other: &Self) -> Option { 81 | (self.kind, self.range).partial_cmp(&(other.kind, other.range)) 82 | } 83 | } 84 | impl Ord for Node<'_, G> 85 | where 86 | G::NodeKind: Ord, 87 | { 88 | fn cmp(&self, other: &Self) -> Ordering { 89 | (self.kind, self.range).cmp(&(other.kind, other.range)) 90 | } 91 | } 92 | impl Hash for Node<'_, G> { 93 | fn hash(&self, state: &mut H) { 94 | (self.kind, self.range).hash(state); 95 | } 96 | } 97 | 98 | impl fmt::Debug for Node<'_, G> { 99 | fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { 100 | write!( 101 | f, 102 | "{:?} @ {}..{}", 103 | self.kind, 104 | self.range.start(), 105 | self.range.end() 106 | ) 107 | } 108 | } 109 | 110 | /// A parse forest, in SPPF (Shared Packed Parse Forest) representation. 111 | pub struct ParseForest<'i, G: GrammarReflector, I: Input> { 112 | pub grammar: G, 113 | // HACK(eddyb) `pub(crate)` only for `parser`. 114 | pub(crate) input: Container<'i, I::Container>, 115 | pub(crate) possibilities: HashMap, BTreeSet>, 116 | } 117 | 118 | type_lambda! 
{ 119 | pub type<'i> ParseForestL = ParseForest<'i, G, I>; 120 | pub type<'i> NodeL = Node<'i, G>; 121 | } 122 | 123 | pub type OwnedParseForestAndNode = ExistsL, NodeL>>; 124 | 125 | #[derive(Debug)] 126 | pub struct MoreThanOne; 127 | 128 | impl<'i, G: GrammarReflector, I: Input> ParseForest<'i, G, I> { 129 | pub fn input(&self, range: Range<'i>) -> &I::Slice { 130 | I::slice(&self.input, range) 131 | } 132 | 133 | pub fn source_info(&self, range: Range<'i>) -> I::SourceInfo { 134 | I::source_info(&self.input, range) 135 | } 136 | 137 | // NOTE(eddyb) this is a private helper and should never be exported. 138 | fn choice_child(&self, node: Node<'i, G>, choice: usize) -> Node<'i, G> { 139 | match self.grammar.node_shape(node.kind) { 140 | NodeShape::Choice(_) => Node { 141 | kind: self.grammar.node_shape_choice_get(node.kind, choice), 142 | range: node.range, 143 | }, 144 | shape => unreachable!( 145 | "choice_child({:?}, {}): non-choice shape {:?}", 146 | node, choice, shape 147 | ), 148 | } 149 | } 150 | 151 | pub fn one_choice(&self, node: Node<'i, G>) -> Result, MoreThanOne> { 152 | let choices = &self.possibilities[&node]; 153 | if choices.len() > 1 { 154 | return Err(MoreThanOne); 155 | } 156 | let &choice = choices.iter().next().unwrap(); 157 | Ok(self.choice_child(node, choice)) 158 | } 159 | 160 | pub fn all_choices<'a>( 161 | &'a self, 162 | node: Node<'i, G>, 163 | ) -> impl Iterator> + Clone + 'a 164 | where 165 | G::NodeKind: 'a, 166 | { 167 | self.possibilities[&node] 168 | .iter() 169 | .cloned() 170 | .map(move |choice| self.choice_child(node, choice)) 171 | } 172 | 173 | // NOTE(eddyb) this is a private helper and should never be exported. 
174 | fn split_children(&self, node: Node<'i, G>, split: usize) -> (Node<'i, G>, Node<'i, G>) { 175 | match self.grammar.node_shape(node.kind) { 176 | NodeShape::Split(left_kind, right_kind) => { 177 | let (left, right, _) = node.range.split_at(split); 178 | ( 179 | Node { 180 | kind: left_kind, 181 | range: Range(left), 182 | }, 183 | Node { 184 | kind: right_kind, 185 | range: Range(right), 186 | }, 187 | ) 188 | } 189 | shape => unreachable!( 190 | "split_children({:?}, {}): non-split shape {:?}", 191 | node, split, shape 192 | ), 193 | } 194 | } 195 | 196 | pub fn one_split(&self, node: Node<'i, G>) -> Result<(Node<'i, G>, Node<'i, G>), MoreThanOne> { 197 | let splits = &self.possibilities[&node]; 198 | if splits.len() > 1 { 199 | return Err(MoreThanOne); 200 | } 201 | let &split = splits.iter().next().unwrap(); 202 | Ok(self.split_children(node, split)) 203 | } 204 | 205 | pub fn all_splits<'a>( 206 | &'a self, 207 | node: Node<'i, G>, 208 | ) -> impl Iterator, Node<'i, G>)> + Clone + 'a 209 | where 210 | G::NodeKind: 'a, 211 | { 212 | self.possibilities[&node] 213 | .iter() 214 | .cloned() 215 | .map(move |split| self.split_children(node, split)) 216 | } 217 | 218 | pub fn unpack_alias(&self, node: Node<'i, G>) -> Node<'i, G> { 219 | match self.grammar.node_shape(node.kind) { 220 | NodeShape::Alias(inner) => Node { 221 | kind: inner, 222 | range: node.range, 223 | }, 224 | shape => unreachable!("unpack_alias({:?}): non-alias shape {:?}", node, shape), 225 | } 226 | } 227 | 228 | pub fn unpack_opt(&self, node: Node<'i, G>) -> Option> { 229 | match self.grammar.node_shape(node.kind) { 230 | NodeShape::Opt(inner) => { 231 | if node.range.is_empty() { 232 | None 233 | } else { 234 | Some(Node { 235 | kind: inner, 236 | range: node.range, 237 | }) 238 | } 239 | } 240 | shape => unreachable!("unpack_opt({:?}): non-opt shape {:?}", node, shape), 241 | } 242 | } 243 | 244 | pub fn dump_graphviz(&self, out: &mut dyn Write) -> io::Result<()> { 245 | writeln!(out, 
"digraph forest {{")?; 246 | let mut queue: VecDeque<_> = self.possibilities.keys().cloned().collect(); 247 | let mut seen: HashSet<_> = queue.iter().cloned().collect(); 248 | let mut p = 0; 249 | let node_name = |Node { kind, range }| { 250 | format!( 251 | "{} @ {:?}", 252 | self.grammar.node_desc(kind), 253 | self.source_info(range) 254 | ) 255 | }; 256 | while let Some(source) = queue.pop_front() { 257 | let source_name = node_name(source); 258 | writeln!(out, " {:?} [shape=box]", source_name)?; 259 | let mut add_children = |children: &[(&str, Node<'i, G>)]| -> io::Result<()> { 260 | writeln!(out, r#" p{} [label="" shape=point]"#, p)?; 261 | writeln!(out, " {:?} -> p{}:n", source_name, p)?; 262 | for &(port, child) in children { 263 | writeln!( 264 | out, 265 | " p{}:{} -> {:?}:n [dir=none]", 266 | p, 267 | port, 268 | node_name(child) 269 | )?; 270 | if seen.insert(child) { 271 | queue.push_back(child); 272 | } 273 | } 274 | p += 1; 275 | Ok(()) 276 | }; 277 | match self.grammar.node_shape(source.kind) { 278 | NodeShape::Opaque => {} 279 | 280 | NodeShape::Alias(_) => { 281 | add_children(&[("s", self.unpack_alias(source))])?; 282 | } 283 | 284 | NodeShape::Opt(_) => { 285 | if let Some(child) = self.unpack_opt(source) { 286 | add_children(&[("s", child)])?; 287 | } 288 | } 289 | 290 | NodeShape::Choice(_) => { 291 | for child in self.all_choices(source) { 292 | add_children(&[("s", child)])?; 293 | } 294 | } 295 | 296 | NodeShape::Split(..) => { 297 | for (left, right) in self.all_splits(source) { 298 | add_children(&[("sw", left), ("se", right)])?; 299 | } 300 | } 301 | } 302 | } 303 | writeln!(out, "}}") 304 | } 305 | } 306 | 307 | pub mod typed { 308 | use super::{GrammarReflector, MoreThanOne, Node, ParseForest}; 309 | use crate::input::Input; 310 | 311 | #[derive(Clone)] 312 | enum Void {} 313 | 314 | // HACK(eddyb) this type uses `T` but is also uninhabited. 
315 | type PhantomVoid = (Void, std::marker::PhantomData); 316 | 317 | pub trait Shaped { 318 | type Shape: Shape; 319 | 320 | // FIXME(eddyb) this is always `[usize; Self::Shape::STATE_LEN]`. 321 | // (but that doesn't work yet) 322 | type State: Default + AsMut<[usize]>; 323 | } 324 | 325 | pub trait FromShapeFields<'a, 'i, G: GrammarReflector, I: Input>: Sized { 326 | type Output; 327 | 328 | // FIXME(eddyb) use an array length const here instead when that works. 329 | type Fields: Default + AsMut<[Option>]>; 330 | 331 | fn from_shape_fields( 332 | forest: &'a ParseForest<'i, G, I>, 333 | fields: Self::Fields, 334 | ) -> Self::Output; 335 | 336 | fn one( 337 | forest: &'a ParseForest<'i, G, I>, 338 | node: Node<'i, G>, 339 | ) -> Result 340 | where 341 | Self: Shaped, 342 | { 343 | let mut state = Self::State::default(); 344 | let state = state.as_mut(); 345 | assert_eq!(state.len(), Self::Shape::STATE_LEN); 346 | 347 | Self::Shape::init(forest, node, state); 348 | 349 | let mut fields = Self::Fields::default(); 350 | Self::Shape::read(forest, node, state, fields.as_mut()); 351 | 352 | if Self::Shape::step(forest, node, state) { 353 | Err(MoreThanOne) 354 | } else { 355 | Ok(Self::from_shape_fields(forest, fields)) 356 | } 357 | } 358 | 359 | fn all( 360 | forest: &'a ParseForest<'i, G, I>, 361 | node: Node<'i, G>, 362 | ) -> ShapedAllIter<'a, 'i, G, I, Self> 363 | where 364 | Self: Shaped, 365 | { 366 | let mut state = Self::State::default(); 367 | assert_eq!(state.as_mut().len(), Self::Shape::STATE_LEN); 368 | 369 | Self::Shape::init(forest, node, state.as_mut()); 370 | 371 | ShapedAllIter { 372 | forest, 373 | node, 374 | state: Some(state), 375 | } 376 | } 377 | } 378 | 379 | pub struct ShapedAllIter<'a, 'i, G: GrammarReflector, I: Input, T: Shaped> { 380 | forest: &'a ParseForest<'i, G, I>, 381 | node: Node<'i, G>, 382 | state: Option, 383 | } 384 | 385 | impl<'a, 'i, G: GrammarReflector, I: Input, T: Shaped> Iterator for ShapedAllIter<'a, 'i, G, I, T> 386 
| where 387 | T: FromShapeFields<'a, 'i, G, I>, 388 | { 389 | type Item = T::Output; 390 | 391 | fn next(&mut self) -> Option { 392 | let state = self.state.as_mut()?.as_mut(); 393 | let mut fields = T::Fields::default(); 394 | T::Shape::read(self.forest, self.node, state, fields.as_mut()); 395 | if !T::Shape::step(self.forest, self.node, state) { 396 | self.state.take(); 397 | } 398 | Some(T::from_shape_fields(self.forest, fields)) 399 | } 400 | } 401 | 402 | impl Shaped for () { 403 | type Shape = shape!(_); 404 | type State = [usize; ::STATE_LEN]; 405 | } 406 | 407 | impl<'i, G: GrammarReflector, I: Input> FromShapeFields<'_, 'i, G, I> for () { 408 | type Output = (); 409 | type Fields = [Option>; 0]; 410 | 411 | fn from_shape_fields(_: &ParseForest<'i, G, I>, []: Self::Fields) {} 412 | } 413 | 414 | impl<'a, 'i, G: GrammarReflector, I: Input, T> FromShapeFields<'a, 'i, G, I> for Option 415 | where 416 | T: FromShapeFields<'a, 'i, G, I, Fields = [Option>; 1]>, 417 | { 418 | type Output = Option; 419 | type Fields = [Option>; 1]; 420 | 421 | fn from_shape_fields( 422 | forest: &'a ParseForest<'i, G, I>, 423 | [node]: Self::Fields, 424 | ) -> Option { 425 | Some(T::from_shape_fields(forest, [Some(node?)])) 426 | } 427 | } 428 | 429 | pub struct WithShape>(PhantomVoid<(T, A, S)>); 430 | 431 | impl> Shaped for WithShape { 432 | type Shape = A; 433 | type State = S; 434 | } 435 | 436 | impl<'a, 'i, G: GrammarReflector, I: Input, T, A, S> FromShapeFields<'a, 'i, G, I> 437 | for WithShape 438 | where 439 | T: FromShapeFields<'a, 'i, G, I>, 440 | A: Shape, 441 | S: Default + AsMut<[usize]>, 442 | { 443 | type Output = T::Output; 444 | type Fields = T::Fields; 445 | 446 | fn from_shape_fields(forest: &'a ParseForest<'i, G, I>, fields: T::Fields) -> T::Output { 447 | T::from_shape_fields(forest, fields) 448 | } 449 | } 450 | 451 | pub trait Shape { 452 | const STATE_LEN: usize; 453 | 454 | fn init<'i, G: GrammarReflector, I: Input>( 455 | forest: &ParseForest<'i, G, I>, 
456 | node: Node<'i, G>, 457 | state: &mut [usize], 458 | ); 459 | fn read<'i, G: GrammarReflector, I: Input>( 460 | forest: &ParseForest<'i, G, I>, 461 | node: Node<'i, G>, 462 | state: &[usize], 463 | fields: &mut [Option>], 464 | ); 465 | fn step<'i, G: GrammarReflector, I: Input>( 466 | forest: &ParseForest<'i, G, I>, 467 | node: Node<'i, G>, 468 | state: &mut [usize], 469 | ) -> bool; 470 | } 471 | 472 | pub struct Leaf(PhantomVoid<()>); 473 | 474 | impl Shape for Leaf { 475 | const STATE_LEN: usize = 0; 476 | 477 | fn init<'i, G: GrammarReflector, I: Input>( 478 | _: &ParseForest<'i, G, I>, 479 | _: Node<'i, G>, 480 | _: &mut [usize], 481 | ) { 482 | } 483 | fn read<'i, G: GrammarReflector, I: Input>( 484 | _: &ParseForest<'i, G, I>, 485 | _: Node<'i, G>, 486 | _: &[usize], 487 | _: &mut [Option>], 488 | ) { 489 | } 490 | fn step<'i, G: GrammarReflector, I: Input>( 491 | _: &ParseForest<'i, G, I>, 492 | _: Node<'i, G>, 493 | _: &mut [usize], 494 | ) -> bool { 495 | false 496 | } 497 | } 498 | 499 | // FIXME(eddyb) this should be using const generics. 
500 | pub struct Field>(PhantomVoid); 501 | 502 | impl> Shape for Field { 503 | const STATE_LEN: usize = 0; 504 | 505 | fn init<'i, G: GrammarReflector, I: Input>( 506 | _: &ParseForest<'i, G, I>, 507 | _: Node<'i, G>, 508 | _: &mut [usize], 509 | ) { 510 | } 511 | fn read<'i, G: GrammarReflector, I: Input>( 512 | _: &ParseForest<'i, G, I>, 513 | node: Node<'i, G>, 514 | _: &[usize], 515 | fields: &mut [Option>], 516 | ) { 517 | fields[X::default().as_ref().len()] = Some(node); 518 | } 519 | fn step<'i, G: GrammarReflector, I: Input>( 520 | _: &ParseForest<'i, G, I>, 521 | _: Node<'i, G>, 522 | _: &mut [usize], 523 | ) -> bool { 524 | false 525 | } 526 | } 527 | 528 | pub struct Split(PhantomVoid<(Left, Right)>); 529 | 530 | impl Shape for Split { 531 | const STATE_LEN: usize = 1 + Left::STATE_LEN + Right::STATE_LEN; 532 | 533 | fn init<'i, G: GrammarReflector, I: Input>( 534 | forest: &ParseForest<'i, G, I>, 535 | node: Node<'i, G>, 536 | state: &mut [usize], 537 | ) { 538 | let (state_split, state) = state.split_at_mut(1); 539 | let state_split = &mut state_split[0]; 540 | let (state_left, state_right) = state.split_at_mut(Left::STATE_LEN); 541 | 542 | let &split = forest.possibilities[&node].iter().next().unwrap(); 543 | 544 | let (left, right) = forest.split_children(node, split); 545 | 546 | *state_split = split; 547 | Left::init(forest, left, state_left); 548 | Right::init(forest, right, state_right); 549 | } 550 | fn read<'i, G: GrammarReflector, I: Input>( 551 | forest: &ParseForest<'i, G, I>, 552 | node: Node<'i, G>, 553 | state: &[usize], 554 | fields: &mut [Option>], 555 | ) { 556 | let (state_split, state) = state.split_at(1); 557 | let state_split = state_split[0]; 558 | let (state_left, state_right) = state.split_at(Left::STATE_LEN); 559 | 560 | let (left, right) = forest.split_children(node, state_split); 561 | Left::read(forest, left, state_left, fields); 562 | Right::read(forest, right, state_right, fields); 563 | } 564 | fn step<'i, G: 
GrammarReflector, I: Input>( 565 | forest: &ParseForest<'i, G, I>, 566 | node: Node<'i, G>, 567 | state: &mut [usize], 568 | ) -> bool { 569 | let (state_split, state) = state.split_at_mut(1); 570 | let state_split = &mut state_split[0]; 571 | let (state_left, state_right) = state.split_at_mut(Left::STATE_LEN); 572 | 573 | let (left, right) = forest.split_children(node, *state_split); 574 | 575 | Right::step(forest, right, state_right) 576 | || Left::step(forest, left, state_left) && { 577 | Right::init(forest, right, state_right); 578 | true 579 | } 580 | || ({ 581 | use std::ops::Bound::*; 582 | forest.possibilities[&node] 583 | .range((Excluded(*state_split), Unbounded)) 584 | .next() 585 | .cloned() 586 | }) 587 | .map(|split| { 588 | *state_split = split; 589 | 590 | let (left, right) = forest.split_children(node, split); 591 | 592 | Left::init(forest, left, state_left); 593 | Right::init(forest, right, state_right); 594 | }) 595 | .is_some() 596 | } 597 | } 598 | 599 | pub struct Choice(PhantomVoid<(At, Cases)>); 600 | 601 | impl Shape for Choice { 602 | const STATE_LEN: usize = At::STATE_LEN + Cases::STATE_LEN; 603 | 604 | fn init<'i, G: GrammarReflector, I: Input>( 605 | forest: &ParseForest<'i, G, I>, 606 | node: Node<'i, G>, 607 | state: &mut [usize], 608 | ) { 609 | let (state_at, state_cases) = state.split_at_mut(At::STATE_LEN); 610 | 611 | let &choice = forest.possibilities[&node].iter().next().unwrap(); 612 | 613 | let child = forest.choice_child(node, choice); 614 | 615 | state_cases[0] = choice; 616 | At::init(forest, child, state_at); 617 | Cases::init(forest, child, state_cases); 618 | } 619 | fn read<'i, G: GrammarReflector, I: Input>( 620 | forest: &ParseForest<'i, G, I>, 621 | node: Node<'i, G>, 622 | state: &[usize], 623 | fields: &mut [Option>], 624 | ) { 625 | let (state_at, state_cases) = state.split_at(At::STATE_LEN); 626 | 627 | let child = forest.choice_child(node, state_cases[0]); 628 | 629 | At::read(forest, child, state_at, fields); 
630 | Cases::read(forest, child, state_cases, fields); 631 | } 632 | fn step<'i, G: GrammarReflector, I: Input>( 633 | forest: &ParseForest<'i, G, I>, 634 | node: Node<'i, G>, 635 | state: &mut [usize], 636 | ) -> bool { 637 | let (state_at, state_cases) = state.split_at_mut(At::STATE_LEN); 638 | 639 | let child = forest.choice_child(node, state_cases[0]); 640 | 641 | At::step(forest, child, state_at) 642 | || Cases::step(forest, child, state_cases) && { 643 | At::init(forest, child, state_at); 644 | true 645 | } 646 | || ({ 647 | use std::ops::Bound::*; 648 | forest.possibilities[&node] 649 | .range((Excluded(state_cases[0]), Unbounded)) 650 | .next() 651 | .cloned() 652 | }) 653 | .map(|choice| { 654 | state_cases[0] = choice; 655 | 656 | let child = forest.choice_child(node, choice); 657 | 658 | At::init(forest, child, state_at); 659 | Cases::init(forest, child, state_cases); 660 | }) 661 | .is_some() 662 | } 663 | } 664 | 665 | pub trait CaseList: Shape { 666 | const LEN: usize; 667 | } 668 | 669 | pub struct CaseAppend(PhantomVoid<(Prefix, Last)>); 670 | 671 | impl CaseList for CaseAppend { 672 | const LEN: usize = Prefix::LEN + 1; 673 | } 674 | 675 | impl Shape for CaseAppend { 676 | const STATE_LEN: usize = { 677 | // HACK(eddyb) this is just `max(1 + Last::STATE_LEN, Prefix::STATE_LEN)`. 
678 | let a = 1 + Last::STATE_LEN; 679 | let b = Prefix::STATE_LEN; 680 | 681 | let a_gt_b_mask = -((a > b) as isize) as usize; 682 | (a_gt_b_mask & a) | (!a_gt_b_mask & b) 683 | }; 684 | 685 | fn init<'i, G: GrammarReflector, I: Input>( 686 | forest: &ParseForest<'i, G, I>, 687 | node: Node<'i, G>, 688 | state: &mut [usize], 689 | ) { 690 | let (state_choice, state_last) = state.split_at_mut(1); 691 | let state_choice = state_choice[0]; 692 | 693 | if state_choice != Prefix::LEN { 694 | Prefix::init(forest, node, state); 695 | } else { 696 | Last::init(forest, node, state_last); 697 | } 698 | } 699 | fn read<'i, G: GrammarReflector, I: Input>( 700 | forest: &ParseForest<'i, G, I>, 701 | node: Node<'i, G>, 702 | state: &[usize], 703 | fields: &mut [Option>], 704 | ) { 705 | let (state_choice, state_last) = state.split_at(1); 706 | let state_choice = state_choice[0]; 707 | 708 | if state_choice != Prefix::LEN { 709 | Prefix::read(forest, node, state, fields); 710 | } else { 711 | Last::read(forest, node, state_last, fields); 712 | } 713 | } 714 | fn step<'i, G: GrammarReflector, I: Input>( 715 | forest: &ParseForest<'i, G, I>, 716 | node: Node<'i, G>, 717 | state: &mut [usize], 718 | ) -> bool { 719 | let (state_choice, state_last) = state.split_at_mut(1); 720 | let state_choice = state_choice[0]; 721 | 722 | if state_choice != Prefix::LEN { 723 | Prefix::step(forest, node, state) 724 | } else { 725 | Last::step(forest, node, state_last) 726 | } 727 | } 728 | } 729 | 730 | pub struct CaseEmpty(PhantomVoid<()>); 731 | 732 | impl CaseList for CaseEmpty { 733 | const LEN: usize = 0; 734 | } 735 | 736 | impl Shape for CaseEmpty { 737 | const STATE_LEN: usize = 0; 738 | 739 | fn init<'i, G: GrammarReflector, I: Input>( 740 | _: &ParseForest<'i, G, I>, 741 | _: Node<'i, G>, 742 | _: &mut [usize], 743 | ) { 744 | unreachable!() 745 | } 746 | fn read<'i, G: GrammarReflector, I: Input>( 747 | _: &ParseForest<'i, G, I>, 748 | _: Node<'i, G>, 749 | _: &[usize], 750 | _: &mut 
[Option>], 751 | ) { 752 | unreachable!() 753 | } 754 | fn step<'i, G: GrammarReflector, I: Input>( 755 | _: &ParseForest<'i, G, I>, 756 | _: Node<'i, G>, 757 | _: &mut [usize], 758 | ) -> bool { 759 | unreachable!() 760 | } 761 | } 762 | 763 | pub struct Opt(PhantomVoid); 764 | 765 | impl Shape for Opt { 766 | const STATE_LEN: usize = A::STATE_LEN; 767 | 768 | fn init<'i, G: GrammarReflector, I: Input>( 769 | forest: &ParseForest<'i, G, I>, 770 | node: Node<'i, G>, 771 | state: &mut [usize], 772 | ) { 773 | if let Some(child) = forest.unpack_opt(node) { 774 | A::init(forest, child, state); 775 | } 776 | } 777 | fn read<'i, G: GrammarReflector, I: Input>( 778 | forest: &ParseForest<'i, G, I>, 779 | node: Node<'i, G>, 780 | state: &[usize], 781 | fields: &mut [Option>], 782 | ) { 783 | if let Some(child) = forest.unpack_opt(node) { 784 | A::read(forest, child, state, fields); 785 | } 786 | } 787 | fn step<'i, G: GrammarReflector, I: Input>( 788 | forest: &ParseForest<'i, G, I>, 789 | node: Node<'i, G>, 790 | state: &mut [usize], 791 | ) -> bool { 792 | match forest.unpack_opt(node) { 793 | Some(child) => A::step(forest, child, state), 794 | None => false, 795 | } 796 | } 797 | } 798 | 799 | // HACK(eddyb) work around `macro_rules` not being `use`-able. 800 | pub use crate::__forest_typed_shape as shape; 801 | 802 | #[macro_export] 803 | macro_rules! 
__forest_typed_shape { 804 | (_) => { 805 | $crate::forest::typed::Leaf 806 | }; 807 | ($i:literal) => { 808 | $crate::forest::typed::Field<[(); $i]> 809 | }; 810 | (($l_shape:tt $r_shape:tt)) => { 811 | $crate::forest::typed::Split< 812 | $crate::forest::typed::shape!($l_shape), 813 | $crate::forest::typed::shape!($r_shape), 814 | > 815 | }; 816 | ({ $at_shape:tt @ $($shape:tt)* }) => { 817 | $crate::forest::typed::Choice< 818 | $crate::forest::typed::shape!($at_shape), 819 | $crate::forest::typed::shape!(cases { $($shape)* } rev {}), 820 | > 821 | }; 822 | // HACK(eddyb) have to reverse the tt list to produce a "snoc-list" 823 | // instead of "cons-list". 824 | (cases { $shape0:tt $($shape:tt)* } rev { $($shape_rev:tt)* }) => { 825 | $crate::forest::typed::shape!(cases { $($shape)* } rev { $shape0 $($shape_rev)* }) 826 | }; 827 | (cases {} rev { $shape_last:tt $($shape:tt)* }) => { 828 | $crate::forest::typed::CaseAppend< 829 | $crate::forest::typed::shape!(cases {} rev { $($shape)* }), 830 | $crate::forest::typed::shape!($shape_last), 831 | > 832 | }; 833 | (cases {} rev {}) => { $crate::forest::typed::CaseEmpty }; 834 | ([$shape:tt]) => { 835 | $crate::forest::typed::Opt< 836 | $crate::forest::typed::shape!($shape), 837 | > 838 | } 839 | } 840 | } 841 | -------------------------------------------------------------------------------- /src/high.rs: -------------------------------------------------------------------------------- 1 | //! Utilities for emulating HKTs (over lifetimes) in Rust. 2 | 3 | use std::mem; 4 | use std::ops::{Deref, DerefMut}; 5 | 6 | /// Type lambda application, with a lifetime. 7 | pub trait ApplyL<'a> { 8 | type Out; 9 | } 10 | 11 | /// Type lambda taking a lifetime, i.e. `Lifetime -> Type`. 12 | pub trait LambdaL: for<'a> ApplyL<'a> {} 13 | 14 | impl ApplyL<'a>> LambdaL for T {} 15 | 16 | // HACK(eddyb) work around `macro_rules` not being `use`-able. 
17 | pub use crate::__high_type_lambda as type_lambda; 18 | 19 | /// Define a new "type lambda" (over a lifetime). 20 | /// 21 | /// For example, `type_lambda!(type<'a> X = Y>;)` defines 22 | /// a `struct X {...}` that implements `ApplyL`, such that 23 | /// `for<'a> >::Out = Y>` holds. 24 | #[macro_export] 25 | macro_rules! __high_type_lambda { 26 | ($($vis:vis type<$lt:lifetime> $name:ident $(<$($T:ident $(: $bound:path)*),*>)* = $ty:ty;)+) => { 27 | $($vis struct $name $(<$($T $(: $bound)*),*>)* { 28 | _marker: ::std::marker::PhantomData<($($($T),*)*)>, 29 | } 30 | impl<$lt, $($($T $(: $bound)*),*)*> $crate::high::ApplyL<$lt> 31 | for $name $(<$($T),*>)* 32 | { 33 | type Out = $ty; 34 | })+ 35 | } 36 | } 37 | 38 | type_lambda! { 39 | pub type<'a> PairL = 40 | (>::Out, >::Out); 41 | } 42 | 43 | // HACK(eddyb) work around projection limitations with a newtype 44 | // FIXME(#52812) replace with `&'a >::Out` 45 | pub struct RefApplyL<'a, 'b, T: LambdaL>(&'a >::Out); 46 | 47 | impl<'a, T: LambdaL> Deref for RefApplyL<'_, 'a, T> { 48 | type Target = >::Out; 49 | fn deref(&self) -> &Self::Target { 50 | self.0 51 | } 52 | } 53 | 54 | // HACK(eddyb) work around projection limitations with a newtype 55 | // FIXME(#52812) replace with `&'a mut >::Out` 56 | pub struct RefMutApplyL<'a, 'b, T: LambdaL>(&'a mut >::Out); 57 | 58 | impl<'a, T: LambdaL> Deref for RefMutApplyL<'_, 'a, T> { 59 | type Target = >::Out; 60 | fn deref(&self) -> &Self::Target { 61 | self.0 62 | } 63 | } 64 | 65 | impl DerefMut for RefMutApplyL<'_, '_, T> { 66 | fn deref_mut(&mut self) -> &mut Self::Target { 67 | self.0 68 | } 69 | } 70 | 71 | /// Proof token for erasable lifetimes (soundly replaceable with existentials). 72 | /// That is, the lifetime is not used in references that borrow outside 73 | /// data, but rather only self-contained (e.g. `indexing` or `owning_ref`). 
74 | #[derive(Copy, Clone)] 75 | pub struct ErasableL<'a> { 76 | _marker: ::std::marker::PhantomData<&'a mut &'a ()>, 77 | } 78 | 79 | impl ErasableL<'_> { 80 | /// Trivial proof that `'static` is erasable (it's always valid). 81 | pub const STATIC: ErasableL<'static> = ErasableL { 82 | _marker: ::std::marker::PhantomData, 83 | }; 84 | 85 | /// Enter an `indexing::scope`, where the closure also receives a proof that 86 | /// the generative lifetime is erasable (it doesn't come from a borrow). 87 | pub fn indexing_scope( 88 | a: A, 89 | f: impl for<'id> FnOnce(ErasableL<'id>, ::indexing::Container<'id, A>) -> R, 90 | ) -> R { 91 | ::indexing::scope(a, |container| { 92 | f( 93 | ErasableL { 94 | _marker: ::std::marker::PhantomData, 95 | }, 96 | container, 97 | ) 98 | }) 99 | } 100 | } 101 | 102 | /// Existential over a lifetime, i.e. `exists 'a.T('a)`. 103 | pub struct ExistsL(>::Out); 104 | 105 | impl ExistsL { 106 | /// Erase the lifetime `'a` from the value's type and wrap it in `ExistsL`. 107 | /// This requires a proof that `'a` is erasable at all (see `ErasableL`). 108 | /// To access the value later, use `unpack`, `unpack_ref` or `unpack_mut`. 109 | pub fn pack<'a>(_: ErasableL<'a>, value: >::Out) -> Self { 110 | let erased = unsafe { mem::transmute_copy(&value) }; 111 | mem::forget(value); 112 | ExistsL(erased) 113 | } 114 | 115 | /// Provide owned access to the value, with the original lifetime replaced 116 | /// by a generative lifetime, so that the closure can't assume equality 117 | /// to any other specific lifetime (thanks to lifetime parametricity). 
118 | pub fn unpack( 119 | self, 120 | f: impl for<'a> FnOnce(ErasableL<'a>, >::Out) -> R, 121 | ) -> R { 122 | let skolem = unsafe { mem::transmute_copy(&self.0) }; 123 | mem::forget(self); 124 | f( 125 | ErasableL { 126 | _marker: ::std::marker::PhantomData, 127 | }, 128 | skolem, 129 | ) 130 | } 131 | 132 | /// Provide shared access to the value, with the original lifetime replaced 133 | /// by a generative lifetime, so that the closure can't assume equality 134 | /// to any other specific lifetime (thanks to lifetime parametricity). 135 | pub fn unpack_ref( 136 | &self, 137 | f: impl for<'a, 'b> FnOnce(ErasableL<'b>, RefApplyL<'a, 'b, T>) -> R, 138 | ) -> R { 139 | f( 140 | ErasableL { 141 | _marker: ::std::marker::PhantomData, 142 | }, 143 | RefApplyL(unsafe { &*(&self.0 as *const _ as *const _) }), 144 | ) 145 | } 146 | 147 | /// Provide mutable access to the value, with the original lifetime replaced 148 | /// by a generative lifetime, so that the closure can't assume equality 149 | /// to any other specific lifetime (thanks to lifetime parametricity). 150 | pub fn unpack_mut( 151 | &mut self, 152 | f: impl for<'a, 'b> FnOnce(ErasableL<'b>, RefMutApplyL<'a, 'b, T>) -> R, 153 | ) -> R { 154 | f( 155 | ErasableL { 156 | _marker: ::std::marker::PhantomData, 157 | }, 158 | RefMutApplyL(unsafe { &mut *(&mut self.0 as *mut _ as *mut _) }), 159 | ) 160 | } 161 | } 162 | -------------------------------------------------------------------------------- /src/indexing_str.rs: -------------------------------------------------------------------------------- 1 | //! String slice support for the `indexing` crate. 
2 | // FIXME(eddyb) ensure `indexing::Range` can't break 3 | // `str`'s UTF-8 requirement, without overhead 4 | 5 | use indexing::container_traits::{Contiguous, Trustworthy}; 6 | use indexing::{Container, Range}; 7 | use std::ops::Deref; 8 | 9 | pub struct Str(str); 10 | 11 | impl<'a> From<&'a str> for &'a Str { 12 | fn from(s: &'a str) -> Self { 13 | unsafe { &*(s as *const str as *const Str) } 14 | } 15 | } 16 | 17 | impl Deref for Str { 18 | type Target = str; 19 | fn deref(&self) -> &str { 20 | &self.0 21 | } 22 | } 23 | 24 | unsafe impl Trustworthy for Str { 25 | type Item = u8; 26 | fn base_len(&self) -> usize { 27 | self.0.len() 28 | } 29 | } 30 | 31 | unsafe impl Contiguous for Str { 32 | fn begin(&self) -> *const Self::Item { 33 | self.0.as_ptr() 34 | } 35 | fn end(&self) -> *const Self::Item { 36 | unsafe { self.begin().add(self.0.len()) } 37 | } 38 | fn as_slice(&self) -> &[Self::Item] { 39 | self.0.as_bytes() 40 | } 41 | } 42 | 43 | impl Str { 44 | pub fn slice<'a, 'b, 'i>(input: &'b Container<'i, &'a Self>, range: Range<'i>) -> &'b Self { 45 | // NOTE(eddyb) following code is copied from `str::is_char_boundary`: 46 | let valid_utf8_start = |bytes: &[u8]| { 47 | match bytes.first() { 48 | None => true, 49 | // This is bit magic equivalent to: b < 128 || b >= 192 50 | Some(&b) => (b as i8) >= -0x40, 51 | } 52 | }; 53 | 54 | let (_, after) = input.split_around(range); 55 | let (bytes, bytes_after) = (&input[range], &input[after]); 56 | 57 | // HACK(eddyb) ensure the range is still a valid `str` 58 | assert!(valid_utf8_start(bytes)); 59 | assert!(valid_utf8_start(bytes_after)); 60 | 61 | unsafe { &*(bytes as *const [u8] as *const Str) } 62 | } 63 | } 64 | -------------------------------------------------------------------------------- /src/input.rs: -------------------------------------------------------------------------------- 1 | use crate::indexing_str; 2 | use indexing::container_traits::Trustworthy; 3 | use indexing::{self, Container, Index, 
Unknown}; 4 | use std::cmp::Ordering; 5 | use std::fmt; 6 | use std::hash::{Hash, Hasher}; 7 | use std::ops::{self, Deref, RangeInclusive}; 8 | use std::str; 9 | 10 | #[derive(Copy, Clone, PartialEq, Eq, Debug)] 11 | pub struct Range<'i>(pub indexing::Range<'i>); 12 | 13 | impl<'i> Deref for Range<'i> { 14 | type Target = indexing::Range<'i>; 15 | fn deref(&self) -> &Self::Target { 16 | &self.0 17 | } 18 | } 19 | 20 | impl PartialOrd for Range<'_> { 21 | fn partial_cmp(&self, other: &Self) -> Option { 22 | (self.start(), self.end()).partial_cmp(&(other.start(), other.end())) 23 | } 24 | } 25 | 26 | impl Ord for Range<'_> { 27 | fn cmp(&self, other: &Self) -> Ordering { 28 | (self.start(), self.end()).cmp(&(other.start(), other.end())) 29 | } 30 | } 31 | 32 | impl Hash for Range<'_> { 33 | fn hash(&self, state: &mut H) { 34 | (self.start(), self.end()).hash(state); 35 | } 36 | } 37 | 38 | impl Range<'_> { 39 | pub fn subtract_suffix(self, other: Self) -> Self { 40 | assert_eq!(self.end(), other.end()); 41 | Range(self.split_at(other.start() - self.start()).0) 42 | } 43 | } 44 | 45 | #[derive(Copy, Clone, Default, PartialEq, Eq, PartialOrd, Ord)] 46 | pub struct LineColumn { 47 | pub line: usize, 48 | pub column: usize, 49 | } 50 | 51 | impl fmt::Debug for LineColumn { 52 | fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { 53 | write!(f, "{}:{}", 1 + self.line, 1 + self.column) 54 | } 55 | } 56 | 57 | impl LineColumn { 58 | fn count(prefix: &str) -> Self { 59 | let (line, column) = prefix 60 | .split('\n') 61 | .enumerate() 62 | .last() 63 | .map_or((0, 0), |(i, s)| (i, s.chars().count())); 64 | LineColumn { line, column } 65 | } 66 | } 67 | 68 | #[derive(Copy, Clone, PartialEq, Eq, PartialOrd, Ord)] 69 | pub struct LineColumnRange { 70 | pub start: LineColumn, 71 | pub end: LineColumn, 72 | } 73 | 74 | impl fmt::Debug for LineColumnRange { 75 | fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { 76 | write!(f, "{:?}-{:?}", self.start, self.end) 77 | 
} 78 | } 79 | 80 | pub trait Input: Sized { 81 | type Container: Trustworthy; 82 | type Slice: ?Sized; 83 | type SourceInfo: fmt::Debug; 84 | // FIXME(eddyb) remove - replace with `SourceInfo` for the affected range 85 | type SourceInfoPoint: fmt::Debug; 86 | fn to_container(self) -> Self::Container; 87 | fn slice<'a, 'i>( 88 | input: &'a Container<'i, Self::Container>, 89 | range: Range<'i>, 90 | ) -> &'a Self::Slice; 91 | fn source_info<'i>( 92 | input: &Container<'i, Self::Container>, 93 | range: Range<'i>, 94 | ) -> Self::SourceInfo; 95 | fn source_info_point<'i>( 96 | input: &Container<'i, Self::Container>, 97 | index: Index<'i, Unknown>, 98 | ) -> Self::SourceInfoPoint; 99 | } 100 | 101 | impl Input for &[T] { 102 | type Container = Self; 103 | type Slice = [T]; 104 | type SourceInfo = ops::Range; 105 | type SourceInfoPoint = usize; 106 | fn to_container(self) -> Self::Container { 107 | self 108 | } 109 | fn slice<'b, 'i>( 110 | input: &'b Container<'i, Self::Container>, 111 | range: Range<'i>, 112 | ) -> &'b Self::Slice { 113 | &input[range.0] 114 | } 115 | fn source_info<'i>(_: &Container<'i, Self::Container>, range: Range<'i>) -> Self::SourceInfo { 116 | range.as_range() 117 | } 118 | fn source_info_point<'i>( 119 | _: &Container<'i, Self::Container>, 120 | index: Index<'i, Unknown>, 121 | ) -> Self::SourceInfoPoint { 122 | index.integer() 123 | } 124 | } 125 | 126 | impl<'a> Input for &'a str { 127 | type Container = &'a indexing_str::Str; 128 | type Slice = str; 129 | type SourceInfo = LineColumnRange; 130 | type SourceInfoPoint = LineColumn; 131 | fn to_container(self) -> Self::Container { 132 | self.into() 133 | } 134 | fn slice<'b, 'i>( 135 | input: &'b Container<'i, Self::Container>, 136 | range: Range<'i>, 137 | ) -> &'b Self::Slice { 138 | indexing_str::Str::slice(input, range.0) 139 | } 140 | fn source_info<'i>( 141 | input: &Container<'i, Self::Container>, 142 | range: Range<'i>, 143 | ) -> Self::SourceInfo { 144 | let start = 
Self::source_info_point(input, range.first()); 145 | // HACK(eddyb) add up `LineColumn`s to avoid counting twice. 146 | // Ideally we'd cache around a line map, like rustc's `SourceMap`. 147 | let mut end = LineColumn::count(Self::slice(input, range)); 148 | end.line += start.line; 149 | if end.line == start.line { 150 | end.column += start.column; 151 | } 152 | LineColumnRange { start, end } 153 | } 154 | fn source_info_point<'i>( 155 | input: &Container<'i, Self::Container>, 156 | index: Index<'i, Unknown>, 157 | ) -> Self::SourceInfoPoint { 158 | let prefix_range = Range(input.split_at(index).0); 159 | LineColumn::count(Self::slice(input, prefix_range)) 160 | } 161 | } 162 | 163 | pub trait InputMatch { 164 | fn match_left(&self, pat: &Pat) -> Option; 165 | fn match_right(&self, pat: &Pat) -> Option; 166 | } 167 | 168 | impl, Pat: ?Sized> InputMatch<&'_ Pat> for I { 169 | fn match_left(&self, &pat: &&Pat) -> Option { 170 | self.match_left(pat) 171 | } 172 | fn match_right(&self, &pat: &&Pat) -> Option { 173 | self.match_right(pat) 174 | } 175 | } 176 | 177 | impl InputMatch<[T]> for [T] { 178 | fn match_left(&self, pat: &[T]) -> Option { 179 | if self.starts_with(pat) { 180 | Some(pat.len()) 181 | } else { 182 | None 183 | } 184 | } 185 | fn match_right(&self, pat: &[T]) -> Option { 186 | if self.ends_with(pat) { 187 | Some(pat.len()) 188 | } else { 189 | None 190 | } 191 | } 192 | } 193 | 194 | impl InputMatch> for [T] { 195 | fn match_left(&self, pat: &RangeInclusive) -> Option { 196 | let x = self.first()?; 197 | if pat.start() <= x && x <= pat.end() { 198 | Some(1) 199 | } else { 200 | None 201 | } 202 | } 203 | fn match_right(&self, pat: &RangeInclusive) -> Option { 204 | let x = self.last()?; 205 | if pat.start() <= x && x <= pat.end() { 206 | Some(1) 207 | } else { 208 | None 209 | } 210 | } 211 | } 212 | 213 | impl InputMatch for str { 214 | fn match_left(&self, pat: &str) -> Option { 215 | if self.starts_with(pat) { 216 | Some(pat.len()) 217 | } else { 
218 | None 219 | } 220 | } 221 | fn match_right(&self, pat: &str) -> Option { 222 | if self.ends_with(pat) { 223 | Some(pat.len()) 224 | } else { 225 | None 226 | } 227 | } 228 | } 229 | 230 | impl InputMatch> for str { 231 | fn match_left(&self, pat: &RangeInclusive) -> Option { 232 | let c = self.chars().next()?; 233 | if *pat.start() <= c && c <= *pat.end() { 234 | Some(c.len_utf8()) 235 | } else { 236 | None 237 | } 238 | } 239 | fn match_right(&self, pat: &RangeInclusive) -> Option { 240 | let c = self.chars().rev().next()?; 241 | if *pat.start() <= c && c <= *pat.end() { 242 | Some(c.len_utf8()) 243 | } else { 244 | None 245 | } 246 | } 247 | } 248 | -------------------------------------------------------------------------------- /src/lib.rs: -------------------------------------------------------------------------------- 1 | #![deny(unsafe_code)] 2 | #![deny(rust_2018_idioms)] 3 | 4 | // NOTE only these two modules can and do contain unsafe code. 5 | #[allow(unsafe_code)] 6 | mod high; 7 | #[allow(unsafe_code)] 8 | mod indexing_str; 9 | 10 | #[forbid(unsafe_code)] 11 | pub mod context; 12 | #[forbid(unsafe_code)] 13 | pub mod forest; 14 | #[forbid(unsafe_code)] 15 | pub mod input; 16 | #[forbid(unsafe_code)] 17 | pub mod parser; 18 | #[forbid(unsafe_code)] 19 | pub mod proc_macro; 20 | #[forbid(unsafe_code)] 21 | pub mod rule; 22 | #[forbid(unsafe_code)] 23 | pub mod scannerless; 24 | 25 | // HACK(eddyb) this contains impls for types in `proc_macro`, which depend on 26 | // `input`, collapse this back into `proc_macro`. 27 | #[forbid(unsafe_code)] 28 | mod proc_macro_input; 29 | 30 | // FIXME(eddyb) maybe put the rest of this file into submodules? 
31 | 32 | use crate::context::{Context, IStr}; 33 | use indexmap::IndexMap; 34 | use std::collections::HashMap; 35 | use std::hash::Hash; 36 | 37 | pub struct Grammar { 38 | pub rules: IndexMap, 39 | } 40 | 41 | impl Grammar { 42 | pub fn new() -> Self { 43 | Grammar { 44 | rules: IndexMap::new(), 45 | } 46 | } 47 | pub fn define(&mut self, name: IStr, rule: rule::RuleWithFields) { 48 | self.rules.insert(name, rule); 49 | } 50 | pub fn extend(&mut self, other: Self) { 51 | self.rules.extend(other.rules); 52 | } 53 | pub fn insert_whitespace( 54 | self, 55 | cx: &Context, 56 | whitespace: rule::RuleWithFields, 57 | ) -> Self { 58 | Grammar { 59 | rules: self 60 | .rules 61 | .into_iter() 62 | .map(|(name, rule)| (name, rule.insert_whitespace(cx, whitespace))) 63 | .collect(), 64 | } 65 | } 66 | } 67 | 68 | impl Grammar { 69 | pub fn check(&self, cx: &Context) { 70 | for rule in self.rules.values() { 71 | rule.rule.check_call_names(cx, self); 72 | } 73 | 74 | let mut can_be_empty_cache = HashMap::new(); 75 | for rule in self.rules.values() { 76 | rule.rule 77 | .check_non_empty_opt(&mut can_be_empty_cache, cx, self); 78 | } 79 | } 80 | } 81 | 82 | /// Construct a (meta-)grammar for parsing a grammar. 83 | pub fn grammar_grammar>(cx: &Context) -> Grammar { 84 | use crate::rule::*; 85 | 86 | // HACK(eddyb) more explicit subset of the grammar, for bootstrapping. 87 | macro_rules! rule { 88 | ({ $start:tt ..= $end:tt }) => { 89 | eat($start..=$end) 90 | }; 91 | ({ ! $pat:tt }) => { 92 | negative_lookahead($pat) 93 | }; 94 | ({ ! $start:tt ..= $end:tt }) => { 95 | negative_lookahead($start..=$end) 96 | }; 97 | ($rule:ident) => { 98 | call(stringify!($rule)) 99 | }; 100 | ({ $name:ident : $rule:tt }) => { 101 | rule!($rule).field(stringify!($name)) 102 | }; 103 | ({ $rule:tt ? 
}) => { 104 | rule!($rule).opt() 105 | }; 106 | ({ $elem:tt * }) => { 107 | rule!($elem).repeat_many() 108 | }; 109 | ({ $elem:tt + }) => { 110 | rule!($elem).repeat_more() 111 | }; 112 | ({ $elem:tt + % $sep:tt }) => { 113 | rule!($elem).repeat_more_sep(rule!($sep), SepKind::Simple) 114 | }; 115 | ({ $rule0:tt $(| $rule:tt)+ }) => { 116 | rule!($rule0) $(| rule!($rule))+ 117 | }; 118 | ({ $rule0:tt $($rule:tt)* }) => { 119 | rule!($rule0) $(+ rule!($rule))* 120 | }; 121 | ($pat:expr) => { 122 | eat($pat) 123 | }; 124 | } 125 | 126 | macro_rules! grammar { 127 | ($($rule_name:ident = $($rule:tt)|+;)*) => ({ 128 | let mut grammar = Grammar::new(); 129 | $(grammar.define( 130 | cx.intern(stringify!($rule_name)), 131 | rule!({ $($rule)|+ }).finish(cx), 132 | );)* 133 | grammar 134 | }) 135 | } 136 | 137 | // Main grammar. 138 | let mut grammar = grammar! { 139 | Grammar = { FileStart {rules:{RuleDef*}} FileEnd }; 140 | RuleDef = { {name:Ident} "=" {rule:Or} ";" }; 141 | Or = {{"|"?} {rules:{Concat+ % "|"}}}; 142 | Concat = {rules:{Rule+}}; 143 | Rule = { {{ {field:Ident} ":" }?} {rule:Primary} {{modifier:Modifier}?} }; 144 | Primary = 145 | {Eat:Pattern} | 146 | {Call:Ident} | 147 | {Group:{ "{" {{or:Or}?} "}" }}; 148 | Modifier = 149 | {Opt:"?"} | 150 | {Repeat:{ {repeat:Repeat} {{ {kind:SepKind} {sep:Primary} }?} }}; 151 | Repeat = 152 | {Many:"*"} | 153 | {More:"+"}; 154 | SepKind = 155 | {Simple:"%"} | 156 | // HACK(eddyb) should be "%%", but `rustc`'s `proc_macro` server doesn't 157 | // always preserve jointness, except within multi-character Rust operators. 158 | {Trailing:{"%" "%"}}; 159 | Pattern = 160 | {Str:StrLit} | 161 | {CharRange:{ {{start:CharLit}?} ".." {{end:CharLit}?} }} | 162 | {CharRangeInclusive:{ {{start:CharLit}?} "..=" {end:CharLit} }}; 163 | }; 164 | 165 | // Lexical fragment of the grammar. 166 | grammar.extend(grammar! 
{ 167 | FileStart = ""; 168 | FileEnd = ""; 169 | 170 | Ident = IDENT; 171 | 172 | // FIXME(eddyb) restrict literals, once `proc_macro` allows it. 173 | StrLit = LITERAL; 174 | CharLit = LITERAL; 175 | }); 176 | 177 | grammar 178 | } 179 | -------------------------------------------------------------------------------- /src/parser.rs: -------------------------------------------------------------------------------- 1 | use crate::forest::{GrammarReflector, Node, OwnedParseForestAndNode, ParseForest}; 2 | use crate::high::ErasableL; 3 | use crate::input::{Input, InputMatch, Range}; 4 | use indexing::{self, Index, Unknown}; 5 | use std::collections::HashMap; 6 | 7 | pub struct Parser<'a, 'i, G: GrammarReflector, I: Input, Pat> { 8 | state: &'a mut ParserState<'i, G, I, Pat>, 9 | result: Range<'i>, 10 | remaining: Range<'i>, 11 | } 12 | 13 | struct ParserState<'i, G: GrammarReflector, I: Input, Pat> { 14 | forest: ParseForest<'i, G, I>, 15 | last_input_pos: Index<'i, Unknown>, 16 | expected_pats: Vec, 17 | } 18 | 19 | #[derive(Debug)] 20 | pub struct ParseError { 21 | pub at: A, 22 | pub expected: Vec, 23 | } 24 | 25 | pub type ParseResult = Result>; 26 | 27 | impl<'i, G: GrammarReflector, I: Input, Pat: Ord> Parser<'_, 'i, G, I, Pat> { 28 | pub fn parse_with( 29 | grammar: G, 30 | input: I, 31 | f: impl for<'i2> FnOnce(Parser<'_, 'i2, G, I, Pat>) -> Option>, 32 | ) -> ParseResult> { 33 | ErasableL::indexing_scope(input.to_container(), |lifetime, input| { 34 | let range = Range(input.range()); 35 | let mut state = ParserState { 36 | forest: ParseForest { 37 | grammar, 38 | input, 39 | possibilities: HashMap::new(), 40 | }, 41 | last_input_pos: range.first(), 42 | expected_pats: vec![], 43 | }; 44 | 45 | let result = f(Parser { 46 | state: &mut state, 47 | result: Range(range.frontiers().0), 48 | remaining: range, 49 | }); 50 | 51 | let mut error = ParseError { 52 | at: I::source_info_point(&state.forest.input, state.last_input_pos), 53 | expected: state.expected_pats, 
54 | }; 55 | error.expected.sort(); 56 | error.expected.dedup(); 57 | 58 | match result { 59 | None => Err(error), 60 | Some(node) => { 61 | // The result is only a successful parse if it's as long as the input. 62 | if node.range == range { 63 | Ok(OwnedParseForestAndNode::pack( 64 | lifetime, 65 | (state.forest, node), 66 | )) 67 | } else { 68 | Err(error) 69 | } 70 | } 71 | } 72 | }) 73 | } 74 | 75 | // FIXME(eddyb) find an nicer way for algorithms to manipulate these ranges. 76 | pub fn result(&self) -> Range<'i> { 77 | self.result 78 | } 79 | 80 | pub fn remaining(&self) -> Range<'i> { 81 | self.remaining 82 | } 83 | 84 | /// Get the current result range, and leave behind an empty range 85 | /// (at the end of the current result / start of the remaining input). 86 | pub fn take_result(&mut self) -> Range<'i> { 87 | let result = self.result; 88 | self.result = Range(result.frontiers().1); 89 | result 90 | } 91 | 92 | pub fn with_result_and_remaining<'a>( 93 | &'a mut self, 94 | result: Range<'i>, 95 | remaining: Range<'i>, 96 | ) -> Parser<'a, 'i, G, I, Pat> { 97 | // HACK(eddyb) enforce that `result` and `remaining` are inside `self`. 
98 | assert_eq!(self.result, Range(self.remaining.frontiers().0)); 99 | let full_new_range = result.join(remaining.0).unwrap(); 100 | assert!(self.remaining.start() <= full_new_range.start()); 101 | assert_eq!(self.remaining.end(), full_new_range.end()); 102 | 103 | Parser { 104 | state: self.state, 105 | result, 106 | remaining, 107 | } 108 | } 109 | 110 | pub fn input_consume_left<'a, SpecificPat: Into>( 111 | &'a mut self, 112 | pat: SpecificPat, 113 | ) -> Option> 114 | where 115 | I::Slice: InputMatch, 116 | { 117 | let start = self.remaining.first(); 118 | if start > self.state.last_input_pos { 119 | self.state.last_input_pos = start; 120 | self.state.expected_pats.clear(); 121 | } 122 | match self.state.forest.input(self.remaining).match_left(&pat) { 123 | Some(n) => { 124 | let (matching, after, _) = self.remaining.split_at(n); 125 | if after.first() > self.state.last_input_pos { 126 | self.state.last_input_pos = after.first(); 127 | self.state.expected_pats.clear(); 128 | } 129 | Some(Parser { 130 | state: self.state, 131 | result: Range(self.result.join(matching).unwrap()), 132 | remaining: Range(after), 133 | }) 134 | } 135 | None => { 136 | if start == self.state.last_input_pos { 137 | self.state.expected_pats.push(pat.into()); 138 | } 139 | None 140 | } 141 | } 142 | } 143 | 144 | pub fn input_consume_right<'a, SpecificPat>( 145 | &'a mut self, 146 | pat: SpecificPat, 147 | ) -> Option> 148 | where 149 | I::Slice: InputMatch, 150 | { 151 | // FIXME(eddyb) implement error reporting support like in `input_consume_left` 152 | match self.state.forest.input(self.remaining).match_right(&pat) { 153 | Some(n) => { 154 | let (before, matching, _) = self.remaining.split_at(self.remaining.len() - n); 155 | Some(Parser { 156 | state: self.state, 157 | result: Range(matching.join(self.result.0).unwrap()), 158 | remaining: Range(before), 159 | }) 160 | } 161 | None => None, 162 | } 163 | } 164 | 165 | // FIXME(eddyb) safeguard this against misuse. 
166 | pub fn forest_add_choice(&mut self, kind: G::NodeKind, choice: usize) { 167 | self.state 168 | .forest 169 | .possibilities 170 | .entry(Node { 171 | kind, 172 | range: self.result, 173 | }) 174 | .or_default() 175 | .insert(choice); 176 | } 177 | 178 | // FIXME(eddyb) safeguard this against misuse. 179 | pub fn forest_add_split(&mut self, kind: G::NodeKind, left: Node<'i, G>) { 180 | self.result = Range(left.range.join(self.result.0).unwrap()); 181 | self.state 182 | .forest 183 | .possibilities 184 | .entry(Node { 185 | kind, 186 | range: self.result, 187 | }) 188 | .or_default() 189 | .insert(left.range.len()); 190 | } 191 | } 192 | -------------------------------------------------------------------------------- /src/proc_macro.rs: -------------------------------------------------------------------------------- 1 | use crate::rule::{call, eat, MatchesEmpty, MaybeKnown}; 2 | use crate::scannerless::Pat as SPat; 3 | use flat_token::flatten; 4 | pub use flat_token::FlatToken; 5 | pub use proc_macro2::{ 6 | Delimiter, Ident, LexError, Literal, Punct, Spacing, Span, TokenStream, TokenTree, 7 | }; 8 | use std::fmt; 9 | use std::ops::Deref; 10 | use std::str::FromStr; 11 | 12 | pub type Context = crate::context::Context; 13 | 14 | pub fn builtin(cx: &Context) -> crate::Grammar { 15 | let mut g = crate::Grammar::new(); 16 | 17 | let ident = eat(Pat(vec![FlatTokenPat::Ident(None)])).finish(cx); 18 | g.define(cx.intern("IDENT"), ident); 19 | 20 | g.define( 21 | cx.intern("LIFETIME"), 22 | eat(Pat(vec![ 23 | FlatTokenPat::Punct { 24 | ch: Some('\''), 25 | joint: Some(true), 26 | }, 27 | FlatTokenPat::Ident(None), 28 | ])) 29 | .finish(cx), 30 | ); 31 | 32 | let punct = eat(Pat(vec![FlatTokenPat::Punct { 33 | ch: None, 34 | joint: None, 35 | }])) 36 | .finish(cx); 37 | g.define(cx.intern("PUNCT"), punct); 38 | 39 | let literal = eat(Pat(vec![FlatTokenPat::Literal])).finish(cx); 40 | g.define(cx.intern("LITERAL"), literal); 41 | 42 | let delim = |c| 
eat(FlatTokenPat::Delim(c)); 43 | let group = |open, close| delim(open) + call("TOKEN_TREE").repeat_many() + delim(close); 44 | g.define( 45 | cx.intern("TOKEN_TREE"), 46 | (ident | punct | literal | group('(', ')') | group('[', ']') | group('{', '}')).finish(cx), 47 | ); 48 | 49 | g 50 | } 51 | 52 | #[derive(Clone, PartialEq, Eq, PartialOrd, Ord, Hash)] 53 | pub struct Pat>>(pub Pats); 54 | 55 | impl FromStr for Pat { 56 | type Err = LexError; 57 | fn from_str(s: &str) -> Result { 58 | // Handle lone delimiters first, as they won't lex. 59 | let mut chars = s.chars(); 60 | if let (Some(ch), None) = (chars.next(), chars.next()) { 61 | if "()[]{}".contains(ch) { 62 | return Ok(FlatTokenPat::Delim(ch).into()); 63 | } 64 | } 65 | 66 | let mut tokens = vec![]; 67 | flatten(s.parse()?, &mut tokens); 68 | Ok(Pat(tokens.iter().map(FlatTokenPat::extract).collect())) 69 | } 70 | } 71 | 72 | // FIXME(eddyb) perhaps support `TryFrom`/`TryInto` directly in `grammar_grammar`? 73 | impl From<&str> for Pat { 74 | fn from(s: &str) -> Self { 75 | s.parse().unwrap() 76 | } 77 | } 78 | 79 | impl From for Pat { 80 | fn from(pats: Pats) -> Self { 81 | Pat(pats) 82 | } 83 | } 84 | 85 | impl From> for Pat { 86 | fn from(pat: FlatTokenPat) -> Self { 87 | Pat(vec![pat]) 88 | } 89 | } 90 | 91 | impl From for Pat { 92 | fn from(pat: SPat) -> Self { 93 | match pat { 94 | SPat::String(s) => s[..].into(), 95 | SPat::Range(..) 
=> unimplemented!("char ranges are unsupported"), 96 | } 97 | } 98 | } 99 | 100 | impl MatchesEmpty for Pat { 101 | fn matches_empty(&self) -> MaybeKnown { 102 | MaybeKnown::Known(self.0.is_empty()) 103 | } 104 | } 105 | 106 | #[derive(Copy, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)] 107 | pub enum FlatTokenPat> { 108 | Delim(char), 109 | Ident(Option), 110 | Punct { 111 | ch: Option, 112 | joint: Option, 113 | }, 114 | Literal, 115 | } 116 | 117 | impl> fmt::Debug for FlatTokenPat { 118 | fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { 119 | match self { 120 | FlatTokenPat::Delim(c) | FlatTokenPat::Punct { ch: Some(c), .. } => { 121 | write!(f, "\"{}\"", c) 122 | } 123 | FlatTokenPat::Ident(None) => f.write_str("IDENT"), 124 | FlatTokenPat::Ident(Some(ident)) => write!(f, "\"{}\"", ident.as_ref()), 125 | FlatTokenPat::Punct { ch: None, .. } => f.write_str("PUNCT"), 126 | FlatTokenPat::Literal => f.write_str("LITERAL"), 127 | } 128 | } 129 | } 130 | 131 | // FIXME(eddyb) can't use `Pats: AsRef<[FlatTokenPat]` as it doesn't constrain `S`. 132 | impl, Pats: Deref]>> fmt::Debug for Pat { 133 | fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { 134 | match &self.0[..] 
{ 135 | [] => f.write_str("\"\""), 136 | [pat] => pat.fmt(f), 137 | [FlatTokenPat::Punct { 138 | ch: Some('\''), 139 | joint: Some(true), 140 | }, FlatTokenPat::Ident(None)] => f.write_str("LIFETIME"), 141 | pats => { 142 | let mut was_joint = true; 143 | f.write_str("\"")?; 144 | for pat in pats { 145 | if !was_joint { 146 | f.write_str(" ")?; 147 | } 148 | match pat { 149 | FlatTokenPat::Punct { ch: Some(c), joint } => { 150 | write!(f, "{}", c)?; 151 | was_joint = *joint == Some(true); 152 | } 153 | FlatTokenPat::Ident(Some(ident)) => { 154 | write!(f, "{}", ident.as_ref())?; 155 | was_joint = false; 156 | } 157 | _ => unreachable!(), 158 | } 159 | } 160 | f.write_str("\"") 161 | } 162 | } 163 | } 164 | } 165 | 166 | impl> FlatTokenPat { 167 | pub fn extract(ft: &FlatToken) -> FlatTokenPat 168 | where 169 | S: From, 170 | { 171 | match ft { 172 | &FlatToken::Delim(delim, _) => FlatTokenPat::Delim(delim), 173 | FlatToken::Ident(tt) => FlatTokenPat::Ident(Some(tt.to_string().into())), 174 | FlatToken::Punct(tt) => FlatTokenPat::Punct { 175 | ch: Some(tt.as_char()), 176 | joint: if tt.spacing() == Spacing::Joint { 177 | Some(true) 178 | } else { 179 | None 180 | }, 181 | }, 182 | FlatToken::Literal(tt) => { 183 | unimplemented!( 184 | "matching specific literals is not supported, \ 185 | use `LITERAL` instead of `{}`", 186 | tt.to_string(), 187 | ); 188 | } 189 | } 190 | } 191 | 192 | pub fn matches(&self, ft: &FlatToken) -> bool { 193 | match (ft, self) { 194 | (FlatToken::Delim(a, _), FlatTokenPat::Delim(b)) => a == b, 195 | (FlatToken::Ident(_), FlatTokenPat::Ident(None)) => true, 196 | (FlatToken::Ident(a), FlatTokenPat::Ident(Some(b))) => a == b.as_ref(), 197 | (FlatToken::Punct(a), FlatTokenPat::Punct { ch, joint }) => { 198 | ch.map_or(true, |b| a.as_char() == b) 199 | && joint.map_or(true, |b| (a.spacing() == Spacing::Joint) == b) 200 | } 201 | (FlatToken::Literal(_), FlatTokenPat::Literal) => true, 202 | _ => false, 203 | } 204 | } 205 | } 206 | 
-------------------------------------------------------------------------------- /src/proc_macro_input.rs: -------------------------------------------------------------------------------- 1 | use crate::input::{Input, InputMatch, Range}; 2 | use crate::proc_macro::{FlatTokenPat, Span, TokenStream}; 3 | use flat_token::{flatten, FlatToken}; 4 | use indexing::{proof::Provable, Container, Index, Unknown}; 5 | use std::ops; 6 | 7 | impl Input for TokenStream { 8 | type Container = Vec; 9 | type Slice = [FlatToken]; 10 | type SourceInfo = ops::Range; 11 | type SourceInfoPoint = Span; 12 | fn to_container(self) -> Self::Container { 13 | let mut out = vec![]; 14 | flatten(self, &mut out); 15 | out 16 | } 17 | fn slice<'b, 'i>( 18 | input: &'b Container<'i, Self::Container>, 19 | range: Range<'i>, 20 | ) -> &'b Self::Slice { 21 | &input[range.0] 22 | } 23 | fn source_info<'i>( 24 | input: &Container<'i, Self::Container>, 25 | range: Range<'i>, 26 | ) -> Self::SourceInfo { 27 | // FIXME(eddyb) should be joining up spans, but the API 28 | // for that is still "semver-exempt" in `proc-macro2`. 29 | let last = range 30 | .nonempty() 31 | .map(|r| r.last().no_proof()) 32 | .unwrap_or(range.past_the_end()); 33 | Self::source_info_point(input, range.first())..Self::source_info_point(input, last) 34 | } 35 | fn source_info_point<'i>( 36 | input: &Container<'i, Self::Container>, 37 | index: Index<'i, Unknown>, 38 | ) -> Self::SourceInfoPoint { 39 | // Try to get as much information as possible. 40 | let (before, after) = input.split_at(index); 41 | let before = &input[before]; 42 | let after = &input[after]; 43 | if let Some(first) = after.first() { 44 | first.span() 45 | } else if let Some(last) = before.last() { 46 | // Not correct but we're at the end of the input anyway. 
47 | last.span() 48 | } else { 49 | // HACK(eddyb) last resort, make a span up 50 | // (a better option should exist) 51 | Span::call_site() 52 | } 53 | } 54 | } 55 | 56 | impl InputMatch<[FlatTokenPat<&'_ str>]> for [FlatToken] { 57 | fn match_left(&self, pat: &[FlatTokenPat<&str>]) -> Option { 58 | if self 59 | .iter() 60 | .zip(pat) 61 | .take_while(|(t, p)| p.matches(t)) 62 | .count() 63 | == pat.len() 64 | { 65 | Some(pat.len()) 66 | } else { 67 | None 68 | } 69 | } 70 | fn match_right(&self, pat: &[FlatTokenPat<&str>]) -> Option { 71 | if self 72 | .iter() 73 | .zip(pat) 74 | .rev() 75 | .take_while(|(t, p)| p.matches(t)) 76 | .count() 77 | == pat.len() 78 | { 79 | Some(pat.len()) 80 | } else { 81 | None 82 | } 83 | } 84 | } 85 | -------------------------------------------------------------------------------- /src/rule.rs: -------------------------------------------------------------------------------- 1 | use crate::context::{Context, IFields, IRule, IStr}; 2 | use crate::forest::NodeShape; 3 | use indexmap::IndexMap; 4 | use std::collections::hash_map::Entry; 5 | use std::collections::HashMap; 6 | use std::fmt; 7 | use std::hash::Hash; 8 | use std::iter; 9 | use std::ops::{Add, BitAnd, BitOr}; 10 | 11 | #[derive(Copy, Clone)] 12 | pub struct RuleWithFields { 13 | pub rule: IRule, 14 | pub fields: IFields, 15 | } 16 | 17 | #[derive(Copy, Clone, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)] 18 | pub enum SepKind { 19 | Simple, 20 | Trailing, 21 | } 22 | 23 | #[derive(Debug, PartialEq, Eq, PartialOrd, Ord, Hash)] 24 | pub enum Rule { 25 | Empty, 26 | Eat(Pat), 27 | Call(IStr), 28 | 29 | Concat([IRule; 2]), 30 | Or(Vec), 31 | 32 | Opt(IRule), 33 | RepeatMany(IRule, Option<(IRule, SepKind)>), 34 | RepeatMore(IRule, Option<(IRule, SepKind)>), 35 | } 36 | 37 | #[derive(Copy, Clone, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)] 38 | pub struct Field { 39 | pub name: IStr, 40 | pub sub: IFields, 41 | } 42 | 43 | #[derive(Debug, PartialEq, Eq, PartialOrd, Ord, Hash)] 
44 | pub enum Fields { 45 | Leaf(Option), 46 | Aggregate(Vec), 47 | } 48 | 49 | impl Fields { 50 | fn aggregate( 51 | cx: &Context, 52 | mut children: impl Iterator, 53 | ) -> IFields { 54 | let empty_leaf = cx.intern(Fields::Leaf(None)); 55 | let mut empty_count = 0; 56 | for child in &mut children { 57 | if child != empty_leaf { 58 | return cx.intern(Fields::Aggregate( 59 | iter::repeat(empty_leaf) 60 | .take(empty_count) 61 | .chain(iter::once(child)) 62 | .chain(children) 63 | .collect(), 64 | )); 65 | } 66 | 67 | empty_count += 1; 68 | } 69 | empty_leaf 70 | } 71 | } 72 | 73 | /// Helpers for building rules without needing a `Context` until the very end. 74 | /// 75 | /// NOTE: the module is private to disallow referring to the trait / types, 76 | /// as they are an implementation detail of the builder methods and operators. 77 | mod build { 78 | use super::*; 79 | 80 | // HACK(eddyb) like `Into` but using an associated type. 81 | // Needed for constraining the RHS of operator overload impls. 
82 | pub trait Start { 83 | type Out; 84 | 85 | fn start(self) -> Self::Out; 86 | } 87 | 88 | impl Start for RuleWithFields { 89 | type Out = RuleWithFields; 90 | 91 | fn start(self) -> Self::Out { 92 | self 93 | } 94 | } 95 | 96 | pub trait Finish { 97 | fn finish(self, cx: &Context) -> RuleWithFields; 98 | } 99 | 100 | impl Finish for RuleWithFields { 101 | fn finish(self, _cx: &Context) -> RuleWithFields { 102 | self 103 | } 104 | } 105 | 106 | pub struct Empty; 107 | 108 | impl Finish for Empty { 109 | fn finish(self, cx: &Context) -> RuleWithFields { 110 | RuleWithFields { 111 | rule: cx.intern(Rule::Empty), 112 | fields: cx.intern(Fields::Leaf(None)), 113 | } 114 | } 115 | } 116 | 117 | pub struct Eat(Pat); 118 | 119 | impl Finish for Eat { 120 | fn finish(self, cx: &Context) -> RuleWithFields { 121 | RuleWithFields { 122 | rule: cx.intern(Rule::Eat(self.0)), 123 | fields: cx.intern(Fields::Leaf(None)), 124 | } 125 | } 126 | } 127 | 128 | pub struct Call<'a>(&'a str); 129 | 130 | impl Finish for Call<'_> { 131 | fn finish(self, cx: &Context) -> RuleWithFields { 132 | RuleWithFields { 133 | rule: cx.intern(Rule::Call(cx.intern(self.0))), 134 | fields: cx.intern(Fields::Leaf(None)), 135 | } 136 | } 137 | } 138 | 139 | pub struct Field<'a, R>(R, &'a str); 140 | 141 | impl> Finish for Field<'_, R> { 142 | fn finish(self, cx: &Context) -> RuleWithFields { 143 | let rule = self.0.finish(cx); 144 | RuleWithFields { 145 | rule: rule.rule, 146 | fields: cx.intern(Fields::Leaf(Some(super::Field { 147 | name: cx.intern(self.1), 148 | sub: rule.fields, 149 | }))), 150 | } 151 | } 152 | } 153 | 154 | pub struct Opt(R); 155 | 156 | impl> Finish for Opt { 157 | fn finish(self, cx: &Context) -> RuleWithFields { 158 | let rule = self.0.finish(cx); 159 | RuleWithFields { 160 | rule: cx.intern(Rule::Opt(rule.rule)), 161 | fields: Fields::aggregate(cx, iter::once(rule.fields)), 162 | } 163 | } 164 | } 165 | 166 | pub struct RepeatMany(E); 167 | 168 | impl> Finish for RepeatMany 
{ 169 | fn finish(self, cx: &Context) -> RuleWithFields { 170 | let elem = self.0.finish(cx); 171 | RuleWithFields { 172 | rule: cx.intern(Rule::RepeatMany(elem.rule, None)), 173 | fields: Fields::aggregate(cx, iter::once(elem.fields)), 174 | } 175 | } 176 | } 177 | 178 | pub struct RepeatManySep(E, S, SepKind); 179 | 180 | impl, S: Finish> Finish for RepeatManySep { 181 | fn finish(self, cx: &Context) -> RuleWithFields { 182 | let elem = self.0.finish(cx); 183 | let sep = self.1.finish(cx); 184 | assert_eq!(cx[sep.fields], Fields::Leaf(None)); 185 | RuleWithFields { 186 | rule: cx.intern(Rule::RepeatMany(elem.rule, Some((sep.rule, self.2)))), 187 | fields: Fields::aggregate(cx, iter::once(elem.fields)), 188 | } 189 | } 190 | } 191 | 192 | pub struct RepeatMore(E); 193 | 194 | impl> Finish for RepeatMore { 195 | fn finish(self, cx: &Context) -> RuleWithFields { 196 | let elem = self.0.finish(cx); 197 | RuleWithFields { 198 | rule: cx.intern(Rule::RepeatMore(elem.rule, None)), 199 | fields: Fields::aggregate(cx, iter::once(elem.fields)), 200 | } 201 | } 202 | } 203 | 204 | pub struct RepeatMoreSep(E, S, SepKind); 205 | 206 | impl, S: Finish> Finish for RepeatMoreSep { 207 | fn finish(self, cx: &Context) -> RuleWithFields { 208 | let elem = self.0.finish(cx); 209 | let sep = self.1.finish(cx); 210 | assert_eq!(cx[sep.fields], Fields::Leaf(None)); 211 | RuleWithFields { 212 | rule: cx.intern(Rule::RepeatMore(elem.rule, Some((sep.rule, self.2)))), 213 | fields: Fields::aggregate(cx, iter::once(elem.fields)), 214 | } 215 | } 216 | } 217 | 218 | pub struct Concat(A, B); 219 | 220 | impl, B: Finish> Finish for Concat { 221 | fn finish(self, cx: &Context) -> RuleWithFields { 222 | let a = self.0.finish(cx); 223 | let b = self.1.finish(cx); 224 | 225 | match (&cx[a.rule], &cx[b.rule]) { 226 | (Rule::Empty, _) if cx[a.fields] == Fields::Leaf(None) => return b, 227 | (_, Rule::Empty) if cx[b.fields] == Fields::Leaf(None) => return a, 228 | _ => {} 229 | } 230 | 231 | 
RuleWithFields { 232 | rule: cx.intern(Rule::Concat([a.rule, b.rule])), 233 | fields: Fields::aggregate(cx, [a.fields, b.fields].iter().cloned()), 234 | } 235 | } 236 | } 237 | 238 | pub struct Or(A, B); 239 | 240 | impl, B: Finish> Finish for Or { 241 | fn finish(self, cx: &Context) -> RuleWithFields { 242 | let a = self.0.finish(cx); 243 | let b = self.1.finish(cx); 244 | 245 | match (&cx[a.rule], &cx[a.fields]) { 246 | (Rule::Or(a_rules), Fields::Leaf(None)) => RuleWithFields { 247 | rule: cx.intern(Rule::Or( 248 | a_rules.iter().cloned().chain(iter::once(b.rule)).collect(), 249 | )), 250 | fields: Fields::aggregate( 251 | cx, 252 | iter::repeat(a.fields) 253 | .take(a_rules.len()) 254 | .chain(iter::once(b.fields)), 255 | ), 256 | }, 257 | (Rule::Or(a_rules), Fields::Aggregate(a_children)) => RuleWithFields { 258 | rule: cx.intern(Rule::Or( 259 | a_rules.iter().cloned().chain(iter::once(b.rule)).collect(), 260 | )), 261 | fields: Fields::aggregate( 262 | cx, 263 | a_children.iter().cloned().chain(iter::once(b.fields)), 264 | ), 265 | }, 266 | _ => RuleWithFields { 267 | rule: cx.intern(Rule::Or(vec![a.rule, b.rule])), 268 | fields: Fields::aggregate(cx, [a.fields, b.fields].iter().cloned()), 269 | }, 270 | } 271 | } 272 | } 273 | 274 | /// Wrapper for building rules, to allow overloading operators uniformly. 
275 | pub struct Build(R); 276 | 277 | impl Start for Build { 278 | type Out = R; 279 | 280 | fn start(self) -> R { 281 | self.0 282 | } 283 | } 284 | 285 | impl Build { 286 | pub fn finish(self, cx: &Context) -> RuleWithFields 287 | where 288 | R: Finish, 289 | { 290 | Finish::finish(self.0, cx) 291 | } 292 | } 293 | 294 | pub fn empty() -> build::Build { 295 | build::Build(build::Empty) 296 | } 297 | 298 | pub fn eat(pat: impl Into) -> build::Build> { 299 | build::Build(build::Eat(pat.into())) 300 | } 301 | 302 | pub fn call(name: &str) -> build::Build> { 303 | build::Build(build::Call(name)) 304 | } 305 | 306 | /// Helper macro to provide methods and operator overloads on both 307 | /// `RuleWithFields` and `Build`, instead of just one of them. 308 | macro_rules! builder_impls { 309 | (impl<$($g:ident),*> $Self:ty) => { 310 | impl<$($g),*> $Self { 311 | pub fn field<'a>(self, name: &'a str) -> Build::Out>> { 312 | Build(Field(self.start(), name)) 313 | } 314 | 315 | pub fn opt(self) -> Build::Out>> { 316 | Build(Opt(self.start())) 317 | } 318 | 319 | pub fn repeat_many(self) -> Build::Out>> { 320 | Build(RepeatMany(self.start())) 321 | } 322 | 323 | pub fn repeat_many_sep( 324 | self, 325 | sep: S, 326 | kind: SepKind, 327 | ) -> Build::Out, S::Out>> { 328 | Build(RepeatManySep(self.start(), sep.start(), kind)) 329 | } 330 | 331 | pub fn repeat_more(self) -> Build::Out>> { 332 | Build(RepeatMore(self.start())) 333 | } 334 | 335 | pub fn repeat_more_sep( 336 | self, 337 | sep: S, 338 | kind: SepKind, 339 | ) -> Build::Out, S::Out>> { 340 | Build(RepeatMoreSep(self.start(), sep.start(), kind)) 341 | } 342 | } 343 | 344 | impl<$($g,)* Other: Start> Add for $Self { 345 | type Output = Build::Out, Other::Out>>; 346 | 347 | fn add(self, other: Other) -> Self::Output { 348 | Build(Concat(self.start(), other.start())) 349 | } 350 | } 351 | 352 | impl<$($g,)* Other: Start> BitOr for $Self { 353 | type Output = Build::Out, Other::Out>>; 354 | 355 | fn bitor(self, other: 
Other) -> Self::Output { 356 | Build(Or(self.start(), other.start())) 357 | } 358 | } 359 | }; 360 | } 361 | 362 | builder_impls!(impl Build); 363 | builder_impls!(impl<> RuleWithFields); 364 | } 365 | 366 | pub use self::build::{call, eat, empty}; 367 | 368 | impl IRule { 369 | pub fn node_desc(self, cx: &Context) -> String 370 | where 371 | Pat: fmt::Debug, 372 | { 373 | match cx[self] { 374 | Rule::Empty => "".to_string(), 375 | Rule::Eat(ref pat) => format!("{:?}", pat), 376 | Rule::Call(r) => cx[r].to_string(), 377 | Rule::Concat([left, right]) => { 378 | format!("({} {})", left.node_desc(cx), right.node_desc(cx)) 379 | } 380 | Rule::Or(ref cases) => { 381 | assert!(cases.len() > 1); 382 | let mut desc = format!("({}", cases[0].node_desc(cx)); 383 | for rule in &cases[1..] { 384 | desc += " | "; 385 | desc += &rule.node_desc(cx); 386 | } 387 | desc + ")" 388 | } 389 | Rule::Opt(rule) => format!("{}?", rule.node_desc(cx)), 390 | Rule::RepeatMany(elem, None) => format!("{}*", elem.node_desc(cx)), 391 | Rule::RepeatMany(elem, Some((sep, SepKind::Simple))) => { 392 | format!("{}* % {}", elem.node_desc(cx), sep.node_desc(cx)) 393 | } 394 | Rule::RepeatMany(elem, Some((sep, SepKind::Trailing))) => { 395 | format!("{}* %% {}", elem.node_desc(cx), sep.node_desc(cx)) 396 | } 397 | Rule::RepeatMore(elem, None) => format!("{}+", elem.node_desc(cx)), 398 | Rule::RepeatMore(elem, Some((sep, SepKind::Simple))) => { 399 | format!("{}+ % {}", elem.node_desc(cx), sep.node_desc(cx)) 400 | } 401 | Rule::RepeatMore(elem, Some((sep, SepKind::Trailing))) => { 402 | format!("{}+ %% {}", elem.node_desc(cx), sep.node_desc(cx)) 403 | } 404 | } 405 | } 406 | 407 | pub fn node_shape( 408 | self, 409 | cx: &Context, 410 | named_rules: Option<&IndexMap>, 411 | ) -> NodeShape { 412 | match cx[self] { 413 | Rule::Empty | Rule::Eat(_) => NodeShape::Opaque, 414 | Rule::Call(name) => match named_rules.map(|rules| &rules[&name]) { 415 | Some(rule) if cx[rule.fields] != Fields::Leaf(None) => 
NodeShape::Alias(rule.rule), 416 | _ => NodeShape::Opaque, 417 | }, 418 | Rule::Concat([left, right]) => NodeShape::Split(left, right), 419 | Rule::Or(ref cases) => NodeShape::Choice(cases.len()), 420 | Rule::Opt(rule) => NodeShape::Opt(rule), 421 | Rule::RepeatMany(elem, sep) => NodeShape::Opt(cx.intern(Rule::RepeatMore(elem, sep))), 422 | Rule::RepeatMore(rule, None) => { 423 | NodeShape::Split(rule, cx.intern(Rule::RepeatMany(rule, None))) 424 | } 425 | Rule::RepeatMore(elem, Some((sep, SepKind::Simple))) => NodeShape::Split( 426 | elem, 427 | cx.intern(Rule::Opt(cx.intern(Rule::Concat([sep, self])))), 428 | ), 429 | Rule::RepeatMore(elem, Some((sep, SepKind::Trailing))) => NodeShape::Split( 430 | elem, 431 | cx.intern(Rule::Opt(cx.intern(Rule::Concat([ 432 | sep, 433 | cx.intern(Rule::RepeatMany(elem, Some((sep, SepKind::Trailing)))), 434 | ])))), 435 | ), 436 | } 437 | } 438 | 439 | fn can_be_empty( 440 | self, 441 | cache: &mut HashMap>, 442 | cx: &Context, 443 | grammar: &crate::Grammar, 444 | ) -> MaybeKnown { 445 | match cache.entry(self) { 446 | Entry::Occupied(entry) => return *entry.get(), 447 | Entry::Vacant(entry) => { 448 | entry.insert(MaybeKnown::Unknown); 449 | } 450 | }; 451 | let r = match cx[self] { 452 | Rule::Empty | Rule::Opt(_) | Rule::RepeatMany(..) 
=> MaybeKnown::Known(true), 453 | Rule::Eat(ref pat) => pat.matches_empty(), 454 | Rule::Call(rule) => grammar.rules[&rule].rule.can_be_empty(cache, cx, grammar), 455 | Rule::Concat([left, right]) => { 456 | left.can_be_empty(cache, cx, grammar) & right.can_be_empty(cache, cx, grammar) 457 | } 458 | Rule::Or(ref rules) => rules.iter().fold(MaybeKnown::Known(false), |prev, rule| { 459 | prev | rule.can_be_empty(cache, cx, grammar) 460 | }), 461 | Rule::RepeatMore(elem, _) => elem.can_be_empty(cache, cx, grammar), 462 | }; 463 | match r { 464 | MaybeKnown::Known(_) => *cache.get_mut(&self).unwrap() = r, 465 | MaybeKnown::Unknown => { 466 | cache.remove(&self); 467 | } 468 | } 469 | r 470 | } 471 | 472 | pub(crate) fn check_non_empty_opt( 473 | self, 474 | cache: &mut HashMap>, 475 | cx: &Context, 476 | grammar: &crate::Grammar, 477 | ) { 478 | match cx[self] { 479 | Rule::Empty | Rule::Eat(_) | Rule::Call(_) => {} 480 | Rule::Concat([left, right]) => { 481 | left.check_non_empty_opt(cache, cx, grammar); 482 | right.check_non_empty_opt(cache, cx, grammar); 483 | } 484 | Rule::Or(ref rules) => { 485 | for rule in rules { 486 | rule.check_non_empty_opt(cache, cx, grammar); 487 | } 488 | } 489 | Rule::Opt(rule) => { 490 | assert_eq!( 491 | rule.can_be_empty(cache, cx, grammar), 492 | MaybeKnown::Known(false) 493 | ); 494 | rule.check_non_empty_opt(cache, cx, grammar) 495 | } 496 | Rule::RepeatMany(elem, sep) | Rule::RepeatMore(elem, sep) => { 497 | assert_eq!( 498 | elem.can_be_empty(cache, cx, grammar), 499 | MaybeKnown::Known(false) 500 | ); 501 | elem.check_non_empty_opt(cache, cx, grammar); 502 | if let Some((sep, _)) = sep { 503 | sep.check_non_empty_opt(cache, cx, grammar); 504 | } 505 | } 506 | } 507 | } 508 | 509 | pub(crate) fn check_call_names(self, cx: &Context, grammar: &crate::Grammar) { 510 | match cx[self] { 511 | Rule::Empty | Rule::Eat(_) => {} 512 | Rule::Call(rule) => { 513 | assert!( 514 | grammar.rules.contains_key(&rule), 515 | "no rule named `{}`", 
516 | &cx[rule] 517 | ); 518 | } 519 | Rule::Concat([left, right]) => { 520 | left.check_call_names(cx, grammar); 521 | right.check_call_names(cx, grammar); 522 | } 523 | Rule::Or(ref rules) => { 524 | for rule in rules { 525 | rule.check_call_names(cx, grammar); 526 | } 527 | } 528 | Rule::Opt(rule) => rule.check_call_names(cx, grammar), 529 | Rule::RepeatMany(elem, sep) | Rule::RepeatMore(elem, sep) => { 530 | elem.check_call_names(cx, grammar); 531 | if let Some((sep, _)) = sep { 532 | sep.check_call_names(cx, grammar); 533 | } 534 | } 535 | } 536 | } 537 | } 538 | 539 | #[derive(Copy, Clone, Debug, PartialEq, Eq)] 540 | pub enum MaybeKnown { 541 | Known(T), 542 | Unknown, 543 | } 544 | 545 | impl BitOr for MaybeKnown { 546 | type Output = Self; 547 | 548 | fn bitor(self, rhs: Self) -> Self { 549 | match (self, rhs) { 550 | (MaybeKnown::Known(true), _) | (_, MaybeKnown::Known(true)) => MaybeKnown::Known(true), 551 | (MaybeKnown::Known(false), x) | (x, MaybeKnown::Known(false)) => x, 552 | (MaybeKnown::Unknown, MaybeKnown::Unknown) => MaybeKnown::Unknown, 553 | } 554 | } 555 | } 556 | 557 | impl BitAnd for MaybeKnown { 558 | type Output = Self; 559 | 560 | fn bitand(self, rhs: Self) -> Self { 561 | match (self, rhs) { 562 | (MaybeKnown::Known(false), _) | (_, MaybeKnown::Known(false)) => { 563 | MaybeKnown::Known(false) 564 | } 565 | (MaybeKnown::Known(true), x) | (x, MaybeKnown::Known(true)) => x, 566 | (MaybeKnown::Unknown, MaybeKnown::Unknown) => MaybeKnown::Unknown, 567 | } 568 | } 569 | } 570 | 571 | pub trait MatchesEmpty { 572 | fn matches_empty(&self) -> MaybeKnown; 573 | } 574 | 575 | pub trait Folder<'cx, Pat: 'cx + Eq + Hash>: Sized { 576 | fn cx(&self) -> &'cx Context; 577 | fn fold_leaf(&mut self, rule: RuleWithFields) -> RuleWithFields { 578 | rule 579 | } 580 | fn fold_concat(&mut self, left: RuleWithFields, right: RuleWithFields) -> RuleWithFields { 581 | (left.fold(self) + right.fold(self)).finish(self.cx()) 582 | } 583 | fn fold_or(&mut self, 
mut rules: impl Iterator) -> RuleWithFields { 584 | let first = rules.next().unwrap().fold(self); 585 | rules.fold(first, |or, rule| (or | rule.fold(self)).finish(self.cx())) 586 | } 587 | fn fold_opt(&mut self, rule: RuleWithFields) -> RuleWithFields { 588 | rule.fold(self).opt().finish(self.cx()) 589 | } 590 | fn fold_repeat_many( 591 | &mut self, 592 | elem: RuleWithFields, 593 | sep: Option<(RuleWithFields, SepKind)>, 594 | ) -> RuleWithFields { 595 | let elem = elem.fold(self); 596 | let sep = sep.map(|(sep, kind)| (sep.fold(self), kind)); 597 | match sep { 598 | None => elem.repeat_many().finish(self.cx()), 599 | Some((sep, kind)) => elem.repeat_many_sep(sep, kind).finish(self.cx()), 600 | } 601 | } 602 | fn fold_repeat_more( 603 | &mut self, 604 | elem: RuleWithFields, 605 | sep: Option<(RuleWithFields, SepKind)>, 606 | ) -> RuleWithFields { 607 | let elem = elem.fold(self); 608 | let sep = sep.map(|(sep, kind)| (sep.fold(self), kind)); 609 | match sep { 610 | None => elem.repeat_more().finish(self.cx()), 611 | Some((sep, kind)) => elem.repeat_more_sep(sep, kind).finish(self.cx()), 612 | } 613 | } 614 | } 615 | 616 | impl RuleWithFields { 617 | pub fn fold<'cx, Pat: 'cx + Eq + Hash>(self, folder: &mut impl Folder<'cx, Pat>) -> Self { 618 | let cx = folder.cx(); 619 | let aggregate_fields = match cx[self.fields] { 620 | Fields::Leaf(Some(field)) => { 621 | let mut rule = RuleWithFields { 622 | rule: self.rule, 623 | fields: field.sub, 624 | } 625 | .fold(folder); 626 | rule.fields = cx.intern(Fields::Leaf(Some(Field { 627 | name: field.name, 628 | sub: rule.fields, 629 | }))); 630 | return rule; 631 | } 632 | Fields::Leaf(None) => &[][..], 633 | Fields::Aggregate(ref children) => children, 634 | }; 635 | let field_rule = |rule, i| RuleWithFields { 636 | rule, 637 | fields: aggregate_fields 638 | .get(i) 639 | .cloned() 640 | .unwrap_or_else(|| cx.intern(Fields::Leaf(None))), 641 | }; 642 | match cx[self.rule] { 643 | Rule::Empty | Rule::Eat(_) | Rule::Call(_) 
=> return folder.fold_leaf(self), 644 | Rule::Concat([left, right]) => { 645 | folder.fold_concat(field_rule(left, 0), field_rule(right, 1)) 646 | } 647 | Rule::Or(ref rules) => folder.fold_or( 648 | rules 649 | .iter() 650 | .enumerate() 651 | .map(|(i, &rule)| field_rule(rule, i)), 652 | ), 653 | Rule::Opt(rule) => folder.fold_opt(field_rule(rule, 0)), 654 | Rule::RepeatMany(elem, sep) => folder.fold_repeat_many( 655 | field_rule(elem, 0), 656 | sep.map(|(sep, kind)| (field_rule(sep, 1), kind)), 657 | ), 658 | Rule::RepeatMore(elem, sep) => folder.fold_repeat_more( 659 | field_rule(elem, 0), 660 | sep.map(|(sep, kind)| (field_rule(sep, 1), kind)), 661 | ), 662 | } 663 | } 664 | 665 | pub fn insert_whitespace( 666 | self, 667 | cx: &Context, 668 | whitespace: RuleWithFields, 669 | ) -> Self { 670 | assert_eq!(cx[whitespace.fields], Fields::Leaf(None)); 671 | 672 | struct WhitespaceInserter<'cx, Pat> { 673 | cx: &'cx Context, 674 | whitespace: RuleWithFields, 675 | } 676 | 677 | impl<'cx, Pat: Eq + Hash> Folder<'cx, Pat> for WhitespaceInserter<'cx, Pat> { 678 | fn cx(&self) -> &'cx Context { 679 | self.cx 680 | } 681 | // FIXME(eddyb) this will insert too many whitespace rules, 682 | // e.g. `A B? C` becomes `A WS B? WS C`, which when `B` is 683 | // missing, is `A WS WS C`. Even worse, `A? B` ends up as 684 | // `A? WS B`, which has an incorrect leading whitespace. 
fn fold_concat(
                &mut self,
                left: RuleWithFields,
                right: RuleWithFields,
            ) -> RuleWithFields {
                // A B => A WS B
                (left.fold(self) + self.whitespace + right.fold(self)).finish(self.cx())
            }
            fn fold_repeat_many(
                &mut self,
                elem: RuleWithFields,
                sep: Option<(RuleWithFields, SepKind)>,
            ) -> RuleWithFields {
                let elem = elem.fold(self);
                let sep = sep.map(|(sep, kind)| (sep.fold(self), kind));
                match sep {
                    // A* => A* % WS
                    None => elem
                        .repeat_more_sep(self.whitespace, SepKind::Simple)
                        .finish(self.cx),
                    // A* % B => A* % (WS B WS)
                    Some((sep, SepKind::Simple)) => elem
                        .repeat_more_sep(self.whitespace + sep + self.whitespace, SepKind::Simple)
                        .finish(self.cx),
                    // FIXME(cad97) this will insert too many whitespace rules
                    // A* %% B => ???
                    // Currently, A* %% (WS B WS), which allows trailing whitespace incorrectly
                    Some((sep, SepKind::Trailing)) => elem
                        .repeat_more_sep(self.whitespace + sep + self.whitespace, SepKind::Trailing)
                        .finish(self.cx),
                }
            }
            fn fold_repeat_more(
                &mut self,
                elem: RuleWithFields,
                sep: Option<(RuleWithFields, SepKind)>,
            ) -> RuleWithFields {
                let elem = elem.fold(self);
                let sep = sep.map(|(sep, kind)| (sep.fold(self), kind));
                match sep {
                    // A+ => A+ % WS
                    None => elem
                        .repeat_more_sep(self.whitespace, SepKind::Simple)
                        .finish(self.cx),
                    // A+ % B => A+ % (WS B WS)
                    // BUGFIX(review): `elem` is already folded at the top of
                    // this method; the original called `.fold(self)` on it a
                    // second time here (unlike the sibling arms and
                    // `fold_repeat_many`), which would insert whitespace into
                    // the element twice. The redundant fold is removed.
                    Some((sep, SepKind::Simple)) => elem
                        .repeat_more_sep(self.whitespace + sep + self.whitespace, SepKind::Simple)
                        .finish(self.cx),
                    // A+ %% B => A+ % (WS B WS) (WS B)?
Some((sep, SepKind::Trailing)) => (elem
                        .repeat_more_sep(self.whitespace + sep + self.whitespace, SepKind::Simple)
                        + (self.whitespace + sep).opt())
                    .finish(self.cx),
                }
            }
        }

        self.fold(&mut WhitespaceInserter { cx, whitespace })
    }
}

// ===== src/scannerless.rs =====

use crate::rule::{MatchesEmpty, MaybeKnown};
use std::char;
use std::fmt;
use std::ops::{self, Bound, RangeBounds};

/// Scannerless contexts intern `Pat` patterns directly.
// NOTE(review): the dump lost the generic parameters here and on `Pat`
// below (HTML-tag stripping); restored so the alias and enum type-check
// with the `Pat<&str, _>`/`Pat<String, _>` impls that follow.
pub type Context<S = String> = crate::context::Context<Pat<S>>;

/// A scannerless pattern: either a literal string or an inclusive
/// character range.
#[derive(Copy, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub enum Pat<S = String, C = char> {
    String(S),
    Range(C, C),
}

impl<S: fmt::Debug> fmt::Debug for Pat<S> {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        match self {
            Pat::String(s) => s.fmt(f),
            &Pat::Range(start, end) => {
                // Print ranges compactly, eliding bounds that cover the
                // whole `char` space (`'\0'` below, `char::MAX` above).
                if start != '\0' {
                    start.fmt(f)?;
                }
                f.write_str("..")?;
                if end != char::MAX {
                    f.write_str("=")?;
                    end.fmt(f)?;
                }
                Ok(())
            }
        }
    }
}

impl<'a, C> From<&'a str> for Pat<&'a str, C> {
    fn from(s: &'a str) -> Self {
        Pat::String(s)
    }
}

impl<C> From<&str> for Pat<String, C> {
    fn from(s: &str) -> Self {
        Pat::String(s.to_string())
    }
}

impl<C> From<String> for Pat<String, C> {
    fn from(s: String) -> Self {
        Pat::String(s)
    }
}

// HACK(eddyb) this should be generic over `RangeBounds<char>`,
// but that errors with: "upstream crates may add new impl of trait
// `std::ops::RangeBounds<char>` for type `&str` in future versions"
impl<'a, S> From<(Bound<&'a char>, Bound<&'a char>)> for Pat<S> {
    fn from(range: (Bound<&char>, Bound<&char>)) -> Self {
        // Normalize any bound pair to an inclusive `Range(start, end)`.
        let start = match range.start_bound() {
            Bound::Included(&c) => c,
            Bound::Excluded(&c) => {
                char::from_u32(c as u32 +
1).expect("excluded lower char bound too high")
            }
            Bound::Unbounded => '\0',
        };
        let end = match range.end_bound() {
            Bound::Included(&c) => c,
            Bound::Excluded(&c) => {
                // Inclusive upper bound is one below an exclusive one;
                // panics if the exclusive bound was `'\0'`.
                char::from_u32(c as u32 - 1).expect("excluded upper char bound too low")
            }
            Bound::Unbounded => char::MAX,
        };
        Pat::Range(start, end)
    }
}

// Generate `From<range-type>` impls by routing every concrete `char`
// range type through the `(Bound<&char>, Bound<&char>)` impl above.
// NOTE(review): the dump lost the `<S>`/`<char>` parameters in the macro
// body and invocation (HTML-tag stripping); restored here.
macro_rules! range_impls {
    ($($ty:ty),*) => {
        $(impl<S> From<$ty> for Pat<S> {
            fn from(range: $ty) -> Self {
                Self::from((range.start_bound(), range.end_bound()))
            }
        })*
    }
}
range_impls! {
    (Bound<char>, Bound<char>),
    ops::RangeTo<char>,
    ops::Range<char>,
    ops::RangeInclusive<char>,
    ops::RangeFull,
    ops::RangeFrom<char>,
    ops::RangeToInclusive<char>
}

impl<S: AsRef<str>, C> MatchesEmpty for Pat<S, C> {
    /// A string pattern matches the empty input iff it is itself empty;
    /// a character range always consumes exactly one character.
    fn matches_empty(&self) -> MaybeKnown<bool> {
        MaybeKnown::Known(match self {
            Pat::String(s) => s.as_ref().is_empty(),
            Pat::Range(..) => false,
        })
    }
}