├── .cargo └── config.toml ├── .envrc ├── .github └── workflows │ └── ci.yml ├── .gitignore ├── .gitmodules ├── Cargo.lock ├── Cargo.toml ├── LICENSE ├── README.md ├── automata ├── Cargo.lock ├── Cargo.toml ├── build.rs ├── examples │ ├── matched_parentheses.rs │ ├── matched_parentheses_codegen │ │ ├── Cargo.toml │ │ ├── build.rs │ │ └── src │ │ │ ├── autogen.rs │ │ │ ├── main.rs │ │ │ └── parser.rs │ └── matched_parentheses_nondeterministic.rs ├── flake.lock ├── flake.nix └── src │ ├── check.rs │ ├── combinators.rs │ ├── ctrl.rs │ ├── curry.rs │ ├── f.rs │ ├── generalize.rs │ ├── graph.rs │ ├── in_progress.rs │ ├── input.rs │ ├── lib.rs │ ├── map_indices.rs │ ├── merge.rs │ ├── qc.rs │ ├── range.rs │ ├── range_map.rs │ ├── reindex.rs │ ├── run.rs │ ├── state.rs │ ├── test.rs │ ├── to_src.rs │ ├── transition.rs │ └── update.rs ├── build.rs ├── ci.sh ├── examples └── json │ ├── Cargo.toml │ ├── build.rs │ ├── src │ ├── main.rs │ ├── parser.rs │ └── test.rs │ └── types │ ├── Cargo.toml │ └── src │ └── lib.rs ├── flake.lock ├── flake.nix ├── rust-toolchain.toml └── src ├── lib.rs └── test.rs /.cargo/config.toml: -------------------------------------------------------------------------------- 1 | [env] 2 | MIRIFLAGS = "-Zmiri-disable-isolation" 3 | QUICKCHECK_GENERATOR_SIZE = "16" 4 | RUST_BACKTRACE = "1" 5 | 6 | [term] 7 | color = "always" 8 | -------------------------------------------------------------------------------- /.envrc: -------------------------------------------------------------------------------- 1 | use flake . 
2 | -------------------------------------------------------------------------------- /.github/workflows/ci.yml: -------------------------------------------------------------------------------- 1 | name: CI 2 | 3 | on: 4 | push: 5 | branches: [ "main" ] 6 | pull_request: 7 | branches: [ "main" ] 8 | 9 | env: 10 | RUST_BACKTRACE: full 11 | 12 | jobs: 13 | build: 14 | runs-on: ubuntu-latest 15 | steps: 16 | - uses: actions/checkout@v3 17 | - name: Install Nix 18 | uses: cachix/install-nix-action@v22 19 | with: 20 | nix_path: nixpkgs=channel:nixos-unstable 21 | - name: CI 22 | run: ./ci.sh 23 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | **/.DS_Store 2 | **/target/ 3 | **/result 4 | **/examples/**/Cargo.lock 5 | **/autogen.rs 6 | **/.direnv/ 7 | -------------------------------------------------------------------------------- /.gitmodules: -------------------------------------------------------------------------------- 1 | [submodule "examples/json/JSONTestSuite"] 2 | path = examples/json/JSONTestSuite 3 | url = https://github.com/nst/JSONTestSuite.git 4 | -------------------------------------------------------------------------------- /Cargo.lock: -------------------------------------------------------------------------------- 1 | # This file is automatically @generated by Cargo. 2 | # It is not intended for manual editing. 
3 | version = 3 4 | 5 | [[package]] 6 | name = "aho-corasick" 7 | version = "1.1.2" 8 | source = "registry+https://github.com/rust-lang/crates.io-index" 9 | checksum = "b2969dcb958b36655471fc61f7e416fa76033bdd4bfed0678d8fee1e2d07a1f0" 10 | dependencies = [ 11 | "memchr", 12 | ] 13 | 14 | [[package]] 15 | name = "cfg-if" 16 | version = "1.0.0" 17 | source = "registry+https://github.com/rust-lang/crates.io-index" 18 | checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" 19 | 20 | [[package]] 21 | name = "env_logger" 22 | version = "0.8.4" 23 | source = "registry+https://github.com/rust-lang/crates.io-index" 24 | checksum = "a19187fea3ac7e84da7dacf48de0c45d63c6a76f9490dae389aead16c243fce3" 25 | dependencies = [ 26 | "log", 27 | "regex", 28 | ] 29 | 30 | [[package]] 31 | name = "getrandom" 32 | version = "0.2.10" 33 | source = "registry+https://github.com/rust-lang/crates.io-index" 34 | checksum = "be4136b2a15dd319360be1c07d9933517ccf0be8f16bf62a3bee4f0d618df427" 35 | dependencies = [ 36 | "cfg-if", 37 | "libc", 38 | "wasi", 39 | ] 40 | 41 | [[package]] 42 | name = "inator" 43 | version = "0.3.0" 44 | dependencies = [ 45 | "inator-automata", 46 | "quickcheck", 47 | ] 48 | 49 | [[package]] 50 | name = "inator-automata" 51 | version = "0.1.0" 52 | dependencies = [ 53 | "quickcheck", 54 | "rand", 55 | ] 56 | 57 | [[package]] 58 | name = "json-inator" 59 | version = "0.1.0" 60 | dependencies = [ 61 | "inator", 62 | "types", 63 | ] 64 | 65 | [[package]] 66 | name = "libc" 67 | version = "0.2.149" 68 | source = "registry+https://github.com/rust-lang/crates.io-index" 69 | checksum = "a08173bc88b7955d1b3145aa561539096c421ac8debde8cbc3612ec635fee29b" 70 | 71 | [[package]] 72 | name = "log" 73 | version = "0.4.20" 74 | source = "registry+https://github.com/rust-lang/crates.io-index" 75 | checksum = "b5e6163cb8c49088c2c36f57875e58ccd8c87c7427f7fbd50ea6710b2f3f2e8f" 76 | 77 | [[package]] 78 | name = "matched_parentheses_codegen" 79 | version = "0.1.0" 80 | 
dependencies = [ 81 | "inator-automata", 82 | "rand", 83 | ] 84 | 85 | [[package]] 86 | name = "memchr" 87 | version = "2.6.4" 88 | source = "registry+https://github.com/rust-lang/crates.io-index" 89 | checksum = "f665ee40bc4a3c5590afb1e9677db74a508659dfd71e126420da8274909a0167" 90 | 91 | [[package]] 92 | name = "ppv-lite86" 93 | version = "0.2.17" 94 | source = "registry+https://github.com/rust-lang/crates.io-index" 95 | checksum = "5b40af805b3121feab8a3c29f04d8ad262fa8e0561883e7653e024ae4479e6de" 96 | 97 | [[package]] 98 | name = "quickcheck" 99 | version = "1.0.3" 100 | source = "registry+https://github.com/rust-lang/crates.io-index" 101 | checksum = "588f6378e4dd99458b60ec275b4477add41ce4fa9f64dcba6f15adccb19b50d6" 102 | dependencies = [ 103 | "env_logger", 104 | "log", 105 | "rand", 106 | ] 107 | 108 | [[package]] 109 | name = "rand" 110 | version = "0.8.5" 111 | source = "registry+https://github.com/rust-lang/crates.io-index" 112 | checksum = "34af8d1a0e25924bc5b7c43c079c942339d8f0a8b57c39049bef581b46327404" 113 | dependencies = [ 114 | "libc", 115 | "rand_chacha", 116 | "rand_core", 117 | ] 118 | 119 | [[package]] 120 | name = "rand_chacha" 121 | version = "0.3.1" 122 | source = "registry+https://github.com/rust-lang/crates.io-index" 123 | checksum = "e6c10a63a0fa32252be49d21e7709d4d4baf8d231c2dbce1eaa8141b9b127d88" 124 | dependencies = [ 125 | "ppv-lite86", 126 | "rand_core", 127 | ] 128 | 129 | [[package]] 130 | name = "rand_core" 131 | version = "0.6.4" 132 | source = "registry+https://github.com/rust-lang/crates.io-index" 133 | checksum = "ec0be4795e2f6a28069bec0b5ff3e2ac9bafc99e6a9a7dc3547996c5c816922c" 134 | dependencies = [ 135 | "getrandom", 136 | ] 137 | 138 | [[package]] 139 | name = "regex" 140 | version = "1.10.2" 141 | source = "registry+https://github.com/rust-lang/crates.io-index" 142 | checksum = "380b951a9c5e80ddfd6136919eef32310721aa4aacd4889a8d39124b026ab343" 143 | dependencies = [ 144 | "aho-corasick", 145 | "memchr", 146 | 
"regex-automata", 147 | "regex-syntax", 148 | ] 149 | 150 | [[package]] 151 | name = "regex-automata" 152 | version = "0.4.3" 153 | source = "registry+https://github.com/rust-lang/crates.io-index" 154 | checksum = "5f804c7828047e88b2d32e2d7fe5a105da8ee3264f01902f796c8e067dc2483f" 155 | dependencies = [ 156 | "aho-corasick", 157 | "memchr", 158 | "regex-syntax", 159 | ] 160 | 161 | [[package]] 162 | name = "regex-syntax" 163 | version = "0.8.2" 164 | source = "registry+https://github.com/rust-lang/crates.io-index" 165 | checksum = "c08c74e62047bb2de4ff487b251e4a92e24f48745648451635cec7d591162d9f" 166 | 167 | [[package]] 168 | name = "types" 169 | version = "0.1.0" 170 | dependencies = [ 171 | "inator", 172 | ] 173 | 174 | [[package]] 175 | name = "wasi" 176 | version = "0.11.0+wasi-snapshot-preview1" 177 | source = "registry+https://github.com/rust-lang/crates.io-index" 178 | checksum = "9c8d87e72b64a3b4db28d11ce29237c246188f4f51057d65a7eab63b7987e423" 179 | -------------------------------------------------------------------------------- /Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "inator" 3 | authors = ["Will Sturgeon "] 4 | version = "0.3.0" 5 | edition = "2021" 6 | description = "An evil parsing library." 
7 | license = "MPL-2.0" 8 | repository = "https://github.com/wrsturgeon/inator" 9 | build = "build.rs" 10 | 11 | [workspace] 12 | members = [ 13 | "automata", 14 | "automata/examples/matched_parentheses_codegen", 15 | "examples/json", 16 | ] 17 | 18 | [dependencies] 19 | inator-automata = { path = "automata" } 20 | 21 | # Optional dependencies: 22 | quickcheck = { version = "1.0.3", optional = true } 23 | 24 | [features] 25 | quickcheck = ["dep:quickcheck", "inator-automata/quickcheck"] 26 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Mozilla Public License Version 2.0 2 | 3 | 1. Definitions 4 | 5 | 1.1. "Contributor" means each individual or legal entity that creates, contributes 6 | to the creation of, or owns Covered Software. 7 | 8 | 1.2. "Contributor Version" means the combination of the Contributions of others 9 | (if any) used by a Contributor and that particular Contributor's Contribution. 10 | 11 | 1.3. "Contribution" means Covered Software of a particular Contributor. 12 | 13 | 1.4. "Covered Software" means Source Code Form to which the initial Contributor 14 | has attached the notice in Exhibit A, the Executable Form of such Source Code 15 | Form, and Modifications of such Source Code Form, in each case including portions 16 | thereof. 17 | 18 | 1.5. "Incompatible With Secondary Licenses" means 19 | 20 | (a) that the initial Contributor has attached the notice described in Exhibit 21 | B to the Covered Software; or 22 | 23 | (b) that the Covered Software was made available under the terms of version 24 | 1.1 or earlier of the License, but not also under the terms of a Secondary 25 | License. 26 | 27 | 1.6. "Executable Form" means any form of the work other than Source Code Form. 28 | 29 | 1.7. 
"Larger Work" means a work that combines Covered Software with other 30 | material, in a separate file or files, that is not Covered Software. 31 | 32 | 1.8. "License" means this document. 33 | 34 | 1.9. "Licensable" means having the right to grant, to the maximum extent possible, 35 | whether at the time of the initial grant or subsequently, any and all of the 36 | rights conveyed by this License. 37 | 38 | 1.10. "Modifications" means any of the following: 39 | 40 | (a) any file in Source Code Form that results from an addition to, deletion 41 | from, or modification of the contents of Covered Software; or 42 | 43 | (b) any new file in Source Code Form that contains any Covered Software. 44 | 45 | 1.11. "Patent Claims" of a Contributor means any patent claim(s), including 46 | without limitation, method, process, and apparatus claims, in any patent Licensable 47 | by such Contributor that would be infringed, but for the grant of the License, 48 | by the making, using, selling, offering for sale, having made, import, or 49 | transfer of either its Contributions or its Contributor Version. 50 | 51 | 1.12. "Secondary License" means either the GNU General Public License, Version 52 | 2.0, the GNU Lesser General Public License, Version 2.1, the GNU Affero General 53 | Public License, Version 3.0, or any later versions of those licenses. 54 | 55 | 1.13. "Source Code Form" means the form of the work preferred for making modifications. 56 | 57 | 1.14. "You" (or "Your") means an individual or a legal entity exercising rights 58 | under this License. For legal entities, "You" includes any entity that controls, 59 | is controlled by, or is under common control with You. For purposes of this 60 | definition, "control" means (a) the power, direct or indirect, to cause the 61 | direction or management of such entity, whether by contract or otherwise, 62 | or (b) ownership of more than fifty percent (50%) of the outstanding shares 63 | or beneficial ownership of such entity. 
64 | 65 | 2. License Grants and Conditions 66 | 67 | 2.1. Grants 68 | Each Contributor hereby grants You a world-wide, royalty-free, non-exclusive 69 | license: 70 | 71 | (a) under intellectual property rights (other than patent or trademark) Licensable 72 | by such Contributor to use, reproduce, make available, modify, display, perform, 73 | distribute, and otherwise exploit its Contributions, either on an unmodified 74 | basis, with Modifications, or as part of a Larger Work; and 75 | 76 | (b) under Patent Claims of such Contributor to make, use, sell, offer for 77 | sale, have made, import, and otherwise transfer either its Contributions or 78 | its Contributor Version. 79 | 80 | 2.2. Effective Date 81 | The licenses granted in Section 2.1 with respect to any Contribution become 82 | effective for each Contribution on the date the Contributor first distributes 83 | such Contribution. 84 | 85 | 2.3. Limitations on Grant Scope 86 | The licenses granted in this Section 2 are the only rights granted under this 87 | License. No additional rights or licenses will be implied from the distribution 88 | or licensing of Covered Software under this License. Notwithstanding Section 89 | 2.1(b) above, no patent license is granted by a Contributor: 90 | 91 | (a) for any code that a Contributor has removed from Covered Software; or 92 | 93 | (b) for infringements caused by: (i) Your and any other third party's modifications 94 | of Covered Software, or (ii) the combination of its Contributions with other 95 | software (except as part of its Contributor Version); or 96 | 97 | (c) under Patent Claims infringed by Covered Software in the absence of its 98 | Contributions. 99 | 100 | This License does not grant any rights in the trademarks, service marks, or 101 | logos of any Contributor (except as may be necessary to comply with the notice 102 | requirements in Section 3.4). 103 | 104 | 2.4. 
Subsequent Licenses 105 | No Contributor makes additional grants as a result of Your choice to distribute 106 | the Covered Software under a subsequent version of this License (see Section 107 | 10.2) or under the terms of a Secondary License (if permitted under the terms 108 | of Section 3.3). 109 | 110 | 2.5. Representation 111 | Each Contributor represents that the Contributor believes its Contributions 112 | are its original creation(s) or it has sufficient rights to grant the rights 113 | to its Contributions conveyed by this License. 114 | 115 | 2.6. Fair Use 116 | This License is not intended to limit any rights You have under applicable 117 | copyright doctrines of fair use, fair dealing, or other equivalents. 118 | 119 | 2.7. Conditions 120 | Sections 3.1, 3.2, 3.3, and 3.4 are conditions of the licenses granted in 121 | Section 2.1. 122 | 123 | 3. Responsibilities 124 | 125 | 3.1. Distribution of Source Form 126 | All distribution of Covered Software in Source Code Form, including any Modifications 127 | that You create or to which You contribute, must be under the terms of this 128 | License. You must inform recipients that the Source Code Form of the Covered 129 | Software is governed by the terms of this License, and how they can obtain 130 | a copy of this License. You may not attempt to alter or restrict the recipients' 131 | rights in the Source Code Form. 132 | 133 | 3.2. 
Distribution of Executable Form 134 | If You distribute Covered Software in Executable Form then: 135 | 136 | (a) such Covered Software must also be made available in Source Code Form, 137 | as described in Section 3.1, and You must inform recipients of the Executable 138 | Form how they can obtain a copy of such Source Code Form by reasonable means 139 | in a timely manner, at a charge no more than the cost of distribution to the 140 | recipient; and 141 | 142 | (b) You may distribute such Executable Form under the terms of this License, 143 | or sublicense it under different terms, provided that the license for the 144 | Executable Form does not attempt to limit or alter the recipients' rights 145 | in the Source Code Form under this License. 146 | 147 | 3.3. Distribution of a Larger Work 148 | You may create and distribute a Larger Work under terms of Your choice, provided 149 | that You also comply with the requirements of this License for the Covered 150 | Software. If the Larger Work is a combination of Covered Software with a work 151 | governed by one or more Secondary Licenses, and the Covered Software is not 152 | Incompatible With Secondary Licenses, this License permits You to additionally 153 | distribute such Covered Software under the terms of such Secondary License(s), 154 | so that the recipient of the Larger Work may, at their option, further distribute 155 | the Covered Software under the terms of either this License or such Secondary 156 | License(s). 157 | 158 | 3.4. Notices 159 | You may not remove or alter the substance of any license notices (including 160 | copyright notices, patent notices, disclaimers of warranty, or limitations 161 | of liability) contained within the Source Code Form of the Covered Software, 162 | except that You may alter any license notices to the extent required to remedy 163 | known factual inaccuracies. 164 | 165 | 3.5. 
Application of Additional Terms 166 | You may choose to offer, and to charge a fee for, warranty, support, indemnity 167 | or liability obligations to one or more recipients of Covered Software. However, 168 | You may do so only on Your own behalf, and not on behalf of any Contributor. 169 | You must make it absolutely clear that any such warranty, support, indemnity, 170 | or liability obligation is offered by You alone, and You hereby agree to indemnify 171 | every Contributor for any liability incurred by such Contributor as a result 172 | of warranty, support, indemnity or liability terms You offer. You may include 173 | additional disclaimers of warranty and limitations of liability specific to 174 | any jurisdiction. 175 | 176 | 4. Inability to Comply Due to Statute or Regulation 177 | If it is impossible for You to comply with any of the terms of this License 178 | with respect to some or all of the Covered Software due to statute, judicial 179 | order, or regulation then You must: (a) comply with the terms of this License 180 | to the maximum extent possible; and (b) describe the limitations and the code 181 | they affect. Such description must be placed in a text file included with 182 | all distributions of the Covered Software under this License. Except to the 183 | extent prohibited by statute or regulation, such description must be sufficiently 184 | detailed for a recipient of ordinary skill to be able to understand it. 185 | 186 | 5. Termination 187 | 188 | 5.1. The rights granted under this License will terminate automatically if 189 | You fail to comply with any of its terms. 
However, if You become compliant, 190 | then the rights granted under this License from a particular Contributor are 191 | reinstated (a) provisionally, unless and until such Contributor explicitly 192 | and finally terminates Your grants, and (b) on an ongoing basis, if such Contributor 193 | fails to notify You of the non-compliance by some reasonable means prior to 194 | 60 days after You have come back into compliance. Moreover, Your grants from 195 | a particular Contributor are reinstated on an ongoing basis if such Contributor 196 | notifies You of the non-compliance by some reasonable means, this is the first 197 | time You have received notice of non-compliance with this License from such 198 | Contributor, and You become compliant prior to 30 days after Your receipt 199 | of the notice. 200 | 201 | 5.2. If You initiate litigation against any entity by asserting a patent infringement 202 | claim (excluding declaratory judgment actions, counter-claims, and cross-claims) 203 | alleging that a Contributor Version directly or indirectly infringes any patent, 204 | then the rights granted to You by any and all Contributors for the Covered 205 | Software under Section 2.1 of this License shall terminate. 206 | 207 | 5.3. In the event of termination under Sections 5.1 or 5.2 above, all end 208 | user license agreements (excluding distributors and resellers) which have 209 | been validly granted by You or Your distributors under this License prior 210 | to termination shall survive termination. 211 | 212 | 6. Disclaimer of Warranty 213 | Covered Software is provided under this License on an "as is" basis, without 214 | warranty of any kind, either expressed, implied, or statutory, including, 215 | without limitation, warranties that the Covered Software is free of defects, 216 | merchantable, fit for a particular purpose or non-infringing. The entire risk 217 | as to the quality and performance of the Covered Software is with You. 
Should 218 | any Covered Software prove defective in any respect, You (not any Contributor) 219 | assume the cost of any necessary servicing, repair, or correction. This disclaimer 220 | of warranty constitutes an essential part of this License. No use of any Covered 221 | Software is authorized under this License except under this disclaimer. 222 | 223 | 7. Limitation of Liability 224 | Under no circumstances and under no legal theory, whether tort (including 225 | negligence), contract, or otherwise, shall any Contributor, or anyone who 226 | distributes Covered Software as permitted above, be liable to You for any 227 | direct, indirect, special, incidental, or consequential damages of any character 228 | including, without limitation, damages for lost profits, loss of goodwill, 229 | work stoppage, computer failure or malfunction, or any and all other commercial 230 | damages or losses, even if such party shall have been informed of the possibility 231 | of such damages. This limitation of liability shall not apply to liability 232 | for death or personal injury resulting from such party's negligence to the 233 | extent applicable law prohibits such limitation. Some jurisdictions do not 234 | allow the exclusion or limitation of incidental or consequential damages, 235 | so this exclusion and limitation may not apply to You. 236 | 237 | 8. Litigation 238 | Any litigation relating to this License may be brought only in the courts 239 | of a jurisdiction where the defendant maintains its principal place of business 240 | and such litigation shall be governed by laws of that jurisdiction, without 241 | reference to its conflict-of-law provisions. Nothing in this Section shall 242 | prevent a party's ability to bring cross-claims or counter-claims. 243 | 244 | 9. Miscellaneous 245 | This License represents the complete agreement concerning the subject matter 246 | hereof. 
If any provision of this License is held to be unenforceable, such 247 | provision shall be reformed only to the extent necessary to make it enforceable. 248 | Any law or regulation which provides that the language of a contract shall 249 | be construed against the drafter shall not be used to construe this License 250 | against a Contributor. 251 | 252 | 10. Versions of the License 253 | 254 | 10.1. New Versions 255 | Mozilla Foundation is the license steward. Except as provided in Section 10.3, 256 | no one other than the license steward has the right to modify or publish new 257 | versions of this License. Each version will be given a distinguishing version 258 | number. 259 | 260 | 10.2. Effect of New Versions 261 | You may distribute the Covered Software under the terms of the version of 262 | the License under which You originally received the Covered Software, or under 263 | the terms of any subsequent version published by the license steward. 264 | 265 | 10.3. Modified Versions 266 | If you create software not governed by this License, and you want to create 267 | a new license for such software, you may create and use a modified version 268 | of this License if you rename the license and remove any references to the 269 | name of the license steward (except to note that such modified license differs 270 | from this License). 271 | 272 | 10.4. Distributing Source Code Form that is Incompatible With Secondary Licenses 273 | If You choose to distribute Source Code Form that is Incompatible With Secondary 274 | Licenses under the terms of this version of the License, the notice described 275 | in Exhibit B of this License must be attached. 276 | 277 | Exhibit A - Source Code Form License Notice 278 | 279 | This Source Code Form is subject to the terms of the Mozilla Public License, 280 | v. 2.0. If a copy of the MPL was not distributed with this file, You can obtain 281 | one at http://mozilla.org/MPL/2.0/. 
282 | 283 | If it is not possible or desirable to put the notice in a particular file, 284 | then You may include the notice in a location (such as a LICENSE file in a 285 | relevant directory) where a recipient would be likely to look for such a notice. 286 | 287 | You may add additional accurate notices of copyright ownership. 288 | 289 | Exhibit B - "Incompatible With Secondary Licenses" Notice 290 | 291 | This Source Code Form is "Incompatible With Secondary Licenses", as defined 292 | by the Mozilla Public License, v. 2.0. 293 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # `inator`: An evil parsing library. 2 | ## You supply the evil plan; we build the _**inator**_. 3 | 4 | ![Portrait of the eminent Dr. Heinz Doofenshmirtz](http://images6.fanpop.com/image/polls/1198000/1198459_1364687083851_full.jpg) 5 | 6 | 🚧 Development still ongoing! 🚧 7 | 8 | ## TL;DR 9 | 10 | We ask for a specification and turn it into a graph that knows exactly what can happen at each step. 11 | We then ruthlessly cut things that won't happen, combine identical ones, and output the result as a Rust file. 12 | Compile this with the rest of your code and, _voila!_, you've got a hand-rolled zero-copy parser. 13 | 14 | ## The long explanation 15 | 16 | Given a language like "`AAAAAAAAB` or `AAAAAAAAC`", we'd like to write a parser that takes the common-sense route: 17 | blow through all the `A`s as a group, _then_ match on the `B` or `C` once. 18 | This has several advantages: 19 | - We never rewind the tape, so we don't need to store any previous input. 20 | - In the limit, it's twice as fast as trying one then the other. With _n_ alternatives for the last character, it's _n_ times faster. 21 | This general idea goes by the name _zero-copy streaming parsers_, and the pleasure of constructing them usually ranks similarly to being repeatedly stabbed. 
22 | 23 | Yet, as far as I know, no existing parsing library tries to optimize these cases at compile time, and for good reason: it's _hard_. 24 | 25 | The problem is that, the more we can do with a parser, the less we can say about it before it runs. 26 | I've tried to strike a balance between the two with a model of computation similar to [pushdown automata](https://en.wikipedia.org/wiki/Pushdown_automaton) that maps nicely onto stack-based bare-metal code. Here's the definition, with overloaded terms _italicized_: 27 | - A _parser_ is a _graph_. 28 | - A _graph_ is a set of _states_ with an initial _index_. 29 | - An _index_ is implementation-defined (literally, by a Rust trait), but it's usually either a natural number or a set thereof, 30 | corresponding to deterministic or nondeterministic evaluation, respectively. 31 | - A _state_ is a set of _curried transitions_ and a (possibly empty) set of error messages; if the input stream ends on a state with no error messages, we accept the input. 32 | - A _curried transition_ can take two forms: 33 | - Accept any input token and trigger one _transition_ for them all, or 34 | - Map a set of disjoint input ranges to potentially different transitions for each, and have an optional fallback transition if no ranges match. 35 | Note that we cannot inspect the stack before choosing a transition. 36 | - A _transition_ is one of three things: 37 | - _Lateral_: Don't touch the stack; just move to a new state index. 38 | - _Return_: Pop from the stack (reject if the stack is empty) and move to the state index we just popped. 39 | Note that this is exactly how Rust's actual call stack works in assembly. 40 | Also note that we can't move to a _specific_ state with a return/pop statement; this would entail returning a function pointer at runtime, which is pointlessly (get it?) slow. 41 | - _Call_: Push a specified _destination index_ onto the stack and move to a specified _detour index_.
42 | Note that the detour index needs to have a return/pop statement, at which it will move to the specified destination index, 43 | but (as of yet) we don't check that a return statement is reachable from any given detour index. 44 | You may have noticed that the structure of states and transitions maps remarkably well onto functions and calls: 45 | - States are functions; 46 | - Lateral transitions are [tail calls](https://en.wikipedia.org/wiki/Tail_call); 47 | - Return transitions are actual `return` statements; and 48 | - Calls are function calls that are not at the end of a block of execution (if they were, they would be lateral transitions). 49 | And this is exactly how we [~~trans~~compile](https://hisham.hm/2021/02/25/compiler-versus-transpiler-what-is-a-compiler-anyway/) it. 50 | 51 | Lastly, on top of this graph, parsers _output data_. We can't prove anything about the values you compute along the way—it's essentially having one model of computation (Rust) riding another (automata)—but, in practice, being able to have your parsers _output_ something (e.g., an abstract syntax tree) without having to run a lexer first is invaluable. 52 | 53 | The [`automata` directory](automata/) in this repository contains both an interpreter and a compiler for this language, and I remain extremely confident that their operation is always equivalent, but property-testing involving compiling Rust code is extremely difficult. In the future, I would like to prove their equivalence in [Coq](https://github.com/coq/coq) and [extract](https://softwarefoundations.cis.upenn.edu/lf-current/Extraction.html) the proof into Haskell and OCaml versions of this library. Way in the future! 🔮 54 | 55 | ## What does this whole process look like? 56 | 57 | Surprisingly, it looks a lot like just writing down what you want. 58 | The key idea is that ***parsers are data***, and you can pass them around, modify them, and combine them just like anything else. 
59 | 60 | Here's how we parse either "abc" or "azc": 61 | ```rust 62 | use inator::toss; // Combinator that filters for a certain character, then forgets it. 63 | let a = toss('a'); 64 | let b = toss('b'); 65 | let c = toss('c'); 66 | let z = toss('z'); 67 | let abc_azc = a >> (b | z) >> c; 68 | ``` 69 | The above gets compiled down to a function that takes an iterator, 70 | - checks that the first item is `a`; 71 | - runs a `match` statement on the second item without rewinding, sending both `b` and `z` to the same third state; 72 | - checks that the third item is `c`; then 73 | - checks that the next item is `None`. 74 | It's not remarkable when spelled out, but most simple parsers would allocate a buffer, read input into the buffer, rewind to the top, try `abc`, rewind, try `azc`, then reject. 75 | By the time a similar parser has even allocated its buffer, let alone read the input, our parser is almost surely already done. 76 | Plus, this approach to parsing requires zero allocations, so even microcontrollers running bare-metal code couldn't get any faster if you tried. 77 | 78 | Then we can take that whole above parser and pass it around, e.g. to put it in parentheses: 79 | ```rust 80 | // Copied from above: 81 | use inator::toss; 82 | let a = toss('a'); 83 | let b = toss('b'); 84 | let c = toss('c'); 85 | let z = toss('z'); 86 | let abc_azc = a >> (b | z) >> c; 87 | 88 | // Function from a parser to another parser! 89 | let parenthesized = |p| toss('(') >> p >> toss(')'); 90 | 91 | let paren_abc_azc = parenthesized( 92 | abc_azc, // <-- Parser we just made above, passed around as data 93 | ); 94 | ``` 95 | Above, if `p` is a parser that accepts `ABC`, then `parenthesized(p)` will accept `(ABC)`, and so on for any language other than `ABC`. Simple as that. 96 | 97 | If you need to _nest_ parentheses (or any other delimiters) and verify that everything matches up, there's a built-in function for that.
`region` takes five arguments: 98 | - `name`, a `&'static str` describing the region (e.g. in error messages); 99 | - `open`, a parser that opens the region (here, it would be `toss('(')`); 100 | - `contents`, the parser that runs inside the region; 101 | - `close`, a parser that closes the region (here, it would be `toss(')')`); and 102 | - `combine`, which is a bit more complicated. 103 | - Every parser returns a value, but after a call, we have two: what we had before, and the return value from the call. 104 | You can combine these two values in any way you'd like, including by throwing one or the other out. 105 | 106 | ## Anything else cool you can do? 107 | 108 | Yes! Since we're really just riding on top of a decision-problem automaton, I'm working on (but confident about) taking a specification and inverting it to fuzz with an infinite stream of strings that are all guaranteed to be parsed correctly. 109 | If you're writing a language, this means _automatically generating all possible valid source files_, which would be huge. After the redesign of automata above, this is back in the to-do pile. 110 | 111 | ## Why not other parsing libraries? 112 | 113 | Please try other parsing libraries! My primary goal was a personal tool, but it turned out much better than I expected, so please put it to good use and send feedback! 114 | 115 | ## Acknowledgments 116 | 117 | Haskell's parsing libraries (and UPenn's Haskell course), for showing me that parsers can even work this way. 118 | 119 | Rajeev Alur (and UPenn's CIS 262), for formally introducing me to nondeterministic finite automata. 120 | 121 | Rust, for making this possible. 122 | -------------------------------------------------------------------------------- /automata/Cargo.lock: -------------------------------------------------------------------------------- 1 | # This file is automatically @generated by Cargo. 2 | # It is not intended for manual editing. 
3 | version = 3 4 | 5 | [[package]] 6 | name = "aho-corasick" 7 | version = "1.1.2" 8 | source = "registry+https://github.com/rust-lang/crates.io-index" 9 | checksum = "b2969dcb958b36655471fc61f7e416fa76033bdd4bfed0678d8fee1e2d07a1f0" 10 | dependencies = [ 11 | "memchr", 12 | ] 13 | 14 | [[package]] 15 | name = "cfg-if" 16 | version = "1.0.0" 17 | source = "registry+https://github.com/rust-lang/crates.io-index" 18 | checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" 19 | 20 | [[package]] 21 | name = "env_logger" 22 | version = "0.8.4" 23 | source = "registry+https://github.com/rust-lang/crates.io-index" 24 | checksum = "a19187fea3ac7e84da7dacf48de0c45d63c6a76f9490dae389aead16c243fce3" 25 | dependencies = [ 26 | "log", 27 | "regex", 28 | ] 29 | 30 | [[package]] 31 | name = "getrandom" 32 | version = "0.2.10" 33 | source = "registry+https://github.com/rust-lang/crates.io-index" 34 | checksum = "be4136b2a15dd319360be1c07d9933517ccf0be8f16bf62a3bee4f0d618df427" 35 | dependencies = [ 36 | "cfg-if", 37 | "libc", 38 | "wasi", 39 | ] 40 | 41 | [[package]] 42 | name = "inator-automata" 43 | version = "0.1.0" 44 | dependencies = [ 45 | "quickcheck", 46 | "rand", 47 | ] 48 | 49 | [[package]] 50 | name = "libc" 51 | version = "0.2.149" 52 | source = "registry+https://github.com/rust-lang/crates.io-index" 53 | checksum = "a08173bc88b7955d1b3145aa561539096c421ac8debde8cbc3612ec635fee29b" 54 | 55 | [[package]] 56 | name = "log" 57 | version = "0.4.20" 58 | source = "registry+https://github.com/rust-lang/crates.io-index" 59 | checksum = "b5e6163cb8c49088c2c36f57875e58ccd8c87c7427f7fbd50ea6710b2f3f2e8f" 60 | 61 | [[package]] 62 | name = "memchr" 63 | version = "2.6.4" 64 | source = "registry+https://github.com/rust-lang/crates.io-index" 65 | checksum = "f665ee40bc4a3c5590afb1e9677db74a508659dfd71e126420da8274909a0167" 66 | 67 | [[package]] 68 | name = "ppv-lite86" 69 | version = "0.2.17" 70 | source = 
"registry+https://github.com/rust-lang/crates.io-index" 71 | checksum = "5b40af805b3121feab8a3c29f04d8ad262fa8e0561883e7653e024ae4479e6de" 72 | 73 | [[package]] 74 | name = "quickcheck" 75 | version = "1.0.3" 76 | source = "registry+https://github.com/rust-lang/crates.io-index" 77 | checksum = "588f6378e4dd99458b60ec275b4477add41ce4fa9f64dcba6f15adccb19b50d6" 78 | dependencies = [ 79 | "env_logger", 80 | "log", 81 | "rand", 82 | ] 83 | 84 | [[package]] 85 | name = "rand" 86 | version = "0.8.5" 87 | source = "registry+https://github.com/rust-lang/crates.io-index" 88 | checksum = "34af8d1a0e25924bc5b7c43c079c942339d8f0a8b57c39049bef581b46327404" 89 | dependencies = [ 90 | "libc", 91 | "rand_chacha", 92 | "rand_core", 93 | ] 94 | 95 | [[package]] 96 | name = "rand_chacha" 97 | version = "0.3.1" 98 | source = "registry+https://github.com/rust-lang/crates.io-index" 99 | checksum = "e6c10a63a0fa32252be49d21e7709d4d4baf8d231c2dbce1eaa8141b9b127d88" 100 | dependencies = [ 101 | "ppv-lite86", 102 | "rand_core", 103 | ] 104 | 105 | [[package]] 106 | name = "rand_core" 107 | version = "0.6.4" 108 | source = "registry+https://github.com/rust-lang/crates.io-index" 109 | checksum = "ec0be4795e2f6a28069bec0b5ff3e2ac9bafc99e6a9a7dc3547996c5c816922c" 110 | dependencies = [ 111 | "getrandom", 112 | ] 113 | 114 | [[package]] 115 | name = "regex" 116 | version = "1.10.2" 117 | source = "registry+https://github.com/rust-lang/crates.io-index" 118 | checksum = "380b951a9c5e80ddfd6136919eef32310721aa4aacd4889a8d39124b026ab343" 119 | dependencies = [ 120 | "aho-corasick", 121 | "memchr", 122 | "regex-automata", 123 | "regex-syntax", 124 | ] 125 | 126 | [[package]] 127 | name = "regex-automata" 128 | version = "0.4.3" 129 | source = "registry+https://github.com/rust-lang/crates.io-index" 130 | checksum = "5f804c7828047e88b2d32e2d7fe5a105da8ee3264f01902f796c8e067dc2483f" 131 | dependencies = [ 132 | "aho-corasick", 133 | "memchr", 134 | "regex-syntax", 135 | ] 136 | 137 | [[package]] 138 | 
name = "regex-syntax" 139 | version = "0.8.2" 140 | source = "registry+https://github.com/rust-lang/crates.io-index" 141 | checksum = "c08c74e62047bb2de4ff487b251e4a92e24f48745648451635cec7d591162d9f" 142 | 143 | [[package]] 144 | name = "wasi" 145 | version = "0.11.0+wasi-snapshot-preview1" 146 | source = "registry+https://github.com/rust-lang/crates.io-index" 147 | checksum = "9c8d87e72b64a3b4db28d11ce29237c246188f4f51057d65a7eab63b7987e423" 148 | -------------------------------------------------------------------------------- /automata/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "inator-automata" 3 | authors = ["Will Sturgeon "] 4 | version = "0.1.0" 5 | edition = "2021" 6 | description = "Automata loosely based on visibly pushdown automata." 7 | license = "MPL-2.0" 8 | repository = "https://github.com/wrsturgeon/inator" 9 | build = "build.rs" 10 | 11 | [dependencies] 12 | # none 13 | 14 | # Optional dependencies: 15 | quickcheck = { version = "1.0.3", optional = true } 16 | 17 | [dev-dependencies] 18 | rand = "0.8.5" 19 | 20 | [features] 21 | quickcheck = ["dep:quickcheck"] 22 | -------------------------------------------------------------------------------- /automata/build.rs: -------------------------------------------------------------------------------- 1 | /* 2 | * This Source Code Form is subject to the terms of the Mozilla Public 3 | * License, v. 2.0. If a copy of the MPL was not distributed with this 4 | * file, You can obtain one at https://mozilla.org/MPL/2.0/. 5 | */ 6 | 7 | //! Basic CI checks that would be a pain in the ass to write with a shell. 8 | 9 | const MPL_HEADER: &[u8] = b"/*\n * This Source Code Form is subject to the terms of the Mozilla Public\n * License, v. 2.0. 
If a copy of the MPL was not distributed with this\n * file, You can obtain one at https://mozilla.org/MPL/2.0/.\n */\n\n"; 10 | 11 | fn main() -> std::io::Result<()> { 12 | check(std::path::Path::new(r"build.rs"))?; 13 | check(std::path::Path::new(r"src"))?; 14 | Ok(()) 15 | } 16 | 17 | fn check(file: &std::path::Path) -> std::io::Result<()> { 18 | if file.is_dir() { 19 | for f in std::fs::read_dir(file)? { 20 | check(&f?.path())? 21 | } 22 | Ok(()) 23 | } else { 24 | let mut read = 25 | std::io::BufReader::with_capacity(MPL_HEADER.len(), std::fs::File::open(file)?); 26 | if std::io::BufRead::fill_buf(&mut read)? == MPL_HEADER { 27 | Ok(()) 28 | } else { 29 | panic!("{file:?} is missing the verbatim MPL comment (must start at the very first character, and must be followed by a newline). Please copy and paste it from any other file.") 30 | } 31 | } 32 | } 33 | -------------------------------------------------------------------------------- /automata/examples/matched_parentheses.rs: -------------------------------------------------------------------------------- 1 | use inator_automata::{dyck_d, Run}; 2 | use rand::{thread_rng, RngCore}; 3 | 4 | /// Generate test cases (has nothing to do with automata!). 5 | fn generate(rng: &mut R, fuel: u8) -> String { 6 | let Some(depleted) = fuel.checked_sub(1) else { 7 | return String::new(); 8 | }; 9 | let f: [fn(&mut R, u8) -> String; 3] = [ 10 | |_, _| String::new(), 11 | |r, d| "(".to_owned() + &generate(r, d) + ")", 12 | |r, d| generate(r, d >> 1) + &generate(r, d >> 1), 13 | ]; 14 | f[(rng.next_u32() % 3) as usize](rng, depleted) 15 | } 16 | 17 | /// Check if this string consists of matched parentheses. 
/// Reference oracle: does this stream of `(` / `)` form matched parentheses?
///
/// Tracks the current nesting depth. A `)` seen at depth zero rejects
/// immediately; otherwise the input is accepted exactly when the depth is
/// back at zero once the stream is exhausted.
///
/// # Panics
/// On any character other than `(` or `)` (the generators in this example
/// never produce one).
fn accept<I: Iterator<Item = char>>(mut iter: I) -> bool {
    // `None` from the fold means "closed something that was never opened".
    let final_depth = iter.try_fold(0_usize, |depth, c| match c {
        '(' => Some(depth + 1),
        ')' => depth.checked_sub(1),
        _ => unreachable!(),
    });
    final_depth == Some(0)
}
path = "../.." } 11 | -------------------------------------------------------------------------------- /automata/examples/matched_parentheses_codegen/build.rs: -------------------------------------------------------------------------------- 1 | use inator_automata::{dyck_d, IllFormed}; 2 | use std::io; 3 | 4 | pub fn main() -> Result, IllFormed> { 5 | // Very manually constructed parser recognizing only valid parentheses. 6 | dyck_d().to_file("src/parser.rs") 7 | } 8 | -------------------------------------------------------------------------------- /automata/examples/matched_parentheses_codegen/src/autogen.rs: -------------------------------------------------------------------------------- 1 | //! Automatically generated with [inator](https://crates.io/crates/inator). 2 | 3 | #![allow(dead_code, unused_variables)] 4 | 5 | /// Descriptive parsing error. 6 | #[allow(dead_code)] 7 | #[derive(Clone, Debug, PartialEq)] 8 | pub enum Error { 9 | /// Token without any relevant rule. 10 | Absurd { 11 | /// Index of the token that caused this error. 12 | index: usize, 13 | /// Particular token that didn't correspond to a rule. 14 | token: char, 15 | }, 16 | /// Token that would have closed a delimiter, but the delimiter wasn't open. 17 | Unopened { 18 | /// What was actually open, if anything, and the index of the token that opened it. 19 | what_was_open: Option<(&'static str, usize)>, 20 | /// Index of the token that caused this error. 21 | index: usize, 22 | }, 23 | /// After parsing all input, a delimiter remains open (e.g. "(a, b, c"). 24 | Unclosed { 25 | /// Region (user-defined name) that was not closed. Sensible to be e.g. "parentheses" for `(...)`. 26 | region: &'static str, 27 | /// Index at which the delimiter was opened (e.g., for parentheses, the index of the relevant '('). 28 | opened: usize, 29 | }, 30 | /// Ended on a user-defined non-accepting state. 31 | UserDefined { 32 | /// User-defined error message. 
33 | messages: &'static [&'static str], 34 | }, 35 | } 36 | 37 | type R = Result<(Option<(usize, Option>)>, ()), Error>; 38 | 39 | #[repr(transparent)] 40 | struct F(fn(&mut I, ()) -> R); 41 | 42 | #[inline] 43 | pub fn parse>(input: I) -> Result<(), Error> { 44 | state_0(&mut input.into_iter().enumerate(), (), None) 45 | } 46 | 47 | #[inline] 48 | fn state_0>( 49 | input: &mut I, 50 | acc: (), 51 | stack_top: Option<(&'static str, usize)>, 52 | ) -> Result<(), Error> { 53 | match input.next() { 54 | None => stack_top.map_or(Ok(acc), |(region, opened)| { 55 | Err(Error::Unclosed { region, opened }) 56 | }), 57 | Some((index, token)) => match token { 58 | '('..='(' => { 59 | let detour = state_0(input, (), Some(("parentheses", index)))?; 60 | let postprocessed = (|(), ()| ())(acc, detour); 61 | state_0(input, acc, stack_top) 62 | } 63 | ')'..=')' => match stack_top { 64 | Some((region, _)) if region == "parentheses" => Ok(acc), 65 | _ => Err(Error::Unopened { 66 | what_was_open: stack_top, 67 | index, 68 | }), 69 | }, 70 | _ => Err(Error::Absurd { index, token }), 71 | }, 72 | } 73 | } 74 | -------------------------------------------------------------------------------- /automata/examples/matched_parentheses_codegen/src/main.rs: -------------------------------------------------------------------------------- 1 | #![allow(unreachable_code, unused_variables)] 2 | 3 | mod parser; 4 | 5 | use rand::{thread_rng, RngCore}; 6 | 7 | /// Generate test cases (has nothing to do with automata!). 8 | fn generate(rng: &mut R, fuel: u8) -> String { 9 | let Some(depleted) = fuel.checked_sub(1) else { 10 | return String::new(); 11 | }; 12 | let f: [fn(&mut R, u8) -> String; 3] = [ 13 | |_, _| String::new(), 14 | |r, d| "(".to_owned() + &generate(r, d) + ")", 15 | |r, d| generate(r, d >> 1) + &generate(r, d >> 1), 16 | ]; 17 | f[(rng.next_u32() % 3) as usize](rng, depleted) 18 | } 19 | 20 | /// Check if this string consists of matched parentheses. 
21 | fn accept>(iter: I) -> bool { 22 | let mut i: usize = 0; 23 | for c in iter { 24 | i = match c { 25 | '(' => i + 1, 26 | ')' => { 27 | if let Some(pred) = i.checked_sub(1) { 28 | pred 29 | } else { 30 | return false; 31 | } 32 | } 33 | _ => unreachable!(), 34 | } 35 | } 36 | i == 0 37 | } 38 | 39 | /// Output a jumble of parentheses with a very low chance of being valid. 40 | fn shitpost(rng: &mut R) -> String { 41 | let mut s = String::new(); 42 | loop { 43 | let i = rng.next_u32(); 44 | if i & 2 == 0 { 45 | return s; 46 | } 47 | s.push(if i & 1 == 0 { '(' } else { ')' }); 48 | } 49 | } 50 | 51 | fn main() { 52 | let mut rng = thread_rng(); 53 | 54 | // Accept all valid strings 55 | for _ in 0..50 { 56 | let s = generate(&mut rng, 32); 57 | println!("\"{s}\""); 58 | assert_eq!(parser::parse(s.chars()), Ok(())); 59 | } 60 | 61 | // Reject all invalid strings 62 | for _ in 0..50 { 63 | let s = shitpost(&mut rng); 64 | println!("\"{s}\""); 65 | if accept(s.chars()) { 66 | assert_eq!(parser::parse(s.chars()), Ok(())); 67 | } else { 68 | assert!(parser::parse(s.chars()).is_err()); 69 | } 70 | } 71 | } 72 | -------------------------------------------------------------------------------- /automata/examples/matched_parentheses_codegen/src/parser.rs: -------------------------------------------------------------------------------- 1 | //! Automatically generated with [inator](https://crates.io/crates/inator). 2 | 3 | #![allow(dead_code, unused_variables)] 4 | 5 | /// Descriptive parsing error. 6 | #[allow(dead_code)] 7 | #[derive(Clone, Debug, PartialEq)] 8 | pub enum Error { 9 | /// Token without any relevant rule. 10 | Absurd { 11 | /// Index of the token that caused this error. 12 | index: usize, 13 | /// Particular token that didn't correspond to a rule. 14 | token: char, 15 | }, 16 | /// Token that would have closed a delimiter, but the delimiter wasn't open. 17 | Unopened { 18 | /// What was actually open, if anything, and the index of the token that opened it. 
19 | what_was_open: Option<(&'static str, usize)>, 20 | /// Index of the token that caused this error. 21 | index: usize, 22 | }, 23 | /// After parsing all input, a delimiter remains open (e.g. "(a, b, c"). 24 | Unclosed { 25 | /// Region (user-defined name) that was not closed. Sensible to be e.g. "parentheses" for `(...)`. 26 | region: &'static str, 27 | /// Index at which the delimiter was opened (e.g., for parentheses, the index of the relevant '('). 28 | opened: usize, 29 | }, 30 | /// Ended on a user-defined non-accepting state. 31 | UserDefined { 32 | /// User-defined error message. 33 | messages: &'static [&'static str], 34 | }, 35 | } 36 | 37 | type R = Result<(Option<(usize, Option>)>, ()), Error>; 38 | 39 | #[repr(transparent)] 40 | struct F(fn(&mut I, ()) -> R); 41 | 42 | #[inline] 43 | pub fn parse>(input: I) -> Result<(), Error> { 44 | state_0(&mut input.into_iter().enumerate(), (), None) 45 | } 46 | 47 | #[inline] 48 | fn state_0>( 49 | input: &mut I, 50 | acc: (), 51 | stack_top: Option<(&'static str, usize)>, 52 | ) -> Result<(), Error> { 53 | match input.next() { 54 | None => stack_top.map_or(Ok(acc), |(region, opened)| { 55 | Err(Error::Unclosed { region, opened }) 56 | }), 57 | Some((index, token)) => match token { 58 | '('..='(' => { 59 | let detour = state_0(input, (), Some(("parentheses", index)))?; 60 | let postprocessed = (|(), ()| ())(acc, detour); 61 | state_0(input, acc, stack_top) 62 | } 63 | ')'..=')' => match stack_top { 64 | Some((region, _)) if region == "parentheses" => Ok(acc), 65 | _ => Err(Error::Unopened { 66 | what_was_open: stack_top, 67 | index, 68 | }), 69 | }, 70 | _ => Err(Error::Absurd { index, token }), 71 | }, 72 | } 73 | } 74 | -------------------------------------------------------------------------------- /automata/examples/matched_parentheses_nondeterministic.rs: -------------------------------------------------------------------------------- 1 | use inator_automata::{dyck_nd, Run}; 2 | use rand::{thread_rng, 
RngCore}; 3 | 4 | /// Generate test cases (has nothing to do with automata!). 5 | fn generate(rng: &mut R, fuel: u8) -> String { 6 | let Some(depleted) = fuel.checked_sub(1) else { 7 | return String::new(); 8 | }; 9 | let f: [fn(&mut R, u8) -> String; 3] = [ 10 | |_, _| String::new(), 11 | |r, d| "(".to_owned() + &generate(r, d) + ")", 12 | |r, d| generate(r, d >> 1) + &generate(r, d >> 1), 13 | ]; 14 | f[(rng.next_u32() % 3) as usize](rng, depleted) 15 | } 16 | 17 | /// Check if this string consists of matched parentheses. 18 | fn accept>(iter: I) -> bool { 19 | let mut i: usize = 0; 20 | for c in iter { 21 | i = match c { 22 | '(' => i + 1, 23 | ')' => { 24 | if let Some(pred) = i.checked_sub(1) { 25 | pred 26 | } else { 27 | return false; 28 | } 29 | } 30 | _ => unreachable!(), 31 | } 32 | } 33 | i == 0 34 | } 35 | 36 | /// Output a jumble of parentheses with a very low chance of being valid. 37 | fn shitpost(rng: &mut R) -> String { 38 | let mut s = String::new(); 39 | loop { 40 | let i = rng.next_u32(); 41 | if i & 2 == 0 { 42 | return s; 43 | } 44 | s.push(if i & 1 == 0 { '(' } else { ')' }); 45 | } 46 | } 47 | 48 | pub fn main() { 49 | let parser = dyck_nd(); 50 | 51 | let mut rng = thread_rng(); 52 | 53 | // Accept all valid strings 54 | for _ in 0..10 { 55 | let s = generate(&mut rng, 32); 56 | println!(); 57 | println!("\"{s}\""); 58 | let mut run = s.chars().run(&parser); 59 | println!(" {run:?}"); 60 | while let Some(r) = run.next() { 61 | let Ok(c) = r else { panic!("{r:?}") }; 62 | println!("{c:?} {run:?}"); 63 | } 64 | } 65 | 66 | // Reject all invalid strings 67 | 'examples: for _ in 0..10 { 68 | let s = shitpost(&mut rng); 69 | println!(); 70 | println!("\"{s}\""); 71 | let mut run = s.chars().run(&parser); 72 | println!(" {run:?}"); 73 | while let Some(r) = run.next() { 74 | let Ok(c) = r else { 75 | assert!(!accept(s.chars())); 76 | continue 'examples; 77 | }; 78 | println!("{c:?} {run:?}"); 79 | } 80 | assert!(accept(s.chars())); 81 | } 82 | } 
83 | -------------------------------------------------------------------------------- /automata/flake.lock: -------------------------------------------------------------------------------- 1 | { 2 | "nodes": { 3 | "fenix": { 4 | "inputs": { 5 | "nixpkgs": [ 6 | "nixpkgs" 7 | ], 8 | "rust-analyzer-src": "rust-analyzer-src" 9 | }, 10 | "locked": { 11 | "lastModified": 1697696548, 12 | "narHash": "sha256-653vv/6fwAaLnsm97S+BIAa7OsSlVv9FZIqTzlg4jXQ=", 13 | "owner": "nix-community", 14 | "repo": "fenix", 15 | "rev": "9d8534763043e7761b6872e6210d3a68ea2f296c", 16 | "type": "github" 17 | }, 18 | "original": { 19 | "owner": "nix-community", 20 | "repo": "fenix", 21 | "type": "github" 22 | } 23 | }, 24 | "flake-utils": { 25 | "inputs": { 26 | "systems": "systems" 27 | }, 28 | "locked": { 29 | "lastModified": 1694529238, 30 | "narHash": "sha256-zsNZZGTGnMOf9YpHKJqMSsa0dXbfmxeoJ7xHlrt+xmY=", 31 | "owner": "numtide", 32 | "repo": "flake-utils", 33 | "rev": "ff7b65b44d01cf9ba6a71320833626af21126384", 34 | "type": "github" 35 | }, 36 | "original": { 37 | "owner": "numtide", 38 | "repo": "flake-utils", 39 | "type": "github" 40 | } 41 | }, 42 | "naersk": { 43 | "inputs": { 44 | "nixpkgs": [ 45 | "nixpkgs" 46 | ] 47 | }, 48 | "locked": { 49 | "lastModified": 1697664192, 50 | "narHash": "sha256-nRTG3rYEGFV2+putRiC96+kNXDyKaPJgT6K/1FWN7yo=", 51 | "owner": "nix-community", 52 | "repo": "naersk", 53 | "rev": "636a9b5dd7f2ad7d7c3af929ecf95e4d4fab9e97", 54 | "type": "github" 55 | }, 56 | "original": { 57 | "owner": "nix-community", 58 | "repo": "naersk", 59 | "type": "github" 60 | } 61 | }, 62 | "nixpkgs": { 63 | "locked": { 64 | "lastModified": 1697456312, 65 | "narHash": "sha256-roiSnrqb5r+ehnKCauPLugoU8S36KgmWraHgRqVYndo=", 66 | "owner": "nixos", 67 | "repo": "nixpkgs", 68 | "rev": "ca012a02bf8327be9e488546faecae5e05d7d749", 69 | "type": "github" 70 | }, 71 | "original": { 72 | "owner": "nixos", 73 | "ref": "nixos-unstable", 74 | "repo": "nixpkgs", 75 | "type": "github" 76 | } 77 | }, 
78 | "root": { 79 | "inputs": { 80 | "fenix": "fenix", 81 | "flake-utils": "flake-utils", 82 | "naersk": "naersk", 83 | "nixpkgs": "nixpkgs" 84 | } 85 | }, 86 | "rust-analyzer-src": { 87 | "flake": false, 88 | "locked": { 89 | "lastModified": 1697631181, 90 | "narHash": "sha256-W1EWCDHVZTAv1Xp4xirCqaYlHZLIWShVVBk2YQIRcXE=", 91 | "owner": "rust-lang", 92 | "repo": "rust-analyzer", 93 | "rev": "4586a6b26cd5a975a1826c0cfd9004a9bce3d7fd", 94 | "type": "github" 95 | }, 96 | "original": { 97 | "owner": "rust-lang", 98 | "ref": "nightly", 99 | "repo": "rust-analyzer", 100 | "type": "github" 101 | } 102 | }, 103 | "systems": { 104 | "locked": { 105 | "lastModified": 1681028828, 106 | "narHash": "sha256-Vy1rq5AaRuLzOxct8nz4T6wlgyUR7zLU309k9mBC768=", 107 | "owner": "nix-systems", 108 | "repo": "default", 109 | "rev": "da67096a3b9bf56a91d16901293e51ba5b49a27e", 110 | "type": "github" 111 | }, 112 | "original": { 113 | "owner": "nix-systems", 114 | "repo": "default", 115 | "type": "github" 116 | } 117 | } 118 | }, 119 | "root": "root", 120 | "version": 7 121 | } 122 | -------------------------------------------------------------------------------- /automata/flake.nix: -------------------------------------------------------------------------------- 1 | { 2 | inputs = { 3 | fenix = { 4 | url = "github:nix-community/fenix"; 5 | inputs.nixpkgs.follows = "nixpkgs"; 6 | }; 7 | flake-utils.url = "github:numtide/flake-utils"; 8 | naersk = { 9 | url = "github:nix-community/naersk"; 10 | inputs.nixpkgs.follows = "nixpkgs"; 11 | }; 12 | nixpkgs.url = "github:nixos/nixpkgs/nixos-unstable"; 13 | }; 14 | outputs = { fenix, flake-utils, naersk, nixpkgs, self }: 15 | flake-utils.lib.eachDefaultSystem (system: 16 | let 17 | name = "inator"; 18 | # pkgs = (import nixpkgs) { inherit system; }; 19 | # naersk' = pkgs.callPackage naersk { }; 20 | naersk' = (naersk.lib.${system}.override { 21 | cargo = toolchain; 22 | rustc = toolchain; 23 | }); 24 | settings = { 25 | # cargoBuildOptions = orig: 
orig ++ [ "--examples" ]; 26 | # doCheck = true; 27 | doDocFail = true; 28 | gitAllRefs = true; 29 | gitSubmodules = true; 30 | pname = "${name}"; 31 | src = ./.; 32 | }; 33 | toolchain = with fenix.packages.${system}; 34 | combine [ minimal.cargo minimal.rustc ]; 35 | in { 36 | packages = { 37 | ${name} = naersk'.buildPackage settings; 38 | default = self.packages.${system}.${name}; 39 | }; 40 | }); 41 | } 42 | -------------------------------------------------------------------------------- /automata/src/check.rs: -------------------------------------------------------------------------------- 1 | /* 2 | * This Source Code Form is subject to the terms of the Mozilla Public 3 | * License, v. 2.0. If a copy of the MPL was not distributed with this 4 | * file, You can obtain one at https://mozilla.org/MPL/2.0/. 5 | */ 6 | 7 | //! Check well-formedness. 8 | 9 | use crate::{Ctrl, Curry, Input, Range, RangeMap, State, ToSrc, Transition, Update, FF}; 10 | use core::{fmt, mem, num::NonZeroUsize}; 11 | use std::collections::BTreeSet; 12 | 13 | /// Maximum size we're willing to tolerate in an `Err` variant (for performance reasons). 14 | const _MAX_ILL_FORMED_BYTES: usize = 64; 15 | /// Check that the above holds by throwing a compile-time out-of-bounds error if it doesn't. 16 | #[allow(clippy::indexing_slicing)] // <-- that's the point 17 | const _: () = [(); _MAX_ILL_FORMED_BYTES][mem::size_of::>()]; 18 | 19 | /// Witness to an ill-formed automaton (or part thereof). 20 | #[non_exhaustive] 21 | #[derive(Clone, Debug, Eq, Ord, PartialEq, PartialOrd)] 22 | pub enum IllFormed> { 23 | /// An index points to a state greater than the total number of states. 24 | OutOfBounds(usize), 25 | /// A set of indices contains no elements (we should just delete the transition). 26 | ProlongingDeath, 27 | /// A `Range`'s `first` field measured greater than its `last` field. 
28 | InvertedRange(I, I), 29 | /// In a `RangeMap`, at least one key could be accepted by two existing ranges of keys. 30 | RangeMapOverlap(Range), 31 | /// In a `Curry`, a wildcard matches an input that a specific key also matches. 32 | WildcardMask { 33 | /// Input token (or range thereof) that could be ambiguous. 34 | arg_token: Option>, 35 | /// First output possibility. 36 | possibility_1: Box>, 37 | /// Second output possibility. 38 | possibility_2: Box>, 39 | }, 40 | /// Can't go to two different (deterministic) states at the same time. 41 | Superposition(usize, usize), 42 | /// Can't call two different functions on half-constructed outputs at the same time. 43 | IncompatibleCallbacks(Box>, Box>), 44 | /// Can't call two different functions to combine a returned value with a saved one at the same time. 45 | IncompatibleCombinators(Box, Box), 46 | /// Can't e.g. push to the stack and pop from it at the same time. 47 | IncompatibleActions(Box>, Box>), 48 | /// Two identical states at different indices. 49 | DuplicateState(Box>), 50 | /// Reference to a tagged state, but no state has that tag. 51 | TagDNE(String), 52 | /// An initial state expects an accumulator argument that is not `()`. 53 | InitialNotUnit(String), 54 | /// Tried to merge two states who need different output types. 55 | TypeMismatch(String, String), 56 | /// An accepting state returns the wrong type. 57 | WrongReturnType(String, String), 58 | /// Ambiguous regions: e.g. claiming to be opening both parentheses and brackets at the same time. 59 | AmbiguousRegions(&'static str, &'static str), 60 | } 61 | 62 | impl IllFormed { 63 | /// Convert the control parameter from `usize` to anything else. 
64 | #[inline] 65 | #[must_use] 66 | pub fn convert_ctrl>(self) -> IllFormed { 67 | match self { 68 | IllFormed::OutOfBounds(i) => IllFormed::OutOfBounds(i), 69 | IllFormed::ProlongingDeath => IllFormed::ProlongingDeath, 70 | IllFormed::InvertedRange(a, b) => IllFormed::InvertedRange(a, b), 71 | IllFormed::RangeMapOverlap(range) => IllFormed::RangeMapOverlap(range), 72 | IllFormed::WildcardMask { 73 | arg_token, 74 | possibility_1, 75 | possibility_2, 76 | } => IllFormed::WildcardMask { 77 | arg_token, 78 | possibility_1: Box::new(possibility_1.convert_ctrl()), 79 | possibility_2: Box::new(possibility_2.convert_ctrl()), 80 | }, 81 | IllFormed::Superposition(a, b) => IllFormed::Superposition(a, b), 82 | IllFormed::IncompatibleCallbacks(a, b) => IllFormed::IncompatibleCallbacks(a, b), 83 | IllFormed::IncompatibleCombinators(a, b) => IllFormed::IncompatibleCombinators(a, b), 84 | IllFormed::IncompatibleActions(a, b) => IllFormed::IncompatibleActions( 85 | Box::new(a.convert_ctrl()), 86 | Box::new(b.convert_ctrl()), 87 | ), 88 | IllFormed::DuplicateState(s) => IllFormed::DuplicateState(Box::new(s.convert_ctrl())), 89 | IllFormed::TagDNE(s) => IllFormed::TagDNE(s), 90 | IllFormed::InitialNotUnit(s) => IllFormed::InitialNotUnit(s), 91 | IllFormed::TypeMismatch(a, b) => IllFormed::TypeMismatch(a, b), 92 | IllFormed::WrongReturnType(a, b) => IllFormed::WrongReturnType(a, b), 93 | IllFormed::AmbiguousRegions(a, b) => IllFormed::AmbiguousRegions(a, b), 94 | } 95 | } 96 | } 97 | 98 | impl> fmt::Display for IllFormed { 99 | #[inline] 100 | fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { 101 | match *self { 102 | Self::OutOfBounds(i) => write!(f, "State index out of bounds: {i}"), 103 | Self::ProlongingDeath => write!( 104 | f, 105 | "Transition to a state that will never accept. 
\ 106 | Try removing the state along with any transitions to it.", 107 | ), 108 | Self::InvertedRange(ref a, ref b) => { 109 | write!( 110 | f, 111 | "Range with endpoints flipped: {}..={}", 112 | a.to_src(), 113 | b.to_src(), 114 | ) 115 | } 116 | Self::RangeMapOverlap(ref r) => { 117 | write!(f, "Multiple ranges would accept {}", r.to_src()) 118 | } 119 | Self::WildcardMask { 120 | ref arg_token, 121 | ref possibility_1, 122 | ref possibility_2, 123 | } => { 124 | write!( 125 | f, 126 | "On token {}, \ 127 | a wildcard match succeeds (`{}`), \ 128 | but so does a specific match (`{}`).", 129 | arg_token.as_ref().map_or("[end of input]".to_owned(), |r| { 130 | if r.first == r.last { 131 | r.first.to_src() 132 | } else { 133 | r.to_src() 134 | } 135 | }), 136 | possibility_1.to_src(), 137 | possibility_2.to_src(), 138 | ) 139 | } 140 | Self::Superposition(a, b) => write!( 141 | f, 142 | "Tried to visit two different deterministic states \ 143 | ({a} and {b}) at the same time.", 144 | ), 145 | Self::IncompatibleCallbacks(ref a, ref b) => { 146 | write!( 147 | f, 148 | "Tried to call both `{}` and `{}` at the same time.", 149 | a.src, b.src, 150 | ) 151 | } 152 | Self::IncompatibleCombinators(ref a, ref b) => { 153 | write!( 154 | f, 155 | "Tried to call both `{}` and `{}` at the same time.", 156 | a.src, b.src, 157 | ) 158 | } 159 | Self::IncompatibleActions(ref a, ref b) => write!( 160 | f, 161 | "Tried to {} and {} at the same time.", 162 | a.in_english(), 163 | b.in_english() 164 | ), 165 | Self::DuplicateState(ref s) => write!(f, "Duplicate state: {}", s.to_src()), 166 | Self::TagDNE(ref tag) => write!( 167 | f, 168 | "Requested a transition to a tag that does not exist: \"{tag}\"", 169 | ), 170 | Self::InitialNotUnit(ref s) => write!( 171 | f, 172 | "Initial state needs to take a unit-type input (`()`) but takes `{s}` instead.", 173 | ), 174 | Self::TypeMismatch(ref a, ref b) => write!(f, "Type mismatch: `{a}` =/= `{b}`."), 175 | Self::WrongReturnType(ref a, 
ref b) => write!(f, "Wrong output type: `{a}` =/= `{b}`"), 176 | Self::AmbiguousRegions(a, b) => write!( 177 | f, 178 | "Claiming to open two different regions (\"{a}\" and \"{b}\") simultaneously." 179 | ), 180 | } 181 | } 182 | } 183 | 184 | /// Check well-formedness. 185 | pub trait Check> { 186 | /// Check well-formedness. 187 | /// # Errors 188 | /// When not well-formed (with a witness). 189 | fn check(&self, n_states: NonZeroUsize) -> Result<(), IllFormed>; 190 | } 191 | 192 | impl Check> for BTreeSet { 193 | #[inline] 194 | fn check(&self, n_states: NonZeroUsize) -> Result<(), IllFormed> { 195 | if self.is_empty() { 196 | return Err(IllFormed::ProlongingDeath); 197 | } 198 | for &i in self { 199 | if i >= n_states.into() { 200 | return Err(IllFormed::OutOfBounds(i)); 201 | } 202 | } 203 | Ok(()) 204 | } 205 | } 206 | 207 | impl> Check for Curry { 208 | #[inline] 209 | fn check(&self, n_states: NonZeroUsize) -> Result<(), IllFormed> { 210 | match *self { 211 | Self::Wildcard(ref etc) => etc.check(n_states), 212 | Self::Scrutinize { 213 | ref filter, 214 | ref fallback, 215 | } => { 216 | if let &Some(ref f) = fallback { 217 | f.check(n_states)?; 218 | } 219 | filter.check(n_states) 220 | } 221 | } 222 | } 223 | } 224 | 225 | impl> Check for Range { 226 | #[inline] 227 | fn check(&self, _: NonZeroUsize) -> Result<(), IllFormed> { 228 | if self.first <= self.last { 229 | Ok(()) 230 | } else { 231 | Err(IllFormed::InvertedRange( 232 | self.first.clone(), 233 | self.last.clone(), 234 | )) 235 | } 236 | } 237 | } 238 | 239 | impl> Check for RangeMap { 240 | #[inline] 241 | fn check(&self, n_states: NonZeroUsize) -> Result<(), IllFormed> { 242 | self.iter().try_fold((), |(), (k, v)| { 243 | self.0 244 | .range(..k.clone()) 245 | .fold(None, |acc, (range, _)| { 246 | acc.or_else(|| range.clone().intersection(k.clone())) 247 | }) 248 | .map_or_else( 249 | || v.check(n_states), 250 | |overlap| Err(IllFormed::RangeMapOverlap(overlap)), 251 | ) 252 | }) 253 | } 254 | } 
255 | 256 | impl> Check for State { 257 | #[inline] 258 | fn check(&self, n_states: NonZeroUsize) -> Result<(), IllFormed> { 259 | self.transitions.check(n_states) 260 | } 261 | } 262 | 263 | impl> Check for Transition { 264 | #[inline] 265 | fn check(&self, n_states: NonZeroUsize) -> Result<(), IllFormed> { 266 | match *self { 267 | Self::Lateral { ref dst, .. } => dst.check(n_states), 268 | Self::Call { 269 | ref detour, 270 | ref dst, 271 | .. 272 | } => { 273 | detour.check(n_states)?; 274 | dst.check(n_states) 275 | } 276 | Self::Return { .. } => Ok(()), 277 | } 278 | } 279 | } 280 | 281 | impl Check for usize { 282 | #[inline] 283 | fn check(&self, n_states: NonZeroUsize) -> Result<(), IllFormed> { 284 | if *self >= n_states.into() { 285 | Err(IllFormed::OutOfBounds(*self)) 286 | } else { 287 | Ok(()) 288 | } 289 | } 290 | } 291 | -------------------------------------------------------------------------------- /automata/src/combinators.rs: -------------------------------------------------------------------------------- 1 | /* 2 | * This Source Code Form is subject to the terms of the Mozilla Public 3 | * License, v. 2.0. If a copy of the MPL was not distributed with this 4 | * file, You can obtain one at https://mozilla.org/MPL/2.0/. 5 | */ 6 | 7 | //! Operations on nondeterministic finite automata returning nondeterministic finite automata. 8 | 9 | #![allow(clippy::manual_assert, clippy::match_wild_err_arm, clippy::panic)] 10 | 11 | use crate::{Ctrl, Curry, Deterministic, Graph, Input, Merge, RangeMap, State, Transition, FF}; 12 | use core::{iter, mem, ops}; 13 | use std::collections::BTreeSet; 14 | 15 | impl ops::BitOr for Deterministic { 16 | type Output = Self; 17 | #[inline] 18 | fn bitor(self, rhs: Self) -> Self { 19 | let mut s = self.generalize(); 20 | let other = rhs.generalize(); 21 | // Note that union on pushdown automata is undecidable; 22 | // we presumably reject a subset of automata that might possibly work. 
23 | if s.check().is_err() { 24 | panic!("Internal error") 25 | } 26 | let size = s.states.len(); 27 | let Graph { 28 | states: other_states, 29 | initial: other_initial, 30 | } = other.map_indices(|i| i.checked_add(size).expect("Absurdly huge number of states")); 31 | s.states.extend(other_states); 32 | s.initial.extend(other_initial); 33 | s.sort(); 34 | s.determinize().unwrap_or_else(|e| panic!("{e}")) 35 | } 36 | } 37 | 38 | impl ops::Rem> for Deterministic { 39 | type Output = Self; 40 | #[inline] 41 | fn rem(mut self, rhs: Transition) -> Self::Output { 42 | for state in &mut self.states { 43 | if state.non_accepting.is_empty() { 44 | if let Curry::Scrutinize { 45 | ref mut fallback, .. 46 | } = state.transitions 47 | { 48 | assert!( 49 | fallback.is_none(), 50 | "Tried to add a fallback transition, \ 51 | but a fallback already existed.", 52 | ); 53 | *fallback = Some(rhs.clone()); 54 | } 55 | } 56 | } 57 | self 58 | } 59 | } 60 | 61 | impl ops::Shr for Deterministic { 62 | type Output = Self; 63 | #[inline] 64 | fn shr(mut self, other: Self) -> Self::Output { 65 | let rhs_init = get!(other.states, other.initial) 66 | .transitions 67 | .clone() 68 | .generalize(); 69 | 70 | let accepting_indices = 71 | self.states 72 | .iter_mut() 73 | .enumerate() 74 | .fold(BTreeSet::new(), |mut acc_i, (i, st)| { 75 | if st.non_accepting.is_empty() { 76 | st.non_accepting = iter::once( 77 | "Ran the first part of a two-parser concatenation \ 78 | (with `>>`) but not the second one." 
79 | .to_owned(), 80 | ) 81 | .collect(); // <-- No longer accepting since we need to run the second parser 82 | let _ = acc_i.insert(i); 83 | } 84 | acc_i 85 | }); 86 | 87 | let mut s = self.generalize(); 88 | if s.check().is_err() { 89 | panic!("Internal error") 90 | } 91 | let size = s.states.len(); 92 | 93 | let Graph { 94 | states: other_states, 95 | initial: other_initial, 96 | } = other 97 | .generalize() 98 | .map_indices(|i| i.checked_add(size).expect("Absurdly huge number of states")); 99 | 100 | s.states.extend(other_states); 101 | 102 | // For every transition that an empty stack can take from the initial state of the right-hand parser, 103 | // add that transition (only on the empty stack) to each accepting state of the left-hand parser. 104 | for state in &mut s.states { 105 | state.transitions = mem::replace( 106 | &mut state.transitions, 107 | Curry::Wildcard(Transition::Return { region: "" }), 108 | ) 109 | .merge(rhs_init.clone()) 110 | .unwrap_or_else(|e| panic!("{e}")); 111 | } 112 | 113 | // If any initial states are immediately accepting, we need to start in the second parser, too. 
114 | if s.initial.iter().any(|i| accepting_indices.contains(i)) { 115 | s.initial.extend(other_initial.iter().copied()); 116 | } 117 | 118 | let mut out = Graph { 119 | states: s 120 | .states 121 | .into_iter() 122 | .map(|st| add_tail_call_state(st, &other_initial, &accepting_indices)) 123 | .collect(), 124 | ..s 125 | }; 126 | out.sort(); 127 | out.determinize().unwrap_or_else(|e| panic!("{e}")) 128 | } 129 | } 130 | 131 | impl ops::BitXor<(&'static str, Self, FF)> for Deterministic { 132 | type Output = Self; 133 | #[inline] 134 | fn bitxor(mut self, (region, other, combine): (&'static str, Self, FF)) -> Self::Output { 135 | let rhs_init = get!(other.states, other.initial) 136 | .transitions 137 | .clone() 138 | .generalize(); 139 | 140 | let accepting_indices = 141 | self.states 142 | .iter_mut() 143 | .enumerate() 144 | .fold(BTreeSet::new(), |mut acc_i, (i, st)| { 145 | if st.non_accepting.is_empty() { 146 | st.non_accepting = iter::once( 147 | "Ran the first part of a two-parser call \ 148 | (with `^`) but not the second one." 149 | .to_owned(), 150 | ) 151 | .collect(); // <-- No longer accepting since we need to run the second parser 152 | let _ = acc_i.insert(i); 153 | } 154 | acc_i 155 | }); 156 | 157 | let mut s = self.generalize(); 158 | if s.check().is_err() { 159 | panic!("Internal error") 160 | } 161 | let size = s.states.len(); 162 | 163 | let Graph { 164 | states: other_states, 165 | initial: other_initial, 166 | } = other 167 | .generalize() 168 | .map_indices(|i| i.checked_add(size).expect("Absurdly huge number of states")); 169 | 170 | s.states.extend(other_states); 171 | 172 | // For every transition that an empty stack can take from the initial state of the right-hand parser, 173 | // add that transition (only on the empty stack) to each accepting state of the left-hand parser. 
174 | for state in &mut s.states { 175 | state.transitions = mem::replace( 176 | &mut state.transitions, 177 | Curry::Wildcard(Transition::Return { region: "" }), 178 | ) 179 | .merge(rhs_init.clone()) 180 | .unwrap_or_else(|e| panic!("{e}")); 181 | } 182 | 183 | // If any initial states are immediately accepting, we need to start in the second parser, too. 184 | if s.initial.iter().any(|i| accepting_indices.contains(i)) { 185 | s.initial.extend(other_initial.iter().copied()); 186 | } 187 | 188 | let mut out = Graph { 189 | states: s 190 | .states 191 | .into_iter() 192 | .map(|st| add_call_state(st, &other_initial, &accepting_indices, region, &combine)) 193 | .collect(), 194 | ..s 195 | }; 196 | out.sort(); 197 | out.determinize().unwrap_or_else(|e| panic!("{e}")) 198 | } 199 | } 200 | 201 | /// Add a tail call to any accepting state. 202 | #[inline] 203 | #[must_use] 204 | fn add_tail_call_state>( 205 | s: State, 206 | other_init: &BTreeSet, 207 | accepting_indices: &BTreeSet, 208 | ) -> State> { 209 | State { 210 | transitions: add_tail_call_curry(s.transitions, other_init, accepting_indices), 211 | non_accepting: s.non_accepting, 212 | } 213 | } 214 | 215 | /// Add a tail call to any accepting state. 216 | #[inline] 217 | #[must_use] 218 | fn add_tail_call_curry>( 219 | s: Curry, 220 | other_init: &BTreeSet, 221 | accepting_indices: &BTreeSet, 222 | ) -> Curry> { 223 | match s { 224 | Curry::Wildcard(t) => { 225 | Curry::Wildcard(add_tail_call_transition(t, other_init, accepting_indices)) 226 | } 227 | Curry::Scrutinize { filter, fallback } => Curry::Scrutinize { 228 | filter: add_tail_call_range_map(filter, other_init, accepting_indices), 229 | fallback: fallback.map(|f| add_tail_call_transition(f, other_init, accepting_indices)), 230 | }, 231 | } 232 | } 233 | 234 | /// Add a tail call to any accepting state. 
235 | #[inline] 236 | #[must_use] 237 | fn add_tail_call_range_map>( 238 | s: RangeMap, 239 | other_init: &BTreeSet, 240 | accepting_indices: &BTreeSet, 241 | ) -> RangeMap> { 242 | RangeMap( 243 | s.0.into_iter() 244 | .map(|(k, v)| { 245 | ( 246 | k, 247 | add_tail_call_transition(v, other_init, accepting_indices), 248 | ) 249 | }) 250 | .collect(), 251 | ) 252 | } 253 | 254 | /// Add a tail call to any accepting state. 255 | #[inline] 256 | #[must_use] 257 | fn add_tail_call_transition>( 258 | s: Transition, 259 | other_init: &BTreeSet, 260 | accepting_indices: &BTreeSet, 261 | ) -> Transition> { 262 | match s { 263 | Transition::Lateral { ref dst, update } => Transition::Lateral { 264 | dst: add_tail_call_c(dst, other_init, accepting_indices), 265 | update, 266 | }, 267 | Transition::Call { 268 | region, 269 | ref detour, 270 | dst, 271 | combine, 272 | } => Transition::Call { 273 | region, 274 | detour: add_tail_call_c(detour, other_init, accepting_indices), 275 | dst: Box::new(add_tail_call_transition( 276 | *dst, 277 | other_init, 278 | accepting_indices, 279 | )), 280 | combine, 281 | }, 282 | Transition::Return { region } => Transition::Return { region }, 283 | } 284 | } 285 | 286 | /// Add a tail call only to accepting states. 287 | #[inline] 288 | #[must_use] 289 | fn add_tail_call_c>( 290 | c: &C, 291 | other_init: &BTreeSet, 292 | accepting_indices: &BTreeSet, 293 | ) -> BTreeSet { 294 | let accepts = c.view().any(|ref i| accepting_indices.contains(i)); 295 | let iter = c.view(); 296 | if accepts { 297 | iter.chain(other_init.iter().copied()).collect() 298 | } else { 299 | iter.collect() 300 | } 301 | } 302 | 303 | /// Add a call to any accepting state. 
304 | #[inline] 305 | #[must_use] 306 | fn add_call_state>( 307 | s: State, 308 | other_init: &BTreeSet, 309 | accepting_indices: &BTreeSet, 310 | region: &'static str, 311 | combine: &FF, 312 | ) -> State> { 313 | State { 314 | transitions: add_call_curry( 315 | s.transitions, 316 | other_init, 317 | accepting_indices, 318 | region, 319 | combine, 320 | ), 321 | non_accepting: s.non_accepting, 322 | } 323 | } 324 | 325 | /// Add a call to any accepting state. 326 | #[inline] 327 | #[must_use] 328 | fn add_call_curry>( 329 | s: Curry, 330 | other_init: &BTreeSet, 331 | accepting_indices: &BTreeSet, 332 | region: &'static str, 333 | combine: &FF, 334 | ) -> Curry> { 335 | match s { 336 | Curry::Wildcard(t) => Curry::Wildcard(add_call_transition( 337 | t, 338 | other_init, 339 | accepting_indices, 340 | region, 341 | combine, 342 | )), 343 | Curry::Scrutinize { filter, fallback } => Curry::Scrutinize { 344 | filter: add_call_range_map(filter, other_init, accepting_indices, region, combine), 345 | fallback: fallback 346 | .map(|f| add_call_transition(f, other_init, accepting_indices, region, combine)), 347 | }, 348 | } 349 | } 350 | 351 | /// Add a call to any accepting state. 352 | #[inline] 353 | #[must_use] 354 | fn add_call_range_map>( 355 | s: RangeMap, 356 | other_init: &BTreeSet, 357 | accepting_indices: &BTreeSet, 358 | region: &'static str, 359 | combine: &FF, 360 | ) -> RangeMap> { 361 | RangeMap( 362 | s.0.into_iter() 363 | .map(|(k, v)| { 364 | ( 365 | k, 366 | add_call_transition(v, other_init, accepting_indices, region, combine), 367 | ) 368 | }) 369 | .collect(), 370 | ) 371 | } 372 | 373 | /// Add a call to any accepting state. 
374 | #[inline] 375 | #[must_use] 376 | fn add_call_transition>( 377 | s: Transition, 378 | other_init: &BTreeSet, 379 | accepting_indices: &BTreeSet, 380 | region: &'static str, 381 | combine: &FF, 382 | ) -> Transition> { 383 | match s { 384 | Transition::Lateral { dst, update } => { 385 | if dst.view().any(|ref i| accepting_indices.contains(i)) { 386 | Transition::Call { 387 | region, 388 | detour: other_init.clone(), 389 | dst: Box::new(Transition::Lateral { dst, update }.generalize()), 390 | combine: combine.clone(), 391 | } 392 | } else { 393 | Transition::Lateral { 394 | dst: dst.view().collect(), 395 | update, 396 | } 397 | } 398 | } 399 | #[allow(clippy::shadow_unrelated)] 400 | Transition::Call { 401 | region, 402 | ref detour, 403 | dst, 404 | combine, 405 | } => Transition::Call { 406 | region, 407 | detour: detour.view().collect(), 408 | dst: Box::new(add_call_transition( 409 | *dst, 410 | other_init, 411 | accepting_indices, 412 | region, 413 | &combine, 414 | )), 415 | combine, 416 | }, 417 | #[allow(clippy::shadow_unrelated)] 418 | Transition::Return { region } => Transition::Return { region }, 419 | } 420 | } 421 | -------------------------------------------------------------------------------- /automata/src/ctrl.rs: -------------------------------------------------------------------------------- 1 | /* 2 | * This Source Code Form is subject to the terms of the Mozilla Public 3 | * License, v. 2.0. If a copy of the MPL was not distributed with this 4 | * file, You can obtain one at https://mozilla.org/MPL/2.0/. 5 | */ 6 | 7 | //! Necessary preconditions to function as an index. 8 | 9 | use crate::{Check, Input, Merge, ToSrc}; 10 | use core::iter; 11 | use std::collections::{btree_set, BTreeSet}; 12 | 13 | #[cfg(feature = "quickcheck")] 14 | use core::num::NonZeroUsize; 15 | 16 | /// Necessary preconditions to function as an index. 
17 | pub trait Ctrl: 18 | Check + Clone + Merge + Ord + PartialEq + ToSrc 19 | { 20 | /// Non-owning view over each index in what may be a collection. 21 | type View<'s>: Iterator 22 | where 23 | Self: 's; 24 | /// View each index in what may be a collection. 25 | fn view(&self) -> Self::View<'_>; 26 | /// Arbitrary value of this type, given an automaton with this many states. 27 | /// Should fail occasionally but not often. 28 | #[must_use] 29 | #[cfg(feature = "quickcheck")] 30 | fn arbitrary_given(n_states: NonZeroUsize, g: &mut quickcheck::Gen) -> Self; 31 | /// Apply a function to each index. 32 | #[must_use] 33 | fn map_indices usize>(self, f: F) -> Self; 34 | /// Turn a single index into its equivalent value in this type. 35 | #[must_use] 36 | fn from_usize(i: usize) -> Self; 37 | } 38 | 39 | impl Ctrl for usize { 40 | type View<'s> = iter::Once; 41 | #[inline] 42 | fn view(&self) -> Self::View<'_> { 43 | iter::once(*self) 44 | } 45 | #[inline] 46 | #[allow(clippy::arithmetic_side_effects, clippy::unwrap_used, unsafe_code)] 47 | #[cfg(feature = "quickcheck")] 48 | fn arbitrary_given(n_states: NonZeroUsize, g: &mut quickcheck::Gen) -> Self { 49 | use quickcheck::Arbitrary; 50 | Self::arbitrary(g) % n_states 51 | } 52 | #[inline] 53 | fn map_indices usize>(self, mut f: F) -> Self { 54 | f(self) 55 | } 56 | #[inline(always)] 57 | fn from_usize(i: usize) -> Self { 58 | i 59 | } 60 | } 61 | 62 | impl Ctrl for BTreeSet { 63 | type View<'s> = iter::Copied>; 64 | #[inline] 65 | fn view(&self) -> Self::View<'_> { 66 | self.iter().copied() 67 | } 68 | #[inline] 69 | #[allow(clippy::arithmetic_side_effects, clippy::unwrap_used, unsafe_code)] 70 | #[cfg(feature = "quickcheck")] 71 | fn arbitrary_given(n_states: NonZeroUsize, g: &mut quickcheck::Gen) -> Self { 72 | use quickcheck::Arbitrary; 73 | 'restart: loop { 74 | let set = BTreeSet::::arbitrary(g); 75 | if set.is_empty() { 76 | continue 'restart; 77 | } 78 | return set.into_iter().map(|i| i % n_states).collect(); 79 
| } 80 | } 81 | #[inline] 82 | fn map_indices usize>(self, f: F) -> Self { 83 | self.into_iter().map(f).collect() 84 | } 85 | #[inline] 86 | fn from_usize(i: usize) -> Self { 87 | iter::once(i).collect() 88 | } 89 | } 90 | -------------------------------------------------------------------------------- /automata/src/curry.rs: -------------------------------------------------------------------------------- 1 | /* 2 | * This Source Code Form is subject to the terms of the Mozilla Public 3 | * License, v. 2.0. If a copy of the MPL was not distributed with this 4 | * file, You can obtain one at https://mozilla.org/MPL/2.0/. 5 | */ 6 | 7 | //! Read the next input symbol and decide an action. 8 | 9 | use crate::{Ctrl, IllFormed, Input, Range, RangeMap, Transition}; 10 | use core::{cmp, iter}; 11 | use std::collections::BTreeMap; 12 | 13 | /// Read the next input symbol and decide an action. 14 | #[allow(clippy::exhaustive_enums)] 15 | #[derive(Debug)] 16 | pub enum Curry> { 17 | /// Throw away the input (without looking at it) and do this. 18 | Wildcard(Transition), 19 | /// Map specific ranges of inputs to actions. 20 | Scrutinize { 21 | /// Specific ranges to route to various transitions. 22 | filter: RangeMap, 23 | /// If no ranges match, take this transition (if any; otherwise, fail). 
24 | fallback: Option>, 25 | }, 26 | } 27 | 28 | impl> Clone for Curry { 29 | #[inline] 30 | fn clone(&self) -> Self { 31 | match *self { 32 | Self::Wildcard(ref etc) => Self::Wildcard(etc.clone()), 33 | Self::Scrutinize { 34 | ref filter, 35 | ref fallback, 36 | } => Self::Scrutinize { 37 | filter: filter.clone(), 38 | fallback: fallback.clone(), 39 | }, 40 | } 41 | } 42 | } 43 | 44 | impl> Eq for Curry {} 45 | 46 | impl> PartialEq for Curry { 47 | #[inline] 48 | fn eq(&self, other: &Self) -> bool { 49 | match (self, other) { 50 | (&Self::Wildcard(ref a), &Self::Wildcard(ref b)) => a == b, 51 | ( 52 | &Self::Scrutinize { 53 | filter: ref l_filter, 54 | fallback: ref l_fallback, 55 | }, 56 | &Self::Scrutinize { 57 | filter: ref r_filter, 58 | fallback: ref r_fallback, 59 | }, 60 | ) => (l_filter, l_fallback) == (r_filter, r_fallback), 61 | (&Self::Wildcard(..), &Self::Scrutinize { .. }) 62 | | (&Self::Scrutinize { .. }, &Self::Wildcard(..)) => false, // unfortunately no general way to tell if a range covers a whole type 63 | } 64 | } 65 | } 66 | 67 | impl> Ord for Curry { 68 | #[inline] 69 | fn cmp(&self, other: &Self) -> cmp::Ordering { 70 | match (self, other) { 71 | (&Self::Wildcard(ref a), &Self::Wildcard(ref b)) => a.cmp(b), 72 | (&Self::Wildcard(..), &Self::Scrutinize { .. }) => cmp::Ordering::Less, 73 | (&Self::Scrutinize { .. }, &Self::Wildcard(..)) => cmp::Ordering::Greater, 74 | ( 75 | &Self::Scrutinize { 76 | filter: ref l_filter, 77 | fallback: ref l_fallback, 78 | }, 79 | &Self::Scrutinize { 80 | filter: ref r_filter, 81 | fallback: ref r_fallback, 82 | }, 83 | ) => (l_filter, l_fallback).cmp(&(r_filter, r_fallback)), 84 | } 85 | } 86 | } 87 | 88 | impl> PartialOrd for Curry { 89 | #[inline] 90 | fn partial_cmp(&self, other: &Self) -> Option { 91 | Some(self.cmp(other)) 92 | } 93 | } 94 | 95 | impl> Curry { 96 | /// Look up a transition based on an input token. 97 | /// # Errors 98 | /// If multiple ranges fit an argument. 
99 | #[inline] 100 | #[allow(clippy::type_complexity)] 101 | pub fn get(&self, key: &I) -> Result>, IllFormed> { 102 | match *self { 103 | Self::Wildcard(ref transition) => Ok(Some(transition)), 104 | Self::Scrutinize { 105 | ref filter, 106 | ref fallback, 107 | } => Ok(filter.get(key)?.or(fallback.as_ref())), 108 | } 109 | } 110 | 111 | /// Assert that this map has no keys in common with another. 112 | /// # Errors 113 | /// If there are keys in common, don't panic: instead, return them. 114 | /// Here's the meaning of an error: 115 | /// - `None`: Conflict on a fallback. 116 | /// - `Some(None, ..)`: Conflict on literally anything. Means both are wildcards. 117 | /// - `Some(Some(range), ..)`: Conflict on at least this range of values, 118 | /// which is an intersection of two offending ranges. 119 | #[inline] 120 | #[allow(clippy::result_large_err, clippy::type_complexity)] 121 | pub fn disjoint( 122 | &self, 123 | other: &Self, 124 | ) -> Result<(), Option<(Option>, Transition, Transition)>> { 125 | match (self, other) { 126 | (&Self::Wildcard(ref a), &Self::Wildcard(ref b)) => { 127 | Err(Some((None, a.clone(), b.clone()))) 128 | } 129 | ( 130 | &Self::Wildcard(ref w), 131 | &Self::Scrutinize { 132 | ref filter, 133 | ref fallback, 134 | }, 135 | ) 136 | | ( 137 | &Self::Scrutinize { 138 | ref filter, 139 | ref fallback, 140 | }, 141 | &Self::Wildcard(ref w), 142 | ) => filter.0.first_key_value().map_or_else( 143 | || fallback.as_ref().map_or(Ok(()), |_| Err(None)), 144 | |(k, v)| Err(Some((Some(k.clone()), w.clone(), v.clone()))), 145 | ), 146 | ( 147 | &Self::Scrutinize { 148 | filter: ref l_filter, 149 | fallback: ref l_fallback, 150 | }, 151 | &Self::Scrutinize { 152 | filter: ref r_filter, 153 | fallback: ref r_fallback, 154 | }, 155 | ) => l_filter.disjoint(r_filter).map_or_else( 156 | |(intersection, lv, rv)| Err(Some((Some(intersection), lv, rv))), 157 | |()| { 158 | if l_fallback.is_some() && r_fallback.is_some() { 159 | Err(None) 160 | } else { 161 
| Ok(()) 162 | } 163 | }, 164 | ), 165 | } 166 | } 167 | 168 | /// All values in this collection, without their associated keys. 169 | #[inline] 170 | pub fn values(&self) -> Box>> { 171 | match *self { 172 | Self::Wildcard(ref etc) => Box::new(iter::once(etc)), 173 | Self::Scrutinize { 174 | ref filter, 175 | ref fallback, 176 | } => Box::new(filter.values().chain(fallback)), 177 | } 178 | } 179 | 180 | /// Remove an entry by key. 181 | /// # Panics 182 | /// If we ask to remove a wildcard but it's a specific value, or vice-versa. 183 | #[inline] 184 | pub fn remove(&mut self, key: Option>) { 185 | match *self { 186 | Self::Wildcard(..) => { 187 | // assert!( 188 | // key.is_none(), 189 | // "Asked to remove a specific value \ 190 | // but the map took a wildcard", 191 | // ); 192 | *self = Self::Scrutinize { 193 | filter: RangeMap(BTreeMap::new()), 194 | fallback: None, 195 | }; 196 | } 197 | Self::Scrutinize { 198 | ref mut filter, 199 | ref fallback, 200 | } => { 201 | filter.remove(&key.expect( 202 | "Asked to remove a wildcard \ 203 | but the map took a specific value", 204 | )); 205 | assert!( 206 | fallback.is_none(), 207 | "Asked to remove a value but the map has a fallback", 208 | ); 209 | } 210 | }; 211 | } 212 | 213 | /// All values in this collection, without their associated keys. 214 | #[inline] 215 | pub fn values_mut(&mut self) -> Box>> { 216 | match *self { 217 | Self::Wildcard(ref mut etc) => Box::new(iter::once(etc)), 218 | Self::Scrutinize { 219 | ref mut filter, 220 | ref mut fallback, 221 | } => Box::new(filter.values_mut().chain(fallback)), 222 | } 223 | } 224 | 225 | /// Check if this parser ever could, at any point, involve a fallback transition. 226 | #[inline] 227 | #[must_use] 228 | pub const fn involves_any_fallback(&self) -> bool { 229 | matches!( 230 | *self, 231 | Self::Scrutinize { 232 | fallback: Some(_), 233 | .. 
234 | } 235 | ) 236 | } 237 | } 238 | 239 | impl Curry { 240 | /// Convert the control parameter from `usize` to anything else. 241 | #[inline] 242 | #[must_use] 243 | pub fn convert_ctrl>(self) -> Curry { 244 | match self { 245 | Curry::Wildcard(w) => Curry::Wildcard(w.convert_ctrl()), 246 | Curry::Scrutinize { filter, fallback } => Curry::Scrutinize { 247 | filter: filter.convert_ctrl(), 248 | fallback: fallback.map(Transition::convert_ctrl), 249 | }, 250 | } 251 | } 252 | } 253 | -------------------------------------------------------------------------------- /automata/src/f.rs: -------------------------------------------------------------------------------- 1 | /* 2 | * This Source Code Form is subject to the terms of the Mozilla Public 3 | * License, v. 2.0. If a copy of the MPL was not distributed with this 4 | * file, You can obtain one at https://mozilla.org/MPL/2.0/. 5 | */ 6 | 7 | //! Function representations. 8 | 9 | #![allow(clippy::module_name_repetitions)] 10 | 11 | use crate::ToSrc; 12 | 13 | /// One-argument function. 14 | #[non_exhaustive] 15 | #[derive(Clone, Debug, Default, Eq, Hash, Ord, PartialEq, PartialOrd)] 16 | pub struct F { 17 | /// Source-code representation of this function. 18 | pub src: String, 19 | /// Argument type. 20 | pub arg_t: String, 21 | /// Output type. 22 | pub output_t: String, 23 | } 24 | 25 | /// Two-argument function. 26 | #[non_exhaustive] 27 | #[derive(Clone, Debug, Default, Eq, Hash, Ord, PartialEq, PartialOrd)] 28 | pub struct FF { 29 | /// Source-code representation of this function. 30 | pub src: String, 31 | /// Type of the first argument. 32 | pub lhs_t: String, 33 | /// Type of the second argument. 34 | pub rhs_t: String, 35 | /// Output type. 36 | pub output_t: String, 37 | } 38 | 39 | impl F { 40 | /// Internals of the `f!(...)` macro. 
41 | #[inline] 42 | #[must_use] 43 | pub fn _from_macro(src: String, _: fn(Arg) -> Output) -> Self { 44 | Self { 45 | src, 46 | arg_t: Arg::src_type(), 47 | output_t: Output::src_type(), 48 | } 49 | } 50 | } 51 | 52 | impl FF { 53 | /// Internals of the `ff!(...)` macro. 54 | #[inline] 55 | #[must_use] 56 | pub fn _from_macro( 57 | src: String, 58 | _: fn(Lhs, Rhs) -> Output, 59 | ) -> Self { 60 | Self { 61 | src, 62 | lhs_t: Lhs::src_type(), 63 | rhs_t: Rhs::src_type(), 64 | output_t: Output::src_type(), 65 | } 66 | } 67 | } 68 | -------------------------------------------------------------------------------- /automata/src/generalize.rs: -------------------------------------------------------------------------------- 1 | /* 2 | * This Source Code Form is subject to the terms of the Mozilla Public 3 | * License, v. 2.0. If a copy of the MPL was not distributed with this 4 | * file, You can obtain one at https://mozilla.org/MPL/2.0/. 5 | */ 6 | 7 | //! Un-determinize an automaton to return a practically identical (but nominally nondeterministic) version. 8 | 9 | use crate::{Ctrl, Curry, Graph, Input, Nondeterministic, RangeMap, State, Transition}; 10 | use std::collections::BTreeSet; 11 | 12 | impl> Graph { 13 | /// Un-determinize an automaton to return a practically identical (but nominally nondeterministic) version. 14 | #[inline] 15 | pub fn generalize(self) -> Nondeterministic { 16 | Nondeterministic { 17 | states: self.states.into_iter().map(State::generalize).collect(), 18 | initial: self.initial.view().collect(), 19 | } 20 | } 21 | } 22 | 23 | impl> State { 24 | /// Un-determinize an automaton to return a practically identical (but nominally nondeterministic) version. 
25 | #[inline] 26 | pub fn generalize(self) -> State> { 27 | State { 28 | transitions: self.transitions.generalize(), 29 | non_accepting: self.non_accepting, 30 | } 31 | } 32 | } 33 | 34 | impl> Curry { 35 | /// Un-determinize an automaton to return a practically identical (but nominally nondeterministic) version. 36 | #[inline] 37 | pub fn generalize(self) -> Curry> { 38 | match self { 39 | Self::Wildcard(w) => Curry::Wildcard(w.generalize()), 40 | Self::Scrutinize { filter, fallback } => Curry::Scrutinize { 41 | filter: filter.generalize(), 42 | fallback: fallback.map(Transition::generalize), 43 | }, 44 | } 45 | } 46 | } 47 | 48 | impl> RangeMap { 49 | /// Un-determinize an automaton to return a practically identical (but nominally nondeterministic) version. 50 | #[inline] 51 | #[must_use] 52 | pub fn generalize(self) -> RangeMap> { 53 | RangeMap( 54 | self.0 55 | .into_iter() 56 | .map(|(k, v)| (k, v.generalize())) 57 | .collect(), 58 | ) 59 | } 60 | } 61 | 62 | impl> Transition { 63 | /// Un-determinize an automaton to return a practically identical (but nominally nondeterministic) version. 64 | #[inline] 65 | pub fn generalize(self) -> Transition> { 66 | match self { 67 | Self::Lateral { dst, update } => Transition::Lateral { 68 | dst: dst.view().collect(), 69 | update, 70 | }, 71 | Self::Call { 72 | region, 73 | detour, 74 | dst, 75 | combine, 76 | } => Transition::Call { 77 | region, 78 | detour: detour.view().collect(), 79 | dst: Box::new(dst.generalize()), 80 | combine, 81 | }, 82 | Self::Return { region } => Transition::Return { region }, 83 | } 84 | } 85 | } 86 | -------------------------------------------------------------------------------- /automata/src/graph.rs: -------------------------------------------------------------------------------- 1 | /* 2 | * This Source Code Form is subject to the terms of the Mozilla Public 3 | * License, v. 2.0. 
If a copy of the MPL was not distributed with this 4 | * file, You can obtain one at https://mozilla.org/MPL/2.0/. 5 | */ 6 | 7 | //! Automaton loosely based on visibly pushdown automata. 8 | 9 | use crate::{ 10 | try_merge, Check, Ctrl, Curry, IllFormed, Input, InputError, Merge, ParseError, RangeMap, 11 | State, Transition, 12 | }; 13 | use core::{iter, num::NonZeroUsize}; 14 | use std::{ 15 | collections::{btree_map, BTreeMap, BTreeSet}, 16 | ffi::OsStr, 17 | fs, io, 18 | path::Path, 19 | process::Command, 20 | }; 21 | 22 | /// One token corresponds to at most one transition. 23 | pub type Deterministic = Graph; 24 | 25 | /// One token corresponds to as many transitions as it would like; 26 | /// if any of these transitions eventually accept, the whole thing accepts. 27 | pub type Nondeterministic = Graph>; 28 | 29 | // TODO: make `states` a `BTreeSet`. 30 | 31 | /// Automaton loosely based on visibly pushdown automata. 32 | #[allow(clippy::exhaustive_structs)] 33 | #[derive(Debug)] 34 | pub struct Graph> { 35 | /// Every state, indexed. 36 | pub states: Vec>, 37 | /// Initial state of the machine (before reading input). 38 | pub initial: C, 39 | } 40 | 41 | impl> Clone for Graph { 42 | #[inline] 43 | fn clone(&self) -> Self { 44 | Self { 45 | states: self.states.clone(), 46 | initial: self.initial.clone(), 47 | } 48 | } 49 | } 50 | 51 | impl> Eq for Graph {} 52 | 53 | impl> PartialEq for Graph { 54 | #[inline] 55 | fn eq(&self, other: &Self) -> bool { 56 | self.initial == other.initial && self.states == other.states 57 | } 58 | } 59 | 60 | impl> Graph { 61 | /// Check a subset of well-formedness. 62 | /// Note that this can't check if determinization will succeed in less time than actually trying; 63 | /// if you want to see if there can be any runtime errors, just try to determinize it. 64 | /// # Errors 65 | /// When ill-formed (with a witness). 
66 | #[inline] 67 | pub fn check(&self) -> Result<(), IllFormed> { 68 | let n_states = self.states.len(); 69 | for i in self.initial.view() { 70 | let Some(state) = self.states.get(i) else { 71 | return Err(IllFormed::OutOfBounds(i)); 72 | }; 73 | if let Some(t) = state.input_type()? { 74 | if t != "()" { 75 | return Err(IllFormed::InitialNotUnit(t.to_owned())); 76 | } 77 | } 78 | for transition in state.transitions.values() { 79 | let in_t = transition.input_type(); 80 | if let Some(t) = in_t { 81 | if t != "()" { 82 | return Err(IllFormed::InitialNotUnit(t.to_owned())); 83 | } 84 | } 85 | } 86 | } 87 | let _ = self.output_type()?; 88 | for (i, state) in self.states.iter().enumerate() { 89 | if get!(self.states, ..i).contains(state) { 90 | return Err(IllFormed::DuplicateState(Box::new(state.clone()))); 91 | } 92 | } 93 | NonZeroUsize::new(n_states).map_or(Ok(()), |nz| { 94 | self.states.iter().try_fold((), |(), state| state.check(nz)) 95 | }) 96 | } 97 | 98 | /// Run this parser to completion and check types along the way. 99 | /// # Errors 100 | /// If the parser determines there should be an error. 101 | #[inline] 102 | #[allow(unsafe_code)] 103 | pub fn accept>( 104 | &self, 105 | input: In, 106 | ) -> Result> { 107 | use crate::Run; 108 | let mut run = input.run(self); 109 | for r in &mut run { 110 | drop(r?); 111 | } 112 | for i in run.ctrl.view() { 113 | if get!(self.states, i).non_accepting.is_empty() { 114 | return Ok(run.output_t); 115 | } 116 | } 117 | Err(ParseError::BadInput(InputError::NotAccepting)) 118 | } 119 | 120 | /// Subset construction algorithm for determinizing nondeterministic automata. 121 | /// # Errors 122 | /// If there's an ambiguity (which would have crashed the nondeterministic automaton anyway). 
123 | #[inline] 124 | #[allow( 125 | clippy::missing_panics_doc, 126 | clippy::type_complexity, 127 | clippy::unwrap_in_result 128 | )] 129 | pub fn determinize(&self) -> Result, IllFormed> { 130 | // Check that the source graph is well-formed 131 | self.check()?; 132 | 133 | // Associate each subset of states with a merged state 134 | let mut subsets_as_states = BTreeMap::new(); 135 | self.explore(&mut subsets_as_states, &self.initial)?; 136 | 137 | // Fix an ordering on those subsets 138 | let ordering: Vec = subsets_as_states.keys().cloned().collect(); 139 | // Don't need to sort--that's guaranteed in `BTreeMap::keys` 140 | 141 | let mut output = Deterministic { 142 | initial: unwrap!(ordering.binary_search(&self.initial)), 143 | states: ordering 144 | .iter() 145 | .map(|set| { 146 | let State { 147 | transitions, 148 | non_accepting, 149 | } = unwrap!(subsets_as_states.remove(set)); 150 | State { 151 | transitions: fix_indices_curry(transitions, &ordering), 152 | non_accepting, 153 | } 154 | }) 155 | .collect(), 156 | }; 157 | output.sort(); 158 | output 159 | .check() 160 | .map(|()| output) 161 | .map_err(IllFormed::convert_ctrl) 162 | } 163 | 164 | /// Associate each subset of states with a merged state. 165 | #[inline] 166 | fn explore( 167 | &self, 168 | subsets_as_states: &mut BTreeMap>, 169 | subset: &C, 170 | ) -> Result<(), IllFormed> { 171 | // Check if we've seen this subset already 172 | let btree_map::Entry::Vacant(entry) = subsets_as_states.entry(subset.clone()) else { 173 | return Ok(()); 174 | }; 175 | 176 | // Merge this subset of states into one (most of the heavy lifting) 177 | let mega_state = match try_merge(subset.view().map(|i| Ok(get!(self.states, i).clone()))) { 178 | // If no state follows, reject immediately. 
179 | None => State { 180 | transitions: Curry::Scrutinize { 181 | filter: RangeMap(BTreeMap::new()), 182 | fallback: None, 183 | }, 184 | non_accepting: iter::once("Unexpected token".to_owned()).collect(), 185 | }, 186 | // If they successfully merged, return the merged state 187 | Some(Ok(ok)) => ok, 188 | // If they didn't successfully merge, something's wrong with the original automaton 189 | Some(Err(e)) => return Err(e), 190 | }; 191 | 192 | // Necessary before we move `mega_state` 193 | let all_dsts: BTreeSet = mega_state 194 | .transitions 195 | .values() 196 | .flat_map(|t| t.dsts().into_iter().cloned()) 197 | .collect(); 198 | 199 | // Insert the finished value (also to tell all below iterations that we've covered this case) 200 | let _ = entry.insert(mega_state); 201 | 202 | // Recurse on all possible next states 203 | all_dsts 204 | .into_iter() 205 | .try_fold((), |(), dst| self.explore(subsets_as_states, &dst)) 206 | } 207 | 208 | /// Compute the output type of any successful run. 209 | /// # Errors 210 | /// If multiple accepting states attempt to return different types. 211 | #[inline] 212 | pub fn output_type(&self) -> Result, IllFormed> { 213 | self.states 214 | .iter() 215 | .try_fold(None, |acc: Option<&str>, state| { 216 | if state.non_accepting.is_empty() { 217 | acc.map_or_else( 218 | || state.input_type(), 219 | |t| { 220 | if let Some(input_t) = state.input_type()? { 221 | if input_t != t { 222 | return Err(IllFormed::WrongReturnType( 223 | t.to_owned(), 224 | input_t.to_owned(), 225 | )); 226 | } 227 | } 228 | Ok(Some(t)) 229 | }, 230 | ) 231 | } else { 232 | Ok(acc) 233 | } 234 | }) 235 | } 236 | 237 | /// Compute the input type of any successful run. 238 | /// # Errors 239 | /// If multiple accepting states attempt to return different types. 
240 | #[inline] 241 | #[allow(clippy::missing_panics_doc)] 242 | pub fn input_type(&self) -> Result, IllFormed> { 243 | self.initial 244 | .view() 245 | .map(|i| get!(self.states, i)) 246 | .try_fold(None, |acc, state| { 247 | let shit = acc.merge(state.transitions.values().try_fold(None, |accc, t| { 248 | accc.merge(t.input_type()).map_or_else( 249 | |(a, b)| { 250 | if a == b { 251 | Ok(Some(a)) 252 | } else { 253 | Err(IllFormed::TypeMismatch(a.to_owned(), b.to_owned())) 254 | } 255 | }, 256 | Ok, 257 | ) 258 | })?); 259 | shit.map_or_else( 260 | |(a, b)| { 261 | if a == b { 262 | Ok(Some(a)) 263 | } else { 264 | Err(IllFormed::TypeMismatch(a.to_owned(), b.to_owned())) 265 | } 266 | }, 267 | Ok, 268 | ) 269 | }) 270 | } 271 | 272 | /// Change nothing about the semantics but sort the internal vector of states. 273 | #[inline] 274 | #[allow(clippy::missing_panics_doc)] 275 | pub fn sort(&mut self) { 276 | // Associate each original index with a concrete state instead of just an index, 277 | // since we're going to be swapping the indices around. 278 | let index_map: BTreeMap> = 279 | self.states.iter().cloned().enumerate().collect(); 280 | self.states.sort_unstable(); 281 | self.states.dedup(); // <-- Cool that we can do this! 282 | self.initial = self 283 | .initial 284 | .clone() 285 | .map_indices(|i| unwrap!(self.states.binary_search(unwrap!(index_map.get(&i))))); 286 | // Can't do this in-place since the entire state array is required as an argument. 287 | self.states = self 288 | .states 289 | .iter() 290 | .map(|s| s.reindex(&self.states, &index_map)) 291 | .collect(); 292 | } 293 | 294 | /// Check if this parser ever could, at any point, involve a fallback transition. 295 | #[inline] 296 | #[must_use] 297 | pub fn involves_any_fallback(&self) -> bool { 298 | self.states.iter().any(State::involves_any_fallback) 299 | } 300 | 301 | /// Kleene-star operation: accept any number (including zero!) of repetitions of this parser. 
#[inline]
#[must_use]
#[allow(clippy::panic, clippy::missing_panics_doc)]
pub fn star(self) -> Deterministic<I> {
    let mut s = self.generalize();
    // Indices of every state that currently accepts.
    let accepting: BTreeSet<usize> = s
        .states
        .iter()
        .enumerate()
        .filter_map(|(i, st)| st.non_accepting.is_empty().then_some(i))
        .collect();
    // Apply `star` to every transition, handing it the initial state(s) and the accepting set.
    for state in &mut s.states {
        match state.transitions {
            Curry::Wildcard(ref mut t) => t.star(&s.initial, &accepting),
            Curry::Scrutinize {
                ref mut filter,
                ref mut fallback,
            } => {
                filter.star(&s.initial, &accepting);
                if let &mut Some(ref mut f) = fallback {
                    f.star(&s.initial, &accepting);
                }
            }
        }
    }
    // A lone accepting state with no transitions: accepts exactly the empty input,
    // which covers the "zero repetitions" case.
    let empty = Graph {
        states: vec![State {
            transitions: Curry::Scrutinize {
                filter: RangeMap(BTreeMap::new()),
                fallback: None,
            },
            non_accepting: BTreeSet::new(),
        }],
        initial: 0,
    };
    // Panics (with the error's message) if determinization fails.
    empty | s.determinize().unwrap_or_else(|e| panic!("{e}"))
}
}

/// Use an ordering on subsets to translate each subset into a specific state.
#[inline]
#[allow(clippy::type_complexity)]
fn fix_indices_curry<I: Input, C: Ctrl<I>>(value: Curry<I, C>, ordering: &[C]) -> Curry<I, usize> {
    match value {
        Curry::Wildcard(etc) => Curry::Wildcard(fix_indices_transition(etc, ordering)),
        Curry::Scrutinize { filter, fallback } => Curry::Scrutinize {
            filter: fix_indices_range_map(filter, ordering),
            fallback: fallback.map(|f| fix_indices_transition(f, ordering)),
        },
    }
}

/// Use an ordering on subsets to translate each subset into a specific state.
356 | #[inline] 357 | #[allow(clippy::type_complexity)] 358 | fn fix_indices_range_map>( 359 | value: RangeMap, 360 | ordering: &[C], 361 | ) -> RangeMap { 362 | RangeMap( 363 | value 364 | .0 365 | .into_iter() 366 | .map(|(k, v)| (k, fix_indices_transition(v, ordering))) 367 | .collect(), 368 | ) 369 | } 370 | 371 | /// Use an ordering on subsets to translate each subset into a specific state. 372 | #[inline] 373 | #[allow(clippy::type_complexity)] 374 | fn fix_indices_transition>( 375 | value: Transition, 376 | ordering: &[C], 377 | ) -> Transition { 378 | match value { 379 | Transition::Lateral { dst, update } => Transition::Lateral { 380 | dst: unwrap!(ordering.binary_search(&dst)), 381 | update, 382 | }, 383 | Transition::Call { 384 | region, 385 | ref detour, 386 | dst, 387 | combine, 388 | } => Transition::Call { 389 | region, 390 | detour: unwrap!(ordering.binary_search(detour)), 391 | dst: Box::new(fix_indices_transition(*dst, ordering)), 392 | combine, 393 | }, 394 | Transition::Return { region } => Transition::Return { region }, 395 | } 396 | } 397 | 398 | impl Graph { 399 | /// Write this parser as a Rust source file. 400 | /// # Errors 401 | /// If file creation or formatting fails. 402 | #[inline] 403 | pub fn to_file + AsRef>( 404 | &self, 405 | path: P, 406 | ) -> Result, IllFormed> { 407 | self.to_src().map(|src| { 408 | fs::write(&path, src)?; 409 | Command::new("rustfmt").arg(path).output().map(|_| {}) 410 | }) 411 | } 412 | } 413 | -------------------------------------------------------------------------------- /automata/src/in_progress.rs: -------------------------------------------------------------------------------- 1 | /* 2 | * This Source Code Form is subject to the terms of the Mozilla Public 3 | * License, v. 2.0. If a copy of the MPL was not distributed with this 4 | * file, You can obtain one at https://mozilla.org/MPL/2.0/. 5 | */ 6 | 7 | //! Execute an automaton on an input sequence. 
8 | 9 | use crate::{try_merge, Ctrl, Graph, IllFormed, Input, ToSrc, Transition}; 10 | use core::fmt; 11 | 12 | /// Execute an automaton on an input sequence. 13 | #[non_exhaustive] 14 | pub struct InProgress<'graph, I: Input, C: Ctrl, In: Iterator> { 15 | /// Reference to the graph we're riding. 16 | pub graph: &'graph Graph, 17 | /// Iterator over input tokens. 18 | pub input: In, 19 | /// Internal stack. 20 | pub stack: Vec>, 21 | /// Internal state. 22 | pub ctrl: C, 23 | /// Output type as we go. 24 | pub output_t: String, 25 | } 26 | 27 | impl, In: Iterator> fmt::Debug for InProgress<'_, I, C, In> { 28 | #[inline] 29 | #[allow(unsafe_code)] 30 | fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { 31 | write!( 32 | f, 33 | "In progress: {} @ {:?} -> {}", 34 | self.stack.to_src(), 35 | self.ctrl.view().collect::>(), 36 | self.output_t, 37 | ) 38 | } 39 | } 40 | 41 | /// Input intentionally rejected by a parser without anything going wrong internally. 42 | #[non_exhaustive] 43 | #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)] 44 | pub enum InputError { 45 | /// Ended in a non-accepting state. 46 | NotAccepting, 47 | /// Ended input with items in the stack. 48 | Unclosed, 49 | /// Tried to close a region that hadn't been opened. 50 | Unopened, 51 | /// Tried to take a transition that did not exist. 52 | Absurd, 53 | } 54 | 55 | /// Either the parser intentionally rejected the input or the parser was broken. 56 | #[allow(clippy::exhaustive_enums)] 57 | #[derive(Clone, Debug, Eq, PartialEq)] 58 | pub enum ParseError> { 59 | /// Input intentionally rejected by a parser without anything going wrong internally. 60 | BadInput(InputError), 61 | /// Parser was broken. 
62 | BadParser(IllFormed), 63 | } 64 | 65 | impl, In: Iterator> Iterator for InProgress<'_, I, C, In> { 66 | type Item = Result>; 67 | #[inline] 68 | fn next(&mut self) -> Option { 69 | let maybe_token = self.input.next(); 70 | let (c, o) = match step( 71 | self.graph, 72 | &self.ctrl, 73 | maybe_token.clone(), 74 | &mut self.stack, 75 | &self.output_t, 76 | ) { 77 | Ok(ok) => ok, 78 | Err(e) => return Some(Err(e)), 79 | }; 80 | self.output_t = o; 81 | self.ctrl = c?; 82 | maybe_token.map(Ok) // <-- Propagate the iterator's input 83 | } 84 | } 85 | 86 | /// Act on the automaton graph in response to one input token. 87 | #[inline] 88 | #[allow(clippy::type_complexity)] 89 | fn step>( 90 | graph: &Graph, 91 | ctrl: &C, 92 | maybe_token: Option, 93 | stack: &mut Vec>, 94 | output_t: &str, 95 | ) -> Result<(Option, String), ParseError> { 96 | ctrl.view().try_fold((), |(), i| { 97 | if graph.states.get(i).is_none() { 98 | Err(ParseError::BadParser(IllFormed::OutOfBounds(i))) 99 | } else { 100 | Ok(()) 101 | } 102 | })?; 103 | let mut states = ctrl.view().map(|i| get!(graph.states, i)); 104 | let Some(token) = maybe_token else { 105 | return if stack.is_empty() { 106 | if states.any(|s| s.non_accepting.is_empty()) { 107 | Ok((None, output_t.to_owned())) 108 | } else { 109 | Err(ParseError::BadInput(InputError::NotAccepting)) 110 | } 111 | } else { 112 | Err(ParseError::BadInput(InputError::Unclosed)) 113 | }; 114 | }; 115 | 116 | // Merge into a huge aggregate transition and act on that instead of individual transitions 117 | match try_merge(states.filter_map(|s| match s.transitions.get(&token) { 118 | Err(e) => Some(Err(e)), 119 | Ok(opt) => opt.map(|t| Ok(t.clone())), 120 | })) { 121 | None => Err(ParseError::BadInput(InputError::Absurd)), 122 | Some(Err(e)) => Err(ParseError::BadParser(e)), 123 | Some(Ok(mega_transition)) => mega_transition.invoke(output_t, stack)?.map_or( 124 | Err(ParseError::BadInput(InputError::Unopened)), 125 | |(c, out)| Ok((Some(c), out)), 126 
| ), 127 | } 128 | } 129 | -------------------------------------------------------------------------------- /automata/src/input.rs: -------------------------------------------------------------------------------- 1 | /* 2 | * This Source Code Form is subject to the terms of the Mozilla Public 3 | * License, v. 2.0. If a copy of the MPL was not distributed with this 4 | * file, You can obtain one at https://mozilla.org/MPL/2.0/. 5 | */ 6 | 7 | //! Typing convenience: trait satisfying everything required for an input token. 8 | 9 | use crate::ToSrc; 10 | 11 | /// Typing convenience: trait satisfying everything required for an input token. 12 | pub trait Input: Clone + Ord + ToSrc {} 13 | 14 | impl Input for I {} 15 | -------------------------------------------------------------------------------- /automata/src/lib.rs: -------------------------------------------------------------------------------- 1 | /* 2 | * This Source Code Form is subject to the terms of the Mozilla Public 3 | * License, v. 2.0. If a copy of the MPL was not distributed with this 4 | * file, You can obtain one at https://mozilla.org/MPL/2.0/. 5 | */ 6 | 7 | //! Modified pushdown automata, the backbone of the `inator` crate. 

// Every warning enabled below is promoted to a hard error.
#![deny(warnings)]
// Lint names that the active toolchain does not recognize are tolerated.
#![allow(unknown_lints)]
#![warn(
    clippy::all,
    clippy::missing_docs_in_private_items,
    clippy::nursery,
    clippy::pedantic,
    clippy::perf,
    clippy::restriction,
    clippy::cargo,
    elided_lifetimes_in_paths,
    missing_docs,
    rustdoc::all
)]
// https://doc.rust-lang.org/rustc/lints/listing/allowed-by-default.html
#![warn(
    absolute_paths_not_starting_with_crate,
    elided_lifetimes_in_paths,
    explicit_outlives_requirements,
    keyword_idents,
    let_underscore_drop,
    macro_use_extern_crate,
    meta_variable_misuse,
    missing_abi,
    missing_copy_implementations,
    missing_debug_implementations,
    missing_docs,
    non_ascii_idents,
    noop_method_call,
    pointer_structural_match,
    rust_2021_incompatible_closure_captures,
    rust_2021_incompatible_or_patterns,
    rust_2021_prefixes_incompatible_syntax,
    rust_2021_prelude_collisions,
    single_use_lifetimes,
    trivial_casts,
    trivial_numeric_casts,
    unreachable_pub,
    unsafe_code,
    unsafe_op_in_unsafe_fn,
    unstable_features,
    unused_crate_dependencies,
    unused_extern_crates,
    unused_import_braces,
    unused_lifetimes,
    unused_macro_rules,
    unused_qualifications,
    unused_results,
    unused_tuple_struct_fields,
    variant_size_differences
)]
// Opt back out of individual lints from the groups enabled above.
#![allow(
    clippy::blanket_clippy_restriction_lints,
    clippy::cargo_common_metadata,
    clippy::expect_used,
    clippy::implicit_return,
    clippy::inline_always,
    clippy::let_underscore_untyped,
    clippy::min_ident_chars,
    clippy::missing_trait_methods,
    clippy::mod_module_files,
    clippy::multiple_unsafe_ops_per_block,
    clippy::needless_borrowed_reference,
    clippy::option_option,
    clippy::partial_pub_fields,
    clippy::pub_use,
    clippy::pub_with_shorthand,
    clippy::question_mark_used,
    clippy::redundant_pub_crate,
    clippy::ref_patterns,
    clippy::same_name_method,
    clippy::semicolon_outside_block,
    clippy::separated_literal_suffix,
    clippy::similar_names,
    clippy::single_call_fn,
    clippy::single_char_lifetime_names,
    clippy::std_instead_of_alloc,
    clippy::string_add,
    clippy::unneeded_field_pattern,
    clippy::use_self,
    clippy::wildcard_imports
)]

/// Call a function that will also be available to the compiled parser.
/// The expression is passed along both as source text and as a value.
#[macro_export]
macro_rules! update {
    ($ex:expr) => {
        $crate::Update::_update_macro(stringify!($ex), $ex)
    };
}

/// Unwrap if we're debugging but `unwrap_unchecked` if we're not.
/// This is the checked variant, used in debug and test builds.
#[cfg(any(debug_assertions, test))]
macro_rules! unwrap {
    ($expr:expr) => {
        $expr.unwrap()
    };
}

/// Unwrap if we're debugging but `unwrap_unchecked` if we're not.
/// This is the unchecked variant, used in non-test builds without debug assertions:
/// every call site must guarantee that the value is present.
#[cfg(not(any(debug_assertions, test)))]
macro_rules! unwrap {
    ($expr:expr) => {{
        #[allow(unsafe_code, unused_unsafe)]
        let result = unsafe { $expr.unwrap_unchecked() };
        result
    }};
}

/// Unreachable state, but checked if we're debugging.
/// This is the checked variant (panics via `unreachable!`), used in debug and test builds.
#[cfg(feature = "quickcheck")]
#[cfg(any(debug_assertions, test))]
macro_rules! never {
    () => {
        unreachable!()
    };
}

/// Unreachable state, but checked if we're debugging.
/// This is the unchecked variant, used in non-test builds without debug assertions:
/// every call site must guarantee that it is never actually reached.
#[cfg(feature = "quickcheck")]
#[cfg(not(any(debug_assertions, test)))]
macro_rules! never {
    () => {{
        #[allow(unsafe_code, unused_unsafe)]
        unsafe {
            core::hint::unreachable_unchecked()
        }
    }};
}

/// Index with `get(..).unwrap()` if we're debugging but `get_unchecked` if we're not.
/// This is the checked variant, used in debug and test builds.
#[cfg(any(debug_assertions, test))]
macro_rules! get {
    ($expr:expr, $index:expr) => {
        $expr.get($index).unwrap()
    };
}

/// Index with `get(..).unwrap()` if we're debugging but `get_unchecked` if we're not.
/// This is the unchecked variant, used in non-test builds without debug assertions:
/// every call site must guarantee that the index is in bounds.
#[cfg(not(any(debug_assertions, test)))]
macro_rules! get {
    ($expr:expr, $index:expr) => {{
        #[allow(unsafe_code, unused_unsafe)]
        let result = unsafe { $expr.get_unchecked($index) };
        result
    }};
}

/// One-argument function.
/// The expression is passed along both as source text and as a value.
#[macro_export]
macro_rules! f {
    ($ex:expr) => {
        $crate::F::_from_macro(stringify!($ex).to_owned(), $ex)
    };
}

/// Two-argument function.
/// The expression is passed along both as source text and as a value.
#[macro_export]
macro_rules! ff {
    ($ex:expr) => {
        $crate::FF::_from_macro(stringify!($ex).to_owned(), $ex)
    };
}

mod check;
mod combinators;
mod ctrl;
mod curry;
mod f;
mod generalize;
mod graph;
mod in_progress;
mod input;
mod map_indices;
mod merge;
mod range;
mod range_map;
mod reindex;
mod run;
mod state;
mod to_src;
mod transition;
mod update;

#[cfg(feature = "quickcheck")]
mod qc;

pub use {
    check::{Check, IllFormed},
    ctrl::Ctrl,
    curry::Curry,
    f::{F, FF},
    graph::{Deterministic, Graph, Nondeterministic},
    in_progress::{InProgress, InputError, ParseError},
    input::Input,
    merge::{merge, try_merge, Merge},
    range::Range,
    range_map::RangeMap,
    run::Run,
    state::State,
    to_src::ToSrc,
    transition::Transition,
    update::Update,
};

#[cfg(test)]
mod test;

#[cfg(test)]
use rand as _; // <-- needed in examples

use {core::iter, std::collections::BTreeSet};

/// Language of matched parentheses and concatenations thereof.
223 | #[inline] 224 | #[must_use] 225 | pub fn dyck_d() -> Deterministic { 226 | Graph { 227 | states: vec![State { 228 | transitions: Curry::Scrutinize { 229 | filter: RangeMap( 230 | [ 231 | ( 232 | Range::unit('('), 233 | Transition::Call { 234 | region: "parentheses", 235 | detour: 0, 236 | dst: Box::new(Transition::Lateral { 237 | dst: 0, 238 | update: None, 239 | }), 240 | combine: ff!(|(), ()| ()), 241 | }, 242 | ), 243 | ( 244 | Range::unit(')'), 245 | Transition::Return { 246 | region: "parentheses", 247 | }, 248 | ), 249 | ] 250 | .into_iter() 251 | .collect(), 252 | ), 253 | fallback: None, 254 | }, 255 | non_accepting: BTreeSet::new(), 256 | }], 257 | initial: 0, 258 | } 259 | } 260 | 261 | /// Language of matched parentheses and concatenations thereof. 262 | #[inline] 263 | #[must_use] 264 | pub fn dyck_nd() -> Nondeterministic { 265 | Graph { 266 | states: vec![State { 267 | transitions: Curry::Scrutinize { 268 | filter: RangeMap( 269 | [ 270 | ( 271 | Range::unit('('), 272 | Transition::Call { 273 | region: "parentheses", 274 | detour: iter::once(0).collect(), 275 | dst: Box::new(Transition::Lateral { 276 | dst: iter::once(0).collect(), 277 | update: None, 278 | }), 279 | combine: ff!(|(), ()| ()), 280 | }, 281 | ), 282 | ( 283 | Range::unit(')'), 284 | Transition::Return { 285 | region: "parentheses", 286 | }, 287 | ), 288 | ] 289 | .into_iter() 290 | .collect(), 291 | ), 292 | fallback: None, 293 | }, 294 | non_accepting: BTreeSet::new(), 295 | }], 296 | initial: iter::once(0).collect(), 297 | } 298 | } 299 | -------------------------------------------------------------------------------- /automata/src/map_indices.rs: -------------------------------------------------------------------------------- 1 | /* 2 | * This Source Code Form is subject to the terms of the Mozilla Public 3 | * License, v. 2.0. If a copy of the MPL was not distributed with this 4 | * file, You can obtain one at https://mozilla.org/MPL/2.0/. 5 | */ 6 | 7 | //! 
Apply a function to each index in a structure.

use crate::{Ctrl, Curry, Graph, Input, RangeMap, State, Transition};

impl<I: Input, C: Ctrl<I>> Graph<I, C> {
    /// Apply a function to each index, both in every state and in the initial state(s).
    #[inline]
    #[must_use]
    pub fn map_indices<F: FnMut(usize) -> usize>(self, mut f: F) -> Self {
        Self {
            states: self
                .states
                .into_iter()
                .map(|s| s.map_indices(&mut f))
                .collect(),
            initial: self.initial.map_indices(&mut f),
        }
    }
}

impl<I: Input, C: Ctrl<I>> State<I, C> {
    /// Apply a function to each index.
    /// Only the transitions are affected; everything else is carried over unchanged.
    #[inline]
    #[must_use]
    pub fn map_indices<F: FnMut(usize) -> usize>(self, f: F) -> Self {
        Self {
            transitions: self.transitions.map_indices(f),
            ..self
        }
    }
}

impl<I: Input, C: Ctrl<I>> Curry<I, C> {
    /// Apply a function to each index.
    #[inline]
    #[must_use]
    pub fn map_indices<F: FnMut(usize) -> usize>(self, mut f: F) -> Self {
        match self {
            Self::Wildcard(etc) => Self::Wildcard(etc.map_indices(f)),
            Self::Scrutinize { filter, fallback } => Self::Scrutinize {
                // The fallback is mapped first (borrowing `f`), then the filter consumes `f`.
                fallback: fallback.map(|t| t.map_indices(&mut f)),
                filter: filter.map_indices(f),
            },
        }
    }
}

impl<I: Input, C: Ctrl<I>> RangeMap<I, C> {
    /// Apply a function to each index.
    /// Keys are left alone; only the transitions they map to are rewritten.
    #[inline]
    #[must_use]
    pub fn map_indices<F: FnMut(usize) -> usize>(self, mut f: F) -> Self {
        Self(
            self.0
                .into_iter()
                .map(|(k, v)| (k, v.map_indices(&mut f)))
                .collect(),
        )
    }
}

impl<I: Input, C: Ctrl<I>> Transition<I, C> {
    /// Apply a function to each index.
    #[inline]
    #[must_use]
    pub fn map_indices<F: FnMut(usize) -> usize>(self, mut f: F) -> Self {
        match self {
            Self::Lateral { dst, update } => Self::Lateral {
                dst: dst.map_indices(f),
                update,
            },
            Self::Call {
                region,
                detour,
                dst,
                combine,
            } => Self::Call {
                region,
                detour: detour.map_indices(&mut f),
                // The continuation is itself a transition, so recurse into it.
                dst: Box::new(dst.map_indices(f)),
                combine,
            },
            // A return mentions no index at all.
            Self::Return { region } => Self::Return { region },
        }
    }
}

--------------------------------------------------------------------------------
/automata/src/merge.rs:
--------------------------------------------------------------------------------
/*
 * This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at https://mozilla.org/MPL/2.0/.
 */

//! Trait to fallibly combine multiple values into one value with identical semantics.

use crate::{Ctrl, Curry, IllFormed, Input, RangeMap, State, Transition, Update, FF};
use core::convert::Infallible;
use std::collections::{btree_map::Entry, BTreeMap, BTreeSet};

/// Trait to fallibly combine multiple values into one value with identical semantics.
pub trait Merge: Sized {
    /// Implementation-defined error providing a witness to the reason the merge failed.
    type Error;
    /// Fallibly combine multiple values into one value with identical semantics.
    /// # Errors
    /// Implementation-defined: if the merge as we define it can't work.
    fn merge(self, other: Self) -> Result<Self, Self::Error>;
}

/// Merge a collection of elements into one.
/// Return `None` if the collection was empty.
25 | #[inline] 26 | pub fn merge>(input: In) -> Option> { 27 | let mut iter = input.into_iter(); 28 | let first = iter.next()?; 29 | Some(iter.try_fold(first, Merge::merge)) 30 | } 31 | 32 | /// Merge a collection of `Result`s possibly containing elements into one. 33 | /// Return `None` if the collection was empty. 34 | #[inline] 35 | #[allow(clippy::module_name_repetitions)] 36 | pub fn try_merge>>( 37 | input: In, 38 | ) -> Option> { 39 | let mut iter = input.into_iter(); 40 | iter.next()?.map_or_else( 41 | |e| Some(Err(e)), 42 | |first| Some(iter.try_fold(first, |acc, m| acc.merge(m?))), 43 | ) 44 | } 45 | 46 | impl Merge for Option { 47 | type Error = (T, T); 48 | #[inline] 49 | fn merge(self, other: Self) -> Result { 50 | match (self, other) { 51 | (None, None) => Ok(None), 52 | (Some(a), None) => Ok(Some(a)), 53 | (None, Some(b)) => Ok(Some(b)), 54 | (Some(a), Some(b)) => Err((a, b)), 55 | } 56 | } 57 | } 58 | 59 | impl Merge for usize { 60 | type Error = (usize, usize); 61 | #[inline] 62 | fn merge(self, other: Self) -> Result { 63 | if self == other { 64 | Ok(self) 65 | } else { 66 | Err((self, other)) 67 | } 68 | } 69 | } 70 | 71 | impl<'s> Merge for &'s str { 72 | type Error = (&'s str, &'s str); 73 | #[inline] 74 | fn merge(self, other: Self) -> Result { 75 | if self == other { 76 | Ok(self) 77 | } else { 78 | Err((self, other)) 79 | } 80 | } 81 | } 82 | 83 | impl Merge for BTreeSet { 84 | type Error = (usize, usize); 85 | #[inline] 86 | fn merge(mut self, other: Self) -> Result { 87 | self.extend(other); 88 | Ok(self) 89 | } 90 | } 91 | 92 | impl Merge for BTreeMap { 93 | type Error = (K, V::Error); 94 | #[inline] 95 | fn merge(mut self, other: Self) -> Result { 96 | for (k, v) in other { 97 | match self.entry(k) { 98 | Entry::Occupied(extant) => { 99 | let (lk, lv) = extant.remove_entry(); 100 | let mv = lv.merge(v).map_err(|e| (lk.clone(), e))?; 101 | drop(self.insert(lk, mv)); 102 | } 103 | Entry::Vacant(empty) => drop(empty.insert(v)), 104 | } 105 
| } 106 | Ok(self) 107 | } 108 | } 109 | 110 | impl> Merge for State { 111 | type Error = IllFormed; 112 | #[inline] 113 | #[allow(clippy::unwrap_in_result)] 114 | fn merge(mut self, other: Self) -> Result { 115 | Ok(Self { 116 | transitions: self.transitions.merge(other.transitions)?, 117 | non_accepting: if self.non_accepting.is_empty() || other.non_accepting.is_empty() { 118 | BTreeSet::new() 119 | } else { 120 | self.non_accepting.extend(other.non_accepting); 121 | self.non_accepting 122 | }, 123 | }) 124 | } 125 | } 126 | 127 | impl Merge for Vec { 128 | type Error = Infallible; 129 | #[inline] 130 | fn merge(mut self, other: Self) -> Result { 131 | self.extend(other); 132 | Ok(self) 133 | } 134 | } 135 | 136 | impl> Merge for Curry { 137 | type Error = IllFormed; 138 | #[inline] 139 | fn merge(self, other: Self) -> Result { 140 | match (self, other) { 141 | (Self::Wildcard(lhs), Self::Wildcard(rhs)) => Ok(Self::Wildcard(lhs.merge(rhs)?)), 142 | (Self::Wildcard(w), Self::Scrutinize { filter, .. }) 143 | | (Self::Scrutinize { filter, .. 
}, Self::Wildcard(w)) => { 144 | match filter.0.first_key_value() { 145 | None => Ok(Self::Wildcard(w)), 146 | Some((k, v)) => Err(IllFormed::WildcardMask { 147 | arg_token: Some(k.clone()), 148 | possibility_1: Box::new(w), 149 | possibility_2: Box::new(v.clone()), 150 | }), 151 | } 152 | } 153 | ( 154 | Self::Scrutinize { 155 | filter: l_filter, 156 | fallback: l_fallback, 157 | }, 158 | Self::Scrutinize { 159 | filter: r_filter, 160 | fallback: r_fallback, 161 | }, 162 | ) => Ok(Self::Scrutinize { 163 | filter: l_filter.merge(r_filter)?, 164 | fallback: l_fallback 165 | .merge(r_fallback) 166 | .map_or_else(|(a, b)| a.merge(b).map(Some), Ok)?, 167 | }), 168 | } 169 | } 170 | } 171 | 172 | impl> Merge for RangeMap { 173 | type Error = IllFormed; 174 | #[inline] 175 | fn merge(self, other: Self) -> Result { 176 | Ok(Self(self.0.merge(other.0).map_err(|(_, e)| e)?)) 177 | } 178 | } 179 | 180 | impl> Merge for Transition { 181 | type Error = IllFormed; 182 | #[inline] 183 | fn merge(self, other: Self) -> Result { 184 | match (self, other) { 185 | ( 186 | Self::Lateral { 187 | dst: l_dst, 188 | update: l_update, 189 | }, 190 | Self::Lateral { 191 | dst: r_dst, 192 | update: r_update, 193 | }, 194 | ) => Ok(Self::Lateral { 195 | dst: l_dst 196 | .merge(r_dst) 197 | .map_err(|(a, b)| IllFormed::Superposition(a, b))?, 198 | update: l_update 199 | .merge(r_update) 200 | .map_err(|(a, b)| IllFormed::IncompatibleCallbacks(Box::new(a), Box::new(b)))?, 201 | }), 202 | ( 203 | Self::Call { 204 | region: l_region, 205 | detour: l_detour, 206 | dst: l_dst, 207 | combine: l_combine, 208 | }, 209 | Self::Call { 210 | region: r_region, 211 | detour: r_detour, 212 | dst: r_dst, 213 | combine: r_combine, 214 | }, 215 | ) => Ok(Self::Call { 216 | region: l_region 217 | .merge(r_region) 218 | .map_err(|(a, b)| IllFormed::AmbiguousRegions(a, b))?, 219 | detour: l_detour 220 | .merge(r_detour) 221 | .map_err(|(a, b)| IllFormed::Superposition(a, b))?, 222 | dst: 
Box::new(l_dst.merge(*r_dst)?), 223 | combine: l_combine.merge(r_combine).map_err(|(a, b)| { 224 | IllFormed::IncompatibleCombinators(Box::new(a), Box::new(b)) 225 | })?, 226 | }), 227 | (Self::Return { region: l_region }, Self::Return { region: r_region }) => { 228 | Ok(Self::Return { 229 | region: l_region 230 | .merge(r_region) 231 | .map_err(|(a, b)| IllFormed::AmbiguousRegions(a, b))?, 232 | }) 233 | } 234 | (a, b) => Err(IllFormed::IncompatibleActions(Box::new(a), Box::new(b))), 235 | } 236 | } 237 | } 238 | 239 | impl Merge for Update { 240 | type Error = (Self, Self); 241 | #[inline] 242 | fn merge(self, other: Self) -> Result { 243 | if self == other { 244 | Ok(self) 245 | } else { 246 | Err((self, other)) 247 | } 248 | } 249 | } 250 | 251 | impl Merge for FF { 252 | type Error = (Self, Self); 253 | #[inline] 254 | fn merge(self, other: Self) -> Result { 255 | if self == other { 256 | Ok(self) 257 | } else { 258 | Err((self, other)) 259 | } 260 | } 261 | } 262 | -------------------------------------------------------------------------------- /automata/src/qc.rs: -------------------------------------------------------------------------------- 1 | /* 2 | * This Source Code Form is subject to the terms of the Mozilla Public 3 | * License, v. 2.0. If a copy of the MPL was not distributed with this 4 | * file, You can obtain one at https://mozilla.org/MPL/2.0/. 5 | */ 6 | 7 | //! `QuickCheck` implementations for various types. 8 | 9 | use crate::{Ctrl, Curry, Graph, Input, Range, RangeMap, State, Transition, Update, FF}; 10 | use core::{iter, num::NonZeroUsize}; 11 | use quickcheck::{Arbitrary, Gen}; 12 | use std::collections::{BTreeMap, BTreeSet}; 13 | 14 | /// Sample a value uniformly below the maximum size allowed by a generator. 
15 | #[inline] 16 | #[allow(clippy::arithmetic_side_effects)] 17 | fn within_size(g: &mut Gen) -> usize { 18 | usize::arbitrary(g) % NonZeroUsize::new(g.size()).expect("Zero-sized QuickCheck generator") 19 | } 20 | 21 | impl> Arbitrary for Graph { 22 | #[inline] 23 | #[allow(clippy::arithmetic_side_effects)] 24 | fn arbitrary(g: &mut Gen) -> Self { 25 | 'restart: loop { 26 | let size = within_size(g); 27 | let Some(nz) = NonZeroUsize::new(size) else { 28 | continue 'restart; 29 | }; 30 | let initial = C::arbitrary_given(nz, g); 31 | let mut states: Vec<_> = (0..size).map(|_| State::arbitrary_given(nz, g)).collect(); 32 | 'sort_again: loop { 33 | states.sort_unstable(); 34 | states.dedup(); 35 | let Some(nz_post) = NonZeroUsize::new(states.len()) else { 36 | continue 'restart; 37 | }; 38 | states = states 39 | .into_iter() 40 | .map(|s| s.map_indices(|i| i % nz_post)) 41 | .collect(); 42 | // Check if `states` is still sorted 43 | for i in 1..states.len() { 44 | if get!(states, i.overflowing_sub(1).0) >= get!(states, i) { 45 | continue 'sort_again; 46 | } 47 | } 48 | return Self { 49 | states, 50 | initial: initial.map_indices(|i| i % nz_post), 51 | }; 52 | } 53 | } 54 | } 55 | #[inline] 56 | fn shrink(&self) -> Box> { 57 | Box::new( 58 | (self.states.clone(), self.initial.clone()) 59 | .shrink() 60 | .filter_map(|(states, initial)| { 61 | let s = Self { states, initial }; 62 | (s.check() == Ok(())).then_some(s) 63 | }), 64 | ) 65 | } 66 | } 67 | 68 | impl Arbitrary for Range { 69 | #[inline] 70 | fn arbitrary(g: &mut Gen) -> Self { 71 | let (a, b) = <(I, I)>::arbitrary(g); 72 | if a <= b { 73 | Self { first: a, last: b } 74 | } else { 75 | Self { first: b, last: a } 76 | } 77 | } 78 | #[inline] 79 | fn shrink(&self) -> Box> { 80 | Box::new( 81 | (self.first.clone(), self.last.clone()) 82 | .shrink() 83 | .map(|(a, b)| { 84 | if a <= b { 85 | Self { first: a, last: b } 86 | } else { 87 | Self { first: b, last: a } 88 | } 89 | }), 90 | ) 91 | } 92 | } 93 | 94 | impl 
Arbitrary for Update { 95 | #[inline(always)] 96 | fn arbitrary(_: &mut Gen) -> Self { 97 | update!(|(), _| {}) 98 | } 99 | #[inline] 100 | fn shrink(&self) -> Box> { 101 | Box::new(iter::empty()) 102 | } 103 | } 104 | 105 | /// Implement only `shrink`; let `arbitrary` panic. 106 | /// Use when a value requires knowledge of the number of states in an automaton. 107 | macro_rules! shrink_only { 108 | (|$self:ident: &$t:ident| $body:expr) => { 109 | impl> Arbitrary for $t { 110 | #[inline(always)] 111 | fn arbitrary(_: &mut Gen) -> Self { 112 | never!() 113 | } 114 | #[inline] 115 | fn shrink(&$self) -> Box> { 116 | $body 117 | } 118 | } 119 | }; 120 | } 121 | 122 | shrink_only!(|self: &State| Box::new( 123 | (self.transitions.clone(), self.non_accepting.clone(),) 124 | .shrink() 125 | .map(|(transitions, non_accepting)| Self { 126 | transitions, 127 | non_accepting, 128 | }) 129 | )); 130 | 131 | shrink_only!(|self: &RangeMap| Box::new(self.0.shrink().map(Self))); 132 | 133 | shrink_only!(|self: &Curry| match *self { 134 | Self::Wildcard(ref etc) => Box::new(etc.shrink().map(Self::Wildcard)), 135 | #[allow(clippy::shadow_unrelated)] 136 | Self::Scrutinize { 137 | ref filter, 138 | ref fallback, 139 | } => Box::new( 140 | filter 141 | .0 142 | .first_key_value() 143 | .map(|(_, transition)| Self::Wildcard(transition.clone())) 144 | .into_iter() 145 | .chain( 146 | (filter.clone(), fallback.clone()) 147 | .shrink() 148 | .map(|(filter, fallback)| Self::Scrutinize { filter, fallback }) 149 | ) 150 | ), 151 | }); 152 | 153 | shrink_only!(|self: &Transition| { 154 | #[allow(clippy::shadow_unrelated, unreachable_code, unused_variables)] 155 | match *self { 156 | Self::Return { .. 
} => Box::new(iter::empty()), 157 | Self::Lateral { 158 | ref dst, 159 | ref update, 160 | } => Box::new( 161 | iter::once(Self::Return { region: "region" }).chain( 162 | (dst.clone(), update.clone()) 163 | .shrink() 164 | .map(|(dst, update)| Self::Lateral { dst, update }), 165 | ), 166 | ), 167 | Self::Call { 168 | ref detour, 169 | ref dst, 170 | ref combine, 171 | .. 172 | } => Box::new(dst.as_ref().shrink().chain( 173 | (detour.clone(), dst.clone(), combine.clone()).shrink().map( 174 | |(detour, dst, combine)| Self::Call { 175 | region: "region", 176 | detour, 177 | dst, 178 | combine, 179 | }, 180 | ), 181 | )), 182 | } 183 | }); 184 | 185 | impl> State { 186 | /// Construct an arbitrary value given an automaton with this many states. 187 | #[inline] 188 | #[must_use] 189 | pub fn arbitrary_given(n_states: NonZeroUsize, g: &mut Gen) -> Self { 190 | Self { 191 | transitions: Curry::arbitrary_given(n_states, g), 192 | non_accepting: BTreeSet::arbitrary(g), 193 | } 194 | } 195 | } 196 | 197 | impl> Curry { 198 | /// Construct an arbitrary value given an automaton with this many states. 199 | #[inline] 200 | #[must_use] 201 | pub fn arbitrary_given(n_states: NonZeroUsize, g: &mut Gen) -> Self { 202 | if bool::arbitrary(g) { 203 | Self::Wildcard(Transition::arbitrary_given(n_states, g)) 204 | } else { 205 | Self::Scrutinize { 206 | filter: RangeMap::arbitrary_given(n_states, g), 207 | fallback: bool::arbitrary(g).then(|| Transition::arbitrary_given(n_states, g)), 208 | } 209 | } 210 | } 211 | } 212 | 213 | impl> RangeMap { 214 | /// Construct an arbitrary value given an automaton with this many states. 
215 | #[inline] 216 | #[must_use] 217 | pub fn arbitrary_given(n_states: NonZeroUsize, g: &mut Gen) -> Self { 218 | let mut entries: BTreeMap<_, _> = (0..within_size(g)) 219 | .map(|_| { 220 | ( 221 | Range::arbitrary(g), 222 | Transition::arbitrary_given(n_states, g), 223 | ) 224 | }) 225 | .collect(); 226 | // Remove overlap 227 | while let Some(key) = entries.keys().fold(None, |opt, k| { 228 | opt.or_else(|| { 229 | entries.range(..*k).fold(None, |acc, (range, _)| { 230 | acc.or_else(|| range.intersection(*k).is_some().then_some(*k)) 231 | }) 232 | }) 233 | }) { 234 | drop(entries.remove(&key)); 235 | } 236 | Self(entries) 237 | } 238 | } 239 | 240 | impl> Transition { 241 | /// Construct an arbitrary value given an automaton with this many states. 242 | #[inline] 243 | #[must_use] 244 | #[allow(clippy::missing_panics_doc)] 245 | pub fn arbitrary_given(n_states: NonZeroUsize, g: &mut Gen) -> Self { 246 | let choices: [fn(_, &mut _) -> _; 3] = [ 247 | |n, r| Self::Lateral { 248 | dst: C::arbitrary_given(n, r), 249 | update: Arbitrary::arbitrary(r), 250 | }, 251 | |n, r| Self::Call { 252 | region: "region", 253 | detour: C::arbitrary_given(n, r), 254 | dst: Box::new(Transition::arbitrary_given(n, r)), 255 | combine: Arbitrary::arbitrary(r), 256 | }, 257 | |_, _| Self::Return { region: "region" }, 258 | ]; 259 | g.choose(&choices).expect("impossible")(n_states, g) 260 | } 261 | } 262 | 263 | impl Arbitrary for FF { 264 | #[inline] 265 | fn arbitrary(_: &mut Gen) -> Self { 266 | Self { 267 | lhs_t: "()".to_owned(), 268 | rhs_t: "()".to_owned(), 269 | output_t: "()".to_owned(), 270 | src: "|(), ()| ()".to_owned(), 271 | } 272 | } 273 | #[inline] 274 | fn shrink(&self) -> Box> { 275 | Box::new(iter::empty()) 276 | } 277 | } 278 | -------------------------------------------------------------------------------- /automata/src/range.rs: -------------------------------------------------------------------------------- 1 | /* 2 | * This Source Code Form is subject to the 
terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at https://mozilla.org/MPL/2.0/.
 */

//! Inclusive range of values that, as a whole, implements `Ord`.

use crate::Input;

/// Inclusive range of values that, as a whole, implements `Ord`.
/// Ordering is derived, so ranges compare by `first`, then by `last`.
#[allow(clippy::exhaustive_structs)]
#[derive(Clone, Copy, Debug, Default, Eq, Hash, Ord, PartialEq, PartialOrd)]
pub struct Range<I: Input> {
    /// Least element, inclusive.
    pub first: I,
    /// Greatest element, inclusive.
    pub last: I,
}

impl<I: Input> Range<I> {
    /// Trivial range with a single inhabitant.
    #[inline]
    #[must_use]
    pub fn unit(value: I) -> Self {
        Self {
            first: value.clone(),
            last: value,
        }
    }

    /// Check if a value lies within this range (both endpoints included).
    #[inline]
    #[must_use]
    pub fn contains(&self, value: &I) -> bool {
        *value >= self.first && *value <= self.last
    }

    /// If two ranges overlap, return their intersection; otherwise, `None`.
    #[inline]
    #[must_use]
    pub fn intersection(self, other: Self) -> Option<Self> {
        // Both ranges are owned here, so their endpoints can be moved instead of cloned.
        let first = self.first.max(other.first);
        let last = self.last.min(other.last);
        (first <= last).then_some(Self { first, last })
    }
}

--------------------------------------------------------------------------------
/automata/src/range_map.rs:
--------------------------------------------------------------------------------
/*
 * This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at https://mozilla.org/MPL/2.0/.
 */

//! Map from ranges of keys to values.

use crate::{Ctrl, IllFormed, Input, Range, Transition};
use core::cmp;
use std::collections::{BTreeMap, BTreeSet};

/// Map from ranges of keys to values.
14 | #[repr(transparent)] 15 | #[allow(clippy::exhaustive_structs)] 16 | #[derive(Debug, Default)] 17 | pub struct RangeMap>( 18 | /// Key-value entries as tuples. 19 | #[allow(clippy::type_complexity)] 20 | pub BTreeMap, Transition>, 21 | ); 22 | 23 | impl> Clone for RangeMap { 24 | #[inline] 25 | fn clone(&self) -> Self { 26 | Self(self.0.clone()) 27 | } 28 | } 29 | 30 | impl> Eq for RangeMap {} 31 | 32 | impl> PartialEq for RangeMap { 33 | #[inline] 34 | fn eq(&self, other: &Self) -> bool { 35 | self.0 == other.0 36 | } 37 | } 38 | 39 | impl> Ord for RangeMap { 40 | #[inline] 41 | fn cmp(&self, other: &Self) -> cmp::Ordering { 42 | self.0.cmp(&other.0) 43 | } 44 | } 45 | 46 | impl> PartialOrd for RangeMap { 47 | #[inline] 48 | fn partial_cmp(&self, other: &Self) -> Option { 49 | Some(self.cmp(other)) 50 | } 51 | } 52 | 53 | impl> RangeMap { 54 | /// Iterate over references to keys and values without consuming anything. 55 | #[inline] 56 | pub fn iter(&self) -> impl Iterator, &Transition)> { 57 | self.0.iter() 58 | } 59 | 60 | /// Look up an argument; fit any range that contains it. 61 | /// # Errors 62 | /// If multiple ranges fit an argument. 63 | #[inline] 64 | #[allow( 65 | clippy::missing_panics_doc, 66 | clippy::unwrap_in_result, 67 | clippy::type_complexity 68 | )] 69 | pub fn get(&self, key: &I) -> Result>, IllFormed> { 70 | let mut acc = None; 71 | for (range, transition) in &self.0 { 72 | if range.contains(key) { 73 | match acc { 74 | None => acc = Some((range, transition)), 75 | Some((prev_range, _)) => { 76 | return Err(IllFormed::RangeMapOverlap(unwrap!(range 77 | .clone() 78 | .intersection(prev_range.clone())))) 79 | } 80 | } 81 | } 82 | } 83 | Ok(acc.map(|(_, transition)| transition)) 84 | } 85 | 86 | /// Assert that this map has no keys in common with another. 87 | /// # Errors 88 | /// If there are keys in common, don't panic: instead, return them. 
89 | #[inline] 90 | #[allow(clippy::result_large_err, clippy::type_complexity)] 91 | pub fn disjoint( 92 | &self, 93 | other: &Self, 94 | ) -> Result<(), (Range, Transition, Transition)> { 95 | self.0.iter().try_fold((), |(), (lk, lv)| { 96 | other.0.iter().try_fold((), |(), (rk, rv)| { 97 | rk.clone() 98 | .intersection(lk.clone()) 99 | .map_or(Ok(()), |range| Err((range, lv.clone(), rv.clone()))) 100 | }) 101 | }) 102 | } 103 | 104 | /// All values in this collection, without their associated keys. 105 | #[inline] 106 | pub fn values(&self) -> impl Iterator> { 107 | self.0.values() 108 | } 109 | 110 | /// Remove an entry by key. 111 | #[inline] 112 | pub fn remove(&mut self, key: &Range) { 113 | self.0 114 | .retain(|k, _| key.clone().intersection(k.clone()).is_none()); 115 | } 116 | 117 | /// All values in this collection, without their associated keys. 118 | #[inline] 119 | pub fn values_mut(&mut self) -> impl Iterator> { 120 | self.0.values_mut() 121 | } 122 | } 123 | 124 | impl RangeMap { 125 | /// Convert the control parameter from `usize` to anything else. 126 | #[inline] 127 | #[must_use] 128 | pub fn convert_ctrl>(self) -> RangeMap { 129 | RangeMap( 130 | self.0 131 | .into_iter() 132 | .map(|(k, v)| (k, v.convert_ctrl())) 133 | .collect(), 134 | ) 135 | } 136 | } 137 | 138 | impl RangeMap> { 139 | /// Kleene-star operation: accept any number (including zero!) of repetitions of this parser. 140 | #[inline] 141 | pub fn star(&mut self, init: &BTreeSet, accepting: &BTreeSet) { 142 | for t in self.0.values_mut() { 143 | t.star(init, accepting); 144 | } 145 | } 146 | } 147 | -------------------------------------------------------------------------------- /automata/src/reindex.rs: -------------------------------------------------------------------------------- 1 | /* 2 | * This Source Code Form is subject to the terms of the Mozilla Public 3 | * License, v. 2.0. 
If a copy of the MPL was not distributed with this 4 | * file, You can obtain one at https://mozilla.org/MPL/2.0/. 5 | */ 6 | 7 | //! Update index "pointers" in response to a reordered array. 8 | 9 | use crate::*; 10 | use std::collections::BTreeMap; 11 | 12 | impl> State { 13 | /// Update index "pointers" in response to a reordered array. 14 | #[inline] 15 | #[must_use] 16 | pub fn reindex( 17 | &self, 18 | states: &[State], 19 | index_map: &BTreeMap>, 20 | ) -> Self { 21 | State { 22 | transitions: self.transitions.reindex(states, index_map), 23 | non_accepting: self.non_accepting.clone(), 24 | } 25 | } 26 | } 27 | 28 | impl> Curry { 29 | /// Update index "pointers" in response to a reordered array. 30 | #[inline] 31 | #[must_use] 32 | pub fn reindex( 33 | &self, 34 | states: &[State], 35 | index_map: &BTreeMap>, 36 | ) -> Self { 37 | match *self { 38 | Curry::Wildcard(ref etc) => Curry::Wildcard(etc.reindex(states, index_map)), 39 | Curry::Scrutinize { 40 | ref filter, 41 | ref fallback, 42 | } => Curry::Scrutinize { 43 | filter: filter.reindex(states, index_map), 44 | fallback: fallback.as_ref().map(|f| f.reindex(states, index_map)), 45 | }, 46 | } 47 | } 48 | } 49 | 50 | impl> RangeMap { 51 | /// Update index "pointers" in response to a reordered array. 52 | #[inline] 53 | #[must_use] 54 | pub fn reindex( 55 | &self, 56 | states: &[State], 57 | index_map: &BTreeMap>, 58 | ) -> Self { 59 | RangeMap( 60 | self.0 61 | .iter() 62 | .map(|(k, v)| (k.clone(), v.reindex(states, index_map))) 63 | .collect(), 64 | ) 65 | } 66 | } 67 | 68 | impl> Transition { 69 | /// Update index "pointers" in response to a reordered array. 
70 | #[inline] 71 | #[must_use] 72 | #[allow(clippy::missing_panics_doc)] 73 | pub fn reindex( 74 | &self, 75 | states: &[State], 76 | index_map: &BTreeMap>, 77 | ) -> Self { 78 | let update_fn = |i| unwrap!(states.binary_search(unwrap!(index_map.get(&i)))); 79 | match *self { 80 | Self::Lateral { 81 | ref dst, 82 | ref update, 83 | } => Self::Lateral { 84 | dst: dst.clone().map_indices(update_fn), 85 | update: update.clone(), 86 | }, 87 | Self::Call { 88 | region, 89 | ref detour, 90 | ref dst, 91 | ref combine, 92 | } => Self::Call { 93 | region, 94 | detour: detour.clone().map_indices(update_fn), 95 | dst: Box::new(dst.clone().map_indices(update_fn)), 96 | combine: combine.clone(), 97 | }, 98 | Self::Return { region } => Self::Return { region }, 99 | } 100 | } 101 | } 102 | -------------------------------------------------------------------------------- /automata/src/run.rs: -------------------------------------------------------------------------------- 1 | /* 2 | * This Source Code Form is subject to the terms of the Mozilla Public 3 | * License, v. 2.0. If a copy of the MPL was not distributed with this 4 | * file, You can obtain one at https://mozilla.org/MPL/2.0/. 5 | */ 6 | 7 | //! Execute an automaton on an input sequence. 8 | 9 | use crate::{Ctrl, Graph, InProgress, Input}; 10 | 11 | /// Execute an automaton on an input sequence. 12 | pub trait Run: IntoIterator + Sized 13 | where 14 | Self::Item: Input, 15 | { 16 | /// Execute an automaton on an input sequence. 17 | /// # Errors 18 | /// If the automaton is not well-formed (with a witness to why). 
19 | #[allow(clippy::type_complexity)] 20 | fn run>( 21 | self, 22 | graph: &Graph, 23 | ) -> InProgress<'_, Self::Item, C, Self::IntoIter>; 24 | } 25 | 26 | impl Run for In 27 | where 28 | In::Item: Input, 29 | { 30 | #[inline] 31 | fn run>( 32 | self, 33 | graph: &Graph, 34 | ) -> InProgress<'_, Self::Item, C, Self::IntoIter> { 35 | InProgress { 36 | graph, 37 | input: self.into_iter(), 38 | stack: vec![], 39 | ctrl: graph.initial.clone(), 40 | output_t: "()".to_owned(), 41 | } 42 | } 43 | } 44 | -------------------------------------------------------------------------------- /automata/src/state.rs: -------------------------------------------------------------------------------- 1 | /* 2 | * This Source Code Form is subject to the terms of the Mozilla Public 3 | * License, v. 2.0. If a copy of the MPL was not distributed with this 4 | * file, You can obtain one at https://mozilla.org/MPL/2.0/. 5 | */ 6 | 7 | //! State, i.e. a node in an automaton graph. 8 | 9 | use crate::{Ctrl, Curry, IllFormed, Input}; 10 | use core::cmp; 11 | use std::collections::BTreeSet; 12 | 13 | /// State, i.e. a node in an automaton graph. 14 | #[allow(clippy::exhaustive_structs)] 15 | #[derive(Debug)] 16 | pub struct State> { 17 | /// Map from input tokens to actions. 18 | pub transitions: Curry, 19 | /// If input ends while in this state, should we accept? 20 | pub non_accepting: BTreeSet, 21 | } 22 | 23 | impl> State { 24 | /// Compute the input type of any run that reaches this state. 25 | /// # Errors 26 | /// If multiple transitions expect different types. 
27 | #[inline] 28 | pub fn input_type(&self) -> Result, IllFormed> { 29 | self.transitions 30 | .values() 31 | .try_fold(None, |acc: Option<&str>, t| { 32 | let in_t = t.input_type(); 33 | acc.map_or(Ok(in_t), |other| match in_t { 34 | None => Ok(Some(other)), 35 | Some(ty) if ty == other => Ok(Some(other)), 36 | Some(ty) => Err(IllFormed::TypeMismatch(other.to_owned(), ty.to_owned())), 37 | }) 38 | }) 39 | } 40 | 41 | /// Check if this parser ever could, at any point, involve a fallback transition. 42 | #[inline] 43 | #[must_use] 44 | pub const fn involves_any_fallback(&self) -> bool { 45 | self.transitions.involves_any_fallback() 46 | } 47 | } 48 | 49 | impl State { 50 | /// Convert the control parameter from `usize` to anything else. 51 | #[inline] 52 | #[must_use] 53 | pub fn convert_ctrl>(self) -> State { 54 | State { 55 | transitions: self.transitions.convert_ctrl(), 56 | non_accepting: self.non_accepting, 57 | } 58 | } 59 | } 60 | 61 | impl> Clone for State { 62 | #[inline] 63 | fn clone(&self) -> Self { 64 | Self { 65 | transitions: self.transitions.clone(), 66 | non_accepting: self.non_accepting.clone(), 67 | } 68 | } 69 | } 70 | 71 | impl> Eq for State {} 72 | 73 | impl> PartialEq for State { 74 | #[inline] 75 | fn eq(&self, other: &Self) -> bool { 76 | self.transitions == other.transitions && self.non_accepting == other.non_accepting 77 | } 78 | } 79 | 80 | impl> Ord for State { 81 | #[inline] 82 | fn cmp(&self, other: &Self) -> cmp::Ordering { 83 | self.transitions 84 | .cmp(&other.transitions) 85 | .then_with(|| self.non_accepting.cmp(&other.non_accepting)) 86 | } 87 | } 88 | 89 | impl> PartialOrd for State { 90 | #[inline] 91 | fn partial_cmp(&self, other: &Self) -> Option { 92 | Some(self.cmp(other)) 93 | } 94 | } 95 | -------------------------------------------------------------------------------- /automata/src/to_src.rs: -------------------------------------------------------------------------------- 1 | /* 2 | * This Source Code Form is subject to 
the terms of the Mozilla Public 3 | * License, v. 2.0. If a copy of the MPL was not distributed with this 4 | * file, You can obtain one at https://mozilla.org/MPL/2.0/. 5 | */ 6 | 7 | //! Translate an automaton into Rust source code. 8 | 9 | use crate::{ 10 | Ctrl, Curry, Deterministic, Graph, IllFormed, Input, Range, RangeMap, State, Transition, 11 | Update, FF, 12 | }; 13 | use core::ops::Bound; 14 | use std::collections::{BTreeMap, BTreeSet}; 15 | 16 | /// Translate a value into Rust source code that reproduces it. 17 | pub trait ToSrc { 18 | /// Translate a value into Rust source code that reproduces it. 19 | #[must_use] 20 | fn to_src(&self) -> String; 21 | /// Translate a type into Rust source code that reproduces it. 22 | #[must_use] 23 | fn src_type() -> String; 24 | } 25 | 26 | impl ToSrc for () { 27 | #[inline(always)] 28 | #[must_use] 29 | fn to_src(&self) -> String { 30 | Self::src_type() 31 | } 32 | #[inline] 33 | #[must_use] 34 | fn src_type() -> String { 35 | "()".to_owned() 36 | } 37 | } 38 | 39 | impl ToSrc for usize { 40 | #[inline] 41 | #[must_use] 42 | fn to_src(&self) -> String { 43 | format!("{self}") 44 | } 45 | #[inline] 46 | #[must_use] 47 | fn src_type() -> String { 48 | "usize".to_owned() 49 | } 50 | } 51 | 52 | impl ToSrc for u8 { 53 | #[inline] 54 | #[must_use] 55 | fn to_src(&self) -> String { 56 | // format!("{self}") 57 | format!("b'{}'", self.escape_ascii()) 58 | } 59 | #[inline] 60 | #[must_use] 61 | fn src_type() -> String { 62 | "u8".to_owned() 63 | } 64 | } 65 | 66 | impl ToSrc for char { 67 | #[inline] 68 | #[must_use] 69 | fn to_src(&self) -> String { 70 | format!("'{}'", self.escape_default()) 71 | } 72 | #[inline] 73 | #[must_use] 74 | fn src_type() -> String { 75 | "char".to_owned() 76 | } 77 | } 78 | 79 | impl ToSrc for BTreeSet { 80 | #[inline] 81 | #[must_use] 82 | fn to_src(&self) -> String { 83 | let mut iter = self.iter(); 84 | let Some(fst) = iter.next() else { 85 | return format!("{}::new()", Self::src_type()); 86 
| }; 87 | let Some(snd) = iter.next() else { 88 | return format!("core::iter::once({}).collect()", fst.to_src()); 89 | }; 90 | format!( 91 | "[{}, {}{}].into_iter().collect()", 92 | fst.to_src(), 93 | snd.to_src(), 94 | iter.fold(String::new(), |acc, x| format!("{acc}, {}", x.to_src())), 95 | ) 96 | } 97 | #[inline] 98 | #[must_use] 99 | fn src_type() -> String { 100 | format!("std::collections::BTreeSet::<{}>", T::src_type()) 101 | } 102 | } 103 | 104 | impl ToSrc for Option { 105 | #[inline] 106 | #[must_use] 107 | fn to_src(&self) -> String { 108 | self.as_ref() 109 | .map_or_else(|| "None".to_owned(), |x| format!("Some({})", x.to_src())) 110 | } 111 | #[inline] 112 | #[must_use] 113 | fn src_type() -> String { 114 | format!("Option::<{}>", T::src_type()) 115 | } 116 | } 117 | 118 | impl ToSrc for Result { 119 | #[inline] 120 | #[must_use] 121 | fn to_src(&self) -> String { 122 | self.as_ref().map_or_else( 123 | |e| format!("Err({})", e.to_src()), 124 | |x| format!("Ok({})", x.to_src()), 125 | ) 126 | } 127 | #[inline] 128 | #[must_use] 129 | fn src_type() -> String { 130 | format!("Result::<{}, {}>", T::src_type(), E::src_type()) 131 | } 132 | } 133 | 134 | impl ToSrc for String { 135 | #[inline] 136 | #[must_use] 137 | fn to_src(&self) -> String { 138 | if self.is_empty() { 139 | "String::new()".to_owned() 140 | } else { 141 | format!("\"{}\".to_owned()", self.escape_default()) 142 | } 143 | } 144 | #[inline] 145 | #[must_use] 146 | fn src_type() -> String { 147 | "String".to_owned() 148 | } 149 | } 150 | 151 | impl ToSrc for &str { 152 | #[inline] 153 | #[must_use] 154 | fn to_src(&self) -> String { 155 | format!("\"{}\"", self.escape_default()) 156 | } 157 | #[inline] 158 | #[must_use] 159 | fn src_type() -> String { 160 | "&'static str".to_owned() 161 | } 162 | } 163 | 164 | impl ToSrc for Range { 165 | #[inline] 166 | #[must_use] 167 | fn to_src(&self) -> String { 168 | format!("{}..={}", self.first.to_src(), self.last.to_src()) 169 | } 170 | 
#[inline(always)] 171 | #[must_use] 172 | fn src_type() -> String { 173 | T::src_type() 174 | } 175 | } 176 | 177 | impl Deterministic { 178 | /// Translate a value into Rust source code that reproduces it. 179 | /// # Errors 180 | /// If this automaton is ill-formed. 181 | #[inline] 182 | #[allow(clippy::arithmetic_side_effects)] // <-- String concatenation with `+` 183 | pub fn to_src(&self) -> Result> { 184 | let token_t = I::src_type(); 185 | let output_t = self.output_type()?.unwrap_or("core::convert::Infallible"); 186 | Ok(format!( 187 | r#"//! Automatically generated with [inator](https://crates.io/crates/inator). 188 | 189 | #![allow(dead_code, unused_variables)] 190 | 191 | /// Descriptive parsing error. 192 | #[allow(dead_code)] 193 | #[derive(Clone, Debug, PartialEq)] 194 | pub enum Error {{ 195 | /// Token without any relevant rule. 196 | Absurd {{ 197 | /// Index of the token that caused this error. 198 | index: usize, 199 | /// Particular token that didn't correspond to a rule. 200 | token: {token_t}, 201 | }}, 202 | /// Token that would have closed a delimiter, but the delimiter wasn't open. 203 | Unopened {{ 204 | /// What was actually open, if anything, and the index of the token that opened it. 205 | what_was_open: Option<(&'static str, usize)>, 206 | /// Index of the token that caused this error. 207 | index: usize, 208 | }}, 209 | /// After parsing all input, a delimiter remains open (e.g. "(a, b, c"). 210 | Unclosed {{ 211 | /// Region (user-defined name) that was not closed. Sensible to be e.g. "parentheses" for `(...)`. 212 | region: &'static str, 213 | /// Index at which the delimiter was opened (e.g., for parentheses, the index of the relevant '('). 214 | opened: usize, 215 | }}, 216 | /// Ended on a user-defined non-accepting state. 217 | UserDefined {{ 218 | /// User-defined error message. 
219 | messages: &'static [&'static str], 220 | }}, 221 | }} 222 | 223 | type R = Result<(Option<(usize, Option>)>, {output_t}), Error>; 224 | 225 | #[repr(transparent)] 226 | struct F(fn(&mut I, {output_t}) -> R); 227 | 228 | #[inline] 229 | pub fn parse>(input: I) -> Result<{output_t}, Error> {{ 230 | state_{}(&mut input.into_iter().enumerate(), (), None) 231 | }}{} 232 | "#, 233 | self.initial, 234 | self.states 235 | .iter() 236 | .enumerate() 237 | .try_fold(String::new(), |acc, (i, s)| Ok(acc + &s.to_src(i)?))?, 238 | )) 239 | } 240 | } 241 | 242 | impl State { 243 | /// Translate a value into Rust source code that reproduces it. 244 | #[inline] 245 | fn to_src(&self, i: usize) -> Result> { 246 | let input_t = self.input_type()?.unwrap_or("core::convert::Infallible"); 247 | let token_t = I::src_type(); 248 | let on_some = self.transitions.to_src(); 249 | let on_none = self.non_accepting.first().map_or_else( 250 | || { 251 | "stack_top.map_or( 252 | Ok(acc), 253 | |(region, opened)| Err(Error::Unclosed { region, opened }), 254 | )" 255 | .to_owned() 256 | }, 257 | |fst| { 258 | self.non_accepting 259 | .range((Bound::Excluded(fst.clone()), Bound::Unbounded)) 260 | .fold( 261 | format!( 262 | "Err(Error::UserDefined {{ messages: &[{}", 263 | fst.as_str().to_src(), 264 | ), 265 | |acc, msg| format!("{acc}, {}", msg.as_str().to_src()), 266 | ) 267 | + "] })" 268 | }, 269 | ); 270 | Ok(format!( 271 | r#" 272 | 273 | 274 | #[inline] 275 | fn state_{i}>(input: &mut I, acc: {input_t}, stack_top: Option<(&'static str, usize)>) -> Result<{input_t}, Error> {{ 276 | match input.next() {{ 277 | None => {on_none}, 278 | Some((index, token)) => match token {{{on_some} 279 | }}, 280 | }} 281 | }}"#, 282 | )) 283 | } 284 | } 285 | 286 | impl Curry { 287 | /// Translate a value into Rust source code that reproduces it. 
288 | #[inline] 289 | #[must_use] 290 | fn to_src(&self) -> String { 291 | match *self { 292 | Self::Wildcard(ref etc) => format!( 293 | r#" 294 | _ => {{ 295 | {} 296 | }}"#, 297 | etc.to_src(), 298 | ), 299 | Self::Scrutinize { 300 | ref filter, 301 | ref fallback, 302 | } => format!( 303 | "{} 304 | _ => {}", 305 | filter.to_src(), 306 | fallback.as_ref().map_or_else( 307 | || "Err(Error::Absurd { index, token })".to_owned(), 308 | Transition::to_src, 309 | ) 310 | ), 311 | } 312 | } 313 | } 314 | 315 | impl RangeMap { 316 | /// Translate a value into Rust source code that reproduces it. 317 | #[inline] 318 | #[must_use] 319 | fn to_src(&self) -> String { 320 | self.0.iter().fold(String::new(), |acc, (k, v)| { 321 | format!( 322 | r#"{acc} 323 | {} => {{ 324 | {} 325 | }},"#, 326 | k.to_src(), 327 | v.to_src(), 328 | ) 329 | }) 330 | } 331 | } 332 | 333 | impl Transition { 334 | /// Translate a value into Rust source code that reproduces it. 335 | #[inline] 336 | #[must_use] 337 | fn to_src(&self) -> String { 338 | match *self { 339 | Self::Lateral { dst, update: None } => { 340 | format!("state_{dst}(input, acc, stack_top)") 341 | } 342 | Self::Lateral { 343 | dst, 344 | update: Some(Update { src, .. }), 345 | } => format!("state_{dst}(input, ({src})(acc, token), stack_top)"), 346 | Self::Call { 347 | region, 348 | detour, 349 | ref dst, 350 | combine: FF { ref src, .. 
}, 351 | } => format!( 352 | "\ 353 | let detour = state_{detour}(input, (), Some(({}, index)))?; 354 | let postprocessed = ({src})(acc, detour); 355 | {}", 356 | region.to_src(), 357 | dst.to_src(), 358 | ), 359 | Self::Return { region } => { 360 | format!( 361 | "match stack_top {{ 362 | Some((region, _)) if region == {} => Ok(acc), 363 | _ => Err(Error::Unopened {{ what_was_open: stack_top, index }}) 364 | }}", 365 | region.to_src(), 366 | ) 367 | } 368 | } 369 | } 370 | } 371 | 372 | impl> ToSrc for Graph { 373 | #[inline] 374 | fn to_src(&self) -> String { 375 | format!( 376 | "Nondeterministic {{ states: {}, initial: {} }}", 377 | self.states.to_src(), 378 | self.initial.to_src(), 379 | ) 380 | } 381 | #[inline] 382 | fn src_type() -> String { 383 | format!("Nondeterministic::<{}>", I::src_type()) 384 | } 385 | } 386 | 387 | impl ToSrc for Vec { 388 | #[inline] 389 | fn to_src(&self) -> String { 390 | self.first().map_or_else( 391 | || "vec![]".to_owned(), 392 | |fst| { 393 | format!( 394 | "vec![{}{}]", 395 | fst.to_src(), 396 | get!(self, 1..) 
397 | .iter() 398 | .fold(String::new(), |acc, x| format!("{acc}, {}", x.to_src())) 399 | ) 400 | }, 401 | ) 402 | } 403 | #[inline] 404 | fn src_type() -> String { 405 | format!("Vce::<{}>", T::src_type()) 406 | } 407 | } 408 | 409 | impl> ToSrc for State { 410 | #[inline] 411 | fn to_src(&self) -> String { 412 | format!( 413 | "State {{ transitions: {}, non_accepting: {} }}", 414 | self.transitions.to_src(), 415 | self.non_accepting.to_src(), 416 | ) 417 | } 418 | #[inline] 419 | fn src_type() -> String { 420 | format!("State::<{}, BTreeSet>", I::src_type(),) 421 | } 422 | } 423 | 424 | impl> ToSrc for Curry { 425 | #[inline] 426 | fn to_src(&self) -> String { 427 | match *self { 428 | Self::Wildcard(ref w) => format!("{}::Wildcard({})", Self::src_type(), w.to_src()), 429 | Self::Scrutinize { 430 | ref filter, 431 | ref fallback, 432 | } => format!( 433 | "{}::Scrutinize {{ filter: {}, fallback: {} }}", 434 | Self::src_type(), 435 | filter.to_src(), 436 | fallback.to_src(), 437 | ), 438 | } 439 | } 440 | #[inline] 441 | fn src_type() -> String { 442 | format!("Curry::<{}, BTreeSet>", I::src_type(),) 443 | } 444 | } 445 | 446 | impl> ToSrc for RangeMap { 447 | #[inline] 448 | fn to_src(&self) -> String { 449 | format!("RangeMap({})", self.0.to_src()) 450 | } 451 | #[inline] 452 | fn src_type() -> String { 453 | format!("RangeMap::<{}, BTreeSet>", I::src_type(),) 454 | } 455 | } 456 | 457 | impl ToSrc for BTreeMap { 458 | #[inline] 459 | fn to_src(&self) -> String { 460 | match self.len() { 461 | 0 => "BTreeMap::new()".to_owned(), 462 | 1 => format!("iter::once({}).collect()", { 463 | let (k, v) = unwrap!(self.first_key_value()); 464 | (k.clone(), v.clone()).to_src() 465 | }), 466 | _ => format!( 467 | "{}.into_iter().collect()", 468 | self.iter() 469 | .map(|(k, v)| (k.clone(), v.clone())) 470 | .collect::>() 471 | .to_src() 472 | ), 473 | } 474 | } 475 | #[inline] 476 | fn src_type() -> String { 477 | format!("BTreeMap::<{}, {}>", K::src_type(), V::src_type()) 
478 | } 479 | } 480 | 481 | impl ToSrc for (A, B) { 482 | #[inline] 483 | fn to_src(&self) -> String { 484 | format!("({}, {})", self.0.to_src(), self.1.to_src()) 485 | } 486 | #[inline] 487 | fn src_type() -> String { 488 | format!("({}, {})", A::src_type(), B::src_type()) 489 | } 490 | } 491 | 492 | impl> ToSrc for Transition { 493 | #[inline] 494 | fn to_src(&self) -> String { 495 | match *self { 496 | Self::Lateral { 497 | ref dst, 498 | ref update, 499 | } => format!( 500 | "Transition::Lateral {{ dst: {}, update: {} }}", 501 | dst.to_src(), 502 | update.to_src(), 503 | ), 504 | Self::Call { 505 | region, 506 | ref detour, 507 | ref dst, 508 | ref combine, 509 | } => format!( 510 | "Transition::Call {{ region: {}, detour: {}, dst: {}, combine: {} }}", 511 | region.to_src(), 512 | detour.to_src(), 513 | dst.to_src(), 514 | combine.to_src(), 515 | ), 516 | Self::Return { region } => { 517 | format!("Transition::Return {{ region: {} }}", region.to_src()) 518 | } 519 | } 520 | } 521 | #[inline] 522 | fn src_type() -> String { 523 | format!("Transition::<{}, BTreeSet>", I::src_type(),) 524 | } 525 | } 526 | 527 | impl ToSrc for Update { 528 | #[inline] 529 | fn to_src(&self) -> String { 530 | format!("update!({})", self.src.to_src()) 531 | } 532 | #[inline] 533 | fn src_type() -> String { 534 | format!("Update::<{}>", I::src_type()) 535 | } 536 | } 537 | 538 | impl ToSrc for FF { 539 | #[inline] 540 | fn to_src(&self) -> String { 541 | format!("ff!({})", self.src) 542 | } 543 | #[inline] 544 | fn src_type() -> String { 545 | "FF".to_owned() 546 | } 547 | } 548 | -------------------------------------------------------------------------------- /automata/src/transition.rs: -------------------------------------------------------------------------------- 1 | /* 2 | * This Source Code Form is subject to the terms of the Mozilla Public 3 | * License, v. 2.0. If a copy of the MPL was not distributed with this 4 | * file, You can obtain one at https://mozilla.org/MPL/2.0/. 
5 | */ 6 | 7 | //! Transition in an automaton: an action and a destination state. 8 | 9 | use crate::{Ctrl, Input, InputError, Merge, ParseError, Update, FF}; 10 | use core::{cmp, iter, mem}; 11 | use std::collections::BTreeSet; 12 | 13 | // TODO: rename `Call` to `Open` and `Return` to `Close` 14 | 15 | /// Transition in an automaton: an action and a destination state. 16 | #[derive(Debug)] 17 | #[allow(clippy::exhaustive_enums)] 18 | pub enum Transition> { 19 | /// Neither push nor pop: just move to a different state. 20 | Lateral { 21 | /// Go to this state. 22 | dst: C, 23 | /// Call this Rust function to update the output we're building. 24 | update: Option>, 25 | }, 26 | /// Call another function--i.e., push a pointer/index onto the stack. 27 | Call { 28 | /// Region (user-defined name) that we're opening. Sensible to be e.g. "parentheses" for `(...)`. 29 | region: &'static str, 30 | /// Call (and require a successful run from) this state before continuing. 31 | detour: C, 32 | /// After the call has succeeded, go to this state. 33 | dst: Box, 34 | /// Combine the cached results and the results of the called parser with this function. 35 | combine: FF, 36 | }, 37 | /// Return into the function that called us. 38 | /// Note that this is NOT how we return from the overall parser: 39 | /// that happens only when input ends AND the stack is empty. 40 | Return { 41 | /// Region (user-defined name) that we're closing. Sensible to be e.g. "parentheses" for `(...)`. 
42 | region: &'static str, 43 | }, 44 | } 45 | 46 | impl> Clone for Transition { 47 | #[inline] 48 | fn clone(&self) -> Self { 49 | match *self { 50 | Self::Lateral { 51 | ref dst, 52 | ref update, 53 | } => Self::Lateral { 54 | dst: dst.clone(), 55 | update: update.clone(), 56 | }, 57 | Self::Call { 58 | region, 59 | ref detour, 60 | ref dst, 61 | ref combine, 62 | } => Self::Call { 63 | region, 64 | detour: detour.clone(), 65 | dst: dst.clone(), 66 | combine: combine.clone(), 67 | }, 68 | Self::Return { region } => Self::Return { region }, 69 | } 70 | } 71 | } 72 | 73 | impl> PartialEq for Transition { 74 | #[inline] 75 | fn eq(&self, other: &Self) -> bool { 76 | self.cmp(other).is_eq() // <-- TODO: faster 77 | } 78 | } 79 | impl> Eq for Transition {} 80 | 81 | impl> Ord for Transition { 82 | #[inline] 83 | fn cmp(&self, other: &Self) -> cmp::Ordering { 84 | match (self, other) { 85 | (&Self::Return { region: l_region }, &Self::Return { region: r_region }) => { 86 | l_region.cmp(r_region) 87 | } 88 | (&Self::Return { .. }, _) => cmp::Ordering::Less, 89 | (_, &Self::Return { .. }) => cmp::Ordering::Greater, 90 | ( 91 | &Self::Lateral { 92 | dst: ref l_dst, 93 | update: ref l_update, 94 | }, 95 | &Self::Lateral { 96 | dst: ref r_dst, 97 | update: ref r_update, 98 | }, 99 | ) => l_dst.cmp(r_dst).then_with(|| l_update.cmp(r_update)), 100 | (&Self::Lateral { .. }, _) => cmp::Ordering::Less, 101 | (_, &Self::Lateral { .. 
}) => cmp::Ordering::Greater, 102 | ( 103 | &Self::Call { 104 | region: l_region, 105 | detour: ref l_detour, 106 | dst: ref l_dst, 107 | combine: ref l_combine, 108 | }, 109 | &Self::Call { 110 | region: r_region, 111 | detour: ref r_detour, 112 | dst: ref r_dst, 113 | combine: ref r_combine, 114 | }, 115 | ) => l_region 116 | .cmp(r_region) 117 | .then_with(|| l_detour.cmp(r_detour)) 118 | .then_with(|| l_dst.cmp(r_dst)) 119 | .then_with(|| l_combine.cmp(r_combine)), 120 | } 121 | } 122 | } 123 | 124 | impl> PartialOrd for Transition { 125 | #[inline] 126 | fn partial_cmp(&self, other: &Self) -> Option { 127 | Some(self.cmp(other)) 128 | } 129 | } 130 | 131 | impl> Transition { 132 | /// Take this transition in an actual execution. 133 | /// Return the index of the machine's state after this transition. 134 | /// # Errors 135 | /// If we try to pop from an empty stack. 136 | #[inline] 137 | pub fn invoke( 138 | &self, 139 | output_t: &str, 140 | stack: &mut Vec>, 141 | ) -> Result, ParseError> { 142 | match *self { 143 | Self::Lateral { 144 | ref dst, 145 | ref update, 146 | } => Ok(Some(( 147 | dst.clone(), 148 | if let &Some(ref u) = update { 149 | u.invoke(output_t).map_err(ParseError::BadParser)? 150 | } else { 151 | output_t.to_owned() 152 | }, 153 | ))), 154 | Self::Call { 155 | ref detour, 156 | ref dst, 157 | .. 158 | } => { 159 | stack.push(*dst.clone()); 160 | Ok(Some((detour.clone(), "()".to_owned()))) 161 | } 162 | Self::Return { .. } => { 163 | let rtn_to = stack 164 | .pop() 165 | .ok_or(ParseError::BadInput(InputError::Unopened))?; 166 | // Ok(Some((rtn_to, output_t.to_owned()))) 167 | // No longer strictly small-step semantics, 168 | // but the alternative is a nightmare 169 | rtn_to.invoke(output_t, stack) 170 | } 171 | } 172 | } 173 | 174 | /// Compute the input type of any run that reaches this state. 175 | #[inline] 176 | #[must_use] 177 | pub fn input_type(&self) -> Option<&str> { 178 | match *self { 179 | Self::Lateral { ref update, .. 
} => update.as_ref().map(|u| u.input_t.as_str()), 180 | Self::Call { ref combine, .. } => Some(&combine.lhs_t), 181 | Self::Return { .. } => None, 182 | } 183 | } 184 | 185 | /// Immediate next destination (as a state index). 186 | /// For local transitions, it's what you would expect. 187 | /// For calls, it's both the call and the continuation after the call. 188 | /// For returns, it's nothing. 189 | #[inline] 190 | #[must_use] 191 | pub fn dsts(&self) -> BTreeSet<&C> { 192 | match *self { 193 | Self::Lateral { ref dst, .. } => iter::once(dst).collect(), 194 | Self::Call { 195 | ref detour, 196 | ref dst, 197 | .. 198 | } => iter::once(detour).chain(dst.dsts()).collect(), 199 | Self::Return { .. } => BTreeSet::new(), 200 | } 201 | } 202 | 203 | /// Natural-language representation of the action we're taking on the stack. 204 | #[inline] 205 | #[must_use] 206 | pub const fn in_english(&self) -> &'static str { 207 | match *self { 208 | Self::Lateral { .. } => "leave the stack alone", 209 | Self::Call { .. } => "push a call onto the stack", 210 | Self::Return { .. } => "return (i.e. pop from the stack)", 211 | } 212 | } 213 | } 214 | 215 | impl Transition { 216 | /// Convert the control parameter from `usize` to anything else. 217 | #[inline] 218 | #[must_use] 219 | pub fn convert_ctrl>(self) -> Transition { 220 | match self { 221 | Self::Lateral { dst, update } => Transition::Lateral { 222 | dst: C::from_usize(dst), 223 | update, 224 | }, 225 | Self::Call { 226 | region, 227 | detour, 228 | dst, 229 | combine, 230 | } => Transition::Call { 231 | region, 232 | detour: C::from_usize(detour), 233 | dst: Box::new(dst.convert_ctrl()), 234 | combine, 235 | }, 236 | Self::Return { region } => Transition::Return { region }, 237 | } 238 | } 239 | } 240 | 241 | impl Transition> { 242 | /// Kleene-star operation: accept any number (including zero!) of repetitions of this parser. 
243 | #[inline] 244 | #[allow(clippy::missing_panics_doc)] 245 | pub fn star(&mut self, init: &BTreeSet, accepting: &BTreeSet) { 246 | match *self { 247 | Transition::Lateral { ref mut dst, .. } => { 248 | if dst.iter().any(|i| accepting.contains(i)) { 249 | *dst = unwrap!(mem::take(dst).merge(init.clone())); 250 | } 251 | } 252 | Transition::Call { ref mut dst, .. } => dst.star(init, accepting), 253 | Transition::Return { .. } => {} 254 | } 255 | } 256 | } 257 | -------------------------------------------------------------------------------- /automata/src/update.rs: -------------------------------------------------------------------------------- 1 | /* 2 | * This Source Code Form is subject to the terms of the Mozilla Public 3 | * License, v. 2.0. If a copy of the MPL was not distributed with this 4 | * file, You can obtain one at https://mozilla.org/MPL/2.0/. 5 | */ 6 | 7 | //! A single-argument Rust function callable both in `build.rs` and in a source file. 8 | 9 | use crate::{Ctrl, IllFormed, Input, ToSrc}; 10 | use core::{cmp, fmt, marker::PhantomData}; 11 | 12 | /// A single-argument Rust function callable both in `build.rs` and in a source file. 13 | #[allow(clippy::exhaustive_structs)] 14 | pub struct Update { 15 | /// Source-code representation of the input type. 16 | pub input_t: String, 17 | /// Source-code representation of the output type. 18 | pub output_t: String, 19 | /// Representation of the type of tokens. 20 | pub ghost: PhantomData, 21 | /// Source-code representation that's promised to compile to a call operationally identical to `ptr`. 22 | pub src: &'static str, 23 | } 24 | 25 | impl Update { 26 | /// Internals of the `update!` macro. 27 | #[inline] 28 | #[must_use] 29 | pub fn _update_macro(src: &'static str, _: fn(T, I) -> U) -> Self { 30 | Self { 31 | input_t: T::src_type(), 32 | output_t: U::src_type(), 33 | ghost: PhantomData, 34 | src, 35 | } 36 | } 37 | 38 | /// Check types.
39 | /// # Errors 40 | /// If the argument type doesn't match the function's expected input type. 41 | #[inline] 42 | pub fn invoke>(&self, input_t: &str) -> Result> { 43 | input_t 44 | .eq(&self.input_t) 45 | .then(|| self.output_t.clone()) 46 | .ok_or_else(|| IllFormed::TypeMismatch(input_t.to_owned(), self.input_t.clone())) 47 | } 48 | } 49 | 50 | impl PartialEq for Update { 51 | #[inline] 52 | fn eq(&self, other: &Self) -> bool { 53 | self.src == other.src 54 | } 55 | } 56 | 57 | impl Eq for Update {} 58 | 59 | impl PartialOrd for Update { 60 | #[inline] 61 | fn partial_cmp(&self, other: &Self) -> Option { 62 | Some(self.cmp(other)) 63 | } 64 | } 65 | 66 | impl Ord for Update { 67 | #[inline] 68 | fn cmp(&self, other: &Self) -> cmp::Ordering { 69 | self.src.cmp(other.src) 70 | } 71 | } 72 | 73 | impl fmt::Debug for Update { 74 | #[inline] 75 | fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { 76 | write!(f, "update!({})", self.src) 77 | } 78 | } 79 | 80 | impl Clone for Update { 81 | #[inline] 82 | fn clone(&self) -> Self { 83 | Self { 84 | input_t: self.input_t.clone(), 85 | output_t: self.output_t.clone(), 86 | ghost: self.ghost, 87 | src: self.src, 88 | } 89 | } 90 | } 91 | -------------------------------------------------------------------------------- /build.rs: -------------------------------------------------------------------------------- 1 | /* 2 | * This Source Code Form is subject to the terms of the Mozilla Public 3 | * License, v. 2.0. If a copy of the MPL was not distributed with this 4 | * file, You can obtain one at https://mozilla.org/MPL/2.0/. 5 | */ 6 | 7 | //! Basic CI checks that would be a pain in the ass to write with a shell. 8 | 9 | use std::{ 10 | fs::{self, File}, 11 | io, 12 | path::Path, 13 | }; 14 | 15 | const MPL_HEADER: &[u8] = b"/*\n * This Source Code Form is subject to the terms of the Mozilla Public\n * License, v. 2.0. 
If a copy of the MPL was not distributed with this\n * file, You can obtain one at https://mozilla.org/MPL/2.0/.\n */\n\n"; 16 | 17 | fn main() -> io::Result<()> { 18 | check(Path::new(r"build.rs"))?; 19 | check(Path::new(r"src"))?; 20 | Ok(()) 21 | } 22 | 23 | fn check(file: &Path) -> io::Result<()> { 24 | if file.is_dir() { 25 | for f in fs::read_dir(file)? { 26 | check(&f?.path())? 27 | } 28 | Ok(()) 29 | } else { 30 | let mut read = io::BufReader::with_capacity(MPL_HEADER.len(), File::open(file)?); 31 | if io::BufRead::fill_buf(&mut read)? == MPL_HEADER { 32 | Ok(()) 33 | } else { 34 | panic!("{file:?} is missing the verbatim MPL comment (must start at the very first character, and must be followed by a newline). Please copy and paste it from any other file.") 35 | } 36 | } 37 | } 38 | -------------------------------------------------------------------------------- /ci.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env sh 2 | 3 | set -ex # `-u` set below 4 | 5 | if [ -z "${QUICKCHECK_TESTS}" ] 6 | then 7 | if [ "${GITHUB_REF##*/}" = "main" ] 8 | then 9 | export QUICKCHECK_TESTS=1000000 10 | else 11 | export QUICKCHECK_TESTS=1000 12 | fi 13 | fi 14 | 15 | set -u 16 | 17 | # Update our workbench 18 | rustup toolchain install nightly || : 19 | rustup component add clippy miri rustfmt 20 | git submodule update --init --recursive --remote 21 | 22 | # Housekeeping 23 | cargo fmt --check 24 | cargo clippy --all-targets --no-default-features 25 | cargo clippy --all-targets --all-features 26 | 27 | # Non-property tests 28 | cargo test --no-default-features 29 | cargo test --no-default-features --examples 30 | cargo test -r --no-default-features 31 | cargo test -r --no-default-features --examples 32 | 33 | # Property tests 34 | for i in $(seq 2 8) 35 | do 36 | export QUICKCHECK_GENERATOR_SIZE=$(expr ${i} '*' '(' ${i} - 1 ')') 37 | RUST_BACKTRACE=0 QUICKCHECK_TESTS=$(expr ${QUICKCHECK_TESTS} / 50) cargo test 
--all-features 38 | RUST_BACKTRACE=0 QUICKCHECK_TESTS=$(expr ${QUICKCHECK_TESTS} / 10) cargo test -r --all-features 39 | RUST_BACKTRACE=0 QUICKCHECK_TESTS=$(expr ${QUICKCHECK_TESTS} / 10) cargo test -r --all-features --examples 40 | done 41 | 42 | # Run examples 43 | set +e 44 | export EXAMPLES=$(cargo run --example 2>&1 | grep '^ ') 45 | set -e 46 | if [ ! -z "$EXAMPLES" ] 47 | then 48 | echo $EXAMPLES | xargs -n 1 cargo miri run --example 49 | fi 50 | 51 | # Examples that are crates themselves 52 | for dir in $(ls -A examples) 53 | do 54 | if [ -d examples/$dir ] 55 | then 56 | cd examples/$dir 57 | cargo miri run 58 | cargo miri test 59 | cargo fmt --check 60 | cd ../.. 61 | fi 62 | done 63 | 64 | # Extremely slow (but lovely) UB checks 65 | cargo miri test --no-default-features 66 | cargo miri test --no-default-features --examples 67 | cargo miri test -r --no-default-features 68 | cargo miri test -r --no-default-features --examples 69 | 70 | # Nix build status 71 | git add -A 72 | nix build 73 | 74 | # Recurse on the automata library 75 | if [ -d automata ] 76 | then 77 | cd automata 78 | ../ci.sh 79 | cd .. 80 | fi 81 | 82 | # Check for remaining `FIXME`s 83 | grep -Rnw . --exclude-dir=target --exclude-dir=.git --exclude-dir='*JSONTestSuite*' --exclude=ci.sh -e FIXME && exit 1 || : # next line checks result 84 | 85 | # Print remaining `TODO`s 86 | grep -Rnw . --exclude-dir=target --exclude-dir=.git --exclude-dir='*JSONTestSuite*' --exclude=ci.sh -e TODO || : 87 | -------------------------------------------------------------------------------- /examples/json/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "json-inator" 3 | version = "0.1.0" 4 | edition = "2021" 5 | 6 | [dependencies] 7 | types = { path = "types" } 8 | 9 | [build-dependencies] 10 | inator = { path = "../.." 
} 11 | types = { path = "types" } 12 | -------------------------------------------------------------------------------- /examples/json/build.rs: -------------------------------------------------------------------------------- 1 | //! Notice that the functional definition looks almost exactly like the formal spec at ! 2 | 3 | use inator::*; 4 | use std::io; 5 | 6 | type I = u8; 7 | 8 | fn main() -> Result, IllFormed> { 9 | let empty = toss(b'\x20') | toss(b'\x0A') | toss(b'\x0D') | toss(b'\x09'); 10 | 11 | let parser = empty; 12 | 13 | parser.determinize().unwrap().to_file("src/parser.rs") 14 | } 15 | -------------------------------------------------------------------------------- /examples/json/src/main.rs: -------------------------------------------------------------------------------- 1 | // mod parser; 2 | 3 | #[cfg(test)] 4 | mod test; 5 | 6 | fn main() {} 7 | -------------------------------------------------------------------------------- /examples/json/src/parser.rs: -------------------------------------------------------------------------------- 1 | //! Automatically generated with [inator](https://crates.io/crates/inator). 2 | 3 | #![allow(dead_code, unused_variables)] 4 | 5 | /// Descriptive parsing error. 6 | #[allow(dead_code)] 7 | #[derive(Clone, Debug, PartialEq)] 8 | pub enum Error { 9 | /// Token without any relevant rule. 10 | Absurd { 11 | /// Index of the token that caused this error. 12 | index: usize, 13 | /// Particular token that didn't correspond to a rule. 14 | token: u8, 15 | }, 16 | /// Token that would have closed a delimiter, but the delimiter wasn't open. 17 | Unopened { 18 | /// What was actually open, if anything, and the index of the token that opened it. 19 | what_was_open: Option<(&'static str, usize)>, 20 | /// Index of the token that caused this error. 21 | index: usize, 22 | }, 23 | /// After parsing all input, a delimiter remains open (e.g. "(a, b, c"). 24 | Unclosed { 25 | /// Region (user-defined name) that was not closed. 
Sensible to be e.g. "parentheses" for `(...)`. 26 | region: &'static str, 27 | /// Index at which the delimiter was opened (e.g., for parentheses, the index of the relevant '('). 28 | opened: usize, 29 | }, 30 | /// Ended on a user-defined non-accepting state. 31 | UserDefined { 32 | /// User-defined error message. 33 | messages: &'static [&'static str], 34 | }, 35 | } 36 | 37 | type R = Result<(Option<(usize, Option>)>, core::convert::Infallible), Error>; 38 | 39 | #[repr(transparent)] 40 | struct F(fn(&mut I, core::convert::Infallible) -> R); 41 | 42 | #[inline] 43 | pub fn parse>(input: I) -> Result { 44 | state_1(&mut input.into_iter().enumerate(), (), None) 45 | } 46 | 47 | #[inline] 48 | fn state_0>( 49 | input: &mut I, 50 | acc: core::convert::Infallible, 51 | stack_top: Option<(&'static str, usize)>, 52 | ) -> Result { 53 | match input.next() { 54 | None => stack_top.map_or(Ok(acc), |(region, opened)| { 55 | Err(Error::Unclosed { region, opened }) 56 | }), 57 | Some((index, token)) => match token { 58 | _ => Err(Error::Absurd { index, token }), 59 | }, 60 | } 61 | } 62 | 63 | #[inline] 64 | fn state_1>( 65 | input: &mut I, 66 | acc: core::convert::Infallible, 67 | stack_top: Option<(&'static str, usize)>, 68 | ) -> Result { 69 | match input.next() { 70 | None => Err(Error::UserDefined { messages: &["Expected only a single token on [b\' \'..=b\' \'] but got another token after it", "Expected only a single token on [b\'\\n\'..=b\'\\n\'] but got another token after it", "Expected only a single token on [b\'\\r\'..=b\'\\r\'] but got another token after it", "Expected only a single token on [b\'\\t\'..=b\'\\t\'] but got another token after it"] }), 71 | Some((index, token)) => match token { 72 | b'\t'..=b'\t' => { 73 | state_0(input, acc, stack_top) 74 | }, 75 | b'\n'..=b'\n' => { 76 | state_0(input, acc, stack_top) 77 | }, 78 | b'\r'..=b'\r' => { 79 | state_0(input, acc, stack_top) 80 | }, 81 | b' '..=b' ' => { 82 | state_0(input, acc, stack_top) 83 | }, 84 | _ 
=> Err(Error::Absurd { index, token }) 85 | }, 86 | } 87 | } 88 | -------------------------------------------------------------------------------- /examples/json/src/test.rs: -------------------------------------------------------------------------------- 1 | /* 2 | use crate::*; 3 | use std::{fs, path::PathBuf}; 4 | 5 | #[inline] 6 | fn all_test_cases() -> impl Iterator, bool)> { 7 | fs::read_dir("JSONTestSuite/test_parsing") 8 | .unwrap() 9 | .into_iter() 10 | .filter_map(|r| { 11 | let file = r.unwrap(); 12 | let os_name = file.file_name(); 13 | let name = os_name.to_str().unwrap(); 14 | let succeed = match &name[..2] { 15 | "y_" => true, 16 | "n_" => false, 17 | "i_" => None?, 18 | _ => unreachable!(), 19 | }; 20 | Some((file.path(), fs::read(file.path()).unwrap(), succeed)) 21 | }) 22 | } 23 | */ 24 | 25 | #[test] 26 | fn entire_test_suite() { 27 | /* 28 | for (filename, input, should_pass) in all_test_cases() { 29 | assert_eq!( 30 | crate::parser::parse(input).is_ok(), 31 | should_pass, 32 | " 33 | 34 | FILE: 35 | {} 36 | 37 | CONTENTS: 38 | ``` 39 | {} 40 | ``` 41 | ", 42 | filename.to_str().unwrap(), 43 | fs::read_to_string(filename.clone()).unwrap(), 44 | ); 45 | } 46 | */ 47 | } 48 | -------------------------------------------------------------------------------- /examples/json/types/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "types" 3 | version = "0.1.0" 4 | edition = "2021" 5 | 6 | [dependencies] 7 | inator = { path = "../../.." 
} 8 | -------------------------------------------------------------------------------- /examples/json/types/src/lib.rs: -------------------------------------------------------------------------------- 1 | use inator::*; 2 | 3 | #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)] 4 | pub enum Stack {} 5 | 6 | impl ToSrc for Stack { 7 | #[inline] 8 | fn to_src(&self) -> String { 9 | todo!() 10 | } 11 | #[inline] 12 | fn src_type() -> String { 13 | "types::Stack".to_owned() 14 | } 15 | } 16 | -------------------------------------------------------------------------------- /flake.lock: -------------------------------------------------------------------------------- 1 | { 2 | "nodes": { 3 | "fenix": { 4 | "inputs": { 5 | "nixpkgs": "nixpkgs", 6 | "rust-analyzer-src": "rust-analyzer-src" 7 | }, 8 | "locked": { 9 | "lastModified": 1697869218, 10 | "narHash": "sha256-6xvEGBaIFg9nruyvmznfGjWgi5uSezbLyDqgEJKLV90=", 11 | "owner": "nix-community", 12 | "repo": "fenix", 13 | "rev": "595a9eed67a4bf54dfe3e5c3299362a695fef758", 14 | "type": "github" 15 | }, 16 | "original": { 17 | "owner": "nix-community", 18 | "repo": "fenix", 19 | "type": "github" 20 | } 21 | }, 22 | "flake-utils": { 23 | "inputs": { 24 | "systems": "systems" 25 | }, 26 | "locked": { 27 | "lastModified": 1694529238, 28 | "narHash": "sha256-zsNZZGTGnMOf9YpHKJqMSsa0dXbfmxeoJ7xHlrt+xmY=", 29 | "owner": "numtide", 30 | "repo": "flake-utils", 31 | "rev": "ff7b65b44d01cf9ba6a71320833626af21126384", 32 | "type": "github" 33 | }, 34 | "original": { 35 | "owner": "numtide", 36 | "repo": "flake-utils", 37 | "type": "github" 38 | } 39 | }, 40 | "naersk": { 41 | "inputs": { 42 | "nixpkgs": "nixpkgs_2" 43 | }, 44 | "locked": { 45 | "lastModified": 1697664192, 46 | "narHash": "sha256-nRTG3rYEGFV2+putRiC96+kNXDyKaPJgT6K/1FWN7yo=", 47 | "owner": "nix-community", 48 | "repo": "naersk", 49 | "rev": "636a9b5dd7f2ad7d7c3af929ecf95e4d4fab9e97", 50 | "type": "github" 51 | }, 52 | "original": { 53 | "owner": 
"nix-community", 54 | "repo": "naersk", 55 | "type": "github" 56 | } 57 | }, 58 | "nixpkgs": { 59 | "locked": { 60 | "lastModified": 1699781429, 61 | "narHash": "sha256-UYefjidASiLORAjIvVsUHG6WBtRhM67kTjEY4XfZOFs=", 62 | "owner": "nixos", 63 | "repo": "nixpkgs", 64 | "rev": "e44462d6021bfe23dfb24b775cc7c390844f773d", 65 | "type": "github" 66 | }, 67 | "original": { 68 | "owner": "nixos", 69 | "ref": "nixos-unstable", 70 | "repo": "nixpkgs", 71 | "type": "github" 72 | } 73 | }, 74 | "nixpkgs_2": { 75 | "locked": { 76 | "lastModified": 1700014976, 77 | "narHash": "sha256-dSGpS2YeJrXW5aH9y7Abd235gGufY3RuZFth6vuyVtU=", 78 | "owner": "NixOS", 79 | "repo": "nixpkgs", 80 | "rev": "592047fc9e4f7b74a4dc85d1b9f5243dfe4899e3", 81 | "type": "github" 82 | }, 83 | "original": { 84 | "id": "nixpkgs", 85 | "type": "indirect" 86 | } 87 | }, 88 | "root": { 89 | "inputs": { 90 | "fenix": "fenix", 91 | "flake-utils": "flake-utils", 92 | "naersk": "naersk" 93 | } 94 | }, 95 | "rust-analyzer-src": { 96 | "flake": false, 97 | "locked": { 98 | "lastModified": 1697820143, 99 | "narHash": "sha256-wcqAeCB+pv+BzVd1VkXYZ+fyPzMy+YdxaOuLS7XHIhY=", 100 | "owner": "rust-lang", 101 | "repo": "rust-analyzer", 102 | "rev": "954fb1d673107f3de5cab9b06cb3d1a2323eb6e0", 103 | "type": "github" 104 | }, 105 | "original": { 106 | "owner": "rust-lang", 107 | "ref": "nightly", 108 | "repo": "rust-analyzer", 109 | "type": "github" 110 | } 111 | }, 112 | "systems": { 113 | "locked": { 114 | "lastModified": 1681028828, 115 | "narHash": "sha256-Vy1rq5AaRuLzOxct8nz4T6wlgyUR7zLU309k9mBC768=", 116 | "owner": "nix-systems", 117 | "repo": "default", 118 | "rev": "da67096a3b9bf56a91d16901293e51ba5b49a27e", 119 | "type": "github" 120 | }, 121 | "original": { 122 | "owner": "nix-systems", 123 | "repo": "default", 124 | "type": "github" 125 | } 126 | } 127 | }, 128 | "root": "root", 129 | "version": 7 130 | } 131 | -------------------------------------------------------------------------------- /flake.nix: 
-------------------------------------------------------------------------------- 1 | { 2 | inputs = { 3 | fenix.url = "github:nix-community/fenix"; 4 | flake-utils.url = "github:numtide/flake-utils"; 5 | naersk.url = "github:nix-community/naersk"; 6 | }; 7 | outputs = { fenix, flake-utils, naersk, self }: 8 | flake-utils.lib.eachDefaultSystem (system: 9 | let 10 | name = "inator"; 11 | naersk' = naersk.lib.${system}.override { 12 | cargo = toolchain; 13 | rustc = toolchain; 14 | }; 15 | settings = { 16 | pname = "${name}"; 17 | src = ./.; 18 | }; 19 | toolchain = with fenix.packages.${system}; 20 | combine [ minimal.cargo minimal.rustc ]; 21 | in { 22 | packages = { 23 | ${name} = naersk'.buildPackage settings; 24 | default = self.packages.${system}.${name}; 25 | }; 26 | }); 27 | } 28 | -------------------------------------------------------------------------------- /rust-toolchain.toml: -------------------------------------------------------------------------------- 1 | [toolchain] 2 | channel = "nightly" 3 | -------------------------------------------------------------------------------- /src/lib.rs: -------------------------------------------------------------------------------- 1 | /* 2 | * This Source Code Form is subject to the terms of the Mozilla Public 3 | * License, v. 2.0. If a copy of the MPL was not distributed with this 4 | * file, You can obtain one at https://mozilla.org/MPL/2.0/. 
5 | */ 6 | 7 | /////////////////////////////////////////////////////////////////////////////////////////////////// 8 | /////_________________________________________________________________________________///////////// 9 | //////_____/\\\___/\\\\\___/\\\___/\\\\\\\\\\__/\\\\\\\\\__/\\\\\\\\\\___/\\\\\\\\\\___//////////// 10 | ///////____\/\\\__\/\\\\\\__/\\\__\/\\\////\\\_\///\\\///__\/\\\////\\\__\/\\\////\\\___/////////// 11 | ////////____\/\\\__\/\\\/\\\_/\\\__\/\\\__\/\\\___\/\\\_____\/\\\__\/\\\__\/\\\__\/\\\___////////// 12 | /////////____\/\\\__\/\\\//\\\/\\\__\/\\\\\\\\\\___\/\\\_____\/\\\__\/\\\__\/\\\\\\\\/____///////// 13 | //////////____\/\\\__\/\\\\//\\\\\\__\/\\\////\\\___\/\\\_____\/\\\__\/\\\__\/\\\///\\\____//////// 14 | ///////////____\/\\\__\/\\\_\//\\\\\__\/\\\__\/\\\___\/\\\_____\/\\\\\\\\\\__\/\\\_\//\\\___/////// 15 | ////////////____\///___\///___\/////___\///___\///____\///______\//////////___\/\\\__\///____////// 16 | /////////////_________________________________________________________________________________///// 17 | /////////////////////////////////////////////////////////////////////////////////////////////////// 18 | 19 | #![doc = include_str!("../README.md")] 20 | #![deny(warnings)] 21 | #![allow(unknown_lints)] 22 | #![warn( 23 | clippy::all, 24 | clippy::missing_docs_in_private_items, 25 | clippy::nursery, 26 | clippy::pedantic, 27 | clippy::perf, 28 | clippy::restriction, 29 | clippy::cargo, 30 | elided_lifetimes_in_paths, 31 | missing_docs, 32 | rustdoc::all 33 | )] 34 | // https://doc.rust-lang.org/rustc/lints/listing/allowed-by-default.html 35 | #![warn( 36 | absolute_paths_not_starting_with_crate, 37 | elided_lifetimes_in_paths, 38 | explicit_outlives_requirements, 39 | keyword_idents, 40 | let_underscore_drop, 41 | macro_use_extern_crate, 42 | meta_variable_misuse, 43 | missing_abi, 44 | missing_copy_implementations, 45 | missing_debug_implementations, 46 | missing_docs, 47 | non_ascii_idents, 48 | noop_method_call, 49 | 
pointer_structural_match, 50 | rust_2021_incompatible_closure_captures, 51 | rust_2021_incompatible_or_patterns, 52 | rust_2021_prefixes_incompatible_syntax, 53 | rust_2021_prelude_collisions, 54 | single_use_lifetimes, 55 | trivial_casts, 56 | trivial_numeric_casts, 57 | unreachable_pub, 58 | unsafe_code, 59 | unsafe_op_in_unsafe_fn, 60 | unstable_features, 61 | unused_crate_dependencies, 62 | unused_extern_crates, 63 | unused_import_braces, 64 | unused_lifetimes, 65 | unused_macro_rules, 66 | unused_qualifications, 67 | unused_results, 68 | unused_tuple_struct_fields, 69 | variant_size_differences 70 | )] 71 | #![allow( 72 | clippy::blanket_clippy_restriction_lints, 73 | clippy::cargo_common_metadata, 74 | clippy::expect_used, 75 | clippy::implicit_return, 76 | clippy::inline_always, 77 | clippy::let_underscore_untyped, 78 | clippy::min_ident_chars, 79 | clippy::missing_trait_methods, 80 | clippy::mod_module_files, 81 | clippy::multiple_unsafe_ops_per_block, 82 | clippy::needless_borrowed_reference, 83 | clippy::option_option, 84 | clippy::partial_pub_fields, 85 | clippy::pub_use, 86 | clippy::pub_with_shorthand, 87 | clippy::question_mark_used, 88 | clippy::redundant_pub_crate, 89 | clippy::ref_patterns, 90 | clippy::semicolon_outside_block, 91 | clippy::separated_literal_suffix, 92 | clippy::similar_names, 93 | clippy::single_call_fn, 94 | clippy::single_char_lifetime_names, 95 | clippy::std_instead_of_alloc, 96 | clippy::string_add, 97 | clippy::use_self, 98 | clippy::wildcard_imports 99 | )] 100 | 101 | /* 102 | /// Unwrap if we're debugging but `unwrap_unchecked` if we're not. 103 | #[cfg(any(debug_assertions, test))] 104 | macro_rules! unwrap { 105 | ($expr:expr) => { 106 | $expr.unwrap() 107 | }; 108 | } 109 | 110 | /// Unwrap if we're debugging but `unwrap_unchecked` if we're not. 111 | #[cfg(not(any(debug_assertions, test)))] 112 | macro_rules! 
unwrap { 113 | ($expr:expr) => {{ 114 | #[allow(unsafe_code, unused_unsafe)] 115 | let result = unsafe { $expr.unwrap_unchecked() }; 116 | result 117 | }}; 118 | } 119 | 120 | /// Checked `get` (then unwrap) if we're debugging but `get_unchecked` if we're not. 121 | #[cfg(any(debug_assertions, test))] 122 | macro_rules! get { 123 | ($expr:expr, $index:expr) => { 124 | $expr.get($index).unwrap() 125 | }; 126 | } 127 | 128 | /// Checked `get` (then unwrap) if we're debugging but `get_unchecked` if we're not. 129 | #[cfg(not(any(debug_assertions, test)))] 130 | macro_rules! get { 131 | ($expr:expr, $index:expr) => {{ 132 | #[allow(unsafe_code, unused_unsafe)] 133 | let result = unsafe { $expr.get_unchecked($index) }; 134 | result 135 | }}; 136 | } 137 | 138 | /// Checked `get_mut` (then unwrap) if we're debugging but `get_unchecked_mut` if we're not. 139 | #[cfg(any(debug_assertions, test))] 140 | macro_rules! get_mut { 141 | ($expr:expr, $index:expr) => { 142 | $expr.get_mut($index).unwrap() 143 | }; 144 | } 145 | 146 | /// Checked `get_mut` (then unwrap) if we're debugging but `get_unchecked_mut` if we're not. 147 | #[cfg(not(any(debug_assertions, test)))] 148 | macro_rules! get_mut { 149 | ($expr:expr, $index:expr) => {{ 150 | #[allow(unsafe_code, unused_unsafe)] 151 | let result = unsafe { $expr.get_unchecked_mut($index) }; 152 | result 153 | }}; 154 | } 155 | */ 156 | 157 | // TODO: derive ToSrc 158 | 159 | #[cfg(test)] 160 | mod test; 161 | 162 | pub use inator_automata::{Deterministic as Parser, *}; 163 | 164 | use core::iter; 165 | use std::collections::{BTreeMap, BTreeSet}; 166 | 167 | #[cfg(feature = "quickcheck")] 168 | use quickcheck as _; // <-- TODO: remove if we write some implementations 169 | 170 | /// Parser that accepts only the empty string.
171 | #[inline] 172 | #[must_use] 173 | pub fn empty() -> Deterministic { 174 | Graph { 175 | states: vec![State { 176 | transitions: Curry::Scrutinize { 177 | filter: RangeMap(BTreeMap::new()), 178 | fallback: None, 179 | }, 180 | non_accepting: BTreeSet::new(), 181 | }], 182 | initial: 0, 183 | } 184 | } 185 | 186 | /// Accept exactly this range of tokens and do exactly these things. 187 | #[inline] 188 | #[must_use] 189 | pub fn on_any_of(range: Range, update: Update) -> Deterministic { 190 | Graph { 191 | states: vec![ 192 | State { 193 | transitions: Curry::Scrutinize { 194 | filter: RangeMap(BTreeMap::new()), 195 | fallback: None, 196 | }, 197 | non_accepting: BTreeSet::new(), 198 | }, 199 | State { 200 | non_accepting: iter::once(format!( 201 | "Expected only a single token on [{}..={}] but got another token after it", 202 | range.first.to_src(), 203 | range.last.to_src(), 204 | )) 205 | .collect(), 206 | transitions: Curry::Scrutinize { 207 | filter: RangeMap( 208 | iter::once(( 209 | range, 210 | Transition::Lateral { 211 | dst: 0, 212 | update: Some(update), 213 | }, 214 | )) 215 | .collect(), 216 | ), 217 | fallback: None, 218 | }, 219 | }, 220 | ], 221 | initial: 1, 222 | } 223 | } 224 | 225 | /// Accept exactly this range of tokens and forget their values. 
226 | #[inline] 227 | #[must_use] 228 | pub fn any_of(range: Range) -> Deterministic { 229 | Graph { 230 | states: vec![ 231 | State { 232 | transitions: Curry::Scrutinize { 233 | filter: RangeMap(BTreeMap::new()), 234 | fallback: None, 235 | }, 236 | non_accepting: BTreeSet::new(), 237 | }, 238 | State { 239 | non_accepting: iter::once(format!( 240 | "Expected only a single token on [{}..={}] but got another token after it", 241 | range.first.to_src(), 242 | range.last.to_src(), 243 | )) 244 | .collect(), 245 | transitions: Curry::Scrutinize { 246 | filter: RangeMap( 247 | iter::once(( 248 | range, 249 | Transition::Lateral { 250 | dst: 0, 251 | update: None, 252 | }, 253 | )) 254 | .collect(), 255 | ), 256 | fallback: None, 257 | }, 258 | }, 259 | ], 260 | initial: 1, 261 | } 262 | } 263 | 264 | /// Accept exactly this token and forget its value. 265 | #[inline] 266 | #[must_use] 267 | pub fn toss(token: I) -> Deterministic { 268 | any_of(Range::unit(token)) 269 | } 270 | -------------------------------------------------------------------------------- /src/test.rs: -------------------------------------------------------------------------------- 1 | /* 2 | * This Source Code Form is subject to the terms of the Mozilla Public 3 | * License, v. 2.0. If a copy of the MPL was not distributed with this 4 | * file, You can obtain one at https://mozilla.org/MPL/2.0/. 5 | */ 6 | 7 | #![allow( 8 | clippy::absolute_paths, 9 | clippy::arithmetic_side_effects, 10 | clippy::indexing_slicing, 11 | clippy::panic, 12 | clippy::unwrap_used 13 | )] 14 | 15 | #[cfg(feature = "quickcheck")] // <-- TODO: disable for reduced tests 16 | use crate::*; 17 | 18 | /* 19 | /// Check if we can split this input into a bunch of non-zero-sized slices 20 | /// that are all individually accepted by a given parser. 
21 | #[inline] 22 | #[cfg(feature = "quickcheck")] // <-- TODO: disable for reduced tests 23 | fn sliceable>(parser: &Graph, input: &[I]) -> bool { 24 | input.is_empty() 25 | || (1..=input.len()).rev().any(|i| { 26 | parser.accept(input[..i].iter().cloned()).is_ok() && sliceable(parser, &input[i..]) 27 | }) 28 | } 29 | */ 30 | 31 | #[cfg(feature = "quickcheck")] 32 | mod prop { 33 | use super::*; 34 | use quickcheck::*; 35 | 36 | quickcheck! { 37 | fn empty_works(input: Vec) -> bool { 38 | let parser = empty::(); 39 | if parser.check().is_err() { return false; } 40 | input.is_empty() == empty().accept(input).is_ok() 41 | } 42 | 43 | fn on_any_of_works(range: Range, input: Vec) -> bool { 44 | let parser = on_any_of(range, update!(|(), _| {})); 45 | if parser.check().is_err() { return false; } 46 | parser.accept(input.iter().copied()).is_ok() == (input.len() == 1 && range.contains(&input[0])) 47 | } 48 | } 49 | } 50 | --------------------------------------------------------------------------------