├── .github ├── CODEOWNERS ├── ISSUE_TEMPLATE.md ├── PULL_REQUEST_TEMPLATE.md └── workflows │ ├── ci.yml │ ├── cifuzz.yml │ └── codspeed.yml ├── .gitignore ├── CHANGELOG.md ├── CONTRIBUTING.md ├── Cargo.toml ├── LICENSE ├── README.md ├── assets ├── bigbuckbunny.mp4 ├── links.txt ├── nom.png ├── nom.svg ├── small.mp4 └── testfile.txt ├── benchmarks ├── Cargo.toml ├── README.md ├── benches │ ├── arithmetic.rs │ ├── http.rs │ ├── http_streaming.rs │ ├── ini.rs │ ├── ini_str.rs │ ├── json.rs │ ├── json_streaming.rs │ └── number.rs ├── canada.json └── src │ └── lib.rs ├── doc ├── archive │ ├── FAQ.md │ ├── how_nom_macros_work.md │ ├── upgrading_to_nom_1.md │ ├── upgrading_to_nom_2.md │ └── upgrading_to_nom_4.md ├── choosing_a_combinator.md ├── custom_input_types.md ├── error_management.md ├── home.md ├── making_a_new_parser_from_scratch.md ├── nom_recipes.md └── upgrading_to_nom_5.md ├── examples ├── custom_error.rs ├── iterator.rs ├── json.rs ├── json2.rs ├── json_iterator.rs ├── s_expression.rs └── string.rs ├── fuzz ├── .gitignore ├── Cargo.toml └── fuzz_targets │ └── fuzz_arithmetic.rs ├── nom-language ├── Cargo.toml ├── LICENSE └── src │ ├── error.rs │ ├── lib.rs │ └── precedence │ ├── mod.rs │ └── tests.rs ├── proptest-regressions ├── character │ ├── complete.txt │ └── streaming.txt └── number │ ├── complete.txt │ └── streaming.txt ├── rustfmt.toml ├── src ├── bits │ ├── complete.rs │ ├── mod.rs │ └── streaming.rs ├── branch │ ├── mod.rs │ └── tests.rs ├── bytes │ ├── complete.rs │ ├── mod.rs │ ├── streaming.rs │ └── tests.rs ├── character │ ├── complete.rs │ ├── mod.rs │ ├── streaming.rs │ └── tests.rs ├── combinator │ ├── mod.rs │ └── tests.rs ├── error.rs ├── internal.rs ├── lib.rs ├── macros.rs ├── multi │ ├── mod.rs │ └── tests.rs ├── number │ ├── complete.rs │ ├── mod.rs │ └── streaming.rs ├── sequence │ ├── mod.rs │ └── tests.rs ├── str.rs └── traits.rs └── tests ├── arithmetic.rs ├── arithmetic_ast.rs ├── css.rs ├── custom_errors.rs ├── escaped.rs ├── expression_ast.rs ├── float.rs ├── fnmut.rs ├── ini.rs ├── ini_str.rs ├── issues.rs ├── json.rs ├── mp4.rs ├── multiline.rs ├── overflow.rs └── reborrow_fold.rs /.github/CODEOWNERS: -------------------------------------------------------------------------------- 1 | * @Geal 2 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE.md: -------------------------------------------------------------------------------- 1 | Hello, and thank you for submitting an issue to nom! 2 | 3 | 4 | First, please note that, for family reasons, I have limited time to work on 5 | nom, so following the advice here will make sure I will quickly understand 6 | your problem and answer as soon as possible. 7 | Second, if I don't get to work on your issue quickly, that does not mean I 8 | don't consider it important or useful. Major version releases happen once 9 | a year, and a lot of fixes are done for the occasion, once I have had time 10 | to think of the right solution. So I will get back to you :) 11 | 12 | ## Prerequisites 13 | 14 | Here are a few things you should provide to help me understand the issue: 15 | 16 | - Rust version : `rustc -V` 17 | - nom version : 18 | - nom compilation features used: 19 | 20 | ## Test case 21 | 22 | Please provide a short, complete (with crate import, etc) test case for 23 | the issue, showing clearly the expected and obtained results. 24 | 25 | Example test case: 26 | 27 | ``` 28 | #[macro_use] 29 | extern crate nom; 30 | 31 | named!(multi<&[u8], Vec<&[u8]> >, many1!( tag!( "abcd" ) ) ); 32 | 33 | let a = b"abcdabcd"; 34 | 35 | fn main() { 36 | let res = vec![&b"abcd"[..], &b"abcd"[..]]; 37 | assert_eq!(multi(&a[..]),Ok((&b""[..], res))); // returns Err::Incomplete(Unknown)) 38 | } 39 | ``` 40 | 41 | -------------------------------------------------------------------------------- /.github/PULL_REQUEST_TEMPLATE.md: -------------------------------------------------------------------------------- 1 | 42 | -------------------------------------------------------------------------------- /.github/workflows/ci.yml: -------------------------------------------------------------------------------- 1 | name: CI 2 | 3 | on: [push, pull_request] 4 | 5 | env: 6 | RUST_MINVERSION: 1.65.0 7 | CARGO_INCREMENTAL: 0 8 | CARGO_NET_RETRY: 10 9 | 10 | jobs: 11 | test: 12 | name: Test 13 | runs-on: ubuntu-latest 14 | 15 | strategy: 16 | matrix: 17 | rust: 18 | - stable 19 | - beta 20 | - nightly 21 | - 1.65.0 22 | 23 | features: 24 | - '' 25 | 26 | include: 27 | - rust: stable 28 | features: '' 29 | - rust: stable 30 | features: '--features "std"' 31 | - rust: stable 32 | features: '--no-default-features' 33 | - rust: stable 34 | features: '--no-default-features --features "alloc"' 35 | - rust: nightly 36 | features: '' 37 | - rust: nightly 38 | features: '--no-default-features' 39 | - rust: nightly 40 | features: '--no-default-features --features "alloc"' 41 | 42 | steps: 43 | - name: Checkout sources 44 | uses: actions/checkout@v2 45 | 46 | - name: Install rust (${{ matrix.rust }}) 47 | uses: actions-rs/toolchain@v1 48 | with: 49 | toolchain: ${{ matrix.rust }} 50 | profile: minimal 51 | override: true 52 | 53 | - name: Cache 54 | uses: Swatinem/rust-cache@v1 55 | 56 | - name: Build 57 | uses: actions-rs/cargo@v1 58 | with: 59 | command: build 60 | args: --verbose ${{ matrix.features }} 61 | 62 | - name: Test 63 | uses: actions-rs/cargo@v1 64 | with: 65 | command: test 66 | args: --verbose ${{ matrix.features }} 67 | 68 | minrust: 69 | name: Test minimal rust version 70 | runs-on: ubuntu-latest 71 | 72 | steps: 73 | - name: Checkout sources 74 | uses: actions/checkout@v2 75 | 76 | - name: Install rust (${{ env.RUST_MINVERSION }}) 77 | uses: actions-rs/toolchain@v1 78 | with: 79 | toolchain: ${{ env.RUST_MINVERSION }} 80 | profile: minimal 81 | override: true 82 | 83 | - name: Cache 84 | uses: Swatinem/rust-cache@v1 85 | 86 | - name: Build 87 | uses: actions-rs/cargo@v1 88 | with: 89 | command: build 90 | args: --verbose --no-default-features --features "alloc,std" 91 | 92 | bench: 93 | name: Bench 94 | runs-on: ubuntu-latest 95 | 96 | steps: 97 | - name: Checkout sources 98 | uses: actions/checkout@v2 99 | 100 | - name: Install rust 101 | uses: actions-rs/toolchain@v1 102 | with: 103 | toolchain: nightly 104 | profile: minimal 105 | override: true 106 | 107 | - name: Cache 108 | uses: Swatinem/rust-cache@v1 109 | 110 | - name: Compile bench 111 | uses: actions-rs/cargo@v1 112 | with: 113 | command: bench 114 | args: --verbose --no-run --features "" 115 | 116 | - name: Run bench 117 | uses: actions-rs/cargo@v1 118 | with: 119 | command: bench 120 | args: --verbose --features "" 121 | 122 | doc: 123 | name: Build documentation 124 | runs-on: ubuntu-latest 125 | 126 | steps: 127 | - name: Checkout sources 128 | uses: actions/checkout@v2 129 | 130 | - name: Install rust 131 | uses: actions-rs/toolchain@v1 132 | with: 133 | toolchain: nightly 134 | profile: minimal 135 | override: true 136 | 137 | - name: Build 138 | env: 139 | RUSTDOCFLAGS: -D warnings 140 | run: cargo doc --no-deps --document-private-items --workspace --verbose --features "std docsrs" 141 | 142 | fmt: 143 | name: Check formatting 144 | runs-on: ubuntu-latest 145 | 146 | steps: 147 | - name: Checkout sources 148 | uses: actions/checkout@v2 149 | 150 | - name: Install rust 151 | uses: actions-rs/toolchain@v1 152 | with: 153 | toolchain: stable 154 | components: rustfmt 155 | profile: minimal 156 | override: true 157 | 158 | - name: cargo fmt -- --check 159 | continue-on-error: true 160 | uses: actions-rs/cargo@v1 161 | with: 162 | command: fmt 163 | args: -- --check 164 | 165 | coverage: 166 | name: Coverage 167 | runs-on: ubuntu-latest 168 | 169 | steps: 170 | - name: Checkout sources 171 | uses: actions/checkout@v2 172 | 173 | - name: Install rust 174 | uses: actions-rs/toolchain@v1 175 | with: 176 | toolchain: stable 177 | profile: minimal 178 | override: true 179 | 180 | - name: Cache 181 | uses: Swatinem/rust-cache@v1 182 | 183 | - name: Install cargo-tarpaulin 184 | uses: actions-rs/cargo@v1 185 | with: 186 | command: install 187 | args: cargo-tarpaulin 188 | 189 | - name: Run cargo tarpaulin 190 | uses: actions-rs/cargo@v1 191 | with: 192 | command: tarpaulin 193 | args: --output-dir coverage --out xml --workspace --exclude benchmarks 194 | 195 | - name: Upload coverage reports to Codecov 196 | uses: codecov/codecov-action@v4.0.1 197 | with: 198 | token: ${{ secrets.CODECOV_TOKEN }} 199 | slug: rust-bakery/nom 200 | -------------------------------------------------------------------------------- /.github/workflows/cifuzz.yml: -------------------------------------------------------------------------------- 1 | name: CIFuzz 2 | on: [pull_request] 3 | jobs: 4 | Fuzzing: 5 | runs-on: ubuntu-latest 6 | steps: 7 | - name: Build Fuzzers 8 | id: build 9 | uses: google/oss-fuzz/infra/cifuzz/actions/build_fuzzers@master 10 | with: 11 | oss-fuzz-project-name: 'nom' 12 | dry-run: false 13 | language: rust 14 | - name: Run Fuzzers 15 | uses: google/oss-fuzz/infra/cifuzz/actions/run_fuzzers@master 16 | with: 17 | oss-fuzz-project-name: 'nom' 18 | fuzz-seconds: 300 19 | dry-run: false 20 | language: rust 21 | - name: Upload Crash 22 | uses: actions/upload-artifact@v3 23 | if: failure() && steps.build.outcome == 'success' 24 | with: 25 | name: artifacts 26 | path: ./out/artifacts 27 | -------------------------------------------------------------------------------- /.github/workflows/codspeed.yml: -------------------------------------------------------------------------------- 1 | name: codspeed-benchmarks 2 | 3 | on: 4 | push: 5 | branches: 6 | - "main" 7 | pull_request: 8 | # `workflow_dispatch` allows CodSpeed to trigger backtest 9 | # performance analysis in order to generate initial data. 10 | workflow_dispatch: 11 | 12 | jobs: 13 | benchmarks: 14 | runs-on: ubuntu-latest 15 | steps: 16 | - uses: actions/checkout@v3 17 | 18 | - name: Setup rust toolchain, cache and cargo-codspeed binary 19 | uses: moonrepo/setup-rust@v0 20 | with: 21 | channel: stable 22 | cache-target: release 23 | bins: cargo-codspeed 24 | 25 | - name: Build the benchmark target(s) 26 | run: cargo codspeed build -p benchmarks 27 | 28 | - name: Run the benchmarks 29 | uses: CodSpeedHQ/action@v3 30 | with: 31 | run: cargo codspeed run -p benchmarks 32 | token: ${{ secrets.CODSPEED_TOKEN }} 33 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | target/* 2 | Cargo.lock 3 | FullRecognition.jpg 4 | map.rs 5 | oldsrc/ 6 | realworld/ 7 | src/generator.rs 8 | .DS_Store 9 | private-docs/ 10 | .idea/ 11 | -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # Contributing to nom 2 | 3 | Thanks a lot for contributing to this project! 4 | 5 | The following is a set of guidelines for contributing to [nom][1]. 6 | 7 | **Since the project is young**: consider those best practices prone to change. Please suggest improvements! 8 | 9 | [1]: https://github.com/rust-bakery/nom 10 | 11 | ## Basics 12 | 13 | ### License 14 | 15 | The project uses the [MIT][l1] license. By contributing to this project you agree to license 16 | your changes under this license. 17 | 18 | [l1]: https://opensource.org/license/mit/ 19 | 20 | 21 | ## What to do 22 | 23 | ### Issues 24 | 25 | There is plenty of [features missing][i1] and possibly bugs might be already there. Feel free to add new [issues][i2] 26 | and to wrangle over those already [open][i3] and help fixing them. 27 | 28 | [i1]: https://github.com/rust-bakery/nom/issues?q=is%3Aopen+is%3Aissue+label%3Aenhancement 29 | [i2]: https://github.com/rust-bakery/nom/issues 30 | [i3]: https://github.com/rust-bakery/nom/issues?q=is%3Aopen+is%3Aissue 31 | 32 | ### Code 33 | 34 | Implementing new codecs, container formats or protocols is always welcome! 35 | 36 | ### Tests 37 | 38 | It is strongly suggested to provide test along changes so the coverage stays around the **85%**, helping to 39 | get to full coverage is pretty welcome. 40 | 41 | ### Benchmark 42 | 43 | Help in making sure the code does not have performance regression, by improving the benchmark suite or just by 44 | running it weekly, is welcome as well. 45 | 46 | ### Documentation 47 | 48 | To preview changes to the documentation: use `cargo doc` with [`cargo 49 | external-doc`](https://github.com/Geal/cargo-external-doc) 50 | 51 | ## Style 52 | 53 | ### Issue style 54 | 55 | Try to write at least 3 short paragraphs describing what were you trying to achieve, what is not working and 56 | the step by step actions that lead to the unwanted outcome. 57 | 58 | If possible provide: 59 | 60 | - a code snippet or a link to a [gist][is1] showcasing the problem, if is a library usage issue. 61 | - a backtrace, if it is a crash. 62 | - a sample file, if it is a decoding or encoding issue. 63 | 64 | [is1]: https://gist.github.com/ 65 | 66 | ### Coding style 67 | 68 | The normal rust coding style is checked by [rustfmt][cs1]. 69 | Readable code is the first step on having good and safe libraries. 70 | 71 | To avoid slight differences appearing in nightly versions, please 72 | use the following command to run rustfmt: `cargo +stable fmt` 73 | 74 | [cs1]: https://github.com/rust-lang/rustfmt 75 | 76 | -------------------------------------------------------------------------------- /Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | 3 | name = "nom" 4 | version = "8.0.0" 5 | authors = ["contact@geoffroycouprie.com"] 6 | description = "A byte-oriented, zero-copy, parser combinators library" 7 | license = "MIT" 8 | repository = "https://github.com/rust-bakery/nom" 9 | readme = "README.md" 10 | documentation = "https://docs.rs/nom" 11 | keywords = ["parser", "parser-combinators", "parsing", "streaming", "bit"] 12 | categories = ["parsing"] 13 | edition = "2021" 14 | autoexamples = false 15 | 16 | # also update in README.md (badge and "Rust version requirements" section) 17 | rust-version = "1.65.0" 18 | 19 | include = [ 20 | "CHANGELOG.md", 21 | "LICENSE", 22 | "README.md", 23 | ".gitignore", 24 | "Cargo.toml", 25 | "src/*.rs", 26 | "src/*/*.rs", 27 | "tests/*.rs", 28 | "doc/nom_recipes.md", 29 | ] 30 | 31 | [features] 32 | alloc = [] 33 | std = ["alloc", "memchr/std"] 34 | default = ["std"] 35 | docsrs = [] 36 | 37 | [dependencies.memchr] 38 | version = "2.3" 39 | default-features = false 40 | 41 | [dev-dependencies] 42 | doc-comment = "0.3" 43 | proptest = "=1.0.0" 44 | nom-language = { path = "./nom-language" } 45 | 46 | [package.metadata.docs.rs] 47 | features = ["alloc", "std", "docsrs"] 48 | all-features = true 49 | rustdoc-args = ["--generate-link-to-definition"] 50 | 51 | [profile.bench] 52 | debug = true 53 | lto = true 54 | codegen-units = 1 55 | 56 | [[test]] 57 | name = "arithmetic" 58 | 59 | [[test]] 60 | name = "arithmetic_ast" 61 | required-features = ["alloc"] 62 | 63 | [[test]] 64 | name = "css" 65 | 66 | [[test]] 67 | name = "custom_errors" 68 | 69 | [[test]] 70 | name = "expression_ast" 71 | required-features = ["alloc"] 72 | 73 | [[test]] 74 | name = "float" 75 | 76 | [[test]] 77 | name = "ini" 78 | required-features = ["alloc"] 79 | 80 | [[test]] 81 | name = "ini_str" 82 | required-features = ["alloc"] 83 | 84 | [[test]] 85 | name = "issues" 86 | required-features = ["alloc"] 87 | 88 | [[test]] 89 | name = "json" 90 | 91 | [[test]] 92 | name = "mp4" 93 | required-features = ["alloc"] 94 | 95 | [[test]] 96 | name = "multiline" 97 | required-features = ["alloc"] 98 | 99 | [[test]] 100 | name = "overflow" 101 | 102 | [[test]] 103 | name = "reborrow_fold" 104 | 105 | [[test]] 106 | name = "fnmut" 107 | required-features = ["alloc"] 108 | 109 | [[example]] 110 | name = "custom_error" 111 | required-features = ["alloc"] 112 | path = "examples/custom_error.rs" 113 | 114 | [[example]] 115 | name = "json" 116 | required-features = ["alloc"] 117 | path = "examples/json.rs" 118 | 119 | [[example]] 120 | name = "json2" 121 | required-features = ["alloc"] 122 | path = "examples/json2.rs" 123 | 124 | [[example]] 125 | name = "json_iterator" 126 | required-features = ["alloc"] 127 | path = "examples/json_iterator.rs" 128 | 129 | [[example]] 130 | name = "iterator" 131 | path = "examples/iterator.rs" 132 | 133 | [[example]] 134 | name = "s_expression" 135 | path = "examples/s_expression.rs" 136 | required-features = ["alloc"] 137 | 138 | [[example]] 139 | name = "string" 140 | required-features = ["alloc"] 141 | path = "examples/string.rs" 142 | 143 | [badges] 144 | travis-ci = { repository = "Geal/nom" } 145 | coveralls = { repository = "Geal/nom", branch = "main", service = "github" } 146 | maintenance = { status = "actively-developed" } 147 | 148 | [workspace] 149 | members = [".", "benchmarks/", "nom-language"] 150 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Copyright (c) 2014-2019 Geoffroy Couprie 2 | 3 | Permission is hereby granted, free of charge, to any person obtaining 4 | a copy of this software and associated documentation files (the 5 | "Software"), to deal in the Software without restriction, including 6 | without limitation the rights to use, copy, modify, merge, publish, 7 | distribute, sublicense, and/or sell copies of the Software, and to 8 | permit persons to whom the Software is furnished to do so, subject to 9 | the following conditions: 10 | 11 | The above copyright notice and this permission notice shall be 12 | included in all copies or substantial portions of the Software. 13 | 14 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 15 | EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 16 | MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 17 | NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE 18 | LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION 19 | OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION 20 | WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 21 | -------------------------------------------------------------------------------- /assets/bigbuckbunny.mp4: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rust-bakery/nom/a44b52ed9052a66f5eb2add9aa5b314f034dc580/assets/bigbuckbunny.mp4 -------------------------------------------------------------------------------- /assets/links.txt: -------------------------------------------------------------------------------- 1 | https://github.com/ekmett/machines 2 | https://www.fpcomplete.com/user/snoyberg/library-documentation/conduit-overview 3 | https://hackage.haskell.org/package/pipes 4 | 5 | http://en.wikipedia.org/wiki/Iteratee#cite_note-play-enumeratee-3 6 | http://okmij.org/ftp/Streams.html#design 7 | http://okmij.org/ftp/Haskell/Iteratee/Iteratee.hs 8 | http://okmij.org/ftp/Haskell/Iteratee/describe.pdf 9 | http://okmij.org/ftp/Haskell/Iteratee/IterDemo.hs 10 | http://okmij.org/ftp/Haskell/Iteratee/IterDemo1.hs 11 | http://mandubian.com/2012/08/27/understanding-play2-iteratees-for-normal-humans/ 12 | https://github.com/playframework/playframework/tree/master/framework/src/iteratees/src/main/scala/play/api/libs/Iteratee 13 | https://github.com/playframework/playframework/blob/master/framework/src/iteratees/src/main/scala/play/api/libs/iteratee/Iteratee.scala 14 | https://github.com/playframework/playframework/blob/master/framework/src/iteratees/src/main/scala/play/api/libs/iteratee/Enumerator.scala 15 | http://stackoverflow.com/questions/10177666/cant-understand-iteratee-enumerator-enumeratee-in-play-2-0 16 | http://stackoverflow.com/questions/10346592/how-to-write-an-enumeratee-to-chunk-an-enumerator-along-different-boundaries 17 | http://okmij.org/ftp/Haskell/Iteratee/Iteratee.hs 18 | http://www.mew.org/~kazu/proj/enumerator/ 19 | http://www.reddit.com/r/rust/comments/2aur8x/using_macros_to_parse_file_formats/ 20 | https://github.com/LeoTestard/rustlex 21 | https://github.com/rust-lang/rust/blob/master/src/test/run-pass/monad.rs 22 | www.reddit.com/r/programming/comments/z7lwn/rust_typeclasses_talk/ 23 | https://github.com/rust-lang/rfcs/pull/53 24 | http://apocalisp.wordpress.com/2011/10/26/tail-call-elimination-in-scala-monads/ 25 | http://www.cis.upenn.edu/~bcpierce/sf/current/toc.html 26 | http://www.reddit.com/r/rust/comments/1a57pw/haskeller_playing_with_rust_a_question_about/ 27 | 28 | -------------------------------------------------------------------------------- /assets/nom.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rust-bakery/nom/a44b52ed9052a66f5eb2add9aa5b314f034dc580/assets/nom.png -------------------------------------------------------------------------------- /assets/small.mp4: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rust-bakery/nom/a44b52ed9052a66f5eb2add9aa5b314f034dc580/assets/small.mp4 -------------------------------------------------------------------------------- /assets/testfile.txt: -------------------------------------------------------------------------------- 1 | abcdabcdabcdabcdabcd 2 | efgh 3 | coin coin 4 | hello 5 | blah 6 | -------------------------------------------------------------------------------- /benchmarks/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "benchmarks" 3 | version = "0.1.0" 4 | edition = "2018" 5 | 6 | # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html 7 | 8 | [dependencies] 9 | criterion = "0.5.0" 10 | jemallocator = "0.5.4" 11 | nom = { path = "../" } 12 | 13 | [lib] 14 | bench = false 15 | 16 | [[bench]] 17 | name = "arithmetic" 18 | path = "benches/arithmetic.rs" 19 | harness = false 20 | 21 | [[bench]] 22 | name = "number" 23 | path = "benches/number.rs" 24 | harness = false 25 | 26 | [[bench]] 27 | name = "http" 28 | path = "benches/http.rs" 29 | harness = false 30 | 31 | [[bench]] 32 | name = "http_streaming" 33 | path = "benches/http_streaming.rs" 34 | harness = false 35 | 36 | 37 | [[bench]] 38 | name = "ini" 39 | path = "benches/ini.rs" 40 | harness = false 41 | 42 | [[bench]] 43 | name = "ini_str" 44 | path = "benches/ini_str.rs" 45 | harness = false 46 | 47 | [[bench]] 48 | name = "json" 49 | path = "benches/json.rs" 50 | harness = false 51 | 52 | [[bench]] 53 | name = "json_streaming" 54 | path = "benches/json_streaming.rs" 55 | harness = false 56 | 57 | [dev-dependencies] 58 | codspeed-criterion-compat = "2.4.1" 59 | nom-language = { path = "../nom-language" } 60 | -------------------------------------------------------------------------------- /benchmarks/README.md: -------------------------------------------------------------------------------- 1 | # Benchmarks for nom parsers 2 | -------------------------------------------------------------------------------- /benchmarks/benches/arithmetic.rs: -------------------------------------------------------------------------------- 1 | #[global_allocator] 2 | static ALLOC: jemallocator::Jemalloc = jemallocator::Jemalloc; 3 | 4 | use codspeed_criterion_compat::{criterion_group, criterion_main, Criterion}; 5 | use nom::{ 6 | branch::alt, 7 | character::complete::{char, digit1, one_of, space0}, 8 | combinator::map_res, 9 | multi::fold, 10 | sequence::{delimited, pair}, 11 | IResult, Parser, 12 | }; 13 | 14 | // Parser definition 15 | 16 | // We transform an integer string into a i64, ignoring surrounding whitespaces 17 | // We look for a digit suite, and try to convert it. 18 | // If there are no digits, we look for a parenthesized expression. 19 | fn factor(input: &[u8]) -> IResult<&[u8], i64> { 20 | delimited( 21 | space0, 22 | alt(( 23 | map_res(digit1, |digits| { 24 | unsafe { std::str::from_utf8_unchecked(digits) }.parse() 25 | }), 26 | delimited(char('('), expr, char(')')), 27 | )), 28 | space0, 29 | ) 30 | .parse(input) 31 | } 32 | 33 | // We read an initial factor and for each time we find 34 | // a * or / operator followed by another factor, we do 35 | // the math by folding everything 36 | fn term(input: &[u8]) -> IResult<&[u8], i64> { 37 | let (input, init) = factor(input)?; 38 | fold( 39 | 0.., 40 | pair(one_of("*/"), factor), 41 | move || init, 42 | |acc, (op, val)| { 43 | if op == '*' { 44 | acc * val 45 | } else { 46 | acc / val 47 | } 48 | }, 49 | ) 50 | .parse_complete(input) 51 | } 52 | 53 | fn expr(input: &[u8]) -> IResult<&[u8], i64> { 54 | let (input, init) = term(input)?; 55 | fold( 56 | 0.., 57 | pair(one_of("+-"), term), 58 | move || init, 59 | |acc, (op, val)| { 60 | if op == '+' { 61 | acc + val 62 | } else { 63 | acc - val 64 | } 65 | }, 66 | ) 67 | .parse_complete(input) 68 | } 69 | 70 | #[allow(clippy::eq_op, clippy::erasing_op)] 71 | fn arithmetic(c: &mut Criterion) { 72 | let data = b" 2*2 / ( 5 - 1) + 3 / 4 * (2 - 7 + 567 *12 /2) + 3*(1+2*( 45 /2));"; 73 | 74 | assert_eq!( 75 | expr(data), 76 | Ok(( 77 | &b";"[..], 78 | 2 * 2 / (5 - 1) + 3 / 4 * (2 - 7 + 567 * 12 / 2) + 3 * (1 + 2 * (45 / 2)), 79 | )) 80 | ); 81 | c.bench_function("arithmetic", |b| { 82 | b.iter(|| expr(data).unwrap()); 83 | }); 84 | } 85 | 86 | criterion_group!(benches, arithmetic); 87 | criterion_main!(benches); 88 | -------------------------------------------------------------------------------- /benchmarks/benches/http.rs: -------------------------------------------------------------------------------- 1 | #![cfg_attr(rustfmt, rustfmt_skip)] 2 | 3 | #[global_allocator] 4 | static ALLOC: jemallocator::Jemalloc = jemallocator::Jemalloc; 5 | 6 | use codspeed_criterion_compat::*; 7 | use nom::{IResult, bytes::{tag, take_while1}, character:: char, multi::many, OutputMode, Parser, PResult, error::Error, Mode, sequence::{preceded, delimited, separated_pair, terminated, pair}, OutputM, Emit, Complete}; 8 | 9 | #[cfg_attr(rustfmt, rustfmt_skip)] 10 | #[derive(Debug)] 11 | struct Request<'a> { 12 | method: &'a [u8], 13 | uri: &'a [u8], 14 | version: &'a [u8], 15 | } 16 | 17 | #[derive(Debug)] 18 | struct Header<'a> { 19 | name: &'a [u8], 20 | value: Vec<&'a [u8]>, 21 | } 22 | 23 | #[cfg_attr(rustfmt, rustfmt_skip)] 24 | #[cfg_attr(feature = "cargo-clippy", allow(match_same_arms))] 25 | fn is_token(c: u8) -> bool { 26 | match c { 27 | 128..=255 => false, 28 | 0..=31 => false, 29 | b'(' => false, 30 | b')' => false, 31 | b'<' => false, 32 | b'>' => false, 33 | b'@' => false, 34 | b',' => false, 35 | b';' => false, 36 | b':' => false, 37 | b'\\' => false, 38 | b'"' => false, 39 | b'/' => false, 40 | b'[' => false, 41 | b']' => false, 42 | b'?' => false, 43 | b'=' => false, 44 | b'{' => false, 45 | b'}' => false, 46 | b' ' => false, 47 | _ => true, 48 | } 49 | } 50 | 51 | fn not_line_ending(c: u8) -> bool { 52 | c != b'\r' && c != b'\n' 53 | } 54 | 55 | fn is_space(c: u8) -> bool { 56 | c == b' ' 57 | } 58 | 59 | fn is_not_space(c: u8) -> bool { 60 | c != b' ' 61 | } 62 | fn is_horizontal_space(c: u8) -> bool { 63 | c == b' ' || c == b'\t' 64 | } 65 | 66 | fn is_version(c: u8) -> bool { 67 | c >= b'0' && c <= b'9' || c == b'.' 68 | } 69 | 70 | fn line_ending<'a>()-> impl Parser<&'a[u8], Output=&'a[u8], Error=Error<&'a[u8]>> { 71 | tag("\n").or(tag("\r\n")) 72 | } 73 | 74 | fn request_line<'a>()-> impl Parser<&'a[u8], Output=Request<'a>, Error=Error<&'a[u8]>> { 75 | (take_while1(is_token), preceded(take_while1(is_space), take_while1(is_not_space)), delimited(take_while1(is_space), http_version(), line_ending())) 76 | .map(|(method, uri, version)| Request {method, uri, version}) 77 | } 78 | 79 | fn http_version<'a>() -> impl Parser<&'a[u8], Output=&'a[u8], Error=Error<&'a[u8]>> { 80 | 81 | preceded(tag("HTTP/"), take_while1(is_version)) 82 | } 83 | 84 | fn message_header_value<'a>() -> impl Parser<&'a[u8], Output=&'a[u8], Error=Error<&'a[u8]>> { 85 | 86 | delimited(take_while1(is_horizontal_space), take_while1(not_line_ending), line_ending()) 87 | } 88 | 89 | fn message_header<'a>() -> impl Parser<&'a[u8], Output=Header<'a>, Error=Error<&'a[u8]> >{ 90 | separated_pair(take_while1(is_token), char(':'), many(1.., message_header_value())) 91 | .map(|(name, value)|Header{ name, value }) 92 | } 93 | 94 | fn request<'a>() -> impl Parser<&'a[u8], Output=(Request<'a>, Vec>), Error=Error<&'a[u8]> > { 95 | pair(request_line(), terminated(many(1.., message_header()), line_ending())) 96 | } 97 | 98 | 99 | fn parse(data: &[u8]) -> Option, Vec>)>> { 100 | let mut buf = &data[..]; 101 | let mut v = Vec::new(); 102 | loop { 103 | match request().process::>(buf) { 104 | Ok((b, r)) => { 105 | buf = b; 106 | v.push(r); 107 | 108 | if b.is_empty() { 109 | 110 | //println!("{}", i); 111 | break; 112 | } 113 | } 114 | Err(e) => { 115 | println!("error: {:?}", e); 116 | return None; 117 | }, 118 | } 119 | } 120 | 121 | Some(v) 122 | } 123 | 124 | /* 125 | #[bench] 126 | fn small_test(b: &mut Bencher) { 127 | let data = include_bytes!("../../http-requests.txt"); 128 | b.iter(||{ 129 | parse(data) 130 | }); 131 | } 132 | 133 | #[bench] 134 | fn bigger_test(b: &mut Bencher) { 135 | let data = include_bytes!("../../bigger.txt"); 136 | b.iter(||{ 137 | parse(data) 138 | }); 139 | } 140 | */ 141 | 142 | fn one_test(c: &mut Criterion) { 143 | let data = &b"GET / HTTP/1.1 144 | Host: www.reddit.com 145 | User-Agent: Mozilla/5.0 (Macintosh; Intel Mac OS X 10.8; rv:15.0) Gecko/20100101 Firefox/15.0.1 146 | Accept: text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8 147 | Accept-Language: en-us,en;q=0.5 148 | Accept-Encoding: gzip, deflate 149 | Connection: keep-alive 150 | 151 | "[..]; 152 | 153 | let mut http_group = c.benchmark_group("http"); 154 | http_group.throughput(Throughput::Bytes(data.len() as u64)); 155 | http_group.bench_with_input( 156 | BenchmarkId::new("parse", data.len()), 157 | data, 158 | |b, data| { 159 | b.iter(|| parse(data).unwrap()); 160 | }); 161 | 162 | http_group.finish(); 163 | } 164 | 165 | /* 166 | fn main() { 167 | let mut contents: Vec = Vec::new(); 168 | 169 | { 170 | use std::io::Read; 171 | 172 | let mut file = File::open(env::args().nth(1).expect("File to read")).expect("Failed to open file"); 173 | 174 | let _ = file.read_to_end(&mut contents).unwrap(); 175 | } 176 | 177 | let buf = &contents[..]; 178 | loop { 179 | parse(buf); 180 | } 181 | } 182 | */ 183 | 184 | criterion_group!(http, one_test); 185 | criterion_main!(http); 186 | -------------------------------------------------------------------------------- /benchmarks/benches/http_streaming.rs: -------------------------------------------------------------------------------- 1 | #![cfg_attr(rustfmt, rustfmt_skip)] 2 | 3 | #[global_allocator] 4 | static ALLOC: jemallocator::Jemalloc = jemallocator::Jemalloc; 5 | 6 | use codspeed_criterion_compat::*; 7 | use nom::{IResult, bytes::streaming::{tag, take_while1}, character::streaming::{line_ending, char}, multi::many, Parser}; 8 | 9 | #[cfg_attr(rustfmt, rustfmt_skip)] 10 | #[derive(Debug)] 11 | struct Request<'a> { 12 | method: &'a [u8], 13 | uri: &'a [u8], 14 | version: &'a [u8], 15 | } 16 | 17 | #[derive(Debug)] 18 | struct Header<'a> { 19 | name: &'a [u8], 20 | value: Vec<&'a [u8]>, 21 | } 22 | 23 | #[cfg_attr(rustfmt, rustfmt_skip)] 24 | #[cfg_attr(feature = "cargo-clippy", allow(match_same_arms))] 25 | fn is_token(c: u8) -> bool { 26 | match c { 27 | 128..=255 => false, 28 | 0..=31 => false, 29 | b'(' => false, 30 | b')' => false, 31 | b'<' => false, 32 | b'>' => false, 33 | b'@' => false, 34 | b',' => false, 35 | b';' => false, 36 | b':' => false, 37 | b'\\' => false, 38 | b'"' => false, 39 | b'/' => false, 40 | b'[' => false, 41 | b']' => false, 42 | b'?' => false, 43 | b'=' => false, 44 | b'{' => false, 45 | b'}' => false, 46 | b' ' => false, 47 | _ => true, 48 | } 49 | } 50 | 51 | fn not_line_ending(c: u8) -> bool { 52 | c != b'\r' && c != b'\n' 53 | } 54 | 55 | fn is_space(c: u8) -> bool { 56 | c == b' ' 57 | } 58 | 59 | fn is_not_space(c: u8) -> bool { 60 | c != b' ' 61 | } 62 | fn is_horizontal_space(c: u8) -> bool { 63 | c == b' ' || c == b'\t' 64 | } 65 | 66 | fn is_version(c: u8) -> bool { 67 | c >= b'0' && c <= b'9' || c == b'.' 68 | } 69 | 70 | fn request_line(input: &[u8]) -> IResult<&[u8], Request<'_>> { 71 | let (input, method) = take_while1(is_token)(input)?; 72 | let (input, _) = take_while1(is_space)(input)?; 73 | let (input, uri) = take_while1(is_not_space)(input)?; 74 | let (input, _) = take_while1(is_space)(input)?; 75 | let (input, version) = http_version(input)?; 76 | let (input, _) = line_ending(input)?; 77 | 78 | Ok((input, Request {method, uri, version})) 79 | } 80 | 81 | fn http_version(input: &[u8]) -> IResult<&[u8], &[u8]> { 82 | let (input, _) = tag("HTTP/")(input)?; 83 | let (input, version) = take_while1(is_version)(input)?; 84 | 85 | Ok((input, version)) 86 | } 87 | 88 | fn message_header_value(input: &[u8]) -> IResult<&[u8], &[u8]> { 89 | let (input, _) = take_while1(is_horizontal_space)(input)?; 90 | let (input, data) = take_while1(not_line_ending)(input)?; 91 | let (input, _) = line_ending(input)?; 92 | 93 | Ok((input, data)) 94 | } 95 | 96 | fn message_header(input: &[u8]) -> IResult<&[u8], Header<'_>> { 97 | let (input, name) = take_while1(is_token)(input)?; 98 | let (input, _) = char(':')(input)?; 99 | let (input, value) = many(1.., message_header_value).parse(input)?; 100 | 101 | Ok((input, Header{ name, value })) 102 | } 103 | 104 | fn request(input: &[u8]) -> IResult<&[u8], (Request<'_>, Vec>)> { 105 | let (input, req) = request_line(input)?; 106 | let (input, h) = many(1.., message_header).parse(input)?; 107 | let (input, _) = line_ending(input)?; 108 | 109 | Ok((input, (req, h))) 110 | } 111 | 112 | 113 | fn parse(data: &[u8]) -> Option, Vec>)>> { 114 | let mut buf = &data[..]; 115 | let mut v = Vec::new(); 116 | loop { 117 | match request(buf) { 118 | Ok((b, r)) => { 119 | buf = b; 120 | v.push(r); 121 | 122 | if b.is_empty() { 123 | 124 | //println!("{}", i); 125 | break; 126 | } 127 | } 128 | Err(e) => { 129 | println!("error: {:?}", e); 130 | return None; 131 | }, 132 | } 133 | } 134 | 135 | Some(v) 136 | } 137 | 138 | /* 139 | #[bench] 140 | fn small_test(b: &mut Bencher) { 141 | let data = include_bytes!("../../http-requests.txt"); 142 | b.iter(||{ 143 | parse(data) 144 | }); 145 | } 146 | 147 | #[bench] 148 | fn bigger_test(b: &mut Bencher) { 149 | let data = include_bytes!("../../bigger.txt"); 150 | b.iter(||{ 151 | parse(data) 152 | }); 153 | } 154 | */ 155 | 156 | fn one_test(c: &mut Criterion) { 157 | let data = &b"GET / HTTP/1.1 158 | Host: www.reddit.com 159 | User-Agent: Mozilla/5.0 (Macintosh; Intel Mac OS X 10.8; rv:15.0) Gecko/20100101 Firefox/15.0.1 160 | Accept: text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8 161 | Accept-Language: en-us,en;q=0.5 162 | Accept-Encoding: gzip, deflate 163 | Connection: keep-alive 164 | 165 | "[..]; 166 | 167 | let mut http_group = c.benchmark_group("http"); 168 | http_group.throughput(Throughput::Bytes(data.len() as u64)); 169 | http_group.bench_with_input( 170 | BenchmarkId::new("parse_streaming", data.len()), 171 | data, 172 | |b, data| { 173 | b.iter(|| parse(data).unwrap()); 174 | }); 175 | 176 | http_group.finish(); 177 | } 178 | 179 | /* 180 | fn main() { 181 | let mut contents: Vec = Vec::new(); 182 | 183 | { 184 | use std::io::Read; 185 | 186 | let mut file = File::open(env::args().nth(1).expect("File to read")).expect("Failed to open file"); 187 | 188 | let _ = file.read_to_end(&mut contents).unwrap(); 189 | } 190 | 191 | let buf = &contents[..]; 192 | loop { 193 | parse(buf); 194 | } 195 | } 196 | */ 197 | 198 | criterion_group!(http_streaming, one_test); 199 | criterion_main!(http_streaming); 200 | -------------------------------------------------------------------------------- /benchmarks/benches/ini.rs: -------------------------------------------------------------------------------- 1 | #[global_allocator] 2 | static ALLOC: jemallocator::Jemalloc = jemallocator::Jemalloc; 3 | 4 | use codspeed_criterion_compat::*; 5 | 6 | use nom::{ 7 | bytes::complete::take_while, 8 | character::complete::{ 9 | alphanumeric1 as alphanumeric, char, multispace1 as multispace, space1 as space, 10 | }, 11 | combinator::{map_res, opt}, 12 | multi::many, 13 | sequence::{delimited, pair, separated_pair, terminated, tuple}, 14 | IResult, Parser, 15 | }; 16 | use std::collections::HashMap; 17 | use std::str; 18 | 19 | fn category(i: &[u8]) -> IResult<&[u8], &str> { 20 | map_res( 21 | delimited(char('['), take_while(|c| c != b']'), char(']')), 22 | str::from_utf8, 23 | ) 24 | .parse_complete(i) 25 | } 26 | 27 | fn key_value(i: &[u8]) -> IResult<&[u8], (&str, &str)> { 28 | let (i, key) = map_res(alphanumeric, str::from_utf8).parse_complete(i)?; 29 | let (i, _) = tuple((opt(space), char('='), opt(space))).parse_complete(i)?; 30 | let (i, val) = 31 | map_res(take_while(|c| c != b'\n' && c != b';'), str::from_utf8).parse_complete(i)?; 32 | let (i, _) = opt(pair(char(';'), take_while(|c| c != b'\n'))).parse_complete(i)?; 33 | Ok((i, (key, val))) 34 | } 35 | 36 | fn categories(i: &[u8]) -> IResult<&[u8], HashMap<&str, HashMap<&str, &str>>> { 37 | many( 38 | 0.., 39 | separated_pair( 40 | category, 41 | opt(multispace), 42 | many(0.., terminated(key_value, opt(multispace))), 43 | ), 44 | ) 45 | .parse_complete(i) 46 | } 47 | 48 | fn bench_ini(c: &mut Criterion) { 49 | let str = "[owner] 50 | name=John Doe 51 | organization=Acme Widgets Inc. 52 | 53 | [database] 54 | server=192.0.2.62 55 | port=143 56 | file=payroll.dat 57 | \0"; 58 | 59 | let mut group = c.benchmark_group("ini"); 60 | group.throughput(Throughput::Bytes(str.len() as u64)); 61 | group.bench_function(BenchmarkId::new("parse", str.len()), |b| { 62 | b.iter(|| categories(str.as_bytes()).unwrap()) 63 | }); 64 | } 65 | 66 | fn bench_ini_keys_and_values(c: &mut Criterion) { 67 | let str = "server=192.0.2.62 68 | port=143 69 | file=payroll.dat 70 | \0"; 71 | 72 | fn acc(i: &[u8]) -> IResult<&[u8], Vec<(&str, &str)>> { 73 | many(0.., key_value).parse_complete(i) 74 | } 75 | 76 | let mut group = c.benchmark_group("ini keys and values"); 77 | group.throughput(Throughput::Bytes(str.len() as u64)); 78 | group.bench_function(BenchmarkId::new("parse", str.len()), |b| { 79 | b.iter(|| acc(str.as_bytes()).unwrap()) 80 | }); 81 | } 82 | 83 | fn bench_ini_key_value(c: &mut Criterion) { 84 | let str = "server=192.0.2.62\n"; 85 | 86 | let mut group = c.benchmark_group("ini key value"); 87 | group.throughput(Throughput::Bytes(str.len() as u64)); 88 | group.bench_function(BenchmarkId::new("parse", str.len()), |b| { 89 | b.iter(|| key_value(str.as_bytes()).unwrap()) 90 | }); 91 | } 92 | 93 | criterion_group!( 94 | benches, 95 | bench_ini, 96 | bench_ini_keys_and_values, 97 | bench_ini_key_value 98 | ); 99 | criterion_main!(benches); 100 | -------------------------------------------------------------------------------- /benchmarks/benches/ini_str.rs: -------------------------------------------------------------------------------- 1 | #[global_allocator] 2 | static ALLOC: jemallocator::Jemalloc = jemallocator::Jemalloc; 3 | 4 | use codspeed_criterion_compat::*; 5 | 6 | use nom::{ 7 | bytes::complete::{is_a, tag, take_till, take_while}, 8 | character::complete::{alphanumeric1 as alphanumeric, char, not_line_ending, space0 as space}, 9 | combinator::opt, 10 | multi::many, 11 | sequence::{delimited, pair, terminated, tuple}, 12 | IResult, Parser, 13 | }; 14 | 15 | use std::collections::HashMap; 16 | 17 | fn is_line_ending_or_comment(chr: char) -> bool { 18 | chr == ';' || chr == '\n' 19 | } 20 | 21 | fn space_or_line_ending(i: &str) -> IResult<&str, &str> { 22 | is_a(" \r\n")(i) 23 | } 24 | 25 | fn category(i: &str) -> IResult<&str, &str> { 26 | terminated( 27 | delimited(char('['), take_while(|c| c != ']'), char(']')), 28 | opt(is_a(" \r\n")), 29 | ) 30 | .parse(i) 31 | } 32 | 33 | fn key_value(i: &str) -> IResult<&str, (&str, &str)> { 34 | let (i, key) = alphanumeric(i)?; 35 | let (i, _) = tuple((opt(space), tag("="), opt(space)))(i)?; 36 | let (i, val) = take_till(is_line_ending_or_comment)(i)?; 37 | let (i, _) = opt(space).parse_complete(i)?; 38 | let (i, _) = opt(pair(tag(";"), not_line_ending)).parse_complete(i)?; 39 | let (i, _) = opt(space_or_line_ending).parse_complete(i)?; 40 | Ok((i, (key, val))) 41 | } 42 | 43 | fn keys_and_values(input: &str) -> IResult<&str, HashMap<&str, &str>> { 44 | many(0.., key_value).parse_complete(input) 45 | } 46 | 47 | fn category_and_keys(i: &str) -> IResult<&str, (&str, HashMap<&str, &str>)> { 48 | pair(category, keys_and_values).parse_complete(i) 49 | } 50 | 51 | fn categories(input: &str) -> IResult<&str, HashMap<&str, HashMap<&str, &str>>> { 52 | many(0.., category_and_keys).parse_complete(input) 53 | } 54 | 55 | fn bench_ini_str(c: &mut Criterion) { 56 | let s = "[owner] 57 | name=John Doe 58 | organization=Acme Widgets Inc. 59 | 60 | [database] 61 | server=192.0.2.62 62 | port=143 63 | file=payroll.dat 64 | "; 65 | 66 | let mut group = c.benchmark_group("ini str"); 67 | group.throughput(Throughput::Bytes(s.len() as u64)); 68 | group.bench_function(BenchmarkId::new("parse", s.len()), |b| { 69 | b.iter(|| categories(s).unwrap()) 70 | }); 71 | } 72 | 73 | criterion_group!(benches, bench_ini_str); 74 | criterion_main!(benches); 75 | -------------------------------------------------------------------------------- /benchmarks/benches/json_streaming.rs: -------------------------------------------------------------------------------- 1 | #[global_allocator] 2 | static ALLOC: jemallocator::Jemalloc = jemallocator::Jemalloc; 3 | 4 | use codspeed_criterion_compat::*; 5 | use nom::{ 6 | branch::alt, 7 | bytes::streaming::{tag, take}, 8 | character::streaming::{anychar, char, multispace0, none_of}, 9 | combinator::{map, map_opt, map_res, value, verify}, 10 | error::{ErrorKind, ParseError}, 11 | multi::{fold, separated_list0}, 12 | number::streaming::{double, recognize_float}, 13 | sequence::{delimited, preceded, separated_pair}, 14 | IResult, Parser, 15 | }; 16 | 17 | use std::collections::HashMap; 18 | 19 | #[derive(Debug, PartialEq, Clone)] 20 | pub enum JsonValue { 21 | Null, 22 | Bool(bool), 23 | Str(String), 24 | Num(f64), 25 | Array(Vec), 26 | Object(HashMap), 27 | } 28 | 29 | fn boolean(input: &str) -> IResult<&str, bool> { 30 | alt((value(false, tag("false")), value(true, tag("true")))).parse(input) 31 | } 32 | 33 | fn u16_hex(input: &str) -> IResult<&str, u16> { 34 | map_res(take(4usize), |s| u16::from_str_radix(s, 16)).parse(input) 35 | } 36 | 37 | fn unicode_escape(input: &str) -> IResult<&str, char> { 38 | map_opt( 39 | alt(( 40 | // Not a surrogate 41 | map(verify(u16_hex, |cp| !(0xD800..0xE000).contains(cp)), |cp| { 42 | cp as u32 43 | }), 44 | // See https://en.wikipedia.org/wiki/UTF-16#Code_points_from_U+010000_to_U+10FFFF for details 45 | map( 46 | verify( 47 | separated_pair(u16_hex, tag("\\u"), u16_hex), 48 | |(high, low)| (0xD800..0xDC00).contains(high) && (0xDC00..0xE000).contains(low), 49 | ), 50 | |(high, low)| { 51 | let high_ten = (high as u32) - 0xD800; 52 | let low_ten = (low as u32) - 0xDC00; 53 | (high_ten << 10) + low_ten + 0x10000 54 | }, 55 | ), 56 | )), 57 | // Could probably be replaced with .unwrap() or _unchecked due to the verify checks 58 | std::char::from_u32, 59 | ) 60 | .parse(input) 61 | } 62 | 63 | fn character(input: &str) -> IResult<&str, char> { 64 | let (input, c) = none_of("\"")(input)?; 65 | if c == '\\' { 66 | alt(( 67 | map_res(anychar, |c| { 68 | Ok(match c { 69 | '"' | '\\' | '/' => c, 70 | 'b' => '\x08', 71 | 'f' => '\x0C', 72 | 'n' => '\n', 73 | 'r' => '\r', 74 | 't' => '\t', 75 | _ => return Err(()), 76 | }) 77 | }), 78 | preceded(char('u'), unicode_escape), 79 | )) 80 | .parse(input) 81 | } else { 82 | Ok((input, c)) 83 | } 84 | } 85 | 86 | fn string(input: &str) -> IResult<&str, String> { 87 | delimited( 88 | char('"'), 89 | fold(0.., character, String::new, |mut string, c| { 90 | string.push(c); 91 | string 92 | }), 93 | char('"'), 94 | ) 95 | .parse(input) 96 | } 97 | 98 | fn ws<'a, O, E: ParseError<&'a str>, F: Parser<&'a str, Output = O, Error = E>>( 99 | f: F, 100 | ) -> impl Parser<&'a str, Output = O, Error = E> { 101 | delimited(multispace0, f, multispace0) 102 | } 103 | 104 | fn array(input: &str) -> IResult<&str, Vec> { 105 | delimited( 106 | char('['), 107 | ws(separated_list0(ws(char(',')), json_value)), 108 | char(']'), 109 | ) 110 | .parse(input) 111 | } 112 | 113 | fn object(input: &str) -> IResult<&str, HashMap> { 114 | map( 115 | delimited( 116 | char('{'), 117 | ws(separated_list0( 118 | ws(char(',')), 119 | separated_pair(string, ws(char(':')), json_value), 120 | )), 121 | char('}'), 122 | ), 123 | |key_values| key_values.into_iter().collect(), 124 | ) 125 | .parse(input) 126 | } 127 | 128 | fn json_value(input: &str) -> IResult<&str, JsonValue> { 129 | use JsonValue::*; 130 | 131 | alt(( 132 | value(Null, tag("null")), 133 | map(boolean, Bool), 134 | map(string, Str), 135 | map(double, Num), 136 | map(array, Array), 137 | map(object, Object), 138 | )) 139 | .parse(input) 140 | } 141 | 142 | fn json(input: &str) -> IResult<&str, JsonValue> { 143 | ws(json_value).parse(input) 144 | } 145 | 146 | fn json_bench(c: &mut Criterion) { 147 | let data = " { \"a\"\t: 42, 148 | \"b\": [ \"x\", \"y\", 12 ,\"\\u2014\", \"\\uD83D\\uDE10\"] , 149 | \"c\": { \"hello\" : \"world\" 150 | } 151 | } ;"; 152 | 153 | // println!("data:\n{:?}", json(data)); 154 | c.bench_function("json streaming", |b| { 155 | b.iter(|| json(data).unwrap()); 156 | }); 157 | } 158 | 159 | fn recognize_float_bytes(c: &mut Criterion) { 160 | println!( 161 | "recognize_float_bytes result: {:?}", 162 | recognize_float::<_, (_, ErrorKind)>(&b"-1.234E-12;"[..]) 163 | ); 164 | c.bench_function("recognize float bytes streaming", |b| { 165 | b.iter(|| recognize_float::<_, (_, ErrorKind)>(&b"-1.234E-12;"[..])); 166 | }); 167 | } 168 | 169 | fn recognize_float_str(c: &mut Criterion) { 170 | println!( 171 | "recognize_float_str result: {:?}", 172 | recognize_float::<_, (_, ErrorKind)>("-1.234E-12;") 173 | ); 174 | c.bench_function("recognize float str streaming", |b| { 175 | b.iter(|| recognize_float::<_, (_, ErrorKind)>("-1.234E-12;")); 176 | }); 177 | } 178 | 179 | fn float_bytes(c: &mut Criterion) { 180 | println!( 181 | "float_bytes result: {:?}", 182 | double::<_, (_, ErrorKind)>(&b"-1.234E-12;"[..]) 183 | ); 184 | c.bench_function("float bytes streaming", |b| { 185 | b.iter(|| double::<_, (_, ErrorKind)>(&b"-1.234E-12"[..])); 186 | }); 187 | } 188 | 189 | fn float_str(c: &mut Criterion) { 190 | println!( 191 | "float_str result: {:?}", 192 | double::<_, (_, ErrorKind)>("-1.234E-12;") 193 | ); 194 | c.bench_function("float str streaming", |b| { 195 | b.iter(|| double::<_, (_, ErrorKind)>("-1.234E-12;")); 196 | }); 197 | } 198 | 199 | use nom::Err; 200 | use nom::ParseTo; 201 | fn std_float(input: &[u8]) -> IResult<&[u8], f64, (&[u8], ErrorKind)> { 202 | match recognize_float(input) { 203 | Err(e) => Err(e), 204 | Ok((i, s)) => match s.parse_to() { 205 | Some(n) => Ok((i, n)), 206 | None => Err(Err::Error((i, ErrorKind::Float))), 207 | }, 208 | } 209 | } 210 | 211 | fn std_float_bytes(c: &mut Criterion) { 212 | println!( 213 | "std_float_bytes result: {:?}", 214 | std_float(&b"-1.234E-12;"[..]) 215 | ); 216 | c.bench_function("std_float bytes streaming", |b| { 217 | b.iter(|| std_float(&b"-1.234E-12;"[..])); 218 | }); 219 | } 220 | 221 | criterion_group!( 222 | benches, 223 | json_bench, 224 | recognize_float_bytes, 225 | recognize_float_str, 226 | float_bytes, 227 | std_float_bytes, 228 | float_str 229 | ); 230 | criterion_main!(benches); 231 | -------------------------------------------------------------------------------- /benchmarks/benches/number.rs: -------------------------------------------------------------------------------- 1 | #[global_allocator] 2 | static ALLOC: jemallocator::Jemalloc = jemallocator::Jemalloc; 3 | 4 | use codspeed_criterion_compat::*; 5 | use nom::number::complete; 6 | 7 | fn parser(i: &[u8]) -> nom::IResult<&[u8], u64> { 8 | complete::be_u64(i) 9 | } 10 | 11 | fn number(c: &mut Criterion) { 12 | let data = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12]; 13 | 14 | parser(&data[..]).expect("should parse correctly"); 15 | c.bench_function("number", move |b| { 16 | b.iter(|| parser(&data[..]).unwrap()); 17 | }); 18 | } 19 | 20 | criterion_group!(benches, number); 21 | criterion_main!(benches); 22 | -------------------------------------------------------------------------------- /benchmarks/src/lib.rs: -------------------------------------------------------------------------------- 1 | #[cfg(test)] 2 | mod tests { 3 | #[test] 4 | fn it_works() { 5 | let result = 2 + 2; 6 | assert_eq!(result, 4); 7 | } 8 | } 9 | -------------------------------------------------------------------------------- /doc/archive/FAQ.md: -------------------------------------------------------------------------------- 1 | # FAQ 2 | 3 | ### Using nightly to get better error messages 4 | 5 | **warning**: this only applies to nom 3. nom 4 uses the 6 | [compile_error](https://doc.rust-lang.org/std/macro.compile_error.html) macro 7 | available since Rust 1.20 8 | 9 | If you got the following error when compiling your nom parser: 10 | 11 | ``` 12 | error[E0425]: cannot find value `INVALID_NOM_SYNTAX_PLEASE_SEE_FAQ` in this scope 13 | --> src/lib.rs:111:7 14 | | 15 | 111 | INVALID_NOM_SYNTAX_PLEASE_SEE_FAQ //https://github.com/Geal/nom/blob/main/doc/archive/FAQ.md#using-nightly-to-get-better-error-messages 16 | | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ not found in this scope 17 | ``` 18 | 19 | It means that you are using Rust stable, and that one of your nom parsers has an invalid syntax. 20 | If you can switch to a nightly Rust compiler (as an example, with `rustup default nightly`), 21 | and if you activate the `nightly` feature on your nom dependency like this: 22 | 23 | ```toml 24 | [dependencies.nom] 25 | version = "^3" 26 | features = ["nightly"] 27 | ``` 28 | 29 | You can get more helpful error messages, such as this one: 30 | 31 | ``` 32 | $ cargo test --features nightly 33 | Compiling compiler_error v0.1.1 34 | Compiling nom v3.0.0 (file:///Users/geal/dev/rust/projects/nom) 35 | error: "do_parse is missing the return value. A do_parse call must end 36 | with a return value between parenthesis, as follows: 37 | 38 | do_parse!( 39 | a: tag!(\"abcd\") >> 40 | b: tag!(\"efgh\") >> 41 | 42 | ( Value { a: a, b: b } ) 43 | " 44 | --> src/sequence.rs:368:5 45 | | 46 | 368 | / compiler_error!("do_parse is missing the return value. A do_parse call must end 47 | 369 | | with a return value between parenthesis, as follows: 48 | 370 | | 49 | 371 | | do_parse!( 50 | ... | 51 | 375 | | ( Value { a: a, b: b } ) 52 | 376 | | "); 53 | | |______^ 54 | ... 55 | 851 | / named!(no_compiler, 56 | 852 | | do_parse!( 57 | 853 | | length: be_u8 >> 58 | 854 | | bytes: take!(length) 59 | 855 | | ) 60 | 856 | | ); 61 | | |___- in this macro invocation 62 | 63 | error: aborting due to previous error(s) 64 | 65 | error: Could not compile `nom`. 66 | ``` 67 | 68 | If the error message is not helpful, please reach out on the [Gitter chat](https://gitter.im/Geal/nom) or the IRC channel (#nom on freenode), and show 69 | your code and the error message you got. 70 | 71 | ### nom 1.0 does not compile on Rust older than 1.4 72 | 73 | Typically, the error would look like this: 74 | 75 | ```ignore 76 | src/stream.rs:74:44: 74:64 error: the parameter type `E` may not live long enough [E0309] 77 | src/stream.rs:74 if let &ConsumerState::Done(_,ref o) = self.apply(consumer) { 78 | ^~~~~~~~~~~~~~~~~~~~ 79 | note: in expansion of if let expansion 80 | src/stream.rs:74:5: 78:6 note: expansion site 81 | src/stream.rs:74:44: 74:64 help: run `rustc --explain E0309` to see a detailed explanation 82 | src/stream.rs:74:44: 74:64 help: consider adding an explicit lifetime bound `E: 'b`... 83 | src/stream.rs:74:44: 74:64 note: ...so that the reference type `&stream::ConsumerState` does not outlive the data it points at 84 | src/stream.rs:74 if let &ConsumerState::Done(_,ref o) = self.apply(consumer) { 85 | ^~~~~~~~~~~~~~~~~~~~ 86 | note: in expansion of if let expansion 87 | src/stream.rs:74:5: 78:6 note: expansion site 88 | src/stream.rs:74:44: 74:64 error: the parameter type `M` may not live long enough [E0309] 89 | src/stream.rs:74 if let &ConsumerState::Done(_,ref o) = self.apply(consumer) { 90 | ^~~~~~~~~~~~~~~~~~~~ 91 | note: in expansion of if let expansion 92 | src/stream.rs:74:5: 78:6 note: expansion site 93 | src/stream.rs:74:44: 74:64 help: run `rustc --explain E0309` to see a detailed explanation 94 | src/stream.rs:74:44: 74:64 help: consider adding an explicit lifetime bound `M: 'b`... 95 | src/stream.rs:74:44: 74:64 note: ...so that the reference type `&stream::ConsumerState` does not outlive the data it points at 96 | src/stream.rs:74 if let &ConsumerState::Done(_,ref o) = self.apply(consumer) { 97 | ^~~~~~~~~~~~~~~~~~~~ 98 | note: in expansion of if let expansion 99 | src/stream.rs:74:5: 78:6 note: expansion site 100 | error: aborting due to 2 previous errors 101 | 102 | Could not compile `nom`. 103 | ``` 104 | 105 | This is caused by some lifetime issues that may be fixed in a future version of nom. In the meantime, you can add `default-features=false` to nom's declaration in `Cargo.toml` to deactivate this part of the code: 106 | 107 | ```toml 108 | [dependencies.nom] 109 | version = "~1.0.0" 110 | default-features = false 111 | ``` 112 | 113 | ### The compiler indicates `error: expected an item keyword` then points to the function's return type in `named!`: 114 | 115 | ```ignore 116 | error: expected an item keyword 117 | named!(multi>, many0!( map_res!(tag!( "abcd" ), str::from_utf8) ) ); 118 | ^~~ 119 | ``` 120 | 121 | This happens because the macro processor mistakes `>>` for an operator. It will work correctly by adding a space, like this: `named!(multi< Vec<&str> >, ...` 122 | -------------------------------------------------------------------------------- /doc/archive/how_nom_macros_work.md: -------------------------------------------------------------------------------- 1 | # How nom macros work 2 | 3 | **NOTE: macros were removed in nom 7. You should now use the function based combinators** 4 | 5 | nom uses Rust macros heavily to provide a nice syntax and generate parsing code. 6 | This has multiple advantages: 7 | 8 | * It gives the appearance of combining functions without the runtime cost of closures 9 | * It helps Rust's code inference and borrow checking (less lifetime issues than iterator based solutions) 10 | * The generated code is very linear, just a large chain of pattern matching 11 | 12 | As a prerequisite, if you need more information on macros, please refer to 13 | [the little book of Rust macros](https://danielkeep.github.io/tlborm/book/README.html) 14 | and the [Macromancy talk](https://www.youtube.com/watch?v=8rodUyaGkQo) 15 | 16 | # Defining a new macro 17 | 18 | Let's take the `opt!` macro as example: `opt!` returns `IResult>`, 19 | producing a `Some(o)` if the child parser succeeded, and None otherwise. Here 20 | is how you could use it: 21 | 22 | ```rust 23 | named!(opt_tag>, opt!(digit)); 24 | ``` 25 | 26 | And here is how it is defined: 27 | 28 | ```rust 29 | #[macro_export] 30 | macro_rules! opt( 31 | ($i:expr, $submac:ident!( $($args:tt)* )) => ({ 32 | match $submac!($i, $($args)*) { 33 | Ok((i,o)) => Ok((i, Some(o))), 34 | Err(Err::Error(_)) => Ok(($i, None)), 35 | Err(e) => Err(e), 36 | } 37 | }); 38 | ($i:expr, $f:expr) => ( 39 | opt!($i, call!($f)); 40 | ); 41 | ); 42 | ``` 43 | 44 | To define a Rust macro, you indicate the name of the macro, then each pattern it 45 | is meant to apply to: 46 | 47 | ```rust 48 | macro_rules! my_macro ( 49 | () => ( ); 50 | () => ( ); 51 | ); 52 | ``` 53 | 54 | ## Passing input 55 | 56 | The first thing you can see in `opt!` is that the pattern have an additional 57 | parameter that you do not use: 58 | 59 | ```rust 60 | ($i:expr, $f:expr) 61 | ``` 62 | 63 | While you call: 64 | 65 | ```rust 66 | opt!(digit) 67 | ``` 68 | 69 | This is the first trick of nom macros: the first parameter, usually `$i` or `$input`, 70 | is the input data, passed by the parent parser. The expression using `named!` will 71 | translate like this: 72 | 73 | ```rust 74 | named!(opt_tag>, opt!(digit)); 75 | ``` 76 | 77 | to 78 | 79 | ```rust 80 | fn opt_tag(input:&[u8]) -> IResult<&[u8], Option<&[u8]>> { 81 | opt!(input, digit) 82 | } 83 | ``` 84 | 85 | This is how combinators hide all the plumbing: they receive the input automatically 86 | from the parent parser, may use that input, and pass the remaining input to the child 87 | parser. 88 | 89 | When you have multiple submacros, such as this example, the input is always passed 90 | to the first, top level combinator: 91 | 92 | ```rust 93 | macro_rules! multispaced ( 94 | ($i:expr, $submac:ident!( $($args:tt)* )) => ( 95 | delimited!($i, opt!(multispace), $submac!($($args)*), opt!(multispace)); 96 | ); 97 | ($i:expr, $f:expr) => ( 98 | multispaced!($i, call!($f)); 99 | ); 100 | ); 101 | ``` 102 | 103 | Here, `delimited!` will apply `opt!(multispace)` on the input, and if successful, 104 | will apply `$submac!($($args)*)` on the remaining input, and if successful, store 105 | the output and apply `opt!(multispace)` on the remaining input. 106 | 107 | ## Applying on macros or functions 108 | 109 | The second trick you can see is the two patterns: 110 | 111 | ```rust 112 | #[macro_export] 113 | macro_rules! opt( 114 | ($i:expr, $submac:ident!( $($args:tt)* )) => ( 115 | [...] 116 | ); 117 | ($i:expr, $f:expr) => ( 118 | opt!($i, call!($f)); 119 | ); 120 | ); 121 | ``` 122 | 123 | The first pattern is used to receive a macro as child parser, like this: 124 | 125 | ```rust 126 | opt!(tag!("abcd")) 127 | ``` 128 | 129 | The second pattern can receive a function, and transforms it in a macro, then calls 130 | itself again. This is done to avoid repeating code. Applying `opt!` with `digit` 131 | as argument would be transformed from this: 132 | 133 | ```rust 134 | opt!(digit) 135 | ``` 136 | 137 | transformed with the second pattern: 138 | 139 | ```rust 140 | opt!(call!(digit)) 141 | ``` 142 | 143 | The `call!` macro transforms `call!(input, f)` into `f(input)`. If you need to pass 144 | more parameters to the function, you can Use `call!(input, f, arg, arg2)` to get 145 | `f(input, arg, arg2)`. 146 | 147 | ## Using the macro's parameters 148 | 149 | The macro argument is decomposed into `$submac:ident!`, the macro's name and a bang, 150 | and `( $($args:tt)* )`, the tokens contained between the parenthesis of the macro call. 151 | 152 | ```rust 153 | ($i:expr, $submac:ident!( $($args:tt)* )) => ({ 154 | match $submac!($i, $($args)*) { 155 | Ok((i,o)) => Ok((i, Some(o))), 156 | Err(Err::Error(_)) => Ok(($i, None)), 157 | Err(e) => Err(e), 158 | } 159 | }); 160 | ``` 161 | 162 | The macro is called with the input we got, as first argument, then we pattern 163 | match on the result. Every combinator or parser must return a `IResult`, which 164 | is a `Result<(I, O), nom::Err>`, so you know which patterns you need to 165 | verify. If you need to call two parsers in a sequence, use the first parameter 166 | of `Ok((i,o))`: It is the input remaining after the first parser was applied. 167 | 168 | As an example, see how the `preceded!` macro works: 169 | 170 | ```rust 171 | ($i:expr, $submac:ident!( $($args:tt)* ), $submac2:ident!( $($args2:tt)* )) => ( 172 | { 173 | match $submac!($i, $($args)*) { 174 | Err(e) => Err(e), 175 | Ok((i1, _)) => { 176 | $submac2!(i1, $($args2)*) 177 | }, 178 | } 179 | } 180 | ); 181 | ``` 182 | 183 | It applies the first parser, and if it succeeds, discards its result, and applies 184 | the remaining input `i1` to the second parser. 185 | -------------------------------------------------------------------------------- /doc/archive/upgrading_to_nom_1.md: -------------------------------------------------------------------------------- 1 | # Upgrading to nom 1.0 2 | 3 | The 1.0 release of nom is one of the biggest since the beginning of the project. Its goal was to rework some core parts to be more flexible, and clean code that was awkward or unclear. This resulted in breaking changes, that I hope will not happen again in the future (but hey, we are Rust developers, breaking changes are FUN for us!). 4 | 5 | Here are a few tips to update your code to run with nom 1.0: 6 | 7 | # Error typing 8 | 9 | `nom::Err` now depends on two generic types, the position `P` and the error type `E`: 10 | 11 | ```rust 12 | pub enum Err{ 13 | Code(ErrorKind), 14 | Node(ErrorKind, Box>), 15 | Position(ErrorKind, P), 16 | NodePosition(ErrorKind, P, Box>) 17 | } 18 | ``` 19 | 20 | The default error type is `u32` to keep some compatibility with older code. To update your code, the first step is to **replace all usages of `nom::ErrorCode` by `nom::ErrorKind`**. `ErrorKind` is now an enum that contains the same instances as the previous `ErrorCode`, with an additional generic parameter: 21 | 22 | ```rust 23 | pub enum ErrorKind { 24 | Custom(E), 25 | Tag, 26 | MapRes, 27 | MapOpt, 28 | Alt, 29 | [...] 30 | } 31 | ``` 32 | 33 | `ErrorKind::Custom` is where you will store your custom error type. Note that default nom parsers like `alphabetic` use `u32` as custom type, so you may need to translate the error types coming from those parsers like this: 34 | 35 | ```rust 36 | fix_error!(CustomErrorType, alphabetic) 37 | ``` 38 | 39 | Since the error type is now an enum instead of a `u32`, you can now **replace any `ErrorCode::Tag as u32` by `ErrorKind::Tag`**. 40 | 41 | # Lifetime elision 42 | 43 | The error type is now completely generic over the input type, so the lifetime that appeared in `IResult` is not necessary anymore. It changes function declarations like this: 44 | 45 | ```rust 46 | fn parse_status<'a>(i: &'a [u8]) -> IResult<'a, &'a [u8], Status> 47 | 48 | // To this: 49 | fn parse_status(i: &[u8]) -> IResult<&[u8], Status> 50 | ``` 51 | 52 | # Producers and consumers 53 | 54 | The old implementation was not flexible, and a bit slow (because of allocations). The new implementation can be driven more precisely outside of the consumer, step by step if needed, can return a result, has custom error types, and can combine consumers. You can see [an example in the repository](https://github.com/rust-bakery/nom/blob/1.0/tests/omnom.rs). 55 | 56 | # Changes around `Incomplete` 57 | 58 | * `chain!` will now count how much data has been consumed before a child parser returns `Incomplete`, and return an `Incomplete` with the added data size 59 | * an optional parser (in `opt!` or `chain!`) will return `Incomplete` if the child parser returned `Incomplete`, instead of stopping there. This is the correct behaviour, because the result will be the same if the data comes in chunks or complete from the start 60 | * `alt!` will now return `Incomplete` if one of its alternatives returns `Incomplete` instead of skipping to the next branch 61 | 62 | In the cases where you know that the data you get is complete, you can wrap a parser with `complete!`. This combinator will transform `Incomplete` in an `Error`. 63 | 64 | # Other changes 65 | 66 | `filter!` has been renamed to `take_while!` 67 | 68 | -------------------------------------------------------------------------------- /doc/archive/upgrading_to_nom_2.md: -------------------------------------------------------------------------------- 1 | # Upgrading to nom 2.0 2 | 3 | The 2.0 release of nom adds a lot of new features, but it was also time for a big cleanup of badly named functions and macros, awkwardly written features and redundant functionality. So this release has some breaking changes, but most of them are harmless. 4 | 5 | ## Simple VS verbose errors 6 | 7 | The error management system of nom 1.0 is powerful: it allows you to aggregate errors as you backtrack in the parser tree, and gives you clear indications about which combinators worked on which part of the input. Unfortunately, this slowed down the parsers a bit, since a lot of code was generated to drop the error list when it was not used. 8 | 9 | Not everybody uses that feature, so it was moved behind a compilation feature called "verbose-errors". For projects that do not use the `Err` enum and do not try to make their own custom error codes, it should build correctly out of the box. You can get between 30% and 50% perf gains on some parsers by updating to 2.0. 10 | 11 | For the parsers using it, you will probably get something like the following compilation error: 12 | 13 | ``` 14 | error: no associated item named `Code` found for type `nom::ErrorKind<_>` in the current scope 15 | --> src/metadata/parser.rs:309:31 16 | | 17 | 309 | _ => IResult::Error(Err::Code( 18 | | ^^^^^^^^^ 19 | 20 | error: no associated item named `Code` found for type `nom::ErrorKind<_>` in the current scope 21 | --> src/metadata/parser.rs:388:41 22 | | 23 | 388 | let result_invalid = IResult::Error(Err::Code(nom::ErrorKind::Custom( 24 | | ^^^^^^^^^ 25 | 26 | error: no associated item named `Position` found for type `nom::ErrorKind<_>` in the current scope 27 | --> src/utility/macros.rs:16:41 28 | | 29 | 16 | $crate::nom::IResult::Error($crate::nom::Err::Position( 30 | | ^^^^^^^^^^^^^^^^^^^^^^^^^^ 31 | | 32 | ::: src/metadata/parser.rs 33 | | 34 | 178| bytes: skip_bytes!(14, 2) ~ 35 | | - in this macro invocation 36 | 37 | error: no associated item named `Position` found for type `nom::ErrorKind<_>` in the current scope 38 | --> src/utility/macros.rs:16:41 39 | | 40 | 16 | $crate::nom::IResult::Error($crate::nom::Err::Position( 41 | | ^^^^^^^^^^^^^^^^^^^^^^^^^^ 42 | | 43 | ::: src/metadata/parser.rs 44 | | 45 | 201 | skip_bytes!(3), 46 | | - in this macro invocation 47 | ``` 48 | 49 | It is rather easy to fix, just activate the "verbose-errors" feature: 50 | 51 | ```diff 52 | -nom = "^1.0.0" 53 | nom = { version = "^2.0.0", features = ["verbose-errors"] } 54 | ``` 55 | 56 | If you only use `Err::Code` to make your custom error codes, you could switch to the simple errors, since it replaces the `Err` enum, which contained an `ErrorKind`, with the `ErrorKind` type directly. 57 | 58 | ## The eof function was removed 59 | 60 | The eof implementation was linked too much to the input type. This is now a macro combinator, called `eof!()`. 61 | 62 | If you see the following error, remove the `eof` import and replace all `eof` calls by `eof!()`. 63 | ``` 64 | error[E0432]: unresolved import `nom::eof` 65 | --> src/parser.rs:1:20 66 | | 67 | 1 | use nom::{IResult, eof, line_ending, not_line_ending, space}; 68 | | ^^^ no `eof` in `nom`. Did you mean to use `eol`? 69 | ``` 70 | 71 | ## Parsers returning `Incomplete` instead of an error on empty input 72 | 73 | `alpha`, `digit`, `alphanumeric`, `hex_digit`, `oct_digit`, `space`, `multispace`, `sized_buffer` will now return `Incomplete` if they get an empty input. If you get the following error message, you can wrap those calls with `complete!`, a combinator that transforms `Incomplete` to `Error`. 74 | 75 | ``` 76 | ---- rules::literals::tests::case_invalid_hexadecimal_no_number stdout ---- 77 | thread 'rules::literals::tests::case_invalid_hexadecimal_no_number' panicked at 'assertion failed: `(left == right)` (left: `Incomplete(Unknown)`, right: `Error(Position(HexDigit, []))`)', source/rules/literals.rs:726 78 | ``` 79 | 80 | This change was implemented to make these basic parsers more consistent. Please note that parsing the basic elements of a format, like the alphabet of a token, is always very specific to that format, and those functions may not always fit your needs. In that case, you can easily make your own with [`take_while`](take_while.m.html) and a function that test for the characters or bytes you need. 81 | 82 | ## `take_till!` iterates on bytes or chars, not on references to them 83 | 84 | The input types must now conform to a trait which requires changes to `take_till!`. If you get the following error: 85 | 86 | ``` 87 | error[E0308]: mismatched types 88 | --> src/linux/parser.rs:32:1 89 | | 90 | 32 | named!(parse_c_string, take_till!(is_nul_byte)); 91 | | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ expected &u8, found u8 92 | | 93 | = note: expected type `&u8` 94 | = note: found type `u8` 95 | = note: this error originates in a macro outside of the current crate 96 | ``` 97 | 98 | you can fix it with: 99 | 100 | ```diff 101 | -fn is_nul_byte(c: &u8) -> bool { 102 | - *c == 0x0 103 | +fn is_nul_byte(c: u8) -> bool { 104 | + c == 0x0 105 | ``` 106 | 107 | ## `length_value!`, `length_bytes!` refactoring 108 | 109 | The "length-value" pattern usually indicates that we get a length from the input, then take a slice of that size from the input, and convert that to a value of the type we need. The `length_value!` macro was using the length parameter to apply the value parser a specific number of times. 110 | 111 | - the `length_value!` macro was replaced by `length_count!` 112 | - the new `length_value!` macros takes a slice of the size obtained by the first child parser, then applies the second child parser on this slice. If the second parser returns incomplete, the parser fails 113 | - `length_data!` gets a length from its child parser, then returns a subslice of that length 114 | 115 | ``` 116 | error[E0308]: mismatched types 117 | --> src/tls.rs:378:37 118 | | 119 | 378 | cert_types: cert_types, 120 | | ^^^^^^^^^^ expected struct `std::vec::Vec`, found u8 121 | | 122 | = note: expected type `std::vec::Vec` 123 | = note: found type `u8` 124 | ``` 125 | 126 | ```diff 127 | fn parse_tls_handshake_msg_certificaterequest( i:&[u8] ) -> IResult<&[u8], TlsMessageHandshake> { 128 | chain!(i, 129 | - cert_types: length_value!(be_u8,be_u8) ~ 130 | + cert_types: length_count!(be_u8,be_u8) ~ 131 | sig_hash_algs_len: be_u16 ~ 132 | ``` 133 | 134 | ## `error!` does not exist anymore 135 | 136 | The `error!` macro, that was used to return a parsing error without backtracking through the parser tree, is now called `return_error!`. This change was done because the "log" crate also uses an `error!` macro, and they complained about the name conflict to nom instead of complaining to log, much to my dismay. 137 | 138 | The `add_error!` macro has also been renamed to `add_return_error!`. 139 | 140 | The compilation error you could get would be: 141 | 142 | ``` 143 | error: macro undefined: 'error!' 144 | --> src/parser.rs:205:10 145 | | 146 | 205 | error!(Custom(ParseError::InvalidData), 147 | | ^ 148 | ``` 149 | 150 | It is fixed by: 151 | 152 | ```diff 153 | named!(repeat<&str, u8, ParseError>, 154 | - error!(Custom(ParseError::RepeatNotNumeric), fix!( 155 | + return_error!(Custom(ParseError::RepeatNotNumeric), fix!( 156 | map_res!(flat_map!(take_s!(1), digit), FromStr::from_str)))); 157 | ``` 158 | 159 | ## The `offset()` method was moved to the `Offset` trait 160 | 161 | There is now an implementation of `Offset` for `&str`. The `HexDisplay` trait is now reserved for `&[u8]`. 162 | 163 | ## `AsChar::is_0_to_9` is now `AsChar::is_dec_digit` 164 | 165 | This makes the method naming more consistent. 166 | 167 | ## The number parsing macros with configurable endianness now take an enum as argument instead of a boolean 168 | 169 | Using a boolean to specify endianness was confusing, there is now the `nom::Endianness` enum: 170 | 171 | ```diff 172 | - named!(be_tst32, u32!(true)); 173 | - named!(le_tst32, u32!(false)); 174 | + named!(be_tst32, u32!(Endianness::Big)); 175 | + named!(le_tst32, u32!(Endianness::Little)); 176 | ``` 177 | 178 | ## End of line parsing 179 | 180 | There were different, incompatible ways to parse line endings. Now, the `eol`, `line_ending` and `not_line_ending` all have the same behaviour. First, test for '\n', then if it is not the right character, test for "\r\n". This fixes the length issues. 181 | -------------------------------------------------------------------------------- /doc/archive/upgrading_to_nom_4.md: -------------------------------------------------------------------------------- 1 | # Upgrading to nom 4.0 2 | 3 | The nom 4.0 is a nearly complete rewrite of nom's internal structures, along with a cleanup of a lot of parser and combinators whose semantics were unclear. Upgrading from previous nom versions can require a lot of changes, especially if you have a lot of unit tests. But most of those changes are pretty straightforward. 4 | 5 | ## Changes in internal structures 6 | 7 | Previous versions of nom all generated parsers with the following signature: 8 | 9 | ```rust 10 | fn parser(input: I) -> IResult { ... } 11 | ``` 12 | 13 | With the following definition for `IResult`: 14 | 15 | ```rust 16 | pub enum IResult { 17 | /// remaining input, result value 18 | Done(I,O), 19 | /// indicates the parser encountered an error. E is a custom error type you can redefine 20 | Error(Err), 21 | /// Incomplete contains a Needed, an enum that can represent a known quantity of input data, or unknown 22 | Incomplete(Needed) 23 | } 24 | 25 | pub enum Needed { 26 | /// needs more data, but we do not know how much 27 | Unknown, 28 | /// contains the required total data size 29 | Size(usize) 30 | } 31 | 32 | // if the "verbose-errors" feature is not active 33 | pub type Err = ErrorKind; 34 | 35 | // if the "verbose-errors" feature is active 36 | pub enum Err{ 37 | /// An error code, represented by an ErrorKind, which can contain a custom error code represented by E 38 | Code(ErrorKind), 39 | /// An error code, and the next error 40 | Node(ErrorKind, Vec>), 41 | /// An error code, and the input position 42 | Position(ErrorKind, P), 43 | /// An error code, the input position and the next error 44 | NodePosition(ErrorKind, P, Vec>) 45 | } 46 | ``` 47 | 48 | The new design uses the `Result` type from the standard library: 49 | 50 | ```rust 51 | pub type IResult = Result<(I, O), Err>; 52 | 53 | pub enum Err { 54 | /// There was not enough data 55 | Incomplete(Needed), 56 | /// The parser had an error (recoverable) 57 | Error(Context), 58 | /// The parser had an unrecoverable error 59 | Failure(Context), 60 | } 61 | 62 | pub enum Needed { 63 | /// needs more data, but we do not know how much 64 | Unknown, 65 | /// contains the required additional data size 66 | Size(usize) 67 | } 68 | 69 | // if the "verbose-errors" feature is inactive 70 | pub enum Context { 71 | Code(I, ErrorKind), 72 | } 73 | 74 | // if the "verbose-errors" feature is active 75 | pub enum Context { 76 | Code(I, ErrorKind), 77 | List(Vec<(I, ErrorKind)>), 78 | } 79 | ``` 80 | 81 | With this new design, the `Incomplete` case is now part of the error case, and we get a `Failure` 82 | case representing an unrecoverable error (combinators like `alt!` will not try another branch). 83 | The verbose error management is now a truly additive feature above the simple one (it adds a 84 | case to the `Context` enum). 85 | 86 | Error management types also get smaller and more efficient. We can now return 87 | the related input as part of the error in all cases. 88 | 89 | All of this will likely not affect your existing parsers, but require changes to the surrounding 90 | code that manipulates parser results. 91 | 92 | ## Faster parsers, new memory layout but with lower footprint 93 | 94 | These changes keep the same memory footprint in simple errors mode, and reduce it in verbose errors: 95 | 96 | | size of `IResult<&[u8], &[u8]>` | simple errors | verbose errors | 97 | |---|---|---| 98 | | nom 3 | 40 bytes | 64 bytes | 99 | | nom 4 | 40 bytes | 48 bytes | 100 | 101 | In addition, [parsers are faster in nom 4 than in nom 3](https://github.com/rust-bakery/nom/issues/356#issuecomment-333816834). This change is justified. 102 | 103 | ## Replacing parser result matchers 104 | 105 | Whenever you use pattern matching on the result of a parser, or compare it to another parser 106 | result (like in a unit test), you will have to perform the following changes: 107 | 108 | For the correct result case: 109 | 110 | ```rust 111 | IResult::Done(i, o) 112 | 113 | // becomes 114 | 115 | Ok((i, o)) 116 | ``` 117 | 118 | For the error case (note that argument position for `error_position` and other sibling macros was changed 119 | for the sake of consistency with the rest of the code): 120 | 121 | ```rust 122 | IResult::Error(error_position!(ErrorKind::OneOf, input)), 123 | 124 | // becomes 125 | 126 | Err(Err::Error(error_position!(input, ErrorKind::OneOf))) 127 | ``` 128 | 129 | ```rust 130 | IResult::Incomplete(Needed::Size(1)) 131 | 132 | // becomes 133 | 134 | Err(Err::Incomplete(Needed::Size(1))) 135 | ``` 136 | 137 | For pattern matching, you now need to handle the `Failure` case as well, which works like the error 138 | case: 139 | 140 | ```rust 141 | match result { 142 | Ok((remaining, value)) => { ... }, 143 | Err(Err::Incomplete(needed)) => { ... }, 144 | Err(Err::Error(e)) | Err(Err::Failure(e)) => { ... } 145 | } 146 | ``` 147 | 148 | ## Errors on `Incomplete` data size calculation 149 | 150 | In previous versions, `Needed::Size(sz)` indicated the total needed data size (counting the actual input). 151 | Now it only returns the additional data needed, so the values will have changed. 152 | 153 | ## New trait for input types 154 | 155 | nom allows other input types than `&[u8]` and `&str`, as long as they implement a set of traits 156 | that are used everywhere in nom. This version introduces the `AtEof` trait: 157 | 158 | ```rust 159 | pub trait AtEof { 160 | fn at_eof(&self) -> bool; 161 | } 162 | ``` 163 | 164 | This trait allows the input value to indicate whether there can be more input coming later (buffering 165 | data from a file, or waiting for network data). 166 | 167 | ## Dealing with `Incomplete` usage 168 | 169 | nom's parsers are designed to work around streaming issues: if there is not enough data to decide, a 170 | parser will return `Incomplete` instead of returning a partial value that might be false. 171 | 172 | As an example, if you want to parse alphabetic characters then digits, when you get the whole input 173 | `abc123;`, the parser will return `abc` for alphabetic characters, and `123` for the digits, and `;` 174 | as remaining input. 175 | 176 | But if you get that input in chunks, like `ab` then `c123;`, the alphabetic characters parser will 177 | return `Incomplete`, because it does not know if there will be more matching characters afterwards. 178 | If it returned `ab` directly, the digit parser would fail on the rest of the input, even though the 179 | input had the valid format. 180 | 181 | For some users, though, the input will never be partial (everything could be loaded in memory at once), 182 | and the solution in nom 3 and before was to wrap parts of the parsers with the `complete!()` combinator 183 | that transforms `Incomplete` in `Error`. 184 | 185 | nom 4 is much stricter about the behaviour with partial data, but provides better tools to deal with it. 186 | Thanks to the new `AtEof` trait for input types, nom now provides the `CompleteByteSlice(&[u8])` and 187 | `CompleteStr(&str)` input types, for which the `at_eof()` method always returns true. 188 | With these types, no need to put a `complete!()` combinator everywhere, you can just apply those types 189 | like this: 190 | 191 | ```rust 192 | named!(parser<&str,ReturnType>, ... ); 193 | 194 | // becomes 195 | 196 | named!(parser, ... ); 197 | ``` 198 | 199 | ```rust 200 | named!(parser<&str,&str>, ... ); 201 | 202 | // becomes 203 | 204 | named!(parser, ... ); 205 | ``` 206 | 207 | ```rust 208 | named!(parser, ... ); 209 | 210 | // becomes 211 | 212 | named!(parser, ... ); 213 | ``` 214 | 215 | And as an example, for a unit test: 216 | 217 | ```rust 218 | assert_eq!(parser("abcd123"), Ok(("123", "abcd")); 219 | 220 | // becomes 221 | 222 | assert_eq!(parser(CompleteStr("abcd123")), Ok((CompleteStr("123"), CompleteStr("abcd"))); 223 | ``` 224 | 225 | These types allow you to correctly handle cases like text formats for which there might be a last 226 | empty line or not, as seen in [one of the examples](https://github.com/rust-bakery/nom/blob/87d837006467aebcdb0c37621da874a56c8562b5/tests/multiline.rs). 227 | 228 | If those types feel a bit long to write everywhere in the parsers, it's possible 229 | to alias them like this: 230 | 231 | ```rust 232 | use nom::types::CompleteByteSlice as Input; 233 | ``` 234 | 235 | ## Custom error types 236 | 237 | Custom error types caused a lot of type inference issues in previous nom versions. Now error types 238 | are automatically converted as needed. If you want to set up a custom error type, you now need to 239 | implement `std::convert::From` for this type. 240 | 241 | ## Producers and consumers 242 | 243 | Producers and consumers were removed in nom 4. That feature was too hard to integrate in code that 244 | deals with IO. 245 | 246 | -------------------------------------------------------------------------------- /doc/custom_input_types.md: -------------------------------------------------------------------------------- 1 | # Custom input types 2 | 3 | While historically, nom has worked mainly on `&[u8]` and `&str`, it can actually 4 | use any type as input, as long as they follow a specific set of traits. 5 | Those traits were developed first to abstract away the differences between 6 | `&[u8]` and `&str`, but were then employed for more interesting types, 7 | like [nom_locate](https://github.com/fflorent/nom_locate), a wrapper type 8 | that can carry line and column information, or to parse 9 | [a list of tokens](https://github.com/Rydgel/monkey-rust/blob/master/lib/parser/mod.rs). 10 | 11 | ## Implementing a custom type 12 | 13 | Let's assume we have an input type we'll call `MyInput`. `MyInput` is a sequence of `MyItem` type. 14 | The goal is to define nom parsers with this signature: `MyInput -> IResult`. 15 | 16 | ```rust 17 | fn parser(i: MyInput) -> IResult { 18 | tag("test")(i) 19 | } 20 | ``` 21 | 22 | Here are the traits we have to implement for `MyInput`: 23 | 24 | | trait | usage | 25 | |---|---| 26 | | [AsBytes](https://docs.rs/nom/latest/nom/trait.AsBytes.html) |Casts the input type to a byte slice| 27 | | [Compare](https://docs.rs/nom/latest/nom/trait.Compare.html) |Character comparison operations| 28 | | [ExtendInto](https://docs.rs/nom/latest/nom/trait.ExtendInto.html) |Abstracts something which can extend an `Extend`| 29 | | [FindSubstring](https://docs.rs/nom/latest/nom/trait.FindSubstring.html) |Look for a substring in self| 30 | | [FindToken](https://docs.rs/nom/latest/nom/trait.FindToken.html) |Look for self in the given input stream| 31 | | [InputIter](https://docs.rs/nom/latest/nom/trait.InputIter.html) |Common iteration operations on the input type| 32 | | [InputLength](https://docs.rs/nom/latest/nom/trait.InputLength.html) |Calculate the input length| 33 | | [InputTake](https://docs.rs/nom/latest/nom/trait.InputTake.html) |Slicing operations| 34 | | [InputTakeAtPosition](https://docs.rs/nom/latest/nom/trait.InputTakeAtPosition.html) |Look for a specific token and split at its position| 35 | | [Offset](https://docs.rs/nom/latest/nom/trait.Offset.html) |Calculate the offset between slices| 36 | | [ParseTo](https://docs.rs/nom/latest/nom/trait.ParseTo.html) |Used to integrate `&str`'s `parse()` method| 37 | | [Slice](https://docs.rs/nom/latest/nom/trait.Slice.html) |Slicing operations using ranges| 38 | 39 | Here are the traits we have to implement for `MyItem`: 40 | 41 | | trait | usage | 42 | |---|---| 43 | | [AsChar](https://docs.rs/nom/latest/nom/trait.AsChar.html) |Transforms common types to a char for basic token parsing| 44 | -------------------------------------------------------------------------------- /doc/home.md: -------------------------------------------------------------------------------- 1 | # Nom is an awesome parser combinators library in Rust 2 | 3 | To get started using nom, you can include it in your Rust projects from 4 | [crates.io](https://crates.io/crates/nom). Here are a few links you will find useful: 5 | 6 | * [Reference documentation](https://docs.rs/nom) 7 | * [Gitter chat room](https://gitter.im/Geal/nom). You can also go to the #nom IRC 8 | channel on irc.mozilla.org, or ping 'geal' on Mozilla, Freenode, Geeknode or oftc IRC 9 | * [Making a new parser from scratch](making_a_new_parser_from_scratch.md) 10 | (general tips on writing a parser and code architecture) 11 | * [How to handle parser errors](error_management.md) 12 | * [Recipes for common nom tasks](nom_recipes.md) 13 | -------------------------------------------------------------------------------- /doc/making_a_new_parser_from_scratch.md: -------------------------------------------------------------------------------- 1 | # Making a new parser from scratch 2 | 3 | Writing a parser is a very fun, interactive process, but sometimes a daunting 4 | task. How do you test it? How to see ambiguities in specifications? 5 | 6 | nom is designed to abstract data manipulation (counting array offsets, 7 | converting to structures, etc) while providing a safe, composable API. It also 8 | takes care of making the code easy to test and read, but it can be confusing at 9 | first, if you are not familiar with parser combinators, or if you are not used 10 | to Rust generic functions. 11 | 12 | This document is here to help you in getting started with nom. You can also find 13 | [nom recipes for common short parsing tasks here](nom_recipes.md). If you need 14 | more specific help, please ping `geal` on IRC (libera, geeknode, 15 | oftc), go to `#nom-parsers` on Libera IRC, or on the 16 | [Gitter chat room](https://gitter.im/Geal/nom). 17 | 18 | # First step: the initial research 19 | 20 | A big part of the initial work lies in accumulating enough documentation and 21 | samples to understand the format. The specification is useful, but specifications 22 | represent an "official" point of view, that may not be the real world usage. Any 23 | blog post or open source code is useful, because it shows how people understand 24 | the format, and how they work around each other's bugs (if you think a 25 | specification ensures every implementation is consistent with the others, think again). 26 | 27 | You should get a lot of samples (file or network traces) to test your code. The 28 | easy way is to use a small number of samples coming from the same source and 29 | develop everything around them, to realize later that they share a very specific 30 | bug. 31 | 32 | # Code organization 33 | 34 | While it is tempting to insert the parsing code right inside the rest of the 35 | logic, it usually results in unmaintainable code, and makes testing challenging. 36 | Parser combinators, the parsing technique used in nom, assemble a lot of small 37 | functions to make powerful parsers. This means that those functions only depend 38 | on their input, not on an external state. This makes it easy to parse the input 39 | partially, and to test those functions independently. 40 | 41 | Usually, you can separate the parsing functions in their own module, so you 42 | could have a `src/lib.rs` file containing this: 43 | 44 | ```rust 45 | pub mod parser; 46 | ``` 47 | 48 | And the `src/parser.rs` file: 49 | 50 | ```rust 51 | use nom::IResult; 52 | use nom::number::complete::be_u16; 53 | use nom::bytes::complete::take; 54 | 55 | pub fn length_value(input: &[u8]) -> IResult<&[u8],&[u8]> { 56 | let (input, length) = be_u16(input)?; 57 | take(length)(input) 58 | } 59 | ``` 60 | 61 | # Writing a first parser 62 | 63 | Let's parse a simple expression like `(12345)`. nom parsers are functions that 64 | use the `nom::IResult` type everywhere. As an example, a parser taking a byte 65 | slice `&[u8]` and returning a 32 bits unsigned integer `u32` would have this 66 | signature: `fn parse_u32(input: &[u8]) -> IResult<&[u8], u32>`. 67 | 68 | The `IResult` type depends on the input and output types, and an optional custom 69 | error type. This enum can either be `Ok((i,o))` containing the remaining input 70 | and the output value, or, on the `Err` side, an error or an indication that more 71 | data is needed. 72 | 73 | ```rust 74 | pub type IResult = Result<(I, O), Err>; 75 | 76 | #[derive(Debug, PartialEq, Eq, Clone, Copy)] 77 | pub enum Needed { 78 | Unknown, 79 | Size(u32) 80 | } 81 | 82 | #[derive(Debug, Clone, PartialEq)] 83 | pub enum Err { 84 | Incomplete(Needed), 85 | Error(E), 86 | Failure(E), 87 | } 88 | ``` 89 | 90 | nom uses this type everywhere. Every combination of parsers will pattern match 91 | on this to know if it must return a value, an error, consume more data, etc. 92 | But this is done behind the scenes most of the time. 93 | 94 | Parsers are usually built from the bottom up, by first writing parsers for the 95 | smallest elements, then assembling them in more complex parsers by using 96 | combinators. 97 | 98 | As an example, here is how we could build a (non spec compliant) HTTP request 99 | line parser: 100 | 101 | ```rust 102 | // first implement the basic parsers 103 | let method = take_while1(is_alpha); 104 | let space = take_while1(|c| c == ' '); 105 | let url = take_while1(|c| c!= ' '); 106 | let is_version = |c| c >= b'0' && c <= b'9' || c == b'.'; 107 | let http = tag("HTTP/"); 108 | let version = take_while1(is_version); 109 | let line_ending = tag("\r\n"); 110 | 111 | // combine http and version to extract the version string 112 | // preceded will return the result of the second parser 113 | // if both succeed 114 | let http_version = preceded(http, version); 115 | 116 | // combine all previous parsers in one function 117 | fn request_line(i: &[u8]) -> IResult<&[u8], Request> { 118 | 119 | // Tuples of parsers are a parser themselves, 120 | // parsing with each of them sequentially and returning a tuple of their results. 121 | // Unlike most other parsers, parser tuples are not `FnMut`, they must be wrapped 122 | // in the `parse` function to be able to be used in the same way as the others. 123 | let (input, (method, _, url, _, version, _)) = 124 | (method, space, url, space, http_version, line_ending).parse(i)?; 125 | 126 | Ok((input, Request { method, url, version })) 127 | } 128 | ``` 129 | 130 | Since it is easy to combine small parsers, I encourage you to write small 131 | functions corresponding to specific parts of the format, test them 132 | independently, then combine them in more general parsers. 133 | 134 | # Finding the right combinator 135 | 136 | nom has a lot of different combinators, depending on the use case. They are all 137 | described in the [reference](https://docs.rs/nom). 138 | 139 | [Basic functions](https://docs.rs/nom/#functions) are available. They deal mostly 140 | in recognizing character types, like `alphanumeric` or `digit`. They also parse 141 | big endian and little endian integers and floats of multiple sizes. 142 | 143 | Most of the functions are there to combine parsers, and they are generic over 144 | the input type. 145 | 146 | # Testing the parsers 147 | 148 | Once you have a parser function, a good trick is to test it on a lot of the 149 | samples you gathered, and integrate this to your unit tests. To that end, put 150 | all of the test files in a folder like `assets` and refer to test files like 151 | this: 152 | 153 | ```rust 154 | #[test] 155 | fn header_test() { 156 | let data = include_bytes!("../assets/axolotl-piano.gif"); 157 | println!("bytes:\n{}", &data[0..100].to_hex(8)); 158 | let res = header(data); 159 | // ... 160 | ``` 161 | 162 | The `include_bytes!` macro (provided by Rust's standard library) will integrate 163 | the file as a byte slice in your code. You can then just refer to the part of 164 | the input the parser has to handle via its offset. Here, we take the first 100 165 | bytes of a GIF file to parse its header 166 | (complete code [here](https://github.com/Geal/gif.rs/blob/master/src/parser.rs#L305-L309)). 167 | 168 | If your parser handles textual data, you can just use a lot of strings directly 169 | in the test, like this: 170 | 171 | ```rust 172 | #[test] 173 | fn factor_test() { 174 | assert_eq!(factor("3"), Ok(("", 3))); 175 | assert_eq!(factor(" 12"), Ok(("", 12))); 176 | assert_eq!(factor("537 "), Ok(("", 537))); 177 | assert_eq!(factor(" 24 "), Ok(("", 24))); 178 | } 179 | ``` 180 | 181 | The more samples and test cases you get, the more you can experiment with your 182 | parser design. 183 | 184 | # Debugging the parsers 185 | 186 | There are a few tools you can use to debug how code is generated. 187 | 188 | ## dbg_dmp 189 | 190 | This function wraps a parser that accepts a `&[u8]` as input and 191 | prints its hexdump if the child parser encountered an error: 192 | 193 | ```rust 194 | use nom::{IResult, error::dbg_dmp, bytes::complete::tag}; 195 | 196 | fn f(i: &[u8]) -> IResult<&[u8], &[u8]> { 197 | dbg_dmp(tag("abcd"), "tag")(i) 198 | } 199 | 200 | let a = &b"efghijkl"[..]; 201 | 202 | // Will print the following message: 203 | // Error(Position(0, [101, 102, 103, 104, 105, 106, 107, 108])) at l.5 by ' tag ! ( "abcd" ) ' 204 | // 00000000 65 66 67 68 69 6a 6b 6c efghijkl 205 | f(a); 206 | ``` 207 | 208 | -------------------------------------------------------------------------------- /doc/upgrading_to_nom_5.md: -------------------------------------------------------------------------------- 1 | # Upgrading to nom 5.0 2 | 3 | ## Changes in error types 4 | 5 | **If you have a lot of unit tests, this is probably the biggest issue you'll encounter** 6 | 7 | Error management has been rewritten to avoid two issues present in previous 8 | versions: 9 | - The error type was causing type inference issues in macros 10 | - The `verbose-errors` was changing the API (adding a variant in an enum) and 11 | reducing the parsing speed. Since compilation features are additive, if a 12 | dependency used nom with `verbose-errors`, it would be activated for all dependencies 13 | 14 | The new error management simplifies the internal type, removing the `Context` 15 | type and the error conversions that needed to happen everywhere. 16 | 17 | Here's how the types change. Before: 18 | 19 | ```rust 20 | type IResult = Result<(I, O), Err>; 21 | 22 | pub enum Err { 23 | Incomplete(Needed), 24 | Error(Context), 25 | Failure(Context), 26 | } 27 | 28 | pub enum Context { 29 | Code(I, ErrorKind), 30 | // this variant only present if `verbose-errors` is active 31 | List(Vec<(I, ErrorKind)>), 32 | } 33 | ``` 34 | 35 | In nom 5: 36 | 37 | ```rust 38 | type IResult = Result<(I, O), Err>; 39 | 40 | pub enum Err { 41 | Incomplete(Needed), 42 | Error(E), 43 | Failure(E), 44 | } 45 | ``` 46 | 47 | Now the error type is completely generic, so you can choose exactly 48 | what you need, from erasing errors entirely, to reproducing the 49 | `verbose-errors` feature with the [`VerboseError` type](https://docs.rs/nom/latest/nom/error/struct.VerboseError.html). 50 | The [`ErrorKind` enum](https://docs.rs/nom/latest/nom/error/enum.ErrorKind.html) 51 | is not generic now: It does not need to hold a custom error type. 52 | 53 | Any error type has to implement the [`ParseError` trait](https://docs.rs/nom/latest/nom/error/trait.ParseError.html) 54 | that specifies methods to build an error from a position in input data, 55 | and an `ErrorKind`, or another error, etc. 56 | 57 | Since the error types change, this will probably generate errors 58 | in your unit tests. 59 | 60 | Usually, changing a `Err(Err::Error(Context::Code(input, error)))` to 61 | `Err(Err::Error((input, error)))` is enough (if you use the default 62 | error type `(Input, ErrorKind)`. 63 | 64 | ## Removal of `CompleteStr` and `CompleteByteSlice` 65 | 66 | Those types were introduced in nom 4 as alternative input types, to 67 | solve issues with streaming parsers. 68 | 69 | A core feature of nom is its support for streaming parsers: When you are 70 | handling network packets or large files, you might not have all of the data. 71 | As an example, if you use a parser recognizing numbers and you pass as input 72 | "123", the parser will return `Err(Err::Incomplete(_))` because it cannot decide 73 | if it has the whole input or not. The complete data could be "123;" or "12345". 74 | 75 | There are various issues with this approach, though. A lot of formats, especially 76 | text formats, are not meant to be very large, and in all cases the data will be 77 | entirely loaded in memory, so handling the streaming case gets annoying. 78 | And for some binary formats, there will often be some TLV (tag/length/value) 79 | elements for which we already know the complete size of the value. 80 | 81 | nom can work on various input types, as long as they implement a common set of 82 | traits, so nom 4 had the `CompleteByteSlice` and `CompleteStr` types as alternatives 83 | of `&[u8]` and `&str`, that changed the behaviour of parsers to assume that the 84 | input data is complete. Unfortunately, those types were hard to handle, since 85 | we would often need to convert them back and forth with their inner types, 86 | and they would appear everywhere in parser signatures. Also, they were unexpectedly 87 | slow, even though they were just wrapper types. 88 | 89 | In nom 5, those types were removed, and instead we have *streaming* and *complete* 90 | versions of various function combinators. You can find them in the corresponding 91 | submodules of the `bytes`, `character`, and `number` modules. Since macros cannot 92 | be isolated in modules (they are all at the top level once exported), all macros 93 | have been rewritten to use the *streaming* version. 94 | 95 | Upgrading from nom 4 means removing the `CompleteStr` and `CompleteByteSlice` types 96 | if you were using them, and checking which parsers suddenly return `Incomplete` on 97 | valid inputs. It indicates that you will need to replace some macros combinators 98 | with the *complete* function version. 99 | 100 | ## From macros to functions 101 | 102 | nom has used macros as its core tool for a long time, since they were a powerful 103 | tool to generate parsers. The code created was simple, approximately the same way 104 | it could be written manually, and it was easy for the compiler to optimize it. 105 | 106 | Unfortunately, macros were sometimes hard to manipulate, since nom was relying 107 | on a few lesser known tricks to build its DSL, and macros parsing errors were 108 | often too cryptic to understand. 109 | 110 | nom 5 introduces a new technique to write combinators. Instead of using macros 111 | that can take other macros as argument and call them by rewriting their argument 112 | list, we have functions that take other functions as arguments, and return 113 | functions. 114 | 115 | This technique has a lot of advantages over macros: 116 | - No type inference issues, you can explicitly describe the error type in 117 | function definitions 118 | - Nicer compilation errors: rustc can show you exactly what is missing when calling 119 | a combinator, if you need to import new traits, etc. 120 | - Those functions are actually faster than nom 4's macros when built with link time 121 | optimization 122 | - Small gain in compilation speed (since code can be reused instead of regenerated 123 | everywhere) 124 | - The macros are still there, but were rewritten to use the functions instead, so 125 | they gain the performance benefit immediately 126 | 127 | In practice, nom parsers will have the following signature: 128 | `Input -> IResult` 129 | 130 | A function combinator will then have this signature: 131 | ` -> impl Fn(Input) -> IResult` 132 | 133 | Here is an example with a simplified `take` combinator: 134 | 135 | ```rust 136 | pub fn take(count: usize) -> impl Fn(&[u8]) -> IResult<&[u8], &[u8]> 137 | where 138 | { 139 | move |i: &[u8]| { 140 | if i.len() < count { 141 | Err(Err::Error((i, ErrorKind::Eof)) 142 | } else { 143 | Ok(i.split_at(count)) 144 | } 145 | } 146 | } 147 | ``` 148 | 149 | `take` generates a closure and returns it. We can use it directly like this: 150 | `take(5)(input)`. 151 | 152 | (this version of `take` is simplified because it actually uses generic input 153 | and error types and various traits over these types) 154 | 155 | More complex combinators like `pair` (returns a tuple of the result of 2 parsers) 156 | will be able to combine parsers to make more advanced ones: 157 | 158 | ```rust 159 | pub fn pair(first: F, second: G) -> impl Fn(I) -> IResult 160 | where 161 | F: Fn(I) -> IResult, 162 | G: Fn(I) -> IResult, 163 | { 164 | move |input: I| { 165 | let (input, o1) = first(input)?; 166 | second(input).map(|(i, o2)| (i, (o1, o2))) 167 | } 168 | } 169 | ``` 170 | 171 | This combinator is generic over its parser arguments and can assemble them in 172 | the closure that it returns. 173 | 174 | You can then use it that way: 175 | 176 | ```rust 177 | fn parser(i: &str) -> IResult<&str, (&str, &str)> { 178 | pair(alpha0, digit0)(i) 179 | } 180 | 181 | // will return `Ok((";", ("abc", "123")))` 182 | parser("abc123;"); 183 | ``` 184 | -------------------------------------------------------------------------------- /examples/custom_error.rs: -------------------------------------------------------------------------------- 1 | extern crate nom; 2 | 3 | use nom::error::ErrorKind; 4 | use nom::error::ParseError; 5 | use nom::Err::Error; 6 | use nom::IResult; 7 | 8 | #[derive(Debug, PartialEq)] 9 | pub enum CustomError { 10 | MyError, 11 | Nom(I, ErrorKind), 12 | } 13 | 14 | impl ParseError for CustomError { 15 | fn from_error_kind(input: I, kind: ErrorKind) -> Self { 16 | CustomError::Nom(input, kind) 17 | } 18 | 19 | fn append(_: I, _: ErrorKind, other: Self) -> Self { 20 | other 21 | } 22 | } 23 | 24 | pub fn parse(_input: &str) -> IResult<&str, &str, CustomError<&str>> { 25 | Err(Error(CustomError::MyError)) 26 | } 27 | 28 | fn main() {} 29 | 30 | #[cfg(test)] 31 | mod tests { 32 | use super::parse; 33 | use super::CustomError; 34 | use nom::Err::Error; 35 | 36 | #[test] 37 | fn it_works() { 38 | let err = parse("").unwrap_err(); 39 | match err { 40 | Error(e) => assert_eq!(e, CustomError::MyError), 41 | _ => panic!("Unexpected error: {:?}", err), 42 | } 43 | } 44 | } 45 | -------------------------------------------------------------------------------- /examples/iterator.rs: -------------------------------------------------------------------------------- 1 | use std::collections::HashMap; 2 | use std::iter::Iterator; 3 | 4 | use nom::bytes::complete::tag; 5 | use nom::character::complete::alphanumeric1; 6 | use nom::combinator::iterator; 7 | use nom::sequence::{separated_pair, terminated}; 8 | use nom::IResult; 9 | 10 | fn main() { 11 | let mut data = "abcabcabcabc"; 12 | 13 | fn parser(i: &str) -> IResult<&str, &str> { 14 | tag("abc")(i) 15 | } 16 | 17 | // `from_fn` (available from Rust 1.34) can create an iterator 18 | // from a closure 19 | let it = std::iter::from_fn(move || { 20 | match parser(data) { 21 | // when successful, a nom parser returns a tuple of 22 | // the remaining input and the output value. 23 | // So we replace the captured input data with the 24 | // remaining input, to be parsed on the next call 25 | Ok((i, o)) => { 26 | data = i; 27 | Some(o) 28 | } 29 | _ => None, 30 | } 31 | }); 32 | 33 | for value in it { 34 | println!("parser returned: {}", value); 35 | } 36 | 37 | println!("\n********************\n"); 38 | 39 | let data = "abcabcabcabc"; 40 | 41 | // if `from_fn` is not available, it is possible to fold 42 | // over an iterator of functions 43 | let res = 44 | std::iter::repeat(parser) 45 | .take(3) 46 | .try_fold((data, Vec::new()), |(data, mut acc), parser| { 47 | parser(data).map(|(i, o)| { 48 | acc.push(o); 49 | (i, acc) 50 | }) 51 | }); 52 | 53 | // will print "parser iterator returned: Ok(("abc", ["abc", "abc", "abc"]))" 54 | println!("\nparser iterator returned: {:?}", res); 55 | 56 | println!("\n********************\n"); 57 | 58 | let data = "key1:value1,key2:value2,key3:value3,;"; 59 | 60 | // `nom::combinator::iterator` will return an iterator 61 | // producing the parsed values. Compared to the previous 62 | // solutions: 63 | // - we can work with a normal iterator like `from_fn` 64 | // - we can get the remaining input afterwards, like with the `try_fold` trick 65 | let mut nom_it = iterator( 66 | data, 67 | terminated( 68 | separated_pair(alphanumeric1, tag(":"), alphanumeric1), 69 | tag(","), 70 | ), 71 | ); 72 | 73 | let res = nom_it 74 | .by_ref() 75 | .map(|(k, v)| (k.to_uppercase(), v)) 76 | .collect::>(); 77 | 78 | let parser_result: IResult<_, _> = nom_it.finish(); 79 | let (remaining_input, ()) = parser_result.unwrap(); 80 | 81 | // will print "iterator returned {"key1": "value1", "key3": "value3", "key2": "value2"}, remaining input is ';'" 82 | println!( 83 | "iterator returned {:?}, remaining input is '{}'", 84 | res, remaining_input 85 | ); 86 | } 87 | -------------------------------------------------------------------------------- /examples/json2.rs: -------------------------------------------------------------------------------- 1 | //#[global_allocator] 2 | //static ALLOC: jemallocator::Jemalloc = jemallocator::Jemalloc; 3 | 4 | use nom::{ 5 | branch::alt, 6 | bytes::{tag, take}, 7 | character::{anychar, char, multispace0, none_of}, 8 | combinator::{map, map_opt, map_res, value, verify}, 9 | error::{Error, ParseError}, 10 | multi::{fold, separated_list0}, 11 | number::double, 12 | sequence::{delimited, preceded, separated_pair}, 13 | Complete, Emit, Mode, OutputM, Parser, 14 | }; 15 | 16 | use std::collections::HashMap; 17 | 18 | #[derive(Debug, PartialEq, Clone)] 19 | pub enum JsonValue { 20 | Null, 21 | Bool(bool), 22 | Str(String), 23 | Num(f64), 24 | Array(Vec), 25 | Object(HashMap), 26 | } 27 | 28 | fn boolean<'a>() -> impl Parser<&'a str, Output = bool, Error = Error<&'a str>> { 29 | alt((value(false, tag("false")), value(true, tag("true")))) 30 | } 31 | 32 | fn u16_hex<'a>() -> impl Parser<&'a str, Output = u16, Error = Error<&'a str>> { 33 | map_res(take(4usize), |s| u16::from_str_radix(s, 16)) 34 | } 35 | 36 | fn unicode_escape<'a>() -> impl Parser<&'a str, Output = char, Error = Error<&'a str>> { 37 | map_opt( 38 | alt(( 39 | // Not a surrogate 40 | map( 41 | verify(u16_hex(), |cp| !(0xD800..0xE000).contains(cp)), 42 | |cp| cp as u32, 43 | ), 44 | // See https://en.wikipedia.org/wiki/UTF-16#Code_points_from_U+010000_to_U+10FFFF for details 45 | map( 46 | verify( 47 | separated_pair(u16_hex(), tag("\\u"), u16_hex()), 48 | |(high, low)| (0xD800..0xDC00).contains(high) && (0xDC00..0xE000).contains(low), 49 | ), 50 | |(high, low)| { 51 | let high_ten = (high as u32) - 0xD800; 52 | let low_ten = (low as u32) - 0xDC00; 53 | (high_ten << 10) + low_ten + 0x10000 54 | }, 55 | ), 56 | )), 57 | // Could probably be replaced with .unwrap() or _unchecked due to the verify checks 58 | std::char::from_u32, 59 | ) 60 | } 61 | 62 | fn character<'a>() -> impl Parser<&'a str, Output = char, Error = Error<&'a str>> { 63 | Character 64 | /*let (input, c) = none_of("\"")(input)?; 65 | if c == '\\' { 66 | alt(( 67 | map_res(anychar, |c| { 68 | Ok(match c { 69 | '"' | '\\' | '/' => c, 70 | 'b' => '\x08', 71 | 'f' => '\x0C', 72 | 'n' => '\n', 73 | 'r' => '\r', 74 | 't' => '\t', 75 | _ => return Err(()), 76 | }) 77 | }), 78 | preceded(char('u'), unicode_escape()), 79 | )) 80 | .parse(input) 81 | } else { 82 | Ok((input, c)) 83 | }*/ 84 | } 85 | 86 | struct Character; 87 | 88 | impl<'a> Parser<&'a str> for Character { 89 | type Output = char; 90 | 91 | type Error = Error<&'a str>; 92 | 93 | fn process( 94 | &mut self, 95 | input: &'a str, 96 | ) -> nom::PResult { 97 | let (input, c): (&str, char) = 98 | none_of("\"").process::>(input)?; 99 | if c == '\\' { 100 | alt(( 101 | map_res(anychar, |c| { 102 | Ok(match c { 103 | '"' | '\\' | '/' => c, 104 | 'b' => '\x08', 105 | 'f' => '\x0C', 106 | 'n' => '\n', 107 | 'r' => '\r', 108 | 't' => '\t', 109 | _ => return Err(()), 110 | }) 111 | }), 112 | preceded(char('u'), unicode_escape()), 113 | )) 114 | .process::(input) 115 | } else { 116 | Ok((input, OM::Output::bind(|| c))) 117 | } 118 | } 119 | } 120 | 121 | fn string<'a>() -> impl Parser<&'a str, Output = String, Error = Error<&'a str>> { 122 | delimited( 123 | char('"'), 124 | fold(0.., character(), String::new, |mut string, c| { 125 | string.push(c); 126 | string 127 | }), 128 | char('"'), 129 | ) 130 | } 131 | 132 | fn ws<'a, O, E: ParseError<&'a str>, F: Parser<&'a str, Output = O, Error = E>>( 133 | f: F, 134 | ) -> impl Parser<&'a str, Output = O, Error = E> { 135 | delimited(multispace0(), f, multispace0()) 136 | } 137 | 138 | fn array<'a>() -> impl Parser<&'a str, Output = Vec, Error = Error<&'a str>> { 139 | delimited( 140 | char('['), 141 | ws(separated_list0(ws(char(',')), json_value())), 142 | char(']'), 143 | ) 144 | } 145 | 146 | fn object<'a>() -> impl Parser<&'a str, Output = HashMap, Error = Error<&'a str>> 147 | { 148 | map( 149 | delimited( 150 | char('{'), 151 | ws(separated_list0( 152 | ws(char(',')), 153 | separated_pair(string(), ws(char(':')), json_value()), 154 | )), 155 | char('}'), 156 | ), 157 | |key_values| key_values.into_iter().collect(), 158 | ) 159 | } 160 | 161 | fn json_value() -> JsonParser { 162 | JsonParser 163 | } 164 | 165 | struct JsonParser; 166 | 167 | // the main Parser implementation is done explicitely on a real type, 168 | // because haaving json_value return `impl Parser` would result in 169 | // "recursive opaque type" errors 170 | impl<'a> Parser<&'a str> for JsonParser { 171 | type Output = JsonValue; 172 | type Error = Error<&'a str>; 173 | 174 | fn process( 175 | &mut self, 176 | input: &'a str, 177 | ) -> nom::PResult { 178 | use JsonValue::*; 179 | 180 | let mut parser = alt(( 181 | value(Null, tag("null")), 182 | map(boolean(), Bool), 183 | map(string(), Str), 184 | map(double(), Num), 185 | map(array(), Array), 186 | map(object(), Object), 187 | )); 188 | 189 | parser.process::(input) 190 | } 191 | } 192 | 193 | fn json<'a>() -> impl Parser<&'a str, Output = JsonValue, Error = Error<&'a str>> { 194 | ws(json_value()) 195 | } 196 | 197 | fn main() { 198 | let data = include_str!("../benchmarks/canada.json"); 199 | 200 | loop { 201 | let _a = json() 202 | .process::>(data) 203 | .unwrap(); 204 | } 205 | } 206 | -------------------------------------------------------------------------------- /examples/string.rs: -------------------------------------------------------------------------------- 1 | //! This example shows an example of how to parse an escaped string. The 2 | //! rules for the string are similar to JSON and rust. A string is: 3 | //! 4 | //! - Enclosed by double quotes 5 | //! - Can contain any raw unescaped code point besides \ and " 6 | //! - Matches the following escape sequences: \b, \f, \n, \r, \t, \", \\, \/ 7 | //! - Matches code points like Rust: \u{XXXX}, where XXXX can be up to 6 8 | //! hex characters 9 | //! - an escape followed by whitespace consumes all whitespace between the 10 | //! escape and the next non-whitespace character 11 | 12 | #![cfg(feature = "alloc")] 13 | 14 | use nom::branch::alt; 15 | use nom::bytes::streaming::{is_not, take_while_m_n}; 16 | use nom::character::streaming::{char, multispace1}; 17 | use nom::combinator::{map, map_opt, map_res, value, verify}; 18 | use nom::error::{FromExternalError, ParseError}; 19 | use nom::multi::fold; 20 | use nom::sequence::{delimited, preceded}; 21 | use nom::{IResult, Parser}; 22 | 23 | // parser combinators are constructed from the bottom up: 24 | // first we write parsers for the smallest elements (escaped characters), 25 | // then combine them into larger parsers. 26 | 27 | /// Parse a unicode sequence, of the form u{XXXX}, where XXXX is 1 to 6 28 | /// hexadecimal numerals. We will combine this later with parse_escaped_char 29 | /// to parse sequences like \u{00AC}. 30 | fn parse_unicode<'a, E>(input: &'a str) -> IResult<&'a str, char, E> 31 | where 32 | E: ParseError<&'a str> + FromExternalError<&'a str, std::num::ParseIntError>, 33 | { 34 | // `take_while_m_n` parses between `m` and `n` bytes (inclusive) that match 35 | // a predicate. `parse_hex` here parses between 1 and 6 hexadecimal numerals. 36 | let parse_hex = take_while_m_n(1, 6, |c: char| c.is_ascii_hexdigit()); 37 | 38 | // `preceded` takes a prefix parser, and if it succeeds, returns the result 39 | // of the body parser. In this case, it parses u{XXXX}. 40 | let parse_delimited_hex = preceded( 41 | char('u'), 42 | // `delimited` is like `preceded`, but it parses both a prefix and a suffix. 43 | // It returns the result of the middle parser. In this case, it parses 44 | // {XXXX}, where XXXX is 1 to 6 hex numerals, and returns XXXX 45 | delimited(char('{'), parse_hex, char('}')), 46 | ); 47 | 48 | // `map_res` takes the result of a parser and applies a function that returns 49 | // a Result. In this case we take the hex bytes from parse_hex and attempt to 50 | // convert them to a u32. 51 | let parse_u32 = map_res(parse_delimited_hex, move |hex| u32::from_str_radix(hex, 16)); 52 | 53 | // map_opt is like map_res, but it takes an Option instead of a Result. If 54 | // the function returns None, map_opt returns an error. In this case, because 55 | // not all u32 values are valid unicode code points, we have to fallibly 56 | // convert to char with from_u32. 57 | map_opt(parse_u32, std::char::from_u32).parse(input) 58 | } 59 | 60 | /// Parse an escaped character: \n, \t, \r, \u{00AC}, etc. 61 | fn parse_escaped_char<'a, E>(input: &'a str) -> IResult<&'a str, char, E> 62 | where 63 | E: ParseError<&'a str> + FromExternalError<&'a str, std::num::ParseIntError>, 64 | { 65 | preceded( 66 | char('\\'), 67 | // `alt` tries each parser in sequence, returning the result of 68 | // the first successful match 69 | alt(( 70 | parse_unicode, 71 | // The `value` parser returns a fixed value (the first argument) if its 72 | // parser (the second argument) succeeds. In these cases, it looks for 73 | // the marker characters (n, r, t, etc) and returns the matching 74 | // character (\n, \r, \t, etc). 75 | value('\n', char('n')), 76 | value('\r', char('r')), 77 | value('\t', char('t')), 78 | value('\u{08}', char('b')), 79 | value('\u{0C}', char('f')), 80 | value('\\', char('\\')), 81 | value('/', char('/')), 82 | value('"', char('"')), 83 | )), 84 | ) 85 | .parse(input) 86 | } 87 | 88 | /// Parse a backslash, followed by any amount of whitespace. This is used later 89 | /// to discard any escaped whitespace. 90 | fn parse_escaped_whitespace<'a, E: ParseError<&'a str>>( 91 | input: &'a str, 92 | ) -> IResult<&'a str, &'a str, E> { 93 | preceded(char('\\'), multispace1).parse(input) 94 | } 95 | 96 | /// Parse a non-empty block of text that doesn't include \ or " 97 | fn parse_literal<'a, E: ParseError<&'a str>>(input: &'a str) -> IResult<&'a str, &'a str, E> { 98 | // `is_not` parses a string of 0 or more characters that aren't one of the 99 | // given characters. 100 | let not_quote_slash = is_not("\"\\"); 101 | 102 | // `verify` runs a parser, then runs a verification function on the output of 103 | // the parser. The verification function accepts out output only if it 104 | // returns true. In this case, we want to ensure that the output of is_not 105 | // is non-empty. 106 | verify(not_quote_slash, |s: &str| !s.is_empty()).parse(input) 107 | } 108 | 109 | /// A string fragment contains a fragment of a string being parsed: either 110 | /// a non-empty Literal (a series of non-escaped characters), a single 111 | /// parsed escaped character, or a block of escaped whitespace. 112 | #[derive(Debug, Clone, Copy, PartialEq, Eq)] 113 | enum StringFragment<'a> { 114 | Literal(&'a str), 115 | EscapedChar(char), 116 | EscapedWS, 117 | } 118 | 119 | /// Combine parse_literal, parse_escaped_whitespace, and parse_escaped_char 120 | /// into a StringFragment. 121 | fn parse_fragment<'a, E>(input: &'a str) -> IResult<&'a str, StringFragment<'a>, E> 122 | where 123 | E: ParseError<&'a str> + FromExternalError<&'a str, std::num::ParseIntError>, 124 | { 125 | alt(( 126 | // The `map` combinator runs a parser, then applies a function to the output 127 | // of that parser. 128 | map(parse_literal, StringFragment::Literal), 129 | map(parse_escaped_char, StringFragment::EscapedChar), 130 | value(StringFragment::EscapedWS, parse_escaped_whitespace), 131 | )) 132 | .parse(input) 133 | } 134 | 135 | /// Parse a string. Use a loop of parse_fragment and push all of the fragments 136 | /// into an output string. 137 | fn parse_string<'a, E>(input: &'a str) -> IResult<&'a str, String, E> 138 | where 139 | E: ParseError<&'a str> + FromExternalError<&'a str, std::num::ParseIntError>, 140 | { 141 | // fold is the equivalent of iterator::fold. It runs a parser in a loop, 142 | // and for each output value, calls a folding function on each output value. 143 | let build_string = fold( 144 | 0.., 145 | // Our parser function – parses a single string fragment 146 | parse_fragment, 147 | // Our init value, an empty string 148 | String::new, 149 | // Our folding function. For each fragment, append the fragment to the 150 | // string. 151 | |mut string, fragment| { 152 | match fragment { 153 | StringFragment::Literal(s) => string.push_str(s), 154 | StringFragment::EscapedChar(c) => string.push(c), 155 | StringFragment::EscapedWS => {} 156 | } 157 | string 158 | }, 159 | ); 160 | 161 | // Finally, parse the string. Note that, if `build_string` could accept a raw 162 | // " character, the closing delimiter " would never match. When using 163 | // `delimited` with a looping parser (like fold), be sure that the 164 | // loop won't accidentally match your closing delimiter! 165 | delimited(char('"'), build_string, char('"')).parse(input) 166 | } 167 | 168 | fn main() { 169 | let data = "\"abc\""; 170 | println!("EXAMPLE 1:\nParsing a simple input string: {}", data); 171 | let result = parse_string::<()>(data); 172 | assert_eq!(result, Ok(("", String::from("abc")))); 173 | println!("Result: {}\n\n", result.unwrap().1); 174 | 175 | let data = "\"tab:\\tafter tab, newline:\\nnew line, quote: \\\", emoji: \\u{1F602}, newline:\\nescaped whitespace: \\ abc\""; 176 | println!( 177 | "EXAMPLE 2:\nParsing a string with escape sequences, newline literal, and escaped whitespace:\n\n{}\n", 178 | data 179 | ); 180 | let result = parse_string::<()>(data); 181 | assert_eq!( 182 | result, 183 | Ok(( 184 | "", 185 | String::from("tab:\tafter tab, newline:\nnew line, quote: \", emoji: 😂, newline:\nescaped whitespace: abc") 186 | )) 187 | ); 188 | println!("Result:\n\n{}", result.unwrap().1); 189 | } 190 | -------------------------------------------------------------------------------- /fuzz/.gitignore: -------------------------------------------------------------------------------- 1 | artifacts 2 | corpus 3 | target 4 | -------------------------------------------------------------------------------- /fuzz/Cargo.toml: -------------------------------------------------------------------------------- 1 | 2 | [package] 3 | name = "nom-fuzz" 4 | version = "0.0.0" 5 | authors = ["David Korczynski "] 6 | publish = false 7 | edition = "2018" 8 | 9 | [package.metadata] 10 | cargo-fuzz = true 11 | 12 | [dependencies] 13 | libfuzzer-sys = "0.4.0" 14 | 15 | [dependencies.nom] 16 | path = ".." 17 | 18 | # Prevent this from interfering with workspaces 19 | [workspace] 20 | members = ["."] 21 | 22 | [[bin]] 23 | name = "fuzz_arithmetic" 24 | path = "fuzz_targets/fuzz_arithmetic.rs" 25 | -------------------------------------------------------------------------------- /fuzz/fuzz_targets/fuzz_arithmetic.rs: -------------------------------------------------------------------------------- 1 | #![no_main] 2 | use libfuzzer_sys::fuzz_target; 3 | use std::str; 4 | 5 | extern crate nom; 6 | 7 | use nom::{ 8 | branch::alt, 9 | bytes::complete::tag, 10 | character::complete::char, 11 | character::complete::{digit1 as digit, space0 as space}, 12 | combinator::{map, map_res, verify}, 13 | multi::fold_many0, 14 | sequence::{delimited, pair, terminated}, 15 | IResult, Parser 16 | }; 17 | 18 | use std::cell::RefCell; 19 | use std::str::FromStr; 20 | 21 | thread_local! { 22 | pub static LEVEL: RefCell = RefCell::new(0); 23 | } 24 | 25 | fn reset() { 26 | LEVEL.with(|l| { 27 | *l.borrow_mut() = 0; 28 | }); 29 | } 30 | 31 | fn incr(i: &str) -> IResult<&str, ()> { 32 | LEVEL.with(|l| { 33 | *l.borrow_mut() += 1; 34 | 35 | // limit the number of recursions, the fuzzer keeps running into them 36 | if *l.borrow() >= 8192 { 37 | return Err(nom::Err::Failure(nom::error::Error::new( 38 | i, 39 | nom::error::ErrorKind::Count, 40 | ))); 41 | } else { 42 | Ok((i, ())) 43 | } 44 | }) 45 | } 46 | 47 | fn decr() { 48 | LEVEL.with(|l| { 49 | *l.borrow_mut() -= 1; 50 | }); 51 | } 52 | 53 | fn parens(i: &str) -> IResult<&str, i64> { 54 | delimited( 55 | space, 56 | delimited(terminated(tag("("), incr), expr, map(tag(")"), |_| decr())), 57 | space, 58 | ).parse(i) 59 | } 60 | 61 | fn factor(i: &str) -> IResult<&str, i64> { 62 | alt(( 63 | map_res(delimited(space, digit, space), FromStr::from_str), 64 | parens, 65 | )).parse(i) 66 | } 67 | 68 | fn term(i: &str) -> IResult<&str, i64> { 69 | incr(i)?; 70 | let (i, init) = factor(i).map_err(|e| { 71 | decr(); 72 | e 73 | })?; 74 | 75 | let res = fold_many0( 76 | alt(( 77 | pair(char('*'), factor), 78 | pair(char('/'), verify(factor, |i| *i != 0)), 79 | )), 80 | || init, 81 | |acc, (op, val): (char, i64)| { 82 | if op == '*' { 83 | acc.saturating_mul(val) 84 | } else { 85 | match acc.checked_div(val) { 86 | Some(v) => v, 87 | // we get a division with overflow because we can get acc = i64::MIN and val = -1 88 | // the division by zero is already checked earlier by verify 89 | None => i64::MAX, 90 | } 91 | } 92 | }, 93 | ).parse(i); 94 | 95 | decr(); 96 | res 97 | } 98 | 99 | fn expr(i: &str) -> IResult<&str, i64> { 100 | incr(i)?; 101 | let (i, init) = term(i).map_err(|e| { 102 | decr(); 103 | e 104 | })?; 105 | 106 | let res = fold_many0( 107 | pair(alt((char('+'), char('-'))), term), 108 | || init, 109 | |acc, (op, val): (char, i64)| { 110 | if op == '+' { 111 | acc.saturating_add(val) 112 | } else { 113 | acc.saturating_sub(val) 114 | } 115 | }, 116 | ).parse(i); 117 | 118 | decr(); 119 | res 120 | } 121 | 122 | fuzz_target!(|data: &[u8]| { 123 | reset(); 124 | // fuzzed code goes here 125 | let _ = match str::from_utf8(data) { 126 | Ok(v) => { 127 | //println!("v: {}", v); 128 | factor(v) 129 | } 130 | Err(_) => factor("2"), 131 | }; 132 | }); 133 | -------------------------------------------------------------------------------- /nom-language/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "nom-language" 3 | version = "0.1.0" 4 | authors = ["contact@geoffroycouprie.com"] 5 | description = "Language parsing focused combinators for the nom parser library" 6 | edition = "2021" 7 | license = "MIT" 8 | repository = "https://github.com/rust-bakery/nom" 9 | 10 | [dependencies] 11 | nom = { path = "..", version = "8.0.0" } -------------------------------------------------------------------------------- /nom-language/LICENSE: -------------------------------------------------------------------------------- 1 | ../LICENSE -------------------------------------------------------------------------------- /nom-language/src/error.rs: -------------------------------------------------------------------------------- 1 | use std::fmt; 2 | 3 | use nom::{ 4 | error::{ContextError, ErrorKind, FromExternalError, ParseError}, 5 | ErrorConvert, 6 | }; 7 | 8 | /// This error type accumulates errors and their position when backtracking 9 | /// through a parse tree. With some post processing, 10 | /// it can be used to display user friendly error messages 11 | #[derive(Clone, Debug, Eq, PartialEq)] 12 | pub struct VerboseError { 13 | /// List of errors accumulated by `VerboseError`, containing the affected 14 | /// part of input data, and some context 15 | pub errors: Vec<(I, VerboseErrorKind)>, 16 | } 17 | 18 | #[derive(Clone, Debug, Eq, PartialEq)] 19 | /// Error context for `VerboseError` 20 | pub enum VerboseErrorKind { 21 | /// Static string added by the `context` function 22 | Context(&'static str), 23 | /// Indicates which character was expected by the `char` function 24 | Char(char), 25 | /// Error kind given by various nom parsers 26 | Nom(ErrorKind), 27 | } 28 | 29 | impl ParseError for VerboseError { 30 | fn from_error_kind(input: I, kind: ErrorKind) -> Self { 31 | VerboseError { 32 | errors: vec![(input, VerboseErrorKind::Nom(kind))], 33 | } 34 | } 35 | 36 | fn append(input: I, kind: ErrorKind, mut other: Self) -> Self { 37 | other.errors.push((input, VerboseErrorKind::Nom(kind))); 38 | other 39 | } 40 | 41 | fn from_char(input: I, c: char) -> Self { 42 | VerboseError { 43 | errors: vec![(input, VerboseErrorKind::Char(c))], 44 | } 45 | } 46 | } 47 | 48 | impl ContextError for VerboseError { 49 | fn add_context(input: I, ctx: &'static str, mut other: Self) -> Self { 50 | other.errors.push((input, VerboseErrorKind::Context(ctx))); 51 | other 52 | } 53 | } 54 | 55 | impl FromExternalError for VerboseError { 56 | /// Create a new error from an input position and an external error 57 | fn from_external_error(input: I, kind: ErrorKind, _e: E) -> Self { 58 | Self::from_error_kind(input, kind) 59 | } 60 | } 61 | 62 | impl fmt::Display for VerboseError { 63 | fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { 64 | writeln!(f, "Parse error:")?; 65 | for (input, error) in &self.errors { 66 | match error { 67 | VerboseErrorKind::Nom(e) => writeln!(f, "{:?} at: {}", e, input)?, 68 | VerboseErrorKind::Char(c) => writeln!(f, "expected '{}' at: {}", c, input)?, 69 | VerboseErrorKind::Context(s) => writeln!(f, "in section '{}', at: {}", s, input)?, 70 | } 71 | } 72 | 73 | Ok(()) 74 | } 75 | } 76 | 77 | impl std::error::Error for VerboseError {} 78 | 79 | impl From> for VerboseError> { 80 | fn from(value: VerboseError<&[u8]>) -> Self { 81 | VerboseError { 82 | errors: value 83 | .errors 84 | .into_iter() 85 | .map(|(i, e)| (i.to_owned(), e)) 86 | .collect(), 87 | } 88 | } 89 | } 90 | 91 | impl From> for VerboseError { 92 | fn from(value: VerboseError<&str>) -> Self { 93 | VerboseError { 94 | errors: value 95 | .errors 96 | .into_iter() 97 | .map(|(i, e)| (i.to_owned(), e)) 98 | .collect(), 99 | } 100 | } 101 | } 102 | 103 | impl ErrorConvert> for VerboseError<(I, usize)> { 104 | fn convert(self) -> VerboseError { 105 | VerboseError { 106 | errors: self.errors.into_iter().map(|(i, e)| (i.0, e)).collect(), 107 | } 108 | } 109 | } 110 | 111 | impl ErrorConvert> for VerboseError { 112 | fn convert(self) -> VerboseError<(I, usize)> { 113 | VerboseError { 114 | errors: self.errors.into_iter().map(|(i, e)| ((i, 0), e)).collect(), 115 | } 116 | } 117 | } 118 | 119 | /// Transforms a `VerboseError` into a trace with input position information 120 | /// 121 | /// The errors contain references to input data that must come from `input`, 122 | /// because nom calculates byte offsets between them 123 | pub fn convert_error>(input: I, e: VerboseError) -> String { 124 | use nom::Offset; 125 | use std::fmt::Write; 126 | 127 | let mut result = String::new(); 128 | 129 | for (i, (substring, kind)) in e.errors.iter().enumerate() { 130 | let offset = input.offset(substring); 131 | 132 | if input.is_empty() { 133 | match kind { 134 | VerboseErrorKind::Char(c) => { 135 | write!(&mut result, "{}: expected '{}', got empty input\n\n", i, c) 136 | } 137 | VerboseErrorKind::Context(s) => write!(&mut result, "{}: in {}, got empty input\n\n", i, s), 138 | VerboseErrorKind::Nom(e) => write!(&mut result, "{}: in {:?}, got empty input\n\n", i, e), 139 | } 140 | } else { 141 | let prefix = &input.as_bytes()[..offset]; 142 | 143 | // Count the number of newlines in the first `offset` bytes of input 144 | let line_number = prefix.iter().filter(|&&b| b == b'\n').count() + 1; 145 | 146 | // Find the line that includes the subslice: 147 | // Find the *last* newline before the substring starts 148 | let line_begin = prefix 149 | .iter() 150 | .rev() 151 | .position(|&b| b == b'\n') 152 | .map(|pos| offset - pos) 153 | .unwrap_or(0); 154 | 155 | // Find the full line after that newline 156 | let line = input[line_begin..] 157 | .lines() 158 | .next() 159 | .unwrap_or(&input[line_begin..]) 160 | .trim_end(); 161 | 162 | // The (1-indexed) column number is the offset of our substring into that line 163 | let column_number = line.offset(substring) + 1; 164 | 165 | match kind { 166 | VerboseErrorKind::Char(c) => { 167 | if let Some(actual) = substring.chars().next() { 168 | write!( 169 | &mut result, 170 | "{i}: at line {line_number}:\n\ 171 | {line}\n\ 172 | {caret:>column$}\n\ 173 | expected '{expected}', found {actual}\n\n", 174 | i = i, 175 | line_number = line_number, 176 | line = line, 177 | caret = '^', 178 | column = column_number, 179 | expected = c, 180 | actual = actual, 181 | ) 182 | } else { 183 | write!( 184 | &mut result, 185 | "{i}: at line {line_number}:\n\ 186 | {line}\n\ 187 | {caret:>column$}\n\ 188 | expected '{expected}', got end of input\n\n", 189 | i = i, 190 | line_number = line_number, 191 | line = line, 192 | caret = '^', 193 | column = column_number, 194 | expected = c, 195 | ) 196 | } 197 | } 198 | VerboseErrorKind::Context(s) => write!( 199 | &mut result, 200 | "{i}: at line {line_number}, in {context}:\n\ 201 | {line}\n\ 202 | {caret:>column$}\n\n", 203 | i = i, 204 | line_number = line_number, 205 | context = s, 206 | line = line, 207 | caret = '^', 208 | column = column_number, 209 | ), 210 | VerboseErrorKind::Nom(e) => write!( 211 | &mut result, 212 | "{i}: at line {line_number}, in {nom_err:?}:\n\ 213 | {line}\n\ 214 | {caret:>column$}\n\n", 215 | i = i, 216 | line_number = line_number, 217 | nom_err = e, 218 | line = line, 219 | caret = '^', 220 | column = column_number, 221 | ), 222 | } 223 | } 224 | // Because `write!` to a `String` is infallible, this `unwrap` is fine. 225 | .unwrap(); 226 | } 227 | 228 | result 229 | } 230 | 231 | #[test] 232 | fn convert_error_panic() { 233 | use nom::character::complete::char; 234 | use nom::IResult; 235 | 236 | let input = ""; 237 | 238 | let _result: IResult<_, _, VerboseError<&str>> = char('x')(input); 239 | } 240 | 241 | #[test] 242 | fn issue_1027_convert_error_panic_nonempty() { 243 | use nom::character::complete::char; 244 | use nom::sequence::pair; 245 | use nom::Err; 246 | use nom::IResult; 247 | use nom::Parser; 248 | 249 | let input = "a"; 250 | 251 | let result: IResult<_, _, VerboseError<&str>> = pair(char('a'), char('b')).parse(input); 252 | let err = match result.unwrap_err() { 253 | Err::Error(e) => e, 254 | _ => unreachable!(), 255 | }; 256 | 257 | let msg = convert_error(input, err); 258 | assert_eq!( 259 | msg, 260 | "0: at line 1:\na\n ^\nexpected \'b\', got end of input\n\n" 261 | ); 262 | } 263 | -------------------------------------------------------------------------------- /nom-language/src/lib.rs: -------------------------------------------------------------------------------- 1 | //! # Langage parsing combinators for the nom parser combinators library 2 | //! 3 | //! nom is a parser combinator library with a focus on safe parsing, 4 | //! streaming patterns, and zero copy. 5 | //! While nom provides general purpose combinators, this crate is targeted 6 | //! at language parsing. 7 | 8 | pub mod error; 9 | pub mod precedence; 10 | -------------------------------------------------------------------------------- /nom-language/src/precedence/tests.rs: -------------------------------------------------------------------------------- 1 | use crate::precedence::{binary_op, unary_op, Assoc, Operation}; 2 | use nom::{ 3 | branch::alt, 4 | bytes::complete::tag, 5 | character::complete::digit1, 6 | combinator::{fail, map_res}, 7 | error::ErrorKind, 8 | error_node_position, error_position, 9 | sequence::delimited, 10 | Err, IResult, 11 | }; 12 | 13 | use crate::precedence::precedence; 14 | 15 | fn parser(i: &str) -> IResult<&str, i64> { 16 | precedence( 17 | unary_op(1, tag("-")), 18 | fail(), 19 | alt(( 20 | binary_op(2, Assoc::Left, tag("*")), 21 | binary_op(2, Assoc::Left, tag("/")), 22 | binary_op(3, Assoc::Left, tag("+")), 23 | binary_op(3, Assoc::Left, tag("-")), 24 | )), 25 | alt(( 26 | map_res(digit1, |s: &str| s.parse::()), 27 | delimited(tag("("), parser, tag(")")), 28 | )), 29 | |op: Operation<&str, (), &str, i64>| { 30 | use crate::precedence::Operation::*; 31 | match op { 32 | Prefix("-", o) => Ok(-o), 33 | Binary(lhs, "*", rhs) => Ok(lhs * rhs), 34 | Binary(lhs, "/", rhs) => Ok(lhs / rhs), 35 | Binary(lhs, "+", rhs) => Ok(lhs + rhs), 36 | Binary(lhs, "-", rhs) => Ok(lhs - rhs), 37 | _ => Err("Invalid combination"), 38 | } 39 | }, 40 | )(i) 41 | } 42 | 43 | #[test] 44 | fn precedence_test() { 45 | assert_eq!(parser("3"), Ok(("", 3))); 46 | assert_eq!(parser("-3"), Ok(("", -3))); 47 | assert_eq!(parser("4-(2*2)"), Ok(("", 0))); 48 | assert_eq!(parser("4-2*2"), Ok(("", 0))); 49 | assert_eq!(parser("(4-2)*2"), Ok(("", 4))); 50 | assert_eq!(parser("2*2/1"), Ok(("", 4))); 51 | 52 | let a = "a"; 53 | 54 | assert_eq!( 55 | parser(a), 56 | Err(Err::Error(error_node_position!( 57 | &a[..], 58 | ErrorKind::Precedence, 59 | error_position!(&a[..], ErrorKind::Tag) 60 | ))) 61 | ); 62 | 63 | let b = "3+b"; 64 | 65 | assert_eq!( 66 | parser(b), 67 | Err(Err::Error(error_node_position!( 68 | &b[2..], 69 | ErrorKind::Precedence, 70 | error_position!(&b[2..], ErrorKind::Tag) 71 | ))) 72 | ); 73 | } 74 | -------------------------------------------------------------------------------- /proptest-regressions/character/complete.txt: -------------------------------------------------------------------------------- 1 | # Seeds for failure cases proptest has generated in the past. It is 2 | # automatically read and these particular cases re-run before any 3 | # novel cases are generated. 4 | # 5 | # It is recommended to check this file in to source control so that 6 | # everyone who runs the test benefits from these saved cases. 7 | cc cc9654fa1abddf4d6045e4c4977fea390903ee6e6469630b0bb17fdf69219b6d # shrinks to s = "𑵧" 8 | cc 7dcadb118055527708beb3c5eadd3e14202a8f70e019004c33e9696853691827 # shrinks to s = "" 9 | cc e8af68daccf860a49177b5aab0dfeecea24c7530fec6c88469ca0f820188c6b1 # shrinks to s = "-" 10 | cc c98c899dcd0a9359ddbf246e3a1edddb349e6dd7e1d166637e551e4dcf570db6 # shrinks to s = "+0" 11 | -------------------------------------------------------------------------------- /proptest-regressions/character/streaming.txt: -------------------------------------------------------------------------------- 1 | # Seeds for failure cases proptest has generated in the past. It is 2 | # automatically read and these particular cases re-run before any 3 | # novel cases are generated. 4 | # 5 | # It is recommended to check this file in to source control so that 6 | # everyone who runs the test benefits from these saved cases. 7 | cc 82a575ed1f031825e7474bff3702d0c42017471b5ac845bdbdc00c1534dbc4cb # shrinks to s = "" 8 | cc 155f8f4b052941ba58b8b90a5f8fa7da78c04c1a451083a4a89a348c86226904 # shrinks to s = "0" 9 | cc c35a5a751223822dd0a94416d5ca3cc53b4a61cdc4f9422251bc2c72712ed844 # shrinks to s = "-0" 10 | cc 478373182b684c42ce3746ea62d57a35a9c764ef75943e0bb1dc08f88b295581 # shrinks to s = "- " 11 | -------------------------------------------------------------------------------- /proptest-regressions/number/complete.txt: -------------------------------------------------------------------------------- 1 | # Seeds for failure cases proptest has generated in the past. It is 2 | # automatically read and these particular cases re-run before any 3 | # novel cases are generated. 4 | # 5 | # It is recommended to check this file in to source control so that 6 | # everyone who runs the test benefits from these saved cases. 7 | cc b4267e69b3f62d9bfbc8b9a11d9250a4cc78a2f329841190fd2740b1b3e236d6 # shrinks to s = "" 8 | cc cf51966684042789e64f4b99449551cb227309f9db61f0fdd4266b0b7b572ce4 # shrinks to s = "0" 9 | cc fc0d8df9f3a0ea46ec05ff021be24244fe2533c4b5d75e74a78191148e2d07bb # shrinks to s = "0" 10 | cc 15e795e3c045df60a2fa871336f9bf43ca9036aeda19e8e440b956866b031a65 # shrinks to s = "0e" 11 | cc 20b201c32f3f8314cf32133c3e6a58dd1e4409ae883f7910efa1147ba7c73b6c # shrinks to s = "e" 12 | cc 47f9c093d94bc952a3593a79adc2cafa75c9cca51bee8a77150cfaeb89acbaf7 # shrinks to s = "01" 13 | cc 9d65816e63ee5da410b64aeb5f7d44dbfa7d0773c053380e05bff0beb1bc8d92 # shrinks to s = ".0" 14 | -------------------------------------------------------------------------------- /proptest-regressions/number/streaming.txt: -------------------------------------------------------------------------------- 1 | # Seeds for failure cases proptest has generated in the past. It is 2 | # automatically read and these particular cases re-run before any 3 | # novel cases are generated. 4 | # 5 | # It is recommended to check this file in to source control so that 6 | # everyone who runs the test benefits from these saved cases. 7 | cc 68154e0c90b20374781d3e3932bddb80e8c6a97901d0331bbd7e6daa75b794cb # shrinks to s = "0e" 8 | cc d31506b74ad24a80485adb176039e2fa82cf58798738288a2c810952c68d7600 # shrinks to s = "inf" 9 | -------------------------------------------------------------------------------- /rustfmt.toml: -------------------------------------------------------------------------------- 1 | tab_spaces = 2 2 | max_width = 100 3 | -------------------------------------------------------------------------------- /src/bits/complete.rs: -------------------------------------------------------------------------------- 1 | //! Bit level parsers 2 | //! 3 | 4 | use crate::error::{ErrorKind, ParseError}; 5 | use crate::internal::{Err, IResult}; 6 | use crate::lib::std::ops::{AddAssign, Div, Shl, Shr}; 7 | use crate::traits::{Input, ToUsize}; 8 | 9 | /// Generates a parser taking `count` bits 10 | /// 11 | /// # Example 12 | /// ```rust 13 | /// # use nom::bits::complete::take; 14 | /// # use nom::IResult; 15 | /// # use nom::error::{Error, ErrorKind}; 16 | /// // Input is a tuple of (input: I, bit_offset: usize) 17 | /// fn parser(input: (&[u8], usize), count: usize)-> IResult<(&[u8], usize), u8> { 18 | /// take(count)(input) 19 | /// } 20 | /// 21 | /// // Consumes 0 bits, returns 0 22 | /// assert_eq!(parser(([0b00010010].as_ref(), 0), 0), Ok((([0b00010010].as_ref(), 0), 0))); 23 | /// 24 | /// // Consumes 4 bits, returns their values and increase offset to 4 25 | /// assert_eq!(parser(([0b00010010].as_ref(), 0), 4), Ok((([0b00010010].as_ref(), 4), 0b00000001))); 26 | /// 27 | /// // Consumes 4 bits, offset is 4, returns their values and increase offset to 0 of next byte 28 | /// assert_eq!(parser(([0b00010010].as_ref(), 4), 4), Ok((([].as_ref(), 0), 0b00000010))); 29 | /// 30 | /// // Tries to consume 12 bits but only 8 are available 31 | /// assert_eq!(parser(([0b00010010].as_ref(), 0), 12), Err(nom::Err::Error(Error{input: ([0b00010010].as_ref(), 0), code: ErrorKind::Eof }))); 32 | /// ``` 33 | pub fn take>( 34 | count: C, 35 | ) -> impl Fn((I, usize)) -> IResult<(I, usize), O, E> 36 | where 37 | I: Input, 38 | C: ToUsize, 39 | O: From + AddAssign + Shl + Shr, 40 | { 41 | let count = count.to_usize(); 42 | move |(input, bit_offset): (I, usize)| { 43 | if count == 0 { 44 | Ok(((input, bit_offset), 0u8.into())) 45 | } else if input.input_len() * 8 < count + bit_offset { 46 | Err(Err::Error(E::from_error_kind( 47 | (input, bit_offset), 48 | ErrorKind::Eof, 49 | ))) 50 | } else { 51 | let cnt = (count + bit_offset).div(8); 52 | let mut acc: O = 0_u8.into(); 53 | let mut offset: usize = bit_offset; 54 | let mut remaining: usize = count; 55 | let mut end_offset: usize = 0; 56 | 57 | for byte in input.iter_elements().take(cnt + 1) { 58 | if remaining == 0 { 59 | break; 60 | } 61 | let val: O = if offset == 0 { 62 | byte.into() 63 | } else { 64 | ((byte << offset) >> offset).into() 65 | }; 66 | 67 | if remaining < 8 - offset { 68 | acc += val >> (8 - offset - remaining); 69 | end_offset = remaining + offset; 70 | break; 71 | } else { 72 | acc += val << (remaining - (8 - offset)); 73 | remaining -= 8 - offset; 74 | offset = 0; 75 | } 76 | } 77 | Ok(((input.take_from(cnt), end_offset), acc)) 78 | } 79 | } 80 | } 81 | 82 | /// Generates a parser taking `count` bits and comparing them to `pattern` 83 | pub fn tag>( 84 | pattern: O, 85 | count: C, 86 | ) -> impl Fn((I, usize)) -> IResult<(I, usize), O, E> 87 | where 88 | I: Input + Clone, 89 | C: ToUsize, 90 | O: From + AddAssign + Shl + Shr + PartialEq, 91 | { 92 | let count = count.to_usize(); 93 | move |input: (I, usize)| { 94 | let inp = input.clone(); 95 | 96 | take(count)(input).and_then(|(i, o)| { 97 | if pattern == o { 98 | Ok((i, o)) 99 | } else { 100 | Err(Err::Error(error_position!(inp, ErrorKind::TagBits))) 101 | } 102 | }) 103 | } 104 | } 105 | 106 | /// Parses one specific bit as a bool. 107 | /// 108 | /// # Example 109 | /// ```rust 110 | /// # use nom::bits::complete::bool; 111 | /// # use nom::IResult; 112 | /// # use nom::error::{Error, ErrorKind}; 113 | /// 114 | /// fn parse(input: (&[u8], usize)) -> IResult<(&[u8], usize), bool> { 115 | /// bool(input) 116 | /// } 117 | /// 118 | /// assert_eq!(parse(([0b10000000].as_ref(), 0)), Ok((([0b10000000].as_ref(), 1), true))); 119 | /// assert_eq!(parse(([0b10000000].as_ref(), 1)), Ok((([0b10000000].as_ref(), 2), false))); 120 | /// ``` 121 | pub fn bool>(input: (I, usize)) -> IResult<(I, usize), bool, E> 122 | where 123 | I: Input, 124 | { 125 | let (res, bit): (_, u32) = take(1usize)(input)?; 126 | Ok((res, bit != 0)) 127 | } 128 | 129 | #[cfg(test)] 130 | mod test { 131 | use super::*; 132 | 133 | #[test] 134 | fn test_take_0() { 135 | let input = [0b00010010].as_ref(); 136 | let count = 0usize; 137 | assert_eq!(count, 0usize); 138 | let offset = 0usize; 139 | 140 | let result: crate::IResult<(&[u8], usize), usize> = take(count)((input, offset)); 141 | 142 | assert_eq!(result, Ok(((input, offset), 0))); 143 | } 144 | 145 | #[test] 146 | fn test_take_eof() { 147 | let input = [0b00010010].as_ref(); 148 | 149 | let result: crate::IResult<(&[u8], usize), usize> = take(1usize)((input, 8)); 150 | 151 | assert_eq!( 152 | result, 153 | Err(crate::Err::Error(crate::error::Error { 154 | input: (input, 8), 155 | code: ErrorKind::Eof 156 | })) 157 | ) 158 | } 159 | 160 | #[test] 161 | fn test_take_span_over_multiple_bytes() { 162 | let input = [0b00010010, 0b00110100, 0b11111111, 0b11111111].as_ref(); 163 | 164 | let result: crate::IResult<(&[u8], usize), usize> = take(24usize)((input, 4)); 165 | 166 | assert_eq!( 167 | result, 168 | Ok((([0b11111111].as_ref(), 4), 0b1000110100111111111111)) 169 | ); 170 | } 171 | 172 | #[test] 173 | fn test_bool_0() { 174 | let input = [0b10000000].as_ref(); 175 | 176 | let result: crate::IResult<(&[u8], usize), bool> = bool((input, 0)); 177 | 178 | assert_eq!(result, Ok(((input, 1), true))); 179 | } 180 | 181 | #[test] 182 | fn test_bool_eof() { 183 | let input = [0b10000000].as_ref(); 184 | 185 | let result: crate::IResult<(&[u8], usize), bool> = bool((input, 8)); 186 | 187 | assert_eq!( 188 | result, 189 | Err(crate::Err::Error(crate::error::Error { 190 | input: (input, 8), 191 | code: ErrorKind::Eof 192 | })) 193 | ); 194 | } 195 | } 196 | -------------------------------------------------------------------------------- /src/bits/mod.rs: -------------------------------------------------------------------------------- 1 | //! Bit level parsers 2 | //! 3 | 4 | pub mod complete; 5 | pub mod streaming; 6 | 7 | use crate::error::{ErrorKind, ParseError}; 8 | use crate::internal::{Err, IResult, Needed, Parser}; 9 | use crate::traits::ErrorConvert; 10 | use crate::Input; 11 | 12 | /// Converts a byte-level input to a bit-level input, for consumption by a parser that uses bits. 13 | /// 14 | /// Afterwards, the input is converted back to a byte-level parser, with any remaining bits thrown 15 | /// away. 16 | /// 17 | /// # Example 18 | /// ``` 19 | /// use nom::bits::{bits, streaming::take}; 20 | /// use nom::error::Error; 21 | /// use nom::IResult; 22 | /// 23 | /// fn parse(input: &[u8]) -> IResult<&[u8], (u8, u8)> { 24 | /// bits::<_, _, Error<(&[u8], usize)>, _, _>((take(4usize), take(8usize)))(input) 25 | /// } 26 | /// 27 | /// let input = &[0x12, 0x34, 0xff, 0xff]; 28 | /// 29 | /// let output = parse(input).expect("We take 1.5 bytes and the input is longer than 2 bytes"); 30 | /// 31 | /// // The first byte is consumed, the second byte is partially consumed and dropped. 32 | /// let remaining = output.0; 33 | /// assert_eq!(remaining, [0xff, 0xff]); 34 | /// 35 | /// let parsed = output.1; 36 | /// assert_eq!(parsed.0, 0x01); 37 | /// assert_eq!(parsed.1, 0x23); 38 | /// ``` 39 | pub fn bits(mut parser: P) -> impl FnMut(I) -> IResult 40 | where 41 | E1: ParseError<(I, usize)> + ErrorConvert, 42 | E2: ParseError, 43 | I: Input, 44 | P: Parser<(I, usize), Output = O, Error = E1>, 45 | { 46 | move |input: I| match parser.parse((input, 0)) { 47 | Ok(((rest, offset), result)) => { 48 | // If the next byte has been partially read, it will be sliced away as well. 49 | // The parser functions might already slice away all fully read bytes. 50 | // That's why `offset / 8` isn't necessarily needed at all times. 51 | let remaining_bytes_index = offset / 8 + if offset % 8 == 0 { 0 } else { 1 }; 52 | Ok((rest.take_from(remaining_bytes_index), result)) 53 | } 54 | Err(Err::Incomplete(n)) => Err(Err::Incomplete(n.map(|u| u.get() / 8 + 1))), 55 | Err(Err::Error(e)) => Err(Err::Error(e.convert())), 56 | Err(Err::Failure(e)) => Err(Err::Failure(e.convert())), 57 | } 58 | } 59 | 60 | /// Counterpart to `bits`, `bytes` transforms its bit stream input into a byte slice for the underlying 61 | /// parser, allowing byte-slice parsers to work on bit streams. 62 | /// 63 | /// A partial byte remaining in the input will be ignored and the given parser will start parsing 64 | /// at the next full byte. 65 | /// 66 | /// ``` 67 | /// use nom::bits::{bits, bytes, streaming::take}; 68 | /// use nom::combinator::rest; 69 | /// use nom::error::Error; 70 | /// use nom::IResult; 71 | /// 72 | /// fn parse(input: &[u8]) -> IResult<&[u8], (u8, u8, &[u8])> { 73 | /// bits::<_, _, Error<(&[u8], usize)>, _, _>(( 74 | /// take(4usize), 75 | /// take(8usize), 76 | /// bytes::<_, _, Error<&[u8]>, _, _>(rest) 77 | /// ))(input) 78 | /// } 79 | /// 80 | /// let input = &[0x12, 0x34, 0xff, 0xff]; 81 | /// 82 | /// assert_eq!(parse( input ), Ok(( &[][..], (0x01, 0x23, &[0xff, 0xff][..]) ))); 83 | /// ``` 84 | pub fn bytes(mut parser: P) -> impl FnMut((I, usize)) -> IResult<(I, usize), O, E2> 85 | where 86 | E1: ParseError + ErrorConvert, 87 | E2: ParseError<(I, usize)>, 88 | I: Input + Clone, 89 | P: Parser, 90 | { 91 | move |(input, offset): (I, usize)| { 92 | let inner = if offset % 8 != 0 { 93 | input.take_from(1 + offset / 8) 94 | } else { 95 | input.take_from(offset / 8) 96 | }; 97 | let i = (input, offset); 98 | match parser.parse(inner) { 99 | Ok((rest, res)) => Ok(((rest, 0), res)), 100 | Err(Err::Incomplete(Needed::Unknown)) => Err(Err::Incomplete(Needed::Unknown)), 101 | Err(Err::Incomplete(Needed::Size(sz))) => Err(match sz.get().checked_mul(8) { 102 | Some(v) => Err::Incomplete(Needed::new(v)), 103 | None => Err::Failure(E2::from_error_kind(i, ErrorKind::TooLarge)), 104 | }), 105 | Err(Err::Error(e)) => Err(Err::Error(e.convert())), 106 | Err(Err::Failure(e)) => Err(Err::Failure(e.convert())), 107 | } 108 | } 109 | } 110 | 111 | #[cfg(test)] 112 | mod test { 113 | use super::*; 114 | use crate::bits::streaming::take; 115 | use crate::error::Error; 116 | 117 | #[test] 118 | /// Take the `bits` function and assert that remaining bytes are correctly returned, if the 119 | /// previous bytes are fully consumed 120 | fn test_complete_byte_consumption_bits() { 121 | let input = &[0x12, 0x34, 0x56, 0x78]; 122 | 123 | // Take 3 bit slices with sizes [4, 8, 4]. 124 | let result: IResult<&[u8], (u8, u8, u8)> = 125 | bits::<_, _, Error<(&[u8], usize)>, _, _>((take(4usize), take(8usize), take(4usize)))(input); 126 | 127 | let output = result.expect("We take 2 bytes and the input is longer than 2 bytes"); 128 | 129 | let remaining = output.0; 130 | assert_eq!(remaining, [0x56, 0x78]); 131 | 132 | let parsed = output.1; 133 | assert_eq!(parsed.0, 0x01); 134 | assert_eq!(parsed.1, 0x23); 135 | assert_eq!(parsed.2, 0x04); 136 | } 137 | 138 | #[test] 139 | /// Take the `bits` function and assert that remaining bytes are correctly returned, if the 140 | /// previous bytes are NOT fully consumed. Partially consumed bytes are supposed to be dropped. 141 | /// I.e. if we consume 1.5 bytes of 4 bytes, 2 bytes will be returned, bits 13-16 will be 142 | /// dropped. 143 | fn test_partial_byte_consumption_bits() { 144 | let input = &[0x12, 0x34, 0x56, 0x78]; 145 | 146 | // Take bit slices with sizes [4, 8]. 147 | let result: IResult<&[u8], (u8, u8)> = 148 | bits::<_, _, Error<(&[u8], usize)>, _, _>((take(4usize), take(8usize)))(input); 149 | 150 | let output = result.expect("We take 1.5 bytes and the input is longer than 2 bytes"); 151 | 152 | let remaining = output.0; 153 | assert_eq!(remaining, [0x56, 0x78]); 154 | 155 | let parsed = output.1; 156 | assert_eq!(parsed.0, 0x01); 157 | assert_eq!(parsed.1, 0x23); 158 | } 159 | 160 | #[test] 161 | #[cfg(feature = "std")] 162 | /// Ensure that in Incomplete error is thrown, if too few bytes are passed for a given parser. 163 | fn test_incomplete_bits() { 164 | let input = &[0x12]; 165 | 166 | // Take bit slices with sizes [4, 8]. 167 | let result: IResult<&[u8], (u8, u8)> = 168 | bits::<_, _, Error<(&[u8], usize)>, _, _>((take(4usize), take(8usize)))(input); 169 | 170 | assert!(result.is_err()); 171 | let error = result.err().unwrap(); 172 | assert_eq!("Parsing requires 2 bytes/chars", error.to_string()); 173 | } 174 | } 175 | -------------------------------------------------------------------------------- /src/bits/streaming.rs: -------------------------------------------------------------------------------- 1 | //! Bit level parsers 2 | //! 3 | 4 | use crate::error::{ErrorKind, ParseError}; 5 | use crate::internal::{Err, IResult, Needed}; 6 | use crate::lib::std::ops::{AddAssign, Div, Shl, Shr}; 7 | use crate::traits::{Input, ToUsize}; 8 | 9 | /// Generates a parser taking `count` bits 10 | pub fn take>( 11 | count: C, 12 | ) -> impl Fn((I, usize)) -> IResult<(I, usize), O, E> 13 | where 14 | I: Input, 15 | C: ToUsize, 16 | O: From + AddAssign + Shl + Shr, 17 | { 18 | let count = count.to_usize(); 19 | move |(input, bit_offset): (I, usize)| { 20 | if count == 0 { 21 | Ok(((input, bit_offset), 0u8.into())) 22 | } else { 23 | let cnt = (count + bit_offset).div(8); 24 | if input.input_len() * 8 < count + bit_offset { 25 | Err(Err::Incomplete(Needed::new(count))) 26 | } else { 27 | let mut acc: O = 0_u8.into(); 28 | let mut offset: usize = bit_offset; 29 | let mut remaining: usize = count; 30 | let mut end_offset: usize = 0; 31 | 32 | for byte in input.iter_elements().take(cnt + 1) { 33 | if remaining == 0 { 34 | break; 35 | } 36 | let val: O = if offset == 0 { 37 | byte.into() 38 | } else { 39 | ((byte << offset) >> offset).into() 40 | }; 41 | 42 | if remaining < 8 - offset { 43 | acc += val >> (8 - offset - remaining); 44 | end_offset = remaining + offset; 45 | break; 46 | } else { 47 | acc += val << (remaining - (8 - offset)); 48 | remaining -= 8 - offset; 49 | offset = 0; 50 | } 51 | } 52 | Ok(((input.take_from(cnt), end_offset), acc)) 53 | } 54 | } 55 | } 56 | } 57 | 58 | /// Generates a parser taking `count` bits and comparing them to `pattern` 59 | pub fn tag>( 60 | pattern: O, 61 | count: C, 62 | ) -> impl Fn((I, usize)) -> IResult<(I, usize), O, E> 63 | where 64 | I: Input + Clone, 65 | C: ToUsize, 66 | O: From + AddAssign + Shl + Shr + PartialEq, 67 | { 68 | let count = count.to_usize(); 69 | move |input: (I, usize)| { 70 | let inp = input.clone(); 71 | 72 | take(count)(input).and_then(|(i, o)| { 73 | if pattern == o { 74 | Ok((i, o)) 75 | } else { 76 | Err(Err::Error(error_position!(inp, ErrorKind::TagBits))) 77 | } 78 | }) 79 | } 80 | } 81 | 82 | /// Parses one specific bit as a bool. 83 | /// 84 | /// # Example 85 | /// ```rust 86 | /// # use nom::bits::complete::bool; 87 | /// # use nom::IResult; 88 | /// # use nom::error::{Error, ErrorKind}; 89 | /// 90 | /// fn parse(input: (&[u8], usize)) -> IResult<(&[u8], usize), bool> { 91 | /// bool(input) 92 | /// } 93 | /// 94 | /// assert_eq!(parse(([0b10000000].as_ref(), 0)), Ok((([0b10000000].as_ref(), 1), true))); 95 | /// assert_eq!(parse(([0b10000000].as_ref(), 1)), Ok((([0b10000000].as_ref(), 2), false))); 96 | /// ``` 97 | pub fn bool>(input: (I, usize)) -> IResult<(I, usize), bool, E> 98 | where 99 | I: Input, 100 | { 101 | let (res, bit): (_, u32) = take(1usize)(input)?; 102 | Ok((res, bit != 0)) 103 | } 104 | 105 | #[cfg(test)] 106 | mod test { 107 | use super::*; 108 | 109 | #[test] 110 | fn test_take_0() { 111 | let input = [].as_ref(); 112 | let count = 0usize; 113 | assert_eq!(count, 0usize); 114 | let offset = 0usize; 115 | 116 | let result: crate::IResult<(&[u8], usize), usize> = take(count)((input, offset)); 117 | 118 | assert_eq!(result, Ok(((input, offset), 0))); 119 | } 120 | 121 | #[test] 122 | fn test_tag_ok() { 123 | let input = [0b00011111].as_ref(); 124 | let offset = 0usize; 125 | let bits_to_take = 4usize; 126 | let value_to_tag = 0b0001; 127 | 128 | let result: crate::IResult<(&[u8], usize), usize> = 129 | tag(value_to_tag, bits_to_take)((input, offset)); 130 | 131 | assert_eq!(result, Ok(((input, bits_to_take), value_to_tag))); 132 | } 133 | 134 | #[test] 135 | fn test_tag_err() { 136 | let input = [0b00011111].as_ref(); 137 | let offset = 0usize; 138 | let bits_to_take = 4usize; 139 | let value_to_tag = 0b1111; 140 | 141 | let result: crate::IResult<(&[u8], usize), usize> = 142 | tag(value_to_tag, bits_to_take)((input, offset)); 143 | 144 | assert_eq!( 145 | result, 146 | Err(crate::Err::Error(crate::error::Error { 147 | input: (input, offset), 148 | code: ErrorKind::TagBits 149 | })) 150 | ); 151 | } 152 | 153 | #[test] 154 | fn test_bool_0() { 155 | let input = [0b10000000].as_ref(); 156 | 157 | let result: crate::IResult<(&[u8], usize), bool> = bool((input, 0)); 158 | 159 | assert_eq!(result, Ok(((input, 1), true))); 160 | } 161 | 162 | #[test] 163 | fn test_bool_eof() { 164 | let input = [0b10000000].as_ref(); 165 | 166 | let result: crate::IResult<(&[u8], usize), bool> = bool((input, 8)); 167 | 168 | assert_eq!(result, Err(crate::Err::Incomplete(Needed::new(1)))); 169 | } 170 | } 171 | -------------------------------------------------------------------------------- /src/branch/tests.rs: -------------------------------------------------------------------------------- 1 | use crate::branch::{alt, permutation}; 2 | use crate::bytes::streaming::tag; 3 | use crate::error::ErrorKind; 4 | use crate::internal::{Err, IResult, Needed}; 5 | use crate::Parser; 6 | #[cfg(feature = "alloc")] 7 | use crate::{ 8 | error::ParseError, 9 | lib::std::{ 10 | fmt::Debug, 11 | string::{String, ToString}, 12 | }, 13 | }; 14 | 15 | #[cfg(feature = "alloc")] 16 | #[derive(Debug, Clone, Eq, PartialEq)] 17 | pub struct ErrorStr(String); 18 | 19 | #[cfg(feature = "alloc")] 20 | impl From for ErrorStr { 21 | fn from(i: u32) -> Self { 22 | ErrorStr(format!("custom error code: {}", i)) 23 | } 24 | } 25 | 26 | #[cfg(feature = "alloc")] 27 | impl<'a> From<&'a str> for ErrorStr { 28 | fn from(i: &'a str) -> Self { 29 | ErrorStr(format!("custom error message: {}", i)) 30 | } 31 | } 32 | 33 | #[cfg(feature = "alloc")] 34 | impl ParseError for ErrorStr { 35 | fn from_error_kind(input: I, kind: ErrorKind) -> Self { 36 | ErrorStr(format!("custom error message: ({:?}, {:?})", input, kind)) 37 | } 38 | 39 | fn append(input: I, kind: ErrorKind, other: Self) -> Self { 40 | ErrorStr(format!( 41 | "custom error message: ({:?}, {:?}) - {:?}", 42 | input, kind, other 43 | )) 44 | } 45 | } 46 | 47 | #[cfg(feature = "alloc")] 48 | #[test] 49 | fn alt_test() { 50 | use crate::Parser; 51 | 52 | fn work(input: &[u8]) -> IResult<&[u8], &[u8], ErrorStr> { 53 | Ok((&b""[..], input)) 54 | } 55 | 56 | #[allow(unused_variables)] 57 | fn dont_work(input: &[u8]) -> IResult<&[u8], &[u8], ErrorStr> { 58 | Err(Err::Error(ErrorStr("abcd".to_string()))) 59 | } 60 | 61 | fn work2(input: &[u8]) -> IResult<&[u8], &[u8], ErrorStr> { 62 | Ok((input, &b""[..])) 63 | } 64 | 65 | fn alt1(i: &[u8]) -> IResult<&[u8], &[u8], ErrorStr> { 66 | alt((dont_work, dont_work)).parse(i) 67 | } 68 | fn alt2(i: &[u8]) -> IResult<&[u8], &[u8], ErrorStr> { 69 | alt((dont_work, work)).parse(i) 70 | } 71 | fn alt3(i: &[u8]) -> IResult<&[u8], &[u8], ErrorStr> { 72 | alt((dont_work, dont_work, work2, dont_work)).parse(i) 73 | } 74 | //named!(alt1, alt!(dont_work | dont_work)); 75 | //named!(alt2, alt!(dont_work | work)); 76 | //named!(alt3, alt!(dont_work | dont_work | work2 | dont_work)); 77 | 78 | let a = &b"abcd"[..]; 79 | assert_eq!( 80 | alt1(a), 81 | Err(Err::Error(error_node_position!( 82 | a, 83 | ErrorKind::Alt, 84 | ErrorStr("abcd".to_string()) 85 | ))) 86 | ); 87 | assert_eq!(alt2(a), Ok((&b""[..], a))); 88 | assert_eq!(alt3(a), Ok((a, &b""[..]))); 89 | 90 | fn alt4(i: &[u8]) -> IResult<&[u8], &[u8]> { 91 | alt((tag("abcd"), tag("efgh"))).parse(i) 92 | } 93 | let b = &b"efgh"[..]; 94 | assert_eq!(alt4(a), Ok((&b""[..], a))); 95 | assert_eq!(alt4(b), Ok((&b""[..], b))); 96 | } 97 | 98 | #[test] 99 | fn alt_incomplete() { 100 | fn alt1(i: &[u8]) -> IResult<&[u8], &[u8]> { 101 | alt((tag("a"), tag("bc"), tag("def"))).parse(i) 102 | } 103 | 104 | let a = &b""[..]; 105 | assert_eq!(alt1(a), Err(Err::Incomplete(Needed::new(1)))); 106 | let a = &b"b"[..]; 107 | assert_eq!(alt1(a), Err(Err::Incomplete(Needed::new(1)))); 108 | let a = &b"bcd"[..]; 109 | assert_eq!(alt1(a), Ok((&b"d"[..], &b"bc"[..]))); 110 | let a = &b"cde"[..]; 111 | assert_eq!(alt1(a), Err(Err::Error(error_position!(a, ErrorKind::Tag)))); 112 | let a = &b"de"[..]; 113 | assert_eq!(alt1(a), Err(Err::Incomplete(Needed::new(1)))); 114 | let a = &b"defg"[..]; 115 | assert_eq!(alt1(a), Ok((&b"g"[..], &b"def"[..]))); 116 | } 117 | 118 | #[test] 119 | fn alt_array() { 120 | fn alt1(i: &[u8]) -> IResult<&[u8], &[u8]> { 121 | alt([tag("a"), tag("bc"), tag("def")]).parse(i) 122 | } 123 | 124 | let a = &b"a"[..]; 125 | assert_eq!(alt1(a), Ok((&b""[..], (&b"a"[..])))); 126 | 127 | let bc = &b"bc"[..]; 128 | assert_eq!(alt1(bc), Ok((&b""[..], (&b"bc"[..])))); 129 | 130 | let defg = &b"defg"[..]; 131 | assert_eq!(alt1(defg), Ok((&b"g"[..], (&b"def"[..])))); 132 | } 133 | 134 | #[test] 135 | fn alt_dynamic_array() { 136 | fn alt1(i: &[u8]) -> IResult<&[u8], &[u8]> { 137 | alt(&mut [tag("a"), tag("bc"), tag("def")][..]).parse(i) 138 | } 139 | 140 | let a = &b"a"[..]; 141 | assert_eq!(alt1(a), Ok((&b""[..], (&b"a"[..])))); 142 | 143 | let bc = &b"bc"[..]; 144 | assert_eq!(alt1(bc), Ok((&b""[..], (&b"bc"[..])))); 145 | 146 | let defg = &b"defg"[..]; 147 | assert_eq!(alt1(defg), Ok((&b"g"[..], (&b"def"[..])))); 148 | } 149 | 150 | #[test] 151 | fn permutation_test() { 152 | #[allow(clippy::type_complexity)] 153 | fn perm(i: &[u8]) -> IResult<&[u8], (&[u8], &[u8], &[u8])> { 154 | permutation((tag("abcd"), tag("efg"), tag("hi"))).parse(i) 155 | } 156 | 157 | let expected = (&b"abcd"[..], &b"efg"[..], &b"hi"[..]); 158 | 159 | let a = &b"abcdefghijk"[..]; 160 | assert_eq!(perm(a), Ok((&b"jk"[..], expected))); 161 | let b = &b"efgabcdhijk"[..]; 162 | assert_eq!(perm(b), Ok((&b"jk"[..], expected))); 163 | let c = &b"hiefgabcdjk"[..]; 164 | assert_eq!(perm(c), Ok((&b"jk"[..], expected))); 165 | 166 | let d = &b"efgxyzabcdefghi"[..]; 167 | assert_eq!( 168 | perm(d), 169 | Err(Err::Error(error_node_position!( 170 | &b"efgxyzabcdefghi"[..], 171 | ErrorKind::Permutation, 172 | error_position!(&b"xyzabcdefghi"[..], ErrorKind::Tag) 173 | ))) 174 | ); 175 | 176 | let e = &b"efgabc"[..]; 177 | assert_eq!(perm(e), Err(Err::Incomplete(Needed::new(1)))); 178 | } 179 | -------------------------------------------------------------------------------- /src/character/tests.rs: -------------------------------------------------------------------------------- 1 | use super::streaming::*; 2 | use crate::error::ErrorKind; 3 | use crate::internal::{Err, IResult}; 4 | 5 | #[test] 6 | fn one_of_test() { 7 | fn f(i: &[u8]) -> IResult<&[u8], char> { 8 | one_of("ab")(i) 9 | } 10 | 11 | let a = &b"abcd"[..]; 12 | assert_eq!(f(a), Ok((&b"bcd"[..], 'a'))); 13 | 14 | let b = &b"cde"[..]; 15 | assert_eq!(f(b), Err(Err::Error(error_position!(b, ErrorKind::OneOf)))); 16 | 17 | fn utf8(i: &str) -> IResult<&str, char> { 18 | one_of("+\u{FF0B}")(i) 19 | } 20 | 21 | assert!(utf8("+").is_ok()); 22 | assert!(utf8("\u{FF0B}").is_ok()); 23 | } 24 | 25 | #[test] 26 | fn none_of_test() { 27 | fn f(i: &[u8]) -> IResult<&[u8], char> { 28 | none_of("ab")(i) 29 | } 30 | 31 | let a = &b"abcd"[..]; 32 | assert_eq!(f(a), Err(Err::Error(error_position!(a, ErrorKind::NoneOf)))); 33 | 34 | let b = &b"cde"[..]; 35 | assert_eq!(f(b), Ok((&b"de"[..], 'c'))); 36 | } 37 | 38 | #[test] 39 | fn char_byteslice() { 40 | fn f(i: &[u8]) -> IResult<&[u8], char> { 41 | char('c')(i) 42 | } 43 | 44 | let a = &b"abcd"[..]; 45 | assert_eq!(f(a), Err(Err::Error(error_position!(a, ErrorKind::Char)))); 46 | 47 | let b = &b"cde"[..]; 48 | assert_eq!(f(b), Ok((&b"de"[..], 'c'))); 49 | } 50 | 51 | #[test] 52 | fn char_str() { 53 | fn f(i: &str) -> IResult<&str, char> { 54 | char('c')(i) 55 | } 56 | 57 | let a = "abcd"; 58 | assert_eq!(f(a), Err(Err::Error(error_position!(a, ErrorKind::Char)))); 59 | 60 | let b = "cde"; 61 | assert_eq!(f(b), Ok(("de", 'c'))); 62 | } 63 | -------------------------------------------------------------------------------- /src/combinator/tests.rs: -------------------------------------------------------------------------------- 1 | use super::*; 2 | use crate::bytes::complete::take; 3 | use crate::bytes::streaming::tag; 4 | use crate::error::ErrorKind; 5 | use crate::error::ParseError; 6 | use crate::internal::{Err, IResult, Needed}; 7 | #[cfg(feature = "alloc")] 8 | use crate::lib::std::boxed::Box; 9 | use crate::number::complete::u8; 10 | 11 | macro_rules! assert_parse( 12 | ($left: expr, $right: expr) => { 13 | let res: $crate::IResult<_, _, (_, ErrorKind)> = $left; 14 | assert_eq!(res, $right); 15 | }; 16 | ); 17 | 18 | /*#[test] 19 | fn t1() { 20 | let v1:Vec = vec![1,2,3]; 21 | let v2:Vec = vec![4,5,6]; 22 | let d = Ok((&v1[..], &v2[..])); 23 | let res = d.flat_map(print); 24 | assert_eq!(res, Ok((&v2[..], ()))); 25 | }*/ 26 | 27 | #[test] 28 | fn eof_on_slices() { 29 | let not_over: &[u8] = &b"Hello, world!"[..]; 30 | let is_over: &[u8] = &b""[..]; 31 | 32 | let res_not_over = eof(not_over); 33 | assert_parse!( 34 | res_not_over, 35 | Err(Err::Error(error_position!(not_over, ErrorKind::Eof))) 36 | ); 37 | 38 | let res_over = eof(is_over); 39 | assert_parse!(res_over, Ok((is_over, is_over))); 40 | } 41 | 42 | #[test] 43 | fn eof_on_strs() { 44 | let not_over: &str = "Hello, world!"; 45 | let is_over: &str = ""; 46 | 47 | let res_not_over = eof(not_over); 48 | assert_parse!( 49 | res_not_over, 50 | Err(Err::Error(error_position!(not_over, ErrorKind::Eof))) 51 | ); 52 | 53 | let res_over = eof(is_over); 54 | assert_parse!(res_over, Ok((is_over, is_over))); 55 | } 56 | 57 | /* 58 | #[test] 59 | fn end_of_input() { 60 | let not_over = &b"Hello, world!"[..]; 61 | let is_over = &b""[..]; 62 | named!(eof_test, eof!()); 63 | 64 | let res_not_over = eof_test(not_over); 65 | assert_eq!(res_not_over, Err(Err::Error(error_position!(not_over, ErrorKind::Eof)))); 66 | 67 | let res_over = eof_test(is_over); 68 | assert_eq!(res_over, Ok((is_over, is_over))); 69 | } 70 | */ 71 | 72 | #[test] 73 | fn rest_on_slices() { 74 | let input: &[u8] = &b"Hello, world!"[..]; 75 | let empty: &[u8] = &b""[..]; 76 | assert_parse!(rest(input), Ok((empty, input))); 77 | } 78 | 79 | #[test] 80 | fn rest_on_strs() { 81 | let input: &str = "Hello, world!"; 82 | let empty: &str = ""; 83 | assert_parse!(rest(input), Ok((empty, input))); 84 | } 85 | 86 | #[test] 87 | fn rest_len_on_slices() { 88 | let input: &[u8] = &b"Hello, world!"[..]; 89 | assert_parse!(rest_len(input), Ok((input, input.len()))); 90 | } 91 | 92 | use crate::lib::std::convert::From; 93 | impl From for CustomError { 94 | fn from(_: u32) -> Self { 95 | CustomError 96 | } 97 | } 98 | 99 | impl ParseError for CustomError { 100 | fn from_error_kind(_: I, _: ErrorKind) -> Self { 101 | CustomError 102 | } 103 | 104 | fn append(_: I, _: ErrorKind, _: CustomError) -> Self { 105 | CustomError 106 | } 107 | } 108 | 109 | struct CustomError; 110 | #[allow(dead_code)] 111 | fn custom_error(input: &[u8]) -> IResult<&[u8], &[u8], CustomError> { 112 | //fix_error!(input, CustomError, alphanumeric) 113 | crate::character::streaming::alphanumeric1(input) 114 | } 115 | 116 | #[test] 117 | fn test_flat_map() { 118 | let input: &[u8] = &[3, 100, 101, 102, 103, 104][..]; 119 | assert_parse!( 120 | flat_map(u8, take).parse(input), 121 | Ok((&[103, 104][..], &[100, 101, 102][..])) 122 | ); 123 | } 124 | 125 | #[test] 126 | fn test_map_opt() { 127 | let input: &[u8] = &[50][..]; 128 | assert_parse!( 129 | map_opt(u8, |u| if u < 20 { Some(u) } else { None }).parse(input), 130 | Err(Err::Error((&[50][..], ErrorKind::MapOpt))) 131 | ); 132 | assert_parse!( 133 | map_opt(u8, |u| if u > 20 { Some(u) } else { None }).parse(input), 134 | Ok((&[][..], 50)) 135 | ); 136 | } 137 | 138 | #[test] 139 | fn test_map_parser() { 140 | let input: &[u8] = &[100, 101, 102, 103, 104][..]; 141 | assert_parse!( 142 | map_parser(take(4usize), take(2usize)).parse(input), 143 | Ok((&[104][..], &[100, 101][..])) 144 | ); 145 | } 146 | 147 | #[test] 148 | fn test_all_consuming() { 149 | let input: &[u8] = &[100, 101, 102][..]; 150 | assert_parse!( 151 | all_consuming(take(2usize)).parse(input), 152 | Err(Err::Error((&[102][..], ErrorKind::Eof))) 153 | ); 154 | assert_parse!( 155 | all_consuming(take(3usize)).parse(input), 156 | Ok((&[][..], &[100, 101, 102][..])) 157 | ); 158 | } 159 | 160 | #[test] 161 | #[allow(unused)] 162 | fn test_verify_ref() { 163 | use crate::bytes::complete::take; 164 | 165 | let mut parser1 = verify(take(3u8), |s: &[u8]| s == &b"abc"[..]); 166 | 167 | assert_eq!(parser1.parse(&b"abcd"[..]), Ok((&b"d"[..], &b"abc"[..]))); 168 | assert_eq!( 169 | parser1.parse(&b"defg"[..]), 170 | Err(Err::Error((&b"defg"[..], ErrorKind::Verify))) 171 | ); 172 | 173 | fn parser2(i: &[u8]) -> IResult<&[u8], u32> { 174 | verify(crate::number::streaming::be_u32, |val: &u32| *val < 3).parse(i) 175 | } 176 | } 177 | 178 | #[test] 179 | #[cfg(feature = "alloc")] 180 | fn test_verify_alloc() { 181 | use crate::bytes::complete::take; 182 | let mut parser1 = verify(map(take(3u8), |s: &[u8]| s.to_vec()), |s: &[u8]| { 183 | s == &b"abc"[..] 184 | }); 185 | 186 | assert_eq!( 187 | parser1.parse(&b"abcd"[..]), 188 | Ok((&b"d"[..], b"abc".to_vec())) 189 | ); 190 | assert_eq!( 191 | parser1.parse(&b"defg"[..]), 192 | Err(Err::Error((&b"defg"[..], ErrorKind::Verify))) 193 | ); 194 | } 195 | 196 | #[test] 197 | #[cfg(feature = "std")] 198 | fn test_into() { 199 | use crate::bytes::complete::take; 200 | use crate::{ 201 | error::{Error, ParseError}, 202 | Err, 203 | }; 204 | 205 | let mut parser = into(take::<_, _, Error<_>>(3u8)); 206 | let result: IResult<&[u8], Vec> = parser.parse(&b"abcdefg"[..]); 207 | 208 | assert_eq!(result, Ok((&b"defg"[..], vec![97, 98, 99]))); 209 | } 210 | 211 | #[test] 212 | fn opt_test() { 213 | fn opt_abcd(i: &[u8]) -> IResult<&[u8], Option<&[u8]>> { 214 | opt(tag("abcd")).parse(i) 215 | } 216 | 217 | let a = &b"abcdef"[..]; 218 | let b = &b"bcdefg"[..]; 219 | let c = &b"ab"[..]; 220 | assert_eq!(opt_abcd(a), Ok((&b"ef"[..], Some(&b"abcd"[..])))); 221 | assert_eq!(opt_abcd(b), Ok((&b"bcdefg"[..], None))); 222 | assert_eq!(opt_abcd(c), Err(Err::Incomplete(Needed::new(2)))); 223 | } 224 | 225 | #[test] 226 | fn peek_test() { 227 | fn peek_tag(i: &[u8]) -> IResult<&[u8], &[u8]> { 228 | peek(tag("abcd")).parse(i) 229 | } 230 | 231 | assert_eq!(peek_tag(&b"abcdef"[..]), Ok((&b"abcdef"[..], &b"abcd"[..]))); 232 | assert_eq!(peek_tag(&b"ab"[..]), Err(Err::Incomplete(Needed::new(2)))); 233 | assert_eq!( 234 | peek_tag(&b"xxx"[..]), 235 | Err(Err::Error(error_position!(&b"xxx"[..], ErrorKind::Tag))) 236 | ); 237 | } 238 | 239 | #[test] 240 | fn not_test() { 241 | fn not_aaa(i: &[u8]) -> IResult<&[u8], ()> { 242 | not(tag("aaa")).parse(i) 243 | } 244 | 245 | assert_eq!( 246 | not_aaa(&b"aaa"[..]), 247 | Err(Err::Error(error_position!(&b"aaa"[..], ErrorKind::Not))) 248 | ); 249 | assert_eq!(not_aaa(&b"aa"[..]), Err(Err::Incomplete(Needed::new(1)))); 250 | assert_eq!(not_aaa(&b"abcd"[..]), Ok((&b"abcd"[..], ()))); 251 | } 252 | 253 | #[test] 254 | fn verify_test() { 255 | use crate::bytes::streaming::take; 256 | 257 | fn test(i: &[u8]) -> IResult<&[u8], &[u8]> { 258 | verify(take(5u8), |slice: &[u8]| slice[0] == b'a').parse(i) 259 | } 260 | assert_eq!(test(&b"bcd"[..]), Err(Err::Incomplete(Needed::new(2)))); 261 | assert_eq!( 262 | test(&b"bcdefg"[..]), 263 | Err(Err::Error(error_position!( 264 | &b"bcdefg"[..], 265 | ErrorKind::Verify 266 | ))) 267 | ); 268 | assert_eq!(test(&b"abcdefg"[..]), Ok((&b"fg"[..], &b"abcde"[..]))); 269 | } 270 | 271 | #[test] 272 | fn fail_test() { 273 | let a = "string"; 274 | let b = "another string"; 275 | 276 | assert_eq!( 277 | fail::<_, &str, _>().parse(a), 278 | Err(Err::Error((a, ErrorKind::Fail))) 279 | ); 280 | assert_eq!( 281 | fail::<_, &str, _>().parse(b), 282 | Err(Err::Error((b, ErrorKind::Fail))) 283 | ); 284 | } 285 | -------------------------------------------------------------------------------- /src/macros.rs: -------------------------------------------------------------------------------- 1 | macro_rules! succ ( 2 | (0, $submac:ident ! ($($rest:tt)*)) => ($submac!(1, $($rest)*)); 3 | (1, $submac:ident ! ($($rest:tt)*)) => ($submac!(2, $($rest)*)); 4 | (2, $submac:ident ! ($($rest:tt)*)) => ($submac!(3, $($rest)*)); 5 | (3, $submac:ident ! ($($rest:tt)*)) => ($submac!(4, $($rest)*)); 6 | (4, $submac:ident ! ($($rest:tt)*)) => ($submac!(5, $($rest)*)); 7 | (5, $submac:ident ! ($($rest:tt)*)) => ($submac!(6, $($rest)*)); 8 | (6, $submac:ident ! ($($rest:tt)*)) => ($submac!(7, $($rest)*)); 9 | (7, $submac:ident ! ($($rest:tt)*)) => ($submac!(8, $($rest)*)); 10 | (8, $submac:ident ! ($($rest:tt)*)) => ($submac!(9, $($rest)*)); 11 | (9, $submac:ident ! ($($rest:tt)*)) => ($submac!(10, $($rest)*)); 12 | (10, $submac:ident ! ($($rest:tt)*)) => ($submac!(11, $($rest)*)); 13 | (11, $submac:ident ! ($($rest:tt)*)) => ($submac!(12, $($rest)*)); 14 | (12, $submac:ident ! ($($rest:tt)*)) => ($submac!(13, $($rest)*)); 15 | (13, $submac:ident ! ($($rest:tt)*)) => ($submac!(14, $($rest)*)); 16 | (14, $submac:ident ! ($($rest:tt)*)) => ($submac!(15, $($rest)*)); 17 | (15, $submac:ident ! ($($rest:tt)*)) => ($submac!(16, $($rest)*)); 18 | (16, $submac:ident ! ($($rest:tt)*)) => ($submac!(17, $($rest)*)); 19 | (17, $submac:ident ! ($($rest:tt)*)) => ($submac!(18, $($rest)*)); 20 | (18, $submac:ident ! ($($rest:tt)*)) => ($submac!(19, $($rest)*)); 21 | (19, $submac:ident ! ($($rest:tt)*)) => ($submac!(20, $($rest)*)); 22 | (20, $submac:ident ! ($($rest:tt)*)) => ($submac!(21, $($rest)*)); 23 | ); 24 | -------------------------------------------------------------------------------- /tests/arithmetic.rs: -------------------------------------------------------------------------------- 1 | use nom::{ 2 | branch::alt, 3 | bytes::complete::tag, 4 | character::complete::char, 5 | character::complete::{digit1 as digit, space0 as space}, 6 | combinator::map_res, 7 | multi::fold, 8 | sequence::{delimited, pair}, 9 | IResult, Parser, 10 | }; 11 | 12 | // Parser definition 13 | 14 | use std::str::FromStr; 15 | 16 | // We parse any expr surrounded by parens, ignoring all whitespaces around those 17 | fn parens(i: &str) -> IResult<&str, i64> { 18 | delimited(space, delimited(tag("("), expr, tag(")")), space).parse(i) 19 | } 20 | 21 | // We transform an integer string into a i64, ignoring surrounding whitespaces 22 | // We look for a digit suite, and try to convert it. 23 | // If either str::from_utf8 or FromStr::from_str fail, 24 | // we fallback to the parens parser defined above 25 | fn factor(i: &str) -> IResult<&str, i64> { 26 | alt(( 27 | map_res(delimited(space, digit, space), FromStr::from_str), 28 | parens, 29 | )) 30 | .parse(i) 31 | } 32 | 33 | // We read an initial factor and for each time we find 34 | // a * or / operator followed by another factor, we do 35 | // the math by folding everything 36 | fn term(i: &str) -> IResult<&str, i64> { 37 | let (i, init) = factor(i)?; 38 | 39 | fold( 40 | 0.., 41 | pair(alt((char('*'), char('/'))), factor), 42 | move || init, 43 | |acc, (op, val): (char, i64)| { 44 | if op == '*' { 45 | acc * val 46 | } else { 47 | acc / val 48 | } 49 | }, 50 | ) 51 | .parse(i) 52 | } 53 | 54 | fn expr(i: &str) -> IResult<&str, i64> { 55 | let (i, init) = term(i)?; 56 | 57 | fold( 58 | 0.., 59 | pair(alt((char('+'), char('-'))), term), 60 | move || init, 61 | |acc, (op, val): (char, i64)| { 62 | if op == '+' { 63 | acc + val 64 | } else { 65 | acc - val 66 | } 67 | }, 68 | ) 69 | .parse(i) 70 | } 71 | 72 | #[test] 73 | fn factor_test() { 74 | assert_eq!(factor("3"), Ok(("", 3))); 75 | assert_eq!(factor(" 12"), Ok(("", 12))); 76 | assert_eq!(factor("537 "), Ok(("", 537))); 77 | assert_eq!(factor(" 24 "), Ok(("", 24))); 78 | } 79 | 80 | #[test] 81 | fn term_test() { 82 | assert_eq!(term(" 12 *2 / 3"), Ok(("", 8))); 83 | assert_eq!(term(" 2* 3 *2 *2 / 3"), Ok(("", 8))); 84 | assert_eq!(term(" 48 / 3/2"), Ok(("", 8))); 85 | } 86 | 87 | #[test] 88 | fn expr_test() { 89 | assert_eq!(expr(" 1 + 2 "), Ok(("", 3))); 90 | assert_eq!(expr(" 12 + 6 - 4+ 3"), Ok(("", 17))); 91 | assert_eq!(expr(" 1 + 2*3 + 4"), Ok(("", 11))); 92 | } 93 | 94 | #[test] 95 | fn parens_test() { 96 | assert_eq!(expr(" ( 2 )"), Ok(("", 2))); 97 | assert_eq!(expr(" 2* ( 3 + 4 ) "), Ok(("", 14))); 98 | assert_eq!(expr(" 2*2 / ( 5 - 1) + 3"), Ok(("", 4))); 99 | } 100 | -------------------------------------------------------------------------------- /tests/arithmetic_ast.rs: -------------------------------------------------------------------------------- 1 | use std::fmt; 2 | use std::fmt::{Debug, Display, Formatter}; 3 | 4 | use std::str::FromStr; 5 | 6 | use nom::Parser; 7 | use nom::{ 8 | branch::alt, 9 | bytes::complete::tag, 10 | character::complete::{digit1 as digit, multispace0 as multispace}, 11 | combinator::{map, map_res}, 12 | multi::many, 13 | sequence::{delimited, preceded}, 14 | IResult, 15 | }; 16 | 17 | pub enum Expr { 18 | Value(i64), 19 | Add(Box, Box), 20 | Sub(Box, Box), 21 | Mul(Box, Box), 22 | Div(Box, Box), 23 | Paren(Box), 24 | } 25 | 26 | #[derive(Debug)] 27 | pub enum Oper { 28 | Add, 29 | Sub, 30 | Mul, 31 | Div, 32 | } 33 | 34 | impl Display for Expr { 35 | fn fmt(&self, format: &mut Formatter<'_>) -> fmt::Result { 36 | use self::Expr::*; 37 | match *self { 38 | Value(val) => write!(format, "{}", val), 39 | Add(ref left, ref right) => write!(format, "{} + {}", left, right), 40 | Sub(ref left, ref right) => write!(format, "{} - {}", left, right), 41 | Mul(ref left, ref right) => write!(format, "{} * {}", left, right), 42 | Div(ref left, ref right) => write!(format, "{} / {}", left, right), 43 | Paren(ref expr) => write!(format, "({})", expr), 44 | } 45 | } 46 | } 47 | 48 | impl Debug for Expr { 49 | fn fmt(&self, format: &mut Formatter<'_>) -> fmt::Result { 50 | use self::Expr::*; 51 | match *self { 52 | Value(val) => write!(format, "{}", val), 53 | Add(ref left, ref right) => write!(format, "({:?} + {:?})", left, right), 54 | Sub(ref left, ref right) => write!(format, "({:?} - {:?})", left, right), 55 | Mul(ref left, ref right) => write!(format, "({:?} * {:?})", left, right), 56 | Div(ref left, ref right) => write!(format, "({:?} / {:?})", left, right), 57 | Paren(ref expr) => write!(format, "[{:?}]", expr), 58 | } 59 | } 60 | } 61 | 62 | fn parens(i: &str) -> IResult<&str, Expr> { 63 | delimited( 64 | multispace, 65 | delimited(tag("("), map(expr, |e| Expr::Paren(Box::new(e))), tag(")")), 66 | multispace, 67 | ) 68 | .parse(i) 69 | } 70 | 71 | fn factor(i: &str) -> IResult<&str, Expr> { 72 | alt(( 73 | map( 74 | map_res(delimited(multispace, digit, multispace), FromStr::from_str), 75 | Expr::Value, 76 | ), 77 | parens, 78 | )) 79 | .parse(i) 80 | } 81 | 82 | fn fold_exprs(initial: Expr, remainder: Vec<(Oper, Expr)>) -> Expr { 83 | remainder.into_iter().fold(initial, |acc, pair| { 84 | let (oper, expr) = pair; 85 | match oper { 86 | Oper::Add => Expr::Add(Box::new(acc), Box::new(expr)), 87 | Oper::Sub => Expr::Sub(Box::new(acc), Box::new(expr)), 88 | Oper::Mul => Expr::Mul(Box::new(acc), Box::new(expr)), 89 | Oper::Div => Expr::Div(Box::new(acc), Box::new(expr)), 90 | } 91 | }) 92 | } 93 | 94 | fn term(i: &str) -> IResult<&str, Expr> { 95 | let (i, initial) = factor(i)?; 96 | let (i, remainder) = many( 97 | 0.., 98 | alt(( 99 | |i| { 100 | let (i, mul) = preceded(tag("*"), factor).parse(i)?; 101 | Ok((i, (Oper::Mul, mul))) 102 | }, 103 | |i| { 104 | let (i, div) = preceded(tag("/"), factor).parse(i)?; 105 | Ok((i, (Oper::Div, div))) 106 | }, 107 | )), 108 | ) 109 | .parse(i)?; 110 | 111 | Ok((i, fold_exprs(initial, remainder))) 112 | } 113 | 114 | fn expr(i: &str) -> IResult<&str, Expr> { 115 | let (i, initial) = term(i)?; 116 | let (i, remainder) = many( 117 | 0.., 118 | alt(( 119 | |i| { 120 | let (i, add) = preceded(tag("+"), term).parse(i)?; 121 | Ok((i, (Oper::Add, add))) 122 | }, 123 | |i| { 124 | let (i, sub) = preceded(tag("-"), term).parse(i)?; 125 | Ok((i, (Oper::Sub, sub))) 126 | }, 127 | )), 128 | ) 129 | .parse(i)?; 130 | 131 | Ok((i, fold_exprs(initial, remainder))) 132 | } 133 | 134 | #[test] 135 | fn factor_test() { 136 | assert_eq!( 137 | factor(" 3 ").map(|(i, x)| (i, format!("{:?}", x))), 138 | Ok(("", String::from("3"))) 139 | ); 140 | } 141 | 142 | #[test] 143 | fn term_test() { 144 | assert_eq!( 145 | term(" 3 * 5 ").map(|(i, x)| (i, format!("{:?}", x))), 146 | Ok(("", String::from("(3 * 5)"))) 147 | ); 148 | } 149 | 150 | #[test] 151 | fn expr_test() { 152 | assert_eq!( 153 | expr(" 1 + 2 * 3 ").map(|(i, x)| (i, format!("{:?}", x))), 154 | Ok(("", String::from("(1 + (2 * 3))"))) 155 | ); 156 | assert_eq!( 157 | expr(" 1 + 2 * 3 / 4 - 5 ").map(|(i, x)| (i, format!("{:?}", x))), 158 | Ok(("", String::from("((1 + ((2 * 3) / 4)) - 5)"))) 159 | ); 160 | assert_eq!( 161 | expr(" 72 / 2 / 3 ").map(|(i, x)| (i, format!("{:?}", x))), 162 | Ok(("", String::from("((72 / 2) / 3)"))) 163 | ); 164 | } 165 | 166 | #[test] 167 | fn parens_test() { 168 | assert_eq!( 169 | expr(" ( 1 + 2 ) * 3 ").map(|(i, x)| (i, format!("{:?}", x))), 170 | Ok(("", String::from("([(1 + 2)] * 3)"))) 171 | ); 172 | } 173 | -------------------------------------------------------------------------------- /tests/css.rs: -------------------------------------------------------------------------------- 1 | use nom::bytes::complete::{tag, take_while_m_n}; 2 | use nom::combinator::map_res; 3 | use nom::{IResult, Parser}; 4 | 5 | #[derive(Debug, Eq, PartialEq)] 6 | pub struct Color { 7 | pub red: u8, 8 | pub green: u8, 9 | pub blue: u8, 10 | } 11 | 12 | fn from_hex(input: &str) -> Result { 13 | u8::from_str_radix(input, 16) 14 | } 15 | 16 | fn is_hex_digit(c: char) -> bool { 17 | c.is_ascii_hexdigit() 18 | } 19 | 20 | fn hex_primary(input: &str) -> IResult<&str, u8> { 21 | map_res(take_while_m_n(2, 2, is_hex_digit), from_hex).parse(input) 22 | } 23 | 24 | fn hex_color(input: &str) -> IResult<&str, Color> { 25 | let (input, _) = tag("#")(input)?; 26 | let (input, (red, green, blue)) = (hex_primary, hex_primary, hex_primary).parse(input)?; 27 | 28 | Ok((input, Color { red, green, blue })) 29 | } 30 | 31 | #[test] 32 | fn parse_color() { 33 | assert_eq!( 34 | hex_color("#2F14DF"), 35 | Ok(( 36 | "", 37 | Color { 38 | red: 47, 39 | green: 20, 40 | blue: 223, 41 | } 42 | )) 43 | ); 44 | } 45 | -------------------------------------------------------------------------------- /tests/custom_errors.rs: -------------------------------------------------------------------------------- 1 | #![allow(dead_code)] 2 | 3 | use nom::bytes::streaming::tag; 4 | use nom::character::streaming::digit1 as digit; 5 | use nom::combinator::verify; 6 | use nom::error::{ErrorKind, ParseError}; 7 | #[cfg(feature = "alloc")] 8 | use nom::multi::count; 9 | use nom::sequence::terminated; 10 | use nom::{IResult, Parser}; 11 | 12 | #[derive(Debug)] 13 | pub struct CustomError(String); 14 | 15 | impl<'a> From<(&'a str, ErrorKind)> for CustomError { 16 | fn from(error: (&'a str, ErrorKind)) -> Self { 17 | CustomError(format!("error code was: {:?}", error)) 18 | } 19 | } 20 | 21 | impl<'a> ParseError<&'a str> for CustomError { 22 | fn from_error_kind(_: &'a str, kind: ErrorKind) -> Self { 23 | CustomError(format!("error code was: {:?}", kind)) 24 | } 25 | 26 | fn append(_: &'a str, kind: ErrorKind, other: CustomError) -> Self { 27 | CustomError(format!("{:?}\nerror code was: {:?}", other, kind)) 28 | } 29 | } 30 | 31 | fn test1(input: &str) -> IResult<&str, &str, CustomError> { 32 | //fix_error!(input, CustomError, tag!("abcd")) 33 | tag("abcd")(input) 34 | } 35 | 36 | fn test2(input: &str) -> IResult<&str, &str, CustomError> { 37 | //terminated!(input, test1, fix_error!(CustomError, digit)) 38 | terminated(test1, digit).parse(input) 39 | } 40 | 41 | fn test3(input: &str) -> IResult<&str, &str, CustomError> { 42 | verify(test1, |s: &str| s.starts_with("abcd")).parse(input) 43 | } 44 | 45 | #[cfg(feature = "alloc")] 46 | fn test4(input: &str) -> IResult<&str, Vec<&str>, CustomError> { 47 | count(test1, 4).parse(input) 48 | } 49 | -------------------------------------------------------------------------------- /tests/escaped.rs: -------------------------------------------------------------------------------- 1 | use nom::bytes::complete::escaped; 2 | use nom::character::complete::digit1; 3 | use nom::character::complete::one_of; 4 | use nom::{error::ErrorKind, Err, IResult}; 5 | 6 | fn esc(s: &str) -> IResult<&str, &str, (&str, ErrorKind)> { 7 | escaped(digit1, '\\', one_of("\"n\\"))(s) 8 | } 9 | 10 | #[cfg(feature = "alloc")] 11 | fn esc_trans(s: &str) -> IResult<&str, String, (&str, ErrorKind)> { 12 | use nom::bytes::complete::{escaped_transform, tag}; 13 | escaped_transform(digit1, '\\', tag("n"))(s) 14 | } 15 | 16 | #[test] 17 | fn test_escaped() { 18 | assert_eq!(esc("abcd"), Err(Err::Error(("abcd", ErrorKind::Escaped)))); 19 | } 20 | 21 | #[test] 22 | #[cfg(feature = "alloc")] 23 | fn test_escaped_transform() { 24 | assert_eq!( 25 | esc_trans("abcd"), 26 | Err(Err::Error(("abcd", ErrorKind::EscapedTransform))) 27 | ); 28 | } 29 | -------------------------------------------------------------------------------- /tests/expression_ast.rs: -------------------------------------------------------------------------------- 1 | use nom::{ 2 | branch::alt, 3 | bytes::complete::tag, 4 | character::complete::{alphanumeric1 as alphanumeric, digit1 as digit}, 5 | combinator::{map, map_res}, 6 | multi::separated_list0, 7 | sequence::delimited, 8 | IResult, Parser, 9 | }; 10 | use nom_language::precedence::{binary_op, precedence, unary_op, Assoc, Operation}; 11 | 12 | // Elements of the abstract syntax tree (ast) that represents an expression. 13 | #[derive(Debug)] 14 | pub enum Expr { 15 | // A number literal. 16 | Num(i64), 17 | // An identifier. 18 | Iden(String), 19 | // Arithmetic operations. Each have a left hand side (lhs) and a right hand side (rhs). 20 | Add(Box, Box), 21 | Sub(Box, Box), 22 | Mul(Box, Box), 23 | Div(Box, Box), 24 | // The function call operation. Left is the expression the function is called on, right is the list of parameters. 25 | Call(Box, Vec), 26 | // The ternary operator, the expressions from left to right are: The condition, the true case, the false case. 27 | Tern(Box, Box, Box), 28 | } 29 | 30 | // Prefix operators. 31 | enum PrefixOp { 32 | Identity, // + 33 | Negate, // - 34 | } 35 | 36 | // Postfix operators. 37 | enum PostfixOp { 38 | // The function call operator. In addition to its own representation "()" it carries additional information that we need to keep here. 39 | // Specifically the vector of expressions that make up the parameters. 40 | Call(Vec), // () 41 | } 42 | 43 | // Binary operators. 44 | enum BinaryOp { 45 | Addition, // + 46 | Subtraction, // - 47 | Multiplication, // * 48 | Division, // / 49 | // The ternary operator can contain a single expression. 50 | Ternary(Expr), // ?: 51 | } 52 | 53 | // Parser for function calls. 54 | fn function_call(i: &str) -> IResult<&str, PostfixOp> { 55 | map( 56 | delimited( 57 | tag("("), 58 | // Subexpressions are evaluated by recursing back into the expression parser. 59 | separated_list0(tag(","), expression), 60 | tag(")"), 61 | ), 62 | |v: Vec| PostfixOp::Call(v), 63 | ) 64 | .parse(i) 65 | } 66 | 67 | // The ternary operator is actually just a binary operator that contains another expression. So it can be 68 | // handled similarly to the function call operator except its in a binary position and can only contain 69 | // a single expression. 70 | // 71 | // For example the expression "a IResult<&str, BinaryOp> { 78 | map(delimited(tag("?"), expression, tag(":")), |e: Expr| { 79 | BinaryOp::Ternary(e) 80 | }) 81 | .parse(i) 82 | } 83 | 84 | // The actual expression parser . 85 | fn expression(i: &str) -> IResult<&str, Expr> { 86 | precedence( 87 | alt(( 88 | unary_op(2, map(tag("+"), |_| PrefixOp::Identity)), 89 | unary_op(2, map(tag("-"), |_| PrefixOp::Negate)), 90 | )), 91 | // Function calls are implemented as postfix unary operators. 92 | unary_op(1, function_call), 93 | alt(( 94 | binary_op( 95 | 3, 96 | Assoc::Left, 97 | alt(( 98 | map(tag("*"), |_| BinaryOp::Multiplication), 99 | map(tag("/"), |_| BinaryOp::Division), 100 | )), 101 | ), 102 | binary_op( 103 | 4, 104 | Assoc::Left, 105 | alt(( 106 | map(tag("+"), |_| BinaryOp::Addition), 107 | map(tag("-"), |_| BinaryOp::Subtraction), 108 | )), 109 | ), 110 | // Ternary operators are just binary operators with a subexpression. 111 | binary_op(5, Assoc::Right, ternary_operator), 112 | )), 113 | alt(( 114 | map_res(digit, |s: &str| match s.parse::() { 115 | Ok(s) => Ok(Expr::Num(s)), 116 | Err(e) => Err(e), 117 | }), 118 | map(alphanumeric, |s: &str| Expr::Iden(s.to_string())), 119 | delimited(tag("("), expression, tag(")")), 120 | )), 121 | |op: Operation| -> Result { 122 | use nom_language::precedence::Operation::*; 123 | use BinaryOp::*; 124 | use PostfixOp::*; 125 | use PrefixOp::*; 126 | match op { 127 | // The identity operator (prefix +) is ignored. 128 | Prefix(Identity, e) => Ok(e), 129 | 130 | // Unary minus gets evaluated to the same representation as a multiplication with -1. 131 | Prefix(Negate, e) => Ok(Expr::Mul(Expr::Num(-1).into(), e.into())), 132 | 133 | // The list of parameters are taken from the operator and placed into the ast. 134 | Postfix(e, Call(p)) => Ok(Expr::Call(e.into(), p)), 135 | 136 | // Meaning is assigned to the expressions of the ternary operator during evaluation. 137 | // The lhs becomes the condition, the contained expression is the true case, rhs the false case. 138 | Binary(lhs, Ternary(e), rhs) => Ok(Expr::Tern(lhs.into(), e.into(), rhs.into())), 139 | 140 | // Raw operators get turned into their respective ast nodes. 141 | Binary(lhs, Multiplication, rhs) => Ok(Expr::Mul(lhs.into(), rhs.into())), 142 | Binary(lhs, Division, rhs) => Ok(Expr::Div(lhs.into(), rhs.into())), 143 | Binary(lhs, Addition, rhs) => Ok(Expr::Add(lhs.into(), rhs.into())), 144 | Binary(lhs, Subtraction, rhs) => Ok(Expr::Sub(lhs.into(), rhs.into())), 145 | } 146 | }, 147 | )(i) 148 | } 149 | 150 | #[test] 151 | fn expression_test() { 152 | assert_eq!( 153 | expression("-2*max(2,3)-2").map(|(i, x)| (i, format!("{:?}", x))), 154 | Ok(( 155 | "", 156 | String::from("Sub(Mul(Mul(Num(-1), Num(2)), Call(Iden(\"max\"), [Num(2), Num(3)])), Num(2))") 157 | )) 158 | ); 159 | 160 | assert_eq!( 161 | expression("a?2+c:-2*2").map(|(i, x)| (i, format!("{:?}", x))), 162 | Ok(( 163 | "", 164 | String::from( 165 | "Tern(Iden(\"a\"), Add(Num(2), Iden(\"c\")), Mul(Mul(Num(-1), Num(2)), Num(2)))" 166 | ) 167 | )) 168 | ); 169 | } 170 | -------------------------------------------------------------------------------- /tests/float.rs: -------------------------------------------------------------------------------- 1 | use nom::branch::alt; 2 | use nom::bytes::complete::tag; 3 | use nom::character::streaming::digit1 as digit; 4 | use nom::combinator::{map, map_res, opt, recognize}; 5 | use nom::error::ErrorKind; 6 | use nom::number::complete::f32; 7 | use nom::number::complete::f64; 8 | use nom::number::Endianness; 9 | use nom::sequence::{delimited, pair}; 10 | use nom::Err; 11 | use nom::{IResult, Parser}; 12 | use std::str; 13 | use std::str::FromStr; 14 | 15 | fn unsigned_float(i: &[u8]) -> IResult<&[u8], f32> { 16 | let float_bytes = recognize(alt(( 17 | delimited(digit, tag("."), opt(digit)), 18 | delimited(opt(digit), tag("."), digit), 19 | ))); 20 | let float_str = map_res(float_bytes, str::from_utf8); 21 | map_res(float_str, FromStr::from_str).parse(i) 22 | } 23 | 24 | fn float(i: &[u8]) -> IResult<&[u8], f32> { 25 | map( 26 | pair(opt(alt((tag("+"), tag("-")))), unsigned_float), 27 | |(sign, value)| { 28 | sign 29 | .and_then(|s| if s[0] == b'-' { Some(-1f32) } else { None }) 30 | .unwrap_or(1f32) 31 | * value 32 | }, 33 | ) 34 | .parse(i) 35 | } 36 | 37 | #[test] 38 | fn unsigned_float_test() { 39 | assert_eq!(unsigned_float(&b"123.456;"[..]), Ok((&b";"[..], 123.456))); 40 | assert_eq!(unsigned_float(&b"0.123;"[..]), Ok((&b";"[..], 0.123))); 41 | assert_eq!(unsigned_float(&b"123.0;"[..]), Ok((&b";"[..], 123.0))); 42 | assert_eq!(unsigned_float(&b"123.;"[..]), Ok((&b";"[..], 123.0))); 43 | assert_eq!(unsigned_float(&b".123;"[..]), Ok((&b";"[..], 0.123))); 44 | } 45 | 46 | #[test] 47 | fn float_test() { 48 | assert_eq!(float(&b"123.456;"[..]), Ok((&b";"[..], 123.456))); 49 | assert_eq!(float(&b"+123.456;"[..]), Ok((&b";"[..], 123.456))); 50 | assert_eq!(float(&b"-123.456;"[..]), Ok((&b";"[..], -123.456))); 51 | } 52 | 53 | #[test] 54 | fn test_f32_big_endian() { 55 | let be_f32 = |s| f32::<_, (_, ErrorKind)>(Endianness::Big)(s); 56 | 57 | assert_eq!( 58 | be_f32(&[0x41, 0x48, 0x00, 0x00][..]), 59 | Ok((&[] as &[u8], 12.5)) 60 | ); 61 | } 62 | 63 | #[test] 64 | fn test_f32_little_endian() { 65 | let le_f32 = |s| f32::<_, (_, ErrorKind)>(Endianness::Little)(s); 66 | 67 | assert_eq!( 68 | le_f32(&[0x00, 0x00, 0x48, 0x41][..]), 69 | Ok((&[] as &[u8], 12.5)) 70 | ); 71 | } 72 | 73 | #[test] 74 | fn test_f64_big_endian() { 75 | let be_f64 = |s| f64::<&[u8], (&[u8], ErrorKind)>(Endianness::Big)(s); 76 | 77 | let input = &[0x40, 0x29, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00][..]; 78 | let expected = 12.5f64; 79 | match be_f64(input) { 80 | Ok((rest, value)) => { 81 | assert!(rest.is_empty()); 82 | assert_eq!(value, expected); 83 | } 84 | Err(_) => assert!(false, "Failed to parse big-endian f64"), 85 | } 86 | 87 | let incomplete_input = &b"abc"[..]; 88 | assert!(matches!(be_f64(incomplete_input), Err(Err::Error(_)))); 89 | } 90 | 91 | #[test] 92 | fn test_f64_little_endian() { 93 | let le_f64 = |s| f64::<&[u8], (&[u8], ErrorKind)>(Endianness::Little)(s); 94 | 95 | let input = &[0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x29, 0x40][..]; 96 | let expected = 12.5f64; 97 | match le_f64(input) { 98 | Ok((rest, value)) => { 99 | assert!(rest.is_empty()); 100 | assert_eq!(value, expected); 101 | } 102 | Err(_) => assert!(false, "Failed to parse little-endian f64"), 103 | } 104 | 105 | let incomplete_input = &b"abc"[..]; 106 | assert!(matches!(le_f64(incomplete_input), Err(Err::Error(_)))); 107 | } 108 | -------------------------------------------------------------------------------- /tests/fnmut.rs: -------------------------------------------------------------------------------- 1 | use nom::{ 2 | bytes::complete::tag, 3 | multi::{many, many0_count}, 4 | Parser, 5 | }; 6 | 7 | #[test] 8 | fn parse() { 9 | let mut counter = 0; 10 | 11 | let res = { 12 | let mut parser = many::<_, (), Vec<&str>, _, _>(0.., |i| { 13 | counter += 1; 14 | tag("abc")(i) 15 | }); 16 | 17 | parser.parse("abcabcabcabc").unwrap() 18 | }; 19 | 20 | println!("res: {:?}", res); 21 | assert_eq!(counter, 5); 22 | } 23 | 24 | #[test] 25 | fn accumulate() { 26 | let mut v = Vec::new(); 27 | 28 | let (_, count) = { 29 | let mut parser = many0_count::<_, (), _>(|i| { 30 | let (i, o) = tag("abc")(i)?; 31 | v.push(o); 32 | Ok((i, ())) 33 | }); 34 | parser.parse("abcabcabcabc").unwrap() 35 | }; 36 | 37 | println!("v: {:?}", v); 38 | assert_eq!(count, 4); 39 | assert_eq!(v.len(), 4); 40 | } 41 | -------------------------------------------------------------------------------- /tests/ini.rs: -------------------------------------------------------------------------------- 1 | use nom::{ 2 | bytes::complete::take_while, 3 | character::complete::{ 4 | alphanumeric1 as alphanumeric, char, multispace0 as multispace, space0 as space, 5 | }, 6 | combinator::{map, map_res, opt}, 7 | multi::many, 8 | sequence::{delimited, pair, separated_pair, terminated}, 9 | IResult, Parser, 10 | }; 11 | 12 | use std::collections::HashMap; 13 | use std::str; 14 | 15 | fn category(i: &[u8]) -> IResult<&[u8], &str> { 16 | map_res( 17 | delimited(char('['), take_while(|c| c != b']'), char(']')), 18 | str::from_utf8, 19 | ) 20 | .parse(i) 21 | } 22 | 23 | fn key_value(i: &[u8]) -> IResult<&[u8], (&str, &str)> { 24 | let (i, key) = map_res(alphanumeric, str::from_utf8).parse(i)?; 25 | let (i, _) = (opt(space), char('='), opt(space)).parse(i)?; 26 | let (i, val) = map_res(take_while(|c| c != b'\n' && c != b';'), str::from_utf8).parse(i)?; 27 | let (i, _) = opt(pair(char(';'), take_while(|c| c != b'\n'))).parse(i)?; 28 | Ok((i, (key, val))) 29 | } 30 | 31 | fn keys_and_values(i: &[u8]) -> IResult<&[u8], HashMap<&str, &str>> { 32 | many(0.., terminated(key_value, opt(multispace))).parse(i) 33 | } 34 | 35 | fn category_and_keys(i: &[u8]) -> IResult<&[u8], (&str, HashMap<&str, &str>)> { 36 | let (i, category) = terminated(category, opt(multispace)).parse(i)?; 37 | let (i, keys) = keys_and_values(i)?; 38 | Ok((i, (category, keys))) 39 | } 40 | 41 | fn categories(i: &[u8]) -> IResult<&[u8], HashMap<&str, HashMap<&str, &str>>> { 42 | map( 43 | many( 44 | 0.., 45 | separated_pair( 46 | category, 47 | opt(multispace), 48 | map( 49 | many(0.., terminated(key_value, opt(multispace))), 50 | |vec: Vec<_>| vec.into_iter().collect(), 51 | ), 52 | ), 53 | ), 54 | |vec: Vec<_>| vec.into_iter().collect(), 55 | ) 56 | .parse(i) 57 | } 58 | 59 | #[test] 60 | fn parse_category_test() { 61 | let ini_file = &b"[category] 62 | 63 | parameter=value 64 | key = value2"[..]; 65 | 66 | let ini_without_category = &b"\n\nparameter=value 67 | key = value2"[..]; 68 | 69 | let res = category(ini_file); 70 | println!("{:?}", res); 71 | match res { 72 | Ok((i, o)) => println!("i: {:?} | o: {:?}", str::from_utf8(i), o), 73 | _ => println!("error"), 74 | } 75 | 76 | assert_eq!(res, Ok((ini_without_category, "category"))); 77 | } 78 | 79 | #[test] 80 | fn parse_key_value_test() { 81 | let ini_file = &b"parameter=value 82 | key = value2"[..]; 83 | 84 | let ini_without_key_value = &b"\nkey = value2"[..]; 85 | 86 | let res = key_value(ini_file); 87 | println!("{:?}", res); 88 | match res { 89 | Ok((i, (o1, o2))) => println!("i: {:?} | o: ({:?},{:?})", str::from_utf8(i), o1, o2), 90 | _ => println!("error"), 91 | } 92 | 93 | assert_eq!(res, Ok((ini_without_key_value, ("parameter", "value")))); 94 | } 95 | 96 | #[test] 97 | fn parse_key_value_with_space_test() { 98 | let ini_file = &b"parameter = value 99 | key = value2"[..]; 100 | 101 | let ini_without_key_value = &b"\nkey = value2"[..]; 102 | 103 | let res = key_value(ini_file); 104 | println!("{:?}", res); 105 | match res { 106 | Ok((i, (o1, o2))) => println!("i: {:?} | o: ({:?},{:?})", str::from_utf8(i), o1, o2), 107 | _ => println!("error"), 108 | } 109 | 110 | assert_eq!(res, Ok((ini_without_key_value, ("parameter", "value")))); 111 | } 112 | 113 | #[test] 114 | fn parse_key_value_with_comment_test() { 115 | let ini_file = &b"parameter=value;abc 116 | key = value2"[..]; 117 | 118 | let ini_without_key_value = &b"\nkey = value2"[..]; 119 | 120 | let res = key_value(ini_file); 121 | println!("{:?}", res); 122 | match res { 123 | Ok((i, (o1, o2))) => println!("i: {:?} | o: ({:?},{:?})", str::from_utf8(i), o1, o2), 124 | _ => println!("error"), 125 | } 126 | 127 | assert_eq!(res, Ok((ini_without_key_value, ("parameter", "value")))); 128 | } 129 | 130 | #[test] 131 | fn parse_multiple_keys_and_values_test() { 132 | let ini_file = &b"parameter=value;abc 133 | 134 | key = value2 135 | 136 | [category]"[..]; 137 | 138 | let ini_without_key_value = &b"[category]"[..]; 139 | 140 | let res = keys_and_values(ini_file); 141 | println!("{:?}", res); 142 | match res { 143 | Ok((i, ref o)) => println!("i: {:?} | o: {:?}", str::from_utf8(i), o), 144 | _ => println!("error"), 145 | } 146 | 147 | let mut expected: HashMap<&str, &str> = HashMap::new(); 148 | expected.insert("parameter", "value"); 149 | expected.insert("key", "value2"); 150 | assert_eq!(res, Ok((ini_without_key_value, expected))); 151 | } 152 | 153 | #[test] 154 | fn parse_category_then_multiple_keys_and_values_test() { 155 | //FIXME: there can be an empty line or a comment line after a category 156 | let ini_file = &b"[abcd] 157 | parameter=value;abc 158 | 159 | key = value2 160 | 161 | [category]"[..]; 162 | 163 | let ini_after_parser = &b"[category]"[..]; 164 | 165 | let res = category_and_keys(ini_file); 166 | println!("{:?}", res); 167 | match res { 168 | Ok((i, ref o)) => println!("i: {:?} | o: {:?}", str::from_utf8(i), o), 169 | _ => println!("error"), 170 | } 171 | 172 | let mut expected_h: HashMap<&str, &str> = HashMap::new(); 173 | expected_h.insert("parameter", "value"); 174 | expected_h.insert("key", "value2"); 175 | assert_eq!(res, Ok((ini_after_parser, ("abcd", expected_h)))); 176 | } 177 | 178 | #[test] 179 | fn parse_multiple_categories_test() { 180 | let ini_file = &b"[abcd] 181 | 182 | parameter=value;abc 183 | 184 | key = value2 185 | 186 | [category] 187 | parameter3=value3 188 | key4 = value4 189 | "[..]; 190 | 191 | let ini_after_parser = &b""[..]; 192 | 193 | let res = categories(ini_file); 194 | //println!("{:?}", res); 195 | match res { 196 | Ok((i, ref o)) => println!("i: {:?} | o: {:?}", str::from_utf8(i), o), 197 | _ => println!("error"), 198 | } 199 | 200 | let mut expected_1: HashMap<&str, &str> = HashMap::new(); 201 | expected_1.insert("parameter", "value"); 202 | expected_1.insert("key", "value2"); 203 | let mut expected_2: HashMap<&str, &str> = HashMap::new(); 204 | expected_2.insert("parameter3", "value3"); 205 | expected_2.insert("key4", "value4"); 206 | let mut expected_h: HashMap<&str, HashMap<&str, &str>> = HashMap::new(); 207 | expected_h.insert("abcd", expected_1); 208 | expected_h.insert("category", expected_2); 209 | assert_eq!(res, Ok((ini_after_parser, expected_h))); 210 | } 211 | -------------------------------------------------------------------------------- /tests/ini_str.rs: -------------------------------------------------------------------------------- 1 | use nom::{ 2 | bytes::complete::{is_a, tag, take_till, take_while}, 3 | character::complete::{alphanumeric1 as alphanumeric, char, space0 as space}, 4 | combinator::opt, 5 | multi::many, 6 | sequence::{delimited, pair, terminated}, 7 | IResult, Parser, 8 | }; 9 | 10 | use std::collections::HashMap; 11 | 12 | fn is_line_ending_or_comment(chr: char) -> bool { 13 | chr == ';' || chr == '\n' 14 | } 15 | 16 | fn not_line_ending(i: &str) -> IResult<&str, &str> { 17 | take_while(|c| c != '\r' && c != '\n')(i) 18 | } 19 | 20 | fn space_or_line_ending(i: &str) -> IResult<&str, &str> { 21 | is_a(" \r\n")(i) 22 | } 23 | 24 | fn category(i: &str) -> IResult<&str, &str> { 25 | terminated( 26 | delimited(char('['), take_while(|c| c != ']'), char(']')), 27 | opt(is_a(" \r\n")), 28 | ) 29 | .parse(i) 30 | } 31 | 32 | fn key_value(i: &str) -> IResult<&str, (&str, &str)> { 33 | let (i, key) = alphanumeric(i)?; 34 | let (i, _) = (opt(space), tag("="), opt(space)).parse(i)?; 35 | let (i, val) = take_till(is_line_ending_or_comment)(i)?; 36 | let (i, _) = opt(space).parse(i)?; 37 | let (i, _) = opt(pair(tag(";"), not_line_ending)).parse(i)?; 38 | let (i, _) = opt(space_or_line_ending).parse(i)?; 39 | 40 | Ok((i, (key, val))) 41 | } 42 | 43 | fn keys_and_values_aggregator(i: &str) -> IResult<&str, Vec<(&str, &str)>> { 44 | many(0.., key_value).parse(i) 45 | } 46 | 47 | fn keys_and_values(input: &str) -> IResult<&str, HashMap<&str, &str>> { 48 | match keys_and_values_aggregator(input) { 49 | Ok((i, tuple_vec)) => Ok((i, tuple_vec.into_iter().collect())), 50 | Err(e) => Err(e), 51 | } 52 | } 53 | 54 | fn category_and_keys(i: &str) -> IResult<&str, (&str, HashMap<&str, &str>)> { 55 | pair(category, keys_and_values).parse(i) 56 | } 57 | 58 | #[allow(clippy::type_complexity)] 59 | fn categories_aggregator(i: &str) -> IResult<&str, Vec<(&str, HashMap<&str, &str>)>> { 60 | many(0.., category_and_keys).parse(i) 61 | } 62 | 63 | fn categories(input: &str) -> IResult<&str, HashMap<&str, HashMap<&str, &str>>> { 64 | match categories_aggregator(input) { 65 | Ok((i, tuple_vec)) => Ok((i, tuple_vec.into_iter().collect())), 66 | Err(e) => Err(e), 67 | } 68 | } 69 | 70 | #[test] 71 | fn parse_category_test() { 72 | let ini_file = "[category] 73 | 74 | parameter=value 75 | key = value2"; 76 | 77 | let ini_without_category = "parameter=value 78 | key = value2"; 79 | 80 | let res = category(ini_file); 81 | println!("{:?}", res); 82 | match res { 83 | Ok((i, o)) => println!("i: {} | o: {:?}", i, o), 84 | _ => println!("error"), 85 | } 86 | 87 | assert_eq!(res, Ok((ini_without_category, "category"))); 88 | } 89 | 90 | #[test] 91 | fn parse_key_value_test() { 92 | let ini_file = "parameter=value 93 | key = value2"; 94 | 95 | let ini_without_key_value = "key = value2"; 96 | 97 | let res = key_value(ini_file); 98 | println!("{:?}", res); 99 | match res { 100 | Ok((i, (o1, o2))) => println!("i: {} | o: ({:?},{:?})", i, o1, o2), 101 | _ => println!("error"), 102 | } 103 | 104 | assert_eq!(res, Ok((ini_without_key_value, ("parameter", "value")))); 105 | } 106 | 107 | #[test] 108 | fn parse_key_value_with_space_test() { 109 | let ini_file = "parameter = value 110 | key = value2"; 111 | 112 | let ini_without_key_value = "key = value2"; 113 | 114 | let res = key_value(ini_file); 115 | println!("{:?}", res); 116 | match res { 117 | Ok((i, (o1, o2))) => println!("i: {} | o: ({:?},{:?})", i, o1, o2), 118 | _ => println!("error"), 119 | } 120 | 121 | assert_eq!(res, Ok((ini_without_key_value, ("parameter", "value")))); 122 | } 123 | 124 | #[test] 125 | fn parse_key_value_with_comment_test() { 126 | let ini_file = "parameter=value;abc 127 | key = value2"; 128 | 129 | let ini_without_key_value = "key = value2"; 130 | 131 | let res = key_value(ini_file); 132 | println!("{:?}", res); 133 | match res { 134 | Ok((i, (o1, o2))) => println!("i: {} | o: ({:?},{:?})", i, o1, o2), 135 | _ => println!("error"), 136 | } 137 | 138 | assert_eq!(res, Ok((ini_without_key_value, ("parameter", "value")))); 139 | } 140 | 141 | #[test] 142 | fn parse_multiple_keys_and_values_test() { 143 | let ini_file = "parameter=value;abc 144 | 145 | key = value2 146 | 147 | [category]"; 148 | 149 | let ini_without_key_value = "[category]"; 150 | 151 | let res = keys_and_values(ini_file); 152 | println!("{:?}", res); 153 | match res { 154 | Ok((i, ref o)) => println!("i: {} | o: {:?}", i, o), 155 | _ => println!("error"), 156 | } 157 | 158 | let mut expected: HashMap<&str, &str> = HashMap::new(); 159 | expected.insert("parameter", "value"); 160 | expected.insert("key", "value2"); 161 | assert_eq!(res, Ok((ini_without_key_value, expected))); 162 | } 163 | 164 | #[test] 165 | fn parse_category_then_multiple_keys_and_values_test() { 166 | //FIXME: there can be an empty line or a comment line after a category 167 | let ini_file = "[abcd] 168 | parameter=value;abc 169 | 170 | key = value2 171 | 172 | [category]"; 173 | 174 | let ini_after_parser = "[category]"; 175 | 176 | let res = category_and_keys(ini_file); 177 | println!("{:?}", res); 178 | match res { 179 | Ok((i, ref o)) => println!("i: {} | o: {:?}", i, o), 180 | _ => println!("error"), 181 | } 182 | 183 | let mut expected_h: HashMap<&str, &str> = HashMap::new(); 184 | expected_h.insert("parameter", "value"); 185 | expected_h.insert("key", "value2"); 186 | assert_eq!(res, Ok((ini_after_parser, ("abcd", expected_h)))); 187 | } 188 | 189 | #[test] 190 | fn parse_multiple_categories_test() { 191 | let ini_file = "[abcd] 192 | 193 | parameter=value;abc 194 | 195 | key = value2 196 | 197 | [category] 198 | parameter3=value3 199 | key4 = value4 200 | "; 201 | 202 | let res = categories(ini_file); 203 | //println!("{:?}", res); 204 | match res { 205 | Ok((i, ref o)) => println!("i: {} | o: {:?}", i, o), 206 | _ => println!("error"), 207 | } 208 | 209 | let mut expected_1: HashMap<&str, &str> = HashMap::new(); 210 | expected_1.insert("parameter", "value"); 211 | expected_1.insert("key", "value2"); 212 | let mut expected_2: HashMap<&str, &str> = HashMap::new(); 213 | expected_2.insert("parameter3", "value3"); 214 | expected_2.insert("key4", "value4"); 215 | let mut expected_h: HashMap<&str, HashMap<&str, &str>> = HashMap::new(); 216 | expected_h.insert("abcd", expected_1); 217 | expected_h.insert("category", expected_2); 218 | assert_eq!(res, Ok(("", expected_h))); 219 | } 220 | -------------------------------------------------------------------------------- /tests/issues.rs: -------------------------------------------------------------------------------- 1 | //#![feature(trace_macros)] 2 | #![allow(dead_code)] 3 | #![allow(clippy::redundant_closure)] 4 | 5 | use nom::{error::ErrorKind, Err, IResult, Needed, Parser}; 6 | 7 | #[allow(dead_code)] 8 | struct Range { 9 | start: char, 10 | end: char, 11 | } 12 | 13 | pub fn take_char(input: &[u8]) -> IResult<&[u8], char> { 14 | if !input.is_empty() { 15 | Ok((&input[1..], input[0] as char)) 16 | } else { 17 | Err(Err::Incomplete(Needed::new(1))) 18 | } 19 | } 20 | 21 | #[cfg(feature = "std")] 22 | mod parse_int { 23 | use nom::{ 24 | character::streaming::{digit1 as digit, space1 as space}, 25 | combinator::{complete, map, opt}, 26 | multi::many, 27 | IResult, 28 | }; 29 | use nom::{HexDisplay, Parser}; 30 | use std::str; 31 | 32 | fn parse_ints(input: &[u8]) -> IResult<&[u8], Vec> { 33 | many(0.., spaces_or_int).parse(input) 34 | } 35 | 36 | fn spaces_or_int(input: &[u8]) -> IResult<&[u8], i32> { 37 | println!("{}", input.to_hex(8)); 38 | let (i, _) = opt(complete(space)).parse(input)?; 39 | let (i, res) = map(complete(digit), |x| { 40 | println!("x: {:?}", x); 41 | let result = str::from_utf8(x).unwrap(); 42 | println!("Result: {}", result); 43 | println!("int is empty?: {}", x.is_empty()); 44 | match result.parse() { 45 | Ok(i) => i, 46 | Err(e) => panic!("UH OH! NOT A DIGIT! {:?}", e), 47 | } 48 | }) 49 | .parse(i)?; 50 | 51 | Ok((i, res)) 52 | } 53 | 54 | #[test] 55 | fn issue_142() { 56 | let subject = parse_ints(&b"12 34 5689a"[..]); 57 | let expected = Ok((&b"a"[..], vec![12, 34, 5689])); 58 | assert_eq!(subject, expected); 59 | 60 | let subject = parse_ints(&b"12 34 5689 "[..]); 61 | let expected = Ok((&b" "[..], vec![12, 34, 5689])); 62 | assert_eq!(subject, expected) 63 | } 64 | } 65 | 66 | #[test] 67 | fn usize_length_bytes_issue() { 68 | use nom::multi::length_data; 69 | use nom::number::streaming::be_u16; 70 | let _: IResult<&[u8], &[u8], (&[u8], ErrorKind)> = length_data(be_u16).parse(b"012346"); 71 | } 72 | 73 | #[test] 74 | fn take_till_issue() { 75 | use nom::bytes::streaming::take_till; 76 | 77 | fn nothing(i: &[u8]) -> IResult<&[u8], &[u8]> { 78 | take_till(|_| true)(i) 79 | } 80 | 81 | assert_eq!(nothing(b""), Err(Err::Incomplete(Needed::new(1)))); 82 | assert_eq!(nothing(b"abc"), Ok((&b"abc"[..], &b""[..]))); 83 | } 84 | 85 | #[test] 86 | fn issue_655() { 87 | use nom::character::streaming::{line_ending, not_line_ending}; 88 | fn twolines(i: &str) -> IResult<&str, (&str, &str)> { 89 | let (i, l1) = not_line_ending(i)?; 90 | let (i, _) = line_ending(i)?; 91 | let (i, l2) = not_line_ending(i)?; 92 | let (i, _) = line_ending(i)?; 93 | 94 | Ok((i, (l1, l2))) 95 | } 96 | 97 | assert_eq!(twolines("foo\nbar\n"), Ok(("", ("foo", "bar")))); 98 | assert_eq!(twolines("féo\nbar\n"), Ok(("", ("féo", "bar")))); 99 | assert_eq!(twolines("foé\nbar\n"), Ok(("", ("foé", "bar")))); 100 | assert_eq!(twolines("foé\r\nbar\n"), Ok(("", ("foé", "bar")))); 101 | } 102 | 103 | #[cfg(feature = "alloc")] 104 | fn issue_717(i: &[u8]) -> IResult<&[u8], Vec<&[u8]>> { 105 | use nom::bytes::complete::{is_not, tag}; 106 | use nom::multi::separated_list0; 107 | 108 | separated_list0(tag(&[0x0][..]), is_not([0x0u8])).parse(i) 109 | } 110 | 111 | mod issue_647 { 112 | use nom::bytes::streaming::tag; 113 | use nom::combinator::complete; 114 | use nom::multi::separated_list0; 115 | use nom::Parser; 116 | use nom::{error::Error, number::streaming::be_f64, Err, IResult}; 117 | pub type Input<'a> = &'a [u8]; 118 | 119 | #[derive(PartialEq, Debug, Clone)] 120 | struct Data { 121 | c: f64, 122 | v: Vec, 123 | } 124 | 125 | #[allow(clippy::type_complexity)] 126 | fn list<'a>( 127 | input: Input<'a>, 128 | _cs: &'_ f64, 129 | ) -> Result<(Input<'a>, Vec), Err>> { 130 | separated_list0(complete(tag(",")), complete(be_f64)).parse(input) 131 | } 132 | 133 | fn data(input: Input<'_>) -> IResult, Data> { 134 | let (i, c) = be_f64(input)?; 135 | let (i, _) = tag("\n")(i)?; 136 | let (i, v) = list(i, &c)?; 137 | Ok((i, Data { c, v })) 138 | } 139 | } 140 | 141 | #[test] 142 | fn issue_848_overflow_incomplete_bits_to_bytes() { 143 | fn take(i: &[u8]) -> IResult<&[u8], &[u8]> { 144 | use nom::bytes::streaming::take; 145 | take(0x2000000000000000_usize)(i) 146 | } 147 | fn parser(i: &[u8]) -> IResult<&[u8], &[u8]> { 148 | use nom::bits::{bits, bytes}; 149 | 150 | bits(bytes(take))(i) 151 | } 152 | assert_eq!( 153 | parser(&b""[..]), 154 | Err(Err::Failure(nom::error_position!( 155 | &b""[..], 156 | ErrorKind::TooLarge 157 | ))) 158 | ); 159 | } 160 | 161 | #[test] 162 | fn issue_942() { 163 | use nom::error::{ContextError, ParseError}; 164 | pub fn parser<'a, E: ParseError<&'a str> + ContextError<&'a str>>( 165 | i: &'a str, 166 | ) -> IResult<&'a str, usize, E> { 167 | use nom::{character::complete::char, error::context, multi::many0_count}; 168 | many0_count(context("char_a", char('a'))).parse(i) 169 | } 170 | assert_eq!(parser::<()>("aaa"), Ok(("", 3))); 171 | } 172 | 173 | #[test] 174 | fn issue_many_m_n_with_zeros() { 175 | use nom::character::complete::char; 176 | use nom::multi::many; 177 | let mut parser = many::<_, (), Vec, _, _>(0..=0, char('a')); 178 | assert_eq!(parser.parse("aaa"), Ok(("aaa", vec!()))); 179 | } 180 | 181 | #[test] 182 | fn issue_1231_bits_expect_fn_closure() { 183 | use nom::bits::{bits, complete::take}; 184 | use nom::error::Error; 185 | pub fn example(input: &[u8]) -> IResult<&[u8], (u8, u8)> { 186 | bits::<_, _, Error<_>, _, _>((take(1usize), take(1usize)))(input) 187 | } 188 | assert_eq!(example(&[0xff]), Ok((&b""[..], (1, 1)))); 189 | } 190 | 191 | #[test] 192 | fn issue_1282_findtoken_char() { 193 | use nom::character::complete::one_of; 194 | use nom::error::Error; 195 | let mut parser = one_of::<_, _, Error<_>>(&['a', 'b', 'c'][..]); 196 | assert_eq!(parser("aaa"), Ok(("aa", 'a'))); 197 | } 198 | 199 | #[test] 200 | fn issue_x_looser_fill_bounds() { 201 | use nom::{ 202 | bytes::streaming::tag, character::streaming::digit1, error_position, multi::fill, 203 | sequence::terminated, 204 | }; 205 | 206 | fn fill_pair(i: &[u8]) -> IResult<&[u8], [&[u8]; 2]> { 207 | let mut buf = [&[][..], &[][..]]; 208 | let (i, _) = fill(terminated(digit1, tag(",")), &mut buf).parse(i)?; 209 | Ok((i, buf)) 210 | } 211 | 212 | assert_eq!( 213 | fill_pair(b"123,456,"), 214 | Ok((&b""[..], [&b"123"[..], &b"456"[..]])) 215 | ); 216 | assert_eq!( 217 | fill_pair(b"123,456,789"), 218 | Ok((&b"789"[..], [&b"123"[..], &b"456"[..]])) 219 | ); 220 | assert_eq!( 221 | fill_pair(b"123,,"), 222 | Err(Err::Error(error_position!(&b","[..], ErrorKind::Digit))) 223 | ); 224 | } 225 | 226 | #[test] 227 | fn issue_1459_clamp_capacity() { 228 | use nom::character::complete::char; 229 | 230 | // shouldn't panic 231 | use nom::multi::many_m_n; 232 | let mut parser = many_m_n::<_, (), _>(usize::MAX, usize::MAX, char('a')); 233 | assert_eq!(parser.parse("a"), Err(nom::Err::Error(()))); 234 | 235 | // shouldn't panic 236 | use nom::multi::count; 237 | let mut parser = count(char('a'), usize::MAX); 238 | assert_eq!(parser.parse("a"), Err(nom::Err::Error(()))); 239 | } 240 | 241 | #[test] 242 | fn issue_1617_count_parser_returning_zero_size() { 243 | use nom::{bytes::complete::tag, combinator::map, error::Error, multi::count}; 244 | 245 | // previously, `count()` panicked if the parser had type `O = ()` 246 | let parser = map(tag::<_, _, Error<&str>>("abc"), |_| ()); 247 | // shouldn't panic 248 | let result = count(parser, 3) 249 | .parse("abcabcabcdef") 250 | .expect("parsing should succeed"); 251 | assert_eq!(result, ("def", vec![(), (), ()])); 252 | } 253 | 254 | #[test] 255 | fn issue_1586_parser_iterator_impl() { 256 | use nom::{ 257 | character::complete::{digit1, newline}, 258 | combinator::{iterator, opt}, 259 | sequence::terminated, 260 | IResult, 261 | }; 262 | fn parse_line(i: &str) -> IResult<&str, &str> { 263 | terminated(digit1, opt(newline)).parse(i) 264 | } 265 | 266 | fn parse_input(i: &str) -> impl Iterator + '_ { 267 | iterator(i, parse_line).map(|x| x.parse::().unwrap()) 268 | } 269 | 270 | assert_eq!(parse_input("123\n456").collect::>(), vec![123, 456]); 271 | } 272 | -------------------------------------------------------------------------------- /tests/json.rs: -------------------------------------------------------------------------------- 1 | #![cfg(feature = "alloc")] 2 | 3 | use nom::{ 4 | branch::alt, 5 | bytes::complete::{tag, take}, 6 | character::complete::{anychar, char, multispace0, none_of}, 7 | combinator::{map, map_opt, map_res, value, verify}, 8 | error::ParseError, 9 | multi::{fold, separated_list0}, 10 | number::complete::double, 11 | sequence::{delimited, preceded, separated_pair}, 12 | IResult, Parser, 13 | }; 14 | 15 | use std::collections::HashMap; 16 | 17 | #[derive(Debug, PartialEq, Clone)] 18 | pub enum JsonValue { 19 | Null, 20 | Bool(bool), 21 | Str(String), 22 | Num(f64), 23 | Array(Vec), 24 | Object(HashMap), 25 | } 26 | 27 | fn boolean(input: &str) -> IResult<&str, bool> { 28 | alt((value(false, tag("false")), value(true, tag("true")))).parse(input) 29 | } 30 | 31 | fn u16_hex(input: &str) -> IResult<&str, u16> { 32 | map_res(take(4usize), |s| u16::from_str_radix(s, 16)).parse(input) 33 | } 34 | 35 | fn unicode_escape(input: &str) -> IResult<&str, char> { 36 | map_opt( 37 | alt(( 38 | // Not a surrogate 39 | map(verify(u16_hex, |cp| !(0xD800..0xE000).contains(cp)), |cp| { 40 | cp as u32 41 | }), 42 | // See https://en.wikipedia.org/wiki/UTF-16#Code_points_from_U+010000_to_U+10FFFF for details 43 | map( 44 | verify( 45 | separated_pair(u16_hex, tag("\\u"), u16_hex), 46 | |(high, low)| (0xD800..0xDC00).contains(high) && (0xDC00..0xE000).contains(low), 47 | ), 48 | |(high, low)| { 49 | let high_ten = (high as u32) - 0xD800; 50 | let low_ten = (low as u32) - 0xDC00; 51 | (high_ten << 10) + low_ten + 0x10000 52 | }, 53 | ), 54 | )), 55 | // Could be probably replaced with .unwrap() or _unchecked due to the verify checks 56 | std::char::from_u32, 57 | ) 58 | .parse(input) 59 | } 60 | 61 | fn character(input: &str) -> IResult<&str, char> { 62 | let (input, c) = none_of("\"")(input)?; 63 | if c == '\\' { 64 | alt(( 65 | map_res(anychar, |c| { 66 | Ok(match c { 67 | '"' | '\\' | '/' => c, 68 | 'b' => '\x08', 69 | 'f' => '\x0C', 70 | 'n' => '\n', 71 | 'r' => '\r', 72 | 't' => '\t', 73 | _ => return Err(()), 74 | }) 75 | }), 76 | preceded(char('u'), unicode_escape), 77 | )) 78 | .parse(input) 79 | } else { 80 | Ok((input, c)) 81 | } 82 | } 83 | 84 | fn string(input: &str) -> IResult<&str, String> { 85 | delimited( 86 | char('"'), 87 | fold(0.., character, String::new, |mut string, c| { 88 | string.push(c); 89 | string 90 | }), 91 | char('"'), 92 | ) 93 | .parse(input) 94 | } 95 | 96 | fn ws<'a, O, E: ParseError<&'a str>, F: Parser<&'a str, Output = O, Error = E>>( 97 | f: F, 98 | ) -> impl Parser<&'a str, Output = O, Error = E> { 99 | delimited(multispace0, f, multispace0) 100 | } 101 | 102 | fn array(input: &str) -> IResult<&str, Vec> { 103 | delimited( 104 | char('['), 105 | ws(separated_list0(ws(char(',')), json_value)), 106 | char(']'), 107 | ) 108 | .parse(input) 109 | } 110 | 111 | fn object(input: &str) -> IResult<&str, HashMap> { 112 | map( 113 | delimited( 114 | char('{'), 115 | ws(separated_list0( 116 | ws(char(',')), 117 | separated_pair(string, ws(char(':')), json_value), 118 | )), 119 | char('}'), 120 | ), 121 | |key_values| key_values.into_iter().collect(), 122 | ) 123 | .parse(input) 124 | } 125 | 126 | fn json_value(input: &str) -> IResult<&str, JsonValue> { 127 | use JsonValue::*; 128 | 129 | alt(( 130 | value(Null, tag("null")), 131 | map(boolean, Bool), 132 | map(string, Str), 133 | map(double, Num), 134 | map(array, Array), 135 | map(object, Object), 136 | )) 137 | .parse(input) 138 | } 139 | 140 | fn json(input: &str) -> IResult<&str, JsonValue> { 141 | ws(json_value).parse(input) 142 | } 143 | 144 | #[test] 145 | fn json_string() { 146 | assert_eq!(string("\"\""), Ok(("", "".to_string()))); 147 | assert_eq!(string("\"abc\""), Ok(("", "abc".to_string()))); 148 | assert_eq!( 149 | string("\"abc\\\"\\\\\\/\\b\\f\\n\\r\\t\\u0001\\u2014\u{2014}def\""), 150 | Ok(("", "abc\"\\/\x08\x0C\n\r\t\x01——def".to_string())), 151 | ); 152 | assert_eq!(string("\"\\uD83D\\uDE10\""), Ok(("", "😐".to_string()))); 153 | 154 | assert!(string("\"").is_err()); 155 | assert!(string("\"abc").is_err()); 156 | assert!(string("\"\\\"").is_err()); 157 | assert!(string("\"\\u123\"").is_err()); 158 | assert!(string("\"\\uD800\"").is_err()); 159 | assert!(string("\"\\uD800\\uD800\"").is_err()); 160 | assert!(string("\"\\uDC00\"").is_err()); 161 | } 162 | 163 | #[test] 164 | fn json_object() { 165 | use JsonValue::*; 166 | 167 | let input = r#"{"a":42,"b":"x"}"#; 168 | 169 | let expected = Object( 170 | vec![ 171 | ("a".to_string(), Num(42.0)), 172 | ("b".to_string(), Str("x".to_string())), 173 | ] 174 | .into_iter() 175 | .collect(), 176 | ); 177 | 178 | assert_eq!(json(input), Ok(("", expected))); 179 | } 180 | 181 | #[test] 182 | fn json_array() { 183 | use JsonValue::*; 184 | 185 | let input = r#"[42,"x"]"#; 186 | 187 | let expected = Array(vec![Num(42.0), Str("x".to_string())]); 188 | 189 | assert_eq!(json(input), Ok(("", expected))); 190 | } 191 | 192 | #[test] 193 | fn json_whitespace() { 194 | use JsonValue::*; 195 | 196 | let input = r#" 197 | { 198 | "null" : null, 199 | "true" :true , 200 | "false": false , 201 | "number" : 123e4 , 202 | "string" : " abc 123 " , 203 | "array" : [ false , 1 , "two" ] , 204 | "object" : { "a" : 1.0 , "b" : "c" } , 205 | "empty_array" : [ ] , 206 | "empty_object" : { } 207 | } 208 | "#; 209 | 210 | assert_eq!( 211 | json(input), 212 | Ok(( 213 | "", 214 | Object( 215 | vec![ 216 | ("null".to_string(), Null), 217 | ("true".to_string(), Bool(true)), 218 | ("false".to_string(), Bool(false)), 219 | ("number".to_string(), Num(123e4)), 220 | ("string".to_string(), Str(" abc 123 ".to_string())), 221 | ( 222 | "array".to_string(), 223 | Array(vec![Bool(false), Num(1.0), Str("two".to_string())]) 224 | ), 225 | ( 226 | "object".to_string(), 227 | Object( 228 | vec![ 229 | ("a".to_string(), Num(1.0)), 230 | ("b".to_string(), Str("c".to_string())), 231 | ] 232 | .into_iter() 233 | .collect() 234 | ) 235 | ), 236 | ("empty_array".to_string(), Array(vec![]),), 237 | ("empty_object".to_string(), Object(HashMap::new()),), 238 | ] 239 | .into_iter() 240 | .collect() 241 | ) 242 | )) 243 | ); 244 | } 245 | -------------------------------------------------------------------------------- /tests/multiline.rs: -------------------------------------------------------------------------------- 1 | use nom::{ 2 | character::complete::{alphanumeric1 as alphanumeric, line_ending as eol}, 3 | multi::many, 4 | sequence::terminated, 5 | IResult, Parser, 6 | }; 7 | 8 | pub fn end_of_line(input: &str) -> IResult<&str, &str> { 9 | if input.is_empty() { 10 | Ok((input, input)) 11 | } else { 12 | eol(input) 13 | } 14 | } 15 | 16 | pub fn read_line(input: &str) -> IResult<&str, &str> { 17 | terminated(alphanumeric, end_of_line).parse(input) 18 | } 19 | 20 | pub fn read_lines(input: &str) -> IResult<&str, Vec<&str>> { 21 | many(0.., read_line).parse(input) 22 | } 23 | 24 | #[cfg(feature = "alloc")] 25 | #[test] 26 | fn read_lines_test() { 27 | let res = Ok(("", vec!["Duck", "Dog", "Cow"])); 28 | 29 | assert_eq!(read_lines("Duck\nDog\nCow\n"), res); 30 | assert_eq!(read_lines("Duck\nDog\nCow"), res); 31 | } 32 | -------------------------------------------------------------------------------- /tests/overflow.rs: -------------------------------------------------------------------------------- 1 | #![allow(clippy::unreadable_literal)] 2 | #![cfg(target_pointer_width = "64")] 3 | 4 | use nom::bytes::streaming::take; 5 | #[cfg(feature = "alloc")] 6 | use nom::multi::{length_data, many}; 7 | #[cfg(feature = "alloc")] 8 | use nom::number::streaming::be_u64; 9 | use nom::{Err, IResult, Needed, Parser}; 10 | 11 | // Parser definition 12 | 13 | // We request a length that would trigger an overflow if computing consumed + requested 14 | fn parser02(i: &[u8]) -> IResult<&[u8], (&[u8], &[u8])> { 15 | (take(1_usize), take(18446744073709551615_usize)).parse(i) 16 | } 17 | 18 | #[test] 19 | fn overflow_incomplete_tuple() { 20 | assert_eq!( 21 | parser02(&b"3"[..]), 22 | Err(Err::Incomplete(Needed::new(18446744073709551615))) 23 | ); 24 | } 25 | 26 | #[test] 27 | #[cfg(feature = "alloc")] 28 | fn overflow_incomplete_length_bytes() { 29 | fn multi(i: &[u8]) -> IResult<&[u8], Vec<&[u8]>> { 30 | many(0.., length_data(be_u64)).parse(i) 31 | } 32 | 33 | // Trigger an overflow in length_data 34 | assert_eq!( 35 | multi(&b"\x00\x00\x00\x00\x00\x00\x00\x01\xaa\xff\xff\xff\xff\xff\xff\xff\xff"[..]), 36 | Err(Err::Incomplete(Needed::new(18446744073709551615))) 37 | ); 38 | } 39 | 40 | #[test] 41 | #[cfg(feature = "alloc")] 42 | fn overflow_incomplete_many0() { 43 | fn multi(i: &[u8]) -> IResult<&[u8], Vec<&[u8]>> { 44 | many(0.., length_data(be_u64)).parse(i) 45 | } 46 | 47 | // Trigger an overflow in many0 48 | assert_eq!( 49 | multi(&b"\x00\x00\x00\x00\x00\x00\x00\x01\xaa\xff\xff\xff\xff\xff\xff\xff\xef"[..]), 50 | Err(Err::Incomplete(Needed::new(18446744073709551599))) 51 | ); 52 | } 53 | 54 | #[test] 55 | #[cfg(feature = "alloc")] 56 | fn overflow_incomplete_many1() { 57 | fn multi(i: &[u8]) -> IResult<&[u8], Vec<&[u8]>> { 58 | many(1.., length_data(be_u64)).parse(i) 59 | } 60 | 61 | // Trigger an overflow in many1 62 | assert_eq!( 63 | multi(&b"\x00\x00\x00\x00\x00\x00\x00\x01\xaa\xff\xff\xff\xff\xff\xff\xff\xef"[..]), 64 | Err(Err::Incomplete(Needed::new(18446744073709551599))) 65 | ); 66 | } 67 | 68 | #[test] 69 | #[cfg(feature = "alloc")] 70 | fn overflow_incomplete_many_till() { 71 | use nom::{bytes::complete::tag, multi::many_till}; 72 | 73 | #[allow(clippy::type_complexity)] 74 | fn multi(i: &[u8]) -> IResult<&[u8], (Vec<&[u8]>, &[u8])> { 75 | many_till(length_data(be_u64), tag("abc")).parse(i) 76 | } 77 | 78 | // Trigger an overflow in many_till 79 | assert_eq!( 80 | multi(&b"\x00\x00\x00\x00\x00\x00\x00\x01\xaa\xff\xff\xff\xff\xff\xff\xff\xef"[..]), 81 | Err(Err::Incomplete(Needed::new(18446744073709551599))) 82 | ); 83 | } 84 | 85 | #[test] 86 | #[cfg(feature = "alloc")] 87 | fn overflow_incomplete_many_m_n() { 88 | fn multi(i: &[u8]) -> IResult<&[u8], Vec<&[u8]>> { 89 | many(2..=4, length_data(be_u64)).parse(i) 90 | } 91 | 92 | // Trigger an overflow in many_m_n 93 | assert_eq!( 94 | multi(&b"\x00\x00\x00\x00\x00\x00\x00\x01\xaa\xff\xff\xff\xff\xff\xff\xff\xef"[..]), 95 | Err(Err::Incomplete(Needed::new(18446744073709551599))) 96 | ); 97 | } 98 | 99 | #[test] 100 | #[cfg(feature = "alloc")] 101 | fn overflow_incomplete_count() { 102 | use nom::multi::count; 103 | 104 | fn counter(i: &[u8]) -> IResult<&[u8], Vec<&[u8]>> { 105 | count(length_data(be_u64), 2).parse(i) 106 | } 107 | 108 | assert_eq!( 109 | counter(&b"\x00\x00\x00\x00\x00\x00\x00\x01\xaa\xff\xff\xff\xff\xff\xff\xff\xef"[..]), 110 | Err(Err::Incomplete(Needed::new(18446744073709551599))) 111 | ); 112 | } 113 | 114 | #[test] 115 | #[cfg(feature = "alloc")] 116 | fn overflow_incomplete_length_count() { 117 | use nom::multi::length_count; 118 | use nom::number::streaming::be_u8; 119 | 120 | fn multi(i: &[u8]) -> IResult<&[u8], Vec<&[u8]>> { 121 | length_count(be_u8, length_data(be_u64)).parse(i) 122 | } 123 | 124 | assert_eq!( 125 | multi(&b"\x04\x00\x00\x00\x00\x00\x00\x00\x01\xaa\xff\xff\xff\xff\xff\xff\xff\xee"[..]), 126 | Err(Err::Incomplete(Needed::new(18446744073709551598))) 127 | ); 128 | } 129 | 130 | #[test] 131 | #[cfg(feature = "alloc")] 132 | fn overflow_incomplete_length_data() { 133 | fn multi(i: &[u8]) -> IResult<&[u8], Vec<&[u8]>> { 134 | many(0.., length_data(be_u64)).parse(i) 135 | } 136 | 137 | assert_eq!( 138 | multi(&b"\x00\x00\x00\x00\x00\x00\x00\x01\xaa\xff\xff\xff\xff\xff\xff\xff\xff"[..]), 139 | Err(Err::Incomplete(Needed::new(18446744073709551615))) 140 | ); 141 | } 142 | -------------------------------------------------------------------------------- /tests/reborrow_fold.rs: -------------------------------------------------------------------------------- 1 | #![allow(dead_code)] 2 | // #![allow(unused_variables)] 3 | 4 | use std::str; 5 | 6 | use nom::bytes::complete::is_not; 7 | use nom::character::complete::char; 8 | use nom::combinator::{map, map_res}; 9 | use nom::multi::fold; 10 | use nom::sequence::delimited; 11 | use nom::{IResult, Parser}; 12 | 13 | fn atom(_tomb: &mut ()) -> impl for<'a> FnMut(&'a [u8]) -> IResult<&'a [u8], String> { 14 | move |input| { 15 | map( 16 | map_res(is_not(" \t\r\n"), str::from_utf8), 17 | ToString::to_string, 18 | ) 19 | .parse(input) 20 | } 21 | } 22 | 23 | // FIXME: should we support the use case of borrowing data mutably in a parser? 24 | fn list<'a>(i: &'a [u8], tomb: &mut ()) -> IResult<&'a [u8], String> { 25 | delimited( 26 | char('('), 27 | fold(0.., atom(tomb), String::new, |acc: String, next: String| { 28 | acc + next.as_str() 29 | }), 30 | char(')'), 31 | ) 32 | .parse(i) 33 | } 34 | --------------------------------------------------------------------------------