├── .github └── workflows │ └── rust.yml ├── .gitignore ├── CONTRIBUTING.md ├── Cargo.toml ├── LICENSE-APACHE ├── LICENSE-MIT ├── README.md ├── benches ├── input_html_elements.txt ├── input_word_100.txt └── match.rs ├── figures ├── graph.dot └── graph.svg ├── src ├── lib.rs └── trie.rs └── tests └── tests.rs /.github/workflows/rust.yml: -------------------------------------------------------------------------------- 1 | name: build 2 | 3 | on: 4 | schedule: 5 | - cron: '0 0 * * *' 6 | push: 7 | branches: 8 | - main 9 | tags: 10 | - 'v*.*.*' 11 | pull_request: 12 | branches: 13 | - main 14 | 15 | env: 16 | CARGO_TERM_COLOR: always 17 | 18 | jobs: 19 | build: 20 | name: Check on ${{ matrix.rust }} 21 | runs-on: ubuntu-latest 22 | strategy: 23 | matrix: 24 | rust: 25 | - 1.70.0 # MSRV 26 | - stable 27 | - nightly 28 | steps: 29 | - uses: actions/checkout@v3 30 | - name: Install ${{ matrix.rust }} 31 | run: | 32 | rustup toolchain install ${{ matrix.rust }} --profile minimal --component rustfmt,clippy 33 | rustup default ${{ matrix.rust }} 34 | 35 | - name: Run cargo check 36 | continue-on-error: ${{ matrix.rust == 'nightly' }} 37 | run: cargo check 38 | 39 | - name: Run cargo fmt 40 | continue-on-error: ${{ matrix.rust == 'nightly' }} 41 | run: cargo fmt -- --check 42 | 43 | - name: Run cargo clippy 44 | # Run clippy only on stable to ignore unreasonable old warnings. 
45 | continue-on-error: ${{ matrix.rust != 'stable' }} 46 | run: cargo clippy -- -D warnings -W clippy::nursery -W clippy::pedantic 47 | 48 | - name: Run cargo test 49 | run: cargo test 50 | 51 | - name: Run cargo test all features 52 | if: ${{ matrix.rust == 'nightly' }} 53 | run: cargo test --all-features 54 | 55 | - name: Run cargo doc 56 | if: ${{ matrix.rust == 'nightly' }} 57 | run: cargo doc --release --all-features 58 | env: 59 | RUSTDOCFLAGS: "-Dwarnings" 60 | 61 | publish: 62 | name: Publish 63 | runs-on: ubuntu-latest 64 | if: "startsWith(github.ref, 'refs/tags/')" 65 | needs: [ build ] 66 | steps: 67 | - uses: actions/checkout@v3 68 | - name: Install latest stable 69 | run: | 70 | rustup toolchain install stable --profile minimal 71 | rustup default stable 72 | 73 | - name: Run cargo publish 74 | run: cargo publish 75 | env: 76 | CARGO_REGISTRY_TOKEN: ${{ secrets.CRATES_TOKEN }} 77 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Generated by Cargo 2 | # will have compiled files and executables 3 | debug/ 4 | target/ 5 | 6 | # Remove Cargo.lock from gitignore if creating an executable, leave it for libraries 7 | # More information here https://doc.rust-lang.org/cargo/guide/cargo-toml-vs-cargo-lock.html 8 | Cargo.lock 9 | 10 | # These are backup files generated by rustfmt 11 | **/*.rs.bk 12 | 13 | # MSVC Windows builds of rustc generate these, which store debugging information 14 | *.pdb 15 | 16 | -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # How to contribute 2 | 3 | We'd love to accept your patches and contributions to this project. 4 | There are just a few small guidelines you need to follow. 
5 | 6 | - Unless you explicitly state otherwise, any contribution intentionally 7 | submitted for inclusion in work by you shall be dual licensed under MIT 8 | license or Apache-2.0 license without any additional terms or conditions. 9 | - All submissions, including submissions by project members, require review. 10 | We use GitHub pull requests for this purpose. 11 | Consult GitHub Help for more information on using pull requests. 12 | 13 | We require all contributors to follow the [daac-tools Code of 14 | Conduct](https://github.com/daac-tools/guidelines/blob/main/conduct.md). 15 | -------------------------------------------------------------------------------- /Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "trie-match" 3 | version = "0.2.0" 4 | edition = "2021" 5 | 6 | # Update README and CI settings, accordingly. 7 | rust-version = "1.70" 8 | 9 | authors = [ 10 | "Koichi Akabe ", 11 | ] 12 | description = "Fast match macro" 13 | license = "MIT OR Apache-2.0" 14 | homepage = "https://github.com/daac-tools/trie-match" 15 | repository = "https://github.com/daac-tools/trie-match" 16 | readme = "README.md" 17 | keywords = ["match", "text", "double-array", "macro"] 18 | categories = ["text-processing", "algorithms", "data-structures", "no-std::no-alloc"] 19 | exclude = [".*"] 20 | 21 | [lib] 22 | proc-macro = true 23 | 24 | [dependencies] 25 | syn = { version = "2.0", features = ["full", "extra-traits"] } # MIT or Apache-2.0 26 | proc-macro2 = "1.0" # MIT or Apache-2.0 27 | quote = "1.0" # MIT or Apache-2.0 28 | 29 | [features] 30 | # Nightly only 31 | cfg_attribute = [] 32 | 33 | [dev-dependencies] 34 | criterion = { version = "0.4.0", features = ["html_reports"] } # MIT or Apache-2.0 35 | phf = { version = "0.11", default-features = false, features = ["macros"] } 36 | 37 | [[bench]] 38 | name = "match" 39 | harness = false 40 | 41 | [package.metadata.docs.rs] 42 | all-features = true 43 | 
rustdoc-args = ["--cfg", "docsrs"] 44 | -------------------------------------------------------------------------------- /LICENSE-APACHE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 
39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. 
Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. 
You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 
122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. 
In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 
175 | 176 | END OF TERMS AND CONDITIONS 177 | -------------------------------------------------------------------------------- /LICENSE-MIT: -------------------------------------------------------------------------------- 1 | Permission is hereby granted, free of charge, to any 2 | person obtaining a copy of this software and associated 3 | documentation files (the "Software"), to deal in the 4 | Software without restriction, including without 5 | limitation the rights to use, copy, modify, merge, 6 | publish, distribute, sublicense, and/or sell copies of 7 | the Software, and to permit persons to whom the Software 8 | is furnished to do so, subject to the following 9 | conditions: 10 | 11 | The above copyright notice and this permission notice 12 | shall be included in all copies or substantial portions 13 | of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF 16 | ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED 17 | TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A 18 | PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT 19 | SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY 20 | CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION 21 | OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR 22 | IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 23 | DEALINGS IN THE SOFTWARE. 24 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # `trie_match! 
{}` 2 | 3 | [![Crates.io](https://img.shields.io/crates/v/trie-match)](https://crates.io/crates/trie-match) 4 | [![Documentation](https://docs.rs/trie-match/badge.svg)](https://docs.rs/trie-match) 5 | [![Rust](https://img.shields.io/badge/rust-1.70%2B-blue.svg?maxAge=3600)](https://github.com/daac-tools/trie-match) 6 | [![Build Status](https://github.com/daac-tools/trie-match/actions/workflows/rust.yml/badge.svg)](https://github.com/daac-tools/trie-match/actions) 7 | [![Slack](https://img.shields.io/badge/join-chat-brightgreen?logo=slack)](https://join.slack.com/t/daac-tools/shared_invite/zt-1pwwqbcz4-KxL95Nam9VinpPlzUpEGyA) 8 | 9 | This macro speeds up Rust's `match` expression for comparing strings by using a 10 | compact double-array data structure. 11 | 12 | ## Usage 13 | 14 | Simply wrap the existing `match` expression with the `trie_match! {}` macro as 15 | follows: 16 | 17 | ```rust 18 | use trie_match::trie_match; 19 | 20 | let x = "abd"; 21 | 22 | let result = trie_match! { 23 | match x { 24 | "a" => 0, 25 | "abc" => 1, 26 | pat @ ("abd" | "bcc") => pat.len(), 27 | "bc" => 3, 28 | _ => 4, 29 | } 30 | }; 31 | 32 | assert_eq!(result, 3); 33 | ``` 34 | 35 | ## Why is it faster? 36 | 37 | In a normal `match` expression, the string is compared for each pattern. It is 38 | equivalent to the following code: 39 | 40 | ```rust 41 | if x == "a" { 42 | 0 43 | } else if x == "abc" { 44 | 1 45 | } else if x == "abd" || x == "bcc" { 46 | x.len() 47 | } else if x == "bc" { 48 | 3 49 | } else { 50 | 4 51 | } 52 | ``` 53 | 54 | The above code requires that string comparisons be made from the beginning of 55 | the string each time. The time complexity of the above code is *O(mn)*, where 56 | *m* is the average pattern length, and *n* is the number of patterns. 
57 | 58 | In contrast, this macro builds the following trie structure to retrieve the 59 | index of the matched arm: 60 | 61 | ![Trie](figures/graph.svg) 62 | 63 | Furthermore, this implementation uses the compact double-array data structure 64 | to achieve efficient state-to-state traversal, and the time complexity becomes 65 | *O(m)*. 66 | 67 | ## `cfg` attribute 68 | 69 | This macro supports conditional compilation with the `cfg` attribute only on 70 | Nightly Rust. To use this feature, enable `features = ["cfg_attribute"]` 71 | in your `Cargo.toml`. 72 | 73 | ### Example 74 | 75 | ```rust 76 | trie_match! { 77 | match x { 78 | #[cfg(feature = "foo")] 79 | "a" => { .. } 80 | #[cfg(feature = "bar")] 81 | "b" => { .. } 82 | _ => { .. } 83 | } 84 | } 85 | ``` 86 | 87 | ## Limitations 88 | 89 | The following are different from the normal `match` expression: 90 | 91 | * Only supports strings, byte strings, and u8 slices as patterns. 92 | * The wildcard is evaluated last. (The normal `match` expression does not 93 | match patterns after the wildcard.) 94 | * Guards are unavailable. 95 | 96 | Sometimes the normal `match` expression is faster, depending on how 97 | optimization is performed, so it is better to choose based on your own 98 | benchmarks. 
99 | 100 | ## Benchmark 101 | 102 | Run the following command: 103 | 104 | ``` 105 | cargo bench 106 | ``` 107 | 108 | Experimental results are as follows [μs]: 109 | 110 | * AMD Ryzen 7 5700U with Radeon Graphics 111 | 112 | | Bench name | Normal match | *phf* crate | *trie-match* crate | 113 | |----------------------|--------------|-------------|--------------------| 114 | | 100 words random | 1.94 | 2.02 | **1.09** | 115 | | HTML elements random | 2.32 | 2.43 | **0.55** | 116 | 117 | * 12th Gen Intel(R) Core(TM) i7-1270P 118 | 119 | | Bench name | Normal match | *phf* crate | *trie-match* crate | 120 | |----------------------|--------------|-------------|--------------------| 121 | | 100 words random | 1.13 | 1.29 | **0.61** | 122 | | HTML elements random | 1.24 | 1.51 | **0.36** | 123 | 124 | [phf crate](https://github.com/rust-phf/rust-phf): Compile time static maps 125 | using perfect hash functions. 126 | 127 | ## License 128 | 129 | Licensed under either of 130 | 131 | * Apache License, Version 2.0 132 | ([LICENSE-APACHE](LICENSE-APACHE) or http://www.apache.org/licenses/LICENSE-2.0) 133 | * MIT license 134 | ([LICENSE-MIT](LICENSE-MIT) or http://opensource.org/licenses/MIT) 135 | 136 | at your option. 137 | 138 | ## Contribution 139 | 140 | See [the guidelines](./CONTRIBUTING.md). 
141 | -------------------------------------------------------------------------------- /benches/input_html_elements.txt: -------------------------------------------------------------------------------- 1 | tt 2 | optgroup 3 | p 4 | map 5 | figcaption 6 | portal 7 | table 8 | strike 9 | colgroup 10 | style 11 | meter 12 | option 13 | dd 14 | img 15 | bdo 16 | samp 17 | track 18 | u 19 | span 20 | plaintext 21 | article 22 | center 23 | frameset 24 | cite 25 | datalist 26 | big 27 | select 28 | caption 29 | dialog 30 | section 31 | q 32 | base 33 | summary 34 | object 35 | td 36 | input 37 | tbody 38 | wbr 39 | dfn 40 | image 41 | figure 42 | output 43 | head 44 | iframe 45 | tfoot 46 | acronym 47 | ins 48 | br 49 | del 50 | rt 51 | noembed 52 | menu 53 | search 54 | main 55 | small 56 | param 57 | tr 58 | template 59 | ol 60 | kbd 61 | strong 62 | h1 63 | slot 64 | ul 65 | button 66 | video 67 | xmp 68 | th 69 | aside 70 | font 71 | rp 72 | data 73 | dt 74 | abbr 75 | pre 76 | audio 77 | fieldset 78 | source 79 | link 80 | nav 81 | meta 82 | blockquote 83 | picture 84 | form 85 | bdi 86 | a 87 | textarea 88 | s 89 | canvas 90 | mark 91 | menuitem 92 | li 93 | noscript 94 | code 95 | area 96 | rb 97 | b 98 | details 99 | label 100 | progress 101 | sup 102 | title 103 | html 104 | em 105 | i 106 | embed 107 | nobr 108 | div 109 | time 110 | rtc 111 | frame 112 | dl 113 | address 114 | noframes 115 | footer 116 | sub 117 | var 118 | col 119 | dir 120 | header 121 | legend 122 | hgroup 123 | marquee 124 | hr 125 | thead 126 | body 127 | script 128 | ruby 129 | -------------------------------------------------------------------------------- /benches/input_word_100.txt: -------------------------------------------------------------------------------- 1 | stampeding 2 | commendable 3 | adrenaline 4 | exobiology 5 | indifference 6 | avuncular 7 | prevailed 8 | foreparts 9 | legalistically 10 | intermarries 11 | desideratum 12 | evaluating 13 | lavishing 14 | attractable 15 | 
philippics 16 | antiabortionist 17 | lascivious 18 | breathable 19 | histogram 20 | rattlings 21 | interdict 22 | summarized 23 | relieving 24 | congresspeople 25 | fitfulness 26 | percolation 27 | upperclasswoman 28 | epistemic 29 | Chantilly 30 | stonemasons 31 | nonferrous 32 | emulsions 33 | charitably 34 | barracudas 35 | integrity 36 | knockdowns 37 | roadworks 38 | abortionists 39 | Salvadoran 40 | chanceries 41 | misinform 42 | caretaker 43 | extricated 44 | mandolins 45 | steeliest 46 | transpiration 47 | weirdness 48 | audiologists 49 | baronetcies 50 | performing 51 | publishing 52 | suspending 53 | dermatological 54 | contemplate 55 | spiritless 56 | nightwatchman 57 | paradisaical 58 | implicating 59 | timpanists 60 | Leavenworth 61 | amorality 62 | strangulated 63 | cellophane 64 | waterboard 65 | astrophysicists 66 | aerospace 67 | passphrase 68 | engendered 69 | spotlighting 70 | misapplication 71 | barterers 72 | poetesses 73 | dollhouse 74 | laparoscopic 75 | Dubrovnik 76 | rerecords 77 | shielding 78 | orthographically 79 | thicknesses 80 | Bendictus 81 | congealed 82 | cooperative 83 | encompass 84 | grouching 85 | shipowners 86 | jealously 87 | generational 88 | antecedents 89 | persecutes 90 | exemplified 91 | admirable 92 | squeakiest 93 | absconding 94 | extirpated 95 | exoskeletons 96 | earthworms 97 | chaotically 98 | shipbuilder 99 | equidistantly 100 | overprint 101 | -------------------------------------------------------------------------------- /benches/match.rs: -------------------------------------------------------------------------------- 1 | use std::fs::File; 2 | use std::io::{BufRead, BufReader}; 3 | use std::path::Path; 4 | use std::time::Duration; 5 | 6 | use criterion::{criterion_group, criterion_main, Criterion, SamplingMode}; 7 | use phf::phf_map; 8 | use trie_match::trie_match; 9 | 10 | fn load_input(path: impl AsRef) -> Vec { 11 | let mut result = vec![]; 12 | let file = 
BufReader::new(File::open(path.as_ref()).unwrap()); 13 | for line in file.lines() { 14 | result.push(line.unwrap()); 15 | } 16 | result 17 | } 18 | 19 | fn criterion_word100(c: &mut Criterion) { 20 | let mut group = c.benchmark_group("words_100"); 21 | group.sample_size(100); 22 | group.warm_up_time(Duration::from_millis(500)); 23 | group.measurement_time(Duration::from_secs(5)); 24 | group.sampling_mode(SamplingMode::Flat); 25 | 26 | let word_100 = load_input("benches/input_word_100.txt"); 27 | 28 | group.bench_function("phfmap_rand", |b| { 29 | static STATIC_MAP: phf::Map<&'static str, i32> = phf_map! { 30 | "stampeding" => 3141, 31 | "commendable" => 3141, 32 | "adrenaline" => 3141, 33 | "exobiology" => 3141, 34 | "indifference" => 3141, 35 | "avuncular" => 3141, 36 | "prevailed" => 3141, 37 | "foreparts" => 3141, 38 | "legalistically" => 3141, 39 | "intermarries" => 3141, 40 | "desideratum" => 3141, 41 | "evaluating" => 3141, 42 | "lavishing" => 3141, 43 | "attractable" => 3141, 44 | "philippics" => 3141, 45 | "antiabortionist" => 3141, 46 | "lascivious" => 3141, 47 | "breathable" => 3141, 48 | "histogram" => 3141, 49 | "rattlings" => 3141, 50 | "interdict" => 3141, 51 | "summarized" => 3141, 52 | "relieving" => 3141, 53 | "congresspeople" => 3141, 54 | "fitfulness" => 3141, 55 | "percolation" => 5926, 56 | "upperclasswoman" => 5926, 57 | "epistemic" => 5926, 58 | "Chantilly" => 5926, 59 | "stonemasons" => 5926, 60 | "nonferrous" => 5926, 61 | "emulsions" => 5926, 62 | "charitably" => 5926, 63 | "barracudas" => 5926, 64 | "integrity" => 5926, 65 | "knockdowns" => 5926, 66 | "roadworks" => 5926, 67 | "abortionists" => 5926, 68 | "Salvadoran" => 5926, 69 | "chanceries" => 5926, 70 | "misinform" => 5926, 71 | "caretaker" => 5926, 72 | "extricated" => 5926, 73 | "mandolins" => 5926, 74 | "steeliest" => 5926, 75 | "transpiration" => 5926, 76 | "weirdness" => 5926, 77 | "audiologists" => 5926, 78 | "baronetcies" => 5926, 79 | "performing" => 5926, 80 | "publishing" 
=> 5358, 81 | "suspending" => 5358, 82 | "dermatological" => 5358, 83 | "contemplate" => 5358, 84 | "spiritless" => 5358, 85 | "nightwatchman" => 5358, 86 | "paradisaical" => 5358, 87 | "implicating" => 5358, 88 | "timpanists" => 5358, 89 | "Leavenworth" => 5358, 90 | "amorality" => 5358, 91 | "strangulated" => 5358, 92 | "cellophane" => 5358, 93 | "waterboard" => 5358, 94 | "astrophysicists" => 5358, 95 | "aerospace" => 5358, 96 | "passphrase" => 5358, 97 | "engendered" => 5358, 98 | "spotlighting" => 5358, 99 | "misapplication" => 5358, 100 | "barterers" => 5358, 101 | "poetesses" => 5358, 102 | "dollhouse" => 5358, 103 | "laparoscopic" => 5358, 104 | "Dubrovnik" => 5358, 105 | "rerecords" => 9793, 106 | "shielding" => 9793, 107 | "orthographically" => 9793, 108 | "thicknesses" => 9793, 109 | "Bendictus" => 9793, 110 | "congealed" => 9793, 111 | "cooperative" => 9793, 112 | "encompass" => 9793, 113 | "grouching" => 9793, 114 | "shipowners" => 9793, 115 | "jealously" => 9793, 116 | "generational" => 9793, 117 | "antecedents" => 9793, 118 | "persecutes" => 9793, 119 | "exemplified" => 9793, 120 | "admirable" => 9793, 121 | "squeakiest" => 9793, 122 | "absconding" => 9793, 123 | "extirpated" => 9793, 124 | "exoskeletons" => 9793, 125 | "earthworms" => 9793, 126 | "chaotically" => 9793, 127 | "shipbuilder" => 9793, 128 | "equidistantly" => 9793, 129 | "overprint" => 9793, 130 | }; 131 | b.iter(|| { 132 | let mut x = 0; 133 | for s in &word_100 { 134 | x += STATIC_MAP.get(s).unwrap_or(&0); 135 | } 136 | x 137 | }); 138 | }); 139 | 140 | group.bench_function("match_rand", |b| { 141 | b.iter(|| { 142 | let mut x = 0; 143 | for s in &word_100 { 144 | match s.as_str() { 145 | "stampeding" | "commendable" | "adrenaline" | "exobiology" | "indifference" 146 | | "avuncular" | "prevailed" | "foreparts" | "legalistically" 147 | | "intermarries" | "desideratum" | "evaluating" | "lavishing" 148 | | "attractable" | "philippics" | "antiabortionist" | "lascivious" 149 | | 
"breathable" | "histogram" | "rattlings" | "interdict" | "summarized" 150 | | "relieving" | "congresspeople" | "fitfulness" => { 151 | x += 3141; 152 | } 153 | "percolation" | "upperclasswoman" | "epistemic" | "Chantilly" 154 | | "stonemasons" | "nonferrous" | "emulsions" | "charitably" | "barracudas" 155 | | "integrity" | "knockdowns" | "roadworks" | "abortionists" | "Salvadoran" 156 | | "chanceries" | "misinform" | "caretaker" | "extricated" | "mandolins" 157 | | "steeliest" | "transpiration" | "weirdness" | "audiologists" 158 | | "baronetcies" | "performing" => { 159 | x += 5926; 160 | } 161 | "publishing" | "suspending" | "dermatological" | "contemplate" 162 | | "spiritless" | "nightwatchman" | "paradisaical" | "implicating" 163 | | "timpanists" | "Leavenworth" | "amorality" | "strangulated" 164 | | "cellophane" | "waterboard" | "astrophysicists" | "aerospace" 165 | | "passphrase" | "engendered" | "spotlighting" | "misapplication" 166 | | "barterers" | "poetesses" | "dollhouse" | "laparoscopic" | "Dubrovnik" => { 167 | x += 5358; 168 | } 169 | "rerecords" | "shielding" | "orthographically" | "thicknesses" 170 | | "Bendictus" | "congealed" | "cooperative" | "encompass" | "grouching" 171 | | "shipowners" | "jealously" | "generational" | "antecedents" 172 | | "persecutes" | "exemplified" | "admirable" | "squeakiest" | "absconding" 173 | | "extirpated" | "exoskeletons" | "earthworms" | "chaotically" 174 | | "shipbuilder" | "equidistantly" | "overprint" => { 175 | x += 9793; 176 | } 177 | _ => {} 178 | } 179 | } 180 | x 181 | }); 182 | }); 183 | 184 | group.bench_function("match_1", |b| { 185 | b.iter(|| { 186 | let mut x = 0; 187 | for s in &word_100 { 188 | match s.as_str() { 189 | "stampeding" | "commendable" | "adrenaline" | "exobiology" | "indifference" 190 | | "avuncular" | "prevailed" | "foreparts" | "legalistically" 191 | | "intermarries" | "desideratum" | "evaluating" | "lavishing" 192 | | "attractable" | "philippics" | "antiabortionist" | "lascivious" 193 
| | "breathable" | "histogram" | "rattlings" | "interdict" | "summarized" 194 | | "relieving" | "congresspeople" | "fitfulness" => { 195 | x += 1; 196 | } 197 | "percolation" | "upperclasswoman" | "epistemic" | "Chantilly" 198 | | "stonemasons" | "nonferrous" | "emulsions" | "charitably" | "barracudas" 199 | | "integrity" | "knockdowns" | "roadworks" | "abortionists" | "Salvadoran" 200 | | "chanceries" | "misinform" | "caretaker" | "extricated" | "mandolins" 201 | | "steeliest" | "transpiration" | "weirdness" | "audiologists" 202 | | "baronetcies" | "performing" => { 203 | x += 2; 204 | } 205 | "publishing" | "suspending" | "dermatological" | "contemplate" 206 | | "spiritless" | "nightwatchman" | "paradisaical" | "implicating" 207 | | "timpanists" | "Leavenworth" | "amorality" | "strangulated" 208 | | "cellophane" | "waterboard" | "astrophysicists" | "aerospace" 209 | | "passphrase" | "engendered" | "spotlighting" | "misapplication" 210 | | "barterers" | "poetesses" | "dollhouse" | "laparoscopic" | "Dubrovnik" => { 211 | x += 3; 212 | } 213 | "rerecords" | "shielding" | "orthographically" | "thicknesses" 214 | | "Bendictus" | "congealed" | "cooperative" | "encompass" | "grouching" 215 | | "shipowners" | "jealously" | "generational" | "antecedents" 216 | | "persecutes" | "exemplified" | "admirable" | "squeakiest" | "absconding" 217 | | "extirpated" | "exoskeletons" | "earthworms" | "chaotically" 218 | | "shipbuilder" | "equidistantly" | "overprint" => { 219 | x += 4; 220 | } 221 | _ => {} 222 | } 223 | } 224 | x 225 | }); 226 | }); 227 | 228 | group.bench_function("match_0", |b| { 229 | b.iter(|| { 230 | let mut x = 0; 231 | for s in &word_100 { 232 | match s.as_str() { 233 | "stampeding" | "commendable" | "adrenaline" | "exobiology" | "indifference" 234 | | "avuncular" | "prevailed" | "foreparts" | "legalistically" 235 | | "intermarries" | "desideratum" | "evaluating" | "lavishing" 236 | | "attractable" | "philippics" | "antiabortionist" | "lascivious" 237 | | 
"breathable" | "histogram" | "rattlings" | "interdict" | "summarized" 238 | | "relieving" | "congresspeople" | "fitfulness" => { 239 | x += 0; 240 | } 241 | "percolation" | "upperclasswoman" | "epistemic" | "Chantilly" 242 | | "stonemasons" | "nonferrous" | "emulsions" | "charitably" | "barracudas" 243 | | "integrity" | "knockdowns" | "roadworks" | "abortionists" | "Salvadoran" 244 | | "chanceries" | "misinform" | "caretaker" | "extricated" | "mandolins" 245 | | "steeliest" | "transpiration" | "weirdness" | "audiologists" 246 | | "baronetcies" | "performing" => { 247 | x += 1; 248 | } 249 | "publishing" | "suspending" | "dermatological" | "contemplate" 250 | | "spiritless" | "nightwatchman" | "paradisaical" | "implicating" 251 | | "timpanists" | "Leavenworth" | "amorality" | "strangulated" 252 | | "cellophane" | "waterboard" | "astrophysicists" | "aerospace" 253 | | "passphrase" | "engendered" | "spotlighting" | "misapplication" 254 | | "barterers" | "poetesses" | "dollhouse" | "laparoscopic" | "Dubrovnik" => { 255 | x += 2; 256 | } 257 | "rerecords" | "shielding" | "orthographically" | "thicknesses" 258 | | "Bendictus" | "congealed" | "cooperative" | "encompass" | "grouching" 259 | | "shipowners" | "jealously" | "generational" | "antecedents" 260 | | "persecutes" | "exemplified" | "admirable" | "squeakiest" | "absconding" 261 | | "extirpated" | "exoskeletons" | "earthworms" | "chaotically" 262 | | "shipbuilder" | "equidistantly" | "overprint" => { 263 | x += 3; 264 | } 265 | _ => {} 266 | } 267 | } 268 | x 269 | }); 270 | }); 271 | 272 | group.bench_function("trie_match_rand", |b| { 273 | b.iter(|| { 274 | let mut x = 0; 275 | for s in &word_100 { 276 | trie_match!(match s.as_str() { 277 | "stampeding" | "commendable" | "adrenaline" | "exobiology" | "indifference" 278 | | "avuncular" | "prevailed" | "foreparts" | "legalistically" 279 | | "intermarries" | "desideratum" | "evaluating" | "lavishing" 280 | | "attractable" | "philippics" | "antiabortionist" | 
"lascivious" 281 | | "breathable" | "histogram" | "rattlings" | "interdict" | "summarized" 282 | | "relieving" | "congresspeople" | "fitfulness" => { 283 | x += 3141; 284 | } 285 | "percolation" | "upperclasswoman" | "epistemic" | "Chantilly" 286 | | "stonemasons" | "nonferrous" | "emulsions" | "charitably" | "barracudas" 287 | | "integrity" | "knockdowns" | "roadworks" | "abortionists" | "Salvadoran" 288 | | "chanceries" | "misinform" | "caretaker" | "extricated" | "mandolins" 289 | | "steeliest" | "transpiration" | "weirdness" | "audiologists" 290 | | "baronetcies" | "performing" => { 291 | x += 5926; 292 | } 293 | "publishing" | "suspending" | "dermatological" | "contemplate" 294 | | "spiritless" | "nightwatchman" | "paradisaical" | "implicating" 295 | | "timpanists" | "Leavenworth" | "amorality" | "strangulated" 296 | | "cellophane" | "waterboard" | "astrophysicists" | "aerospace" 297 | | "passphrase" | "engendered" | "spotlighting" | "misapplication" 298 | | "barterers" | "poetesses" | "dollhouse" | "laparoscopic" | "Dubrovnik" => { 299 | x += 5358; 300 | } 301 | "rerecords" | "shielding" | "orthographically" | "thicknesses" 302 | | "Bendictus" | "congealed" | "cooperative" | "encompass" | "grouching" 303 | | "shipowners" | "jealously" | "generational" | "antecedents" 304 | | "persecutes" | "exemplified" | "admirable" | "squeakiest" | "absconding" 305 | | "extirpated" | "exoskeletons" | "earthworms" | "chaotically" 306 | | "shipbuilder" | "equidistantly" | "overprint" => { 307 | x += 9793; 308 | } 309 | _ => {} 310 | }) 311 | } 312 | x 313 | }); 314 | }); 315 | 316 | group.bench_function("trie_match_1", |b| { 317 | b.iter(|| { 318 | let mut x = 0; 319 | for s in &word_100 { 320 | trie_match!(match s.as_str() { 321 | "stampeding" | "commendable" | "adrenaline" | "exobiology" | "indifference" 322 | | "avuncular" | "prevailed" | "foreparts" | "legalistically" 323 | | "intermarries" | "desideratum" | "evaluating" | "lavishing" 324 | | "attractable" | "philippics" 
| "antiabortionist" | "lascivious" 325 | | "breathable" | "histogram" | "rattlings" | "interdict" | "summarized" 326 | | "relieving" | "congresspeople" | "fitfulness" => { 327 | x += 1; 328 | } 329 | "percolation" | "upperclasswoman" | "epistemic" | "Chantilly" 330 | | "stonemasons" | "nonferrous" | "emulsions" | "charitably" | "barracudas" 331 | | "integrity" | "knockdowns" | "roadworks" | "abortionists" | "Salvadoran" 332 | | "chanceries" | "misinform" | "caretaker" | "extricated" | "mandolins" 333 | | "steeliest" | "transpiration" | "weirdness" | "audiologists" 334 | | "baronetcies" | "performing" => { 335 | x += 2; 336 | } 337 | "publishing" | "suspending" | "dermatological" | "contemplate" 338 | | "spiritless" | "nightwatchman" | "paradisaical" | "implicating" 339 | | "timpanists" | "Leavenworth" | "amorality" | "strangulated" 340 | | "cellophane" | "waterboard" | "astrophysicists" | "aerospace" 341 | | "passphrase" | "engendered" | "spotlighting" | "misapplication" 342 | | "barterers" | "poetesses" | "dollhouse" | "laparoscopic" | "Dubrovnik" => { 343 | x += 3; 344 | } 345 | "rerecords" | "shielding" | "orthographically" | "thicknesses" 346 | | "Bendictus" | "congealed" | "cooperative" | "encompass" | "grouching" 347 | | "shipowners" | "jealously" | "generational" | "antecedents" 348 | | "persecutes" | "exemplified" | "admirable" | "squeakiest" | "absconding" 349 | | "extirpated" | "exoskeletons" | "earthworms" | "chaotically" 350 | | "shipbuilder" | "equidistantly" | "overprint" => { 351 | x += 4; 352 | } 353 | _ => {} 354 | }) 355 | } 356 | x 357 | }); 358 | }); 359 | 360 | group.bench_function("trie_match_0", |b| { 361 | b.iter(|| { 362 | let mut x = 0; 363 | for s in &word_100 { 364 | trie_match!(match s.as_str() { 365 | "stampeding" | "commendable" | "adrenaline" | "exobiology" | "indifference" 366 | | "avuncular" | "prevailed" | "foreparts" | "legalistically" 367 | | "intermarries" | "desideratum" | "evaluating" | "lavishing" 368 | | "attractable" | 
"philippics" | "antiabortionist" | "lascivious" 369 | | "breathable" | "histogram" | "rattlings" | "interdict" | "summarized" 370 | | "relieving" | "congresspeople" | "fitfulness" => { 371 | x += 0; 372 | } 373 | "percolation" | "upperclasswoman" | "epistemic" | "Chantilly" 374 | | "stonemasons" | "nonferrous" | "emulsions" | "charitably" | "barracudas" 375 | | "integrity" | "knockdowns" | "roadworks" | "abortionists" | "Salvadoran" 376 | | "chanceries" | "misinform" | "caretaker" | "extricated" | "mandolins" 377 | | "steeliest" | "transpiration" | "weirdness" | "audiologists" 378 | | "baronetcies" | "performing" => { 379 | x += 1; 380 | } 381 | "publishing" | "suspending" | "dermatological" | "contemplate" 382 | | "spiritless" | "nightwatchman" | "paradisaical" | "implicating" 383 | | "timpanists" | "Leavenworth" | "amorality" | "strangulated" 384 | | "cellophane" | "waterboard" | "astrophysicists" | "aerospace" 385 | | "passphrase" | "engendered" | "spotlighting" | "misapplication" 386 | | "barterers" | "poetesses" | "dollhouse" | "laparoscopic" | "Dubrovnik" => { 387 | x += 2; 388 | } 389 | "rerecords" | "shielding" | "orthographically" | "thicknesses" 390 | | "Bendictus" | "congealed" | "cooperative" | "encompass" | "grouching" 391 | | "shipowners" | "jealously" | "generational" | "antecedents" 392 | | "persecutes" | "exemplified" | "admirable" | "squeakiest" | "absconding" 393 | | "extirpated" | "exoskeletons" | "earthworms" | "chaotically" 394 | | "shipbuilder" | "equidistantly" | "overprint" => { 395 | x += 3; 396 | } 397 | _ => {} 398 | }) 399 | } 400 | x 401 | }); 402 | }); 403 | } 404 | 405 | fn criterion_html_elements(c: &mut Criterion) { 406 | let mut group = c.benchmark_group("html_elements"); 407 | group.sample_size(100); 408 | group.warm_up_time(Duration::from_millis(500)); 409 | group.measurement_time(Duration::from_secs(5)); 410 | group.sampling_mode(SamplingMode::Flat); 411 | 412 | let html_elements = load_input("benches/input_html_elements.txt"); 
413 | 414 | group.bench_function("phfmap_rand", |b| { 415 | static STATIC_MAP: phf::Map<&'static str, i32> = phf_map! { 416 | "bdo" => 3141, 417 | "rb" => 3141, 418 | "th" => 3141, 419 | "ul" => 3141, 420 | "pre" => 3141, 421 | "mark" => 3141, 422 | "em" => 3141, 423 | "search" => 3141, 424 | "head" => 3141, 425 | "li" => 3141, 426 | "del" => 3141, 427 | "details" => 3141, 428 | "p" => 3141, 429 | "bdi" => 3141, 430 | "time" => 3141, 431 | "area" => 3141, 432 | "br" => 3141, 433 | "var" => 3141, 434 | "aside" => 3141, 435 | "main" => 3141, 436 | "tfoot" => 3141, 437 | "hr" => 3141, 438 | "label" => 3141, 439 | "rp" => 3141, 440 | "menuitem" => 3141, 441 | "portal" => 5926, 442 | "wbr" => 5926, 443 | "cite" => 5926, 444 | "ins" => 5926, 445 | "footer" => 5926, 446 | "table" => 5926, 447 | "address" => 5926, 448 | "div" => 5926, 449 | "optgroup" => 5926, 450 | "dd" => 5926, 451 | "samp" => 5926, 452 | "map" => 5926, 453 | "xmp" => 5926, 454 | "embed" => 5926, 455 | "strong" => 5926, 456 | "dialog" => 5926, 457 | "colgroup" => 5926, 458 | "input" => 5926, 459 | "figure" => 5926, 460 | "body" => 5926, 461 | "strike" => 5926, 462 | "audio" => 5926, 463 | "marquee" => 5926, 464 | "noscript" => 5926, 465 | "form" => 5926, 466 | "nobr" => 5358, 467 | "font" => 5358, 468 | "textarea" => 5358, 469 | "tbody" => 5358, 470 | "picture" => 5358, 471 | "legend" => 5358, 472 | "img" => 5358, 473 | "progress" => 5358, 474 | "meter" => 5358, 475 | "script" => 5358, 476 | "dt" => 5358, 477 | "summary" => 5358, 478 | "ol" => 5358, 479 | "acronym" => 5358, 480 | "header" => 5358, 481 | "title" => 5358, 482 | "span" => 5358, 483 | "abbr" => 5358, 484 | "hgroup" => 5358, 485 | "meta" => 5358, 486 | "plaintext" => 5358, 487 | "base" => 5358, 488 | "sub" => 5358, 489 | "select" => 5358, 490 | "s" => 5358, 491 | "output" => 9793, 492 | "datalist" => 9793, 493 | "article" => 9793, 494 | "param" => 9793, 495 | "blockquote" => 9793, 496 | "i" => 9793, 497 | "tr" => 9793, 498 | "html" => 9793, 
499 | "section" => 9793, 500 | "link" => 9793, 501 | "small" => 9793, 502 | "canvas" => 9793, 503 | "option" => 9793, 504 | "dir" => 9793, 505 | "col" => 9793, 506 | "noembed" => 9793, 507 | "rtc" => 9793, 508 | "big" => 9793, 509 | "figcaption" => 9793, 510 | "kbd" => 9793, 511 | "b" => 9793, 512 | "u" => 9793, 513 | "a" => 9793, 514 | "td" => 9793, 515 | "center" => 9793, 516 | "menu" => 2384, 517 | "template" => 2384, 518 | "data" => 2384, 519 | "image" => 2384, 520 | "fieldset" => 2384, 521 | "slot" => 2384, 522 | "q" => 2384, 523 | "thead" => 2384, 524 | "nav" => 2384, 525 | "style" => 2384, 526 | "button" => 2384, 527 | "video" => 2384, 528 | "dl" => 2384, 529 | "caption" => 2384, 530 | "ruby" => 2384, 531 | "tt" => 2384, 532 | "dfn" => 2384, 533 | "code" => 2384, 534 | "source" => 2384, 535 | "h1" => 2384, 536 | "iframe" => 2384, 537 | "sup" => 2384, 538 | "noframes" => 2384, 539 | "frameset" => 2384, 540 | "track" => 2384, 541 | "frame" => 2384, 542 | "rt" => 2384, 543 | "object" => 2384, 544 | }; 545 | b.iter(|| { 546 | let mut x = 0; 547 | for s in &html_elements { 548 | x += STATIC_MAP.get(s).unwrap_or(&0); 549 | } 550 | x 551 | }); 552 | }); 553 | 554 | group.bench_function("match_rand", |b| { 555 | b.iter(|| { 556 | let mut x = 0; 557 | for s in &html_elements { 558 | match s.as_str() { 559 | "bdo" | "rb" | "th" | "ul" | "pre" | "mark" | "em" | "search" | "head" 560 | | "li" | "del" | "details" | "p" | "bdi" | "time" | "area" | "br" | "var" 561 | | "aside" | "main" | "tfoot" | "hr" | "label" | "rp" | "menuitem" => { 562 | x += 3141; 563 | } 564 | "portal" | "wbr" | "cite" | "ins" | "footer" | "table" | "address" | "div" 565 | | "optgroup" | "dd" | "samp" | "map" | "xmp" | "embed" | "strong" 566 | | "dialog" | "colgroup" | "input" | "figure" | "body" | "strike" | "audio" 567 | | "marquee" | "noscript" | "form" => { 568 | x += 5926; 569 | } 570 | "nobr" | "font" | "textarea" | "tbody" | "picture" | "legend" | "img" 571 | | "progress" | "meter" | "script" 
| "dt" | "summary" | "ol" | "acronym" 572 | | "header" | "title" | "span" | "abbr" | "hgroup" | "meta" | "plaintext" 573 | | "base" | "sub" | "select" | "s" => { 574 | x += 5358; 575 | } 576 | "output" | "datalist" | "article" | "param" | "blockquote" | "i" | "tr" 577 | | "html" | "section" | "link" | "small" | "canvas" | "option" | "dir" 578 | | "col" | "noembed" | "rtc" | "big" | "figcaption" | "kbd" | "b" | "u" 579 | | "a" | "td" | "center" => { 580 | x += 9793; 581 | } 582 | "menu" | "template" | "data" | "image" | "fieldset" | "slot" | "q" 583 | | "thead" | "nav" | "style" | "button" | "video" | "dl" | "caption" 584 | | "ruby" | "tt" | "dfn" | "code" | "source" | "h1" | "iframe" | "sup" 585 | | "noframes" | "frameset" | "track" | "frame" | "rt" | "object" => { 586 | x += 2384; 587 | } 588 | _ => {} 589 | } 590 | } 591 | x 592 | }); 593 | }); 594 | 595 | group.bench_function("match_1", |b| { 596 | b.iter(|| { 597 | let mut x = 0; 598 | for s in &html_elements { 599 | match s.as_str() { 600 | "bdo" | "rb" | "th" | "ul" | "pre" | "mark" | "em" | "search" | "head" 601 | | "li" | "del" | "details" | "p" | "bdi" | "time" | "area" | "br" | "var" 602 | | "aside" | "main" | "tfoot" | "hr" | "label" | "rp" | "menuitem" => { 603 | x += 1; 604 | } 605 | "portal" | "wbr" | "cite" | "ins" | "footer" | "table" | "address" | "div" 606 | | "optgroup" | "dd" | "samp" | "map" | "xmp" | "embed" | "strong" 607 | | "dialog" | "colgroup" | "input" | "figure" | "body" | "strike" | "audio" 608 | | "marquee" | "noscript" | "form" => { 609 | x += 2; 610 | } 611 | "nobr" | "font" | "textarea" | "tbody" | "picture" | "legend" | "img" 612 | | "progress" | "meter" | "script" | "dt" | "summary" | "ol" | "acronym" 613 | | "header" | "title" | "span" | "abbr" | "hgroup" | "meta" | "plaintext" 614 | | "base" | "sub" | "select" | "s" => { 615 | x += 3; 616 | } 617 | "output" | "datalist" | "article" | "param" | "blockquote" | "i" | "tr" 618 | | "html" | "section" | "link" | "small" | "canvas" | 
"option" | "dir" 619 | | "col" | "noembed" | "rtc" | "big" | "figcaption" | "kbd" | "b" | "u" 620 | | "a" | "td" | "center" => { 621 | x += 4; 622 | } 623 | "menu" | "template" | "data" | "image" | "fieldset" | "slot" | "q" 624 | | "thead" | "nav" | "style" | "button" | "video" | "dl" | "caption" 625 | | "ruby" | "tt" | "dfn" | "code" | "source" | "h1" | "iframe" | "sup" 626 | | "noframes" | "frameset" | "track" | "frame" | "rt" | "object" => { 627 | x += 5; 628 | } 629 | _ => {} 630 | } 631 | } 632 | x 633 | }); 634 | }); 635 | 636 | group.bench_function("match_0", |b| { 637 | b.iter(|| { 638 | let mut x = 0; 639 | for s in &html_elements { 640 | match s.as_str() { 641 | "bdo" | "rb" | "th" | "ul" | "pre" | "mark" | "em" | "search" | "head" 642 | | "li" | "del" | "details" | "p" | "bdi" | "time" | "area" | "br" | "var" 643 | | "aside" | "main" | "tfoot" | "hr" | "label" | "rp" | "menuitem" => { 644 | x += 0; 645 | } 646 | "portal" | "wbr" | "cite" | "ins" | "footer" | "table" | "address" | "div" 647 | | "optgroup" | "dd" | "samp" | "map" | "xmp" | "embed" | "strong" 648 | | "dialog" | "colgroup" | "input" | "figure" | "body" | "strike" | "audio" 649 | | "marquee" | "noscript" | "form" => { 650 | x += 1; 651 | } 652 | "nobr" | "font" | "textarea" | "tbody" | "picture" | "legend" | "img" 653 | | "progress" | "meter" | "script" | "dt" | "summary" | "ol" | "acronym" 654 | | "header" | "title" | "span" | "abbr" | "hgroup" | "meta" | "plaintext" 655 | | "base" | "sub" | "select" | "s" => { 656 | x += 2; 657 | } 658 | "output" | "datalist" | "article" | "param" | "blockquote" | "i" | "tr" 659 | | "html" | "section" | "link" | "small" | "canvas" | "option" | "dir" 660 | | "col" | "noembed" | "rtc" | "big" | "figcaption" | "kbd" | "b" | "u" 661 | | "a" | "td" | "center" => { 662 | x += 3; 663 | } 664 | "menu" | "template" | "data" | "image" | "fieldset" | "slot" | "q" 665 | | "thead" | "nav" | "style" | "button" | "video" | "dl" | "caption" 666 | | "ruby" | "tt" | "dfn" | 
"code" | "source" | "h1" | "iframe" | "sup" 667 | | "noframes" | "frameset" | "track" | "frame" | "rt" | "object" => { 668 | x += 4; 669 | } 670 | _ => {} 671 | } 672 | } 673 | x 674 | }); 675 | }); 676 | 677 | group.bench_function("trie_match_rand", |b| { 678 | b.iter(|| { 679 | let mut x = 0; 680 | for s in &html_elements { 681 | trie_match!(match s.as_str() { 682 | "bdo" | "rb" | "th" | "ul" | "pre" | "mark" | "em" | "search" | "head" 683 | | "li" | "del" | "details" | "p" | "bdi" | "time" | "area" | "br" | "var" 684 | | "aside" | "main" | "tfoot" | "hr" | "label" | "rp" | "menuitem" => { 685 | x += 3141; 686 | } 687 | "portal" | "wbr" | "cite" | "ins" | "footer" | "table" | "address" | "div" 688 | | "optgroup" | "dd" | "samp" | "map" | "xmp" | "embed" | "strong" 689 | | "dialog" | "colgroup" | "input" | "figure" | "body" | "strike" | "audio" 690 | | "marquee" | "noscript" | "form" => { 691 | x += 5926; 692 | } 693 | "nobr" | "font" | "textarea" | "tbody" | "picture" | "legend" | "img" 694 | | "progress" | "meter" | "script" | "dt" | "summary" | "ol" | "acronym" 695 | | "header" | "title" | "span" | "abbr" | "hgroup" | "meta" | "plaintext" 696 | | "base" | "sub" | "select" | "s" => { 697 | x += 5358; 698 | } 699 | "output" | "datalist" | "article" | "param" | "blockquote" | "i" | "tr" 700 | | "html" | "section" | "link" | "small" | "canvas" | "option" | "dir" 701 | | "col" | "noembed" | "rtc" | "big" | "figcaption" | "kbd" | "b" | "u" 702 | | "a" | "td" | "center" => { 703 | x += 9793; 704 | } 705 | "menu" | "template" | "data" | "image" | "fieldset" | "slot" | "q" 706 | | "thead" | "nav" | "style" | "button" | "video" | "dl" | "caption" 707 | | "ruby" | "tt" | "dfn" | "code" | "source" | "h1" | "iframe" | "sup" 708 | | "noframes" | "frameset" | "track" | "frame" | "rt" | "object" => { 709 | x += 2384; 710 | } 711 | _ => {} 712 | }) 713 | } 714 | x 715 | }); 716 | }); 717 | 718 | group.bench_function("trie_match_1", |b| { 719 | b.iter(|| { 720 | let mut x = 0; 
721 | for s in &html_elements { 722 | trie_match!(match s.as_str() { 723 | "bdo" | "rb" | "th" | "ul" | "pre" | "mark" | "em" | "search" | "head" 724 | | "li" | "del" | "details" | "p" | "bdi" | "time" | "area" | "br" | "var" 725 | | "aside" | "main" | "tfoot" | "hr" | "label" | "rp" | "menuitem" => { 726 | x += 1; 727 | } 728 | "portal" | "wbr" | "cite" | "ins" | "footer" | "table" | "address" | "div" 729 | | "optgroup" | "dd" | "samp" | "map" | "xmp" | "embed" | "strong" 730 | | "dialog" | "colgroup" | "input" | "figure" | "body" | "strike" | "audio" 731 | | "marquee" | "noscript" | "form" => { 732 | x += 2; 733 | } 734 | "nobr" | "font" | "textarea" | "tbody" | "picture" | "legend" | "img" 735 | | "progress" | "meter" | "script" | "dt" | "summary" | "ol" | "acronym" 736 | | "header" | "title" | "span" | "abbr" | "hgroup" | "meta" | "plaintext" 737 | | "base" | "sub" | "select" | "s" => { 738 | x += 3; 739 | } 740 | "output" | "datalist" | "article" | "param" | "blockquote" | "i" | "tr" 741 | | "html" | "section" | "link" | "small" | "canvas" | "option" | "dir" 742 | | "col" | "noembed" | "rtc" | "big" | "figcaption" | "kbd" | "b" | "u" 743 | | "a" | "td" | "center" => { 744 | x += 4; 745 | } 746 | "menu" | "template" | "data" | "image" | "fieldset" | "slot" | "q" 747 | | "thead" | "nav" | "style" | "button" | "video" | "dl" | "caption" 748 | | "ruby" | "tt" | "dfn" | "code" | "source" | "h1" | "iframe" | "sup" 749 | | "noframes" | "frameset" | "track" | "frame" | "rt" | "object" => { 750 | x += 5; 751 | } 752 | _ => {} 753 | }) 754 | } 755 | x 756 | }); 757 | }); 758 | 759 | group.bench_function("trie_match_0", |b| { 760 | b.iter(|| { 761 | let mut x = 0; 762 | for s in &html_elements { 763 | trie_match!(match s.as_str() { 764 | "bdo" | "rb" | "th" | "ul" | "pre" | "mark" | "em" | "search" | "head" 765 | | "li" | "del" | "details" | "p" | "bdi" | "time" | "area" | "br" | "var" 766 | | "aside" | "main" | "tfoot" | "hr" | "label" | "rp" | "menuitem" => { 767 | x 
+= 0; 768 | } 769 | "portal" | "wbr" | "cite" | "ins" | "footer" | "table" | "address" | "div" 770 | | "optgroup" | "dd" | "samp" | "map" | "xmp" | "embed" | "strong" 771 | | "dialog" | "colgroup" | "input" | "figure" | "body" | "strike" | "audio" 772 | | "marquee" | "noscript" | "form" => { 773 | x += 1; 774 | } 775 | "nobr" | "font" | "textarea" | "tbody" | "picture" | "legend" | "img" 776 | | "progress" | "meter" | "script" | "dt" | "summary" | "ol" | "acronym" 777 | | "header" | "title" | "span" | "abbr" | "hgroup" | "meta" | "plaintext" 778 | | "base" | "sub" | "select" | "s" => { 779 | x += 2; 780 | } 781 | "output" | "datalist" | "article" | "param" | "blockquote" | "i" | "tr" 782 | | "html" | "section" | "link" | "small" | "canvas" | "option" | "dir" 783 | | "col" | "noembed" | "rtc" | "big" | "figcaption" | "kbd" | "b" | "u" 784 | | "a" | "td" | "center" => { 785 | x += 3; 786 | } 787 | "menu" | "template" | "data" | "image" | "fieldset" | "slot" | "q" 788 | | "thead" | "nav" | "style" | "button" | "video" | "dl" | "caption" 789 | | "ruby" | "tt" | "dfn" | "code" | "source" | "h1" | "iframe" | "sup" 790 | | "noframes" | "frameset" | "track" | "frame" | "rt" | "object" => { 791 | x += 4; 792 | } 793 | _ => {} 794 | }) 795 | } 796 | x 797 | }); 798 | }); 799 | } 800 | 801 | criterion_group!(benches, criterion_word100, criterion_html_elements); 802 | 803 | criterion_main!(benches); 804 | -------------------------------------------------------------------------------- /figures/graph.dot: -------------------------------------------------------------------------------- 1 | digraph trie { 2 | //fontname="Helvetica,Arial,sans-serif" 3 | node [fontname="sans-serif"] 4 | edge [fontname="sans-serif"] 5 | rankdir=LR; 6 | node [shape = doublecircle, width=0.0, label = "0"]; 1; 7 | node [shape = doublecircle, width=0.0, label = "1"]; 3; 8 | node [shape = doublecircle, width=0.0, label = "2"]; 4; 9 | node [shape = doublecircle, width=0.0, label = "3"]; 6; 10 | node 
[shape = doublecircle, width=0.0, label = "2"]; 7; 11 | node [shape = circle, width=0.2, label = ""]; 12 | 0->1 [label = "a"]; 13 | 1->2 [label = "b"]; 14 | 2->3 [label = "c"]; 15 | 2->4 [label = "d"]; 16 | 0->5 [label = "b"]; 17 | 5->6 [label = "c"]; 18 | 6->7 [label = "c"]; 19 | } 20 | -------------------------------------------------------------------------------- /figures/graph.svg: -------------------------------------------------------------------------------- 1 | 2 | 4 | 6 | 7 | 9 | 10 | trie 11 | 12 | 13 | 14 | 1 15 | 16 | 17 | 0 18 | 19 | 20 | 21 | 2 22 | 23 | 24 | 25 | 26 | 1->2 27 | 28 | 29 | b 30 | 31 | 32 | 33 | 3 34 | 35 | 36 | 1 37 | 38 | 39 | 40 | 4 41 | 42 | 43 | 2 44 | 45 | 46 | 47 | 6 48 | 49 | 50 | 3 51 | 52 | 53 | 54 | 7 55 | 56 | 57 | 2 58 | 59 | 60 | 61 | 6->7 62 | 63 | 64 | c 65 | 66 | 67 | 68 | 0 69 | 70 | 71 | 72 | 73 | 0->1 74 | 75 | 76 | a 77 | 78 | 79 | 80 | 5 81 | 82 | 83 | 84 | 85 | 0->5 86 | 87 | 88 | b 89 | 90 | 91 | 92 | 2->3 93 | 94 | 95 | c 96 | 97 | 98 | 99 | 2->4 100 | 101 | 102 | d 103 | 104 | 105 | 106 | 5->6 107 | 108 | 109 | c 110 | 111 | 112 | 113 | -------------------------------------------------------------------------------- /src/lib.rs: -------------------------------------------------------------------------------- 1 | #![cfg_attr(feature = "cfg_attribute", feature(proc_macro_expand))] 2 | 3 | //! # `trie_match! {}` 4 | //! 5 | //! This macro speeds up Rust's `match` expression for comparing strings by using a compact 6 | //! double-array data structure. 7 | //! 8 | //! ## Usage 9 | //! 10 | //! Simply wrap the existing match expression with the `trie_match! {}` macro as 11 | //! follows: 12 | //! 13 | //! ``` 14 | //! use trie_match::trie_match; 15 | //! 16 | //! let x = "abd"; 17 | //! 18 | //! let result = trie_match! { 19 | //! match x { 20 | //! "a" => 0, 21 | //! "abc" => 1, 22 | //! pat @ ("abd" | "bcde") => pat.len(), 23 | //! "bc" => 3, 24 | //! _ => 4, 25 | //! } 26 | //! }; 27 | //! 28 | //! 
assert_eq!(result, 3); 29 | //! ``` 30 | #![cfg_attr( 31 | feature = "cfg_attribute", 32 | doc = r#" 33 | ## `cfg` attribute 34 | 35 | Only when using Nightly Rust, this macro supports conditional compilation with 36 | the `cfg` attribute. To use this feature, enable `features = ["cfg_attribute"]` 37 | in your `Cargo.toml`. 38 | 39 | ### Example 40 | 41 | ``` 42 | use trie_match::trie_match; 43 | 44 | let x = "abd"; 45 | 46 | let result = trie_match! { 47 | match x { 48 | #[cfg(not(feature = "foo"))] 49 | "a" => 0, 50 | "abc" => 1, 51 | #[cfg(feature = "bar")] 52 | "abd" | "bcc" => 2, 53 | "bc" => 3, 54 | _ => 4, 55 | } 56 | }; 57 | 58 | assert_eq!(result, 4); 59 | ``` 60 | "# 61 | )] 62 | //! 63 | //! ## Limitations 64 | //! 65 | //! The following differ from the normal `match` expression: 66 | //! 67 | //! * Only supports strings, byte strings, and u8 slices as patterns. 68 | //! * The wildcard is evaluated last. (The normal `match` expression does not 69 | //! match patterns after the wildcard.) 70 | //! * Guards are unavailable. 
71 | 72 | mod trie; 73 | 74 | extern crate proc_macro; 75 | 76 | use std::collections::HashMap; 77 | 78 | use proc_macro2::{Span, TokenStream}; 79 | use quote::{format_ident, quote}; 80 | use syn::{ 81 | parse_macro_input, spanned::Spanned, Arm, Error, Expr, ExprLit, ExprMatch, Lit, Pat, PatIdent, 82 | PatOr, PatReference, PatSlice, PatWild, 83 | }; 84 | 85 | #[cfg(feature = "cfg_attribute")] 86 | use proc_macro2::Ident; 87 | #[cfg(feature = "cfg_attribute")] 88 | use syn::{Attribute, Meta}; 89 | 90 | use crate::trie::Sparse; 91 | 92 | static ERROR_UNEXPECTED_PATTERN: &str = 93 | "`trie_match` only supports string literals, byte string literals, and u8 slices as patterns"; 94 | static ERROR_ATTRIBUTE_NOT_SUPPORTED: &str = "attribute not supported here"; 95 | static ERROR_GUARD_NOT_SUPPORTED: &str = "match guard not supported"; 96 | static ERROR_UNREACHABLE_PATTERN: &str = "unreachable pattern"; 97 | static ERROR_PATTERN_NOT_COVERED: &str = "non-exhaustive patterns: `_` not covered"; 98 | static ERROR_EXPECTED_U8_LITERAL: &str = "expected `u8` integer literal"; 99 | static ERROR_VARIABLE_NOT_MATCH: &str = "variable is not bound in all patterns"; 100 | 101 | #[cfg(not(feature = "cfg_attribute"))] 102 | static ERROR_ATTRIBUTE_NOT_SUPPORTED_CFG: &str = 103 | "attribute not supported here\nnote: consider enabling the `cfg_attribute` feature: \ 104 | https://docs.rs/trie-match/latest/trie_match/#cfg-attribute"; 105 | 106 | #[cfg(feature = "cfg_attribute")] 107 | static ERROR_NOT_CFG_ATTRIBUTE: &str = "only supports the cfg attribute"; 108 | 109 | /// Converts a literal pattern into a byte sequence. 
110 | fn convert_literal_pattern(pat: &ExprLit) -> Result>, Error> { 111 | let ExprLit { attrs, lit } = pat; 112 | if let Some(attr) = attrs.first() { 113 | return Err(Error::new(attr.span(), ERROR_ATTRIBUTE_NOT_SUPPORTED)); 114 | } 115 | match lit { 116 | Lit::Str(s) => Ok(Some(s.value().into())), 117 | Lit::ByteStr(s) => Ok(Some(s.value())), 118 | _ => Err(Error::new(lit.span(), ERROR_UNEXPECTED_PATTERN)), 119 | } 120 | } 121 | 122 | /// Converts a slice pattern into a byte sequence. 123 | fn convert_slice_pattern(pat: &PatSlice) -> Result>, Error> { 124 | let PatSlice { attrs, elems, .. } = pat; 125 | if let Some(attr) = attrs.first() { 126 | return Err(Error::new(attr.span(), ERROR_ATTRIBUTE_NOT_SUPPORTED)); 127 | } 128 | let mut result = vec![]; 129 | for elem in elems { 130 | match elem { 131 | Pat::Lit(ExprLit { attrs, lit }) => { 132 | if let Some(attr) = attrs.first() { 133 | return Err(Error::new(attr.span(), ERROR_ATTRIBUTE_NOT_SUPPORTED)); 134 | } 135 | match lit { 136 | Lit::Int(i) => { 137 | let int_type = i.suffix(); 138 | if int_type != "u8" && !int_type.is_empty() { 139 | return Err(Error::new(i.span(), ERROR_EXPECTED_U8_LITERAL)); 140 | } 141 | result.push(i.base10_parse::()?); 142 | } 143 | Lit::Byte(b) => { 144 | result.push(b.value()); 145 | } 146 | _ => { 147 | return Err(Error::new(elem.span(), ERROR_EXPECTED_U8_LITERAL)); 148 | } 149 | } 150 | } 151 | _ => { 152 | return Err(Error::new(elem.span(), ERROR_EXPECTED_U8_LITERAL)); 153 | } 154 | } 155 | } 156 | Ok(Some(result)) 157 | } 158 | 159 | /// Checks a wildcard pattern and returns `None`. 160 | /// 161 | /// The reason the type is `Result>, Error>` instead of `Result<(), Error>` is for 162 | /// consistency with other functions. 163 | fn convert_wildcard_pattern(pat: &PatWild) -> Result>, Error> { 164 | let PatWild { attrs, .. 
} = pat; 165 | if let Some(attr) = attrs.first() { 166 | return Err(Error::new(attr.span(), ERROR_ATTRIBUTE_NOT_SUPPORTED)); 167 | } 168 | Ok(None) 169 | } 170 | 171 | /// Converts a reference pattern (e.g. `&[0, 1, ...]`) into a byte sequence. 172 | fn convert_reference_pattern(pat: &PatReference) -> Result>, Error> { 173 | let PatReference { attrs, pat, .. } = pat; 174 | if let Some(attr) = attrs.first() { 175 | return Err(Error::new(attr.span(), ERROR_ATTRIBUTE_NOT_SUPPORTED)); 176 | } 177 | match &**pat { 178 | Pat::Lit(pat) => convert_literal_pattern(pat), 179 | Pat::Slice(pat) => convert_slice_pattern(pat), 180 | Pat::Reference(pat) => convert_reference_pattern(pat), 181 | _ => Err(Error::new(pat.span(), ERROR_UNEXPECTED_PATTERN)), 182 | } 183 | } 184 | 185 | struct PatternBytes { 186 | /// Bound variable identifier. 187 | ident: Option, 188 | 189 | /// Byte sequence of this pattern. `None` is for a wildcard. 190 | bytes: Option>, 191 | } 192 | 193 | impl PatternBytes { 194 | const fn new(ident: Option, bytes: Option>) -> Self { 195 | Self { ident, bytes } 196 | } 197 | } 198 | 199 | /// Retrieves pattern strings from the given token. 200 | /// 201 | /// None indicates a wild card pattern (`_`). 
202 | fn retrieve_match_patterns( 203 | pat: &Pat, 204 | ident: Option, 205 | pat_bytes_set: &mut Vec, 206 | pat_set: &mut Vec, 207 | ) -> Result<(), Error> { 208 | match pat { 209 | Pat::Lit(lit) => { 210 | pat_set.push(pat.clone()); 211 | pat_bytes_set.push(PatternBytes::new(ident, convert_literal_pattern(lit)?)); 212 | } 213 | Pat::Slice(slice) => { 214 | pat_set.push(pat.clone()); 215 | pat_bytes_set.push(PatternBytes::new(ident, convert_slice_pattern(slice)?)); 216 | } 217 | Pat::Wild(pat) => { 218 | pat_bytes_set.push(PatternBytes::new(ident, convert_wildcard_pattern(pat)?)); 219 | } 220 | Pat::Reference(reference) => { 221 | pat_set.push(pat.clone()); 222 | pat_bytes_set.push(PatternBytes::new( 223 | ident, 224 | convert_reference_pattern(reference)?, 225 | )); 226 | } 227 | Pat::Ident(pat) => { 228 | if let Some(attr) = pat.attrs.first() { 229 | return Err(Error::new(attr.span(), ERROR_ATTRIBUTE_NOT_SUPPORTED)); 230 | } 231 | let mut pat = pat.clone(); 232 | if let Some((_, subpat)) = pat.subpat.take() { 233 | retrieve_match_patterns(&subpat, Some(pat), pat_bytes_set, pat_set)?; 234 | } else { 235 | pat_bytes_set.push(PatternBytes::new(Some(pat), None)); 236 | } 237 | } 238 | Pat::Paren(pat) => { 239 | retrieve_match_patterns(&pat.pat, ident, pat_bytes_set, pat_set)?; 240 | } 241 | Pat::Or(PatOr { 242 | attrs, 243 | leading_vert: None, 244 | cases, 245 | }) => { 246 | if let Some(attr) = attrs.first() { 247 | return Err(Error::new(attr.span(), ERROR_ATTRIBUTE_NOT_SUPPORTED)); 248 | } 249 | for pat in cases { 250 | retrieve_match_patterns(pat, ident.clone(), pat_bytes_set, pat_set)?; 251 | } 252 | } 253 | _ => { 254 | return Err(Error::new(pat.span(), ERROR_UNEXPECTED_PATTERN)); 255 | } 256 | } 257 | Ok(()) 258 | } 259 | 260 | #[cfg(feature = "cfg_attribute")] 261 | fn evaluate_cfg_attribute(attrs: &[Attribute]) -> Result { 262 | for attr in attrs { 263 | let ident = attr.path().get_ident().map(Ident::to_string); 264 | if ident.as_deref() == Some("cfg") { 
265 | if let Meta::List(list) = &attr.meta { 266 | let tokens = &list.tokens; 267 | let cfg_macro: proc_macro::TokenStream = quote! { cfg!(#tokens) }.into(); 268 | let expr = cfg_macro 269 | .expand_expr() 270 | .map_err(|e| Error::new(tokens.span(), e.to_string()))?; 271 | if expr.to_string() == "false" { 272 | return Ok(false); 273 | } 274 | continue; 275 | } 276 | } 277 | return Err(Error::new(attr.span(), ERROR_NOT_CFG_ATTRIBUTE)); 278 | } 279 | Ok(true) 280 | } 281 | 282 | struct MatchInfo { 283 | bodies: Vec, 284 | pattern_map: HashMap, usize>, 285 | wildcard_idx: usize, 286 | bound_vals: Vec>, 287 | pat_set: Vec, 288 | } 289 | 290 | fn parse_match_arms(arms: Vec) -> Result { 291 | let mut pattern_map = HashMap::new(); 292 | let mut wildcard_idx = None; 293 | let mut bound_vals = vec![]; 294 | let mut bodies = vec![]; 295 | let mut pat_set = vec![]; 296 | let mut i = 0; 297 | #[allow(clippy::explicit_counter_loop)] 298 | for Arm { 299 | attrs, 300 | pat, 301 | guard, 302 | body, 303 | .. 304 | } in arms 305 | { 306 | #[cfg(feature = "cfg_attribute")] 307 | if !evaluate_cfg_attribute(&attrs)? 
{ 308 | continue; 309 | } 310 | #[cfg(not(feature = "cfg_attribute"))] 311 | if let Some(attr) = attrs.first() { 312 | return Err(Error::new(attr.span(), ERROR_ATTRIBUTE_NOT_SUPPORTED_CFG)); 313 | } 314 | 315 | if let Some((if_token, _)) = guard { 316 | return Err(Error::new(if_token.span(), ERROR_GUARD_NOT_SUPPORTED)); 317 | } 318 | let mut pat_bytes_set = vec![]; 319 | retrieve_match_patterns(&pat, None, &mut pat_bytes_set, &mut pat_set)?; 320 | let bound_val = pat_bytes_set[0].ident.clone(); 321 | for PatternBytes { ident, bytes } in pat_bytes_set { 322 | if ident != bound_val { 323 | return Err(Error::new( 324 | ident.or(bound_val).unwrap().span(), 325 | ERROR_VARIABLE_NOT_MATCH, 326 | )); 327 | } 328 | if let Some(bytes) = bytes { 329 | if pattern_map.contains_key(&bytes) { 330 | return Err(Error::new(pat.span(), ERROR_UNREACHABLE_PATTERN)); 331 | } 332 | pattern_map.insert(bytes, i); 333 | } else { 334 | if wildcard_idx.is_some() { 335 | return Err(Error::new(pat.span(), ERROR_UNREACHABLE_PATTERN)); 336 | } 337 | wildcard_idx.replace(i); 338 | } 339 | } 340 | bound_vals.push(bound_val); 341 | bodies.push(*body); 342 | i += 1; 343 | } 344 | let Some(wildcard_idx) = wildcard_idx else { 345 | return Err(Error::new(Span::call_site(), ERROR_PATTERN_NOT_COVERED)); 346 | }; 347 | Ok(MatchInfo { 348 | bodies, 349 | pattern_map, 350 | wildcard_idx, 351 | bound_vals, 352 | pat_set, 353 | }) 354 | } 355 | 356 | fn trie_match_inner(input: ExprMatch) -> Result { 357 | let ExprMatch { 358 | attrs, expr, arms, .. 
359 | } = input; 360 | let MatchInfo { 361 | bodies, 362 | pattern_map, 363 | wildcard_idx, 364 | bound_vals, 365 | pat_set, 366 | } = parse_match_arms(arms)?; 367 | let mut trie = Sparse::new(); 368 | for (k, v) in pattern_map { 369 | if v == wildcard_idx { 370 | continue; 371 | } 372 | trie.add(k, v); 373 | } 374 | let (bases, checks, outs) = trie.build_double_array_trie(wildcard_idx); 375 | 376 | let out_check = outs.iter().zip(checks).map(|(out, check)| { 377 | let out = format_ident!("V{out}"); 378 | quote! { (__TrieMatchValue::#out, #check) } 379 | }); 380 | let arm = bodies 381 | .iter() 382 | .zip(bound_vals) 383 | .enumerate() 384 | .map(|(i, (body, bound_val))| { 385 | let i = format_ident!("V{i}"); 386 | let bound_val = bound_val.map_or_else(|| quote! { _ }, |val| quote! { #val }); 387 | quote! { (__TrieMatchValue::#i, #bound_val ) => #body } 388 | }); 389 | let enumvalue = (0..bodies.len()).map(|i| format_ident!("V{i}")); 390 | let wildcard_ident = format_ident!("V{wildcard_idx}"); 391 | Ok(quote! { 392 | { 393 | #[derive(Clone, Copy)] 394 | enum __TrieMatchValue { 395 | #( #enumvalue, )* 396 | } 397 | #( #attrs )* 398 | match #expr { 399 | // This outer arm lists the original patterns only for type inference. 
400 | query @ ( #( #pat_set | )* _) => { 401 | match (|query| unsafe { 402 | let query_ref = ::core::convert::AsRef::<[u8]>::as_ref(&query); 403 | let bases: &'static [i32] = &[ #( #bases, )* ]; 404 | let out_checks: &'static [(__TrieMatchValue, u8)] = &[ #( #out_check, )* ]; 405 | let mut pos = 0; 406 | let mut base = bases[0]; 407 | for &b in query_ref { 408 | pos = base.wrapping_add(i32::from(b)) as usize; 409 | if let Some((_, check)) = out_checks.get(pos) { 410 | if *check == b { 411 | base = *bases.get_unchecked(pos); 412 | continue; 413 | } 414 | } 415 | return (__TrieMatchValue::#wildcard_ident, query); 416 | } 417 | (out_checks.get_unchecked(pos).0, query) 418 | })(query) { 419 | #( #arm, )* 420 | } 421 | } 422 | } 423 | } 424 | }) 425 | } 426 | 427 | /// Generates a match expression that uses a trie structure. 428 | /// 429 | /// # Examples 430 | /// 431 | /// ``` 432 | /// use trie_match::trie_match; 433 | /// 434 | /// let x = "abd"; 435 | /// 436 | /// let result = trie_match! { 437 | /// match x { 438 | /// "a" => 0, 439 | /// "abc" => 1, 440 | /// pat @ ("abd" | "bcde") => pat.len(), 441 | /// "bc" => 3, 442 | /// _ => 4, 443 | /// } 444 | /// }; 445 | /// 446 | /// assert_eq!(result, 3); 447 | /// ``` 448 | #[proc_macro] 449 | pub fn trie_match(input: proc_macro::TokenStream) -> proc_macro::TokenStream { 450 | let input = parse_macro_input!(input as ExprMatch); 451 | trie_match_inner(input) 452 | .unwrap_or_else(Error::into_compile_error) 453 | .into() 454 | } 455 | -------------------------------------------------------------------------------- /src/trie.rs: -------------------------------------------------------------------------------- 1 | use std::collections::{BTreeMap, HashSet}; 2 | 3 | #[derive(Debug)] 4 | struct State { 5 | edges: BTreeMap, 6 | value: Option, 7 | } 8 | 9 | impl Default for State { 10 | fn default() -> Self { 11 | Self { 12 | edges: BTreeMap::default(), 13 | value: None, 14 | } 15 | } 16 | } 17 | 18 | /// Sparse trie. 
19 | pub struct Sparse { 20 | states: Vec>, 21 | } 22 | 23 | impl Sparse { 24 | pub fn new() -> Self { 25 | Self { 26 | states: vec![State::default()], 27 | } 28 | } 29 | 30 | /// Adds a new pattern and stores `value` at its final state, replacing any value stored there before. 31 | pub fn add(&mut self, pattern: impl AsRef<[u8]>, value: T) { 32 | let pattern = pattern.as_ref(); 33 | let mut state_idx = 0; 34 | for &b in pattern { 35 | let new_idx = self.states.len(); 36 | state_idx = *self.states[state_idx].edges.entry(b).or_insert(new_idx); 37 | if state_idx == new_idx { 38 | self.states.push(State::default()); 39 | } 40 | } 41 | self.states[state_idx].value = Some(value); 42 | } 43 | 44 | fn find_base( 45 | search_start: i32, 46 | is_used: &[bool], 47 | state: &State, 48 | used_bases: &HashSet, 49 | ) -> Option { 50 | let (&k, _) = state.edges.iter().next()?; 51 | let mut base_cand = search_start - i32::from(k); 52 | 'a: loop { 53 | if used_bases.contains(&base_cand) { 54 | base_cand += 1; 55 | continue; 56 | } 57 | for &k in state.edges.keys() { 58 | let pos = usize::try_from(base_cand + i32::from(k)).unwrap(); 59 | if let Some(&u) = is_used.get(pos) { 60 | if u { 61 | base_cand += 1; 62 | continue 'a; 63 | } 64 | } 65 | } 66 | break; 67 | } 68 | Some(base_cand) 69 | } 70 | 71 | /// Builds a compact double-array. 72 | /// 73 | /// # Arguments 74 | /// 75 | /// * `wildcard_value` - A wildcard value that is used for invalid states. This value is returned 76 | /// if the query matches no pattern. 77 | /// 78 | /// # Returns 79 | /// 80 | /// A tuple of a base array, a check array, and a value array. 81 | pub fn build_double_array_trie(&self, wildcard_value: T) -> (Vec, Vec, Vec) 82 | where 83 | T: Copy, 84 | { 85 | let mut bases = vec![i32::MAX]; 86 | let mut checks = vec![0]; 87 | let mut values = vec![wildcard_value]; 88 | let mut is_used = vec![true]; 89 | let mut stack = vec![(0, 0)]; 90 | // base=0 must be reserved to avoid invalid transitions. 
91 | // See https://github.com/daac-tools/trie-match/pull/11. 
92 | let mut used_bases = HashSet::from([0]); 93 | let mut search_start = 0; 94 | while let Some((state_id, da_pos)) = stack.pop() { 95 | let state = &self.states[state_id]; 96 | if let Some(val) = state.value { 97 | values[da_pos] = val; 98 | } 99 | for &u in &is_used[usize::try_from(search_start).unwrap()..] { 100 | if !u { 101 | break; 102 | } 103 | search_start += 1; 104 | } 105 | if let Some(base) = Self::find_base(search_start, &is_used, state, &used_bases) { 106 | used_bases.insert(base); 107 | bases[da_pos] = base; 108 | for (&k, &v) in &state.edges { 109 | let child_da_pos = usize::try_from(base + i32::from(k)).unwrap(); 110 | if child_da_pos >= bases.len() { 111 | bases.resize(child_da_pos + 1, i32::MAX); 112 | checks.resize(child_da_pos + 1, 0); 113 | values.resize(child_da_pos + 1, wildcard_value); 114 | is_used.resize(child_da_pos + 1, false); 115 | } 116 | checks[child_da_pos] = k; 117 | is_used[child_da_pos] = true; 118 | stack.push((v, child_da_pos)); 119 | } 120 | } 121 | } 122 | (bases, checks, values) 123 | } 124 | } 125 | -------------------------------------------------------------------------------- /tests/tests.rs: -------------------------------------------------------------------------------- 1 | #![no_std] 2 | 3 | use trie_match::trie_match; 4 | 5 | #[test] 6 | fn test_only_wildcard() { 7 | let f = |text| { 8 | trie_match! { 9 | match text { 10 | _ => 4, 11 | } 12 | } 13 | }; 14 | assert_eq!(f(""), 4); 15 | assert_eq!(f("a"), 4); 16 | assert_eq!(f("ab"), 4); 17 | } 18 | 19 | #[test] 20 | fn test_prefix_patterns() { 21 | // 0 -a-> 1 -b-> 2 -c-> * -d-> 3 22 | let f = |text| { 23 | trie_match! 
{ 24 | match text { 25 | "" => 0, 26 | "a" => 1, 27 | "ab" => 2, 28 | "abcd" => 3, 29 | _ => 4, 30 | } 31 | } 32 | }; 33 | assert_eq!(f(""), 0); 34 | assert_eq!(f("a"), 1); 35 | assert_eq!(f("ab"), 2); 36 | assert_eq!(f("abc"), 4); 37 | assert_eq!(f("abcd"), 3); 38 | assert_eq!(f("b"), 4); 39 | } 40 | 41 | #[test] 42 | fn test_longer_query() { 43 | // * -a-> * -b-> 0 44 | let f = |text| { 45 | trie_match! { 46 | match text { 47 | "ab" => 0, 48 | _ => 1, 49 | } 50 | } 51 | }; 52 | assert_eq!(f("ab"), 0); 53 | assert_eq!(f("abcdefg"), 1); 54 | } 55 | 56 | #[test] 57 | fn test_branch_root() { 58 | // * -a-> 0 59 | // \ 60 | // \-b-> 1 61 | let f = |text| { 62 | trie_match! { 63 | match text { 64 | "a" => 0, 65 | "b" => 1, 66 | _ => 2, 67 | } 68 | } 69 | }; 70 | assert_eq!(f("a"), 0); 71 | assert_eq!(f("b"), 1); 72 | assert_eq!(f("c"), 2); 73 | } 74 | 75 | #[test] 76 | fn test_branch_multiple_times() { 77 | // /-e-> 5 78 | // / 79 | // * --a--> 0 --b--> * --c--> * --d--> 1 80 | // \ \ \ 81 | // \-b-> 2 \-c-> 3 \-d-> * --e--> 4 82 | let f = |text| { 83 | trie_match! { 84 | match text { 85 | "a" => 0, 86 | "abcd" => 1, 87 | "b" => 2, 88 | "ac" => 3, 89 | "abde" => 4, 90 | "abe" => 5, 91 | _ => 6, 92 | } 93 | } 94 | }; 95 | assert_eq!(f(""), 6); 96 | assert_eq!(f("a"), 0); 97 | assert_eq!(f("ab"), 6); 98 | assert_eq!(f("abc"), 6); 99 | assert_eq!(f("abcd"), 1); 100 | assert_eq!(f("abd"), 6); 101 | assert_eq!(f("abde"), 4); 102 | assert_eq!(f("abe"), 5); 103 | assert_eq!(f("ac"), 3); 104 | assert_eq!(f("b"), 2); 105 | assert_eq!(f("abcde"), 6); 106 | assert_eq!(f("abdef"), 6); 107 | assert_eq!(f("acd"), 6); 108 | assert_eq!(f("ad"), 6); 109 | assert_eq!(f("bc"), 6); 110 | assert_eq!(f("c"), 6); 111 | } 112 | 113 | // This test confirms that the generator prevents base value conflicts. 114 | #[test] 115 | fn test_try_base_conflict() { 116 | let f = |text| { 117 | trie_match! 
{ 118 | match text { 119 | // The following pattern adds multiple zeros into a base array in a normal 120 | // double-array, but it is not allowed in a compact double-array. 121 | "\u{1}\u{2}\u{3}" => 0, 122 | _ => 1, 123 | } 124 | } 125 | }; 126 | assert_eq!(f("\u{1}\u{2}\u{3}"), 0); 127 | assert_eq!(f("\u{2}\u{3}"), 1); 128 | assert_eq!(f("\u{3}"), 1); 129 | } 130 | 131 | // This test confirms that check[0] does not have an invalid value of zero. 132 | #[test] 133 | fn test_invalid_root_check_of_zero() { 134 | // [0] -x01-> [1] 135 | // \-x00-> [0] ? If check[0] is 0, such an invalid transition is possible. 136 | // 137 | // base: [0, MAX] 138 | // check: [0, 1] 139 | let f = |text| { 140 | trie_match! { 141 | match text { 142 | "\u{1}" => 1, 143 | _ => 0, 144 | } 145 | } 146 | }; 147 | assert_eq!(f("\u{0}\u{1}"), 0); 148 | } 149 | 150 | #[test] 151 | fn test_bytes_literal() { 152 | let f = |text: &[u8]| { 153 | trie_match! { 154 | match text { 155 | b"abc" => 0, 156 | _ => 1, 157 | } 158 | } 159 | }; 160 | assert_eq!(f(b"abc"), 0); 161 | assert_eq!(f(b"ab"), 1); 162 | } 163 | 164 | #[test] 165 | fn test_slice_byte_literal() { 166 | let f = |text: &[u8]| { 167 | trie_match! { 168 | match text { 169 | [b'a', b'b', b'c'] => 0, 170 | _ => 1, 171 | } 172 | } 173 | }; 174 | assert_eq!(f(b"abc"), 0); 175 | assert_eq!(f(b"ab"), 1); 176 | } 177 | 178 | #[test] 179 | fn test_slice_numbers() { 180 | let f = |text: &[u8]| { 181 | trie_match! { 182 | match text { 183 | [0, 1, 2] => 0, 184 | _ => 1, 185 | } 186 | } 187 | }; 188 | assert_eq!(f(&[0, 1, 2]), 0); 189 | assert_eq!(f(&[0, 1]), 1); 190 | } 191 | 192 | #[test] 193 | fn test_slice_ref_numbers() { 194 | let f = |text: &[u8]| { 195 | trie_match! { 196 | match text { 197 | &[0, 1, 2] => 0, 198 | _ => 1, 199 | } 200 | } 201 | }; 202 | assert_eq!(f(&[0, 1, 2]), 0); 203 | assert_eq!(f(&[0, 1]), 1); 204 | } 205 | 206 | #[test] 207 | fn test_binds() { 208 | let f = |text| { 209 | trie_match! 
{ 210 | match text { 211 | x @ "abc" | x @ "def" => &x[1..], 212 | y @ ("ghi" | "jkl") => &y[2..], 213 | z @ "xyzw" => &z[3..], 214 | w => &w[4..], 215 | } 216 | } 217 | }; 218 | assert_eq!(f("abc"), "bc"); 219 | assert_eq!(f("def"), "ef"); 220 | assert_eq!(f("ghi"), "i"); 221 | assert_eq!(f("jkl"), "l"); 222 | assert_eq!(f("xyzw"), "w"); 223 | assert_eq!(f("abcdefg"), "efg"); 224 | } 225 | 226 | #[cfg(feature = "cfg_attribute")] 227 | #[test] 228 | fn test_cfg_attribute() { 229 | let f = |text| { 230 | trie_match! { 231 | match text { 232 | #[cfg(test)] 233 | "a" => 0, 234 | #[cfg(not(test))] 235 | "b" => 1, 236 | _ => 2, 237 | } 238 | } 239 | }; 240 | assert_eq!(f("a"), 0); 241 | assert_eq!(f("b"), 2); 242 | assert_eq!(f("c"), 2); 243 | } 244 | 245 | #[cfg(feature = "cfg_attribute")] 246 | #[test] 247 | fn test_cfg_attribute_combination() { 248 | let f = |text| { 249 | trie_match! { 250 | match text { 251 | #[cfg(test)] 252 | #[cfg(feature = "cfg_attribute")] 253 | "a" => 0, 254 | #[cfg(not(test))] 255 | #[cfg(feature = "cfg_attribute")] 256 | "b" => 1, 257 | #[cfg(test)] 258 | #[cfg(not(feature = "cfg_attribute"))] 259 | "c" => 2, 260 | #[cfg(not(test))] 261 | #[cfg(not(feature = "cfg_attribute"))] 262 | "d" => 3, 263 | _ => 4, 264 | } 265 | } 266 | }; 267 | assert_eq!(f("a"), 0); 268 | assert_eq!(f("b"), 4); 269 | assert_eq!(f("c"), 4); 270 | assert_eq!(f("d"), 4); 271 | } 272 | --------------------------------------------------------------------------------