├── .github └── workflows │ └── test.yml ├── .gitignore ├── Cargo.lock ├── Cargo.toml ├── LICENSE ├── README.md ├── jacobs2021 ├── Cargo.toml ├── README.md └── src │ └── lib.rs ├── rustfmt.toml └── sestoft1996 ├── Cargo.lock ├── Cargo.toml ├── README.md └── src ├── idiomatic.rs ├── lib.rs └── raw.rs /.github/workflows/test.yml: -------------------------------------------------------------------------------- 1 | --- 2 | name: Test 3 | on: push 4 | 5 | env: 6 | CARGO_HOME: ${{ github.workspace }}/.cargo 7 | 8 | jobs: 9 | test: 10 | runs-on: ubuntu-latest 11 | container: 12 | image: 'rust:alpine' 13 | steps: 14 | - name: Checkout repository 15 | uses: actions/checkout@v3 16 | 17 | - name: Set up cache 18 | uses: actions/cache@v3 19 | with: 20 | path: | 21 | .cargo 22 | target 23 | key: ${{ runner.os }}-cargo-${{ hashFiles('**/Cargo.lock') }} 24 | 25 | - name: Run tests 26 | run: cargo test 27 | 28 | rustfmt: 29 | runs-on: ubuntu-latest 30 | container: 31 | image: 'rust:alpine' 32 | steps: 33 | - name: Checkout repository 34 | uses: actions/checkout@v3 35 | 36 | - name: Set up cache 37 | uses: actions/cache@v3 38 | with: 39 | path: | 40 | .cargo 41 | target 42 | key: ${{ runner.os }}-cargo-${{ hashFiles('**/Cargo.lock') }} 43 | 44 | - name: Set up rustfmt 45 | run: rustup component add rustfmt 46 | 47 | - name: Check formatting 48 | run: 'cargo fmt --all --check' 49 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | target 2 | -------------------------------------------------------------------------------- /Cargo.lock: -------------------------------------------------------------------------------- 1 | # This file is automatically @generated by Cargo. 2 | # It is not intended for manual editing. 
3 | version = 3 4 | 5 | [[package]] 6 | name = "bstr" 7 | version = "0.2.17" 8 | source = "registry+https://github.com/rust-lang/crates.io-index" 9 | checksum = "ba3569f383e8f1598449f1a423e72e99569137b47740b1da11ef19af3d5c3223" 10 | dependencies = [ 11 | "lazy_static", 12 | "memchr", 13 | "regex-automata", 14 | ] 15 | 16 | [[package]] 17 | name = "console" 18 | version = "0.15.0" 19 | source = "registry+https://github.com/rust-lang/crates.io-index" 20 | checksum = "a28b32d32ca44b70c3e4acd7db1babf555fa026e385fb95f18028f88848b3c31" 21 | dependencies = [ 22 | "encode_unicode", 23 | "libc", 24 | "once_cell", 25 | "terminal_size", 26 | "winapi", 27 | ] 28 | 29 | [[package]] 30 | name = "encode_unicode" 31 | version = "0.3.6" 32 | source = "registry+https://github.com/rust-lang/crates.io-index" 33 | checksum = "a357d28ed41a50f9c765dbfe56cbc04a64e53e5fc58ba79fbc34c10ef3df831f" 34 | 35 | [[package]] 36 | name = "jacobs2021" 37 | version = "1.0.0" 38 | dependencies = [ 39 | "similar-asserts", 40 | ] 41 | 42 | [[package]] 43 | name = "lazy_static" 44 | version = "1.4.0" 45 | source = "registry+https://github.com/rust-lang/crates.io-index" 46 | checksum = "e2abad23fbc42b3700f2f279844dc832adb2b2eb069b2df918f455c4e18cc646" 47 | 48 | [[package]] 49 | name = "libc" 50 | version = "0.2.126" 51 | source = "registry+https://github.com/rust-lang/crates.io-index" 52 | checksum = "349d5a591cd28b49e1d1037471617a32ddcda5731b99419008085f72d5a53836" 53 | 54 | [[package]] 55 | name = "memchr" 56 | version = "2.5.0" 57 | source = "registry+https://github.com/rust-lang/crates.io-index" 58 | checksum = "2dffe52ecf27772e601905b7522cb4ef790d2cc203488bbd0e2fe85fcb74566d" 59 | 60 | [[package]] 61 | name = "once_cell" 62 | version = "1.12.0" 63 | source = "registry+https://github.com/rust-lang/crates.io-index" 64 | checksum = "7709cef83f0c1f58f666e746a08b21e0085f7440fa6a29cc194d68aac97a4225" 65 | 66 | [[package]] 67 | name = "regex-automata" 68 | version = "0.1.10" 69 | source = 
"registry+https://github.com/rust-lang/crates.io-index" 70 | checksum = "6c230d73fb8d8c1b9c0b3135c5142a8acee3a0558fb8db5cf1cb65f8d7862132" 71 | 72 | [[package]] 73 | name = "sestoft1996" 74 | version = "1.0.0" 75 | 76 | [[package]] 77 | name = "similar" 78 | version = "2.1.0" 79 | source = "registry+https://github.com/rust-lang/crates.io-index" 80 | checksum = "2e24979f63a11545f5f2c60141afe249d4f19f84581ea2138065e400941d83d3" 81 | dependencies = [ 82 | "bstr", 83 | "unicode-segmentation", 84 | ] 85 | 86 | [[package]] 87 | name = "similar-asserts" 88 | version = "1.2.0" 89 | source = "registry+https://github.com/rust-lang/crates.io-index" 90 | checksum = "64c9f531a2375031d51c23c415ca12d0f0271b976211e2f727b7a0eac06a099d" 91 | dependencies = [ 92 | "console", 93 | "similar", 94 | ] 95 | 96 | [[package]] 97 | name = "terminal_size" 98 | version = "0.1.17" 99 | source = "registry+https://github.com/rust-lang/crates.io-index" 100 | checksum = "633c1a546cee861a1a6d0dc69ebeca693bf4296661ba7852b9d21d159e0506df" 101 | dependencies = [ 102 | "libc", 103 | "winapi", 104 | ] 105 | 106 | [[package]] 107 | name = "unicode-segmentation" 108 | version = "1.9.0" 109 | source = "registry+https://github.com/rust-lang/crates.io-index" 110 | checksum = "7e8820f5d777f6224dc4be3632222971ac30164d4a258d595640799554ebfd99" 111 | 112 | [[package]] 113 | name = "winapi" 114 | version = "0.3.9" 115 | source = "registry+https://github.com/rust-lang/crates.io-index" 116 | checksum = "5c839a674fcd7a98952e593242ea400abe93992746761e38641405d28b00f419" 117 | dependencies = [ 118 | "winapi-i686-pc-windows-gnu", 119 | "winapi-x86_64-pc-windows-gnu", 120 | ] 121 | 122 | [[package]] 123 | name = "winapi-i686-pc-windows-gnu" 124 | version = "0.4.0" 125 | source = "registry+https://github.com/rust-lang/crates.io-index" 126 | checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6" 127 | 128 | [[package]] 129 | name = "winapi-x86_64-pc-windows-gnu" 130 | version = "0.4.0" 131 | source = 
"registry+https://github.com/rust-lang/crates.io-index" 132 | checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f" 133 | -------------------------------------------------------------------------------- /Cargo.toml: -------------------------------------------------------------------------------- 1 | [workspace] 2 | members = ["sestoft1996", "jacobs2021"] 3 | resolver = "2" 4 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | This is free and unencumbered software released into the public domain. 2 | 3 | Anyone is free to copy, modify, publish, use, compile, sell, or 4 | distribute this software, either in source code form or as a compiled 5 | binary, for any purpose, commercial or non-commercial, and by any 6 | means. 7 | 8 | In jurisdictions that recognize copyright laws, the author or authors 9 | of this software dedicate any and all copyright interest in the 10 | software to the public domain. We make this dedication for the benefit 11 | of the public at large and to the detriment of our heirs and 12 | successors. We intend this dedication to be an overt act of 13 | relinquishment in perpetuity of all present and future rights to this 14 | software under copyright law. 15 | 16 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 17 | EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 18 | MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 19 | IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR 20 | OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, 21 | ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR 22 | OTHER DEALINGS IN THE SOFTWARE. 
23 | 24 | For more information, please refer to <https://unlicense.org/> 25 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Pattern matching in Rust 2 | 3 | This repository contains a collection of pattern matching algorithms implemented 4 | in Rust. The goal of these implementations is to (hopefully) make it easier to 5 | understand them, as papers related to pattern matching (and papers in general) 6 | can be difficult to read. 7 | 8 | ## Background 9 | 10 | I ended up implementing these algorithms while investigating potential pattern 11 | matching/exhaustiveness checking algorithms for [Inko](https://inko-lang.org/). 12 | While there are plenty of papers on the subject, few of them include reference 13 | code, and almost all of them are really dense and difficult to read. I hope the 14 | code published in this repository is of use to those wishing to implement 15 | pattern matching/exhaustiveness checking. 16 | 17 | ## Algorithms 18 | 19 | | Name | Paper | Directory 20 | |:----------------------------------------------|:-----------------------------|:----------- 21 | | ML pattern compilation and partial evaluation | [PDF](https://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.48.1363) | [sestoft1996](./sestoft1996/) 22 | | How to compile pattern matching | [PDF](https://julesjacobs.com/notes/patternmatching/patternmatching.pdf) | [jacobs2021](./jacobs2021/) 23 | 24 | Other papers I've come across (but don't necessarily want to implement): 25 | 26 | - [A generic algorithm for checking exhaustivity of pattern 27 | matching](https://dl.acm.org/doi/10.1145/2998392.2998401). 28 | - The Scala implementation [is found in this PR](https://github.com/lampepfl/dotty/pull/1364) (the `Space.scala` file). 
29 | - Swift also uses this algorithm [here](https://github.com/apple/swift/blob/3c0b1ab03f189e044303436b8aa6a27c2f93707d/lib/Sema/TypeCheckSwitchStmt.cpp) 30 | - Some Reddit comments about the algorithm are [found here](https://www.reddit.com/r/ProgrammingLanguages/comments/cioxwn/a_generic_algorithm_for_checking_exhaustivity_of/) 31 | - [Compiling pattern matching to good decision 32 | trees](https://www.cs.tufts.edu/comp/150FP/archive/luc-maranget/jun08.pdf). 33 | This is about just compiling pattern matching into a decision tree, not about 34 | exhaustiveness checking. If you don't know how to read the computer science 35 | hieroglyphs (like me), this paper is basically impossible to understand. 36 | - See also https://alan-j-hu.github.io/writing/pattern-matching.html and 37 | https://contificate.github.io/compiling-pattern-matching/ 38 | - There's a [Rust implementation](https://github.com/SomewhatML/match-compile) 39 | of this algorithm, though it doesn't perform exhaustiveness checking. 40 | - [Warnings for pattern 41 | matching](http://pauillac.inria.fr/~maranget/papers/warn/warn.pdf). This is 42 | just about producing warnings/errors for e.g. non-exhaustive patterns. 43 | Similarly painful to understand as the previous paper (i.e. I gave up). 44 | - [The Implementation of Functional Programming 45 | Languages](https://www.microsoft.com/en-us/research/publication/the-implementation-of-functional-programming-languages/). 46 | This book has a chapter on pattern matching, but I gave up on it. 47 | 48 | ## Requirements 49 | 50 | A recent-ish (as of 2022) Rust version that supports the 2021 edition (though I 51 | think the 2018 edition should also work). 52 | 53 | ## Usage 54 | 55 | Each algorithm is implemented as a library, and come with a set of unit tests 56 | that you can run using `cargo test`. 57 | 58 | ## Licence 59 | 60 | The code in this repository is licensed under the 61 | [Unlicense](https://unlicense.org/). 
A copy of this license can be found in the 62 | file "LICENSE". 63 | -------------------------------------------------------------------------------- /jacobs2021/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "jacobs2021" 3 | version = "1.0.0" 4 | authors = ["Yorick Peterse "] 5 | edition = "2021" 6 | 7 | [lib] 8 | doctest = false 9 | 10 | [dev-dependencies] 11 | similar-asserts = "^1.1" 12 | -------------------------------------------------------------------------------- /jacobs2021/README.md: -------------------------------------------------------------------------------- 1 | # How to compile pattern matching 2 | 3 | This directory contains an implementation of the algorithm discussed in the 4 | article [How to compile pattern 5 | matching](https://julesjacobs.com/notes/patternmatching/patternmatching.pdf) by 6 | Jules Jacobs. The algorithm in question took me a while to understand, and I'm 7 | grateful for all the help provided by Jules via Email. Thanks! 8 | 9 | Now on to the algorithm. In hindsight it ended up not being as difficult as I 10 | initially thought, rather the way it was explained was a bit hard to understand. 11 | The algorithm works as follows: 12 | 13 | First, we treat a match expression as if it were a table (in the database 14 | sense), consisting of rows and columns. The rows are the match cases (sometimes 15 | called "match arms"), and the columns the patterns to test. Consider this match 16 | expression (I'm using Rust syntax here): 17 | 18 | ```rust 19 | match some_number { 20 | 10 => foo, 21 | 20 => bar, 22 | 30 => baz 23 | } 24 | ``` 25 | 26 | Here `10 -> foo`, `20 -> bar` and `30 -> baz` are the rows, and `10`, `20` and 27 | `30` are the columns for each row. User provided match expressions only support 28 | single columns (OR patterns are just turned into separate rows), but internally 29 | the compiler supports multiple columns. 
30 | 31 | Internally our match expression is represented not as a list of rows and columns 32 | implicitly testing against an outer variable (`some_number` in the above case), 33 | instead each column explicitly specifies what it tests against. This means the 34 | above match expression is internally represented as follows: 35 | 36 | ```rust 37 | match { 38 | some_number is 10 => foo, 39 | some_number is 20 => bar, 40 | some_number is 30 => baz 41 | } 42 | ``` 43 | 44 | Here I used the made-up syntax `x is y` to indicate the column tests against the 45 | variable `some_number`, and the pattern tested is e.g. `10`. 46 | 47 | Next, we need to get rid of variable patterns. This is done by pushing them into 48 | the right-hand side (= the code to run upon a match) of each case. This means we 49 | transform this expression: 50 | 51 | 52 | ```rust 53 | match { 54 | some_number is 10 => foo, 55 | some_number is num => bar 56 | } 57 | ``` 58 | 59 | Into this: 60 | 61 | ```rust 62 | match { 63 | some_number is 10 => foo, 64 | // I'm using "∅" here to signal a row without any columns. 65 | ∅ => { 66 | let num = some_number; 67 | bar 68 | } 69 | } 70 | ``` 71 | 72 | The article explains this makes things easier, though it doesn't really say 73 | clearly why. The reason for this is as follows: 74 | 75 | 1. It reduces the amount of duplication in the resulting decision tree, as we 76 | don't need to branch for variable and wildcard patterns. 77 | 1. It means variable patterns don't influence branching decisions discussed 78 | below. 79 | 1. When we branch on columns (again, discussed below), we can just forget about 80 | variable patterns. 81 | 82 | Essentially it takes the following steps: 83 | 84 | 1. Each right-hand side can store zero or more variables to define _before_ 85 | running the code. 86 | 1. Iterate over the columns in a row. 87 | 1. If the column is a variable pattern, copy/move the variable into the 88 | right-hand side's variable list. 89 | 1. 
Return a new row that only includes non-variable columns. 90 | 91 | The implementation handles this in the method `move_variable_patterns`. 92 | 93 | Now we need to decide what column to branch on. In practise it probably won't 94 | matter much which strategy is used, so the algorithm takes a simple approach: it 95 | takes the columns of the first row, and for every column counts how many times 96 | its variable is tested across all columns in all rows. It 97 | then returns the column whose variable is tested the most. The 98 | implementation of this is in the method `branch_variable`. 99 | 100 | Now that we know what variable/column to branch on, we can generate the 101 | necessary branches and sub trees. The article only covers simple constructor 102 | patterns, but my implementation also handles integer literals, booleans, and 103 | more. The exact approach differs a bit and I recommend studying the Rust code to 104 | get a better understanding, but it roughly works as follows: 105 | 106 | 1. Create an array containing triples in the form 107 | `(constructor, arguments, rows)`. In this triple `constructor` is the 108 | constructor we're testing against, `arguments` is a list of variables exposed 109 | to the sub tree, and `rows` is the list of rows to compile for this test. 110 | The `arguments` array is filled with one variable for every argument. 111 | 1. Iterate over all the current rows. 112 | 1. Obtain the column index of the branching variable. 113 | 1. If we found an index (remember that a row doesn't have to contain any columns 114 | testing the branching variable), use it to remove the column from the row. 115 | 1. Determine the index of the constructor in the array created in step 1. For 116 | ADTs you'd use the tag values, for booleans you could use 0 and 1 for false 117 | and true respectively, etc. 118 | 1. 
Zip the pattern arguments (also patterns) with the values in the `arguments` 119 | array from the triple for this constructor, and create a new column for every 120 | resulting pair. 121 | 1. Create a new row containing the old columns (minus the one we removed 122 | earlier), the new columns (created in the previous step), and the body of the 123 | row. Push this row into the `rows` array for our constructor. 124 | 1. If in step 3 we didn't find an index, copy the row into the `rows` array for 125 | every triple in the array created in step 1. 126 | 1. Finally, for every triple created in step 1 (and populated in later steps), 127 | create a Switch node for our decision tree. The constructor and arguments are 128 | stored in this Switch node, and the rows are compiled into a sub tree. 129 | 130 | This is a lot to take in, so I recommend taking a look at the following methods: 131 | 132 | - `compile_rows` 133 | - `compile_constructor_cases` 134 | 135 | The output of all this is a decision tree, with three possible nodes: Success, 136 | Failure, and Switch (see the `Decision` type). A "Failure" node indicates a 137 | pattern that didn't match, and is used to check for exhaustiveness. In my 138 | implementation I opted to check for exhaustiveness separately, as this saves us 139 | from having to manage some extra data structures until we actually need them. 140 | The implementation works as follows: 141 | 142 | When we produce a "Failure" node, a "missing" flag is set to `true`. After 143 | compiling our decision tree, we check this flag. If set to `true`, the method 144 | `Match::missing_patterns` is used to produce a list of patterns to add to make 145 | the match exhaustive. 146 | 147 | The implementation of this method is a bit messy in my opinion, but it's the 148 | best I could come up with at this time. 
The implementation essentially maintains 149 | a stack of "terms" (I couldn't come up with a better name), each describing a 150 | test and its arguments in the tree. These terms also store the variables tested 151 | against, which combined with the names is used to (recursively) reconstruct a 152 | pattern name. 153 | 154 | Checking for redundant patterns is easy: when reaching a "Success" node you'd 155 | somehow mark the right-hand side as processed. In my case I just store an 156 | integer value in an array. At the end you check for any right-hand sides that 157 | aren't marked, or in my case you check if any of their values are not in the 158 | array. 159 | 160 | This about sums up how the algorithm works. Don't worry if the above wall of 161 | text hurts your head; it took me about two weeks to understand it. My advice is 162 | to read the article from Jules, then read this README, then take a look at the 163 | code and corresponding tests. 164 | 165 | ## OR patterns 166 | 167 | OR patterns are not covered in the article. To support these patterns we have to 168 | take rows containing OR patterns in any columns, then expand those OR patterns 169 | into separate rows. The code here handles this in the `expand_or_patterns()` 170 | function. This function is called _before_ pushing variable/wildcard patterns 171 | out of the rows, ensuring that OR patterns containing these patterns work as 172 | expected. 173 | 174 | **NOTE:** a previous implementation used a method called `flatten_or`, with a 175 | different implementation. That implementation proved incorrect as it failed to 176 | handle bindings in OR patterns (e.g. `10 or number`). 177 | 178 | ## Range patterns 179 | 180 | Range patterns are handled using a `Range` constructor 181 | (`Constructor::Range(start, stop)`), produced when matching against integer 182 | types only (meaning we only support integer ranges). 
Just like regular integers 183 | we assume ranges are of infinite length, so a variable pattern is needed to make 184 | the match exhaustive. 185 | 186 | ## Guards 187 | 188 | Guards are supported as follows: each `Row` has a guard field, storing a 189 | `Option`, where the `usize` is just a dummy value for the guard; normally 190 | this would be (for example) an AST node to evaluate/lower. When we are about to 191 | produce a Success node for a row, we check if it defines a guard. If so, all 192 | remaining rows are compiled into the guard's fallback tree. 193 | -------------------------------------------------------------------------------- /jacobs2021/src/lib.rs: -------------------------------------------------------------------------------- 1 | #![allow(clippy::new_without_default)] 2 | 3 | //! An implementation of the algorithm described at 4 | //! https://julesjacobs.com/notes/patternmatching/patternmatching.pdf. 5 | use std::collections::{HashMap, HashSet}; 6 | 7 | /// The body of code to evaluate in case of a match. 8 | #[derive(Clone, Eq, PartialEq, Debug)] 9 | pub struct Body { 10 | /// Any variables to bind before running the code. 11 | /// 12 | /// The tuples are in the form `(name, source)` (i.e `bla = source`). 13 | bindings: Vec<(String, Variable)>, 14 | 15 | /// The "code" to run in case of a match. 16 | /// 17 | /// We just use an integer for the sake of simplicity, but normally this 18 | /// would be an AST node, or perhaps an index to an array of AST nodes. 19 | value: usize, 20 | } 21 | 22 | /// A type constructor. 23 | #[derive(Debug, Clone, Eq, PartialEq)] 24 | pub enum Constructor { 25 | True, 26 | False, 27 | Int(i64), 28 | Pair(TypeId, TypeId), 29 | Variant(TypeId, usize), 30 | Range(i64, i64), 31 | } 32 | 33 | impl Constructor { 34 | /// Returns the index of this constructor relative to its type. 
35 | fn index(&self) -> usize { 36 | match self { 37 | Constructor::False 38 | | Constructor::Int(_) 39 | | Constructor::Pair(_, _) 40 | | Constructor::Range(_, _) => 0, 41 | Constructor::True => 1, 42 | Constructor::Variant(_, index) => *index, 43 | } 44 | } 45 | } 46 | 47 | /// Expands rows containing OR patterns into individual rows, such that each 48 | /// branch in the OR produces its own row. 49 | /// 50 | /// For each column that tests against an OR pattern, each sub pattern is 51 | /// translated into a new row. This work repeats itself until no more OR 52 | /// patterns remain in the rows. 53 | /// 54 | /// The implementation here is probably not as fast as it can be. Instead, it's 55 | /// optimized for ease of maintenance and readability. 56 | fn expand_or_patterns(rows: &mut Vec) { 57 | // If none of the rows contain any OR patterns, we can avoid the below work 58 | // loop, saving some allocations and time. 59 | if !rows 60 | .iter() 61 | .any(|r| r.columns.iter().any(|c| matches!(c.pattern, Pattern::Or(_)))) 62 | { 63 | return; 64 | } 65 | 66 | // The implementation uses two Vecs: the original one, and a temporary one 67 | // we push newly created rows into. After processing all rows we swap the 68 | // two, repeating this process until we no longer find any OR patterns. 69 | let mut new_rows = Vec::with_capacity(rows.len()); 70 | let mut found = true; 71 | 72 | while found { 73 | found = false; 74 | 75 | for row in rows.drain(0..) { 76 | // Find the first column containing an OR pattern. We process this 77 | // one column at a time, as that's (much) easier to implement 78 | // compared to handling all columns at once (as multiple columns may 79 | // contain OR patterns). 
80 | let res = row.columns.iter().enumerate().find_map(|(idx, col)| { 81 | if let Pattern::Or(pats) = &col.pattern { 82 | Some((idx, col.variable, pats)) 83 | } else { 84 | None 85 | } 86 | }); 87 | 88 | if let Some((idx, var, pats)) = res { 89 | found = true; 90 | 91 | // This creates a new row for each branch in the OR pattern. 92 | // Other columns are left as-is. If such columns contain OR 93 | // patterns themselves, we'll expand them in a future iteration 94 | // of the surrounding `while` loop. 95 | for pat in pats { 96 | let mut new_row = row.clone(); 97 | 98 | new_row.columns[idx] = Column::new(var, pat.clone()); 99 | new_rows.push(new_row); 100 | } 101 | } else { 102 | new_rows.push(row); 103 | } 104 | } 105 | 106 | std::mem::swap(rows, &mut new_rows); 107 | } 108 | } 109 | 110 | /// A user defined pattern such as `Some((x, 10))`. 111 | #[derive(Clone, Eq, PartialEq, Debug)] 112 | pub enum Pattern { 113 | /// A pattern such as `Some(42)`. 114 | Constructor(Constructor, Vec), 115 | Int(i64), 116 | Binding(String), 117 | Or(Vec), 118 | Range(i64, i64), 119 | } 120 | 121 | /// A representation of a type. 122 | /// 123 | /// In a real compiler this would probably be a more complicated structure, but 124 | /// for the sake of simplicity we limit ourselves to a few basic types. 125 | #[derive(Clone)] 126 | pub enum Type { 127 | Int, 128 | Boolean, 129 | Pair(TypeId, TypeId), 130 | Enum(Vec<(String, Vec)>), 131 | } 132 | 133 | /// A unique ID to a type. 134 | /// 135 | /// In a real compiler this may just be a regular pointer, or an ID value like 136 | /// this. 137 | #[derive(Eq, PartialEq, Hash, Clone, Copy, Debug)] 138 | pub struct TypeId(usize); 139 | 140 | /// A variable used in a match expression. 141 | /// 142 | /// In a real compiler these would probably be registers or some other kind of 143 | /// variable/temporary generated by your compiler. 
144 | #[derive(Eq, PartialEq, Hash, Clone, Copy, Debug)] 145 | pub struct Variable { 146 | id: usize, 147 | type_id: TypeId, 148 | } 149 | 150 | /// A single case (or row) in a match expression/table. 151 | #[derive(Clone, Eq, PartialEq, Debug)] 152 | pub struct Row { 153 | columns: Vec, 154 | guard: Option, 155 | body: Body, 156 | } 157 | 158 | impl Row { 159 | fn new(columns: Vec, guard: Option, body: Body) -> Self { 160 | Self { columns, guard, body } 161 | } 162 | 163 | fn remove_column(&mut self, variable: &Variable) -> Option { 164 | self.columns 165 | .iter() 166 | .position(|c| &c.variable == variable) 167 | .map(|idx| self.columns.remove(idx)) 168 | } 169 | } 170 | 171 | /// A column in a pattern matching table. 172 | /// 173 | /// A column contains a single variable to test, and a pattern to test against 174 | /// that variable. A row may contain multiple columns, though this wouldn't be 175 | /// exposed to the source language (= it's an implementation detail). 176 | #[derive(Clone, Eq, PartialEq, Debug)] 177 | pub struct Column { 178 | variable: Variable, 179 | pattern: Pattern, 180 | } 181 | 182 | impl Column { 183 | fn new(variable: Variable, pattern: Pattern) -> Self { 184 | Self { variable, pattern } 185 | } 186 | } 187 | 188 | /// A case in a decision tree to test against a variable. 189 | #[derive(Eq, PartialEq, Debug)] 190 | pub struct Case { 191 | /// The constructor to test against an input variable. 192 | constructor: Constructor, 193 | 194 | /// Variables to introduce to the body of this case. 195 | /// 196 | /// At runtime these would be populated with the values a pattern is matched 197 | /// against. For example, this pattern: 198 | /// 199 | /// case (10, 20, foo) -> ... 200 | /// 201 | /// Would result in three arguments, assigned the values `10`, `20` and 202 | /// `foo`. 203 | /// 204 | /// In a real compiler you'd assign these variables in your IR first, then 205 | /// generate the code for the sub tree. 
206 | arguments: Vec, 207 | 208 | /// The sub tree of this case. 209 | body: Decision, 210 | } 211 | 212 | impl Case { 213 | fn new( 214 | constructor: Constructor, 215 | arguments: Vec, 216 | body: Decision, 217 | ) -> Self { 218 | Self { constructor, arguments, body } 219 | } 220 | } 221 | 222 | /// A decision tree compiled from a list of match cases. 223 | #[derive(Eq, PartialEq, Debug)] 224 | pub enum Decision { 225 | /// A pattern is matched and the right-hand value is to be returned. 226 | Success(Body), 227 | 228 | /// A pattern is missing. 229 | Failure, 230 | 231 | /// Checks if a guard evaluates to true, running the body if it does. 232 | /// 233 | /// The arguments are as follows: 234 | /// 235 | /// 1. The "condition" to evaluate. We just use a dummy value, but in a real 236 | /// compiler this would likely be an AST node of sorts. 237 | /// 2. The body to evaluate if the guard matches. 238 | /// 3. The sub tree to evaluate when the guard fails. 239 | Guard(usize, Body, Box), 240 | 241 | /// Checks if a value is any of the given patterns. 242 | /// 243 | /// The values are as follows: 244 | /// 245 | /// 1. The variable to test. 246 | /// 2. The cases to test against this variable. 247 | /// 3. A fallback decision to take, in case none of the cases matched. 248 | Switch(Variable, Vec, Option>), 249 | } 250 | 251 | /// A type for storing diagnostics produced by the decision tree compiler. 252 | pub struct Diagnostics { 253 | /// A flag indicating the match is missing one or more pattern. 254 | missing: bool, 255 | 256 | /// The right-hand sides that are reachable. 257 | /// 258 | /// If a right-hand side isn't in this list it means its pattern is 259 | /// redundant. 260 | reachable: Vec, 261 | } 262 | 263 | /// The result of compiling a pattern match expression. 
264 | pub struct Match { 265 | pub types: Vec, 266 | pub tree: Decision, 267 | pub diagnostics: Diagnostics, 268 | } 269 | 270 | /// Information about a single constructor/value (aka term) being tested, used 271 | /// to build a list of names of missing patterns. 272 | #[derive(Debug)] 273 | struct Term { 274 | variable: Variable, 275 | name: String, 276 | arguments: Vec, 277 | } 278 | 279 | impl Term { 280 | fn new(variable: Variable, name: String, arguments: Vec) -> Self { 281 | Self { variable, name, arguments } 282 | } 283 | 284 | fn pattern_name( 285 | &self, 286 | terms: &[Term], 287 | mapping: &HashMap<&Variable, usize>, 288 | ) -> String { 289 | if self.arguments.is_empty() { 290 | self.name.to_string() 291 | } else { 292 | let args = self 293 | .arguments 294 | .iter() 295 | .map(|arg| { 296 | mapping 297 | .get(arg) 298 | .map(|&idx| terms[idx].pattern_name(terms, mapping)) 299 | .unwrap_or_else(|| "_".to_string()) 300 | }) 301 | .collect::>() 302 | .join(", "); 303 | 304 | format!("{}({})", self.name, args) 305 | } 306 | } 307 | } 308 | 309 | impl Match { 310 | /// Returns a list of patterns not covered by the match expression. 311 | pub fn missing_patterns(&self) -> Vec { 312 | let mut names = HashSet::new(); 313 | let mut steps = Vec::new(); 314 | 315 | self.add_missing_patterns(&self.tree, &mut steps, &mut names); 316 | 317 | let mut missing: Vec = names.into_iter().collect(); 318 | 319 | // Sorting isn't necessary, but it makes it a bit easier to write tests. 320 | missing.sort(); 321 | missing 322 | } 323 | 324 | fn add_missing_patterns( 325 | &self, 326 | node: &Decision, 327 | terms: &mut Vec, 328 | missing: &mut HashSet, 329 | ) { 330 | match node { 331 | Decision::Success(_) => {} 332 | Decision::Failure => { 333 | let mut mapping = HashMap::new(); 334 | 335 | // At this point the terms stack looks something like this: 336 | // `[term, term + arguments, term, ...]`. 
To construct a pattern 337 | // name from this stack, we first map all variables to their 338 | // term indexes. This is needed because when a term defines 339 | // arguments, the terms for those arguments don't necessarily 340 | // appear in order in the term stack. 341 | // 342 | // This mapping is then used when (recursively) generating a 343 | // pattern name. 344 | // 345 | // This approach could probably be done more efficiently, so if 346 | // you're reading this and happen to know of a way, please 347 | // submit a merge request :) 348 | for (index, step) in terms.iter().enumerate() { 349 | mapping.insert(&step.variable, index); 350 | } 351 | 352 | let name = terms 353 | .first() 354 | .map(|term| term.pattern_name(terms, &mapping)) 355 | .unwrap_or_else(|| "_".to_string()); 356 | 357 | missing.insert(name); 358 | } 359 | Decision::Guard(_, _, fallback) => { 360 | self.add_missing_patterns(fallback, terms, missing); 361 | } 362 | Decision::Switch(var, cases, fallback) => { 363 | for case in cases { 364 | match &case.constructor { 365 | Constructor::True => { 366 | let name = "true".to_string(); 367 | 368 | terms.push(Term::new(*var, name, Vec::new())); 369 | } 370 | Constructor::False => { 371 | let name = "false".to_string(); 372 | 373 | terms.push(Term::new(*var, name, Vec::new())); 374 | } 375 | Constructor::Int(_) | Constructor::Range(_, _) => { 376 | let name = "_".to_string(); 377 | 378 | terms.push(Term::new(*var, name, Vec::new())); 379 | } 380 | Constructor::Pair(_, _) => { 381 | let args = case.arguments.clone(); 382 | 383 | terms.push(Term::new(*var, String::new(), args)); 384 | } 385 | Constructor::Variant(typ, idx) => { 386 | let args = case.arguments.clone(); 387 | let name = if let Type::Enum(variants) = 388 | &self.types[typ.0] 389 | { 390 | variants[*idx].0.clone() 391 | } else { 392 | unreachable!() 393 | }; 394 | 395 | terms.push(Term::new(*var, name, args)); 396 | } 397 | } 398 | 399 | self.add_missing_patterns(&case.body, terms, 
missing); 400 | terms.pop(); 401 | } 402 | 403 | if let Some(node) = fallback { 404 | self.add_missing_patterns(node, terms, missing); 405 | } 406 | } 407 | } 408 | } 409 | } 410 | 411 | /// The `match` compiler itself (shocking, I know). 412 | pub struct Compiler { 413 | variable_id: usize, 414 | types: Vec, 415 | diagnostics: Diagnostics, 416 | } 417 | 418 | impl Compiler { 419 | pub fn new() -> Self { 420 | Self { 421 | variable_id: 0, 422 | types: Vec::new(), 423 | diagnostics: Diagnostics { missing: false, reachable: Vec::new() }, 424 | } 425 | } 426 | 427 | pub fn compile(mut self, rows: Vec) -> Match { 428 | Match { 429 | tree: self.compile_rows(rows), 430 | diagnostics: self.diagnostics, 431 | types: self.types, 432 | } 433 | } 434 | 435 | fn compile_rows(&mut self, mut rows: Vec) -> Decision { 436 | if rows.is_empty() { 437 | self.diagnostics.missing = true; 438 | 439 | return Decision::Failure; 440 | } 441 | 442 | expand_or_patterns(&mut rows); 443 | 444 | for row in &mut rows { 445 | self.move_variable_patterns(row); 446 | } 447 | 448 | // There may be multiple rows, but if the first one has no patterns 449 | // those extra rows are redundant, as a row without columns/patterns 450 | // always matches. 
451 | if rows.first().map_or(false, |c| c.columns.is_empty()) { 452 | let row = rows.remove(0); 453 | 454 | self.diagnostics.reachable.push(row.body.value); 455 | 456 | return if let Some(guard) = row.guard { 457 | Decision::Guard( 458 | guard, 459 | row.body, 460 | Box::new(self.compile_rows(rows)), 461 | ) 462 | } else { 463 | Decision::Success(row.body) 464 | }; 465 | } 466 | 467 | let branch_var = self.branch_variable(&rows); 468 | 469 | match self.variable_type(branch_var).clone() { 470 | Type::Int => { 471 | let (cases, fallback) = 472 | self.compile_int_cases(rows, branch_var); 473 | 474 | Decision::Switch(branch_var, cases, Some(fallback)) 475 | } 476 | Type::Boolean => { 477 | let cases = vec![ 478 | (Constructor::False, Vec::new(), Vec::new()), 479 | (Constructor::True, Vec::new(), Vec::new()), 480 | ]; 481 | 482 | Decision::Switch( 483 | branch_var, 484 | self.compile_constructor_cases(rows, branch_var, cases), 485 | None, 486 | ) 487 | } 488 | Type::Pair(typ1, typ2) => { 489 | let cases = vec![( 490 | Constructor::Pair(typ1, typ2), 491 | self.new_variables(&[typ1, typ2]), 492 | Vec::new(), 493 | )]; 494 | 495 | Decision::Switch( 496 | branch_var, 497 | self.compile_constructor_cases(rows, branch_var, cases), 498 | None, 499 | ) 500 | } 501 | Type::Enum(variants) => { 502 | let cases = variants 503 | .iter() 504 | .enumerate() 505 | .map(|(idx, (_, args))| { 506 | ( 507 | Constructor::Variant(branch_var.type_id, idx), 508 | self.new_variables(args), 509 | Vec::new(), 510 | ) 511 | }) 512 | .collect(); 513 | 514 | Decision::Switch( 515 | branch_var, 516 | self.compile_constructor_cases(rows, branch_var, cases), 517 | None, 518 | ) 519 | } 520 | } 521 | } 522 | 523 | /// Compiles the cases and fallback cases for integer and range patterns. 524 | /// 525 | /// Integers have an infinite number of constructors, so we specialise the 526 | /// compilation of integer and range patterns. 
527 | fn compile_int_cases( 528 | &mut self, 529 | rows: Vec, 530 | branch_var: Variable, 531 | ) -> (Vec, Box) { 532 | let mut raw_cases: Vec<(Constructor, Vec, Vec)> = 533 | Vec::new(); 534 | let mut fallback_rows = Vec::new(); 535 | let mut tested: HashMap<(i64, i64), usize> = HashMap::new(); 536 | 537 | for mut row in rows { 538 | if let Some(col) = row.remove_column(&branch_var) { 539 | let (key, cons) = match col.pattern { 540 | Pattern::Int(val) => ((val, val), Constructor::Int(val)), 541 | Pattern::Range(start, stop) => { 542 | ((start, stop), Constructor::Range(start, stop)) 543 | } 544 | _ => unreachable!(), 545 | }; 546 | 547 | if let Some(index) = tested.get(&key) { 548 | raw_cases[*index].2.push(row); 549 | continue; 550 | } 551 | 552 | tested.insert(key, raw_cases.len()); 553 | 554 | let mut rows = fallback_rows.clone(); 555 | 556 | rows.push(row); 557 | raw_cases.push((cons, Vec::new(), rows)); 558 | } else { 559 | for (_, _, rows) in &mut raw_cases { 560 | rows.push(row.clone()); 561 | } 562 | 563 | fallback_rows.push(row); 564 | } 565 | } 566 | 567 | let cases = raw_cases 568 | .into_iter() 569 | .map(|(cons, vars, rows)| { 570 | Case::new(cons, vars, self.compile_rows(rows)) 571 | }) 572 | .collect(); 573 | 574 | (cases, Box::new(self.compile_rows(fallback_rows))) 575 | } 576 | 577 | /// Compiles the cases and sub cases for the constructor located at the 578 | /// column of the branching variable. 579 | /// 580 | /// What exactly this method does may be a bit hard to understand from the 581 | /// code, as there's simply quite a bit going on. Roughly speaking, it does 582 | /// the following: 583 | /// 584 | /// 1. It takes the column we're branching on (based on the branching 585 | /// variable) and removes it from every row. 586 | /// 2. We add additional columns to this row, if the constructor takes any 587 | /// arguments (which we'll handle in a nested match). 588 | /// 3. 
We turn the resulting list of rows into a list of cases, then compile 589 | /// those into decision (sub) trees. 590 | /// 591 | /// If a row didn't include the branching variable, we simply copy that row 592 | /// into the list of rows for every constructor to test. 593 | /// 594 | /// For this to work, the `cases` variable must be prepared such that it has 595 | /// a triple for every constructor we need to handle. For an ADT with 10 596 | /// constructors, that means 10 triples. This is needed so this method can 597 | /// assign the correct sub matches to these constructors. 598 | /// 599 | /// Types with infinite constructors (e.g. int and string) are handled 600 | /// separately; they don't need most of this work anyway. 601 | fn compile_constructor_cases( 602 | &mut self, 603 | rows: Vec, 604 | branch_var: Variable, 605 | mut cases: Vec<(Constructor, Vec, Vec)>, 606 | ) -> Vec { 607 | for mut row in rows { 608 | if let Some(col) = row.remove_column(&branch_var) { 609 | if let Pattern::Constructor(cons, args) = col.pattern { 610 | let idx = cons.index(); 611 | let mut cols = row.columns; 612 | 613 | for (var, pat) in cases[idx].1.iter().zip(args.into_iter()) 614 | { 615 | cols.push(Column::new(*var, pat)); 616 | } 617 | 618 | cases[idx].2.push(Row::new(cols, row.guard, row.body)); 619 | } 620 | } else { 621 | for (_, _, rows) in &mut cases { 622 | rows.push(row.clone()); 623 | } 624 | } 625 | } 626 | 627 | cases 628 | .into_iter() 629 | .map(|(cons, vars, rows)| { 630 | Case::new(cons, vars, self.compile_rows(rows)) 631 | }) 632 | .collect() 633 | } 634 | 635 | /// Moves variable-only patterns/tests into the right-hand side/body of a 636 | /// case. 
637 | /// 638 | /// This turns cases like this: 639 | /// 640 | /// case foo -> print(foo) 641 | /// 642 | /// Into this: 643 | /// 644 | /// case -> { 645 | /// let foo = it 646 | /// print(foo) 647 | /// } 648 | /// 649 | /// Where `it` is a variable holding the value `case foo` is compared 650 | /// against, and the case/row has no patterns (i.e. always matches). 651 | fn move_variable_patterns(&self, row: &mut Row) { 652 | row.columns.retain(|col| { 653 | if let Pattern::Binding(bind) = &col.pattern { 654 | row.body.bindings.push((bind.clone(), col.variable)); 655 | false 656 | } else { 657 | true 658 | } 659 | }); 660 | } 661 | 662 | /// Given a row, returns the variable in that row that's referred to the 663 | /// most across all rows. 664 | fn branch_variable(&self, rows: &[Row]) -> Variable { 665 | let mut counts = HashMap::new(); 666 | 667 | for row in rows { 668 | for col in &row.columns { 669 | *counts.entry(&col.variable).or_insert(0_usize) += 1 670 | } 671 | } 672 | 673 | rows[0] 674 | .columns 675 | .iter() 676 | .map(|col| col.variable) 677 | .max_by_key(|var| counts[var]) 678 | .unwrap() 679 | } 680 | 681 | /// Returns a new variable to use in the decision tree. 682 | /// 683 | /// In a real compiler you'd have to ensure these variables don't conflict 684 | /// with other variables. 685 | fn new_variable(&mut self, type_id: TypeId) -> Variable { 686 | let var = Variable { id: self.variable_id, type_id }; 687 | 688 | self.variable_id += 1; 689 | var 690 | } 691 | 692 | fn new_variables(&mut self, type_ids: &[TypeId]) -> Vec { 693 | type_ids.iter().map(|t| self.new_variable(*t)).collect() 694 | } 695 | 696 | /// Returns the type of a given variable. 697 | /// 698 | /// In a real compiler the implementation of this would likely be quite 699 | /// different, depending on how your type system is implemented. 700 | /// 701 | /// For the sake of simplicity, we just store types in a Vec and retrieve 702 | /// them here according to the variable's type ID. 
703 | fn variable_type(&self, id: Variable) -> &Type { 704 | &self.types[id.type_id.0] 705 | } 706 | } 707 | 708 | #[cfg(test)] 709 | mod tests { 710 | use super::*; 711 | use similar_asserts::assert_eq; 712 | 713 | fn new_type(compiler: &mut Compiler, typ: Type) -> TypeId { 714 | let id = compiler.types.len(); 715 | 716 | compiler.types.push(typ); 717 | TypeId(id) 718 | } 719 | 720 | fn tt() -> Pattern { 721 | Pattern::Constructor(Constructor::True, Vec::new()) 722 | } 723 | 724 | fn ff() -> Pattern { 725 | Pattern::Constructor(Constructor::False, Vec::new()) 726 | } 727 | 728 | fn bind(name: &str) -> Pattern { 729 | Pattern::Binding(name.to_string()) 730 | } 731 | 732 | fn variant(typ: TypeId, index: usize, args: Vec) -> Pattern { 733 | Pattern::Constructor(Constructor::Variant(typ, index), args) 734 | } 735 | 736 | fn pair( 737 | typ1: TypeId, 738 | typ2: TypeId, 739 | pat1: Pattern, 740 | pat2: Pattern, 741 | ) -> Pattern { 742 | Pattern::Constructor(Constructor::Pair(typ1, typ2), vec![pat1, pat2]) 743 | } 744 | 745 | fn int(val: i64) -> Pattern { 746 | Pattern::Int(val) 747 | } 748 | 749 | fn rhs(value: usize) -> Body { 750 | Body { bindings: Vec::new(), value } 751 | } 752 | 753 | fn var(id: usize, type_id: TypeId) -> Variable { 754 | Variable { id, type_id } 755 | } 756 | 757 | fn compile( 758 | compiler: Compiler, 759 | input: Variable, 760 | rules: Vec<(Pattern, Body)>, 761 | ) -> Match { 762 | let rows = rules 763 | .into_iter() 764 | .map(|(pat, body)| { 765 | Row::new(vec![Column::new(input, pat)], None, body) 766 | }) 767 | .collect(); 768 | 769 | compiler.compile(rows) 770 | } 771 | 772 | fn failure() -> Decision { 773 | Decision::Failure 774 | } 775 | 776 | fn success(value: usize) -> Decision { 777 | Decision::Success(Body { bindings: Vec::new(), value }) 778 | } 779 | 780 | fn success_with_bindings( 781 | bindings: Vec<(&str, Variable)>, 782 | value: usize, 783 | ) -> Decision { 784 | Decision::Success(Body { 785 | bindings: bindings 786 | 
.into_iter() 787 | .map(|(n, v)| (n.to_string(), v)) 788 | .collect(), 789 | value, 790 | }) 791 | } 792 | 793 | #[test] 794 | fn test_move_variable_patterns() { 795 | let mut compiler = Compiler::new(); 796 | let typ = new_type(&mut compiler, Type::Boolean); 797 | let var1 = compiler.new_variable(typ); 798 | let var2 = compiler.new_variable(typ); 799 | let cons = Constructor::True; 800 | let mut row = Row { 801 | columns: vec![ 802 | Column::new(var2, bind("a")), 803 | Column::new( 804 | var1, 805 | Pattern::Constructor(cons.clone(), Vec::new()), 806 | ), 807 | ], 808 | guard: None, 809 | body: Body { bindings: Vec::new(), value: 42 }, 810 | }; 811 | 812 | compiler.move_variable_patterns(&mut row); 813 | 814 | assert_eq!( 815 | row, 816 | Row { 817 | columns: vec![Column::new( 818 | var1, 819 | Pattern::Constructor(cons, Vec::new()) 820 | )], 821 | guard: None, 822 | body: Body { 823 | bindings: vec![("a".to_string(), var2)], 824 | value: 42 825 | } 826 | } 827 | ); 828 | } 829 | 830 | #[test] 831 | fn test_move_variable_patterns_without_constructor_pattern() { 832 | let mut compiler = Compiler::new(); 833 | let typ = new_type(&mut compiler, Type::Boolean); 834 | let var1 = compiler.new_variable(typ); 835 | let mut row = Row { 836 | columns: vec![Column::new(var1, bind("a"))], 837 | guard: None, 838 | body: Body { bindings: Vec::new(), value: 42 }, 839 | }; 840 | 841 | compiler.move_variable_patterns(&mut row); 842 | 843 | assert_eq!( 844 | row, 845 | Row { 846 | columns: Vec::new(), 847 | guard: None, 848 | body: Body { 849 | bindings: vec![("a".to_string(), var1)], 850 | value: 42 851 | } 852 | } 853 | ); 854 | } 855 | 856 | #[test] 857 | fn test_branch_variable() { 858 | let mut compiler = Compiler::new(); 859 | let typ = new_type(&mut compiler, Type::Boolean); 860 | let var1 = compiler.new_variable(typ); 861 | let var2 = compiler.new_variable(typ); 862 | let rows = vec![ 863 | Row::new( 864 | vec![ 865 | Column::new(var1, Pattern::Int(42)), 866 | 
Column::new(var2, Pattern::Int(50)), 867 | ], 868 | None, 869 | rhs(1), 870 | ), 871 | Row::new(vec![Column::new(var2, Pattern::Int(4))], None, rhs(2)), 872 | ]; 873 | 874 | let branch = compiler.branch_variable(&rows); 875 | 876 | assert_eq!(branch, var2); 877 | } 878 | 879 | #[test] 880 | fn test_compile_simple_pattern() { 881 | let mut compiler = Compiler::new(); 882 | let typ = new_type(&mut compiler, Type::Boolean); 883 | let input = compiler.new_variable(typ); 884 | let result = 885 | compile(compiler, input, vec![(tt(), rhs(1)), (ff(), rhs(2))]); 886 | 887 | assert_eq!( 888 | result.tree, 889 | Decision::Switch( 890 | input, 891 | vec![ 892 | Case::new(Constructor::False, Vec::new(), success(2)), 893 | Case::new(Constructor::True, Vec::new(), success(1)), 894 | ], 895 | None 896 | ) 897 | ); 898 | } 899 | 900 | #[test] 901 | fn test_compile_nonexhaustive_pattern() { 902 | let mut compiler = Compiler::new(); 903 | let typ = new_type(&mut compiler, Type::Boolean); 904 | let input = compiler.new_variable(typ); 905 | let result = compile(compiler, input, vec![(tt(), rhs(1))]); 906 | 907 | assert_eq!( 908 | result.tree, 909 | Decision::Switch( 910 | input, 911 | vec![ 912 | Case::new(Constructor::False, Vec::new(), failure()), 913 | Case::new(Constructor::True, Vec::new(), success(1)), 914 | ], 915 | None 916 | ) 917 | ); 918 | assert!(result.diagnostics.missing); 919 | assert_eq!(result.missing_patterns(), vec!["false".to_string()]); 920 | } 921 | 922 | #[test] 923 | fn test_compile_redundant_pattern() { 924 | let mut compiler = Compiler::new(); 925 | let typ = new_type(&mut compiler, Type::Boolean); 926 | let input = compiler.new_variable(typ); 927 | let result = compile( 928 | compiler, 929 | input, 930 | vec![(tt(), rhs(1)), (tt(), rhs(2)), (ff(), rhs(3))], 931 | ); 932 | 933 | assert_eq!( 934 | result.tree, 935 | Decision::Switch( 936 | input, 937 | vec![ 938 | Case::new(Constructor::False, Vec::new(), success(3)), 939 | Case::new(Constructor::True, 
Vec::new(), success(1)), 940 | ], 941 | None 942 | ) 943 | ); 944 | assert_eq!(result.diagnostics.reachable, vec![3, 1]); 945 | } 946 | 947 | #[test] 948 | fn test_compile_redundant_int() { 949 | let mut compiler = Compiler::new(); 950 | let typ = new_type(&mut compiler, Type::Int); 951 | let input = compiler.new_variable(typ); 952 | let result = compile( 953 | compiler, 954 | input, 955 | vec![ 956 | (int(1), rhs(1)), 957 | (int(1), rhs(2)), 958 | (int(2), rhs(3)), 959 | (bind("a"), rhs(4)), 960 | ], 961 | ); 962 | 963 | assert_eq!( 964 | result.tree, 965 | Decision::Switch( 966 | input, 967 | vec![ 968 | Case::new(Constructor::Int(1), Vec::new(), success(1)), 969 | Case::new(Constructor::Int(2), Vec::new(), success(3)), 970 | ], 971 | Some(Box::new(success_with_bindings(vec![("a", input)], 4))) 972 | ) 973 | ); 974 | assert_eq!(result.diagnostics.reachable, vec![1, 3, 4]); 975 | } 976 | 977 | #[test] 978 | fn test_compile_variable_pattern() { 979 | let mut compiler = Compiler::new(); 980 | let typ = new_type(&mut compiler, Type::Boolean); 981 | let input = compiler.new_variable(typ); 982 | let result = 983 | compile(compiler, input, vec![(tt(), rhs(1)), (bind("a"), rhs(2))]); 984 | 985 | assert_eq!( 986 | result.tree, 987 | Decision::Switch( 988 | input, 989 | vec![ 990 | Case::new( 991 | Constructor::False, 992 | Vec::new(), 993 | success_with_bindings(vec![("a", input)], 2) 994 | ), 995 | Case::new(Constructor::True, Vec::new(), success(1)), 996 | ], 997 | None 998 | ) 999 | ); 1000 | } 1001 | 1002 | #[test] 1003 | fn test_compile_nonexhaustive_int_pattern() { 1004 | let mut compiler = Compiler::new(); 1005 | let int_type = new_type(&mut compiler, Type::Int); 1006 | let input = compiler.new_variable(int_type); 1007 | let result = 1008 | compile(compiler, input, vec![(int(4), rhs(1)), (int(5), rhs(2))]); 1009 | 1010 | assert_eq!( 1011 | result.tree, 1012 | Decision::Switch( 1013 | input, 1014 | vec![ 1015 | Case::new(Constructor::Int(4), Vec::new(), success(1)), 
1016 | Case::new(Constructor::Int(5), Vec::new(), success(2)), 1017 | ], 1018 | Some(Box::new(failure())) 1019 | ) 1020 | ); 1021 | assert_eq!(result.missing_patterns(), vec!["_".to_string()]); 1022 | } 1023 | 1024 | #[test] 1025 | fn test_compile_exhaustive_int_pattern() { 1026 | let mut compiler = Compiler::new(); 1027 | let int_type = new_type(&mut compiler, Type::Int); 1028 | let input = compiler.new_variable(int_type); 1029 | let result = compile( 1030 | compiler, 1031 | input, 1032 | vec![(int(4), rhs(1)), (int(5), rhs(2)), (bind("a"), rhs(3))], 1033 | ); 1034 | 1035 | assert_eq!( 1036 | result.tree, 1037 | Decision::Switch( 1038 | input, 1039 | vec![ 1040 | Case::new(Constructor::Int(4), Vec::new(), success(1)), 1041 | Case::new(Constructor::Int(5), Vec::new(), success(2)), 1042 | ], 1043 | Some(Box::new(success_with_bindings(vec![("a", input)], 3))) 1044 | ) 1045 | ); 1046 | } 1047 | 1048 | #[test] 1049 | fn test_compile_unreachable_int_pattern() { 1050 | let mut compiler = Compiler::new(); 1051 | let int_type = new_type(&mut compiler, Type::Int); 1052 | let input = compiler.new_variable(int_type); 1053 | let result = compile( 1054 | compiler, 1055 | input, 1056 | vec![(int(4), rhs(1)), (bind("a"), rhs(3)), (int(5), rhs(2))], 1057 | ); 1058 | 1059 | assert_eq!( 1060 | result.tree, 1061 | Decision::Switch( 1062 | input, 1063 | vec![ 1064 | Case::new(Constructor::Int(4), Vec::new(), success(1)), 1065 | Case::new( 1066 | Constructor::Int(5), 1067 | Vec::new(), 1068 | success_with_bindings(vec![("a", input)], 3) 1069 | ), 1070 | ], 1071 | Some(Box::new(success_with_bindings(vec![("a", input)], 3))) 1072 | ) 1073 | ); 1074 | assert_eq!(result.diagnostics.reachable, vec![1, 3, 3]); 1075 | } 1076 | 1077 | #[test] 1078 | fn test_compile_nonexhaustive_nested_int_pattern() { 1079 | let mut compiler = Compiler::new(); 1080 | let int_type = new_type(&mut compiler, Type::Int); 1081 | let tup_type = new_type(&mut compiler, Type::Pair(int_type, int_type)); 1082 | let 
input = compiler.new_variable(tup_type); 1083 | let result = compile( 1084 | compiler, 1085 | input, 1086 | vec![(pair(int_type, int_type, int(4), bind("a")), rhs(1))], 1087 | ); 1088 | 1089 | assert_eq!( 1090 | result.tree, 1091 | Decision::Switch( 1092 | input, 1093 | vec![Case::new( 1094 | Constructor::Pair(int_type, int_type), 1095 | vec![var(1, int_type), var(2, int_type),], 1096 | Decision::Switch( 1097 | var(1, int_type), 1098 | vec![Case::new( 1099 | Constructor::Int(4), 1100 | Vec::new(), 1101 | success_with_bindings( 1102 | vec![("a", var(2, int_type))], 1103 | 1 1104 | ) 1105 | )], 1106 | Some(Box::new(failure())) 1107 | ) 1108 | )], 1109 | None 1110 | ) 1111 | ); 1112 | assert_eq!(result.missing_patterns(), vec!["(_, _)".to_string()]); 1113 | } 1114 | 1115 | #[test] 1116 | fn test_compile_exhaustive_nested_int_pattern() { 1117 | let mut compiler = Compiler::new(); 1118 | let int_type = new_type(&mut compiler, Type::Int); 1119 | let tup_type = new_type(&mut compiler, Type::Pair(int_type, int_type)); 1120 | let input = compiler.new_variable(tup_type); 1121 | let result = compile( 1122 | compiler, 1123 | input, 1124 | vec![ 1125 | (pair(int_type, int_type, int(4), int(5)), rhs(1)), 1126 | (pair(int_type, int_type, bind("a"), bind("b")), rhs(2)), 1127 | ], 1128 | ); 1129 | 1130 | assert_eq!( 1131 | result.tree, 1132 | Decision::Switch( 1133 | input, 1134 | vec![Case::new( 1135 | Constructor::Pair(int_type, int_type), 1136 | vec![var(1, int_type), var(2, int_type)], 1137 | Decision::Switch( 1138 | var(2, int_type), 1139 | vec![Case::new( 1140 | Constructor::Int(5), 1141 | Vec::new(), 1142 | Decision::Switch( 1143 | var(1, int_type), 1144 | vec![Case::new( 1145 | Constructor::Int(4), 1146 | Vec::new(), 1147 | success(1) 1148 | )], 1149 | Some(Box::new(success_with_bindings( 1150 | vec![ 1151 | ("a", var(1, int_type)), 1152 | ("b", var(2, int_type)) 1153 | ], 1154 | 2 1155 | ))) 1156 | ) 1157 | )], 1158 | Some(Box::new(success_with_bindings( 1159 | vec![ 1160 
| ("a", var(1, int_type)), 1161 | ("b", var(2, int_type)) 1162 | ], 1163 | 2 1164 | ))) 1165 | ) 1166 | )], 1167 | None 1168 | ) 1169 | ); 1170 | } 1171 | 1172 | #[test] 1173 | fn test_compile_nonexhaustive_option_type() { 1174 | let mut compiler = Compiler::new(); 1175 | let int_type = new_type(&mut compiler, Type::Int); 1176 | let option_type = new_type( 1177 | &mut compiler, 1178 | Type::Enum(vec![ 1179 | ("Some".to_string(), vec![int_type]), 1180 | ("None".to_string(), Vec::new()), 1181 | ]), 1182 | ); 1183 | let input = compiler.new_variable(option_type); 1184 | let result = compile( 1185 | compiler, 1186 | input, 1187 | vec![(variant(option_type, 0, vec![Pattern::Int(4)]), rhs(1))], 1188 | ); 1189 | 1190 | assert_eq!( 1191 | result.tree, 1192 | Decision::Switch( 1193 | input, 1194 | vec![ 1195 | Case::new( 1196 | Constructor::Variant(option_type, 0), 1197 | vec![var(1, int_type)], 1198 | Decision::Switch( 1199 | var(1, int_type), 1200 | vec![Case::new( 1201 | Constructor::Int(4), 1202 | Vec::new(), 1203 | success(1) 1204 | )], 1205 | Some(Box::new(failure())) 1206 | ) 1207 | ), 1208 | Case::new( 1209 | Constructor::Variant(option_type, 1), 1210 | Vec::new(), 1211 | failure() 1212 | ) 1213 | ], 1214 | None, 1215 | ) 1216 | ); 1217 | assert_eq!( 1218 | result.missing_patterns(), 1219 | vec!["None".to_string(), "Some(_)".to_string()] 1220 | ); 1221 | } 1222 | 1223 | #[test] 1224 | fn test_compile_nonexhaustive_option_type_with_multiple_arguments() { 1225 | let mut compiler = Compiler::new(); 1226 | let int_type = new_type(&mut compiler, Type::Int); 1227 | let option_type = new_type( 1228 | &mut compiler, 1229 | Type::Enum(vec![ 1230 | ("Some".to_string(), vec![int_type, int_type]), 1231 | ("None".to_string(), Vec::new()), 1232 | ]), 1233 | ); 1234 | let input = compiler.new_variable(option_type); 1235 | let result = compile( 1236 | compiler, 1237 | input, 1238 | vec![( 1239 | variant(option_type, 0, vec![Pattern::Int(4), Pattern::Int(5)]), 1240 | rhs(1), 1241 | 
)], 1242 | ); 1243 | 1244 | assert_eq!( 1245 | result.tree, 1246 | Decision::Switch( 1247 | input, 1248 | vec![ 1249 | Case::new( 1250 | Constructor::Variant(option_type, 0), 1251 | vec![var(1, int_type), var(2, int_type)], 1252 | Decision::Switch( 1253 | var(2, int_type), 1254 | vec![Case::new( 1255 | Constructor::Int(5), 1256 | Vec::new(), 1257 | Decision::Switch( 1258 | var(1, int_type), 1259 | vec![Case::new( 1260 | Constructor::Int(4), 1261 | Vec::new(), 1262 | success(1) 1263 | )], 1264 | Some(Box::new(failure())) 1265 | ) 1266 | )], 1267 | Some(Box::new(failure())) 1268 | ) 1269 | ), 1270 | Case::new( 1271 | Constructor::Variant(option_type, 1), 1272 | Vec::new(), 1273 | failure() 1274 | ) 1275 | ], 1276 | None 1277 | ) 1278 | ); 1279 | assert_eq!( 1280 | result.missing_patterns(), 1281 | vec!["None".to_string(), "Some(_, _)".to_string(),] 1282 | ); 1283 | } 1284 | 1285 | #[test] 1286 | fn test_compile_exhaustive_option_type() { 1287 | let mut compiler = Compiler::new(); 1288 | let int_type = new_type(&mut compiler, Type::Int); 1289 | let option_type = new_type( 1290 | &mut compiler, 1291 | Type::Enum(vec![ 1292 | ("Some".to_string(), vec![int_type]), 1293 | ("None".to_string(), Vec::new()), 1294 | ]), 1295 | ); 1296 | let input = compiler.new_variable(option_type); 1297 | let result = compile( 1298 | compiler, 1299 | input, 1300 | vec![ 1301 | (variant(option_type, 0, vec![Pattern::Int(4)]), rhs(1)), 1302 | (variant(option_type, 0, vec![bind("a")]), rhs(2)), 1303 | (variant(option_type, 1, Vec::new()), rhs(3)), 1304 | ], 1305 | ); 1306 | 1307 | assert_eq!( 1308 | result.tree, 1309 | Decision::Switch( 1310 | input, 1311 | vec![ 1312 | Case::new( 1313 | Constructor::Variant(option_type, 0), 1314 | vec![var(1, int_type)], 1315 | Decision::Switch( 1316 | var(1, int_type), 1317 | vec![Case::new( 1318 | Constructor::Int(4), 1319 | Vec::new(), 1320 | success(1) 1321 | )], 1322 | Some(Box::new(success_with_bindings( 1323 | vec![("a", var(1, int_type))], 1324 | 2 
1325 | ))) 1326 | ) 1327 | ), 1328 | Case::new( 1329 | Constructor::Variant(option_type, 1), 1330 | Vec::new(), 1331 | success(3) 1332 | ) 1333 | ], 1334 | None 1335 | ) 1336 | ); 1337 | } 1338 | 1339 | #[test] 1340 | fn test_compile_redundant_option_type_with_bool() { 1341 | let mut compiler = Compiler::new(); 1342 | let bool_type = new_type(&mut compiler, Type::Boolean); 1343 | let option_type = new_type( 1344 | &mut compiler, 1345 | Type::Enum(vec![ 1346 | ("Some".to_string(), vec![bool_type]), 1347 | ("None".to_string(), Vec::new()), 1348 | ]), 1349 | ); 1350 | let input = compiler.new_variable(option_type); 1351 | let result = compile( 1352 | compiler, 1353 | input, 1354 | vec![ 1355 | (variant(option_type, 0, vec![tt()]), rhs(1)), 1356 | (variant(option_type, 0, vec![tt()]), rhs(10)), 1357 | (variant(option_type, 0, vec![bind("a")]), rhs(2)), 1358 | (variant(option_type, 1, Vec::new()), rhs(3)), 1359 | ], 1360 | ); 1361 | 1362 | assert_eq!( 1363 | result.tree, 1364 | Decision::Switch( 1365 | input, 1366 | vec![ 1367 | Case::new( 1368 | Constructor::Variant(option_type, 0), 1369 | vec![var(1, bool_type)], 1370 | Decision::Switch( 1371 | var(1, bool_type), 1372 | vec![ 1373 | Case::new( 1374 | Constructor::False, 1375 | Vec::new(), 1376 | success_with_bindings( 1377 | vec![("a", var(1, bool_type))], 1378 | 2 1379 | ) 1380 | ), 1381 | Case::new( 1382 | Constructor::True, 1383 | Vec::new(), 1384 | success(1) 1385 | ) 1386 | ], 1387 | None 1388 | ) 1389 | ), 1390 | Case::new( 1391 | Constructor::Variant(option_type, 1), 1392 | Vec::new(), 1393 | success(3) 1394 | ) 1395 | ], 1396 | None 1397 | ) 1398 | ); 1399 | 1400 | assert_eq!(result.diagnostics.reachable, vec![2, 1, 3]); 1401 | } 1402 | 1403 | #[test] 1404 | fn test_compile_redundant_option_type_with_int() { 1405 | let mut compiler = Compiler::new(); 1406 | let int_type = new_type(&mut compiler, Type::Int); 1407 | let option_type = new_type( 1408 | &mut compiler, 1409 | Type::Enum(vec![ 1410 | 
("Some".to_string(), vec![int_type]), 1411 | ("None".to_string(), Vec::new()), 1412 | ]), 1413 | ); 1414 | let input = compiler.new_variable(option_type); 1415 | let result = compile( 1416 | compiler, 1417 | input, 1418 | vec![ 1419 | (variant(option_type, 0, vec![Pattern::Int(4)]), rhs(1)), 1420 | (variant(option_type, 0, vec![Pattern::Int(4)]), rhs(10)), 1421 | (variant(option_type, 0, vec![bind("a")]), rhs(2)), 1422 | (variant(option_type, 1, Vec::new()), rhs(3)), 1423 | ], 1424 | ); 1425 | 1426 | assert_eq!( 1427 | result.tree, 1428 | Decision::Switch( 1429 | input, 1430 | vec![ 1431 | Case::new( 1432 | Constructor::Variant(option_type, 0), 1433 | vec![var(1, int_type)], 1434 | Decision::Switch( 1435 | var(1, int_type), 1436 | vec![Case::new( 1437 | Constructor::Int(4), 1438 | Vec::new(), 1439 | success(1) 1440 | ),], 1441 | Some(Box::new(success_with_bindings( 1442 | vec![("a", var(1, int_type))], 1443 | 2 1444 | ))) 1445 | ) 1446 | ), 1447 | Case::new( 1448 | Constructor::Variant(option_type, 1), 1449 | Vec::new(), 1450 | success(3) 1451 | ) 1452 | ], 1453 | None 1454 | ) 1455 | ); 1456 | 1457 | assert_eq!(result.diagnostics.reachable, vec![1, 2, 3]); 1458 | } 1459 | 1460 | #[test] 1461 | fn test_compile_exhaustive_option_type_with_binding() { 1462 | let mut compiler = Compiler::new(); 1463 | let int_type = new_type(&mut compiler, Type::Int); 1464 | let option_type = new_type( 1465 | &mut compiler, 1466 | Type::Enum(vec![ 1467 | ("Some".to_string(), vec![int_type]), 1468 | ("None".to_string(), Vec::new()), 1469 | ]), 1470 | ); 1471 | let input = compiler.new_variable(option_type); 1472 | let result = compile( 1473 | compiler, 1474 | input, 1475 | vec![ 1476 | (variant(option_type, 0, vec![Pattern::Int(4)]), rhs(1)), 1477 | (bind("a"), rhs(2)), 1478 | ], 1479 | ); 1480 | 1481 | assert_eq!( 1482 | result.tree, 1483 | Decision::Switch( 1484 | input, 1485 | vec![ 1486 | Case::new( 1487 | Constructor::Variant(option_type, 0), 1488 | vec![var(1, int_type)], 1489 | 
Decision::Switch( 1490 | var(1, int_type), 1491 | vec![Case::new( 1492 | Constructor::Int(4), 1493 | Vec::new(), 1494 | success(1) 1495 | )], 1496 | Some(Box::new(success_with_bindings( 1497 | vec![("a", input)], 1498 | 2 1499 | ))) 1500 | ) 1501 | ), 1502 | Case::new( 1503 | Constructor::Variant(option_type, 1), 1504 | Vec::new(), 1505 | success_with_bindings(vec![("a", input)], 2) 1506 | ) 1507 | ], 1508 | None, 1509 | ) 1510 | ); 1511 | } 1512 | 1513 | #[test] 1514 | fn test_compile_nonexhaustive_pair_in_option_pattern() { 1515 | let mut compiler = Compiler::new(); 1516 | let int_type = new_type(&mut compiler, Type::Int); 1517 | let tup_type = new_type(&mut compiler, Type::Pair(int_type, int_type)); 1518 | let option_type = new_type( 1519 | &mut compiler, 1520 | Type::Enum(vec![ 1521 | ("Some".to_string(), vec![tup_type]), 1522 | ("None".to_string(), Vec::new()), 1523 | ]), 1524 | ); 1525 | let input = compiler.new_variable(option_type); 1526 | let result = compile( 1527 | compiler, 1528 | input, 1529 | vec![( 1530 | variant( 1531 | option_type, 1532 | 0, 1533 | vec![pair(int_type, int_type, int(4), bind("a"))], 1534 | ), 1535 | rhs(1), 1536 | )], 1537 | ); 1538 | 1539 | assert_eq!( 1540 | result.tree, 1541 | Decision::Switch( 1542 | input, 1543 | vec![ 1544 | Case::new( 1545 | Constructor::Variant(option_type, 0), 1546 | vec![var(1, tup_type)], 1547 | Decision::Switch( 1548 | var(1, tup_type), 1549 | vec![Case::new( 1550 | Constructor::Pair(int_type, int_type), 1551 | vec![var(2, int_type), var(3, int_type),], 1552 | Decision::Switch( 1553 | var(2, int_type), 1554 | vec![Case::new( 1555 | Constructor::Int(4), 1556 | Vec::new(), 1557 | success_with_bindings( 1558 | vec![("a", var(3, int_type))], 1559 | 1 1560 | ) 1561 | )], 1562 | Some(Box::new(failure())) 1563 | ) 1564 | )], 1565 | None, 1566 | ) 1567 | ), 1568 | Case::new( 1569 | Constructor::Variant(option_type, 1), 1570 | Vec::new(), 1571 | failure() 1572 | ) 1573 | ], 1574 | None 1575 | ) 1576 | ); 1577 | 
assert_eq!( 1578 | result.missing_patterns(), 1579 | vec!["None".to_string(), "Some((_, _))".to_string()] 1580 | ); 1581 | } 1582 | 1583 | #[test] 1584 | fn test_compile_or_bool_pattern() { 1585 | let mut compiler = Compiler::new(); 1586 | let bool_type = new_type(&mut compiler, Type::Boolean); 1587 | let input = compiler.new_variable(bool_type); 1588 | let result = compile( 1589 | compiler, 1590 | input, 1591 | vec![(Pattern::Or(vec![tt(), ff()]), rhs(1))], 1592 | ); 1593 | 1594 | assert_eq!( 1595 | result.tree, 1596 | Decision::Switch( 1597 | input, 1598 | vec![ 1599 | Case::new(Constructor::False, Vec::new(), success(1)), 1600 | Case::new(Constructor::True, Vec::new(), success(1)), 1601 | ], 1602 | None 1603 | ) 1604 | ); 1605 | } 1606 | 1607 | #[test] 1608 | fn test_compile_or_int_pattern() { 1609 | let mut compiler = Compiler::new(); 1610 | let int_type = new_type(&mut compiler, Type::Int); 1611 | let input = compiler.new_variable(int_type); 1612 | let result = compile( 1613 | compiler, 1614 | input, 1615 | vec![(Pattern::Or(vec![int(4), int(5)]), rhs(1))], 1616 | ); 1617 | 1618 | assert_eq!( 1619 | result.tree, 1620 | Decision::Switch( 1621 | input, 1622 | vec![ 1623 | Case::new(Constructor::Int(4), Vec::new(), success(1)), 1624 | Case::new(Constructor::Int(5), Vec::new(), success(1)), 1625 | ], 1626 | Some(Box::new(failure())) 1627 | ) 1628 | ); 1629 | } 1630 | 1631 | #[test] 1632 | fn test_range_pattern() { 1633 | let mut compiler = Compiler::new(); 1634 | let int_type = new_type(&mut compiler, Type::Int); 1635 | let input = compiler.new_variable(int_type); 1636 | let result = 1637 | compile(compiler, input, vec![(Pattern::Range(1, 10), rhs(1))]); 1638 | 1639 | assert_eq!( 1640 | result.tree, 1641 | Decision::Switch( 1642 | input, 1643 | vec![Case::new( 1644 | Constructor::Range(1, 10), 1645 | Vec::new(), 1646 | success(1) 1647 | )], 1648 | Some(Box::new(failure())) 1649 | ) 1650 | ); 1651 | } 1652 | 1653 | #[test] 1654 | fn test_nonexhaustive_guard() { 
1655 | let mut compiler = Compiler::new(); 1656 | let int_type = new_type(&mut compiler, Type::Int); 1657 | let input = compiler.new_variable(int_type); 1658 | 1659 | let result = compiler.compile(vec![Row::new( 1660 | vec![Column::new(input, int(4))], 1661 | Some(42), 1662 | rhs(1), 1663 | )]); 1664 | 1665 | assert_eq!( 1666 | result.tree, 1667 | Decision::Switch( 1668 | input, 1669 | vec![Case::new( 1670 | Constructor::Int(4), 1671 | Vec::new(), 1672 | Decision::Guard(42, rhs(1), Box::new(failure())) 1673 | )], 1674 | Some(Box::new(failure())) 1675 | ) 1676 | ); 1677 | 1678 | assert_eq!(result.missing_patterns(), vec!["_".to_string()]); 1679 | } 1680 | 1681 | #[test] 1682 | fn test_nonexhaustive_option_with_two_rows_and_guard() { 1683 | let mut compiler = Compiler::new(); 1684 | let int_type = new_type(&mut compiler, Type::Int); 1685 | let option_type = new_type( 1686 | &mut compiler, 1687 | Type::Enum(vec![ 1688 | ("Some".to_string(), vec![int_type]), 1689 | ("None".to_string(), Vec::new()), 1690 | ]), 1691 | ); 1692 | let input = compiler.new_variable(option_type); 1693 | let result = compiler.compile(vec![ 1694 | Row::new( 1695 | vec![Column::new(input, variant(option_type, 0, vec![int(4)]))], 1696 | Some(42), 1697 | rhs(1), 1698 | ), 1699 | Row::new( 1700 | vec![Column::new( 1701 | input, 1702 | variant(option_type, 0, vec![bind("a")]), 1703 | )], 1704 | None, 1705 | rhs(2), 1706 | ), 1707 | ]); 1708 | 1709 | assert_eq!( 1710 | result.tree, 1711 | Decision::Switch( 1712 | input, 1713 | vec![ 1714 | Case::new( 1715 | Constructor::Variant(option_type, 0), 1716 | vec![var(1, int_type)], 1717 | Decision::Switch( 1718 | var(1, int_type), 1719 | vec![Case::new( 1720 | Constructor::Int(4), 1721 | Vec::new(), 1722 | Decision::Guard( 1723 | 42, 1724 | rhs(1), 1725 | Box::new(success_with_bindings( 1726 | vec![("a", var(1, int_type))], 1727 | 2 1728 | )), 1729 | ) 1730 | )], 1731 | Some(Box::new(success_with_bindings( 1732 | vec![("a", var(1, int_type))], 1733 | 2 1734 
| ))) 1735 | ), 1736 | ), 1737 | Case::new( 1738 | Constructor::Variant(option_type, 1), 1739 | Vec::new(), 1740 | failure() 1741 | ) 1742 | ], 1743 | None 1744 | ) 1745 | ); 1746 | 1747 | assert_eq!(result.missing_patterns(), vec!["None".to_string()]); 1748 | } 1749 | 1750 | #[test] 1751 | fn test_exhaustive_guard() { 1752 | let mut compiler = Compiler::new(); 1753 | let int_type = new_type(&mut compiler, Type::Int); 1754 | let input = compiler.new_variable(int_type); 1755 | let result = compiler.compile(vec![ 1756 | Row::new(vec![Column::new(input, int(4))], Some(42), rhs(1)), 1757 | Row::new(vec![Column::new(input, bind("a"))], None, rhs(2)), 1758 | ]); 1759 | 1760 | assert_eq!( 1761 | result.tree, 1762 | Decision::Switch( 1763 | input, 1764 | vec![Case::new( 1765 | Constructor::Int(4), 1766 | Vec::new(), 1767 | Decision::Guard( 1768 | 42, 1769 | rhs(1), 1770 | Box::new(success_with_bindings(vec![("a", input)], 2)) 1771 | ) 1772 | )], 1773 | Some(Box::new(success_with_bindings(vec![("a", input)], 2))) 1774 | ) 1775 | ); 1776 | } 1777 | 1778 | #[test] 1779 | fn test_exhaustive_guard_with_bool() { 1780 | let mut compiler = Compiler::new(); 1781 | let bool_type = new_type(&mut compiler, Type::Boolean); 1782 | let input = compiler.new_variable(bool_type); 1783 | let result = compiler.compile(vec![ 1784 | Row::new(vec![Column::new(input, tt())], Some(42), rhs(1)), 1785 | Row::new(vec![Column::new(input, bind("a"))], None, rhs(2)), 1786 | ]); 1787 | 1788 | assert_eq!( 1789 | result.tree, 1790 | Decision::Switch( 1791 | input, 1792 | vec![ 1793 | Case::new( 1794 | Constructor::False, 1795 | Vec::new(), 1796 | success_with_bindings(vec![("a", input)], 2) 1797 | ), 1798 | Case::new( 1799 | Constructor::True, 1800 | Vec::new(), 1801 | Decision::Guard( 1802 | 42, 1803 | rhs(1), 1804 | Box::new(success_with_bindings( 1805 | vec![("a", input)], 1806 | 2 1807 | )) 1808 | ) 1809 | ) 1810 | ], 1811 | None 1812 | ) 1813 | ); 1814 | } 1815 | 1816 | #[test] 1817 | fn 
test_exhaustive_guard_with_int() { 1818 | let mut compiler = Compiler::new(); 1819 | let int_type = new_type(&mut compiler, Type::Int); 1820 | let input = compiler.new_variable(int_type); 1821 | let result = compiler.compile(vec![ 1822 | Row::new(vec![Column::new(input, int(1))], Some(42), rhs(1)), 1823 | Row::new(vec![Column::new(input, int(2))], None, rhs(2)), 1824 | Row::new(vec![Column::new(input, bind("b"))], None, rhs(3)), 1825 | ]); 1826 | 1827 | assert_eq!( 1828 | result.tree, 1829 | Decision::Switch( 1830 | input, 1831 | vec![ 1832 | Case::new( 1833 | Constructor::Int(1), 1834 | Vec::new(), 1835 | Decision::Guard( 1836 | 42, 1837 | rhs(1), 1838 | Box::new(success_with_bindings( 1839 | vec![("b", input)], 1840 | 3 1841 | )) 1842 | ) 1843 | ), 1844 | Case::new(Constructor::Int(2), Vec::new(), success(2)) 1845 | ], 1846 | Some(Box::new(success_with_bindings(vec![("b", input)], 3))) 1847 | ) 1848 | ); 1849 | } 1850 | 1851 | #[test] 1852 | fn test_exhaustive_guard_with_same_int() { 1853 | let mut compiler = Compiler::new(); 1854 | let int_type = new_type(&mut compiler, Type::Int); 1855 | let input = compiler.new_variable(int_type); 1856 | let result = compiler.compile(vec![ 1857 | Row::new(vec![Column::new(input, int(1))], Some(10), rhs(1)), 1858 | Row::new(vec![Column::new(input, int(1))], Some(20), rhs(2)), 1859 | Row::new(vec![Column::new(input, int(1))], None, rhs(3)), 1860 | Row::new(vec![Column::new(input, bind("b"))], None, rhs(4)), 1861 | ]); 1862 | 1863 | assert_eq!( 1864 | result.tree, 1865 | Decision::Switch( 1866 | input, 1867 | vec![Case::new( 1868 | Constructor::Int(1), 1869 | Vec::new(), 1870 | Decision::Guard( 1871 | 10, 1872 | rhs(1), 1873 | Box::new(Decision::Guard( 1874 | 20, 1875 | rhs(2), 1876 | Box::new(success(3)) 1877 | )) 1878 | ) 1879 | )], 1880 | Some(Box::new(success_with_bindings(vec![("b", input)], 4))) 1881 | ) 1882 | ); 1883 | } 1884 | 1885 | #[test] 1886 | fn test_exhaustive_option_with_guard() { 1887 | let mut compiler = 
Compiler::new(); 1888 | let int_type = new_type(&mut compiler, Type::Int); 1889 | let option_type = new_type( 1890 | &mut compiler, 1891 | Type::Enum(vec![ 1892 | ("Some".to_string(), vec![int_type]), 1893 | ("None".to_string(), Vec::new()), 1894 | ]), 1895 | ); 1896 | let input = compiler.new_variable(option_type); 1897 | let result = compiler.compile(vec![ 1898 | Row::new( 1899 | vec![Column::new(input, variant(option_type, 1, Vec::new()))], 1900 | None, 1901 | rhs(1), 1902 | ), 1903 | Row::new( 1904 | vec![Column::new( 1905 | input, 1906 | variant(option_type, 0, vec![bind("a")]), 1907 | )], 1908 | Some(42), 1909 | rhs(2), 1910 | ), 1911 | Row::new( 1912 | vec![Column::new( 1913 | input, 1914 | variant(option_type, 0, vec![bind("a")]), 1915 | )], 1916 | None, 1917 | rhs(3), 1918 | ), 1919 | ]); 1920 | 1921 | assert_eq!( 1922 | result.tree, 1923 | Decision::Switch( 1924 | input, 1925 | vec![ 1926 | Case::new( 1927 | Constructor::Variant(option_type, 0), 1928 | vec![var(1, int_type)], 1929 | Decision::Guard( 1930 | 42, 1931 | Body { 1932 | bindings: vec![( 1933 | "a".to_string(), 1934 | var(1, int_type) 1935 | )], 1936 | value: 2 1937 | }, 1938 | Box::new(success_with_bindings( 1939 | vec![("a", var(1, int_type))], 1940 | 3 1941 | )) 1942 | ) 1943 | ), 1944 | Case::new( 1945 | Constructor::Variant(option_type, 1), 1946 | Vec::new(), 1947 | success(1) 1948 | ), 1949 | ], 1950 | None 1951 | ) 1952 | ); 1953 | } 1954 | 1955 | #[test] 1956 | fn test_compile_exhaustive_nested_int_with_guard() { 1957 | let mut compiler = Compiler::new(); 1958 | let int_type = new_type(&mut compiler, Type::Int); 1959 | let tup_type = new_type(&mut compiler, Type::Pair(int_type, int_type)); 1960 | let input = compiler.new_variable(tup_type); 1961 | let result = compiler.compile(vec![ 1962 | Row::new( 1963 | vec![Column::new( 1964 | input, 1965 | pair(int_type, int_type, int(4), int(5)), 1966 | )], 1967 | Some(42), 1968 | rhs(1), 1969 | ), 1970 | Row::new( 1971 | vec![Column::new( 1972 | 
input, 1973 | pair(int_type, int_type, int(4), int(5)), 1974 | )], 1975 | None, 1976 | rhs(2), 1977 | ), 1978 | Row::new( 1979 | vec![Column::new( 1980 | input, 1981 | pair(int_type, int_type, bind("a"), bind("b")), 1982 | )], 1983 | None, 1984 | rhs(3), 1985 | ), 1986 | ]); 1987 | 1988 | assert_eq!( 1989 | result.tree, 1990 | Decision::Switch( 1991 | input, 1992 | vec![Case::new( 1993 | Constructor::Pair(int_type, int_type), 1994 | vec![var(1, int_type), var(2, int_type)], 1995 | Decision::Switch( 1996 | var(2, int_type), 1997 | vec![Case::new( 1998 | Constructor::Int(5), 1999 | Vec::new(), 2000 | Decision::Switch( 2001 | var(1, int_type), 2002 | vec![Case::new( 2003 | Constructor::Int(4), 2004 | Vec::new(), 2005 | Decision::Guard( 2006 | 42, 2007 | rhs(1), 2008 | Box::new(success(2)), 2009 | ) 2010 | )], 2011 | Some(Box::new(success_with_bindings( 2012 | vec![ 2013 | ("a", var(1, int_type)), 2014 | ("b", var(2, int_type)) 2015 | ], 2016 | 3 2017 | ))) 2018 | ) 2019 | )], 2020 | Some(Box::new(success_with_bindings( 2021 | vec![ 2022 | ("a", var(1, int_type)), 2023 | ("b", var(2, int_type)) 2024 | ], 2025 | 3 2026 | ))) 2027 | ) 2028 | )], 2029 | None 2030 | ) 2031 | ); 2032 | } 2033 | } 2034 | -------------------------------------------------------------------------------- /rustfmt.toml: -------------------------------------------------------------------------------- 1 | max_width = 80 2 | use_try_shorthand = true 3 | reorder_imports = true 4 | edition = "2018" 5 | 6 | # The default setting results in too aggressive/clunky wrapping for a lot of 7 | # code. For example, this: 8 | # 9 | # if !foo.bar().baz() { 10 | # bar(); 11 | # } 12 | # 13 | # Would result in something like this: 14 | # 15 | # if !foo 16 | # .bar() 17 | # .baz() 18 | # { 19 | # bar(); 20 | # } 21 | # 22 | # Setting this to "Max" results in a more consistent and less infuriating 23 | # wrapping style. 
24 | use_small_heuristics = 'Max' 25 | -------------------------------------------------------------------------------- /sestoft1996/Cargo.lock: -------------------------------------------------------------------------------- 1 | # This file is automatically @generated by Cargo. 2 | # It is not intended for manual editing. 3 | version = 3 4 | 5 | [[package]] 6 | name = "match" 7 | version = "1.0.0" 8 | -------------------------------------------------------------------------------- /sestoft1996/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "sestoft1996" 3 | version = "1.0.0" 4 | authors = ["Yorick Peterse "] 5 | edition = "2021" 6 | 7 | [lib] 8 | doctest = false 9 | -------------------------------------------------------------------------------- /sestoft1996/README.md: -------------------------------------------------------------------------------- 1 | # ML Pattern match compilation and partial evaluation 2 | 3 | This directory contains an implementation of the pattern matching algorithm 4 | introduced in the paper "ML Pattern match compilation and partial evaluation" by 5 | Peter Sestoft, from 1996. 6 | 7 | ## A short rant about the paper 8 | 9 | The paper is a bit of a pain to read, and took me a solid week to understand. 10 | Part of this is because I'm not familiar with Standard ML, so I first had to 11 | learn that to some degree. The syntax can also be hard to grok when you have 12 | functions calling functions and passing those results as arguments directly, 13 | especially combined with operators (e.g. is `foo bar :: baz` parsed as 14 | `foo(bar :: baz)` or `(foo bar) :: baz`?). 15 | 16 | It doesn't help that the paper makes references to the author's implementation, 17 | but the only two links regarding it are dead FTP links. 
I eventually found these 18 | implementations of this algorithm: 19 | 20 | - https://github.com/kfl/mosml/blob/f529b33bb891ff1df4aab198edad376f9ff64d28/src/compiler/Match.sml 21 | - https://github.com/rsdn/nemerle/blob/db4bc9078f1b6238da32df1519c1957e74b6834a/ncc/typing/DecisionTreeBuilder.n 22 | - https://github.com/rsdn/nitra/tree/master/Nitra/Nitra.Compiler/Generation/PatternMatching 23 | - https://github.com/melsman/mlkit/blob/237be62778985e76f912cefdc0bb21b22bed5bd4/src/Compiler/Lambda/CompileDec.sml#L510 24 | 25 | The Moscow ML implementation uses memoization and some extensions for the 26 | pattern matching logic. The Nemerle implementation is quite different and uses a 27 | more imperative/mutable approach. 28 | 29 | As to how the algorithm works: even now I don't quite understand why certain 30 | decisions were made, and the algorithm as a whole feels a bit crude. 31 | 32 | I could go on, but the summary is this: if you wish to understand the paper, I 33 | recommend reading through it while using my Rust code as a reference. It should 34 | be a bit easier to understand and translate to other languages, and it doesn't 35 | require a 20-year-old language (though maybe it will if you're reading this 20 36 | years from now). 37 | 38 | ## Project structure 39 | 40 | There are two implementations of the algorithm: a raw version, and an idiomatic 41 | version. Neither version implements the memoization strategy as discussed in 42 | section 7.5, as this likely won't work well due to Rust's single ownership 43 | requirement. Both versions are extensively commented to better explain why 44 | certain decisions were made, what to keep in mind when reading the paper, etc. 45 | 46 | ### The raw version 47 | 48 | The raw version is more or less a 1:1 translation of the SML code included in the 49 | paper. 
The code is terrible, relies on (poorly implemented) immutable lists 50 | (because the original algorithm requires immutable lists), and likely performs 51 | extremely poorly. I tried to keep this version as close to the paper as 52 | possible, only deviating where Rust simply required a different approach. 53 | 54 | Some differences from the paper: 55 | 56 | - Rust doesn't have built-in immutable lists, and the algorithm requires the use 57 | of immutable lists in a few places. Thus, we introduce a custom immutable 58 | linked list. 59 | - The paper assumes multiple ownership of values in a few places. This 60 | implementation instead clones values to work around that, as using a different 61 | approach requires a different implementation. 62 | - The `succeed'` and `fail'` functions are called `match_succeed` and 63 | `match_fail` respectively. Who the hell thought it was a good idea to allow 64 | quotes in symbol names? 65 | - When generating `Sel` nodes, the paper uses `i+1` to build the selector 66 | values. It's not clear why this is done (the paper makes no mention of it), 67 | and it seems unnecessary. As such we just use indexes starting at zero. 68 | - The paper implements various functions in a non-exhaustive manner, without 69 | any explanation as to why. My implementation uses exhaustive patterns where 70 | possible, and `unwrap()` in a few places where missing values (and thus 71 | panics) shouldn't occur in the absence of bugs (famous last words). 72 | 73 | ### The idiomatic version 74 | 75 | This implementation of the algorithm is closer to what you'd normally write in 76 | Rust. Some of the names used are still a bit confusing, but unfortunately I 77 | haven't been able to come up with better names. 78 | 79 | Unlike the raw implementation, this implementation doesn't rely on persistent 80 | lists. Instead, it uses mutable vectors that store values in reverse order. 
81 | Storing them in this order means a pop() returns the head of the vector, instead 82 | of the tail. This makes retrieving the head cheap, as no values need to be 83 | shifted. 84 | 85 | Some functions (e.g. `addneg` and `match_fail`) are inlined into their callers, 86 | as they are only called in one place. 87 | 88 | For traversing all the pattern matching rules we use a cursor, essentially 89 | turning the list into an iterator that you can rewind. This is needed because 90 | when building an `IfEq` node, both the true and false bodies need to start off 91 | with the same set of rules. Using a cursor allows us to do just that, but 92 | without cloning the rules. 93 | -------------------------------------------------------------------------------- /sestoft1996/src/idiomatic.rs: -------------------------------------------------------------------------------- 1 | /// An idiomatic Rust implementation of the pattern matching algorithm. 2 | use std::collections::HashSet; 3 | 4 | /// The result of a static match. 5 | #[derive(Debug)] 6 | enum Match { 7 | Yes, 8 | No, 9 | Maybe, 10 | } 11 | 12 | /// The description of terms already matched, corresponding to the `context` 13 | /// type in the paper. 
14 | struct Context { 15 | values: Vec<(Constructor, Vec)>, 16 | } 17 | 18 | impl Context { 19 | fn new() -> Self { 20 | Self { values: Vec::new() } 21 | } 22 | 23 | fn push(&mut self, value: (Constructor, Vec)) { 24 | self.values.push(value); 25 | } 26 | 27 | fn pop(&mut self) -> Option<(Constructor, Vec)> { 28 | self.values.pop() 29 | } 30 | 31 | fn add_argument_to_last(&mut self, term: Term) { 32 | if let Some((_, args)) = self.values.last_mut() { 33 | args.push(term); 34 | } 35 | } 36 | 37 | fn reconstruct_term(&self, term: Term, work: &Work) -> Term { 38 | self.values.iter().zip(work.iter()).fold( 39 | term, 40 | |term, ((con, args), (_, _, dargs))| { 41 | let mut new_args: Vec<_> = dargs.clone(); 42 | 43 | new_args.push(term); 44 | new_args.extend(args.iter().rev().cloned()); 45 | Term::Pos(con.clone(), new_args) 46 | }, 47 | ) 48 | } 49 | } 50 | 51 | /// The work stack as used in the paper. 52 | /// 53 | /// The paper uses a list of triple lists, removing the need for some append 54 | /// operations. This is a bit annoying to work with in Rust (we have to unwrap() 55 | /// in some places), but again we're trying to stay as close to the paper as 56 | /// possible. 57 | /// 58 | /// We use a type alias here so we don't have to re-type this type name in the 59 | /// various places that it's used. 60 | type Work = Vec<(Vec, Vec, Vec)>; 61 | 62 | /// The type of the right-hand side of a case (i.e. the code to run). 63 | /// 64 | /// For the sake of simplicity we just use a String here. In a real compiler 65 | /// this would probably be an AST node or another sort of IR to run upon a 66 | /// match. 67 | pub type RHS = String; 68 | 69 | /// A type for storing diagnostics produced by the decision tree compiler. 70 | pub struct Diagnostics { 71 | /// The diagnostic messages produced. 72 | /// 73 | /// In a real compiler this would include more than just a message, such as 74 | /// the line and numbers. 
75 | messages: Vec, 76 | 77 | /// The right-hand values (= the code you'd run upon a match) that have been 78 | /// processed. 79 | /// 80 | /// If a value isn't included in this set it means it and its pattern are 81 | /// redundant. 82 | /// 83 | /// In a real compiler you'd probably mark AST nodes directly. In our case 84 | /// the right-hand values are just simple strings, so we use a set instead. 85 | reachable: HashSet, 86 | } 87 | 88 | /// A type for compiling a list of rules into a decision tree. 89 | pub struct Compiler { 90 | /// The rules to compile into a decision tree. 91 | rules: Vec<(Pattern, RHS)>, 92 | 93 | /// The start of the first rule to compile. 94 | /// 95 | /// When generating IfEq nodes we need to generate two branches, both 96 | /// starting with the same set of rules. To avoid cloning we use a cursor, 97 | /// save it before processing one branch, then restore it for the other 98 | /// branch. 99 | rules_index: usize, 100 | 101 | diagnostics: Diagnostics, 102 | } 103 | 104 | impl Compiler { 105 | pub fn new(rules: Vec<(Pattern, RHS)>) -> Self { 106 | Self { 107 | rules, 108 | rules_index: 0, 109 | diagnostics: Diagnostics { 110 | messages: Vec::new(), 111 | reachable: HashSet::new(), 112 | }, 113 | } 114 | } 115 | 116 | pub fn compile(&mut self) -> Decision { 117 | self.fail(Term::bottom()) 118 | } 119 | 120 | fn fail(&mut self, term: Term) -> Decision { 121 | if let Some((pat, rhs)) = self.next_rule().cloned() { 122 | let ctx = Context::new(); 123 | let work = Vec::new(); 124 | 125 | self.match_pattern(pat, Access::Root, term, ctx, work, rhs) 126 | } else { 127 | self.diagnostics 128 | .messages 129 | .push(format!("Missing pattern: {}", term.error_string())); 130 | 131 | Decision::Failure 132 | } 133 | } 134 | 135 | fn succeed( 136 | &mut self, 137 | mut ctx: Context, 138 | mut work: Work, 139 | rhs: RHS, 140 | ) -> Decision { 141 | if let Some((mut pats, mut accs, mut terms)) = work.pop() { 142 | if let (Some(pat), Some(acc), 
Some(term)) = 143 | (pats.pop(), accs.pop(), terms.pop()) 144 | { 145 | work.push((pats, accs, terms)); 146 | self.match_pattern(pat, acc, term, ctx, work, rhs) 147 | } else { 148 | if let Some((con, mut args)) = ctx.pop() { 149 | args.reverse(); 150 | ctx.add_argument_to_last(Term::Pos(con, args)); 151 | } 152 | 153 | self.succeed(ctx, work, rhs) 154 | } 155 | } else { 156 | self.diagnostics.reachable.insert(rhs.clone()); 157 | Decision::Success(rhs) 158 | } 159 | } 160 | 161 | fn match_pattern( 162 | &mut self, 163 | pattern: Pattern, 164 | access: Access, 165 | term: Term, 166 | mut ctx: Context, 167 | work: Work, 168 | rhs: RHS, 169 | ) -> Decision { 170 | match pattern { 171 | Pattern::Variable(name) => { 172 | ctx.add_argument_to_last(term); 173 | Decision::Variable( 174 | access, 175 | name, 176 | Box::new(self.succeed(ctx, work, rhs)), 177 | ) 178 | } 179 | Pattern::Field(id, pat) => self.match_pattern( 180 | *pat, 181 | Access::Select(id, Box::new(access)), 182 | term, 183 | ctx, 184 | work, 185 | rhs, 186 | ), 187 | Pattern::Wildcard => { 188 | ctx.add_argument_to_last(term); 189 | self.succeed(ctx, work, rhs) 190 | } 191 | Pattern::Constructor(con, args) => match self 192 | .match_term(&con, &term) 193 | { 194 | Match::Yes => { 195 | self.matched(con, args, access, term, ctx, work, rhs) 196 | } 197 | Match::No => self.fail(ctx.reconstruct_term(term, &work)), 198 | Match::Maybe => { 199 | let false_term = 200 | ctx.reconstruct_term(term.clone().negated(&con), &work); 201 | let cursor = self.rules_index; 202 | let matched = self.matched( 203 | con.clone(), 204 | args, 205 | access.clone(), 206 | term, 207 | ctx, 208 | work, 209 | rhs, 210 | ); 211 | 212 | self.rules_index = cursor; 213 | 214 | Decision::IfEq( 215 | access, 216 | con, 217 | Box::new(matched), 218 | Box::new(self.fail(false_term)), 219 | ) 220 | } 221 | }, 222 | } 223 | } 224 | 225 | fn matched( 226 | &mut self, 227 | con: Constructor, 228 | args: Vec, 229 | obj: Access, 230 | term: Term, 
231 | mut ctx: Context, 232 | mut work: Work, 233 | rhs: RHS, 234 | ) -> Decision { 235 | let access = (0..con.arity) 236 | .rev() 237 | .map(|i| Access::Select(i, Box::new(obj.clone()))) 238 | .collect(); 239 | 240 | let terms = match term { 241 | Term::Pos(_, dargs) => dargs, 242 | Term::Neg(_) => vec![Term::bottom(); con.arity], 243 | }; 244 | 245 | ctx.push((con, Vec::new())); 246 | work.push((args, access, terms)); 247 | self.succeed(ctx, work, rhs) 248 | } 249 | 250 | fn match_term(&mut self, con: &Constructor, term: &Term) -> Match { 251 | match term { 252 | Term::Pos(tcon, _) if con == tcon => Match::Yes, 253 | Term::Pos(_, _) => Match::No, 254 | Term::Neg(exl) if exl.contains(con) => Match::No, 255 | Term::Neg(exl) if con.span == (exl.len() + 1) => Match::Yes, 256 | Term::Neg(_) => Match::Maybe, 257 | } 258 | } 259 | 260 | fn next_rule(&mut self) -> Option<&(Pattern, RHS)> { 261 | if self.rules_index >= self.rules.len() { 262 | None 263 | } else { 264 | let val = self.rules.get(self.rules_index); 265 | 266 | self.rules_index += 1; 267 | 268 | val 269 | } 270 | } 271 | } 272 | 273 | /// A type constructor. 274 | /// 275 | /// For a boolean, a constructor would have the following values: 276 | /// 277 | /// - name: true or false 278 | /// - arity: 0, as booleans don't take arguments 279 | /// - span: 2, as there are only two constructors (true and false) 280 | /// 281 | /// In a real compiler you'd probably use pointers/IDs to your type data 282 | /// structures instead, but for the sake of keeping things simple we just use a 283 | /// struct that can be cloned. 284 | #[derive(Debug, Eq, PartialEq, Clone)] 285 | pub struct Constructor { 286 | name: String, 287 | 288 | // The number of arguments. 289 | arity: usize, 290 | 291 | // The total number of constructors of the owning type 292 | // 293 | // A span of 0 means the type has an infinite amount of constructors. 
294 | span: usize, 295 | } 296 | 297 | /// A user provided pattern to match against an input value. 298 | /// 299 | /// We only provide two types of patterns: constructors, and variables/bindings. 300 | /// 301 | /// In a real compiler you'd probably be using AST nodes instead of dedicated 302 | /// pattern types, and include more cases for specific patterns (e.g. tuple and 303 | /// struct patterns). 304 | #[derive(Debug, Clone)] 305 | pub enum Pattern { 306 | Constructor(Constructor, Vec), 307 | Variable(String), 308 | Field(usize, Box), 309 | Wildcard, 310 | } 311 | 312 | #[derive(Debug, Clone, Eq, PartialEq)] 313 | pub enum Term { 314 | // `Cons` is the top-most constructor, and its components are described by 315 | // the Vec. 316 | // 317 | // The arguments are in reverse order, so the first argument is the last 318 | // value. 319 | Pos(Constructor, Vec), 320 | 321 | // Any term who's top-most constructor is _not_ any of the listed 322 | // constructors. 323 | // 324 | // For a Negative(S), the cardinality of S must be less than the span of 325 | // any constructor in S: 326 | // 327 | // cons.iter().all(|cons| cardinality(s) < span(cons)) 328 | // 329 | // Due to static typing, all constructors in S are of the same type, thus 330 | // have the same span. 331 | // 332 | // The constructors are in reverse order, so the first constructor is the 333 | // last value. 334 | Neg(Vec), 335 | } 336 | 337 | impl Term { 338 | fn bottom() -> Term { 339 | Term::Neg(Vec::new()) 340 | } 341 | 342 | fn negated(self, con: &Constructor) -> Term { 343 | match self { 344 | Term::Pos(_, _) => self, 345 | Term::Neg(mut nonset) => { 346 | nonset.push(con.clone()); 347 | Term::Neg(nonset) 348 | } 349 | } 350 | } 351 | } 352 | 353 | impl Term { 354 | /// Returns a string used to describe this term in an error message. 
355 | fn error_string(&self) -> String { 356 | match self { 357 | Term::Pos(cons, args) => { 358 | if args.is_empty() { 359 | cons.name.clone() 360 | } else { 361 | format!( 362 | "{}({})", 363 | cons.name, 364 | args.iter() 365 | .rev() 366 | .map(|v| v.error_string()) 367 | .collect::>() 368 | .join(", ") 369 | ) 370 | } 371 | } 372 | Term::Neg(_) => "_".to_string(), 373 | } 374 | } 375 | } 376 | 377 | /// The `access` type in the paper. 378 | #[derive(Clone, Debug, Eq, PartialEq)] 379 | pub enum Access { 380 | Root, 381 | Select(usize, Box), 382 | } 383 | 384 | /// The `decision` type in the paper. 385 | #[derive(Debug, Eq, PartialEq, Clone)] 386 | pub enum Decision { 387 | /// A pattern didn't match. 388 | Failure, 389 | 390 | /// A pattern is matched and the right-hand value is to be returned. 391 | Success(RHS), 392 | 393 | /// Checks if a constructor matches the value at the given access path. 394 | IfEq(Access, Constructor, Box, Box), 395 | 396 | /// Checks if any of the given constructors match the value at the given 397 | /// access path. 398 | Switch(Access, Vec<(Constructor, Decision)>, Box), 399 | 400 | /// Bind a value to a variable, then continue matching the rest of the 401 | /// input. 402 | Variable(Access, String, Box), 403 | } 404 | 405 | impl Decision { 406 | /// Replaces a series of nested IfEq nodes for the same access object with a 407 | /// Switch node. 
408 | pub fn replace_nested_if(self) -> Decision { 409 | match self { 410 | Decision::IfEq(root, con, ok, fail) => { 411 | let mut cases = vec![(con, *ok)]; 412 | let mut fallback = fail; 413 | 414 | loop { 415 | match *fallback { 416 | Decision::IfEq(acc, con, ok, fail) if root == acc => { 417 | fallback = fail; 418 | 419 | cases.push((con, *ok)); 420 | } 421 | _ => break, 422 | } 423 | } 424 | 425 | if cases.len() == 1 { 426 | let (con, ok) = cases.pop().unwrap(); 427 | 428 | Decision::IfEq(root, con, Box::new(ok), fallback) 429 | } else { 430 | Decision::Switch(root, cases, fallback) 431 | } 432 | } 433 | _ => self, 434 | } 435 | } 436 | } 437 | 438 | #[cfg(test)] 439 | mod tests { 440 | use super::*; 441 | 442 | fn con(name: &str, arity: usize, span: usize) -> Constructor { 443 | Constructor { name: name.to_string(), arity, span } 444 | } 445 | 446 | fn nil() -> Pattern { 447 | Pattern::Constructor(con("nil", 0, 1), Vec::new()) 448 | } 449 | 450 | fn tt_con() -> Constructor { 451 | con("true", 0, 2) 452 | } 453 | 454 | fn ff_con() -> Constructor { 455 | con("false", 0, 2) 456 | } 457 | 458 | fn tt() -> Pattern { 459 | Pattern::Constructor(tt_con(), Vec::new()) 460 | } 461 | 462 | fn ff() -> Pattern { 463 | Pattern::Constructor(ff_con(), Vec::new()) 464 | } 465 | 466 | fn pair(a: Pattern, b: Pattern) -> Pattern { 467 | Pattern::Constructor(con("pair", 2, 1), vec![b, a]) 468 | } 469 | 470 | fn var(name: &str) -> Pattern { 471 | Pattern::Variable(name.to_string()) 472 | } 473 | 474 | fn if_eq( 475 | acc: Access, 476 | con: Constructor, 477 | ok: Decision, 478 | fail: Decision, 479 | ) -> Decision { 480 | Decision::IfEq(acc, con, Box::new(ok), Box::new(fail)) 481 | } 482 | 483 | fn switch( 484 | acc: Access, 485 | cases: Vec<(Constructor, Decision)>, 486 | fallback: Decision, 487 | ) -> Decision { 488 | Decision::Switch(acc, cases, Box::new(fallback)) 489 | } 490 | 491 | fn bind(access: Access, name: &str, rest: Decision) -> Decision { 492 | 
Decision::Variable(access, name.to_string(), Box::new(rest)) 493 | } 494 | 495 | fn success(value: &str) -> Decision { 496 | Decision::Success(value.to_string()) 497 | } 498 | 499 | fn failure() -> Decision { 500 | Decision::Failure 501 | } 502 | 503 | fn rhs(value: &str) -> String { 504 | value.to_string() 505 | } 506 | 507 | fn obj() -> Access { 508 | Access::Root 509 | } 510 | 511 | fn sel(index: usize, acc: Access) -> Access { 512 | Access::Select(index, Box::new(acc)) 513 | } 514 | 515 | fn compile(rules: Vec<(Pattern, RHS)>) -> (Decision, Diagnostics) { 516 | let mut compiler = Compiler::new(rules); 517 | let tree = compiler.compile(); 518 | 519 | (tree, compiler.diagnostics) 520 | } 521 | 522 | #[test] 523 | fn test_term_description_error_string() { 524 | let term = Term::Pos( 525 | con("box", 2, 1), 526 | vec![ 527 | Term::Neg(vec![con("false", 0, 2)]), 528 | Term::Pos(con("true", 0, 2), Vec::new()), 529 | ], 530 | ); 531 | 532 | assert_eq!(term.error_string(), "box(true, _)"); 533 | } 534 | 535 | #[test] 536 | fn test_context_reconstruct_term() { 537 | let mut ctx = Context::new(); 538 | 539 | ctx.push(( 540 | con("baz", 0, 1), 541 | vec![ 542 | Term::Neg(vec![con("arg2", 0, 1)]), 543 | Term::Neg(vec![con("arg1", 0, 1)]), 544 | ], 545 | )); 546 | 547 | let work = vec![( 548 | Vec::new(), 549 | Vec::new(), 550 | vec![ 551 | Term::Neg(vec![con("work2", 0, 1)]), 552 | Term::Neg(vec![con("work1", 0, 1)]), 553 | ], 554 | )]; 555 | let dsc = Term::Neg(vec![con("bar", 0, 1)]); 556 | let new_dsc = ctx.reconstruct_term(dsc, &work); 557 | 558 | assert_eq!( 559 | new_dsc, 560 | Term::Pos( 561 | con("baz", 0, 1), 562 | vec![ 563 | Term::Neg(vec![con("work2", 0, 1)]), 564 | Term::Neg(vec![con("work1", 0, 1)]), 565 | Term::Neg(vec![con("bar", 0, 1)]), 566 | Term::Neg(vec![con("arg1", 0, 1)]), 567 | Term::Neg(vec![con("arg2", 0, 1)]), 568 | ] 569 | ) 570 | ); 571 | } 572 | 573 | #[test] 574 | fn test_context_add_argument_to_last() { 575 | let mut ctx = Context::new(); 
576 | 577 | ctx.push(( 578 | con("baz", 0, 1), 579 | vec![ 580 | Term::Neg(vec![con("arg2", 0, 1)]), 581 | Term::Neg(vec![con("arg1", 0, 1)]), 582 | ], 583 | )); 584 | 585 | let term = Term::Neg(vec![con("bar", 0, 1)]); 586 | 587 | ctx.add_argument_to_last(term); 588 | 589 | assert_eq!( 590 | ctx.values, 591 | vec![( 592 | con("baz", 0, 1), 593 | vec![ 594 | Term::Neg(vec![con("arg2", 0, 1)]), 595 | Term::Neg(vec![con("arg1", 0, 1)]), 596 | Term::Neg(vec![con("bar", 0, 1)]), 597 | ] 598 | )] 599 | ); 600 | } 601 | 602 | #[test] 603 | fn test_match_always_succeeds() { 604 | let (result, _) = compile(vec![(nil(), rhs("true"))]); 605 | 606 | assert_eq!(result, success("true")); 607 | } 608 | 609 | #[test] 610 | fn test_match_always_fails() { 611 | let (result, _) = compile(Vec::new()); 612 | 613 | assert_eq!(result, failure()); 614 | } 615 | 616 | #[test] 617 | fn test_match_single_pattern() { 618 | let (result, _) = 619 | compile(vec![(tt(), rhs("true")), (ff(), rhs("false"))]); 620 | 621 | assert_eq!( 622 | result, 623 | if_eq(obj(), tt_con(), success("true"), success("false")) 624 | ); 625 | } 626 | 627 | #[test] 628 | fn test_match_var() { 629 | let (result, _) = compile(vec![(var("a"), rhs("true"))]); 630 | 631 | assert_eq!(result, bind(obj(), "a", success("true"))); 632 | } 633 | 634 | #[test] 635 | fn test_match_field() { 636 | let (result, _) = compile(vec![ 637 | (Pattern::Field(42, Box::new(tt())), rhs("foo")), 638 | (Pattern::Field(42, Box::new(var("a"))), rhs("bar")), 639 | ]); 640 | 641 | assert_eq!( 642 | result, 643 | if_eq( 644 | sel(42, obj()), 645 | tt_con(), 646 | success("foo"), 647 | bind(sel(42, obj()), "a", success("bar")) 648 | ) 649 | ); 650 | } 651 | 652 | #[test] 653 | fn test_match_wildcard() { 654 | let (result, _) = compile(vec![(Pattern::Wildcard, rhs("true"))]); 655 | 656 | assert_eq!(result, success("true")); 657 | } 658 | 659 | #[test] 660 | fn test_match_nested_var() { 661 | let (result, _) = compile(vec![(pair(var("a"), var("b")), 
rhs("foo"))]); 662 | 663 | assert_eq!( 664 | result, 665 | bind(sel(0, obj()), "a", bind(sel(1, obj()), "b", success("foo"))) 666 | ); 667 | } 668 | 669 | #[test] 670 | fn test_match_multiple_patterns() { 671 | let (result, diags) = compile(vec![ 672 | (tt(), rhs("true")), 673 | (ff(), rhs("false")), 674 | (tt(), rhs("redundant")), 675 | ]); 676 | 677 | // Redundant patterns are ignored on the decision tree. This is also how 678 | // you'd detect redundant patterns: you'd somehow mark every RHS when 679 | // you produce their Success nodes. Any RHS nodes that remain unmarked 680 | // are redundant. 681 | assert_eq!( 682 | result, 683 | if_eq(obj(), tt_con(), success("true"), success("false")) 684 | ); 685 | 686 | assert!(diags.reachable.contains(&"true".to_string())); 687 | assert!(diags.reachable.contains(&"false".to_string())); 688 | assert!(!diags.reachable.contains(&"redundant".to_string())); 689 | } 690 | 691 | #[test] 692 | fn test_nonexhaustive_match() { 693 | let (result, diags) = compile(vec![(tt(), rhs("true"))]); 694 | 695 | assert_eq!(result, if_eq(obj(), tt_con(), success("true"), failure())); 696 | assert_eq!(diags.messages, vec!["Missing pattern: _".to_string()]); 697 | } 698 | 699 | #[test] 700 | fn test_nonexhaustive_match_from_paper() { 701 | let green = Pattern::Constructor(con("green", 0, 3), Vec::new()); 702 | let (result, diags) = compile(vec![ 703 | (pair(tt(), green.clone()), rhs("111")), 704 | (pair(ff(), green.clone()), rhs("222")), 705 | ]); 706 | 707 | assert_eq!( 708 | result, 709 | if_eq( 710 | sel(0, obj()), 711 | tt_con(), 712 | if_eq( 713 | sel(1, obj()), 714 | con("green", 0, 3), 715 | success("111"), 716 | failure() 717 | ), 718 | if_eq( 719 | sel(1, obj()), 720 | con("green", 0, 3), 721 | success("222"), 722 | failure() 723 | ) 724 | ) 725 | ); 726 | 727 | assert_eq!( 728 | diags.messages, 729 | vec![ 730 | "Missing pattern: pair(true, _)".to_string(), 731 | "Missing pattern: pair(false, _)".to_string() 732 | ] 733 | ); 734 | } 
735 | 736 | #[test] 737 | fn test_nested_match() { 738 | let (result, _) = compile(vec![ 739 | (pair(tt(), tt()), rhs("foo")), 740 | (pair(tt(), ff()), rhs("bar")), 741 | (pair(ff(), ff()), rhs("baz")), 742 | (pair(ff(), tt()), rhs("quix")), 743 | ]); 744 | 745 | assert_eq!( 746 | result, 747 | if_eq( 748 | sel(0, obj()), 749 | tt_con(), 750 | if_eq(sel(1, obj()), tt_con(), success("foo"), success("bar")), 751 | if_eq(sel(1, obj()), ff_con(), success("baz"), success("quix")) 752 | ) 753 | ); 754 | } 755 | 756 | #[test] 757 | fn test_match_with_switch() { 758 | let a = con("a", 0, 4); 759 | let b = con("b", 0, 4); 760 | let c = con("c", 0, 4); 761 | let d = con("d", 0, 4); 762 | let a_pat = Pattern::Constructor(a.clone(), Vec::new()); 763 | let b_pat = Pattern::Constructor(b.clone(), Vec::new()); 764 | let c_pat = Pattern::Constructor(c.clone(), Vec::new()); 765 | let d_pat = Pattern::Constructor(d.clone(), Vec::new()); 766 | let (result, _) = compile(vec![ 767 | ((a_pat, rhs("a"))), 768 | ((b_pat, rhs("b"))), 769 | ((c_pat, rhs("c"))), 770 | ((d_pat, rhs("d"))), 771 | ]); 772 | 773 | assert_eq!( 774 | result.replace_nested_if(), 775 | switch( 776 | obj(), 777 | vec![(a, success("a")), (b, success("b")), (c, success("c"))], 778 | success("d") 779 | ) 780 | ); 781 | } 782 | 783 | #[test] 784 | fn test_nested_match_without_switch() { 785 | let (result, _) = compile(vec![ 786 | (pair(tt(), tt()), rhs("foo")), 787 | (pair(tt(), ff()), rhs("bar")), 788 | (pair(ff(), ff()), rhs("baz")), 789 | (pair(ff(), tt()), rhs("quix")), 790 | ]); 791 | 792 | // This doesn't produce a switch, as the nested patterns don't test the 793 | // same value. 
794 | assert_eq!( 795 | result.replace_nested_if(), 796 | if_eq( 797 | sel(0, obj()), 798 | tt_con(), 799 | if_eq(sel(1, obj()), tt_con(), success("foo"), success("bar")), 800 | if_eq(sel(1, obj()), ff_con(), success("baz"), success("quix")) 801 | ) 802 | ); 803 | } 804 | 805 | #[test] 806 | fn test_match_with_args() { 807 | let some = con("some", 3, 2); 808 | let (result, _) = compile(vec![ 809 | ( 810 | Pattern::Constructor(some.clone(), vec![ff(), tt(), tt()]), 811 | rhs("foo"), 812 | ), 813 | (var("x"), rhs("bar")), 814 | ]); 815 | 816 | assert_eq!( 817 | result, 818 | if_eq( 819 | obj(), 820 | some, 821 | if_eq( 822 | sel(0, obj()), 823 | tt_con(), 824 | if_eq( 825 | sel(1, obj()), 826 | tt_con(), 827 | if_eq( 828 | sel(2, obj()), 829 | ff_con(), 830 | success("foo"), 831 | bind(obj(), "x", success("bar")), 832 | ), 833 | bind(obj(), "x", success("bar")), 834 | ), 835 | bind(obj(), "x", success("bar")), 836 | ), 837 | bind(obj(), "x", success("bar")), 838 | ) 839 | ); 840 | } 841 | 842 | #[test] 843 | fn test_match_with_infinite_span() { 844 | let (result, diag) = compile(vec![( 845 | Pattern::Constructor(con("int", 0, 0), Vec::new()), 846 | rhs("foo"), 847 | )]); 848 | 849 | assert_eq!( 850 | result, 851 | if_eq(obj(), con("int", 0, 0), success("foo"), failure()) 852 | ); 853 | assert_eq!(diag.messages, vec!["Missing pattern: _"]); 854 | } 855 | 856 | #[test] 857 | fn test_match_nonexhaustive_with_args() { 858 | let some = con("some", 3, 2); 859 | let (result, diags) = compile(vec![( 860 | Pattern::Constructor(some.clone(), vec![ff(), ff(), tt()]), 861 | rhs("foo"), 862 | )]); 863 | 864 | assert_eq!( 865 | result, 866 | if_eq( 867 | obj(), 868 | some, 869 | if_eq( 870 | sel(0, obj()), 871 | tt_con(), 872 | if_eq( 873 | sel(1, obj()), 874 | ff_con(), 875 | if_eq( 876 | sel(2, obj()), 877 | ff_con(), 878 | success("foo"), 879 | failure() 880 | ), 881 | failure() 882 | ), 883 | failure() 884 | ), 885 | failure() 886 | ) 887 | ); 888 | 889 | assert_eq!( 890 | 
diags.messages, 891 | vec![ 892 | "Missing pattern: some(true, false, _)".to_string(), 893 | "Missing pattern: some(true, _, _)".to_string(), 894 | "Missing pattern: some(_, _, _)".to_string(), 895 | "Missing pattern: _".to_string(), 896 | ] 897 | ); 898 | } 899 | } 900 | -------------------------------------------------------------------------------- /sestoft1996/src/lib.rs: -------------------------------------------------------------------------------- 1 | #![allow(clippy::too_many_arguments)] 2 | 3 | pub mod idiomatic; 4 | pub mod raw; 5 | -------------------------------------------------------------------------------- /sestoft1996/src/raw.rs: -------------------------------------------------------------------------------- 1 | // This implementation is a more or less 1:1 port of the SML implementation 2 | // provided in the paper, with only a few changes made to make things work on 3 | // Rust. For example, the SML implementation assumes multiple ownership of 4 | // certain values, which isn't allowed in Rust. For the sake of simplicity, we 5 | // just clone values in this case. 6 | // 7 | // This implementation doesn't use the memoization approach briefly mentioned in 8 | // section 7.5 of the paper. This requires multiple ownership of the tree nodes, 9 | // or a different way of building the tree/graph (e.g. using IDs). To keep 10 | // things simple, we skip over this. 11 | // 12 | // Because this implementation is more or less a direct translation, it's _not_ 13 | // idiomatic Rust. An idiomatic implementation is provided separately. 14 | // 15 | // The Moscow ML compiler uses hash consing and a DAG as discussed in section 16 | // 7.5 of the paper. 17 | use std::collections::HashSet; 18 | use std::fmt; 19 | use std::rc::Rc; 20 | 21 | /// An immutable linked list. 22 | /// 23 | /// The algorithm as presented in the paper makes use of and requires immutable 24 | /// lists. 
For example, when compiling the `IfEq` nodes it compiles two 25 | /// different branches, but assumes work start off with the same set of rules, 26 | /// `work` values, etc. Since we're trying to stay as close to the paper as 27 | /// possible, we also follow the use of immutable data types. 28 | /// 29 | /// Like the rest of this implementation we're focusing on keeping things as 30 | /// simple as is reasonable, rather than making the implementation efficient. 31 | #[derive(Eq, PartialEq)] 32 | struct Node { 33 | value: T, 34 | next: Option>>, 35 | } 36 | 37 | #[derive(Clone, Eq, PartialEq)] 38 | pub struct List { 39 | head: Option>>, 40 | len: usize, 41 | } 42 | 43 | impl List { 44 | fn new() -> List { 45 | List { head: None, len: 0 } 46 | } 47 | 48 | /// Returns a new list starting with the given value. 49 | fn add(&self, value: T) -> List { 50 | List { 51 | head: Some(Rc::new(Node { value, next: self.head.clone() })), 52 | len: self.len + 1, 53 | } 54 | } 55 | 56 | /// Splits a list into the head and a list of the nodes that follow it. 57 | fn split(&self) -> (Option<&T>, List) { 58 | if let Some(n) = self.head.as_ref() { 59 | (Some(&n.value), List { head: n.next.clone(), len: self.len - 1 }) 60 | } else { 61 | (None, List { head: self.head.clone(), len: self.len }) 62 | } 63 | } 64 | 65 | fn iter(&self) -> ListIter { 66 | ListIter { node: self.head.as_ref() } 67 | } 68 | 69 | fn len(&self) -> usize { 70 | self.len 71 | } 72 | 73 | fn is_empty(&self) -> bool { 74 | self.head.is_none() 75 | } 76 | } 77 | 78 | impl List { 79 | fn contains(&self, value: &T) -> bool { 80 | self.iter().any(|v| v == value) 81 | } 82 | } 83 | 84 | impl List { 85 | /// Merges `self` and `other`. 86 | fn merge(&self, other: List) -> List { 87 | let mut new_list = List::new(); 88 | 89 | for value in self.iter().chain(other.iter()) { 90 | new_list = new_list.add(value.clone()); 91 | } 92 | 93 | new_list.rev() 94 | } 95 | 96 | /// Returns a new list with the values in reverse order. 
97 | fn rev(&self) -> List { 98 | let mut new_list = List::new(); 99 | 100 | for v in self.iter() { 101 | new_list = new_list.add(v.clone()); 102 | } 103 | 104 | new_list 105 | } 106 | } 107 | 108 | impl fmt::Debug for List { 109 | fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { 110 | f.debug_list().entries(self.iter()).finish() 111 | } 112 | } 113 | 114 | /// An iterator over the values in an immutable list. 115 | struct ListIter<'a, T> { 116 | node: Option<&'a Rc>>, 117 | } 118 | 119 | impl<'a, T> Iterator for ListIter<'a, T> { 120 | type Item = &'a T; 121 | 122 | fn next(&mut self) -> Option { 123 | if let Some(node) = self.node.take() { 124 | self.node = node.next.as_ref(); 125 | 126 | Some(&node.value) 127 | } else { 128 | None 129 | } 130 | } 131 | } 132 | 133 | /// The type used for storing diagnostic messages. 134 | pub struct Diagnostics { 135 | /// The diagnostic messages produced. 136 | /// 137 | /// In a real compiler this would include more than just a message, such as 138 | /// the line and column numbers. 139 | messages: Vec, 140 | 141 | /// The right-hand values (= the code you'd run upon a match) that have been 142 | /// processed. 143 | /// 144 | /// If a value isn't included in this set it means it and its pattern are 145 | /// redundant. 146 | /// 147 | /// In a real compiler you'd probably mark AST nodes directly. In our case 148 | /// the right-hand values are just simple strings, so we use a set instead. 149 | reachable: HashSet, 150 | } 151 | 152 | /// The `con` (= constructor) type in the paper. 
153 | /// 154 | /// For a boolean, a constructor would have the following values: 155 | /// 156 | /// - name: true or false 157 | /// - arity: 0, as booleans don't take arguments 158 | /// - span: 2, as there are only two constructors (true and false) 159 | /// 160 | /// In a real compiler you'd probably use pointers/IDs to your type data 161 | /// structures instead, but for the sake of keeping things simple we just use a 162 | /// struct that can be cloned. 163 | #[derive(Debug, Eq, PartialEq, Clone)] 164 | pub struct Con { 165 | name: String, 166 | 167 | // The number of arguments. 168 | arity: usize, 169 | 170 | // The total number of constructors of the owning type. 171 | // 172 | // A span of 0 means the type has an infinite number of constructors. 173 | span: usize, 174 | } 175 | 176 | /// A user provided pattern to match against an input value. 177 | /// 178 | /// We only provide two types of patterns: constructors, and variables/bindings. 179 | /// 180 | /// In a real compiler you'd probably be using AST nodes instead of dedicated 181 | /// pattern types, and include more cases for specific patterns (e.g. tuple and 182 | /// struct patterns). 183 | #[derive(Debug, Clone)] 184 | pub enum Pattern { 185 | Cons(Con, List), 186 | Var(String), 187 | } 188 | 189 | /// The `termd` type from the paper. 190 | #[derive(Debug, Clone, Eq, PartialEq)] 191 | pub enum TermDesc { 192 | // `Con` is the top-most constructor, and its components are described by 193 | // the list. 194 | Pos(Con, List), 195 | 196 | // Any term whose top-most constructor is _not_ any of the listed 197 | // constructors. 198 | // 199 | // For a Neg(S), the cardinality of S must be less than the span of 200 | // any constructor in S: 201 | // 202 | // cons.iter().all(|cons| cardinality(s) < span(cons)) 203 | // 204 | // Due to static typing, all constructors in S are of the same type, thus 205 | // have the same span. 
206 | Neg(List), 207 | } 208 | 209 | impl TermDesc { 210 | /// Returns a string used to describe this term in an error message. 211 | /// 212 | /// In a real compiler you'd do the following: 213 | /// 214 | /// For a Pos, just display the pattern/type/whatever name 215 | /// 216 | /// For a Neg(list), obtain all possible values from the constructor, ignore 217 | /// those in "list", then produce a name using the remaining values. So if 218 | /// "list" is `[red]`, and the possible values are `[red, green, blue]`, the 219 | /// returned string could be `green | blue`. If this is nested inside a 220 | /// `Pos("tuple", ...)` node you'd end up with something like 221 | /// `tuple(green | blue)`. 222 | /// 223 | /// For the sake of simplicity we just return `_` for a Neg. 224 | fn error_string(&self) -> String { 225 | match self { 226 | TermDesc::Pos(cons, args) => { 227 | if args.is_empty() { 228 | cons.name.clone() 229 | } else { 230 | format!( 231 | "{}({})", 232 | cons.name, 233 | args.iter() 234 | .map(|v| v.error_string()) 235 | .collect::>() 236 | .join(", ") 237 | ) 238 | } 239 | } 240 | TermDesc::Neg(_) => "_".to_string(), 241 | } 242 | } 243 | } 244 | 245 | /// The `access` type in the paper. 246 | #[derive(Clone, Debug, Eq, PartialEq)] 247 | pub enum Access { 248 | Obj, 249 | Sel(usize, Box), 250 | } 251 | 252 | /// The `decision` type in the paper. 253 | #[derive(Debug, Eq, PartialEq, Clone)] 254 | pub enum Decision { 255 | /// A pattern didn't match. 256 | Failure, 257 | 258 | /// A pattern is matched and the right-hand value is to be returned. 259 | Success(RHS), 260 | 261 | /// Checks if a constructor matches the value at the given access path. 262 | /// 263 | /// The members are as follows: 264 | /// 265 | /// 1. The value to test against 266 | /// 2. The pattern/value to match against 267 | /// 3. The path to take upon a match 268 | /// 4. 
The path to take upon a failure 269 | /// 270 | /// A node like this: 271 | /// 272 | /// IfEq(Sel(0, Obj), x, ok, err) 273 | /// 274 | /// Translates to roughly the following pseudo code: 275 | /// 276 | /// if obj.0 is x { 277 | /// ok 278 | /// } else { 279 | /// err 280 | /// } 281 | IfEq(Access, Con, Box, Box), 282 | 283 | /// Checks if any of the given constructors match the value at the given 284 | /// access path. 285 | /// 286 | /// The members are as follows: 287 | /// 288 | /// 1. The value to test against 289 | /// 2. The list of constructors to test and their corresponding decisions 290 | /// 3. A fallback decision in case no patterns match 291 | /// 292 | /// The fallback is needed because given a type with a span of N, IfEq nodes 293 | /// only test N-1 constructors, as the last possible constructor is 294 | /// implicitly assumed in the IfEq node's "else" body. That is, IfEq tests 295 | /// are like this: 296 | /// 297 | /// if value is green { 298 | /// ... 299 | /// } else { 300 | /// if value is red { 301 | /// ... 302 | /// } else { 303 | /// .. 304 | /// } 305 | /// } 306 | /// 307 | /// And not like this: 308 | /// 309 | /// if value is green { 310 | /// ... 311 | /// } else if value is red { 312 | /// ... 313 | /// } else if value is blue { 314 | /// ... 315 | /// } 316 | /// 317 | /// A real compiler may have to somehow "lift" the fallback into a 318 | /// switch/jump case. 319 | Switch(Access, List<(Con, Decision)>, Box), 320 | } 321 | 322 | /// The result of the `staticmatch` (or in our case `static_match`) function. 323 | #[derive(Debug)] 324 | enum StaticMatch { 325 | Yes, 326 | No, 327 | Maybe, 328 | } 329 | 330 | /// `type context = (con * termd list) list` in the paper. 331 | type Context = List<(Con, List)>; 332 | 333 | /// The work stack as used in the paper. 334 | /// 335 | /// The paper uses a list of triple lists, removing the need for some append 336 | /// operations. 
This is a bit annoying to work with in Rust (we have to unwrap() 337 | /// in some places), but again we're trying to stay as close to the paper as 338 | /// possible. 339 | /// 340 | /// We use a type alias here so we don't have to re-type this type name in the 341 | /// various places that it's used. 342 | type Work = List<(List, List, List)>; 343 | 344 | /// The type of the right-hand side of a case (i.e. the code to run). 345 | /// 346 | /// For the sake of simplicity we just use a String here. In a real compiler 347 | /// this would probably be an AST node or another sort of IR to run upon a 348 | /// match. 349 | pub type RHS = String; 350 | 351 | /// The `addneg` function in the paper. 352 | fn addneg(dsc: TermDesc, con: Con) -> TermDesc { 353 | match dsc { 354 | // The paper introduces this function as a non-exhaustive function. The 355 | // implementation in the Moscow ML compiler just returns the term when 356 | // it's a Pos, so we do the same. 357 | TermDesc::Pos(_, _) => dsc, 358 | TermDesc::Neg(nonset) => TermDesc::Neg(nonset.add(con)), 359 | } 360 | } 361 | 362 | /// The `staticmatch` function in the paper. 363 | fn staticmatch(pcon: &Con, term: &TermDesc) -> StaticMatch { 364 | match term { 365 | TermDesc::Pos(scon, _) => { 366 | if pcon == scon { 367 | StaticMatch::Yes 368 | } else { 369 | StaticMatch::No 370 | } 371 | } 372 | TermDesc::Neg(excluded) => { 373 | if excluded.contains(pcon) { 374 | StaticMatch::No 375 | } else if pcon.span == (excluded.len() + 1) { 376 | // The way this works is as follows: 377 | // 378 | // A boolean has a span of two, as it has two constructors (true 379 | // and false). 380 | // 381 | // The `if` above means we determined our constructor IS NOT in 382 | // the deny list. Due to static typing, our list can't 383 | // contain unrelated constructors (e.g. an ADT constructor). 
384 | // 385 | // Thus, if the length of the deny list is one less than the 386 | // span of our type, we know for a fact our constructor matches 387 | // the remaining constructor. 388 | // 389 | // In other words: we know we are NOT A, B, and C, and the only 390 | // remaining option is D. Thus, we match D. 391 | StaticMatch::Yes 392 | } else { 393 | StaticMatch::Maybe 394 | } 395 | } 396 | } 397 | } 398 | 399 | /// The equivalent of `List.tabulate` as used in the paper. 400 | /// 401 | /// This function is kind of pointless in Rust as we could just use map(), but 402 | /// we try to stay as close to the paper as possible in this implementation. 403 | fn tabulate T>(length: usize, func: F) -> List { 404 | let mut list = List::new(); 405 | 406 | for val in (0..length).rev() { 407 | list = list.add(func(val)); 408 | } 409 | 410 | list 411 | } 412 | 413 | fn args T>(pcon: &Con, func: F) -> List { 414 | tabulate(pcon.arity, func) 415 | } 416 | 417 | fn getdargs(pcon: &Con, term: TermDesc) -> List { 418 | match term { 419 | TermDesc::Pos(_, dargs) => dargs, 420 | TermDesc::Neg(_) => { 421 | tabulate(pcon.arity, |_| TermDesc::Neg(List::new())) 422 | } 423 | } 424 | } 425 | 426 | fn getoargs(pcon: &Con, acc: Access) -> List { 427 | // The paper uses `i+1`, presumably because humans use "1" to address the 428 | // first element (or maybe this is an SML thing?). Unfortunately, this isn't 429 | // clarified in the paper. Since it doesn't seem to actually matter, and 430 | // basically everything is 0-indexed, we drop the +1 here. 
431 | args(pcon, |i| Access::Sel(i, Box::new(acc.clone()))) 432 | } 433 | 434 | fn augment(ctx: Context, dsc: TermDesc) -> Context { 435 | let (val, rest) = ctx.split(); 436 | 437 | if let Some((con, args)) = val { 438 | rest.add((con.clone(), args.add(dsc))) 439 | } else { 440 | rest 441 | } 442 | } 443 | 444 | fn norm(ctx: Context) -> Context { 445 | let (val, rest) = ctx.split(); 446 | 447 | if let Some((con, args)) = val { 448 | augment(rest, TermDesc::Pos(con.clone(), args.rev())) 449 | } else { 450 | rest 451 | } 452 | } 453 | 454 | fn builddsc(ctx: Context, dsc: TermDesc, work: Work) -> TermDesc { 455 | if let (Some((con, args)), rest) = ctx.split() { 456 | let (job, workr) = work.split(); 457 | let (_, _, dargs) = job.unwrap(); 458 | 459 | // The paper uses the following code for this: 460 | // 461 | // rev args @ (dsc :: dargs) 462 | // 463 | // SML parses this as follows: 464 | // 465 | // (rev args) @ (dsc :: dargs) 466 | // 467 | // That is: it first reverses `args`, then appends the result of 468 | // `(dsc :: dargs)` to it. If you were to _first_ merge the values and 469 | // then reverse, you'd get incorrect decision trees. Unfortunately, I 470 | // ran into exactly that bug, and it took me a few hours to figure out. 
471 | // And this is why functions with arguments should use parentheses and 472 | // commas :) 473 | let new_dsc = 474 | TermDesc::Pos(con.clone(), args.rev().merge(dargs.add(dsc))); 475 | 476 | builddsc(rest, new_dsc, workr) 477 | } else { 478 | dsc 479 | } 480 | } 481 | 482 | fn fail( 483 | dsc: TermDesc, 484 | rules: List<(Pattern, RHS)>, 485 | diags: &mut Diagnostics, 486 | ) -> Decision { 487 | if let (Some((pat1, rhs1)), rulesr) = rules.split() { 488 | matches( 489 | pat1.clone(), 490 | Access::Obj, 491 | dsc, 492 | List::new(), 493 | List::new(), 494 | rhs1.clone(), 495 | rulesr, 496 | diags, 497 | ) 498 | } else { 499 | diags.messages.push(format!("Missing pattern: {}", dsc.error_string())); 500 | Decision::Failure 501 | } 502 | } 503 | 504 | fn succeed( 505 | ctx: Context, 506 | work: Work, 507 | rhs: RHS, 508 | rules: List<(Pattern, RHS)>, 509 | diags: &mut Diagnostics, 510 | ) -> Decision { 511 | if let (Some((pats, accs, dscs)), workr) = work.split() { 512 | if pats.is_empty() && accs.is_empty() && dscs.is_empty() { 513 | succeed(norm(ctx), workr, rhs, rules, diags) 514 | } else { 515 | let (pat1, patr) = pats.split(); 516 | let (obj1, objr) = accs.split(); 517 | let (dsc1, dscr) = dscs.split(); 518 | 519 | matches( 520 | pat1.unwrap().clone(), 521 | obj1.unwrap().clone(), 522 | dsc1.unwrap().clone(), 523 | ctx, 524 | workr.add((patr, objr, dscr)), 525 | rhs, 526 | rules, 527 | diags, 528 | ) 529 | } 530 | } else { 531 | diags.reachable.insert(rhs.clone()); 532 | Decision::Success(rhs) 533 | } 534 | } 535 | 536 | /// This corresponds to the inner function `succeed'` in the paper. 
537 | fn match_succeed( 538 | pcon: Con, 539 | pargs: List, 540 | obj: Access, 541 | dsc: TermDesc, 542 | ctx: Context, 543 | work: Work, 544 | rhs: RHS, 545 | rules: List<(Pattern, RHS)>, 546 | diags: &mut Diagnostics, 547 | ) -> Decision { 548 | let oargs = getoargs(&pcon, obj); 549 | let dargs = getdargs(&pcon, dsc); 550 | 551 | succeed( 552 | ctx.add((pcon, List::new())), 553 | work.add((pargs, oargs, dargs)), 554 | rhs, 555 | rules, 556 | diags, 557 | ) 558 | } 559 | 560 | /// This corresponds to the inner function `fail'` in the paper. 561 | fn match_fail( 562 | newdsc: TermDesc, 563 | ctx: Context, 564 | work: Work, 565 | rules: List<(Pattern, RHS)>, 566 | diags: &mut Diagnostics, 567 | ) -> Decision { 568 | fail(builddsc(ctx, newdsc, work), rules, diags) 569 | } 570 | 571 | fn matches( 572 | pat1: Pattern, 573 | obj: Access, 574 | dsc: TermDesc, 575 | ctx: Context, 576 | work: Work, 577 | rhs: RHS, 578 | rules: List<(Pattern, RHS)>, 579 | diags: &mut Diagnostics, 580 | ) -> Decision { 581 | match pat1 { 582 | Pattern::Var(_) => succeed(augment(ctx, dsc), work, rhs, rules, diags), 583 | Pattern::Cons(pcon, pargs) => match staticmatch(&pcon, &dsc) { 584 | StaticMatch::Yes => match_succeed( 585 | pcon, pargs, obj, dsc, ctx, work, rhs, rules, diags, 586 | ), 587 | StaticMatch::No => match_fail(dsc, ctx, work, rules, diags), 588 | StaticMatch::Maybe => { 589 | // In the paper the equivalent code makes two assumptions that 590 | // don't work in Rust: 591 | // 592 | // 1. Certain values can have multiple owners (e.g. the `dsc` 593 | // value is shared between functions). 594 | // 2. When building the subtree for a matched value, the 595 | // algorithm expects that variables such as `work` and 596 | // `rules` _are not_ modified in place. If they are, 597 | // generating the subtree for an unmatched value produces 598 | // incorrect results. 599 | // 600 | // In case of shared ownership we just clone the values. 
In a 601 | // real compiler that probably wouldn't work very well, but for 602 | // the sake of this implementation it's good enough. 603 | Decision::IfEq( 604 | obj.clone(), 605 | pcon.clone(), 606 | Box::new(match_succeed( 607 | pcon.clone(), 608 | pargs, 609 | obj, 610 | dsc.clone(), 611 | ctx.clone(), 612 | work.clone(), 613 | rhs, 614 | rules.clone(), 615 | diags, 616 | )), 617 | Box::new(match_fail( 618 | addneg(dsc, pcon), 619 | ctx, 620 | work, 621 | rules, 622 | diags, 623 | )), 624 | ) 625 | } 626 | }, 627 | } 628 | } 629 | 630 | /// Recursively collects cases for a Switch node. 631 | /// 632 | /// This is based on the `collect` function as found in the Moscow ML compiler. 633 | fn collect( 634 | root_acc: &Access, 635 | cases: List<(Con, Decision)>, 636 | decision: Decision, 637 | ) -> (List<(Con, Decision)>, Decision) { 638 | match decision { 639 | Decision::IfEq(acc, con, ok, fail) if root_acc == &acc => { 640 | let (cases, dec) = collect(root_acc, cases, *fail); 641 | 642 | // We add our case _after_ recursing, ensuring the order of values 643 | // in the list is the same as the order of matches. If we were to 644 | // add _before_ recursing, the list would be in reverse order. 645 | (cases.add((con, *ok)), dec) 646 | } 647 | _ => (cases, decision), 648 | } 649 | } 650 | 651 | /// Replaces a series of nested IfEq nodes for the same access object with a 652 | /// Switch node. 653 | pub fn switchify(tree: Decision) -> Decision { 654 | match tree { 655 | Decision::IfEq(acc, con, ok, fail) => { 656 | let (cases, fallback) = collect(&acc, List::new(), *fail); 657 | 658 | if cases.is_empty() { 659 | Decision::IfEq(acc, con, ok, Box::new(fallback)) 660 | } else { 661 | Decision::Switch(acc, cases.add((con, *ok)), Box::new(fallback)) 662 | } 663 | } 664 | _ => tree, 665 | } 666 | } 667 | 668 | /// Compiles a list of rules into a decision tree. 
669 | pub fn compile(rules: List<(Pattern, RHS)>) -> (Decision, Diagnostics) { 670 | let mut diags = 671 | Diagnostics { messages: Vec::new(), reachable: HashSet::new() }; 672 | 673 | (fail(TermDesc::Neg(List::new()), rules, &mut diags), diags) 674 | } 675 | 676 | #[cfg(test)] 677 | mod tests { 678 | use super::*; 679 | 680 | /// A macro for creating a linked list. 681 | /// 682 | /// Rust has no (linked) list literals, so we use this macro instead. 683 | /// Basically whenever you have the SML expression `[a, b, c]`, you'd 684 | /// instead use `list![a, b, c]`. 685 | /// 686 | /// When creating a list using this macro, the values are added to the end 687 | /// of the list. 688 | macro_rules! list { 689 | ($($value: expr),*$(,)?) => {{ 690 | let temp = vec![$($value),*]; 691 | let mut list = List::new(); 692 | 693 | for val in temp.into_iter().rev() { 694 | list = list.add(val); 695 | } 696 | 697 | list 698 | }} 699 | } 700 | 701 | fn con(name: &str, arity: usize, span: usize) -> Con { 702 | Con { name: name.to_string(), arity, span } 703 | } 704 | 705 | fn nil() -> Pattern { 706 | Pattern::Cons(con("nil", 0, 1), List::new()) 707 | } 708 | 709 | fn tt_con() -> Con { 710 | con("true", 0, 2) 711 | } 712 | 713 | fn ff_con() -> Con { 714 | con("false", 0, 2) 715 | } 716 | 717 | fn tt() -> Pattern { 718 | Pattern::Cons(tt_con(), List::new()) 719 | } 720 | 721 | fn ff() -> Pattern { 722 | Pattern::Cons(ff_con(), List::new()) 723 | } 724 | 725 | fn pair(a: Pattern, b: Pattern) -> Pattern { 726 | Pattern::Cons(con("pair", 2, 1), list![a, b]) 727 | } 728 | 729 | fn var(name: &str) -> Pattern { 730 | Pattern::Var(name.to_string()) 731 | } 732 | 733 | fn if_eq(acc: Access, con: Con, ok: Decision, fail: Decision) -> Decision { 734 | Decision::IfEq(acc, con, Box::new(ok), Box::new(fail)) 735 | } 736 | 737 | fn switch( 738 | acc: Access, 739 | cases: List<(Con, Decision)>, 740 | fallback: Decision, 741 | ) -> Decision { 742 | Decision::Switch(acc, cases, Box::new(fallback)) 743 
| } 744 | 745 | fn success(value: &str) -> Decision { 746 | Decision::Success(value.to_string()) 747 | } 748 | 749 | fn failure() -> Decision { 750 | Decision::Failure 751 | } 752 | 753 | fn rhs(value: &str) -> String { 754 | value.to_string() 755 | } 756 | 757 | fn obj() -> Access { 758 | Access::Obj 759 | } 760 | 761 | fn sel(index: usize, acc: Access) -> Access { 762 | Access::Sel(index, Box::new(acc)) 763 | } 764 | 765 | #[test] 766 | fn test_list_push_pop() { 767 | let list1 = List::new(); 768 | let list2 = list1.add(10); 769 | let list3 = list2.add(20); 770 | 771 | assert!(list1.head.is_none()); 772 | assert!(list2.head.is_some()); 773 | assert!(list3.head.is_some()); 774 | 775 | assert_eq!(list2.split().0, Some(&10)); 776 | assert_eq!(list2.split().0, Some(&10)); 777 | assert_eq!(list3.split().0, Some(&20)); 778 | } 779 | 780 | #[test] 781 | fn test_list_rev() { 782 | let list1 = list![3, 2, 1]; 783 | let list2 = list1.rev(); 784 | 785 | assert_eq!(list1.iter().collect::>(), vec![&3, &2, &1]); 786 | assert_eq!(list2.iter().collect::>(), vec![&1, &2, &3]); 787 | } 788 | 789 | #[test] 790 | fn test_list_rev_and_merge() { 791 | let list1 = list![3, 2, 1]; 792 | let list2 = list![4]; 793 | let list3 = list1.rev().merge(list2.add(10)); 794 | 795 | assert_eq!(list3.iter().collect::>(), vec![&1, &2, &3, &10, &4]); 796 | } 797 | 798 | #[test] 799 | fn test_list_merge() { 800 | let list1 = list![1, 2]; 801 | let list2 = list![3, 4]; 802 | let list3 = list1.merge(list2); 803 | 804 | assert_eq!(list3.iter().collect::>(), vec![&1, &2, &3, &4]); 805 | } 806 | 807 | #[test] 808 | fn test_term_desc_error_string() { 809 | let term = TermDesc::Pos( 810 | con("box", 2, 1), 811 | list![ 812 | TermDesc::Pos(con("true", 0, 2), List::new()), 813 | TermDesc::Neg(list![con("false", 0, 2)]) 814 | ], 815 | ); 816 | 817 | assert_eq!(term.error_string(), "box(true, _)"); 818 | } 819 | 820 | #[test] 821 | fn test_tabulate() { 822 | let vals = tabulate(3, |v| v); 823 | 824 | 
assert_eq!(vals.iter().collect::>(), vec![&0, &1, &2]); 825 | } 826 | 827 | #[test] 828 | fn test_args() { 829 | let con = con("box", 2, 1); 830 | let vals = args(&con, |v| v); 831 | 832 | assert_eq!(vals.iter().collect::>(), vec![&0, &1]); 833 | } 834 | 835 | #[test] 836 | fn test_getdargs_with_pos_term() { 837 | let con = con("box", 2, 1); 838 | let term = 839 | TermDesc::Pos(con.clone(), list![TermDesc::Neg(List::new())]); 840 | let args = getdargs(&con, term); 841 | let arg = args.iter().next(); 842 | 843 | assert!(matches!(arg, Some(TermDesc::Neg(_)))); 844 | } 845 | 846 | #[test] 847 | fn test_getdargs_with_neg_term() { 848 | let con = con("box", 2, 1); 849 | let term = TermDesc::Neg(List::new()); 850 | let args = getdargs(&con, term); 851 | let mut iter = args.iter(); 852 | 853 | assert!(matches!(iter.next(), Some(TermDesc::Neg(_)))); 854 | assert!(matches!(iter.next(), Some(TermDesc::Neg(_)))); 855 | } 856 | 857 | #[test] 858 | fn test_getoargs() { 859 | let con = con("box", 2, 1); 860 | let acc = sel(42, obj()); 861 | let args = getoargs(&con, acc); 862 | 863 | assert_eq!( 864 | args.iter().collect::>(), 865 | vec![&sel(0, sel(42, obj())), &sel(1, sel(42, obj()))] 866 | ); 867 | } 868 | 869 | #[test] 870 | fn test_builddsc() { 871 | let ctx = list![( 872 | con("baz", 0, 1), 873 | list![ 874 | TermDesc::Neg(list![con("arg1", 0, 1)]), 875 | TermDesc::Neg(list![con("arg2", 0, 1)]), 876 | ] 877 | )]; 878 | let work = list![( 879 | List::new(), 880 | List::new(), 881 | list![ 882 | TermDesc::Neg(list![con("work1", 0, 1)]), 883 | TermDesc::Neg(list![con("work2", 0, 1)]) 884 | ] 885 | )]; 886 | let dsc = TermDesc::Neg(list![con("bar", 0, 1)]); 887 | let new_dsc = builddsc(ctx, dsc, work); 888 | 889 | assert_eq!( 890 | new_dsc, 891 | TermDesc::Pos( 892 | con("baz", 0, 1), 893 | list![ 894 | TermDesc::Neg(list![con("arg2", 0, 1)]), 895 | TermDesc::Neg(list![con("arg1", 0, 1)]), 896 | TermDesc::Neg(list![con("bar", 0, 1)]), 897 | TermDesc::Neg(list![con("work1", 0, 
1)]), 898 | TermDesc::Neg(list![con("work2", 0, 1)]), 899 | ] 900 | ) 901 | ); 902 | } 903 | 904 | #[test] 905 | fn test_augment() { 906 | let ctx = list![( 907 | con("baz", 0, 1), 908 | list![ 909 | TermDesc::Neg(list![con("arg1", 0, 1)]), 910 | TermDesc::Neg(list![con("arg2", 0, 1)]), 911 | ] 912 | )]; 913 | 914 | let dsc = TermDesc::Neg(list![con("bar", 0, 1)]); 915 | let new_ctx = augment(ctx, dsc); 916 | 917 | assert_eq!( 918 | new_ctx, 919 | list![( 920 | con("baz", 0, 1), 921 | list![ 922 | TermDesc::Neg(list![con("bar", 0, 1)]), 923 | TermDesc::Neg(list![con("arg1", 0, 1)]), 924 | TermDesc::Neg(list![con("arg2", 0, 1)]), 925 | ] 926 | )] 927 | ); 928 | } 929 | 930 | #[test] 931 | fn test_match_always_succeeds() { 932 | let (result, _) = compile(list![(nil(), rhs("true"))]); 933 | 934 | assert_eq!(result, success("true")); 935 | } 936 | 937 | #[test] 938 | fn test_match_always_fails() { 939 | let (result, _) = compile(List::new()); 940 | 941 | assert_eq!(result, failure()); 942 | } 943 | 944 | #[test] 945 | fn test_match_single_pattern() { 946 | let (result, _) = 947 | compile(list![(tt(), rhs("true")), (ff(), rhs("false")),]); 948 | 949 | assert_eq!( 950 | result, 951 | if_eq(obj(), tt_con(), success("true"), success("false")) 952 | ); 953 | } 954 | 955 | #[test] 956 | fn test_match_var() { 957 | let (result, _) = compile(list![(var("a"), rhs("true"))]); 958 | 959 | assert_eq!(result, success("true")); 960 | } 961 | 962 | #[test] 963 | fn test_match_multiple_patterns() { 964 | let (result, diags) = compile(list![ 965 | (tt(), rhs("true")), 966 | (ff(), rhs("false")), 967 | (tt(), rhs("redundant")) 968 | ]); 969 | 970 | // Redundant patterns are ignored on the decision tree. This is also how 971 | // you'd detect redundant patterns: you'd somehow mark every RHS when 972 | // you produce their Success nodes. Any RHS nodes that remain unmarked 973 | // are redundant. 
974 | assert_eq!( 975 | result, 976 | if_eq(obj(), tt_con(), success("true"), success("false")) 977 | ); 978 | 979 | assert!(diags.reachable.contains(&"true".to_string())); 980 | assert!(diags.reachable.contains(&"false".to_string())); 981 | assert!(!diags.reachable.contains(&"redundant".to_string())); 982 | } 983 | 984 | #[test] 985 | fn test_nonexhaustive_match() { 986 | let (result, diags) = compile(list![(tt(), rhs("true")),]); 987 | 988 | assert_eq!(result, if_eq(obj(), tt_con(), success("true"), failure())); 989 | assert_eq!(diags.messages, vec!["Missing pattern: _".to_string()]); 990 | } 991 | 992 | #[test] 993 | fn test_nonexhaustive_match_from_paper() { 994 | let green = Pattern::Cons(con("green", 0, 3), List::new()); 995 | let (result, diags) = compile(list![ 996 | (pair(tt(), green.clone()), rhs("111")), 997 | (pair(ff(), green.clone()), rhs("222")), 998 | ]); 999 | 1000 | assert_eq!( 1001 | result, 1002 | if_eq( 1003 | sel(0, obj()), 1004 | tt_con(), 1005 | if_eq( 1006 | sel(1, obj()), 1007 | con("green", 0, 3), 1008 | success("111"), 1009 | failure() 1010 | ), 1011 | if_eq( 1012 | sel(1, obj()), 1013 | con("green", 0, 3), 1014 | success("222"), 1015 | failure() 1016 | ) 1017 | ) 1018 | ); 1019 | 1020 | assert_eq!( 1021 | diags.messages, 1022 | vec![ 1023 | "Missing pattern: pair(true, _)".to_string(), 1024 | "Missing pattern: pair(false, _)".to_string() 1025 | ] 1026 | ); 1027 | } 1028 | 1029 | #[test] 1030 | fn test_nested_match() { 1031 | let (result, _) = compile(list![ 1032 | (pair(tt(), tt()), rhs("foo")), 1033 | (pair(tt(), ff()), rhs("bar")), 1034 | (pair(ff(), ff()), rhs("baz")), 1035 | (pair(ff(), tt()), rhs("quix")), 1036 | ]); 1037 | 1038 | assert_eq!( 1039 | result, 1040 | if_eq( 1041 | sel(0, obj()), 1042 | tt_con(), 1043 | if_eq(sel(1, obj()), tt_con(), success("foo"), success("bar")), 1044 | if_eq(sel(1, obj()), ff_con(), success("baz"), success("quix")) 1045 | ) 1046 | ); 1047 | } 1048 | 1049 | #[test] 1050 | fn 
test_match_with_switchify() { 1051 | let a = con("a", 0, 4); 1052 | let b = con("b", 0, 4); 1053 | let c = con("c", 0, 4); 1054 | let d = con("d", 0, 4); 1055 | let a_pat = Pattern::Cons(a.clone(), List::new()); 1056 | let b_pat = Pattern::Cons(b.clone(), List::new()); 1057 | let c_pat = Pattern::Cons(c.clone(), List::new()); 1058 | let d_pat = Pattern::Cons(d.clone(), List::new()); 1059 | let (result, _) = compile(list![ 1060 | ((a_pat, rhs("a"))), 1061 | ((b_pat, rhs("b"))), 1062 | ((c_pat, rhs("c"))), 1063 | ((d_pat, rhs("d"))) 1064 | ]); 1065 | 1066 | assert_eq!( 1067 | switchify(result), 1068 | switch( 1069 | obj(), 1070 | list![(a, success("a")), (b, success("b")), (c, success("c"))], 1071 | success("d") 1072 | ) 1073 | ); 1074 | } 1075 | 1076 | #[test] 1077 | fn test_nested_match_without_switch() { 1078 | let (result, _) = compile(list![ 1079 | (pair(tt(), tt()), rhs("foo")), 1080 | (pair(tt(), ff()), rhs("bar")), 1081 | (pair(ff(), ff()), rhs("baz")), 1082 | (pair(ff(), tt()), rhs("quix")), 1083 | ]); 1084 | 1085 | // This doesn't produce a switch, as the nested patterns don't test the 1086 | // same value. 
1087 | assert_eq!( 1088 | switchify(result), 1089 | if_eq( 1090 | sel(0, obj()), 1091 | tt_con(), 1092 | if_eq(sel(1, obj()), tt_con(), success("foo"), success("bar")), 1093 | if_eq(sel(1, obj()), ff_con(), success("baz"), success("quix")) 1094 | ) 1095 | ); 1096 | } 1097 | 1098 | #[test] 1099 | fn test_match_with_args() { 1100 | let some = con("some", 3, 2); 1101 | let (result, _) = compile(list![ 1102 | (Pattern::Cons(some.clone(), list![tt(), tt(), ff()]), rhs("foo")), 1103 | (var("x"), rhs("bar")) 1104 | ]); 1105 | 1106 | assert_eq!( 1107 | result, 1108 | if_eq( 1109 | obj(), 1110 | some, 1111 | if_eq( 1112 | sel(0, obj()), 1113 | tt_con(), 1114 | if_eq( 1115 | sel(1, obj()), 1116 | tt_con(), 1117 | if_eq( 1118 | sel(2, obj()), 1119 | ff_con(), 1120 | success("foo"), 1121 | success("bar") 1122 | ), 1123 | success("bar") 1124 | ), 1125 | success("bar") 1126 | ), 1127 | success("bar") 1128 | ) 1129 | ); 1130 | } 1131 | 1132 | #[test] 1133 | fn test_match_nonexhaustive_with_args() { 1134 | let some = con("some", 3, 2); 1135 | let (result, diags) = compile(list![( 1136 | Pattern::Cons(some.clone(), list![tt(), ff(), ff()]), 1137 | rhs("foo") 1138 | ),]); 1139 | 1140 | assert_eq!( 1141 | result, 1142 | if_eq( 1143 | obj(), 1144 | some, 1145 | if_eq( 1146 | sel(0, obj()), 1147 | tt_con(), 1148 | if_eq( 1149 | sel(1, obj()), 1150 | ff_con(), 1151 | if_eq( 1152 | sel(2, obj()), 1153 | ff_con(), 1154 | success("foo"), 1155 | failure() 1156 | ), 1157 | failure() 1158 | ), 1159 | failure() 1160 | ), 1161 | failure() 1162 | ) 1163 | ); 1164 | 1165 | assert_eq!( 1166 | diags.messages, 1167 | vec![ 1168 | "Missing pattern: some(true, false, _)".to_string(), 1169 | "Missing pattern: some(true, _, _)".to_string(), 1170 | "Missing pattern: some(_, _, _)".to_string(), 1171 | "Missing pattern: _".to_string(), 1172 | ] 1173 | ); 1174 | } 1175 | } 1176 | --------------------------------------------------------------------------------