├── .github
    └── workflows
    │   └── test.yml
├── .gitignore
├── Cargo.lock
├── Cargo.toml
├── LICENSE
├── README.md
├── jacobs2021
    ├── Cargo.toml
    ├── README.md
    └── src
    │   └── lib.rs
├── rustfmt.toml
└── sestoft1996
    ├── Cargo.lock
    ├── Cargo.toml
    ├── README.md
    └── src
        ├── idiomatic.rs
        ├── lib.rs
        └── raw.rs


/.github/workflows/test.yml:
--------------------------------------------------------------------------------
 1 | ---
 2 | name: Test
 3 | on: push
 4 | 
 5 | env:
 6 |   CARGO_HOME: ${{ github.workspace }}/.cargo
 7 | 
 8 | jobs:
 9 |   test:
10 |     runs-on: ubuntu-latest
11 |     container:
12 |       image: 'rust:alpine'
13 |     steps:
14 |       - name: Checkout repository
15 |         uses: actions/checkout@v3
16 | 
17 |       - name: Set up cache
18 |         uses: actions/cache@v3
19 |         with:
20 |           path: |
21 |             .cargo
22 |             target
23 |           key: ${{ runner.os }}-cargo-${{ hashFiles('**/Cargo.lock') }}
24 | 
25 |       - name: Run tests
26 |         run: cargo test
27 | 
28 |   rustfmt:
29 |     runs-on: ubuntu-latest
30 |     container:
31 |       image: 'rust:alpine'
32 |     steps:
33 |       - name: Checkout repository
34 |         uses: actions/checkout@v3
35 | 
36 |       - name: Set up cache
37 |         uses: actions/cache@v3
38 |         with:
39 |           path: |
40 |             .cargo
41 |             target
42 |           key: ${{ runner.os }}-cargo-${{ hashFiles('**/Cargo.lock') }}
43 | 
44 |       - name: Set up rustfmt
45 |         run: rustup component add rustfmt
46 | 
47 |       - name: Check formatting
48 |         run: 'cargo fmt --all --check'
49 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | target
2 | 


--------------------------------------------------------------------------------
/Cargo.lock:
--------------------------------------------------------------------------------
  1 | # This file is automatically @generated by Cargo.
  2 | # It is not intended for manual editing.
  3 | version = 3
  4 | 
  5 | [[package]]
  6 | name = "bstr"
  7 | version = "0.2.17"
  8 | source = "registry+https://github.com/rust-lang/crates.io-index"
  9 | checksum = "ba3569f383e8f1598449f1a423e72e99569137b47740b1da11ef19af3d5c3223"
 10 | dependencies = [
 11 |  "lazy_static",
 12 |  "memchr",
 13 |  "regex-automata",
 14 | ]
 15 | 
 16 | [[package]]
 17 | name = "console"
 18 | version = "0.15.0"
 19 | source = "registry+https://github.com/rust-lang/crates.io-index"
 20 | checksum = "a28b32d32ca44b70c3e4acd7db1babf555fa026e385fb95f18028f88848b3c31"
 21 | dependencies = [
 22 |  "encode_unicode",
 23 |  "libc",
 24 |  "once_cell",
 25 |  "terminal_size",
 26 |  "winapi",
 27 | ]
 28 | 
 29 | [[package]]
 30 | name = "encode_unicode"
 31 | version = "0.3.6"
 32 | source = "registry+https://github.com/rust-lang/crates.io-index"
 33 | checksum = "a357d28ed41a50f9c765dbfe56cbc04a64e53e5fc58ba79fbc34c10ef3df831f"
 34 | 
 35 | [[package]]
 36 | name = "jacobs2021"
 37 | version = "1.0.0"
 38 | dependencies = [
 39 |  "similar-asserts",
 40 | ]
 41 | 
 42 | [[package]]
 43 | name = "lazy_static"
 44 | version = "1.4.0"
 45 | source = "registry+https://github.com/rust-lang/crates.io-index"
 46 | checksum = "e2abad23fbc42b3700f2f279844dc832adb2b2eb069b2df918f455c4e18cc646"
 47 | 
 48 | [[package]]
 49 | name = "libc"
 50 | version = "0.2.126"
 51 | source = "registry+https://github.com/rust-lang/crates.io-index"
 52 | checksum = "349d5a591cd28b49e1d1037471617a32ddcda5731b99419008085f72d5a53836"
 53 | 
 54 | [[package]]
 55 | name = "memchr"
 56 | version = "2.5.0"
 57 | source = "registry+https://github.com/rust-lang/crates.io-index"
 58 | checksum = "2dffe52ecf27772e601905b7522cb4ef790d2cc203488bbd0e2fe85fcb74566d"
 59 | 
 60 | [[package]]
 61 | name = "once_cell"
 62 | version = "1.12.0"
 63 | source = "registry+https://github.com/rust-lang/crates.io-index"
 64 | checksum = "7709cef83f0c1f58f666e746a08b21e0085f7440fa6a29cc194d68aac97a4225"
 65 | 
 66 | [[package]]
 67 | name = "regex-automata"
 68 | version = "0.1.10"
 69 | source = "registry+https://github.com/rust-lang/crates.io-index"
 70 | checksum = "6c230d73fb8d8c1b9c0b3135c5142a8acee3a0558fb8db5cf1cb65f8d7862132"
 71 | 
 72 | [[package]]
 73 | name = "sestoft1996"
 74 | version = "1.0.0"
 75 | 
 76 | [[package]]
 77 | name = "similar"
 78 | version = "2.1.0"
 79 | source = "registry+https://github.com/rust-lang/crates.io-index"
 80 | checksum = "2e24979f63a11545f5f2c60141afe249d4f19f84581ea2138065e400941d83d3"
 81 | dependencies = [
 82 |  "bstr",
 83 |  "unicode-segmentation",
 84 | ]
 85 | 
 86 | [[package]]
 87 | name = "similar-asserts"
 88 | version = "1.2.0"
 89 | source = "registry+https://github.com/rust-lang/crates.io-index"
 90 | checksum = "64c9f531a2375031d51c23c415ca12d0f0271b976211e2f727b7a0eac06a099d"
 91 | dependencies = [
 92 |  "console",
 93 |  "similar",
 94 | ]
 95 | 
 96 | [[package]]
 97 | name = "terminal_size"
 98 | version = "0.1.17"
 99 | source = "registry+https://github.com/rust-lang/crates.io-index"
100 | checksum = "633c1a546cee861a1a6d0dc69ebeca693bf4296661ba7852b9d21d159e0506df"
101 | dependencies = [
102 |  "libc",
103 |  "winapi",
104 | ]
105 | 
106 | [[package]]
107 | name = "unicode-segmentation"
108 | version = "1.9.0"
109 | source = "registry+https://github.com/rust-lang/crates.io-index"
110 | checksum = "7e8820f5d777f6224dc4be3632222971ac30164d4a258d595640799554ebfd99"
111 | 
112 | [[package]]
113 | name = "winapi"
114 | version = "0.3.9"
115 | source = "registry+https://github.com/rust-lang/crates.io-index"
116 | checksum = "5c839a674fcd7a98952e593242ea400abe93992746761e38641405d28b00f419"
117 | dependencies = [
118 |  "winapi-i686-pc-windows-gnu",
119 |  "winapi-x86_64-pc-windows-gnu",
120 | ]
121 | 
122 | [[package]]
123 | name = "winapi-i686-pc-windows-gnu"
124 | version = "0.4.0"
125 | source = "registry+https://github.com/rust-lang/crates.io-index"
126 | checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6"
127 | 
128 | [[package]]
129 | name = "winapi-x86_64-pc-windows-gnu"
130 | version = "0.4.0"
131 | source = "registry+https://github.com/rust-lang/crates.io-index"
132 | checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f"
133 | 


--------------------------------------------------------------------------------
/Cargo.toml:
--------------------------------------------------------------------------------
1 | [workspace]
2 | members = ["sestoft1996", "jacobs2021"]
3 | resolver = "2"
4 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | This is free and unencumbered software released into the public domain.
 2 | 
 3 | Anyone is free to copy, modify, publish, use, compile, sell, or
 4 | distribute this software, either in source code form or as a compiled
 5 | binary, for any purpose, commercial or non-commercial, and by any
 6 | means.
 7 | 
 8 | In jurisdictions that recognize copyright laws, the author or authors
 9 | of this software dedicate any and all copyright interest in the
10 | software to the public domain. We make this dedication for the benefit
11 | of the public at large and to the detriment of our heirs and
12 | successors. We intend this dedication to be an overt act of
13 | relinquishment in perpetuity of all present and future rights to this
14 | software under copyright law.
15 | 
16 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
17 | EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18 | MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
19 | IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR
20 | OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
21 | ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
22 | OTHER DEALINGS IN THE SOFTWARE.
23 | 
24 | For more information, please refer to <http://unlicense.org/>
25 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # Pattern matching in Rust
 2 | 
 3 | This repository contains a collection of pattern matching algorithms implemented
 4 | in Rust. The goal of these implementations is to (hopefully) make it easier to
 5 | understand them, as papers related to pattern matching (and papers in general)
 6 | can be difficult to read.
 7 | 
 8 | ## Background
 9 | 
10 | I ended up implementing these algorithms while investigating potential pattern
11 | matching/exhaustiveness checking algorithms for [Inko](https://inko-lang.org/).
12 | While there are plenty of papers on the subject, few of them include reference
13 | code, and almost all of them are really dense and difficult to read. I hope the
14 | code published in this repository is of use to those wishing to implement
15 | pattern matching/exhaustiveness.
16 | 
17 | ## Algorithms
18 | 
19 | | Name                                          | Paper                        | Directory
20 | |:----------------------------------------------|:-----------------------------|:-----------
21 | | ML pattern compilation and partial evaluation | [PDF](https://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.48.1363) | [sestoft1996](./sestoft1996/)
22 | | How to compile pattern matching               | [PDF](https://julesjacobs.com/notes/patternmatching/patternmatching.pdf) | [jacobs2021](./jacobs2021/)
23 | 
24 | Other papers I've come across (but don't necessarily want to implement):
25 | 
26 | - [A generic algorithm for checking exhaustivity of pattern
27 |   matching](https://dl.acm.org/doi/10.1145/2998392.2998401).
28 |   - The Scala implementation [is found in this PR](https://github.com/lampepfl/dotty/pull/1364) (the `Space.scala` file).
29 |   - Swift also uses this algorithm [here](https://github.com/apple/swift/blob/3c0b1ab03f189e044303436b8aa6a27c2f93707d/lib/Sema/TypeCheckSwitchStmt.cpp)
30 |   - Some Reddit comments about the algorithm are [found here](https://www.reddit.com/r/ProgrammingLanguages/comments/cioxwn/a_generic_algorithm_for_checking_exhaustivity_of/)
31 | - [Compiling pattern matching to good decision
32 |   trees](https://www.cs.tufts.edu/comp/150FP/archive/luc-maranget/jun08.pdf).
33 |   This is about just compiling pattern matching into a decision tree, not about
34 |   exhaustiveness checking. If you don't know how to read the computer science
35 |   hieroglyphs (like me), this paper is basically impossible to understand.
36 |   - See also https://alan-j-hu.github.io/writing/pattern-matching.html and
37 |     https://contificate.github.io/compiling-pattern-matching/
38 |   - There's a [Rust implementation](https://github.com/SomewhatML/match-compile)
39 |     of this algorithm, though it doesn't perform exhaustiveness checking.
40 | - [Warnings for pattern
41 |   matching](http://pauillac.inria.fr/~maranget/papers/warn/warn.pdf). This is
42 |   just about producing warnings/errors for e.g. non-exhaustive patterns.
43 |   Similarly painful to understand as the previous paper (i.e. I gave up).
44 | - [The Implementation of Functional Programming
45 |   Languages](https://www.microsoft.com/en-us/research/publication/the-implementation-of-functional-programming-languages/).
46 |   This book has a chapter on pattern matching, but I gave up on it.
47 | 
48 | ## Requirements
49 | 
50 | A recent-ish (as of 2022) Rust version that supports the 2021 edition (though I
51 | think the 2018 edition should also work).
52 | 
53 | ## Usage
54 | 
55 | Each algorithm is implemented as a library, and comes with a set of unit tests
56 | that you can run using `cargo test`.
57 | 
58 | ## Licence
59 | 
60 | The code in this repository is licensed under the
61 | [Unlicense](https://unlicense.org/). A copy of this license can be found in the
62 | file "LICENSE".
63 | 


--------------------------------------------------------------------------------
/jacobs2021/Cargo.toml:
--------------------------------------------------------------------------------
 1 | [package]
 2 | name = "jacobs2021"
 3 | version = "1.0.0"
 4 | authors = ["Yorick Peterse <yorick@yorickpeterse.com>"]
 5 | edition = "2021"
 6 | 
 7 | [lib]
 8 | doctest = false
 9 | 
10 | [dev-dependencies]
11 | similar-asserts = "^1.1"
12 | 


--------------------------------------------------------------------------------
/jacobs2021/README.md:
--------------------------------------------------------------------------------
  1 | # How to compile pattern matching
  2 | 
  3 | This directory contains an implementation of the algorithm discussed in the
  4 | article [How to compile pattern
  5 | matching](https://julesjacobs.com/notes/patternmatching/patternmatching.pdf) by
  6 | Jules Jacobs. The algorithm in question took me a while to understand, and I'm
  7 | grateful for all the help provided by Jules via Email. Thanks!
  8 | 
  9 | Now on to the algorithm. In hindsight it ended up not being as difficult as I
 10 | initially thought, rather the way it was explained was a bit hard to understand.
 11 | The algorithm works as follows:
 12 | 
 13 | First, we treat a match expression as if it were a table (in the database
 14 | sense), consisting of rows and columns. The rows are the match cases (sometimes
 15 | called "match arms"), and the columns the patterns to test. Consider this match
 16 | expression (I'm using Rust syntax here):
 17 | 
 18 | ```rust
 19 | match some_number {
 20 |     10 => foo,
 21 |     20 => bar,
 22 |     30 => baz
 23 | }
 24 | ```
 25 | 
 26 | Here `10 => foo`, `20 => bar` and `30 => baz` are the rows, and `10`, `20` and
 27 | `30` are the columns for each row. User provided match expressions only support
 28 | single columns (OR patterns are just turned into separate rows), but internally
 29 | the compiler supports multiple columns.
 30 | 
 31 | Internally our match expression is represented not as a list of rows and columns
 32 | implicitly testing against an outer variable (`some_number` in the above case),
 33 | instead each column explicitly specifies what it tests against. This means the
 34 | above match expression is internally represented as follows:
 35 | 
 36 | ```rust
 37 | match {
 38 |   some_number is 10 => foo,
 39 |   some_number is 20 => bar,
 40 |   some_number is 30 => baz
 41 | }
 42 | ```
 43 | 
 44 | Here I used the made-up syntax `x is y` to indicate the column tests against the
 45 | variable `some_number`, and the pattern tested is e.g. `10`.
 46 | 
 47 | Next, we need to get rid of variable patterns. This is done by pushing them into
 48 | the right-hand side (= the code to run upon a match) of each case. This means we
 49 | transform this expression:
 50 | 
 51 | 
 52 | ```rust
 53 | match {
 54 |     some_number is 10 => foo,
 55 |     some_number is num => bar
 56 | }
 57 | ```
 58 | 
 59 | Into this:
 60 | 
 61 | ```rust
 62 | match {
 63 |     some_number is 10 => foo,
 64 |     // I'm using "∅" here to signal a row without any columns.
 65 |     ∅ => {
 66 |         let num = some_number;
 67 |         bar
 68 |     }
 69 | }
 70 | ```
 71 | 
 72 | The article explains this makes things easier, though it doesn't really say
 73 | clearly why. The reason for this is as follows:
 74 | 
 75 | 1. It reduces the amount of duplication in the resulting decision tree, as we
 76 |    don't need to branch for variable and wildcard patterns.
 77 | 1. It means variable patterns don't influence branching decisions discussed
 78 |    below.
 79 | 1. When we branch on columns (again, discussed below), we can just forget about
 80 |    variable patterns.
 81 | 
 82 | Essentially it takes the following steps:
 83 | 
 84 | 1. Each right-hand side can store zero or more variables to define _before_
 85 |    running the code.
 86 | 1. Iterate over the columns in a row.
 87 | 1. If the column is a variable pattern, copy/move the variable into the
 88 |    right-hand side's variable list.
 89 | 1. Return a new row that only includes non-variable columns.
 90 | 
 91 | The implementation handles this in the method `move_variable_patterns`.
 92 | 
 93 | Now we need to decide what column to branch on. In practice it probably won't
 94 | matter much which strategy is used, so the algorithm takes a simple approach: it
 95 | takes the columns of the first row, and for every column counts how many times
 96 | its variable is tested across all columns in all rows. It then returns the
 97 | column whose variable is tested the most. The implementation of this is in the
 98 | method `branch_variable`.
 99 | 
100 | Now that we know what variable/column to branch on, we can generate the
101 | necessary branches and sub trees. The article only covers simple constructor
102 | patterns, but my implementation also handles integer literals, booleans, and
103 | more. The exact approach differs a bit and I recommend studying the Rust code to
104 | get a better understanding, but it roughly works as follows:
105 | 
106 | 1. Create an array containing triples in the form
107 |   `(constructor, arguments, rows)`. In this triple `constructor` is the
108 |    constructor we're testing against, `arguments` is a list of variables exposed
109 |    to the sub tree, and `rows` is the list of rows to compile for this test.
110 |    The `arguments` array is filled with one variable for every argument.
111 | 1. Iterate over all the current rows.
112 | 1. Obtain the column index of the branching variable.
113 | 1. If we found an index (remember that a row doesn't have to contain any columns
114 |    testing the branching variable), use it to remove the column from the row.
115 | 1. Determine the index of the constructor in the array created in step 1. For
116 |    ADTs you'd use the tag values, for booleans you could use 0 and 1 for false
117 |    and true respectively, etc.
118 | 1. Zip the pattern arguments (also patterns) with the values in the `arguments`
119 |    array from the triple for this constructor, and create a new column for every
120 |    resulting pair.
121 | 1. Create a new row containing the old columns (minus the one we removed
122 |    earlier), the new columns (created in the previous step), and the body of the
123 |    row. Push this row into the `rows` array for our constructor.
124 | 1. If in step 3 we didn't find an index, copy the row into the `rows` array for
125 |    every triple in the array created in step 1.
126 | 1. Finally, for every triple created in step 1 (and populated in later steps),
127 |    create a Switch node for our decision tree. The constructor and arguments are
128 |    stored in this Switch node, and the rows are compiled into a sub tree.
129 | 
130 | This is a lot to take in, so I recommend taking a look at the following methods:
131 | 
132 | - `compile_rows`
133 | - `compile_constructor_cases`
134 | 
135 | The output of all this is a decision tree, with three possible nodes: Success,
136 | Failure, and Switch (see the `Decision` type). A "Failure" node indicates a
137 | pattern that didn't match, and is used to check for exhaustiveness. In my
138 | implementation I opted to check for exhaustiveness separately, as this saves us
139 | from having to manage some extra data structures until we actually need them.
140 | The implementation works as follows:
141 | 
142 | When we produce a "Failure" node, a "missing" flag is set to `true`. After
143 | compiling our decision tree, we check this flag. If set to `true`, the method
144 | `Match::missing_patterns` is used to produce a list of patterns to add to make
145 | the match exhaustive.
146 | 
147 | The implementation of this method is a bit messy in my opinion, but it's the
148 | best I could come up with at this time. The implementation essentially maintains
149 | a stack of "terms" (I couldn't come up with a better name), each describing a
150 | test and its arguments in the tree. These terms also store the variables tested
151 | against, which combined with the names is used to (recursively) reconstruct a
152 | pattern name.
153 | 
154 | Checking for redundant patterns is easy: when reaching a "Success" node you'd
155 | somehow mark the right-hand side as processed. In my case I just store an
156 | integer value in an array. At the end you check for any right-hand sides that
157 | aren't marked, or in my case you check if any of their values are not in the
158 | array.
159 | 
160 | This about sums up how the algorithm works. Don't worry if the above wall of
161 | text hurts your head, it took me about two weeks to understand it. My advice is
162 | to read the article from Jules, then read this README, then take a look at the
163 | code and corresponding tests.
164 | 
165 | ## OR patterns
166 | 
167 | OR patterns are not covered in the article. To support these patterns we have to
168 | take rows containing OR patterns in any columns, then expand those OR patterns
169 | into separate rows. The code here handles this in the `expand_or_patterns()`
170 | function. This function is called _before_ pushing variable/wildcard patterns
171 | out of the rows, ensuring that OR patterns containing these patterns work as
172 | expected.
173 | 
174 | **NOTE:** a previous version used a method called `flatten_or`, with a
175 | different implementation. That implementation proved incorrect as it failed to
176 | handle bindings in OR patterns (e.g. `10 or number`).
177 | 
178 | ## Range patterns
179 | 
180 | Range patterns are handled using a `Range` constructor
181 | (`Constructor::Range(start, stop)`), produced when matching against integer
182 | types only (meaning we only support integer ranges). Just like regular integers
183 | we assume ranges are of infinite length, so a variable pattern is needed to make
184 | the match exhaustive.
185 | 
186 | ## Guards
187 | 
188 | Guards are supported as follows: each `Row` has a guard field, storing an
189 | `Option<usize>`, where the `usize` is just a dummy value for the guard; normally
190 | this would be (for example) an AST node to evaluate/lower. When we are about to
191 | produce a Success node for a row, we check if it defines a guard. If so, all
192 | remaining rows are compiled into the guard's fallback tree.
193 | 


--------------------------------------------------------------------------------
/jacobs2021/src/lib.rs:
--------------------------------------------------------------------------------
   1 | #![allow(clippy::new_without_default)]
   2 | 
   3 | //! An implementation of the algorithm described at
   4 | //! https://julesjacobs.com/notes/patternmatching/patternmatching.pdf.
   5 | use std::collections::{HashMap, HashSet};
   6 | 
   7 | /// The body of code to evaluate in case of a match.
   8 | #[derive(Clone, Eq, PartialEq, Debug)]
   9 | pub struct Body {
  10 |     /// Any variables to bind before running the code.
  11 |     ///
  12 |     /// Each tuple is in the form `(name, source)`, i.e. `name = source`.
  13 |     bindings: Vec<(String, Variable)>,
  14 | 
  15 |     /// The "code" to run in case of a match.
  16 |     ///
  17 |     /// We just use an integer for the sake of simplicity, but normally this
  18 |     /// would be an AST node, or perhaps an index to an array of AST nodes.
  19 |     value: usize,
  20 | }
  21 | 
  22 | /// A type constructor, as used by `Pattern::Constructor` and tested by a `Case`.
  23 | #[derive(Debug, Clone, Eq, PartialEq)]
  24 | pub enum Constructor {
  25 |     True,
  26 |     False,
  27 |     Int(i64),
  28 |     Pair(TypeId, TypeId),
  29 |     Variant(TypeId, usize),
  30 |     Range(i64, i64),
  31 | }
  32 | 
  33 | impl Constructor {
  34 |     /// Returns this constructor's index in its type (`True` is 1, `Variant` its stored index, the rest 0).
  35 |     fn index(&self) -> usize {
  36 |         match self {
  37 |             Constructor::False
  38 |             | Constructor::Int(_)
  39 |             | Constructor::Pair(_, _)
  40 |             | Constructor::Range(_, _) => 0,
  41 |             Constructor::True => 1,
  42 |             Constructor::Variant(_, index) => *index,
  43 |         }
  44 |     }
  45 | }
  46 | 
  47 | /// Expands rows containing OR patterns into individual rows, such that each
  48 | /// branch in the OR produces its own row.
  49 | ///
  50 | /// For each column that tests against an OR pattern, each sub-pattern is
  51 | /// translated into a new row. This work repeats itself until no more OR
  52 | /// patterns remain in the rows.
  53 | ///
  54 | /// The implementation here is probably not as fast as it can be. Instead, it's
  55 | /// optimized for ease of maintenance and readability.
  56 | fn expand_or_patterns(rows: &mut Vec<Row>) {
  57 |     // If none of the rows contain any OR patterns, we can skip the work loop
  58 |     // below, saving some allocations and time.
  59 |     if !rows
  60 |         .iter()
  61 |         .any(|r| r.columns.iter().any(|c| matches!(c.pattern, Pattern::Or(_))))
  62 |     {
  63 |         return;
  64 |     }
  65 | 
  66 |     // The implementation uses two Vecs: the original one, and a temporary one
  67 |     // we push newly created rows into. After processing all rows we swap the
  68 |     // two, repeating this process until we no longer find any OR patterns.
  69 |     let mut new_rows = Vec::with_capacity(rows.len());
  70 |     let mut found = true;
  71 | 
  72 |     while found {
  73 |         found = false;
  74 | 
  75 |         for row in rows.drain(0..) {
  76 |             // Find the first column containing an OR pattern. We process one
  77 |             // column at a time, as that's (much) easier to implement than
  78 |             // handling all columns at once (as multiple columns may contain OR
  79 |             // patterns).
  80 |             let res = row.columns.iter().enumerate().find_map(|(idx, col)| {
  81 |                 if let Pattern::Or(pats) = &col.pattern {
  82 |                     Some((idx, col.variable, pats))
  83 |                 } else {
  84 |                     None
  85 |                 }
  86 |             });
  87 | 
  88 |             if let Some((idx, var, pats)) = res {
  89 |                 found = true;
  90 | 
  91 |                 // This creates a new row for each branch in the OR pattern.
  92 |                 // Other columns are left as-is. If such columns contain OR
  93 |                 // patterns themselves, we'll expand them in a future iteration
  94 |                 // of the surrounding `while` loop.
  95 |                 for pat in pats {
  96 |                     let mut new_row = row.clone();
  97 | 
  98 |                     new_row.columns[idx] = Column::new(var, pat.clone());
  99 |                     new_rows.push(new_row);
 100 |                 }
 101 |             } else {
 102 |                 new_rows.push(row);
 103 |             }
 104 |         }
 105 | 
 106 |         std::mem::swap(rows, &mut new_rows);
 107 |     }
 108 | }
 109 | 
 110 | /// A user-defined pattern such as `Some((x, 10))`.
 111 | #[derive(Clone, Eq, PartialEq, Debug)]
 112 | pub enum Pattern {
 113 |     /// A constructor pattern with its argument patterns, such as `Some(42)`.
 114 |     Constructor(Constructor, Vec<Pattern>),
 115 |     Int(i64),
 116 |     Binding(String),
 117 |     Or(Vec<Pattern>),
 118 |     Range(i64, i64),
 119 | }
 120 | 
 121 | /// A representation of a type.
 122 | ///
 123 | /// In a real compiler this would probably be a more complicated structure, but
 124 | /// for simplicity we limit ourselves to integers, booleans, pairs, and enums.
 125 | #[derive(Clone)]
 126 | pub enum Type {
 127 |     Int,
 128 |     Boolean,
 129 |     Pair(TypeId, TypeId),
 130 |     Enum(Vec<(String, Vec<TypeId>)>),
 131 | }
 132 | 
 133 | /// A unique ID of a type.
 134 | ///
 135 | /// In a real compiler this may just be a regular pointer, or an ID value like
 136 | /// this.
 137 | #[derive(Eq, PartialEq, Hash, Clone, Copy, Debug)]
 138 | pub struct TypeId(usize);
 139 | 
 140 | /// A variable used in a match expression, carrying an `id` and a `type_id`.
 141 | ///
 142 | /// In a real compiler these would probably be registers or some other kind of
 143 | /// variable/temporary generated by your compiler.
 144 | #[derive(Eq, PartialEq, Hash, Clone, Copy, Debug)]
 145 | pub struct Variable {
 146 |     id: usize,
 147 |     type_id: TypeId,
 148 | }
 149 | 
 150 | /// A single case (or row) in a match expression/table: columns, an optional guard, and a body.
 151 | #[derive(Clone, Eq, PartialEq, Debug)]
 152 | pub struct Row {
 153 |     columns: Vec<Column>,
 154 |     guard: Option<usize>,
 155 |     body: Body,
 156 | }
 157 | 
 158 | impl Row {
 159 |     fn new(columns: Vec<Column>, guard: Option<usize>, body: Body) -> Self {
 160 |         Self { columns, guard, body }
 161 |     }
 162 | 
 163 |     fn remove_column(&mut self, variable: &Variable) -> Option<Column> {
 164 |         self.columns
 165 |             .iter()
 166 |             .position(|c| &c.variable == variable)
 167 |             .map(|idx| self.columns.remove(idx))
 168 |     }
 169 | }
 170 | 
 171 | /// A column in a pattern matching table.
 172 | ///
 173 | /// A column contains a single variable to test, and a pattern to test against
 174 | /// that variable. A row may contain multiple columns, though this wouldn't be
 175 | /// exposed to the source language (i.e. it's an implementation detail).
 176 | #[derive(Clone, Eq, PartialEq, Debug)]
 177 | pub struct Column {
 178 |     variable: Variable,
 179 |     pattern: Pattern,
 180 | }
 181 | 
 182 | impl Column {
 183 |     fn new(variable: Variable, pattern: Pattern) -> Self {
 184 |         Self { variable, pattern }
 185 |     }
 186 | }
 187 | 
 188 | /// A single case of a `Decision::Switch`, tested against the switch's variable.
 189 | #[derive(Eq, PartialEq, Debug)]
 190 | pub struct Case {
 191 |     /// The constructor to test against an input variable.
 192 |     constructor: Constructor,
 193 | 
 194 |     /// Variables to introduce to the body of this case.
 195 |     ///
 196 |     /// At runtime these would be populated with the values a pattern is matched
 197 |     /// against. For example, this pattern:
 198 |     ///
 199 |     ///     case (10, 20, foo) -> ...
 200 |     ///
 201 |     /// would result in three arguments, assigned the values `10`, `20` and
 202 |     /// `foo`.
 203 |     ///
 204 |     /// In a real compiler you'd assign these variables in your IR first, then
 205 |     /// generate the code for the sub tree.
 206 |     arguments: Vec<Variable>,
 207 | 
 208 |     /// The sub tree (a `Decision`) of this case.
 209 |     body: Decision,
 210 | }
 211 | 
 212 | impl Case {
 213 |     fn new(
 214 |         constructor: Constructor,
 215 |         arguments: Vec<Variable>,
 216 |         body: Decision,
 217 |     ) -> Self {
 218 |         Self { constructor, arguments, body }
 219 |     }
 220 | }
 221 | 
 222 | /// A decision tree compiled from a list of match cases.
 223 | #[derive(Eq, PartialEq, Debug)]
 224 | pub enum Decision {
 225 |     /// A pattern is matched and the right-hand value is to be returned.
 226 |     Success(Body),
 227 | 
 228 |     /// No pattern matched, i.e. the match is missing a pattern.
 229 |     Failure,
 230 | 
 231 |     /// Checks if a guard evaluates to true, running the body if it does.
 232 |     ///
 233 |     /// The arguments are as follows:
 234 |     ///
 235 |     /// 1. The "condition" to evaluate. We just use a dummy value, but in a real
 236 |     ///    compiler this would likely be an AST node of sorts.
 237 |     /// 2. The body to evaluate if the guard matches.
 238 |     /// 3. The sub tree to evaluate when the guard fails.
 239 |     Guard(usize, Body, Box<Decision>),
 240 | 
 241 |     /// Tests a variable against a list of cases.
 242 |     ///
 243 |     /// The values are as follows:
 244 |     ///
 245 |     /// 1. The variable to test.
 246 |     /// 2. The cases to test against this variable.
 247 |     /// 3. An optional fallback decision to take, in case none of the cases matched.
 248 |     Switch(Variable, Vec<Case>, Option<Box<Decision>>),
 249 | }
 250 | 
/// A type for storing diagnostics produced by the decision tree compiler.
pub struct Diagnostics {
    /// A flag indicating the match is missing one or more patterns.
    ///
    /// The compiler sets this whenever it produces a `Decision::Failure` node.
    missing: bool,

    /// The right-hand sides that are reachable.
    ///
    /// If a right-hand side isn't in this list it means its pattern is
    /// redundant.
    ///
    /// Values are recorded in the order the compiler reaches them, and the
    /// same value may be recorded more than once when a row ends up in
    /// multiple branches of the tree.
    reachable: Vec<usize>,
}
 262 | 
/// The result of compiling a pattern match expression.
pub struct Match {
    /// The types known to the compiler that produced this match, indexed by
    /// type ID. These are used to look up variant names when generating the
    /// names of missing patterns.
    pub types: Vec<Type>,

    /// The decision tree compiled from the input rows.
    pub tree: Decision,

    /// The diagnostics collected while compiling the decision tree.
    pub diagnostics: Diagnostics,
}
 269 | 
/// Information about a single constructor/value (aka term) being tested, used
/// to build a list of names of missing patterns.
#[derive(Debug)]
struct Term {
    /// The variable the constructor is tested against.
    variable: Variable,

    /// The name to display for the constructor. This is `true`/`false` for
    /// booleans, `_` for integers and ranges, empty for pairs, and the variant
    /// name for enum variants.
    name: String,

    /// The variables bound to the arguments of the constructor, if any.
    arguments: Vec<Variable>,
}
 278 | 
 279 | impl Term {
 280 |     fn new(variable: Variable, name: String, arguments: Vec<Variable>) -> Self {
 281 |         Self { variable, name, arguments }
 282 |     }
 283 | 
 284 |     fn pattern_name(
 285 |         &self,
 286 |         terms: &[Term],
 287 |         mapping: &HashMap<&Variable, usize>,
 288 |     ) -> String {
 289 |         if self.arguments.is_empty() {
 290 |             self.name.to_string()
 291 |         } else {
 292 |             let args = self
 293 |                 .arguments
 294 |                 .iter()
 295 |                 .map(|arg| {
 296 |                     mapping
 297 |                         .get(arg)
 298 |                         .map(|&idx| terms[idx].pattern_name(terms, mapping))
 299 |                         .unwrap_or_else(|| "_".to_string())
 300 |                 })
 301 |                 .collect::<Vec<_>>()
 302 |                 .join(", ");
 303 | 
 304 |             format!("{}({})", self.name, args)
 305 |         }
 306 |     }
 307 | }
 308 | 
 309 | impl Match {
 310 |     /// Returns a list of patterns not covered by the match expression.
 311 |     pub fn missing_patterns(&self) -> Vec<String> {
 312 |         let mut names = HashSet::new();
 313 |         let mut steps = Vec::new();
 314 | 
 315 |         self.add_missing_patterns(&self.tree, &mut steps, &mut names);
 316 | 
 317 |         let mut missing: Vec<String> = names.into_iter().collect();
 318 | 
 319 |         // Sorting isn't necessary, but it makes it a bit easier to write tests.
 320 |         missing.sort();
 321 |         missing
 322 |     }
 323 | 
 324 |     fn add_missing_patterns(
 325 |         &self,
 326 |         node: &Decision,
 327 |         terms: &mut Vec<Term>,
 328 |         missing: &mut HashSet<String>,
 329 |     ) {
 330 |         match node {
 331 |             Decision::Success(_) => {}
 332 |             Decision::Failure => {
 333 |                 let mut mapping = HashMap::new();
 334 | 
 335 |                 // At this point the terms stack looks something like this:
 336 |                 // `[term, term + arguments, term, ...]`. To construct a pattern
 337 |                 // name from this stack, we first map all variables to their
 338 |                 // term indexes. This is needed because when a term defines
 339 |                 // arguments, the terms for those arguments don't necessarily
 340 |                 // appear in order in the term stack.
 341 |                 //
 342 |                 // This mapping is then used when (recursively) generating a
 343 |                 // pattern name.
 344 |                 //
 345 |                 // This approach could probably be done more efficiently, so if
 346 |                 // you're reading this and happen to know of a way, please
 347 |                 // submit a merge request :)
 348 |                 for (index, step) in terms.iter().enumerate() {
 349 |                     mapping.insert(&step.variable, index);
 350 |                 }
 351 | 
 352 |                 let name = terms
 353 |                     .first()
 354 |                     .map(|term| term.pattern_name(terms, &mapping))
 355 |                     .unwrap_or_else(|| "_".to_string());
 356 | 
 357 |                 missing.insert(name);
 358 |             }
 359 |             Decision::Guard(_, _, fallback) => {
 360 |                 self.add_missing_patterns(fallback, terms, missing);
 361 |             }
 362 |             Decision::Switch(var, cases, fallback) => {
 363 |                 for case in cases {
 364 |                     match &case.constructor {
 365 |                         Constructor::True => {
 366 |                             let name = "true".to_string();
 367 | 
 368 |                             terms.push(Term::new(*var, name, Vec::new()));
 369 |                         }
 370 |                         Constructor::False => {
 371 |                             let name = "false".to_string();
 372 | 
 373 |                             terms.push(Term::new(*var, name, Vec::new()));
 374 |                         }
 375 |                         Constructor::Int(_) | Constructor::Range(_, _) => {
 376 |                             let name = "_".to_string();
 377 | 
 378 |                             terms.push(Term::new(*var, name, Vec::new()));
 379 |                         }
 380 |                         Constructor::Pair(_, _) => {
 381 |                             let args = case.arguments.clone();
 382 | 
 383 |                             terms.push(Term::new(*var, String::new(), args));
 384 |                         }
 385 |                         Constructor::Variant(typ, idx) => {
 386 |                             let args = case.arguments.clone();
 387 |                             let name = if let Type::Enum(variants) =
 388 |                                 &self.types[typ.0]
 389 |                             {
 390 |                                 variants[*idx].0.clone()
 391 |                             } else {
 392 |                                 unreachable!()
 393 |                             };
 394 | 
 395 |                             terms.push(Term::new(*var, name, args));
 396 |                         }
 397 |                     }
 398 | 
 399 |                     self.add_missing_patterns(&case.body, terms, missing);
 400 |                     terms.pop();
 401 |                 }
 402 | 
 403 |                 if let Some(node) = fallback {
 404 |                     self.add_missing_patterns(node, terms, missing);
 405 |                 }
 406 |             }
 407 |         }
 408 |     }
 409 | }
 410 | 
/// The `match` compiler itself (shocking, I know).
pub struct Compiler {
    /// The ID to assign to the next variable created by the compiler.
    variable_id: usize,

    /// The types variables can have, indexed by type ID.
    types: Vec<Type>,

    /// The diagnostics collected while compiling.
    diagnostics: Diagnostics,
}
 417 | 
 418 | impl Compiler {
 419 |     pub fn new() -> Self {
 420 |         Self {
 421 |             variable_id: 0,
 422 |             types: Vec::new(),
 423 |             diagnostics: Diagnostics { missing: false, reachable: Vec::new() },
 424 |         }
 425 |     }
 426 | 
 427 |     pub fn compile(mut self, rows: Vec<Row>) -> Match {
 428 |         Match {
 429 |             tree: self.compile_rows(rows),
 430 |             diagnostics: self.diagnostics,
 431 |             types: self.types,
 432 |         }
 433 |     }
 434 | 
 435 |     fn compile_rows(&mut self, mut rows: Vec<Row>) -> Decision {
 436 |         if rows.is_empty() {
 437 |             self.diagnostics.missing = true;
 438 | 
 439 |             return Decision::Failure;
 440 |         }
 441 | 
 442 |         expand_or_patterns(&mut rows);
 443 | 
 444 |         for row in &mut rows {
 445 |             self.move_variable_patterns(row);
 446 |         }
 447 | 
 448 |         // There may be multiple rows, but if the first one has no patterns
 449 |         // those extra rows are redundant, as a row without columns/patterns
 450 |         // always matches.
 451 |         if rows.first().map_or(false, |c| c.columns.is_empty()) {
 452 |             let row = rows.remove(0);
 453 | 
 454 |             self.diagnostics.reachable.push(row.body.value);
 455 | 
 456 |             return if let Some(guard) = row.guard {
 457 |                 Decision::Guard(
 458 |                     guard,
 459 |                     row.body,
 460 |                     Box::new(self.compile_rows(rows)),
 461 |                 )
 462 |             } else {
 463 |                 Decision::Success(row.body)
 464 |             };
 465 |         }
 466 | 
 467 |         let branch_var = self.branch_variable(&rows);
 468 | 
 469 |         match self.variable_type(branch_var).clone() {
 470 |             Type::Int => {
 471 |                 let (cases, fallback) =
 472 |                     self.compile_int_cases(rows, branch_var);
 473 | 
 474 |                 Decision::Switch(branch_var, cases, Some(fallback))
 475 |             }
 476 |             Type::Boolean => {
 477 |                 let cases = vec![
 478 |                     (Constructor::False, Vec::new(), Vec::new()),
 479 |                     (Constructor::True, Vec::new(), Vec::new()),
 480 |                 ];
 481 | 
 482 |                 Decision::Switch(
 483 |                     branch_var,
 484 |                     self.compile_constructor_cases(rows, branch_var, cases),
 485 |                     None,
 486 |                 )
 487 |             }
 488 |             Type::Pair(typ1, typ2) => {
 489 |                 let cases = vec![(
 490 |                     Constructor::Pair(typ1, typ2),
 491 |                     self.new_variables(&[typ1, typ2]),
 492 |                     Vec::new(),
 493 |                 )];
 494 | 
 495 |                 Decision::Switch(
 496 |                     branch_var,
 497 |                     self.compile_constructor_cases(rows, branch_var, cases),
 498 |                     None,
 499 |                 )
 500 |             }
 501 |             Type::Enum(variants) => {
 502 |                 let cases = variants
 503 |                     .iter()
 504 |                     .enumerate()
 505 |                     .map(|(idx, (_, args))| {
 506 |                         (
 507 |                             Constructor::Variant(branch_var.type_id, idx),
 508 |                             self.new_variables(args),
 509 |                             Vec::new(),
 510 |                         )
 511 |                     })
 512 |                     .collect();
 513 | 
 514 |                 Decision::Switch(
 515 |                     branch_var,
 516 |                     self.compile_constructor_cases(rows, branch_var, cases),
 517 |                     None,
 518 |                 )
 519 |             }
 520 |         }
 521 |     }
 522 | 
 523 |     /// Compiles the cases and fallback cases for integer and range patterns.
 524 |     ///
 525 |     /// Integers have an infinite number of constructors, so we specialise the
 526 |     /// compilation of integer and range patterns.
 527 |     fn compile_int_cases(
 528 |         &mut self,
 529 |         rows: Vec<Row>,
 530 |         branch_var: Variable,
 531 |     ) -> (Vec<Case>, Box<Decision>) {
 532 |         let mut raw_cases: Vec<(Constructor, Vec<Variable>, Vec<Row>)> =
 533 |             Vec::new();
 534 |         let mut fallback_rows = Vec::new();
 535 |         let mut tested: HashMap<(i64, i64), usize> = HashMap::new();
 536 | 
 537 |         for mut row in rows {
 538 |             if let Some(col) = row.remove_column(&branch_var) {
 539 |                 let (key, cons) = match col.pattern {
 540 |                     Pattern::Int(val) => ((val, val), Constructor::Int(val)),
 541 |                     Pattern::Range(start, stop) => {
 542 |                         ((start, stop), Constructor::Range(start, stop))
 543 |                     }
 544 |                     _ => unreachable!(),
 545 |                 };
 546 | 
 547 |                 if let Some(index) = tested.get(&key) {
 548 |                     raw_cases[*index].2.push(row);
 549 |                     continue;
 550 |                 }
 551 | 
 552 |                 tested.insert(key, raw_cases.len());
 553 | 
 554 |                 let mut rows = fallback_rows.clone();
 555 | 
 556 |                 rows.push(row);
 557 |                 raw_cases.push((cons, Vec::new(), rows));
 558 |             } else {
 559 |                 for (_, _, rows) in &mut raw_cases {
 560 |                     rows.push(row.clone());
 561 |                 }
 562 | 
 563 |                 fallback_rows.push(row);
 564 |             }
 565 |         }
 566 | 
 567 |         let cases = raw_cases
 568 |             .into_iter()
 569 |             .map(|(cons, vars, rows)| {
 570 |                 Case::new(cons, vars, self.compile_rows(rows))
 571 |             })
 572 |             .collect();
 573 | 
 574 |         (cases, Box::new(self.compile_rows(fallback_rows)))
 575 |     }
 576 | 
 577 |     /// Compiles the cases and sub cases for the constructor located at the
 578 |     /// column of the branching variable.
 579 |     ///
 580 |     /// What exactly this method does may be a bit hard to understand from the
 581 |     /// code, as there's simply quite a bit going on. Roughly speaking, it does
 582 |     /// the following:
 583 |     ///
 584 |     /// 1. It takes the column we're branching on (based on the branching
 585 |     ///    variable) and removes it from every row.
 586 |     /// 2. We add additional columns to this row, if the constructor takes any
 587 |     ///    arguments (which we'll handle in a nested match).
 588 |     /// 3. We turn the resulting list of rows into a list of cases, then compile
 589 |     ///    those into decision (sub) trees.
 590 |     ///
 591 |     /// If a row didn't include the branching variable, we simply copy that row
 592 |     /// into the list of rows for every constructor to test.
 593 |     ///
 594 |     /// For this to work, the `cases` variable must be prepared such that it has
 595 |     /// a triple for every constructor we need to handle. For an ADT with 10
 596 |     /// constructors, that means 10 triples. This is needed so this method can
 597 |     /// assign the correct sub matches to these constructors.
 598 |     ///
 599 |     /// Types with infinite constructors (e.g. int and string) are handled
 600 |     /// separately; they don't need most of this work anyway.
 601 |     fn compile_constructor_cases(
 602 |         &mut self,
 603 |         rows: Vec<Row>,
 604 |         branch_var: Variable,
 605 |         mut cases: Vec<(Constructor, Vec<Variable>, Vec<Row>)>,
 606 |     ) -> Vec<Case> {
 607 |         for mut row in rows {
 608 |             if let Some(col) = row.remove_column(&branch_var) {
 609 |                 if let Pattern::Constructor(cons, args) = col.pattern {
 610 |                     let idx = cons.index();
 611 |                     let mut cols = row.columns;
 612 | 
 613 |                     for (var, pat) in cases[idx].1.iter().zip(args.into_iter())
 614 |                     {
 615 |                         cols.push(Column::new(*var, pat));
 616 |                     }
 617 | 
 618 |                     cases[idx].2.push(Row::new(cols, row.guard, row.body));
 619 |                 }
 620 |             } else {
 621 |                 for (_, _, rows) in &mut cases {
 622 |                     rows.push(row.clone());
 623 |                 }
 624 |             }
 625 |         }
 626 | 
 627 |         cases
 628 |             .into_iter()
 629 |             .map(|(cons, vars, rows)| {
 630 |                 Case::new(cons, vars, self.compile_rows(rows))
 631 |             })
 632 |             .collect()
 633 |     }
 634 | 
 635 |     /// Moves variable-only patterns/tests into the right-hand side/body of a
 636 |     /// case.
 637 |     ///
 638 |     /// This turns cases like this:
 639 |     ///
 640 |     ///     case foo -> print(foo)
 641 |     ///
 642 |     /// Into this:
 643 |     ///
 644 |     ///     case -> {
 645 |     ///       let foo = it
 646 |     ///       print(foo)
 647 |     ///     }
 648 |     ///
 649 |     /// Where `it` is a variable holding the value `case foo` is compared
 650 |     /// against, and the case/row has no patterns (i.e. always matches).
 651 |     fn move_variable_patterns(&self, row: &mut Row) {
 652 |         row.columns.retain(|col| {
 653 |             if let Pattern::Binding(bind) = &col.pattern {
 654 |                 row.body.bindings.push((bind.clone(), col.variable));
 655 |                 false
 656 |             } else {
 657 |                 true
 658 |             }
 659 |         });
 660 |     }
 661 | 
 662 |     /// Given a row, returns the variable in that row that's referred to the
 663 |     /// most across all rows.
 664 |     fn branch_variable(&self, rows: &[Row]) -> Variable {
 665 |         let mut counts = HashMap::new();
 666 | 
 667 |         for row in rows {
 668 |             for col in &row.columns {
 669 |                 *counts.entry(&col.variable).or_insert(0_usize) += 1
 670 |             }
 671 |         }
 672 | 
 673 |         rows[0]
 674 |             .columns
 675 |             .iter()
 676 |             .map(|col| col.variable)
 677 |             .max_by_key(|var| counts[var])
 678 |             .unwrap()
 679 |     }
 680 | 
 681 |     /// Returns a new variable to use in the decision tree.
 682 |     ///
 683 |     /// In a real compiler you'd have to ensure these variables don't conflict
 684 |     /// with other variables.
 685 |     fn new_variable(&mut self, type_id: TypeId) -> Variable {
 686 |         let var = Variable { id: self.variable_id, type_id };
 687 | 
 688 |         self.variable_id += 1;
 689 |         var
 690 |     }
 691 | 
 692 |     fn new_variables(&mut self, type_ids: &[TypeId]) -> Vec<Variable> {
 693 |         type_ids.iter().map(|t| self.new_variable(*t)).collect()
 694 |     }
 695 | 
 696 |     /// Returns the type of a given variable.
 697 |     ///
 698 |     /// In a real compiler the implementation of this would likely be quite
 699 |     /// different, depending on how your type system is implemented.
 700 |     ///
 701 |     /// For the sake of simplicity, we just store types in a Vec and retrieve
 702 |     /// them here according to the variable's type ID.
 703 |     fn variable_type(&self, id: Variable) -> &Type {
 704 |         &self.types[id.type_id.0]
 705 |     }
 706 | }
 707 | 
 708 | #[cfg(test)]
 709 | mod tests {
 710 |     use super::*;
 711 |     use similar_asserts::assert_eq;
 712 | 
 713 |     fn new_type(compiler: &mut Compiler, typ: Type) -> TypeId {
 714 |         let id = compiler.types.len();
 715 | 
 716 |         compiler.types.push(typ);
 717 |         TypeId(id)
 718 |     }
 719 | 
 720 |     fn tt() -> Pattern {
 721 |         Pattern::Constructor(Constructor::True, Vec::new())
 722 |     }
 723 | 
 724 |     fn ff() -> Pattern {
 725 |         Pattern::Constructor(Constructor::False, Vec::new())
 726 |     }
 727 | 
 728 |     fn bind(name: &str) -> Pattern {
 729 |         Pattern::Binding(name.to_string())
 730 |     }
 731 | 
 732 |     fn variant(typ: TypeId, index: usize, args: Vec<Pattern>) -> Pattern {
 733 |         Pattern::Constructor(Constructor::Variant(typ, index), args)
 734 |     }
 735 | 
 736 |     fn pair(
 737 |         typ1: TypeId,
 738 |         typ2: TypeId,
 739 |         pat1: Pattern,
 740 |         pat2: Pattern,
 741 |     ) -> Pattern {
 742 |         Pattern::Constructor(Constructor::Pair(typ1, typ2), vec![pat1, pat2])
 743 |     }
 744 | 
    /// Returns a pattern matching the integer `val`.
    fn int(val: i64) -> Pattern {
        Pattern::Int(val)
    }
 748 | 
 749 |     fn rhs(value: usize) -> Body {
 750 |         Body { bindings: Vec::new(), value }
 751 |     }
 752 | 
 753 |     fn var(id: usize, type_id: TypeId) -> Variable {
 754 |         Variable { id, type_id }
 755 |     }
 756 | 
 757 |     fn compile(
 758 |         compiler: Compiler,
 759 |         input: Variable,
 760 |         rules: Vec<(Pattern, Body)>,
 761 |     ) -> Match {
 762 |         let rows = rules
 763 |             .into_iter()
 764 |             .map(|(pat, body)| {
 765 |                 Row::new(vec![Column::new(input, pat)], None, body)
 766 |             })
 767 |             .collect();
 768 | 
 769 |         compiler.compile(rows)
 770 |     }
 771 | 
    /// Returns the decision produced for a missing pattern.
    fn failure() -> Decision {
        Decision::Failure
    }
 775 | 
 776 |     fn success(value: usize) -> Decision {
 777 |         Decision::Success(Body { bindings: Vec::new(), value })
 778 |     }
 779 | 
 780 |     fn success_with_bindings(
 781 |         bindings: Vec<(&str, Variable)>,
 782 |         value: usize,
 783 |     ) -> Decision {
 784 |         Decision::Success(Body {
 785 |             bindings: bindings
 786 |                 .into_iter()
 787 |                 .map(|(n, v)| (n.to_string(), v))
 788 |                 .collect(),
 789 |             value,
 790 |         })
 791 |     }
 792 | 
 793 |     #[test]
 794 |     fn test_move_variable_patterns() {
 795 |         let mut compiler = Compiler::new();
 796 |         let typ = new_type(&mut compiler, Type::Boolean);
 797 |         let var1 = compiler.new_variable(typ);
 798 |         let var2 = compiler.new_variable(typ);
 799 |         let cons = Constructor::True;
 800 |         let mut row = Row {
 801 |             columns: vec![
 802 |                 Column::new(var2, bind("a")),
 803 |                 Column::new(
 804 |                     var1,
 805 |                     Pattern::Constructor(cons.clone(), Vec::new()),
 806 |                 ),
 807 |             ],
 808 |             guard: None,
 809 |             body: Body { bindings: Vec::new(), value: 42 },
 810 |         };
 811 | 
 812 |         compiler.move_variable_patterns(&mut row);
 813 | 
 814 |         assert_eq!(
 815 |             row,
 816 |             Row {
 817 |                 columns: vec![Column::new(
 818 |                     var1,
 819 |                     Pattern::Constructor(cons, Vec::new())
 820 |                 )],
 821 |                 guard: None,
 822 |                 body: Body {
 823 |                     bindings: vec![("a".to_string(), var2)],
 824 |                     value: 42
 825 |                 }
 826 |             }
 827 |         );
 828 |     }
 829 | 
 830 |     #[test]
 831 |     fn test_move_variable_patterns_without_constructor_pattern() {
 832 |         let mut compiler = Compiler::new();
 833 |         let typ = new_type(&mut compiler, Type::Boolean);
 834 |         let var1 = compiler.new_variable(typ);
 835 |         let mut row = Row {
 836 |             columns: vec![Column::new(var1, bind("a"))],
 837 |             guard: None,
 838 |             body: Body { bindings: Vec::new(), value: 42 },
 839 |         };
 840 | 
 841 |         compiler.move_variable_patterns(&mut row);
 842 | 
 843 |         assert_eq!(
 844 |             row,
 845 |             Row {
 846 |                 columns: Vec::new(),
 847 |                 guard: None,
 848 |                 body: Body {
 849 |                     bindings: vec![("a".to_string(), var1)],
 850 |                     value: 42
 851 |                 }
 852 |             }
 853 |         );
 854 |     }
 855 | 
 856 |     #[test]
 857 |     fn test_branch_variable() {
 858 |         let mut compiler = Compiler::new();
 859 |         let typ = new_type(&mut compiler, Type::Boolean);
 860 |         let var1 = compiler.new_variable(typ);
 861 |         let var2 = compiler.new_variable(typ);
 862 |         let rows = vec![
 863 |             Row::new(
 864 |                 vec![
 865 |                     Column::new(var1, Pattern::Int(42)),
 866 |                     Column::new(var2, Pattern::Int(50)),
 867 |                 ],
 868 |                 None,
 869 |                 rhs(1),
 870 |             ),
 871 |             Row::new(vec![Column::new(var2, Pattern::Int(4))], None, rhs(2)),
 872 |         ];
 873 | 
 874 |         let branch = compiler.branch_variable(&rows);
 875 | 
 876 |         assert_eq!(branch, var2);
 877 |     }
 878 | 
 879 |     #[test]
 880 |     fn test_compile_simple_pattern() {
 881 |         let mut compiler = Compiler::new();
 882 |         let typ = new_type(&mut compiler, Type::Boolean);
 883 |         let input = compiler.new_variable(typ);
 884 |         let result =
 885 |             compile(compiler, input, vec![(tt(), rhs(1)), (ff(), rhs(2))]);
 886 | 
 887 |         assert_eq!(
 888 |             result.tree,
 889 |             Decision::Switch(
 890 |                 input,
 891 |                 vec![
 892 |                     Case::new(Constructor::False, Vec::new(), success(2)),
 893 |                     Case::new(Constructor::True, Vec::new(), success(1)),
 894 |                 ],
 895 |                 None
 896 |             )
 897 |         );
 898 |     }
 899 | 
 900 |     #[test]
 901 |     fn test_compile_nonexhaustive_pattern() {
 902 |         let mut compiler = Compiler::new();
 903 |         let typ = new_type(&mut compiler, Type::Boolean);
 904 |         let input = compiler.new_variable(typ);
 905 |         let result = compile(compiler, input, vec![(tt(), rhs(1))]);
 906 | 
 907 |         assert_eq!(
 908 |             result.tree,
 909 |             Decision::Switch(
 910 |                 input,
 911 |                 vec![
 912 |                     Case::new(Constructor::False, Vec::new(), failure()),
 913 |                     Case::new(Constructor::True, Vec::new(), success(1)),
 914 |                 ],
 915 |                 None
 916 |             )
 917 |         );
 918 |         assert!(result.diagnostics.missing);
 919 |         assert_eq!(result.missing_patterns(), vec!["false".to_string()]);
 920 |     }
 921 | 
 922 |     #[test]
 923 |     fn test_compile_redundant_pattern() {
 924 |         let mut compiler = Compiler::new();
 925 |         let typ = new_type(&mut compiler, Type::Boolean);
 926 |         let input = compiler.new_variable(typ);
 927 |         let result = compile(
 928 |             compiler,
 929 |             input,
 930 |             vec![(tt(), rhs(1)), (tt(), rhs(2)), (ff(), rhs(3))],
 931 |         );
 932 | 
 933 |         assert_eq!(
 934 |             result.tree,
 935 |             Decision::Switch(
 936 |                 input,
 937 |                 vec![
 938 |                     Case::new(Constructor::False, Vec::new(), success(3)),
 939 |                     Case::new(Constructor::True, Vec::new(), success(1)),
 940 |                 ],
 941 |                 None
 942 |             )
 943 |         );
 944 |         assert_eq!(result.diagnostics.reachable, vec![3, 1]);
 945 |     }
 946 | 
 947 |     #[test]
 948 |     fn test_compile_redundant_int() {
 949 |         let mut compiler = Compiler::new();
 950 |         let typ = new_type(&mut compiler, Type::Int);
 951 |         let input = compiler.new_variable(typ);
 952 |         let result = compile(
 953 |             compiler,
 954 |             input,
 955 |             vec![
 956 |                 (int(1), rhs(1)),
 957 |                 (int(1), rhs(2)),
 958 |                 (int(2), rhs(3)),
 959 |                 (bind("a"), rhs(4)),
 960 |             ],
 961 |         );
 962 | 
 963 |         assert_eq!(
 964 |             result.tree,
 965 |             Decision::Switch(
 966 |                 input,
 967 |                 vec![
 968 |                     Case::new(Constructor::Int(1), Vec::new(), success(1)),
 969 |                     Case::new(Constructor::Int(2), Vec::new(), success(3)),
 970 |                 ],
 971 |                 Some(Box::new(success_with_bindings(vec![("a", input)], 4)))
 972 |             )
 973 |         );
 974 |         assert_eq!(result.diagnostics.reachable, vec![1, 3, 4]);
 975 |     }
 976 | 
 977 |     #[test]
 978 |     fn test_compile_variable_pattern() {
 979 |         let mut compiler = Compiler::new();
 980 |         let typ = new_type(&mut compiler, Type::Boolean);
 981 |         let input = compiler.new_variable(typ);
 982 |         let result =
 983 |             compile(compiler, input, vec![(tt(), rhs(1)), (bind("a"), rhs(2))]);
 984 | 
 985 |         assert_eq!(
 986 |             result.tree,
 987 |             Decision::Switch(
 988 |                 input,
 989 |                 vec![
 990 |                     Case::new(
 991 |                         Constructor::False,
 992 |                         Vec::new(),
 993 |                         success_with_bindings(vec![("a", input)], 2)
 994 |                     ),
 995 |                     Case::new(Constructor::True, Vec::new(), success(1)),
 996 |                 ],
 997 |                 None
 998 |             )
 999 |         );
1000 |     }
1001 | 
1002 |     #[test]
1003 |     fn test_compile_nonexhaustive_int_pattern() {
1004 |         let mut compiler = Compiler::new();
1005 |         let int_type = new_type(&mut compiler, Type::Int);
1006 |         let input = compiler.new_variable(int_type);
1007 |         let result =
1008 |             compile(compiler, input, vec![(int(4), rhs(1)), (int(5), rhs(2))]);
1009 | 
1010 |         assert_eq!(
1011 |             result.tree,
1012 |             Decision::Switch(
1013 |                 input,
1014 |                 vec![
1015 |                     Case::new(Constructor::Int(4), Vec::new(), success(1)),
1016 |                     Case::new(Constructor::Int(5), Vec::new(), success(2)),
1017 |                 ],
1018 |                 Some(Box::new(failure()))
1019 |             )
1020 |         );
1021 |         assert_eq!(result.missing_patterns(), vec!["_".to_string()]);
1022 |     }
1023 | 
1024 |     #[test]
1025 |     fn test_compile_exhaustive_int_pattern() {
1026 |         let mut compiler = Compiler::new();
1027 |         let int_type = new_type(&mut compiler, Type::Int);
1028 |         let input = compiler.new_variable(int_type);
1029 |         let result = compile(
1030 |             compiler,
1031 |             input,
1032 |             vec![(int(4), rhs(1)), (int(5), rhs(2)), (bind("a"), rhs(3))],
1033 |         );
1034 | 
1035 |         assert_eq!(
1036 |             result.tree,
1037 |             Decision::Switch(
1038 |                 input,
1039 |                 vec![
1040 |                     Case::new(Constructor::Int(4), Vec::new(), success(1)),
1041 |                     Case::new(Constructor::Int(5), Vec::new(), success(2)),
1042 |                 ],
1043 |                 Some(Box::new(success_with_bindings(vec![("a", input)], 3)))
1044 |             )
1045 |         );
1046 |     }
1047 | 
1048 |     #[test] // The `5` row comes after the binding `a` row, so rhs(2) is never chosen.
1049 |     fn test_compile_unreachable_int_pattern() {
1050 |         let mut compiler = Compiler::new();
1051 |         let int_type = new_type(&mut compiler, Type::Int);
1052 |         let input = compiler.new_variable(int_type);
1053 |         let result = compile(
1054 |             compiler,
1055 |             input,
1056 |             vec![(int(4), rhs(1)), (bind("a"), rhs(3)), (int(5), rhs(2))],
1057 |         );
1058 |         // The `Int(5)` case and the fallback both succeed with 3, binding `a` to `input`.
1059 |         assert_eq!(
1060 |             result.tree,
1061 |             Decision::Switch(
1062 |                 input,
1063 |                 vec![
1064 |                     Case::new(Constructor::Int(4), Vec::new(), success(1)),
1065 |                     Case::new(
1066 |                         Constructor::Int(5),
1067 |                         Vec::new(),
1068 |                         success_with_bindings(vec![("a", input)], 3)
1069 |                     ),
1070 |                 ],
1071 |                 Some(Box::new(success_with_bindings(vec![("a", input)], 3)))
1072 |             )
1073 |         );
1074 |         assert_eq!(result.diagnostics.reachable, vec![1, 3, 3]); // 2 is absent; 3 is listed twice (presumably once per leaf)
1075 |     }
1076 | 
1077 |     #[test] // A single row `(4, a)` matched against a pair of Ints.
1078 |     fn test_compile_nonexhaustive_nested_int_pattern() {
1079 |         let mut compiler = Compiler::new();
1080 |         let int_type = new_type(&mut compiler, Type::Int);
1081 |         let tup_type = new_type(&mut compiler, Type::Pair(int_type, int_type));
1082 |         let input = compiler.new_variable(tup_type);
1083 |         let result = compile(
1084 |             compiler,
1085 |             input,
1086 |             vec![(pair(int_type, int_type, int(4), bind("a")), rhs(1))],
1087 |         );
1088 |         // The Pair case exposes var 1 and var 2; only var 1 is switched on, var 2 is bound to `a`.
1089 |         assert_eq!(
1090 |             result.tree,
1091 |             Decision::Switch(
1092 |                 input,
1093 |                 vec![Case::new(
1094 |                     Constructor::Pair(int_type, int_type),
1095 |                     vec![var(1, int_type), var(2, int_type),],
1096 |                     Decision::Switch(
1097 |                         var(1, int_type),
1098 |                         vec![Case::new(
1099 |                             Constructor::Int(4),
1100 |                             Vec::new(),
1101 |                             success_with_bindings(
1102 |                                 vec![("a", var(2, int_type))],
1103 |                                 1
1104 |                             )
1105 |                         )],
1106 |                         Some(Box::new(failure())) // any first field other than 4 fails
1107 |                     )
1108 |                 )],
1109 |                 None
1110 |             )
1111 |         );
1112 |         assert_eq!(result.missing_patterns(), vec!["(_, _)".to_string()]); // reported with wildcards in both fields
1113 |     }
1114 | 
1115 |     #[test] // Rows `(4, 5)` and `(a, b)` matched against a pair of Ints.
1116 |     fn test_compile_exhaustive_nested_int_pattern() {
1117 |         let mut compiler = Compiler::new();
1118 |         let int_type = new_type(&mut compiler, Type::Int);
1119 |         let tup_type = new_type(&mut compiler, Type::Pair(int_type, int_type));
1120 |         let input = compiler.new_variable(tup_type);
1121 |         let result = compile(
1122 |             compiler,
1123 |             input,
1124 |             vec![
1125 |                 (pair(int_type, int_type, int(4), int(5)), rhs(1)),
1126 |                 (pair(int_type, int_type, bind("a"), bind("b")), rhs(2)),
1127 |             ],
1128 |         );
1129 |         // The expected tree tests var 2 against 5 before var 1 against 4; both fallbacks bind `a` and `b`.
1130 |         assert_eq!(
1131 |             result.tree,
1132 |             Decision::Switch(
1133 |                 input,
1134 |                 vec![Case::new(
1135 |                     Constructor::Pair(int_type, int_type),
1136 |                     vec![var(1, int_type), var(2, int_type)],
1137 |                     Decision::Switch(
1138 |                         var(2, int_type),
1139 |                         vec![Case::new(
1140 |                             Constructor::Int(5),
1141 |                             Vec::new(),
1142 |                             Decision::Switch(
1143 |                                 var(1, int_type),
1144 |                                 vec![Case::new(
1145 |                                     Constructor::Int(4),
1146 |                                     Vec::new(),
1147 |                                     success(1)
1148 |                                 )],
1149 |                                 Some(Box::new(success_with_bindings(
1150 |                                     vec![
1151 |                                         ("a", var(1, int_type)),
1152 |                                         ("b", var(2, int_type))
1153 |                                     ],
1154 |                                     2
1155 |                                 )))
1156 |                             )
1157 |                         )],
1158 |                         Some(Box::new(success_with_bindings(
1159 |                             vec![
1160 |                                 ("a", var(1, int_type)),
1161 |                                 ("b", var(2, int_type))
1162 |                             ],
1163 |                             2
1164 |                         )))
1165 |                     )
1166 |                 )],
1167 |                 None
1168 |             )
1169 |         );
1170 |     }
1171 | 
1172 |     #[test] // A single row for variant 0 (`Some`) holding `4`; nothing covers `None`.
1173 |     fn test_compile_nonexhaustive_option_type() {
1174 |         let mut compiler = Compiler::new();
1175 |         let int_type = new_type(&mut compiler, Type::Int);
1176 |         let option_type = new_type(
1177 |             &mut compiler,
1178 |             Type::Enum(vec![
1179 |                 ("Some".to_string(), vec![int_type]),
1180 |                 ("None".to_string(), Vec::new()),
1181 |             ]),
1182 |         );
1183 |         let input = compiler.new_variable(option_type);
1184 |         let result = compile(
1185 |             compiler,
1186 |             input,
1187 |             vec![(variant(option_type, 0, vec![Pattern::Int(4)]), rhs(1))],
1188 |         );
1189 |         // Variant 0 switches on its payload (var 1); variant 1 has no row and fails outright.
1190 |         assert_eq!(
1191 |             result.tree,
1192 |             Decision::Switch(
1193 |                 input,
1194 |                 vec![
1195 |                     Case::new(
1196 |                         Constructor::Variant(option_type, 0),
1197 |                         vec![var(1, int_type)],
1198 |                         Decision::Switch(
1199 |                             var(1, int_type),
1200 |                             vec![Case::new(
1201 |                                 Constructor::Int(4),
1202 |                                 Vec::new(),
1203 |                                 success(1)
1204 |                             )],
1205 |                             Some(Box::new(failure()))
1206 |                         )
1207 |                     ),
1208 |                     Case::new(
1209 |                         Constructor::Variant(option_type, 1),
1210 |                         Vec::new(),
1211 |                         failure()
1212 |                     )
1213 |                 ],
1214 |                 None,
1215 |             )
1216 |         );
1217 |         assert_eq!(
1218 |             result.missing_patterns(),
1219 |             vec!["None".to_string(), "Some(_)".to_string()] // both variants are reported, "None" first
1220 |         );
1221 |     }
1222 | 
1223 |     #[test] // Same shape as the one-field case, but variant 0 carries two Int fields.
1224 |     fn test_compile_nonexhaustive_option_type_with_multiple_arguments() {
1225 |         let mut compiler = Compiler::new();
1226 |         let int_type = new_type(&mut compiler, Type::Int);
1227 |         let option_type = new_type(
1228 |             &mut compiler,
1229 |             Type::Enum(vec![
1230 |                 ("Some".to_string(), vec![int_type, int_type]),
1231 |                 ("None".to_string(), Vec::new()),
1232 |             ]),
1233 |         );
1234 |         let input = compiler.new_variable(option_type);
1235 |         let result = compile(
1236 |             compiler,
1237 |             input,
1238 |             vec![(
1239 |                 variant(option_type, 0, vec![Pattern::Int(4), Pattern::Int(5)]),
1240 |                 rhs(1),
1241 |             )],
1242 |         );
1243 |         // The expected tree tests var 2 against 5, then var 1 against 4; every other path fails.
1244 |         assert_eq!(
1245 |             result.tree,
1246 |             Decision::Switch(
1247 |                 input,
1248 |                 vec![
1249 |                     Case::new(
1250 |                         Constructor::Variant(option_type, 0),
1251 |                         vec![var(1, int_type), var(2, int_type)],
1252 |                         Decision::Switch(
1253 |                             var(2, int_type),
1254 |                             vec![Case::new(
1255 |                                 Constructor::Int(5),
1256 |                                 Vec::new(),
1257 |                                 Decision::Switch(
1258 |                                     var(1, int_type),
1259 |                                     vec![Case::new(
1260 |                                         Constructor::Int(4),
1261 |                                         Vec::new(),
1262 |                                         success(1)
1263 |                                     )],
1264 |                                     Some(Box::new(failure()))
1265 |                                 )
1266 |                             )],
1267 |                             Some(Box::new(failure()))
1268 |                         )
1269 |                     ),
1270 |                     Case::new(
1271 |                         Constructor::Variant(option_type, 1),
1272 |                         Vec::new(),
1273 |                         failure()
1274 |                     )
1275 |                 ],
1276 |                 None
1277 |             )
1278 |         );
1279 |         assert_eq!(
1280 |             result.missing_patterns(),
1281 |             vec!["None".to_string(), "Some(_, _)".to_string(),] // one wildcard per field of variant 0
1282 |         );
1283 |     }
1284 | 
1285 |     #[test] // Rows `Some(4)`, `Some(a)` and `None` (variants 0, 0 and 1 of the enum).
1286 |     fn test_compile_exhaustive_option_type() {
1287 |         let mut compiler = Compiler::new();
1288 |         let int_type = new_type(&mut compiler, Type::Int);
1289 |         let option_type = new_type(
1290 |             &mut compiler,
1291 |             Type::Enum(vec![
1292 |                 ("Some".to_string(), vec![int_type]),
1293 |                 ("None".to_string(), Vec::new()),
1294 |             ]),
1295 |         );
1296 |         let input = compiler.new_variable(option_type);
1297 |         let result = compile(
1298 |             compiler,
1299 |             input,
1300 |             vec![
1301 |                 (variant(option_type, 0, vec![Pattern::Int(4)]), rhs(1)),
1302 |                 (variant(option_type, 0, vec![bind("a")]), rhs(2)),
1303 |                 (variant(option_type, 1, Vec::new()), rhs(3)),
1304 |             ],
1305 |         );
1306 |         // In variant 0, payload 4 succeeds with 1 and anything else binds `a`; variant 1 succeeds with 3.
1307 |         assert_eq!(
1308 |             result.tree,
1309 |             Decision::Switch(
1310 |                 input,
1311 |                 vec![
1312 |                     Case::new(
1313 |                         Constructor::Variant(option_type, 0),
1314 |                         vec![var(1, int_type)],
1315 |                         Decision::Switch(
1316 |                             var(1, int_type),
1317 |                             vec![Case::new(
1318 |                                 Constructor::Int(4),
1319 |                                 Vec::new(),
1320 |                                 success(1)
1321 |                             )],
1322 |                             Some(Box::new(success_with_bindings(
1323 |                                 vec![("a", var(1, int_type))],
1324 |                                 2
1325 |                             )))
1326 |                         )
1327 |                     ),
1328 |                     Case::new(
1329 |                         Constructor::Variant(option_type, 1),
1330 |                         Vec::new(),
1331 |                         success(3)
1332 |                     )
1333 |                 ],
1334 |                 None
1335 |             )
1336 |         );
1337 |     }
1338 | 
1339 |     #[test] // Two identical `tt()` rows in variant 0: the second one (rhs 10) is redundant.
1340 |     fn test_compile_redundant_option_type_with_bool() {
1341 |         let mut compiler = Compiler::new();
1342 |         let bool_type = new_type(&mut compiler, Type::Boolean);
1343 |         let option_type = new_type(
1344 |             &mut compiler,
1345 |             Type::Enum(vec![
1346 |                 ("Some".to_string(), vec![bool_type]),
1347 |                 ("None".to_string(), Vec::new()),
1348 |             ]),
1349 |         );
1350 |         let input = compiler.new_variable(option_type);
1351 |         let result = compile(
1352 |             compiler,
1353 |             input,
1354 |             vec![
1355 |                 (variant(option_type, 0, vec![tt()]), rhs(1)),
1356 |                 (variant(option_type, 0, vec![tt()]), rhs(10)),
1357 |                 (variant(option_type, 0, vec![bind("a")]), rhs(2)),
1358 |                 (variant(option_type, 1, Vec::new()), rhs(3)),
1359 |             ],
1360 |         );
1361 |         // The Boolean switch lists `False` before `True` and has no fallback.
1362 |         assert_eq!(
1363 |             result.tree,
1364 |             Decision::Switch(
1365 |                 input,
1366 |                 vec![
1367 |                     Case::new(
1368 |                         Constructor::Variant(option_type, 0),
1369 |                         vec![var(1, bool_type)],
1370 |                         Decision::Switch(
1371 |                             var(1, bool_type),
1372 |                             vec![
1373 |                                 Case::new(
1374 |                                     Constructor::False,
1375 |                                     Vec::new(),
1376 |                                     success_with_bindings(
1377 |                                         vec![("a", var(1, bool_type))],
1378 |                                         2
1379 |                                     )
1380 |                                 ),
1381 |                                 Case::new(
1382 |                                     Constructor::True,
1383 |                                     Vec::new(),
1384 |                                     success(1)
1385 |                                 )
1386 |                             ],
1387 |                             None
1388 |                         )
1389 |                     ),
1390 |                     Case::new(
1391 |                         Constructor::Variant(option_type, 1),
1392 |                         Vec::new(),
1393 |                         success(3)
1394 |                     )
1395 |                 ],
1396 |                 None
1397 |             )
1398 |         );
1399 |         // 10 is missing from the list; the order matches the leaves above (False, True, variant 1).
1400 |         assert_eq!(result.diagnostics.reachable, vec![2, 1, 3]);
1401 |     }
1402 | 
1403 |     #[test] // Two identical `Some(4)` rows: the second one (rhs 10) is redundant.
1404 |     fn test_compile_redundant_option_type_with_int() {
1405 |         let mut compiler = Compiler::new();
1406 |         let int_type = new_type(&mut compiler, Type::Int);
1407 |         let option_type = new_type(
1408 |             &mut compiler,
1409 |             Type::Enum(vec![
1410 |                 ("Some".to_string(), vec![int_type]),
1411 |                 ("None".to_string(), Vec::new()),
1412 |             ]),
1413 |         );
1414 |         let input = compiler.new_variable(option_type);
1415 |         let result = compile(
1416 |             compiler,
1417 |             input,
1418 |             vec![
1419 |                 (variant(option_type, 0, vec![Pattern::Int(4)]), rhs(1)),
1420 |                 (variant(option_type, 0, vec![Pattern::Int(4)]), rhs(10)),
1421 |                 (variant(option_type, 0, vec![bind("a")]), rhs(2)),
1422 |                 (variant(option_type, 1, Vec::new()), rhs(3)),
1423 |             ],
1424 |         );
1425 |         // Only one `Int(4)` case is expected, and it succeeds with 1, never 10.
1426 |         assert_eq!(
1427 |             result.tree,
1428 |             Decision::Switch(
1429 |                 input,
1430 |                 vec![
1431 |                     Case::new(
1432 |                         Constructor::Variant(option_type, 0),
1433 |                         vec![var(1, int_type)],
1434 |                         Decision::Switch(
1435 |                             var(1, int_type),
1436 |                             vec![Case::new(
1437 |                                 Constructor::Int(4),
1438 |                                 Vec::new(),
1439 |                                 success(1)
1440 |                             ),],
1441 |                             Some(Box::new(success_with_bindings(
1442 |                                 vec![("a", var(1, int_type))],
1443 |                                 2
1444 |                             )))
1445 |                         )
1446 |                     ),
1447 |                     Case::new(
1448 |                         Constructor::Variant(option_type, 1),
1449 |                         Vec::new(),
1450 |                         success(3)
1451 |                     )
1452 |                 ],
1453 |                 None
1454 |             )
1455 |         );
1456 |         // 10 is missing from the list of reachable right-hand sides.
1457 |         assert_eq!(result.diagnostics.reachable, vec![1, 2, 3]);
1458 |     }
1459 | 
1460 |     #[test] // Row `Some(4)` followed by a top-level binding `a` over the whole input.
1461 |     fn test_compile_exhaustive_option_type_with_binding() {
1462 |         let mut compiler = Compiler::new();
1463 |         let int_type = new_type(&mut compiler, Type::Int);
1464 |         let option_type = new_type(
1465 |             &mut compiler,
1466 |             Type::Enum(vec![
1467 |                 ("Some".to_string(), vec![int_type]),
1468 |                 ("None".to_string(), Vec::new()),
1469 |             ]),
1470 |         );
1471 |         let input = compiler.new_variable(option_type);
1472 |         let result = compile(
1473 |             compiler,
1474 |             input,
1475 |             vec![
1476 |                 (variant(option_type, 0, vec![Pattern::Int(4)]), rhs(1)),
1477 |                 (bind("a"), rhs(2)),
1478 |             ],
1479 |         );
1480 |         // `a` is bound to `input` itself (not the payload var 1) in both places it is reached.
1481 |         assert_eq!(
1482 |             result.tree,
1483 |             Decision::Switch(
1484 |                 input,
1485 |                 vec![
1486 |                     Case::new(
1487 |                         Constructor::Variant(option_type, 0),
1488 |                         vec![var(1, int_type)],
1489 |                         Decision::Switch(
1490 |                             var(1, int_type),
1491 |                             vec![Case::new(
1492 |                                 Constructor::Int(4),
1493 |                                 Vec::new(),
1494 |                                 success(1)
1495 |                             )],
1496 |                             Some(Box::new(success_with_bindings(
1497 |                                 vec![("a", input)],
1498 |                                 2
1499 |                             )))
1500 |                         )
1501 |                     ),
1502 |                     Case::new(
1503 |                         Constructor::Variant(option_type, 1),
1504 |                         Vec::new(),
1505 |                         success_with_bindings(vec![("a", input)], 2)
1506 |                     )
1507 |                 ],
1508 |                 None,
1509 |             )
1510 |         );
1511 |     }
1512 | 
1513 |     #[test] // A single row `Some((4, a))`: a pair pattern nested inside variant 0.
1514 |     fn test_compile_nonexhaustive_pair_in_option_pattern() {
1515 |         let mut compiler = Compiler::new();
1516 |         let int_type = new_type(&mut compiler, Type::Int);
1517 |         let tup_type = new_type(&mut compiler, Type::Pair(int_type, int_type));
1518 |         let option_type = new_type(
1519 |             &mut compiler,
1520 |             Type::Enum(vec![
1521 |                 ("Some".to_string(), vec![tup_type]),
1522 |                 ("None".to_string(), Vec::new()),
1523 |             ]),
1524 |         );
1525 |         let input = compiler.new_variable(option_type);
1526 |         let result = compile(
1527 |             compiler,
1528 |             input,
1529 |             vec![(
1530 |                 variant(
1531 |                     option_type,
1532 |                     0,
1533 |                     vec![pair(int_type, int_type, int(4), bind("a"))],
1534 |                 ),
1535 |                 rhs(1),
1536 |             )],
1537 |         );
1538 |         // Three nested switches: the variant, the pair (var 1), then the pair's first field (var 2).
1539 |         assert_eq!(
1540 |             result.tree,
1541 |             Decision::Switch(
1542 |                 input,
1543 |                 vec![
1544 |                     Case::new(
1545 |                         Constructor::Variant(option_type, 0),
1546 |                         vec![var(1, tup_type)],
1547 |                         Decision::Switch(
1548 |                             var(1, tup_type),
1549 |                             vec![Case::new(
1550 |                                 Constructor::Pair(int_type, int_type),
1551 |                                 vec![var(2, int_type), var(3, int_type),],
1552 |                                 Decision::Switch(
1553 |                                     var(2, int_type),
1554 |                                     vec![Case::new(
1555 |                                         Constructor::Int(4),
1556 |                                         Vec::new(),
1557 |                                         success_with_bindings(
1558 |                                             vec![("a", var(3, int_type))],
1559 |                                             1
1560 |                                         )
1561 |                                     )],
1562 |                                     Some(Box::new(failure()))
1563 |                                 )
1564 |                             )],
1565 |                             None,
1566 |                         )
1567 |                     ),
1568 |                     Case::new(
1569 |                         Constructor::Variant(option_type, 1),
1570 |                         Vec::new(),
1571 |                         failure()
1572 |                     )
1573 |                 ],
1574 |                 None
1575 |             )
1576 |         );
1577 |         assert_eq!(
1578 |             result.missing_patterns(),
1579 |             vec!["None".to_string(), "Some((_, _))".to_string()] // the nested pair is rendered inside `Some(..)`
1580 |         );
1581 |     }
1582 | 
1583 |     #[test] // One row whose pattern is the or-pattern `tt() | ff()` over a Boolean input.
1584 |     fn test_compile_or_bool_pattern() {
1585 |         let mut compiler = Compiler::new();
1586 |         let bool_type = new_type(&mut compiler, Type::Boolean);
1587 |         let input = compiler.new_variable(bool_type);
1588 |         let result = compile(
1589 |             compiler,
1590 |             input,
1591 |             vec![(Pattern::Or(vec![tt(), ff()]), rhs(1))],
1592 |         );
1593 |         // Both constructors lead to success(1); no fallback is expected.
1594 |         assert_eq!(
1595 |             result.tree,
1596 |             Decision::Switch(
1597 |                 input,
1598 |                 vec![
1599 |                     Case::new(Constructor::False, Vec::new(), success(1)),
1600 |                     Case::new(Constructor::True, Vec::new(), success(1)),
1601 |                 ],
1602 |                 None
1603 |             )
1604 |         );
1605 |     }
1606 | 
1607 |     #[test] // One row whose pattern is the or-pattern `4 | 5` over an Int input.
1608 |     fn test_compile_or_int_pattern() {
1609 |         let mut compiler = Compiler::new();
1610 |         let int_type = new_type(&mut compiler, Type::Int);
1611 |         let input = compiler.new_variable(int_type);
1612 |         let result = compile(
1613 |             compiler,
1614 |             input,
1615 |             vec![(Pattern::Or(vec![int(4), int(5)]), rhs(1))],
1616 |         );
1617 |         // Each alternative becomes its own case with success(1); the fallback is a failure.
1618 |         assert_eq!(
1619 |             result.tree,
1620 |             Decision::Switch(
1621 |                 input,
1622 |                 vec![
1623 |                     Case::new(Constructor::Int(4), Vec::new(), success(1)),
1624 |                     Case::new(Constructor::Int(5), Vec::new(), success(1)),
1625 |                 ],
1626 |                 Some(Box::new(failure()))
1627 |             )
1628 |         );
1629 |     }
1630 | 
1631 |     #[test] // One row with `Pattern::Range(1, 10)` over an Int input.
1632 |     fn test_range_pattern() {
1633 |         let mut compiler = Compiler::new();
1634 |         let int_type = new_type(&mut compiler, Type::Int);
1635 |         let input = compiler.new_variable(int_type);
1636 |         let result =
1637 |             compile(compiler, input, vec![(Pattern::Range(1, 10), rhs(1))]);
1638 |         // Expect a single `Range(1, 10)` case and a failure fallback.
1639 |         assert_eq!(
1640 |             result.tree,
1641 |             Decision::Switch(
1642 |                 input,
1643 |                 vec![Case::new(
1644 |                     Constructor::Range(1, 10),
1645 |                     Vec::new(),
1646 |                     success(1)
1647 |                 )],
1648 |                 Some(Box::new(failure()))
1649 |             )
1650 |         );
1651 |     }
1652 | 
1653 |     #[test] // A single row `4` carrying guard 42 and no other rows.
1654 |     fn test_nonexhaustive_guard() {
1655 |         let mut compiler = Compiler::new();
1656 |         let int_type = new_type(&mut compiler, Type::Int);
1657 |         let input = compiler.new_variable(int_type);
1658 |         // The row is built by hand so that `Some(42)` can be passed as its guard.
1659 |         let result = compiler.compile(vec![Row::new(
1660 |             vec![Column::new(input, int(4))],
1661 |             Some(42),
1662 |             rhs(1),
1663 |         )]);
1664 |         // Guard 42 wraps rhs(1); both the guard's fallback and the switch's fallback fail.
1665 |         assert_eq!(
1666 |             result.tree,
1667 |             Decision::Switch(
1668 |                 input,
1669 |                 vec![Case::new(
1670 |                     Constructor::Int(4),
1671 |                     Vec::new(),
1672 |                     Decision::Guard(42, rhs(1), Box::new(failure()))
1673 |                 )],
1674 |                 Some(Box::new(failure()))
1675 |             )
1676 |         );
1677 |         // A single wildcard is reported as the missing pattern.
1678 |         assert_eq!(result.missing_patterns(), vec!["_".to_string()]);
1679 |     }
1680 | 
1681 |     #[test] // Rows `Some(4)` with guard 42 and unguarded `Some(a)`; no row for `None`.
1682 |     fn test_nonexhaustive_option_with_two_rows_and_guard() {
1683 |         let mut compiler = Compiler::new();
1684 |         let int_type = new_type(&mut compiler, Type::Int);
1685 |         let option_type = new_type(
1686 |             &mut compiler,
1687 |             Type::Enum(vec![
1688 |                 ("Some".to_string(), vec![int_type]),
1689 |                 ("None".to_string(), Vec::new()),
1690 |             ]),
1691 |         );
1692 |         let input = compiler.new_variable(option_type);
1693 |         let result = compiler.compile(vec![
1694 |             Row::new(
1695 |                 vec![Column::new(input, variant(option_type, 0, vec![int(4)]))],
1696 |                 Some(42),
1697 |                 rhs(1),
1698 |             ),
1699 |             Row::new(
1700 |                 vec![Column::new(
1701 |                     input,
1702 |                     variant(option_type, 0, vec![bind("a")]),
1703 |                 )],
1704 |                 None,
1705 |                 rhs(2),
1706 |             ),
1707 |         ]);
1708 |         // When guard 42 does not hold, the tree continues to the `Some(a)` row instead of failing.
1709 |         assert_eq!(
1710 |             result.tree,
1711 |             Decision::Switch(
1712 |                 input,
1713 |                 vec![
1714 |                     Case::new(
1715 |                         Constructor::Variant(option_type, 0),
1716 |                         vec![var(1, int_type)],
1717 |                         Decision::Switch(
1718 |                             var(1, int_type),
1719 |                             vec![Case::new(
1720 |                                 Constructor::Int(4),
1721 |                                 Vec::new(),
1722 |                                 Decision::Guard(
1723 |                                     42,
1724 |                                     rhs(1),
1725 |                                     Box::new(success_with_bindings(
1726 |                                         vec![("a", var(1, int_type))],
1727 |                                         2
1728 |                                     )),
1729 |                                 )
1730 |                             )],
1731 |                             Some(Box::new(success_with_bindings(
1732 |                                 vec![("a", var(1, int_type))],
1733 |                                 2
1734 |                             )))
1735 |                         ),
1736 |                     ),
1737 |                     Case::new(
1738 |                         Constructor::Variant(option_type, 1),
1739 |                         Vec::new(),
1740 |                         failure()
1741 |                     )
1742 |                 ],
1743 |                 None
1744 |             )
1745 |         );
1746 |         // Only `None` is reported as missing.
1747 |         assert_eq!(result.missing_patterns(), vec!["None".to_string()]);
1748 |     }
1749 | 
1750 |     #[test] // Row `4` with guard 42, followed by an unguarded binding `a`.
1751 |     fn test_exhaustive_guard() {
1752 |         let mut compiler = Compiler::new();
1753 |         let int_type = new_type(&mut compiler, Type::Int);
1754 |         let input = compiler.new_variable(int_type);
1755 |         let result = compiler.compile(vec![
1756 |             Row::new(vec![Column::new(input, int(4))], Some(42), rhs(1)),
1757 |             Row::new(vec![Column::new(input, bind("a"))], None, rhs(2)),
1758 |         ]);
1759 |         // The guard's fallback and the switch's fallback both end at the `a` row (2).
1760 |         assert_eq!(
1761 |             result.tree,
1762 |             Decision::Switch(
1763 |                 input,
1764 |                 vec![Case::new(
1765 |                     Constructor::Int(4),
1766 |                     Vec::new(),
1767 |                     Decision::Guard(
1768 |                         42,
1769 |                         rhs(1),
1770 |                         Box::new(success_with_bindings(vec![("a", input)], 2))
1771 |                     )
1772 |                 )],
1773 |                 Some(Box::new(success_with_bindings(vec![("a", input)], 2)))
1774 |             )
1775 |         );
1776 |     }
1777 | 
1778 |     #[test] // Row `tt()` with guard 42, followed by an unguarded binding `a`, over a Boolean.
1779 |     fn test_exhaustive_guard_with_bool() {
1780 |         let mut compiler = Compiler::new();
1781 |         let bool_type = new_type(&mut compiler, Type::Boolean);
1782 |         let input = compiler.new_variable(bool_type);
1783 |         let result = compiler.compile(vec![
1784 |             Row::new(vec![Column::new(input, tt())], Some(42), rhs(1)),
1785 |             Row::new(vec![Column::new(input, bind("a"))], None, rhs(2)),
1786 |         ]);
1787 |         // `False` goes straight to the `a` row; `True` tries guard 42 first, then falls back to it.
1788 |         assert_eq!(
1789 |             result.tree,
1790 |             Decision::Switch(
1791 |                 input,
1792 |                 vec![
1793 |                     Case::new(
1794 |                         Constructor::False,
1795 |                         Vec::new(),
1796 |                         success_with_bindings(vec![("a", input)], 2)
1797 |                     ),
1798 |                     Case::new(
1799 |                         Constructor::True,
1800 |                         Vec::new(),
1801 |                         Decision::Guard(
1802 |                             42,
1803 |                             rhs(1),
1804 |                             Box::new(success_with_bindings(
1805 |                                 vec![("a", input)],
1806 |                                 2
1807 |                             ))
1808 |                         )
1809 |                     )
1810 |                 ],
1811 |                 None
1812 |             )
1813 |         );
1814 |     }
1815 | 
1816 |     #[test] // Rows `1` with guard 42, unguarded `2`, and a binding `b`.
1817 |     fn test_exhaustive_guard_with_int() {
1818 |         let mut compiler = Compiler::new();
1819 |         let int_type = new_type(&mut compiler, Type::Int);
1820 |         let input = compiler.new_variable(int_type);
1821 |         let result = compiler.compile(vec![
1822 |             Row::new(vec![Column::new(input, int(1))], Some(42), rhs(1)),
1823 |             Row::new(vec![Column::new(input, int(2))], None, rhs(2)),
1824 |             Row::new(vec![Column::new(input, bind("b"))], None, rhs(3)),
1825 |         ]);
1826 |         // Inside the `Int(1)` case the guard's fallback skips the `2` row and lands on the `b` row (3).
1827 |         assert_eq!(
1828 |             result.tree,
1829 |             Decision::Switch(
1830 |                 input,
1831 |                 vec![
1832 |                     Case::new(
1833 |                         Constructor::Int(1),
1834 |                         Vec::new(),
1835 |                         Decision::Guard(
1836 |                             42,
1837 |                             rhs(1),
1838 |                             Box::new(success_with_bindings(
1839 |                                 vec![("b", input)],
1840 |                                 3
1841 |                             ))
1842 |                         )
1843 |                     ),
1844 |                     Case::new(Constructor::Int(2), Vec::new(), success(2))
1845 |                 ],
1846 |                 Some(Box::new(success_with_bindings(vec![("b", input)], 3)))
1847 |             )
1848 |         );
1849 |     }
1850 | 
1851 |     #[test]
1852 |     fn test_exhaustive_guard_with_same_int() {
1853 |         let mut compiler = Compiler::new();
1854 |         let int_type = new_type(&mut compiler, Type::Int);
1855 |         let input = compiler.new_variable(int_type);
1856 |         let result = compiler.compile(vec![
1857 |             Row::new(vec![Column::new(input, int(1))], Some(10), rhs(1)),
1858 |             Row::new(vec![Column::new(input, int(1))], Some(20), rhs(2)),
1859 |             Row::new(vec![Column::new(input, int(1))], None, rhs(3)),
1860 |             Row::new(vec![Column::new(input, bind("b"))], None, rhs(4)),
1861 |         ]);
1862 | 
1863 |         assert_eq!(
1864 |             result.tree,
1865 |             Decision::Switch(
1866 |                 input,
1867 |                 vec![Case::new(
1868 |                     Constructor::Int(1),
1869 |                     Vec::new(),
1870 |                     Decision::Guard(
1871 |                         10,
1872 |                         rhs(1),
1873 |                         Box::new(Decision::Guard(
1874 |                             20,
1875 |                             rhs(2),
1876 |                             Box::new(success(3))
1877 |                         ))
1878 |                     )
1879 |                 )],
1880 |                 Some(Box::new(success_with_bindings(vec![("b", input)], 4)))
1881 |             )
1882 |         );
1883 |     }
1884 | 
1885 |     #[test]
1886 |     fn test_exhaustive_option_with_guard() {
1887 |         let mut compiler = Compiler::new();
1888 |         let int_type = new_type(&mut compiler, Type::Int);
1889 |         let option_type = new_type(
1890 |             &mut compiler,
1891 |             Type::Enum(vec![
1892 |                 ("Some".to_string(), vec![int_type]),
1893 |                 ("None".to_string(), Vec::new()),
1894 |             ]),
1895 |         );
1896 |         let input = compiler.new_variable(option_type);
1897 |         let result = compiler.compile(vec![
1898 |             Row::new(
1899 |                 vec![Column::new(input, variant(option_type, 1, Vec::new()))],
1900 |                 None,
1901 |                 rhs(1),
1902 |             ),
1903 |             Row::new(
1904 |                 vec![Column::new(
1905 |                     input,
1906 |                     variant(option_type, 0, vec![bind("a")]),
1907 |                 )],
1908 |                 Some(42),
1909 |                 rhs(2),
1910 |             ),
1911 |             Row::new(
1912 |                 vec![Column::new(
1913 |                     input,
1914 |                     variant(option_type, 0, vec![bind("a")]),
1915 |                 )],
1916 |                 None,
1917 |                 rhs(3),
1918 |             ),
1919 |         ]);
1920 | 
1921 |         assert_eq!(
1922 |             result.tree,
1923 |             Decision::Switch(
1924 |                 input,
1925 |                 vec![
1926 |                     Case::new(
1927 |                         Constructor::Variant(option_type, 0),
1928 |                         vec![var(1, int_type)],
1929 |                         Decision::Guard(
1930 |                             42,
1931 |                             Body {
1932 |                                 bindings: vec![(
1933 |                                     "a".to_string(),
1934 |                                     var(1, int_type)
1935 |                                 )],
1936 |                                 value: 2
1937 |                             },
1938 |                             Box::new(success_with_bindings(
1939 |                                 vec![("a", var(1, int_type))],
1940 |                                 3
1941 |                             ))
1942 |                         )
1943 |                     ),
1944 |                     Case::new(
1945 |                         Constructor::Variant(option_type, 1),
1946 |                         Vec::new(),
1947 |                         success(1)
1948 |                     ),
1949 |                 ],
1950 |                 None
1951 |             )
1952 |         );
1953 |     }
1954 | 
1955 |     #[test]
1956 |     fn test_compile_exhaustive_nested_int_with_guard() {
1957 |         let mut compiler = Compiler::new();
1958 |         let int_type = new_type(&mut compiler, Type::Int);
1959 |         let tup_type = new_type(&mut compiler, Type::Pair(int_type, int_type));
1960 |         let input = compiler.new_variable(tup_type);
1961 |         let result = compiler.compile(vec![
1962 |             Row::new(
1963 |                 vec![Column::new(
1964 |                     input,
1965 |                     pair(int_type, int_type, int(4), int(5)),
1966 |                 )],
1967 |                 Some(42),
1968 |                 rhs(1),
1969 |             ),
1970 |             Row::new(
1971 |                 vec![Column::new(
1972 |                     input,
1973 |                     pair(int_type, int_type, int(4), int(5)),
1974 |                 )],
1975 |                 None,
1976 |                 rhs(2),
1977 |             ),
1978 |             Row::new(
1979 |                 vec![Column::new(
1980 |                     input,
1981 |                     pair(int_type, int_type, bind("a"), bind("b")),
1982 |                 )],
1983 |                 None,
1984 |                 rhs(3),
1985 |             ),
1986 |         ]);
1987 | 
1988 |         assert_eq!(
1989 |             result.tree,
1990 |             Decision::Switch(
1991 |                 input,
1992 |                 vec![Case::new(
1993 |                     Constructor::Pair(int_type, int_type),
1994 |                     vec![var(1, int_type), var(2, int_type)],
1995 |                     Decision::Switch(
1996 |                         var(2, int_type),
1997 |                         vec![Case::new(
1998 |                             Constructor::Int(5),
1999 |                             Vec::new(),
2000 |                             Decision::Switch(
2001 |                                 var(1, int_type),
2002 |                                 vec![Case::new(
2003 |                                     Constructor::Int(4),
2004 |                                     Vec::new(),
2005 |                                     Decision::Guard(
2006 |                                         42,
2007 |                                         rhs(1),
2008 |                                         Box::new(success(2)),
2009 |                                     )
2010 |                                 )],
2011 |                                 Some(Box::new(success_with_bindings(
2012 |                                     vec![
2013 |                                         ("a", var(1, int_type)),
2014 |                                         ("b", var(2, int_type))
2015 |                                     ],
2016 |                                     3
2017 |                                 )))
2018 |                             )
2019 |                         )],
2020 |                         Some(Box::new(success_with_bindings(
2021 |                             vec![
2022 |                                 ("a", var(1, int_type)),
2023 |                                 ("b", var(2, int_type))
2024 |                             ],
2025 |                             3
2026 |                         )))
2027 |                     )
2028 |                 )],
2029 |                 None
2030 |             )
2031 |         );
2032 |     }
2033 | }
2034 | 


--------------------------------------------------------------------------------
/rustfmt.toml:
--------------------------------------------------------------------------------
 1 | max_width = 80
 2 | use_try_shorthand = true
 3 | reorder_imports = true
 4 | edition = "2018"
 5 | 
 6 | # The default setting results in too aggressive/clunky wrapping for a lot of
 7 | # code. For example, this:
 8 | #
 9 | #     if !foo.bar().baz() {
10 | #         bar();
11 | #     }
12 | #
13 | # Would result in something like this:
14 | #
15 | #     if !foo
16 | #         .bar()
17 | #         .baz()
18 | #     {
19 | #         bar();
20 | #     }
21 | #
22 | # Setting this to "Max" results in a more consistent and less infuriating
23 | # wrapping style.
24 | use_small_heuristics = 'Max'
25 | 


--------------------------------------------------------------------------------
/sestoft1996/Cargo.lock:
--------------------------------------------------------------------------------
1 | # This file is automatically @generated by Cargo.
2 | # It is not intended for manual editing.
3 | version = 3
4 | 
5 | [[package]]
6 | name = "match"
7 | version = "1.0.0"
8 | 


--------------------------------------------------------------------------------
/sestoft1996/Cargo.toml:
--------------------------------------------------------------------------------
1 | [package]
2 | name = "sestoft1996"
3 | version = "1.0.0"
4 | authors = ["Yorick Peterse <yorick@yorickpeterse.com>"]
5 | edition = "2021"
6 | 
7 | [lib]
8 | doctest = false
9 | 


--------------------------------------------------------------------------------
/sestoft1996/README.md:
--------------------------------------------------------------------------------
 1 | # ML Pattern match compilation and partial evaluation
 2 | 
 3 | This directory contains an implementation of the pattern matching algorithm
 4 | introduced in the paper "ML Pattern match compilation and partial evaluation" by
 5 | Peter Sestoft, from 1996.
 6 | 
 7 | ## A short rant about the paper
 8 | 
 9 | The paper is a bit of a pain to read, and took me a solid week to understand.
10 | Part of this is because I'm not familiar with Standard ML, so I first had to
11 | learn that to some degree. The syntax can also be hard to grok when you have
12 | functions calling functions and passing those results as arguments directly,
13 | especially combined with operators (e.g. is `foo bar :: baz` parsed as
14 | `foo(bar :: baz)` or `(foo bar) :: baz`?).
15 | 
16 | It doesn't help that the paper makes references to the author's implementation,
17 | but the only two links regarding it are dead FTP links. I eventually found these
18 | implementations of this algorithm:
19 | 
20 | - https://github.com/kfl/mosml/blob/f529b33bb891ff1df4aab198edad376f9ff64d28/src/compiler/Match.sml
21 | - https://github.com/rsdn/nemerle/blob/db4bc9078f1b6238da32df1519c1957e74b6834a/ncc/typing/DecisionTreeBuilder.n
22 | - https://github.com/rsdn/nitra/tree/master/Nitra/Nitra.Compiler/Generation/PatternMatching
23 | - https://github.com/melsman/mlkit/blob/237be62778985e76f912cefdc0bb21b22bed5bd4/src/Compiler/Lambda/CompileDec.sml#L510
24 | 
25 | The Moscow ML implementation uses memoization and some extensions for the
26 | pattern matching logic. The Nemerle implementation is quite different and uses a
27 | more imperative/mutable approach.
28 | 
29 | As to how the algorithm works: even now I don't quite understand why certain
30 | decisions were made, and the algorithm as a whole feels a bit crude.
31 | 
32 | I could go on, but the summary is this: if you wish to understand the paper, I
33 | recommend reading through it while using my Rust code as a reference. It should
34 | be a bit easier to understand and translate to other languages, and it doesn't
35 | require a 20 year old language (though maybe it will if you're reading this 20
36 | years from now).
37 | 
38 | ## Project structure
39 | 
40 | There are two implementations of the algorithm: a raw version, and an idiomatic
41 | version. Neither version implements the memoization strategy as discussed in
42 | section 7.5, as this likely won't work well due to Rust's single ownership
43 | requirement. Both versions are extensively commented to better explain why
44 | certain decisions were made, what to keep in mind when reading the paper, etc.
45 | 
46 | ### The raw version
47 | 
48 | The raw version is more or less a 1:1 translation of the SML code included in the
49 | paper. The code is terrible, relies on (poorly implemented) immutable lists
50 | (because the original algorithm requires immutable lists), and likely performs
51 | extremely poorly. I tried to keep this version as close to the paper as
52 | possible, only deviating where Rust simply required a different approach.
53 | 
54 | Some differences from the paper:
55 | 
56 | - Rust doesn't have built-in immutable lists, and the algorithm requires the use
57 |   of immutable lists in a few places. Thus, we introduce a custom immutable
58 |   linked list.
59 | - The paper assumes multiple ownership of values in a few places. This
60 |   implementation instead clones values to work around that, as using a different
61 |   approach requires a different implementation.
62 | - The `succeed'` and `fail'` functions are called `match_succeed` and
63 |   `match_fail` respectively. Who the hell thought it was a good idea to allow
64 |   quotes in symbol names?
65 | - When generating `Sel` nodes, the paper uses `i+1` to build the selector
66 |   values. It's not clear why this is done (the paper makes no mention of it),
67 |   and it seems unnecessary. As such we just use indexes starting at zero.
68 | - The paper implements various functions in a non-exhaustive manner, without
69 |   any explanation as to why. My implementation uses exhaustive patterns where
70 |   possible, and `unwrap()` in a few places where missing values (and thus
71 |   panics) shouldn't occur in the absence of bugs (famous last words).
72 | 
73 | ### The idiomatic version
74 | 
75 | This implementation of the algorithm is closer to what you'd normally write in
76 | Rust. Some of the names used are still a bit confusing, but unfortunately I
77 | haven't been able to come up with better names.
78 | 
79 | Unlike the raw implementation, this implementation doesn't rely on persistent
80 | lists. Instead, it uses mutable vectors that store values in reverse order.
81 | Storing them in this order means a pop() returns the head of the vector, instead
82 | of the tail. This makes retrieving the head cheap, as no values need to be
83 | shifted.
84 | 
85 | Some functions (e.g. `addneg` and `match_fail`) are inlined into their callers,
86 | as they are only called in one place.
87 | 
88 | For traversing all the pattern matching rules we use a cursor, essentially
89 | turning the list into an iterator that you can rewind. This is needed because
90 | when building an `IfEq` node, both the true and false bodies need to start off
91 | with the same set of rules. Using a cursor allows us to do just that, but
92 | without cloning the rules.
93 | 


--------------------------------------------------------------------------------
/sestoft1996/src/idiomatic.rs:
--------------------------------------------------------------------------------
  1 | /// An idiomatic Rust implementation of the pattern matching algorithm.
  2 | use std::collections::HashSet;
  3 | 
/// The result of a static match.
///
/// This is the outcome of comparing a pattern's constructor against what is
/// already known about the value at that position (see `match_term`).
#[derive(Debug)]
enum Match {
    /// The value is known to have the constructor.
    Yes,
    /// The value is known to _not_ have the constructor.
    No,
    /// Not enough is known; the generated tree has to test the value.
    Maybe,
}
 11 | 
 12 | /// The description of terms already matched, corresponding to the `context`
 13 | /// type in the paper.
 14 | struct Context {
 15 |     values: Vec<(Constructor, Vec<Term>)>,
 16 | }
 17 | 
 18 | impl Context {
 19 |     fn new() -> Self {
 20 |         Self { values: Vec::new() }
 21 |     }
 22 | 
 23 |     fn push(&mut self, value: (Constructor, Vec<Term>)) {
 24 |         self.values.push(value);
 25 |     }
 26 | 
 27 |     fn pop(&mut self) -> Option<(Constructor, Vec<Term>)> {
 28 |         self.values.pop()
 29 |     }
 30 | 
 31 |     fn add_argument_to_last(&mut self, term: Term) {
 32 |         if let Some((_, args)) = self.values.last_mut() {
 33 |             args.push(term);
 34 |         }
 35 |     }
 36 | 
 37 |     fn reconstruct_term(&self, term: Term, work: &Work) -> Term {
 38 |         self.values.iter().zip(work.iter()).fold(
 39 |             term,
 40 |             |term, ((con, args), (_, _, dargs))| {
 41 |                 let mut new_args: Vec<_> = dargs.clone();
 42 | 
 43 |                 new_args.push(term);
 44 |                 new_args.extend(args.iter().rev().cloned());
 45 |                 Term::Pos(con.clone(), new_args)
 46 |             },
 47 |         )
 48 |     }
 49 | }
 50 | 
/// The work stack as used in the paper.
///
/// The paper uses a list of triple lists, removing the need for some append
/// operations. We mirror that structure with a vector of triples of vectors.
///
/// Every entry belongs to one constructor whose arguments are being matched,
/// and holds the argument patterns still to match together with the access
/// paths and term descriptions of the corresponding values. The compiler
/// takes the next item from each vector using `pop()`, so the vectors store
/// their items in reverse order.
///
/// We use a type alias here so we don't have to re-type this type name in the
/// various places that it's used.
type Work = Vec<(Vec<Pattern>, Vec<Access>, Vec<Term>)>;
 61 | 
/// The type of the right-hand side of a case (i.e. the code to run).
///
/// For the sake of simplicity we just use a String here. In a real compiler
/// this would probably be an AST node or another sort of IR to run upon a
/// match.
///
/// The compiler clones a rule (pattern and right-hand side) every time it
/// starts matching it, and clones the right-hand side once more when marking
/// it as reachable.
pub type RHS = String;
 68 | 
/// A type for storing diagnostics produced by the decision tree compiler.
pub struct Diagnostics {
    /// The diagnostic messages produced.
    ///
    /// The compiler adds a "Missing pattern: ..." message every time it runs
    /// out of rules for some input.
    ///
    /// In a real compiler this would include more than just a message, such as
    /// the line and column numbers.
    messages: Vec<String>,

    /// The right-hand values (= the code you'd run upon a match) that have been
    /// processed.
    ///
    /// If a value isn't included in this set it means it and its pattern are
    /// redundant.
    ///
    /// In a real compiler you'd probably mark AST nodes directly. In our case
    /// the right-hand values are just simple strings, so we use a set instead.
    reachable: HashSet<RHS>,
}
 87 | 
/// A type for compiling a list of rules into a decision tree.
pub struct Compiler {
    /// The rules to compile into a decision tree.
    rules: Vec<(Pattern, RHS)>,

    /// The index of the next rule to compile.
    ///
    /// When generating IfEq nodes we need to generate two branches, both
    /// starting with the same set of rules. To avoid cloning we use a cursor,
    /// save it before processing one branch, then restore it for the other
    /// branch.
    rules_index: usize,

    /// The diagnostics (missing patterns and reachable right-hand sides)
    /// collected while compiling.
    diagnostics: Diagnostics,
}
103 | 
104 | impl Compiler {
105 |     pub fn new(rules: Vec<(Pattern, RHS)>) -> Self {
106 |         Self {
107 |             rules,
108 |             rules_index: 0,
109 |             diagnostics: Diagnostics {
110 |                 messages: Vec::new(),
111 |                 reachable: HashSet::new(),
112 |             },
113 |         }
114 |     }
115 | 
116 |     pub fn compile(&mut self) -> Decision {
117 |         self.fail(Term::bottom())
118 |     }
119 | 
120 |     fn fail(&mut self, term: Term) -> Decision {
121 |         if let Some((pat, rhs)) = self.next_rule().cloned() {
122 |             let ctx = Context::new();
123 |             let work = Vec::new();
124 | 
125 |             self.match_pattern(pat, Access::Root, term, ctx, work, rhs)
126 |         } else {
127 |             self.diagnostics
128 |                 .messages
129 |                 .push(format!("Missing pattern: {}", term.error_string()));
130 | 
131 |             Decision::Failure
132 |         }
133 |     }
134 | 
135 |     fn succeed(
136 |         &mut self,
137 |         mut ctx: Context,
138 |         mut work: Work,
139 |         rhs: RHS,
140 |     ) -> Decision {
141 |         if let Some((mut pats, mut accs, mut terms)) = work.pop() {
142 |             if let (Some(pat), Some(acc), Some(term)) =
143 |                 (pats.pop(), accs.pop(), terms.pop())
144 |             {
145 |                 work.push((pats, accs, terms));
146 |                 self.match_pattern(pat, acc, term, ctx, work, rhs)
147 |             } else {
148 |                 if let Some((con, mut args)) = ctx.pop() {
149 |                     args.reverse();
150 |                     ctx.add_argument_to_last(Term::Pos(con, args));
151 |                 }
152 | 
153 |                 self.succeed(ctx, work, rhs)
154 |             }
155 |         } else {
156 |             self.diagnostics.reachable.insert(rhs.clone());
157 |             Decision::Success(rhs)
158 |         }
159 |     }
160 | 
161 |     fn match_pattern(
162 |         &mut self,
163 |         pattern: Pattern,
164 |         access: Access,
165 |         term: Term,
166 |         mut ctx: Context,
167 |         work: Work,
168 |         rhs: RHS,
169 |     ) -> Decision {
170 |         match pattern {
171 |             Pattern::Variable(name) => {
172 |                 ctx.add_argument_to_last(term);
173 |                 Decision::Variable(
174 |                     access,
175 |                     name,
176 |                     Box::new(self.succeed(ctx, work, rhs)),
177 |                 )
178 |             }
179 |             Pattern::Field(id, pat) => self.match_pattern(
180 |                 *pat,
181 |                 Access::Select(id, Box::new(access)),
182 |                 term,
183 |                 ctx,
184 |                 work,
185 |                 rhs,
186 |             ),
187 |             Pattern::Wildcard => {
188 |                 ctx.add_argument_to_last(term);
189 |                 self.succeed(ctx, work, rhs)
190 |             }
191 |             Pattern::Constructor(con, args) => match self
192 |                 .match_term(&con, &term)
193 |             {
194 |                 Match::Yes => {
195 |                     self.matched(con, args, access, term, ctx, work, rhs)
196 |                 }
197 |                 Match::No => self.fail(ctx.reconstruct_term(term, &work)),
198 |                 Match::Maybe => {
199 |                     let false_term =
200 |                         ctx.reconstruct_term(term.clone().negated(&con), &work);
201 |                     let cursor = self.rules_index;
202 |                     let matched = self.matched(
203 |                         con.clone(),
204 |                         args,
205 |                         access.clone(),
206 |                         term,
207 |                         ctx,
208 |                         work,
209 |                         rhs,
210 |                     );
211 | 
212 |                     self.rules_index = cursor;
213 | 
214 |                     Decision::IfEq(
215 |                         access,
216 |                         con,
217 |                         Box::new(matched),
218 |                         Box::new(self.fail(false_term)),
219 |                     )
220 |                 }
221 |             },
222 |         }
223 |     }
224 | 
225 |     fn matched(
226 |         &mut self,
227 |         con: Constructor,
228 |         args: Vec<Pattern>,
229 |         obj: Access,
230 |         term: Term,
231 |         mut ctx: Context,
232 |         mut work: Work,
233 |         rhs: RHS,
234 |     ) -> Decision {
235 |         let access = (0..con.arity)
236 |             .rev()
237 |             .map(|i| Access::Select(i, Box::new(obj.clone())))
238 |             .collect();
239 | 
240 |         let terms = match term {
241 |             Term::Pos(_, dargs) => dargs,
242 |             Term::Neg(_) => vec![Term::bottom(); con.arity],
243 |         };
244 | 
245 |         ctx.push((con, Vec::new()));
246 |         work.push((args, access, terms));
247 |         self.succeed(ctx, work, rhs)
248 |     }
249 | 
250 |     fn match_term(&mut self, con: &Constructor, term: &Term) -> Match {
251 |         match term {
252 |             Term::Pos(tcon, _) if con == tcon => Match::Yes,
253 |             Term::Pos(_, _) => Match::No,
254 |             Term::Neg(exl) if exl.contains(con) => Match::No,
255 |             Term::Neg(exl) if con.span == (exl.len() + 1) => Match::Yes,
256 |             Term::Neg(_) => Match::Maybe,
257 |         }
258 |     }
259 | 
260 |     fn next_rule(&mut self) -> Option<&(Pattern, RHS)> {
261 |         if self.rules_index >= self.rules.len() {
262 |             None
263 |         } else {
264 |             let val = self.rules.get(self.rules_index);
265 | 
266 |             self.rules_index += 1;
267 | 
268 |             val
269 |         }
270 |     }
271 | }
272 | 
/// A type constructor.
///
/// For a boolean, a constructor would have the following values:
///
/// - name: true or false
/// - arity: 0, as booleans don't take arguments
/// - span: 2, as there are only two constructors (true and false)
///
/// In a real compiler you'd probably use pointers/IDs to your type data
/// structures instead, but for the sake of keeping things simple we just use a
/// struct that can be cloned.
///
/// Two constructors are equal when all three fields are equal.
#[derive(Debug, Eq, PartialEq, Clone)]
pub struct Constructor {
    /// The name of the constructor, as used in error messages.
    name: String,

    /// The number of arguments.
    arity: usize,

    /// The total number of constructors of the owning type.
    ///
    /// A span of 0 means the type has an infinite amount of constructors.
    span: usize,
}
296 | 
/// A user provided pattern to match against an input value.
///
/// We only provide a handful of patterns: constructors, variables/bindings,
/// fields, and wildcards.
///
/// In a real compiler you'd probably be using AST nodes instead of dedicated
/// pattern types, and include more cases for specific patterns (e.g. tuple and
/// struct patterns).
#[derive(Debug, Clone)]
pub enum Pattern {
    /// Matches values built using the given constructor, provided the
    /// argument patterns match as well.
    ///
    /// The compiler takes the argument patterns using `pop()`, so they are
    /// stored in reverse order.
    Constructor(Constructor, Vec<Pattern>),

    /// Matches any value and binds it to the given name.
    Variable(String),

    /// Matches the nested pattern against the field with the given index of
    /// the current value.
    Field(usize, Box<Pattern>),

    /// Matches any value without binding it.
    Wildcard,
}
311 | 
/// A description of what is statically known about a value.
#[derive(Debug, Clone, Eq, PartialEq)]
pub enum Term {
    // A term whose top-most constructor is known. The components of the
    // value are described by the Vec.
    //
    // The arguments are in reverse order, so the first argument is the last
    // value.
    Pos(Constructor, Vec<Term>),

    // Any term whose top-most constructor is _not_ any of the listed
    // constructors. An empty list means nothing is known about the term.
    //
    // For a Neg(S), the cardinality of S must be less than the span of any
    // constructor in S:
    //
    //     cons.iter().all(|cons| cardinality(s) < span(cons))
    //
    // Due to static typing, all constructors in S are of the same type, thus
    // have the same span.
    //
    // The constructors are in reverse order, so the first constructor is the
    // last value.
    Neg(Vec<Constructor>),
}
336 | 
337 | impl Term {
338 |     fn bottom() -> Term {
339 |         Term::Neg(Vec::new())
340 |     }
341 | 
342 |     fn negated(self, con: &Constructor) -> Term {
343 |         match self {
344 |             Term::Pos(_, _) => self,
345 |             Term::Neg(mut nonset) => {
346 |                 nonset.push(con.clone());
347 |                 Term::Neg(nonset)
348 |             }
349 |         }
350 |     }
351 | }
352 | 
353 | impl Term {
354 |     /// Returns a string used to describe this term in an error message.
355 |     fn error_string(&self) -> String {
356 |         match self {
357 |             Term::Pos(cons, args) => {
358 |                 if args.is_empty() {
359 |                     cons.name.clone()
360 |                 } else {
361 |                     format!(
362 |                         "{}({})",
363 |                         cons.name,
364 |                         args.iter()
365 |                             .rev()
366 |                             .map(|v| v.error_string())
367 |                             .collect::<Vec<_>>()
368 |                             .join(", ")
369 |                     )
370 |                 }
371 |             }
372 |             Term::Neg(_) => "_".to_string(),
373 |         }
374 |     }
375 | }
376 | 
/// The `access` type in the paper.
///
/// An access describes where to find a (sub)value of the input.
#[derive(Clone, Debug, Eq, PartialEq)]
pub enum Access {
    /// The input value itself.
    Root,

    /// The component with the given (zero-based) index of the value found at
    /// the nested access path.
    Select(usize, Box<Access>),
}
383 | 
/// The `decision` type in the paper.
#[derive(Debug, Eq, PartialEq, Clone)]
pub enum Decision {
    /// None of the rules matched the input.
    Failure,

    /// A pattern is matched and the right-hand value is to be returned.
    Success(RHS),

    /// Checks if a constructor matches the value at the given access path.
    ///
    /// The first decision is used when the constructor matches, the second
    /// one when it doesn't.
    IfEq(Access, Constructor, Box<Decision>, Box<Decision>),

    /// Checks if any of the given constructors match the value at the given
    /// access path.
    ///
    /// Each case pairs a constructor with the decision to use when it
    /// matches. The last decision is the fallback for when no case matches.
    Switch(Access, Vec<(Constructor, Decision)>, Box<Decision>),

    /// Bind a value to a variable, then continue matching the rest of the
    /// input.
    Variable(Access, String, Box<Decision>),
}
404 | 
405 | impl Decision {
406 |     /// Replaces a series of nested IfEq nodes for the same access object with a
407 |     /// Switch node.
408 |     pub fn replace_nested_if(self) -> Decision {
409 |         match self {
410 |             Decision::IfEq(root, con, ok, fail) => {
411 |                 let mut cases = vec![(con, *ok)];
412 |                 let mut fallback = fail;
413 | 
414 |                 loop {
415 |                     match *fallback {
416 |                         Decision::IfEq(acc, con, ok, fail) if root == acc => {
417 |                             fallback = fail;
418 | 
419 |                             cases.push((con, *ok));
420 |                         }
421 |                         _ => break,
422 |                     }
423 |                 }
424 | 
425 |                 if cases.len() == 1 {
426 |                     let (con, ok) = cases.pop().unwrap();
427 | 
428 |                     Decision::IfEq(root, con, Box::new(ok), fallback)
429 |                 } else {
430 |                     Decision::Switch(root, cases, fallback)
431 |                 }
432 |             }
433 |             _ => self,
434 |         }
435 |     }
436 | }
437 | 
438 | #[cfg(test)]
439 | mod tests {
440 |     use super::*;
441 | 
442 |     fn con(name: &str, arity: usize, span: usize) -> Constructor {
443 |         Constructor { name: name.to_string(), arity, span }
444 |     }
445 | 
446 |     fn nil() -> Pattern {
447 |         Pattern::Constructor(con("nil", 0, 1), Vec::new())
448 |     }
449 | 
    /// Returns the `true` constructor: no arguments, one of two constructors.
    fn tt_con() -> Constructor {
        con("true", 0, 2)
    }
453 | 
    /// Returns the `false` constructor: no arguments, one of two
    /// constructors.
    fn ff_con() -> Constructor {
        con("false", 0, 2)
    }
457 | 
458 |     fn tt() -> Pattern {
459 |         Pattern::Constructor(tt_con(), Vec::new())
460 |     }
461 | 
462 |     fn ff() -> Pattern {
463 |         Pattern::Constructor(ff_con(), Vec::new())
464 |     }
465 | 
466 |     fn pair(a: Pattern, b: Pattern) -> Pattern {
467 |         Pattern::Constructor(con("pair", 2, 1), vec![b, a])
468 |     }
469 | 
470 |     fn var(name: &str) -> Pattern {
471 |         Pattern::Variable(name.to_string())
472 |     }
473 | 
474 |     fn if_eq(
475 |         acc: Access,
476 |         con: Constructor,
477 |         ok: Decision,
478 |         fail: Decision,
479 |     ) -> Decision {
480 |         Decision::IfEq(acc, con, Box::new(ok), Box::new(fail))
481 |     }
482 | 
483 |     fn switch(
484 |         acc: Access,
485 |         cases: Vec<(Constructor, Decision)>,
486 |         fallback: Decision,
487 |     ) -> Decision {
488 |         Decision::Switch(acc, cases, Box::new(fallback))
489 |     }
490 | 
491 |     fn bind(access: Access, name: &str, rest: Decision) -> Decision {
492 |         Decision::Variable(access, name.to_string(), Box::new(rest))
493 |     }
494 | 
495 |     fn success(value: &str) -> Decision {
496 |         Decision::Success(value.to_string())
497 |     }
498 | 
499 |     fn failure() -> Decision {
500 |         Decision::Failure
501 |     }
502 | 
503 |     fn rhs(value: &str) -> String {
504 |         value.to_string()
505 |     }
506 | 
507 |     fn obj() -> Access {
508 |         Access::Root
509 |     }
510 | 
511 |     fn sel(index: usize, acc: Access) -> Access {
512 |         Access::Select(index, Box::new(acc))
513 |     }
514 | 
515 |     fn compile(rules: Vec<(Pattern, RHS)>) -> (Decision, Diagnostics) {
516 |         let mut compiler = Compiler::new(rules);
517 |         let tree = compiler.compile();
518 | 
519 |         (tree, compiler.diagnostics)
520 |     }
521 | 
522 |     #[test]
523 |     fn test_term_description_error_string() {
524 |         let term = Term::Pos(
525 |             con("box", 2, 1),
526 |             vec![
527 |                 Term::Neg(vec![con("false", 0, 2)]),
528 |                 Term::Pos(con("true", 0, 2), Vec::new()),
529 |             ],
530 |         );
531 | 
532 |         assert_eq!(term.error_string(), "box(true, _)");
533 |     }
534 | 
535 |     #[test]
536 |     fn test_context_reconstruct_term() {
537 |         let mut ctx = Context::new();
538 | 
539 |         ctx.push((
540 |             con("baz", 0, 1),
541 |             vec![
542 |                 Term::Neg(vec![con("arg2", 0, 1)]),
543 |                 Term::Neg(vec![con("arg1", 0, 1)]),
544 |             ],
545 |         ));
546 | 
547 |         let work = vec![(
548 |             Vec::new(),
549 |             Vec::new(),
550 |             vec![
551 |                 Term::Neg(vec![con("work2", 0, 1)]),
552 |                 Term::Neg(vec![con("work1", 0, 1)]),
553 |             ],
554 |         )];
555 |         let dsc = Term::Neg(vec![con("bar", 0, 1)]);
556 |         let new_dsc = ctx.reconstruct_term(dsc, &work);
557 | 
558 |         assert_eq!(
559 |             new_dsc,
560 |             Term::Pos(
561 |                 con("baz", 0, 1),
562 |                 vec![
563 |                     Term::Neg(vec![con("work2", 0, 1)]),
564 |                     Term::Neg(vec![con("work1", 0, 1)]),
565 |                     Term::Neg(vec![con("bar", 0, 1)]),
566 |                     Term::Neg(vec![con("arg1", 0, 1)]),
567 |                     Term::Neg(vec![con("arg2", 0, 1)]),
568 |                 ]
569 |             )
570 |         );
571 |     }
572 | 
573 |     #[test]
574 |     fn test_context_add_argument_to_last() {
575 |         let mut ctx = Context::new();
576 | 
577 |         ctx.push((
578 |             con("baz", 0, 1),
579 |             vec![
580 |                 Term::Neg(vec![con("arg2", 0, 1)]),
581 |                 Term::Neg(vec![con("arg1", 0, 1)]),
582 |             ],
583 |         ));
584 | 
585 |         let term = Term::Neg(vec![con("bar", 0, 1)]);
586 | 
587 |         ctx.add_argument_to_last(term);
588 | 
589 |         assert_eq!(
590 |             ctx.values,
591 |             vec![(
592 |                 con("baz", 0, 1),
593 |                 vec![
594 |                     Term::Neg(vec![con("arg2", 0, 1)]),
595 |                     Term::Neg(vec![con("arg1", 0, 1)]),
596 |                     Term::Neg(vec![con("bar", 0, 1)]),
597 |                 ]
598 |             )]
599 |         );
600 |     }
601 | 
602 |     #[test]
603 |     fn test_match_always_succeeds() {
604 |         let (result, _) = compile(vec![(nil(), rhs("true"))]);
605 | 
606 |         assert_eq!(result, success("true"));
607 |     }
608 | 
609 |     #[test]
610 |     fn test_match_always_fails() {
611 |         let (result, _) = compile(Vec::new());
612 | 
613 |         assert_eq!(result, failure());
614 |     }
615 | 
616 |     #[test]
617 |     fn test_match_single_pattern() {
618 |         let (result, _) =
619 |             compile(vec![(tt(), rhs("true")), (ff(), rhs("false"))]);
620 | 
621 |         assert_eq!(
622 |             result,
623 |             if_eq(obj(), tt_con(), success("true"), success("false"))
624 |         );
625 |     }
626 | 
627 |     #[test]
628 |     fn test_match_var() {
629 |         let (result, _) = compile(vec![(var("a"), rhs("true"))]);
630 | 
631 |         assert_eq!(result, bind(obj(), "a", success("true")));
632 |     }
633 | 
634 |     #[test]
635 |     fn test_match_field() {
636 |         let (result, _) = compile(vec![
637 |             (Pattern::Field(42, Box::new(tt())), rhs("foo")),
638 |             (Pattern::Field(42, Box::new(var("a"))), rhs("bar")),
639 |         ]);
640 | 
641 |         assert_eq!(
642 |             result,
643 |             if_eq(
644 |                 sel(42, obj()),
645 |                 tt_con(),
646 |                 success("foo"),
647 |                 bind(sel(42, obj()), "a", success("bar"))
648 |             )
649 |         );
650 |     }
651 | 
652 |     #[test]
653 |     fn test_match_wildcard() {
654 |         let (result, _) = compile(vec![(Pattern::Wildcard, rhs("true"))]);
655 | 
656 |         assert_eq!(result, success("true"));
657 |     }
658 | 
659 |     #[test]
660 |     fn test_match_nested_var() {
661 |         let (result, _) = compile(vec![(pair(var("a"), var("b")), rhs("foo"))]);
662 | 
663 |         assert_eq!(
664 |             result,
665 |             bind(sel(0, obj()), "a", bind(sel(1, obj()), "b", success("foo")))
666 |         );
667 |     }
668 | 
669 |     #[test]
670 |     fn test_match_multiple_patterns() {
671 |         let (result, diags) = compile(vec![
672 |             (tt(), rhs("true")),
673 |             (ff(), rhs("false")),
674 |             (tt(), rhs("redundant")),
675 |         ]);
676 | 
677 |         // Redundant patterns are ignored on the decision tree. This is also how
678 |         // you'd detect redundant patterns: you'd somehow mark every RHS when
679 |         // you produce their Success nodes. Any RHS nodes that remain unmarked
680 |         // are redundant.
681 |         assert_eq!(
682 |             result,
683 |             if_eq(obj(), tt_con(), success("true"), success("false"))
684 |         );
685 | 
686 |         assert!(diags.reachable.contains(&"true".to_string()));
687 |         assert!(diags.reachable.contains(&"false".to_string()));
688 |         assert!(!diags.reachable.contains(&"redundant".to_string()));
689 |     }
690 | 
691 |     #[test]
692 |     fn test_nonexhaustive_match() {
693 |         let (result, diags) = compile(vec![(tt(), rhs("true"))]);
694 | 
695 |         assert_eq!(result, if_eq(obj(), tt_con(), success("true"), failure()));
696 |         assert_eq!(diags.messages, vec!["Missing pattern: _".to_string()]);
697 |     }
698 | 
699 |     #[test]
700 |     fn test_nonexhaustive_match_from_paper() {
701 |         let green = Pattern::Constructor(con("green", 0, 3), Vec::new());
702 |         let (result, diags) = compile(vec![
703 |             (pair(tt(), green.clone()), rhs("111")),
704 |             (pair(ff(), green.clone()), rhs("222")),
705 |         ]);
706 | 
707 |         assert_eq!(
708 |             result,
709 |             if_eq(
710 |                 sel(0, obj()),
711 |                 tt_con(),
712 |                 if_eq(
713 |                     sel(1, obj()),
714 |                     con("green", 0, 3),
715 |                     success("111"),
716 |                     failure()
717 |                 ),
718 |                 if_eq(
719 |                     sel(1, obj()),
720 |                     con("green", 0, 3),
721 |                     success("222"),
722 |                     failure()
723 |                 )
724 |             )
725 |         );
726 | 
727 |         assert_eq!(
728 |             diags.messages,
729 |             vec![
730 |                 "Missing pattern: pair(true, _)".to_string(),
731 |                 "Missing pattern: pair(false, _)".to_string()
732 |             ]
733 |         );
734 |     }
735 | 
736 |     #[test]
737 |     fn test_nested_match() {
738 |         let (result, _) = compile(vec![
739 |             (pair(tt(), tt()), rhs("foo")),
740 |             (pair(tt(), ff()), rhs("bar")),
741 |             (pair(ff(), ff()), rhs("baz")),
742 |             (pair(ff(), tt()), rhs("quix")),
743 |         ]);
744 | 
745 |         assert_eq!(
746 |             result,
747 |             if_eq(
748 |                 sel(0, obj()),
749 |                 tt_con(),
750 |                 if_eq(sel(1, obj()), tt_con(), success("foo"), success("bar")),
751 |                 if_eq(sel(1, obj()), ff_con(), success("baz"), success("quix"))
752 |             )
753 |         );
754 |     }
755 | 
756 |     #[test]
757 |     fn test_match_with_switch() {
758 |         let a = con("a", 0, 4);
759 |         let b = con("b", 0, 4);
760 |         let c = con("c", 0, 4);
761 |         let d = con("d", 0, 4);
762 |         let a_pat = Pattern::Constructor(a.clone(), Vec::new());
763 |         let b_pat = Pattern::Constructor(b.clone(), Vec::new());
764 |         let c_pat = Pattern::Constructor(c.clone(), Vec::new());
765 |         let d_pat = Pattern::Constructor(d.clone(), Vec::new());
766 |         let (result, _) = compile(vec![
767 |             ((a_pat, rhs("a"))),
768 |             ((b_pat, rhs("b"))),
769 |             ((c_pat, rhs("c"))),
770 |             ((d_pat, rhs("d"))),
771 |         ]);
772 | 
773 |         assert_eq!(
774 |             result.replace_nested_if(),
775 |             switch(
776 |                 obj(),
777 |                 vec![(a, success("a")), (b, success("b")), (c, success("c"))],
778 |                 success("d")
779 |             )
780 |         );
781 |     }
782 | 
783 |     #[test]
784 |     fn test_nested_match_without_switch() {
785 |         let (result, _) = compile(vec![
786 |             (pair(tt(), tt()), rhs("foo")),
787 |             (pair(tt(), ff()), rhs("bar")),
788 |             (pair(ff(), ff()), rhs("baz")),
789 |             (pair(ff(), tt()), rhs("quix")),
790 |         ]);
791 | 
792 |         // This doesn't produce a switch, as the nested patterns don't test the
793 |         // same value.
794 |         assert_eq!(
795 |             result.replace_nested_if(),
796 |             if_eq(
797 |                 sel(0, obj()),
798 |                 tt_con(),
799 |                 if_eq(sel(1, obj()), tt_con(), success("foo"), success("bar")),
800 |                 if_eq(sel(1, obj()), ff_con(), success("baz"), success("quix"))
801 |             )
802 |         );
803 |     }
804 | 
805 |     #[test]
806 |     fn test_match_with_args() {
807 |         let some = con("some", 3, 2);
808 |         let (result, _) = compile(vec![
809 |             (
810 |                 Pattern::Constructor(some.clone(), vec![ff(), tt(), tt()]),
811 |                 rhs("foo"),
812 |             ),
813 |             (var("x"), rhs("bar")),
814 |         ]);
815 | 
816 |         assert_eq!(
817 |             result,
818 |             if_eq(
819 |                 obj(),
820 |                 some,
821 |                 if_eq(
822 |                     sel(0, obj()),
823 |                     tt_con(),
824 |                     if_eq(
825 |                         sel(1, obj()),
826 |                         tt_con(),
827 |                         if_eq(
828 |                             sel(2, obj()),
829 |                             ff_con(),
830 |                             success("foo"),
831 |                             bind(obj(), "x", success("bar")),
832 |                         ),
833 |                         bind(obj(), "x", success("bar")),
834 |                     ),
835 |                     bind(obj(), "x", success("bar")),
836 |                 ),
837 |                 bind(obj(), "x", success("bar")),
838 |             )
839 |         );
840 |     }
841 | 
842 |     #[test]
843 |     fn test_match_with_infinite_span() {
844 |         let (result, diag) = compile(vec![(
845 |             Pattern::Constructor(con("int", 0, 0), Vec::new()),
846 |             rhs("foo"),
847 |         )]);
848 | 
849 |         assert_eq!(
850 |             result,
851 |             if_eq(obj(), con("int", 0, 0), success("foo"), failure())
852 |         );
853 |         assert_eq!(diag.messages, vec!["Missing pattern: _"]);
854 |     }
855 | 
856 |     #[test]
857 |     fn test_match_nonexhaustive_with_args() {
858 |         let some = con("some", 3, 2);
859 |         let (result, diags) = compile(vec![(
860 |             Pattern::Constructor(some.clone(), vec![ff(), ff(), tt()]),
861 |             rhs("foo"),
862 |         )]);
863 | 
864 |         assert_eq!(
865 |             result,
866 |             if_eq(
867 |                 obj(),
868 |                 some,
869 |                 if_eq(
870 |                     sel(0, obj()),
871 |                     tt_con(),
872 |                     if_eq(
873 |                         sel(1, obj()),
874 |                         ff_con(),
875 |                         if_eq(
876 |                             sel(2, obj()),
877 |                             ff_con(),
878 |                             success("foo"),
879 |                             failure()
880 |                         ),
881 |                         failure()
882 |                     ),
883 |                     failure()
884 |                 ),
885 |                 failure()
886 |             )
887 |         );
888 | 
889 |         assert_eq!(
890 |             diags.messages,
891 |             vec![
892 |                 "Missing pattern: some(true, false, _)".to_string(),
893 |                 "Missing pattern: some(true, _, _)".to_string(),
894 |                 "Missing pattern: some(_, _, _)".to_string(),
895 |                 "Missing pattern: _".to_string(),
896 |             ]
897 |         );
898 |     }
899 | }
900 | 


--------------------------------------------------------------------------------
/sestoft1996/src/lib.rs:
--------------------------------------------------------------------------------
1 | #![allow(clippy::too_many_arguments)]
2 | 
3 | pub mod idiomatic;
4 | pub mod raw;
5 | 


--------------------------------------------------------------------------------
/sestoft1996/src/raw.rs:
--------------------------------------------------------------------------------
   1 | // This implementation is a more or less 1:1 port of the SML implementation
   2 | // provided in the paper, with only a few changes made to make things work on
   3 | // Rust. For example, the SML implementation assumes multiple ownership of
   4 | // certain values, which isn't allowed in Rust. For the sake of simplicity, we
   5 | // just clone values in this case.
   6 | //
   7 | // This implementation doesn't use the memoization approach briefly mentioned in
   8 | // section 7.5 of the paper. This requires multiple ownership of the tree nodes,
   9 | // or a different way of building the tree/graph (e.g. using IDs). To keep
  10 | // things simple, we skip over this.
  11 | //
  12 | // Because this implementation is more or less a direct translation, it's _not_
  13 | // idiomatic Rust. An idiomatic implementation is provided separately.
  14 | //
  15 | // The Moscow ML compiler uses hash consing and a DAG as discussed in section
  16 | // 7.5 of the paper.
  17 | use std::collections::HashSet;
  18 | use std::fmt;
  19 | use std::rc::Rc;
  20 | 
  21 | /// An immutable linked list.
  22 | ///
  23 | /// The algorithm as presented in the paper makes use of and requires immutable
  24 | /// lists. For example, when compiling the `IfEq` nodes it compiles two
  25 | /// different branches, but assumes work start off with the same set of rules,
  26 | /// `work` values, etc. Since we're trying to stay as close to the paper as
  27 | /// possible, we also follow the use of immutable data types.
  28 | ///
  29 | /// Like the rest of this implementation we're focusing on keeping things as
  30 | /// simple as is reasonable, rather than making the implementation efficient.
  31 | #[derive(Eq, PartialEq)]
  32 | struct Node<T> {
  33 |     value: T,
  34 |     next: Option<Rc<Node<T>>>,
  35 | }
  36 | 
  37 | #[derive(Clone, Eq, PartialEq)]
  38 | pub struct List<T> {
  39 |     head: Option<Rc<Node<T>>>,
  40 |     len: usize,
  41 | }
  42 | 
  43 | impl<T> List<T> {
  44 |     fn new() -> List<T> {
  45 |         List { head: None, len: 0 }
  46 |     }
  47 | 
  48 |     /// Returns a new list starting with the given value.
  49 |     fn add(&self, value: T) -> List<T> {
  50 |         List {
  51 |             head: Some(Rc::new(Node { value, next: self.head.clone() })),
  52 |             len: self.len + 1,
  53 |         }
  54 |     }
  55 | 
  56 |     /// Splits a list into the head and a list of the nodes that follow it.
  57 |     fn split(&self) -> (Option<&T>, List<T>) {
  58 |         if let Some(n) = self.head.as_ref() {
  59 |             (Some(&n.value), List { head: n.next.clone(), len: self.len - 1 })
  60 |         } else {
  61 |             (None, List { head: self.head.clone(), len: self.len })
  62 |         }
  63 |     }
  64 | 
  65 |     fn iter(&self) -> ListIter<T> {
  66 |         ListIter { node: self.head.as_ref() }
  67 |     }
  68 | 
  69 |     fn len(&self) -> usize {
  70 |         self.len
  71 |     }
  72 | 
  73 |     fn is_empty(&self) -> bool {
  74 |         self.head.is_none()
  75 |     }
  76 | }
  77 | 
  78 | impl<T: Eq + PartialEq> List<T> {
  79 |     fn contains(&self, value: &T) -> bool {
  80 |         self.iter().any(|v| v == value)
  81 |     }
  82 | }
  83 | 
  84 | impl<T: Clone> List<T> {
  85 |     /// Merges `self` and `other`.
  86 |     fn merge(&self, other: List<T>) -> List<T> {
  87 |         let mut new_list = List::new();
  88 | 
  89 |         for value in self.iter().chain(other.iter()) {
  90 |             new_list = new_list.add(value.clone());
  91 |         }
  92 | 
  93 |         new_list.rev()
  94 |     }
  95 | 
  96 |     /// Returns a new list with the values in reverse order.
  97 |     fn rev(&self) -> List<T> {
  98 |         let mut new_list = List::new();
  99 | 
 100 |         for v in self.iter() {
 101 |             new_list = new_list.add(v.clone());
 102 |         }
 103 | 
 104 |         new_list
 105 |     }
 106 | }
 107 | 
 108 | impl<T: fmt::Debug> fmt::Debug for List<T> {
 109 |     fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
 110 |         f.debug_list().entries(self.iter()).finish()
 111 |     }
 112 | }
 113 | 
 114 | /// An iterator over the values in an immutable list.
 115 | struct ListIter<'a, T> {
 116 |     node: Option<&'a Rc<Node<T>>>,
 117 | }
 118 | 
 119 | impl<'a, T> Iterator for ListIter<'a, T> {
 120 |     type Item = &'a T;
 121 | 
 122 |     fn next(&mut self) -> Option<Self::Item> {
 123 |         if let Some(node) = self.node.take() {
 124 |             self.node = node.next.as_ref();
 125 | 
 126 |             Some(&node.value)
 127 |         } else {
 128 |             None
 129 |         }
 130 |     }
 131 | }
 132 | 
/// The type used for storing diagnostic messages.
///
/// Besides the messages themselves, this also records which right-hand sides
/// were reached, which is how redundant patterns can be detected.
pub struct Diagnostics {
    /// The diagnostic messages produced.
    ///
    /// In a real compiler this would include more than just a message, such as
    /// source location information (e.g. line and column numbers).
    messages: Vec<String>,

    /// The right-hand values (= the code you'd run upon a match) that have been
    /// processed.
    ///
    /// If a value isn't included in this set it means it and its pattern are
    /// redundant.
    ///
    /// In a real compiler you'd probably mark AST nodes directly. In our case
    /// the right-hand values are just simple strings, so we use a set instead.
    reachable: HashSet<RHS>,
}
 151 | 
/// The `con` (= constructor) type in the paper.
///
/// For a boolean, a constructor would have the following values:
///
/// - name: true or false
/// - arity: 0, as booleans don't take arguments
/// - span: 2, as there are only two constructors (true and false)
///
/// In a real compiler you'd probably use pointers/IDs to your type data
/// structures instead, but for the sake of keeping things simple we just use a
/// struct that can be cloned.
#[derive(Debug, Eq, PartialEq, Clone)]
pub struct Con {
    /// The name of the constructor, as shown in error messages.
    name: String,

    /// The number of arguments.
    arity: usize,

    /// The total number of constructors of the owning type.
    ///
    /// A span of 0 means the type has an infinite amount of constructors.
    span: usize,
}
 175 | 
/// A user provided pattern to match against an input value.
///
/// We only provide two types of patterns: constructors, and variables/bindings.
///
/// In a real compiler you'd probably be using AST nodes instead of dedicated
/// pattern types, and include more cases for specific patterns (e.g. tuple and
/// struct patterns).
#[derive(Debug, Clone)]
pub enum Pattern {
    /// A constructor pattern, along with the patterns for its arguments.
    Cons(Con, List<Pattern>),

    /// A variable/binding with the given name.
    Var(String),
}
 188 | 
/// The `termd` type from the paper.
#[derive(Debug, Clone, Eq, PartialEq)]
pub enum TermDesc {
    // The `Con` is the top-most constructor, and its components are described
    // by the list.
    Pos(Con, List<TermDesc>),

    // Any term whose top-most constructor is _not_ any of the listed
    // constructors.
    //
    // For a Neg(S), the cardinality of S must be less than the span of any
    // constructor in S:
    //
    //     cons.iter().all(|cons| cardinality(s) < span(cons))
    //
    // Due to static typing, all constructors in S are of the same type, thus
    // have the same span.
    Neg(List<Con>),
}
 208 | 
 209 | impl TermDesc {
 210 |     /// Returns a string used to describe this term in an error message.
 211 |     ///
 212 |     /// In a real compiler you'd do the following:
 213 |     ///
 214 |     /// For a Pos, just display the pattern/type/whatever name
 215 |     ///
 216 |     /// For a Neg(list), obtain all possible values from the constructor, ignore
 217 |     /// those in "list", then produce a name using the remaining values. So if
 218 |     /// "list" is `[red]`, and the possible values are `[red, green, blue]`, the
 219 |     /// returned string could be `green | blue`. If this is nested inside a
 220 |     /// `Pos("tuple", ...)` node you'd end up with something like
 221 |     /// `tuple(green | blue)`.
 222 |     ///
 223 |     /// For the sake of simplicity we just return `_` for a Neg.
 224 |     fn error_string(&self) -> String {
 225 |         match self {
 226 |             TermDesc::Pos(cons, args) => {
 227 |                 if args.is_empty() {
 228 |                     cons.name.clone()
 229 |                 } else {
 230 |                     format!(
 231 |                         "{}({})",
 232 |                         cons.name,
 233 |                         args.iter()
 234 |                             .map(|v| v.error_string())
 235 |                             .collect::<Vec<_>>()
 236 |                             .join(", ")
 237 |                     )
 238 |                 }
 239 |             }
 240 |             TermDesc::Neg(_) => "_".to_string(),
 241 |         }
 242 |     }
 243 | }
 244 | 
/// The `access` type in the paper.
///
/// An access path describes where to find the value to test, relative to the
/// value being matched.
#[derive(Clone, Debug, Eq, PartialEq)]
pub enum Access {
    /// The value being matched itself.
    Obj,

    /// The component at the given (0-based) index of the value found at the
    /// nested access path.
    Sel(usize, Box<Access>),
}
 251 | 
/// The `decision` type in the paper.
#[derive(Debug, Eq, PartialEq, Clone)]
pub enum Decision {
    /// A pattern didn't match.
    Failure,

    /// A pattern is matched and the right-hand value is to be returned.
    Success(RHS),

    /// Checks if a constructor matches the value at the given access path.
    ///
    /// The members are as follows:
    ///
    /// 1. The value to test against
    /// 2. The pattern/value to match against
    /// 3. The path to take upon a match
    /// 4. The path to take upon a failure
    ///
    /// A node like this:
    ///
    /// ```text
    /// IfEq(Sel(0, Obj), x, ok, err)
    /// ```
    ///
    /// Translates to roughly the following pseudo code:
    ///
    /// ```text
    /// if obj.0 is x {
    ///   ok
    /// } else {
    ///   err
    /// }
    /// ```
    IfEq(Access, Con, Box<Decision>, Box<Decision>),

    /// Checks if any of the given constructors match the value at the given
    /// access path.
    ///
    /// The members are as follows:
    ///
    /// 1. The value to test against
    /// 2. The list of constructors to test and their corresponding decisions
    /// 3. A fallback decision in case no patterns match
    ///
    /// The fallback is needed because given a type with a span of N, IfEq nodes
    /// only test N-1 constructors, as the last possible constructor is
    /// implicitly assumed in the IfEq node's "else" body. That is, IfEq tests
    /// are like this:
    ///
    /// ```text
    /// if value is green {
    ///   ...
    /// } else {
    ///   if value is red {
    ///     ...
    ///   } else {
    ///     ..
    ///   }
    /// }
    /// ```
    ///
    /// And not like this:
    ///
    /// ```text
    /// if value is green {
    ///   ...
    /// } else if value is red {
    ///   ...
    /// } else if value is blue {
    ///   ...
    /// }
    /// ```
    ///
    /// A real compiler may have to somehow "lift" the fallback into a
    /// switch/jump case.
    Switch(Access, List<(Con, Decision)>, Box<Decision>),
}
 321 | 
/// The result of the `staticmatch` function, which compares a constructor
/// against what is already known about a term.
#[derive(Debug)]
enum StaticMatch {
    /// The term is known to match the constructor.
    Yes,

    /// The term is known not to match the constructor.
    No,

    /// What is known about the term isn't enough to decide either way.
    Maybe,
}
 329 | 
/// `type context = (con * termd list) list` in the paper.
///
/// Each entry pairs a constructor with the term descriptions collected for its
/// arguments so far. New descriptions are added to the front of that list (see
/// `augment`), which is why `norm` reverses it when building a `Pos` term.
type Context = List<(Con, List<TermDesc>)>;
 332 | 
/// The work stack as used in the paper.
///
/// Each entry is a triple of the patterns still to match, the access paths of
/// the values to match them against, and the term descriptions of those values.
///
/// The paper uses a list of triple lists, removing the need for some append
/// operations. This is a bit annoying to work with in Rust (we have to unwrap()
/// in some places), but again we're trying to stay as close to the paper as
/// possible.
///
/// We use a type alias here so we don't have to re-type this type name in the
/// various places that it's used.
type Work = List<(List<Pattern>, List<Access>, List<TermDesc>)>;
 343 | 
/// The type of the right-hand side of a case (i.e. the code to run).
///
/// For the sake of simplicity we just use a String here. In a real compiler
/// this would probably be an AST node or another sort of IR to run upon a
/// match.
///
/// Values of this type are stored in `Decision::Success` nodes and in the
/// `reachable` set of `Diagnostics`.
pub type RHS = String;
 350 | 
 351 | /// The `addneg` function in the paper.
 352 | fn addneg(dsc: TermDesc, con: Con) -> TermDesc {
 353 |     match dsc {
 354 |         // The paper introduces this function as a non-exhaustive function. The
 355 |         // implementation in the Moscow ML compiler just returns the term when
 356 |         // it's a Pos, so we do the same.
 357 |         TermDesc::Pos(_, _) => dsc,
 358 |         TermDesc::Neg(nonset) => TermDesc::Neg(nonset.add(con)),
 359 |     }
 360 | }
 361 | 
 362 | /// The `staticmatch` function in the paper.
 363 | fn staticmatch(pcon: &Con, term: &TermDesc) -> StaticMatch {
 364 |     match term {
 365 |         TermDesc::Pos(scon, _) => {
 366 |             if pcon == scon {
 367 |                 StaticMatch::Yes
 368 |             } else {
 369 |                 StaticMatch::No
 370 |             }
 371 |         }
 372 |         TermDesc::Neg(excluded) => {
 373 |             if excluded.contains(pcon) {
 374 |                 StaticMatch::No
 375 |             } else if pcon.span == (excluded.len() + 1) {
 376 |                 // The way this works is as follows:
 377 |                 //
 378 |                 // A boolean has a span of two, as it has two constructors (true
 379 |                 // and false).
 380 |                 //
 381 |                 // The `if` above means we determined our constructor IS NOT in
 382 |                 // the deny list. Due to static typing, our list can't
 383 |                 // contain unrelated constructors (e.g. an ADT constructor).
 384 |                 //
 385 |                 // Thus, if the length of the deny list is one less than the
 386 |                 // span of our type, we know for a fact our constructor matches
 387 |                 // the remaining constructor.
 388 |                 //
 389 |                 // In other words: we know we are NOT A, B, and C, and the only
 390 |                 // remaining option is D. Thus, we match D.
 391 |                 StaticMatch::Yes
 392 |             } else {
 393 |                 StaticMatch::Maybe
 394 |             }
 395 |         }
 396 |     }
 397 | }
 398 | 
 399 | /// The equivalent of `List.tabulate` as used in the paper.
 400 | ///
 401 | /// This function is kind of pointless in Rust as we could just use map(), but
 402 | /// we try to stay as close to the paper as possible in this implementation.
 403 | fn tabulate<T, F: Fn(usize) -> T>(length: usize, func: F) -> List<T> {
 404 |     let mut list = List::new();
 405 | 
 406 |     for val in (0..length).rev() {
 407 |         list = list.add(func(val));
 408 |     }
 409 | 
 410 |     list
 411 | }
 412 | 
/// Builds a list with one value per argument of `pcon`, where each value is
/// produced by calling `func` with the (0-based) index of the argument.
fn args<T, F: Fn(usize) -> T>(pcon: &Con, func: F) -> List<T> {
    tabulate(pcon.arity, func)
}
 416 | 
 417 | fn getdargs(pcon: &Con, term: TermDesc) -> List<TermDesc> {
 418 |     match term {
 419 |         TermDesc::Pos(_, dargs) => dargs,
 420 |         TermDesc::Neg(_) => {
 421 |             tabulate(pcon.arity, |_| TermDesc::Neg(List::new()))
 422 |         }
 423 |     }
 424 | }
 425 | 
 426 | fn getoargs(pcon: &Con, acc: Access) -> List<Access> {
 427 |     // The paper uses `i+1`, presumably because humans use "1" to address the
 428 |     // first element (or maybe this is an SML thing?). Unfortunately, this isn't
 429 |     // clarified in the paper. Since it doesn't seem to actually matter, and
 430 |     // basically everyting is 0-indexed, we drop the +1 here.
 431 |     args(pcon, |i| Access::Sel(i, Box::new(acc.clone())))
 432 | }
 433 | 
 434 | fn augment(ctx: Context, dsc: TermDesc) -> Context {
 435 |     let (val, rest) = ctx.split();
 436 | 
 437 |     if let Some((con, args)) = val {
 438 |         rest.add((con.clone(), args.add(dsc)))
 439 |     } else {
 440 |         rest
 441 |     }
 442 | }
 443 | 
 444 | fn norm(ctx: Context) -> Context {
 445 |     let (val, rest) = ctx.split();
 446 | 
 447 |     if let Some((con, args)) = val {
 448 |         augment(rest, TermDesc::Pos(con.clone(), args.rev()))
 449 |     } else {
 450 |         rest
 451 |     }
 452 | }
 453 | 
 454 | fn builddsc(ctx: Context, dsc: TermDesc, work: Work) -> TermDesc {
 455 |     if let (Some((con, args)), rest) = ctx.split() {
 456 |         let (job, workr) = work.split();
 457 |         let (_, _, dargs) = job.unwrap();
 458 | 
 459 |         // The paper uses the following code for this:
 460 |         //
 461 |         //     rev args @ (dsc :: dargs)
 462 |         //
 463 |         // SML parses this as follows:
 464 |         //
 465 |         //     (rev args)   @   (dsc :: dargs)
 466 |         //
 467 |         // That is: it first reverses `args`, then appends the result of
 468 |         // `(dsc :: dargs)` to it. If you were to _first_ merge the values and
 469 |         // then reverse, you'd get incorrect decision trees. Unfortunately, I
 470 |         // ran into exactly that bug, and it took me a few hours to figure out.
 471 |         // And this is why functions with arguments should use parentheses and
 472 |         // commas :)
 473 |         let new_dsc =
 474 |             TermDesc::Pos(con.clone(), args.rev().merge(dargs.add(dsc)));
 475 | 
 476 |         builddsc(rest, new_dsc, workr)
 477 |     } else {
 478 |         dsc
 479 |     }
 480 | }
 481 | 
 482 | fn fail(
 483 |     dsc: TermDesc,
 484 |     rules: List<(Pattern, RHS)>,
 485 |     diags: &mut Diagnostics,
 486 | ) -> Decision {
 487 |     if let (Some((pat1, rhs1)), rulesr) = rules.split() {
 488 |         matches(
 489 |             pat1.clone(),
 490 |             Access::Obj,
 491 |             dsc,
 492 |             List::new(),
 493 |             List::new(),
 494 |             rhs1.clone(),
 495 |             rulesr,
 496 |             diags,
 497 |         )
 498 |     } else {
 499 |         diags.messages.push(format!("Missing pattern: {}", dsc.error_string()));
 500 |         Decision::Failure
 501 |     }
 502 | }
 503 | 
 504 | fn succeed(
 505 |     ctx: Context,
 506 |     work: Work,
 507 |     rhs: RHS,
 508 |     rules: List<(Pattern, RHS)>,
 509 |     diags: &mut Diagnostics,
 510 | ) -> Decision {
 511 |     if let (Some((pats, accs, dscs)), workr) = work.split() {
 512 |         if pats.is_empty() && accs.is_empty() && dscs.is_empty() {
 513 |             succeed(norm(ctx), workr, rhs, rules, diags)
 514 |         } else {
 515 |             let (pat1, patr) = pats.split();
 516 |             let (obj1, objr) = accs.split();
 517 |             let (dsc1, dscr) = dscs.split();
 518 | 
 519 |             matches(
 520 |                 pat1.unwrap().clone(),
 521 |                 obj1.unwrap().clone(),
 522 |                 dsc1.unwrap().clone(),
 523 |                 ctx,
 524 |                 workr.add((patr, objr, dscr)),
 525 |                 rhs,
 526 |                 rules,
 527 |                 diags,
 528 |             )
 529 |         }
 530 |     } else {
 531 |         diags.reachable.insert(rhs.clone());
 532 |         Decision::Success(rhs)
 533 |     }
 534 | }
 535 | 
 536 | /// This corresponds to the inner function `succeed'` in the paper.
 537 | fn match_succeed(
 538 |     pcon: Con,
 539 |     pargs: List<Pattern>,
 540 |     obj: Access,
 541 |     dsc: TermDesc,
 542 |     ctx: Context,
 543 |     work: Work,
 544 |     rhs: RHS,
 545 |     rules: List<(Pattern, RHS)>,
 546 |     diags: &mut Diagnostics,
 547 | ) -> Decision {
 548 |     let oargs = getoargs(&pcon, obj);
 549 |     let dargs = getdargs(&pcon, dsc);
 550 | 
 551 |     succeed(
 552 |         ctx.add((pcon, List::new())),
 553 |         work.add((pargs, oargs, dargs)),
 554 |         rhs,
 555 |         rules,
 556 |         diags,
 557 |     )
 558 | }
 559 | 
 560 | /// This corresponds to the inner function `fail'` in the paper.
 561 | fn match_fail(
 562 |     newdsc: TermDesc,
 563 |     ctx: Context,
 564 |     work: Work,
 565 |     rules: List<(Pattern, RHS)>,
 566 |     diags: &mut Diagnostics,
 567 | ) -> Decision {
 568 |     fail(builddsc(ctx, newdsc, work), rules, diags)
 569 | }
 570 | 
 571 | fn matches(
 572 |     pat1: Pattern,
 573 |     obj: Access,
 574 |     dsc: TermDesc,
 575 |     ctx: Context,
 576 |     work: Work,
 577 |     rhs: RHS,
 578 |     rules: List<(Pattern, RHS)>,
 579 |     diags: &mut Diagnostics,
 580 | ) -> Decision {
 581 |     match pat1 {
 582 |         Pattern::Var(_) => succeed(augment(ctx, dsc), work, rhs, rules, diags),
 583 |         Pattern::Cons(pcon, pargs) => match staticmatch(&pcon, &dsc) {
 584 |             StaticMatch::Yes => match_succeed(
 585 |                 pcon, pargs, obj, dsc, ctx, work, rhs, rules, diags,
 586 |             ),
 587 |             StaticMatch::No => match_fail(dsc, ctx, work, rules, diags),
 588 |             StaticMatch::Maybe => {
 589 |                 // In the paper the equivalent code makes two assumptions that
 590 |                 // don't work in Rust:
 591 |                 //
 592 |                 // 1. Certain values can have multiple owners (e.g. the `dsc`
 593 |                 //    value is shared between functions).
 594 |                 // 2. When building the subtree for a matched value, the
 595 |                 //    algorithm expects that variables such as `work` and
 596 |                 //    `rules` _are not_ modified in place. If they are,
 597 |                 //    generating the subtree for an unmatched value produces
 598 |                 //    incorrect results.
 599 |                 //
 600 |                 // In case of shared ownership we just clone the values. In a
 601 |                 // real compiler that probably wouldn't work very well, but for
 602 |                 // the sake of this implementation it's good enough.
 603 |                 Decision::IfEq(
 604 |                     obj.clone(),
 605 |                     pcon.clone(),
 606 |                     Box::new(match_succeed(
 607 |                         pcon.clone(),
 608 |                         pargs,
 609 |                         obj,
 610 |                         dsc.clone(),
 611 |                         ctx.clone(),
 612 |                         work.clone(),
 613 |                         rhs,
 614 |                         rules.clone(),
 615 |                         diags,
 616 |                     )),
 617 |                     Box::new(match_fail(
 618 |                         addneg(dsc, pcon),
 619 |                         ctx,
 620 |                         work,
 621 |                         rules,
 622 |                         diags,
 623 |                     )),
 624 |                 )
 625 |             }
 626 |         },
 627 |     }
 628 | }
 629 | 
 630 | /// Recursively collects cases for a Switch node.
 631 | ///
 632 | /// This is based on the `collect` function as found in the Moscow ML compiler.
 633 | fn collect(
 634 |     root_acc: &Access,
 635 |     cases: List<(Con, Decision)>,
 636 |     decision: Decision,
 637 | ) -> (List<(Con, Decision)>, Decision) {
 638 |     match decision {
 639 |         Decision::IfEq(acc, con, ok, fail) if root_acc == &acc => {
 640 |             let (cases, dec) = collect(root_acc, cases, *fail);
 641 | 
 642 |             // We add our case _after_ recursing, ensuring the order of values
 643 |             // in the list is the same as the order of matches. If we were to
 644 |             // add _before_ recursing, the list would be in reverse order.
 645 |             (cases.add((con, *ok)), dec)
 646 |         }
 647 |         _ => (cases, decision),
 648 |     }
 649 | }
 650 | 
 651 | /// Replacing a series of nested IfEq nodes for the same access object with a
 652 | /// Switch node.
 653 | pub fn switchify(tree: Decision) -> Decision {
 654 |     match tree {
 655 |         Decision::IfEq(acc, con, ok, fail) => {
 656 |             let (cases, fallback) = collect(&acc, List::new(), *fail);
 657 | 
 658 |             if cases.is_empty() {
 659 |                 Decision::IfEq(acc, con, ok, Box::new(fallback))
 660 |             } else {
 661 |                 Decision::Switch(acc, cases.add((con, *ok)), Box::new(fallback))
 662 |             }
 663 |         }
 664 |         _ => tree,
 665 |     }
 666 | }
 667 | 
 668 | /// Compiles a list of rules into a decision tree.
 669 | pub fn compile(rules: List<(Pattern, RHS)>) -> (Decision, Diagnostics) {
 670 |     let mut diags =
 671 |         Diagnostics { messages: Vec::new(), reachable: HashSet::new() };
 672 | 
 673 |     (fail(TermDesc::Neg(List::new()), rules, &mut diags), diags)
 674 | }
 675 | 
 676 | #[cfg(test)]
 677 | mod tests {
 678 |     use super::*;
 679 | 
 680 |     /// A macro for creating a linked list.
 681 |     ///
 682 |     /// Rust has no (linked) list literals, so we use this macro instead.
 683 |     /// Basically whenever you have the SML expression `[a; b; c]`, you'd
 684 |     /// instead use `list![a, b, c]`.
 685 |     ///
 686 |     /// When creating a list using this macro, the values are added to the end
 687 |     /// of the list.
 688 |     macro_rules! list {
 689 |         ($($value: expr),*$(,)?) => {{
 690 |             let temp = vec![$($value),*];
 691 |             let mut list = List::new();
 692 | 
 693 |             for val in temp.into_iter().rev() {
 694 |                 list = list.add(val);
 695 |             }
 696 | 
 697 |             list
 698 |         }}
 699 |     }
 700 | 
 701 |     fn con(name: &str, arity: usize, span: usize) -> Con {
 702 |         Con { name: name.to_string(), arity, span }
 703 |     }
 704 | 
 705 |     fn nil() -> Pattern {
 706 |         Pattern::Cons(con("nil", 0, 1), List::new())
 707 |     }
 708 | 
 709 |     fn tt_con() -> Con {
 710 |         con("true", 0, 2)
 711 |     }
 712 | 
 713 |     fn ff_con() -> Con {
 714 |         con("false", 0, 2)
 715 |     }
 716 | 
 717 |     fn tt() -> Pattern {
 718 |         Pattern::Cons(tt_con(), List::new())
 719 |     }
 720 | 
 721 |     fn ff() -> Pattern {
 722 |         Pattern::Cons(ff_con(), List::new())
 723 |     }
 724 | 
 725 |     fn pair(a: Pattern, b: Pattern) -> Pattern {
 726 |         Pattern::Cons(con("pair", 2, 1), list![a, b])
 727 |     }
 728 | 
 729 |     fn var(name: &str) -> Pattern {
 730 |         Pattern::Var(name.to_string())
 731 |     }
 732 | 
 733 |     fn if_eq(acc: Access, con: Con, ok: Decision, fail: Decision) -> Decision {
 734 |         Decision::IfEq(acc, con, Box::new(ok), Box::new(fail))
 735 |     }
 736 | 
 737 |     fn switch(
 738 |         acc: Access,
 739 |         cases: List<(Con, Decision)>,
 740 |         fallback: Decision,
 741 |     ) -> Decision {
 742 |         Decision::Switch(acc, cases, Box::new(fallback))
 743 |     }
 744 | 
 745 |     fn success(value: &str) -> Decision {
 746 |         Decision::Success(value.to_string())
 747 |     }
 748 | 
 749 |     fn failure() -> Decision {
 750 |         Decision::Failure
 751 |     }
 752 | 
 753 |     fn rhs(value: &str) -> String {
 754 |         value.to_string()
 755 |     }
 756 | 
 757 |     fn obj() -> Access {
 758 |         Access::Obj
 759 |     }
 760 | 
 761 |     fn sel(index: usize, acc: Access) -> Access {
 762 |         Access::Sel(index, Box::new(acc))
 763 |     }
 764 | 
 765 |     #[test]
 766 |     fn test_list_push_pop() {
 767 |         let list1 = List::new();
 768 |         let list2 = list1.add(10);
 769 |         let list3 = list2.add(20);
 770 | 
 771 |         assert!(list1.head.is_none());
 772 |         assert!(list2.head.is_some());
 773 |         assert!(list3.head.is_some());
 774 | 
 775 |         assert_eq!(list2.split().0, Some(&10));
 776 |         assert_eq!(list2.split().0, Some(&10));
 777 |         assert_eq!(list3.split().0, Some(&20));
 778 |     }
 779 | 
 780 |     #[test]
 781 |     fn test_list_rev() {
 782 |         let list1 = list![3, 2, 1];
 783 |         let list2 = list1.rev();
 784 | 
 785 |         assert_eq!(list1.iter().collect::<Vec<_>>(), vec![&3, &2, &1]);
 786 |         assert_eq!(list2.iter().collect::<Vec<_>>(), vec![&1, &2, &3]);
 787 |     }
 788 | 
 789 |     #[test]
 790 |     fn test_list_rev_and_merge() {
 791 |         let list1 = list![3, 2, 1];
 792 |         let list2 = list![4];
 793 |         let list3 = list1.rev().merge(list2.add(10));
 794 | 
 795 |         assert_eq!(list3.iter().collect::<Vec<_>>(), vec![&1, &2, &3, &10, &4]);
 796 |     }
 797 | 
 798 |     #[test]
 799 |     fn test_list_merge() {
 800 |         let list1 = list![1, 2];
 801 |         let list2 = list![3, 4];
 802 |         let list3 = list1.merge(list2);
 803 | 
 804 |         assert_eq!(list3.iter().collect::<Vec<_>>(), vec![&1, &2, &3, &4]);
 805 |     }
 806 | 
 807 |     #[test]
 808 |     fn test_term_desc_error_string() {
 809 |         let term = TermDesc::Pos(
 810 |             con("box", 2, 1),
 811 |             list![
 812 |                 TermDesc::Pos(con("true", 0, 2), List::new()),
 813 |                 TermDesc::Neg(list![con("false", 0, 2)])
 814 |             ],
 815 |         );
 816 | 
 817 |         assert_eq!(term.error_string(), "box(true, _)");
 818 |     }
 819 | 
 820 |     #[test]
 821 |     fn test_tabulate() {
 822 |         let vals = tabulate(3, |v| v);
 823 | 
 824 |         assert_eq!(vals.iter().collect::<Vec<_>>(), vec![&0, &1, &2]);
 825 |     }
 826 | 
 827 |     #[test]
 828 |     fn test_args() {
 829 |         let con = con("box", 2, 1);
 830 |         let vals = args(&con, |v| v);
 831 | 
 832 |         assert_eq!(vals.iter().collect::<Vec<_>>(), vec![&0, &1]);
 833 |     }
 834 | 
 835 |     #[test]
 836 |     fn test_getdargs_with_pos_term() {
 837 |         let con = con("box", 2, 1);
 838 |         let term =
 839 |             TermDesc::Pos(con.clone(), list![TermDesc::Neg(List::new())]);
 840 |         let args = getdargs(&con, term);
 841 |         let arg = args.iter().next();
 842 | 
 843 |         assert!(matches!(arg, Some(TermDesc::Neg(_))));
 844 |     }
 845 | 
 846 |     #[test]
 847 |     fn test_getdargs_with_neg_term() {
 848 |         let con = con("box", 2, 1);
 849 |         let term = TermDesc::Neg(List::new());
 850 |         let args = getdargs(&con, term);
 851 |         let mut iter = args.iter();
 852 | 
 853 |         assert!(matches!(iter.next(), Some(TermDesc::Neg(_))));
 854 |         assert!(matches!(iter.next(), Some(TermDesc::Neg(_))));
 855 |     }
 856 | 
 857 |     #[test]
 858 |     fn test_getoargs() {
 859 |         let con = con("box", 2, 1);
 860 |         let acc = sel(42, obj());
 861 |         let args = getoargs(&con, acc);
 862 | 
 863 |         assert_eq!(
 864 |             args.iter().collect::<Vec<_>>(),
 865 |             vec![&sel(0, sel(42, obj())), &sel(1, sel(42, obj()))]
 866 |         );
 867 |     }
 868 | 
 869 |     #[test]
 870 |     fn test_builddsc() {
 871 |         let ctx = list![(
 872 |             con("baz", 0, 1),
 873 |             list![
 874 |                 TermDesc::Neg(list![con("arg1", 0, 1)]),
 875 |                 TermDesc::Neg(list![con("arg2", 0, 1)]),
 876 |             ]
 877 |         )];
 878 |         let work = list![(
 879 |             List::new(),
 880 |             List::new(),
 881 |             list![
 882 |                 TermDesc::Neg(list![con("work1", 0, 1)]),
 883 |                 TermDesc::Neg(list![con("work2", 0, 1)])
 884 |             ]
 885 |         )];
 886 |         let dsc = TermDesc::Neg(list![con("bar", 0, 1)]);
 887 |         let new_dsc = builddsc(ctx, dsc, work);
 888 | 
 889 |         assert_eq!(
 890 |             new_dsc,
 891 |             TermDesc::Pos(
 892 |                 con("baz", 0, 1),
 893 |                 list![
 894 |                     TermDesc::Neg(list![con("arg2", 0, 1)]),
 895 |                     TermDesc::Neg(list![con("arg1", 0, 1)]),
 896 |                     TermDesc::Neg(list![con("bar", 0, 1)]),
 897 |                     TermDesc::Neg(list![con("work1", 0, 1)]),
 898 |                     TermDesc::Neg(list![con("work2", 0, 1)]),
 899 |                 ]
 900 |             )
 901 |         );
 902 |     }
 903 | 
 904 |     #[test]
 905 |     fn test_augment() {
 906 |         let ctx = list![(
 907 |             con("baz", 0, 1),
 908 |             list![
 909 |                 TermDesc::Neg(list![con("arg1", 0, 1)]),
 910 |                 TermDesc::Neg(list![con("arg2", 0, 1)]),
 911 |             ]
 912 |         )];
 913 | 
 914 |         let dsc = TermDesc::Neg(list![con("bar", 0, 1)]);
 915 |         let new_ctx = augment(ctx, dsc);
 916 | 
 917 |         assert_eq!(
 918 |             new_ctx,
 919 |             list![(
 920 |                 con("baz", 0, 1),
 921 |                 list![
 922 |                     TermDesc::Neg(list![con("bar", 0, 1)]),
 923 |                     TermDesc::Neg(list![con("arg1", 0, 1)]),
 924 |                     TermDesc::Neg(list![con("arg2", 0, 1)]),
 925 |                 ]
 926 |             )]
 927 |         );
 928 |     }
 929 | 
 930 |     #[test]
 931 |     fn test_match_always_succeeds() {
 932 |         let (result, _) = compile(list![(nil(), rhs("true"))]);
 933 | 
 934 |         assert_eq!(result, success("true"));
 935 |     }
 936 | 
 937 |     #[test]
 938 |     fn test_match_always_fails() {
 939 |         let (result, _) = compile(List::new());
 940 | 
 941 |         assert_eq!(result, failure());
 942 |     }
 943 | 
 944 |     #[test]
 945 |     fn test_match_single_pattern() {
 946 |         let (result, _) =
 947 |             compile(list![(tt(), rhs("true")), (ff(), rhs("false")),]);
 948 | 
 949 |         assert_eq!(
 950 |             result,
 951 |             if_eq(obj(), tt_con(), success("true"), success("false"))
 952 |         );
 953 |     }
 954 | 
 955 |     #[test]
 956 |     fn test_match_var() {
 957 |         let (result, _) = compile(list![(var("a"), rhs("true"))]);
 958 | 
 959 |         assert_eq!(result, success("true"));
 960 |     }
 961 | 
 962 |     #[test]
 963 |     fn test_match_multiple_patterns() {
 964 |         let (result, diags) = compile(list![
 965 |             (tt(), rhs("true")),
 966 |             (ff(), rhs("false")),
 967 |             (tt(), rhs("redundant"))
 968 |         ]);
 969 | 
 970 |         // Redundant patterns are ignored on the decision tree. This is also how
 971 |         // you'd detect redundant patterns: you'd somehow mark every RHS when
 972 |         // you produce their Success nodes. Any RHS nodes that remain unmarked
 973 |         // are redundant.
 974 |         assert_eq!(
 975 |             result,
 976 |             if_eq(obj(), tt_con(), success("true"), success("false"))
 977 |         );
 978 | 
 979 |         assert!(diags.reachable.contains(&"true".to_string()));
 980 |         assert!(diags.reachable.contains(&"false".to_string()));
 981 |         assert!(!diags.reachable.contains(&"redundant".to_string()));
 982 |     }
 983 | 
 984 |     #[test]
 985 |     fn test_nonexhaustive_match() {
 986 |         let (result, diags) = compile(list![(tt(), rhs("true")),]);
 987 | 
 988 |         assert_eq!(result, if_eq(obj(), tt_con(), success("true"), failure()));
 989 |         assert_eq!(diags.messages, vec!["Missing pattern: _".to_string()]);
 990 |     }
 991 | 
 992 |     #[test]
 993 |     fn test_nonexhaustive_match_from_paper() {
 994 |         let green = Pattern::Cons(con("green", 0, 3), List::new());
 995 |         let (result, diags) = compile(list![
 996 |             (pair(tt(), green.clone()), rhs("111")),
 997 |             (pair(ff(), green.clone()), rhs("222")),
 998 |         ]);
 999 | 
1000 |         assert_eq!(
1001 |             result,
1002 |             if_eq(
1003 |                 sel(0, obj()),
1004 |                 tt_con(),
1005 |                 if_eq(
1006 |                     sel(1, obj()),
1007 |                     con("green", 0, 3),
1008 |                     success("111"),
1009 |                     failure()
1010 |                 ),
1011 |                 if_eq(
1012 |                     sel(1, obj()),
1013 |                     con("green", 0, 3),
1014 |                     success("222"),
1015 |                     failure()
1016 |                 )
1017 |             )
1018 |         );
1019 | 
1020 |         assert_eq!(
1021 |             diags.messages,
1022 |             vec![
1023 |                 "Missing pattern: pair(true, _)".to_string(),
1024 |                 "Missing pattern: pair(false, _)".to_string()
1025 |             ]
1026 |         );
1027 |     }
1028 | 
1029 |     #[test]
1030 |     fn test_nested_match() {
1031 |         let (result, _) = compile(list![
1032 |             (pair(tt(), tt()), rhs("foo")),
1033 |             (pair(tt(), ff()), rhs("bar")),
1034 |             (pair(ff(), ff()), rhs("baz")),
1035 |             (pair(ff(), tt()), rhs("quix")),
1036 |         ]);
1037 | 
1038 |         assert_eq!(
1039 |             result,
1040 |             if_eq(
1041 |                 sel(0, obj()),
1042 |                 tt_con(),
1043 |                 if_eq(sel(1, obj()), tt_con(), success("foo"), success("bar")),
1044 |                 if_eq(sel(1, obj()), ff_con(), success("baz"), success("quix"))
1045 |             )
1046 |         );
1047 |     }
1048 | 
1049 |     #[test]
1050 |     fn test_match_with_switchify() {
1051 |         let a = con("a", 0, 4);
1052 |         let b = con("b", 0, 4);
1053 |         let c = con("c", 0, 4);
1054 |         let d = con("d", 0, 4);
1055 |         let a_pat = Pattern::Cons(a.clone(), List::new());
1056 |         let b_pat = Pattern::Cons(b.clone(), List::new());
1057 |         let c_pat = Pattern::Cons(c.clone(), List::new());
1058 |         let d_pat = Pattern::Cons(d.clone(), List::new());
1059 |         let (result, _) = compile(list![
1060 |             ((a_pat, rhs("a"))),
1061 |             ((b_pat, rhs("b"))),
1062 |             ((c_pat, rhs("c"))),
1063 |             ((d_pat, rhs("d")))
1064 |         ]);
1065 | 
1066 |         assert_eq!(
1067 |             switchify(result),
1068 |             switch(
1069 |                 obj(),
1070 |                 list![(a, success("a")), (b, success("b")), (c, success("c"))],
1071 |                 success("d")
1072 |             )
1073 |         );
1074 |     }
1075 | 
1076 |     #[test]
1077 |     fn test_nested_match_without_switch() {
1078 |         let (result, _) = compile(list![
1079 |             (pair(tt(), tt()), rhs("foo")),
1080 |             (pair(tt(), ff()), rhs("bar")),
1081 |             (pair(ff(), ff()), rhs("baz")),
1082 |             (pair(ff(), tt()), rhs("quix")),
1083 |         ]);
1084 | 
1085 |         // This doesn't produce a switch, as the nested patterns don't test the
1086 |         // same value.
1087 |         assert_eq!(
1088 |             switchify(result),
1089 |             if_eq(
1090 |                 sel(0, obj()),
1091 |                 tt_con(),
1092 |                 if_eq(sel(1, obj()), tt_con(), success("foo"), success("bar")),
1093 |                 if_eq(sel(1, obj()), ff_con(), success("baz"), success("quix"))
1094 |             )
1095 |         );
1096 |     }
1097 | 
1098 |     #[test]
1099 |     fn test_match_with_args() {
1100 |         let some = con("some", 3, 2);
1101 |         let (result, _) = compile(list![
1102 |             (Pattern::Cons(some.clone(), list![tt(), tt(), ff()]), rhs("foo")),
1103 |             (var("x"), rhs("bar"))
1104 |         ]);
1105 | 
1106 |         assert_eq!(
1107 |             result,
1108 |             if_eq(
1109 |                 obj(),
1110 |                 some,
1111 |                 if_eq(
1112 |                     sel(0, obj()),
1113 |                     tt_con(),
1114 |                     if_eq(
1115 |                         sel(1, obj()),
1116 |                         tt_con(),
1117 |                         if_eq(
1118 |                             sel(2, obj()),
1119 |                             ff_con(),
1120 |                             success("foo"),
1121 |                             success("bar")
1122 |                         ),
1123 |                         success("bar")
1124 |                     ),
1125 |                     success("bar")
1126 |                 ),
1127 |                 success("bar")
1128 |             )
1129 |         );
1130 |     }
1131 | 
1132 |     #[test]
1133 |     fn test_match_nonexhaustive_with_args() {
1134 |         let some = con("some", 3, 2);
1135 |         let (result, diags) = compile(list![(
1136 |             Pattern::Cons(some.clone(), list![tt(), ff(), ff()]),
1137 |             rhs("foo")
1138 |         ),]);
1139 | 
1140 |         assert_eq!(
1141 |             result,
1142 |             if_eq(
1143 |                 obj(),
1144 |                 some,
1145 |                 if_eq(
1146 |                     sel(0, obj()),
1147 |                     tt_con(),
1148 |                     if_eq(
1149 |                         sel(1, obj()),
1150 |                         ff_con(),
1151 |                         if_eq(
1152 |                             sel(2, obj()),
1153 |                             ff_con(),
1154 |                             success("foo"),
1155 |                             failure()
1156 |                         ),
1157 |                         failure()
1158 |                     ),
1159 |                     failure()
1160 |                 ),
1161 |                 failure()
1162 |             )
1163 |         );
1164 | 
1165 |         assert_eq!(
1166 |             diags.messages,
1167 |             vec![
1168 |                 "Missing pattern: some(true, false, _)".to_string(),
1169 |                 "Missing pattern: some(true, _, _)".to_string(),
1170 |                 "Missing pattern: some(_, _, _)".to_string(),
1171 |                 "Missing pattern: _".to_string(),
1172 |             ]
1173 |         );
1174 |     }
1175 | }
1176 | 


--------------------------------------------------------------------------------