├── .github
│   └── workflows
│       └── test.yml
├── .gitignore
├── Cargo.lock
├── Cargo.toml
├── LICENSE
├── README.md
├── jacobs2021
│   ├── Cargo.toml
│   ├── README.md
│   └── src
│       └── lib.rs
├── rustfmt.toml
└── sestoft1996
    ├── Cargo.lock
    ├── Cargo.toml
    ├── README.md
    └── src
        ├── idiomatic.rs
        ├── lib.rs
        └── raw.rs
/.github/workflows/test.yml:
--------------------------------------------------------------------------------
1 | ---
2 | name: Test
3 | on: push
4 |
5 | env:
6 |   CARGO_HOME: ${{ github.workspace }}/.cargo
7 |
8 | jobs:
9 |   test:
10 |     runs-on: ubuntu-latest
11 |     container:
12 |       image: 'rust:alpine'
13 |     steps:
14 |       - name: Checkout repository
15 |         uses: actions/checkout@v3
16 |
17 |       - name: Set up cache
18 |         uses: actions/cache@v3
19 |         with:
20 |           path: |
21 |             .cargo
22 |             target
23 |           key: ${{ runner.os }}-cargo-${{ hashFiles('**/Cargo.lock') }}
24 |
25 |       - name: Run tests
26 |         run: cargo test
27 |
28 |   rustfmt:
29 |     runs-on: ubuntu-latest
30 |     container:
31 |       image: 'rust:alpine'
32 |     steps:
33 |       - name: Checkout repository
34 |         uses: actions/checkout@v3
35 |
36 |       - name: Set up cache
37 |         uses: actions/cache@v3
38 |         with:
39 |           path: |
40 |             .cargo
41 |             target
42 |           key: ${{ runner.os }}-cargo-${{ hashFiles('**/Cargo.lock') }}
43 |
44 |       - name: Set up rustfmt
45 |         run: rustup component add rustfmt
46 |
47 |       - name: Check formatting
48 |         run: 'cargo fmt --all --check'
49 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | target
2 |
--------------------------------------------------------------------------------
/Cargo.lock:
--------------------------------------------------------------------------------
1 | # This file is automatically @generated by Cargo.
2 | # It is not intended for manual editing.
3 | version = 3
4 |
5 | [[package]]
6 | name = "bstr"
7 | version = "0.2.17"
8 | source = "registry+https://github.com/rust-lang/crates.io-index"
9 | checksum = "ba3569f383e8f1598449f1a423e72e99569137b47740b1da11ef19af3d5c3223"
10 | dependencies = [
11 | "lazy_static",
12 | "memchr",
13 | "regex-automata",
14 | ]
15 |
16 | [[package]]
17 | name = "console"
18 | version = "0.15.0"
19 | source = "registry+https://github.com/rust-lang/crates.io-index"
20 | checksum = "a28b32d32ca44b70c3e4acd7db1babf555fa026e385fb95f18028f88848b3c31"
21 | dependencies = [
22 | "encode_unicode",
23 | "libc",
24 | "once_cell",
25 | "terminal_size",
26 | "winapi",
27 | ]
28 |
29 | [[package]]
30 | name = "encode_unicode"
31 | version = "0.3.6"
32 | source = "registry+https://github.com/rust-lang/crates.io-index"
33 | checksum = "a357d28ed41a50f9c765dbfe56cbc04a64e53e5fc58ba79fbc34c10ef3df831f"
34 |
35 | [[package]]
36 | name = "jacobs2021"
37 | version = "1.0.0"
38 | dependencies = [
39 | "similar-asserts",
40 | ]
41 |
42 | [[package]]
43 | name = "lazy_static"
44 | version = "1.4.0"
45 | source = "registry+https://github.com/rust-lang/crates.io-index"
46 | checksum = "e2abad23fbc42b3700f2f279844dc832adb2b2eb069b2df918f455c4e18cc646"
47 |
48 | [[package]]
49 | name = "libc"
50 | version = "0.2.126"
51 | source = "registry+https://github.com/rust-lang/crates.io-index"
52 | checksum = "349d5a591cd28b49e1d1037471617a32ddcda5731b99419008085f72d5a53836"
53 |
54 | [[package]]
55 | name = "memchr"
56 | version = "2.5.0"
57 | source = "registry+https://github.com/rust-lang/crates.io-index"
58 | checksum = "2dffe52ecf27772e601905b7522cb4ef790d2cc203488bbd0e2fe85fcb74566d"
59 |
60 | [[package]]
61 | name = "once_cell"
62 | version = "1.12.0"
63 | source = "registry+https://github.com/rust-lang/crates.io-index"
64 | checksum = "7709cef83f0c1f58f666e746a08b21e0085f7440fa6a29cc194d68aac97a4225"
65 |
66 | [[package]]
67 | name = "regex-automata"
68 | version = "0.1.10"
69 | source = "registry+https://github.com/rust-lang/crates.io-index"
70 | checksum = "6c230d73fb8d8c1b9c0b3135c5142a8acee3a0558fb8db5cf1cb65f8d7862132"
71 |
72 | [[package]]
73 | name = "sestoft1996"
74 | version = "1.0.0"
75 |
76 | [[package]]
77 | name = "similar"
78 | version = "2.1.0"
79 | source = "registry+https://github.com/rust-lang/crates.io-index"
80 | checksum = "2e24979f63a11545f5f2c60141afe249d4f19f84581ea2138065e400941d83d3"
81 | dependencies = [
82 | "bstr",
83 | "unicode-segmentation",
84 | ]
85 |
86 | [[package]]
87 | name = "similar-asserts"
88 | version = "1.2.0"
89 | source = "registry+https://github.com/rust-lang/crates.io-index"
90 | checksum = "64c9f531a2375031d51c23c415ca12d0f0271b976211e2f727b7a0eac06a099d"
91 | dependencies = [
92 | "console",
93 | "similar",
94 | ]
95 |
96 | [[package]]
97 | name = "terminal_size"
98 | version = "0.1.17"
99 | source = "registry+https://github.com/rust-lang/crates.io-index"
100 | checksum = "633c1a546cee861a1a6d0dc69ebeca693bf4296661ba7852b9d21d159e0506df"
101 | dependencies = [
102 | "libc",
103 | "winapi",
104 | ]
105 |
106 | [[package]]
107 | name = "unicode-segmentation"
108 | version = "1.9.0"
109 | source = "registry+https://github.com/rust-lang/crates.io-index"
110 | checksum = "7e8820f5d777f6224dc4be3632222971ac30164d4a258d595640799554ebfd99"
111 |
112 | [[package]]
113 | name = "winapi"
114 | version = "0.3.9"
115 | source = "registry+https://github.com/rust-lang/crates.io-index"
116 | checksum = "5c839a674fcd7a98952e593242ea400abe93992746761e38641405d28b00f419"
117 | dependencies = [
118 | "winapi-i686-pc-windows-gnu",
119 | "winapi-x86_64-pc-windows-gnu",
120 | ]
121 |
122 | [[package]]
123 | name = "winapi-i686-pc-windows-gnu"
124 | version = "0.4.0"
125 | source = "registry+https://github.com/rust-lang/crates.io-index"
126 | checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6"
127 |
128 | [[package]]
129 | name = "winapi-x86_64-pc-windows-gnu"
130 | version = "0.4.0"
131 | source = "registry+https://github.com/rust-lang/crates.io-index"
132 | checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f"
133 |
--------------------------------------------------------------------------------
/Cargo.toml:
--------------------------------------------------------------------------------
1 | [workspace]
2 | members = ["sestoft1996", "jacobs2021"]
3 | resolver = "2"
4 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | This is free and unencumbered software released into the public domain.
2 |
3 | Anyone is free to copy, modify, publish, use, compile, sell, or
4 | distribute this software, either in source code form or as a compiled
5 | binary, for any purpose, commercial or non-commercial, and by any
6 | means.
7 |
8 | In jurisdictions that recognize copyright laws, the author or authors
9 | of this software dedicate any and all copyright interest in the
10 | software to the public domain. We make this dedication for the benefit
11 | of the public at large and to the detriment of our heirs and
12 | successors. We intend this dedication to be an overt act of
13 | relinquishment in perpetuity of all present and future rights to this
14 | software under copyright law.
15 |
16 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
17 | EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18 | MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
19 | IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR
20 | OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
21 | ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
22 | OTHER DEALINGS IN THE SOFTWARE.
23 |
24 | For more information, please refer to <https://unlicense.org>
25 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # Pattern matching in Rust
2 |
3 | This repository contains a collection of pattern matching algorithms implemented
4 | in Rust. The goal of these implementations is to (hopefully) make it easier to
5 | understand them, as papers related to pattern matching (and papers in general)
6 | can be difficult to read.
7 |
8 | ## Background
9 |
10 | I ended up implementing these algorithms while investigating potential pattern
11 | matching/exhaustiveness checking algorithms for [Inko](https://inko-lang.org/).
12 | While there are plenty of papers on the subject, few of them include reference
13 | code, and almost all of them are really dense and difficult to read. I hope the
14 | code published in this repository is of use to those wishing to implement
15 | pattern matching/exhaustiveness.
16 |
17 | ## Algorithms
18 |
19 | | Name | Paper | Directory
20 | |:----------------------------------------------|:-----------------------------|:-----------
21 | | ML pattern compilation and partial evaluation | [PDF](https://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.48.1363) | [sestoft1996](./sestoft1996/)
22 | | How to compile pattern matching | [PDF](https://julesjacobs.com/notes/patternmatching/patternmatching.pdf) | [jacobs2021](./jacobs2021/)
23 |
24 | Other papers I've come across (but don't necessarily want to implement):
25 |
26 | - [A generic algorithm for checking exhaustivity of pattern
27 | matching](https://dl.acm.org/doi/10.1145/2998392.2998401).
28 | - The Scala implementation [is found in this PR](https://github.com/lampepfl/dotty/pull/1364) (the `Space.scala` file).
29 | - Swift also uses this algorithm [here](https://github.com/apple/swift/blob/3c0b1ab03f189e044303436b8aa6a27c2f93707d/lib/Sema/TypeCheckSwitchStmt.cpp)
30 | - Some Reddit comments about the algorithm are [found here](https://www.reddit.com/r/ProgrammingLanguages/comments/cioxwn/a_generic_algorithm_for_checking_exhaustivity_of/)
31 | - [Compiling pattern matching to good decision
32 | trees](https://www.cs.tufts.edu/comp/150FP/archive/luc-maranget/jun08.pdf).
33 | This is about just compiling pattern matching into a decision tree, not about
34 | exhaustiveness checking. If you don't know how to read the computer science
35 | hieroglyphs (like me), this paper is basically impossible to understand.
36 | - See also https://alan-j-hu.github.io/writing/pattern-matching.html and
37 | https://contificate.github.io/compiling-pattern-matching/
38 | - There's a [Rust implementation](https://github.com/SomewhatML/match-compile)
39 | of this algorithm, though it doesn't perform exhaustiveness checking.
40 | - [Warnings for pattern
41 | matching](http://pauillac.inria.fr/~maranget/papers/warn/warn.pdf). This is
42 | just about producing warnings/errors for e.g. non-exhaustive patterns.
43 | Similarly painful to understand as the previous paper (i.e. I gave up).
44 | - [The Implementation of Functional Programming
45 | Languages](https://www.microsoft.com/en-us/research/publication/the-implementation-of-functional-programming-languages/).
46 | This book has a chapter on pattern matching, but I gave up on it.
47 |
48 | ## Requirements
49 |
50 | A recent-ish (as of 2022) Rust version that supports the 2021 edition (though I
51 | think the 2018 edition should also work).
52 |
53 | ## Usage
54 |
55 | Each algorithm is implemented as a library, and comes with a set of unit tests
56 | that you can run using `cargo test`.
57 |
58 | ## Licence
59 |
60 | The code in this repository is licensed under the
61 | [Unlicense](https://unlicense.org/). A copy of this license can be found in the
62 | file "LICENSE".
63 |
--------------------------------------------------------------------------------
/jacobs2021/Cargo.toml:
--------------------------------------------------------------------------------
1 | [package]
2 | name = "jacobs2021"
3 | version = "1.0.0"
4 | authors = ["Yorick Peterse "]
5 | edition = "2021"
6 |
7 | [lib]
8 | doctest = false
9 |
10 | [dev-dependencies]
11 | similar-asserts = "^1.1"
12 |
--------------------------------------------------------------------------------
/jacobs2021/README.md:
--------------------------------------------------------------------------------
1 | # How to compile pattern matching
2 |
3 | This directory contains an implementation of the algorithm discussed in the
4 | article [How to compile pattern
5 | matching](https://julesjacobs.com/notes/patternmatching/patternmatching.pdf) by
6 | Jules Jacobs. The algorithm in question took me a while to understand, and I'm
7 | grateful for all the help provided by Jules via Email. Thanks!
8 |
9 | Now on to the algorithm. In hindsight it ended up not being as difficult as I
10 | initially thought, rather the way it was explained was a bit hard to understand.
11 | The algorithm works as follows:
12 |
13 | First, we treat a match expression as if it were a table (in the database
14 | sense), consisting of rows and columns. The rows are the match cases (sometimes
15 | called "match arms"), and the columns the patterns to test. Consider this match
16 | expression (I'm using Rust syntax here):
17 |
18 | ```rust
19 | match some_number {
20 | 10 => foo,
21 | 20 => bar,
22 | 30 => baz
23 | }
24 | ```
25 |
26 | Here `10 => foo`, `20 => bar` and `30 => baz` are the rows, and `10`, `20` and
27 | `30` are the columns for each row. User provided match expressions only support
28 | single columns (OR patterns are just turned into separate rows), but internally
29 | the compiler supports multiple columns.
30 |
31 | Internally our match expression is represented not as a list of rows and columns
32 | implicitly testing against an outer variable (`some_number` in the above case),
33 | instead each column explicitly specifies what it tests against. This means the
34 | above match expression is internally represented as follows:
35 |
36 | ```rust
37 | match {
38 | some_number is 10 => foo,
39 | some_number is 20 => bar,
40 | some_number is 30 => baz
41 | }
42 | ```
43 |
44 | Here I used the made-up syntax `x is y` to indicate the column tests against the
45 | variable `some_number`, and the pattern tested is e.g. `10`.
46 |
47 | Next, we need to get rid of variable patterns. This is done by pushing them into
48 | the right-hand side (= the code to run upon a match) of each case. This means we
49 | transform this expression:
50 |
51 |
52 | ```rust
53 | match {
54 | some_number is 10 => foo,
55 | some_number is num => bar
56 | }
57 | ```
58 |
59 | Into this:
60 |
61 | ```rust
62 | match {
63 | some_number is 10 => foo,
64 | // I'm using "∅" here to signal a row without any columns.
65 | ∅ => {
66 | let num = some_number;
67 | bar
68 | }
69 | }
70 | ```
71 |
72 | The article explains this makes things easier, though it doesn't really say
73 | clearly why. The reason for this is as follows:
74 |
75 | 1. It reduces the amount of duplication in the resulting decision tree, as we
76 | don't need to branch for variable and wildcard patterns.
77 | 1. It means variable patterns don't influence branching decisions discussed
78 | below.
79 | 1. When we branch on columns (again, discussed below), we can just forget about
80 | variable patterns.
81 |
82 | Essentially it takes the following steps:
83 |
84 | 1. Each right-hand side can store zero or more variables to define _before_
85 | running the code.
86 | 1. Iterate over the columns in a row.
87 | 1. If the column is a variable pattern, copy/move the variable into the
88 | right-hand side's variable list.
89 | 1. Return a new row that only includes non-variable columns.
90 |
91 | The implementation handles this in the method `move_variable_patterns`.
92 |
93 | Now we need to decide what column to branch on. In practise it probably won't
94 | matter much which strategy is used, so the algorithm takes a simple approach: it
95 | takes the columns of the first row, and for every column counts how many times
96 | the variable it tests is also tested by the columns of all rows. It then
97 | returns the column whose variable is tested the most. The implementation of
98 | this is in the method `branch_variable`.
99 |
100 | Now that we know what variable/column to branch on, we can generate the
101 | necessary branches and sub trees. The article only covers simple constructor
102 | patterns, but my implementation also handles integer literals, booleans, and
103 | more. The exact approach differs a bit and I recommend studying the Rust code to
104 | get a better understanding, but it roughly works as follows:
105 |
106 | 1. Create an array containing triples in the form
107 | `(constructor, arguments, rows)`. In this triple `constructor` is the
108 | constructor we're testing against, `arguments` is a list of variables exposed
109 | to the sub tree, and `rows` is the list of rows to compile for this test.
110 | The `arguments` array is filled with one variable for every argument.
111 | 1. Iterate over all the current rows.
112 | 1. Obtain the column index of the branching variable.
113 | 1. If we found an index (remember that a row doesn't have to contain any columns
114 | testing the branching variable), use it to remove the column from the row.
115 | 1. Determine the index of the constructor in the array created in step 1. For
116 | ADTs you'd use the tag values, for booleans you could use 0 and 1 for false
117 | and true respectively, etc.
118 | 1. Zip the pattern arguments (also patterns) with the values in the `arguments`
119 | array from the triple for this constructor, and create a new column for every
120 | resulting pair.
121 | 1. Create a new row containing the old columns (minus the one we removed
122 | earlier), the new columns (created in the previous step), and the body of the
123 | row. Push this row into the `rows` array for our constructor.
124 | 1. If in step 3 we didn't find an index, copy the row into the `rows` array for
125 | every triple in the array created in step 1.
126 | 1. Finally, for every triple created in step 1 (and populated in later steps),
127 | create a Switch node for our decision tree. The constructor and arguments are
128 | stored in this Switch node, and the rows are compiled into a sub tree.
129 |
130 | This is a lot to take in, so I recommend taking a look at the following methods:
131 |
132 | - `compile_rows`
133 | - `compile_constructor_cases`
134 |
135 | The output of all this is a decision tree, with three possible nodes: Success,
136 | Failure, and Switch (see the `Decision` type). A "Failure" node indicates a
137 | pattern that didn't match, and is used to check for exhaustiveness. In my
138 | implementation I opted to check for exhaustiveness separately, as this saves us
139 | from having to manage some extra data structures until we actually need them.
140 | The implementation works as follows:
141 |
142 | When we produce a "Failure" node, a "missing" flag is set to `true`. After
143 | compiling our decision tree, we check this flag. If set to `true`, the method
144 | `Match::missing_patterns` is used to produce a list of patterns to add to make
145 | the match exhaustive.
146 |
147 | The implementation of this method is a bit messy in my opinion, but it's the
148 | best I could come up with at this time. The implementation essentially maintains
149 | a stack of "terms" (I couldn't come up with a better name), each describing a
150 | test and its arguments in the tree. These terms also store the variables tested
151 | against, which combined with the names is used to (recursively) reconstruct a
152 | pattern name.
153 |
154 | Checking for redundant patterns is easy: when reaching a "Success" node you'd
155 | somehow mark the right-hand side as processed. In my case I just store an
156 | integer value in an array. At the end you check for any right-hand sides that
157 | aren't marked, or in my case you check if any of their values are not in the
158 | array.
159 |
160 | This about sums up how the algorithm works. Don't worry if the above wall of
161 | text hurts your head, it took me about two weeks to understand it. My advice is
162 | to read the article from Jules, then read this README, then take a look at the
163 | code and corresponding tests.
164 |
165 | ## OR patterns
166 |
167 | OR patterns are not covered in the article. To support these patterns we have to
168 | take rows containing OR patterns in any columns, then expand those OR patterns
169 | into separate rows. The code here handles this in the `expand_or_patterns()`
170 | function. This function is called _before_ pushing variable/wildcard patterns
171 | out of the rows, ensuring that OR patterns containing these patterns work as
172 | expected.
173 |
174 | **NOTE:** a previous implementation used a method called `flatten_or`, with a
175 | different implementation. This implementation proved incorrect as it failed to
176 | handle bindings in OR patterns (e.g. `10 or number`).
177 |
178 | ## Range patterns
179 |
180 | Range patterns are handled using a `Range` constructor
181 | (`Constructor::Range(start, stop)`), produced when matching against integer
182 | types only (meaning we only support integer ranges). Just like regular integers
183 | we assume ranges are of infinite length, so a variable pattern is needed to make
184 | the match exhaustive.
185 |
186 | ## Guards
187 |
188 | Guards are supported as follows: each `Row` has a guard field, storing an
189 | `Option<usize>`, where the `usize` is just a dummy value for the guard; normally
190 | this would be (for example) an AST node to evaluate/lower. When we are about to
191 | produce a Success node for a row, we check if it defines a guard. If so, all
192 | remaining rows are compiled into the guard's fallback tree.
193 |
--------------------------------------------------------------------------------
/jacobs2021/src/lib.rs:
--------------------------------------------------------------------------------
1 | #![allow(clippy::new_without_default)]
2 |
3 | //! An implementation of the algorithm described at
4 | //! https://julesjacobs.com/notes/patternmatching/patternmatching.pdf.
5 | use std::collections::{HashMap, HashSet};
6 |
7 | /// The right-hand side of a match case: what to evaluate once the case matches.
8 | #[derive(Clone, Eq, PartialEq, Debug)]
9 | pub struct Body {
10 | /// Variables to bind before running the code.
11 | ///
12 | /// Each tuple has the form `(name, source)`, i.e. `name = source`.
13 | bindings: Vec<(String, Variable)>,
14 |
15 | /// The "code" to run in case of a match.
16 | ///
17 | /// An integer stands in for the code to keep things simple; a real compiler
18 | /// would store an AST node here, or an index into an array of AST nodes.
19 | value: usize,
20 | }
21 |
22 | /// A constructor that a value can be tested against in a decision tree.
23 | #[derive(Debug, Clone, Eq, PartialEq)]
24 | pub enum Constructor {
25 | True,
26 | False,
27 | Int(i64),
28 | Pair(TypeId, TypeId),
29 | Variant(TypeId, usize), // (type ID of the enum, index of the variant)
30 | Range(i64, i64), // the README describes this as `Range(start, stop)`
31 | }
32 |
33 | impl Constructor {
34 |     /// Position of this constructor among the constructors of its type:
35 |     /// the variant index for enum variants, 1 for `True` and 0 otherwise.
36 |     fn index(&self) -> usize {
37 |         match *self {
38 |             Self::Variant(_, position) => position,
39 |             Self::True => 1,
40 |             // Every remaining constructor sits at position zero.
41 |             Self::False | Self::Int(_) => 0,
42 |             Self::Pair(..) | Self::Range(..) => 0,
43 |         }
44 |     }
45 | }
46 |
47 | /// Expands rows containing OR patterns into individual rows, such that each
48 | /// branch in the OR produces its own row.
49 | ///
50 | /// For each column that tests against an OR pattern, each sub pattern is
51 | /// translated into a new row. This work repeats itself until no more OR
52 | /// patterns remain in the rows.
53 | ///
54 | /// The implementation here is probably not as fast as it can be. Instead, it's
55 | /// optimized for ease of maintenance and readability.
56 | fn expand_or_patterns(rows: &mut Vec<Row>) {
57 |     // If none of the rows contain any OR patterns, we can avoid the below work
58 |     // loop, saving some allocations and time.
59 |     if !rows
60 |         .iter()
61 |         .any(|r| r.columns.iter().any(|c| matches!(c.pattern, Pattern::Or(_))))
62 |     {
63 |         return;
64 |     }
65 |
66 |     // The implementation uses two Vecs: the original one, and a temporary one
67 |     // we push newly created rows into. After processing all rows we swap the
68 |     // two, repeating this process until we no longer find any OR patterns.
69 |     let mut new_rows = Vec::with_capacity(rows.len());
70 |     let mut found = true;
71 |
72 |     while found {
73 |         found = false;
74 |
75 |         for row in rows.drain(..) {
76 |             // Find the first column containing an OR pattern. We process this
77 |             // one column at a time, as that's (much) easier to implement
78 |             // compared to handling all columns at once (as multiple columns may
79 |             // contain OR patterns).
80 |             let res = row.columns.iter().enumerate().find_map(|(idx, col)| {
81 |                 if let Pattern::Or(pats) = &col.pattern {
82 |                     Some((idx, col.variable, pats))
83 |                 } else {
84 |                     None
85 |                 }
86 |             });
87 |
88 |             if let Some((idx, var, pats)) = res {
89 |                 found = true;
90 |
91 |                 // This creates a new row for each branch in the OR pattern.
92 |                 // Other columns are left as-is. If such columns contain OR
93 |                 // patterns themselves, we'll expand them in a future iteration
94 |                 // of the surrounding `while` loop.
95 |                 for pat in pats {
96 |                     let mut new_row = row.clone();
97 |
98 |                     new_row.columns[idx] = Column::new(var, pat.clone());
99 |                     new_rows.push(new_row);
100 |                 }
101 |             } else {
102 |                 new_rows.push(row);
103 |             }
104 |         }
105 |
106 |         std::mem::swap(rows, &mut new_rows);
107 |     }
108 | }
109 |
110 | /// A user defined pattern such as `Some((x, 10))`.
111 | #[derive(Clone, Eq, PartialEq, Debug)]
112 | pub enum Pattern {
113 |     /// A pattern such as `Some(42)`; the arguments are patterns themselves.
114 |     Constructor(Constructor, Vec<Pattern>),
115 |     Int(i64),
116 |     Binding(String),
117 |     Or(Vec<Pattern>), // one sub pattern per branch of the OR
118 |     Range(i64, i64),
119 | }
120 |
121 | /// A representation of a type.
122 | ///
123 | /// Only a few basic types are supported for the sake of simplicity. An enum is
124 | /// a list of variants, each a name paired with the types of its members.
125 | #[derive(Clone)]
126 | pub enum Type {
127 |     Int,
128 |     Boolean,
129 |     Pair(TypeId, TypeId),
130 |     Enum(Vec<(String, Vec<TypeId>)>),
131 | }
132 |
133 | /// A unique ID of a type; the wrapped value indexes into `Match::types`.
134 | ///
135 | /// A real compiler might use a regular pointer instead, or an ID value just
136 | /// like this one.
137 | #[derive(Eq, PartialEq, Hash, Clone, Copy, Debug)]
138 | pub struct TypeId(usize);
139 |
140 | /// A variable that the columns of a match expression test against.
141 | ///
142 | /// In a real compiler these would probably be registers, or some other kind of
143 | /// variable/temporary generated by the compiler.
144 | #[derive(Eq, PartialEq, Hash, Clone, Copy, Debug)]
145 | pub struct Variable {
146 | id: usize,
147 | type_id: TypeId, // NOTE(review): presumably the type of the value held — confirm
148 | }
149 |
150 | /// A single case (or row) in a match expression/table.
151 | #[derive(Clone, Eq, PartialEq, Debug)]
152 | pub struct Row {
153 |     columns: Vec<Column>,
154 |     guard: Option<usize>, // dummy value standing in for the guard's code
155 |     body: Body,
156 | }
157 |
158 | impl Row {
159 |     fn new(columns: Vec<Column>, guard: Option<usize>, body: Body) -> Self {
160 |         Self { columns, guard, body }
161 |     }
162 |     /// Removes and returns the first column testing `variable`, if any.
163 |     fn remove_column(&mut self, variable: &Variable) -> Option<Column> {
164 |         self.columns
165 |             .iter()
166 |             .position(|c| &c.variable == variable)
167 |             .map(|idx| self.columns.remove(idx))
168 |     }
169 | }
170 |
171 | /// A single test in a row of the pattern matching table.
172 | ///
173 | /// A column pairs one variable with the pattern that variable is tested
174 | /// against. A row may hold several columns, but that is an implementation
175 | /// detail which isn't exposed to the source language.
176 | #[derive(Clone, Eq, PartialEq, Debug)]
177 | pub struct Column {
178 | variable: Variable,
179 | pattern: Pattern,
180 | }
181 |
182 | impl Column {
183 |     fn new(variable: Variable, pattern: Pattern) -> Self {
184 |         Column { variable, pattern }
185 |     }
186 | }
187 |
188 | /// A case in a decision tree to test against a variable.
189 | #[derive(Eq, PartialEq, Debug)]
190 | pub struct Case {
191 |     /// The constructor to test against an input variable.
192 |     constructor: Constructor,
193 |
194 |     /// Variables to introduce to the body of this case.
195 |     ///
196 |     /// At runtime these would be populated with the values a pattern is matched
197 |     /// against. For example, this pattern:
198 |     ///
199 |     ///     case (10, 20, foo) -> ...
200 |     ///
201 |     /// Would result in three arguments, assigned the values `10`, `20` and
202 |     /// `foo`.
203 |     ///
204 |     /// In a real compiler you'd assign these variables in your IR first, then
205 |     /// generate the code for the sub tree.
206 |     arguments: Vec<Variable>,
207 |
208 |     /// The sub tree of this case.
209 |     body: Decision,
210 | }
211 |
212 | impl Case {
213 |     fn new(
214 |         constructor: Constructor,
215 |         arguments: Vec<Variable>,
216 |         body: Decision,
217 |     ) -> Self {
218 |         Self { constructor, arguments, body }
219 |     }
220 | }
221 |
222 | /// A decision tree compiled from a list of match cases.
223 | #[derive(Eq, PartialEq, Debug)]
224 | pub enum Decision {
225 |     /// A pattern is matched and the right-hand value is to be returned.
226 |     Success(Body),
227 |
228 |     /// A pattern is missing.
229 |     Failure,
230 |
231 |     /// Checks if a guard evaluates to true, running the body if it does.
232 |     ///
233 |     /// The arguments are as follows:
234 |     ///
235 |     /// 1. The "condition" to evaluate. We just use a dummy value, but in a real
236 |     ///    compiler this would likely be an AST node of sorts.
237 |     /// 2. The body to evaluate if the guard matches.
238 |     /// 3. The sub tree to evaluate when the guard fails.
239 |     Guard(usize, Body, Box<Decision>),
240 |
241 |     /// Checks if a value is any of the given patterns.
242 |     ///
243 |     /// The values are as follows:
244 |     ///
245 |     /// 1. The variable to test.
246 |     /// 2. The cases to test against this variable.
247 |     /// 3. A fallback decision to take, in case none of the cases matched.
248 |     Switch(Variable, Vec<Case>, Option<Box<Decision>>),
249 | }
250 |
251 | /// A type for storing diagnostics produced by the decision tree compiler.
252 | pub struct Diagnostics {
253 |     /// A flag indicating the match is missing one or more patterns.
254 |     missing: bool,
255 |
256 |     /// The right-hand sides that are reachable.
257 |     ///
258 |     /// If a right-hand side isn't in this list it means its pattern is
259 |     /// redundant.
260 |     reachable: Vec<usize>,
261 | }
262 |
263 | /// The result of compiling a pattern match expression.
264 | pub struct Match {
265 |     pub types: Vec<Type>,
266 |     pub tree: Decision,
267 |     pub diagnostics: Diagnostics,
268 | }
269 |
270 | /// Information about a single constructor/value (aka term) being tested, used
271 | /// to build a list of names of missing patterns.
272 | #[derive(Debug)]
273 | struct Term {
274 |     variable: Variable,
275 |     name: String,
276 |     arguments: Vec<Variable>,
277 | }
278 |
279 | impl Term {
280 |     fn new(variable: Variable, name: String, arguments: Vec<Variable>) -> Self {
281 |         Self { variable, name, arguments }
282 |     }
283 |     /// Renders this term as a pattern name, recursing into its arguments.
284 |     fn pattern_name(
285 |         &self,
286 |         terms: &[Term],
287 |         mapping: &HashMap<&Variable, usize>,
288 |     ) -> String {
289 |         if self.arguments.is_empty() {
290 |             self.name.clone()
291 |         } else {
292 |             let args = self
293 |                 .arguments
294 |                 .iter()
295 |                 .map(|arg| {
296 |                     mapping
297 |                         .get(arg)
298 |                         .map(|&idx| terms[idx].pattern_name(terms, mapping))
299 |                         .unwrap_or_else(|| "_".to_string()) // no term: wildcard
300 |                 })
301 |                 .collect::<Vec<_>>()
302 |                 .join(", ");
303 |
304 |             format!("{}({})", self.name, args)
305 |         }
306 |     }
307 | }
308 |
309 | impl Match {
310 | /// Returns a list of patterns not covered by the match expression.
311 | pub fn missing_patterns(&self) -> Vec {
312 | let mut names = HashSet::new();
313 | let mut steps = Vec::new();
314 |
315 | self.add_missing_patterns(&self.tree, &mut steps, &mut names);
316 |
317 | let mut missing: Vec = names.into_iter().collect();
318 |
319 | // Sorting isn't necessary, but it makes it a bit easier to write tests.
320 | missing.sort();
321 | missing
322 | }
323 |
324 | fn add_missing_patterns(
325 | &self,
326 | node: &Decision,
327 | terms: &mut Vec,
328 | missing: &mut HashSet,
329 | ) {
330 | match node {
331 | Decision::Success(_) => {}
332 | Decision::Failure => {
333 | let mut mapping = HashMap::new();
334 |
335 | // At this point the terms stack looks something like this:
336 | // `[term, term + arguments, term, ...]`. To construct a pattern
337 | // name from this stack, we first map all variables to their
338 | // term indexes. This is needed because when a term defines
339 | // arguments, the terms for those arguments don't necessarily
340 | // appear in order in the term stack.
341 | //
342 | // This mapping is then used when (recursively) generating a
343 | // pattern name.
344 | //
345 | // This approach could probably be done more efficiently, so if
346 | // you're reading this and happen to know of a way, please
347 | // submit a merge request :)
348 | for (index, step) in terms.iter().enumerate() {
349 | mapping.insert(&step.variable, index);
350 | }
351 |
352 | let name = terms
353 | .first()
354 | .map(|term| term.pattern_name(terms, &mapping))
355 | .unwrap_or_else(|| "_".to_string());
356 |
357 | missing.insert(name);
358 | }
359 | Decision::Guard(_, _, fallback) => {
360 | self.add_missing_patterns(fallback, terms, missing);
361 | }
362 | Decision::Switch(var, cases, fallback) => {
363 | for case in cases {
364 | match &case.constructor {
365 | Constructor::True => {
366 | let name = "true".to_string();
367 |
368 | terms.push(Term::new(*var, name, Vec::new()));
369 | }
370 | Constructor::False => {
371 | let name = "false".to_string();
372 |
373 | terms.push(Term::new(*var, name, Vec::new()));
374 | }
375 | Constructor::Int(_) | Constructor::Range(_, _) => {
376 | let name = "_".to_string();
377 |
378 | terms.push(Term::new(*var, name, Vec::new()));
379 | }
380 | Constructor::Pair(_, _) => {
381 | let args = case.arguments.clone();
382 |
383 | terms.push(Term::new(*var, String::new(), args));
384 | }
385 | Constructor::Variant(typ, idx) => {
386 | let args = case.arguments.clone();
387 | let name = if let Type::Enum(variants) =
388 | &self.types[typ.0]
389 | {
390 | variants[*idx].0.clone()
391 | } else {
392 | unreachable!()
393 | };
394 |
395 | terms.push(Term::new(*var, name, args));
396 | }
397 | }
398 |
399 | self.add_missing_patterns(&case.body, terms, missing);
400 | terms.pop();
401 | }
402 |
403 | if let Some(node) = fallback {
404 | self.add_missing_patterns(node, terms, missing);
405 | }
406 | }
407 | }
408 | }
409 | }
410 |
411 | /// The `match` compiler itself (shocking, I know).
412 | pub struct Compiler {
413 | variable_id: usize,
414 | types: Vec,
415 | diagnostics: Diagnostics,
416 | }
417 |
418 | impl Compiler {
419 | pub fn new() -> Self {
420 | Self {
421 | variable_id: 0,
422 | types: Vec::new(),
423 | diagnostics: Diagnostics { missing: false, reachable: Vec::new() },
424 | }
425 | }
426 |
427 | pub fn compile(mut self, rows: Vec) -> Match {
428 | Match {
429 | tree: self.compile_rows(rows),
430 | diagnostics: self.diagnostics,
431 | types: self.types,
432 | }
433 | }
434 |
435 | fn compile_rows(&mut self, mut rows: Vec) -> Decision {
436 | if rows.is_empty() {
437 | self.diagnostics.missing = true;
438 |
439 | return Decision::Failure;
440 | }
441 |
442 | expand_or_patterns(&mut rows);
443 |
444 | for row in &mut rows {
445 | self.move_variable_patterns(row);
446 | }
447 |
448 | // There may be multiple rows, but if the first one has no patterns
449 | // those extra rows are redundant, as a row without columns/patterns
450 | // always matches.
451 | if rows.first().map_or(false, |c| c.columns.is_empty()) {
452 | let row = rows.remove(0);
453 |
454 | self.diagnostics.reachable.push(row.body.value);
455 |
456 | return if let Some(guard) = row.guard {
457 | Decision::Guard(
458 | guard,
459 | row.body,
460 | Box::new(self.compile_rows(rows)),
461 | )
462 | } else {
463 | Decision::Success(row.body)
464 | };
465 | }
466 |
467 | let branch_var = self.branch_variable(&rows);
468 |
469 | match self.variable_type(branch_var).clone() {
470 | Type::Int => {
471 | let (cases, fallback) =
472 | self.compile_int_cases(rows, branch_var);
473 |
474 | Decision::Switch(branch_var, cases, Some(fallback))
475 | }
476 | Type::Boolean => {
477 | let cases = vec![
478 | (Constructor::False, Vec::new(), Vec::new()),
479 | (Constructor::True, Vec::new(), Vec::new()),
480 | ];
481 |
482 | Decision::Switch(
483 | branch_var,
484 | self.compile_constructor_cases(rows, branch_var, cases),
485 | None,
486 | )
487 | }
488 | Type::Pair(typ1, typ2) => {
489 | let cases = vec![(
490 | Constructor::Pair(typ1, typ2),
491 | self.new_variables(&[typ1, typ2]),
492 | Vec::new(),
493 | )];
494 |
495 | Decision::Switch(
496 | branch_var,
497 | self.compile_constructor_cases(rows, branch_var, cases),
498 | None,
499 | )
500 | }
501 | Type::Enum(variants) => {
502 | let cases = variants
503 | .iter()
504 | .enumerate()
505 | .map(|(idx, (_, args))| {
506 | (
507 | Constructor::Variant(branch_var.type_id, idx),
508 | self.new_variables(args),
509 | Vec::new(),
510 | )
511 | })
512 | .collect();
513 |
514 | Decision::Switch(
515 | branch_var,
516 | self.compile_constructor_cases(rows, branch_var, cases),
517 | None,
518 | )
519 | }
520 | }
521 | }
522 |
523 | /// Compiles the cases and fallback cases for integer and range patterns.
524 | ///
525 | /// Integers have an infinite number of constructors, so we specialise the
526 | /// compilation of integer and range patterns.
527 | fn compile_int_cases(
528 | &mut self,
529 | rows: Vec,
530 | branch_var: Variable,
531 | ) -> (Vec, Box) {
532 | let mut raw_cases: Vec<(Constructor, Vec, Vec)> =
533 | Vec::new();
534 | let mut fallback_rows = Vec::new();
535 | let mut tested: HashMap<(i64, i64), usize> = HashMap::new();
536 |
537 | for mut row in rows {
538 | if let Some(col) = row.remove_column(&branch_var) {
539 | let (key, cons) = match col.pattern {
540 | Pattern::Int(val) => ((val, val), Constructor::Int(val)),
541 | Pattern::Range(start, stop) => {
542 | ((start, stop), Constructor::Range(start, stop))
543 | }
544 | _ => unreachable!(),
545 | };
546 |
547 | if let Some(index) = tested.get(&key) {
548 | raw_cases[*index].2.push(row);
549 | continue;
550 | }
551 |
552 | tested.insert(key, raw_cases.len());
553 |
554 | let mut rows = fallback_rows.clone();
555 |
556 | rows.push(row);
557 | raw_cases.push((cons, Vec::new(), rows));
558 | } else {
559 | for (_, _, rows) in &mut raw_cases {
560 | rows.push(row.clone());
561 | }
562 |
563 | fallback_rows.push(row);
564 | }
565 | }
566 |
567 | let cases = raw_cases
568 | .into_iter()
569 | .map(|(cons, vars, rows)| {
570 | Case::new(cons, vars, self.compile_rows(rows))
571 | })
572 | .collect();
573 |
574 | (cases, Box::new(self.compile_rows(fallback_rows)))
575 | }
576 |
577 | /// Compiles the cases and sub cases for the constructor located at the
578 | /// column of the branching variable.
579 | ///
580 | /// What exactly this method does may be a bit hard to understand from the
581 | /// code, as there's simply quite a bit going on. Roughly speaking, it does
582 | /// the following:
583 | ///
584 | /// 1. It takes the column we're branching on (based on the branching
585 | /// variable) and removes it from every row.
586 | /// 2. We add additional columns to this row, if the constructor takes any
587 | /// arguments (which we'll handle in a nested match).
588 | /// 3. We turn the resulting list of rows into a list of cases, then compile
589 | /// those into decision (sub) trees.
590 | ///
591 | /// If a row didn't include the branching variable, we simply copy that row
592 | /// into the list of rows for every constructor to test.
593 | ///
594 | /// For this to work, the `cases` variable must be prepared such that it has
595 | /// a triple for every constructor we need to handle. For an ADT with 10
596 | /// constructors, that means 10 triples. This is needed so this method can
597 | /// assign the correct sub matches to these constructors.
598 | ///
599 | /// Types with infinite constructors (e.g. int and string) are handled
600 | /// separately; they don't need most of this work anyway.
601 | fn compile_constructor_cases(
602 | &mut self,
603 | rows: Vec,
604 | branch_var: Variable,
605 | mut cases: Vec<(Constructor, Vec, Vec)>,
606 | ) -> Vec {
607 | for mut row in rows {
608 | if let Some(col) = row.remove_column(&branch_var) {
609 | if let Pattern::Constructor(cons, args) = col.pattern {
610 | let idx = cons.index();
611 | let mut cols = row.columns;
612 |
613 | for (var, pat) in cases[idx].1.iter().zip(args.into_iter())
614 | {
615 | cols.push(Column::new(*var, pat));
616 | }
617 |
618 | cases[idx].2.push(Row::new(cols, row.guard, row.body));
619 | }
620 | } else {
621 | for (_, _, rows) in &mut cases {
622 | rows.push(row.clone());
623 | }
624 | }
625 | }
626 |
627 | cases
628 | .into_iter()
629 | .map(|(cons, vars, rows)| {
630 | Case::new(cons, vars, self.compile_rows(rows))
631 | })
632 | .collect()
633 | }
634 |
635 | /// Moves variable-only patterns/tests into the right-hand side/body of a
636 | /// case.
637 | ///
638 | /// This turns cases like this:
639 | ///
640 | /// case foo -> print(foo)
641 | ///
642 | /// Into this:
643 | ///
644 | /// case -> {
645 | /// let foo = it
646 | /// print(foo)
647 | /// }
648 | ///
649 | /// Where `it` is a variable holding the value `case foo` is compared
650 | /// against, and the case/row has no patterns (i.e. always matches).
651 | fn move_variable_patterns(&self, row: &mut Row) {
652 | row.columns.retain(|col| {
653 | if let Pattern::Binding(bind) = &col.pattern {
654 | row.body.bindings.push((bind.clone(), col.variable));
655 | false
656 | } else {
657 | true
658 | }
659 | });
660 | }
661 |
662 | /// Given a row, returns the variable in that row that's referred to the
663 | /// most across all rows.
664 | fn branch_variable(&self, rows: &[Row]) -> Variable {
665 | let mut counts = HashMap::new();
666 |
667 | for row in rows {
668 | for col in &row.columns {
669 | *counts.entry(&col.variable).or_insert(0_usize) += 1
670 | }
671 | }
672 |
673 | rows[0]
674 | .columns
675 | .iter()
676 | .map(|col| col.variable)
677 | .max_by_key(|var| counts[var])
678 | .unwrap()
679 | }
680 |
681 | /// Returns a new variable to use in the decision tree.
682 | ///
683 | /// In a real compiler you'd have to ensure these variables don't conflict
684 | /// with other variables.
685 | fn new_variable(&mut self, type_id: TypeId) -> Variable {
686 | let var = Variable { id: self.variable_id, type_id };
687 |
688 | self.variable_id += 1;
689 | var
690 | }
691 |
692 | fn new_variables(&mut self, type_ids: &[TypeId]) -> Vec {
693 | type_ids.iter().map(|t| self.new_variable(*t)).collect()
694 | }
695 |
696 | /// Returns the type of a given variable.
697 | ///
698 | /// In a real compiler the implementation of this would likely be quite
699 | /// different, depending on how your type system is implemented.
700 | ///
701 | /// For the sake of simplicity, we just store types in a Vec and retrieve
702 | /// them here according to the variable's type ID.
703 | fn variable_type(&self, id: Variable) -> &Type {
704 | &self.types[id.type_id.0]
705 | }
706 | }
707 |
708 | #[cfg(test)]
709 | mod tests {
710 | use super::*;
711 | use similar_asserts::assert_eq;
712 |
713 | fn new_type(compiler: &mut Compiler, typ: Type) -> TypeId {
714 | let id = compiler.types.len();
715 |
716 | compiler.types.push(typ);
717 | TypeId(id)
718 | }
719 |
720 | fn tt() -> Pattern {
721 | Pattern::Constructor(Constructor::True, Vec::new())
722 | }
723 |
724 | fn ff() -> Pattern {
725 | Pattern::Constructor(Constructor::False, Vec::new())
726 | }
727 |
728 | fn bind(name: &str) -> Pattern {
729 | Pattern::Binding(name.to_string())
730 | }
731 |
732 | fn variant(typ: TypeId, index: usize, args: Vec) -> Pattern {
733 | Pattern::Constructor(Constructor::Variant(typ, index), args)
734 | }
735 |
736 | fn pair(
737 | typ1: TypeId,
738 | typ2: TypeId,
739 | pat1: Pattern,
740 | pat2: Pattern,
741 | ) -> Pattern {
742 | Pattern::Constructor(Constructor::Pair(typ1, typ2), vec![pat1, pat2])
743 | }
744 |
745 | fn int(val: i64) -> Pattern {
746 | Pattern::Int(val)
747 | }
748 |
749 | fn rhs(value: usize) -> Body {
750 | Body { bindings: Vec::new(), value }
751 | }
752 |
753 | fn var(id: usize, type_id: TypeId) -> Variable {
754 | Variable { id, type_id }
755 | }
756 |
757 | fn compile(
758 | compiler: Compiler,
759 | input: Variable,
760 | rules: Vec<(Pattern, Body)>,
761 | ) -> Match {
762 | let rows = rules
763 | .into_iter()
764 | .map(|(pat, body)| {
765 | Row::new(vec![Column::new(input, pat)], None, body)
766 | })
767 | .collect();
768 |
769 | compiler.compile(rows)
770 | }
771 |
772 | fn failure() -> Decision {
773 | Decision::Failure
774 | }
775 |
776 | fn success(value: usize) -> Decision {
777 | Decision::Success(Body { bindings: Vec::new(), value })
778 | }
779 |
780 | fn success_with_bindings(
781 | bindings: Vec<(&str, Variable)>,
782 | value: usize,
783 | ) -> Decision {
784 | Decision::Success(Body {
785 | bindings: bindings
786 | .into_iter()
787 | .map(|(n, v)| (n.to_string(), v))
788 | .collect(),
789 | value,
790 | })
791 | }
792 |
/// A binding column is removed from the row and recorded as a binding of the
/// body, while the constructor column is left alone.
#[test]
fn test_move_variable_patterns() {
    let mut compiler = Compiler::new();
    let bool_type = new_type(&mut compiler, Type::Boolean);
    let tested = compiler.new_variable(bool_type);
    let bound = compiler.new_variable(bool_type);
    let cons = Constructor::True;
    let mut row = Row {
        columns: vec![
            Column::new(bound, bind("a")),
            Column::new(
                tested,
                Pattern::Constructor(cons.clone(), Vec::new()),
            ),
        ],
        guard: None,
        body: Body { bindings: Vec::new(), value: 42 },
    };

    compiler.move_variable_patterns(&mut row);

    let expected = Row {
        columns: vec![Column::new(
            tested,
            Pattern::Constructor(cons, Vec::new()),
        )],
        guard: None,
        body: Body { bindings: vec![("a".to_string(), bound)], value: 42 },
    };

    assert_eq!(row, expected);
}
829 |
/// A row consisting of only a binding column ends up without any columns.
#[test]
fn test_move_variable_patterns_without_constructor_pattern() {
    let mut compiler = Compiler::new();
    let bool_type = new_type(&mut compiler, Type::Boolean);
    let bound = compiler.new_variable(bool_type);
    let mut row = Row {
        columns: vec![Column::new(bound, bind("a"))],
        guard: None,
        body: Body { bindings: Vec::new(), value: 42 },
    };

    compiler.move_variable_patterns(&mut row);

    let expected = Row {
        columns: Vec::new(),
        guard: None,
        body: Body { bindings: vec![("a".to_string(), bound)], value: 42 },
    };

    assert_eq!(row, expected);
}
855 |
/// The variable of the first row that is tested by the most rows is picked
/// as the variable to branch on.
#[test]
fn test_branch_variable() {
    let mut compiler = Compiler::new();
    let bool_type = new_type(&mut compiler, Type::Boolean);
    let tested_once = compiler.new_variable(bool_type);
    let tested_twice = compiler.new_variable(bool_type);

    let first_row = Row::new(
        vec![
            Column::new(tested_once, Pattern::Int(42)),
            Column::new(tested_twice, Pattern::Int(50)),
        ],
        None,
        rhs(1),
    );
    let second_row = Row::new(
        vec![Column::new(tested_twice, Pattern::Int(4))],
        None,
        rhs(2),
    );
    let rows = vec![first_row, second_row];

    let branch = compiler.branch_variable(&rows);

    assert_eq!(branch, tested_twice);
}
878 |
/// Matching both `true` and `false` produces a switch with a case for each
/// boolean constructor.
#[test]
fn test_compile_simple_pattern() {
    let mut compiler = Compiler::new();
    let bool_type = new_type(&mut compiler, Type::Boolean);
    let input = compiler.new_variable(bool_type);
    let rules = vec![(tt(), rhs(1)), (ff(), rhs(2))];
    let result = compile(compiler, input, rules);

    let expected = Decision::Switch(
        input,
        vec![
            Case::new(Constructor::False, Vec::new(), success(2)),
            Case::new(Constructor::True, Vec::new(), success(1)),
        ],
        None,
    );

    assert_eq!(result.tree, expected);
}
899 |
/// Matching only `true` leaves a failure for `false`, which is reported as
/// the missing pattern.
#[test]
fn test_compile_nonexhaustive_pattern() {
    let mut compiler = Compiler::new();
    let bool_type = new_type(&mut compiler, Type::Boolean);
    let input = compiler.new_variable(bool_type);
    let rules = vec![(tt(), rhs(1))];
    let result = compile(compiler, input, rules);

    let expected = Decision::Switch(
        input,
        vec![
            Case::new(Constructor::False, Vec::new(), failure()),
            Case::new(Constructor::True, Vec::new(), success(1)),
        ],
        None,
    );

    assert_eq!(result.tree, expected);
    assert!(result.diagnostics.missing);
    assert_eq!(result.missing_patterns(), vec!["false".to_string()]);
}
921 |
/// A second `true` rule is redundant: its body isn't part of the tree nor of
/// the list of reachable bodies.
#[test]
fn test_compile_redundant_pattern() {
    let mut compiler = Compiler::new();
    let bool_type = new_type(&mut compiler, Type::Boolean);
    let input = compiler.new_variable(bool_type);
    let rules = vec![(tt(), rhs(1)), (tt(), rhs(2)), (ff(), rhs(3))];
    let result = compile(compiler, input, rules);

    let expected = Decision::Switch(
        input,
        vec![
            Case::new(Constructor::False, Vec::new(), success(3)),
            Case::new(Constructor::True, Vec::new(), success(1)),
        ],
        None,
    );

    assert_eq!(result.tree, expected);
    assert_eq!(result.diagnostics.reachable, vec![3, 1]);
}
946 |
/// A repeated integer pattern is redundant, while the trailing binding
/// becomes the fallback of the switch.
#[test]
fn test_compile_redundant_int() {
    let mut compiler = Compiler::new();
    let int_type = new_type(&mut compiler, Type::Int);
    let input = compiler.new_variable(int_type);
    let rules = vec![
        (int(1), rhs(1)),
        (int(1), rhs(2)),
        (int(2), rhs(3)),
        (bind("a"), rhs(4)),
    ];
    let result = compile(compiler, input, rules);

    let fallback = success_with_bindings(vec![("a", input)], 4);
    let expected = Decision::Switch(
        input,
        vec![
            Case::new(Constructor::Int(1), Vec::new(), success(1)),
            Case::new(Constructor::Int(2), Vec::new(), success(3)),
        ],
        Some(Box::new(fallback)),
    );

    assert_eq!(result.tree, expected);
    assert_eq!(result.diagnostics.reachable, vec![1, 3, 4]);
}
976 |
/// A binding after a `true` pattern covers the remaining `false` case.
#[test]
fn test_compile_variable_pattern() {
    let mut compiler = Compiler::new();
    let bool_type = new_type(&mut compiler, Type::Boolean);
    let input = compiler.new_variable(bool_type);
    let rules = vec![(tt(), rhs(1)), (bind("a"), rhs(2))];
    let result = compile(compiler, input, rules);

    let false_body = success_with_bindings(vec![("a", input)], 2);
    let expected = Decision::Switch(
        input,
        vec![
            Case::new(Constructor::False, Vec::new(), false_body),
            Case::new(Constructor::True, Vec::new(), success(1)),
        ],
        None,
    );

    assert_eq!(result.tree, expected);
}
1001 |
/// Integer patterns without a catch-all produce a failing fallback, reported
/// as the wildcard pattern being missing.
#[test]
fn test_compile_nonexhaustive_int_pattern() {
    let mut compiler = Compiler::new();
    let int_type = new_type(&mut compiler, Type::Int);
    let input = compiler.new_variable(int_type);
    let rules = vec![(int(4), rhs(1)), (int(5), rhs(2))];
    let result = compile(compiler, input, rules);

    let expected = Decision::Switch(
        input,
        vec![
            Case::new(Constructor::Int(4), Vec::new(), success(1)),
            Case::new(Constructor::Int(5), Vec::new(), success(2)),
        ],
        Some(Box::new(failure())),
    );

    assert_eq!(result.tree, expected);
    assert_eq!(result.missing_patterns(), vec!["_".to_string()]);
}
1023 |
/// Integer patterns followed by a binding are exhaustive; the binding is
/// used as the fallback of the switch.
#[test]
fn test_compile_exhaustive_int_pattern() {
    let mut compiler = Compiler::new();
    let int_type = new_type(&mut compiler, Type::Int);
    let input = compiler.new_variable(int_type);
    let rules = vec![(int(4), rhs(1)), (int(5), rhs(2)), (bind("a"), rhs(3))];
    let result = compile(compiler, input, rules);

    let fallback = success_with_bindings(vec![("a", input)], 3);
    let expected = Decision::Switch(
        input,
        vec![
            Case::new(Constructor::Int(4), Vec::new(), success(1)),
            Case::new(Constructor::Int(5), Vec::new(), success(2)),
        ],
        Some(Box::new(fallback)),
    );

    assert_eq!(result.tree, expected);
}
1047 |
/// An integer pattern placed after a binding is unreachable: its case uses
/// the body of the binding instead of its own.
#[test]
fn test_compile_unreachable_int_pattern() {
    let mut compiler = Compiler::new();
    let int_type = new_type(&mut compiler, Type::Int);
    let input = compiler.new_variable(int_type);
    let rules = vec![(int(4), rhs(1)), (bind("a"), rhs(3)), (int(5), rhs(2))];
    let result = compile(compiler, input, rules);

    let shadowed = success_with_bindings(vec![("a", input)], 3);
    let fallback = success_with_bindings(vec![("a", input)], 3);
    let expected = Decision::Switch(
        input,
        vec![
            Case::new(Constructor::Int(4), Vec::new(), success(1)),
            Case::new(Constructor::Int(5), Vec::new(), shadowed),
        ],
        Some(Box::new(fallback)),
    );

    assert_eq!(result.tree, expected);
    assert_eq!(result.diagnostics.reachable, vec![1, 3, 3]);
}
1076 |
/// A pair that only matches a single integer in its first member is not
/// exhaustive, and `(_, _)` is reported as missing.
#[test]
fn test_compile_nonexhaustive_nested_int_pattern() {
    let mut compiler = Compiler::new();
    let int_type = new_type(&mut compiler, Type::Int);
    let tup_type = new_type(&mut compiler, Type::Pair(int_type, int_type));
    let input = compiler.new_variable(tup_type);
    let rules = vec![(pair(int_type, int_type, int(4), bind("a")), rhs(1))];
    let result = compile(compiler, input, rules);

    let first = var(1, int_type);
    let second = var(2, int_type);
    let first_switch = Decision::Switch(
        first,
        vec![Case::new(
            Constructor::Int(4),
            Vec::new(),
            success_with_bindings(vec![("a", second)], 1),
        )],
        Some(Box::new(failure())),
    );
    let expected = Decision::Switch(
        input,
        vec![Case::new(
            Constructor::Pair(int_type, int_type),
            vec![first, second],
            first_switch,
        )],
        None,
    );

    assert_eq!(result.tree, expected);
    assert_eq!(result.missing_patterns(), vec!["(_, _)".to_string()]);
}
1114 |
/// A pair of integer patterns followed by a pair of bindings is exhaustive:
/// every switch falls back to the body of the bindings.
#[test]
fn test_compile_exhaustive_nested_int_pattern() {
    let mut compiler = Compiler::new();
    let int_type = new_type(&mut compiler, Type::Int);
    let tup_type = new_type(&mut compiler, Type::Pair(int_type, int_type));
    let input = compiler.new_variable(tup_type);
    let rules = vec![
        (pair(int_type, int_type, int(4), int(5)), rhs(1)),
        (pair(int_type, int_type, bind("a"), bind("b")), rhs(2)),
    ];
    let result = compile(compiler, input, rules);

    let first = var(1, int_type);
    let second = var(2, int_type);
    let first_switch = Decision::Switch(
        first,
        vec![Case::new(Constructor::Int(4), Vec::new(), success(1))],
        Some(Box::new(success_with_bindings(
            vec![("a", first), ("b", second)],
            2,
        ))),
    );
    let second_switch = Decision::Switch(
        second,
        vec![Case::new(Constructor::Int(5), Vec::new(), first_switch)],
        Some(Box::new(success_with_bindings(
            vec![("a", first), ("b", second)],
            2,
        ))),
    );
    let expected = Decision::Switch(
        input,
        vec![Case::new(
            Constructor::Pair(int_type, int_type),
            vec![first, second],
            second_switch,
        )],
        None,
    );

    assert_eq!(result.tree, expected);
}
1171 |
/// Matching only `Some(4)` of an option type leaves both `None` and the
/// other `Some` values uncovered.
#[test]
fn test_compile_nonexhaustive_option_type() {
    let mut compiler = Compiler::new();
    let int_type = new_type(&mut compiler, Type::Int);
    let option_type = new_type(
        &mut compiler,
        Type::Enum(vec![
            ("Some".to_string(), vec![int_type]),
            ("None".to_string(), Vec::new()),
        ]),
    );
    let input = compiler.new_variable(option_type);
    let rules = vec![(variant(option_type, 0, vec![Pattern::Int(4)]), rhs(1))];
    let result = compile(compiler, input, rules);

    let value = var(1, int_type);
    let value_switch = Decision::Switch(
        value,
        vec![Case::new(Constructor::Int(4), Vec::new(), success(1))],
        Some(Box::new(failure())),
    );
    let expected = Decision::Switch(
        input,
        vec![
            Case::new(
                Constructor::Variant(option_type, 0),
                vec![value],
                value_switch,
            ),
            Case::new(
                Constructor::Variant(option_type, 1),
                Vec::new(),
                failure(),
            ),
        ],
        None,
    );

    assert_eq!(result.tree, expected);
    assert_eq!(
        result.missing_patterns(),
        vec!["None".to_string(), "Some(_)".to_string()]
    );
}
1222 |
/// Same as the single argument case, but for a variant with two arguments:
/// the missing `Some` pattern is rendered with a wildcard per argument.
#[test]
fn test_compile_nonexhaustive_option_type_with_multiple_arguments() {
    let mut compiler = Compiler::new();
    let int_type = new_type(&mut compiler, Type::Int);
    let option_type = new_type(
        &mut compiler,
        Type::Enum(vec![
            ("Some".to_string(), vec![int_type, int_type]),
            ("None".to_string(), Vec::new()),
        ]),
    );
    let input = compiler.new_variable(option_type);
    let rules = vec![(
        variant(option_type, 0, vec![Pattern::Int(4), Pattern::Int(5)]),
        rhs(1),
    )];
    let result = compile(compiler, input, rules);

    let first = var(1, int_type);
    let second = var(2, int_type);
    let first_switch = Decision::Switch(
        first,
        vec![Case::new(Constructor::Int(4), Vec::new(), success(1))],
        Some(Box::new(failure())),
    );
    let second_switch = Decision::Switch(
        second,
        vec![Case::new(Constructor::Int(5), Vec::new(), first_switch)],
        Some(Box::new(failure())),
    );
    let expected = Decision::Switch(
        input,
        vec![
            Case::new(
                Constructor::Variant(option_type, 0),
                vec![first, second],
                second_switch,
            ),
            Case::new(
                Constructor::Variant(option_type, 1),
                Vec::new(),
                failure(),
            ),
        ],
        None,
    );

    assert_eq!(result.tree, expected);
    assert_eq!(
        result.missing_patterns(),
        vec!["None".to_string(), "Some(_, _)".to_string(),]
    );
}
1284 |
/// `Some(4)`, `Some(a)` and `None` together cover every value of the option
/// type.
#[test]
fn test_compile_exhaustive_option_type() {
    let mut compiler = Compiler::new();
    let int_type = new_type(&mut compiler, Type::Int);
    let option_type = new_type(
        &mut compiler,
        Type::Enum(vec![
            ("Some".to_string(), vec![int_type]),
            ("None".to_string(), Vec::new()),
        ]),
    );
    let input = compiler.new_variable(option_type);
    let rules = vec![
        (variant(option_type, 0, vec![Pattern::Int(4)]), rhs(1)),
        (variant(option_type, 0, vec![bind("a")]), rhs(2)),
        (variant(option_type, 1, Vec::new()), rhs(3)),
    ];
    let result = compile(compiler, input, rules);

    let value = var(1, int_type);
    let value_switch = Decision::Switch(
        value,
        vec![Case::new(Constructor::Int(4), Vec::new(), success(1))],
        Some(Box::new(success_with_bindings(vec![("a", value)], 2))),
    );
    let expected = Decision::Switch(
        input,
        vec![
            Case::new(
                Constructor::Variant(option_type, 0),
                vec![value],
                value_switch,
            ),
            Case::new(
                Constructor::Variant(option_type, 1),
                Vec::new(),
                success(3),
            ),
        ],
        None,
    );

    assert_eq!(result.tree, expected);
}
1338 |
/// A repeated `Some(true)` rule is redundant: its body (10) is not in the
/// list of reachable bodies.
#[test]
fn test_compile_redundant_option_type_with_bool() {
    let mut compiler = Compiler::new();
    let bool_type = new_type(&mut compiler, Type::Boolean);
    let option_type = new_type(
        &mut compiler,
        Type::Enum(vec![
            ("Some".to_string(), vec![bool_type]),
            ("None".to_string(), Vec::new()),
        ]),
    );
    let input = compiler.new_variable(option_type);
    let rules = vec![
        (variant(option_type, 0, vec![tt()]), rhs(1)),
        (variant(option_type, 0, vec![tt()]), rhs(10)),
        (variant(option_type, 0, vec![bind("a")]), rhs(2)),
        (variant(option_type, 1, Vec::new()), rhs(3)),
    ];
    let result = compile(compiler, input, rules);

    let value = var(1, bool_type);
    let value_switch = Decision::Switch(
        value,
        vec![
            Case::new(
                Constructor::False,
                Vec::new(),
                success_with_bindings(vec![("a", value)], 2),
            ),
            Case::new(Constructor::True, Vec::new(), success(1)),
        ],
        None,
    );
    let expected = Decision::Switch(
        input,
        vec![
            Case::new(
                Constructor::Variant(option_type, 0),
                vec![value],
                value_switch,
            ),
            Case::new(
                Constructor::Variant(option_type, 1),
                Vec::new(),
                success(3),
            ),
        ],
        None,
    );

    assert_eq!(result.tree, expected);
    assert_eq!(result.diagnostics.reachable, vec![2, 1, 3]);
}
1402 |
/// A repeated `Some(4)` rule is redundant: its body (10) is not in the list
/// of reachable bodies.
#[test]
fn test_compile_redundant_option_type_with_int() {
    let mut compiler = Compiler::new();
    let int_type = new_type(&mut compiler, Type::Int);
    let option_type = new_type(
        &mut compiler,
        Type::Enum(vec![
            ("Some".to_string(), vec![int_type]),
            ("None".to_string(), Vec::new()),
        ]),
    );
    let input = compiler.new_variable(option_type);
    let rules = vec![
        (variant(option_type, 0, vec![Pattern::Int(4)]), rhs(1)),
        (variant(option_type, 0, vec![Pattern::Int(4)]), rhs(10)),
        (variant(option_type, 0, vec![bind("a")]), rhs(2)),
        (variant(option_type, 1, Vec::new()), rhs(3)),
    ];
    let result = compile(compiler, input, rules);

    let value = var(1, int_type);
    let value_switch = Decision::Switch(
        value,
        vec![Case::new(Constructor::Int(4), Vec::new(), success(1))],
        Some(Box::new(success_with_bindings(vec![("a", value)], 2))),
    );
    let expected = Decision::Switch(
        input,
        vec![
            Case::new(
                Constructor::Variant(option_type, 0),
                vec![value],
                value_switch,
            ),
            Case::new(
                Constructor::Variant(option_type, 1),
                Vec::new(),
                success(3),
            ),
        ],
        None,
    );

    assert_eq!(result.tree, expected);
    assert_eq!(result.diagnostics.reachable, vec![1, 2, 3]);
}
1459 |
/// A top-level binding after `Some(4)` covers both the remaining `Some`
/// values and `None`, binding the input itself.
#[test]
fn test_compile_exhaustive_option_type_with_binding() {
    let mut compiler = Compiler::new();
    let int_type = new_type(&mut compiler, Type::Int);
    let option_type = new_type(
        &mut compiler,
        Type::Enum(vec![
            ("Some".to_string(), vec![int_type]),
            ("None".to_string(), Vec::new()),
        ]),
    );
    let input = compiler.new_variable(option_type);
    let rules = vec![
        (variant(option_type, 0, vec![Pattern::Int(4)]), rhs(1)),
        (bind("a"), rhs(2)),
    ];
    let result = compile(compiler, input, rules);

    let value = var(1, int_type);
    let value_switch = Decision::Switch(
        value,
        vec![Case::new(Constructor::Int(4), Vec::new(), success(1))],
        Some(Box::new(success_with_bindings(vec![("a", input)], 2))),
    );
    let expected = Decision::Switch(
        input,
        vec![
            Case::new(
                Constructor::Variant(option_type, 0),
                vec![value],
                value_switch,
            ),
            Case::new(
                Constructor::Variant(option_type, 1),
                Vec::new(),
                success_with_bindings(vec![("a", input)], 2),
            ),
        ],
        None,
    );

    assert_eq!(result.tree, expected);
}
1512 |
/// A pair nested in an option variant is rendered in nested form when it is
/// reported as a missing pattern.
#[test]
fn test_compile_nonexhaustive_pair_in_option_pattern() {
    let mut compiler = Compiler::new();
    let int_type = new_type(&mut compiler, Type::Int);
    let tup_type = new_type(&mut compiler, Type::Pair(int_type, int_type));
    let option_type = new_type(
        &mut compiler,
        Type::Enum(vec![
            ("Some".to_string(), vec![tup_type]),
            ("None".to_string(), Vec::new()),
        ]),
    );
    let input = compiler.new_variable(option_type);
    let rules = vec![(
        variant(
            option_type,
            0,
            vec![pair(int_type, int_type, int(4), bind("a"))],
        ),
        rhs(1),
    )];
    let result = compile(compiler, input, rules);

    let tuple = var(1, tup_type);
    let first = var(2, int_type);
    let second = var(3, int_type);
    let first_switch = Decision::Switch(
        first,
        vec![Case::new(
            Constructor::Int(4),
            Vec::new(),
            success_with_bindings(vec![("a", second)], 1),
        )],
        Some(Box::new(failure())),
    );
    let tuple_switch = Decision::Switch(
        tuple,
        vec![Case::new(
            Constructor::Pair(int_type, int_type),
            vec![first, second],
            first_switch,
        )],
        None,
    );
    let expected = Decision::Switch(
        input,
        vec![
            Case::new(
                Constructor::Variant(option_type, 0),
                vec![tuple],
                tuple_switch,
            ),
            Case::new(
                Constructor::Variant(option_type, 1),
                Vec::new(),
                failure(),
            ),
        ],
        None,
    );

    assert_eq!(result.tree, expected);
    assert_eq!(
        result.missing_patterns(),
        vec!["None".to_string(), "Some((_, _))".to_string()]
    );
}
1582 |
1583 | #[test]
1584 | fn test_compile_or_bool_pattern() {
1585 | let mut compiler = Compiler::new();
1586 | let bool_type = new_type(&mut compiler, Type::Boolean);
1587 | let input = compiler.new_variable(bool_type);
1588 | let result = compile(
1589 | compiler,
1590 | input,
1591 | vec![(Pattern::Or(vec![tt(), ff()]), rhs(1))],
1592 | );
1593 |
1594 | assert_eq!(
1595 | result.tree,
1596 | Decision::Switch(
1597 | input,
1598 | vec![
1599 | Case::new(Constructor::False, Vec::new(), success(1)),
1600 | Case::new(Constructor::True, Vec::new(), success(1)),
1601 | ],
1602 | None
1603 | )
1604 | );
1605 | }
1606 |
1607 | #[test]
1608 | fn test_compile_or_int_pattern() {
1609 | let mut compiler = Compiler::new();
1610 | let int_type = new_type(&mut compiler, Type::Int);
1611 | let input = compiler.new_variable(int_type);
1612 | let result = compile(
1613 | compiler,
1614 | input,
1615 | vec![(Pattern::Or(vec![int(4), int(5)]), rhs(1))],
1616 | );
1617 |
1618 | assert_eq!(
1619 | result.tree,
1620 | Decision::Switch(
1621 | input,
1622 | vec![
1623 | Case::new(Constructor::Int(4), Vec::new(), success(1)),
1624 | Case::new(Constructor::Int(5), Vec::new(), success(1)),
1625 | ],
1626 | Some(Box::new(failure()))
1627 | )
1628 | );
1629 | }
1630 |
1631 | #[test]
1632 | fn test_range_pattern() {
1633 | let mut compiler = Compiler::new();
1634 | let int_type = new_type(&mut compiler, Type::Int);
1635 | let input = compiler.new_variable(int_type);
1636 | let result =
1637 | compile(compiler, input, vec![(Pattern::Range(1, 10), rhs(1))]);
1638 |
1639 | assert_eq!(
1640 | result.tree,
1641 | Decision::Switch(
1642 | input,
1643 | vec![Case::new(
1644 | Constructor::Range(1, 10),
1645 | Vec::new(),
1646 | success(1)
1647 | )],
1648 | Some(Box::new(failure()))
1649 | )
1650 | );
1651 | }
1652 |
1653 | #[test]
1654 | fn test_nonexhaustive_guard() {
1655 | let mut compiler = Compiler::new();
1656 | let int_type = new_type(&mut compiler, Type::Int);
1657 | let input = compiler.new_variable(int_type);
1658 |
1659 | let result = compiler.compile(vec![Row::new(
1660 | vec![Column::new(input, int(4))],
1661 | Some(42),
1662 | rhs(1),
1663 | )]);
1664 |
1665 | assert_eq!(
1666 | result.tree,
1667 | Decision::Switch(
1668 | input,
1669 | vec![Case::new(
1670 | Constructor::Int(4),
1671 | Vec::new(),
1672 | Decision::Guard(42, rhs(1), Box::new(failure()))
1673 | )],
1674 | Some(Box::new(failure()))
1675 | )
1676 | );
1677 |
1678 | assert_eq!(result.missing_patterns(), vec!["_".to_string()]);
1679 | }
1680 |
1681 | #[test]
1682 | fn test_nonexhaustive_option_with_two_rows_and_guard() {
1683 | let mut compiler = Compiler::new();
1684 | let int_type = new_type(&mut compiler, Type::Int);
1685 | let option_type = new_type(
1686 | &mut compiler,
1687 | Type::Enum(vec![
1688 | ("Some".to_string(), vec![int_type]),
1689 | ("None".to_string(), Vec::new()),
1690 | ]),
1691 | );
1692 | let input = compiler.new_variable(option_type);
1693 | let result = compiler.compile(vec![
1694 | Row::new(
1695 | vec![Column::new(input, variant(option_type, 0, vec![int(4)]))],
1696 | Some(42),
1697 | rhs(1),
1698 | ),
1699 | Row::new(
1700 | vec![Column::new(
1701 | input,
1702 | variant(option_type, 0, vec![bind("a")]),
1703 | )],
1704 | None,
1705 | rhs(2),
1706 | ),
1707 | ]);
1708 |
1709 | assert_eq!(
1710 | result.tree,
1711 | Decision::Switch(
1712 | input,
1713 | vec![
1714 | Case::new(
1715 | Constructor::Variant(option_type, 0),
1716 | vec![var(1, int_type)],
1717 | Decision::Switch(
1718 | var(1, int_type),
1719 | vec![Case::new(
1720 | Constructor::Int(4),
1721 | Vec::new(),
1722 | Decision::Guard(
1723 | 42,
1724 | rhs(1),
1725 | Box::new(success_with_bindings(
1726 | vec![("a", var(1, int_type))],
1727 | 2
1728 | )),
1729 | )
1730 | )],
1731 | Some(Box::new(success_with_bindings(
1732 | vec![("a", var(1, int_type))],
1733 | 2
1734 | )))
1735 | ),
1736 | ),
1737 | Case::new(
1738 | Constructor::Variant(option_type, 1),
1739 | Vec::new(),
1740 | failure()
1741 | )
1742 | ],
1743 | None
1744 | )
1745 | );
1746 |
1747 | assert_eq!(result.missing_patterns(), vec!["None".to_string()]);
1748 | }
1749 |
1750 | #[test]
1751 | fn test_exhaustive_guard() {
1752 | let mut compiler = Compiler::new();
1753 | let int_type = new_type(&mut compiler, Type::Int);
1754 | let input = compiler.new_variable(int_type);
1755 | let result = compiler.compile(vec![
1756 | Row::new(vec![Column::new(input, int(4))], Some(42), rhs(1)),
1757 | Row::new(vec![Column::new(input, bind("a"))], None, rhs(2)),
1758 | ]);
1759 |
1760 | assert_eq!(
1761 | result.tree,
1762 | Decision::Switch(
1763 | input,
1764 | vec![Case::new(
1765 | Constructor::Int(4),
1766 | Vec::new(),
1767 | Decision::Guard(
1768 | 42,
1769 | rhs(1),
1770 | Box::new(success_with_bindings(vec![("a", input)], 2))
1771 | )
1772 | )],
1773 | Some(Box::new(success_with_bindings(vec![("a", input)], 2)))
1774 | )
1775 | );
1776 | }
1777 |
1778 | #[test]
1779 | fn test_exhaustive_guard_with_bool() {
1780 | let mut compiler = Compiler::new();
1781 | let bool_type = new_type(&mut compiler, Type::Boolean);
1782 | let input = compiler.new_variable(bool_type);
1783 | let result = compiler.compile(vec![
1784 | Row::new(vec![Column::new(input, tt())], Some(42), rhs(1)),
1785 | Row::new(vec![Column::new(input, bind("a"))], None, rhs(2)),
1786 | ]);
1787 |
1788 | assert_eq!(
1789 | result.tree,
1790 | Decision::Switch(
1791 | input,
1792 | vec![
1793 | Case::new(
1794 | Constructor::False,
1795 | Vec::new(),
1796 | success_with_bindings(vec![("a", input)], 2)
1797 | ),
1798 | Case::new(
1799 | Constructor::True,
1800 | Vec::new(),
1801 | Decision::Guard(
1802 | 42,
1803 | rhs(1),
1804 | Box::new(success_with_bindings(
1805 | vec![("a", input)],
1806 | 2
1807 | ))
1808 | )
1809 | )
1810 | ],
1811 | None
1812 | )
1813 | );
1814 | }
1815 |
1816 | #[test]
1817 | fn test_exhaustive_guard_with_int() {
1818 | let mut compiler = Compiler::new();
1819 | let int_type = new_type(&mut compiler, Type::Int);
1820 | let input = compiler.new_variable(int_type);
1821 | let result = compiler.compile(vec![
1822 | Row::new(vec![Column::new(input, int(1))], Some(42), rhs(1)),
1823 | Row::new(vec![Column::new(input, int(2))], None, rhs(2)),
1824 | Row::new(vec![Column::new(input, bind("b"))], None, rhs(3)),
1825 | ]);
1826 |
1827 | assert_eq!(
1828 | result.tree,
1829 | Decision::Switch(
1830 | input,
1831 | vec![
1832 | Case::new(
1833 | Constructor::Int(1),
1834 | Vec::new(),
1835 | Decision::Guard(
1836 | 42,
1837 | rhs(1),
1838 | Box::new(success_with_bindings(
1839 | vec![("b", input)],
1840 | 3
1841 | ))
1842 | )
1843 | ),
1844 | Case::new(Constructor::Int(2), Vec::new(), success(2))
1845 | ],
1846 | Some(Box::new(success_with_bindings(vec![("b", input)], 3)))
1847 | )
1848 | );
1849 | }
1850 |
1851 | #[test]
1852 | fn test_exhaustive_guard_with_same_int() {
1853 | let mut compiler = Compiler::new();
1854 | let int_type = new_type(&mut compiler, Type::Int);
1855 | let input = compiler.new_variable(int_type);
1856 | let result = compiler.compile(vec![
1857 | Row::new(vec![Column::new(input, int(1))], Some(10), rhs(1)),
1858 | Row::new(vec![Column::new(input, int(1))], Some(20), rhs(2)),
1859 | Row::new(vec![Column::new(input, int(1))], None, rhs(3)),
1860 | Row::new(vec![Column::new(input, bind("b"))], None, rhs(4)),
1861 | ]);
1862 |
1863 | assert_eq!(
1864 | result.tree,
1865 | Decision::Switch(
1866 | input,
1867 | vec![Case::new(
1868 | Constructor::Int(1),
1869 | Vec::new(),
1870 | Decision::Guard(
1871 | 10,
1872 | rhs(1),
1873 | Box::new(Decision::Guard(
1874 | 20,
1875 | rhs(2),
1876 | Box::new(success(3))
1877 | ))
1878 | )
1879 | )],
1880 | Some(Box::new(success_with_bindings(vec![("b", input)], 4)))
1881 | )
1882 | );
1883 | }
1884 |
1885 | #[test]
1886 | fn test_exhaustive_option_with_guard() {
1887 | let mut compiler = Compiler::new();
1888 | let int_type = new_type(&mut compiler, Type::Int);
1889 | let option_type = new_type(
1890 | &mut compiler,
1891 | Type::Enum(vec![
1892 | ("Some".to_string(), vec![int_type]),
1893 | ("None".to_string(), Vec::new()),
1894 | ]),
1895 | );
1896 | let input = compiler.new_variable(option_type);
1897 | let result = compiler.compile(vec![
1898 | Row::new(
1899 | vec![Column::new(input, variant(option_type, 1, Vec::new()))],
1900 | None,
1901 | rhs(1),
1902 | ),
1903 | Row::new(
1904 | vec![Column::new(
1905 | input,
1906 | variant(option_type, 0, vec![bind("a")]),
1907 | )],
1908 | Some(42),
1909 | rhs(2),
1910 | ),
1911 | Row::new(
1912 | vec![Column::new(
1913 | input,
1914 | variant(option_type, 0, vec![bind("a")]),
1915 | )],
1916 | None,
1917 | rhs(3),
1918 | ),
1919 | ]);
1920 |
1921 | assert_eq!(
1922 | result.tree,
1923 | Decision::Switch(
1924 | input,
1925 | vec![
1926 | Case::new(
1927 | Constructor::Variant(option_type, 0),
1928 | vec![var(1, int_type)],
1929 | Decision::Guard(
1930 | 42,
1931 | Body {
1932 | bindings: vec![(
1933 | "a".to_string(),
1934 | var(1, int_type)
1935 | )],
1936 | value: 2
1937 | },
1938 | Box::new(success_with_bindings(
1939 | vec![("a", var(1, int_type))],
1940 | 3
1941 | ))
1942 | )
1943 | ),
1944 | Case::new(
1945 | Constructor::Variant(option_type, 1),
1946 | Vec::new(),
1947 | success(1)
1948 | ),
1949 | ],
1950 | None
1951 | )
1952 | );
1953 | }
1954 |
1955 | #[test]
1956 | fn test_compile_exhaustive_nested_int_with_guard() {
1957 | let mut compiler = Compiler::new();
1958 | let int_type = new_type(&mut compiler, Type::Int);
1959 | let tup_type = new_type(&mut compiler, Type::Pair(int_type, int_type));
1960 | let input = compiler.new_variable(tup_type);
1961 | let result = compiler.compile(vec![
1962 | Row::new(
1963 | vec![Column::new(
1964 | input,
1965 | pair(int_type, int_type, int(4), int(5)),
1966 | )],
1967 | Some(42),
1968 | rhs(1),
1969 | ),
1970 | Row::new(
1971 | vec![Column::new(
1972 | input,
1973 | pair(int_type, int_type, int(4), int(5)),
1974 | )],
1975 | None,
1976 | rhs(2),
1977 | ),
1978 | Row::new(
1979 | vec![Column::new(
1980 | input,
1981 | pair(int_type, int_type, bind("a"), bind("b")),
1982 | )],
1983 | None,
1984 | rhs(3),
1985 | ),
1986 | ]);
1987 |
1988 | assert_eq!(
1989 | result.tree,
1990 | Decision::Switch(
1991 | input,
1992 | vec![Case::new(
1993 | Constructor::Pair(int_type, int_type),
1994 | vec![var(1, int_type), var(2, int_type)],
1995 | Decision::Switch(
1996 | var(2, int_type),
1997 | vec![Case::new(
1998 | Constructor::Int(5),
1999 | Vec::new(),
2000 | Decision::Switch(
2001 | var(1, int_type),
2002 | vec![Case::new(
2003 | Constructor::Int(4),
2004 | Vec::new(),
2005 | Decision::Guard(
2006 | 42,
2007 | rhs(1),
2008 | Box::new(success(2)),
2009 | )
2010 | )],
2011 | Some(Box::new(success_with_bindings(
2012 | vec![
2013 | ("a", var(1, int_type)),
2014 | ("b", var(2, int_type))
2015 | ],
2016 | 3
2017 | )))
2018 | )
2019 | )],
2020 | Some(Box::new(success_with_bindings(
2021 | vec![
2022 | ("a", var(1, int_type)),
2023 | ("b", var(2, int_type))
2024 | ],
2025 | 3
2026 | )))
2027 | )
2028 | )],
2029 | None
2030 | )
2031 | );
2032 | }
2033 | }
2034 |
--------------------------------------------------------------------------------
/rustfmt.toml:
--------------------------------------------------------------------------------
1 | max_width = 80
2 | use_try_shorthand = true
3 | reorder_imports = true
4 | edition = "2018"
5 |
6 | # The default setting results in too aggressive/clunky wrapping for a lot of
7 | # code. For example, this:
8 | #
9 | # if !foo.bar().baz() {
10 | # bar();
11 | # }
12 | #
13 | # Would result in something like this:
14 | #
15 | # if !foo
16 | # .bar()
17 | # .baz()
18 | # {
19 | # bar();
20 | # }
21 | #
22 | # Setting this to "Max" results in a more consistent and less infuriating
23 | # wrapping style.
24 | use_small_heuristics = 'Max'
25 |
--------------------------------------------------------------------------------
/sestoft1996/Cargo.lock:
--------------------------------------------------------------------------------
1 | # This file is automatically @generated by Cargo.
2 | # It is not intended for manual editing.
3 | version = 3
4 |
5 | [[package]]
6 | name = "match"
7 | version = "1.0.0"
8 |
--------------------------------------------------------------------------------
/sestoft1996/Cargo.toml:
--------------------------------------------------------------------------------
1 | [package]
2 | name = "sestoft1996"
3 | version = "1.0.0"
4 | authors = ["Yorick Peterse "]
5 | edition = "2021"
6 |
7 | [lib]
8 | doctest = false
9 |
--------------------------------------------------------------------------------
/sestoft1996/README.md:
--------------------------------------------------------------------------------
1 | # ML Pattern match compilation and partial evaluation
2 |
3 | This directory contains an implementation of the pattern matching algorithm
4 | introduced in the paper "ML Pattern match compilation and partial evaluation" by
5 | Peter Sestoft, from 1996.
6 |
7 | ## A short rant about the paper
8 |
9 | The paper is a bit of a pain to read, and took me a solid week to understand.
10 | Part of this is because I'm not familiar with Standard ML, so I first had to
11 | learn that to some degree. The syntax can also be hard to grok when you have
12 | functions calling functions and passing those results as arguments directly,
13 | especially combined with operators (e.g. is `foo bar :: baz` parsed as
14 | `foo(bar :: baz)` or `(foo bar) :: baz`?).
15 |
16 | It doesn't help that the paper makes references to the author's implementation,
17 | but the only two links regarding it are dead FTP links. I eventually found these
18 | implementations of this algorithm:
19 |
20 | - https://github.com/kfl/mosml/blob/f529b33bb891ff1df4aab198edad376f9ff64d28/src/compiler/Match.sml
21 | - https://github.com/rsdn/nemerle/blob/db4bc9078f1b6238da32df1519c1957e74b6834a/ncc/typing/DecisionTreeBuilder.n
22 | - https://github.com/rsdn/nitra/tree/master/Nitra/Nitra.Compiler/Generation/PatternMatching
23 | - https://github.com/melsman/mlkit/blob/237be62778985e76f912cefdc0bb21b22bed5bd4/src/Compiler/Lambda/CompileDec.sml#L510
24 |
25 | The Moscow ML implementation uses memoization and some extensions for the
26 | pattern matching logic. The Nemerle implementation is quite different and uses a
27 | more imperative/mutable approach.
28 |
29 | As to how the algorithm works: even now I don't quite understand why certain
30 | decisions were made, and the algorithm as a whole feels a bit crude.
31 |
32 | I could go on, but the summary is this: if you wish to understand the paper, I
33 | recommend reading through it while using my Rust code as a reference. It should
34 | be a bit easier to understand and translate to other languages, and it doesn't
35 | require a 20 year old language (though maybe it will if you're reading this 20
36 | years from now).
37 |
38 | ## Project structure
39 |
40 | There are two implementations of the algorithm: a raw version, and an idiomatic
41 | version. Neither version implements the memoization strategy as discussed in
42 | section 7.5, as this likely won't work well due to Rust's single ownership
43 | requirement. Both versions are extensively commented to better explain why
44 | certain decisions were made, what to keep in mind when reading the paper, etc.
45 |
46 | ### The raw version
47 |
48 | The raw version is more or less a 1:1 translation of the SML code included in the
49 | paper. The code is terrible, relies on (poorly implemented) immutable lists
50 | (because the original algorithm requires immutable lists), and likely performs
51 | extremely poorly. I tried to keep this version as close to the paper as
52 | possible, only deviating where Rust simply required a different approach.
53 |
54 | Some differences from the paper:
55 |
56 | - Rust doesn't have built-in immutable lists, and the algorithm requires the use
57 | of immutable lists in a few places. Thus, we introduce a custom immutable
58 | linked list.
59 | - The paper assumes multiple ownership of values in a few places. This
60 | implementation instead clones values to work around that, as using a different
61 | approach requires a different implementation.
62 | - The `succeed'` and `fail'` functions are called `match_succeed` and
63 | `match_fail` respectively. Who the hell thought it was a good idea to allow
64 | quotes in symbol names?
65 | - When generating `Sel` nodes, the paper uses `i+1` to build the selector
66 | values. It's not clear why this is done (the paper makes no mention of it),
67 | and it seems unnecessary. As such we just use indexes starting at zero.
68 | - The paper implements various functions in a non-exhaustive manner, without
69 | any explanation as to why. My implementation uses exhaustive patterns where
70 | possible, and `unwrap()` in a few places where missing values (and thus
71 | panics) shouldn't occur in the absence of bugs (famous last words).
72 |
73 | ### The idiomatic version
74 |
75 | This implementation of the algorithm is closer to what you'd normally write in
76 | Rust. Some of the names used are still a bit confusing, but unfortunately I
77 | haven't been able to come up with better names.
78 |
79 | Unlike the raw implementation, this implementation doesn't rely on persistent
80 | lists. Instead, it uses mutable vectors that store values in reverse order.
81 | Storing them in this order means a pop() returns the head of the vector, instead
82 | of the tail. This makes retrieving the head cheap, as no values need to be
83 | shifted.
84 |
85 | Some functions (e.g. `addneg` and `match_fail`) are inlined into their callers,
86 | as they are only called in one place.
87 |
88 | For traversing all the pattern matching rules we use a cursor, essentially
89 | turning the list into an iterator that you can rewind. This is needed because
90 | when building an `IfEq` node, both the true and false bodies need to start off
91 | with the same set of rules. Using a cursor allows us to do just that, but
92 | without cloning the rules.
93 |
--------------------------------------------------------------------------------
/sestoft1996/src/idiomatic.rs:
--------------------------------------------------------------------------------
1 | //! An idiomatic Rust implementation of the pattern matching algorithm.
2 | use std::collections::HashSet;
3 |
4 | /// The result of a static match.
5 | #[derive(Debug)]
6 | enum Match {
7 | Yes,
8 | No,
9 | Maybe,
10 | }
11 |
12 | /// The description of terms already matched, corresponding to the `context`
13 | /// type in the paper.
14 | struct Context {
15 | values: Vec<(Constructor, Vec<Term>)>,
16 | }
17 |
18 | impl Context {
19 | fn new() -> Self {
20 | Self { values: Vec::new() }
21 | }
22 |
23 | fn push(&mut self, value: (Constructor, Vec<Term>)) {
24 | self.values.push(value);
25 | }
26 |
27 | fn pop(&mut self) -> Option<(Constructor, Vec<Term>)> {
28 | self.values.pop()
29 | }
30 |
31 | fn add_argument_to_last(&mut self, term: Term) {
32 | if let Some((_, args)) = self.values.last_mut() {
33 | args.push(term);
34 | }
35 | }
36 |
37 | fn reconstruct_term(&self, term: Term, work: &Work) -> Term {
38 | self.values.iter().zip(work.iter()).fold(
39 | term,
40 | |term, ((con, args), (_, _, dargs))| {
41 | let mut new_args: Vec<_> = dargs.clone();
42 |
43 | new_args.push(term);
44 | new_args.extend(args.iter().rev().cloned());
45 | Term::Pos(con.clone(), new_args)
46 | },
47 | )
48 | }
49 | }
50 |
51 | /// The work stack as used in the paper.
52 | ///
53 | /// The paper uses a list of triple lists, removing the need for some append
54 | /// operations. This is a bit annoying to work with in Rust (we have to unwrap()
55 | /// in some places), but again we're trying to stay as close to the paper as
56 | /// possible.
57 | ///
58 | /// We use a type alias here so we don't have to re-type this type name in the
59 | /// various places that it's used.
60 | type Work = Vec<(Vec<Pattern>, Vec<Access>, Vec<Term>)>;
61 |
62 | /// The type of the right-hand side of a case (i.e. the code to run).
63 | ///
64 | /// For the sake of simplicity we just use a String here. In a real compiler
65 | /// this would probably be an AST node or another sort of IR to run upon a
66 | /// match.
67 | pub type RHS = String;
68 |
69 | /// A type for storing diagnostics produced by the decision tree compiler.
70 | pub struct Diagnostics {
71 | /// The diagnostic messages produced.
72 | ///
73 | /// In a real compiler this would include more than just a message, such as
74 | /// the line and column numbers.
75 | messages: Vec<String>,
76 |
77 | /// The right-hand values (= the code you'd run upon a match) that have been
78 | /// processed.
79 | ///
80 | /// If a value isn't included in this set it means it and its pattern are
81 | /// redundant.
82 | ///
83 | /// In a real compiler you'd probably mark AST nodes directly. In our case
84 | /// the right-hand values are just simple strings, so we use a set instead.
85 | reachable: HashSet<RHS>,
86 | }
87 |
88 | /// A type for compiling a list of rules into a decision tree.
89 | pub struct Compiler {
90 | /// The rules to compile into a decision tree.
91 | rules: Vec<(Pattern, RHS)>,
92 |
93 | /// The start of the first rule to compile.
94 | ///
95 | /// When generating IfEq nodes we need to generate two branches, both
96 | /// starting with the same set of rules. To avoid cloning we use a cursor,
97 | /// save it before processing one branch, then restore it for the other
98 | /// branch.
99 | rules_index: usize,
100 |
101 | diagnostics: Diagnostics,
102 | }
103 |
104 | impl Compiler {
105 | pub fn new(rules: Vec<(Pattern, RHS)>) -> Self {
106 | Self {
107 | rules,
108 | rules_index: 0,
109 | diagnostics: Diagnostics {
110 | messages: Vec::new(),
111 | reachable: HashSet::new(),
112 | },
113 | }
114 | }
115 |
116 | pub fn compile(&mut self) -> Decision {
117 | self.fail(Term::bottom())
118 | }
119 |
120 | fn fail(&mut self, term: Term) -> Decision {
121 | if let Some((pat, rhs)) = self.next_rule().cloned() {
122 | let ctx = Context::new();
123 | let work = Vec::new();
124 |
125 | self.match_pattern(pat, Access::Root, term, ctx, work, rhs)
126 | } else {
127 | self.diagnostics
128 | .messages
129 | .push(format!("Missing pattern: {}", term.error_string()));
130 |
131 | Decision::Failure
132 | }
133 | }
134 |
135 | fn succeed(
136 | &mut self,
137 | mut ctx: Context,
138 | mut work: Work,
139 | rhs: RHS,
140 | ) -> Decision {
141 | if let Some((mut pats, mut accs, mut terms)) = work.pop() {
142 | if let (Some(pat), Some(acc), Some(term)) =
143 | (pats.pop(), accs.pop(), terms.pop())
144 | {
145 | work.push((pats, accs, terms));
146 | self.match_pattern(pat, acc, term, ctx, work, rhs)
147 | } else {
148 | if let Some((con, mut args)) = ctx.pop() {
149 | args.reverse();
150 | ctx.add_argument_to_last(Term::Pos(con, args));
151 | }
152 |
153 | self.succeed(ctx, work, rhs)
154 | }
155 | } else {
156 | self.diagnostics.reachable.insert(rhs.clone());
157 | Decision::Success(rhs)
158 | }
159 | }
160 |
161 | fn match_pattern(
162 | &mut self,
163 | pattern: Pattern,
164 | access: Access,
165 | term: Term,
166 | mut ctx: Context,
167 | work: Work,
168 | rhs: RHS,
169 | ) -> Decision {
170 | match pattern {
171 | Pattern::Variable(name) => {
172 | ctx.add_argument_to_last(term);
173 | Decision::Variable(
174 | access,
175 | name,
176 | Box::new(self.succeed(ctx, work, rhs)),
177 | )
178 | }
179 | Pattern::Field(id, pat) => self.match_pattern(
180 | *pat,
181 | Access::Select(id, Box::new(access)),
182 | term,
183 | ctx,
184 | work,
185 | rhs,
186 | ),
187 | Pattern::Wildcard => {
188 | ctx.add_argument_to_last(term);
189 | self.succeed(ctx, work, rhs)
190 | }
191 | Pattern::Constructor(con, args) => match self
192 | .match_term(&con, &term)
193 | {
194 | Match::Yes => {
195 | self.matched(con, args, access, term, ctx, work, rhs)
196 | }
197 | Match::No => self.fail(ctx.reconstruct_term(term, &work)),
198 | Match::Maybe => {
199 | let false_term =
200 | ctx.reconstruct_term(term.clone().negated(&con), &work);
201 | let cursor = self.rules_index;
202 | let matched = self.matched(
203 | con.clone(),
204 | args,
205 | access.clone(),
206 | term,
207 | ctx,
208 | work,
209 | rhs,
210 | );
211 |
212 | self.rules_index = cursor;
213 |
214 | Decision::IfEq(
215 | access,
216 | con,
217 | Box::new(matched),
218 | Box::new(self.fail(false_term)),
219 | )
220 | }
221 | },
222 | }
223 | }
224 |
225 | fn matched(
226 | &mut self,
227 | con: Constructor,
228 | args: Vec<Pattern>,
229 | obj: Access,
230 | term: Term,
231 | mut ctx: Context,
232 | mut work: Work,
233 | rhs: RHS,
234 | ) -> Decision {
235 | let access = (0..con.arity)
236 | .rev()
237 | .map(|i| Access::Select(i, Box::new(obj.clone())))
238 | .collect();
239 |
240 | let terms = match term {
241 | Term::Pos(_, dargs) => dargs,
242 | Term::Neg(_) => vec![Term::bottom(); con.arity],
243 | };
244 |
245 | ctx.push((con, Vec::new()));
246 | work.push((args, access, terms));
247 | self.succeed(ctx, work, rhs)
248 | }
249 |
250 | fn match_term(&mut self, con: &Constructor, term: &Term) -> Match {
251 | match term {
252 | Term::Pos(tcon, _) if con == tcon => Match::Yes,
253 | Term::Pos(_, _) => Match::No,
254 | Term::Neg(exl) if exl.contains(con) => Match::No,
255 | Term::Neg(exl) if con.span == (exl.len() + 1) => Match::Yes,
256 | Term::Neg(_) => Match::Maybe,
257 | }
258 | }
259 |
260 | fn next_rule(&mut self) -> Option<&(Pattern, RHS)> {
261 | if self.rules_index >= self.rules.len() {
262 | None
263 | } else {
264 | let val = self.rules.get(self.rules_index);
265 |
266 | self.rules_index += 1;
267 |
268 | val
269 | }
270 | }
271 | }
272 |
273 | /// A type constructor.
274 | ///
275 | /// For a boolean, a constructor would have the following values:
276 | ///
277 | /// - name: true or false
278 | /// - arity: 0, as booleans don't take arguments
279 | /// - span: 2, as there are only two constructors (true and false)
280 | ///
281 | /// In a real compiler you'd probably use pointers/IDs to your type data
282 | /// structures instead, but for the sake of keeping things simple we just use a
283 | /// struct that can be cloned.
284 | #[derive(Debug, Eq, PartialEq, Clone)]
285 | pub struct Constructor {
286 | name: String,
287 |
288 | // The number of arguments.
289 | arity: usize,
290 |
291 | // The total number of constructors of the owning type
292 | //
293 | // A span of 0 means the type has an infinite amount of constructors.
294 | span: usize,
295 | }
296 |
297 | /// A user provided pattern to match against an input value.
298 | ///
299 | /// Four patterns exist: constructors, variables/bindings, fields, wildcards.
300 | ///
301 | /// In a real compiler you'd probably be using AST nodes instead of dedicated
302 | /// pattern types, and include more cases for specific patterns (e.g. tuple and
303 | /// struct patterns).
304 | #[derive(Debug, Clone)]
305 | pub enum Pattern {
306 | Constructor(Constructor, Vec<Pattern>),
307 | Variable(String),
308 | Field(usize, Box<Pattern>),
309 | Wildcard,
310 | }
311 |
312 | #[derive(Debug, Clone, Eq, PartialEq)]
313 | pub enum Term {
314 | // `Cons` is the top-most constructor, and its components are described by
315 | // the Vec.
316 | //
317 | // The arguments are in reverse order, so the first argument is the last
318 | // value.
319 | Pos(Constructor, Vec<Term>),
320 |
321 | // Any term whose top-most constructor is _not_ any of the listed
322 | // constructors.
323 | //
324 | // For a Negative(S), the cardinality of S must be less than the span of
325 | // any constructor in S:
326 | //
327 | // cons.iter().all(|cons| cardinality(s) < span(cons))
328 | //
329 | // Due to static typing, all constructors in S are of the same type, thus
330 | // have the same span.
331 | //
332 | // The constructors are in reverse order, so the first constructor is the
333 | // last value.
334 | Neg(Vec<Constructor>),
335 | }
336 |
337 | impl Term {
338 | fn bottom() -> Term {
339 | Term::Neg(Vec::new())
340 | }
341 |
342 | fn negated(self, con: &Constructor) -> Term {
343 | match self {
344 | Term::Pos(_, _) => self,
345 | Term::Neg(mut nonset) => {
346 | nonset.push(con.clone());
347 | Term::Neg(nonset)
348 | }
349 | }
350 | }
351 | }
352 |
353 | impl Term {
354 | /// Returns a string used to describe this term in an error message.
355 | fn error_string(&self) -> String {
356 | match self {
357 | Term::Pos(cons, args) => {
358 | if args.is_empty() {
359 | cons.name.clone()
360 | } else {
361 | format!(
362 | "{}({})",
363 | cons.name,
364 | args.iter()
365 | .rev()
366 | .map(|v| v.error_string())
367 | .collect::<Vec<_>>()
368 | .join(", ")
369 | )
370 | }
371 | }
372 | Term::Neg(_) => "_".to_string(),
373 | }
374 | }
375 | }
376 |
377 | /// The `access` type in the paper.
378 | #[derive(Clone, Debug, Eq, PartialEq)]
379 | pub enum Access {
380 | Root,
381 | Select(usize, Box<Access>),
382 | }
383 |
384 | /// The `decision` type in the paper.
385 | #[derive(Debug, Eq, PartialEq, Clone)]
386 | pub enum Decision {
387 | /// A pattern didn't match.
388 | Failure,
389 |
390 | /// A pattern is matched and the right-hand value is to be returned.
391 | Success(RHS),
392 |
393 | /// Checks if a constructor matches the value at the given access path.
394 | IfEq(Access, Constructor, Box<Decision>, Box<Decision>),
395 |
396 | /// Checks if any of the given constructors match the value at the given
397 | /// access path.
398 | Switch(Access, Vec<(Constructor, Decision)>, Box<Decision>),
399 |
400 | /// Bind a value to a variable, then continue matching the rest of the
401 | /// input.
402 | Variable(Access, String, Box<Decision>),
403 | }
404 |
405 | impl Decision {
406 | /// Replaces a series of nested IfEq nodes for the same access object with a
407 | /// Switch node.
408 | pub fn replace_nested_if(self) -> Decision {
409 | match self {
410 | Decision::IfEq(root, con, ok, fail) => {
411 | let mut cases = vec![(con, *ok)];
412 | let mut fallback = fail;
413 |
414 | loop {
415 | match *fallback {
416 | Decision::IfEq(acc, con, ok, fail) if root == acc => {
417 | fallback = fail;
418 |
419 | cases.push((con, *ok));
420 | }
421 | _ => break,
422 | }
423 | }
424 |
425 | if cases.len() == 1 {
426 | let (con, ok) = cases.pop().unwrap();
427 |
428 | Decision::IfEq(root, con, Box::new(ok), fallback)
429 | } else {
430 | Decision::Switch(root, cases, fallback)
431 | }
432 | }
433 | _ => self,
434 | }
435 | }
436 | }
437 |
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds a constructor with the given name, arity and span.
    fn con(name: &str, arity: usize, span: usize) -> Constructor {
        Constructor { name: name.to_string(), arity, span }
    }

    /// A pattern for the only constructor of a type, without arguments.
    fn nil() -> Pattern {
        Pattern::Constructor(con("nil", 0, 1), Vec::new())
    }

    /// The `true` constructor of a two-constructor boolean type.
    fn tt_con() -> Constructor {
        con("true", 0, 2)
    }

    /// The `false` constructor of a two-constructor boolean type.
    fn ff_con() -> Constructor {
        con("false", 0, 2)
    }

    /// A pattern matching `true`.
    fn tt() -> Pattern {
        Pattern::Constructor(tt_con(), Vec::new())
    }

    /// A pattern matching `false`.
    fn ff() -> Pattern {
        Pattern::Constructor(ff_con(), Vec::new())
    }

    /// A pattern matching a pair of `a` and `b`.
    ///
    /// The arguments are stored in reverse order (`b` first).
    fn pair(a: Pattern, b: Pattern) -> Pattern {
        Pattern::Constructor(con("pair", 2, 1), vec![b, a])
    }

    /// A pattern binding the matched value to the variable `name`.
    fn var(name: &str) -> Pattern {
        Pattern::Variable(name.to_string())
    }

    /// Shorthand for a `Decision::IfEq` node.
    fn if_eq(
        acc: Access,
        con: Constructor,
        ok: Decision,
        fail: Decision,
    ) -> Decision {
        Decision::IfEq(acc, con, Box::new(ok), Box::new(fail))
    }

    /// Shorthand for a `Decision::Switch` node.
    fn switch(
        acc: Access,
        cases: Vec<(Constructor, Decision)>,
        fallback: Decision,
    ) -> Decision {
        Decision::Switch(acc, cases, Box::new(fallback))
    }

    /// Shorthand for a `Decision::Variable` node.
    fn bind(access: Access, name: &str, rest: Decision) -> Decision {
        Decision::Variable(access, name.to_string(), Box::new(rest))
    }

    /// Shorthand for a `Decision::Success` node.
    fn success(value: &str) -> Decision {
        Decision::Success(value.to_string())
    }

    /// Shorthand for a `Decision::Failure` node.
    fn failure() -> Decision {
        Decision::Failure
    }

    /// The right-hand side of a rule.
    fn rhs(value: &str) -> String {
        value.to_string()
    }

    /// The access path of the value being matched.
    fn obj() -> Access {
        Access::Root
    }

    /// The access path of the argument at `index` of the value at `acc`.
    fn sel(index: usize, acc: Access) -> Access {
        Access::Select(index, Box::new(acc))
    }

    /// Compiles the rules, returning the tree and the diagnostics produced.
    fn compile(rules: Vec<(Pattern, RHS)>) -> (Decision, Diagnostics) {
        let mut compiler = Compiler::new(rules);
        let tree = compiler.compile();

        (tree, compiler.diagnostics)
    }

    #[test]
    fn test_term_description_error_string() {
        let term = Term::Pos(
            con("box", 2, 1),
            vec![
                Term::Neg(vec![con("false", 0, 2)]),
                Term::Pos(con("true", 0, 2), Vec::new()),
            ],
        );

        assert_eq!(term.error_string(), "box(true, _)");
    }

    #[test]
    fn test_context_reconstruct_term() {
        let mut ctx = Context::new();

        ctx.push((
            con("baz", 0, 1),
            vec![
                Term::Neg(vec![con("arg2", 0, 1)]),
                Term::Neg(vec![con("arg1", 0, 1)]),
            ],
        ));

        let work = vec![(
            Vec::new(),
            Vec::new(),
            vec![
                Term::Neg(vec![con("work2", 0, 1)]),
                Term::Neg(vec![con("work1", 0, 1)]),
            ],
        )];
        let dsc = Term::Neg(vec![con("bar", 0, 1)]);
        let new_dsc = ctx.reconstruct_term(dsc, &work);

        assert_eq!(
            new_dsc,
            Term::Pos(
                con("baz", 0, 1),
                vec![
                    Term::Neg(vec![con("work2", 0, 1)]),
                    Term::Neg(vec![con("work1", 0, 1)]),
                    Term::Neg(vec![con("bar", 0, 1)]),
                    Term::Neg(vec![con("arg1", 0, 1)]),
                    Term::Neg(vec![con("arg2", 0, 1)]),
                ]
            )
        );
    }

    #[test]
    fn test_context_add_argument_to_last() {
        let mut ctx = Context::new();

        ctx.push((
            con("baz", 0, 1),
            vec![
                Term::Neg(vec![con("arg2", 0, 1)]),
                Term::Neg(vec![con("arg1", 0, 1)]),
            ],
        ));

        let term = Term::Neg(vec![con("bar", 0, 1)]);

        ctx.add_argument_to_last(term);

        assert_eq!(
            ctx.values,
            vec![(
                con("baz", 0, 1),
                vec![
                    Term::Neg(vec![con("arg2", 0, 1)]),
                    Term::Neg(vec![con("arg1", 0, 1)]),
                    Term::Neg(vec![con("bar", 0, 1)]),
                ]
            )]
        );
    }

    #[test]
    fn test_match_always_succeeds() {
        let (result, _) = compile(vec![(nil(), rhs("true"))]);

        assert_eq!(result, success("true"));
    }

    #[test]
    fn test_match_always_fails() {
        let (result, _) = compile(Vec::new());

        assert_eq!(result, failure());
    }

    #[test]
    fn test_match_single_pattern() {
        let (result, _) =
            compile(vec![(tt(), rhs("true")), (ff(), rhs("false"))]);

        assert_eq!(
            result,
            if_eq(obj(), tt_con(), success("true"), success("false"))
        );
    }

    #[test]
    fn test_match_var() {
        let (result, _) = compile(vec![(var("a"), rhs("true"))]);

        assert_eq!(result, bind(obj(), "a", success("true")));
    }

    #[test]
    fn test_match_field() {
        let (result, _) = compile(vec![
            (Pattern::Field(42, Box::new(tt())), rhs("foo")),
            (Pattern::Field(42, Box::new(var("a"))), rhs("bar")),
        ]);

        assert_eq!(
            result,
            if_eq(
                sel(42, obj()),
                tt_con(),
                success("foo"),
                bind(sel(42, obj()), "a", success("bar"))
            )
        );
    }

    #[test]
    fn test_match_wildcard() {
        let (result, _) = compile(vec![(Pattern::Wildcard, rhs("true"))]);

        assert_eq!(result, success("true"));
    }

    #[test]
    fn test_match_nested_var() {
        let (result, _) = compile(vec![(pair(var("a"), var("b")), rhs("foo"))]);

        assert_eq!(
            result,
            bind(sel(0, obj()), "a", bind(sel(1, obj()), "b", success("foo")))
        );
    }

    #[test]
    fn test_match_multiple_patterns() {
        let (result, diags) = compile(vec![
            (tt(), rhs("true")),
            (ff(), rhs("false")),
            (tt(), rhs("redundant")),
        ]);

        // Redundant patterns don't show up in the decision tree. This is also
        // how redundant patterns are detected: every RHS is marked as
        // reachable when its Success node is produced, so any RHS that
        // remains unmarked belongs to a redundant pattern.
        assert_eq!(
            result,
            if_eq(obj(), tt_con(), success("true"), success("false"))
        );

        assert!(diags.reachable.contains(&"true".to_string()));
        assert!(diags.reachable.contains(&"false".to_string()));
        assert!(!diags.reachable.contains(&"redundant".to_string()));
    }

    #[test]
    fn test_nonexhaustive_match() {
        let (result, diags) = compile(vec![(tt(), rhs("true"))]);

        assert_eq!(result, if_eq(obj(), tt_con(), success("true"), failure()));
        assert_eq!(diags.messages, vec!["Missing pattern: _".to_string()]);
    }

    #[test]
    fn test_nonexhaustive_match_from_paper() {
        let green = Pattern::Constructor(con("green", 0, 3), Vec::new());
        let (result, diags) = compile(vec![
            (pair(tt(), green.clone()), rhs("111")),
            (pair(ff(), green), rhs("222")),
        ]);

        assert_eq!(
            result,
            if_eq(
                sel(0, obj()),
                tt_con(),
                if_eq(
                    sel(1, obj()),
                    con("green", 0, 3),
                    success("111"),
                    failure()
                ),
                if_eq(
                    sel(1, obj()),
                    con("green", 0, 3),
                    success("222"),
                    failure()
                )
            )
        );

        assert_eq!(
            diags.messages,
            vec![
                "Missing pattern: pair(true, _)".to_string(),
                "Missing pattern: pair(false, _)".to_string()
            ]
        );
    }

    #[test]
    fn test_nested_match() {
        let (result, _) = compile(vec![
            (pair(tt(), tt()), rhs("foo")),
            (pair(tt(), ff()), rhs("bar")),
            (pair(ff(), ff()), rhs("baz")),
            (pair(ff(), tt()), rhs("quix")),
        ]);

        assert_eq!(
            result,
            if_eq(
                sel(0, obj()),
                tt_con(),
                if_eq(sel(1, obj()), tt_con(), success("foo"), success("bar")),
                if_eq(sel(1, obj()), ff_con(), success("baz"), success("quix"))
            )
        );
    }

    #[test]
    fn test_match_with_switch() {
        let a = con("a", 0, 4);
        let b = con("b", 0, 4);
        let c = con("c", 0, 4);
        let d = con("d", 0, 4);
        let a_pat = Pattern::Constructor(a.clone(), Vec::new());
        let b_pat = Pattern::Constructor(b.clone(), Vec::new());
        let c_pat = Pattern::Constructor(c.clone(), Vec::new());
        let d_pat = Pattern::Constructor(d.clone(), Vec::new());
        let (result, _) = compile(vec![
            (a_pat, rhs("a")),
            (b_pat, rhs("b")),
            (c_pat, rhs("c")),
            (d_pat, rhs("d")),
        ]);

        // The last constructor isn't tested explicitly, so it ends up as the
        // fallback of the switch rather than as one of its cases.
        assert_eq!(
            result.replace_nested_if(),
            switch(
                obj(),
                vec![(a, success("a")), (b, success("b")), (c, success("c"))],
                success("d")
            )
        );
    }

    #[test]
    fn test_nested_match_without_switch() {
        let (result, _) = compile(vec![
            (pair(tt(), tt()), rhs("foo")),
            (pair(tt(), ff()), rhs("bar")),
            (pair(ff(), ff()), rhs("baz")),
            (pair(ff(), tt()), rhs("quix")),
        ]);

        // This doesn't produce a switch, as the nested patterns don't test the
        // same value.
        assert_eq!(
            result.replace_nested_if(),
            if_eq(
                sel(0, obj()),
                tt_con(),
                if_eq(sel(1, obj()), tt_con(), success("foo"), success("bar")),
                if_eq(sel(1, obj()), ff_con(), success("baz"), success("quix"))
            )
        );
    }

    #[test]
    fn test_match_with_args() {
        let some = con("some", 3, 2);
        let (result, _) = compile(vec![
            (
                Pattern::Constructor(some.clone(), vec![ff(), tt(), tt()]),
                rhs("foo"),
            ),
            (var("x"), rhs("bar")),
        ]);

        assert_eq!(
            result,
            if_eq(
                obj(),
                some,
                if_eq(
                    sel(0, obj()),
                    tt_con(),
                    if_eq(
                        sel(1, obj()),
                        tt_con(),
                        if_eq(
                            sel(2, obj()),
                            ff_con(),
                            success("foo"),
                            bind(obj(), "x", success("bar")),
                        ),
                        bind(obj(), "x", success("bar")),
                    ),
                    bind(obj(), "x", success("bar")),
                ),
                bind(obj(), "x", success("bar")),
            )
        );
    }

    #[test]
    fn test_match_with_infinite_span() {
        let (result, diag) = compile(vec![(
            Pattern::Constructor(con("int", 0, 0), Vec::new()),
            rhs("foo"),
        )]);

        assert_eq!(
            result,
            if_eq(obj(), con("int", 0, 0), success("foo"), failure())
        );
        assert_eq!(diag.messages, vec!["Missing pattern: _"]);
    }

    #[test]
    fn test_match_nonexhaustive_with_args() {
        let some = con("some", 3, 2);
        let (result, diags) = compile(vec![(
            Pattern::Constructor(some.clone(), vec![ff(), ff(), tt()]),
            rhs("foo"),
        )]);

        assert_eq!(
            result,
            if_eq(
                obj(),
                some,
                if_eq(
                    sel(0, obj()),
                    tt_con(),
                    if_eq(
                        sel(1, obj()),
                        ff_con(),
                        if_eq(
                            sel(2, obj()),
                            ff_con(),
                            success("foo"),
                            failure()
                        ),
                        failure()
                    ),
                    failure()
                ),
                failure()
            )
        );

        assert_eq!(
            diags.messages,
            vec![
                "Missing pattern: some(true, false, _)".to_string(),
                "Missing pattern: some(true, _, _)".to_string(),
                "Missing pattern: some(_, _, _)".to_string(),
                "Missing pattern: _".to_string(),
            ]
        );
    }
}
900 |
--------------------------------------------------------------------------------
/sestoft1996/src/lib.rs:
--------------------------------------------------------------------------------
1 | #![allow(clippy::too_many_arguments)]
2 |
3 | pub mod idiomatic;
4 | pub mod raw;
5 |
--------------------------------------------------------------------------------
/sestoft1996/src/raw.rs:
--------------------------------------------------------------------------------
1 | // This implementation is a more or less 1:1 port of the SML implementation
2 | // provided in the paper, with only a few changes made to make things work in
3 | // Rust. For example, the SML implementation assumes multiple ownership of
4 | // certain values, which isn't allowed in Rust. For the sake of simplicity, we
5 | // just clone values in this case.
6 | //
7 | // This implementation doesn't use the memoization approach briefly mentioned in
8 | // section 7.5 of the paper. This requires multiple ownership of the tree nodes,
9 | // or a different way of building the tree/graph (e.g. using IDs). To keep
10 | // things simple, we skip over this.
11 | //
12 | // Because this implementation is more or less a direct translation, it's _not_
13 | // idiomatic Rust. An idiomatic implementation is provided separately.
14 | //
15 | // The Moscow ML compiler uses hash consing and a DAG as discussed in section
16 | // 7.5 of the paper.
17 | use std::collections::HashSet;
18 | use std::fmt;
19 | use std::rc::Rc;
20 |
/// A single cell of a [`List`]: a value plus a shared pointer to the rest of
/// the list.
#[derive(Eq, PartialEq)]
struct Node<T> {
    value: T,
    next: Option<Rc<Node<T>>>,
}

/// An immutable linked list.
///
/// The algorithm as presented in the paper makes use of and requires immutable
/// lists. For example, when compiling the `IfEq` nodes it compiles two
/// different branches, but assumes both start off with the same set of rules,
/// `work` values, etc. Since we're trying to stay as close to the paper as
/// possible, we also follow the use of immutable data types.
///
/// Like the rest of this implementation we're focusing on keeping things as
/// simple as is reasonable, rather than making the implementation efficient.
#[derive(Clone, Eq, PartialEq)]
pub struct List<T> {
    head: Option<Rc<Node<T>>>,
    len: usize,
}

impl<T> List<T> {
    /// Returns a new, empty list.
    fn new() -> List<T> {
        List { head: None, len: 0 }
    }

    /// Returns a new list starting with the given value.
    ///
    /// The existing nodes are shared with `self`, not copied.
    fn add(&self, value: T) -> List<T> {
        List {
            head: Some(Rc::new(Node { value, next: self.head.clone() })),
            len: self.len + 1,
        }
    }

    /// Splits a list into the head and a list of the nodes that follow it.
    ///
    /// For an empty list the head is `None` and the returned list is empty.
    fn split(&self) -> (Option<&T>, List<T>) {
        match self.head.as_ref() {
            Some(node) => (
                Some(&node.value),
                List { head: node.next.clone(), len: self.len - 1 },
            ),
            None => (None, List::new()),
        }
    }

    /// Returns an iterator over references to the values, head first.
    fn iter(&self) -> ListIter<'_, T> {
        ListIter { node: self.head.as_ref() }
    }

    /// Returns the number of values in the list.
    fn len(&self) -> usize {
        self.len
    }

    /// Returns `true` if the list contains no values.
    fn is_empty(&self) -> bool {
        self.head.is_none()
    }
}

impl<T: PartialEq> List<T> {
    /// Returns `true` if any value in the list equals `value`.
    fn contains(&self, value: &T) -> bool {
        self.iter().any(|v| v == value)
    }
}

impl<T: Clone> List<T> {
    /// Merges `self` and `other`.
    ///
    /// The result contains clones of the values of `self`, followed by clones
    /// of the values of `other`.
    fn merge(&self, other: List<T>) -> List<T> {
        // `add` prepends, so folding produces the values in reverse order and
        // we have to reverse the result once more.
        self.iter()
            .chain(other.iter())
            .fold(List::new(), |list, value| list.add(value.clone()))
            .rev()
    }

    /// Returns a new list with the values in reverse order.
    fn rev(&self) -> List<T> {
        self.iter().fold(List::new(), |list, value| list.add(value.clone()))
    }
}

impl<T: fmt::Debug> fmt::Debug for List<T> {
    /// Formats the list the same way as a slice, e.g. `[1, 2, 3]`.
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        f.debug_list().entries(self.iter()).finish()
    }
}

/// An iterator over the values in an immutable list.
struct ListIter<'a, T> {
    node: Option<&'a Rc<Node<T>>>,
}

impl<'a, T> Iterator for ListIter<'a, T> {
    type Item = &'a T;

    fn next(&mut self) -> Option<Self::Item> {
        let node = self.node.take()?;

        self.node = node.next.as_ref();
        Some(&node.value)
    }
}
132 |
/// The type used for storing diagnostic messages.
#[derive(Debug)]
pub struct Diagnostics {
    /// The diagnostic messages produced.
    ///
    /// In a real compiler this would include more than just a message, such as
    /// the line and column numbers.
    messages: Vec<String>,

    /// The right-hand values (= the code you'd run upon a match) that have been
    /// processed.
    ///
    /// If a value isn't included in this set it means it and its pattern are
    /// redundant.
    ///
    /// In a real compiler you'd probably mark AST nodes directly. In our case
    /// the right-hand values are just simple strings, so we use a set instead.
    reachable: HashSet<String>,
}
151 |
/// The `con` (= constructor) type in the paper.
///
/// Taking a boolean as an example, its constructors are described as follows:
///
/// - name: true or false
/// - arity: 0, as booleans don't take arguments
/// - span: 2, as there are only two constructors (true and false)
///
/// A real compiler would probably refer to its type data structures using
/// pointers or IDs. To keep things simple we just use a struct that can be
/// cloned.
#[derive(Clone, Debug, PartialEq, Eq)]
pub struct Con {
    /// The name of the constructor.
    name: String,

    /// The number of arguments.
    arity: usize,

    /// The total number of constructors of the owning type.
    ///
    /// A span of 0 means the type has an infinite amount of constructors.
    span: usize,
}
175 |
176 | /// A user provided pattern to match against an input value.
177 | ///
178 | /// We only provide two types of patterns: constructors, and variables/bindings.
179 | ///
180 | /// In a real compiler you'd probably be using AST nodes instead of dedicated
181 | /// pattern types, and include more cases for specific patterns (e.g. tuple and
182 | /// struct patterns).
183 | #[derive(Debug, Clone)]
184 | pub enum Pattern {
185 | Cons(Con, List),
186 | Var(String),
187 | }
188 |
189 | /// The `termd` type from the paper.
190 | #[derive(Debug, Clone, Eq, PartialEq)]
191 | pub enum TermDesc {
192 | // `Cons` is the top-most constructor, and its components are described by
193 | // the Vec.
194 | Pos(Con, List),
195 |
196 | // Any term who's top-most constructor is _not_ any of the listed
197 | // constructors.
198 | //
199 | // For a Negative(S), the cardinality of S must be less than the span of
200 | // any constructor in S:
201 | //
202 | // cons.iter().all(|cons| cardinality(s) < span(cons))
203 | //
204 | // Due to static typing, all constructors in S are of the same type, thus
205 | // have the same span.
206 | Neg(List),
207 | }
208 |
209 | impl TermDesc {
210 | /// Returns a string used to describe this term in an error message.
211 | ///
212 | /// In a real compiler you'd do the following:
213 | ///
214 | /// For a Pos, just display the pattern/type/whatever name
215 | ///
216 | /// For a Neg(list), obtain all possible values from the constructor, ignore
217 | /// those in "list", then produce a name using the remaining values. So if
218 | /// "list" is `[red]`, and the possible values are `[red, green, blue]`, the
219 | /// returned string could be `green | blue`. If this is nested inside a
220 | /// `Pos("tuple", ...)` node you'd end up with something like
221 | /// `tuple(green | blue)`.
222 | ///
223 | /// For the sake of simplicity we just return `_` for a Neg.
224 | fn error_string(&self) -> String {
225 | match self {
226 | TermDesc::Pos(cons, args) => {
227 | if args.is_empty() {
228 | cons.name.clone()
229 | } else {
230 | format!(
231 | "{}({})",
232 | cons.name,
233 | args.iter()
234 | .map(|v| v.error_string())
235 | .collect::>()
236 | .join(", ")
237 | )
238 | }
239 | }
240 | TermDesc::Neg(_) => "_".to_string(),
241 | }
242 | }
243 | }
244 |
/// The `access` type in the paper.
///
/// An access path describes how to reach the value that a test applies to.
#[derive(Clone, Debug, Eq, PartialEq)]
pub enum Access {
    /// The value being matched against.
    Obj,

    /// The component at the given index of the value at the given path.
    Sel(usize, Box<Access>),
}
251 |
252 | /// The `decision` type in the paper.
253 | #[derive(Debug, Eq, PartialEq, Clone)]
254 | pub enum Decision {
255 | /// A pattern didn't match.
256 | Failure,
257 |
258 | /// A pattern is matched and the right-hand value is to be returned.
259 | Success(RHS),
260 |
261 | /// Checks if a constructor matches the value at the given access path.
262 | ///
263 | /// The members are as follows:
264 | ///
265 | /// 1. The value to test against
266 | /// 2. The pattern/value to match against
267 | /// 3. The path to take upon a match
268 | /// 4. The path to take upon a failure
269 | ///
270 | /// A node like this:
271 | ///
272 | /// IfEq(Sel(0, Obj), x, ok, err)
273 | ///
274 | /// Translates to roughly the following pseudo code:
275 | ///
276 | /// if obj.0 is x {
277 | /// ok
278 | /// } else {
279 | /// err
280 | /// }
281 | IfEq(Access, Con, Box, Box),
282 |
283 | /// Checks if any of the given constructors match the value at the given
284 | /// access path.
285 | ///
286 | /// The members are as follows:
287 | ///
288 | /// 1. The value to test against
289 | /// 2. The list of constructors to test and their corresponding decisions
290 | /// 3. A fallback decision in case no patterns match
291 | ///
292 | /// The fallback is needed because given a type with a span of N, IfEq nodes
293 | /// only test N-1 constructors, as the last possible constructor is
294 | /// implicitly assumed in the IfEq node's "else" body. That is, IfEq tests
295 | /// are like this:
296 | ///
297 | /// if value is green {
298 | /// ...
299 | /// } else {
300 | /// if value is red {
301 | /// ...
302 | /// } else {
303 | /// ..
304 | /// }
305 | /// }
306 | ///
307 | /// And not like this:
308 | ///
309 | /// if value is green {
310 | /// ...
311 | /// } else if value is red {
312 | /// ...
313 | /// } else if value is blue {
314 | /// ...
315 | /// }
316 | ///
317 | /// A real compiler may have to somehow "lift" the fallback into a
318 | /// switch/jump case.
319 | Switch(Access, List<(Con, Decision)>, Box),
320 | }
321 |
/// The outcome of the `staticmatch` function (called `static_match` in more
/// idiomatic Rust).
#[derive(Debug)]
enum StaticMatch {
    /// The constructor is known to match the term.
    Yes,

    /// The constructor is known not to match the term.
    No,

    /// Whether the constructor matches can't be determined statically.
    Maybe,
}
329 |
330 | /// `type context = (con * termd list) list` in the paper.
331 | type Context = List<(Con, List)>;
332 |
333 | /// The work stack as used in the paper.
334 | ///
335 | /// The paper uses a list of triple lists, removing the need for some append
336 | /// operations. This is a bit annoying to work with in Rust (we have to unwrap()
337 | /// in some places), but again we're trying to stay as close to the paper as
338 | /// possible.
339 | ///
340 | /// We use a type alias here so we don't have to re-type this type name in the
341 | /// various places that it's used.
342 | type Work = List<(List, List, List)>;
343 |
344 | /// The type of the right-hand side of a case (i.e. the code to run).
345 | ///
346 | /// For the sake of simplicity we just use a String here. In a real compiler
347 | /// this would probably be an AST node or another sort of IR to run upon a
348 | /// match.
349 | pub type RHS = String;
350 |
351 | /// The `addneg` function in the paper.
352 | fn addneg(dsc: TermDesc, con: Con) -> TermDesc {
353 | match dsc {
354 | // The paper introduces this function as a non-exhaustive function. The
355 | // implementation in the Moscow ML compiler just returns the term when
356 | // it's a Pos, so we do the same.
357 | TermDesc::Pos(_, _) => dsc,
358 | TermDesc::Neg(nonset) => TermDesc::Neg(nonset.add(con)),
359 | }
360 | }
361 |
362 | /// The `staticmatch` function in the paper.
363 | fn staticmatch(pcon: &Con, term: &TermDesc) -> StaticMatch {
364 | match term {
365 | TermDesc::Pos(scon, _) => {
366 | if pcon == scon {
367 | StaticMatch::Yes
368 | } else {
369 | StaticMatch::No
370 | }
371 | }
372 | TermDesc::Neg(excluded) => {
373 | if excluded.contains(pcon) {
374 | StaticMatch::No
375 | } else if pcon.span == (excluded.len() + 1) {
376 | // The way this works is as follows:
377 | //
378 | // A boolean has a span of two, as it has two constructors (true
379 | // and false).
380 | //
381 | // The `if` above means we determined our constructor IS NOT in
382 | // the deny list. Due to static typing, our list can't
383 | // contain unrelated constructors (e.g. an ADT constructor).
384 | //
385 | // Thus, if the length of the deny list is one less than the
386 | // span of our type, we know for a fact our constructor matches
387 | // the remaining constructor.
388 | //
389 | // In other words: we know we are NOT A, B, and C, and the only
390 | // remaining option is D. Thus, we match D.
391 | StaticMatch::Yes
392 | } else {
393 | StaticMatch::Maybe
394 | }
395 | }
396 | }
397 | }
398 |
399 | /// The equivalent of `List.tabulate` as used in the paper.
400 | ///
401 | /// This function is kind of pointless in Rust as we could just use map(), but
402 | /// we try to stay as close to the paper as possible in this implementation.
403 | fn tabulate T>(length: usize, func: F) -> List {
404 | let mut list = List::new();
405 |
406 | for val in (0..length).rev() {
407 | list = list.add(func(val));
408 | }
409 |
410 | list
411 | }
412 |
413 | fn args T>(pcon: &Con, func: F) -> List {
414 | tabulate(pcon.arity, func)
415 | }
416 |
417 | fn getdargs(pcon: &Con, term: TermDesc) -> List {
418 | match term {
419 | TermDesc::Pos(_, dargs) => dargs,
420 | TermDesc::Neg(_) => {
421 | tabulate(pcon.arity, |_| TermDesc::Neg(List::new()))
422 | }
423 | }
424 | }
425 |
426 | fn getoargs(pcon: &Con, acc: Access) -> List {
427 | // The paper uses `i+1`, presumably because humans use "1" to address the
428 | // first element (or maybe this is an SML thing?). Unfortunately, this isn't
429 | // clarified in the paper. Since it doesn't seem to actually matter, and
430 | // basically everyting is 0-indexed, we drop the +1 here.
431 | args(pcon, |i| Access::Sel(i, Box::new(acc.clone())))
432 | }
433 |
434 | fn augment(ctx: Context, dsc: TermDesc) -> Context {
435 | let (val, rest) = ctx.split();
436 |
437 | if let Some((con, args)) = val {
438 | rest.add((con.clone(), args.add(dsc)))
439 | } else {
440 | rest
441 | }
442 | }
443 |
444 | fn norm(ctx: Context) -> Context {
445 | let (val, rest) = ctx.split();
446 |
447 | if let Some((con, args)) = val {
448 | augment(rest, TermDesc::Pos(con.clone(), args.rev()))
449 | } else {
450 | rest
451 | }
452 | }
453 |
454 | fn builddsc(ctx: Context, dsc: TermDesc, work: Work) -> TermDesc {
455 | if let (Some((con, args)), rest) = ctx.split() {
456 | let (job, workr) = work.split();
457 | let (_, _, dargs) = job.unwrap();
458 |
459 | // The paper uses the following code for this:
460 | //
461 | // rev args @ (dsc :: dargs)
462 | //
463 | // SML parses this as follows:
464 | //
465 | // (rev args) @ (dsc :: dargs)
466 | //
467 | // That is: it first reverses `args`, then appends the result of
468 | // `(dsc :: dargs)` to it. If you were to _first_ merge the values and
469 | // then reverse, you'd get incorrect decision trees. Unfortunately, I
470 | // ran into exactly that bug, and it took me a few hours to figure out.
471 | // And this is why functions with arguments should use parentheses and
472 | // commas :)
473 | let new_dsc =
474 | TermDesc::Pos(con.clone(), args.rev().merge(dargs.add(dsc)));
475 |
476 | builddsc(rest, new_dsc, workr)
477 | } else {
478 | dsc
479 | }
480 | }
481 |
482 | fn fail(
483 | dsc: TermDesc,
484 | rules: List<(Pattern, RHS)>,
485 | diags: &mut Diagnostics,
486 | ) -> Decision {
487 | if let (Some((pat1, rhs1)), rulesr) = rules.split() {
488 | matches(
489 | pat1.clone(),
490 | Access::Obj,
491 | dsc,
492 | List::new(),
493 | List::new(),
494 | rhs1.clone(),
495 | rulesr,
496 | diags,
497 | )
498 | } else {
499 | diags.messages.push(format!("Missing pattern: {}", dsc.error_string()));
500 | Decision::Failure
501 | }
502 | }
503 |
504 | fn succeed(
505 | ctx: Context,
506 | work: Work,
507 | rhs: RHS,
508 | rules: List<(Pattern, RHS)>,
509 | diags: &mut Diagnostics,
510 | ) -> Decision {
511 | if let (Some((pats, accs, dscs)), workr) = work.split() {
512 | if pats.is_empty() && accs.is_empty() && dscs.is_empty() {
513 | succeed(norm(ctx), workr, rhs, rules, diags)
514 | } else {
515 | let (pat1, patr) = pats.split();
516 | let (obj1, objr) = accs.split();
517 | let (dsc1, dscr) = dscs.split();
518 |
519 | matches(
520 | pat1.unwrap().clone(),
521 | obj1.unwrap().clone(),
522 | dsc1.unwrap().clone(),
523 | ctx,
524 | workr.add((patr, objr, dscr)),
525 | rhs,
526 | rules,
527 | diags,
528 | )
529 | }
530 | } else {
531 | diags.reachable.insert(rhs.clone());
532 | Decision::Success(rhs)
533 | }
534 | }
535 |
536 | /// This corresponds to the inner function `succeed'` in the paper.
537 | fn match_succeed(
538 | pcon: Con,
539 | pargs: List,
540 | obj: Access,
541 | dsc: TermDesc,
542 | ctx: Context,
543 | work: Work,
544 | rhs: RHS,
545 | rules: List<(Pattern, RHS)>,
546 | diags: &mut Diagnostics,
547 | ) -> Decision {
548 | let oargs = getoargs(&pcon, obj);
549 | let dargs = getdargs(&pcon, dsc);
550 |
551 | succeed(
552 | ctx.add((pcon, List::new())),
553 | work.add((pargs, oargs, dargs)),
554 | rhs,
555 | rules,
556 | diags,
557 | )
558 | }
559 |
560 | /// This corresponds to the inner function `fail'` in the paper.
561 | fn match_fail(
562 | newdsc: TermDesc,
563 | ctx: Context,
564 | work: Work,
565 | rules: List<(Pattern, RHS)>,
566 | diags: &mut Diagnostics,
567 | ) -> Decision {
568 | fail(builddsc(ctx, newdsc, work), rules, diags)
569 | }
570 |
571 | fn matches(
572 | pat1: Pattern,
573 | obj: Access,
574 | dsc: TermDesc,
575 | ctx: Context,
576 | work: Work,
577 | rhs: RHS,
578 | rules: List<(Pattern, RHS)>,
579 | diags: &mut Diagnostics,
580 | ) -> Decision {
581 | match pat1 {
582 | Pattern::Var(_) => succeed(augment(ctx, dsc), work, rhs, rules, diags),
583 | Pattern::Cons(pcon, pargs) => match staticmatch(&pcon, &dsc) {
584 | StaticMatch::Yes => match_succeed(
585 | pcon, pargs, obj, dsc, ctx, work, rhs, rules, diags,
586 | ),
587 | StaticMatch::No => match_fail(dsc, ctx, work, rules, diags),
588 | StaticMatch::Maybe => {
589 | // In the paper the equivalent code makes two assumptions that
590 | // don't work in Rust:
591 | //
592 | // 1. Certain values can have multiple owners (e.g. the `dsc`
593 | // value is shared between functions).
594 | // 2. When building the subtree for a matched value, the
595 | // algorithm expects that variables such as `work` and
596 | // `rules` _are not_ modified in place. If they are,
597 | // generating the subtree for an unmatched value produces
598 | // incorrect results.
599 | //
600 | // In case of shared ownership we just clone the values. In a
601 | // real compiler that probably wouldn't work very well, but for
602 | // the sake of this implementation it's good enough.
603 | Decision::IfEq(
604 | obj.clone(),
605 | pcon.clone(),
606 | Box::new(match_succeed(
607 | pcon.clone(),
608 | pargs,
609 | obj,
610 | dsc.clone(),
611 | ctx.clone(),
612 | work.clone(),
613 | rhs,
614 | rules.clone(),
615 | diags,
616 | )),
617 | Box::new(match_fail(
618 | addneg(dsc, pcon),
619 | ctx,
620 | work,
621 | rules,
622 | diags,
623 | )),
624 | )
625 | }
626 | },
627 | }
628 | }
629 |
630 | /// Recursively collects cases for a Switch node.
631 | ///
632 | /// This is based on the `collect` function as found in the Moscow ML compiler.
633 | fn collect(
634 | root_acc: &Access,
635 | cases: List<(Con, Decision)>,
636 | decision: Decision,
637 | ) -> (List<(Con, Decision)>, Decision) {
638 | match decision {
639 | Decision::IfEq(acc, con, ok, fail) if root_acc == &acc => {
640 | let (cases, dec) = collect(root_acc, cases, *fail);
641 |
642 | // We add our case _after_ recursing, ensuring the order of values
643 | // in the list is the same as the order of matches. If we were to
644 | // add _before_ recursing, the list would be in reverse order.
645 | (cases.add((con, *ok)), dec)
646 | }
647 | _ => (cases, decision),
648 | }
649 | }
650 |
651 | /// Replacing a series of nested IfEq nodes for the same access object with a
652 | /// Switch node.
653 | pub fn switchify(tree: Decision) -> Decision {
654 | match tree {
655 | Decision::IfEq(acc, con, ok, fail) => {
656 | let (cases, fallback) = collect(&acc, List::new(), *fail);
657 |
658 | if cases.is_empty() {
659 | Decision::IfEq(acc, con, ok, Box::new(fallback))
660 | } else {
661 | Decision::Switch(acc, cases.add((con, *ok)), Box::new(fallback))
662 | }
663 | }
664 | _ => tree,
665 | }
666 | }
667 |
668 | /// Compiles a list of rules into a decision tree.
669 | pub fn compile(rules: List<(Pattern, RHS)>) -> (Decision, Diagnostics) {
670 | let mut diags =
671 | Diagnostics { messages: Vec::new(), reachable: HashSet::new() };
672 |
673 | (fail(TermDesc::Neg(List::new()), rules, &mut diags), diags)
674 | }
675 |
#[cfg(test)]
mod tests {
    use super::*;

    /// A macro for creating a linked list.
    ///
    /// Rust has no (linked) list literals, so we use this macro instead.
    /// Basically whenever you have the SML expression `[a, b, c]`, you'd
    /// instead use `list![a, b, c]`.
    ///
    /// The values are added to the list back to front, so iterating over the
    /// resulting list yields them in the order they were written.
    macro_rules! list {
        ($($value: expr),*$(,)?) => {{
            let temp = vec![$($value),*];
            let mut list = List::new();

            for val in temp.into_iter().rev() {
                list = list.add(val);
            }

            list
        }}
    }

    /// Builds a constructor with the given name, arity and span.
    fn con(name: &str, arity: usize, span: usize) -> Con {
        Con { name: name.to_string(), arity, span }
    }

    /// A pattern for the argument-less `nil` constructor.
    fn nil() -> Pattern {
        Pattern::Cons(con("nil", 0, 1), List::new())
    }

    /// The `true` constructor.
    fn tt_con() -> Con {
        con("true", 0, 2)
    }

    /// The `false` constructor.
    fn ff_con() -> Con {
        con("false", 0, 2)
    }

    /// A pattern matching the `true` constructor.
    fn tt() -> Pattern {
        Pattern::Cons(tt_con(), List::new())
    }

    /// A pattern matching the `false` constructor.
    fn ff() -> Pattern {
        Pattern::Cons(ff_con(), List::new())
    }

    /// A pattern matching a `pair` constructor with the two sub patterns.
    fn pair(a: Pattern, b: Pattern) -> Pattern {
        Pattern::Cons(con("pair", 2, 1), list![a, b])
    }

    /// A variable pattern with the given name.
    fn var(name: &str) -> Pattern {
        Pattern::Var(name.to_string())
    }

    /// An `IfEq` node, boxing both branches.
    fn if_eq(acc: Access, con: Con, ok: Decision, fail: Decision) -> Decision {
        Decision::IfEq(acc, con, Box::new(ok), Box::new(fail))
    }

    /// A `Switch` node, boxing the fallback.
    fn switch(
        acc: Access,
        cases: List<(Con, Decision)>,
        fallback: Decision,
    ) -> Decision {
        Decision::Switch(acc, cases, Box::new(fallback))
    }

    /// A `Success` node for the given right-hand side.
    fn success(value: &str) -> Decision {
        Decision::Success(value.to_string())
    }

    /// A `Failure` node.
    fn failure() -> Decision {
        Decision::Failure
    }

    /// The right-hand side of a rule.
    fn rhs(value: &str) -> String {
        value.to_string()
    }

    /// Access to the value being matched itself.
    fn obj() -> Access {
        Access::Obj
    }

    /// Access to the component at `index` of what `acc` refers to.
    fn sel(index: usize, acc: Access) -> Access {
        Access::Sel(index, Box::new(acc))
    }

    #[test]
    fn test_list_push_pop() {
        let list1 = List::new();
        let list2 = list1.add(10);
        let list3 = list2.add(20);

        assert!(list1.head.is_none());
        assert!(list2.head.is_some());
        assert!(list3.head.is_some());

        // NOTE(review): the repeated assertion presumably checks that
        // splitting leaves the list untouched; confirm this is intended.
        assert_eq!(list2.split().0, Some(&10));
        assert_eq!(list2.split().0, Some(&10));
        assert_eq!(list3.split().0, Some(&20));
    }

    #[test]
    fn test_list_rev() {
        let list1 = list![3, 2, 1];
        let list2 = list1.rev();

        assert_eq!(list1.iter().collect::<Vec<_>>(), vec![&3, &2, &1]);
        assert_eq!(list2.iter().collect::<Vec<_>>(), vec![&1, &2, &3]);
    }

    #[test]
    fn test_list_rev_and_merge() {
        let list1 = list![3, 2, 1];
        let list2 = list![4];
        let list3 = list1.rev().merge(list2.add(10));

        assert_eq!(list3.iter().collect::<Vec<_>>(), vec![&1, &2, &3, &10, &4]);
    }

    #[test]
    fn test_list_merge() {
        let list1 = list![1, 2];
        let list2 = list![3, 4];
        let list3 = list1.merge(list2);

        assert_eq!(list3.iter().collect::<Vec<_>>(), vec![&1, &2, &3, &4]);
    }

    #[test]
    fn test_term_desc_error_string() {
        let term = TermDesc::Pos(
            con("box", 2, 1),
            list![
                TermDesc::Pos(con("true", 0, 2), List::new()),
                TermDesc::Neg(list![con("false", 0, 2)])
            ],
        );

        assert_eq!(term.error_string(), "box(true, _)");
    }

    #[test]
    fn test_tabulate() {
        let vals = tabulate(3, |v| v);

        assert_eq!(vals.iter().collect::<Vec<_>>(), vec![&0, &1, &2]);
    }

    #[test]
    fn test_args() {
        let con = con("box", 2, 1);
        let vals = args(&con, |v| v);

        assert_eq!(vals.iter().collect::<Vec<_>>(), vec![&0, &1]);
    }

    #[test]
    fn test_getdargs_with_pos_term() {
        let con = con("box", 2, 1);
        let term =
            TermDesc::Pos(con.clone(), list![TermDesc::Neg(List::new())]);
        let args = getdargs(&con, term);
        let arg = args.iter().next();

        assert!(matches!(arg, Some(TermDesc::Neg(_))));
    }

    #[test]
    fn test_getdargs_with_neg_term() {
        let con = con("box", 2, 1);
        let term = TermDesc::Neg(List::new());
        let args = getdargs(&con, term);
        let mut iter = args.iter();

        assert!(matches!(iter.next(), Some(TermDesc::Neg(_))));
        assert!(matches!(iter.next(), Some(TermDesc::Neg(_))));
    }

    #[test]
    fn test_getoargs() {
        let con = con("box", 2, 1);
        let acc = sel(42, obj());
        let args = getoargs(&con, acc);

        assert_eq!(
            args.iter().collect::<Vec<_>>(),
            vec![&sel(0, sel(42, obj())), &sel(1, sel(42, obj()))]
        );
    }

    #[test]
    fn test_builddsc() {
        let ctx = list![(
            con("baz", 0, 1),
            list![
                TermDesc::Neg(list![con("arg1", 0, 1)]),
                TermDesc::Neg(list![con("arg2", 0, 1)]),
            ]
        )];
        let work = list![(
            List::new(),
            List::new(),
            list![
                TermDesc::Neg(list![con("work1", 0, 1)]),
                TermDesc::Neg(list![con("work2", 0, 1)])
            ]
        )];
        let dsc = TermDesc::Neg(list![con("bar", 0, 1)]);
        let new_dsc = builddsc(ctx, dsc, work);

        assert_eq!(
            new_dsc,
            TermDesc::Pos(
                con("baz", 0, 1),
                list![
                    TermDesc::Neg(list![con("arg2", 0, 1)]),
                    TermDesc::Neg(list![con("arg1", 0, 1)]),
                    TermDesc::Neg(list![con("bar", 0, 1)]),
                    TermDesc::Neg(list![con("work1", 0, 1)]),
                    TermDesc::Neg(list![con("work2", 0, 1)]),
                ]
            )
        );
    }

    #[test]
    fn test_augment() {
        let ctx = list![(
            con("baz", 0, 1),
            list![
                TermDesc::Neg(list![con("arg1", 0, 1)]),
                TermDesc::Neg(list![con("arg2", 0, 1)]),
            ]
        )];

        let dsc = TermDesc::Neg(list![con("bar", 0, 1)]);
        let new_ctx = augment(ctx, dsc);

        assert_eq!(
            new_ctx,
            list![(
                con("baz", 0, 1),
                list![
                    TermDesc::Neg(list![con("bar", 0, 1)]),
                    TermDesc::Neg(list![con("arg1", 0, 1)]),
                    TermDesc::Neg(list![con("arg2", 0, 1)]),
                ]
            )]
        );
    }

    #[test]
    fn test_match_always_succeeds() {
        let (result, _) = compile(list![(nil(), rhs("true"))]);

        assert_eq!(result, success("true"));
    }

    #[test]
    fn test_match_always_fails() {
        let (result, _) = compile(List::new());

        assert_eq!(result, failure());
    }

    #[test]
    fn test_match_single_pattern() {
        let (result, _) =
            compile(list![(tt(), rhs("true")), (ff(), rhs("false")),]);

        assert_eq!(
            result,
            if_eq(obj(), tt_con(), success("true"), success("false"))
        );
    }

    #[test]
    fn test_match_var() {
        let (result, _) = compile(list![(var("a"), rhs("true"))]);

        assert_eq!(result, success("true"));
    }

    #[test]
    fn test_match_multiple_patterns() {
        let (result, diags) = compile(list![
            (tt(), rhs("true")),
            (ff(), rhs("false")),
            (tt(), rhs("redundant"))
        ]);

        // Redundant patterns are ignored on the decision tree. This is also how
        // you'd detect redundant patterns: you'd somehow mark every RHS when
        // you produce their Success nodes. Any RHS nodes that remain unmarked
        // are redundant.
        assert_eq!(
            result,
            if_eq(obj(), tt_con(), success("true"), success("false"))
        );

        assert!(diags.reachable.contains(&"true".to_string()));
        assert!(diags.reachable.contains(&"false".to_string()));
        assert!(!diags.reachable.contains(&"redundant".to_string()));
    }

    #[test]
    fn test_nonexhaustive_match() {
        let (result, diags) = compile(list![(tt(), rhs("true")),]);

        assert_eq!(result, if_eq(obj(), tt_con(), success("true"), failure()));
        assert_eq!(diags.messages, vec!["Missing pattern: _".to_string()]);
    }

    #[test]
    fn test_nonexhaustive_match_from_paper() {
        let green = Pattern::Cons(con("green", 0, 3), List::new());
        let (result, diags) = compile(list![
            (pair(tt(), green.clone()), rhs("111")),
            (pair(ff(), green.clone()), rhs("222")),
        ]);

        assert_eq!(
            result,
            if_eq(
                sel(0, obj()),
                tt_con(),
                if_eq(
                    sel(1, obj()),
                    con("green", 0, 3),
                    success("111"),
                    failure()
                ),
                if_eq(
                    sel(1, obj()),
                    con("green", 0, 3),
                    success("222"),
                    failure()
                )
            )
        );

        assert_eq!(
            diags.messages,
            vec![
                "Missing pattern: pair(true, _)".to_string(),
                "Missing pattern: pair(false, _)".to_string()
            ]
        );
    }

    #[test]
    fn test_nested_match() {
        let (result, _) = compile(list![
            (pair(tt(), tt()), rhs("foo")),
            (pair(tt(), ff()), rhs("bar")),
            (pair(ff(), ff()), rhs("baz")),
            (pair(ff(), tt()), rhs("quix")),
        ]);

        assert_eq!(
            result,
            if_eq(
                sel(0, obj()),
                tt_con(),
                if_eq(sel(1, obj()), tt_con(), success("foo"), success("bar")),
                if_eq(sel(1, obj()), ff_con(), success("baz"), success("quix"))
            )
        );
    }

    #[test]
    fn test_match_with_switchify() {
        let a = con("a", 0, 4);
        let b = con("b", 0, 4);
        let c = con("c", 0, 4);
        let d = con("d", 0, 4);
        let a_pat = Pattern::Cons(a.clone(), List::new());
        let b_pat = Pattern::Cons(b.clone(), List::new());
        let c_pat = Pattern::Cons(c.clone(), List::new());
        let d_pat = Pattern::Cons(d.clone(), List::new());
        let (result, _) = compile(list![
            (a_pat, rhs("a")),
            (b_pat, rhs("b")),
            (c_pat, rhs("c")),
            (d_pat, rhs("d"))
        ]);

        assert_eq!(
            switchify(result),
            switch(
                obj(),
                list![(a, success("a")), (b, success("b")), (c, success("c"))],
                success("d")
            )
        );
    }

    #[test]
    fn test_nested_match_without_switch() {
        let (result, _) = compile(list![
            (pair(tt(), tt()), rhs("foo")),
            (pair(tt(), ff()), rhs("bar")),
            (pair(ff(), ff()), rhs("baz")),
            (pair(ff(), tt()), rhs("quix")),
        ]);

        // This doesn't produce a switch, as the nested patterns don't test the
        // same value.
        assert_eq!(
            switchify(result),
            if_eq(
                sel(0, obj()),
                tt_con(),
                if_eq(sel(1, obj()), tt_con(), success("foo"), success("bar")),
                if_eq(sel(1, obj()), ff_con(), success("baz"), success("quix"))
            )
        );
    }

    #[test]
    fn test_match_with_args() {
        let some = con("some", 3, 2);
        let (result, _) = compile(list![
            (Pattern::Cons(some.clone(), list![tt(), tt(), ff()]), rhs("foo")),
            (var("x"), rhs("bar"))
        ]);

        assert_eq!(
            result,
            if_eq(
                obj(),
                some,
                if_eq(
                    sel(0, obj()),
                    tt_con(),
                    if_eq(
                        sel(1, obj()),
                        tt_con(),
                        if_eq(
                            sel(2, obj()),
                            ff_con(),
                            success("foo"),
                            success("bar")
                        ),
                        success("bar")
                    ),
                    success("bar")
                ),
                success("bar")
            )
        );
    }

    #[test]
    fn test_match_nonexhaustive_with_args() {
        let some = con("some", 3, 2);
        let (result, diags) = compile(list![(
            Pattern::Cons(some.clone(), list![tt(), ff(), ff()]),
            rhs("foo")
        ),]);

        assert_eq!(
            result,
            if_eq(
                obj(),
                some,
                if_eq(
                    sel(0, obj()),
                    tt_con(),
                    if_eq(
                        sel(1, obj()),
                        ff_con(),
                        if_eq(
                            sel(2, obj()),
                            ff_con(),
                            success("foo"),
                            failure()
                        ),
                        failure()
                    ),
                    failure()
                ),
                failure()
            )
        );

        assert_eq!(
            diags.messages,
            vec![
                "Missing pattern: some(true, false, _)".to_string(),
                "Missing pattern: some(true, _, _)".to_string(),
                "Missing pattern: some(_, _, _)".to_string(),
                "Missing pattern: _".to_string(),
            ]
        );
    }
}
1176 |
--------------------------------------------------------------------------------