├── CHANGES.txt
├── README.md
├── bench.lua
├── files
└── tamale.html
├── index.html
├── luadoc.css
├── modules
└── tamale.html
├── tamale-1.2.2-1.rockspec
├── tamale.lua
└── test.lua
/CHANGES.txt:
--------------------------------------------------------------------------------
1 | v1.2.1:
2 | * Return to comparing strings by ==, and added case for when pattern is a closure instead. This is more predictable, and removes a bunch of indexing special cases. Also added utility function tamale.P, which converts a string literal to a pattern string matcher (i.e., P"num %d+").
3 | * Added "partial" row flag and special V"..." variable for allowing/capturing extra value fields not present in the row pattern.
4 | * Thanks to David Manura, Javier Guerra Giraldez, and Steve Donovan for feedback.
5 |
6 | v1.2:
7 | * Added substitution for variables in result pattern, if any.
8 | * Changed "where" row hook to "when", to match Erlang. (oops)
9 | * Added check to block matching when value table had extra fields missing in pattern table.
10 | * Made strings compare by == unless pattern characters were present, then string.match is used (with captures). (This was probably a bad idea, as it's likely to cause unexpected bugs.)
11 | * Added explicit version number and rockspec.
12 | * Added several tests.
13 |
14 | v1.1:
15 | * Sped up unification by saving ignore flag for variables at creation, rather than rechecking names at runtime.
16 | * Added ids option, to mark sentinel values which should still be compared by value rather than structure.
17 | * Removed "fail" hook for spec table, since it's functionally equivalent to adding a final row of { V"_", failure_handler }.
18 | * Added several tests, bugfixes, etc. (Thanks to Steve Donovan for early feedback.)
19 |
20 | v1.0:
21 | * Initial release.
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # Tamale - a TAble MAtching Lua Extension
2 |
3 | ## Overview
4 |
5 | Tamale is a [Lua][] library for structural pattern matching - kind of like regular expressions for *arbitrary data structures*, not just strings. (Or [Sinatra][] for data structures, rather than URLs.)
6 |
7 | [Lua]: http://lua.org
8 | [Sinatra]: http://www.sinatrarb.com
9 |
10 | `tamale.matcher` reads a *rule table* and produces a *matcher function*. The table should list `{pattern, result}` rules, which are structurally compared in order against the input. The matcher returns the result for the first successful rule, or `(nil, "Match failed")` if none match.
11 |
12 | ### Basic Usage
13 |
14 | require "tamale"
15 | local V = tamale.var
16 | local M = tamale.matcher {
17 | { {"foo", 1, {} }, "one" },
18 | { 10, function() return "two" end},
19 | { {"bar", 10, 100}, "three" },
20 | { {"baz", V"X" }, V"X" }, -- V"X" is a variable
21 | { {"add", V"X", V"Y"}, function(cs) return cs.X + cs.Y end },
22 | }
23 |
24 | print(M({"foo", 1, {}})) --> "one"
25 | print(M(10)) --> "two"
26 | print(M({"bar", 10, 100})) --> "three"
27 | print(M({"baz", "four"})) --> "four"
28 | print(M({"add", 2, 3})) --> 5
29 | print(M({"sub", 2, 3})) --> nil, "Match failed"
30 |
31 | The result can be either a literal value (number, string, etc.), a
32 | variable, a table, or a function. Functions are called with a table containing the original input and captures (if any); its result is returned. Variables in the result (standalone or in tables) are
33 | replaced with their captures.
34 |
35 |
36 | ### Benefits of Pattern Matching
37 |
38 | + Declarative (AKA "data-driven") programming is easy to locally reason about, maintain, and debug.
39 | + Structures do not need to be manually unpacked - pattern variables automatically capture the value from their position in the input.
40 | + "It fits or it doesn't fit" - the contract that code is expected to follow is very clear.
41 | + Rule tables can be compiled down to search trees, which are potentially more efficient than long, nested if / switch statements. (Tamale currently does not do this, but could in the future without any change to its interface. Also, see Indexing below.)
42 |
43 | Imperative code to rebalance red-black trees can get pretty hairy. With pattern matching, the list of transformations *is* the code.
44 |
45 | -- create red & black tags and local pattern variables
46 | local R,B,a,x,b,y,c,z,d = "R", "B", V"a", V"x", V"b", V"y", V"c", V"z", V"d"
47 | local balanced = { R, { B, a, x, b }, y, { B, c, z, d } }
48 |
49 | balance = tamale.matcher {
50 | { {B, {R, {R, a, x, b}, y, c}, z, d}, balanced },
51 | { {B, {R, a, x, {R, b, y, c,}}, z, d}, balanced },
52 | { {B, a, x, {R, {R, b, y, c,}, z, d}}, balanced },
53 | { {B, a, x, {R, b, y, {R, c, z, d}}}, balanced },
54 | { V"body", V"body" }, -- default case, keep the same
55 | }
56 |
57 | (Adapted from Chris Okasaki's _Purely Functional Data Structures_.)
58 |
59 | The style of pattern matching used in Tamale is closest to [Erlang](http://erlang.org)'s. Since pattern-matching comes from declarative languages, it may help to study them directly.
60 |
61 | Particularly recommended:
62 |
63 | * _The Art of Prolog_ by Leon Sterling & Ehud Shapiro
64 | * _Programming Erlang_ by Joe Armstrong
65 |
66 |
67 | ## Rules
68 |
69 | Each rule has the form `{ *pattern*, *result*, [when=function] }`.
70 |
71 | The pattern can be a literal value, table, or function. For tables, every field is checked against every field in the input (and those
72 | fields may in turn contain literals, variables, tables, or functions).
73 |
74 | Functions are called on the input's corresponding field. If the function's first result is non-false, the field is considered a match, and all results are appended to the capture table. (See below) If the function returns false or nil, the match was a failure.
75 |
76 | `tamale.P` marks strings as patterns that should be compared with string.match (possibly returning captures), rather than as a string literal. Use it like `{ P"aaa(.*)bbb", result}`.
77 |
78 | Its entire implementation is just
79 |
80 | function P(str)
81 | return function(v)
82 | if type(v) == "string" then return string.match(v, str) end
83 | end
84 | end
85 |
86 |
87 | Rules also have two optional keyword arguments:
88 |
89 | ### Extra Restrictions - `when=function(captures)`
90 |
91 | This is used to add further restrictions to a rule, such as a rule that can only take strings *which are also valid e-mail addresses*. (The function is passed the captures table.)
92 |
93 | -- is_valid(cs) checks cs[1]
94 | { P"(.*)", register_address, when=is_valid }
95 |
96 |
97 | ### Partial patterns - `partial=true`
98 |
99 | This flag allows a table pattern to match a table input value which has *more fields than are listed in the pattern*.
100 |
101 | { {tag="leaf"}, some_fun, partial=true }
102 |
103 | could match against *any* table that has the value t.tag == "leaf", regardless of any other fields.
104 |
105 |
106 | ## Variables and Captures
107 |
108 | The patterns specified in Tamale rules can have variables, which capture the contents of that position in the input. To create a Tamale variable, use `tamale.var('x')` (which can be aliased as `V'x'`, if you're into the whole brevity thing).
109 |
110 | Variable names can be any string, though any beginning with _ are ignored during matching (i.e., `{V"_", V"_", V"X", V"_" }` will capture the third value from any four-value array). Variable names are not required to be uppercase, it's just a convention from Prolog and Erlang.
111 |
112 | Also, note that declaring local variables for frequently used Tamale variables can make rule tables cleaner. Compare
113 |
114 | local X, Y, Z = V"X", V"Y", V"Z"
115 | M = tamale.matcher {
116 | { {X, X}, 1}, -- capitalization helps to keep
117 | { {X, Y}, 2}, -- the Tamale vars distinct from
118 | { {X, Y, Z}, 3}, -- the Lua vars
119 | }
120 |
121 | with
122 |
123 | M = tamale.matcher {
124 | { {V'X', V'X'}, 1},
125 | { {V'X', V'Y'}, 2},
126 | { {V'X', V'Y', V'Z'}, 3},
127 | }
128 |
129 | The _ example above could be reduced to `{_, _, X, _}`.
130 |
131 | Finally, when the same variable appears in multiple fields in a rule pattern, such as `{ X, Y, X }`, each repeated field must structurally match its other occurrences. `{X, Y, X}` would match `{6, 1, 6}`, but not `{5, 1, 7}`.
132 |
133 |
134 | ## The Rule Table
135 |
136 | The function `tamale.matcher` takes a rule table and returns a matcher function. The matcher function takes one or more arguments; the first is matched against the rule table, and any further arguments are saved in captures.args.
137 |
138 | The rule table also takes a couple other options, which are described below.
139 |
140 |
141 | ## Identifiers - `ids={List, Of, IDs}`
142 |
143 | Tamale defaults to structural comparison of tables, but sometimes tables are used as identifiers, e.g. `SENTINEL = {}`. The rule table can have an optional argument of `ids={LIST, OF, IDS}`, for values that should still be compared by `==` rather than structure. (Otherwise, *all* such IDs would match each other, and any empty table.)
144 |
145 |
146 | ## Indexing - `index=field`
147 |
148 | Indexing in Tamale is like indexing in relational databases - Rather than testing every single rule to find a match, only those in the index need to be tested. Often, this singlehandedly eliminates most of the rules. By default, the rules are indexed by the first value.
149 |
150 | When the rule table
151 |
152 | tamale.matcher {
153 | { {1, "a"}, 1 },
154 | { {1, "b"}, 2 },
155 | { {1, "c"}, 3 },
156 | { {2, "d"}, 4 },
157 | }
158 |
159 | is matched against {2, "d"}, it only needs one test if the rule table is indexed by the first field - the fourth rule is the only one starting with 2. To specify a different index than `pattern[1]`, give the rule table a keyword argument of `index=I`, where I is either another key (such as 2 or "tag"), or a function. If a function is used, each rule will be indexed by the result of applying the function to it.
160 |
161 | For example, with the rule table
162 |
163 | tamale.matcher {
164 | { {"a", "b", 1}, 1 }, -- index "ab"
165 | { {"a", "c", 1}, 2 }, -- index "ac"
166 | { {"b", "a", 1}, 3 }, -- index "ba"
167 | { {"b", "c", 1}, 4 }, -- index "bc"
168 | index=function(rule) return rule[1] .. rule[2] end
169 | }
170 |
171 | each rule will be indexed based on the first two fields concatenated, rather than just the first. An input value of {"a", "c", 1} would only
172 | need to check the second row, not the first.
173 |
174 | Indexing should never change the *results* of pattern matching, just make the matcher function do less searching. Note that an indexing function needs to be deterministic - indexing by (say) `os.time()` will produce weird results. An argument of `index=false` turns indexing off.
175 |
176 |
177 | ## Debugging - `debug=true`
178 |
179 | Tamale has several debugging traces. They can be enabled either by setting `tamale.DEBUG` to true, or adding `debug=true` as a keyword argument to a rule table.
180 |
181 | Matching `{ "a", "c", 1 }` against
182 |
183 | tamale.matcher {
184 | { {"a", "b", 1}, 1 },
185 | { {"a", "c", 1}, 2 },
186 | { {"b", "a", 1}, 3 },
187 | { {"b", "c", 1}, 4 },
188 | index=function(rule) return rule[1] .. rule[2] end,
189 | debug = true
190 | }
191 |
192 | will print
193 |
194 | * rule 1: indexing on index(t)=ab
195 | * rule 2: indexing on index(t)=ac
196 | * rule 3: indexing on index(t)=ba
197 | * rule 4: indexing on index(t)=bc
198 | -- Checking rules: 2
199 | -- Trying rule 2...matched
200 | 2
201 |
202 | This can be used to check whether indexing is effective, if one rule is pre-empting another, etc.
203 |
--------------------------------------------------------------------------------
/bench.lua:
--------------------------------------------------------------------------------
1 | require "tamale"
2 | require "socket" --for socket.gettime
3 |
4 | DEF_CT = 10000
5 |
6 | local fmt = string.format
7 | local now = socket.gettime
8 | local V = tamale.var
9 |
--- Build the rule table exercised by the benchmarks below.
-- @param mode Not used by this function; kept so existing calls such as
--   init("search") continue to work.
-- @return The matcher function returned by tamale.matcher.
function init(mode)
    return tamale.matcher {
        { 27, "twenty-seven" },
        { "str", "string" },
        { { 1, 2, 3 },
          function(t) return "one two three" end },
        { { 1, { 2, "three" }, 4 }, function(t) return "success" end },
        -- The guard key is "when": the matcher only reads row.when, so the
        -- older "where" spelling was silently ignored.
        { { "gt3", V"X" }, function(t) return 10 * t.X end,
          when = function(t) return t.X > 3 end },
        { { V"a", V"b", V"c", V"b" }, function(t) return "ABCB" end },
        { { "a", { "b", V"X" }, "c", V"X" },
          function(t) return "X is " .. t.X end },
        { { "a", { "b", V"X" }, "c", V"Y" },
          function(t)
              local b = { "X is " }
              b[2] = t.X
              b[3] = " and Y is "
              b[4] = t.Y
              return table.concat(b)
          end },
        { { "extract", { V"_", V"_", V"third", V"_" } },
          function(t) return t.third end },
    }
end
34 |
--- Run f repeatedly and print the CPU time consumed.
-- @param name Label printed in front of the measurement.
-- @param f Zero-argument function to benchmark.
-- @param ct Number of iterations; defaults to DEF_CT when omitted.
function timed(name, f, ct)
    ct = ct or DEF_CT
    local cpre = os.clock()
    for i = 1, ct do f() end
    local cdelta_ms = (os.clock() - cpre) * 1000
    -- "%d" raises "number has no integer representation" on Lua 5.3+ when
    -- handed a fractional float, so floor the total before formatting it.
    print(fmt("%25s: %d x: clock %d ms (%.3f ms per)",
              name, ct, math.floor(cdelta_ms), cdelta_ms / ct))
end
44 |
-- Matcher shared by every benchmark case below.
M = init("search")

-- Benchmark cases as { label, thunk } pairs, timed in the order listed.
-- The trailing comments record the result each call is expected to give.
local cases = {
    { "init",
      function() local M = init("search") end },

    { "match-first-literal",
      function()
          local res = M(27)
          -- assert(res == "twenty-seven")
      end },

    { "match-structured-vars",
      function()
          local res = M { "a", {"b", "bananas"}, "c", "bananas" }
          -- assert(res == "X is bananas")
      end },

    { "match-structured",
      function()
          local res = M { "a", {"b", "bananas"}, "c", "garlic" }
          -- assert(res == "X is bananas and Y is garlic")
      end },

    { "match-abcb",
      function()
          local res = M { "a", "b", "c", "b" }
          -- assert(res == "ABCB")
      end },

    { "match-abcb-fail",
      function()
          local res = M { "a", "b", "c", "x" }
          -- should fail
          -- assert(res == false)
      end },
}

for _, case in ipairs(cases) do
    timed(case[1], case[2])
end
79 |
--------------------------------------------------------------------------------
/files/tamale.html:
--------------------------------------------------------------------------------
1 |
3 |
4 |
116 | Returns a function that tests a string with string:match, rather than ==. Any captures from the string match are appended to the capture table. Like var, this would probably be locally aliased, and used like { P"num (%d+)", handler }.
117 |
118 |
119 |
168 | Return a matcher function for a given specification. When the function is called on one or more values, its first argument is tested in order against every rule that could possibly match it, selecting the relevant result (if any) or returning the values (nil, "Match failed", val). If the result is a function, it is called with a table containing any captures and any subsequent arguments passed to the matcher function (in captures.args).
169 |
170 |
171 |
Parameters
172 |
173 |
174 |
175 | spec: A list of rows, where each row is of the form { rule, result, [when=capture_predicate] }.
176 |
177 |
178 |
179 |
180 |
181 |
182 |
183 |
Usage
184 |
185 |
186 |
spec.ids: An optional list of table values that should be compared by identity, not structure. If any empty tables are being used as a sentinel value (e.g. "MAGIC_ID = {}"), list them here.
187 |
188 |
spec.debug=true: Turn on debugging traces for the matcher.
189 |
190 |
191 |
192 |
193 |
194 |
195 |
196 |
197 |
198 |
199 |
200 |
201 |
var (name)
202 |
203 | Mark a string in a match pattern as a variable key. (You probably want to alias this locally to something short.) Any variables beginning with _ are ignored.
204 |
205 |
206 |
113 | Returns a function that tests a string with string:match, rather than ==. Any captures from the string match are appended to the capture table. Like var, this would probably be locally aliased, and used like { P"num (%d+)", handler }.
114 |
115 |
116 |
165 | Return a matcher function for a given specification. When the function is called on one or more values, its first argument is tested in order against every rule that could possibly match it, selecting the relevant result (if any) or returning the values (nil, "Match failed", val). If the result is a function, it is called with a table containing any captures and any subsequent arguments passed to the matcher function (in captures.args).
166 |
167 |
168 |
Parameters
169 |
170 |
171 |
172 | spec: A list of rows, where each row is of the form { rule, result, [when=capture_predicate] }.
173 |
174 |
175 |
176 |
177 |
178 |
179 |
180 |
Usage
181 |
182 |
183 |
spec.ids: An optional list of table values that should be compared by identity, not structure. If any empty tables are being used as a sentinel value (e.g. "MAGIC_ID = {}"), list them here.
184 |
185 |
spec.debug=true: Turn on debugging traces for the matcher.
186 |
187 |
188 |
189 |
190 |
191 |
192 |
193 |
194 |
195 |
196 |
197 |
198 |
var (name)
199 |
200 | Mark a string in a match pattern as a variable key. (You probably want to alias this locally to something short.) Any variables beginning with _ are ignored.
201 |
202 |
203 |
A variable named "..." captures subsequent array-portion values.
221 |
222 |
223 |
224 |
225 |
226 |
227 |
228 |
229 |
230 |
231 |
232 |
233 |
234 |
235 |
236 |
237 |
238 |
239 |
240 |
241 |
242 |
243 |
244 |
245 |
246 |
247 |
248 |
249 |
--------------------------------------------------------------------------------
/tamale-1.2.2-1.rockspec:
--------------------------------------------------------------------------------
1 | package = "tamale"
2 | version = "1.2.2-1"
3 | source = {
4 | url = "git://github.com/silentbicycle/tamale.git",
5 | tag = "v1.2.2"
6 | }
7 | description = {
8 | summary = "Erlang-style pattern matching for Lua",
9 | detailed = [[
10 | Tamale adds structural pattern matching (as in Erlang, Prolog, etc.) to
11 | Lua. Pattern matching unpacks and matches on data structures like
12 | regular expressions do on strings.
13 |
14 | Rather than writing a series of nested ifs to test and extract from
15 | a structure, you can build a test function from a series of rules, and
16 | it will generate a dispatch function (with variable captures, etc.).
17 | ]],
18 | homepage = "http://github.com/silentbicycle/tamale",
19 | license = "MIT/X11"
20 | }
21 | dependencies = {
22 | "lua >= 5.1" --earlier may work but is untested
23 | }
24 | build = {
25 | type = "builtin",
26 | modules = {
27 | tamale = "tamale.lua"
28 | }
29 | }
30 |
--------------------------------------------------------------------------------
/tamale.lua:
--------------------------------------------------------------------------------
1 | --[[
2 | Copyright (c) 2010 Scott Vokes
3 |
4 | Permission is hereby granted, free of charge, to any person
5 | obtaining a copy of this software and associated documentation
6 | files (the "Software"), to deal in the Software without
7 | restriction, including without limitation the rights to use,
8 | copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the
10 | Software is furnished to do so, subject to the following
11 | conditions:
12 |
13 | The above copyright notice and this permission notice shall be
14 | included in all copies or substantial portions of the Software.
15 |
16 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
17 | EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
18 | OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
19 | NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
20 | HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
21 | WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
22 | FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
23 | OTHER DEALINGS IN THE SOFTWARE.
24 | --]]
25 |
26 |
27 | -- Dependencies
28 | local assert, getmetatable, ipairs, pairs, pcall, setmetatable, type =
29 | assert, getmetatable, ipairs, pairs, pcall, setmetatable, type
30 | local concat, insert, sort = table.concat, table.insert, table.sort
31 | local strmatch, tostring = string.match, tostring
32 |
-- Debug helper: format the arguments with string.format and print the line.
local function trace(...)
    local line = string.format(...)
    print(line)
end
34 |
35 | ---TAble-MAtching Lua Extension.
36 | module("tamale")
37 |
38 | VERSION = "1.2.1"
39 |
40 | DEBUG = false --Set to true to enable traces.
41 |
-- Create a fresh marker table whose tostring() yields the given description.
local function sentinel(descr)
    local mt = {}
    mt.__tostring = function() return descr end
    return setmetatable({}, mt)
end

-- Two distinct markers with readable tostring() output.
local VAR = sentinel("[var]")
local NIL = sentinel("[nil]")

-- True when t's metatable is the VAR marker.
local function is_var(t)
    return getmetatable(t) == VAR
end
48 |
49 |
---Mark a string in a match pattern as a variable key.
-- (You probably want to alias this locally to something short.)
-- Names starting with "_" are flagged as ignored, and the name "..."
-- is flagged as the rest-capturing variable.
-- @usage { "extract", {var"_", var"_", var"third", var"_" } }
-- @usage A variable named "..." captures subsequent array-portion values.
function var(name)
    assert(type(name) == "string", "Variable name must be string")
    local v = {
        name = name,
        ignore = (name:sub(1, 1) == "_"),
        rest = (name == "..."),
    }
    return setmetatable(v, VAR)
end
61 |
62 |
---Build a predicate that tests a value with string.match instead of ==.
-- The predicate returns whatever string.match returns for string inputs
-- and nothing at all for any other type. Like var, this would probably
-- be locally aliased, and used like { P"num (%d+)", handler }.
function P(str)
    local function matches(v)
        if type(v) ~= "string" then return end
        return strmatch(v, str)
    end
    return matches
end
72 |
73 |
---Default hook for match failure.
-- @param val The unmatched value.
-- @return nil, the message "Match failed", and val itself.
function match_fail(val)
    local reason = "Match failed"
    return nil, reason, val
end
79 |
80 |
-- Weak-keyed cache mapping a table to its total number of keys. #t only
-- covers the array portion, so without a full count a value carrying
-- extra non-numeric keys could match a row that does not list them.
local counts = setmetatable({}, { __mode = "k" })

-- Return the number of keys in t (array and hash part), counting once
-- and serving later calls from the cache.
-- NOTE(review): nothing here invalidates a cached entry, so a table that
-- gains or loses keys after its first count would presumably keep
-- reporting the old size - confirm callers never mutate matched values.
local function get_count(t)
    local cached = counts[t]
    if cached then return cached end
    local n = 0
    for _ in pairs(t) do n = n + 1 end
    counts[t] = n
    return n
end
95 |
96 |
-- Structurally match val against a pattern, setting variables in the
-- pattern to the corresponding values in val, and recursively
-- unifying table fields. Functions are treated as predicates - any
-- non-false result(s) are considered a success and are captured.
--
-- pat: pattern (literal, variable, table or predicate function).
-- val: the value (or field of the value) being matched.
-- cs:  capture table; filled in place and returned on success.
-- ids: set of tables that must be compared by identity.
-- row: the rule row; its `partial` and `rest` fields are consulted.
-- Returns cs on success, false on failure. cs may already have been
-- partially filled when a failure is reported.
local function unify(pat, val, cs, ids, row)
    local pt = type(pat)
    if pt == "table" then
        if is_var(pat) then
            local cur = cs[pat.name]
            -- Test against nil rather than truthiness: a previously captured
            -- `false` must still constrain later occurrences of the variable.
            if cur ~= nil and cur ~= val and not pat.ignore then
                return false
            end
            cs[pat.name] = val
            return cs
        end
        if type(val) ~= "table" then return false end
        if ids[pat] and pat ~= val then --compare by pointer equality
            return false
        end
        for k, v in pairs(pat) do
            if not unify(v, val[k], cs, ids, row) then return false end
        end
        if not row.partial then --make sure val doesn't have extra fields
            if get_count(pat) ~= get_count(val) then return false end
        elseif row.rest then --save V"..." captures
            local rest = {}
            for i = row.rest, #val do rest[#rest + 1] = val[i] end
            cs['...'] = rest
        end
        return cs
    elseif pt == "function" then
        local fcs = { pat(val) } --function captures
        if #fcs == 0 or not fcs[1] then return false end
        -- predicate results are appended to the array part of the captures
        for _, c in ipairs(fcs) do cs[#cs + 1] = c end
        return cs
    else --just compare as literals
        return pat == val and cs or false
    end
end
135 |
136 |
-- Build a copy of the result in which every variable is replaced by its
-- capture from u. A bare variable yields its capture directly; nested
-- tables are copied recursively; other values are carried over as-is.
local function substituted(res, u)
    if is_var(res) then return u[res.name] end
    local out = {}
    for k, v in pairs(res) do
        if type(v) ~= "table" then
            out[k] = v
        elseif is_var(v) then
            out[k] = u[v.name]
        else
            out[k] = substituted(v, u)
        end
    end
    return out
end
150 |
151 |
-- Produce the final result for a matched row.
-- A function result is called with the captures and its return values are
-- passed through. A table result flagged with has_vars is returned with
-- its variables substituted, followed by the captures. Anything else is
-- returned unchanged, followed by the captures.
local function do_res(res, u, has_vars)
    local kind = type(res)
    if kind == "function" then return res(u) end
    if has_vars and kind == "table" then
        return substituted(res, u), u
    end
    return res, u
end
162 |
163 |
-- Push val onto the array stored at t[key], creating the array first
-- when the slot is empty.
local function append(t, key, val)
    local arr = t[key]
    if not arr then
        arr = {}
        t[key] = arr
    end
    arr[#arr + 1] = val
end
168 |
169 |
-- Report whether a result value is a variable or is a table that holds
-- one at any nesting depth. Non-table values never contain variables.
local function has_vars(res)
    if type(res) ~= "table" then return false end
    if is_var(res) then return true end
    for _, field in pairs(res) do
        if type(field) == "table" and (is_var(field) or has_vars(field)) then
            return true
        end
    end
    return false
end
180 |
181 |
-- If the list of row IDs didn't exist when a var row was indexed (and so
-- the row was not added to it), add it here. The IDs in vars are visited
-- from last to first, and each one is pushed onto the front of every
-- list whose current first ID is larger.
local function prepend_vars(vars, lists)
    local i = #vars
    while i >= 1 do
        local vid = vars[i]
        for _, l in pairs(lists) do
            if vid < l[1] then insert(l, 1, vid) end
        end
        i = i - 1
    end
end
192 |
-- A value can serve as an index key unless it is a pattern variable or
-- a function.
local function indexable(v)
    if is_var(v) then return false end
    return type(v) ~= "function"
end
196 |
-- Index each literal pattern by its value and each table pattern by the
-- result of the indexer (t[1] by default). Rows whose pattern or index
-- key is a variable or function cannot be indexed, so they are added to
-- every list that could apply to them.
--
-- spec.index selects the indexer: a function is used as-is, `false`
-- places every table pattern in the same index, and any other value is
-- used as the key to look up in the pattern (defaulting to 1).
-- Side effects: stores the indexer in spec.indexer; rows containing the
-- V"..." variable in their array part get row.partial and row.rest set.
-- Returns { ls = literal index, ts = table index, vrs = set of row ids
-- whose result contains variables }.
local function index_spec(spec)
    local ls, ts = {}, {}     --literals and tables
    local lni, tni = {}, {}   --non-indexable rows for same
    local vrs = {}            --rows with vars in the result

    -- Honour the module-wide DEBUG switch as well, like matcher() does.
    local debug = spec.debug or DEBUG

    -- field/value to index by, defaults to t[1].
    local ispec, indexer
    if spec.index == false then
        ispec = false         -- false -> don't index
    else
        ispec = spec.index or 1
    end

    if type(ispec) == "function" then
        indexer = ispec
    elseif ispec == false then
        -- Compare with the boolean: the earlier test against the string
        -- "false" could never succeed for index=false.
        indexer = function() end --put everything in the same index
    else
        indexer = function(t) return t[ispec] end
    end
    spec.indexer = indexer

    for id, row in ipairs(spec) do
        local pat, res = row[1], row[2]
        local pt = type(pat)
        if not indexable(pat) then --could match anything
            if debug then trace(" * rule %d: not indexable, adding to all", id) end
            lni[#lni+1] = id; tni[#tni+1] = id --for those that don't yet exist
            for _, l in ipairs{ls, ts} do --and append to those that do
                for k in pairs(l) do append(l, k, id) end
            end
        elseif pt == "table" then
            local v = indexer(pat) or NIL
            if not indexable(v) then --goes in every index
                if debug then trace(" * rule %d: index(table) is not indexable", id) end
                for k in pairs(ts) do append(ts, k, id) end
                tni[#tni+1] = id
            else
                if debug then trace(" * rule %d: indexing on index(t)=%s",
                                    id, tostring(v)) end
                append(ts, v, id)
            end

            for i, v in ipairs(pat) do --check for special V"..." var
                if is_var(v) and v.rest then
                    if debug then trace(" * rule %d: V'...' found in field %d",
                                        id, i) end
                    row.partial = true; row.rest = i; break
                end
            end
        else
            if debug then trace(" * rule %d: indexing on %s",
                                id, tostring(pat)) end
            append(ls, pat, id)
        end
        if has_vars(res) then
            if debug then trace(" * rule %d: found var(s) in result", id) end
            vrs[id] = true
        end
    end

    -- Lists created after a non-indexable row was seen still need that row.
    prepend_vars(lni, ls)
    prepend_vars(tni, ts)
    -- Fallback lists, used when an input has no index entry of its own.
    ls[VAR] = lni; ts[VAR] = tni
    return { ls=ls, ts=ts, vrs=vrs }
end
266 |
267 |
-- Pick the list of row ids worth trying for input t. Tables are looked
-- up in the table index under spec.indexer(t) (NIL when that is nil or
-- false); every other value is looked up in the literal index under
-- itself. When no list exists for the key, the list stored under VAR
-- is returned instead.
local function check_index(spec, t, idx)
    local lists, key
    if type(t) == "table" then
        lists, key = idx.ts, spec.indexer(t) or NIL
    else
        lists, key = idx.ls, t
    end
    return lists[key] or lists[VAR]
end
280 |
281 |
---Return a matcher function for a given specification. When the
-- function is called on one or more values, its first argument is
-- tested in order against every rule that could possibly match it,
-- selecting the relevant result (if any). When no rule matches, the
-- values returned by spec.fail (if set) or by match_fail are returned;
-- for match_fail these are (nil, "Match failed", val).
-- If the result is a function, it is called with a table containing
-- any captures, the whole input (in captures.input) and any subsequent
-- arguments passed to the matcher function (in captures.args).
-- A row's `when` predicate is run under pcall; an error raised by it
-- counts as a failed check and the next row is tried.
--@param spec A list of rows, where each row is of the form
--  { rule, result, [when=capture_predicate] }.
--@usage spec.ids: An optional list of table values that should be
--  compared by identity, not structure. If any empty tables are
--  being used as a sentinel value (e.g. "MAGIC_ID = {}"), list
--  them here.
--@usage spec.debug=true: Turn on debugging traces for the matcher.
function matcher(spec)
    local debug = spec.debug or DEBUG
    local ids = {}
    if spec.ids then
        for _, id in ipairs(spec.ids) do ids[id] = true end
    end

    local idx = index_spec(spec)
    local vrs = idx.vrs --variable rows

    return function(t, ...)
        local rows = check_index(spec, t, idx)
        if debug then
            trace(" -- Checking rules: %s", concat(rows, ", "))
        end

        for _, id in ipairs(rows) do
            local row = spec[id]
            local pat, res, when = row[1], row[2], row.when
            if debug and res == nil then trace " -- Missing result" end
            local args = { ... }

            -- every row starts from a fresh capture table
            local u = unify(pat, t, { args=args }, ids, row)
            if debug then
                trace(" -- Trying rule %d...%s", id, u and "matched" or "failed")
            end

            if u then
                u.input = t --whole matched value
                local accepted = true
                if when then
                    local ok, val = pcall(when, u)
                    accepted = ok and val
                    if debug then
                        trace(" -- Running when(captures) check...%s",
                              accepted and "matched" or "failed")
                        -- surface errors that pcall would otherwise hide
                        if not ok then
                            trace(" -- when(captures) raised: %s", tostring(val))
                        end
                    end
                end
                if accepted then
                    return do_res(res, u, vrs[id])
                end
            end
        end
        if debug then trace(" -- Failed") end
        local fail = spec.fail or match_fail
        return fail(t)
    end
end
345 |
--------------------------------------------------------------------------------
/test.lua:
--------------------------------------------------------------------------------
1 | require "tamale"
2 | require "lunatest"
3 |
4 | local V, P = tamale.var, tamale.P
5 |
6 |
-- Assign a fresh matcher to the global M, which the tests below call.
function setup(name)
    local X = V"X"
    local A, B, C, D = V"A", V"B", V"C", V"D"

    local rules = {
        { 27, "twenty-seven" },
        { "str", "string" },
        { { 1, 2, 3 },
          function(t) return "one two three" end },
        { { 1, { 2, "three" }, 4 },
          function(t) return "success" end },
        { { "gt3", X },
          function(t) return 10 * t.X end,
          when = function(t) return t.X > 3 end },
        { { A, B, C, B },
          function(t) return "ABCB" end },
        { { "a", { "b", X }, "c", X },
          function(t) return "X is " .. t.X end },
        { { "extract", { V"_", V"_", V"third", V"_" } },
          function(t) return t.third end },
    }

    M = tamale.matcher(rules)
end
24 |
-- Trivial example from the docs: literal, function, variable and
-- captured-arithmetic results.
function test_example()
    local V = tamale.var
    local X, Y = V"X", V"Y"
    local add = function(cs) return cs.X + cs.Y end

    local M = tamale.matcher({
        { { "foo", 1, {} }, "one" },
        { 10, function() return "two" end },
        { { "bar", 10, 100 }, "three" },
        { { "baz", V"X" }, X },
        { { V"X", V"Y" }, add },
    })

    assert_equal("one", M({ "foo", 1, {} }))
    assert_equal("two", M(10))
    assert_equal("three", M({ "bar", 10, 100 }))
    assert_equal("four", M({ "baz", "four" }))
    assert_equal(5, M({ 2, 3 }))
end
43 |
44 | function test_m()
45 | assert_true(M)
46 | end
47 |
48 | function test_literal_num()
49 | assert_equal("twenty-seven", M(27))
50 | end
51 |
-- A bare string is matched against the literal-string row.
function test_literal_str()
    local input = "str"
    assert_equal("string", M(input))
end
55 |
-- A flat table of three literal numbers matches the {1, 2, 3} row.
function test_literal_num_trio()
    local trio = { 1, 2, 3 }
    assert_equal("one two three", M(trio))
end
59 |
-- A fourth element must stop the table from matching the {1, 2, 3} row.
function test_literal_num_trio_too_many()
    local quad = { 1, 2, 3, 4 }
    assert_false(M(quad))
end
63 |
-- A table that differs from every row yields a falsy result.
function test_nomatch()
    local unmatched = { 1, 2, 4 }
    assert_false(M(unmatched))
end
67 |
-- The "gt3" row captures X and returns 10 * X when its guard accepts it.
function test_matchvar()
    local input = { "gt3", 7 }
    assert_equal(70, M(input))
end
71 |
-- A non-numeric capture is expected to be rejected by the "gt3" row.
function test_matchvar_fail()
    local input = { "gt3", "boo" }
    assert_false(M(input))
end
75 |
-- A nested literal table matches structurally.
function test_matchvar_nested()
    -- Assigning to a local keeps only the first return value, as the
    -- extra parentheses did in the original form.
    local result = M({ 1, { 2, "three" }, 4 })
    assert_equal("success", result)
end
79 |
-- The {A, B, C, B} row matches when slots 2 and 4 hold the same number.
function test_match_repeated_num_var()
    local input = { 1, 2, 3, 2 }
    assert_equal("ABCB", M(input))
end
83 |
-- The {A, B, C, B} row matches when slots 2 and 4 hold the same string.
function test_match_repeated_str_var()
    local input = { "apple", "banana", "corn", "banana" }
    assert_equal("ABCB", M(input))
end
87 |
-- The {A, B, C, B} row matches when slots 2 and 4 hold the same table.
function test_match_repeated_table_var()
    local first, repeated, third = {}, {}, {}
    local input = { first, repeated, third, repeated }
    assert_equal("ABCB", M(input))
end
92 |
-- Distinct tables in slots 2 and 4 must not satisfy the repeated
-- variable B of the {A, B, C, B} row.
function test_match_repeated_table_var_FAIL_out_of_order()
    local apple, banana, corn = {}, {}, {}
    local input = { apple, corn, banana, banana }
    assert_false(M(input))
end
97 |
-- X is bound in a nested table and again at the top level.
function test_destructuring()
    local input = { "a", { "b", "FOO" }, "c", "FOO" }
    assert_equal("X is FOO", M(input))
end
101 |
-- V"_" slots are skipped over; only the named `third` capture is returned.
function test_dont_care()
    local fields = { "first", "second", "third", "fourth" }
    assert_equal("third", M({ "extract", fields }))
end
107 |
-- A lone V"_" row should accept a string, a number and a table alike.
function test_match_any()
    local function echo(caps) return caps end
    local any = tamale.matcher {
        { V"_", echo },
    }
    assert_true(any("any string"), "match any string")
    assert_true(any(4), "match a number")
    assert_true(any({ "x", "y", "z" }), "match a table")
end
116 |
117 |
-- Matcher over {x=..., y=..., z=...} tables. Fields x and z are both bound
-- to V"X", so they have to agree for the row to match; the result is the
-- sum of the three fields.
local aba_pt_match = tamale.matcher {
    {
        { x=V"X", y=V"Y", z=V"X" },
        function(caps) return caps.X + caps.Y + caps.X end,
    },
}
124 |
-- x and z agree, so the row matches and the fields are summed.
function test_kv_match()
    local point = { x=15, y=5, z=15 }
    assert_equal(35, aba_pt_match(point))
end
128 |
-- x and z differ, so the repeated V"X" must block the match.
function test_kv_match_fail()
    local point = { x=10, y=20, z=5 }
    assert_false(aba_pt_match(point))
end
132 |
133 |
-- Empty tables can also serve as sentinel values, so it must be possible
-- to force comparison by identity rather than by structure (`ids` option).
function test_match_IDs()
    local a, b, c = {}, {}, {}

    local by_id = tamale.matcher {
        { { a, b, c }, "PASS" },
        ids={ a, b, c },
    }

    assert_equal("PASS", by_id({ a, b, c }))
    -- b and c have the same structure but are different tables
    assert_false(by_id({ a, c, b }))
end
147 |
148 |
-- With `ids` set, rows that differ only in which sentinel table they
-- hold must each be selected by their own sentinel.
function test_IDs_2()
    local a, b, c = {}, {}, {}
    local by_id = tamale.matcher {
        { { a, 1 }, 1 },
        { { b, 1 }, 2 },
        { { c, 1 }, 3 },
        ids={ a, b, c },
    }
    assert_equal(1, by_id({ a, 1 }), "a")
    assert_equal(2, by_id({ b, 1 }), "b")
    assert_equal(3, by_id({ c, 1 }), "c")
end
161 |
162 |
-- Variables inside a result table are replaced by their captured values.
function test_substitution()
    local x, y = V"x", V"y"
    local subst = tamale.matcher {
        { { x=x, y=y }, { y=V"x", z=V"y" } },
        { { "swap", V"x", V"y" }, { V"y", V"x" } },
    }

    local keyed = subst({ x=10, y=20 })
    assert_equal(10, keyed.y)
    assert_equal(20, keyed.z)

    local swapped = subst({ "swap", 10, 20 })
    assert_equal(20, swapped[1])
    assert_equal(10, swapped[2])
end
177 |
178 |
-- A result that is just a variable evaluates to the captured value.
function test_substitution_var_only()
    local echo = tamale.matcher {
        { V"all", V"all" },
    }

    for n = 1, 10 do
        assert_equal(n, echo(n))
    end
    for ch in ("bananas"):gmatch(".") do
        assert_equal(ch, echo(ch))
    end
end
187 |
188 |
-- A variable inside a result table is substituted, giving a one-element
-- table that wraps the captured value.
function test_substitution_boxing()
    local box = tamale.matcher {
        { V"all", { V"all" } },
    }

    for n = 1, 10 do
        local boxed = box(n)
        assert_equal(n, boxed[1])
    end
end
199 |
200 |
-- Extra arguments passed to the matcher are collected in captures.args.
function test_extra_matcher_arg()
    -- Adds up the extra arguments themselves.
    local function sum_args(caps)
        local acc = 0
        for _, n in ipairs(caps.args) do acc = acc + n end
        return acc
    end

    -- Adds up the lengths of the extra arguments.
    local function sum_lengths(caps)
        local acc = 0
        for _, s in ipairs(caps.args) do acc = acc + #s end
        return acc
    end

    local m = tamale.matcher {
        { "sum", sum_args },
        { "sumlen", sum_lengths },
    }
    assert_equal(10, m("sum", 1, 2, 3, 4))
    assert_equal(15, m("sum", 1, 2, 3, 4, 5))
    assert_equal(10, m("sumlen", "a", "ao", "aoe", "aoeu"))
end
219 |
-- Rows are tried in order: a guarded variable row, a literal, an
-- unguarded variable row, and a literal that the third row shadows.
function test_match_order()
    local function is_number(caps)
        return type(caps.X) == "number"
    end

    local ordered = tamale.matcher {
        { V"X", 1, when=is_number },
        { "y", 2 },
        { V"X", 3 },
        { "z", 4 },
    }
    assert_equal(1, ordered(23))
    assert_equal(2, ordered("y"))
    assert_equal(3, ordered("z"), [[should be shadowed by V"X"]])
    assert_equal(3, ordered("w"))
end
234 |
-- Plain strings are compared literally even when they contain pattern
-- characters; pattern matching needs a comparison function instead.
function test_str_literal_cmp()
    local function first_as_number(caps)
        return tonumber(caps[1])
    end
    local m = tamale.matcher {
        { "foo (%d+)", first_as_number },
        { "foo 23", 1 },
    }
    assert_equal(1, m("foo 23"))
end
244 |
245 |
-- Strings wrapped in P are used as patterns and expose their captures;
-- plain strings in the same matcher still compare literally.
function test_str_pattern()
    local function number_capture(caps) return tonumber(caps[1]) end
    local function word_capture(caps) return caps[1] end
    local function joined_captures(caps)
        return caps[1] .. tostring(caps[2]) .. caps[3]
    end

    local m = tamale.matcher {
        { P"foo (%d+)", number_capture },
        { P"foo (%a+)$", word_capture },
        { P"foo (%a+) (%d+) (%a+)", joined_captures },
        { "foo", 3 },
        { "bar", 4 },
    }
    assert_equal(23, m("foo 23"))
    assert_equal("bar", m("foo bar"))
    assert_equal(3, m("foo"))
    assert_equal(4, m("bar"))
end
261 |
-- Literal strings inside a table pattern must not be treated as string
-- patterns either.
function test_table_str_literal_cmp()
    local function first_as_number(caps)
        return tonumber(caps[1])
    end
    local m = tamale.matcher {
        { { "foo (%d+)" }, first_as_number },   -- not expected to match
        { { "foo 23" }, 1 },
    }
    assert_equal(1, m({ "foo 23" }))
end
270 |
-- A function used as a pattern fails when it returns false or nil;
-- otherwise it succeeds and its return values become the captures.
function test_function_matching_behavior()
    local function returns_nothing() end
    local function returns_false() return false end
    local function returns_three() return 1, 2, 3 end

    local m = tamale.matcher {
        { returns_nothing, 1 },                 -- never matches
        { returns_false, 2 },                   -- never matches
        { P"foo (%d+)",
          function(caps) return tonumber(caps[1]) - 20 end },
        { returns_three,                        -- always matches
          function(caps) return caps[1] + caps[2] + caps[3] end },
    }

    assert_equal(3, m("foo 23"))
    assert_equal(6, m(""))
end
285 |
-- P patterns also work as elements of a table pattern, next to rows
-- that hold plain literal strings.
function test_table_str_pattern()
    local function number_capture(caps) return tonumber(caps[1]) end
    local function word_capture(caps) return caps[1] end
    local function joined_captures(caps)
        return caps[1] .. tostring(caps[2]) .. caps[3]
    end

    local m = tamale.matcher {
        { { P"foo (%d+)" }, number_capture },
        { { P"foo (%a+)$" }, word_capture },
        { { P"foo (%a+) (%d+) (%a+)" }, joined_captures },
        { { "foo" }, 3 },
        { { "bar" }, 4 },
    }
    assert_equal(23, m({ "foo 23" }))
    assert_equal("bar", m({ "foo bar" }))
    assert_equal("bar23baz", m({ "foo bar 23 baz" }))
    assert_equal(3, m({ "foo" }))
    assert_equal(4, m({ "bar" }))
end
302 |
-- Extra keys in the value block a match by default.
-- (A row can opt out of this with partial=true.)
function test_extra_keys()
    local m = tamale.matcher {
        { { k=1, v=2 }, 3 },
        { { k=1, v=2, e=3 }, 4 },
        { { 1, k=1, v=2 }, 5 },
    }

    local exact = { k=1, v=2 }
    assert_equal(3, m(exact))

    local with_extra_key = { k=1, v=2, e=3 }
    assert_equal(4, m(with_extra_key),
                 "should not silently ignore extra e=3")

    local with_array_part = { 1, k=1, v=2 }
    assert_equal(5, m(with_array_part))
end
316 |
-- Partial rows: match anything tagged "foo", whatever other fields it
-- has, and return the sum of its numeric fields.
function test_partial()
    -- Sums every numeric value found in the matched input table.
    local function sum_fields(env)
        local sum = 0
        for _, field in pairs(env.input) do
            if type(field) == "number" then
                sum = sum + field
            end
        end
        return sum
    end

    local m = tamale.matcher {
        { { tag="foo" }, sum_fields, partial=true },
        { V"_", false },
    }
    local tagged = { tag="foo", x=3, y=4, z=5 }
    assert_equal(12, m(tagged))
end
333 |
-- The variable named "..." captures the rest of the array-portion fields.
function test_vararg()
    -- Sums the numeric entries captured under the "..." key.
    local function sum_fields(env)
        local sum = 0
        for _, item in ipairs(env['...']) do
            if type(item) == "number" then
                sum = sum + item
            end
        end
        return sum
    end

    local m = tamale.matcher {
        { { "foo", V"..." }, sum_fields },
        { V"_", "nope" },
    }
    local input = { "foo", 1, 2, 3, 4, 5 }
    assert_equal(15, m(input))
end
350 |
-- Closures used as table-pattern elements act as per-field predicates.
function test_approx()
    -- Builds a predicate that accepts values strictly within 1 of `target`.
    local function approx(target)
        return function(actual)
            local delta = math.abs(target - actual)
            return delta < 1
        end
    end

    local near = tamale.matcher {
        { { approx(5), approx(10) }, true },
    }
    assert_true(near({ 5.1, 10.1 }))
    assert_false(near({ 5.1, 11.1 }))
end
361 |
-- A trailing V"_" row is the default case, including for an input whose
-- own literal row is rejected by its guard.
function test_default_case()
    local function never() return false end
    local m = tamale.matcher {
        { 1, 1 },
        -- The guard always rejects this row, so 2 should map to 4.
        -- This checks that the V"_" row is in the index.
        { 2, 2, when=never },
        { V"_", 4 },
    }
    assert_equal(1, m(1))
    assert_equal(4, m(2))
    assert_equal(4, m(5))
end
374 |
-- Pattern-matching example translated from "Learn You Some Erlang for
-- Great Good!". Doubles as a regression test: a final V"_" default row
-- was not indexed correctly against tables, so guarded rows made it fail.
function test_LYSEFGG_beach()

    -- Builds a guard that accepts captures whose `key` value lies in the
    -- closed range [lo, hi].
    local function between(key, lo, hi)
        return function(caps)
            local value = caps[key]
            return lo <= value and value <= hi
        end
    end

    local beach = tamale.matcher {
        { { "celsius", V"N" }, "favorable",
          when=between("N", 20, 45) },
        { { "kelvin", V"N" }, "scientifically favorable",
          when=between("N", 293, 318) },
        { { "fahrenheit", V"N" }, "favorable in the US",
          when=between("N", 68, 113) },
        { V"_", "avoid beach" },
    }

    assert_equal("favorable", beach({ "celsius", 23 }))
    assert_equal("avoid beach", beach({ "kelvin", 23 }))
    assert_equal("favorable in the US", beach({ "fahrenheit", 97 }))
    assert_equal("avoid beach", beach({ "fahrenheit", -5 }))
end
402 |
-- Index rows by a field other than t[1] (here, the second field).
function test_custom_index()
    local by_second = tamale.matcher {
        { { 1, "a" }, 1 },
        { { 1, "b" }, 2 },
        { { 1, "c" }, 3 },
        { { 1, "d" }, 4 },
        index=2,
    }

    assert_equal(1, by_second({ 1, "a" }), "a")
    assert_equal(2, by_second({ 1, "b" }), "b")
    assert_equal(3, by_second({ 1, "c" }), "c")
    assert_equal(4, by_second({ 1, "d" }), "d")
    assert_false(by_second({ 3, "b" }))
end
419 |
-- Index rows by the result of a function instead of a fixed field.
function test_custom_index_function()
    -- Index key computed from the first and third fields of a row.
    local function index_key(row)
        return row[1] + 3*row[3]
    end

    local m = tamale.matcher {
        { { 1, "a", 1 }, 1 },
        { { 1, "b", 2 }, 2 },
        { { 1, "c", 3 }, 3 },
        { { 1, "d", 4 }, 4 },
        index=index_key,
    }

    assert_equal(1, m({ 1, "a", 1 }), "a")
    assert_equal(2, m({ 1, "b", 2 }), "b")
    assert_equal(3, m({ 1, "c", 3 }), "c")
    assert_equal(4, m({ 1, "d", 4 }), "d")
    assert_false(m({ 1, "b", 1 }))
end
438 |
-- With indexing turned off (index=false), a nested table pattern must
-- win over the catch-all row that follows it.
function test_indexing_nested_tables_no_index()
    local function ok() return "ok" end
    local function fail() return "fail" end
    local m = tamale.matcher {
        index=false,
        { { { "T", V"X" } }, ok },
        { V"default", fail },
    }
    local nested = { { "T", "foo" } }
    assert_equal("ok", m(nested))
end
447 |
-- Same nested-table case, with an index function that looks inside the
-- first element of the pattern.
function test_indexing_nested_tables_with_custom_index()
    -- Reads pat[1][1], using an empty table when pat[1] is nil/false.
    local function nested_first(pat)
        return (pat[1] or {})[1]
    end
    local function ok() return "ok" end
    local function fail() return "fail" end

    local m = tamale.matcher {
        index=nested_first,
        { { { "T", V"X" } }, ok },
        { V"default", fail },
    }
    local nested = { { "T", "foo" } }
    assert_equal("ok", m(nested))
end
456 |
457 |
458 | lunatest.run()
459 |
--------------------------------------------------------------------------------