├── LICENSE ├── README.md ├── eval.zig ├── main.zig ├── parse.zig ├── script.l └── types.zig /LICENSE: -------------------------------------------------------------------------------- 1 | This is free and unencumbered software released into the public domain. 2 | 3 | Anyone is free to copy, modify, publish, use, compile, sell, or 4 | distribute this software, either in source code form or as a compiled 5 | binary, for any purpose, commercial or non-commercial, and by any 6 | means. 7 | 8 | In jurisdictions that recognize copyright laws, the author or authors 9 | of this software dedicate any and all copyright interest in the 10 | software to the public domain. We make this dedication for the benefit 11 | of the public at large and to the detriment of our heirs and 12 | successors. We intend this dedication to be an overt act of 13 | relinquishment in perpetuity of all present and future rights to this 14 | software under copyright law. 15 | 16 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 17 | EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 18 | MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 19 | IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR 20 | OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, 21 | ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR 22 | OTHER DEALINGS IN THE SOFTWARE. 23 | 24 | For more information, please refer to <https://unlicense.org> 25 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Zig comptime lisp 2 | 3 | This is a toy lisp following in the steps of a ["Make A Lisp"](https://github.com/kanaka/mal) or "MAL". However, unlike other "Make A Lisps", this one is implemented entirely in compile-time Zig code. 
This makes it much less useful (no I/O other than compile-time loading of strings or embedded files and output to the Zig language). However, it does generate executables! :D 4 | 5 | "script.l" is a file with the toy lisp language that gets pulled, parsed, and evaluated at compile time through Zig's @embedFile command. Recursion, loops, most arithmetic, comments, and most anything useful aren't implemented, but you can use "if" and define functions and variables and do some basic arithmetic. 6 | 7 | I likely won't ever touch this repository again, as it's a fun toy but I wanted to see if I could get the proof-of-concept of a MAL working, which I think I have. This is working with Zig version zig-windows-x86_64-0.8.0-dev.1369+45d220cac. 8 | 9 | -------------------------------------------------------------------------------- /eval.zig: -------------------------------------------------------------------------------- 1 | const std = @import("std"); 2 | // const print = std.debug.print; 3 | const memEql = std.mem.eql; 4 | const stringToEnum = std.meta.stringToEnum; 5 | const types = @import("types.zig"); 6 | const parse = @import("parse.zig"); 7 | const Atom = types.Atom; 8 | const AtomList = types.AtomList; 9 | const Variable = types.Variable; 10 | const VarList = types.VarList; 11 | const Value = types.Value; 12 | const SpecialForms = types.SpecialForms; 13 | const Func = types.Func; 14 | const Function = types.Function; 15 | const SyntaxErrors = types.SyntaxErrors; 16 | 17 | /// Addition; some dummy functions to play with -> add takes 2 params (order independent) 18 | pub fn add(l: Atom, r: Atom) Atom { 19 | return Atom{ .number = l.number + r.number }; 20 | } 21 | 22 | /// Subtraction; order dependent (computes l - r) 23 | pub fn sub(l: Atom, r: Atom) Atom { 24 | return Atom{ .number = l.number - r.number }; 25 | } 26 | 27 | /// Negation; neg takes 1 param 28 | pub fn neg(l: Atom) Atom { 29 | return Atom{ .number = -l.number }; 30 | } 31 | 32 | pub fn less(l: Atom, r: Atom) Atom { 
33 | return Atom{ .number = if (l.number < r.number) 1.0 else 0 }; 34 | } 35 | 36 | pub const Env = struct { 37 | outer: ?*Env, 38 | varlist: VarList, 39 | 40 | pub fn copy(self: *const Env) Env { 41 | return Env{ .outer = self.outer, .varlist = self.varlist }; 42 | } 43 | 44 | pub fn push(self: *Env, symbol: []const u8, value: Value) void { 45 | var node = VarList.Node{ .data = Variable{ .name = symbol, .value = value } }; 46 | self.varlist.prepend(&node); // NOTE(review): links a pointer to a local; presumably only valid because evaluation is comptime-only - confirm 47 | } 48 | 49 | pub fn find(self: *const Env, symbol: []const u8) ?*const Env { 50 | var it = self.varlist.first; 51 | while (it) |node| : (it = node.next) { 52 | if (memEql(u8, node.data.name, symbol)) return self; 53 | } 54 | return if (self.outer) |outer_node| outer_node.find(symbol) else null; 55 | } 56 | 57 | pub fn get(self: *const Env, symbol: []const u8) !Value { 58 | if (self.find(symbol)) |env| { 59 | var it = env.varlist.first; 60 | while (it) |node| : (it = node.next) { 61 | if (memEql(u8, node.data.name, symbol)) return node.data.value; 62 | } 63 | return error.KeyDisappearedAfterFinding; 64 | } else { 65 | return error.CannotFindKeyInEnvs; 66 | } 67 | } 68 | 69 | pub fn addArgs(self: *Env, names: AtomList, values: AtomList) SyntaxErrors!void { 70 | if (names.len() != values.len()) return error.UserFunctionParameterArgumentLengthMismatch; 71 | var name = names.first; 72 | var value = values.first; 73 | while (name) |nameNode| : (name = nameNode.next) { 74 | if (nameNode.data != .keyword) return error.InvalidFunctionArgsOrBody; // parameter names must be symbols 75 | if (value) |valueNode| { 76 | self.push(nameNode.data.keyword, Value{ .atom = valueNode.data }); 77 | value = valueNode.next; // the same as name = nameNode.next on continuation 78 | } 79 | } 80 | } 81 | }; 82 | 83 | pub fn evalProgram(comptime ast: AtomList) SyntaxErrors!AtomList { 84 | var corelist = VarList{}; 85 | const functions = [_]Variable{ 86 | Variable{ .name = "add", .value = Value{ .func = Func{ .funcTwo = &add } } }, 87 | Variable{ .name = "sub", .value = Value{ .func = Func{ .funcTwo = &sub } } }, 
Variable{ .name = "less", .value = Value{ .func = Func{ .funcTwo = &less } } }, 89 | }; 90 | 91 | for (functions) |function| { 92 | var func = VarList.Node{ .data = function }; 93 | corelist.prepend(&func); 94 | } 95 | var global_env = Env{ .outer = null, .varlist = corelist }; 96 | 97 | var results = AtomList{}; 98 | var it = ast.first; 99 | while (it) |node| : (it = node.next) { 100 | const evaluation = try comptime eval(node.data, &global_env); 101 | var new_node = AtomList.Node{ .data = evaluation }; 102 | if (results.len() >= 1) { 103 | results.first.?.findLast().insertAfter(&new_node); // append at the tail so results keep program order 104 | } else { 105 | results.prepend(&new_node); 106 | } 107 | } 108 | return results; 109 | } 110 | 111 | pub fn eval(x: Atom, env: *Env) SyntaxErrors!Atom { 112 | @setEvalBranchQuota(1_000_000); 113 | return switch (x) { 114 | .number => x, // number evaluates to itself 115 | .keyword => (try env.get(x.keyword)).atom, // non function keywords 116 | .function => error.NoFunctionShouldBeHere, // we shouldn't see a bare function 117 | .list => blk: { 118 | if (x.list.len() == 0) break :blk x; // list is empty, return the empty list 119 | const node = comptime x.list.first.?; 120 | const data = comptime node.data; 121 | const next = comptime node.next; 122 | if (data != .keyword) break :blk eval(comptime data, comptime env); // head is not a keyword: evaluate just the head (the rest of the list is ignored) 123 | if (next == null) break :blk (try env.get(data.keyword)).atom; // if it's terminal (no arguments), look it up as a variable 124 | if (stringToEnum(comptime SpecialForms, data.keyword)) |special_form| { // special form 125 | break :blk switch (special_form) { 126 | .def => handleDefSpecialForm(next.?, env), 127 | .@"if" => handleIfSpecialForm(next.?, env), 128 | .@"fn" => handleFnSpecialForm(next.?, env), 129 | }; 130 | } else { // function that's not a special form 131 | break :blk handleFunction(node, env); 132 | } 133 | }, 134 | }; 135 | } 136 | 137 | /// No bool values, like the cool kids 138 | pub fn 
handleIfSpecialForm(conditional: *types.AtomList.Node, env: *Env) SyntaxErrors!Atom { 139 | const evaluated_condition = try eval(conditional.data, env); 140 | const is_true = switch (evaluated_condition) { 141 | .number => if (evaluated_condition.number == 0.0) false else true, // only 0.0 is false! 142 | else => true, 143 | }; 144 | const first = conditional.next.?; // node holding the branch taken when true 145 | // the false branch is only unwrapped when it is actually taken, so `(if c a)` works whenever c is true 146 | return if (is_true) try eval(first.data, env) else try eval(first.next.?.data, env); 147 | } 148 | 149 | /// Define variables and functions 150 | pub fn handleDefSpecialForm(variable_name_node: *types.AtomList.Node, env: *Env) SyntaxErrors!Atom { 151 | if (variable_name_node.data != .keyword) return error.NoDefKeyword; // the name being defined must be a symbol 152 | const value_node = variable_name_node.next orelse return error.NoDefinedValue; 153 | const atom = try eval(value_node.data, env); 154 | const value = switch (atom) { 155 | .function => Value{ .func = Func{ .funcUser = atom.function } }, 156 | else => Value{ .atom = atom }, 157 | }; 158 | env.push(variable_name_node.data.keyword, value); 159 | return atom; 160 | } 161 | // build arg and body lists for function; NOTE(review): returns pointers to locals (func_data, new_env) - presumably only valid under comptime evaluation, confirm 162 | pub fn handleFnSpecialForm(args: *types.AtomList.Node, env: *Env) SyntaxErrors!Atom { 163 | var arg = AtomList{}; 164 | var argnode = AtomList.Node{ .data = args.data }; 165 | arg.prepend(&argnode); 166 | var bod = AtomList{}; 167 | if (args.next) |body| bod.prepend(body); 168 | var new_env = env.copy(); 169 | var func_data = Function{ .args = arg, .body = bod, .env = &new_env }; 170 | return Atom{ .function = &func_data }; 171 | } 172 | 173 | pub fn handleFunction(topnode: *types.AtomList.Node, env: *Env) SyntaxErrors!Atom { 174 | const next = topnode.next.?; 175 | var copy = AtomList.Node{ .data = try eval(next.data, env) }; 176 | var args = AtomList{}; 177 | args.prepend(&copy); 178 | var it = next.next; 179 | while (it) |node| : (it = node.next) { // traverse any other args 180 | var new_node = AtomList.Node{ .data = try eval(node.data, 
env) }; 181 | args.first.?.findLast().insertAfter(&new_node); // append at the tail so three or more arguments keep their order 182 | } 183 | // look the callee up; a plain (non-function) value is returned as-is 184 | const val = (try env.get(topnode.data.keyword)); 185 | return switch (val) { 186 | .func => try applyFunction(val.func, args), 187 | .atom => val.atom, 188 | }; 189 | } 190 | 191 | pub fn applyFunction(func: Func, args: AtomList) !Atom { 192 | return switch (func) { 193 | .funcZero => func.funcZero.*(), 194 | .funcOne => func.funcOne.*((args.first orelse return error.CannotApplyFunction).data), // too few arguments is reported as an error instead of unwrapping null 195 | .funcTwo => func.funcTwo.*((args.first orelse return error.CannotApplyFunction).data, (args.first.?.next orelse return error.CannotApplyFunction).data), 196 | .funcUser => blk: { 197 | const n = func.funcUser.args.first.?.data; 198 | var new_env = Env{ .outer = func.funcUser.env, .varlist = VarList{} }; 199 | switch (func.funcUser.args.first.?.data) { 200 | .list => { 201 | const names = Atom{ .list = n.list }; 202 | try new_env.addArgs(names.list, args); 203 | }, 204 | .keyword => { 205 | const names = Atom{ .keyword = n.keyword }; 206 | var list = AtomList{}; 207 | var node = AtomList.Node{ .data = names }; 208 | list.prepend(&node); 209 | try new_env.addArgs(list, args); 210 | }, 211 | else => return error.SomethingFellThroughTheEvalCracks, 212 | } 213 | break :blk try eval(Atom{ .list = func.funcUser.body }, &new_env); 214 | }, 215 | }; 216 | } 217 | -------------------------------------------------------------------------------- /main.zig: -------------------------------------------------------------------------------- 1 | const print = @import("std").debug.print; 2 | 3 | const parse = @import("parse.zig"); 4 | const eval = @import("eval.zig"); 5 | const types = @import("types.zig"); 6 | 7 | pub const script = @embedFile("script.l"); 8 | const result = comptime eval.evalProgram(parse.abstract_syntax_tree); 9 | 10 | pub fn main() !void { 11 | print("{s}\n", .{parse.tokens}); 12 | debugList(try result); 13 | } 14 | 15 | /// These debug functions are useful for printing out the results 16 | /// of the linked list/nested structure that I'm using 17 | pub fn 
debugList(res: types.AtomList) void { 18 | if (res.first) |first| debugNode(first); // an empty list prints nothing instead of unwrapping null 19 | } 20 | 21 | pub fn debugNode(node: *types.AtomList.Node) void { 22 | if (node.next) |next| debugNode(next); // recurses before printing, so nodes come out last-to-first 23 | debugAtom(node.data); 24 | } 25 | 26 | pub fn debugAtom(atom: types.Atom) void { 27 | switch (atom) { 28 | .number => print("number: {}\n", .{atom.number}), 29 | .list => debugList(atom.list), 30 | .function => debugFn(atom), 31 | .keyword => {}, // keywords are not printed 32 | } 33 | } 34 | 35 | pub fn debugFn(atom: types.Atom) void { 36 | const func = atom.function; 37 | debugList(func.args); 38 | debugList(func.body); 39 | } 40 | -------------------------------------------------------------------------------- /parse.zig: -------------------------------------------------------------------------------- 1 | const script = @import("main.zig").script; 2 | 3 | const types = @import("types.zig"); 4 | const Atom = types.Atom; 5 | const AtomList = types.AtomList; 6 | const SyntaxErrors = types.SyntaxErrors; 7 | 8 | const parseFloat = @import("std").fmt.parseFloat; 9 | 10 | // I'd prefer this all to be in a function, but I couldn't figure out how to give the tokenize 11 | // function the size of the needed token-array without having an external/global variable for it 12 | // I guess I should do a double-pass? - read to count then make? 
13 | pub const token_count = comptime countTokens(script); 14 | pub const tokens = comptime tokenize(script, token_count); 15 | pub const abstract_syntax_tree = try comptime parse(); 16 | 17 | pub fn countTokens(comptime buf: []const u8) usize { 18 | var num = 0; 19 | var last = ' '; 20 | for (buf) |char| { 21 | num += switch (char) { 22 | '(', ')' => 1, 23 | ' ', '\n', '\t', '\r' => 0, // whitespace never starts a token (tabs and CRLF line endings included) 24 | else => if (isSplitByte(last)) 1 else 0, 25 | }; 26 | last = char; 27 | } 28 | return num; 29 | } 30 | 31 | pub fn tokenize(comptime buf: []const u8, size: usize) [size][]const u8 { 32 | var token_array: [size][]const u8 = undefined; 33 | var index: usize = 0; 34 | var token_iter = TokenIterator{ .index = 0, .buf = buf }; 35 | while (token_iter.next()) |token| : (index += 1) { 36 | token_array[index] = token; 37 | } 38 | return token_array; 39 | } 40 | 41 | const TokenIterator = struct { 42 | buf: []const u8, 43 | index: usize, 44 | pub fn next(self: *TokenIterator) ?[]const u8 { 45 | // move to beginning of token 46 | while (self.index < self.buf.len and isSkipByte(self.buf[self.index])) : (self.index += 1) {} 47 | const start = self.index; 48 | if (start == self.buf.len) return null; 49 | if (self.buf[start] == '(' or self.buf[start] == ')') { 50 | self.index += 1; 51 | return self.buf[start .. start + 1]; 52 | } 53 | 54 | // move to end of token 55 | while (self.index < self.buf.len and !isSplitByte(self.buf[self.index])) : (self.index += 1) {} 56 | const end = self.index; 57 | return self.buf[start..end]; 58 | } 59 | }; 60 | 61 | fn isSkipByte(byte: u8) bool { 62 | return byte == ' ' or byte == '\n' or byte == '\t' or byte == '\r'; 63 | } 64 | 65 | fn isSplitByte(byte: u8) bool { 66 | @setEvalBranchQuota(1_000_000); // use this as needed to stop compiler quitting on the job! 67 | return isSkipByte(byte) or byte == ')' or byte == '('; 68 | } 69 | 70 | /// takes in the current index and accesses the globals 'token_count' and 'tokens'; ideally these 71 | /// would be in a struct or something.. 
but I wasn't sure how to do that with the recursion 72 | /// neither of these globals are (or probably can be) modified/altered 73 | fn parse() SyntaxErrors!AtomList { 74 | comptime var list = AtomList{}; 75 | comptime var index: comptime_int = 0; 76 | while (index < token_count) { 77 | comptime var atom_index = try comptime nextBlock(index); 78 | comptime var node = AtomList.Node{ .data = atom_index.atom }; 79 | if (index == 0) { 80 | list.prepend(&node); 81 | } else { 82 | list.first.?.findLast().insertAfter(&node); 83 | } 84 | index = atom_index.index; 85 | } 86 | return list; 87 | } 88 | 89 | fn nextBlock(comptime current_index: comptime_int) SyntaxErrors!AtomIndex { 90 | @setEvalBranchQuota(1_000_000); 91 | var index = current_index; 92 | if (index == token_count) return error.IndexEqualTokenCount; 93 | if (popToken(index)) |token_index| { // popToken returns the token at index together with the next index 94 | const token = token_index.token; 95 | index = token_index.index; 96 | if (token[0] == '(') { // we're starting a new expression 97 | var list = AtomList{}; 98 | while (popToken(index)) |next_token_index| { 99 | const next_token = next_token_index.token; // extract the token 100 | index = next_token_index.index; // update the index 101 | if (next_token[0] == ')') break; // we've reached the end of a 'list' 102 | 103 | // popToken already advanced index past next_token, so index - 1 rewinds to let the recursive call re-read it as the start of its own block 104 | var next_atom_index = try nextBlock(index - 1); // recurse in case of other expressions 105 | index = next_atom_index.index; // update the index yet again after recursion 106 | var list_node = AtomList.Node{ .data = next_atom_index.atom }; 107 | if (list.len() >= 1) { 108 | list.first.?.findLast().insertAfter(&list_node); // front to back growth 109 | } else { 110 | list.prepend(&list_node); // if it's the first in the list we'll just add it 111 | } 112 | } else return error.EndOfTokenList; // tokens ran out before the matching ')' 113 | return AtomIndex{ .atom = Atom{ .list = list }, .index = index }; // we got the expression 114 | } else if 
(token[0] == ')') { 115 | return error.FoundRParensInParse; // mismatched parens 116 | } else { 117 | return AtomIndex{ .atom = atomize(token), .index = index }; 118 | } 119 | } else { 120 | return error.EndOfTokenList; // we shouldn't reach end of tokens here 121 | } 122 | return error.ParsingUnreachable; // makes the comptime happy 123 | } 124 | 125 | /// somewhat eagerly increments counters 126 | fn popToken(index: usize) ?TokenIndex { 127 | return if (token_count == index) null else TokenIndex{ .index = index + 1, .token = tokens[index] }; 128 | } 129 | 130 | pub fn atomize(token: []const u8) Atom { 131 | return parseNumber(token) orelse Atom{ .keyword = token }; // anything that is not a number is kept as a keyword 132 | } 133 | 134 | fn parseNumber(token: []const u8) ?Atom { 135 | return if (parseFloat(f64, token)) |t| Atom{ .number = t } else |_| null; // any parse failure simply means "not a number" 136 | } 137 | 138 | /// these seem like silly things - but I can't figure out how to return the index without them... 139 | pub const AtomIndex = struct { 140 | atom: Atom, 141 | index: comptime_int, 142 | }; 143 | 144 | pub const TokenIndex = struct { 145 | token: []const u8, 146 | index: comptime_int, 147 | }; 148 | -------------------------------------------------------------------------------- /script.l: -------------------------------------------------------------------------------- 1 | (def num (fn n (if (less n 2) 1 (add (sub 3 4) (sub 4 7))))) 2 | (def newnum (num 7)) 3 | (newnum) -------------------------------------------------------------------------------- /types.zig: -------------------------------------------------------------------------------- 1 | const std = @import("std"); 2 | const eval = @import("eval.zig"); 3 | const Env = eval.Env; 4 | 5 | const SinglyLinkedList = std.SinglyLinkedList; 6 | pub const AtomList = SinglyLinkedList(Atom); 7 | pub const VarList = SinglyLinkedList(Variable); 8 | 9 | pub const SpecialForms = enum { 10 | def, 11 | @"if", 12 | @"fn", 13 | }; 14 | 15 | // errors rock with comptime... 
partly because print debugging isn't very helpful :) 16 | pub const SyntaxErrors = error{ 17 | InvalidParseToken, 18 | FoundRParensInParse, 19 | EndOfTokenList, 20 | ParsingUnreachable, 21 | NoFunctionFound, 22 | CannotApplyFunction, 23 | IndexEqualTokenCount, 24 | CannotFindKeyInEnvs, 25 | UserFunctionParameterArgumentLengthMismatch, 26 | InvalidFunctionArgsOrBody, 27 | NoDefKeyword, 28 | NoDefinedValue, 29 | NoFunctionShouldBeHere, 30 | SomethingFellThroughTheEvalCracks, 31 | KeyDisappearedAfterFinding, 32 | }; 33 | 34 | pub const Variable = struct { 35 | name: []const u8, 36 | value: Value, 37 | }; 38 | 39 | pub const Value = union(enum) { 40 | atom: Atom, 41 | func: Func, 42 | }; 43 | 44 | /// this is a bad name for what it represents... I'm just not sure what's better... "Type" seems 45 | /// worse. This is the "basetype" (too long of a name) for this toy language: a number, a nested list, a keyword (symbol name), or a pointer to a user-defined Function 46 | pub const Atom = union(enum) { 47 | number: f64, 48 | list: AtomList, 49 | keyword: []const u8, 50 | function: *const Function, 51 | }; 52 | 53 | /// Union of function pointer types with different numbers of input parameters. Not sure of a better 54 | /// way to do this -> I took inspiration from the MAL implementation on: 55 | /// github.com/kanaka/mal/tree/master/impls/zig 56 | /// also, func can't be an atom, as it would depend on atom; funcUser points at a user-defined Function (args, body, env) rather than a native function 57 | pub const Func = union(enum) { 58 | funcZero: *const fn () Atom, 59 | funcOne: *const fn (first: Atom) Atom, 60 | funcTwo: *const fn (first: Atom, second: Atom) Atom, 61 | funcUser: *const Function, 62 | }; 63 | 64 | pub const Function = struct { 65 | args: AtomList, 66 | body: AtomList, 67 | env: *Env, 68 | }; 69 | --------------------------------------------------------------------------------