├── LICENSE
├── README.md
├── eval.zig
├── main.zig
├── parse.zig
├── script.l
└── types.zig


/LICENSE:
--------------------------------------------------------------------------------
 1 | This is free and unencumbered software released into the public domain.
 2 | 
 3 | Anyone is free to copy, modify, publish, use, compile, sell, or
 4 | distribute this software, either in source code form or as a compiled
 5 | binary, for any purpose, commercial or non-commercial, and by any
 6 | means.
 7 | 
 8 | In jurisdictions that recognize copyright laws, the author or authors
 9 | of this software dedicate any and all copyright interest in the
10 | software to the public domain. We make this dedication for the benefit
11 | of the public at large and to the detriment of our heirs and
12 | successors. We intend this dedication to be an overt act of
13 | relinquishment in perpetuity of all present and future rights to this
14 | software under copyright law.
15 | 
16 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
17 | EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18 | MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
19 | IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR
20 | OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
21 | ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
22 | OTHER DEALINGS IN THE SOFTWARE.
23 | 
24 | For more information, please refer to <https://unlicense.org>
25 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # Zig comptime lisp
2 | 
3 | This is a toy Lisp following in the footsteps of ["Make A Lisp"](https://github.com/kanaka/mal), or "MAL". However, unlike other "Make A Lisp" implementations, this one is implemented entirely in compile-time Zig code. This makes it much less useful (there is no I/O other than compile-time loading of strings or embedded files, and output to the Zig language). However, it does generate executables! :D
4 | 
5 | "script.l" is a file written in the toy Lisp language that gets pulled in, parsed, and evaluated at compile time through Zig's @embedFile builtin. Recursion, loops, most arithmetic, comments, and almost anything else useful aren't implemented, but you can use "if", define functions and variables, and do some basic arithmetic.
6 | 
7 | I likely won't ever touch this repository again: it's a fun toy, but I only wanted to see if I could get a proof-of-concept MAL working, which I think I have. This works with Zig version zig-windows-x86_64-0.8.0-dev.1369+45d220cac.
8 | 
9 | 


--------------------------------------------------------------------------------
/eval.zig:
--------------------------------------------------------------------------------
  1 | const std = @import("std");
  2 | // const // print = std.debug.// print;
  3 | const memEql = std.mem.eql;
  4 | const stringToEnum = std.meta.stringToEnum;
  5 | const types = @import("types.zig");
  6 | const parse = @import("parse.zig");
  7 | const Atom = types.Atom;
  8 | const AtomList = types.AtomList;
  9 | const Variable = types.Variable;
 10 | const VarList = types.VarList;
 11 | const Value = types.Value;
 12 | const SpecialForms = types.SpecialForms;
 13 | const Func = types.Func;
 14 | const Function = types.Function;
 15 | const SyntaxErrors = types.SyntaxErrors;
 16 | 
 17 | /// Builtin `add`: reads the `number` payload of both atoms and returns their sum as a number atom.
 18 | pub fn add(l: Atom, r: Atom) Atom {
 19 |     return .{ .number = l.number + r.number };
 20 | }
 21 | 
 22 | /// Builtin `sub`: subtraction, so argument order matters; returns `l - r` as a number atom.
 23 | pub fn sub(l: Atom, r: Atom) Atom {
 24 |     return .{ .number = l.number - r.number };
 25 | }
 26 | 
 27 | /// Builtin `neg`: takes a single atom and returns its `number` payload negated.
 28 | pub fn neg(l: Atom) Atom {
 29 |     return .{ .number = -l.number };
 30 | }
 31 | 
 32 | pub fn less(l: Atom, r: Atom) Atom { // builtin `less`: number atom 1.0 when l < r, otherwise 0
 33 |     return Atom{ .number = if (l.number < r.number) 1.0 else 0 };
 34 | }
 35 | 
 36 | pub const Env = struct { // one scope of bindings; `outer` is the enclosing scope (null for the outermost one)
 37 |     outer: ?*Env,
 38 |     varlist: VarList,
 39 | 
 40 |     /// Returns a shallow copy: the same `outer` pointer and the same list head as `self`.
 41 |     pub fn copy(self: *const Env) Env {
 42 |         return Env{ .outer = self.outer, .varlist = self.varlist };
 43 |     }
 44 | 
 45 |     /// Binds `symbol` to `value` at the front of this scope, ahead of any earlier binding of the name.
 46 |     pub fn push(self: *Env, symbol: []const u8, value: Value) void {
 47 |         var node = VarList.Node{ .data = Variable{ .name = symbol, .value = value } };
 48 |         self.varlist.prepend(&node); // NOTE(review): keeps the address of a local; presumably only valid at comptime
 49 |     }
 50 | 
 51 |     /// Returns the innermost scope, starting at `self`, that binds `symbol`, or null if none does.
 52 |     pub fn find(self: *const Env, symbol: []const u8) ?*const Env {
 53 |         var it = self.varlist.first;
 54 |         while (it) |node| : (it = node.next) {
 55 |             if (memEql(u8, node.data.name, symbol)) return self;
 56 |         }
 57 |         return if (self.outer) |outer_node| outer_node.find(symbol) else null;
 58 |     }
 59 | 
 60 |     /// Looks `symbol` up through the scope chain; fails with `CannotFindKeyInEnvs` when it is unbound.
 61 |     pub fn get(self: *const Env, symbol: []const u8) SyntaxErrors!Value {
 62 |         const env = self.find(symbol) orelse return error.CannotFindKeyInEnvs;
 63 |         var it = env.varlist.first;
 64 |         while (it) |node| : (it = node.next) {
 65 |             if (memEql(u8, node.data.name, symbol)) return node.data.value;
 66 |         }
 67 |         return error.KeyDisappearedAfterFinding; // find() just matched the name in this very scope
 68 |     }
 69 | 
 70 |     /// Binds each name in `names` to the value at the same position in `values`; the lengths must match.
 71 |     pub fn addArgs(self: *Env, names: AtomList, values: AtomList) SyntaxErrors!void {
 72 |         if (names.len() != values.len()) return error.UserFunctionParameterArgumentLengthMismatch;
 73 |         var name = names.first;
 74 |         var value = values.first;
 75 |         while (name) |name_node| : (name = name_node.next) {
 76 |             const value_node = value.?; // the lengths were checked equal, so every name has a value
 77 |             self.push(name_node.data.keyword, Value{ .atom = value_node.data });
 78 |             value = value_node.next;
 79 |         }
 80 |     }
 81 | };
 82 | 
 83 | pub fn evalProgram(comptime ast: AtomList) SyntaxErrors!AtomList { // evaluates every top-level form of `ast`, in order
 84 |     var corelist = VarList{};
 85 |     const functions = [_]Variable{ // builtins placed in the global environment
 86 |         Variable{ .name = "add", .value = Value{ .func = Func{ .funcTwo = &add } } },
 87 |         Variable{ .name = "sub", .value = Value{ .func = Func{ .funcTwo = &sub } } },
 88 |         Variable{ .name = "less", .value = Value{ .func = Func{ .funcTwo = &less } } },
 89 |     }; // NOTE(review): `neg` is defined in this file but is not registered here
 90 | 
 91 |     for (functions) |function| {
 92 |         var func = VarList.Node{ .data = function };
 93 |         corelist.prepend(&func);
 94 |     }
 95 |     var global_env = Env{ .outer = null, .varlist = corelist }; // outermost scope: it has no parent
 96 | 
 97 |     var results = AtomList{}; // one evaluated atom per top-level form
 98 |     var it = ast.first;
 99 |     while (it) |node| : (it = node.next) {
100 |         const evaluation = try comptime eval(node.data, &global_env); // the call is forced to run at compile time
101 |         var new_node = AtomList.Node{ .data = evaluation };
102 |         if (results.len() >= 1) {
103 |             results.first.?.findLast().insertAfter(&new_node); // front to back growth
104 |         } else {
105 |             results.prepend(&new_node);
106 |         }
107 |     }
108 |     return results;
109 | }
110 | 
111 | pub fn eval(x: Atom, env: *Env) SyntaxErrors!Atom { // evaluates one atom in the scope `env`
112 |     @setEvalBranchQuota(1_000_000);
113 |     return switch (x) {
114 |         .number => x, // number evaluates to itself
115 |         .keyword => (try env.get(x.keyword)).atom, // non function keywords
116 |         .function => error.NoFunctionShouldBeHere, // we shouldn't see a bare function
117 |         .list => blk: {
118 |             if (x.list.len() == 0) break :blk x; // list is empty, return emptylist
119 |             const node = comptime x.list.first.?;
120 |             const data = comptime node.data; // head of the list decides how the rest is treated
121 |             const next = comptime node.next;
122 |             if (data != .keyword) break :blk eval(comptime data, comptime env); // evaluate it if not a kwd; the rest of the list is ignored
123 |             if (next == null) break :blk (try env.get(data.keyword)).atom; // if it's terminal, find it (variable)
124 |             if (stringToEnum(comptime SpecialForms, data.keyword)) |special_form| { // special form
125 |                 break :blk switch (special_form) {
126 |                     .def => handleDefSpecialForm(next.?, env),
127 |                     .@"if" => handleIfSpecialForm(next.?, env),
128 |                     .@"fn" => handleFnSpecialForm(next.?, env),
129 |                 };
130 |             } else { // function that's not a special form
131 |                 break :blk handleFunction(node, env);
132 |             }
133 |         },
134 |     };
135 | }
136 | 
137 | /// `if` special form. There is no bool type: a condition equal to the number 0.0 is false, anything else is true.
138 | pub fn handleIfSpecialForm(conditional: *types.AtomList.Node, env: *Env) SyntaxErrors!Atom {
139 |     const condition = try eval(conditional.data, env);
140 |     const truthy = switch (condition) {
141 |         .number => |n| n != 0.0, // 0.0 is the only false value
142 |         else => true, // every non-number atom counts as true
143 |     };
144 |     const then_branch = conditional.next.?.data; // evaluated when the condition is true
145 |     const else_branch = conditional.next.?.next.?.data; // evaluated when the condition is false
146 |     return eval(if (truthy) then_branch else else_branch, env); // only the chosen branch is evaluated
147 | }
148 | 
149 | /// `def` special form: evaluates the value expression, binds the result to the name in `env`, and returns it.
150 | pub fn handleDefSpecialForm(variable_name_node: *types.AtomList.Node, env: *Env) SyntaxErrors!Atom {
151 |     // the binding target has to be a bare symbol; e.g. `(def 3 4)` is rejected instead of misreading the union
152 |     if (variable_name_node.data != .keyword) return error.NoDefKeyword;
153 |     const value_node = variable_name_node.next orelse return error.NoDefinedValue;
154 |     const atom = try eval(value_node.data, env);
155 |     // functions are stored as callable `Func` values, everything else as a plain atom
156 |     const value = if (atom == .function) Value{ .func = Func{ .funcUser = atom.function } } else Value{ .atom = atom };
157 |     env.push(variable_name_node.data.keyword, value);
158 |     return atom;
159 | }
160 | 
161 | /// `fn` special form: wraps the parameter atom and the body node in a `Function` holding a copy of `env`.
162 | pub fn handleFnSpecialForm(args: *types.AtomList.Node, env: *Env) SyntaxErrors!Atom {
163 |     var arg = AtomList{};
164 |     var argnode = AtomList.Node{ .data = args.data }; // the parameter atom: a single keyword or a list of keywords
165 |     arg.prepend(&argnode);
166 |     var bod = AtomList{}; // stays empty when no body follows the parameters
167 |     if (args.next) |body| bod.prepend(body); // NOTE(review): prepend presumably resets `body.next`, dropping any nodes after the body — confirm
168 |     var new_env = env.copy(); // copy of the defining scope, taken at definition time
169 |     var func_data = Function{ .args = arg, .body = bod, .env = &new_env };
170 |     return Atom{ .function = &func_data }; // NOTE(review): address of a local; presumably only valid because evaluation runs at comptime
171 | }
172 | 
173 | /// Calls the function named by `topnode`, passing every following node, evaluated, as an argument.
174 | pub fn handleFunction(topnode: *types.AtomList.Node, env: *Env) SyntaxErrors!Atom {
175 |     const next = topnode.next.?; // eval() only dispatches here when a node follows the name
176 |     var copy = AtomList.Node{ .data = try eval(next.data, env) };
177 |     var args = AtomList{ .first = &copy };
178 |     var tail = &copy; // last node of `args`, so that three or more arguments keep their source order
179 |     var it = next.next;
180 |     while (it) |node| : (it = node.next) { // traverse any other args
181 |         var new_node = AtomList.Node{ .data = try eval(node.data, env) };
182 |         tail.insertAfter(&new_node); // append
183 |         tail = &new_node;
184 |     }
185 |     return switch (try env.get(topnode.data.keyword)) {
186 |         .func => |func| try applyFunction(func, args),
187 |         .atom => |atom| atom, // a plain value in call position is returned as is; the arguments are discarded
188 |     };
189 | }
190 | 
191 | /// Calls `func` with the already-evaluated `args`: builtins receive the leading argument atoms
192 | /// positionally; a user function has its body evaluated in a new scope that binds its parameters.
193 | pub fn applyFunction(func: Func, args: AtomList) !Atom {
194 |     switch (func) {
195 |         .funcZero => |builtin| return builtin.*(),
196 |         .funcOne => |builtin| return builtin.*(args.first.?.data),
197 |         .funcTwo => |builtin| return builtin.*(args.first.?.data, args.first.?.next.?.data),
198 |         .funcUser => |user| {
199 |             // the new scope chains to the environment stored in the function
200 |             var new_env = Env{ .outer = user.env, .varlist = VarList{} };
201 |             const params = user.args.first.?.data;
202 |             switch (params) {
203 |                 .list => |names| try new_env.addArgs(names, args), // several parameters, e.g. (fn (a b) ...)
204 |                 .keyword => { // a lone parameter, e.g. (fn a ...): wrap the name in a one-element list
205 |                     var single = AtomList.Node{ .data = params };
206 |                     var list = AtomList{};
207 |                     list.prepend(&single);
208 |                     try new_env.addArgs(list, args);
209 |                 },
210 |                 else => return error.SomethingFellThroughTheEvalCracks,
211 |             }
212 |             // the body list is evaluated like any other list expression, inside the new scope
213 |             return eval(Atom{ .list = user.body }, &new_env);
214 |         },
215 |     }
216 | }
217 | 


--------------------------------------------------------------------------------
/main.zig:
--------------------------------------------------------------------------------
 1 | const print = @import("std").debug.print;
 2 | 
 3 | const parse = @import("parse.zig");
 4 | const eval = @import("eval.zig");
 5 | const types = @import("types.zig");
 6 | 
 7 | pub const script = @embedFile("script.l"); // toy-lisp source, embedded into the build
 8 | const result = comptime eval.evalProgram(parse.abstract_syntax_tree); // evaluated at compile time; still an error union here
 9 | 
10 | pub fn main() !void { // prints the token list, then the evaluated results
11 |     print("{s}\n", .{parse.tokens});
12 |     debugList(try result); // an evaluation error surfaces here
13 | }
14 | 
15 | /// Debug helper for the nested linked-list results: prints every atom reachable from `res`.
16 | /// An empty list (e.g. the result of evaluating `()`) prints nothing instead of failing on `first.?`.
17 | pub fn debugList(res: types.AtomList) void {
18 |     if (res.first) |first| debugNode(first);
19 | }
20 | 
21 | pub fn debugNode(node: *types.AtomList.Node) void { // prints `node` and every node after it
22 |     if (node.next) |next| debugNode(next); // recurses first, so later nodes are printed before earlier ones
23 |     debugAtom(node.data);
24 | }
25 | 
26 | pub fn debugAtom(atom: types.Atom) void { // prints one atom; lists and functions are printed recursively
27 |     switch (atom) {
28 |         .number => |value| print("number: {}\n", .{value}),
29 |         .list => |list| debugList(list),
30 |         .function => debugFn(atom),
31 |         .keyword => {}, // keywords produce no output
32 |     }
33 | }
34 | 
35 | /// Prints the parameter list and then the body list of the function held by `atom`.
36 | pub fn debugFn(atom: types.Atom) void {
37 |     debugList(atom.function.args);
38 |     debugList(atom.function.body);
39 | }
40 | 


--------------------------------------------------------------------------------
/parse.zig:
--------------------------------------------------------------------------------
  1 | const script = @import("main.zig").script;
  2 | 
  3 | const types = @import("types.zig");
  4 | const Atom = types.Atom;
  5 | const AtomList = types.AtomList;
  6 | const SyntaxErrors = types.SyntaxErrors;
  7 | 
  8 | const parseFloat = @import("std").fmt.parseFloat;
  9 | 
 10 | // I'd prefer this all to be in a function, but I couldn't figure out how to give the tokenize
 11 | // function the size of the needed token-array without having an external/global variable for it
 12 | // I guess I should do a double-pass? - read to count then make?
 13 | pub const token_count = comptime countTokens(script); // first pass: how many tokens the script contains
 14 | pub const tokens = comptime tokenize(script, token_count); // second pass: the token slices themselves
 15 | pub const abstract_syntax_tree = try comptime parse(); // list of top-level forms built from `tokens`
 16 | 
 17 | /// Counts the tokens in `buf`: every paren is one token, and so is every run of other non-blank bytes.
 18 | pub fn countTokens(comptime buf: []const u8) usize {
 19 |     var count = 0;
 20 |     var previous = ' '; // treat the start of the buffer as if it followed a blank
 21 |     for (buf) |char| {
 22 |         const is_paren = char == '(' or char == ')';
 23 |         const is_blank = char == ' ' or char == '\n';
 24 |         // a word is counted once, on its first byte, i.e. when the byte before it was a separator
 25 |         count += if (is_paren or (!is_blank and isSplitByte(previous))) 1 else 0;
 26 |         previous = char;
 27 |     }
 28 |     return count;
 29 | }
 30 | 
 31 | pub fn tokenize(comptime buf: []const u8, size: usize) [size][]const u8 { // splits `buf` into an array of `size` token slices
 32 |     var token_array: [size][]const u8 = undefined; // every slot is expected to be filled by the loop below
 33 |     var index: usize = 0;
 34 |     var token_iter = TokenIterator{ .index = 0, .buf = buf };
 35 |     while (token_iter.next()) |token| : (index += 1) { // NOTE(review): assumes the iterator yields exactly `size` tokens
 36 |         token_array[index] = token;
 37 |     }
 38 |     return token_array;
 39 | }
 40 | 
 41 | const TokenIterator = struct { // walks `buf` and hands out its tokens one at a time
 42 |     buf: []const u8,
 43 |     index: usize,
 44 |     /// Returns the next token (a single paren, or a run of non-separator bytes), or null at the end.
 45 |     pub fn next(self: *TokenIterator) ?[]const u8 {
 46 |         // skip the blanks in front of the token
 47 |         while (self.index < self.buf.len and isSkipByte(self.buf[self.index])) self.index += 1;
 48 |         if (self.index == self.buf.len) return null;
 49 | 
 50 |         // the first byte always belongs to the token: it is either a paren or the start of a word
 51 |         const start = self.index;
 52 |         self.index += 1;
 53 |         const is_paren = self.buf[start] == '(' or self.buf[start] == ')';
 54 |         if (!is_paren) {
 55 |             while (self.index < self.buf.len and !isSplitByte(self.buf[self.index])) self.index += 1;
 56 |         }
 57 |         return self.buf[start..self.index];
 58 |     }
 59 | };
 60 | 
 61 | fn isSkipByte(byte: u8) bool { // true for the blanks between tokens: space and newline only
 62 |     return byte == ' ' or byte == '\n';
 63 | }
 64 | 
 65 | fn isSplitByte(byte: u8) bool { // true for bytes that end a word: space, newline and both parens
 66 |     @setEvalBranchQuota(1_000_000); // use this as needed to stop compiler quitting on the job!
 67 |     return byte == ' ' or byte == ')' or byte == '(' or byte == '\n';
 68 | }
 69 | 
 70 | /// Builds the list of top-level expressions by calling `nextBlock` until every token is consumed.
 71 | /// Reads the file-level constants `token_count` and `tokens` instead of taking them as parameters
 72 | /// (ideally they would live in a struct); neither of them is modified here.
 73 | fn parse() SyntaxErrors!AtomList {
 74 |     comptime var list = AtomList{};
 75 |     comptime var index: comptime_int = 0; // index of the next unread token
 76 |     while (index < token_count) {
 77 |         comptime var atom_index = try comptime nextBlock(index); // one whole expression plus the index after it
 78 |         comptime var node = AtomList.Node{ .data = atom_index.atom };
 79 |         if (index == 0) { // first expression: the list is still empty
 80 |             list.prepend(&node);
 81 |         } else {
 82 |             list.first.?.findLast().insertAfter(&node); // append, keeping source order
 83 |         }
 84 |         index = atom_index.index;
 85 |     }
 86 |     return list;
 87 | }
 88 | 
 89 | fn nextBlock(comptime current_index: comptime_int) SyntaxErrors!AtomIndex { // parses one expression starting at token `current_index`
 90 |     @setEvalBranchQuota(1_000_000);
 91 |     var index = current_index;
 92 |     if (index == token_count) return error.IndexEqualTokenCount;
 93 |     if (popToken(index)) |token_index| { // popToken returns the token at `index` and the index after it
 94 |         const token = token_index.token;
 95 |         index = token_index.index;
 96 |         if (token[0] == '(') { // we're starting a new expression
 97 |             var list = AtomList{};
 98 |             while (popToken(index)) |next_token_index| { // NOTE(review): running out of tokens ends the loop without an error
 99 |                 const next_token = next_token_index.token; // extract the token
100 |                 index = next_token_index.index; // update the index
101 |                 if (next_token[0] == ')') break; // we've reached the end of a 'list'
102 | 
103 |                 // index - 1: the popToken above already stepped past this token while checking for ')'
104 |                 var next_atom_index = try nextBlock(index - 1); // recurse in case of other expressions
105 |                 index = next_atom_index.index; // update the index yet again after recursion
106 |                 var list_node = AtomList.Node{ .data = next_atom_index.atom };
107 |                 if (list.len() >= 1) {
108 |                     list.first.?.findLast().insertAfter(&list_node); // front to back growth
109 |                 } else {
110 |                     list.prepend(&list_node); // if it's the first in the list we'll just add it
111 |                 }
112 |             }
113 |             return AtomIndex{ .atom = Atom{ .list = list }, .index = index }; // we got the expression
114 |         } else if (token[0] == ')') {
115 |             return error.FoundRParensInParse; // mismatched parens
116 |         } else {
117 |             return AtomIndex{ .atom = atomize(token), .index = index }; // a single number or keyword
118 |         }
119 |     } else {
120 |         return error.EndOfTokenList; // we shouldn't reach end of tokens here
121 |     }
122 |     return error.ParsingUnreachable; // makes the comptime happy
123 | }
124 | 
125 | /// Returns the token at `index` paired with the index after it, or null when `index` equals `token_count`.
126 | fn popToken(index: usize) ?TokenIndex {
127 |     return if (index != token_count) TokenIndex{ .token = tokens[index], .index = index + 1 } else null;
128 | }
129 | 
130 | pub fn atomize(token: []const u8) Atom { // number atom when the token parses as an f64, keyword atom otherwise
131 |     return parseNumber(token) orelse Atom{ .keyword = token };
132 | }
133 | 
134 | fn parseNumber(token: []const u8) ?Atom { // null when `parseFloat` rejects the token
135 |     return Atom{ .number = parseFloat(f64, token) catch return null };
136 | }
137 | 
138 | /// Return value of `nextBlock`: the parsed atom together with the index of the token that follows it.
139 | pub const AtomIndex = struct {
140 |     atom: Atom,
141 |     index: comptime_int, // where parsing should continue
142 | };
143 | 
144 | pub const TokenIndex = struct { // return value of `popToken`
145 |     token: []const u8, // the token that was read
146 |     index: comptime_int, // index of the token after it
147 | };
148 | 


--------------------------------------------------------------------------------
/script.l:
--------------------------------------------------------------------------------
1 | (def num (fn n (if (less n 2) 1 (add (sub 3 4) (sub 4 7)))))
2 | (def newnum (num 7))
3 | (newnum)


--------------------------------------------------------------------------------
/types.zig:
--------------------------------------------------------------------------------
 1 | const std = @import("std");
 2 | const eval = @import("eval.zig");
 3 | const Env = eval.Env;
 4 | 
 5 | const SinglyLinkedList = std.SinglyLinkedList;
 6 | pub const AtomList = SinglyLinkedList(Atom);
 7 | pub const VarList = SinglyLinkedList(Variable);
 8 | 
 9 | pub const SpecialForms = enum { // keywords that `eval` handles itself rather than looking up as functions
10 |     def,
11 |     @"if",
12 |     @"fn",
13 | };
14 | 
15 | // errors rock with comptime... partly because print debugging isn't very helpful :)
16 | pub const SyntaxErrors = error{ // every error the parser and the evaluator can return
17 |     InvalidParseToken,
18 |     FoundRParensInParse, // parser: a ')' where an expression should start
19 |     EndOfTokenList, // parser: no token left where one was expected
20 |     ParsingUnreachable,
21 |     NoFunctionFound,
22 |     CannotApplyFunction,
23 |     IndexEqualTokenCount, // parser: asked to parse at an index equal to the token count
24 |     CannotFindKeyInEnvs, // evaluator: the name is not bound in any scope
25 |     UserFunctionParameterArgumentLengthMismatch, // evaluator: parameter and argument counts differ
26 |     InvalidFunctionArgsOrBody,
27 |     NoDefKeyword,
28 |     NoDefinedValue, // evaluator: `def` was given a name but no value
29 |     NoFunctionShouldBeHere, // evaluator: a bare function atom was evaluated
30 |     SomethingFellThroughTheEvalCracks, // evaluator: function parameters are neither a list nor a keyword
31 |     KeyDisappearedAfterFinding,
32 | };
33 | 
34 | pub const Variable = struct { // one name-to-value binding stored in an environment's list
35 |     name: []const u8,
36 |     value: Value,
37 | };
38 | 
39 | pub const Value = union(enum) { // what a name can be bound to
40 |     atom: Atom, // plain data
41 |     func: Func, // something callable: a builtin or a user function
42 | };
43 | 
44 | /// this is a bad name for what it represents... I'm just not sure what's better... "Type" seems
45 | /// worse. This is the "basetype" (too long of a name) for this toy language
46 | pub const Atom = union(enum) {
47 |     number: f64, // every number is a 64-bit float
48 |     list: AtomList, // a parenthesised expression: linked list of atoms
49 |     keyword: []const u8, // any token that does not parse as a number
50 |     function: *const Function, // user function produced by the `fn` special form
51 | };
52 | 
53 | /// Union of function pointer types with different numbers of input parameters. Not sure of a better
54 | /// way to do this -> I took inspiration from the MAL implementation on:
55 | /// github.com/kanaka/mal/tree/master/impls/zig
56 | /// also, func can't be an atom, as it would depend on atom
57 | pub const Func = union(enum) {
58 |     funcZero: *const fn () Atom, // builtin taking no arguments
59 |     funcOne: *const fn (first: Atom) Atom, // builtin taking one argument
60 |     funcTwo: *const fn (first: Atom, second: Atom) Atom, // builtin taking two arguments
61 |     funcUser: *const Function, // function defined in the script
62 | };
63 | 
64 | pub const Function = struct { // a user-defined function
65 |     args: AtomList, // one node holding the parameter atom: a keyword or a list of keywords
66 |     body: AtomList, // the body expression node, if any
67 |     env: *Env, // environment that becomes the parent scope of each call
68 | };
69 | 


--------------------------------------------------------------------------------