├── Makefile ├── builtin.c ├── data.c ├── eval.c ├── html ├── arithmetic.html ├── booleans.html ├── builtins.html ├── continuations.html ├── data.html ├── expressions.html ├── gc.html ├── index.html ├── intro.html ├── lambda.html ├── library.html ├── macros.html ├── next.html ├── parser.html ├── quasiquotation.html ├── style.css ├── sugar.html └── variadics.html ├── library.lisp ├── lisp.h ├── main.c ├── print.c └── read.c /Makefile: -------------------------------------------------------------------------------- 1 | sources=$(wildcard *.c) 2 | 3 | CFLAGS=-Wall -O0 -g --std=c99 -D_GNU_SOURCE 4 | LDFLAGS=-lreadline 5 | 6 | objects=$(sources:.c=.o) 7 | 8 | lisp: $(objects) 9 | $(CC) $(CFLAGS) -o $@ $^ $(LDFLAGS) 10 | 11 | $(objects): $(wildcard *.h) 12 | 13 | .PHONY: clean 14 | clean: 15 | $(RM) *.o lisp 16 | 17 | -------------------------------------------------------------------------------- /builtin.c: -------------------------------------------------------------------------------- 1 | #include "lisp.h" 2 | 3 | int builtin_car(Atom args, Atom *result) 4 | { 5 | if (nilp(args) || !nilp(cdr(args))) 6 | return Error_Args; 7 | 8 | if (nilp(car(args))) 9 | *result = nil; 10 | else if (car(args).type != AtomType_Pair) 11 | return Error_Type; 12 | else 13 | *result = car(car(args)); 14 | 15 | return Error_OK; 16 | } 17 | 18 | int builtin_cdr(Atom args, Atom *result) 19 | { 20 | if (nilp(args) || !nilp(cdr(args))) 21 | return Error_Args; 22 | 23 | if (nilp(car(args))) 24 | *result = nil; 25 | else if (car(args).type != AtomType_Pair) 26 | return Error_Type; 27 | else 28 | *result = cdr(car(args)); 29 | 30 | return Error_OK; 31 | } 32 | 33 | int builtin_cons(Atom args, Atom *result) 34 | { 35 | if (nilp(args) || nilp(cdr(args)) || !nilp(cdr(cdr(args)))) 36 | return Error_Args; 37 | 38 | *result = cons(car(args), car(cdr(args))); 39 | 40 | return Error_OK; 41 | } 42 | 43 | int builtin_eq(Atom args, Atom *result) 44 | { 45 | Atom a, b; 46 | int eq; 47 | 48 | if (nilp(args) || nilp(cdr(args)) || !nilp(cdr(cdr(args)))) 49 | return Error_Args; 50 | 51 | a = car(args); 52 | b = car(cdr(args)); 53 | 54 | if (a.type == b.type) { 55 | switch (a.type) { 56 | case AtomType_Nil: 57 | eq = 1; 58 | break; 59 | case AtomType_Pair: 60 | case AtomType_Closure: 61 | case AtomType_Macro: 62 | eq = (a.value.pair == b.value.pair); 63 | break; 64 | case AtomType_Symbol: 65 | eq = (a.value.symbol == b.value.symbol); 66 | break; 67 | case AtomType_Integer: 68 | eq = (a.value.integer == b.value.integer); 69 | break; 70 | case AtomType_Builtin: 71 | eq = (a.value.builtin == b.value.builtin); 72 | break; 73 | } 74 | } else { 75 | eq = 0; 76 | } 77 | 78 | *result = eq ? make_sym("T") : nil; 79 | return Error_OK; 80 | } 81 | 82 | int builtin_pairp(Atom args, Atom *result) 83 | { 84 | if (nilp(args) || !nilp(cdr(args))) 85 | return Error_Args; 86 | 87 | *result = (car(args).type == AtomType_Pair) ? make_sym("T") : nil; 88 | return Error_OK; 89 | } 90 | 91 | int builtin_procp(Atom args, Atom *result) 92 | { 93 | if (nilp(args) || !nilp(cdr(args))) 94 | return Error_Args; 95 | 96 | *result = (car(args).type == AtomType_Builtin 97 | || car(args).type == AtomType_Closure) ? make_sym("T") : nil; 98 | return Error_OK; 99 | } 100 | 101 | int builtin_add(Atom args, Atom *result) 102 | { 103 | Atom a, b; 104 | 105 | if (nilp(args) || nilp(cdr(args)) || !nilp(cdr(cdr(args)))) 106 | return Error_Args; 107 | 108 | a = car(args); 109 | b = car(cdr(args)); 110 | 111 | if (a.type != AtomType_Integer || b.type != AtomType_Integer) 112 | return Error_Type; 113 | 114 | *result = make_int(a.value.integer + b.value.integer); 115 | 116 | return Error_OK; 117 | } 118 | 119 | int builtin_subtract(Atom args, Atom *result) 120 | { 121 | Atom a, b; 122 | 123 | if (nilp(args) || nilp(cdr(args)) || !nilp(cdr(cdr(args)))) 124 | return Error_Args; 125 | 126 | a = car(args); 127 | b = car(cdr(args)); 128 | 129 | if (a.type != AtomType_Integer || b.type != AtomType_Integer) 130 | return Error_Type; 131 | 132 | *result = make_int(a.value.integer - b.value.integer); 133 | 134 | return Error_OK; 135 | } 136 | 137 | int builtin_multiply(Atom args, Atom *result) 138 | { 139 | Atom a, b; 140 | 141 | if (nilp(args) || nilp(cdr(args)) || !nilp(cdr(cdr(args)))) 142 | return Error_Args; 143 | 144 | a = car(args); 145 | b = car(cdr(args)); 146 | 147 | if (a.type != AtomType_Integer || b.type != AtomType_Integer) 148 | return Error_Type; 149 | 150 | *result = make_int(a.value.integer * b.value.integer); 151 | 152 | return Error_OK; 153 | } 154 | 155 | int builtin_divide(Atom args, Atom *result) 156 | { 157 | Atom a, b; 158 | 159 | if (nilp(args) || nilp(cdr(args)) || !nilp(cdr(cdr(args)))) 160 | return Error_Args; 161 | 162 | a = car(args); 163 | b = car(cdr(args)); 164 | 165 | if (a.type != AtomType_Integer || b.type != AtomType_Integer) 166 | return Error_Type; 167 | 168 | *result = make_int(a.value.integer / b.value.integer); 169 | 170 | return Error_OK; 171 | } 172 | 173 | int builtin_numeq(Atom args, Atom *result) 174 | { 175 | Atom a, b; 176 | 177 | if (nilp(args) || nilp(cdr(args)) || !nilp(cdr(cdr(args)))) 178 | return Error_Args; 179 | 180 | a = car(args); 181 | b = car(cdr(args)); 182 | 183 | if (a.type != AtomType_Integer || b.type != AtomType_Integer) 184 | return Error_Type; 185 | 186 | *result = (a.value.integer == b.value.integer) ? make_sym("T") : nil; 187 | 188 | return Error_OK; 189 | } 190 | 191 | int builtin_less(Atom args, Atom *result) 192 | { 193 | Atom a, b; 194 | 195 | if (nilp(args) || nilp(cdr(args)) || !nilp(cdr(cdr(args)))) 196 | return Error_Args; 197 | 198 | a = car(args); 199 | b = car(cdr(args)); 200 | 201 | if (a.type != AtomType_Integer || b.type != AtomType_Integer) 202 | return Error_Type; 203 | 204 | *result = (a.value.integer < b.value.integer) ? make_sym("T") : nil; 205 | 206 | return Error_OK; 207 | } 208 | 209 | -------------------------------------------------------------------------------- /data.c: -------------------------------------------------------------------------------- 1 | #include "lisp.h" 2 | #include 3 | #include 4 | #include 5 | #include 6 | 7 | struct Allocation { 8 | struct Pair pair; 9 | int mark : 1; 10 | struct Allocation *next; 11 | }; 12 | 13 | struct Allocation *global_allocations = NULL; 14 | 15 | Atom cons(Atom car_val, Atom cdr_val) 16 | { 17 | struct Allocation *a; 18 | Atom p; 19 | 20 | a = malloc(sizeof(struct Allocation)); 21 | a->mark = 0; 22 | a->next = global_allocations; 23 | global_allocations = a; 24 | 25 | p.type = AtomType_Pair; 26 | p.value.pair = &a->pair; 27 | 28 | car(p) = car_val; 29 | cdr(p) = cdr_val; 30 | 31 | return p; 32 | } 33 | 34 | Atom make_int(long x) 35 | { 36 | Atom a; 37 | a.type = AtomType_Integer; 38 | a.value.integer = x; 39 | return a; 40 | } 41 | 42 | static Atom sym_table = { AtomType_Nil }; 43 | 44 | Atom make_sym(const char *s) 45 | { 46 | Atom a, p; 47 | 48 | p = sym_table; 49 | while (!nilp(p)) { 50 | a = car(p); 51 | if (strcmp(a.value.symbol, s) == 0) 52 | return a; 53 | p = cdr(p); 54 | } 55 | 56 | a.type = AtomType_Symbol; 57 | a.value.symbol = strdup(s); 58 | sym_table = cons(a, sym_table); 59 | 60 | return a; 61 | } 62 | 63 | Atom make_builtin(Builtin fn) 64 | { 65 | Atom a; 66 | a.type = AtomType_Builtin; 67 | a.value.builtin = fn; 68 | return a; 69 | } 70 | 71 | int listp(Atom expr) 72 | { 73 | while (!nilp(expr)) { 74 | if (expr.type != AtomType_Pair) 75 | return 0; 76 | expr = cdr(expr); 77 | } 78 | return 1; 79 | } 80 | 81 | Atom copy_list(Atom list) 82 | { 83 | Atom a, p; 84 | 85 | if (nilp(list)) 86 | return nil; 87 | 88 | a = cons(car(list), nil); 89 | p = a; 90 | list = cdr(list); 91 | 92 | while (!nilp(list)) { 93 | cdr(p) = cons(car(list), nil); 94 | p = cdr(p); 95 | list = cdr(list); 96 | } 97 | 98 | return a; 99 | } 100 | 101 | Atom list_create(int n, ...) 102 | { 103 | va_list ap; 104 | Atom list = nil; 105 | 106 | va_start(ap, n); 107 | while (n--) { 108 | Atom item = va_arg(ap, Atom); 109 | list = cons(item, list); 110 | } 111 | va_end(ap); 112 | 113 | list_reverse(&list); 114 | return list; 115 | } 116 | 117 | Atom list_get(Atom list, int k) 118 | { 119 | while (k--) 120 | list = cdr(list); 121 | return car(list); 122 | } 123 | 124 | void list_set(Atom list, int k, Atom value) 125 | { 126 | while (k--) 127 | list = cdr(list); 128 | car(list) = value; 129 | } 130 | 131 | void list_reverse(Atom *list) 132 | { 133 | Atom tail = nil; 134 | while (!nilp(*list)) { 135 | Atom p = cdr(*list); 136 | cdr(*list) = tail; 137 | tail = *list; 138 | *list = p; 139 | } 140 | *list = tail; 141 | } 142 | 143 | void gc_mark(Atom root) 144 | { 145 | struct Allocation *a; 146 | 147 | if (!(root.type == AtomType_Pair 148 | || root.type == AtomType_Closure 149 | || root.type == AtomType_Macro)) 150 | return; 151 | 152 | a = (struct Allocation *) 153 | ((char *) root.value.pair 154 | - offsetof(struct Allocation, pair)); 155 | 156 | if (a->mark) 157 | return; 158 | 159 | a->mark = 1; 160 | 161 | gc_mark(car(root)); 162 | gc_mark(cdr(root)); 163 | } 164 | 165 | void gc() 166 | { 167 | struct Allocation *a, **p; 168 | 169 | gc_mark(sym_table); 170 | 171 | /* Free unmarked allocations */ 172 | p = &global_allocations; 173 | while (*p != NULL) { 174 | a = *p; 175 | if (!a->mark) { 176 | *p = a->next; 177 | free(a); 178 | } else { 179 | p = &a->next; 180 | } 181 | } 182 | 183 | /* Clear marks */ 184 | a = global_allocations; 185 | while (a != NULL) { 186 | a->mark = 0; 187 | a = a->next; 188 | } 189 | } 190 | 191 | -------------------------------------------------------------------------------- /eval.c: -------------------------------------------------------------------------------- 1 | #include "lisp.h" 2 | #include 3 | 4 | Atom env_create(Atom parent) 5 | { 6 | return cons(parent, nil); 7 | } 8 | 9 | int env_define(Atom env, Atom symbol, Atom value) 10 | { 11 | Atom bs = cdr(env); 12 | 13 | while (!nilp(bs)) { 14 | Atom b = car(bs); 15 | if (car(b).value.symbol == symbol.value.symbol) { 16 | cdr(b) = value; 17 | return Error_OK; 18 | } 19 | bs = cdr(bs); 20 | } 21 | 22 | cdr(env) = cons(cons(symbol, value), cdr(env)); 23 | 24 | return Error_OK; 25 | } 26 | 27 | int env_get(Atom env, Atom symbol, Atom *result) 28 | { 29 | Atom parent = car(env); 30 | Atom bs = cdr(env); 31 | 32 | while (!nilp(bs)) { 33 | Atom b = car(bs); 34 | if (car(b).value.symbol == symbol.value.symbol) { 35 | *result = cdr(b); 36 | return Error_OK; 37 | } 38 | bs = cdr(bs); 39 | } 40 | 41 | if (nilp(parent)) 42 | return Error_Unbound; 43 | 44 | return env_get(parent, symbol, result); 45 | } 46 | 47 | int env_set(Atom env, Atom symbol, Atom value) 48 | { 49 | Atom parent = car(env); 50 | Atom bs = cdr(env); 51 | 52 | while (!nilp(bs)) { 53 | Atom b = car(bs); 54 | if (car(b).value.symbol == symbol.value.symbol) { 55 | cdr(b) = value; 56 | return Error_OK; 57 | } 58 | bs = cdr(bs); 59 | } 60 | 61 | if (nilp(parent)) 62 | return Error_Unbound; 63 | 64 | return env_set(parent, symbol, value); 65 | } 66 | 67 | int make_closure(Atom env, Atom args, Atom body, Atom *result) 68 | { 69 | Atom p; 70 | 71 | if (!listp(body)) 72 | return Error_Syntax; 73 | 74 | /* Check argument names are all symbols */ 75 | p = args; 76 | while (!nilp(p)) { 77 | if (p.type == AtomType_Symbol) 78 | break; 79 | else if (p.type != AtomType_Pair 80 | || car(p).type != AtomType_Symbol) 81 | return Error_Type; 82 | p = cdr(p); 83 | } 84 | 85 | *result = cons(env, cons(args, body)); 86 | result->type = AtomType_Closure; 87 | 88 | return Error_OK; 89 | } 90 | 91 | Atom make_frame(Atom parent, Atom env, Atom tail) 92 | { 93 | return cons(parent, 94 | cons(env, 95 | cons(nil, /* op */ 96 | cons(tail, 97 | cons(nil, /* args */ 98 | cons(nil, /* body */ 99 | nil)))))); 100 | } 101 | 102 | int eval_do_exec(Atom *stack, Atom *expr, Atom *env) 103 | { 104 | Atom body; 105 | 106 | *env = list_get(*stack, 1); 107 | body = list_get(*stack, 5); 108 | *expr = car(body); 109 | body = cdr(body); 110 | if (nilp(body)) { 111 | /* Finished function; pop the stack */ 112 | *stack = car(*stack); 113 | } else { 114 | list_set(*stack, 5, body); 115 | } 116 | 117 | return Error_OK; 118 | } 119 | 120 | int eval_do_bind(Atom *stack, Atom *expr, Atom *env) 121 | { 122 | Atom op, args, arg_names, body; 123 | 124 | body = list_get(*stack, 5); 125 | if (!nilp(body)) 126 | return eval_do_exec(stack, expr, env); 127 | 128 | op = list_get(*stack, 2); 129 | args = list_get(*stack, 4); 130 | 131 | *env = env_create(car(op)); 132 | arg_names = car(cdr(op)); 133 | body = cdr(cdr(op)); 134 | list_set(*stack, 1, *env); 135 | list_set(*stack, 5, body); 136 | 137 | /* Bind the arguments */ 138 | while (!nilp(arg_names)) { 139 | if (arg_names.type == AtomType_Symbol) { 140 | env_define(*env, arg_names, args); 141 | args = nil; 142 | break; 143 | } 144 | 145 | if (nilp(args)) 146 | return Error_Args; 147 | env_define(*env, car(arg_names), car(args)); 148 | arg_names = cdr(arg_names); 149 | args = cdr(args); 150 | } 151 | if (!nilp(args)) 152 | return Error_Args; 153 | 154 | list_set(*stack, 4, nil); 155 | 156 | return eval_do_exec(stack, expr, env); 157 | } 158 | 159 | int eval_do_apply(Atom *stack, Atom *expr, Atom *env, Atom *result) 160 | { 161 | Atom op, args; 162 | 163 | op = list_get(*stack, 2); 164 | args = list_get(*stack, 4); 165 | 166 | if (!nilp(args)) { 167 | list_reverse(&args); 168 | list_set(*stack, 4, args); 169 | } 170 | 171 | if (op.type == AtomType_Symbol) { 172 | if (strcmp(op.value.symbol, "APPLY") == 0) { 173 | /* Replace the current frame */ 174 | *stack = car(*stack); 175 | *stack = make_frame(*stack, *env, nil); 176 | op = car(args); 177 | args = car(cdr(args)); 178 | if (!listp(args)) 179 | return Error_Syntax; 180 | 181 | list_set(*stack, 2, op); 182 | list_set(*stack, 4, args); 183 | } 184 | } 185 | 186 | if (op.type == AtomType_Builtin) { 187 | *stack = car(*stack); 188 | *expr = cons(op, args); 189 | return Error_OK; 190 | } else if (op.type != AtomType_Closure) { 191 | return Error_Type; 192 | } 193 | 194 | return eval_do_bind(stack, expr, env); 195 | } 196 | 197 | int eval_do_return(Atom *stack, Atom *expr, Atom *env, Atom *result) 198 | { 199 | Atom op, args, body; 200 | 201 | *env = list_get(*stack, 1); 202 | op = list_get(*stack, 2); 203 | body = list_get(*stack, 5); 204 | 205 | if (!nilp(body)) { 206 | /* Still running a procedure; ignore the result */ 207 | return eval_do_apply(stack, expr, env, result); 208 | } 209 | 210 | if (nilp(op)) { 211 | /* Finished evaluating operator */ 212 | op = *result; 213 | list_set(*stack, 2, op); 214 | 215 | if (op.type == AtomType_Macro) { 216 | /* Don't evaluate macro arguments */ 217 | args = list_get(*stack, 3); 218 | *stack = make_frame(*stack, *env, nil); 219 | op.type = AtomType_Closure; 220 | list_set(*stack, 2, op); 221 | list_set(*stack, 4, args); 222 | return eval_do_bind(stack, expr, env); 223 | } 224 | } else if (op.type == AtomType_Symbol) { 225 | /* Finished working on special form */ 226 | if (strcmp(op.value.symbol, "DEFINE") == 0) { 227 | Atom sym = list_get(*stack, 4); 228 | (void) env_define(*env, sym, *result); 229 | *stack = car(*stack); 230 | *expr = cons(make_sym("QUOTE"), cons(sym, nil)); 231 | return Error_OK; 232 | } else if (strcmp(op.value.symbol, "SET!") == 0) { 233 | Atom sym = list_get(*stack, 4); 234 | *stack = car(*stack); 235 | *expr = cons(make_sym("QUOTE"), cons(sym, nil)); 236 | return env_set(*env, sym, *result); 237 | } else if (strcmp(op.value.symbol, "IF") == 0) { 238 | args = list_get(*stack, 3); 239 | *expr = nilp(*result) ? car(cdr(args)) : car(args); 240 | *stack = car(*stack); 241 | return Error_OK; 242 | } else { 243 | goto store_arg; 244 | } 245 | } else if (op.type == AtomType_Macro) { 246 | /* Finished evaluating macro */ 247 | *expr = *result; 248 | *stack = car(*stack); 249 | return Error_OK; 250 | } else { 251 | store_arg: 252 | /* Store evaluated argument */ 253 | args = list_get(*stack, 4); 254 | list_set(*stack, 4, cons(*result, args)); 255 | } 256 | 257 | args = list_get(*stack, 3); 258 | if (nilp(args)) { 259 | /* No more arguments left to evaluate */ 260 | return eval_do_apply(stack, expr, env, result); 261 | } 262 | 263 | /* Evaluate next argument */ 264 | *expr = car(args); 265 | list_set(*stack, 3, cdr(args)); 266 | return Error_OK; 267 | } 268 | 269 | int eval_expr(Atom expr, Atom env, Atom *result) 270 | { 271 | static int count = 0; 272 | Error err = Error_OK; 273 | Atom stack = nil; 274 | 275 | do { 276 | if (++count == 100000) { 277 | gc_mark(expr); 278 | gc_mark(env); 279 | gc_mark(stack); 280 | gc(); 281 | count = 0; 282 | } 283 | 284 | if (expr.type == AtomType_Symbol) { 285 | err = env_get(env, expr, result); 286 | } else if (expr.type != AtomType_Pair) { 287 | *result = expr; 288 | } else if (!listp(expr)) { 289 | return Error_Syntax; 290 | } else { 291 | Atom op = car(expr); 292 | Atom args = cdr(expr); 293 | 294 | if (op.type == AtomType_Symbol) { 295 | /* Handle special forms */ 296 | 297 | if (strcmp(op.value.symbol, "QUOTE") == 0) { 298 | if (nilp(args) || !nilp(cdr(args))) 299 | return Error_Args; 300 | 301 | *result = car(args); 302 | } else if (strcmp(op.value.symbol, "DEFINE") == 0) { 303 | Atom sym; 304 | 305 | if (nilp(args) || nilp(cdr(args))) 306 | return Error_Args; 307 | 308 | sym = car(args); 309 | if (sym.type == AtomType_Pair) { 310 | err = make_closure(env, cdr(sym), cdr(args), result); 311 | sym = car(sym); 312 | if (sym.type != AtomType_Symbol) 313 | return Error_Type; 314 | (void) env_define(env, sym, *result); 315 | *result = sym; 316 | } else if (sym.type == AtomType_Symbol) { 317 | if (!nilp(cdr(cdr(args)))) 318 | return Error_Args; 319 | stack = make_frame(stack, env, nil); 320 | list_set(stack, 2, op); 321 | list_set(stack, 4, sym); 322 | expr = car(cdr(args)); 323 | continue; 324 | } else { 325 | return Error_Type; 326 | } 327 | } else if (strcmp(op.value.symbol, "LAMBDA") == 0) { 328 | if (nilp(args) || nilp(cdr(args))) 329 | return Error_Args; 330 | 331 | err = make_closure(env, car(args), cdr(args), result); 332 | } else if (strcmp(op.value.symbol, "IF") == 0) { 333 | if (nilp(args) || nilp(cdr(args)) || nilp(cdr(cdr(args))) 334 | || !nilp(cdr(cdr(cdr(args))))) 335 | return Error_Args; 336 | 337 | stack = make_frame(stack, env, cdr(args)); 338 | list_set(stack, 2, op); 339 | expr = car(args); 340 | continue; 341 | } else if (strcmp(op.value.symbol, "DEFMACRO") == 0) { 342 | Atom name, macro; 343 | 344 | if (nilp(args) || nilp(cdr(args))) 345 | return Error_Args; 346 | 347 | if (car(args).type != AtomType_Pair) 348 | return Error_Syntax; 349 | 350 | name = car(car(args)); 351 | if (name.type != AtomType_Symbol) 352 | return Error_Type; 353 | 354 | err = make_closure(env, cdr(car(args)), 355 | cdr(args), ¯o); 356 | if (!err) { 357 | macro.type = AtomType_Macro; 358 | *result = name; 359 | (void) env_define(env, name, macro); 360 | } 361 | } else if (strcmp(op.value.symbol, "APPLY") == 0) { 362 | if (nilp(args) || nilp(cdr(args)) || !nilp(cdr(cdr(args)))) 363 | return Error_Args; 364 | 365 | stack = make_frame(stack, env, cdr(args)); 366 | list_set(stack, 2, op); 367 | expr = car(args); 368 | continue; 369 | } else if (strcmp(op.value.symbol, "SET!") == 0) { 370 | if (nilp(args) || nilp(cdr(args)) || !nilp(cdr(cdr(args)))) 371 | return Error_Args; 372 | if (car(args).type != AtomType_Symbol) 373 | return Error_Type; 374 | stack = make_frame(stack, env, nil); 375 | list_set(stack, 2, op); 376 | list_set(stack, 4, car(args)); 377 | expr = car(cdr(args)); 378 | continue; 379 | } else { 380 | goto push; 381 | } 382 | } else if (op.type == AtomType_Builtin) { 383 | err = (*op.value.builtin)(args, result); 384 | } else { 385 | push: 386 | /* Handle function application */ 387 | stack = make_frame(stack, env, args); 388 | expr = op; 389 | continue; 390 | } 391 | } 392 | 393 | if (nilp(stack)) 394 | break; 395 | 396 | if (!err) 397 | err = eval_do_return(&stack, &expr, &env, result); 398 | } while (!err); 399 | 400 | return err; 401 | } 402 | 403 | -------------------------------------------------------------------------------- /html/arithmetic.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | Chapter 6: Arithmetic 6 | 7 | 8 | 9 |

Arithmetic

10 | 11 |

12 | So far all we've been able to do is create and name objects. Some of 13 | those objects have been numbers — naturally we would like to do 14 | calculations with those numbers. 15 |

16 | 17 |

18 | In the last chapter we saw how to create built-in functions to tell 19 | eval_expr how to process arguments into a return value. 20 | We will now create four more builtins to perform the basic arithmetic 21 | operations. 22 |

23 | 24 | 25 | 26 | 27 | 29 | 30 | 32 | 33 | 35 | 36 | 38 |
ExpressionResult
(+ X Y)The sum of X and Y 28 |
(- X Y)The difference of X and Y 31 |
(* X Y)The product of X and Y 34 |
(/ X Y)The quotient of X and Y 37 |
39 | 40 |

41 | In the definitions above, when we write "the sum of X and 42 | Y", what we really mean is "the sum of the values 43 | obtained by evaluating X and Y". 44 | Remember that eval_expr will evaluate all the arguments 45 | to a functions by default; this is usually what we want to happen, so from 46 | now on we will not explicitly state this where the intent is obvious. 47 |

48 | 49 |

Implementation

50 | 51 |

52 | Once again almost all of our function consists of checking that the 53 | correct arguments were supplied. Finally the result is constructed by 54 | the call to make_int. 55 |

56 | 57 |
 58 | int builtin_add(Atom args, Atom *result)
 59 | {
 60 | 	Atom a, b;
 61 | 
 62 | 	if (nilp(args) || nilp(cdr(args)) || !nilp(cdr(cdr(args))))
 63 | 		return Error_Args;
 64 | 
 65 | 	a = car(args);
 66 | 	b = car(cdr(args));
 67 | 
 68 | 	if (a.type != AtomType_Integer || b.type != AtomType_Integer)
 69 | 		return Error_Type;
 70 | 
 71 | 	*result = make_int(a.value.integer + b.value.integer);
 72 | 
 73 | 	return Error_OK;
 74 | }
 75 | 
76 | 77 |

78 | The other three functions differ by only one character, so I will omit 79 | them here. 80 |

81 | 82 |

83 | Finally we need to create bindings for our new functions in the initial 84 | environment: 85 |

 86 | env_set(env, make_sym("+"), make_builtin(builtin_add));
 87 | env_set(env, make_sym("-"), make_builtin(builtin_subtract));
 88 | env_set(env, make_sym("*"), make_builtin(builtin_multiply));
 89 | env_set(env, make_sym("/"), make_builtin(builtin_divide));
 90 | 
91 |

92 | 93 |

Testing

94 | 95 |

96 | We now have our very own LISP-style calculator. 97 |

98 | 99 |
100 | > (+ 1 1)
101 | 2
102 | > (define x (* 6 9))
103 | X
104 | > x
105 | 54
106 | > (- x 12)
107 | 42
108 | 
109 | 110 |

111 | In the last expression above, note that X is a symbol, not 112 | an integer. We have to evaluate the arguments so that 113 | builtin_subtract can operate on the integer value bound to 114 | X and not the symbol X itself. Similarly 115 | the value bound to X is the integer result of 116 | evaluating the expression (* 6 9). 117 |

118 | 119 | 120 | 121 | 122 | -------------------------------------------------------------------------------- /html/booleans.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | Chapter 8: Booleans and short-circuit evaluation 6 | 7 | 8 | 9 |

Booleans and short-circuit evaluation

10 | 11 |

Booleans

12 | 13 |

14 | (Apologies if you are a logician and I've got this all wrong...) 15 |

16 | 17 |

18 | A boolean value is one of two classes of values which are called 19 | true and false. If we wish to interpret a value as a boolean, 20 | we consider it to be true if it is in the class of true values, 21 | and false otherwise. 22 |

23 | 24 |

Short-circuit evalutaion

25 | 26 |

27 | So far every expression we pass to eval is evaluated. With 28 | the exception of special forms such as DEFINE and 29 | LAMBDA, which store away expressions to be evaluated 30 | later, eval must walk the whole tree before returning a 31 | result. 32 |

33 | 34 |

35 | In this chapter we will define yet another special form IF, 36 | which will cause eval to choose which of two possible 37 | expressions to evaluate, and discard the other. 38 |

39 | 40 |

41 | The syntax is as follows: 42 |

 43 | (IF test true-expr false-expr)
 44 | 
45 | where test, true-expr and false-expr 46 | are arbitrary expressions. If the result of evaluating test is 47 | considered to be true, then the result of the IF-expression 48 | is the result of evaluating true-expr, otherwise it is the 49 | result of evaluating false-expr. Only one of 50 | true-expr and false-expr is evaluated; the 51 | other expression is ignored. 52 |

53 | 54 |

55 | But what kind of value is true? In our environment we will define 56 | NIL to be false. Any other value is true. 57 |

58 | 59 |

60 | Here is the code to handle IF-expressions. 61 |

62 | 63 |
 64 | int eval_expr(Atom expr, Atom env, Atom *result)
 65 | {
 66 | 	.
 67 | 	.
 68 | 	.
 69 | 	if (op.type == AtomType_Symbol) {
 70 | 		if (strcmp(op.value.symbol, "QUOTE") == 0) {
 71 | 		.
 72 | 		.
 73 | 		.
 74 | 		} else if (strcmp(op.value.symbol, "IF") == 0) {
 75 | 			Atom cond, val;
 76 | 
 77 | 			if (nilp(args) || nilp(cdr(args)) || nilp(cdr(cdr(args)))
 78 | 					|| !nilp(cdr(cdr(cdr(args)))))
 79 | 				return Error_Args;
 80 | 
 81 | 			err = eval_expr(car(args), env, &cond);
 82 | 			if (err)
 83 | 				return err;
 84 | 
 85 | 			val = nilp(cond) ? car(cdr(cdr(args))) : car(cdr(args));
 86 | 			return eval_expr(val, env, result);
 87 | 		}
 88 | 	}
 89 | 	.
 90 | 	.
 91 | 	.
 92 | }
 93 | 
94 | 95 |

96 | The argument check is getting a little unwieldy. A couple of alternatives 97 | are to modify car and cdr to return 98 | NIL if the argument is not a pair and forego the syntax 99 | check, or to create a helper function to count the list length. It won't 100 | get any worse than this, though — so let's not waste time on it. 101 |

102 | 103 |

104 | Traditionally LISP functions return the symbol T if they 105 | need to return a boolean value and there is no obvious object available. 106 | T is bound to itself, so evaluating it returns the symbol 107 | T again. A symbol is not NIL, and so is 108 | true. 109 |

110 | 111 |

112 | Add a binding for T to the initial environment: 113 |

114 | env_set(env, make_sym("T"), make_sym("T"));
115 | 
116 | Remember that make_sym will return the same 117 | symbol object if it is called multiple times with identical strings. 118 |

119 | 120 |

Testing

121 | 122 |
123 | > (if t 3 4)
124 | 3
125 | > (if nil 3 4)
126 | 4
127 | > (if 0 t nil)
128 | T
129 | 
130 | 131 |

132 | Unlike C, zero is true, not false. 133 |

134 | 135 |

Predicates

136 | 137 |

138 | While we could stop here, it would be useful to make some tests other 139 | than "is it NIL". This is where predicates come in. 140 | A predicate is a function which returns a true/false value according to 141 | some condition. 142 |

143 | 144 |

145 | We will define two built-in predicates, "=" which tests for 146 | numerical equality, and "<" which tests if one number 147 | is less than another. 148 |

149 | 150 |

151 | The functions are similar to our other numerical built-ins. 152 |

153 | 154 |
155 | int builtin_numeq(Atom args, Atom *result)
156 | {
157 | 	Atom a, b;
158 | 
159 | 	if (nilp(args) || nilp(cdr(args)) || !nilp(cdr(cdr(args))))
160 | 		return Error_Args;
161 | 
162 | 	a = car(args);
163 | 	b = car(cdr(args));
164 | 
165 | 	if (a.type != AtomType_Integer || b.type != AtomType_Integer)
166 | 		return Error_Type;
167 | 
168 | 	*result = (a.value.integer == b.value.integer) ? make_sym("T") : nil;
169 | 
170 | 	return Error_OK;
171 | }
172 | 
173 | 174 |

175 | builtin_less follows the same pattern and is not shown here. 176 |

177 | 178 |

179 | Finally we must add them to the initial environment. 180 |

181 | 182 |
183 | env_set(env, make_sym("="), make_builtin(builtin_numeq));
184 | env_set(env, make_sym("<"), make_builtin(builtin_less));
185 | 
186 | 187 |

Testing

188 | 189 |
190 | > (= 3 3)
191 | T
192 | > (< 11 4)
193 | NIL
194 | 
195 | 196 |

197 | Barring memory and stack limitations, our LISP environment is now 198 | Turing-complete! If you have been entering the code as we go along, you 199 | can confirm that we have implemented the core of a usable programming 200 | language in well under 1,000 lines of C code. 201 |

202 | 203 |

204 | A classic demonstration: 205 |

206 | > (define fact
207 |     (lambda (x)
208 |       (if (= x 0)
209 |         1
210 |         (* x (fact (- x 1))))))
211 | FACT
212 | > (fact 10)
213 | 3628800
214 | 
215 | I have cheated a little here: the REPL does not allow the user to enter 216 | multi-line expressions, so you must enter the definition for 217 | fact all on one line. 218 |

219 | 220 |

221 | There is more to do yet, though. LISP has other features which make it 222 | possible to express some really interesting stuff, and there are a few loose 223 | ends to tidy up as well. 224 |

225 | 226 | 227 | 228 | 229 | -------------------------------------------------------------------------------- /html/builtins.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | Chapter 5: Built-in functions 6 | 7 | 8 | 9 |

Built-in functions

10 | 11 |

12 | So far in our implementation, we have made use of the functions 13 | car, cdr and cons to construct 14 | and access LISP data. Now, we will make the same functionality 15 | available within the interpreted environment. 16 |

17 | 18 |

19 | We shall extend the list expression syntax to add some new operators: 20 |

21 |
(CAR EXPR) 22 |
Evaluates EXPR and returns the car of the 23 | result. It is an error if EXPR does not evaluate to a 24 | pair or NIL.
25 |
(CDR EXPR) 26 |
Evaluates EXPR and returns the cdr of the 27 | result. It is an error if EXPR does not evaluate to a 28 | pair or NIL.
29 |
(CONS A B) 30 |
Evaluates both arguments A and B, 31 | and returns a newly constructed pair containing the results.
32 |
33 |

34 | 35 |

36 | In the definitions above we allow taking the car and cdr of 37 | NIL, unlike our C versions. Some algorithms are simpler to 38 | express if the car and cdr of NIL are defined 39 | to be NIL. 40 |

41 | 42 |

43 | We could choose to implement these by adding more special cases 44 | to eval_expr, just like we did with QUOTE 45 | and DEFINE. However, we will want to add more operators 46 | in the future — and adding each one to eval_expr 47 | would cause the function to get very long. The alternative is to introduce 48 | the concept of functions. 49 |

50 | 51 |

Functions

52 | 53 |

54 | A function is a recipe for converting arguments into a value. If 55 | eval_expr encounters a list expression with a function 56 | as the operator, all it has to do is follow the recipe to come up with 57 | a value to use as the result of the expression. 58 |

59 | 60 |

61 | One way to implement these recipes is to create C functions which can 62 | be called from eval_expr. We will call these built-in 63 | or primitive functions. Let's see how to extend our LISP 64 | interpreter to accommodate these. 65 |

66 | 67 |

A new type of atom

68 | 69 |

70 | eval_expr will call built-in functions through a C function 71 | pointer, so they must all have the same prototype: 72 |

 73 | typedef int (*Builtin)(struct Atom args, struct Atom *result);
 74 | 
75 |

76 | 77 |

78 | In order to appear in expressions, we need a new kind of atom to 79 | represent them. 80 |

 81 | struct Atom {
 82 | 	enum {
 83 | 		.
 84 | 		.
 85 | 		.
 86 | 		AtomType_Builtin
 87 | 	} type;
 88 | 
 89 | 	union {
 90 | 		.
 91 | 		.
 92 | 		.
 93 | 		Builtin builtin;
 94 | 	} value;
 95 | };
 96 | 
97 | Sections of code which we wrote previously are abbreviated as 98 | ". . .". 99 |

100 | 101 |

102 | For completeness, print_expr needs to know how to display 103 | the new atom: 104 |

105 | void print_expr(Atom atom)
106 | {
107 | 	switch (atom.type) {
108 | 	.
109 | 	.
110 | 	.
111 | 	case AtomType_Builtin:
112 | 		printf("#<BUILTIN:%p>", atom.value.builtin);
113 | 		break;
114 | 	}
115 | }
116 | 
117 |

118 | 119 |

120 | And finally a helper function to create atoms of the new type: 121 |

122 | Atom make_builtin(Builtin fn)
123 | {
124 | 	Atom a;
125 | 	a.type = AtomType_Builtin;
126 | 	a.value.builtin = fn;
127 | 	return a;
128 | }
129 | 
130 |

131 | 132 |

Extending the evaluator

133 | 134 |

135 | We will need to create a shallow copy of the argument 136 | list. 137 |

138 | 139 |
140 | Atom copy_list(Atom list)
141 | {
142 | 	Atom a, p;
143 | 
144 | 	if (nilp(list))
145 | 		return nil;
146 | 
147 | 	a = cons(car(list), nil);
148 | 	p = a;
149 | 	list = cdr(list);
150 | 
151 | 	while (!nilp(list)) {
152 | 		cdr(p) = cons(car(list), nil);
153 | 		p = cdr(p);
154 | 		list = cdr(list);
155 | 	}
156 | 
157 | 	return a;
158 | }
159 | 
160 | 161 |

162 | apply simply calls the builtin function with a supplied 163 | list of arguments. We will extend this function later when we 164 | want to deal with other kinds of evaluation recipe. 165 |

166 | 167 |
168 | int apply(Atom fn, Atom args, Atom *result)
169 | {
170 | 	if (fn.type == AtomType_Builtin)
171 | 		return (*fn.value.builtin)(args, result);
172 | 
173 | 	return Error_Type;
174 | }
175 | 
176 | 177 |

178 | If a list expression is not one of the special forms we defined 179 | previously, then we will assume that the operator is something which 180 | evaluates to a function. We will also evaluate each of the arguments, 181 | and use apply to call that function with the list of 182 | results. 183 |

184 | 185 |
186 | int eval_expr(Atom expr, Atom env, Atom *result)
187 | {
188 | 	Atom op, args, p;
189 | 	Error err;
190 | 
191 | 	.
192 | 	.
193 | 	.
194 | 
195 | 	if (op.type == AtomType_Symbol) {
196 | 		.
197 | 		.
198 | 		.
199 | 	}
200 | 
201 | 	/* Evaluate operator */
202 | 	err = eval_expr(op, env, &op);
203 | 	if (err)
204 | 		return err;
205 | 
206 | 	/* Evaulate arguments */
207 | 	args = copy_list(args);
208 | 	p = args;
209 | 	while (!nilp(p)) {
210 | 		err = eval_expr(car(p), env, &car(p));
211 | 		if (err)
212 | 			return err;
213 | 
214 | 		p = cdr(p);
215 | 	}
216 | 
217 | 	return apply(op, args, result);
218 | }
219 | 
220 | 221 |

222 | The argument list is copied before being overwritten with the results of 223 | evaluating the arguments. We don't want to overwrite the original 224 | argument list in case we need to use the form again in the future. 225 |

226 | 227 |

Initial environment

228 | 229 |

230 | Previously we created an empty environment for the read-eval-print loop. 231 | The user has no way of creating atoms which represent builtin functions, 232 | so we populate the initial environment with bindings for our builtins. 233 |

234 | 235 |

236 | The functions themselves: 237 |

238 | 239 |
240 | int builtin_car(Atom args, Atom *result)
241 | {
242 | 	if (nilp(args) || !nilp(cdr(args)))
243 | 		return Error_Args;
244 | 
245 | 	if (nilp(car(args)))
246 | 		*result = nil;
247 | 	else if (car(args).type != AtomType_Pair)
248 | 		return Error_Type;
249 | 	else
250 | 		*result = car(car(args));
251 | 
252 | 	return Error_OK;
253 | }
254 | 
255 | 256 |

257 | Almost all of the function is code to deal with errors and type checking! 258 | Creating functions in this way is pretty tedious. 259 |

260 | 261 |
262 | int builtin_cdr(Atom args, Atom *result)
263 | {
264 | 	if (nilp(args) || !nilp(cdr(args)))
265 | 		return Error_Args;
266 | 
267 | 	if (nilp(car(args)))
268 | 		*result = nil;
269 | 	else if (car(args).type != AtomType_Pair)
270 | 		return Error_Type;
271 | 	else
272 | 		*result = cdr(car(args));
273 | 
274 | 	return Error_OK;
275 | }
276 | 
277 | 278 |

279 | builtin_cdr is almost identical to builtin_car. 280 |

281 | 282 |
283 | int builtin_cons(Atom args, Atom *result)
284 | {
285 | 	if (nilp(args) || nilp(cdr(args)) || !nilp(cdr(cdr(args))))
286 | 		return Error_Args;
287 | 
288 | 	*result = cons(car(args), car(cdr(args)));
289 | 
290 | 	return Error_OK;
291 | }
292 | 
293 | 294 |

295 | With these defined, we can at last use env_set to create 296 | the bindings. 297 |

298 | 299 |
300 | int main(int argc, char **argv)
301 | {
302 | 	Atom env;
303 | 	char *input;
304 | 
305 | 	env = env_create(nil);
306 | 
307 | 	/* Set up the initial environment */
308 | 	env_set(env, make_sym("CAR"), make_builtin(builtin_car));
309 | 	env_set(env, make_sym("CDR"), make_builtin(builtin_cdr));
310 | 	env_set(env, make_sym("CONS"), make_builtin(builtin_cons));
311 | 
312 | 	while ((input = readline("> ")) != NULL) {
313 | 		.
314 | 		.
315 | 		.
316 | 	}
317 | 
318 | 	return 0;
319 | }
320 | 
321 | 322 |

Testing

323 | 324 |
325 | > (define foo 1)
326 | FOO
327 | > (define bar 2)
328 | BAR
329 | > (cons foo bar)
330 | (1 . 2)
331 | > (define baz (quote (a b c)))
332 | BAZ
333 | > (car baz)
334 | A
335 | > (cdr baz)
336 | (B C)
337 | 
338 | 339 |

340 | Notice that (CONS FOO BAR) is not the same as 341 | (QUOTE (FOO . BAR)). In the former expression, the arguments 342 | are evaluated and a new pair is created. 343 |

344 | 345 | 346 | 347 | 348 | -------------------------------------------------------------------------------- /html/continuations.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | Chapter 14: Continuations and tail recursion 6 | 7 | 8 | 9 | 10 | NOTE The implementation of eval_expr 11 | and the design of the stack in this chapter are rather ad-hoc, and 12 | I'm not particularly proud of them. Please skip to the next chapter 13 | if they offend you. 14 | 15 | 16 |

Continuations and tail recursion

17 | 18 |

19 | Our eval_expr function has been implemented recursively 20 | — that is to say, when in the course of evaluating an expression 21 | it is necessary to evaluate a sub-expression, eval_expr 22 | calls itself to obtain the result. 23 |

24 | 25 |

26 | This works fairly well, and is easy to follow, but the depth of 27 | recursion in our LISP environment is limited by the stack size of the 28 | interpreter. LISP code traditionally makes heavy use of recursion, 29 | and we would like to support this up to the limit of available memory. 30 |

31 | 32 |

33 | Take the following pathological example: 34 |

 35 | (define (count n)
 36 |   (if (= n 0)
 37 |       0
 38 |       (+ 1 (count (- n 1)))))
 39 | 
40 |

41 | 42 |

43 | The COUNT function will recurse to depth n 44 | and return the sum of n ones. Expressions such as 45 | (COUNT 10) should compute OK with our current interpreter, 46 | but even (COUNT 10000) is enough to cause a stack overflow 47 | on my machine. 48 |

49 | 50 |

51 | To achieve this we will rewrite eval_expr as a loop, with 52 | helper functions to keep track of evaluations in progress and return 53 | the next expression to be evaluated. When there are no more expressions 54 | left, eval_expr can return the final result to the caller. 55 |

56 | 57 |

58 | As eval_expr works through the tree of expressions, we will 59 | keep track of arguments evaluated and pending evaluation in a series of 60 | frames, linked together to form a stack. This 61 | is broadly the same way that the compiled version of the recursive 62 | eval_expr works; in this case we are replacing the machine 63 | code stack with a LISP data structure and manipulating it explicitly. 64 |

65 | 66 |

67 | The stack can also be thought of as representing the future of the 68 | computation once the present expression has been evaluated. In this 69 | sense it is referred to as the current continuation. 70 |

71 | 72 |

73 | Since any function which is called by eval_expr may not 74 | call eval_expr (to avoid recursion), we must integrate 75 | apply and builtin_apply into the body of 76 | eval_expr. 77 |

78 | 79 |

Implementation

80 | 81 |

82 | A stack frame has the following form. 83 |

84 | 85 |
 86 | (parent env evaluated-op (pending-arg...) (evaluated-arg...) (body...))
 87 | 
88 | 89 |

90 | parent is the stack frame corresponding to the parent 91 | expression (that is, the one which is waiting for the result of the 92 | current expression). env is the current environment, 93 | evaluated-op is the evaluated operator, and 94 | pending-arg... and evaluated-arg are the 95 | arguments pending and following evaluation respectively. 96 | body... are the expressions in the function body 97 | which are pending execution. 98 |

99 | 100 |

101 | Rather than writing out long lists of car() and 102 | cdr(), we will define some helper functions to 103 | manipulate members of a list. 104 |

105 | 106 |
107 | Atom list_get(Atom list, int k)
108 | {
109 | 	while (k--)
110 | 		list = cdr(list);
111 | 	return car(list);
112 | }
113 | 
114 | void list_set(Atom list, int k, Atom value)
115 | {
116 | 	while (k--)
117 | 		list = cdr(list);
118 | 	car(list) = value;
119 | }
120 | 
121 | void list_reverse(Atom *list)
122 | {
123 | 	Atom tail = nil;
124 | 	while (!nilp(*list)) {
125 | 		Atom p = cdr(*list);
126 | 		cdr(*list) = tail;
127 | 		tail = *list;
128 | 		*list = p;
129 | 	}
130 | 	*list = tail;
131 | }
132 | 
133 | 134 |

135 | Another function creates a new stack frame ready to start evaluating a 136 | new function call, with the specified parent, environment and list of 137 | arguments pending evaluation (the tail). 138 |

139 | 140 |
141 | Atom make_frame(Atom parent, Atom env, Atom tail)
142 | {
143 | 	return cons(parent,
144 | 		cons(env,
145 | 		cons(nil, /* op */
146 | 		cons(tail,
147 | 		cons(nil, /* args */
148 | 		cons(nil, /* body */
149 | 		nil))))));
150 | }
151 | 
152 | 153 |

154 | Here is the innermost part of our new exec_expr, 155 | which sets expr to the next part of the function 156 | body, and pops the stack when we have reached end of the body. 157 |

158 | 159 |
160 | int eval_do_exec(Atom *stack, Atom *expr, Atom *env)
161 | {
162 | 	Atom body;
163 | 
164 | 	*env = list_get(*stack, 1);
165 | 	body = list_get(*stack, 5);
166 | 	*expr = car(body);
167 | 	body = cdr(body);
168 | 	if (nilp(body)) {
169 | 		/* Finished function; pop the stack */
170 | 		*stack = car(*stack);
171 | 	} else {
172 | 		list_set(*stack, 5, body);
173 | 	}
174 | 
175 | 	return Error_OK;
176 | }
177 | 
178 | 179 |

180 | This helper binds the function arguments into a new environment 181 | if they have not already been bound, then calls 182 | eval_do_exec to get the next expression in the body. 183 |

184 | 185 |
186 | int eval_do_bind(Atom *stack, Atom *expr, Atom *env)
187 | {
188 | 	Atom op, args, arg_names, body;
189 | 
190 | 	body = list_get(*stack, 5);
191 | 	if (!nilp(body))
192 | 		return eval_do_exec(stack, expr, env);
193 | 
194 | 	op = list_get(*stack, 2);
195 | 	args = list_get(*stack, 4);
196 | 
197 | 	*env = env_create(car(op));
198 | 	arg_names = car(cdr(op));
199 | 	body = cdr(cdr(op));
200 | 	list_set(*stack, 1, *env);
201 | 	list_set(*stack, 5, body);
202 | 
203 | 	/* Bind the arguments */
204 | 	while (!nilp(arg_names)) {
205 | 		if (arg_names.type == AtomType_Symbol) {
206 | 			env_set(*env, arg_names, args);
207 | 			args = nil;
208 | 			break;
209 | 		}
210 | 
211 | 		if (nilp(args))
212 | 			return Error_Args;
213 | 		env_set(*env, car(arg_names), car(args));
214 | 		arg_names = cdr(arg_names);
215 | 		args = cdr(args);
216 | 	}
217 | 	if (!nilp(args))
218 | 		return Error_Args;
219 | 
220 | 	list_set(*stack, 4, nil);
221 | 
222 | 	return eval_do_exec(stack, expr, env);
223 | }
224 | 
225 | 226 |

227 | The next function is called once all arguments have been evaluated, 228 | and is responsible either generating an expression to call a builtin, 229 | or delegating to eval_do_bind. 230 |

231 | 232 |
233 | int eval_do_apply(Atom *stack, Atom *expr, Atom *env, Atom *result)
234 | {
235 | 	Atom op, args;
236 | 
237 | 	op = list_get(*stack, 2);
238 | 	args = list_get(*stack, 4);
239 | 
240 | 	if (!nilp(args)) {
241 | 		list_reverse(&args);
242 | 		list_set(*stack, 4, args);
243 | 	}
244 | 
245 | 	if (op.type == AtomType_Symbol) {
246 | 		if (strcmp(op.value.symbol, "APPLY") == 0) {
247 | 			/* Replace the current frame */
248 | 			*stack = car(*stack);
249 | 			*stack = make_frame(*stack, *env, nil);
250 | 			op = car(args);
251 | 			args = car(cdr(args));
252 | 			if (!listp(args))
253 | 				return Error_Syntax;
254 | 
255 | 			list_set(*stack, 2, op);
256 | 			list_set(*stack, 4, args);
257 | 		}
258 | 	}
259 | 
260 | 	if (op.type == AtomType_Builtin) {
261 | 		*stack = car(*stack);
262 | 		*expr = cons(op, args);
263 | 		return Error_OK;
264 | 	} else if (op.type != AtomType_Closure) {
265 | 		return Error_Type;
266 | 	}
267 | 
268 | 	return eval_do_bind(stack, expr, env);
269 | }
270 | 
271 | 272 |

273 | This part is called once an expression has been evaluated, and 274 | is responsible for storing the result, which is either an operator, 275 | an argument, or an intermediate body expression, and fetching the 276 | next expression to evaluate. 277 |

278 | 279 |
280 | int eval_do_return(Atom *stack, Atom *expr, Atom *env, Atom *result)
281 | {
282 | 	Atom op, args, body;
283 | 
284 | 	*env = list_get(*stack, 1);
285 | 	op = list_get(*stack, 2);
286 | 	body = list_get(*stack, 5);
287 | 
288 | 	if (!nilp(body)) {
289 | 		/* Still running a procedure; ignore the result */
290 | 		return eval_do_apply(stack, expr, env, result);
291 | 	}
292 | 
293 | 	if (nilp(op)) {
294 | 		/* Finished evaluating operator */
295 | 		op = *result;
296 | 		list_set(*stack, 2, op);
297 | 
298 | 		if (op.type == AtomType_Macro) {
299 | 			/* Don't evaluate macro arguments */
300 | 			args = list_get(*stack, 3);
301 | 			*stack = make_frame(*stack, *env, nil);
302 | 			op.type = AtomType_Closure;
303 | 			list_set(*stack, 2, op);
304 | 			list_set(*stack, 4, args);
305 | 			return eval_do_bind(stack, expr, env);
306 | 		}
307 | 	} else if (op.type == AtomType_Symbol) {
308 | 		/* Finished working on special form */
309 | 		if (strcmp(op.value.symbol, "DEFINE") == 0) {
310 | 			Atom sym = list_get(*stack, 4);
311 | 			(void) env_set(*env, sym, *result);
312 | 			*stack = car(*stack);
313 | 			*expr = cons(make_sym("QUOTE"), cons(sym, nil));
314 | 			return Error_OK;
315 | 		} else if (strcmp(op.value.symbol, "IF") == 0) {
316 | 			args = list_get(*stack, 3);
317 | 			*expr = nilp(*result) ? car(cdr(args)) : car(args);
318 | 			*stack = car(*stack);
319 | 			return Error_OK;
320 | 		} else {
321 | 			goto store_arg;
322 | 		}
323 | 	} else if (op.type == AtomType_Macro) {
324 | 		/* Finished evaluating macro */
325 | 		*expr = *result;
326 | 		*stack = car(*stack);
327 | 		return Error_OK;
328 | 	} else {
329 | 	store_arg:
330 | 		/* Store evaluated argument */
331 | 		args = list_get(*stack, 4);
332 | 		list_set(*stack, 4, cons(*result, args));
333 | 	}
334 | 
335 | 	args = list_get(*stack, 3);
336 | 	if (nilp(args)) {
337 | 		/* No more arguments left to evaluate */
338 | 		return eval_do_apply(stack, expr, env, result);
339 | 	}
340 | 
341 | 	/* Evaluate next argument */
342 | 	*expr = car(args);
343 | 	list_set(*stack, 3, cdr(args));
344 | 	return Error_OK;
345 | }
346 | 
347 | 348 |

349 | And here we are at last with the new eval_expr. There 350 | is a lot of code for setting up special forms, but the rest is simply 351 | a loop waiting for the stack to clear. 352 |

353 | 354 |
355 | int eval_expr(Atom expr, Atom env, Atom *result)
356 | {
357 | 	Error err = Error_OK;
358 | 	Atom stack = nil;
359 | 
360 | 	do {
361 | 		if (expr.type == AtomType_Symbol) {
362 | 			err = env_get(env, expr, result);
363 | 		} else if (expr.type != AtomType_Pair) {
364 | 			*result = expr;
365 | 		} else if (!listp(expr)) {
366 | 			return Error_Syntax;
367 | 		} else {
368 | 			Atom op = car(expr);
369 | 			Atom args = cdr(expr);
370 | 
371 | 			if (op.type == AtomType_Symbol) {
372 | 				/* Handle special forms */
373 | 
374 | 				if (strcmp(op.value.symbol, "QUOTE") == 0) {
375 | 					if (nilp(args) || !nilp(cdr(args)))
376 | 						return Error_Args;
377 | 
378 | 					*result = car(args);
379 | 				} else if (strcmp(op.value.symbol, "DEFINE") == 0) {
380 | 					Atom sym;
381 | 
382 | 					if (nilp(args) || nilp(cdr(args)))
383 | 						return Error_Args;
384 | 
385 | 					sym = car(args);
386 | 					if (sym.type == AtomType_Pair) {
387 | 						err = make_closure(env, cdr(sym), cdr(args), result);
388 | 						sym = car(sym);
389 | 						if (sym.type != AtomType_Symbol)
390 | 							return Error_Type;
391 | 						(void) env_set(env, sym, *result);
392 | 						*result = sym;
393 | 					} else if (sym.type == AtomType_Symbol) {
394 | 						if (!nilp(cdr(cdr(args))))
395 | 							return Error_Args;
396 | 						stack = make_frame(stack, env, nil);
397 | 						list_set(stack, 2, op);
398 | 						list_set(stack, 4, sym);
399 | 						expr = car(cdr(args));
400 | 						continue;
401 | 					} else {
402 | 						return Error_Type;
403 | 					}
404 | 				} else if (strcmp(op.value.symbol, "LAMBDA") == 0) {
405 | 					if (nilp(args) || nilp(cdr(args)))
406 | 						return Error_Args;
407 | 
408 | 					err = make_closure(env, car(args), cdr(args), result);
409 | 				} else if (strcmp(op.value.symbol, "IF") == 0) {
410 | 					if (nilp(args) || nilp(cdr(args)) || nilp(cdr(cdr(args)))
411 | 							|| !nilp(cdr(cdr(cdr(args)))))
412 | 						return Error_Args;
413 | 
414 | 					stack = make_frame(stack, env, cdr(args));
415 | 					list_set(stack, 2, op);
416 | 					expr = car(args);
417 | 					continue;
418 | 				} else if (strcmp(op.value.symbol, "DEFMACRO") == 0) {
419 | 					Atom name, macro;
420 | 
421 | 					if (nilp(args) || nilp(cdr(args)))
422 | 						return Error_Args;
423 | 
424 | 					if (car(args).type != AtomType_Pair)
425 | 						return Error_Syntax;
426 | 
427 | 					name = car(car(args));
428 | 					if (name.type != AtomType_Symbol)
429 | 						return Error_Type;
430 | 
431 | 					err = make_closure(env, cdr(car(args)),
432 | 						cdr(args), &macro);
433 | 					if (!err) {
434 | 						macro.type = AtomType_Macro;
435 | 						*result = name;
436 | 						(void) env_set(env, name, macro);
437 | 					}
438 | 				} else if (strcmp(op.value.symbol, "APPLY") == 0) {
439 | 					if (nilp(args) || nilp(cdr(args)) || !nilp(cdr(cdr(args))))
440 | 						return Error_Args;
441 | 
442 | 					stack = make_frame(stack, env, cdr(args));
443 | 					list_set(stack, 2, op);
444 | 					expr = car(args);
445 | 					continue;
446 | 				} else {
447 | 					goto push;
448 | 				}
449 | 			} else if (op.type == AtomType_Builtin) {
450 | 				err = (*op.value.builtin)(args, result);
451 | 			} else {
452 | 			push:
453 | 				/* Handle function application */
454 | 				stack = make_frame(stack, env, args);
455 | 				expr = op;
456 | 				continue;
457 | 			}
458 | 		}
459 | 
460 | 		if (nilp(stack))
461 | 			break;
462 | 
463 | 		if (!err)
464 | 			err = eval_do_return(&stack, &expr, &env, result);
465 | 	} while (!err);
466 | 
467 | 	return err;
468 | }
469 | 
470 | 471 |

Testing

472 | 473 |

474 | Let's try our COUNT function again. 475 |

476 | 477 |
478 | > (count 100000)
479 | 100000
480 | 
481 | 482 |

483 | Hooray! We can now recurse as much as we like without causing a stack 484 | overflow. If you have a lot of RAM, you should even be able to do 485 | a million levels deep. 486 |

487 | 488 |

Tail recursion

489 | 490 |

491 | If the last expression in a function is a call to another function, then 492 | the result can be returned directly to the first function's caller. This 493 | is known as a tail call. If the called function, through a series 494 | of tail calls, causes the first function to be called, we have 495 | tail recursion. 496 |

497 | 498 |

499 | Tail calls do not require the caller's stack frame to be retained, so 500 | a tail-recursive function can recurse as many levels as necessary without 501 | increasing the stack depth. 502 |

503 | 504 |

505 | The count function could be formulated as a tail-recursive 506 | procedure as follows: 507 |

508 | (define (count n a)
509 |   (if (= n 0)
510 |       a
511 |       (count (- n 1) (+ a 1))))
512 | 
513 | (count 100000 0)
514 | 
515 |

516 | 517 |

518 | If you watch eval_expr with a debugger you can confirm that 519 | the stack never grows above a few levels deep. 520 |

521 | 522 |

523 | All that is left to do is clean up all the temporary objects created 524 | by our new evaluator. 525 |

526 | 527 | 528 | 529 | 530 | -------------------------------------------------------------------------------- /html/data.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | Chapter 2: Data 6 | 7 | 8 | 9 |

Data

10 | 11 |

12 | We will define four kinds of object to begin with: 13 |

14 |
Integer
15 |
A number. For example: 3, -9, 0.
16 |
Symbol
17 |
A name consisting of a string of characters. For 18 | example: FOO, BAR, ADD-TWO. 19 | We will normalize characters to upper-case in this project, but this 20 | is not strictly necessary.
21 |
NIL
22 |
Represents "nothing". A bit like NULL in C and other 23 | languages.
24 |
Pair
25 |
A pair consists of two elements, which for historical reasons are 26 | called car and cdr. Both can hold either an integer, a 27 | symbol, NIL, or a reference to another pair. 28 | The types of each element may be different.
29 |
30 | Integers, symbols and NIL are called simple data. 31 | The term atom can refer to either a simple datum or a pair 32 | (purists may disagree on this point). 33 |

34 | 35 |

36 | Note that integers and symbols are immutable, so we can think 37 | of two integers with the same value as being the same object. This is 38 | particularly useful for symbols, because it allows us to test for 39 | equality by comparing pointers. 40 |

41 | 42 |

Implementation

43 | 44 |

45 | Let's declare some C types to hold our data. There are many clever ways 46 | to store LISP objects efficiently, but for this implementation we will 47 | stick to a very simple scheme [please excuse the pun]. 48 |

49 | 50 |
 51 | struct Atom {
 52 | 	enum {
 53 | 		AtomType_Nil,
 54 | 		AtomType_Pair,
 55 | 		AtomType_Symbol,
 56 | 		AtomType_Integer
 57 | 	} type;
 58 | 
 59 | 	union {
 60 | 		struct Pair *pair;
 61 | 		const char *symbol;
 62 | 		long integer;
 63 | 	} value;
 64 | };
 65 | 
 66 | struct Pair {
 67 | 	struct Atom atom[2];
 68 | };
 69 | 
 70 | typedef struct Atom Atom;
 71 | 
72 | 73 |

74 | A few macros will be handy: 75 |

 76 | #define car(p) ((p).value.pair->atom[0])
 77 | #define cdr(p) ((p).value.pair->atom[1])
 78 | #define nilp(atom) ((atom).type == AtomType_Nil)
 79 | 
 80 | static const Atom nil = { AtomType_Nil };
 81 | 
82 | The "p" in nilp stands for "predicate". Identifiers in C 83 | may not contain question marks. There is no need to restrict our LISP 84 | implementation in that way, of course. 85 |

86 | 87 |

88 | Integers and (pointers to) strings can be copied around, but we need to 89 | allocate pairs on the heap. 90 |

 91 | Atom cons(Atom car_val, Atom cdr_val)
 92 | {
 93 | 	Atom p;
 94 | 
 95 | 	p.type = AtomType_Pair;
 96 | 	p.value.pair = malloc(sizeof(struct Pair));
 97 | 
 98 | 	car(p) = car_val;
 99 | 	cdr(p) = cdr_val;
100 | 
101 | 	return p;
102 | }
103 | 
104 | cons is a function to allocate a pair on the heap and 105 | assign its two elements. 106 |

107 | 108 |

109 | At this point you will have noticed that using cons will 110 | leak memory the moment its return value is discarded. We will deal with 111 | that later. Of course, if you are using a garbage-collected language 112 | then the problem is already taken care of. 113 |

114 | 115 |

Testing

116 | 117 |

118 | Now we can start creating LISP objects. An integer: 119 |

120 | Atom make_int(long x)
121 | {
122 | 	Atom a;
123 | 	a.type = AtomType_Integer;
124 | 	a.value.integer = x;
125 | 	return a;
126 | }
127 | 
128 | And a symbol: 129 |
130 | Atom make_sym(const char *s)
131 | {
132 | 	Atom a;
133 | 	a.type = AtomType_Symbol;
134 | 	a.value.symbol = strdup(s);
135 | 	return a;
136 | }
137 | 
138 |

139 | 140 |

Textual representation

141 | 142 |

143 | We will write a pair like this: 144 |

(a . b)
145 | where a is the car and b is the 146 | cdr. 147 |

148 | 149 |

150 | By using the cdr of a pair to reference another pair, we can 151 | create a chain: 152 |

153 | (a . (b . (c . (d . NIL))))
154 | 
155 | Notice that the cdr of the last pair is NIL. This 156 | signifies the end of the chain, and we call this structure a 157 | list. To avoid having to write a large number of brackets, we 158 | will write the previous list like this: 159 |
(a b c d)
160 | Finally, if the cdr of the last pair in a list is not 161 | NIL, we will write this: 162 |
(p q . r)
163 | which is equivalent to 164 |
(p . (q . r))
165 | This is called an improper list. 166 |

167 | 168 |

Implementation

169 | 170 |

171 | Printing an atom or list is simple. 172 |

173 | void print_expr(Atom atom)
174 | {
175 | 	switch (atom.type) {
176 | 	case AtomType_Nil:
177 | 		printf("NIL");
178 | 		break;
179 | 	case AtomType_Pair:
180 | 		putchar('(');
181 | 		print_expr(car(atom));
182 | 		atom = cdr(atom);
183 | 		while (!nilp(atom)) {
184 | 			if (atom.type == AtomType_Pair) {
185 | 				putchar(' ');
186 | 				print_expr(car(atom));
187 | 				atom = cdr(atom);
188 | 			} else {
189 | 				printf(" . ");
190 | 				print_expr(atom);
191 | 				break;
192 | 			}
193 | 		}
194 | 		putchar(')');
195 | 		break;
196 | 	case AtomType_Symbol:
197 | 		printf("%s", atom.value.symbol);
198 | 		break;
199 | 	case AtomType_Integer:
200 | 		printf("%ld", atom.value.integer);
201 | 		break;
202 | 	}
203 | }
204 | 
205 | By using recursion we can print aribtrarily complex data structures. 206 | (Actually that's not true: for a very deeply nested structure we will 207 | run out of stack space, and a self-referencing tree will never finish 208 | printing). 209 |

210 | 211 |

Testing

212 | 213 |

214 | See what print_expr does with various atoms: 215 | 216 | 217 | 218 | 219 | 220 | 224 |
AtomOutput
make_int(42)42
make_sym("FOO")FOO
cons(make_sym("X"), make_sym("Y"))(X . Y)
cons(make_int(1),
221 |   cons(make_int(2),
222 |   cons(make_int(3),
223 |   nil)))
(1 2 3)
225 |

226 | 227 |

228 | All this is pretty trivial. We'll get on to some more interesting stuff 229 | in the next chapter. 230 |

231 | 232 |

One last thing

233 | 234 |

235 | Remember we said that we would treat identical symbols as being the 236 | same object? We can enforce that by keeping track of all the symbols 237 | created, and returning the same atom if the same sequence of characters 238 | is requested subsequently. 239 |

240 | 241 |

242 | Languages with a set or hashtable container make this easy, but we can 243 | use the LISP data structures already implemented to store the symbols 244 | in a list: 245 |

246 | static Atom sym_table = { AtomType_Nil };
247 | 
248 | Atom make_sym(const char *s)
249 | {
250 | 	Atom a, p;
251 | 
252 | 	p = sym_table;
253 | 	while (!nilp(p)) {
254 | 		a = car(p);
255 | 		if (strcmp(a.value.symbol, s) == 0)
256 | 			return a;
257 | 		p = cdr(p);
258 | 	}
259 | 
260 | 	a.type = AtomType_Symbol;
261 | 	a.value.symbol = strdup(s);
262 | 	sym_table = cons(a, sym_table);
263 | 
264 | 	return a;
265 | }
266 | 
267 | Neat, huh? It's not particularly efficient, but it will do fine for now. 268 |

269 | 270 | 271 | 272 | 273 | -------------------------------------------------------------------------------- /html/expressions.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | Chapter 4: Expressions 6 | 7 | 8 | 9 |

Expressions, Environment and Evaluation

10 | 11 |

Expressions

12 | 13 |

14 | LISP is all about expressions. An expression can be a 15 | literal, an identifier, or a list consisting of an 16 | operator and one or more arguments. 17 |

18 | 19 |

20 | A literal is an object with an intrinsic value. In our system, that's 21 | either an integer or NIL (if you consider "nothing" to be 22 | a value). 23 |

24 | 25 |

26 | An identifier is a name for an object. Symbols can be 27 | identifiers. 28 |

29 | 30 |

31 | Everything else is a list of the form (operator argument...) 32 | where argument... means zero or more arguments. 33 |

34 | 35 |

Environment

36 | 37 |

38 | To associate identifiers with objects we need an environment. 39 | This is a collection of bindings, each of which consists of an 40 | identifier and its corresponding value. For example: 41 | 42 | 43 | 44 | 45 | 46 | 47 |
Bindings
IdentifierValue
FOO42
BARNIL
BAZ(X Y Z)
48 | Note that the identifiers are all symbols, but the values can be any 49 | object within our system of data — the value for BAZ 50 | is a list containing three symbols. 51 |

52 | 53 |

54 | An environment can also have a parent environment. If there is 55 | no binding for a particular identifier in the environment, we can check 56 | the parent, the parent's parent and so on. In this way we can create a 57 | tree of environments which share bindings with their ancestors unless 58 | explicit replacements exist. 59 |

60 | 61 |

Implementation

62 | 63 |

64 | There is a convenient way of representing environments using our LISP 65 | data types: 66 |

 67 | (parent (identifier . value)...)
 68 | 
69 | So the environment above (assuming it has no parent) is: 70 |
 71 | (NIL (FOO . 42) (BAR . NIL) (BAZ . (X Y Z)))
 72 | 
73 |

74 | 75 |

76 | Here is a function to create an empty environment with a specified 77 | parent (which could be NIL): 78 |

 79 | Atom env_create(Atom parent)
 80 | {
 81 | 	return cons(parent, nil);
 82 | }
 83 | 
84 |

85 | 86 |

87 | Next we have two functions to retrieve and create bindings in an 88 | environment. 89 |

90 | 91 |

92 |

 93 | int env_get(Atom env, Atom symbol, Atom *result)
 94 | {
 95 | 	Atom parent = car(env);
 96 | 	Atom bs = cdr(env);
 97 | 
 98 | 	while (!nilp(bs)) {
 99 | 		Atom b = car(bs);
100 | 		if (car(b).value.symbol == symbol.value.symbol) {
101 | 			*result = cdr(b);
102 | 			return Error_OK;
103 | 		}
104 | 		bs = cdr(bs);
105 | 	}
106 | 
107 | 	if (nilp(parent))
108 | 		return Error_Unbound;
109 | 
110 | 	return env_get(parent, symbol, result);
111 | }
112 | 
113 | Disallowing duplicate symbols means that we don't have to call 114 | strcmp here, which should mean that this lookup function 115 | is not too slow. 116 |

117 | 118 |
119 | int env_set(Atom env, Atom symbol, Atom value)
120 | {
121 | 	Atom bs = cdr(env);
122 | 	Atom b = nil;
123 | 
124 | 	while (!nilp(bs)) {
125 | 		b = car(bs);
126 | 		if (car(b).value.symbol == symbol.value.symbol) {
127 | 			cdr(b) = value;
128 | 			return Error_OK;
129 | 		}
130 | 		bs = cdr(bs);
131 | 	}
132 | 
133 | 	b = cons(symbol, value);
134 | 	cdr(env) = cons(b, cdr(env));
135 | 
136 | 	return Error_OK;
137 | }
138 | 
139 | 140 |

141 | Only env_get recursively checks the parent environments. 142 | We don't want to modify the bindings of parents. 143 |

144 | 145 |

Evaluation

146 | 147 |

148 | Now that we have expressions, we can start to evaluate them. 149 | Evalution is a process which takes an expression and an environment, and 150 | produces a value (the result). Let's specify the rules. 151 |

152 | 153 |
    154 |
  • 155 | A literal will evaluate to itself. 156 |
  • 157 | 158 |
  • 159 | The environment allows us to determine a value for an identifier. 160 | Attempting to evaluate an identifier for which no binding exists is an 161 | error. 162 |
  • 163 | 164 |
  • 165 | A list expression with one of the following operators is called a 166 | special form: 167 |
    168 |
    QUOTE
    169 |
    The result of evaluating (QUOTE EXPR) is 170 | EXPR, which is returned without evaluating. 171 |
    DEFINE
    172 |
    Evaluating (DEFINE SYMBOL EXPR) creates a binding 173 | for SYMBOL (or modifies an existing binding) in the 174 | evaluation environment. SYMBOL is bound to the value 175 | obtained by evaluating EXPR. The final result is 176 | SYMBOL. 177 |
    178 | 179 |
  • 180 | 181 |
  • 182 | Anything else, including list expressions with any other operator, is 183 | invalid. 184 |
  • 185 |
186 | 187 |

Implementation

188 | 189 |

190 | We will need to check whether an expression is a proper list. 191 |

192 | int listp(Atom expr)
193 | {
194 | 	while (!nilp(expr)) {
195 | 		if (expr.type != AtomType_Pair)
196 | 			return 0;
197 | 		expr = cdr(expr);
198 | 	}
199 | 	return 1;
200 | }
201 | 
202 |

203 | 204 |

205 | The Error enumeration needs a few more entires: 206 | 207 | 208 | 209 | 210 | 211 | 212 | 213 | 214 | 215 | 216 | 217 | 218 | 219 |
Error_UnboundAttempted to evaluate a symbol for which no binding exists
Error_ArgsA list expression was shorter or longer than expected
Error_TypeAn object in an expression was of a different type than expected
220 |

221 | 222 |

223 | The function to perform evaluation is now a straightforward translation 224 | of the rules into C. 225 |

226 | 227 |
228 | int eval_expr(Atom expr, Atom env, Atom *result)
229 | {
230 | 	Atom op, args;
231 | 	Error err;
232 | 
233 | 	if (expr.type == AtomType_Symbol) {
234 | 		return env_get(env, expr, result);
235 | 	} else if (expr.type != AtomType_Pair) {
236 | 		*result = expr;
237 | 		return Error_OK;
238 | 	}
239 | 
240 | 	if (!listp(expr))
241 | 		return Error_Syntax;
242 | 
243 | 	op = car(expr);
244 | 	args = cdr(expr);
245 | 
246 | 	if (op.type == AtomType_Symbol) {
247 | 		if (strcmp(op.value.symbol, "QUOTE") == 0) {
248 | 			if (nilp(args) || !nilp(cdr(args)))
249 | 				return Error_Args;
250 | 
251 | 			*result = car(args);
252 | 			return Error_OK;
253 | 		} else if (strcmp(op.value.symbol, "DEFINE") == 0) {
254 | 			Atom sym, val;
255 | 
256 | 			if (nilp(args) || nilp(cdr(args)) || !nilp(cdr(cdr(args))))
257 | 				return Error_Args;
258 | 
259 | 			sym = car(args);
260 | 			if (sym.type != AtomType_Symbol)
261 | 				return Error_Type;
262 | 
263 | 			err = eval_expr(car(cdr(args)), env, &val);
264 | 			if (err)
265 | 				return err;
266 | 
267 | 			*result = sym;
268 | 			return env_set(env, sym, val);
269 | 		}
270 | 	}
271 | 
272 | 	return Error_Syntax;
273 | }
274 | 
275 | 276 |

Testing

277 | 278 |

279 | Extending the read-print loop from the previous chapter, we now have a 280 | read-eval-print loop (REPL). This is the core of our LISP interpreter. 281 |

282 | 283 |
284 | int main(int argc, char **argv)
285 | {
286 | 	Atom env;
287 | 	char *input;
288 | 
289 | 	env = env_create(nil);
290 | 
291 | 	while ((input = readline("> ")) != NULL) {
292 | 		const char *p = input;
293 | 		Error err;
294 | 		Atom expr, result;
295 | 
296 | 		err = read_expr(p, &p, &expr);		
297 | 
298 | 		if (!err)
299 | 			err = eval_expr(expr, env, &result);
300 | 
301 | 		switch (err) {
302 | 		case Error_OK:
303 | 			print_expr(result);
304 | 			putchar('\n');
305 | 			break;
306 | 		case Error_Syntax:
307 | 			puts("Syntax error");
308 | 			break;
309 | 		case Error_Unbound:
310 | 			puts("Symbol not bound");
311 | 			break;
312 | 		case Error_Args:
313 | 			puts("Wrong number of arguments");
314 | 			break;
315 | 		case Error_Type:
316 | 			puts("Wrong type");
317 | 			break;
318 | 		}
319 | 
320 | 		free(input);
321 | 	}
322 | 
323 | 	return 0;
324 | }
325 | 
326 | 327 |

328 | Let's see what it can do. 329 |

330 | 331 |
332 | > foo
333 | Symbol not bound
334 | > (quote foo)
335 | FOO
336 | > (define foo 42)
337 | FOO
338 | > foo
339 | 42
340 | > (define foo (quote bar))
341 | FOO
342 | > foo
343 | BAR
344 | 
345 | 346 |

347 | We can now interactively assign names to objects. 348 |

349 | 350 | 351 | 352 | 353 | -------------------------------------------------------------------------------- /html/gc.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | Chapter 15: Garbage collection 6 | 7 | 8 | 9 |

Garbage collection

10 | 11 |

12 | We will implement a very simple mark-and-sweep garbage collector. 13 | This is not something you would want to use in a real application, 14 | but it will serve for our purposes. 15 |

16 | 17 |

18 | Remember that all our LISP data is allocated through the 19 | cons function. First we modify it to keep track of 20 | every allocation in a linked list. 21 |

22 | 23 |
 24 | struct Allocation {
 25 | 	struct Pair pair;
 26 | 	int mark : 1;
 27 | 	struct Allocation *next;
 28 | };
 29 | 
 30 | struct Allocation *global_allocations = NULL;
 31 | 
 32 | Atom cons(Atom car_val, Atom cdr_val)
 33 | {
 34 | 	struct Allocation *a;
 35 | 	Atom p;
 36 | 
 37 | 	a = malloc(sizeof(struct Allocation));
 38 | 	a->mark = 0;
 39 | 	a->next = global_allocations;
 40 | 	global_allocations = a;
 41 | 
 42 | 	p.type = AtomType_Pair;
 43 | 	p.value.pair = &a->pair;
 44 | 
 45 | 	car(p) = car_val;
 46 | 	cdr(p) = cdr_val;
 47 | 
 48 | 	return p;
 49 | }
 50 | 
51 | 52 |

53 | Now a function to mark a whole tree of pairs as "in use". 54 |

55 | 56 |
 57 | void gc_mark(Atom root)
 58 | {
 59 | 	struct Allocation *a;
 60 | 
 61 | 	if (!(root.type == AtomType_Pair
 62 | 		|| root.type == AtomType_Closure
 63 | 		|| root.type == AtomType_Macro))
 64 | 		return;
 65 | 
 66 | 	a = (struct Allocation *)
 67 | 		((char *) root.value.pair
 68 | 			- offsetof(struct Allocation, pair));
 69 | 
 70 | 	if (a->mark)
 71 | 		return;
 72 | 
 73 | 	a->mark = 1;
 74 | 
 75 | 	gc_mark(car(root));
 76 | 	gc_mark(cdr(root));
 77 | }
 78 | 
79 | 80 |

81 | The garbage collector frees everything which is not marked, and 82 | then clears the marks ready for the next run. We also mark the 83 | symbol table since these are referenced by a static variable. 84 |

85 | 86 |
 87 | void gc()
 88 | {
 89 | 	struct Allocation *a, **p;
 90 | 
 91 | 	gc_mark(sym_table);
 92 | 
 93 | 	/* Free unmarked allocations */
 94 | 	p = &global_allocations;
 95 | 	while (*p != NULL) {
 96 | 		a = *p;
 97 | 		if (!a->mark) {
 98 | 			*p = a->next;
 99 | 			free(a);
100 | 		} else {
101 | 			p = &a->next;
102 | 		}
103 | 	}
104 | 
105 | 	/* Clear marks */
106 | 	a = global_allocations;
107 | 	while (a != NULL) {
108 | 		a->mark = 0;
109 | 		a = a->next;
110 | 	}
111 | }
112 | 
113 | 114 |

115 | So that we don't run out of memory under deep recursion, we 116 | need to call the garbage collector every few iterations of 117 | eval_expr. The interval will roughly determine 118 | how many allocations are made between garbage collections. 119 |

120 | 121 |
122 | int eval_expr(Atom expr, Atom env, Atom *result)
123 | {
124 | 	static int count = 0;
125 | 	Error err = Error_OK;
126 | 	Atom stack = nil;
127 | 
128 | 	do {
129 | 		if (++count == 100000) {
130 | 			gc_mark(expr);
131 | 			gc_mark(env);
132 | 			gc_mark(stack);
133 | 			gc();
134 | 			count = 0;
135 | 		}
136 | 
137 | 	.
138 | 	.
139 | 	.
140 | }
141 | 
142 | 143 |

Testing

144 | 145 |

146 | Adapting the COUNT example from previous chapters: 147 |

148 | 149 |
150 | > (define (count n) (if (= n 0) t (count (- n 1))))
151 | COUNT
152 | > (count 1000000)
153 | T
154 | 
155 | 156 |

157 | And lo! the operation completes without eating up all of our RAM. 158 |

159 | 160 | 161 | 162 | 163 | -------------------------------------------------------------------------------- /html/index.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | Building LISP 6 | 7 | 8 | 9 |

Building LISP

10 | 11 |

12 | 30 |

31 | 32 |

33 | The code is available on 34 | GitHub. 35 |

36 | 37 |

38 | Questions? Comments? Email 39 | amy@lwh.jp. 40 |

41 | 42 | 43 | 44 | -------------------------------------------------------------------------------- /html/intro.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | Chapter 1: Introduction 6 | 7 | 8 | 9 |

Introduction

10 | 11 |

12 | The best way to understand how something works is to try to build it 13 | for yourself. Reading somebody else's explanation might satisfy your 14 | curiosity, but without the experience of falling into all the little 15 | traps it is difficult to get a feel for why something is 16 | designed a certain way. 17 |

18 | 19 |

20 | It's been said that every would-be programmer should write a compiler. 21 | While I think this is good advice (although I haven't followed it myself), 22 | there is so much effort involved just in parsing a language such as C 23 | that any potential insights risk getting lost in a mire of details. 24 | Perhaps creating an interpreter for some simple language would be a good 25 | first step. 26 |

27 | 28 |

29 | I first started playing around with LISP a good few years ago, yet much 30 | later than I should have. This led me to the classic lecture series 31 | 32 | Structure and Interpretation of Computer Programs. If you have the 33 | next 24 hours free and haven't seen the videos already, go watch them now. 34 |

35 | 36 |

37 | The course covers many topics, but the second half shows in detail how 38 | to evaluate LISP, first by implementing a simple version of 39 | eval in LISP itself. I figured that this would translate 40 | well into C, and so decided to try creating my own implementation 41 | of LISP. 42 |

43 | 44 |

45 | It was really easy. 46 |

47 | 48 |

49 | This article is an attempt to share the process by which I built my 50 | implementation, and the chapters occur roughly in the order in which I 51 | did things. Why not follow along and create your own version in your 52 | language of choice?* 53 |

54 | 55 |

56 | As a professional programmer (ha, ha), I spend the majority 57 | of my time writing C and C++. Most of the rest is Java. There are many 58 | languages out there, each with their own debatable merits, but I'd like 59 | to demonstrate just how simple a LISP machine can be — even built 60 | in as low-level a language as C. See John McCarthy's 61 | 62 | History of LISP for the story of the pioneers. 63 |

64 | 65 |

66 | So here is my toy implementation of LISP. I've borrowed features from 67 | various dialects, but it's closer to Scheme than Common LISP. The 68 | differences are trivial enough that changing over would not require 69 | substantial changes to the interpreter. Don't worry if you're not familiar 70 | with LISP; I will define everything as I go along. 71 |

72 | 73 |

74 | It is not meant to be the smallest possible implementation, nor the 75 | most efficient, nor the most complete; it could however be described as 76 | lazy. My goal was to write robust, easy-to-read code that does exactly 77 | what it needs to, and no more, and I hope that it conveys how little 78 | effort is required to construct an incredibly powerful environment like LISP. 79 |

80 | 81 |
82 | 83 | 84 | 85 |

86 | * If you are using a fancy language which supports something like 87 | eval, it would be cool to expose the native datatypes to 88 | the LISP environment. 89 |

90 | 91 |
92 | 93 | 94 | 95 | 96 | -------------------------------------------------------------------------------- /html/lambda.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | Chapter 7: Lambda expressions and closures 6 | 7 | 8 | 9 |

Lambda expressions and closures

10 | 11 |

12 | This is where things start to get interesting. We will now implement 13 | support for lambda expressions, a way to build functions dynamically 14 | out of the LISP expressions we can already deal with. 15 |

16 | 17 |

18 | A lambda expression is a list expression with a particular syntax: 19 |

 20 | (LAMBDA (arg...) expr...)
 21 | 
22 |

23 | 24 |

25 | The result of evaluating a LAMBDA expression is a new 26 | kind of object which we will call a closure. A closure can be used 27 | in list expressions in the same way as a built-in function. In this case 28 | the arguments will be bound to the symbols listed as arg... 29 | in the lambda expression. The body of the function consists of the 30 | expressions expr..., which will be evaluated in turn. The result 31 | of evaluating the final expression is the result of applying the arguments 32 | to the closure. 33 |

34 | 35 |

36 | That's a pretty dense definition, so here is an example of how we would 37 | like to use lambda expressions: 38 |

 39 | (DEFINE SQUARE (LAMBDA (X) (* X X)))
 40 | 
41 |

42 | 43 |

44 | SQUARE should now be a function of one argument 45 | X, which returns the result of evaluating 46 | (* X X). Thus evaluating (SQUARE 3) 47 | should return 9. 48 |

49 | 50 |

Implementation

51 | 52 |

53 | We will represent the closure using a list: 54 |

 55 | (env (arg...) expr...)
 56 | 
57 | env is the environment in which the closure was defined. 58 | This is needed to allow the lambda function to use bindings without 59 | having to pass them as arguments. For example, recall that 60 | CAR is bound in the initial environment to our primitive 61 | builtin_car function. 62 |

63 | 64 |

65 | The first task is to add a new constant for the type field 66 | of our Atom structure: 67 |

 68 | struct Atom {
 69 | 	enum {
 70 | 		.
 71 | 		.
 72 | 		.
 73 | 		AtomType_Closure
 74 | 	} type;
 75 | 
 76 | 	union {
 77 | 		.
 78 | 		.
 79 | 		.
 80 | 	} value;
 81 | };
 82 | 
83 | Since the closure is just a regular list, there is no need to add anything 84 | to value. 85 |

86 | 87 |

88 | Like our other atom types, we will create a utility function to 89 | initialize them. make_closure, unlike the others, performs 90 | some validation of the arguments and so needs to return an error code. 91 |

92 | 93 |
 94 | int make_closure(Atom env, Atom args, Atom body, Atom *result)
 95 | {
 96 | 	Atom p;
 97 | 
 98 | 	if (!listp(args) || !listp(body))
 99 | 		return Error_Syntax;
100 | 
101 | 	/* Check argument names are all symbols */
102 | 	p = args;
103 | 	while (!nilp(p)) {
104 | 		if (car(p).type != AtomType_Symbol)
105 | 			return Error_Type;
106 | 		p = cdr(p);
107 | 	}
108 | 
109 | 	*result = cons(env, cons(args, body));
110 | 	result->type = AtomType_Closure;
111 | 
112 | 	return Error_OK;
113 | }
114 | 
115 | 116 |

117 | Next up is another special case in eval to create a 118 | closure whenever a lambda expression is encountered. 119 |

120 | 121 |
122 | int eval_expr(Atom expr, Atom env, Atom *result)
123 | {
124 | 	.
125 | 	.
126 | 	.
127 | 	if (op.type == AtomType_Symbol) {
128 | 		if (strcmp(op.value.symbol, "QUOTE") == 0) {
129 | 		.
130 | 		.
131 | 		.
132 | 		} else if (strcmp(op.value.symbol, "LAMBDA") == 0) {
133 | 			if (nilp(args) || nilp(cdr(args)))
134 | 				return Error_Args;
135 | 
136 | 			return make_closure(env, car(args), cdr(args), result);
137 | 		}
138 | 	}
139 | 	.
140 | 	.
141 | 	.
142 | }
143 | 
144 | 145 |

146 | The body of our SQUARE example above is expressed in terms 147 | of X. In order to evaluate the body, we need to create a new 148 | environment with X bound to the value of the argument: 149 |

150 | (closure-env (X . 3))
151 | 
152 | where the parent environment closure-env is the environment 153 | that was stored in the closure. 154 |

155 | 156 |

157 | Finally we extend apply to create the new environment and 158 | call eval for each expression in the body. 159 |

160 | 161 |
162 | int apply(Atom fn, Atom args, Atom *result)
163 | {
164 | 	Atom env, arg_names, body;
165 | 
166 | 	if (fn.type == AtomType_Builtin)
167 | 		return (*fn.value.builtin)(args, result);
168 | 	else if (fn.type != AtomType_Closure)
169 | 		return Error_Type;
170 | 
171 | 	env = env_create(car(fn));
172 | 	arg_names = car(cdr(fn));
173 | 	body = cdr(cdr(fn));
174 | 
175 | 	/* Bind the arguments */
176 | 	while (!nilp(arg_names)) {
177 | 		if (nilp(args))
178 | 			return Error_Args;
179 | 		env_set(env, car(arg_names), car(args));
180 | 		arg_names = cdr(arg_names);
181 | 		args = cdr(args);
182 | 	}
183 | 	if (!nilp(args))
184 | 		return Error_Args;
185 | 
186 | 	/* Evaluate the body */
187 | 	while (!nilp(body)) {
188 | 		Error err = eval_expr(car(body), env, result);
189 | 		if (err)
190 | 			return err;
191 | 		body = cdr(body);
192 | 	}
193 | 
194 | 	return Error_OK;
195 | }
196 | 
197 | 198 |

Testing

199 | 200 |

201 | Let's check that our SQUARE function works as intended. 202 |

203 | 204 |
205 | > (define square (lambda (x) (* x x)))
206 | SQUARE
207 | > (square 3)
208 | 9
209 | > (square 4)
210 | 16
211 | 
212 | 213 |

214 | Of course, lambda expressions do not have to be bound to a symbol — 215 | we can create anonymous functions. 216 |

217 | 218 |
219 | > ((lambda (x) (- x 2)) 7)
220 | 5
221 | 
222 | 223 |

224 | Fans of functional programming will be pleased to see that we can now 225 | do this kind of thing: 226 |

227 | 228 |
229 | > (define make-adder (lambda (x) (lambda (y) (+ x y))))
230 | MAKE-ADDER
231 | > (define add-two (make-adder 2))
232 | ADD-TWO
233 | > (add-two 5)
234 | 7
235 | 
236 | 237 |

238 | Do you know where the value "2" is stored? 239 |

240 | 241 | 242 | 243 | 244 | -------------------------------------------------------------------------------- /html/library.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | Chapter 12: Library 6 | 7 | 8 | 9 |

Library

10 | 11 |

12 | We will now create a small library of useful functions for 13 | our LISP system. Rather than creating new builtins for each one, 14 | let's take advantage of the fact that much of the LISP standard 15 | library can be implemented in LISP itself in terms of lower-level 16 | fuctions. 17 |

18 | 19 |

20 | First we need a function to read the library definitions from disk. 21 |

22 | 23 |
 24 | char *slurp(const char *path)
 25 | {
 26 | 	FILE *file;
 27 | 	char *buf;
 28 | 	long len;
 29 | 
 30 | 	file = fopen(path, "r");
 31 | 	if (!file)
 32 | 		return NULL;
 33 | 	fseek(file, 0, SEEK_END);
 34 | 	len = ftell(file);
 35 | 	fseek(file, 0, SEEK_SET);
 36 | 
 37 | 	buf = malloc(len + 1);
 38 | 	if (!buf)
 39 | 		return NULL;
 40 | 
 41 | 	fread(buf, 1, len, file);
 42 | 	buf[len] = 0;
 43 | 	fclose(file);
 44 | 
 45 | 	return buf;
 46 | }
 47 | 
48 | 49 |

50 | And a routine, similar to our REPL in main, to 51 | process the definitions. Because we read the whole file in one 52 | go, there is no problem with splitting definitions over several 53 | lines. 54 |

55 | 56 |
 57 | void load_file(Atom env, const char *path)
 58 | {
 59 | 	char *text;
 60 | 
 61 | 	printf("Reading %s...\n", path);
 62 | 	text = slurp(path);
 63 | 	if (text) {
 64 | 		const char *p = text;
 65 | 		Atom expr;
 66 | 		while (read_expr(p, &p, &expr) == Error_OK) {
 67 | 			Atom result;
 68 | 			Error err = eval_expr(expr, env, &result);
 69 | 			if (err) {
 70 | 				printf("Error in expression:\n\t");
 71 | 				print_expr(expr);
 72 | 				putchar('\n');
 73 | 			} else {
 74 | 				print_expr(result);
 75 | 				putchar('\n');
 76 | 			}
 77 | 		}
 78 | 		free(text);
 79 | 	}
 80 | }
 81 | 
82 | 83 |

84 | Finally read in the library after setting up the builtins. 85 |

86 | 87 |
 88 | int main(int argc, char **argv)
 89 | {
 90 | 	.
 91 |  	.
 92 | 	.
 93 | 
 94 | 	/* Set up the initial environment */
 95 | 	.
 96 | 	.
 97 | 	.
 98 | 
 99 | 	load_file(env, "library.lisp");
100 | 
101 | 	/* Main loop */
102 | 	.
103 | 	.
104 | 	.
105 | }
106 | 
107 | 108 |

Testing

109 | 110 |

111 | Create library.lisp with the following definition: 112 |

113 | (define (abs x) (if (< x 0) (- x) x))
114 | 
115 |

116 | 117 |

118 | And run the interpreter: 119 |

120 | Reading library.lisp...
121 | ABS
122 | > (abs -2)
123 | 2
124 | 
125 | The ABS function will now be available in every session 126 | without having to define it each time. 127 |

128 | 129 |

fold

130 | 131 |

132 | foldl and foldr allow us to easily construct 133 | functions which combine elements of a list. 134 |

135 | 136 |
137 | (define (foldl proc init list)
138 |   (if list
139 |       (foldl proc
140 |              (proc init (car list))
141 |              (cdr list))
142 |       init))
143 | 
144 | (define (foldr proc init list)
145 |   (if list
146 |       (proc (car list)
147 |             (foldr proc init (cdr list)))
148 |       init))
149 | 
150 | 151 |

152 | See 153 | 154 | the internet for more details. 155 |

156 | 157 |
158 | (define (list . items)
159 |   (foldr cons nil items))
160 | 
161 | (define (reverse list)
162 |   (foldl (lambda (a x) (cons x a)) nil list))
163 | 
164 | 165 |

166 | list constructs a new list containing its arguments. 167 | reverse creates a copy of a list with the items in 168 | reverse order. 169 |

170 | 171 |

172 | The recursive definition of LIST requires O(n) stack 173 | space - a serious implementation would most likely use a more efficient 174 | version. 175 |

176 | 177 |

Testing

178 | 179 |
180 | > (list (+ 3 5) 'foo)
181 | (8 FOO)
182 | > (reverse '(1 2 3))
183 | (3 2 1)
184 | 
185 | 186 |

187 | See how much easier this was than implementing the functions as 188 | builtins. 189 |

190 | 191 |

More builtins

192 | 193 |

194 | Some primitive functions require access to the internals of the system. 195 |

196 | 197 |

apply

198 | 199 |

200 | The apply function: 201 |

202 | (APPLY fn arg-list)
203 | 
204 | calls fn with the arguments bound to the values in the 205 | list arg-list. 206 |

207 | 208 |
209 | int builtin_apply(Atom args, Atom *result)
210 | {
211 | 	Atom fn;
212 | 
213 | 	if (nilp(args) || nilp(cdr(args)) || !nilp(cdr(cdr(args))))
214 | 		return Error_Args;
215 | 
216 | 	fn = car(args);
217 | 	args = car(cdr(args));
218 | 
219 | 	if (!listp(args))
220 | 		return Error_Syntax;
221 | 
222 | 	return apply(fn, args, result);
223 | }
224 | 
225 | 226 |

eq?

227 | 228 |

229 | eq? tests whether two atoms refer to the same object. 230 |

231 | 232 |
233 | int builtin_eq(Atom args, Atom *result)
234 | {
235 | 	Atom a, b;
236 | 	int eq;
237 | 
238 | 	if (nilp(args) || nilp(cdr(args)) || !nilp(cdr(cdr(args))))
239 | 		return Error_Args;
240 | 
241 | 	a = car(args);
242 | 	b = car(cdr(args));
243 | 
244 | 	if (a.type == b.type) {
245 | 		switch (a.type) {
246 | 		case AtomType_Nil:
247 | 			eq = 1;
248 | 			break;
249 | 		case AtomType_Pair:
250 | 		case AtomType_Closure:
251 | 		case AtomType_Macro:
252 | 			eq = (a.value.pair == b.value.pair);
253 | 			break;
254 | 		case AtomType_Symbol:
255 | 			eq = (a.value.symbol == b.value.symbol);
256 | 			break;
257 | 		case AtomType_Integer:
258 | 			eq = (a.value.integer == b.value.integer);
259 | 			break;
260 | 		case AtomType_Builtin:
261 | 			eq = (a.value.builtin == b.value.builtin);
262 | 			break;
263 | 		}
264 | 	} else {
265 | 		eq = 0;
266 | 	}
267 | 
268 | 	*result = eq ? make_sym("T") : nil;
269 | 	return Error_OK;
270 | }
271 | 
272 | 273 |

pair?

274 | 275 |

276 | Tests whether an atom is a pair. 277 |

278 | 279 |
280 | int builtin_pairp(Atom args, Atom *result)
281 | {
282 | 	if (nilp(args) || !nilp(cdr(args)))
283 | 		return Error_Args;
284 | 
285 | 	*result = (car(args).type == AtomType_Pair) ? make_sym("T") : nil;
286 | 	return Error_OK;
287 | }
288 | 
289 | 290 |

291 | Don't forget to add bindings for these to the initial environment. 292 |

293 | env_set(env, make_sym("APPLY"), make_builtin(builtin_apply));
294 | env_set(env, make_sym("EQ?"), make_builtin(builtin_eq));
295 | env_set(env, make_sym("PAIR?"), make_builtin(builtin_pairp));
296 | 
297 |

298 | 299 |

map

300 | 301 |

302 | We can use foldr and apply to implement 303 | another important function map, which constructs a 304 | list containing the results of calling an n-ary function 305 | with the values contained in n lists in turn. 306 |

307 | 308 |
309 | (define (unary-map proc list)
310 |   (foldr (lambda (x rest) (cons (proc x) rest))
311 |          nil
312 |          list))
313 | 
314 | (define (map proc . arg-lists)
315 |   (if (car arg-lists)
316 |       (cons (apply proc (unary-map car arg-lists))
317 |             (apply map (cons proc
318 |                              (unary-map cdr arg-lists))))
319 |       nil))
320 | 
321 | 322 |

323 | Once again please note that there are alternative implementations. 324 |

325 | 326 |

327 | It works like this: 328 |

329 | 330 |
331 | > (map + '(1 2 3) '(4 5 6))
332 | (5 7 9)
333 | 
334 | 335 |

336 | The result is a list containing the results of evaluating 337 | (+ 1 4), (+ 2 5), and 338 | (+ 3 6). 339 |

340 | 341 | 342 | 343 | 344 | -------------------------------------------------------------------------------- /html/macros.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | Chapter 11: Macros 6 | 7 | 8 | 9 |

Macros

10 | 11 |

12 | Macros allow you to create new special forms at runtime. Unlike a 13 | function, the arguments to a macro are not evaluated. The 14 | result of evaluating the body of the macro is then itself evaluated. 15 |

16 | 17 |

18 | Note: these are (essentially) Common LISP macros. Scheme has a 19 | different macro system, which avoids problems with identifiers 20 | introduced by the macro, but is more complex. 21 |

22 | 23 |

24 | We will define macros using the following syntax: 25 |

 26 | (DEFMACRO (name arg...) body...)
 27 | 
28 | This matches our DEFINE syntax for functions, but is 29 | slightly different from the form used in Common LISP. 30 |

31 | 32 |

Example

33 | 34 |

35 | Take the macro IGNORE defined by: 36 |

 37 | (DEFMACRO (IGNORE X)
 38 |   (CONS 'QUOTE
 39 |     (CONS X NIL)))
 40 | 
41 |

42 | 43 |

44 | If we then evaluate the expression 45 |

 46 | (IGNORE FOO)
 47 | 
48 | where FOO need not be bound, the body of IGNORE 49 | will first be evaluated with the argument X bound to the 50 | unevaluated symbol FOO. The result of evaluating 51 | the nested CONS expressions within this environment is: 52 |
 53 | (QUOTE . (FOO . NIL))
 54 | 
55 | which is of course equivalent to: 56 |
 57 | (QUOTE FOO)
 58 | 
59 | Finally, evaluating this value (which is the result of evaluating the 60 | macro body) gives us: 61 |
 62 | FOO
 63 | 
64 |

65 | 66 |

Implementation

67 | 68 |

69 | We will define a new type of atom: 70 |

 71 | AtomType_Macro
 72 | 
73 | the value of which is the same as AtomType_Closure. 74 |

75 | 76 |

77 | And now simply teach eval_expr about our new macro 78 | type. 79 |

80 | 81 |
 82 | int eval_expr(Atom expr, Atom env, Atom *result)
 83 | {
 84 | 	.
 85 | 	.
 86 | 	.
 87 | 	if (op.type == AtomType_Symbol) {
 88 | 		if (strcmp(op.value.symbol, "QUOTE") == 0) {
 89 | 		.
 90 | 		.
 91 | 		.
 92 | 		} else if (strcmp(op.value.symbol, "DEFMACRO") == 0) {
 93 | 			Atom name, macro;
 94 | 			Error err;
 95 | 
 96 | 			if (nilp(args) || nilp(cdr(args)))
 97 | 				return Error_Args;
 98 | 
 99 | 			if (car(args).type != AtomType_Pair)
100 | 				return Error_Syntax;
101 | 
102 | 			name = car(car(args));
103 | 			if (name.type != AtomType_Symbol)
104 | 				return Error_Type;
105 | 
106 | 			err = make_closure(env, cdr(car(args)),
107 | 				cdr(args), &macro);
108 | 			if (err)
109 | 				return err;
110 | 
111 | 			macro.type = AtomType_Macro;
112 | 			*result = name;
113 | 			return env_set(env, name, macro);
114 | 		}
115 | 	}
116 | 
117 | 	/* Evaluate operator */
118 | 	.
119 | 	.
120 | 	.
121 | 
122 | 	/* Is it a macro? */
123 | 	if (op.type == AtomType_Macro) {
124 | 		Atom expansion;
125 | 		op.type = AtomType_Closure;
126 | 		err = apply(op, args, &expansion);
127 | 		if (err)
128 | 			return err;
129 | 		return eval_expr(expansion, env, result);
130 | 	}
131 | 
132 | 	/* Evaulate arguments */
133 | 	.
134 | 	.
135 | 	.
136 | }
137 | 
138 | 139 |

Testing

140 | 141 |
142 | > (defmacro (ignore x) (cons 'quote (cons x nil)))
143 | IGNORE
144 | > (ignore foo)
145 | FOO
146 | > foo
147 | Symbol not bound
148 | 
149 | 150 |

151 | We will use macros in the future to define some new special forms. 152 |

153 | 154 | 155 | 156 | 157 | -------------------------------------------------------------------------------- /html/next.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | Chapter 16: Where do we go from here? 6 | 7 | 8 | 9 |

Where do we go from here?

10 | 11 |

12 | The goal of this project was to demonstrate an easy implementation 13 | of LISP. There is not much point in optimizing or mindlessly 14 | implementing library functions — this work has already been 15 | done in other projects. 16 |

17 | 18 |

19 | Here are some possible extensions which might prove interesting: 20 |

    21 |
  • Expose continuations with call/cc
  • 22 |
  • Other numeric types (float, rational, bignum, complex)
  • 23 |
  • String, vector and boolean types
  • 24 |
  • I/O support
  • 25 |
  • Calls to system libraries
  • 26 |
  • JIT (integrate with LLVM, for example)
  • 27 |
  • Slab allocation
  • 28 |
  • Alternative GC
  • 29 |
30 |

31 | 32 |

33 | Now it's time to stop messing about in C and build something in LISP instead! 34 |

35 | 36 |

37 | That's all, folks. 38 |

39 | 40 | 41 | 42 | 43 | -------------------------------------------------------------------------------- /html/parser.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | Chapter 3: Parser 6 | 7 | 8 | 9 |

Parser

10 | 11 |

12 | The next stage in our project is parsing: taking a line of text 13 | from the user (or elsewhere), and creating the data objects it represents. 14 | Naturally the user might type something which does not represent an 15 | object according to our definitions, in which case we must have some way 16 | to signal an error. 17 |

18 | 19 |

Errors

20 | 21 |

22 | Here is a definition of an Error type: 23 |

 24 | typedef enum {
 25 | 	Error_OK = 0,
 26 | 	Error_Syntax
 27 | } Error;
 28 | 
29 | If, like me, you learned to program in BASIC on microcomputers, you 30 | will be familiar with the dreaded SYNTAX ERROR. Now is our 31 | chance to see things from the other side of the fence. Most of our 32 | functions from now on will return an Error to indicate 33 | whether and how something went wrong. 34 |

35 | 36 |

Lexer

37 | 38 |

39 | I have no formal training in CS, but as far as I understand it the idea is 40 | to split a string up into tokens, which are both "words" and 41 | "punctuation", and discard any insignificant white space. So if the 42 | input is: 43 |

 44 | (foo bar)
 45 | 
46 | Then the four tokens are: 47 | 48 | 49 | 50 | 51 | 52 | 53 | 54 |
(foobar)
55 |

56 | 57 |

58 | So let's start by creating a lexer, which will return the start 59 | and end of the next token in a string. 60 |

61 | 62 |
 63 | int lex(const char *str, const char **start, const char **end)
 64 | {
 65 | 	const char *ws = " \t\n";
 66 | 	const char *delim = "() \t\n";
 67 | 	const char *prefix = "()";
 68 | 
 69 | 	str += strspn(str, ws);
 70 | 
 71 | 	if (str[0] == '\0') {
 72 | 		*start = *end = NULL;
 73 | 		return Error_Syntax;
 74 | 	}
 75 | 
 76 | 	*start = str;
 77 | 
 78 | 	if (strchr(prefix, str[0]) != NULL)
 79 | 		*end = str + 1;
 80 | 	else
 81 | 		*end = str + strcspn(str, delim);
 82 | 
 83 | 	return Error_OK;
 84 | }
 85 | 
86 | 87 |

88 | If our lexer hits the end of the string without finding a token (ie, 89 | the remainder of the string is entirely white space), then it will 90 | return a syntax error and set the start and end to NULL. 91 |

92 | 93 |

Parser

94 | 95 |

96 | Now we can think about the parser itself. The entry point 97 | is read_expr, which will read a single (possibly complex) 98 | object and return the error status and a pointer to the remainder of 99 | the input. 100 |

101 | int read_expr(const char *input, const char **end, Atom *result);
102 | 
103 |

104 | 105 |

106 | We will first deal with the simple data: integers, symbols and 107 | NIL. If you have a regex library available then this is 108 | easy, but it's not too bad in plain C either. 109 |

110 | 111 |
112 | int parse_simple(const char *start, const char *end, Atom *result)
113 | {
114 | 	char *buf, *p;
115 | 
116 | 	/* Is it an integer? */
117 | 	long val = strtol(start, &p, 10);
118 | 	if (p == end) {
119 | 		result->type = AtomType_Integer;
120 | 		result->value.integer = val;
121 | 		return Error_OK;
122 | 	}
123 | 
124 | 	/* NIL or symbol */
125 | 	buf = malloc(end - start + 1);
126 | 	p = buf;
127 | 	while (start != end)
128 | 		*p++ = toupper(*start), ++start;
129 | 	*p = '\0';
130 | 
131 | 	if (strcmp(buf, "NIL") == 0)
132 | 		*result = nil;
133 | 	else
134 | 		*result = make_sym(buf);
135 | 
136 | 	free(buf);
137 | 
138 | 	return Error_OK;
139 | }
140 | 
141 | 142 |

143 | Notice two things: first, we are converting the input to upper case. 144 | This isn't strictly necessary — there's nothing wrong with having 145 | a case-sensitive lisp — but it is the traditional behaviour. 146 | Secondly, NIL is a special case: it's parsed directly as 147 | AtomType_Nil, rather than leaving it as a symbol. 148 |

149 | 150 |

151 | If you're familiar with the various dialects of LISP then you will know 152 | that NIL is not necessarily the same as (), 153 | the empty list. We could choose to treat NIL as a 154 | symbol which evaluates to itself, but for this project we will consider 155 | both representations to be exactly the same. 156 |

157 | 158 |

159 | Next up are lists (including improper lists and pairs). The simplified 160 | list syntax makes this a little complicated, so we'll stick it all in a 161 | helper function. Once again recursion allows us to deal with nested 162 | lists. 163 |

164 | 165 |
166 | int read_list(const char *start, const char **end, Atom *result)
167 | {
168 | 	Atom p;
169 | 
170 | 	*end = start;
171 | 	p = *result = nil;
172 | 
173 | 	for (;;) {
174 | 		const char *token;
175 | 		Atom item;
176 | 		Error err;
177 | 
178 | 		err = lex(*end, &token, end);
179 | 		if (err)
180 | 			return err;
181 | 
182 | 		if (token[0] == ')')
183 | 			return Error_OK;
184 | 
185 | 		if (token[0] == '.' && *end - token == 1) {
186 | 			/* Improper list */
187 | 			if (nilp(p))
188 | 				return Error_Syntax;
189 | 
190 | 			err = read_expr(*end, end, &item);
191 | 			if (err)
192 | 				return err;
193 | 
194 | 			cdr(p) = item;
195 | 
196 | 			/* Read the closing ')' */
197 | 			err = lex(*end, &token, end);
198 | 			if (!err && token[0] != ')')
199 | 				err = Error_Syntax;
200 | 
201 | 			return err;
202 | 		}
203 | 
204 | 		err = read_expr(token, end, &item);
205 | 		if (err)
206 | 			return err;
207 | 
208 | 		if (nilp(p)) {
209 | 			/* First item */
210 | 			*result = cons(item, nil);
211 | 			p = *result;
212 | 		} else {
213 | 			cdr(p) = cons(item, nil);
214 | 			p = cdr(p);
215 | 		}
216 | 	}
217 | }
218 | 
219 | 220 |

221 | I dislike writing infinite loops, but this is the clearest layout I have 222 | come up with so far. Let me know if you can write a better one! 223 |

224 | 225 |

226 | Finally we have read_expr itself, which is very simple now 227 | that we have done all of the hard work: 228 |

229 | int read_expr(const char *input, const char **end, Atom *result)
230 | {
231 | 	const char *token;
232 | 	Error err;
233 | 
234 | 	err = lex(input, &token, end);
235 | 	if (err)
236 | 		return err;
237 | 
238 | 	if (token[0] == '(')
239 | 		return read_list(*end, end, result);
240 | 	else if (token[0] == ')')
241 | 		return Error_Syntax;
242 | 	else
243 | 		return parse_simple(token, *end, result);
244 | }
245 | 
246 | The check for a closing bracket will catch invalid forms such as 247 |
)
and
(X .)
248 |

249 | 250 |

Testing

251 | 252 |

253 | If we use the parser to create a simple read-print loop, then the we 254 | can type representations of objects on the console and check that they 255 | are parsed correctly. 256 |

257 | 258 |
259 | int main(int argc, char **argv)
260 | {
261 | 	char *input;
262 | 
263 | 	while ((input = readline("> ")) != NULL) {
264 | 		const char *p = input;
265 | 		Error err;
266 | 		Atom expr;
267 | 
268 | 		err = read_expr(p, &p, &expr);
269 | 
270 | 		switch (err) {
271 | 		case Error_OK:
272 | 			print_expr(expr);
273 | 			putchar('\n');
274 | 			break;
275 | 		case Error_Syntax:
276 | 			puts("Syntax error");
277 | 			break;
278 | 		}
279 | 
280 | 		free(input);
281 | 	}
282 | 
283 | 	return 0;
284 | }
285 | 
286 | 287 |

288 | This version uses the readline library, which shows a prompt 289 | and reads a line of text from the console. It supports editing beyond 290 | what a dumb terminal can provide, but a simple wrapper around 291 | fgets() will do just as well. 292 |

293 | 294 |
295 | > 42
296 | 42
297 | > (foo bar)
298 | (FOO BAR)
299 | > (s (t . u) v . (w . nil))
300 | (S (T . U) V W)
301 | > ()
302 | NIL
303 | 
304 | 305 |

306 | Looks good! Remember that () is exactly the same as 307 | NIL, and that (X Y) is just another way of 308 | writing (X . (Y . NIL)). 309 |

310 | 311 | 312 | 313 | 314 | -------------------------------------------------------------------------------- /html/quasiquotation.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | Chapter 13: Quasiquotation 6 | 7 | 8 | 9 |

Quasiquotation

10 | 11 |

12 | QUASIQUOTE is an extension of the QUOTE 13 | special form which is convenient for writing macros. 14 |

15 | 16 |

17 | For symbols and other simple data, QUASIQUOTE behaves 18 | like QUOTE, returning the datum unevaluated. Lists 19 | are also return without being evaluated, with two exceptions. If 20 | an element of the list (or a sub-list) is of the form 21 | (UNQUOTE expr), then expr is 22 | evaluated and the result inserted into the list in place. 23 | (UNQUOTE-SPLICING expr) is similar, but the 24 | result of evaluating expr must be a list, the items 25 | of which are spliced into the parent list. 26 |

27 | 28 |

Example

29 | 30 |

31 |

 32 | (QUASIQUOTE (+ 1 (UNQUOTE (+ 2 3))))
 33 | 
34 | evaluates to 35 |
 36 | (+ 1 5)
 37 | 
38 |

39 | 40 |

41 | If we define L to be the list (3 4 5) 42 | then 43 |

 44 | (QUASIQUOTE (1 2 (UNQUOTE-SPLICING L)))
 45 | 
46 | evaluates to 47 |
 48 | (1 2 3 4 5)
 49 | 
50 |

51 | 52 |

Shorthand syntax

53 | 54 |

55 | Just like QUOTE, we will define the following 56 | abbreviations. 57 |

58 | 59 | 60 | 61 | 62 | 63 | 64 | 65 | 66 | 67 | 68 | 69 | 70 | 71 | 72 | 73 |
AbbreviationEquivalent to
`expr(QUASIQUOTE expr)
,expr(UNQUOTE expr)
,@expr(UNQUOTE-SPLICING expr)
74 | 75 |

76 | Rewriting the examples above with this syntax gives 77 |

 78 | `(+ 1 ,(+ 2 3))
 79 | 
80 | and 81 |
 82 | `(1 2 ,@L)
 83 | 
84 |

85 | 86 |

Implementation

87 | 88 |

89 | We extend the lexer to understand the additional special tokens. 90 |

91 | 92 |
 93 | int lex(const char *str, const char **start, const char **end)
 94 | {
 95 | 	const char *ws = " \t\n";
 96 | 	const char *delim = "() \t\n";
 97 | 	const char *prefix = "()\'`";
 98 | 
 99 | 	str += strspn(str, ws);
100 | 
101 | 	if (str[0] == '\0') {
102 | 		*start = *end = NULL;
103 | 		return Error_Syntax;
104 | 	}
105 | 
106 | 	*start = str;
107 | 
108 | 	if (strchr(prefix, str[0]) != NULL)
109 | 		*end = str + 1;
110 | 	else if (str[0] == ',')
111 | 		*end = str + (str[1] == '@' ? 2 : 1);
112 | 	else
113 | 		*end = str + strcspn(str, delim);
114 | 
115 | 	return Error_OK;
116 | }
117 | 
118 | 119 |

120 | read_expr must expand the abbreviations in the same 121 | way as QUOTE 122 |

123 | 124 |
125 | int read_expr(const char *input, const char **end, Atom *result)
126 | {
127 | 	.
128 | 	.
129 | 	.
130 | 	if (token[0] == '(') {
131 | 	.
132 | 	.
133 | 	.
134 | 	} else if (token[0] == '`') {
135 | 		*result = cons(make_sym("QUASIQUOTE"), cons(nil, nil));
136 | 		return read_expr(*end, end, &car(cdr(*result)));
137 | 	} else if (token[0] == ',') {
138 | 		*result = cons(make_sym(
139 | 			token[1] == '@' ? "UNQUOTE-SPLICING" : "UNQUOTE"),
140 | 			cons(nil, nil));
141 | 		return read_expr(*end, end, &car(cdr(*result)));
142 | 	} else {
143 | 		.
144 | 		.
145 | 		.
146 | 	}
147 | }
148 | 
149 | 150 |

151 | The QUASIQUOTE operator itself may be defined as a macro. 152 | First we need a few helper functions. 153 |

154 | 155 |
156 | (define (append a b) (foldr cons b a))
157 | 
158 | (define (caar x) (car (car x)))
159 | 
160 | (define (cadr x) (car (cdr x)))
161 | 
162 | 163 |

164 | (append a b) concatenates the lists a 165 | and b. 166 |

167 | 168 |

169 | And now the macro itself: 170 |

171 | 172 |
173 | (defmacro (quasiquote x)
174 |   (if (pair? x)
175 |       (if (eq? (car x) 'unquote)
176 |           (cadr x)
177 |           (if (eq? (if (pair? (car x)) (caar x) nil) 'unquote-splicing)
178 |               (list 'append
179 |                     (cadr (car x))
180 |                     (list 'quasiquote (cdr x)))
181 |               (list 'cons
182 |                     (list 'quasiquote (car x))
183 |                     (list 'quasiquote (cdr x)))))
184 |       (list 'quote x)))
185 | 
186 | 187 |

188 | The definition above is a little hard to follow, since the 189 | resulting expression must be built up using LIST 190 | and may include additional calls to QUASIQUOTE. 191 |

192 | 193 |

194 | Quasiquotation allows us to make the body of a macro look like 195 | the expression it returns; for example the IGNORE 196 | macro in chapter 11 197 |

198 | (DEFMACRO (IGNORE X)
199 |   (CONS 'QUOTE (CONS X NIL)))
200 | 
201 | can now be written 202 |
203 | (DEFMACRO (IGNORE X)
204 |   `(QUOTE ,X))
205 | 
206 | and the operation is made clear. 207 |

208 | 209 |

Testing

210 | 211 |
212 | > `(+ 1 ,(+ 2 3))
213 | (+ 1 5)
214 | > (define l '(3 4 5))
215 | L
216 | > `(1 2 ,@l)
217 | (1 2 3 4 5)
218 | 
219 | 220 |

let

221 | 222 |

223 | We will now use QUASIQUOTE to define a new special 224 | form: 225 |

226 | (LET ((sym1 expr1)
227 |       (sym2 expr2)
228 |       ...)
229 |   body...)
230 | 
231 |

232 | 233 |

234 | LET causes the expressions expr to be evaluated 235 | with the symbols sym1, sym2... bound to the 236 | result of evaluating expr1, expr2 and so on. 237 | The result of the last expression body to be evaluated 238 | is returned. 239 |

240 | 241 |

242 | The definition is simple. 243 |

244 | 245 |
246 | (defmacro (let defs . body)
247 |   `((lambda ,(map car defs) ,@body)
248 |     ,@(map cadr defs)))
249 | 
250 | 251 |

Example

252 | 253 |

254 | When we evaluate the form 255 |

256 | (LET ((X 3) (Y 5)) (+ X Y))
257 | 
258 | it is transformed by the LET macro into 259 |
260 | ((LAMBDA (X Y) (+ X Y)) 3 5)
261 | 
262 | which behaves as desired. 263 |

264 | 265 |

Testing

266 | 267 |
268 | > (let ((x 3) (y 5)) (+ x y))
269 | 8
270 | > x
271 | Symbol not bound
272 | 
273 | 274 |

275 | The LET expression clarifies the programmer's 276 | intent to make temporary definitions. 277 |

278 | 279 |

A trick

280 | 281 |

282 | We can use LET to extend the built-in binary operator 283 | + to accept any number of arguments. 284 |

285 | 286 |
287 | (define +
288 |   (let ((old+ +))
289 |     (lambda xs (foldl old+ 0 xs))))
290 | 
291 | 292 |

293 | Compare this with the definition of ADD add the end 294 | of chapter 10. 295 |

296 | 297 |

Testing

298 | 299 |
300 | > (+ 1 2 3 4)
301 | 10
302 | 
303 | 304 |

305 | We didn't have to touch builtin_add or even recompile 306 | the interpreter. 307 |

308 | 309 | 310 | 311 | 312 | -------------------------------------------------------------------------------- /html/style.css: -------------------------------------------------------------------------------- 1 | pre 2 | { 3 | border-style: solid; 4 | padding: 1em; 5 | display: table; 6 | } 7 | 8 | pre.c 9 | { 10 | background-color: AliceBlue 11 | } 12 | 13 | pre.lisp 14 | { 15 | background-color: Beige 16 | } 17 | 18 | -------------------------------------------------------------------------------- /html/sugar.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | Chapter 9: Syntactic sugar 6 | 7 | 8 | 9 |

Syntactic sugar

10 | 11 |

12 | We will define some additional syntax to facilitate entry of some common 13 | expressions. Recall that we already allow the user to enter 14 |

 15 | (A B C)
 16 | 
17 | instead of 18 |
 19 | (A . (B . (C . NIL)))
 20 | 
21 |

22 | 23 |

Quoting

24 | 25 |

26 | In order to include a literal symbol or list in an expression, we need 27 | to use the QUOTE operator. As a shortcut, we will 28 | define 29 |

 30 | 'EXPR
 31 | 
32 | to be equivalent to 33 |
 34 | (QUOTE EXPR)
 35 | 
36 |

37 | 38 |

39 | So for example the following forms are equivalent: 40 | 41 | 42 | 43 | 44 | 45 | 46 | 47 | 48 | 49 | 50 | 51 | 52 | 53 | 54 | 55 | 56 | 57 | 58 |
Abbreviation Canonical form Evaluates to
'FOO(QUOTE FOO)FOO
'(+ 1 2)(QUOTE (+ 1 2))(+ 1 2)
'(A . B)(QUOTE (A . B))(A . B)
59 |

60 | 61 |

62 | The lexer needs to know that the quote mark is a prefix (i.e., it can 63 | appear immediately before another token but is not necessarily a 64 | delimeter). 65 |

 66 | int lex(const char *str, const char **start, const char **end)
 67 | {
 68 | 	const char *ws = " \t\n";
 69 | 	const char *delim = "() \t\n";
 70 | 	const char *prefix = "()\'";
 71 | 	.
 72 | 	.
 73 | 	.
 74 | }
 75 | 
76 |

77 | 78 |

79 | Also read_expr must convert it to the correct list 80 | expresssion. 81 |

82 | 83 |
 84 | int read_expr(const char *input, const char **end, Atom *result)
 85 | {
 86 | 	const char *token;
 87 | 	Error err;
 88 | 
 89 | 	err = lex(input, &token, end);
 90 | 	if (err)
 91 | 		return err;
 92 | 
 93 | 	if (token[0] == '(') {
 94 | 		return read_list(*end, end, result);
 95 | 	} else if (token[0] == ')') {
 96 | 		return Error_Syntax;
 97 | 	} else if (token[0] == '\'') {
 98 | 		*result = cons(make_sym("QUOTE"), cons(nil, nil));
 99 | 		return read_expr(*end, end, &car(cdr(*result)));
100 | 	} else {
101 | 		return parse_simple(token, *end, result);
102 | 	}
103 | }
104 | 
105 | 106 |

Testing

107 | 108 |
109 | > (define x '(a b c))
110 | X
111 | > x
112 | (A B C)
113 | > 'x
114 | X
115 | > (define foo 'bar)
116 | FOO
117 | > foo
118 | BAR
119 | > ''()
120 | (QUOTE NIL)
121 | 
122 | 123 |

Function definitions

124 | 125 |

126 | It is cumbersome to have to type a lambda expression every time we wish 127 | to define a function, so we will modify the DEFINE operator 128 | to avoid this. 129 |

130 | (DEFINE (name args...) body...)
131 | 
132 | is equivalent to 133 |
134 | (DEFINE name (LAMBDA (args...) body...))
135 | 
136 |

137 | 138 |

139 | Here's how: 140 |

141 | 142 |
143 | int eval_expr(Atom expr, Atom env, Atom *result)
144 | {
145 | 	.
146 | 	.
147 | 	.
148 | 	if (op.type == AtomType_Symbol) {
149 | 		if (strcmp(op.value.symbol, "QUOTE") == 0) {
150 | 		.
151 | 		.
152 | 		.
153 | 		} else if (strcmp(op.value.symbol, "DEFINE") == 0) {
154 | 			Atom sym, val;
155 | 
156 | 			if (nilp(args) || nilp(cdr(args)))
157 | 				return Error_Args;
158 | 
159 | 			sym = car(args);
160 | 			if (sym.type == AtomType_Pair) {
161 | 				err = make_closure(env, cdr(sym), cdr(args), &val);
162 | 				sym = car(sym);
163 | 				if (sym.type != AtomType_Symbol)
164 | 					return Error_Type;
165 | 			} else if (sym.type == AtomType_Symbol) {
166 | 				if (!nilp(cdr(cdr(args))))
167 | 					return Error_Args;
168 | 				err = eval_expr(car(cdr(args)), env, &val);
169 | 			} else {
170 | 				return Error_Type;
171 | 			}
172 | 
173 | 			if (err)
174 | 				return err;
175 | 
176 | 			*result = sym;
177 | 			return env_set(env, sym, val);
178 | 		} else if (strcmp(op.value.symbol, "LAMBDA") == 0) {
179 | 		.
180 | 		.
181 | 		.
182 | 		}
183 | 	}
184 | 	.
185 | 	.
186 | 	.
187 | }
188 | 
189 | 190 |

Testing

191 | 192 |
193 | > (define (square x) (* x x))
194 | SQUARE
195 | > (square 3)
196 | 9
197 | 
198 | 199 |

200 | Sweet! 201 |

202 | 203 | 204 | 205 | 206 | -------------------------------------------------------------------------------- /html/variadics.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | Chapter 10: Variadic functions 6 | 7 | 8 | 9 |

Variadic functions

10 | 11 |

12 | Up till now all functions have had a specified number of named arguments. 13 | We will now introduce a syntax for defining variadic functions, 14 | which may take a fixed number of named arguments and a variable number 15 | of additional arguments which are collected into a named list. 16 |

17 | 18 |

19 | The argument declarations for variadic functions are improper lists: 20 |

21 | 22 | 23 | 24 | 25 | 26 | 29 | 32 | 33 | 34 | 35 | 38 | 41 | 42 | 43 | 44 | 47 | 50 | 51 | 52 | 53 | 56 | 59 | 60 |
λ-syntaxCombined DEFINE
3 args 27 | (LAMBDA (arg1 arg2 arg3) body...) 28 | 30 | (DEFINE (name arg1 arg2 arg3) body...) 31 |
≥2 args 36 | (LAMBDA (arg1 arg2 . rest) body...) 37 | 39 | (DEFINE (name arg1 arg2 . rest) body...) 40 |
≥1 args 45 | (LAMBDA (arg1 . rest) body...) 46 | 48 | (DEFINE (name arg1 . rest) body...) 49 |
≥0 args 54 | (LAMBDA args body...) 55 | 57 | (DEFINE (name . args) body...) 58 |
61 | 62 |

63 | In the definitions above, the parameters are bound as follows: 64 |

65 | 66 | 67 | 68 | 69 | 70 | 71 | 72 | 73 | 74 | 75 | 76 | 77 | 78 | 79 | 80 | 81 | 82 | 83 | 84 | 85 | 86 | 87 | 88 | 89 | 90 | 91 | 92 | 93 | 94 | 95 | 96 | 97 |
Definition(f 1 2 3)
Value of aValue of bValue of c
(DEFINE (f a b c) body...)123
(DEFINE (f a b . c) body...)12(3)
(DEFINE (f a . b) body...)1(2 3)
(DEFINE (f . a) body...)(1 2 3)
98 | 99 |

Implementation

100 | 101 |

102 | All that is required is a small modification to 103 | make_closure to accept the declaration: 104 |

105 | int make_closure(Atom env, Atom args, Atom body, Atom *result)
106 | {
107 | 	Atom p;
108 | 
109 | 	if (!listp(body))
110 | 		return Error_Syntax;
111 | 
112 | 	/* Check argument names are all symbols */
113 | 	p = args;
114 | 	while (!nilp(p)) {
115 | 		if (p.type == AtomType_Symbol)
116 | 			break;
117 | 		else if (p.type != AtomType_Pair
118 | 				|| car(p).type != AtomType_Symbol)
119 | 			return Error_Type;
120 | 		p = cdr(p);
121 | 	}
122 | 
123 | 	*result = cons(env, cons(args, body));
124 | 	result->type = AtomType_Closure;
125 | 
126 | 	return Error_OK;
127 | }
128 | 
129 |

130 | 131 |

132 | And another to apply to bind the additional arguments 133 | into a list: 134 |

135 | int apply(Atom fn, Atom args, Atom *result)
136 | {
137 | 	.
138 | 	.
139 | 	.
140 | 	/* Bind the arguments */
141 | 	while (!nilp(arg_names)) {
142 | 		if (arg_names.type == AtomType_Symbol) {
143 | 			env_set(env, arg_names, args);
144 | 			args = nil;
145 | 			break;
146 | 		}
147 | 
148 | 		if (nilp(args))
149 | 			return Error_Args;
150 | 		env_set(env, car(arg_names), car(args));
151 | 		arg_names = cdr(arg_names);
152 | 		args = cdr(args);
153 | 	}
154 | 	if (!nilp(args))
155 | 		return Error_Args;
156 | 	.
157 | 	.
158 | 	.
159 | }
160 | 
161 |

162 | 163 |

Testing

164 | 165 |

166 | A boring example: 167 |

168 | 169 |
170 | > ((lambda (a . b) a) 1 2 3)
171 | 1
172 | > ((lambda (a . b) b) 1 2 3)
173 | (2 3)
174 | > ((lambda args args) 1 2 3)
175 | (1 2 3)
176 | 
177 | 178 |

179 | We can also create a variadic adder: 180 |

181 | 182 |
183 | > (define (sum-list xs)
184 |     (if xs
185 |         (+ (car xs) (sum-list (cdr xs)))
186 |         0))
187 | SUM-LIST
188 | > (sum-list '(1 2 3))
189 | 6
190 | > (define (add . xs) (sum-list xs))
191 | ADD
192 | > (add 1 2 3)
193 | 6
194 | > (add 1 (- 4 2) (/ 9 3))
195 | 6
196 | 
197 | 198 |

199 | Since you can always pass a list to a regular function, this is 200 | really just another kind of syntactic sugar. 201 |

202 | 203 | 204 | 205 | 206 | -------------------------------------------------------------------------------- /library.lisp: -------------------------------------------------------------------------------- 1 | ;; 2 | ;; Functions used in macro definitions 3 | ;; 4 | 5 | (define (append a b) (foldr cons b a)) 6 | 7 | (define (caar x) (car (car x))) 8 | (define (cadr x) (car (cdr x))) 9 | (define (cdar x) (cdr (car x))) 10 | (define (cddr x) (cdr (cdr x))) 11 | 12 | (define (foldl proc init list) 13 | (if list 14 | (foldl proc 15 | (proc init (car list)) 16 | (cdr list)) 17 | init)) 18 | 19 | (define (foldr proc init list) 20 | (if list 21 | (proc (car list) 22 | (foldr proc init (cdr list))) 23 | init)) 24 | 25 | (define (list . items) 26 | (foldr cons nil items)) 27 | 28 | (define (unary-map proc list) 29 | (foldr (lambda (x rest) (cons (proc x) rest)) 30 | nil 31 | list)) 32 | 33 | (define (map proc . arg-lists) 34 | (if (car arg-lists) 35 | (cons (apply proc (unary-map car arg-lists)) 36 | (apply map (cons proc 37 | (unary-map cdr arg-lists)))) 38 | nil)) 39 | 40 | ;; 41 | ;; Quasiquote 42 | ;; 43 | 44 | (defmacro (quasiquote x) 45 | (if (pair? x) 46 | (if (eq? (car x) 'unquote) 47 | (cadr x) 48 | (if (eq? (if (pair? (car x)) (caar x) nil) 'unquote-splicing) 49 | (list 'append 50 | (cadr (car x)) 51 | (list 'quasiquote (cdr x))) 52 | (list 'cons 53 | (list 'quasiquote (car x)) 54 | (list 'quasiquote (cdr x))))) 55 | (list 'quote x))) 56 | 57 | ;; 58 | ;; Macros 59 | ;; 60 | 61 | (defmacro (and . terms) 62 | (if terms 63 | `(if ,(car terms) 64 | (and ,@(cdr terms)) 65 | nil) 66 | t)) 67 | 68 | (defmacro (begin . body) 69 | `((lambda () ,@body))) 70 | 71 | (defmacro (cond . clauses) 72 | (if clauses 73 | (let ((test (caar clauses)) 74 | (body (cdar clauses))) 75 | `(if ,test 76 | (begin ,@body) 77 | (cond ,@(cdr clauses)))) 78 | nil)) 79 | 80 | (defmacro (let defs . body) 81 | `((lambda ,(map car defs) ,@body) 82 | ,@(map cadr defs))) 83 | 84 | (defmacro (or . terms) 85 | (if terms 86 | `(if ,(car terms) 87 | t 88 | (or ,@(cdr terms))) 89 | nil)) 90 | 91 | (defmacro (unless test . body) 92 | `(when (not ,test) ,@body)) 93 | 94 | (defmacro (when test . body) 95 | `(if ,test (begin ,@body) nil)) 96 | 97 | ;; 98 | ;; Numeric functions 99 | ;; 100 | 101 | (define + 102 | (let ((old+ +)) 103 | (lambda xs (foldl old+ 0 xs)))) 104 | 105 | (define - 106 | (let ((old- -)) 107 | (lambda (x . xs) 108 | (if xs 109 | (foldl old- x xs) 110 | (old- 0 x))))) 111 | 112 | (define * 113 | (let ((old* *)) 114 | (lambda xs (foldl old* 1 xs)))) 115 | 116 | (define / 117 | (let ((old/ /)) 118 | (lambda (x . xs) 119 | (if xs 120 | (foldl old/ x xs) 121 | (old/ 1 x))))) 122 | 123 | (define (<= a b) (or (= a b) (< a b))) 124 | (define (> a b) (< b a)) 125 | (define (>= a b) (<= b a)) 126 | 127 | (define (abs x) (if (negative? x) (- x) x)) 128 | 129 | (define (even? x) (= (modulo x 2) 0)) 130 | 131 | (define (gcd . xs) 132 | (define (gcd-inner a b) 133 | (if (zero? b) a (gcd-inner b (remainder a b)))) 134 | (abs (foldl gcd-inner 0 xs))) 135 | 136 | (define (lcm . xs) 137 | (if xs 138 | (/ (abs (apply * xs)) 139 | (apply gcd xs)) 140 | 1)) 141 | 142 | (define (max x . xs) 143 | (foldl (lambda (a b) (if (> a b) a b)) x xs)) 144 | 145 | (define (min x . xs) 146 | (foldl (lambda (a b) (if (< a b) a b)) x xs)) 147 | 148 | (define (negative? x) (< x 0)) 149 | 150 | (define (odd? x) (= (modulo x 2) 1)) 151 | 152 | (define (positive? x) (> x 0)) 153 | 154 | (define (quotient a b) (/ a b)) 155 | 156 | (define (remainder a b) (- a (* b (quotient a b)))) 157 | 158 | (define (zero? x) (= x 0)) 159 | 160 | ;; TODO: modulo 161 | 162 | 163 | ;; 164 | ;; List functions 165 | ;; 166 | 167 | (define (for-each proc . arg-lists) 168 | (when (car arg-lists) 169 | (apply proc (map car arg-lists)) 170 | (apply for-each 171 | (append (list proc) 172 | (map cdr arg-lists))))) 173 | 174 | (define (length list) 175 | (foldl (lambda (count x) (+ count 1)) 0 list)) 176 | 177 | (define (list-ref x k) (car (list-tail x k))) 178 | 179 | (define (list-tail x k) 180 | (if (zero? k) 181 | x 182 | (list-tail (cdr x) (- k 1)))) 183 | 184 | (define (reverse list) 185 | (foldl (lambda (a x) (cons x a)) nil list)) 186 | 187 | ;; 188 | ;; Other functions 189 | ;; 190 | 191 | (define (list? x) 192 | (or (null? x) 193 | (and (pair? x) 194 | (list? (cdr x))))) 195 | 196 | (define (not x) (if x nil t)) 197 | 198 | (define (null? x) (not x)) 199 | -------------------------------------------------------------------------------- /lisp.h: -------------------------------------------------------------------------------- 1 | typedef enum { 2 | Error_OK = 0, 3 | Error_Syntax, 4 | Error_Unbound, 5 | Error_Args, 6 | Error_Type 7 | } Error; 8 | 9 | struct Atom; 10 | 11 | typedef int (*Builtin)(struct Atom args, struct Atom *result); 12 | 13 | struct Atom { 14 | enum { 15 | AtomType_Nil, 16 | AtomType_Pair, 17 | AtomType_Symbol, 18 | AtomType_Integer, 19 | AtomType_Builtin, 20 | AtomType_Closure, 21 | AtomType_Macro 22 | } type; 23 | 24 | union { 25 | struct Pair *pair; 26 | const char *symbol; 27 | long integer; 28 | Builtin builtin; 29 | } value; 30 | }; 31 | 32 | struct Pair { 33 | struct Atom atom[2]; 34 | }; 35 | 36 | typedef struct Atom Atom; 37 | 38 | #define car(p) ((p).value.pair->atom[0]) 39 | #define cdr(p) ((p).value.pair->atom[1]) 40 | #define nilp(atom) ((atom).type == AtomType_Nil) 41 | 42 | static const Atom nil = { AtomType_Nil }; 43 | 44 | /* READER */ 45 | 46 | int read_expr(const char *input, const char **end, Atom *result); 47 | 48 | /* PRINTER */ 49 | 50 | void print_expr(Atom atom); 51 | 52 | /* EVALUATOR */ 53 | 54 | Atom env_create(Atom parent); 55 | int env_define(Atom env, Atom symbol, Atom value); 56 | int env_get(Atom env, Atom symbol, Atom *result); 57 | int env_set(Atom env, Atom symbol, Atom value); 58 | int eval_expr(Atom expr, Atom env, Atom *result); 59 | 60 | /* DATA */ 61 | 62 | Atom cons(Atom car_val, Atom cdr_val); 63 | Atom make_int(long x); 64 | Atom make_sym(const char *s); 65 | Atom make_builtin(Builtin fn); 66 | int listp(Atom expr); 67 | Atom copy_list(Atom list); 68 | Atom list_create(int n, ...); 69 | Atom list_get(Atom list, int k); 70 | void list_set(Atom list, int k, Atom value); 71 | void list_reverse(Atom *list); 72 | void gc_mark(Atom root); 73 | void gc(); 74 | 75 | /* BUILTINS */ 76 | 77 | int builtin_car(Atom args, Atom *result); 78 | int builtin_cdr(Atom args, Atom *result); 79 | int builtin_cons(Atom args, Atom *result); 80 | int builtin_eq(Atom args, Atom *result); 81 | int builtin_pairp(Atom args, Atom *result); 82 | int builtin_procp(Atom args, Atom *result); 83 | int builtin_add(Atom args, Atom *result); 84 | int builtin_subtract(Atom args, Atom *result); 85 | int builtin_multiply(Atom args, Atom *result); 86 | int builtin_divide(Atom args, Atom *result); 87 | int builtin_numeq(Atom args, Atom *result); 88 | int builtin_less(Atom args, Atom *result); 89 | 90 | -------------------------------------------------------------------------------- /main.c: -------------------------------------------------------------------------------- 1 | #include "lisp.h" 2 | #include 3 | #include 4 | #include 5 | 6 | char *slurp(const char *path) 7 | { 8 | FILE *file; 9 | char *buf; 10 | long len; 11 | 12 | file = fopen(path, "r"); 13 | if (!file) 14 | return NULL; 15 | fseek(file, 0, SEEK_END); 16 | len = ftell(file); 17 | fseek(file, 0, SEEK_SET); 18 | 19 | buf = malloc(len + 1); 20 | if (!buf) 21 | return NULL; 22 | 23 | fread(buf, 1, len, file); 24 | buf[len] = 0; 25 | fclose(file); 26 | 27 | return buf; 28 | } 29 | 30 | void load_file(Atom env, const char *path) 31 | { 32 | char *text; 33 | 34 | printf("Reading %s...\n", path); 35 | text = slurp(path); 36 | if (text) { 37 | const char *p = text; 38 | Atom expr; 39 | while (read_expr(p, &p, &expr) == Error_OK) { 40 | Atom result; 41 | Error err = eval_expr(expr, env, &result); 42 | if (err) { 43 | printf("Error in expression:\n\t"); 44 | print_expr(expr); 45 | putchar('\n'); 46 | } else { 47 | print_expr(result); 48 | putchar('\n'); 49 | } 50 | } 51 | free(text); 52 | } 53 | } 54 | 55 | int main(int argc, char **argv) 56 | { 57 | Atom env; 58 | char *input; 59 | 60 | env = env_create(nil); 61 | 62 | /* Set up the initial environment */ 63 | env_define(env, make_sym("CAR"), make_builtin(builtin_car)); 64 | env_define(env, make_sym("CDR"), make_builtin(builtin_cdr)); 65 | env_define(env, make_sym("CONS"), make_builtin(builtin_cons)); 66 | env_define(env, make_sym("+"), make_builtin(builtin_add)); 67 | env_define(env, make_sym("-"), make_builtin(builtin_subtract)); 68 | env_define(env, make_sym("*"), make_builtin(builtin_multiply)); 69 | env_define(env, make_sym("/"), make_builtin(builtin_divide)); 70 | env_define(env, make_sym("T"), make_sym("T")); 71 | env_define(env, make_sym("="), make_builtin(builtin_numeq)); 72 | env_define(env, make_sym("<"), make_builtin(builtin_less)); 73 | env_define(env, make_sym("EQ?"), make_builtin(builtin_eq)); 74 | env_define(env, make_sym("PAIR?"), make_builtin(builtin_pairp)); 75 | env_define(env, make_sym("PROCEDURE?"), make_builtin(builtin_procp)); 76 | 77 | load_file(env, "library.lisp"); 78 | 79 | /* Main loop */ 80 | while ((input = readline("> ")) != NULL) { 81 | const char *p = input; 82 | Error err; 83 | Atom expr, result; 84 | 85 | err = read_expr(p, &p, &expr); 86 | 87 | if (!err) 88 | err = eval_expr(expr, env, &result); 89 | 90 | switch (err) { 91 | case Error_OK: 92 | print_expr(result); 93 | putchar('\n'); 94 | break; 95 | case Error_Syntax: 96 | puts("Syntax error"); 97 | break; 98 | case Error_Unbound: 99 | puts("Symbol not bound"); 100 | break; 101 | case Error_Args: 102 | puts("Wrong number of arguments"); 103 | break; 104 | case Error_Type: 105 | puts("Wrong type"); 106 | break; 107 | } 108 | 109 | free(input); 110 | } 111 | 112 | return 0; 113 | } 114 | 115 | -------------------------------------------------------------------------------- /print.c: -------------------------------------------------------------------------------- 1 | #include "lisp.h" 2 | #include 3 | 4 | void print_expr(Atom atom) 5 | { 6 | switch (atom.type) { 7 | case AtomType_Nil: 8 | printf("NIL"); 9 | break; 10 | case AtomType_Pair: 11 | putchar('('); 12 | print_expr(car(atom)); 13 | atom = cdr(atom); 14 | while (!nilp(atom)) { 15 | if (atom.type == AtomType_Pair) { 16 | putchar(' '); 17 | print_expr(car(atom)); 18 | atom = cdr(atom); 19 | } else { 20 | printf(" . "); 21 | print_expr(atom); 22 | break; 23 | } 24 | } 25 | putchar(')'); 26 | break; 27 | case AtomType_Symbol: 28 | printf("%s", atom.value.symbol); 29 | break; 30 | case AtomType_Integer: 31 | printf("%ld", atom.value.integer); 32 | break; 33 | case AtomType_Builtin: 34 | printf("#", atom.value.builtin); 35 | break; 36 | case AtomType_Closure: 37 | printf("#", atom.value.pair); 38 | break; 39 | case AtomType_Macro: 40 | printf("#", atom.value.pair); 41 | break; 42 | } 43 | } 44 | 45 | -------------------------------------------------------------------------------- /read.c: -------------------------------------------------------------------------------- 1 | #include "lisp.h" 2 | #include 3 | #include 4 | #include 5 | 6 | int lex(const char *str, const char **start, const char **end) 7 | { 8 | const char *ws = " \t\n"; 9 | const char *delim = "(); \t\n"; 10 | const char *prefix = "()\'`"; 11 | 12 | str += strspn(str, ws); 13 | 14 | if (str[0] == '\0') { 15 | *start = *end = NULL; 16 | return Error_Syntax; 17 | } 18 | 19 | *start = str; 20 | 21 | if (strchr(prefix, str[0]) != NULL) 22 | *end = str + 1; 23 | else if (str[0] == ',') 24 | *end = str + (str[1] == '@' ? 2 : 1); 25 | else if (str[0] == ';') { 26 | str = strchr(str, '\n'); 27 | if (!str) { 28 | *start = *end = NULL; 29 | return Error_Syntax; 30 | } 31 | return lex(str, start, end); 32 | } else 33 | *end = str + strcspn(str, delim); 34 | 35 | return Error_OK; 36 | } 37 | 38 | int parse_simple(const char *start, const char *end, Atom *result) 39 | { 40 | char *buf, *p; 41 | 42 | /* Is it an integer? */ 43 | long val = strtol(start, &p, 10); 44 | if (p == end) { 45 | result->type = AtomType_Integer; 46 | result->value.integer = val; 47 | return Error_OK; 48 | } 49 | 50 | /* NIL or symbol */ 51 | buf = malloc(end - start + 1); 52 | p = buf; 53 | while (start != end) 54 | *p++ = toupper(*start), ++start; 55 | *p = '\0'; 56 | 57 | if (strcmp(buf, "NIL") == 0) 58 | *result = nil; 59 | else 60 | *result = make_sym(buf); 61 | 62 | free(buf); 63 | 64 | return Error_OK; 65 | } 66 | 67 | int read_list(const char *start, const char **end, Atom *result) 68 | { 69 | Atom p; 70 | 71 | *end = start; 72 | p = *result = nil; 73 | 74 | for (;;) { 75 | const char *token; 76 | Atom item; 77 | Error err; 78 | 79 | err = lex(*end, &token, end); 80 | if (err) 81 | return err; 82 | 83 | if (token[0] == ')') 84 | return Error_OK; 85 | 86 | if (token[0] == '.' && *end - token == 1) { 87 | /* Improper list */ 88 | if (nilp(p)) 89 | return Error_Syntax; 90 | 91 | err = read_expr(*end, end, &item); 92 | if (err) 93 | return err; 94 | 95 | cdr(p) = item; 96 | 97 | /* Read the closing ')' */ 98 | err = lex(*end, &token, end); 99 | if (!err && token[0] != ')') 100 | err = Error_Syntax; 101 | 102 | return err; 103 | } 104 | 105 | err = read_expr(token, end, &item); 106 | if (err) 107 | return err; 108 | 109 | if (nilp(p)) { 110 | /* First item */ 111 | *result = cons(item, nil); 112 | p = *result; 113 | } else { 114 | cdr(p) = cons(item, nil); 115 | p = cdr(p); 116 | } 117 | } 118 | } 119 | 120 | int read_expr(const char *input, const char **end, Atom *result) 121 | { 122 | const char *token; 123 | Error err; 124 | 125 | err = lex(input, &token, end); 126 | if (err) 127 | return err; 128 | 129 | if (token[0] == '(') { 130 | return read_list(*end, end, result); 131 | } else if (token[0] == ')') { 132 | return Error_Syntax; 133 | } else if (token[0] == '\'') { 134 | *result = cons(make_sym("QUOTE"), cons(nil, nil)); 135 | return read_expr(*end, end, &car(cdr(*result))); 136 | } else if (token[0] == '`') { 137 | *result = cons(make_sym("QUASIQUOTE"), cons(nil, nil)); 138 | return read_expr(*end, end, &car(cdr(*result))); 139 | } else if (token[0] == ',') { 140 | *result = cons(make_sym( 141 | token[1] == '@' ? "UNQUOTE-SPLICING" : "UNQUOTE"), 142 | cons(nil, nil)); 143 | return read_expr(*end, end, &car(cdr(*result))); 144 | } else { 145 | return parse_simple(token, *end, result); 146 | } 147 | } 148 | 149 | --------------------------------------------------------------------------------