├── Makefile
├── builtin.c
├── data.c
├── eval.c
├── html
├── arithmetic.html
├── booleans.html
├── builtins.html
├── continuations.html
├── data.html
├── expressions.html
├── gc.html
├── index.html
├── intro.html
├── lambda.html
├── library.html
├── macros.html
├── next.html
├── parser.html
├── quasiquotation.html
├── style.css
├── sugar.html
└── variadics.html
├── library.lisp
├── lisp.h
├── main.c
├── print.c
└── read.c
/Makefile:
--------------------------------------------------------------------------------
1 | sources=$(wildcard *.c)
2 |
3 | CFLAGS=-Wall -O0 -g --std=c99 -D_GNU_SOURCE
4 | LDFLAGS=-lreadline
5 |
6 | objects=$(sources:.c=.o)
7 |
8 | lisp: $(objects)
9 | $(CC) $(CFLAGS) -o $@ $^ $(LDFLAGS)
10 |
11 | $(objects): $(wildcard *.h)
12 |
13 | .PHONY: clean
14 | clean:
15 | $(RM) *.o lisp
16 |
17 |
--------------------------------------------------------------------------------
/builtin.c:
--------------------------------------------------------------------------------
1 | #include "lisp.h"
2 |
3 | int builtin_car(Atom args, Atom *result)
4 | {
5 | if (nilp(args) || !nilp(cdr(args)))
6 | return Error_Args;
7 |
8 | if (nilp(car(args)))
9 | *result = nil;
10 | else if (car(args).type != AtomType_Pair)
11 | return Error_Type;
12 | else
13 | *result = car(car(args));
14 |
15 | return Error_OK;
16 | }
17 |
18 | int builtin_cdr(Atom args, Atom *result)
19 | {
20 | if (nilp(args) || !nilp(cdr(args)))
21 | return Error_Args;
22 |
23 | if (nilp(car(args)))
24 | *result = nil;
25 | else if (car(args).type != AtomType_Pair)
26 | return Error_Type;
27 | else
28 | *result = cdr(car(args));
29 |
30 | return Error_OK;
31 | }
32 |
33 | int builtin_cons(Atom args, Atom *result)
34 | {
35 | if (nilp(args) || nilp(cdr(args)) || !nilp(cdr(cdr(args))))
36 | return Error_Args;
37 |
38 | *result = cons(car(args), car(cdr(args)));
39 |
40 | return Error_OK;
41 | }
42 |
43 | int builtin_eq(Atom args, Atom *result)
44 | {
45 | Atom a, b;
46 | int eq;
47 |
48 | if (nilp(args) || nilp(cdr(args)) || !nilp(cdr(cdr(args))))
49 | return Error_Args;
50 |
51 | a = car(args);
52 | b = car(cdr(args));
53 |
54 | if (a.type == b.type) {
55 | switch (a.type) {
56 | case AtomType_Nil:
57 | eq = 1;
58 | break;
59 | case AtomType_Pair:
60 | case AtomType_Closure:
61 | case AtomType_Macro:
62 | eq = (a.value.pair == b.value.pair);
63 | break;
64 | case AtomType_Symbol:
65 | eq = (a.value.symbol == b.value.symbol);
66 | break;
67 | case AtomType_Integer:
68 | eq = (a.value.integer == b.value.integer);
69 | break;
70 | case AtomType_Builtin:
71 | eq = (a.value.builtin == b.value.builtin);
72 | break;
73 | }
74 | } else {
75 | eq = 0;
76 | }
77 |
78 | *result = eq ? make_sym("T") : nil;
79 | return Error_OK;
80 | }
81 |
82 | int builtin_pairp(Atom args, Atom *result)
83 | {
84 | if (nilp(args) || !nilp(cdr(args)))
85 | return Error_Args;
86 |
87 | *result = (car(args).type == AtomType_Pair) ? make_sym("T") : nil;
88 | return Error_OK;
89 | }
90 |
91 | int builtin_procp(Atom args, Atom *result)
92 | {
93 | if (nilp(args) || !nilp(cdr(args)))
94 | return Error_Args;
95 |
96 | *result = (car(args).type == AtomType_Builtin
97 | || car(args).type == AtomType_Closure) ? make_sym("T") : nil;
98 | return Error_OK;
99 | }
100 |
101 | int builtin_add(Atom args, Atom *result)
102 | {
103 | Atom a, b;
104 |
105 | if (nilp(args) || nilp(cdr(args)) || !nilp(cdr(cdr(args))))
106 | return Error_Args;
107 |
108 | a = car(args);
109 | b = car(cdr(args));
110 |
111 | if (a.type != AtomType_Integer || b.type != AtomType_Integer)
112 | return Error_Type;
113 |
114 | *result = make_int(a.value.integer + b.value.integer);
115 |
116 | return Error_OK;
117 | }
118 |
119 | int builtin_subtract(Atom args, Atom *result)
120 | {
121 | Atom a, b;
122 |
123 | if (nilp(args) || nilp(cdr(args)) || !nilp(cdr(cdr(args))))
124 | return Error_Args;
125 |
126 | a = car(args);
127 | b = car(cdr(args));
128 |
129 | if (a.type != AtomType_Integer || b.type != AtomType_Integer)
130 | return Error_Type;
131 |
132 | *result = make_int(a.value.integer - b.value.integer);
133 |
134 | return Error_OK;
135 | }
136 |
137 | int builtin_multiply(Atom args, Atom *result)
138 | {
139 | Atom a, b;
140 |
141 | if (nilp(args) || nilp(cdr(args)) || !nilp(cdr(cdr(args))))
142 | return Error_Args;
143 |
144 | a = car(args);
145 | b = car(cdr(args));
146 |
147 | if (a.type != AtomType_Integer || b.type != AtomType_Integer)
148 | return Error_Type;
149 |
150 | *result = make_int(a.value.integer * b.value.integer);
151 |
152 | return Error_OK;
153 | }
154 |
155 | int builtin_divide(Atom args, Atom *result)
156 | {
157 | Atom a, b;
158 |
159 | if (nilp(args) || nilp(cdr(args)) || !nilp(cdr(cdr(args))))
160 | return Error_Args;
161 |
162 | a = car(args);
163 | b = car(cdr(args));
164 |
165 | if (a.type != AtomType_Integer || b.type != AtomType_Integer)
166 | return Error_Type;
167 |
168 | *result = make_int(a.value.integer / b.value.integer);
169 |
170 | return Error_OK;
171 | }
172 |
173 | int builtin_numeq(Atom args, Atom *result)
174 | {
175 | Atom a, b;
176 |
177 | if (nilp(args) || nilp(cdr(args)) || !nilp(cdr(cdr(args))))
178 | return Error_Args;
179 |
180 | a = car(args);
181 | b = car(cdr(args));
182 |
183 | if (a.type != AtomType_Integer || b.type != AtomType_Integer)
184 | return Error_Type;
185 |
186 | *result = (a.value.integer == b.value.integer) ? make_sym("T") : nil;
187 |
188 | return Error_OK;
189 | }
190 |
191 | int builtin_less(Atom args, Atom *result)
192 | {
193 | Atom a, b;
194 |
195 | if (nilp(args) || nilp(cdr(args)) || !nilp(cdr(cdr(args))))
196 | return Error_Args;
197 |
198 | a = car(args);
199 | b = car(cdr(args));
200 |
201 | if (a.type != AtomType_Integer || b.type != AtomType_Integer)
202 | return Error_Type;
203 |
204 | *result = (a.value.integer < b.value.integer) ? make_sym("T") : nil;
205 |
206 | return Error_OK;
207 | }
208 |
209 |
--------------------------------------------------------------------------------
/data.c:
--------------------------------------------------------------------------------
1 | #include "lisp.h"
2 | #include
12 | So far all we've been able to do is create and name objects. Some of
13 | those objects have been numbers — naturally we would like to do
14 | calculations with those numbers.
15 |
18 | In the last chapter we saw how to create built-in functions to tell
19 |
41 | In the definitions above, when we write "the sum of
52 | Once again almost all of our function consists of checking that the
53 | correct arguments were supplied. Finally the result is constructed by
54 | the call to
78 | The other three functions differ by only one character, so I will omit
79 | them here.
80 |
83 | Finally we need to create bindings for our new functions in the initial
84 | environment:
85 | Arithmetic
10 |
11 | eval_expr
how to process arguments into a return value.
20 | We will now create four more builtins to perform the basic arithmetic
21 | operations.
22 |
25 |
39 |
40 |
26 | Expression Result
29 |
27 | (+ X Y)
The sum of X
and Y
28 |
32 |
30 | (- X Y)
The difference of X
and Y
31 |
35 |
33 | (* X Y)
The product of X
and Y
34 |
38 |
36 | (/ X Y)
The quotient of X
and Y
37 | X
and
42 | Y
", what we really mean is "the sum of the values
43 | obtained by evaluating X
and Y
".
44 | Remember that eval_expr
will evaluate all the arguments
45 | to a functions by default; this is usually what we want to happen, so from
46 | now on we will not explicitly state this where the intent is obvious.
47 | Implementation
50 |
51 | make_int
.
55 |
58 | int builtin_add(Atom args, Atom *result)
59 | {
60 | Atom a, b;
61 |
62 | if (nilp(args) || nilp(cdr(args)) || !nilp(cdr(cdr(args))))
63 | return Error_Args;
64 |
65 | a = car(args);
66 | b = car(cdr(args));
67 |
68 | if (a.type != AtomType_Integer || b.type != AtomType_Integer)
69 | return Error_Type;
70 |
71 | *result = make_int(a.value.integer + b.value.integer);
72 |
73 | return Error_OK;
74 | }
75 |
76 |
77 |
86 | env_set(env, make_sym("+"), make_builtin(builtin_add));
87 | env_set(env, make_sym("-"), make_builtin(builtin_subtract));
88 | env_set(env, make_sym("*"), make_builtin(builtin_multiply));
89 | env_set(env, make_sym("/"), make_builtin(builtin_divide));
90 |
91 |
96 | We now have our very own LISP-style calculator. 97 |
98 | 99 |100 | > (+ 1 1) 101 | 2 102 | > (define x (* 6 9)) 103 | X 104 | > x 105 | 54 106 | > (- x 12) 107 | 42 108 |109 | 110 |
111 | In the last expression above, note that X
is a symbol, not
112 | an integer. We have to evaluate the arguments so that
113 | builtin_subtract
can operate on the integer value bound to
114 | X
and not the symbol X
itself. Similarly
115 | the value bound to X
is the integer result of
116 | evaluating the expression (* 6 9)
.
117 |
14 | (Apologies if you are a logician and I've got this all wrong...) 15 |
16 | 17 |18 | A boolean value is one of two classes of values which are called 19 | true and false. If we wish to interpret a value as a boolean, 20 | we consider it to be true if it is in the class of true values, 21 | and false otherwise. 22 |
23 | 24 |
27 | So far every expression we pass to eval
is evaluated. With
28 | the exception of special forms such as DEFINE
and
29 | LAMBDA
, which store away expressions to be evaluated
30 | later, eval
must walk the whole tree before returning a
31 | result.
32 |
35 | In this chapter we will define yet another special form IF
,
36 | which will cause eval
to choose which of two possible
37 | expressions to evaluate, and discard the other.
38 |
41 | The syntax is as follows: 42 |
43 | (IF test true-expr false-expr) 44 |45 | where
test
, true-expr
and false-expr
46 | are arbitrary expressions. If the result of evaluating test
is
47 | considered to be true, then the result of the IF
-expression
48 | is the result of evaluating true-expr
, otherwise it is the
49 | result of evaluating false-expr
. Only one of
50 | true-expr
and false-expr
is evaluated; the
51 | other expression is ignored.
52 |
53 |
54 |
55 | But what kind of value is true? In our environment we will define
56 | NIL
to be false. Any other value is true.
57 |
60 | Here is the code to handle IF-expressions. 61 |
62 | 63 |64 | int eval_expr(Atom expr, Atom env, Atom *result) 65 | { 66 | . 67 | . 68 | . 69 | if (op.type == AtomType_Symbol) { 70 | if (strcmp(op.value.symbol, "QUOTE") == 0) { 71 | . 72 | . 73 | . 74 | } else if (strcmp(op.value.symbol, "IF") == 0) { 75 | Atom cond, val; 76 | 77 | if (nilp(args) || nilp(cdr(args)) || nilp(cdr(cdr(args))) 78 | || !nilp(cdr(cdr(cdr(args))))) 79 | return Error_Args; 80 | 81 | err = eval_expr(car(args), env, &cond); 82 | if (err) 83 | return err; 84 | 85 | val = nilp(cond) ? car(cdr(cdr(args))) : car(cdr(args)); 86 | return eval_expr(val, env, result); 87 | } 88 | } 89 | . 90 | . 91 | . 92 | } 93 |94 | 95 |
96 | The argument check is getting a little unwieldy. A couple of alternatives
97 | are to modify car
and cdr
to return
98 | NIL
if the argument is not a pair and forego the syntax
99 | check, or to create a helper function to count the list length. It won't
100 | get any worse than this, though — so let's not waste time on it.
101 |
104 | Traditionally LISP functions return the symbol T
if they
105 | need to return a boolean value and there is no obvious object available.
106 | T
is bound to itself, so evaluating it returns the symbol
107 | T
again. A symbol is not NIL
, and so is
108 | true.
109 |
112 | Add a binding for T
to the initial environment:
113 |
114 | env_set(env, make_sym("T"), make_sym("T")); 115 |116 | Remember that
make_sym
will return the same
117 | symbol object if it is called multiple times with identical strings.
118 |
119 |
120 | 123 | > (if t 3 4) 124 | 3 125 | > (if nil 3 4) 126 | 4 127 | > (if 0 t nil) 128 | T 129 |130 | 131 |
132 | Unlike C, zero is true, not false. 133 |
134 | 135 |
138 | While we could stop here, it would be useful to make some tests other
139 | than "is it NIL
". This is where predicates come in.
140 | A predicate is a function which returns a true/false value according to
141 | some condition.
142 |
145 | We will define two built-in predicates, "=
" which tests for
146 | numerical equality, and "<
" which tests if one number
147 | is less than another.
148 |
151 | The functions are similar to our other numerical built-ins. 152 |
153 | 154 |155 | int builtin_numeq(Atom args, Atom *result) 156 | { 157 | Atom a, b; 158 | 159 | if (nilp(args) || nilp(cdr(args)) || !nilp(cdr(cdr(args)))) 160 | return Error_Args; 161 | 162 | a = car(args); 163 | b = car(cdr(args)); 164 | 165 | if (a.type != AtomType_Integer || b.type != AtomType_Integer) 166 | return Error_Type; 167 | 168 | *result = (a.value.integer == b.value.integer) ? make_sym("T") : nil; 169 | 170 | return Error_OK; 171 | } 172 |173 | 174 |
175 | builtin_less
follows the same pattern and is not shown here.
176 |
179 | Finally we must add them to the initial environment. 180 |
181 | 182 |183 | env_set(env, make_sym("="), make_builtin(builtin_numeq)); 184 | env_set(env, make_sym("<"), make_builtin(builtin_less)); 185 |186 | 187 |
190 | > (= 3 3) 191 | T 192 | > (< 11 4) 193 | NIL 194 |195 | 196 |
197 | Barring memory and stack limitations, our LISP environment is now 198 | Turing-complete! If you have been entering the code as we go along, you 199 | can confirm that we have implemented the core of a usable programming 200 | language in well under 1,000 lines of C code. 201 |
202 | 203 |204 | A classic demonstration: 205 |
206 | > (define fact 207 | (lambda (x) 208 | (if (= x 0) 209 | 1 210 | (* x (fact (- x 1)))))) 211 | FACT 212 | > (fact 10) 213 | 3628800 214 |215 | I have cheated a little here: the REPL does not allow the user to enter 216 | multi-line expressions, so you must enter the definition for 217 |
fact
all on one line.
218 |
219 |
220 | 221 | There is more to do yet, though. LISP has other features which make it 222 | possible to express some really interesting stuff, and there are a few loose 223 | ends to tidy up as well. 224 |
225 | 226 | 227 | 228 | 229 | -------------------------------------------------------------------------------- /html/builtins.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 |
12 | So far in our implementation, we have made use of the functions
13 | car
, cdr
and cons
to construct
14 | and access LISP data. Now, we will make the same functionality
15 | available within the interpreted environment.
16 |
19 | We shall extend the list expression syntax to add some new operators: 20 |
(CAR EXPR)
22 | EXPR
and returns the car of the
23 | result. It is an error if EXPR
does not evaluate to a
24 | pair or NIL
.(CDR EXPR)
26 | EXPR
and returns the cdr of the
27 | result. It is an error if EXPR
does not evaluate to a
28 | pair or NIL
.(CONS A B)
30 | A
and B
,
31 | and returns a newly constructed pair containing the results.
36 | In the definitions above we allow taking the car and cdr of
37 | NIL
, unlike our C versions. Some algorithms are simpler to
38 | express if the car and cdr of NIL
are defined
39 | to be NIL
.
40 |
43 | We could choose to implement these by adding more special cases
44 | to eval_expr
, just like we did with QUOTE
45 | and DEFINE
. However, we will want to add more operators
46 | in the future — and adding each one to eval_expr
47 | would cause the function to get very long. The alternative is to introduce
48 | the concept of functions.
49 |
54 | A function is a recipe for converting arguments into a value. If
55 | eval_expr
encounters a list expression with a function
56 | as the operator, all it has to do is follow the recipe to come up with
57 | a value to use as the result of the expression.
58 |
61 | One way to implement these recipes is to create C functions which can
62 | be called from eval_expr
. We will call these built-in
63 | or primitive functions. Let's see how to extend our LISP
64 | interpreter to accommodate these.
65 |
70 | eval_expr
will call built-in functions through a C function
71 | pointer, so they must all have the same prototype:
72 |
73 | typedef int (*Builtin)(struct Atom args, struct Atom *result); 74 |75 | 76 | 77 |
78 | In order to appear in expressions, we need a new kind of atom to 79 | represent them. 80 |
81 | struct Atom { 82 | enum { 83 | . 84 | . 85 | . 86 | AtomType_Builtin 87 | } type; 88 | 89 | union { 90 | . 91 | . 92 | . 93 | Builtin builtin; 94 | } value; 95 | }; 96 |97 | Sections of code which we wrote previously are abbreviated as 98 | "
. . .
".
99 |
100 |
101 |
102 | For completeness, print_expr
needs to know how to display
103 | the new atom:
104 |
105 | void print_expr(Atom atom) 106 | { 107 | switch (atom.type) { 108 | . 109 | . 110 | . 111 | case AtomType_Builtin: 112 | printf("#<BUILTIN:%p>", atom.value.builtin); 113 | break; 114 | } 115 | } 116 |117 | 118 | 119 |
120 | And finally a helper function to create atoms of the new type: 121 |
122 | Atom make_builtin(Builtin fn) 123 | { 124 | Atom a; 125 | a.type = AtomType_Builtin; 126 | a.value.builtin = fn; 127 | return a; 128 | } 129 |130 | 131 | 132 |
135 | We will need to create a shallow copy of the argument 136 | list. 137 |
138 | 139 |140 | Atom copy_list(Atom list) 141 | { 142 | Atom a, p; 143 | 144 | if (nilp(list)) 145 | return nil; 146 | 147 | a = cons(car(list), nil); 148 | p = a; 149 | list = cdr(list); 150 | 151 | while (!nilp(list)) { 152 | cdr(p) = cons(car(list), nil); 153 | p = cdr(p); 154 | list = cdr(list); 155 | } 156 | 157 | return a; 158 | } 159 |160 | 161 |
162 | apply
simply calls the builtin function with a supplied
163 | list of arguments. We will extend this function later when we
164 | want to deal with other kinds of evaluation recipe.
165 |
168 | int apply(Atom fn, Atom args, Atom *result) 169 | { 170 | if (fn.type == AtomType_Builtin) 171 | return (*fn.value.builtin)(args, result); 172 | 173 | return Error_Type; 174 | } 175 |176 | 177 |
178 | If a list expression is not one of the special forms we defined
179 | previously, then we will assume that the operator is something which
180 | evaluates to a function. We will also evaluate each of the arguments,
181 | and use apply
to call that function with the list of
182 | results.
183 |
186 | int eval_expr(Atom expr, Atom env, Atom *result) 187 | { 188 | Atom op, args, p; 189 | Error err; 190 | 191 | . 192 | . 193 | . 194 | 195 | if (op.type == AtomType_Symbol) { 196 | . 197 | . 198 | . 199 | } 200 | 201 | /* Evaluate operator */ 202 | err = eval_expr(op, env, &op); 203 | if (err) 204 | return err; 205 | 206 | /* Evaulate arguments */ 207 | args = copy_list(args); 208 | p = args; 209 | while (!nilp(p)) { 210 | err = eval_expr(car(p), env, &car(p)); 211 | if (err) 212 | return err; 213 | 214 | p = cdr(p); 215 | } 216 | 217 | return apply(op, args, result); 218 | } 219 |220 | 221 |
222 | The argument list is copied before being overwritten with the results of 223 | evaluating the arguments. We don't want to overwrite the original 224 | argument list in case we need to use the form again in the future. 225 |
226 | 227 |230 | Previously we created an empty environment for the read-eval-print loop. 231 | The user has no way of creating atoms which represent builtin functions, 232 | so we populate the initial environment with bindings for our builtins. 233 |
234 | 235 |236 | The functions themselves: 237 |
238 | 239 |240 | int builtin_car(Atom args, Atom *result) 241 | { 242 | if (nilp(args) || !nilp(cdr(args))) 243 | return Error_Args; 244 | 245 | if (nilp(car(args))) 246 | *result = nil; 247 | else if (car(args).type != AtomType_Pair) 248 | return Error_Type; 249 | else 250 | *result = car(car(args)); 251 | 252 | return Error_OK; 253 | } 254 |255 | 256 |
257 | Almost all of the function is code to deal with errors and type checking! 258 | Creating functions in this way is pretty tedious. 259 |
260 | 261 |262 | int builtin_cdr(Atom args, Atom *result) 263 | { 264 | if (nilp(args) || !nilp(cdr(args))) 265 | return Error_Args; 266 | 267 | if (nilp(car(args))) 268 | *result = nil; 269 | else if (car(args).type != AtomType_Pair) 270 | return Error_Type; 271 | else 272 | *result = cdr(car(args)); 273 | 274 | return Error_OK; 275 | } 276 |277 | 278 |
279 | builtin_cdr
is almost identical to builtin_car
.
280 |
283 | int builtin_cons(Atom args, Atom *result) 284 | { 285 | if (nilp(args) || nilp(cdr(args)) || !nilp(cdr(cdr(args)))) 286 | return Error_Args; 287 | 288 | *result = cons(car(args), car(cdr(args))); 289 | 290 | return Error_OK; 291 | } 292 |293 | 294 |
295 | With these defined, we can at last use env_set
to create
296 | the bindings.
297 |
300 | int main(int argc, char **argv) 301 | { 302 | Atom env; 303 | char *input; 304 | 305 | env = env_create(nil); 306 | 307 | /* Set up the initial environment */ 308 | env_set(env, make_sym("CAR"), make_builtin(builtin_car)); 309 | env_set(env, make_sym("CDR"), make_builtin(builtin_cdr)); 310 | env_set(env, make_sym("CONS"), make_builtin(builtin_cons)); 311 | 312 | while ((input = readline("> ")) != NULL) { 313 | . 314 | . 315 | . 316 | } 317 | 318 | return 0; 319 | } 320 |321 | 322 |
325 | > (define foo 1) 326 | FOO 327 | > (define bar 2) 328 | BAR 329 | > (cons foo bar) 330 | (1 . 2) 331 | > (define baz (quote (a b c))) 332 | BAZ 333 | > (car baz) 334 | A 335 | > (cdr baz) 336 | (B C) 337 |338 | 339 |
340 | Notice that (CONS FOO BAR)
is not the same as
341 | (QUOTE (FOO . BAR))
. In the former expression, the arguments
342 | are evaluated and a new pair is created.
343 |
eval_expr
11 | and the design of the stack in this chapter are rather ad-hoc, and
12 | I'm not particularly proud of them. Please skip to the next chapter
13 | if they offend you.
14 |
15 |
16 |
19 | Our eval_expr
function has been implemented recursively
20 | — that is to say, when in the course of evaluating an expression
21 | it is necessary to evaluate a sub-expression, eval_expr
22 | calls itself to obtain the result.
23 |
26 | This works fairly well, and is easy to follow, but the depth of 27 | recursion in our LISP environment is limited by the stack size of the 28 | interpreter. LISP code traditionally makes heavy use of recursion, 29 | and we would like to support this up to the limit of available memory. 30 |
31 | 32 |33 | Take the following pathological example: 34 |
35 | (define (count n) 36 | (if (= n 0) 37 | 0 38 | (+ 1 (count (- n 1))))) 39 |40 | 41 | 42 |
43 | The COUNT
function will recurse to depth n
44 | and return the sum of n
ones. Expressions such as
45 | (COUNT 10)
should compute OK with our current interpreter,
46 | but even (COUNT 10000)
is enough to cause a stack overflow
47 | on my machine.
48 |
51 | To achieve this we will rewrite eval_expr
as a loop, with
52 | helper functions to keep track of evaluations in progress and return
53 | the next expression to be evaluated. When there are no more expressions
54 | left, eval_expr
can return the final result to the caller.
55 |
58 | As eval_expr
works through the tree of expressions, we will
59 | keep track of arguments evaluated and pending evaluation in a series of
60 | frames
, linked together to form a stack
. This
61 | is broadly the same way that the compiled version of the recursive
62 | eval_expr
works; in this case we are replacing the machine
63 | code stack with a LISP data structure and manipulating it explicitly.
64 |
67 | The stack can also be thought of as representing the future of the 68 | computation once the present expression has been evaluated. In this 69 | sense it is referred to as the current continuation. 70 |
71 | 72 |
73 | Since any function which is called by eval_expr
may not
74 | call eval_expr
(to avoid recursion), we must integrate
75 | apply
and builtin_apply
into the body of
76 | eval_expr
.
77 |
82 | A stack frame has the following form. 83 |
84 | 85 |86 | (parent env evaluated-op (pending-arg...) (evaluated-arg...) (body...)) 87 |88 | 89 |
90 | parent
is the stack frame corresponding to the parent
91 | expression (that is, the one which is waiting for the result of the
92 | current expression). env
is the current environment,
93 | evaluated-op
is the evaluated operator, and
94 | pending-arg...
and evaluated-arg
are the
95 | arguments pending and following evaluation respectively.
96 | body...
are the expressions in the function body
97 | which are pending execution.
98 |
101 | Rather than writing out long lists of car()
and
102 | cdr()
, we will define some helper functions to
103 | manipulate members of a list.
104 |
107 | Atom list_get(Atom list, int k) 108 | { 109 | while (k--) 110 | list = cdr(list); 111 | return car(list); 112 | } 113 | 114 | void list_set(Atom list, int k, Atom value) 115 | { 116 | while (k--) 117 | list = cdr(list); 118 | car(list) = value; 119 | } 120 | 121 | void list_reverse(Atom *list) 122 | { 123 | Atom tail = nil; 124 | while (!nilp(*list)) { 125 | Atom p = cdr(*list); 126 | cdr(*list) = tail; 127 | tail = *list; 128 | *list = p; 129 | } 130 | *list = tail; 131 | } 132 |133 | 134 |
135 | Another function creates a new stack frame ready to start evaluating a 136 | new function call, with the specified parent, environment and list of 137 | arguments pending evaluation (the tail). 138 |
139 | 140 |141 | Atom make_frame(Atom parent, Atom env, Atom tail) 142 | { 143 | return cons(parent, 144 | cons(env, 145 | cons(nil, /* op */ 146 | cons(tail, 147 | cons(nil, /* args */ 148 | cons(nil, /* body */ 149 | nil)))))); 150 | } 151 |152 | 153 |
154 | Here is the innermost part of our new exec_expr
,
155 | which sets expr
to the next part of the function
156 | body, and pops the stack when we have reached end of the body.
157 |
160 | int eval_do_exec(Atom *stack, Atom *expr, Atom *env) 161 | { 162 | Atom body; 163 | 164 | *env = list_get(*stack, 1); 165 | body = list_get(*stack, 5); 166 | *expr = car(body); 167 | body = cdr(body); 168 | if (nilp(body)) { 169 | /* Finished function; pop the stack */ 170 | *stack = car(*stack); 171 | } else { 172 | list_set(*stack, 5, body); 173 | } 174 | 175 | return Error_OK; 176 | } 177 |178 | 179 |
180 | This helper binds the function arguments into a new environment
181 | if they have not already been bound, then calls
182 | eval_do_exec
to get the next expression in the body.
183 |
186 | int eval_do_bind(Atom *stack, Atom *expr, Atom *env) 187 | { 188 | Atom op, args, arg_names, body; 189 | 190 | body = list_get(*stack, 5); 191 | if (!nilp(body)) 192 | return eval_do_exec(stack, expr, env); 193 | 194 | op = list_get(*stack, 2); 195 | args = list_get(*stack, 4); 196 | 197 | *env = env_create(car(op)); 198 | arg_names = car(cdr(op)); 199 | body = cdr(cdr(op)); 200 | list_set(*stack, 1, *env); 201 | list_set(*stack, 5, body); 202 | 203 | /* Bind the arguments */ 204 | while (!nilp(arg_names)) { 205 | if (arg_names.type == AtomType_Symbol) { 206 | env_set(*env, arg_names, args); 207 | args = nil; 208 | break; 209 | } 210 | 211 | if (nilp(args)) 212 | return Error_Args; 213 | env_set(*env, car(arg_names), car(args)); 214 | arg_names = cdr(arg_names); 215 | args = cdr(args); 216 | } 217 | if (!nilp(args)) 218 | return Error_Args; 219 | 220 | list_set(*stack, 4, nil); 221 | 222 | return eval_do_exec(stack, expr, env); 223 | } 224 |225 | 226 |
227 | The next function is called once all arguments have been evaluated,
228 | and is responsible either generating an expression to call a builtin,
229 | or delegating to eval_do_bind
.
230 |
233 | int eval_do_apply(Atom *stack, Atom *expr, Atom *env, Atom *result) 234 | { 235 | Atom op, args; 236 | 237 | op = list_get(*stack, 2); 238 | args = list_get(*stack, 4); 239 | 240 | if (!nilp(args)) { 241 | list_reverse(&args); 242 | list_set(*stack, 4, args); 243 | } 244 | 245 | if (op.type == AtomType_Symbol) { 246 | if (strcmp(op.value.symbol, "APPLY") == 0) { 247 | /* Replace the current frame */ 248 | *stack = car(*stack); 249 | *stack = make_frame(*stack, *env, nil); 250 | op = car(args); 251 | args = car(cdr(args)); 252 | if (!listp(args)) 253 | return Error_Syntax; 254 | 255 | list_set(*stack, 2, op); 256 | list_set(*stack, 4, args); 257 | } 258 | } 259 | 260 | if (op.type == AtomType_Builtin) { 261 | *stack = car(*stack); 262 | *expr = cons(op, args); 263 | return Error_OK; 264 | } else if (op.type != AtomType_Closure) { 265 | return Error_Type; 266 | } 267 | 268 | return eval_do_bind(stack, expr, env); 269 | } 270 |271 | 272 |
273 | This part is called once an expression has been evaluated, and 274 | is responsible for storing the result, which is either an operator, 275 | an argument, or an intermediate body expression, and fetching the 276 | next expression to evaluate. 277 |
278 | 279 |280 | int eval_do_return(Atom *stack, Atom *expr, Atom *env, Atom *result) 281 | { 282 | Atom op, args, body; 283 | 284 | *env = list_get(*stack, 1); 285 | op = list_get(*stack, 2); 286 | body = list_get(*stack, 5); 287 | 288 | if (!nilp(body)) { 289 | /* Still running a procedure; ignore the result */ 290 | return eval_do_apply(stack, expr, env, result); 291 | } 292 | 293 | if (nilp(op)) { 294 | /* Finished evaluating operator */ 295 | op = *result; 296 | list_set(*stack, 2, op); 297 | 298 | if (op.type == AtomType_Macro) { 299 | /* Don't evaluate macro arguments */ 300 | args = list_get(*stack, 3); 301 | *stack = make_frame(*stack, *env, nil); 302 | op.type = AtomType_Closure; 303 | list_set(*stack, 2, op); 304 | list_set(*stack, 4, args); 305 | return eval_do_bind(stack, expr, env); 306 | } 307 | } else if (op.type == AtomType_Symbol) { 308 | /* Finished working on special form */ 309 | if (strcmp(op.value.symbol, "DEFINE") == 0) { 310 | Atom sym = list_get(*stack, 4); 311 | (void) env_set(*env, sym, *result); 312 | *stack = car(*stack); 313 | *expr = cons(make_sym("QUOTE"), cons(sym, nil)); 314 | return Error_OK; 315 | } else if (strcmp(op.value.symbol, "IF") == 0) { 316 | args = list_get(*stack, 3); 317 | *expr = nilp(*result) ? car(cdr(args)) : car(args); 318 | *stack = car(*stack); 319 | return Error_OK; 320 | } else { 321 | goto store_arg; 322 | } 323 | } else if (op.type == AtomType_Macro) { 324 | /* Finished evaluating macro */ 325 | *expr = *result; 326 | *stack = car(*stack); 327 | return Error_OK; 328 | } else { 329 | store_arg: 330 | /* Store evaluated argument */ 331 | args = list_get(*stack, 4); 332 | list_set(*stack, 4, cons(*result, args)); 333 | } 334 | 335 | args = list_get(*stack, 3); 336 | if (nilp(args)) { 337 | /* No more arguments left to evaluate */ 338 | return eval_do_apply(stack, expr, env, result); 339 | } 340 | 341 | /* Evaluate next argument */ 342 | *expr = car(args); 343 | list_set(*stack, 3, cdr(args)); 344 | return Error_OK; 345 | } 346 |347 | 348 |
349 | And here we are at last with the new eval_expr
. There
350 | is a lot of code for setting up special forms, but the rest is simply
351 | a loop waiting for the stack to clear.
352 |
355 | int eval_expr(Atom expr, Atom env, Atom *result) 356 | { 357 | Error err = Error_OK; 358 | Atom stack = nil; 359 | 360 | do { 361 | if (expr.type == AtomType_Symbol) { 362 | err = env_get(env, expr, result); 363 | } else if (expr.type != AtomType_Pair) { 364 | *result = expr; 365 | } else if (!listp(expr)) { 366 | return Error_Syntax; 367 | } else { 368 | Atom op = car(expr); 369 | Atom args = cdr(expr); 370 | 371 | if (op.type == AtomType_Symbol) { 372 | /* Handle special forms */ 373 | 374 | if (strcmp(op.value.symbol, "QUOTE") == 0) { 375 | if (nilp(args) || !nilp(cdr(args))) 376 | return Error_Args; 377 | 378 | *result = car(args); 379 | } else if (strcmp(op.value.symbol, "DEFINE") == 0) { 380 | Atom sym; 381 | 382 | if (nilp(args) || nilp(cdr(args))) 383 | return Error_Args; 384 | 385 | sym = car(args); 386 | if (sym.type == AtomType_Pair) { 387 | err = make_closure(env, cdr(sym), cdr(args), result); 388 | sym = car(sym); 389 | if (sym.type != AtomType_Symbol) 390 | return Error_Type; 391 | (void) env_set(env, sym, *result); 392 | *result = sym; 393 | } else if (sym.type == AtomType_Symbol) { 394 | if (!nilp(cdr(cdr(args)))) 395 | return Error_Args; 396 | stack = make_frame(stack, env, nil); 397 | list_set(stack, 2, op); 398 | list_set(stack, 4, sym); 399 | expr = car(cdr(args)); 400 | continue; 401 | } else { 402 | return Error_Type; 403 | } 404 | } else if (strcmp(op.value.symbol, "LAMBDA") == 0) { 405 | if (nilp(args) || nilp(cdr(args))) 406 | return Error_Args; 407 | 408 | err = make_closure(env, car(args), cdr(args), result); 409 | } else if (strcmp(op.value.symbol, "IF") == 0) { 410 | if (nilp(args) || nilp(cdr(args)) || nilp(cdr(cdr(args))) 411 | || !nilp(cdr(cdr(cdr(args))))) 412 | return Error_Args; 413 | 414 | stack = make_frame(stack, env, cdr(args)); 415 | list_set(stack, 2, op); 416 | expr = car(args); 417 | continue; 418 | } else if (strcmp(op.value.symbol, "DEFMACRO") == 0) { 419 | Atom name, macro; 420 | 421 | if (nilp(args) || nilp(cdr(args))) 422 | return Error_Args; 423 | 424 | if (car(args).type != AtomType_Pair) 425 | return Error_Syntax; 426 | 427 | name = car(car(args)); 428 | if (name.type != AtomType_Symbol) 429 | return Error_Type; 430 | 431 | err = make_closure(env, cdr(car(args)), 432 | cdr(args), ¯o); 433 | if (!err) { 434 | macro.type = AtomType_Macro; 435 | *result = name; 436 | (void) env_set(env, name, macro); 437 | } 438 | } else if (strcmp(op.value.symbol, "APPLY") == 0) { 439 | if (nilp(args) || nilp(cdr(args)) || !nilp(cdr(cdr(args)))) 440 | return Error_Args; 441 | 442 | stack = make_frame(stack, env, cdr(args)); 443 | list_set(stack, 2, op); 444 | expr = car(args); 445 | continue; 446 | } else { 447 | goto push; 448 | } 449 | } else if (op.type == AtomType_Builtin) { 450 | err = (*op.value.builtin)(args, result); 451 | } else { 452 | push: 453 | /* Handle function application */ 454 | stack = make_frame(stack, env, args); 455 | expr = op; 456 | continue; 457 | } 458 | } 459 | 460 | if (nilp(stack)) 461 | break; 462 | 463 | if (!err) 464 | err = eval_do_return(&stack, &expr, &env, result); 465 | } while (!err); 466 | 467 | return err; 468 | } 469 |470 | 471 |
474 | Let's try our COUNT
function again.
475 |
478 | > (count 100000) 479 | 100000 480 |481 | 482 |
483 | Hooray! We can now recurse as much as we like without causing a stack 484 | overflow. If you have a lot of RAM, you should even be able to do 485 | a million levels deep. 486 |
487 | 488 |491 | If the last expression in a function is a call to another function, then 492 | the result can be returned directly to the first function's caller. This 493 | is known as a tail call. If the called function, through a series 494 | of tail calls, causes the first function to be called, we have 495 | tail recursion. 496 |
497 | 498 |499 | Tail calls do not require the caller's stack frame to be retained, so 500 | a tail-recursive function can recurse as many levels as necessary without 501 | increasing the stack depth. 502 |
503 | 504 |
505 | The count
function could be formulated as a tail-recursive
506 | procedure as follows:
507 |
508 | (define (count n a) 509 | (if (= n 0) 510 | a 511 | (count (- n 1) (+ a 1)))) 512 | 513 | (count 100000 0) 514 |515 | 516 | 517 |
518 | If you watch eval_expr
with a debugger you can confirm that
519 | the stack never grows above a few levels deep.
520 |
523 | All that is left to do is clean up all the temporary objects created 524 | by our new evaluator. 525 |
526 | 527 | 528 | 529 | 530 | -------------------------------------------------------------------------------- /html/data.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 |12 | We will define four kinds of object to begin with: 13 |
FOO
, BAR
, ADD-TWO
.
19 | We will normalize characters to upper-case in this project, but this
20 | is not strictly necessary.NIL
NULL
in C and other
23 | languages.NIL
, or a reference to another pair.
28 | The types of each element may be different.NIL
are called simple data.
31 | The term atom can refer to either a simple datum or a pair
32 | (purists may disagree on this point).
33 |
34 |
35 | 36 | Note that integers and symbols are immutable, so we can think 37 | of two integers with the same value as being the same object. This is 38 | particularly useful for symbols, because it allows us to test for 39 | equality by comparing pointers. 40 |
41 | 42 |45 | Let's declare some C types to hold our data. There are many clever ways 46 | to store LISP objects efficiently, but for this implementation we will 47 | stick to a very simple scheme [please excuse the pun]. 48 |
49 | 50 |51 | struct Atom { 52 | enum { 53 | AtomType_Nil, 54 | AtomType_Pair, 55 | AtomType_Symbol, 56 | AtomType_Integer 57 | } type; 58 | 59 | union { 60 | struct Pair *pair; 61 | const char *symbol; 62 | long integer; 63 | } value; 64 | }; 65 | 66 | struct Pair { 67 | struct Atom atom[2]; 68 | }; 69 | 70 | typedef struct Atom Atom; 71 |72 | 73 |
74 | A few macros will be handy: 75 |
76 | #define car(p) ((p).value.pair->atom[0]) 77 | #define cdr(p) ((p).value.pair->atom[1]) 78 | #define nilp(atom) ((atom).type == AtomType_Nil) 79 | 80 | static const Atom nil = { AtomType_Nil }; 81 |82 | The "p" in
nilp
stands for "predicate". Identifiers in C
83 | may not contain question marks. There is no need to restrict our LISP
84 | implementation in that way, of course.
85 |
86 |
87 | 88 | Integers and (pointers to) strings can be copied around, but we need to 89 | allocate pairs on the heap. 90 |
91 | Atom cons(Atom car_val, Atom cdr_val) 92 | { 93 | Atom p; 94 | 95 | p.type = AtomType_Pair; 96 | p.value.pair = malloc(sizeof(struct Pair)); 97 | 98 | car(p) = car_val; 99 | cdr(p) = cdr_val; 100 | 101 | return p; 102 | } 103 |104 |
cons
is a function to allocate a pair on the heap and
105 | assign its two elements.
106 |
107 |
108 |
109 | At this point you will have noticed that using cons
will
110 | leak memory the moment its return value is discarded. We will deal with
111 | that later. Of course, if you are using a garbage-collected language
112 | then the problem is already taken care of.
113 |
118 | Now we can start creating LISP objects. An integer: 119 |
120 | Atom make_int(long x) 121 | { 122 | Atom a; 123 | a.type = AtomType_Integer; 124 | a.value.integer = x; 125 | return a; 126 | } 127 |128 | And a symbol: 129 |
130 | Atom make_sym(const char *s) 131 | { 132 | Atom a; 133 | a.type = AtomType_Symbol; 134 | a.value.symbol = strdup(s); 135 | return a; 136 | } 137 |138 | 139 | 140 |
143 | We will write a pair like this: 144 |
(a . b)145 | where
a
is the car and b
is the
146 | cdr.
147 |
148 |
149 | 150 | By using the cdr of a pair to reference another pair, we can 151 | create a chain: 152 |
153 | (a . (b . (c . (d . NIL)))) 154 |155 | Notice that the cdr of the last pair is
NIL
. This
156 | signifies the end of the chain, and we call this structure a
157 | list. To avoid having to write a large number of brackets, we
158 | will write the previous list like this:
159 | (a b c d)160 | Finally, if the cdr of the last pair in a list is not 161 |
NIL
, we will write this:
162 | (p q . r)163 | which is equivalent to 164 |
(p . (q . r))165 | This is called an improper list. 166 | 167 | 168 |
171 | Printing an atom or list is simple. 172 |
173 | void print_expr(Atom atom) 174 | { 175 | switch (atom.type) { 176 | case AtomType_Nil: 177 | printf("NIL"); 178 | break; 179 | case AtomType_Pair: 180 | putchar('('); 181 | print_expr(car(atom)); 182 | atom = cdr(atom); 183 | while (!nilp(atom)) { 184 | if (atom.type == AtomType_Pair) { 185 | putchar(' '); 186 | print_expr(car(atom)); 187 | atom = cdr(atom); 188 | } else { 189 | printf(" . "); 190 | print_expr(atom); 191 | break; 192 | } 193 | } 194 | putchar(')'); 195 | break; 196 | case AtomType_Symbol: 197 | printf("%s", atom.value.symbol); 198 | break; 199 | case AtomType_Integer: 200 | printf("%ld", atom.value.integer); 201 | break; 202 | } 203 | } 204 |205 | By using recursion we can print aribtrarily complex data structures. 206 | (Actually that's not true: for a very deeply nested structure we will 207 | run out of stack space, and a self-referencing tree will never finish 208 | printing). 209 | 210 | 211 |
214 | See what print_expr
does with various atoms:
215 |
Atom | Output |
---|---|
make_int(42) | 42 |
218 |
make_sym("FOO") | FOO |
219 |
cons(make_sym("X"), make_sym("Y")) | (X . Y) |
220 |
cons(make_int(1), | (1 2 3) |
224 |
228 | All this is pretty trivial. We'll get on to some more interesting stuff 229 | in the next chapter. 230 |
231 | 232 |235 | Remember we said that we would treat identical symbols as being the 236 | same object? We can enforce that by keeping track of all the symbols 237 | created, and returning the same atom if the same sequence of characters 238 | is requested subsequently. 239 |
240 | 241 |242 | Languages with a set or hashtable container make this easy, but we can 243 | use the LISP data structures already implemented to store the symbols 244 | in a list: 245 |
246 | static Atom sym_table = { AtomType_Nil }; 247 | 248 | Atom make_sym(const char *s) 249 | { 250 | Atom a, p; 251 | 252 | p = sym_table; 253 | while (!nilp(p)) { 254 | a = car(p); 255 | if (strcmp(a.value.symbol, s) == 0) 256 | return a; 257 | p = cdr(p); 258 | } 259 | 260 | a.type = AtomType_Symbol; 261 | a.value.symbol = strdup(s); 262 | sym_table = cons(a, sym_table); 263 | 264 | return a; 265 | } 266 |267 | Neat, huh? It's not particularly efficient, but it will do fine for now. 268 | 269 | 270 | 271 | 272 | 273 | -------------------------------------------------------------------------------- /html/expressions.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 |
14 | LISP is all about expressions. An expression can be a 15 | literal, an identifier, or a list consisting of an 16 | operator and one or more arguments. 17 |
18 | 19 |
20 | A literal is an object with an intrinsic value. In our system, that's
21 | either an integer or NIL
(if you consider "nothing" to be
22 | a value).
23 |
26 | An identifier is a name for an object. Symbols can be 27 | identifiers. 28 |
29 | 30 |
31 | Everything else is a list of the form (operator argument...)
32 | where argument...
means zero or more arguments.
33 |
38 | To associate identifiers with objects we need an environment. 39 | This is a collection of bindings, each of which consists of an 40 | identifier and its corresponding value. For example: 41 |
Bindings | |
---|---|
Identifier | Value |
FOO | 42 |
BAR | NIL |
BAZ | (X Y Z) |
BAZ
50 | is a list containing three symbols.
51 |
52 |
53 | 54 | An environment can also have a parent environment. If there is 55 | no binding for a particular identifier in the environment, we can check 56 | the parent, the parent's parent and so on. In this way we can create a 57 | tree of environments which share bindings with their ancestors unless 58 | explicit replacements exist. 59 |
60 | 61 |64 | There is a convenient way of representing environments using our LISP 65 | data types: 66 |
67 | (parent (identifier . value)...) 68 |69 | So the environment above (assuming it has no parent) is: 70 |
71 | (NIL (FOO . 42) (BAR . NIL) (BAZ . (X Y Z))) 72 |73 | 74 | 75 |
76 | Here is a function to create an empty environment with a specified
77 | parent (which could be NIL
):
78 |
79 | Atom env_create(Atom parent) 80 | { 81 | return cons(parent, nil); 82 | } 83 |84 | 85 | 86 |
87 | Next we have two functions to retrieve and create bindings in an 88 | environment. 89 |
90 | 91 |92 |
93 | int env_get(Atom env, Atom symbol, Atom *result) 94 | { 95 | Atom parent = car(env); 96 | Atom bs = cdr(env); 97 | 98 | while (!nilp(bs)) { 99 | Atom b = car(bs); 100 | if (car(b).value.symbol == symbol.value.symbol) { 101 | *result = cdr(b); 102 | return Error_OK; 103 | } 104 | bs = cdr(bs); 105 | } 106 | 107 | if (nilp(parent)) 108 | return Error_Unbound; 109 | 110 | return env_get(parent, symbol, result); 111 | } 112 |113 | Disallowing duplicate symbols means that we don't have to call 114 |
strcmp
here, which should mean that this lookup function
115 | is not too slow.
116 |
117 |
118 | 119 | int env_set(Atom env, Atom symbol, Atom value) 120 | { 121 | Atom bs = cdr(env); 122 | Atom b = nil; 123 | 124 | while (!nilp(bs)) { 125 | b = car(bs); 126 | if (car(b).value.symbol == symbol.value.symbol) { 127 | cdr(b) = value; 128 | return Error_OK; 129 | } 130 | bs = cdr(bs); 131 | } 132 | 133 | b = cons(symbol, value); 134 | cdr(env) = cons(b, cdr(env)); 135 | 136 | return Error_OK; 137 | } 138 |139 | 140 |
141 | Only env_get
recursively checks the parent environments.
142 | We don't want to modify the bindings of parents.
143 |
148 | Now that we have expressions, we can start to evaluate them. 149 | Evalution is a process which takes an expression and an environment, and 150 | produces a value (the result). Let's specify the rules. 151 |
152 | 153 |QUOTE
(QUOTE EXPR)
is
170 | EXPR
, which is returned without evaluating.
171 | DEFINE
(DEFINE SYMBOL EXPR)
creates a binding
173 | for SYMBOL
(or modifies an existing binding) in the
174 | evaluation environment. SYMBOL
is bound to the value
175 | obtained by evaluating EXPR
. The final result is
176 | SYMBOL
.
177 | 190 | We will need to check whether an expression is a proper list. 191 |
192 | int listp(Atom expr) 193 | { 194 | while (!nilp(expr)) { 195 | if (expr.type != AtomType_Pair) 196 | return 0; 197 | expr = cdr(expr); 198 | } 199 | return 1; 200 | } 201 |202 | 203 | 204 |
205 | The Error
enumeration needs a few more entires:
206 |
Error_Unbound |
209 | Attempted to evaluate a symbol for which no binding exists | 210 |
Error_Args |
213 | A list expression was shorter or longer than expected |
Error_Type |
217 | An object in an expression was of a different type than expected | 218 |
223 | The function to perform evaluation is now a straightforward translation 224 | of the rules into C. 225 |
226 | 227 |228 | int eval_expr(Atom expr, Atom env, Atom *result) 229 | { 230 | Atom op, args; 231 | Error err; 232 | 233 | if (expr.type == AtomType_Symbol) { 234 | return env_get(env, expr, result); 235 | } else if (expr.type != AtomType_Pair) { 236 | *result = expr; 237 | return Error_OK; 238 | } 239 | 240 | if (!listp(expr)) 241 | return Error_Syntax; 242 | 243 | op = car(expr); 244 | args = cdr(expr); 245 | 246 | if (op.type == AtomType_Symbol) { 247 | if (strcmp(op.value.symbol, "QUOTE") == 0) { 248 | if (nilp(args) || !nilp(cdr(args))) 249 | return Error_Args; 250 | 251 | *result = car(args); 252 | return Error_OK; 253 | } else if (strcmp(op.value.symbol, "DEFINE") == 0) { 254 | Atom sym, val; 255 | 256 | if (nilp(args) || nilp(cdr(args)) || !nilp(cdr(cdr(args)))) 257 | return Error_Args; 258 | 259 | sym = car(args); 260 | if (sym.type != AtomType_Symbol) 261 | return Error_Type; 262 | 263 | err = eval_expr(car(cdr(args)), env, &val); 264 | if (err) 265 | return err; 266 | 267 | *result = sym; 268 | return env_set(env, sym, val); 269 | } 270 | } 271 | 272 | return Error_Syntax; 273 | } 274 |275 | 276 |
279 | Extending the read-print loop from the previous chapter, we now have a 280 | read-eval-print loop (REPL). This is the core of our LISP interpreter. 281 |
282 | 283 |284 | int main(int argc, char **argv) 285 | { 286 | Atom env; 287 | char *input; 288 | 289 | env = env_create(nil); 290 | 291 | while ((input = readline("> ")) != NULL) { 292 | const char *p = input; 293 | Error err; 294 | Atom expr, result; 295 | 296 | err = read_expr(p, &p, &expr); 297 | 298 | if (!err) 299 | err = eval_expr(expr, env, &result); 300 | 301 | switch (err) { 302 | case Error_OK: 303 | print_expr(result); 304 | putchar('\n'); 305 | break; 306 | case Error_Syntax: 307 | puts("Syntax error"); 308 | break; 309 | case Error_Unbound: 310 | puts("Symbol not bound"); 311 | break; 312 | case Error_Args: 313 | puts("Wrong number of arguments"); 314 | break; 315 | case Error_Type: 316 | puts("Wrong type"); 317 | break; 318 | } 319 | 320 | free(input); 321 | } 322 | 323 | return 0; 324 | } 325 |326 | 327 |
328 | Let's see what it can do. 329 |
330 | 331 |332 | > foo 333 | Symbol not bound 334 | > (quote foo) 335 | FOO 336 | > (define foo 42) 337 | FOO 338 | > foo 339 | 42 340 | > (define foo (quote bar)) 341 | FOO 342 | > foo 343 | BAR 344 |345 | 346 |
347 | We can now interactively assign names to objects. 348 |
349 | 350 | 351 | 352 | 353 | -------------------------------------------------------------------------------- /html/gc.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 |12 | We will implement a very simple mark-and-sweep garbage collector. 13 | This is not something you would want to use in a real application, 14 | but it will serve for our purposes. 15 |
16 | 17 |
18 | Remember that all our LISP data is allocated through the
19 | cons
function. First we modify it to keep track of
20 | every allocation in a linked list.
21 |
24 | struct Allocation { 25 | struct Pair pair; 26 | int mark : 1; 27 | struct Allocation *next; 28 | }; 29 | 30 | struct Allocation *global_allocations = NULL; 31 | 32 | Atom cons(Atom car_val, Atom cdr_val) 33 | { 34 | struct Allocation *a; 35 | Atom p; 36 | 37 | a = malloc(sizeof(struct Allocation)); 38 | a->mark = 0; 39 | a->next = global_allocations; 40 | global_allocations = a; 41 | 42 | p.type = AtomType_Pair; 43 | p.value.pair = &a->pair; 44 | 45 | car(p) = car_val; 46 | cdr(p) = cdr_val; 47 | 48 | return p; 49 | } 50 |51 | 52 |
53 | Now a function to mark a whole tree of pairs as "in use". 54 |
55 | 56 |57 | void gc_mark(Atom root) 58 | { 59 | struct Allocation *a; 60 | 61 | if (!(root.type == AtomType_Pair 62 | || root.type == AtomType_Closure 63 | || root.type == AtomType_Macro)) 64 | return; 65 | 66 | a = (struct Allocation *) 67 | ((char *) root.value.pair 68 | - offsetof(struct Allocation, pair)); 69 | 70 | if (a->mark) 71 | return; 72 | 73 | a->mark = 1; 74 | 75 | gc_mark(car(root)); 76 | gc_mark(cdr(root)); 77 | } 78 |79 | 80 |
81 | The garbage collector frees everything which is not marked, and 82 | then clears the marks ready for the next run. We also mark the 83 | symbol table since these are referenced by a static variable. 84 |
85 | 86 |87 | void gc() 88 | { 89 | struct Allocation *a, **p; 90 | 91 | gc_mark(sym_table); 92 | 93 | /* Free unmarked allocations */ 94 | p = &global_allocations; 95 | while (*p != NULL) { 96 | a = *p; 97 | if (!a->mark) { 98 | *p = a->next; 99 | free(a); 100 | } else { 101 | p = &a->next; 102 | } 103 | } 104 | 105 | /* Clear marks */ 106 | a = global_allocations; 107 | while (a != NULL) { 108 | a->mark = 0; 109 | a = a->next; 110 | } 111 | } 112 |113 | 114 |
115 | So that we don't run out of memory under deep recursion, we
116 | need to call the garbage collector every few iterations of
117 | eval_expr
. The interval will roughly determine
118 | how many allocations are made between garbage collections.
119 |
122 | int eval_expr(Atom expr, Atom env, Atom *result) 123 | { 124 | static int count = 0; 125 | Error err = Error_OK; 126 | Atom stack = nil; 127 | 128 | do { 129 | if (++count == 100000) { 130 | gc_mark(expr); 131 | gc_mark(env); 132 | gc_mark(stack); 133 | gc(); 134 | count = 0; 135 | } 136 | 137 | . 138 | . 139 | . 140 | } 141 |142 | 143 |
146 | Adapting the COUNT
example from previous chapters:
147 |
150 | > (define (count n) (if (= n 0) t (count (- n 1)))) 151 | COUNT 152 | > (count 1000000) 153 | T 154 |155 | 156 |
157 | And lo! the operation completes without eating up all of our RAM. 158 |
159 | 160 | 161 | 162 | 163 | -------------------------------------------------------------------------------- /html/index.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 |33 | The code is available on 34 | GitHub. 35 |
36 | 37 |38 | Questions? Comments? Email 39 | amy@lwh.jp. 40 |
41 | 42 | 43 | 44 | -------------------------------------------------------------------------------- /html/intro.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 |12 | The best way to understand how something works is to try to build it 13 | for yourself. Reading somebody else's explanation might satisfy your 14 | curiosity, but without the experience of falling into all the little 15 | traps it is difficult to get a feel for why something is 16 | designed a certain way. 17 |
18 | 19 |20 | It's been said that every would-be programmer should write a compiler. 21 | While I think this is good advice (although I haven't followed it myself), 22 | there is so much effort involved just in parsing a language such as C 23 | that any potential insights risk getting lost in a mire of details. 24 | Perhaps creating an interpreter for some simple language would be a good 25 | first step. 26 |
27 | 28 |29 | I first started playing around with LISP a good few years ago, yet much 30 | later than I should have. This led me to the classic lecture series 31 | 32 | Structure and Interpretation of Computer Programs. If you have the 33 | next 24 hours free and haven't seen the videos already, go watch them now. 34 |
35 | 36 |
37 | The course covers many topics, but the second half shows in detail how
38 | to evaluate LISP, first by implementing a simple version of
39 | eval
in LISP itself. I figured that this would translate
40 | well into C, and so decided to try creating my own implementation
41 | of LISP.
42 |
45 | It was really easy. 46 |
47 | 48 |49 | This article is an attempt to share the process by which I built my 50 | implementation, and the chapters occur roughly in the order in which I 51 | did things. Why not follow along and create your own version in your 52 | language of choice?* 53 |
54 | 55 |56 | As a professional programmer (ha, ha), I spend the majority 57 | of my time writing C and C++. Most of the rest is Java. There are many 58 | languages out there, each with their own debatable merits, but I'd like 59 | to demonstrate just how simple a LISP machine can be — even built 60 | in as low-level a language as C. See John McCarthy's 61 | 62 | History of LISP for the story of the pioneers. 63 |
64 | 65 |66 | So here is my toy implementation of LISP. I've borrowed features from 67 | various dialects, but it's closer to Scheme than Common LISP. The 68 | differences are trivial enough that changing over would not require 69 | substantial changes to the interpreter. Don't worry if you're not familiar 70 | with LISP; I will define everything as I go along. 71 |
72 | 73 |74 | It is not meant to be the smallest possible implementation, nor the 75 | most efficient, nor the most complete; it could however be described as 76 | lazy. My goal was to write robust, easy-to-read code that does exactly 77 | what it needs to, and no more, and I hope that it conveys how little 78 | effort is required to construct an incredibly powerful environment like LISP. 79 |
80 | 81 |
86 | * If you are using a fancy language which supports something like
87 | eval
, it would be cool to expose the native datatypes to
88 | the LISP environment.
89 |
12 | This is where things start to get interesting. We will now implement 13 | support for lambda expressions, a way to build functions dynamically 14 | out of the LISP expressions we can already deal with. 15 |
16 | 17 |18 | A lambda expression is a list expression with a particular syntax: 19 |
20 | (LAMBDA (arg...) expr...) 21 |22 | 23 | 24 |
25 | The result of evaluating a LAMBDA
expression is a new
26 | kind of object which we will call a closure. A closure can be used
27 | in list expressions in the same way as a built-in function. In this case
28 | the arguments will be bound to the symbols listed as arg...
29 | in the lambda expression. The body of the function consists of the
30 | expressions expr...
, which will be evaluated in turn. The result
31 | of evaluating the final expression is the result of applying the arguments
32 | to the closure.
33 |
36 | That's a pretty dense definition, so here is an example of how we would 37 | like to use lambda expressions: 38 |
39 | (DEFINE SQUARE (LAMBDA (X) (* X X))) 40 |41 | 42 | 43 |
44 | SQUARE
should now be a function of one argument
45 | X
, which returns the result of evaluating
46 | (* X X)
. Thus evaluating (SQUARE 3)
47 | should return 9
.
48 |
53 | We will represent the closure using a list: 54 |
55 | (env (arg...) expr...) 56 |57 |
env
is the environment in which the closure was defined.
58 | This is needed to allow the lambda function to use bindings without
59 | having to pass them as arguments. For example, recall that
60 | CAR
is bound in the initial environment to our primitive
61 | builtin_car
function.
62 |
63 |
64 |
65 | The first task is to add a new constant for the type
field
66 | of our Atom
structure:
67 |
68 | struct Atom { 69 | enum { 70 | . 71 | . 72 | . 73 | AtomType_Closure 74 | } type; 75 | 76 | union { 77 | . 78 | . 79 | . 80 | } value; 81 | }; 82 |83 | Since the closure is just a regular list, there is no need to add anything 84 | to
value
.
85 |
86 |
87 |
88 | Like our other atom types, we will create a utility function to
89 | initialize them. make_closure
, unlike the others, performs
90 | some validation of the arguments and so needs to return an error code.
91 |
94 | int make_closure(Atom env, Atom args, Atom body, Atom *result) 95 | { 96 | Atom p; 97 | 98 | if (!listp(args) || !listp(body)) 99 | return Error_Syntax; 100 | 101 | /* Check argument names are all symbols */ 102 | p = args; 103 | while (!nilp(p)) { 104 | if (car(p).type != AtomType_Symbol) 105 | return Error_Type; 106 | p = cdr(p); 107 | } 108 | 109 | *result = cons(env, cons(args, body)); 110 | result->type = AtomType_Closure; 111 | 112 | return Error_OK; 113 | } 114 |115 | 116 |
117 | Next up is another special case in eval
to create a
118 | closure whenever a lambda expression is encountered.
119 |
122 | int eval_expr(Atom expr, Atom env, Atom *result) 123 | { 124 | . 125 | . 126 | . 127 | if (op.type == AtomType_Symbol) { 128 | if (strcmp(op.value.symbol, "QUOTE") == 0) { 129 | . 130 | . 131 | . 132 | } else if (strcmp(op.value.symbol, "LAMBDA") == 0) { 133 | if (nilp(args) || nilp(cdr(args))) 134 | return Error_Args; 135 | 136 | return make_closure(env, car(args), cdr(args), result); 137 | } 138 | } 139 | . 140 | . 141 | . 142 | } 143 |144 | 145 |
146 | The body of our SQUARE
example above is expressed in terms
147 | of X
. In order to evaluate the body, we need to create a new
148 | environment with X
bound to the value of the argument:
149 |
150 | (closure-env (X . 3)) 151 |152 | where the parent environment
closure-env
is the environment
153 | that was stored in the closure.
154 |
155 |
156 |
157 | Finally we extend apply
to create the new environment and
158 | call eval
for each expression in the body.
159 |
162 | int apply(Atom fn, Atom args, Atom *result) 163 | { 164 | Atom env, arg_names, body; 165 | 166 | if (fn.type == AtomType_Builtin) 167 | return (*fn.value.builtin)(args, result); 168 | else if (fn.type != AtomType_Closure) 169 | return Error_Type; 170 | 171 | env = env_create(car(fn)); 172 | arg_names = car(cdr(fn)); 173 | body = cdr(cdr(fn)); 174 | 175 | /* Bind the arguments */ 176 | while (!nilp(arg_names)) { 177 | if (nilp(args)) 178 | return Error_Args; 179 | env_set(env, car(arg_names), car(args)); 180 | arg_names = cdr(arg_names); 181 | args = cdr(args); 182 | } 183 | if (!nilp(args)) 184 | return Error_Args; 185 | 186 | /* Evaluate the body */ 187 | while (!nilp(body)) { 188 | Error err = eval_expr(car(body), env, result); 189 | if (err) 190 | return err; 191 | body = cdr(body); 192 | } 193 | 194 | return Error_OK; 195 | } 196 |197 | 198 |
201 | Let's check that our SQUARE
function works as intended.
202 |
205 | > (define square (lambda (x) (* x x))) 206 | SQUARE 207 | > (square 3) 208 | 9 209 | > (square 4) 210 | 16 211 |212 | 213 |
214 | Of course, lambda expressions do not have to be bound to a symbol — 215 | we can create anonymous functions. 216 |
217 | 218 |219 | > ((lambda (x) (- x 2)) 7) 220 | 5 221 |222 | 223 |
224 | Fans of functional programming will be pleased to see that we can now 225 | do this kind of thing: 226 |
227 | 228 |229 | > (define make-adder (lambda (x) (lambda (y) (+ x y)))) 230 | MAKE-ADDER 231 | > (define add-two (make-adder 2)) 232 | ADD-TWO 233 | > (add-two 5) 234 | 7 235 |236 | 237 |
238 | Do you know where the value "2" is stored? 239 |
240 | 241 | 242 | 243 | 244 | -------------------------------------------------------------------------------- /html/library.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 |12 | We will now create a small library of useful functions for 13 | our LISP system. Rather than creating new builtins for each one, 14 | let's take advantage of the fact that much of the LISP standard 15 | library can be implemented in LISP itself in terms of lower-level 16 | fuctions. 17 |
18 | 19 |20 | First we need a function to read the library definitions from disk. 21 |
22 | 23 |24 | char *slurp(const char *path) 25 | { 26 | FILE *file; 27 | char *buf; 28 | long len; 29 | 30 | file = fopen(path, "r"); 31 | if (!file) 32 | return NULL; 33 | fseek(file, 0, SEEK_END); 34 | len = ftell(file); 35 | fseek(file, 0, SEEK_SET); 36 | 37 | buf = malloc(len + 1); 38 | if (!buf) 39 | return NULL; 40 | 41 | fread(buf, 1, len, file); 42 | buf[len] = 0; 43 | fclose(file); 44 | 45 | return buf; 46 | } 47 |48 | 49 |
50 | And a routine, similar to our REPL in main
, to
51 | process the definitions. Because we read the whole file in one
52 | go, there is no problem with splitting definitions over several
53 | lines.
54 |
57 | void load_file(Atom env, const char *path) 58 | { 59 | char *text; 60 | 61 | printf("Reading %s...\n", path); 62 | text = slurp(path); 63 | if (text) { 64 | const char *p = text; 65 | Atom expr; 66 | while (read_expr(p, &p, &expr) == Error_OK) { 67 | Atom result; 68 | Error err = eval_expr(expr, env, &result); 69 | if (err) { 70 | printf("Error in expression:\n\t"); 71 | print_expr(expr); 72 | putchar('\n'); 73 | } else { 74 | print_expr(result); 75 | putchar('\n'); 76 | } 77 | } 78 | free(text); 79 | } 80 | } 81 |82 | 83 |
84 | Finally read in the library after setting up the builtins. 85 |
86 | 87 |88 | int main(int argc, char **argv) 89 | { 90 | . 91 | . 92 | . 93 | 94 | /* Set up the initial environment */ 95 | . 96 | . 97 | . 98 | 99 | load_file(env, "library.lisp"); 100 | 101 | /* Main loop */ 102 | . 103 | . 104 | . 105 | } 106 |107 | 108 |
111 | Create library.lisp
with the following definition:
112 |
113 | (define (abs x) (if (< x 0) (- x) x)) 114 |115 | 116 | 117 |
118 | And run the interpreter: 119 |
120 | Reading library.lisp... 121 | ABS 122 | > (abs -2) 123 | 2 124 |125 | The
ABS
function will now be available in every session
126 | without having to define it each time.
127 |
128 |
129 | fold
132 | foldl
and foldr
allow us to easily construct
133 | functions which combine elements of a list.
134 |
137 | (define (foldl proc init list) 138 | (if list 139 | (foldl proc 140 | (proc init (car list)) 141 | (cdr list)) 142 | init)) 143 | 144 | (define (foldr proc init list) 145 | (if list 146 | (proc (car list) 147 | (foldr proc init (cdr list))) 148 | init)) 149 |150 | 151 |
152 | See 153 | 154 | the internet for more details. 155 |
156 | 157 |158 | (define (list . items) 159 | (foldr cons nil items)) 160 | 161 | (define (reverse list) 162 | (foldl (lambda (a x) (cons x a)) nil list)) 163 |164 | 165 |
166 | list
constructs a new list containing its arguments.
167 | reverse
creates a copy of a list with the items in
168 | reverse order.
169 |
172 | The recursive definition of LIST
requires O(n) stack
173 | space - a serious implementation would most likely use a more efficient
174 | version.
175 |
180 | > (list (+ 3 5) 'foo) 181 | (8 FOO) 182 | > (reverse '(1 2 3)) 183 | (3 2 1) 184 |185 | 186 |
187 | See how much easier this was than implementing the functions as 188 | builtins. 189 |
190 | 191 |194 | Some primitive functions require access to the internals of the system. 195 |
196 | 197 |apply
200 | The apply
function:
201 |
202 | (APPLY fn arg-list) 203 |204 | calls
fn
with the arguments bound to the values in the
205 | list arg-list
.
206 |
207 |
208 | 209 | int builtin_apply(Atom args, Atom *result) 210 | { 211 | Atom fn; 212 | 213 | if (nilp(args) || nilp(cdr(args)) || !nilp(cdr(cdr(args)))) 214 | return Error_Args; 215 | 216 | fn = car(args); 217 | args = car(cdr(args)); 218 | 219 | if (!listp(args)) 220 | return Error_Syntax; 221 | 222 | return apply(fn, args, result); 223 | } 224 |225 | 226 |
eq?
229 | eq?
tests whether two atoms refer to the same object.
230 |
233 | int builtin_eq(Atom args, Atom *result) 234 | { 235 | Atom a, b; 236 | int eq; 237 | 238 | if (nilp(args) || nilp(cdr(args)) || !nilp(cdr(cdr(args)))) 239 | return Error_Args; 240 | 241 | a = car(args); 242 | b = car(cdr(args)); 243 | 244 | if (a.type == b.type) { 245 | switch (a.type) { 246 | case AtomType_Nil: 247 | eq = 1; 248 | break; 249 | case AtomType_Pair: 250 | case AtomType_Closure: 251 | case AtomType_Macro: 252 | eq = (a.value.pair == b.value.pair); 253 | break; 254 | case AtomType_Symbol: 255 | eq = (a.value.symbol == b.value.symbol); 256 | break; 257 | case AtomType_Integer: 258 | eq = (a.value.integer == b.value.integer); 259 | break; 260 | case AtomType_Builtin: 261 | eq = (a.value.builtin == b.value.builtin); 262 | break; 263 | } 264 | } else { 265 | eq = 0; 266 | } 267 | 268 | *result = eq ? make_sym("T") : nil; 269 | return Error_OK; 270 | } 271 |272 | 273 |
pair?
276 | Tests whether an atom is a pair. 277 |
278 | 279 |280 | int builtin_pairp(Atom args, Atom *result) 281 | { 282 | if (nilp(args) || !nilp(cdr(args))) 283 | return Error_Args; 284 | 285 | *result = (car(args).type == AtomType_Pair) ? make_sym("T") : nil; 286 | return Error_OK; 287 | } 288 |289 | 290 |
291 | Don't forget to add bindings for these to the initial environment. 292 |
293 | env_set(env, make_sym("APPLY"), make_builtin(builtin_apply)); 294 | env_set(env, make_sym("EQ?"), make_builtin(builtin_eq)); 295 | env_set(env, make_sym("PAIR?"), make_builtin(builtin_pairp)); 296 |297 | 298 | 299 |
map
302 | We can use foldr
and apply
to implement
303 | another important function map
, which constructs a
304 | list containing the results of calling an n-ary function
305 | with the values contained in n lists in turn.
306 |
309 | (define (unary-map proc list) 310 | (foldr (lambda (x rest) (cons (proc x) rest)) 311 | nil 312 | list)) 313 | 314 | (define (map proc . arg-lists) 315 | (if (car arg-lists) 316 | (cons (apply proc (unary-map car arg-lists)) 317 | (apply map (cons proc 318 | (unary-map cdr arg-lists)))) 319 | nil)) 320 |321 | 322 |
323 | Once again please note that there are alternative implementations. 324 |
325 | 326 |327 | It works like this: 328 |
329 | 330 |331 | > (map + '(1 2 3) '(4 5 6)) 332 | (5 7 9) 333 |334 | 335 |
336 | The result is a list containing the results of evaluating
337 | (+ 1 4)
, (+ 2 5)
, and
338 | (+ 3 6)
.
339 |
12 | Macros allow you to create new special forms at runtime. Unlike a 13 | function, the arguments to a macro are not evaluated. The 14 | result of evaluating the body of the macro is then itself evaluated. 15 |
16 | 17 |18 | Note: these are (essentially) Common LISP macros. Scheme has a 19 | different macro system, which avoids problems with identifiers 20 | introduced by the macro, but is more complex. 21 |
22 | 23 |24 | We will define macros using the following syntax: 25 |
26 | (DEFMACRO (name arg...) body...) 27 |28 | This matches our
DEFINE
syntax for functions, but is
29 | slightly different from the form used in Common LISP.
30 |
31 |
32 |
35 | Take the macro IGNORE
defined by:
36 |
37 | (DEFMACRO (IGNORE X) 38 | (CONS 'QUOTE 39 | (CONS X NIL))) 40 |41 | 42 | 43 |
44 | If we then evaluate the expression 45 |
46 | (IGNORE FOO) 47 |48 | where
FOO
need not be bound, the body of IGNORE
49 | will first be evaluated with the argument X
bound to the
50 | unevaluated symbol FOO
. The result of evaluating
51 | the nested CONS
expressions within this environment is:
52 | 53 | (QUOTE . (FOO . NIL)) 54 |55 | which is of course equivalent to: 56 |
57 | (QUOTE FOO) 58 |59 | Finally, evaluating this value (which is the result of evaluating the 60 | macro body) gives us: 61 |
62 | FOO 63 |64 | 65 | 66 |
69 | We will define a new type of atom: 70 |
71 | AtomType_Macro 72 |73 | the value of which is the same as
AtomType_Closure
.
74 |
75 |
76 |
77 | And now simply teach eval_expr
about our new macro
78 | type.
79 |
82 | int eval_expr(Atom expr, Atom env, Atom *result) 83 | { 84 | . 85 | . 86 | . 87 | if (op.type == AtomType_Symbol) { 88 | if (strcmp(op.value.symbol, "QUOTE") == 0) { 89 | . 90 | . 91 | . 92 | } else if (strcmp(op.value.symbol, "DEFMACRO") == 0) { 93 | Atom name, macro; 94 | Error err; 95 | 96 | if (nilp(args) || nilp(cdr(args))) 97 | return Error_Args; 98 | 99 | if (car(args).type != AtomType_Pair) 100 | return Error_Syntax; 101 | 102 | name = car(car(args)); 103 | if (name.type != AtomType_Symbol) 104 | return Error_Type; 105 | 106 | err = make_closure(env, cdr(car(args)), 107 | cdr(args), ¯o); 108 | if (err) 109 | return err; 110 | 111 | macro.type = AtomType_Macro; 112 | *result = name; 113 | return env_set(env, name, macro); 114 | } 115 | } 116 | 117 | /* Evaluate operator */ 118 | . 119 | . 120 | . 121 | 122 | /* Is it a macro? */ 123 | if (op.type == AtomType_Macro) { 124 | Atom expansion; 125 | op.type = AtomType_Closure; 126 | err = apply(op, args, &expansion); 127 | if (err) 128 | return err; 129 | return eval_expr(expansion, env, result); 130 | } 131 | 132 | /* Evaulate arguments */ 133 | . 134 | . 135 | . 136 | } 137 |138 | 139 |
142 | > (defmacro (ignore x) (cons 'quote (cons x nil))) 143 | IGNORE 144 | > (ignore foo) 145 | FOO 146 | > foo 147 | Symbol not bound 148 |149 | 150 |
151 | We will use macros in the future to define some new special forms. 152 |
153 | 154 | 155 | 156 | 157 | -------------------------------------------------------------------------------- /html/next.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 |12 | The goal of this project was to demonstrate an easy implementation 13 | of LISP. There is not much point in optimizing or mindlessly 14 | implementing library functions — this work has already been 15 | done in other projects. 16 |
17 | 18 |19 | Here are some possible extensions which might prove interesting: 20 |
call/cc
33 | Now it's time to stop messing about in C and build something in LISP instead! 34 |
35 | 36 |37 | That's all, folks. 38 |
39 | 40 | 41 | 42 | 43 | -------------------------------------------------------------------------------- /html/parser.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 |12 | The next stage in our project is parsing: taking a line of text 13 | from the user (or elsewhere), and creating the data objects it represents. 14 | Naturally the user might type something which does not represent an 15 | object according to our definitions, in which case we must have some way 16 | to signal an error. 17 |
18 | 19 |
22 | Here is a definition of an Error
type:
23 |
24 | typedef enum { 25 | Error_OK = 0, 26 | Error_Syntax 27 | } Error; 28 |29 | If, like me, you learned to program in BASIC on microcomputers, you 30 | will be familiar with the dreaded
SYNTAX ERROR
. Now is our
31 | chance to see things from the other side of the fence. Most of our
32 | functions from now on will return an Error
to indicate
33 | whether and how something went wrong.
34 |
35 |
36 | 39 | I have no formal training in CS, but as far as I understand it the idea is 40 | to split a string up into tokens, which are both "words" and 41 | "punctuation", and discard any insignificant white space. So if the 42 | input is: 43 |
44 | (foo bar) 45 |46 | Then the four tokens are: 47 |
( |
50 | foo |
51 | bar |
52 | ) |
53 |
58 | So let's start by creating a lexer, which will return the start 59 | and end of the next token in a string. 60 |
61 | 62 |63 | int lex(const char *str, const char **start, const char **end) 64 | { 65 | const char *ws = " \t\n"; 66 | const char *delim = "() \t\n"; 67 | const char *prefix = "()"; 68 | 69 | str += strspn(str, ws); 70 | 71 | if (str[0] == '\0') { 72 | *start = *end = NULL; 73 | return Error_Syntax; 74 | } 75 | 76 | *start = str; 77 | 78 | if (strchr(prefix, str[0]) != NULL) 79 | *end = str + 1; 80 | else 81 | *end = str + strcspn(str, delim); 82 | 83 | return Error_OK; 84 | } 85 |86 | 87 |
88 | If our lexer hits the end of the string without finding a token (ie,
89 | the remainder of the string is entirely white space), then it will
90 | return a syntax error and set the start and end to NULL
.
91 |
96 | Now we can think about the parser itself. The entry point
97 | is read_expr
, which will read a single (possibly complex)
98 | object and return the error status and a pointer to the remainder of
99 | the input.
100 |
101 | int read_expr(const char *input, const char **end, Atom *result); 102 |103 | 104 | 105 |
106 | We will first deal with the simple data: integers, symbols and
107 | NIL
. If you have a regex library available then this is
108 | easy, but it's not too bad in plain C either.
109 |
112 | int parse_simple(const char *start, const char *end, Atom *result) 113 | { 114 | char *buf, *p; 115 | 116 | /* Is it an integer? */ 117 | long val = strtol(start, &p, 10); 118 | if (p == end) { 119 | result->type = AtomType_Integer; 120 | result->value.integer = val; 121 | return Error_OK; 122 | } 123 | 124 | /* NIL or symbol */ 125 | buf = malloc(end - start + 1); 126 | p = buf; 127 | while (start != end) 128 | *p++ = toupper(*start), ++start; 129 | *p = '\0'; 130 | 131 | if (strcmp(buf, "NIL") == 0) 132 | *result = nil; 133 | else 134 | *result = make_sym(buf); 135 | 136 | free(buf); 137 | 138 | return Error_OK; 139 | } 140 |141 | 142 |
143 | Notice two things: first, we are converting the input to upper case.
144 | This isn't strictly necessary — there's nothing wrong with having
145 | a case-sensitive lisp — but it is the traditional behaviour.
146 | Secondly, NIL
is a special case: it's parsed directly as
147 | AtomType_Nil
, rather than leaving it as a symbol.
148 |
151 | If you're familiar with the various dialects of LISP then you will know
152 | that NIL
is not necessarily the same as ()
,
153 | the empty list. We could choose to treat NIL
as a
154 | symbol which evaluates to itself, but for this project we will consider
155 | both representations to be exactly the same.
156 |
159 | Next up are lists (including improper lists and pairs). The simplified 160 | list syntax makes this a little complicated, so we'll stick it all in a 161 | helper function. Once again recursion allows us to deal with nested 162 | lists. 163 |
164 | 165 |166 | int read_list(const char *start, const char **end, Atom *result) 167 | { 168 | Atom p; 169 | 170 | *end = start; 171 | p = *result = nil; 172 | 173 | for (;;) { 174 | const char *token; 175 | Atom item; 176 | Error err; 177 | 178 | err = lex(*end, &token, end); 179 | if (err) 180 | return err; 181 | 182 | if (token[0] == ')') 183 | return Error_OK; 184 | 185 | if (token[0] == '.' && *end - token == 1) { 186 | /* Improper list */ 187 | if (nilp(p)) 188 | return Error_Syntax; 189 | 190 | err = read_expr(*end, end, &item); 191 | if (err) 192 | return err; 193 | 194 | cdr(p) = item; 195 | 196 | /* Read the closing ')' */ 197 | err = lex(*end, &token, end); 198 | if (!err && token[0] != ')') 199 | err = Error_Syntax; 200 | 201 | return err; 202 | } 203 | 204 | err = read_expr(token, end, &item); 205 | if (err) 206 | return err; 207 | 208 | if (nilp(p)) { 209 | /* First item */ 210 | *result = cons(item, nil); 211 | p = *result; 212 | } else { 213 | cdr(p) = cons(item, nil); 214 | p = cdr(p); 215 | } 216 | } 217 | } 218 |219 | 220 |
221 | I dislike writing infinite loops, but this is the clearest layout I have 222 | come up with so far. Let me know if you can write a better one! 223 |
224 | 225 |
226 | Finally we have read_expr
itself, which is very simple now
227 | that we have done all of the hard work:
228 |
229 | int read_expr(const char *input, const char **end, Atom *result) 230 | { 231 | const char *token; 232 | Error err; 233 | 234 | err = lex(input, &token, end); 235 | if (err) 236 | return err; 237 | 238 | if (token[0] == '(') 239 | return read_list(*end, end, result); 240 | else if (token[0] == ')') 241 | return Error_Syntax; 242 | else 243 | return parse_simple(token, *end, result); 244 | } 245 |246 | The check for a closing bracket will catch invalid forms such as 247 |
)and
(X .)248 | 249 | 250 |
253 | If we use the parser to create a simple read-print loop, then the we 254 | can type representations of objects on the console and check that they 255 | are parsed correctly. 256 |
257 | 258 |259 | int main(int argc, char **argv) 260 | { 261 | char *input; 262 | 263 | while ((input = readline("> ")) != NULL) { 264 | const char *p = input; 265 | Error err; 266 | Atom expr; 267 | 268 | err = read_expr(p, &p, &expr); 269 | 270 | switch (err) { 271 | case Error_OK: 272 | print_expr(expr); 273 | putchar('\n'); 274 | break; 275 | case Error_Syntax: 276 | puts("Syntax error"); 277 | break; 278 | } 279 | 280 | free(input); 281 | } 282 | 283 | return 0; 284 | } 285 |286 | 287 |
288 | This version uses the readline library, which shows a prompt
289 | and reads a line of text from the console. It supports editing beyond
290 | what a dumb terminal can provide, but a simple wrapper around
291 | fgets()
will do just as well.
292 |
295 | > 42 296 | 42 297 | > (foo bar) 298 | (FOO BAR) 299 | > (s (t . u) v . (w . nil)) 300 | (S (T . U) V W) 301 | > () 302 | NIL 303 |304 | 305 |
306 | Looks good! Remember that ()
is exactly the same as
307 | NIL
, and that (X Y)
is just another way of
308 | writing (X . (Y . NIL))
.
309 |
12 | QUASIQUOTE
is an extension of the QUOTE
13 | special form which is convenient for writing macros.
14 |
17 | For symbols and other simple data, QUASIQUOTE
behaves
18 | like QUOTE
, returning the datum unevaluated. Lists
19 | are also return without being evaluated, with two exceptions. If
20 | an element of the list (or a sub-list) is of the form
21 | (UNQUOTE expr)
, then expr
is
22 | evaluated and the result inserted into the list in place.
23 | (UNQUOTE-SPLICING expr)
is similar, but the
24 | result of evaluating expr
must be a list, the items
25 | of which are spliced into the parent list.
26 |
31 |
32 | (QUASIQUOTE (+ 1 (UNQUOTE (+ 2 3)))) 33 |34 | evaluates to 35 |
36 | (+ 1 5) 37 |38 | 39 | 40 |
41 | If we define L
to be the list (3 4 5)
42 | then
43 |
44 | (QUASIQUOTE (1 2 (UNQUOTE-SPLICING L))) 45 |46 | evaluates to 47 |
48 | (1 2 3 4 5) 49 |50 | 51 | 52 |
55 | Just like QUOTE
, we will define the following
56 | abbreviations.
57 |
Abbreviation | Equivalent to |
---|---|
`expr |
63 | (QUASIQUOTE expr) |
64 |
,expr |
67 | (UNQUOTE expr) |
68 |
,@expr |
71 | (UNQUOTE-SPLICING expr) |
72 |
76 | Rewriting the examples above with this syntax gives 77 |
78 | `(+ 1 ,(+ 2 3)) 79 |80 | and 81 |
82 | `(1 2 ,@L) 83 |84 | 85 | 86 |
89 | We extend the lexer to understand the additional special tokens. 90 |
91 | 92 |93 | int lex(const char *str, const char **start, const char **end) 94 | { 95 | const char *ws = " \t\n"; 96 | const char *delim = "() \t\n"; 97 | const char *prefix = "()\'`"; 98 | 99 | str += strspn(str, ws); 100 | 101 | if (str[0] == '\0') { 102 | *start = *end = NULL; 103 | return Error_Syntax; 104 | } 105 | 106 | *start = str; 107 | 108 | if (strchr(prefix, str[0]) != NULL) 109 | *end = str + 1; 110 | else if (str[0] == ',') 111 | *end = str + (str[1] == '@' ? 2 : 1); 112 | else 113 | *end = str + strcspn(str, delim); 114 | 115 | return Error_OK; 116 | } 117 |118 | 119 |
120 | read_expr
must expand the abbreviations in the same
121 | way as QUOTE
122 |
125 | int read_expr(const char *input, const char **end, Atom *result) 126 | { 127 | . 128 | . 129 | . 130 | if (token[0] == '(') { 131 | . 132 | . 133 | . 134 | } else if (token[0] == '`') { 135 | *result = cons(make_sym("QUASIQUOTE"), cons(nil, nil)); 136 | return read_expr(*end, end, &car(cdr(*result))); 137 | } else if (token[0] == ',') { 138 | *result = cons(make_sym( 139 | token[1] == '@' ? "UNQUOTE-SPLICING" : "UNQUOTE"), 140 | cons(nil, nil)); 141 | return read_expr(*end, end, &car(cdr(*result))); 142 | } else { 143 | . 144 | . 145 | . 146 | } 147 | } 148 |149 | 150 |
151 | The QUASIQUOTE
operator itself may be defined as a macro.
152 | First we need a few helper functions.
153 |
156 | (define (append a b) (foldr cons b a)) 157 | 158 | (define (caar x) (car (car x))) 159 | 160 | (define (cadr x) (car (cdr x))) 161 |162 | 163 |
164 | (append a b)
concatenates the lists a
165 | and b
.
166 |
169 | And now the macro itself: 170 |
171 | 172 |173 | (defmacro (quasiquote x) 174 | (if (pair? x) 175 | (if (eq? (car x) 'unquote) 176 | (cadr x) 177 | (if (eq? (if (pair? (car x)) (caar x) nil) 'unquote-splicing) 178 | (list 'append 179 | (cadr (car x)) 180 | (list 'quasiquote (cdr x))) 181 | (list 'cons 182 | (list 'quasiquote (car x)) 183 | (list 'quasiquote (cdr x))))) 184 | (list 'quote x))) 185 |186 | 187 |
188 | The definition above is a little hard to follow, since the
189 | resulting expression must be built up using LIST
190 | and may include additional calls to QUASIQUOTE
.
191 |
194 | Quasiquotation allows us to make the body of a macro look like
195 | the expression it returns; for example the IGNORE
196 | macro in chapter 11
197 |
198 | (DEFMACRO (IGNORE X) 199 | (CONS 'QUOTE (CONS X NIL))) 200 |201 | can now be written 202 |
203 | (DEFMACRO (IGNORE X) 204 | `(QUOTE ,X)) 205 |206 | and the operation is made clear. 207 | 208 | 209 |
212 | > `(+ 1 ,(+ 2 3)) 213 | (+ 1 5) 214 | > (define l '(3 4 5)) 215 | L 216 | > `(1 2 ,@l) 217 | (1 2 3 4 5) 218 |219 | 220 |
let
223 | We will now use QUASIQUOTE
to define a new special
224 | form:
225 |
226 | (LET ((sym1 expr1) 227 | (sym2 expr2) 228 | ...) 229 | body...) 230 |231 | 232 | 233 |
234 | LET
causes the expressions expr
to be evaluated
235 | with the symbols sym1
, sym2
... bound to the
236 | result of evaluating expr1
, expr2
and so on.
237 | The result of the last expression body
to be evaluated
238 | is returned.
239 |
242 | The definition is simple. 243 |
244 | 245 |246 | (defmacro (let defs . body) 247 | `((lambda ,(map car defs) ,@body) 248 | ,@(map cadr defs))) 249 |250 | 251 |
254 | When we evaluate the form 255 |
256 | (LET ((X 3) (Y 5)) (+ X Y)) 257 |258 | it is transformed by the
LET
macro into
259 | 260 | ((LAMBDA (X Y) (+ X Y)) 3 5) 261 |262 | which behaves as desired. 263 | 264 | 265 |
268 | > (let ((x 3) (y 5)) (+ x y)) 269 | 8 270 | > x 271 | Symbol not bound 272 |273 | 274 |
275 | The LET
expression clarifies the programmer's
276 | intent to make temporary definitions.
277 |
282 | We can use LET
to extend the built-in binary operator
283 | +
to accept any number of arguments.
284 |
287 | (define + 288 | (let ((old+ +)) 289 | (lambda xs (foldl old+ 0 xs)))) 290 |291 | 292 |
293 | Compare this with the definition of ADD
add the end
294 | of chapter 10.
295 |
300 | > (+ 1 2 3 4) 301 | 10 302 |303 | 304 |
305 | We didn't have to touch builtin_add
or even recompile
306 | the interpreter.
307 |
12 | We will define some additional syntax to facilitate entry of some common 13 | expressions. Recall that we already allow the user to enter 14 |
15 | (A B C) 16 |17 | instead of 18 |
19 | (A . (B . (C . NIL))) 20 |21 | 22 | 23 |
26 | In order to include a literal symbol or list in an expression, we need
27 | to use the QUOTE
operator. As a shortcut, we will
28 | define
29 |
30 | 'EXPR 31 |32 | to be equivalent to 33 |
34 | (QUOTE EXPR) 35 |36 | 37 | 38 |
39 | So for example the following forms are equivalent: 40 |
Abbreviation | Canonical form | Evaluates to | 43 |
---|---|---|
'FOO |
45 | (QUOTE FOO) |
46 | FOO |
47 |
'(+ 1 2) |
50 | (QUOTE (+ 1 2)) |
51 | (+ 1 2) |
52 |
'(A . B) |
55 | (QUOTE (A . B)) |
56 | (A . B) |
57 |
62 | The lexer needs to know that the quote mark is a prefix (i.e., it can 63 | appear immediately before another token but is not necessarily a 64 | delimeter). 65 |
66 | int lex(const char *str, const char **start, const char **end) 67 | { 68 | const char *ws = " \t\n"; 69 | const char *delim = "() \t\n"; 70 | const char *prefix = "()\'"; 71 | . 72 | . 73 | . 74 | } 75 |76 | 77 | 78 |
79 | Also read_expr
must convert it to the correct list
80 | expresssion.
81 |
84 | int read_expr(const char *input, const char **end, Atom *result) 85 | { 86 | const char *token; 87 | Error err; 88 | 89 | err = lex(input, &token, end); 90 | if (err) 91 | return err; 92 | 93 | if (token[0] == '(') { 94 | return read_list(*end, end, result); 95 | } else if (token[0] == ')') { 96 | return Error_Syntax; 97 | } else if (token[0] == '\'') { 98 | *result = cons(make_sym("QUOTE"), cons(nil, nil)); 99 | return read_expr(*end, end, &car(cdr(*result))); 100 | } else { 101 | return parse_simple(token, *end, result); 102 | } 103 | } 104 |105 | 106 |
109 | > (define x '(a b c)) 110 | X 111 | > x 112 | (A B C) 113 | > 'x 114 | X 115 | > (define foo 'bar) 116 | FOO 117 | > foo 118 | BAR 119 | > ''() 120 | (QUOTE NIL) 121 |122 | 123 |
126 | It is cumbersome to have to type a lambda expression every time we wish
127 | to define a function, so we will modify the DEFINE
operator
128 | to avoid this.
129 |
130 | (DEFINE (name args...) body...) 131 |132 | is equivalent to 133 |
134 | (DEFINE name (LAMBDA (args...) body...)) 135 |136 | 137 | 138 |
139 | Here's how: 140 |
141 | 142 |143 | int eval_expr(Atom expr, Atom env, Atom *result) 144 | { 145 | . 146 | . 147 | . 148 | if (op.type == AtomType_Symbol) { 149 | if (strcmp(op.value.symbol, "QUOTE") == 0) { 150 | . 151 | . 152 | . 153 | } else if (strcmp(op.value.symbol, "DEFINE") == 0) { 154 | Atom sym, val; 155 | 156 | if (nilp(args) || nilp(cdr(args))) 157 | return Error_Args; 158 | 159 | sym = car(args); 160 | if (sym.type == AtomType_Pair) { 161 | err = make_closure(env, cdr(sym), cdr(args), &val); 162 | sym = car(sym); 163 | if (sym.type != AtomType_Symbol) 164 | return Error_Type; 165 | } else if (sym.type == AtomType_Symbol) { 166 | if (!nilp(cdr(cdr(args)))) 167 | return Error_Args; 168 | err = eval_expr(car(cdr(args)), env, &val); 169 | } else { 170 | return Error_Type; 171 | } 172 | 173 | if (err) 174 | return err; 175 | 176 | *result = sym; 177 | return env_set(env, sym, val); 178 | } else if (strcmp(op.value.symbol, "LAMBDA") == 0) { 179 | . 180 | . 181 | . 182 | } 183 | } 184 | . 185 | . 186 | . 187 | } 188 |189 | 190 |
193 | > (define (square x) (* x x)) 194 | SQUARE 195 | > (square 3) 196 | 9 197 |198 | 199 |
200 | Sweet! 201 |
202 | 203 | 204 | 205 | 206 | -------------------------------------------------------------------------------- /html/variadics.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 |12 | Up till now all functions have had a specified number of named arguments. 13 | We will now introduce a syntax for defining variadic functions, 14 | which may take a fixed number of named arguments and a variable number 15 | of additional arguments which are collected into a named list. 16 |
17 | 18 |19 | The argument declarations for variadic functions are improper lists: 20 |
21 | 22 |λ-syntax | Combined DEFINE | |
---|---|---|
3 args | 26 |
27 | (LAMBDA (arg1 arg2 arg3) body...)
28 | |
29 |
30 | (DEFINE (name arg1 arg2 arg3) body...)
31 | |
32 |
≥2 args | 35 |
36 | (LAMBDA (arg1 arg2 . rest) body...)
37 | |
38 |
39 | (DEFINE (name arg1 arg2 . rest) body...)
40 | |
41 |
≥1 args | 44 |
45 | (LAMBDA (arg1 . rest) body...)
46 | |
47 |
48 | (DEFINE (name arg1 . rest) body...)
49 | |
50 |
≥0 args | 53 |
54 | (LAMBDA args body...)
55 | |
56 |
57 | (DEFINE (name . args) body...)
58 | |
59 |
63 | In the definitions above, the parameters are bound as follows: 64 |
65 | 66 |Definition | 69 |(f 1 2 3) |
70 | ||
---|---|---|---|
Value of a |
73 | Value of b |
74 | Value of c |
75 | |
(DEFINE (f a b c) body...) |
78 | 1 |
79 | 2 |
80 | 3 |
81 |
(DEFINE (f a b . c) body...) |
84 | 1 |
85 | 2 |
86 | (3) |
87 |
(DEFINE (f a . b) body...) |
90 | 1 |
91 | (2 3) |
92 | |
(DEFINE (f . a) body...) |
95 | (1 2 3) |
96 |
102 | All that is required is a small modification to
103 | make_closure
to accept the declaration:
104 |
105 | int make_closure(Atom env, Atom args, Atom body, Atom *result) 106 | { 107 | Atom p; 108 | 109 | if (!listp(body)) 110 | return Error_Syntax; 111 | 112 | /* Check argument names are all symbols */ 113 | p = args; 114 | while (!nilp(p)) { 115 | if (p.type == AtomType_Symbol) 116 | break; 117 | else if (p.type != AtomType_Pair 118 | || car(p).type != AtomType_Symbol) 119 | return Error_Type; 120 | p = cdr(p); 121 | } 122 | 123 | *result = cons(env, cons(args, body)); 124 | result->type = AtomType_Closure; 125 | 126 | return Error_OK; 127 | } 128 |129 | 130 | 131 |
132 | And another to apply
to bind the additional arguments
133 | into a list:
134 |
135 | int apply(Atom fn, Atom args, Atom *result) 136 | { 137 | . 138 | . 139 | . 140 | /* Bind the arguments */ 141 | while (!nilp(arg_names)) { 142 | if (arg_names.type == AtomType_Symbol) { 143 | env_set(env, arg_names, args); 144 | args = nil; 145 | break; 146 | } 147 | 148 | if (nilp(args)) 149 | return Error_Args; 150 | env_set(env, car(arg_names), car(args)); 151 | arg_names = cdr(arg_names); 152 | args = cdr(args); 153 | } 154 | if (!nilp(args)) 155 | return Error_Args; 156 | . 157 | . 158 | . 159 | } 160 |161 | 162 | 163 |
166 | A boring example: 167 |
168 | 169 |170 | > ((lambda (a . b) a) 1 2 3) 171 | 1 172 | > ((lambda (a . b) b) 1 2 3) 173 | (2 3) 174 | > ((lambda args args) 1 2 3) 175 | (1 2 3) 176 |177 | 178 |
179 | We can also create a variadic adder: 180 |
181 | 182 |183 | > (define (sum-list xs) 184 | (if xs 185 | (+ (car xs) (sum-list (cdr xs))) 186 | 0)) 187 | SUM-LIST 188 | > (sum-list '(1 2 3)) 189 | 6 190 | > (define (add . xs) (sum-list xs)) 191 | ADD 192 | > (add 1 2 3) 193 | 6 194 | > (add 1 (- 4 2) (/ 9 3)) 195 | 6 196 |197 | 198 |
199 | Since you can always pass a list to a regular function, this is 200 | really just another kind of syntactic sugar. 201 |
202 | 203 | 204 | 205 | 206 | -------------------------------------------------------------------------------- /library.lisp: -------------------------------------------------------------------------------- 1 | ;; 2 | ;; Functions used in macro definitions 3 | ;; 4 | 5 | (define (append a b) (foldr cons b a)) 6 | 7 | (define (caar x) (car (car x))) 8 | (define (cadr x) (car (cdr x))) 9 | (define (cdar x) (cdr (car x))) 10 | (define (cddr x) (cdr (cdr x))) 11 | 12 | (define (foldl proc init list) 13 | (if list 14 | (foldl proc 15 | (proc init (car list)) 16 | (cdr list)) 17 | init)) 18 | 19 | (define (foldr proc init list) 20 | (if list 21 | (proc (car list) 22 | (foldr proc init (cdr list))) 23 | init)) 24 | 25 | (define (list . items) 26 | (foldr cons nil items)) 27 | 28 | (define (unary-map proc list) 29 | (foldr (lambda (x rest) (cons (proc x) rest)) 30 | nil 31 | list)) 32 | 33 | (define (map proc . arg-lists) 34 | (if (car arg-lists) 35 | (cons (apply proc (unary-map car arg-lists)) 36 | (apply map (cons proc 37 | (unary-map cdr arg-lists)))) 38 | nil)) 39 | 40 | ;; 41 | ;; Quasiquote 42 | ;; 43 | 44 | (defmacro (quasiquote x) 45 | (if (pair? x) 46 | (if (eq? (car x) 'unquote) 47 | (cadr x) 48 | (if (eq? (if (pair? (car x)) (caar x) nil) 'unquote-splicing) 49 | (list 'append 50 | (cadr (car x)) 51 | (list 'quasiquote (cdr x))) 52 | (list 'cons 53 | (list 'quasiquote (car x)) 54 | (list 'quasiquote (cdr x))))) 55 | (list 'quote x))) 56 | 57 | ;; 58 | ;; Macros 59 | ;; 60 | 61 | (defmacro (and . terms) 62 | (if terms 63 | `(if ,(car terms) 64 | (and ,@(cdr terms)) 65 | nil) 66 | t)) 67 | 68 | (defmacro (begin . body) 69 | `((lambda () ,@body))) 70 | 71 | (defmacro (cond . clauses) 72 | (if clauses 73 | (let ((test (caar clauses)) 74 | (body (cdar clauses))) 75 | `(if ,test 76 | (begin ,@body) 77 | (cond ,@(cdr clauses)))) 78 | nil)) 79 | 80 | (defmacro (let defs . body) 81 | `((lambda ,(map car defs) ,@body) 82 | ,@(map cadr defs))) 83 | 84 | (defmacro (or . terms) 85 | (if terms 86 | `(if ,(car terms) 87 | t 88 | (or ,@(cdr terms))) 89 | nil)) 90 | 91 | (defmacro (unless test . body) 92 | `(when (not ,test) ,@body)) 93 | 94 | (defmacro (when test . body) 95 | `(if ,test (begin ,@body) nil)) 96 | 97 | ;; 98 | ;; Numeric functions 99 | ;; 100 | 101 | (define + 102 | (let ((old+ +)) 103 | (lambda xs (foldl old+ 0 xs)))) 104 | 105 | (define - 106 | (let ((old- -)) 107 | (lambda (x . xs) 108 | (if xs 109 | (foldl old- x xs) 110 | (old- 0 x))))) 111 | 112 | (define * 113 | (let ((old* *)) 114 | (lambda xs (foldl old* 1 xs)))) 115 | 116 | (define / 117 | (let ((old/ /)) 118 | (lambda (x . xs) 119 | (if xs 120 | (foldl old/ x xs) 121 | (old/ 1 x))))) 122 | 123 | (define (<= a b) (or (= a b) (< a b))) 124 | (define (> a b) (< b a)) 125 | (define (>= a b) (<= b a)) 126 | 127 | (define (abs x) (if (negative? x) (- x) x)) 128 | 129 | (define (even? x) (= (modulo x 2) 0)) 130 | 131 | (define (gcd . xs) 132 | (define (gcd-inner a b) 133 | (if (zero? b) a (gcd-inner b (remainder a b)))) 134 | (abs (foldl gcd-inner 0 xs))) 135 | 136 | (define (lcm . xs) 137 | (if xs 138 | (/ (abs (apply * xs)) 139 | (apply gcd xs)) 140 | 1)) 141 | 142 | (define (max x . xs) 143 | (foldl (lambda (a b) (if (> a b) a b)) x xs)) 144 | 145 | (define (min x . xs) 146 | (foldl (lambda (a b) (if (< a b) a b)) x xs)) 147 | 148 | (define (negative? x) (< x 0)) 149 | 150 | (define (odd? x) (= (modulo x 2) 1)) 151 | 152 | (define (positive? x) (> x 0)) 153 | 154 | (define (quotient a b) (/ a b)) 155 | 156 | (define (remainder a b) (- a (* b (quotient a b)))) 157 | 158 | (define (zero? x) (= x 0)) 159 | 160 | ;; TODO: modulo 161 | 162 | 163 | ;; 164 | ;; List functions 165 | ;; 166 | 167 | (define (for-each proc . arg-lists) 168 | (when (car arg-lists) 169 | (apply proc (map car arg-lists)) 170 | (apply for-each 171 | (append (list proc) 172 | (map cdr arg-lists))))) 173 | 174 | (define (length list) 175 | (foldl (lambda (count x) (+ count 1)) 0 list)) 176 | 177 | (define (list-ref x k) (car (list-tail x k))) 178 | 179 | (define (list-tail x k) 180 | (if (zero? k) 181 | x 182 | (list-tail (cdr x) (- k 1)))) 183 | 184 | (define (reverse list) 185 | (foldl (lambda (a x) (cons x a)) nil list)) 186 | 187 | ;; 188 | ;; Other functions 189 | ;; 190 | 191 | (define (list? x) 192 | (or (null? x) 193 | (and (pair? x) 194 | (list? (cdr x))))) 195 | 196 | (define (not x) (if x nil t)) 197 | 198 | (define (null? x) (not x)) 199 | -------------------------------------------------------------------------------- /lisp.h: -------------------------------------------------------------------------------- 1 | typedef enum { 2 | Error_OK = 0, 3 | Error_Syntax, 4 | Error_Unbound, 5 | Error_Args, 6 | Error_Type 7 | } Error; 8 | 9 | struct Atom; 10 | 11 | typedef int (*Builtin)(struct Atom args, struct Atom *result); 12 | 13 | struct Atom { 14 | enum { 15 | AtomType_Nil, 16 | AtomType_Pair, 17 | AtomType_Symbol, 18 | AtomType_Integer, 19 | AtomType_Builtin, 20 | AtomType_Closure, 21 | AtomType_Macro 22 | } type; 23 | 24 | union { 25 | struct Pair *pair; 26 | const char *symbol; 27 | long integer; 28 | Builtin builtin; 29 | } value; 30 | }; 31 | 32 | struct Pair { 33 | struct Atom atom[2]; 34 | }; 35 | 36 | typedef struct Atom Atom; 37 | 38 | #define car(p) ((p).value.pair->atom[0]) 39 | #define cdr(p) ((p).value.pair->atom[1]) 40 | #define nilp(atom) ((atom).type == AtomType_Nil) 41 | 42 | static const Atom nil = { AtomType_Nil }; 43 | 44 | /* READER */ 45 | 46 | int read_expr(const char *input, const char **end, Atom *result); 47 | 48 | /* PRINTER */ 49 | 50 | void print_expr(Atom atom); 51 | 52 | /* EVALUATOR */ 53 | 54 | Atom env_create(Atom parent); 55 | int env_define(Atom env, Atom symbol, Atom value); 56 | int env_get(Atom env, Atom symbol, Atom *result); 57 | int env_set(Atom env, Atom symbol, Atom value); 58 | int eval_expr(Atom expr, Atom env, Atom *result); 59 | 60 | /* DATA */ 61 | 62 | Atom cons(Atom car_val, Atom cdr_val); 63 | Atom make_int(long x); 64 | Atom make_sym(const char *s); 65 | Atom make_builtin(Builtin fn); 66 | int listp(Atom expr); 67 | Atom copy_list(Atom list); 68 | Atom list_create(int n, ...); 69 | Atom list_get(Atom list, int k); 70 | void list_set(Atom list, int k, Atom value); 71 | void list_reverse(Atom *list); 72 | void gc_mark(Atom root); 73 | void gc(); 74 | 75 | /* BUILTINS */ 76 | 77 | int builtin_car(Atom args, Atom *result); 78 | int builtin_cdr(Atom args, Atom *result); 79 | int builtin_cons(Atom args, Atom *result); 80 | int builtin_eq(Atom args, Atom *result); 81 | int builtin_pairp(Atom args, Atom *result); 82 | int builtin_procp(Atom args, Atom *result); 83 | int builtin_add(Atom args, Atom *result); 84 | int builtin_subtract(Atom args, Atom *result); 85 | int builtin_multiply(Atom args, Atom *result); 86 | int builtin_divide(Atom args, Atom *result); 87 | int builtin_numeq(Atom args, Atom *result); 88 | int builtin_less(Atom args, Atom *result); 89 | 90 | -------------------------------------------------------------------------------- /main.c: -------------------------------------------------------------------------------- 1 | #include "lisp.h" 2 | #include