├── bib.rkt
├── nanodemo.rkt
└── tutorial.scrbl


/bib.rkt:
--------------------------------------------------------------------------------
 1 | #lang racket/base
 2 | 
 3 | (provide (all-defined-out))
 4 | (require scriblib/autobib)
 5 | 
 6 | (define-cite cite citet generate-bibliography)
 7 | 
 8 | (define appelcont
 9 |   (make-bib #:title "Compiling With Continuations"
10 |             #:author "Andrew W. Appel"
11 |             #:is-book? #t
12 |             #:date 2007
13 |             #:location (book-location #:edition "revised"
14 |                                       #:publisher "Cambridge University Press")
15 |             #:url "http://www.amazon.com/Compiling-Continuations-Andrew-W-Appel/dp/052103311X"))
16 | 
17 | (define plai ; bibliography entry for the PLAI textbook (cited from tutorial.scrbl)
18 |   (make-bib #:title "Programming Languages: Application and Interpretation"
19 |             #:author "Shriram Krishnamurthi"
20 |             #:date 2003
21 |             #:is-book? #t
22 |             #:location (book-location #:edition "first")
23 |             #:url "https://cs.brown.edu/~sk/Publications/Books/ProgLangs/2007-04-26/"))
24 | 
25 | (define lambdalifting ; bibliography entry for the original lambda-lifting paper
26 |   (make-bib
27 |    #:title "Lambda Lifting: Transforming Programs to Recursive Equations"
28 |    #:author "Thomas Johnsson"
29 |    #:date 1985
30 |    #:location (proceedings-location
31 |                "Conference on Functional Programming Languages and Computer Architecture")
32 |    #:url "http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.48.4346"))
33 | 
34 | (define lexicalscope-link
35 |    "https://en.wikipedia.org/wiki/Scope_%28computer_science%29#Lexical_scoping")
36 | (define iumatch-link
37 |   "http://www.cs.indiana.edu/chezscheme/match/")
38 | (define plai-link
39 |   "https://cs.brown.edu/~sk/Publications/Books/ProgLangs/2007-04-26/")


--------------------------------------------------------------------------------
/nanodemo.rkt:
--------------------------------------------------------------------------------
  1 | #lang at-exp nanopass
  2 | 
  3 | (provide (except-out (all-defined-out)
  4 |                      define-language
  5 |                      define-pass))
  6 | 
  7 | (require (prefix-in nanopass: nanopass/base)
  8 |          (for-syntax racket/syntax
  9 |                      syntax/parse)
 10 |          (for-label racket/base
 11 |                     racket/match
 12 |                     racket/format
 13 |                     nanopass/base))
 14 | (define-syntax (define-language stx) ; wrapper around nanopass:define-language that also records the source form
 15 |   (syntax-parse stx
 16 |     [(define-language name . rest)
 17 |      #:with name-code (format-id stx "~a-code" #'name) ; identifier <name>-code, e.g. Lsrc-code
 18 |      #`(begin
 19 |          (define name-code (quote-syntax #,stx)) ; the entire definition kept as a syntax object
 20 |          (nanopass:define-language name . rest))])) ; then define the language with the prefixed nanopass form
 21 | (define-syntax (define-pass stx) ; wrapper around nanopass:define-pass that also records the source form
 22 |   (syntax-parse stx
 23 |     [(define-pass name . rest)
 24 |      #:with name-code (format-id stx "~a-code" #'name) ; identifier <name>-code, e.g. desugar-when-code
 25 |      #`(begin
 26 |          (define name-code (quote-syntax #,stx)) ; the entire definition kept as a syntax object
 27 |          (nanopass:define-pass name . rest))])) ; then define the pass with the prefixed nanopass form
 28 | 
 29 | (define (int64? x) ; predicate behind the int64 terminal used by Lsrc
 30 |   (and (integer? x) ; NOTE(review): integer? also accepts inexact integers such as 1.0 -- confirm this is intended
 31 |        (<= (- (expt 2 63)) x (- (expt 2 63) 1)))) ; -2^63 <= x <= 2^63 - 1
 32 | 
 33 | (define-language Lsrc
 34 |   (terminals
 35 |    (int64 (n))
 36 |    (boolean (b))
 37 |    (symbol (x)))
 38 |   (Expr (e)
 39 |         n x b
 40 |         (= e1 e2)
 41 |         (+ e1 e2)
 42 |         (if e1 e2 e3)
 43 |         (cond [e1 e2] ... [e3])
 44 |         (when e1 e2)
 45 |         (λ (x) e)
 46 |         (e1 e2))
 47 |   (entry Expr))
 48 | 
 49 | (define-language L1
 50 |   (extends Lsrc)
 51 |   (Expr (e)
 52 |         (- (when e1 e2))))
 53 | 
 54 | (define-language L2
 55 |   (extends L1)
 56 |   (Expr (e)
 57 |         (- (cond [e1 e2] ... [e3]))))
 58 | 
 59 | (define-language L3
 60 |   (extends L2)
 61 |   (Expr (e)
 62 |         (- (λ (x) e))
 63 |         (+ (λ (x) fe)))
 64 |   (FreeVars-Expr (fe)
 65 |                  (+ (free (x ...) e))))
 66 | 
 67 | (define-language L4
 68 |   (extends L3)
 69 |   (terminals
 70 |    (+ (exact-nonnegative-integer (nat))))
 71 |   (Var (v)
 72 |        (+ x
 73 |           (env-get x nat)))
 74 |   (Expr (e)
 75 |         (- x
 76 |            (λ (x) fe)
 77 |            (e1 e2))
 78 |         (+ v
 79 |            (closure (x (x1 x2) e) (v ...))
 80 |            (closure-func x)
 81 |            (closure-env x)
 82 |            (let ([x e])
 83 |              e*)
 84 |            (e1 e2 e3)))
 85 |   (FreeVars-Expr (fe)
 86 |                  (- (free (x ...) e))))
 87 | 
 88 | (define-language L5
 89 |   (extends L4)
 90 |   (Program (p)
 91 |            (+ (program ([x (x1 x2) e*] ...)
 92 |                        e)))
 93 |   (Expr (e)
 94 |         (+ (make-closure x (v ...)))
 95 |         (- (closure (x (x1 x2) e) (v ...))))
 96 |   (entry Program))
 97 | 
 98 | (define-language L6
 99 |   (extends L5)
100 |   (Expr (e)
101 |         (- (+ e1 e2)
102 |            (= e1 e2)
103 |            (e1 e2 e3)
104 |            (if e1 e2 e3))
105 |         (+ (+ x1 x2)
106 |            (= x1 x2)
107 |            (x1 x2 x3)
108 |            (if x1 x2 x3))))
109 | 
110 | (define-language L7
111 |   (extends L6)
112 |   (Program (p)
113 |            (- (program ([x (x1 x2) e*] ...)
114 |                        e))
115 |            (+ (program ([x (x1 x2) le*] ...)
116 |                        le)))
117 |   (Expr (e)
118 |         (- (let ([x e])
119 |              e*)))
120 |   (Let-Expr (le)
121 |             (+ e
122 |                (let ([x e])
123 |                  le))))
124 | 
125 | (define-pass parse : * (e) -> Lsrc ()
126 |   (Expr : * (e) -> Expr ()
127 |         (match e
128 |           [`(= ,(app Expr e1) ,(app Expr e2))
129 |            `(= ,e1 ,e2)]
130 |           [`(+ ,(app Expr e1) ,(app Expr e2))
131 |            `(+ ,e1 ,e2)]
132 |           [`(if ,(app Expr e1) ,(app Expr e2) ,(app Expr e3))
133 |            `(if ,e1 ,e2 ,e3)]
134 |           [`(when ,(app Expr e1) ,(app Expr e2))
135 |            `(when ,e1 ,e2)]
136 |           [`(cond [,(app Expr e1) ,(app Expr e2)] ... [,(app Expr e3)])
137 |            `(cond [,e1 ,e2] ... [,e3])]
138 |           [`(λ (,x) ,(app Expr e1))
139 |            `(λ (,x) ,e1)]
140 |           [`(,(app Expr e1) ,(app Expr e2))
141 |            `(,e1 ,e2)]
142 |           [else e]))
143 |   (Expr e))
144 | 
145 | (define-pass desugar-when : Lsrc (e) -> L1 () ; rewrites when forms, which L1 removes from Expr
146 |   (Expr : Expr (e) -> Expr ()
147 |         [(when ,[e1] ,[e2]) ; ,[...] processes each subexpression recursively before binding it
148 |          `(if ,e1 ,e2 #f)])) ; a when with no else branch becomes an if that yields #f
149 | 
150 | (define-pass desugar-cond : L1 (e) -> L2 ()
151 |   (Expr : Expr (e) -> Expr ()
152 |         [(cond [,[e1]])
153 |          e1]
154 |         [(cond [,[e1] ,[e1*]] [,e2 ,e2*]  ...  [,e3])
155 |          `(if ,e1  ,e1*  ,(with-output-language (L1 Expr)
156 |                             (Expr `(cond [,e2 ,e2*] ... [,e3]))))]))
157 | 
158 | (define-pass delay-if : L2 (e) -> L2 () ; wraps both if branches in one-argument lambdas
159 |   (Expr : Expr (e) -> Expr ()
160 |         [(if ,[e1] ,[e2] ,[e3])
161 |          (define x2 (gensym 'trash)) ; fresh parameter name for the lambda around the then branch
162 |          (define x3 (gensym 'trash)) ; fresh parameter name for the lambda around the else branch
163 |          `((if ,e1 (λ (,x2) ,e2) (λ (,x3) ,e3)) #f)])) ; the if selects a lambda, which is then applied to a dummy #f
164 | 
165 | (define-pass identify-free-variables : L2 (e) -> L3 ()
166 |   (Expr : Expr (e) -> Expr ('())
167 |         [,x (values x (list x))]
168 |         [(+ ,[e1 a1] ,[e2 a2])
169 |          (values `(+ ,e1 ,e2)
170 |                  (set-union a1 a2))]
171 |         [(= ,[e1 a1] ,[e2 a2])
172 |          (values `(= ,e1 ,e2)
173 |                  (set-union a1 a2))]
174 |         [(if ,[e1 a1] ,[e2 a2] ,[e3 a3])
175 |          (values `(if ,e1 ,e2, e3)
176 |                  (set-union a1 a2 a3))]
177 |         [(λ (,x) ,[e1 a1])
178 |          (define a* (set-remove a1 x))
179 |          (values `(λ (,x) (free (,a* ...) ,e1))
180 |                  a*)]
181 |         [(,[e1 a1] ,[e2 a2])
182 |          (values `(,e1 ,e2)
183 |                  (set-union a1 a2))])
184 |   (let-values ([(res free) (Expr e)])
185 |     (unless (set-empty? free)
186 |       (error 'compiler "Unbound variables: ~a" free))
187 |     res))
188 | 
189 | (define-pass make-closures : L3 (e) -> L4 ()
190 |   (Expr : Expr (e [env #f] [fv '()]) -> Expr ()
191 |         [(,[e1] ,[e2])
192 |          (define clo-name (gensym 'clo))
193 |          `(let ([,clo-name ,e1])
194 |             ((closure-func ,clo-name)
195 |              ,e2
196 |              (closure-env ,clo-name)))]
197 |         [,x
198 |          (if (dict-has-key? fv x)
199 |              `(env-get ,env ,(dict-ref fv x))
200 |              x)]
201 |         [(λ (,x) (free (,x* ...) ,e))
202 |          (define lambda-name (gensym 'func))
203 |          (define env-name (gensym 'env))
204 |          (define e*
205 |            (Expr e env-name
206 |                  (for/list ([i (in-list x*)]
207 |                             [j (in-range (length x*))])
208 |                    (cons i j))))
209 |          `(closure (,lambda-name (,x ,env-name) ,e*)
210 |                    (,(for/list ([i (in-list x*)])
211 |                        (Expr i env fv)) ...))]))
212 | 
213 | (define-pass raise-closures : L4 (e) -> L5 ()
214 |   (definitions
215 |     (define lamb-name '())
216 |     (define lamb-arg  '())
217 |     (define lamb-env  '())
218 |     (define lamb-body '()))
219 |   (Expr : Expr (e) -> Expr ()
220 |         [(closure (,x1 (,x2 ,x3) ,[e]) (,[v*] ...))
221 |          (set! lamb-name (cons x1 lamb-name))
222 |          (set! lamb-arg (cons x2 lamb-arg))
223 |          (set! lamb-env (cons x3 lamb-env))
224 |          (set! lamb-body (cons e lamb-body))
225 |          `(make-closure ,x1 (,v* ...))])
226 |   (let ([e* (Expr e)])
227 |     `(program ([,lamb-name (,lamb-arg ,lamb-env) ,lamb-body] ...)
228 |               ,e*)))
229 | 
230 | (define-pass simplify-calls : L5 (e) -> L6 ()
231 |   (Expr : Expr (e) -> Expr ()
232 |         [(,[e1] ,[e2] ,[e3])
233 |          (define x1 (gensym 'app))
234 |          (define x2 (gensym 'app))
235 |          (define x3 (gensym 'app))
236 |          `(let ([,x1 ,e1])
237 |             (let ([,x2 ,e2])
238 |               (let ([,x3 ,e3])
239 |                 (,x1 ,x2 ,x3))))]
240 |         [(+ ,[e1] ,[e2])
241 |          (define x1 (gensym 'plus))
242 |          (define x2 (gensym 'plus))
243 |          `(let ([,x1 ,e1])
244 |             (let ([,x2 ,e2])
245 |                 (+ ,x1 ,x2)))]
246 |         [(= ,[e1] ,[e2])
247 |          (define x1 (gensym 'eq))
248 |          (define x2 (gensym 'eq))
249 |          `(let ([,x1 ,e1])
250 |             (let ([,x2 ,e2])
251 |               (= ,x1 ,x2)))]
252 |         [(if ,[e1] ,[e2] ,[e3])
253 |          (define x1 (gensym 'if))
254 |          (define x2 (gensym 'if))
255 |          (define x3 (gensym 'if))
256 |          `(let ([,x1 ,e1])
257 |             (let ([,x2 ,e2])
258 |               (let ([,x3 ,e3])
259 |                 (if ,x1 ,x2 ,x3))))]))
260 | 
261 | (define-pass raise-lets : L6 (e) -> L7 ()
262 |   (Expr : Expr (e) -> Expr ())
263 |   (Let-Expr : Expr (e [var #f] [next-expr #f]) -> Let-Expr ()
264 |             [(let ([,x ,e])
265 |                ,e*)
266 |              (Let-Expr e x (Let-Expr e* var next-expr))]
267 |             [else
268 |              (if var
269 |                  `(let ([,var ,(Expr e)])
270 |                     ,next-expr)
271 |                  (Expr e))])
272 |   (Program : Program (p) -> Program ()
273 |            [(program ([,x (,x1 ,x2) ,[Let-Expr : e #f #f -> e]] ...)
274 |                      ,[Let-Expr : e* #f #f -> e*])
275 |             `(program ([,x (,x1 ,x2) ,e] ...)
276 |                       ,e*)]))
277 | 
278 | (define runtime
279 |   @~a{#include <stdio.h>
280 |  #include <stdarg.h>
281 |  #include <stdlib.h>
282 |  #include <inttypes.h>
283 |  
284 |  struct Int;
285 |  struct Bool;
286 |  struct Closure;
287 |  union Racket_Object;
288 |  
289 |  typedef union Racket_Object (*Lambda)();
290 |  enum Tag {INT, BOOL, CLOSURE};
291 |  
292 |  typedef struct Int {
293 |   enum Tag t;
294 |   int64_t v;
295 |   } Int;
296 |   
297 |  typedef struct Bool {
298 |   enum Tag t;
299 |   int64_t v;
300 |   } Bool;
301 |    
302 |  typedef struct Closure {
303 |   enum Tag t;
304 |   Lambda l;
305 |   union Racket_Object * e;
306 |   } Closure;
307 |    
308 |  typedef union Racket_Object {
309 |   enum Tag t;
310 |   Int i;
311 |   Bool b;
312 |   Closure c;
313 |   } Racket_Object;
314 |    
315 |  Racket_Object __make_int(int64_t i) {
316 |   Racket_Object o;
317 |   o.t = INT;
318 |   o.i.v = i;
319 |   return o;
320 |  }
321 |  
322 |  Racket_Object __make_bool(int64_t b) {
323 |   Racket_Object o;
324 |   o.t = BOOL;
325 |   o.b.v = b;
326 |   return o;
327 |  }
328 |  
329 |  Racket_Object __make_closure(Lambda name, int argc, ...) {
330 |   /* Allocate space for env: one slot per variadic argument */
331 |   Racket_Object* env = malloc(sizeof(Racket_Object) * argc);
332 |   if(env == NULL && argc > 0) { printf("make-closure: Out of memory\n"); exit(1); }
333 |   /* Fill env */
334 |   va_list lp;
335 |   va_start(lp, argc);
336 |   for(int i = 0; i < argc; i++) {
337 |    env[i] = va_arg(lp, Racket_Object);
338 |   }
339 |   va_end(lp); /* every va_start must be matched by va_end before returning */
340 |   /* Return closure */
341 |   Racket_Object o;
342 |   o.t = CLOSURE;
343 |   o.c.l = name;
344 |   o.c.e = env;
345 |   return o;
346 |  }
347 |  
348 |  Racket_Object __env_get(Racket_Object *env, unsigned int id) {
349 |   return env[id];
350 |  }
351 |  
352 |  Racket_Object  __prim_plus(Racket_Object a, Racket_Object b) {
353 |   if(a.t != INT || b.t != INT) {
354 |    printf("+: Expected Integer\n");
355 |    exit(1);
356 |   }
357 |   return __make_int(a.i.v + b.i.v);
358 |  }
359 |  
360 |  Racket_Object __prim_equal(Racket_Object a, Racket_Object b) {
361 |   if(a.t != INT || b.t != INT) {
362 |    printf("=: Expected Integer\n");
363 |    exit(1);
364 |   }
365 |   return __make_bool(a.i.v == b.i.v);
366 |  }
367 |  
368 |  Racket_Object __prim_if(Racket_Object a, Racket_Object b, Racket_Object c) {
369 |   if(a.t != BOOL) {
370 |    printf("if: Expected Bool\n");
371 |    exit(1);
372 |   }
373 |   return a.b.v ? b : c;
374 |  }})
375 | 
376 | (define-pass generate-c : L7 (e) -> * ()
377 |  (definitions
378 |     (define (c s)
379 |       (list->string
380 |        (cons #\_
381 |              (for/list ([i (in-string (symbol->string s))])
382 |                (cond
383 |                  [(or (char-alphabetic? i)
384 |                       (char-numeric? i))
385 |                   i]
386 |                  [else #\_])))))
387 |     (define (build-func-decl name x1 x2)
388 |       @~a{Racket_Object @c[name](Racket_Object @c[x1], Racket_Object* @c[x2]);})
389 |     (define (build-func name x1 x2 body)
390 |       @~a{Racket_Object @c[name](Racket_Object @c[x1], Racket_Object* @c[x2]) {
391 |   @(Let-Expr body)}}))
392 |   (Program : Program (e) -> * ()
393 |            [(program ([,x (,x1 ,x2) ,le*] ...)
394 |                      ,le)
395 |             @~a{@runtime
396 |              @(apply ~a (for/list ([x (in-list x)]
397 |                                    [x1 (in-list x1)]
398 |                                    [x2 (in-list x2)])
399 |                           (build-func-decl x x1 x2)))
400 |              @(apply ~a (for/list ([x (in-list x)]
401 |                                    [x1 (in-list x1)]
402 |                                    [x2 (in-list x2)]
403 |                                    [le* (in-list le*)])
404 |                           (build-func x x1 x2 le*)))
405 | 
406 |              Racket_Object __racket_main() {
407 |               @Let-Expr[le]
408 |              }
409 | 
410 |              int main () {
411 |               Racket_Object ret = __racket_main(); /* run the compiled program, then print its result by tag */
412 |               if(ret.t == CLOSURE) {
413 |                printf("ans = #<procedure>\n");
414 |               } else if(ret.t == INT) {
415 |                printf("ans = %" PRId64 "\n", ret.i.v);
416 |               } else { /* BOOL */
417 |                printf("ans = %s\n", ret.b.v ? "#t" : "#f");
418 |               }
419 |               return 0;
420 |              }
421 |             }])
422 |   (Expr : Expr (e) -> * ()
423 |         [,n @~a{__make_int(@n)}]
424 |         [,b @~a{__make_bool(@(if b "1" "0"))}]
425 |         [(+ ,x1 ,x2)
426 |          @~a{__prim_plus(@c[x1], @c[x2])}]
427 |         [(= ,x1 ,x2)
428 |          @~a{__prim_equal(@c[x1], @c[x2])}]
429 |         [(if ,x1 ,x2 ,x3)
430 |          @~a{__prim_if(@c[x1],@c[x2],@c[x3])}]
431 |         [(,x1 ,x2 ,x3)
432 |          @~a{@c[x1](@c[x2], @c[x3])}]
433 |         [(closure-env ,x)
434 |          @~a{@c[x].c.e}]
435 |         [(closure-func ,x)
436 |          @~a{@c[x].c.l}]
437 |         [(make-closure ,x (,v ...))
438 |          @~a{__make_closure(@c[x],
439 |                             @(length v)
440 |                             @(apply ~a (for/list ([i (in-list v)])
441 |                                          @~a{, @Var[i]})))}])
442 |   (Var : Var (e) -> * ()
443 |        [,x @c[x]]
444 |        [(env-get ,x ,nat)
445 |         @~a{__env_get(@c[x], @nat)}])
446 |   (Let-Expr : Let-Expr (e) -> * ()
447 |             [(let ([,x (closure-func ,x*)]) ,le)
448 |              @~a{Lambda @c[x] = @c[x*].c.l;
449 |               @Let-Expr[le]}]
450 |             [(let ([,x (closure-env ,x*)]) ,le)
451 |              @~a{Racket_Object* @c[x] = @c[x*].c.e;
452 |               @Let-Expr[le]}]
453 |             [(let ([,x ,e]) ,le)
454 |              @~a{Racket_Object @c[x] = @(Expr e);
455 |               @Let-Expr[le]}]
456 |             [else @~a{return @(Expr e);}]))
457 | 
458 | (define compiler ; the full pipeline: takes a quoted S-expression, returns C source text
459 |   (compose generate-c ; compose applies right to left, so this pass runs last
460 |            raise-lets
461 |            simplify-calls
462 |            raise-closures
463 |            make-closures
464 |            identify-free-variables
465 |            delay-if
466 |            desugar-cond
467 |            desugar-when
468 |            parse)) ; runs first
469 | 
470 | (module+ test
471 |   (define x
472 |     (compiler
473 |      #;`(((λ (x)
474 |             (λ (x)
475 |               x)) 1) 2)
476 |      '(((λ (x)
477 |           (λ (y)
478 |             (cond [(= 6 (+ x y)) x]
479 |                   [y]))) 4) 2)))
480 |   
481 |   (displayln x)
482 |   (with-output-to-file "temp.c"
483 |     #:exists 'replace
484 |     (λ () (displayln x))))


--------------------------------------------------------------------------------
/tutorial.scrbl:
--------------------------------------------------------------------------------
   1 | #lang scribble/manual
   2 | 
   3 | @require[(except-in scribble/manual cite)
   4 |          scriblib/footnote
   5 |          scribble/examples
   6 |          scriblib/autobib
   7 |          nanopass/base
   8 |          "bib.rkt"
   9 |          "nanodemo.rkt"
  10 |          @for-label[racket/base
  11 |                     racket/match
  12 |                     racket/format
  13 |                     nanopass/base]]
  14 | 
  15 | @title{Writing a Compiler with Nanopass}
  16 | @author{Leif Andersen}
  17 | 
  18 | @(define nano-eval (make-base-eval))
  19 | @examples[#:eval nano-eval
  20 |           #:hidden
  21 |           (require racket/list
  22 |                    nanopass/base
  23 |                    "nanodemo.rkt")]
  24 | 
  25 | @section{Introduction}
  26 | 
  27 | @section{Prologue: Installing Racket and Nanopass}
  28 | 
  29 | @section{Defining the Source Language}
  30 | 
  31 | As with other Racket based languages, the first line of a
  32 | Nanopass program is the @tt{#lang}. For this compiler, we will use:
  33 | 
  34 | @codeblock{#lang at-exp nanopass}
  35 | 
  36 | The @racket[nanopass] language both provides Nanopass
  37 | specific constructs and re-exports the bindings in the 
  38 | @racket[racket] language. Modules can get Nanopass bindings
  39 | without anything provided by @racket[racket] by requiring 
  40 | @racket[nanopass/base].
  41 | 
  42 | The @racket[at-exp] language installs the @"@"-reader, which
  43 | makes code generation easier. While @"@"-reader does help
  44 | format strings, using it is not strictly necessary.
  45 | 
  46 | @racket[define-language] creates new languages in Nanopass.
  47 | The following code defines the source language, 
  48 | @racket[Lsrc], for our compiler:
  49 | 
  50 | @racketblock[#,Lsrc-code]
  51 | 
  52 | Terminals in Nanopass are defined by predicates. Any value
  53 | that satisfies a predicate can be a terminal of that type.
  54 | In the above example, @racket[b] is anything that is a 
  55 | @racket[boolean?]. The predicates @racket[symbol?] and 
  56 | @racket[boolean?] are provided by Nanopass. However, 
  57 | @racket[int64?] is a user created predicate:
  58 | 
  59 | @racketblock[
  60 |  (define (int64? x)
  61 |    (and (integer? x)
  62 |         (<= (- (expt 2 63)) x (- (expt 2 63) 1))))]
  63 | 
  64 | @; <============================
  65 | In this compiler, @racket[Expr] is a non-terminal. It can be
  66 | a combination of terminals and other non-terminals. These
  67 | combinations are called production rules. Each production
  68 | rule may contain a label, as well as several meta-variables.
  69 | For our source language, a @racket[Expr] can be an integer
  70 | (@racket[n]), variable (@racket[x]), boolean (@racket[b]),
  71 | arithmetic expression (@racket[(= e1 e2)], 
  72 | @racket[(+ e1 e2)]), branching expression @;
  73 | 
  74 | (@racket[(if e1 e2 e3)], @racket[(when e1 e2)]), function
  75 | (@racket[(λ (x) e)]), and function application @;
  76 | 
  77 | (@racket[(e1 e2)]).
  78 | @; ============================>
  79 | 
  80 | Every meta-variable in a production rule must be unique and
  81 | have a name matching a terminal or non-terminal. Numbers
  82 | can be appended onto the end of a meta-variable without
  83 | changing its type. In the expression @racket[(+ e1 e2)]: 
  84 | @racket[+] is a tag and @racket[e1] and @racket[e2] are
  85 | meta-variables that can contain expressions.
  86 | 
  87 | The ellipsis (@racket[...]) in the @racket[cond] production
  88 | rule indicates that the pattern before it is a list that can
  89 | occur zero or more times. In this case, there can be
  90 | multiple @racket[(,e1 ,e2)] pairs. Only one
  91 | ellipsis is allowed for each level of parenthesis ( 
  92 | @racket[()]) in a production rule.
  93 | 
  94 | For example, the following is a valid pattern:
  95 | 
  96 | @racketblock[(let ([x e] ...) e2 ...) (code:comment "A valid pattern")]
  97 | 
  98 | However, the following is not a valid pattern:
  99 | 
 100 | @racketblock[(let [x e] ... e2 ...) (code:comment "Not a valid pattern")]
 101 | 
 102 | Finally, the @racket[entry] clause tells Nanopass which
 103 | non-terminal is the top most non-terminal. This is 
 104 | @racket[Expr] in this compiler.
 105 | 
 106 | @subsection{Building Source Expressions}
 107 | 
 108 | Rather than building a parser, for now, we will use 
 109 | @racket[with-output-language] to build programs in 
 110 | @racket[Lsrc]. @racket[with-output-language] rebinds 
 111 | @racket[quasiquote] to create a nanopass record.@note{
 112 |  @racket[with-racket-quasiquote] rebinds 
 113 |  @racket[quasiquote] back to the normal Racket version.}
 114 | 
 115 | @racket[with-output-language] additionally takes a language
 116 | and a non-terminal in that language. It uses this
 117 | information to determine which records to construct.
 118 | 
 119 | @examples[
 120 |  #:eval nano-eval
 121 |  (with-output-language (Lsrc Expr)
 122 |    `5)
 123 |  (with-output-language (Lsrc Expr)
 124 |    `(+ 4 6))
 125 |  (with-output-language (Lsrc Expr)
 126 |    `((λ (x) (x x)) (λ (x) (x x))))]
 127 | 
 128 | Although @racket[quasiquote] appears to be creating a list,
 129 | it is actually creating records with a fixed arity. Thus,
 130 | it will error if the expression does not match a pattern in
 131 | the language. Finally, only @racket[quasiquote] is rebound,
 132 | so other list creating constructs such as @racket[quote] are
 133 | unchanged.
 134 | 
 135 | @examples[
 136 |  #:eval nano-eval
 137 |  (eval:error (with-output-language (Lsrc Expr)
 138 |                `(+ 5 6 7)))
 139 |  (with-output-language (Lsrc Expr)
 140 |    '(+ 1 2))]
 141 | 
 142 | If a production has an ellipsis (@racket[...]), then the
 143 | pattern prior to it may occur zero or more times.
 144 | 
 145 | @examples[
 146 |  #:eval nano-eval
 147 |  (with-output-language (Lsrc Expr)
 148 |    `(cond [(= 5 4) 3]
 149 |           [(= 2 1) 0]
 150 |           [42]))
 151 |  (with-output-language (Lsrc Expr)
 152 |    `(cond [84]))]
 153 | 
 154 | @subsection[#:tag "deflangscale"]{Notes on Scaling Up}
 155 | 
 156 | The source language used in this tutorial is clearly a
 157 | small one that is designed to make it easy to learn how to
 158 | write simple compilers using Nanopass. There are a few
 159 | design choices that make it non trivial (although still
 160 | possible) to scale up to a production quality language.
 161 | 
 162 | First, this source language is missing any form of
 163 | mutation. This feature is lacking because handling it
 164 | requires the compiler to reason about assigned variables,
 165 | and requires the runtime to create mutable cells in a heap
 166 | to store these boxes. Doing so additionally necessitates
 167 | creating a garbage collector. We have omitted this as
 168 | implementing this is straightforward, and adds little
 169 | understanding to how to use the framework. Interested
 170 | readers can read about how to implement the runtime for
 171 | these cells in @hyperlink[plai-link]{ Programming Languages:
 172 |  Application and Interpretation}
 173 | @cite[plai]. Additionally, techniques used in this tutorial
 174 | can be used to detect assigned variables, making it
 175 | possible to determine when a mutable cell must be used. 
 176 | @note{TODO: Source for faster assigned variable detection.}
 177 | 
 178 | Second, in this compiler, primitives such as @racket[=] and
 179 | @racket[+] are encoded directly in the language. While this
 180 | makes sense for primitives that significantly differ
 181 | syntactically, variables with similar syntax will benefit
 182 | from having a @racket[prim?] predicate and terminal. This is
 183 | because the vast majority of the rules that apply to
 184 | variables are identical to each other. We have used
 185 | primitives directly in our source language for simplicity.
 186 | Separating out primitives, however, is a straightforward task.
 187 | 
 188 | Third, our source language only contains 64 bit integers
 189 | and booleans as datums. Larger languages will have other
 190 | types of datums such as arbitrarily large integers, floats,
 191 | strings, lists, and structs. We did not include these in
 192 | this compiler because adding them is a straightforward task.
 193 | Additionally, adding more data types requires adding more
 194 | primitives to handle these data types.
 195 | 
 196 | Fourth, all functions take exactly one argument. All of the
 197 | transformations shown in this tutorial can be trivially
 198 | extended to multi-arity functions. Doing so complicates the
 199 | passes shown in this tutorial too quickly. Rather, we first
 200 | introduce passes that operate over expressions with a fixed
 201 | size. Later in this compiler, however, we begin to
 202 | show examples of operations on expressions that have an
 203 | arbitrary number of arguments. This is because many
 204 | intermediate forms require expressions with a variadic
 205 | number of arguments.
 206 | 
 207 | Finally, this language is lacking common expressions such as
 208 | @racket[let] and @racket[letrec]. We omitted these because
 209 | they complicate the implementation of the compiler, and add
 210 | little value on learning to use Nanopass.
 211 | 
 212 | @section{A Simple Pass: Desugaring @racket[when] Forms}
 213 | 
 214 | Conditional expressions (@racket[if], @racket[cond], 
 215 | @racket[when]) in our language most directly map to the ternary
 216 | operator in C. While C's @tt{if} statements are more
 217 | expressive, they can not be included in other expressions.
 218 | 
 219 | For example, the following expression is valid in our
 220 | language, but does not directly map to @tt{if} statements in
 221 | C:
 222 | 
 223 | @racketblock[(+ (if #t 5 6) 7) (code:comment "=> 13")]
 224 | 
 225 | Unfortunately, ternary operators in C always expect two
 226 | branched conditions. To compensate, we convert 
 227 | @racket[when] expressions into equivalent @racket[if] ones.
 228 | 
 229 | The transformation is:
 230 | 
 231 | @racketblock[
 232 |  (when <condition> <body>)
 233 |  (code:comment "=>")
 234 |  (if <condition> <body> #f)]
 235 | 
 236 | Here, @racket[<body>] is evaluated only if 
 237 | @racket[<condition>] is true. Otherwise, the expression
 238 | evaluates to @racket[#f].@note{In Racket, @racket[when]
 239 |  expressions actually evaluate to @racket[void]. We evaluate
 240 |  to @racket[#f] instead to make the compiler simpler.}
 241 | 
 242 | @subsection{Extending Languages}
 243 | 
 244 | Languages created with @racket[define-language] can be
 245 | extensions of other languages. These so called extensions
 246 | are indicated with the @racket[extends] keyword.
 247 | 
 248 | The following language extends @racket[Lsrc]:
 249 | 
 250 | @racketblock[#,L1-code]
 251 | 
 252 | The @racket[+] form adds new expressions to non-terminals,
 253 | and the @racket[-] form removes production rules. These
 254 | forms can also be used inside of a @racket[terminals] form.
 255 | In this case, they add and remove terminals.
 256 | 
 257 | We can use @racket[language->s-expression] to see the full
 258 | language. This form is especially useful when a language is
 259 | formed by many extended languages.
 260 | 
 261 | @examples[
 262 |  #:eval nano-eval
 263 |  (language->s-expression L7)]
 264 | 
 265 | @subsection{Passes and Processors}
 266 | 
 267 | Nanopass uses @racket[define-pass] to create new passes.
 268 | Unlike languages, passes are functions that transform
 269 | expressions from one language to another. The following pass
 270 | converts expressions from @racket[Lsrc] to @racket[L1]:
 271 | 
 272 | @racketblock[#,desugar-when-code]
 273 | 
 274 | Because @racket[when] is not a production in @racket[L1],
 275 | the @racket[desugar-when] pass converts uses of 
 276 | @racket[when] into @racket[if]. Unlike @racket[if], 
 277 | @racket[when] expressions only contain a condition and a
 278 | body. When a @racket[when] condition in our language is 
 279 | @racket[#f], the entire expression evaluates to @racket[#f],
 280 | without evaluating the body.
 281 | 
 282 | @examples[
 283 |  #:eval nano-eval
 284 |  (with-output-language (Lsrc Expr)
 285 |    (desugar-when `(when #f 42)))
 286 |  (with-output-language (Lsrc Expr)
 287 |    (desugar-when `(λ (x) (when x (λ (y) y)))))]
 288 | 
 289 | A pass constructed with @racket[define-pass] is composed of
 290 | a signature, a body, and a list of processors. In the above
 291 | pass, this signature is:
 292 | 
 293 | @racketblock[desugar-when : Lsrc (e) -> L1 ()]
 294 | 
 295 | The name of this pass is @racket[desugar-when]. It is
 296 | followed by @racket[Lsrc], which indicates the source
 297 | language, and @racket[L1] indicates the target language
 298 | for the pass. The @racket[(e)] is a list of the arguments
 299 | the pass takes. In this example, it is only one, which is
 300 | the source expression. The empty list @racket[()] is a list
 301 | of any extra return values that the pass may give. This pass
 302 | only returns an expression in the target language, and is
 303 | thus empty.
 304 | 
 305 | The remainder of the above pass is a processor, and is
 306 | discussed below in @secref{processors}.
 307 | 
 308 | @subsection[#:tag "processors"]{Processors and Catamorphisms}
 309 | 
 310 | The following is a processor in the @racket[desugar-when]
 311 | pass shown above:
 312 | 
 313 | @racketblock[
 314 |  (Expr : Expr (e) -> Expr ()
 315 |        [(when ,[e1] ,[e2])
 316 |         `(if ,e1 ,e2 #f)])]
 317 | 
 318 | Like passes, processors are functions and begin with a
 319 | signature:
 320 | 
 321 | @racketblock[Expr : Expr (e) -> Expr ()]
 322 | 
 323 | The first @racket[Expr] in this signature is the name of this
 324 | processor. While this name is arbitrary, @racket[Expr] is a
 325 | reasonable name as it transforms expressions. The
 326 | second @racket[Expr] indicates that the input for this
 327 | processor is an @racket[Expr] in @racket[Lsrc]. The
 328 | language is determined by the input language of the pass.
 329 | Analogously, the last @racket[Expr] indicates that the
 330 | output for this processor is an @racket[Expr] in
 331 | @racket[L1]. Finally, like the pass itself, @racket[(e)]
 332 | means that this processor takes in one argument, an
 333 | expression, and @racket[()] means that it has no additional
 334 | return values besides the output expression.
 335 | 
 336 | After the signature, a processor is composed of a series of
 337 | patterns and templates. Like Racket's @racket[match] form,
 338 | the processor selects the first pattern to match the given
 339 | expression. If none of the patterns match, Nanopass will
 340 | convert the expression to an equivalent one in the target
 341 | language and recursively match on all subexpressions in the
 342 | expression. This automatic behavior is how Nanopass
 343 | compilers reduce the amount of boilerplate.
 344 | 
 345 | The above processor contains one pattern:
 346 | 
 347 | @racketblock[
 348 |  [(when ,[e1] ,[e2])
 349 |   `(if ,e1 ,e2 #f)]]
 350 | 
 351 | This pattern does the actual transformation of @racket[when]
 352 | forms to @racket[if] forms. The first line is the pattern
 353 | itself. Unlike @racket[match] (but like @racket[syntax-parse]),
 354 | patterns begin already in a @racket[quasiquote], and must
 355 | use @racket[unquote] (@tt{,}) to escape.
 356 | 
 357 | Using @racket[unquote] means to match a subexpression, and
 358 | bind it to the variable given. In this pattern, however,
 359 | these variables are surrounded by square brackets (@tt{[]}).
 360 | Brackets are used for a feature of Nanopass called catamorphisms.
 361 | @note{The term catamorphism comes from category theory.
 362 |  While related, catamorphisms in this setting are used
 363 |  slightly differently and are more closely related to the
 364 |  @hyperlink[iumatch-link]{IU Pattern Matcher}
 365 |  or @tt{app} forms in @racket[match].}
 366 | 
 367 | These so-called catamorphisms further reduce boilerplate by
 368 | handling recursion automatically. The processor determines
 369 | input and output non-terminals by using the location in the
 370 | pattern and the name of the pattern variable. If the pass
 371 | contains a processor that matches this signature, it is used
 372 | to transform the variable. Otherwise, a default processor
 373 | that translates the expression to a similar one in the
 374 | target is used. Finally, the output is bound to the variable
 375 | inside of the brackets.
 376 | 
 377 | In this example, @racket[e1] is the first variable in a 
 378 | @racket[when] clause, which indicates that it is an 
 379 | @racket[Expr]. Next, because the variable's name begins with
 380 | @racket[e], its output is also an @racket[Expr]. The
 381 | processor named @racket[Expr] matches this signature, and is
 382 | used to process the variable. The result is bound to the
 383 | variable @racket[e1]. An analogous process happens for 
 384 | @racket[e2].
 385 | 
 386 | An equivalent pattern that does not use catamorphisms would
 387 | be:
 388 | 
 389 | @racketblock[
 390 |  [(when ,e1 ,e2)
 391 |   `(if ,(Expr e1) ,(Expr e2) #f)]]
 392 | 
 393 | Here, @racket[e1] and @racket[e2] are expressions in 
 394 | @racket[Lsrc], and thus must be passed into the 
 395 | @racket[Expr] processor to be converted into @racket[L1]
 396 | expressions.
 397 | 
 398 | Note that the recursion for expressions not listed in a
 399 | processor is important. Even if an expression does not need
 400 | to be transformed, it may contain subexpressions that do.
 401 | 
 402 | @examples[
 403 |  #:eval nano-eval
 404 |  (with-output-language (Lsrc Expr)
 405 |    (desugar-when `(+ 5 (when #t 6))))]
 406 | 
 407 | @subsection[#:tag "whenifscale"]{Notes on Scaling Up}
 408 | 
 409 | Converting @racket[when] expressions to @racket[if]
 410 | expressions serves as a simple example to illustrate the
 411 | benefits of using Nanopass to write compilers, while also
 412 | showing the basics of how to use it. This particular
 413 | transformation, however, is simple enough that it is
 414 | generally implemented in a language's macro expander or
 415 | during its parsing pass.
 416 | 
 417 | @section{Desugaring @racket[cond] and recursive passes}
 418 | 
 419 | Sometimes a pass or a processor will recursively call
 420 | itself on new expressions. When this happens, we need
 421 | to make sure that the new expression is in the input
 422 | language for the pass. By default @racket[define-pass] binds
 423 | @racket[quasiquote] to construct expressions in the output
 424 | language for the pass. We use @racket[with-output-language]
 425 | to rebind @racket[quasiquote] to the input language.
 426 | 
 427 | This operation occurs when compiling @racket[cond]
 428 | expressions. Like @racket[when], we need to desugar 
 429 | @racket[cond] expressions into @racket[if] ones. Doing so
 430 | allows us to directly compile them into ternary operators in
 431 | C.
 432 | 
 433 | The transformation that follows is:
 434 | 
 435 | @racketblock[
 436 |  (cond [<test-1> <body-1>]
 437 |        [<test-2> <body-2>]
 438 |        ....
 439 |        [<test-n-1> <body-n-1>]
 440 |        [<body-n>])
 441 |  (code:comment "=>")
 442 |  (if <test-1> <body-1>
 443 |      (if <test-2> <body-2>
 444 |          ....
 445 |          (if <test-n-1> <body-n-1> <body-n>) .... ))]
 446 | 
 447 | In this example, each branch of the @racket[cond] clause
 448 | becomes a possible branch in an @racket[if] expression. The
 449 | recursive nature of the target output causes a recursive
 450 | solution to occur naturally.
 451 | 
 452 | The following language is the result of desugaring 
 453 | @racket[cond]:
 454 | 
 455 | @racketblock[#,L2-code]
 456 | 
 457 | Similarly to how @racket[L1] removed @racket[when]
 458 | expressions from @racket[Lsrc], this language removes 
 459 | @racket[cond] expressions from @racket[L1].
 460 | 
 461 | The following pass does the actual desugaring:
 462 | 
 463 | @racketblock[#,desugar-cond-code]
 464 | 
 465 | This pass is similar to the @racket[desugar-when] pass
 466 | before it, with two major differences. First, this pass uses
 467 | ellipses (@racket[...]) to match on lists. Second, this pass uses 
 468 | @racket[with-output-language] to construct expressions in @racket[L1].
 469 | 
 470 | @examples[
 471 |  #:eval nano-eval
 472 |  (with-output-language (L1 Expr)
 473 |     (desugar-cond `(cond [(= 5 6) 7]
 474 |                          [(= 8 9) 10]
 475 |                          [42])))]
 476 | 
 477 | @subsection{Complex patterns and pattern matching}
 478 | 
 479 | Ellipses in patterns bind the variables before them to a
 480 | list.@note{Pattern variables can occur before an arbitrarily
 481 |  deep level of ellipses. For example, if a pattern is two levels of
 482 |  ellipses deep, it will be a list of lists. If the pattern
 483 |  is three levels of ellipses deep it will be a list of lists
 484 |  of lists.} In this case, both @racket[e2] and @racket[e2*]
 485 | are bound to lists that match the relevant input expression
 486 | given to the processor. The pattern causes them to look like
 487 | they are zipped together, but they are distinct lists.
 488 | 
 489 | The following code uses @racket[nanopass-case] to show that
 490 | @racket[e2] and @racket[e2*] are different lists:
 491 | 
 492 | @examples[
 493 |  #:eval nano-eval
 494 |  #:label #f
 495 |  (define cond-example
 496 |    (with-output-language (L1 Expr)
 497 |      `(cond [(= 1 2) 3]
 498 |             [(= 4 5) 6]
 499 |             [(= 7 8) 9]
 500 |             [10])))
 501 |  (nanopass-case (L1 Expr) cond-example
 502 |                 [(cond [,e1 ,e1*] [,e2 ,e2*] ... [,e3])
 503 |                  e2*])]
 504 | 
 505 | First, we create a @racket[cond] expression and name it 
 506 | @racket[cond-example]. We then use @racket[nanopass-case] to
 507 | destruct that expression, returning only @racket[e2*].
 508 | Notice that the result is a list. Returning @racket[e2]
 509 | would have similar results. If, however, we returned 
 510 | @racket[e1], @racket[e1*], or @racket[e3], the result would
 511 | have been a single expression, rather than a list of
 512 | expressions.
 513 | 
 514 | Lists can also be used in templates wherever an ellipsis is
 515 | allowed.
 516 | 
 517 | @examples[
 518 |  #:eval nano-eval
 519 |  (nanopass-case (L1 Expr) cond-example
 520 |                 [(cond [,e1 ,e1*] ... [,e3])
 521 |                  (with-output-language (L1 Expr)
 522 |                    `(cond [,e1* ,e1] ... [,e3]))])]
 523 | 
 524 | In this example we reverse the test and body of each of the
 525 | expressions in the @racket[cond] expression. While this does
 526 | change the semantics of what we would expect from a 
 527 | @racket[cond], it is syntactically valid. Additionally,
 528 | @racket[e1] and @racket[e1*] are both lists of expressions.
 529 | Even though they appear to be zipped by Nanopass, they are
 530 | still distinct lists.
 531 | 
 532 | Note that because variables bound with ellipses in the
 533 | pattern are just lists, a lot of common idioms in other
 534 | pattern languages are not possible. This limitation becomes
 535 | particularly obvious when trying to duplicate a single
 536 | element to match the length of a list.
 537 | 
 538 | @examples[
 539 |  #:eval nano-eval
 540 |  (eval:error
 541 |   (nanopass-case (L1 Expr) cond-example
 542 |                  [(cond [,e1 ,e1*] ... [,e3])
 543 |                   (with-output-language (L1 Expr)
 544 |                    `(cond [,e1 ,e3] ... [5]))]))
 545 |  (nanopass-case (L1 Expr) cond-example
 546 |                 [(cond [,e1 ,e1*] ... [,e3])
 547 |                  (with-output-language (L1 Expr)
 548 |                    `(cond [,e1 ,(make-list (length e1) e3)] ... [5]))])]
 549 | 
 550 | The first example causes an error because @racket[e1] is a
 551 | list and @racket[e3] is a single expression. Using 
 552 | @racket[make-list], however, to generate a list of 
 553 | @racket[e3] expressions of the correct length
 554 | achieves the desired behavior.
 555 | 
 556 | @subsection{Recursive templates}
 557 | 
 558 | Inside of a processor, @racket[quasiquote] is rebound to
 559 | construct an expression in the output language. Normally,
 560 | this is the correct behavior, but sometimes we want to
 561 | construct an expression in a different language, as in the
 562 | pass above.
 563 | 
 564 | More specifically, the following is the code that rebinds 
 565 | @racket[quasiquote]:
 566 | 
 567 | @racketblock[
 568 |  [(cond [,[e1] ,[e1*]] [,e2 ,e2*]  ...  [,e3])
 569 |   `(if ,e1  ,e1*  ,(with-output-language (L1 Expr)
 570 |                      (Expr `(cond [,e2 ,e2*] ... [,e3]))))]]
 571 | 
 572 | In this expression, the outer @racket[quasiquote]
 573 | constructs an expression in the output language for the
 574 | pass. The inner @racket[quasiquote], however, is
 575 | constructing an expression in @racket[L1], the input
 576 | language for the pass. Finally, the @racket[Expr] is the
 577 | name of the processor, and runs itself on the newly created
 578 | expression.
 579 | 
 580 | @subsection[#:tag "condscale"]{Notes on Scaling Up}
 581 | 
 582 | Many desugaring operations performed by the compiler are
 583 | fairly simple. As such, it is often easier for programmers
 584 | to implement them together in one pass. Merging these passes
 585 | can help reduce the boilerplate code surrounding the pass,
 586 | while not making the passes themselves any more
 587 | complicated.
 588 | 
 589 | The following is an alternate version of the desugar pass
 590 | that combines both @racket[desugar-when] and 
 591 | @racket[desugar-cond]:
 592 | 
 593 | @racketblock[
 594 |  (define-pass desugar-alt : Lsrc (e) -> L2 () ; desugars both when and cond in a single pass
 595 |    (Expr : Expr (e) -> Expr ()
 596 |          [(when ,[e1] ,[e2])
 597 |           `(if ,e1 ,e2 #f)]
 598 |          [(cond [,[e1]]) ; only the final body is left
 599 |           e1]
 600 |          [(cond [,[e1] ,[e1*]] [,e2 ,e2*]  ...  [,e3])
 601 |           `(if ,e1  ,e1*  ,(with-output-language (Lsrc Expr) ; rebuild the rest in the input language (Lsrc) before recurring
 602 |                              (Expr `(cond [,e2 ,e2*] ... [,e3]))))]))]
 603 | 
 604 | This particular pass can be constructed simply by merging
 605 | the two passes together. Doing this merging is
 606 | straightforward for simple passes such as these.
 607 | Unfortunately, this process gets significantly more
 608 | complicated as passes themselves become more complicated.
 609 | For this reason, many front end passes of a compiler will be
 610 | merged like above. This so-called merging works because the
 611 | passes themselves are simple, and separating them out only adds boilerplate.
 612 | 
 613 | The second approach to desugaring expressions is to do them
 614 | in the language's macro system. This makes it easier for
 615 | programmers to create their own macros that act as syntactic
 616 | sugar.@note{Racket's @racket[when] and @racket[cond] forms
 617 |  are desugared in Racket's
 618 |  @tech[#:key "macro"
 619 |        #:doc '(lib "scribblings/guide/macros.scrbl")]{
 620 |   macro system}.}
 621 | 
 622 | @section{Delaying @racket[if] Forms}
 623 | 
 624 | Unlike function application, the body and alternate body of
 625 | @racket[if] expressions should only be evaluated on the
 626 | result of the conditional expression. Worse still, @tt{if}
 627 | expressions in C are statements rather than expressions.
 628 | 
 629 | Ternary operators, however, are expressions. Using ternary
 630 | operators directly is still problematic because expressions
 631 | in C are not as expressive as ones in our source. For
 632 | example, expressions in our source can create closures, apply
 633 | them to a new variable, and call that closure at a later
 634 | time, all in one expression. We will eventually need to
 635 | translate some of these operations into statements.
 636 | 
 637 | Translating expressions into statements is problematic with
 638 | the delayed nature of @racket[if] expressions. Specifically,
 639 | we want to first evaluate the condition, and then evaluate
 640 | either the body or alternative.@note{If our source was
 641 |  effect free, we could evaluate all subexpressions of 
 642 |  @racket[if]. This language does, however, have one major
 643 |  effect, non-termination. We only want an @racket[if]
 644 |  expression to not terminate if the appropriate
 645 |  subexpressions do not terminate.}
 646 | 
 647 | One way to delay the values of @racket[if] expressions is
 648 | to wrap them in function expressions, and apply the whole
 649 | expression to a dummy variable. After this transformation
 650 | the entire expression can be evaluated eagerly, and the
 651 | functions themselves will give the condition body delayed
 652 | semantics.
 653 | 
 654 | The transformation will cause @racket[if] expressions to
 655 | follow this form:
 656 | 
 657 | @racketblock[
 658 |  (if <cond> <body> <alt>)
 659 |  (code:comment "=>")
 660 |  ((if <cond> (λ (trash) <body>) (λ (trash) <alt>)) #f)]
 661 | 
 662 | Here, both the body and alternate are functions that do not
 663 | use their @racket[trash] argument. In order to force the
 664 | evaluation of the selected clause, we apply the result to 
 665 | @racket[#f]. This value gets mapped to @racket[trash], which
 666 | is never used.
 667 | 
 668 | The following pass transforms delayed @racket[if]
 669 | expressions to equivalent eager expressions:
 670 | 
 671 | @racketblock[#,delay-if-code]
 672 | 
 673 | Both the source and target languages for this pass are 
 674 | @racket[L2]. It is possible to create a new language that
 675 | statically enforces if expressions to store only functions.
 676 | Doing so in this case does not prevent further
 677 | optimizations, but does help programmers find bugs in their
 678 | compilers.
 679 | 
 680 | The following is an example of a language that enforces 
 681 | @racket[if] expressions to store functions in their body:
 682 | 
 683 | @racketblock[
 684 |  (define-language L2-alt ; variant of L2 whose if branches must be Lambda forms
 685 |    (extends L2)
 686 |    (Expr (e)
 687 |          (- (λ (x) e) ; productions removed from Expr
 688 |             (if e1 e2 e3))
 689 |          (+ l ; productions added to Expr: l, l2 and l3 refer to the Lambda non-terminal below
 690 |             (if e l2 l3)))
 691 |    (Lambda (l) ; new non-terminal holding the function form
 692 |            (+ (λ (x) e))))]
 693 | 
 694 | All functions in this language take exactly one argument.
 695 | The ones in this pass, however, are thunks that do not
 696 | require an argument. To accommodate this, we generate two
 697 | unused variables, and apply the result of the @racket[if]
 698 | expression to @racket[#f].
 699 | 
 700 | @examples[
 701 |  #:eval nano-eval
 702 |  (with-output-language (L2 Expr)
 703 |    (delay-if `(if #f 42 84)))]
 704 | 
 705 | After this transformation, we can treat @racket[if] as an
 706 | entirely eager expression.
 707 | 
 708 | @subsection[#:tag "ifscale"]{Notes on Scaling Up}
 709 | 
 710 | TODO:
 711 | 
 712 | First: Other means of delaying
 713 | 
 714 | Second: Better generation of temporary variables.
 715 | 
 716 | Third: Actual thunks
 717 | 
 718 | @section{Closure Conversion}
 719 | 
 720 | Unlike our source language, C does not have closures. It
 721 | does, however, support higher order functions through the
 722 | use of function pointers. Unfortunately function pointers do
 723 | not store their own environments. Thus, we use closure
 724 | conversion@cite[appelcont] as our first step to supporting closures.
 725 | 
 726 | Closure conversion is the process of removing all free
 727 | variables from functions, and passing them in explicitly in
 728 | the form of an environment. The function associated with the
 729 | closure will eventually be lifted to the top level, but the
 730 | environment remains in the function's place. This is
 731 | possible because environment mappings are first class values
 732 | in C.
 733 | 
 734 | For example, if a function has one free variable @racket[y],
 735 | the transformation would look like:
 736 | 
 737 | @racketblock[
 738 |  (lambda (x) .... y ....)
 739 |  (code:comment "=>")
 740 |  (lambda (x env) .... (env-get env y) ....)]
 741 | 
 742 | Unfortunately, this transformation is not enough to create
 743 | a closure object. When a lambda occurs we need to also
 744 | explicitly create the environment associated with it. Doing
 745 | so allows the closure to bind to the variables as they are at the
 746 | lambda's definition, rather than whatever they happen to be
 747 | at the call site. In other words, we want to preserve 
 748 | @hyperlink[lexicalscope-link]{lexical scoping} in our target
 749 | language.
 750 | 
 751 | Applying this idea to the transformation above gives the
 752 | following transformation:
 753 | 
 754 | @racketblock[
 755 |  (lambda (x env) .... (env-get env y) ....)
 756 |  (code:comment "=>")
 757 |  (closure (name (x env) .... (env-get env y) ....) (y))]
 758 | 
 759 | Here, @racket[closure] is a piece of syntax to describe the
 760 | @racket[closure] object. The first argument is the function
 761 | expression, which has now been given the name
 762 | @racket[name]. The second argument is the variables that
 763 | this closure's environment binds, in this case @racket[y].
 764 | 
 765 | Now that functions take two arguments, we also need to
 766 | modify all of the function call sites to also pass in the
 767 | function's environment. This transformation is simple to do
 768 | here because closure objects contain their environments.
 769 | 
 770 | The following is the transformation that happens at each
 771 | function's call site:
 772 | 
 773 | @racketblock[
 774 |  (f x)
 775 |  (code:comment "=>")
 776 |  ((closure-func f) x (closure-env f))]
 777 | 
 778 | In this example, @racket[closure-func] and 
 779 | @racket[closure-env] are special syntax that retrieves the
 780 | function and environment objects from a closure. A later
 781 | pass transforms @racket[closure-func] to retrieve a function
 782 | pointer. For now, however, the closure contains the literal
 783 | function itself.
 784 | 
 785 | We perform closure conversion in two passes. First, we
 786 | create a pass to identify all free variables in each
 787 | function. This pass enables us to transform free variables
 788 | into environment lookups, as well as determine which
 789 | variables should be passed in as part of the closure
 790 | environment. The second pass creates the actual explicit
 791 | closure structures. These structures still contain the
 792 | function and environment, while a later pass will lift them
 793 | to the top.
 794 | 
 795 | @subsection{Free Variable Identification}
 796 | 
 797 | The first step to closure conversion is to identify all of
 798 | the free variables in every function. This transformation
 799 | allows us to convert free variables into lookups in a later pass.
 800 | 
 801 | The following language modifies functions to store free variables:
 802 | 
 803 | @racketblock[#,L3-code]
 804 | 
 805 | In this language @racket[FreeVars-Expr] is a new
 806 | non-terminal that stores an expression and a list of
 807 | variables. Function expressions now store an expression with
 808 | free variables for their body. The main effect of this
 809 | transformation is that functions now have constant time
 810 | access to all of their free variables.
 811 | 
 812 | @examples[
 813 |  #:eval nano-eval
 814 |  (with-output-language (L3 Expr)
 815 |    `(λ (x) (free (y z) (+ x (+ y z)))))]
 816 | 
 817 | The following pass does the actual transformation:
 818 | 
 819 | @racketblock[#,identify-free-variables-code]
 820 | 
 821 | @; <====================
 822 | Unlike the previous passes, this pass uses extra return
 823 | values in its processors. This extra value stores a set of
 824 | free variables represented as a list in the expression. By
 825 | default, an expression contains no free variables. To
 826 | accomplish this, we pass the empty list@;
 827 | 
 828 | (@racket['()]) in as the default value for the extra return
 829 | values. Nanopass uses this value inside of any generated
 830 | parts of the processor.@note{The default value can contain
 831 |  free variables. When it does, those free variables serve as
 832 |  an indicator that the default return value is never used.} Every clause
 833 | in the @racket[Expr] processor uses @racket[values] to
 834 | return two values.
 835 | @; ====================>
 836 | 
 837 | The base case for this clause is:
 838 | 
 839 | @racketblock[[,x (values x (list x))]]
 840 | 
 841 | This case matches on variables. These
 842 | variables remain unchanged in this pass. However, they are
 843 | added to a list of free variables in the expression.
 844 | Function expressions then use this list to store free
 845 | variables:
 846 | 
 847 | @racketblock[
 848 |  [(λ (,x) ,[e1 a1])
 849 |   (define a* (set-remove a1 x))
 850 |   (values `(λ (,x) (free (,a* ...) ,e1))
 851 |           a*)]]
 852 | 
 853 | The set @racket[a*] removes the variable bound by the
 854 | function from the set of free variables. It then uses those
 855 | free variables in the @racket[free] expression. Finally, it
 856 | also passes this modified set for use in additional
 857 | expressions. Note that @racket[(,a* ...)] indicates both
 858 | that @racket[a*] is a list, and should be placed in the list
 859 | portion of the @racket[free] expression.
 860 | 
 861 | The remaining cases combine all of the free variables into a
 862 | common set. For example, the free variables in the
 863 | expression @racket[(+ ,e1 ,e2)] are the union of all of
 864 | the free variables in @racket[e1] and the free variables in
 865 | @racket[e2]. We need to explicitly create these cases
 866 | because Nanopass is not clever enough to generate these
 867 | clauses.
 868 | 
 869 | In addition to using extra return values in its processors,
 870 | this function also has a body:
 871 | 
 872 | @racketblock[
 873 |   (let-values ([(res free) (Expr e)])
 874 |     (unless (set-empty? free)
 875 |       (error 'compiler "Unbound variables: ~a" free))
 876 |     res)]
 877 | 
 878 | This body is needed because processors in this pass return
 879 | two values, while the pass itself returns only one. This
 880 | body additionally checks if any free variables have not been
 881 | accounted for by any functions. When free variables do
 882 | remain, the compiler throws an unbound variables error, and stops.
 883 | 
 884 | @examples[
 885 |  #:eval nano-eval
 886 |  (with-output-language (L2 Expr)
 887 |    (identify-free-variables
 888 |     `(λ (x)
 889 |        (λ (y) (+ x y)))))
 890 |  (eval:error
 891 |   (with-output-language (L2 Expr)
 892 |     (identify-free-variables
 893 |      `(λ (x) y))))]
 894 | 
 895 | @subsection{Explicit Closure Creation}
 896 | 
 897 | The next step for closure conversion is to take the free
 898 | variables we've just found, and use them to construct
 899 | explicit closure objects. We create these so-called closure
 900 | objects in the following language:
 901 | 
 902 | @racketblock[#,L4-code]
 903 | 
 904 | This language removes the free variable lists because they
 905 | are no longer needed. It additionally introduces 
 906 | @racket[exact-nonnegative-integer]s to serve as offsets in a
 907 | closure object. Also, this language introduces @racket[let]
 908 | forms to bind expressions to values. And finally, this
 909 | language introduces four syntactic forms for operating on
 910 | closures:
 911 | 
 912 | @itemlist[
 913 |  @item{@racket[closure] - For building closure objects.}
 914 |  @item{@racket[closure-env] - For retrieving the environment
 915 |   portion of a closure.}
 916 |  @item{@racket[closure-func] - For retrieving the function
 917 |   portion of a closure.}
 918 |  @item{@racket[env-get] - For retrieving a specific variable
 919 |   in an environment.}]
 920 | 
 921 | @racketblock[#,make-closures-code]
 922 | 
 923 | @examples[
 924 |  #:eval nano-eval
 925 |  (with-output-language (L3 Expr)
 926 |    (make-closures `(λ (x) (free () x))))
 927 |  (with-output-language (L3 Expr)
 928 |    (make-closures `(λ (x) (free () (λ (y) (free (x) x))))))
 929 |  (with-output-language (L3 Expr)
 930 |    (make-closures `((λ (x) (free () x)) 42)))
 931 |  (with-output-language (L3 Expr)
 932 |    (make-closures `(((λ (x) (free ()(λ (y) (free (x) (+ x y)))))
 933 |                      2) 3)))]
 934 | 
 935 | @subsection[#:tag "ccscale"]{Notes on Scaling Up}
 936 | 
 937 | TODO:
 938 | 
 939 | First, free variable checking at top level
 940 | 
 941 | Second, intermediate passes.
 942 | 
 943 | Third, data structure for linear time
 944 | 
 945 | Fourth, Lambda Lifting@cite[lambdalifting].
 946 | 
 947 | @section{Turning Closures to Function Pointers}
 948 | 
 949 | @racketblock[#,L5-code]
 950 | 
 951 | @racketblock[#,raise-closures-code]
 952 | 
 953 | @subsection[#:tag "clotofuncscale"]{Notes on Scaling Up}
 954 | 
 955 | @section{Converting Expressions into Statements}
 956 | 
 957 | @subsection{Simplifying function application}
 958 | 
 959 | @racketblock[#,L6-code]
 960 | 
 961 | @racketblock[#,simplify-calls-code]
 962 | 
 963 | @subsection{Linearizing Code}
 964 | 
 965 | @racketblock[#,L7-code]
 966 | 
 967 | @racketblock[#,raise-lets-code]
 968 | 
 969 | @subsection[#:tag "simplifyscale"]{Notes on Scaling Up}
 970 | 
 971 | @section{The Runtime}
 972 | 
 973 | @codeblock[#:keep-lang-line? #f]|{#lang at-exp nanopass
 974 |  (define runtime ; C source text that generate-c places at the top of its output
 975 |    @~a{#include <stdio.h>
 976 |   #include <stdarg.h>
 977 |   #include <stdlib.h>
 978 |   #include <inttypes.h>
 979 |   
 980 |   struct Int;
 981 |   struct Bool;
 982 |   struct Closure;
 983 |   union Racket_Object;
 984 |   
 985 |   typedef union Racket_Object (*Lambda)(); /* function pointer stored in a Closure */
 986 |   enum Tag {INT, BOOL, CLOSURE}; /* says which member of Racket_Object is in use */
 987 |   
 988 |   typedef struct Int {
 989 |    enum Tag t;
 990 |    int64_t v;
 991 |    } Int;
 992 |     
 993 |   typedef struct Bool {
 994 |    enum Tag t;
 995 |    int64_t v;
 996 |    } Bool;
 997 |     
 998 |   typedef struct Closure {
 999 |    enum Tag t;
1000 |    Lambda l;
1001 |    union Racket_Object * e;
1002 |    } Closure;
1003 |     
1004 |   typedef union Racket_Object { /* every member starts with the tag t */
1005 |    enum Tag t;
1006 |    Int i;
1007 |    Bool b;
1008 |    Closure c;
1009 |    } Racket_Object;
1010 |     
1011 |   Racket_Object __make_int(int64_t i) {
1012 |    Racket_Object o;
1013 |    o.t = INT;
1014 |    o.i.v = i;
1015 |    return o;
1016 |   }
1017 |   
1018 |   Racket_Object __make_bool(int64_t b) {
1019 |    Racket_Object o;
1020 |    o.t = BOOL;
1021 |    o.b.v = b;
1022 |    return o;
1023 |   }
1024 |   
1025 |   Racket_Object __make_closure(Lambda name, int argc, ...) {
1026 |    /* Allocate space for env */
1027 |    Racket_Object* env = malloc(sizeof(Racket_Object) * argc);
1028 |    
1029 |    /* Fill env */
1030 |    va_list lp;
1031 |    va_start(lp, argc);
1032 |    for(int i = 0; i < argc; i++) {
1033 |     env[i] = va_arg(lp, Racket_Object);
1034 |    }
1035 |    va_end(lp); /* every va_start must be paired with va_end before returning */
1036 |    /* Return closure */
1037 |    Racket_Object o;
1038 |    o.t = CLOSURE;
1039 |    o.c.l = name;
1040 |    o.c.e = env;
1041 |    return o;
1042 |   }
1043 |   
1044 |   Racket_Object __env_get(Racket_Object *env, unsigned int id) {
1045 |    return env[id];
1046 |   }
1047 |   
1048 |   Racket_Object  __prim_plus(Racket_Object a, Racket_Object b) {
1049 |    if(a.t != INT || b.t != INT) {
1050 |     printf("+: Expected Integer\n");
1051 |     exit(1);
1052 |    }
1053 |    return __make_int(a.i.v + b.i.v);
1054 |   }
1055 |   
1056 |   Racket_Object __prim_equal(Racket_Object a, Racket_Object b) {
1057 |    if(a.t != INT || b.t != INT) {
1058 |     printf("=: Expected Integer\n");
1059 |     exit(1);
1060 |    }
1061 |    return __make_bool(a.i.v == b.i.v);
1062 |   }
1063 |   
1064 |   Racket_Object __prim_if(Racket_Object a,
1065 |             Racket_Object b,
1066 |             Racket_Object c) {
1067 |    if(a.t != BOOL) {
1068 |     printf("if: Expected Bool\n");
1069 |     exit(1);
1070 |    }
1071 |    return a.b.v ? b : c;
1072 |    }})}|
1073 | 
1074 | @section{Code Generation}
1075 | 
1076 | @codeblock[#:keep-lang-line? #f]|{#lang at-exp nanopass
1077 |  (define-pass generate-c : L7 (e) -> * () ; Render an L7 program as one string of C source.
1078 |    (definitions
1079 |      (define (c s) ; Mangle symbol s into a C identifier: "_" prefix, non-alphanumerics become "_".
1080 |        (list->string
1081 |         (cons #\_
1082 |               (for/list ([i (in-string (symbol->string s))])
1083 |                 (cond
1084 |                   [(or (char-alphabetic? i)
1085 |                        (char-numeric? i))
1086 |                    i]
1087 |                   [else #\_])))))
1088 |      (define (build-func-decl name x1 x2) ; C prototype for one function of the program.
1089 |        @~a{Racket_Object @c[name](Racket_Object @c[x1],
1090 |             Racket_Object* @c[x2]);})
1091 |      (define (build-func name x1 x2 body) ; Matching C definition; the body text comes from Let-Expr.
1092 |        @~a{Racket_Object @c[name](Racket_Object @c[x1],
1093 |             Racket_Object* @c[x2]) {
1094 |    @(Let-Expr body)}}))
1095 |    (Program : Program (e) -> * () ; Emits the `runtime` text, prototypes, definitions, __racket_main and main.
1096 |             [(program ([,x (,x1 ,x2) ,le*] ...) ; NB: PRId64 expands to the conversion letters only,
1097 |                       ,le) ; so the "%" has to be written explicitly in the printf format below.
1098 |              @~a{@runtime
1099 |                @(apply ~a (for/list ([x (in-list x)]
1100 |                                      [x1 (in-list x1)]
1101 |                                      [x2 (in-list x2)])
1102 |                             (build-func-decl x x1 x2)))
1103 |                @(apply ~a (for/list ([x (in-list x)]
1104 |                                      [x1 (in-list x1)]
1105 |                                      [x2 (in-list x2)]
1106 |                                      [le* (in-list le*)])
1107 |                             (build-func x x1 x2 le*)))
1108 |                
1109 |                Racket_Object __racket_main() {
1110 |                 @Let-Expr[le]
1111 |                }
1112 |                
1113 |                int main () {
1114 |                 Racket_Object ret = __racket_main();
1115 |                 if(ret.t == CLOSURE) {
1116 |                  printf("ans = #<procedure>\n");
1117 |                  } else if(ret.t == INT) {
1118 |                  printf("ans = %" PRId64 "\n", ret.i.v);
1119 |                  } else {
1120 |                  printf("ans = %s\n", ret.b.v ? "#t" : "#f");
1121 |                 }
1122 |                 return 0;
1123 |                }
1124 |                }])
1125 |    (Expr : Expr (e) -> * () ; One C expression per L7 expression form.
1126 |          [,n @~a{__make_int(@n)}]
1127 |          [,b @~a{__make_bool(@(if b "1" "0"))}]
1128 |          [(+ ,x1 ,x2)
1129 |           @~a{__prim_plus(@c[x1], @c[x2])}]
1130 |          [(= ,x1 ,x2)
1131 |           @~a{__prim_equal(@c[x1], @c[x2])}]
1132 |          [(if ,x1 ,x2 ,x3)
1133 |           @~a{__prim_if(@c[x1],@c[x2],@c[x3])}]
1134 |          [(,x1 ,x2 ,x3)
1135 |           @~a{@c[x1](@c[x2], @c[x3])}]
1136 |          [(closure-env ,x)
1137 |           @~a{@c[x].c.e}]
1138 |          [(closure-func ,x)
1139 |           @~a{@c[x].c.l}]
1140 |          [(make-closure ,x (,v ...))
1141 |           @~a{__make_closure(@c[x],
1142 |             @(length v)
1143 |             @(apply ~a (for/list ([i (in-list v)])
1144 |                          @~a{, @Var[i]})))}])
1145 |    (Var : Var (e) -> * () ; A variable is either its mangled name or an __env_get lookup.
1146 |         [,x @c[x]]
1147 |         [(env-get ,x ,nat)
1148 |          @~a{__env_get(@c[x], @nat)}])
1149 |    (Let-Expr : Let-Expr (e) -> * () ; Each let becomes a C local declaration; the final expression a return.
1150 |              [(let ([,x (closure-func ,x*)]) ,le)
1151 |               @~a{Lambda @c[x] = @c[x*].c.l;
1152 |                 @Let-Expr[le]}]
1153 |              [(let ([,x (closure-env ,x*)]) ,le)
1154 |               @~a{Racket_Object* @c[x] = @c[x*].c.e;
1155 |                 @Let-Expr[le]}]
1156 |              [(let ([,x ,e]) ,le)
1157 |               @~a{Racket_Object @c[x] = @(Expr e);
1158 |                 @Let-Expr[le]}]
1159 |              [else @~a{return @(Expr e);}]))}|
1160 | 
1161 | @section{Parsing}
1162 | 
1163 | @racketblock[#,parse-code]
1164 | 
1165 | @section{Tying Everything Together}
1166 | 
1167 | @racketblock[
1168 |  (define compiler ; passes are listed in execution order: parse runs first, generate-c last
1169 |    (apply compose
1170 |           (reverse
1171 |            (list parse desugar-when desugar-cond
1172 |                  delay-if
1173 |                  identify-free-variables
1174 |                  make-closures
1175 |                  raise-closures
1176 |                  simplify-calls
1177 |                  raise-lets
1178 |                  generate-c))))]
1179 | 
1180 | @section{Further Reading}
1181 | 
1182 | @section{Bonus: Creating a #lang}
1183 | 
1184 | @generate-bibliography[]
1185 | 


--------------------------------------------------------------------------------