├── bib.rkt
├── nanodemo.rkt
└── tutorial.scrbl


/bib.rkt:
--------------------------------------------------------------------------------
#lang racket/base

;; Bibliography entries and external links cited by tutorial.scrbl.

(provide (all-defined-out))
(require scriblib/autobib)

;; Binds `cite`, `citet`, and `generate-bibliography` for this document.
(define-cite cite citet generate-bibliography)

(define appelcont
  (make-bib #:title "Compiling With Continuations"
            #:author "Andrew W. Appel"
            #:is-book? #t
            #:date 2007
            #:location (book-location #:edition "revised"
                                      #:publisher "Cambridge University Press")
            #:url "http://www.amazon.com/Compiling-Continuations-Andrew-W-Appel/dp/052103311X"))

;; Title matches the one used for this book in tutorial.scrbl.
(define plai
  (make-bib #:title "Programming Languages: Application and Interpretation"
            #:author "Shriram Krishnamurthi"
            #:date 2003
            #:is-book? #t
            #:location (book-location #:edition "first")
            #:url "https://cs.brown.edu/~sk/Publications/Books/ProgLangs/2007-04-26/"))

(define lambdalifting
  (make-bib
   #:title "Lambda Lifting: Transforming Programs to Recursive Equations"
   #:author "Thomas Johnsson"
   #:date 1985
   #:location (proceedings-location
               "Conference on Functional Programming Languages and Computer Architecture")
   #:url "http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.48.4346"))

;; Plain URLs used with `hyperlink` in the tutorial text.
(define lexicalscope-link
  "https://en.wikipedia.org/wiki/Scope_%28computer_science%29#Lexical_scoping")
(define iumatch-link
  "http://www.cs.indiana.edu/chezscheme/match/")
(define plai-link
  "https://cs.brown.edu/~sk/Publications/Books/ProgLangs/2007-04-26/")


--------------------------------------------------------------------------------
/nanodemo.rkt:
--------------------------------------------------------------------------------
#lang at-exp nanopass

;; A small compiler from a λ-calculus-like source language to C, written as a
;; chain of Nanopass passes.  Every language and pass defined here also gets a
;; `<name>-code` binding holding its own source syntax, which tutorial.scrbl
;; splices into `racketblock`s.

(provide (except-out (all-defined-out)
                     define-language
                     define-pass))

(require (prefix-in nanopass: nanopass/base)
         (for-syntax racket/syntax
                     syntax/parse)
         (for-label racket/base
                    racket/match
                    racket/format
                    nanopass/base))

;; Wraps Nanopass's `define-language`: besides defining `name`, also defines
;; `name-code` as the quoted syntax of the whole form.
(define-syntax (define-language stx)
  (syntax-parse stx
    [(define-language name . rest)
     #:with name-code (format-id stx "~a-code" #'name)
     #`(begin
         (define name-code (quote-syntax #,stx))
         (nanopass:define-language name . rest))]))

;; Same wrapper as above, for `define-pass`.
(define-syntax (define-pass stx)
  (syntax-parse stx
    [(define-pass name . rest)
     #:with name-code (format-id stx "~a-code" #'name)
     #`(begin
         (define name-code (quote-syntax #,stx))
         (nanopass:define-pass name . rest))]))

;; Terminal predicate: an integer in the signed 64-bit range
;; [-(2^63), 2^63 - 1].
(define (int64? x)
  (and (integer? x)
       (<= (- (expt 2 63)) x (- (expt 2 63) 1))))

;; Source language.
(define-language Lsrc
  (terminals
   (int64 (n))
   (boolean (b))
   (symbol (x)))
  (Expr (e)
        n x b
        (= e1 e2)
        (+ e1 e2)
        (if e1 e2 e3)
        (cond [e1 e2] ... [e3])
        (when e1 e2)
        (λ (x) e)
        (e1 e2))
  (entry Expr))

;; Lsrc without `when`.
(define-language L1
  (extends Lsrc)
  (Expr (e)
        (- (when e1 e2))))

;; L1 without `cond`.
(define-language L2
  (extends L1)
  (Expr (e)
        (- (cond [e1 e2] ... [e3]))))

;; λ bodies are annotated with their free variables.
(define-language L3
  (extends L2)
  (Expr (e)
        (- (λ (x) e))
        (+ (λ (x) fe)))
  (FreeVars-Expr (fe)
                 (+ (free (x ...) e))))

;; λ becomes an explicit `closure`; application passes the closure's
;; environment as an extra operand; variables may be environment lookups.
(define-language L4
  (extends L3)
  (terminals
   (+ (exact-nonnegative-integer (nat))))
  (Var (v)
       (+ x
          (env-get x nat)))
  (Expr (e)
        (- x
           (λ (x) fe)
           (e1 e2))
        (+ v
           (closure (x (x1 x2) e) (v ...))
           (closure-func x)
           (closure-env x)
           (let ([x e])
             e*)
           (e1 e2 e3)))
  (FreeVars-Expr (fe)
                 (- (free (x ...) e))))

;; Function bodies are hoisted into a top-level `program` form; the remaining
;; expression only builds closures with `make-closure`.
(define-language L5
  (extends L4)
  (Program (p)
           (+ (program ([x (x1 x2) e*] ...)
                       e)))
  (Expr (e)
        (+ (make-closure x (v ...)))
        (- (closure (x (x1 x2) e) (v ...))))
  (entry Program))

;; Operands of `+`, `=`, `if`, and calls are restricted to variables.
(define-language L6
  (extends L5)
  (Expr (e)
        (- (+ e1 e2)
           (= e1 e2)
           (e1 e2 e3)
           (if e1 e2 e3))
        (+ (+ x1 x2)
           (= x1 x2)
           (x1 x2 x3)
           (if x1 x2 x3))))

;; `let` moves out of Expr into its own Let-Expr non-terminal, so a body is a
;; chain of `let`s ending in a single expression.
(define-language L7
  (extends L6)
  (Program (p)
           (- (program ([x (x1 x2) e*] ...)
                       e))
           (+ (program ([x (x1 x2) le*] ...)
                       le)))
  (Expr (e)
        (- (let ([x e])
             e*)))
  (Let-Expr (le)
            (+ e
               (let ([x e])
                 le))))

;; S-expression -> Lsrc.  Known forms are matched and rebuilt with their
;; subterms parsed recursively; anything else is returned unchanged.
(define-pass parse : * (e) -> Lsrc ()
  (Expr : * (e) -> Expr ()
        (match e
          [`(= ,(app Expr e1) ,(app Expr e2))
           `(= ,e1 ,e2)]
          [`(+ ,(app Expr e1) ,(app Expr e2))
           `(+ ,e1 ,e2)]
          [`(if ,(app Expr e1) ,(app Expr e2) ,(app Expr e3))
           `(if ,e1 ,e2 ,e3)]
          [`(when ,(app Expr e1) ,(app Expr e2))
           `(when ,e1 ,e2)]
          [`(cond [,(app Expr e1) ,(app Expr e2)] ... [,(app Expr e3)])
           `(cond [,e1 ,e2] ... [,e3])]
          [`(λ (,x) ,(app Expr e1))
           `(λ (,x) ,e1)]
          [`(,(app Expr e1) ,(app Expr e2))
           `(,e1 ,e2)]
          [else e]))
  (Expr e))

;; (when c body) => (if c body #f)
(define-pass desugar-when : Lsrc (e) -> L1 ()
  (Expr : Expr (e) -> Expr ()
        [(when ,[e1] ,[e2])
         `(if ,e1 ,e2 #f)]))

;; Unfolds `cond` into nested `if`s.  The remaining clauses are rebuilt as an
;; L1 `cond` (hence `with-output-language`) and fed back through `Expr`.
(define-pass desugar-cond : L1 (e) -> L2 ()
  (Expr : Expr (e) -> Expr ()
        [(cond [,[e1]])
         e1]
        [(cond [,[e1] ,[e1*]] [,e2 ,e2*] ... [,e3])
         `(if ,e1 ,e1* ,(with-output-language (L1 Expr)
                          (Expr `(cond [,e2 ,e2*] ...
                                       [,e3]))))]))

;; Wraps both branches of an `if` in one-argument λs and applies the selected
;; one to #f, so only the chosen branch is evaluated.
(define-pass delay-if : L2 (e) -> L2 ()
  (Expr : Expr (e) -> Expr ()
        [(if ,[e1] ,[e2] ,[e3])
         (define x2 (gensym 'trash))
         (define x3 (gensym 'trash))
         `((if ,e1 (λ (,x2) ,e2) (λ (,x3) ,e3)) #f)]))

;; Annotates every λ with the free variables of its body.  The processor
;; returns a second value (default '()): the free variables of the expression
;; as a list.  Signals an error if the whole program has any.
(define-pass identify-free-variables : L2 (e) -> L3 ()
  (Expr : Expr (e) -> Expr ('())
        [,x (values x (list x))]
        [(+ ,[e1 a1] ,[e2 a2])
         (values `(+ ,e1 ,e2)
                 (set-union a1 a2))]
        [(= ,[e1 a1] ,[e2 a2])
         (values `(= ,e1 ,e2)
                 (set-union a1 a2))]
        [(if ,[e1 a1] ,[e2 a2] ,[e3 a3])
         (values `(if ,e1 ,e2 ,e3)
                 (set-union a1 a2 a3))]
        [(λ (,x) ,[e1 a1])
         ;; the bound variable is not free in the λ itself
         (define a* (set-remove a1 x))
         (values `(λ (,x) (free (,a* ...) ,e1))
                 a*)]
        [(,[e1 a1] ,[e2 a2])
         (values `(,e1 ,e2)
                 (set-union a1 a2))])
  (let-values ([(res free) (Expr e)])
    (unless (set-empty? free)
      (error 'compiler "Unbound variables: ~a" free))
    res))

;; Closure conversion.  `env` is the name of the enclosing function's
;; environment parameter (#f at top level); `fv` is an association list from
;; each free variable of the enclosing function to its environment index.
(define-pass make-closures : L3 (e) -> L4 ()
  (Expr : Expr (e [env #f] [fv '()]) -> Expr ()
        [(,[e1] ,[e2])
         ;; bind the callee once, then pass its environment to its function
         (define clo-name (gensym 'clo))
         `(let ([,clo-name ,e1])
            ((closure-func ,clo-name)
             ,e2
             (closure-env ,clo-name)))]
        [,x
         (if (dict-has-key? fv x)
             `(env-get ,env ,(dict-ref fv x))
             x)]
        [(λ (,x) (free (,x* ...) ,e))
         (define lambda-name (gensym 'func))
         (define env-name (gensym 'env))
         ;; the body sees the new environment, indexed by position in x*
         (define e*
           (Expr e env-name
                 (for/list ([i (in-list x*)]
                            [j (in-range (length x*))])
                   (cons i j))))
         ;; captured values are resolved in the *enclosing* environment
         `(closure (,lambda-name (,x ,env-name) ,e*)
                   (,(for/list ([i (in-list x*)])
                       (Expr i env fv)) ...))]))

;; Hoists every closure body to the top-level `program` form, leaving a
;; `make-closure` at the original site.  Names, parameters, and bodies are
;; accumulated in four parallel lists.
(define-pass raise-closures : L4 (e) -> L5 ()
  (definitions
    (define lamb-name '())
    (define lamb-arg '())
    (define lamb-env '())
    (define lamb-body '()))
  (Expr : Expr (e) -> Expr ()
        [(closure (,x1 (,x2 ,x3) ,[e]) (,[v*] ...))
         (set! lamb-name (cons x1 lamb-name))
         (set! lamb-arg (cons x2 lamb-arg))
         (set! lamb-env (cons x3 lamb-env))
         (set! lamb-body (cons e lamb-body))
         `(make-closure ,x1 (,v* ...))])
  (let ([e* (Expr e)])
    `(program ([,lamb-name (,lamb-arg ,lamb-env) ,lamb-body] ...)
              ,e*)))

;; Names every operand of a call, `+`, `=`, and `if` with a `let`, so the
;; operation itself only mentions variables.
(define-pass simplify-calls : L5 (e) -> L6 ()
  (Expr : Expr (e) -> Expr ()
        [(,[e1] ,[e2] ,[e3])
         (define x1 (gensym 'app))
         (define x2 (gensym 'app))
         (define x3 (gensym 'app))
         `(let ([,x1 ,e1])
            (let ([,x2 ,e2])
              (let ([,x3 ,e3])
                (,x1 ,x2 ,x3))))]
        [(+ ,[e1] ,[e2])
         (define x1 (gensym 'plus))
         (define x2 (gensym 'plus))
         `(let ([,x1 ,e1])
            (let ([,x2 ,e2])
              (+ ,x1 ,x2)))]
        [(= ,[e1] ,[e2])
         (define x1 (gensym 'eq))
         (define x2 (gensym 'eq))
         `(let ([,x1 ,e1])
            (let ([,x2 ,e2])
              (= ,x1 ,x2)))]
        [(if ,[e1] ,[e2] ,[e3])
         (define x1 (gensym 'if))
         (define x2 (gensym 'if))
         (define x3 (gensym 'if))
         `(let ([,x1 ,e1])
            (let ([,x2 ,e2])
              (let ([,x3 ,e3])
                (if ,x1 ,x2 ,x3))))]))

;; Flattens nested `let`s into a chain.  `var` and `next-expr` describe the
;; pending binding: once a non-`let` expression is reached it is bound to
;; `var` in front of `next-expr`, or returned as-is when nothing is pending.
(define-pass raise-lets : L6 (e) -> L7 ()
  (Expr : Expr (e) -> Expr ())
  (Let-Expr : Expr (e [var #f] [next-expr #f]) -> Let-Expr ()
            [(let ([,x ,e])
               ,e*)
             (Let-Expr e x (Let-Expr e* var next-expr))]
            [else
             (if var
                 `(let ([,var ,(Expr e)])
                    ,next-expr)
                 (Expr e))])
  (Program : Program (p) -> Program ()
           [(program ([,x (,x1 ,x2) ,[Let-Expr : e #f #f -> e]] ...)
                     ,[Let-Expr : e* #f #f -> e*])
            `(program ([,x (,x1 ,x2) ,e] ...)
                      ,e*)]))

;; C runtime prepended to every generated program: a tagged union for
;; integers, booleans, and closures, plus the primitives the generated code
;; calls.
(define runtime
  @~a{#include <stdio.h>
      #include <stdlib.h>
      #include <stdarg.h>
      #include <inttypes.h>

      struct Int;
      struct Bool;
      struct Closure;
      union Racket_Object;

      typedef union Racket_Object (*Lambda)();
      enum Tag {INT, BOOL, CLOSURE};

      typedef struct Int {
        enum Tag t;
        int64_t v;
      } Int;

      typedef struct Bool {
        enum Tag t;
        int64_t v;
      } Bool;

      typedef struct Closure {
        enum Tag t;
        Lambda l;
        union Racket_Object * e;
      } Closure;

      typedef union Racket_Object {
        enum Tag t;
        Int i;
        Bool b;
        Closure c;
      } Racket_Object;

      Racket_Object __make_int(int64_t i) {
        Racket_Object o;
        o.t = INT;
        o.i.v = i;
        return o;
      }

      Racket_Object __make_bool(int64_t b) {
        Racket_Object o;
        o.t = BOOL;
        o.b.v = b;
        return o;
      }

      Racket_Object __make_closure(Lambda name, int argc, ...) {
        /* Allocate space for env */
        Racket_Object* env = malloc(sizeof(Racket_Object) * argc);
        if(argc > 0 && env == NULL) {
          printf("make-closure: Out of memory\n");
          exit(1);
        }

        /* Fill env */
        va_list lp;
        va_start(lp, argc);
        for(int i = 0; i < argc; i++) {
          env[i] = va_arg(lp, Racket_Object);
        }
        va_end(lp);

        /* Return closure */
        Racket_Object o;
        o.t = CLOSURE;
        o.c.l = name;
        o.c.e = env;
        return o;
      }

      Racket_Object __env_get(Racket_Object *env, unsigned int id) {
        return env[id];
      }

      Racket_Object __prim_plus(Racket_Object a, Racket_Object b) {
        if(a.t != INT || b.t != INT) {
          printf("+: Expected Integer\n");
          exit(1);
        }
        return __make_int(a.i.v + b.i.v);
      }

      Racket_Object __prim_equal(Racket_Object a, Racket_Object b) {
        if(a.t != INT || b.t != INT) {
          printf("=: Expected Integer\n");
          exit(1);
        }
        return __make_bool(a.i.v == b.i.v);
      }

      Racket_Object __prim_if(Racket_Object a, Racket_Object b, Racket_Object c) {
        if(a.t != BOOL) {
          printf("if: Expected Bool\n");
          exit(1);
        }
        return a.b.v ? b : c;
      }})

;; L7 -> string of C source.  Emits the runtime, a prototype and a definition
;; for every hoisted function, `__racket_main` for the program body, and a
;; `main` that prints the answer.
(define-pass generate-c : L7 (e) -> * ()
  (definitions
    ;; Symbol -> C identifier: prefix with `_` and replace every character
    ;; that is neither alphabetic nor numeric with `_`.
    (define (c s)
      (list->string
       (cons #\_
             (for/list ([i (in-string (symbol->string s))])
               (cond
                 [(or (char-alphabetic? i)
                      (char-numeric? i))
                  i]
                 [else #\_])))))
    (define (build-func-decl name x1 x2)
      @~a{Racket_Object @c[name](Racket_Object @c[x1], Racket_Object* @c[x2]);})
    (define (build-func name x1 x2 body)
      @~a{Racket_Object @c[name](Racket_Object @c[x1], Racket_Object* @c[x2]) {
            @(Let-Expr body)}}))
  (Program : Program (e) -> * ()
           [(program ([,x (,x1 ,x2) ,le*] ...)
                     ,le)
            @~a{@runtime
                @(apply ~a (for/list ([x (in-list x)]
                                      [x1 (in-list x1)]
                                      [x2 (in-list x2)])
                             (build-func-decl x x1 x2)))
                @(apply ~a (for/list ([x (in-list x)]
                                      [x1 (in-list x1)]
                                      [x2 (in-list x2)]
                                      [le* (in-list le*)])
                             (build-func x x1 x2 le*)))

                Racket_Object __racket_main() {
                  @Let-Expr[le]
                }

                int main () {
                  Racket_Object ret = __racket_main();
                  if(ret.t == CLOSURE) {
                    printf("ans = #<procedure>\n");
                  } else if(ret.t == INT) {
                    printf("ans = %" PRId64 "\n", ret.i.v);
                  } else {
                    printf("ans = %s\n", ret.b.v ? "#t" : "#f");
                  }
                  return 0;
                }
                }])
  (Expr : Expr (e) -> * ()
        [,n @~a{__make_int(@n)}]
        [,b @~a{__make_bool(@(if b "1" "0"))}]
        [(+ ,x1 ,x2)
         @~a{__prim_plus(@c[x1], @c[x2])}]
        [(= ,x1 ,x2)
         @~a{__prim_equal(@c[x1], @c[x2])}]
        [(if ,x1 ,x2 ,x3)
         @~a{__prim_if(@c[x1],@c[x2],@c[x3])}]
        [(,x1 ,x2 ,x3)
         @~a{@c[x1](@c[x2], @c[x3])}]
        [(closure-env ,x)
         @~a{@c[x].c.e}]
        [(closure-func ,x)
         @~a{@c[x].c.l}]
        [(make-closure ,x (,v ...))
         @~a{__make_closure(@c[x],
                            @(length v)
                            @(apply ~a (for/list ([i (in-list v)])
                                         @~a{, @Var[i]})))}])
  (Var : Var (e) -> * ()
       [,x @c[x]]
       [(env-get ,x ,nat)
        @~a{__env_get(@c[x], @nat)}])
  ;; The first two clauses pick the C type of the local: `Lambda` for a
  ;; closure's function, `Racket_Object*` for its environment.
  (Let-Expr : Let-Expr (e) -> * ()
            [(let ([,x (closure-func ,x*)]) ,le)
             @~a{Lambda @c[x] = @c[x*].c.l;
                 @Let-Expr[le]}]
            [(let ([,x (closure-env ,x*)]) ,le)
             @~a{Racket_Object* @c[x] = @c[x*].c.e;
                 @Let-Expr[le]}]
            [(let ([,x ,e]) ,le)
             @~a{Racket_Object @c[x] = @(Expr e);
                 @Let-Expr[le]}]
            [else @~a{return @(Expr e);}]))

;; The whole compiler: S-expression in, C source text out.  `compose` applies
;; right to left, so `parse` runs first and `generate-c` last.
(define compiler
  (compose generate-c
           raise-lets
           simplify-calls
           raise-closures
           make-closures
           identify-free-variables
           delay-if
           desugar-cond
           desugar-when
           parse))

;; Compiles a sample program, prints the C text, and writes it to temp.c.
(module+ test
  (define x
    (compiler
     #;`(((λ (x)
            (λ (x)
              x)) 1) 2)
     '(((λ (x)
          (λ (y)
            (cond [(= 6 (+ x y)) x]
                  [y]))) 4) 2)))

  (displayln x)
  (with-output-to-file "temp.c"
    #:exists 'replace
    (λ () (displayln x))))


--------------------------------------------------------------------------------
/tutorial.scrbl:
--------------------------------------------------------------------------------
#lang scribble/manual

@require[(except-in scribble/manual cite)
         scriblib/footnote
         scribble/examples
         scriblib/autobib
         nanopass/base
         "bib.rkt"
         "nanodemo.rkt"
         @for-label[racket/base
                    racket/match
                    racket/format
                    nanopass/base]]

@title{Writing a Compiler with Nanopass}
@author{Leif Andersen}

@(define nano-eval (make-base-eval))
@examples[#:eval nano-eval
          #:hidden
          (require racket/list
                   nanopass/base
                   "nanodemo.rkt")]

@section{Introduction}

@section{Prologue: Installing Racket and Nanopass}

@section{Defining the Source Language}

As with other Racket-based languages, the first line of a
Nanopass program is the @tt{#lang}. For this compiler, we will use:

@codeblock{#lang at-exp nanopass}

The @racket[nanopass] language both provides Nanopass-specific
constructs and re-exports the bindings in the
@racket[racket] language. Modules can get Nanopass bindings
without anything provided by @racket[racket] by requiring
@racket[nanopass/base].

The @racket[at-exp] language installs the @"@"-reader, which
makes code generation easier. While the @"@"-reader does help
format strings, using it is not strictly necessary.

@racket[define-language] creates new languages in Nanopass.
47 | The following code defines the source language, 48 | @racket[Lsrc], for our compiler: 49 | 50 | @racketblock[#,Lsrc-code] 51 | 52 | Terminals in Nanopass are defined by predicates. Any value 53 | that satisfies a predicate can be a terminal of that type. 54 | In the above example, @racket[b] is anything that is a 55 | @racket[boolean?]. The predicates @racket[symbol?] and 56 | @racket[boolean?] are provided by Nanopass. However, 57 | @racket[int64?] is a user-created predicate: 58 | 59 | @racketblock[ 60 | (define (int64? x) 61 | (and (integer? x) 62 | (<= (- (expt 2 63)) x (- (expt 2 63) 1))))] 63 | 64 | @; <============================ 65 | In this compiler, @racket[Expr] is a non-terminal. It can be 66 | a combination of terminals and other non-terminals. These 67 | combinations are called production rules. Each production 68 | rule may contain a label, as well as several meta-variables. 69 | For our source language, an @racket[Expr] can be an integer 70 | (@racket[n]), variable (@racket[x]), boolean (@racket[b]), 71 | arithmetic expression (@racket[(= e1 e2)], 72 | @racket[(+ e1 e2)]), branching expression @; 73 | 74 | (@racket[(if e1 e2 e3)], @racket[(when e1 e2)]), function 75 | (@racket[(λ (x) e)]), and function application @; 76 | 77 | (@racket[(e1 e2)]). 78 | @; ============================> 79 | 80 | Every meta-variable in a production rule must be unique and 81 | have a name matching a terminal or non-terminal. Numbers 82 | can be appended onto the end of a meta-variable without 83 | changing its type. In the expression @racket[(+ e1 e2)]: 84 | @racket[+] is a tag and @racket[e1] and @racket[e2] are 85 | meta-variables that can contain expressions. 86 | 87 | The ellipsis (@racket[...]) in the @racket[cond] production 88 | rule indicates that the pattern before it is a list that can 89 | occur zero or more times. In this case, there can be 90 | multiple @racket[(,e1 ,e2)] pairs. 
Only one 91 | ellipsis is allowed for each level of parentheses ( 92 | @racket[()]) in a production rule. 93 | 94 | For example, the following is a valid pattern: 95 | 96 | @racketblock[(let ([x e] ...) e2 ...) (code:comment "A valid pattern")] 97 | 98 | However, the following is not a valid pattern: 99 | 100 | @racketblock[(let [x e] ... e2 ...) (code:comment "Not a valid pattern")] 101 | 102 | Finally, the @racket[entry] clause tells Nanopass which 103 | non-terminal is the topmost non-terminal. This is 104 | @racket[Expr] in this compiler. 105 | 106 | @subsection{Building Source Expressions} 107 | 108 | Rather than building a parser, for now, we will use 109 | @racket[with-output-language] to build programs in 110 | @racket[Lsrc]. @racket[with-output-language] rebinds 111 | @racket[quasiquote] to create a Nanopass record.@note{ 112 | @racket[with-racket-quasiquote] rebinds 113 | @racket[quasiquote] back to the normal Racket version.} 114 | 115 | @racket[with-output-language] additionally takes a language 116 | and a non-terminal in that language. It uses this 117 | information to determine which records to construct. 118 | 119 | @examples[ 120 | #:eval nano-eval 121 | (with-output-language (Lsrc Expr) 122 | `5) 123 | (with-output-language (Lsrc Expr) 124 | `(+ 4 6)) 125 | (with-output-language (Lsrc Expr) 126 | `((λ (x) (x x)) (λ (x) (x x))))] 127 | 128 | Although @racket[quasiquote] appears to be creating a list, 129 | it is actually creating records with a fixed arity. Thus, 130 | it will error if the expression does not match a pattern in 131 | the language. Finally, only @racket[quasiquote] is rebound, 132 | so other list-creating constructs such as @racket[quote] are 133 | unchanged. 
134 | 135 | @examples[ 136 | #:eval nano-eval 137 | (eval:error (with-output-language (Lsrc Expr) 138 | `(+ 5 6 7))) 139 | (with-output-language (Lsrc Expr) 140 | '(+ 1 2))] 141 | 142 | If a production has an ellipsis (@racket[...]), then the 143 | pattern prior to it may occur zero or more times. 144 | 145 | @examples[ 146 | #:eval nano-eval 147 | (with-output-language (Lsrc Expr) 148 | `(cond [(= 5 4) 3] 149 | [(= 2 1) 0] 150 | [42])) 151 | (with-output-language (Lsrc Expr) 152 | `(cond [84]))] 153 | 154 | @subsection[#:tag "deflangscale"]{Notes on Scaling Up} 155 | 156 | The source language used in this tutorial is clearly a 157 | small one that is designed to make it easy to learn how to 158 | write simple compilers using Nanopass. There are a few 159 | design choices that make it nontrivial (although still 160 | possible) to scale up to a production-quality language. 161 | 162 | First, this source language is missing any form of 163 | mutation. This feature is lacking because handling it 164 | requires the compiler to reason about assigned variables, 165 | and requires the runtime to create mutable cells in a heap 166 | to store these boxes. Doing so additionally necessitates 167 | creating a garbage collector. We have omitted this as 168 | implementing this is straightforward, and adds little 169 | understanding to how to use the framework. Interested 170 | readers can read about how to implement the runtime for 171 | these cells in @hyperlink[plai-link]{Programming Languages: 172 | Application and Interpretation} 173 | @cite[plai]. Additionally, techniques used in this tutorial 174 | can be used to detect assigned variables, making it 175 | possible to determine when a mutable cell must be used. 176 | @note{TODO: Source for faster assigned variable detection.} 177 | 178 | Second, in this compiler, primitives such as @racket[=] and 179 | @racket[+] are encoded directly in the language. 
While this 180 | makes sense for primitives that significantly differ 181 | syntactically, primitives with similar syntax will benefit 182 | from having a @racket[prim?] predicate and terminal. This is 183 | because the vast majority of the rules that apply to 184 | such primitives are identical to each other. We have used 185 | primitives directly in our source language for simplicity. 186 | Separating out primitives, however, is a straightforward task. 187 | 188 | Third, our source language only contains 64-bit integers 189 | and booleans as datums. Larger languages will have other 190 | types of datums such as arbitrarily large integers, floats, 191 | strings, lists, and structs. We did not include these in 192 | this compiler because adding them is a straightforward task. 193 | Additionally, adding more data types requires adding more 194 | primitives to handle these data types. 195 | 196 | Fourth, all functions take exactly one argument. All of the 197 | transformations shown in this tutorial can be trivially 198 | extended to multi-arity functions. Doing so, however, would complicate the 199 | passes shown in this tutorial too quickly. Rather, we first 200 | introduce passes that operate over expressions with a fixed 201 | size. Later in this compiler, however, we begin to 202 | show examples of operations on expressions that have an 203 | arbitrary number of arguments. This is because many 204 | intermediate forms require expressions with a variadic 205 | number of arguments. 206 | 207 | Finally, this language is lacking common expressions such as 208 | @racket[let] and @racket[letrec]. We omitted these because 209 | they complicate the implementation of the compiler, and add 210 | little value to learning to use Nanopass. 211 | 212 | @section{A Simple Pass: Desugaring @racket[when] Forms} 213 | 214 | Conditional expressions (@racket[if], @racket[cond], 215 | @racket[when]) in our language most directly map to the ternary 216 | operator in C. 
While C's @tt{if} statements are more 217 | expressive, they cannot be included in other expressions. 218 | 219 | For example, the following expression is valid in our 220 | language, but does not directly map to @tt{if} statements in 221 | C: 222 | 223 | @racketblock[(+ (if #t 5 6) 7) (code:comment "=> 13")] 224 | 225 | Unfortunately, ternary operators in C always expect two 226 | branches. To compensate, we convert 227 | @racket[when] expressions into equivalent @racket[if] ones. 228 | 229 | The transformation is: 230 | 231 | @racketblock[ 232 | (when <cond> <body>) 233 | (code:comment "=>") 234 | (if <cond> <body> #f)] 235 | 236 | Here, @racket[<body>] is evaluated only if 237 | @racket[<cond>] is true. Otherwise, the expression 238 | evaluates to @racket[#f].@note{In Racket, @racket[when] 239 | expressions actually evaluate to @racket[void]. We evaluate 240 | to @racket[#f] instead to make the compiler simpler.} 241 | 242 | @subsection{Extending Languages} 243 | 244 | Languages created with @racket[define-language] can be 245 | extensions of other languages. These so-called extensions 246 | are indicated with the @racket[extends] keyword. 247 | 248 | The following language extends @racket[Lsrc]: 249 | 250 | @racketblock[#,L1-code] 251 | 252 | The @racket[+] form adds new expressions to non-terminals, 253 | and the @racket[-] form removes production rules. These 254 | forms can also be used inside of a @racket[terminals] form. 255 | In this case, they add and remove terminals. 256 | 257 | We can use @racket[language->s-expression] to see the full 258 | language. This form is especially useful when a language is 259 | formed by many extended languages. 260 | 261 | @examples[ 262 | #:eval nano-eval 263 | (language->s-expression L7)] 264 | 265 | @subsection{Passes and Processors} 266 | 267 | Nanopass uses @racket[define-pass] to create new passes. 268 | Unlike languages, passes are functions that transform 269 | expressions from one language to another. 
The following pass 270 | converts expressions from @racket[Lsrc] to @racket[L1]: 271 | 272 | @racketblock[#,desugar-when-code] 273 | 274 | Because @racket[when] is not a production in @racket[L1], 275 | the @racket[desugar-when] pass converts uses of 276 | @racket[when] into @racket[if]. Unlike @racket[if], 277 | @racket[when] expressions only contain a condition and a 278 | body. When a @racket[when] condition in our language is 279 | @racket[#f], the entire expression evaluates to @racket[#f], 280 | without evaluating the body. 281 | 282 | @examples[ 283 | #:eval nano-eval 284 | (with-output-language (Lsrc Expr) 285 | (desugar-when `(when #f 42))) 286 | (with-output-language (Lsrc Expr) 287 | (desugar-when `(λ (x) (when x (λ (y) y)))))] 288 | 289 | A pass constructed with @racket[define-pass] is composed of 290 | a signature, a body, and a list of processors. In the above 291 | pass, this signature is: 292 | 293 | @racketblock[desugar-when : Lsrc (e) -> L1 ()] 294 | 295 | The name of this pass is @racket[desugar-when]. It is 296 | followed by @racket[Lsrc], which indicates the source 297 | language, and @racket[L1], which indicates the target language 298 | for the pass. The @racket[(e)] is a list of the arguments 299 | the pass takes. In this example, it is only one, which is 300 | the source expression. The empty list @racket[()] is a list 301 | of any extra return values that the pass may give. This pass 302 | only returns an expression in the target language, and is 303 | thus empty. 304 | 305 | The remainder of the above pass is a processor, and is 306 | discussed below in @secref{processors}. 
307 | 308 | @subsection[#:tag "processors"]{Processors and Catamorphisms} 309 | 310 | The following is a processor in the @racket[desugar-when] 311 | pass shown above: 312 | 313 | @racketblock[ 314 | (Expr : Expr (e) -> Expr () 315 | [(when ,[e1] ,[e2]) 316 | `(if ,e1 ,e2 #f)])] 317 | 318 | Like passes, processors are functions and begin with a 319 | signature: 320 | 321 | @racketblock[Expr : Expr (e) -> Expr ()] 322 | 323 | The first @racket[Expr] in this processor is the name of this 324 | processor. While this name is arbitrary, @racket[Expr] is a 325 | reasonable first name as it transforms expressions. The 326 | second @racket[Expr] indicates that the input for this 327 | processor is an @racket[Expr] in @racket[Lsrc]. This 328 | information is determined by the input language of the pass. 329 | Analogously, the last @racket[Expr] indicates that the 330 | output for this processor is an @racket[Expr] in 331 | @racket[L1]. Finally, like the pass itself, @racket[(e)] 332 | means that this processor takes in one argument, an 333 | expression, and has no additional return values besides the 334 | output expression. 335 | 336 | After the signature, a processor is composed of a series of 337 | patterns and templates. Like Racket's @racket[match] form, 338 | the processor selects the first pattern to match the given 339 | expression. If none of the patterns match, Nanopass will 340 | convert the expression to an equivalent one in the target 341 | language and recursively match on all subpatterns in the 342 | expression. This automatic behavior is how Nanopass 343 | compilers reduce the amount of boilerplate. 344 | 345 | The above processor contains one pattern: 346 | 347 | @racketblock[ 348 | [(when ,[e1] ,[e2]) 349 | `(if ,e1 ,e2 #f)]] 350 | 351 | This pattern does the actual transformation of @racket[when] 352 | forms to @racket[if] forms. The first line is the pattern 353 | itself. 
Unlike match (but like @racket[syntax-parse]), 354 | patterns begin already in a @racket[quasiquote], and must 355 | use @racket[unquote] (@tt{,}) to escape. 356 | 357 | Using @racket[unquote] means to match a subexpression, and 358 | bind it to the variable given. In this pattern, however, 359 | these variables are surrounded by square brackets (@tt{[]}). 360 | Brackets are for a feature of Nanopass called catamorphisms. 361 | @note{The term catamorphism comes from category theory. 362 | While related, catamorphisms in this setting are used 363 | slightly differently and are more closely related to the 364 | @hyperlink[iumatch-link]{IU Pattern Matcher} 365 | or @tt{app} forms in @racket[match].} 366 | 367 | These so-called catamorphisms further reduce boilerplate by 368 | handling recursion automatically. The processor determines 369 | input and output non-terminals by using the location in the 370 | pattern and the name of the pattern variable. If the pass 371 | contains a processor that matches this signature, it is used 372 | to transform the variable. Otherwise, a default processor 373 | that translates the expression to a similar one in the 374 | target is used. Finally, the output is bound to the variable 375 | inside of the brackets. 376 | 377 | In this example, @racket[e1] is the first variable in a 378 | @racket[when] clause, which indicates that it is an 379 | @racket[Expr]. Next, because the variable's name begins with 380 | @racket[e], its output is also an @racket[Expr]. The 381 | processor named @racket[Expr] matches this signature, and is 382 | used to process the variable. The result is bound to the 383 | variable @racket[e1]. An analogous process happens for 384 | @racket[e2]. 
385 | 386 | An equivalent pattern that does not use catamorphisms would 387 | be: 388 | 389 | @racketblock[ 390 | [(when ,e1 ,e2) 391 | `(if ,(Expr e1) ,(Expr e2) #f)]] 392 | 393 | Here, @racket[e1] and @racket[e2] are expressions in 394 | @racket[Lsrc], and thus must be passed into the 395 | @racket[Expr] processor to be converted into @racket[L1] 396 | expressions. 397 | 398 | Note that the recursion for expressions not listed in a 399 | processor is important. Even if an expression does not need 400 | to be transformed, it may contain subexpressions that do. 401 | 402 | @examples[ 403 | #:eval nano-eval 404 | (with-output-language (Lsrc Expr) 405 | (desugar-when `(+ 5 (when #t 6))))] 406 | 407 | @subsection[#:tag "whenifscale"]{Notes on Scaling Up} 408 | 409 | Converting @racket[when] expressions to @racket[if] 410 | expressions serves as a simple example to illustrate the 411 | benefits of using Nanopass to write compilers, while also 412 | showing the basics of how to use it. This particular 413 | transformation, however, is simple enough that it is 414 | generally implemented in a language's macro expander or 415 | during its parsing pass. 416 | 417 | @section{Desugaring @racket[cond] and recursive passes} 418 | 419 | Sometimes a pass or a processor will recursively call 420 | itself on new expressions. When this happens, we need 421 | to make sure that the new expression is in the input 422 | language for the pass. By default @racket[define-pass] binds 423 | @racket[quasiquote] to construct expressions in the output 424 | language for the pass. We use @racket[with-output-language] 425 | to rebind @racket[quasiquote] to the input language. 426 | 427 | This operation occurs when compiling @racket[cond] 428 | expressions. Like @racket[when], we need to desugar 429 | @racket[cond] expressions into @racket[if] ones. Doing so 430 | allows us to directly compile them into ternary operators in 431 | C. 
432 | 433 | The transformation that follows is: 434 | 435 | @racketblock[ 436 | (cond [<c1> <e1>] 437 | [<c2> <e2>] 438 | .... 439 | [<cn> <en>] 440 | [<else>]) 441 | (code:comment "=>") 442 | (if <c1> <e1> 443 | (if <c2> <e2> 444 | .... 445 | (if <cn> <en> <else>) .... ))] 446 | 447 | In this example, each branch of the @racket[cond] clause 448 | becomes a possible branch in an @racket[if] expression. The 449 | recursive nature of the target output causes a recursive 450 | solution to occur naturally. 451 | 452 | The following language is the result of desugaring 453 | @racket[cond]: 454 | 455 | @racketblock[#,L2-code] 456 | 457 | Similarly to how @racket[L1] removed @racket[when] 458 | expressions from @racket[Lsrc], this language removes 459 | @racket[cond] expressions from @racket[L1]. 460 | 461 | The following pass does the actual desugaring: 462 | 463 | @racketblock[#,desugar-cond-code] 464 | 465 | This pass is similar to the @racket[desugar-when] pass 466 | before it, with two major differences. First, this pass uses 467 | ellipses (@racket[...]) to match on lists. Second, this pass uses 468 | @racket[with-output-language] to construct expressions in @racket[L1]. 469 | 470 | @examples[ 471 | #:eval nano-eval 472 | (with-output-language (L1 Expr) 473 | (desugar-cond `(cond [(= 5 6) 7] 474 | [(= 8 9) 10] 475 | [42])))] 476 | 477 | @subsection{Complex patterns and pattern matching} 478 | 479 | Ellipses in patterns bind the variables before them to a 480 | list.@note{Pattern variables can occur before an arbitrarily 481 | deep level of ellipses. For example, if a pattern is two levels of 482 | ellipses deep, it will be a list of lists. If the pattern 483 | is three levels of ellipses deep it will be a list of lists 484 | of lists.} In this case, both @racket[e2] and @racket[e2*] 485 | are bound to lists that match the relevant input expression 486 | given to the processor. The pattern causes them to look like 487 | they are zipped together, but they are distinct lists. 
488 | 489 | The following code uses @racket[nanopass-case] to show that 490 | @racket[e2] and @racket[e2*] are different lists: 491 | 492 | @examples[ 493 | #:eval nano-eval 494 | #:label #f 495 | (define cond-example 496 | (with-output-language (L1 Expr) 497 | `(cond [(= 1 2) 3] 498 | [(= 4 5) 6] 499 | [(= 7 8) 9] 500 | [10]))) 501 | (nanopass-case (L1 Expr) cond-example 502 | [(cond [,e1 ,e1*] [,e2 ,e2*] ... [,e3]) 503 | e2*])] 504 | 505 | First, we create a @racket[cond] expression and name it 506 | @racket[cond-example]. We then use @racket[nanopass-case] to 507 | destructure that expression, returning only @racket[e2*]. 508 | Notice that the result is a list. Returning @racket[e2] 509 | would give a similar result. If, however, we returned 510 | @racket[e1], @racket[e1*], or @racket[e3], the result would 511 | have been a single expression, rather than a list of 512 | expressions. 513 | 514 | Lists can also be used in templates wherever an ellipsis is 515 | allowed. 516 | 517 | @examples[ 518 | #:eval nano-eval 519 | (nanopass-case (L1 Expr) cond-example 520 | [(cond [,e1 ,e1*] ... [,e3]) 521 | (with-output-language (L1 Expr) 522 | `(cond [,e1* ,e1] ... [,e3]))])] 523 | 524 | In this example we swap the test and body of each of the 525 | clauses in the @racket[cond] expression. While this does 526 | change the semantics of what we would expect from a 527 | @racket[cond], it is syntactically valid. Additionally, 528 | @racket[e1] and @racket[e1*] are both lists of expressions. 529 | Even though they appear to be zipped by Nanopass, they are 530 | still distinct lists. 531 | 532 | Note that because variables bound with ellipses in the 533 | pattern are just lists, a lot of common idioms in other 534 | pattern languages are not possible. This limitation becomes 535 | particularly obvious when trying to duplicate a single 536 | element to match the length of a list. 
537 | 538 | @examples[ 539 | #:eval nano-eval 540 | (eval:error 541 | (nanopass-case (L1 Expr) cond-example 542 | [(cond [,e1 ,e1*] ... [,e3]) 543 | (with-output-language (L1 Expr) 544 | `(cond [,e1 ,e3] ... [5]))])) 545 | (nanopass-case (L1 Expr) cond-example 546 | [(cond [,e1 ,e1*] ... [,e3]) 547 | (with-output-language (L1 Expr) 548 | `(cond [,e1 ,(make-list (length e1) e3)] ... [5]))])] 549 | 550 | The first example causes an error because @racket[e1] is a 551 | list and @racket[e3] is a single expression. Using 552 | @racket[make-list], however, to generate a list of 553 | @racket[e3] expressions of the correct length 554 | achieves the desired behavior. 555 | 556 | @subsection{Recursive templates} 557 | 558 | Inside of a processor, @racket[quasiquote] is rebound to 559 | construct an expression in the output language. Normally, 560 | this is the correct behavior, but sometimes we want to 561 | construct an expression in a different language, as in the 562 | pass above. 563 | 564 | More specifically, the following is the code that rebinds 565 | @racket[quasiquote]: 566 | 567 | @racketblock[ 568 | [(cond [,[e1] ,[e1*]] [,e2 ,e2*] ... [,e3]) 569 | `(if ,e1 ,e1* ,(with-output-language (L1 Expr) 570 | (Expr `(cond [,e2 ,e2*] ... [,e3]))))]] 571 | 572 | In this expression, the outer @racket[quasiquote] 573 | constructs an expression in the output language for the 574 | pass. The inner @racket[quasiquote], however, is 575 | constructing an expression in @racket[L1], the input 576 | language for the pass. Finally, @racket[Expr] is the 577 | name of the processor, which is called recursively on the newly created 578 | expression. 579 | 580 | @subsection[#:tag "condscale"]{Notes on Scaling Up} 581 | 582 | Many desugaring operations performed by the compiler are 583 | fairly simple. As such, it is often easier for programmers 584 | to implement them together in one pass. 
Merging these passes 585 | can help reduce the boilerplate code surrounding the pass, 586 | while not making the passes themselves any more 587 | complicated. 588 | 589 | The following is an alternate version of the desugar pass 590 | that combines both @racket[desugar-when] and 591 | @racket[desugar-cond]: 592 | 593 | @racketblock[ 594 | (define-pass desugar-alt : Lsrc (e) -> L2 () 595 | (Expr : Expr (e) -> Expr () 596 | [(when ,[e1] ,[e2]) 597 | `(if ,e1 ,e2 #f)] 598 | [(cond [,[e1]]) 599 | e1] 600 | [(cond [,[e1] ,[e1*]] [,e2 ,e2*] ... [,e3]) 601 | `(if ,e1 ,e1* ,(with-output-language (L1 Expr) 602 | (Expr `(cond [,e2 ,e2*] ... [,e3]))))]))] 603 | 604 | This particular pass can be constructed simply by merging 605 | the two passes together. Doing this merging is 606 | straightforward for simple passes such as these. 607 | Unfortunately, this process gets significantly more 608 | complicated as passes themselves become more complicated. 609 | For this reason, many front end passes of a compiler will be 610 | merged as above. This so-called merging works because the 611 | passes themselves are simple, and separating them out would only add boilerplate. 612 | 613 | A second approach to desugaring expressions is to do it 614 | in the language's macro system. This makes it easier for 615 | programmers to create their own macros that act as syntactic 616 | sugar.@note{Racket's @racket[when] and @racket[cond] forms 617 | are desugared in Racket's 618 | @tech[#:key "macro" 619 | #:doc '(lib "scribblings/guide/macros.scrbl")]{ 620 | macro system}.} 621 | 622 | @section{Delaying @racket[if] Forms} 623 | 624 | Unlike function application, the body and alternate body of 625 | @racket[if] expressions should only be evaluated depending on the 626 | result of the conditional expression. Worse still, @tt{if} 627 | forms in C are statements rather than expressions. 628 | 629 | Ternary operators, however, are expressions. 
Using ternary 630 | operators directly is still problematic because expressions 631 | in C are not as expressive as ones in our source. For 632 | example, expressions in our source can create closures, apply 633 | them to a new variable, and call that closure at a later 634 | time, all in one expression. We will eventually need to 635 | translate some of these operations into statements. 636 | 637 | Translating expressions into statements is problematic with 638 | the delayed nature of @racket[if] expressions. Specifically, 639 | we want to first evaluate the condition, and then evaluate 640 | either the body or alternative.@note{If our source were 641 | effect free, we could evaluate all subexpressions of 642 | @racket[if]. This language does, however, have one major 643 | effect, non-termination. We only want an @racket[if] 644 | expression to not terminate if the appropriate 645 | subexpressions do not terminate.} 646 | 647 | One way to delay the values of @racket[if] expressions is 648 | to wrap them in function expressions, and apply the whole 649 | expression to a dummy argument. After this transformation 650 | the entire expression can be evaluated eagerly, and the 651 | functions themselves will give the conditional's bodies delayed 652 | semantics. 653 | 654 | The transformation will cause @racket[if] expressions to 655 | follow this form: 656 | 657 | @racketblock[ 658 | (if <c> <t> <f>) 659 | (code:comment "=>") 660 | ((if <c> (λ (trash) <t>) (λ (trash) <f>)) #f)] 661 | 662 | Here, both the body and alternate are functions that do not 663 | use their @racket[trash] argument. In order to force the 664 | evaluation of the selected clause, we apply the result to 665 | @racket[#f]. This value gets mapped to @racket[trash], which 666 | is never used. 667 | 668 | The following pass transforms delayed @racket[if] 669 | expressions to equivalent eager expressions: 670 | 671 | @racketblock[#,delay-if-code] 672 | 673 | Both the source and target languages for this pass are 674 | @racket[L2]. 
It is possible to create a new language that 675 | statically requires @racket[if] expressions to store only functions. 676 | Doing so in this case does not prevent further 677 | optimizations, but does help programmers find bugs in their 678 | compilers. 679 | 680 | The following is an example of a language that requires 681 | @racket[if] expressions to store functions in their body: 682 | 683 | @racketblock[ 684 | (define-language L2-alt 685 | (extends L2) 686 | (Expr (e) 687 | (- (λ (x) e) 688 | (if e1 e2 e3)) 689 | (+ l 690 | (if e l2 l3))) 691 | (Lambda (l) 692 | (+ (λ (x) e))))] 693 | 694 | All functions in this language take exactly one argument. 695 | The ones in this pass, however, are thunks that do not 696 | require an argument. To accommodate this, we generate two 697 | unused variables, and apply the result of the @racket[if] 698 | expression to @racket[#f]. 699 | 700 | @examples[ 701 | #:eval nano-eval 702 | (with-output-language (L2 Expr) 703 | (delay-if `(if #f 42 84)))] 704 | 705 | After this transformation, we can treat @racket[if] as an 706 | entirely eager expression. 707 | 708 | @subsection[#:tag "ifscale"]{Notes on Scaling Up} 709 | 710 | TODO: 711 | 712 | First: Other means of delaying 713 | 714 | Second: Better generation of temporary variables. 715 | 716 | Third: Actual thunks 717 | 718 | @section{Closure Conversion} 719 | 720 | Unlike our source language, C does not have closures. It 721 | does, however, support higher order functions through the 722 | use of function pointers. Unfortunately function pointers do 723 | not store their own environments. Thus, we use closure 724 | conversion@cite[appelcont] as our first step to supporting closures. 725 | 726 | Closure conversion is the process of removing all free 727 | variables from functions, and passing them in explicitly in 728 | the form of an environment. 
The function associated with the 729 | closure will eventually be lifted to the top level, but the 730 | environment remains in the function's place. This is 731 | possible because environment mappings are first class values 732 | in C. 733 | 734 | For example, if a function has one free variable @racket[y], 735 | the transformation would look like: 736 | 737 | @racketblock[ 738 | (lambda (x) .... y ....) 739 | (code:comment "=>") 740 | (lambda (x env) .... (env-get env y) ....)] 741 | 742 | Unfortunately, this transformation is not enough to create 743 | a closure object. When a lambda occurs we need to also 744 | explicitly create the environment associated with it. Doing 745 | so allows the closure to bind to the variables at the 746 | lambda's definition, rather than whatever they happen to be 747 | at the call site. In other words, we want to preserve 748 | @hyperlink[lexicalscope-link]{lexical scoping} in our target 749 | language. 750 | 751 | Applying this idea to the transformation above gives the 752 | following transformation: 753 | 754 | @racketblock[ 755 | (lambda (x env) .... (env-get env y) ....) 756 | (code:comment "=>") 757 | (closure (name (x env) .... (env-get env y) ....) (y))] 758 | 759 | Here, @racket[closure] is a piece of syntax to describe the 760 | @racket[closure] object. The first argument is the function 761 | expression, which has now been given the name 762 | @racket[name]. The second argument is the list of variables that 763 | this closure's environment binds, in this case @racket[y]. 764 | 765 | Now that functions take two arguments, we also need to 766 | modify all of the function call sites to also pass in the 767 | function's environment. This transformation is simple to do 768 | here because closure objects contain their environments. 
769 | 770 | The following is the transformation that happens at each 771 | function's call site: 772 | 773 | @racketblock[ 774 | (f x) 775 | (code:comment "=>") 776 | ((closure-func f) x (closure-env f))] 777 | 778 | In this example, @racket[closure-func] and 779 | @racket[closure-env] are special syntax that retrieves the 780 | function and environment objects from a closure. A later 781 | pass transforms @racket[closure-func] to retrieve a function 782 | pointer. For now, however, the closure contains the literal 783 | function itself. 784 | 785 | We perform closure conversion in two passes. First, we 786 | create a pass to identify all free variables in each 787 | function. This pass enables us to transform free variables 788 | into environment lookups, as well as determine which 789 | variables should be passed in as part of the closure 790 | environment. The second pass creates the actual explicit 791 | closure structures. These structures still contain the 792 | function and environment, while a later pass will lift them 793 | to the top. 794 | 795 | @subsection{Free Variable Identification} 796 | 797 | The first step to closure conversion is to identify all of 798 | the free variables in every function. This transformation 799 | allows us to convert free variables into lookups in a later pass. 800 | 801 | The following language modifies functions to store free variables: 802 | 803 | @racketblock[#,L3-code] 804 | 805 | In this language @racket[FreeVars-Expr] is a new 806 | non-terminal that stores an expression and a list of 807 | variables. Function expressions now store an expression with 808 | free variables for their body. The main effect of this 809 | transformation is that functions now have constant time 810 | access to all of their free variables. 
811 | 812 | @examples[ 813 | #:eval nano-eval 814 | (with-output-language (L3 Expr) 815 | `(λ (x) (free (y z) (+ x (+ y z)))))] 816 | 817 | The following pass does the actual transformation: 818 | 819 | @racketblock[#,identify-free-variables-code] 820 | 821 | @; <==================== 822 | Unlike the previous passes, this pass uses extra return 823 | values in its processors. This extra value stores a set of 824 | free variables represented as a list in the expression. By 825 | default, an expression contains no free variables. To 826 | accomplish this, we pass the empty list@; 827 | 828 | (@racket['()]) in as the default value for the extra return 829 | values. Nanopass uses this value inside of any generated 830 | parts of the processor.@note{The default value can contain 831 | free variables. When it does, those free variables serve as 832 | an indicator that the default return value is never used.} Every clause 833 | in the @racket[Expr] processor uses @racket[values] to 834 | return two values. 835 | @; ====================> 836 | 837 | The base case for this processor is: 838 | 839 | @racketblock[[,x (values x (list x))]] 840 | 841 | This case matches on variable literals. These 842 | variables remain unchanged in this pass. However, they are 843 | added to a list of free variables in the expression. 844 | Function expressions then use this list to store free 845 | variables: 846 | 847 | @racketblock[ 848 | [(λ (,x) ,[e1 a1]) 849 | (define a* (set-remove a1 x)) 850 | (values `(λ (,x) (free (,a* ...) ,e1)) 851 | a*)]] 852 | 853 | The set @racket[a*] removes the variable bound by the 854 | function from the set of free variables. It then uses those 855 | free variables in the @racket[free] expression. Finally, it 856 | also passes this modified set for use in additional 857 | expressions. Note that @racket[(,a* ...)] indicates both 858 | that @racket[a*] is a list, and that it should be placed in the list 859 | portion of the @racket[free] expression. 
860 | 861 | The remaining cases combine all of the free variables into a 862 | common set. For example, the free variables of the 863 | expression @racket[(+ ,e1 ,e2)] are the union of 864 | the free variables in @racket[e1] and the free variables in 865 | @racket[e2]. We need to explicitly create these cases 866 | because Nanopass is not clever enough to generate these 867 | clauses. 868 | 869 | In addition to using extra return values in its processors, 870 | this pass also has a body: 871 | 872 | @racketblock[ 873 | (let-values ([(res free) (Expr e)]) 874 | (unless (set-empty? free) 875 | (error 'compiler "Unbound variables: ~a" free)) 876 | res)] 877 | 878 | This body is needed because processors in this pass return 879 | two values, while the pass itself returns only one. This 880 | body additionally checks if any free variables have not been 881 | accounted for by any functions. When free variables do 882 | remain, the compiler throws an unbound variables error, and stops. 883 | 884 | @examples[ 885 | #:eval nano-eval 886 | (with-output-language (L2 Expr) 887 | (identify-free-variables 888 | `(λ (x) 889 | (λ (y) (+ x y))))) 890 | (eval:error 891 | (with-output-language (L2 Expr) 892 | (identify-free-variables 893 | `(λ (x) y))))] 894 | 895 | @subsection{Explicit Closure Creation} 896 | 897 | The next step for closure conversion is to take the free 898 | variables we've just found, and use them to construct 899 | explicit closure objects. We create these so-called closure 900 | objects in the following language: 901 | 902 | @racketblock[#,L4-code] 903 | 904 | This language removes the free variable lists because they 905 | are no longer needed. It additionally introduces 906 | @racket[exact-nonnegative-integer]s to serve as offsets in a 907 | closure object. Also, this language introduces @racket[let] 908 | forms to bind variables to the values of expressions. 
And finally, this 909 | language introduces four syntactic forms for operating on 910 | closures: 911 | 912 | @itemlist[ 913 | @item{@racket[closure] - For building closure objects.} 914 | @item{@racket[closure-env] - For retrieving the environment 915 | portion of a closure.} 916 | @item{@racket[closure-func] - For retrieving the function 917 | portion of a closure.} 918 | @item{@racket[env-get] - For retrieving a specific variable 919 | in an environment.}] 920 | 921 | @racketblock[#,make-closures-code] 922 | 923 | @examples[ 924 | #:eval nano-eval 925 | (with-output-language (L3 Expr) 926 | (make-closures `(λ (x) (free () x)))) 927 | (with-output-language (L3 Expr) 928 | (make-closures `(λ (x) (free () (λ (y) (free (x) x)))))) 929 | (with-output-language (L3 Expr) 930 | (make-closures `((λ (x) (free () x)) 42))) 931 | (with-output-language (L3 Expr) 932 | (make-closures `(((λ (x) (free ()(λ (y) (free (x) (+ x y))))) 933 | 2) 3)))] 934 | 935 | @subsection[#:tag "ccscale"]{Notes on Scaling Up} 936 | 937 | TODO: 938 | 939 | First, free variable checking at top level 940 | 941 | Second, intermediate passes. 942 | 943 | Third, data structure for linear time 944 | 945 | Fourth, Lambda Lifting@cite[lambdalifting]. 946 | 947 | @section{Turning Closures to Function Pointers} 948 | 949 | @racketblock[#,L5-code] 950 | 951 | @racketblock[#,raise-closures-code] 952 | 953 | @subsection[#:tag "clotofuncscale"]{Notes on Scaling Up} 954 | 955 | @section{Converting Expressions into Statements} 956 | 957 | @subsection{Simplifying function application} 958 | 959 | @racketblock[#,L6-code] 960 | 961 | @racketblock[#,simplify-calls-code] 962 | 963 | @subsection{Linearizing Code} 964 | 965 | @racketblock[#,L7-code] 966 | 967 | @racketblock[#,raise-lets-code] 968 | 969 | @subsection[#:tag "simplifyscale"]{Notes on Scaling Up} 970 | 971 | @section{The Runtime} 972 | 973 | @codeblock[#:keep-lang-line? 
#f]|{#lang at-exp nanopass 974 | (define runtime ; C prelude emitted verbatim ahead of all generated code 975 | @~a{#include <stdio.h> 976 | #include <stdlib.h> 977 | #include <stdarg.h> 978 | #include <inttypes.h> 979 | 980 | struct Int; 981 | struct Bool; 982 | struct Closure; 983 | union Racket_Object; 984 | 985 | typedef union Racket_Object (*Lambda)(); 986 | enum Tag {INT, BOOL, CLOSURE}; 987 | 988 | typedef struct Int { 989 | enum Tag t; 990 | int64_t v; 991 | } Int; 992 | 993 | typedef struct Bool { 994 | enum Tag t; 995 | int64_t v; 996 | } Bool; 997 | 998 | typedef struct Closure { 999 | enum Tag t; 1000 | Lambda l; 1001 | union Racket_Object * e; 1002 | } Closure; 1003 | 1004 | typedef union Racket_Object { 1005 | enum Tag t; 1006 | Int i; 1007 | Bool b; 1008 | Closure c; 1009 | } Racket_Object; 1010 | 1011 | Racket_Object __make_int(int64_t i) { 1012 | Racket_Object o; 1013 | o.t = INT; 1014 | o.i.v = i; 1015 | return o; 1016 | } 1017 | 1018 | Racket_Object __make_bool(int64_t b) { 1019 | Racket_Object o; 1020 | o.t = BOOL; 1021 | o.b.v = b; 1022 | return o; 1023 | } 1024 | 1025 | Racket_Object __make_closure(Lambda name, int argc, ...) 
{ 1026 | /* Allocate space for env */ 1027 | Racket_Object* env = malloc(sizeof(Racket_Object) * argc); 1028 | 1029 | /* Fill env */ 1030 | va_list lp; 1031 | va_start(lp, argc); 1032 | for(int i = 0; i < argc; i++) { 1033 | env[i] = va_arg(lp, Racket_Object); 1034 | } 1035 | va_end(lp); 1036 | /* Return closure */ 1037 | Racket_Object o; 1038 | o.t = CLOSURE; 1039 | o.c.l = name; 1040 | o.c.e = env; 1041 | return o; 1042 | } 1043 | 1044 | Racket_Object __env_get(Racket_Object *env, unsigned int id) { 1045 | return env[id]; 1046 | } 1047 | 1048 | Racket_Object __prim_plus(Racket_Object a, Racket_Object b) { 1049 | if(a.t != INT || b.t != INT) { 1050 | printf("+: Expected Integer\n"); 1051 | exit(1); 1052 | } 1053 | return __make_int(a.i.v + b.i.v); 1054 | } 1055 | 1056 | Racket_Object __prim_equal(Racket_Object a, Racket_Object b) { 1057 | if(a.t != INT || b.t != INT) { 1058 | printf("=: Expected Integer\n"); 1059 | exit(1); 1060 | } 1061 | return __make_bool(a.i.v == b.i.v); 1062 | } 1063 | 1064 | Racket_Object __prim_if(Racket_Object a, 1065 | Racket_Object b, 1066 | Racket_Object c) { 1067 | if(a.t != BOOL) { 1068 | printf("if: Expected Bool\n"); 1069 | exit(1); 1070 | } 1071 | return a.b.v ? b : c; 1072 | }})}| 1073 | 1074 | @section{Code Generation} 1075 | 1076 | @codeblock[#:keep-lang-line? #f]|{#lang at-exp nanopass 1077 | (define-pass generate-c : L7 (e) -> * () ; turns an L7 program into C source text 1078 | (definitions 1079 | (define (c s) ; mangle symbol s into a C identifier: "_" prefix, other characters become "_" 1080 | (list->string 1081 | (cons #\_ 1082 | (for/list ([i (in-string (symbol->string s))]) 1083 | (cond 1084 | [(or (char-alphabetic? i) 1085 | (char-numeric? 
i)) 1086 | i] 1087 | [else #\_]))))) 1088 | (define (build-func-decl name x1 x2) ; forward declaration for one lifted function 1089 | @~a{Racket_Object @c[name](Racket_Object @c[x1], 1090 | Racket_Object* @c[x2]);}) 1091 | (define (build-func name x1 x2 body) ; full definition for one lifted function 1092 | @~a{Racket_Object @c[name](Racket_Object @c[x1], 1093 | Racket_Object* @c[x2]) { 1094 | @(Let-Expr body)}})) 1095 | (Program : Program (e) -> * () ; runtime, declarations, definitions, then the entry points 1096 | [(program ([,x (,x1 ,x2) ,le*] ...) 
1097 | ,le) 1098 | @~a{@runtime 1099 | @(apply ~a (for/list ([x (in-list x)] 1100 | [x1 (in-list x1)] 1101 | [x2 (in-list x2)]) 1102 | (build-func-decl x x1 x2))) 1103 | @(apply ~a (for/list ([x (in-list x)] 1104 | [x1 (in-list x1)] 1105 | [x2 (in-list x2)] 1106 | [le* (in-list le*)]) 1107 | (build-func x x1 x2 le*))) 1108 | 1109 | Racket_Object __racket_main() { 1110 | @Let-Expr[le] 1111 | } 1112 | 1113 | int main () { 1114 | Racket_Object ret = __racket_main(); 1115 | if(ret.t == CLOSURE) { 1116 | printf("ans = #<procedure>\n"); 1117 | } else if(ret.t == INT) { 1118 | printf("ans = %" PRId64 "\n", ret.i.v); 1119 | } else { 1120 | printf("ans = %s\n", ret.b.v ? "#t" : "#f"); 1121 | } 1122 | return 0; 1123 | } 1124 | }]) 1125 | (Expr : Expr (e) -> * () ; one C expression per L7 expression 1126 | [,n @~a{__make_int(@n)}] 1127 | [,b @~a{__make_bool(@(if b "1" "0"))}] 1128 | [(+ ,x1 ,x2) 1129 | @~a{__prim_plus(@c[x1], @c[x2])}] 1130 | [(= ,x1 ,x2) 1131 | @~a{__prim_equal(@c[x1], @c[x2])}] 1132 | [(if ,x1 ,x2 ,x3) 1133 | @~a{__prim_if(@c[x1],@c[x2],@c[x3])}] 1134 | [(,x1 ,x2 ,x3) 1135 | @~a{@c[x1](@c[x2], @c[x3])}] 1136 | [(closure-env ,x) 1137 | @~a{@c[x].c.e}] 1138 | [(closure-func ,x) 1139 | @~a{@c[x].c.l}] 1140 | [(make-closure ,x (,v ...)) 1141 | @~a{__make_closure(@c[x], 1142 | @(length v) 1143 | @(apply ~a (for/list ([i (in-list v)]) 1144 | @~a{, @Var[i]})))}]) 1145 | (Var : Var (e) -> * () ; a plain variable or an environment lookup 1146 | [,x @c[x]] 1147 | [(env-get ,x ,nat) 1148 | @~a{__env_get(@c[x], @nat)}]) 1149 | (Let-Expr : Let-Expr (e) -> * () ; each let becomes a C local declaration; the final expression is returned 1150 | [(let ([,x (closure-func ,x*)]) ,le) 1151 | @~a{Lambda @c[x] = @c[x*].c.l; 1152 | @Let-Expr[le]}] 1153 | [(let ([,x (closure-env ,x*)]) ,le) 1154 | @~a{Racket_Object* @c[x] = @c[x*].c.e; 1155 | @Let-Expr[le]}] 1156 | [(let ([,x ,e]) ,le) 1157 | @~a{Racket_Object @c[x] = @(Expr e); 1158 | @Let-Expr[le]}] 1159 | [else @~a{return @(Expr e);}]))}| 1160 | 
1161 | @section{Parsing} 1162 | 1163 | @racketblock[#,parse-code] 1164 | 1165 | @section{Tying Everything Together} 1166 | 1167 | @racketblock[ 1168 | 
(define compiler ; compose applies right to left: parse runs first, generate-c last 1169 | (compose generate-c 1170 | raise-lets 1171 | simplify-calls 1172 | raise-closures 1173 | make-closures 1174 | identify-free-variables 1175 | delay-if 1176 | desugar-cond 1177 | desugar-when 1178 | parse))] 1179 | 1180 | @section{Further Reading} 1181 | 1182 | @section{Bonus: Creating a #lang} 1183 | 1184 | @generate-bibliography[] 1185 | --------------------------------------------------------------------------------