├── examples
├── cat.rkt
├── hello2.rkt
├── README
├── hello-world.rkt
├── hello.rkt
├── prime.rkt
├── beer.rkt
└── mandel.rkt
├── extra.css
├── count.rkt
├── extra-queue.css
├── Makefile
├── info.rkt
├── scribble-helpers.rkt
├── lang
└── reader.rkt
├── parser.rkt
├── language.rkt
├── semantics.rkt
├── LICENSE
├── bf.tex
├── manual-queue.scrbl
└── manual.scrbl
/examples/cat.rkt:
--------------------------------------------------------------------------------
1 | #lang planet dyoo/bf
2 | ,[.[-],]
3 |
--------------------------------------------------------------------------------
/extra.css:
--------------------------------------------------------------------------------
1 | .versionbox {
2 | display: none
3 | }
4 |
--------------------------------------------------------------------------------
/count.rkt:
--------------------------------------------------------------------------------
1 | #lang racket
2 | (length (regexp-match* #px"\\w+" (current-input-port))) ; number of \w+ matches (runs of word characters) found on the current input port
--------------------------------------------------------------------------------
/extra-queue.css:
--------------------------------------------------------------------------------
1 | .tocset {
2 | display: none
3 | }
4 |
5 | .maincolumn {
6 | margin-left: 0;
7 | }
--------------------------------------------------------------------------------
/examples/hello2.rkt:
--------------------------------------------------------------------------------
1 | #lang planet dyoo/bf
2 | ++++++[>++++++++++++<-]>.
3 | >++++++++++[>++++++++++<-]>+.
4 | +++++++..+++.>++++[>+++++++++++<-]>.
5 | <+++[>----<-]>.<<<<<+++[>+++++<-]>.
6 | >>.+++.------.--------.>>+.
7 |
--------------------------------------------------------------------------------
/examples/README:
--------------------------------------------------------------------------------
1 | Here are some sample runs:
2 |
3 |
4 | $ racket hello-world.rkt
5 | Hello World!
6 |
7 |
8 |
9 | $ echo 10 | racket prime.rkt
10 | Primes up to: 2 3 5 7
11 |
12 |
13 |
14 | # http://www.99-bottles-of-beer.net/language-brainfuck-101.html
15 | $ racket beer.rkt
--------------------------------------------------------------------------------
/Makefile:
--------------------------------------------------------------------------------
1 | doc:
2 | scribble ++xref-in setup/xref load-collections-xref --redirect-main http://docs.racket-lang.org/ --dest-name index.html manual.scrbl
3 |
4 | doc-queue:
5 | scribble ++xref-in setup/xref load-collections-xref --redirect-main http://docs.racket-lang.org/ --dest-name index-queue.html manual-queue.scrbl
6 |
7 | publish: doc
8 | scp * hashcollision.org:webapps/htdocs/brainfudge
9 |
--------------------------------------------------------------------------------
/info.rkt:
--------------------------------------------------------------------------------
1 | #lang setup/infotab
2 | (define name "bf: a brainf*ck compiler for Racket")
3 | (define categories '(devtools))
4 | (define can-be-loaded-with 'all)
5 | (define required-core-version "5.1.1")
6 | (define version "1.8")
7 | (define repositories '("4.x"))
8 | (define scribblings '(("manual.scrbl" () (getting-started))))
9 | (define primary-file "language.rkt")
10 | (define blurb
11 | '("bf: a brainf*ck compiler for Racket. Includes a tutorial for building a language in Racket."))
12 | (define release-notes
13 | '((p "Final? release.")))
--------------------------------------------------------------------------------
/scribble-helpers.rkt:
--------------------------------------------------------------------------------
1 | #lang racket/base
2 |
3 | (provide inject-javascript inject-css)
4 |
5 | (require scribble/core
6 | scribble/html-properties
7 | scriblib/render-cond)
8 |
9 | ;; Adds JavaScript if we're rendering in HTML: body (the rest arguments) is stored in a "text/javascript" script property on an element with no content.
10 | (define (inject-javascript . body)
11 | (cond-element
12 | [latex ""] ;; the latex branch yields the empty string
13 | [html (make-element (make-style #f (list (make-script-property "text/javascript"
14 | body)))
15 | '())]
16 | [text ""])) ;; the text branch yields the empty string
17 | ;; Adds a stylesheet if we're rendering in HTML: path is wrapped in a css-addition style property on an element with no content.
18 | (define (inject-css path)
19 | (cond-element
20 | [latex ""] ;; the latex branch yields the empty string
21 | [html (make-element (make-style #f (list (make-css-addition path)))
22 | '())]
23 | [text ""])) ;; the text branch yields the empty string
24 |
25 |
26 |
27 | ;;(define (google-analytics)
28 | ;; (make-tag
--------------------------------------------------------------------------------
/lang/reader.rkt:
--------------------------------------------------------------------------------
1 | #lang s-exp syntax/module-reader
2 | (planet dyoo/bf/language) ;; language module used for bf programs. NOTE(review): the old note read "switched from" this very same path, so the previous value is unknown
3 | #:read my-read
4 | #:read-syntax my-read-syntax
5 | #:info my-get-info
6 | (require "../parser.rkt")
7 | ;; my-read: input-port -> (U datum eof). Datum-only counterpart of my-read-syntax.
8 | (define (my-read in)
9 | (let ([v (my-read-syntax #f in)]) (if (syntax? v) (syntax->datum v) v))) ;; eof is not a syntax object: hand it back as-is instead of passing it to syntax->datum
10 | ;; my-read-syntax: any input-port -> (U syntax eof). Delegates directly to parse-expr from parser.rkt.
11 | (define (my-read-syntax src in)
12 | (parse-expr src in))
13 |
14 |
15 |
16 | ;; my-get-info: answers editor queries about bf source files.
17 | ;; For the color-lexer key we hand DrRacket the default, textual
18 | ;; lexer; every other key is delegated to default-filter.
19 | ;;
20 | ;; See: http://docs.racket-lang.org/guide/language-get-info.html
21 | ;; for more details, as well as the documentation in
22 | ;; syntax/module-reader.
23 | (define (my-get-info key default default-filter)
24 | (cond
25 | [(eq? key 'color-lexer)
26 | ;; resolved dynamically, only when a lexer is actually requested
27 | (dynamic-require 'syntax-color/default-lexer 'default-lexer)]
28 | [else
29 | (default-filter key default)]))
30 |
31 |
--------------------------------------------------------------------------------
/examples/hello-world.rkt:
--------------------------------------------------------------------------------
1 | #lang planet dyoo/bf
2 |
3 | +++++ +++++ initialize counter (cell #0) to 10
4 | [ use loop to set the next four cells to 70/100/30/10
5 | > +++++ ++ add 7 to cell #1
6 | > +++++ +++++ add 10 to cell #2
7 | > +++ add 3 to cell #3
8 | > + add 1 to cell #4
9 | <<<< - decrement counter (cell #0)
10 | ]
11 | > ++ . print 'H'
12 | > + . print 'e'
13 | +++++ ++ . print 'l'
14 | . print 'l'
15 | +++ . print 'o'
16 | > ++ . print ' '
17 | << +++++ +++++ +++++ . print 'W'
18 | > . print 'o'
19 | +++ . print 'r'
20 | ----- - . print 'l'
21 | ----- --- . print 'd'
22 | > + . print '!'
23 | > . print '\n'
24 |
--------------------------------------------------------------------------------
/examples/hello.rkt:
--------------------------------------------------------------------------------
1 | #lang s-exp (planet dyoo/bf/language)
2 |
3 | (plus)(plus)(plus)(plus)(plus) (plus)(plus)(plus)(plus)(plus)
4 | (brackets
5 | (greater-than) (plus)(plus)(plus)(plus)(plus) (plus)(plus)
6 | (greater-than) (plus)(plus)(plus)(plus)(plus) (plus)(plus)(plus)(plus)(plus)
7 | (greater-than) (plus)(plus)(plus)
8 | (greater-than) (plus)
9 | (less-than)(less-than)(less-than)(less-than) (minus))
10 | (greater-than) (plus)(plus) (period)
11 | (greater-than) (plus) (period)
12 | (plus)(plus)(plus)(plus)(plus) (plus)(plus) (period)
13 | (period)
14 | (plus)(plus)(plus) (period)
15 | (greater-than) (plus)(plus) (period)
16 | (less-than)(less-than) (plus)(plus)(plus)(plus)(plus)
17 | (plus)(plus)(plus)(plus)(plus) (plus)(plus)(plus)(plus)(plus) (period)
18 | (greater-than) (period)
19 | (plus)(plus)(plus) (period)
20 | (minus)(minus)(minus)(minus)(minus) (minus) (period)
21 | (minus)(minus)(minus)(minus)(minus) (minus)(minus)(minus) (period)
22 | (greater-than) (plus) (period)
23 | (greater-than) (period)
--------------------------------------------------------------------------------
/parser.rkt:
--------------------------------------------------------------------------------
1 | #lang racket
2 |
3 | (require rackunit)
4 |
5 | ;; The only visible export of this module will be parse-expr.
6 | (provide parse-expr)
7 |
8 | ;; parse-expr: any input-port -> (U syntax eof)
9 | ;; Reads one bf form from in: either a syntax object tagged with src and its location, or the eof object at end of input.
10 | (define (parse-expr src in)
11 | (define-values (line column position) (port-next-location in)) ;; location is sampled before the character is consumed
12 | (define next-char (read-char in))
13 |
14 | ;; decorate: s-expression number -> syntax
15 | ;; Wrap the s-expression with source location.
16 | (define (decorate sexp span)
17 | (datum->syntax #f sexp (list src line column position span)))
18 |
19 | (cond
20 | [(eof-object? next-char) eof]
21 | [else
22 | (case next-char
23 | [(#\<) (decorate '(less-than) 1)]
24 | [(#\>) (decorate '(greater-than) 1)]
25 | [(#\+) (decorate '(plus) 1)]
26 | [(#\-) (decorate '(minus) 1)]
27 | [(#\,) (decorate '(comma) 1)]
28 | [(#\.) (decorate '(period) 1)]
29 | [(#\[)
30 | ;; The slightly messy case is bracket. We keep reading
31 | ;; a list of exprs, and then construct a wrapping bracket
32 | ;; around the whole thing.
33 | (define elements (parse-exprs src in))
34 | (define-values (l c tail-position)
35 | (port-next-location in))
36 | (decorate `(brackets ,@elements)
37 | (- tail-position position))]
38 | [else ;; any other character, a stray closing bracket included, is skipped and parsing continues
39 | (parse-expr src in)])]))
40 |
41 | ;; parse-exprs: any input-port -> (listof syntax)
42 | ;; Parse the expressions of a bracket body, up to and including the closing bracket; raises an error if eof comes first.
43 | (define (parse-exprs source-name in)
44 | (define peeked-char (peek-char in))
45 | (cond
46 | [(eof-object? peeked-char)
47 | (error 'parse-exprs "Expected ], but read eof")]
48 | [(char=? peeked-char #\])
49 | (read-char in) ;; consume the closing bracket that was only peeked at
50 | empty]
51 | [(member peeked-char (list #\< #\> #\+ #\- #\, #\. #\[)) ;; a command character: leave it in the port for parse-expr
52 | (cons (parse-expr source-name in)
53 | (parse-exprs source-name in))]
54 | [else
55 | (read-char in) ;; drop a non-command character
56 | (parse-exprs source-name in)]))
57 |
58 |
59 | ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
60 | ;; Tests
61 | ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
62 |
63 | ;; simple tests
64 | (check-equal? eof (parse-expr 'test (open-input-string "")))
65 | (check-equal? '(greater-than)
66 | (syntax->datum (parse-expr 'test (open-input-string ">"))))
67 | (check-equal? '(less-than)
68 | (syntax->datum (parse-expr 'test (open-input-string "<"))))
69 | (check-equal? '(plus)
70 | (syntax->datum (parse-expr 'test (open-input-string "+"))))
71 | (check-equal? '(minus)
72 | (syntax->datum (parse-expr 'test (open-input-string "-"))))
73 | (check-equal? '(comma)
74 | (syntax->datum (parse-expr 'test (open-input-string ","))))
75 | (check-equal? '(period)
76 | (syntax->datum (parse-expr 'test (open-input-string "."))))
77 |
78 |
79 | ;; bracket tests
80 | (check-equal? '(brackets)
81 | (syntax->datum (parse-expr 'test (open-input-string "[]"))))
82 | (check-equal? '(brackets (brackets))
83 | (syntax->datum (parse-expr 'test (open-input-string "[[]]"))))
84 |
85 |
86 | ;; Parsing the "cat" function
87 | (let ([port (open-input-string ",[.,]")])
88 | (check-equal? '(comma)
89 | (syntax->datum (parse-expr 'test port)))
90 | (check-equal? '(brackets (period) (comma))
91 | (syntax->datum (parse-expr 'test port)))
92 | (check-equal? eof
93 | (parse-expr 'test port)))
--------------------------------------------------------------------------------
/language.rkt:
--------------------------------------------------------------------------------
1 | #lang racket/base
2 |
3 | (require "semantics.rkt"
4 | racket/stxparam
5 | (for-syntax racket/base))
6 |
7 | (provide greater-than
8 | less-than
9 | plus
10 | minus
11 | period
12 | comma
13 | brackets
14 | (rename-out [my-module-begin #%module-begin]))
15 |
16 |
17 |
18 | ;; We define two syntax parameters here: current-data and current-ptr.
19 | ;; They cooperate with the other forms in this language. See
20 | ;; my-module-begin's comments for more details.
21 | (define-syntax-parameter current-data #f)
22 | (define-syntax-parameter current-ptr #f)
23 |
24 |
25 |
26 |
27 | ;; Every module in this language will make sure that it
28 | ;; uses a fresh state. We create one, and then within
29 | ;; the lexical context of a my-module-begin, all the
30 | ;; other forms will refer to current-data and current-ptr.
31 | (define-syntax (my-module-begin stx)
32 | (syntax-case stx ()
33 | [(_ body ...)
34 | (syntax/loc stx
35 | (#%plain-module-begin
36 |
37 | (define (run)
38 | (let-values ([(fresh-state fresh-ptr) (new-state)])
39 |
40 | ;; Here are the mechanics we're using to get all the other
41 | ;; forms to use this fresh state.
42 | ;;
43 | ;; We use the syntax parameter library to make
44 | ;; any references to current-data and current-ptr within the body
45 | ;; syntactically re-route to the fresh-state and fresh-ptr we create here.
46 | (syntax-parameterize ([current-data
47 | (make-rename-transformer #'fresh-state)]
48 | [current-ptr
49 | (make-rename-transformer #'fresh-ptr)])
50 | (begin body ... (void))))) ;; the trailing (void) makes run return void whatever the last body form produces
51 | (run)))])) ;; the program runs once, when the module body is evaluated
52 |
53 |
54 | ;; In order to produce good runtime error messages
55 | ;; for greater-than and less-than, we latch onto
56 | ;; the syntax object for dear life, since it has
57 | ;; information about where it came from in the
58 | ;; source syntax.
59 | ;;
60 | ;; The '#,(syntax-... stx) pieces below copy the form's
61 | ;; source, line, column, position and span into a srcloc
62 | ;; that is built at runtime and handed to increment-ptr,
63 | ;; which passes it on when it raises its out-of-range error.
64 | (define-syntax (greater-than stx)
65 | (syntax-case stx ()
66 | [(_)
67 | (quasisyntax/loc stx
68 | (increment-ptr current-data current-ptr
69 | (srcloc '#,(syntax-source stx)
70 | '#,(syntax-line stx)
71 | '#,(syntax-column stx)
72 | '#,(syntax-position stx)
73 | '#,(syntax-span stx))))]))
74 |
75 | ;; less-than: expands to decrement-ptr on the current data and pointer, passing a srcloc built from this form's own source location.
76 | (define-syntax (less-than stx)
77 | (syntax-case stx ()
78 | [(_)
79 | (quasisyntax/loc stx
80 | (decrement-ptr current-data current-ptr
81 | (srcloc '#,(syntax-source stx)
82 | '#,(syntax-line stx)
83 | '#,(syntax-column stx)
84 | '#,(syntax-position stx)
85 | '#,(syntax-span stx))))]))
86 |
87 | ;; (plus): expands to increment-byte on the current data and pointer.
88 | (define-syntax-rule (plus)
89 | (increment-byte current-data current-ptr))
90 | ;; (minus): expands to decrement-byte on the current data and pointer.
91 | (define-syntax-rule (minus)
92 | (decrement-byte current-data current-ptr))
93 | ;; (period): expands to write-byte-to-stdout on the current data and pointer.
94 | (define-syntax-rule (period)
95 | (write-byte-to-stdout current-data current-ptr))
96 | ;; (comma): expands to read-byte-from-stdin on the current data and pointer.
97 | (define-syntax-rule (comma)
98 | (read-byte-from-stdin current-data current-ptr))
99 | ;; (brackets body ...): expands to the semantics' loop form over the current data and pointer, with body as the loop body.
100 | (define-syntax-rule (brackets body ...)
101 | (loop current-data current-ptr body ...))
--------------------------------------------------------------------------------
/examples/prime.rkt:
--------------------------------------------------------------------------------
1 | #lang planet dyoo/bf
2 | ===================================================================
3 | ======================== OUTPUT STRING ============================
4 | ===================================================================
5 | >++++++++[<++++++++>-]<++++++++++++++++.[-]
6 | >++++++++++[<++++++++++>-]<++++++++++++++.[-]
7 | >++++++++++[<++++++++++>-]<+++++.[-]
8 | >++++++++++[<++++++++++>-]<+++++++++.[-]
9 | >++++++++++[<++++++++++>-]<+.[-]
10 | >++++++++++[<++++++++++>-]<+++++++++++++++.[-]
11 | >+++++[<+++++>-]<+++++++.[-]
12 | >++++++++++[<++++++++++>-]<+++++++++++++++++.[-]
13 | >++++++++++[<++++++++++>-]<++++++++++++.[-]
14 | >+++++[<+++++>-]<+++++++.[-]
15 | >++++++++++[<++++++++++>-]<++++++++++++++++.[-]
16 | >++++++++++[<++++++++++>-]<+++++++++++.[-]
17 | >+++++++[<+++++++>-]<+++++++++.[-]
18 | >+++++[<+++++>-]<+++++++.[-]
19 |
20 | ===================================================================
21 | ======================== INPUT NUMBER ============================
22 | ===================================================================
23 | + cont=1
24 | [
25 | - cont=0
26 | >,
27 | ======SUB10======
28 | ----------
29 |
30 | [ not 10
31 | <+> cont=1
32 | =====SUB38======
33 | ----------
34 | ----------
35 | ----------
36 | --------
37 |
38 | >
39 | =====MUL10=======
40 | [>+>+<<-]>>[<<+>>-]< dup
41 |
42 | >>>+++++++++
43 | [
44 | <<<
45 | [>+>+<<-]>>[<<+>>-]< dup
46 | [<<+>>-]
47 | >>-
48 | ]
49 | <<<[-]<
50 | ======RMOVE1======
51 | <
52 | [>+<-]
53 | ]
54 | <
55 | ]
56 | >>[<<+>>-]<<
57 |
58 | ===================================================================
59 | ======================= PROCESS NUMBER ===========================
60 | ===================================================================
61 |
62 | ==== ==== ==== ====
63 | numd numu teid teiu
64 | ==== ==== ==== ====
65 |
66 | >+<-
67 | [
68 | >+
69 | ======DUP======
70 | [>+>+<<-]>>[<<+>>-]<
71 |
72 | >+<--
73 |
74 | >>>>>>>>+<<<<<<<< isprime=1
75 |
76 | [
77 | >+
78 |
79 | <-
80 |
81 | =====DUP3=====
82 | <[>>>+>+<<<<-]>>>>[<<<<+>>>>-]<<<
83 |
84 | =====DUP2=====
85 | >[>>+>+<<<-]>>>[<<<+>>>-]<<< <
86 |
87 |
88 | >>>
89 |
90 |
91 | ====DIVIDES=======
92 | [>+>+<<-]>>[<<+>>-]< DUP i=div
93 |
94 | <<
95 | [
96 | >>>>>+ bool=1
97 | <<<
98 | [>+>+<<-]>>[<<+>>-]< DUP
99 | [>>[-]<<-] IF i THEN bool=0
100 | >>
101 | [ IF i=0
102 | <<<<
103 | [>+>+<<-]>>[<<+>>-]< i=div
104 | >>>
105 | - bool=0
106 | ]
107 | <<<
108 | - DEC i
109 | <<
110 | -
111 | ]
112 |
113 | +>>[<<[-]>>-]<<
114 | >[-]< CLR div
115 | =====END DIVIDES====
116 |
117 |
118 | [>>>>>>[-]<<<<<<-] if divides then isprime=0
119 |
120 |
121 | <<
122 |
123 | >>[-]>[-]<<<
124 | ]
125 |
126 | >>>>>>>>
127 | [
128 | -
129 | <<<<<<<[-]<<
130 |
131 | [>>+>+<<<-]>>>[<<<+>>>-]<<<
132 |
133 | >>
134 |
135 |
136 |
137 |
138 | ===================================================================
139 | ======================== OUTPUT NUMBER ===========================
140 | ===================================================================
141 | [>+<-]>
142 |
143 | [
144 | ======DUP======
145 | [>+>+<<-]>>[<<+>>-]<
146 |
147 |
148 | ======MOD10====
149 | >+++++++++<
150 | [
151 | >>>+<< bool= 1
152 | [>+>[-]<<-] bool= ten==0
153 | >[<+>-] ten = tmp
154 | >[<<++++++++++>>-] if ten=0 ten=10
155 | <<- dec ten
156 | <- dec num
157 | ]
158 | +++++++++ num=9
159 | >[<->-]< dec num by ten
160 |
161 | =======RROT======
162 | [>+<-]
163 | < [>+<-]
164 | < [>+<-]
165 | >>>[<<<+>>>-]
166 | <
167 |
168 | =======DIV10========
169 | >+++++++++<
170 | [
171 | >>>+<< bool= 1
172 | [>+>[-]<<-] bool= ten==0
173 | >[<+>-] ten = tmp
174 | >[<<++++++++++>>>+<-] if ten=0 ten=10 inc div
175 | <<- dec ten
176 | <- dec num
177 | ]
178 | >>>>[<<<<+>>>>-]<<<< copy div to num
179 | >[-]< clear ten
180 |
181 | =======INC1=========
182 | <+>
183 | ]
184 |
185 | <
186 | [
187 | =======MOVER=========
188 | [>+<-]
189 |
190 | =======ADD48========
191 | +++++++[<+++++++>-]<->
192 |
193 | =======PUTC=======
194 | <.[-]>
195 |
196 | ======MOVEL2========
197 | >[<<+>>-]<
198 |
199 | <-
200 | ]
201 |
202 | >++++[<++++++++>-]<.[-]
203 |
204 | ===================================================================
205 | =========================== END FOR ===============================
206 | ===================================================================
207 |
208 |
209 | >>>>>>>
210 | ]
211 | <<<<<<<<
212 |
213 |
214 |
215 | >[-]<
216 | [-]
217 | <<-
218 | ]
219 |
220 | ======LF========
221 |
222 | ++++++++++.[-]
223 |
--------------------------------------------------------------------------------
/semantics.rkt:
--------------------------------------------------------------------------------
1 | #lang racket/base
2 |
3 | ;; This is a second semantics for the language that tries to go for speed,
4 | ;; at the expense of making things a little more complicated.
5 | ;;
6 | ;; It uses features in: http://docs.racket-lang.org/reference/unsafe.html
7 | ;; to reduce the number of runtime checks.
8 | ;;
9 | ;; We also manage the state as two separate values.
10 | ;;
11 | ;; Tape out-of-bounds errors at runtime should be properly reported with
12 | ;; source location.
13 |
14 | (require rackunit ;; we want unit tests
15 | racket/unsafe/ops ;; and we want raw, unsafe access for speed
16 | (for-syntax racket/base))
17 |
18 |
19 | (provide (all-defined-out))
20 |
21 |
22 | ;; MAX-DATA-SIZE: a macro whose transformer ignores its input and always expands to the literal 30000.
23 | (define-syntax MAX-DATA-SIZE
24 | (lambda (stx) #'30000))
25 |
26 |
27 | ;; We use a customized error structure that supports
28 | ;; source location reporting. Its srcloc field holds a srcloc, or #f when no location is known.
29 | (define-struct (exn:fail:out-of-bounds exn:fail)
30 | (srcloc)
31 | #:property prop:exn:srclocs
32 | (lambda (a-struct) ;; must answer with a list of srcloc values, so report none when the field is not a srcloc
33 | (let ([loc (exn:fail:out-of-bounds-srcloc a-struct)]) (if (srcloc? loc) (list loc) '()))))
34 |
35 |
36 |
37 | ;; Creates a new state as two values: a fresh byte array of
38 | ;; MAX-DATA-SIZE (30000) zeros, and the pointer, starting at index 0.
39 | (define-syntax-rule (new-state)
40 | (values (make-bytes MAX-DATA-SIZE 0)
41 | 0))
42 |
43 |
44 | ;; Raises exn:fail:out-of-bounds with a message that names caller-name and with
45 | ;; loc stored as the exception's source location. a-state is accepted but never used.
46 | (define-syntax-rule (raise-range-errors! a-state caller-name loc)
47 | (raise (make-exn:fail:out-of-bounds
48 | (format "~a: pointer went out of range of data"
49 | caller-name)
50 | (current-continuation-marks)
51 | loc)))
52 |
53 |
54 | ;; increment the data pointer; raises the out-of-bounds error (carrying loc) once the pointer reaches MAX-DATA-SIZE
55 | (define-syntax-rule (increment-ptr data ptr loc)
56 | (begin
57 | (set! ptr (unsafe-fx+ ptr 1))
58 | (when (unsafe-fx>= ptr MAX-DATA-SIZE)
59 | (raise-range-errors! data 'increment-ptr loc)))) ;; pass the real data argument rather than the previously unbound identifier a-state
60 |
61 |
62 | ;; decrement the data pointer; raises the out-of-bounds error (carrying loc) once the pointer drops below 0
63 | (define-syntax-rule (decrement-ptr data ptr loc)
64 | (begin
65 | (set! ptr (unsafe-fx- ptr 1))
66 | (when (unsafe-fx< ptr 0)
67 | (raise-range-errors! data 'decrement-ptr loc)))) ;; pass the real data argument rather than the previously unbound identifier a-state
68 |
69 |
70 | ;; Add one to the cell under the pointer, wrapping 255 back around to 0.
71 | (define-syntax-rule (increment-byte data ptr)
72 | (let ([bumped (unsafe-fx+ (unsafe-bytes-ref data ptr) 1)])
73 | (unsafe-bytes-set! data
74 | ptr
75 | (unsafe-fxmodulo bumped
76 | 256))))
77 |
78 | ;; Subtract one from the cell under the pointer, wrapping 0 back around to 255.
79 | (define-syntax-rule (decrement-byte data ptr)
80 | (let ([lowered (unsafe-fx- (unsafe-bytes-ref data ptr) 1)])
81 | (unsafe-bytes-set! data
82 | ptr
83 | (unsafe-fxmodulo lowered
84 | 256))))
85 |
86 | ;; write the byte at the data pointer to the current output port, then flush that port
87 | (define-syntax-rule (write-byte-to-stdout data ptr)
88 | (begin
89 | (write-byte (unsafe-bytes-ref data ptr) (current-output-port))
90 | (flush-output (current-output-port))))
91 |
92 | ;; Read one byte from the current input port into the cell under the pointer.
93 | (define-syntax-rule (read-byte-from-stdin data ptr)
94 | (let ([incoming (read-byte (current-input-port))])
95 | (unsafe-bytes-set! data ptr
96 | ;; end of input is stored as 0
97 | (if (eof-object? incoming) 0 incoming))))
98 |
99 | ;; Loops
100 | ;; Run body for as long as the cell under the pointer is nonzero; the
101 | ;; cell is tested before every pass, so a zero cell skips body entirely.
102 | (define-syntax-rule (loop data ptr body ...)
103 | (let again ()
104 | (unless (unsafe-fx= (unsafe-bytes-ref data ptr)
105 | 0)
106 | body ...
107 | (again))))
108 |
109 |
110 | ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
111 | ;; Some tests follow:
112 | ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
113 |
114 |
115 | ;; Simple exercises.
116 | (let-values ([(data ptr) (new-state)])
117 | (increment-byte data ptr)
118 | (check-equal? 1 (bytes-ref data 0))
119 | (increment-byte data ptr)
120 | (check-equal? 2 (bytes-ref data 0))
121 | (decrement-byte data ptr)
122 | (check-equal? 1 (bytes-ref data 0)))
123 |
124 | ;; pointer movement
125 | (let-values ([(data ptr) (new-state)])
126 | (increment-ptr data ptr #f)
127 | (increment-byte data ptr)
128 | (check-equal? 0 (bytes-ref data 0))
129 | (check-equal? 1 (bytes-ref data 1))
130 | (decrement-ptr data ptr #f)
131 | (increment-byte data ptr)
132 | (check-equal? 1 (bytes-ref data 0))
133 | (check-equal? 1 (bytes-ref data 1)))
134 |
135 | ;; make sure standard input is doing something
136 | (let-values ([(data ptr) (new-state)])
137 | (parameterize ([current-input-port
138 | (open-input-bytes (bytes 3 1 4))])
139 | (read-byte-from-stdin data ptr)
140 | (increment-ptr data ptr #f)
141 | (read-byte-from-stdin data ptr)
142 | (increment-ptr data ptr #f)
143 | (read-byte-from-stdin data ptr))
144 | (check-equal? 3 (bytes-ref data 0))
145 | (check-equal? 1 (bytes-ref data 1))
146 | (check-equal? 4 (bytes-ref data 2)))
147 |
148 |
149 | ;; make sure standard output is doing something
150 | (let-values ([(data ptr) (new-state)])
151 | (set! data (bytes 80 76 84))
152 | (let ([simulated-stdout (open-output-string)])
153 | (parameterize ([current-output-port simulated-stdout])
154 | (write-byte-to-stdout data ptr)
155 | (increment-ptr data ptr #f)
156 | (write-byte-to-stdout data ptr)
157 | (increment-ptr data ptr #f)
158 | (write-byte-to-stdout data ptr))
159 | (check-equal? "PLT" (get-output-string simulated-stdout))))
160 |
161 |
162 | ;; Let's see that we can clear.
163 | (let-values ([(data ptr) (new-state)])
164 | (set! data (bytes 0 104 101 108 112 109 101 105
165 | 109 109 101 108 116 105 110 103 ))
166 | (set! ptr 15)
167 | ;; [ [-] < ]
168 | (loop data ptr
169 | (loop data ptr (decrement-byte data ptr))
170 | (decrement-ptr data ptr #f))
171 |
172 | (check-equal? 0 ptr)
173 | (check-equal? (make-bytes 16 0) data))
174 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | CC0 1.0 Universal
2 |
3 | Statement of Purpose
4 |
5 | The laws of most jurisdictions throughout the world automatically confer
6 | exclusive Copyright and Related Rights (defined below) upon the creator and
7 | subsequent owner(s) (each and all, an "owner") of an original work of
8 | authorship and/or a database (each, a "Work").
9 |
10 | Certain owners wish to permanently relinquish those rights to a Work for the
11 | purpose of contributing to a commons of creative, cultural and scientific
12 | works ("Commons") that the public can reliably and without fear of later
13 | claims of infringement build upon, modify, incorporate in other works, reuse
14 | and redistribute as freely as possible in any form whatsoever and for any
15 | purposes, including without limitation commercial purposes. These owners may
16 | contribute to the Commons to promote the ideal of a free culture and the
17 | further production of creative, cultural and scientific works, or to gain
18 | reputation or greater distribution for their Work in part through the use and
19 | efforts of others.
20 |
21 | For these and/or other purposes and motivations, and without any expectation
22 | of additional consideration or compensation, the person associating CC0 with a
23 | Work (the "Affirmer"), to the extent that he or she is an owner of Copyright
24 | and Related Rights in the Work, voluntarily elects to apply CC0 to the Work
25 | and publicly distribute the Work under its terms, with knowledge of his or her
26 | Copyright and Related Rights in the Work and the meaning and intended legal
27 | effect of CC0 on those rights.
28 |
29 | 1. Copyright and Related Rights. A Work made available under CC0 may be
30 | protected by copyright and related or neighboring rights ("Copyright and
31 | Related Rights"). Copyright and Related Rights include, but are not limited
32 | to, the following:
33 |
34 | i. the right to reproduce, adapt, distribute, perform, display, communicate,
35 | and translate a Work;
36 |
37 | ii. moral rights retained by the original author(s) and/or performer(s);
38 |
39 | iii. publicity and privacy rights pertaining to a person's image or likeness
40 | depicted in a Work;
41 |
42 | iv. rights protecting against unfair competition in regards to a Work,
43 | subject to the limitations in paragraph 4(a), below;
44 |
45 | v. rights protecting the extraction, dissemination, use and reuse of data in
46 | a Work;
47 |
48 | vi. database rights (such as those arising under Directive 96/9/EC of the
49 | European Parliament and of the Council of 11 March 1996 on the legal
50 | protection of databases, and under any national implementation thereof,
51 | including any amended or successor version of such directive); and
52 |
53 | vii. other similar, equivalent or corresponding rights throughout the world
54 | based on applicable law or treaty, and any national implementations thereof.
55 |
56 | 2. Waiver. To the greatest extent permitted by, but not in contravention of,
57 | applicable law, Affirmer hereby overtly, fully, permanently, irrevocably and
58 | unconditionally waives, abandons, and surrenders all of Affirmer's Copyright
59 | and Related Rights and associated claims and causes of action, whether now
60 | known or unknown (including existing as well as future claims and causes of
61 | action), in the Work (i) in all territories worldwide, (ii) for the maximum
62 | duration provided by applicable law or treaty (including future time
63 | extensions), (iii) in any current or future medium and for any number of
64 | copies, and (iv) for any purpose whatsoever, including without limitation
65 | commercial, advertising or promotional purposes (the "Waiver"). Affirmer makes
66 | the Waiver for the benefit of each member of the public at large and to the
67 | detriment of Affirmer's heirs and successors, fully intending that such Waiver
68 | shall not be subject to revocation, rescission, cancellation, termination, or
69 | any other legal or equitable action to disrupt the quiet enjoyment of the Work
70 | by the public as contemplated by Affirmer's express Statement of Purpose.
71 |
72 | 3. Public License Fallback. Should any part of the Waiver for any reason be
73 | judged legally invalid or ineffective under applicable law, then the Waiver
74 | shall be preserved to the maximum extent permitted taking into account
75 | Affirmer's express Statement of Purpose. In addition, to the extent the Waiver
76 | is so judged Affirmer hereby grants to each affected person a royalty-free,
77 | non transferable, non sublicensable, non exclusive, irrevocable and
78 | unconditional license to exercise Affirmer's Copyright and Related Rights in
79 | the Work (i) in all territories worldwide, (ii) for the maximum duration
80 | provided by applicable law or treaty (including future time extensions), (iii)
81 | in any current or future medium and for any number of copies, and (iv) for any
82 | purpose whatsoever, including without limitation commercial, advertising or
83 | promotional purposes (the "License"). The License shall be deemed effective as
84 | of the date CC0 was applied by Affirmer to the Work. Should any part of the
85 | License for any reason be judged legally invalid or ineffective under
86 | applicable law, such partial invalidity or ineffectiveness shall not
87 | invalidate the remainder of the License, and in such case Affirmer hereby
88 | affirms that he or she will not (i) exercise any of his or her remaining
89 | Copyright and Related Rights in the Work or (ii) assert any associated claims
90 | and causes of action with respect to the Work, in either case contrary to
91 | Affirmer's express Statement of Purpose.
92 |
93 | 4. Limitations and Disclaimers.
94 |
95 | a. No trademark or patent rights held by Affirmer are waived, abandoned,
96 | surrendered, licensed or otherwise affected by this document.
97 |
98 | b. Affirmer offers the Work as-is and makes no representations or warranties
99 | of any kind concerning the Work, express, implied, statutory or otherwise,
100 | including without limitation warranties of title, merchantability, fitness
101 | for a particular purpose, non infringement, or the absence of latent or
102 | other defects, accuracy, or the present or absence of errors, whether or not
103 | discoverable, all to the greatest extent permissible under applicable law.
104 |
105 | c. Affirmer disclaims responsibility for clearing rights of other persons
106 | that may apply to the Work or any use thereof, including without limitation
107 | any person's Copyright and Related Rights in the Work. Further, Affirmer
108 | disclaims responsibility for obtaining any necessary consents, permissions
109 | or other rights required for any use of the Work.
110 |
111 | d. Affirmer understands and acknowledges that Creative Commons is not a
112 | party to this document and has no duty or obligation with respect to this
113 | CC0 or use of the Work.
114 |
115 | For more information, please see
116 |
Hello world!
40 | 41 | \end{verbatim} 42 | 43 | Going back to the world of Racket, we see by analogy that the \verb+#lang+ line in a Racket program is a self-description of how to treat the rest of the program. (Actually, the \verb+#lang+ line is quite a bit more active than this, but we'll get to this in a moment.) 44 | 45 | The racket part in the \verb+#lang+ line isn't inevitable: the main Racket distribution, in fact, comes bundled with several languages which can take the place of the word racket. Many of these languages (racket/base, typed/racket, lazy) still look like Racket... but some of them don't. Here's one example: 46 | \begin{verbatim} 47 | #lang datalog 48 | ancestor(A, B) :- parent(A, B). 49 | ancestor(A, B) :- 50 | parent(A, C), D = C, ancestor(D, B). 51 | parent(john, douglas). 52 | parent(bob, john). 53 | ancestor(A, B)? 54 | \end{verbatim} 55 | This is an example of a Datalog program that deals with logical relations. Neat! 56 | 57 | 58 | What might be surprising is that the mechanism for using different languages in Racket is wide open. Let's expand our minds. 59 | \begin{verbatim} 60 | #lang planet dyoo/bf 61 | ++++++[>++++++++++++<-]>. 62 | >++++++++++[>++++++++++<-]>+. 63 | +++++++..+++.>++++[>+++++++++++<-]>. 64 | <+++[>----<-]>.<<<<<+++[>+++++<-]>. 65 | >>.+++.------.--------.>>+. 66 | \end{verbatim} 67 | This language does not look like Racket. It looks like line noise. This is brainf*ck. Although this language is not included in the main distribution, because it is on PLaneT, anyone with Racket can easily play with it. 68 | 69 | 70 | Ignoring the question of why?!! someone would do this, let's ask another: how do we build this? This tutorial will cover how to build this language into Racket from scratch. 71 | 72 | Let's get started! 
73 | 74 | \section{The view from high orbit} 75 | We want to teach Racket what it means when we say something like: 76 | \begin{verbatim} 77 | #lang planet dyoo/bf 78 | ,[.,] 79 | \end{verbatim} 80 | As mentioned earlier, a \verb+#lang+ line is quite active: it tells the Racket runtime how to convert from the surface syntax to a meaningful program. Programs in Racket get digested in a few stages; the process looks something like this: 81 | % 82 | \begin{displaymath} 83 | \xymatrix { 84 | \txt{surface syntax} \ar[rr]^-{\txt{reader}} & &\txt{AST} \ar[rr]^-{\txt{macro\\expansion}} & & \txt{core forms}} 85 | \end{displaymath} 86 | % 87 | When Racket sees \verb+#lang planet dyoo/bf+, it will look for a particular module that we call a reader; a reader consumes surface syntax and excretes ASTs, and these ASTs are then annotated so that Racket knows how to make sense out of them later on. At this point, the rest of the Racket infrastructure kicks in and macro-expands the ASTs out, ultimately, to a core language. 88 | 89 | So here's what we'll do: 90 | \begin{itemize} 91 | \item Capture the meaning of brainf*ck by writing a semantics module. 92 | 93 | \item Go from the line noise of the surface syntax into a more structured form by writing a parser module. 94 | 95 | \item Connect the pieces, the semantics and the surface syntax parser, by making a reader module. 96 | 97 | \item Profit! 98 | \end{itemize} 99 | 100 | 101 | \section{Flight preparations} 102 | Since we're starting from scratch, let's first make a work directory where we'll keep our source code. I'll call the directory ``bf/'', but you can use whatever name you want. 103 | \begin{verbatim} 104 | $ mkdir bf 105 | \end{verbatim} 106 | Ultimately, we want to put the fruit of our labor onto PLaneT, since that'll make it easier for others to use our work. Let's set up a PLaneT development link so the Racket environment knows about our work directory. I already have an account on PLaneT with my username dyoo. 
You can get an account fairly easily. 107 | 108 | If we enter the following at the command line, 109 | \begin{verbatim} 110 | $ planet link dyoo bf.plt 1 0 bf 111 | \end{verbatim} 112 | we'll make a development link that will associate any module path of the form (planet dyoo/bf/...) to our local ``bf/'' directory. Later on, when we create a package and upload it to PLaneT, we can drop this development link, and then all the references that use (planet dyoo/bf/...) will immediately switch over to the one on the PLaneT server. 113 | 114 | 115 | But does the link actually work? Let's write a very simple module in our work directory, and then see that Racket can find it through PLaneT. 116 | \begin{verbatim} 117 | $ cd bf 118 | ~/bf$ cat >hello.rkt 119 | #lang racket 120 | "hello world" 121 | \end{verbatim} 122 | Ok, let's see if Racket can find our magnificent ``hello.rkt'' module if we use the PLaneTized version of the name. 123 | \begin{verbatim} 124 | ~/bf$ racket 125 | Welcome to Racket v5.2. 126 | > (require (planet dyoo/bf/hello)) 127 | "hello world" 128 | > 129 | \end{verbatim} 130 | If we get to this point, then we've got the PLaneT development link in place. 131 | 132 | 133 | \section{The brainf*ck language} 134 | When we look at the definition of brainf*ck, it's actually not too bad. 
There's two bits of state, 135 | 136 | \begin{itemize} 137 | \item a byte array of data, and 138 | \item a pointer into that data array 139 | \end{itemize} 140 | % 141 | and it has only a few operations that affect this state: 142 | \begin{itemize} 143 | \item Increment the data pointer (\verb|>|) 144 | 145 | \item Decrement the data pointer (\verb|<|) 146 | 147 | \item Increment the byte at the data pointer (\verb|+|) 148 | 149 | \item Decrement the byte at the data pointer (\verb|-|) 150 | 151 | \item Write a byte to standard output (\verb|.|) 152 | 153 | \item Read a byte from standard input (\verb|,|) 154 | 155 | \item Perform a loop until the byte at the data pointer is zero (\verb|[|, \verb|]|) 156 | \end{itemize} 157 | 158 | Let's write a module that lets us play with such a system: let's call it ``semantics.rkt''. 159 | \begin{verbatim} 160 | #lang racket 161 | 162 | (require rackunit) ;; for unit testing 163 | (provide (all-defined-out)) 164 | 165 | ;; Our state contains two pieces. 166 | (define-struct state (data ptr) 167 | #:mutable) 168 | 169 | ;; Creates a new state, with a byte array of 30000 zeros, and 170 | ;; the pointer at index 0. 171 | (define (new-state) 172 | (make-state (make-vector 30000 0) 0)) 173 | 174 | ;; increment the data pointer 175 | (define (increment-ptr a-state) 176 | (set-state-ptr! a-state (add1 (state-ptr a-state)))) 177 | 178 | ;; decrement the data pointer 179 | (define (decrement-ptr a-state) 180 | (set-state-ptr! a-state (sub1 (state-ptr a-state)))) 181 | 182 | ;; increment the byte at the data pointer 183 | (define (increment-byte a-state) 184 | (define v (state-data a-state)) 185 | (define i (state-ptr a-state)) 186 | (vector-set! v i (add1 (vector-ref v i)))) 187 | 188 | ;; decrement the byte at the data pointer 189 | (define (decrement-byte a-state) 190 | (define v (state-data a-state)) 191 | (define i (state-ptr a-state)) 192 | (vector-set! 
v i (sub1 (vector-ref v i)))) 193 | 194 | ;; print the byte at the data pointer 195 | (define (write-byte-to-stdout a-state) 196 | (define v (state-data a-state)) 197 | (define i (state-ptr a-state)) 198 | (write-byte (vector-ref v i) (current-output-port))) 199 | 200 | ;; read a byte from stdin into the data pointer 201 | (define (read-byte-from-stdin a-state) 202 | (define v (state-data a-state)) 203 | (define i (state-ptr a-state)) 204 | (vector-set! v i (read-byte (current-input-port)))) 205 | 206 | ;; loops 207 | (define-syntax-rule (loop a-state body ...) 208 | (local [(define (loop) 209 | (unless (= (vector-ref (state-data a-state) (state-ptr a-state)) 210 | 0) 211 | body ... 212 | (loop)))] 213 | (loop))) 214 | \end{verbatim} 215 | Ok, that doesn't seem too bad. But of course, we should test this; let's use the \verb|rackunit| unit testing framework and tickle this code. Let's add a little more to the end of ``semantics.rkt''. 216 | \begin{verbatim} 217 | "semantics.rkt" 218 | ;; Simple exercises. 219 | (let ([s (new-state)]) 220 | (increment-byte s) 221 | (check-equal? 1 (vector-ref (state-data s) 0)) 222 | (increment-byte s) 223 | (check-equal? 2 (vector-ref (state-data s) 0)) 224 | (decrement-byte s) 225 | (check-equal? 1 (vector-ref (state-data s) 0))) 226 | 227 | ;; pointer movement 228 | (let ([s (new-state)]) 229 | (increment-ptr s) 230 | (increment-byte s) 231 | (check-equal? 0 (vector-ref (state-data s) 0)) 232 | (check-equal? 1 (vector-ref (state-data s) 1)) 233 | (decrement-ptr s) 234 | (increment-byte s) 235 | (check-equal? 1 (vector-ref (state-data s) 0)) 236 | (check-equal? 1 (vector-ref (state-data s) 1))) 237 | 238 | ;; make sure standard input is doing something 239 | (let ([s (new-state)]) 240 | (parameterize ([current-input-port 241 | (open-input-bytes (bytes 3 1 4))]) 242 | (read-byte-from-stdin s) 243 | (increment-ptr s) 244 | (read-byte-from-stdin s) 245 | (increment-ptr s) 246 | (read-byte-from-stdin s)) 247 | (check-equal? 
3 (vector-ref (state-data s) 0)) 248 | (check-equal? 1 (vector-ref (state-data s) 1)) 249 | (check-equal? 4 (vector-ref (state-data s) 2))) 250 | 251 | 252 | ;; make sure standard output is doing something 253 | (let ([s (new-state)]) 254 | (set-state-data! s (vector 80 76 84)) 255 | (let ([simulated-stdout (open-output-string)]) 256 | (parameterize ([current-output-port simulated-stdout]) 257 | (write-byte-to-stdout s) 258 | (increment-ptr s) 259 | (write-byte-to-stdout s) 260 | (increment-ptr s) 261 | (write-byte-to-stdout s)) 262 | (check-equal? "PLT" (get-output-string simulated-stdout)))) 263 | 264 | 265 | ;; Let's see that we can clear. 266 | (let ([s (new-state)]) 267 | (set-state-data! s (vector 0 104 101 108 112 109 101 105 268 | 109 109 101 108 116 105 110 103)) 269 | (set-state-ptr! s 15) 270 | ;; [ [-] < ] 271 | (loop s 272 | (loop s (decrement-byte s)) 273 | (decrement-ptr s)) 274 | 275 | (check-equal? 0 (state-ptr s)) 276 | (check-equal? (make-vector 16 0) (state-data s))) 277 | \end{verbatim} 278 | Good! Our tests, at the very least, let us know that our definitions are doing something reasonable, and they should all pass. 279 | 280 | However, there are a few things that we may want to fix in the future, like the lack of error trapping if the input stream contains eof. And there's no bounds-checking on the ptr or on the values in the data. Wow, there are quite a few things that we might want to fix. But at the very least, we now have a module that captures the semantics of brainf*ck. 281 | 282 | \section{Lisping a language} 283 | We might even be cheeky enough to insist that people write brainf*ck programs with s-expressions. Let's take that route, and create a module language that uses our ``semantics.rkt''. We'll create such a module language in ``language.rkt''. 
284 | \begin{verbatim} 285 | "language.rkt" 286 | #lang racket 287 | 288 | (require "semantics.rkt") 289 | 290 | (provide greater-than 291 | less-than 292 | plus 293 | minus 294 | period 295 | comma 296 | brackets 297 | (rename-out [my-module-begin #%module-begin])) 298 | 299 | ;; The current-state is a parameter used by the 300 | ;; rest of this language. 301 | (define current-state (make-parameter (new-state))) 302 | 303 | ;; Every module in this language will make sure that it 304 | ;; uses a fresh state. 305 | (define-syntax-rule (my-module-begin body ...) 306 | (#%plain-module-begin 307 | (parameterize ([current-state (new-state)]) 308 | body ...))) 309 | 310 | (define-syntax-rule (greater-than) 311 | (increment-ptr (current-state))) 312 | 313 | (define-syntax-rule (less-than) 314 | (decrement-ptr (current-state))) 315 | 316 | (define-syntax-rule (plus) 317 | (increment-byte (current-state))) 318 | 319 | (define-syntax-rule (minus) 320 | (decrement-byte (current-state))) 321 | 322 | (define-syntax-rule (period) 323 | (write-byte-to-stdout (current-state))) 324 | 325 | (define-syntax-rule (comma) 326 | (read-byte-from-stdin (current-state))) 327 | 328 | (define-syntax-rule (brackets body ...) 329 | (loop (current-state) body ...)) 330 | \end{verbatim} 331 | This ``language.rkt'' presents brainf*ck as an s-expression-based language. It uses the semantics we've coded up, and defines rules for handling greater-than, less-than, etc... We have a parameter called current-state that holds the state of the brainf*ck machine that's used throughout the language. 332 | 333 | There's one piece of this language that looks particularly mysterious: what's the \verb+#%module-begin+ form, and what is it doing? In Racket, every module has an implicit \verb+#%module-begin+ that wraps around the entirety of the module's body. We can see this by asking Racket to show us the results of the expansion process; here's a small example to demonstrate. 
334 | \begin{verbatim} 335 | > (syntax->datum 336 | (expand '(module an-example-module '#%kernel 337 | "hello" 338 | "world"))) 339 | '(module an-example-module '#%kernel (#%module-begin '"hello" '"world")) 340 | \end{verbatim} 341 | Ignore, for the moment, the use of syntax->datum or the funky use of \verb+'#%kernel+. What we should notice is that Racket has added in that \verb+#%module-begin+ around the \verb+"hello"+ and \verb+"world"+. So there's the implicit wrapping that Racket is doing. 342 | 343 | 344 | It turns out that \verb+#%module-begin+ can be really useful! In particular, we want to guarantee that every module written in brainf*ck runs under a fresh state. If we had two brainf*ck programs running, say like this: 345 | \begin{verbatim} 346 | (require "my-first-bf-program.rkt") 347 | (require "my-second-bf-program.rkt") 348 | \end{verbatim} 349 | then it would be a shame to have the two programs clash just because they brainf*cked each other's data! By defining our own \verb+#%module-begin+, we can ensure that each brainf*ck module has its own fresh version of the state, and our definition of my-module-begin does this for us. 
350 | 351 | 352 | Once we've written ``language.rkt'', we can use the language like this: 353 | \begin{verbatim} 354 | #lang s-exp (planet dyoo/bf/language) 355 | 356 | (plus)(plus)(plus)(plus)(plus) (plus)(plus)(plus)(plus)(plus) 357 | (brackets 358 | (greater-than) (plus)(plus)(plus)(plus)(plus) (plus)(plus) 359 | (greater-than) (plus)(plus)(plus)(plus)(plus) (plus)(plus) 360 | (plus)(plus)(plus) (greater-than) (plus)(plus)(plus) 361 | (greater-than) (plus) (less-than)(less-than)(less-than) 362 | (less-than) (minus)) 363 | (greater-than) (plus)(plus) (period) 364 | (greater-than) (plus) (period) 365 | (plus)(plus)(plus)(plus)(plus) (plus)(plus) (period) 366 | (period) (plus)(plus)(plus) (period) 367 | (greater-than) (plus)(plus) (period) 368 | (less-than)(less-than) (plus)(plus)(plus)(plus)(plus) 369 | (plus)(plus)(plus)(plus)(plus) (plus)(plus)(plus)(plus)(plus) 370 | (period) (greater-than) (period) 371 | (plus)(plus)(plus) (period) 372 | (minus)(minus)(minus)(minus)(minus)(minus)(period) 373 | (minus)(minus)(minus)(minus)(minus)(minus)(minus)(minus) 374 | (period)(greater-than) (plus) (period) (greater-than) (period) 375 | \end{verbatim} 376 | The \verb+#lang+ line here is saying, essentially, that the following program is written with s-expressions, and should be treated with the module language ``language.rkt'' that we just wrote up. And if we run this program, we should see a familiar greeting. Hurrah! 377 | 378 | ... But wait! We can't just declare victory here. We really do want to allow the throngs of brainf*ck programmers to write brainf*ck in the surface syntax that they deserve. Keep ``language.rkt'' on hand, though. We will reuse it by having our parser transform the surface syntax into the forms we defined in ``language.rkt''. 379 | 380 | Let's get that parser working! 381 | 382 | \section{Parsing the surface syntax} 383 | 384 | The Racket toolchain includes a professional-strength lexer and parser 385 | in the parser-tools collection. 
For the sake of keeping this example 386 | terse, though, we'll write a simple recursive-descent parser without 387 | using the parser-tools collection. 388 | 389 | The expected output of a successful parse should be some kind of 390 | abstract syntax tree. What representation should we use for the tree? 391 | For the purposes of this tutorial, let's use plain s-expressions as 392 | our representation. 393 | 394 | (As an alternative to plain s-expressions, we could have used an 395 | alternative data structure in Racket called a syntax object, which 396 | allows us to add additional attributes like source location to each 397 | datum. For the case of brainf*ck, we might not care, but if we were 398 | to write a parser for a more professional, sophisticated language, 399 | such as LOLCODE, then we'd want source locations so we can give good 400 | error messages during parsing or run-time.) 401 | 402 | 403 | Let's write a parser. Our parser will consume an input-port, from 404 | which we can read in characters with 405 | \verb|read-char|. We'll write the following into ``parser.rkt''. 406 | \begin{verbatim} 407 | "parser.rkt" 408 | #lang racket 409 | 410 | ;; The only visible export of this module will be parse-expr. 411 | (provide parse-expr) 412 | 413 | 414 | (define (parse-expr inp) 415 | (define ch (read-char inp)) 416 | (cond 417 | [(eof-object? ch) 418 | eof] 419 | [else 420 | (case ch 421 | [(#\.) '(period)] 422 | [(#\,) '(comma)] 423 | [(#\+) '(plus)] 424 | [(#\-) '(minus)] 425 | [(#\<) '(less-than)] 426 | [(#\>) '(greater-than)] 427 | [(#\[) `(brackets ,@(parse-exprs inp))] 428 | [else 429 | (parse-expr inp)])])) 430 | 431 | (define (parse-exprs inp) 432 | (define peeked (peek-char inp)) 433 | (cond 434 | [(eof-object? peeked) 435 | (error 'parse "Expected ], but eof received instead")] 436 | [(member peeked '(#\. #\, #\+ #\- #\< #\> #\[)) 437 | (cons (parse-expr inp) (parse-exprs inp))] 438 | [(char=? 
peeked #\]) 439 | (read-char inp) 440 | empty] 441 | [else 442 | (read-char inp) 443 | (parse-exprs inp)])) 444 | 445 | \end{verbatim} 446 | 447 | This parser isn't anything too tricky, although there's a little bit 448 | of messiness because it needs to handle brackets recursively. That 449 | part is supposed to be a little messy anyway, since it's the capstone 450 | that builds tree structure out of a linear character stream. (If we 451 | were parsing a fully parenthesized language, we could simply use the 452 | built-in \verb|read-syntax| function in Racket, which already knows 453 | how to parse parenthesized structure.) 454 | 455 | 456 | Let's see if this parser does anything useful: 457 | \begin{verbatim} 458 | > (define my-sample-input-port (open-input-string ",[.,]")) 459 | > (define first-ast 460 | (parse-expr my-sample-input-port)) 461 | > first-ast 462 | '(comma) 463 | > (define second-ast 464 | (parse-expr my-sample-input-port)) 465 | > second-ast 466 | '(brackets (period) (comma)) 467 | > (parse-expr my-sample-input-port) 468 | #Hello world!
93 | 94 | } 95 | 96 | 97 | Going back to the world of Racket, we see by analogy that the @litchar{#lang} 98 | line in a Racket program is a self-description of how to treat the 99 | rest of the program. (Actually, the @litchar{#lang} line is quite a bit more 100 | active than this, but we'll get to this in a moment.) 101 | 102 | 103 | The @racketmodname[racket] part in the @litchar{#lang} line isn't inevitable: the main Racket 104 | distribution, in fact, comes bundled with several languages which can 105 | take the place of the word @racketmodname[racket]. Many of these languages 106 | (@racketmodname[racket/base], @racketmodname[typed/racket], @racketmodname[lazy]) still look like Racket... but some 107 | of them don't. Here's one example: 108 | @codeblock{ 109 | #lang datalog 110 | ancestor(A, B) :- parent(A, B). 111 | ancestor(A, B) :- 112 | parent(A, C), D = C, ancestor(D, B). 113 | parent(john, douglas). 114 | parent(bob, john). 115 | ancestor(A, B)? 116 | } 117 | This is an example of a @link["http://en.wikipedia.org/wiki/Datalog"]{Datalog} 118 | program that deals with logical relations. Neat! 119 | 120 | 121 | What might be surprising is that the mechanism for using different 122 | languages in Racket is wide open. Let's expand our minds. 123 | @codeblock{ 124 | #lang planet dyoo/bf 125 | ++++++[>++++++++++++<-]>. 126 | >++++++++++[>++++++++++<-]>+. 127 | +++++++..+++.>++++[>+++++++++++<-]>. 128 | <+++[>----<-]>.<<<<<+++[>+++++<-]>. 129 | >>.+++.------.--------.>>+. 130 | } 131 | This language does not look like Racket. It looks like line 132 | noise. This is 133 | @link["http://en.wikipedia.org/wiki/Brainf*ck"]{@tt{brainf*ck}}. Although 134 | this language is not included in the main distribution, because it is 135 | on @link["http://planet.racket-lang.org"]{PLaneT}, anyone with Racket 136 | can easily play with it. 137 | 138 | 139 | Ignoring the question of @emph{why?!!} someone would do this, let's ask another: 140 | how do we build this? 
This tutorial will cover how to build this language 141 | into Racket from scratch. 142 | 143 | 144 | Let's get started! 145 | 146 | 147 | @;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; 148 | @section{The view from high orbit} 149 | @;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; 150 | 151 | We want to teach Racket what it means when we say something like: 152 | @codeblock|{ 153 | #lang planet dyoo/bf 154 | ,[.,] 155 | }| 156 | 157 | As mentioned earlier, a @litchar{#lang} line is quite active: it tells the Racket runtime how to 158 | convert from the surface syntax to a meaningful program. Programs in Racket get digested 159 | in a few stages; the process looks something like this: 160 | 161 | @verbatim|{ 162 | reader macro expansion 163 | surface syntax ---------> AST -----------------> core forms 164 | }| 165 | 166 | When Racket sees 167 | @litchar{#lang planet dyoo/bf}, it will look for a particular module that we call a @emph{reader}; 168 | a reader consumes surface syntax and excretes ASTs, and these ASTs are then 169 | annotated so that Racket knows how to make sense out of them later on. 170 | At this point, the rest of the Racket infrastructure kicks in and macro-expands the ASTs out, ultimately, 171 | to a @link["http://docs.racket-lang.org/reference/syntax-model.html#(part._fully-expanded)"]{core} language. 
172 | 173 | 174 | So here's what we'll do: 175 | @itemlist[ 176 | @item{Capture the meaning of @tt{brainf*ck} by writing a semantics module.} 177 | @item{Go from the line noise of the surface syntax into a more structured form 178 | by writing a parser module.} 179 | @item{Connect the pieces, the semantics and the surface syntax parser, 180 | by making a reader module.} 181 | @item{Profit!}] 182 | 183 | 184 | 185 | @;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; 186 | @section{Flight preparations} 187 | @;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; 188 | Since we're starting from scratch, let's first make a work directory 189 | where we'll keep our source code. I'll call the directory @filepath{bf/}, but you can use 190 | whatever name you want. 191 | @verbatim|{ 192 | $ mkdir bf 193 | }| 194 | 195 | Ultimately, we want to put the fruit of our labor onto @link["http://docs.racket-lang.org/planet/index.html"]{PLaneT}, 196 | since that'll make it easier for others to use our work. 197 | Let's set up a @link["http://docs.racket-lang.org/planet/Developing_Packages_for_PLaneT.html#(part._devlinks)"]{PLaneT development link} so the Racket environment knows about our work directory. I already have an account 198 | on PLaneT with my username @tt{dyoo}. You can 199 | @link["http://planet.racket-lang.org/add.ss"]{get an account} fairly easily. 200 | 201 | If we enter the following at the command line, 202 | @verbatim|{ 203 | $ planet link dyoo bf.plt 1 0 bf 204 | }| 205 | we'll make a development link that will associate any module path of the form @racket[(planet dyoo/bf/...)] 206 | to our local @filepath{bf/} directory. Later on, when we create a package and upload it to PLaneT, 207 | we can drop this development link, and then all the references that use @racket[(planet dyoo/bf/...)] will 208 | immediately switch over to the one on the PLaneT server. 209 | 210 | 211 | But does the link actually work? 
Let's write a very simple module in our work directory, and 212 | then see that Racket can find it through PLaneT. 213 | @verbatim|{ 214 | $ cd bf 215 | ~/bf$ cat >hello.rkt 216 | #lang racket 217 | "hello world" 218 | }| 219 | Ok, let's see if Racket can find our magnificent @filepath{hello.rkt} module if we use the PLaneTized version of the name. 220 | @verbatim|{ 221 | ~/bf$ racket 222 | Welcome to Racket v5.2. 223 | > (require (planet dyoo/bf/hello)) 224 | "hello world" 225 | > 226 | }| 227 | If we get to this point, then we've got the PLaneT development link in place. 228 | 229 | 230 | 231 | 232 | 233 | @;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; 234 | @section{The @tt{brainf*ck} language} 235 | @;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; 236 | 237 | When we look at the definition of @link["http://en.wikipedia.org/wiki/Brainf*ck"]{@tt{brainf*ck}}, 238 | it's actually not too bad. There's two bits of state, 239 | @itemlist[ 240 | @item{a byte array of data, and} 241 | @item{a pointer into that data array} 242 | ] 243 | and it has only a few operations that affect this state: 244 | @itemlist[ 245 | @item{Increment the data pointer (@litchar{>})} 246 | @item{Decrement the data pointer (@litchar{<})} 247 | @item{Increment the byte at the data pointer (@litchar{+})} 248 | @item{Decrement the byte at the data pointer (@litchar{-})} 249 | @item{Write a byte to standard output (@litchar{.})} 250 | @item{Read a byte from standard input (@litchar{,})} 251 | @item{Perform a loop until the byte at the data pointer is zero (@litchar{[}, @litchar{]})} 252 | ] 253 | Let's write a module that lets us play with such a system: let's call it @filepath{semantics.rkt}. 254 | 255 | @filebox["semantics.rkt"]{ 256 | @codeblock|{ 257 | #lang racket 258 | 259 | (require rackunit) ;; for unit testing 260 | (provide (all-defined-out)) 261 | 262 | 263 | ;; Our state contains two pieces. 
264 | (define-struct state (data ptr) 265 | #:mutable) 266 | 267 | ;; Creates a new state, with a byte array of 30000 zeros, and 268 | ;; the pointer at index 0. 269 | (define (new-state) 270 | (make-state (make-vector 30000 0) 271 | 0)) 272 | 273 | ;; increment the data pointer 274 | (define (increment-ptr a-state) 275 | (set-state-ptr! a-state (add1 (state-ptr a-state)))) 276 | 277 | ;; decrement the data pointer 278 | (define (decrement-ptr a-state) 279 | (set-state-ptr! a-state (sub1 (state-ptr a-state)))) 280 | 281 | ;; increment the byte at the data pointer 282 | (define (increment-byte a-state) 283 | (define v (state-data a-state)) 284 | (define i (state-ptr a-state)) 285 | (vector-set! v i (add1 (vector-ref v i)))) 286 | 287 | ;; decrement the byte at the data pointer 288 | (define (decrement-byte a-state) 289 | (define v (state-data a-state)) 290 | (define i (state-ptr a-state)) 291 | (vector-set! v i (sub1 (vector-ref v i)))) 292 | 293 | ;; print the byte at the data pointer 294 | (define (write-byte-to-stdout a-state) 295 | (define v (state-data a-state)) 296 | (define i (state-ptr a-state)) 297 | (write-byte (vector-ref v i) (current-output-port))) 298 | 299 | ;; read a byte from stdin into the data pointer 300 | (define (read-byte-from-stdin a-state) 301 | (define v (state-data a-state)) 302 | (define i (state-ptr a-state)) 303 | (vector-set! v i (read-byte (current-input-port)))) 304 | 305 | 306 | ;; we know how to do loops! 307 | (define-syntax-rule (loop a-state body ...) 308 | (local [(define (loop) 309 | (unless (= (vector-ref (state-data a-state) 310 | (state-ptr a-state)) 311 | 0) 312 | body ... 313 | (loop)))] 314 | (loop))) 315 | }|} 316 | 317 | Ok, that doesn't seem too bad. But of course, we should test this; let's use 318 | the @racketmodname[rackunit] unit testing framework and tickle this code. Let's add 319 | a little more to the end of @filepath{semantics.rkt}. 
320 | @filebox["semantics.rkt"]{ 321 | @codeblock|{ 322 | ;; Simple exercises. 323 | (let ([s (new-state)]) 324 | (increment-byte s) 325 | (check-equal? 1 (vector-ref (state-data s) 0)) 326 | (increment-byte s) 327 | (check-equal? 2 (vector-ref (state-data s) 0)) 328 | (decrement-byte s) 329 | (check-equal? 1 (vector-ref (state-data s) 0))) 330 | 331 | ;; pointer movement 332 | (let ([s (new-state)]) 333 | (increment-ptr s) 334 | (increment-byte s) 335 | (check-equal? 0 (vector-ref (state-data s) 0)) 336 | (check-equal? 1 (vector-ref (state-data s) 1)) 337 | (decrement-ptr s) 338 | (increment-byte s) 339 | (check-equal? 1 (vector-ref (state-data s) 0)) 340 | (check-equal? 1 (vector-ref (state-data s) 1))) 341 | 342 | ;; make sure standard input is doing something 343 | (let ([s (new-state)]) 344 | (parameterize ([current-input-port 345 | (open-input-bytes (bytes 3 1 4))]) 346 | (read-byte-from-stdin s) 347 | (increment-ptr s) 348 | (read-byte-from-stdin s) 349 | (increment-ptr s) 350 | (read-byte-from-stdin s)) 351 | (check-equal? 3 (vector-ref (state-data s) 0)) 352 | (check-equal? 1 (vector-ref (state-data s) 1)) 353 | (check-equal? 4 (vector-ref (state-data s) 2))) 354 | 355 | 356 | ;; make sure standard output is doing something 357 | (let ([s (new-state)]) 358 | (set-state-data! s (vector 80 76 84)) 359 | (let ([simulated-stdout (open-output-string)]) 360 | (parameterize ([current-output-port simulated-stdout]) 361 | (write-byte-to-stdout s) 362 | (increment-ptr s) 363 | (write-byte-to-stdout s) 364 | (increment-ptr s) 365 | (write-byte-to-stdout s)) 366 | (check-equal? "PLT" (get-output-string simulated-stdout)))) 367 | 368 | 369 | ;; Let's see that we can clear. 370 | (let ([s (new-state)]) 371 | (set-state-data! s (vector 0 104 101 108 112 109 101 105 372 | 109 109 101 108 116 105 110 103)) 373 | (set-state-ptr! s 15) 374 | ;; [ [-] < ] 375 | (loop s 376 | (loop s (decrement-byte s)) 377 | (decrement-ptr s)) 378 | 379 | (check-equal? 
0 (state-ptr s)) 380 | (check-equal? (make-vector 16 0) (state-data s))) 381 | }|} 382 | 383 | Good! Our tests, at the very least, let us know that our definitions are 384 | doing something reasonable, and they should all pass. 385 | 386 | 387 | However, there are a few things that we may want to fix in 388 | the future, like the lack 389 | of error trapping if the input stream contains @racket[eof]. And there's no bounds-checking 390 | on the @racket[ptr] or on the values in the data. Wow, there are quite a few things that we might want 391 | to fix. But at the very least, we now have a module that captures the semantics of @tt{brainf*ck}. 392 | 393 | 394 | 395 | @section{Lisping a language} 396 | 397 | We might even be cheeky enough to insist that people write @tt{brainf*ck} programs with s-expressions. 398 | Let's take that route, and create a @link["http://docs.racket-lang.org/guide/module-languages.html"]{module language} 399 | that uses our @filepath{semantics.rkt}. We'll create such a module language in @filepath{language.rkt}. 400 | @filebox["language.rkt"]{ 401 | @codeblock|{ 402 | #lang racket 403 | 404 | (require "semantics.rkt") 405 | 406 | (provide greater-than 407 | less-than 408 | plus 409 | minus 410 | period 411 | comma 412 | brackets 413 | (rename-out [my-module-begin #%module-begin])) 414 | 415 | ;; The current-state is a parameter used by the 416 | ;; rest of this language. 417 | (define current-state (make-parameter (new-state))) 418 | 419 | ;; Every module in this language will make sure that it 420 | ;; uses a fresh state. 421 | (define-syntax-rule (my-module-begin body ...) 
422 | (#%plain-module-begin 423 | (parameterize ([current-state (new-state)]) 424 | body ...))) 425 | 426 | (define-syntax-rule (greater-than) 427 | (increment-ptr (current-state))) 428 | 429 | (define-syntax-rule (less-than) 430 | (decrement-ptr (current-state))) 431 | 432 | (define-syntax-rule (plus) 433 | (increment-byte (current-state))) 434 | 435 | (define-syntax-rule (minus) 436 | (decrement-byte (current-state))) 437 | 438 | (define-syntax-rule (period) 439 | (write-byte-to-stdout (current-state))) 440 | 441 | (define-syntax-rule (comma) 442 | (read-byte-from-stdin (current-state))) 443 | 444 | (define-syntax-rule (brackets body ...) 445 | (loop (current-state) body ...)) 446 | }|} 447 | 448 | 449 | This @filepath{language.rkt} presents @tt{brainf*ck} as an s-expression-based language. 450 | It uses the semantics we've coded up, and defines rules for handling 451 | @racket[greater-than], @racket[less-than], etc... We have a @link["http://docs.racket-lang.org/guide/parameterize.html"]{parameter} called @racket[current-state] 452 | that holds the state of the @tt{brainf*ck} machine that's used throughout the language. 453 | 454 | There's one piece of this language that looks particularly mysterious: what's the @racket[#%module-begin] form, 455 | and what is it doing? In Racket, every 456 | module has an implicit @racket[#%module-begin] that wraps around the entirety of the module's body. 457 | We can see this by asking Racket to show us the results of the expansion process; 458 | here's a small example to demonstrate. 459 | @interaction[#:eval my-evaluator 460 | (syntax->datum 461 | (expand '(module an-example-module '#%kernel 462 | "hello" 463 | "world"))) 464 | ] 465 | Ignore, for the moment, the use of @racket[syntax->datum] or the funky use of @racket['#%kernel]. 466 | What we should notice 467 | is that Racket has added in that @racket[#%module-begin] around the @racket["hello"] and @racket["world"]. 
468 | So there's the implicit wrapping that Racket is doing. 469 | 470 | It turns out that @racket[#%module-begin] can be really useful! In particular, 471 | we want to guarantee that every module written in @tt{brainf*ck} runs under a fresh state. If 472 | we had two @tt{brainf*ck} programs running, say like this: 473 | @racketblock[(require "my-first-bf-program.rkt") 474 | (require "my-second-bf-program.rkt")] 475 | then it would be a shame to have the two programs clash just because they @tt{brainf*ck}ed each other's data! 476 | By defining our own @racket[#%module-begin], we can ensure that each @tt{brainf*ck} module has 477 | its own fresh version of the state, and our definition of @racket[my-module-begin] 478 | does this for us. 479 | 480 | 481 | 482 | Once we've written @filepath{language.rkt}, we can use the language 483 | like this: 484 | @codeblock|{ 485 | #lang s-exp (planet dyoo/bf/language) 486 | 487 | (plus)(plus)(plus)(plus)(plus) (plus)(plus)(plus)(plus)(plus) 488 | (brackets 489 | (greater-than) (plus)(plus)(plus)(plus)(plus) (plus)(plus) 490 | (greater-than) (plus)(plus)(plus)(plus)(plus) (plus)(plus) 491 | (plus)(plus)(plus) (greater-than) (plus)(plus)(plus) 492 | (greater-than) (plus) (less-than)(less-than)(less-than) 493 | (less-than) (minus)) 494 | (greater-than) (plus)(plus) (period) 495 | (greater-than) (plus) (period) 496 | (plus)(plus)(plus)(plus)(plus) (plus)(plus) (period) 497 | (period) (plus)(plus)(plus) (period) 498 | (greater-than) (plus)(plus) (period) 499 | (less-than)(less-than) (plus)(plus)(plus)(plus)(plus) 500 | (plus)(plus)(plus)(plus)(plus) (plus)(plus)(plus)(plus)(plus) 501 | (period) (greater-than) (period) 502 | (plus)(plus)(plus) (period) 503 | (minus)(minus)(minus)(minus)(minus)(minus)(period) 504 | (minus)(minus)(minus)(minus)(minus)(minus)(minus)(minus) 505 | (period)(greater-than) (plus) (period) (greater-than) (period) 506 | }| 507 | 508 | The @litchar{#lang} line here is saying, essentially, that the following 
program 509 | is written with s-expressions, and should be treated with the module language @filepath{language.rkt} 510 | that we just wrote up. And if we run this program, we should see a familiar greeting. 511 | Hurrah! 512 | 513 | 514 | ... But wait! We can't just declare victory here. We really do want 515 | to allow the throngs of @tt{brainf*ck} programmers to write @tt{brainf*ck} in the surface syntax that 516 | they deserve. 517 | Keep @filepath{language.rkt} on hand, though. We will reuse it by having our 518 | parser transform the surface syntax into the forms we defined in @filepath{language.rkt}. 519 | 520 | 521 | Let's get that parser working! 522 | 523 | @;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; 524 | @section{Parsing the surface syntax} 525 | @;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; 526 | 527 | The Racket toolchain includes a professional-strength lexer and parser 528 | in the @link["http://docs.racket-lang.org/parser-tools/index.html"]{parser-tools} collection. 529 | For the sake of keeping this example terse, we'll 530 | write a simple @link["http://en.wikipedia.org/wiki/Recursive_descent_parser"]{recursive-descent parser} without using the parser-tools collection. (But if our surface 531 | syntax were any more complicated, we might reconsider this decision.) 532 | 533 | The expected output of a successful parse should be some kind of abstract syntax tree. What representation 534 | should we use for the tree? Although we can use s-expressions, 535 | they're pretty lossy: they don't record where they came from 536 | in the original source text. For the case of @tt{brainf*ck}, we might not care, 537 | but if we were to write a parser for a more professional, 538 | sophisticated language (like @link["http://lolcode.com/"]{LOLCODE}) we 539 | want source locations so we can give good error messages during parsing or run-time. 
540 | 541 | As an alternative to plain s-expressions, we'll use a data structure built into Racket called a 542 | @link["http://docs.racket-lang.org/guide/stx-obj.html"]{syntax object}; syntax objects let 543 | us represent ASTs, just like s-expressions, and they also carry along auxiliary 544 | information, such as source locations. Plus, as we briefly saw in our play with @racket[expand], syntax objects are the native data structure that Racket 545 | itself uses during macro expansion, so we might as well use them ourselves. 546 | 547 | For example, 548 | @interaction[#:eval my-evaluator 549 | (define an-example-syntax-object 550 | (datum->syntax #f 'hello (list "hello.rkt" 551 | 1 552 | 20 553 | 32 554 | 5)))] 555 | The first argument that we pass into @racket[datum->syntax] lets us tell Racket any 556 | lexical-scoping information that we know about the datum, but in this case, we don't have 557 | any on hand, so we just give it @racket[#f]. Let's look at the structure of this syntax object. 558 | @interaction[#:eval my-evaluator 559 | an-example-syntax-object 560 | (syntax? an-example-syntax-object) 561 | (syntax->datum an-example-syntax-object) 562 | (symbol? (syntax->datum an-example-syntax-object)) 563 | ] 564 | So a syntax object is a wrapper around an s-expression, and we can get the underlying datum by using @racket[syntax->datum]. 565 | Furthermore, this object remembers where it came from, and that it was on line 1, column 20, position 32, and was five characters long: 566 | @interaction[#:eval my-evaluator 567 | (syntax-source an-example-syntax-object) 568 | (syntax-line an-example-syntax-object) 569 | (syntax-column an-example-syntax-object) 570 | (syntax-position an-example-syntax-object) 571 | (syntax-span an-example-syntax-object) 572 | ] 573 | 574 | 575 | Now that we have some experience playing with syntax objects, let's write a parser. 
576 | Our parser will consume an @link["http://docs.racket-lang.org/reference/ports.html"]{input-port}, 577 | from which we can read in characters with @racket[read-char], or find out where we are with @racket[port-next-location]. We also want to store some record of where our program originated from, 578 | so our parser will also take in a @racket[source-name] parameter. 579 | We'll write the following into @filepath{parser.rkt}. 580 | @filebox["parser.rkt"]{ 581 | @codeblock|{ 582 | #lang racket 583 | ;; The only visible export of this module will be parse-expr. 584 | (provide parse-expr) 585 | 586 | ;; parse-expr: any input-port -> (U syntax eof) 587 | ;; Either produces a syntax object or the eof object. 588 | (define (parse-expr src in) 589 | (define-values (line column position) (port-next-location in)) 590 | (define next-char (read-char in)) 591 | 592 | ;; decorate: s-expression number -> syntax 593 | ;; Wrap the s-expression with source location. 594 | (define (decorate sexp span) 595 | (datum->syntax #f sexp (list src line column position span))) 596 | 597 | (cond 598 | [(eof-object? next-char) eof] 599 | [else 600 | (case next-char 601 | [(#\<) (decorate '(less-than) 1)] 602 | [(#\>) (decorate '(greater-than) 1)] 603 | [(#\+) (decorate '(plus) 1)] 604 | [(#\-) (decorate '(minus) 1)] 605 | [(#\,) (decorate '(comma) 1)] 606 | [(#\.) (decorate '(period) 1)] 607 | [(#\[) 608 | ;; The slightly messy case is bracket. We keep reading 609 | ;; a list of exprs, and then construct a wrapping bracket 610 | ;; around the whole thing. 611 | (define elements (parse-exprs src in)) 612 | (define-values (l c tail-position) 613 | (port-next-location in)) 614 | (decorate `(brackets ,@elements) 615 | (- tail-position position))] 616 | [else 617 | (parse-expr src in)])])) 618 | 619 | ;; parse-exprs: any input-port -> (listof syntax) 620 | ;; Parse a list of expressions. 621 | (define (parse-exprs source-name in) 622 | (define peeked-char (peek-char in)) 623 | (cond 624 | [(eof-object? 
peeked-char) 625 | (error 'parse-exprs "Expected ], but read eof")] 626 | [(char=? peeked-char #\]) 627 | (read-char in) 628 | empty] 629 | [(member peeked-char (list #\< #\> #\+ #\- #\, #\. #\[)) 630 | (cons (parse-expr source-name in) 631 | (parse-exprs source-name in))] 632 | [else 633 | (read-char in) 634 | (parse-exprs source-name in)])) 635 | }|} 636 | This parser isn't anything too tricky, although there's a little bit of 637 | messiness because it needs to handle brackets recursively. That part 638 | is supposed to be a little messy anyway, since it's the capstone that builds tree structure out 639 | of a linear character stream. (If we were using a parenthesized language, we 640 | could simply use @racket[read-syntax], but the whole point is to deal 641 | with the messiness of the surface syntax!) 642 | 643 | Let's see if this parser does anything useful: 644 | @interaction[#:eval my-evaluator 645 | (define my-sample-input-port (open-input-string ",[.,]")) 646 | (port-count-lines! my-sample-input-port) 647 | (define first-stx 648 | (parse-expr "my-sample-program.rkt" my-sample-input-port)) 649 | first-stx 650 | (define second-stx 651 | (parse-expr "my-sample-program.rkt" my-sample-input-port)) 652 | second-stx 653 | (parse-expr "my-sample-program.rkt" my-sample-input-port)] 654 | Good! So we're able to parse syntax objects out of an input stream. 655 | @interaction[#:eval my-evaluator 656 | (syntax->datum second-stx) 657 | (syntax-source second-stx) 658 | (syntax-position second-stx) 659 | (syntax-span second-stx)] 660 | And as we can see, we can explode the syntax object and look at its datum. We should note 661 | that the parser is generating syntax objects that use the same names as the defined names we 662 | have in our @filepath{language.rkt} module language. Yup, that's deliberate, and we'll see why in 663 | the next section. 664 | 665 | 666 | We mentioned that the parser wasn't too hard... 
but then again, we haven't written good traps 667 | for error conditions. This parser is a baby parser. 668 | If we were more rigorous, we'd probably implement it with the parser-tools collection, 669 | write unit tests for the parser with @racketmodname[rackunit], and 670 | make sure to produce good error messages when Bad Things happen 671 | (like having unbalanced brackets or parentheses. 672 | @;; Yes, the unbalanced parentheses here is a joke. I wonder if anyone will correct me for it. 673 | 674 | 675 | 676 | Still, we've now got the language and a parser. How do we tie them together? 677 | 678 | @;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; 679 | @section{Crossing the wires} 680 | @;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; 681 | 682 | This part is fairly straightforward. We have two pieces in hand: 683 | @itemlist[@item{A parser in @filepath{parser.rkt} for the surface syntax that produces ASTs} 684 | @item{A module language in @filepath{language.rkt} that provides the meaning for those ASTs.} 685 | ] 686 | To combine these two pieces together, we want to define a @link["http://docs.racket-lang.org/guide/hash-lang_reader.html"]{reader} that associates the two. 687 | When Racket encounters a @litchar{#lang} line of the form: 688 | @codeblock{ 689 | #lang planet dyoo/bf 690 | } 691 | it will look for a reader module in @filepath{lang/reader.rkt} and use it to parse the file. 692 | 693 | Racket provides a helper module called @racketmodname[syntax/module-reader] to handle most of the 694 | dirty work; let's use it. 
Make a @filepath{lang/} subdirectory, and create @filepath{reader.rkt} 695 | in that subdirectory, with the following content: 696 | @filebox["lang/reader.rkt"]{ 697 | @codeblock|{ 698 | #lang s-exp syntax/module-reader 699 | (planet dyoo/bf/language) 700 | #:read my-read 701 | #:read-syntax my-read-syntax 702 | 703 | (require "../parser.rkt") 704 | 705 | (define (my-read in) 706 | (syntax->datum (my-read-syntax #f in))) 707 | 708 | (define (my-read-syntax src in) 709 | (parse-expr src in)) 710 | }|} 711 | Some of this is magic, so let's step through this. The second line of the file tells @racketmodname[syntax/module-reader] that any syntax objects that 712 | come out are intended to take on their semantics from our language module @filepath{language.rkt}. @racketmodname[syntax/module-reader] 713 | is predisposed to assume that programs are read using @racket[read] and @racket[read-syntax], so we 714 | override that default and plug in our @racket[parse-expr] function into place. 715 | 716 | 717 | Now that we have all these pieces together, does any of this work? Let's try it! 718 | @verbatim|{ 719 | $ cat hello2.rkt 720 | #lang planet dyoo/bf 721 | ++++++[>++++++++++++<-]>. 722 | >++++++++++[>++++++++++<-]>+. 723 | +++++++..+++.>++++[>+++++++++++<-]>. 724 | <+++[>----<-]>.<<<<<+++[>+++++<-]>. 725 | >>.+++.------.--------.>>+. 726 | 727 | $ racket hello2.rkt 728 | Hello, World! 729 | }| 730 | 731 | Sweet, sweet words. 732 | 733 | 734 | 735 | 736 | @;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; 737 | @section{Landing on PLaneT} 738 | @;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; 739 | 740 | Finally, we want to get this work onto @link["http://docs.racket-lang.org/planet/index.html"]{PLaneT} so that other people can share in the joy 741 | of writing @tt{brainf*ck} in Racket. Let's do it! 742 | 743 | 744 | First, let's go back to the parent of our work directory. 
Once we're there, we'll use the @tt{planet create} command. 745 | 746 | @verbatim|{ 747 | $ planet create bf 748 | planet create bf 749 | MzTarring ./... 750 | MzTarring ./lang... 751 | 752 | WARNING: 753 | Package has no info.rkt file. This means it will not have a description or documentation on the PLaneT web site. 754 | 755 | $ ls -l bf.plt 756 | -rw-rw-r-- 1 dyoo nogroup 3358 Jun 12 19:39 bf.plt 757 | }| 758 | 759 | There are a few warnings, because we haven't defined an @filepath{info.rkt} which provides metadata 760 | about our package. Good, diligent citizens would @link["http://docs.racket-lang.org/planet/Developing_Packages_for_PLaneT.html#(part._.Create_an__info_rkt__.File__.Optional_)"]{write an @filepath{info.rkt} file}, so let's write one. 761 | @filebox["info.rkt"]{ 762 | @codeblock|{ 763 | #lang setup/infotab 764 | (define name "bf: a brainf*ck compiler for Racket") 765 | (define categories '(devtools)) 766 | (define can-be-loaded-with 'all) 767 | (define required-core-version "5.1.1") 768 | (define version "1.0") 769 | (define repositories '("4.x")) 770 | (define scribblings '()) 771 | (define primary-file "language.rkt") 772 | (define blurb 773 | '("Provides support for the brainf*ck language.")) 774 | (define release-notes 775 | '((p "First release"))) 776 | }|} 777 | 778 | 779 | 780 | 781 | Before we upload the package, let's make sure the @filepath{bf.plt} package works for us locally. We'll simulate an installation. First, let's break the development link. 782 | @verbatim{ 783 | $ planet unlink dyoo bf.plt 1 0 784 | } 785 | If we try running our test program from before, it should fail on us. 786 | 787 | @verbatim{ 788 | $ racket hello2.rkt 789 | require: PLaneT could not find the requested package: Server had no matching package: No package matched the specified criteria 790 | } 791 | Ok, that was expected. 
Since we've dissolved the development link, and since we haven't uploaded the 792 | package onto the PLaneT network yet, we see the error that we expect to see. 793 | 794 | Next, let's use @tt{planet fileinject} to simulate an installation of our package from PLaneT. 795 | @verbatim|{ 796 | $ planet fileinject dyoo bf.plt 1 0 797 | planet fileinject dyoo bf.plt 1 0 798 | 799 | ============= Installing bf.plt on Sun, 12 Jun 2011 19:49:50 ============= 800 | raco setup: Unpacking archive from /home/dyoo/bf.plt 801 | ... 802 | }| 803 | Lots and lots of output later, the package should be installed. 804 | 805 | If we try running our test program again... 806 | @verbatim{ 807 | $ racket hello2.rkt 808 | Hello, World! 809 | } 810 | Good! This simulates the situation where the package has been installed from PLaneT. 811 | 812 | 813 | Once we're satisfied with the package's contents, we can finally upload it onto PLaneT. 814 | If you log onto @link["http://planet.racket-lang.org"]{planet.racket-lang.org}, 815 | the user interface will allow 816 | you to upload your @filepath{bf.plt} package. 817 | 818 | 819 | 820 | 821 | @section{Acknowledgements} 822 | 823 | An extended version of this tutorial can be found at 824 | @url{http://hashcollision.org/brainfudge}. 825 | 826 | Very special thanks to @link["http://www.cs.brown.edu/~sk/"]{Shriram 827 | Krishnamurthi} for being understanding when I told him I had coded a 828 | @tt{brainf*ck} compiler. Basically, everyone in the Racket community 829 | (like Mark Engelberg, Eric Hanchrow, Eli Barzilay, Matthew Flatt, 830 | Robby Findler, and others that I'm blanking out on...) has been 831 | wonderful. The 832 | @link["http://lists.racket-lang.org/users/archive/2011-June/046090.html"]{mailing 833 | list thread} shows how many people have helped to shape this tutorial. 834 | 835 | Guillaume Marceau, Rodolfo Carvalho, Eric Hanchrow, and Shriram helped 836 | with grammar and spelling checks. 
Casey Klein suggested a section in 837 | the tutorial that shows how we can generate errors that point to 838 | original sources, and Eli Barzilay pushed on including an optimization 839 | section. The extended tutorial includes both of these topics. 840 | 841 | Finally, big shoutouts to the PLT group at 842 | Brown University --- this one is for you guys. :) 843 | @;; Ha! Closing parentheses. 844 | -------------------------------------------------------------------------------- /manual.scrbl: -------------------------------------------------------------------------------- 1 | #lang scribble/manual 2 | 3 | @(require planet/scribble 4 | planet/version 5 | planet/resolver 6 | scribble/eval 7 | racket/sandbox 8 | (for-label racket) 9 | (for-label racket/stxparam) 10 | (for-label racket/unsafe/ops) 11 | "scribble-helpers.rkt") 12 | 13 | @inject-css{extra.css} 14 | 15 | @inject-javascript|{ 16 | var _gaq = _gaq || []; 17 | _gaq.push(['_setAccount', 'UA-24146890-1']); 18 | _gaq.push(['_trackPageview']); 19 | 20 | (function() { 21 | var ga = document.createElement('script'); ga.type = 'text/javascript'; ga.async = true; 22 | ga.src = ('https:' == document.location.protocol ? 'https://ssl' : 'http://www') + '.google-analytics.com/ga.js'; 23 | var s = document.getElementsByTagName('script')[0]; s.parentNode.insertBefore(ga, s); 24 | })(); 25 | }| 26 | 27 | 28 | @title{F*dging up a Racket} 29 | @author+email["Danny Yoo" "dyoo@hashcollision.org"] 30 | 31 | 32 | @;; I'll need an evaluator for some small examples. 33 | @(define my-evaluator 34 | (call-with-trusted-sandbox-configuration 35 | (lambda () 36 | (parameterize ([sandbox-output 'string] 37 | [sandbox-error-output 'string]) 38 | (make-evaluator 'racket 39 | #:requires 40 | (list (resolve-planet-path 41 | `(planet dyoo/bf/parser)))))))) 42 | 43 | 44 | @centered{@smaller{Source code can be found at: 45 | @url{https://github.com/dyoo/brainfudge}. 
The latest version of this 46 | document lives in @url{http://hashcollision.org/brainfudge}.}} 47 | 48 | 49 | @section{Introduction} 50 | 51 | If people say that @link["http://racket-lang.org"]{Racket} is just a 52 | @link["http://en.wikipedia.org/wiki/Scheme_(programming_language)"]{Scheme}, 53 | they are short-selling Racket a little. It's more accurate to say 54 | that Racket is a @link["http://docs.racket-lang.org/guide/languages.html"]{language} laboratory, with support for many different 55 | languages. 56 | 57 | Is that really true? Racket does include a nice 58 | @link["http://docs.racket-lang.org/guide/macros.html"]{macro} system, 59 | which allows a programmer to add in new language constructs. For 60 | example, we can get while loops into Racket with relative ease: 61 | @codeblock{ 62 | #lang racket 63 | (define-syntax-rule (while test body ...) 64 | (local [(define (loop) 65 | (when test 66 | body ... 67 | (loop)))] 68 | (loop))) 69 | ;; From this point forward, we've got while loops. 70 | (while (not (string=? (read-line) "quit")) 71 | (printf "never going to give you up\n") 72 | (printf "never going to let you down\n")) 73 | } 74 | So we can certainly extend the language. But this still looks just 75 | like a Scheme. 76 | 77 | 78 | 79 | Let's take a closer look at a Racket program. Every Racket 80 | program begins with a funny line at the very top that, on first 81 | glance, looks redundant: 82 | @codeblock{ 83 | #lang racket 84 | } 85 | Why in the world does a Racket program need to say that it's a Racket 86 | program? Isn't that obvious? 87 | 88 | 89 | 90 | We can understand the situation better by looking at another 91 | environment on our desktop, namely the web browser. 
A web browser 92 | supports different kinds of HTML variants, since HTML is a moving 93 | target, and browsers have come up with @link["http://en.wikipedia.org/wiki/Quirks_mode"]{crazy rules} for figuring out 94 | how to take an arbitrary document and decide what HTML parsing rules 95 | to apply to it. 96 | 97 | 98 | @link["http://en.wikipedia.org/wiki/HTML5"]{HTML 5} tries to make this determination 99 | somewhat more straightforward: we can define an HTML 5 document by 100 | putting a DOCTYPE declaration at the very top of the file which 101 | self-describes the document as being @emph{html}. 102 | 103 | @verbatim{ 104 | <!DOCTYPE html> 105 | <html> 106 | <body><p>Hello world!</p></body> 107 | </html>
108 | 109 | } 110 | 111 | 112 | Going back to the world of Racket, we see by analogy that the @litchar{#lang} 113 | line in a Racket program is a self-description of how to treat the 114 | rest of the program. (Actually, the @litchar{#lang} line is quite a bit more 115 | active than this, but we'll get to this in a moment.) 116 | 117 | 118 | The @racketmodname[racket] part in the @litchar{#lang} line isn't inevitable: the main Racket 119 | distribution, in fact, comes bundled with several languages which can 120 | take the place of the word @racketmodname[racket]. Many of these languages 121 | (@racketmodname[racket/base], @racketmodname[typed/racket], @racketmodname[lazy]) still look like Racket... but some 122 | of them don't. Here's one example: 123 | @codeblock{ 124 | #lang datalog 125 | ancestor(A, B) :- parent(A, B). 126 | ancestor(A, B) :- 127 | parent(A, C), D = C, ancestor(D, B). 128 | parent(john, douglas). 129 | parent(bob, john). 130 | ancestor(A, B)? 131 | } 132 | This is an example of a @link["http://en.wikipedia.org/wiki/Datalog"]{Datalog} 133 | program that deals with logical relations. Neat! 134 | 135 | 136 | What might be surprising is that the mechanism for using different 137 | languages in Racket is wide open. Let's expand our minds. 138 | @codeblock{ 139 | #lang planet dyoo/bf 140 | ++++++[>++++++++++++<-]>. 141 | >++++++++++[>++++++++++<-]>+. 142 | +++++++..+++.>++++[>+++++++++++<-]>. 143 | <+++[>----<-]>.<<<<<+++[>+++++<-]>. 144 | >>.+++.------.--------.>>+. 145 | } 146 | This language does not look like Racket. It looks like line 147 | noise. This is 148 | @link["http://en.wikipedia.org/wiki/Brainf*ck"]{@tt{brainf*ck}}. Although 149 | this language is not included in the main distribution, because it is 150 | on @link["http://planet.racket-lang.org"]{PLaneT}, anyone with Racket 151 | can easily play with it. 152 | 153 | 154 | Ignoring the question of @emph{why?!!} someone would do this, let's ask another: 155 | how do we build this? 
This tutorial will cover how to build this language 156 | into Racket from scratch. 157 | 158 | 159 | Let's get started! 160 | 161 | 162 | @;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; 163 | @section{The view from high orbit} 164 | @;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; 165 | 166 | We want to teach Racket what it means when we say something like: 167 | @codeblock|{ 168 | #lang planet dyoo/bf 169 | ,[.,] 170 | }| 171 | 172 | As mentioned earlier, a @litchar{#lang} line is quite active: it tells the Racket runtime how to 173 | convert from the surface syntax to a meaningful program. Programs in Racket get digested 174 | in a few stages; the process looks something like this: 175 | 176 | @verbatim|{ 177 | reader macro expansion 178 | surface syntax ---------> AST -----------------> core forms 179 | }| 180 | 181 | When Racket sees 182 | @litchar{#lang planet dyoo/bf}, it will look for a particular module that we call a @emph{reader}; 183 | a reader consumes surface syntax and excretes ASTs, and these ASTs are then 184 | annotated so that Racket knows how to make sense out of them later on. 185 | At this point, the rest of the Racket infrastructure kicks in and macro-expands the ASTs out, ultimately, 186 | to a @link["http://docs.racket-lang.org/reference/syntax-model.html#(part._fully-expanded)"]{core} language. 
187 | 188 | 189 | So here's what we'll do: 190 | @itemlist[ 191 | @item{Capture the meaning of @tt{brainf*ck} by writing a semantics module.} 192 | @item{Go from the line noise of the surface syntax into a more structured form 193 | by writing a parser module.} 194 | @item{Connect the pieces, the semantics and the surface syntax parser, 195 | by making a reader module.} 196 | @item{Profit!}] 197 | 198 | 199 | 200 | @;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; 201 | @section{Flight preparations} 202 | @;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; 203 | Since we're starting from scratch, let's first make a work directory 204 | where we'll keep our source code. I'll call the directory @filepath{bf/}, but you can use 205 | whatever name you want. 206 | @verbatim|{ 207 | $ mkdir bf 208 | }| 209 | 210 | Ultimately, we want to put the fruit of our labor onto @link["http://docs.racket-lang.org/planet/index.html"]{PLaneT}, 211 | since that'll make it easier for others to use our work. 212 | Let's set up a @link["http://docs.racket-lang.org/planet/Developing_Packages_for_PLaneT.html#(part._devlinks)"]{PLaneT development link} so the Racket environment knows about our work directory. I already have an account 213 | on PLaneT with my username @tt{dyoo}. You can 214 | @link["http://planet.racket-lang.org/add.ss"]{get an account} fairly easily. 215 | 216 | If we enter the following at the command line, 217 | @verbatim|{ 218 | $ raco planet link dyoo bf.plt 1 0 bf 219 | }| 220 | we'll make a development link that will associate any module path of the form @racket[(planet dyoo/bf/...)] 221 | to our local @filepath{bf/} directory. Later on, when we create a package and upload it to PLaneT, 222 | we can drop this development link, and then all the references that use @racket[(planet dyoo/bf/...)] will 223 | immediately switch over to the one on the PLaneT server. 224 | 225 | 226 | But does the link actually work? 
Let's write a very simple module in our work directory, and 227 | then see that Racket can find it through PLaneT. 228 | @verbatim|{ 229 | $ cd bf 230 | ~/bf$ cat >hello.rkt 231 | #lang racket 232 | "hello world" 233 | }| 234 | Ok, let's see if Racket can find our magnificent @filepath{hello.rkt} module if we use the PLaneTized version of the name. 235 | @verbatim|{ 236 | ~/bf$ racket 237 | Welcome to Racket v5.2. 238 | > (require (planet dyoo/bf/hello)) 239 | "hello world" 240 | > 241 | }| 242 | If we get to this point, then we've got the PLaneT development link in place. 243 | 244 | 245 | 246 | 247 | 248 | @;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; 249 | @section{The @tt{brainf*ck} language} 250 | @;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; 251 | 252 | When we look at the definition of @link["http://en.wikipedia.org/wiki/Brainf*ck"]{@tt{brainf*ck}}, 253 | it's actually not too bad. There's two bits of state, 254 | @itemlist[ 255 | @item{a byte array of data, and} 256 | @item{a pointer into that data array} 257 | ] 258 | and it has only a few operations that affect this state: 259 | @itemlist[ 260 | @item{Increment the data pointer (@litchar{>})} 261 | @item{Decrement the data pointer (@litchar{<})} 262 | @item{Increment the byte at the data pointer (@litchar{+})} 263 | @item{Decrement the byte at the data pointer (@litchar{-})} 264 | @item{Write a byte to standard output (@litchar{.})} 265 | @item{Read a byte from standard input (@litchar{,})} 266 | @item{Perform a loop until the byte at the data pointer is zero (@litchar{[}, @litchar{]})} 267 | ] 268 | Let's write a module that lets us play with such a system: let's call it @filepath{semantics.rkt}. 269 | 270 | @filebox["semantics.rkt"]{ 271 | @codeblock|{ 272 | #lang racket 273 | 274 | (require rackunit) ;; for unit testing 275 | (provide (all-defined-out)) 276 | 277 | 278 | ;; Our state contains two pieces. 
279 | (define-struct state (data ptr) 280 | #:mutable) 281 | 282 | ;; Creates a new state, with a byte array of 30000 zeros, and 283 | ;; the pointer at index 0. 284 | (define (new-state) 285 | (make-state (make-vector 30000 0) 286 | 0)) 287 | 288 | ;; increment the data pointer 289 | (define (increment-ptr a-state) 290 | (set-state-ptr! a-state (add1 (state-ptr a-state)))) 291 | 292 | ;; decrement the data pointer 293 | (define (decrement-ptr a-state) 294 | (set-state-ptr! a-state (sub1 (state-ptr a-state)))) 295 | 296 | ;; increment the byte at the data pointer 297 | (define (increment-byte a-state) 298 | (define v (state-data a-state)) 299 | (define i (state-ptr a-state)) 300 | (vector-set! v i (add1 (vector-ref v i)))) 301 | 302 | ;; decrement the byte at the data pointer 303 | (define (decrement-byte a-state) 304 | (define v (state-data a-state)) 305 | (define i (state-ptr a-state)) 306 | (vector-set! v i (sub1 (vector-ref v i)))) 307 | 308 | ;; print the byte at the data pointer 309 | (define (write-byte-to-stdout a-state) 310 | (define v (state-data a-state)) 311 | (define i (state-ptr a-state)) 312 | (write-byte (vector-ref v i) (current-output-port))) 313 | 314 | ;; read a byte from stdin into the data pointer 315 | (define (read-byte-from-stdin a-state) 316 | (define v (state-data a-state)) 317 | (define i (state-ptr a-state)) 318 | (vector-set! v i (read-byte (current-input-port)))) 319 | 320 | 321 | ;; we know how to do loops! 322 | (define-syntax-rule (loop a-state body ...) 323 | (local [(define (loop) 324 | (unless (= (vector-ref (state-data a-state) (state-ptr a-state)) 325 | 0) 326 | body ... 327 | (loop)))] 328 | (loop))) 329 | }|} 330 | 331 | Ok, that doesn't seem too bad. But of course, we should test this; let's use 332 | the @racketmodname[rackunit] unit testing framework and tickle this code. Let's add 333 | a little more to the end of @filepath{semantics.rkt}. 334 | @filebox["semantics.rkt"]{ 335 | @codeblock|{ 336 | ;; Simple exercises. 
337 | (let ([s (new-state)]) 338 | (increment-byte s) 339 | (check-equal? 1 (vector-ref (state-data s) 0)) 340 | (increment-byte s) 341 | (check-equal? 2 (vector-ref (state-data s) 0)) 342 | (decrement-byte s) 343 | (check-equal? 1 (vector-ref (state-data s) 0))) 344 | 345 | ;; pointer movement 346 | (let ([s (new-state)]) 347 | (increment-ptr s) 348 | (increment-byte s) 349 | (check-equal? 0 (vector-ref (state-data s) 0)) 350 | (check-equal? 1 (vector-ref (state-data s) 1)) 351 | (decrement-ptr s) 352 | (increment-byte s) 353 | (check-equal? 1 (vector-ref (state-data s) 0)) 354 | (check-equal? 1 (vector-ref (state-data s) 1))) 355 | 356 | ;; make sure standard input is doing something 357 | (let ([s (new-state)]) 358 | (parameterize ([current-input-port 359 | (open-input-bytes (bytes 3 1 4))]) 360 | (read-byte-from-stdin s) 361 | (increment-ptr s) 362 | (read-byte-from-stdin s) 363 | (increment-ptr s) 364 | (read-byte-from-stdin s)) 365 | (check-equal? 3 (vector-ref (state-data s) 0)) 366 | (check-equal? 1 (vector-ref (state-data s) 1)) 367 | (check-equal? 4 (vector-ref (state-data s) 2))) 368 | 369 | 370 | ;; make sure standard output is doing something 371 | (let ([s (new-state)]) 372 | (set-state-data! s (vector 80 76 84)) 373 | (let ([simulated-stdout (open-output-string)]) 374 | (parameterize ([current-output-port simulated-stdout]) 375 | (write-byte-to-stdout s) 376 | (increment-ptr s) 377 | (write-byte-to-stdout s) 378 | (increment-ptr s) 379 | (write-byte-to-stdout s)) 380 | (check-equal? "PLT" (get-output-string simulated-stdout)))) 381 | 382 | 383 | ;; Let's see that we can clear. 384 | (let ([s (new-state)]) 385 | (set-state-data! s (vector 0 104 101 108 112 109 101 105 386 | 109 109 101 108 116 105 110 103)) 387 | (set-state-ptr! s 15) 388 | ;; [ [-] < ] 389 | (loop s 390 | (loop s (decrement-byte s)) 391 | (decrement-ptr s)) 392 | 393 | (check-equal? 0 (state-ptr s)) 394 | (check-equal? (make-vector 16 0) (state-data s))) 395 | }|} 396 | 397 | Good! 
Our tests, at the very least, let us know that our definitions are 398 | doing something reasonable, and they should all pass. 399 | 400 | 401 | However, there are a few things that we may want to fix in 402 | the future, like the lack 403 | of error trapping if the input stream contains @racket[eof]. And there's no bounds-checking 404 | on the @racket[ptr] or on the values in the data. Wow, there are quite a few things that we might want 405 | to fix. But at the very least, we now have a module that captures the semantics of @tt{brainf*ck}. 406 | 407 | 408 | 409 | @section{Lisping a language} 410 | 411 | We might even be cheeky enough to insist that people write @tt{brainf*ck} programs with s-expressions. 412 | Let's take that route, and create a @link["http://docs.racket-lang.org/guide/module-languages.html"]{module language} 413 | that uses our @filepath{semantics.rkt}. We'll create such a module language in @filepath{language.rkt}. 414 | @filebox["language.rkt"]{ 415 | @codeblock|{ 416 | #lang racket 417 | 418 | (require "semantics.rkt") 419 | 420 | (provide greater-than 421 | less-than 422 | plus 423 | minus 424 | period 425 | comma 426 | brackets 427 | (rename-out [my-module-begin #%module-begin])) 428 | 429 | ;; The current-state is a parameter used by the 430 | ;; rest of this language. 431 | (define current-state (make-parameter (new-state))) 432 | 433 | ;; Every module in this language will make sure that it 434 | ;; uses a fresh state. 435 | (define-syntax-rule (my-module-begin body ...) 
436 | (#%plain-module-begin 437 | (parameterize ([current-state (new-state)]) 438 | body ...))) 439 | 440 | (define-syntax-rule (greater-than) 441 | (increment-ptr (current-state))) 442 | 443 | (define-syntax-rule (less-than) 444 | (decrement-ptr (current-state))) 445 | 446 | (define-syntax-rule (plus) 447 | (increment-byte (current-state))) 448 | 449 | (define-syntax-rule (minus) 450 | (decrement-byte (current-state))) 451 | 452 | (define-syntax-rule (period) 453 | (write-byte-to-stdout (current-state))) 454 | 455 | (define-syntax-rule (comma) 456 | (read-byte-from-stdin (current-state))) 457 | 458 | (define-syntax-rule (brackets body ...) 459 | (loop (current-state) body ...)) 460 | }|} 461 | 462 | 463 | This @filepath{language.rkt} presents @tt{brainf*ck} as an s-expression-based language. 464 | It uses the semantics we've coded up, and defines rules for handling 465 | @racket[greater-than], @racket[less-than], etc... We have a @link["http://docs.racket-lang.org/guide/parameterize.html"]{parameter} called @racket[current-state] 466 | that holds the state of the @tt{brainf*ck} machine that's used throughout the language. 467 | 468 | There's one piece of this language that looks particularly mysterious: what's the @racket[#%module-begin] form, 469 | and what is it doing? In Racket, every 470 | module has an implicit @racket[#%module-begin] that wraps around the entirety of the module's body. 471 | We can see this by asking Racket to show us the results of the expansion process; 472 | here's a small example to demonstrate. 473 | @interaction[#:eval my-evaluator 474 | (syntax->datum 475 | (expand '(module an-example-module '#%kernel 476 | "hello" 477 | "world"))) 478 | ] 479 | Ignore, for the moment, the use of @racket[syntax->datum] or the funky use of @racket['#%kernel]. 480 | What we should notice 481 | is that Racket has added in that @racket[#%module-begin] around the @racket["hello"] and @racket["world"]. 
482 | So there's the implicit wrapping that Racket is doing. 483 | 484 | It turns out that @racket[#%module-begin] can be really useful! In particular, 485 | we want to guarantee that every module written in @tt{brainf*ck} runs under a fresh state. If 486 | we had two @tt{brainf*ck} programs running, say like this: 487 | @racketblock[(require "my-first-bf-program.rkt") 488 | (require "my-second-bf-program.rkt")] 489 | then it would be a shame to have the two programs clash just because they @tt{brainf*ck}ed each other's data! 490 | By defining our own @racket[#%module-begin], we can ensure that each @tt{brainf*ck} module has 491 | its own fresh version of the state, and our definition of @racket[my-module-begin] 492 | does this for us. 493 | 494 | 495 | 496 | Once we've written @filepath{language.rkt}, we can use the language 497 | like this: 498 | @codeblock|{ 499 | #lang s-exp (planet dyoo/bf/language) 500 | 501 | (plus)(plus)(plus)(plus)(plus) (plus)(plus)(plus)(plus)(plus) 502 | (brackets 503 | (greater-than) (plus)(plus)(plus)(plus)(plus) (plus)(plus) 504 | (greater-than) (plus)(plus)(plus)(plus)(plus) (plus)(plus) 505 | (plus)(plus)(plus) (greater-than) (plus)(plus)(plus) 506 | (greater-than) (plus) (less-than)(less-than)(less-than) 507 | (less-than) (minus)) 508 | (greater-than) (plus)(plus) (period) 509 | (greater-than) (plus) (period) 510 | (plus)(plus)(plus)(plus)(plus) (plus)(plus) (period) 511 | (period) (plus)(plus)(plus) (period) 512 | (greater-than) (plus)(plus) (period) 513 | (less-than)(less-than) (plus)(plus)(plus)(plus)(plus) 514 | (plus)(plus)(plus)(plus)(plus) (plus)(plus)(plus)(plus)(plus) 515 | (period) (greater-than) (period) 516 | (plus)(plus)(plus) (period) 517 | (minus)(minus)(minus)(minus)(minus)(minus)(period) 518 | (minus)(minus)(minus)(minus)(minus)(minus)(minus)(minus) 519 | (period)(greater-than) (plus) (period) (greater-than) (period) 520 | }| 521 | 522 | The @litchar{#lang} line here is saying, essentially, that the following 
program 523 | is written with s-expressions, and should be treated with the module language @filepath{language.rkt} 524 | that we just wrote up. And if we run this program, we should see a familiar greeting. 525 | Hurrah! 526 | 527 | 528 | ... But wait! We can't just declare victory here. We really do want 529 | to allow the throngs of @tt{brainf*ck} programmers to write @tt{brainf*ck} in the surface syntax that 530 | they deserve. 531 | Keep @filepath{language.rkt} on hand, though. We will reuse it by having our 532 | parser transform the surface syntax into the forms we defined in @filepath{language.rkt}. 533 | 534 | 535 | Let's get that parser working! 536 | 537 | @;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; 538 | @section{Parsing the surface syntax} 539 | @;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; 540 | 541 | The Racket toolchain includes a professional-strength lexer and parser 542 | in the @link["http://docs.racket-lang.org/parser-tools/index.html"]{parser-tools} collection. 543 | For the sake of keeping this example terse, we'll 544 | write a simple @link["http://en.wikipedia.org/wiki/Recursive_descent_parser"]{recursive-descent parser} without using the parser-tools collection. (But if our surface 545 | syntax were any more complicated, we might reconsider this decision.) 546 | 547 | The expected output of a successful parse should be some kind of abstract syntax tree. What representation 548 | should we use for the tree? Although we can use s-expressions, 549 | they're pretty lossy: they don't record where they came from 550 | in the original source text. For the case of @tt{brainf*ck}, we might not care, 551 | but if we were to write a parser for a more professional, 552 | sophisticated language (like @link["http://lolcode.com/"]{LOLCODE}) we 553 | want source locations so we can give good error messages during parsing or run-time. 
554 | 555 | As an alternative to plain s-expressions, we'll use a data structure built into Racket called a 556 | @link["http://docs.racket-lang.org/guide/stx-obj.html"]{syntax object}; syntax objects let 557 | us represent ASTs, just like s-expressions, and they also carry along auxiliary 558 | information, such as source locations. Plus, as we briefly saw in our play with @racket[expand], syntax objects are the native data structure that Racket 559 | itself uses during macro expansion, so we might as well use them ourselves. 560 | 561 | For example, 562 | @interaction[#:eval my-evaluator 563 | (define an-example-syntax-object 564 | (datum->syntax #f 'hello (list "hello.rkt" 565 | 1 566 | 20 567 | 32 568 | 5)))] 569 | The first argument that we pass into @racket[datum->syntax] lets us tell Racket any 570 | lexical-scoping information that we know about the datum, but in this case, we don't have 571 | any on hand, so we just give it @racket[#f]. Let's look at the structure of this syntax object. 572 | @interaction[#:eval my-evaluator 573 | an-example-syntax-object 574 | (syntax? an-example-syntax-object) 575 | (syntax->datum an-example-syntax-object) 576 | (symbol? (syntax->datum an-example-syntax-object)) 577 | ] 578 | So a syntax object is a wrapper around an s-expression, and we can get the underlying datum by using @racket[syntax->datum]. 579 | Furthermore, this object remembers where it came from, and that it was on line 1, column 20, position 32, and was five characters long: 580 | @interaction[#:eval my-evaluator 581 | (syntax-source an-example-syntax-object) 582 | (syntax-line an-example-syntax-object) 583 | (syntax-column an-example-syntax-object) 584 | (syntax-position an-example-syntax-object) 585 | (syntax-span an-example-syntax-object) 586 | ] 587 | 588 | 589 | Now that we have some experience playing with syntax objects, let's write a parser. 
590 | Our parser will consume an @link["http://docs.racket-lang.org/reference/ports.html"]{input-port}, 591 | from which we can read in characters with @racket[read-char], or find out where we are with @racket[port-next-location]. We also want to store some record of where our program originated from, 592 | so our parser will also take in a @racket[source-name] parameter. 593 | We'll write the following into @filepath{parser.rkt}. 594 | @filebox["parser.rkt"]{ 595 | @codeblock|{ 596 | #lang racket 597 | ;; The only visible export of this module will be parse-expr. 598 | (provide parse-expr) 599 | 600 | ;; parse-expr: any input-port -> (U syntax eof) 601 | ;; Either produces a syntax object or the eof object. 602 | (define (parse-expr src in) 603 | (define-values (line column position) (port-next-location in)) 604 | (define next-char (read-char in)) 605 | 606 | ;; decorate: s-expression number -> syntax 607 | ;; Wrap the s-expression with source location. 608 | (define (decorate sexp span) 609 | (datum->syntax #f sexp (list src line column position span))) 610 | 611 | (cond 612 | [(eof-object? next-char) eof] 613 | [else 614 | (case next-char 615 | [(#\<) (decorate '(less-than) 1)] 616 | [(#\>) (decorate '(greater-than) 1)] 617 | [(#\+) (decorate '(plus) 1)] 618 | [(#\-) (decorate '(minus) 1)] 619 | [(#\,) (decorate '(comma) 1)] 620 | [(#\.) (decorate '(period) 1)] 621 | [(#\[) 622 | ;; The slightly messy case is bracket. We keep reading 623 | ;; a list of exprs, and then construct a wrapping bracket 624 | ;; around the whole thing. 625 | (define elements (parse-exprs src in)) 626 | (define-values (l c tail-position) 627 | (port-next-location in)) 628 | (decorate `(brackets ,@elements) 629 | (- tail-position position))] 630 | [else 631 | (parse-expr src in)])])) 632 | 633 | ;; parse-exprs: any input-port -> (listof syntax) 634 | ;; Parse a list of expressions. 635 | (define (parse-exprs source-name in) 636 | (define peeked-char (peek-char in)) 637 | (cond 638 | [(eof-object? 
peeked-char) 639 | (error 'parse-exprs "Expected ], but read eof")] 640 | [(char=? peeked-char #\]) 641 | (read-char in) 642 | empty] 643 | [(member peeked-char (list #\< #\> #\+ #\- #\, #\. #\[)) 644 | (cons (parse-expr source-name in) 645 | (parse-exprs source-name in))] 646 | [else 647 | (read-char in) 648 | (parse-exprs source-name in)])) 649 | }|} 650 | This parser isn't anything too tricky, although there's a little bit of 651 | messiness because it needs to handle brackets recursively. That part 652 | is supposed to be a little messy anyway, since it's the capstone that builds tree structure out 653 | of a linear character stream. (If we were using a parenthesized language, we 654 | could simply use @racket[read-syntax], but the whole point is to deal 655 | with the messiness of the surface syntax!) 656 | 657 | Let's see if this parser does anything useful: 658 | @interaction[#:eval my-evaluator 659 | (define my-sample-input-port (open-input-string ",[.,]")) 660 | (port-count-lines! my-sample-input-port) 661 | (define first-stx 662 | (parse-expr "my-sample-program.rkt" my-sample-input-port)) 663 | first-stx 664 | (define second-stx 665 | (parse-expr "my-sample-program.rkt" my-sample-input-port)) 666 | second-stx 667 | (parse-expr "my-sample-program.rkt" my-sample-input-port)] 668 | Good! So we're able to parse syntax objects out of an input stream. 669 | @interaction[#:eval my-evaluator 670 | (syntax->datum second-stx) 671 | (syntax-source second-stx) 672 | (syntax-position second-stx) 673 | (syntax-span second-stx)] 674 | And as we can see, we can explode the syntax object and look at its datum. We should note 675 | that the parser is generating syntax objects that use the same names as the defined names we 676 | have in our @filepath{language.rkt} module language. Yup, that's deliberate, and we'll see why in 677 | the next section. 678 | 679 | 680 | We mentioned that the parser wasn't too hard... 
but then again, we haven't written good traps 681 | for error conditions. This parser is a baby parser. 682 | If we were more rigorous, we'd probably implement it with the parser-tools collection, 683 | write unit tests for the parser with @racketmodname[rackunit], and 684 | make sure to produce good error messages when Bad Things happen 685 | (like having unbalanced brackets or parentheses. 686 | @;; Yes, the unbalanced parentheses here is a joke. I wonder if anyone will correct me for it. 687 | 688 | 689 | 690 | Still, we've now got the language and a parser. How do we tie them together? 691 | 692 | @;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; 693 | @section{Crossing the wires} 694 | @;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; 695 | 696 | This part is fairly straightforward. We have two pieces in hand: 697 | @itemlist[@item{A parser in @filepath{parser.rkt} for the surface syntax that produces ASTs} 698 | @item{A module language in @filepath{language.rkt} that provides the meaning for those ASTs.} 699 | ] 700 | To combine these two pieces together, we want to define a @link["http://docs.racket-lang.org/guide/hash-lang_reader.html"]{reader} that associates the two. 701 | When Racket encounters a @litchar{#lang} line of the form: 702 | @codeblock{ 703 | #lang planet dyoo/bf 704 | } 705 | it will look for a reader module in @filepath{lang/reader.rkt} and use it to parse the file. 706 | 707 | Racket provides a helper module called @racketmodname[syntax/module-reader] to handle most of the 708 | dirty work; let's use it. 
Make a @filepath{lang/} subdirectory, and create @filepath{reader.rkt} 709 | in that subdirectory, with the following content: 710 | @filebox["lang/reader.rkt"]{ 711 | @codeblock|{ 712 | #lang s-exp syntax/module-reader 713 | (planet dyoo/bf/language) 714 | #:read my-read 715 | #:read-syntax my-read-syntax 716 | 717 | (require "../parser.rkt") 718 | 719 | (define (my-read in) 720 | (syntax->datum (my-read-syntax #f in))) 721 | 722 | (define (my-read-syntax src in) 723 | (parse-expr src in)) 724 | }|} 725 | Some of this is magic, so let's step through this. The second line of the file tells @racketmodname[syntax/module-reader] that any syntax objects that 726 | come out are intended to take on their semantics from our language module @filepath{language.rkt}. @racket[syntax/module-reader] 727 | is predisposed to assume that programs are read using @racket[read] and @racket[read-syntax], so we 728 | override that default and plug in our @racket[parse-expr] function into place. 729 | 730 | 731 | Now that we have all these pieces together, does any of this work? Let's try it! 732 | @verbatim|{ 733 | $ cat hello2.rkt 734 | #lang planet dyoo/bf 735 | ++++++[>++++++++++++<-]>. 736 | >++++++++++[>++++++++++<-]>+. 737 | +++++++..+++.>++++[>+++++++++++<-]>. 738 | <+++[>----<-]>.<<<<<+++[>+++++<-]>. 739 | >>.+++.------.--------.>>+. 740 | 741 | $ racket hello2.rkt 742 | Hello, World! 743 | }| 744 | 745 | Sweet, sweet words. 746 | 747 | 748 | 749 | 750 | @;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; 751 | @section{Landing on PLaneT} 752 | @;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; 753 | 754 | Finally, we want to get this work onto @link["http://docs.racket-lang.org/planet/index.html"]{PLaneT} so that other people can share in the joy 755 | of writing @tt{brainf*ck} in Racket. Let's do it! 756 | 757 | 758 | First, let's go back to the parent of our work directory. Once we're there, we'll use the @tt{planet create} command. 
759 | 760 | @verbatim|{ 761 | $ raco planet create bf 762 | planet create bf 763 | MzTarring ./... 764 | MzTarring ./lang... 765 | 766 | WARNING: 767 | Package has no info.rkt file. This means it will not have a description or documentation on the PLaneT web site. 768 | 769 | $ ls -l bf.plt 770 | -rw-rw-r-- 1 dyoo nogroup 3358 Jun 12 19:39 bf.plt 771 | }| 772 | 773 | There are a few warnings, because we haven't defined an @filepath{info.rkt} which provides metadata 774 | about our package. Good, diligent citizens would @link["http://docs.racket-lang.org/planet/Developing_Packages_for_PLaneT.html#(part._.Create_an__info_rkt__.File__.Optional_)"]{write an @filepath{info.rkt} file}, so let's write one. 775 | @filebox["info.rkt"]{ 776 | @codeblock|{ 777 | #lang setup/infotab 778 | (define name "bf: a brainf*ck compiler for Racket") 779 | (define categories '(devtools)) 780 | (define can-be-loaded-with 'all) 781 | (define required-core-version "5.1.1") 782 | (define version "1.0") 783 | (define repositories '("4.x")) 784 | (define scribblings '()) 785 | (define primary-file "language.rkt") 786 | (define blurb 787 | '("Provides support for the brainf*ck language.")) 788 | (define release-notes 789 | '((p "First release"))) 790 | }|} 791 | 792 | 793 | 794 | 795 | Before we upload the package, let's make sure the @filepath{bf.plt} package works for us locally. We'll simulate an installation. First, let's break the development link. 796 | @verbatim{ 797 | $ raco planet unlink dyoo bf.plt 1 0 798 | } 799 | If we try running our test program from before, it should fail on us. 800 | 801 | @verbatim{ 802 | $ racket hello2.rkt 803 | require: PLaneT could not find the requested package: Server had no matching package: No package matched the specified criteria 804 | } 805 | Ok, that was expected. Since we've dissolved the development link, and since we haven't uploaded the 806 | package onto the PLaneT network yet, we see the error that we expect to see. 
807 | 808 | Next, let's use @tt{planet fileinject} to simulate an installation of our package from PLaneT. 809 | @verbatim|{ 810 | $ raco planet fileinject dyoo bf.plt 1 0 811 | planet fileinject dyoo bf.plt 1 0 812 | 813 | ============= Installing bf.plt on Sun, 12 Jun 2011 19:49:50 ============= 814 | raco setup: Unpacking archive from /home/dyoo/bf.plt 815 | ... 816 | }| 817 | Lots and lots of output later, the package should be installed. 818 | 819 | If we try running our test program again... 820 | @verbatim{ 821 | $ racket hello2.rkt 822 | Hello, World! 823 | } 824 | Good! This simulates the situation where the package has been installed from PLaneT. 825 | 826 | 827 | Once we're finally satisfied with the package's contents, we can upload it onto PLaneT. 828 | If you log onto @link["http://planet.racket-lang.org"]{planet.racket-lang.org}, 829 | the user interface will allow 830 | you to upload your @filepath{bf.plt} package. 831 | 832 | 833 | 834 | 835 | @section{Acknowledgements} 836 | 837 | Very special thanks to @link["http://www.cs.brown.edu/~sk/"]{Shriram 838 | Krishnamurthi} for being understanding when I told him I had coded a 839 | @tt{brainf*ck} compiler. Basically, everyone in the Racket community 840 | (like Mark Engelberg, Eric Hanchrow, Eli Barzilay, Matthew Flatt, 841 | Robby Findler, and others that I'm blanking out on...) has been 842 | wonderful. The 843 | @link["http://lists.racket-lang.org/users/archive/2011-June/046090.html"]{mailing 844 | list thread} shows how many people have helped to shape this tutorial. 845 | 846 | 847 | Guillaume Marceau, Rodolfo Carvalho, Eric Hanchrow, and Shriram helped 848 | with grammar and spelling checks. Casey Klein suggested a section in 849 | the tutorial that shows how we can generate errors that point to 850 | original sources. Eli Barzilay pushed on including an optimization 851 | section. 
852 | 853 | Furthermore, thanks to those who commented from the 854 | @link["http://www.reddit.com/r/programming/comments/i1slm/amazing_tutorial_demonstrating_the_power_of/"]{/r/programming} 855 | Reddit thread: they helped isolate a performance issue regarding 856 | parameters and further motivated the following section on 857 | optimization. David Van Horn pointed out how to use 858 | @link["http://pypy.org"]{PyPy}'s JIT properly, with amazing results. 859 | Sam Tobin-Hochstadt and Jay McCarthy provided a few optimization 860 | suggestions, many of which are in the main @racketmodname[(planet 861 | dyoo/bf)] implementation. 862 | 863 | Finally, big shoutouts to the PLT group at 864 | Brown University --- this one is for you guys. :) 865 | @;; Ha! Closing parentheses. 866 | 867 | 868 | 869 | @section{Epilo... Optimization and polishing!} 870 | 871 | @;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; 872 | @; Warning Will Robinson, Warning! 873 | @;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; 874 | 875 | @;; Just as in Puella Magi Madoka Magica, where things change in 876 | @;; Episode 10, here we too go back to the past. 877 | @;; We revise and revise in Chapter 10 with the hopes of making 878 | @;; things better. 879 | @;; Hopefully this won't be a disaster. 880 | 881 | 882 | So we upload and release the package on PLaneT, and send our 883 | marketeers out to spread the Word. We kick back, lazily twiddle our 884 | thumbs, and await the adoration of the global @tt{brainf*ck} 885 | community. 886 | 887 | To our dismay, someone brings up the fact that our 888 | implementation is 889 | @link["http://www.reddit.com/r/programming/comments/i1slm/amazing_tutorial_demonstrating_the_power_of/c20e7ka"]{slower} 890 | than an @link["https://bitbucket.org/brownan/pypy-tutorial/src/tip/example1.py"]{interpreter} written in another language. What?! 891 | 892 | But the Internet is absolutely correct. Let's run the numbers. 
893 | We can grab another @tt{brainf*ck} implementation and try it on a 894 | benchmarking program, like the one that 895 | @link["https://github.com/dyoo/brainfudge/blob/master/examples/prime.rkt"]{generates 896 | prime numbers}. Let's see what the competition looks like: 897 | 898 | @verbatim|{ 899 | $ echo 100 | time ~/local/pypy/bin/pypy example1.py prime.b 900 | Primes up to: 2 3 5 7 11 13 17 19 23 29 31 37 41 43 47 53 59 61 67 71 73 79 83 89 97 901 | 16.72user 0.24system 0:17.18elapsed 98%CPU (0avgtext+0avgdata 0maxresident)k 902 | 0inputs+0outputs (0major+3554minor)pagefaults 0swaps 903 | }| 904 | 905 | Ok, about sixteen seconds. Not bad. We're not even using their JIT, and 906 | they're still producing reasonable results. 907 | 908 | Now let's look at our own performance. We surely can't do worse, right? 909 | 910 | @verbatim|{ 911 | $ raco make prime.rkt && (echo 100 | time racket prime.rkt) 912 | Primes up to: 2 3 5 7 11 13 17 19 23 29 31 37 41 43 47 53 59 61 67 71 73 79 83 89 97 913 | 37.36user 0.65system 0:38.15elapsed 99%CPU (0avgtext+0avgdata 0maxresident)k 914 | 0inputs+0outputs (0major+10259minor)pagefaults 0swaps 915 | }| 916 | 917 | Thirty-seven seconds. Wow. Ouch. 918 | 919 | 920 | Outrageous! Aren't interpreters supposed to be slower than 921 | compilers? Isn't Racket a 922 | @link["http://docs.racket-lang.org/guide/performance.html"]{JIT-compiled 923 | language}? What the heck happened? 924 | 925 | 926 | We tried to follow the creed that says @emph{Get it right, then get it 927 | fast}... except that we didn't. We forgot the second part about getting it fast. 928 | Just because something is compiled and driven by a JIT doesn't guarantee 929 | that the generated code's going to perform particularly well, and the benchmark above 930 | shows that something strange is happening. 931 | 932 | 933 | So let's try our hand at optimization! 
We may not get the raw 934 | performance of an impressive project like @link["http://pypy.org/"]{PyPy}, but we 935 | still should be able to perform reasonably well. Furthermore, we will 936 | include some error handling that uses the source locations we constructed in our 937 | parser, in order to precisely point out runtime errors in the original source. 938 | 939 | 940 | As a warning, if you ran through the previous sections, you may want 941 | to take a small break before continuing forward. This optimization 942 | section is included near the end because the changes we'll be making 943 | require some deeper digging into Racket's language infrastructure, 944 | especially with 945 | @link["http://docs.racket-lang.org/guide/macros.html"]{macros}. Take 946 | a relaxing walk, and then come back to this when you're ready. 947 | 948 | 949 | 950 | 951 | @subsection{Staring into the hot-spot} 952 | 953 | If we look a little closely into our implementation, we might notice 954 | something funny. Well, we might notice many things that look funny in 955 | our @tt{brainf*ck} implementation, but there's a particular one we'll 956 | focus on: each of the forms in @filepath{language.rkt} refer to the 957 | @racket[current-state] parameter. We use that parameter to make sure 958 | the other forms in the language use the same @racket[current-state] 959 | value. And of course we want this kind of localized behavior, to 960 | prevent the kind of interference that might happen if two 961 | @tt{brainf*ck} programs run. 962 | 963 | ... But every use of the parameter appears to be a function call. 964 | Just how bad is that? Let's see. We can fire up our trusty DrRacket 965 | and try the following program in our Interactions window: 966 | 967 | @interaction[#:eval my-evaluator 968 | (require rackunit) 969 | (define my-parameter (make-parameter (box 0))) 970 | (time 971 | (parameterize ([my-parameter (box 0)]) 972 | (for ([x (in-range 10000000)]) 973 | (set-box! 
(my-parameter) 974 | (add1 (unbox (my-parameter))))) 975 | (check-equal? (unbox (my-parameter)) 10000000)))] 976 | 977 | Hmmmm... Ok, what if we didn't have the parameter, and just accessed 978 | the variable more directly? 979 | 980 | @interaction[#:eval my-evaluator 981 | (require rackunit) 982 | (time 983 | (let ([my-parameter (box 0)]) 984 | (for ([x (in-range 10000000)]) 985 | (set-box! my-parameter 986 | (add1 (unbox my-parameter)))) 987 | (check-equal? (unbox my-parameter) 10000000)))] 988 | 989 | In the immortal words of 990 | @link["http://en.wikipedia.org/wiki/Neo_(The_Matrix)"]{Neo}: 991 | @emph{Whoa}. Ok, we've got ourselves a target! 992 | 993 | 994 | Let's take a look again at the definition of our 995 | @racket[my-module-begin] in @filepath{language.rkt}. 996 | 997 | @codeblock|{ 998 | (define current-state (make-parameter (new-state))) 999 | 1000 | (define-syntax-rule (my-module-begin body ...) 1001 | (#%plain-module-begin 1002 | (parameterize ([current-state (new-state)]) 1003 | body ...)))}| 1004 | 1005 | Let's replace the use of the @racket[parameterize] here with a simpler 1006 | @racket[let]. Now we've got something like this: 1007 | 1008 | @codeblock|{ 1009 | (define-syntax-rule (my-module-begin body ...) 1010 | (#%plain-module-begin 1011 | (let ([my-fresh-state (new-state)]) 1012 | body ...)))}| 1013 | 1014 | But now we have a small problem: we want the rest of the inner 1015 | @racket[body] forms to syntactically recognize and re-route any use of 1016 | @racket[current-state] with this @racket[my-fresh-state] binding. But 1017 | we certainly can't just rewrite the whole @filepath{language.rkt} and 1018 | replace uses of @racket[current-state] with @racket[my-fresh-state], 1019 | because @racket[my-fresh-state] isn't a global variable! What do we 1020 | do? 
1021 | 1022 | There's a tool in the Racket library that allows us to solve this 1023 | problem: it's called a 1024 | @link["http://docs.racket-lang.org/reference/stxparam.html"]{syntax 1025 | parameter}. A syntax parameter is similar to the reviled parameter 1026 | that we talked about earlier, except that it works 1027 | @emph{syntactically} rather than @emph{dynamically}. A common use of 1028 | a syntax parameter is to let us wrap a certain area in our code, and 1029 | say: ``Anywhere this identifier shows up, rename it to use this 1030 | variable instead.'' 1031 | 1032 | Let's see a demonstration of these in action, because all this talk 1033 | is a little abstract. What do these syntax parameters really 1034 | do for us? Let's play with them again a little. 1035 | 1036 | 1037 | @interaction[#:eval my-evaluator 1038 | (require racket/stxparam) 1039 | 1040 | (define-syntax-parameter name 1041 | (lambda (stx) 1042 | #'"Madoka")) 1043 | 1044 | name 1045 | 1046 | (define-syntax-rule (say-your-name) 1047 | (printf "Your name is ~a\n" name)) 1048 | 1049 | (define (outside-the-barrier) 1050 | (printf "outside-the-barrier says: ") 1051 | (say-your-name)) 1052 | 1053 | 1054 | (say-your-name) 1055 | 1056 | (let ([the-hero "Homerun"]) 1057 | (syntax-parameterize 1058 | ([name (make-rename-transformer #'the-hero)]) 1059 | (say-your-name) 1060 | (outside-the-barrier))) 1061 | ] 1062 | 1063 | It helps to keep in mind that, in Racket, macros are functions that 1064 | work during compile-time. They take an input syntax, and produce an 1065 | output syntax. Here, we define @racket[name] to be a macro that 1066 | expands to @racket[#'"Madoka"] by default. When we use @racket[name] 1067 | directly, and when we use it in @racket[(say-your-name)] for the first 1068 | time, we're seeing this default in place. 1069 | 1070 | However, we make things more interesting (and a little more 1071 | confusing!) 
in the second use of @racket[say-your-name]: we use 1072 | @racket[let] to create a variable binding, and then use 1073 | @racket[syntax-parameterize] to reroute every use of @racket[name], 1074 | syntactically, with a use of @racket[the-hero]. Within the boundary 1075 | defined at the @racket[syntax-parameterize]'s body, @racket[name] is 1076 | magically transformed! That's why we can see @racket["Homerun"] in 1077 | the second use of @racket[(say-your-name)]. 1078 | 1079 | 1080 | Yet, where we use it from @racket[outside-the-barrier], @racket[name] 1081 | still takes on the default. Why? 1082 | 1083 | Let's go through the macro expanding process by hand, and wherever we 1084 | see @racket[(say-your-name)], let's replace with the @racket[(printf 1085 | ...)]. So when we say: 1086 | @racketblock[ 1087 | (define (outside-the-barrier) 1088 | (printf "outside-the-barrier says: ") 1089 | (say-your-name)) 1090 | ] 1091 | we really mean: 1092 | @racketblock[ 1093 | (define (outside-the-barrier) 1094 | (printf "outside-the-barrier says: ") 1095 | (printf "Your name is ~a\n" name)) 1096 | ] 1097 | 1098 | The use of @racket[name] here is lexically outside the barrier set up 1099 | by @racket[syntax-parameterize]. 1100 | 1101 | And now let's look at the second expression, the one with the @racket[let]. We take: 1102 | @racketblock[ 1103 | (let ([the-hero "Homerun"]) 1104 | (syntax-parameterize 1105 | ([name (make-rename-transformer #'the-hero)]) 1106 | (say-your-name) 1107 | (outside-the-barrier))) 1108 | ] 1109 | and after expanding it partially by hand, we get: 1110 | @racketblock[ 1111 | (let ([the-hero "Homerun"]) 1112 | (syntax-parameterize 1113 | ([name (make-rename-transformer #'the-hero)]) 1114 | (printf "Your name is ~a\n" name) 1115 | (outside-the-barrier))) 1116 | ] 1117 | 1118 | Ah! So the use of @racket[name] that's introduced by 1119 | @racket[say-your-name] is within the lexical boundaries of the 1120 | @racket[syntax-parameterize] form. 
But @racket[outside-the-barrier] 1121 | is a plain, vanilla function, and because it's not a macro, it doesn't 1122 | inline itself into the @racket[syntax-parameterize]'s body. We can 1123 | compare this with the more dynamic behavior of @racket[parameterize], 1124 | and see that this difference is what makes 1125 | @racket[syntax-parameterize] different from @racket[parameterize]. 1126 | Well, we could tell that they're different just from the names... but 1127 | the behavior we're seeing here makes it more clear just what that 1128 | difference is. 1129 | 1130 | 1131 | 1132 | Whew! Frankly, all of this is a little magical. But the hilarious 1133 | thing, despite all this verbiage about syntax parameters, is that the 1134 | implementation of the language looks almost exactly the same as 1135 | before. Here's a version of the language that uses these syntax 1136 | parameters; let's save it into @filepath{language.rkt} and replace the 1137 | previous contents. 1138 | 1139 | @filebox["language.rkt"]{ 1140 | @codeblock|{ 1141 | #lang racket 1142 | 1143 | (require "semantics.rkt" 1144 | racket/stxparam) 1145 | 1146 | (provide greater-than 1147 | less-than 1148 | plus 1149 | minus 1150 | period 1151 | comma 1152 | brackets 1153 | (rename-out [my-module-begin #%module-begin])) 1154 | 1155 | ;; The current-state is a syntax parameter used by the 1156 | ;; rest of this language. 1157 | (define-syntax-parameter current-state #f) 1158 | 1159 | ;; Every module in this language will make sure that it 1160 | ;; uses a fresh state. 1161 | (define-syntax-rule (my-module-begin body ...) 
1162 | (#%plain-module-begin 1163 | (let ([fresh-state (new-state)]) 1164 | (syntax-parameterize 1165 | ([current-state 1166 | (make-rename-transformer #'fresh-state)]) 1167 | body ...)))) 1168 | 1169 | (define-syntax-rule (greater-than) 1170 | (increment-ptr current-state)) 1171 | 1172 | (define-syntax-rule (less-than) 1173 | (decrement-ptr current-state)) 1174 | 1175 | (define-syntax-rule (plus) 1176 | (increment-byte current-state)) 1177 | 1178 | (define-syntax-rule (minus) 1179 | (decrement-byte current-state)) 1180 | 1181 | (define-syntax-rule (period) 1182 | (write-byte-to-stdout current-state)) 1183 | 1184 | (define-syntax-rule (comma) 1185 | (read-byte-from-stdin current-state)) 1186 | 1187 | (define-syntax-rule (brackets body ...) 1188 | (loop current-state body ...)) 1189 | }|} 1190 | 1191 | 1192 | What effect does this change alone make to our performance on 1193 | @tt{brainf*ck} prime generation? Let's cross our fingers! 1194 | 1195 | @verbatim|{ 1196 | $ raco make prime.rkt && (echo 100 | time racket prime.rkt) 1197 | Primes up to: 2 3 5 7 11 13 17 19 23 29 31 37 41 43 47 53 59 61 67 71 73 79 83 89 97 1198 | 6.38user 0.09system 0:06.63elapsed 97%CPU (0avgtext+0avgdata 0maxresident)k 1199 | 0inputs+0outputs (0major+10121minor)pagefaults 0swaps 1200 | }| 1201 | 1202 | 1203 | Now that's more like it! Down from thirty-seven seconds to about six 1204 | and a half. Nice. When we compare this versus the previous 1205 | implementation of the language, we might laugh ruefully: we just got 1206 | rid of a few more parentheses and typed in a few symbols. But of 1207 | course, that's not what we truly did. What in the world just 1208 | happened? 1209 | 1210 | 1211 | Let's summarize what we did: earlier, we had used 1212 | @racket[parameterize] to maintain some shared local state within the 1213 | dynamic extent of our module's body. However, on reflection, we see 1214 | that we don't need the full power of dynamic scope: a simpler (and 1215 | cheaper!) 
lexical scoping mechanism is sufficient here. We now use 1216 | @racket[syntax-parameterize] as our mechanism for sharing that state 1217 | with the rest of the language. And if we ever see 1218 | @racket[parameterize] in a tight inner loop again, we shudder 1219 | instinctively. 1220 | 1221 | 1222 | But now ambition rears its head and whispers to us: can we make the 1223 | code go faster? At some point, we'll hit diminishing returns, but let's see 1224 | what other obvious things we can do, and observe what happens to the 1225 | benchmark results as we optimize. 1226 | 1227 | 1228 | 1229 | @subsection{Macros, macros everywhere} 1230 | 1231 | One trivial thing we can do is revisit our @filepath{semantics.rkt} 1232 | file, and transform all of the exported function definitions into 1233 | macros. This allows Racket's compiler to inline the definitions for 1234 | each use. That is, right now, Racket processes and expands our 1235 | @tt{brainf*ck} programs up to the function definitions in the 1236 | @filepath{semantics.rkt}, but does no cross-module optimizations. If 1237 | we modify those functions into macros, maybe that will help 1238 | performance. 1239 | 1240 | Basically, we go in and replace each 1241 | @racket[define] with a @racket[define-syntax-rule]. 1242 | Here's what @filepath{semantics.rkt} looks like after this change: 1243 | @filebox["semantics.rkt"]{ 1244 | @codeblock|{ 1245 | #lang racket 1246 | 1247 | (require rackunit) ;; for unit testing 1248 | (provide (all-defined-out)) 1249 | 1250 | ;; Our state contains two pieces. 1251 | (define-struct state (data ptr) 1252 | #:mutable) 1253 | 1254 | ;; Creates a new state, with a byte array of 30000 zeros, and 1255 | ;; the pointer at index 0. 1256 | (define-syntax-rule (new-state) 1257 | (make-state (make-vector 30000 0) 1258 | 0)) 1259 | 1260 | ;; increment the data pointer 1261 | (define-syntax-rule (increment-ptr a-state) 1262 | (set-state-ptr! 
a-state (add1 (state-ptr a-state)))) 1263 | 1264 | ;; decrement the data pointer 1265 | (define-syntax-rule (decrement-ptr a-state) 1266 | (set-state-ptr! a-state (sub1 (state-ptr a-state)))) 1267 | 1268 | ;; increment the byte at the data pointer 1269 | (define-syntax-rule (increment-byte a-state) 1270 | (let ([v (state-data a-state)] 1271 | [i (state-ptr a-state)]) 1272 | (vector-set! v i (add1 (vector-ref v i))))) 1273 | 1274 | ;; decrement the byte at the data pointer 1275 | (define-syntax-rule (decrement-byte a-state) 1276 | (let ([v (state-data a-state)] 1277 | [i (state-ptr a-state)]) 1278 | (vector-set! v i (sub1 (vector-ref v i))))) 1279 | 1280 | ;; print the byte at the data pointer 1281 | (define-syntax-rule (write-byte-to-stdout a-state) 1282 | (let ([v (state-data a-state)] 1283 | [i (state-ptr a-state)]) 1284 | (write-byte (vector-ref v i) (current-output-port)))) 1285 | 1286 | ;; read a byte from stdin into the data pointer 1287 | (define-syntax-rule (read-byte-from-stdin a-state) 1288 | (let ([v (state-data a-state)] 1289 | [i (state-ptr a-state)]) 1290 | (vector-set! v i (read-byte (current-input-port))))) 1291 | 1292 | ;; we know how to do loops! 1293 | (define-syntax-rule (loop a-state body ...) 1294 | (let loop () 1295 | (unless (= (vector-ref (state-data a-state) 1296 | (state-ptr a-state)) 1297 | 0) 1298 | body ... 1299 | (loop)))) 1300 | }|} 1301 | 1302 | What effect does this have on our benchmark? 1303 | @verbatim|{ 1304 | $ raco make prime.rkt && (echo 100 | time racket prime.rkt) 1305 | Primes up to: 2 3 5 7 11 13 17 19 23 29 31 37 41 43 47 53 59 61 67 71 73 79 83 89 97 1306 | 3.78user 0.10system 0:03.96elapsed 97%CPU (0avgtext+0avgdata 0maxresident)k 1307 | 0inputs+0outputs (0major+10101minor)pagefaults 0swaps 1308 | }| 1309 | 1310 | Ok, inlining each of the definitions of the semantics gives us a 1311 | little more performance, at the cost of some code expansion. But not a large one. 1312 | 1313 | 1314 | 1315 | @subsection{Structures? 
Where we're going, we won't need structures...} 1316 | 1317 | While we have our eye on @filepath{semantics.rkt}, we might wonder: 1318 | how much is it costing us to access the @racket[data] and @racket[ptr] 1319 | fields of our state? The use of the structure introduces an indirect 1320 | memory access. Maybe we can eliminate it, by saying that the 1321 | @emph{state} of our language consists of two pieces, rather than one 1322 | aggregate piece. So one proposal we can consider is to remove the 1323 | structure, and have each of the rules in our semantics deal with both 1324 | pieces of the state. 1325 | 1326 | The editing for this will be somewhat non-local: we'll need to touch 1327 | both @filepath{semantics.rkt} and @filepath{language.rkt} because each 1328 | form in the semantics will take in two pieces, and each language 1329 | construct in the language must provide those two pieces. Let's see 1330 | what this looks like for both files. 1331 | 1332 | 1333 | @filebox["semantics.rkt"]{ 1334 | @codeblock|{ 1335 | #lang racket 1336 | 1337 | (provide (all-defined-out)) 1338 | 1339 | ;; Provides two values: a byte array of 30000 zeros, and 1340 | ;; the pointer at index 0. 1341 | (define-syntax-rule (new-state) 1342 | (values (make-vector 30000 0) 1343 | 0)) 1344 | 1345 | ;; increment the data pointer 1346 | (define-syntax-rule (increment-ptr data ptr) 1347 | (set! ptr (add1 ptr))) 1348 | 1349 | ;; decrement the data pointer 1350 | (define-syntax-rule (decrement-ptr data ptr) 1351 | (set! ptr (sub1 ptr))) 1352 | 1353 | ;; increment the byte at the data pointer 1354 | (define-syntax-rule (increment-byte data ptr) 1355 | (vector-set! data ptr (add1 (vector-ref data ptr)))) 1356 | 1357 | ;; decrement the byte at the data pointer 1358 | (define-syntax-rule (decrement-byte data ptr) 1359 | (vector-set! 
data ptr (sub1 (vector-ref data ptr)))) 1360 | 1361 | ;; print the byte at the data pointer 1362 | (define-syntax-rule (write-byte-to-stdout data ptr) 1363 | (write-byte (vector-ref data ptr) (current-output-port))) 1364 | 1365 | ;; read a byte from stdin into the data pointer 1366 | (define-syntax-rule (read-byte-from-stdin data ptr) 1367 | (vector-set! data ptr (read-byte (current-input-port)))) 1368 | 1369 | ;; we know how to do loops! 1370 | (define-syntax-rule (loop data ptr body ...) 1371 | (let loop () 1372 | (unless (= (vector-ref data ptr) 1373 | 0) 1374 | body ... 1375 | (loop)))) 1376 | }|} 1377 | 1378 | 1379 | 1380 | @filebox["language.rkt"]{ 1381 | @codeblock|{ 1382 | #lang racket 1383 | 1384 | (require "semantics.rkt" 1385 | racket/stxparam) 1386 | 1387 | (provide greater-than 1388 | less-than 1389 | plus 1390 | minus 1391 | period 1392 | comma 1393 | brackets 1394 | (rename-out [my-module-begin #%module-begin])) 1395 | 1396 | ;; The current-data and current-ptr are syntax parameters used by the 1397 | ;; rest of this language. 1398 | (define-syntax-parameter current-data #f) 1399 | (define-syntax-parameter current-ptr #f) 1400 | 1401 | ;; Every module in this language will make sure that it 1402 | ;; uses a fresh state. 1403 | (define-syntax-rule (my-module-begin body ...) 
1404 | (#%plain-module-begin 1405 | (let-values ([(fresh-data fresh-ptr) (new-state)]) 1406 | (syntax-parameterize 1407 | ([current-data 1408 | (make-rename-transformer #'fresh-data)] 1409 | [current-ptr 1410 | (make-rename-transformer #'fresh-ptr)]) 1411 | body ...)))) 1412 | 1413 | (define-syntax-rule (greater-than) 1414 | (increment-ptr current-data current-ptr)) 1415 | 1416 | (define-syntax-rule (less-than) 1417 | (decrement-ptr current-data current-ptr)) 1418 | 1419 | (define-syntax-rule (plus) 1420 | (increment-byte current-data current-ptr)) 1421 | 1422 | (define-syntax-rule (minus) 1423 | (decrement-byte current-data current-ptr)) 1424 | 1425 | (define-syntax-rule (period) 1426 | (write-byte-to-stdout current-data current-ptr)) 1427 | 1428 | (define-syntax-rule (comma) 1429 | (read-byte-from-stdin current-data current-ptr)) 1430 | 1431 | (define-syntax-rule (brackets body ...) 1432 | (loop current-data current-ptr body ...)) 1433 | }|} 1434 | 1435 | 1436 | Ok, so this change is pretty mechanical. However, it does have a 1437 | consequence: it means that our use of the semantics is a bit more 1438 | restricted. We give each form (@racket[increment-ptr], 1439 | @racket[decrement-ptr], ...) an identifier for the @racket[ptr], 1440 | because some of the rules will @racket[set!] the value of the 1441 | @racket[ptr] identifier. That requires us to first bind 1442 | the state variables, 1443 | @racketblock[(let-values ([(data ptr) (new-state)]) 1444 | ...)] 1445 | and then use @racket[data] and @racket[ptr] bindings 1446 | consistently with the semantics forms. In a sense, the semantics forms now treat 1447 | their arguments as reference variables. 1448 | 1449 | 1450 | In any case, what does our benchmark tell us about this optimization? 
1451 | 1452 | @verbatim|{ 1453 | $ raco make prime.rkt && (echo 100 | time racket prime.rkt) 1454 | Primes up to: 2 3 5 7 11 13 17 19 23 29 31 37 41 43 47 53 59 61 67 71 73 79 83 89 97 1455 | 1.13user 0.09system 0:01.30elapsed 94%CPU (0avgtext+0avgdata 0maxresident)k 1456 | 0inputs+0outputs (0major+10095minor)pagefaults 0swaps 1457 | }| 1458 | 1459 | 1460 | That seems like a worthwhile optimization. Ok, we're down to about a 1461 | second plus a little more. 1462 | 1463 | 1464 | 1465 | @subsection{Strapping on the safety goggles} 1466 | 1467 | Let's pause for a moment. We should ask ourselves: is our language 1468 | actually doing The Right Thing? We might consider the following 1469 | situations: 1470 | 1471 | @itemlist[ 1472 | 1473 | @item{The program may try to read a byte from the standard input port, 1474 | and encounter @racket[eof] instead.} 1475 | 1476 | @item{A program may try to increment the value at the pointer beyond 1477 | the boundaries of a byte.} 1478 | 1479 | @item{The machine might be instructed to shift the pointer @bold{*clunk*} off 1480 | the data array.} 1481 | ] 1482 | 1483 | What happens in our current implementation when these situations arise? 1484 | 1485 | Oh dear. We should have looked at this earlier! How shameful! None 1486 | of these are directly addressed by our current implementation. We'd 1487 | better correct these flaws before continuing forward, before anyone 1488 | else notices. And even if this costs us a few milliseconds in 1489 | performance, it's certainly worth knowing exactly what should happen 1490 | in these situations. 
1491 | 1492 | 1493 | 1494 | @subsubsection{@racket[eof]} 1495 | 1496 | According to the 1497 | @link["http://www.muppetlabs.com/~breadbox/bf/standards.html"]{Portable 1498 | Brainf*ck} guide, 1499 | 1500 | @nested[#:style 'inset]{ 1501 | If a program attempts to input a value when there is no more data in 1502 | the input stream, the value in the current cell after such an 1503 | operation is implementation-defined. (The most common choices are to 1504 | either store 0, or store -1, or to leave the cell's value 1505 | unchanged. This is frequently the most problematic issue for the 1506 | programmer wishing to achieve portability.)} 1507 | 1508 | 1509 | Let's choose to treat the reading of @racket[eof] as a zero. We can 1510 | change the definition of @racket[read-byte-from-stdin] in 1511 | @filepath{semantics.rkt} to do this. 1512 | 1513 | @codeblock{ 1514 | ;; read a byte from stdin into the data pointer 1515 | (define-syntax-rule (read-byte-from-stdin data ptr) 1516 | (vector-set! data ptr 1517 | (let ([a-value (read-byte (current-input-port))]) 1518 | (if (eof-object? a-value) 1519 | 0 1520 | a-value)))) 1521 | } 1522 | 1523 | 1524 | @subsubsection{@racket[out-of-range byte mutation]} 1525 | 1526 | Next, let's look at what the portability guide says about what happens 1527 | when we increment or decrement a byte past certain limits. 1528 | 1529 | @nested[#:style 'inset]{ 1530 | The range of values a single cell can contain is 1531 | implementation-defined. (The range need not be consistent, either: 1532 | consider the case of a "bignum" implementation, whose cells' ranges 1533 | would be limited only by currently available resources.) However, the 1534 | range of every cell shall always at least include the values 0 through 1535 | 127, inclusive.) 
1536 | 1537 | If a program attempts to either decrement the value of a cell below 1538 | its documented minimum value, if any, or increment the value of a cell 1539 | beyond its documented maximum value, if any, then the value in the 1540 | cell after such an operation is implementation-defined. (Most 1541 | implementations choose to let the value wrap around in a fashion 1542 | typical to C integers, but this is not required.)} 1543 | 1544 | 1545 | So it looks like we have a little leeway here. We've implicitly been 1546 | using a vector of bytes, since we've been using @racket[read-byte] 1547 | and @racket[write-byte] on the values of the @racket[data] vector. 1548 | Since 1549 | @link["http://docs.racket-lang.org/guide/bytestrings.html#(tech._byte)"]{bytes} 1550 | range between @racket[0] and @racket[255], let's keep our cells in 1551 | that range too. One simple tool we can use is @racket[modulo], which 1552 | allows us to keep the values in that range. Let's use it. 1553 | 1554 | @codeblock|{ 1555 | ;; increment the byte at the data pointer 1556 | (define-syntax-rule (increment-byte data ptr) 1557 | (vector-set! data ptr (modulo (add1 (vector-ref data ptr)) 256))) 1558 | 1559 | ;; decrement the byte at the data pointer 1560 | (define-syntax-rule (decrement-byte data ptr) 1561 | (vector-set! data ptr (modulo (sub1 (vector-ref data ptr)) 256))) 1562 | }| 1563 | 1564 | 1565 | 1566 | @subsubsection{@racket[out-of-bounds pointer movement]} 1567 | 1568 | What does the portability guide say about moving the tape out-of-bounds? 1569 | 1570 | @nested[#:style 'inset]{ 1571 | If a program attempts to move the pointer below the first array cell, 1572 | or beyond the last array cell, then that program's behavior is 1573 | undefined. (A few implementations cause the pointer to wrap around, 1574 | but many, perhaps most, implementations behave in a manner consistent 1575 | with a C pointer wandering off into arbitrary memory.)} 1576 | 1577 | Wait. Stop right there. 
It is absolutely unacceptable for us to 1578 | just have the pointer wander out-of-bounds like that. 1579 | Even we @tt{brainf*ck} programmers must have our standards. 1580 | Instead, let's make it a guaranteed runtime error that halts 1581 | evaluation. Moreover, let's make sure the error message points 1582 | directly at the offending instruction in the source text. 1583 | 1584 | 1585 | How do we get our errors to highlight in DrRacket? Racket, like many 1586 | languages, provides exceptions as structured values. In particular, 1587 | DrRacket will cooperate when it sees an exception that provides source 1588 | location. 1589 | 1590 | 1591 | Let's look at a short and quick example of error highlighting in 1592 | action. Open up DrRacket and run the following program: 1593 | @codeblock|{ 1594 | #lang racket 1595 | 1596 | ;; We create a structure that supports the 1597 | ;; prop:exn:srclocs protocol. It carries 1598 | ;; with it the location of the syntax that 1599 | ;; is guilty. 1600 | (define-struct (exn:fail:he-who-shall-not-be-named 1601 | exn:fail) 1602 | (a-srcloc) 1603 | #:property prop:exn:srclocs 1604 | (lambda (a-struct) 1605 | (match a-struct 1606 | [(struct exn:fail:he-who-shall-not-be-named 1607 | (msg marks a-srcloc)) 1608 | (list a-srcloc)]))) 1609 | 1610 | ;; We can play with this by creating a form that 1611 | ;; looks at identifiers, and only flags specific ones. 1612 | (define-syntax (skeeterize stx) 1613 | (syntax-case stx () 1614 | [(_ expr) 1615 | (cond 1616 | [(and (identifier? #'expr) 1617 | (eq? (syntax-e #'expr) 'voldemort)) 1618 | (quasisyntax/loc stx 1619 | (raise (make-exn:fail:he-who-shall-not-be-named 1620 | "oh dear don't say his name" 1621 | (current-continuation-marks) 1622 | (srcloc '#,(syntax-source #'expr) 1623 | '#,(syntax-line #'expr) 1624 | '#,(syntax-column #'expr) 1625 | '#,(syntax-position #'expr) 1626 | '#,(syntax-span #'expr)))))] 1627 | [else 1628 | ;; Otherwise, leave the expression alone. 
1629 | #'expr])])) 1630 | 1631 | (define (f x) 1632 | (* (skeeterize x) x)) 1633 | 1634 | (define (g voldemort) 1635 | (* (skeeterize voldemort) voldemort)) 1636 | 1637 | 1638 | ;; Examples: 1639 | (f 7) 1640 | (g 7) ;; The error should highlight the use 1641 | ;; of the one-who-shall-not-be-named 1642 | ;; in g. 1643 | }| 1644 | 1645 | When we create a @racket[make-exn:fail:he-who-shall-not-be-named], we 1646 | provide it a @racket[srcloc] from the originating syntax objects. 1647 | Furthermore, we tell the Racket runtime that this structure is a good 1648 | source for source locations, by annotating the structure's definition 1649 | with @racket[prop:exn:srclocs]. This allows the runtime system to 1650 | cooperate with the DrRacket editor, so that when a 1651 | @racket[make-exn:fail:he-who-shall-not-be-named] does get raised at 1652 | runtime, the editor can nicely highlight the offending party. 1653 | 1654 | 1655 | When we were looking at parsing, we were careful enough to produce 1656 | syntax objects with source locations. It would be a shame to waste 1657 | that effort. Here's what we'll do: we'll adjust the semantics of 1658 | @racket[increment-ptr] and @racket[decrement-ptr] to take in one more 1659 | argument: a representation of the source location. If we see that the 1660 | pointer's going to fall off, we can then raise an exception that's 1661 | annotated with @racket[srcloc] information. That should give the 1662 | DrRacket environment the information it needs to highlight 1663 | tape-movement errors at runtime. 1664 | 1665 | We'll need to change the definition of @racket[greater-than] and 1666 | @racket[less-than] in @filepath{language.rkt} to pass along the source 1667 | locations to the semantics forms, and we need to change the semantics 1668 | to use that location information whenever bad things happen. 
Here's 1669 | what @racket[greater-than] will look like: 1670 | @codeblock|{ 1671 | (define-syntax (greater-than stx) 1672 | (syntax-case stx () 1673 | [(_) 1674 | (quasisyntax/loc stx 1675 | (increment-ptr current-data current-ptr 1676 | (srcloc '#,(syntax-source stx) 1677 | '#,(syntax-line stx) 1678 | '#,(syntax-column stx) 1679 | '#,(syntax-position stx) 1680 | '#,(syntax-span stx))))]))}| 1681 | 1682 | One small complication is that we need the ability to talk about the 1683 | source location of the syntax object being fed to the 1684 | @racket[greater-than] macro, so we switched from using 1685 | @racket[define-syntax-rule] to the more low-level @racket[syntax-case] 1686 | macro definer. 1687 | 1688 | Let's look at the corresponding changes we need to make to 1689 | @racket[increment-ptr]; assuming we have a definition for an 1690 | @racket[exn:fail:out-of-bounds] exception, the code for 1691 | @racket[increment-ptr] will look like this. 1692 | 1693 | @codeblock|{ 1694 | (define-syntax-rule (increment-ptr data ptr loc) 1695 | (begin 1696 | (set! ptr (add1 ptr)) 1697 | (when (>= ptr (vector-length data)) 1698 | (raise (make-exn:fail:out-of-bounds 1699 | "out of bounds" 1700 | (current-continuation-marks) 1701 | loc)))))}| 1702 | 1703 | 1704 | 1705 | 1706 | 1707 | Our @filepath{semantics.rkt} and @filepath{language.rkt} now look like 1708 | the following: 1709 | 1710 | @filebox["semantics.rkt"]{ 1711 | @codeblock|{ 1712 | #lang racket 1713 | 1714 | (provide (all-defined-out)) 1715 | 1716 | ;; We use a customized error structure that supports 1717 | ;; source location reporting. 1718 | (define-struct (exn:fail:out-of-bounds exn:fail) 1719 | (srcloc) 1720 | #:property prop:exn:srclocs 1721 | (lambda (a-struct) 1722 | (list (exn:fail:out-of-bounds-srcloc a-struct)))) 1723 | 1724 | ;; Provides two values: a byte array of 30000 zeros, and 1725 | ;; the pointer at index 0. 
1726 | (define-syntax-rule (new-state) 1727 | (values (make-vector 30000 0) 1728 | 0)) 1729 | 1730 | ;; increment the data pointer 1731 | (define-syntax-rule (increment-ptr data ptr loc) 1732 | (begin 1733 | (set! ptr (add1 ptr)) 1734 | (when (>= ptr (vector-length data)) 1735 | (raise (make-exn:fail:out-of-bounds 1736 | "out of bounds" 1737 | (current-continuation-marks) 1738 | loc))))) 1739 | 1740 | ;; decrement the data pointer 1741 | (define-syntax-rule (decrement-ptr data ptr loc) 1742 | (begin 1743 | (set! ptr (sub1 ptr)) 1744 | (when (< ptr 0) 1745 | (raise (make-exn:fail:out-of-bounds 1746 | "out of bounds" 1747 | (current-continuation-marks) 1748 | loc))))) 1749 | 1750 | ;; increment the byte at the data pointer 1751 | (define-syntax-rule (increment-byte data ptr) 1752 | (vector-set! data ptr 1753 | (modulo (add1 (vector-ref data ptr)) 256))) 1754 | 1755 | ;; decrement the byte at the data pointer 1756 | (define-syntax-rule (decrement-byte data ptr) 1757 | (vector-set! data ptr 1758 | (modulo (sub1 (vector-ref data ptr)) 256))) 1759 | 1760 | ;; print the byte at the data pointer 1761 | (define-syntax-rule (write-byte-to-stdout data ptr) 1762 | (write-byte (vector-ref data ptr) (current-output-port))) 1763 | 1764 | ;; read a byte from stdin into the data pointer 1765 | (define-syntax-rule (read-byte-from-stdin data ptr) 1766 | (vector-set! data ptr 1767 | (let ([a-value (read-byte (current-input-port))]) 1768 | (if (eof-object? a-value) 1769 | 0 1770 | a-value)))) 1771 | 1772 | ;; we know how to do loops! 1773 | (define-syntax-rule (loop data ptr body ...) 1774 | (let loop () 1775 | (unless (= (vector-ref data ptr) 1776 | 0) 1777 | body ... 
1778 | (loop)))) 1779 | }|} 1780 | 1781 | 1782 | 1783 | @filebox["language.rkt"]{ 1784 | @codeblock|{ 1785 | #lang racket 1786 | 1787 | (require "semantics.rkt" 1788 | racket/stxparam) 1789 | 1790 | (provide greater-than 1791 | less-than 1792 | plus 1793 | minus 1794 | period 1795 | comma 1796 | brackets 1797 | (rename-out [my-module-begin #%module-begin])) 1798 | 1799 | ;; The current-data and current-ptr are syntax parameters used by the 1800 | ;; rest of this language. 1801 | (define-syntax-parameter current-data #f) 1802 | (define-syntax-parameter current-ptr #f) 1803 | 1804 | ;; Every module in this language will make sure that it 1805 | ;; uses a fresh state. 1806 | (define-syntax-rule (my-module-begin body ...) 1807 | (#%plain-module-begin 1808 | (let-values ([(fresh-data fresh-ptr) (new-state)]) 1809 | (syntax-parameterize 1810 | ([current-data 1811 | (make-rename-transformer #'fresh-data)] 1812 | [current-ptr 1813 | (make-rename-transformer #'fresh-ptr)]) 1814 | body ...)))) 1815 | 1816 | (define-syntax (greater-than stx) 1817 | (syntax-case stx () 1818 | [(_) 1819 | (quasisyntax/loc stx 1820 | (increment-ptr current-data current-ptr 1821 | (srcloc '#,(syntax-source stx) 1822 | '#,(syntax-line stx) 1823 | '#,(syntax-column stx) 1824 | '#,(syntax-position stx) 1825 | '#,(syntax-span stx))))])) 1826 | 1827 | (define-syntax (less-than stx) 1828 | (syntax-case stx () 1829 | [(_) 1830 | (quasisyntax/loc stx 1831 | (decrement-ptr current-data current-ptr 1832 | (srcloc '#,(syntax-source stx) 1833 | '#,(syntax-line stx) 1834 | '#,(syntax-column stx) 1835 | '#,(syntax-position stx) 1836 | '#,(syntax-span stx))))])) 1837 | 1838 | (define-syntax-rule (plus) 1839 | (increment-byte current-data current-ptr)) 1840 | 1841 | (define-syntax-rule (minus) 1842 | (decrement-byte current-data current-ptr)) 1843 | 1844 | (define-syntax-rule (period) 1845 | (write-byte-to-stdout current-data current-ptr)) 1846 | 1847 | (define-syntax-rule (comma) 1848 | (read-byte-from-stdin 
current-data current-ptr)) 1849 | 1850 | (define-syntax-rule (brackets body ...) 1851 | (loop current-data current-ptr body ...)) 1852 | }|} 1853 | 1854 | 1855 | 1856 | 1857 | And if we try running the following grumpy-looking program, 1858 | @verbatim|{ 1859 | #lang planet dyoo/bf 1860 | 1861 | /人◕ ‿‿ ◕人\ 1862 | 1863 | *********** 1864 | * * 1865 | * o>