├── .gitignore ├── project ├── build.properties └── build │ └── LetsBuildACompiler.scala ├── src ├── test │ └── scala │ │ └── CradleSpec.scala └── main │ └── scala │ └── Cradle.scala ├── README.textile └── reference └── crenshaw-txt ├── readme.txt ├── tutor1.txt ├── tutor8.txt ├── tutor4.txt ├── tutor9.txt ├── tutor2.txt ├── tutor12.txt └── tutor3.txt /.gitignore: -------------------------------------------------------------------------------- 1 | lib_managed 2 | project/boot 3 | target -------------------------------------------------------------------------------- /project/build.properties: -------------------------------------------------------------------------------- 1 | #Project properties 2 | #Sat Nov 28 17:14:14 PST 2009 3 | project.organization=Alex Payne 4 | project.name=Let's Build A Compiler 5 | sbt.version=0.5.6 6 | project.version=1.0 7 | scala.version=2.7.7 8 | project.initialize=false 9 | -------------------------------------------------------------------------------- /project/build/LetsBuildACompiler.scala: -------------------------------------------------------------------------------- 1 | import sbt._ 2 | 3 | class LetsBuildACompilerProject(info: ProjectInfo) extends DefaultProject(info) { 4 | // repositories 5 | val scalaTools = "Scala-Tools" at "http://scala-tools.org/repo-releases" 6 | 7 | // dependencies 8 | val specs = "org.scala-tools.testing" % "specs" % "1.6.1" 9 | } -------------------------------------------------------------------------------- /src/test/scala/CradleSpec.scala: -------------------------------------------------------------------------------- 1 | package net.al3x.letsbuildacompiler.test 2 | 3 | import net.al3x.letsbuildacompiler.Cradle 4 | import org.specs._ 5 | import scala.io.Source 6 | 7 | object CradleSpec extends Specification { 8 | "the stupid cradle" should { 9 | "emit assembly for a single number input" in { 10 | val input = Source.fromString("1") 11 | val cradle = new Cradle(input) 12 | cradle.init 13 | cradle.expression mustEqual "\tMOVE #1,D0" 14 | } 15 | } 16 | } -------------------------------------------------------------------------------- /README.textile: -------------------------------------------------------------------------------- 1 | h1. Let's Build A Compiler! 2 | 3 | h2. Introduction 4 | 5 | This is an implementation of a simple compiler, built roughly in accordance with a "tutorial by Crenshaw, 1988":http://compilers.iecc.com/crenshaw/. An ASCII copy of the tutorial is included in the 'reference' directory. 6 | 7 | Crenshaw implements his compiler in a particular edition of Pascal, long since obsolete. I'll attempt to implement mine in Scala (2.7.7). 8 | 9 | h2. Progress 10 | 11 | * 2009Nov28 - Project skeleton; began porting the cradle from Part I. 12 | 13 | h2. Authors 14 | 15 | "Alex Payne":http://al3x.net/ (at the behest and encouragement of "Steve Jenson":http://saladwithsteve.com/). 
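h2. Usage

Assuming an "sbt":http://code.google.com/p/simple-build-tool/ 0.5.x launcher is installed (the project pins @sbt.version=0.5.6@ in @project/build.properties@), the usual cycle is something like:

bc. sbt update
sbt test
sbt run

@update@ fetches the specs dependency, @test@ runs @CradleSpec@, and @run@ starts the cradle reading an expression from standard input.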
-------------------------------------------------------------------------------- /src/main/scala/Cradle.scala: -------------------------------------------------------------------------------- 1 | package net.al3x.letsbuildacompiler 2 | 3 | import scala.io.Source 4 | 5 | 6 | class Cradle(input: Source) { 7 | // Lookahead Character 8 | var look: Char = ' ' 9 | 10 | // Read New Character From Input Stream 11 | // (returns a blank once the input is exhausted, rather than throwing) 12 | def getChar: Char = { 13 | look = if (input.hasNext) input.next else ' ' 14 | look 15 | } 16 | 17 | // Report an Error 18 | // throw new TypeOfException("error message") 19 | 20 | // Report Error and Halt 21 | // throw new TypeOfException("error message"); exit(-1) 22 | 23 | // Report What Was Expected 24 | def expected(s: String) = { 25 | println("expected " + s) 26 | System.exit(-1) 27 | } 28 | 29 | def expected(c: Char): Unit = expected(c.toString) 30 | 31 | // Match a Specific Input Character 32 | def matchChar(c: Char) = { 33 | if (look == c) { 34 | getChar 35 | } else { 36 | expected(c) 37 | } 38 | } 39 | 40 | // Recognize an Alpha Character 41 | // char.isLetter 42 | 43 | // Recognize a Decimal Digit 44 | // char.isDigit 45 | 46 | // Get an Identifier: return it, then advance the lookahead 47 | def getName: Char = { 48 | if (!look.isLetter) expected("Name") 49 | val name = look 50 | getChar 51 | name 52 | } 53 | 54 | // Get a Number: return it, then advance the lookahead 55 | def getNum: Char = { 56 | if (!look.isDigit) expected("Integer") 57 | val num = look 58 | getChar 59 | num 60 | } 61 | 62 | // Output a String with Tab 63 | def emit(s: String) = "\t" + s 64 | 65 | // Output a String with Tab and CRLF, returning the emitted line 66 | def emitLn(s: String): String = { 67 | val line = emit(s) 68 | println(line) 69 | line 70 | } 71 | 72 | // Parse and Translate a Math Expression 73 | def expression = { 74 | emitLn("MOVE #" + getNum + ",D0") 75 | } 76 | 77 | // Initialize: "prime the pump" by reading the first character 78 | def init = getChar 79 | } 80 | 81 | 82 | object Cradle { 83 | def main(args: Array[String]) { 84 | val c = new Cradle(Source.fromInputStream(System.in)) 85 | c.init 86 | c.expression 87 | } 88 | } 89 | -------------------------------------------------------------------------------- /reference/crenshaw-txt/readme.txt: -------------------------------------------------------------------------------- 1 | TUTOR.ZIP 2 | 3 | This file contains all of the installments of Jack Crenshaw's 4 | tutorial on compiler construction, including the new Installment 15. 5 | The intended audience is those folks who are not computer scientists, 6 | but who enjoy computing and have always wanted to know how compilers 7 | work. A lot of compiler theory has been left out, but the practical 8 | issues are covered. By the time you have completed the series, you 9 | should be able to design and build your own working compiler. It will 10 | not be the world's best, nor will it put out incredibly tight code. 11 | Your product will probably never put Borland or MicroSoft out of 12 | business. But it will work, and it will be yours. 13 | 14 | A word about the file format: The files were originally created using 15 | Borland's DOS editor, Sprint. Sprint could write to a text file only 16 | if you formatted the file to go to the selected printer. I used the 17 | most common printer I could think of, the Epson MX-80, but even then 18 | the files ended up with printer control sequences at the beginning 19 | and end of each page. 20 | 21 | To bring the files up to date and get myself positioned to continue 22 | the series, I recently (1994) converted all the files to work with 23 | Microsoft Word for Windows. Unlike Sprint, Word allows you to write 24 | the file as a DOS text file. 
Unfortunately, this gave me a new 25 | problem, because when Word is writing to a text file, it doesn't 26 | write hard page breaks or page numbers. In other words, in six years 27 | we've gone from a file with page breaks and page numbers, but 28 | embedded escape sequences, to files with no embedded escape sequences 29 | but no page breaks or page numbers. Isn't progress wonderful? 30 | 31 | Of course, it's possible for me to insert the page numbers as 32 | straight text, rather than asking the editor to do it for me. But 33 | since Word won't allow me to write page breaks to the file, we would 34 | end up with files with page numbers that may or may not fall at the 35 | ends of the pages, depending on your editor and your printer. It 36 | seems to me that almost every file I've ever downloaded from 37 | CompuServe or BBS's that had such page numbering was incompatible 38 | with my printer, and gave me pages that were one line short or one 39 | line long, with the page numbers consequently walking up the page. 40 | 41 | So perhaps this new format is, after all, the safest one for general 42 | distribution. The files as they exist will look just fine if read 43 | into any text editor capable of reading DOS text files. Since most 44 | editors these days include rather sophisticated word processing 45 | capabilities, you should be able to get your editor to paginate for 46 | you, prior to printing. 47 | 48 | I hope you like the tutorials. Much thought went into them. 49 | 50 | 51 | Jack W. Crenshaw 52 | 53 | CompuServe 72325,1327 54 |  -------------------------------------------------------------------------------- /reference/crenshaw-txt/tutor1.txt: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | LET'S BUILD A COMPILER! 29 | 30 | By 31 | 32 | Jack W. Crenshaw, Ph.D. 33 | 34 | 24 July 1988 35 | 36 | 37 | Part I: INTRODUCTION 38 | 39 | 40 | ***************************************************************** 41 | * * 42 | * COPYRIGHT NOTICE * 43 | * * 44 | * Copyright (C) 1988 Jack W. Crenshaw. All rights reserved. * 45 | * * 46 | ***************************************************************** 47 | 48 | 49 | INTRODUCTION 50 | 51 | 52 | This series of articles is a tutorial on the theory and practice 53 | of developing language parsers and compilers. Before we are 54 | finished, we will have covered every aspect of compiler 55 | construction, designed a new programming language, and built a 56 | working compiler. 57 | 58 | Though I am not a computer scientist by education (my Ph.D. is in 59 | a different field, Physics), I have been interested in compilers 60 | for many years. I have bought and tried to digest the contents 61 | of virtually every book on the subject ever written. I don't 62 | mind telling you that it was slow going. Compiler texts are 63 | written for Computer Science majors, and are tough sledding for 64 | the rest of us. But over the years a bit of it began to seep in. 65 | What really caused it to jell was when I began to branch off on 66 | my own and begin to try things on my own computer. Now I plan to 67 | share with you what I have learned. At the end of this series 68 | you will by no means be a computer scientist, nor will you know 69 | all the esoterics of compiler theory. I intend to completely 70 | ignore the more theoretical aspects of the subject. 
What you 71 | _WILL_ know is all the practical aspects that one needs to know 72 | to build a working system. 73 | 74 | This is a "learn-by-doing" series. In the course of the series I 75 | will be performing experiments on a computer. You will be 76 | expected to follow along, repeating the experiments that I do, 77 | and performing some on your own. I will be using Turbo Pascal 78 | 4.0 on a PC clone. I will periodically insert examples written 79 | in TP. These will be executable code, which you will be expected 80 | to copy into your own computer and run. If you don't have a copy 81 | of Turbo, you will be severely limited in how well you will be 82 | able to follow what's going on. If you don't have a copy, I urge 83 | you to get one. After all, it's an excellent product, good for 84 | many other uses! 85 | 86 | Some articles on compilers show you examples, or show you (as in 87 | the case of Small-C) a finished product, which you can then copy 88 | and use without a whole lot of understanding of how it works. I 89 | hope to do much more than that. I hope to teach you HOW the 90 | things get done, so that you can go off on your own and not only 91 | reproduce what I have done, but improve on it. 92 | 93 | This is admittedly an ambitious undertaking, and it won't be done 94 | in one page. I expect to do it in the course of a number of 95 | articles. Each article will cover a single aspect of compiler 96 | theory, and will pretty much stand alone. If all you're 97 | interested in at a given time is one aspect, then you need to 98 | look only at that one article. Each article will be uploaded as 99 | it is complete, so you will have to wait for the last one before 100 | you can consider yourself finished. Please be patient. 101 | 102 | 103 | 104 | The average text on compiler theory covers a lot of ground that 105 | we won't be covering here. The typical sequence is: 106 | 107 | o An introductory chapter describing what a compiler is. 108 | 109 | o A chapter or two on syntax equations, using Backus-Naur Form 110 | (BNF). 111 | 112 | o A chapter or two on lexical scanning, with emphasis on 113 | deterministic and non-deterministic finite automata. 114 | 115 | o Several chapters on parsing theory, beginning with top-down 116 | recursive descent, and ending with LALR parsers. 117 | 118 | o A chapter on intermediate languages, with emphasis on P-code 119 | and similar reverse polish representations. 120 | 121 | o Many chapters on alternative ways to handle subroutines and 122 | parameter passing, type declarations, and such. 123 | 124 | o A chapter toward the end on code generation, usually for some 125 | imaginary CPU with a simple instruction set. Most readers 126 | (and in fact, most college classes) never make it this far. 127 | 128 | o A final chapter or two on optimization. This chapter often 129 | goes unread, too. 130 | 131 | 132 | I'll be taking a much different approach in this series. To 133 | begin with, I won't dwell long on options. I'll be giving you 134 | _A_ way that works. If you want to explore options, well and 135 | good ... I encourage you to do so ... but I'll be sticking to 136 | what I know. I also will skip over most of the theory that puts 137 | people to sleep. Don't get me wrong: I don't belittle the 138 | theory, and it's vitally important when it comes to dealing with 139 | the more tricky parts of a given language. But I believe in 140 | putting first things first. 
Here we'll be dealing with the 95% 141 | of compiler techniques that don't need a lot of theory to handle. 142 | 143 | I also will discuss only one approach to parsing: top-down, 144 | recursive descent parsing, which is the _ONLY_ technique that's 145 | at all amenable to hand-crafting a compiler. The other 146 | approaches are only useful if you have a tool like YACC, and also 147 | don't care how much memory space the final product uses. 148 | 149 | I also take a page from the work of Ron Cain, the author of the 150 | original Small C. Whereas almost all other compiler authors have 151 | historically used an intermediate language like P-code and 152 | divided the compiler into two parts (a front end that produces 153 | P-code, and a back end that processes P-code to produce 154 | executable object code), Ron showed us that it is a 155 | straightforward matter to make a compiler directly produce 156 | executable object code, in the form of assembler language 157 | statements. The code will _NOT_ be the world's tightest code ... 158 | producing optimized code is a much more difficult job. But it 159 | will work, and work reasonably well. Just so that I don't leave 160 | you with the impression that our end product will be worthless, I 161 | _DO_ intend to show you how to "soup up" the compiler with some 162 | optimization. 163 | 164 | 165 | 166 | Finally, I'll be using some tricks that I've found to be most 167 | helpful in letting me understand what's going on without wading 168 | through a lot of boiler plate. Chief among these is the use of 169 | single-character tokens, with no embedded spaces, for the early 170 | design work. I figure that if I can get a parser to recognize 171 | and deal with I-T-L, I can get it to do the same with IF-THEN- 172 | ELSE. And I can. In the second "lesson," I'll show you just 173 | how easy it is to extend a simple parser to handle tokens of 174 | arbitrary length. As another trick, I completely ignore file 175 | I/O, figuring that if I can read source from the keyboard and 176 | output object to the screen, I can also do it from/to disk files. 177 | Experience has proven that once a translator is working 178 | correctly, it's a straightforward matter to redirect the I/O to 179 | files. The last trick is that I make no attempt to do error 180 | correction/recovery. The programs we'll be building will 181 | RECOGNIZE errors, and will not CRASH, but they will simply stop 182 | on the first error ... just like good ol' Turbo does. There will 183 | be other tricks that you'll see as you go. Most of them can't be 184 | found in any compiler textbook, but they work. 185 | 186 | A word about style and efficiency. As you will see, I tend to 187 | write programs in _VERY_ small, easily understood pieces. None 188 | of the procedures we'll be working with will be more than about 189 | 15-20 lines long. I'm a fervent devotee of the KISS (Keep It 190 | Simple, Sidney) school of software development. I try to never 191 | do something tricky or complex, when something simple will do. 192 | Inefficient? Perhaps, but you'll like the results. As Brian 193 | Kernighan has said, FIRST make it run, THEN make it run fast. 194 | If, later on, you want to go back and tighten up the code in one 195 | of our products, you'll be able to do so, since the code will be 196 | quite understandable. If you do so, however, I urge you to wait 197 | until the program is doing everything you want it to. 
198 | 199 | I also have a tendency to delay building a module until I 200 | discover that I need it. Trying to anticipate every possible 201 | future contingency can drive you crazy, and you'll generally 202 | guess wrong anyway. In this modern day of screen editors and 203 | fast compilers, I don't hesitate to change a module when I feel I 204 | need a more powerful one. Until then, I'll write only what I 205 | need. 206 | 207 | One final caveat: One of the principles we'll be sticking to here 208 | is that we don't fool around with P-code or imaginary CPUs, but 209 | that we will start out on day one producing working, executable 210 | object code, at least in the form of assembler language source. 211 | However, you may not like my choice of assembler language ... 212 | it's 68000 code, which is what works on my system (under SK*DOS). 213 | I think you'll find, though, that the translation to any other 214 | CPU such as the 80x86 will be quite obvious, so I don't 215 | see a problem here. In fact, I hope someone out there who knows 216 | the '86 language better than I do will offer us the equivalent 217 | object code fragments as we need them. 218 | 219 | 220 | THE CRADLE 221 | 222 | Every program needs some boiler plate ... I/O routines, error 223 | message routines, etc. The programs we develop here will be no 224 | exceptions. I've tried to hold this stuff to an absolute 225 | minimum, however, so that we can concentrate on the important 226 | stuff without losing it among the trees. The code given below 227 | represents about the minimum that we need to get anything done. 228 | It consists of some I/O routines, an error-handling routine and a 229 | skeleton, null main program. I call it our cradle. As we 230 | develop other routines, we'll add them to the cradle, and add the 231 | calls to them as we need to. Make a copy of the cradle and save 232 | it, because we'll be using it more than once. 233 | 234 | There are many different ways to organize the scanning activities 235 | of a parser. In Unix systems, authors tend to use getc and 236 | ungetc. I've had very good luck with the approach shown here, 237 | which is to use a single, global, lookahead character. Part of 238 | the initialization procedure (the only part, so far!) serves to 239 | "prime the pump" by reading the first character from the input 240 | stream. No other special techniques are required with Turbo 4.0 241 | ... each successive call to GetChar will read the next character 242 | in the stream. 
243 | 244 | 245 | {--------------------------------------------------------------} 246 | program Cradle; 247 | 248 | {--------------------------------------------------------------} 249 | { Constant Declarations } 250 | 251 | const TAB = ^I; 252 | 253 | {--------------------------------------------------------------} 254 | { Variable Declarations } 255 | 256 | var Look: char; { Lookahead Character } 257 | 258 | {--------------------------------------------------------------} 259 | { Read New Character From Input Stream } 260 | 261 | procedure GetChar; 262 | begin 263 | Read(Look); 264 | end; 265 | 266 | {--------------------------------------------------------------} 267 | { Report an Error } 268 | 269 | procedure Error(s: string); 270 | begin 271 | WriteLn; 272 | WriteLn(^G, 'Error: ', s, '.'); 273 | end; 274 | 275 | 276 | {--------------------------------------------------------------} 277 | { Report Error and Halt } 278 | 279 | procedure Abort(s: string); 280 | begin 281 | Error(s); 282 | Halt; 283 | end; 284 | 285 | 286 | {--------------------------------------------------------------} 287 | { Report What Was Expected } 288 | 289 | procedure Expected(s: string); 290 | begin 291 | Abort(s + ' Expected'); 292 | end; 293 | 294 | {--------------------------------------------------------------} 295 | { Match a Specific Input Character } 296 | 297 | procedure Match(x: char); 298 | begin 299 | if Look = x then GetChar 300 | else Expected('''' + x + ''''); 301 | end; 302 | 303 | 304 | {--------------------------------------------------------------} 305 | { Recognize an Alpha Character } 306 | 307 | function IsAlpha(c: char): boolean; 308 | begin 309 | IsAlpha := upcase(c) in ['A'..'Z']; 310 | end; 311 | 312 | 313 | {--------------------------------------------------------------} 314 | 315 | { Recognize a Decimal Digit } 316 | 317 | function IsDigit(c: char): boolean; 318 | begin 319 | IsDigit := c in ['0'..'9']; 320 | end; 321 | 322 | 323 | {--------------------------------------------------------------} 324 | { Get an Identifier } 325 | 326 | function GetName: char; 327 | begin 328 | if not IsAlpha(Look) then Expected('Name'); 329 | GetName := UpCase(Look); 330 | GetChar; 331 | end; 332 | 333 | 334 | {--------------------------------------------------------------} 335 | { Get a Number } 336 | 337 | function GetNum: char; 338 | begin 339 | if not IsDigit(Look) then Expected('Integer'); 340 | GetNum := Look; 341 | GetChar; 342 | end; 343 | 344 | 345 | {--------------------------------------------------------------} 346 | { Output a String with Tab } 347 | 348 | procedure Emit(s: string); 349 | begin 350 | Write(TAB, s); 351 | end; 352 | 353 | 354 | 355 | 356 | {--------------------------------------------------------------} 357 | { Output a String with Tab and CRLF } 358 | 359 | procedure EmitLn(s: string); 360 | begin 361 | Emit(s); 362 | WriteLn; 363 | end; 364 | 365 | {--------------------------------------------------------------} 366 | { Initialize } 367 | 368 | procedure Init; 369 | begin 370 | GetChar; 371 | end; 372 | 373 | 374 | {--------------------------------------------------------------} 375 | { Main Program } 376 | 377 | begin 378 | Init; 379 | end. 380 | {--------------------------------------------------------------} 381 | 382 | 383 | That's it for this introduction. Copy the code above into TP and 384 | compile it. Make sure that it compiles and runs correctly. Then 385 | proceed to the first lesson, which is on expression parsing. 
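For comparison, this repository ports the cradle above to Scala (see src/main/scala/Cradle.scala). A minimal sketch of the same lookahead mechanism, assuming input arrives as a scala.io.Source in place of Turbo Pascal's Read:

    import scala.io.Source

    class Cradle(input: Source) {
      var look: Char = ' '                 // the global Lookahead Character

      // Read New Character From Input Stream; blank once input runs out
      def getChar: Char = {
        look = if (input.hasNext) input.next else ' '
        look
      }

      // Initialize: "prime the pump" by reading the first character
      def init = getChar
    }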
386 | 387 | 388 | ***************************************************************** 389 | * * 390 | * COPYRIGHT NOTICE * 391 | * * 392 | * Copyright (C) 1988 Jack W. Crenshaw. All rights reserved. * 393 | * * 394 | ***************************************************************** 395 | 396 | 397 | 398 | 399 | -------------------------------------------------------------------------------- /reference/crenshaw-txt/tutor8.txt: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | LET'S BUILD A COMPILER! 30 | 31 | By 32 | 33 | Jack W. Crenshaw, Ph.D. 34 | 35 | 2 April 1989 36 | 37 | 38 | Part VIII: A LITTLE PHILOSOPHY 39 | 40 | 41 | ***************************************************************** 42 | * * 43 | * COPYRIGHT NOTICE * 44 | * * 45 | * Copyright (C) 1989 Jack W. Crenshaw. All rights reserved. * 46 | * * 47 | ***************************************************************** 48 | 49 | 50 | INTRODUCTION 51 | 52 | This is going to be a different kind of session than the others 53 | in our series on parsing and compiler construction. For this 54 | session, there won't be any experiments to do or code to write. 55 | This once, I'd like to just talk with you for a while. 56 | Mercifully, it will be a short session, and then we can take up 57 | where we left off, hopefully with renewed vigor. 58 | 59 | When I was in college, I found that I could always follow a 60 | prof's lecture a lot better if I knew where he was going with it. 61 | I'll bet you were the same. 62 | 63 | So I thought maybe it's about time I told you where we're going 64 | with this series: what's coming up in future installments, and in 65 | general what all this is about. I'll also share some general 66 | thoughts concerning the usefulness of what we've been doing. 67 | 68 | 69 | THE ROAD HOME 70 | 71 | So far, we've covered the parsing and translation of arithmetic 72 | expressions, Boolean expressions, and combinations connected by 73 | relational operators. We've also done the same for control 74 | constructs. In all of this we've leaned heavily on the use of 75 | top-down, recursive descent parsing, BNF definitions of the 76 | syntax, and direct generation of assembly-language code. We also 77 | learned the value of such tricks as single-character tokens to 78 | help us see the forest through the trees. In the last 79 | installment we dealt with lexical scanning, and I showed you 80 | simple but powerful ways to remove the single-character barriers. 81 | 82 | Throughout the whole study, I've emphasized the KISS philosophy 83 | ... Keep It Simple, Sidney ... and I hope by now you've realized 84 | just how simple this stuff can really be. While there are for 85 | sure areas of compiler theory that are truly intimidating, the 86 | ultimate message of this series is that in practice you can just 87 | politely sidestep many of these areas. If the language 88 | definition cooperates or, as in this series, if you can define 89 | the language as you go, it's possible to write down the language 90 | definition in BNF with reasonable ease. And, as we've seen, you 91 | can crank out parse procedures from the BNF just about as fast as 92 | you can type. 93 | 94 | As our compiler has taken form, it's gotten more parts, but each 95 | part is quite small and simple, and very much like all the 96 | others. 
97 | 98 | At this point, we have many of the makings of a real, practical 99 | compiler. As a matter of fact, we already have all we need to 100 | build a toy compiler for a language as powerful as, say, Tiny 101 | BASIC. In the next couple of installments, we'll go ahead and 102 | define that language. 103 | 104 | To round out the series, we still have a few items to cover. 105 | These include: 106 | 107 | o Procedure calls, with and without parameters 108 | 109 | o Local and global variables 110 | 111 | o Basic types, such as character and integer types 112 | 113 | o Arrays 114 | 115 | o Strings 116 | 117 | o User-defined types and structures 118 | 119 | o Tree-structured parsers and intermediate languages 120 | 121 | o Optimization 122 | 123 | These will all be covered in future installments. When we're 124 | finished, you'll have all the tools you need to design and build 125 | your own languages, and the compilers to translate them. 126 | 127 | I can't design those languages for you, but I can make some 128 | comments and recommendations. I've already sprinkled some 129 | throughout past installments. You've seen, for example, the 130 | control constructs I prefer. 131 | 132 | These constructs are going to be part of the languages I build. 133 | I have three languages in mind at this point, two of which you 134 | will see in installments to come: 135 | 136 | TINY - A minimal, but usable language on the order of Tiny 137 | BASIC or Tiny C. It won't be very practical, but it will 138 | have enough power to let you write and run real programs 139 | that do something worthwhile. 140 | 141 | KISS - The language I'm building for my own use. KISS is 142 | intended to be a systems programming language. It won't 143 | have strong typing or fancy data structures, but it will 144 | support most of the things I want to do with a higher- 145 | order language (HOL), except perhaps writing compilers. 146 | 147 | I've also been toying for years with the idea of a HOL-like 148 | assembler, with structured control constructs and HOL-like 149 | assignment statements. That, in fact, was the impetus behind my 150 | original foray into the jungles of compiler theory. This one may 151 | never be built, simply because I've learned that it's actually 152 | easier to implement a language like KISS, that only uses a subset 153 | of the CPU instructions. As you know, assembly language can be 154 | bizarre and irregular in the extreme, and a language that maps 155 | one-for-one onto it can be a real challenge. Still, I've always 156 | felt that the syntax used in conventional assemblers is dumb ... 157 | why is 158 | 159 | MOVE.L A,B 160 | 161 | better, or easier to translate, than 162 | 163 | B=A ? 164 | 165 | I think it would be an interesting exercise to develop a 166 | "compiler" that would give the programmer complete access to and 167 | control over the full complement of the CPU instruction set, and 168 | would allow you to generate programs as efficient as assembly 169 | language, without the pain of learning a set of mnemonics. Can 170 | it be done? I don't know. The real question may be, "Will the 171 | resulting language be any easier to write than assembly"? If 172 | not, there's no point in it. I think that it can be done, but 173 | I'm not completely sure yet how the syntax should look. 174 | 175 | Perhaps you have some comments or suggestions on this one. I'd 176 | love to hear them. 
177 | 178 | You probably won't be surprised to learn that I've already worked 179 | ahead in most of the areas that we will cover. I have some good 180 | news: Things never get much harder than they've been so far. 181 | It's possible to build a complete, working compiler for a real 182 | language, using nothing but the same kinds of techniques you've 183 | learned so far. And THAT brings up some interesting questions. 184 | 185 | 186 | WHY IS IT SO SIMPLE? 187 | 188 | Before embarking on this series, I always thought that compilers 189 | were just naturally complex computer programs ... the ultimate 190 | challenge. Yet the things we have done here have usually turned 191 | out to be quite simple, sometimes even trivial. 192 | 193 | For a while, I thought it was simply because I hadn't yet gotten 194 | into the meat of the subject. I had only covered the simple 195 | parts. I will freely admit to you that, even when I began the 196 | series, I wasn't sure how far we would be able to go before 197 | things got too complex to deal with in the ways we have so far. 198 | But at this point I've already been down the road far enough to 199 | see the end of it. Guess what? 200 | 201 | 202 | THERE ARE NO HARD PARTS! 203 | 204 | 205 | Then, I thought maybe it was because we were not generating very 206 | good object code. Those of you who have been following the 207 | series and trying sample compiles know that, while the code works 208 | and is rather foolproof, its efficiency is pretty awful. I 209 | figured that if we were concentrating on turning out tight code, 210 | we would soon find all that missing complexity. 211 | 212 | To some extent, that one is true. In particular, my first few 213 | efforts at trying to improve efficiency introduced complexity at 214 | an alarming rate. But since then I've been tinkering around with 215 | some simple optimizations and I've found some that result in very 216 | respectable code quality, WITHOUT adding a lot of complexity. 217 | 218 | Finally, I thought that perhaps the saving grace was the "toy 219 | compiler" nature of the study. I have made no pretense that we 220 | were ever going to be able to build a compiler to compete with 221 | Borland and Microsoft. And yet, again, as I get deeper into this 222 | thing the differences are starting to fade away. 223 | 224 | Just to make sure you get the message here, let me state it flat 225 | out: 226 | 227 | USING THE TECHNIQUES WE'VE USED HERE, IT IS POSSIBLE TO 228 | BUILD A PRODUCTION-QUALITY, WORKING COMPILER WITHOUT ADDING 229 | A LOT OF COMPLEXITY TO WHAT WE'VE ALREADY DONE. 230 | 231 | 232 | Since the series began I've received some comments from you. 233 | Most of them echo my own thoughts: "This is easy! Why do the 234 | textbooks make it seem so hard?" Good question. 235 | 236 | Recently, I've gone back and looked at some of those texts again, 237 | and even bought and read some new ones. Each time, I come away 238 | with the same feeling: These guys have made it seem too hard. 239 | 240 | What's going on here? Why does the whole thing seem difficult in 241 | the texts, but easy to us? Are we that much smarter than Aho, 242 | Ullman, Brinch Hansen, and all the rest? 243 | 244 | Hardly. But we are doing some things differently, and more and 245 | more I'm starting to appreciate the value of our approach, and 246 | the way that it simplifies things. 
Aside from the obvious 247 | shortcuts that I outlined in Part I, like single-character tokens 248 | and console I/O, we have made some implicit assumptions and done 249 | some things differently from those who have designed compilers in 250 | the past. As it turns out, our approach makes life a lot easier. 251 | 252 | So why didn't all those other guys use it? 253 | 254 | You have to remember the context of some of the earlier compiler 255 | development. These people were working with very small computers 256 | of limited capacity. Memory was very limited, the CPU 257 | instruction set was minimal, and programs ran in batch mode 258 | rather than interactively. As it turns out, these caused some 259 | key design decisions that have really complicated the designs. 260 | Until recently, I hadn't realized how much of classical compiler 261 | design was driven by the available hardware. 262 | 263 | Even in cases where these limitations no longer apply, people 264 | have tended to structure their programs in the same way, since 265 | that is the way they were taught to do it. 266 | 267 | In our case, we have started with a blank sheet of paper. There 268 | is a danger there, of course, that you will end up falling into 269 | traps that other people have long since learned to avoid. But it 270 | also has allowed us to take different approaches that, partly by 271 | design and partly by pure dumb luck, have allowed us to gain 272 | simplicity. 273 | 274 | Here are the areas that I think have led to complexity in the 275 | past: 276 | 277 | o Limited RAM Forcing Multiple Passes 278 | 279 | I just read "Brinch Hansen on Pascal Compilers" (an 280 | excellent book, BTW). He developed a Pascal compiler for a 281 | PC, but he started the effort in 1981 with a 64K system, and 282 | so almost every design decision he made was aimed at making 283 | the compiler fit into RAM. To do this, his compiler has 284 | three passes, one of which is the lexical scanner. There is 285 | no way he could, for example, use the distributed scanner I 286 | introduced in the last installment, because the program 287 | structure wouldn't allow it. He also required not one but 288 | two intermediate languages, to provide the communication 289 | between phases. 290 | 291 | All the early compiler writers had to deal with this issue: 292 | Break the compiler up into enough parts so that it will fit 293 | in memory. When you have multiple passes, you need to add 294 | data structures to support the information that each pass 295 | leaves behind for the next. That adds complexity, and ends 296 | up driving the design. Lee's book, "The Anatomy of a 297 | Compiler," mentions a FORTRAN compiler developed for an IBM 298 | 1401. It had no fewer than 63 separate passes! Needless to 299 | say, in a compiler like this the separation into phases 300 | would dominate the design. 301 | 302 | Even in situations where RAM is plentiful, people have 303 | tended to use the same techniques because that is what 304 | they're familiar with. It wasn't until Turbo Pascal came 305 | along that we found how simple a compiler could be if you 306 | started with different assumptions. 307 | 308 | 309 | o Batch Processing 310 | 311 | In the early days, batch processing was the only choice ... 312 | there was no interactive computing. Even today, compilers 313 | run in essentially batch mode. 314 | 315 | In a mainframe compiler as well as many micro compilers, 316 | considerable effort is expended on error recovery ... 
it can 317 | consume as much as 30-40% of the compiler and completely 318 | drive the design. The idea is to avoid halting on the first 319 | error, but rather to keep going at all costs, so that you 320 | can tell the programmer about as many errors in the whole 321 | program as possible. 322 | 323 | All of that harks back to the days of the early mainframes, 324 | where turnaround time was measured in hours or days, and it 325 | was important to squeeze every last ounce of information out 326 | of each run. 327 | 328 | In this series, I've been very careful to avoid the issue of 329 | error recovery, and instead our compiler simply halts with 330 | an error message on the first error. I will frankly admit 331 | that it was mostly because I wanted to take the easy way out 332 | and keep things simple. But this approach, pioneered by 333 | Borland in Turbo Pascal, also has a lot going for it anyway. 334 | Aside from keeping the compiler simple, it also fits very 335 | well with the idea of an interactive system. When 336 | compilation is fast, and especially when you have an editor 337 | such as Borland's that will take you right to the point of 338 | the error, then it makes a lot of sense to stop there, and 339 | just restart the compilation after the error is fixed. 340 | 341 | 342 | o Large Programs 343 | 344 | Early compilers were designed to handle large programs ... 345 | essentially infinite ones. In those days there was little 346 | choice; the idea of subroutine libraries and separate 347 | compilation was still in the future. Again, this 348 | assumption led to multi-pass designs and intermediate files 349 | to hold the results of partial processing. 350 | 351 | Brinch Hansen's stated goal was that the compiler should be 352 | able to compile itself. Again, because of his limited RAM, 353 | this drove him to a multi-pass design. He needed as little 354 | resident compiler code as possible, so that the necessary 355 | tables and other data structures would fit into RAM. 356 | 357 | I haven't stated this one yet, because there hasn't been a 358 | need ... we've always just read and written the data as 359 | streams, anyway. But for the record, my plan has always 360 | been that, in a production compiler, the source and object 361 | data should all coexist in RAM with the compiler, a la the 362 | early Turbo Pascals. That's why I've been careful to keep 363 | routines like GetChar and Emit as separate routines, in 364 | spite of their small size. It will be easy to change them 365 | to read from and write to memory. 366 | 367 | 368 | o Emphasis on Efficiency 369 | 370 | John Backus has stated that, when he and his colleagues 371 | developed the original FORTRAN compiler, they KNEW that they 372 | had to make it produce tight code. In those days, there was 373 | a strong sentiment against HOLs and in favor of assembly 374 | language, and efficiency was the reason. If FORTRAN didn't 375 | produce very good code by assembly standards, the users 376 | would simply refuse to use it. For the record, that FORTRAN 377 | compiler turned out to be one of the most efficient ever 378 | built, in terms of code quality. But it WAS complex! 379 | 380 | Today, we have CPU power and RAM size to spare, so code 381 | efficiency is not so much of an issue. By studiously 382 | ignoring this issue, we have indeed been able to Keep It 383 | Simple. 
Ironically, though, as I have said, I have found 384 | some optimizations that we can add to the basic compiler 385 | structure, without having to add a lot of complexity. So in 386 | this case we get to have our cake and eat it too: we will 387 | end up with reasonable code quality, anyway. 388 | 389 | 390 | o Limited Instruction Sets 391 | 392 | The early computers had primitive instruction sets. Things 393 | that we take for granted, such as stack operations and 394 | indirect addressing, came only with great difficulty. 395 | 396 | Example: In most compiler designs, there is a data structure 397 | called the literal pool. The compiler typically identifies 398 | all literals used in the program, and collects them into a 399 | single data structure. All references to the literals are 400 | done indirectly to this pool. At the end of the 401 | compilation, the compiler issues commands to set aside 402 | storage and initialize the literal pool. 403 | 404 | We haven't had to address that issue at all. When we want 405 | to load a literal, we just do it, in line, as in 406 | 407 | MOVE #3,D0 408 | 409 | There is something to be said for the use of a literal pool, 410 | particularly on a machine like the 8086 where data and code 411 | can be separated. Still, the whole thing adds a fairly 412 | large amount of complexity with little in return. 413 | 414 | Of course, without the stack we would be lost. In a micro, 415 | both subroutine calls and temporary storage depend heavily 416 | on the stack, and we have used it even more than necessary 417 | to ease expression parsing. 418 | 419 | 420 | o Desire for Generality 421 | 422 | Much of the content of the typical compiler text is taken up 423 | with issues we haven't addressed here at all ... things like 424 | automated translation of grammars, or generation of LALR 425 | parse tables. This is not simply because the authors want 426 | to impress you. There are good, practical reasons why the 427 | subjects are there. 428 | 429 | We have been concentrating on the use of a recursive-descent 430 | parser to parse a deterministic grammar, i.e., a grammar 431 | that is not ambiguous and, therefore, can be parsed with one 432 | level of lookahead. I haven't made much of this limitation, 433 | but the fact is that this represents a small subset of 434 | possible grammars. In fact, there is an infinite number of 435 | grammars that we can't parse using our techniques. The LR 436 | technique is a more powerful one, and can deal with grammars 437 | that we can't. 438 | 439 | In compiler theory, it's important to know how to deal with 440 | these other grammars, and how to transform them into 441 | grammars that are easier to deal with. For example, many 442 | (but not all) ambiguous grammars can be transformed into 443 | unambiguous ones. The way to do this is not always obvious, 444 | though, and so many people have devoted years to develop 445 | ways to transform them automatically. 446 | 447 | In practice, these issues turn out to be considerably less 448 | important. Modern languages tend to be designed to be easy 449 | to parse, anyway. That was a key motivation in the design 450 | of Pascal. Sure, there are pathological grammars that you 451 | would be hard pressed to write unambiguous BNF for, but in 452 | the real world the best answer is probably to avoid those 453 | grammars! 454 | 455 | In our case, of course, we have sneakily let the language 456 | evolve as we go, so we haven't painted ourselves into any 457 | corners here. 
You may not always have that luxury. Still, 458 | with a little care you should be able to keep the parser 459 | simple without having to resort to automatic translation of 460 | the grammar. 461 | 462 | 463 | We have taken a vastly different approach in this series. We 464 | started with a clean sheet of paper, and developed techniques 465 | that work in the context that we are in; that is, a single-user 466 | PC with rather ample CPU power and RAM space. We have limited 467 | ourselves to reasonable grammars that are easy to parse, we have 468 | used the instruction set of the CPU to advantage, and we have not 469 | concerned ourselves with efficiency. THAT's why it's been easy. 470 | 471 | Does this mean that we are forever doomed to be able to build 472 | only toy compilers? No, I don't think so. As I've said, we can 473 | add certain optimizations without changing the compiler 474 | structure. If we want to process large files, we can always add 475 | file buffering to do that. These things do not affect the 476 | overall program design. 477 | 478 | And I think that's a key factor. By starting with small and 479 | limited cases, we have been able to concentrate on a structure 480 | for the compiler that is natural for the job. Since the 481 | structure naturally fits the job, it is almost bound to be simple 482 | and transparent. Adding capability doesn't have to change that 483 | basic structure. We can simply expand things like the file 484 | structure or add an optimization layer. I guess my feeling is 485 | that, back when resources were tight, the structures people ended 486 | up with were artificially warped to make them work under those 487 | conditions, and weren't optimum structures for the problem at 488 | hand. 489 | 490 | 491 | CONCLUSION 492 | 493 | Anyway, that's my arm-waving guess as to how we've been able to 494 | keep things simple. We started with something simple and let it 495 | evolve naturally, without trying to force it into some 496 | traditional mold. 497 | 498 | We're going to press on with this. I've given you a list of the 499 | areas we'll be covering in future installments. With those 500 | installments, you should be able to build complete, working 501 | compilers for just about any occasion, and build them simply. If 502 | you REALLY want to build production-quality compilers, you'll be 503 | able to do that, too. 504 | 505 | For those of you who are chafing at the bit for more parser code, 506 | I apologize for this digression. I just thought you'd like to 507 | have things put into perspective a bit. Next time, we'll get 508 | back to the mainstream of the tutorial. 509 | 510 | So far, we've only looked at pieces of compilers, and while we 511 | have many of the makings of a complete language, we haven't 512 | talked about how to put it all together. That will be the 513 | subject of our next two installments. Then we'll press on into 514 | the new subjects I listed at the beginning of this installment. 515 | 516 | See you then. 517 | 518 | ***************************************************************** 519 | * * 520 | * COPYRIGHT NOTICE * 521 | * * 522 | * Copyright (C) 1989 Jack W. Crenshaw. All rights reserved. 
* 523 | * * 524 | ***************************************************************** 525 | 526 | -------------------------------------------------------------------------------- /reference/crenshaw-txt/tutor4.txt: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | LET'S BUILD A COMPILER! 30 | 31 | By 32 | 33 | Jack W. Crenshaw, Ph.D. 34 | 35 | 24 July 1988 36 | 37 | 38 | Part IV: INTERPRETERS 39 | 40 | 41 | ***************************************************************** 42 | * * 43 | * COPYRIGHT NOTICE * 44 | * * 45 | * Copyright (C) 1988 Jack W. Crenshaw. All rights reserved. * 46 | * * 47 | ***************************************************************** 48 | 49 | 50 | INTRODUCTION 51 | 52 | In the first three installments of this series, we've looked at 53 | parsing and compiling math expressions, and worked our way grad- 54 | ually and methodically from dealing with very simple one-term, 55 | one-character "expressions" up through more general ones, finally 56 | arriving at a very complete parser that could parse and translate 57 | complete assignment statements, with multi-character tokens, 58 | embedded white space, and function calls. This time, I'm going 59 | to walk you through the process one more time, only with the goal 60 | of interpreting rather than compiling object code. 61 | 62 | Since this is a series on compilers, why should we bother with 63 | interpreters? Simply because I want you to see how the nature of 64 | the parser changes as we change the goals. I also want to unify 65 | the concepts of the two types of translators, so that you can see 66 | not only the differences, but also the similarities. 67 | 68 | Consider the assignment statement 69 | 70 | x = 2 * y + 3 71 | 72 | In a compiler, we want the target CPU to execute this assignment 73 | at EXECUTION time. The translator itself doesn't do any arith- 74 | metic ... it only issues the object code that will cause the CPU 75 | to do it when the code is executed. For the example above, the 76 | compiler would issue code to compute the expression and store the 77 | results in variable x. 78 | 79 | For an interpreter, on the other hand, no object code is gen- 80 | erated. Instead, the arithmetic is computed immediately, as the 81 | parsing is going on. For the example, by the time parsing of the 82 | statement is complete, x will have a new value. 83 | 84 | The approach we've been taking in this whole series is called 85 | "syntax-driven translation." As you are aware by now, the struc- 86 | ture of the parser is very closely tied to the syntax of the 87 | productions we parse. We have built Pascal procedures that rec- 88 | ognize every language construct. Associated with each of these 89 | constructs (and procedures) is a corresponding "action," which 90 | does whatever makes sense to do once a construct has been 91 | recognized. In our compiler so far, every action involves 92 | emitting object code, to be executed later at execution time. In 93 | an interpreter, every action involves something to be done im- 94 | mediately. 95 | 96 | What I'd like you to see here is that the layout ... the struc- 97 | ture ... of the parser doesn't change. It's only the actions 98 | that change. So if you can write an interpreter for a given 99 | language, you can also write a compiler, and vice versa. 
Yet, as 100 | you will see, there ARE differences, and significant ones. 101 | Because the actions are different, the procedures that do the 102 | recognizing end up being written differently. Specifically, in 103 | the interpreter the recognizing procedures end up being coded as 104 | FUNCTIONS that return numeric values to their callers. None of 105 | the parsing routines for our compiler did that. 106 | 107 | Our compiler, in fact, is what we might call a "pure" compiler. 108 | Each time a construct is recognized, the object code is emitted 109 | IMMEDIATELY. (That's one reason the code is not very efficient.) 110 | The interpreter we'll be building here is a pure interpreter, in 111 | the sense that there is no translation, such as "tokenizing," 112 | performed on the source code. These represent the two extremes 113 | of translation. In the real world, translators are rarely so 114 | pure, but tend to have bits of each technique. 115 | 116 | I can think of several examples. I've already mentioned one: 117 | most interpreters, such as Microsoft BASIC, for example, trans- 118 | late the source code (tokenize it) into an intermediate form so 119 | that it'll be easier to parse real time. 120 | 121 | Another example is an assembler. The purpose of an assembler, of 122 | course, is to produce object code, and it normally does that on a 123 | one-to-one basis: one object instruction per line of source code. 124 | But almost every assembler also permits expressions as arguments. 125 | In this case, the expressions are always constant expressions, 126 | and so the assembler isn't supposed to issue object code for 127 | them. Rather, it "interprets" the expressions and computes the 128 | corresponding constant result, which is what it actually emits as 129 | object code. 130 | 131 | As a matter of fact, we could use a bit of that ourselves. The 132 | translator we built in the previous installment will dutifully 133 | spit out object code for complicated expressions, even though 134 | every term in the expression is a constant. In that case it 135 | would be far better if the translator behaved a bit more like an 136 | interpreter, and just computed the equivalent constant result. 137 | 138 | There is a concept in compiler theory called "lazy" translation. 139 | The idea is that you typically don't just emit code at every 140 | action. In fact, at the extreme you don't emit anything at all, 141 | until you absolutely have to. To accomplish this, the actions 142 | associated with the parsing routines typically don't just emit 143 | code. Sometimes they do, but often they simply return in- 144 | formation back to the caller. Armed with such information, the 145 | caller can then make a better choice of what to do. 146 | 147 | For example, given the statement 148 | 149 | x = x + 3 - 2 - (5 - 4) , 150 | 151 | our compiler will dutifully spit out a stream of 18 instructions 152 | to load each parameter into registers, perform the arithmetic, 153 | and store the result. A lazier evaluation would recognize that 154 | the arithmetic involving constants can be evaluated at compile 155 | time, and would reduce the expression to 156 | 157 | x = x + 0 . 158 | 159 | An even lazier evaluation would then be smart enough to figure 160 | out that this is equivalent to 161 | 162 | x = x , 163 | 164 | which calls for no action at all. We could reduce 18 in- 165 | structions to zero! 
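In this repository's Scala port, that kind of folding might be sketched over a little expression tree. The Expr types and the fold function below are illustrative only, not part of the tutorial's parser:

    sealed trait Expr
    case class Num(n: Int) extends Expr
    case class Var(name: Char) extends Expr
    case class Add(l: Expr, r: Expr) extends Expr
    case class Sub(l: Expr, r: Expr) extends Expr

    // Fold constant subtrees at "compile time"; leave anything
    // involving a variable for code generation later.
    def fold(e: Expr): Expr = e match {
      case Add(l, r) => (fold(l), fold(r)) match {
        case (Num(a), Num(b)) => Num(a + b)   // arithmetic done now, no code emitted
        case (fl, Num(0))     => fl           // x + 0  reduces to  x
        case (fl, fr)         => Add(fl, fr)
      }
      case Sub(l, r) => (fold(l), fold(r)) match {
        case (Num(a), Num(b)) => Num(a - b)
        case (fl, Num(0))     => fl           // x - 0  reduces to  x
        case (fl, fr)         => Sub(fl, fr)
      }
      case leaf => leaf
    }

    // fold(Sub(Sub(Num(3), Num(2)), Sub(Num(5), Num(4))))  ==>  Num(0)
    // fold(Add(Var('X'), Num(0)))                          ==>  Var('X')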
166 | 167 | Note that there is no chance of optimizing this way in our trans- 168 | lator as it stands, because every action takes place immediately. 169 | 170 | Lazy expression evaluation can produce significantly better 171 | object code than we have been able to so far. I warn you, 172 | though: it complicates the parser code considerably, because each 173 | routine now has to make decisions as to whether to emit object 174 | code or not. Lazy evaluation is certainly not named that because 175 | it's easier on the compiler writer! 176 | 177 | Since we're operating mainly on the KISS principle here, I won't 178 | go into much more depth on this subject. I just want you to be 179 | aware that you can get some code optimization by combining the 180 | techniques of compiling and interpreting. In particular, you 181 | should know that the parsing routines in a smarter translator 182 | will generally return things to their caller, and sometimes 183 | expect things as well. That's the main reason for going over 184 | interpretation in this installment. 185 | 186 | 187 | THE INTERPRETER 188 | 189 | OK, now that you know WHY we're going into all this, let's do it. 190 | Just to give you practice, we're going to start over with a bare 191 | cradle and build up the translator all over again. This time, of 192 | course, we can go a bit faster. 193 | 194 | Since we're now going to do arithmetic, the first thing we need 195 | to do is to change function GetNum, which up till now has always 196 | returned a character (or string). Now, it's better for it to 197 | return an integer. MAKE A COPY of the cradle (for goodness's 198 | sake, don't change the version in Cradle itself!!) and modify 199 | GetNum as follows: 200 | 201 | 202 | {--------------------------------------------------------------} 203 | { Get a Number } 204 | 205 | function GetNum: integer; 206 | begin 207 | if not IsDigit(Look) then Expected('Integer'); 208 | GetNum := Ord(Look) - Ord('0'); 209 | GetChar; 210 | end; 211 | {--------------------------------------------------------------} 212 | 213 | 214 | Now, write the following version of Expression: 215 | 216 | 217 | {---------------------------------------------------------------} 218 | { Parse and Translate an Expression } 219 | 220 | function Expression: integer; 221 | begin 222 | Expression := GetNum; 223 | end; 224 | {--------------------------------------------------------------} 225 | 226 | 227 | Finally, insert the statement 228 | 229 | 230 | Writeln(Expression); 231 | 232 | 233 | at the end of the main program. Now compile and test. 234 | 235 | All this program does is to "parse" and translate a single 236 | integer "expression." As always, you should make sure that it 237 | does that with the digits 0..9, and gives an error message for 238 | anything else. Shouldn't take you very long! 239 | 240 | OK, now let's extend this to include addops. 
Change Expression 241 | to read: 242 | 243 | 244 | {---------------------------------------------------------------} 245 | { Parse and Translate an Expression } 246 | 247 | function Expression: integer; 248 | var Value: integer; 249 | begin 250 | if IsAddop(Look) then 251 | Value := 0 252 | else 253 | Value := GetNum; 254 | while IsAddop(Look) do begin 255 | case Look of 256 | '+': begin 257 | Match('+'); 258 | Value := Value + GetNum; 259 | end; 260 | '-': begin 261 | Match('-'); 262 | Value := Value - GetNum; 263 | end; 264 | end; 265 | end; 266 | Expression := Value; 267 | end; 268 | {--------------------------------------------------------------} 269 | 270 | 271 | The structure of Expression, of course, parallels what we did 272 | before, so we shouldn't have too much trouble debugging it. 273 | There's been a SIGNIFICANT development, though, hasn't there? 274 | Procedures Add and Subtract went away! The reason is that the 275 | action to be taken requires BOTH arguments of the operation. I 276 | could have chosen to retain the procedures and pass into them the 277 | value of the expression to date, which is Value. But it seemed 278 | cleaner to me to keep Value as strictly a local variable, which 279 | meant that the code for Add and Subtract had to be moved in line. 280 | This result suggests that, while the structure we had developed 281 | was nice and clean for our simple-minded translation scheme, it 282 | probably wouldn't do for use with lazy evaluation. That's a 283 | little tidbit we'll probably want to keep in mind for later. 284 | 285 | OK, did the translator work? Then let's take the next step. 286 | It's not hard to figure out what procedure Term should now look 287 | like. Change every call to GetNum in function Expression to a 288 | call to Term, and then enter the following form for Term: 289 | 290 | 291 | 292 | 293 | {---------------------------------------------------------------} 294 | { Parse and Translate a Math Term } 295 | 296 | function Term: integer; 297 | var Value: integer; 298 | begin 299 | Value := GetNum; 300 | while Look in ['*', '/'] do begin 301 | case Look of 302 | '*': begin 303 | Match('*'); 304 | Value := Value * GetNum; 305 | end; 306 | '/': begin 307 | Match('/'); 308 | Value := Value div GetNum; 309 | end; 310 | end; 311 | end; 312 | Term := Value; 313 | end; 314 | {--------------------------------------------------------------} 315 | 316 | Now, try it out. Don't forget two things: first, we're dealing 317 | with integer division, so, for example, 1/3 should come out zero. 318 | Second, even though we can output multi-digit results, our input 319 | is still restricted to single digits. 320 | 321 | That seems like a silly restriction at this point, since we have 322 | already seen how easily function GetNum can be extended. So 323 | let's go ahead and fix it right now. The new version is 324 | 325 | 326 | {--------------------------------------------------------------} 327 | { Get a Number } 328 | 329 | function GetNum: integer; 330 | var Value: integer; 331 | begin 332 | Value := 0; 333 | if not IsDigit(Look) then Expected('Integer'); 334 | while IsDigit(Look) do begin 335 | Value := 10 * Value + Ord(Look) - Ord('0'); 336 | GetChar; 337 | end; 338 | GetNum := Value; 339 | end; 340 | {--------------------------------------------------------------} 341 | 342 | 343 | If you've compiled and tested this version of the interpreter, 344 | the next step is to install function Factor, complete with pa- 345 | renthesized expressions. 
We'll hold off a bit longer on the 346 | variable names. First, change the references to GetNum, in 347 | function Term, so that they call Factor instead. Now code the 348 | following version of Factor: 349 | 350 | 351 | 352 | 353 | {---------------------------------------------------------------} 354 | { Parse and Translate a Math Factor } 355 | 356 | function Expression: integer; Forward; 357 | 358 | function Factor: integer; 359 | begin 360 | if Look = '(' then begin 361 | Match('('); 362 | Factor := Expression; 363 | Match(')'); 364 | end 365 | else 366 | Factor := GetNum; 367 | end; 368 | {---------------------------------------------------------------} 369 | 370 | That was pretty easy, huh? We're rapidly closing in on a useful 371 | interpreter. 372 | 373 | 374 | A LITTLE PHILOSOPHY 375 | 376 | Before going any further, there's something I'd like to call to 377 | your attention. It's a concept that we've been making use of in 378 | all these sessions, but I haven't explicitly mentioned it up till 379 | now. I think it's time, because it's a concept so useful, and so 380 | powerful, that it makes all the difference between a parser 381 | that's trivially easy, and one that's too complex to deal with. 382 | 383 | In the early days of compiler technology, people had a terrible 384 | time figuring out how to deal with things like operator prece- 385 | dence ... the way that multiply and divide operators take 386 | precedence over add and subtract, etc. I remember a colleague of 387 | some thirty years ago, and how excited he was to find out how to 388 | do it. The technique used involved building two stacks, upon 389 | which you pushed each operator or operand. Associated with each 390 | operator was a precedence level, and the rules required that you 391 | only actually performed an operation ("reducing" the stack) if 392 | the precedence level showing on top of the stack was correct. To 393 | make life more interesting, an operator like ')' had different 394 | precedence levels, depending upon whether or not it was already 395 | on the stack. You had to give it one value before you put it on 396 | the stack, and another to decide when to take it off. Just for 397 | the experience, I worked all of this out for myself a few years 398 | ago, and I can tell you that it's very tricky. 399 | 400 | We haven't had to do anything like that. In fact, by now the 401 | parsing of an arithmetic statement should seem like child's play. 402 | How did we get so lucky? And where did the precedence stacks go? 403 | 404 | A similar thing is going on in our interpreter above. You just 405 | KNOW that in order for it to do the computation of arithmetic 406 | statements (as opposed to the parsing of them), there have to be 407 | numbers pushed onto a stack somewhere. But where is the stack? 408 | 409 | Finally, in compiler textbooks, there are a number of places 410 | where stacks and other structures are discussed. In the other 411 | leading parsing method (LR), an explicit stack is used. In fact, 412 | the technique is very much like the old way of doing arithmetic 413 | expressions. Another concept is that of a parse tree. Authors 414 | like to draw diagrams of the tokens in a statement, connected 415 | into a tree with operators at the internal nodes. Again, where 416 | are the trees and stacks in our technique? We haven't seen any. 417 | The answer in all cases is that the structures are implicit, not 418 | explicit. 
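Just so you can see what we've been spared, here is a rough sketch of that old explicit-stack method. Let me be clear about what this is: none of it is part of the tutorial, every name in it is my own invention, and to keep it short it accepts only single digits and the four operators, terminated by a period; no parentheses and no error checking. (Parentheses are exactly where that business of giving one operator two precedence levels would come in.)

{---------------------------------------------------------------}
{ A Hypothetical Two-Stack Evaluator (illustration only) }

program TwoStacks;
var
   Ops:  array[0..32] of char;       { the operator stack }
   Vals: array[1..32] of integer;    { the operand stack  }
   NOps, NVals: integer;
   Look: char;

{ Precedence level of an operator }
function Prec(op: char): integer;
begin
   if op in ['*', '/'] then
      Prec := 2
   else if op in ['+', '-'] then
      Prec := 1
   else
      Prec := 0;
end;

{ Pop two operands and one operator, push the result }
procedure Reduce;
var b: integer;
begin
   b := Vals[NVals];
   NVals := NVals - 1;
   case Ops[NOps] of
     '+': Vals[NVals] := Vals[NVals] + b;
     '-': Vals[NVals] := Vals[NVals] - b;
     '*': Vals[NVals] := Vals[NVals] * b;
     '/': Vals[NVals] := Vals[NVals] div b;
   end;
   NOps := NOps - 1;
end;

begin
   NOps := 0;
   NVals := 0;
   Ops[0] := '?';                    { sentinel with precedence 0 }
   Read(Look);
   while Look <> '.' do begin
      if Look in ['0'..'9'] then begin
         NVals := NVals + 1;
         Vals[NVals] := Ord(Look) - Ord('0');
      end
      else begin
         { reduce while the stacked operator binds at least as tightly }
         while Prec(Ops[NOps]) >= Prec(Look) do
            Reduce;
         NOps := NOps + 1;
         Ops[NOps] := Look;
      end;
      Read(Look);
   end;
   while NOps > 0 do Reduce;         { drain what's left }
   Writeln(Vals[1]);
end.
{---------------------------------------------------------------}

Every push, pop, and precedence test in there is bookkeeping that our recursive-descent version never writes down, because the language itself is doing the equivalent work behind the scenes.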
In any computer language, there is a stack involved 419 | every time you call a subroutine. Whenever a subroutine is 420 | called, the return address is pushed onto the CPU stack. At the 421 | end of the subroutine, the address is popped back off and control 422 | is transferred there. In a recursive language such as Pascal, 423 | there can also be local data pushed onto the stack, and it, too, 424 | is popped back off when it's no longer needed. 425 | 426 | For example, function Expression contains a local variable 427 | called Value, which it fills by a call to Term. Suppose, in its 428 | next call to Term for the second argument, that Term calls 429 | Factor, which recursively calls Expression again. That "in- 430 | stance" of Expression gets another value for its copy of Value. 431 | What happens to the first Value? Answer: it's still on the 432 | stack, and will be there again when we return from our call 433 | sequence. 434 | 435 | In other words, the reason things look so simple is that we've 436 | been making maximum use of the resources of the language. The 437 | hierarchy levels and the parse trees are there, all right, but 438 | they're hidden within the structure of the parser, and they're 439 | taken care of by the order in which the various procedures are 440 | called. Now that you've seen how we do it, it's probably hard to 441 | imagine doing it any other way. But I can tell you that it took 442 | a lot of years for compiler writers to get that smart. The early 443 | compilers were too complex to imagine. Funny how things get 444 | easier with a little practice. 445 | 446 | The reason I've brought all this up is as both a lesson and a 447 | warning. The lesson: things can be easy when you do them right. 448 | The warning: take a look at what you're doing. If, as you branch 449 | out on your own, you begin to find a real need for a separate 450 | stack or tree structure, it may be time to ask yourself if you're 451 | looking at things the right way. Maybe you just aren't using the 452 | facilities of the language as well as you could be. 453 | 454 | 455 | The next step is to add variable names. Now, though, we have a 456 | slight problem. For the compiler, we had no problem in dealing 457 | with variable names ... we just issued the names to the assembler 458 | and let the rest of the program take care of allocating storage 459 | for them. Here, on the other hand, we need to be able to fetch 460 | the values of the variables and return them as the return values 461 | of Factor. We need a storage mechanism for these variables. 462 | 463 | Back in the early days of personal computing, Tiny BASIC lived. 464 | It had a grand total of 26 possible variables: one for each 465 | letter of the alphabet. This fits nicely with our concept of 466 | single-character tokens, so we'll try the same trick. In the 467 | beginning of your interpreter, just after the declaration of 468 | variable Look, insert the line: 469 | 470 | Table: Array['A'..'Z'] of integer; 471 | 472 | We also need to initialize the array, so add this procedure: 473 | 474 | 475 | 476 | 477 | {---------------------------------------------------------------} 478 | { Initialize the Variable Area } 479 | 480 | procedure InitTable; 481 | var i: char; 482 | begin 483 | for i := 'A' to 'Z' do 484 | Table[i] := 0; 485 | end; 486 | {---------------------------------------------------------------} 487 | 488 | 489 | You must also insert a call to InitTable, in procedure Init. 490 | DON'T FORGET to do that, or the results may surprise you!
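If your copy still has the cradle's original one-line Init, the change amounts to this (a sketch ... adjust to whatever else your own Init may have picked up along the way):

{--------------------------------------------------------------}
{ Initialize }

procedure Init;
begin
   InitTable;
   GetChar;
end;
{--------------------------------------------------------------}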
491 | 492 | Now that we have an array of variables, we can modify Factor to 493 | use it. Since we don't have a way (so far) to set the variables, 494 | Factor will always return zero values for them, but let's go 495 | ahead and extend it anyway. Here's the new version: 496 | 497 | 498 | {---------------------------------------------------------------} 499 | { Parse and Translate a Math Factor } 500 | 501 | function Expression: integer; Forward; 502 | 503 | function Factor: integer; 504 | begin 505 | if Look = '(' then begin 506 | Match('('); 507 | Factor := Expression; 508 | Match(')'); 509 | end 510 | else if IsAlpha(Look) then 511 | Factor := Table[GetName] 512 | else 513 | Factor := GetNum; 514 | end; 515 | {---------------------------------------------------------------} 516 | 517 | 518 | As always, compile and test this version of the program. Even 519 | though all the variables are now zeros, at least we can correctly 520 | parse the complete expressions, as well as catch any badly formed 521 | expressions. 522 | 523 | I suppose you realize the next step: we need to do an assignment 524 | statement so we can put something INTO the variables. For now, 525 | let's stick to one-liners, though we will soon be handling 526 | multiple statements. 527 | 528 | The assignment statement parallels what we did before: 529 | 530 | 531 | {--------------------------------------------------------------} 532 | { Parse and Translate an Assignment Statement } 533 | 534 | 535 | 536 | procedure Assignment; 537 | var Name: char; 538 | begin 539 | Name := GetName; 540 | Match('='); 541 | Table[Name] := Expression; 542 | end; 543 | {--------------------------------------------------------------} 544 | 545 | 546 | To test this, I added a temporary write statement in the main 547 | program, to print out the value of A. Then I tested it with 548 | various assignments to it. 549 | 550 | Of course, an interpretive language that can only accept a single 551 | line of program is not of much value. So we're going to want to 552 | handle multiple statements. This merely means putting a loop 553 | around the call to Assignment. So let's do that now. But what 554 | should be the loop exit criterion? Glad you asked, because it 555 | brings up a point we've been able to ignore up till now. 556 | 557 | One of the most tricky things to handle in any translator is to 558 | determine when to bail out of a given construct and go look for 559 | something else. This hasn't been a problem for us so far because 560 | we've only allowed for a single kind of construct ... either an 561 | expression or an assignment statement. When we start adding 562 | loops and different kinds of statements, you'll find that we have 563 | to be very careful that things terminate properly. If we put our 564 | interpreter in a loop, we need a way to quit. Terminating on a 565 | newline is no good, because that's what sends us back for another 566 | line. We could always let an unrecognized character take us out, 567 | but that would cause every run to end in an error message, which 568 | certainly seems uncool. 569 | 570 | What we need is a termination character. I vote for Pascal's 571 | ending period ('.'). A minor complication is that Turbo ends 572 | every normal line with TWO characters, the carriage return (CR) 573 | and line feed (LF). At the end of each line, we need to eat 574 | these characters before processing the next one. 
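One bit of housekeeping first: the procedure coming up refers to the constants CR and LF. If your copy of the cradle doesn't already declare both of them, add the missing ones to the constant declarations. This assumes Turbo Pascal's control-character notation:

{--------------------------------------------------------------}
{ Constant Declarations }

const CR = ^M;                 { carriage return, ASCII 13 }
      LF = ^J;                 { line feed,       ASCII 10 }
{--------------------------------------------------------------}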
A natural way 575 | to do this would be with procedure Match, except that Match's 576 | error message prints the character, which of course for the CR 577 | and/or LF won't look so great. What we need is a special proce- 578 | dure for this, which we'll no doubt be using over and over. Here 579 | it is: 580 | 581 | 582 | {--------------------------------------------------------------} 583 | { Recognize and Skip Over a Newline } 584 | 585 | procedure NewLine; 586 | begin 587 | if Look = CR then begin 588 | GetChar; 589 | if Look = LF then 590 | GetChar; 591 | end; 592 | end; 593 | {--------------------------------------------------------------} 594 | 595 | 596 | Insert this procedure at any convenient spot ... I put mine just 597 | after Match. Now, rewrite the main program to look like this: 598 | 599 | 600 | {--------------------------------------------------------------} 601 | { Main Program } 602 | 603 | begin 604 | Init; 605 | repeat 606 | Assignment; 607 | NewLine; 608 | until Look = '.'; 609 | end. 610 | {--------------------------------------------------------------} 611 | 612 | 613 | Note that the test for a CR is now gone, and that there are also 614 | no error tests within NewLine itself. That's OK, though ... 615 | whatever is left over in terms of bogus characters will be caught 616 | at the beginning of the next assignment statement. 617 | 618 | Well, we now have a functioning interpreter. It doesn't do us a 619 | lot of good, however, since we have no way to read data in or 620 | write it out. Sure would help to have some I/O! 621 | 622 | Let's wrap this session up, then, by adding the I/O routines. 623 | Since we're sticking to single-character tokens, I'll use '?' to 624 | stand for a read statement, and '!' for a write, with the char- 625 | acter immediately following them to be used as a one-token 626 | "parameter list." Here are the routines: 627 | 628 | {--------------------------------------------------------------} 629 | { Input Routine } 630 | 631 | procedure Input; 632 | begin 633 | Match('?'); 634 | Read(Table[GetName]); 635 | end; 636 | 637 | 638 | {--------------------------------------------------------------} 639 | { Output Routine } 640 | 641 | procedure Output; 642 | begin 643 | Match('!'); 644 | WriteLn(Table[GetName]); 645 | end; 646 | {--------------------------------------------------------------} 647 | 648 | They aren't very fancy, I admit ... no prompt character on input, 649 | for example ... but they get the job done. 650 | 651 | The corresponding changes in the main program are shown below. 652 | Note that we use the usual trick of a case statement based upon 653 | the current lookahead character, to decide what to do. 654 | 655 | 656 | {--------------------------------------------------------------} 657 | { Main Program } 658 | 659 | begin 660 | Init; 661 | repeat 662 | case Look of 663 | '?': Input; 664 | '!': Output; 665 | else Assignment; 666 | end; 667 | NewLine; 668 | until Look = '.'; 669 | end. 670 | {--------------------------------------------------------------} 671 | 672 | 673 | You have now completed a real, working interpreter. It's pretty 674 | sparse, but it works just like the "big boys." It includes three 675 | kinds of program statements (and can tell the difference!), 26 676 | variables, and I/O statements. The only things that it lacks, 677 | really, are control statements, subroutines, and some kind of 678 | program editing function. The program editing part, I'm going to 679 | pass on. 
After all, we're not here to build a product, but to 680 | learn things. The control statements, we'll cover in the next 681 | installment, and the subroutines soon after. I'm anxious to get 682 | on with that, so we'll leave the interpreter as it stands. 683 | 684 | I hope that by now you're convinced that the limitation of sin- 685 | gle-character names and the processing of white space are easily 686 | taken care of, as we did in the last session. This time, if 687 | you'd like to play around with these extensions, be my guest ... 688 | they're "left as an exercise for the student." See you next 689 | time. 690 | 691 | ***************************************************************** 692 | *                                                               * 693 | *                        COPYRIGHT NOTICE                       * 694 | *                                                               * 695 | *   Copyright (C) 1988 Jack W. Crenshaw. All rights reserved.   * 696 | *                                                               * 697 | ***************************************************************** 698 | 699 | 700 | 701 | 702 | -------------------------------------------------------------------------------- /reference/crenshaw-txt/tutor9.txt: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | LET'S BUILD A COMPILER! 30 | 31 | By 32 | 33 | Jack W. Crenshaw, Ph.D. 34 | 35 | 16 April 1989 36 | 37 | 38 | Part IX: A TOP VIEW 39 | 40 | 41 | ***************************************************************** 42 | *                                                               * 43 | *                        COPYRIGHT NOTICE                       * 44 | *                                                               * 45 | *   Copyright (C) 1989 Jack W. Crenshaw. All rights reserved.   * 46 | *                                                               * 47 | ***************************************************************** 48 | 49 | 50 | INTRODUCTION 51 | 52 | In the previous installments, we have learned many of the 53 | techniques required to build a full-blown compiler. We've done 54 | assignment statements (with both Boolean and arithmetic 55 | expressions), relational operators, and control constructs. We 56 | still haven't addressed procedure or function calls, but even so 57 | we could conceivably construct a mini-language without them. 58 | I've always thought it would be fun to see just how small a 59 | language one could build that would still be useful. We're 60 | ALMOST in a position to do that now. The problem is: though we 61 | know how to parse and translate the constructs, we still don't 62 | know quite how to put them all together into a language. 63 | 64 | In those earlier installments, the development of our programs 65 | had a decidedly bottom-up flavor. In the case of expression 66 | parsing, for example, we began with the very lowest level 67 | constructs, the individual constants and variables, and worked 68 | our way up to more complex expressions. 69 | 70 | Most people regard the top-down design approach as being better 71 | than the bottom-up one. I do too, but the way we did it 72 | certainly seemed natural enough for the kinds of things we were 73 | parsing. 74 | 75 | You mustn't get the idea, though, that the incremental approach 76 | that we've been using in all these tutorials is inherently 77 | bottom-up. In this installment I'd like to show you that the 78 | approach can work just as well when applied from the top down ... 79 | maybe better. We'll consider languages such as C and Pascal, and 80 | see how complete compilers can be built starting from the top. 81 | 82 | In the next installment, we'll apply the same technique to build 83 | a complete translator for a subset of the KISS language, which 84 | I'll be calling TINY.
But one of my goals for this series is 85 | that you will not only be able to see how a compiler for TINY or 86 | KISS works, but that you will also be able to design and build 87 | compilers for your own languages. The C and Pascal examples will 88 | help. One thing I'd like you to see is that the natural 89 | structure of the compiler depends very much on the language being 90 | translated, so the simplicity and ease of construction of the 91 | compiler depend very much on letting the language set the 92 | program structure. 93 | 94 | It's a bit much to produce a full C or Pascal compiler here, and 95 | we won't try. But we can flesh out the top levels far enough so 96 | that you can see how it goes. 97 | 98 | Let's get started. 99 | 100 | 101 | THE TOP LEVEL 102 | 103 | One of the biggest mistakes people make in a top-down design is 104 | failing to start at the true top. They think they know what the 105 | overall structure of the design should be, so they go ahead and 106 | write it down. 107 | 108 | Whenever I start a new design, I always like to do it at the 109 | absolute beginning. In program design language (PDL), this top 110 | level looks something like: 111 | 112 | 113 | begin 114 | solve the problem 115 | end 116 | 117 | 118 | OK, I grant you that this doesn't give much of a hint as to what 119 | the next level is, but I like to write it down anyway, just to 120 | give me that warm feeling that I am indeed starting at the top. 121 | 122 | For our problem, the overall function of a compiler is to compile 123 | a complete program. Any definition of the language, written in 124 | BNF, begins here. What does the top level BNF look like? Well, 125 | that depends quite a bit on the language to be translated. Let's 126 | take a look at Pascal. 127 | 128 | 129 | THE STRUCTURE OF PASCAL 130 | 131 | Most texts for Pascal include a BNF or "railroad-track" 132 | definition of the language. Here are the first few lines of one: 133 | 134 | 135 | <program> ::= <program-header> <block> '.' 136 | 137 | <program-header> ::= PROGRAM <ident> 138 | 139 | <block> ::= <declarations> <statements> 140 | 141 | 142 | We can write recognizers to deal with each of these elements, 143 | just as we've done before. For each one, we'll use our familiar 144 | single-character tokens to represent the input, then flesh things 145 | out a little at a time. Let's begin with the first recognizer: 146 | the program itself. 147 | 148 | To translate this, we'll start with a fresh copy of the Cradle. 149 | Since we're back to single-character names, we'll just use a 'p' 150 | to stand for 'PROGRAM.' 151 | 152 | To a fresh copy of the cradle, add the following code, and insert 153 | a call to it from the main program: 154 | 155 | 156 | {--------------------------------------------------------------} 157 | { Parse and Translate A Program } 158 | 159 | procedure Prog; 160 | var Name: char; 161 | begin 162 | Match('p'); { Handles program header part } 163 | Name := GetName; 164 | Prolog; 165 | Match('.'); 166 | Epilog(Name); 167 | end; 168 | {--------------------------------------------------------------} 169 | 170 | 171 | The procedures Prolog and Epilog perform whatever is required to 172 | let the program interface with the operating system, so that it 173 | can execute as a program. Needless to say, this part will be 174 | VERY OS-dependent. Remember, I've been emitting code for a 68000 175 | running under the OS I use, which is SK*DOS. I realize most of 176 | you are using PCs and would rather see something else, but I'm 177 | in this thing too deep to change now!
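(For those of you on PCs who want to experiment anyway: the pair below is a plausible MS-DOS flavored substitute, using MASM directives and the DOS terminate-process call. This is strictly my sketch, not Crenshaw's code, and I haven't run it through any particular assembler. Bear in mind, too, that everything else the compiler emits is still 68000 assembler, so this only buys you the empty shell; the point is just to show where the OS-specific part lives.)

{--------------------------------------------------------------}
{ Write the Prolog and Epilog -- hypothetical MS-DOS versions }

procedure Prolog;
begin
   EmitLn('.MODEL SMALL');
   EmitLn('.STACK 100h');
   EmitLn('.CODE');
end;

procedure Epilog(Name: char);
begin
   EmitLn('MOV AH,4CH');        { DOS terminate-process function }
   EmitLn('INT 21H');
   EmitLn('END ' + Name);       { END <label> marks the entry point }
end;
{--------------------------------------------------------------}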
178 | 179 | Anyhow, SK*DOS is a particularly easy OS to interface to. Here 180 | is the code for Prolog and Epilog: 181 | 182 | 183 | {--------------------------------------------------------------} 184 | { Write the Prolog } 185 | 186 | procedure Prolog; 187 | begin 188 | EmitLn('WARMST EQU $A01E'); 189 | end; 190 | 191 | 192 | {--------------------------------------------------------------} 193 | { Write the Epilog } 194 | 195 | procedure Epilog(Name: char); 196 | begin 197 | EmitLn('DC WARMST'); 198 | EmitLn('END ' + Name); 199 | end; 200 | {--------------------------------------------------------------} 201 | 202 | As usual, add this code and try out the "compiler." At this 203 | point, there is only one legal input: 204 | 205 | 206 | px. (where x is any single letter, the program name) 207 | 208 | 209 | Well, as usual our first effort is rather unimpressive, but by 210 | now I'm sure you know that things will get more interesting. 211 | There is one important thing to note: THE OUTPUT IS A WORKING, 212 | COMPLETE, AND EXECUTABLE PROGRAM (at least after it's assembled). 213 | 214 | This is very important. The nice feature of the top-down 215 | approach is that at any stage you can compile a subset of the 216 | complete language and get a program that will run on the target 217 | machine. From here on, then, we need only add features by 218 | fleshing out the language constructs. It's all very similar to 219 | what we've been doing all along, except that we're approaching it 220 | from the other end. 221 | 222 | 223 | FLESHING IT OUT 224 | 225 | To flesh out the compiler, we only have to deal with language 226 | features one by one. I like to start with a stub procedure that 227 | does nothing, then add detail in incremental fashion. Let's 228 | begin by processing a block, in accordance with its BNF above. 229 | We can do this in two stages. First, add the null procedure: 230 | 231 | 232 | {--------------------------------------------------------------} 233 | { Parse and Translate a Pascal Block } 234 | 235 | procedure DoBlock(Name: char); 236 | begin 237 | end; 238 | {--------------------------------------------------------------} 239 | 240 | 241 | and modify Prog to read: 242 | 243 | 244 | {--------------------------------------------------------------} 245 | { Parse and Translate A Program } 246 | 247 | procedure Prog; 248 | var Name: char; 249 | begin 250 | Match('p'); 251 | Name := GetName; 252 | Prolog; 253 | DoBlock(Name); 254 | Match('.'); 255 | Epilog(Name); 256 | end; 257 | {--------------------------------------------------------------} 258 | 259 | 260 | That certainly shouldn't change the behavior of the program, and 261 | it doesn't. But now the definition of Prog is complete, and we 262 | can proceed to flesh out DoBlock. That's done right from its BNF 263 | definition: 264 | 265 | 266 | {--------------------------------------------------------------} 267 | { Parse and Translate a Pascal Block } 268 | 269 | procedure DoBlock(Name: char); 270 | begin 271 | Declarations; 272 | PostLabel(Name); 273 | Statements; 274 | end; 275 | {--------------------------------------------------------------} 276 | 277 | 278 | The procedure PostLabel was defined in the installment on 279 | branches. Copy it into your cradle. 280 | 281 | I probably need to explain the reason for inserting the label 282 | where I have. It has to do with the operation of SK*DOS. Unlike 283 | some OS's, SK*DOS allows the entry point to the main program to 284 | be anywhere in the program.
All you have to do is to give that 285 | point a name. The call to PostLabel puts that name just before 286 | the first executable statement in the main program. How does 287 | SK*DOS know which of the many labels is the entry point, you ask? 288 | It's the one that matches the END statement at the end of the 289 | program. 290 | 291 | OK, now we need stubs for the procedures Declarations and 292 | Statements. Make them null procedures as we did before. 293 | 294 | Does the program still run the same? Then we can move on to the 295 | next stage. 296 | 297 | 298 | DECLARATIONS 299 | 300 | The BNF for Pascal declarations is: 301 | 302 | 303 | <declarations> ::= (