/*
 * Repository-dump residue that shares these source lines (directory listing
 * and /.gitignore). Kept so that nothing is lost; it is not C source.
 *
 *   ├── 1  ├── cradle.c ├── cradle.h └── tutor1.txt
 *   ├── 2  ├── Makefile ├── cradle.c ├── cradle.h ├── main.c └── tutor2.txt
 *   ├── 3  ├── Makefile ├── cradle.c ├── cradle.h ├── main.c └── tutor3.txt
 *   ├── 4  ├── Makefile ├── cradle.c ├── cradle.h ├── main.c └── tutor4.txt
 *   ├── 5  ├── Makefile ├── README.md ├── cradle.c ├── cradle.h ├── main.c └── tutor5.txt
 *   ├── 6  ├── Makefile ├── cradle.c ├── cradle.h ├── main.c └── tutor6.txt
 *   ├── 7  ├── Makefile ├── cradle.c ├── cradle.h ├── main.c └── tutor7.txt
 *   ├── 8  └── tutor8.txt
 *   ├── 9  ├── Makefile ├── cradle.c ├── cradle.h ├── main.c └── tutor9.txt
 *   ├── 10 ├── Makefile ├── cradle.c ├── cradle.h ├── main.c ├── prog.txt └── tutor10.txt
 *   ├── 11 ├── Makefile ├── cradle.c ├── cradle.h ├── main.c ├── prog.txt └── tutor11.txt
 *   ├── 12 ├── Makefile ├── cradle.c ├── cradle.h ├── main.c ├── prog.txt └── tutor12.txt
 *   ├── 13 ├── Makefile ├── cradle.c ├── cradle.h ├── main.c ├── prog.txt └── tutor13.txt
 *   ├── 14 ├── Makefile ├── README.md ├── cradle.c ├── cradle.h ├── main.c ├── prog.txt └── tutor14.txt
 *   ├── 15 └── tutor15.txt
 *   ├── 16 └── tutor16.txt
 *   ├── .gitignore
 *   ├── README.md
 *   └── get_chapters.sh
 *
 *   /.gitignore:
 *     *.s
 *     *.o
 *     *.so
 *     .~
 *     *.swp
 *     *.out
 *     *.pdf
 *     *~
 */

/* ------------------------------ /1/cradle.h ------------------------------ */
/* Shown before cradle.c so that the declarations precede their use. */
#ifndef CRADLE_H
#define CRADLE_H

/* Clears bit 5, which maps ASCII 'a'..'z' onto 'A'..'Z'. */
#define UPCASE(C) (~(1<<5) & (C))
#define MAX_BUF 100

/*
 * Shared state, defined exactly once in cradle.c. Defining objects in a
 * header gives every including file its own copy (static) or a duplicate
 * symbol at link time (non-static).
 */
extern char tmp[MAX_BUF];   /* scratch buffer for formatted messages */
extern char Look;           /* lookahead character */

void GetChar(void);

void Error(const char *s);
void Abort(const char *s);
void Expected(const char *s);
void Match(char x);

int IsAlpha(char c);
int IsDigit(char c);

char GetName(void);
char GetNum(void);

void Emit(const char *s);
void EmitLn(const char *s);

void Init(void);

#endif /* CRADLE_H */

/* ------------------------------ /1/cradle.c ------------------------------ */
/* Skipped only when the header text above is already part of this unit. */
#ifndef CRADLE_H
#include "cradle.h"
#endif
#include <stdio.h>
#include <stdlib.h>

char tmp[MAX_BUF];
char Look;

/* Read the next character from stdin into the lookahead variable. */
void GetChar(void)
{
    Look = (char)getchar();
}

/* Report an error message on stdout; does not terminate the program. */
void Error(const char *s)
{
    printf("\nError: %s.", s);
}

/* Report an error message and terminate with exit status 1. */
void Abort(const char *s)
{
    Error(s);
    exit(1);
}

/*
 * Report what was expected and terminate.
 *
 * Callers routinely pass the global tmp as s, so the message is built in a
 * local buffer: formatting into tmp while reading from tmp would be an
 * overlapping sprintf, which is undefined behaviour.
 */
void Expected(const char *s)
{
    char msg[MAX_BUF + sizeof " Expected"];

    snprintf(msg, sizeof msg, "%s Expected", s);
    Abort(msg);
}

/* Consume the lookahead if it equals x, otherwise abort naming x. */
void Match(char x)
{
    if (Look == x) {
        GetChar();
    } else {
        snprintf(tmp, sizeof tmp, "' %c ' ", x);
        Expected(tmp);
    }
}

/* Non-zero when c is an ASCII letter of either case. */
int IsAlpha(char c)
{
    return (UPCASE(c) >= 'A') && (UPCASE(c) <= 'Z');
}

/* Non-zero when c is an ASCII decimal digit. */
int IsDigit(char c)
{
    return (c >= '0') && (c <= '9');
}

/*
 * Read a single-letter identifier.
 * Returns the letter upper-cased; aborts with "Name Expected" when the
 * lookahead is not a letter.
 */
char GetName(void)
{
    char c = Look;

    if (!IsAlpha(Look)) {
        Expected("Name");
    }

    GetChar();

    return (char)UPCASE(c);
}

/*
 * Read a single-digit number.
 * Returns the digit character itself (not its numeric value); aborts with
 * "Integer Expected" when the lookahead is not a digit.
 */
char GetNum(void)
{
    char c = Look;

    if (!IsDigit(Look)) {
        Expected("Integer");
    }

    GetChar();

    return c;
}

/* Write s to stdout preceded by a tab. */
void Emit(const char *s)
{
    printf("\t%s", s);
}

/* Write s to stdout preceded by a tab and followed by a newline. */
void EmitLn(const char *s)
{
    Emit(s);
    printf("\n");
}

/* Prime the parser by reading the first lookahead character. */
void Init(void)
{
    GetChar();
}

/*
 * Dump residue: the opening of /1/tutor1.txt shares the last of these source
 * lines; its text continues on the following line.
 *
 *   /1/tutor1.txt:
 *
 *                      LET'S BUILD A COMPILER!
 *
 *                                 By
 *
 *                      Jack W. Crenshaw, Ph.D.
 */
33 | 34 | 24 July 1988 35 | 36 | 37 | Part I: INTRODUCTION 38 | 39 | 40 | ***************************************************************** 41 | * * 42 | * COPYRIGHT NOTICE * 43 | * * 44 | * Copyright (C) 1988 Jack W. Crenshaw. All rights reserved. * 45 | * * 46 | ***************************************************************** 47 | 48 | 49 | INTRODUCTION 50 | 51 | 52 | This series of articles is a tutorial on the theory and practice 53 | of developing language parsers and compilers. Before we are 54 | finished, we will have covered every aspect of compiler 55 | construction, designed a new programming language, and built a 56 | working compiler. 57 | 58 | Though I am not a computer scientist by education (my Ph.D. is in 59 | a different field, Physics), I have been interested in compilers 60 | for many years. I have bought and tried to digest the contents 61 | of virtually every book on the subject ever written. I don't 62 | mind telling you that it was slow going. Compiler texts are 63 | written for Computer Science majors, and are tough sledding for 64 | the rest of us. But over the years a bit of it began to seep in. 65 | What really caused it to jell was when I began to branch off on 66 | my own and begin to try things on my own computer. Now I plan to 67 | share with you what I have learned. At the end of this series 68 | you will by no means be a computer scientist, nor will you know 69 | all the esoterics of compiler theory. I intend to completely 70 | ignore the more theoretical aspects of the subject. What you 71 | _WILL_ know is all the practical aspects that one needs to know 72 | to build a working system. 73 | 74 | This is a "learn-by-doing" series. In the course of the series I 75 | will be performing experiments on a computer. You will be 76 | expected to follow along, repeating the experiments that I do, 77 | and performing some on your own. I will be using Turbo Pascal 78 | 4.0 on a PC clone. 
I will periodically insert examples written 79 | in TP. These will be executable code, which you will be expected 80 | to copy into your own computer and run. If you don't have a copy 81 | of Turbo, you will be severely limited in how well you will be 82 | able to follow what's going on. If you don't have a copy, I urge 83 | you to get one. After all, it's an excellent product, good for 84 | many other uses! 85 | 86 | Some articles on compilers show you examples, or show you (as in 87 | the case of Small-C) a finished product, which you can then copy 88 | and use without a whole lot of understanding of how it works. I 89 | hope to do much more than that. I hope to teach you HOW the 90 | things get done, so that you can go off on your own and not only 91 | reproduce what I have done, but improve on it. 92 | 93 | This is admittedly an ambitious undertaking, and it won't be done 94 | in one page. I expect to do it in the course of a number of 95 | articles. Each article will cover a single aspect of compiler 96 | theory, and will pretty much stand alone. If all you're 97 | interested in at a given time is one aspect, then you need to 98 | look only at that one article. Each article will be uploaded as 99 | it is complete, so you will have to wait for the last one before 100 | you can consider yourself finished. Please be patient. 101 | 102 | 103 | 104 | The average text on compiler theory covers a lot of ground that 105 | we won't be covering here. The typical sequence is: 106 | 107 | o An introductory chapter describing what a compiler is. 108 | 109 | o A chapter or two on syntax equations, using Backus-Naur Form 110 | (BNF). 111 | 112 | o A chapter or two on lexical scanning, with emphasis on 113 | deterministic and non-deterministic finite automata. 114 | 115 | o Several chapters on parsing theory, beginning with top-down 116 | recursive descent, and ending with LALR parsers. 
117 | 118 | o A chapter on intermediate languages, with emphasis on P-code 119 | and similar reverse polish representations. 120 | 121 | o Many chapters on alternative ways to handle subroutines and 122 | parameter passing, type declarations, and such. 123 | 124 | o A chapter toward the end on code generation, usually for some 125 | imaginary CPU with a simple instruction set. Most readers 126 | (and in fact, most college classes) never make it this far. 127 | 128 | o A final chapter or two on optimization. This chapter often 129 | goes unread, too. 130 | 131 | 132 | I'll be taking a much different approach in this series. To 133 | begin with, I won't dwell long on options. I'll be giving you 134 | _A_ way that works. If you want to explore options, well and 135 | good ... I encourage you to do so ... but I'll be sticking to 136 | what I know. I also will skip over most of the theory that puts 137 | people to sleep. Don't get me wrong: I don't belittle the 138 | theory, and it's vitally important when it comes to dealing with 139 | the more tricky parts of a given language. But I believe in 140 | putting first things first. Here we'll be dealing with the 95% 141 | of compiler techniques that don't need a lot of theory to handle. 142 | 143 | I also will discuss only one approach to parsing: top-down, 144 | recursive descent parsing, which is the _ONLY_ technique that's 145 | at all amenable to hand-crafting a compiler. The other 146 | approaches are only useful if you have a tool like YACC, and also 147 | don't care how much memory space the final product uses. 148 | 149 | I also take a page from the work of Ron Cain, the author of the 150 | original Small C. 
Whereas almost all other compiler authors have 151 | historically used an intermediate language like P-code and 152 | divided the compiler into two parts (a front end that produces 153 | P-code, and a back end that processes P-code to produce 154 | executable object code), Ron showed us that it is a 155 | straightforward matter to make a compiler directly produce 156 | executable object code, in the form of assembler language 157 | statements. The code will _NOT_ be the world's tightest code ... 158 | producing optimized code is a much more difficult job. But it 159 | will work, and work reasonably well. Just so that I don't leave 160 | you with the impression that our end product will be worthless, I 161 | _DO_ intend to show you how to "soup up" the compiler with some 162 | optimization. 163 | 164 | 165 | 166 | Finally, I'll be using some tricks that I've found to be most 167 | helpful in letting me understand what's going on without wading 168 | through a lot of boiler plate. Chief among these is the use of 169 | single-character tokens, with no embedded spaces, for the early 170 | design work. I figure that if I can get a parser to recognize 171 | and deal with I-T-L, I can get it to do the same with IF-THEN- 172 | ELSE. And I can. In the second "lesson," I'll show you just 173 | how easy it is to extend a simple parser to handle tokens of 174 | arbitrary length. As another trick, I completely ignore file 175 | I/O, figuring that if I can read source from the keyboard and 176 | output object to the screen, I can also do it from/to disk files. 177 | Experience has proven that once a translator is working 178 | correctly, it's a straightforward matter to redirect the I/O to 179 | files. The last trick is that I make no attempt to do error 180 | correction/recovery. The programs we'll be building will 181 | RECOGNIZE errors, and will not CRASH, but they will simply stop 182 | on the first error ... just like good ol' Turbo does. 
There will 183 | be other tricks that you'll see as you go. Most of them can't be 184 | found in any compiler textbook, but they work. 185 | 186 | A word about style and efficiency. As you will see, I tend to 187 | write programs in _VERY_ small, easily understood pieces. None 188 | of the procedures we'll be working with will be more than about 189 | 15-20 lines long. I'm a fervent devotee of the KISS (Keep It 190 | Simple, Sidney) school of software development. I try to never 191 | do something tricky or complex, when something simple will do. 192 | Inefficient? Perhaps, but you'll like the results. As Brian 193 | Kernighan has said, FIRST make it run, THEN make it run fast. 194 | If, later on, you want to go back and tighten up the code in one 195 | of our products, you'll be able to do so, since the code will be 196 | quite understandable. If you do so, however, I urge you to wait 197 | until the program is doing everything you want it to. 198 | 199 | I also have a tendency to delay building a module until I 200 | discover that I need it. Trying to anticipate every possible 201 | future contingency can drive you crazy, and you'll generally 202 | guess wrong anyway. In this modern day of screen editors and 203 | fast compilers, I don't hesitate to change a module when I feel I 204 | need a more powerful one. Until then, I'll write only what I 205 | need. 206 | 207 | One final caveat: One of the principles we'll be sticking to here 208 | is that we don't fool around with P-code or imaginary CPUs, but 209 | that we will start out on day one producing working, executable 210 | object code, at least in the form of assembler language source. 211 | However, you may not like my choice of assembler language ... 212 | it's 68000 code, which is what works on my system (under SK*DOS). 213 | I think you'll find, though, that the translation to any other 214 | CPU such as the 80x86 will be quite obvious, though, so I don't 215 | see a problem here. 
In fact, I hope someone out there who knows 216 | the '86 language better than I do will offer us the equivalent 217 | object code fragments as we need them. 218 | 219 | 220 | THE CRADLE 221 | 222 | Every program needs some boiler plate ... I/O routines, error 223 | message routines, etc. The programs we develop here will be no 224 | exceptions. I've tried to hold this stuff to an absolute 225 | minimum, however, so that we can concentrate on the important 226 | stuff without losing it among the trees. The code given below 227 | represents about the minimum that we need to get anything done. 228 | It consists of some I/O routines, an error-handling routine and a 229 | skeleton, null main program. I call it our cradle. As we 230 | develop other routines, we'll add them to the cradle, and add the 231 | calls to them as we need to. Make a copy of the cradle and save 232 | it, because we'll be using it more than once. 233 | 234 | There are many different ways to organize the scanning activities 235 | of a parser. In Unix systems, authors tend to use getc and 236 | ungetc. I've had very good luck with the approach shown here, 237 | which is to use a single, global, lookahead character. Part of 238 | the initialization procedure (the only part, so far!) serves to 239 | "prime the pump" by reading the first character from the input 240 | stream. No other special techniques are required with Turbo 4.0 241 | ... each successive call to GetChar will read the next character 242 | in the stream. 
243 | 244 | 245 | {--------------------------------------------------------------} 246 | program Cradle; 247 | 248 | {--------------------------------------------------------------} 249 | { Constant Declarations } 250 | 251 | const TAB = ^I; 252 | 253 | {--------------------------------------------------------------} 254 | { Variable Declarations } 255 | 256 | var Look: char; { Lookahead Character } 257 | 258 | {--------------------------------------------------------------} 259 | { Read New Character From Input Stream } 260 | 261 | procedure GetChar; 262 | begin 263 | Read(Look); 264 | end; 265 | 266 | {--------------------------------------------------------------} 267 | { Report an Error } 268 | 269 | procedure Error(s: string); 270 | begin 271 | WriteLn; 272 | WriteLn(^G, 'Error: ', s, '.'); 273 | end; 274 | 275 | 276 | {--------------------------------------------------------------} 277 | { Report Error and Halt } 278 | 279 | procedure Abort(s: string); 280 | begin 281 | Error(s); 282 | Halt; 283 | end; 284 | 285 | 286 | {--------------------------------------------------------------} 287 | { Report What Was Expected } 288 | 289 | procedure Expected(s: string); 290 | begin 291 | Abort(s + ' Expected'); 292 | end; 293 | 294 | {--------------------------------------------------------------} 295 | { Match a Specific Input Character } 296 | 297 | procedure Match(x: char); 298 | begin 299 | if Look = x then GetChar 300 | else Expected('''' + x + ''''); 301 | end; 302 | 303 | 304 | {--------------------------------------------------------------} 305 | { Recognize an Alpha Character } 306 | 307 | function IsAlpha(c: char): boolean; 308 | begin 309 | IsAlpha := upcase(c) in ['A'..'Z']; 310 | end; 311 | 312 | 313 | {--------------------------------------------------------------} 314 | 315 | { Recognize a Decimal Digit } 316 | 317 | function IsDigit(c: char): boolean; 318 | begin 319 | IsDigit := c in ['0'..'9']; 320 | end; 321 | 322 | 323 | 
{--------------------------------------------------------------} 324 | { Get an Identifier } 325 | 326 | function GetName: char; 327 | begin 328 | if not IsAlpha(Look) then Expected('Name'); 329 | GetName := UpCase(Look); 330 | GetChar; 331 | end; 332 | 333 | 334 | {--------------------------------------------------------------} 335 | { Get a Number } 336 | 337 | function GetNum: char; 338 | begin 339 | if not IsDigit(Look) then Expected('Integer'); 340 | GetNum := Look; 341 | GetChar; 342 | end; 343 | 344 | 345 | {--------------------------------------------------------------} 346 | { Output a String with Tab } 347 | 348 | procedure Emit(s: string); 349 | begin 350 | Write(TAB, s); 351 | end; 352 | 353 | 354 | 355 | 356 | {--------------------------------------------------------------} 357 | { Output a String with Tab and CRLF } 358 | 359 | procedure EmitLn(s: string); 360 | begin 361 | Emit(s); 362 | WriteLn; 363 | end; 364 | 365 | {--------------------------------------------------------------} 366 | { Initialize } 367 | 368 | procedure Init; 369 | begin 370 | GetChar; 371 | end; 372 | 373 | 374 | {--------------------------------------------------------------} 375 | { Main Program } 376 | 377 | begin 378 | Init; 379 | end. 380 | {--------------------------------------------------------------} 381 | 382 | 383 | That's it for this introduction. Copy the code above into TP and 384 | compile it. Make sure that it compiles and runs correctly. Then 385 | proceed to the first lesson, which is on expression parsing. 386 | 387 | 388 | ***************************************************************** 389 | * * 390 | * COPYRIGHT NOTICE * 391 | * * 392 | * Copyright (C) 1988 Jack W. Crenshaw. All rights reserved. 
/*
 * Repository-dump residue that shares these source lines. Kept so that
 * nothing is lost; it is not C source.
 *
 *   - the closing lines of the copyright-notice box of /1/tutor1.txt
 *
 *   /10/Makefile:
 *     IN=main.c cradle.c
 *     OUT=main
 *     FLAGS=-Wall -Werror
 *
 *     all:
 *         gcc -o $(OUT) $(IN) $(FLAGS)
 *
 *     run:
 *         ./$(OUT)
 *
 *     .PHONY: clean
 *     clean:
 *         rm $(OUT)
 */

/* ------------------------------ /10/cradle.h ----------------------------- */
/* Shown before cradle.c so that the declarations precede their use. */
#ifndef CRADLE_H
#define CRADLE_H
#include <stdbool.h>

#define MAX_BUF 100
#define MaxEntry 100

extern char tmp[MAX_BUF];      /* scratch buffer for messages and emitted code */
extern const char *ST[];       /* symbol table: identifier names */
extern char SType[];           /* symbol table: one type code per entry */
extern char Token;             /* code of the most recently scanned token */
extern char Value[MAX_BUF];    /* text of the most recently scanned token */
/* Declared here, defined once in cradle.c: a definition in the header yields
 * a duplicate symbol in every file that includes it. */
extern char Look;              /* lookahead character */

void GetChar(void);

void Error(const char *s);
void Abort(const char *s);
void Expected(const char *s);
void Match(char x);
void MatchString(const char *str);

int IsAlpha(char c);
int IsDigit(char c);
int IsAddop(char c);
int IsMulop(char c);
int IsOrOp(char c);
int IsRelop(char c);
int IsWhite(char c);
int IsAlNum(char c);

char uppercase(char c);
int Lookup(const char *const table[], const char *string, int n);

void GetName(void);
int GetNum(void);

void Emit(const char *s);
void EmitLn(const char *s);

void Init(void);
void InitTable(void);
bool InTable(const char *name);
void AddEntry(const char *symbol, char type);

char *NewLabel(void);
void PostLabel(const char *label);
void SkipWhite(void);
void NewLine(void);
void Scan(void);

/* re-targetable routines */
void Clear(void);
void Negate(void);
void LoadConst(int n);
void LoadVar(const char *name);
void Push(void);
void PopAdd(void);
void PopSub(void);
void PopMul(void);
void PopDiv(void);
void Store(const char *name);
void Undefined(const char *name);
void NotIt(void);
void PopAnd(void);
void PopOr(void);
void PopXor(void);
void PopCompare(void);
void SetEqual(void);
void SetNEqual(void);
void SetGreater(void);
void SetGreaterOrEqual(void);
void SetLess(void);
void SetLessOrEqual(void);
void Branch(const char *label);
void BranchFalse(const char *label);

#endif /* CRADLE_H */

/* ------------------------------ /10/cradle.c ----------------------------- */
/* The dump lost the names of the system headers; these are the ones the code
 * below actually needs. */
#include <limits.h>
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/* Skipped only when the header text above is already part of this unit. */
#ifndef CRADLE_H
#include "cradle.h"
#endif

#define MAX_SYMBOL_LENGTH 10

static int LCount = 0;               /* next label number */
static char labelName[MAX_BUF];      /* storage returned by NewLabel() */
char tmp[MAX_BUF];
char Look;

static int NEntry = 0;               /* number of used symbol-table slots */
const char *ST[MaxEntry];
char SType[MaxEntry];

/* Keywords symbol table */
const char *const KWList[] = {
    "IF",
    "ELSE",
    "ENDIF",
    "WHILE",
    "ENDWHILE",
    "VAR",
    "BEGIN",
    "END",
    "PROGRAM"
};
/* Token codes: index 0 ('x') means "not a keyword"; index i+1 belongs to
 * KWList[i]. */
const char KWCode[] = "xilewevbep";
const int KWNum = sizeof(KWList)/sizeof(*KWList);

char Token;             /* current token */
char Value[MAX_BUF];    /* string token of Look */

/* Helper Functions */

/* Upper-case an ASCII letter; any other character is returned unchanged. */
char uppercase(char c)
{
    return IsAlpha(c) ? (char)(c & 0xDF) : c;
}

/* Table Lookup
 * If the input string matches a table entry, return the entry index, else
 * return -1.
 * *n* is the number of entries of *table* to examine */
int Lookup(const char *const table[], const char *string, int n)
{
    for (int i = 0; i < n; ++i) {
        if (strcmp(table[i], string) == 0) {
            return i;
        }
    }
    return -1;
}

/* Add a new entry to symbol table.
 * Aborts on a duplicate identifier, a full table, or allocation failure.
 * The table keeps its own heap copy of *symbol*. */
void AddEntry(const char *symbol, char type)
{
    if (InTable(symbol)) {
        snprintf(tmp, sizeof tmp, "Duplicate Identifier %s", symbol);
        Abort(tmp);
    }
    if (NEntry == MaxEntry) {
        Abort("Symbol Table Full");
    }

    char *new_entry = malloc(strlen(symbol) + 1);
    if (new_entry == NULL) {
        Abort("AddEntry: not enough memory allocating new_entry.");
    }
    strcpy(new_entry, symbol);
    ST[NEntry] = new_entry;
    SType[NEntry] = type;

    NEntry++;
}

/* Get an Identifier and Scan it for keywords.
 * Leaves the text in Value and its code in Token ('x' for a non-keyword). */
void Scan(void)
{
    GetName();
    int index = Lookup(KWList, Value, KWNum);
    Token = KWCode[index+1];   /* index is -1 when not found, selecting 'x' */
}

/* Abort unless the current token text equals *str*.
 * The message names the expected string, not the one that was found. */
void MatchString(const char *str)
{
    if (strcmp(Value, str) != 0) {
        snprintf(tmp, sizeof tmp, "\"%s\"", str);
        Expected(tmp);
    }
}

/* Read the next character from stdin into the lookahead variable. */
void GetChar(void)
{
    Look = (char)getchar();
}

/* Report an error message on stdout; does not terminate the program. */
void Error(const char *s)
{
    printf("\nError: %s.", s);
}

/* Report an error message and terminate with exit status 1. */
void Abort(const char *s)
{
    Error(s);
    exit(1);
}

/* Report what was expected and terminate.
 * Callers routinely pass the global tmp as *s*, so the message is built in a
 * local buffer: formatting into tmp while reading from it would be an
 * overlapping sprintf, which is undefined behaviour. */
void Expected(const char *s)
{
    char msg[MAX_BUF + sizeof " Expected"];

    snprintf(msg, sizeof msg, "%s Expected", s);
    Abort(msg);
}

/* Skip line breaks, consume the character x (abort if it is not next), then
 * skip trailing white space. */
void Match(char x)
{
    NewLine();
    if (Look == x) {
        GetChar();
    } else {
        snprintf(tmp, sizeof tmp, "' %c ' ", x);
        Expected(tmp);
    }
    SkipWhite();
}

/* Non-zero when c is an ASCII letter. */
int IsAlpha(char c)
{
    return (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z');
}

/* Non-zero when c is an ASCII decimal digit. */
int IsDigit(char c)
{
    return (c >= '0') && (c <= '9');
}

/* Non-zero when c is '+' or '-'. */
int IsAddop(char c)
{
    return (c == '+') || (c == '-');
}

/* Non-zero when c is '*' or '/'. */
int IsMulop(char c)
{
    return (c == '*') || (c == '/');
}

/* The three classifiers below use strchr, which also matches the string's
 * terminating NUL; NUL is excluded explicitly so it is never classified. */

/* Non-zero when c is '|' or '~'. */
int IsOrOp(char c)
{
    return c != '\0' && strchr("|~", c) != NULL;
}

/* Non-zero when c is one of '=', '#', '<', '>'. */
int IsRelop(char c)
{
    return c != '\0' && strchr("=#<>", c) != NULL;
}

/* Non-zero when c is a blank, tab, carriage return or line feed. */
int IsWhite(char c)
{
    return c != '\0' && strchr(" \t\r\n", c) != NULL;
}

/* Non-zero when c is an ASCII letter or digit. */
int IsAlNum(char c)
{
    return IsAlpha(c) || IsDigit(c);
}

/* Read an identifier into Value, upper-cased, then skip white space.
 * Aborts when no letter is next or when the name would not fit in Value. */
void GetName(void)
{
    NewLine();
    if (!IsAlpha(Look)) {
        Expected("Name");
    }

    size_t len = 0;
    while (IsAlNum(Look)) {
        if (len >= sizeof Value - 1) {
            Abort("Identifier Too Long");
        }
        Value[len++] = uppercase(Look);
        GetChar();
    }
    Value[len] = '\0';
    SkipWhite();
}

/* Read an unsigned decimal number and return its value, then skip white
 * space. Aborts when no digit is next or when the value exceeds INT_MAX. */
int GetNum(void)
{
    NewLine();
    if (!IsDigit(Look)) {
        Expected("Integer");
    }

    int value = 0;
    while (IsDigit(Look)) {
        int digit = Look - '0';
        /* checked before the multiply: signed overflow is undefined */
        if (value > (INT_MAX - digit) / 10) {
            Abort("Integer Too Large");
        }
        value = value * 10 + digit;
        GetChar();
    }

    SkipWhite();

    return value;
}

/* Write s to stdout preceded by a tab. */
void Emit(const char *s)
{
    printf("\t%s", s);
}

/* Write s to stdout preceded by a tab and followed by a newline. */
void EmitLn(const char *s)
{
    Emit(s);
    printf("\n");
}

/* Reset the label counter and symbol table, then read the first token. */
void Init(void)
{
    LCount = 0;

    InitTable();
    GetChar();
    Scan();
    SkipWhite();
}

/* Empty the symbol table, releasing the copies made by AddEntry. */
void InitTable(void)
{
    for (int i = 0; i < NEntry; i++) {
        free((char *)ST[i]);   /* used slots are filled by AddEntry's malloc */
    }
    NEntry = 0;

    for (int i = 0; i < MaxEntry; i++) {
        ST[i] = NULL;
        SType[i] = ' ';
    }
}

/* True when *name* is present in the symbol table. */
bool InTable(const char *name)
{
    return Lookup(ST, name, NEntry) != -1;
}

/* Generate the next label ("L00", "L01", ...).
 * The result lives in a static buffer that the next call overwrites. */
char *NewLabel(void)
{
    snprintf(labelName, sizeof labelName, "L%02d", LCount);
    LCount++;
    return labelName;
}

/* Emit *label* as a label definition at the start of a line. */
void PostLabel(const char *label)
{
    printf("%s:\n", label);
}

/* Advance past any white-space characters. */
void SkipWhite(void)
{
    while (IsWhite(Look)) {
        GetChar();
    }
}

/* Skip over an End-of-Line */
void NewLine(void)
{
    while (Look == '\n') {
        GetChar();
        if (Look == '\r') {
            GetChar();
        }
        SkipWhite();
    }
}

/* re-targetable routines */

/* Clear the primary register */
void Clear(void)
{
    EmitLn("xor %eax, %eax");
}

/* Negate the primary register */
void Negate(void)
{
    EmitLn("neg %eax");
}

/* Load a constant to primary register */
void LoadConst(int n)
{
    snprintf(tmp, sizeof tmp, "movl $%d, %%eax", n);
    EmitLn(tmp);
}

/* Load a variable to primary register; aborts if *name* is undeclared */
void LoadVar(const char *name)
{
    if (!InTable(name)) {
        Undefined(name);
    }
    snprintf(tmp, sizeof tmp, "movl %s, %%eax", name);
    EmitLn(tmp);
}

/* Push Primary onto stack */
void Push(void)
{
    EmitLn("pushl %eax");
}

/* Add Top of Stack to primary */
void PopAdd(void)
{
    EmitLn("addl (%esp), %eax");
    EmitLn("addl $4, %esp");
}

/* Subtract Primary from Top of Stack */
void PopSub(void)
{
    EmitLn("subl (%esp), %eax");
    EmitLn("neg %eax");   /* eax held primary - top; flip to top - primary */
    EmitLn("addl $4, %esp");
}

/* multiply top of stack by primary */
void PopMul(void)
{
    EmitLn("imull (%esp), %eax");
    EmitLn("addl $4, %esp");
}

/* divide top of stack by primary */
void PopDiv(void)
{
    /* for an expression like a/b we have eax=b and (%esp)=a
     * but we need eax=a, and b on the stack
     */
    EmitLn("movl (%esp), %edx");
    EmitLn("addl $4, %esp");
    EmitLn("pushl %eax");
    EmitLn("movl %edx, %eax");

    /* sign extension */
    EmitLn("sarl $31, %edx");
    EmitLn("idivl (%esp)");
    EmitLn("addl $4, %esp");
}

/* store primary to variable; aborts if *name* is undeclared */
void Store(const char *name)
{
    if (!InTable(name)) {
        Undefined(name);
    }
    snprintf(tmp, sizeof tmp, "movl %%eax, %s", name);
    EmitLn(tmp);
}

/* Report an undefined identifier and terminate */
void Undefined(const char *name)
{
    char msg[MAX_BUF + sizeof "Undefined Identifier: "];

    snprintf(msg, sizeof msg, "Undefined Identifier: %s", name);
    Abort(msg);
}

/* Complement the primary register */
void NotIt(void)
{
    EmitLn("not %eax");
}

/* AND top of Stack with primary */
void PopAnd(void)
{
    EmitLn("and (%esp), %eax");
    EmitLn("addl $4, %esp");
}

/* OR top of Stack with primary */
void PopOr(void)
{
    EmitLn("or (%esp), %eax");
    EmitLn("addl $4, %esp");
}

/* XOR top of Stack with primary */
void PopXor(void)
{
    EmitLn("xor (%esp), %eax");
    EmitLn("addl $4, %esp");
}

/* Compare top of Stack with primary.
 * The flags describe (primary - top of stack). The operand is popped into a
 * scratch register first: adjusting %esp afterwards would overwrite the
 * flags, and reading it from below %esp touches memory the program no
 * longer owns. */
void PopCompare(void)
{
    EmitLn("popl %edx");
    EmitLn("cmpl %edx, %eax");
}

/* Materialise a condition as 0/1 in %eax using the given setcc instruction */
static void SetCond(const char *setcc)
{
    EmitLn(setcc);
    EmitLn("movsx %al, %eax");
}

/* set %eax if Compare was = */
void SetEqual(void)
{
    SetCond("sete %al");
}

/* set %eax if Compare was != */
void SetNEqual(void)
{
    SetCond("setne %al");
}

/* The four ordering tests look inverted because PopCompare computes
 * (right operand - left operand): left > right means the result is "less". */

/* set %eax if Compare was > */
void SetGreater(void)
{
    SetCond("setl %al");
}

/* set %eax if Compare was >= */
void SetGreaterOrEqual(void)
{
    SetCond("setle %al");
}

/* set %eax if Compare was < */
void SetLess(void)
{
    SetCond("setg %al");
}

/* set %eax if Compare was <= */
void SetLessOrEqual(void)
{
    SetCond("setge %al");
}

/* Branch unconditional */
void Branch(const char *label)
{
    snprintf(tmp, sizeof tmp, "jmp %s", label);
    EmitLn(tmp);
}

/* Branch False: jump when bit 0 of the primary register is clear */
void BranchFalse(const char *label)
{
    EmitLn("test $1, %eax");
    snprintf(tmp, sizeof tmp, "jz %s", label);
    EmitLn(tmp);
}

/*
 * Dump residue: the opening of /10/main.c shares the last of these source
 * lines and continues on the following line, so it is kept verbatim (the
 * system header names were already missing in the dump).
 *
 *   /10/main.c:
 *    1 | #include
 *    2 | #include
 *    3 | #include
 *    4 | #include
 *    5 |
 *    6 | #include "cradle.h"
 *    7 |
 *    8 | #ifdef DEBUG
 *    9 | #define dprint(fmt, ...) printf(fmt, __VA_ARGS__);
 *   10 | #else
 *   11 | #define dprint(fmt, ...)
 */
12 | #endif 13 | 14 | 15 | void Prog(); 16 | void Prolog(); 17 | void Epilog(); 18 | void Header(); 19 | void Main(); 20 | void Decl(); 21 | void TopDecls(); 22 | void Alloc(char *); 23 | void Block(); 24 | void Assignment(); 25 | 26 | void Factor(); 27 | void NegFactor(); 28 | void Expression(); 29 | void Subtract(); 30 | void FirstTerm(); 31 | void Term(); 32 | void Term1(); 33 | void Divide(); 34 | void Multiply(); 35 | void FirstFactor(); 36 | void Add(); 37 | void Equals(); 38 | void NotEquals(); 39 | void Less(); 40 | void LessOrEqual(); 41 | void Greater(); 42 | void Relation(); 43 | void NotFactor(); 44 | void BoolTerm(); 45 | void BoolOr(); 46 | void BoolXor(); 47 | void BoolExpression(); 48 | void DoIf(); 49 | void DoWhile(); 50 | 51 | void Prog() 52 | { 53 | MatchString("PROGRAM"); 54 | Header(); 55 | TopDecls(); 56 | Main(); 57 | Match('.'); 58 | } 59 | 60 | void Header() 61 | { 62 | EmitLn(".global _start"); 63 | } 64 | 65 | void Prolog() 66 | { 67 | EmitLn(".section .text"); 68 | EmitLn("_start:"); 69 | } 70 | 71 | void Epilog() 72 | { 73 | EmitLn("movl %eax, %ebx"); 74 | EmitLn("movl $1, %eax"); 75 | EmitLn("int $0x80"); 76 | } 77 | 78 | void Main() 79 | { 80 | MatchString("BEGIN"); 81 | Prolog(); 82 | Block(); 83 | MatchString("END"); 84 | Epilog(); 85 | } 86 | 87 | void TopDecls() 88 | { 89 | NewLine(); 90 | Scan(); 91 | while(Token != 'b') { 92 | switch(Token) { 93 | case 'v': 94 | Decl(); 95 | break; 96 | default: 97 | sprintf(tmp, "Unrecognized Keyword '%c'", Look); 98 | Abort(tmp); 99 | break; 100 | } 101 | Scan(); 102 | NewLine(); 103 | } 104 | } 105 | 106 | void Decl() 107 | { 108 | NewLine(); 109 | EmitLn(".section .data"); /* in case that the variable and function 110 | declarations are mixed */ 111 | GetName(); 112 | Alloc(Value); 113 | while(Look == ',') { 114 | Match(','); 115 | GetName(); 116 | Alloc(Value); 117 | NewLine(); 118 | } 119 | } 120 | 121 | void Alloc(char *name) 122 | { 123 | if (InTable(name)) { 124 | sprintf(tmp, 
"Duplicate Variable Name: %s", name); 125 | Abort(tmp); 126 | } 127 | AddEntry(name, 'v'); 128 | sprintf(tmp, "%s: .int ", name); 129 | Emit(tmp); 130 | if (Look == '=') { 131 | Match('='); 132 | if (Look == '-') { 133 | Emit("-"); 134 | Match('-'); 135 | } else { 136 | Emit(""); 137 | } 138 | printf("%d\n", GetNum()); 139 | } else { 140 | EmitLn("0"); 141 | } 142 | } 143 | 144 | /* Parse and Translate a Block of Statements 145 | * ::= ( )* 146 | * ::= | | 147 | * */ 148 | void Block() 149 | { 150 | Scan(); 151 | NewLine(); 152 | while(strchr("el", Token) == NULL) { 153 | switch (Token) { 154 | case 'i': 155 | DoIf(); 156 | break; 157 | case 'w': 158 | DoWhile(); 159 | break; 160 | default: 161 | Assignment(); 162 | break; 163 | } 164 | Scan(); 165 | NewLine(); 166 | } 167 | } 168 | 169 | void Assignment() 170 | { 171 | char name[MAX_BUF]; 172 | sprintf(name, Value); 173 | Match('='); 174 | BoolExpression(); 175 | Store(name); 176 | } 177 | 178 | void Factor() 179 | { 180 | if (Look == '(') { 181 | Match('('); 182 | BoolExpression(); 183 | Match(')'); 184 | } else if (IsAlpha(Look)) { 185 | GetName(); 186 | LoadVar(Value); 187 | } else { 188 | LoadConst(GetNum()); 189 | } 190 | } 191 | 192 | void NegFactor() 193 | { 194 | Match('-'); 195 | if (IsDigit(Look)) { 196 | LoadConst(-GetNum()); 197 | } else { 198 | Factor(); 199 | Negate(); 200 | } 201 | } 202 | 203 | /* Parse and Translate a Leading Factor */ 204 | void FirstFactor() 205 | { 206 | switch (Look) { 207 | case '+': 208 | Match('+'); 209 | Factor(); 210 | break; 211 | case '-': 212 | NegFactor(); 213 | break; 214 | default: 215 | Factor(); 216 | } 217 | } 218 | 219 | void Multiply() 220 | { 221 | Match('*'); 222 | Factor(); 223 | PopMul(); 224 | } 225 | 226 | void Divide() 227 | { 228 | Match('/'); 229 | Factor(); 230 | PopDiv(); 231 | } 232 | 233 | void Term1() 234 | { 235 | NewLine(); 236 | while(IsMulop(Look)) { 237 | Push(); 238 | switch(Look) { 239 | case '*': 240 | Multiply(); 241 | break; 242 | case 
'/': 243 | Divide(); 244 | break; 245 | default: 246 | break; 247 | } 248 | NewLine(); 249 | } 250 | } 251 | 252 | void Term() 253 | { 254 | Factor(); 255 | Term1(); 256 | } 257 | 258 | void FirstTerm() 259 | { 260 | FirstFactor(); 261 | Term1(); 262 | } 263 | 264 | void Add() 265 | { 266 | Match('+'); 267 | Term(); 268 | PopAdd(); 269 | } 270 | 271 | void Subtract() 272 | { 273 | Match('-'); 274 | Term(); 275 | PopSub(); 276 | } 277 | 278 | void Expression() 279 | { 280 | NewLine(); 281 | FirstTerm(); 282 | while(IsAddop(Look)) { 283 | Push(); 284 | switch(Look) { 285 | case '+': 286 | Add(); 287 | break; 288 | case '-': 289 | Subtract(); 290 | break; 291 | default: 292 | break; 293 | } 294 | NewLine(); 295 | } 296 | } 297 | 298 | /* Recognize and Translate a Relational "Equals" */ 299 | void Equals() 300 | { 301 | Match('='); 302 | Expression(); 303 | PopCompare(); 304 | SetEqual(); 305 | } 306 | 307 | /* Recognize and Translate a Relational "Not Equals" */ 308 | void NotEquals() 309 | { 310 | Match('>'); 311 | Expression(); 312 | PopCompare(); 313 | SetNEqual(); 314 | } 315 | 316 | /* Recognize and Translate a Relational "Less Than" */ 317 | void Less() 318 | { 319 | Match('<'); 320 | switch(Look) { 321 | case '=': 322 | LessOrEqual(); 323 | break; 324 | case '>': 325 | NotEquals(); 326 | break; 327 | default: 328 | Expression(); 329 | PopCompare(); 330 | SetLess(); 331 | break; 332 | } 333 | } 334 | 335 | /* Recognize and Translate a Relational "Less or Equal" */ 336 | void LessOrEqual() 337 | { 338 | Match('='); 339 | Expression(); 340 | PopCompare(); 341 | SetLessOrEqual(); 342 | } 343 | 344 | /* Recognize and Translate a Relational "Greater Than" */ 345 | void Greater() 346 | { 347 | Match('>'); 348 | if (Look == '=') { 349 | Match('='); 350 | Expression(); 351 | PopCompare(); 352 | SetGreaterOrEqual(); 353 | } else { 354 | Expression(); 355 | PopCompare(); 356 | SetGreater(); 357 | } 358 | } 359 | 360 | /* Parse and Translate a Relation */ 361 | void 
Relation() 362 | { 363 | Expression(); 364 | if (IsRelop(Look)) { 365 | Push(); 366 | switch (Look) { 367 | case '=': 368 | Equals(); 369 | break; 370 | case '#': 371 | NotEquals(); 372 | break; 373 | case '<': 374 | Less(); 375 | break; 376 | case '>': 377 | Greater(); 378 | break; 379 | default: 380 | break; 381 | } 382 | } 383 | } 384 | 385 | /* Parse and Translate a Boolean Factor with Leading NOT */ 386 | void NotFactor() 387 | { 388 | if (Look == '!') { 389 | Match('!'); 390 | Relation(); 391 | NotIt(); 392 | } else { 393 | Relation(); 394 | } 395 | } 396 | 397 | /* Parse and Translate a Boolean Term 398 | * ::= ( and_op ::= ( or_op )* */ 431 | void BoolExpression() 432 | { 433 | NewLine(); 434 | BoolTerm(); 435 | while(IsOrOp(Look)) { 436 | Push(); 437 | switch(Look) { 438 | case '|': 439 | BoolOr(); 440 | break; 441 | case '~': 442 | BoolXor(); 443 | break; 444 | default: 445 | break; 446 | } 447 | NewLine(); 448 | } 449 | } 450 | 451 | /* Recognize and Translate an IF construct */ 452 | void DoIf() 453 | { 454 | char L1[MAX_BUF]; 455 | char L2[MAX_BUF]; 456 | sprintf(L1, NewLabel()); 457 | sprintf(L2, L1); 458 | BoolExpression(); 459 | BranchFalse(L1); 460 | Block(); 461 | if (Token == 'l') { 462 | sprintf(L2, NewLabel()); 463 | Branch(L2); 464 | PostLabel(L1); 465 | Block(); 466 | } 467 | PostLabel(L2); 468 | MatchString("ENDIF"); 469 | } 470 | 471 | void DoWhile() 472 | { 473 | char L1[MAX_BUF]; 474 | char L2[MAX_BUF]; 475 | sprintf(L1, NewLabel()); 476 | sprintf(L2, NewLabel()); 477 | PostLabel(L1); 478 | BoolExpression(); 479 | BranchFalse(L2); 480 | Block(); 481 | MatchString("ENDWHILE"); 482 | Branch(L1); 483 | PostLabel(L2); 484 | } 485 | 486 | 487 | int main() 488 | { 489 | Init(); 490 | Prog(); 491 | if (Look != '\n') { 492 | Abort("Unexpected data after '.'"); 493 | } 494 | return 0; 495 | } 496 | -------------------------------------------------------------------------------- /10/prog.txt: 
-------------------------------------------------------------------------------- 1 | PROGRAM 2 | VAR xx, 3 | yy=1, 4 | zz=10 5 | BEGIN 6 | WHILE yy <= zz 7 | IF yy <> 5 8 | xx=xx+yy 9 | ELSE 10 | xx=xx+5 11 | ENDIF 12 | yy=yy+1 13 | ENDWHILE 14 | END. 15 | 16 | -------------------------------------------------------------------------------- /11/Makefile: -------------------------------------------------------------------------------- 1 | IN=main.c cradle.c 2 | OUT=main 3 | FLAGS=-Wall -Werror 4 | 5 | all: 6 | gcc -o $(OUT) $(IN) $(FLAGS) 7 | 8 | run: 9 | ./$(OUT) 10 | 11 | .PHONY: clean 12 | clean: 13 | rm $(OUT) 14 | -------------------------------------------------------------------------------- /11/cradle.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | 6 | #include "cradle.h" 7 | #include 8 | 9 | #define MaxEntry 100 10 | #define MAX_SYMBOL_LENGTH 10 11 | static int LCount = 0; 12 | static char labelName[MAX_BUF]; 13 | char tmp[MAX_BUF]; 14 | 15 | /*char ST[TABLE_SIZE];*/ 16 | static int NEntry = 0; 17 | const char *ST[MaxEntry]; 18 | char SType[MaxEntry]; 19 | 20 | 21 | /* Keywords symbol table */ 22 | const char const *KWList[] = { 23 | "IF", 24 | "ELSE", 25 | "ENDIF", 26 | "WHILE", 27 | "ENDWHILE", 28 | "VAR", 29 | "END", 30 | }; 31 | const char KWCode[] = "xileweve"; 32 | const int KWNum = sizeof(KWList)/sizeof(*KWList); 33 | 34 | char Token; /* current token */ 35 | char Value[MAX_BUF]; /* string token of Look */ 36 | 37 | /* Helper Functions */ 38 | char uppercase(char c) 39 | { 40 | if (IsAlpha(c)) { 41 | return (c & 0xDF); 42 | } else { 43 | return c; 44 | } 45 | } 46 | 47 | /* Table Lookup 48 | * If the input string matches a table entry, return the entry index, else 49 | * return -1. 
50 | * *n* is the size of the table */ 51 | int Lookup(const char const *table[], const char *string, int n) 52 | { 53 | int i; 54 | bool found = false; 55 | 56 | for (i = 0; i < n; ++i) { 57 | if (strcmp(table[i], string) == 0) { 58 | found = true; 59 | break; 60 | } 61 | } 62 | return found ? i : -1; 63 | } 64 | 65 | int Locate(char *symbol) 66 | { 67 | return Lookup(ST, symbol, NEntry); 68 | } 69 | 70 | /* Add a new entry to symbol table */ 71 | void AddEntry(char *symbol, char type) 72 | { 73 | CheckDup(symbol); 74 | if (NEntry == MaxEntry) { 75 | Abort("Symbol Table Full"); 76 | } 77 | 78 | char *new_entry = (char *)malloc((strlen(symbol)+1)*sizeof(*new_entry)); 79 | if (new_entry == NULL) { 80 | Abort("AddEntry: not enough memory allocating new_entry."); 81 | } 82 | strcpy(new_entry, symbol); 83 | ST[NEntry] = new_entry; 84 | SType[NEntry] = type; 85 | 86 | NEntry++; 87 | } 88 | 89 | /* Get an Identifier and Scan it for keywords */ 90 | void Scan() 91 | { 92 | if (Token == 'x') { 93 | int index = Lookup(KWList, Value, KWNum); 94 | Token = KWCode[index+1]; 95 | } 96 | } 97 | 98 | void MatchString(char *str) 99 | { 100 | if (strcmp(Value, str) != 0) { 101 | sprintf(tmp, "\"%s\"", str); 102 | Expected(tmp); 103 | } 104 | Next(); 105 | } 106 | 107 | void GetChar() 108 | { 109 | Look = getchar(); 110 | /* printf("Getchar: %c\n", Look); */ 111 | } 112 | 113 | 114 | void Error(char *s) 115 | { 116 | printf("\nError: %s.", s); 117 | } 118 | 119 | void Abort(char *s) 120 | { 121 | Error(s); 122 | exit(1); 123 | } 124 | 125 | 126 | void Expected(char *s) 127 | { 128 | sprintf(tmp, "%s Expected", s); 129 | Abort(tmp); 130 | } 131 | 132 | 133 | void Match(char x) 134 | { 135 | NewLine(); 136 | if(Look == x) { 137 | GetChar(); 138 | } else { 139 | sprintf(tmp, "' %c ' ", x); 140 | Expected(tmp); 141 | } 142 | SkipWhite(); 143 | } 144 | 145 | int IsAlpha(char c) 146 | { 147 | return (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z'); 148 | } 149 | 150 | int IsDigit(char c) 
151 | { 152 | return (c >= '0') && (c <= '9'); 153 | } 154 | 155 | int IsAddop(char c) 156 | { 157 | return (c == '+') || (c == '-'); 158 | } 159 | 160 | int IsMulop(char c) 161 | { 162 | return (c == '*') || (c == '/'); 163 | } 164 | 165 | int IsOrOp(char c) 166 | { 167 | return strchr("|~", c) != NULL; 168 | } 169 | 170 | int IsRelop(char c) 171 | { 172 | return strchr("=#<>", c) != NULL; 173 | } 174 | 175 | int IsWhite(char c) 176 | { 177 | return strchr(" \t\r\n", c) != NULL; 178 | } 179 | 180 | int IsAlNum(char c) 181 | { 182 | return IsAlpha(c) || IsDigit(c); 183 | } 184 | 185 | void GetName() 186 | { 187 | SkipWhite(); 188 | if( !IsAlpha(Look)) { 189 | Expected("Name"); 190 | } 191 | 192 | Token = 'x'; 193 | char *p = Value; 194 | do { 195 | *p++ = uppercase(Look); 196 | GetChar(); 197 | } while(IsAlNum(Look)) ; 198 | *p = '\0'; 199 | } 200 | 201 | void GetNum() 202 | { 203 | SkipWhite(); 204 | if( !IsDigit(Look)) { 205 | Expected("Integer"); 206 | } 207 | 208 | Token = '#'; 209 | char *p = Value; 210 | do { 211 | *p++ = Look; 212 | GetChar(); 213 | } while (IsDigit(Look)); 214 | *p = '\0'; 215 | } 216 | 217 | /* Get an operator */ 218 | void GetOp() 219 | { 220 | SkipWhite(); 221 | Token = Look; 222 | Value[0] = Look; 223 | Value[1] = '\0'; 224 | GetChar(); 225 | } 226 | 227 | /* Get the next input token */ 228 | void Next() 229 | { 230 | SkipWhite(); 231 | if (IsAlpha(Look)) { 232 | GetName(); 233 | } else if (IsDigit(Look)) { 234 | GetNum(); 235 | } else { 236 | GetOp(); 237 | } 238 | } 239 | 240 | void Emit(char *s) 241 | { 242 | printf("\t%s", s); 243 | } 244 | 245 | void EmitLn(char *s) 246 | { 247 | Emit(s); 248 | printf("\n"); 249 | } 250 | 251 | void Init() 252 | { 253 | LCount = 0; 254 | 255 | InitTable(); 256 | GetChar(); 257 | Next(); 258 | } 259 | 260 | void InitTable() 261 | { 262 | int i; 263 | for (i = 0; i < MaxEntry; i++) { 264 | ST[i] = NULL; 265 | SType[i] = ' '; 266 | } 267 | 268 | } 269 | 270 | /* look for symbol in table */ 271 | bool 
InTable(char *symbol) 272 | { 273 | return Locate(symbol) != -1; 274 | } 275 | 276 | /* Check to see if an identifier is in the symbol table, 277 | * report an error if it's not */ 278 | void CheckTable(char *symbol) 279 | { 280 | if (! InTable(symbol)) { 281 | Undefined(symbol); 282 | } 283 | } 284 | 285 | void CheckDup(char *symbol) 286 | { 287 | if (InTable(symbol)) { 288 | Duplicate(symbol); 289 | } 290 | } 291 | 292 | char *NewLabel() 293 | { 294 | sprintf(labelName, "L%02d", LCount); 295 | LCount ++; 296 | return labelName; 297 | } 298 | 299 | void PostLabel(char *label) 300 | { 301 | printf("%s:\n", label); 302 | } 303 | 304 | void SkipWhite() 305 | { 306 | while (IsWhite(Look)) { 307 | GetChar(); 308 | } 309 | } 310 | 311 | /* Skip over an End-of-Line */ 312 | void NewLine() 313 | { 314 | while(Look == '\n') { 315 | GetChar(); 316 | if (Look == '\r') { 317 | GetChar(); 318 | } 319 | SkipWhite(); 320 | } 321 | } 322 | 323 | /* re-targetable routines */ 324 | void Clear() 325 | { 326 | EmitLn("xor %eax, %eax"); 327 | } 328 | 329 | void Negate() 330 | { 331 | EmitLn("neg %eax"); 332 | } 333 | 334 | void LoadConst(char *value) 335 | { 336 | sprintf(tmp, "movl $%s, %%eax", value); 337 | EmitLn(tmp); 338 | } 339 | 340 | /* Load a variable to primary register */ 341 | void LoadVar(char *name) 342 | { 343 | if (!InTable(name)) { 344 | char name_string[MAX_BUF]; 345 | Undefined(name_string); 346 | } 347 | sprintf(tmp, "movl %s, %%eax", name); 348 | EmitLn(tmp); 349 | } 350 | 351 | 352 | /* Push Primary onto stack */ 353 | void Push() 354 | { 355 | EmitLn("pushl %eax"); 356 | } 357 | 358 | /* Add Top of Stack to primary */ 359 | void PopAdd() 360 | { 361 | EmitLn("addl (%esp), %eax"); 362 | EmitLn("addl $4, %esp"); 363 | } 364 | 365 | /* Subtract Primary from Top of Stack */ 366 | void PopSub() 367 | { 368 | EmitLn("subl (%esp), %eax"); 369 | EmitLn("neg %eax"); 370 | EmitLn("addl $4, %esp"); 371 | } 372 | 373 | /* multiply top of stack by primary */ 374 | void 
PopMul() 375 | { 376 | EmitLn("imull (%esp), %eax"); 377 | EmitLn("addl $4, %esp"); 378 | } 379 | 380 | /* divide top of stack by primary */ 381 | void PopDiv() 382 | { 383 | /* for a expersion like a/b we have eax=b and %(esp)=a 384 | * but we need eax=a, and b on the stack 385 | */ 386 | EmitLn("movl (%esp), %edx"); 387 | EmitLn("addl $4, %esp"); 388 | EmitLn("pushl %eax"); 389 | EmitLn("movl %edx, %eax"); 390 | 391 | /* sign extesnion */ 392 | EmitLn("sarl $31, %edx"); 393 | EmitLn("idivl (%esp)"); 394 | EmitLn("addl $4, %esp"); 395 | } 396 | 397 | /* store primary to variable */ 398 | void Store(char *name) 399 | { 400 | if (!InTable(name)) { 401 | char name_string[MAX_BUF]; 402 | Undefined(name_string); 403 | } 404 | sprintf(tmp, "movl %%eax, %s", name); 405 | EmitLn(tmp); 406 | } 407 | 408 | void Undefined(char *name) 409 | { 410 | sprintf(tmp, "Undefined Identifier: %s", name); 411 | Abort(tmp); 412 | } 413 | 414 | void Duplicate(char *name) 415 | { 416 | sprintf(tmp, "Duplicate Identifier: %s", name); 417 | Abort(tmp); 418 | } 419 | 420 | /* Complement the primary register */ 421 | void NotIt() 422 | { 423 | EmitLn("not %eax"); 424 | } 425 | 426 | /* AND top of Stack with primary */ 427 | void PopAnd() 428 | { 429 | EmitLn("and (%esp), %eax"); 430 | EmitLn("addl $4, %esp"); 431 | } 432 | 433 | /* OR top of Stack with primary */ 434 | void PopOr() 435 | { 436 | EmitLn("or (%esp), %eax"); 437 | EmitLn("addl $4, %esp"); 438 | } 439 | 440 | /* XOR top of Stack with primary */ 441 | void PopXor() 442 | { 443 | EmitLn("xor (%esp), %eax"); 444 | EmitLn("addl $4, %esp"); 445 | } 446 | 447 | /* Compare top of Stack with primary */ 448 | void PopCompare() 449 | { 450 | EmitLn("addl $4, %esp"); 451 | EmitLn("cmp -4(%esp), %eax"); 452 | } 453 | 454 | /* set %eax if Compare was = */ 455 | void SetEqual() 456 | { 457 | EmitLn("sete %al"); 458 | EmitLn("movsx %al, %eax"); 459 | } 460 | 461 | /* set %eax if Compare was != */ 462 | void SetNEqual() 463 | { 464 | 
EmitLn("setne %al"); 465 | EmitLn("movsx %al, %eax"); 466 | } 467 | 468 | /* set %eax if Compare was > */ 469 | void SetGreater() 470 | { 471 | EmitLn("setl %al"); 472 | EmitLn("movsx %al, %eax"); 473 | } 474 | 475 | /* set %eax if Compare was >= */ 476 | void SetGreaterOrEqual() 477 | { 478 | EmitLn("setle %al"); 479 | EmitLn("movsx %al, %eax"); 480 | } 481 | 482 | /* set %eax if Compare was < */ 483 | void SetLess() 484 | { 485 | EmitLn("setg %al"); 486 | EmitLn("movsx %al, %eax"); 487 | } 488 | 489 | /* set %eax if Compare was <= */ 490 | void SetLessOrEqual() 491 | { 492 | EmitLn("setge %al"); 493 | EmitLn("movsx %al, %eax"); 494 | } 495 | 496 | /* Branch unconditional */ 497 | void Branch(char *label) 498 | { 499 | sprintf(tmp, "jmp %s", label); 500 | EmitLn(tmp); 501 | } 502 | 503 | /* Branch False */ 504 | void BranchFalse(char *label) 505 | { 506 | EmitLn("test $1, %eax"); 507 | sprintf(tmp, "jz %s", label); 508 | EmitLn(tmp); 509 | } 510 | -------------------------------------------------------------------------------- /11/cradle.h: -------------------------------------------------------------------------------- 1 | #ifndef _CRADLE_H 2 | #define _CRADLE_H 3 | #include 4 | 5 | #define MAX_BUF 100 6 | #define MaxEntry 100 7 | extern char tmp[MAX_BUF]; 8 | extern const char *ST[]; 9 | extern char SType[]; 10 | extern char Token; 11 | extern char Value[MAX_BUF]; 12 | char Look; 13 | 14 | void GetChar(); 15 | 16 | void Error(char *s); 17 | void Abort(char *s); 18 | void Expected(char *s); 19 | void Match(char x); 20 | void MatchString(char *str); 21 | 22 | int IsAlpha(char c); 23 | int IsDigit(char c); 24 | int IsAddop(char c); 25 | int IsMulop(char c); 26 | int IsOrOp(char c); 27 | int IsRelop(char c); 28 | int IsWhite(char c); 29 | int IsAlNum(char c); 30 | 31 | void GetName(); 32 | void GetNum(); 33 | void GetOp(); 34 | void Next(); 35 | 36 | void Emit(char *s); 37 | void EmitLn(char *s); 38 | 39 | void Init(); 40 | void InitTable(); 41 | int Locate(char 
*symbol); 42 | bool InTable(char *symbol); 43 | void CheckTable(char *symbol); 44 | void CheckDup(char *symbol); 45 | void AddEntry(char *symbol, char type); 46 | 47 | char *NewLabel(); 48 | void PostLabel(char *label); 49 | void SkipWhite(); 50 | void NewLine(); 51 | void Scan(); 52 | 53 | /* re-targetable routines */ 54 | void Clear(); 55 | void Negate(); 56 | void LoadConst(char *value); 57 | void LoadVar(char *name); 58 | void Push(); 59 | void PopAdd(); 60 | void PopSub(); 61 | void PopMul(); 62 | void PopDiv(); 63 | void Store(char *name); 64 | void Undefined(char *name); 65 | void Duplicate(char *name); 66 | void NotIt(); 67 | void PopAnd(); 68 | void PopOr(); 69 | void PopXor(); 70 | void PopCompare(); 71 | void SetEqual(); 72 | void SetNEqual(); 73 | void SetGreater(); 74 | void SetGreaterOrEqual(); 75 | void SetLess(); 76 | void SetLessOrEqual(); 77 | void Branch(char *label); 78 | void BranchFalse(char *label); 79 | 80 | #endif 81 | -------------------------------------------------------------------------------- /11/main.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | 6 | #include "cradle.h" 7 | 8 | #ifdef DEBUG 9 | #define dprint(fmt, ...) printf(fmt, __VA_ARGS__); 10 | #else 11 | #define dprint(fmt, ...) 
12 | #endif 13 | 14 | 15 | void TopDecls(); 16 | void Allocate(char *name, char *value); 17 | void Alloc(); 18 | void Block(); 19 | void Assignment(); 20 | 21 | void Factor(); 22 | void Expression(); 23 | void Subtract(); 24 | void Term(); 25 | void Divide(); 26 | void Multiply(); 27 | void FirstFactor(); 28 | void Add(); 29 | void Equals(); 30 | void NotEqual(); 31 | void Less(); 32 | void LessOrEqual(); 33 | void Greater(); 34 | void Relation(); 35 | void NotFactor(); 36 | void BoolTerm(); 37 | void BoolOr(); 38 | void BoolXor(); 39 | void BoolExpression(); 40 | void DoIf(); 41 | void DoWhile(); 42 | void CompareExpression(); 43 | void NextExpression(); 44 | 45 | void Header() 46 | { 47 | EmitLn(".global _start"); 48 | } 49 | 50 | void Prolog() 51 | { 52 | EmitLn(".section .text"); 53 | EmitLn("_start:"); 54 | } 55 | 56 | void Epilog() 57 | { 58 | EmitLn("movl %eax, %ebx"); 59 | EmitLn("movl $1, %eax"); 60 | EmitLn("int $0x80"); 61 | } 62 | 63 | void TopDecls() 64 | { 65 | Scan(); 66 | while(Token == 'v') { 67 | EmitLn(".section .data"); /* in case that the variable and function 68 | declarations are mixed */ 69 | Alloc(); 70 | while(Token == ',') { 71 | Alloc(); 72 | } 73 | } 74 | } 75 | 76 | /* Allocate Storage for a static variable */ 77 | void Allocate(char *name, char *value) 78 | { 79 | sprintf(tmp, "%s: .int %s", name, value); 80 | EmitLn(tmp); 81 | } 82 | 83 | void Alloc() 84 | { 85 | char name[MAX_BUF]; 86 | Next(); 87 | if (Token != 'x') { 88 | Expected("Variable Name"); 89 | } 90 | CheckDup(Value); 91 | 92 | sprintf(name, Value); 93 | AddEntry(name, 'v'); 94 | Next(); 95 | if (Token == '=') { 96 | Next(); 97 | if (Token != '#') { 98 | Expected("Integer"); 99 | } 100 | Allocate(name, Value); 101 | Next(); 102 | } else { 103 | Allocate(name, "0"); 104 | } 105 | } 106 | 107 | /* Parse and Translate a Block of Statements 108 | * ::= ( )* 109 | * ::= | | 110 | * */ 111 | void Block() 112 | { 113 | Scan(); 114 | while(strchr("el", Token) == NULL) { 115 | 
switch (Token) { 116 | case 'i': 117 | DoIf(); 118 | break; 119 | case 'w': 120 | DoWhile(); 121 | break; 122 | default: 123 | Assignment(); 124 | break; 125 | } 126 | Scan(); 127 | } 128 | } 129 | 130 | void Assignment() 131 | { 132 | char name[MAX_BUF]; 133 | sprintf(name, Value); 134 | Next(); 135 | MatchString("="); 136 | BoolExpression(); 137 | Store(name); 138 | } 139 | 140 | void Factor() 141 | { 142 | if (Token == '(') { 143 | Next(); 144 | BoolExpression(); 145 | MatchString(")"); 146 | } else { 147 | if (Token == 'x') { 148 | LoadVar(Value); 149 | } else if (Token == '#') { 150 | LoadConst(Value); 151 | } else { 152 | Expected("Math Factor"); 153 | } 154 | Next(); 155 | } 156 | } 157 | 158 | 159 | void Multiply() 160 | { 161 | Next(); 162 | Factor(); 163 | PopMul(); 164 | } 165 | 166 | void Divide() 167 | { 168 | Next(); 169 | Factor(); 170 | PopDiv(); 171 | } 172 | 173 | void Term() 174 | { 175 | Factor(); 176 | while(IsMulop(Token)) { 177 | Push(); 178 | switch(Token) { 179 | case '*': 180 | Multiply(); 181 | break; 182 | case '/': 183 | Divide(); 184 | break; 185 | default: 186 | break; 187 | } 188 | } 189 | } 190 | 191 | void Add() 192 | { 193 | Next(); 194 | Term(); 195 | PopAdd(); 196 | } 197 | 198 | void Subtract() 199 | { 200 | Next(); 201 | Term(); 202 | PopSub(); 203 | } 204 | 205 | void Expression() 206 | { 207 | if (IsAddop(Token)) { 208 | Clear(); 209 | } else { 210 | Term(); 211 | } 212 | 213 | while(IsAddop(Token)) { 214 | Push(); 215 | switch(Token) { 216 | case '+': 217 | Add(); 218 | break; 219 | case '-': 220 | Subtract(); 221 | break; 222 | default: 223 | break; 224 | } 225 | } 226 | } 227 | 228 | /* Get another expression and compare */ 229 | void CompareExpression() 230 | { 231 | Expression(); 232 | PopCompare(); 233 | } 234 | 235 | /* Get the next expression and compare */ 236 | void NextExpression() 237 | { 238 | Next(); 239 | CompareExpression(); 240 | } 241 | 242 | /* Recognize and Translate a Relational "Equals" */ 243 | void 
Equals() 244 | { 245 | NextExpression(); 246 | SetEqual(); 247 | } 248 | 249 | /* Recognize and Translate a Relational "Not Equals" */ 250 | void NotEqual() 251 | { 252 | NextExpression(); 253 | SetNEqual(); 254 | } 255 | 256 | /* Recognize and Translate a Relational "Less Than" */ 257 | void Less() 258 | { 259 | Next(); 260 | switch(Token) { 261 | case '=': 262 | LessOrEqual(); 263 | break; 264 | case '>': 265 | NotEqual(); 266 | break; 267 | default: 268 | CompareExpression(); 269 | SetLess(); 270 | break; 271 | } 272 | } 273 | 274 | /* Recognize and Translate a Relational "Less or Equal" */ 275 | void LessOrEqual() 276 | { 277 | NextExpression(); 278 | SetLessOrEqual(); 279 | } 280 | 281 | /* Recognize and Translate a Relational "Greater Than" */ 282 | void Greater() 283 | { 284 | Next(); 285 | if (Token == '=') { 286 | NextExpression(); 287 | SetGreaterOrEqual(); 288 | } else { 289 | CompareExpression(); 290 | SetGreater(); 291 | } 292 | } 293 | 294 | /* Parse and Translate a Relation */ 295 | void Relation() 296 | { 297 | Expression(); 298 | if (IsRelop(Token)) { 299 | Push(); 300 | switch (Token) { 301 | case '=': 302 | Equals(); 303 | break; 304 | case '<': 305 | Less(); 306 | break; 307 | case '>': 308 | Greater(); 309 | break; 310 | default: 311 | break; 312 | } 313 | } 314 | } 315 | 316 | /* Parse and Translate a Boolean Factor with Leading NOT */ 317 | void NotFactor() 318 | { 319 | if (Token == '!') { 320 | Next(); 321 | Relation(); 322 | NotIt(); 323 | } else { 324 | Relation(); 325 | } 326 | } 327 | 328 | /* Parse and Translate a Boolean Term 329 | * ::= ( and_op ::= ( or_op )* */ 360 | void BoolExpression() 361 | { 362 | BoolTerm(); 363 | while(IsOrOp(Token)) { 364 | Push(); 365 | switch(Look) { 366 | case '|': 367 | BoolOr(); 368 | break; 369 | case '~': 370 | BoolXor(); 371 | break; 372 | default: 373 | break; 374 | } 375 | } 376 | } 377 | 378 | /* Recognize and Translate an IF construct */ 379 | void DoIf() 380 | { 381 | Next(); 382 | char 
L1[MAX_BUF]; 383 | char L2[MAX_BUF]; 384 | sprintf(L1, NewLabel()); 385 | sprintf(L2, L1); 386 | BoolExpression(); 387 | BranchFalse(L1); 388 | Block(); 389 | if (Token == 'l') { 390 | Next(); 391 | sprintf(L2, NewLabel()); 392 | Branch(L2); 393 | PostLabel(L1); 394 | Block(); 395 | } 396 | PostLabel(L2); 397 | MatchString("ENDIF"); 398 | } 399 | 400 | void DoWhile() 401 | { 402 | Next(); 403 | char L1[MAX_BUF]; 404 | char L2[MAX_BUF]; 405 | sprintf(L1, NewLabel()); 406 | sprintf(L2, NewLabel()); 407 | PostLabel(L1); 408 | BoolExpression(); 409 | BranchFalse(L2); 410 | Block(); 411 | MatchString("ENDWHILE"); 412 | Branch(L1); 413 | PostLabel(L2); 414 | } 415 | 416 | 417 | int main() 418 | { 419 | Init(); 420 | MatchString("PROGRAM"); 421 | Header(); 422 | TopDecls(); 423 | MatchString("BEGIN"); 424 | Prolog(); 425 | Block(); 426 | MatchString("END"); 427 | Epilog(); 428 | 429 | return 0; 430 | } 431 | -------------------------------------------------------------------------------- /11/prog.txt: -------------------------------------------------------------------------------- 1 | PROGRAM 2 | VAR xx, 3 | yy=1, 4 | zz=10 5 | BEGIN 6 | WHILE yy <= zz 7 | IF yy <> 5 8 | xx=xx+yy 9 | ELSE 10 | xx=xx+5 11 | ENDIF 12 | yy=yy+1 13 | ENDWHILE 14 | END. 
15 | 16 | -------------------------------------------------------------------------------- /12/Makefile: -------------------------------------------------------------------------------- 1 | IN=main.c cradle.c 2 | OUT=main 3 | FLAGS=-Wall -Werror 4 | 5 | all: 6 | gcc -o $(OUT) $(IN) $(FLAGS) 7 | 8 | run: 9 | ./$(OUT) 10 | 11 | .PHONY: clean 12 | clean: 13 | rm $(OUT) 14 | -------------------------------------------------------------------------------- /12/cradle.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | 6 | #include "cradle.h" 7 | #include 8 | 9 | #define MaxEntry 100 10 | #define MAX_SYMBOL_LENGTH 10 11 | static int LCount = 0; 12 | static char labelName[MAX_BUF]; 13 | char tmp[MAX_BUF]; 14 | char TempChar; 15 | 16 | /*char ST[TABLE_SIZE];*/ 17 | static int NEntry = 0; 18 | const char *ST[MaxEntry]; 19 | char SType[MaxEntry]; 20 | 21 | 22 | /* Keywords symbol table */ 23 | const char const *KWList[] = { 24 | "IF", 25 | "ELSE", 26 | "ENDIF", 27 | "WHILE", 28 | "ENDWHILE", 29 | "VAR", 30 | "END", 31 | }; 32 | const char KWCode[] = "xileweve"; 33 | const int KWNum = sizeof(KWList)/sizeof(*KWList); 34 | 35 | char Token; /* current token */ 36 | char Value[MAX_BUF]; /* string token of Look */ 37 | 38 | /* Helper Functions */ 39 | char uppercase(char c) 40 | { 41 | if (IsAlpha(c)) { 42 | return (c & 0xDF); 43 | } else { 44 | return c; 45 | } 46 | } 47 | 48 | /* Table Lookup 49 | * If the input string matches a table entry, return the entry index, else 50 | * return -1. 51 | * *n* is the size of the table */ 52 | int Lookup(const char const *table[], const char *string, int n) 53 | { 54 | int i; 55 | bool found = false; 56 | 57 | for (i = 0; i < n; ++i) { 58 | if (strcmp(table[i], string) == 0) { 59 | found = true; 60 | break; 61 | } 62 | } 63 | return found ? 
i : -1; 64 | } 65 | 66 | int Locate(char *symbol) 67 | { 68 | return Lookup(ST, symbol, NEntry); 69 | } 70 | 71 | /* Add a new entry to symbol table */ 72 | void AddEntry(char *symbol, char type) 73 | { 74 | CheckDup(symbol); 75 | if (NEntry == MaxEntry) { 76 | Abort("Symbol Table Full"); 77 | } 78 | 79 | char *new_entry = (char *)malloc((strlen(symbol)+1)*sizeof(*new_entry)); 80 | if (new_entry == NULL) { 81 | Abort("AddEntry: not enough memory allocating new_entry."); 82 | } 83 | strcpy(new_entry, symbol); 84 | ST[NEntry] = new_entry; 85 | SType[NEntry] = type; 86 | 87 | NEntry++; 88 | } 89 | 90 | /* Get an Identifier and Scan it for keywords */ 91 | void Scan() 92 | { 93 | if (Token == 'x') { 94 | int index = Lookup(KWList, Value, KWNum); 95 | Token = KWCode[index+1]; 96 | } 97 | } 98 | 99 | void MatchString(char *str) 100 | { 101 | if (strcmp(Value, str) != 0) { 102 | sprintf(tmp, "\"%s\"", str); 103 | Expected(tmp); 104 | } 105 | Next(); 106 | } 107 | 108 | void GetCharX() 109 | { 110 | Look = getchar(); 111 | /* printf("Getchar: %c\n", Look); */ 112 | } 113 | 114 | void GetChar() 115 | { 116 | if (TempChar != ' ') { 117 | Look = TempChar; 118 | TempChar = ' '; 119 | } else { 120 | GetCharX(); 121 | if (Look == '/') { 122 | TempChar = getchar(); 123 | if (TempChar == '*') { 124 | Look = '{'; 125 | TempChar = ' '; 126 | } 127 | } 128 | } 129 | } 130 | 131 | void Error(char *s) 132 | { 133 | printf("\nError: %s.", s); 134 | } 135 | 136 | void Abort(char *s) 137 | { 138 | Error(s); 139 | exit(1); 140 | } 141 | 142 | 143 | void Expected(char *s) 144 | { 145 | sprintf(tmp, "%s Expected", s); 146 | Abort(tmp); 147 | } 148 | 149 | 150 | void Match(char x) 151 | { 152 | NewLine(); 153 | if(Look == x) { 154 | GetChar(); 155 | } else { 156 | sprintf(tmp, "' %c ' ", x); 157 | Expected(tmp); 158 | } 159 | SkipWhite(); 160 | } 161 | 162 | int IsAlpha(char c) 163 | { 164 | return (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z'); 165 | } 166 | 167 | int IsDigit(char c) 168 | 
{ 169 | return (c >= '0') && (c <= '9'); 170 | } 171 | 172 | int IsAddop(char c) 173 | { 174 | return (c == '+') || (c == '-'); 175 | } 176 | 177 | int IsMulop(char c) 178 | { 179 | return (c == '*') || (c == '/'); 180 | } 181 | 182 | int IsOrOp(char c) 183 | { 184 | return strchr("|~", c) != NULL; 185 | } 186 | 187 | int IsRelop(char c) 188 | { 189 | return strchr("=#<>", c) != NULL; 190 | } 191 | 192 | int IsWhite(char c) 193 | { 194 | return strchr(" \t\r\n{", c) != NULL; 195 | } 196 | 197 | int IsAlNum(char c) 198 | { 199 | return IsAlpha(c) || IsDigit(c); 200 | } 201 | 202 | void GetName() 203 | { 204 | SkipWhite(); 205 | if( !IsAlpha(Look)) { 206 | Expected("Name"); 207 | } 208 | 209 | Token = 'x'; 210 | char *p = Value; 211 | do { 212 | *p++ = uppercase(Look); 213 | GetChar(); 214 | } while(IsAlNum(Look)) ; 215 | *p = '\0'; 216 | } 217 | 218 | void GetNum() 219 | { 220 | SkipWhite(); 221 | if( !IsDigit(Look)) { 222 | Expected("Integer"); 223 | } 224 | 225 | Token = '#'; 226 | char *p = Value; 227 | do { 228 | *p++ = Look; 229 | GetChar(); 230 | } while (IsDigit(Look)); 231 | *p = '\0'; 232 | } 233 | 234 | /* Get an operator */ 235 | void GetOp() 236 | { 237 | SkipWhite(); 238 | Token = Look; 239 | Value[0] = Look; 240 | Value[1] = '\0'; 241 | GetChar(); 242 | } 243 | 244 | /* Get the next input token */ 245 | void Next() 246 | { 247 | SkipWhite(); 248 | if (IsAlpha(Look)) { 249 | GetName(); 250 | } else if (IsDigit(Look)) { 251 | GetNum(); 252 | } else { 253 | GetOp(); 254 | } 255 | } 256 | 257 | void Emit(char *s) 258 | { 259 | printf("\t%s", s); 260 | } 261 | 262 | void EmitLn(char *s) 263 | { 264 | Emit(s); 265 | printf("\n"); 266 | } 267 | 268 | void Init() 269 | { 270 | LCount = 0; 271 | 272 | InitTable(); 273 | GetChar(); 274 | Next(); 275 | } 276 | 277 | void InitTable() 278 | { 279 | int i; 280 | for (i = 0; i < MaxEntry; i++) { 281 | ST[i] = NULL; 282 | SType[i] = ' '; 283 | } 284 | 285 | } 286 | 287 | /* look for symbol in table */ 288 | bool 
InTable(char *symbol) 289 | { 290 | return Locate(symbol) != -1; 291 | } 292 | 293 | /* Check to see if an identifier is in the symbol table, 294 | * report an error if it's not */ 295 | void CheckTable(char *symbol) 296 | { 297 | if (! InTable(symbol)) { 298 | Undefined(symbol); 299 | } 300 | } 301 | 302 | void CheckDup(char *symbol) 303 | { 304 | if (InTable(symbol)) { 305 | Duplicate(symbol); 306 | } 307 | } 308 | 309 | char *NewLabel() 310 | { 311 | sprintf(labelName, "L%02d", LCount); 312 | LCount ++; 313 | return labelName; 314 | } 315 | 316 | void PostLabel(char *label) 317 | { 318 | printf("%s:\n", label); 319 | } 320 | 321 | void SkipWhite() 322 | { 323 | while (IsWhite(Look)) { 324 | if (Look == '{') { 325 | SkipComment(); 326 | } else { 327 | GetChar(); 328 | } 329 | } 330 | } 331 | 332 | void SkipComment() 333 | { 334 | do { 335 | do { 336 | GetChar(); 337 | if (Look == '{') { 338 | SkipComment(); 339 | } 340 | } while(Look != '*'); 341 | GetCharX(); 342 | } while(Look != '/'); 343 | GetChar(); 344 | } 345 | 346 | /* Skip over an End-of-Line */ 347 | void NewLine() 348 | { 349 | while(Look == '\n') { 350 | GetChar(); 351 | if (Look == '\r') { 352 | GetChar(); 353 | } 354 | SkipWhite(); 355 | } 356 | } 357 | 358 | /* re-targetable routines */ 359 | void Clear() 360 | { 361 | EmitLn("xor %eax, %eax"); 362 | } 363 | 364 | void Negate() 365 | { 366 | EmitLn("neg %eax"); 367 | } 368 | 369 | void LoadConst(char *value) 370 | { 371 | sprintf(tmp, "movl $%s, %%eax", value); 372 | EmitLn(tmp); 373 | } 374 | 375 | /* Load a variable to primary register */ 376 | void LoadVar(char *name) 377 | { 378 | if (!InTable(name)) { 379 | char name_string[MAX_BUF]; 380 | Undefined(name_string); 381 | } 382 | sprintf(tmp, "movl %s, %%eax", name); 383 | EmitLn(tmp); 384 | } 385 | 386 | 387 | /* Push Primary onto stack */ 388 | void Push() 389 | { 390 | EmitLn("pushl %eax"); 391 | } 392 | 393 | /* Add Top of Stack to primary */ 394 | void PopAdd() 395 | { 396 | EmitLn("addl 
(%esp), %eax"); 397 | EmitLn("addl $4, %esp"); 398 | } 399 | 400 | /* Subtract Primary from Top of Stack */ 401 | void PopSub() 402 | { 403 | EmitLn("subl (%esp), %eax"); 404 | EmitLn("neg %eax"); 405 | EmitLn("addl $4, %esp"); 406 | } 407 | 408 | /* multiply top of stack by primary */ 409 | void PopMul() 410 | { 411 | EmitLn("imull (%esp), %eax"); 412 | EmitLn("addl $4, %esp"); 413 | } 414 | 415 | /* divide top of stack by primary */ 416 | void PopDiv() 417 | { 418 | /* for a expersion like a/b we have eax=b and %(esp)=a 419 | * but we need eax=a, and b on the stack 420 | */ 421 | EmitLn("movl (%esp), %edx"); 422 | EmitLn("addl $4, %esp"); 423 | EmitLn("pushl %eax"); 424 | EmitLn("movl %edx, %eax"); 425 | 426 | /* sign extesnion */ 427 | EmitLn("sarl $31, %edx"); 428 | EmitLn("idivl (%esp)"); 429 | EmitLn("addl $4, %esp"); 430 | } 431 | 432 | /* store primary to variable */ 433 | void Store(char *name) 434 | { 435 | if (!InTable(name)) { 436 | char name_string[MAX_BUF]; 437 | Undefined(name_string); 438 | } 439 | sprintf(tmp, "movl %%eax, %s", name); 440 | EmitLn(tmp); 441 | } 442 | 443 | void Undefined(char *name) 444 | { 445 | sprintf(tmp, "Undefined Identifier: %s", name); 446 | Abort(tmp); 447 | } 448 | 449 | void Duplicate(char *name) 450 | { 451 | sprintf(tmp, "Duplicate Identifier: %s", name); 452 | Abort(tmp); 453 | } 454 | 455 | /* Complement the primary register */ 456 | void NotIt() 457 | { 458 | EmitLn("not %eax"); 459 | } 460 | 461 | /* AND top of Stack with primary */ 462 | void PopAnd() 463 | { 464 | EmitLn("and (%esp), %eax"); 465 | EmitLn("addl $4, %esp"); 466 | } 467 | 468 | /* OR top of Stack with primary */ 469 | void PopOr() 470 | { 471 | EmitLn("or (%esp), %eax"); 472 | EmitLn("addl $4, %esp"); 473 | } 474 | 475 | /* XOR top of Stack with primary */ 476 | void PopXor() 477 | { 478 | EmitLn("xor (%esp), %eax"); 479 | EmitLn("addl $4, %esp"); 480 | } 481 | 482 | /* Compare top of Stack with primary */ 483 | void PopCompare() 484 | { 485 | 
/*
 * Shared tail of every Set* routine: emit the given setcc instruction on
 * %al, then the move that widens the byte result into %eax.
 */
static void EmitSetCond(char *setcc)
{
    EmitLn(setcc);
    EmitLn("movsx %al, %eax");
}

/*
 * NOTE: PopCompare() emits `cmp -4(%esp), %eax`, i.e. the primary register
 * (right operand) is compared against the value that was pushed (left
 * operand).  The ordering conditions below are therefore mirrored relative
 * to the relation each routine is named after.
 */

/* set %eax if Compare was = */
void SetEqual()
{
    EmitSetCond("sete %al");
}

/* set %eax if Compare was != */
void SetNEqual()
{
    EmitSetCond("setne %al");
}

/* set %eax if Compare was > */
void SetGreater()
{
    EmitSetCond("setl %al");
}

/* set %eax if Compare was >= */
void SetGreaterOrEqual()
{
    EmitSetCond("setle %al");
}

/* set %eax if Compare was < */
void SetLess()
{
    EmitSetCond("setg %al");
}

/* set %eax if Compare was <= */
void SetLessOrEqual()
{
    EmitSetCond("setge %al");
}
c); 27 | int IsRelop(char c); 28 | int IsWhite(char c); 29 | int IsAlNum(char c); 30 | 31 | void GetName(); 32 | void GetNum(); 33 | void GetOp(); 34 | void Next(); 35 | 36 | void Emit(char *s); 37 | void EmitLn(char *s); 38 | 39 | void Init(); 40 | void InitTable(); 41 | int Locate(char *symbol); 42 | bool InTable(char *symbol); 43 | void CheckTable(char *symbol); 44 | void CheckDup(char *symbol); 45 | void AddEntry(char *symbol, char type); 46 | 47 | char *NewLabel(); 48 | void PostLabel(char *label); 49 | void SkipWhite(); 50 | void SkipComment(); 51 | void NewLine(); 52 | void Scan(); 53 | 54 | /* re-targetable routines */ 55 | void Clear(); 56 | void Negate(); 57 | void LoadConst(char *value); 58 | void LoadVar(char *name); 59 | void Push(); 60 | void PopAdd(); 61 | void PopSub(); 62 | void PopMul(); 63 | void PopDiv(); 64 | void Store(char *name); 65 | void Undefined(char *name); 66 | void Duplicate(char *name); 67 | void NotIt(); 68 | void PopAnd(); 69 | void PopOr(); 70 | void PopXor(); 71 | void PopCompare(); 72 | void SetEqual(); 73 | void SetNEqual(); 74 | void SetGreater(); 75 | void SetGreaterOrEqual(); 76 | void SetLess(); 77 | void SetLessOrEqual(); 78 | void Branch(char *label); 79 | void BranchFalse(char *label); 80 | 81 | #endif 82 | -------------------------------------------------------------------------------- /12/main.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | 6 | #include "cradle.h" 7 | 8 | #ifdef DEBUG 9 | #define dprint(fmt, ...) printf(fmt, __VA_ARGS__); 10 | #else 11 | #define dprint(fmt, ...) 
12 | #endif 13 | 14 | 15 | void TopDecls(); 16 | void Allocate(char *name, char *value); 17 | void Alloc(); 18 | void Block(); 19 | void Assignment(); 20 | 21 | void Factor(); 22 | void Expression(); 23 | void Subtract(); 24 | void Term(); 25 | void Divide(); 26 | void Multiply(); 27 | void FirstFactor(); 28 | void Add(); 29 | void Equals(); 30 | void NotEqual(); 31 | void Less(); 32 | void LessOrEqual(); 33 | void Greater(); 34 | void Relation(); 35 | void NotFactor(); 36 | void BoolTerm(); 37 | void BoolOr(); 38 | void BoolXor(); 39 | void BoolExpression(); 40 | void DoIf(); 41 | void DoWhile(); 42 | void CompareExpression(); 43 | void NextExpression(); 44 | 45 | void Semi(); 46 | 47 | void Header() 48 | { 49 | EmitLn(".global _start"); 50 | } 51 | 52 | void Prolog() 53 | { 54 | EmitLn(".section .text"); 55 | EmitLn("_start:"); 56 | } 57 | 58 | void Epilog() 59 | { 60 | EmitLn("movl %eax, %ebx"); 61 | EmitLn("movl $1, %eax"); 62 | EmitLn("int $0x80"); 63 | } 64 | 65 | void TopDecls() 66 | { 67 | Scan(); 68 | while(Token == 'v') { 69 | EmitLn(".section .data"); /* in case that the variable and function 70 | declarations are mixed */ 71 | Alloc(); 72 | while(Token == ',') { 73 | Alloc(); 74 | } 75 | Semi(); 76 | } 77 | } 78 | 79 | /* Allocate Storage for a static variable */ 80 | void Allocate(char *name, char *value) 81 | { 82 | sprintf(tmp, "%s: .int %s", name, value); 83 | EmitLn(tmp); 84 | } 85 | 86 | void Alloc() 87 | { 88 | char name[MAX_BUF]; 89 | Next(); 90 | if (Token != 'x') { 91 | Expected("Variable Name"); 92 | } 93 | CheckDup(Value); 94 | 95 | sprintf(name, Value); 96 | AddEntry(name, 'v'); 97 | Next(); 98 | if (Token == '=') { 99 | Next(); 100 | if (Token != '#') { 101 | Expected("Integer"); 102 | } 103 | Allocate(name, Value); 104 | Next(); 105 | } else { 106 | Allocate(name, "0"); 107 | } 108 | } 109 | 110 | /* Parse and Translate a Block of Statements 111 | * ::= ( )* 112 | * ::= | | 113 | * */ 114 | void Block() 115 | { 116 | Scan(); 117 | 
while(strchr("el", Token) == NULL) { 118 | switch (Token) { 119 | case 'i': 120 | DoIf(); 121 | break; 122 | case 'w': 123 | DoWhile(); 124 | break; 125 | case 'x': 126 | Assignment(); 127 | break; 128 | default: 129 | break; 130 | } 131 | Semi(); 132 | Scan(); 133 | } 134 | } 135 | 136 | void Assignment() 137 | { 138 | char name[MAX_BUF]; 139 | sprintf(name, Value); 140 | Next(); 141 | MatchString("="); 142 | BoolExpression(); 143 | Store(name); 144 | } 145 | 146 | void Factor() 147 | { 148 | if (Token == '(') { 149 | Next(); 150 | BoolExpression(); 151 | MatchString(")"); 152 | } else { 153 | if (Token == 'x') { 154 | LoadVar(Value); 155 | } else if (Token == '#') { 156 | LoadConst(Value); 157 | } else { 158 | Expected("Math Factor"); 159 | } 160 | Next(); 161 | } 162 | } 163 | 164 | 165 | void Multiply() 166 | { 167 | Next(); 168 | Factor(); 169 | PopMul(); 170 | } 171 | 172 | void Divide() 173 | { 174 | Next(); 175 | Factor(); 176 | PopDiv(); 177 | } 178 | 179 | void Term() 180 | { 181 | Factor(); 182 | while(IsMulop(Token)) { 183 | Push(); 184 | switch(Token) { 185 | case '*': 186 | Multiply(); 187 | break; 188 | case '/': 189 | Divide(); 190 | break; 191 | default: 192 | break; 193 | } 194 | } 195 | } 196 | 197 | void Add() 198 | { 199 | Next(); 200 | Term(); 201 | PopAdd(); 202 | } 203 | 204 | void Subtract() 205 | { 206 | Next(); 207 | Term(); 208 | PopSub(); 209 | } 210 | 211 | void Expression() 212 | { 213 | if (IsAddop(Token)) { 214 | Clear(); 215 | } else { 216 | Term(); 217 | } 218 | 219 | while(IsAddop(Token)) { 220 | Push(); 221 | switch(Token) { 222 | case '+': 223 | Add(); 224 | break; 225 | case '-': 226 | Subtract(); 227 | break; 228 | default: 229 | break; 230 | } 231 | } 232 | } 233 | 234 | /* Get another expression and compare */ 235 | void CompareExpression() 236 | { 237 | Expression(); 238 | PopCompare(); 239 | } 240 | 241 | /* Get the next expression and compare */ 242 | void NextExpression() 243 | { 244 | Next(); 245 | CompareExpression(); 
246 | } 247 | 248 | /* Recognize and Translate a Relational "Equals" */ 249 | void Equals() 250 | { 251 | NextExpression(); 252 | SetEqual(); 253 | } 254 | 255 | /* Recognize and Translate a Relational "Not Equals" */ 256 | void NotEqual() 257 | { 258 | NextExpression(); 259 | SetNEqual(); 260 | } 261 | 262 | /* Recognize and Translate a Relational "Less Than" */ 263 | void Less() 264 | { 265 | Next(); 266 | switch(Token) { 267 | case '=': 268 | LessOrEqual(); 269 | break; 270 | case '>': 271 | NotEqual(); 272 | break; 273 | default: 274 | CompareExpression(); 275 | SetLess(); 276 | break; 277 | } 278 | } 279 | 280 | /* Recognize and Translate a Relational "Less or Equal" */ 281 | void LessOrEqual() 282 | { 283 | NextExpression(); 284 | SetLessOrEqual(); 285 | } 286 | 287 | /* Recognize and Translate a Relational "Greater Than" */ 288 | void Greater() 289 | { 290 | Next(); 291 | if (Token == '=') { 292 | NextExpression(); 293 | SetGreaterOrEqual(); 294 | } else { 295 | CompareExpression(); 296 | SetGreater(); 297 | } 298 | } 299 | 300 | /* Parse and Translate a Relation */ 301 | void Relation() 302 | { 303 | Expression(); 304 | if (IsRelop(Token)) { 305 | Push(); 306 | switch (Token) { 307 | case '=': 308 | Equals(); 309 | break; 310 | case '<': 311 | Less(); 312 | break; 313 | case '>': 314 | Greater(); 315 | break; 316 | default: 317 | break; 318 | } 319 | } 320 | } 321 | 322 | /* Parse and Translate a Boolean Factor with Leading NOT */ 323 | void NotFactor() 324 | { 325 | if (Token == '!') { 326 | Next(); 327 | Relation(); 328 | NotIt(); 329 | } else { 330 | Relation(); 331 | } 332 | } 333 | 334 | /* Parse and Translate a Boolean Term 335 | * ::= ( and_op ::= ( or_op )* */ 366 | void BoolExpression() 367 | { 368 | BoolTerm(); 369 | while(IsOrOp(Token)) { 370 | Push(); 371 | switch(Look) { 372 | case '|': 373 | BoolOr(); 374 | break; 375 | case '~': 376 | BoolXor(); 377 | break; 378 | default: 379 | break; 380 | } 381 | } 382 | } 383 | 384 | /* Recognize and 
Translate an IF construct */ 385 | void DoIf() 386 | { 387 | Next(); 388 | char L1[MAX_BUF]; 389 | char L2[MAX_BUF]; 390 | sprintf(L1, NewLabel()); 391 | sprintf(L2, L1); 392 | BoolExpression(); 393 | BranchFalse(L1); 394 | Block(); 395 | if (Token == 'l') { 396 | Next(); 397 | sprintf(L2, NewLabel()); 398 | Branch(L2); 399 | PostLabel(L1); 400 | Block(); 401 | } 402 | PostLabel(L2); 403 | MatchString("ENDIF"); 404 | } 405 | 406 | void DoWhile() 407 | { 408 | Next(); 409 | char L1[MAX_BUF]; 410 | char L2[MAX_BUF]; 411 | sprintf(L1, NewLabel()); 412 | sprintf(L2, NewLabel()); 413 | PostLabel(L1); 414 | BoolExpression(); 415 | BranchFalse(L2); 416 | Block(); 417 | MatchString("ENDWHILE"); 418 | Branch(L1); 419 | PostLabel(L2); 420 | } 421 | 422 | void Semi() 423 | { 424 | /* make a semicolon optional */ 425 | if (Token == ';') { 426 | Next(); 427 | } 428 | } 429 | 430 | int main() 431 | { 432 | Init(); 433 | MatchString("PROGRAM"); 434 | Semi(); 435 | Header(); 436 | TopDecls(); 437 | MatchString("BEGIN"); 438 | Prolog(); 439 | Block(); 440 | MatchString("END"); 441 | Epilog(); 442 | 443 | return 0; 444 | } 445 | -------------------------------------------------------------------------------- /12/prog.txt: -------------------------------------------------------------------------------- 1 | PROGRAM; 2 | /* 3 | calculate 1+2+...+10 4 | /* nested comment */ 5 | */ 6 | VAR xx, yy=1, zz=10; 7 | BEGIN 8 | WHILE yy <= zz 9 | IF yy <> 5 10 | xx=xx+yy; 11 | ELSE 12 | xx=xx+5; 13 | ENDIF; 14 | yy=yy+1; 15 | ENDWHILE; 16 | END. 
17 | 18 | -------------------------------------------------------------------------------- /13/Makefile: -------------------------------------------------------------------------------- 1 | IN=main.c cradle.c 2 | OUT=main 3 | FLAGS=-Wall -Werror 4 | 5 | all: 6 | gcc -o $(OUT) $(IN) $(FLAGS) 7 | 8 | run: 9 | ./$(OUT) 10 | 11 | .PHONY: clean 12 | clean: 13 | rm $(OUT) 14 | -------------------------------------------------------------------------------- /13/cradle.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | 5 | #include "cradle.h" 6 | 7 | 8 | #define MaxEntry 26 9 | const char TAB = '\t'; 10 | const char CR = '\n'; 11 | const char LF = '\r'; 12 | 13 | char tmp[MAX_BUF]; /* temporary buffer */ 14 | 15 | char Look; 16 | char ST[MaxEntry]; /* symbol table */ 17 | int Params[MaxEntry]; /* parameter table */ 18 | int NumParams = 0; 19 | int Base; 20 | 21 | /* read new character from input stream */ 22 | void GetChar() 23 | { 24 | Look = getchar(); 25 | } 26 | 27 | /* Report an Error */ 28 | void Error(char *str) 29 | { 30 | printf("\n"); 31 | printf("\aError: %s.\n", str); 32 | } 33 | 34 | /* report Error and Halt */ 35 | void Abort(char *str) 36 | { 37 | Error(str); 38 | exit(1); 39 | } 40 | 41 | /* report what was expected */ 42 | void Expected(char *str) 43 | { 44 | sprintf(tmp, "Expected: %s", str); 45 | Abort(tmp); 46 | } 47 | 48 | /* report an undefined identifier */ 49 | void Undefined(char symbol) 50 | { 51 | sprintf(tmp, "Undefined Identifier: %c", symbol); 52 | Abort(tmp); 53 | } 54 | 55 | /* report an duplicate identifier */ 56 | void Duplicate(char symbol) 57 | { 58 | sprintf(tmp, "Duplicate Identifier: %c", symbol); 59 | Abort(tmp); 60 | } 61 | 62 | /* Get type of symbole */ 63 | char TypeOf(char symbol) 64 | { 65 | if (IsParam(symbol)) { 66 | return 'f'; 67 | } else { 68 | return ST[symbol - 'A']; 69 | } 70 | } 71 | 72 | /* check if a symbol is in table */ 73 | bool 
/*
 * Character-class predicates built on strchr().
 *
 * strchr() treats the terminating NUL of the search string as part of the
 * string, so a bare strchr(set, c) != NULL reports '\0' as a member of every
 * set.  Each predicate therefore rejects '\0' before searching.
 */

/* Recognize an additive operator: '+' or '-'. */
bool IsAddop(char c)
{
    return c != '\0' && strchr("+-", c) != NULL;
}

/* Recognize a multiplicative operator: '*' or '/'. */
bool IsMulop(char c)
{
    return c != '\0' && strchr("*/", c) != NULL;
}

/* Recognize a relational operator character: '=', '#', '<' or '>'. */
bool IsRelop(char c)
{
    return c != '\0' && strchr("=#<>", c) != NULL;
}

/* Recognize white space within a line: blank or tab. */
bool IsWhite(char c)
{
    return c != '\0' && strchr(" \t", c) != NULL;
}
177 | char GetName(void) 178 | { 179 | if (! IsAlpha(Look)) { 180 | Expected("Name"); 181 | } 182 | char name = upcase(Look); 183 | GetChar(); 184 | SkipWhite(); 185 | return name; 186 | } 187 | 188 | /* Get a number */ 189 | char GetNum(void) 190 | { 191 | if (!IsDigit(Look)) { 192 | Expected("Integer"); 193 | } 194 | char num = Look; 195 | GetChar(); 196 | SkipWhite(); 197 | return num; 198 | } 199 | 200 | /* output a string with TAB */ 201 | void Emit(char *str) 202 | { 203 | printf("\t%s", str); 204 | } 205 | 206 | /* Output a string with TAB and CRLF */ 207 | void EmitLn(char *str) 208 | { 209 | Emit(str); 210 | printf("\n"); 211 | } 212 | 213 | /* Post a label to output */ 214 | void PostLabel(char label) 215 | { 216 | printf("%c:\n", label); 217 | } 218 | 219 | /* Load a variable to the primary register */ 220 | void LoadVar(char name) 221 | { 222 | CheckVar(name); 223 | sprintf(tmp, "movl %c, %%eax", name); 224 | EmitLn(tmp); 225 | } 226 | 227 | /* store the primary register */ 228 | void StoreVar(char name) 229 | { 230 | CheckVar(name); 231 | sprintf(tmp, "movl %%eax, %c", name); 232 | EmitLn(tmp); 233 | } 234 | 235 | /* load a parameter to the primary register */ 236 | void LoadParam(int n) 237 | { 238 | int offset = 8 + 4*(Base - n); 239 | sprintf(tmp, "movl %d(%%ebp), %%eax", offset); 240 | EmitLn(tmp); 241 | } 242 | 243 | /* store a parameter from the primary register */ 244 | void StoreParam(int n) 245 | { 246 | int offset = 8 + 4*(Base - n); 247 | sprintf(tmp, "movl %%eax, %d(%%ebp)", offset); 248 | EmitLn(tmp); 249 | } 250 | 251 | /* push the primary register to the stack */ 252 | void Push() 253 | { 254 | EmitLn("push %eax"); 255 | } 256 | 257 | /* Adjust the stack pointer upwards by n bytes */ 258 | void CleanStack(int bytes) 259 | { 260 | if (bytes > 0) { 261 | sprintf(tmp, "addl $%d, %%esp", bytes); 262 | EmitLn(tmp); 263 | } 264 | } 265 | 266 | /* initialize the symbol table */ 267 | void InitTable(void) 268 | { 269 | int i; 270 | for (i = 0; i 
< MaxEntry; ++i) { 271 | ST[i] = ' '; 272 | } 273 | } 274 | 275 | /* initialize parameter table to NULL */ 276 | void ClearParams() 277 | { 278 | int i; 279 | for (i = 0; i < MaxEntry; ++i) { 280 | Params[i] = 0; 281 | } 282 | NumParams = 0; 283 | } 284 | 285 | /* find the parameter number */ 286 | int ParamNumber(char name) 287 | { 288 | return Params[name - 'A']; 289 | } 290 | 291 | /* see if an identifier is a parameter */ 292 | bool IsParam(char name) 293 | { 294 | return Params[name-'A'] != 0; 295 | } 296 | 297 | /* Add a new parameter to table */ 298 | void AddParam(char name) 299 | { 300 | if (IsParam(name)) { 301 | Duplicate(name); 302 | } 303 | NumParams++; 304 | Params[name - 'A'] = NumParams; 305 | } 306 | 307 | /* initialize */ 308 | void Init() 309 | { 310 | GetChar(); 311 | SkipWhite(); 312 | InitTable(); 313 | ClearParams(); 314 | } 315 | -------------------------------------------------------------------------------- /13/cradle.h: -------------------------------------------------------------------------------- 1 | #ifndef _CRADLE_H 2 | #define _CRADLE_H 3 | 4 | #include 5 | 6 | #define MAX_BUF 100 7 | extern const char TAB; 8 | extern const char CR; 9 | extern const char LF; 10 | 11 | extern char Look; /* lookahead character */ 12 | extern char ST[]; /* symbol table */ 13 | extern int Params[]; /* parameter table */ 14 | extern int NumParams; 15 | extern int Base; 16 | 17 | /* read new character from input stream */ 18 | void GetChar(); 19 | 20 | /* Report an Error */ 21 | void Error(char *str); 22 | 23 | /* report Error and Halt */ 24 | void Abort(char *str); 25 | 26 | /* report what was expected */ 27 | void Expected(char *str); 28 | 29 | /* report an undefined identifier */ 30 | void Undefined(char symbol); 31 | 32 | /* report an duplicate identifier */ 33 | void Duplicate(char symbol); 34 | 35 | /* Get type of symbole */ 36 | char TypeOf(char symbol); 37 | 38 | /* check if a symbol is in table */ 39 | bool InTable(char symbol); 40 | 41 | /* add 
a new symbol to table */ 42 | void AddEntry(char symbol, char type); 43 | 44 | /* check an entry to make sure it's a variable */ 45 | void CheckVar(char name); 46 | 47 | 48 | bool IsAlpha(char c); 49 | bool IsDigit(char c); 50 | bool IsAlNum(char c); 51 | bool IsAddop(char c); 52 | bool IsMulop(char c); 53 | bool IsRelop(char c); 54 | bool IsWhite(char c); 55 | 56 | /* skip over leading white space */ 57 | void SkipWhite(void); 58 | /* skip over an End-Of-Line */ 59 | void Fin(void); 60 | 61 | /* match a specific input character */ 62 | void Match(char c); 63 | 64 | /* Get an identifier */ 65 | char GetName(void); 66 | 67 | /* Get a number */ 68 | char GetNum(void); 69 | 70 | /* output a string with TAB */ 71 | void Emit(char *str); 72 | /* Output a string with TAB and CRLF */ 73 | void EmitLn(char *str); 74 | 75 | 76 | /* Post a label to output */ 77 | void PostLabel(char label); 78 | 79 | /* Load a variable to the primary register */ 80 | void LoadVar(char name); 81 | 82 | /* store the primary register */ 83 | void StoreVar(char name); 84 | 85 | /* load a parameter to the primary register */ 86 | void LoadParam(int n); 87 | 88 | /* store a parameter from the primary register */ 89 | void StoreParam(int n); 90 | 91 | /* push the primary register to the stack */ 92 | void Push(); 93 | 94 | /* Adjust the stack pointer upwards by n bytes */ 95 | void CleanStack(int bytes); 96 | 97 | /* initialize the symbol table */ 98 | void InitTable(void); 99 | 100 | /* initialize parameter table to NULL */ 101 | void ClearParams(); 102 | 103 | /* find the parameter number */ 104 | int ParamNumber(char name); 105 | 106 | /* see if an identifier is a parameter */ 107 | bool IsParam(char name); 108 | 109 | /* Add a new parameter to table */ 110 | void AddParam(char name); 111 | 112 | /* initialize */ 113 | void Init(void); 114 | 115 | #endif 116 | -------------------------------------------------------------------------------- /13/main.c: 
-------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | #include "cradle.h" 5 | 6 | 7 | void Expression(); 8 | void AssignOrProc(); 9 | void Assignment(char name); 10 | void DoBlock(); 11 | void BeginBlock(); 12 | void Alloc(char name); 13 | void Decl(void); 14 | void TopDecls(void); 15 | void DoProc(void); 16 | void DoMain(void); 17 | void Return(); 18 | void CallProc(char name); 19 | void Call(char name); 20 | void FormalList(); 21 | void FormalParam(); 22 | void Param(); 23 | int ParamList(); 24 | void LocDecl(); 25 | int LocDecls(); 26 | 27 | void Header(); 28 | void Prolog(); 29 | void Epilog(); 30 | 31 | void ProcProlog(char name, int num_local_params); 32 | void ProcEpilog(); 33 | 34 | 35 | /* parse and tranlate an expression 36 | * vestigial version */ 37 | void Expression() 38 | { 39 | char name = GetName(); 40 | if (IsParam(name)) { 41 | LoadParam(ParamNumber(name)); 42 | } else { 43 | LoadVar(name); 44 | } 45 | } 46 | 47 | /* decide if a statement is an assignment or procedure call */ 48 | void AssignOrProc() 49 | { 50 | char name = GetName(); 51 | char tmp_buf[MAX_BUF]; 52 | switch (TypeOf(name)) { 53 | case ' ': 54 | Undefined(name); 55 | break; 56 | case 'v': 57 | case 'f': 58 | Assignment(name); 59 | break; 60 | case 'p': 61 | CallProc(name); 62 | break; 63 | default: 64 | sprintf(tmp_buf, "Identifier %c cannot be used here", name); 65 | Abort(tmp_buf); 66 | break; 67 | } 68 | } 69 | 70 | /* parse and tranlate an assignment statement */ 71 | void Assignment(char name) 72 | { 73 | Match('='); 74 | Expression(); 75 | if (IsParam(name)) { 76 | StoreParam(ParamNumber(name)); 77 | } else { 78 | StoreVar(name); 79 | } 80 | } 81 | 82 | /* parse and translate a block of statement */ 83 | void DoBlock() 84 | { 85 | while(strchr("e", Look) == NULL) { 86 | AssignOrProc(); 87 | Fin(); 88 | } 89 | } 90 | 91 | void CallProc(char name) 92 | { 93 | int bytes_pushed = ParamList(); 94 | Call(name); 95 | 
CleanStack(bytes_pushed); 96 | } 97 | 98 | /* call a procedure */ 99 | void Call(char name) 100 | { 101 | char tmp_buf[MAX_BUF]; 102 | sprintf(tmp_buf, "call %c", name); 103 | EmitLn(tmp_buf); 104 | } 105 | 106 | /* parse and translate a Begin-Block */ 107 | void BeginBlock() 108 | { 109 | Match('b'); 110 | Fin(); 111 | DoBlock(); 112 | Match('e'); 113 | Fin(); 114 | } 115 | 116 | /* allocate storage for a variable */ 117 | void Alloc(char name) 118 | { 119 | if (InTable(name)) { 120 | Duplicate(name); 121 | } 122 | ST[name-'A'] = 'v'; 123 | printf("\t%c : .int 0\n", name); 124 | } 125 | 126 | /* parse and translate a data declaration */ 127 | void Decl(void) 128 | { 129 | printf(".section .data\n"); 130 | Match('v'); 131 | Alloc(GetName()); 132 | } 133 | 134 | /* parse and translate global declarations */ 135 | void TopDecls(void) 136 | { 137 | char tmp_buf[MAX_BUF]; 138 | while(Look != '.') { 139 | switch(Look) { 140 | case 'v': 141 | Decl(); 142 | break; 143 | case 'p': 144 | DoProc(); 145 | break; 146 | case 'P': 147 | DoMain(); 148 | break; 149 | default: 150 | sprintf(tmp_buf, "Unrecognized keyword %c", Look); 151 | Abort(tmp_buf); 152 | break; 153 | } 154 | Fin(); 155 | } 156 | } 157 | 158 | void Header() 159 | { 160 | printf(".global _start\n"); 161 | } 162 | 163 | void Prolog() 164 | { 165 | EmitLn(".section .text"); 166 | EmitLn("_start:"); 167 | } 168 | 169 | void Epilog() 170 | { 171 | EmitLn("movl %eax, %ebx"); 172 | EmitLn("movl $1, %eax"); 173 | EmitLn("int $0x80"); 174 | } 175 | 176 | void DoProc(void) 177 | { 178 | Match('p'); 179 | char name = GetName(); 180 | Fin(); 181 | if (InTable(name)) { 182 | Duplicate(name); 183 | } 184 | ST[name-'A'] = 'p'; 185 | FormalList(); 186 | int num_local_params = LocDecls(); 187 | ProcProlog(name, num_local_params); 188 | BeginBlock(); 189 | ProcEpilog(); 190 | ClearParams(); 191 | } 192 | 193 | void Return() 194 | { 195 | EmitLn("ret"); 196 | } 197 | 198 | /* parse and translate a main program 199 | *
::= PROGRAM 200 | * */ 201 | void DoMain(void) 202 | { 203 | Match('P'); 204 | char name = GetName(); 205 | Fin(); 206 | if (InTable(name)) { 207 | Duplicate(name); 208 | } 209 | Prolog(); 210 | BeginBlock(); 211 | } 212 | 213 | /* process the formal parameter list of a procedure */ 214 | void FormalList() 215 | { 216 | Match('('); 217 | if (Look != ')') { 218 | FormalParam(); 219 | while(Look == ',') { 220 | Match(','); 221 | FormalParam(); 222 | } 223 | } 224 | Match(')'); 225 | Fin(); 226 | Base = NumParams; 227 | NumParams = NumParams + 1; 228 | } 229 | 230 | /* process a formal parameter */ 231 | void FormalParam() 232 | { 233 | AddParam(GetName()); 234 | } 235 | 236 | /* process an actual parameter */ 237 | void Param() 238 | { 239 | Expression(); 240 | Push(); 241 | } 242 | 243 | /* process the parameter list for a procedure call */ 244 | int ParamList() 245 | { 246 | int num_params = 0; 247 | Match('('); 248 | if (Look != ')') { 249 | Param(); 250 | num_params++; 251 | while(Look == ',') { 252 | Match(','); 253 | Param(); 254 | num_params++; 255 | } 256 | } 257 | Match(')'); 258 | return 4*num_params; 259 | } 260 | 261 | /* write the prolog for a procedure */ 262 | void ProcProlog(char name, int num_local_params) 263 | { 264 | char tmp_buf[MAX_BUF]; 265 | PostLabel(name); 266 | EmitLn("pushl %ebp"); 267 | EmitLn("movl %esp, %ebp"); 268 | sprintf(tmp_buf, "subl $%d, %%esp", 4*num_local_params); 269 | EmitLn(tmp_buf); 270 | } 271 | 272 | /* write the epilog for a procedure */ 273 | void ProcEpilog() 274 | { 275 | EmitLn("movl %ebp, %esp"); 276 | EmitLn("popl %ebp"); 277 | EmitLn("ret"); 278 | } 279 | 280 | /* parse and translate a local data declaration */ 281 | void LocDecl() 282 | { 283 | Match('v'); 284 | AddParam(GetName()); 285 | Fin(); 286 | } 287 | 288 | /* parse and translate local declarations */ 289 | int LocDecls() 290 | { 291 | int num_params = 0; 292 | while(Look == 'v') { 293 | LocDecl(); 294 | num_params ++; 295 | } 296 | return num_params; 297 
| } 298 | 299 | int main(int argc, char *argv[]) 300 | { 301 | Init(); 302 | Header(); 303 | TopDecls(); 304 | Epilog(); 305 | return 0; 306 | } 307 | -------------------------------------------------------------------------------- /13/prog.txt: -------------------------------------------------------------------------------- 1 | vx 2 | vy 3 | vz 4 | pm(a,b,c) 5 | vt 6 | b 7 | t=a 8 | x=t 9 | e 10 | Ptb 11 | m(z,z,z) 12 | e. 13 | -------------------------------------------------------------------------------- /14/Makefile: -------------------------------------------------------------------------------- 1 | IN=main.c cradle.c 2 | OUT=main 3 | FLAGS=-Wall -Werror 4 | 5 | all: 6 | gcc -o $(OUT) $(IN) $(FLAGS) 7 | 8 | run: 9 | ./$(OUT) 10 | 11 | .PHONY: clean 12 | clean: 13 | rm $(OUT) 14 | -------------------------------------------------------------------------------- /14/README.md: -------------------------------------------------------------------------------- 1 | # Notes 2 | 3 | The x86 instruction set differs from the 68000 in several ways, and the 4 | generated code tries to mirror the author's 68k code. For example, a 68k "MOVE" 5 | instruction carries its operand size as a suffix, as in "MOVE.L A(PC),D0" or 6 | "MOVE.B A(PC),D0", while x86 selects the size through the register name, as in 7 | "MOV EAX, A" or "MOV AL, A". Moreover, AT&T syntax also puts the size on the 8 | mnemonic, as in "movl A, %eax" or "movb A, %al". 9 | 10 | As a result, the code-generating procedures may look bulky compared to the 11 | ones that generate 68k code. 
12 | -------------------------------------------------------------------------------- /14/cradle.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | 5 | #include "cradle.h" 6 | 7 | 8 | #define MaxEntry 26 9 | const char TAB = '\t'; 10 | const char CR = '\n'; 11 | const char LF = '\r'; 12 | 13 | char tmp[MAX_BUF]; /* temporary buffer */ 14 | 15 | char Look; 16 | char ST[MaxEntry]; /* symbol table */ 17 | 18 | /* read new character from input stream */ 19 | void GetChar() 20 | { 21 | Look = getchar(); 22 | } 23 | 24 | /* Report an Error */ 25 | void Error(char *str) 26 | { 27 | printf("\n"); 28 | printf("\aError: %s.\n", str); 29 | } 30 | 31 | /* report Error and Halt */ 32 | void Abort(char *str) 33 | { 34 | Error(str); 35 | exit(1); 36 | } 37 | 38 | /* report what was expected */ 39 | void Expected(char *str) 40 | { 41 | sprintf(tmp, "Expected: %s", str); 42 | Abort(tmp); 43 | } 44 | 45 | /* report an undefined identifier */ 46 | void Undefined(char name) 47 | { 48 | sprintf(tmp, "Undefined Identifier: %c", name); 49 | Abort(tmp); 50 | } 51 | 52 | /* report an duplicate identifier */ 53 | void Duplicate(char name) 54 | { 55 | sprintf(tmp, "Duplicate Identifier: %c", name); 56 | Abort(tmp); 57 | } 58 | 59 | /* Get type of symbole */ 60 | char TypeOf(char symbol) 61 | { 62 | return ST[symbol - 'A']; 63 | } 64 | 65 | /* check if a symbol is in table */ 66 | bool InTable(char symbol) 67 | { 68 | return ST[symbol - 'A'] != '?'; 69 | } 70 | 71 | /* add a new symbol to table */ 72 | void AddEntry(char symbol, char type) 73 | { 74 | CheckDup(symbol); 75 | ST[symbol-'A'] = type; 76 | } 77 | 78 | /* check an entry to make sure it's a variable */ 79 | void CheckVar(char name) 80 | { 81 | char tmp_buf[MAX_BUF]; 82 | if (!InTable(name)) { 83 | Undefined(name); 84 | } 85 | if (TypeOf(name) != 'v') { 86 | sprintf(tmp_buf, "%c is not a variable", name); 87 | Abort(tmp_buf); 88 | } 89 | } 90 | 91 | /* check 
for a duplicate variable name */ 92 | void CheckDup(char name) 93 | { 94 | if (InTable(name)) { 95 | Duplicate(name); 96 | } 97 | } 98 | 99 | /* turn a character into uppercase by clearing ASCII bit 5 */ 100 | char upcase(char c) 101 | { 102 | return (c & 0xDF); 103 | } 104 | 105 | bool IsAlpha(char c) 106 | { 107 | char upper = upcase(c); 108 | return (upper >= 'A') && (upper <= 'Z'); 109 | } 110 | 111 | bool IsDigit(char c) 112 | { 113 | return (c >= '0') && (c <= '9'); 114 | } 115 | 116 | bool IsAlNum(char c) 117 | { 118 | return IsAlpha(c) || IsDigit(c); 119 | } 120 | 121 | bool IsAddop(char c) 122 | { 123 | return strchr("+-", c) != NULL; 124 | } 125 | 126 | bool IsMulop(char c) 127 | { 128 | return strchr("*/", c) != NULL; 129 | } 130 | 131 | bool IsRelop(char c) 132 | { 133 | return strchr("=#<>", c) != NULL; 134 | } 135 | 136 | bool IsWhite(char c) 137 | { 138 | return strchr(" \t", c) != NULL; 139 | } 140 | 141 | bool IsVarType(char c) 142 | { 143 | return strchr("BWLbwl", c) != NULL; 144 | } 145 | 146 | /* get a variable type from the symbol table; aborts when the entry for name is not one of the types accepted by IsVarType */ 147 | char VarType(char name) 148 | { 149 | char type = TypeOf(name); 150 | if (!IsVarType(type)) { 151 | sprintf(tmp, "Identifier %c is not a variable", name); Abort(tmp); /* the message used to be built and then dropped, so the bad type was returned to the caller */ 152 | } 153 | return type; 154 | } 155 | 156 | /* skip over leading white space */ 157 | void SkipWhite(void) 158 | { 159 | while(IsWhite(Look)) { 160 | GetChar(); 161 | } 162 | } 163 | 164 | /* skip over an End-Of-Line */ 165 | void Fin(void) 166 | { 167 | if (Look == CR) { 168 | GetChar(); 169 | if (Look == LF) { 170 | GetChar(); 171 | } 172 | } else if (Look == LF){ 173 | GetChar(); 174 | } 175 | } 176 | 177 | /* match a specific input character */ 178 | void Match(char c) 179 | { 180 | if (Look == c) { 181 | GetChar(); 182 | } else { 183 | char tmp_buf[MAX_BUF]; 184 | sprintf(tmp_buf, "'%c'", c); 185 | Expected(tmp_buf); 186 | } 187 | SkipWhite(); 188 | } 189 | 190 | /* Get an identifier */ 191 | char GetName(void) 192 | { 193 | if (!
IsAlpha(Look)) { 194 | Expected("Name"); 195 | } 196 | char name = upcase(Look); 197 | GetChar(); 198 | SkipWhite(); 199 | return name; 200 | } 201 | 202 | /* Get a number */ 203 | int GetNum(void) 204 | { 205 | if (!IsDigit(Look)) { 206 | Expected("Integer"); 207 | } 208 | int val = 0; 209 | while(IsDigit(Look)) { 210 | val = 10*val + Look - '0'; 211 | GetChar(); 212 | } 213 | SkipWhite(); 214 | return val; 215 | } 216 | 217 | /* load a constant to the primary register */ 218 | char LoadNum(int val) 219 | { 220 | char type; 221 | if (abs(val) <= 127) { 222 | type = 'B'; 223 | } else if (abs(val) <= 32767) { 224 | type = 'W'; 225 | } else { 226 | type = 'L'; 227 | } 228 | LoadConst(val, type); 229 | return type; 230 | } 231 | 232 | /* output a string with TAB */ 233 | void Emit(char *str) 234 | { 235 | printf("\t%s", str); 236 | } 237 | 238 | /* Output a string with TAB and CRLF */ 239 | void EmitLn(char *str) 240 | { 241 | Emit(str); 242 | printf("\n"); 243 | } 244 | 245 | /* Post a label to output */ 246 | void PostLabel(char *label) 247 | { 248 | printf("%s:\n", label); 249 | } 250 | 251 | /* Load a variable to the primary register */ 252 | void LoadVar(char name, char type) 253 | { 254 | char src[MAX_BUF]; 255 | src[0] = name; 256 | src[1] = '\0'; 257 | char *dst; 258 | switch(type) { 259 | case 'B': 260 | dst = "%al"; 261 | break; 262 | case 'W': 263 | dst = "%ax"; 264 | break; 265 | case 'L': 266 | dst = "%eax"; 267 | break; 268 | default: 269 | dst = "%eax"; 270 | break; 271 | } 272 | Move(type, src, dst); 273 | } 274 | 275 | void Move(char size, char *src, char *dest) 276 | { 277 | sprintf(tmp, "MOV%c %s, %s", size, src, dest); 278 | EmitLn(tmp); 279 | } 280 | 281 | /* store the primary register */ 282 | void StoreVar(char name, char type) 283 | { 284 | char dest[MAX_BUF]; 285 | dest[0] = name; 286 | dest[1] = '\0'; 287 | char *src; 288 | switch(type) { 289 | case 'B': 290 | src = "%al"; 291 | break; 292 | case 'W': 293 | src = "%ax"; 294 | break; 295 | 
case 'L': 296 | src = "%eax"; 297 | break; 298 | default: 299 | src = "%eax"; 300 | break; 301 | } 302 | Move(type, src, dest); 303 | } 304 | 305 | /* load a variable to the primary register */ 306 | char Load(char name) 307 | { 308 | char type = VarType(name); 309 | LoadVar(name, type); 310 | return type; 311 | } 312 | 313 | /* Load a constant to the primary register */ 314 | void LoadConst(int val, char type) 315 | { 316 | char src[MAX_BUF]; 317 | sprintf(src, "$%d", val); 318 | char *dst; 319 | switch(type) { 320 | case 'B': 321 | dst = "%al"; 322 | break; 323 | case 'W': 324 | dst = "%ax"; 325 | break; 326 | case 'L': 327 | dst = "%eax"; 328 | break; 329 | default: 330 | dst = "%eax"; 331 | break; 332 | } 333 | Move(type, src, dst); 334 | } 335 | 336 | 337 | /* store a variable from the primary register */ 338 | void Store(char name, char src_type) 339 | { 340 | char dst_type = VarType(name); 341 | Convert(src_type, dst_type, 'a'); 342 | StoreVar(name, dst_type); 343 | } 344 | 345 | /* convert a data item from one type to another */ 346 | void Convert(char src, char dst, char reg) 347 | { 348 | /* this function only works when storing a variable and 349 | * (B,W) -> (W,L) 350 | * and the action are the same: zero extend %eax */ 351 | char tmp_buf[MAX_BUF]; 352 | if (src != dst) { 353 | switch(src) { 354 | case 'B': 355 | sprintf(tmp_buf, "movzx %%%cl, %%e%cx", reg, reg); 356 | EmitLn(tmp_buf); 357 | break; 358 | case 'W': 359 | sprintf(tmp_buf, "movzx %%%cx, %%e%cx", reg, reg); 360 | EmitLn(tmp_buf); 361 | break; 362 | default: 363 | break; 364 | } 365 | } 366 | } 367 | 368 | /* promote the size of a register value */ 369 | char Promote(char src_type, char dst_type, char reg) 370 | { 371 | char type = src_type; 372 | if (src_type != dst_type) { 373 | if ((src_type == 'B') || ((src_type == 'W' && dst_type == 'L'))) { 374 | Convert(src_type, dst_type, reg); 375 | type = dst_type; 376 | } 377 | } 378 | return type; 379 | } 380 | 381 | /* force both arguments to 
same type */ 382 | char SameType(char src_type, char dst_type) 383 | { 384 | src_type = Promote(src_type, dst_type, 'd'); 385 | return Promote(dst_type, src_type, 'a'); 386 | } 387 | 388 | /* initialize the symbol table */ 389 | void InitTable(void) 390 | { 391 | int i; 392 | for (i = 0; i < MaxEntry; ++i) { 393 | ST[i] = '?'; 394 | } 395 | } 396 | 397 | /* Dump the symbol table */ 398 | void DumpTable() 399 | { 400 | int i; 401 | for (i = 0; i < MaxEntry; ++i) { 402 | if (ST[i] != '?') { 403 | printf("%c: %c\n", i+'A', ST[i]); 404 | } 405 | } 406 | } 407 | 408 | /* initialize */ 409 | void Init() 410 | { 411 | GetChar(); 412 | SkipWhite(); 413 | InitTable(); 414 | } 415 | 416 | void Clear() 417 | { 418 | EmitLn("xor %eax, %eax"); 419 | } 420 | 421 | /* Push Primary onto stack */ 422 | void Push(char type) 423 | { 424 | switch(type) { 425 | case 'B': 426 | case 'W': 427 | EmitLn("pushw %ax"); 428 | break; 429 | case 'L': 430 | EmitLn("pushl %eax"); 431 | break; 432 | default: 433 | break; 434 | } 435 | } 436 | 437 | void Pop(char type) 438 | { 439 | switch(type) { 440 | case 'B': 441 | case 'W': 442 | EmitLn("popw %dx"); 443 | break; 444 | case 'L': 445 | EmitLn("popl %edx"); 446 | break; 447 | default: 448 | break; 449 | } 450 | } 451 | 452 | /* Add Top of Stack to primary */ 453 | char PopAdd(char src_type, char dst_type) 454 | { 455 | Pop(src_type); 456 | dst_type = SameType(src_type, dst_type); 457 | GenAdd(dst_type); 458 | return dst_type; 459 | 460 | EmitLn("addl (%esp), %eax"); 461 | EmitLn("addl $4, %esp"); 462 | } 463 | 464 | /* Subtract Primary from Top of Stack */ 465 | char PopSub(char src_type, char dst_type) 466 | { 467 | Pop(src_type); 468 | dst_type = SameType(src_type, dst_type); 469 | GenSub(dst_type); 470 | return dst_type; 471 | 472 | EmitLn("subl (%esp), %eax"); 473 | EmitLn("neg %eax"); 474 | EmitLn("addl $4, %esp"); 475 | } 476 | 477 | /* add top of stack to primary */ 478 | void GenAdd(char type) 479 | { 480 | switch(type) { 481 | case 'B': 
482 | EmitLn("addb %dl, %al"); 483 | break; 484 | case 'W': 485 | EmitLn("addw %dx, %ax"); 486 | break; 487 | case 'L': 488 | EmitLn("addl %edx, %eax"); 489 | break; 490 | default: 491 | EmitLn("addl %edx, %eax"); 492 | break; 493 | } 494 | } 495 | 496 | /* subtract primary from top of stack to */ 497 | void GenSub(char type) 498 | { 499 | switch(type) { 500 | case 'B': 501 | EmitLn("subb %dl, %al"); 502 | EmitLn("neg %al"); 503 | break; 504 | case 'W': 505 | EmitLn("subw %dx, %ax"); 506 | EmitLn("neg %ax"); 507 | break; 508 | case 'L': 509 | EmitLn("subl %edx, %eax"); 510 | EmitLn("neg %eax"); 511 | break; 512 | default: 513 | EmitLn("subl %edx, %eax"); 514 | EmitLn("neg %eax"); 515 | break; 516 | } 517 | } 518 | 519 | /* multiply top of stack by primary (Word) */ 520 | void GenMul() 521 | { 522 | EmitLn("imulw %dx, %ax"); 523 | } 524 | 525 | /* multiply top of stack by primary (Long) */ 526 | void GenLongMul() 527 | { 528 | EmitLn("imull %edx, %eax"); 529 | } 530 | 531 | void GenDiv() 532 | { 533 | EmitLn("Dividision not implemented yet!"); 534 | } 535 | 536 | void GenLongDiv() 537 | { 538 | EmitLn("Dividision not implemented yet!"); 539 | } 540 | 541 | /* multiply top of stack by primary */ 542 | char PopMul(char src_type, char dst_type) 543 | { 544 | Pop(src_type); 545 | char type = SameType(src_type, dst_type); 546 | Convert(type, 'W', 'd'); 547 | Convert(type, 'W', 'a'); 548 | if (type == 'L') { 549 | GenLongMul(type); 550 | } else { 551 | GenMul(); 552 | } 553 | 554 | if (type == 'B') { 555 | type = 'W'; 556 | } else { 557 | type = 'L'; 558 | } 559 | return type; 560 | } 561 | 562 | /* divide top of stack by primary */ 563 | char PopDiv(char src_type, char dst_type) 564 | { 565 | char type; 566 | Pop(src_type); 567 | Convert(src_type, 'L', 'd'); 568 | if (src_type == 'L' || dst_type == 'L') { 569 | Convert(dst_type, 'L', 'a'); 570 | GenLongDiv(); 571 | type = 'L'; 572 | } else { 573 | Convert(dst_type, 'w', 'a'); 574 | GenDiv(); 575 | type = src_type; 576 | 
} 577 | return type; 578 | } 579 | -------------------------------------------------------------------------------- /14/cradle.h: -------------------------------------------------------------------------------- 1 | #ifndef _CRADLE_H 2 | #define _CRADLE_H 3 | 4 | #include 5 | 6 | #define MAX_BUF 100 7 | extern const char TAB; 8 | extern const char CR; 9 | extern const char LF; 10 | 11 | extern char Look; /* lookahead character */ 12 | extern char ST[]; /* symbol table */ 13 | 14 | /* read new character from input stream */ 15 | void GetChar(); 16 | 17 | /* Report an Error */ 18 | void Error(char *str); 19 | 20 | /* report Error and Halt */ 21 | void Abort(char *str); 22 | 23 | /* report what was expected */ 24 | void Expected(char *str); 25 | 26 | /* report an undefined identifier */ 27 | void Undefined(char name); 28 | 29 | /* report an duplicate identifier */ 30 | void Duplicate(char name); 31 | 32 | /* Get type of symbole */ 33 | char TypeOf(char symbol); 34 | 35 | /* check if a symbol is in table */ 36 | bool InTable(char symbol); 37 | 38 | /* Dump the symbol table */ 39 | void DumpTable(); 40 | 41 | /* add a new symbol to table */ 42 | void AddEntry(char symbol, char type); 43 | 44 | /* check an entry to make sure it's a variable */ 45 | void CheckVar(char name); 46 | 47 | /* check for a duplicate variable name */ 48 | void CheckDup(char name); 49 | 50 | 51 | bool IsAlpha(char c); 52 | bool IsDigit(char c); 53 | bool IsAlNum(char c); 54 | bool IsAddop(char c); 55 | bool IsMulop(char c); 56 | bool IsRelop(char c); 57 | bool IsWhite(char c); 58 | bool IsVarType(char c); 59 | 60 | /* skip over leading white space */ 61 | void SkipWhite(void); 62 | /* skip over an End-Of-Line */ 63 | void Fin(void); 64 | 65 | /* match a specific input character */ 66 | void Match(char c); 67 | 68 | /* Get an identifier */ 69 | char GetName(void); 70 | 71 | /* Get a number */ 72 | int GetNum(void); 73 | 74 | /* load a constant to the primary register */ 75 | char LoadNum(int val); 
76 | 77 | /* output a string with TAB */ 78 | void Emit(char *str); 79 | /* Output a string with TAB and CRLF */ 80 | void EmitLn(char *str); 81 | 82 | /* Post a label to output */ 83 | void PostLabel(char *label); 84 | 85 | /* Load a variable to the primary register */ 86 | void LoadVar(char name, char type); 87 | 88 | /* Load a constant to the primary register */ 89 | void LoadConst(int val, char type); 90 | 91 | /* store the primary register */ 92 | void StoreVar(char name, char type); 93 | 94 | /* initialize the symbol table */ 95 | void InitTable(void); 96 | 97 | /* initialize */ 98 | void Init(void); 99 | 100 | /* get a variable type from the symbol table */ 101 | char VarType(char name); 102 | 103 | void Move(char size, char *src, char *dest); 104 | 105 | /* load a variable to the primary register */ 106 | char Load(char name); 107 | /* store a variable from the primary register */ 108 | void Store(char name, char src_type); 109 | 110 | /* convert a data item from one type to another */ 111 | void Convert(char src, char dst, char reg); 112 | 113 | /* promote the size of a register value */ 114 | char Promote(char src_type, char dst_type, char reg); 115 | 116 | void Clear(); 117 | void Push(char type); 118 | char PopAdd(char src_type, char dst_type); 119 | char PopSub(char src_type, char dst_type); 120 | char PopMul(char src_type, char dst_type); 121 | char PopDiv(char src_type, char dst_type); 122 | void GenAdd(char type); 123 | void GenSub(char type); 124 | void GenMul(); 125 | void GenLongDiv(); 126 | void GenLongMul(); 127 | 128 | #endif 129 | -------------------------------------------------------------------------------- /14/main.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | #include "cradle.h" 5 | 6 | char Term(); 7 | char Expression(); 8 | void Assignment(); 9 | char Factor(); 10 | void DoBlock(); 11 | void BeginBlock(); 12 | void Alloc(char name, char type); 13 | void Decl(void); 
14 | void TopDecls(void); 15 | 16 | void Header(); 17 | void Prolog(); 18 | void Epilog(); 19 | 20 | void Block(); 21 | 22 | char Unop(); 23 | char Add(char type); 24 | char Subtract(char type); 25 | char Multiply(char type); 26 | char Divide(char type); 27 | 28 | /* parse and tranlate an expression 29 | * vestigial version */ 30 | char Expression() 31 | { 32 | char type; 33 | if (IsAddop(Look)) { 34 | type = Unop(); 35 | } else { 36 | type = Term(); 37 | } 38 | 39 | while(IsAddop(Look)) { 40 | Push(type); 41 | switch (Look) { 42 | case '+': 43 | type = Add(type); 44 | break; 45 | case '-': 46 | type = Subtract(type); 47 | break; 48 | default: 49 | break; 50 | } 51 | } 52 | return type; 53 | } 54 | 55 | char Term() 56 | { 57 | char type = Factor(); 58 | while(IsMulop(Look)) { 59 | Push(type); 60 | switch (Look) { 61 | case '*': 62 | type = Multiply(type); 63 | break; 64 | case '/': 65 | type = Divide(type); 66 | break; 67 | default: 68 | break; 69 | } 70 | } 71 | return type; 72 | } 73 | 74 | /* parse and translate a Factor */ 75 | char Factor() 76 | { 77 | char type; 78 | if (Look == '(') { 79 | Match('('); 80 | type = Expression(); 81 | Match(')'); 82 | } else if (IsAlpha(Look)) { 83 | type = Load(GetName()); 84 | } else { 85 | type = LoadNum(GetNum()); 86 | } 87 | return type; 88 | } 89 | 90 | /* process a term with leading unary operator */ 91 | char Unop() 92 | { 93 | Clear(); 94 | return 'W'; 95 | } 96 | 97 | char Add(char type) 98 | { 99 | Match('+'); 100 | return PopAdd(type, Term()); 101 | } 102 | 103 | char Subtract(char type) 104 | { 105 | Match('-'); 106 | return PopSub(type, Term()); 107 | } 108 | 109 | char Multiply(char type) 110 | { 111 | Match('*'); 112 | return PopMul(type, Factor()); 113 | } 114 | 115 | char Divide(char type) 116 | { 117 | Match('/'); 118 | return PopDiv(type, Factor()); 119 | } 120 | 121 | 122 | /* parse and tranlate an assignment statement */ 123 | void Assignment() 124 | { 125 | char name = GetName(); 126 | Match('='); 127 | 
Store(name, Expression()); 128 | } 129 | 130 | void Block() 131 | { 132 | while(Look != '.') { 133 | Assignment(); 134 | Fin(); 135 | } 136 | } 137 | 138 | /* parse and translate a block of statement */ 139 | void DoBlock() 140 | { 141 | while(strchr("e", Look) == NULL) { 142 | Assignment(); 143 | Fin(); 144 | } 145 | } 146 | 147 | /* parse and translate a Begin-Block */ 148 | void BeginBlock() 149 | { 150 | Match('b'); 151 | Fin(); 152 | DoBlock(); 153 | Match('e'); 154 | Fin(); 155 | } 156 | 157 | 158 | /* Generate code for allocation of a variable */ 159 | void AllocVar(char name, char type) 160 | { 161 | char *p = ""; 162 | switch(type) { 163 | case 'B': 164 | p = "byte"; 165 | break; 166 | case 'W': 167 | p = "word"; 168 | break; 169 | case 'L': 170 | p = "long"; 171 | break; 172 | default: 173 | break; 174 | } 175 | printf("%c:\t.%s 0\n", name, p); 176 | } 177 | 178 | /* allocate storage for a variable */ 179 | void Alloc(char name, char type) 180 | { 181 | AddEntry(name, type); 182 | AllocVar(name, type); 183 | } 184 | 185 | /* parse and translate a data declaration */ 186 | void Decl(void) 187 | { 188 | char type = GetName(); 189 | Alloc(GetName(), type); 190 | } 191 | 192 | /* parse and translate global declarations */ 193 | void TopDecls(void) 194 | { 195 | printf(".section .data\n"); 196 | char tmp_buf[MAX_BUF]; 197 | while(Look != 'B') { 198 | switch(Look) { 199 | case 'b': 200 | case 'w': 201 | case 'l': 202 | Decl(); 203 | break; 204 | default: 205 | sprintf(tmp_buf, "Unrecognized keyword %c", Look); 206 | Abort(tmp_buf); 207 | break; 208 | } 209 | Fin(); 210 | } 211 | } 212 | 213 | void Header() 214 | { 215 | printf(".global _start\n"); 216 | } 217 | 218 | void Prolog() 219 | { 220 | EmitLn(".section .text"); 221 | EmitLn("_start:"); 222 | } 223 | 224 | void Epilog() 225 | { 226 | EmitLn("movl %eax, %ebx"); 227 | EmitLn("movl $1, %eax"); 228 | EmitLn("int $0x80"); 229 | } 230 | 231 | int main(int argc, char *argv[]) 232 | { 233 | Init(); 234 | 
TopDecls(); 235 | Match('B'); 236 | Fin(); 237 | Block(); 238 | DumpTable(); 239 | return 0; 240 | } 241 | -------------------------------------------------------------------------------- /14/prog.txt: -------------------------------------------------------------------------------- 1 | ba 2 | wb 3 | lc 4 | B 5 | a=10 6 | b=70000 7 | c=a+b 8 | a=c 9 | b=c 10 | . 11 | 12 | -------------------------------------------------------------------------------- /2/Makefile: -------------------------------------------------------------------------------- 1 | IN=main.c cradle.c 2 | OUT=main 3 | FLAGS=-Wall -Werror 4 | 5 | all: 6 | gcc -o $(OUT) $(IN) $(FLAGS) 7 | 8 | run: 9 | ./$(OUT) 10 | 11 | .PHONY: clean 12 | clean: 13 | rm $(OUT) 14 | -------------------------------------------------------------------------------- /2/cradle.c: -------------------------------------------------------------------------------- 1 | #include "cradle.h" 2 | #include 3 | #include 4 | 5 | 6 | void GetChar() 7 | { 8 | Look = getchar(); 9 | } 10 | 11 | 12 | void Error(char *s) 13 | { 14 | printf("\nError: %s.", s); 15 | } 16 | 17 | void Abort(char *s) 18 | { 19 | Error(s); 20 | exit(1); 21 | } 22 | 23 | 24 | void Expected(char *s) 25 | { 26 | sprintf(tmp, "%s Expected", s); 27 | Abort(tmp); 28 | } 29 | 30 | 31 | void Match(char x) 32 | { 33 | if(Look == x) { 34 | GetChar(); 35 | } else { 36 | sprintf(tmp, "' %c ' ", x); 37 | Expected(tmp); 38 | } 39 | } 40 | 41 | 42 | int IsAlpha(char c) 43 | { 44 | return (UPCASE(c) >= 'A') && (UPCASE(c) <= 'Z'); 45 | } 46 | 47 | int IsDigit(char c) 48 | { 49 | return (c >= '0') && (c <= '9'); 50 | } 51 | 52 | int IsAddop(char c) 53 | { 54 | return (c == '+') || (c == '-'); 55 | } 56 | 57 | char GetName() 58 | { 59 | char c = Look; 60 | 61 | if( !IsAlpha(Look)) { 62 | sprintf(tmp, "Name"); 63 | Expected(tmp); 64 | } 65 | 66 | GetChar(); 67 | 68 | return UPCASE(c); 69 | } 70 | 71 | 72 | char GetNum() 73 | { 74 | char c = Look; 75 | 76 | if( !IsDigit(Look)) { 77 | 
sprintf(tmp, "Integer"); 78 | Expected(tmp); 79 | } 80 | 81 | GetChar(); 82 | 83 | return c; 84 | } 85 | 86 | void Emit(char *s) 87 | { 88 | printf("\t%s", s); 89 | } 90 | 91 | void EmitLn(char *s) 92 | { 93 | Emit(s); 94 | printf("\n"); 95 | } 96 | 97 | void Init() 98 | { 99 | GetChar(); 100 | } 101 | 102 | -------------------------------------------------------------------------------- /2/cradle.h: -------------------------------------------------------------------------------- 1 | #ifndef _CRADLE_H 2 | #define _CRADLE_H 3 | #define UPCASE(C) ((1<<6)| (C)) 4 | 5 | #define MAX_BUF 100 6 | char tmp[MAX_BUF]; 7 | 8 | char Look; 9 | 10 | void GetChar(); 11 | 12 | void Error(char *s); 13 | void Abort(char *s); 14 | void Expected(char *s); 15 | void Match(char x); 16 | 17 | int IsAlpha(char c); 18 | int IsDigit(char c); 19 | int IsAddop(char c); 20 | 21 | char GetName(); 22 | char GetNum(); 23 | 24 | void Emit(char *s); 25 | void EmitLn(char *s); 26 | 27 | void Init(); 28 | 29 | #endif 30 | -------------------------------------------------------------------------------- /2/main.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | 5 | #include "cradle.h" 6 | 7 | void Term(); 8 | void Expression(); 9 | void Add(); 10 | void Substract(); 11 | void Factor(); 12 | 13 | 14 | void Multiply() 15 | { 16 | Match('*'); 17 | Factor(); 18 | EmitLn("imull (%esp), %eax"); 19 | /* push of the stack */ 20 | EmitLn("addl $4, %esp"); 21 | } 22 | 23 | void Divide() 24 | { 25 | Match('/'); 26 | Factor(); 27 | 28 | /* for a expersion like a/b we have eax=b and %(esp)=a 29 | * but we need eax=a, and b on the stack 30 | */ 31 | EmitLn("movl (%esp), %edx"); 32 | EmitLn("addl $4, %esp"); 33 | 34 | EmitLn("pushl %eax"); 35 | 36 | EmitLn("movl %edx, %eax"); 37 | 38 | /* sign extesnion */ 39 | EmitLn("sarl $31, %edx"); 40 | EmitLn("idivl (%esp)"); 41 | EmitLn("addl $4, %esp"); 42 | 43 | } 44 | 45 | void Factor() 46 | { 47 | 
48 | if(Look == '(') { 49 | 50 | Match('('); 51 | Expression(); 52 | Match(')'); 53 | } else if(IsAddop(Look)) { 54 | 55 | Match('-'); 56 | sprintf(tmp,"movl $%c, %%eax", GetNum()); 57 | EmitLn(tmp); 58 | EmitLn("negl %eax"); 59 | 60 | } else { 61 | 62 | sprintf(tmp,"movl $%c, %%eax", GetNum()); 63 | EmitLn(tmp); 64 | } 65 | } 66 | 67 | void Term() 68 | { 69 | Factor(); 70 | while (strchr("*/", Look)) { 71 | 72 | EmitLn("pushl %eax"); 73 | 74 | switch(Look) 75 | { 76 | case '*': 77 | Multiply(); 78 | break; 79 | case '/': 80 | Divide(); 81 | break; 82 | default: 83 | Expected("Mulop"); 84 | } 85 | } 86 | } 87 | 88 | void Expression() 89 | { 90 | if(IsAddop(Look)) 91 | EmitLn("xor %eax, %eax"); 92 | else 93 | Term(); 94 | 95 | while (strchr("+-", Look)) { 96 | 97 | EmitLn("pushl %eax"); 98 | 99 | switch(Look) 100 | { 101 | case '+': 102 | Add(); 103 | break; 104 | case '-': 105 | Substract(); 106 | break; 107 | default: 108 | Expected("Addop"); 109 | } 110 | } 111 | } 112 | 113 | 114 | void Add() 115 | { 116 | Match('+'); 117 | Term(); 118 | EmitLn("addl (%esp), %eax"); 119 | EmitLn("addl $4, %esp"); 120 | 121 | } 122 | 123 | 124 | void Substract() 125 | { 126 | Match('-'); 127 | Term(); 128 | EmitLn("subl (%esp), %eax"); 129 | EmitLn("negl %eax"); 130 | EmitLn("addl $4, %esp"); 131 | } 132 | 133 | 134 | int main() 135 | { 136 | 137 | Init(); 138 | EmitLn(".text"); 139 | EmitLn(".global _start"); 140 | EmitLn("_start:"); 141 | Expression(); 142 | 143 | /* return the result */ 144 | EmitLn("movl %eax, %ebx"); 145 | EmitLn("movl $1, %eax"); 146 | EmitLn("int $0x80"); 147 | return 0; 148 | } 149 | -------------------------------------------------------------------------------- /3/Makefile: -------------------------------------------------------------------------------- 1 | IN=main.c cradle.c 2 | OUT=main 3 | FLAGS=-Wall -Werror 4 | 5 | all: 6 | gcc -o $(OUT) $(IN) $(FLAGS) 7 | 8 | run: 9 | ./$(OUT) 10 | 11 | .PHONY: clean 12 | clean: 13 | rm $(OUT) 14 | 
-------------------------------------------------------------------------------- /3/cradle.c: -------------------------------------------------------------------------------- 1 | #include "cradle.h" 2 | #include 3 | #include 4 | 5 | void GetChar() 6 | { 7 | Look = getchar(); 8 | } 9 | 10 | 11 | void Error(char *s) 12 | { 13 | printf("\nError: %s.", s); 14 | } 15 | 16 | void Abort(char *s) 17 | { 18 | Error(s); 19 | exit(1); 20 | } 21 | 22 | 23 | void Expected(char *s) 24 | { 25 | sprintf(tmp, "%s Expected", s); 26 | Abort(tmp); 27 | } 28 | 29 | 30 | void Match(char x) 31 | { 32 | if(Look == x) { 33 | GetChar(); 34 | SkipWhite(); 35 | } else { 36 | sprintf(tmp, "' %c ' ", x); 37 | Expected(tmp); 38 | } 39 | } 40 | 41 | 42 | int IsAlpha(char c) 43 | { 44 | return (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z'); 45 | } 46 | 47 | int IsDigit(char c) 48 | { 49 | return (c >= '0') && (c <= '9'); 50 | } 51 | 52 | int IsAlNum(char c) 53 | { 54 | return IsAlpha(c) || IsDigit(c); 55 | } 56 | 57 | int IsAddop(char c) 58 | { 59 | return (c == '+') || (c == '-'); 60 | } 61 | 62 | int IsWhite(char c) 63 | { 64 | return (c == ' ') || (c == '\t'); 65 | } 66 | 67 | char* GetName() 68 | { 69 | char *token = token_buf; 70 | 71 | if( !IsAlNum(Look)) { 72 | Expected("Name"); 73 | } 74 | while (IsAlNum(Look)) { 75 | *token = Look; 76 | token++; 77 | 78 | GetChar(); 79 | } 80 | 81 | SkipWhite(); 82 | 83 | *token = '\0'; 84 | return token_buf; 85 | } 86 | 87 | 88 | char* GetNum() 89 | { 90 | char *value = token_buf; 91 | 92 | if( !IsAlNum(Look)) { 93 | Expected("Integer"); 94 | } 95 | while (IsDigit(Look)) { 96 | *value = Look; 97 | value++; 98 | 99 | GetChar(); 100 | } 101 | 102 | SkipWhite(); 103 | 104 | *value = '\0'; 105 | return token_buf; 106 | } 107 | 108 | void SkipWhite() 109 | { 110 | while (IsWhite(Look)) { 111 | GetChar(); 112 | } 113 | } 114 | 115 | void Emit(char *s) 116 | { 117 | printf("\t%s", s); 118 | } 119 | 120 | void EmitLn(char *s) 121 | { 122 | Emit(s); 123 | 
printf("\n"); 124 | } 125 | 126 | void Init() 127 | { 128 | GetChar(); 129 | SkipWhite(); 130 | } 131 | 132 | -------------------------------------------------------------------------------- /3/cradle.h: -------------------------------------------------------------------------------- 1 | #ifndef _CRADLE_H 2 | #define _CRADLE_H 3 | 4 | #define MAX_BUF 100 5 | char tmp[MAX_BUF]; 6 | char token_buf[MAX_BUF]; 7 | 8 | char Look; 9 | 10 | void GetChar(); 11 | 12 | void Error(char *s); 13 | void Abort(char *s); 14 | void Expected(char *s); 15 | void Match(char x); 16 | 17 | int IsAlpha(char c); 18 | int IsDigit(char c); 19 | int IsAddop(char c); 20 | int IsAlNum(char c); 21 | int IsWhite(char c); 22 | 23 | char *GetName(); 24 | char *GetNum(); 25 | 26 | void SkipWhite(); 27 | 28 | void Emit(char *s); 29 | void EmitLn(char *s); 30 | 31 | void Init(); 32 | 33 | #endif 34 | -------------------------------------------------------------------------------- /3/main.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | 5 | #include "cradle.h" 6 | 7 | void Term(); 8 | void Expression(); 9 | void Add(); 10 | void Substract(); 11 | void Factor(); 12 | void Ident(); 13 | void Assignment(); 14 | 15 | 16 | void Multiply() 17 | { 18 | Match('*'); 19 | Factor(); 20 | EmitLn("imull (%esp), %eax"); 21 | /* push of the stack */ 22 | EmitLn("addl $4, %esp"); 23 | } 24 | 25 | void Divide() 26 | { 27 | Match('/'); 28 | Factor(); 29 | 30 | /* for a expersion like a/b we have eax=b and %(esp)=a 31 | * but we need eax=a, and b on the stack 32 | */ 33 | EmitLn("movl (%esp), %edx"); 34 | EmitLn("addl $4, %esp"); 35 | 36 | EmitLn("pushl %eax"); 37 | 38 | EmitLn("movl %edx, %eax"); 39 | 40 | /* sign extesnion */ 41 | EmitLn("sarl $31, %edx"); 42 | EmitLn("idivl (%esp)"); 43 | EmitLn("addl $4, %esp"); 44 | 45 | } 46 | 47 | void Ident() 48 | { 49 | char *name = GetName(); 50 | if (Look == '(') { 51 | Match('('); 52 | Match(')'); 
53 | sprintf(tmp, "call %s", name); 54 | EmitLn(tmp); 55 | } else { 56 | sprintf(tmp, "movl %s, %%eax", name); 57 | EmitLn(tmp); 58 | } 59 | } 60 | 61 | void Factor() 62 | { 63 | if(Look == '(') { 64 | Match('('); 65 | Expression(); 66 | Match(')'); 67 | } else if(IsAddop(Look)) { 68 | Match('-'); 69 | sprintf(tmp,"movl $%s, %%eax", GetNum()); 70 | EmitLn(tmp); 71 | EmitLn("negl %eax"); 72 | } else if (IsAlpha(Look)) { 73 | Ident(); 74 | } else { 75 | sprintf(tmp,"movl $%s, %%eax", GetNum()); 76 | EmitLn(tmp); 77 | } 78 | } 79 | 80 | void Term() 81 | { 82 | Factor(); 83 | while (strchr("*/", Look)) { 84 | 85 | EmitLn("pushl %eax"); 86 | 87 | switch(Look) 88 | { 89 | case '*': 90 | Multiply(); 91 | break; 92 | case '/': 93 | Divide(); 94 | break; 95 | default: 96 | Expected("Mulop"); 97 | } 98 | } 99 | } 100 | 101 | void Expression() 102 | { 103 | if(IsAddop(Look)) 104 | EmitLn("xor %eax, %eax"); 105 | else 106 | Term(); 107 | 108 | while (strchr("+-", Look)) { 109 | 110 | EmitLn("pushl %eax"); 111 | 112 | switch(Look) 113 | { 114 | case '+': 115 | Add(); 116 | break; 117 | case '-': 118 | Substract(); 119 | break; 120 | default: 121 | Expected("Addop"); 122 | } 123 | } 124 | } 125 | 126 | 127 | void Add() 128 | { 129 | Match('+'); 130 | Term(); 131 | EmitLn("addl (%esp), %eax"); 132 | EmitLn("addl $4, %esp"); 133 | 134 | } 135 | 136 | 137 | void Substract() 138 | { 139 | Match('-'); 140 | Term(); 141 | EmitLn("subl (%esp), %eax"); 142 | EmitLn("negl %eax"); 143 | EmitLn("addl $4, %esp"); 144 | } 145 | 146 | void Assignment() 147 | { 148 | char *name = GetName(); 149 | Match('='); 150 | Expression(); 151 | sprintf(tmp, "lea %s, %%ebx", name); 152 | EmitLn(tmp); 153 | EmitLn("movl %eax, (%ebx)"); 154 | } 155 | 156 | int main() 157 | { 158 | 159 | Init(); 160 | EmitLn(".text"); 161 | EmitLn(".global _start"); 162 | EmitLn("_start:"); 163 | /* Expression(); */ 164 | Assignment(); 165 | if (Look != '\n') { 166 | Expected("NewLine"); 167 | } 168 | 169 | 170 | /* return 
the result */ 171 | EmitLn("movl %eax, %ebx"); 172 | EmitLn("movl $1, %eax"); 173 | EmitLn("int $0x80"); 174 | return 0; 175 | } 176 | -------------------------------------------------------------------------------- /4/Makefile: -------------------------------------------------------------------------------- 1 | IN=main.c cradle.c 2 | OUT=main 3 | FLAGS=-Wall -Werror 4 | 5 | all: 6 | gcc -o $(OUT) $(IN) $(FLAGS) 7 | 8 | run: 9 | ./$(OUT) 10 | 11 | .PHONY: clean 12 | clean: 13 | rm $(OUT) 14 | -------------------------------------------------------------------------------- /4/cradle.c: -------------------------------------------------------------------------------- 1 | #include "cradle.h" 2 | #include 3 | #include 4 | 5 | /* Helper Functions */ 6 | char uppercase(char c) 7 | { 8 | return (c & 0xDD); 9 | } 10 | 11 | void GetChar() 12 | { 13 | Look = getchar(); 14 | /* printf("Getchar: %c\n", Look); */ 15 | } 16 | 17 | 18 | void Error(char *s) 19 | { 20 | printf("\nError: %s.", s); 21 | } 22 | 23 | void Abort(char *s) 24 | { 25 | Error(s); 26 | exit(1); 27 | } 28 | 29 | 30 | void Expected(char *s) 31 | { 32 | sprintf(tmp, "%s Expected", s); 33 | Abort(tmp); 34 | } 35 | 36 | 37 | void Match(char x) 38 | { 39 | if(Look == x) { 40 | GetChar(); 41 | } else { 42 | sprintf(tmp, "' %c ' ", x); 43 | Expected(tmp); 44 | } 45 | } 46 | 47 | void Newline() 48 | { 49 | if (Look == '\r') { 50 | GetChar(); 51 | if (Look == '\n') { 52 | GetChar(); 53 | } 54 | } else if (Look == '\n') { 55 | GetChar(); 56 | } 57 | } 58 | 59 | int IsAlpha(char c) 60 | { 61 | return (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z'); 62 | } 63 | 64 | int IsDigit(char c) 65 | { 66 | return (c >= '0') && (c <= '9'); 67 | } 68 | 69 | int IsAddop(char c) 70 | { 71 | return (c == '+') || (c == '-'); 72 | } 73 | 74 | char GetName() 75 | { 76 | char c = Look; 77 | 78 | if( !IsAlpha(Look)) { 79 | sprintf(tmp, "Name"); 80 | Expected(tmp); 81 | } 82 | 83 | GetChar(); 84 | 85 | return uppercase(c); 86 | } 87 | 88 | 
89 | int GetNum() 90 | { 91 | int value = 0; 92 | if( !IsDigit(Look)) { 93 | sprintf(tmp, "Integer"); 94 | Expected(tmp); 95 | } 96 | 97 | while (IsDigit(Look)) { 98 | value = value * 10 + Look - '0'; 99 | GetChar(); 100 | } 101 | 102 | return value; 103 | } 104 | 105 | void Emit(char *s) 106 | { 107 | printf("\t%s", s); 108 | } 109 | 110 | void EmitLn(char *s) 111 | { 112 | Emit(s); 113 | printf("\n"); 114 | } 115 | 116 | void Init() 117 | { 118 | InitTable(); 119 | GetChar(); 120 | } 121 | 122 | void InitTable() 123 | { 124 | int i; 125 | for (i = 0; i < TABLE_SIZE; i++) { 126 | Table[i] = 0; 127 | } 128 | 129 | } 130 | -------------------------------------------------------------------------------- /4/cradle.h: -------------------------------------------------------------------------------- 1 | #ifndef _CRADLE_H 2 | #define _CRADLE_H 3 | 4 | #define MAX_BUF 100 5 | #define TABLE_SIZE 26 6 | char tmp[MAX_BUF]; 7 | 8 | char Look; 9 | int Table[TABLE_SIZE]; 10 | 11 | void GetChar(); 12 | 13 | void Error(char *s); 14 | void Abort(char *s); 15 | void Expected(char *s); 16 | void Match(char x); 17 | 18 | void Newline(); 19 | 20 | int IsAlpha(char c); 21 | int IsDigit(char c); 22 | int IsAddop(char c); 23 | 24 | char GetName(); 25 | int GetNum(); 26 | 27 | void Emit(char *s); 28 | void EmitLn(char *s); 29 | 30 | void Init(); 31 | void InitTable(); 32 | 33 | #endif 34 | -------------------------------------------------------------------------------- /4/main.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | 5 | #include "cradle.h" 6 | 7 | int Term(); 8 | int Expression(); 9 | void Add(); 10 | void Substract(); 11 | int Factor(); 12 | void Ident(); 13 | void Assignment(); 14 | 15 | 16 | /* Not used in Chapter 4 */ 17 | void Multiply() 18 | { 19 | Match('*'); 20 | Factor(); 21 | EmitLn("imull (%esp), %eax"); 22 | /* push of the stack */ 23 | EmitLn("addl $4, %esp"); 24 | } 25 | 26 | /* Not used in 
Chapter 4 */ 27 | void Divide() 28 | { 29 | Match('/'); 30 | Factor(); 31 | 32 | /* for a expersion like a/b we have eax=b and %(esp)=a 33 | * but we need eax=a, and b on the stack 34 | */ 35 | EmitLn("movl (%esp), %edx"); 36 | EmitLn("addl $4, %esp"); 37 | 38 | EmitLn("pushl %eax"); 39 | 40 | EmitLn("movl %edx, %eax"); 41 | 42 | /* sign extesnion */ 43 | EmitLn("sarl $31, %edx"); 44 | EmitLn("idivl (%esp)"); 45 | EmitLn("addl $4, %esp"); 46 | 47 | } 48 | 49 | void Ident() 50 | { 51 | char name = GetName(); 52 | if (Look == '(') { 53 | Match('('); 54 | Match(')'); 55 | sprintf(tmp, "call %c", name); 56 | EmitLn(tmp); 57 | } else { 58 | sprintf(tmp, "movl %c, %%eax", name); 59 | EmitLn(tmp); 60 | } 61 | } 62 | 63 | int Factor() 64 | { 65 | int factor; 66 | if (Look == '(') { 67 | Match('('); 68 | factor = Expression(); 69 | Match(')'); 70 | } else if (IsAlpha(Look)) { 71 | factor = Table[GetName() - 'A']; 72 | } else { 73 | factor = GetNum(); 74 | } 75 | 76 | return factor; 77 | } 78 | 79 | int Term() 80 | { 81 | int value = Factor(); 82 | while (strchr("*/", Look)) { 83 | switch(Look) 84 | { 85 | case '*': 86 | Match('*'); 87 | value *= Factor(); 88 | break; 89 | case '/': 90 | Match('/'); 91 | value /= Factor(); 92 | break; 93 | default: 94 | Expected("Mulop"); 95 | } 96 | } 97 | 98 | return value; 99 | } 100 | 101 | int Expression() 102 | { 103 | int value; 104 | if(IsAddop(Look)) 105 | value = 0; 106 | else 107 | value = Term(); 108 | 109 | while (IsAddop(Look)) { 110 | switch(Look) 111 | { 112 | case '+': 113 | Match('+'); 114 | value += Term(); 115 | break; 116 | case '-': 117 | Match('-'); 118 | value -= Term(); 119 | break; 120 | default: 121 | Expected("Addop"); 122 | } 123 | } 124 | 125 | return value; 126 | } 127 | 128 | 129 | /* Not used in Chapter 4 */ 130 | void Add() 131 | { 132 | Match('+'); 133 | Term(); 134 | EmitLn("addl (%esp), %eax"); 135 | EmitLn("addl $4, %esp"); 136 | 137 | } 138 | 139 | 140 | /* Not used in Chapter 4 */ 141 | void 
Substract() 142 | { 143 | Match('-'); 144 | Term(); 145 | EmitLn("subl (%esp), %eax"); 146 | EmitLn("negl %eax"); 147 | EmitLn("addl $4, %esp"); 148 | } 149 | 150 | void Assignment() 151 | { 152 | char name = GetName(); 153 | Match('='); 154 | Table[name - 'A'] = Expression(); 155 | } 156 | 157 | /* Input Routine 158 | * This differs slightly from the original article. The syntax of 159 | * input is "?<name><expression>" */ 160 | void Input() 161 | { 162 | Match('?'); 163 | char name = GetName(); 164 | Table[name - 'A'] = Expression(); 165 | } 166 | 167 | /* Output Routine */ 168 | void Output() 169 | { 170 | Match('!'); 171 | sprintf(tmp, "%d", Table[GetName() - 'A']); 172 | EmitLn(tmp); 173 | } 174 | 175 | int main() 176 | { 177 | 178 | Init(); 179 | do 180 | { 181 | switch(Look) { 182 | case '?': 183 | Input(); 184 | break; 185 | case '!': 186 | Output(); 187 | break; 188 | default: 189 | Assignment(); 190 | } 191 | 192 | Newline(); 193 | } while (Look != '.'); 194 | return 0; 195 | } 196 | -------------------------------------------------------------------------------- /5/Makefile: -------------------------------------------------------------------------------- 1 | IN=main.c cradle.c 2 | OUT=main 3 | FLAGS=-Wall -Werror 4 | 5 | all: 6 | gcc -o $(OUT) $(IN) $(FLAGS) 7 | 8 | run: 9 | ./$(OUT) 10 | 11 | .PHONY: clean 12 | clean: 13 | rm $(OUT) 14 | -------------------------------------------------------------------------------- /5/README.md: -------------------------------------------------------------------------------- 1 | # Some Notes on the original Article 2 | 3 | ## The IF statement 4 | The BNF shown in this chapter has some bugs. The author said that `<program>` 5 | and `<block>` are as follows: 6 | 7 | ``` 8 | <program> ::= <block> END 9 | <block> ::= [ <statement> ]* 10 | ``` 11 | 12 | That means a `<block>` can contain zero or more statements. However, there is 13 | no way to decide when to terminate a `<block>`. 
When dealing with "IF" without 14 | "ELSE", the problem is not yet obvious: 15 | 16 | ``` 17 | IF <condition> <block> ENDIF 18 | ``` 19 | 20 | Because the author implies that `<block>` should not end with the token "END", 21 | we can treat the token "ENDIF", which is represented by the character 'e', as the 22 | terminator of the block. That means source code like "aibced" is allowed, i.e. 23 | the two statements "bc" are treated as one block. 24 | 25 | When it comes to an "IF" statement with "ELSE", things change. 26 | 27 | ``` 28 | IF <condition> <block> [ ELSE <block> ] ENDIF 29 | ``` 30 | 31 | While we are parsing the first `<block>`, there is no way to stop 32 | recognizing `<block>` and match the token "ELSE". This shows up when the author 33 | proposes the test case "aiblcede". It is expected to recognize "b" as a single 34 | block and "l" as the token "ELSE", which is not possible because there is 35 | no way to determine where `<block>` ends. 36 | 37 | ``` 38 | procedure Block; 39 | begin 40 | while not(Look in ['e']) do begin 41 | case Look of 42 | 'i': DoIf; 43 | 'o': Other; 44 | end; 45 | end; 46 | end; 47 | ``` 48 | 49 | The source code implies that every block should end with the token "END", which is 50 | the character 'e'. One way to fix this is to explicitly state that a `<block>` 51 | ends with the token "END". So the test case should be "aibelceede" and the BNF 52 | should explicitly be: 53 | 54 | ``` 55 | <program> ::= <block> 56 | <block> ::= [ <statement> ]* END 57 | ``` 58 | 59 | Another way is to make the block matcher recognize the "ELSE" token, as the 60 | author did in the next section. 61 | 62 | ``` 63 | procedure Block; 64 | begin 65 | while not(Look in ['e', 'l']) do begin 66 | case Look of 67 | 'i': DoIf; 68 | 'w': DoWhile; 69 | else Other; 70 | end; 71 | end; 72 | end; 73 | ``` 74 | 75 | Also note the `else` branch instead of the original 'o' branch. 
76 | -------------------------------------------------------------------------------- /5/cradle.c: -------------------------------------------------------------------------------- 1 | #include "cradle.h" 2 | #include 3 | #include 4 | 5 | #define TABLE_SIZE 26 6 | static int LCount = 0; 7 | static char labelName[MAX_BUF]; 8 | 9 | static int Table[TABLE_SIZE]; 10 | 11 | /* Helper Functions */ 12 | char uppercase(char c) 13 | { 14 | return (c & 0xDF); 15 | } 16 | 17 | void GetChar() 18 | { 19 | Look = getchar(); 20 | /* printf("Getchar: %c\n", Look); */ 21 | } 22 | 23 | 24 | void Error(char *s) 25 | { 26 | printf("\nError: %s.", s); 27 | } 28 | 29 | void Abort(char *s) 30 | { 31 | Error(s); 32 | exit(1); 33 | } 34 | 35 | 36 | void Expected(char *s) 37 | { 38 | sprintf(tmp, "%s Expected", s); 39 | Abort(tmp); 40 | } 41 | 42 | 43 | void Match(char x) 44 | { 45 | if(Look == x) { 46 | GetChar(); 47 | } else { 48 | sprintf(tmp, "' %c ' ", x); 49 | Expected(tmp); 50 | } 51 | } 52 | 53 | void Newline() 54 | { 55 | if (Look == '\r') { 56 | GetChar(); 57 | if (Look == '\n') { 58 | GetChar(); 59 | } 60 | } else if (Look == '\n') { 61 | GetChar(); 62 | } 63 | } 64 | 65 | int IsAlpha(char c) 66 | { 67 | return (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z'); 68 | } 69 | 70 | int IsDigit(char c) 71 | { 72 | return (c >= '0') && (c <= '9'); 73 | } 74 | 75 | int IsAddop(char c) 76 | { 77 | return (c == '+') || (c == '-'); 78 | } 79 | 80 | char GetName() 81 | { 82 | char c = Look; 83 | 84 | if( !IsAlpha(Look)) { 85 | sprintf(tmp, "Name"); 86 | Expected(tmp); 87 | } 88 | 89 | GetChar(); 90 | 91 | return uppercase(c); 92 | } 93 | 94 | 95 | int GetNum() 96 | { 97 | int value = 0; 98 | if( !IsDigit(Look)) { 99 | sprintf(tmp, "Integer"); 100 | Expected(tmp); 101 | } 102 | 103 | while (IsDigit(Look)) { 104 | value = value * 10 + Look - '0'; 105 | GetChar(); 106 | } 107 | 108 | return value; 109 | } 110 | 111 | void Emit(char *s) 112 | { 113 | printf("\t%s", s); 114 | } 115 | 116 | void 
EmitLn(char *s) 117 | { 118 | Emit(s); 119 | printf("\n"); 120 | } 121 | 122 | void Init() 123 | { 124 | LCount = 0; 125 | 126 | InitTable(); 127 | GetChar(); 128 | } 129 | 130 | void InitTable() 131 | { 132 | int i; 133 | for (i = 0; i < TABLE_SIZE; i++) { 134 | Table[i] = 0; 135 | } 136 | 137 | } 138 | 139 | char *NewLabel() 140 | { 141 | sprintf(labelName, "L%02d", LCount); 142 | LCount ++; 143 | return labelName; 144 | } 145 | 146 | void PostLabel(char *label) 147 | { 148 | printf("%s:\n", label); 149 | } 150 | -------------------------------------------------------------------------------- /5/cradle.h: -------------------------------------------------------------------------------- 1 | #ifndef _CRADLE_H 2 | #define _CRADLE_H 3 | 4 | #define MAX_BUF 100 5 | static char tmp[MAX_BUF]; 6 | char Look; 7 | 8 | void GetChar(); 9 | 10 | void Error(char *s); 11 | void Abort(char *s); 12 | void Expected(char *s); 13 | void Match(char x); 14 | 15 | void Newline(); 16 | 17 | int IsAlpha(char c); 18 | int IsDigit(char c); 19 | int IsAddop(char c); 20 | 21 | char GetName(); 22 | int GetNum(); 23 | 24 | void Emit(char *s); 25 | void EmitLn(char *s); 26 | 27 | void Init(); 28 | void InitTable(); 29 | 30 | char *NewLabel(); 31 | void PostLabel(char *label); 32 | #endif 33 | -------------------------------------------------------------------------------- /5/main.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | 5 | #include "cradle.h" 6 | 7 | #ifdef DEBUG 8 | #define dprint(fmt, ...) printf(fmt, __VA_ARGS__); 9 | #else 10 | #define dprint(fmt, ...) 
11 | #endif 12 | 13 | void Other(); 14 | void Block(char *L); 15 | void Condition(); 16 | void DoProgram(); 17 | void DoIf(char *L); 18 | void DoWhile(); 19 | void DoLoop(); 20 | void DoRepeat(); 21 | void DoFor(); 22 | void Expression(); 23 | void DoDo(); 24 | void DoBreak(char *L); 25 | 26 | void Other() 27 | { 28 | sprintf(tmp, "%c", GetName()); 29 | EmitLn(tmp); 30 | } 31 | 32 | void Block(char *L) 33 | { 34 | while (! strchr("elu", Look)) { 35 | dprint("Block: get Look = %c\n", Look); 36 | switch (Look) { 37 | case 'i': 38 | DoIf(L); 39 | break; 40 | case 'w': 41 | DoWhile(); 42 | break; 43 | case 'p': 44 | DoLoop(); 45 | break; 46 | case 'r': 47 | DoRepeat(); 48 | break; 49 | case 'f': 50 | DoFor(); 51 | break; 52 | case 'd': 53 | DoDo(); 54 | break; 55 | case 'b': 56 | DoBreak(L); 57 | default: 58 | Other(); 59 | break; 60 | } 61 | /* this is for convinent, otherwise newline character will 62 | cause an error */ 63 | Newline(); 64 | } 65 | } 66 | 67 | void Condition() 68 | { 69 | EmitLn(""); 70 | } 71 | 72 | void DoProgram() 73 | { 74 | Block(NULL); 75 | if (Look != 'e') { 76 | Expected("End"); 77 | } 78 | EmitLn("END"); 79 | } 80 | 81 | void DoIf(char *L) 82 | { 83 | char L1[MAX_BUF]; 84 | char L2[MAX_BUF]; 85 | strcpy(L1, NewLabel()); 86 | strcpy(L2, L1); 87 | 88 | Match('i'); 89 | Condition(); 90 | 91 | sprintf(tmp, "jz %s", L1); 92 | EmitLn(tmp); 93 | 94 | Block(L); 95 | dprint("DoIf: Got Look = %c\n", Look); 96 | 97 | if (Look == 'l') { 98 | /* match *else* statement */ 99 | Match('l'); 100 | strcpy(L2, NewLabel()); 101 | 102 | sprintf(tmp, "jmp %s", L2); 103 | EmitLn(tmp); 104 | 105 | PostLabel(L1); 106 | 107 | Block(L); 108 | } 109 | 110 | Match('e'); 111 | PostLabel(L2); 112 | } 113 | 114 | void DoWhile() 115 | { 116 | char L1[MAX_BUF]; 117 | char L2[MAX_BUF]; 118 | 119 | Match('w'); 120 | strcpy(L1, NewLabel()); 121 | strcpy(L2, NewLabel()); 122 | PostLabel(L1); 123 | Condition(); 124 | sprintf(tmp, "jz %s", L2); 125 | EmitLn(tmp); 126 | Block(L2); 
127 | Match('e'); 128 | sprintf(tmp, "jmp %s", L1); 129 | EmitLn(tmp); 130 | PostLabel(L2); 131 | } 132 | 133 | void DoLoop() 134 | { 135 | char L1[MAX_BUF]; 136 | char L2[MAX_BUF]; 137 | Match('p'); 138 | strcpy(L1, NewLabel()); 139 | strcpy(L2, NewLabel()); 140 | PostLabel(L1); 141 | Block(L2); 142 | Match('e'); 143 | sprintf(tmp, "jmp %s", L1); 144 | EmitLn(tmp); 145 | PostLabel(L2); 146 | } 147 | 148 | void DoRepeat() 149 | { 150 | char L1[MAX_BUF]; 151 | char L2[MAX_BUF]; 152 | Match('r'); 153 | strcpy(L1, NewLabel()); 154 | strcpy(L2, NewLabel()); 155 | PostLabel(L1); 156 | Block(L2); 157 | Match('u'); 158 | Condition(); 159 | 160 | sprintf(tmp, "jz %s", L1); 161 | EmitLn(tmp); 162 | PostLabel(L2); 163 | } 164 | 165 | /* I haven't test the actual generated x86 code here, so you're free to 166 | * inform me if there are bugs. :) */ 167 | void DoFor() 168 | { 169 | char L1[MAX_BUF]; 170 | char L2[MAX_BUF]; 171 | 172 | Match('f'); 173 | strcpy(L1, NewLabel()); 174 | strcpy(L2, NewLabel()); 175 | char name = GetName(); 176 | Match('='); 177 | Expression(); 178 | EmitLn("subl %eax, $1"); /* SUBQ #1, D0*/ 179 | sprintf(tmp, "lea %c, %%edx", name); 180 | EmitLn(tmp); 181 | EmitLn("movl %eax, (%edx)"); 182 | Expression(); 183 | EmitLn("push %eax"); /* save the execution of expression */ 184 | PostLabel(L1); 185 | sprintf(tmp, "lea %c, %%edx", name); 186 | EmitLn(tmp); 187 | EmitLn("movl (%edx), %eax"); 188 | EmitLn("addl %eax, 1"); 189 | EmitLn("movl %eax, (%edx)"); 190 | EmitLn("cmp (%esp), %eax"); 191 | sprintf(tmp, "jg %s", L2); 192 | EmitLn(tmp); 193 | Block(L2); 194 | Match('e'); 195 | sprintf(tmp, "jmp %s", L1); 196 | EmitLn(tmp); 197 | PostLabel(L2); 198 | EmitLn("pop %eax"); 199 | } 200 | 201 | void Expression() 202 | { 203 | EmitLn(""); 204 | } 205 | 206 | void DoDo() 207 | { 208 | Match('d'); 209 | char L1[MAX_BUF]; 210 | char L2[MAX_BUF]; 211 | strcpy(L1, NewLabel()); 212 | strcpy(L2, NewLabel()); 213 | Expression(); 214 | EmitLn("subl %eax, $1"); 215 | 
EmitLn("movl %eax, %ecx"); 216 | PostLabel(L1); 217 | EmitLn("pushl %ecx"); 218 | Block(L2); 219 | EmitLn("popl %ecx"); 220 | sprintf(tmp, "loop %s", L1); 221 | EmitLn(tmp); 222 | EmitLn("pushl %ecx"); 223 | PostLabel(L2); 224 | EmitLn("popl %ecx"); 225 | } 226 | 227 | void DoBreak(char *L) 228 | { 229 | Match('b'); 230 | if (L != NULL) { 231 | sprintf(tmp, "jmp %s", L); 232 | EmitLn(tmp); 233 | } else { 234 | Abort("No loop to break from"); 235 | } 236 | } 237 | 238 | int main() 239 | { 240 | Init(); 241 | DoProgram(); 242 | return 0; 243 | } 244 | -------------------------------------------------------------------------------- /6/Makefile: -------------------------------------------------------------------------------- 1 | IN=main.c cradle.c 2 | OUT=main 3 | FLAGS=-Wall -Werror 4 | 5 | all: 6 | gcc -o $(OUT) $(IN) $(FLAGS) 7 | 8 | run: 9 | ./$(OUT) 10 | 11 | .PHONY: clean 12 | clean: 13 | rm $(OUT) 14 | -------------------------------------------------------------------------------- /6/cradle.c: -------------------------------------------------------------------------------- 1 | #include "cradle.h" 2 | #include 3 | #include 4 | #include 5 | 6 | #define TABLE_SIZE 26 7 | static int LCount = 0; 8 | static char labelName[MAX_BUF]; 9 | 10 | static int Table[TABLE_SIZE]; 11 | 12 | /* Helper Functions */ 13 | char uppercase(char c) 14 | { 15 | return (c & 0xDF); 16 | } 17 | 18 | void GetChar() 19 | { 20 | Look = getchar(); 21 | } 22 | 23 | 24 | void Error(char *s) 25 | { 26 | printf("\nError: %s.", s); 27 | } 28 | 29 | void Abort(char *s) 30 | { 31 | Error(s); 32 | exit(1); 33 | } 34 | 35 | 36 | void Expected(char *s) 37 | { 38 | sprintf(tmp, "%s Expected", s); 39 | Abort(tmp); 40 | } 41 | 42 | 43 | void Match(char x) 44 | { 45 | if(Look == x) { 46 | GetChar(); 47 | } else { 48 | sprintf(tmp, "' %c ' ", x); 49 | Expected(tmp); 50 | } 51 | } 52 | 53 | void Newline() 54 | { 55 | if (Look == '\r') { 56 | GetChar(); 57 | if (Look == '\n') { 58 | GetChar(); 59 | } 60 | } 
else if (Look == '\n') { 61 | GetChar(); 62 | } 63 | } 64 | 65 | int IsAlpha(char c) 66 | { 67 | return (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z'); 68 | } 69 | 70 | int IsDigit(char c) 71 | { 72 | return (c >= '0') && (c <= '9'); 73 | } 74 | 75 | int IsAddop(char c) 76 | { 77 | return (c == '+') || (c == '-'); 78 | } 79 | 80 | int IsBoolean(char c) 81 | { 82 | return strchr("TF", uppercase(c)) != NULL; 83 | } 84 | 85 | char GetName() 86 | { 87 | char c = Look; 88 | 89 | if( !IsAlpha(Look)) { 90 | sprintf(tmp, "Name"); 91 | Expected(tmp); 92 | } 93 | 94 | GetChar(); 95 | 96 | return uppercase(c); 97 | } 98 | 99 | 100 | int GetNum() 101 | { 102 | int value = 0; 103 | if( !IsDigit(Look)) { 104 | sprintf(tmp, "Integer"); 105 | Expected(tmp); 106 | } 107 | 108 | while (IsDigit(Look)) { 109 | value = value * 10 + Look - '0'; 110 | GetChar(); 111 | } 112 | 113 | return value; 114 | } 115 | 116 | int GetBoolean() 117 | { 118 | if (!IsBoolean(Look)) { 119 | Expected("Boolean Literal"); 120 | } 121 | int ret = uppercase(Look) == 'T'; 122 | GetChar(); 123 | return ret; 124 | } 125 | 126 | int IsOrop(char c) 127 | { 128 | return strchr("|~", c) != NULL; 129 | } 130 | 131 | int IsRelop(char c) 132 | { 133 | return strchr("=#<>", c) != NULL; 134 | } 135 | 136 | void Emit(char *s) 137 | { 138 | printf("\t%s", s); 139 | } 140 | 141 | void EmitLn(char *s) 142 | { 143 | Emit(s); 144 | printf("\n"); 145 | } 146 | 147 | void Init() 148 | { 149 | LCount = 0; 150 | 151 | InitTable(); 152 | GetChar(); 153 | } 154 | 155 | void InitTable() 156 | { 157 | int i; 158 | for (i = 0; i < TABLE_SIZE; i++) { 159 | Table[i] = 0; 160 | } 161 | 162 | } 163 | 164 | char *NewLabel() 165 | { 166 | sprintf(labelName, "L%02d", LCount); 167 | LCount ++; 168 | return labelName; 169 | } 170 | 171 | void PostLabel(char *label) 172 | { 173 | printf("%s:\n", label); 174 | } 175 | 176 | void Fin() 177 | { 178 | if (Look == '\r') { 179 | GetChar(); 180 | } 181 | if (Look == '\n') { 182 | GetChar(); 183 | } 
184 | } 185 | -------------------------------------------------------------------------------- /6/cradle.h: -------------------------------------------------------------------------------- 1 | #ifndef _CRADLE_H 2 | #define _CRADLE_H 3 | 4 | #define MAX_BUF 100 5 | static char tmp[MAX_BUF]; 6 | char Look; 7 | 8 | void GetChar(); 9 | 10 | void Error(char *s); 11 | void Abort(char *s); 12 | void Expected(char *s); 13 | void Match(char x); 14 | 15 | void Newline(); 16 | 17 | int IsAlpha(char c); 18 | int IsDigit(char c); 19 | int IsAddop(char c); 20 | int IsBoolean(char c); 21 | int IsOrop(char c); 22 | int IsRelop(char c); 23 | 24 | char GetName(); 25 | int GetNum(); 26 | int GetBoolean(); 27 | 28 | void Emit(char *s); 29 | void EmitLn(char *s); 30 | 31 | void Init(); 32 | void InitTable(); 33 | 34 | char *NewLabel(); 35 | void PostLabel(char *label); 36 | 37 | void Fin(); 38 | #endif 39 | -------------------------------------------------------------------------------- /6/main.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | 5 | #include "cradle.h" 6 | 7 | #ifdef DEBUG 8 | #define dprint(fmt, ...) printf(fmt, __VA_ARGS__); 9 | #else 10 | #define dprint(fmt, ...) 
11 | #endif 12 | 13 | 14 | void Other(); 15 | void Block(char *L); 16 | void DoProgram(); 17 | void DoIf(char *L); 18 | void DoWhile(); 19 | void DoLoop(); 20 | void DoRepeat(); 21 | void DoFor(); 22 | void Expression(); 23 | void DoDo(); 24 | void DoBreak(char *L); 25 | 26 | /* Added in chap6 */ 27 | void BoolFactor(); 28 | void NotFactor(); 29 | void BoolTerm(); 30 | void BoolExpression(); 31 | void BoolOr(); 32 | void BoolXor(); 33 | void Relation(); 34 | void Equals(); 35 | void NotEquals(); 36 | void Less(); 37 | void Greater(); 38 | void Ident(); 39 | void Factor(); 40 | void SignedFactor(); 41 | void Multiply(); 42 | void Divide(); 43 | void Term(); 44 | void Add(); 45 | void Subtract(); 46 | void Expression(); 47 | void Assignment(); 48 | 49 | void Other() 50 | { 51 | sprintf(tmp, "%c", GetName()); 52 | EmitLn(tmp); 53 | } 54 | 55 | void Block(char *L) 56 | { 57 | while (! strchr("elu", Look)) { 58 | dprint("Block: get Look = %c\n", Look); 59 | switch (Look) { 60 | case 'i': 61 | DoIf(L); 62 | break; 63 | case 'w': 64 | DoWhile(); 65 | break; 66 | case 'p': 67 | DoLoop(); 68 | break; 69 | case 'r': 70 | DoRepeat(); 71 | break; 72 | case 'f': 73 | DoFor(); 74 | break; 75 | case 'd': 76 | DoDo(); 77 | break; 78 | case 'b': 79 | DoBreak(L); 80 | default: 81 | Assignment(); 82 | break; 83 | } 84 | /* this is for convinent, otherwise newline character will 85 | cause an error */ 86 | /*Newline();*/ 87 | Fin(); 88 | } 89 | } 90 | 91 | void DoProgram() 92 | { 93 | Block(NULL); 94 | if (Look != 'e') { 95 | Expected("End"); 96 | } 97 | EmitLn("END"); 98 | } 99 | 100 | void DoIf(char *L) 101 | { 102 | char L1[MAX_BUF]; 103 | char L2[MAX_BUF]; 104 | strcpy(L1, NewLabel()); 105 | strcpy(L2, L1); 106 | 107 | Match('i'); 108 | BoolExpression(); 109 | 110 | sprintf(tmp, "jz %s", L1); 111 | EmitLn(tmp); 112 | 113 | Block(L); 114 | dprint("DoIf: Got Look = %c\n", Look); 115 | 116 | if (Look == 'l') { 117 | /* match *else* statement */ 118 | Match('l'); 119 | strcpy(L2, 
NewLabel()); 120 | 121 | sprintf(tmp, "jmp %s", L2); 122 | EmitLn(tmp); 123 | 124 | PostLabel(L1); 125 | 126 | Block(L); 127 | } 128 | 129 | Match('e'); 130 | PostLabel(L2); 131 | } 132 | 133 | void DoWhile() 134 | { 135 | char L1[MAX_BUF]; 136 | char L2[MAX_BUF]; 137 | 138 | Match('w'); 139 | strcpy(L1, NewLabel()); 140 | strcpy(L2, NewLabel()); 141 | PostLabel(L1); 142 | BoolExpression(); 143 | sprintf(tmp, "jz %s", L2); 144 | EmitLn(tmp); 145 | Block(L2); 146 | Match('e'); 147 | sprintf(tmp, "jmp %s", L1); 148 | EmitLn(tmp); 149 | PostLabel(L2); 150 | } 151 | 152 | void DoLoop() 153 | { 154 | char L1[MAX_BUF]; 155 | char L2[MAX_BUF]; 156 | Match('p'); 157 | strcpy(L1, NewLabel()); 158 | strcpy(L2, NewLabel()); 159 | PostLabel(L1); 160 | Block(L2); 161 | Match('e'); 162 | sprintf(tmp, "jmp %s", L1); 163 | EmitLn(tmp); 164 | PostLabel(L2); 165 | } 166 | 167 | void DoRepeat() 168 | { 169 | char L1[MAX_BUF]; 170 | char L2[MAX_BUF]; 171 | Match('r'); 172 | strcpy(L1, NewLabel()); 173 | strcpy(L2, NewLabel()); 174 | PostLabel(L1); 175 | Block(L2); 176 | Match('u'); 177 | BoolExpression(); 178 | 179 | sprintf(tmp, "jz %s", L1); 180 | EmitLn(tmp); 181 | PostLabel(L2); 182 | } 183 | 184 | /* I haven't test the actual generated x86 code here, so you're free to 185 | * inform me if there are bugs. 
:) */ 186 | void DoFor() 187 | { 188 | char L1[MAX_BUF]; 189 | char L2[MAX_BUF]; 190 | 191 | Match('f'); 192 | strcpy(L1, NewLabel()); 193 | strcpy(L2, NewLabel()); 194 | char name = GetName(); 195 | Match('='); 196 | Expression(); 197 | EmitLn("subl %eax, $1"); /* SUBQ #1, D0*/ 198 | sprintf(tmp, "lea %c, %%edx", name); 199 | EmitLn(tmp); 200 | EmitLn("movl %eax, (%edx)"); 201 | Expression(); 202 | EmitLn("push %eax"); /* save the execution of expression */ 203 | PostLabel(L1); 204 | sprintf(tmp, "lea %c, %%edx", name); 205 | EmitLn(tmp); 206 | EmitLn("movl (%edx), %eax"); 207 | EmitLn("addl %eax, 1"); 208 | EmitLn("movl %eax, (%edx)"); 209 | EmitLn("cmp (%esp), %eax"); 210 | sprintf(tmp, "jg %s", L2); 211 | EmitLn(tmp); 212 | Block(L2); 213 | Match('e'); 214 | sprintf(tmp, "jmp %s", L1); 215 | EmitLn(tmp); 216 | PostLabel(L2); 217 | EmitLn("pop %eax"); 218 | } 219 | 220 | void DoDo() 221 | { 222 | Match('d'); 223 | char L1[MAX_BUF]; 224 | char L2[MAX_BUF]; 225 | strcpy(L1, NewLabel()); 226 | strcpy(L2, NewLabel()); 227 | Expression(); 228 | EmitLn("subl %eax, $1"); 229 | EmitLn("movl %eax, %ecx"); 230 | PostLabel(L1); 231 | EmitLn("pushl %ecx"); 232 | Block(L2); 233 | EmitLn("popl %ecx"); 234 | sprintf(tmp, "loop %s", L1); 235 | EmitLn(tmp); 236 | EmitLn("pushl %ecx"); 237 | PostLabel(L2); 238 | EmitLn("popl %ecx"); 239 | } 240 | 241 | void DoBreak(char *L) 242 | { 243 | Match('b'); 244 | if (L != NULL) { 245 | sprintf(tmp, "jmp %s", L); 246 | EmitLn(tmp); 247 | } else { 248 | Abort("No loop to break from"); 249 | } 250 | } 251 | 252 | void BoolFactor() 253 | { 254 | if (IsBoolean(Look)) { 255 | if (GetBoolean()) { 256 | EmitLn("movl $-1, %eax"); 257 | } else { 258 | EmitLn("xor %eax, %eax"); 259 | } 260 | } else { 261 | Relation(); 262 | } 263 | } 264 | 265 | void Relation() 266 | { 267 | Expression(); 268 | if (IsRelop(Look)) { 269 | EmitLn("pushl %eax"); 270 | switch (Look) { 271 | case '=': 272 | Equals(); 273 | break; 274 | case '#': 275 | NotEquals(); 276 | 
break; 277 | case '<': 278 | Less(); 279 | break; 280 | case '>': 281 | Greater(); 282 | break; 283 | } 284 | } 285 | EmitLn("test %eax, %eax"); 286 | } 287 | 288 | void NotFactor() 289 | { 290 | if (Look == '!') { 291 | Match('!'); 292 | BoolFactor(); 293 | EmitLn("xor $-1, %eax"); 294 | } else { 295 | BoolFactor(); 296 | } 297 | } 298 | 299 | void BoolTerm() 300 | { 301 | NotFactor(); 302 | while(Look == '&') { 303 | EmitLn("pushl %eax"); 304 | Match('&'); 305 | NotFactor(); 306 | EmitLn("and (%esp), %eax"); 307 | EmitLn("addl $4, %esp"); 308 | } 309 | } 310 | 311 | void BoolExpression() 312 | { 313 | BoolTerm(); 314 | while (IsOrop(Look)) { 315 | EmitLn("pushl %eax"); 316 | switch (Look) { 317 | case '|': 318 | BoolOr(); 319 | break; 320 | case '~': 321 | BoolXor(); 322 | break; 323 | default: 324 | break; 325 | } 326 | } 327 | } 328 | 329 | void BoolOr() 330 | { 331 | Match('|'); 332 | BoolTerm(); 333 | EmitLn("or (%esp), %eax"); 334 | EmitLn("addl $4, %esp"); /* recover the stack */ 335 | } 336 | 337 | void BoolXor() 338 | { 339 | Match('~'); 340 | BoolTerm(); 341 | EmitLn("xor (%esp), %eax"); 342 | EmitLn("addl $4, %esp"); /* recover the stack */ 343 | } 344 | 345 | void Equals() 346 | { 347 | Match('='); 348 | Expression(); 349 | EmitLn("cmp (%esp), %eax"); 350 | /* Note that 80386 has setcc corresponds to 86000's SETCC 351 | * However, it only takes 8-bit registers */ 352 | EmitLn("sete %al"); 353 | EmitLn("addl $4, %esp"); /* recover the stack */ 354 | } 355 | 356 | void NotEquals() 357 | { 358 | Match('#'); 359 | Expression(); 360 | EmitLn("cmp (%esp), %eax"); 361 | EmitLn("setne %al"); 362 | EmitLn("addl $4, %esp"); /* recover the stack */ 363 | } 364 | 365 | void Less() 366 | { 367 | Match('<'); 368 | Expression(); 369 | EmitLn("cmp %eax, (%esp)"); 370 | EmitLn("setl %al"); 371 | EmitLn("addl $4, %esp"); /* recover the stack */ 372 | } 373 | 374 | void Greater() 375 | { 376 | Match('>'); 377 | Expression(); 378 | EmitLn("cmp %eax, (%esp)"); 379 | 
EmitLn("setg %al"); 380 | EmitLn("addl $4, %esp"); /* recover the stack */ 381 | } 382 | 383 | void Ident() 384 | { 385 | char c = GetName(); 386 | if (Look == '(') { 387 | Match('('); 388 | Match(')'); 389 | sprintf(tmp, "call %c", c); 390 | EmitLn(tmp); 391 | } else { 392 | sprintf(tmp, "movl %c, %%eax", c); 393 | EmitLn(tmp); 394 | } 395 | } 396 | 397 | void Factor() 398 | { 399 | if (Look == '(') { 400 | Match('('); 401 | Expression(); 402 | Match(')'); 403 | } else if (IsAlpha(Look)) { 404 | Ident(); 405 | } else { 406 | sprintf(tmp, "movl $%d, %%eax", GetNum()); 407 | EmitLn(tmp); 408 | } 409 | } 410 | 411 | void SignedFactor() 412 | { 413 | if (Look == '+') { 414 | GetChar(); 415 | Factor(); 416 | } else if (Look == '-') { 417 | GetChar(); 418 | if (IsDigit(Look)) { 419 | sprintf(tmp, "movl $-%d, %%eax", GetNum()); 420 | EmitLn(tmp); 421 | } else { 422 | Factor(); 423 | EmitLn("neg %eax"); 424 | } 425 | } else { 426 | Factor(); 427 | } 428 | } 429 | 430 | void Multiply() 431 | { 432 | Match('*'); 433 | Factor(); 434 | EmitLn("imull (%esp), %eax"); 435 | /* push of the stack */ 436 | EmitLn("addl $4, %esp"); 437 | } 438 | 439 | void Divide() 440 | { 441 | Match('/'); 442 | Factor(); 443 | 444 | /* for a expersion like a/b we have eax=b and %(esp)=a 445 | * but we need eax=a, and b on the stack 446 | */ 447 | EmitLn("movl (%esp), %edx"); 448 | EmitLn("addl $4, %esp"); 449 | 450 | EmitLn("pushl %eax"); 451 | 452 | EmitLn("movl %edx, %eax"); 453 | 454 | /* sign extesnion */ 455 | EmitLn("sarl $31, %edx"); 456 | EmitLn("idivl (%esp)"); 457 | EmitLn("addl $4, %esp"); 458 | 459 | } 460 | 461 | void Term() 462 | { 463 | SignedFactor(); 464 | while (strchr("*/", Look)) { 465 | EmitLn("pushl %eax"); 466 | switch(Look) 467 | { 468 | case '*': 469 | Multiply(); 470 | break; 471 | case '/': 472 | Divide(); 473 | break; 474 | default: 475 | Expected("Mulop"); 476 | } 477 | } 478 | } 479 | 480 | void Add() 481 | { 482 | Match('+'); 483 | Term(); 484 | EmitLn("addl (%esp), 
%eax"); 485 | EmitLn("addl $4, %esp"); 486 | } 487 | 488 | 489 | void Subtract() 490 | { 491 | Match('-'); 492 | Term(); 493 | EmitLn("subl (%esp), %eax"); 494 | EmitLn("negl %eax"); 495 | EmitLn("addl $4, %esp"); 496 | } 497 | 498 | void Expression() 499 | { 500 | Term(); 501 | while(IsAddop(Look)) { 502 | EmitLn("pushl %eax"); 503 | switch (Look) { 504 | case '+': 505 | Add(); 506 | break; 507 | case '-': 508 | Subtract(); 509 | break; 510 | default: 511 | Expected("Addop"); 512 | } 513 | } 514 | } 515 | 516 | void Assignment() 517 | { 518 | char c = GetName(); 519 | Match('='); 520 | BoolExpression(); 521 | sprintf(tmp, "lea %c, %%ebx", c); 522 | EmitLn(tmp); 523 | EmitLn("movl %eax, (%ebx)"); 524 | } 525 | 526 | int main() 527 | { 528 | Init(); 529 | DoProgram(); 530 | return 0; 531 | } 532 | -------------------------------------------------------------------------------- /7/Makefile: -------------------------------------------------------------------------------- 1 | IN=main.c cradle.c 2 | OUT=main 3 | FLAGS=-Wall -Werror 4 | 5 | all: 6 | gcc -o $(OUT) $(IN) $(FLAGS) 7 | 8 | run: 9 | ./$(OUT) 10 | 11 | .PHONY: clean 12 | clean: 13 | rm $(OUT) 14 | -------------------------------------------------------------------------------- /7/cradle.c: -------------------------------------------------------------------------------- 1 | #include "cradle.h" 2 | #include <stdbool.h> 3 | #include <stdio.h> 4 | #include <stdlib.h> 5 | #include <string.h> 6 | 7 | #define TABLE_SIZE 26 8 | static int LCount = 0; 9 | static char labelName[MAX_BUF]; 10 | /*static char identifier[MAX_BUF];*/ 11 | static int Table[TABLE_SIZE]; 12 | char tmp[MAX_BUF]; 13 | char Look; /* lookahead character; the single definition behind the extern declaration in cradle.h */ 14 | 15 | /* Keywords symbol table */ 16 | const char * const KWList[] = { 17 | "IF", 18 | "ELSE", 19 | "ENDIF", 20 | "END", 21 | }; 22 | const char KWCode[] = "xilee"; 23 | const int KWNum = sizeof(KWList)/sizeof(*KWList); 24 | 25 | char Token; /* current token */ 26 | char Value[MAX_BUF]; /* string token of Look */ 27 | 28 | /* Table Lookup 29 | * If the input string 
matches a table entry, return the entry index, else 30 | * return -1. 31 | * *n* is the size of the table */ 32 | int Lookup(const char * const table[], const char *string, int n) 33 | { 34 | int i; 35 | bool found = false; 36 | 37 | for (i = 0; i < n; ++i) { 38 | if (strcmp(table[i], string) == 0) { 39 | found = true; 40 | break; 41 | } 42 | } 43 | return found ? i : -1; 44 | } 45 | 46 | /* Read the next name into Value and set Token to its KWCode entry ('x' when it is not a keyword). */ 47 | void Scan() 48 | { 49 | /* Unix/Linux ends a line with LF alone; MS-DOS uses CR LF */ 50 | SkipWhite(); 51 | while(Look == '\n') { 52 | Fin(); 53 | } 54 | 55 | GetName(); 56 | int index = Lookup(KWList, Value, KWNum); 57 | Token = KWCode[index+1]; /* Lookup's -1 (no match) selects KWCode[0] */ 58 | } 59 | 60 | /* Helper Functions */ 61 | char uppercase(char c) 62 | { 63 | if (IsAlpha(c)) { 64 | return (c & 0xDF); 65 | } else { 66 | return c; 67 | } 68 | } 69 | 70 | void GetChar() 71 | { 72 | Look = getchar(); 73 | } 74 | 75 | 76 | void Error(char *s) 77 | { 78 | printf("\nError: %s.", s); 79 | } 80 | 81 | void Abort(char *s) 82 | { 83 | Error(s); 84 | exit(1); 85 | } 86 | 87 | /* Print "<s> Expected" as an error message and exit with status 1. */ 88 | void Expected(char *s) 89 | { 90 | printf("\nError: %s Expected.", s); /* printed directly: Match and MatchString pass tmp as s, and sprintf(tmp, "%s Expected", tmp) would overlap source and destination */ 91 | exit(1); 92 | } 93 | 94 | 95 | void Match(char x) 96 | { 97 | if(Look == x) { 98 | GetChar(); 99 | } else { 100 | sprintf(tmp, "' %c ' ", x); 101 | Expected(tmp); 102 | } 103 | } 104 | /* Abort unless the most recently scanned name (Value) equals str. */ 105 | void MatchString(char *str) 106 | { 107 | if (strcmp(Value, str) != 0) { 108 | sprintf(tmp, "\"%s\"", str); /* name the keyword that was expected, not the token that was found */ 109 | Expected(tmp); 110 | } 111 | } 112 | 113 | void Newline() 114 | { 115 | if (Look == '\r') { 116 | GetChar(); 117 | if (Look == '\n') { 118 | GetChar(); 119 | } 120 | } else if (Look == '\n') { 121 | GetChar(); 122 | } 123 | } 124 | /* Character classifiers. strchr() also matches the terminating NUL of the set, so '\0' is excluded explicitly. */ 125 | int IsWhite(char c) 126 | { 127 | return c != '\0' && strchr(" \t\r\n", c) != NULL; 128 | } 129 | 130 | int IsOp(char c) 131 | { 132 | return c != '\0' && strchr("+-*/<>:=", c) != NULL; 133 | } 134 | 135 | int IsAlpha(char c) 136 | { 137 | return (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z'); 138 | } 139 | 140 | int IsDigit(char c) 141 | { 142 | return (c >= 
'0') && (c <= '9'); 143 | } 144 | 145 | int IsAddop(char c) 146 | { 147 | return (c == '+') || (c == '-'); 148 | } 149 | 150 | int IsBoolean(char c) 151 | { 152 | return c != '\0' && strchr("TF", uppercase(c)) != NULL; 153 | } 154 | 155 | int IsAlNum(char c) 156 | { 157 | return IsAlpha(c) || IsDigit(c); 158 | } 159 | /* Read an identifier into Value, upper-cased; abort if Look is not a letter or the name does not fit in Value. */ 160 | void GetName() 161 | { 162 | SkipWhite(); 163 | while(Look == '\n') { 164 | Fin(); 165 | } 166 | 167 | char *p = Value; 168 | if (!IsAlpha(Look)) { 169 | Expected("Name"); 170 | } 171 | 172 | for (; IsAlNum(Look); GetChar()) { 173 | if (p == Value + MAX_BUF - 1) { Abort("Name too long"); } /* keep one byte for the terminator */ 174 | *p++ = uppercase(Look); 175 | } 176 | *p = '\0'; 177 | } 178 | /* Read a run of decimal digits into Value as text and set Token to '#'; abort if Look is not a digit or the literal does not fit in Value. */ 179 | void GetNum() 180 | { 181 | SkipWhite(); 182 | char *p = Value; 183 | if( !IsDigit(Look)) { 184 | Expected("Integer"); 185 | } 186 | 187 | for (; IsDigit(Look); GetChar()) { 188 | if (p == Value + MAX_BUF - 1) { Abort("Integer too long"); } /* keep one byte for the terminator */ 189 | *p++ = Look; 190 | } 191 | *p = '\0'; 192 | Token = '#'; 193 | } 194 | 195 | int GetBoolean() 196 | { 197 | if (!IsBoolean(Look)) { 198 | Expected("Boolean Literal"); 199 | } 200 | int ret = uppercase(Look) == 'T'; 201 | GetChar(); 202 | return ret; 203 | } 204 | 205 | int IsOrop(char c) 206 | { 207 | return c != '\0' && strchr("|~", c) != NULL; 208 | } 209 | 210 | int IsRelop(char c) 211 | { 212 | return c != '\0' && strchr("=#<>", c) != NULL; 213 | } 214 | 215 | void Emit(char *s) 216 | { 217 | printf("\t%s", s); 218 | } 219 | 220 | void EmitLn(char *s) 221 | { 222 | Emit(s); 223 | printf("\n"); 224 | } 225 | 226 | void Init() 227 | { 228 | LCount = 0; 229 | 230 | InitTable(); 231 | GetChar(); 232 | } 233 | 234 | void SkipWhite() 235 | { 236 | while (IsWhite(Look)) { 237 | GetChar(); 238 | } 239 | } 240 | 241 | void InitTable() 242 | { 243 | int i; 244 | for (i = 0; i < TABLE_SIZE; i++) { 245 | Table[i] = 0; 246 | } 247 | 248 | } 249 | 250 | char *NewLabel() 251 | { 252 | sprintf(labelName, "L%02d", LCount); 253 | LCount ++; 254 | return labelName; 255 | } 256 | 257 | void PostLabel(char *label) 258 | { 259 | printf("%s:\n", label); 260 | } 261 | 262 | void Fin() 263 | { 264 | if (Look == '\r') { 
265 | GetChar(); 266 | } 267 | if (Look == '\n') { 268 | GetChar(); 269 | } 270 | } 271 | 272 | -------------------------------------------------------------------------------- /7/cradle.h: -------------------------------------------------------------------------------- 1 | #ifndef _CRADLE_H 2 | #define _CRADLE_H 3 | 4 | #define MAX_BUF 100 5 | extern char Look; /* lookahead character (defined in cradle.c) */ 6 | extern char Token; /* current token */ 7 | extern char Value[MAX_BUF]; /* string token of Look */ 8 | extern char tmp[MAX_BUF]; 9 | 10 | void GetChar(); 11 | 12 | void Error(char *s); 13 | void Abort(char *s); 14 | void Expected(char *s); 15 | void Match(char x); 16 | void MatchString(char *str); 17 | 18 | void Newline(); 19 | 20 | int IsWhite(char c); 21 | int IsOp(char c); 22 | 23 | int IsAlpha(char c); 24 | int IsDigit(char c); 25 | int IsAlNum(char c); 26 | int IsAddop(char c); 27 | int IsBoolean(char c); 28 | int IsOrop(char c); 29 | int IsRelop(char c); 30 | 31 | void GetName(); 32 | void GetNum(); 33 | void GetOp(); 34 | int GetBoolean(); 35 | 36 | void Scan(); 37 | void SkipWhite(); 38 | 39 | void Emit(char *s); 40 | void EmitLn(char *s); 41 | 42 | void Init(); 43 | void InitTable(); 44 | 45 | char *NewLabel(); 46 | void PostLabel(char *label); 47 | 48 | void Fin(); 49 | #endif 50 | -------------------------------------------------------------------------------- /7/main.c: -------------------------------------------------------------------------------- 1 | /* Note that only the IF statement is supported while merging the lexer and parser in 2 | * chapter 7. 3 | */ 4 | #include <stdbool.h> 5 | #include <stdio.h> 6 | #include <stdlib.h> 7 | #include <string.h> 8 | 9 | #include "cradle.h" 10 | 11 | #ifdef DEBUG 12 | #define dprint(fmt, ...) printf(fmt, __VA_ARGS__); 13 | #else 14 | #define dprint(fmt, ...) 
15 | #endif 16 | 17 | void Other(); 18 | void Block(); 19 | void DoProgram(); 20 | void DoIf(); 21 | void DoWhile(); 22 | void DoLoop(); 23 | void DoRepeat(); 24 | void DoFor(); 25 | void Expression(); 26 | void DoDo(); 27 | void DoBreak(char *L); 28 | 29 | /* Added in chap6 */ 30 | void BoolFactor(); 31 | void NotFactor(); 32 | void BoolTerm(); 33 | void BoolExpression(); 34 | void BoolOr(); 35 | void BoolXor(); 36 | void Relation(); 37 | void Equals(); 38 | void NotEquals(); 39 | void Less(); 40 | void Greater(); 41 | void Ident(); 42 | void Factor(); 43 | void SignedFactor(); 44 | void Multiply(); 45 | void Divide(); 46 | void Term(); 47 | void Add(); 48 | void Subtract(); 49 | void Expression(); 50 | void Assignment(); 51 | void Condition(); 52 | /* Compile statements until Scan() yields an ELSE ('l') or END/ENDIF ('e') keyword token. */ 53 | void Block() 54 | { 55 | Scan(); 56 | while (! strchr("el", Token)) { 57 | dprint("Block: get Look = %c\n", Look); 58 | switch (Token) { 59 | case 'i': 60 | DoIf(); 61 | break; 62 | case '\n': 63 | while(Look == '\n') { 64 | Fin(); 65 | } 66 | break; 67 | default: 68 | Assignment(); 69 | break; 70 | } 71 | Scan(); 72 | } 73 | } 74 | 75 | void DoProgram() 76 | { 77 | Block(); 78 | MatchString("END"); 79 | EmitLn("END"); 80 | } 81 | 82 | void DoIf() 83 | { 84 | Condition(); 85 | char L1[MAX_BUF]; 86 | char L2[MAX_BUF]; 87 | strcpy(L1, NewLabel()); 88 | strcpy(L2, L1); 89 | 90 | sprintf(tmp, "jz %s", L1); 91 | EmitLn(tmp); 92 | 93 | Block(); 94 | 95 | if (Token == 'l') { 96 | /* match *else* statement */ 97 | strcpy(L2, NewLabel()); 98 | 99 | sprintf(tmp, "jmp %s", L2); 100 | EmitLn(tmp); 101 | 102 | PostLabel(L1); 103 | 104 | Block(); 105 | } 106 | 107 | PostLabel(L2); 108 | MatchString("ENDIF"); 109 | } 110 | 111 | void DoWhile() 112 | { 113 | char L1[MAX_BUF]; 114 | char L2[MAX_BUF]; 115 | 116 | Match('w'); 117 | strcpy(L1, NewLabel()); 118 | strcpy(L2, NewLabel()); 119 | PostLabel(L1); 120 | BoolExpression(); 121 | sprintf(tmp, "jz %s", L2); 122 | EmitLn(tmp); 123 | Block(); /* Block() is defined without parameters; the former L2 argument was a call/definition mismatch */ 124 | Match('e'); 125 | sprintf(tmp, 
"jmp %s", L1); 126 | EmitLn(tmp); 127 | PostLabel(L2); 128 | } 129 | 130 | void DoLoop() 131 | { 132 | char L1[MAX_BUF]; 133 | char L2[MAX_BUF]; 134 | Match('p'); 135 | strcpy(L1, NewLabel()); 136 | strcpy(L2, NewLabel()); 137 | PostLabel(L1); 138 | Block(); /* Block() is defined without parameters; the former L2 argument was a call/definition mismatch */ 139 | Match('e'); 140 | sprintf(tmp, "jmp %s", L1); 141 | EmitLn(tmp); 142 | PostLabel(L2); 143 | } 144 | 145 | void DoRepeat() 146 | { 147 | char L1[MAX_BUF]; 148 | char L2[MAX_BUF]; 149 | Match('r'); 150 | strcpy(L1, NewLabel()); 151 | strcpy(L2, NewLabel()); 152 | PostLabel(L1); 153 | Block(); /* Block() is defined without parameters; the former L2 argument was a call/definition mismatch */ 154 | Match('u'); 155 | BoolExpression(); 156 | 157 | sprintf(tmp, "jz %s", L1); 158 | EmitLn(tmp); 159 | PostLabel(L2); 160 | } 161 | 162 | 163 | void DoBreak(char *L) 164 | { 165 | Match('b'); 166 | if (L != NULL) { 167 | sprintf(tmp, "jmp %s", L); 168 | EmitLn(tmp); 169 | } else { 170 | Abort("No loop to break from"); 171 | } 172 | } 173 | 174 | void BoolFactor() 175 | { 176 | if (IsBoolean(Look)) { 177 | if (GetBoolean()) { 178 | EmitLn("movl $-1, %eax"); 179 | } else { 180 | EmitLn("xor %eax, %eax"); 181 | } 182 | } else { 183 | Relation(); 184 | } 185 | } 186 | 187 | void Relation() 188 | { 189 | Expression(); 190 | if (IsRelop(Look)) { 191 | EmitLn("pushl %eax"); 192 | switch (Look) { 193 | case '=': 194 | Equals(); 195 | break; 196 | case '#': 197 | NotEquals(); 198 | break; 199 | case '<': 200 | Less(); 201 | break; 202 | case '>': 203 | Greater(); 204 | break; 205 | } 206 | } 207 | EmitLn("test %eax, %eax"); 208 | } 209 | 210 | void NotFactor() 211 | { 212 | if (Look == '!') { 213 | Match('!'); 214 | BoolFactor(); 215 | EmitLn("xor $-1, %eax"); 216 | } else { 217 | BoolFactor(); 218 | } 219 | } 220 | 221 | void BoolTerm() 222 | { 223 | NotFactor(); 224 | while(Look == '&') { 225 | EmitLn("pushl %eax"); 226 | Match('&'); 227 | NotFactor(); 228 | EmitLn("and (%esp), %eax"); 229 | EmitLn("addl $4, %esp"); 230 | } 231 | } 232 | 233 | void BoolExpression() 234 | { 235 | BoolTerm(); 236 | while (IsOrop(Look)) { 
237 | EmitLn("pushl %eax"); 238 | switch (Look) { 239 | case '|': 240 | BoolOr(); 241 | break; 242 | case '~': 243 | BoolXor(); 244 | break; 245 | default: 246 | break; 247 | } 248 | } 249 | } 250 | 251 | void BoolOr() 252 | { 253 | Match('|'); 254 | BoolTerm(); 255 | EmitLn("or (%esp), %eax"); 256 | EmitLn("addl $4, %esp"); /* recover the stack */ 257 | } 258 | 259 | void BoolXor() 260 | { 261 | Match('~'); 262 | BoolTerm(); 263 | EmitLn("xor (%esp), %eax"); 264 | EmitLn("addl $4, %esp"); /* recover the stack */ 265 | } 266 | /* Relational operators: compare the pushed left operand with %eax, set %al from the flags, then drop the pushed operand. */ 267 | void Equals() 268 | { 269 | Match('='); 270 | Expression(); 271 | EmitLn("cmp (%esp), %eax"); 272 | /* Note that the 80386's setcc corresponds to the 68000's SETCC. 273 | * However, it only takes 8-bit registers. NOTE(review): only %al is written here, so the upper bits of %eax still hold the right-hand operand when Relation() tests %eax -- confirm whether a zero-extension is missing */ 274 | EmitLn("sete %al"); 275 | EmitLn("addl $4, %esp"); /* recover the stack */ 276 | } 277 | 278 | void NotEquals() 279 | { 280 | Match('#'); 281 | Expression(); 282 | EmitLn("cmp (%esp), %eax"); 283 | EmitLn("setne %al"); 284 | EmitLn("addl $4, %esp"); /* recover the stack */ 285 | } 286 | 287 | void Less() 288 | { 289 | Match('<'); 290 | Expression(); 291 | EmitLn("cmp %eax, (%esp)"); 292 | EmitLn("setl %al"); 293 | EmitLn("addl $4, %esp"); /* recover the stack */ 294 | } 295 | 296 | void Greater() 297 | { 298 | Match('>'); 299 | Expression(); 300 | EmitLn("cmp %eax, (%esp)"); 301 | EmitLn("setg %al"); 302 | EmitLn("addl $4, %esp"); /* recover the stack */ 303 | } 304 | /* Emit a call when the name is followed by "()", otherwise a load of the named variable into %eax. */ 305 | void Ident() 306 | { 307 | GetName(); 308 | if (Look == '(') { 309 | Match('('); 310 | Match(')'); 311 | sprintf(tmp, "call %s", Value); 312 | EmitLn(tmp); 313 | } else { 314 | sprintf(tmp, "movl %s, %%eax", Value); 315 | EmitLn(tmp); 316 | } 317 | } 318 | 319 | void Factor() 320 | { 321 | if (Look == '(') { 322 | Match('('); 323 | Expression(); 324 | Match(')'); 325 | } else if (IsAlpha(Look)) { 326 | Ident(); 327 | } else { 328 | GetNum(); 329 | sprintf(tmp, "movl $%s, %%eax", Value); 330 | EmitLn(tmp); 331 | } 332 | } 333 | 334 | void SignedFactor() 335 
| { 336 | bool negative = Look == '-'; 337 | 338 | if (IsAddop(Look)) { /* consume a leading '-' as well as '+'; otherwise Factor() is entered with Look still on the sign */ 339 | GetChar(); 340 | SkipWhite(); 341 | } 342 | 343 | Factor(); 344 | 345 | if (negative) { 346 | EmitLn("neg %eax"); 347 | } 348 | } 349 | 350 | void Multiply() 351 | { 352 | Match('*'); 353 | Factor(); 354 | EmitLn("imull (%esp), %eax"); 355 | /* pop the left operand off the stack */ 356 | EmitLn("addl $4, %esp"); 357 | } 358 | 359 | void Divide() 360 | { 361 | Match('/'); 362 | Factor(); 363 | 364 | /* for an expression like a/b we have eax=b and (%esp)=a 365 | * but we need eax=a, and b on the stack 366 | */ 367 | EmitLn("movl (%esp), %edx"); 368 | EmitLn("addl $4, %esp"); 369 | 370 | EmitLn("pushl %eax"); 371 | 372 | EmitLn("movl %edx, %eax"); 373 | 374 | /* sign extension */ 375 | EmitLn("sarl $31, %edx"); 376 | EmitLn("idivl (%esp)"); 377 | EmitLn("addl $4, %esp"); 378 | 379 | } 380 | 381 | void Term1() 382 | { 383 | while (Look != '\0' && strchr("*/", Look)) { 384 | EmitLn("pushl %eax"); 385 | switch(Look) 386 | { 387 | case '*': 388 | Multiply(); 389 | break; 390 | case '/': 391 | Divide(); 392 | break; 393 | default: 394 | Expected("Mulop"); 395 | } 396 | } 397 | } 398 | 399 | void Term() 400 | { 401 | Factor(); 402 | Term1(); 403 | } 404 | 405 | void FirstTerm() 406 | { 407 | SignedFactor(); 408 | Term1(); 409 | } 410 | 411 | void Add() 412 | { 413 | Match('+'); 414 | Term(); 415 | EmitLn("addl (%esp), %eax"); 416 | EmitLn("addl $4, %esp"); 417 | } 418 | 419 | 420 | void Subtract() 421 | { 422 | Match('-'); 423 | Term(); 424 | EmitLn("subl (%esp), %eax"); 425 | EmitLn("negl %eax"); 426 | EmitLn("addl $4, %esp"); 427 | } 428 | 429 | void Expression() 430 | { 431 | FirstTerm(); 432 | while(IsAddop(Look)) { 433 | EmitLn("pushl %eax"); 434 | switch (Look) { 435 | case '+': 436 | Add(); 437 | break; 438 | case '-': 439 | Subtract(); 440 | break; 441 | default: 442 | Expected("Addop"); 443 | } 444 | } 445 | } 446 | 447 | /* This version of 'condition' is dummy */ 448 | void Condition() 449 | { 450 | EmitLn("Condition"); 451 
| } 452 | /* Compile "<name> = <expression>": the caller has already scanned the target name into Value. */ 453 | void Assignment() 454 | { 455 | char name[MAX_BUF]; 456 | strcpy(name, Value); /* save the target before Expression() reuses Value for its operands */ 457 | Match('='); 458 | Expression(); 459 | sprintf(tmp, "lea %s, %%ebx", name); 460 | EmitLn(tmp); 461 | EmitLn("movl %eax, (%ebx)"); 462 | } 463 | 464 | int main() 465 | { 466 | Init(); 467 | DoProgram(); 468 | return 0; 469 | } 470 | -------------------------------------------------------------------------------- /8/tutor8.txt: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | LET'S BUILD A COMPILER! 30 | 31 | By 32 | 33 | Jack W. Crenshaw, Ph.D. 34 | 35 | 2 April 1989 36 | 37 | 38 | Part VIII: A LITTLE PHILOSOPHY 39 | 40 | 41 | ***************************************************************** 42 | * * 43 | * COPYRIGHT NOTICE * 44 | * * 45 | * Copyright (C) 1989 Jack W. Crenshaw. All rights reserved. * 46 | * * 47 | ***************************************************************** 48 | 49 | 50 | INTRODUCTION 51 | 52 | This is going to be a different kind of session than the others 53 | in our series on parsing and compiler construction. For this 54 | session, there won't be any experiments to do or code to write. 55 | This once, I'd like to just talk with you for a while. 56 | Mercifully, it will be a short session, and then we can take up 57 | where we left off, hopefully with renewed vigor. 58 | 59 | When I was in college, I found that I could always follow a 60 | prof's lecture a lot better if I knew where he was going with it. 61 | I'll bet you were the same. 62 | 63 | So I thought maybe it's about time I told you where we're going 64 | with this series: what's coming up in future installments, and in 65 | general what all this is about. I'll also share some general 66 | thoughts concerning the usefulness of what we've been doing. 
67 | 68 | 69 | THE ROAD HOME 70 | 71 | So far, we've covered the parsing and translation of arithmetic 72 | expressions, Boolean expressions, and combinations connected by 73 | relational operators. We've also done the same for control 74 | constructs. In all of this we've leaned heavily on the use of 75 | top-down, recursive descent parsing, BNF definitions of the 76 | syntax, and direct generation of assembly-language code. We also 77 | learned the value of such tricks as single-character tokens to 78 | help us see the forest through the trees. In the last 79 | installment we dealt with lexical scanning, and I showed you 80 | simple but powerful ways to remove the single-character barriers. 81 | 82 | Throughout the whole study, I've emphasized the KISS philosophy 83 | ... Keep It Simple, Sidney ... and I hope by now you've realized 84 | just how simple this stuff can really be. While there are for 85 | sure areas of compiler theory that are truly intimidating, the 86 | ultimate message of this series is that in practice you can just 87 | politely sidestep many of these areas. If the language 88 | definition cooperates or, as in this series, if you can define 89 | the language as you go, it's possible to write down the language 90 | definition in BNF with reasonable ease. And, as we've seen, you 91 | can crank out parse procedures from the BNF just about as fast as 92 | you can type. 93 | 94 | As our compiler has taken form, it's gotten more parts, but each 95 | part is quite small and simple, and very much like all the 96 | others. 97 | 98 | At this point, we have many of the makings of a real, practical 99 | compiler. As a matter of fact, we already have all we need to 100 | build a toy compiler for a language as powerful as, say, Tiny 101 | BASIC. In the next couple of installments, we'll go ahead and 102 | define that language. 103 | 104 | To round out the series, we still have a few items to cover. 
105 | These include: 106 | 107 | o Procedure calls, with and without parameters 108 | 109 | o Local and global variables 110 | 111 | o Basic types, such as character and integer types 112 | 113 | o Arrays 114 | 115 | o Strings 116 | 117 | o User-defined types and structures 118 | 119 | o Tree-structured parsers and intermediate languages 120 | 121 | o Optimization 122 | 123 | These will all be covered in future installments. When we're 124 | finished, you'll have all the tools you need to design and build 125 | your own languages, and the compilers to translate them. 126 | 127 | I can't design those languages for you, but I can make some 128 | comments and recommendations. I've already sprinkled some 129 | throughout past installments. You've seen, for example, the 130 | control constructs I prefer. 131 | 132 | These constructs are going to be part of the languages I build. 133 | I have three languages in mind at this point, two of which you 134 | will see in installments to come: 135 | 136 | TINY - A minimal, but usable language on the order of Tiny 137 | BASIC or Tiny C. It won't be very practical, but it will 138 | have enough power to let you write and run real programs 139 | that do something worthwhile. 140 | 141 | KISS - The language I'm building for my own use. KISS is 142 | intended to be a systems programming language. It won't 143 | have strong typing or fancy data structures, but it will 144 | support most of the things I want to do with a higher- 145 | order language (HOL), except perhaps writing compilers. 146 | 147 | I've also been toying for years with the idea of a HOL-like 148 | assembler, with structured control constructs and HOL-like 149 | assignment statements. That, in fact, was the impetus behind my 150 | original foray into the jungles of compiler theory. This one may 151 | never be built, simply because I've learned that it's actually 152 | easier to implement a language like KISS, that only uses a subset 153 | of the CPU instructions. 
As you know, assembly language can be 154 | bizarre and irregular in the extreme, and a language that maps 155 | one-for-one onto it can be a real challenge. Still, I've always 156 | felt that the syntax used in conventional assemblers is dumb ... 157 | why is 158 | 159 | MOVE.L A,B 160 | 161 | better, or easier to translate, than 162 | 163 | B=A ? 164 | 165 | I think it would be an interesting exercise to develop a 166 | "compiler" that would give the programmer complete access to and 167 | control over the full complement of the CPU instruction set, and 168 | would allow you to generate programs as efficient as assembly 169 | language, without the pain of learning a set of mnemonics. Can 170 | it be done? I don't know. The real question may be, "Will the 171 | resulting language be any easier to write than assembly"? If 172 | not, there's no point in it. I think that it can be done, but 173 | I'm not completely sure yet how the syntax should look. 174 | 175 | Perhaps you have some comments or suggestions on this one. I'd 176 | love to hear them. 177 | 178 | You probably won't be surprised to learn that I've already worked 179 | ahead in most of the areas that we will cover. I have some good 180 | news: Things never get much harder than they've been so far. 181 | It's possible to build a complete, working compiler for a real 182 | language, using nothing but the same kinds of techniques you've 183 | learned so far. And THAT brings up some interesting questions. 184 | 185 | 186 | WHY IS IT SO SIMPLE? 187 | 188 | Before embarking on this series, I always thought that compilers 189 | were just naturally complex computer programs ... the ultimate 190 | challenge. Yet the things we have done here have usually turned 191 | out to be quite simple, sometimes even trivial. 192 | 193 | For a while, I thought it was simply because I hadn't yet gotten 194 | into the meat of the subject. I had only covered the simple 195 | parts. 
I will freely admit to you that, even when I began the 196 | series, I wasn't sure how far we would be able to go before 197 | things got too complex to deal with in the ways we have so far. 198 | But at this point I've already been down the road far enough to 199 | see the end of it. Guess what? 200 | 201 | 202 | THERE ARE NO HARD PARTS! 203 | 204 | 205 | Then, I thought maybe it was because we were not generating very 206 | good object code. Those of you who have been following the 207 | series and trying sample compiles know that, while the code works 208 | and is rather foolproof, its efficiency is pretty awful. I 209 | figured that if we were concentrating on turning out tight code, 210 | we would soon find all that missing complexity. 211 | 212 | To some extent, that one is true. In particular, my first few 213 | efforts at trying to improve efficiency introduced complexity at 214 | an alarming rate. But since then I've been tinkering around with 215 | some simple optimizations and I've found some that result in very 216 | respectable code quality, WITHOUT adding a lot of complexity. 217 | 218 | Finally, I thought that perhaps the saving grace was the "toy 219 | compiler" nature of the study. I have made no pretense that we 220 | were ever going to be able to build a compiler to compete with 221 | Borland and Microsoft. And yet, again, as I get deeper into this 222 | thing the differences are starting to fade away. 223 | 224 | Just to make sure you get the message here, let me state it flat 225 | out: 226 | 227 | USING THE TECHNIQUES WE'VE USED HERE, IT IS POSSIBLE TO 228 | BUILD A PRODUCTION-QUALITY, WORKING COMPILER WITHOUT ADDING 229 | A LOT OF COMPLEXITY TO WHAT WE'VE ALREADY DONE. 230 | 231 | 232 | Since the series began I've received some comments from you. 233 | Most of them echo my own thoughts: "This is easy! Why do the 234 | textbooks make it seem so hard?" Good question. 
235 | 236 | Recently, I've gone back and looked at some of those texts again, 237 | and even bought and read some new ones. Each time, I come away 238 | with the same feeling: These guys have made it seem too hard. 239 | 240 | What's going on here? Why does the whole thing seem difficult in 241 | the texts, but easy to us? Are we that much smarter than Aho, 242 | Ullman, Brinch Hansen, and all the rest? 243 | 244 | Hardly. But we are doing some things differently, and more and 245 | more I'm starting to appreciate the value of our approach, and 246 | the way that it simplifies things. Aside from the obvious 247 | shortcuts that I outlined in Part I, like single-character tokens 248 | and console I/O, we have made some implicit assumptions and done 249 | some things differently from those who have designed compilers in 250 | the past. As it turns out, our approach makes life a lot easier. 251 | 252 | So why didn't all those other guys use it? 253 | 254 | You have to remember the context of some of the earlier compiler 255 | development. These people were working with very small computers 256 | of limited capacity. Memory was very limited, the CPU 257 | instruction set was minimal, and programs ran in batch mode 258 | rather than interactively. As it turns out, these caused some 259 | key design decisions that have really complicated the designs. 260 | Until recently, I hadn't realized how much of classical compiler 261 | design was driven by the available hardware. 262 | 263 | Even in cases where these limitations no longer apply, people 264 | have tended to structure their programs in the same way, since 265 | that is the way they were taught to do it. 266 | 267 | In our case, we have started with a blank sheet of paper. There 268 | is a danger there, of course, that you will end up falling into 269 | traps that other people have long since learned to avoid. 
But it 270 | also has allowed us to take different approaches that, partly by 271 | design and partly by pure dumb luck, have allowed us to gain 272 | simplicity. 273 | 274 | Here are the areas that I think have led to complexity in the 275 | past: 276 | 277 | o Limited RAM Forcing Multiple Passes 278 | 279 | I just read "Brinch Hansen on Pascal Compilers" (an 280 | excellent book, BTW). He developed a Pascal compiler for a 281 | PC, but he started the effort in 1981 with a 64K system, and 282 | so almost every design decision he made was aimed at making 283 | the compiler fit into RAM. To do this, his compiler has 284 | three passes, one of which is the lexical scanner. There is 285 | no way he could, for example, use the distributed scanner I 286 | introduced in the last installment, because the program 287 | structure wouldn't allow it. He also required not one but 288 | two intermediate languages, to provide the communication 289 | between phases. 290 | 291 | All the early compiler writers had to deal with this issue: 292 | Break the compiler up into enough parts so that it will fit 293 | in memory. When you have multiple passes, you need to add 294 | data structures to support the information that each pass 295 | leaves behind for the next. That adds complexity, and ends 296 | up driving the design. Lee's book, "The Anatomy of a 297 | Compiler," mentions a FORTRAN compiler developed for an IBM 298 | 1401. It had no fewer than 63 separate passes! Needless to 299 | say, in a compiler like this the separation into phases 300 | would dominate the design. 301 | 302 | Even in situations where RAM is plentiful, people have 303 | tended to use the same techniques because that is what 304 | they're familiar with. It wasn't until Turbo Pascal came 305 | along that we found how simple a compiler could be if you 306 | started with different assumptions. 307 | 308 | 309 | o Batch Processing 310 | 311 | In the early days, batch processing was the only choice ... 
312 | there was no interactive computing. Even today, compilers 313 | run in essentially batch mode. 314 | 315 | In a mainframe compiler as well as many micro compilers, 316 | considerable effort is expended on error recovery ... it can 317 | consume as much as 30-40% of the compiler and completely 318 | drive the design. The idea is to avoid halting on the first 319 | error, but rather to keep going at all costs, so that you 320 | can tell the programmer about as many errors in the whole 321 | program as possible. 322 | 323 | All of that harks back to the days of the early mainframes, 324 | where turnaround time was measured in hours or days, and it 325 | was important to squeeze every last ounce of information out 326 | of each run. 327 | 328 | In this series, I've been very careful to avoid the issue of 329 | error recovery, and instead our compiler simply halts with 330 | an error message on the first error. I will frankly admit 331 | that it was mostly because I wanted to take the easy way out 332 | and keep things simple. But this approach, pioneered by 333 | Borland in Turbo Pascal, also has a lot going for it anyway. 334 | Aside from keeping the compiler simple, it also fits very 335 | well with the idea of an interactive system. When 336 | compilation is fast, and especially when you have an editor 337 | such as Borland's that will take you right to the point of 338 | the error, then it makes a lot of sense to stop there, and 339 | just restart the compilation after the error is fixed. 340 | 341 | 342 | o Large Programs 343 | 344 | Early compilers were designed to handle large programs ... 345 | essentially infinite ones. In those days there was little 346 | choice; the idea of subroutine libraries and separate 347 | compilation were still in the future. Again, this 348 | assumption led to multi-pass designs and intermediate files 349 | to hold the results of partial processing. 
350 | 351 | Brinch Hansen's stated goal was that the compiler should be 352 | able to compile itself. Again, because of his limited RAM, 353 | this drove him to a multi-pass design. He needed as little 354 | resident compiler code as possible, so that the necessary 355 | tables and other data structures would fit into RAM. 356 | 357 | I haven't stated this one yet, because there hasn't been a 358 | need ... we've always just read and written the data as 359 | streams, anyway. But for the record, my plan has always 360 | been that, in a production compiler, the source and object 361 | data should all coexist in RAM with the compiler, a la the 362 | early Turbo Pascals. That's why I've been careful to keep 363 | routines like GetChar and Emit as separate routines, in 364 | spite of their small size. It will be easy to change them 365 | to read from and write to memory. 366 | 367 | 368 | o Emphasis on Efficiency 369 | 370 | John Backus has stated that, when he and his colleagues 371 | developed the original FORTRAN compiler, they KNEW that they 372 | had to make it produce tight code. In those days, there was 373 | a strong sentiment against HOLs and in favor of assembly 374 | language, and efficiency was the reason. If FORTRAN didn't 375 | produce very good code by assembly standards, the users 376 | would simply refuse to use it. For the record, that FORTRAN 377 | compiler turned out to be one of the most efficient ever 378 | built, in terms of code quality. But it WAS complex! 379 | 380 | Today, we have CPU power and RAM size to spare, so code 381 | efficiency is not so much of an issue. By studiously 382 | ignoring this issue, we have indeed been able to Keep It 383 | Simple. Ironically, though, as I have said, I have found 384 | some optimizations that we can add to the basic compiler 385 | structure, without having to add a lot of complexity. So in 386 | this case we get to have our cake and eat it too: we will 387 | end up with reasonable code quality, anyway. 
388 | 389 | 390 | o Limited Instruction Sets 391 | 392 | The early computers had primitive instruction sets. Things 393 | that we take for granted, such as stack operations and 394 | indirect addressing, came only with great difficulty. 395 | 396 | Example: In most compiler designs, there is a data structure 397 | called the literal pool. The compiler typically identifies 398 | all literals used in the program, and collects them into a 399 | single data structure. All references to the literals are 400 | done indirectly to this pool. At the end of the 401 | compilation, the compiler issues commands to set aside 402 | storage and initialize the literal pool. 403 | 404 | We haven't had to address that issue at all. When we want 405 | to load a literal, we just do it, in line, as in 406 | 407 | MOVE #3,D0 408 | 409 | There is something to be said for the use of a literal pool, 410 | particularly on a machine like the 8086 where data and code 411 | can be separated. Still, the whole thing adds a fairly 412 | large amount of complexity with little in return. 413 | 414 | Of course, without the stack we would be lost. In a micro, 415 | both subroutine calls and temporary storage depend heavily 416 | on the stack, and we have used it even more than necessary 417 | to ease expression parsing. 418 | 419 | 420 | o Desire for Generality 421 | 422 | Much of the content of the typical compiler text is taken up 423 | with issues we haven't addressed here at all ... things like 424 | automated translation of grammars, or generation of LALR 425 | parse tables. This is not simply because the authors want 426 | to impress you. There are good, practical reasons why the 427 | subjects are there. 428 | 429 | We have been concentrating on the use of a recursive-descent 430 | parser to parse a deterministic grammar, i.e., a grammar 431 | that is not ambiguous and, therefore, can be parsed with one 432 | level of lookahead. 
I haven't made much of this limitation, 433 | but the fact is that this represents a small subset of 434 | possible grammars. In fact, there is an infinite number of 435 | grammars that we can't parse using our techniques. The LR 436 | technique is a more powerful one, and can deal with grammars 437 | that we can't. 438 | 439 | In compiler theory, it's important to know how to deal with 440 | these other grammars, and how to transform them into 441 | grammars that are easier to deal with. For example, many 442 | (but not all) ambiguous grammars can be transformed into 443 | unambiguous ones. The way to do this is not always obvious, 444 | though, and so many people have devoted years to develop 445 | ways to transform them automatically. 446 | 447 | In practice, these issues turn out to be considerably less 448 | important. Modern languages tend to be designed to be easy 449 | to parse, anyway. That was a key motivation in the design 450 | of Pascal. Sure, there are pathological grammars that you 451 | would be hard pressed to write unambiguous BNF for, but in 452 | the real world the best answer is probably to avoid those 453 | grammars! 454 | 455 | In our case, of course, we have sneakily let the language 456 | evolve as we go, so we haven't painted ourselves into any 457 | corners here. You may not always have that luxury. Still, 458 | with a little care you should be able to keep the parser 459 | simple without having to resort to automatic translation of 460 | the grammar. 461 | 462 | 463 | We have taken a vastly different approach in this series. We 464 | started with a clean sheet of paper, and developed techniques 465 | that work in the context that we are in; that is, a single-user 466 | PC with rather ample CPU power and RAM space. We have limited 467 | ourselves to reasonable grammars that are easy to parse, we have 468 | used the instruction set of the CPU to advantage, and we have not 469 | concerned ourselves with efficiency. 
THAT's why it's been easy. 470 | 471 | Does this mean that we are forever doomed to be able to build 472 | only toy compilers? No, I don't think so. As I've said, we can 473 | add certain optimizations without changing the compiler 474 | structure. If we want to process large files, we can always add 475 | file buffering to do that. These things do not affect the 476 | overall program design. 477 | 478 | And I think that's a key factor. By starting with small and 479 | limited cases, we have been able to concentrate on a structure 480 | for the compiler that is natural for the job. Since the 481 | structure naturally fits the job, it is almost bound to be simple 482 | and transparent. Adding capability doesn't have to change that 483 | basic structure. We can simply expand things like the file 484 | structure or add an optimization layer. I guess my feeling is 485 | that, back when resources were tight, the structures people ended 486 | up with were artificially warped to make them work under those 487 | conditions, and weren't optimum structures for the problem at 488 | hand. 489 | 490 | 491 | CONCLUSION 492 | 493 | Anyway, that's my arm-waving guess as to how we've been able to 494 | keep things simple. We started with something simple and let it 495 | evolve naturally, without trying to force it into some 496 | traditional mold. 497 | 498 | We're going to press on with this. I've given you a list of the 499 | areas we'll be covering in future installments. With those 500 | installments, you should be able to build complete, working 501 | compilers for just about any occasion, and build them simply. If 502 | you REALLY want to build production-quality compilers, you'll be 503 | able to do that, too. 504 | 505 | For those of you who are chafing at the bit for more parser code, 506 | I apologize for this digression. I just thought you'd like to 507 | have things put into perspective a bit. Next time, we'll get 508 | back to the mainstream of the tutorial. 
509 | 510 | So far, we've only looked at pieces of compilers, and while we 511 | have many of the makings of a complete language, we haven't 512 | talked about how to put it all together. That will be the 513 | subject of our next two installments. Then we'll press on into 514 | the new subjects I listed at the beginning of this installment. 515 | 516 | See you then. 517 | 518 | ***************************************************************** 519 | * * 520 | * COPYRIGHT NOTICE * 521 | * * 522 | * Copyright (C) 1989 Jack W. Crenshaw. All rights reserved. * 523 | * * 524 | ***************************************************************** 525 | 526 | -------------------------------------------------------------------------------- /9/Makefile: -------------------------------------------------------------------------------- 1 | IN=main.c cradle.c 2 | OUT=main 3 | FLAGS=-Wall -Werror 4 | 5 | all: 6 | gcc -o $(OUT) $(IN) $(FLAGS) 7 | 8 | run: 9 | ./$(OUT) 10 | 11 | .PHONY: clean 12 | clean: 13 | rm $(OUT) 14 | -------------------------------------------------------------------------------- /9/cradle.c: -------------------------------------------------------------------------------- 1 | #include "cradle.h" 2 | #include 3 | #include 4 | 5 | #define TABLE_SIZE 26 6 | static int LCount = 0; 7 | static char labelName[MAX_BUF]; 8 | char tmp[MAX_BUF]; 9 | 10 | static int Table[TABLE_SIZE]; 11 | 12 | /* Helper Functions */ 13 | char uppercase(char c) 14 | { 15 | return (c & 0xDF); 16 | } 17 | 18 | void GetChar() 19 | { 20 | Look = getchar(); 21 | /* printf("Getchar: %c\n", Look); */ 22 | } 23 | 24 | 25 | void Error(char *s) 26 | { 27 | printf("\nError: %s.", s); 28 | } 29 | 30 | void Abort(char *s) 31 | { 32 | Error(s); 33 | exit(1); 34 | } 35 | 36 | 37 | void Expected(char *s) 38 | { 39 | sprintf(tmp, "%s Expected", s); 40 | Abort(tmp); 41 | } 42 | 43 | 44 | void Match(char x) 45 | { 46 | if(Look == x) { 47 | GetChar(); 48 | } else { 49 | sprintf(tmp, "' %c ' ", x); 50 | 
Expected(tmp); 51 | } 52 | } 53 | 54 | void Newline() 55 | { 56 | if (Look == '\r') { 57 | GetChar(); 58 | if (Look == '\n') { 59 | GetChar(); 60 | } 61 | } else if (Look == '\n') { 62 | GetChar(); 63 | } 64 | } 65 | 66 | int IsAlpha(char c) 67 | { 68 | return (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z'); 69 | } 70 | 71 | int IsDigit(char c) 72 | { 73 | return (c >= '0') && (c <= '9'); 74 | } 75 | 76 | int IsAddop(char c) 77 | { 78 | return (c == '+') || (c == '-'); 79 | } 80 | 81 | char GetName() 82 | { 83 | char c = Look; 84 | 85 | if( !IsAlpha(Look)) { 86 | sprintf(tmp, "Name"); 87 | Expected(tmp); 88 | } 89 | 90 | GetChar(); 91 | 92 | return uppercase(c); 93 | } 94 | 95 | 96 | int GetNum() 97 | { 98 | int value = 0; 99 | if( !IsDigit(Look)) { 100 | sprintf(tmp, "Integer"); 101 | Expected(tmp); 102 | } 103 | 104 | while (IsDigit(Look)) { 105 | value = value * 10 + Look - '0'; 106 | GetChar(); 107 | } 108 | 109 | return value; 110 | } 111 | 112 | void Emit(char *s) 113 | { 114 | printf("\t%s", s); 115 | } 116 | 117 | void EmitLn(char *s) 118 | { 119 | Emit(s); 120 | printf("\n"); 121 | } 122 | 123 | void Init() 124 | { 125 | LCount = 0; 126 | 127 | InitTable(); 128 | GetChar(); 129 | } 130 | 131 | void InitTable() 132 | { 133 | int i; 134 | for (i = 0; i < TABLE_SIZE; i++) { 135 | Table[i] = 0; 136 | } 137 | 138 | } 139 | 140 | char *NewLabel() 141 | { 142 | sprintf(labelName, "L%02d", LCount); 143 | LCount ++; 144 | return labelName; 145 | } 146 | 147 | void PostLabel(char *label) 148 | { 149 | printf("%s:\n", label); 150 | } 151 | -------------------------------------------------------------------------------- /9/cradle.h: -------------------------------------------------------------------------------- 1 | #ifndef _CRADLE_H 2 | #define _CRADLE_H 3 | 4 | #define MAX_BUF 100 5 | extern char tmp[MAX_BUF]; 6 | char Look; 7 | 8 | void GetChar(); 9 | 10 | void Error(char *s); 11 | void Abort(char *s); 12 | void Expected(char *s); 13 | void Match(char x); 14 | 15 
| void Newline(); 16 | 17 | int IsAlpha(char c); 18 | int IsDigit(char c); 19 | int IsAddop(char c); 20 | 21 | char GetName(); 22 | int GetNum(); 23 | 24 | void Emit(char *s); 25 | void EmitLn(char *s); 26 | 27 | void Init(); 28 | void InitTable(); 29 | 30 | char *NewLabel(); 31 | void PostLabel(char *label); 32 | #endif 33 | -------------------------------------------------------------------------------- /9/main.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | 5 | #include "cradle.h" 6 | 7 | #ifdef DEBUG 8 | #define dprint(fmt, ...) printf(fmt, __VA_ARGS__); 9 | #else 10 | #define dprint(fmt, ...) 11 | #endif 12 | 13 | void Prolog(char name); 14 | void Epilog(char name); 15 | void Prog(); 16 | void DoBlock(char name); 17 | void Declarations(); 18 | void Labels(); 19 | void Constants(); 20 | void Types(); 21 | void Variables(); 22 | void DoProcedure(); 23 | void DoFunction(); 24 | void Statements(); 25 | 26 | void Prog() 27 | { 28 | Match('p'); /* handles program header part */ 29 | char name = GetName(); 30 | Prolog(name); 31 | DoBlock(name); 32 | Match('.'); 33 | Epilog(name); 34 | } 35 | 36 | void Prolog(char name) 37 | { 38 | EmitLn(".text"); 39 | EmitLn(".global _start"); 40 | EmitLn("_start:"); 41 | } 42 | 43 | void Epilog(char name) 44 | { 45 | EmitLn("movl %eax, %ebx"); 46 | EmitLn("movl $1, %eax"); 47 | EmitLn("int $0x80"); 48 | } 49 | void DoBlock(char name) 50 | { 51 | Declarations(); 52 | sprintf(tmp, "%c", name); 53 | PostLabel(tmp); 54 | Statements(); 55 | } 56 | 57 | void Declarations() 58 | { 59 | while(strchr("lctvpf", Look) != NULL) { 60 | switch(Look) { 61 | case 'l': 62 | Labels(); 63 | break; 64 | case 'c': 65 | Constants(); 66 | break; 67 | case 't': 68 | Types(); 69 | break; 70 | case 'v': 71 | Variables(); 72 | break; 73 | case 'p': 74 | DoProcedure(); 75 | break; 76 | case 'f': 77 | DoFunction(); 78 | default: 79 | break; 80 | } 81 | } 82 | } 83 | 84 | void 
Labels() 85 | { 86 | Match('l'); 87 | } 88 | 89 | void Constants() 90 | { 91 | Match('c'); 92 | } 93 | 94 | void Types() 95 | { 96 | Match('t'); 97 | } 98 | 99 | void Variables() 100 | { 101 | Match('v'); 102 | } 103 | 104 | void DoProcedure() 105 | { 106 | Match('p'); 107 | } 108 | 109 | void DoFunction() 110 | { 111 | Match('f'); 112 | } 113 | 114 | void Statements() 115 | { 116 | Match('b'); 117 | while(Look != 'e') { 118 | GetChar(); 119 | } 120 | Match('e'); 121 | } 122 | 123 | int main() 124 | { 125 | Init(); 126 | Prog(); 127 | return 0; 128 | } 129 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Information 2 | A C version of the "Let's Build a Compiler" by Jack Crenshaw 3 | http://compilers.iecc.com/crenshaw/ 4 | 5 | This repository is forked form: https://github.com/vtudose/Let-s-build-a-compiler 6 | 7 | And since the original is far from complete(only the first two chapter), and 8 | the author had been inactive for quite a long time, I decided to create a new 9 | repository. 10 | 11 | If there are are any licence issue, please inform. 12 | 13 | # Comments 14 | Below are some comments about the source code here and the original article. 15 | 16 | ## What to do with the generated code 17 | The generated assembly code is devoted to x86 architecture. The syntax is AT&T 18 | style and the experiment operating system is Linux. You may try to save the 19 | generated code to for example "test.s" and execute the following commands to 20 | check the output. 21 | ``` 22 | as --32 -o test.o test.s 23 | ld -m elf_i386 test.o -o test 24 | ./test 25 | echo $? 26 | ``` 27 | 28 | ## Assembly code 29 | It is a C port of the original article which is written in Pascal. And the 30 | generated code are ported to x86 instead of 86000. 
31 | 32 | If you want to test the generated code, please keep in mind that the generated 33 | code might be too incomplete to be assembled directly. For example, when loading a 34 | variable, we directly generate the assembly code "movl X, %eax", and variable 'X' 35 | might not be declared since the article is far from mentioning "variable types". 36 | Thus you'll have to type the missing parts all by yourself. 37 | 38 | ## IO routines 39 | I am definitely NOT an assembly expert. When the author mentioned adding IO 40 | routines via a library, I simply could not find an x86 alternative. The closest 41 | thing is C's library function "printf". But then I decided not to bother 42 | adding this kind of routine because it is only used in two chapters. 43 | 44 | Instead, I'll save the value as the return code of a process. It's done by 45 | saving the value to register "%ebx" and calling "int $0x80", as follows: 46 | ``` 47 | movl var, %ebx 48 | movl $1, %eax # exit function 49 | int $0x80 50 | ``` 51 | 52 | -Note from random github user- 53 | 54 | On older operating systems IO was performed by an interrupt to an area of 55 | memory that contains instructions for performing the operation with your 56 | particular hardware configuration. This left the developers of the operating 57 | systems unable to change the memory layout without forcing all the software 58 | written to be modified as well. 59 | 60 | The POSIX standard on UNIX-like operating systems, as well as Windows and 61 | Macintosh operating systems, started including libraries for C or Pascal 62 | with functions for system calls. This means you will either have to compile 63 | for DOS or learn some specialized things about your operating system and 64 | the format of C libraries. 
#!/bin/bash
# Download the sixteen tutorial chapters, one per numbered directory
# (1/tutor1.txt ... 16/tutor16.txt). Chapters already on disk are skipped,
# so the script can be re-run after a partial download.
#
# -e aborts on the first failing command, -x traces every command.
set -ex

URL="http://compilers.iecc.com/crenshaw/"

for i in $(seq 1 16); do
    file="tutor${i}.txt"
    mkdir -p "$i"

    # Fetch the same file name the guard tests for; the guard previously
    # checked tutorN.txt while wget was asked for "$URL$i".
    # NOTE(review): assumes the chapters are served as ${URL}tutorN.txt - confirm.
    if [ ! -f "$i/$file" ]; then
        # -P writes into the chapter directory without a cd/cd .. pair,
        # so a failure cannot leave the script in the wrong directory.
        wget -P "$i" "${URL}${file}"
    fi

    echo "$i"
done