├── .gitignore
├── Makefile
├── README.md
├── hello.mini
├── minilang.c
└── minilang.mini


/.gitignore:
--------------------------------------------------------------------------------
1 | minilang
2 | minilang_jr
3 | minilang.s
4 | minilang_jr.s
5 | 


--------------------------------------------------------------------------------
/Makefile:
--------------------------------------------------------------------------------
 1 | 
 2 | all:
 3 | 	c99 minilang.c -o minilang -g -Wall
 4 | 
 5 | 
 6 | bootstrap: minilang
 7 | 	./minilang minilang.mini minilang.s
 8 | 	gcc minilang.s -o minilang_jr
 9 | 	./minilang_jr minilang.mini minilang_jr.s
10 | 	diff minilang.s minilang_jr.s
11 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # minilang
 2 | 
 3 | Inspired by [BASICO](http://www.andreadrian.de/tbng/index.html), minilang is a
 4 | little programming language. My intention was to keep the implementation brief,
 5 | yet readable. It is bootstrapped, thus proving to be reasonably expressive. The
 6 | compiler outputs x86-64 ASM code.  Similar to Python, code blocks are formed
 7 | via indentation. There is no type checking whatsoever, so be careful.  Please,
 8 | look into `minilang.mini` and you will quickly get the idea.  Obligatorily,
 9 | here is your first programme:
10 | 
11 | 	main():puts("Hello, world!")
12 | 
13 | Compile the throwaway compiler:
14 | 
15 | 	$ c99 minilang.c -o minilang
16 | 
17 | Bootstrap thus:
18 | 
19 | 	$ ./minilang minilang.mini minilang.s       # compile with throwaway compiler
20 | 	$ gcc minilang.s -o minilang_jr             # GCC assembles the executable
21 | 	$ ./minilang_jr minilang.mini minilang_jr.s # compile with bootstrapped compiler
22 | 	$ diff minilang.s minilang_jr.s             # compare output -> equal
23 | 
24 | 


--------------------------------------------------------------------------------
/hello.mini:
--------------------------------------------------------------------------------
1 | main():puts("Hello, world!")
2 | 


--------------------------------------------------------------------------------
/minilang.c:
--------------------------------------------------------------------------------
  1 | #include <stdlib.h>
  2 | #include <stdio.h>
  3 | #include <stdarg.h>
  4 | #include <string.h>
  5 | #include <ctype.h>
  6 | #include <assert.h>
  7 | 
  8 | 
  9 | enum {
 10 | 	LEX_EOF = EOF,
 11 | 	LEX_ASM = 0,
 12 | 	LEX_IF,
 13 | 	LEX_ELSE,
 14 | 	LEX_ELIF,
 15 | 	LEX_WHILE,
 16 | 	LEX_BREAK,
 17 | 	LEX_CONTINUE,
 18 | 	LEX_RETURN,
 19 | 	LEX_VAR,
 20 | 	LEX_KEYWORD_COUNT,
 21 | 	LEX_BLOCK_END,
 22 | 	LEX_CHAR,
 23 | 	LEX_STRING,
 24 | 	LEX_NUMBER,
 25 | 	LEX_IDENT,
 26 | 	LEX_ASM_LINE,
 27 | 	LEX_LE,
 28 | 	LEX_GE,
 29 | 	LEX_EQ,
 30 | 	LEX_NE,
 31 | 	LEX_SIZE
 32 | };
 33 | 
 34 | const char* keywords[] = {
 35 | 	"asm",
 36 | 	"if",
 37 | 	"else",
 38 | 	"elif",
 39 | 	"while",
 40 | 	"break",
 41 | 	"continue",
 42 | 	"return",
 43 | 	"var",
 44 | 	NULL,
 45 | 	"block end",
 46 | 	"character",
 47 | 	"string",
 48 | 	"number",
 49 | 	"identifier",
 50 | };
 51 | 
 52 | 
 53 | //	scanner
 54 | int			character;
 55 | int			lexeme;
 56 | char		token[1024];
 57 | long long	number;
 58 | int			neg_number;
 59 | int			line_number = 0;
 60 | int			cursor_pos = 0;
 61 | 
 62 | int			brackets = 0;
 63 | int			block = 0;
 64 | int			indent = 0;
 65 | int			newline = 1;
 66 | int			asm_active = 0;
 67 | 
 68 | FILE*		src_file;
 69 | FILE*		dst_file;
 70 | 
 71 | 
 72 | void error(char* msg, ...) {
 73 | 	fprintf(stderr, "%d:%d: error: ", line_number, cursor_pos);
 74 | 	va_list args;
 75 | 	va_start(args, msg);
 76 | 	vfprintf(stderr, msg, args);
 77 | 	va_end(args);
 78 | 	fprintf(stderr, "\n");
 79 | 	exit(1);
 80 | }
 81 | 
 82 | 
 83 | void output(char* msg, ...) {
 84 | 	va_list args;
 85 | 	va_start(args, msg);
 86 | 	vfprintf(dst_file, msg, args);
 87 | 	va_end(args);
 88 | }
 89 | 
 90 | 
 91 | int read_char() {
 92 | 	int c = character;
 93 | 	character = fgetc(src_file);
 94 | 	cursor_pos++;
 95 | 	if(character == '\n') {
 96 | 		line_number++;
 97 | 		cursor_pos = 0;
 98 | 	}
 99 | 	return c;
100 | }
101 | 
102 | 
103 | int scan() {
104 | space:
105 | 	while(isspace(character)) {
106 | 		if(newline) {
107 | 			if(character == ' ') indent++;
108 | 			if(character == '\t') indent = (indent & ~3) + 4;
109 | 		}
110 | 		if(character == '\n') {
111 | 			indent = 0;
112 | 			int n = newline;
113 | 			newline = 1;
114 | 			if(n == 0 && brackets == 0) return ';';
115 | 		}
116 | 		read_char();
117 | 	}
118 | 
119 | 	// ignore comment
120 | 	if(character == '#') {
121 | 		while(character != '\n') read_char();
122 | 		goto space;
123 | 	}
124 | 
125 | 	// indent
126 | 	if(!brackets) {
127 | 		if(indent > block) error("invalid indentation");
128 | 		if(indent < block) {
129 | 			asm_active = 0;
130 | 			block -= 4;
131 | 			return LEX_BLOCK_END;
132 | 		}
133 | 	}
134 | 
135 | 	// asm line
136 | 	if(asm_active) {
137 | 		int i = 0;
138 | 		while(character != '\n') {
139 | 			token[i] = read_char();
140 | 			i++;
141 | 		}
142 | 		token[i] = '\0';
143 | 		return LEX_ASM_LINE;
144 | 	}
145 | 
146 | 	newline = 0;
147 | 	// one character token
148 | 	if(strchr("-+*/%&|~!=<>;:()[],@{}", character)) {
149 | 		int c = read_char();
150 | 		if(c == ':') {	// new block
151 | 			block += 4;
152 | 			indent += 4;
153 | 		}
154 | 		else if(strchr("<>!=", c) && character == '=') {
155 | 			read_char();
156 | 			switch(c) {
157 | 			case '<': return LEX_LE;
158 | 			case '>': return LEX_GE;
159 | 			case '=': return LEX_EQ;
160 | 			case '!': return LEX_NE;
161 | 			}
162 | 		}
163 | 		else if(c == '(' || c == '[') brackets++;
164 | 		else if(c == ')' || c == ']') brackets--;
165 | 		if(isdigit(character)) neg_number = (c == '-');
166 | 		return c;
167 | 	}
168 | 
169 | 	// char
170 | 	if(character == '\'') {
171 | 		read_char();
172 | 		token[0] = '\'';
173 | 		int i = 1;
174 | 		if(character == '\\') token[i++] = read_char();
175 | 		token[i++] = read_char();
176 | 		token[i++] = '\'';
177 | 		token[i] = '\0';
178 | 		if(read_char() != '\'') error("bad character literal");
179 | 		return LEX_CHAR;
180 | 	}
181 | 
182 | 	// string
183 | 	if(character == '"') {
184 | 		int i = 0;
185 | 		do {
186 | 			if(character == '\\') token[i++] = read_char();
187 | 			token[i++] = read_char();
188 | 			if(i > 1020) error("string too long");
189 | 		} while(character != '"');
190 | 		token[i++] = read_char();
191 | 		token[i] = '\0';
192 | 		return LEX_STRING;
193 | 	}
194 | 
195 | 	// number
196 | 	if(isdigit(character)) {
197 | 		int i = 0;
198 | 		do {
199 | 			token[i++] = read_char();
200 | 			if(i > 20) error("number too long");
201 | 		} while(isdigit(character));
202 | 		token[i] = '\0';
203 | 		number = atoll(token);
204 | 		return LEX_NUMBER;
205 | 	}
206 | 
207 | 	// identifier and keywords
208 | 	if(isalpha(character) || character == '_') {
209 | 		int i = 0;
210 | 		do {
211 | 			token[i++] = read_char();
212 | 			if(i > 62) error("identifier too long");
213 | 		} while(isalnum(character) || character == '_');
214 | 		token[i] = '\0';
215 | 
216 | 		// check for keywords
217 | 		for(i = 0; i < LEX_KEYWORD_COUNT; i++) {
218 | 			if(strcmp(token, keywords[i]) == 0) return i;
219 | 		}
220 | 		return LEX_IDENT;
221 | 	}
222 | 
223 | 	if(character != EOF) error("unknown character");
224 | 	if(block > 0) {
225 | 		block -= 4;
226 | 		return LEX_BLOCK_END;
227 | 	}
228 | 	return LEX_EOF;
229 | }
230 | 
231 | 
232 | void read_lexeme() { lexeme = scan(); }
233 | 
234 | 
235 | void expect(int l) {
236 | 	if(lexeme != l) {
237 | 		if(l < LEX_SIZE) error("%s expected", keywords[l]);
238 | 		else error("<%c> expected", l);
239 | 	}
240 | 	read_lexeme();
241 | }
242 | 
243 | 
244 | // symbol table
// One local variable or parameter of the function being compiled.
typedef struct {
	char name[64];  // identifier text
	int offset;     // used as [rbp - offset] when the variable is accessed
} Variable;

// Locals of the current function; the first local_count entries are valid.
Variable locals[1024];
int local_count;
252 | 
253 | 
254 | // code generation
// registers that receive call arguments, in argument order
const char* call_regs[] = { "rdi", "rsi", "rdx", "rcx", "r8", "r9" };
// registers used to hold expression temporaries
const char* regs[] = { "r8", "r9", "r11", "rax" };
enum { cache_size = sizeof(regs) / sizeof(regs[0]) };

int cache[cache_size];   // cache[i] is the index into regs[] of temporary i (0 = newest)
int stack_size;          // number of live expression temporaries
int label;               // next unused label number
int while_labels[256];   // first label of each enclosing while loop
int while_level = -1;    // index of the innermost while loop, -1 outside loops
265 | 
266 | 
267 | const char* regname(int i) { return regs[cache[i]]; }
268 | 
269 | 
270 | void init_cache() {
271 | 	for(int i = 0; i < cache_size; i++) cache[i] = i;
272 | 	stack_size = 0;
273 | }
274 | 
275 | 
276 | Variable* lookup_local() {
277 | 	for(int i = 0; i < local_count; i++) {
278 | 		if(strcmp(token, locals[i].name) == 0) return &locals[i];
279 | 	}
280 | 	return NULL;
281 | }
282 | 
283 | 
284 | void add_local(int offset) {
285 | 	for(int i = 0; i < 1024; i++) {
286 | 		if(i == local_count) {
287 | 			strcpy(locals[i].name, token);
288 | 			locals[i].offset = offset;
289 | 			local_count++;
290 | 			return;
291 | 		}
292 | 		if(strcmp(token, locals[i].name) == 0) error("multiple declarations");
293 | 	}
294 | 	error("too many variables");
295 | }
296 | 
297 | 
298 | void push() {
299 | 	int i = cache_size - 1;
300 | 	int tmp = cache[i];
301 | 	if(stack_size >= cache_size) output("\tpush %s\n", regs[tmp]);
302 | 	while(i > 0) {
303 | 		cache[i] = cache[i - 1];
304 | 		i--;
305 | 	}
306 | 	cache[0] = tmp;
307 | 	stack_size++;
308 | }
309 | 
310 | 
311 | void pop() {
312 | 	stack_size--;
313 | 	if(stack_size == 0) init_cache();
314 | 	else {
315 | 		int i = 0;
316 | 		int tmp = cache[0];
317 | 		while(i < cache_size - 1) {
318 | 			cache[i] = cache[i + 1];
319 | 			i++;
320 | 		}
321 | 		cache[i] = tmp;
322 | 		if(stack_size >= cache_size) output("\tpop %s\n", regs[i]);
323 | 	}
324 | }
325 | 
326 | 
327 | int is_expr_beginning() {
328 | 	static const int lexemes[] = {
329 | 		'-', '!', '(', LEX_NUMBER, LEX_CHAR, LEX_STRING, LEX_IDENT
330 | 	};
331 | 	for(int i = 0; i < sizeof(lexemes) / sizeof(int); i++)
332 | 		if(lexeme == lexemes[i]) return 1;
333 | 	return 0;
334 | }
335 | 
336 | 
337 | int is_stmt_beginning() {
338 | 	static const int lexemes[] = {
339 | 		LEX_ASM, LEX_IF, LEX_WHILE, LEX_BREAK, LEX_CONTINUE, LEX_RETURN, ';'
340 | 	};
341 | 	for(int i = 0; i < sizeof(lexemes) / sizeof(int); i++)
342 | 		if(lexeme == lexemes[i]) return 1;
343 | 	return is_expr_beginning();
344 | }
345 | 
346 | 
347 | void expression();
348 | void expr_level_zero() {
349 | 
350 | 	if(lexeme == '!') {
351 | 		read_lexeme();
352 | 		expr_level_zero();
353 | 		output("\ttest %s, %s\n", regname(0), regname(0));
354 | 		output("\tsetz cl\n");
355 | 		output("\tmovzx %s, cl\n", regname(0));
356 | 		return;
357 | 	}
358 | 	if(lexeme == '-') {
359 | 		if(!neg_number) {
360 | 			read_lexeme();
361 | 			expr_level_zero();
362 | 			output("\tneg %s\n", regname(0));
363 | 			return;
364 | 		}
365 | 		read_lexeme();
366 | 		push();
367 | 		output("\tmov %s, %ld\n", regname(0), -number);
368 | 		read_lexeme();
369 | 	}
370 | 	else if(lexeme == LEX_NUMBER) {
371 | 		push();
372 | 		output("\tmov %s, %ld\n", regname(0), number);
373 | 		read_lexeme();
374 | 	}
375 | 	else if(lexeme == LEX_CHAR) {
376 | 		push();
377 | 		output("\tmov %s, %s\n", regname(0), token);
378 | 		read_lexeme();
379 | 	}
380 | 	else if(lexeme == '(') {
381 | 		read_lexeme();
382 | 		expression();
383 | 		expect(')');
384 | 	}
385 | 	else if(lexeme == LEX_IDENT) {
386 | 		char name[64];
387 | 		strcpy(name, token);
388 | 		Variable* v = lookup_local();
389 | 
390 | 		read_lexeme();
391 | 		if(lexeme == '(') {	// function call
392 | 			// save used regs on stack
393 | 			int i = stack_size;
394 | 			if(i > cache_size) i = cache_size;
395 | 			while(i-- > 0) output("\tpush %s\n", regname(i));
396 | 
397 | 			int old_size = stack_size;
398 | 			stack_size = 0;
399 | 
400 | 			// expr list
401 | 			int args = 0;
402 | 			read_lexeme();
403 | 			if(is_expr_beginning()) {
404 | 				args++;
405 | 				expression();
406 | 				output("\tpush %s\n", regname(0));
407 | 				pop();
408 | 				while(lexeme == ',') {
409 | 					read_lexeme();
410 | 					args++;
411 | 					if(args > 6) error("too many arguments");
412 | 					expression();
413 | 					output("\tpush %s\n", regname(0));
414 | 					pop();
415 | 				}
416 | 			}
417 | 			expect(')');
418 | 
419 | 			// set-up registers
420 | 			for(int i = args - 1; i >= 0; i--) {
421 | 				output("\tpop %s\n", call_regs[i]);
422 | 			}
423 | 
424 | 			// call
425 | 			output("\txor rax, rax\n");
426 | 			output("\tcall %s@PLT\n", name);
427 | 
428 | 			init_cache();
429 | 			push();
430 | 			stack_size = old_size + 1;
431 | 			int m = stack_size;
432 | 			if(m > cache_size) m = cache_size;
433 | 			for(i = 1; i < m; i++) {
434 | 				output("\tpop %s\n", regname(i));
435 | 			}
436 | 		}
437 | 		else if(lexeme == '=') {
438 | 			read_lexeme();
439 | 			expression();
440 | 			if(!v)
441 | 				output("\tmov %s[rip], %s\n", name, regname(0));
442 | 			else
443 | 				output("\tmov QWORD PTR [rbp - %d], %s\n", v->offset, regname(0));
444 | 		}
445 | 		else {
446 | 			push();
447 | 			if(!v)
448 | 				output("\tmov %s, %s[rip]\n", regname(0), name);
449 | 			else
450 | 				output("\tmov %s, QWORD PTR [rbp - %d]\n", regname(0), v->offset);
451 | 		}
452 | 	}
453 | 	else if(lexeme == '@') {
454 | 		// dereference
455 | 		error("not implementet yet");
456 | 	}
457 | 	else if(lexeme == LEX_STRING) {
458 | 		push();
459 | 		output("\t.section .rodata\n");
460 | 		output("LC%d:\n", label);
461 | 		output("\t.string %s\n", token);
462 | 		output("\t.text\n");
463 | 		output("\tlea %s, LC%d[rip]\n", regname(0), label);
464 | 		label++;
465 | 		read_lexeme();
466 | 	}
467 | 	else error("bad expression");
468 | 
469 | 	while(lexeme == '[') {
470 | 		read_lexeme();
471 | 		expression();
472 | 		expect(']');
473 | 		if(lexeme == '=') {
474 | 			read_lexeme();
475 | 			expression();
476 | 			output("\tmov QWORD PTR [%s + %s * 8], %s\n", regname(2), regname(1), regname(0));
477 | 			int tmp = cache[2];
478 | 			cache[2] = cache[0];
479 | 			cache[0] = tmp;
480 | 			pop();
481 | 			pop();
482 | 			return;
483 | 		}
484 | 		output("\tmov %s, QWORD PTR [%s + %s * 8]\n", regname(1), regname(1), regname(0));
485 | 		pop();
486 | 	}
487 | 	if(lexeme == '{') {
488 | 		read_lexeme();
489 | 		expression();
490 | 		expect('}');
491 | 		if(lexeme == '=') {
492 | 			read_lexeme();
493 | 			expression();
494 | 			output("\tmov rcx, %s\n", regname(0));
495 | 			output("\tmov BYTE PTR [%s + %s], cl\n", regname(2), regname(1));
496 | 			int tmp = cache[2];
497 | 			cache[2] = cache[0];
498 | 			cache[0] = tmp;
499 | 			pop();
500 | 			pop();
501 | 			return;
502 | 		}
503 | 		output("\tmov cl, BYTE PTR [%s + %s]\n", regname(1), regname(0));
504 | 		output("\tmovzx %s, cl\n", regname(1));
505 | 		pop();
506 | 	}
507 | 
508 | }
509 | 
510 | 
511 | void expr_level_one() {
512 | 	expr_level_zero();
513 | 	while(strchr("*%/", lexeme)) {
514 | 		if(lexeme == '*') {
515 | 			read_lexeme();
516 | 			expr_level_zero();
517 | 			output("\timul %s, %s\n", regname(1), regname(0));
518 | 			pop();
519 | 		}
520 | 		else if(lexeme == '%') {
521 | 			error("TODO");
522 | 		}
523 | 		else if(lexeme == '/') {
524 | 			error("TODO");
525 | 		}
526 | 	}
527 | }
528 | 
529 | 
530 | void expr_level_two() {
531 | 	expr_level_one();
532 | 	while(strchr("+-", lexeme)) {
533 | 		if(lexeme == '+') {
534 | 			read_lexeme();
535 | 			expr_level_one();
536 | 			output("\tadd %s, %s\n", regname(1), regname(0));
537 | 			pop();
538 | 		}
539 | 		else if(lexeme == '-') {
540 | 			read_lexeme();
541 | 			expr_level_one();
542 | 			output("\tsub %s, %s\n", regname(1), regname(0));
543 | 			pop();
544 | 		}
545 | 	}
546 | }
547 | 
548 | 
549 | void expr_level_three() {
550 | 	expr_level_two();
551 | 	char* comp;
552 | 	switch(lexeme) {
553 | 	case '<': comp = "l"; break;
554 | 	case '>': comp = "g"; break;
555 | 	case LEX_LE: comp = "le"; break;
556 | 	case LEX_GE: comp = "ge"; break;
557 | 	case LEX_EQ: comp = "e"; break;
558 | 	case LEX_NE: comp = "ne"; break;
559 | 	default: return;
560 | 	}
561 | 	read_lexeme();
562 | 	expr_level_two();
563 | 	output("\tcmp %s, %s\n", regname(1), regname(0));
564 | 	output("\tset%s cl\n", comp);
565 | 	output("\tmovzx %s, cl\n", regname(1));
566 | 	pop();
567 | }
568 | 
569 | 
570 | void expr_level_four() {
571 | 	expr_level_three();
572 | 	while(lexeme == '&') {
573 | 		read_lexeme();
574 | 		expr_level_three();
575 | 		output("\tand %s, %s\n", regname(1), regname(0));
576 | 		pop();
577 | 	}
578 | }
579 | 
580 | 
581 | void expression() {
582 | 	expr_level_four();
583 | 	while(lexeme == '|') {
584 | 		read_lexeme();
585 | 		expr_level_four();
586 | 		output("\tor %s, %s\n", regname(1), regname(0));
587 | 		pop();
588 | 	}
589 | }
590 | 
591 | 
592 | void statement();
// Compile statements for as long as the current lexeme can start one.
void statement_list() {
	for(;;) {
		if(!is_stmt_beginning()) return;
		statement();
	}
}
596 | 
597 | 
598 | void statement() {
599 | 	if(lexeme == LEX_ASM) {
600 | 		read_lexeme();
601 | 		asm_active = 1;
602 | 		newline = 1;
603 | 		expect(':');
604 | 		while(lexeme == LEX_ASM_LINE) {
605 | 			if(lexeme == LEX_ASM_LINE) output("\t%s\n", token);
606 | 			read_lexeme();
607 | 		}
608 | 		expect(LEX_BLOCK_END);
609 | 	}
610 | 	else if(lexeme == LEX_IF) {
611 | 		read_lexeme();
612 | 		expression();
613 | 		expect(':');
614 | 		int l_end = label++;
615 | 		int l_next = label++;
616 | 		int end = 0;
617 | 		output("\ttest %s, %s\n", regname(0), regname(0));
618 | 		output("\tjz .L%d\n", l_next);
619 | 		init_cache();
620 | 		statement_list();
621 | 		expect(LEX_BLOCK_END);
622 | 		if(lexeme == LEX_ELIF || lexeme == LEX_ELSE) {
623 | 			output("\tjmp .L%d\n", l_end);
624 | 			end = 1;
625 | 		}
626 | 		output(".L%d:\n", l_next);
627 | 		while(lexeme == LEX_ELIF) {
628 | 			read_lexeme();
629 | 			expression();
630 | 			expect(':');
631 | 			l_next = label++;
632 | 			output("\ttest %s, %s\n", regname(0), regname(0));
633 | 			output("\tjz .L%d\n", l_next);
634 | 			init_cache();
635 | 			statement_list();
636 | 			expect(LEX_BLOCK_END);
637 | 			if(lexeme == LEX_ELIF || lexeme == LEX_ELSE)
638 | 				output("\tjmp .L%d\n", l_end);
639 | 			output(".L%d:\n", l_next);
640 | 		}
641 | 		if(lexeme == LEX_ELSE) {
642 | 			read_lexeme();
643 | 			expect(':');
644 | 			init_cache();
645 | 			statement_list();
646 | 			expect(LEX_BLOCK_END);
647 | 		}
648 | 		if(end) output(".L%d:\n", l_end);
649 | 	}
650 | 	else if(lexeme == LEX_WHILE) {
651 | 		read_lexeme();
652 | 		while_level++;
653 | 		if(while_level == 256) error("while nesting limit exceeded");
654 | 		while_labels[while_level] = label;
655 | 		label += 2;
656 | 		output(".L%d:\n", while_labels[while_level]);
657 | 		expression();
658 | 		expect(':');
659 | 		output("\ttest %s, %s\n", regname(0), regname(0));
660 | 		output("\tjz .L%d\n", while_labels[while_level] + 1);
661 | 		init_cache();
662 | 		statement_list();
663 | 		expect(LEX_BLOCK_END);
664 | 		output("\tjmp .L%d\n", while_labels[while_level]);
665 | 		output(".L%d:\n", while_labels[while_level] + 1);
666 | 		while_level--;
667 | 	}
668 | 	else if(lexeme == LEX_BREAK) {
669 | 		read_lexeme();
670 | 		if(while_level < 0) error("break without while");
671 | 		output("\tjmp .L%d\n", while_labels[while_level] + 1);
672 | 	}
673 | 	else if(lexeme == LEX_CONTINUE) {
674 | 		read_lexeme();
675 | 		if(while_level < 0) error("continue without while");
676 | 		output("\tjmp .L%d\n", while_labels[while_level]);
677 | 	}
678 | 	else if(lexeme == LEX_RETURN) {
679 | 		read_lexeme();
680 | 		if(is_expr_beginning()) {
681 | 			expression();
682 | 			if(strcmp(regname(0), "rax") != 0)
683 | 				output("\tmov rax, %s\n", regname(0));
684 | 			pop();
685 | 		}
686 | 		output("\tleave\n");
687 | 		output("\tret\n");
688 | 	}
689 | 	else if(is_expr_beginning()) {
690 | 		expression();
691 | 		pop();
692 | 	}
693 | 	else expect(';');
694 | }
695 | 
696 | 
697 | void minilang() {
698 | 	line_number = 1;
699 | 	read_char();
700 | 	read_lexeme();
701 | 
702 | 	output("\t.intel_syntax noprefix\n");
703 | 	output("\t.text\n");
704 | 
705 | 	while(lexeme != LEX_EOF) {
706 | 
707 | 		// global variables
708 | 		while(lexeme == LEX_VAR) {
709 | 			read_lexeme();
710 | 			expect(LEX_IDENT);
711 | 		    output("\t.comm %s, 8, 8\n", token);
712 | 			while(lexeme == ',') {
713 | 				read_lexeme();
714 | 				expect(LEX_IDENT);
715 |     			output("\t.comm %s, 8, 8\n", token);
716 | 			}
717 | 			while(lexeme == ';') read_lexeme();
718 | 		}
719 | 
720 | 		expect(LEX_IDENT);
721 | 		output("\t.globl %s\n", token);
722 | 		output("%s:\n", token);
723 | 		output("\tpush rbp\n");
724 | 		output("\tmov rbp, rsp\n");
725 | 
726 | 		int frame = 0;
727 | 		local_count = 0;
728 | 
729 | 		// parameter list
730 | 		int params = 0;
731 | 		expect('(');
732 | 		if(lexeme == LEX_IDENT) {
733 | 			params++;
734 | 			expect(LEX_IDENT);
735 | 			frame += 8;
736 | 			add_local(frame);
737 | 			while(lexeme == ',') {
738 | 				read_lexeme();
739 | 				params++;
740 | 				if(params > 6) error("too many arguments");
741 | 				expect(LEX_IDENT);
742 | 				frame += 8;
743 | 				add_local(frame);
744 | 			}
745 | 		}
746 | 		expect(')');
747 | 		expect(':');
748 | 
749 | 		// local variables
750 | 		while(lexeme == ';') read_lexeme();
751 | 		while(lexeme == LEX_VAR) {
752 | 			read_lexeme();
753 | 			expect(LEX_IDENT);
754 | 			frame += 8;
755 | 			add_local(frame);
756 | 			while(lexeme == ',') {
757 | 				read_lexeme();
758 | 				expect(LEX_IDENT);
759 | 				frame += 8;
760 | 				add_local(frame);
761 | 			}
762 | 			while(lexeme == ';') read_lexeme();
763 | 		}
764 | 
765 | 		if(frame > 0) output("\tsub rsp, %d\n", frame);
766 | 		for(int i = 0; i < params; i++) {
767 | 			output("\tmov QWORD PTR [rbp - %d], %s\n", i * 8 + 8, call_regs[i]);
768 | 		}
769 | 
770 | 		init_cache();
771 | 		statement_list();
772 | 		output("\tleave\n");
773 | 		output("\tret\n");
774 | 		expect(LEX_BLOCK_END);
775 | 
776 | 	}
777 | }
778 | 
779 | 
780 | void cleanup() {
781 | 	if(src_file) fclose(src_file);
782 | 	if(dst_file && dst_file != stdin) fclose(dst_file);
783 | }
784 | 
785 | 
786 | int main(int argc, char** argv) {
787 | 
788 | 	if(argc < 2 || argc > 3) {
789 | 		printf("usuage: %s <source> [output]\n", argv[0]);
790 | 		exit(0);
791 | 	}
792 | 
793 | 	src_file = fopen(argv[1], "r");
794 | 	if(!src_file) error("opening source file failed");
795 | 
796 | 	if(argc == 3) {
797 | 		dst_file = fopen(argv[2], "w");
798 | 		if(!dst_file) error("opening output file failed");
799 | 	}
800 | 	else dst_file = stdout;
801 | 	atexit(cleanup);
802 | 	minilang();
803 | 	return 0;
804 | }
805 | 
806 | 


--------------------------------------------------------------------------------
/minilang.mini:
--------------------------------------------------------------------------------
  1 | # minilang compiler in minilang
  2 | 
  3 | 
# input stream read by read_char() and output stream written by output()
var src_file, dst_file

# scanner
# position used in the prefix of error messages
var line_number, cursor_pos
# one-character lookahead and the most recently scanned lexeme
var character, lexeme
# byte buffer holding the text of the last literal, identifier or asm line
var token
# value of the last number literal; is_negative is set by scan() when a
# one-character token directly precedes a digit (true if that token was '-')
var number, is_negative
# bracket depth, expected block indentation, indentation of the current
# line, "no token seen on this line yet" flag, raw asm-line mode flag
var brackets, block, indent, newline, asm_active
# table of keyword strings compared against identifiers in scan()
var keywords
 13 | 
 14 | 
# Print "<line>:<column>: error: ", the formatted message and a newline to
# stderr, then exit with status 1. a1..a4 are forwarded to fprintf as the
# arguments of the format string msg.
error(msg, a1, a2, a3, a4):
	fprintf(stderr, "%d:%d: error: ", line_number, cursor_pos)
	fprintf(stderr, msg, a1, a2, a3, a4)
	fprintf(stderr, "\n")
	exit(1)
 20 | 
 21 | 
# Formatted write to dst_file; a1..a4 are forwarded to fprintf as the
# arguments of the format string msg.
output(msg, a1, a2, a3, a4): fprintf(dst_file, msg, a1, a2, a3, a4)
 23 | 
 24 | 
# Advance the lookahead: returns the character that was current before the
# call and loads the next one from src_file into `character`.
read_char():
	var c
	c = character
	character = fgetc(src_file)
	cursor_pos = cursor_pos + 1
	# a newline in the lookahead starts a new line for error positions
	if character == '\n':
		line_number = line_number + 1
		cursor_pos = 0
	return c
 34 | 
 35 | 
# Scan the next lexeme and return its code. One-character tokens are returned
# as themselves; the other codes are single letters:
#   'E' block end   'A' asm line    'C' char literal   'S' string literal
#   'N' number      'I' identifier  'L' <=   'M' >=   'Q' ==   'T' !=
#   '0' end of input, and one letter of "ailfwbcrv" per entry of keywords.
scan():
	var i

	while 1:
		# whitespace
		while isspace(character):
			# indentation is only measured before the first token of a line;
			# a tab advances to the next multiple of 4
			if newline:
				if character == ' ': indent = indent + 1
				elif character == '\t': indent = (indent & -4) + 4
			if character == '\n':
				indent = 0
				i = newline
				newline = 1
				# a line that contained tokens ends the statement, unless
				# brackets are still open
				if i == 0 & brackets == 0: return ';'
			read_char()

		# comment
		if character == '#':
			while character != '\n': read_char()
		else: break

	# indent
	if !brackets:
		if indent > block: error("invalid indentation")
		# a shallower line closes one block per call
		if indent < block:
			asm_active = 0
			block = block - 4
			return 'E'

	# asm line
	# in asm mode the rest of the line is copied into token verbatim
	if asm_active:
		i = 0
		while character != '\n':
			token{i} = read_char()
			i = i + 1
		token{i} = 0
		return 'A'

	newline = 0

	# ':' opens a new block one level (4 columns) deeper
	if character == ':':
		read_char()
		block = block + 4
		indent = indent + 4
		return ':'

	if strchr("-+*&|!=<>;()[]{},", character):
		i = read_char()
		# two-character comparison operators
		if strchr("<>=!", i) != 0 & character == '=':
			read_char()
			if i == '<': return 'L'
			if i == '>': return 'M'
			if i == '=': return 'Q'
			if i == '!': return 'T'
		if i == '[' | i == '(': brackets = brackets + 1
		elif i == ']' | i == ')': brackets = brackets - 1
		# only updated when a digit follows; otherwise the old value is kept
		if isdigit(character): is_negative = (i == '-')
		return i

	# char
	# token receives the literal including both quotes and any backslash
	if character == '\'':
		token{0} = read_char()
		i = 1
		if character == '\\': token{1} = read_char(); i = 2
		token{i} = read_char()
		token{i + 1} = '\''
		token{i + 2} = 0
		if read_char() != '\'': error("bad character literal")
		return 'C'

	# string
	# token receives the literal including both quotes; a backslash and the
	# character after it are copied as a pair
	if character == '"':
		i = 0
		while 1:
			if character == '\\': token{i} = read_char(); i = i + 1
			token{i} = read_char(); i = i + 1
			if i > 1020: error("string too long")
			if character == '"': break
		token{i} = read_char()
		token{i + 1} = 0
		return 'S'

	# number
	if isdigit(character):
		i = 0
		while isdigit(character):
			token{i} = read_char()
			i = i + 1
			if i > 20: error("number too long")
		token{i} = 0
		number = atoll(token)
		return 'N'

	# identifier/keyword
	if isalpha(character) | character == '_':
		token{0} = read_char()
		i = 1
		while isalnum(character) | character == '_':
			token{i} = read_char()
			i = i + 1
			if i > 62: error("identifier too long")
		token{i} = 0

		# check for keyword
		# keyword i is reported as the i-th letter of "ailfwbcrv"
		i = 0
		while i < 9:
			if strcmp(token, keywords[i]) == 0: return "ailfwbcrv"{i}
			i = i + 1
		return 'I'

	# NOTE(review): minilang.c reports "unknown character" when the lookahead
	# is NOT end-of-file; this test has the opposite sense - confirm intent
	if character < 0: error("unknown character")
	# at the end of the input, close the blocks that are still open
	if block > 0: block = block - 4; return 'E'
	return '0'
149 | 
150 | 
# Fetch the next lexeme from the scanner into the global `lexeme`.
read_lexeme(): lexeme = scan()
152 | 
153 | 
# Require the current lexeme to be l, then advance to the next one.
# A mismatch is a fatal error that names both lexeme codes.
expect(l):
	if lexeme != l: error("read <%c>, but <%c> expected", lexeme, l)
	read_lexeme()
157 | 
158 | 
159 | # parser
# locals of the current function: count, names and frame offsets (parallel
# tables indexed by the value lookup_local() returns)
var local_count, local_names, local_offsets
# register cache (cache[i] indexes regs), number of live expression
# temporaries, and the running frame size that add_local() advances by 8
var cache, stack_size, frame
# next unused label number and the label bookkeeping for while loops
var label, while_labels, while_level
# register name tables: expression temporaries and call arguments
var regs, callregs
164 | 
165 | 
# Name of the register currently holding temporary i (0 is the newest).
regname(i): return regs[cache[i]]
167 | 
168 | 
# Reset the four cache slots to the identity mapping and mark the
# expression stack as empty.
init_cache():
	var i; i = 0
	while i < 4: cache[i] = i; i = i + 1
	stack_size = 0
173 | 
174 | 
# Index of the local variable whose name equals the current token,
# or -1 when no local has that name.
lookup_local():
	var i; i = 0
	while i < local_count:
		if strcmp(token, local_names[i]) == 0: return i
		i = i + 1
	return -1
181 | 
182 | 
# Register the current token as a new local variable in the next 8-byte
# frame slot. A name that is already registered is a fatal error, and so is
# exceeding 256 locals.
add_local():
	var i
	frame = frame + 8
	i = 0
	while i < 256:
		# reached the first free entry without finding a duplicate
		if i == local_count:
			strcpy(local_names[i], token)
			local_offsets[i] = frame
			local_count = local_count + 1
			return
		if strcmp(token, local_names[i]) == 0: error("multiple declarations")
		i = i + 1
	error("too many variables")
196 | 
197 | 
# Make room for a new temporary: the register in the last cache slot becomes
# the new top (slot 0). If all four registers already hold live temporaries,
# its value is first spilled with a push instruction.
push():
	var i, tmp
	i = 3
	tmp = cache[i]
	if stack_size >= 4: output("\tpush %s\n", regs[tmp])
	# shift the slots down by one to free slot 0
	while i > 0:
		cache[i] = cache[i - 1]
		i = i - 1
	cache[0] = tmp
	stack_size = stack_size + 1
208 | 
209 | 
# Drop the newest temporary. When the stack becomes empty the cache is
# reset; otherwise the freed register rotates into the last cache slot and,
# if more temporaries are live than there are registers, the value spilled
# by the matching push() is reloaded into that same register.
# The reload names regs[tmp] (the register now in the last slot), not
# regs[i]: the two only coincide while the rotation is the identity.
pop():
	var i, tmp
	stack_size = stack_size - 1
	if stack_size == 0: init_cache()
	else:
		i = 0
		tmp = cache[0]
		while i < 3:
			cache[i] = cache[i + 1]
			i = i + 1
		cache[i] = tmp
		if stack_size >= 4: output("\tpop %s\n", regs[tmp])
222 | 
223 | 
# True when the current lexeme can start an expression: '-', '!', '(',
# or a number, char, string or identifier lexeme.
is_expr_beginning(): return strchr("-!(NCSI", lexeme) != 0
# True when the current lexeme can start a statement: anything that starts
# an expression, ';', or one of the lexeme codes a, i, w, b, c, r.
is_stmt_beginning(): return is_expr_beginning() | strchr("aiwbcr;", lexeme) != 0
226 | 
227 | 
# Primary expressions and postfix operators. Emits code that leaves the
# value in regname(0). Handles '!', unary '-', number/char/string literals,
# parenthesised expressions, function calls, variable reads and assignments,
# followed by any number of [index] accesses and at most one {index} access
# (each optionally assigned to).
expr_0():
	var i, v, s, name
	# logical not: 1 if the operand is zero, else 0
	if lexeme == '!':
		read_lexeme()
		expr_0()
		output("\ttest %s, %s\n", regname(0), regname(0))
		output("\tsetz cl\n")
		output("\tmovzx %s, cl\n", regname(0))
		return
	if lexeme == '-':
		# unary minus on an arbitrary operand
		if !is_negative:
			read_lexeme()
			expr_0()
			output("\tneg %s\n", regname(0))
			return
		# '-' directly in front of a digit: emit the negated literal
		read_lexeme()
		push()
		output("\tmov %s, %ld\n", regname(0), -number)
		read_lexeme()
	elif lexeme == 'N':
		push()
		output("\tmov %s, %ld\n", regname(0), number)
		read_lexeme()
	elif lexeme == 'C':
		# token holds the literal with its quotes and is emitted as is
		push()
		output("\tmov %s, %s\n", regname(0), token)
		read_lexeme()
	elif lexeme == '(':
		read_lexeme()
		expression()
		expect(')')
	elif lexeme == 'I':
		# copy the name: token is overwritten by later lexemes
		name = malloc(64)
		strcpy(name, token)
		v = lookup_local()
		read_lexeme()

		if lexeme == '(': # function call

			# save regs
			i = stack_size
			if i > 4: i = 4
			while i > 0:
				i = i - 1
				output("\tpush %s\n", regname(i))

			# arguments are evaluated on a fresh expression stack
			s = stack_size
			stack_size = 0

			# i counts the arguments; each one is pushed after evaluation
			read_lexeme()
			i = 0
			if is_expr_beginning():
				i = i + 1
				expression()
				output("\tpush %s\n", regname(0))
				pop()
				while lexeme == ',':
					read_lexeme()
					i = i + 1
					if i > 6: error("too many arguments")
					expression()
					output("\tpush %s\n", regname(0))
					pop()
			expect(')')

			# set-up regs
			# the last argument is on top of the stack, so pop in reverse
			while i > 0:
				i = i - 1
				output("\tpop %s\n", callregs[i])
			output("\txor rax, rax\n")
			output("\tcall %s@PLT\n", name)

			# return value in rax
			init_cache()
			push()
			stack_size = s + 1

			# restore the saved temporaries below the result
			if s > 3: s = 3
			i = 1
			while i <= s:
				output("\tpop %s\n", regname(i))
				i = i + 1

		elif lexeme == '=': # assignment
			read_lexeme()
			expression()
			# names that are not locals are addressed rip-relative
			if v < 0: output("\tmov %s[rip], %s\n", name, regname(0))
			else: output("\tmov QWORD PTR [rbp - %d], %s\n",
					local_offsets[v], regname(0))
		else:
			# plain variable read
			push()
			if v < 0: output("\tmov %s, %s[rip]\n", regname(0), name)
			else: output("\tmov %s, QWORD PTR [rbp - %d]\n",
					regname(0), local_offsets[v])
		free(name)

	elif lexeme == 'S': # string literal
		# the literal is emitted into .rodata and its address is loaded
		push()
		output("\t.section .rodata\n")
		output("LC%d:\n", label)
		output("\t.string %s\n", token)
		output("\t.text\n")
		output("\tlea %s, LC%d[rip]\n", regname(0), label)
		label = label + 1
		read_lexeme()
	else: error("bad expression")

	# 8-byte element access: base in regname(1), index in regname(0)
	while lexeme == '[':
		read_lexeme()
		expression()
		expect(']')
		if lexeme == '=':
			read_lexeme()
			expression()
			output("\tmov QWORD PTR [%s + %s * 8], %s\n",
					regname(2), regname(1), regname(0))
			# swap slots 0 and 2 so the stored value's register is the one
			# left on top after the two pops
			i = cache[2]
			cache[2] = cache[0]
			cache[0] = i
			pop()
			pop()
			return
		output("\tmov %s, QWORD PTR [%s + %s * 8]\n",
				regname(1), regname(1), regname(0))
		pop()

	# single-byte access, same scheme with cl as the transfer register
	if lexeme == '{':
		read_lexeme()
		expression()
		expect('}')
		if lexeme == '=':
			read_lexeme()
			expression()
			output("\tmov rcx, %s\n", regname(0))
			output("\tmov BYTE PTR [%s + %s], cl\n", regname(2), regname(1))
			i = cache[2]
			cache[2] = cache[0]
			cache[0] = i
			pop()
			pop()
			return

		output("\tmov cl, BYTE PTR [%s + %s]\n", regname(1), regname(0))
		output("\tmovzx %s, cl\n", regname(1))
		pop()
373 | 
374 | 
375 | expr_1(): # multiplication level of the expression grammar: expr_0 { '*' expr_0 }
376 | 	expr_0()
377 | 	while lexeme == '*': # per extra factor: emit `imul regname(1), regname(0)`, then pop() once
378 | 		read_lexeme()
379 | 		expr_0()
380 | 		output("\timul %s, %s\n", regname(1), regname(0))
381 | 		pop()
382 | 
383 | 
384 | expr_2(): # additive level of the expression grammar: expr_1 { ('+' | '-') expr_1 }
385 | 	var op
386 | 	expr_1()
387 | 	# The operators are compared explicitly: strchr("+-", lexeme) also matches
388 | 	# the string's terminating NUL, so a zero lexeme would enter the loop,
389 | 	# consume nothing and never leave it.
390 | 	while lexeme == '+' | lexeme == '-':
391 | 		if lexeme == '+': op = "add"
392 | 		else: op = "sub"
393 | 		read_lexeme()
394 | 		expr_1()
395 | 		output("\t%s %s, %s\n", op, regname(1), regname(0))
396 | 		pop()
397 | 
398 | 
399 | expr_3(): # comparison level: expr_2 [ relop expr_2 ]; c is the setcc suffix picked from the operator lexeme, the 0/1 result lands in regname(1)
400 | 	var c
401 | 	expr_2()
402 | 	if lexeme == '<': c = "l"
403 | 	elif lexeme == '>': c = "g"
404 | 	elif lexeme == 'L': c = "le"
405 | 	elif lexeme == 'M': c = "ge"
406 | 	elif lexeme == 'Q': c = "e"
407 | 	elif lexeme == 'T': c = "ne"
408 | 	else: return
409 | 	read_lexeme()
410 | 	expr_2()
411 | 	output("\tcmp %s, %s\n", regname(1), regname(0))
412 | 	output("\tset%s cl\n", c)
413 | 	output("\tmovzx %s, cl\n", regname(1))
414 | 	pop()
415 | 
416 | 
417 | expr_4(): # '&' level: expr_3 { '&' expr_3 }
418 | 	expr_3()
419 | 	while lexeme == '&': # code for both operands is always emitted, then a plain `and` (no short-circuit jump)
420 | 		read_lexeme()
421 | 		expr_3()
422 | 		output("\tand %s, %s\n", regname(1), regname(0))
423 | 		pop()
424 | 
425 | 
426 | expression(): # entry point of the expression parser and its loosest-binding level: expr_4 { '|' expr_4 }
427 | 	expr_4()
428 | 	while lexeme == '|': # code for both operands is always emitted, then a plain `or` (no short-circuit jump)
429 | 		read_lexeme()
430 | 		expr_4()
431 | 		output("\tor %s, %s\n", regname(1), regname(0))
432 | 		pop()
433 | 
434 | 
435 | statement_list(): while is_stmt_beginning(): statement() # keep parsing statements while the current lexeme can start one
436 | 
437 | 
438 | statement(): # parse one statement, dispatching on the current lexeme, and emit its assembly
439 | 	var l_end, l_next, end
440 | 	if lexeme == 'a': # inline assembly block: every 'A' token is written out as one tab-indented line
441 | 		read_lexeme()
442 | 		asm_active = 1
443 | 		newline = 1
444 | 		expect(':')
445 | 		while lexeme == 'A':
446 | 			output("\t%s\n", token)
447 | 			read_lexeme()
448 | 		expect('E')
449 | 	elif lexeme == 'i': # conditional chain: l_next labels the following branch, l_end the end of the whole chain
450 | 		read_lexeme()
451 | 		expression()
452 | 		expect(':')
453 | 		l_end = label
454 | 		l_next = label + 1
455 | 		label = label + 2
456 | 		end = 0
457 | 		output("\ttest %s, %s\n", regname(0), regname(0))
458 | 		output("\tjz .L%d\n", l_next)
459 | 		init_cache()
460 | 		statement_list()
461 | 		expect('E')
462 | 		if strchr("lf", lexeme): # another branch follows, so the body just emitted must jump past it; end records that l_end is needed
463 | 			output("\tjmp .L%d\n", l_end)
464 | 			end = 1
465 | 		output(".L%d:\n", l_next)
466 | 		while lexeme == 'f': # follow-up branch with its own condition (presumably `elif`); gets a fresh l_next
467 | 			read_lexeme()
468 | 			expression()
469 | 			expect(':')
470 | 			l_next = label
471 | 			label = label + 1
472 | 			output("\ttest %s, %s\n", regname(0), regname(0))
473 | 			output("\tjz .L%d\n", l_next)
474 | 			init_cache()
475 | 			statement_list()
476 | 			expect('E')
477 | 			if strchr("lf", lexeme): output("\tjmp .L%d\n", l_end)
478 | 			output(".L%d:\n", l_next)
479 | 		if lexeme == 'l': # final branch without a condition (presumably `else`)
480 | 			read_lexeme()
481 | 			expect(':')
482 | 			init_cache()
483 | 			statement_list()
484 | 			expect('E')
485 | 		if end: output(".L%d:\n", l_end)
486 | 	elif lexeme == 'w': # loop: while_labels[while_level] is the head label, that value + 1 the exit label
487 | 		read_lexeme()
488 | 		while_level = while_level + 1
489 | 		if while_level == 256: error("while nesting limit exceeded")
490 | 		while_labels[while_level] = label
491 | 		label = label + 2
492 | 		output(".L%d:\n", while_labels[while_level])
493 | 		expression()
494 | 		expect(':')
495 | 		output("\ttest %s, %s\n", regname(0), regname(0))
496 | 		output("\tjz .L%d\n", while_labels[while_level] + 1)
497 | 		init_cache()
498 | 		statement_list()
499 | 		expect('E')
500 | 		output("\tjmp .L%d\n", while_labels[while_level])
501 | 		output(".L%d:\n", while_labels[while_level] + 1)
502 | 		while_level = while_level - 1
503 | 	elif lexeme == 'b': # jump to the exit label of the innermost open loop
504 | 		read_lexeme()
505 | 		if while_level < 0: error("break without while")
506 | 		output("\tjmp .L%d\n", while_labels[while_level] + 1)
507 | 	elif lexeme == 'c': # jump back to the head label of the innermost open loop
508 | 		read_lexeme()
509 | 		if while_level < 0: error("continue without while")
510 | 		output("\tjmp .L%d\n", while_labels[while_level])
511 | 	elif lexeme == 'r': # return from the current function via leave/ret
512 | 		read_lexeme()
513 | 		if is_expr_beginning(): # optional return value, moved into rax unless regname(0) already is rax
514 | 			expression()
515 | 			if strcmp(regname(0), "rax") != 0:
516 | 				output("\tmov rax, %s\n", regname(0))
517 | 			pop()
518 | 		output("\tleave\n")
519 | 		output("\tret\n")
520 | 	elif is_expr_beginning(): # expression statement: emit the expression, then pop() its result
521 | 		expression()
522 | 		pop()
523 | 	else: expect(';')
524 | 
525 | 
526 | minilang(): # compile the whole input: set up scanner and parser state, then translate top-level items until lexeme '0'
527 | 	var i, params
528 | 
529 | 	# init scanner: token buffer, keyword table, position state, first char and lexeme
530 | 	token = malloc(1024)
531 | 	keywords = malloc(8 * 9)
532 | 	keywords[0] = "asm"
533 | 	keywords[1] = "if"
534 | 	keywords[2] = "else"
535 | 	keywords[3] = "elif"
536 | 	keywords[4] = "while"
537 | 	keywords[5] = "break"
538 | 	keywords[6] = "continue"
539 | 	keywords[7] = "return"
540 | 	keywords[8] = "var"
541 | 	line_number = 1
542 | 	newline = 1
543 | 	read_char()
544 | 	read_lexeme()
545 | 
546 | 	# init parser: loop-label stack, register cache, local tables, register name tables
547 | 	while_labels = malloc(8 * 256)
548 | 	cache = malloc(8 * 4)
549 | 	local_names = malloc(8 * 256)
550 | 	local_offsets = malloc(8 * 256)
551 | 	i = 0
552 | 	while i < 256: # a 64-byte buffer for each of the 256 local-name slots
553 | 		local_names[i] = malloc(64)
554 | 		i = i + 1
555 | 	regs = malloc(8 * 4)
556 | 	regs[0] = "r8"
557 | 	regs[1] = "r9"
558 | 	regs[2] = "r11"
559 | 	regs[3] = "rax"
560 | 	callregs = malloc(8 * 6)
561 | 	callregs[0] = "rdi"
562 | 	callregs[1] = "rsi"
563 | 	callregs[2] = "rdx"
564 | 	callregs[3] = "rcx"
565 | 	callregs[4] = "r8"
566 | 	callregs[5] = "r9"
567 | 	while_level = -1
568 | 
569 | 	output("\t.intel_syntax noprefix\n")
570 | 	output("\t.text\n")
571 | 
572 | 	while lexeme != '0': # each pass handles optional global declarations followed by one function definition
573 | 		# global variables: one 8-byte .comm symbol per declared name
574 | 		while lexeme == 'v':
575 | 			read_lexeme()
576 | 			expect('I')
577 | 			output("\t.comm %s, 8, 8\n", token)
578 | 			while lexeme == ',':
579 | 				read_lexeme()
580 | 				expect('I')
581 | 				output("\t.comm %s, 8, 8\n", token)
582 | 			while lexeme == ';': read_lexeme()
583 | 
584 | 
585 | 		# function definition: exported label plus the rbp frame prologue
586 | 		expect('I')
587 | 		output("\t.globl %s\n", token)
588 | 		output("%s:\n", token)
589 | 		output("\tpush rbp\n")
590 | 		output("\tmov rbp, rsp\n")
591 | 
592 | 		frame = 0
593 | 		local_count = 0
594 | 
595 | 		# parameter list: at most 6, one per entry of callregs; each is registered as a local
596 | 		params = 0
597 | 		expect('(')
598 | 		if lexeme == 'I':
599 | 			params = params + 1
600 | 			expect('I')
601 | 			add_local()
602 | 			while lexeme == ',':
603 | 				read_lexeme()
604 | 				params = params + 1
605 | 				if params > 6: error("too many arguments")
606 | 				expect('I')
607 | 				add_local()
608 | 		expect(')')
609 | 		expect(':')
610 | 
611 | 		# local variables: leading ';' lexemes are skipped, then any number of var lines
612 | 		while lexeme == ';': read_lexeme()
613 | 		while lexeme == 'v':
614 | 			read_lexeme()
615 | 			expect('I')
616 | 			add_local()
617 | 			while lexeme == ',':
618 | 				read_lexeme()
619 | 				expect('I')
620 | 				add_local()
621 | 			while lexeme == ';': read_lexeme()
622 | 
623 | 		if frame > 0: output("\tsub rsp, %d\n", frame)
624 | 		i = 0
625 | 		while i < params: # store register argument i in the stack slot at [rbp - (i * 8 + 8)]
626 | 			output("\tmov QWORD PTR [rbp - %d], %s\n", i * 8 + 8, callregs[i])
627 | 			i = i + 1
628 | 
629 | 		init_cache()
630 | 		statement_list()
631 | 		output("\tleave\n")
632 | 		output("\tret\n")
633 | 		expect('E')
634 | 
635 | 
636 | main(argc, argv): # entry point: <source> [output]; opens both files, runs minilang(), closes them
637 | 	var s
638 | 	if argc < 2 | argc > 3:
639 | 		printf("usage: %s <source> [output]\n", argv[0])
640 | 		exit(0)
641 | 	src_file = s = fopen(argv[1], "r")
642 | 	if !src_file: error("opening source file failed")
643 | 	if argc == 3: # explicit output path; without one the assembly goes to stdout
644 | 		dst_file = fopen(argv[2], "w")
645 | 		if !dst_file: error("opening output file failed")
646 | 	else: dst_file = stdout
647 | 	minilang()
648 | 	fclose(src_file)
649 | 	if dst_file != stdout: fclose(dst_file)
650 | 	return 0


--------------------------------------------------------------------------------