├── .gitignore
├── CMakeLists.txt
├── Makefile
├── README.md
├── src
    ├── CMakeLists.txt
    ├── CodeGenerator.cpp
    ├── Flexer.cpp
    ├── Lexer.cpp
    ├── Parser.cpp
    ├── SymbolTable.cpp
    ├── compiler.cpp
    ├── flex
    │   └── 1.l
    └── include
    │   ├── CodeGenerator.h
    │   ├── Flexer.h
    │   ├── ILexer.h
    │   ├── Lexer.h
    │   ├── Node.h
    │   ├── Parser.h
    │   ├── SymbolTable.h
    │   └── Token.h
└── tests
    ├── comment.calc
    ├── comment.test
    ├── expression.calc
    ├── expression.test
    ├── func-args.calc
    ├── func-args.test
    ├── func-call.calc
    ├── func-call.test
    ├── if-else.calc
    ├── if-else.test
    ├── if.calc
    ├── if.test
    ├── loop.calc
    └── loop.test


/.gitignore:
--------------------------------------------------------------------------------
 1 | build/
 2 | # Ignore the files flex generates into src/flex, but keep the grammar
 3 | # sources (the current *.l as well as the older *.lex naming).
 4 | src/flex/*
 5 | !src/flex/*.l
 6 | !src/flex/*.lex
 7 | # Only the test programs and their expected output are tracked.
 8 | tests/*
 9 | !tests/*.calc
10 | !tests/*.test
11 | *.swp
12 | *.o
13 | compiler
14 | .cproject
15 | .project
16 | 


--------------------------------------------------------------------------------
/CMakeLists.txt:
--------------------------------------------------------------------------------
 1 | # CMAKE_CXX_STANDARD 17 is only understood by CMake 3.8 and newer, and
 2 | # current CMake releases no longer accept pre-3.5 compatibility levels.
 3 | cmake_minimum_required(VERSION 3.8...3.29)
 4 | 
 5 | project(COMPILER LANGUAGES CXX)
 6 | 
 7 | # Strict ISO C++17 for every target; fail instead of silently falling back
 8 | # to an older standard when the compiler does not support it.
 9 | set(CMAKE_CXX_STANDARD 17)
10 | set(CMAKE_CXX_STANDARD_REQUIRED ON)
11 | set(CMAKE_CXX_EXTENSIONS OFF)
12 | add_compile_options(-Wall -Wextra -pedantic)
13 | 
14 | add_subdirectory(src)
15 | 


--------------------------------------------------------------------------------
/Makefile:
--------------------------------------------------------------------------------
 1 | APP = compiler
 2 | CXX = g++
 3 | # src/flex is on the include path because flex writes clexer.h there.
 4 | FLAGS = -std=c++17 -Wall -I ./src/include -I ./src/flex -g
 5 | SRC_DIR = src
 6 | SRC = $(wildcard $(SRC_DIR)/*.cpp)
 7 | BIN_DIR = build
 8 | 
 9 | # Flex grammar and the files generated from it.
10 | LEX_SRC = $(SRC_DIR)/flex/1.l
11 | LEX_CPP = $(SRC_DIR)/flex/lex.yy.cpp
12 | LEX_HDR = $(SRC_DIR)/flex/clexer.h
13 | LEX_OBJ = $(BIN_DIR)/clexer.o
14 | 
15 | # Every tests/<name>.calc is compiled into the executable tests/<name>.
16 | EXT = calc
17 | TSTDIR = tests
18 | TESTS = $(wildcard $(TSTDIR)/*.$(EXT))
19 | TESTS := $(TESTS:%.$(EXT)=%)
20 | 
21 | # Map src/<name>.cpp to build/<name>.o.
22 | OBJ := $(SRC:.cpp=.o)
23 | OBJ := $(OBJ:$(SRC_DIR)/%=%)
24 | OBJ := $(addprefix $(BIN_DIR)/,$(OBJ))
25 | 
26 | .PHONY: all clean test test-clean lexer
27 | 
28 | all: lexer $(APP)
29 | 
30 | $(BIN_DIR):
31 | 	mkdir -p $(BIN_DIR)
32 | 
33 | $(APP): $(OBJ) $(LEX_OBJ)
34 | 	$(CXX) $(FLAGS) $(OBJ) $(LEX_OBJ) -o $@
35 | 
36 | # Order-only prerequisites: the build directory must exist and the scanner
37 | # (together with clexer.h) must have been generated before any object is
38 | # compiled, but a change to either must not force a recompile by itself.
39 | $(BIN_DIR)/%.o: $(SRC_DIR)/%.cpp | $(BIN_DIR) $(LEX_CPP)
40 | 	$(CXX) $(FLAGS) -c $< -o $@
41 | 
42 | clean: test-clean
43 | 	rm -rf $(BIN_DIR) $(APP) $(LEX_CPP) $(LEX_HDR)
44 | 
45 | test-clean:
46 | 	rm -f $(TSTDIR)/*.asm $(TSTDIR)/*.out $(TSTDIR)/*.o $(TESTS)
47 | 
48 | # Run every test program and compare its output with the expected .test file.
49 | test: $(TESTS)
50 | 	@for item in $(TESTS); do \
51 | 		./$$item > $$item.out; \
52 | 		if diff -u ./$$item.test ./$$item.out; then echo "Test $$item is OK"; else echo "Warning!!! Test $$item is failed!!!"; fi; \
53 | 	done
54 | 
55 | # Compile, assemble and link one test program. The compiler itself is a
56 | # prerequisite so that `make test` works from a clean checkout; $< is still
57 | # the .calc file because it is listed first.
58 | $(TESTS): % : %.$(EXT) %.test $(APP)
59 | 	./$(APP) $< $@.asm
60 | 	nasm -f elf $@.asm
61 | 	ld -melf_i386 $@.o -o $@
62 | 
63 | # `lexer` is a convenience alias for the compiled scanner object.
64 | lexer: $(LEX_OBJ)
65 | 
66 | # flex writes both the scanner source and its header in one run.
67 | $(LEX_CPP): $(LEX_SRC)
68 | 	flex --header-file=$(LEX_HDR) -o $(LEX_CPP) $(LEX_SRC)
69 | 
70 | $(LEX_OBJ): $(LEX_CPP) | $(BIN_DIR)
71 | 	$(CXX) -std=c++17 $(LEX_CPP) -c -o $@
72 | 
73 | #ld -lc -melf_i386 -dynamic-linker /lib/ld-linux.so.2 $< -o $@
74 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # Compiler
 2 | 
 3 | Implemented features:
 4 | - variables, simple scope
 5 | - functions, print function
 6 | - calculation: addition, subtraction, multiplication, division
 7 | - if-else (simple, ==, !=, <, !<, >, !>) 
 8 | - loop (only with a calculated expression without variables)
 9 | - only integers as operands and arguments for functions
10 | 
11 | It uses `ld` and `nasm` as backend.
12 | 
13 | Build the compiler:
14 | ```
15 | make
16 | ```
17 | 
18 | Build and run tests:
19 | 
20 | ```
21 | make test
22 | ```
23 | 
24 | Example programs can be found in the \*.calc files in the tests directory.
25 | 


--------------------------------------------------------------------------------
/src/CMakeLists.txt:
--------------------------------------------------------------------------------
1 | 
2 | find_package(FLEX REQUIRED)
3 | flex_target(lexer flex/1.l  ${CMAKE_CURRENT_BINARY_DIR}/lex.yy.cpp
4 |     DEFINES_FILE ${CMAKE_CURRENT_BINARY_DIR}/clexer.h
5 | )
6 | 
7 | set(APP compiler)
8 | include_directories(include ${CMAKE_CURRENT_BINARY_DIR})
9 | add_executable(${APP} ${APP}.cpp CodeGenerator.cpp Flexer.cpp Parser.cpp SymbolTable.cpp ${FLEX_lexer_OUTPUTS})


--------------------------------------------------------------------------------
/src/CodeGenerator.cpp:
--------------------------------------------------------------------------------
  1 | #include <iostream>
  2 | #include "CodeGenerator.h"
  3 | 
  4 | // Opens `ofile` as the destination for the generated assembly.
  5 | // If the file cannot be opened, an error is reported and the process exits
  6 | // with status 1 instead of silently discarding all generated output.
  7 | CodeGenerator::CodeGenerator(std::string ofile) {
  8 | 	this->outfile.open(ofile);
  9 | 	if (!this->outfile) {
 10 | 		std::cerr << "Error: cannot open output file \"" << ofile << "\"" << std::endl;
 11 | 		exit(1);
 12 | 	}
 13 | }
  7 | 
  8 | // Closes the output file when the generator is destroyed.
  9 | CodeGenerator::~CodeGenerator() {
 10 | 	this->outfile.close();
 11 | }
 11 | 
 12 | 
 13 | /**
 14 |  * Emits 32-bit NASM assembly for the AST rooted at `ast` into `outfile`.
 15 |  *
 16 |  * N_NUMBER_C and N_ID nodes are written inline as a bare instruction operand
 17 |  * (without a trailing newline); the arithmetic nodes emit instructions that
 18 |  * leave their result in eax. Node types without a case are ignored.
 19 |  *
 20 |  * An unsupported if-condition or an unknown variable prints a message to
 21 |  * stdout and terminates the process with exit(1).
 22 |  *
 23 |  * @param ast root of the subtree to compile; nullptr is a no-op
 24 |  */
 25 | void CodeGenerator::compile(Node* ast) {
 26 | 	using namespace std;
 27 | 	if (!ast) return;
 28 | 
 29 | 	// Number literals and identifiers compile to an operand, not to instructions.
 30 | 	auto isOperand = [](const Node* n) {
 31 | 		return n->type == NodeType::N_NUMBER_C || n->type == NodeType::N_ID;
 32 | 	};
 33 | 
 34 | 	switch(ast->type) {
 35 | 		case NodeType::N_LOOP: {
 36 | 			// ast->args is the iteration count, ast->left the loop body.
 37 | 			Node* condition = ast->args;
 38 | 
 39 | 			outfile << "; LOOP COND" << endl;
 40 | 
 41 | 			if (isOperand(condition))
 42 | 				outfile << "mov eax, ";
 43 | 			compile(condition);
 44 | 			outfile << endl;
 45 | 			outfile << "mov ecx, eax" << endl;
 46 | 
 47 | 			int labelCount = ++this->label;
 48 | 
 49 | 			// ecx is the counter; it is saved on the stack around the body.
 50 | 			outfile << ".L"<< labelCount << ": ; LOOP BEGIN" << endl;
 51 | 			outfile << "push ecx" << endl;
 52 | 
 53 | 			compile(ast->left);
 54 | 
 55 | 			outfile << "pop ecx" << endl;
 56 | 			outfile << "dec ecx" << endl;
 57 | 			outfile << "jnz .L"<< labelCount << " ; LOOP END" << endl;
 58 | 			outfile << endl;
 59 | 			break;
 60 | 		}
 61 | 
 62 | 		case NodeType::N_SEQ:
 63 | 			compile(ast->left);
 64 | 			compile(ast->right);
 65 | 			outfile << endl;
 66 | 			break;
 67 | 
 68 | 		case NodeType::N_IF: {
 69 | 			outfile << "; IF " << endl;
 70 | 
 71 | 			// ast->args is the comparison, ast->left the "then" branch and
 72 | 			// ast->right the optional "else" branch.
 73 | 			Node* condition = ast->args;
 74 | 
 75 | 			// NOTE(review): the left value is kept in edx while the right side
 76 | 			// is evaluated; a right side containing a division overwrites edx.
 77 | 			if (isOperand(condition->left))
 78 | 				outfile << "mov eax, ";
 79 | 			compile(condition->left);
 80 | 			outfile << endl;
 81 | 			outfile << "mov edx, eax" << endl;
 82 | 
 83 | 			if (isOperand(condition->right))
 84 | 				outfile << "mov eax, ";
 85 | 			compile(condition->right);
 86 | 			outfile << endl;
 87 | 			outfile << "cmp edx, eax" << endl;
 88 | 
 89 | 			// Reserve this statement's label number before the branches are
 90 | 			// compiled: nested ifs and loops advance this->label, so every
 91 | 			// label of this statement must use the saved value.
 92 | 			const int endLabel = ++this->label;
 93 | 
 94 | 			// Jump over the "then" branch when the condition does NOT hold.
 95 | 			switch(condition->type) {
 96 | 				case NodeType::N_EQU:
 97 | 					outfile << "jnz .L" << endLabel << endl;
 98 | 					break;
 99 | 
100 | 				case NodeType::N_NEQU:
101 | 					outfile << "jz .L" << endLabel << endl;
102 | 					break;
103 | 
104 | 				case NodeType::N_LESS:
105 | 					outfile << "jnl .L" << endLabel << endl;
106 | 					break;
107 | 
108 | 				case NodeType::N_NLESS:
109 | 					outfile << "jl .L" << endLabel << endl;
110 | 					break;
111 | 
112 | 				case NodeType::N_GREATER:
113 | 					outfile << "jng .L" << endLabel << endl;
114 | 					break;
115 | 
116 | 				case NodeType::N_NGREATER:
117 | 					outfile << "jg .L" << endLabel << endl;
118 | 					break;
119 | 
120 | 				default:
121 | 					cout << "Error in compile if-condition" << endl;
122 | 					exit(1);
123 | 					break;
124 | 			}
125 | 
126 | 			compile(ast->left);
127 | 
128 | 			if (ast->right) {
129 | 				outfile << "jmp .LE"<< endLabel << endl;
130 | 			}
131 | 
132 | 			outfile << ".L"<< endLabel << ": ; END IF" << endl;
133 | 			// ELSE begins
134 | 			if (ast->right) {
135 | 				compile(ast->right);
136 | 				outfile << ".LE"<< endLabel << ": ; END ELSE-IF" << endl;
137 | 			}
138 | 			break;
139 | 		}
140 | 
141 | 		case NodeType::N_BLOCK: {
142 | 			// A block gets its own stack frame sized from its symbol table.
143 | 			SymbolTable *temp = this->currentSymbolTable;
144 | 			this->currentSymbolTable = ast->symbolTable;
145 | 
146 | 			outfile << "push ebp" << endl;
147 | 			outfile << "mov ebp, esp" << endl;
148 | 			outfile << "sub esp, " << 4 * this->currentSymbolTable->getSize() << endl;
149 | 
150 | 			compile(ast->left);
151 | 
152 | 			outfile << "leave" << endl;
153 | 			this->currentSymbolTable = temp;
154 | 			break;
155 | 		}
156 | 
157 | 		case NodeType::N_ASSIGN: {
158 | 			// find symbol in current symbol table
159 | 			SymbolTable* table = this->currentSymbolTable;
160 | 			int index = table->getSymbolIndex(ast->left->value);
161 | 
162 | 			if (isOperand(ast->right))
163 | 				outfile << "mov eax, ";
164 | 
165 | 			compile(ast->right);
166 | 
167 | 			outfile << endl;
168 | 			outfile << "mov DWORD [ebp-" << 4*(index+1)  << "], eax" << endl;
169 | 			outfile << endl;
170 | 			break;
171 | 		}
172 | 
173 | 		case NodeType::N_ADD: {
174 | 			const bool leftIsOperand = isOperand(ast->left);
175 | 			const bool rightIsOperand = isOperand(ast->right);
176 | 
177 | 			if (leftIsOperand && rightIsOperand) {
178 | 				outfile << "mov eax, ";
179 | 				compile(ast->left);
180 | 				outfile << endl;
181 | 
182 | 				if (ast->right->value == "1") {
183 | 					outfile << "inc eax";
184 | 				} else {
185 | 					outfile << "add eax, ";
186 | 					compile(ast->right);
187 | 				}
188 | 				outfile << endl;
189 | 
190 | 			} else if (leftIsOperand) {
191 | 				compile(ast->right);
192 | 
193 | 				if (ast->left->value == "1") {
194 | 					outfile << "inc eax";
195 | 				} else {
196 | 					outfile << "add eax, ";
197 | 					compile(ast->left);
198 | 				}
199 | 				outfile << endl;
200 | 
201 | 			} else if (rightIsOperand) {
202 | 				compile(ast->left);
203 | 
204 | 				if (ast->right->value == "1") {
205 | 					outfile << "inc eax";
206 | 				} else {
207 | 					outfile << "add eax, ";
208 | 					compile(ast->right);
209 | 				}
210 | 				outfile << endl;
211 | 
212 | 			} else {
213 | 				// Both sides are compound: keep the left result on the stack.
214 | 				compile(ast->left);
215 | 				outfile << "push eax" << endl;
216 | 				compile(ast->right);
217 | 				outfile << "pop ebx" << endl;
218 | 				outfile  << "add eax, ebx" << endl;
219 | 			}
220 | 			break;
221 | 		}
222 | 
223 | 		case NodeType::N_SUB: {
224 | 			const bool leftIsOperand = isOperand(ast->left);
225 | 			const bool rightIsOperand = isOperand(ast->right);
226 | 
227 | 			if (leftIsOperand && rightIsOperand) {
228 | 				outfile << "mov eax, ";
229 | 				compile(ast->left);
230 | 				outfile << endl;
231 | 
232 | 				if (ast->right->value == "1") {
233 | 					outfile << "dec eax";
234 | 				} else {
235 | 					outfile << "sub eax, ";
236 | 					compile(ast->right);
237 | 				}
238 | 				outfile << endl;
239 | 
240 | 			} else if (leftIsOperand) {
241 | 				// eax holds the right side: compute right - left, then negate.
242 | 				compile(ast->right);
243 | 
244 | 				if (ast->left->value == "1") {
245 | 					outfile << "dec eax";
246 | 				} else {
247 | 					outfile << "sub eax, ";
248 | 					compile(ast->left);
249 | 				}
250 | 				outfile << endl;
251 | 				outfile << "neg eax" << endl;
252 | 
253 | 			} else if (rightIsOperand) {
254 | 				compile(ast->left);
255 | 
256 | 				if (ast->right->value == "1") {
257 | 					outfile << "dec eax";
258 | 				} else {
259 | 					outfile << "sub eax, ";
260 | 					compile(ast->right);
261 | 				}
262 | 				outfile << endl;
263 | 
264 | 			} else {
265 | 				// Right side first so that eax ends up holding the left side.
266 | 				compile(ast->right);
267 | 				outfile << "push eax" << endl;
268 | 				compile(ast->left);
269 | 				outfile << "pop ebx" << endl;
270 | 				outfile  << "sub eax, ebx" << endl;
271 | 			}
272 | 			break;
273 | 		}
274 | 
275 | 		case NodeType::N_MUL: {
276 | 			const bool leftIsOperand = isOperand(ast->left);
277 | 			const bool rightIsOperand = isOperand(ast->right);
278 | 
279 | 			if (leftIsOperand && rightIsOperand) {
280 | 				outfile << "mov eax, ";
281 | 				compile(ast->left);
282 | 				outfile << endl << "imul eax, ";
283 | 				compile(ast->right);
284 | 				outfile << endl;
285 | 
286 | 			} else if (leftIsOperand) {
287 | 				compile(ast->right);
288 | 				outfile << "imul eax, ";
289 | 				compile(ast->left);
290 | 				outfile << endl;
291 | 
292 | 			} else if (rightIsOperand) {
293 | 				compile(ast->left);
294 | 				outfile << "imul eax, ";
295 | 				compile(ast->right);
296 | 				outfile << endl;
297 | 
298 | 			} else {
299 | 				compile(ast->left);
300 | 				outfile << "push eax" << endl;
301 | 				compile(ast->right);
302 | 				outfile << "pop ebx" << endl;
303 | 				outfile  << "imul eax, ebx" << endl;
304 | 			}
305 | 			break;
306 | 		}
307 | 
308 | 		case NodeType::N_DIV: {
309 | 			// idiv divides the signed 64-bit value edx:eax by its operand, so
310 | 			// eax is sign-extended into edx with cdq before every idiv.
311 | 			const bool leftIsOperand = isOperand(ast->left);
312 | 			const bool rightIsOperand = isOperand(ast->right);
313 | 
314 | 			if (leftIsOperand && rightIsOperand) {
315 | 				outfile << "mov eax, ";
316 | 				compile(ast->left);
317 | 				outfile << endl;
318 | 				outfile << "mov ebx, ";
319 | 				compile(ast->right);
320 | 				outfile << endl;
321 | 				outfile << "cdq" << endl;
322 | 				outfile << "idiv ebx" << endl;
323 | 
324 | 			} else if (leftIsOperand) {
325 | 				compile(ast->right);
326 | 				outfile << "mov ebx, eax" << endl;
327 | 				outfile << "mov eax, ";
328 | 				compile(ast->left);
329 | 				outfile << endl;
330 | 				outfile << "cdq" << endl;
331 | 				outfile << "idiv ebx" << endl;
332 | 
333 | 			} else if (rightIsOperand) {
334 | 				compile(ast->left);
335 | 				outfile << "mov ebx, ";
336 | 				compile(ast->right);
337 | 				outfile << endl;
338 | 				outfile << "cdq" << endl;
339 | 				outfile << "idiv ebx" << endl;
340 | 
341 | 			} else {
342 | 				// Divisor first so that eax ends up holding the dividend.
343 | 				compile(ast->right);
344 | 				outfile << "push eax" << endl;
345 | 				compile(ast->left);
346 | 				outfile << "pop ebx" << endl;
347 | 				outfile << "cdq" << endl;
348 | 				outfile  << "idiv ebx" << endl;
349 | 			}
350 | 			break;
351 | 		}
352 | 
353 | 		case NodeType::N_PRINT:
354 | 			// The emitted `print` routine expects its argument in eax.
355 | 			if (isOperand(ast->left)) {
356 | 				outfile << "mov eax, ";
357 | 			}
358 | 
359 | 			compile(ast->left);
360 | 
361 | 			outfile << endl;
362 | 			outfile << "call print" << endl;
363 | 			break;
364 | 
365 | 		case NodeType::N_ID: {
366 | 			SymbolTable* table = this->currentSymbolTable;
367 | 			int index = table->getSymbolIndex(ast->value);
368 | 			if (index >= 0) {
369 | 				// Local variable: stored below ebp.
370 | 				outfile << "DWORD [ebp-" << (4*index) + 4 << "]";
371 | 			} else {
372 | 				// check input params in current function
373 | 				Node* args = this->currentFunction->args;
374 | 				index = 0;
375 | 				bool inFunctionArgs = false;
376 | 
377 | 				while(args) {
378 | 					if (args->value == ast->value) {
379 | 						inFunctionArgs = true;
380 | 						break;
381 | 					}
382 | 					args = args->args;
383 | 					index++;
384 | 				}
385 | 
386 | 				if (inFunctionArgs){
387 | 					outfile << "DWORD [ebp+" << (4*index) + 8 << "]";
388 | 				} else {
389 | 					cout << "Unexpected variable \"" << ast->value << "\" in compile:N_ID" << endl;
390 | 					exit(1);
391 | 				}
392 | 			}
393 | 			break;
394 | 		}
395 | 
396 | 		case NodeType::N_NUMBER_C:
397 | 			outfile << ast->value;
398 | 			break;
399 | 
400 | 		case NodeType::N_FUNC:
401 | 			// Functions are chained through ->right; emit the rest of the
402 | 			// chain first.
403 | 			compile(ast->right);
404 | 
405 | 			// Select this function only after the recursion above, which
406 | 			// would otherwise leave currentFunction pointing at another
407 | 			// function while this body is compiled.
408 | 			this->currentFunction = ast;
409 | 
410 | 			// prologue
411 | 			outfile << "fn_" << ast->value << ":" << endl << endl;
412 | 
413 | 			compile(ast->left);
414 | 
415 | 			// epilogue
416 | 			outfile << "ret" << endl;
417 | 			outfile << endl;
418 | 			break;
419 | 
420 | 		case NodeType::N_RET:
421 | 			// Only places the value in eax; the `ret` instruction itself is
422 | 			// emitted at the end of N_FUNC.
423 | 			if (isOperand(ast->left))
424 | 				outfile << "mov eax, ";
425 | 			compile(ast->left);
426 | 			break;
427 | 
428 | 		case NodeType::N_FUNC_CALL: {
429 | 			// Push the arguments (chained through ->args) in list order.
430 | 			Node* temp = ast;
431 | 			int count = 0;
432 | 			while(temp->args) {
433 | 				count++;
434 | 
435 | 				if (isOperand(temp->args)) {
436 | 					outfile << "push ";
437 | 					this->compile(temp->args);
438 | 				} else {
439 | 					this->compile(temp->args);
440 | 					outfile << endl;
441 | 					outfile << "push eax";
442 | 				}
443 | 
444 | 				outfile << endl;
445 | 				temp = temp->args;
446 | 			}
447 | 
448 | 			outfile << "call fn_" << ast->value << endl;
449 | 
450 | 			// free arguments from stack
451 | 			if (count > 0) {
452 | 				outfile << "add esp, " <<  count*4 << endl;
453 | 			}
454 | 			break;
455 | 		}
456 | 
457 | 		case NodeType::N_PROG: {
458 | 			this->currentFunction = ast;
459 | 			this->currentSymbolTable = ast->symbolTable;
460 | 
461 | 			// prologue
462 | 			outfile << "BITS 32" << endl;
463 | 			outfile << "global _start" << endl;
464 | 			outfile << endl;
465 | 			outfile << endl;
466 | 
467 | 			outfile << "section .text" << endl;
468 | 
469 | 			outfile << ";==== FUNCTIONS ======" << endl;
470 | 			compile(ast->right);
471 | 			outfile << ";==== FUNCTIONS ======" << endl;
472 | 
473 | 			// Compiling the functions changed currentFunction; the top-level
474 | 			// statements belong to the program node again.
475 | 			this->currentFunction = ast;
476 | 
477 | 			outfile << "_start:" << endl << endl;
478 | 
479 | 			SymbolTable *temp = this->currentSymbolTable;
480 | 			this->currentSymbolTable = ast->symbolTable;
481 | 
482 | 			outfile << "push ebp" << endl;
483 | 			outfile << "mov ebp, esp" << endl;
484 | 			outfile << "sub esp, " << 4 * this->currentSymbolTable->getSize() << endl;
485 | 
486 | 			compile(ast->left);
487 | 
488 | 			outfile << "leave" << endl;
489 | 			this->currentSymbolTable = temp;
490 | 
491 | 			// exit
492 | 			outfile << "mov eax, 1" << endl;
493 | 			outfile << "xor ebx, ebx" << endl;
494 | 			outfile << "int 0x80" << endl;
495 | 			outfile << endl;
496 | 
497 | 			// print function : prints a decimal number in eax register with a new line
498 | 			outfile << "print:" << endl;
499 | 			outfile << "mov edi, 1" << endl;
500 | 			outfile << "mov ecx, esp" << endl;
501 | 			outfile << "mov ebx, 10" << endl;
502 | 			outfile << "dec ecx" << endl;
503 | 			outfile << "mov [ecx], bl" << endl;
504 | 
505 | 			outfile << "print_loop:" << endl;
506 | 			outfile << "xor edx, edx" << endl;
507 | 			outfile << "idiv ebx" << endl;
508 | 			outfile << "add dl, '0'" << endl;
509 | 			outfile << "dec ecx" << endl;
510 | 			outfile << "inc edi" << endl;
511 | 			outfile << "mov [ecx],dl" << endl;
512 | 			outfile << "test eax, eax" << endl;
513 | 			outfile << "jnz print_loop" << endl;
514 | 
515 | 			outfile << "mov eax, 4" << endl;
516 | 			outfile << "mov ebx, 1" << endl;
517 | 			outfile << "mov edx, edi" << endl;
518 | 			outfile << "int 0x80" << endl;
519 | 
520 | 			outfile << "ret" << endl;
521 | 			break;
522 | 		}
523 | 
524 | 		default:
525 | 			break;
526 | 	}
527 | }
509 | 
510 | 


--------------------------------------------------------------------------------
/src/Flexer.cpp:
--------------------------------------------------------------------------------
 1 | #include "include/Flexer.h"
 2 | 
 3 | #include <iostream>
 4 | 
 5 | // Semantic value of the most recently scanned token. TODO: remove it or change
 6 | typedef union {
 7 | 	int ival;    // read by Flexer::getTokens for T_NUMBER tokens
 8 | 	char *cval;  // read by Flexer::getTokens for every other token type
 9 | } yylval_type;
10 | 
11 | yylval_type yylval;  // presumably filled in by the flex-generated scanner - TODO confirm
12 | 
13 | // The constructor has nothing to set up.
14 | Flexer::Flexer() {
15 | }
16 | 
17 | /**
18 |  * Tokenizes `content` by running yylex() over an in-memory buffer.
19 |  *
20 |  * Each value returned by yylex() becomes the type of one Token. The text of a
21 |  * T_NUMBER token is taken from yylval.ival, the text of every other token
22 |  * from yylval.cval (an empty string when cval is null).
23 |  *
24 |  * @param content source text to scan
25 |  * @return vector allocated with new, holding tokens allocated with new
26 |  */
27 | std::vector<Token*>* Flexer::getTokens(std::string content) {
28 | 	std::vector<Token*> *result = new std::vector<Token*>();
29 | 	int p = 0;
30 | 
31 | 	// ival and cval share storage, so yylval is cleared through one member
32 | 	// only; writing ival after cval would corrupt the pointer.
33 | 	yylval.cval = nullptr;
34 | 
35 | 	YY_BUFFER_STATE buff = yy_scan_string(content.c_str());
36 | 	while( (p = yylex()) != 0) {
37 | 		Token *token = new Token();
38 | 		token->type = (TokenType)p;
39 | 
40 | 		if (p == T_NUMBER) {
41 | 			token->value = std::to_string(yylval.ival);
42 | 		} else {
43 | 			// Constructing a std::string from a null pointer is undefined.
44 | 			token->value = yylval.cval ? yylval.cval : "";
45 | 		}
46 | 		result->push_back(token);
47 | 
48 | 		yylval.cval = nullptr;
49 | 	}
50 | 
51 | 	yy_delete_buffer(buff);
52 | 
53 | 	return result;
54 | }
47 | 


--------------------------------------------------------------------------------
/src/Lexer.cpp:
--------------------------------------------------------------------------------
  1 | #include <iostream>
  2 | 
  3 | #include "include/Lexer.h"
  4 | 
  5 | // Starts with no token in progress at position 0.
  6 | Lexer::Lexer() {
  7 | 	token = nullptr;
  8 | 	position = 0;
  9 | 	inToken = false;
 10 | }
 10 | 
 11 | // Reclassifies the current token from T_ID to a keyword type when its text
 12 | // is one of the reserved words; any other token is left untouched.
 13 | void Lexer::checkID() {
 14 | 	if (token->type != TokenType::T_ID) {
 15 | 		return;
 16 | 	}
 17 | 
 18 | 	static const struct {
 19 | 		const char* word;
 20 | 		TokenType type;
 21 | 	} keywords[] = {
 22 | 		{ "if",    TokenType::T_IF },
 23 | 		{ "else",  TokenType::T_ELSE },
 24 | 		{ "while", TokenType::T_WHILE },
 25 | 		{ "fn",    TokenType::T_FUNC },
 26 | 		{ "ret",   TokenType::T_RET },
 27 | 		{ "true",  TokenType::T_BOOL },
 28 | 		{ "false", TokenType::T_BOOL },
 29 | 		{ "for",   TokenType::T_FOR },
 30 | 		{ "do",    TokenType::T_DO },
 31 | 		{ "print", TokenType::T_PRINT },
 32 | 	};
 33 | 
 34 | 	for (const auto& keyword : keywords) {
 35 | 		if (token->value == keyword.word) {
 36 | 			token->type = keyword.type;
 37 | 			return;
 38 | 		}
 39 | 	}
 40 | }
 36 | 
 37 | /**
 38 |  * Splits `content` into tokens with the hand-written character scanner.
 39 |  *
 40 |  * Recognizes = ( ) { } + ++ - -- * / ; double-quoted strings, identifiers
 41 |  * and keywords made of lowercase letters and digits, and decimal numbers.
 42 |  * Line comments and block comments are skipped; any other character only
 43 |  * ends the token that is being collected.
 44 |  *
 45 |  * @param content source text to scan
 46 |  * @return vector allocated with new, holding tokens allocated with new
 47 |  */
 48 | std::vector<Token*>* Lexer::getTokens(std::string content) {
 49 | 
 50 | 	std::vector<Token*> *result = new std::vector<Token*>();
 51 | 	this->position = 0;
 52 | 	// Do not carry a half-read token over from a previous call.
 53 | 	this->inToken = false;
 54 | 
 55 | 	int size = content.length();
 56 | 	for (int i = 0; i<size;i ++) {
 57 | 		char c = content[i];
 58 | 		// First decide whether c extends or terminates the pending token.
 59 | 		if (inToken) {
 60 | 			if (token->type == TokenType::T_COMMENT) {
 61 | 				if (c == '\n') {
 62 | 					inToken = false;
 63 | 				}
 64 | 				continue;
 65 | 			} else if (token->type == TokenType::T_MULTICOMMENT) {
 66 | 				if (c == '*') {
 67 | 					if ( i+1 < size && content[i+1] == '/') {
 68 | 						i = i+1;
 69 | 						inToken = false;
 70 | 					}
 71 | 				}
 72 | 				continue;
 73 | 
 74 | 			} else if (c == '\"') {
 75 | 				if (token->type == TokenType::T_STRING) {
 76 | 					inToken = false;
 77 | 					result->push_back(token);
 78 | 					continue;
 79 | 				}
 80 | 
 81 | 				this->checkID();
 82 | 
 83 | 			} else if (c == '+') {
 84 | 
 85 | 				if (token->type == TokenType::T_ADD) {
 86 | 					token->type = TokenType::T_INC;
 87 | 					token->value += c;
 88 | 					result->push_back(token);
 89 | 					inToken = false;
 90 | 					continue;
 91 | 				}
 92 | 				// A '+' inside a string literal is part of the string.
 93 | 				if (token->type == TokenType::T_STRING) {
 94 | 					token->value += c;
 95 | 					continue;
 96 | 				}
 97 | 
 98 | 				this->checkID();
 99 | 
100 | 			} else if (c == '-') {
101 | 
102 | 				if (token->type == TokenType::T_SUB) {
103 | 					token->type = TokenType::T_DEC;
104 | 					token->value += c;
105 | 					result->push_back(token);
106 | 					inToken = false;
107 | 					continue;
108 | 				}
109 | 				// A '-' inside a string literal is part of the string.
110 | 				if (token->type == TokenType::T_STRING) {
111 | 					token->value += c;
112 | 					continue;
113 | 				}
114 | 
115 | 				this->checkID();
116 | 
117 | 			} else if (c >= 'a' && c <= 'z') {
118 | 
119 | 				if (token->type == TokenType::T_ID || token->type == TokenType::T_STRING) {
120 | 					token->value += c;
121 | 					continue;
122 | 				}
123 | 
124 | 			} else if (c >= '0' && c <= '9') {
125 | 
126 | 				if (token->type == TokenType::T_NUMBER || token->type == TokenType::T_ID || token->type == TokenType::T_STRING) {
127 | 					token->value += c;
128 | 					continue;
129 | 				}
130 | 
131 | 			} else {
132 | 				//other symbols
133 | 				if (token->type == TokenType::T_STRING) {
134 | 					token->value += c;
135 | 					continue;
136 | 				}
137 | 
138 | 				this->checkID();
139 | 
140 | 			}
141 | 
142 | 			// c ended the pending token; c itself is handled below.
143 | 			result->push_back(token);
144 | 		}
145 | 
146 | 		// Then start a new token (or emit a one-character token) for c.
147 | 		if (c == '=') {
148 | 			token = new Token();
149 | 			token->type = TokenType::T_ASSIGN;
150 | 			token->value = c;
151 | 			result->push_back(token);
152 | 			inToken = false;
153 | 
154 | 		} else if (c == '(') {
155 | 			token = new Token();
156 | 			token->type = TokenType::T_LPAR;
157 | 			token->value = c;
158 | 			result->push_back(token);
159 | 			inToken = false;
160 | 
161 | 		} else if (c == ')') {
162 | 			token = new Token();
163 | 			token->type = TokenType::T_RPAR;
164 | 			token->value = c;
165 | 			result->push_back(token);
166 | 			inToken = false;
167 | 
168 | 		} else if (c == '{') {
169 | 			token = new Token();
170 | 			token->type = TokenType::T_LBRACE;
171 | 			token->value = c;
172 | 			result->push_back(token);
173 | 			inToken = false;
174 | 
175 | 		} else if (c == '}') {
176 | 			token = new Token();
177 | 			token->type = TokenType::T_RBRACE;
178 | 			token->value = c;
179 | 			result->push_back(token);
180 | 			inToken = false;
181 | 
182 | 		} else if (c == '+') {
183 | 			// Kept pending: the next character may turn it into "++".
184 | 			token = new Token();
185 | 			token->type = TokenType::T_ADD;
186 | 			token->value = c;
187 | 			inToken = true;
188 | 
189 | 		} else if (c == '-') {
190 | 			// Kept pending: the next character may turn it into "--".
191 | 			token = new Token();
192 | 			token->type = TokenType::T_SUB;
193 | 			token->value = c;
194 | 			inToken = true;
195 | 
196 | 		} else if (c == '*') {
197 | 			token = new Token();
198 | 			token->type = TokenType::T_MUL;
199 | 			token->value = c;
200 | 			result->push_back(token);
201 | 			inToken = false;
202 | 
203 | 		} else if (c == '/') {
204 | 			if (i+1 < size) {
205 | 				if (content[i+1] == '/') {
206 | 					token = new Token();
207 | 					token->type = TokenType::T_COMMENT;
208 | 					token->value = "COMMENT";
209 | 					inToken = true;
210 | 					continue;
211 | 				} else if (content[i+1] == '*') {
212 | 					token = new Token();
213 | 					token->type = TokenType::T_MULTICOMMENT;
214 | 					token->value = "MULTICOMMENT";
215 | 					inToken = true;
216 | 					continue;
217 | 				}
218 | 			}
219 | 
220 | 			token = new Token();
221 | 			token->type = TokenType::T_DIV;
222 | 			token->value = c;
223 | 			result->push_back(token);
224 | 			inToken = false;
225 | 
226 | 		} else if (c == ';') {
227 | 			token = new Token();
228 | 			token->type = TokenType::T_SEMICOLON;
229 | 			token->value = c;
230 | 			result->push_back(token);
231 | 			inToken = false;
232 | 
233 | 		} else if (c == '\"') {
234 | 			token = new Token();
235 | 			token->type = TokenType::T_STRING;
236 | 			token->value = "";
237 | 			inToken = true;
238 | 
239 | 		} else if (c >= 'a' && c <= 'z') {
240 | 			token = new Token();
241 | 			token->type = TokenType::T_ID;
242 | 			token->value = c;
243 | 			inToken = true;
244 | 
245 | 		} else if (c >= '0' && c <= '9') {
246 | 			token = new Token();
247 | 			token->type = TokenType::T_NUMBER;
248 | 			token->value = c;
249 | 			inToken = true;
250 | 
251 | 		} else {
252 | 			// other symbols
253 | 			inToken = false;
254 | 		}
255 | 
256 | 	}
257 | 
258 | 	// Emit a token that was still being collected when the input ended
259 | 	// (for example a trailing identifier or number); comments are not emitted.
260 | 	if (inToken) {
261 | 		if (token->type != TokenType::T_COMMENT && token->type != TokenType::T_MULTICOMMENT) {
262 | 			this->checkID();
263 | 			result->push_back(token);
264 | 		}
265 | 		inToken = false;
266 | 	}
267 | 
268 | 	return result;
269 | }
230 | 


--------------------------------------------------------------------------------
/src/Parser.cpp:
--------------------------------------------------------------------------------
  1 | #include "include/Parser.h"
  2 | 
  3 | #include <iostream>
  4 | 
  5 | Parser::Parser() {
  6 | 	t = nullptr;  // no token list until parse() is called
  7 | }
  8 | 
  9 | // Advances to the next token and returns it, or nullptr past the end.
 10 | Token* Parser::nextToken() {
 11 | 	++ti;
 12 | 	if (!(ti < t->size())) {
 13 | 		return nullptr;
 14 | 	}
 15 | 	return t->at(ti);
 16 | }
 17 | 
 18 | // Returns the token at the current position without consuming it,
 19 | // or nullptr once the position is past the last token.
 20 | Token* Parser::getCurrentToken() {
 21 | 	const bool exhausted = !(ti < t->size());
 22 | 	return exhausted ? nullptr : t->at(ti);
 23 | }
 24 | 
 25 | // Consumes the current token and returns true if it has the given type.
 26 | // Returns false, consuming nothing, on a mismatch or at the end of input.
 27 | bool Parser::expect(TokenType type) {
 28 | 	Token* token = getCurrentToken();
 29 | 	// getCurrentToken() returns nullptr once all tokens have been consumed.
 30 | 	if (!token || token->type != type) {
 31 | 		return false;
 32 | 	}
 33 | 
 34 | 	nextToken();
 35 | 	return true;
 36 | }
 35 | 
 36 | void Parser::printError(std::string text) {
 37 | 	std::cout << "Error: " << text << std::endl;
 38 | 	exit(1);
 39 | }
 40 | 
 41 | /**
 42 |  * Parses `tokens` into an AST.
 43 |  *
 44 |  * The returned N_PROG node owns a fresh symbol table that serves as the
 45 |  * outermost scope while parsing; its left child is the sequence of top-level
 46 |  * statements. A progress message is printed to stdout when parsing ends.
 47 |  *
 48 |  * @param tokens token list to parse
 49 |  * @return newly allocated N_PROG root node
 50 |  */
 51 | Node* Parser::parse(std::vector<Token*> *tokens) {
 52 | 	using std::cout;
 53 | 	using std::endl;
 54 | 
 55 | 	t = tokens;
 56 | 	// Always start at the first token, also when the parser is reused.
 57 | 	ti = 0;
 58 | 
 59 | 	Node* prog = new Node();
 60 | 	prog->type = NodeType::N_PROG;
 61 | 	prog->value = "PROG";
 62 | 	prog->symbolTable = new SymbolTable();
 63 | 	this->scopes.push_back(prog->symbolTable);
 64 | 
 65 | 	this->prog = prog;
 66 | 	prog->left = statements();
 67 | 
 68 | 	this->scopes.pop_back();
 69 | 
 70 | 	cout << "parse: end of parsing" << endl;
 71 | 
 72 | 	return prog;
 73 | }
 62 | 
 63 | // Parses statements until the input ends or a right brace is reached.
 64 | // Each statement becomes the right child of a new N_SEQ node whose left
 65 | // child is the sequence parsed so far; returns nullptr for no statements.
 66 | Node* Parser::statements() {
 67 | 	Node* sequence = nullptr;
 68 | 
 69 | 	for (Token* current = getCurrentToken();
 70 | 	     current && current->type != TokenType::T_RBRACE;
 71 | 	     current = getCurrentToken()) {
 72 | 		Node* link = new Node();
 73 | 		link->type = NodeType::N_SEQ;
 74 | 		link->value = "SEQ";
 75 | 		link->left = sequence;
 76 | 		link->right = statement();
 77 | 		sequence = link;
 78 | 	}
 79 | 
 80 | 	return sequence;
 81 | }
 82 | 
 83 | 
 84 | Node* Parser::statement() {
 85 | 	using std::cout;
 86 | 	using std::endl;
 87 | 
 88 | 	Node* node = nullptr;
 89 | 	Token *token = getCurrentToken();
 90 | 	if (!token) {
 91 | 		printError("A token expected in statement");
 92 | 	}
 93 | 
 94 | 	switch(token->type) {
 95 | 		case TokenType::T_LOOP: {
 96 | 			token = nextToken();
 97 | 			if (!expect(TokenType::T_LPAR)) {
 98 | 				printError("A left parenthes expected in loop-statement.");
 99 | 			}
100 | 
101 | 			node = new Node();
102 | 			node->type = NodeType::N_LOOP;
103 | 			node->value = token->value;
104 | 			node->args = expression();
105 | 
106 | 			if (!expect(TokenType::T_RPAR)) {
107 | 				printError("A right parenthes expected in loop-statement.");
108 | 			}
109 | 
110 | 			node->left = block();
111 | 		}	
112 | 			break;
113 | 
114 | 		case TokenType::T_FUNC: 
115 | 			{
116 | 				token = nextToken();
117 | 
118 | 				if (!token || token->type != TokenType::T_ID) {
119 | 					printError("An id expected in function statement.");
120 | 				}
121 | 
122 | 				node = new Node();
123 | 				node->type = NodeType::N_FUNC;
124 | 				node->value = token->value; // id
125 | 
126 | 				nextToken();
127 | 				
128 | 				if (!expect(TokenType::T_LPAR)) {
129 | 					printError("A left parenthes expected in function statement.");
130 | 				}
131 | 
132 | 				////////////////////////////////////////////
133 | 
134 | 				Node* func = node;
135 | 				Node* temp = node;
136 | 				token = getCurrentToken();
137 | 
138 | 				while(token && token->type == TokenType::T_ID) {
139 | 					node = new Node();
140 | 					node->type = NodeType::N_ID; // FUNC_ARG
141 | 					node->value = token->value;
142 | 
143 | 					temp->args = node;
144 | 					temp = temp->args;
145 | 
146 | 					nextToken();
147 | 
148 | 					if (!expect(TokenType::T_COMMA)) {
149 | 						break;
150 | 					}
151 | 
152 | 					token = getCurrentToken();
153 | 				}
154 | 
155 | 				////////////////////////////////////////////
156 | 
157 | 				if (!expect(TokenType::T_RPAR)) {
158 | 					printError("A right parenthes expected in function statement.");
159 | 				}
160 | 
161 | 				func->left = block();
162 | 				temp = prog->right;
163 | 				prog->right = func;
164 | 				func->right = temp;
165 | 				node = nullptr;
166 | 			}
167 | 
168 | 			break;
169 | 
170 | 		case TokenType::T_RET: 
171 | 			{
172 | 				nextToken();
173 | 
174 | 				node = new Node();
175 | 				node->type = NodeType::N_RET;
176 | 				node->value = "RET";
177 | 				node->left = expression();
178 | 
179 | 				if (!node->left) {
180 | 					printError("An expression expected in return statement.");
181 | 				}
182 | 
183 | 				if (!expect(TokenType::T_SEMICOLON)) {
184 | 					printError("A right brace expected in return statement.");
185 | 				}
186 | 			}
187 | 			break;
188 | 			
189 | 		case TokenType::T_IF: 
190 | 			{
191 | 				nextToken();
192 | 				node = conditionBlock();
193 | 			}
194 | 			break;
195 | 			
196 | 		case TokenType::T_PRINT: 
197 | 			node = new Node();
198 | 			node->type = NodeType::N_PRINT;
199 | 			node->value = "PRINT";
200 | 			nextToken();
201 | 
202 | 			if (!expect(TokenType::T_LPAR)) {
203 | 				printError("A left parenthesis expected in print statement.");
204 | 			}
205 | 
206 | 			node->left = expression();
207 | 			
208 | 			if (!expect(TokenType::T_RPAR)) {
209 | 				printError("A right parenthesis expected in print statement.");
210 | 			}
211 | 
212 | 			if (!expect(TokenType::T_SEMICOLON)) {
213 | 				printError("A semicolon expected in print statement.");
214 | 			}
215 | 
216 | 			break;
217 | 
218 | 		case TokenType::T_WHILE:
219 | 			break;
220 | 
221 | 		case TokenType::T_DO:
222 | 			break;
223 | 
224 | 		case TokenType::T_ID:
225 | 			{
226 | 				Node* temp = new Node();
227 | 				temp->type = NodeType::N_ID;
228 | 				temp->value = token->value;
229 | 				nextToken();
230 | 
231 | 				// assign statement
232 | 				if (this->expect(TokenType::T_ASSIGN) ) {
233 | 					node = new Node();
234 | 					node->type = NodeType::N_ASSIGN;
235 | 					node->value = "SET";
236 | 					node->left = temp;
237 | 					// add to symbol table if it doesn't exist
238 | 					SymbolTable* table = this->scopes.back();
239 | 					if (!table->isSymbolExist(temp->value) ) {
240 | 						table->addSymbol(temp->value); 
241 | 					}
242 | 
243 | 					node->right = expression();
244 | 
245 | 					if (!this->expect(TokenType::T_SEMICOLON)) {
246 | 						printError("A semicolon expected in statement.");
247 | 					}
248 | 
249 | 				// function call statement
250 | 				} else if (this->expect(TokenType::T_LPAR) ) {
251 | 					node = new Node();
252 | 					node->type = NodeType::N_FUNC_CALL;
253 | 					node->value = token->value;
254 | 
255 | 					node->args = this->functionArgs();
256 | 
257 | 					if(this->expect(TokenType::T_RPAR) ) {
258 | 
259 | 						if (!this->expect(TokenType::T_SEMICOLON)) {
260 | 							printError("A semicolon expected in statement.");
261 | 						}
262 | 
263 | 					} else {
264 | 						printError("A right parenthesis expected in statement.");
265 | 					}
266 | 
267 | 				} else {
268 | 					printError("A left parenthesis or equal are expected in statement.");
269 | 				}
270 | 			}
271 | 			break;
272 | 
273 | 		default:
274 | 			node = expression();
275 | 			token = getCurrentToken();
276 | 			
277 | 			if (!token) {
278 | 				printError("A token expected in statement.");
279 | 			}
280 | 			
281 | 			if (token->type != TokenType::T_SEMICOLON) {
282 | 				cout << "in statement token-type="+std::to_string(token->type) << endl;
283 | 				printError("A semicolon expected in statement.");
284 | 			}
285 | 			break;
286 | 	}
287 | 
288 | 	return node;
289 | }
290 | 
291 | Node* Parser::expression() {
292 | 	using std::cout;
293 | 	using std::endl;
294 | 
295 | 	Node* node = term();
296 | 
297 | 	if (!node) {
298 | 		return node;
299 | 	} 
300 | 
301 | 	Token *token = getCurrentToken();
302 | 
303 | 	if (!token) {
304 | 		printError("A token expected in expression");
305 | 	} 
306 | 
307 | 	Node* temp;
308 | 
309 | 	while (token->type == TokenType::T_ADD || token->type == TokenType::T_SUB) {
310 | 		temp = node;
311 | 		node = new Node();
312 | 
313 | 		if (token->type == TokenType::T_ADD) {
314 | 			node->type = NodeType::N_ADD;
315 | 			node->value = "ADD";
316 | 		} else {
317 | 			node->type = NodeType::N_SUB;
318 | 			node->value = "SUB";
319 | 		}
320 | 		node->left=temp;
321 | 		nextToken();
322 | 		node->right = term();
323 | 		token = getCurrentToken();
324 | 	}
325 | 
326 | 	return node;
327 | }
328 | 
329 | Node* Parser::functionArgs() {
330 | 
331 | 	Node* temp;
332 | 	Node* tail = expression();
333 | 	
334 | 	Token* token = getCurrentToken();
335 | 
336 | 	while(token && token->type == TokenType::T_COMMA) {
337 | 		nextToken();
338 | 		temp = expression();
339 | 		temp->args = tail;
340 | 		tail = temp;
341 | 		token = getCurrentToken();
342 | 	}
343 | 
344 | 	return tail;
345 | }
346 | 
347 | Node* Parser::factor() {
348 | 	Node* node = nullptr;
349 | 	Token *token = getCurrentToken();
350 | 
351 | 	if (!token) {
352 | 		printError("A token expected in factor");
353 | 	}
354 | 
355 | 	switch(token->type) {
356 | 
357 | 		case TokenType::T_ID:
358 | 			{
359 | 				node = new Node();
360 | 				Token* ttoken = nextToken();
361 | 				if (ttoken->type == TokenType::T_LPAR) {
362 | 					nextToken();
363 | 					node->type = NodeType::N_FUNC_CALL;
364 | 					node->value = token->value;
365 | 
366 | 					node->args = this->functionArgs();
367 | 					
368 | 					if(!this->expect(TokenType::T_RPAR) ) {
369 | 						printError("A right parenthesis expected in funcion call.");
370 | 					}
371 | 
372 | 				} else {
373 | 					node->type = NodeType::N_ID;
374 | 					node->value = token->value;
375 | 				}
376 | 			}
377 | 			break;
378 | 
379 | 		case TokenType::T_NUMBER:
380 | 			node = new Node();
381 | 			nextToken();
382 | 			node->type = NodeType::N_NUMBER_C;
383 | 			node->value = token->value;
384 | 			break;
385 | 
386 | 		case TokenType::T_STRING:
387 | 			node = new Node();
388 | 			nextToken();
389 | 			node->type = NodeType::N_STRING_C;
390 | 			node->value = token->value;
391 | 			break;
392 | 
393 | 		case TokenType::T_LPAR:
394 | 			nextToken();
395 | 			node = expression();
396 | 
397 | 			if(!this->expect(TokenType::T_RPAR) ) {
398 | 				printError("A right parenthesis expected in statement.");
399 | 			}
400 | 
401 | 			break;
402 | 			
403 | 		case TokenType::T_RPAR:
404 | 			break;
405 | 
406 | 		default:
407 | 			//std::cout << "in term: another type of token value=" << token->value << " type=" << token->type << std::endl;
408 | 			//printError("Unexpected token in term");
409 | 			break;
410 | 	}
411 | 
412 | 	return node;
413 | }
414 | 
415 | Node* Parser::term() {
416 | 	Token *token = getCurrentToken();
417 | 
418 | 	if (!token) {
419 | 		printError("A token expected in term");
420 | 	}
421 | 
422 | 	Node* node = factor();
423 | 	Node* temp;
424 | 	token = getCurrentToken();
425 | 
426 | 	while (token->type == TokenType::T_DIV || token->type == TokenType::T_MUL) {
427 | 		temp = node;
428 | 		node = new Node();
429 | 
430 | 		if (token->type == TokenType::T_MUL) {
431 | 			node->type = NodeType::N_MUL;
432 | 			node->value = "MUL";
433 | 		} else {
434 | 			node->type = NodeType::N_DIV;
435 | 			node->value = "DIV";
436 | 		}
437 | 		node->left=temp;
438 | 		nextToken();
439 | 		node->right = factor();
440 | 		token = getCurrentToken();
441 | 	}
442 | 
443 | 	return node;
444 | }
445 | 
446 | Node* Parser::block() {
447 | 
448 | 	if (!expect(TokenType::T_LBRACE)) {
449 | 		printError("A left brace expected in block.");
450 | 	}
451 | 
452 | 	Node* block = new Node();
453 | 	block->type = NodeType::N_BLOCK;
454 | 	block->value = "BLOCK"; 
455 | 	block->symbolTable = new SymbolTable();
456 | 	block->right = nullptr;
457 | 
458 | 	this->scopes.push_back(block->symbolTable);
459 | 
460 | 	Node* st = statements();
461 | 
462 | 	if (!expect(TokenType::T_RBRACE)) {
463 | 		printError("A right brace expected in block.");
464 | 	}
465 | 
466 | 	this->scopes.pop_back();
467 | 
468 | 	block->left = st;
469 | 	return block;
470 | }
471 | 
472 | Node* Parser::condition() {
473 | 
474 | 	Node* node = new Node();
475 | 	node->left = expression();
476 | 	Token* token = getCurrentToken();
477 | 
478 | 	switch(token->type) {
479 | 	case TokenType::T_EQU:
480 | 		node->type = NodeType::N_EQU;
481 | 		node->value = "EQU";
482 | 		break;
483 | 
484 | 	case TokenType::T_NEQU:
485 | 		node->type = NodeType::N_NEQU;
486 | 		node->value = "NEQU";
487 | 		break;
488 | 
489 | 	case TokenType::T_LESS:
490 | 		node->type = NodeType::N_LESS;
491 | 		node->value = "LESS";
492 | 		break;
493 | 
494 | 	case TokenType::T_NLESS:
495 | 		node->type = NodeType::N_NLESS;
496 | 		node->value = "NLESS";
497 | 		break;
498 | 
499 | 	case TokenType::T_GREATER:
500 | 		node->type = NodeType::N_GREATER;
501 | 		node->value = "GREATER";
502 | 		break;
503 | 
504 | 	case TokenType::T_NGREATER:
505 | 		node->type = NodeType::N_NGREATER;
506 | 		node->value = "NGREATER";
507 | 		break;
508 | 
509 | 	default:
510 | 		printError("A bool operators expected in condition.");
511 | 		break;
512 | 	}
513 | 
514 | 	nextToken();
515 | 	node->right = expression();
516 | 
517 | 	return node;
518 | }
519 | 
520 | Node* Parser::conditionBlock() {
521 | 
522 | 	if (!expect(TokenType::T_LPAR)) {
523 | 		printError("A left parenthes expected in condition-block.");
524 | 	} 
525 | 
526 | 	Node* node = new Node();
527 | 	node->type = NodeType::N_IF;
528 | 	node->value = "IF";
529 | 	node->args = this->condition();
530 | 	
531 | 	if (!expect(TokenType::T_RPAR)) {
532 | 		printError("A right parenthes expected in condition-block.");
533 | 	} 
534 | 
535 | 	node->left = block();
536 | 
537 | 	Token* token = getCurrentToken();
538 | 
539 | 	if (expect(TokenType::T_ELSE)) {
540 | 		token = getCurrentToken();
541 | 		if (token) { 
542 | 			if (token->type == TokenType::T_LBRACE) 
543 | 				node->right = block();
544 | 			else if (token->type == TokenType::T_LPAR) 
545 | 				node->right = conditionBlock();
546 | 			else 
547 | 				node->right = statement();
548 | 		}
549 | 	} else if (token->type == TokenType::T_LPAR) {
550 | 		node->right = conditionBlock();
551 | 	}
552 | 
553 | 	return node;
554 | }
555 | 


--------------------------------------------------------------------------------
/src/SymbolTable.cpp:
--------------------------------------------------------------------------------
 1 | #include "SymbolTable.h"
 2 | 
 3 | bool SymbolTable::isSymbolExist(std::string symbol) {
 4 | 	int size = this->symbols.size();
 5 | 	for (int i=0;i<size;i++) {
 6 | 		if (symbol == this->symbols[i]) return true;
 7 | 	}
 8 | 	return false;
 9 | }
10 | 
11 | bool SymbolTable::addSymbol(std::string symbol) {
12 | 	if (this->isSymbolExist(symbol)) return false;
13 | 	this->symbols.push_back(symbol);	
14 | 	return true;
15 | }
16 | 
17 | int SymbolTable::getSize() {
18 | 	return this->symbols.size();	
19 | }
20 | 
21 | int SymbolTable::getSymbolIndex(std::string symbol) {
22 | 	int size = this->symbols.size();
23 | 	for (int i=0;i<size;i++) {
24 | 		if (symbol == this->symbols[i]) return i;
25 | 	}
26 | 	return -1;
27 | }
28 | 


--------------------------------------------------------------------------------
/src/compiler.cpp:
--------------------------------------------------------------------------------
 1 | #include <Flexer.h>
 2 | #include <Parser.h>
 3 | #include <iostream>
 4 | #include <fstream>
 5 | #include <string>
 6 | #include <list>
 7 | //#include "lexer.h"
 8 | #include "ILexer.h"
 9 | #include "CodeGenerator.h"
10 | 
// Reads the whole text file `fileName` line by line.
// Every line is terminated with '\n' in the result, including the last one.
// Returns an empty string when the file cannot be opened.
std::string readFile(std::string fileName) {
	std::string text;
	std::ifstream input(fileName);

	if (!input.is_open()) {
		return text;
	}

	for (std::string row; std::getline(input, row); ) {
		text += row + '\n';
	}
	input.close();

	return text;
}
25 | 
26 | void traverse(Node* node) {
27 | 	std::cout << "type: " << node->type << ", value: " << node->value << std::endl;
28 | 	if (node->left) traverse(node->left);
29 | 	if (node->right) traverse(node->right);
30 | }
31 | 
32 | int main(int argc, char* argv[]) {
33 | 	using namespace std;
34 | 
35 |     if (argc < 3) {
36 |         cerr << "Usage: " << argv[0] << " infile.calc outfile.asm" << endl;
37 |         return 1;
38 |     }
39 |     // Print the user's name:
40 | 
41 | 	string content = readFile(argv[1]);
42 | 	if (content.empty()) {
43 | 		cout << "Error: bad filename or no content" << endl;
44 | 		return 1;
45 | 	}
46 | 
47 | 	cout << content;
48 | 	cout << "===============================" << endl;
49 | 
50 | 	//Lexer lexer;
51 | 	Flexer lexer;
52 | 	vector<Token*> *tokens = lexer.getTokens(content);
53 | 	
54 | 	for (unsigned long int i=0; i < tokens->size(); i++) {
55 | 		cout << tokens->at(i)->type << " " << tokens->at(i)->value << endl;
56 | 	}
57 | 
58 | 	Parser parser;
59 | 	Node* ast = parser.parse(tokens);
60 | 	cout << "+++++++++++++++++++++++++++++" << endl;
61 | 
62 | 	traverse(ast);
63 | 
64 | 	//*
65 | 	cout << "+++++ after traverse ast ++++" << endl;
66 | 	
67 | 	if (argv[2]) {
68 | 		CodeGenerator generator(argv[2]);
69 | 		generator.compile(ast);
70 | 	} else {
71 | 		CodeGenerator generator("output.asm");
72 | 		generator.compile(ast);
73 | 	}
74 | 	//*/
75 | 
76 | 	//delete(tokens);
77 | 	//delete(lexer);
78 | 	return 0;
79 | }
80 | 


--------------------------------------------------------------------------------
/src/flex/1.l:
--------------------------------------------------------------------------------
  1 | %{
  2 | 	#include <stdio.h>
  3 | 	#include <sstream>
  4 | 	#include "../include/Token.h"
  5 | 
  6 | 	typedef union{
  7 | 		int ival;
  8 | 		char *cval;
  9 | 	} yylval_type;
 10 | 
 11 | 	extern yylval_type yylval;
 12 | 	Token* token = nullptr;
 13 | 
 14 | 	char* commentStart;
 15 | %}
 16 | 
 17 | %option nounput
 18 | %option yylineno
 19 | %option noyywrap
 20 | %x COMMENT_MULTI
 21 | 
 22 | %%
 23 | 
 24 | \/\/.*?\n		; // one-line comment
 25 | 
 26 | <INITIAL>"/*" { 
 27 | 	/* begin of multi-line comment */
 28 | 	//commentStart = yytext; 
 29 | 	BEGIN(COMMENT_MULTI); 
 30 | }
 31 | 
 32 | <COMMENT_MULTI>"*/" { 
 33 | 	/* end of multi-line comment */
 34 | 	//char* comment = strndup(commentStart, yytext + 2 - commentStart);
 35 | 	//printf("'%s': was a multi-line comment\n", comment);
 36 | 	//free(comment); 
 37 | 	BEGIN(INITIAL); 
 38 | }
 39 | 
 40 | <COMMENT_MULTI>. { 
 41 | 	/* suppress whatever is in the comment */
 42 | }
 43 | 
 44 | "=="	{	
 45 | 			yylval.cval = yytext;	
 46 | 			return T_EQU;
 47 | 		}
 48 | "!<"	{	
 49 | 			yylval.cval = yytext;	
 50 | 			return T_NLESS;
 51 | 		}
 52 | "!>"	{	
 53 | 			yylval.cval = yytext;	
 54 | 			return T_NGREATER;
 55 | 		}
 56 | "!="	{	
 57 | 			yylval.cval = yytext;	
 58 | 			return T_NEQU;
 59 | 		}
 60 | "++"	{	
 61 | 			yylval.cval = yytext;	
 62 | 			return T_INC;
 63 | 		}
 64 | "--"	{	
 65 | 			yylval.cval = yytext;	
 66 | 			return T_DEC;
 67 | 		}
 68 | ">"		{	
 69 | 			yylval.cval = yytext;	
 70 | 			return T_GREATER;
 71 | 		}
 72 | "<"		{	
 73 | 			yylval.cval = yytext;	
 74 | 			return T_LESS;
 75 | 		}
 76 | ","		{	
 77 | 			yylval.cval = yytext;	
 78 | 			return T_COMMA;
 79 | 		}
 80 | "="		{	
 81 | 			yylval.cval = yytext;	
 82 | 			return T_ASSIGN;
 83 | 		}
 84 | ";"		{	
 85 | 			yylval.cval = yytext;	
 86 | 			return T_SEMICOLON;
 87 | 		}
 88 | "("		{	
 89 | 			yylval.cval = yytext;	
 90 | 			return T_LPAR;
 91 | 		}
 92 | ")"		{	
 93 | 			yylval.cval = yytext;	
 94 | 			return T_RPAR;
 95 | 		}
 96 | "{"		{	
 97 | 			yylval.cval = yytext;	
 98 | 			return T_LBRACE;
 99 | 		}
100 | "}"		{	
101 | 			yylval.cval = yytext;	
102 | 			return T_RBRACE;
103 | 		}
104 | "+"		{	
105 | 			yylval.cval = yytext;	
106 | 			return T_ADD;
107 | 		}
108 | "-"		{	
109 | 			yylval.cval = yytext;	
110 | 			return T_SUB;
111 | 		}
112 | "/"		{	
113 | 			yylval.cval = yytext;	
114 | 			return T_DIV;
115 | 		}
116 | "*"		{	
117 | 			yylval.cval = yytext;	
118 | 			return T_MUL;
119 | 		}
"fn"	{
			/* Keywords: listed before the identifier rule so that they win
			   when both match the same text. */
			yylval.cval = yytext; return T_FUNC;
		}
"ret"	{ yylval.cval = yytext; return T_RET; }
"for"	{ yylval.cval = yytext; return T_FOR; }
"if"	{ yylval.cval = yytext; return T_IF; }
"else"	{ yylval.cval = yytext; return T_ELSE; }
"print"	{ yylval.cval = yytext; return T_PRINT; }
"loop"	{ yylval.cval = yytext; return T_LOOP; }
"while"	{ yylval.cval = yytext; return T_WHILE; }
"do"	{ yylval.cval = yytext; return T_DO; }
"true"	{ yylval.cval = yytext; return T_BOOL; }
"false"	{ yylval.cval = yytext; return T_BOOL; }
164 | 
[a-zA-Z][a-zA-Z0-9]*	{
	/* Identifier: a letter followed by letters and digits. */
	yylval.cval = yytext;
	return T_ID;
}

[0-9]+	{
	/* Unsigned decimal number; its value goes to yylval.ival. */
	yylval.ival = atoi(yytext);
	return T_NUMBER;
}

[ \t\n]	{
	/* Whitespace separates tokens and is otherwise ignored. */
}

<<EOF>>	{
	/* End of input: 0 tells the caller that there are no more tokens. */
	return 0;
}
181 | 
182 | %%
183 | 
184 | 


--------------------------------------------------------------------------------
/src/include/CodeGenerator.h:
--------------------------------------------------------------------------------
 1 | #ifndef CODE_GENERATOR_H 
 2 | #define CODE_GENERATOR_H 
 3 | 
 4 | #include <vector>
 5 | #include <string>
 6 | #include <fstream>
 7 | 
 8 | #include "Node.h"
 9 | #include "SymbolTable.h"
10 | 
11 | class CodeGenerator{
12 | 	std::vector<std::string> variables;
13 | 	std::ofstream outfile;
14 | 	SymbolTable* currentSymbolTable = nullptr;
15 | 	Node* currentFunction = nullptr;
16 | 	int label = 0;
17 | 
18 | public:
19 | 	CodeGenerator(std::string ofile);
20 | 	~CodeGenerator();
21 | 	void compile(Node* tree);
22 | };
23 | 
24 | #endif
25 | 


--------------------------------------------------------------------------------
/src/include/Flexer.h:
--------------------------------------------------------------------------------
 1 | #ifndef FLEXER_H
 2 | #define FLEXER_H
 3 | 
 4 | #include <vector>
 5 | #include <string>
 6 | #include "clexer.h"
 7 | #include "ILexer.h"
 8 | #include "Token.h"
 9 | 
10 | class Flexer : public ILexer {
11 | public:
12 | 	Flexer();
13 | 	virtual ~Flexer() {};
14 | 	std::vector<Token*>* getTokens(std::string content);
15 | };
16 | 
17 | #endif
18 | 


--------------------------------------------------------------------------------
/src/include/ILexer.h:
--------------------------------------------------------------------------------
 1 | #ifndef ILEXER_H
 2 | #define ILEXER_H
 3 | 
 4 | #include <vector>
 5 | #include <string>
 6 | 
 7 | #include "Token.h"
 8 | 
 9 | struct ILexer
10 | {
11 | 	virtual std::vector<Token*>* getTokens(std::string content) = 0;
12 | 	virtual ~ILexer() {};
13 | };
14 | 
15 | #endif
16 | 


--------------------------------------------------------------------------------
/src/include/Lexer.h:
--------------------------------------------------------------------------------
 1 | #ifndef LEXER_H
 2 | #define LEXER_H
 3 | 
 4 | #include <vector>
 5 | #include <string>
 6 | #include "ILexer.h"
 7 | #include "Token.h"
 8 | 
 9 | class Lexer : public ILexer {
10 | private:
11 | 	bool inToken;
12 | 	Token *token;
13 | 	void checkID();
14 | 	int position;
15 | 
16 | public:
17 | 	Lexer();
18 | 	virtual ~Lexer() {};
19 | 	std::vector<Token*>* getTokens(std::string content);
20 | };
21 | 
22 | #endif
23 | 


--------------------------------------------------------------------------------
/src/include/Node.h:
--------------------------------------------------------------------------------
 1 | #ifndef NODE_H
 2 | #define NODE_H
 3 | 
 4 | #include <string>
 5 | #include "SymbolTable.h"
 6 | 
 7 | enum NodeType {
 8 | 	N_PROG=1,N_STATEMENT, N_NUMBER_C, N_STRING_C, N_ID, N_FUNC_CALL, N_FUNC, N_RET, N_ASSIGN, N_SEQ, N_CONDITION, N_ADD, N_SUB,
 9 | 	N_PRINT, N_MUL, N_DIV, N_BLOCK, N_IF, N_EQU, N_NEQU, N_LESS, N_NLESS, N_GREATER, N_NGREATER, N_LOOP
10 | };
11 | 
12 | class Node{
13 | public:
14 | 	Node *left = nullptr;
15 | 	Node *right = nullptr;
16 | 	SymbolTable *symbolTable = nullptr;
17 | 	NodeType type;
18 | 	std::string value;
19 | 	Node *args = nullptr;
20 | };
21 | 
22 | #endif
23 | 


--------------------------------------------------------------------------------
/src/include/Parser.h:
--------------------------------------------------------------------------------
 1 | #ifndef PARSER_H
 2 | #define PARSER_H
 3 | 
 4 | #include <vector>
 5 | #include <string>
 6 | 
 7 | #include "Node.h"
 8 | #include "SymbolTable.h"
 9 | #include "Token.h"
10 | 
11 | class Parser {
12 | 	std::vector<Token*> *t;
13 | 	std::vector<SymbolTable*> scopes;
14 | 	Node* prog = nullptr;
15 | 	uint ti = 0;
16 | 
17 | 	Token* nextToken();
18 | 	Token* getCurrentToken();
19 | 	void printError(std::string text);
20 | 	bool expect(TokenType type);
21 | 
22 | 	Node* statements();
23 | 	Node* statement();
24 | 	Node* expression();
25 | 	Node* term();
26 | 	Node* factor();
27 | 	Node* block();
28 | 	Node* condition();
29 | 	Node* conditionBlock();
30 | 	Node* functionArgs();
31 | 
32 | public:
33 | 	Parser();
34 | 	Node* parse(std::vector<Token*>*tokens);
35 | };
36 | 
37 | #endif
38 | 


--------------------------------------------------------------------------------
/src/include/SymbolTable.h:
--------------------------------------------------------------------------------
 1 | #ifndef SYMBOL_TABLE_H
 2 | #define SYMBOL_TABLE_H
 3 | 
 4 | #include <vector>
 5 | #include <string>
 6 | 
 7 | #include "Token.h"
 8 | 
 9 | class SymbolTable {
10 | 	std::vector<std::string> symbols;
11 | public:
12 | 	bool isSymbolExist(std::string symbol);
13 | 	bool addSymbol(std::string symbol);
14 | 	int getSize();
15 | 	int getSymbolIndex(std::string symbol);
16 | };
17 | 
18 | #endif
19 | 


--------------------------------------------------------------------------------
/src/include/Token.h:
--------------------------------------------------------------------------------
 1 | #ifndef TOKEN_H
 2 | #define TOKEN_H
 3 | 
 4 | #include <string>
 5 | 
 6 | enum TokenType {
 7 | 	T_ID=1, T_ASSIGN, T_ENDOFSTATEMENT, T_NUMBER, T_SEMICOLON, T_LPAR, T_RPAR, T_LBRACE, T_FOR, 
 8 | 	T_RBRACE, T_ADD, T_SUB, T_MUL, T_DIV, T_STRING, T_IF, T_ELSE, T_DO, T_WHILE, T_FUNC, T_INC, T_DEC, T_RET, T_BOOL,
 9 | 	T_PRINT, T_COMMENT, T_MULTICOMMENT, T_COMMA, T_EQU, T_NLESS, T_NGREATER, T_NEQU, T_GREATER, T_LESS, T_LOOP 
10 | };
11 | 
12 | struct Token{
13 | 	TokenType type;
14 | 	std::string value;
15 | };
16 | 
17 | #endif
18 | 


--------------------------------------------------------------------------------
/tests/comment.calc:
--------------------------------------------------------------------------------
 1 | // dsasdfaw
 2 | print(1+5*3);
 3 | //print(3*(5+1));
 4 | print((25/5/5)*5);
 5 | // pppp 
 6 | //
 7 | print(4444);
 8 | /*dasdf dsf
 9 | asdfsd
10 | dsffgsdfg
11 | sfdgg
12 | sdf
13 | gsdf
14 | gfgsdfgsdfgsdfgsdfg sdfgsfg
15 | sdfg sfdgsdf gsdf */   
16 | print(5555);
17 | 


--------------------------------------------------------------------------------
/tests/comment.test:
--------------------------------------------------------------------------------
1 | 16
2 | 5
3 | 4444
4 | 5555
5 | 


--------------------------------------------------------------------------------
/tests/expression.calc:
--------------------------------------------------------------------------------
1 | print(1+5*3);
2 | print(3*5+1);
3 | print(3*(5+1));
4 | print((5+1)*3);
5 | print((5/1*5)*5);
6 | print((25/5/5)*5);
7 | print(4*3+(25/5/5)*5/1 + 5/5);
8 | 


--------------------------------------------------------------------------------
/tests/expression.test:
--------------------------------------------------------------------------------
1 | 16
2 | 16
3 | 18
4 | 18
5 | 125
6 | 5
7 | 18
8 | 


--------------------------------------------------------------------------------
/tests/func-args.calc:
--------------------------------------------------------------------------------
1 | fn add(a, b, c) {
2 | 	ret a+b;
3 | }
4 | 
5 | i = 2;
6 | j = add(4*7-5*5, i+i*2-1, 10);
7 | print(j);
8 | 


--------------------------------------------------------------------------------
/tests/func-args.test:
--------------------------------------------------------------------------------
1 | 8
2 | 


--------------------------------------------------------------------------------
/tests/func-call.calc:
--------------------------------------------------------------------------------
 1 | fn get() {
 2 | 	k = 2+8;
 3 | 	i = 90;
 4 | 	ret i+k;
 5 | }
 6 | 
 7 | fn get2() {
 8 | 	b = 300;
 9 | 	ret 3+b;
10 | }
11 | 
12 | j = get();
13 | s = 0;
14 | k = get2();
15 | print(k);
16 | k = k;
17 | print(k);
18 | 


--------------------------------------------------------------------------------
/tests/func-call.test:
--------------------------------------------------------------------------------
1 | 303
2 | 303
3 | 


--------------------------------------------------------------------------------
/tests/if-else.calc:
--------------------------------------------------------------------------------
 1 | 
 2 | if(4 == 4) {
 3 | 	print(1);
 4 | } else {
 5 | 	print(2);
 6 | }
 7 | 
 8 | if(4 != 4) {
 9 | 	print(1);
10 | } else {
11 | 	print(2);
12 | }
13 | 
14 | j = 6;
15 | 
16 | if(j > 5) {
17 | 	print(11);
18 | } else if (j == 5) {
19 | 	print(12);
20 | } else {
21 | 	print(13);
22 | }
23 | 
24 | j = 5;
25 | 
26 | if(j > 5) {
27 | 	print(21);
28 | } (j == 5) {
29 | 	print(22);
30 | } else {
31 | 	print(23);
32 | }
33 | 
34 | j = 4;
35 | 
36 | if(j > 5) {
37 | 	print(31);
38 | } else (j == 5) {
39 | 	print(32);
40 | } else {
41 | 	print(33);
42 | }
43 | 
44 | print(100);
45 | 


--------------------------------------------------------------------------------
/tests/if-else.test:
--------------------------------------------------------------------------------
1 | 1
2 | 2
3 | 11
4 | 22
5 | 33
6 | 100
7 | 


--------------------------------------------------------------------------------
/tests/if.calc:
--------------------------------------------------------------------------------
 1 | fn get(a, b) {
 2 | 	i = b;
 3 | 	ret a+i;
 4 | }
 5 | 
 6 | j = get(6,44);
 7 | c = 10;
 8 | 
 9 | if (j == 30+c+9) {
10 | 	print(1);
11 | }
12 | 
13 | if (j == 5*c) {
14 | 	k = 2;
15 | 	print(k);
16 | }
17 | 
18 | if (j == 5+5*c-4) {
19 | 	print(3);
20 | }
21 | 
22 | if (j != 49) {
23 | 	print(4);
24 | }
25 | 
26 | if (j != 50) {
27 | 	print(5);
28 | }
29 | 
30 | if (j != 51) {
31 | 	print(6);
32 | }
33 | 
34 | if (j > 49) {
35 | 	print(7);
36 | }
37 | 
38 | if (j > 50) {
39 | 	print(8);
40 | }
41 | 
42 | if (j > 51) {
43 | 	print(9);
44 | }
45 | 
46 | if (j !> 49) {
47 | 	print(10);
48 | }
49 | 
50 | if (j !> 50) {
51 | 	print(11);
52 | }
53 | 
54 | if (j !> 51) {
55 | 	print(12);
56 | }
57 | 
58 | if (j < 49) {
59 | 	print(13);
60 | }
61 | 
62 | if (j < 50) {
63 | 	print(14);
64 | }
65 | 
66 | if (j < 51) {
67 | 	print(15);
68 | }
69 | 
70 | if (j !< 49) {
71 | 	print(16);
72 | }
73 | 
74 | if (j !< 50) {
75 | 	print(17);
76 | }
77 | 
78 | if (j !< 51) {
79 | 	print(18);
80 | }
81 | 
82 | print(100);
83 | 


--------------------------------------------------------------------------------
/tests/if.test:
--------------------------------------------------------------------------------
 1 | 2
 2 | 4
 3 | 6
 4 | 7
 5 | 11
 6 | 12
 7 | 15
 8 | 16
 9 | 17
10 | 100
11 | 


--------------------------------------------------------------------------------
/tests/loop.calc:
--------------------------------------------------------------------------------
 1 | loop(2) {
 2 | 	print(1);
 3 | }
 4 | 
 5 | loop(4*4-6*2-1) {
 6 | 	print(2);
 7 | }
 8 | 
 9 | loop(5*4-8*2) {
10 | 	print(3);
11 | }
12 | 
13 | print(100);
14 | 


--------------------------------------------------------------------------------
/tests/loop.test:
--------------------------------------------------------------------------------
 1 | 1
 2 | 1
 3 | 2
 4 | 2
 5 | 2
 6 | 3
 7 | 3
 8 | 3
 9 | 3
10 | 100
11 | 


--------------------------------------------------------------------------------