├── .gitignore ├── CMakeLists.txt ├── Makefile ├── README.md ├── src ├── CMakeLists.txt ├── CodeGenerator.cpp ├── Flexer.cpp ├── Lexer.cpp ├── Parser.cpp ├── SymbolTable.cpp ├── compiler.cpp ├── flex │ └── 1.l └── include │ ├── CodeGenerator.h │ ├── Flexer.h │ ├── ILexer.h │ ├── Lexer.h │ ├── Node.h │ ├── Parser.h │ ├── SymbolTable.h │ └── Token.h └── tests ├── comment.calc ├── comment.test ├── expression.calc ├── expression.test ├── func-args.calc ├── func-args.test ├── func-call.calc ├── func-call.test ├── if-else.calc ├── if-else.test ├── if.calc ├── if.test ├── loop.calc └── loop.test /.gitignore: -------------------------------------------------------------------------------- 1 | build/ 2 | src/flex/* 3 | !src/flex/*.lex 4 | tests/* 5 | !tests/*.calc 6 | !tests/*.test 7 | *.swp 8 | *.o 9 | compiler 10 | .cproject 11 | .project 12 | -------------------------------------------------------------------------------- /CMakeLists.txt: -------------------------------------------------------------------------------- 1 | # C++17 via CMAKE_CXX_STANDARD needs CMake 3.8+, and flex_target(... DEFINES_FILE) in src/ needs 3.5+. 2 | cmake_minimum_required (VERSION 3.8) 3 | 4 | project (COMPILER) 5 | 6 | set(CMAKE_CXX_STANDARD 17) 7 | # Fail at configure time instead of silently falling back to an older standard. 8 | set(CMAKE_CXX_STANDARD_REQUIRED ON) 9 | set(CMAKE_CXX_EXTENSIONS OFF) 10 | add_compile_options(-Wall -Wextra -pedantic) 11 | 12 | add_subdirectory(src) 13 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | APP = compiler 2 | CXX = g++ 3 | FLAGS = -std=c++17 -Wall -I ./src/include -g 4 | SRC_DIR = src 5 | SRC = $(wildcard $(SRC_DIR)/*.cpp) 6 | BIN_DIR = build 7 | 8 | EXT = calc 9 | TSTDIR = tests 10 | TESTS = $(wildcard $(TSTDIR)/*.$(EXT)) 11 | TESTS := $(TESTS:%.$(EXT)=%) 12 | 13 | OBJ := $(SRC:.cpp=.o) 14 | OBJ := $(OBJ:$(SRC_DIR)/%=%) 15 | OBJ := $(addprefix $(BIN_DIR)/,$(OBJ)) 16 | 17 | .PHONY: all clean test test-clean 18 | 19 | all: lexer $(APP) 20 | 21 | $(BIN_DIR): 22 | mkdir -p $(BIN_DIR) 23 | $(APP): $(BIN_DIR) $(OBJ) lexer 24 | $(CXX) $(FLAGS) $(OBJ) 
build/clexer.o -o $@ 25 | $(BIN_DIR)/%.o: $(SRC_DIR)/%.cpp 26 | $(CXX) $(FLAGS) -c $< -o $@ 27 | clean: test-clean 28 | rm -rf $(BIN_DIR) $(APP) src/flex/lex.yy.cpp src/flex/clexer.h 29 | test-clean: 30 | rm -f $(TSTDIR)/*.asm $(TSTDIR)/*.out $(TSTDIR)/*.o $(TESTS) 31 | 32 | test: $(TESTS) 33 | @for item in $(TESTS); do \ 34 | ./$$item > $$item.out; \ 35 | if diff -u ./$$item.test ./$$item.out; then echo "Test $$item is OK"; else echo "Warning!!! Test $$item is failed!!!"; fi \ 36 | done 37 | 38 | $(TESTS): % : %.$(EXT) %.test 39 | ./$(APP) $< $@.asm 40 | nasm -f elf $@.asm 41 | ld -melf_i386 $@.o -o $@ 42 | # Generate the scanner from the flex grammar (tracked as src/flex/1.l, the same file the CMake build uses) and compile it. 43 | lexer: src/flex/1.l $(BIN_DIR) 44 | flex --header-file=src/flex/clexer.h -o src/flex/lex.yy.cpp src/flex/1.l 45 | g++ -std=c++17 src/flex/lex.yy.cpp -c -o build/clexer.o 46 | 47 | #ld -lc -melf_i386 -dynamic-linker /lib/ld-linux.so.2 $< -o $@ 48 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Compiler 2 | 3 | Released: 4 | - variables, simple scope 5 | - functions, print function 6 | - calculation: addition, subtraction, multiplication, division 7 | - if-else (simple, ==, !=, <, !<, >, !>) 8 | - loop(only calculated expression without variables) 9 | - only integers as operands and arguments for functions 10 | 11 | It uses `ld` and `nasm` as backend. 
12 | 13 | Build the compiler: 14 | ``` 15 | make 16 | ``` 17 | 18 | Build and run tests: 19 | 20 | ``` 21 | make test 22 | ``` 23 | 24 | Examples of code in \*.calc files of tests directory 25 | -------------------------------------------------------------------------------- /src/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | # Generate the scanner source and its header from the flex grammar into the build tree. 2 | find_package(FLEX REQUIRED) 3 | flex_target(lexer flex/1.l ${CMAKE_CURRENT_BINARY_DIR}/lex.yy.cpp 4 | DEFINES_FILE ${CMAKE_CURRENT_BINARY_DIR}/clexer.h 5 | ) 6 | 7 | set(APP compiler) 8 | add_executable(${APP} ${APP}.cpp CodeGenerator.cpp Flexer.cpp Parser.cpp SymbolTable.cpp ${FLEX_lexer_OUTPUTS}) 9 | # Target-scoped include paths: the project headers plus the build directory that receives the generated clexer.h. 10 | target_include_directories(${APP} PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/include ${CMAKE_CURRENT_BINARY_DIR}) 11 | -------------------------------------------------------------------------------- /src/CodeGenerator.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include "CodeGenerator.h" 3 | 4 | CodeGenerator::CodeGenerator(std::string ofile) { 5 | this->outfile.open(ofile); 6 | } 7 | 8 | CodeGenerator::~CodeGenerator() { 9 | outfile.close(); 10 | } 11 | 12 | 13 | void CodeGenerator::compile(Node* ast) { 14 | using namespace std; 15 | if (!ast) return; 16 | 17 | switch(ast->type) { 18 | case NodeType::N_LOOP: { 19 | Node* condition = ast->args; 20 | 21 | outfile << "; LOOP COND" << endl; 22 | 23 | if ( condition->type == NodeType::N_NUMBER_C || condition->type == NodeType::N_ID) 24 | outfile << "mov eax, "; 25 | compile(condition); 26 | outfile << endl; 27 | outfile << "mov ecx, eax" << endl; 28 | 29 | int labelCount = ++this->label; 30 | 31 | // block 32 | outfile << ".L"<< labelCount << ": ; LOOP BEGIN" << endl; 33 | outfile << "push ecx" << endl; 34 | 35 | compile(ast->left); 36 | 37 | outfile << "pop ecx" << endl; 38 | outfile << "dec ecx" << endl; 39 | outfile << "jnz .L"<< labelCount << " ; LOOP END" << endl; 40 | outfile << endl; 41 | } 42 | break; 43 | 44 | case NodeType::N_SEQ: 45 
| { 46 | compile(ast->left); 47 | compile(ast->right); 48 | outfile << endl; 49 | } 50 | break; 51 | 52 | case NodeType::N_IF: 53 | { 54 | outfile << "; IF " << endl; 55 | 56 | Node* condition = ast->args; 57 | 58 | if ( condition->left->type == NodeType::N_NUMBER_C || condition->left->type == NodeType::N_ID) 59 | outfile << "mov eax, "; 60 | compile(condition->left); 61 | outfile << endl; 62 | outfile << "mov edx, eax" << endl; 63 | 64 | if ( condition->right->type == NodeType::N_NUMBER_C || condition->right->type == NodeType::N_ID) 65 | outfile << "mov eax, "; 66 | compile(condition->right); 67 | outfile << endl; 68 | outfile << "cmp edx, eax" << endl; 69 | 70 | this->label++; 71 | 72 | switch(condition->type) { 73 | case NodeType::N_EQU: 74 | outfile << "jnz .L" << this->label << endl; 75 | break; 76 | 77 | case NodeType::N_NEQU: 78 | outfile << "jz .L" << this->label << endl; 79 | break; 80 | 81 | case NodeType::N_LESS: 82 | outfile << "jnl .L" << this->label << endl; 83 | break; 84 | 85 | case NodeType::N_NLESS: 86 | outfile << "jl .L" << this->label << endl; 87 | break; 88 | 89 | case NodeType::N_GREATER: 90 | outfile << "jng .L" << this->label << endl; 91 | break; 92 | 93 | case NodeType::N_NGREATER: 94 | outfile << "jg .L" << this->label << endl; 95 | break; 96 | 97 | default: 98 | cout << "Error in compile if-condition" << endl; 99 | exit(1); 100 | break; 101 | } 102 | 103 | int endLabel = this->label; 104 | compile(ast->left); 105 | 106 | if (ast->right) { 107 | outfile << "jmp .LE"<< this->label << endl; 108 | } 109 | 110 | outfile << ".L"<< this->label << ": ; END IF" << endl; 111 | // ELSE begins 112 | if (ast->right) { 113 | compile(ast->right); 114 | outfile << ".LE"<< endLabel << ": ; END ELSE-IF" << endl; 115 | } 116 | } 117 | break; 118 | 119 | case NodeType::N_BLOCK: 120 | { 121 | SymbolTable *temp = this->currentSymbolTable; 122 | this->currentSymbolTable = ast->symbolTable; 123 | 124 | outfile << "push ebp" << endl; 125 | outfile << "mov ebp, 
esp" << endl; 126 | outfile << "sub esp, " << 4 * this->currentSymbolTable->getSize() << endl; 127 | 128 | compile(ast->left); 129 | 130 | outfile << "leave" << endl; 131 | this->currentSymbolTable = temp; 132 | } 133 | break; 134 | 135 | case NodeType::N_ASSIGN: 136 | { 137 | // find symbol in current symbol table 138 | SymbolTable* table = this->currentSymbolTable; 139 | int index = table->getSymbolIndex(ast->left->value); 140 | 141 | if ( ast->right->type == NodeType::N_NUMBER_C || ast->right->type == NodeType::N_ID) 142 | outfile << "mov eax, "; 143 | 144 | compile(ast->right); 145 | 146 | outfile << endl; 147 | outfile << "mov DWORD [ebp-" << 4*(index+1) << "], eax" << endl; 148 | outfile << endl; 149 | } 150 | 151 | break; 152 | 153 | case NodeType::N_ADD: 154 | if ( (ast->left->type == NodeType::N_NUMBER_C || ast->left->type == NodeType::N_ID) 155 | && (ast->right->type == NodeType::N_NUMBER_C || ast->right->type == NodeType::N_ID) ){ 156 | outfile << "mov eax, "; 157 | compile(ast->left); 158 | 159 | outfile << endl; 160 | 161 | if (ast->right->value == "1") { 162 | outfile << "inc eax"; 163 | } else { 164 | outfile << "add eax, "; 165 | compile(ast->right); 166 | } 167 | outfile << endl; 168 | 169 | } else if (ast->left->type == NodeType::N_NUMBER_C || ast->left->type == NodeType::N_ID) { 170 | compile(ast->right); 171 | 172 | if (ast->left->value == "1") { 173 | outfile << "inc eax"; 174 | } else { 175 | outfile << "add eax, "; 176 | compile(ast->left); 177 | } 178 | 179 | outfile << endl; 180 | 181 | } else if (ast->right->type == NodeType::N_NUMBER_C || ast->right->type == NodeType::N_ID) { 182 | compile(ast->left); 183 | 184 | if (ast->right->value == "1") { 185 | outfile << "inc eax"; 186 | } else { 187 | outfile << "add eax, "; 188 | compile(ast->right); 189 | } 190 | 191 | outfile << endl; 192 | 193 | } else { 194 | compile(ast->left); 195 | outfile << "push eax" << endl; 196 | compile(ast->right); 197 | outfile << "pop ebx" << endl; 198 | outfile 
<< "add eax, ebx" << endl; 199 | } 200 | break; 201 | 202 | case NodeType::N_SUB: 203 | if ( (ast->left->type == NodeType::N_NUMBER_C || ast->left->type == NodeType::N_ID) 204 | && (ast->right->type == NodeType::N_NUMBER_C || ast->right->type == NodeType::N_ID) ){ 205 | outfile << "mov eax, "; 206 | compile(ast->left); 207 | outfile << endl; 208 | 209 | if (ast->right->value == "1") { 210 | outfile << "dec eax"; 211 | } else { 212 | outfile << "sub eax, "; 213 | compile(ast->right); 214 | } 215 | 216 | outfile << endl; 217 | 218 | } else if (ast->left->type == NodeType::N_NUMBER_C || ast->left->type == NodeType::N_ID) { 219 | compile(ast->right); 220 | 221 | if (ast->left->value == "1") { 222 | outfile << "dec eax"; 223 | } else { 224 | outfile << "sub eax, "; 225 | compile(ast->left); 226 | } 227 | 228 | outfile << endl; 229 | outfile << "neg eax" << endl; 230 | 231 | } else if (ast->right->type == NodeType::N_NUMBER_C || ast->right->type == NodeType::N_ID) { 232 | compile(ast->left); 233 | 234 | if (ast->right->value == "1") { 235 | outfile << "dec eax"; 236 | } else { 237 | outfile << "sub eax, "; 238 | compile(ast->right); 239 | } 240 | 241 | outfile << endl; 242 | 243 | } else { 244 | compile(ast->right); 245 | outfile << "push eax" << endl; 246 | compile(ast->left); 247 | outfile << "pop ebx" << endl; 248 | outfile << "sub eax, ebx" << endl; 249 | } 250 | break; 251 | 252 | case NodeType::N_MUL: 253 | if ( (ast->left->type == NodeType::N_NUMBER_C || ast->left->type == NodeType::N_ID) 254 | && (ast->right->type == NodeType::N_NUMBER_C || ast->right->type == NodeType::N_ID) ){ 255 | outfile << "mov eax, "; 256 | compile(ast->left); 257 | outfile << endl << "imul eax, "; 258 | compile(ast->right); 259 | outfile << endl; 260 | 261 | } else if (ast->left->type == NodeType::N_NUMBER_C || ast->left->type == NodeType::N_ID) { 262 | compile(ast->right); 263 | outfile << "imul eax, "; 264 | compile(ast->left); 265 | outfile << endl; 266 | 267 | } else if 
(ast->right->type == NodeType::N_NUMBER_C || ast->right->type == NodeType::N_ID) { 268 | compile(ast->left); 269 | outfile << "imul eax, "; 270 | compile(ast->right); 271 | outfile << endl; 272 | 273 | } else { 274 | compile(ast->left); 275 | outfile << "push eax" << endl; 276 | compile(ast->right); 277 | outfile << "pop ebx" << endl; 278 | outfile << "imul eax, ebx" << endl; 279 | } 280 | break; 281 | // idiv divides the signed 64-bit value edx:eax, so eax is sign-extended into edx with cdq (zeroing edx is only right for non-negative dividends) 282 | case NodeType::N_DIV: 283 | if ( (ast->left->type == NodeType::N_NUMBER_C || ast->left->type == NodeType::N_ID) 284 | && (ast->right->type == NodeType::N_NUMBER_C || ast->right->type == NodeType::N_ID) ){ 285 | outfile << "mov eax, "; 286 | compile(ast->left); 287 | outfile << endl; 288 | outfile << "mov ebx, "; 289 | compile(ast->right); 290 | outfile << endl; 291 | outfile << "cdq" << endl; 292 | outfile << "idiv ebx" << endl; 293 | 294 | } else if (ast->left->type == NodeType::N_NUMBER_C || ast->left->type == NodeType::N_ID) { 295 | compile(ast->right); 296 | outfile << "mov ebx, eax" << endl; 297 | outfile << "mov eax, "; 298 | compile(ast->left); 299 | outfile << endl; 300 | outfile << "cdq" << endl; 301 | outfile << "idiv ebx" << endl; 302 | 303 | } else if (ast->right->type == NodeType::N_NUMBER_C || ast->right->type == NodeType::N_ID) { 304 | compile(ast->left); 305 | outfile << "mov ebx, "; 306 | compile(ast->right); 307 | outfile << endl; 308 | outfile << "cdq" << endl; 309 | outfile << "idiv ebx" << endl; 310 | 311 | } else { 312 | compile(ast->right); 313 | outfile << "push eax" << endl; 314 | compile(ast->left); 315 | outfile << "pop ebx" << endl; 316 | outfile << "cdq" << endl; 317 | outfile << "idiv ebx" << endl; 318 | } 319 | break; 320 | 321 | case NodeType::N_PRINT: 322 | if (ast->left->type == NodeType::N_NUMBER_C || ast->left->type == NodeType::N_ID) { 323 | outfile << "mov eax, "; 324 | } 325 | 326 | compile(ast->left); 327 | 328 | outfile << endl; 329 | 330 | //* 331 | //outfile << "push eax" << endl; 
332 | outfile << "call print" << endl; 333 | //*/ 334 | 335 | /* 336 | outfile << "push eax" << endl; 337 | outfile << "push message2" << endl; 338 | outfile << "call printf" << endl; 339 | outfile << "add esp, 8" << endl; 340 | //*/ 341 | break; 342 | 343 | case NodeType::N_ID: 344 | { 345 | SymbolTable* table = this->currentSymbolTable; 346 | int index = table->getSymbolIndex(ast->value); 347 | if (index >= 0) { 348 | outfile << "DWORD [ebp-" << (4*index) + 4 << "]"; 349 | } else { 350 | // check input params in current function 351 | 352 | Node* args = this->currentFunction->args; 353 | index = 0; 354 | bool inFunctionArgs = false; 355 | 356 | while(args) { 357 | if (args->value == ast->value) { 358 | inFunctionArgs = true; 359 | break; 360 | } 361 | args = args->args; 362 | index++; 363 | } 364 | 365 | if (inFunctionArgs){ 366 | outfile << "DWORD [ebp+" << (4*index) + 8 << "]"; 367 | } else { 368 | cout << "Unexpected variable \"" << ast->value << "\" in compile:N_ID" << endl; 369 | exit(1); 370 | } 371 | } 372 | } 373 | break; 374 | 375 | case NodeType::N_NUMBER_C: 376 | outfile << ast->value; 377 | break; 378 | 379 | case NodeType::N_FUNC: 380 | this->currentFunction = ast; 381 | 382 | compile(ast->right); 383 | 384 | // prologue 385 | outfile << "fn_" << ast->value << ":" << endl << endl; 386 | 387 | compile(ast->left); 388 | 389 | // epilogue 390 | outfile << "ret" << endl; 391 | outfile << endl; 392 | break; 393 | 394 | case NodeType::N_RET: 395 | if ( ast->left->type == NodeType::N_NUMBER_C || ast->left->type == NodeType::N_ID) 396 | outfile << "mov eax, "; 397 | compile(ast->left); 398 | break; 399 | 400 | case NodeType::N_FUNC_CALL: 401 | // prologue 402 | { 403 | Node* temp = ast; 404 | int count = 0; 405 | while(temp->args) { 406 | count++; 407 | 408 | if ( temp->args->type == NodeType::N_NUMBER_C || temp->args->type == NodeType::N_ID) { 409 | outfile << "push "; 410 | this->compile(temp->args); 411 | } else { 412 | this->compile(temp->args); 413 | 
outfile << endl; 414 | outfile << "push eax"; 415 | } 416 | 417 | outfile << endl; 418 | temp = temp->args; 419 | } 420 | 421 | outfile << "call fn_" << ast->value << endl; 422 | 423 | // free arguments from stack 424 | if (count > 0) { 425 | outfile << "add esp, " << count*4 << endl; 426 | } 427 | } 428 | break; 429 | 430 | case NodeType::N_PROG: 431 | { 432 | this->currentFunction = ast; 433 | this->currentSymbolTable = ast->symbolTable; 434 | // prologue 435 | outfile << "BITS 32" << endl; 436 | outfile << "global _start" << endl; 437 | //outfile << "extern printf" << endl; 438 | outfile << endl; 439 | 440 | //outfile << "section .data" << endl; 441 | //outfile << "message2 db \"%d\",10,0" << endl; 442 | //outfile << "msg TIMES 20 db 0" << endl; 443 | //outfile << "msg2 db 0" << endl; 444 | //outfile << "len equ $-msg " << endl; 445 | //outfile << "section .bss" << endl; 446 | //outfile << "msg resb 40" << endl; 447 | //outfile << "msg2 resb 0" << endl; 448 | outfile << endl; 449 | 450 | outfile << "section .text" << endl; 451 | 452 | outfile << ";==== FUNCTIONS ======" << endl; 453 | compile(ast->right); 454 | outfile << ";==== FUNCTIONS ======" << endl; 455 | 456 | outfile << "_start:" << endl << endl; 457 | 458 | SymbolTable *temp = this->currentSymbolTable; 459 | this->currentSymbolTable = ast->symbolTable; 460 | 461 | outfile << "push ebp" << endl; 462 | outfile << "mov ebp, esp" << endl; 463 | outfile << "sub esp, " << 4 * this->currentSymbolTable->getSize() << endl; 464 | 465 | compile(ast->left); 466 | 467 | outfile << "leave" << endl; 468 | this->currentSymbolTable = temp; 469 | 470 | // exit 471 | outfile << "mov eax, 1" << endl; 472 | outfile << "xor ebx, ebx" << endl; 473 | outfile << "int 0x80" << endl; 474 | outfile << endl; 475 | 476 | // print function : prints a decimal number in eax register with a new line 477 | outfile << "print:" << endl; 478 | outfile << "mov edi, 1" << endl; 479 | outfile << "mov ecx, esp" << endl; 480 | outfile << "mov 
ebx, 10" << endl; 481 | outfile << "dec ecx" << endl; 482 | outfile << "mov [ecx], bl" << endl; 483 | 484 | outfile << "print_loop:" << endl; 485 | outfile << "xor edx, edx" << endl; 486 | outfile << "idiv ebx" << endl; 487 | outfile << "add dl, '0'" << endl; 488 | outfile << "dec ecx" << endl; 489 | outfile << "inc edi" << endl; 490 | outfile << "mov [ecx],dl" << endl; 491 | outfile << "test eax, eax" << endl; 492 | outfile << "jnz print_loop" << endl; 493 | 494 | outfile << "mov eax, 4" << endl; 495 | outfile << "mov ebx, 1" << endl; 496 | outfile << "mov edx, edi" << endl; 497 | outfile << "int 0x80" << endl; 498 | 499 | outfile << "ret" << endl; 500 | } 501 | break; 502 | //* 503 | default: 504 | break; 505 | //*/ 506 | 507 | } 508 | } 509 | 510 | -------------------------------------------------------------------------------- /src/Flexer.cpp: -------------------------------------------------------------------------------- 1 | #include "include/Flexer.h" 2 | 3 | #include 4 | 5 | // TODO: remove it or change 6 | typedef union { 7 | int ival; 8 | char *cval; 9 | } yylval_type; 10 | 11 | yylval_type yylval; 12 | 13 | Flexer::Flexer() { 14 | 15 | } 16 | // Runs the flex scanner over `content` and returns one heap-allocated Token per scanned lexeme; the caller owns the vector and the tokens. 17 | std::vector* Flexer::getTokens(std::string content) { 18 | std::vector *result = new std::vector(); 19 | int p = 0; 20 | Token *token; 21 | 22 | YY_BUFFER_STATE buff = yy_scan_string(content.c_str()); 23 | while( (p = yylex()) != 0) { 24 | token = new Token(); 25 | token->type = (TokenType)p; 26 | 27 | switch(p) { 28 | case T_NUMBER: 29 | token->value = std::to_string(yylval.ival); 30 | result->push_back(token); 31 | break; 32 | 33 | default: 34 | token->value = yylval.cval ? yylval.cval : ""; 35 | result->push_back(token); 36 | break; 37 | } 38 | 39 | // ival and cval share storage, so a single store resets both: writing ival = 0 after cval used to clobber the pointer 40 | yylval.cval = nullptr; 41 | } 42 | 43 | yy_delete_buffer(buff); 44 | 45 | return result; 46 | } 47 | -------------------------------------------------------------------------------- /src/Lexer.cpp: 
-------------------------------------------------------------------------------- 1 | #include 2 | 3 | #include "include/Lexer.h" 4 | 5 | Lexer::Lexer() { 6 | this->inToken = false; 7 | this->position = 0; 8 | this->token = nullptr; 9 | } 10 | 11 | void Lexer::checkID() { 12 | if (token->type == TokenType::T_ID) { 13 | if (token->value == "if") { 14 | token->type = TokenType::T_IF; 15 | } else if (token->value == "else") { 16 | token->type = TokenType::T_ELSE; 17 | } else if (token->value == "while") { 18 | token->type = TokenType::T_WHILE; 19 | } else if (token->value == "fn") { 20 | token->type = TokenType::T_FUNC; 21 | } else if (token->value == "ret") { 22 | token->type = TokenType::T_RET; 23 | } else if (token->value == "true") { 24 | token->type = TokenType::T_BOOL; 25 | } else if (token->value == "false") { 26 | token->type = TokenType::T_BOOL; 27 | } else if (token->value == "for") { 28 | token->type = TokenType::T_FOR; 29 | } else if (token->value == "do") { 30 | token->type = TokenType::T_DO; 31 | } else if (token->value == "print") { 32 | token->type = TokenType::T_PRINT; 33 | } 34 | } 35 | } 36 | 37 | std::vector* Lexer::getTokens(std::string content) { 38 | 39 | std::vector *result = new std::vector(); 40 | this->position = 0; 41 | 42 | std::string temp = ""; 43 | int size = content.length(); 44 | for (int i = 0; itype == TokenType::T_COMMENT) { 48 | if (c == '\n') { 49 | inToken = false; 50 | } 51 | continue; 52 | // create previous token 53 | } else if (token->type == TokenType::T_MULTICOMMENT) { 54 | if (c == '*') { 55 | if ( i+1 < size && content[i+1] == '/') { 56 | i = i+1; 57 | inToken = false; 58 | } 59 | } 60 | continue; 61 | 62 | } else if (c == '\"') { 63 | if (token->type == TokenType::T_STRING) { 64 | inToken = false; 65 | result->push_back(token); 66 | continue; 67 | } 68 | 69 | this->checkID(); 70 | 71 | } else if (c == '+') { 72 | 73 | if (token->type == TokenType::T_ADD) { 74 | token->type = TokenType::T_INC; 75 | token->value += c; 76 | 
result->push_back(token); 77 | inToken = false; 78 | continue; 79 | } 80 | 81 | } else if (c == '-') { 82 | 83 | if (token->type == TokenType::T_SUB) { 84 | token->type = TokenType::T_DEC; 85 | token->value += c; 86 | result->push_back(token); 87 | inToken = false; 88 | continue; 89 | } 90 | 91 | } else if (c >= 'a' && c <= 'z') { 92 | 93 | if (token->type == TokenType::T_ID || token->type == TokenType::T_STRING) { 94 | token->value += c; 95 | continue; 96 | } 97 | 98 | } else if (c >= '0' && c <= '9') { 99 | 100 | if (token->type == TokenType::T_NUMBER || token->type == TokenType::T_ID || token->type == TokenType::T_STRING) { 101 | token->value += c; 102 | continue; 103 | } 104 | 105 | } else { 106 | //other symbols 107 | if (token->type == TokenType::T_STRING) { 108 | token->value += c; 109 | continue; 110 | } 111 | 112 | this->checkID(); 113 | 114 | } 115 | 116 | result->push_back(token); 117 | } 118 | 119 | if (c == '=') { 120 | token = new Token(); 121 | token->type = TokenType::T_ASSIGN; 122 | token->value = c; 123 | result->push_back(token); 124 | inToken = false; 125 | 126 | } else if (c == '(') { 127 | token = new Token(); 128 | token->type = TokenType::T_LPAR; 129 | token->value = c; 130 | result->push_back(token); 131 | inToken = false; 132 | 133 | } else if (c == ')') { 134 | token = new Token(); 135 | token->type = TokenType::T_RPAR; 136 | token->value = c; 137 | result->push_back(token); 138 | inToken = false; 139 | 140 | } else if (c == '{') { 141 | token = new Token(); 142 | token->type = TokenType::T_LBRACE; 143 | token->value = c; 144 | result->push_back(token); 145 | inToken = false; 146 | 147 | } else if (c == '}') { 148 | token = new Token(); 149 | token->type = TokenType::T_RBRACE; 150 | token->value = c; 151 | result->push_back(token); 152 | inToken = false; 153 | 154 | } else if (c == '+') { 155 | token = new Token(); 156 | token->type = TokenType::T_ADD; 157 | token->value = c; 158 | inToken = true; 159 | 160 | } else if (c == '-') { 161 | 
token = new Token(); 162 | token->type = TokenType::T_SUB; 163 | token->value = c; 164 | inToken = true; 165 | 166 | } else if (c == '*') { 167 | token = new Token(); 168 | token->type = TokenType::T_MUL; 169 | token->value = c; 170 | result->push_back(token); 171 | inToken = false; 172 | 173 | } else if (c == '/') { 174 | if (i+1 < size) { 175 | if (content[i+1] == '/') { 176 | token = new Token(); 177 | token->type = TokenType::T_COMMENT; 178 | token->value = "COMMENT"; 179 | inToken = true; 180 | continue; 181 | } else if (content[i+1] == '*') { 182 | token = new Token(); 183 | token->type = TokenType::T_MULTICOMMENT; 184 | token->value = "MULTICOMMENT"; 185 | inToken = true; 186 | continue; 187 | } 188 | } 189 | 190 | token = new Token(); 191 | token->type = TokenType::T_DIV; 192 | token->value = c; 193 | result->push_back(token); 194 | inToken = false; 195 | 196 | } else if (c == ';') { 197 | token = new Token(); 198 | token->type = TokenType::T_SEMICOLON; 199 | token->value = c; 200 | result->push_back(token); 201 | inToken = false; 202 | 203 | } else if (c == '\"') { 204 | token = new Token(); 205 | token->type = TokenType::T_STRING; 206 | token->value = ""; 207 | inToken = true; 208 | 209 | } else if (c >= 'a' && c <= 'z') { 210 | token = new Token(); 211 | token->type = TokenType::T_ID; 212 | token->value = c; 213 | inToken = true; 214 | 215 | } else if (c >= '0' && c <= '9') { 216 | token = new Token(); 217 | token->type = TokenType::T_NUMBER; 218 | token->value = c; 219 | inToken = true; 220 | 221 | } else { 222 | // other symbols 223 | inToken = false; 224 | } 225 | 226 | } 227 | 228 | return result; 229 | } 230 | -------------------------------------------------------------------------------- /src/Parser.cpp: -------------------------------------------------------------------------------- 1 | #include "include/Parser.h" 2 | 3 | #include 4 | 5 | Parser::Parser() { 6 | this->t = nullptr; 7 | } 8 | 9 | Token* Parser::nextToken() { 10 | ti++; 11 | if(ti < 
t->size()) { 12 | return t->at(ti); 13 | } 14 | 15 | return nullptr; 16 | } 17 | 18 | Token* Parser::getCurrentToken() { 19 | if (ti < t->size()) { 20 | return t->at(ti); 21 | } 22 | return nullptr; 23 | } 24 | 25 | bool Parser::expect(TokenType type) { 26 | 27 | Token* token = getCurrentToken(); 28 | if (token->type == type) { 29 | nextToken(); 30 | return true; 31 | } 32 | 33 | return false; 34 | } 35 | 36 | void Parser::printError(std::string text) { 37 | std::cout << "Error: " << text << std::endl; 38 | exit(1); 39 | } 40 | 41 | Node* Parser::parse(std::vector *tokens) { 42 | using std::cout; 43 | using std::endl; 44 | 45 | t = tokens; 46 | 47 | Node* prog = new Node(); 48 | prog->type = NodeType::N_PROG; 49 | prog->value = "PROG"; 50 | prog->symbolTable = new SymbolTable(); 51 | this->scopes.push_back(prog->symbolTable); 52 | 53 | this->prog = prog; 54 | prog->left = statements(); 55 | 56 | this->scopes.pop_back(); 57 | 58 | cout << "parse: end of parsing" << endl; 59 | 60 | return prog; 61 | } 62 | 63 | Node* Parser::statements() { 64 | 65 | Node* temp; 66 | Node* node = nullptr; 67 | Token* token = getCurrentToken(); 68 | 69 | while(token && token->type != TokenType::T_RBRACE) { 70 | temp = node; 71 | 72 | node = new Node(); 73 | node->type = NodeType::N_SEQ; 74 | node->value = "SEQ"; 75 | node->left = temp; 76 | node->right = statement(); 77 | token = getCurrentToken(); 78 | } 79 | 80 | return node; 81 | } 82 | 83 | 84 | Node* Parser::statement() { 85 | using std::cout; 86 | using std::endl; 87 | 88 | Node* node = nullptr; 89 | Token *token = getCurrentToken(); 90 | if (!token) { 91 | printError("A token expected in statement"); 92 | } 93 | 94 | switch(token->type) { 95 | case TokenType::T_LOOP: { 96 | token = nextToken(); 97 | if (!expect(TokenType::T_LPAR)) { 98 | printError("A left parenthes expected in loop-statement."); 99 | } 100 | 101 | node = new Node(); 102 | node->type = NodeType::N_LOOP; 103 | node->value = token->value; 104 | node->args = 
expression(); 105 | 106 | if (!expect(TokenType::T_RPAR)) { 107 | printError("A right parenthes expected in loop-statement."); 108 | } 109 | 110 | node->left = block(); 111 | } 112 | break; 113 | 114 | case TokenType::T_FUNC: 115 | { 116 | token = nextToken(); 117 | 118 | if (!token || token->type != TokenType::T_ID) { 119 | printError("An id expected in function statement."); 120 | } 121 | 122 | node = new Node(); 123 | node->type = NodeType::N_FUNC; 124 | node->value = token->value; // id 125 | 126 | nextToken(); 127 | 128 | if (!expect(TokenType::T_LPAR)) { 129 | printError("A left parenthes expected in function statement."); 130 | } 131 | 132 | //////////////////////////////////////////// 133 | 134 | Node* func = node; 135 | Node* temp = node; 136 | token = getCurrentToken(); 137 | 138 | while(token && token->type == TokenType::T_ID) { 139 | node = new Node(); 140 | node->type = NodeType::N_ID; // FUNC_ARG 141 | node->value = token->value; 142 | 143 | temp->args = node; 144 | temp = temp->args; 145 | 146 | nextToken(); 147 | 148 | if (!expect(TokenType::T_COMMA)) { 149 | break; 150 | } 151 | 152 | token = getCurrentToken(); 153 | } 154 | 155 | //////////////////////////////////////////// 156 | 157 | if (!expect(TokenType::T_RPAR)) { 158 | printError("A right parenthes expected in function statement."); 159 | } 160 | 161 | func->left = block(); 162 | temp = prog->right; 163 | prog->right = func; 164 | func->right = temp; 165 | node = nullptr; 166 | } 167 | 168 | break; 169 | 170 | case TokenType::T_RET: 171 | { 172 | nextToken(); 173 | 174 | node = new Node(); 175 | node->type = NodeType::N_RET; 176 | node->value = "RET"; 177 | node->left = expression(); 178 | 179 | if (!node->left) { 180 | printError("An expression expected in return statement."); 181 | } 182 | 183 | if (!expect(TokenType::T_SEMICOLON)) { 184 | printError("A right brace expected in return statement."); 185 | } 186 | } 187 | break; 188 | 189 | case TokenType::T_IF: 190 | { 191 | nextToken(); 192 
| node = conditionBlock(); 193 | } 194 | break; 195 | 196 | case TokenType::T_PRINT: 197 | node = new Node(); 198 | node->type = NodeType::N_PRINT; 199 | node->value = "PRINT"; 200 | nextToken(); 201 | 202 | if (!expect(TokenType::T_LPAR)) { 203 | printError("A left parenthesis expected in print statement."); 204 | } 205 | 206 | node->left = expression(); 207 | 208 | if (!expect(TokenType::T_RPAR)) { 209 | printError("A right parenthesis expected in print statement."); 210 | } 211 | 212 | if (!expect(TokenType::T_SEMICOLON)) { 213 | printError("A semicolon expected in print statement."); 214 | } 215 | 216 | break; 217 | 218 | case TokenType::T_WHILE: 219 | break; 220 | 221 | case TokenType::T_DO: 222 | break; 223 | 224 | case TokenType::T_ID: 225 | { 226 | Node* temp = new Node(); 227 | temp->type = NodeType::N_ID; 228 | temp->value = token->value; 229 | nextToken(); 230 | 231 | // assign statement 232 | if (this->expect(TokenType::T_ASSIGN) ) { 233 | node = new Node(); 234 | node->type = NodeType::N_ASSIGN; 235 | node->value = "SET"; 236 | node->left = temp; 237 | // add to symbol table if it doesn't exist 238 | SymbolTable* table = this->scopes.back(); 239 | if (!table->isSymbolExist(temp->value) ) { 240 | table->addSymbol(temp->value); 241 | } 242 | 243 | node->right = expression(); 244 | 245 | if (!this->expect(TokenType::T_SEMICOLON)) { 246 | printError("A semicolon expected in statement."); 247 | } 248 | 249 | // function call statement 250 | } else if (this->expect(TokenType::T_LPAR) ) { 251 | node = new Node(); 252 | node->type = NodeType::N_FUNC_CALL; 253 | node->value = token->value; 254 | 255 | node->args = this->functionArgs(); 256 | 257 | if(this->expect(TokenType::T_RPAR) ) { 258 | 259 | if (!this->expect(TokenType::T_SEMICOLON)) { 260 | printError("A semicolon expected in statement."); 261 | } 262 | 263 | } else { 264 | printError("A right parenthesis expected in statement."); 265 | } 266 | 267 | } else { 268 | printError("A left parenthesis or equal 
are expected in statement."); 269 | } 270 | } 271 | break; 272 | 273 | default: 274 | node = expression(); 275 | token = getCurrentToken(); 276 | 277 | if (!token) { 278 | printError("A token expected in statement."); 279 | } 280 | 281 | if (token->type != TokenType::T_SEMICOLON) { 282 | cout << "in statement token-type="+std::to_string(token->type) << endl; 283 | printError("A semicolon expected in statement."); 284 | } 285 | break; 286 | } 287 | 288 | return node; 289 | } 290 | 291 | Node* Parser::expression() { 292 | using std::cout; 293 | using std::endl; 294 | 295 | Node* node = term(); 296 | 297 | if (!node) { 298 | return node; 299 | } 300 | 301 | Token *token = getCurrentToken(); 302 | 303 | if (!token) { 304 | printError("A token expected in expression"); 305 | } 306 | 307 | Node* temp; 308 | 309 | while (token->type == TokenType::T_ADD || token->type == TokenType::T_SUB) { 310 | temp = node; 311 | node = new Node(); 312 | 313 | if (token->type == TokenType::T_ADD) { 314 | node->type = NodeType::N_ADD; 315 | node->value = "ADD"; 316 | } else { 317 | node->type = NodeType::N_SUB; 318 | node->value = "SUB"; 319 | } 320 | node->left=temp; 321 | nextToken(); 322 | node->right = term(); 323 | token = getCurrentToken(); 324 | } 325 | 326 | return node; 327 | } 328 | 329 | Node* Parser::functionArgs() { 330 | 331 | Node* temp; 332 | Node* tail = expression(); 333 | 334 | Token* token = getCurrentToken(); 335 | 336 | while(token && token->type == TokenType::T_COMMA) { 337 | nextToken(); 338 | temp = expression(); 339 | temp->args = tail; 340 | tail = temp; 341 | token = getCurrentToken(); 342 | } 343 | 344 | return tail; 345 | } 346 | 347 | Node* Parser::factor() { 348 | Node* node = nullptr; 349 | Token *token = getCurrentToken(); 350 | 351 | if (!token) { 352 | printError("A token expected in factor"); 353 | } 354 | 355 | switch(token->type) { 356 | 357 | case TokenType::T_ID: 358 | { 359 | node = new Node(); 360 | Token* ttoken = nextToken(); 361 | if 
(ttoken->type == TokenType::T_LPAR) { 362 | nextToken(); 363 | node->type = NodeType::N_FUNC_CALL; 364 | node->value = token->value; 365 | 366 | node->args = this->functionArgs(); 367 | 368 | if(!this->expect(TokenType::T_RPAR) ) { 369 | printError("A right parenthesis expected in funcion call."); 370 | } 371 | 372 | } else { 373 | node->type = NodeType::N_ID; 374 | node->value = token->value; 375 | } 376 | } 377 | break; 378 | 379 | case TokenType::T_NUMBER: 380 | node = new Node(); 381 | nextToken(); 382 | node->type = NodeType::N_NUMBER_C; 383 | node->value = token->value; 384 | break; 385 | 386 | case TokenType::T_STRING: 387 | node = new Node(); 388 | nextToken(); 389 | node->type = NodeType::N_STRING_C; 390 | node->value = token->value; 391 | break; 392 | 393 | case TokenType::T_LPAR: 394 | nextToken(); 395 | node = expression(); 396 | 397 | if(!this->expect(TokenType::T_RPAR) ) { 398 | printError("A right parenthesis expected in statement."); 399 | } 400 | 401 | break; 402 | 403 | case TokenType::T_RPAR: 404 | break; 405 | 406 | default: 407 | //std::cout << "in term: another type of token value=" << token->value << " type=" << token->type << std::endl; 408 | //printError("Unexpected token in term"); 409 | break; 410 | } 411 | 412 | return node; 413 | } 414 | 415 | Node* Parser::term() { 416 | Token *token = getCurrentToken(); 417 | 418 | if (!token) { 419 | printError("A token expected in term"); 420 | } 421 | 422 | Node* node = factor(); 423 | Node* temp; 424 | token = getCurrentToken(); 425 | 426 | while (token->type == TokenType::T_DIV || token->type == TokenType::T_MUL) { 427 | temp = node; 428 | node = new Node(); 429 | 430 | if (token->type == TokenType::T_MUL) { 431 | node->type = NodeType::N_MUL; 432 | node->value = "MUL"; 433 | } else { 434 | node->type = NodeType::N_DIV; 435 | node->value = "DIV"; 436 | } 437 | node->left=temp; 438 | nextToken(); 439 | node->right = factor(); 440 | token = getCurrentToken(); 441 | } 442 | 443 | return node; 444 | 
} 445 | 446 | Node* Parser::block() { 447 | 448 | if (!expect(TokenType::T_LBRACE)) { 449 | printError("A left brace expected in block."); 450 | } 451 | 452 | Node* block = new Node(); 453 | block->type = NodeType::N_BLOCK; 454 | block->value = "BLOCK"; 455 | block->symbolTable = new SymbolTable(); 456 | block->right = nullptr; 457 | 458 | this->scopes.push_back(block->symbolTable); 459 | 460 | Node* st = statements(); 461 | 462 | if (!expect(TokenType::T_RBRACE)) { 463 | printError("A right brace expected in block."); 464 | } 465 | 466 | this->scopes.pop_back(); 467 | 468 | block->left = st; 469 | return block; 470 | } 471 | /* Parses a comparison: an expression, one of the six comparison tokens (T_EQU, T_NEQU, T_LESS, T_NLESS, T_GREATER, T_NGREATER) and a second expression. The operands become left and right of the returned node; a missing or different operator token is reported through printError(). */ 472 | Node* Parser::condition() { 473 | 474 | Node* node = new Node(); 475 | node->left = expression(); 476 | Token* token = getCurrentToken(); 477 | if (!token) { /* no token where the comparison operator should be: report it instead of dereferencing nullptr */ printError("A bool operators expected in condition."); return node; } 478 | switch(token->type) { 479 | case TokenType::T_EQU: 480 | node->type = NodeType::N_EQU; 481 | node->value = "EQU"; 482 | break; 483 | 484 | case TokenType::T_NEQU: 485 | node->type = NodeType::N_NEQU; 486 | node->value = "NEQU"; 487 | break; 488 | 489 | case TokenType::T_LESS: 490 | node->type = NodeType::N_LESS; 491 | node->value = "LESS"; 492 | break; 493 | 494 | case TokenType::T_NLESS: 495 | node->type = NodeType::N_NLESS; 496 | node->value = "NLESS"; 497 | break; 498 | 499 | case TokenType::T_GREATER: 500 | node->type = NodeType::N_GREATER; 501 | node->value = "GREATER"; 502 | break; 503 | 504 | case TokenType::T_NGREATER: 505 | node->type = NodeType::N_NGREATER; 506 | node->value = "NGREATER"; 507 | break; 508 | 509 | default: 510 | printError("A bool operators expected in condition."); 511 | break; 512 | } 513 | 514 | nextToken(); 515 | node->right = expression(); 516 | 517 | return node; 518 | } 519 | 520 | Node* Parser::conditionBlock() { 521 | 522 | if (!expect(TokenType::T_LPAR)) { 523 | printError("A left parenthes expected in condition-block."); 524 | } 525 | 526 | Node* node = new Node(); 527 | node->type = NodeType::N_IF; 528 | node->value = "IF"; 529 | node->args =
this->condition(); 530 | 531 | if (!expect(TokenType::T_RPAR)) { 532 | printError("A right parenthes expected in condition-block."); 533 | } 534 | 535 | node->left = block(); 536 | 537 | Token* token = getCurrentToken(); 538 | /* token is whatever follows the block and may be nullptr when the block ends the input. After an else the alternative may be a block, another parenthesised condition or a single statement; a '(' directly after the block is also accepted as a chained condition. */ 539 | if (expect(TokenType::T_ELSE)) { 540 | token = getCurrentToken(); 541 | if (token) { 542 | if (token->type == TokenType::T_LBRACE) 543 | node->right = block(); 544 | else if (token->type == TokenType::T_LPAR) 545 | node->right = conditionBlock(); 546 | else 547 | node->right = statement(); 548 | } 549 | } else if (token && token->type == TokenType::T_LPAR) { 550 | node->right = conditionBlock(); 551 | } 552 | 553 | return node; 554 | } 555 | -------------------------------------------------------------------------------- /src/SymbolTable.cpp: -------------------------------------------------------------------------------- 1 | #include "SymbolTable.h" 2 | /* Linear search: true when symbol is already stored in this table. */ 3 | bool SymbolTable::isSymbolExist(std::string symbol) { 4 | int size = this->symbols.size(); 5 | for (int i=0;i<size;i++) { 6 | if (symbol == this->symbols[i]) return true; 7 | } 8 | return false; 9 | } 10 | /* Appends symbol unless it is already present; returns whether it was added. */ 11 | bool SymbolTable::addSymbol(std::string symbol) { 12 | if (this->isSymbolExist(symbol)) return false; 13 | this->symbols.push_back(symbol); 14 | return true; 15 | } 16 | 17 | int SymbolTable::getSize() { 18 | return this->symbols.size(); 19 | } 20 | /* Returns the position of symbol in insertion order, or -1 when it is absent. */ 21 | int SymbolTable::getSymbolIndex(std::string symbol) { 22 | int size = this->symbols.size(); 23 | for (int i=0;i<size;i++) { 24 | if (symbol == this->symbols[i]) return i; 25 | } 26 | return -1; 27 | } 28 | -------------------------------------------------------------------------------- /src/compiler.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | //#include "lexer.h" 8 | #include "ILexer.h" 9 | #include "CodeGenerator.h" 10 | 11 | std::string readFile(std::string fileName) { 12 | std::string line; 13 | std::string content; 14 | std::ifstream myfile(fileName); 15 | 16 | if (myfile.is_open())
{ 17 | while(std::getline(myfile,line)) { 18 | content += line + '\n'; 19 | } 20 | myfile.close(); 21 | } 22 | 23 | return content; 24 | } 25 | 26 | void traverse(Node* node) { 27 | std::cout << "type: " << node->type << ", value: " << node->value << std::endl; 28 | if (node->left) traverse(node->left); 29 | if (node->right) traverse(node->right); 30 | } 31 | 32 | int main(int argc, char* argv[]) { 33 | using namespace std; 34 | 35 | if (argc < 3) { 36 | cerr << "Usage: " << argv[0] << " infile.calc outfile.asm" << endl; 37 | return 1; 38 | } 39 | // Print the user's name: 40 | 41 | string content = readFile(argv[1]); 42 | if (content.empty()) { 43 | cout << "Error: bad filename or no content" << endl; 44 | return 1; 45 | } 46 | 47 | cout << content; 48 | cout << "===============================" << endl; 49 | 50 | //Lexer lexer; 51 | Flexer lexer; 52 | vector *tokens = lexer.getTokens(content); 53 | 54 | for (unsigned long int i=0; i < tokens->size(); i++) { 55 | cout << tokens->at(i)->type << " " << tokens->at(i)->value << endl; 56 | } 57 | 58 | Parser parser; 59 | Node* ast = parser.parse(tokens); 60 | cout << "+++++++++++++++++++++++++++++" << endl; 61 | 62 | traverse(ast); 63 | 64 | //* 65 | cout << "+++++ after traverse ast ++++" << endl; 66 | 67 | if (argv[2]) { 68 | CodeGenerator generator(argv[2]); 69 | generator.compile(ast); 70 | } else { 71 | CodeGenerator generator("output.asm"); 72 | generator.compile(ast); 73 | } 74 | //*/ 75 | 76 | //delete(tokens); 77 | //delete(lexer); 78 | return 0; 79 | } 80 | -------------------------------------------------------------------------------- /src/flex/1.l: -------------------------------------------------------------------------------- 1 | %{ 2 | #include 3 | #include 4 | #include "../include/Token.h" 5 | 6 | typedef union{ 7 | int ival; 8 | char *cval; 9 | } yylval_type; 10 | 11 | extern yylval_type yylval; 12 | Token* token = nullptr; 13 | 14 | char* commentStart; 15 | %} 16 | 17 | %option nounput 18 | %option 
yylineno 19 | %option noyywrap 20 | %x COMMENT_MULTI 21 | 22 | %% 23 | 24 | \/\/.*?\n ; // one-line comment 25 | 26 | "/*" { 27 | /* begin of multi-line comment */ 28 | //commentStart = yytext; 29 | BEGIN(COMMENT_MULTI); 30 | } 31 | 32 | <COMMENT_MULTI>"*/" { 33 | /* end of multi-line comment */ 34 | //char* comment = strndup(commentStart, yytext + 2 - commentStart); 35 | //printf("'%s': was a multi-line comment\n", comment); 36 | //free(comment); 37 | BEGIN(INITIAL); 38 | } 39 | 40 | <COMMENT_MULTI>.|\n { 41 | /* suppress whatever is in the comment, newlines included, so nothing falls through to the default echo rule */ 42 | } 43 | 44 | "==" { 45 | yylval.cval = yytext; 46 | return T_EQU; 47 | } 48 | "!<" { 49 | yylval.cval = yytext; 50 | return T_NLESS; 51 | } 52 | "!>" { 53 | yylval.cval = yytext; 54 | return T_NGREATER; 55 | } 56 | "!=" { 57 | yylval.cval = yytext; 58 | return T_NEQU; 59 | } 60 | "++" { 61 | yylval.cval = yytext; 62 | return T_INC; 63 | } 64 | "--" { 65 | yylval.cval = yytext; 66 | return T_DEC; 67 | } 68 | ">" { 69 | yylval.cval = yytext; 70 | return T_GREATER; 71 | } 72 | "<" { 73 | yylval.cval = yytext; 74 | return T_LESS; 75 | } 76 | "," { 77 | yylval.cval = yytext; 78 | return T_COMMA; 79 | } 80 | "=" { 81 | yylval.cval = yytext; 82 | return T_ASSIGN; 83 | } 84 | ";" { 85 | yylval.cval = yytext; 86 | return T_SEMICOLON; 87 | } 88 | "(" { 89 | yylval.cval = yytext; 90 | return T_LPAR; 91 | } 92 | ")" { 93 | yylval.cval = yytext; 94 | return T_RPAR; 95 | } 96 | "{" { 97 | yylval.cval = yytext; 98 | return T_LBRACE; 99 | } 100 | "}" { 101 | yylval.cval = yytext; 102 | return T_RBRACE; 103 | } 104 | "+" { 105 | yylval.cval = yytext; 106 | return T_ADD; 107 | } 108 | "-" { 109 | yylval.cval = yytext; 110 | return T_SUB; 111 | } 112 | "/" { 113 | yylval.cval = yytext; 114 | return T_DIV; 115 | } 116 | "*" { 117 | yylval.cval = yytext; 118 | return T_MUL; 119 | } 120 | fn { 121 | yylval.cval = yytext; 122 | return T_FUNC; 123 | } 124 | ret { 125 | yylval.cval = yytext; 126 | return T_RET; 127 | } 128 | for { 129 | yylval.cval = yytext; 130 |
return T_FOR; 131 | } 132 | if { 133 | yylval.cval = yytext; 134 | return T_IF; 135 | } 136 | else { 137 | yylval.cval = yytext; 138 | return T_ELSE; 139 | } 140 | print { 141 | yylval.cval = yytext; 142 | return T_PRINT; 143 | } 144 | loop { 145 | yylval.cval = yytext; 146 | return T_LOOP; 147 | } 148 | while { 149 | yylval.cval = yytext; 150 | return T_WHILE; 151 | } 152 | do { 153 | yylval.cval = yytext; 154 | return T_DO; 155 | } 156 | true { 157 | yylval.cval = yytext; 158 | return T_BOOL; 159 | } 160 | false { 161 | yylval.cval = yytext; 162 | return T_BOOL; 163 | } 164 | 165 | [a-zA-Z][a-zA-Z0-9]* { 166 | yylval.cval = yytext; 167 | return T_ID; 168 | } 169 | 170 | [0-9]+ { 171 | yylval.ival = atoi(yytext); 172 | return T_NUMBER; 173 | } 174 | 175 | [ \t\n] { // space 176 | } 177 | 178 | <<EOF>> { 179 | return 0; 180 | } 181 | 182 | %% 183 | 184 | -------------------------------------------------------------------------------- /src/include/CodeGenerator.h: -------------------------------------------------------------------------------- 1 | #ifndef CODE_GENERATOR_H 2 | #define CODE_GENERATOR_H 3 | 4 | #include 5 | #include 6 | #include 7 | 8 | #include "Node.h" 9 | #include "SymbolTable.h" 10 | 11 | class CodeGenerator{ 12 | std::vector variables; 13 | std::ofstream outfile; 14 | SymbolTable* currentSymbolTable = nullptr; 15 | Node* currentFunction = nullptr; 16 | int label = 0; 17 | 18 | public: 19 | CodeGenerator(std::string ofile); 20 | ~CodeGenerator(); 21 | void compile(Node* tree); 22 | }; 23 | 24 | #endif 25 | -------------------------------------------------------------------------------- /src/include/Flexer.h: -------------------------------------------------------------------------------- 1 | #ifndef FLEXER_H 2 | #define FLEXER_H 3 | 4 | #include 5 | #include 6 | #include "clexer.h" 7 | #include "ILexer.h" 8 | #include "Token.h" 9 | /* ILexer implementation; its name and the clexer.h include suggest it wraps the flex-generated scanner - NOTE(review): confirm in Flexer.cpp. */ 10 | class Flexer : public ILexer { 11 | public: 12 | Flexer(); 13 | virtual ~Flexer() {}; 14 | std::vector<Token*>*
getTokens(std::string content); 15 | }; 16 | 17 | #endif 18 | -------------------------------------------------------------------------------- /src/include/ILexer.h: -------------------------------------------------------------------------------- 1 | #ifndef ILEXER_H 2 | #define ILEXER_H 3 | 4 | #include 5 | #include 6 | 7 | #include "Token.h" 8 | /* Common lexer interface: turns source text into a heap-allocated vector of Token pointers. */ 9 | struct ILexer 10 | { 11 | virtual std::vector<Token*>* getTokens(std::string content) = 0; 12 | virtual ~ILexer() {}; 13 | }; 14 | 15 | #endif 16 | -------------------------------------------------------------------------------- /src/include/Lexer.h: -------------------------------------------------------------------------------- 1 | #ifndef LEXER_H 2 | #define LEXER_H 3 | 4 | #include 5 | #include 6 | #include "ILexer.h" 7 | #include "Token.h" 8 | 9 | class Lexer : public ILexer { 10 | private: 11 | bool inToken; 12 | Token *token; 13 | void checkID(); 14 | int position; 15 | 16 | public: 17 | Lexer(); 18 | virtual ~Lexer() {}; 19 | std::vector<Token*>* getTokens(std::string content); 20 | }; 21 | 22 | #endif 23 | -------------------------------------------------------------------------------- /src/include/Node.h: -------------------------------------------------------------------------------- 1 | #ifndef NODE_H 2 | #define NODE_H 3 | 4 | #include 5 | #include "SymbolTable.h" 6 | 7 | enum NodeType { 8 | N_PROG=1,N_STATEMENT, N_NUMBER_C, N_STRING_C, N_ID, N_FUNC_CALL, N_FUNC, N_RET, N_ASSIGN, N_SEQ, N_CONDITION, N_ADD, N_SUB, 9 | N_PRINT, N_MUL, N_DIV, N_BLOCK, N_IF, N_EQU, N_NEQU, N_LESS, N_NLESS, N_GREATER, N_NGREATER, N_LOOP 10 | }; 11 | 12 | class Node{ 13 | public: 14 | Node *left = nullptr; 15 | Node *right = nullptr; 16 | SymbolTable *symbolTable = nullptr; 17 | NodeType type; 18 | std::string value; 19 | Node *args = nullptr; 20 | }; 21 | 22 | #endif 23 | -------------------------------------------------------------------------------- /src/include/Parser.h: 
-------------------------------------------------------------------------------- 1 | #ifndef PARSER_H 2 | #define PARSER_H 3 | 4 | #include 5 | #include 6 | 7 | #include "Node.h" 8 | #include "SymbolTable.h" 9 | #include "Token.h" 10 | /* Recursive-descent parser: parse() turns a token vector into a Node tree; scopes holds the symbol tables of the blocks currently being parsed, innermost last. */ 11 | class Parser { 12 | std::vector<Token*> *t; 13 | std::vector<SymbolTable*> scopes; 14 | Node* prog = nullptr; 15 | unsigned int ti = 0; 16 | 17 | Token* nextToken(); 18 | Token* getCurrentToken(); 19 | void printError(std::string text); 20 | bool expect(TokenType type); 21 | 22 | Node* statements(); 23 | Node* statement(); 24 | Node* expression(); 25 | Node* term(); 26 | Node* factor(); 27 | Node* block(); 28 | Node* condition(); 29 | Node* conditionBlock(); 30 | Node* functionArgs(); 31 | 32 | public: 33 | Parser(); 34 | Node* parse(std::vector<Token*>*tokens); 35 | }; 36 | 37 | #endif 38 | -------------------------------------------------------------------------------- /src/include/SymbolTable.h: -------------------------------------------------------------------------------- 1 | #ifndef SYMBOL_TABLE_H 2 | #define SYMBOL_TABLE_H 3 | 4 | #include 5 | #include 6 | 7 | #include "Token.h" 8 | /* Ordered list of symbol names; a symbol's index is its insertion position. */ 9 | class SymbolTable { 10 | std::vector<std::string> symbols; 11 | public: 12 | bool isSymbolExist(std::string symbol); 13 | bool addSymbol(std::string symbol); 14 | int getSize(); 15 | int getSymbolIndex(std::string symbol); 16 | }; 17 | 18 | #endif 19 | -------------------------------------------------------------------------------- /src/include/Token.h: -------------------------------------------------------------------------------- 1 | #ifndef TOKEN_H 2 | #define TOKEN_H 3 | 4 | #include 5 | 6 | enum TokenType { 7 | T_ID=1, T_ASSIGN, T_ENDOFSTATEMENT, T_NUMBER, T_SEMICOLON, T_LPAR, T_RPAR, T_LBRACE, T_FOR, 8 | T_RBRACE, T_ADD, T_SUB, T_MUL, T_DIV, T_STRING, T_IF, T_ELSE, T_DO, T_WHILE, T_FUNC, T_INC, T_DEC, T_RET, T_BOOL, 9 | T_PRINT, T_COMMENT, T_MULTICOMMENT, T_COMMA, T_EQU, T_NLESS, T_NGREATER, T_NEQU, T_GREATER, T_LESS, T_LOOP 10 | }; 11 | 12 | struct Token{ 13 | TokenType type; 14 |
std::string value; 15 | }; 16 | 17 | #endif 18 | -------------------------------------------------------------------------------- /tests/comment.calc: -------------------------------------------------------------------------------- 1 | // dsasdfaw 2 | print(1+5*3); 3 | //print(3*(5+1)); 4 | print((25/5/5)*5); 5 | // pppp 6 | // 7 | print(4444); 8 | /*dasdf dsf 9 | asdfsd 10 | dsffgsdfg 11 | sfdgg 12 | sdf 13 | gsdf 14 | gfgsdfgsdfgsdfgsdfg sdfgsfg 15 | sdfg sfdgsdf gsdf */ 16 | print(5555); 17 | -------------------------------------------------------------------------------- /tests/comment.test: -------------------------------------------------------------------------------- 1 | 16 2 | 5 3 | 4444 4 | 5555 5 | -------------------------------------------------------------------------------- /tests/expression.calc: -------------------------------------------------------------------------------- 1 | print(1+5*3); 2 | print(3*5+1); 3 | print(3*(5+1)); 4 | print((5+1)*3); 5 | print((5/1*5)*5); 6 | print((25/5/5)*5); 7 | print(4*3+(25/5/5)*5/1 + 5/5); 8 | -------------------------------------------------------------------------------- /tests/expression.test: -------------------------------------------------------------------------------- 1 | 16 2 | 16 3 | 18 4 | 18 5 | 125 6 | 5 7 | 18 8 | -------------------------------------------------------------------------------- /tests/func-args.calc: -------------------------------------------------------------------------------- 1 | fn add(a, b, c) { 2 | ret a+b; 3 | } 4 | 5 | i = 2; 6 | j = add(4*7-5*5, i+i*2-1, 10); 7 | print(j); 8 | -------------------------------------------------------------------------------- /tests/func-args.test: -------------------------------------------------------------------------------- 1 | 8 2 | -------------------------------------------------------------------------------- /tests/func-call.calc: -------------------------------------------------------------------------------- 1 | fn get() { 2 | k 
= 2+8; 3 | i = 90; 4 | ret i+k; 5 | } 6 | 7 | fn get2() { 8 | b = 300; 9 | ret 3+b; 10 | } 11 | 12 | j = get(); 13 | s = 0; 14 | k = get2(); 15 | print(k); 16 | k = k; 17 | print(k); 18 | -------------------------------------------------------------------------------- /tests/func-call.test: -------------------------------------------------------------------------------- 1 | 303 2 | 303 3 | -------------------------------------------------------------------------------- /tests/if-else.calc: -------------------------------------------------------------------------------- 1 | 2 | if(4 == 4) { 3 | print(1); 4 | } else { 5 | print(2); 6 | } 7 | 8 | if(4 != 4) { 9 | print(1); 10 | } else { 11 | print(2); 12 | } 13 | 14 | j = 6; 15 | 16 | if(j > 5) { 17 | print(11); 18 | } else if (j == 5) { 19 | print(12); 20 | } else { 21 | print(13); 22 | } 23 | 24 | j = 5; 25 | 26 | if(j > 5) { 27 | print(21); 28 | } (j == 5) { 29 | print(22); 30 | } else { 31 | print(23); 32 | } 33 | 34 | j = 4; 35 | 36 | if(j > 5) { 37 | print(31); 38 | } else (j == 5) { 39 | print(32); 40 | } else { 41 | print(33); 42 | } 43 | 44 | print(100); 45 | -------------------------------------------------------------------------------- /tests/if-else.test: -------------------------------------------------------------------------------- 1 | 1 2 | 2 3 | 11 4 | 22 5 | 33 6 | 100 7 | -------------------------------------------------------------------------------- /tests/if.calc: -------------------------------------------------------------------------------- 1 | fn get(a, b) { 2 | i = b; 3 | ret a+i; 4 | } 5 | 6 | j = get(6,44); 7 | c = 10; 8 | 9 | if (j == 30+c+9) { 10 | print(1); 11 | } 12 | 13 | if (j == 5*c) { 14 | k = 2; 15 | print(k); 16 | } 17 | 18 | if (j == 5+5*c-4) { 19 | print(3); 20 | } 21 | 22 | if (j != 49) { 23 | print(4); 24 | } 25 | 26 | if (j != 50) { 27 | print(5); 28 | } 29 | 30 | if (j != 51) { 31 | print(6); 32 | } 33 | 34 | if (j > 49) { 35 | print(7); 36 | } 37 | 38 | if (j > 50) { 39 | 
print(8); 40 | } 41 | 42 | if (j > 51) { 43 | print(9); 44 | } 45 | 46 | if (j !> 49) { 47 | print(10); 48 | } 49 | 50 | if (j !> 50) { 51 | print(11); 52 | } 53 | 54 | if (j !> 51) { 55 | print(12); 56 | } 57 | 58 | if (j < 49) { 59 | print(13); 60 | } 61 | 62 | if (j < 50) { 63 | print(14); 64 | } 65 | 66 | if (j < 51) { 67 | print(15); 68 | } 69 | 70 | if (j !< 49) { 71 | print(16); 72 | } 73 | 74 | if (j !< 50) { 75 | print(17); 76 | } 77 | 78 | if (j !< 51) { 79 | print(18); 80 | } 81 | 82 | print(100); 83 | -------------------------------------------------------------------------------- /tests/if.test: -------------------------------------------------------------------------------- 1 | 2 2 | 4 3 | 6 4 | 7 5 | 11 6 | 12 7 | 15 8 | 16 9 | 17 10 | 100 11 | -------------------------------------------------------------------------------- /tests/loop.calc: -------------------------------------------------------------------------------- 1 | loop(2) { 2 | print(1); 3 | } 4 | 5 | loop(4*4-6*2-1) { 6 | print(2); 7 | } 8 | 9 | loop(5*4-8*2) { 10 | print(3); 11 | } 12 | 13 | print(100); 14 | -------------------------------------------------------------------------------- /tests/loop.test: -------------------------------------------------------------------------------- 1 | 1 2 | 1 3 | 2 4 | 2 5 | 2 6 | 3 7 | 3 8 | 3 9 | 3 10 | 100 11 | --------------------------------------------------------------------------------