├── .gitignore ├── README.md ├── build.sh ├── build └── unn ├── buildasm.sh ├── builddebugasm.sh ├── codegeneratorf.c ├── codegeneratorf.h ├── examples ├── fizzbuzz.unn └── while.unn ├── hashmap ├── hashmap.h ├── hashmapoperators.c └── hashmapoperators.h ├── lexerf.c ├── lexerf.h ├── main.c ├── parserf.c ├── parserf.h └── test.unn /.gitignore: -------------------------------------------------------------------------------- 1 | build/main 2 | assembly/ 3 | generated.asm 4 | generated 5 | *.o 6 | *.asm 7 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Unnamed-lang 2 | A programming language compiler built using C. For learning purposes only! 3 | 4 | The goal of this language was to compile FizzBuzz entirely, which required writing to stdout, loops, a modulo operator, and if statements, all of which are implemented. 5 | To see how the syntax looks, check the examples folder. 6 | 7 | The project is not planned to go any further than its current state. 8 | 9 | It was all streamed live on Twitch, and the VODs are available on my YouTube. 
10 | https://www.youtube.com/@cobbcoding/ 11 | https://www.twitch.tv/cobbcoding/ 12 | 13 | Quick Start: 14 | 15 | Dependencies: gcc, nasm 16 | 17 | ``` 18 | ./build.sh 19 | ./build/unn 20 | ``` 21 | -------------------------------------------------------------------------------- /build.sh: -------------------------------------------------------------------------------- 1 | gcc main.c lexerf.c parserf.c codegeneratorf.c hashmap/hashmapoperators.c -o build/unn -Wall -Wextra 2 | -------------------------------------------------------------------------------- /build/unn: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CobbCoding1/unnamed-lang/8e8655959dae325b9e15549d4080237a53839b75/build/unn -------------------------------------------------------------------------------- /buildasm.sh: -------------------------------------------------------------------------------- 1 | nasm -f elf64 generated.asm -o generated.o 2 | gcc generated.o -o generated -lc -no-pie 3 | -------------------------------------------------------------------------------- /builddebugasm.sh: -------------------------------------------------------------------------------- 1 | nasm -f elf64 -g -F dwarf -o test.o generated.asm 2 | ld test.o -o test 3 | -------------------------------------------------------------------------------- /codegeneratorf.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | 8 | #include "lexerf.h" 9 | #include "parserf.h" 10 | #include "./hashmap/hashmapoperators.h" 11 | #include "./hashmap/hashmap.h" 12 | 13 | #define MAX_STACK_SIZE_SIZE 1024 14 | 15 | char *curly_stack[MAX_STACK_SIZE_SIZE]; 16 | size_t curly_stack_size = 0; 17 | int curly_count = 0; 18 | int global_curly = 0; 19 | size_t stack_size = 0; 20 | int current_stack_size_size = 0; 21 | int label_number = 0; 22 | int loop_label_number = 0; 23 
| int text_label = 0; 24 | size_t current_stack_size[MAX_STACK_SIZE_SIZE]; 25 | const unsigned initial_size = 100; 26 | struct hashmap_s hashmap; 27 | 28 | typedef enum{ 29 | ADD, 30 | SUB, 31 | DIV, 32 | MUL, 33 | MOD, 34 | NOT_OPERATOR 35 | } OperatorType; 36 | 37 | 38 | void create_label(FILE *file, int num){ 39 | label_number--; 40 | fprintf(file, "label%d:\n", num); 41 | } 42 | 43 | void create_end_loop(FILE *file){ 44 | loop_label_number--; 45 | fprintf(file, " jmp loop%d\n", loop_label_number); 46 | } 47 | 48 | void create_loop_label(FILE *file){ 49 | fprintf(file, "loop%d:\n", loop_label_number); 50 | loop_label_number++; 51 | } 52 | 53 | void if_label(FILE *file, char *comp, int num){ 54 | if(strcmp(comp, "EQ") == 0){ 55 | fprintf(file, " jne label%d\n", num); 56 | } else if(strcmp(comp, "NEQ") == 0){ 57 | fprintf(file, " je label%d\n", num); 58 | } else if(strcmp(comp, "LESS") == 0){ 59 | fprintf(file, " jge label%d\n", num); 60 | } else if(strcmp(comp, "GREATER") == 0){ 61 | fprintf(file, " jle label%d\n", label_number); 62 | } else { 63 | printf("ERROR: Unexpected comparator\n"); 64 | exit(1); 65 | } 66 | label_number++; 67 | } 68 | 69 | void stack_push(size_t value){ 70 | current_stack_size_size++; 71 | current_stack_size[current_stack_size_size] = value; 72 | } 73 | 74 | size_t stack_pop(){ 75 | if(current_stack_size_size == 0){ 76 | printf("ERROR: stack is already empty\n"); 77 | exit(1); 78 | } 79 | size_t result = current_stack_size[current_stack_size_size]; 80 | return result; 81 | } 82 | 83 | void curly_stack_push(char *value){ 84 | curly_stack_size++; 85 | curly_stack[curly_stack_size] = value; 86 | } 87 | 88 | char *curly_stack_pop(){ 89 | if(curly_stack_size == 0){ 90 | return NULL; 91 | } 92 | char *result = curly_stack[curly_stack_size]; 93 | curly_stack_size--; 94 | return result; 95 | } 96 | 97 | char *curly_stack_peek(){ 98 | if(curly_stack_size == 0){ 99 | return NULL; 100 | } 101 | return curly_stack[curly_stack_size]; 102 | } 103 | 104 
| static int log_and_free_out_of_scope(void* const context, struct hashmap_element_s* const e){ 105 | (void)(context); 106 | if(*(size_t*)e->data > (current_stack_size[current_stack_size_size] + 1)){ 107 | if(hashmap_remove(&hashmap, e->key, strlen(e->key)) != 0){ 108 | printf("COULD NOT REMOVE ELEMENT\n"); 109 | } 110 | } 111 | return 0; 112 | } 113 | 114 | void push(char *reg, FILE *file){ 115 | fprintf(file, " push %s\n", reg); 116 | stack_size++; 117 | } 118 | 119 | void push_var(size_t stack_pos, char *var_name, FILE *file){ 120 | fprintf(file, " push QWORD [rsp + %zu]\n", (stack_size - stack_pos) * 8); 121 | stack_size++; 122 | } 123 | 124 | void modify_var(size_t stack_pos, char *new_value, char *var_name, FILE *file){ 125 | fprintf(file, " mov QWORD [rsp + %zu], %s\n", ((stack_size) - (stack_pos)) * 8, new_value); 126 | fprintf(file, " push QWORD [rsp + %zu]\n", (stack_size - stack_pos) * 8); 127 | } 128 | 129 | void pop(char *reg, FILE *file){ 130 | stack_size--; 131 | fprintf(file, " pop %s\n", reg); 132 | if(stack_size > 1000){ 133 | exit(1); 134 | } 135 | } 136 | 137 | void mov(char *reg1, char *reg2, FILE *file){ 138 | fprintf(file, " mov %s, %s\n", reg1, reg2); 139 | } 140 | 141 | OperatorType check_operator(Node *node){ 142 | if(node->type != OPERATOR){ 143 | return NOT_OPERATOR; 144 | } 145 | 146 | if(strcmp(node->value, "+") == 0){ 147 | return ADD; 148 | } 149 | if(strcmp(node->value, "-") == 0){ 150 | return SUB; 151 | } 152 | if(strcmp(node->value, "/") == 0){ 153 | return DIV; 154 | } 155 | if(strcmp(node->value, "*") == 0){ 156 | return MUL; 157 | } 158 | if(strcmp(node->value, "%") == 0){ 159 | return MOD; 160 | } 161 | return NOT_OPERATOR; 162 | } 163 | 164 | int mov_if_var_or_not(char *reg, Node *node, FILE *file){ 165 | if(node->type == IDENTIFIER){ 166 | int *value = malloc(sizeof(int)); 167 | value = hashmap_get(&hashmap, node->value, strlen(node->value)); 168 | if(value == NULL){ 169 | printf("ERROR: Variable %s not declared in current 
scope\n", node->value); 170 | exit(1); 171 | } 172 | push_var(*value, node->value, file); 173 | pop(reg, file); 174 | return 0; 175 | } 176 | if(node->type == INT){ 177 | fprintf(file, " mov %s, %s\n", reg, node->value); 178 | return 0; 179 | } 180 | return -1; 181 | } 182 | 183 | Node *generate_operator_code(Node *node, FILE *file){ 184 | mov_if_var_or_not("rax", node->left, file); 185 | push("rax", file); 186 | Node *tmp = node; 187 | OperatorType oper_type = check_operator(tmp); 188 | while(tmp->type == OPERATOR){ 189 | pop("rax", file); 190 | oper_type = check_operator(tmp); 191 | tmp = tmp->right; 192 | if(tmp->type != OPERATOR){ 193 | break; 194 | } 195 | mov_if_var_or_not("rbx", tmp->left, file); 196 | switch(oper_type){ 197 | case ADD: 198 | fprintf(file, " add rax, rbx\n"); 199 | break; 200 | case SUB: 201 | fprintf(file, " sub rax, rbx\n"); 202 | break; 203 | case DIV: 204 | fprintf(file, " xor rdx, rdx\n"); 205 | fprintf(file, " div rbx\n"); 206 | break; 207 | case MUL: 208 | fprintf(file, " mul rbx\n"); 209 | break; 210 | case MOD: 211 | fprintf(file, " xor rdx, rdx\n"); 212 | fprintf(file, " div rbx\n"); 213 | break; 214 | case NOT_OPERATOR: 215 | printf("ERROR: Invalid Syntax\n"); 216 | exit(1); 217 | break; 218 | } 219 | if(oper_type != MOD){ 220 | push("rax", file); 221 | } else { 222 | push("rdx", file); 223 | } 224 | oper_type = check_operator(tmp); 225 | } 226 | mov_if_var_or_not("rbx", tmp, file); 227 | switch(oper_type){ 228 | case ADD: 229 | fprintf(file, " add rax, rbx\n"); 230 | break; 231 | case SUB: 232 | fprintf(file, " sub rax, rbx\n"); 233 | break; 234 | case DIV: 235 | fprintf(file, " xor rdx, rdx\n"); 236 | fprintf(file, " div rbx\n"); 237 | break; 238 | case MUL: 239 | fprintf(file, " mul rbx\n"); 240 | break; 241 | case MOD: 242 | fprintf(file, " xor rdx, rdx\n"); 243 | fprintf(file, " div rbx\n"); 244 | break; 245 | case NOT_OPERATOR: 246 | printf("ERROR: Invalid Syntax\n"); 247 | exit(1); 248 | break; 249 | } 250 | if(oper_type != 
MOD){ 251 | push("rax", file); 252 | } else { 253 | push("rdx", file); 254 | } 255 | node->left = NULL; 256 | node->right = NULL; 257 | return node; 258 | } 259 | 260 | void traverse_tree(Node *node, int is_left, FILE *file, int syscall_number){ 261 | if(node == NULL){ 262 | return; 263 | } 264 | if(strcmp(node->value, "EXIT") == 0){ 265 | syscall_number = 60; 266 | } 267 | if(strcmp(node->value, "INT") == 0){ 268 | Node *value = malloc(sizeof(Node)); 269 | value = node->left->left->left; 270 | if(value->type == IDENTIFIER){ 271 | size_t *var_value = malloc(sizeof(size_t)); 272 | var_value = hashmap_get(&hashmap, value->value, strlen(value->value)); 273 | if(var_value != 0){ 274 | printf("ERROR\n"); 275 | exit(1); 276 | } 277 | if(var_value == NULL){ 278 | printf("ERROR: %s Not Declared In Current Context\n", value->value); 279 | exit(1); 280 | } 281 | push_var(*var_value, value->value, file); 282 | } else if(value->type == INT) { 283 | push(value->value, file); 284 | } else if(value->type == OPERATOR){ 285 | generate_operator_code(value, file); 286 | } else { 287 | printf("ERROR\n"); 288 | exit(1); 289 | } 290 | size_t *var_location = malloc(sizeof(size_t)); 291 | size_t *cur_size = malloc(sizeof(size_t)); 292 | *cur_size = stack_size; 293 | if(hashmap_get(&hashmap, node->left->value, strlen(node->left->value)) != NULL){ 294 | printf("ERROR: Variable %s is already declared in current scope\n", node->left->value); 295 | exit(1); 296 | } 297 | if(hashmap_put(&hashmap, node->left->value, strlen(node->left->value), cur_size) != 0){ 298 | printf("ERROR: Could not insert into hash table!\n"); 299 | exit(1); 300 | } 301 | node->left = NULL; 302 | 303 | } else if(strcmp(node->value, "IF") == 0){ 304 | curly_stack_push("IF"); 305 | Node *current = malloc(sizeof(Node)); 306 | current = node->left->left; 307 | if(current->left->type == INT || current->left->type == IDENTIFIER){ 308 | mov_if_var_or_not("rax", current->left, file); 309 | push("rax", file); 310 | } else { 311 | 
generate_operator_code(current->left, file); 312 | } 313 | if(current->right->type == INT || current->right->type == IDENTIFIER){ 314 | mov_if_var_or_not("rbx", current->right, file); 315 | push("rbx", file); 316 | } else { 317 | generate_operator_code(current->right, file); 318 | } 319 | pop("rax", file); 320 | pop("rbx", file); 321 | fprintf(file, " cmp rax, rbx\n"); 322 | if_label(file, current->value, curly_count); 323 | node->left->left = NULL; 324 | } else if(strcmp(node->value, "WHILE") == 0){ 325 | curly_stack_push("W"); 326 | create_loop_label(file); 327 | Node *current = malloc(sizeof(Node)); 328 | current = node->left->left; 329 | if(current->left->type == INT || current->left->type == IDENTIFIER){ 330 | mov_if_var_or_not("rax", current->left, file); 331 | push("rax", file); 332 | } else { 333 | generate_operator_code(current->left, file); 334 | } 335 | if(current->right->type == INT || current->right->type == IDENTIFIER){ 336 | mov_if_var_or_not("rbx", current->right, file); 337 | push("rbx", file); 338 | } else { 339 | generate_operator_code(current->right, file); 340 | } 341 | pop("rbx", file); 342 | pop("rax", file); 343 | fprintf(file, " cmp rax, rbx\n"); 344 | if(strcmp(current->value, "EQ") == 0){ 345 | if_label(file, "EQ", curly_count); 346 | } else if(strcmp(current->value, "NEQ") == 0){ 347 | if_label(file, "NEQ", curly_count); 348 | } else if(strcmp(current->value, "LESS") == 0){ 349 | if_label(file, "LESS", curly_count); 350 | } else if(strcmp(current->value, "GREATER") == 0){ 351 | if_label(file, "GREATER", curly_count); 352 | } else { 353 | printf("ERROR: Unknown Operator\n"); 354 | exit(1); 355 | } 356 | node->left->left = NULL; 357 | } else if(strcmp(node->value, "WRITE") == 0){ 358 | char *text = malloc(sizeof(char) * 8); 359 | char *identifier = malloc(sizeof(char)*8); 360 | if(node->left->type == IDENTIFIER){ 361 | identifier = hashmap_get(&hashmap, node->left->value, strlen(node->left->value)); 362 | if(identifier == NULL){ 363 | 
printf("ERROR: Value is not defined\n"); 364 | exit(1); 365 | } 366 | push_var(*identifier, node->right->value, file); 367 | mov("rdi", "printf_format", file); 368 | pop("rsi", file); 369 | 370 | fprintf(file, " xor rax, rax\n"); 371 | 372 | fprintf(file, " call printf WRT ..plt\n"); 373 | 374 | } else { 375 | identifier = node->left->value; 376 | sprintf(text, "text%d", text_label); 377 | fprintf(file, "section .data\n"); 378 | fprintf(file, " %s db \"%s\", 10\n", text, node->left->value); 379 | fprintf(file, "section .text\n"); 380 | mov("rax", "1", file); 381 | mov("rdx", node->right->value, file); 382 | mov("rdi", "1", file); 383 | mov("rsi", text, file); 384 | text_label++; 385 | free(text); 386 | fprintf(file, " syscall\n"); 387 | } 388 | Node *tmp = malloc(sizeof(Node)); 389 | tmp = node->right->right; 390 | node->right = NULL; 391 | node = tmp; 392 | } 393 | 394 | if(strcmp(node->value, "(") == 0){ 395 | 396 | } 397 | if(node->type == OPERATOR){ 398 | if(node->value[0] == '='){ 399 | 400 | } else { 401 | generate_operator_code(node, file); 402 | } 403 | } 404 | if(node->type == INT){ 405 | fprintf(file, " mov rax, %s\n", node->value); 406 | push("rax", file); 407 | } 408 | if(node->type == IDENTIFIER){ 409 | if(syscall_number == 60){ 410 | size_t *var_value = malloc(sizeof(size_t)); 411 | var_value = hashmap_get(&hashmap, node->value, strlen(node->value)); 412 | if(var_value == NULL){ 413 | printf("ERROR: Not Declared in current scope: %s\n", node->value); 414 | exit(1); 415 | } else { 416 | } 417 | push_var(*var_value, node->value, file); 418 | pop("rdi", file); 419 | fprintf(file, " mov rax, %d\n", syscall_number); 420 | fprintf(file, " syscall\n"); 421 | syscall_number = 0; 422 | } else { 423 | if(hashmap_get(&hashmap, node->value, strlen(node->value)) == NULL){ 424 | printf("ERROR: Variable %s is not declared in current scope\n", node->value); 425 | exit(1); 426 | } 427 | 428 | Node *value = node->left->left; 429 | size_t *var_location = 
malloc(sizeof(size_t)); 430 | var_location = hashmap_get(&hashmap, node->value, strlen(node->value)); 431 | if(value->type == IDENTIFIER){ 432 | size_t *var_value = malloc(sizeof(size_t)); 433 | var_value = hashmap_get(&hashmap, value->value, strlen(value->value)); 434 | if(var_value == NULL){ 435 | printf("ERROR: %s Not Declared In Current Context\n", value->value); 436 | exit(1); 437 | } 438 | } else if(value->type == INT) { 439 | push(value->value, file); 440 | } else if(value->type == OPERATOR){ 441 | generate_operator_code(value, file); 442 | } else { 443 | printf("ERROR\n"); 444 | exit(1); 445 | } 446 | size_t *cur_size = malloc(sizeof(size_t)); 447 | *cur_size = stack_size; 448 | 449 | pop("rax", file); 450 | modify_var(*var_location+1, "rax", node->value, file); 451 | node->left = NULL; 452 | } 453 | } 454 | if(strcmp(node->value, ")") == 0){ 455 | 456 | } 457 | 458 | if(strcmp(node->value, "{") == 0){ 459 | stack_push(stack_size); 460 | curly_count++; 461 | char *curly_count_string = malloc(sizeof(char) * 4); 462 | sprintf(curly_count_string, "%d", curly_count); 463 | curly_stack_push(curly_count_string); 464 | } 465 | 466 | if(strcmp(node->value, "}") == 0){ 467 | char *current_curly = curly_stack_pop(); 468 | char *next_curly = curly_stack_pop(); 469 | 470 | if(next_curly[0] == 'I'){ 471 | create_label(file, atoi(current_curly)-1); 472 | global_curly = atoi(current_curly); 473 | } else if(next_curly[0] == 'W'){ 474 | create_end_loop(file); 475 | create_label(file, atoi(current_curly)-1); 476 | global_curly = atoi(current_curly); 477 | } 478 | 479 | 480 | size_t stack_value = stack_pop(); 481 | for(; stack_size != stack_value;){ 482 | pop("rsi", file); 483 | } 484 | 485 | void* log = malloc(sizeof(char)); 486 | if(hashmap_iterate_pairs(&hashmap, log_and_free_out_of_scope, (void*)log) != 0){ 487 | exit(1); 488 | } 489 | 490 | } 491 | 492 | if(strcmp(node->value, ";") == 0){ 493 | if(syscall_number == 60){ 494 | fprintf(file, " mov rax, %d\n", 
syscall_number); 495 | fprintf(file, " pop rdi\n"); 496 | fprintf(file, " syscall\n"); 497 | syscall_number = 0; 498 | } 499 | } 500 | if(is_left){ 501 | 502 | } else { 503 | 504 | } 505 | traverse_tree(node->left, 1, file, syscall_number); 506 | traverse_tree(node->right, 0, file, syscall_number); 507 | 508 | } 509 | 510 | int generate_code(Node *root, char *filename){ 511 | insert('-', "sub"); 512 | insert('+', "add"); 513 | insert('*', "mul"); 514 | insert('/', "div"); 515 | FILE *file = fopen(filename, "w"); 516 | assert(file != NULL && "FILE COULD NOT BE OPENED\n"); 517 | printf("HERHE\n"); 518 | 519 | assert(hashmap_create(initial_size, &hashmap) == 0 && "ERROR: Could not create hashmap\n"); 520 | 521 | fprintf(file, "section .data\n"); 522 | fprintf(file, " printf_format: db '%s', 10, 0\n", "%d"); 523 | fprintf(file, "extern printf\n"); 524 | fprintf(file, "global main\n"); 525 | fprintf(file, "section .text\n"); 526 | fprintf(file, "main:\n"); 527 | 528 | traverse_tree(root, 0, file, 0); 529 | fclose(file); 530 | 531 | 532 | return 0; 533 | } 534 | -------------------------------------------------------------------------------- /codegeneratorf.h: -------------------------------------------------------------------------------- 1 | #ifndef GENERATOR_H_ 2 | #define GENERATOR_H_ 3 | 4 | int generate_code(Node *root, char *filename); 5 | 6 | #endif 7 | -------------------------------------------------------------------------------- /examples/fizzbuzz.unn: -------------------------------------------------------------------------------- 1 | int i = 1; 2 | while(i neq 101){ 3 | int remainderthree = i % 3; 4 | int remainderfive = i % 5; 5 | int remainderfifteen = i % 15; 6 | if(remainderfifteen neq 0){ 7 | if(remainderthree eq 0){ 8 | write("FIZZ", 5); 9 | } 10 | if(remainderfive eq 0){ 11 | write("BUZZ", 5); 12 | } 13 | } 14 | if(remainderfifteen eq 0){ 15 | write("FIZZBUZZ", 9); 16 | } 17 | if(remainderfive neq 0){ 18 | if(remainderthree neq 0){ 19 | write(i, 2); 
20 | } 21 | } 22 | i = i + 1; 23 | } 24 | 25 | exit(0); 26 | -------------------------------------------------------------------------------- /examples/while.unn: -------------------------------------------------------------------------------- 1 | int i = 0; 2 | while(i neq 101){ 3 | write(i, 2); 4 | i = i + 1; 5 | } 6 | 7 | exit(0); 8 | -------------------------------------------------------------------------------- /hashmap/hashmap.h: -------------------------------------------------------------------------------- 1 | /* 2 | The latest version of this library is available on GitHub; 3 | https://github.com/sheredom/hashmap.h 4 | */ 5 | 6 | /* 7 | This is free and unencumbered software released into the public domain. 8 | 9 | Anyone is free to copy, modify, publish, use, compile, sell, or 10 | distribute this software, either in source code form or as a compiled 11 | binary, for any purpose, commercial or non-commercial, and by any 12 | means. 13 | 14 | In jurisdictions that recognize copyright laws, the author or authors 15 | of this software dedicate any and all copyright interest in the 16 | software to the public domain. We make this dedication for the benefit 17 | of the public at large and to the detriment of our heirs and 18 | successors. We intend this dedication to be an overt act of 19 | relinquishment in perpetuity of all present and future rights to this 20 | software under copyright law. 21 | 22 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 23 | EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 24 | MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 25 | IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR 26 | OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, 27 | ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR 28 | OTHER DEALINGS IN THE SOFTWARE. 
29 | 30 | For more information, please refer to 31 | */ 32 | #ifndef SHEREDOM_HASHMAP_H_INCLUDED 33 | #define SHEREDOM_HASHMAP_H_INCLUDED 34 | 35 | #if defined(_MSC_VER) 36 | // Workaround a bug in the MSVC runtime where it uses __cplusplus when not 37 | // defined. 38 | #pragma warning(push, 0) 39 | #pragma warning(disable : 4668) 40 | #endif 41 | 42 | #include 43 | #include 44 | 45 | #if (defined(_MSC_VER) && defined(__AVX__)) || \ 46 | (!defined(_MSC_VER) && defined(__SSE4_2__)) 47 | #define HASHMAP_X86_SSE42 48 | #endif 49 | 50 | #if defined(HASHMAP_X86_SSE42) 51 | #include 52 | #endif 53 | 54 | #if defined(__aarch64__) && defined(__ARM_FEATURE_CRC32) 55 | #define HASHMAP_ARM_CRC32 56 | #endif 57 | 58 | #if defined(HASHMAP_ARM_CRC32) 59 | #include 60 | #endif 61 | 62 | #if defined(_MSC_VER) 63 | #include 64 | #endif 65 | 66 | #if defined(_MSC_VER) 67 | #pragma warning(pop) 68 | #endif 69 | 70 | #if defined(_MSC_VER) 71 | #pragma warning(push) 72 | /* Stop MSVC complaining about unreferenced functions */ 73 | #pragma warning(disable : 4505) 74 | /* Stop MSVC complaining about not inlining functions */ 75 | #pragma warning(disable : 4710) 76 | /* Stop MSVC complaining about inlining functions! */ 77 | #pragma warning(disable : 4711) 78 | #endif 79 | 80 | #if defined(__clang__) 81 | #pragma clang diagnostic push 82 | #pragma clang diagnostic ignored "-Wunused-function" 83 | #pragma clang diagnostic ignored "-Wstatic-in-inline" 84 | #endif 85 | 86 | #if defined(_MSC_VER) 87 | #define HASHMAP_WEAK __inline 88 | #elif defined(__clang__) || defined(__GNUC__) 89 | #define HASHMAP_WEAK __attribute__((weak)) 90 | #else 91 | #error Non clang, non gcc, non MSVC compiler found! 
92 | #endif 93 | 94 | #if defined(_MSC_VER) 95 | #define HASHMAP_ALWAYS_INLINE __forceinline 96 | #elif (defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L)) || \ 97 | defined(__cplusplus) 98 | #define HASHMAP_ALWAYS_INLINE __attribute__((always_inline)) inline 99 | #else 100 | /* If we cannot use inline, its not safe to use always_inline, so we mark the 101 | * function weak. */ 102 | #define HASHMAP_ALWAYS_INLINE HASHMAP_WEAK 103 | #endif 104 | 105 | #if defined(_MSC_VER) && (_MSC_VER < 1920) 106 | typedef unsigned __int8 hashmap_uint8_t; 107 | typedef unsigned __int32 hashmap_uint32_t; 108 | typedef unsigned __int64 hashmap_uint64_t; 109 | #else 110 | #include 111 | typedef uint8_t hashmap_uint8_t; 112 | typedef uint32_t hashmap_uint32_t; 113 | typedef uint64_t hashmap_uint64_t; 114 | #endif 115 | 116 | typedef struct hashmap_element_s { 117 | const void *key; 118 | hashmap_uint32_t key_len; 119 | int in_use; 120 | void *data; 121 | } hashmap_element_t; 122 | 123 | typedef hashmap_uint32_t (*hashmap_hasher_t)(hashmap_uint32_t seed, 124 | const void *key, 125 | hashmap_uint32_t key_len); 126 | typedef int (*hashmap_comparer_t)(const void *a, hashmap_uint32_t a_len, 127 | const void *b, hashmap_uint32_t b_len); 128 | 129 | typedef struct hashmap_s { 130 | hashmap_uint32_t log2_capacity; 131 | hashmap_uint32_t size; 132 | hashmap_hasher_t hasher; 133 | hashmap_comparer_t comparer; 134 | struct hashmap_element_s *data; 135 | } hashmap_t; 136 | 137 | #define HASHMAP_LINEAR_PROBE_LENGTH (8) 138 | 139 | typedef struct hashmap_create_options_s { 140 | hashmap_hasher_t hasher; 141 | hashmap_comparer_t comparer; 142 | hashmap_uint32_t initial_capacity; 143 | hashmap_uint32_t _; 144 | } hashmap_create_options_t; 145 | 146 | #if defined(__cplusplus) 147 | extern "C" { 148 | #endif 149 | 150 | /// @brief Create a hashmap. 151 | /// @param initial_capacity The initial capacity of the hashmap. 152 | /// @param out_hashmap The storage for the created hashmap. 
153 | /// @return On success 0 is returned. 154 | HASHMAP_WEAK int hashmap_create(const hashmap_uint32_t initial_capacity, 155 | struct hashmap_s *const out_hashmap); 156 | 157 | /// @brief Create a hashmap. 158 | /// @param options The options to create the hashmap with. 159 | /// @param out_hashmap The storage for the created hashmap. 160 | /// @return On success 0 is returned. 161 | /// 162 | /// The options members work as follows: 163 | /// - initial_capacity The initial capacity of the hashmap. 164 | /// - hasher Which hashing function to use with the hashmap (by default the 165 | // crc32 with Robert Jenkins' mix is used). 166 | HASHMAP_WEAK int hashmap_create_ex(struct hashmap_create_options_s options, 167 | struct hashmap_s *const out_hashmap); 168 | 169 | /// @brief Put an element into the hashmap. 170 | /// @param hashmap The hashmap to insert into. 171 | /// @param key The string key to use. 172 | /// @param len The length of the string key. 173 | /// @param value The value to insert. 174 | /// @return On success 0 is returned. 175 | /// 176 | /// The key string slice is not copied when creating the hashmap entry, and thus 177 | /// must remain a valid pointer until the hashmap entry is removed or the 178 | /// hashmap is destroyed. 179 | HASHMAP_WEAK int hashmap_put(struct hashmap_s *const hashmap, 180 | const void *const key, const hashmap_uint32_t len, 181 | void *const value); 182 | 183 | /// @brief Get an element from the hashmap. 184 | /// @param hashmap The hashmap to get from. 185 | /// @param key The string key to use. 186 | /// @param len The length of the string key. 187 | /// @return The previously set element, or NULL if none exists. 188 | HASHMAP_WEAK void *hashmap_get(const struct hashmap_s *const hashmap, 189 | const void *const key, 190 | const hashmap_uint32_t len); 191 | 192 | /// @brief Remove an element from the hashmap. 193 | /// @param hashmap The hashmap to remove from. 194 | /// @param key The string key to use. 
195 | /// @param len The length of the string key. 196 | /// @return On success 0 is returned. 197 | HASHMAP_WEAK int hashmap_remove(struct hashmap_s *const hashmap, 198 | const void *const key, 199 | const hashmap_uint32_t len); 200 | 201 | /// @brief Remove an element from the hashmap. 202 | /// @param hashmap The hashmap to remove from. 203 | /// @param key The string key to use. 204 | /// @param len The length of the string key. 205 | /// @return On success the original stored key pointer is returned, on failure 206 | /// NULL is returned. 207 | HASHMAP_WEAK const void * 208 | hashmap_remove_and_return_key(struct hashmap_s *const hashmap, 209 | const void *const key, 210 | const hashmap_uint32_t len); 211 | 212 | /// @brief Iterate over all the elements in a hashmap. 213 | /// @param hashmap The hashmap to iterate over. 214 | /// @param iterator The function pointer to call on each element. 215 | /// @param context The context to pass as the first argument to f. 216 | /// @return If the entire hashmap was iterated then 0 is returned. Otherwise if 217 | /// the callback function f returned non-zero then non-zero is returned. 218 | HASHMAP_WEAK int hashmap_iterate(const struct hashmap_s *const hashmap, 219 | int (*iterator)(void *const context, 220 | void *const value), 221 | void *const context); 222 | 223 | /// @brief Iterate over all the elements in a hashmap. 224 | /// @param hashmap The hashmap to iterate over. 225 | /// @param iterator The function pointer to call on each element. 226 | /// @param context The context to pass as the first argument to f. 227 | /// @return If the entire hashmap was iterated then 0 is returned. 228 | /// Otherwise if the callback function f returned positive then the positive 229 | /// value is returned. If the callback function returns -1, the current item 230 | /// is removed and iteration continues. 
231 | HASHMAP_WEAK int hashmap_iterate_pairs( 232 | struct hashmap_s *const hashmap, 233 | int (*iterator)(void *const, struct hashmap_element_s *const), 234 | void *const context); 235 | 236 | /// @brief Get the size of the hashmap. 237 | /// @param hashmap The hashmap to get the size of. 238 | /// @return The size of the hashmap. 239 | HASHMAP_ALWAYS_INLINE hashmap_uint32_t 240 | hashmap_num_entries(const struct hashmap_s *const hashmap); 241 | 242 | /// @brief Get the capacity of the hashmap. 243 | /// @param hashmap The hashmap to get the size of. 244 | /// @return The capacity of the hashmap. 245 | HASHMAP_ALWAYS_INLINE hashmap_uint32_t 246 | hashmap_capacity(const struct hashmap_s *const hashmap); 247 | 248 | /// @brief Destroy the hashmap. 249 | /// @param hashmap The hashmap to destroy. 250 | HASHMAP_WEAK void hashmap_destroy(struct hashmap_s *const hashmap); 251 | 252 | static hashmap_uint32_t hashmap_crc32_hasher(const hashmap_uint32_t seed, 253 | const void *const s, 254 | const hashmap_uint32_t len); 255 | static int hashmap_memcmp_comparer(const void *const a, 256 | const hashmap_uint32_t a_len, 257 | const void *const b, 258 | const hashmap_uint32_t b_len); 259 | HASHMAP_ALWAYS_INLINE hashmap_uint32_t hashmap_hash_helper_int_helper( 260 | const struct hashmap_s *const m, const void *const key, 261 | const hashmap_uint32_t len); 262 | HASHMAP_ALWAYS_INLINE int 263 | hashmap_hash_helper(const struct hashmap_s *const m, const void *const key, 264 | const hashmap_uint32_t len, 265 | hashmap_uint32_t *const out_index); 266 | HASHMAP_WEAK int hashmap_rehash_iterator(void *const new_hash, 267 | struct hashmap_element_s *const e); 268 | HASHMAP_ALWAYS_INLINE int hashmap_rehash_helper(struct hashmap_s *const m); 269 | HASHMAP_ALWAYS_INLINE hashmap_uint32_t hashmap_clz(const hashmap_uint32_t x); 270 | 271 | #if defined(__cplusplus) 272 | } 273 | #endif 274 | 275 | #if defined(__cplusplus) 276 | #define HASHMAP_CAST(type, x) static_cast(x) 277 | #define 
HASHMAP_PTR_CAST(type, x) reinterpret_cast(x) 278 | #define HASHMAP_NULL NULL 279 | #else 280 | #define HASHMAP_CAST(type, x) ((type)(x)) 281 | #define HASHMAP_PTR_CAST(type, x) ((type)(x)) 282 | #define HASHMAP_NULL 0 283 | #endif 284 | 285 | int hashmap_create(const hashmap_uint32_t initial_capacity, 286 | struct hashmap_s *const out_hashmap) { 287 | struct hashmap_create_options_s options; 288 | memset(&options, 0, sizeof(options)); 289 | options.initial_capacity = initial_capacity; 290 | 291 | return hashmap_create_ex(options, out_hashmap); 292 | } 293 | 294 | int hashmap_create_ex(struct hashmap_create_options_s options, 295 | struct hashmap_s *const out_hashmap) { 296 | if (2 > options.initial_capacity) { 297 | options.initial_capacity = 2; 298 | } else if (0 != (options.initial_capacity & (options.initial_capacity - 1))) { 299 | options.initial_capacity = 1u 300 | << (32 - hashmap_clz(options.initial_capacity)); 301 | } 302 | 303 | if (HASHMAP_NULL == options.hasher) { 304 | options.hasher = &hashmap_crc32_hasher; 305 | } 306 | 307 | if (HASHMAP_NULL == options.comparer) { 308 | options.comparer = &hashmap_memcmp_comparer; 309 | } 310 | 311 | out_hashmap->data = HASHMAP_CAST( 312 | struct hashmap_element_s *, 313 | calloc(options.initial_capacity + HASHMAP_LINEAR_PROBE_LENGTH, 314 | sizeof(struct hashmap_element_s))); 315 | 316 | out_hashmap->log2_capacity = 31 - hashmap_clz(options.initial_capacity); 317 | out_hashmap->size = 0; 318 | out_hashmap->hasher = options.hasher; 319 | out_hashmap->comparer = options.comparer; 320 | 321 | return 0; 322 | } 323 | 324 | int hashmap_put(struct hashmap_s *const m, const void *const key, 325 | const hashmap_uint32_t len, void *const value) { 326 | hashmap_uint32_t index; 327 | 328 | if ((HASHMAP_NULL == key) || (0 == len)) { 329 | return 1; 330 | } 331 | 332 | /* Find a place to put our value. 
*/ 333 | while (!hashmap_hash_helper(m, key, len, &index)) { 334 | if (hashmap_rehash_helper(m)) { 335 | return 1; 336 | } 337 | } 338 | 339 | /* Set the data. */ 340 | m->data[index].data = value; 341 | m->data[index].key = key; 342 | m->data[index].key_len = len; 343 | 344 | /* If the hashmap element was not already in use, set that it is being used 345 | * and bump our size. */ 346 | if (0 == m->data[index].in_use) { 347 | m->data[index].in_use = 1; 348 | m->size++; 349 | } 350 | 351 | return 0; 352 | } 353 | 354 | void *hashmap_get(const struct hashmap_s *const m, const void *const key, 355 | const hashmap_uint32_t len) { 356 | hashmap_uint32_t i, curr; 357 | 358 | if ((HASHMAP_NULL == key) || (0 == len)) { 359 | return HASHMAP_NULL; 360 | } 361 | 362 | curr = hashmap_hash_helper_int_helper(m, key, len); 363 | 364 | /* Linear probing, if necessary */ 365 | for (i = 0; i < HASHMAP_LINEAR_PROBE_LENGTH; i++) { 366 | const hashmap_uint32_t index = curr + i; 367 | 368 | if (m->data[index].in_use) { 369 | if (m->comparer(m->data[index].key, m->data[index].key_len, key, len)) { 370 | return m->data[index].data; 371 | } 372 | } 373 | } 374 | 375 | /* Not found */ 376 | return HASHMAP_NULL; 377 | } 378 | 379 | int hashmap_remove(struct hashmap_s *const m, const void *const key, 380 | const hashmap_uint32_t len) { 381 | hashmap_uint32_t i, curr; 382 | 383 | if ((HASHMAP_NULL == key) || (0 == len)) { 384 | return 1; 385 | } 386 | 387 | curr = hashmap_hash_helper_int_helper(m, key, len); 388 | 389 | /* Linear probing, if necessary */ 390 | for (i = 0; i < HASHMAP_LINEAR_PROBE_LENGTH; i++) { 391 | const hashmap_uint32_t index = curr + i; 392 | 393 | if (m->data[index].in_use) { 394 | if (m->comparer(m->data[index].key, m->data[index].key_len, key, len)) { 395 | /* Blank out the fields including in_use */ 396 | memset(&m->data[index], 0, sizeof(struct hashmap_element_s)); 397 | 398 | /* Reduce the size */ 399 | m->size--; 400 | 401 | return 0; 402 | } 403 | } 404 | } 405 | 
406 | return 1; 407 | } 408 | 409 | const void *hashmap_remove_and_return_key(struct hashmap_s *const m, 410 | const void *const key, 411 | const hashmap_uint32_t len) { 412 | hashmap_uint32_t i, curr; 413 | 414 | if ((HASHMAP_NULL == key) || (0 == len)) { 415 | return HASHMAP_NULL; 416 | } 417 | 418 | curr = hashmap_hash_helper_int_helper(m, key, len); 419 | 420 | /* Linear probing, if necessary */ 421 | for (i = 0; i < HASHMAP_LINEAR_PROBE_LENGTH; i++) { 422 | const hashmap_uint32_t index = curr + i; 423 | 424 | if (m->data[index].in_use) { 425 | if (m->comparer(m->data[index].key, m->data[index].key_len, key, len)) { 426 | const void *const stored_key = m->data[index].key; 427 | 428 | /* Blank out the fields */ 429 | memset(&m->data[index], 0, sizeof(struct hashmap_element_s)); 430 | 431 | /* Reduce the size */ 432 | m->size--; 433 | 434 | return stored_key; 435 | } 436 | } 437 | } 438 | 439 | return HASHMAP_NULL; 440 | } 441 | 442 | int hashmap_iterate(const struct hashmap_s *const m, 443 | int (*f)(void *const, void *const), void *const context) { 444 | hashmap_uint32_t i; 445 | 446 | for (i = 0; i < (hashmap_capacity(m) + HASHMAP_LINEAR_PROBE_LENGTH); i++) { 447 | if (m->data[i].in_use) { 448 | if (!f(context, m->data[i].data)) { 449 | return 1; 450 | } 451 | } 452 | } 453 | 454 | return 0; 455 | } 456 | 457 | int hashmap_iterate_pairs(struct hashmap_s *const m, 458 | int (*f)(void *const, 459 | struct hashmap_element_s *const), 460 | void *const context) { 461 | hashmap_uint32_t i; 462 | struct hashmap_element_s *p; 463 | int r; 464 | 465 | for (i = 0; i < (hashmap_capacity(m) + HASHMAP_LINEAR_PROBE_LENGTH); i++) { 466 | p = &m->data[i]; 467 | if (p->in_use) { 468 | r = f(context, p); 469 | switch (r) { 470 | case -1: /* remove item */ 471 | memset(p, 0, sizeof(struct hashmap_element_s)); 472 | m->size--; 473 | break; 474 | case 0: /* continue iterating */ 475 | break; 476 | default: /* early exit */ 477 | return 1; 478 | } 479 | } 480 | } 481 | return 0; 
482 | } 483 | 484 | void hashmap_destroy(struct hashmap_s *const m) { 485 | free(m->data); 486 | memset(m, 0, sizeof(struct hashmap_s)); 487 | } 488 | 489 | HASHMAP_ALWAYS_INLINE hashmap_uint32_t 490 | hashmap_num_entries(const struct hashmap_s *const m) { 491 | return m->size; 492 | } 493 | 494 | HASHMAP_ALWAYS_INLINE hashmap_uint32_t 495 | hashmap_capacity(const struct hashmap_s *const m) { 496 | return 1u << m->log2_capacity; 497 | } 498 | 499 | hashmap_uint32_t hashmap_crc32_hasher(const hashmap_uint32_t seed, 500 | const void *const k, 501 | const hashmap_uint32_t len) { 502 | hashmap_uint32_t i = 0; 503 | hashmap_uint32_t crc32val = seed; 504 | const hashmap_uint8_t *const s = HASHMAP_PTR_CAST(const hashmap_uint8_t *, k); 505 | 506 | #if defined(HASHMAP_X86_SSE42) 507 | for (; (i + sizeof(hashmap_uint32_t)) < len; i += sizeof(hashmap_uint32_t)) { 508 | hashmap_uint32_t next; 509 | memcpy(&next, &s[i], sizeof(next)); 510 | crc32val = _mm_crc32_u32(crc32val, next); 511 | } 512 | 513 | for (; i < len; i++) { 514 | crc32val = _mm_crc32_u8(crc32val, s[i]); 515 | } 516 | #elif defined(HASHMAP_ARM_CRC32) 517 | for (; (i + sizeof(hashmap_uint64_t)) < len; i += sizeof(hashmap_uint64_t)) { 518 | hashmap_uint64_t next; 519 | memcpy(&next, &s[i], sizeof(next)); 520 | crc32val = __crc32d(crc32val, next); 521 | } 522 | 523 | for (; i < len; i++) { 524 | crc32val = __crc32b(crc32val, s[i]); 525 | } 526 | #else 527 | // Using polynomial 0x11EDC6F41 to match SSE 4.2's crc function. 
528 | static const hashmap_uint32_t crc32_tab[] = { 529 | 0x00000000U, 0xF26B8303U, 0xE13B70F7U, 0x1350F3F4U, 0xC79A971FU, 530 | 0x35F1141CU, 0x26A1E7E8U, 0xD4CA64EBU, 0x8AD958CFU, 0x78B2DBCCU, 531 | 0x6BE22838U, 0x9989AB3BU, 0x4D43CFD0U, 0xBF284CD3U, 0xAC78BF27U, 532 | 0x5E133C24U, 0x105EC76FU, 0xE235446CU, 0xF165B798U, 0x030E349BU, 533 | 0xD7C45070U, 0x25AFD373U, 0x36FF2087U, 0xC494A384U, 0x9A879FA0U, 534 | 0x68EC1CA3U, 0x7BBCEF57U, 0x89D76C54U, 0x5D1D08BFU, 0xAF768BBCU, 535 | 0xBC267848U, 0x4E4DFB4BU, 0x20BD8EDEU, 0xD2D60DDDU, 0xC186FE29U, 536 | 0x33ED7D2AU, 0xE72719C1U, 0x154C9AC2U, 0x061C6936U, 0xF477EA35U, 537 | 0xAA64D611U, 0x580F5512U, 0x4B5FA6E6U, 0xB93425E5U, 0x6DFE410EU, 538 | 0x9F95C20DU, 0x8CC531F9U, 0x7EAEB2FAU, 0x30E349B1U, 0xC288CAB2U, 539 | 0xD1D83946U, 0x23B3BA45U, 0xF779DEAEU, 0x05125DADU, 0x1642AE59U, 540 | 0xE4292D5AU, 0xBA3A117EU, 0x4851927DU, 0x5B016189U, 0xA96AE28AU, 541 | 0x7DA08661U, 0x8FCB0562U, 0x9C9BF696U, 0x6EF07595U, 0x417B1DBCU, 542 | 0xB3109EBFU, 0xA0406D4BU, 0x522BEE48U, 0x86E18AA3U, 0x748A09A0U, 543 | 0x67DAFA54U, 0x95B17957U, 0xCBA24573U, 0x39C9C670U, 0x2A993584U, 544 | 0xD8F2B687U, 0x0C38D26CU, 0xFE53516FU, 0xED03A29BU, 0x1F682198U, 545 | 0x5125DAD3U, 0xA34E59D0U, 0xB01EAA24U, 0x42752927U, 0x96BF4DCCU, 546 | 0x64D4CECFU, 0x77843D3BU, 0x85EFBE38U, 0xDBFC821CU, 0x2997011FU, 547 | 0x3AC7F2EBU, 0xC8AC71E8U, 0x1C661503U, 0xEE0D9600U, 0xFD5D65F4U, 548 | 0x0F36E6F7U, 0x61C69362U, 0x93AD1061U, 0x80FDE395U, 0x72966096U, 549 | 0xA65C047DU, 0x5437877EU, 0x4767748AU, 0xB50CF789U, 0xEB1FCBADU, 550 | 0x197448AEU, 0x0A24BB5AU, 0xF84F3859U, 0x2C855CB2U, 0xDEEEDFB1U, 551 | 0xCDBE2C45U, 0x3FD5AF46U, 0x7198540DU, 0x83F3D70EU, 0x90A324FAU, 552 | 0x62C8A7F9U, 0xB602C312U, 0x44694011U, 0x5739B3E5U, 0xA55230E6U, 553 | 0xFB410CC2U, 0x092A8FC1U, 0x1A7A7C35U, 0xE811FF36U, 0x3CDB9BDDU, 554 | 0xCEB018DEU, 0xDDE0EB2AU, 0x2F8B6829U, 0x82F63B78U, 0x709DB87BU, 555 | 0x63CD4B8FU, 0x91A6C88CU, 0x456CAC67U, 0xB7072F64U, 0xA457DC90U, 556 | 0x563C5F93U, 
0x082F63B7U, 0xFA44E0B4U, 0xE9141340U, 0x1B7F9043U, 557 | 0xCFB5F4A8U, 0x3DDE77ABU, 0x2E8E845FU, 0xDCE5075CU, 0x92A8FC17U, 558 | 0x60C37F14U, 0x73938CE0U, 0x81F80FE3U, 0x55326B08U, 0xA759E80BU, 559 | 0xB4091BFFU, 0x466298FCU, 0x1871A4D8U, 0xEA1A27DBU, 0xF94AD42FU, 560 | 0x0B21572CU, 0xDFEB33C7U, 0x2D80B0C4U, 0x3ED04330U, 0xCCBBC033U, 561 | 0xA24BB5A6U, 0x502036A5U, 0x4370C551U, 0xB11B4652U, 0x65D122B9U, 562 | 0x97BAA1BAU, 0x84EA524EU, 0x7681D14DU, 0x2892ED69U, 0xDAF96E6AU, 563 | 0xC9A99D9EU, 0x3BC21E9DU, 0xEF087A76U, 0x1D63F975U, 0x0E330A81U, 564 | 0xFC588982U, 0xB21572C9U, 0x407EF1CAU, 0x532E023EU, 0xA145813DU, 565 | 0x758FE5D6U, 0x87E466D5U, 0x94B49521U, 0x66DF1622U, 0x38CC2A06U, 566 | 0xCAA7A905U, 0xD9F75AF1U, 0x2B9CD9F2U, 0xFF56BD19U, 0x0D3D3E1AU, 567 | 0x1E6DCDEEU, 0xEC064EEDU, 0xC38D26C4U, 0x31E6A5C7U, 0x22B65633U, 568 | 0xD0DDD530U, 0x0417B1DBU, 0xF67C32D8U, 0xE52CC12CU, 0x1747422FU, 569 | 0x49547E0BU, 0xBB3FFD08U, 0xA86F0EFCU, 0x5A048DFFU, 0x8ECEE914U, 570 | 0x7CA56A17U, 0x6FF599E3U, 0x9D9E1AE0U, 0xD3D3E1ABU, 0x21B862A8U, 571 | 0x32E8915CU, 0xC083125FU, 0x144976B4U, 0xE622F5B7U, 0xF5720643U, 572 | 0x07198540U, 0x590AB964U, 0xAB613A67U, 0xB831C993U, 0x4A5A4A90U, 573 | 0x9E902E7BU, 0x6CFBAD78U, 0x7FAB5E8CU, 0x8DC0DD8FU, 0xE330A81AU, 574 | 0x115B2B19U, 0x020BD8EDU, 0xF0605BEEU, 0x24AA3F05U, 0xD6C1BC06U, 575 | 0xC5914FF2U, 0x37FACCF1U, 0x69E9F0D5U, 0x9B8273D6U, 0x88D28022U, 576 | 0x7AB90321U, 0xAE7367CAU, 0x5C18E4C9U, 0x4F48173DU, 0xBD23943EU, 577 | 0xF36E6F75U, 0x0105EC76U, 0x12551F82U, 0xE03E9C81U, 0x34F4F86AU, 578 | 0xC69F7B69U, 0xD5CF889DU, 0x27A40B9EU, 0x79B737BAU, 0x8BDCB4B9U, 579 | 0x988C474DU, 0x6AE7C44EU, 0xBE2DA0A5U, 0x4C4623A6U, 0x5F16D052U, 580 | 0xAD7D5351U}; 581 | 582 | for (; i < len; i++) { 583 | crc32val = crc32_tab[(HASHMAP_CAST(hashmap_uint8_t, crc32val) ^ s[i])] ^ 584 | (crc32val >> 8); 585 | } 586 | #endif 587 | 588 | // Use the mix function from murmur3. 
589 | crc32val ^= len; 590 | 591 | crc32val ^= crc32val >> 16; 592 | crc32val *= 0x85ebca6b; 593 | crc32val ^= crc32val >> 13; 594 | crc32val *= 0xc2b2ae35; 595 | crc32val ^= crc32val >> 16; 596 | 597 | return crc32val; 598 | } 599 | 600 | int hashmap_memcmp_comparer(const void *const a, const hashmap_uint32_t a_len, 601 | const void *const b, const hashmap_uint32_t b_len) { 602 | return (a_len == b_len) && (0 == memcmp(a, b, a_len)); 603 | } 604 | 605 | HASHMAP_ALWAYS_INLINE hashmap_uint32_t 606 | hashmap_hash_helper_int_helper(const struct hashmap_s *const m, 607 | const void *const k, const hashmap_uint32_t l) { 608 | return (m->hasher(~0u, k, l) * 2654435769u) >> (32u - m->log2_capacity); 609 | } 610 | 611 | HASHMAP_ALWAYS_INLINE int 612 | hashmap_hash_helper(const struct hashmap_s *const m, const void *const key, 613 | const hashmap_uint32_t len, 614 | hashmap_uint32_t *const out_index) { 615 | hashmap_uint32_t curr; 616 | hashmap_uint32_t i; 617 | hashmap_uint32_t first_free; 618 | 619 | /* If full, return immediately */ 620 | if (hashmap_num_entries(m) == hashmap_capacity(m)) { 621 | return 0; 622 | } 623 | 624 | /* Find the best index */ 625 | curr = hashmap_hash_helper_int_helper(m, key, len); 626 | first_free = ~0u; 627 | 628 | for (i = 0; i < HASHMAP_LINEAR_PROBE_LENGTH; i++) { 629 | const hashmap_uint32_t index = curr + i; 630 | 631 | if (!m->data[index].in_use) { 632 | first_free = (first_free < index) ? first_free : index; 633 | } else if (m->comparer(m->data[index].key, m->data[index].key_len, key, 634 | len)) { 635 | *out_index = index; 636 | return 1; 637 | } 638 | } 639 | 640 | // Couldn't find a free element in the linear probe. 
641 | if (~0u == first_free) { 642 | return 0; 643 | } 644 | 645 | *out_index = first_free; 646 | return 1; 647 | } 648 | 649 | int hashmap_rehash_iterator(void *const new_hash, 650 | struct hashmap_element_s *const e) { 651 | int temp = hashmap_put(HASHMAP_PTR_CAST(struct hashmap_s *, new_hash), e->key, 652 | e->key_len, e->data); 653 | 654 | if (0 < temp) { 655 | return 1; 656 | } 657 | 658 | /* clear old value to avoid stale pointers */ 659 | return -1; 660 | } 661 | 662 | /* 663 | * Doubles the size of the hashmap, and rehashes all the elements 664 | */ 665 | HASHMAP_ALWAYS_INLINE int hashmap_rehash_helper(struct hashmap_s *const m) { 666 | struct hashmap_create_options_s options; 667 | struct hashmap_s new_m; 668 | int flag; 669 | 670 | memset(&options, 0, sizeof(options)); 671 | options.initial_capacity = hashmap_capacity(m) * 2; 672 | options.hasher = m->hasher; 673 | 674 | if (0 == options.initial_capacity) { 675 | return 1; 676 | } 677 | 678 | flag = hashmap_create_ex(options, &new_m); 679 | 680 | if (0 != flag) { 681 | return flag; 682 | } 683 | 684 | /* copy the old elements to the new table */ 685 | flag = hashmap_iterate_pairs(m, hashmap_rehash_iterator, 686 | HASHMAP_PTR_CAST(void *, &new_m)); 687 | 688 | if (0 != flag) { 689 | return flag; 690 | } 691 | 692 | hashmap_destroy(m); 693 | 694 | /* put new hash into old hash structure by copying */ 695 | memcpy(m, &new_m, sizeof(struct hashmap_s)); 696 | 697 | return 0; 698 | } 699 | 700 | HASHMAP_ALWAYS_INLINE hashmap_uint32_t hashmap_clz(const hashmap_uint32_t x) { 701 | #if defined(_MSC_VER) 702 | unsigned long result; 703 | _BitScanReverse(&result, x); 704 | return 31 - HASHMAP_CAST(hashmap_uint32_t, result); 705 | #else 706 | return HASHMAP_CAST(hashmap_uint32_t, __builtin_clz(x)); 707 | #endif 708 | } 709 | 710 | #if defined(_MSC_VER) 711 | #pragma warning(pop) 712 | #endif 713 | 714 | #if defined(__clang__) 715 | #pragma clang diagnostic pop 716 | #endif 717 | 718 | #endif 719 | 
-------------------------------------------------------------------------------- /hashmap/hashmapoperators.c: -------------------------------------------------------------------------------- 1 | // https://www.tutorialspoint.com/data_structures_algorithms/hash_table_program_in_c.htm 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | 8 | #define SIZE 20 9 | 10 | struct DataItem { 11 | char *data; 12 | char key; 13 | }; 14 | 15 | struct DataItem* hashArray[SIZE]; 16 | struct DataItem* dummyItem; 17 | struct DataItem* item; 18 | 19 | int hashCode(char key) { 20 | return key % SIZE; 21 | } 22 | 23 | struct DataItem *search(int key) { 24 | //get the hash 25 | int hashIndex = hashCode(key); 26 | 27 | //move in array until an empty 28 | while(hashArray[hashIndex] != NULL) { 29 | 30 | if(hashArray[hashIndex]->key == key) 31 | return hashArray[hashIndex]; 32 | 33 | //go to next cell 34 | ++hashIndex; 35 | 36 | //wrap around the table 37 | hashIndex %= SIZE; 38 | } 39 | assert(hashArray[hashIndex] != NULL && "ERROR: Not found in hashmap\n"); 40 | return NULL; 41 | } 42 | 43 | void insert(int key, char *data) { 44 | 45 | struct DataItem *item = (struct DataItem*) malloc(sizeof(struct DataItem)); 46 | item->data = data; 47 | item->key = key; 48 | 49 | //get the hash 50 | int hashIndex = hashCode(key); 51 | 52 | //move in array until an empty or deleted cell 53 | while(hashArray[hashIndex] != NULL && hashArray[hashIndex]->key != -1) { 54 | //go to next cell 55 | ++hashIndex; 56 | 57 | //wrap around the table 58 | hashIndex %= SIZE; 59 | } 60 | 61 | hashArray[hashIndex] = item; 62 | } 63 | 64 | struct DataItem* delete(struct DataItem* item) { 65 | int key = item->key; 66 | 67 | //get the hash 68 | int hashIndex = hashCode(key); 69 | 70 | //move in array until an empty 71 | while(hashArray[hashIndex] != NULL) { 72 | 73 | if(hashArray[hashIndex]->key == key) { 74 | struct DataItem* temp = hashArray[hashIndex]; 75 | 76 | //assign a dummy item at deleted position 
77 | hashArray[hashIndex] = dummyItem; 78 | return temp; 79 | } 80 | 81 | //go to next cell 82 | ++hashIndex; 83 | 84 | //wrap around the table 85 | hashIndex %= SIZE; 86 | } 87 | 88 | return NULL; 89 | } 90 | 91 | void display() { 92 | int i = 0; 93 | 94 | for(i = 0; ikey,hashArray[i]->data); 98 | else 99 | printf(" ~~ "); 100 | } 101 | 102 | printf("\n"); 103 | } 104 | 105 | -------------------------------------------------------------------------------- /hashmap/hashmapoperators.h: -------------------------------------------------------------------------------- 1 | #ifndef HASHMAP_H_ 2 | #define HASHMAP_H_ 3 | 4 | #define SIZE 20 5 | 6 | struct DataItem { 7 | char *data; 8 | char key; 9 | }; 10 | 11 | int hashCode(char key); 12 | struct DataItem *search(int key); 13 | void insert(int key, char *data); 14 | struct DataItem* delete(struct DataItem* item); 15 | void display(); 16 | 17 | #endif 18 | -------------------------------------------------------------------------------- /lexerf.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | 6 | typedef enum { 7 | BEGINNING, 8 | INT, 9 | KEYWORD, 10 | SEPARATOR, 11 | OPERATOR, 12 | IDENTIFIER, 13 | STRING, 14 | COMP, 15 | END_OF_TOKENS, 16 | } TokenType; 17 | 18 | typedef struct { 19 | TokenType type; 20 | char *value; 21 | size_t line_num; 22 | } Token; 23 | 24 | size_t line_number = 0; 25 | 26 | void print_token(Token token){ 27 | printf("TOKEN VALUE: "); 28 | printf("'"); 29 | for(int i = 0; token.value[i] != '\0'; i++){ 30 | printf("%c", token.value[i]); 31 | } 32 | printf("'"); 33 | printf("\nline number: %zu", token.line_num); 34 | 35 | switch(token.type){ 36 | case INT: 37 | printf(" TOKEN TYPE: INT\n"); 38 | break; 39 | case KEYWORD: 40 | printf(" TOKEN TYPE: KEYWORD\n"); 41 | break; 42 | case SEPARATOR: 43 | printf(" TOKEN TYPE: SEPARATOR\n"); 44 | break; 45 | case OPERATOR: 46 | printf(" TOKEN TYPE: OPERATOR\n"); 47 | 
break; 48 | case IDENTIFIER: 49 | printf(" TOKEN TYPE: IDENTIFIER\n"); 50 | break; 51 | case STRING: 52 | printf(" TOKEN TYPE: STRING\n"); 53 | break; 54 | case COMP: 55 | printf(" TOKEN TYPE: COMPARATOR\n"); 56 | break; 57 | case END_OF_TOKENS: 58 | printf(" END OF TOKENS\n"); 59 | break; 60 | case BEGINNING: 61 | printf("BEGINNING\n"); 62 | break; 63 | } 64 | } 65 | 66 | Token *generate_number(char *current, int *current_index){ 67 | Token *token = malloc(sizeof(Token)); 68 | token->line_num = malloc(sizeof(size_t)); 69 | token->line_num = line_number; 70 | token->type = INT; 71 | char *value = malloc(sizeof(char) * 8); 72 | int value_index = 0; 73 | while(isdigit(current[*current_index]) && current[*current_index] != '\0'){ 74 | if(!isdigit(current[*current_index])){ 75 | break; 76 | } 77 | value[value_index] = current[*current_index]; 78 | value_index++; 79 | *current_index += 1; 80 | } 81 | value[value_index] = '\0'; 82 | token->value = value; 83 | return token; 84 | } 85 | 86 | Token *generate_keyword_or_identifier(char *current, int *current_index){ 87 | Token *token = malloc(sizeof(Token)); 88 | token->line_num = malloc(sizeof(size_t)); 89 | token->line_num = line_number; 90 | char *keyword = malloc(sizeof(char) * 8); 91 | int keyword_index = 0; 92 | while(isalpha(current[*current_index]) && current[*current_index] != '\0'){ 93 | keyword[keyword_index] = current[*current_index]; 94 | keyword_index++; 95 | *current_index += 1; 96 | } 97 | keyword[keyword_index] = '\0'; 98 | if(strcmp(keyword, "exit") == 0){ 99 | token->type = KEYWORD; 100 | token->value = "EXIT"; 101 | } else if(strcmp(keyword, "int") == 0){ 102 | token->type = KEYWORD; 103 | token->value = "INT"; 104 | } else if(strcmp(keyword, "if") == 0){ 105 | token->type = KEYWORD; 106 | token->value = "IF"; 107 | } else if(strcmp(keyword, "while") == 0){ 108 | token->type = KEYWORD; 109 | token->value = "WHILE"; 110 | } else if(strcmp(keyword, "write") == 0){ 111 | token->type = KEYWORD; 112 | 
token->value = "WRITE"; 113 | } else if(strcmp(keyword, "eq") == 0){ 114 | token->type = COMP; 115 | token->value = "EQ"; 116 | } else if(strcmp(keyword, "neq") == 0){ 117 | token->type = COMP; 118 | token->value = "NEQ"; 119 | } else if(strcmp(keyword, "less") == 0){ 120 | token->type = COMP; 121 | token->value = "LESS"; 122 | } else if(strcmp(keyword, "greater") == 0){ 123 | token->type = COMP; 124 | token->value = "GREATER"; 125 | } else { 126 | token->type = IDENTIFIER; 127 | token->value = keyword; 128 | } 129 | return token; 130 | } 131 | 132 | Token *generate_string_token(char *current, int *current_index){ 133 | Token *token = malloc(sizeof(Token)); 134 | token->line_num = malloc(sizeof(size_t)); 135 | token->line_num = line_number; 136 | char *value = malloc(sizeof(char) * 64); 137 | int value_index = 0; 138 | *current_index += 1; 139 | while(current[*current_index] != '"'){ 140 | value[value_index] = current[*current_index]; 141 | value_index++; 142 | *current_index += 1; 143 | } 144 | value[value_index] = '\0'; 145 | token->type = STRING; 146 | token->value = value; 147 | return token; 148 | } 149 | 150 | Token *generate_separator_or_operator(char *current, int *current_index, TokenType type){ 151 | Token *token = malloc(sizeof(Token)); 152 | token->value = malloc(sizeof(char) * 2); 153 | token->value[0] = current[*current_index]; 154 | token->value[1] = '\0'; 155 | token->line_num = malloc(sizeof(size_t)); 156 | token->line_num = line_number; 157 | token->type = type; 158 | return token; 159 | } 160 | 161 | size_t tokens_index; 162 | 163 | Token *lexer(FILE *file){ 164 | int length; 165 | char *current = 0; 166 | 167 | fseek(file, 0, SEEK_END); 168 | length = ftell(file); 169 | fseek(file, 0, SEEK_SET); 170 | 171 | current = malloc(sizeof(char) * length); 172 | fread(current, 1, length, file); 173 | 174 | fclose(file); 175 | 176 | current[length] = '\0'; 177 | int current_index = 0; 178 | 179 | int number_of_tokens = 12; 180 | int tokens_size = 0; 181 | 
Token *tokens = malloc(sizeof(Token) * number_of_tokens); 182 | tokens_index = 0; 183 | 184 | while(current[current_index] != '\0'){ 185 | Token *token = malloc(sizeof(Token)); 186 | tokens_size++; 187 | if(tokens_size > number_of_tokens){ 188 | number_of_tokens *= 1.5; 189 | tokens = realloc(tokens, sizeof(Token) * number_of_tokens); 190 | } 191 | if(current[current_index] == ';'){ 192 | token = generate_separator_or_operator(current, ¤t_index, SEPARATOR); 193 | tokens[tokens_index] = *token; 194 | tokens_index++; 195 | } else if(current[current_index] == ','){ 196 | token = generate_separator_or_operator(current, ¤t_index, SEPARATOR); 197 | tokens[tokens_index] = *token; 198 | tokens_index++; 199 | } else if(current[current_index] == '('){ 200 | token = generate_separator_or_operator(current, ¤t_index, SEPARATOR); 201 | tokens[tokens_index] = *token; 202 | tokens_index++; 203 | } else if(current[current_index] == ')'){ 204 | token = generate_separator_or_operator(current, ¤t_index, SEPARATOR); 205 | tokens[tokens_index] = *token; 206 | tokens_index++; 207 | } else if(current[current_index] == '{'){ 208 | token = generate_separator_or_operator(current, ¤t_index, SEPARATOR); 209 | tokens[tokens_index] = *token; 210 | tokens_index++; 211 | } else if(current[current_index] == '}'){ 212 | token = generate_separator_or_operator(current, ¤t_index, SEPARATOR); 213 | tokens[tokens_index] = *token; 214 | tokens_index++; 215 | } else if(current[current_index] == '='){ 216 | token = generate_separator_or_operator(current, ¤t_index, OPERATOR); 217 | tokens[tokens_index] = *token; 218 | tokens_index++; 219 | } else if(current[current_index] == '+'){ 220 | token = generate_separator_or_operator(current, ¤t_index, OPERATOR); 221 | tokens[tokens_index] = *token; 222 | tokens_index++; 223 | } else if(current[current_index] == '-'){ 224 | token = generate_separator_or_operator(current, ¤t_index, OPERATOR); 225 | tokens[tokens_index] = *token; 226 | tokens_index++; 227 | } else 
if(current[current_index] == '*'){ 228 | token = generate_separator_or_operator(current, ¤t_index, OPERATOR); 229 | tokens[tokens_index] = *token; 230 | tokens_index++; 231 | } else if(current[current_index] == '/'){ 232 | token = generate_separator_or_operator(current, ¤t_index, OPERATOR); 233 | tokens[tokens_index] = *token; 234 | tokens_index++; 235 | } else if(current[current_index] == '%'){ 236 | token = generate_separator_or_operator(current, ¤t_index, OPERATOR); 237 | tokens[tokens_index] = *token; 238 | tokens_index++; 239 | } else if(current[current_index] == '"'){ 240 | token = generate_string_token(current, ¤t_index); 241 | tokens[tokens_index] = *token; 242 | tokens_index++; 243 | }else if(isdigit(current[current_index])){ 244 | token = generate_number(current, ¤t_index); 245 | tokens[tokens_index] = *token; 246 | tokens_index++; 247 | current_index--; 248 | } else if(isalpha(current[current_index])){ 249 | token = generate_keyword_or_identifier(current, ¤t_index); 250 | tokens[tokens_index] = *token; 251 | tokens_index++; 252 | current_index--; 253 | } else if(current[current_index] == '\n'){ 254 | line_number += 1; 255 | } 256 | free(token); 257 | current_index++; 258 | } 259 | tokens[tokens_index].value = '\0'; 260 | tokens[tokens_index].type = END_OF_TOKENS; 261 | return tokens; 262 | } 263 | 264 | 265 | -------------------------------------------------------------------------------- /lexerf.h: -------------------------------------------------------------------------------- 1 | #ifndef LEXER_H_ 2 | #define LEXER_H_ 3 | 4 | typedef enum { 5 | BEGINNING, 6 | INT, 7 | KEYWORD, 8 | SEPARATOR, 9 | OPERATOR, 10 | IDENTIFIER, 11 | STRING, 12 | COMP, 13 | END_OF_TOKENS, 14 | } TokenType; 15 | 16 | typedef struct { 17 | TokenType type; 18 | char *value; 19 | size_t line_num; 20 | } Token; 21 | 22 | 23 | void print_token(Token token); 24 | Token *generate_number(char *current, int *current_index); 25 | Token *generate_keyword(char *current, int 
*current_index); 26 | Token *generate_separator_or_operator(char *current, int *current_index, TokenType type); 27 | Token *lexer(FILE *file); 28 | 29 | #endif 30 | -------------------------------------------------------------------------------- /main.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | 6 | #include "lexerf.h" 7 | #include "parserf.h" 8 | #include "codegeneratorf.h" 9 | 10 | int main(int argc, char *argv[]){ 11 | if(argc != 3){ 12 | printf("Error: correct syntax: %s \n", argv[0]); 13 | exit(1); 14 | } 15 | char *output_filename = malloc(sizeof(char) * 16); 16 | sprintf(output_filename, "%s.asm", argv[2]); 17 | 18 | FILE *file; 19 | file = fopen(argv[1], "r"); 20 | 21 | if(!file){ 22 | printf("ERROR: File not found\n"); 23 | exit(1); 24 | } 25 | Token *tokens = lexer(file); 26 | 27 | Node *test = parser(tokens); 28 | 29 | generate_code(test, "generated.asm"); 30 | FILE *assembly_file = fopen("generated.asm", "r"); 31 | if(!assembly_file){ 32 | printf("ERRROR"); 33 | exit(1); 34 | } 35 | char *nasm = malloc(sizeof(char) * 64); 36 | char *gcc = malloc(sizeof(char) * 64); 37 | sprintf(nasm, "nasm -f elf64 generated.asm -o generated.o", argv[2], argv[2]); 38 | sprintf(gcc, "gcc generated.o -o %s -lc -no-pie", argv[2], argv[2]); 39 | system(nasm); 40 | system(gcc); 41 | printf("FINISHED\n"); 42 | } 43 | -------------------------------------------------------------------------------- /parserf.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | 6 | #include "lexerf.h" 7 | 8 | #define MAX_CURLY_STACK_LENGTH 64 9 | 10 | typedef struct Node { 11 | char *value; 12 | TokenType type; 13 | struct Node *right; 14 | struct Node *left; 15 | } Node; 16 | 17 | typedef struct { 18 | Node *content[MAX_CURLY_STACK_LENGTH]; 19 | int top; 20 | } curly_stack; 21 | 22 | Node *peek_curly(curly_stack 
*stack){ 23 | return stack->content[stack->top]; 24 | } 25 | 26 | void push_curly(curly_stack *stack, Node *element){ 27 | stack->top++; 28 | stack->content[stack->top] = element; 29 | } 30 | 31 | Node *pop_curly(curly_stack *stack){ 32 | Node *result = stack->content[stack->top]; 33 | stack->top--; 34 | return result; 35 | } 36 | 37 | void print_tree(Node *node, int indent, char *identifier){ 38 | if(node == NULL){ 39 | return; 40 | } 41 | for(int i = 0; i < indent; i++){ 42 | printf(" "); 43 | } 44 | printf("%s -> ", identifier); 45 | for(size_t i = 0; node->value[i] != '\0'; i++){ 46 | printf("%c", node->value[i]); 47 | } 48 | printf("\n"); 49 | print_tree(node->left, indent + 1, "left"); 50 | print_tree(node->right, indent + 1, "right"); 51 | } 52 | 53 | Node *init_node(Node *node, char *value, TokenType type){ 54 | node = malloc(sizeof(Node)); 55 | node->value = malloc(sizeof(char) * 2); 56 | node->type = (int)type; 57 | node->value = value; 58 | node->left = NULL; 59 | node->right = NULL; 60 | return node; 61 | } 62 | 63 | void print_error(char *error_type, size_t line_number){ 64 | printf("ERROR: %s on line number: %zu\n", error_type, line_number); 65 | exit(1); 66 | } 67 | 68 | Node *parse_expression(Token *current_token, Node *current_node){ 69 | Node *expr_node = malloc(sizeof(Node)); 70 | expr_node = init_node(expr_node, current_token->value, current_token->type); 71 | current_token++; 72 | if(current_token->type != OPERATOR){ 73 | return expr_node; 74 | } 75 | return expr_node; 76 | } 77 | 78 | 79 | Token *generate_operation_nodes(Token *current_token, Node *current_node){ 80 | Node *oper_node = malloc(sizeof(Node)); 81 | oper_node = init_node(oper_node, current_token->value, OPERATOR); 82 | current_node->left->left = oper_node; 83 | current_node = oper_node; 84 | current_token--; 85 | if(current_token->type == INT){ 86 | Node *expr_node = malloc(sizeof(Node)); 87 | expr_node = init_node(expr_node, current_token->value, INT); 88 | current_node->left = 
expr_node; 89 | } else if(current_token->type == IDENTIFIER){ 90 | Node *identifier_node = malloc(sizeof(Node)); 91 | identifier_node = init_node(identifier_node, current_token->value, IDENTIFIER); 92 | current_node->left = identifier_node; 93 | } else { 94 | printf("ERROR: expected int or identifier\n"); 95 | exit(1); 96 | } 97 | current_token++; 98 | current_token++; 99 | while(current_token->type == INT || current_token->type == IDENTIFIER || current_token->type == OPERATOR){ 100 | if(current_token->type == INT || current_token->type == IDENTIFIER){ 101 | if((current_token->type != INT && current_token->type != IDENTIFIER) || current_token == NULL){ 102 | printf("Syntax Error hERE\n"); 103 | exit(1); 104 | } 105 | current_token++; 106 | if(current_token->type != OPERATOR){ 107 | current_token--; 108 | if(current_token->type == INT){ 109 | Node *second_expr_node = malloc(sizeof(Node)); 110 | second_expr_node = init_node(second_expr_node, current_token->value, INT); 111 | current_node->right = second_expr_node; 112 | } else if(current_token->type == IDENTIFIER){ 113 | Node *second_identifier_node = malloc(sizeof(Node)); 114 | second_identifier_node = init_node(second_identifier_node, current_token->value, IDENTIFIER); 115 | current_node->right = second_identifier_node; 116 | } else { 117 | printf("ERROR: Expected Integer or Identifier\n"); 118 | exit(1); 119 | } 120 | } 121 | } 122 | if(current_token->type == OPERATOR){ 123 | Node *next_oper_node = malloc(sizeof(Node)); 124 | next_oper_node = init_node(next_oper_node, current_token->value, OPERATOR); 125 | current_node->right = next_oper_node; 126 | current_node = next_oper_node; 127 | current_token--; 128 | if(current_token->type == INT){ 129 | Node *second_expr_node = malloc(sizeof(Node)); 130 | second_expr_node = init_node(second_expr_node, current_token->value, INT); 131 | current_node->left = second_expr_node; 132 | } else if(current_token->type == IDENTIFIER){ 133 | Node *second_identifier_node = 
malloc(sizeof(Node)); 134 | second_identifier_node = init_node(second_identifier_node, current_token->value, IDENTIFIER); 135 | current_node->left = second_identifier_node; 136 | } else { 137 | printf("ERROR: Expected IDENTIFIER or INT\n"); 138 | exit(1); 139 | } 140 | current_token++; 141 | } 142 | current_token++; 143 | } 144 | return current_token; 145 | } 146 | 147 | Node *handle_exit_syscall(Node *root, Token *current_token, Node *current){ 148 | Node *exit_node = malloc(sizeof(Node)); 149 | exit_node = init_node(exit_node, current_token->value, KEYWORD); 150 | current->right = exit_node; 151 | current = exit_node; 152 | current_token++; 153 | if(current_token->type == END_OF_TOKENS){ 154 | print_error("Invalid Syntax on OPEN", current_token->line_num); 155 | } 156 | if(strcmp(current_token->value, "(") == 0 && current_token->type == SEPARATOR){ 157 | Node *open_paren_node = malloc(sizeof(Node)); 158 | open_paren_node = init_node(open_paren_node, current_token->value, SEPARATOR); 159 | current->left = open_paren_node; 160 | current_token++; 161 | if(current_token->type == END_OF_TOKENS){ 162 | print_error("Invalid Syntax on INT", current_token->line_num); 163 | } 164 | if(current_token->type == INT || current_token->type == IDENTIFIER){ 165 | current_token++; 166 | if(current_token->type == OPERATOR && current_token != NULL){ 167 | current_token = generate_operation_nodes(current_token, current); 168 | current_token--; 169 | } else { 170 | current_token--; 171 | Node *expr_node = malloc(sizeof(Node)); 172 | expr_node = init_node(expr_node, current_token->value, current_token->type); 173 | current->left->left = expr_node; 174 | } 175 | current_token++; 176 | if(current_token->type == END_OF_TOKENS){ 177 | print_error("Invalid Syntax on cLOSE", current_token->line_num); 178 | } 179 | if(strcmp(current_token->value, ")") == 0 && current_token->type == SEPARATOR && current_token->type != END_OF_TOKENS){ 180 | Node *close_paren_node = malloc(sizeof(Node)); 181 | 
close_paren_node = init_node(close_paren_node, current_token->value, SEPARATOR); 182 | current->left->right = close_paren_node; 183 | current_token++; 184 | if(current_token->type == END_OF_TOKENS){ 185 | print_error("Invalid Syntax on SEMI", current_token->line_num); 186 | } 187 | if(strcmp(current_token->value, ";") == 0 && current_token->type == SEPARATOR){ 188 | Node *semi_node = malloc(sizeof(Node)); 189 | semi_node = init_node(semi_node, current_token->value, SEPARATOR); 190 | current->right = semi_node; 191 | current = semi_node; 192 | } else { 193 | print_error("Invalid Syntax on SEMI", current_token->line_num); 194 | } 195 | } else { 196 | print_error("Invalid Syntax on CLOSE", current_token->line_num); 197 | } 198 | } else { 199 | print_error("Invalid Syntax INT", current_token->line_num); 200 | } 201 | } else { 202 | print_error("Invalid Syntax OPEN", current_token->line_num); 203 | } 204 | return current; 205 | } 206 | 207 | void handle_token_errors(char *error_text, Token *current_token, TokenType type){ 208 | if(current_token->type == END_OF_TOKENS || current_token->type != type){ 209 | print_error(error_text, current_token->line_num); 210 | } 211 | } 212 | 213 | Node *create_variable_reusage(Token *current_token, Node *current){ 214 | Node *main_identifier_node = malloc(sizeof(Node)); 215 | main_identifier_node = init_node(main_identifier_node, current_token->value, IDENTIFIER); 216 | current->left = main_identifier_node; 217 | current = main_identifier_node; 218 | current_token++; 219 | 220 | handle_token_errors("Invalid syntax after idenfitier", current_token, OPERATOR); 221 | 222 | if(current_token->type == OPERATOR){ 223 | if(strcmp(current_token->value, "=") != 0){ 224 | print_error("Invalid Variable Syntax on =", current_token->line_num); 225 | } 226 | Node *equals_node = malloc(sizeof(Node)); 227 | equals_node = init_node(equals_node, current_token->value, OPERATOR); 228 | current->left = equals_node; 229 | current = equals_node; 230 | 
current_token++; 231 | } 232 | if(current_token->type == END_OF_TOKENS || (current_token->type != INT && current_token->type != IDENTIFIER)){ 233 | print_error("Invalid Syntax After Equals", current_token->line_num); 234 | } 235 | 236 | current_token++; 237 | if(current_token->type == OPERATOR){ 238 | Node *oper_node = malloc(sizeof(Node)); 239 | oper_node = init_node(oper_node, current_token->value, OPERATOR); 240 | current->left = oper_node; 241 | current = oper_node; 242 | current_token--; 243 | if(current_token->type == INT){ 244 | Node *expr_node = malloc(sizeof(Node)); 245 | expr_node = init_node(expr_node, current_token->value, INT); 246 | oper_node->left = expr_node; 247 | current_token++; 248 | current_token++; 249 | } else if(current_token->type == IDENTIFIER){ 250 | Node *identifier_node = malloc(sizeof(Node)); 251 | identifier_node = init_node(identifier_node, current_token->value, IDENTIFIER); 252 | oper_node->left = identifier_node; 253 | current_token++; 254 | current_token++; 255 | } else { 256 | print_error("ERROR: Expected IDENTIFIER or INT", current_token->line_num); 257 | } 258 | current_token++; 259 | 260 | if(current_token->type == OPERATOR){ 261 | Node *oper_node = malloc(sizeof(Node)); 262 | oper_node = init_node(oper_node, current_token->value, OPERATOR); 263 | current->right = oper_node; 264 | current = oper_node; 265 | int operation = 1; 266 | current_token--; 267 | current_token--; 268 | while(operation){ 269 | current_token++; 270 | if(current_token->type == INT){ 271 | Node *expr_node = malloc(sizeof(Node)); 272 | expr_node = init_node(expr_node, current_token->value, INT); 273 | current->left = expr_node; 274 | } else if(current_token->type == IDENTIFIER){ 275 | Node *identifier_node = malloc(sizeof(Node)); 276 | identifier_node = init_node(identifier_node, current_token->value, IDENTIFIER); 277 | current->left = identifier_node; 278 | } else { 279 | printf("ERROR: Unexpected Token\n", current_token->line_num); 280 | exit(1); 281 | } 
282 | current_token++; 283 | if(current_token->type == OPERATOR){ 284 | current_token++; 285 | current_token++; 286 | if(current_token->type != OPERATOR){ 287 | current_token--; 288 | if(current_token->type == INT){ 289 | Node *expr_node = malloc(sizeof(Node)); 290 | expr_node = init_node(expr_node, current_token->value, INT); 291 | current->right = expr_node; 292 | current_token++; 293 | } else if(current_token->type == IDENTIFIER){ 294 | Node *identifier_node = malloc(sizeof(Node)); 295 | identifier_node = init_node(identifier_node, current_token->value, IDENTIFIER); 296 | current->right = identifier_node; 297 | current_token++; 298 | } else { 299 | printf("ERROR: UNRECOGNIZED TOKEN!\n"); 300 | exit(1); 301 | } 302 | operation = 0; 303 | } else { 304 | current_token--; 305 | current_token--; 306 | Node *oper_node = malloc(sizeof(Node)); 307 | oper_node = init_node(oper_node, current_token->value, OPERATOR); 308 | current->right = oper_node; 309 | current = oper_node; 310 | } 311 | } else { 312 | operation = 0; 313 | } 314 | } 315 | } else { 316 | current_token--; 317 | if(current_token->type == INT){ 318 | Node *expr_node = malloc(sizeof(Node)); 319 | expr_node = init_node(expr_node, current_token->value, INT); 320 | oper_node->right = expr_node; 321 | } else if(current_token->type == IDENTIFIER){ 322 | Node *identifier_node = malloc(sizeof(Node)); 323 | identifier_node = init_node(identifier_node, current_token->value, IDENTIFIER); 324 | oper_node->right = identifier_node; 325 | } 326 | current_token++; 327 | } 328 | } else { 329 | current_token--; 330 | if(current_token->type == INT){ 331 | Node *expr_node = malloc(sizeof(Node)); 332 | expr_node = init_node(expr_node, current_token->value, INT); 333 | current->left = expr_node; 334 | current_token++; 335 | } else if(current_token->type == IDENTIFIER){ 336 | Node *identifier_node = malloc(sizeof(Node)); 337 | identifier_node = init_node(identifier_node, current_token->value, IDENTIFIER); 338 | current->left = 
identifier_node; 339 | current_token++; 340 | } 341 | } 342 | handle_token_errors("Invalid Syntax After Expression", current_token, SEPARATOR); 343 | 344 | current = main_identifier_node; 345 | if(strcmp(current_token->value, ";") == 0){ 346 | Node *semi_node = malloc(sizeof(Node)); 347 | semi_node = init_node(semi_node, current_token->value, SEPARATOR); 348 | current->right = semi_node; 349 | current = semi_node; 350 | } 351 | return current; 352 | } 353 | 354 | 355 | Node *create_variables(Token *current_token, Node *current){ 356 | Node *var_node = malloc(sizeof(Node)); 357 | var_node = init_node(var_node, current_token->value, KEYWORD); 358 | current->left = var_node; 359 | current = var_node; 360 | current_token++; 361 | handle_token_errors("Invalid syntax after INT", current_token, IDENTIFIER); 362 | if(current_token->type == IDENTIFIER){ 363 | Node *identifier_node = malloc(sizeof(Node)); 364 | identifier_node = init_node(identifier_node, current_token->value, IDENTIFIER); 365 | current->left = identifier_node; 366 | current = identifier_node; 367 | current_token++; 368 | } 369 | handle_token_errors("Invalid Syntax After Identifier", current_token, OPERATOR); 370 | 371 | if(current_token->type == OPERATOR){ 372 | if(strcmp(current_token->value, "=") != 0){ 373 | print_error("Invalid Variable Syntax on =", current_token->line_num); 374 | } 375 | Node *equals_node = malloc(sizeof(Node)); 376 | equals_node = init_node(equals_node, current_token->value, OPERATOR); 377 | current->left = equals_node; 378 | current = equals_node; 379 | current_token++; 380 | } 381 | if(current_token->type == END_OF_TOKENS || (current_token->type != INT && current_token->type != IDENTIFIER)){ 382 | print_error("Invalid Syntax After Equals", current_token->line_num); 383 | } 384 | 385 | current_token++; 386 | if(current_token->type == OPERATOR){ 387 | Node *oper_node = malloc(sizeof(Node)); 388 | oper_node = init_node(oper_node, current_token->value, OPERATOR); 389 | current->left = 
oper_node; 390 | current = oper_node; 391 | current_token--; 392 | if(current_token->type == INT){ 393 | Node *expr_node = malloc(sizeof(Node)); 394 | expr_node = init_node(expr_node, current_token->value, INT); 395 | oper_node->left = expr_node; 396 | current_token++; 397 | current_token++; 398 | } else if(current_token->type == IDENTIFIER){ 399 | Node *identifier_node = malloc(sizeof(Node)); 400 | identifier_node = init_node(identifier_node, current_token->value, IDENTIFIER); 401 | oper_node->left = identifier_node; 402 | current_token++; 403 | current_token++; 404 | } else { 405 | print_error("ERROR: Expected IDENTIFIER or INT", current_token->line_num); 406 | } 407 | current_token++; 408 | 409 | if(current_token->type == OPERATOR){ 410 | Node *oper_node = malloc(sizeof(Node)); 411 | oper_node = init_node(oper_node, current_token->value, OPERATOR); 412 | current->right = oper_node; 413 | current = oper_node; 414 | int operation = 1; 415 | current_token--; 416 | current_token--; 417 | while(operation){ 418 | current_token++; 419 | if(current_token->type == INT){ 420 | Node *expr_node = malloc(sizeof(Node)); 421 | expr_node = init_node(expr_node, current_token->value, INT); 422 | current->left = expr_node; 423 | } else if(current_token->type == IDENTIFIER){ 424 | Node *identifier_node = malloc(sizeof(Node)); 425 | identifier_node = init_node(identifier_node, current_token->value, IDENTIFIER); 426 | current->left = identifier_node; 427 | } else { 428 | printf("ERROR: Unexpected Token\n"); 429 | exit(1); 430 | } 431 | current_token++; 432 | if(current_token->type == OPERATOR){ 433 | current_token++; 434 | current_token++; 435 | if(current_token->type != OPERATOR){ 436 | current_token--; 437 | if(current_token->type == INT){ 438 | Node *expr_node = malloc(sizeof(Node)); 439 | expr_node = init_node(expr_node, current_token->value, INT); 440 | current->right = expr_node; 441 | current_token++; 442 | } else if(current_token->type == IDENTIFIER){ 443 | Node 
*identifier_node = malloc(sizeof(Node)); 444 | identifier_node = init_node(identifier_node, current_token->value, IDENTIFIER); 445 | current->right = identifier_node; 446 | current_token++; 447 | } else { 448 | printf("ERROR: UNRECOGNIZED TOKEN!\n"); 449 | exit(1); 450 | } 451 | operation = 0; 452 | } else { 453 | current_token--; 454 | current_token--; 455 | Node *oper_node = malloc(sizeof(Node)); 456 | oper_node = init_node(oper_node, current_token->value, OPERATOR); 457 | current->right = oper_node; 458 | current = oper_node; 459 | } 460 | } else { 461 | operation = 0; 462 | } 463 | } 464 | } else { 465 | current_token--; 466 | if(current_token->type == INT){ 467 | Node *expr_node = malloc(sizeof(Node)); 468 | expr_node = init_node(expr_node, current_token->value, INT); 469 | oper_node->right = expr_node; 470 | } else if(current_token->type == IDENTIFIER){ 471 | Node *identifier_node = malloc(sizeof(Node)); 472 | identifier_node = init_node(identifier_node, current_token->value, IDENTIFIER); 473 | oper_node->right = identifier_node; 474 | } 475 | current_token++; 476 | } 477 | } else { 478 | current_token--; 479 | if(current_token->type == INT){ 480 | Node *expr_node = malloc(sizeof(Node)); 481 | expr_node = init_node(expr_node, current_token->value, INT); 482 | current->left = expr_node; 483 | current_token++; 484 | } else if(current_token->type == IDENTIFIER){ 485 | Node *identifier_node = malloc(sizeof(Node)); 486 | identifier_node = init_node(identifier_node, current_token->value, IDENTIFIER); 487 | current->left = identifier_node; 488 | current_token++; 489 | } 490 | } 491 | 492 | //if(current_token->type == OPERATOR){ 493 | // current_token = generate_operation_nodes(current_token, current); 494 | //} 495 | 496 | handle_token_errors("Invalid Syntax After Expression", current_token, SEPARATOR); 497 | 498 | current = var_node; 499 | if(strcmp(current_token->value, ";") == 0){ 500 | Node *semi_node = malloc(sizeof(Node)); 501 | semi_node = 
init_node(semi_node, current_token->value, SEPARATOR); 502 | current->right = semi_node; 503 | current = semi_node; 504 | } 505 | return current; 506 | } 507 | 508 | Token *generate_if_operation_nodes(Token *current_token, Node *current_node){ 509 | Node *oper_node = malloc(sizeof(Node)); 510 | oper_node = init_node(oper_node, current_token->value, OPERATOR); 511 | current_node->left->left = oper_node; 512 | current_node = oper_node; 513 | current_token--; 514 | 515 | Node *expr_node = malloc(sizeof(Node)); 516 | expr_node = init_node(expr_node, current_token->value, current_token->type); 517 | current_node->left = expr_node; 518 | 519 | current_token++; 520 | current_token++; 521 | while(current_token->type == INT || current_token->type == IDENTIFIER || current_token->type == OPERATOR){ 522 | if(current_token->type == INT || current_token->type == IDENTIFIER){ 523 | if((current_token->type != INT && current_token->type != IDENTIFIER) || current_token == NULL){ 524 | exit(1); 525 | } 526 | current_token++; 527 | if(current_token->type != OPERATOR || strcmp(current_token->value, "=") == 0){ 528 | current_token--; 529 | if(current_token->type == INT){ 530 | Node *second_expr_node = malloc(sizeof(Node)); 531 | second_expr_node = init_node(second_expr_node, current_token->value, INT); 532 | current_node->right = second_expr_node; 533 | } else if(current_token->type == IDENTIFIER){ 534 | Node *second_identifier_node = malloc(sizeof(Node)); 535 | second_identifier_node = init_node(second_identifier_node, current_token->value, IDENTIFIER); 536 | current_node->right = second_identifier_node; 537 | } else { 538 | printf("ERROR: Expected Integer or Identifier\n"); 539 | exit(1); 540 | } 541 | } 542 | } 543 | if(strcmp(current_token->value, "=") == 0){ 544 | break; 545 | } else if(current_token->type == OPERATOR){ 546 | Node *next_oper_node = malloc(sizeof(Node)); 547 | next_oper_node = init_node(next_oper_node, current_token->value, OPERATOR); 548 | current_node->right = 
next_oper_node; 549 | current_node = next_oper_node; 550 | current_token--; 551 | if(current_token->type == INT){ 552 | Node *second_expr_node = malloc(sizeof(Node)); 553 | second_expr_node = init_node(second_expr_node, current_token->value, INT); 554 | current_node->left = second_expr_node; 555 | } else if(current_token->type == IDENTIFIER){ 556 | Node *second_identifier_node = malloc(sizeof(Node)); 557 | second_identifier_node = init_node(second_identifier_node, current_token->value, IDENTIFIER); 558 | current_node->left = second_identifier_node; 559 | } else { 560 | printf("ERROR: Expected IDENTIFIER or INT\n"); 561 | exit(1); 562 | } 563 | current_token++; 564 | } 565 | current_token++; 566 | } 567 | return current_token; 568 | } 569 | 570 | Token *generate_if_operation_nodes_right(Token *current_token, Node *current_node){ 571 | Node *oper_node = malloc(sizeof(Node)); 572 | oper_node = init_node(oper_node, current_token->value, OPERATOR); 573 | current_node->left->right = oper_node; 574 | current_node = oper_node; 575 | current_token--; 576 | 577 | Node *expr_node = malloc(sizeof(Node)); 578 | expr_node = init_node(expr_node, current_token->value, current_token->type); 579 | current_node->left = expr_node; 580 | 581 | current_token++; 582 | current_token++; 583 | while(current_token->type == INT || current_token->type == IDENTIFIER || current_token->type == OPERATOR){ 584 | if(current_token->type == INT || current_token->type == IDENTIFIER){ 585 | if((current_token->type != INT && current_token->type != IDENTIFIER) || current_token == NULL){ 586 | printf("Syntax Error hERE\n"); 587 | exit(1); 588 | } 589 | current_token++; 590 | if(current_token->type != OPERATOR || strcmp(current_token->value, "=") == 0){ 591 | current_token--; 592 | if(current_token->type == INT){ 593 | Node *second_expr_node = malloc(sizeof(Node)); 594 | second_expr_node = init_node(second_expr_node, current_token->value, INT); 595 | current_node->right = second_expr_node; 596 | } else 
if(current_token->type == IDENTIFIER){ 597 | Node *second_identifier_node = malloc(sizeof(Node)); 598 | second_identifier_node = init_node(second_identifier_node, current_token->value, IDENTIFIER); 599 | current_node->right = second_identifier_node; 600 | } else { 601 | printf("ERROR: Expected Integer or Identifier\n"); 602 | exit(1); 603 | } 604 | } 605 | } 606 | if(strcmp(current_token->value, "=") == 0){ 607 | break; 608 | } else if(current_token->type == OPERATOR){ 609 | Node *next_oper_node = malloc(sizeof(Node)); 610 | next_oper_node = init_node(next_oper_node, current_token->value, OPERATOR); 611 | current_node->right = next_oper_node; 612 | current_node = next_oper_node; 613 | current_token--; 614 | if(current_token->type == INT){ 615 | Node *second_expr_node = malloc(sizeof(Node)); 616 | second_expr_node = init_node(second_expr_node, current_token->value, INT); 617 | current_node->left = second_expr_node; 618 | } else if(current_token->type == IDENTIFIER){ 619 | Node *second_identifier_node = malloc(sizeof(Node)); 620 | second_identifier_node = init_node(second_identifier_node, current_token->value, IDENTIFIER); 621 | current_node->left = second_identifier_node; 622 | } else { 623 | printf("ERROR: Expected IDENTIFIER or INT\n"); 624 | exit(1); 625 | } 626 | current_token++; 627 | } 628 | current_token++; 629 | } 630 | return current_token; 631 | } 632 | 633 | Node *create_if_statement(Token *current_token, Node *current){ 634 | Node *if_node = malloc(sizeof(Node)); 635 | if_node = init_node(if_node, current_token->value, current_token->type); 636 | current->left = if_node; 637 | current = if_node; 638 | current_token++; 639 | 640 | handle_token_errors("ERROR: Expected (", current_token, SEPARATOR); 641 | 642 | Node *open_paren_node = malloc(sizeof(Node)); 643 | open_paren_node = init_node(open_paren_node, current_token->value, SEPARATOR); 644 | current->left = open_paren_node; 645 | current = open_paren_node; 646 | 647 | current_token++; 648 | 649 | 
if(current_token->type != IDENTIFIER && current_token->type != INT){ 650 | printf("ERROR: Expected Identifier or INT\n"); 651 | exit(1); 652 | } 653 | 654 | 655 | while(current_token->type != END_OF_TOKENS && current_token->type != COMP){ 656 | current_token++; 657 | } 658 | 659 | if(current_token->type != COMP){ 660 | printf("ERROR: Expected =\n"); 661 | exit(1); 662 | } 663 | Node *comp_node = malloc(sizeof(Node)); 664 | comp_node = init_node(comp_node, current_token->value, current_token->type); 665 | open_paren_node->left = comp_node; 666 | 667 | while(current_token->type != SEPARATOR){ 668 | current_token--; 669 | } 670 | 671 | current_token++; 672 | current_token++; 673 | 674 | if(current_token->type != OPERATOR || current_token->type == COMP){ 675 | current_token--; 676 | Node *expr_node = malloc(sizeof(Node)); 677 | expr_node = init_node(expr_node, current_token->value, current_token->type); 678 | comp_node->left = expr_node; 679 | } else { 680 | current_token = generate_if_operation_nodes(current_token, current); 681 | } 682 | 683 | 684 | current_token++; 685 | while((current_token->type != END_OF_TOKENS && current_token->type != OPERATOR && current_token->type != SEPARATOR) || strcmp(current_token->value, "=") == 0){ 686 | current_token++; 687 | } 688 | if(current_token->type == SEPARATOR){ 689 | current_token--; 690 | Node *expr_node = malloc(sizeof(Node)); 691 | expr_node = init_node(expr_node, current_token->value, current_token->type); 692 | comp_node->right = expr_node; 693 | } else { 694 | current_token = generate_if_operation_nodes_right(current_token, current); 695 | } 696 | 697 | 698 | 699 | Node *close_paren_node = malloc(sizeof(Node)); 700 | close_paren_node = init_node(close_paren_node, ")", SEPARATOR); 701 | open_paren_node->right = close_paren_node; 702 | current = close_paren_node; 703 | 704 | return current; 705 | } 706 | 707 | Node *handle_write_node(Token *current_token, Node *current){ 708 | Node *write_node = NULL; 709 | write_node = 
init_node(write_node, current_token->value, current_token->type); 710 | current->left = write_node; 711 | current = write_node; 712 | 713 | current_token++; 714 | 715 | handle_token_errors("ERROR: Expected (", current_token, SEPARATOR); 716 | 717 | current_token++; 718 | if(current_token->type != STRING && current_token->type != IDENTIFIER){ 719 | handle_token_errors("ERROR: Expected String Literal", current_token, STRING); 720 | } 721 | 722 | Node *string_node = NULL; 723 | string_node = init_node(string_node, current_token->value, current_token->type); 724 | current->left = string_node; 725 | 726 | current_token++; 727 | 728 | handle_token_errors("ERROR: Expected ,", current_token, SEPARATOR); 729 | 730 | current_token++; 731 | 732 | Node *number_node = NULL; 733 | number_node = init_node(number_node, current_token->value, current_token->type); 734 | current->right = number_node; 735 | 736 | current_token++; 737 | 738 | handle_token_errors("ERROR: Expected )", current_token, SEPARATOR); 739 | 740 | current_token++; 741 | 742 | if(strcmp(current_token->value, ";") != 0){ 743 | print_error("ERROR: Expected ;", current_token->line_num); 744 | } 745 | 746 | 747 | Node *semi_node = NULL; 748 | semi_node = init_node(semi_node, current_token->value, current_token->type); 749 | number_node->right = semi_node; 750 | current = semi_node; 751 | return current; 752 | } 753 | 754 | Node *parser(Token *tokens){ 755 | Token *current_token = &tokens[0]; 756 | Node *root = malloc(sizeof(Node)); 757 | root = init_node(root, "PROGRAM", BEGINNING); 758 | 759 | Node *current = root; 760 | 761 | Node *open_curly = malloc(sizeof(Node)); 762 | //Node *close_curly = malloc(sizeof(Node)); 763 | 764 | curly_stack *stack = malloc(sizeof(curly_stack)); 765 | 766 | while(current_token->type != END_OF_TOKENS){ 767 | if(current == NULL){ 768 | break; 769 | } 770 | switch(current_token->type){ 771 | case KEYWORD: 772 | if(strcmp(current_token->value, "EXIT") == 0){ 773 | current = 
handle_exit_syscall(root, current_token, current); 774 | } else if(strcmp(current_token->value, "INT") == 0){ 775 | current = create_variables(current_token, current); 776 | } else if(strcmp(current_token->value, "IF") == 0){ 777 | current = create_if_statement(current_token, current); 778 | } else if(strcmp(current_token->value, "WHILE") == 0){ 779 | current = create_if_statement(current_token, current); 780 | } else if(strcmp(current_token->value, "WRITE") == 0){ 781 | current = handle_write_node(current_token, current); 782 | } 783 | break; 784 | case SEPARATOR: 785 | if(strcmp(current_token->value, "{") == 0){ 786 | Token *temp = current_token; 787 | open_curly = init_node(open_curly, temp->value, SEPARATOR); 788 | current->left = open_curly; 789 | current = open_curly; 790 | push_curly(stack, open_curly); 791 | current = peek_curly(stack); 792 | } 793 | if(strcmp(current_token->value, "}") == 0){ 794 | Node *close_curly = malloc(sizeof(Node)); 795 | open_curly = pop_curly(stack); 796 | if(open_curly == NULL){ 797 | printf("ERROR: Expected Open Parenthesis!\n"); 798 | exit(1); 799 | } 800 | close_curly = init_node(close_curly, current_token->value, current_token->type); 801 | current->right = close_curly; 802 | current = close_curly; 803 | } 804 | break; 805 | case OPERATOR: 806 | break; 807 | case INT: 808 | break; 809 | case IDENTIFIER: 810 | current_token--; 811 | if(current_token->type == SEPARATOR && ((strcmp(current_token->value, ";") == 0) || (strcmp(current_token->value, "}") == 0) || (strcmp(current_token->value, "{") == 0))){ 812 | current_token++; 813 | current = create_variable_reusage(current_token, current); 814 | } else { 815 | current_token++; 816 | } 817 | break; 818 | case STRING: 819 | break; 820 | case COMP: 821 | break; 822 | case BEGINNING: 823 | break; 824 | case END_OF_TOKENS: 825 | break; 826 | } 827 | current_token++; 828 | } 829 | return root; 830 | } 831 | 
-------------------------------------------------------------------------------- /parserf.h: -------------------------------------------------------------------------------- 1 | #ifndef PARSER_H_ 2 | #define PARSER_H_ 3 | 4 | typedef struct Node{ 5 | char *value; 6 | TokenType type; 7 | struct Node *right; 8 | struct Node *left; 9 | } Node; 10 | 11 | Node *parser(Token *tokens); 12 | void print_tree(Node *node, int indent, char *identifier); 13 | Node *init_node(Node *node, char *value, TokenType type); 14 | void print_error(char *error_type); 15 | 16 | #endif 17 | -------------------------------------------------------------------------------- /test.unn: -------------------------------------------------------------------------------- 1 | int x = 0; 2 | int y = 0; 3 | int z = 0; 4 | 5 | int i = 1; 6 | while(i neq 100){ 7 | x = y; 8 | y = z; 9 | z = y + x; 10 | i = i + 1; 11 | } 12 | 13 | exit(0); 14 | --------------------------------------------------------------------------------