├── 1.txt ├── 2.txt ├── 3.txt ├── Grammer.pptx ├── README.md └── comiler.cpp /1.txt: -------------------------------------------------------------------------------- 1 | DATA B 2 | DATA A 3 | START: 4 | READ AX 5 | READ BX 6 | MOV A,AX 7 | MOV B,BX 8 | IF AX GT BX THEN 9 | PRINT AX 10 | ELSE 11 | PRINT BX 12 | ENDIF -------------------------------------------------------------------------------- /2.txt: -------------------------------------------------------------------------------- 1 | CONST X = 5 2 | CONST Y = 0 3 | CONST Z = 1 4 | START: 5 | MOV AX,X 6 | MOV BX,Z 7 | MOV CX,Y 8 | x: 9 | IF AX GT CX THEN 10 | PRINT AX 11 | SUB AX, AX, BX 12 | JUMP x 13 | ELSE 14 | PRINT AX 15 | ENDIF -------------------------------------------------------------------------------- /3.txt: -------------------------------------------------------------------------------- 1 | DATA C[3] 2 | START: 3 | READ AX 4 | READ BX 5 | READ CX 6 | IF AX GT BX THEN 7 | IF AX GT CX THEN 8 | PRINT AX 9 | ELSE 10 | PRINT CX 11 | ENDIF 12 | ELSE 13 | IF BX GT CX THEN 14 | PRINT BX 15 | ELSE 16 | PRINT CX 17 | ENDIF 18 | ENDIF -------------------------------------------------------------------------------- /Grammer.pptx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/azhartalha/Assembly-Language-Compiler/c72de16f77a252e4325b21cee7076929a2991d5e/Grammer.pptx -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | Please go through the PPT provided to understand the grammar and the various stages involved in compiling and executing the code. 2 | 3 | The language supports nested loops, nested if-else, and recursive operations as well. 4 | 5 | In order to compile and run a program, first run the compiler (comiler.cpp), and then give the path to your file. 
6 | 7 | I am also adding 3 sample programs in the same repo; you can run them to save time. 8 | - 1.txt takes 2 numbers and prints the greatest of them 9 | - 2.txt has 3 constants x, y, z (you can change them in 2.txt file). The program will print a series x, x-z, x-2z, ..., y. 10 | Here x = 5, y = 0, z = 1 (you can change these values) so you will get an output 5, 4, 3, 2, 1, 0. 11 | - 3.txt prints the greatest of 3 numbers (nested if else). Try changing the x, y, z values. -------------------------------------------------------------------------------- /comiler.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | 6 | int stringToInt(char *str) 7 | { 8 | int num = 0; 9 | for(int i=0; str[i]!='\0'; i++) 10 | { 11 | num = str[i]-'0'; 12 | } 13 | return num; 14 | } 15 | int compareStrings(char *str1, char *str2) 16 | { 17 | int i; 18 | for(i=0; str1[i]!='\0'; i++) 19 | { 20 | if(str1[i] != str2[i]) 21 | return 0; 22 | } 23 | if(str1[i]==str2[i]) 24 | return 1; 25 | return 0; 26 | } 27 | int stringInRegisters(char * target, char registers[8][3]) 28 | { 29 | for(int i=0; i<8; i++) 30 | if(compareStrings(target, registers[i])) 31 | return i; 32 | return -1; 33 | } 34 | char ** tokenizer(char *line, int& noOfTokens) 35 | { 36 | char ** tokens = (char **)malloc(10 * sizeof(char *)); 37 | char * token = (char *)malloc(10 * sizeof(char)); 38 | int k=0; 39 | token[0]='\0'; 40 | int i=0; 41 | while(line[i]==' ') 42 | i++; 43 | for (; line[i] != '\n' && line[i] != '\0'; i++) 44 | { 45 | if (line[i] == ' ' || line[i] == ',' || line[i] == '[' || line[i] == ']' || line[i] == '\t') 46 | { 47 | if (token[0]!='\0') 48 | { 49 | token[k] = '\0'; 50 | tokens[noOfTokens++] = token; 51 | token = (char *)malloc(10 * sizeof(char)); 52 | token[0]='\0'; 53 | k = 0; 54 | } 55 | else 56 | continue; 57 | } 58 | else 59 | { 60 | token[k++] = line[i]; 61 | } 62 | } 63 | if(token[0]!='\0') 64 | { 65 | token[k] 
= '\0'; 66 | tokens[noOfTokens++] = token; 67 | } 68 | return tokens; 69 | } 70 | 71 | long hash(char *str) 72 | { 73 | unsigned long hash = 5381; 74 | int c; 75 | 76 | while (c = *str++) 77 | hash = ((hash << 5) + hash) + c; /* hash * 33 + c */ 78 | 79 | return hash; 80 | } 81 | 82 | int checkIfLable(char * str) //will automatically remove the ':' 83 | { 84 | int i; 85 | for(i=0; str[i]!='\0' && str[i]!=':'; i++); 86 | if(str[i]!=':' || str[i+1]!='\0') 87 | return 0; 88 | str[i] = '\0'; 89 | return 1; 90 | } 91 | 92 | void codeExecution(int **intertCode, int lp, char *constants[117], int constantTable[117][2], int constantsCount) 93 | { 94 | void * memory = malloc(1024), *registers = malloc(8); 95 | 96 | for(int i=0; i *(( char *)(registers+intertCode[i][3])))) 142 | { 143 | i = intertCode[i][4]; 144 | continue; 145 | } 146 | else if(intertCode[i][2] == 11 && !(*(( char *)(registers+intertCode[i][1])) <= *(( char *)(registers+intertCode[i][3])))) 147 | { 148 | i = intertCode[i][4]; 149 | continue; 150 | } 151 | else if(intertCode[i][2] == 12 && !(*(( char *)(registers+intertCode[i][1])) >= *(( char *)(registers+intertCode[i][3])))) 152 | { 153 | i = intertCode[i][4]; 154 | continue; 155 | } 156 | } 157 | else if(intertCode[i][0] == 13) 158 | { 159 | printf("%d ", *( char *)(registers+intertCode[i][1])); 160 | } 161 | else if(intertCode[i][0] == 14) 162 | { 163 | scanf("%d", ( char *)(registers+intertCode[i][1])); 164 | } 165 | i++; 166 | } 167 | } 168 | 169 | int** generateOpcode(FILE* fr, int& lp) 170 | { 171 | int lableTable[117], lableCount = 0, symbolTable[117][2], symbolCount = 0, constantsTable[117][2] , constantstCount=0; 172 | char *symbols[117], *constants[117], *lables[117]; 173 | for(int i=0 ;i<117; i++) 174 | { 175 | lableTable[i] = -1; 176 | symbolTable[i][0] = -1; 177 | constantsTable[i][0] = -1; 178 | } 179 | char registers[8][3] = {"AX", "BX", "CX", "DX", "EX", "FX", "GX", "HX"}; 180 | int address = 0; 181 | while (!feof(fr)) 182 | { 183 | char 
line[100]; 184 | fgets(line, 100, fr); 185 | int noOfTokens = 0; 186 | char ** tokens = tokenizer(line, noOfTokens); 187 | if(noOfTokens == 0) 188 | continue; 189 | if(compareStrings(tokens[0], "START:")) 190 | break; 191 | if(compareStrings(tokens[0], "DATA")) 192 | { 193 | int index = hash(tokens[1])%117; 194 | symbolTable[index][0] = address; 195 | if(noOfTokens==2) 196 | { 197 | symbolTable[index][1] = 1; 198 | address++; 199 | } 200 | else if(noOfTokens == 3) 201 | { 202 | int tmp = stringToInt(tokens[2]); 203 | symbolTable[index][1] = tmp; 204 | address+= tmp; 205 | } 206 | else 207 | { 208 | printf("\nInvalid syntax 1\n"); 209 | return NULL; 210 | } 211 | symbols[symbolCount++] = tokens[1]; 212 | } 213 | else if(compareStrings(tokens[0], "CONST")) 214 | { 215 | if(noOfTokens == 4) 216 | { 217 | int index = hash(tokens[1])%117; 218 | constantsTable[index][0] = address; 219 | address++; 220 | constantsTable[index][1] = stringToInt(tokens[3]); 221 | } 222 | else 223 | { 224 | printf("\nInvalid syntax 2\n"); 225 | return NULL; 226 | } 227 | constants[constantstCount++] = tokens[1]; 228 | } 229 | else{ 230 | printf("\nInvalid syntax 3\n"); 231 | return NULL; 232 | } 233 | } 234 | printf("\nSymbol table\n"); 235 | for(int i=0; i4 || noOfTokens<3) 268 | { 269 | printf("Invalid syntax 1"); 270 | return NULL; 271 | } 272 | int index = hash(tokens[1])%117; 273 | if(symbolTable[index][0]!=-1) 274 | { 275 | if(noOfTokens==4) 276 | { 277 | int RI = stringInRegisters(tokens[3], registers); 278 | if(RI == -1) 279 | { 280 | printf("\nInvalid Synatx\n"); 281 | return NULL; 282 | } 283 | interLang[lp][0] = 1; 284 | interLang[lp][1] = symbolTable[index][0]+stringToInt(tokens[2]); 285 | interLang[lp][2] = RI; 286 | lp++; 287 | } 288 | else 289 | { 290 | int RI = stringInRegisters(tokens[2], registers); 291 | if(RI == -1) 292 | { 293 | printf("\nInvalid Synatx\n"); 294 | return NULL; 295 | } 296 | interLang[lp][0] = 1; 297 | interLang[lp][1] = symbolTable[index][0]; 298 | 
interLang[lp][2] = RI; 299 | lp++; 300 | } 301 | } 302 | else if(constantsTable[index][0]!=-1) 303 | { 304 | printf("\nCannot copy into constant\n"); 305 | return NULL; 306 | } 307 | else 308 | { 309 | int RI = stringInRegisters(tokens[1], registers); 310 | if(RI == -1) 311 | { 312 | printf("\nInvalid syntax\n"); 313 | return NULL; 314 | } 315 | index = hash(tokens[2])%117; 316 | if(noOfTokens == 4) 317 | { 318 | if(symbolTable[index][0] == -1) 319 | { 320 | printf("\nInvalid Syntax\n"); 321 | return NULL; 322 | } 323 | interLang[lp][0] = 2; 324 | interLang[lp][1] = RI; 325 | interLang[lp][2] = symbolTable[index][0]+stringToInt(tokens[3]); 326 | lp++; 327 | } 328 | else 329 | { 330 | if(symbolTable[index][0] != -1) 331 | { 332 | interLang[lp][0] = 2; 333 | interLang[lp][1] = RI; 334 | interLang[lp][2] = symbolTable[index][0]; 335 | lp++; 336 | } 337 | else if(constantsTable[index][0] != -1) 338 | { 339 | interLang[lp][0] = 2; 340 | interLang[lp][1] = RI; 341 | interLang[lp][2] = constantsTable[index][0]; 342 | lp++; 343 | } 344 | else 345 | { 346 | printf("\nInvalid syntax\n"); 347 | return NULL; 348 | } 349 | } 350 | } 351 | 352 | } 353 | else if(compareStrings(tokens[0], "ADD")) 354 | { 355 | if(noOfTokens != 4) 356 | { 357 | printf("\nInvalid String\n"); 358 | printf("\nInvalid String\n"); 359 | return NULL; 360 | } 361 | int i1 = stringInRegisters(tokens[1], registers), i2 = stringInRegisters(tokens[2], registers), i3 = stringInRegisters(tokens[3], registers); 362 | if(i1== -1|| i2 == -1 || i3 == -1) 363 | { 364 | printf("\nOnly registers can be used in arthematic operations\n"); 365 | return NULL; 366 | } 367 | interLang[lp][0] = 3; 368 | interLang[lp][1] = i1; 369 | interLang[lp][2] = i2; 370 | interLang[lp][3] = i3; 371 | lp++; 372 | } 373 | else if(compareStrings(tokens[0], "SUB")) 374 | { 375 | if(noOfTokens != 4) 376 | { 377 | printf("\nInvalid String x\n"); 378 | return NULL; 379 | } 380 | int i1 = stringInRegisters(tokens[1], registers), i2 = 
stringInRegisters(tokens[2], registers), i3 = stringInRegisters(tokens[3], registers); 381 | if(i1== -1|| i2 == -1 || i3 == -1) 382 | { 383 | printf("\nOnly registers can be used in arthematic operations\n"); 384 | return NULL; 385 | } 386 | interLang[lp][0] = 4; 387 | interLang[lp][1] = i1; 388 | interLang[lp][2] = i2; 389 | interLang[lp][3] = i3; 390 | lp++; 391 | } 392 | else if(compareStrings(tokens[0], "MUL")) 393 | { 394 | if(noOfTokens != 4) 395 | { 396 | printf("\nInvalid String\n"); 397 | return NULL; 398 | } 399 | int i1 = stringInRegisters(tokens[1], registers), i2 = stringInRegisters(tokens[2], registers), i3 = stringInRegisters(tokens[3], registers); 400 | if(i1== -1|| i2 == -1 || i3 == -1) 401 | { 402 | printf("\nOnly registers can be used in arthematic operations\n"); 403 | return NULL; 404 | } 405 | interLang[lp][0] = 5; 406 | interLang[lp][1] = i1; 407 | interLang[lp][2] = i2; 408 | interLang[lp][3] = i3; 409 | lp++; 410 | } 411 | else if(compareStrings(tokens[0], "READ")) 412 | { 413 | if(noOfTokens!= 2) 414 | { 415 | printf("\nInvalid syntax\n"); 416 | return NULL; 417 | } 418 | int ind = stringInRegisters(tokens[1], registers); 419 | if(ind == -1) 420 | { 421 | printf("\nInvalid syntax\n"); 422 | return NULL; 423 | } 424 | interLang[lp][0] = 14; 425 | interLang[lp][1] = ind; 426 | lp++; 427 | } 428 | else if(compareStrings(tokens[0], "PRINT")) 429 | { 430 | if(noOfTokens!= 2) 431 | { 432 | printf("\nInvalid syntax\n"); 433 | return NULL; 434 | } 435 | int ind = stringInRegisters(tokens[1], registers); 436 | if(ind == -1) 437 | { 438 | printf("\nInvalid syntax\n"); 439 | return NULL; 440 | } 441 | interLang[lp][0] = 13; 442 | interLang[lp][1] = ind; 443 | lp++; 444 | } 445 | else if(compareStrings(tokens[0], "JUMP")) 446 | { 447 | if(noOfTokens != 2) 448 | { 449 | printf("\nInvalid Syntax\n"); 450 | return NULL; 451 | } 452 | int index = hash(tokens[1])%117; 453 | if(lableTable[index]==-1) 454 | { 455 | printf("\nInvalid Syntax\n"); 456 | return 
NULL; 457 | } 458 | interLang[lp][0] = 6; 459 | interLang[lp][1] = lableTable[index]; 460 | lp++; 461 | } 462 | else if(compareStrings(tokens[0], "IF")) 463 | { 464 | if(noOfTokens != 5) 465 | { 466 | printf("\nInvalid syntax at if\n"); 467 | return NULL; 468 | } 469 | int i1 = stringInRegisters(tokens[1], registers), i2, i3 = stringInRegisters(tokens[3], registers); 470 | if(compareStrings(tokens[2], "EQ")) 471 | { 472 | i2 = 8; 473 | } 474 | else if(compareStrings(tokens[2], "LT")) 475 | { 476 | i2 = 9; 477 | } 478 | else if(compareStrings(tokens[2], "GT")) 479 | { 480 | i2 = 10; 481 | } 482 | else if(compareStrings(tokens[2], "LTEQ")) 483 | { 484 | i2 = 11; 485 | } 486 | else if(compareStrings(tokens[2], "GTEQ")) 487 | { 488 | i2 = 12; 489 | } 490 | else 491 | { 492 | printf("\nInvalid syntax 1\n"); 493 | return NULL; 494 | } 495 | if(i3 == -1 || i1 == -1 || !compareStrings(tokens[4], "THEN")) 496 | { 497 | printf("\n%d %d %s\n", i1, i3, tokens[3]); 498 | printf("\nInvalid syntax 2\n"); 499 | return NULL; 500 | } 501 | interLang[lp][0] = 7; 502 | interLang[lp][1] = i1; 503 | interLang[lp][2] = i2; 504 | interLang[lp][3] = i3; 505 | ifStack[top] = lp; 506 | ieTrackStack[top] = 1; 507 | top++; 508 | lp++; 509 | } 510 | else if(compareStrings(tokens[0], "ELSE")) 511 | { 512 | if(top > 0 && ieTrackStack[top-1]!=1) 513 | { 514 | printf("\nInvalid invalid if else\n"); 515 | return NULL; 516 | } 517 | interLang[ifStack[top-1]][4] = lp + 1; 518 | top--; 519 | interLang[lp][0] = 6; 520 | ifStack[top] = lp; 521 | ieTrackStack[top] = 0; 522 | top++; 523 | lp++; 524 | } 525 | else if(compareStrings(tokens[0], "ENDIF")) 526 | { 527 | if(top > 0 && ieTrackStack[top-1] == 1) 528 | interLang[ifStack[top-1]][4] = lp; 529 | else 530 | interLang[ifStack[top-1]][1] = lp; 531 | top--; 532 | } 533 | else if(noOfTokens == 1 && checkIfLable(tokens[0])) 534 | { 535 | lables[lableCount++] = tokens[0]; 536 | int index = hash(tokens[0])%117; 537 | lableTable[index] = lp; 538 | } 539 | else 
540 | { 541 | for(int a0 = 0; a0