├── .gitignore ├── Project-1 ├── .gitignore ├── Project Report.pdf ├── README.md ├── lexer.l ├── symboltable.h ├── testcases │ ├── test-case-1.c │ ├── test-case-2.c │ ├── test-case-3.c │ ├── test-case-4.c │ └── test-case-5.c └── tokens.h ├── Project-2 ├── .gitignore ├── README.md ├── compile ├── lexl.l ├── parser.y ├── symboltable.h └── testcases │ ├── test-case-1.c │ ├── test-case-2.c │ ├── test-case-3.c │ ├── test-case-4.c │ ├── test-case-5.c │ └── test-case-6.c ├── Project-3 ├── README.md ├── compile ├── lexer.l ├── parser.y ├── symboltable.h ├── test.c └── testcases │ ├── test-case-1.c │ ├── test-case-2.c │ ├── test-case-3.c │ ├── test-case-4.c │ ├── test-case-5.c │ └── testcases ├── Project-4 ├── ICG.code ├── README.md ├── compile ├── icg ├── lexer.l ├── parser.y ├── symboltable.h ├── test-func.c └── test.c └── README.md /.gitignore: -------------------------------------------------------------------------------- 1 | *.swp 2 | -------------------------------------------------------------------------------- /Project-1/.gitignore: -------------------------------------------------------------------------------- 1 | a.out 2 | lex.yy.c 3 | -------------------------------------------------------------------------------- /Project-1/Project Report.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kaushiksk/mini-c-compiler/f55e6a84a2e82a2a88957c41f626b0a1954c232d/Project-1/Project Report.pdf -------------------------------------------------------------------------------- /Project-1/README.md: -------------------------------------------------------------------------------- 1 | Compiler Design Lab - Project 1 2 | =============================== 3 | 4 | # Lexical Analyser for a subset of the C language 5 | 6 | ### Team Members 7 | 1. Karthik M - 15CO221 8 | 2. Kaushik S Kalmady - 15CO222 9 | 10 | ### Lab Batch - Wednesday 11 | 12 | This folder contains the code for the Lexical Analyser. 
13 | 14 | |file|contents| 15 | |----|--------| 16 | |lexer.l|lex file with rules| 17 | |symboltable.h|symbol table implementation using hash organisation| 18 | |tokens.h| tokens used (as enums)| 19 | 20 | The test cases are in the testcases folder. 21 | The details about each test case are written in the header section of each file. 22 | -------------------------------------------------------------------------------- /Project-1/lexer.l: -------------------------------------------------------------------------------- 1 | %{ 2 | 3 | #include 4 | #include 5 | #include "symboltable.h" 6 | #include "tokens.h" 7 | 8 | entry_t** symbol_table; 9 | entry_t** constant_table; 10 | int cmnt_strt = 0; 11 | 12 | %} 13 | 14 | letter [a-zA-Z] 15 | digit [0-9] 16 | ws [ \t\r\f\v]+ 17 | identifier (_|{letter})({letter}|{digit}|_){0,31} 18 | hex [0-9a-fA-F] 19 | 20 | /* Exclusive states */ 21 | %x CMNT 22 | %x PREPROC 23 | 24 | %% 25 | /* Keywords*/ 26 | "int" {printf("\t%-30s : %3d\n",yytext,INT);} 27 | "long" {printf("\t%-30s : %3d\n",yytext,LONG);} 28 | "long long" {printf("\t%-30s : %3d\n",yytext,LONG_LONG);} 29 | "short" {printf("\t%-30s : %3d\n",yytext,SHORT);} 30 | "signed" {printf("\t%-30s : %3d\n",yytext,SIGNED);} 31 | "unsigned" {printf("\t%-30s : %3d\n",yytext,UNSIGNED);} 32 | "for" {printf("\t%-30s : %3d\n",yytext,FOR);} 33 | "break" {printf("\t%-30s : %3d\n",yytext,BREAK);} 34 | "continue" {printf("\t%-30s : %3d\n",yytext,CONTINUE);} 35 | "if" {printf("\t%-30s : %3d\n",yytext,IF);} 36 | "else" {printf("\t%-30s : %3d\n",yytext,ELSE);} 37 | "return" {printf("\t%-30s : %3d\n",yytext,RETURN);} 38 | 39 | {identifier} {printf("\t%-30s : %3d\n", yytext,IDENTIFIER); 40 | insert( symbol_table,yytext,IDENTIFIER );} 41 | {ws} ; 42 | [+\-]?[0][xX]{hex}+[lLuU]? { /* prefix class is [xX]: inside a character class '|' is a literal, so "[x|X]" also accepted "0|1f" as a hex constant */ printf("\t%-30s : %3d\n", yytext,HEX_CONSTANT); 43 | insert( constant_table,yytext,HEX_CONSTANT);} 44 | [+\-]?{digit}+[lLuU]? 
{printf("\t%-30s : %3d\n", yytext,DEC_CONSTANT); 45 | insert( constant_table,yytext,DEC_CONSTANT);} 46 | "/*" {cmnt_strt = yylineno; BEGIN CMNT;} 47 | .|{ws} ; 48 | \n {yylineno++;} 49 | "*/" {BEGIN INITIAL;} 50 | "/*" {printf("Line %3d: Nested comments are not valid!\n",yylineno);} 51 | <> {printf("Line %3d: Unterminated comment\n", cmnt_strt); yyterminate();} 52 | ^"#include" {BEGIN PREPROC;} 53 | "<"[^<>\n]+">" {printf("\t%-30s : %3d\n",yytext,HEADER_FILE);} 54 | {ws} ; 55 | \"[^"\n]+\" {printf("\t%-30s : %3d\n",yytext,HEADER_FILE);} 56 | \n {yylineno++; BEGIN INITIAL;} 57 | . {printf("Line %3d: Illegal header file format \n",yylineno);} 58 | "//".* ; 59 | 60 | \"[^\"\n]*\" { 61 | 62 | if(yytext[yyleng-2]=='\\') /* check if it was an escaped quote */ 63 | { 64 | yyless(yyleng-1); /* push the quote back if it was escaped */ 65 | yymore(); 66 | } 67 | else 68 | insert( constant_table,yytext,STRING); 69 | } 70 | 71 | \"[^\"\n]*$ {printf("Line %3d: Unterminated string %s\n",yylineno,yytext);} 72 | {digit}+({letter}|_)+ {printf("Line %3d: Illegal identifier name %s\n",yylineno,yytext);} 73 | \n {yylineno++;} 74 | "--" {printf("\t%-30s : %3d\n",yytext,DECREMENT);} 75 | "++" {printf("\t%-30s : %3d\n",yytext,INCREMENT);} 76 | "->" {printf("\t%-30s : %3d\n",yytext,PTR_SELECT);} 77 | "&&" {printf("\t%-30s : %3d\n",yytext,LOGICAL_AND);} 78 | "||" {printf("\t%-30s : %3d\n",yytext,LOGICAL_OR);} 79 | "<=" {printf("\t%-30s : %3d\n",yytext,LS_THAN_EQ);} 80 | ">=" {printf("\t%-30s : %3d\n",yytext,GR_THAN_EQ);} 81 | "==" {printf("\t%-30s : %3d\n",yytext,EQ);} 82 | "!=" {printf("\t%-30s : %3d\n",yytext,NOT_EQ);} 83 | ";" {printf("\t%-30s : %3d\n",yytext,DELIMITER);} 84 | "{" {printf("\t%-30s : %3d\n",yytext,OPEN_BRACES);} 85 | "}" {printf("\t%-30s : %3d\n",yytext,CLOSE_BRACES);} 86 | "," {printf("\t%-30s : %3d\n",yytext,COMMA);} 87 | "=" {printf("\t%-30s : %3d\n",yytext,ASSIGN);} 88 | "(" {printf("\t%-30s : %3d\n",yytext,OPEN_PAR);} 89 | ")" {printf("\t%-30s : 
%3d\n",yytext,CLOSE_PAR);} 90 | "[" {printf("\t%-30s : %3d\n",yytext,OPEN_SQ_BRKT);} 91 | "]" {printf("\t%-30s : %3d\n",yytext,CLOSE_SQ_BRKT);} 92 | "-" {printf("\t%-30s : %3d\n",yytext,MINUS);} 93 | "+" {printf("\t%-30s : %3d\n",yytext,PLUS);} 94 | "*" {printf("\t%-30s : %3d\n",yytext,STAR);} 95 | "/" {printf("\t%-30s : %3d\n",yytext,FW_SLASH);} 96 | "%" {printf("\t%-30s : %3d\n",yytext,MODULO);} 97 | "<" {printf("\t%-30s : %3d\n",yytext,LS_THAN);} 98 | ">" {printf("\t%-30s : %3d\n",yytext,GR_THAN);} 99 | . {printf("Line %3d: Illegal character %s\n",yylineno,yytext);} 100 | 101 | %% 102 | 103 | int main() 104 | { 105 | yyin=fopen("testcases/test-case-5.c","r"); 106 | symbol_table=create_table(); 107 | constant_table=create_table(); 108 | yylex(); 109 | printf("\n\tSymbol table"); 110 | display(symbol_table); 111 | printf("\n\tConstants Table"); 112 | display(constant_table); 113 | printf("NOTE: Please refer tokens.h for token meanings\n"); 114 | } 115 | -------------------------------------------------------------------------------- /Project-1/symboltable.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Compiler Design Project 1 : Lexical Analyser 3 | * 4 | * File : symboltable.h 5 | * Description : This file contains functions related to a hash organised symbol table. 6 | * The functions implemented are: 7 | * create_table(), insert(), search, display() 8 | * 9 | * Authors : Karthik M - 15CO22, Kaushik S Kalmady - 15CO222 10 | * Date : 17-1-2018 11 | */ 12 | 13 | 14 | 15 | #include 16 | #include 17 | #include 18 | #include 19 | #include 20 | 21 | #define HASH_TABLE_SIZE 100 22 | 23 | /* struct to hold each entry */ 24 | struct entry_s 25 | { 26 | char* lexeme; 27 | int token_name; 28 | struct entry_s* successor; 29 | }; 30 | 31 | typedef struct entry_s entry_t; 32 | 33 | /* Create a new hash_table. 
*/ 34 | entry_t** create_table() 35 | { 36 | entry_t** hash_table_ptr = NULL; // declare a pointer 37 | 38 | /* Allocate memory for a hashtable array of size HASH_TABLE_SIZE */ 39 | if( ( hash_table_ptr = malloc( sizeof( entry_t* ) * HASH_TABLE_SIZE ) ) == NULL ) 40 | return NULL; 41 | 42 | int i; 43 | 44 | // Initialise all entries as NULL 45 | for( i = 0; i < HASH_TABLE_SIZE; i++ ) 46 | { 47 | hash_table_ptr[i] = NULL; 48 | } 49 | 50 | return hash_table_ptr; 51 | } 52 | 53 | /* Generate hash from a string. Then generate an index in [0, HASH_TABLE_SIZE) */ 54 | uint32_t hash( char *lexeme ) 55 | { 56 | size_t i; 57 | uint32_t hash; 58 | 59 | /* Apply jenkin's hash function 60 | * https://en.wikipedia.org/wiki/Jenkins_hash_function#one-at-a-time 61 | */ 62 | for ( hash = i = 0; i < strlen(lexeme); ++i ) { 63 | hash += lexeme[i]; 64 | hash += ( hash << 10 ); 65 | hash ^= ( hash >> 6 ); 66 | } 67 | hash += ( hash << 3 ); 68 | hash ^= ( hash >> 11 ); 69 | hash += ( hash << 15 ); 70 | 71 | return hash % HASH_TABLE_SIZE; // return an index in [0, HASH_TABLE_SIZE) 72 | } 73 | 74 | /* Create an entry for a lexeme, token pair. This will be called from the insert function. Returns a heap-allocated entry that owns a private copy of lexeme, or NULL if either allocation fails (nothing is leaked in that case). */ 75 | entry_t *create_entry( char *lexeme, int token_name ) 76 | { 77 | entry_t *newentry; 78 | 79 | /* Allocate space for newentry */ 80 | if( ( newentry = malloc( sizeof( entry_t ) ) ) == NULL ) { 81 | return NULL; 82 | } 83 | /* Copy lexeme to newentry location using strdup (string-duplicate). Return NULL if it fails */ 84 | if( ( newentry->lexeme = strdup( lexeme ) ) == NULL ) { 85 | free( newentry ); /* release the half-built entry instead of leaking it */ return NULL; 86 | } 87 | 88 | newentry->token_name = token_name; 89 | newentry->successor = NULL; 90 | 91 | return newentry; 92 | } 93 | 94 | /* Search for an entry given a lexeme. 
Return a pointer to the entry of the lexeme exists, else return NULL */ 95 | entry_t* search( entry_t** hash_table_ptr, char* lexeme ) 96 | { 97 | uint32_t idx = 0; 98 | entry_t* myentry; 99 | 100 | // get the index of this lexeme as per the hash function 101 | idx = hash( lexeme ); 102 | 103 | /* Traverse the linked list at this idx and see if lexeme exists */ 104 | myentry = hash_table_ptr[idx]; 105 | 106 | while( myentry != NULL && strcmp( lexeme, myentry->lexeme ) != 0 ) 107 | { 108 | myentry = myentry->successor; 109 | } 110 | 111 | if(myentry == NULL) // lexeme is not found 112 | return NULL; 113 | 114 | else // lexeme found 115 | return myentry; 116 | 117 | } 118 | 119 | /* Insert an entry into a hash table. */ 120 | void insert( entry_t** hash_table_ptr, char* lexeme, int token_name ) 121 | { 122 | if( search( hash_table_ptr, lexeme ) != NULL) // If lexeme already exists, don't insert, return 123 | return; 124 | 125 | uint32_t idx; 126 | entry_t* newentry = NULL; 127 | entry_t* head = NULL; 128 | 129 | idx = hash( lexeme ); // Get the index for this lexeme based on the hash function 130 | newentry = create_entry( lexeme, token_name ); // Create an entry using the pair 131 | 132 | if(newentry == NULL) // In case there was some error while executing create_entry() 133 | { 134 | printf("Insert failed. 
New entry could not be created."); 135 | exit(1); 136 | } 137 | 138 | head = hash_table_ptr[idx]; // get the head entry at this index 139 | 140 | if(head == NULL) // This is the first lexeme that matches this hash index 141 | { 142 | hash_table_ptr[idx] = newentry; 143 | } 144 | else // if not, add this entry to the head 145 | { 146 | newentry->successor = hash_table_ptr[idx]; 147 | hash_table_ptr[idx] = newentry; 148 | } 149 | } 150 | 151 | // Traverse the hash table and print all the entries 152 | void display(entry_t** hash_table_ptr) 153 | { 154 | int i; 155 | entry_t* traverser; 156 | printf("\n==========================================\n"); 157 | printf("\t < lexeme , token >\n"); 158 | printf("==========================================\n"); 159 | 160 | for( i=0; i < HASH_TABLE_SIZE; i++) 161 | { 162 | traverser = hash_table_ptr[i]; 163 | 164 | while( traverser != NULL) 165 | { 166 | printf("< %-30s, %3d >\n", traverser->lexeme, traverser->token_name); 167 | traverser = traverser->successor; 168 | } 169 | } 170 | printf("==========================================\n"); 171 | 172 | } 173 | -------------------------------------------------------------------------------- /Project-1/testcases/test-case-1.c: -------------------------------------------------------------------------------- 1 | /* 2 | Karthik M - 15CO221 3 | Kaushik S Kalmady - 15CO222 4 | Compiler Design Project 1 5 | 6 | Test Case 1 7 | - Test for single line comments 8 | - Test for multi-line comments 9 | - Test for single line nested comments 10 | - Test for multiline nested comments 11 | 12 | The output in lex should remove all the comments including this one 13 | */ 14 | 15 | #include 16 | 17 | void main(){ 18 | // Single line comment 19 | 20 | /* Multi-line comment 21 | Like this */ 22 | 23 | /* here */ int a; /* "int a" should be untouched */ 24 | 25 | // This nested comment // This comment should be removed should be removed 26 | 27 | 28 | 29 | /* To make things /* nested multi-line comment 
*/ interesting */ 30 | 31 | return 0; 32 | } 33 | -------------------------------------------------------------------------------- /Project-1/testcases/test-case-2.c: -------------------------------------------------------------------------------- 1 | /* 2 | Karthik M - 15CO221 3 | Kaushik S Kalmady - 15CO222 4 | Compiler Design Project 1 5 | 6 | Test Case 2 7 | - Test for multi-line comment that doesn't end till EOF 8 | 9 | The output in lex should print as error message when the comment does not terminate 10 | It should remove the comments that terminate 11 | */ 12 | 13 | #include 14 | 15 | void main(){ 16 | 17 | // This is fine 18 | /* This as well 19 | like we know */ 20 | 21 | /* This is not fine since 22 | this comment has to end somewhere 23 | 24 | return 0; 25 | } 26 | -------------------------------------------------------------------------------- /Project-1/testcases/test-case-3.c: -------------------------------------------------------------------------------- 1 | /* 2 | Karthik M - 15CO221 3 | Kaushik S Kalmady - 15CO222 4 | Compiler Design Project 1 5 | 6 | Test Case 3 7 | - Test for string 8 | - Test for string that doesn't end till EOF 9 | - Test for invalid header name 10 | 11 | The output in lex should identify the first string correct and display error message that the second one does not terminate 12 | */ 13 | 14 | #include 15 | #include < 16 | #include "custom.h" 17 | #include ""wrong.h" 18 | 19 | void main(){ 20 | 21 | printf("This is a string"); 22 | printf("This is a string that never terminates); 23 | } -------------------------------------------------------------------------------- /Project-1/testcases/test-case-4.c: -------------------------------------------------------------------------------- 1 | /* 2 | Karthik M - 15CO221 3 | Kaushik S Kalmady - 15CO222 4 | Compiler Design Project 1 5 | 6 | Test Case 4 7 | 8 | Following errors must be detected 9 | - Invalid identifiers: 9y, total$ 10 | - Invalid operator: @ 11 | - Escaped quoted should 
be part of the string that is identified 12 | - Stray characters: `, @, - 13 | 14 | The output should display appropriate errors 15 | */ 16 | 17 | #include 18 | #include 19 | 20 | int main() 21 | { 22 | ` 23 | @ - 24 | short int b; 25 | int x, 9y, total$; 26 | total = x @ y; 27 | printf ("Total = %d \n \" ", total); 28 | } -------------------------------------------------------------------------------- /Project-1/testcases/test-case-5.c: -------------------------------------------------------------------------------- 1 | /* 2 | Karthik M - 15CO221 3 | Kaushik S Kalmady - 15CO222 4 | Compiler Design Project 1 5 | 6 | Test Case 5 7 | 8 | Identifying tokens and displaying symbol and constants table 9 | 10 | Following tokens must be detected 11 | - Keywords (int, long int, long long int, main include) 12 | - Identifiers (main,total,x,y,printf), 13 | - Constants (-10, 20, 0x0f, 123456l) 14 | - Strings ("Total = %d \n") 15 | - Special symbols and Brackets ( (), {}, ;, ,) 16 | - Operators (+,-,=,*,/,%,--,++) 17 | 18 | The output should display appropriate tokens with their type and also the symbol and constants table 19 | */ 20 | #include 21 | #include 22 | 23 | int main() 24 | { 25 | int x, y; 26 | long long int total, diff; 27 | int *ptr; 28 | unsigned int a = 0x0f; 29 | long int mylong = 123456l; 30 | long int i, j; 31 | for(i=0; i < 10; i++){ 32 | for(j=10; j > 0; j--){ 33 | printf("%d",i); 34 | } 35 | } 36 | x = -10, y = 20; 37 | x=x*3/2; 38 | total = x + y; 39 | diff = x - y; 40 | int rem = x % y; 41 | printf ("Total = %d \n", total); 42 | } 43 | 44 | -------------------------------------------------------------------------------- /Project-1/tokens.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Compiler Design Project 1 : Lexical Analyser 3 | * 4 | * File : tokens.h 5 | * Description : This file defines tokens and the values associated to them. 
6 | * 7 | * Authors : Karthik M - 15CO22, Kaushik S Kalmady - 15CO222 8 | * Date : 17-1-2018 9 | */ 10 | 11 | enum keywords 12 | { 13 | INT=100, 14 | LONG, 15 | LONG_LONG, 16 | SHORT, 17 | SIGNED, 18 | UNSIGNED, 19 | FOR, 20 | BREAK, 21 | CONTINUE, 22 | RETURN, 23 | CHAR, 24 | IF, 25 | ELSE 26 | }; 27 | 28 | enum operators 29 | { 30 | DECREMENT=200, 31 | INCREMENT, 32 | PTR_SELECT, 33 | LOGICAL_AND, 34 | LOGICAL_OR, 35 | LS_THAN_EQ, 36 | GR_THAN_EQ, 37 | EQ, 38 | NOT_EQ, 39 | ASSIGN, 40 | MINUS, 41 | PLUS, 42 | STAR, 43 | MODULO, 44 | LS_THAN, 45 | GR_THAN 46 | }; 47 | 48 | enum special_symbols 49 | { 50 | DELIMITER=300, 51 | OPEN_BRACES, 52 | CLOSE_BRACES, 53 | COMMA, 54 | OPEN_PAR, 55 | CLOSE_PAR, 56 | OPEN_SQ_BRKT, 57 | CLOSE_SQ_BRKT, 58 | FW_SLASH 59 | }; 60 | 61 | enum constants 62 | { 63 | HEX_CONSTANT=400, 64 | DEC_CONSTANT, 65 | HEADER_FILE, 66 | STRING 67 | }; 68 | 69 | enum IDENTIFIER 70 | { 71 | IDENTIFIER=500 72 | }; 73 | -------------------------------------------------------------------------------- /Project-2/.gitignore: -------------------------------------------------------------------------------- 1 | y.* 2 | lex.yy.c 3 | -------------------------------------------------------------------------------- /Project-2/README.md: -------------------------------------------------------------------------------- 1 | # Compiler Design Project-2 2 | 3 | ## Parser for a subset of the C language 4 | 5 | ### Team Members 6 | - Karthik M (15CO221) 7 | - Kaushik S Kalmady (15CO222) 8 | 9 | ### Installation and running 10 | 1. Please make sure you have Lex/Flex and Yacc/Bison installed 11 | 2. Next run `$ chmod +x compile` 12 | 3. `$ ./compile` 13 | 4. 
To run the parser `$./parser test-file.c` 14 | 15 | 16 | ### FEATURES 17 | - Nested code blocks 18 | - +=, -= supported 19 | - for loops and nested for loops 20 | - integer assignment and declaration 21 | - if-else handled 22 | - array assignment, array declaration 23 | - comma separated initialization/declaration e.g int a=10,b; 24 | - support for short, long , long long , signed and unsigned 25 | - while statements supported 26 | - unary expressions supported in both lhs, rhs and standalone statements 27 | - display specific errors with line numbers 28 | - additionally evaluates expressions and adds to symbol table(implemented as a fun exercise - this is not the job of the parser) 29 | -------------------------------------------------------------------------------- /Project-2/compile: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | lex lexl.l 3 | yacc -d parser.y -v 4 | gcc -w -g y.tab.c -ly -ll -o parser 5 | -------------------------------------------------------------------------------- /Project-2/lexl.l: -------------------------------------------------------------------------------- 1 | %{ 2 | 3 | #include 4 | #include 5 | #include 6 | /* #include "symboltable.h" */ 7 | #include "y.tab.h" 8 | 9 | /* entry_t** symbol_table; */ 10 | /* entry_t** constant_table; */ 11 | int cmnt_strt = 0; 12 | 13 | %} 14 | 15 | letter [a-zA-Z] 16 | digit [0-9] 17 | ws [ \t\r\f\v]+ 18 | identifier (_|{letter})({letter}|{digit}|_){0,31} 19 | hex [0-9a-f] 20 | 21 | /* Exclusive states */ 22 | %x CMNT 23 | /*%x PREPROC*/ 24 | 25 | %% 26 | /* Keywords*/ 27 | "int" {return INT;} 28 | "long" {return LONG;} 29 | "long long" {return LONG_LONG;} 30 | "short" {return SHORT;} 31 | "signed" {return SIGNED;} 32 | "unsigned" {return UNSIGNED;} 33 | "for" {return FOR;} 34 | "while" {return WHILE;} 35 | "break" {return BREAK;} 36 | "continue" {return CONTINUE;} 37 | "if" {return IF;} 38 | "else" {return ELSE;} 39 | "return" {return RETURN;} 40 
| 41 | {identifier} {yylval.entry = insert(symbol_table, yytext, INT_MAX); return IDENTIFIER;} 42 | {ws} ; 43 | [+\-]?[0][x|X]{hex}+[lLuU]? { yylval.dval = (int)strtol(yytext, NULL, 16); return HEX_CONSTANT;} 44 | [+\-]?{digit}+[lLuU]? {yylval.dval = atoi(yytext); return DEC_CONSTANT;} 45 | 46 | "/*" {cmnt_strt = yylineno; BEGIN CMNT;} 47 | .|{ws} ; 48 | \n {yylineno++;} 49 | "*/" {BEGIN INITIAL;} 50 | "/*" {printf("Line %3d: Nested comments are not valid!\n",yylineno);} 51 | <> {printf("Line %3d: Unterminated comment\n", cmnt_strt); yyterminate();} 52 | /*^"#include" {BEGIN PREPROC;}*/ 53 | /*"<"[^<>\n]+">" {return HEADER_FILE;}*/ 54 | /*{ws} ;*/ 55 | /*\"[^"\n]+\" {return HEADER_FILE;}*/ 56 | /*\n {yylineno++; BEGIN INITIAL;}*/ 57 | /*. {printf("Line %3d: Illegal header file format \n",yylineno);}*/ 58 | "//".* ; 59 | 60 | \"[^\"\n]*\" { 61 | 62 | if(yytext[yyleng-2]=='\\') /* check if it was an escaped quote */ 63 | { 64 | yyless(yyleng-1); /* push the quote back if it was escaped */ 65 | yymore(); 66 | } 67 | else{ 68 | insert( constant_table, yytext, INT_MAX); 69 | return STRING; 70 | } 71 | } 72 | 73 | \"[^\"\n]*$ {printf("Line %3d: Unterminated string %s\n",yylineno,yytext);} 74 | {digit}+({letter}|_)+ {printf("Line %3d: Illegal identifier name %s\n",yylineno,yytext);} 75 | \n {yylineno++;} 76 | 77 | "--" {return DECREMENT;} 78 | "++" {return INCREMENT;} 79 | /* "->" {return PTR_SELECT;} */ 80 | "+=" {return ADD_ASSIGN;} 81 | "-=" {return SUB_ASSIGN;} 82 | "*=" {return MUL_ASSIGN;} 83 | "/=" {return DIV_ASSIGN;} 84 | "%=" {return MOD_ASSIGN;} 85 | 86 | 87 | "&&" {return LOGICAL_AND;} 88 | "||" {return LOGICAL_OR;} 89 | "<=" {return LS_EQ;} 90 | ">=" {return GR_EQ;} 91 | "==" {return EQ;} 92 | "!=" {return NOT_EQ;} 93 | 94 | . 
{return yytext[0];} 95 | 96 | %% 97 | /* 98 | int main() 99 | { 100 | yyin=fopen("test2.c","r"); 101 | 102 | constant_table=create_table(); 103 | symbol_table = create_table(); 104 | 105 | yylex(); 106 | 107 | printf("\n\tSymbol table"); 108 | display(symbol_table); 109 | printf("\n\tConstants Table"); 110 | display(constant_table); 111 | 112 | printf("NOTE: Please refer tokens.h for token meanings\n"); 113 | } */ 114 | -------------------------------------------------------------------------------- /Project-2/parser.y: -------------------------------------------------------------------------------- 1 | %{ 2 | #include 3 | #include 4 | #include "symboltable.h" 5 | 6 | entry_t** symbol_table; 7 | entry_t** constant_table; 8 | 9 | double Evaluate (double lhs_value,int assign_type,double rhs_value); 10 | int current_dtype; 11 | int yyerror(char *msg); 12 | %} 13 | 14 | %union 15 | { 16 | double dval; 17 | entry_t* entry; 18 | int ival; 19 | } 20 | 21 | %token IDENTIFIER 22 | 23 | /* Constants */ 24 | %token DEC_CONSTANT HEX_CONSTANT 25 | %token STRING 26 | 27 | /* Logical and Relational operators */ 28 | %token LOGICAL_AND LOGICAL_OR LS_EQ GR_EQ EQ NOT_EQ 29 | 30 | /* Short hand assignment operators */ 31 | %token MUL_ASSIGN DIV_ASSIGN MOD_ASSIGN ADD_ASSIGN SUB_ASSIGN 32 | %token LEFT_ASSIGN RIGHT_ASSIGN AND_ASSIGN XOR_ASSIGN OR_ASSIGN 33 | %token INCREMENT DECREMENT 34 | 35 | /* Data types */ 36 | %token SHORT INT LONG LONG_LONG SIGNED UNSIGNED CONST 37 | 38 | /* Keywords */ 39 | %token IF FOR WHILE CONTINUE BREAK RETURN 40 | 41 | %type expression 42 | %type sub_expr 43 | %type constant 44 | %type unary_expr 45 | %type arithmetic_expr 46 | %type assignment_expr 47 | %type lhs 48 | %type assign_op 49 | 50 | %start starter 51 | 52 | %left ',' 53 | %right '=' 54 | %left LOGICAL_OR 55 | %left LOGICAL_AND 56 | %left EQ NOT_EQ 57 | %left '<' '>' LS_EQ GR_EQ 58 | %left '+' '-' 59 | %left '*' '/' '%' 60 | %right '!' 
61 | 62 | 63 | %nonassoc UMINUS 64 | %nonassoc LOWER_THAN_ELSE 65 | %nonassoc ELSE 66 | 67 | 68 | %% 69 | 70 | /* Program is made up of multiple builder blocks. */ 71 | starter: starter builder 72 | |builder; 73 | 74 | /* Each builder block is either a function or a declaration */ 75 | builder: function| 76 | declaration; 77 | 78 | /* This is how a function looks like */ 79 | function: type IDENTIFIER '(' argument_list ')' compound_stmt; 80 | 81 | /* Now we will define a grammar for how types can be specified */ 82 | 83 | type :data_type pointer 84 | |data_type; 85 | 86 | pointer: '*' pointer 87 | |'*' 88 | ; 89 | 90 | data_type :sign_specifier type_specifier 91 | |type_specifier 92 | ; 93 | 94 | sign_specifier :SIGNED 95 | |UNSIGNED 96 | ; 97 | 98 | type_specifier :INT {current_dtype = INT;} 99 | |SHORT INT {current_dtype = SHORT;} 100 | |SHORT {current_dtype = SHORT;} 101 | |LONG {current_dtype = LONG;} 102 | |LONG INT {current_dtype = LONG;} 103 | |LONG_LONG {current_dtype = LONG_LONG;} 104 | |LONG_LONG INT {current_dtype = LONG_LONG;} 105 | ; 106 | 107 | /* grammar rules for argument list */ 108 | /* argument list can be empty */ 109 | argument_list :arguments 110 | | 111 | ; 112 | /* arguments are comma separated TYPE ID pairs */ 113 | arguments :arguments ',' arg 114 | |arg 115 | ; 116 | 117 | /* Each arg is a TYPE ID pair */ 118 | arg :type IDENTIFIER 119 | ; 120 | 121 | /* Generic statement. Can be compound or a single statement */ 122 | stmt:compound_stmt 123 | |single_stmt 124 | ; 125 | 126 | /* The function body is covered in braces and has multiple statements. 
*/ 127 | compound_stmt :'{' statements '}' 128 | ; 129 | 130 | statements:statements stmt 131 | | 132 | ; 133 | 134 | /* Grammar for what constitutes every individual statement */ 135 | single_stmt :if_block 136 | |for_block 137 | |while_block 138 | |declaration 139 | |function_call ';' 140 | |RETURN ';' 141 | |CONTINUE ';' 142 | |BREAK ';' 143 | |RETURN sub_expr ';' 144 | ; 145 | 146 | for_block:FOR '(' expression_stmt expression_stmt ')' stmt 147 | |FOR '(' expression_stmt expression_stmt expression ')' stmt 148 | ; 149 | 150 | if_block:IF '(' expression ')' stmt %prec LOWER_THAN_ELSE 151 | |IF '(' expression ')' stmt ELSE stmt 152 | ; 153 | 154 | while_block: WHILE '(' expression ')' stmt 155 | ; 156 | 157 | declaration:type declaration_list ';' 158 | |declaration_list ';' 159 | | unary_expr ';' 160 | 161 | declaration_list: declaration_list ',' sub_decl 162 | |sub_decl; 163 | 164 | sub_decl: assignment_expr 165 | |IDENTIFIER {$1 -> data_type = current_dtype;} 166 | |array_index 167 | /*|struct_block ';'*/ 168 | ; 169 | 170 | /* This is because we can have empty expession statements inside for loops */ 171 | expression_stmt:expression ';' 172 | |';' 173 | ; 174 | 175 | expression: 176 | expression ',' sub_expr {$$ = $1,$3;} 177 | |sub_expr {$$ = $1;} 178 | ; 179 | 180 | sub_expr: 181 | sub_expr '>' sub_expr {$$ = ($1 > $3);} 182 | |sub_expr '<' sub_expr {$$ = ($1 < $3);} 183 | |sub_expr EQ sub_expr {$$ = ($1 == $3);} 184 | |sub_expr NOT_EQ sub_expr {$$ = ($1 != $3);} 185 | |sub_expr LS_EQ sub_expr {$$ = ($1 <= $3);} 186 | |sub_expr GR_EQ sub_expr {$$ = ($1 >= $3);} 187 | |sub_expr LOGICAL_AND sub_expr {$$ = ($1 && $3);} 188 | |sub_expr LOGICAL_OR sub_expr {$$ = ($1 || $3);} 189 | |'!' 
sub_expr {$$ = (!$2);} 190 | |arithmetic_expr {$$ = $1;} 191 | |assignment_expr {$$ = $1;} 192 | |unary_expr {$$ = $1;} 193 | /* |IDENTIFIER {$$ = $1->value;} 194 | |constant {$$ = $1;} */ 195 | //|array_index 196 | ; 197 | 198 | 199 | assignment_expr :lhs assign_op arithmetic_expr {$$ = $1->value = Evaluate($1->value,$2,$3);} 200 | |lhs assign_op array_index {$$ = 0;} 201 | |lhs assign_op function_call {$$ = 0;} 202 | |lhs assign_op unary_expr {$$ = $1->value = Evaluate($1->value,$2,$3);} 203 | |unary_expr assign_op unary_expr {$$ = 0;} 204 | ; 205 | /* Post-forms yield the old value and pre-forms the new value; the stored value is modified exactly once. The former "x = x++" / "x = ++x" self-assignments modified the same object twice without sequencing (undefined behaviour in C). */ 206 | unary_expr: lhs INCREMENT {$$ = ($1->value)++;} 207 | |lhs DECREMENT {$$ = ($1->value)--;} 208 | |DECREMENT lhs {$$ = --($2->value);} 209 | |INCREMENT lhs {$$ = ++($2->value);} 210 | 211 | lhs:IDENTIFIER {$$ = $1; if(! $1->data_type) $1->data_type = current_dtype;} 212 | //|array_index 213 | ; 214 | 215 | assign_op:'=' {$$ = '=';} 216 | |ADD_ASSIGN {$$ = ADD_ASSIGN;} 217 | |SUB_ASSIGN {$$ = SUB_ASSIGN;} 218 | |MUL_ASSIGN {$$ = MUL_ASSIGN;} 219 | |DIV_ASSIGN {$$ = DIV_ASSIGN;} 220 | |MOD_ASSIGN {$$ = MOD_ASSIGN;} 221 | ; 222 | 223 | arithmetic_expr: arithmetic_expr '+' arithmetic_expr {$$ = $1 + $3;} 224 | |arithmetic_expr '-' arithmetic_expr {$$ = $1 - $3;} 225 | |arithmetic_expr '*' arithmetic_expr {$$ = $1 * $3;} 226 | |arithmetic_expr '/' arithmetic_expr {$$ = ($3 == 0) ? 
yyerror("Divide by 0!") : ($1 / $3);} 227 | |arithmetic_expr '%' arithmetic_expr {$$ = ((int)$3 == 0) ? yyerror("Modulo by 0!") : ((int)$1 % (int)$3);} 228 | |'(' arithmetic_expr ')' {$$ = $2;} 229 | |'-' arithmetic_expr %prec UMINUS {$$ = -$2;} 230 | |IDENTIFIER {$$ = $1 -> value;} 231 | |constant {$$ = $1;} 232 | ; 233 | 234 | constant: DEC_CONSTANT {$$ = $1;} 235 | |HEX_CONSTANT {$$ = $1;} 236 | ; 237 | 238 | array_index: IDENTIFIER '[' sub_expr ']' 239 | 240 | function_call: IDENTIFIER '(' parameter_list ')' 241 | |IDENTIFIER '(' ')' 242 | ; 243 | 244 | parameter_list: 245 | parameter_list ',' parameter 246 | |parameter 247 | ; 248 | 249 | parameter: sub_expr 250 | |STRING 251 | 252 | ; 253 | %% 254 | 255 | #include "lex.yy.c" 256 | #include 257 | 258 | /* Apply the assignment operator assign_type ('=' or one of the *_ASSIGN tokens) to lhs_value and rhs_value and return the value to store. A zero divisor for DIV_ASSIGN / MOD_ASSIGN is reported through yyerror() and yields 0; an unrecognised operator leaves lhs_value unchanged. */ 259 | double Evaluate (double lhs_value,int assign_type,double rhs_value) 260 | { 261 | switch(assign_type) 262 | { 263 | case '=': return rhs_value; 264 | case ADD_ASSIGN: return (lhs_value + rhs_value); 265 | case SUB_ASSIGN: return (lhs_value - rhs_value); 266 | case MUL_ASSIGN: return (lhs_value * rhs_value); 267 | case DIV_ASSIGN: if(rhs_value == 0) { yyerror("Divide by 0!"); return 0; } return (lhs_value / rhs_value); 268 | case MOD_ASSIGN: if((int)rhs_value == 0) { yyerror("Modulo by 0!"); return 0; } return ((int)lhs_value % (int)rhs_value); default: return lhs_value; 269 | } 270 | } 271 | /* Entry point: parses the file named by argv[1] and prints the symbol table. Returns 1 if no file name is given or the file cannot be opened, 0 otherwise. */ 272 | int main(int argc, char *argv[]) 273 | { if(argc < 2) { fprintf(stderr, "Usage: %s source-file.c\n", argv[0]); return 1; } 274 | symbol_table = create_table(); 275 | constant_table = create_table(); 276 | 277 | yyin = fopen(argv[1], "r"); if(yyin == NULL) { perror(argv[1]); return 1; } 278 | 279 | if(!yyparse()) 280 | { 281 | printf("\nParsing complete\n"); 282 | } 283 | else 284 | { 285 | printf("\nParsing failed\n"); 286 | } 287 | 288 | 289 | printf("\n\tSymbol table"); 290 | display(symbol_table); 291 | 292 | 293 | fclose(yyin); 294 | return 0; 295 | } 296 | /* Error reporter used by the generated parser and by the semantic actions. Always returns 0, so actions that use its result as an expression value get a defined value. */ 297 | int yyerror(char *msg) 298 | { 299 | printf("Line no: %d Error message: %s Token: %s\n", yylineno, msg, yytext); return 0; 300 | } 301 | -------------------------------------------------------------------------------- /Project-2/symboltable.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Compiler Design Project 1 : Lexical 
/* Analyser
 *
 * File        : symboltable.h
 * Description : This file contains functions related to a hash organised symbol table.
 *               The functions implemented are:
 *               create_table(), insert(), search, display()
 *
 * Authors     : Karthik M - 15CO22, Kaushik S Kalmady - 15CO222
 * Date        : 17-1-2018
 */

#ifndef SYMBOLTABLE_H
#define SYMBOLTABLE_H

/*
 * NOTE(review): the header names on these five #include lines were lost when
 * this listing was extracted. They are reconstructed from what the code uses
 * (printf, malloc/exit, strlen/strcmp, uint32_t, INT_MAX) - confirm against
 * the repository.
 */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <stdint.h>
#include <limits.h>

#define HASH_TABLE_SIZE 100

/* One symbol: its text, its value/type, and the next entry in the same bucket */
struct entry_s
{
    char* lexeme;
    double value;
    int data_type;
    struct entry_s* successor;
};

typedef struct entry_s entry_t;

/*
 * Create a new hash table: an array of HASH_TABLE_SIZE empty buckets.
 * Returns NULL if the array cannot be allocated.
 */
entry_t** create_table()
{
    entry_t** hash_table_ptr = malloc( sizeof( *hash_table_ptr ) * HASH_TABLE_SIZE );
    int i;

    if( hash_table_ptr == NULL )
        return NULL;

    /* Initialise every bucket as empty */
    for( i = 0; i < HASH_TABLE_SIZE; i++ )
        hash_table_ptr[i] = NULL;

    return hash_table_ptr;
}

/*
 * Hash a lexeme to a bucket index in [0, HASH_TABLE_SIZE).
 *
 * Uses Jenkins' one-at-a-time hash:
 * https://en.wikipedia.org/wiki/Jenkins_hash_function#one-at-a-time
 *
 * Bytes are read as unsigned char so the result does not depend on whether
 * plain char is signed, and the string is walked once instead of calling
 * strlen() on every iteration.
 */
uint32_t hash( char *lexeme )
{
    const unsigned char *p;
    uint32_t h = 0;

    for ( p = (const unsigned char *)lexeme; *p != '\0'; ++p ) {
        h += *p;
        h += ( h << 10 );
        h ^= ( h >> 6 );
    }
    h += ( h << 3 );
    h ^= ( h >> 11 );
    h += ( h << 15 );

    return h % HASH_TABLE_SIZE;
}

/*
 * Allocate an entry holding a private copy of lexeme and the given value.
 * data_type starts at 0 and successor at NULL.
 * Returns NULL (with nothing leaked) if lexeme is NULL or memory runs out.
 * Called from insert().
 */
entry_t *create_entry( char *lexeme, int value )
{
    entry_t *newentry;
    size_t size;

    if( lexeme == NULL )
        return NULL;

    newentry = malloc( sizeof( *newentry ) );
    if( newentry == NULL )
        return NULL;

    /* Copy by hand: strdup() is not part of ISO C before C23 */
    size = strlen( lexeme ) + 1;
    newentry->lexeme = malloc( size );
    if( newentry->lexeme == NULL )
    {
        free( newentry );   /* do not leak the half-built entry */
        return NULL;
    }
    memcpy( newentry->lexeme, lexeme, size );

    newentry->value = value;
    newentry->data_type = 0;    /* display() prints this field, so it must be defined */
    newentry->successor = NULL;

    return newentry;
}

/*
 * Look a lexeme up in the table.
 * Returns the matching entry, or NULL if the lexeme is not present.
 */
entry_t* search( entry_t** hash_table_ptr, char* lexeme )
{
    /* Walk the chain stored in the bucket this lexeme hashes to */
    entry_t* myentry = hash_table_ptr[ hash( lexeme ) ];

    while( myentry != NULL && strcmp( lexeme, myentry->lexeme ) != 0 )
        myentry = myentry->successor;

    return myentry;
}

/*
 * Insert a lexeme into the table.
 * If the lexeme is already present the existing entry is returned unchanged.
 * Otherwise a new entry is put at the head of its bucket and returned.
 * Terminates the program if the new entry cannot be allocated.
 */
entry_t* insert( entry_t** hash_table_ptr, char* lexeme, int value )
{
    entry_t* newentry = search( hash_table_ptr, lexeme );
    uint32_t idx;

    if( newentry != NULL )
        return newentry;

    newentry = create_entry( lexeme, value );
    if( newentry == NULL )
    {
        printf("Insert failed. New entry could not be created.");
        exit(1);
    }

    /* Push on the front of the chain; an empty bucket is simply a NULL chain */
    idx = hash( lexeme );
    newentry->successor = hash_table_ptr[idx];
    hash_table_ptr[idx] = newentry;

    return newentry;
}

/* Traverse the hash table and print every entry, bucket by bucket */
void display(entry_t** hash_table_ptr)
{
    int i;
    entry_t* traverser;

    printf("\n====================================================\n");
    printf(" %-20s %-20s %-20s\n","lexeme","value","data-type");
    printf("====================================================\n");

    for( i = 0; i < HASH_TABLE_SIZE; i++ )
    {
        for( traverser = hash_table_ptr[i]; traverser != NULL; traverser = traverser->successor )
        {
            printf(" %-20s %-20d %-20d \n", traverser->lexeme, (int)traverser->value, traverser->data_type);
        }
    }
    printf("====================================================\n");
}

#endif /* SYMBOLTABLE_H */

/* The line below is the start of the next file in this listing, carried over unchanged. */
// -------------------------------------------------------------------------------- /Project-2/testcases/test-case-1.c: -------------------------------------------------------------------------------- 1 | /* 2 | */ 3 | 4 | 5 | /* 6 | Karthik M - 15CO221 7 | Kaushik S Kalmady - 15CO222 8 | Compiler Design Project 2 9 | 10 | Test Case 1 11 | - Missing semicolon 12 | - Single quotes for characters and double quotes for strings 13 | 14 | The output in lex/yacc should identify statements without a terminating semiclon and unmatched single and double quotes 15 | */ 16 | 17 | 18 | int main(){ 19 | 20 | int a = 10; 21 | int b = 10 22 | 23 | for(a = 2 a<3; a++) 24 | b = b + 1; 25 | 26 | printf (" This string is enclosed in double quotes "); 27 | printf (" This string is
not enclosed in double quotes ); 28 | 29 | 30 | 31 | return 0; 32 | } 33 | -------------------------------------------------------------------------------- /Project-2/testcases/test-case-2.c: -------------------------------------------------------------------------------- 1 | /* 2 | Karthik M - 15CO221 3 | Kaushik S Kalmady - 15CO222 4 | Compiler Design Project 2 5 | 6 | Test Case 2 7 | - Test for unmatched parentheses 8 | 9 | The output in lex/yacc should print an error message when parentheses are not matched 10 | 11 | */ 12 | 13 | 14 | int main() 15 | { 16 | int array1[10]; 17 | int array2[20; 18 | 19 | for(int a = 3; a<4; a++ 20 | { 21 | a = array1[0]; 22 | } 23 | 24 | func(); 25 | } 26 | 27 | int func() 28 | { 29 | return 20; 30 | -------------------------------------------------------------------------------- /Project-2/testcases/test-case-3.c: -------------------------------------------------------------------------------- 1 | /* 2 | Karthik M - 15CO221 3 | Kaushik S Kalmady - 15CO222 4 | Compiler Design Project 2 5 | 6 | Test Case 3 7 | - Left hand side of an assignment statement should be an lvalue 8 | 9 | The output in lex/yacc should identify those assignment statements whose left hand side is not an lvalue and stray characters must be detected as an error 10 | */ 11 | 12 | 13 | int main() 14 | { 15 | 16 | ` 17 | @ - 18 | total = x @ y; 19 | 20 | int a = 4; 21 | 22 | if(a > 0 ) 23 | { 24 | printf("a is positive"); 25 | a * 2 = a; 26 | } 27 | else 28 | { 29 | printf("a is negative"); 30 | a = a * 2; 31 | } 32 | 33 | } 34 | -------------------------------------------------------------------------------- /Project-2/testcases/test-case-4.c: -------------------------------------------------------------------------------- 1 | /* 2 | Karthik M - 15CO221 3 | Kaushik S Kalmady - 15CO222 4 | Compiler Design Project 2 5 | 6 | Test Case 4 7 | 8 | Following errors must be detected 9 | - Invalid identifiers: 9y, total$ 10 | - Invalid operator: @ 11 | - Escaped quoted
should be part of the string that is identified 12 | - Stray characters: `, @, - 13 | 14 | The output should display appropriate errors 15 | */ 16 | 17 | 18 | int main() 19 | { 20 | int x, y; 21 | long long int total, diff; 22 | int *ptr; 23 | int a = 86; 24 | 25 | if (a > 90) 26 | { 27 | printf ("Grade is AA"); 28 | } 29 | else if (a > 80) 30 | { 31 | printf ("Grade is AB"); 32 | } 33 | else 34 | { 35 | printf ("Grade is BB"); 36 | } 37 | 38 | x = -10, y = 20; 39 | x=x*3/2; 40 | total = x + y; 41 | } 42 | 43 | // Errors: 44 | // 45 | -------------------------------------------------------------------------------- /Project-2/testcases/test-case-5.c: -------------------------------------------------------------------------------- 1 | /* 2 | Karthik M - 15CO221 3 | Kaushik S Kalmady - 15CO222 4 | Compiler Design Project 2 5 | 6 | Test Case 5 7 | 8 | - Program file with no errors. This should complete parsing successfully 9 | 10 | */ 11 | 12 | 13 | int main() 14 | { 15 | unsigned int a = 0x0f; 16 | long int mylong = 123456l; 17 | long int i, j; 18 | 19 | for(i=0; i < 10; i++) 20 | { 21 | for(j=10; j > 0; j--) 22 | { 23 | printf("%d",i); 24 | } 25 | } 26 | 27 | diff = x - y; 28 | int rem = x % y; 29 | printf ("Total = %d \n", total); 30 | 31 | int result = IncreaseBy10(x); 32 | } 33 | 34 | int IncreaseBy10(int x) 35 | { 36 | return x + 10; 37 | } 38 | -------------------------------------------------------------------------------- /Project-2/testcases/test-case-6.c: -------------------------------------------------------------------------------- 1 | int main() 2 | { 3 | int c = 0,d,e,f; 4 | 5 | c = c + 1; // c = 1 6 | d = c * 5; // d = 5 7 | e = d / 4; // e = 1 8 | f = d % 3; // f = 2 9 | f = f / 0; 10 | } 11 | -------------------------------------------------------------------------------- /Project-3/README.md: -------------------------------------------------------------------------------- 1 | # Compiler Design Project-3 2 | 3 | ## Semantic Analyser for a subset of 
the C language 4 | This code is built upon the parser designed in Project-2 with some modifications to the earlier structure. 5 | 6 | ### Team Members 7 | - Karthik M (15CO221) 8 | - Kaushik S Kalmady (15CO222) 9 | 10 | ### Installation and running 11 | 1. Please make sure you have Lex/Flex and Yacc/Bison installed 12 | 2. Next run `$ chmod +x compile` 13 | 3. `$ ./compile` 14 | 4. To run the parser `$./semantic_analyser test-file.c` 15 | 16 | 17 | ### FEATURES 18 | - Type checking in arithmetic expressions 19 | - Type checking in relational and logical expressions 20 | - Type checking in assignment expressions 21 | - Check of array index out of bound 22 | - Matching type in return statement in function 23 | - Checking matching function parameter type during function call 24 | - Checking number of parameters passed during function call 25 | -------------------------------------------------------------------------------- /Project-3/compile: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | lex lexer.l 3 | yacc -d parser.y -v 4 | gcc -w -g y.tab.c -ly -ll -o semantic_analyser 5 | -------------------------------------------------------------------------------- /Project-3/lexer.l: -------------------------------------------------------------------------------- 1 | %{ 2 | 3 | #include 4 | #include 5 | #include 6 | 7 | #include "y.tab.h" 8 | entry_t** constant_table; 9 | 10 | int cmnt_strt = 0; 11 | 12 | %} 13 | 14 | letter [a-zA-Z] 15 | digit [0-9] 16 | ws [ \t\r\f\v]+ 17 | identifier (_|{letter})({letter}|{digit}|_){0,31} 18 | hex [0-9a-f] 19 | 20 | /* Exclusive states */ 21 | %x CMNT 22 | 23 | %% 24 | /* Keywords*/ 25 | "int" {return INT;} 26 | "char" {return CHAR;} 27 | "float" {return FLOAT;} 28 | "void" {return VOID;} 29 | "long" {return LONG;} 30 | "long long" {return LONG_LONG;} 31 | "short" {return SHORT;} 32 | "signed" {return SIGNED;} 33 | "unsigned" {return UNSIGNED;} 34 | "for" {return FOR;} 35 | "while" 
{return WHILE;} 36 | "break" {return BREAK;} 37 | "continue" {return CONTINUE;} 38 | "if" {return IF;} 39 | "else" {return ELSE;} 40 | "return" {return RETURN;} 41 | 42 | {identifier} {return IDENTIFIER;} 43 | {ws} ; 44 | [+\-]?[0][x|X]{hex}+[lLuU]? { yylval.entry = insert(constant_table,yytext,(int)strtol(yytext, NULL, 16),INT); return HEX_CONSTANT;} 45 | '({letter}|{digit})' { yylval.entry = insert(constant_table,yytext,yytext[1],CHAR); return CHAR_CONSTANT;} 46 | [+\-]?{digit}+[lLuU]? { yylval.entry = insert(constant_table,yytext,atoi(yytext),INT); return DEC_CONSTANT;} 47 | [+\-]?{digit}*\.{digit}+ { yylval.entry = insert(constant_table,yytext,atof(yytext),FLOAT); return FLOAT_CONSTANT;} 48 | 49 | "/*" {cmnt_strt = yylineno; BEGIN CMNT;} 50 | .|{ws} ; 51 | \n {yylineno++;} 52 | "*/" {BEGIN INITIAL;} 53 | "/*" {printf("Line %3d: Nested comments are not valid!\n",yylineno);} 54 | <> {printf("Line %3d: Unterminated comment\n", cmnt_strt); yyterminate();} 55 | 56 | "//".* ; 57 | 58 | \"[^\"\n]*\" { 59 | if(yytext[yyleng-2]=='\\') /* check if it was an escaped quote */ 60 | { 61 | yyless(yyleng-1); /* push the quote back if it was escaped */ 62 | yymore(); 63 | } 64 | else 65 | { 66 | insert(constant_table,yytext,INT_MAX,STRING); 67 | return STRING; 68 | } 69 | } 70 | 71 | \"[^\"\n]*$ {printf("Line %3d: Unterminated string %s\n",yylineno,yytext);} 72 | {digit}+({letter}|_)+ {printf("Line %3d: Illegal identifier name %s\n",yylineno,yytext);} 73 | \n {yylineno++;} 74 | 75 | "--" {return DECREMENT;} 76 | "++" {return INCREMENT;} 77 | "+=" {return ADD_ASSIGN;} 78 | "-=" {return SUB_ASSIGN;} 79 | "*=" {return MUL_ASSIGN;} 80 | "/=" {return DIV_ASSIGN;} 81 | "%=" {return MOD_ASSIGN;} 82 | 83 | 84 | "&&" {return LOGICAL_AND;} 85 | "||" {return LOGICAL_OR;} 86 | "<=" {return LS_EQ;} 87 | ">=" {return GR_EQ;} 88 | "==" {return EQ;} 89 | "!=" {return NOT_EQ;} 90 | 91 | . 
{return yytext[0];} 92 | 93 | %% 94 | -------------------------------------------------------------------------------- /Project-3/parser.y: -------------------------------------------------------------------------------- 1 | %{ 2 | #include 3 | #include 4 | int yyerror(char *msg); 5 | 6 | #include "symboltable.h" 7 | #include "lex.yy.c" 8 | 9 | #define SYMBOL_TABLE symbol_table_list[current_scope].symbol_table 10 | 11 | extern entry_t** constant_table; 12 | 13 | int current_dtype; 14 | 15 | table_t symbol_table_list[NUM_TABLES]; 16 | 17 | int is_declaration = 0; 18 | int is_loop = 0; 19 | int is_func = 0; 20 | int func_type; 21 | 22 | int param_list[10]; 23 | int p_idx = 0; 24 | int p=0; 25 | int rhs = 0; 26 | 27 | void type_check(int,int,int); 28 | %} 29 | 30 | %union 31 | { 32 | int data_type; 33 | entry_t* entry; 34 | } 35 | 36 | %token IDENTIFIER 37 | 38 | /* Constants */ 39 | %token DEC_CONSTANT HEX_CONSTANT CHAR_CONSTANT FLOAT_CONSTANT 40 | %token STRING 41 | 42 | /* Logical and Relational operators */ 43 | %token LOGICAL_AND LOGICAL_OR LS_EQ GR_EQ EQ NOT_EQ 44 | 45 | /* Short hand assignment operators */ 46 | %token MUL_ASSIGN DIV_ASSIGN MOD_ASSIGN ADD_ASSIGN SUB_ASSIGN 47 | %token INCREMENT DECREMENT 48 | 49 | /* Data types */ 50 | %token SHORT INT LONG LONG_LONG SIGNED UNSIGNED CONST VOID CHAR FLOAT 51 | 52 | /* Keywords */ 53 | %token IF FOR WHILE CONTINUE BREAK RETURN 54 | 55 | %type identifier 56 | %type constant 57 | %type array_index 58 | 59 | %type sub_expr 60 | %type unary_expr 61 | %type arithmetic_expr 62 | %type assignment_expr 63 | %type function_call 64 | %type array_access 65 | %type lhs 66 | 67 | %left ',' 68 | %right '=' 69 | %left LOGICAL_OR 70 | %left LOGICAL_AND 71 | %left EQ NOT_EQ 72 | %left '<' '>' LS_EQ GR_EQ 73 | %left '+' '-' 74 | %left '*' '/' '%' 75 | %right '!' 76 | 77 | 78 | %nonassoc UMINUS 79 | %nonassoc LOWER_THAN_ELSE 80 | %nonassoc ELSE 81 | 82 | 83 | %% 84 | 85 | /* Program is made up of multiple builder blocks. 
*/ 86 | starter: starter builder 87 | | builder; 88 | 89 | /* Each builder block is either a function or a declaration */ 90 | builder: function 91 | | declaration 92 | ; 93 | 94 | /* This is how a function looks like */ 95 | function: type 96 | identifier { 97 | func_type = current_dtype; 98 | is_declaration = 0; 99 | current_scope = create_new_scope(); 100 | } 101 | 102 | '(' argument_list ')' { 103 | is_declaration = 0; 104 | fill_parameter_list($2,param_list,p_idx); 105 | p_idx = 0; 106 | is_func = 1; 107 | p=1; 108 | } 109 | 110 | compound_stmt { 111 | is_func = 0; 112 | } 113 | ; 114 | /* Now we will define a grammar for how types can be specified */ 115 | 116 | type : data_type pointer 117 | {is_declaration = 1; } 118 | | data_type 119 | {is_declaration = 1; } 120 | ; 121 | 122 | pointer: '*' pointer 123 | | '*' 124 | ; 125 | 126 | data_type : sign_specifier type_specifier 127 | | type_specifier 128 | ; 129 | 130 | sign_specifier : SIGNED 131 | | UNSIGNED 132 | ; 133 | 134 | type_specifier :INT {current_dtype = INT;} 135 | |SHORT INT {current_dtype = SHORT;} 136 | |SHORT {current_dtype = SHORT;} 137 | |LONG {current_dtype = LONG;} 138 | |LONG INT {current_dtype = LONG;} 139 | |LONG_LONG {current_dtype = LONG_LONG;} 140 | |LONG_LONG INT {current_dtype = LONG_LONG;} 141 | |CHAR {current_dtype = CHAR;} 142 | |FLOAT {current_dtype = FLOAT;} 143 | |VOID {current_dtype = VOID;} 144 | ; 145 | 146 | /* grammar rules for argument list */ 147 | /* argument list can be empty */ 148 | argument_list : arguments 149 | | 150 | ; 151 | /* arguments are comma separated TYPE ID pairs */ 152 | arguments : arguments ',' arg 153 | | arg 154 | ; 155 | 156 | /* Each arg is a TYPE ID pair */ 157 | arg : type identifier {param_list[p_idx++] = $2->data_type;} 158 | ; 159 | 160 | /* Generic statement. Can be compound or a single statement */ 161 | stmt:compound_stmt 162 | |single_stmt 163 | ; 164 | 165 | /* The function body is covered in braces and has multiple statements. 
*/ 166 | compound_stmt : 167 | '{' { 168 | if(!p)current_scope = create_new_scope(); 169 | else p = 0; 170 | } 171 | 172 | statements 173 | 174 | '}' {current_scope = exit_scope();} 175 | ; 176 | 177 | statements:statements stmt 178 | | 179 | ; 180 | 181 | /* Grammar for what constitutes every individual statement */ 182 | single_stmt :if_block 183 | |for_block 184 | |while_block 185 | |declaration 186 | |function_call ';' 187 | |RETURN ';' { 188 | if(is_func) 189 | { 190 | if(func_type != VOID) 191 | yyerror("return type (VOID) does not match function type"); 192 | } 193 | else yyerror("return statement not inside function definition"); 194 | } 195 | 196 | |CONTINUE ';' {if(!is_loop) {yyerror("Illegal use of continue");}} 197 | |BREAK ';' {if(!is_loop) {yyerror("Illegal use of break");}} 198 | 199 | |RETURN sub_expr ';' { 200 | if(is_func) 201 | { 202 | if(func_type != $2) 203 | yyerror("return type does not match function type"); 204 | } 205 | else yyerror("return statement not in function definition"); 206 | } 207 | ; 208 | 209 | for_block:FOR '(' expression_stmt expression_stmt ')' {is_loop = 1;} stmt {is_loop = 0;} 210 | |FOR '(' expression_stmt expression_stmt expression ')' {is_loop = 1;} stmt {is_loop = 0;} 211 | ; 212 | 213 | if_block:IF '(' expression ')' stmt %prec LOWER_THAN_ELSE 214 | |IF '(' expression ')' stmt ELSE stmt 215 | ; 216 | 217 | while_block: WHILE '(' expression ')' {is_loop = 1;} stmt {is_loop = 0;} 218 | ; 219 | 220 | declaration: type declaration_list ';' 221 | {is_declaration = 0; } 222 | | declaration_list ';' 223 | | unary_expr ';' 224 | 225 | 226 | declaration_list: declaration_list ',' sub_decl 227 | |sub_decl 228 | ; 229 | 230 | sub_decl: assignment_expr 231 | |identifier 232 | |array_access 233 | ; 234 | 235 | /* This is because we can have empty expession statements inside for loops */ 236 | expression_stmt: expression ';' 237 | | ';' 238 | ; 239 | 240 | expression: expression ',' sub_expr 241 | | sub_expr 242 | ; 243 | 244 | 
sub_expr: 245 | sub_expr '>' sub_expr {type_check($1,$3,2); $$ = $1;} 246 | |sub_expr '<' sub_expr {type_check($1,$3,2); $$ = $1;} 247 | |sub_expr EQ sub_expr {type_check($1,$3,2); $$ = $1;} 248 | |sub_expr NOT_EQ sub_expr {type_check($1,$3,2); $$ = $1;} 249 | |sub_expr LS_EQ sub_expr {type_check($1,$3,2); $$ = $1;} 250 | |sub_expr GR_EQ sub_expr {type_check($1,$3,2); $$ = $1;} 251 | |sub_expr LOGICAL_AND sub_expr {type_check($1,$3,2); $$ = $1;} 252 | |sub_expr LOGICAL_OR sub_expr {type_check($1,$3,2); $$ = $1;} 253 | |'!' sub_expr {$$ = $2;} 254 | |arithmetic_expr {$$ = $1;} 255 | |assignment_expr {$$ = $1;} 256 | |unary_expr {$$ = $1;} 257 | ; 258 | 259 | 260 | assignment_expr : 261 | lhs assign_op arithmetic_expr {type_check($1,$3,1); $$ = $3; rhs=0;} 262 | |lhs assign_op array_access {type_check($1,$3,1); $$ = $3;rhs=0;} 263 | |lhs assign_op function_call {type_check($1,$3,1); $$ = $3;rhs=0;} 264 | |lhs assign_op unary_expr {type_check($1,$3,1); $$ = $3;rhs=0;} 265 | |unary_expr assign_op unary_expr {type_check($1,$3,1); $$ = $3;rhs=0;} 266 | ; 267 | 268 | unary_expr: identifier INCREMENT {$$ = $1->data_type;} 269 | | identifier DECREMENT {$$ = $1->data_type;} 270 | | DECREMENT identifier {$$ = $2->data_type;} 271 | | INCREMENT identifier {$$ = $2->data_type;} 272 | 273 | lhs: identifier {$$ = $1->data_type;} 274 | | array_access {$$ = $1;} 275 | ; 276 | 277 | identifier:IDENTIFIER { 278 | if(is_declaration 279 | && !rhs) 280 | { 281 | $1 = insert(SYMBOL_TABLE,yytext,INT_MAX,current_dtype); 282 | if($1 == NULL) yyerror("Redeclaration of variable"); 283 | } 284 | else 285 | { 286 | $1 = search_recursive(yytext); 287 | if($1 == NULL) yyerror("Variable not declared"); 288 | } 289 | $$ = $1; 290 | } 291 | ; 292 | 293 | assign_op:'=' {rhs=1;} 294 | |ADD_ASSIGN {rhs=1;} 295 | |SUB_ASSIGN {rhs=1;} 296 | |MUL_ASSIGN {rhs=1;} 297 | |DIV_ASSIGN {rhs=1;} 298 | |MOD_ASSIGN {rhs=1;} 299 | ; 300 | 301 | arithmetic_expr: arithmetic_expr '+' arithmetic_expr 
{type_check($1,$3,0);} 302 | |arithmetic_expr '-' arithmetic_expr {type_check($1,$3,0);} 303 | |arithmetic_expr '*' arithmetic_expr {type_check($1,$3,0);} 304 | |arithmetic_expr '/' arithmetic_expr {type_check($1,$3,0);} 305 | |arithmetic_expr '%' arithmetic_expr {type_check($1,$3,0);} 306 | |'(' arithmetic_expr ')' {$$ = $2;} 307 | |'-' arithmetic_expr %prec UMINUS {$$ = $2;} 308 | |identifier {$$ = $1->data_type;} 309 | |constant {$$ = $1->data_type;} 310 | ; 311 | 312 | constant: DEC_CONSTANT {$1->is_constant=1; $$ = $1;} 313 | | HEX_CONSTANT {$1->is_constant=1; $$ = $1;} 314 | | CHAR_CONSTANT {$1->is_constant=1; $$ = $1;} 315 | | FLOAT_CONSTANT {$1->is_constant=1; $$ = $1;} 316 | ; 317 |

/*
 * identifier '[' index ']' is either the declarator of an array or an access
 * to one of its elements:
 *   - inside a declaration and left of any assignment operator (is_declaration
 *     set, rhs clear) the bracketed value is the array SIZE. It must be
 *     positive, and is recorded when it is a constant.
 *   - everywhere else it is an INDEX. Only constant indices can be checked
 *     here, because identifiers carry no run-time value (they are inserted
 *     with INT_MAX). Valid indices are 0 .. array_dimension-1.
 * The value of the rule is the element (array) data type.
 */
array_access: identifier '[' array_index ']'    {
                    if(is_declaration && !rhs)
                    {
                        if($3->value <= 0)
                            yyerror("size of array is not positive");
                        else if($3->is_constant)
                            $1->array_dimension = $3->value;
                    }
                    else if($3->is_constant)
                    {
                        /* >= : an index equal to the dimension is already one past the end */
                        if($3->value >= $1->array_dimension)
                            yyerror("Array index out of bound");

                        if($3->value < 0)
                            yyerror("Array index cannot be negative");
                    }
                    $$ = $1->data_type;
                }
                ;

349 | array_index: constant {$$ = $1;} 350 | | identifier {$$ = $1;} 351 | ; 352 | 353 | function_call: identifier '(' parameter_list ')' { 354 | $$ = $1->data_type; 355 | check_parameter_list($1,param_list,p_idx); 356 | p_idx = 0; 357 | } 358 | 359 | | identifier '(' ')' { 360 | $$ = $1->data_type; 361 | check_parameter_list($1,param_list,p_idx); 362 | p_idx = 0; 363 | } 364 | ; 365 | 366 | parameter_list: 367 | parameter_list ',' parameter 368 | |parameter 369 | ; 370 | 371 | parameter: sub_expr {param_list[p_idx++] = $1;} 372 | | STRING {param_list[p_idx++] =
STRING;} 373 | ; 374 | %% 375 | 376 | void type_check(int left, int right, int flag) 377 | { 378 | if(left != right) 379 | { 380 | switch(flag) 381 | { 382 | case 0: yyerror("Type mismatch in arithmetic expression"); break; 383 | case 1: yyerror("Type mismatch in assignment expression"); break; 384 | case 2: yyerror("Type mismatch in logical expression"); break; 385 | } 386 | } 387 | } 388 | 389 | int main(int argc, char *argv[]) 390 | { 391 | int i; 392 | for(i=0; i 12 | #include 13 | #include 14 | #include 15 | #include 16 | 17 | #define HASH_TABLE_SIZE 100 18 | #define NUM_TABLES 10 19 | 20 | int table_index = 0; 21 | int current_scope = 0; 22 | 23 | /* struct to hold each entry */ 24 | struct entry_s 25 | { 26 | char* lexeme; 27 | double value; 28 | int data_type; 29 | int* parameter_list; // for functions 30 | int array_dimension; 31 | int is_constant; 32 | int num_params; 33 | struct entry_s* successor; 34 | }; 35 | 36 | typedef struct entry_s entry_t; 37 | 38 | /* Wrapper for symbol table with pointer to symbol table of parent scope */ 39 | struct table_s 40 | { 41 | entry_t** symbol_table; 42 | int parent; 43 | }; 44 | 45 | typedef struct table_s table_t; 46 | 47 | extern table_t symbol_table_list[NUM_TABLES]; 48 | 49 | /* Create a new hash_table. 
*/ 50 | entry_t** create_table() 51 | { 52 | entry_t** hash_table_ptr = NULL; // declare a pointer 53 | 54 | /* Allocate memory for a hashtable array of size HASH_TABLE_SIZE */ 55 | if( ( hash_table_ptr = malloc( sizeof( entry_t* ) * HASH_TABLE_SIZE ) ) == NULL ) 56 | return NULL; 57 | 58 | int i; 59 | 60 | // Intitialise all entries as NUscopeLL 61 | for( i = 0; i < HASH_TABLE_SIZE; i++ ) 62 | { 63 | hash_table_ptr[i] = NULL; 64 | } 65 | 66 | return hash_table_ptr; 67 | } 68 | 69 | int create_new_scope() 70 | { 71 | table_index++; 72 | 73 | symbol_table_list[table_index].symbol_table = create_table(); 74 | symbol_table_list[table_index].parent = current_scope; 75 | 76 | return table_index; 77 | } 78 | 79 | int exit_scope() 80 | { 81 | return symbol_table_list[current_scope].parent; 82 | } 83 | /* Generate hash from a string. Then generate an index in [0, HASH_TABLE_SIZE) */ 84 | uint32_t hash( char *lexeme ) 85 | { 86 | size_t i; 87 | uint32_t hash; 88 | 89 | /* Apply jenkin's hash function 90 | * https://en.wikipedia.org/wiki/Jenkins_hash_function#one-at-a-time 91 | */ 92 | for ( hash = i = 0; i < strlen(lexeme); ++i ) { 93 | hash += lexeme[i]; 94 | hash += ( hash << 10 ); 95 | hash ^= ( hash >> 6 ); 96 | } 97 | hash += ( hash << 3 ); 98 | hash ^= ( hash >> 11 ); 99 | hash += ( hash << 15 ); 100 | 101 | return hash % HASH_TABLE_SIZE; // return an index in [0, HASH_TABLE_SIZE) 102 | } 103 | 104 | /* Create an entry for a lexeme, token pair. This will be called from the insert function */ 105 | entry_t *create_entry( char *lexeme, int value, int data_type ) 106 | { 107 | entry_t *new_entry; 108 | 109 | /* Allocate space for new_entry */ 110 | if( ( new_entry = malloc( sizeof( entry_t ) ) ) == NULL ) { 111 | return NULL; 112 | } 113 | /* Copy lexeme to new_entry location using strdup (string-duplicate). 
Return NULL if it fails */ 114 | if( ( new_entry->lexeme = strdup( lexeme ) ) == NULL ) { 115 | return NULL; 116 | } 117 | 118 | new_entry->value = value; 119 | new_entry->successor = NULL; 120 | new_entry->parameter_list = NULL; 121 | new_entry->array_dimension = -1; 122 | new_entry->is_constant = 0; 123 | new_entry->num_params = 0; 124 | new_entry->data_type = data_type; 125 | 126 | return new_entry; 127 | } 128 | 129 | /* Search for an entry given a lexeme. Return a pointer to the entry of the lexeme exists, else return NULL */ 130 | entry_t* search(entry_t** hash_table_ptr, char* lexeme) 131 | { 132 | uint32_t idx = 0; 133 | entry_t* myentry; 134 | 135 | // get the index of this lexeme as per the hash function 136 | idx = hash( lexeme ); 137 | 138 | /* Traverse the linked list at this idx and see if lexeme exists */ 139 | myentry = hash_table_ptr[idx]; 140 | 141 | while( myentry != NULL && strcmp( lexeme, myentry->lexeme ) != 0 ) 142 | { 143 | myentry = myentry->successor; 144 | } 145 | 146 | if(myentry == NULL) // lexeme is not found 147 | return NULL; 148 | 149 | else // lexeme found 150 | return myentry; 151 | 152 | } 153 | 154 | // Search recursively in every parent scope for lexeme 155 | entry_t* search_recursive(char* lexeme) 156 | { 157 | int idx = current_scope; 158 | entry_t* finder = NULL; 159 | 160 | while(idx != -1) 161 | { 162 | finder = search(symbol_table_list[idx].symbol_table, lexeme); 163 | 164 | if(finder != NULL) 165 | return finder; 166 | 167 | idx = symbol_table_list[idx].parent; 168 | } 169 | 170 | return finder; 171 | } 172 | /* Insert an entry into a hash table. 
*/ 173 | entry_t* insert( entry_t** hash_table_ptr, char* lexeme, int value, int data_type) 174 | { 175 | // Make sure you pass the current scope symbol table here 176 | entry_t* finder = search( hash_table_ptr, lexeme ); 177 | if( finder != NULL) // If lexeme already exists, don't insert, return NULL 178 | { 179 | if(finder->is_constant) 180 | return finder; 181 | return NULL; //capture this is callee code and do necessary error handling 182 | } 183 | 184 | uint32_t idx; 185 | entry_t* new_entry = NULL; 186 | entry_t* head = NULL; 187 | 188 | idx = hash( lexeme ); // Get the index for this lexeme based on the hash function 189 | new_entry = create_entry( lexeme, value, data_type ); // Create an entry using the pair 190 | 191 | if(new_entry == NULL) // In case there was some error while executing create_entry() 192 | { 193 | printf("Insert failed. New entry could not be created."); 194 | exit(1); 195 | } 196 | 197 | head = hash_table_ptr[idx]; // get the head entry at this index 198 | 199 | if(head == NULL) // This is the first lexeme that matches this hash index 200 | { 201 | hash_table_ptr[idx] = new_entry; 202 | } 203 | else // if not, add this entry to the head 204 | { 205 | new_entry->successor = hash_table_ptr[idx]; 206 | hash_table_ptr[idx] = new_entry; 207 | } 208 | return hash_table_ptr[idx]; 209 | } 210 | 211 | // This is called after a function call to check if param list match 212 | int check_parameter_list(entry_t* entry, int* list, int m) 213 | { 214 | int* parameter_list = entry->parameter_list; 215 | 216 | if(m != entry->num_params) 217 | { 218 | yyerror("Number of parameters and arguments do not match"); 219 | } 220 | 221 | int i; 222 | for(i=0; iparameter_list = (int *)malloc(n*sizeof(int)); 234 | 235 | int i; 236 | for(i=0; iparameter_list[i] = list[i]; 239 | } 240 | entry->num_params = n; 241 | } 242 | 243 | 244 | void print_dashes(int n) 245 | { 246 | printf("\n"); 247 | 248 | int i; 249 | for(i=0; i< n; i++) 250 | printf("="); 251 | 
printf("\n"); 252 | } 253 | 254 | // Traverse the hash table and print all the entries 255 | void display_symbol_table(entry_t** hash_table_ptr) 256 | { 257 | int i; 258 | entry_t* traverser; 259 | 260 | print_dashes(100); 261 | 262 | printf(" %-20s %-20s %-20s %-20s %-20s\n","lexeme","data-type","array_dimension","num_params","param_list"); 263 | 264 | print_dashes(100); 265 | 266 | for( i=0; i < HASH_TABLE_SIZE; i++) 267 | { 268 | traverser = hash_table_ptr[i]; 269 | while( traverser != NULL) 270 | { 271 | printf(" %-20s %-20d %-20d ", traverser->lexeme, traverser->data_type, traverser->array_dimension); 272 | 273 | printf(" %-20d", traverser->num_params); 274 | 275 | int j; 276 | for(j=0; j < traverser->num_params; j++) 277 | printf(" %d",traverser->parameter_list[j]); 278 | printf("\n"); 279 | 280 | traverser = traverser->successor; 281 | } 282 | } 283 | 284 | print_dashes(100); 285 | 286 | } 287 | 288 | void display_constant_table(entry_t** hash_table_ptr) 289 | { 290 | int i; 291 | entry_t* traverser; 292 | 293 | print_dashes(25); 294 | 295 | printf(" %-10s %-10s \n","lexeme","data-type"); 296 | 297 | print_dashes(25); 298 | 299 | for( i=0; i < HASH_TABLE_SIZE; i++) 300 | { 301 | traverser = hash_table_ptr[i]; 302 | while( traverser != NULL) 303 | { 304 | printf(" %-10s %-10d \n", traverser->lexeme, traverser->data_type); 305 | traverser = traverser->successor; 306 | } 307 | } 308 | 309 | print_dashes(25); 310 | } 311 | 312 | void display_all() 313 | { 314 | int i; 315 | for(i=0; i<=table_index; i++) 316 | { 317 | printf("Scope: %d\n",i); 318 | display_symbol_table(symbol_table_list[i].symbol_table); 319 | printf("\n\n"); 320 | } 321 | } 322 | -------------------------------------------------------------------------------- /Project-3/test.c: -------------------------------------------------------------------------------- 1 | /* 2 | */ 3 | 4 | 5 | /* 6 | Karthik M - 15CO221 7 | Kaushik S Kalmady - 15CO222 8 | Compiler Design Project 2 9 | 10 | Test Case 1 11 | 
- Missing semicolon 12 | - Single quotes for characters and double quotes for strings 13 | 14 | The output in lex/yacc should identify statements without a terminating semiclon and unmatched single and double quotes 15 | */ 16 | 17 | int fun(int a, int b, int c) 18 | { 19 | return 2 ; 20 | } 21 | 22 | int main() 23 | { 24 | 25 | // int arr[-1]; 26 | // int arr1[0]; 27 | // int arr2[-1]; 28 | // break; 29 | // arr[0] = 4; 30 | // arr[-1] = 10; 31 | // arr[11] = 2; 32 | 33 | int x = 1; 34 | 35 | int y; 36 | int z; 37 | 38 | if(x<0) 39 | { 40 | int y; 41 | 42 | int z=1; 43 | 44 | { 45 | int w =1; 46 | } 47 | //w=3; 48 | } 49 | char c='a'; 50 | for(x=2;x 0){ 21 | int x; //This is fine 22 | } 23 | 24 | if(y==1){ 25 | int x; // This is also fine 26 | y = 2; // This too 27 | int z; 28 | } 29 | 30 | /*z = 2; //Variable not declared in scope*/ 31 | 32 | } 33 | -------------------------------------------------------------------------------- /Project-3/testcases/test-case-2.c: -------------------------------------------------------------------------------- 1 | /* 2 | Karthik M - 15CO221 3 | Kaushik S Kalmady - 15CO222 4 | Compiler Design Project 3 5 | 6 | Test Case 3 7 | Check for invalid array dimensions 8 | Check for array index out of range 9 | In declaration list with assign statement differentiate lhs and rhs(i.e lhs should be checked for redeclaraion but rhs shouldn't. 10 | In "int a=b, c=d;" a,c have to be checked for redeclarion, b,d have to checked if they have been declared at all.) 
11 | */ 12 | 13 | 14 | int main() 15 | { 16 | /*int arr[0]; //Invalid*/ 17 | /*int arr1[-1]; //Invalid*/ 18 | int arr2[10]; 19 | 20 | int x = arr2[5], y = arr2[9]; //Valid 21 | int w = arr2[4.5]; 22 | int z = arr2[11]; 23 | return 0; 24 | } 25 | -------------------------------------------------------------------------------- /Project-3/testcases/test-case-3.c: -------------------------------------------------------------------------------- 1 | /* 2 | Karthik M - 15CO221 3 | Kaushik S Kalmady - 15CO222 4 | Compiler Design Project 3 5 | 6 | Test Case 3 7 | LHS and RHS types should match in assignment expressions 8 | Types need to match in arithmetic/relational and logical expressions too 9 | */ 10 | 11 | 12 | int main() 13 | { 14 | int x; 15 | float y; 16 | char z; 17 | 18 | x = 1; //Valid 19 | /*y = 'c'; //Invalid*/ 20 | z ='2'; //valid 21 | 22 | int c = 2; 23 | int d; 24 | d = x + c; //valid 25 | /*d = z + c; // Invalid*/ 26 | 27 | char a; 28 | if(x < c){ 29 | /*a = c; //Invalid*/ 30 | } 31 | return 0; 32 | } 33 | -------------------------------------------------------------------------------- /Project-3/testcases/test-case-4.c: -------------------------------------------------------------------------------- 1 | /* 2 | Karthik M - 15CO221 3 | Kaushik S Kalmady - 15CO222 4 | Compiler Design Project 3 5 | 6 | Test Case 4 7 | Invalid use of BREAK and CONTINUE 8 | 9 | */ 10 | 11 | 12 | int main() 13 | { 14 | break; //invalid 15 | 16 | int i; 17 | for(i=1; i<10; i++){ 18 | int x; 19 | continue; 20 | } 21 | /*continue; //invalid*/ 22 | } 23 | -------------------------------------------------------------------------------- /Project-3/testcases/test-case-5.c: -------------------------------------------------------------------------------- 1 | /* 2 | Karthik M - 15CO221 3 | Kaushik S Kalmady - 15CO222 4 | Compiler Design Project 3 5 | 6 | Test Case 5 7 | Check function parameter number and type 8 | 9 | */ 10 | 11 | int fun(int i, char c){ 12 | return 2; 13 | } 14 | 15 | int 
fun2(int i, char c){ 16 | /*return '2';*/ 17 | } 18 | 19 | int main() 20 | { 21 | fun(2); //Number of parameters dont match 22 | /*fun(2,2); //Parameter types dont match*/ 23 | fun(2,'3'); // Valid 24 | /*fun(2,'3',4); //Invalid*/ 25 | 26 | return 2; 27 | } 28 | -------------------------------------------------------------------------------- /Project-3/testcases/testcases: -------------------------------------------------------------------------------- 1 | /* 2 | -lhs and rhs type not matching 3 | - Arithmetic and logical expression type mismatch 4 | -Return type and function type mismatch 5 | -Array index out of range for constants 6 | -Invalid array dimensions 7 | - Break and continue outside for loop 8 | -Scope 9 | -Parameter type and number checking 10 | 11 | */ 12 | 13 | 14 | -------------------------------------------------------------------------------- /Project-4/ICG.code: -------------------------------------------------------------------------------- 1 | 0: main: 2 | 1: arg argc 3 | 2: i = 0 4 | 3: if i < 10 goto 7 5 | 4: goto 38 6 | 5: i++ 7 | 6: goto 3 8 | 7: x = 120 9 | 8: i = 2 10 | 9: t0 = 2 + 3 11 | 10: if j < t0 goto 12 12 | 11: goto 37 13 | 12: i = 3 14 | 13: t1 = i / 6 15 | 14: t2 = 3 + t1 16 | 15: t3 = t2 + 2 17 | 16: t4 = t3 + 5 18 | 17: j = t4 19 | 18: j = arr[7] 20 | 19: if j < 5 goto 21 21 | 20: goto 23 22 | 21: if i == 3 goto 25 23 | 22: goto 23 24 | 23: if j != 5 goto 25 25 | 24: goto 36 26 | 25: j = 2 27 | 26: if 2 < 4 goto 28 28 | 27: goto 30 29 | 28: goto 37 30 | 29: goto 36 31 | 30: if 2 > 3 goto 32 32 | 31: goto 34 33 | 32: goto 9 34 | 33: goto 36 35 | 34: t5 = 4 + 5 36 | 35: i = t5 37 | 36: goto 9 38 | 37: goto 5 39 | 38: exit -------------------------------------------------------------------------------- /Project-4/README.md: -------------------------------------------------------------------------------- 1 | # Compiler Design Project-4 2 | 3 | ## Intermediate Code Generation for a subset of the C language 4 | This code is built 
upon the semantic analyser designed in Project-3 with some modifications to the earlier structure. 5 | 6 | ### Team Members 7 | - Karthik M (15CO221) 8 | - Kaushik S Kalmady (15CO222) 9 | 10 | ### Installation and running 11 | 1. Please make sure you have Lex/Flex and Yacc/Bison installed 12 | 2. Next run `$ chmod +x compile` 13 | 3. `$ ./compile` 14 | 4. To run the parser `$./icg test-file.c` 15 | 16 | 17 | ### FEATURES 18 | - Backward compatibility with Semantic Analyser from Project-3 19 | - Code generation for arithmetic expressions 20 | - Backpatching for `if-else` statements and `nested if-else` 21 | - Backpatching for `while` and `for` loops 22 | - Array indexing 23 | - Backpatching for logical and relational expressions 24 | - Jumps for `break` and `continue` -------------------------------------------------------------------------------- /Project-4/compile: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | lex lexer.l 3 | yacc -d parser.y -v 4 | g++ -std=c++11 -g y.tab.c -ly -ll -o icg 5 | -------------------------------------------------------------------------------- /Project-4/icg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kaushiksk/mini-c-compiler/f55e6a84a2e82a2a88957c41f626b0a1954c232d/Project-4/icg -------------------------------------------------------------------------------- /Project-4/lexer.l: -------------------------------------------------------------------------------- 1 | %{ 2 | 3 | #include 4 | #include 5 | #include 6 | 7 | #include "y.tab.h" 8 | 9 | entry_t** constant_table; 10 | 11 | int cmnt_strt = 0; 12 | 13 | %} 14 | 15 | letter [a-zA-Z] 16 | digit [0-9] 17 | ws [ \t\r\f\v]+ 18 | identifier (_|{letter})({letter}|{digit}|_){0,31} 19 | hex [0-9a-f] 20 | 21 | /* Exclusive states */ 22 | %x CMNT 23 | 24 | %% 25 | /* Keywords*/ 26 | "int" {return INT;} 27 | "char" {return CHAR;} 28 | "char*" {return CHAR_STAR;} 29 | 
"float" {return FLOAT;} 30 | "void" {return VOID;} 31 | "long" {return LONG;} 32 | "long long" {return LONG_LONG;} 33 | "short" {return SHORT;} 34 | "signed" {return SIGNED;} 35 | "unsigned" {return UNSIGNED;} 36 | "for" {return FOR;} 37 | "while" {return WHILE;} 38 | "break" {return BREAK;} 39 | "continue" {return CONTINUE;} 40 | "if" {return IF;} 41 | "else" {return ELSE;} 42 | "return" {return RETURN;} 43 | 44 | {identifier} {return IDENTIFIER;} 45 | {ws} ; 46 | [+\-]?[0][x|X]{hex}+[lLuU]? { yylval.entry = insert(constant_table,yytext,(int)strtol(yytext, NULL, 16),INT); return HEX_CONSTANT;} 47 | '({letter}|{digit})' { yylval.entry = insert(constant_table,yytext,yytext[1],CHAR); return CHAR_CONSTANT;} 48 | [+\-]?{digit}+[lLuU]? { yylval.entry = insert(constant_table,yytext,atoi(yytext),INT); return DEC_CONSTANT;} 49 | [+\-]?{digit}*\.{digit}+ { yylval.entry = insert(constant_table,yytext,atof(yytext),FLOAT); return FLOAT_CONSTANT;} 50 | 51 | "/*" {cmnt_strt = yylineno; BEGIN CMNT;} 52 | .|{ws} ; 53 | \n {yylineno++;} 54 | "*/" {BEGIN INITIAL;} 55 | "/*" {printf("Line %3d: Nested comments are not valid!\n",yylineno);} 56 | <> {printf("Line %3d: Unterminated comment\n", cmnt_strt); yyterminate();} 57 | 58 | "//".* ; 59 | 60 | \"[^\"\n]*\" { 61 | if(yytext[yyleng-2]=='\\') /* check if it was an escaped quote */ 62 | { 63 | yyless(yyleng-1); /* push the quote back if it was escaped */ 64 | yymore(); 65 | } 66 | else 67 | { 68 | yylval.entry = insert(constant_table,yytext,INT_MAX,STRING); 69 | return STRING; 70 | } 71 | } 72 | 73 | \"[^\"\n]*$ {printf("Line %3d: Unterminated string %s\n",yylineno,yytext);} 74 | {digit}+({letter}|_)+ {printf("Line %3d: Illegal identifier name %s\n",yylineno,yytext);} 75 | \n {yylineno++;} 76 | 77 | "--" {return DECREMENT;} 78 | "++" {return INCREMENT;} 79 | "+=" {return ADD_ASSIGN;} 80 | "-=" {return SUB_ASSIGN;} 81 | "*=" {return MUL_ASSIGN;} 82 | "/=" {return DIV_ASSIGN;} 83 | "%=" {return MOD_ASSIGN;} 84 | 85 | 86 | "&&" {return 
LOGICAL_AND;} 87 | "||" {return LOGICAL_OR;} 88 | "<=" {return LS_EQ;} 89 | ">=" {return GR_EQ;} 90 | "==" {return EQ;} 91 | "!=" {return NOT_EQ;} 92 | 93 | . {return yytext[0];} 94 | 95 | %% 96 | -------------------------------------------------------------------------------- /Project-4/parser.y: -------------------------------------------------------------------------------- 1 | %{ 2 | #include 3 | #include "symboltable.h" 4 | #include "lex.yy.c" 5 | 6 | using namespace std; 7 | 8 | int yyerror(char *msg); 9 | 10 | #define SYMBOL_TABLE symbol_table_list[current_scope].symbol_table 11 | 12 | extern entry_t** constant_table; 13 | 14 | int current_dtype; 15 | 16 | table_t symbol_table_list[NUM_TABLES]; 17 | 18 | int is_declaration = 0; 19 | int is_loop = 0; 20 | int is_func = 0; 21 | int func_type; 22 | 23 | int param_list[10]; 24 | int p_idx = 0; 25 | int p=0; 26 | int rhs = 0; 27 | 28 | void type_check(int,int,int); 29 | vector merge(vector& v1, vector& v2); 30 | void backpatch(vector&, int); 31 | void gencode(string); 32 | void gencode_math(content_t* & lhs, content_t* arg1, content_t* arg2, const string& op); 33 | void gencode_rel(content_t* & lhs, content_t* arg1, content_t* arg2, const string& op); 34 | void printlist(vector); 35 | 36 | int nextinstr = 0; 37 | int temp_var_number = 0; 38 | 39 | vector ICG; 40 | 41 | %} 42 | 43 | %union 44 | { 45 | int data_type; 46 | entry_t* entry; 47 | content_t* content; 48 | string* op; 49 | vector* nextlist; 50 | int instr; 51 | } 52 | 53 | %token IDENTIFIER 54 | 55 | /* Constants */ 56 | %token DEC_CONSTANT HEX_CONSTANT CHAR_CONSTANT FLOAT_CONSTANT STRING 57 | 58 | /* Logical and Relational operators */ 59 | %token LOGICAL_AND LOGICAL_OR LS_EQ GR_EQ EQ NOT_EQ 60 | 61 | /* Short hand assignment operators */ 62 | %token MUL_ASSIGN DIV_ASSIGN MOD_ASSIGN ADD_ASSIGN SUB_ASSIGN 63 | %token INCREMENT DECREMENT 64 | 65 | /* Data types */ 66 | %token SHORT INT LONG LONG_LONG SIGNED UNSIGNED CONST VOID CHAR FLOAT CHAR_STAR 67 | 68 
| /* Keywords */ 69 | %token IF FOR WHILE CONTINUE BREAK RETURN 70 | 71 | %type identifier 72 | %type constant 73 | %type array_index 74 | 75 | %type assign; 76 | %type function_call 77 | 78 | %type lhs 79 | %type sub_expr 80 | %type expression 81 | %type expression_stmt 82 | %type unary_expr 83 | %type arithmetic_expr 84 | %type assignment_expr 85 | %type array_access 86 | 87 | %type if_block 88 | %type for_block 89 | %type while_block 90 | %type compound_stmt 91 | 92 | %type statements 93 | %type single_stmt 94 | %type stmt 95 | 96 | 97 | %type M 98 | %type N 99 | 100 | %left ',' 101 | %right '=' 102 | %left LOGICAL_OR 103 | %left LOGICAL_AND 104 | %left EQ NOT_EQ 105 | %left '<' '>' LS_EQ GR_EQ 106 | %left '+' '-' 107 | %left '*' '/' '%' 108 | %right '!' 109 | 110 | 111 | %nonassoc UMINUS 112 | %nonassoc LOWER_THAN_ELSE 113 | %nonassoc ELSE 114 | 115 | 116 | %% 117 | 118 | /* Program is made up of multiple builder blocks. */ 119 | starter: starter builder 120 | | builder; 121 | 122 | /* Each builder block is either a function or a declaration */ 123 | builder: function 124 | | declaration 125 | ; 126 | 127 | /* This is how a function looks like */ 128 | function: type identifier 129 | { 130 | func_type = current_dtype; 131 | is_declaration = 0; 132 | current_scope = create_new_scope(); 133 | gencode($2->lexeme + string(":")); 134 | } 135 | 136 | '(' argument_list ')' 137 | { 138 | is_declaration = 0; 139 | fill_parameter_list($2,param_list,p_idx); 140 | p_idx = 0; 141 | is_func = 1; 142 | p=1; 143 | } 144 | 145 | compound_stmt { is_func = 0; } 146 | 147 | ; 148 | 149 | /* Now we will define a grammar for how types can be specified */ 150 | 151 | type : data_type pointer {is_declaration = 1; } 152 | | data_type {is_declaration = 1; } 153 | ; 154 | 155 | pointer: '*' pointer 156 | | '*' 157 | ; 158 | 159 | data_type : sign_specifier type_specifier 160 | | type_specifier 161 | ; 162 | 163 | sign_specifier : SIGNED 164 | | UNSIGNED 165 | ; 166 | 167 | type_specifier 
:INT {current_dtype = INT;} 168 | |SHORT INT {current_dtype = SHORT;} 169 | |SHORT {current_dtype = SHORT;} 170 | |LONG {current_dtype = LONG;} 171 | |LONG INT {current_dtype = LONG;} 172 | |LONG_LONG {current_dtype = LONG_LONG;} 173 | |LONG_LONG INT {current_dtype = LONG_LONG;} 174 | |CHAR {current_dtype = CHAR;} 175 | |FLOAT {current_dtype = FLOAT;} 176 | |VOID {current_dtype = VOID;} 177 | |CHAR_STAR {current_dtype = STRING;} 178 | ; 179 | 180 | /* grammar rules for argument list */ 181 | /* argument list can be empty */ 182 | argument_list : arguments 183 | | 184 | ; 185 | /* arguments are comma separated TYPE ID pairs */ 186 | arguments : arguments ',' arg 187 | | arg 188 | ; 189 | 190 | /* Each arg is a TYPE ID pair */ 191 | arg : type identifier { 192 | param_list[p_idx++] = $2->data_type; 193 | gencode(string("arg ") + $2->lexeme); 194 | } 195 | ; 196 | 197 | /* Generic statement. Can be compound or a single statement */ 198 | stmt:compound_stmt {$$ = new content_t(); $$=$1;} 199 | |single_stmt {$$ = new content_t(); $$=$1;} 200 | ; 201 | 202 | /* The function body is covered in braces and has multiple statements. 
*/ 203 | compound_stmt : 204 | '{' 205 | 206 | { 207 | if(!p)current_scope = create_new_scope(); 208 | else p = 0; 209 | } 210 | 211 | statements 212 | 213 | '}' 214 | 215 | { 216 | current_scope = exit_scope(); 217 | $$ = new content_t(); 218 | $$ = $3; 219 | } 220 | ; 221 | 222 | statements:statements M stmt { 223 | backpatch($1->nextlist,$2); 224 | $$ = new content_t(); 225 | $$->nextlist = $3->nextlist; 226 | $$->breaklist = merge($1->breaklist,$3->breaklist); 227 | $$->continuelist = merge($1->continuelist,$3->continuelist); 228 | } 229 | 230 | | { $$ = new content_t(); } 231 | ; 232 | 233 | /* Grammar for what constitutes every individual statement */ 234 | single_stmt :if_block { 235 | $$ = new content_t(); 236 | $$ = $1; 237 | backpatch($$->nextlist, nextinstr); 238 | } 239 | 240 | |for_block { 241 | $$ = new content_t(); 242 | $$ = $1; 243 | backpatch($$->nextlist, nextinstr); 244 | } 245 | 246 | |while_block { 247 | $$ = new content_t(); 248 | $$ = $1; 249 | backpatch($$->nextlist, nextinstr); 250 | } 251 | |declaration {$$ = new content_t();} 252 | |function_call ';' {$$ = new content_t();} 253 | |RETURN ';' { 254 | if(is_func) 255 | { 256 | if(func_type != VOID) 257 | yyerror("return type (VOID) does not match function type"); 258 | } 259 | else yyerror("return statement not inside function definition"); 260 | } 261 | 262 | |CONTINUE ';' { 263 | if(!is_loop) 264 | yyerror("Illegal use of continue"); 265 | $$ = new content_t(); 266 | $$->continuelist = {nextinstr}; 267 | gencode("goto _"); 268 | } 269 | 270 | |BREAK ';' { 271 | if(!is_loop) {yyerror("Illegal use of break");} 272 | $$ = new content_t(); 273 | $$->breaklist = {nextinstr}; 274 | gencode("goto _"); 275 | } 276 | 277 | |RETURN sub_expr ';' 278 | { 279 | if(is_func) 280 | { 281 | if(func_type != $2->data_type) 282 | yyerror("return type does not match function type"); 283 | } 284 | else yyerror("return statement not in function definition"); 285 | } 286 | ; 287 | 288 | for_block: FOR '(' 
expression_stmt M expression_stmt M expression ')' {is_loop = 1;} N M stmt {is_loop = 0;} 289 | { 290 | backpatch($5->truelist,$11); 291 | backpatch($12->nextlist,$6); 292 | backpatch($12->continuelist, $6); 293 | backpatch($10->nextlist, $4); 294 | $$ = new content_t(); 295 | $$->nextlist = merge($5->falselist,$12->breaklist); 296 | gencode(string("goto ") + to_string($6)); 297 | } 298 | 299 | ; 300 | 301 | if_block:IF '(' expression ')' M stmt %prec LOWER_THAN_ELSE 302 | { 303 | backpatch($3->truelist,$5); 304 | $$ = new content_t(); 305 | $$->nextlist = merge($3->falselist,$6->nextlist); 306 | $$->breaklist = $6->breaklist; 307 | $$->continuelist = $6->continuelist; 308 | } 309 | 310 | |IF '(' expression ')' M stmt ELSE N M stmt 311 | { 312 | backpatch($3->truelist,$5); 313 | backpatch($3->falselist,$9); 314 | 315 | $$ = new content_t(); 316 | vector temp = merge($6->nextlist,$8->nextlist); 317 | $$->nextlist = merge(temp,$10->nextlist); 318 | $$->breaklist = merge($10->breaklist,$6->breaklist); 319 | $$->continuelist = merge($10->continuelist,$6->continuelist); 320 | } 321 | ; 322 | 323 | while_block: WHILE M '(' expression ')' M {is_loop = 1;} stmt {is_loop = 0;} 324 | { 325 | backpatch($8->nextlist,$2); 326 | backpatch($4->truelist,$6); 327 | backpatch($8->continuelist, $2); 328 | $$ = new content_t(); 329 | $$->nextlist = merge($4->falselist,$8->breaklist); 330 | gencode(string("goto ") + to_string($2)); 331 | } 332 | ; 333 | 334 | declaration: type declaration_list ';' {is_declaration = 0;} 335 | | declaration_list ';' 336 | | unary_expr ';' 337 | 338 | 339 | declaration_list: declaration_list ',' sub_decl 340 | |sub_decl 341 | ; 342 | 343 | sub_decl: assignment_expr 344 | |identifier 345 | |array_access 346 | ; 347 | 348 | /* This is because we can have empty expession statements inside for loops */ 349 | expression_stmt: expression ';' 350 | { 351 | $$ = new content_t(); 352 | $$->truelist = $1->truelist; 353 | $$->falselist = $1->falselist; 354 | } 355 | 
356 | | ';' { $$ = new content_t(); } 357 | ; 358 | 359 | expression: expression ',' sub_expr 360 | { 361 | $$ = new content_t(); 362 | $$->truelist = $3->truelist; 363 | $$->falselist = $3->falselist; 364 | } 365 | | sub_expr 366 | { 367 | $$ = new content_t(); 368 | $$->truelist = $1->truelist; 369 | $$->falselist = $1->falselist; 370 | } 371 | ; 372 | 373 | sub_expr: 374 | 375 | sub_expr '>' sub_expr 376 | { 377 | type_check($1->data_type,$3->data_type,2); 378 | $$ = new content_t(); 379 | gencode_rel($$, $1, $3, string(" > ")); 380 | } 381 | | sub_expr '<' sub_expr 382 | { 383 | type_check($1->data_type,$3->data_type,2); 384 | $$ = new content_t(); 385 | gencode_rel($$, $1, $3, string(" < ")); 386 | } 387 | 388 | | sub_expr EQ sub_expr 389 | { 390 | type_check($1->data_type,$3->data_type,2); 391 | $$ = new content_t(); 392 | gencode_rel($$, $1, $3, string(" == ")); 393 | } 394 | 395 | | sub_expr NOT_EQ sub_expr 396 | { 397 | type_check($1->data_type,$3->data_type,2); 398 | $$ = new content_t(); 399 | gencode_rel($$, $1, $3, string(" != ")); 400 | } 401 | 402 | | sub_expr GR_EQ sub_expr 403 | { 404 | type_check($1->data_type,$3->data_type,2); 405 | $$ = new content_t(); 406 | gencode_rel($$, $1, $3, string(" >= ")); 407 | } 408 | 409 | | sub_expr LS_EQ sub_expr 410 | { 411 | type_check($1->data_type,$3->data_type,2); 412 | $$ = new content_t(); 413 | gencode_rel($$, $1, $3, string(" <= ")); 414 | } 415 | 416 | |sub_expr LOGICAL_AND M sub_expr 417 | { 418 | type_check($1->data_type,$4->data_type,2); 419 | $$ = new content_t(); 420 | $$->data_type = $1->data_type; 421 | backpatch($1->truelist,$3); 422 | $$->truelist = $4->truelist; 423 | $$->falselist = merge($1->falselist,$4->falselist); 424 | } 425 | 426 | |sub_expr LOGICAL_OR M sub_expr 427 | { 428 | type_check($1->data_type,$4->data_type,2); 429 | $$ = new content_t(); 430 | $$->data_type = $1->data_type; 431 | backpatch($1->falselist,$3); 432 | $$->truelist = merge($1->truelist,$4->truelist); 433 | 
$$->falselist = $4->falselist; 434 | } 435 | 436 | |'!' sub_expr 437 | { 438 | $$ = new content_t(); 439 | $$->data_type = $2->data_type; 440 | $$->truelist = $2->falselist; 441 | $$->falselist = $2->truelist; 442 | } 443 | 444 | |arithmetic_expr 445 | { 446 | $$ = new content_t(); 447 | $$->data_type = $1->data_type; 448 | $$->addr = $1->addr; 449 | } 450 | |assignment_expr 451 | { 452 | $$ = new content_t(); 453 | $$->data_type = $1->data_type; 454 | } 455 | |unary_expr 456 | { 457 | $$ = new content_t(); 458 | $$->data_type = $1->data_type; 459 | } 460 | ; 461 | 462 | assignment_expr : 463 | lhs assign arithmetic_expr 464 | { 465 | type_check($1->entry->data_type,$3->data_type,1); 466 | $$ = new content_t(); 467 | $$->data_type = $3->data_type; 468 | $$->code = $1->entry->lexeme + *$2 + $3->addr; 469 | gencode($$->code); 470 | rhs = 0; 471 | } 472 | 473 | |lhs assign array_access 474 | { 475 | type_check($1->entry->data_type,$3->data_type,1); 476 | $$ = new content_t(); 477 | $$->data_type = $3->data_type; 478 | $$->code = $1->entry->lexeme + *$2 + $3->code; 479 | gencode($$->code); 480 | rhs = 0; 481 | } 482 | 483 | |lhs assign function_call 484 | { 485 | type_check($1->entry->data_type,$3,1); 486 | $$ = new content_t(); 487 | $$->data_type = $3; 488 | } 489 | 490 | |lhs assign unary_expr 491 | { 492 | type_check($1->entry->data_type,$3->data_type,1); 493 | $$ = new content_t(); 494 | $$->data_type = $3->data_type; 495 | $$->code = $1->entry->lexeme + *$2 + $3->code; 496 | gencode($$->code); 497 | rhs = 0; 498 | } 499 | 500 | |unary_expr assign unary_expr 501 | { 502 | type_check($1->data_type,$3->data_type,1); 503 | $$ = new content_t(); 504 | $$->data_type = $3->data_type; 505 | $$->code = $1->code + *$2 + $3->code; 506 | gencode($$->code); 507 | rhs = 0; 508 | } 509 | ; 510 | 511 | unary_expr: 512 | identifier INCREMENT 513 | { 514 | $$ = new content_t(); 515 | $$->data_type = $1->data_type; 516 | $$->code = string($1->lexeme) + string("++"); 517 | 
gencode($$->code); 518 | } 519 | 520 | | identifier DECREMENT 521 | { 522 | $$ = new content_t(); 523 | $$->data_type = $1->data_type; 524 | $$->code = string($1->lexeme) + string("--"); 525 | gencode($$->code); 526 | } 527 | 528 | | DECREMENT identifier 529 | { 530 | $$ = new content_t(); 531 | $$->data_type = $2->data_type; 532 | $$->code = string("--") + string($2->lexeme); 533 | gencode($$->code); 534 | } 535 | 536 | | INCREMENT identifier 537 | { 538 | $$ = new content_t(); 539 | $$->data_type = $2->data_type; 540 | $$->code = string("++") + string($2->lexeme); 541 | gencode($$->code); 542 | } 543 | 544 | lhs: identifier {$$ = new content_t(); $$->entry = $1;} 545 | | array_access {$$ = new content_t(); $$->code = $1->code;} 546 | ; 547 | 548 | identifier:IDENTIFIER 549 | { 550 | if(is_declaration && !rhs) 551 | { 552 | $1 = insert(SYMBOL_TABLE,yytext,INT_MAX,current_dtype); 553 | if($1 == NULL) 554 | yyerror("Redeclaration of variable"); 555 | } 556 | else 557 | { 558 | $1 = search_recursive(yytext); 559 | if($1 == NULL) 560 | yyerror("Variable not declared"); 561 | } 562 | 563 | $$ = $1; 564 | } 565 | ; 566 | 567 | assign:'=' {rhs=1; $$ = new string(" = ");} 568 | |ADD_ASSIGN {rhs=1; $$ = new string(" += ");} 569 | |SUB_ASSIGN {rhs=1; $$ = new string(" -= ");} 570 | |MUL_ASSIGN {rhs=1; $$ = new string(" *= ");} 571 | |DIV_ASSIGN {rhs=1; $$ = new string(" /= ");} 572 | |MOD_ASSIGN {rhs=1; $$ = new string(" %= ");} 573 | ; 574 | 575 | arithmetic_expr: arithmetic_expr '+' arithmetic_expr 576 | { 577 | type_check($1->data_type,$3->data_type,0); 578 | $$ = new content_t(); 579 | $$->data_type = $1->data_type; 580 | gencode_math($$, $1, $3, string(" + ")); 581 | } 582 | 583 | | arithmetic_expr '-' arithmetic_expr 584 | { 585 | type_check($1->data_type,$3->data_type,0); 586 | $$ = new content_t(); 587 | $$->data_type = $1->data_type; 588 | gencode_math($$, $1, $3, string(" - ")); 589 | } 590 | 591 | | arithmetic_expr '*' arithmetic_expr 592 | { 593 | 
type_check($1->data_type,$3->data_type,0); 594 | $$ = new content_t(); 595 | $$->data_type = $1->data_type; 596 | gencode_math($$, $1, $3, string(" * ")); 597 | } 598 | 599 | | arithmetic_expr '/' arithmetic_expr 600 | { 601 | type_check($1->data_type,$3->data_type,0); 602 | $$ = new content_t(); 603 | $$->data_type = $1->data_type; 604 | gencode_math($$, $1, $3, string(" / ")); 605 | } 606 | 607 | | arithmetic_expr '%' arithmetic_expr 608 | { 609 | type_check($1->data_type,$3->data_type,0); 610 | $$ = new content_t(); 611 | $$->data_type = $1->data_type; 612 | gencode_math($$, $1, $3, string(" % ")); 613 | } 614 | 615 | |'(' arithmetic_expr ')' 616 | { 617 | $$ = new content_t(); 618 | $$->data_type = $2->data_type; 619 | $$->addr = $2->addr; 620 | $$->code = $2->code; 621 | } 622 | 623 | |'-' arithmetic_expr %prec UMINUS 624 | { 625 | $$ = new content_t(); 626 | $$->data_type = $2->data_type; 627 | $$->addr = "t" + to_string(temp_var_number); 628 | std::string expr = $$->addr + " = " + "minus " + $2->addr; 629 | $$->code = $2->code + expr; 630 | temp_var_number++; 631 | } 632 | 633 | |identifier 634 | { 635 | $$ = new content_t(); 636 | $$->data_type = $1->data_type; 637 | $$->addr = $1->lexeme; 638 | } 639 | 640 | |constant 641 | { 642 | $$ = new content_t(); 643 | $$->data_type = $1->data_type; 644 | $$->addr = to_string($1->value); 645 | } 646 | ; 647 | 648 | constant: DEC_CONSTANT {$1->is_constant=1; $$ = $1;} 649 | | HEX_CONSTANT {$1->is_constant=1; $$ = $1;} 650 | | CHAR_CONSTANT {$1->is_constant=1; $$ = $1;} 651 | | FLOAT_CONSTANT {$1->is_constant=1; $$ = $1;} 652 | ; 653 | 654 | array_access: identifier '[' array_index ']' 655 | { 656 | if(is_declaration) 657 | { 658 | if($3->value <= 0) 659 | yyerror("size of array is not positive"); 660 | else if($3->is_constant) 661 | $1->array_dimension = $3->value; 662 | } 663 | else if($3->is_constant) 664 | { 665 | if($3->value > $1->array_dimension) 666 | yyerror("Array index out of bound"); 667 | if($3->value < 
0) 668 | yyerror("Array index cannot be negative"); 669 | } 670 | 671 | $$ = new content_t(); 672 | $$->data_type = $1->data_type; 673 | 674 | if($3->is_constant) 675 | $$->code = string($1->lexeme) + string("[") + to_string($3->value) + string("]"); 676 | else 677 | $$->code = string($1->lexeme) + string("[") + string($3->lexeme) + string("]"); 678 | $$->entry = $1; 679 | } 680 | 681 | array_index: constant {$$ = $1;} 682 | | identifier {$$ = $1;} 683 | ; 684 | 685 | function_call: identifier '(' parameter_list ')' 686 | { 687 | $$ = $1->data_type; 688 | check_parameter_list($1,param_list,p_idx); 689 | p_idx = 0; 690 | gencode(string("call ") + $1->lexeme); 691 | } 692 | 693 | | identifier '(' ')' 694 | { 695 | $$ = $1->data_type; 696 | check_parameter_list($1,param_list,p_idx); 697 | p_idx = 0; 698 | gencode(string("call ") + $1->lexeme); 699 | } 700 | ; 701 | 702 | parameter_list: 703 | parameter_list ',' parameter 704 | |parameter 705 | ; 706 | 707 | parameter: sub_expr 708 | { 709 | param_list[p_idx++] = $1->data_type; 710 | gencode(string("param ") + $1->addr); 711 | } 712 | | STRING 713 | { 714 | param_list[p_idx++] = STRING; 715 | gencode(string("param ") + $1->lexeme); 716 | } 717 | ; 718 | 719 | M: {$$ = nextinstr;} 720 | ; 721 | 722 | N: { 723 | $$ = new content_t; 724 | $$->nextlist = {nextinstr}; 725 | gencode("goto _"); 726 | } 727 | ; 728 | 729 | %% 730 | 731 | void gencode(string x) 732 | { 733 | std::string instruction; 734 | 735 | instruction = to_string(nextinstr) + string(": ") + x; 736 | ICG.push_back(instruction); 737 | nextinstr++; 738 | } 739 | 740 | 741 | void gencode_rel(content_t* & lhs, content_t* arg1, content_t* arg2, const string& op) 742 | { 743 | lhs->data_type = arg1->data_type; 744 | 745 | lhs->truelist = {nextinstr}; 746 | lhs->falselist = {nextinstr + 1}; 747 | 748 | std::string code; 749 | 750 | code = string("if ") + arg1->addr + op + arg2->addr + string(" goto _"); 751 | gencode(code); 752 | 753 | code = string("goto _"); 754 
| gencode(code); 755 | } 756 | 757 | void gencode_math(content_t* & lhs, content_t* arg1, content_t* arg2, const string& op) 758 | { 759 | lhs->addr = "t" + to_string(temp_var_number); 760 | std::string expr = lhs->addr + string(" = ") + arg1->addr + op + arg2->addr; 761 | lhs->code = arg1->code + arg2->code + expr; 762 | 763 | temp_var_number++; 764 | 765 | gencode(expr); 766 | } 767 | 768 | void backpatch(vector& v1, int number) 769 | { 770 | for(int i = 0; i merge(vector& v1, vector& v2) 783 | { 784 | vector concat; 785 | concat.reserve(v1.size() + v2.size()); 786 | concat.insert(concat.end(), v1.begin(), v1.end()); 787 | concat.insert(concat.end(), v2.begin(), v2.end()); 788 | 789 | return concat; 790 | } 791 | 792 | void type_check(int left, int right, int flag) 793 | { 794 | if(left != right) 795 | { 796 | switch(flag) 797 | { 798 | case 0: yyerror("Type mismatch in arithmetic expression"); break; 799 | case 1: yyerror("Type mismatch in assignment expression"); break; 800 | case 2: yyerror("Type mismatch in logical expression"); break; 801 | } 802 | } 803 | } 804 | 805 | void displayICG() 806 | { 807 | ofstream outfile("ICG.code"); 808 | 809 | for(int i=0; i v){ 818 | for(auto it:v) 819 | cout< 12 | #include 13 | #include 14 | #include 15 | #include 16 | // #include 17 | // #include 18 | 19 | int yyerror(const char *msg); 20 | 21 | using namespace std; 22 | 23 | #define HASH_TABLE_SIZE 100 24 | #define NUM_TABLES 10 25 | 26 | int table_index = 0; 27 | int current_scope = 0; 28 | 29 | /* struct to hold each entry */ 30 | struct entry_t 31 | { 32 | char* lexeme; 33 | int value; 34 | int data_type; 35 | int* parameter_list; // for functions 36 | int array_dimension; 37 | int is_constant; 38 | int num_params; 39 | struct entry_t* successor; 40 | }; 41 | 42 | //typedef struct entry_t entry_t; 43 | 44 | /* Wrapper for symbol table with pointer to symbol table of parent scope */ 45 | struct table_t 46 | { 47 | entry_t** symbol_table; 48 | int parent; 49 | }; 50 | 51 
| //typedef struct table_s table_t; 52 | 53 | struct content_t 54 | { 55 | vector truelist; 56 | vector falselist; 57 | vector nextlist; 58 | vector breaklist; 59 | vector continuelist; 60 | string addr; 61 | string code; 62 | 63 | entry_t* entry; 64 | int data_type; 65 | }; 66 | //typedef struct content_s content_t; 67 | 68 | extern table_t symbol_table_list[NUM_TABLES]; 69 | 70 | void display_symbol_table(entry_t** hash_table_ptr); 71 | /* Create a new hash_table. */ 72 | entry_t** create_table() 73 | { 74 | entry_t** hash_table_ptr = NULL; // declare a pointer 75 | 76 | /* Allocate memory for a hashtable array of size HASH_TABLE_SIZE */ 77 | if( ( hash_table_ptr = (entry_t**) malloc( sizeof( entry_t* ) * HASH_TABLE_SIZE ) ) == NULL ) 78 | return NULL; 79 | 80 | int i; 81 | 82 | // Initialise all entries as NULL 83 | for( i = 0; i < HASH_TABLE_SIZE; i++ ) 84 | { 85 | hash_table_ptr[i] = NULL; 86 | } 87 | 88 | return hash_table_ptr; 89 | } 90 | 91 | int create_new_scope() 92 | { 93 | table_index++; 94 | // printf("Table index:%d\n",table_index); 95 | symbol_table_list[table_index].symbol_table = create_table(); 96 | symbol_table_list[table_index].parent = current_scope; 97 | 98 | return table_index; 99 | } 100 | 101 | int exit_scope() 102 | { 103 | return symbol_table_list[current_scope].parent; 104 | } 105 | /* Generate myhash from a string. 
Then generate an index in [0, HASH_TABLE_SIZE) */ 106 | int myhash( char *lexeme ) 107 | { 108 | size_t i; 109 | int hashvalue; 110 | 111 | /* Apply jenkin's myhash function 112 | * https://en.wikipedia.org/wiki/Jenkins_hash_function#one-at-a-time 113 | */ 114 | 115 | for ( hashvalue = i = 0; i < strlen(lexeme); ++i ) { 116 | hashvalue += lexeme[i]; 117 | hashvalue += ( hashvalue << 10 ); 118 | hashvalue ^= ( hashvalue >> 6 ); 119 | } 120 | hashvalue += ( hashvalue << 3 ); 121 | hashvalue ^= ( hashvalue >> 11 ); 122 | hashvalue += ( hashvalue << 15 ); 123 | //cout<<(hashvalue% HASH_TABLE_SIZE + HASH_TABLE_SIZE) % HASH_TABLE_SIZE<lexeme = strdup( lexeme ) ) == NULL ) { 138 | return NULL; 139 | } 140 | 141 | new_entry->value = value; 142 | new_entry->successor = NULL; 143 | new_entry->parameter_list = NULL; 144 | new_entry->array_dimension = -1; 145 | new_entry->is_constant = 0; 146 | new_entry->num_params = 0; 147 | new_entry->data_type = data_type; 148 | 149 | return new_entry; 150 | } 151 | 152 | /* Search for an entry given a lexeme. 
Return a pointer to the entry of the lexeme exists, else return NULL */ 153 | entry_t* search(entry_t** hash_table_ptr, char* lexeme) 154 | { 155 | int idx = 0; 156 | entry_t* myentry; 157 | 158 | // get the index of this lexeme as per the myhash function 159 | idx = myhash( lexeme ); 160 | 161 | /* Traverse the linked list at this idx and see if lexeme exists */ 162 | myentry = hash_table_ptr[idx]; 163 | 164 | while( myentry != NULL && strcmp( lexeme, myentry->lexeme ) != 0 ) 165 | { 166 | myentry = myentry->successor; 167 | } 168 | 169 | if(myentry == NULL) // lexeme is not found 170 | return NULL; 171 | 172 | else // lexeme found 173 | return myentry; 174 | 175 | } 176 | 177 | // Search recursively in every parent scope for lexeme 178 | entry_t* search_recursive(char* lexeme) 179 | { 180 | int idx = current_scope; 181 | entry_t* finder = NULL; 182 | 183 | while(idx != -1) 184 | { 185 | finder = search(symbol_table_list[idx].symbol_table, lexeme); 186 | 187 | if(finder != NULL) 188 | return finder; 189 | 190 | idx = symbol_table_list[idx].parent; 191 | } 192 | 193 | return finder; 194 | } 195 | /* Insert an entry into a myhash table. 
*/ 196 | entry_t* insert( entry_t** hash_table_ptr, char* lexeme, int value, int data_type) 197 | { 198 | // Callers should pass the symbol table of the current scope here. Returns the newly inserted entry; if the lexeme is already present, returns the existing entry when it is marked is_constant and NULL otherwise 199 | entry_t* finder = search( hash_table_ptr, lexeme ); 200 | if( finder != NULL) // Lexeme already exists: do not insert it again 201 | { 202 | if(finder->is_constant) 203 | return finder; 204 | return NULL; // capture this in the calling code and do the necessary error handling 205 | } 206 | 207 | int idx; 208 | entry_t* new_entry = NULL; 209 | entry_t* head = NULL; 210 | 211 | idx = myhash( lexeme ); // Get the index for this lexeme based on the myhash function 212 | new_entry = create_entry( lexeme, value, data_type ); // Create an entry from the lexeme, value and data type 213 | 214 | if(new_entry == NULL) // In case there was some error while executing create_entry() 215 | { 216 | printf("Insert failed. New entry could not be created."); 217 | exit(1); 218 | } 219 | 220 | head = hash_table_ptr[idx]; 221 | // printf("Index: %d\n",idx);// get the head entry at this index 222 | 223 | if(head == NULL) // This is the first lexeme that matches this myhash index 224 | { 225 | hash_table_ptr[idx] = new_entry; 226 | } 227 | else // otherwise, push this entry onto the front of the existing chain 228 | { 229 | new_entry->successor = hash_table_ptr[idx]; 230 | hash_table_ptr[idx] = new_entry; 231 | } 232 | 233 | // printf("in insert!
Symbol table :%p, entry: %p, text: %s\n",hash_table_ptr, hash_table_ptr[idx], lexeme); 234 | // display_symbol_table(hash_table_ptr); 235 | return hash_table_ptr[idx]; 236 | } 237 | 238 | // Called after a function call to check the arguments against the callee's parameter list; a count mismatch is reported through yyerror. NOTE(review): part of this definition and the start of the next one appear to be missing from this copy of the file - restore them from the original source before building 239 | int check_parameter_list(entry_t* entry, int* list, int m) 240 | { 241 | int* parameter_list = entry->parameter_list; 242 | 243 | if(m != entry->num_params) 244 | { 245 | yyerror("Number of parameters and arguments do not match"); 246 | } 247 | 248 | int i; 249 | for(i=0; iparameter_list = (int *)malloc(n*sizeof(int)); 261 | 262 | int i; 263 | for(i=0; iparameter_list[i] = list[i]; 266 | } 267 | entry->num_params = n; 268 | } 269 | 270 | /* Print a newline, then n '=' characters, then another newline. */ 271 | void print_dashes(int n) 272 | { 273 | printf("\n"); 274 | 275 | int i; 276 | for(i=0; i< n; i++) 277 | printf("="); 278 | printf("\n"); 279 | } 280 | 281 | // Traverse the hash table and print every entry: lexeme, data type, array dimension, parameter count and parameter list 282 | void display_symbol_table(entry_t** hash_table_ptr) 283 | { 284 | int i; 285 | entry_t* traverser; 286 | 287 | print_dashes(100); 288 | 289 | printf(" %-20s %-20s %-20s %-20s %-20s\n","lexeme","data-type","array_dimension","num_params","param_list"); 290 | 291 | print_dashes(100); 292 | 293 | for( i=0; i < HASH_TABLE_SIZE; i++) 294 | { 295 | // printf("In loop\n"); 296 | traverser = hash_table_ptr[i]; 297 | while( traverser != NULL) 298 | { 299 | printf(" %-20s %-20d %-20d ", traverser->lexeme, traverser->data_type, traverser->array_dimension); 300 | 301 | printf(" %-20d", traverser->num_params); 302 | 303 | int j; 304 | for(j=0; j < traverser->num_params; j++) 305 | printf(" %d",traverser->parameter_list[j]); 306 | printf("\n"); 307 | 308 | traverser = traverser->successor; 309 | } 310 | } 311 | 312 | print_dashes(100); 313 | 314 | } 315 | /* Print the lexeme and data type of every entry in the given table. */ 316 | void display_constant_table(entry_t** hash_table_ptr) 317 | { 318 | int i; 319 | entry_t* traverser; 320 | 321 | print_dashes(25); 322 | 323 | printf(" %-10s %-10s \n","lexeme","data-type"); 324 | 325 | 
print_dashes(25); 326 | 327 | for( i=0; i < HASH_TABLE_SIZE; i++) 328 | { 329 | traverser = hash_table_ptr[i]; 330 | while( traverser != NULL) 331 | { 332 | printf(" %-10s %-10d \n", traverser->lexeme, traverser->data_type); 333 | traverser = traverser->successor; 334 | } 335 | } 336 | 337 | print_dashes(25); 338 | } 339 | /* Print the symbol table of every scope from index 0 through table_index, each under its own Scope heading. */ 340 | void display_all() 341 | { 342 | int i; 343 | for(i=0; i<=table_index; i++) 344 | { 345 | printf("Scope: %d\n",i); 346 | display_symbol_table(symbol_table_list[i].symbol_table); 347 | printf("\n\n"); 348 | } 349 | 350 | // display_symbol_table(symbol_table_list[0].symbol_table); 351 | // display_symbol_table(symbol_table_list[1].symbol_table); 352 | } 353 | -------------------------------------------------------------------------------- /Project-4/test-func.c: -------------------------------------------------------------------------------- 1 | void fun2(char* s) 2 | { 3 | 4 | } 5 | 6 | void fun(int a, int b, int c) 7 | { 8 | fun2("What else can I print?"); 9 | } 10 | 11 | int main() 12 | { 13 | int c; 14 | c = 5; 15 | fun(c, 2 , 5); 16 | } 17 | -------------------------------------------------------------------------------- /Project-4/test.c: -------------------------------------------------------------------------------- 1 | int main(int argc) 2 | { 3 | int i; 4 | int j; 5 | 6 | for(i=0;i<10;i++) 7 | { 8 | char x = 'x'; 9 | i = 2; 10 | 11 | while(j < 2 + 3) 12 | { 13 | int j; 14 | i = 3; 15 | j = 3 + i / 6 + 2 + 5; 16 | 17 | 18 | int arr[10]; 19 | j = arr[7]; 20 | 21 | if(j < 5 && i == 3 || j!= 5) 22 | { 23 | j = 2; 24 | 25 | if(2<4) 26 | break; 27 | 28 | else if(2>3) 29 | continue; 30 | 31 | else 32 | i = 4 + 5; 33 | } 34 | 35 | } 36 | } 37 | } 38 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # mini-c-compiler 2 | 3 | 4 | This is a mini compiler for a subset of the C language built as part of our Compiler
Design Lab Course (CO351). 5 | 6 | It has been built as a series of four incremental phases, each contributing a key part of the compiler. 7 | 8 | | Phase | Objective | 9 | |-----------|----------------------------------------------------------------------------------------------| 10 | | Project-1 | Building a lexical analyser using Lex for a subset of the C language to tokenise a given program | 11 | | Project-2 | Building a parser using Yacc that reads tokens from the lexical analyser built in Project-1 | 12 | | Project-3 | Building a semantic analyser for the context-free grammar implemented in Project-2 | 13 | | Project-4 | Building an Intermediate Code Generator | 14 | 15 | 16 | ### Team Members 17 | - Karthik M (15CO221) 18 | - Kaushik S Kalmady (15CO222) 19 | --------------------------------------------------------------------------------