├── .gitignore
├── Project-1
    ├── .gitignore
    ├── Project Report.pdf
    ├── README.md
    ├── lexer.l
    ├── symboltable.h
    ├── testcases
    │   ├── test-case-1.c
    │   ├── test-case-2.c
    │   ├── test-case-3.c
    │   ├── test-case-4.c
    │   └── test-case-5.c
    └── tokens.h
├── Project-2
    ├── .gitignore
    ├── README.md
    ├── compile
    ├── lexl.l
    ├── parser.y
    ├── symboltable.h
    └── testcases
    │   ├── test-case-1.c
    │   ├── test-case-2.c
    │   ├── test-case-3.c
    │   ├── test-case-4.c
    │   ├── test-case-5.c
    │   └── test-case-6.c
├── Project-3
    ├── README.md
    ├── compile
    ├── lexer.l
    ├── parser.y
    ├── symboltable.h
    ├── test.c
    └── testcases
    │   ├── test-case-1.c
    │   ├── test-case-2.c
    │   ├── test-case-3.c
    │   ├── test-case-4.c
    │   ├── test-case-5.c
    │   └── testcases
├── Project-4
    ├── ICG.code
    ├── README.md
    ├── compile
    ├── icg
    ├── lexer.l
    ├── parser.y
    ├── symboltable.h
    ├── test-func.c
    └── test.c
└── README.md


/.gitignore:
--------------------------------------------------------------------------------
1 | *.swp
2 | 


--------------------------------------------------------------------------------
/Project-1/.gitignore:
--------------------------------------------------------------------------------
1 | a.out
2 | lex.yy.c
3 | 


--------------------------------------------------------------------------------
/Project-1/Project Report.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kaushiksk/mini-c-compiler/f55e6a84a2e82a2a88957c41f626b0a1954c232d/Project-1/Project Report.pdf


--------------------------------------------------------------------------------
/Project-1/README.md:
--------------------------------------------------------------------------------
 1 | Compiler Design Lab - Project 1
 2 | ===============================
 3 | 
 4 | # Lexical Analyser for a subset of the C language
 5 | 
 6 | ### Team Members
 7 |  1. Karthik M - 15CO221
 8 |  2. Kaushik S Kalmady - 15CO222
 9 | 
10 | ### Lab Batch - Wednesday
11 | 
12 | This folder contains the code for the Lexical Analyser.
13 | 
14 | |file|contents|
15 | |----|--------|
16 | |lexer.l|lex file with rules|
17 | |symboltable.h|symbol table implementation using hash organisation|
18 | |tokens.h| tokens used (as enums)|
19 | 
20 | The test cases are in the testcases folder.
21 | The details about each test case are written in the header section of each file.
22 | 


--------------------------------------------------------------------------------
/Project-1/lexer.l:
--------------------------------------------------------------------------------
  1 | %{
  2 | 
  3 | #include <stdlib.h>
  4 | #include <stdio.h>
  5 | #include "symboltable.h"
  6 | #include "tokens.h"
  7 | 
  8 | entry_t** symbol_table;
  9 | entry_t** constant_table;
 10 | int cmnt_strt = 0;
 11 | 
 12 | %}
 13 |  /* Named patterns used by the rules below; hex accepts both digit cases (0xff and 0xFF). */
 14 | letter [a-zA-Z]
 15 | digit [0-9]
 16 | ws  [ \t\r\f\v]+
 17 | identifier (_|{letter})({letter}|{digit}|_){0,31}
 18 | hex [0-9a-fA-F]
 19 | 
 20 |  /* Exclusive states */
 21 | %x CMNT
 22 | %x PREPROC
 23 | 
 24 | %%
 25 |   /* Keywords*/
 26 | "int"                             {printf("\t%-30s : %3d\n",yytext,INT);}
 27 | "long"                            {printf("\t%-30s : %3d\n",yytext,LONG);}
 28 | "long long"                       {printf("\t%-30s : %3d\n",yytext,LONG_LONG);}
 29 | "short"                           {printf("\t%-30s : %3d\n",yytext,SHORT);}
 30 | "signed"                          {printf("\t%-30s : %3d\n",yytext,SIGNED);}
 31 | "unsigned"                        {printf("\t%-30s : %3d\n",yytext,UNSIGNED);}
 32 | "for"                             {printf("\t%-30s : %3d\n",yytext,FOR);}
 33 | "break"                           {printf("\t%-30s : %3d\n",yytext,BREAK);}
 34 | "continue"                        {printf("\t%-30s : %3d\n",yytext,CONTINUE);}
 35 | "if"                              {printf("\t%-30s : %3d\n",yytext,IF);}
 36 | "else"                            {printf("\t%-30s : %3d\n",yytext,ELSE);}
 37 | "return"                          {printf("\t%-30s : %3d\n",yytext,RETURN);}
 38 | 
 39 | {identifier}                      {printf("\t%-30s : %3d\n", yytext,IDENTIFIER);
 40 |                                   insert( symbol_table,yytext,IDENTIFIER );}
 41 | {ws}                              ;
 42 | [+\-]?[0][xX]{hex}+[lLuU]?         {/* [xX]: inside [] a '|' is a literal bar, not alternation */ printf("\t%-30s : %3d\n", yytext,HEX_CONSTANT);
 43 | 									insert( constant_table,yytext,HEX_CONSTANT);}
 44 | [+\-]?{digit}+[lLuU]?              {printf("\t%-30s : %3d\n", yytext,DEC_CONSTANT);
 45 | 									insert( constant_table,yytext,DEC_CONSTANT);}
 46 | "/*"                              {cmnt_strt = yylineno; BEGIN CMNT;}
 47 | <CMNT>.|{ws}                      ;
 48 | <CMNT>\n                          {yylineno++;}
 49 | <CMNT>"*/"                        {BEGIN INITIAL;}
 50 | <CMNT>"/*"                        {printf("Line %3d: Nested comments are not valid!\n",yylineno);}
 51 | <CMNT><<EOF>>                     {printf("Line %3d: Unterminated comment\n", cmnt_strt); yyterminate();}
 52 | ^"#include"                       {BEGIN PREPROC;}
 53 | <PREPROC>"<"[^<>\n]+">"            {printf("\t%-30s : %3d\n",yytext,HEADER_FILE);}
 54 | <PREPROC>{ws}                       ;
 55 | <PREPROC>\"[^"\n]+\"              {printf("\t%-30s : %3d\n",yytext,HEADER_FILE);}
 56 | <PREPROC>\n                       {yylineno++; BEGIN INITIAL;}
 57 | <PREPROC>.                        {printf("Line %3d: Illegal header file format \n",yylineno);}
 58 | "//".*                            ;
 59 | 
 60 | \"[^\"\n]*\"     {
 61 | 
 62 |   if(yytext[yyleng-2]=='\\') /* check if it was an escaped quote */
 63 |   {
 64 |     yyless(yyleng-1);       /* push the quote back if it was escaped */
 65 |     yymore();
 66 |   }
 67 |   else
 68 |   insert( constant_table,yytext,STRING);
 69 |  }
 70 | 
 71 | \"[^\"\n]*$                     {printf("Line %3d: Unterminated string %s\n",yylineno,yytext);}
 72 | {digit}+({letter}|_)+	        {printf("Line %3d: Illegal identifier name %s\n",yylineno,yytext);}
 73 | \n                              {yylineno++;}
 74 | "--"			                {printf("\t%-30s : %3d\n",yytext,DECREMENT);}
 75 | "++"			                {printf("\t%-30s : %3d\n",yytext,INCREMENT);}
 76 | "->"			                {printf("\t%-30s : %3d\n",yytext,PTR_SELECT);}
 77 | "&&"			                {printf("\t%-30s : %3d\n",yytext,LOGICAL_AND);}
 78 | "||"			                {printf("\t%-30s : %3d\n",yytext,LOGICAL_OR);}
 79 | "<="			                {printf("\t%-30s : %3d\n",yytext,LS_THAN_EQ);}
 80 | ">="			                {printf("\t%-30s : %3d\n",yytext,GR_THAN_EQ);}
 81 | "=="			                {printf("\t%-30s : %3d\n",yytext,EQ);}
 82 | "!="		                    {printf("\t%-30s : %3d\n",yytext,NOT_EQ);}
 83 | ";"			                    {printf("\t%-30s : %3d\n",yytext,DELIMITER);}
 84 | "{"                             {printf("\t%-30s : %3d\n",yytext,OPEN_BRACES);}
 85 | "}"                             {printf("\t%-30s : %3d\n",yytext,CLOSE_BRACES);}
 86 | ","			                    {printf("\t%-30s : %3d\n",yytext,COMMA);}
 87 | "="			                    {printf("\t%-30s : %3d\n",yytext,ASSIGN);}
 88 | "("			                    {printf("\t%-30s : %3d\n",yytext,OPEN_PAR);}
 89 | ")"			                    {printf("\t%-30s : %3d\n",yytext,CLOSE_PAR);}
 90 | "["                             {printf("\t%-30s : %3d\n",yytext,OPEN_SQ_BRKT);}
 91 | "]"                             {printf("\t%-30s : %3d\n",yytext,CLOSE_SQ_BRKT);}
 92 | "-"			                    {printf("\t%-30s : %3d\n",yytext,MINUS);}
 93 | "+"			                    {printf("\t%-30s : %3d\n",yytext,PLUS);}
 94 | "*"			                    {printf("\t%-30s : %3d\n",yytext,STAR);}
 95 | "/"		                        {printf("\t%-30s : %3d\n",yytext,FW_SLASH);}
 96 | "%"			                    {printf("\t%-30s : %3d\n",yytext,MODULO);}
 97 | "<"			                    {printf("\t%-30s : %3d\n",yytext,LS_THAN);}
 98 | ">"			                    {printf("\t%-30s : %3d\n",yytext,GR_THAN);}
 99 | .                               {printf("Line %3d: Illegal character %s\n",yylineno,yytext);}
100 | 
101 | %%
102 | /* Lex the file named by argv[1] (default: testcases/test-case-5.c), then print both tables. */
103 | int main(int argc, char *argv[])
104 | {
105 |   const char *path = (argc > 1) ? argv[1] : "testcases/test-case-5.c";
106 |   if ((yyin = fopen(path, "r")) == NULL) { perror(path); return 1; } /* a NULL yyin would make yylex() read stdin */
107 |   symbol_table=create_table();
108 |   constant_table=create_table();
109 |   yylex();
110 |   printf("\n\tSymbol table");    display(symbol_table);
111 |   printf("\n\tConstants Table"); display(constant_table);
112 |   printf("NOTE: Please refer tokens.h for token meanings\n");
113 |   return 0;
114 | }
115 | 


--------------------------------------------------------------------------------
/Project-1/symboltable.h:
--------------------------------------------------------------------------------
  1 | /* 
  2 | * Compiler Design Project 1 : Lexical Analyser
  3 | *
  4 | * File        : symboltable.h
  5 | * Description : This file contains functions related to a hash organised symbol table. 
  6 | *               The functions implemented are:
  7 | *               create_table(), insert(), search, display() 
  8 | *                  
  9 | * Authors     : Karthik M - 15CO221, Kaushik S Kalmady - 15CO222
 10 | * Date        : 17-1-2018
 11 | */
 12 | 
 13 | 
 14 | 
 15 | #include <stdint.h>
 16 | #include <stdlib.h>
 17 | #include <stdio.h>
 18 | #include <limits.h>
 19 | #include <string.h>
 20 | 
 21 | #define HASH_TABLE_SIZE 100
 22 | 
 23 | /* struct to hold each entry */
 24 | struct entry_s
 25 | {
 26 | 	char* lexeme; 
 27 | 	int token_name;
 28 | 	struct entry_s* successor;
 29 | };
 30 | 
 31 | typedef struct entry_s entry_t;
 32 | 
 33 | /* Build an empty hash table: HASH_TABLE_SIZE chain heads, all NULL. Returns NULL if malloc fails. */
 34 | entry_t** create_table()
 35 | {
 36 | 	entry_t** buckets;
 37 | 	int slot = 0;
 38 | 
 39 | 	/* Reserve room for one chain-head pointer per bucket */
 40 | 	buckets = malloc( sizeof( entry_t* ) * HASH_TABLE_SIZE );
 41 | 
 42 | 	/* Out of memory: hand NULL back to the caller */
 43 | 	if( buckets == NULL )
 44 | 		return NULL;
 45 | 
 46 | 	/* Every chain starts out empty */
 47 | 	while( slot < HASH_TABLE_SIZE )
 48 | 		buckets[slot++] = NULL;
 49 | 
 50 | 	return buckets;
 51 | }
 52 | 
 53 | /* Generate hash from a string. Then generate an index in [0, HASH_TABLE_SIZE) */
 54 | uint32_t hash( char *lexeme )
 55 | {
 56 | 	size_t i;
 57 | 	uint32_t hash = 0;
 58 | 
 59 | 	/* Apply Jenkins' one-at-a-time hash function
 60 | 	* https://en.wikipedia.org/wiki/Jenkins_hash_function#one-at-a-time
 61 | 	* The loop stops at the terminator rather than calling strlen() on every pass. */
 62 | 	for ( i = 0; lexeme[i] != '\0'; ++i ) {
 63 | 		hash += (unsigned char) lexeme[i]; /* keep bytes >= 0x80 from sign-extending */
 64 | 		hash += ( hash << 10 );
 65 | 		hash ^= ( hash >> 6 );
 66 | 	}
 67 | 	hash += ( hash << 3 );
 68 | 	hash ^= ( hash >> 11 );
 69 | 	hash += ( hash << 15 );
 70 | 
 71 | 	return hash % HASH_TABLE_SIZE; // return an index in [0, HASH_TABLE_SIZE)
 72 | }
 73 | 
 74 | /* Create an entry for a lexeme, token pair; returns NULL if allocation fails. Called from insert(). */
 75 | entry_t *create_entry( char *lexeme, int token_name )
 76 | {
 77 | 	entry_t *newentry;
 78 | 
 79 | 	/* Allocate space for newentry */
 80 | 	if( ( newentry = malloc( sizeof( entry_t ) ) ) == NULL ) {
 81 | 		return NULL;
 82 | 	}
 83 | 	/* Store a private copy of the lexeme (strdup). On failure release newentry so it is not leaked */
 84 | 	if( ( newentry->lexeme = strdup( lexeme ) ) == NULL ) {
 85 | 		free( newentry );
 86 | 		return NULL;
 87 | 	}
 88 | 	newentry->token_name = token_name;
 89 | 	newentry->successor = NULL;
 90 | 
 91 | 	return newentry;
 92 | }
 93 | 
 94 | /*
 95 | * Search for an entry given a lexeme.
 96 | *
 97 | * hash_table_ptr : array of chain heads, as built by create_table()
 98 | * lexeme         : string to look up
 99 | *
100 | * Returns a pointer to the matching entry if the lexeme exists, else NULL.
101 | */
102 | entry_t* search( entry_t** hash_table_ptr, char* lexeme )
103 | {
104 | 	entry_t* node;
105 | 
106 | 	/* hash() selects the bucket; only that bucket's chain is examined */
107 | 	node = hash_table_ptr[ hash( lexeme ) ];
108 | 
109 | 	/* Follow successor links until a match or the end of the chain */
110 | 	for( ; node != NULL; node = node->successor )
111 | 	{
112 | 		if( strcmp( lexeme, node->lexeme ) == 0 )
113 | 			return node; // lexeme found
114 | 	}
115 | 
116 | 	return NULL; // lexeme is not found
117 | }
118 | 
119 | /*
120 | * Insert a <lexeme, token> pair into a hash table.
121 | *
122 | * hash_table_ptr : array of chain heads, as built by create_table()
123 | * lexeme         : string to store; create_entry() keeps its own copy
124 | * token_name     : token code recorded alongside the lexeme
125 | *
126 | * A lexeme that is already present is left untouched. If a new entry cannot
127 | * be created, a message is printed and the program exits with status 1.
128 | */
129 | void insert( entry_t** hash_table_ptr, char* lexeme, int token_name )
130 | {
131 | 	entry_t* fresh;
132 | 	uint32_t bucket;
133 | 
134 | 	if( search( hash_table_ptr, lexeme ) != NULL ) // duplicate: nothing to do
135 | 		return;
136 | 
137 | 	bucket = hash( lexeme ); // chain this lexeme belongs to
138 | 	fresh = create_entry( lexeme, token_name );
139 | 
140 | 	if( fresh == NULL ) // create_entry() could not allocate
141 | 	{
142 | 		printf("Insert failed. New entry could not be created.");
143 | 		exit(1);
144 | 	}
145 | 
146 | 	/* Push at the front. An empty bucket's head is NULL, so one path covers both cases */
147 | 	fresh->successor = hash_table_ptr[bucket];
148 | 	hash_table_ptr[bucket] = fresh;
149 | }
150 | 
151 | /*
152 | * Print every entry of a hash table as "< lexeme, token >" rows between banner lines.
153 | * Buckets are visited in index order, each chain from head to tail.
154 | */
155 | void display(entry_t** hash_table_ptr)
156 | {
157 | 	int bucket = 0;
158 | 	entry_t* node;
159 | 
160 |     printf("\n==========================================\n");
161 |     printf("\t < lexeme , token >\n");
162 |     printf("==========================================\n");
163 | 
164 | 	while( bucket < HASH_TABLE_SIZE )
165 | 	{
166 | 		for( node = hash_table_ptr[bucket]; node != NULL; node = node->successor )
167 | 			printf("< %-30s, %3d >\n", node->lexeme, node->token_name);
168 | 		bucket++;
169 | 	}
170 | 
171 |     printf("==========================================\n");
172 | }
173 | 


--------------------------------------------------------------------------------
/Project-1/testcases/test-case-1.c:
--------------------------------------------------------------------------------
 1 | /* 
 2 | Karthik M - 15CO221
 3 | Kaushik S Kalmady - 15CO222
 4 | Compiler Design Project 1
 5 | 
 6 | Test Case 1
 7 |  - Test for single line comments
 8 |  - Test for multi-line comments
 9 |  - Test for single line nested comments
10 |  - Test for multiline nested comments 
11 | 
12 | The output in lex should remove all the comments including this one 
13 | */
14 | 
15 | #include<stdio.h>
16 | 
17 | void main(){
18 |     // Single line comment
19 |     
20 |     /* Multi-line comment
21 |        Like this */
22 |     
23 |     /* here */ int a; /* "int a" should be untouched */
24 |     
25 | 	// This nested comment // This comment should be removed should be removed
26 | 
27 | 
28 | 	
29 | 	/* To make things /*  nested multi-line comment */ interesting */
30 | 
31 | 	return 0;
32 | }
33 | 


--------------------------------------------------------------------------------
/Project-1/testcases/test-case-2.c:
--------------------------------------------------------------------------------
 1 | /*
 2 | Karthik M - 15CO221
 3 | Kaushik S Kalmady - 15CO222
 4 | Compiler Design Project 1
 5 | 
 6 | Test Case 2
 7 |  - Test for multi-line comment that doesn't end till EOF
 8 | 
 9 | The output in lex should print as error message when the comment does not terminate
10 | It should remove the comments that terminate
11 | */
12 | 
13 | #include<stdio.h>
14 | 
15 | void main(){
16 | 
17 | 	// This is fine
18 | 	/* This as well
19 | 	like we know */
20 | 
21 | 	/* This is not fine since
22 | 	this comment has to end somewhere
23 | 
24 | 	return 0;
25 | }
26 | 


--------------------------------------------------------------------------------
/Project-1/testcases/test-case-3.c:
--------------------------------------------------------------------------------
 1 | /* 
 2 | Karthik M - 15CO221
 3 | Kaushik S Kalmady - 15CO222
 4 | Compiler Design Project 1
 5 | 
 6 | Test Case 3
 7 |  - Test for string
 8 |  - Test for string that doesn't end till EOF
 9 |  - Test for invalid header name
10 | 
11 | The output in lex should identify the first string correct and display error message that the second one does not terminate
12 | */
13 | 
14 | #include<stdio.h>
15 | #include <<stdlib.h>
16 | #include "custom.h"
17 | #include ""wrong.h"
18 | 
19 | void main(){
20 | 
21 | 	printf("This is a string");
22 | 	printf("This is a string that never terminates);
23 | }


--------------------------------------------------------------------------------
/Project-1/testcases/test-case-4.c:
--------------------------------------------------------------------------------
 1 | /*
 2 | Karthik M - 15CO221
 3 | Kaushik S Kalmady - 15CO222
 4 | Compiler Design Project 1
 5 | 
 6 | Test Case 4
 7 | 
 8 | Following errors must be detected
 9 |  - Invalid identifiers: 9y, total$
10 |  - Invalid operator: @
11 |  - Escaped quoted should be part of the string that is identified
12 |  - Stray characters: `, @, -
13 |  
14 | The output should display appropriate errors
15 | */
16 | 
17 | #include<stdio.h>
18 | #include<stdlib.h>
19 | 
20 | int main()
21 | {
22 |   `
23 |   @ -
24 |   short int b;
25 |   int x, 9y, total$;
26 |   total = x @ y;
27 |   printf ("Total = %d \n \" ", total);
28 | }


--------------------------------------------------------------------------------
/Project-1/testcases/test-case-5.c:
--------------------------------------------------------------------------------
 1 | /* 
 2 | Karthik M - 15CO221
 3 | Kaushik S Kalmady - 15CO222
 4 | Compiler Design Project 1
 5 | 
 6 | Test Case 5
 7 | 
 8 | Identifying tokens and displaying symbol and constants table
 9 | 
10 | Following tokens must be detected
11 |  - Keywords   (int, long int, long long int, main include)
12 |  - Identifiers (main,total,x,y,printf),
13 |  - Constants  (-10, 20, 0x0f, 123456l)
14 |  - Strings ("Total = %d \n")
15 |  - Special symbols and Brackets ( (), {}, ;, ,)
16 |  - Operators (+,-,=,*,/,%,--,++)
17 | 
18 | The output should display appropriate tokens with their type and also the symbol and constants table
19 | */
20 | #include<stdio.h>
21 | #include<stdlib.h>
22 | 
23 | int main()
24 | {
25 |   int x, y;
26 |   long long int total, diff;
27 |   int *ptr;
28 |   unsigned int a = 0x0f;
29 |   long int mylong = 123456l;
30 |   long int i, j;
31 |   for(i=0; i < 10; i++){
32 |     for(j=10; j > 0; j--){
33 |     printf("%d",i);
34 |     }
35 |   }
36 |   x = -10, y = 20;
37 |   x=x*3/2;
38 |   total = x + y;
39 |   diff = x - y;
40 |   int rem = x % y;
41 |   printf ("Total = %d \n", total);
42 | }
43 | 
44 | 


--------------------------------------------------------------------------------
/Project-1/tokens.h:
--------------------------------------------------------------------------------
 1 | /* 
 2 | * Compiler Design Project 1 : Lexical Analyser
 3 | *
 4 | * File        : tokens.h
 5 | * Description : This file defines tokens and the values associated to them.
 6 | *                  
 7 | * Authors     : Karthik M - 15CO221, Kaushik S Kalmady - 15CO222
 8 | * Date        : 17-1-2018
 9 | */
10 | 
11 | enum keywords
12 | {
13 |   INT=100,
14 |   LONG,
15 |   LONG_LONG,
16 |   SHORT,
17 |   SIGNED,
18 |   UNSIGNED,
19 |   FOR,
20 |   BREAK,
21 |   CONTINUE,
22 |   RETURN,
23 |   CHAR,
24 |   IF,
25 |   ELSE
26 | };
27 | 
28 | enum operators
29 | {
30 |   DECREMENT=200,
31 |   INCREMENT,
32 |   PTR_SELECT,
33 |   LOGICAL_AND,
34 |   LOGICAL_OR,
35 |   LS_THAN_EQ,
36 |   GR_THAN_EQ,
37 |   EQ,
38 |   NOT_EQ,
39 |   ASSIGN,
40 |   MINUS,
41 |   PLUS,
42 |   STAR,
43 |   MODULO,
44 |   LS_THAN,
45 |   GR_THAN
46 | };
47 | 
48 | enum special_symbols
49 | {
50 |   DELIMITER=300,
51 |   OPEN_BRACES,
52 |   CLOSE_BRACES,
53 |   COMMA,
54 |   OPEN_PAR,
55 |   CLOSE_PAR,
56 |   OPEN_SQ_BRKT,
57 |   CLOSE_SQ_BRKT,
58 |   FW_SLASH
59 | };
60 | 
61 | enum constants
62 | {
63 |   HEX_CONSTANT=400,
64 |   DEC_CONSTANT,
65 |   HEADER_FILE,
66 |   STRING
67 | };
68 | 
69 | enum IDENTIFIER
70 | {
71 |   IDENTIFIER=500
72 | };
73 | 


--------------------------------------------------------------------------------
/Project-2/.gitignore:
--------------------------------------------------------------------------------
1 | y.*
2 | lex.yy.c
3 | 


--------------------------------------------------------------------------------
/Project-2/README.md:
--------------------------------------------------------------------------------
 1 | # Compiler Design Project-2
 2 | 
 3 | ## Parser for a subset of the C language
 4 | 
 5 | ### Team Members
 6 |  - Karthik M (15CO221)
 7 |  - Kaushik S Kalmady (15CO222)
 8 | 
 9 | ### Installation and running
10 |  1. Please make sure you have Lex/Flex and Yacc/Bison installed
11 |  2. Next run `$ chmod +x compile` 
12 |  3. `$ ./compile`
13 |  4. To run the parser `$./parser test-file.c`
14 | 
15 | 
16 | ### FEATURES
17 |  - Nested code blocks
18 |  - +=, -= supported
19 |  - for loops and nested for loops
20 |  - integer assignment and declaration
21 |  - if-else handled
22 |  - array assignment, array declaration
23 |  - comma separated initialization/declaration e.g int a=10,b;
24 |  - support for short, long , long long , signed and unsigned
25 |  - while statements supported
26 |  - unary expressions supported in both lhs, rhs and standalone statements
27 |  - display specific errors with line numbers
28 |  - additionally evaluates expressions and adds to symbol table(implemented as a fun exercise - this is not the job of the parser)
29 | 


--------------------------------------------------------------------------------
/Project-2/compile:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | lex lexl.l
3 | yacc -d parser.y -v
4 | gcc -w -g y.tab.c -ly -ll -o parser
5 | 


--------------------------------------------------------------------------------
/Project-2/lexl.l:
--------------------------------------------------------------------------------
  1 | %{
  2 | 
  3 | #include <stdlib.h>
  4 | #include <stdio.h>
  5 | #include <limits.h>
  6 | /* #include "symboltable.h" */
  7 | #include "y.tab.h"
  8 | 
  9 | /* entry_t** symbol_table; */
 10 | /* entry_t** constant_table; */
 11 | int cmnt_strt = 0;
 12 | 
 13 | %}
 14 |  /* Named patterns used by the rules below; hex accepts both digit cases (0xff and 0xFF). */
 15 | letter [a-zA-Z]
 16 | digit [0-9]
 17 | ws  [ \t\r\f\v]+
 18 | identifier (_|{letter})({letter}|{digit}|_){0,31}
 19 | hex [0-9a-fA-F]
 20 | 
 21 |  /* Exclusive states */
 22 | %x CMNT
 23 |  /*%x PREPROC*/
 24 | 
 25 | %%
 26 |   /* Keywords*/
 27 | "int"                             {return INT;}
 28 | "long"                            {return LONG;}
 29 | "long long"                       {return LONG_LONG;}
 30 | "short"                           {return SHORT;}
 31 | "signed"                          {return SIGNED;}
 32 | "unsigned"                        {return UNSIGNED;}
 33 | "for"                             {return FOR;}
 34 | "while"                           {return WHILE;}
 35 | "break"                           {return BREAK;}
 36 | "continue"                        {return CONTINUE;}
 37 | "if"                              {return IF;}
 38 | "else"                            {return ELSE;}
 39 | "return"                          {return RETURN;}
 40 | 
 41 | {identifier}                      {yylval.entry = insert(symbol_table, yytext, INT_MAX); return  IDENTIFIER;}
 42 | {ws}                              ;
 43 | [+\-]?[0][xX]{hex}+[lLuU]?         { /* [xX]: inside [] a '|' is a literal bar, not alternation */ yylval.dval = (int)strtol(yytext, NULL, 16); return  HEX_CONSTANT;}
 44 | [+\-]?{digit}+[lLuU]?              {yylval.dval = atoi(yytext); return  DEC_CONSTANT;}
 45 | 
 46 | "/*"                              {cmnt_strt = yylineno; BEGIN CMNT;}
 47 | <CMNT>.|{ws}                      ;
 48 | <CMNT>\n                          {yylineno++;}
 49 | <CMNT>"*/"                        {BEGIN INITIAL;}
 50 | <CMNT>"/*"                        {printf("Line %3d: Nested comments are not valid!\n",yylineno);}
 51 | <CMNT><<EOF>>                     {printf("Line %3d: Unterminated comment\n", cmnt_strt); yyterminate();}
 52 |  /*^"#include"                       {BEGIN PREPROC;}*/
 53 |  /*<PREPROC>"<"[^<>\n]+">"            {return HEADER_FILE;}*/
 54 |  /*<PREPROC>{ws}                       ;*/
 55 |  /*<PREPROC>\"[^"\n]+\"              {return HEADER_FILE;}*/
 56 |  /*<PREPROC>\n                       {yylineno++; BEGIN INITIAL;}*/
 57 |  /*<PREPROC>.                        {printf("Line %3d: Illegal header file format \n",yylineno);}*/
 58 | "//".*                            ;
 59 | 
 60 | \"[^\"\n]*\"     {
 61 | 
 62 |   if(yytext[yyleng-2]=='\\') /* check if it was an escaped quote */
 63 |   {
 64 |     yyless(yyleng-1);       /* push the quote back if it was escaped */
 65 |     yymore();
 66 |   }
 67 |   else{
 68 |   insert( constant_table, yytext, INT_MAX);
 69 |   return STRING;
 70 |   }
 71 |  }
 72 | 
 73 | \"[^\"\n]*$                     {printf("Line %3d: Unterminated string %s\n",yylineno,yytext);}
 74 | {digit}+({letter}|_)+	        {printf("Line %3d: Illegal identifier name %s\n",yylineno,yytext);}
 75 | \n                              {yylineno++;}
 76 | 
 77 | "--"			                {return DECREMENT;}
 78 | "++"			                {return INCREMENT;}
 79 |  /* "->"			                {return PTR_SELECT;} */
 80 | "+="                      {return ADD_ASSIGN;}
 81 | "-="                      {return SUB_ASSIGN;}
 82 | "*="                      {return MUL_ASSIGN;}
 83 | "/="                      {return DIV_ASSIGN;}
 84 | "%="                      {return MOD_ASSIGN;}
 85 | 
 86 | 
 87 | "&&"			                {return LOGICAL_AND;}
 88 | "||"			                {return LOGICAL_OR;}
 89 | "<="			                {return LS_EQ;}
 90 | ">="			                {return GR_EQ;}
 91 | "=="			                {return EQ;}
 92 | "!="		                  {return NOT_EQ;}
 93 | 
 94 | .                         {return yytext[0];}
 95 | 
 96 | %%
 97 | /*
 98 | int main()
 99 | {
100 |   yyin=fopen("test2.c","r");
101 | 
102 |   constant_table=create_table();
103 |   symbol_table = create_table();
104 | 
105 |   yylex();
106 | 
107 |   printf("\n\tSymbol table");
108 |   display(symbol_table);
109 |   printf("\n\tConstants Table");
110 |   display(constant_table);
111 | 
112 |   printf("NOTE: Please refer tokens.h for token meanings\n");
113 | } */
114 | 


--------------------------------------------------------------------------------
/Project-2/parser.y:
--------------------------------------------------------------------------------
  1 | %{
  2 | 	#include <stdlib.h>
  3 | 	#include <stdio.h>
  4 | 	#include "symboltable.h"
  5 | 
  6 | 	entry_t** symbol_table;
  7 |   entry_t** constant_table;
  8 | 
  9 | 	double Evaluate (double lhs_value,int assign_type,double rhs_value);
 10 | 	int current_dtype;
 11 | 	int yyerror(char *msg);
 12 | %}
 13 | 
 14 | %union
 15 | {
 16 | 	double dval;
 17 | 	entry_t* entry;
 18 | 	int ival;
 19 | }
 20 | 
 21 | %token <entry> IDENTIFIER
 22 | 
 23 |  /* Constants */
 24 | %token <dval> DEC_CONSTANT HEX_CONSTANT
 25 | %token STRING
 26 | 
 27 |  /* Logical and Relational operators */
 28 | %token LOGICAL_AND LOGICAL_OR LS_EQ GR_EQ EQ NOT_EQ
 29 | 
 30 |  /* Short hand assignment operators */
 31 | %token MUL_ASSIGN DIV_ASSIGN MOD_ASSIGN ADD_ASSIGN SUB_ASSIGN
 32 | %token LEFT_ASSIGN RIGHT_ASSIGN AND_ASSIGN XOR_ASSIGN OR_ASSIGN
 33 | %token INCREMENT DECREMENT
 34 | 
 35 |  /* Data types */
 36 | %token SHORT INT LONG LONG_LONG SIGNED UNSIGNED CONST
 37 | 
 38 |  /* Keywords */
 39 | %token IF FOR WHILE CONTINUE BREAK RETURN
 40 | 
 41 | %type <dval> expression
 42 | %type <dval> sub_expr
 43 | %type <dval> constant
 44 | %type <dval> unary_expr
 45 | %type <dval> arithmetic_expr
 46 | %type <dval> assignment_expr
 47 | %type <entry> lhs
 48 | %type <ival> assign_op
 49 | 
 50 | %start starter
 51 | 
 52 | %left ','
 53 | %right '='
 54 | %left LOGICAL_OR
 55 | %left LOGICAL_AND
 56 | %left EQ NOT_EQ
 57 | %left '<' '>' LS_EQ GR_EQ
 58 | %left '+' '-'
 59 | %left '*' '/' '%'
 60 | %right '!'
 61 | 
 62 | 
 63 | %nonassoc UMINUS
 64 | %nonassoc LOWER_THAN_ELSE
 65 | %nonassoc ELSE
 66 | 
 67 | 
 68 | %%
 69 | 
 70 |  /* Program is made up of multiple builder blocks. */
 71 | starter: starter builder
 72 | 			 |builder;
 73 | 
 74 |  /* Each builder block is either a function or a declaration */
 75 | builder: function|
 76 |        declaration;
 77 | 
 78 |  /* This is how a function looks like */
 79 | function: type IDENTIFIER '(' argument_list ')' compound_stmt;
 80 | 
 81 |  /* Now we will define a grammar for how types can be specified */
 82 | 
 83 | type :data_type pointer
 84 |     |data_type;
 85 | 
 86 | pointer: '*' pointer
 87 |     |'*'
 88 |     ;
 89 | 
 90 | data_type :sign_specifier type_specifier
 91 |     |type_specifier
 92 |     ;
 93 | 
 94 | sign_specifier :SIGNED
 95 |     |UNSIGNED
 96 |     ;
 97 | 
 98 | type_specifier :INT                    {current_dtype = INT;}
 99 |     |SHORT INT                         {current_dtype = SHORT;}
100 |     |SHORT                             {current_dtype = SHORT;}
101 |     |LONG                              {current_dtype = LONG;}
102 | 	|LONG INT                          {current_dtype = LONG;}
103 |     |LONG_LONG                         {current_dtype = LONG_LONG;}
104 |     |LONG_LONG INT                     {current_dtype = LONG_LONG;}
105 |     ;
106 | 
107 |  /* grammar rules for argument list */
108 |  /* argument list can be empty */
109 | argument_list :arguments
110 |     |
111 |     ;
112 |  /* arguments are comma separated TYPE ID pairs */
113 | arguments :arguments ',' arg
114 |     |arg
115 |     ;
116 | 
117 |  /* Each arg is a TYPE ID pair */
118 | arg :type IDENTIFIER
119 |    ;
120 | 
121 |  /* Generic statement. Can be compound or a single statement */
122 | stmt:compound_stmt
123 |     |single_stmt
124 |     ;
125 | 
126 |  /* The function body is covered in braces and has multiple statements. */
127 | compound_stmt :'{' statements '}'
128 |     ;
129 | 
130 | statements:statements stmt
131 |     |
132 |     ;
133 | 
134 |  /* Grammar for what constitutes every individual statement */
135 | single_stmt :if_block
136 |     |for_block
137 |     |while_block
138 |     |declaration
139 |     |function_call ';'
140 | 	|RETURN ';'
141 | 	|CONTINUE ';'
142 | 	|BREAK ';'
143 | 	|RETURN sub_expr ';'
144 |     ;
145 | 
146 | for_block:FOR '(' expression_stmt  expression_stmt ')' stmt
147 |     |FOR '(' expression_stmt expression_stmt expression ')' stmt
148 |     ;
149 | 
150 | if_block:IF '(' expression ')' stmt %prec LOWER_THAN_ELSE
151 | 				|IF '(' expression ')' stmt ELSE stmt
152 |     ;
153 | 
154 | while_block: WHILE '(' expression	')' stmt
155 | 		;
156 | 
157 | declaration:type declaration_list ';'
158 | 			 |declaration_list ';'
159 | 			 | unary_expr ';'
160 | 
161 | declaration_list: declaration_list ',' sub_decl
162 | 		|sub_decl;
163 | 
164 | sub_decl: assignment_expr
165 |     |IDENTIFIER                     {$1 -> data_type = current_dtype;}
166 |     |array_index
167 |     /*|struct_block ';'*/
168 |     ;
169 | 
170 | /* This is because we can have empty expression statements inside for loops */
171 | expression_stmt:expression ';'
172 |     |';'
173 |     ;
174 | 
175 | expression:
176 |     expression ',' sub_expr								{$$ = $3; /* C comma operator yields its right operand; "$$ = $1,$3" stored $1 */}
177 |     |sub_expr		                                    {$$ = $1;}
178 | 		;
179 | 
180 |  /* Relational and logical expressions; the semantic value is the computed result */
181 | sub_expr: sub_expr '>' sub_expr             {$$ = ($1 > $3);}
182 |         | sub_expr '<' sub_expr             {$$ = ($1 < $3);}
183 |         | sub_expr EQ sub_expr              {$$ = ($1 == $3);}
184 |         | sub_expr NOT_EQ sub_expr          {$$ = ($1 != $3);}
185 |         | sub_expr LS_EQ sub_expr           {$$ = ($1 <= $3);}
186 |         | sub_expr GR_EQ sub_expr           {$$ = ($1 >= $3);}
187 |         | sub_expr LOGICAL_AND sub_expr     {$$ = ($1 && $3);}
188 |         | sub_expr LOGICAL_OR sub_expr      {$$ = ($1 || $3);}
189 |         | '!' sub_expr                      {$$ = (!$2);}
190 |         | arithmetic_expr                   {$$ = $1;}
191 |         | assignment_expr                   {$$ = $1;}
192 |         | unary_expr                        {$$ = $1;}
193 |         /* |IDENTIFIER                      {$$ = $1->value;}
194 |            |constant                        {$$ = $1;} */
195 |         /* |array_index */
196 |         ;
197 | 
198 | 
199 | assignment_expr: lhs assign_op arithmetic_expr      {$$ = $1->value = Evaluate($1->value,$2,$3);}
200 |                | lhs assign_op array_index          {$$ = 0;}
201 |                | lhs assign_op function_call        {$$ = 0;}
202 |                | lhs assign_op unary_expr           {$$ = $1->value = Evaluate($1->value,$2,$3);}
203 |                | unary_expr assign_op unary_expr    {$$ = 0;}
204 |                ;
205 | 
206 | unary_expr: lhs INCREMENT       {$$ = ($1->value)++;}   /* postfix: yields the old value, then updates the entry */
207 |           | lhs DECREMENT       {$$ = ($1->value)--;}   /* no self-assignment: "x = x++" is undefined behaviour in C */
208 |           | DECREMENT lhs       {$$ = --($2->value);}   /* prefix: updates the entry, yields the new value */
209 |           | INCREMENT lhs       {$$ = ++($2->value);}
210 |           ;
211 | lhs: IDENTIFIER         {$$ = $1; if($1->data_type == 0) $1->data_type = current_dtype;}
212 |    /* |array_index */
213 |    ;
214 |  /* The value of assign_op is the operator's token code; assignment_expr hands it to Evaluate() */
215 | assign_op: '='          {$$ = '=';}
216 |          | ADD_ASSIGN   {$$ = ADD_ASSIGN;}
217 |          | SUB_ASSIGN   {$$ = SUB_ASSIGN;}
218 |          | MUL_ASSIGN   {$$ = MUL_ASSIGN;}
219 |          | DIV_ASSIGN   {$$ = DIV_ASSIGN;}
220 |          | MOD_ASSIGN   {$$ = MOD_ASSIGN;}
221 |          ;
222 |  /* Arithmetic is evaluated while parsing; a zero divisor for '/' or '%' is reported and yields 0 */
223 | arithmetic_expr: arithmetic_expr '+' arithmetic_expr    {$$ = $1 + $3;}
224 |     | arithmetic_expr '-' arithmetic_expr               {$$ = $1 - $3;}
225 |     | arithmetic_expr '*' arithmetic_expr               {$$ = $1 * $3;}
226 |     | arithmetic_expr '/' arithmetic_expr               {if($3 == 0) {yyerror("Divide by 0!"); $$ = 0;} else $$ = $1 / $3;}
227 |     | arithmetic_expr '%' arithmetic_expr               {if((int)$3 == 0) {yyerror("Divide by 0!"); $$ = 0;} else $$ = (int)$1 % (int)$3;}
228 |     | '(' arithmetic_expr ')'                           {$$ = $2;}
229 |     | '-' arithmetic_expr %prec UMINUS                  {$$ = -$2;}
230 |     | IDENTIFIER                                        {$$ = $1 -> value;}
231 |     | constant                                          {$$ = $1;}
232 |     ;
233 |  /* Decimal and hexadecimal constants; the token's semantic value is passed through unchanged */
234 | constant: DEC_CONSTANT      {$$ = $1;}
235 |         | HEX_CONSTANT      {$$ = $1;}
236 |         ;
237 | 
238 | array_index: IDENTIFIER '[' sub_expr ']'
239 |            ;
240 | function_call: IDENTIFIER '(' parameter_list ')'
241 |              | IDENTIFIER '(' ')'
242 |              ;
243 | 
244 | parameter_list: parameter_list ',' parameter
245 |               | parameter
246 |               ;
247 | 
248 |  /* A call argument: any sub-expression or a string literal */
249 | parameter: sub_expr
250 |          | STRING
251 |          ;
252 | 
253 | %%
254 | 
255 | #include "lex.yy.c"
256 | #include <ctype.h>
257 | /* Apply the assignment operator assign_type ('=' or a *_ASSIGN token code) to lhs_value and rhs_value and return
258 |  * the value to store. A zero divisor is reported through yyerror() and lhs_value is returned unchanged. */
259 | double Evaluate (double lhs_value,int assign_type,double rhs_value)
260 | {
261 | 	switch(assign_type) {
262 | 		case '=':        return rhs_value;
263 | 		case ADD_ASSIGN: return (lhs_value + rhs_value);
264 | 		case SUB_ASSIGN: return (lhs_value - rhs_value);
265 | 		case MUL_ASSIGN: return (lhs_value * rhs_value);
266 | 		case DIV_ASSIGN: if(rhs_value != 0) return (lhs_value / rhs_value); yyerror("Divide by 0!"); break;
267 | 		case MOD_ASSIGN: if((int)rhs_value != 0) return ((int)lhs_value % (int)rhs_value); yyerror("Divide by 0!"); break;
268 | 	}
269 | 	return lhs_value; /* zero divisor or unknown operator: keep the target's current value */
270 | }
271 | /* Parse the source file named by argv[1], report the outcome and print the symbol table. Returns 1 on usage or open errors. */
272 | int main(int argc, char *argv[])
273 | {
274 | 	if(argc < 2)
275 | 	{
276 | 		fprintf(stderr, "Usage: %s <source-file>\n", argv[0]);
277 | 		return 1;
278 | 	}
279 | 
280 | 	/* fopen() returns NULL when the file cannot be opened; do not start parsing in that case */
281 | 	if((yyin = fopen(argv[1], "r")) == NULL)
282 | 	{
283 | 		perror(argv[1]);
284 | 		return 1;
285 | 	}
286 | 	symbol_table = create_table();
287 | 	constant_table = create_table();
288 | 
289 | 	/* yyparse() returns 0 when the whole input was accepted */
290 | 	printf(yyparse() ? "\nParsing failed\n" : "\nParsing complete\n");
291 | 	printf("\n\tSymbol table");
292 | 	display(symbol_table);
293 | 	fclose(yyin);
294 | 	return 0;
295 | }
296 | 
297 | int yyerror(char *msg)
298 | {
299 | 	printf("Line no: %d Error message: %s Token: %s\n", yylineno, msg, yytext);
300 | }
301 | 


--------------------------------------------------------------------------------
/Project-2/symboltable.h:
--------------------------------------------------------------------------------
  1 | /*
  2 | * Compiler Design Project 1 : Lexical Analyser
  3 | *
  4 | * File        : symboltable.h
  5 | * Description : This file contains functions related to a hash organised symbol table.
  6 | *               The functions implemented are:
  7 | *               create_table(), hash(), create_entry(), search(), insert(), display()
  8 | *
  9 | * Authors     : Karthik M - 15CO22, Kaushik S Kalmady - 15CO222
 10 | * Date        : 17-1-2018
 11 | */
 12 | 
 13 | 
 14 | 
 15 | #include <stdint.h>
 16 | #include <stdlib.h>
 17 | #include <stdio.h>
 18 | #include <limits.h>
 19 | #include <string.h>
 20 | 
 21 | #define HASH_TABLE_SIZE 100
 22 | 
 23 | /* One symbol-table record; records that land in the same bucket are chained through successor */
 24 | typedef struct entry_s
 25 | {
 26 | 	char* lexeme;               /* text of the symbol (create_entry() stores a strdup'ed copy) */
 27 | 	double value;               /* value associated with the lexeme */
 28 | 	int data_type;              /* data type tag assigned by the parser */
 29 | 	struct entry_s* successor;  /* next record in the same bucket, NULL at the end of the chain */
 30 | } entry_t;
 31 | 
 32 | 
 33 | 
 34 | /*
 35 |  * Allocate an empty hash table: an array of HASH_TABLE_SIZE bucket heads, all NULL.
 36 |  * Returns the new table, or NULL if the allocation fails.
 37 |  */
 38 | entry_t** create_table()
 39 | {
 40 | 	entry_t** buckets = malloc( HASH_TABLE_SIZE * sizeof *buckets );
 41 | 	entry_t** slot;
 42 | 
 43 | 	if( buckets == NULL )
 44 | 		return NULL;
 45 | 
 46 | 	/* Every bucket starts out as an empty chain */
 47 | 	for( slot = buckets; slot != buckets + HASH_TABLE_SIZE; slot++ )
 48 | 		*slot = NULL;
 49 | 
 50 | 	return buckets;
 51 | }
 52 | 
 53 | 
 54 | /*
 55 |  * Map a lexeme to a bucket index in [0, HASH_TABLE_SIZE).
 56 |  * Uses Jenkins' one-at-a-time hash:
 57 |  * https://en.wikipedia.org/wiki/Jenkins_hash_function#one-at-a-time
 58 |  */
 59 | uint32_t hash( char *lexeme )
 60 | {
 61 | 	uint32_t h = 0;
 62 | 	const char *p;
 63 | 	/* Walk to the terminating NUL instead of calling strlen() on every iteration */
 64 | 	for ( p = lexeme; *p != '\0'; ++p ) {
 65 | 		h += *p;
 66 | 		h += ( h << 10 );
 67 | 		h ^= ( h >> 6 );
 68 | 	}
 69 | 	h += ( h << 3 );
 70 | 	h ^= ( h >> 11 );
 71 | 	h += ( h << 15 );
 72 | 	return h % HASH_TABLE_SIZE; // fold the 32-bit hash into a bucket index
 73 | }
 74 | 
 75 | /*
 76 |  * Allocate an entry holding a private copy of lexeme and the given value.
 77 |  * data_type starts at 0 (no type assigned yet) and successor at NULL.
 78 |  * Returns NULL if memory cannot be allocated. Called from insert().
 79 |  */
 80 | entry_t *create_entry( char *lexeme, int value )
 81 | {
 82 | 	entry_t *newentry = malloc( sizeof( entry_t ) );
 83 | 	if( newentry == NULL )
 84 | 		return NULL;
 85 | 	if( ( newentry->lexeme = strdup( lexeme ) ) == NULL ) {
 86 | 		free( newentry ); /* do not leak the half-built entry */
 87 | 		return NULL;
 88 | 	}
 89 | 	newentry->value = value;
 90 | 	newentry->data_type = 0; /* the parser tests this field to decide whether a type was already set */
 91 | 	newentry->successor = NULL;
 92 | 	return newentry;
 93 | }
 94 | 
 95 | /*
 96 |  * Look up a lexeme in the hash table.
 97 |  *
 98 |  * hash_table_ptr : table returned by create_table()
 99 |  * lexeme         : NUL-terminated string to look for
100 |  *
101 |  * Returns the entry whose lexeme matches exactly, or NULL when there is none.
102 |  */
103 | entry_t* search( entry_t** hash_table_ptr, char* lexeme )
104 | {
105 | 	entry_t* cursor;
106 | 
107 | 	/* Only the chain of the bucket this lexeme hashes to can contain it */
108 | 	for( cursor = hash_table_ptr[ hash( lexeme ) ]; cursor != NULL; cursor = cursor->successor )
109 | 	{
110 | 		if( strcmp( lexeme, cursor->lexeme ) == 0 )
111 | 		{
112 | 			return cursor; // lexeme found
113 | 		}
114 | 	}
115 | 
116 | 	// Walked off the end of the chain: the lexeme is not in the table
117 | 	return NULL;
118 | }
119 | 
120 | /*
121 |  * Insert a lexeme into the hash table unless it is already there.
122 |  *
123 |  * hash_table_ptr : table returned by create_table()
124 |  * lexeme         : string to insert (copied by create_entry())
125 |  * value          : value stored in a newly created entry
126 |  *
127 |  * Returns the existing entry when the lexeme is already present (its value is
128 |  * left untouched), otherwise the newly created entry. Terminates the program
129 |  * with exit(1) if the new entry cannot be created.
130 |  */
131 | entry_t* insert( entry_t** hash_table_ptr, char* lexeme, int value )
132 | {
133 | 	entry_t* existing = search( hash_table_ptr, lexeme );
134 | 	entry_t* fresh;
135 | 	uint32_t bucket;
136 | 
137 | 	if( existing != NULL )
138 | 		return existing;
139 | 
140 | 	bucket = hash( lexeme );
141 | 	fresh = create_entry( lexeme, value );
142 | 	if( fresh == NULL )
143 | 	{
144 | 		printf("Insert failed. New entry could not be created.");
145 | 		exit(1);
146 | 	}
147 | 
148 | 	/* Push onto the front of the chain; an empty bucket has a NULL head, so it needs no special case */
149 | 	fresh->successor = hash_table_ptr[bucket];
150 | 	hash_table_ptr[bucket] = fresh;
151 | 	return fresh;
152 | }
153 | 
154 | /*
155 |  * Print every entry of the table to stdout as a three-column listing
156 |  * (lexeme, value converted to int, data type), bucket by bucket.
157 |  */
158 | void display(entry_t** hash_table_ptr)
159 | {
160 | 	int bucket;
161 | 	entry_t* node;
162 | 
163 | 	printf("\n====================================================\n");
164 | 	printf(" %-20s %-20s %-20s\n","lexeme","value","data-type");
165 | 	printf("====================================================\n");
166 | 
167 | 	for( bucket = 0; bucket < HASH_TABLE_SIZE; bucket++ )
168 | 	{
169 | 		/* Walk the collision chain hanging off this bucket */
170 | 		for( node = hash_table_ptr[bucket]; node != NULL; node = node->successor )
171 | 			printf(" %-20s %-20d %-20d \n", node->lexeme, (int)node->value, node->data_type);
172 | 	}
173 | 
174 | 	printf("====================================================\n");
175 | }
176 | 


--------------------------------------------------------------------------------
/Project-2/testcases/test-case-1.c:
--------------------------------------------------------------------------------
 1 | /*
 2 |  */
 3 | 
 4 | 
 5 | /*
 6 | Karthik M - 15CO221
 7 | Kaushik S Kalmady - 15CO222
 8 | Compiler Design Project 2
 9 | 
10 | Test Case 1
11 | - Missing semicolon
12 | - Single quotes for characters and double quotes for strings
13 | 
14 | The output in lex/yacc should identify statements without a terminating semicolon and unmatched single and double quotes
15 | */
16 | 
17 | 
18 | int main(){
19 | 
20 |    int a = 10;
21 |    int b = 10   // intentional error: no terminating semicolon
22 | 
23 |    for(a = 2 a<3; a++)   // intentional error: no semicolon after the loop initialiser
24 |    b = b + 1;
25 | 
26 |    printf (" This string is enclosed in double quotes ");
27 |    printf (" This string is not enclosed in double quotes );
28 |    // intentional error on the line above: the string literal is never closed
29 | 
30 | 
31 |    return 0;
32 | }
33 | 


--------------------------------------------------------------------------------
/Project-2/testcases/test-case-2.c:
--------------------------------------------------------------------------------
 1 | /*
 2 | Karthik M - 15CO221
 3 | Kaushik S Kalmady - 15CO222
 4 | Compiler Design Project 2
 5 | 
 6 | Test Case 2
 7 |  - Test for unmatched parentheses
 8 | 
 9 | The output in lex/yacc should print an error message when parentheses are not matched
10 | 
11 | */
12 | 
13 | 
14 | int main()
15 | {
16 | 	int array1[10];
17 | 	int array2[20;   // intentional error: the closing ']' is missing
18 | 
19 | 	for(int a = 3; a<4; a++   // intentional error: the closing ')' is missing
20 | 	{
21 | 		a = array1[0];
22 | 	}
23 | 
24 | 	func();
25 | }
26 | 
27 | int func()
28 | {
29 | 	return 20;   // intentional error: func() is never closed with '}'
30 | 


--------------------------------------------------------------------------------
/Project-2/testcases/test-case-3.c:
--------------------------------------------------------------------------------
 1 | /*
 2 | Karthik M - 15CO221
 3 | Kaushik S Kalmady - 15CO222
 4 | Compiler Design Project 2
 5 | 
 6 | Test Case 3
 7 |  - Left hand side of an assignment statement should be an lvalue
 8 | 
 9 | The output in lex/yacc should identify those assignment statements whose left-hand side is not an lvalue; stray characters must also be detected as errors
10 | */
11 | 
12 | 
13 | int main()
14 | {
15 |   // intentional errors on the next three lines: stray characters and the invalid operator '@'
16 |   `
17 |   @ -
18 |   total = x @ y;
19 | 
20 | 	int a = 4;
21 | 
22 | 	if(a > 0 )
23 | 	{
24 | 		printf("a is positive");
25 | 		a * 2 = a;   // intentional error: the left-hand side is not an lvalue
26 | 	}
27 | 	else
28 | 	{
29 | 		printf("a is negative");
30 | 		a = a * 2;
31 | 	}
32 | 
33 | }
34 | 


--------------------------------------------------------------------------------
/Project-2/testcases/test-case-4.c:
--------------------------------------------------------------------------------
 1 | /*
 2 | Karthik M - 15CO221
 3 | Kaushik S Kalmady - 15CO222
 4 | Compiler Design Project 2
 5 | 
 6 | Test Case 4
 7 | 
 8 | Following errors must be detected
 9 |  - Invalid identifiers: 9y, total$
10 |  - Invalid operator: @
11 |  - Escaped quotes should be part of the string that is identified
12 |  - Stray characters: `, @, -
13 | 
14 | The output should display appropriate errors
15 | */
16 | 
17 | 
18 | int main()
19 | {   // NOTE(review): none of the errors listed in the header comment appear in this body - confirm the header matches this fixture
20 |   int x, y;
21 |   long long int total, diff;   // multi-word type specifier
22 |   int *ptr;                    // pointer declaration
23 |   int a = 86;
24 | 
25 |   if (a > 90)
26 |   {
27 | 	  printf ("Grade is AA");
28 |   }
29 |   else if (a > 80)   // if / else-if / else chain
30 |   {
31 | 	  printf ("Grade is AB");
32 |   }
33 |   else
34 |   {
35 | 	  printf ("Grade is BB");
36 |   }
37 | 
38 |   x = -10, y = 20;   // two assignments separated by a comma in one statement
39 |   x=x*3/2;
40 |   total = x + y;
41 | }
42 | 
43 | // Errors:
44 | //
45 | 


--------------------------------------------------------------------------------
/Project-2/testcases/test-case-5.c:
--------------------------------------------------------------------------------
 1 | /*
 2 | Karthik M - 15CO221
 3 | Kaushik S Kalmady - 15CO222
 4 | Compiler Design Project 2
 5 | 
 6 | Test Case 5
 7 | 
 8 | - Program file with no errors. This should complete parsing successfully
 9 | 
10 | */
11 | 
12 | 
13 | int main()
14 | {
15 |   unsigned int a = 0x0f;        // hexadecimal constant
16 |   long int mylong = 123456l;    // decimal constant with an 'l' suffix
17 |   long int i, j;
18 | 
19 |   for(i=0; i < 10; i++)
20 |   {
21 |     for(j=10; j > 0; j--)       // nested loop
22 | 	{
23 | 		printf("%d",i);
24 |     }
25 |   }
26 | 
27 |   diff = x - y;   // NOTE(review): diff, x, y and total are never declared in this file - presumably fine for a syntax-only check; confirm
28 |   int rem = x % y;
29 |   printf ("Total = %d \n", total);
30 | 
31 |   int result = IncreaseBy10(x);   // function call used as an initialiser
32 | }
33 | 
34 | int IncreaseBy10(int x)   // function with one typed argument, defined after its use in main()
35 | {
36 | 	return x + 10;
37 | }
38 | 


--------------------------------------------------------------------------------
/Project-2/testcases/test-case-6.c:
--------------------------------------------------------------------------------
 1 | int main()
 2 | {
 3 |   int c = 0,d,e,f;
 4 | 
 5 |   c = c + 1;  // c = 1
 6 |   d = c * 5;  // d = 5
 7 |   e = d / 4;  // e = 1
 8 |   f = d % 3;  // f = 2
 9 |   f = f / 0;  // division by a zero constant: the grammar's '/' action reports "Divide by 0!"
10 | }
11 | 


--------------------------------------------------------------------------------
/Project-3/README.md:
--------------------------------------------------------------------------------
 1 | # Compiler Design Project-3
 2 | 
 3 | ## Semantic Analyser for a subset of the C language
 4 | This code is built upon the parser designed in Project-2 with some modifications to the earlier structure.
 5 | 
 6 | ### Team Members
 7 |  - Karthik M (15CO221)
 8 |  - Kaushik S Kalmady (15CO222)
 9 | 
10 | ### Installation and running
11 |  1. Please make sure you have Lex/Flex and Yacc/Bison installed
12 |  2. Next run `$ chmod +x compile` 
13 |  3. `$ ./compile`
14 |  4. To run the semantic analyser: `$ ./semantic_analyser test-file.c`
15 | 
16 | 
17 | ### FEATURES
18 |  - Type checking in arithmetic expressions
19 |  - Type checking in relational and logical expressions
20 |  - Type checking in assignment expressions
21 |  - Checking for array indices that are out of bounds
22 |  - Checking that the type in a return statement matches the function's return type
23 |  - Checking matching function parameter type during function call
24 |  - Checking number of parameters passed during function call
25 | 


--------------------------------------------------------------------------------
/Project-3/compile:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | set -e    # abort at the first failing step instead of compiling stale generated files
3 | lex lexer.l                                     # scanner -> lex.yy.c
4 | yacc -d parser.y -v                             # parser  -> y.tab.c and y.tab.h (-v also writes y.output)
5 | gcc -w -g y.tab.c -ly -ll -o semantic_analyser  # parser.y #includes lex.yy.c, so compiling y.tab.c builds both


--------------------------------------------------------------------------------
/Project-3/lexer.l:
--------------------------------------------------------------------------------
 1 | %{
 2 | /* Scanner prologue: headers and globals used by the rule actions below */
 3 | #include <stdlib.h>
 4 | #include <stdio.h>
 5 | #include <limits.h>
 6 | 
 7 | #include "y.tab.h"
 8 | entry_t** constant_table; /* table that the constant and string rules insert() into */
 9 | 
10 | int cmnt_strt = 0; /* line on which the current block comment began; reported if it is never closed */
11 | 
12 | %}
13 |  /* Named patterns used by the rules */
14 | letter [a-zA-Z]
15 | digit [0-9]
16 | ws  [ \t\r\f\v]+
17 | identifier (_|{letter})({letter}|{digit}|_){0,31}
18 | hex [0-9a-fA-F]
19 |  /* hex accepts both letter cases so that constants such as 0xFF are scanned as hex constants */
20 |  /* Exclusive states */
21 | %x CMNT
22 | 
23 | %%
24 |   /* Reserved words: each one is returned as its own token */
25 | "int"                             { return INT; }
26 | "char"                            { return CHAR; }
27 | "float"                           { return FLOAT; }
28 | "void"                            { return VOID; }
29 | "long"                            { return LONG; }
30 | "long long"                       { return LONG_LONG; }
31 | "short"                           { return SHORT; }
32 | "signed"                          { return SIGNED; }
33 | "unsigned"                        { return UNSIGNED; }
34 | "for"                             { return FOR; }
35 | "while"                           { return WHILE; }
36 | "break"                           { return BREAK; }
37 | "continue"                        { return CONTINUE; }
38 | "if"                              { return IF; }
39 | "else"                            { return ELSE; }
40 | "return"                          { return RETURN; }
41 |   /* Identifiers, whitespace and constants; each constant is entered into constant_table. The hex prefix is 0x or 0X. */
42 | {identifier}                      {return  IDENTIFIER;}
43 | {ws}                              ;
44 | [+\-]?0[xX]{hex}+[lLuU]?          { yylval.entry = insert(constant_table,yytext,(int)strtol(yytext, NULL, 16),INT); return  HEX_CONSTANT;}
45 | '({letter}|{digit})'              { yylval.entry = insert(constant_table,yytext,yytext[1],CHAR); return CHAR_CONSTANT;}
46 | [+\-]?{digit}+[lLuU]?             { yylval.entry = insert(constant_table,yytext,atoi(yytext),INT); return  DEC_CONSTANT;}
47 | [+\-]?{digit}*\.{digit}+          { yylval.entry = insert(constant_table,yytext,atof(yytext),FLOAT); return FLOAT_CONSTANT;}
48 |   /* Block comments are consumed in the exclusive CMNT state; "//" comments are dropped up to the end of the line */
49 | "/*"                              {cmnt_strt = yylineno; BEGIN CMNT;}
50 | <CMNT>.|{ws}                      ;
51 | <CMNT>\n                          {yylineno++;}
52 | <CMNT>"*/"                        {BEGIN INITIAL;}
53 | <CMNT>"/*"                        {printf("Line %3d: Nested comments are not valid!\n",yylineno);}
54 | <CMNT><<EOF>>                     {printf("Line %3d: Unterminated comment\n", cmnt_strt); yyterminate();}
55 | 
56 | "//".*                            ;
57 |   /* String literals. A quote preceded by a backslash does not end the string: scanning resumes from that quote. */
58 | \"[^\"\n]*\"                      {
59 |                                     if(yytext[yyleng-2]=='\\') /* check if it was an escaped quote */
60 |                                     {
61 |                                       yyless(yyleng-1);       /* push the quote back if it was escaped */
62 |                                       yymore();               /* keep the text matched so far and append the next match to it */
63 |                                     }
64 |                                     else
65 |                                     {
66 |                                       insert(constant_table,yytext,INT_MAX,STRING); /* INT_MAX is passed in the value position - presumably a placeholder, confirm against insert() */
67 |                                       return STRING;
68 |                                     }
69 |                                   }
70 |   /* Lexical errors are only reported with printf; no token is returned for them. A bare newline advances yylineno. */
71 | \"[^\"\n]*$                     {printf("Line %3d: Unterminated string %s\n",yylineno,yytext);}
72 | {digit}+({letter}|_)+	          {printf("Line %3d: Illegal identifier name %s\n",yylineno,yytext);}
73 | \n                              {yylineno++;}
74 |   /* Multi-character operators. Any other single character is returned as its own character code. */
75 | "--"                              { return DECREMENT; }
76 | "++"                              { return INCREMENT; }
77 | "+="                              { return ADD_ASSIGN; }
78 | "-="                              { return SUB_ASSIGN; }
79 | "*="                              { return MUL_ASSIGN; }
80 | "/="                              { return DIV_ASSIGN; }
81 | "%="                              { return MOD_ASSIGN; }
82 | 
83 | 
84 | "&&"                              { return LOGICAL_AND; }
85 | "||"                              { return LOGICAL_OR; }
86 | "<="                              { return LS_EQ; }
87 | ">="                              { return GR_EQ; }
88 | "=="                              { return EQ; }
89 | "!="                              { return NOT_EQ; }
90 | 
91 | .                                 { return yytext[0]; }
92 | 
93 | %%
94 | 


--------------------------------------------------------------------------------
/Project-3/parser.y:
--------------------------------------------------------------------------------
  1 | %{
  2 | 	#include <stdlib.h>
  3 | 	#include <stdio.h>
  4 | 	int yyerror(char *msg);
  5 | 
  6 | 	#include "symboltable.h"
  7 | 	#include "lex.yy.c"
  8 | 
  9 | 	#define SYMBOL_TABLE symbol_table_list[current_scope].symbol_table /* symbol table of the scope currently being parsed */
 10 | 
 11 |   extern entry_t** constant_table;
 12 | 
 13 | 	int current_dtype; /* type code set by the most recently parsed type_specifier */
 14 | 
 15 | 	table_t symbol_table_list[NUM_TABLES];
 16 | 
 17 | 	int is_declaration = 0; /* set once a type has been parsed, cleared at the end of the declaration / function header */
 18 | 	int is_loop = 0; /* non-zero while a loop body is being parsed; checked by break and continue */
 19 | 	int is_func = 0; /* non-zero while a function body is being parsed; checked by return */
 20 | 	int func_type; /* declared return type of the function being parsed */
 21 | 
 22 | 	int param_list[10]; /* types of the arguments collected by the arg rule */
 23 | 	int p_idx = 0; /* number of entries currently held in param_list */
 24 | 	int p=0; /* set after a function header so the body's '{' does not open a second scope */
 25 |     int rhs = 0; /* set by assign_op, cleared by assignment_expr: identifiers are then looked up instead of declared */
 26 | 
 27 | 	void type_check(int,int,int);
 28 | %}
 29 | 
 30 | %union
 31 | {
 32 | 	int data_type; /* type code carried by expression non-terminals */
 33 | 	entry_t* entry; /* pointer to a symbol-table or constant-table entry */
 34 | }
 35 | 
 36 | %token <entry> IDENTIFIER
 37 | 
 38 |  /* Constants */
 39 | %token <entry> DEC_CONSTANT HEX_CONSTANT CHAR_CONSTANT FLOAT_CONSTANT
 40 | %token STRING
 41 | 
 42 |  /* Logical and Relational operators */
 43 | %token LOGICAL_AND LOGICAL_OR LS_EQ GR_EQ EQ NOT_EQ
 44 | 
 45 |  /* Short hand assignment operators */
 46 | %token MUL_ASSIGN DIV_ASSIGN MOD_ASSIGN ADD_ASSIGN SUB_ASSIGN
 47 | %token INCREMENT DECREMENT
 48 | 
 49 |  /* Data types */
 50 | %token SHORT INT LONG LONG_LONG SIGNED UNSIGNED CONST VOID CHAR FLOAT
 51 | 
 52 |  /* Keywords */
 53 | %token IF FOR WHILE CONTINUE BREAK RETURN
 54 |  /* Semantic value type of each non-terminal that carries one */
 55 | %type <entry> identifier
 56 | %type <entry> constant
 57 | %type <entry> array_index
 58 | 
 59 | %type <data_type> sub_expr
 60 | %type <data_type> unary_expr
 61 | %type <data_type> arithmetic_expr
 62 | %type <data_type> assignment_expr
 63 | %type <data_type> function_call
 64 | %type <data_type> array_access
 65 | %type <data_type> lhs
 66 |  /* Operator precedence and associativity, lowest precedence first */
 67 | %left ','
 68 | %right '='
 69 | %left LOGICAL_OR
 70 | %left LOGICAL_AND
 71 | %left EQ NOT_EQ
 72 | %left '<' '>' LS_EQ GR_EQ
 73 | %left '+' '-'
 74 | %left '*' '/' '%'
 75 | %right '!'
 76 | 
 77 |  /* UMINUS is applied with %prec to unary minus; LOWER_THAN_ELSE and ELSE settle the dangling else */
 78 | %nonassoc UMINUS
 79 | %nonassoc LOWER_THAN_ELSE
 80 | %nonassoc ELSE
 81 | 
 82 | 
 83 | %%
 84 | 
 85 |  /* A program is a non-empty sequence of builder blocks. */
 86 | starter: starter builder
 87 |        | builder
 88 |        ;
 89 |  /* Each builder block is either a function or a declaration */
 90 | builder: function
 91 |        | declaration
 92 |        ;
 93 | 
 94 |  /* A function: return type, name, argument list and body */
 95 | function: type identifier
 96 |           {
 97 |               func_type = current_dtype;            /* the type just parsed is the return type */
 98 |               is_declaration = 0;
 99 |               current_scope = create_new_scope();   /* opened before the arguments are parsed */
100 |           }
101 |           '(' argument_list ')'
102 |           {
103 |               is_declaration = 0;
104 |               fill_parameter_list($2,param_list,p_idx);   /* $2 is the function's identifier entry */
105 |               p_idx = 0;
106 |               is_func = 1;
107 |               p=1;                                  /* compound_stmt then skips opening another scope */
108 |           }
109 |           compound_stmt
110 |           {
111 |               is_func = 0;
112 |           }
113 |         ;
114 |  /* Grammar for type names. Parsing a type switches declaration mode on. */
115 | 
116 | type: data_type pointer         {is_declaration = 1; }
117 |     | data_type                 {is_declaration = 1; }
118 |     ;
119 | 
120 |  /* One or more '*' */
121 | pointer: '*' pointer
122 |        | '*'
123 |        ;
124 | 
125 | data_type: sign_specifier type_specifier
126 |          | type_specifier
127 |          ;
128 | 
129 |  /* No action is attached here, so signedness does not change current_dtype */
130 | sign_specifier: SIGNED
131 |               | UNSIGNED
132 |               ;
133 | 
134 | type_specifier: INT             {current_dtype = INT;}
135 |               | SHORT INT       {current_dtype = SHORT;}
136 |               | SHORT           {current_dtype = SHORT;}
137 |               | LONG            {current_dtype = LONG;}
138 |               | LONG INT        {current_dtype = LONG;}
139 |               | LONG_LONG       {current_dtype = LONG_LONG;}
140 |               | LONG_LONG INT   {current_dtype = LONG_LONG;}
141 |               | CHAR            {current_dtype = CHAR;}
142 |               | FLOAT           {current_dtype = FLOAT;}
143 |               | VOID            {current_dtype = VOID;}
144 |               ;
145 | 
146 |  /* grammar rules for argument list */
147 |  /* argument list can be empty */
148 | argument_list: arguments
149 |              | /* empty */
150 |              ;
151 |  /* arguments are comma separated TYPE ID pairs */
152 | arguments: arguments ',' arg
153 |          | arg
154 |          ;
155 | 
156 |  /* Each arg is a TYPE ID pair; its type is recorded in param_list. Arguments beyond the array's capacity are reported, not stored. */
157 | arg: type identifier    {if(p_idx < (int)(sizeof(param_list)/sizeof(param_list[0]))) param_list[p_idx++] = $2->data_type; else yyerror("Too many function arguments");}
158 |    ;
159 | 
160 |  /* Generic statement. Can be compound or a single statement */
161 | stmt: compound_stmt
162 |     | single_stmt
163 |     ;
164 | 
165 |  /* A braced block of statements. It opens a new scope unless p is set, in which case the
166 |   * scope already created by the function rule is reused and p is cleared. */
167 | compound_stmt: '{'
168 |                {
169 |                    if(!p) current_scope = create_new_scope();
170 |                    else p = 0;
171 |                }
172 |                statements
173 |                '}'
174 |                {current_scope = exit_scope();}
175 |              ;
176 | 
177 | statements: statements stmt
178 |           | /* empty */
179 |           ;
180 | 
181 |  /* Grammar for what constitutes every individual statement */
182 | single_stmt: if_block
183 |            | for_block
184 |            | while_block
185 |            | declaration
186 |            | function_call ';'
187 |            | RETURN ';'
188 |              {
189 |                  /* a bare return is only accepted inside a VOID function */
190 |                  if(!is_func)
191 |                      yyerror("return statement not inside function definition");
192 |                  else if(func_type != VOID)
193 |                      yyerror("return type (VOID) does not match function type");
194 |              }
195 | 
196 |            | CONTINUE ';'       {if(!is_loop) {yyerror("Illegal use of continue");}}
197 |            | BREAK ';'          {if(!is_loop) {yyerror("Illegal use of break");}}
198 | 
199 |            | RETURN sub_expr ';'
200 |              {
201 |                  /* the type of the returned expression must equal the function's type */
202 |                  if(!is_func)
203 |                      yyerror("return statement not in function definition");
204 |                  else if(func_type != $2)
205 |                      yyerror("return type does not match function type");
206 |              }
207 |            ;
208 |  /* is_loop counts the enclosing loops, so leaving an inner loop keeps break/continue legal in the outer one */
209 | for_block: FOR '(' expression_stmt expression_stmt ')' {is_loop++;} stmt {is_loop--;}
210 |          | FOR '(' expression_stmt expression_stmt expression ')' {is_loop++;} stmt {is_loop--;}
211 |          ;
212 | 
213 | if_block: IF '(' expression ')' stmt                %prec LOWER_THAN_ELSE
214 |         | IF '(' expression ')' stmt ELSE stmt
215 |         ;
216 | 
217 | while_block: WHILE '(' expression ')' {is_loop++;} stmt {is_loop--;}
218 |            ;
219 | 
220 |  /* Declarations, plain assignment lists and ++/-- statements. Declaration mode ends at the ';' of a typed declaration. */
221 | declaration: type declaration_list ';'      {is_declaration = 0; }
222 |            | declaration_list ';'
223 |            | unary_expr ';'
224 |            ;
225 | 
226 | declaration_list: declaration_list ',' sub_decl
227 |                 | sub_decl
228 |                 ;
229 | 
230 | sub_decl: assignment_expr
231 |         | identifier
232 |         | array_access
233 |         ;
234 | 
235 |  /* An expression statement may be just ';' so that for-loop clauses can be left empty */
236 | expression_stmt: expression ';'
237 |                | ';'
238 |                ;
239 | 
240 | expression: expression ',' sub_expr
241 |           | sub_expr
242 |           ;
243 | 
244 |  /* Relational and logical expressions: operands are type-checked, the result takes the left operand's type */
245 | sub_expr: sub_expr '>' sub_expr             {type_check($1,$3,2); $$ = $1;}
246 |         | sub_expr '<' sub_expr             {type_check($1,$3,2); $$ = $1;}
247 |         | sub_expr EQ sub_expr              {type_check($1,$3,2); $$ = $1;}
248 |         | sub_expr NOT_EQ sub_expr          {type_check($1,$3,2); $$ = $1;}
249 |         | sub_expr LS_EQ sub_expr           {type_check($1,$3,2); $$ = $1;}
250 |         | sub_expr GR_EQ sub_expr           {type_check($1,$3,2); $$ = $1;}
251 |         | sub_expr LOGICAL_AND sub_expr     {type_check($1,$3,2); $$ = $1;}
252 |         | sub_expr LOGICAL_OR sub_expr      {type_check($1,$3,2); $$ = $1;}
253 |         | '!' sub_expr                      {$$ = $2;}
254 |         | arithmetic_expr                   {$$ = $1;}
255 |         | assignment_expr                   {$$ = $1;}
256 |         | unary_expr                        {$$ = $1;}
257 |         ;
258 | 
259 | 
260 |  /* Assignment: both sides are type-checked, the result takes the right-hand type and rhs mode is switched off */
261 | assignment_expr: lhs assign_op arithmetic_expr      {type_check($1,$3,1); $$ = $3; rhs=0;}
262 |                | lhs assign_op array_access         {type_check($1,$3,1); $$ = $3;rhs=0;}
263 |                | lhs assign_op function_call        {type_check($1,$3,1); $$ = $3;rhs=0;}
264 |                | lhs assign_op unary_expr           {type_check($1,$3,1); $$ = $3;rhs=0;}
265 |                | unary_expr assign_op unary_expr    {type_check($1,$3,1); $$ = $3;rhs=0;}
266 |                ;
267 | 
268 | unary_expr: identifier INCREMENT    {$$ = $1->data_type;}
269 |           | identifier DECREMENT    {$$ = $1->data_type;}
270 |           | DECREMENT identifier    {$$ = $2->data_type;}
271 |           | INCREMENT identifier    {$$ = $2->data_type;}
272 |           ;
273 | lhs: identifier                     {$$ = $1->data_type;}
274 |    | array_access                   {$$ = $1;}
275 |    ;
276 | 
277 |  /* While a declaration's left-hand side is being parsed the name is inserted into the current
278 |   * scope's table; otherwise it is looked up with search_recursive(). NULL results are reported. */
279 | identifier: IDENTIFIER
280 |             {
281 |                 if(is_declaration && !rhs) {
282 |                     $1 = insert(SYMBOL_TABLE,yytext,INT_MAX,current_dtype);
283 |                     if($1 == NULL) yyerror("Redeclaration of variable");
284 |                 } else {
285 |                     $1 = search_recursive(yytext);
286 |                     if($1 == NULL) yyerror("Variable not declared");
287 |                 }
288 |                 $$ = $1;
289 |             }
290 |           ;
291 | 
292 |  /* Every assignment operator switches rhs mode on: identifiers that follow are looked up, not declared */
293 | assign_op: '='          {rhs=1;}
294 |          | ADD_ASSIGN   {rhs=1;}
295 |          | SUB_ASSIGN   {rhs=1;}
296 |          | MUL_ASSIGN   {rhs=1;}
297 |          | DIV_ASSIGN   {rhs=1;}
298 |          | MOD_ASSIGN   {rhs=1;}
299 |          ;
300 |  /* Binary arithmetic: operands are type-checked and the result explicitly takes the left operand's type */
301 | arithmetic_expr: arithmetic_expr '+' arithmetic_expr    {type_check($1,$3,0); $$ = $1;}
302 |     | arithmetic_expr '-' arithmetic_expr               {type_check($1,$3,0); $$ = $1;}
303 |     | arithmetic_expr '*' arithmetic_expr               {type_check($1,$3,0); $$ = $1;}
304 |     | arithmetic_expr '/' arithmetic_expr               {type_check($1,$3,0); $$ = $1;}
305 |     | arithmetic_expr '%' arithmetic_expr               {type_check($1,$3,0); $$ = $1;}
306 |     | '(' arithmetic_expr ')'                           {$$ = $2;}
307 |     | '-' arithmetic_expr %prec UMINUS                  {$$ = $2;}
308 |     | identifier                                        {$$ = $1->data_type;}
309 |     | constant                                          {$$ = $1->data_type;}
310 |     ;
311 | 
/* constant: any literal token. The token's entry is flagged with
   is_constant = 1 and passed up unchanged as the semantic value. */
constant: DEC_CONSTANT 												{$1->is_constant=1; $$ = $1;}
    | HEX_CONSTANT														{$1->is_constant=1; $$ = $1;}
		| CHAR_CONSTANT														{$1->is_constant=1; $$ = $1;}
		| FLOAT_CONSTANT													{$1->is_constant=1; $$ = $1;}
    ;
317 | 
/* array_access: identifier '[' array_index ']'.
   The same syntax serves two purposes:
   - Declarator (is_declaration set, rhs clear), e.g. `int a[10];`:
     the index is the array size. It must be positive, and when it is a
     constant it is stored in the identifier's array_dimension.
   - Element access (everything else, including the initializer part of a
     declaration such as `int x = a[5];`): a constant index must lie in
     [0, array_dimension). Non-constant indices cannot be checked here.
   The semantic value is the data type of the array identifier. */
array_access: identifier '[' array_index ']'
	{
		if(is_declaration && !rhs)
		{
			if($3->value <= 0)
				yyerror("size of array is not positive");
			else if($3->is_constant)
				$1->array_dimension = $3->value;
		}
		else if($3->is_constant)
		{
			/* Negative check first so that a negative index is never
			   reported as merely "out of bound". */
			if($3->value < 0)
				yyerror("Array index cannot be negative");

			/* Valid indices are 0 .. array_dimension - 1. */
			if($3->value >= $1->array_dimension)
				yyerror("Array index out of bound");
		}

		$$ = $1->data_type;
	}
	;
348 | 
/* array_index: the expression between '[' and ']' - a constant or an
   identifier. The entry is passed up so array_access can inspect its
   value and is_constant fields. */
array_index: constant																		{$$ = $1;}
					 | identifier																	{$$ = $1;}
					 ;
352 | 
/* function_call: a call with or without arguments.
   The argument types collected in param_list (p_idx of them) are compared
   with the callee's recorded parameter list, the collector is reset for the
   next call, and the semantic value becomes the callee's data type. */
function_call
	: identifier '(' parameter_list ')'
		{
			check_parameter_list($1,param_list,p_idx);
			p_idx = 0;
			$$ = $1->data_type;
		}
	| identifier '(' ')'
		{
			check_parameter_list($1,param_list,p_idx);
			p_idx = 0;
			$$ = $1->data_type;
		}
	;
365 | 
/* parameter_list: one or more comma-separated call arguments (left
   recursive, so arguments are reduced in source order). */
parameter_list:
              parameter_list ','  parameter
              |parameter
              ;
370 | 
/* parameter: a single call argument. Its type is appended to param_list:
   the value of sub_expr for an expression, or the token code STRING for a
   string literal.
   NOTE(review): p_idx is incremented without a bound check; the size of
   param_list is declared outside this view - confirm it cannot overflow. */
parameter: sub_expr																			{param_list[p_idx++] = $1;}
				 | STRING																				{param_list[p_idx++] = STRING;}
				 ;
374 | %%
375 | 
/*
 * Report a type mismatch between two operand types.
 *
 * left, right : data-type codes of the two operands.
 * flag        : selects the message - 0 arithmetic, 1 assignment, 2 logical.
 *               Any other value reports nothing.
 *
 * Nothing happens when the two types are equal.
 */
void type_check(int left, int right, int flag)
{
	if(left == right)
		return;

	if(flag == 0)
		yyerror("Type mismatch in arithmetic expression");
	else if(flag == 1)
		yyerror("Type mismatch in assignment expression");
	else if(flag == 2)
		yyerror("Type mismatch in logical expression");
}
388 | 
389 | int main(int argc, char *argv[])
390 | {
391 | 	 int i;
392 | 	 for(i=0; i<NUM_TABLES;i++)
393 | 	 {
394 | 	  symbol_table_list[i].symbol_table = NULL;
395 | 	  symbol_table_list[i].parent = -1;
396 | 	 }
397 | 
398 | 	constant_table = create_table();
399 |   symbol_table_list[0].symbol_table = create_table();
400 | 	yyin = fopen(argv[1], "r");
401 | 
402 | 	if(!yyparse())
403 | 	{
404 | 		printf("\nPARSING COMPLETE\n\n\n");
405 | 	}
406 | 	else
407 | 	{
408 | 			printf("\nPARSING FAILED!\n\n\n");
409 | 	}
410 | 
411 | 
412 | 	printf("SYMBOL TABLES\n\n");
413 | 	display_all();
414 | 
415 | 	printf("CONSTANT TABLE");
416 | 	display_constant_table(constant_table);
417 | 
418 | 
419 | 	fclose(yyin);
420 | 	return 0;
421 | }
422 | 
423 | int yyerror(char *msg)
424 | {
425 | 	printf("Line no: %d Error message: %s Token: %s\n", yylineno, msg, yytext);
426 | 	exit(0);
427 | }
428 | 


--------------------------------------------------------------------------------
/Project-3/symboltable.h:
--------------------------------------------------------------------------------
  1 | /*
  2 | * Compiler Design Project 3 : Semantic Analyser
  3 | *
  4 | * File        : symboltable.h
  5 | *
  6 | * Authors     : Karthik M - 15CO221, Kaushik S Kalmady - 15CO222
  7 | * Date        : 11-3-2018
  8 | */
  9 | 
 10 | 
 11 | #include <stdint.h>
 12 | #include <stdlib.h>
 13 | #include <stdio.h>
 14 | #include <limits.h>
 15 | #include <string.h>
 16 | 
 17 | #define HASH_TABLE_SIZE 100
 18 | #define NUM_TABLES 10
 19 | 
 20 | int table_index = 0;
 21 | int current_scope = 0;
 22 | 
 23 | /* struct to hold each entry */
 24 | struct entry_s
 25 | {
 26 | 	char* lexeme;
 27 | 	double value;
 28 | 	int data_type;
 29 | 	int* parameter_list; // for functions
 30 | 	int array_dimension;
 31 | 	int is_constant;
 32 | 	int num_params;
 33 | 	struct entry_s* successor;
 34 | };
 35 | 
 36 | typedef struct entry_s entry_t;
 37 | 
 38 | /* Wrapper for symbol table with pointer to symbol table of parent scope */
 39 | struct table_s
 40 | {
 41 | 	entry_t** symbol_table;
 42 | 	int parent;
 43 | };
 44 | 
 45 | typedef struct table_s table_t;
 46 | 
 47 | extern table_t symbol_table_list[NUM_TABLES];
 48 | 
 49 | /* Create a new hash_table. */
 50 | entry_t** create_table()
 51 | {
 52 | 	entry_t** hash_table_ptr = NULL; // declare a pointer
 53 | 
 54 | 	/* Allocate memory for a hashtable array of size HASH_TABLE_SIZE */
 55 | 	if( ( hash_table_ptr = malloc( sizeof( entry_t* ) * HASH_TABLE_SIZE ) ) == NULL )
 56 |     	return NULL;
 57 | 
 58 | 	int i;
 59 | 
 60 | 	// Intitialise all entries as NUscopeLL
 61 |     for( i = 0; i < HASH_TABLE_SIZE; i++ )
 62 | 	{
 63 | 		hash_table_ptr[i] = NULL;
 64 | 	}
 65 | 
 66 | 	return hash_table_ptr;
 67 | }
 68 | 
 69 | int create_new_scope()
 70 | {
 71 | 	table_index++;
 72 | 
 73 | 	symbol_table_list[table_index].symbol_table = create_table();
 74 | 	symbol_table_list[table_index].parent = current_scope;
 75 | 
 76 | 	return table_index;
 77 | }
 78 | 
 79 | int exit_scope()
 80 | {
 81 | 	return symbol_table_list[current_scope].parent;
 82 | }
 83 | /* Generate hash from a string. Then generate an index in [0, HASH_TABLE_SIZE) */
 84 | uint32_t hash( char *lexeme )
 85 | {
 86 | 	size_t i;
 87 | 	uint32_t hash;
 88 | 
 89 | 	/* Apply jenkin's hash function
 90 | 	* https://en.wikipedia.org/wiki/Jenkins_hash_function#one-at-a-time
 91 | 	*/
 92 | 	for ( hash = i = 0; i < strlen(lexeme); ++i ) {
 93 |         hash += lexeme[i];
 94 |         hash += ( hash << 10 );
 95 |         hash ^= ( hash >> 6 );
 96 |     }
 97 | 	hash += ( hash << 3 );
 98 | 	hash ^= ( hash >> 11 );
 99 |     hash += ( hash << 15 );
100 | 
101 | 	return hash % HASH_TABLE_SIZE; // return an index in [0, HASH_TABLE_SIZE)
102 | }
103 | 
104 | /* Create an entry for a lexeme, token pair. This will be called from the insert function */
105 | entry_t *create_entry( char *lexeme, int value, int data_type )
106 | {
107 | 	entry_t *new_entry;
108 | 
109 | 	/* Allocate space for new_entry */
110 | 	if( ( new_entry = malloc( sizeof( entry_t ) ) ) == NULL ) {
111 | 		return NULL;
112 | 	}
113 | 	/* Copy lexeme to new_entry location using strdup (string-duplicate). Return NULL if it fails */
114 | 	if( ( new_entry->lexeme = strdup( lexeme ) ) == NULL ) {
115 | 		return NULL;
116 | 	}
117 | 
118 | 	new_entry->value = value;
119 | 	new_entry->successor = NULL;
120 | 	new_entry->parameter_list = NULL;
121 | 	new_entry->array_dimension = -1;
122 | 	new_entry->is_constant = 0;
123 | 	new_entry->num_params = 0;
124 | 	new_entry->data_type = data_type;
125 | 
126 | 	return new_entry;
127 | }
128 | 
129 | /* Search for an entry given a lexeme. Return a pointer to the entry of the lexeme exists, else return NULL */
130 | entry_t* search(entry_t** hash_table_ptr, char* lexeme)
131 | {
132 | 	uint32_t idx = 0;
133 | 	entry_t* myentry;
134 | 
135 |     // get the index of this lexeme as per the hash function
136 | 	idx = hash( lexeme );
137 | 
138 | 	/* Traverse the linked list at this idx and see if lexeme exists */
139 | 	myentry = hash_table_ptr[idx];
140 | 
141 | 	while( myentry != NULL && strcmp( lexeme, myentry->lexeme ) != 0 )
142 | 	{
143 | 		myentry = myentry->successor;
144 | 	}
145 | 
146 | 	if(myentry == NULL) // lexeme is not found
147 | 		return NULL;
148 | 
149 | 	else // lexeme found
150 | 		return myentry;
151 | 
152 | }
153 | 
154 | // Search recursively in every parent scope for lexeme
155 | entry_t* search_recursive(char* lexeme)
156 | {
157 | 	int idx = current_scope;
158 | 	entry_t* finder = NULL;
159 | 
160 | 	while(idx != -1)
161 | 	{
162 | 		finder = search(symbol_table_list[idx].symbol_table, lexeme);
163 | 
164 | 		if(finder != NULL)
165 | 			return finder;
166 | 
167 | 		idx = symbol_table_list[idx].parent;
168 | 	}
169 | 
170 | 	return finder;
171 | }
172 | /* Insert an entry into a hash table. */
173 | entry_t* insert( entry_t** hash_table_ptr, char* lexeme, int value, int data_type)
174 | {
175 | 	// Make sure you pass the current scope symbol table here
176 | 	entry_t* finder = search( hash_table_ptr, lexeme );
177 | 	if( finder != NULL) // If lexeme already exists, don't insert, return NULL
178 | 	{
179 | 		if(finder->is_constant)
180 | 			return finder;
181 | 		return NULL; //capture this is callee code and do necessary error handling
182 | 	}
183 | 
184 | 	uint32_t idx;
185 | 	entry_t* new_entry = NULL;
186 | 	entry_t* head = NULL;
187 | 
188 | 	idx = hash( lexeme ); // Get the index for this lexeme based on the hash function
189 | 	new_entry = create_entry( lexeme, value, data_type ); // Create an entry using the <lexeme, token> pair
190 | 
191 | 	if(new_entry == NULL) // In case there was some error while executing create_entry()
192 | 	{
193 | 		printf("Insert failed. New entry could not be created.");
194 | 		exit(1);
195 | 	}
196 | 
197 | 	head = hash_table_ptr[idx]; // get the head entry at this index
198 | 
199 | 	if(head == NULL) // This is the first lexeme that matches this hash index
200 | 	{
201 | 		hash_table_ptr[idx] = new_entry;
202 | 	}
203 | 	else // if not, add this entry to the head
204 | 	{
205 | 		new_entry->successor = hash_table_ptr[idx];
206 | 		hash_table_ptr[idx] = new_entry;
207 | 	}
208 | 	return hash_table_ptr[idx];
209 | }
210 | 
211 | // This is called after a function call to check if param list match
212 | int check_parameter_list(entry_t* entry, int* list, int m)
213 | {
214 | 	int* parameter_list = entry->parameter_list;
215 | 
216 | 	if(m != entry->num_params)
217 | 	{
218 | 		yyerror("Number of parameters and arguments do not match");
219 | 	}
220 | 
221 | 	int i;
222 | 	for(i=0; i<m; i++)
223 | 	{
224 | 		if(list[i] != parameter_list[i])
225 | 		yyerror("Parameter and argument types do not match");
226 | 	}
227 | 
228 | 	return 1;
229 | }
230 | 
231 | void fill_parameter_list(entry_t* entry, int* list, int n)
232 | {
233 | 	entry->parameter_list = (int *)malloc(n*sizeof(int));
234 | 
235 | 	int i;
236 | 	for(i=0; i<n; i++)
237 | 	{
238 | 		entry->parameter_list[i] = list[i];
239 | 	}
240 | 	entry->num_params = n;
241 | }
242 | 
243 | 
/* Print a separator: a newline, n '=' characters and another newline. */
void print_dashes(int n)
{
	int remaining = n;

	printf("\n");

	while(remaining > 0)
	{
		printf("=");
		remaining--;
	}

	printf("\n");
}
253 | 
254 | // Traverse the hash table and print all the entries
255 | void display_symbol_table(entry_t** hash_table_ptr)
256 | {
257 | 	int i;
258 | 	entry_t* traverser;
259 | 
260 | 	print_dashes(100);
261 | 
262 |   printf(" %-20s %-20s %-20s %-20s %-20s\n","lexeme","data-type","array_dimension","num_params","param_list");
263 | 
264 | 	print_dashes(100);
265 | 
266 | 	for( i=0; i < HASH_TABLE_SIZE; i++)
267 | 	{
268 | 		traverser = hash_table_ptr[i];
269 | 		while( traverser != NULL)
270 | 		{
271 | 			printf(" %-20s %-20d %-20d ", traverser->lexeme, traverser->data_type, traverser->array_dimension);
272 | 
273 | 			printf(" %-20d", traverser->num_params);
274 | 
275 | 			int j;
276 | 			for(j=0; j < traverser->num_params; j++)
277 | 			printf(" %d",traverser->parameter_list[j]);
278 | 			printf("\n");
279 | 
280 | 			traverser = traverser->successor;
281 | 		}
282 | 	}
283 | 
284 | 	print_dashes(100);
285 | 
286 | }
287 | 
288 | void display_constant_table(entry_t** hash_table_ptr)
289 | {
290 | 	int i;
291 | 	entry_t* traverser;
292 | 
293 | 	print_dashes(25);
294 | 
295 | 	printf(" %-10s %-10s \n","lexeme","data-type");
296 | 
297 | 	print_dashes(25);
298 | 
299 | 	for( i=0; i < HASH_TABLE_SIZE; i++)
300 | 	{
301 | 		traverser = hash_table_ptr[i];
302 | 		while( traverser != NULL)
303 | 		{
304 | 			printf(" %-10s %-10d \n", traverser->lexeme, traverser->data_type);
305 | 			traverser = traverser->successor;
306 | 		}
307 | 	}
308 | 
309 | 	print_dashes(25);
310 | }
311 | 
312 | void display_all()
313 | {
314 | 		int i;
315 | 		for(i=0; i<=table_index; i++)
316 | 		{
317 | 			printf("Scope: %d\n",i);
318 | 			display_symbol_table(symbol_table_list[i].symbol_table);
319 | 			printf("\n\n");
320 | 		}
321 | }
322 | 


--------------------------------------------------------------------------------
/Project-3/test.c:
--------------------------------------------------------------------------------
 1 | /*
 2 |  */
 3 | 
 4 | 
 5 | /*
 6 | Karthik M - 15CO221
 7 | Kaushik S Kalmady - 15CO222
 8 | Compiler Design Project 2
 9 | 
10 | Test Case 1
11 | - Missing semicolon
12 | - Single quotes for characters and double quotes for strings
13 | 
14 | The output in lex/yacc should identify statements without a terminating semicolon and unmatched single and double quotes
15 | */
16 | 
17 | int fun(int a, int b, int c)
18 | {
19 |   return 2 ;
20 | }
21 | 
22 | int main()
23 | {
24 | 
25 |   // int arr[-1];
26 |   // int arr1[0];
27 |   // int arr2[-1];
28 |   // break;
29 |   // arr[0] = 4;
30 |   // arr[-1] = 10;
31 |   // arr[11] = 2;
32 | 
33 |   int x = 1;
34 | 
35 |   int y;
36 |   int z;
37 | 
38 |   if(x<0)
39 |   {
40 |     int y;
41 | 
42 |     int z=1;
43 | 
44 |     {
45 |       int w =1;
46 |     }
47 |     //w=3;
48 |   }
49 |   char c='a';
50 |   for(x=2;x<y;x++)
51 |   {
52 |     break;
53 |   }
54 |   return 3;
55 | 
56 |   // fun();
57 | 
58 | }
59 | 


--------------------------------------------------------------------------------
/Project-3/testcases/test-case-1.c:
--------------------------------------------------------------------------------
 1 | /*
 2 | Karthik M - 15CO221
 3 | Kaushik S Kalmady - 15CO222
 4 | Compiler Design Project 3
 5 | 
 6 | Test Case 1
 7 | 
 8 | Redeclaration of variables
 9 | Undeclared variables
10 | */
11 | 
12 | 
13 | int main()
14 | {
15 |     int x;
16 |     int y;
17 |     
18 |     y = 1;
19 |     int x; //Redeclaration error
20 |     if(y > 0){
21 |         int x; //This is fine
22 |     }
23 |     
24 |     if(y==1){
25 |         int x; // This is also fine
26 |         y = 2; // This too
27 |         int z;
28 |     }
29 | 
30 |     /*z = 2; //Variable not declared in scope*/
31 | 
32 | }
33 | 


--------------------------------------------------------------------------------
/Project-3/testcases/test-case-2.c:
--------------------------------------------------------------------------------
 1 | /*
 2 | Karthik M - 15CO221
 3 | Kaushik S Kalmady - 15CO222
 4 | Compiler Design Project 3
 5 | 
 6 | Test Case 2
 7 | Check for invalid array dimensions
 8 | Check for array index out of range
 9 | In a declaration list with assignments, differentiate lhs and rhs (i.e. lhs should be checked for redeclaration but rhs shouldn't.
10 | In "int a=b, c=d;" a,c have to be checked for redeclaration; b,d have to be checked to see whether they have been declared at all.)
11 | */
12 | 
13 | 
14 | int main()
15 | {
16 |     /*int arr[0]; //Invalid*/
17 |     /*int arr1[-1]; //Invalid*/
18 |     int arr2[10];
19 | 
20 |     int x = arr2[5],  y = arr2[9]; //Valid
21 |     int w = arr2[4.5];
22 |     int z = arr2[11];
23 |     return 0;
24 | }
25 | 


--------------------------------------------------------------------------------
/Project-3/testcases/test-case-3.c:
--------------------------------------------------------------------------------
 1 | /*
 2 | Karthik M - 15CO221
 3 | Kaushik S Kalmady - 15CO222
 4 | Compiler Design Project 3
 5 | 
 6 | Test Case 3
 7 | LHS and RHS types should match in assignment expressions
 8 | Types need to match in arithmetic/relational and logical expressions too
 9 | */
10 | 
11 | 
12 | int main()
13 | {
14 |     int x;
15 |     float y;
16 |     char z;
17 | 
18 |     x = 1; //Valid
19 |     /*y = 'c'; //Invalid*/
20 |     z ='2'; //valid
21 | 
22 |     int c = 2;
23 |     int d;
24 |     d = x + c; //valid
25 |     /*d = z + c; // Invalid*/
26 | 
27 |     char a;
28 |     if(x < c){
29 |         /*a = c; //Invalid*/
30 |     }
31 |     return 0;
32 | }
33 | 


--------------------------------------------------------------------------------
/Project-3/testcases/test-case-4.c:
--------------------------------------------------------------------------------
 1 | /*
 2 | Karthik M - 15CO221
 3 | Kaushik S Kalmady - 15CO222
 4 | Compiler Design Project 3
 5 | 
 6 | Test Case 4
 7 | Invalid use of BREAK and CONTINUE
 8 | 
 9 | */
10 | 
11 | 
12 | int main()
13 | {
14 |     break; //invalid
15 | 
16 |     int i;
17 |     for(i=1; i<10; i++){
18 |         int x;
19 |         continue;
20 |     }
21 |     /*continue; //invalid*/
22 | }
23 | 


--------------------------------------------------------------------------------
/Project-3/testcases/test-case-5.c:
--------------------------------------------------------------------------------
 1 | /*
 2 | Karthik M - 15CO221
 3 | Kaushik S Kalmady - 15CO222
 4 | Compiler Design Project 3
 5 | 
 6 | Test Case 5
 7 | Check function parameter number and type
 8 | 
 9 | */
10 | 
11 | int fun(int i, char c){
12 |     return 2;
13 | }
14 | 
15 | int fun2(int i, char c){
16 |     /*return '2';*/
17 | }
18 | 
19 | int main()
20 | {
21 |     fun(2); //Number of parameters dont match
22 |     /*fun(2,2); //Parameter types dont match*/
23 |     fun(2,'3'); // Valid
24 |     /*fun(2,'3',4); //Invalid*/
25 | 
26 |     return 2;
27 | }
28 | 


--------------------------------------------------------------------------------
/Project-3/testcases/testcases:
--------------------------------------------------------------------------------
 1 | /* 
 2 | -lhs and rhs type not matching
 3 | - Arithmetic and logical expression type mismatch
 4 | -Return type and function type mismatch
 5 | -Array index out of range for constants
 6 | -Invalid array dimensions
 7 | - Break and continue outside for loop
 8 | -Scope
 9 | -Parameter type and number checking
10 | 
11 | */
12 | 
13 | 
14 | 


--------------------------------------------------------------------------------
/Project-4/ICG.code:
--------------------------------------------------------------------------------
 1 | 0: main:
 2 | 1: arg argc
 3 | 2: i = 0
 4 | 3: if i < 10 goto 7
 5 | 4: goto 38
 6 | 5: i++
 7 | 6: goto 3
 8 | 7: x = 120
 9 | 8: i = 2
10 | 9: t0 = 2 + 3
11 | 10: if j < t0 goto 12
12 | 11: goto 37
13 | 12: i = 3
14 | 13: t1 = i / 6
15 | 14: t2 = 3 + t1
16 | 15: t3 = t2 + 2
17 | 16: t4 = t3 + 5
18 | 17: j = t4
19 | 18: j = arr[7]
20 | 19: if j < 5 goto 21
21 | 20: goto 23
22 | 21: if i == 3 goto 25
23 | 22: goto 23
24 | 23: if j != 5 goto 25
25 | 24: goto 36
26 | 25: j = 2
27 | 26: if 2 < 4 goto 28
28 | 27: goto 30
29 | 28: goto 37
30 | 29: goto 36
31 | 30: if 2 > 3 goto 32
32 | 31: goto 34
33 | 32: goto 9
34 | 33: goto 36
35 | 34: t5 = 4 + 5
36 | 35: i = t5
37 | 36: goto 9
38 | 37: goto 5
39 | 38: exit


--------------------------------------------------------------------------------
/Project-4/README.md:
--------------------------------------------------------------------------------
 1 | # Compiler Design Project-4
 2 | 
 3 | ## Intermediate Code Generation for a subset of the C language
 4 | This code is built upon the semantic analyser designed in Project-3 with some modifications to the earlier structure.
 5 | 
 6 | ### Team Members
 7 |  - Karthik M (15CO221)
 8 |  - Kaushik S Kalmady (15CO222)
 9 | 
10 | ### Installation and running
11 |  1. Please make sure you have Lex/Flex and Yacc/Bison installed
12 |  2. Next run `$ chmod +x compile` 
13 |  3. `$ ./compile`
14 |  4. To run the parser `$./icg test-file.c`
15 | 
16 | 
17 | ### FEATURES
18 |  - Backward compatibility with Semantic Analyser from Project-3
19 |  - Code generation for arithmetic expressions
20 |  - Backpatching for `if-else` statements and `nested if-else`
21 |  - Backpatching for `while` and `for` loops
22 |  - Array indexing
23 |  - Backpatching for logical and relational expressions
24 |  - Jumps for `break` and `continue`


--------------------------------------------------------------------------------
/Project-4/compile:
--------------------------------------------------------------------------------
#!/bin/bash
# Build the intermediate-code generator `icg`.
# Stop at the first failing step so a stale or partial build is never linked.
set -e

# Scanner: generates lex.yy.c, which parser.y includes.
lex lexer.l
# Parser: -d writes the y.tab.h header that lexer.l includes, -v writes a
# report of the parser states.
yacc -d parser.y -v
# The generated parser is compiled as C++11.
g++ -std=c++11 -g y.tab.c -ly -ll -o icg
5 | 


--------------------------------------------------------------------------------
/Project-4/icg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kaushiksk/mini-c-compiler/f55e6a84a2e82a2a88957c41f626b0a1954c232d/Project-4/icg


--------------------------------------------------------------------------------
/Project-4/lexer.l:
--------------------------------------------------------------------------------
 1 | %{
 2 | 
 3 | #include <stdlib.h>
 4 | #include <stdio.h>
 5 | #include <limits.h>
 6 | 
 7 | #include "y.tab.h"
 8 | 
 9 | entry_t** constant_table;
10 | 
11 | int cmnt_strt = 0;
12 | 
13 | %}
14 | 
letter [a-zA-Z]
digit [0-9]
ws  [ \t\r\f\v]+
 /* An identifier is at most 32 characters: one leading letter or underscore
    followed by up to 31 letters, digits or underscores. */
identifier (_|{letter})({letter}|{digit}|_){0,31}
 /* Hexadecimal digits in either case, so that 0x1F is scanned as a single
    constant instead of the constant 0x1 followed by the identifier F. */
hex [0-9a-fA-F]
20 | 
21 |  /* Exclusive states */
22 | %x CMNT
23 | 
24 | %%
  /* Keywords: each reserved word is returned as its own token. These rules
     come before the {identifier} rule so that a keyword wins when both
     match the same text. Note that "char*" is only recognised without
     whitespace before the '*', and "long long" only with a single space. */
"int"                             {return INT;}
"char"                            {return CHAR;}
"char*"                           {return CHAR_STAR;}
"float"                           {return FLOAT;}
"void"                            {return VOID;}
"long"                            {return LONG;}
"long long"                       {return LONG_LONG;}
"short"                           {return SHORT;}
"signed"                          {return SIGNED;}
"unsigned"                        {return UNSIGNED;}
"for"                             {return FOR;}
"while"                           {return WHILE;}
"break"                           {return BREAK;}
"continue"                        {return CONTINUE;}
"if"                              {return IF;}
"else"                            {return ELSE;}
"return"                          {return RETURN;}
43 | 
{identifier}                      {return  IDENTIFIER;}
{ws}                              ;
  /* Constants are entered into constant_table and the entry is handed to the
     parser through yylval. The hexadecimal prefix is 0x or 0X; the previous
     character class [x|X] also accepted a literal '|'. */
[+\-]?0[xX]{hex}+[lLuU]?          { yylval.entry = insert(constant_table,yytext,(int)strtol(yytext, NULL, 16),INT); return  HEX_CONSTANT;}
'({letter}|{digit})'              { yylval.entry = insert(constant_table,yytext,yytext[1],CHAR); return CHAR_CONSTANT;}
[+\-]?{digit}+[lLuU]?             { yylval.entry = insert(constant_table,yytext,atoi(yytext),INT); return  DEC_CONSTANT;}
[+\-]?{digit}*\.{digit}+          { yylval.entry = insert(constant_table,yytext,atof(yytext),FLOAT); return FLOAT_CONSTANT;}
50 | 
"/*"                              {cmnt_strt = yylineno; BEGIN CMNT;}
  /* Block comments are skipped in the exclusive CMNT state entered above.
     cmnt_strt remembers the line on which the comment started so that an
     unterminated comment can be reported at its opening line. Line counting
     is done by hand: yylineno is incremented on every newline. */
<CMNT>.|{ws}                      ;
<CMNT>\n                          {yylineno++;}
<CMNT>"*/"                        {BEGIN INITIAL;}
<CMNT>"/*"                        {printf("Line %3d: Nested comments are not valid!\n",yylineno);}
<CMNT><<EOF>>                     {printf("Line %3d: Unterminated comment\n", cmnt_strt); yyterminate();}

  /* Line comments run to the end of the line and are discarded. */
"//".*                            ;
59 | 
\"[^\"\n]*\"                      {
                                    /* The pattern stops at the first double quote. If that quote
                                       is preceded by a backslash it was escaped: push it back with
                                       yyless() and let yymore() append the next match to the text
                                       collected so far. */
                                    if(yytext[yyleng-2]=='\\') /* check if it was an escaped quote */
                                    {
                                      yyless(yyleng-1);       /* push the quote back if it was escaped */
                                      yymore();
                                    }
                                    else
                                    {
                                      /* A complete string literal: store it in the constant table. */
                                      yylval.entry = insert(constant_table,yytext,INT_MAX,STRING);
                                      return STRING;
                                    }
                                  }

  /* Error reporting: a string that reaches the end of the line without a
     closing quote, and a name that starts with digits. Both print a message
     and return no token. */
\"[^\"\n]*$                     {printf("Line %3d: Unterminated string %s\n",yylineno,yytext);}
{digit}+({letter}|_)+	          {printf("Line %3d: Illegal identifier name %s\n",yylineno,yytext);}
  /* Newlines outside comments only advance the line counter. */
\n                              {yylineno++;}
76 | 
"--"			                {return DECREMENT;}
  /* Multi-character operators get their own tokens. */
"++"			                {return INCREMENT;}
"+="                      {return ADD_ASSIGN;}
"-="                      {return SUB_ASSIGN;}
"*="                      {return MUL_ASSIGN;}
"/="                      {return DIV_ASSIGN;}
"%="                      {return MOD_ASSIGN;}


"&&"			                {return LOGICAL_AND;}
"||"			                {return LOGICAL_OR;}
"<="			                {return LS_EQ;}
">="			                {return GR_EQ;}
"=="			                {return EQ;}
"!="		                  {return NOT_EQ;}

  /* Any other single character is returned as itself; the grammar refers to
     such tokens as character literals, e.g. '+' or ';'. */
.                         {return yytext[0];}
94 | 
95 | %%
96 | 


--------------------------------------------------------------------------------
/Project-4/parser.y:
--------------------------------------------------------------------------------
  1 | %{
  2 | 	#include <bits/stdc++.h>
  3 | 	#include "symboltable.h"
  4 | 	#include "lex.yy.c"
  5 | 
  6 | 	using namespace std;
  7 | 
  8 | 	int yyerror(char *msg);
  9 | 
 10 | 	#define SYMBOL_TABLE symbol_table_list[current_scope].symbol_table
 11 | 
 12 |   	extern entry_t** constant_table;
 13 | 
 14 | 	int current_dtype;
 15 | 
 16 | 	table_t symbol_table_list[NUM_TABLES];
 17 | 
 18 | 	int is_declaration = 0;
 19 | 	int is_loop = 0;
 20 | 	int is_func = 0;
 21 | 	int func_type;
 22 | 
 23 | 	int param_list[10];
 24 | 	int p_idx = 0;
 25 | 	int p=0;
 26 |   	int rhs = 0;
 27 | 
 28 | 	void type_check(int,int,int);
 29 | 	vector<int> merge(vector<int>& v1, vector<int>& v2);
 30 | 	void backpatch(vector<int>&, int);
 31 | 	void gencode(string);
 32 | 	void gencode_math(content_t* & lhs, content_t* arg1, content_t* arg2, const string& op);
 33 | 	void gencode_rel(content_t* & lhs, content_t* arg1, content_t* arg2, const string& op);
 34 | 	void printlist(vector<int>);
 35 | 
 36 | 	int nextinstr = 0;
 37 | 	int temp_var_number = 0;
 38 | 
 39 | 	vector<string> ICG;
 40 | 
 41 | %}
 42 | 
%union
{
	int data_type;			/* data-type code; used by function_call */
	entry_t* entry;			/* symbol/constant table entry; used by tokens, identifier, constant, array_index */
	content_t* content;		/* expression and statement attributes; used by lhs, the *_expr rules, statements, blocks and N */
	string* op;				/* operator text; used by assign */
	vector<int>* nextlist;	/* list of instruction indices; no %type in this view uses it */
	int instr;				/* instruction index; used by the marker M */
}
 52 | 
 53 | %token <entry> IDENTIFIER
 54 | 
 55 |  /* Constants */
 56 | %token <entry> DEC_CONSTANT HEX_CONSTANT CHAR_CONSTANT FLOAT_CONSTANT STRING
 57 | 
 58 |  /* Logical and Relational operators */
 59 | %token LOGICAL_AND LOGICAL_OR LS_EQ GR_EQ EQ NOT_EQ
 60 | 
 61 |  /* Short hand assignment operators */
 62 | %token MUL_ASSIGN DIV_ASSIGN MOD_ASSIGN ADD_ASSIGN SUB_ASSIGN
 63 | %token INCREMENT DECREMENT
 64 | 
 65 |  /* Data types */
 66 | %token SHORT INT LONG LONG_LONG SIGNED UNSIGNED CONST VOID CHAR FLOAT CHAR_STAR
 67 | 
 68 |  /* Keywords */
 69 | %token IF FOR WHILE CONTINUE BREAK RETURN
 70 | 
 71 | %type <entry> identifier
 72 | %type <entry> constant
 73 | %type <entry> array_index
 74 | 
 75 | %type <op> assign;
 76 | %type <data_type> function_call
 77 | 
 78 | %type <content> lhs
 79 | %type <content> sub_expr
 80 | %type <content> expression
 81 | %type <content> expression_stmt
 82 | %type <content> unary_expr
 83 | %type <content> arithmetic_expr
 84 | %type <content> assignment_expr
 85 | %type <content> array_access
 86 | 
 87 | %type <content> if_block
 88 | %type <content> for_block
 89 | %type <content> while_block
 90 | %type <content> compound_stmt
 91 | 
 92 | %type <content> statements
 93 | %type <content> single_stmt
 94 | %type <content> stmt
 95 | 
 96 | 
 97 | %type <instr> M
 98 | %type <content> N
 99 | 
100 | %left ','
101 | %right '='
102 | %left LOGICAL_OR
103 | %left LOGICAL_AND
104 | %left EQ NOT_EQ
105 | %left '<' '>' LS_EQ GR_EQ
106 | %left '+' '-'
107 | %left '*' '/' '%'
108 | %right '!'
109 | 
110 | 
111 | %nonassoc UMINUS
112 | %nonassoc LOWER_THAN_ELSE
113 | %nonassoc ELSE
114 | 
115 | 
116 | %%
117 | 
 /* Program is made up of multiple builder blocks. */
 /* starter is left recursive: one or more builder blocks in sequence. */
starter: starter builder
			 | builder;

 /* Each builder block is either a function or a declaration */
builder: function
			 | declaration
			 ;
126 | 
 /* A function definition: type name '(' argument_list ')' compound_stmt.
    Two mid-rule actions run while the definition is being parsed:
    - after the name: remember the return type, open the function's scope
      and emit the "name:" line;
    - after the argument list: store the collected parameter types in the
      function's entry and set p so that the body's compound_stmt reuses the
      scope opened here instead of creating another one. */
function: type identifier 	
			{
				func_type = current_dtype;
				is_declaration = 0;
				current_scope = create_new_scope();
				gencode($2->lexeme + string(":"));
			}

		 '(' argument_list ')' 	
		 	{
				is_declaration = 0;
				fill_parameter_list($2,param_list,p_idx);
				p_idx = 0;
				is_func = 1;
				p=1;
			}

		 compound_stmt	{   is_func = 0;	}
          
		;
148 |  
 /* Now we will define a grammar for how types can be specified */
 /* A type is a data type optionally followed by pointer stars. Reducing it
    sets is_declaration (presumably so that the identifier that follows is
    treated as newly declared - the identifier rule is outside this view). */

type : data_type pointer    {is_declaration = 1; }
     | data_type		    {is_declaration = 1; }
		 ;
154 | 
 /* pointer: one or more '*' (right recursive). */
pointer: '*' pointer
    	| '*'
       ;

 /* data_type: a type specifier with an optional sign specifier. */
data_type : sign_specifier type_specifier
    	| type_specifier
    	;

 /* sign_specifier: accepted syntactically; no action records it. */
sign_specifier : SIGNED
    		| UNSIGNED
    		;
166 | 
 /* type_specifier: records the data type being parsed in current_dtype,
    using token codes as type codes. An optional trailing INT after SHORT,
    LONG or LONG_LONG does not change the type; CHAR_STAR is recorded as
    STRING. */
type_specifier :INT                    {current_dtype = INT;}
    |SHORT INT                         {current_dtype = SHORT;}
    |SHORT                             {current_dtype = SHORT;}
    |LONG                              {current_dtype = LONG;}
	|LONG INT                          {current_dtype = LONG;}
    |LONG_LONG                         {current_dtype = LONG_LONG;}
    |LONG_LONG INT                     {current_dtype = LONG_LONG;}
	|CHAR 							   {current_dtype = CHAR;}
	|FLOAT 							   {current_dtype = FLOAT;}
	|VOID							   {current_dtype = VOID;}
	|CHAR_STAR					 	   {current_dtype = STRING;}
    ;
179 | 
 /* grammar rules for argument list */
 /* argument list can be empty */
 /* These are the formal parameters of a function definition; the arguments
    of a call are handled by a different rule. */
argument_list : arguments
    	|
    	;
 /* arguments are comma separated TYPE ID pairs */
arguments : arguments ',' arg
    	| arg
    	;
189 | 
190 |  /* Each arg is a TYPE ID pair */
191 | arg : type identifier	{
192 | 							param_list[p_idx++] = $2->data_type;
193 | 							gencode(string("arg ") + $2->lexeme);
194 | 						}
195 |     ;
196 | 
197 |  /* Generic statement: either a braced block or a single statement. */
198 | stmt: compound_stmt     { $$ = $1; }   /* pass the child record up; nothing is allocated and dropped */
199 |     | single_stmt       { $$ = $1; }
200 |     ;
201 | 
202 |  /* A braced block. It opens a scope of its own unless it is a function body,
203 |     whose scope the function rule has already created (signalled by p). */
204 | compound_stmt :
205 | 				'{'
206 | 				{
207 | 					/* p set: reuse the scope of the function and clear the
208 | 					   flag, so nested blocks open scopes of their own */
209 | 					if(!p)current_scope = create_new_scope();
210 | 					else p = 0;
211 | 				}
212 | 				statements
213 | 				'}'
214 | 				{
215 | 					current_scope = exit_scope();
216 | 					/* pass the statement lists up directly; the content_t that
217 | 					   used to be allocated here was overwritten and leaked */
218 | 					$$ = $3;
219 | 				}
220 |     ;
221 | 
222 | statements
223 |     : statements M stmt
224 |         {
225 |             /* jumps left open by the earlier statements continue at the marker */
226 |             backpatch($1->nextlist,$2);
227 |             $$ = new content_t(); $$->nextlist = $3->nextlist;
228 |             $$->breaklist = merge($1->breaklist,$3->breaklist);
229 |             $$->continuelist = merge($1->continuelist,$3->continuelist);
230 |         }
231 |     |   {   $$ = new content_t();   } ;
232 | 
233 |  /* Every kind of single statement. Each alternative must leave a valid
234 |     content_t as its value, because the statements rule dereferences it. */
235 | single_stmt :if_block	{
236 | 							/* pass the child record up directly (no throw-away allocation) */
237 | 							$$ = $1;
238 | 							backpatch($$->nextlist, nextinstr);
239 | 						}
240 | 		    |for_block	{
241 | 							$$ = $1;
242 | 							backpatch($$->nextlist, nextinstr);
243 | 						}
244 | 	    	|while_block {
245 | 							$$ = $1;
246 | 							backpatch($$->nextlist, nextinstr);
247 | 						 }
248 | 	    	|declaration 		{$$ = new content_t();}
249 | 	    	|function_call ';'	{$$ = new content_t();}
250 | 			|RETURN ';'	  {
251 | 								if(is_func)
252 | 								{
253 | 									if(func_type != VOID)
254 | 										yyerror("return type (VOID) does not match function type");
255 | 								}
256 | 							  	else yyerror("return statement not inside function definition");
257 | 								/* the value of this alternative used to be left unset */
258 | 								$$ = new content_t();
259 | 							}
260 | 
261 | 			|CONTINUE ';'	{
262 | 								if(!is_loop)
263 | 									yyerror("Illegal use of continue");
264 | 								$$ = new content_t();
265 | 								$$->continuelist = {nextinstr};
266 | 								gencode("goto _");
267 | 							}
268 | 
269 | 			|BREAK ';'      {
270 | 								if(!is_loop) {yyerror("Illegal use of break");}
271 | 								$$ = new content_t();
272 | 								$$->breaklist = {nextinstr};
273 | 								gencode("goto _");
274 | 						    }
275 | 
276 | 			|RETURN sub_expr ';'
277 | 							{
278 | 								if(is_func)
279 | 								{
280 | 									if(func_type != $2->data_type)
281 | 										yyerror("return type does not match function type");
282 | 								}
283 | 								else yyerror("return statement not in function definition");
284 | 								$$ = new content_t();
285 | 							}
286 | 	    ;
287 | 
288 | for_block
289 |     : FOR '(' expression_stmt M expression_stmt M expression ')' { is_loop = 1; } N M stmt { is_loop = 0; }
290 |         {
291 |             /* markers in order: start of condition, start of update, start of body */
292 |             backpatch($5->truelist,$11);      /* condition true  -> body      */
293 |             backpatch($12->nextlist,$6);      /* body finished   -> update    */
294 |             backpatch($12->continuelist, $6); /* continue        -> update    */
295 |             backpatch($10->nextlist, $4);     /* update finished -> condition */
296 |             $$ = new content_t();
297 |             $$->nextlist = merge($5->falselist,$12->breaklist);
298 |             gencode(string("goto ") + to_string($6));
299 |         } ;
300 | 
301 | if_block
302 |     : IF '(' expression ')' M stmt   %prec LOWER_THAN_ELSE
303 |         {
304 |             /* no else: a false condition leaves the statement together with the body */
305 |             backpatch($3->truelist,$5);
306 |             $$ = new content_t();
307 |             $$->nextlist = merge($3->falselist,$6->nextlist);
308 |             $$->breaklist = $6->breaklist; $$->continuelist = $6->continuelist;
309 |         }
310 | 
311 |     | IF '(' expression ')' M stmt ELSE N M stmt
312 |         {
313 |             /* true -> then-branch, false -> else-branch; the N marker is the
314 |                jump that skips the else-branch once the then-branch has run */
315 |             backpatch($3->truelist,$5); backpatch($3->falselist,$9);
316 |             $$ = new content_t();
317 |             vector<int> exits = merge($6->nextlist,$8->nextlist);
318 |             $$->nextlist = merge(exits,$10->nextlist);
319 |             $$->breaklist = merge($10->breaklist,$6->breaklist);
320 |             $$->continuelist = merge($10->continuelist,$6->continuelist);
321 |         } ;
322 | 
323 | while_block
324 |     : WHILE M '(' expression ')' M { is_loop = 1; } stmt { is_loop = 0; }
325 |         {
326 |             /* first marker: start of the condition; second marker: start of the body */
327 |             backpatch($8->nextlist,$2); backpatch($4->truelist,$6);
328 |             backpatch($8->continuelist, $2);
329 |             $$ = new content_t();
330 |             $$->nextlist = merge($4->falselist,$8->breaklist);
331 |             gencode(string("goto ") + to_string($2));
332 |         } ;
333 | 
334 | declaration: type  declaration_list ';'			{is_declaration = 0;}	/* typed declaration ends here */
335 | 			 | declaration_list ';'		/* no type: plain assignment(s) or name(s) */
336 | 			 | unary_expr ';'			/* e.g. i++; */
337 | 			 ;						/* explicit terminator, as on the other rules */
338 | 
339 | declaration_list            /* comma-separated declarators */
340 |     : declaration_list ',' sub_decl
341 |     | sub_decl
342 |     ;
343 | sub_decl                    /* an assignment, a plain name, or a subscripted name */
344 |     : assignment_expr
345 |     | identifier
346 |     | array_access ;
347 | 
348 | /* An expression followed by ';'. The expression may be absent, as in for(;;). */
349 | expression_stmt
350 |     : expression ';'
351 |         {
352 |             /* forward the jump lists of the expression */
353 |             $$ = new content_t();
354 |             $$->truelist = $1->truelist; $$->falselist = $1->falselist;
355 |         }
356 |     | ';'   {   $$ = new content_t();   }
357 |     ;
358 | 
359 | expression
360 |     : expression ',' sub_expr
361 |         {
362 |             /* comma list: only the jump lists of the last sub-expression are kept */
363 |             $$ = new content_t();
364 |             $$->truelist = $3->truelist; $$->falselist = $3->falselist;
365 |         }
366 |     | sub_expr
367 |         {
368 |             $$ = new content_t();
369 |             $$->truelist = $1->truelist; $$->falselist = $1->falselist;
370 |         }
371 |     ;
372 | 
373 | sub_expr:
374 |         /* Relational operators. gencode_rel() emits "if a op b goto _" and a
375 |            following "goto _", recorded as the true and false lists of the result. */
376 |         sub_expr '>' sub_expr
377 |             {
378 |                 type_check($1->data_type,$3->data_type,2);
379 |                 $$ = new content_t(); gencode_rel($$, $1, $3, string(" > "));
380 |             }
381 |         | sub_expr '<' sub_expr
382 |             {
383 |                 type_check($1->data_type,$3->data_type,2);
384 |                 $$ = new content_t(); gencode_rel($$, $1, $3, string(" < "));
385 |             }
386 |         | sub_expr EQ sub_expr
387 |             {
388 |                 type_check($1->data_type,$3->data_type,2);
389 |                 $$ = new content_t(); gencode_rel($$, $1, $3, string(" == "));
390 |             }
391 |         | sub_expr NOT_EQ sub_expr
392 |             {
393 |                 type_check($1->data_type,$3->data_type,2);
394 |                 $$ = new content_t(); gencode_rel($$, $1, $3, string(" != "));
395 |             }
396 |         | sub_expr GR_EQ sub_expr
397 |             {
398 |                 type_check($1->data_type,$3->data_type,2);
399 |                 $$ = new content_t(); gencode_rel($$, $1, $3, string(" >= "));
400 |             }
401 |         | sub_expr LS_EQ sub_expr
402 |             {
403 |                 type_check($1->data_type,$3->data_type,2);
404 |                 $$ = new content_t(); gencode_rel($$, $1, $3, string(" <= "));
405 |             }
406 | 
407 |         /* Short-circuit operators. M marks where the right operand starts, so
408 |            the pending jumps of the left operand can be pointed there. */
409 |         | sub_expr LOGICAL_AND M sub_expr
410 |             {
411 |                 type_check($1->data_type,$4->data_type,2);
412 |                 $$ = new content_t();
413 |                 $$->data_type = $1->data_type;
414 |                 backpatch($1->truelist,$3);     /* left true: go on to the right operand */
415 |                 $$->truelist = $4->truelist;
416 |                 $$->falselist = merge($1->falselist,$4->falselist);
417 |             }
418 | 
419 |         | sub_expr LOGICAL_OR M sub_expr
420 |             {
421 |                 type_check($1->data_type,$4->data_type,2);
422 |                 $$ = new content_t();
423 |                 $$->data_type = $1->data_type;
424 |                 backpatch($1->falselist,$3);    /* left false: go on to the right operand */
425 |                 $$->truelist = merge($1->truelist,$4->truelist);
426 |                 $$->falselist = $4->falselist;
427 |             }
428 | 
429 |         /* Negation swaps the two jump lists of its operand;
430 |            no instruction is emitted for it. */
431 |         | '!' sub_expr
432 |             {
433 |                 $$ = new content_t();
434 |                 $$->data_type = $2->data_type;
435 |                 $$->truelist = $2->falselist;
436 |                 $$->falselist = $2->truelist;
437 |             }
438 | 
439 |         /* Non-boolean forms: only the type (and, for arithmetic, the address)
440 |            is carried over; the jump lists stay empty. */
441 |         | arithmetic_expr
442 |             {
443 |                 $$ = new content_t();
444 |                 $$->data_type = $1->data_type;
445 |                 $$->addr = $1->addr;
446 |             }
447 | 
448 |         | assignment_expr
449 |             {
450 |                 $$ = new content_t();
451 |                 $$->data_type = $1->data_type;
452 |             }
453 | 
454 |         | unary_expr
455 |             {
456 |                 $$ = new content_t();
457 |                 $$->data_type = $1->data_type;
458 |             }
459 | 
460 |     ;
461 | 
462 | assignment_expr :
463 | 	/* Flag 1 selects the assignment message of type_check(). An lhs built
464 | 	   from array_access carries its text in code and may have no entry, hence
465 | 	   the guards below. Every alternative clears rhs, which "assign" set. */
466 | 	lhs assign arithmetic_expr
467 | 			{
468 | 				if($1->entry) type_check($1->entry->data_type,$3->data_type,1);
469 | 		 		$$ = new content_t();
470 | 				$$->data_type = $3->data_type;
471 | 		 		$$->code = ($1->code.empty() ? string($1->entry->lexeme) : $1->code) + *$2 + $3->addr;
472 | 				gencode($$->code);
473 | 		 		rhs = 0;
474 | 			}
475 |     |lhs assign array_access
476 | 			{
477 | 				if($1->entry) type_check($1->entry->data_type,$3->data_type,1);
478 | 	 			$$ = new content_t();
479 | 				$$->data_type = $3->data_type;
480 | 	 			$$->code = ($1->code.empty() ? string($1->entry->lexeme) : $1->code) + *$2 + $3->code;
481 | 				gencode($$->code);
482 | 	 			rhs = 0;
483 | 			}
484 |     |lhs assign function_call
485 | 			{
486 | 				if($1->entry) type_check($1->entry->data_type,$3,1);
487 | 				$$ = new content_t();
488 | 				$$->data_type = $3;
489 | 				rhs = 0;	/* was missing: rhs stayed set and later declarations were looked up instead of inserted */
490 | 			}
491 | 	|lhs assign unary_expr
492 | 	        {
493 | 				if($1->entry) type_check($1->entry->data_type,$3->data_type,1);
494 | 			 	$$ = new content_t();
495 | 				$$->data_type = $3->data_type;
496 | 			 	$$->code = ($1->code.empty() ? string($1->entry->lexeme) : $1->code) + *$2 + $3->code;
497 | 				gencode($$->code);
498 | 			 	rhs = 0;
499 | 			}
500 | 	|unary_expr assign unary_expr
501 | 			{
502 | 				type_check($1->data_type,$3->data_type,1);
503 | 				$$ = new content_t();
504 | 				$$->data_type = $3->data_type;
505 | 			 	$$->code = $1->code + *$2 + $3->code;
506 | 				gencode($$->code);
507 | 				rhs = 0;
508 | 			}
509 |     ;
510 | 
511 | unary_expr:
512 | 	/* ++ / -- applied to a plain identifier. The operation is emitted verbatim
513 | 	   as one instruction and also kept in code for the enclosing rule. */
514 | 	identifier INCREMENT
515 | 			{
516 | 				$$ = new content_t();
517 | 				$$->data_type = $1->data_type;
518 | 				$$->code = string($1->lexeme) + string("++");
519 | 				gencode($$->code);
520 | 			}
521 |  	| identifier DECREMENT
522 | 	 		{
523 | 				$$ = new content_t();
524 | 				$$->data_type = $1->data_type;
525 | 				$$->code = string($1->lexeme) + string("--");
526 | 				gencode($$->code);
527 | 			}
528 | 	| DECREMENT identifier
529 | 			{
530 | 				$$ = new content_t();
531 | 				$$->data_type = $2->data_type;
532 | 				$$->code = string("--") + string($2->lexeme);
533 | 				gencode($$->code);
534 | 			}
535 | 	| INCREMENT identifier
536 | 			{
537 | 				$$ = new content_t();
538 | 				$$->data_type = $2->data_type;
539 | 				$$->code = string("++") + string($2->lexeme);
540 | 				gencode($$->code);
541 | 			}
542 | 	;	/* explicit terminator, as on the other rules */
543 | 
544 | lhs: identifier		{$$ = new content_t(); $$->entry = $1;}
545 |    | array_access	{$$ = new content_t(); $$->code = $1->code; $$->entry = $1->entry; /* assignment_expr reads entry; it used to stay null here */}
546 | 	 ;
547 | 
548 | identifier:IDENTIFIER
549 |                 {
550 |                     /* While declaring (and not on the right of an assignment)
551 |                        the name is inserted into SYMBOL_TABLE; a NULL result
552 |                        is reported as a redeclaration. In every other
553 |                        context the name must already be known to
554 |                        search_recursive(). The value of the rule is the entry. */
555 |                     const bool declaring = is_declaration && !rhs;
556 | 
557 |                     $1 = declaring ? insert(SYMBOL_TABLE,yytext,INT_MAX,current_dtype)
558 |                                    : search_recursive(yytext);
559 | 
560 |                     if($1 == NULL)
561 |                         yyerror(declaring ? "Redeclaration of variable" : "Variable not declared");
562 | 
563 |                     $$ = $1;
564 |                 }
565 |     		 ;
566 | 
567 | assign                  /* sets rhs (a right-hand side follows) and yields the operator text */
568 |     : '='           { rhs = 1; $$ = new string(" = ");  }
569 |     | ADD_ASSIGN    { rhs = 1; $$ = new string(" += "); }
570 |     | SUB_ASSIGN    { rhs = 1; $$ = new string(" -= "); }
571 |     | MUL_ASSIGN    { rhs = 1; $$ = new string(" *= "); }
572 |     | DIV_ASSIGN    { rhs = 1; $$ = new string(" /= "); }
573 |     | MOD_ASSIGN    { rhs = 1; $$ = new string(" %= "); } ;
574 | 
575 | arithmetic_expr:
576 | 	/* Binary operators: after the type check (flag 0 = arithmetic),
577 | 	   gencode_math() emits "tN = left op right" and stores tN as the
578 | 	   address of the result. */
579 | 	arithmetic_expr '+' arithmetic_expr
580 | 					 {
581 | 						type_check($1->data_type,$3->data_type,0);
582 | 						$$ = new content_t();
583 | 						$$->data_type = $1->data_type;
584 | 						gencode_math($$, $1, $3, string(" + "));
585 | 					 }
586 | 			| arithmetic_expr '-' arithmetic_expr
587 | 			  		 {
588 | 						type_check($1->data_type,$3->data_type,0);
589 | 						$$ = new content_t();
590 | 						$$->data_type = $1->data_type;
591 | 						gencode_math($$, $1, $3, string(" - "));
592 | 					 }
593 | 			| arithmetic_expr '*' arithmetic_expr
594 | 					 {
595 | 						type_check($1->data_type,$3->data_type,0);
596 | 						$$ = new content_t();
597 | 		 				$$->data_type = $1->data_type;
598 | 						gencode_math($$, $1, $3, string(" * "));
599 | 					 }
600 | 			| arithmetic_expr '/' arithmetic_expr
601 | 					 {
602 | 						type_check($1->data_type,$3->data_type,0);
603 | 						$$ = new content_t();
604 | 						$$->data_type = $1->data_type;
605 | 						gencode_math($$, $1, $3, string(" / "));
606 | 					 }
607 | 		    | arithmetic_expr '%' arithmetic_expr
608 | 					 {
609 | 						type_check($1->data_type,$3->data_type,0);
610 | 						$$ = new content_t();
611 | 						$$->data_type = $1->data_type;
612 | 						gencode_math($$, $1, $3, string(" % "));
613 | 				 	 }
614 | 
615 | 			|'(' arithmetic_expr ')'
616 | 					 {
617 | 						$$ = new content_t();
618 | 						$$->data_type = $2->data_type;
619 | 						$$->addr = $2->addr;
620 | 						$$->code = $2->code;
621 | 					 }
622 | 
623 |     		|'-' arithmetic_expr %prec UMINUS
624 | 					 {
625 | 						$$ = new content_t();
626 | 						$$->data_type = $2->data_type;
627 | 						$$->addr = "t" + to_string(temp_var_number);
628 | 						std::string expr = $$->addr + " = " + "minus " + $2->addr;
629 | 						$$->code = $2->code + expr;
630 | 						temp_var_number++;
631 | 						/* emit the negation: the temporary used to be referenced without ever being defined */
632 | 						gencode(expr);
633 | 				 	 }
634 |     	    |identifier
635 | 					 {
636 | 						$$ = new content_t();
637 | 						$$->data_type = $1->data_type;
638 | 	 					$$->addr = $1->lexeme;
639 | 			   		 }
640 |     		|constant
641 | 					 {
642 | 						$$ = new content_t();
643 | 						$$->data_type = $1->data_type;
644 | 						$$->addr = to_string($1->value);
645 | 					 }
646 |     		 ;
647 | 
648 | constant                      /* flag the entry of the literal as a constant and pass it up */
649 |     : DEC_CONSTANT      { $$ = $1; $$->is_constant = 1; }
650 |     | HEX_CONSTANT      { $$ = $1; $$->is_constant = 1; }
651 |     | CHAR_CONSTANT     { $$ = $1; $$->is_constant = 1; }
652 |     | FLOAT_CONSTANT    { $$ = $1; $$->is_constant = 1; } ;
653 | 
654 | array_access: identifier '[' array_index ']'
655 | 				{
656 | 					/* Only a declarator (declaration context, left of any
657 | 					   assignment) defines the size. A subscript read inside an
658 | 					   initialiser is checked as an access instead of resizing. */
659 | 					if(is_declaration && !rhs)
660 | 					{
661 | 						if($3->value <= 0)
662 | 							yyerror("size of array is not positive");
663 | 						else if($3->is_constant)
664 | 							$1->array_dimension = $3->value;
665 | 					}
666 | 					else if($3->is_constant)
667 | 					{
668 | 						/* valid indices are 0 .. dimension-1, so index == dimension is out of range */
669 | 						if($3->value >= $1->array_dimension)
670 | 							yyerror("Array index out of bound");
671 | 						if($3->value < 0)
672 | 							yyerror("Array index cannot be negative");
673 | 					}
674 | 					$$ = new content_t();
675 | 					$$->data_type = $1->data_type;
676 | 					$$->entry = $1;
677 | 					$$->code = string($1->lexeme) + string("[") + ($3->is_constant ? to_string($3->value) : string($3->lexeme)) + string("]");
678 | 				}
679 | 				;
680 | 
681 | array_index                 /* the subscript: a literal or a variable, passed up as its entry */
682 |     : constant      { $$ = $1; }
683 |     | identifier    { $$ = $1; } ;
684 | 
685 | function_call
686 |     : identifier '(' parameter_list ')'
687 |         {
688 |             /* the argument types gathered in param_list are compared with the
689 |                ones recorded for the callee; the value is the type of the callee */
690 |             $$ = $1->data_type;
691 |             check_parameter_list($1,param_list,p_idx); p_idx = 0;
692 |             gencode(string("call ") + $1->lexeme);
693 |         }
694 |     | identifier '(' ')'
695 |         {
696 |             $$ = $1->data_type;
697 |             check_parameter_list($1,param_list,p_idx); p_idx = 0;
698 |             gencode(string("call ") + $1->lexeme);
699 |         }
700 |     ;
701 | 
702 | parameter_list              /* actual arguments of a call, comma separated */
703 |     : parameter_list ',' parameter
704 |     | parameter
705 |     ;
706 | 
707 | parameter
708 |     : sub_expr
709 |         {   /* note the type of the argument and emit "param <addr>" */
710 |             param_list[p_idx++] = $1->data_type; gencode(string("param ") + $1->addr);
711 |         }
712 |     | STRING
713 |         {   /* string literal argument */
714 |             param_list[p_idx++] = STRING; gencode(string("param ") + $1->lexeme);
715 |         }
716 |     ;
717 | 
718 | 
719 | M   :   { $$ = nextinstr; }   /* marker: index of the next instruction to be emitted */
720 |     ;
721 | 
722 | N   :   {
723 |             /* marker: emit an unresolved jump and hand back its index in nextlist */
724 |             $$ = new content_t;
725 |             $$->nextlist = {nextinstr};
726 |             gencode("goto _");
727 |         } ;
728 | 
729 | %%
730 | 
731 | /* Append one instruction to the intermediate code.
732 |    The text is stored in ICG prefixed with "<nextinstr>: ", after which
733 |    nextinstr is advanced by one. */
734 | void gencode(string x)
735 | {
736 | 	ICG.push_back(to_string(nextinstr) + string(": ") + x);
737 | 	nextinstr++;
738 | }
739 | 
740 | 
741 | /* Emit the two-instruction pattern for the comparison "arg1 op arg2":
742 |        if <arg1> op <arg2> goto _
743 |        goto _
744 |    Both targets are left as '_' for backpatch(). The index of the first
745 |    instruction goes into lhs->truelist, that of the second into
746 |    lhs->falselist; lhs also takes the data type of arg1. */
747 | void gencode_rel(content_t* & lhs, content_t* arg1, content_t* arg2, const string& op)
748 | {
749 | 	lhs->data_type = arg1->data_type;
750 | 	lhs->truelist = {nextinstr};
751 | 	lhs->falselist = {nextinstr + 1};
752 | 
753 | 	gencode(string("if ") + arg1->addr + op + arg2->addr + string(" goto _"));
754 | 	gencode(string("goto _"));
755 | }
756 | 
757 | /* Emit "<tN> = <arg1> op <arg2>" using a fresh temporary tN. lhs->addr is set
758 |    to tN and lhs->code to the code of both operands followed by the new text. */
759 | void gencode_math(content_t* & lhs, content_t* arg1, content_t* arg2, const string& op)
760 | {
761 | 	lhs->addr = "t" + to_string(temp_var_number);
762 | 	++temp_var_number;
763 | 	const std::string instr = lhs->addr + string(" = ") + arg1->addr + op + arg2->addr;
764 | 	lhs->code = arg1->code + arg2->code + instr;
765 | 	gencode(instr);
766 | }
767 | 
768 | void backpatch(vector<int>& v1, int number)
769 | {
770 | 	for(int i = 0; i<v1.size(); i++)
771 | 	{
772 | 		string instruction = ICG[v1[i]];
773 | 
774 | 		if(instruction.find("_") < instruction.size())
775 | 		{
776 | 			instruction.replace(instruction.find("_"),1,to_string(number));
777 | 			ICG[v1[i]] = instruction;
778 | 		}
779 | 	}
780 | }
781 | 
782 | /* Concatenate two jump lists: returns a new vector holding the elements of
783 |    v1 followed by those of v2. Neither argument is modified. */
784 | vector<int> merge(vector<int>& v1, vector<int>& v2)
785 | {
786 | 	vector<int> joined(v1.begin(), v1.end());
787 | 
788 | 	joined.insert(joined.end(), v2.begin(), v2.end());
789 | 	return joined;
790 | }
791 | 
792 | /* Report a type mismatch through yyerror() when left and right differ.
793 |    flag picks the message: 0 arithmetic, 1 assignment, 2 logical; any other
794 |    flag reports nothing. */
795 | void type_check(int left, int right, int flag)
796 | {
797 | 	if(left == right)
798 | 		return;
799 | 
800 | 	if(flag == 0)      yyerror("Type mismatch in arithmetic expression");
801 | 	else if(flag == 1) yyerror("Type mismatch in assignment expression");
802 | 	else if(flag == 2) yyerror("Type mismatch in logical expression");
803 | }
804 | 
805 | /* Write the generated code to the file "ICG.code": one instruction per
806 |    line, followed by a final "<nextinstr>: exit" line (no trailing newline). */
807 | void displayICG()
808 | {
809 | 	ofstream outfile("ICG.code");
810 | 	for(const auto& instruction : ICG)
811 | 		outfile << instruction <<endl;
812 | 
813 | 	outfile << nextinstr << ": exit";
814 | 	outfile.close();
815 | }
816 | 
817 | /* Debug aid: print the entries of v, then "Next: " and the value of nextinstr. */
818 | void printlist(vector<int> v){
819 | 	for(size_t k = 0; k < v.size(); ++k) cout<<v[k]<<" ";
820 | 	cout<<"Next: "<<nextinstr<<endl;
821 | }
822 | 
823 | /* Entry point: parse the file named by argv[1], then write ICG.code. */
824 | int main(int argc, char *argv[])
825 | {
826 | 	if(argc < 2)	/* argv[1] must exist before it is handed to fopen() */
827 | 	{
828 | 		fprintf(stderr, "Usage: %s <source-file>\n", argc > 0 ? argv[0] : "parser");
829 | 		return 1;
830 | 	}
831 | 
832 | 	/* start with every scope slot empty; -1 means "no parent" */
833 | 	for(int i = 0; i < NUM_TABLES; i++)
834 | 	{
835 | 		symbol_table_list[i].symbol_table = NULL;
836 | 		symbol_table_list[i].parent = -1;
837 | 	}
838 | 	constant_table = create_table();
839 | 	symbol_table_list[0].symbol_table = create_table();
840 | 
841 | 	yyin = fopen(argv[1], "r");
842 | 	if(yyin == NULL)	/* fail loudly rather than parse without the requested file */
843 | 	{
844 | 		perror(argv[1]);
845 | 		return 1;
846 | 	}
847 | 
848 | 	if(!yyparse()) printf("\nPARSING COMPLETE\n\n\n");
849 | 	else           printf("\nPARSING FAILED!\n\n\n");
850 | 
851 | 	displayICG();
852 | 	/* debug dumps, disabled: display_all(); display_constant_table(constant_table); */
853 | }
854 | 
855 | int yyerror(const char *msg)
856 | {
857 | 	printf("Line no: %d Error message: %s Token: %s\n", yylineno, msg, yytext);
858 | 	exit(0);
859 | }
860 | 


--------------------------------------------------------------------------------
/Project-4/symboltable.h:
--------------------------------------------------------------------------------
  1 | /*
  2 | * Compiler Design Project 4: Intermediate Code Generation
  3 | *
  4 | * File        : symboltable.h
  5 | *
  6 | * Authors     : Karthik M - 15CO221, Kaushik S Kalmady - 15CO222
  7 | * Date        : 4-4-2018
  8 | */
  9 | 
 10 | 
 11 | #include <stdint.h>
 12 | #include <stdlib.h>
 13 | #include <stdio.h>
 14 | #include <limits.h>
 15 | #include <string.h>
 16 | // #include <vector>
 17 | // #include <string>
 18 | 
 19 | int yyerror(const char *msg);
 20 | 
 21 | using namespace std;
 22 | 
 23 | #define HASH_TABLE_SIZE 100 /* number of buckets allocated by create_table() */
 24 | #define NUM_TABLES 10 /* number of slots in symbol_table_list */
 25 | 
 26 | int table_index = 0; /* index of the most recently created scope table */
 27 | int current_scope = 0; /* index into symbol_table_list of the active scope */
 28 | 
 29 | /* One hash-table entry: a symbol or constant together with its attributes */
 30 | struct entry_t
 31 | {
 32 | 	char* lexeme; // text of the entry; create_entry() stores a strdup'ed copy
 33 | 	int value; // value handed to create_entry()
 34 | 	int data_type; // type code handed to create_entry()
 35 | 	int* parameter_list; // for functions: parameter type codes, set by fill_parameter_list()
 36 | 	int array_dimension; // -1 when created
 37 | 	int is_constant; // 0 when created; insert() returns an existing entry only if this is set
 38 | 	int num_params; // number of elements in parameter_list
 39 | 	struct entry_t* successor; // next entry in the same bucket chain, NULL at the end
 40 | };
 41 | 
 42 | //typedef struct entry_t entry_t;
 43 | 
 44 | /* One scope: its hash table plus the index of the enclosing scope */
 45 | struct table_t
 46 | {
 47 | 	entry_t** symbol_table; // bucket array as returned by create_table()
 48 | 	int parent; // index of the parent scope in symbol_table_list; search_recursive() stops at -1
 49 | };
 50 | 
 51 | //typedef struct table_s table_t;
 52 | 
 53 | struct content_t // attributes the parser attaches to expressions and statements
 54 | {
 55 | 	vector<int> truelist; // instruction indices to patch with the "condition true" target
 56 | 	vector<int> falselist; // instruction indices to patch with the "condition false" target
 57 | 	vector<int> nextlist; // instruction indices to patch with the following instruction
 58 | 	vector<int> breaklist; // pending jumps emitted for break
 59 | 	vector<int> continuelist; // pending jumps emitted for continue
 60 | 	string addr; // name, constant or temporary that holds the value
 61 | 	string code; // text of the code built for this construct
 62 | 
 63 | 	entry_t* entry; // symbol table entry, where the construct names one
 64 | 	int data_type; // type code of the construct
 65 | };
 66 | //typedef struct content_s content_t;
 67 | 
 68 | extern table_t symbol_table_list[NUM_TABLES];
 69 | 
 70 | void display_symbol_table(entry_t** hash_table_ptr);
 71 | /* Create a new, empty hash table.
 72 |    Returns an array of HASH_TABLE_SIZE bucket heads, each NULL, or NULL when
 73 |    the array cannot be allocated. */
 74 | entry_t** create_table()
 75 | {
 76 | 	entry_t** buckets = (entry_t**) malloc( sizeof( entry_t* ) * HASH_TABLE_SIZE );
 77 | 
 78 | 	if( buckets == NULL )
 79 | 		return NULL;
 80 | 
 81 | 	// Mark every bucket as empty
 82 | 	entry_t** slot = buckets;
 83 | 	entry_t** const end = buckets + HASH_TABLE_SIZE;
 84 | 	while( slot != end )
 85 | 	{
 86 | 		*slot++ = NULL;
 87 | 	}
 88 | 	return buckets;
 89 | }
 90 | 
 91 | /* Open a new scope nested in the current one and return its index. */
 92 | int create_new_scope()
 93 | {
 94 | 	if(table_index + 1 >= NUM_TABLES)	// symbol_table_list has only NUM_TABLES slots
 95 | 	{ yyerror("Too many scopes: symbol table list is full"); exit(1); }
 96 | 	table_index++;
 97 | 	symbol_table_list[table_index].symbol_table = create_table();
 98 | 	symbol_table_list[table_index].parent = current_scope;
 99 | 	return table_index;
100 | }
101 | int exit_scope()	// returns the parent index recorded for the current scope
102 | {
103 | 	const table_t& leaving = symbol_table_list[current_scope]; return leaving.parent;
104 | }
105 | /* Hash a lexeme to a bucket index in [0, HASH_TABLE_SIZE) using Jenkins'
106 |    one-at-a-time function
107 |    (https://en.wikipedia.org/wiki/Jenkins_hash_function#one-at-a-time).
108 |    The arithmetic is done on uint32_t: unsigned overflow wraps by definition,
109 |    whereas the former signed int accumulator could overflow (undefined
110 |    behaviour). Bytes are read as unsigned char, and the string is walked up
111 |    to its terminator instead of calling strlen() on every iteration. */
112 | int myhash( char *lexeme )
113 | {
114 | 	uint32_t hashvalue = 0;
115 | 
116 | 	for ( const unsigned char *p = (const unsigned char *) lexeme; *p != '\0'; ++p ) {
117 | 		hashvalue += *p;
118 | 		hashvalue += ( hashvalue << 10 );
119 | 		hashvalue ^= ( hashvalue >> 6 );
120 | 	}
121 | 	hashvalue += ( hashvalue << 3 );
122 | 	hashvalue ^= ( hashvalue >> 11 );
123 | 	hashvalue += ( hashvalue << 15 );
124 | 	return (int) ( hashvalue % HASH_TABLE_SIZE ); // always in [0, HASH_TABLE_SIZE)
125 | }
126 | 
127 | /* Allocate and initialise an entry for a lexeme. Called from insert().
128 |    The lexeme is duplicated with strdup(), so the entry does not refer to the
129 |    buffer passed in. Returns NULL if either allocation fails. */
130 | entry_t *create_entry( char *lexeme, int value, int data_type )
131 | {
132 | 	entry_t *new_entry = (entry_t*) malloc( sizeof( entry_t ) );
133 | 	if( new_entry == NULL )
134 | 		return NULL;
135 | 
136 | 	new_entry->lexeme = strdup( lexeme );
137 | 	if( new_entry->lexeme == NULL ) {
138 | 		free( new_entry );	// do not leak the half-built entry
139 | 		return NULL;
140 | 	}
141 | 
142 | 	new_entry->value = value;
143 | 	new_entry->data_type = data_type;
144 | 	new_entry->successor = NULL;
145 | 	new_entry->parameter_list = NULL;	// set later by fill_parameter_list()
146 | 	new_entry->array_dimension = -1;
147 | 	new_entry->is_constant = 0;
148 | 	new_entry->num_params = 0;
149 | 	return new_entry;
150 | }
151 | 
152 | /* Look a lexeme up in a single hash table.
153 |  *
154 |  * hash_table_ptr : table to search
155 |  * lexeme         : NUL-terminated text to look for
156 |  *
157 |  * Returns the entry whose lexeme compares equal, or NULL when the bucket
158 |  * chain the lexeme hashes to does not contain it.
159 |  */
160 | entry_t* search(entry_t** hash_table_ptr, char* lexeme)
161 | {
162 | 	// Only the bucket selected by myhash() can hold the lexeme
163 | 	int bucket = myhash( lexeme );
164 | 	entry_t* node = hash_table_ptr[bucket];
165 | 
166 | 	// Follow the chain until the lexeme matches or the chain ends
167 | 	for( ; node != NULL; node = node->successor )
168 | 	{
169 | 		if( strcmp( lexeme, node->lexeme ) == 0 )
170 | 			return node;	// lexeme found
171 | 	}
172 | 
173 | 	// reached the end of the chain: lexeme is not present
174 | 	return NULL;
175 | }
176 | 
177 | // Look a lexeme up in the current scope and then in each enclosing scope.
178 | // Scopes are followed through their parent index until -1 is reached.
179 | // Returns the first entry found, or NULL if none of the visited scopes
180 | // holds the lexeme.
181 | entry_t* search_recursive(char* lexeme)
182 | {
183 | 	// current scope first, then outwards along the parent links
184 | 	for(int scope = current_scope; scope != -1; scope = symbol_table_list[scope].parent)
185 | 	{
186 | 		entry_t* hit = search(symbol_table_list[scope].symbol_table, lexeme);
187 | 
188 | 		if(hit != NULL)
189 | 			return hit;
190 | 	}
191 | 
192 | 	// ran out of scopes without a match
193 | 	return NULL;
194 | }
195 | /* Insert a lexeme into a hash table.
196 |  *
197 |  * hash_table_ptr : table to insert into; pass the table of the scope the
198 |  *                  symbol belongs to
199 |  * lexeme         : text of the symbol (copied by create_entry)
200 |  * value          : stored in the value field of the entry
201 |  * data_type      : stored in the data_type field of the entry
202 |  *
203 |  * Returns the new entry. If the lexeme is already in this table, nothing is
204 |  * inserted: the existing entry is returned when it is flagged is_constant,
205 |  * otherwise NULL is returned so the caller can report the duplicate.
206 |  * Terminates the program if the entry cannot be created.
207 |  */
208 | entry_t* insert( entry_t** hash_table_ptr, char* lexeme, int value, int data_type)
209 | {
210 | 	entry_t* existing = search( hash_table_ptr, lexeme );
211 | 
212 | 	if( existing != NULL )
213 | 	{
214 | 		// an existing entry is handed back only when it is a constant
215 | 		if( existing->is_constant )
216 | 			return existing;
217 | 		return NULL; // the caller does the error handling
218 | 	}
219 | 
220 | 	entry_t* fresh = create_entry( lexeme, value, data_type );
221 | 
222 | 	if( fresh == NULL ) // create_entry() reported a failure
223 | 	{
224 | 		printf("Insert failed. New entry could not be created.");
225 | 		exit(1);
226 | 	}
227 | 
228 | 	// Push the entry onto the front of its bucket chain. An empty bucket
229 | 	// holds NULL, so the same two assignments cover both cases.
230 | 	entry_t** bucket = &hash_table_ptr[ myhash( lexeme ) ];
231 | 
232 | 	fresh->successor = *bucket;
233 | 	*bucket = fresh;
234 | 
235 | 	return fresh;
236 | }
237 | 
238 | // Compare the argument types of a call with the parameter list of the callee.
239 | //   entry : symbol table entry of the called function
240 | //   list  : types of the m arguments supplied at the call
241 | // A count or type mismatch is reported through yyerror(). Returns 1.
242 | int check_parameter_list(entry_t* entry, int* list, int m)
243 | {
244 | 	if(entry->num_params != m)
245 | 	{
246 | 		yyerror("Number of parameters and arguments do not match");
247 | 	}
248 | 
249 | 	const int* expected = entry->parameter_list;
250 | 
251 | 	for(int k = 0; k < m; ++k)
252 | 	{
253 | 		if(expected[k] != list[k]) yyerror("Parameter and argument types do not match");
254 | 	}
255 | 	return 1;
256 | }
257 | 
258 | // Store a copy of the first n types in list as the parameter list of entry.
259 | void fill_parameter_list(entry_t* entry, int* list, int n)
260 | {
261 | 	entry->parameter_list = (int *)malloc(n*sizeof(int));
262 | 	if(entry->parameter_list == NULL && n > 0)	// allocation failed: stop before writing through NULL
263 | 	{ printf("Could not allocate parameter list."); exit(1); }
264 | 
265 | 	for(int i = 0; i < n; i++)
266 | 		entry->parameter_list[i] = list[i];
267 | 	entry->num_params = n;
268 | }
269 | 
270 | 
271 | // Print a rule made of n '=' characters, with a newline before and after it.
272 | void print_dashes(int n)
273 | {
274 | 	printf("\n");
275 | 
276 | 	for(int left = n; left > 0; left--)
277 | 		printf("=");
278 | 	printf("\n");
279 | }
280 | 
281 | // Print every entry of one symbol table.
282 | // Each row shows lexeme, data type, array dimension and parameter count,
283 | // followed by the parameter types when there are any. The table is framed
284 | // by rules of 100 '=' characters.
285 | void display_symbol_table(entry_t** hash_table_ptr)
286 | {
287 | 	print_dashes(100);
288 | 
289 |   printf(" %-20s %-20s %-20s %-20s %-20s\n","lexeme","data-type","array_dimension","num_params","param_list");
290 | 
291 | 	print_dashes(100);
292 | 
293 | 	// Buckets are visited in index order, each chain from its head
294 | 	for(int bucket = 0; bucket < HASH_TABLE_SIZE; bucket++)
295 | 	{
296 | 		for(entry_t* row = hash_table_ptr[bucket]; row != NULL; row = row->successor)
297 | 		{
298 | 			printf(" %-20s %-20d %-20d ", row->lexeme, row->data_type, row->array_dimension);
299 | 
300 | 			printf(" %-20d", row->num_params);
301 | 
302 | 			// parameter types follow on the same line
303 | 			for(int k = 0; k < row->num_params; k++)
304 | 			{
305 | 				printf(" %d",row->parameter_list[k]);
306 | 			}
307 | 
308 | 			printf("\n");
309 | 		}
310 | 	}
311 | 
312 | 	print_dashes(100);
313 | 
314 | }
315 | 
316 | // Print the constant table: one "lexeme data-type" row per entry, framed by
317 | // rules of 25 '=' characters.
318 | void display_constant_table(entry_t** hash_table_ptr)
319 | {
320 | 	print_dashes(25);
321 | 
322 | 	printf(" %-10s %-10s \n","lexeme","data-type");
323 | 
324 | 	print_dashes(25);
325 | 
326 | 	for(int bucket = 0; bucket < HASH_TABLE_SIZE; bucket++)
327 | 	{
328 | 		// follow the chain hanging off this bucket
329 | 		entry_t* row = hash_table_ptr[bucket];
330 | 		for( ; row != NULL; row = row->successor)
331 | 		{
332 | 			printf(" %-10s %-10d \n", row->lexeme, row->data_type);
333 | 		}
334 | 	}
335 | 
336 | 	print_dashes(25);
337 | }
338 | 
339 | 
340 | // Print the symbol table of every scope created so far (indices 0 .. table_index),
341 | // each preceded by a "Scope: <index>" line and followed by two newlines.
342 | void display_all()
343 | {
344 | 	int scope = 0;
345 | 	while(scope <= table_index)
346 | 	{
347 | 		printf("Scope: %d\n",scope);
348 | 		display_symbol_table(symbol_table_list[scope].symbol_table);
349 | 		printf("\n\n");
350 | 		scope++;
351 | 	}
352 | }
353 | 


--------------------------------------------------------------------------------
/Project-4/test-func.c:
--------------------------------------------------------------------------------
 1 | void fun2(char* s)
 2 | {
 3 | 
 4 | }
 5 | 
 6 | void fun(int a, int b, int c)
 7 | {
 8 |   fun2("What else can I print?");
 9 | }
10 | 
11 | int main()
12 | {
13 |     int c;
14 |     c = 5;
15 |     fun(c, 2 , 5);
16 | }
17 | 


--------------------------------------------------------------------------------
/Project-4/test.c:
--------------------------------------------------------------------------------
 1 | int main(int argc)
 2 | {
 3 |   int i;
 4 |   int j;
 5 | 
 6 |   for(i=0;i<10;i++)
 7 |   {
 8 |     char x = 'x';
 9 |     i = 2;
10 | 
11 |     while(j < 2 + 3)
12 |     {
13 |       int j;
14 |       i = 3;
15 |       j = 3 + i / 6 + 2 + 5;
16 | 
17 | 
18 |       int arr[10];
19 |       j = arr[7];
20 | 
21 |       if(j < 5 && i == 3 || j!= 5)
22 |       {
23 |         j = 2;
24 | 
25 |         if(2<4)
26 |         break;
27 | 
28 |         else if(2>3)
29 |         continue;
30 | 
31 |         else
32 |         i = 4 + 5;
33 |       }
34 | 
35 |     }
36 |   }
37 | }
38 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # mini-c-compiler
 2 | 
 3 | 
 4 | This is a mini compiler for a subset of the C language built as part of our Compiler Design Lab Course (CO351).
 5 | 
 6 | It has been built as a series of four incremental phases, each contributing a key part of the compiler.
 7 | 
 8 | | Phase     | Objective                                                                                    |
 9 | |-----------|----------------------------------------------------------------------------------------------|
10 | | Project-1 | Building a lexical analyser using Lex for a subset of C language to tokenise a given program |
11 | | Project-2 | Building a parser using Yacc by reading tokens from the lexical analyser from Project-1      |
12 | | Project-3 | Building a semantic analyser for the context free grammar implemented in Project-2           |
13 | | Project-4 | Building an Intermediate Code Generator                                                      |
14 | 
15 | 
16 | ### Team Members
17 |  - Karthik M (15CO221)
18 |  - Kaushik S Kalmady (15CO222)
19 | 


--------------------------------------------------------------------------------