├── .gitignore ├── Images ├── icg.png ├── tree.png ├── table.png ├── inorder.png └── semantic.png ├── archive ├── .DS_Store ├── parser_v8 │ ├── input2.c │ ├── input1.c │ ├── parser.l │ └── parser.y ├── parser_v4 │ ├── input1.c │ ├── parser.l │ └── parser.y ├── parser_v5 │ ├── input1.c │ ├── parser.l │ └── parser.y ├── for-loop │ ├── parser.l │ └── parser.y ├── parser_v1 │ ├── parser.l │ └── parser.y ├── parser_v3 │ ├── input1.c │ ├── parser.l │ └── parser.y ├── parser_v6 │ ├── input1.c │ ├── parser.l │ └── parser.y ├── parser_v2 │ ├── input1.c │ ├── parser.l │ └── parser.y ├── parser_v7 │ ├── input1.c │ ├── parser.l │ └── parser.y ├── README.md └── brainstorm.txt ├── input2.c ├── input3.c ├── Part 2 - Adding the Grammar Rules ├── input1.c ├── parser1.y └── lexer.l ├── Part 4 - Creating the Syntax Tree ├── input1.c ├── lexer.l └── parser3.y ├── Part 3 - Generating the Symbol Table ├── input1.c ├── lexer.l └── parser2.y ├── Part 5 - Adding the Semantic Analyzer ├── input1.c ├── lexer.l └── parser4.y ├── Part 6 - Intermediate Code Generation ├── input1.c ├── lexer.l └── parser5.y ├── input1.c ├── LICENSE ├── Part 1 - Creating the Lexical Analyzer └── lexer.l ├── lexer.l ├── README.md └── parser.y /.gitignore: -------------------------------------------------------------------------------- 1 | .DS_Store 2 | y.tab.c 3 | y.output 4 | y.tab.h 5 | lex.yy.c 6 | a.out -------------------------------------------------------------------------------- /Images/icg.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AnjaneyaTripathi/c-compiler/HEAD/Images/icg.png -------------------------------------------------------------------------------- /Images/tree.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AnjaneyaTripathi/c-compiler/HEAD/Images/tree.png -------------------------------------------------------------------------------- /Images/table.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AnjaneyaTripathi/c-compiler/HEAD/Images/table.png -------------------------------------------------------------------------------- /archive/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AnjaneyaTripathi/c-compiler/HEAD/archive/.DS_Store -------------------------------------------------------------------------------- /Images/inorder.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AnjaneyaTripathi/c-compiler/HEAD/Images/inorder.png -------------------------------------------------------------------------------- /Images/semantic.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AnjaneyaTripathi/c-compiler/HEAD/Images/semantic.png -------------------------------------------------------------------------------- /archive/parser_v8/input2.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | int main() { 5 | int x=1; 6 | float f; 7 | int a=3; 8 | a = x + 5; 9 | } -------------------------------------------------------------------------------- /input2.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | int main() { 5 | int i=1; 6 | float f = 2.5; 7 | char c = 'A'; 8 | int x = 3.5; 9 | i = x + f * c; 10 | return 3; 11 | } -------------------------------------------------------------------------------- /archive/parser_v4/input1.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | int main() { 5 | int a; 6 | int x=1; 7 | int y=2; 8 | int z=3; 9 | char f=97; 10 | x=3; 11 | y=10; 12 | z=5; 13 | for(int i=0; i<10; i++) { 14 | printf("Hello World"); 15 | scanf("%d", &x); 16 | for(int j=0; j 2 | #include 3 | 4 | int main() { 5 | int a; 6 | int x=1; 7 | int y=2; 8 | int z=3; 9 | char f=97; 10 | x=3; 11 | y=10; 12 | z=5; 13 | for(int i=0; i<10; i++) { 14 | printf("Hello World"); 15 | scanf("%d", &x); 16 | for(int j=0; j|!=|<=|>=|==|&&|"||"|[+-/*] 8 | unary "++"|"--" 9 | 10 | %% 11 | 12 | "for" { return FOR; } 13 | {binary} { return BINARY; } 14 | {unary} { return UNARY; } 15 | {num} { return NUMBER; } 16 | {id} { return ID; } 17 | [ \n\t] { ; } 18 | . {return *yytext; } 19 | 20 | %% 21 | 22 | int yywrap() { 23 | return 1; 24 | } -------------------------------------------------------------------------------- /archive/parser_v1/parser.l: -------------------------------------------------------------------------------- 1 | %{ 2 | #include "y.tab.h" 3 | %} 4 | 5 | num [0-9]+ 6 | id [a-zA-Z]+ 7 | binary =|<|>|!=|<=|>=|==|&&|"||"|[+-/*] 8 | unary "++"|"--" 9 | 10 | %% 11 | 12 | "for" { return FOR; } 13 | {binary} { return BINARY; } 14 | {unary} { return UNARY; } 15 | {num} { return NUMBER; } 16 | {id} { return ID; } 17 | [ \n\t] { ; } 18 | . { return *yytext; } 19 | 20 | %% 21 | 22 | int yywrap() { 23 | return 1; 24 | } -------------------------------------------------------------------------------- /input3.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | int main() { 5 | int x=1; 6 | float f; 7 | int a=3; 8 | int x; 9 | a = x * 3 + 5; 10 | if(x>a) { 11 | printf("Hi!"); 12 | a = x * 3 + 100; 13 | if(x>a) { 14 | printf("Hi!"); 15 | a = x * 3 + 100; 16 | } 17 | else { 18 | x = a * 3 + 100; 19 | } 20 | } 21 | else { 22 | x = a * 3 + 100; 23 | } 24 | } -------------------------------------------------------------------------------- /archive/parser_v3/input1.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | int main() { 5 | int x; 6 | int y; 7 | int z; 8 | float f; 9 | x=3; 10 | y=10; 11 | z=5; 12 | for(i=0; i<10; i++) { 13 | printf("Hello World"); 14 | for(j=0; jz) { 19 | char c; 20 | c='A'; 21 | } 22 | else { 23 | f=3.14; 24 | } 25 | return 0; 26 | } -------------------------------------------------------------------------------- /Part 2 - Adding the Grammar Rules/input1.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | int main() { 5 | int x=1; 6 | float f; 7 | int a=3; 8 | int x; 9 | a = x * 3 + 5; 10 | if(x>a) { 11 | printf("Hi!"); 12 | a = x * 3 + 100; 13 | if(x>a) { 14 | printf("Hi!"); 15 | a = x * 3 + 100; 16 | } 17 | else { 18 | x = a * 3 + 100; 19 | } 20 | } 21 | else { 22 | x = a * 3 + 100; 23 | } 24 | } -------------------------------------------------------------------------------- /Part 4 - Creating the Syntax Tree/input1.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | int main() { 5 | int x=1; 6 | float f; 7 | int a=3; 8 | int x; 9 | a = x * 3 + 5; 10 | if(x>a) { 11 | printf("Hi!"); 12 | a = x * 3 + 100; 13 | if(x>a) { 14 | printf("Hi!"); 15 | a = x * 3 + 100; 16 | } 17 | else { 18 | x = a * 3 + 100; 19 | } 20 | } 21 | else { 22 | x = a * 3 + 100; 23 | } 24 | } -------------------------------------------------------------------------------- /Part 3 - Generating the Symbol Table/input1.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | int main() { 5 | int x=1; 6 | float f; 7 | int a=3; 8 | int x; 9 | a = x * 3 + 5; 10 | if(x>a) { 11 | printf("Hi!"); 12 | a = x * 3 + 100; 13 | if(x>a) { 14 | printf("Hi!"); 15 | a = x * 3 + 100; 16 | } 17 | else { 18 | x = a * 3 + 100; 19 | } 20 | } 21 | else { 22 | x = a * 3 + 98; 23 | } 24 | } -------------------------------------------------------------------------------- /Part 5 - Adding the Semantic Analyzer/input1.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | int main() { 5 | int x=1; 6 | float f; 7 | char a=3; 8 | int x; 9 | a = x * 3 + 5; 10 | if(x>a) { 11 | printf("Hi!"); 12 | a = x * 3 + 100; 13 | if(x>a) { 14 | printf("Hi!"); 15 | a = x * 3 + 100; 16 | } 17 | else { 18 | x = a * 3 + 100; 19 | } 20 | } 21 | else { 22 | x = a * 3 + 100; 23 | } 24 | } -------------------------------------------------------------------------------- /Part 6 - Intermediate Code Generation/input1.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | int main() { 5 | int x=1; 6 | float f; 7 | int a=3; 8 | int x; 9 | a = x * 3 + 5; 10 | if(x>a) { 11 | printf("Hi!"); 12 | a = x * 3 + 100; 13 | if(x>a) { 14 | printf("Hi!"); 15 | a = x * 3 + 100; 16 | } 17 | else { 18 | x = a * 3 + 100; 19 | } 20 | } 21 | else { 22 | x = a * 3 + 100; 23 | } 24 | } -------------------------------------------------------------------------------- /archive/parser_v6/input1.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | int main() { 5 | int a; 6 | int x=1; 7 | int y=2; 8 | int z=3; 9 | char f=97; 10 | x=3; 11 | y=10; 12 | z=5; 13 | if (x>5) { 14 | printf("Hello World"); 15 | } else { 16 | int idx = 1; 17 | } 18 | for(int i=0; i<10; i++) { 19 | printf("Hello World"); 20 | scanf("%d", &x); 21 | for(int j=0; j 2 | #include 3 | 4 | int main(){ 5 | int x; 6 | x = y+3; 7 | if(x>y){ 8 | i=j; 9 | } 10 | else{ 11 | int i; 12 | i=m; 13 | } 14 | int y; 15 | y= m*4; 16 | printf("hey"); 17 | for(i=0;i<5;i++){ 18 | int a; 19 | a = b+c; 20 | for(j=0; j<10; j++){ 21 | x = y; 22 | } 23 | l = m/n; 24 | 25 | if(true){ 26 | i=j; 27 | } 28 | else{ 29 | i=m; 30 | } 31 | } 32 | int q; 33 | q = r*s; 34 | return 0; 35 | } -------------------------------------------------------------------------------- /input1.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | int main() { 5 | int a; 6 | int x=1; 7 | int y=2; 8 | int z=3; 9 | x=3; 10 | y=10; 11 | z=5; 12 | if(x>5) { 13 | for(int k=0; k<10; k++) { 14 | y = x+3; 15 | printf("Hello!"); 16 | } 17 | } else { 18 | int idx = 1; 19 | } 20 | for(int i=0; i<10; i++) { 21 | printf("Hello World!"); 22 | scanf("%d", &x); 23 | if (x>5) { 24 | printf("Hi"); 25 | } 26 | for(int j=0; j 2 | #include 3 | 4 | int main() { 5 | int a; 6 | int x=1; 7 | int y=2; 8 | int z=3; 9 | char f=97; 10 | y=10; 11 | z=5; 12 | if (x>5) { 13 | for(int k=0; k<10; k++) { 14 | y = x+3; 15 | printf("Saddd"); 16 | } 17 | } else { 18 | int idx = 1; 19 | } 20 | for(int i=0; i<10; i++) { 21 | printf("Hello World"); 22 | scanf("%d", &x); 23 | if (x>5) { 24 | printf("Hi"); 25 | } 26 | for(int j=0; j 3 | void yyerror(const char *s); 4 | int yylex(); 5 | %} 6 | 7 | %token FOR ID NUMBER UNARY BINARY 8 | 9 | %% 10 | 11 | program: expressions 12 | | loops 13 | | program expressions 14 | | program loops 15 | ; 16 | 17 | loops: FOR '(' statement ';' statement ';' statement ')' '{' program '}' 18 | ; 19 | 20 | expressions: expressions statement ';' 21 | | statement ';' 22 | ; 23 | 24 | statement: ID BINARY statement 25 | | ID UNARY 26 | | UNARY ID 27 | | ID 28 | | NUMBER 29 | ; 30 | 31 | %% 32 | 33 | int main() { 34 | yyparse(); 35 | } 36 | 37 | void yyerror(const char* msg) { 38 | fprintf(stderr, "%s\n", msg); 39 | } 40 | -------------------------------------------------------------------------------- /archive/parser_v8/input1.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | int main() { 5 | int a; 6 | int x=1; 7 | int y=2; 8 | int z=3; 9 | int z; 10 | char f=97; 11 | x=3; 12 | y=10; 13 | z=5; 14 | if (x>5) { 15 | for(int k=0; k<10; k++) { 16 | y = x+3; 17 | printf("Saddd"); 18 | } 19 | } else { 20 | int idx = 1; 21 | } 22 | for(int i=0; i<10; i++) { 23 | printf("Hello World"); 24 | scanf("%d", &x); 25 | if (x>5) { 26 | printf("Hi"); 27 | } 28 | for(int j=0; j|!=|<=|>=|==|&&|"||"|[+-/*] 10 | unary "++"|"--" 11 | 12 | %% 13 | 14 | "for" { return FOR; } 15 | "if" { return IF; } 16 | "else" { return ELSE; } 17 | "true" { return TRUE; } 18 | "false" { return FALSE; } 19 | "printf" { return PRINTFF; } 20 | "scanf" { return SCANFF; } 21 | ^"#include"[ ]*<.+\.h> { return INCLUDE; } 22 | "return" { return RETURN; } 23 | {binary} { return BINARY; } 24 | {unary} { return UNARY; } 25 | {num} { return NUMBER; } 26 | {datatype} { return DATATYPE; } 27 | {id} { return ID; } 28 | [ \t]* { ; } 29 | [\n] { ; } 30 | . {return *yytext; } 31 | ["].*["] { return STRLT; } 32 | 33 | %% 34 | 35 | int yywrap() { 36 | return 1; 37 | } -------------------------------------------------------------------------------- /archive/for-loop/parser.y: -------------------------------------------------------------------------------- 1 | %{ 2 | #include 3 | void yyerror(const char *s); 4 | int yylex(); 5 | %} 6 | 7 | %token FOR ID NUMBER UNARY BINARY 8 | 9 | %% 10 | 11 | program: program loop body { printf("Loops and more?"); } 12 | | loop body { printf("Loops!"); } 13 | ; 14 | 15 | loop: FOR '(' for_statements ')' { printf("For loop!"); } 16 | ; 17 | 18 | body: statement { printf("Nothin' much..."); } 19 | | '{' statements '}' { printf("Code Block!"); } 20 | | '{' loop '}' body { printf("Nested For?"); } 21 | ; 22 | 23 | for_statements: statement ';' statement ';' statement 24 | ; 25 | 26 | statements: statements statement ';' { printf("Lecture begins!"); } 27 | | statement ';' { printf("One liner!"); } 28 | ; 29 | 30 | statement: ID BINARY statement 31 | | ID UNARY 32 | | UNARY ID 33 | | ID 34 | | NUMBER 35 | ; 36 | 37 | %% 38 | 39 | int main() { 40 | yyparse(); 41 | } 42 | 43 | void yyerror(const char* msg) { 44 | fprintf(stderr, "%s\n", msg); 45 | } 46 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2021 Anjaneya Tripathi, Isha Waghulde, Khushali Patel 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /archive/parser_v3/parser.l: -------------------------------------------------------------------------------- 1 | %{ 2 | #include "y.tab.h" 3 | int countn=0; 4 | %} 5 | %option yylineno 6 | 7 | /* Add negative numbers */ 8 | /* scanf does not work */ 9 | num [0-9]+ 10 | datatype "int"|"float"|"char"|"void" 11 | id [a-zA-Z]+ 12 | binary =|<|>|!=|<=|>=|==|&&|"||"|[+-/*] 13 | unary "++"|"--" 14 | 15 | %% 16 | 17 | "for" { return FOR; } 18 | "if" { return IF; } 19 | "else" { return ELSE; } 20 | "true" { return TRUE; } 21 | "false" { return FALSE; } 22 | "printf" { return PRINTFF; } 23 | "scanf" { return SCANFF; } 24 | ^"#include"[ ]*<.+\.h> { return INCLUDE; } 25 | "return" { return RETURN; } 26 | {binary} { return BINARY; } 27 | {unary} { return UNARY; } 28 | {num} { return NUMBER; } 29 | {datatype} { return DATATYPE; } 30 | {id} { return ID; } 31 | [ \t]* { ; } 32 | [\n] { countn++; } 33 | . {return *yytext; } 34 | ["].*["] { return STRLT; } 35 | 36 | %% 37 | 38 | int yywrap() { 39 | return 1; 40 | } -------------------------------------------------------------------------------- /archive/parser_v2/parser.y: -------------------------------------------------------------------------------- 1 | %{ 2 | #include 3 | 4 | void yyerror(const char *s); 5 | int yylex(); 6 | int yywrap(); 7 | 8 | %} 9 | 10 | %token INCLUDE FOR IF ELSE ID NUMBER UNARY BINARY DATATYPE TRUE FALSE RETURN PRINTFF SCANFF STRLT 11 | 12 | %% 13 | 14 | program: headers main '(' ')' '{' body return '}' 15 | ; 16 | 17 | headers: headers INCLUDE 18 | | INCLUDE 19 | ; 20 | 21 | main: DATATYPE ID 22 | ; 23 | 24 | body: expressions 25 | | loops 26 | | conditionals 27 | | body expressions 28 | | body loops 29 | | body conditionals 30 | ; 31 | 32 | loops: FOR '(' statement ';' statement ';' statement ')' '{' body '}' 33 | ; 34 | 35 | expressions: expressions statement ';' 36 | | statement ';' 37 | ; 38 | 39 | conditionals: IF '(' condition ')' '{' expressions '}' 40 | | IF '(' condition ')' '{' expressions '}' ELSE '{' expressions '}' 41 | ; 42 | 43 | statement: ID BINARY statement 44 | | ID UNARY 45 | | UNARY ID 46 | | ID 47 | | NUMBER 48 | | DATATYPE variable 49 | | PRINTFF '(' STRLT ')' 50 | | SCANFF '(' STRLT ',' '&' ID ')' 51 | ; 52 | 53 | variable: ID array 54 | | '*' variable 55 | ; 56 | 57 | array: '[' NUMBER ']' array 58 | | '[' ID ']' array 59 | | '[' ']' array 60 | | 61 | ; 62 | 63 | condition: condition BINARY condition 64 | | boolean 65 | ; 66 | 67 | boolean: ID BINARY ID 68 | | ID BINARY NUMBER 69 | | NUMBER BINARY ID 70 | | TRUE 71 | | FALSE 72 | ; 73 | 74 | return: RETURN NUMBER ';' 75 | | RETURN ID ';' 76 | | 77 | ; 78 | 79 | %% 80 | 81 | int main() { 82 | yyparse(); 83 | } 84 | 85 | void yyerror(const char* msg) { 86 | fprintf(stderr, "%s\n", msg); 87 | } 88 | -------------------------------------------------------------------------------- /Part 2 - Adding the Grammar Rules/parser1.y: -------------------------------------------------------------------------------- 1 | %{ 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include"lex.yy.c" 7 | 8 | void yyerror(const char *s); 9 | int yylex(); 10 | int yywrap(); 11 | %} 12 | 13 | %token VOID CHARACTER PRINTFF SCANFF INT FLOAT CHAR FOR IF ELSE TRUE FALSE NUMBER FLOAT_NUM ID LE GE EQ NE GT LT AND OR STR ADD MULTIPLY DIVIDE SUBTRACT UNARY INCLUDE RETURN 14 | 15 | %% 16 | 17 | program: headers main '(' ')' '{' body return '}' 18 | ; 19 | 20 | headers: headers headers 21 | | INCLUDE 22 | ; 23 | 24 | main: datatype ID 25 | ; 26 | 27 | datatype: INT 28 | | FLOAT 29 | | CHAR 30 | | VOID 31 | ; 32 | 33 | body: FOR '(' statement ';' condition ';' statement ')' '{' body '}' 34 | | IF '(' condition ')' '{' body '}' else 35 | | statement ';' 36 | | body body 37 | | PRINTFF '(' STR ')' ';' 38 | | SCANFF '(' STR ',' '&' ID ')' ';' 39 | ; 40 | 41 | else: ELSE '{' body '}' 42 | | 43 | ; 44 | 45 | condition: value relop value 46 | | TRUE 47 | | FALSE 48 | ; 49 | 50 | statement: datatype ID init 51 | | ID '=' expression 52 | | ID relop expression 53 | | ID UNARY 54 | | UNARY ID 55 | ; 56 | 57 | init: '=' value 58 | | 59 | ; 60 | 61 | expression: expression arithmetic expression 62 | | value 63 | ; 64 | 65 | arithmetic: ADD 66 | | SUBTRACT 67 | | MULTIPLY 68 | | DIVIDE 69 | ; 70 | 71 | relop: LT 72 | | GT 73 | | LE 74 | | GE 75 | | EQ 76 | | NE 77 | ; 78 | 79 | value: NUMBER 80 | | FLOAT_NUM 81 | | CHARACTER 82 | | ID 83 | ; 84 | 85 | return: RETURN value ';' 86 | | 87 | ; 88 | 89 | %% 90 | 91 | int main() { 92 | yyparse(); 93 | } 94 | 95 | void yyerror(const char* msg) { 96 | fprintf(stderr, "%s\n", msg); 97 | } -------------------------------------------------------------------------------- /archive/parser_v4/parser.l: -------------------------------------------------------------------------------- 1 | %{ 2 | #include "y.tab.h" 3 | int countn=0; 4 | %} 5 | %option yylineno 6 | 7 | alpha [a-zA-Z] 8 | digit [0-9] 9 | unary "++"|"--" 10 | 11 | %% 12 | 13 | "printf" { return PRINTFF; } 14 | "scanf" { return SCANFF; } 15 | "int" { return INT; } 16 | "float" { return FLOAT; } 17 | "char" { return CHAR; } 18 | "void" { return VOID; } 19 | "return" { return RETURN; } 20 | "for" { return FOR; } 21 | ^"#include"[ ]*<.+\.h> { return INCLUDE; } 22 | "true" { return TRUE; } 23 | "false" { return FALSE; } 24 | {digit}+ { return NUMBER; } 25 | {alpha}({alpha}|{digit})* { return ID; } 26 | {unary} { return UNARY; } 27 | "<=" { return LE; } 28 | ">=" { return GE; } 29 | "==" { return EQ; } 30 | "!=" { return NE; } 31 | ">" { return GT; } 32 | "<" { return LT; } 33 | "&&" { return AND; } 34 | "||" { return OR; } 35 | "+" { return ADD; } 36 | "-" { return SUBTRACT; } 37 | "/" { return DIVIDE; } 38 | "*" { return MULTIPLY; } 39 | \/\/.* { ; } 40 | \/\*(.*\n)*.*\*\/ { ; } 41 | [ \t]* { ; } 42 | [\n] { countn++; } 43 | . { return *yytext; } 44 | ["].*["] { return STR; } 45 | 46 | %% 47 | 48 | int yywrap() { 49 | return 1; 50 | } -------------------------------------------------------------------------------- /Part 2 - Adding the Grammar Rules/lexer.l: -------------------------------------------------------------------------------- 1 | %{ 2 | #include "y.tab.h" 3 | int countn=0; 4 | %} 5 | 6 | %option yylineno 7 | 8 | alpha [a-zA-Z] 9 | digit [0-9] 10 | unary "++"|"--" 11 | 12 | %% 13 | 14 | "printf" { return PRINTFF; } 15 | "scanf" { return SCANFF; } 16 | "int" { return INT; } 17 | "float" { return FLOAT; } 18 | "char" { return CHAR; } 19 | "void" { return VOID; } 20 | "return" { return RETURN; } 21 | "for" { return FOR; } 22 | "if" { return IF; } 23 | "else" { return ELSE; } 24 | ^"#include"[ ]*<.+\.h> { return INCLUDE; } 25 | "true" { return TRUE; } 26 | "false" { return FALSE; } 27 | [-]?{digit}+ { return NUMBER; } 28 | [-]?{digit}+\.{digit}{1,6} { return FLOAT_NUM; } 29 | {alpha}({alpha}|{digit})* { return ID; } 30 | {unary} { return UNARY; } 31 | "<=" { return LE; } 32 | ">=" { return GE; } 33 | "==" { return EQ; } 34 | "!=" { return NE; } 35 | ">" { return GT; } 36 | "<" { return LT; } 37 | "&&" { return AND; } 38 | "||" { return OR; } 39 | "+" { return ADD; } 40 | "-" { return SUBTRACT; } 41 | "/" { return DIVIDE; } 42 | "*" { return MULTIPLY; } 43 | \/\/.* { ; } 44 | \/\*(.*\n)*.*\*\/ { ; } 45 | [ \t]* { ; } 46 | [\n] { countn++; } 47 | . { return *yytext; } 48 | ["].*["] { return STR; } 49 | ['].['] { return CHARACTER; } 50 | 51 | %% 52 | 53 | int yywrap() { 54 | return 1; 55 | } -------------------------------------------------------------------------------- /Part 3 - Generating the Symbol Table/lexer.l: -------------------------------------------------------------------------------- 1 | %{ 2 | #include "y.tab.h" 3 | int countn=0; 4 | %} 5 | 6 | %option yylineno 7 | 8 | alpha [a-zA-Z] 9 | digit [0-9] 10 | unary "++"|"--" 11 | 12 | %% 13 | 14 | "printf" { return PRINTFF; } 15 | "scanf" { return SCANFF; } 16 | "int" { return INT; } 17 | "float" { return FLOAT; } 18 | "char" { return CHAR; } 19 | "void" { return VOID; } 20 | "return" { return RETURN; } 21 | "for" { return FOR; } 22 | "if" { return IF; } 23 | "else" { return ELSE; } 24 | ^"#include"[ ]*<.+\.h> { return INCLUDE; } 25 | "true" { return TRUE; } 26 | "false" { return FALSE; } 27 | [-]?{digit}+ { return NUMBER; } 28 | [-]?{digit}+\.{digit}{1,6} { return FLOAT_NUM; } 29 | {alpha}({alpha}|{digit})* { return ID; } 30 | {unary} { return UNARY; } 31 | "<=" { return LE; } 32 | ">=" { return GE; } 33 | "==" { return EQ; } 34 | "!=" { return NE; } 35 | ">" { return GT; } 36 | "<" { return LT; } 37 | "&&" { return AND; } 38 | "||" { return OR; } 39 | "+" { return ADD; } 40 | "-" { return SUBTRACT; } 41 | "/" { return DIVIDE; } 42 | "*" { return MULTIPLY; } 43 | \/\/.* { ; } 44 | \/\*(.*\n)*.*\*\/ { ; } 45 | [ \t]* { ; } 46 | [\n] { countn++; } 47 | . { return *yytext; } 48 | ["].*["] { return STR; } 49 | ['].['] { return CHARACTER; } 50 | 51 | %% 52 | 53 | int yywrap() { 54 | return 1; 55 | } -------------------------------------------------------------------------------- /Part 1 - Creating the Lexical Analyzer/lexer.l: -------------------------------------------------------------------------------- 1 | %{ 2 | #include "y.tab.h" 3 | int countn=0; 4 | %} 5 | 6 | %option yylineno 7 | 8 | alpha [a-zA-Z] 9 | digit [0-9] 10 | unary "++"|"--" 11 | 12 | %% 13 | 14 | "printf" { return PRINTFF; } 15 | "scanf" { return SCANFF; } 16 | "int" { return INT; } 17 | "float" { return FLOAT; } 18 | "char" { return CHAR; } 19 | "void" { return VOID; } 20 | "return" { return RETURN; } 21 | "for" { return FOR; } 22 | "if" { return IF; } 23 | "else" { return ELSE; } 24 | ^"#include"[ ]*<.+\.h> { return INCLUDE; } 25 | "true" { return TRUE; } 26 | "false" { return FALSE; } 27 | [-]?{digit}+ { return NUMBER; } 28 | [-]?{digit}+\.{digit}{1,6} { return FLOAT_NUM; } 29 | {alpha}({alpha}|{digit})* { return ID; } 30 | {unary} { return UNARY; } 31 | "<=" { return LE; } 32 | ">=" { return GE; } 33 | "==" { return EQ; } 34 | "!=" { return NE; } 35 | ">" { return GT; } 36 | "<" { return LT; } 37 | "&&" { return AND; } 38 | "||" { return OR; } 39 | "+" { return ADD; } 40 | "-" { return SUBTRACT; } 41 | "/" { return DIVIDE; } 42 | "*" { return MULTIPLY; } 43 | \/\/.* { ; } 44 | \/\*(.*\n)*.*\*\/ { ; } 45 | [ \t]* { ; } 46 | [\n] { countn++; } 47 | . { return *yytext; } 48 | ["].*["] { return STR; } 49 | ['].['] { return CHARACTER; } 50 | 51 | %% 52 | 53 | int yywrap() { 54 | return 1; 55 | } -------------------------------------------------------------------------------- /archive/README.md: -------------------------------------------------------------------------------- 1 | ## Development of the Compiler 2 | 3 | The final compiler was a culmination of various iterations. To see the development stages, take a look at the archive folder which has each iteration. The details of each stage are given below. 4 | 5 | | Parser | Description | 6 | | ----------------- | ------------------------------------------------------------------------------------------------------------ | 7 | | parser_v1 | This parser contains the lex file with all regular expressions required and a yacc file that defines the Context Free Grammar for a for loop. It accepts a code chunk with a single for loop and simple binary arithmetic and unary expressions in the body. | 8 | | parser_v2 | This parser improves the CFG defined in the previous parser. It accepts a complete C program, including headers, main function, body, and return. The body can contain nested for loops with if-else blocks, printf and scanf statements, and simple expressions. The expressions accepted are basic binary arithmetic operations and unary operations. | 9 | | parser_v3 | This parser is the first attempt at performing lexical analysis on the program accepted by parser_v2. It was observed that handling both if-else and nested for loops together was difficult, so a depth-first approach was followed for the next parsers by first tackling nested for loops and then moving on to if-else blocks. | 10 | | parser_v4 | This parser performs lexical analysis and generates a symbol table for a C program with nested for loops. | 11 | | parser_v5 | This parser performs syntax analysis of the input program with nested for loops. The output of this parser is the syntax tree of the input program. | 12 | | parser_v6 | This parser adds if-else blocks to the syntax analysis. The output of this parser is the syntax tree of the input program, which may now have nested for and if-else blocks. | 13 | | parser_v7 | The semantic analysis phase starts with this version. This parser checks if variables have been declared before use or if there are duplicate declarations. In case of an error, an informative error message is printed along with the line number. | 14 | | parser_v8 | This version completes the semantic analysis phase. It implements type conversion during arithmetic operations and variable initializations.| 15 | -------------------------------------------------------------------------------- /archive/parser_v5/parser.l: -------------------------------------------------------------------------------- 1 | %{ 2 | #include "y.tab.h" 3 | int countn=0; 4 | %} 5 | %option yylineno 6 | 7 | alpha [a-zA-Z] 8 | digit [0-9] 9 | unary "++"|"--" 10 | 11 | %% 12 | 13 | "printf" { strcpy(yylval.nam.name,(yytext)); return PRINTFF; } 14 | "scanf" { strcpy(yylval.nam.name,(yytext)); return SCANFF; } 15 | "int" { strcpy(yylval.nam.name,(yytext)); return INT; } 16 | "float" { strcpy(yylval.nam.name,(yytext)); return FLOAT; } 17 | "char" { strcpy(yylval.nam.name,(yytext)); return CHAR; } 18 | "void" { strcpy(yylval.nam.name,(yytext)); return VOID; } 19 | "return" { strcpy(yylval.nam.name,(yytext)); return RETURN; } 20 | "for" { strcpy(yylval.nam.name,(yytext)); return FOR; } 21 | ^"#include"[ ]*<.+\.h> { strcpy(yylval.nam.name,(yytext)); return INCLUDE; } 22 | "true" { strcpy(yylval.nam.name,(yytext)); return TRUE; } 23 | "false" { strcpy(yylval.nam.name,(yytext)); return FALSE; } 24 | {digit}+ { strcpy(yylval.nam.name,(yytext)); return NUMBER; } 25 | {alpha}({alpha}|{digit})* { strcpy(yylval.nam.name,(yytext)); return ID; } 26 | {unary} { strcpy(yylval.nam.name,(yytext)); return UNARY; } 27 | "<=" { strcpy(yylval.nam.name,(yytext)); return LE; } 28 | ">=" { strcpy(yylval.nam.name,(yytext)); return GE; } 29 | "==" { strcpy(yylval.nam.name,(yytext)); return EQ; } 30 | "!=" { strcpy(yylval.nam.name,(yytext)); return NE; } 31 | ">" { strcpy(yylval.nam.name,(yytext)); return GT; } 32 | "<" { strcpy(yylval.nam.name,(yytext)); return LT; } 33 | "&&" { strcpy(yylval.nam.name,(yytext)); return AND; } 34 | "||" { strcpy(yylval.nam.name,(yytext)); return OR; } 35 | "+" { strcpy(yylval.nam.name,(yytext)); return ADD; } 36 | "-" { strcpy(yylval.nam.name,(yytext)); return SUBTRACT; } 37 | "/" { strcpy(yylval.nam.name,(yytext)); return DIVIDE; } 38 | "*" { strcpy(yylval.nam.name,(yytext)); return MULTIPLY; } 39 | \/\/.* { ; } 40 | \/\*(.*\n)*.*\*\/ { ; } 41 | [ \t]* { ; } 42 | [\n] { countn++; } 43 | . { return *yytext; } 44 | ["].*["] { strcpy(yylval.nam.name,(yytext)); return STR; } 45 | 46 | %% 47 | 48 | int yywrap() { 49 | return 1; 50 | } -------------------------------------------------------------------------------- /archive/parser_v6/parser.l: -------------------------------------------------------------------------------- 1 | %{ 2 | #include "y.tab.h" 3 | int countn=0; 4 | %} 5 | %option yylineno 6 | 7 | alpha [a-zA-Z] 8 | digit [0-9] 9 | unary "++"|"--" 10 | 11 | %% 12 | 13 | "printf" { strcpy(yylval.nam.name,(yytext)); return PRINTFF; } 14 | "scanf" { strcpy(yylval.nam.name,(yytext)); return SCANFF; } 15 | "int" { strcpy(yylval.nam.name,(yytext)); return INT; } 16 | "float" { strcpy(yylval.nam.name,(yytext)); return FLOAT; } 17 | "char" { strcpy(yylval.nam.name,(yytext)); return CHAR; } 18 | "void" { strcpy(yylval.nam.name,(yytext)); return VOID; } 19 | "return" { strcpy(yylval.nam.name,(yytext)); return RETURN; } 20 | "for" { strcpy(yylval.nam.name,(yytext)); return FOR; } 21 | "if" { strcpy(yylval.nam.name,(yytext)); return IF; } 22 | "else" { strcpy(yylval.nam.name,(yytext)); return ELSE; } 23 | ^"#include"[ ]*<.+\.h> { strcpy(yylval.nam.name,(yytext)); return INCLUDE; } 24 | "true" { strcpy(yylval.nam.name,(yytext)); return TRUE; } 25 | "false" { strcpy(yylval.nam.name,(yytext)); return FALSE; } 26 | {digit}+ { strcpy(yylval.nam.name,(yytext)); return NUMBER; } 27 | {alpha}({alpha}|{digit})* { strcpy(yylval.nam.name,(yytext)); return ID; } 28 | {unary} { strcpy(yylval.nam.name,(yytext)); return UNARY; } 29 | "<=" { strcpy(yylval.nam.name,(yytext)); return LE; } 30 | ">=" { strcpy(yylval.nam.name,(yytext)); return GE; } 31 | "==" { strcpy(yylval.nam.name,(yytext)); return EQ; } 32 | "!=" { strcpy(yylval.nam.name,(yytext)); return NE; } 33 | ">" { strcpy(yylval.nam.name,(yytext)); return GT; } 34 | "<" { strcpy(yylval.nam.name,(yytext)); return LT; } 35 | "&&" { strcpy(yylval.nam.name,(yytext)); return AND; } 36 | "||" { strcpy(yylval.nam.name,(yytext)); return OR; } 37 | "+" { strcpy(yylval.nam.name,(yytext)); return ADD; } 38 | "-" { strcpy(yylval.nam.name,(yytext)); return SUBTRACT; } 39 | "/" { strcpy(yylval.nam.name,(yytext)); return DIVIDE; } 40 | "*" { strcpy(yylval.nam.name,(yytext)); return MULTIPLY; } 41 | \/\/.* { ; } 42 | \/\*(.*\n)*.*\*\/ { ; } 43 | [ \t]* { ; } 44 | [\n] { countn++; } 45 | . { return *yytext; } 46 | ["].*["] { strcpy(yylval.nam.name,(yytext)); return STR; } 47 | 48 | %% 49 | 50 | int yywrap() { 51 | return 1; 52 | } -------------------------------------------------------------------------------- /archive/parser_v7/parser.l: -------------------------------------------------------------------------------- 1 | %{ 2 | #include "y.tab.h" 3 | int countn=0; 4 | %} 5 | %option yylineno 6 | 7 | alpha [a-zA-Z] 8 | digit [0-9] 9 | unary "++"|"--" 10 | 11 | %% 12 | 13 | "printf" { strcpy(yylval.nam.name,(yytext)); return PRINTFF; } 14 | "scanf" { strcpy(yylval.nam.name,(yytext)); return SCANFF; } 15 | "int" { strcpy(yylval.nam.name,(yytext)); return INT; } 16 | "float" { strcpy(yylval.nam.name,(yytext)); return FLOAT; } 17 | "char" { strcpy(yylval.nam.name,(yytext)); return CHAR; } 18 | "void" { strcpy(yylval.nam.name,(yytext)); return VOID; } 19 | "return" { strcpy(yylval.nam.name,(yytext)); return RETURN; } 20 | "for" { strcpy(yylval.nam.name,(yytext)); return FOR; } 21 | "if" { strcpy(yylval.nam.name,(yytext)); return IF; } 22 | "else" { strcpy(yylval.nam.name,(yytext)); return ELSE; } 23 | ^"#include"[ ]*<.+\.h> { strcpy(yylval.nam.name,(yytext)); return INCLUDE; } 24 | "true" { strcpy(yylval.nam.name,(yytext)); return TRUE; } 25 | "false" { strcpy(yylval.nam.name,(yytext)); return FALSE; } 26 | [-]?{digit}+ { strcpy(yylval.nam.name,(yytext)); return NUMBER; } 27 | {alpha}({alpha}|{digit})* { strcpy(yylval.nam.name,(yytext)); return ID; } 28 | {unary} { strcpy(yylval.nam.name,(yytext)); return UNARY; } 29 | "<=" { strcpy(yylval.nam.name,(yytext)); return LE; } 30 | ">=" { strcpy(yylval.nam.name,(yytext)); return GE; } 31 | "==" { strcpy(yylval.nam.name,(yytext)); return EQ; } 32 | "!=" { strcpy(yylval.nam.name,(yytext)); return NE; } 33 | ">" { strcpy(yylval.nam.name,(yytext)); return GT; } 34 | "<" { strcpy(yylval.nam.name,(yytext)); return LT; } 35 | "&&" { strcpy(yylval.nam.name,(yytext)); return AND; } 36 | "||" { strcpy(yylval.nam.name,(yytext)); return OR; } 37 | "+" { strcpy(yylval.nam.name,(yytext)); return ADD; } 38 | "-" { strcpy(yylval.nam.name,(yytext)); return SUBTRACT; } 39 | "/" { strcpy(yylval.nam.name,(yytext)); return DIVIDE; } 40 | "*" { strcpy(yylval.nam.name,(yytext)); return MULTIPLY; } 41 | \/\/.* { ; } 42 | \/\*(.*\n)*.*\*\/ { ; } 43 | [ \t]* { ; } 44 | [\n] { countn++; } 45 | . { return *yytext; } 46 | ["].*["] { strcpy(yylval.nam.name,(yytext)); return STR; } 47 | 48 | %% 49 | 50 | int yywrap() { 51 | return 1; 52 | } -------------------------------------------------------------------------------- /archive/parser_v8/parser.l: -------------------------------------------------------------------------------- 1 | %{ 2 | #include "y.tab.h" 3 | int countn=0; 4 | %} 5 | %option yylineno 6 | 7 | alpha [a-zA-Z] 8 | digit [0-9] 9 | unary "++"|"--" 10 | 11 | %% 12 | 13 | "printf" { strcpy(yylval.nam.name,(yytext)); return PRINTFF; } 14 | "scanf" { strcpy(yylval.nam.name,(yytext)); return SCANFF; } 15 | "int" { strcpy(yylval.nam.name,(yytext)); return INT; } 16 | "float" { strcpy(yylval.nam.name,(yytext)); return FLOAT; } 17 | "char" { strcpy(yylval.nam.name,(yytext)); return CHAR; } 18 | "void" { strcpy(yylval.nam.name,(yytext)); return VOID; } 19 | "return" { strcpy(yylval.nam.name,(yytext)); return RETURN; } 20 | "for" { strcpy(yylval.nam.name,(yytext)); return FOR; } 21 | "if" { strcpy(yylval.nam.name,(yytext)); return IF; } 22 | "else" { strcpy(yylval.nam.name,(yytext)); return ELSE; } 23 | ^"#include"[ ]*<.+\.h> { strcpy(yylval.nam.name,(yytext)); return INCLUDE; } 24 | "true" { strcpy(yylval.nam.name,(yytext)); return TRUE; } 25 | "false" { strcpy(yylval.nam.name,(yytext)); return FALSE; } 26 | [-]?{digit}+ { strcpy(yylval.nam.name,(yytext)); return NUMBER; } 27 | [-]?{digit}+\.{digit}{1,6} { strcpy(yylval.nam.name,(yytext)); return FLOAT_NUM; } 28 | {alpha}({alpha}|{digit})* { strcpy(yylval.nam.name,(yytext)); return ID; } 29 | {unary} { strcpy(yylval.nam.name,(yytext)); return UNARY; } 30 | "<=" { strcpy(yylval.nam.name,(yytext)); return LE; } 31 | ">=" { strcpy(yylval.nam.name,(yytext)); return GE; } 32 | "==" { strcpy(yylval.nam.name,(yytext)); return EQ; } 33 | "!=" { strcpy(yylval.nam.name,(yytext)); return NE; } 34 | ">" { strcpy(yylval.nam.name,(yytext)); return GT; } 35 | "<" { strcpy(yylval.nam.name,(yytext)); return LT; } 36 | "&&" { strcpy(yylval.nam.name,(yytext)); return AND; } 37 | "||" { strcpy(yylval.nam.name,(yytext)); return OR; } 38 | "+" { strcpy(yylval.nam.name,(yytext)); return ADD; } 39 | "-" { strcpy(yylval.nam.name,(yytext)); return SUBTRACT; } 40 | "/" { strcpy(yylval.nam.name,(yytext)); return DIVIDE; } 41 | "*" { strcpy(yylval.nam.name,(yytext)); return MULTIPLY; } 42 | \/\/.* { ; } 43 | \/\*(.*\n)*.*\*\/ { ; } 44 | [ \t]* { ; } 45 | [\n] { countn++; } 46 | . { return *yytext; } 47 | ["].*["] { strcpy(yylval.nam.name,(yytext)); return STR; } 48 | 49 | %% 50 | 51 | int yywrap() { 52 | return 1; 53 | } -------------------------------------------------------------------------------- /lexer.l: -------------------------------------------------------------------------------- 1 | %{ 2 | #include "y.tab.h" 3 | int countn=0; 4 | %} 5 | %option yylineno 6 | 7 | alpha [a-zA-Z] 8 | digit [0-9] 9 | unary "++"|"--" 10 | 11 | %% 12 | 13 | "printf" { strcpy(yylval.nd_obj.name,(yytext)); return PRINTFF; } 14 | "scanf" { strcpy(yylval.nd_obj.name,(yytext)); return SCANFF; } 15 | "int" { strcpy(yylval.nd_obj.name,(yytext)); return INT; } 16 | "float" { strcpy(yylval.nd_obj.name,(yytext)); return FLOAT; } 17 | "char" { strcpy(yylval.nd_obj.name,(yytext)); return CHAR; } 18 | "void" { strcpy(yylval.nd_obj.name,(yytext)); return VOID; } 19 | "return" { strcpy(yylval.nd_obj.name,(yytext)); return RETURN; } 20 | "for" { strcpy(yylval.nd_obj.name,(yytext)); return FOR; } 21 | "if" { strcpy(yylval.nd_obj.name,(yytext)); return IF; } 22 | "else" { strcpy(yylval.nd_obj.name,(yytext)); return ELSE; } 23 | ^"#include"[ ]*<.+\.h> { strcpy(yylval.nd_obj.name,(yytext)); return INCLUDE; } 24 | "true" { strcpy(yylval.nd_obj.name,(yytext)); return TRUE; } 25 | "false" { strcpy(yylval.nd_obj.name,(yytext)); return FALSE; } 26 | [-]?{digit}+ { strcpy(yylval.nd_obj.name,(yytext)); return NUMBER; } 27 | [-]?{digit}+\.{digit}{1,6} { strcpy(yylval.nd_obj.name,(yytext)); return FLOAT_NUM; } 28 | {alpha}({alpha}|{digit})* { strcpy(yylval.nd_obj.name,(yytext)); return ID; } 29 | {unary} { strcpy(yylval.nd_obj.name,(yytext)); return UNARY; } 30 | "<=" { strcpy(yylval.nd_obj.name,(yytext)); return LE; } 31 | ">=" { strcpy(yylval.nd_obj.name,(yytext)); return GE; } 32 | "==" { strcpy(yylval.nd_obj.name,(yytext)); return EQ; } 33 | "!=" { strcpy(yylval.nd_obj.name,(yytext)); return NE; } 34 | ">" { strcpy(yylval.nd_obj.name,(yytext)); return GT; } 35 | "<" { strcpy(yylval.nd_obj.name,(yytext)); return LT; } 36 | "&&" { strcpy(yylval.nd_obj.name,(yytext)); return AND; } 37 | "||" { strcpy(yylval.nd_obj.name,(yytext)); return OR; } 38 | "+" { strcpy(yylval.nd_obj.name,(yytext)); return ADD; } 39 | "-" { strcpy(yylval.nd_obj.name,(yytext)); return SUBTRACT; } 40 | "/" { strcpy(yylval.nd_obj.name,(yytext)); return DIVIDE; } 41 | "*" { strcpy(yylval.nd_obj.name,(yytext)); return MULTIPLY; } 42 | \/\/.* { ; } 43 | \/\*(.*\n)*.*\*\/ { ; } 44 | [ \t]* { ; } 45 | [\n] { countn++; } 46 | . { return *yytext; } 47 | ["].*["] { strcpy(yylval.nd_obj.name,(yytext)); return STR; } 48 | ['].['] { strcpy(yylval.nd_obj.name,(yytext)); return CHARACTER; } 49 | 50 | %% 51 | 52 | int yywrap() { 53 | return 1; 54 | } 55 | -------------------------------------------------------------------------------- /Part 4 - Creating the Syntax Tree/lexer.l: -------------------------------------------------------------------------------- 1 | %{ 2 | #include "y.tab.h" 3 | int countn=0; 4 | %} 5 | %option yylineno 6 | 7 | alpha [a-zA-Z] 8 | digit [0-9] 9 | unary "++"|"--" 10 | 11 | %% 12 | 13 | "printf" { strcpy(yylval.nd_obj.name,(yytext)); return PRINTFF; } 14 | "scanf" { strcpy(yylval.nd_obj.name,(yytext)); return SCANFF; } 15 | "int" { strcpy(yylval.nd_obj.name,(yytext)); return INT; } 16 | "float" { strcpy(yylval.nd_obj.name,(yytext)); return FLOAT; } 17 | "char" { strcpy(yylval.nd_obj.name,(yytext)); return CHAR; } 18 | "void" { strcpy(yylval.nd_obj.name,(yytext)); return VOID; } 19 | "return" { strcpy(yylval.nd_obj.name,(yytext)); return RETURN; } 20 | "for" { strcpy(yylval.nd_obj.name,(yytext)); return FOR; } 21 | "if" { strcpy(yylval.nd_obj.name,(yytext)); return IF; } 22 | "else" { strcpy(yylval.nd_obj.name,(yytext)); return ELSE; } 23 | ^"#include"[ ]*<.+\.h> { strcpy(yylval.nd_obj.name,(yytext)); return INCLUDE; } 24 | "true" { strcpy(yylval.nd_obj.name,(yytext)); return TRUE; } 25 | "false" { strcpy(yylval.nd_obj.name,(yytext)); return FALSE; } 26 | [-]?{digit}+ { strcpy(yylval.nd_obj.name,(yytext)); return NUMBER; } 27 | [-]?{digit}+\.{digit}{1,6} { strcpy(yylval.nd_obj.name,(yytext)); return FLOAT_NUM; } 28 | {alpha}({alpha}|{digit})* { strcpy(yylval.nd_obj.name,(yytext)); return ID; } 29 | {unary} { strcpy(yylval.nd_obj.name,(yytext)); return UNARY; } 30 | "<=" { strcpy(yylval.nd_obj.name,(yytext)); return LE; } 31 | ">=" { strcpy(yylval.nd_obj.name,(yytext)); return GE; } 32 | "==" { strcpy(yylval.nd_obj.name,(yytext)); return EQ; } 33 | "!=" { strcpy(yylval.nd_obj.name,(yytext)); return NE; } 34 | ">" { strcpy(yylval.nd_obj.name,(yytext)); return GT; } 35 | "<" { strcpy(yylval.nd_obj.name,(yytext)); return LT; } 36 | "&&" { strcpy(yylval.nd_obj.name,(yytext)); return AND; } 37 | "||" { strcpy(yylval.nd_obj.name,(yytext)); return OR; } 38 | "+" { strcpy(yylval.nd_obj.name,(yytext)); return ADD; } 39 | "-" { strcpy(yylval.nd_obj.name,(yytext)); return SUBTRACT; } 40 | "/" { strcpy(yylval.nd_obj.name,(yytext)); return DIVIDE; } 41 | "*" { strcpy(yylval.nd_obj.name,(yytext)); return MULTIPLY; } 42 | \/\/.* { ; } 43 | \/\*(.*\n)*.*\*\/ { ; } 44 | [ \t]* { ; } 45 | [\n] { countn++; } 46 | . { return *yytext; } 47 | ["].*["] { strcpy(yylval.nd_obj.name,(yytext)); return STR; } 48 | ['].['] { strcpy(yylval.nd_obj.name,(yytext)); return CHARACTER; } 49 | 50 | %% 51 | 52 | int yywrap() { 53 | return 1; 54 | } -------------------------------------------------------------------------------- /Part 5 - Adding the Semantic Analyzer/lexer.l: -------------------------------------------------------------------------------- 1 | %{ 2 | #include "y.tab.h" 3 | int countn=0; 4 | %} 5 | %option yylineno 6 | 7 | alpha [a-zA-Z] 8 | digit [0-9] 9 | unary "++"|"--" 10 | 11 | %% 12 | 13 | "printf" { strcpy(yylval.nd_obj.name,(yytext)); return PRINTFF; } 14 | "scanf" { strcpy(yylval.nd_obj.name,(yytext)); return SCANFF; } 15 | "int" { strcpy(yylval.nd_obj.name,(yytext)); return INT; } 16 | "float" { strcpy(yylval.nd_obj.name,(yytext)); return FLOAT; } 17 | "char" { strcpy(yylval.nd_obj.name,(yytext)); return CHAR; } 18 | "void" { strcpy(yylval.nd_obj.name,(yytext)); return VOID; } 19 | "return" { strcpy(yylval.nd_obj.name,(yytext)); return RETURN; } 20 | "for" { strcpy(yylval.nd_obj.name,(yytext)); return FOR; } 21 | "if" { strcpy(yylval.nd_obj.name,(yytext)); return IF; } 22 | "else" { strcpy(yylval.nd_obj.name,(yytext)); return ELSE; } 23 | ^"#include"[ ]*<.+\.h> { strcpy(yylval.nd_obj.name,(yytext)); return INCLUDE; } 24 | "true" { strcpy(yylval.nd_obj.name,(yytext)); return TRUE; } 25 | "false" { strcpy(yylval.nd_obj.name,(yytext)); return FALSE; } 26 | [-]?{digit}+ { strcpy(yylval.nd_obj.name,(yytext)); return NUMBER; } 27 | [-]?{digit}+\.{digit}{1,6} { strcpy(yylval.nd_obj.name,(yytext)); return FLOAT_NUM; } 28 | {alpha}({alpha}|{digit})* { strcpy(yylval.nd_obj.name,(yytext)); return ID; } 29 | {unary} { strcpy(yylval.nd_obj.name,(yytext)); return UNARY; } 30 | "<=" { strcpy(yylval.nd_obj.name,(yytext)); return LE; } 31 | ">=" { strcpy(yylval.nd_obj.name,(yytext)); return GE; } 32 | "==" { strcpy(yylval.nd_obj.name,(yytext)); return EQ; } 33 | "!=" { strcpy(yylval.nd_obj.name,(yytext)); return NE; } 34 | ">" { strcpy(yylval.nd_obj.name,(yytext)); return GT; } 35 | "<" { strcpy(yylval.nd_obj.name,(yytext)); return LT; } 36 | "&&" { strcpy(yylval.nd_obj.name,(yytext)); return AND; } 37 | "||" { strcpy(yylval.nd_obj.name,(yytext)); return OR; } 38 | "+" { strcpy(yylval.nd_obj.name,(yytext)); return ADD; } 39 | "-" { strcpy(yylval.nd_obj.name,(yytext)); return SUBTRACT; } 40 | "/" { strcpy(yylval.nd_obj.name,(yytext)); return DIVIDE; } 41 | "*" { strcpy(yylval.nd_obj.name,(yytext)); return MULTIPLY; } 42 | \/\/.* { ; } 43 | \/\*(.*\n)*.*\*\/ { ; } 44 | [ \t]* { ; } 45 | [\n] { countn++; } 46 | . { return *yytext; } 47 | ["].*["] { strcpy(yylval.nd_obj.name,(yytext)); return STR; } 48 | ['].['] { strcpy(yylval.nd_obj.name,(yytext)); return CHARACTER; } 49 | 50 | %% 51 | 52 | int yywrap() { 53 | return 1; 54 | } -------------------------------------------------------------------------------- /Part 6 - Intermediate Code Generation/lexer.l: -------------------------------------------------------------------------------- 1 | %{ 2 | #include "y.tab.h" 3 | int countn=0; 4 | %} 5 | %option yylineno 6 | 7 | alpha [a-zA-Z] 8 | digit [0-9] 9 | unary "++"|"--" 10 | 11 | %% 12 | 13 | "printf" { strcpy(yylval.nd_obj.name,(yytext)); return PRINTFF; } 14 | "scanf" { strcpy(yylval.nd_obj.name,(yytext)); return SCANFF; } 15 | "int" { strcpy(yylval.nd_obj.name,(yytext)); return INT; } 16 | "float" { strcpy(yylval.nd_obj.name,(yytext)); return FLOAT; } 17 | "char" { strcpy(yylval.nd_obj.name,(yytext)); return CHAR; } 18 | "void" { strcpy(yylval.nd_obj.name,(yytext)); return VOID; } 19 | "return" { strcpy(yylval.nd_obj.name,(yytext)); return RETURN; } 20 | "for" { strcpy(yylval.nd_obj.name,(yytext)); return FOR; } 21 | "if" { strcpy(yylval.nd_obj.name,(yytext)); return IF; } 22 | "else" { strcpy(yylval.nd_obj.name,(yytext)); return ELSE; } 23 | ^"#include"[ ]*<.+\.h> { strcpy(yylval.nd_obj.name,(yytext)); return INCLUDE; } 24 | "true" { strcpy(yylval.nd_obj.name,(yytext)); return TRUE; } 25 | "false" { strcpy(yylval.nd_obj.name,(yytext)); return FALSE; } 26 | [-]?{digit}+ { strcpy(yylval.nd_obj.name,(yytext)); return NUMBER; } 27 | [-]?{digit}+\.{digit}{1,6} { strcpy(yylval.nd_obj.name,(yytext)); return FLOAT_NUM; } 28 | {alpha}({alpha}|{digit})* { strcpy(yylval.nd_obj.name,(yytext)); return ID; } 29 | {unary} { strcpy(yylval.nd_obj.name,(yytext)); return UNARY; } 30 | "<=" { strcpy(yylval.nd_obj.name,(yytext)); return LE; } 31 | ">=" { strcpy(yylval.nd_obj.name,(yytext)); return GE; } 32 | "==" { strcpy(yylval.nd_obj.name,(yytext)); return EQ; } 33 | "!=" { strcpy(yylval.nd_obj.name,(yytext)); return NE; } 34 | ">" { strcpy(yylval.nd_obj.name,(yytext)); return GT; } 35 | "<" { strcpy(yylval.nd_obj.name,(yytext)); return LT; } 36 | "&&" { strcpy(yylval.nd_obj.name,(yytext)); return AND; } 37 | "||" { strcpy(yylval.nd_obj.name,(yytext)); return OR; } 38 | "+" { strcpy(yylval.nd_obj.name,(yytext)); return ADD; } 39 | "-" { strcpy(yylval.nd_obj.name,(yytext)); return SUBTRACT; } 40 | "/" { strcpy(yylval.nd_obj.name,(yytext)); return DIVIDE; } 41 | "*" { strcpy(yylval.nd_obj.name,(yytext)); return MULTIPLY; } 42 | \/\/.* { ; } 43 | \/\*(.*\n)*.*\*\/ { ; } 44 | [ \t]* { ; } 45 | [\n] { countn++; } 46 | . { return *yytext; } 47 | ["].*["] { strcpy(yylval.nd_obj.name,(yytext)); return STR; } 48 | ['].['] { strcpy(yylval.nd_obj.name,(yytext)); return CHARACTER; } 49 | 50 | %% 51 | 52 | int yywrap() { 53 | return 1; 54 | } -------------------------------------------------------------------------------- /archive/brainstorm.txt: -------------------------------------------------------------------------------- 1 | TARGET: 2 | 3 | program 4 | header files 5 | main function 6 | body 7 | statements 8 | loops 9 | statements 10 | statements 11 | loops 12 | statements 13 | loops 14 | statements 15 | statements 16 | return 17 | 18 | PARSER_v2: 19 | 20 | program 21 | header files 22 | main function 23 | body 24 | statements 25 | loops 26 | statements 27 | statements 28 | loops 29 | statements 30 | loops 31 | statements 32 | statements 33 | return 34 | 35 | 36 | PARSER_v1: 37 | 38 | body 39 | statements 40 | loops 41 | statements 42 | statements 43 | loops 44 | statements 45 | loops 46 | statements 47 | statements 48 | 49 | 50 | PARSER_v4 51 | 52 | program 53 | headers main ( ) { body return } 54 | headers 55 | headers headers 56 | INCLUDE 57 | main 58 | datatype ID 59 | datatype 60 | INT 61 | FLOAT 62 | CHAR 63 | VOID 64 | body 65 | FOR ( statement ; statement ; statement ) { body } 66 | statement ; 67 | body body 68 | PRINTFF ( STRLT ) ; 69 | SCANFF ( STRLT , & ID ) ; 70 | statement 71 | datatype ID init 72 | ID = expression 73 | ID relop value 74 | ID UNARY 75 | UNARY ID 76 | init 77 | = value 78 | nothing 79 | value 80 | ID 81 | NUMBER 82 | expression 83 | expression arithmetic expression 84 | value 85 | arithmetic 86 | ADD 87 | SUBTRACT 88 | MULTIPLY 89 | DIVIDE 90 | relop 91 | LT 92 | GT 93 | LE 94 | GE 95 | EQ 96 | NE 97 | return 98 | RETURN value ; 99 | nothing 100 | 101 | 102 | 103 | ################################ 104 | program 105 | headers main ( ) { body return } 106 | headers 107 | headers headers 108 | INCLUDE 109 | main 110 | datatype ID 111 | datatype 112 | INT 113 | FLOAT 114 | CHAR 115 | VOID 116 | body 117 | FOR ( stmt1 ; stmt2 ; stmt3 ) { body } 118 | IF ( condition ) { body } else 119 | assignment 120 | defintion 121 | body body 122 | PRINTFF ( STRLT ) ; 123 | SCANFF ( STRLT , & ID ) ; 124 | else 125 | ELSE { body } 126 | nothing 127 | condition 128 | condition AND boolean 129 | condition OR boolean 130 | NE condition 131 | boolean 132 | boolean 133 | expression relop expression 134 | TRUE 135 | FALSE 136 | value 137 | init 138 | = value 139 | nothing 140 | value 141 | ID 142 | NUMBER 143 | expression 144 | expression arithmetic expression 145 | value 146 | arithmetic 147 | ADD 148 | SUBTRACT 149 | MULTIPLY 150 | DIVIDE 151 | relop 152 | LT 153 | GT 154 | LE 155 | GE 156 | EQ 157 | NE 158 | return 159 | RETURN NUMBER ; 160 | RETURN ID ; 161 | nothing 162 | 163 | 164 | *********** Stuff to Handle *********** 165 | 1. Arrays 166 | 2. Negative numbers 167 | 168 | *********** Semantic Analysis phase *********** 169 | Static sematic checks 170 | 171 | 1. The variables must be declared before they are used 172 | 2. The variables must have matching types when used in arithmetic expressions 173 | 3. There should be no duplicate declarations - Allowed in different scopes but we consider single scope for simplicity. 174 | 175 | *********** Type conversions to handle *********** 176 | 177 | 1. int i = 3.14 => floattoint(3.14) 178 | 2. float i = 3 => inttofloat(3) 179 | 180 | 7. +, *, -, / of different types => convert to float 181 | 8. id = expression of different types => convert expression to type of id 182 | 9. expression relop expression => convert to float 183 | 184 | -------------------------------------------------------------------------------- /archive/parser_v4/parser.y: -------------------------------------------------------------------------------- 1 | %{ 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include"lex.yy.c" 7 | void yyerror(const char *s); 8 | int yylex(); 9 | int yywrap(); 10 | void add(char); 11 | void insert_type(); 12 | int search(char *); 13 | void insert_type(); 14 | 15 | struct dataType { 16 | char * id_name; 17 | char * data_type; 18 | char * type; 19 | int line_no; 20 | } symbolTable[20]; 21 | int count=0; 22 | int q; 23 | char type[10]; 24 | extern int countn; 25 | %} 26 | 27 | %token PRINTFF SCANFF INT FLOAT CHAR VOID RETURN FOR INCLUDE TRUE FALSE NUMBER ID LE GE EQ NE GT LT AND OR STR ADD MULTIPLY DIVIDE SUBTRACT UNARY 28 | 29 | %% 30 | 31 | program: headers main '(' ')' '{' body return '}' 32 | ; 33 | 34 | headers: headers headers 35 | | INCLUDE { add('H'); } 36 | ; 37 | 38 | main: datatype ID { add('K'); } 39 | ; 40 | 41 | datatype: INT { insert_type(); } 42 | | FLOAT { insert_type(); } 43 | | CHAR { insert_type(); } 44 | | VOID { insert_type(); } 45 | ; 46 | 47 | body: FOR { add('K'); } '(' statement ';' statement ';' statement ')' '{' body '}' 48 | | statement ';' 49 | | body body 50 | | PRINTFF { add('K'); } '(' STR ')' ';' 51 | | SCANFF { add('K'); } '(' STR ',' '&' ID ')' ';' 52 | ; 53 | 54 | statement: datatype ID { add('V'); } init 55 | | ID '=' expression 56 | | ID relop value 57 | | ID UNARY 58 | | UNARY ID 59 | ; 60 | 61 | init: '=' value 62 | | 63 | ; 64 | 65 | value: ID 66 | | NUMBER { add('C'); } 67 | ; 68 | 69 | expression: expression arithmetic expression 70 | | value 71 | ; 72 | 73 | arithmetic: ADD 74 | | SUBTRACT 75 | | MULTIPLY 76 | | DIVIDE 77 | ; 78 | 79 | relop: LT 80 | | GT 81 | | LE 82 | | GE 83 | | EQ 84 | | NE 85 | ; 86 | 87 | return: RETURN { add('K'); } value ';' 88 | | 89 | ; 90 | 91 | %% 92 | 93 | int main() { 94 | yyparse(); 95 | printf("\t\t\tSymbol table\n"); 96 | printf("#######################################################################################\n"); 97 | printf("\nsymbol \t identify \t line number\n"); 98 | printf("_____________________________\n"); 99 | int i=0; 100 | for(i=0; i=0; i--) { 112 | if(strcmp(symbolTable[i].id_name, type)==0) { 113 | return -1; 114 | break; 115 | } 116 | } 117 | return 0; 118 | } 119 | 120 | void add(char c) { 121 | q=search(yytext); 122 | if(q==0) { 123 | if(c=='H') { 124 | symbolTable[count].id_name=strdup(yytext); 125 | symbolTable[count].data_type=strdup(type); 126 | symbolTable[count].line_no=countn; 127 | symbolTable[count].type=strdup("Header"); 128 | count++; 129 | } 130 | else if(c =='K') { 131 | symbolTable[count].id_name=strdup(yytext); 132 | symbolTable[count].data_type=strdup("N/A"); 133 | symbolTable[count].line_no=countn; 134 | symbolTable[count].type=strdup("Keyword"); 135 | count++; 136 | } 137 | else if(c=='V') { 138 | symbolTable[count].id_name=strdup(yytext); 139 | symbolTable[count].data_type=strdup(type); 140 | symbolTable[count].line_no=countn; 141 | symbolTable[count].type=strdup("Variable"); 142 | count++; 143 | } 144 | else if(c=='C') { 145 | symbolTable[count].id_name=strdup(yytext); 146 | symbolTable[count].data_type=strdup("CONST"); 147 | symbolTable[count].line_no=countn; 148 | symbolTable[count].type=strdup("Constant"); 149 | count++; 150 | } 151 | } 152 | } 153 | 154 | void insert_type() { 155 | strcpy(type, yytext); 156 | } 157 | 158 | void yyerror(const char* msg) { 159 | fprintf(stderr, "%s\n", msg); 160 | } -------------------------------------------------------------------------------- /archive/parser_v3/parser.y: -------------------------------------------------------------------------------- 1 | %{ 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include "lex.yy.c" 7 | void yyerror(const char *s); 8 | int yylex(); 9 | int yywrap(); 10 | void add(char); 11 | void insert_type(); 12 | int search(char *); 13 | void insert_type(); 14 | 15 | struct dataType { 16 | char * id_name; 17 | char * data_type; 18 | char * type; 19 | int line_no; 20 | } symbolTable[20]; 21 | int count=0; 22 | int q; 23 | char type[10]; 24 | extern int countn; 25 | 26 | %} 27 | 28 | %token INCLUDE FOR IF ELSE ID NUMBER UNARY BINARY DATATYPE TRUE FALSE RETURN PRINTFF SCANFF STRLT 29 | 30 | %% 31 | 32 | program: headers main '(' ')' '{' body return '}' 33 | ; 34 | 35 | headers: headers headers 36 | | INCLUDE { add('H'); } 37 | ; 38 | 39 | main: DATATYPE { insert_type(); } ID { add('V'); } 40 | ; 41 | 42 | body: expressions 43 | | loops 44 | | conditionals 45 | | expressions body 46 | | loops body 47 | | conditionals body 48 | ; 49 | 50 | loops: FOR { add('K'); } '(' statement statement statement ')' '{' body '}' 51 | ; 52 | 53 | expressions: statement expressions 54 | | statement 55 | ; 56 | 57 | conditionals: IF { add('K'); } '(' condition ')' '{' expressions '}' else 58 | ; 59 | 60 | else: ELSE { add('K'); } '{' expressions '}' 61 | | 62 | ; 63 | 64 | statement: DATATYPE { insert_type(); } ID { add('V'); } ';' 65 | | DATATYPE { insert_type(); } variable 66 | | ID BINARY statement 67 | | ID UNARY ';' 68 | | UNARY ID ';' 69 | | ID ';' 70 | | NUMBER { add('C'); } ';' 71 | | PRINTFF '(' STRLT ')' ';' 72 | | SCANFF '(' STRLT ',' '&' ID ')' ';' 73 | ; 74 | 75 | variable: ID array 76 | | '*' variable 77 | | ID { add('V'); } '=' NUMBER { add('C'); } ';' 78 | | ',' variable 79 | ; 80 | 81 | array: '[' NUMBER { add('C'); } ']' array 82 | | '[' ID ']' array 83 | | '[' ']' array 84 | | ';' 85 | ; 86 | 87 | condition: condition BINARY boolean 88 | | boolean 89 | ; 90 | 91 | boolean: ID BINARY ID 92 | | ID BINARY NUMBER { add('C'); } 93 | | NUMBER { add('C'); } BINARY ID 94 | | TRUE { add('K'); } 95 | | FALSE { add('K'); } 96 | ; 97 | 98 | return: RETURN { add('K'); } NUMBER { add('C'); } ';' 99 | | RETURN { add('K'); } ID ';' 100 | | 101 | ; 102 | 103 | %% 104 | 105 | int main() { 106 | yyparse(); 107 | printf("\t\t\tSymbol table\n"); 108 | printf("#######################################################################################\n"); 109 | printf("\nsymbol \t identify \t line number\n"); 110 | printf("_____________________________\n"); 111 | int i=0; 112 | for(i=0; i=0; i--) { 124 | if(strcmp(symbolTable[i].id_name,type)==0) { 125 | return -1; 126 | break; 127 | } 128 | } 129 | return 0; 130 | } 131 | 132 | void add(char c) { 133 | q=search(yytext); 134 | if(q==0) { 135 | if(c=='H') { 136 | symbolTable[count].id_name=strdup(yytext); 137 | symbolTable[count].data_type=strdup(type); 138 | symbolTable[count].line_no = countn; 139 | symbolTable[count].type=strdup("Header"); 140 | count++; 141 | } 142 | else if(c =='K') { 143 | symbolTable[count].id_name=strdup(yytext); 144 | symbolTable[count].data_type=strdup("N/A"); 145 | symbolTable[count].line_no = countn; 146 | symbolTable[count].type=strdup("Keyword"); 147 | count++; 148 | } 149 | else if(c=='V') { 150 | symbolTable[count].id_name=strdup(yytext); 151 | symbolTable[count].data_type=strdup(type); 152 | symbolTable[count].line_no = countn; 153 | symbolTable[count].type=strdup("Variable"); 154 | count++; 155 | } 156 | else if(c=='C') { 157 | symbolTable[count].id_name=strdup(yytext); 158 | symbolTable[count].data_type=strdup(type); 159 | symbolTable[count].line_no = countn; 160 | symbolTable[count].type=strdup("Constant"); 161 | count++; 162 | } 163 | } 164 | } 165 | 166 | void insert_type() { 167 | strcpy(type,yytext); 168 | } 169 | 170 | void yyerror(const char* msg) { 171 | fprintf(stderr, "%s\n", msg); 172 | } -------------------------------------------------------------------------------- /Part 3 - Generating the Symbol Table/parser2.y: -------------------------------------------------------------------------------- 1 | %{ 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include"lex.yy.c" 7 | 8 | void yyerror(const char *s); 9 | int yylex(); 10 | int yywrap(); 11 | void add(char); 12 | void insert_type(); 13 | int search(char *); 14 | void insert_type(); 15 | 16 | struct dataType { 17 | char * id_name; 18 | char * data_type; 19 | char * type; 20 | int line_no; 21 | } symbol_table[40]; 22 | 23 | int count=0; 24 | int q; 25 | char type[10]; 26 | extern int countn; 27 | %} 28 | 29 | %token VOID CHARACTER PRINTFF SCANFF INT FLOAT CHAR FOR IF ELSE TRUE FALSE NUMBER FLOAT_NUM ID LE GE EQ NE GT LT AND OR STR ADD MULTIPLY DIVIDE SUBTRACT UNARY INCLUDE RETURN 30 | 31 | %% 32 | 33 | program: headers main '(' ')' '{' body return '}' 34 | ; 35 | 36 | headers: headers headers 37 | | INCLUDE { add('H'); } 38 | ; 39 | 40 | main: datatype ID { add('F'); } 41 | ; 42 | 43 | datatype: INT { insert_type(); } 44 | | FLOAT { insert_type(); } 45 | | CHAR { insert_type(); } 46 | | VOID { insert_type(); } 47 | ; 48 | 49 | body: FOR { add('K'); } '(' statement ';' condition ';' statement ')' '{' body '}' 50 | | IF { add('K'); } '(' condition ')' '{' body '}' else 51 | | statement ';' 52 | | body body 53 | | PRINTFF { add('K'); } '(' STR ')' ';' 54 | | SCANFF { add('K'); } '(' STR ',' '&' ID ')' ';' 55 | ; 56 | 57 | else: ELSE { add('K'); } '{' body '}' 58 | | 59 | ; 60 | 61 | condition: value relop value 62 | | TRUE { add('K'); } 63 | | FALSE { add('K'); } 64 | | 65 | ; 66 | 67 | statement: datatype ID { add('V'); } init 68 | | ID '=' expression 69 | | ID relop expression 70 | | ID UNARY 71 | | UNARY ID 72 | ; 73 | 74 | init: '=' value 75 | | 76 | ; 77 | 78 | expression: expression arithmetic expression 79 | | value 80 | ; 81 | 82 | arithmetic: ADD 83 | | SUBTRACT 84 | | MULTIPLY 85 | | DIVIDE 86 | ; 87 | 88 | relop: LT 89 | | GT 90 | | LE 91 | | GE 92 | | EQ 93 | | NE 94 | ; 95 | 96 | value: NUMBER { add('C'); } 97 | | FLOAT_NUM { add('C'); } 98 | | CHARACTER { add('C'); } 99 | | ID 100 | ; 101 | 102 | return: RETURN { add('K'); } value ';' 103 | | 104 | ; 105 | 106 | %% 107 | 108 | int main() { 109 | yyparse(); 110 | printf("\n\n"); 111 | printf("\t\t\t\t\t\t\t\t PHASE 1: LEXICAL ANALYSIS \n\n"); 112 | printf("\nSYMBOL DATATYPE TYPE LINE NUMBER \n"); 113 | printf("_______________________________________\n\n"); 114 | int i=0; 115 | for(i=0; i=0; i--) { 128 | if(strcmp(symbol_table[i].id_name, type)==0) { 129 | return -1; 130 | break; 131 | } 132 | } 133 | return 0; 134 | } 135 | 136 | void add(char c) { 137 | q=search(yytext); 138 | if(!q) { 139 | if(c == 'H') { 140 | symbol_table[count].id_name=strdup(yytext); 141 | symbol_table[count].data_type=strdup(type); 142 | symbol_table[count].line_no=countn; 143 | symbol_table[count].type=strdup("Header"); 144 | count++; 145 | } 146 | else if(c == 'K') { 147 | symbol_table[count].id_name=strdup(yytext); 148 | symbol_table[count].data_type=strdup("N/A"); 149 | symbol_table[count].line_no=countn; 150 | symbol_table[count].type=strdup("Keyword\t"); 151 | count++; 152 | } 153 | else if(c == 'V') { 154 | symbol_table[count].id_name=strdup(yytext); 155 | symbol_table[count].data_type=strdup(type); 156 | symbol_table[count].line_no=countn; 157 | symbol_table[count].type=strdup("Variable"); 158 | count++; 159 | } 160 | else if(c == 'C') { 161 | symbol_table[count].id_name=strdup(yytext); 162 | symbol_table[count].data_type=strdup("CONST"); 163 | symbol_table[count].line_no=countn; 164 | symbol_table[count].type=strdup("Constant"); 165 | count++; 166 | } 167 | else if(c == 'F') { 168 | symbol_table[count].id_name=strdup(yytext); 169 | symbol_table[count].data_type=strdup(type); 170 | symbol_table[count].line_no=countn; 171 | symbol_table[count].type=strdup("Function"); 172 | count++; 173 | } 174 | } 175 | } 176 | 177 | void insert_type() { 178 | strcpy(type, yytext); 179 | } 180 | 181 | void yyerror(const char* msg) { 182 | fprintf(stderr, "%s\n", msg); 183 | } -------------------------------------------------------------------------------- /archive/parser_v5/parser.y: -------------------------------------------------------------------------------- 1 | %{ 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include"lex.yy.c" 7 | void yyerror(const char *s); 8 | int yylex(); 9 | int yywrap(); 10 | void add(char); 11 | void insert_type(); 12 | int search(char *); 13 | void insert_type(); 14 | void printtree(struct node*); 15 | void printTreeUtil(struct node*, int); 16 | void printInorder(struct node *); 17 | struct node* mknode(struct node *left, struct node *right, char *token); 18 | 19 | struct dataType { 20 | char * id_name; 21 | char * data_type; 22 | char * type; 23 | int line_no; 24 | } symbolTable[40]; 25 | int count=0; 26 | int q; 27 | char type[10]; 28 | extern int countn; 29 | struct node *head; 30 | 31 | struct node { 32 | struct node *left; 33 | struct node *right; 34 | char *token; 35 | }; 36 | 37 | %} 38 | 39 | %union { struct var_name { 40 | char name[100]; 41 | struct node* nd; 42 | } nam; 43 | } 44 | %token VOID 45 | %token PRINTFF SCANFF INT FLOAT CHAR FOR TRUE FALSE NUMBER ID LE GE EQ NE GT LT AND OR STR ADD MULTIPLY DIVIDE SUBTRACT UNARY INCLUDE RETURN 46 | %type headers main body return datatype expression statement init value arithmetic relop program 47 | 48 | %% 49 | 50 | program: headers main '(' ')' '{' body return '}' { $2.nd = mknode($6.nd, $7.nd, "main"); $$.nd = mknode($1.nd, $2.nd, "program"); 51 | head = $$.nd; 52 | } 53 | ; 54 | 55 | headers: headers headers { $$.nd = mknode($1.nd, $2.nd, "headers"); } 56 | | INCLUDE { add('H'); } { $$.nd = mknode(NULL, NULL, $1.name); } 57 | ; 58 | 59 | main: datatype ID { add('K'); } 60 | ; 61 | 62 | datatype: INT { insert_type(); } 63 | | FLOAT { insert_type(); } 64 | | CHAR { insert_type(); } 65 | | VOID { insert_type(); } 66 | ; 67 | 68 | body: FOR { add('K'); } '(' statement ';' statement ';' statement ')' '{' body '}' { struct node *temp = mknode($6.nd, $8.nd, "CONDITION"); struct node *temp2 = mknode($4.nd, temp, "CONDITION"); $$.nd = mknode(temp2, $11.nd, $1.name); } 69 | | statement ';' { $$.nd = $1.nd; } 70 | | body body { $$.nd = mknode($1.nd, $2.nd, "statements"); } 71 | | PRINTFF { add('K'); } '(' STR ')' ';' { $$.nd = mknode(NULL, NULL, "printf"); } 72 | | SCANFF { add('K'); } '(' STR ',' '&' ID ')' ';' { $$.nd = mknode(NULL, NULL, "scanf"); } 73 | ; 74 | 75 | statement: datatype ID { add('V'); } init { $2.nd = mknode(NULL, NULL, $2.name); $$.nd = mknode($2.nd, $4.nd, "declaration"); } 76 | | ID '=' expression { $1.nd = mknode(NULL, NULL, $1.name); $$.nd = mknode($1.nd, $3.nd, "="); } 77 | | ID relop expression { $1.nd = mknode(NULL, NULL, $1.name); $$.nd = mknode($1.nd, $3.nd, $2.name); } 78 | | ID UNARY { $1.nd = mknode(NULL, NULL, $1.name); $2.nd = mknode(NULL, NULL, $2.name); $$.nd = mknode($1.nd, $2.nd, "ITERATOR"); } 79 | | UNARY ID { $1.nd = mknode(NULL, NULL, $1.name); $2.nd = mknode(NULL, NULL, $2.name); $$.nd = mknode($1.nd, $2.nd, "ITERATOR"); } 80 | ; 81 | 82 | init: '=' value { $$.nd = $2.nd; } 83 | | { $$.nd = mknode(NULL, NULL, "NULL"); } 84 | ; 85 | 86 | expression: expression arithmetic expression { $$.nd = mknode($1.nd, $3.nd, $2.name); } 87 | | value { $$.nd = $1.nd; } 88 | ; 89 | 90 | arithmetic: ADD 91 | | SUBTRACT 92 | | MULTIPLY 93 | | DIVIDE 94 | ; 95 | 96 | relop: LT 97 | | GT 98 | | LE 99 | | GE 100 | | EQ 101 | | NE 102 | ; 103 | 104 | value: NUMBER { add('C'); $$.nd = mknode(NULL, NULL, $1.name); } 105 | | ID { $$.nd = mknode(NULL, NULL, $1.name); } 106 | ; 107 | 108 | return: RETURN { add('K'); } value ';' { $1.nd = mknode(NULL, NULL, "return"); $$.nd = mknode($1.nd, $3.nd, "RETURN"); } 109 | | { $$.nd = NULL; } 110 | ; 111 | 112 | %% 113 | 114 | int main() { 115 | yyparse(); 116 | printf("\n\n \t\t\t\t\t\t PHASE 1: LEXICAL ANALYSIS \n\n"); 117 | printf("\nSYMBOL DATATYPE TYPE LINE NUMBER \n"); 118 | printf("_______________________________________\n\n"); 119 | int i=0; 120 | for(i=0; i=0; i--) { 136 | if(strcmp(symbolTable[i].id_name, type)==0) { 137 | return -1; 138 | break; 139 | } 140 | } 141 | return 0; 142 | } 143 | 144 | void add(char c) { 145 | q=search(yytext); 146 | if(q==0) { 147 | if(c=='H') { 148 | symbolTable[count].id_name=strdup(yytext); 149 | symbolTable[count].data_type=strdup(type); 150 | symbolTable[count].line_no=countn; 151 | symbolTable[count].type=strdup("Header"); 152 | count++; 153 | } 154 | else if(c=='K') { 155 | symbolTable[count].id_name=strdup(yytext); 156 | symbolTable[count].data_type=strdup("N/A"); 157 | symbolTable[count].line_no=countn; 158 | symbolTable[count].type=strdup("Keyword\t"); 159 | count++; 160 | } 161 | else if(c=='V') { 162 | symbolTable[count].id_name=strdup(yytext); 163 | symbolTable[count].data_type=strdup(type); 164 | symbolTable[count].line_no=countn; 165 | symbolTable[count].type=strdup("Variable"); 166 | count++; 167 | } 168 | else if(c=='C') { 169 | symbolTable[count].id_name=strdup(yytext); 170 | symbolTable[count].data_type=strdup("CONST"); 171 | symbolTable[count].line_no=countn; 172 | symbolTable[count].type=strdup("Constant"); 173 | count++; 174 | } 175 | } 176 | } 177 | 178 | struct node* mknode(struct node *left, struct node *right, char *token) { 179 | struct node *newnode = (struct node *)malloc(sizeof(struct node)); 180 | char *newstr = (char *)malloc(strlen(token)+1); 181 | strcpy(newstr, token); 182 | newnode->left = left; 183 | newnode->right = right; 184 | newnode->token = newstr; 185 | return(newnode); 186 | } 187 | 188 | void printtree(struct node* tree) { 189 | // printTreeUtil(tree, 0); 190 | printf("\n\n the Inorder traversal of the above tree is: \n\n"); 191 | printInorder(tree); 192 | printf("\n\n"); 193 | } 194 | 195 | void printInorder(struct node *tree) { 196 | int i; 197 | if (tree->left) { 198 | printInorder(tree->left); 199 | } 200 | printf("%s, ", tree->token); 201 | if (tree->right) { 202 | printInorder(tree->right); 203 | } 204 | } 205 | 206 | void printTreeUtil(struct node *root, int space) { 207 | if(root == NULL) 208 | return; 209 | space += 7; 210 | printTreeUtil(root->right, space); 211 | // printf("\n"); 212 | for (int i = 7; i < space; i++) 213 | printf(" "); 214 | printf("%s\n", root->token); 215 | printTreeUtil(root->left, space); 216 | } 217 | 218 | void insert_type() { 219 | strcpy(type, yytext); 220 | } 221 | 222 | void yyerror(const char* msg) { 223 | fprintf(stderr, "%s\n", msg); 224 | } -------------------------------------------------------------------------------- /Part 4 - Creating the Syntax Tree/parser3.y: -------------------------------------------------------------------------------- 1 | %{ 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include"lex.yy.c" 7 | void yyerror(const char *s); 8 | int yylex(); 9 | int yywrap(); 10 | void add(char); 11 | void insert_type(); 12 | int search(char *); 13 | void insert_type(); 14 | void printtree(struct node*); 15 | void printInorder(struct node *); 16 | struct node* mknode(struct node *left, struct node *right, char *token); 17 | 18 | struct dataType { 19 | char * id_name; 20 | char * data_type; 21 | char * type; 22 | int line_no; 23 | } symbolTable[40]; 24 | int count=0; 25 | int q; 26 | char type[10]; 27 | extern int countn; 28 | struct node *head; 29 | struct node { 30 | struct node *left; 31 | struct node *right; 32 | char *token; 33 | }; 34 | %} 35 | 36 | %union { 37 | struct var_name { 38 | char name[100]; 39 | struct node* nd; 40 | } nd_obj; 41 | } 42 | 43 | %token VOID 44 | %token CHARACTER PRINTFF SCANFF INT FLOAT CHAR FOR IF ELSE TRUE FALSE NUMBER FLOAT_NUM ID LE GE EQ NE GT LT AND OR STR ADD MULTIPLY DIVIDE SUBTRACT UNARY INCLUDE RETURN 45 | %type headers main body return datatype expression statement init value arithmetic relop program condition else 46 | 47 | %% 48 | 49 | program: headers main '(' ')' '{' body return '}' { $2.nd = mknode($6.nd, $7.nd, "main"); $$.nd = mknode($1.nd, $2.nd, "program"); head = $$.nd; } 50 | ; 51 | 52 | headers: headers headers { $$.nd = mknode($1.nd, $2.nd, "headers"); } 53 | | INCLUDE { add('H'); } { $$.nd = mknode(NULL, NULL, $1.name); } 54 | ; 55 | 56 | main: datatype ID { add('K'); } 57 | ; 58 | 59 | datatype: INT { insert_type(); } 60 | | FLOAT { insert_type(); } 61 | | CHAR { insert_type(); } 62 | | VOID { insert_type(); } 63 | ; 64 | 65 | body: FOR { add('K'); } '(' statement ';' condition ';' statement ')' '{' body '}' { struct node *temp = mknode($6.nd, $8.nd, "CONDITION"); struct node *temp2 = mknode($4.nd, temp, "CONDITION"); $$.nd = mknode(temp2, $11.nd, $1.name); } 66 | | IF { add('K'); } '(' condition ')' '{' body '}' else { struct node *iff = mknode($4.nd, $7.nd, $1.name); $$.nd = mknode(iff, $9.nd, "if-else"); } 67 | | statement ';' { $$.nd = $1.nd; } 68 | | body body { $$.nd = mknode($1.nd, $2.nd, "statements"); } 69 | | PRINTFF { add('K'); } '(' STR ')' ';' { $$.nd = mknode(NULL, NULL, "printf"); } 70 | | SCANFF { add('K'); } '(' STR ',' '&' ID ')' ';' { $$.nd = mknode(NULL, NULL, "scanf"); } 71 | ; 72 | 73 | else: ELSE { add('K'); } '{' body '}' { $$.nd = mknode(NULL, $4.nd, $1.name); } 74 | | { $$.nd = NULL; } 75 | ; 76 | 77 | condition: value relop value { $$.nd = mknode($1.nd, $3.nd, $2.name); } 78 | | TRUE { add('K'); $$.nd = NULL; } 79 | | FALSE { add('K'); $$.nd = NULL; } 80 | | { $$.nd = NULL; } 81 | ; 82 | 83 | statement: datatype ID { add('V'); } init { $2.nd = mknode(NULL, NULL, $2.name); $$.nd = mknode($2.nd, $4.nd, "declaration"); } 84 | | ID '=' expression { $1.nd = mknode(NULL, NULL, $1.name); $$.nd = mknode($1.nd, $3.nd, "="); } 85 | | ID relop expression { $1.nd = mknode(NULL, NULL, $1.name); $$.nd = mknode($1.nd, $3.nd, $2.name); } 86 | | ID UNARY { $1.nd = mknode(NULL, NULL, $1.name); $2.nd = mknode(NULL, NULL, $2.name); $$.nd = mknode($1.nd, $2.nd, "ITERATOR"); } 87 | | UNARY ID { $1.nd = mknode(NULL, NULL, $1.name); $2.nd = mknode(NULL, NULL, $2.name); $$.nd = mknode($1.nd, $2.nd, "ITERATOR"); } 88 | ; 89 | 90 | init: '=' value { $$.nd = $2.nd; } 91 | | { $$.nd = mknode(NULL, NULL, "NULL"); } 92 | ; 93 | 94 | expression: expression arithmetic expression { $$.nd = mknode($1.nd, $3.nd, $2.name); } 95 | | value { $$.nd = $1.nd; } 96 | ; 97 | 98 | arithmetic: ADD 99 | | SUBTRACT 100 | | MULTIPLY 101 | | DIVIDE 102 | ; 103 | 104 | relop: LT 105 | | GT 106 | | LE 107 | | GE 108 | | EQ 109 | | NE 110 | ; 111 | 112 | value: NUMBER { add('C'); $$.nd = mknode(NULL, NULL, $1.name); } 113 | | FLOAT_NUM { add('C'); $$.nd = mknode(NULL, NULL, $1.name); } 114 | | CHARACTER { add('C'); $$.nd = mknode(NULL, NULL, $1.name); } 115 | | ID { $$.nd = mknode(NULL, NULL, $1.name); } 116 | ; 117 | 118 | return: RETURN { add('K'); } value ';' { $1.nd = mknode(NULL, NULL, "return"); $$.nd = mknode($1.nd, $3.nd, "RETURN"); } 119 | | { $$.nd = NULL; } 120 | ; 121 | 122 | %% 123 | 124 | int main() { 125 | yyparse(); 126 | printf("\n\n \t\t\t\t\t\t PHASE 1: LEXICAL ANALYSIS \n\n"); 127 | printf("\nSYMBOL DATATYPE TYPE LINE NUMBER \n"); 128 | printf("_______________________________________\n\n"); 129 | int i=0; 130 | for(i=0; i=0; i--) { 146 | if(strcmp(symbolTable[i].id_name, type)==0) { 147 | return -1; 148 | break; 149 | } 150 | } 151 | return 0; 152 | } 153 | 154 | void add(char c) { 155 | q=search(yytext); 156 | if(q==0) { 157 | if(c=='H') { 158 | symbolTable[count].id_name=strdup(yytext); 159 | symbolTable[count].data_type=strdup(type); 160 | symbolTable[count].line_no=countn; 161 | symbolTable[count].type=strdup("Header"); 162 | count++; 163 | } 164 | else if(c=='K') { 165 | symbolTable[count].id_name=strdup(yytext); 166 | symbolTable[count].data_type=strdup("N/A"); 167 | symbolTable[count].line_no=countn; 168 | symbolTable[count].type=strdup("Keyword\t"); 169 | count++; 170 | } 171 | else if(c=='V') { 172 | symbolTable[count].id_name=strdup(yytext); 173 | symbolTable[count].data_type=strdup(type); 174 | symbolTable[count].line_no=countn; 175 | symbolTable[count].type=strdup("Variable"); 176 | count++; 177 | } 178 | else if(c=='C') { 179 | symbolTable[count].id_name=strdup(yytext); 180 | symbolTable[count].data_type=strdup("CONST"); 181 | symbolTable[count].line_no=countn; 182 | symbolTable[count].type=strdup("Constant"); 183 | count++; 184 | } 185 | } 186 | } 187 | 188 | struct node* mknode(struct node *left, struct node *right, char *token) { 189 | struct node *newnode = (struct node *)malloc(sizeof(struct node)); 190 | char *newstr = (char *)malloc(strlen(token)+1); 191 | strcpy(newstr, token); 192 | newnode->left = left; 193 | newnode->right = right; 194 | newnode->token = newstr; 195 | return(newnode); 196 | } 197 | 198 | void printtree(struct node* tree) { 199 | printf("\n\n Inorder traversal of the Parse Tree: \n\n"); 200 | printInorder(tree); 201 | printf("\n\n"); 202 | } 203 | 204 | void printInorder(struct node *tree) { 205 | int i; 206 | if (tree->left) { 207 | printInorder(tree->left); 208 | } 209 | printf("%s, ", tree->token); 210 | if (tree->right) { 211 | printInorder(tree->right); 212 | } 213 | } 214 | 215 | void insert_type() { 216 | strcpy(type, yytext); 217 | } 218 | 219 | void yyerror(const char* msg) { 220 | fprintf(stderr, "%s\n", msg); 221 | } -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Frontend Phase of a C Compiler 2 | 3 | ## Using the Compiler 4 | 5 | ``` 6 | lex lexer.l 7 | yacc -d -v parser.y 8 | gcc -ll -w y.tab.c 9 | ./a.out”. 42 | - The main function does not take any input parameters. 43 | - Numbers follow the format “[-]?{digit}+| [-]?{digit}+\.{digit}{1,6}”. Hence numbers of the type +123.45 are not permitted. 44 | - The variables can be of int, float, or char type. No arrays are permitted. 45 | - The body of an if and else block is enclosed in parenthesis even if it is a single line. 46 | - The body of a for loop is enclosed in parenthesis, even if it is a single line. 47 | - For the sake of simplicity, the scope of all variables is considered the same. Hence, variables cannot be redeclared within loops or if-else blocks. 48 | - ```printf``` statements take a single string as a parameter. No type checking is performed. 49 | - ```scanf``` statements take a string and &id as input. No type checking is performed. 50 | - The condition statement of an if statement and for statement is of the form “value relop value” where value can be a number or id. 51 | 52 | 53 | ## Phases of the Compiler 54 | 55 | The final parser takes a C program with nested for loops or if-else blocks and performs lexical, syntax, and semantic analysis and then intermediate code generation. Let’s look at the implementation of each phase in detail: 56 | 57 | ### Lexer and Context-Free Grammar 58 | 59 | The first step was to code the lexer file to take the stream of characters from the input programs and identify the tokens defined with the help of regular expressions. Next, the yacc file was created, which contained the context-free grammar that accepts a complete C program constituting headers, main function, variable declarations, and initializations, nested for loops, if-else constructs, and expressions of the form of binary arithmetic and unary operations. At this stage, the parser will accept a program with the correct structure and throw a syntax error if the input program is not derived from the CFG. 60 | 61 | ### Lexical Analysis 62 | 63 | A struct was defined with the attributes id_name, data_type (int, float, char, etc.), type (keyword, identifier, constant, etc.), and line_no to generate a symbol table. The symbol table is an array of the above-defined struct. Whenever the program encounters a header, keyword, a constant or a variable declaration, the add function is called, which takes the type of the symbol as a parameter. In the add function, we first check if the element is already present in the symbol table. If it is not present, a new entry is created using the value of yytext as id_name, datatype from the global variable type in case of variables, type from the passed parameter, and line_no. After the CFG accepts the program, the symbol table is printed. 64 | 65 | ### Syntax Analysis 66 | 67 | For the syntax analysis phase, a struct was declared that represented the node for the binary tree that is to be generated. The struct node has attributes left, right, and a token which is a character array. All the tokens are declared to be of type nam, a struct with attributes node and name, representing the token’s name. To generate the syntax tree, a node is created for each token and linked to the nodes of the tokens, which occur to its left and right semantically. The inorder traversal of the generated syntax tree should recreate the program logically. 68 | 69 | ### Semantic Analysis 70 | 71 | The semantic analyzer handles three types of static checks. 72 | 1. Variables should be declared before use. For this, we use the check_declaration function that checks if the identifier passed as a parameter is present in the symbol table. If it is not, an informative error message is printed. The check declaration function is called every time an identifier is encountered in a statement apart from a declarative statement. 73 | 2. Variables cannot be redeclared. Since our compiler assumes a single scope, variables cannot be redeclared even within loops. For this check, the add function is changed to check if the symbol is present in the symbol table before inserting it. If the symbol is already present, and it is of the type variable, the user is attempting to redeclare it, and an error message is printed. 74 | 3. Pertains to type checking of variables in an arithmetic expression. For this, the check_types function is used, which takes the types of both variables as input. If the types match, nothing is done. If one variable needs to be converted to another type, the corresponding type conversion node is inserted in the syntax tree (inttofloat or floattoint). 75 | The type field was added to the struct representing value and expression tokens to keep track of the type of the compound expressions that are not present in the symbol table. The output of this phase is the annotated syntax tree. 76 | 77 | ### Intermediate Code Generation 78 | 79 | For intermediate code generation, the three address code representation was used. Variables were used to keep track of the next temporary variable and label to be generated. The condition statements of if and for were also declared to store the labels to goto in case the condition is satisfied or not satisfied. The output of this step is the intermediate code. 80 | 81 | 82 | ## Example of the Compiler in Action 83 | 84 | ``` 85 | #include 86 | #include 87 | 88 | int main() { 89 | int a; 90 | int x=1; 91 | int y=2; 92 | int z=3; 93 | x=3; 94 | y=10; 95 | z=5; 96 | if(x>5) { 97 | for(int k=0; k<10; k++) { 98 | y = x+3; 99 | printf("Hello!"); 100 | } 101 | } else { 102 | int idx = 1; 103 | } 104 | for(int i=0; i<10; i++) { 105 | printf("Hello World!"); 106 | scanf("%d", &x); 107 | if (x>5) { 108 | printf("Hi"); 109 | } 110 | for(int j=0; j 3 | #include 4 | #include 5 | #include 6 | #include"lex.yy.c" 7 | void yyerror(const char *s); 8 | int yylex(); 9 | int yywrap(); 10 | void add(char); 11 | void insert_type(); 12 | int search(char *); 13 | void insert_type(); 14 | void printtree(struct node*); 15 | void printTreeUtil(struct node*, int); 16 | void printInorder(struct node *); 17 | void printBT(char *, struct node *, char *); 18 | struct node* mknode(struct node *left, struct node *right, char *token); 19 | 20 | struct dataType { 21 | char * id_name; 22 | char * data_type; 23 | char * type; 24 | int line_no; 25 | } symbolTable[40]; 26 | 27 | int count=0; 28 | int q; 29 | char type[10]; 30 | extern int countn; 31 | struct node *head; 32 | 33 | struct node { 34 | struct node *left; 35 | struct node *right; 36 | char *token; 37 | }; 38 | 39 | %} 40 | 41 | %union { struct var_name { 42 | char name[100]; 43 | struct node* nd; 44 | } nam; 45 | } 46 | %token VOID 47 | %token PRINTFF SCANFF INT FLOAT CHAR FOR IF ELSE TRUE FALSE NUMBER ID LE GE EQ NE GT LT AND OR STR ADD MULTIPLY DIVIDE SUBTRACT UNARY INCLUDE RETURN 48 | %type headers main body return datatype expression statement init value arithmetic relop program else condition 49 | 50 | %% 51 | 52 | program: headers main '(' ')' '{' body return '}' { $2.nd = mknode($6.nd, $7.nd, "main"); $$.nd = mknode($1.nd, $2.nd, "program"); 53 | head = $$.nd; 54 | } 55 | ; 56 | 57 | headers: headers headers { $$.nd = mknode($1.nd, $2.nd, "headers"); } 58 | | INCLUDE { add('H'); } { $$.nd = mknode(NULL, NULL, $1.name); } 59 | ; 60 | 61 | main: datatype ID { add('K'); } 62 | ; 63 | 64 | datatype: INT { insert_type(); } 65 | | FLOAT { insert_type(); } 66 | | CHAR { insert_type(); } 67 | | VOID { insert_type(); } 68 | ; 69 | 70 | body: FOR { add('K'); } '(' statement ';' statement ';' statement ')' '{' body '}' { struct node *temp = mknode($6.nd, $8.nd, "CONDITION"); struct node *temp2 = mknode($4.nd, temp, "CONDITION"); $$.nd = mknode(temp2, $11.nd, $1.name); } 71 | | IF { add('K'); } '(' condition ')' '{' body '}' else { struct node *iff = mknode($4.nd, $7.nd, $1.name); $$.nd = mknode(iff, $9.nd, "if-else"); } 72 | | statement ';' { $$.nd = $1.nd; } 73 | | body body { $$.nd = mknode($1.nd, $2.nd, "statements"); } 74 | | PRINTFF { add('K'); } '(' STR ')' ';' { $$.nd = mknode(NULL, NULL, "printf"); } 75 | | SCANFF { add('K'); } '(' STR ',' '&' ID ')' ';' { $$.nd = mknode(NULL, NULL, "scanf"); } 76 | ; 77 | 78 | else: ELSE { add('K'); } '{' body '}' { $$.nd = mknode(NULL, $4.nd, $1.name); } 79 | | { $$.nd = NULL; } 80 | ; 81 | 82 | condition: value relop value { $$.nd = mknode($1.nd, $3.nd, $2.name); } 83 | | TRUE { add('K'); $$.nd = NULL; } 84 | | FALSE { add('K'); $$.nd = NULL; } 85 | | { $$.nd = NULL; } 86 | ; 87 | 88 | statement: datatype ID { add('V'); } init { $2.nd = mknode(NULL, NULL, $2.name); $$.nd = mknode($2.nd, $4.nd, "declaration"); } 89 | | ID '=' expression { $1.nd = mknode(NULL, NULL, $1.name); $$.nd = mknode($1.nd, $3.nd, "="); } 90 | | ID relop expression { $1.nd = mknode(NULL, NULL, $1.name); $$.nd = mknode($1.nd, $3.nd, $2.name); } 91 | | ID UNARY { $1.nd = mknode(NULL, NULL, $1.name); $2.nd = mknode(NULL, NULL, $2.name); $$.nd = mknode($1.nd, $2.nd, "ITERATOR"); } 92 | | UNARY ID { $1.nd = mknode(NULL, NULL, $1.name); $2.nd = mknode(NULL, NULL, $2.name); $$.nd = mknode($1.nd, $2.nd, "ITERATOR"); } 93 | ; 94 | 95 | init: '=' value { $$.nd = $2.nd; } 96 | | { $$.nd = mknode(NULL, NULL, "NULL"); } 97 | ; 98 | 99 | expression: expression arithmetic expression { $$.nd = mknode($1.nd, $3.nd, $2.name); } 100 | | value { $$.nd = $1.nd; } 101 | ; 102 | 103 | arithmetic: ADD 104 | | SUBTRACT 105 | | MULTIPLY 106 | | DIVIDE 107 | ; 108 | 109 | relop: LT 110 | | GT 111 | | LE 112 | | GE 113 | | EQ 114 | | NE 115 | ; 116 | 117 | value: NUMBER { add('C'); $$.nd = mknode(NULL, NULL, $1.name); } 118 | | ID { $$.nd = mknode(NULL, NULL, $1.name); } 119 | ; 120 | 121 | return: RETURN { add('K'); } value ';' { $1.nd = mknode(NULL, NULL, "return"); $$.nd = mknode($1.nd, $3.nd, "RETURN"); } 122 | | { $$.nd = NULL; } 123 | ; 124 | 125 | %% 126 | 127 | int main() { 128 | yyparse(); 129 | printf("\n\n \t\t\t\t\t\t PHASE 1: LEXICAL ANALYSIS \n\n"); 130 | printf("\nSYMBOL DATATYPE TYPE LINE NUMBER \n"); 131 | printf("_______________________________________\n\n"); 132 | int i=0; 133 | for(i=0; i=0; i--) { 149 | if(strcmp(symbolTable[i].id_name, type)==0) { 150 | return -1; 151 | break; 152 | } 153 | } 154 | return 0; 155 | } 156 | 157 | void add(char c) { 158 | q=search(yytext); 159 | if(q==0) { 160 | if(c=='H') { 161 | symbolTable[count].id_name=strdup(yytext); 162 | symbolTable[count].data_type=strdup(type); 163 | symbolTable[count].line_no=countn; 164 | symbolTable[count].type=strdup("Header"); 165 | count++; 166 | } 167 | else if(c=='K') { 168 | symbolTable[count].id_name=strdup(yytext); 169 | symbolTable[count].data_type=strdup("N/A"); 170 | symbolTable[count].line_no=countn; 171 | symbolTable[count].type=strdup("Keyword\t"); 172 | count++; 173 | } 174 | else if(c=='V') { 175 | symbolTable[count].id_name=strdup(yytext); 176 | symbolTable[count].data_type=strdup(type); 177 | symbolTable[count].line_no=countn; 178 | symbolTable[count].type=strdup("Variable"); 179 | count++; 180 | } 181 | else if(c=='C') { 182 | symbolTable[count].id_name=strdup(yytext); 183 | symbolTable[count].data_type=strdup("CONST"); 184 | symbolTable[count].line_no=countn; 185 | symbolTable[count].type=strdup("Constant"); 186 | count++; 187 | } 188 | } 189 | } 190 | 191 | struct node* mknode(struct node *left, struct node *right, char *token) { 192 | struct node *newnode = (struct node *)malloc(sizeof(struct node)); 193 | char *newstr = (char *)malloc(strlen(token)+1); 194 | strcpy(newstr, token); 195 | newnode->left = left; 196 | newnode->right = right; 197 | newnode->token = newstr; 198 | return(newnode); 199 | } 200 | 201 | void printtree(struct node* tree) { 202 | printTreeUtil(tree, 0); 203 | printf("\n\n the Inorder traversal of the above tree is: \n\n"); 204 | printInorder(tree); 205 | //printf("\n\n"); 206 | //printBT("", tree, "false"); 207 | } 208 | 209 | void printInorder(struct node *tree) { 210 | int i; 211 | if (tree->left) { 212 | printInorder(tree->left); 213 | } 214 | printf("%s, ", tree->token); 215 | if (tree->right) { 216 | printInorder(tree->right); 217 | } 218 | } 219 | 220 | void printTreeUtil(struct node *root, int space) { 221 | if(root == NULL) 222 | return; 223 | space += 7; 224 | printTreeUtil(root->right, space); 225 | // printf("\n"); 226 | for (int i = 7; i < space; i++) 227 | printf(" "); 228 | printf("%s\n", root->token); 229 | printTreeUtil(root->left, space); 230 | } 231 | 232 | void printBT(char *prefix, struct node *node, char *isLeft) { 233 | if( node != NULL ) { 234 | printf("%s", prefix); 235 | if (isLeft == "true") { 236 | printf("├──"); 237 | } else { 238 | printf("└──"); 239 | } 240 | printf("%s\n", node->token); 241 | if (isLeft == "true") { 242 | prefix = *prefix + "│ "; 243 | } else{ 244 | prefix = *prefix + " "; 245 | } 246 | printBT(prefix, node->left, "true"); 247 | printBT(prefix, node->right, "false"); 248 | } 249 | } 250 | 251 | void insert_type() { 252 | strcpy(type, yytext); 253 | } 254 | 255 | void yyerror(const char* msg) { 256 | fprintf(stderr, "%s\n", msg); 257 | } -------------------------------------------------------------------------------- /archive/parser_v7/parser.y: -------------------------------------------------------------------------------- 1 | %{ 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include"lex.yy.c" 7 | void yyerror(const char *s); 8 | int yylex(); 9 | int yywrap(); 10 | void add(char); 11 | void insert_type(); 12 | int search(char *); 13 | void insert_type(); 14 | void printtree(struct node*); 15 | void printTreeUtil(struct node*, int); 16 | void printInorder(struct node *); 17 | void check_declaration(char *); 18 | struct node* mknode(struct node *left, struct node *right, char *token); 19 | 20 | struct dataType { 21 | char * id_name; 22 | char * data_type; 23 | char * type; 24 | int line_no; 25 | } symbolTable[40]; 26 | 27 | int count=0; 28 | int q; 29 | char type[10]; 30 | extern int countn; 31 | struct node *head; 32 | int sem_errors=0; 33 | 34 | struct node { 35 | struct node *left; 36 | struct node *right; 37 | char *token; 38 | }; 39 | 40 | %} 41 | 42 | %union { struct var_name { 43 | char name[100]; 44 | struct node* nd; 45 | } nam; 46 | } 47 | %token VOID 48 | %token PRINTFF SCANFF INT FLOAT CHAR FOR IF ELSE TRUE FALSE NUMBER ID LE GE EQ NE GT LT AND OR STR ADD MULTIPLY DIVIDE SUBTRACT UNARY INCLUDE RETURN 49 | %type headers main body return datatype expression statement init value arithmetic relop program else condition 50 | 51 | %% 52 | 53 | program: headers main '(' ')' '{' body return '}' { $2.nd = mknode($6.nd, $7.nd, "main"); $$.nd = mknode($1.nd, $2.nd, "program"); 54 | head = $$.nd; 55 | } 56 | ; 57 | 58 | headers: headers headers { $$.nd = mknode($1.nd, $2.nd, "headers"); } 59 | | INCLUDE { add('H'); } { $$.nd = mknode(NULL, NULL, $1.name); } 60 | ; 61 | 62 | main: datatype ID { add('K'); } 63 | ; 64 | 65 | datatype: INT { insert_type(); } 66 | | FLOAT { insert_type(); } 67 | | CHAR { insert_type(); } 68 | | VOID { insert_type(); } 69 | ; 70 | 71 | body: FOR { add('K'); } '(' statement ';' statement ';' statement ')' '{' body '}' { struct node *temp = mknode($6.nd, $8.nd, "CONDITION"); struct node *temp2 = mknode($4.nd, temp, "CONDITION"); $$.nd = mknode(temp2, $11.nd, $1.name); } 72 | | IF { add('K'); } '(' condition ')' '{' body '}' else { struct node *iff = mknode($4.nd, $7.nd, $1.name); $$.nd = mknode(iff, $9.nd, "if-else"); } 73 | | statement ';' { $$.nd = $1.nd; } 74 | | body body { $$.nd = mknode($1.nd, $2.nd, "statements"); } 75 | | PRINTFF { add('K'); } '(' STR ')' ';' { $$.nd = mknode(NULL, NULL, "printf"); } 76 | | SCANFF { add('K'); } '(' STR ',' '&' ID ')' ';' { $$.nd = mknode(NULL, NULL, "scanf"); } 77 | ; 78 | 79 | else: ELSE { add('K'); } '{' body '}' { $$.nd = mknode(NULL, $4.nd, $1.name); } 80 | | { $$.nd = NULL; } 81 | ; 82 | 83 | condition: value relop value { $$.nd = mknode($1.nd, $3.nd, $2.name); } 84 | | TRUE { add('K'); $$.nd = NULL; } 85 | | FALSE { add('K'); $$.nd = NULL; } 86 | | { $$.nd = NULL; } 87 | ; 88 | 89 | statement: datatype ID { add('V'); } init { $2.nd = mknode(NULL, NULL, $2.name); $$.nd = mknode($2.nd, $4.nd, "declaration"); } 90 | | ID { check_declaration($1.name); } '=' expression { $1.nd = mknode(NULL, NULL, $1.name); $$.nd = mknode($1.nd, $4.nd, "="); } 91 | | ID { check_declaration($1.name); } relop expression { $1.nd = mknode(NULL, NULL, $1.name); $$.nd = mknode($1.nd, $4.nd, $3.name); } 92 | | ID { check_declaration($1.name); } UNARY { $1.nd = mknode(NULL, NULL, $1.name); $3.nd = mknode(NULL, NULL, $3.name); $$.nd = mknode($1.nd, $3.nd, "ITERATOR"); } 93 | | UNARY ID { check_declaration($2.name); $1.nd = mknode(NULL, NULL, $1.name); $2.nd = mknode(NULL, NULL, $2.name); $$.nd = mknode($1.nd, $2.nd, "ITERATOR"); } 94 | ; 95 | 96 | init: '=' value { $$.nd = $2.nd; } 97 | | { $$.nd = mknode(NULL, NULL, "NULL"); } 98 | ; 99 | 100 | expression: expression arithmetic expression { $$.nd = mknode($1.nd, $3.nd, $2.name); } 101 | | value { $$.nd = $1.nd; } 102 | ; 103 | 104 | arithmetic: ADD 105 | | SUBTRACT 106 | | MULTIPLY 107 | | DIVIDE 108 | ; 109 | 110 | relop: LT 111 | | GT 112 | | LE 113 | | GE 114 | | EQ 115 | | NE 116 | ; 117 | 118 | value: NUMBER { add('C'); $$.nd = mknode(NULL, NULL, $1.name); } 119 | | ID { check_declaration($1.name); $$.nd = mknode(NULL, NULL, $1.name); } 120 | ; 121 | 122 | return: RETURN { add('K'); } value ';' { $1.nd = mknode(NULL, NULL, "return"); $$.nd = mknode($1.nd, $3.nd, "RETURN"); } 123 | | { $$.nd = NULL; } 124 | ; 125 | 126 | %% 127 | 128 | int main() { 129 | yyparse(); 130 | printf("\n\n"); 131 | printf("\t\t\t\t\t\t\t\t PHASE 1: LEXICAL ANALYSIS \n\n"); 132 | printf("\nSYMBOL DATATYPE TYPE LINE NUMBER \n"); 133 | printf("_______________________________________\n\n"); 134 | int i=0; 135 | for(i=0; i0){ 148 | printf("Semantic analysis completed with %d errors!", sem_errors); 149 | }else{ 150 | printf("Semantic analysis completed with no errors"); 151 | } 152 | printf("\n\n"); 153 | } 154 | 155 | int search(char *type) { 156 | int i; 157 | for(i=count-1; i>=0; i--) { 158 | if(strcmp(symbolTable[i].id_name, type)==0) { 159 | return -1; 160 | break; 161 | } 162 | } 163 | return 0; 164 | } 165 | 166 | void check_declaration(char *c) { 167 | q = search(c); 168 | if(!q) { 169 | printf("ERROR: Line %d: Variable \"%s\" not declared before usage!\n", countn+1, c); 170 | sem_errors++; 171 | //exit(0); 172 | } 173 | } 174 | 175 | void add(char c) { 176 | q=search(yytext); 177 | if(!q) { 178 | if(c == 'H') { 179 | symbolTable[count].id_name=strdup(yytext); 180 | symbolTable[count].data_type=strdup(type); 181 | symbolTable[count].line_no=countn; 182 | symbolTable[count].type=strdup("Header"); 183 | count++; 184 | } 185 | else if(c == 'K') { 186 | symbolTable[count].id_name=strdup(yytext); 187 | symbolTable[count].data_type=strdup("N/A"); 188 | symbolTable[count].line_no=countn; 189 | symbolTable[count].type=strdup("Keyword\t"); 190 | count++; 191 | } 192 | else if(c == 'V') { 193 | symbolTable[count].id_name=strdup(yytext); 194 | symbolTable[count].data_type=strdup(type); 195 | symbolTable[count].line_no=countn; 196 | symbolTable[count].type=strdup("Variable"); 197 | count++; 198 | } 199 | else if(c == 'C') { 200 | symbolTable[count].id_name=strdup(yytext); 201 | symbolTable[count].data_type=strdup("CONST"); 202 | symbolTable[count].line_no=countn; 203 | symbolTable[count].type=strdup("Constant"); 204 | count++; 205 | } 206 | } 207 | else if(c == 'V' && q) { 208 | printf("ERROR: Line %d: Multiple declarations of \"%s\" not allowed!\n", countn+1, yytext); 209 | sem_errors++; 210 | } 211 | } 212 | 213 | struct node* mknode(struct node *left, struct node *right, char *token) { 214 | struct node *newnode = (struct node *)malloc(sizeof(struct node)); 215 | char *newstr = (char *)malloc(strlen(token)+1); 216 | strcpy(newstr, token); 217 | newnode->left = left; 218 | newnode->right = right; 219 | newnode->token = newstr; 220 | return(newnode); 221 | } 222 | 223 | void printtree(struct node* tree) { 224 | //printTreeUtil(tree, 0); 225 | printf("\n\nInorder traversal of the Parse Tree is: \n\n"); 226 | printInorder(tree); 227 | } 228 | 229 | void printInorder(struct node *tree) { 230 | int i; 231 | if (tree->left) { 232 | printInorder(tree->left); 233 | } 234 | printf("%s, ", tree->token); 235 | if (tree->right) { 236 | printInorder(tree->right); 237 | } 238 | } 239 | 240 | void printTreeUtil(struct node *root, int space) { 241 | if(root == NULL) 242 | return; 243 | space += 7; 244 | printTreeUtil(root->right, space); 245 | for (int i = 7; i < space; i++) 246 | printf(" "); 247 | printf("%s\n", root->token); 248 | printTreeUtil(root->left, space); 249 | } 250 | 251 | void insert_type() { 252 | strcpy(type, yytext); 253 | } 254 | 255 | void yyerror(const char* msg) { 256 | fprintf(stderr, "%s\n", msg); 257 | } -------------------------------------------------------------------------------- /archive/parser_v8/parser.y: -------------------------------------------------------------------------------- 1 | %{ 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include"lex.yy.c" 7 | void yyerror(const char *s); 8 | int yylex(); 9 | int yywrap(); 10 | void add(char); 11 | void insert_type(); 12 | int search(char *); 13 | void insert_type(); 14 | void printtree(struct node*); 15 | void printTreeUtil(struct node*, int); 16 | void printInorder(struct node *); 17 | void check_declaration(char *); 18 | int check_types(char *, char *); 19 | char *get_type(char *); 20 | struct node* mknode(struct node *left, struct node *right, char *token); 21 | 22 | struct dataType { 23 | char * id_name; 24 | char * data_type; 25 | char * type; 26 | int line_no; 27 | } symbolTable[40]; 28 | 29 | int count=0; 30 | int q; 31 | char type[10]; 32 | extern int countn; 33 | struct node *head; 34 | int sem_errors=0; 35 | 36 | struct node { 37 | struct node *left; 38 | struct node *right; 39 | char *token; 40 | }; 41 | 42 | %} 43 | 44 | %union { struct var_name { 45 | char name[100]; 46 | struct node* nd; 47 | } nam; 48 | 49 | struct var_name2 { 50 | char name[100]; 51 | struct node* nd; 52 | char type[5]; 53 | } nam2; 54 | } 55 | %token VOID 56 | %token PRINTFF SCANFF INT FLOAT CHAR FOR IF ELSE TRUE FALSE NUMBER FLOAT_NUM ID LE GE EQ NE GT LT AND OR STR ADD MULTIPLY DIVIDE SUBTRACT UNARY INCLUDE RETURN 57 | %type headers main body return datatype statement arithmetic relop program else condition 58 | %type init value expression 59 | 60 | %% 61 | 62 | program: headers main '(' ')' '{' body return '}' { $2.nd = mknode($6.nd, $7.nd, "main"); $$.nd = mknode($1.nd, $2.nd, "program"); 63 | head = $$.nd; 64 | } 65 | ; 66 | 67 | headers: headers headers { $$.nd = mknode($1.nd, $2.nd, "headers"); } 68 | | INCLUDE { add('H'); } { $$.nd = mknode(NULL, NULL, $1.name); } 69 | ; 70 | 71 | main: datatype ID { add('K'); } 72 | ; 73 | 74 | datatype: INT { insert_type(); } 75 | | FLOAT { insert_type(); } 76 | | CHAR { insert_type(); } 77 | | VOID { insert_type(); } 78 | ; 79 | 80 | body: FOR { add('K'); } '(' statement ';' statement ';' statement ')' '{' body '}' { struct node *temp = mknode($6.nd, $8.nd, "CONDITION"); struct node *temp2 = mknode($4.nd, temp, "CONDITION"); $$.nd = mknode(temp2, $11.nd, $1.name); } 81 | | IF { add('K'); } '(' condition ')' '{' body '}' else { struct node *iff = mknode($4.nd, $7.nd, $1.name); $$.nd = mknode(iff, $9.nd, "if-else"); } 82 | | statement ';' { $$.nd = $1.nd; } 83 | | body body { $$.nd = mknode($1.nd, $2.nd, "statements"); } 84 | | PRINTFF { add('K'); } '(' STR ')' ';' { $$.nd = mknode(NULL, NULL, "printf"); } 85 | | SCANFF { add('K'); } '(' STR ',' '&' ID ')' ';' { $$.nd = mknode(NULL, NULL, "scanf"); } 86 | ; 87 | 88 | else: ELSE { add('K'); } '{' body '}' { $$.nd = mknode(NULL, $4.nd, $1.name); } 89 | | { $$.nd = NULL; } 90 | ; 91 | 92 | condition: value relop value { $$.nd = mknode($1.nd, $3.nd, $2.name); } 93 | | TRUE { add('K'); $$.nd = NULL; } 94 | | FALSE { add('K'); $$.nd = NULL; } 95 | | { $$.nd = NULL; } 96 | ; 97 | 98 | statement: datatype ID { add('V'); } init { 99 | $2.nd = mknode(NULL, NULL, $2.name); 100 | int t = check_types($1.name, $4.type); 101 | if(t>0){ 102 | if(t == 1) { 103 | struct node *temp = mknode(NULL, $4.nd, "inttofloat"); 104 | $$.nd = mknode($2.nd, temp, "declaration"); 105 | } else { 106 | struct node *temp = mknode(NULL, $4.nd, "floattoint"); 107 | $$.nd = mknode($2.nd, temp, "declaration"); 108 | } 109 | } else { 110 | $$.nd = mknode($2.nd, $4.nd, "declaration"); 111 | } 112 | } 113 | | ID { check_declaration($1.name); } '=' expression { 114 | $1.nd = mknode(NULL, NULL, $1.name); 115 | char *id_type = get_type($1.name); 116 | if(id_type[0] != $4.type[0]) { 117 | if(id_type[0] == 'i') { 118 | struct node *temp = mknode(NULL, $4.nd, "floattoint"); 119 | $$.nd = mknode($1.nd, temp, "="); 120 | } 121 | else { 122 | struct node *temp = mknode(NULL, $4.nd, "inttofloat"); 123 | $$.nd = mknode($1.nd, temp, "="); 124 | } 125 | } 126 | else{ 127 | $$.nd = mknode($1.nd, $4.nd, "="); 128 | } 129 | } 130 | | ID { check_declaration($1.name); } relop expression { $1.nd = mknode(NULL, NULL, $1.name); $$.nd = mknode($1.nd, $4.nd, $3.name); } 131 | | ID { check_declaration($1.name); } UNARY { $1.nd = mknode(NULL, NULL, $1.name); $3.nd = mknode(NULL, NULL, $3.name); $$.nd = mknode($1.nd, $3.nd, "ITERATOR"); } 132 | | UNARY ID { check_declaration($2.name); $1.nd = mknode(NULL, NULL, $1.name); $2.nd = mknode(NULL, NULL, $2.name); $$.nd = mknode($1.nd, $2.nd, "ITERATOR"); } 133 | ; 134 | 135 | init: '=' value { $$.nd = $2.nd; sprintf($$.type, $2.type); } 136 | | { sprintf($$.type, "null"); $$.nd = mknode(NULL, NULL, "NULL"); } 137 | ; 138 | 139 | expression: expression arithmetic expression { 140 | if($1.type[0] == $3.type[0]){ 141 | sprintf($$.type, $1.type); 142 | $$.nd = mknode($1.nd, $3.nd, $2.name); 143 | } 144 | else{ 145 | sprintf($$.type, "float"); 146 | if($1.type[0] == 'i'){ 147 | struct node *temp = mknode(NULL, $1.nd, "inttofloat"); 148 | $$.nd = mknode(temp, $3.nd, $2.name); 149 | } 150 | else{ 151 | struct node *temp = mknode(NULL, $3.nd, "inttofloat"); 152 | $$.nd = mknode($1.nd, temp, $2.name); 153 | } 154 | } 155 | } 156 | | value { sprintf($$.type, $1.type); $$.nd = $1.nd; } 157 | ; 158 | 159 | arithmetic: ADD 160 | | SUBTRACT 161 | | MULTIPLY 162 | | DIVIDE 163 | ; 164 | 165 | relop: LT 166 | | GT 167 | | LE 168 | | GE 169 | | EQ 170 | | NE 171 | ; 172 | 173 | value: NUMBER { sprintf($$.type, "int"); add('C'); $$.nd = mknode(NULL, NULL, $1.name); } 174 | | FLOAT_NUM { sprintf($$.type, "float"); add('C'); $$.nd = mknode(NULL, NULL, $1.name); } 175 | | ID { char *id_type = get_type($1.name); sprintf($$.type, id_type); check_declaration($1.name); $$.nd = mknode(NULL, NULL, $1.name); } 176 | ; 177 | 178 | return: RETURN { add('K'); } value ';' { $1.nd = mknode(NULL, NULL, "return"); $$.nd = mknode($1.nd, $3.nd, "RETURN"); } 179 | | { $$.nd = NULL; } 180 | ; 181 | 182 | %% 183 | 184 | int main() { 185 | yyparse(); 186 | printf("\n\n"); 187 | printf("\t\t\t\t\t\t\t\t PHASE 1: LEXICAL ANALYSIS \n\n"); 188 | printf("\nSYMBOL DATATYPE TYPE LINE NUMBER \n"); 189 | printf("_______________________________________\n\n"); 190 | int i=0; 191 | for(i=0; i0){ 204 | printf("Semantic analysis completed with %d errors!", sem_errors); 205 | }else{ 206 | printf("Semnatic analysis completed with no errors"); 207 | } 208 | printf("\n\n"); 209 | } 210 | 211 | int search(char *type) { 212 | int i; 213 | for(i=count-1; i>=0; i--) { 214 | if(strcmp(symbolTable[i].id_name, type)==0) { 215 | return -1; 216 | break; 217 | } 218 | } 219 | return 0; 220 | } 221 | 222 | void check_declaration(char *c) { 223 | q = search(c); 224 | if(!q) { 225 | printf("ERROR: Line %d: Variable \"%s\" not declared before usage!\n", countn+1, c); 226 | sem_errors++; 227 | //exit(0); 228 | } 229 | } 230 | 231 | int check_types(char *type1, char *type2){ 232 | if(type1[0] == type2[0]) 233 | return 0; 234 | if(type1[0]=='f') 235 | return 1; 236 | return 2; 237 | } 238 | 239 | char *get_type(char *var){ 240 | for(int i=0; ileft = left; 292 | newnode->right = right; 293 | newnode->token = newstr; 294 | return(newnode); 295 | } 296 | 297 | void printtree(struct node* tree) { 298 | //printTreeUtil(tree, 0); 299 | printf("\n\nInorder traversal of the Parse Tree is: \n\n"); 300 | printInorder(tree); 301 | } 302 | 303 | void printInorder(struct node *tree) { 304 | int i; 305 | if (tree->left) { 306 | printInorder(tree->left); 307 | } 308 | printf("%s, ", tree->token); 309 | if (tree->right) { 310 | printInorder(tree->right); 311 | } 312 | } 313 | 314 | void printTreeUtil(struct node *root, int space) { 315 | if(root == NULL) 316 | return; 317 | space += 7; 318 | printTreeUtil(root->right, space); 319 | for (int i = 7; i < space; i++) 320 | printf(" "); 321 | printf("%s\n", root->token); 322 | printTreeUtil(root->left, space); 323 | } 324 | 325 | void insert_type() { 326 | strcpy(type, yytext); 327 | } 328 | 329 | void yyerror(const char* msg) { 330 | fprintf(stderr, "%s\n", msg); 331 | } -------------------------------------------------------------------------------- /Part 5 - Adding the Semantic Analyzer/parser4.y: -------------------------------------------------------------------------------- 1 | %{ 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include"lex.yy.c" 7 | void yyerror(const char *s); 8 | int yylex(); 9 | int yywrap(); 10 | void add(char); 11 | void insert_type(); 12 | int search(char *); 13 | void insert_type(); 14 | void print_tree(struct node*); 15 | void print_inorder(struct node *); 16 | void check_declaration(char *); 17 | void check_return_type(char *); 18 | int check_types(char *, char *); 19 | char *get_type(char *); 20 | struct node* mknode(struct node *left, struct node *right, char *token); 21 | 22 | struct dataType { 23 | char * id_name; 24 | char * data_type; 25 | char * type; 26 | int line_no; 27 | } symbol_table[40]; 28 | 29 | int count=0; 30 | int q; 31 | char type[10]; 32 | extern int countn; 33 | struct node *head; 34 | int sem_errors=0; 35 | int label=0; 36 | char buff[100]; 37 | char errors[10][100]; 38 | char reserved[10][10] = {"int", "float", "char", "void", "if", "else", "for", "main", "return", "include"}; 39 | 40 | struct node { 41 | struct node *left; 42 | struct node *right; 43 | char *token; 44 | }; 45 | 46 | %} 47 | 48 | %union { struct var_name { 49 | char name[100]; 50 | struct node* nd; 51 | } nd_obj; 52 | 53 | struct var_name2 { 54 | char name[100]; 55 | struct node* nd; 56 | char type[5]; 57 | } nd_obj2; 58 | } 59 | %token VOID 60 | %token CHARACTER PRINTFF SCANFF INT FLOAT CHAR FOR IF ELSE TRUE FALSE NUMBER FLOAT_NUM ID LE GE EQ NE GT LT AND OR STR ADD MULTIPLY DIVIDE SUBTRACT UNARY INCLUDE RETURN 61 | %type headers main body return datatype statement arithmetic relop program else condition 62 | %type init value expression 63 | 64 | %% 65 | 66 | program: headers main '(' ')' '{' body return '}' { $2.nd = mknode($6.nd, $7.nd, "main"); $$.nd = mknode($1.nd, $2.nd, "program"); 67 | head = $$.nd; 68 | } 69 | ; 70 | 71 | headers: headers headers { $$.nd = mknode($1.nd, $2.nd, "headers"); } 72 | | INCLUDE { add('H'); } { $$.nd = mknode(NULL, NULL, $1.name); } 73 | ; 74 | 75 | main: datatype ID { add('F'); } 76 | ; 77 | 78 | datatype: INT { insert_type(); } 79 | | FLOAT { insert_type(); } 80 | | CHAR { insert_type(); } 81 | | VOID { insert_type(); } 82 | ; 83 | 84 | body: FOR { add('K'); } '(' statement ';' condition ';' statement ')' '{' body '}' { 85 | struct node *temp = mknode($6.nd, $8.nd, "CONDITION"); 86 | struct node *temp2 = mknode($4.nd, temp, "CONDITION"); 87 | $$.nd = mknode(temp2, $11.nd, $1.name); 88 | } 89 | | IF { add('K'); } '(' condition ')' '{' body '}' else { 90 | struct node *iff = mknode($4.nd, $7.nd, $1.name); 91 | $$.nd = mknode(iff, $9.nd, "if-else"); 92 | } 93 | | statement ';' { $$.nd = $1.nd; } 94 | | body body { $$.nd = mknode($1.nd, $2.nd, "statements"); } 95 | | PRINTFF { add('K'); } '(' STR ')' ';' { $$.nd = mknode(NULL, NULL, "printf"); } 96 | | SCANFF { add('K'); } '(' STR ',' '&' ID ')' ';' { $$.nd = mknode(NULL, NULL, "scanf"); } 97 | ; 98 | 99 | else: ELSE { add('K'); } '{' body '}' { $$.nd = mknode(NULL, $4.nd, $1.name); } 100 | | { $$.nd = NULL; } 101 | ; 102 | 103 | condition: value relop value { $$.nd = mknode($1.nd, $3.nd, $2.name); } 104 | | TRUE { add('K'); $$.nd = NULL; } 105 | | FALSE { add('K'); $$.nd = NULL; } 106 | | { $$.nd = NULL; } 107 | ; 108 | 109 | statement: datatype ID { add('V'); } init { 110 | $2.nd = mknode(NULL, NULL, $2.name); 111 | int t = check_types($1.name, $4.type); 112 | if(t>0) { 113 | if(t == 1) { 114 | struct node *temp = mknode(NULL, $4.nd, "floattoint"); 115 | $$.nd = mknode($2.nd, temp, "declaration"); 116 | } 117 | else if(t == 2) { 118 | struct node *temp = mknode(NULL, $4.nd, "inttofloat"); 119 | $$.nd = mknode($2.nd, temp, "declaration"); 120 | } 121 | else if(t == 3) { 122 | struct node *temp = mknode(NULL, $4.nd, "chartoint"); 123 | $$.nd = mknode($2.nd, temp, "declaration"); 124 | } 125 | else if(t == 4) { 126 | struct node *temp = mknode(NULL, $4.nd, "inttochar"); 127 | $$.nd = mknode($2.nd, temp, "declaration"); 128 | } 129 | else if(t == 5) { 130 | struct node *temp = mknode(NULL, $4.nd, "chartofloat"); 131 | $$.nd = mknode($2.nd, temp, "declaration"); 132 | } 133 | else{ 134 | struct node *temp = mknode(NULL, $4.nd, "floattochar"); 135 | $$.nd = mknode($2.nd, temp, "declaration"); 136 | } 137 | } 138 | else { 139 | $$.nd = mknode($2.nd, $4.nd, "declaration"); 140 | } 141 | } 142 | | ID { check_declaration($1.name); } '=' expression { 143 | $1.nd = mknode(NULL, NULL, $1.name); 144 | char *id_type = get_type($1.name); 145 | if(strcmp(id_type, $4.type)) { 146 | if(!strcmp(id_type, "int")) { 147 | if(!strcmp($4.type, "float")){ 148 | struct node *temp = mknode(NULL, $4.nd, "floattoint"); 149 | $$.nd = mknode($1.nd, temp, "="); 150 | } 151 | else{ 152 | struct node *temp = mknode(NULL, $4.nd, "chartoint"); 153 | $$.nd = mknode($1.nd, temp, "="); 154 | } 155 | 156 | } 157 | else if(!strcmp(id_type, "float")) { 158 | if(!strcmp($4.type, "int")){ 159 | struct node *temp = mknode(NULL, $4.nd, "inttofloat"); 160 | $$.nd = mknode($1.nd, temp, "="); 161 | } 162 | else{ 163 | struct node *temp = mknode(NULL, $4.nd, "chartofloat"); 164 | $$.nd = mknode($1.nd, temp, "="); 165 | } 166 | 167 | } 168 | else{ 169 | if(!strcmp($4.type, "int")){ 170 | struct node *temp = mknode(NULL, $4.nd, "inttochar"); 171 | $$.nd = mknode($1.nd, temp, "="); 172 | } 173 | else{ 174 | struct node *temp = mknode(NULL, $4.nd, "floattochar"); 175 | $$.nd = mknode($1.nd, temp, "="); 176 | } 177 | } 178 | } 179 | else { 180 | $$.nd = mknode($1.nd, $4.nd, "="); 181 | } 182 | } 183 | | ID { check_declaration($1.name); } relop expression { $1.nd = mknode(NULL, NULL, $1.name); $$.nd = mknode($1.nd, $4.nd, $3.name); } 184 | | ID { check_declaration($1.name); } UNARY { 185 | $1.nd = mknode(NULL, NULL, $1.name); 186 | $3.nd = mknode(NULL, NULL, $3.name); 187 | $$.nd = mknode($1.nd, $3.nd, "ITERATOR"); 188 | } 189 | | UNARY ID { 190 | check_declaration($2.name); 191 | $1.nd = mknode(NULL, NULL, $1.name); 192 | $2.nd = mknode(NULL, NULL, $2.name); 193 | $$.nd = mknode($1.nd, $2.nd, "ITERATOR"); 194 | } 195 | ; 196 | 197 | init: '=' value { $$.nd = $2.nd; sprintf($$.type, $2.type); strcpy($$.name, $2.name); } 198 | | { sprintf($$.type, "null"); $$.nd = mknode(NULL, NULL, "NULL"); strcpy($$.name, "NULL"); } 199 | ; 200 | 201 | expression: expression arithmetic expression { 202 | if(!strcmp($1.type, $3.type)) { 203 | sprintf($$.type, $1.type); 204 | $$.nd = mknode($1.nd, $3.nd, $2.name); 205 | } 206 | else { 207 | if(!strcmp($1.type, "int") && !strcmp($3.type, "float")) { 208 | struct node *temp = mknode(NULL, $1.nd, "inttofloat"); 209 | sprintf($$.type, $3.type); 210 | $$.nd = mknode(temp, $3.nd, $2.name); 211 | } 212 | else if(!strcmp($1.type, "float") && !strcmp($3.type, "int")) { 213 | struct node *temp = mknode(NULL, $3.nd, "inttofloat"); 214 | sprintf($$.type, $1.type); 215 | $$.nd = mknode($1.nd, temp, $2.name); 216 | } 217 | else if(!strcmp($1.type, "int") && !strcmp($3.type, "char")) { 218 | struct node *temp = mknode(NULL, $3.nd, "chartoint"); 219 | sprintf($$.type, $1.type); 220 | $$.nd = mknode($1.nd, temp, $2.name); 221 | } 222 | else if(!strcmp($1.type, "char") && !strcmp($3.type, "int")) { 223 | struct node *temp = mknode(NULL, $1.nd, "chartoint"); 224 | sprintf($$.type, $3.type); 225 | $$.nd = mknode(temp, $3.nd, $2.name); 226 | } 227 | else if(!strcmp($1.type, "float") && !strcmp($3.type, "char")) { 228 | struct node *temp = mknode(NULL, $3.nd, "chartofloat"); 229 | sprintf($$.type, $1.type); 230 | $$.nd = mknode($1.nd, temp, $2.name); 231 | } 232 | else { 233 | struct node *temp = mknode(NULL, $1.nd, "chartofloat"); 234 | sprintf($$.type, $3.type); 235 | $$.nd = mknode(temp, $3.nd, $2.name); 236 | } 237 | } 238 | } 239 | | value { strcpy($$.name, $1.name); sprintf($$.type, $1.type); $$.nd = $1.nd; } 240 | ; 241 | 242 | arithmetic: ADD 243 | | SUBTRACT 244 | | MULTIPLY 245 | | DIVIDE 246 | ; 247 | 248 | relop: LT 249 | | GT 250 | | LE 251 | | GE 252 | | EQ 253 | | NE 254 | ; 255 | 256 | value: NUMBER { strcpy($$.name, $1.name); sprintf($$.type, "int"); add('C'); $$.nd = mknode(NULL, NULL, $1.name); } 257 | | FLOAT_NUM { strcpy($$.name, $1.name); sprintf($$.type, "float"); add('C'); $$.nd = mknode(NULL, NULL, $1.name); } 258 | | CHARACTER { strcpy($$.name, $1.name); sprintf($$.type, "char"); add('C'); $$.nd = mknode(NULL, NULL, $1.name); } 259 | | ID { strcpy($$.name, $1.name); char *id_type = get_type($1.name); sprintf($$.type, id_type); check_declaration($1.name); $$.nd = mknode(NULL, NULL, $1.name); } 260 | ; 261 | 262 | return: RETURN { add('K'); } value ';' { check_return_type($3.name); $1.nd = mknode(NULL, NULL, "return"); $$.nd = mknode($1.nd, $3.nd, "RETURN"); } 263 | | { $$.nd = NULL; } 264 | ; 265 | 266 | %% 267 | 268 | int main() { 269 | yyparse(); 270 | printf("\n\n"); 271 | printf("\t\t\t\t\t\t\t\t PHASE 1: LEXICAL ANALYSIS \n\n"); 272 | printf("\nSYMBOL DATATYPE TYPE LINE NUMBER \n"); 273 | printf("_______________________________________\n\n"); 274 | int i=0; 275 | for(i=0; i0) { 288 | printf("Semantic analysis completed with %d errors\n", sem_errors); 289 | for(int i=0; i=0; i--) { 301 | if(strcmp(symbol_table[i].id_name, type)==0) { 302 | return -1; 303 | break; 304 | } 305 | } 306 | return 0; 307 | } 308 | 309 | void check_declaration(char *c) { 310 | q = search(c); 311 | if(!q) { 312 | sprintf(errors[sem_errors], "Line %d: Variable \"%s\" not declared before usage!\n", countn+1, c); 313 | sem_errors++; 314 | } 315 | } 316 | 317 | void check_return_type(char *value) { 318 | char *main_datatype = get_type("main"); 319 | char *return_datatype = get_type(value); 320 | if((!strcmp(main_datatype, "int") && !strcmp(return_datatype, "CONST")) || !strcmp(main_datatype, return_datatype)){ 321 | return ; 322 | } 323 | else { 324 | sprintf(errors[sem_errors], "Line %d: Return type mismatch\n", countn+1); 325 | sem_errors++; 326 | } 327 | } 328 | 329 | int check_types(char *type1, char *type2){ 330 | // declaration with no init 331 | if(!strcmp(type2, "null")) 332 | return -1; 333 | // both datatypes are same 334 | if(!strcmp(type1, type2)) 335 | return 0; 336 | // both datatypes are different 337 | if(!strcmp(type1, "int") && !strcmp(type2, "float")) 338 | return 1; 339 | if(!strcmp(type1, "float") && !strcmp(type2, "int")) 340 | return 2; 341 | if(!strcmp(type1, "int") && !strcmp(type2, "char")) 342 | return 3; 343 | if(!strcmp(type1, "char") && !strcmp(type2, "int")) 344 | return 4; 345 | if(!strcmp(type1, "float") && !strcmp(type2, "char")) 346 | return 5; 347 | if(!strcmp(type1, "char") && !strcmp(type2, "float")) 348 | return 6; 349 | } 350 | 351 | char *get_type(char *var){ 352 | for(int i=0; ileft = left; 419 | newnode->right = right; 420 | newnode->token = newstr; 421 | return(newnode); 422 | } 423 | 424 | void print_tree(struct node* tree) { 425 | printf("\n\nInorder traversal of the Parse Tree is: \n\n"); 426 | print_inorder(tree); 427 | } 428 | 429 | void print_inorder(struct node *tree) { 430 | int i; 431 | if (tree->left) { 432 | print_inorder(tree->left); 433 | } 434 | printf("%s, ", tree->token); 435 | if (tree->right) { 436 | print_inorder(tree->right); 437 | } 438 | } 439 | 440 | void insert_type() { 441 | strcpy(type, yytext); 442 | } 443 | 444 | void yyerror(const char* msg) { 445 | fprintf(stderr, "%s\n", msg); 446 | } -------------------------------------------------------------------------------- /Part 6 - Intermediate Code Generation/parser5.y: -------------------------------------------------------------------------------- 1 | %{ 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include"lex.yy.c" 7 | void yyerror(const char *s); 8 | int yylex(); 9 | int yywrap(); 10 | void add(char); 11 | void insert_type(); 12 | int search(char *); 13 | void insert_type(); 14 | void print_tree(struct node*); 15 | void print_inorder(struct node *); 16 | void check_declaration(char *); 17 | void check_return_type(char *); 18 | int check_types(char *, char *); 19 | char *get_type(char *); 20 | struct node* mknode(struct node *left, struct node *right, char *token); 21 | 22 | struct dataType { 23 | char * id_name; 24 | char * data_type; 25 | char * type; 26 | int line_no; 27 | } symbol_table[40]; 28 | 29 | int count=0; 30 | int q; 31 | char type[10]; 32 | extern int countn; 33 | struct node *head; 34 | int sem_errors=0; 35 | int ic_idx=0; 36 | int temp_var=0; 37 | int label=0; 38 | int is_for=0; 39 | char buff[100]; 40 | char errors[10][100]; 41 | char reserved[10][10] = {"int", "float", "char", "void", "if", "else", "for", "main", "return", "include"}; 42 | char icg[50][100]; 43 | 44 | struct node { 45 | struct node *left; 46 | struct node *right; 47 | char *token; 48 | }; 49 | 50 | %} 51 | 52 | %union { struct var_name { 53 | char name[100]; 54 | struct node* nd; 55 | } nd_obj; 56 | 57 | struct var_name2 { 58 | char name[100]; 59 | struct node* nd; 60 | char type[5]; 61 | } nd_obj2; 62 | 63 | struct var_name3 { 64 | char name[100]; 65 | struct node* nd; 66 | char if_body[5]; 67 | char else_body[5]; 68 | } nd_obj3; 69 | } 70 | %token VOID 71 | %token CHARACTER PRINTFF SCANFF INT FLOAT CHAR FOR IF ELSE TRUE FALSE NUMBER FLOAT_NUM ID LE GE EQ NE GT LT AND OR STR ADD MULTIPLY DIVIDE SUBTRACT UNARY INCLUDE RETURN 72 | %type headers main body return datatype statement arithmetic relop program else 73 | %type init value expression 74 | %type condition 75 | 76 | %% 77 | 78 | program: headers main '(' ')' '{' body return '}' { $2.nd = mknode($6.nd, $7.nd, "main"); $$.nd = mknode($1.nd, $2.nd, "program"); 79 | head = $$.nd; 80 | } 81 | ; 82 | 83 | headers: headers headers { $$.nd = mknode($1.nd, $2.nd, "headers"); } 84 | | INCLUDE { add('H'); } { $$.nd = mknode(NULL, NULL, $1.name); } 85 | ; 86 | 87 | main: datatype ID { add('F'); } 88 | ; 89 | 90 | datatype: INT { insert_type(); } 91 | | FLOAT { insert_type(); } 92 | | CHAR { insert_type(); } 93 | | VOID { insert_type(); } 94 | ; 95 | 96 | body: FOR { add('K'); is_for = 1; } '(' statement ';' condition ';' statement ')' '{' body '}' { 97 | struct node *temp = mknode($6.nd, $8.nd, "CONDITION"); 98 | struct node *temp2 = mknode($4.nd, temp, "CONDITION"); 99 | $$.nd = mknode(temp2, $11.nd, $1.name); 100 | sprintf(icg[ic_idx++], buff); 101 | sprintf(icg[ic_idx++], "JUMP to %s\n", $6.if_body); 102 | sprintf(icg[ic_idx++], "\nLABEL %s:\n", $6.else_body); 103 | } 104 | | IF { add('K'); is_for = 0; } '(' condition ')' { sprintf(icg[ic_idx++], "\nLABEL %s:\n", $4.if_body); } '{' body '}' { sprintf(icg[ic_idx++], "\nLABEL %s:\n", $4.else_body); } else { 105 | struct node *iff = mknode($4.nd, $8.nd, $1.name); 106 | $$.nd = mknode(iff, $11.nd, "if-else"); 107 | sprintf(icg[ic_idx++], "GOTO next\n"); 108 | } 109 | | statement ';' { $$.nd = $1.nd; } 110 | | body body { $$.nd = mknode($1.nd, $2.nd, "statements"); } 111 | | PRINTFF { add('K'); } '(' STR ')' ';' { $$.nd = mknode(NULL, NULL, "printf"); } 112 | | SCANFF { add('K'); } '(' STR ',' '&' ID ')' ';' { $$.nd = mknode(NULL, NULL, "scanf"); } 113 | ; 114 | 115 | else: ELSE { add('K'); } '{' body '}' { $$.nd = mknode(NULL, $4.nd, $1.name); } 116 | | { $$.nd = NULL; } 117 | ; 118 | 119 | condition: value relop value { 120 | $$.nd = mknode($1.nd, $3.nd, $2.name); 121 | if(is_for) { 122 | sprintf($$.if_body, "L%d", label++); 123 | sprintf(icg[ic_idx++], "\nLABEL %s:\n", $$.if_body); 124 | sprintf(icg[ic_idx++], "\nif NOT (%s %s %s) GOTO L%d\n", $1.name, $2.name, $3.name, label); 125 | sprintf($$.else_body, "L%d", label++); 126 | } else { 127 | sprintf(icg[ic_idx++], "\nif (%s %s %s) GOTO L%d else GOTO L%d\n", $1.name, $2.name, $3.name, label, label+1); 128 | sprintf($$.if_body, "L%d", label++); 129 | sprintf($$.else_body, "L%d", label++); 130 | } 131 | } 132 | | TRUE { add('K'); $$.nd = NULL; } 133 | | FALSE { add('K'); $$.nd = NULL; } 134 | | { $$.nd = NULL; } 135 | ; 136 | 137 | statement: datatype ID { add('V'); } init { 138 | $2.nd = mknode(NULL, NULL, $2.name); 139 | int t = check_types($1.name, $4.type); 140 | if(t>0) { 141 | if(t == 1) { 142 | struct node *temp = mknode(NULL, $4.nd, "floattoint"); 143 | $$.nd = mknode($2.nd, temp, "declaration"); 144 | } 145 | else if(t == 2) { 146 | struct node *temp = mknode(NULL, $4.nd, "inttofloat"); 147 | $$.nd = mknode($2.nd, temp, "declaration"); 148 | } 149 | else if(t == 3) { 150 | struct node *temp = mknode(NULL, $4.nd, "chartoint"); 151 | $$.nd = mknode($2.nd, temp, "declaration"); 152 | } 153 | else if(t == 4) { 154 | struct node *temp = mknode(NULL, $4.nd, "inttochar"); 155 | $$.nd = mknode($2.nd, temp, "declaration"); 156 | } 157 | else if(t == 5) { 158 | struct node *temp = mknode(NULL, $4.nd, "chartofloat"); 159 | $$.nd = mknode($2.nd, temp, "declaration"); 160 | } 161 | else{ 162 | struct node *temp = mknode(NULL, $4.nd, "floattochar"); 163 | $$.nd = mknode($2.nd, temp, "declaration"); 164 | } 165 | } 166 | else { 167 | $$.nd = mknode($2.nd, $4.nd, "declaration"); 168 | } 169 | sprintf(icg[ic_idx++], "%s = %s\n", $2.name, $4.name); 170 | } 171 | | ID { check_declaration($1.name); } '=' expression { 172 | $1.nd = mknode(NULL, NULL, $1.name); 173 | char *id_type = get_type($1.name); 174 | if(strcmp(id_type, $4.type)) { 175 | if(!strcmp(id_type, "int")) { 176 | if(!strcmp($4.type, "float")){ 177 | struct node *temp = mknode(NULL, $4.nd, "floattoint"); 178 | $$.nd = mknode($1.nd, temp, "="); 179 | } 180 | else{ 181 | struct node *temp = mknode(NULL, $4.nd, "chartoint"); 182 | $$.nd = mknode($1.nd, temp, "="); 183 | } 184 | 185 | } 186 | else if(!strcmp(id_type, "float")) { 187 | if(!strcmp($4.type, "int")){ 188 | struct node *temp = mknode(NULL, $4.nd, "inttofloat"); 189 | $$.nd = mknode($1.nd, temp, "="); 190 | } 191 | else{ 192 | struct node *temp = mknode(NULL, $4.nd, "chartofloat"); 193 | $$.nd = mknode($1.nd, temp, "="); 194 | } 195 | 196 | } 197 | else{ 198 | if(!strcmp($4.type, "int")){ 199 | struct node *temp = mknode(NULL, $4.nd, "inttochar"); 200 | $$.nd = mknode($1.nd, temp, "="); 201 | } 202 | else{ 203 | struct node *temp = mknode(NULL, $4.nd, "floattochar"); 204 | $$.nd = mknode($1.nd, temp, "="); 205 | } 206 | } 207 | } 208 | else { 209 | $$.nd = mknode($1.nd, $4.nd, "="); 210 | } 211 | sprintf(icg[ic_idx++], "%s = %s\n", $1.name, $4.name); 212 | } 213 | | ID { check_declaration($1.name); } relop expression { $1.nd = mknode(NULL, NULL, $1.name); $$.nd = mknode($1.nd, $4.nd, $3.name); } 214 | | ID { check_declaration($1.name); } UNARY { 215 | $1.nd = mknode(NULL, NULL, $1.name); 216 | $3.nd = mknode(NULL, NULL, $3.name); 217 | $$.nd = mknode($1.nd, $3.nd, "ITERATOR"); 218 | if(!strcmp($3.name, "++")) { 219 | sprintf(buff, "t%d = %s + 1\n%s = t%d\n", temp_var, $1.name, $1.name, temp_var++); 220 | } 221 | else { 222 | sprintf(buff, "t%d = %s + 1\n%s = t%d\n", temp_var, $1.name, $1.name, temp_var++); 223 | } 224 | } 225 | | UNARY ID { 226 | check_declaration($2.name); 227 | $1.nd = mknode(NULL, NULL, $1.name); 228 | $2.nd = mknode(NULL, NULL, $2.name); 229 | $$.nd = mknode($1.nd, $2.nd, "ITERATOR"); 230 | if(!strcmp($1.name, "++")) { 231 | sprintf(buff, "t%d = %s + 1\n%s = t%d\n", temp_var, $2.name, $2.name, temp_var++); 232 | } 233 | else { 234 | sprintf(buff, "t%d = %s - 1\n%s = t%d\n", temp_var, $2.name, $2.name, temp_var++); 235 | 236 | } 237 | } 238 | ; 239 | 240 | init: '=' value { $$.nd = $2.nd; sprintf($$.type, $2.type); strcpy($$.name, $2.name); } 241 | | { sprintf($$.type, "null"); $$.nd = mknode(NULL, NULL, "NULL"); strcpy($$.name, "NULL"); } 242 | ; 243 | 244 | expression: expression arithmetic expression { 245 | if(!strcmp($1.type, $3.type)) { 246 | sprintf($$.type, $1.type); 247 | $$.nd = mknode($1.nd, $3.nd, $2.name); 248 | } 249 | else { 250 | if(!strcmp($1.type, "int") && !strcmp($3.type, "float")) { 251 | struct node *temp = mknode(NULL, $1.nd, "inttofloat"); 252 | sprintf($$.type, $3.type); 253 | $$.nd = mknode(temp, $3.nd, $2.name); 254 | } 255 | else if(!strcmp($1.type, "float") && !strcmp($3.type, "int")) { 256 | struct node *temp = mknode(NULL, $3.nd, "inttofloat"); 257 | sprintf($$.type, $1.type); 258 | $$.nd = mknode($1.nd, temp, $2.name); 259 | } 260 | else if(!strcmp($1.type, "int") && !strcmp($3.type, "char")) { 261 | struct node *temp = mknode(NULL, $3.nd, "chartoint"); 262 | sprintf($$.type, $1.type); 263 | $$.nd = mknode($1.nd, temp, $2.name); 264 | } 265 | else if(!strcmp($1.type, "char") && !strcmp($3.type, "int")) { 266 | struct node *temp = mknode(NULL, $1.nd, "chartoint"); 267 | sprintf($$.type, $3.type); 268 | $$.nd = mknode(temp, $3.nd, $2.name); 269 | } 270 | else if(!strcmp($1.type, "float") && !strcmp($3.type, "char")) { 271 | struct node *temp = mknode(NULL, $3.nd, "chartofloat"); 272 | sprintf($$.type, $1.type); 273 | $$.nd = mknode($1.nd, temp, $2.name); 274 | } 275 | else { 276 | struct node *temp = mknode(NULL, $1.nd, "chartofloat"); 277 | sprintf($$.type, $3.type); 278 | $$.nd = mknode(temp, $3.nd, $2.name); 279 | } 280 | } 281 | sprintf($$.name, "t%d", temp_var); 282 | temp_var++; 283 | sprintf(icg[ic_idx++], "%s = %s %s %s\n", $$.name, $1.name, $2.name, $3.name); 284 | } 285 | | value { strcpy($$.name, $1.name); sprintf($$.type, $1.type); $$.nd = $1.nd; } 286 | ; 287 | 288 | arithmetic: ADD 289 | | SUBTRACT 290 | | MULTIPLY 291 | | DIVIDE 292 | ; 293 | 294 | relop: LT 295 | | GT 296 | | LE 297 | | GE 298 | | EQ 299 | | NE 300 | ; 301 | 302 | value: NUMBER { strcpy($$.name, $1.name); sprintf($$.type, "int"); add('C'); $$.nd = mknode(NULL, NULL, $1.name); } 303 | | FLOAT_NUM { strcpy($$.name, $1.name); sprintf($$.type, "float"); add('C'); $$.nd = mknode(NULL, NULL, $1.name); } 304 | | CHARACTER { strcpy($$.name, $1.name); sprintf($$.type, "char"); add('C'); $$.nd = mknode(NULL, NULL, $1.name); } 305 | | ID { strcpy($$.name, $1.name); char *id_type = get_type($1.name); sprintf($$.type, id_type); check_declaration($1.name); $$.nd = mknode(NULL, NULL, $1.name); } 306 | ; 307 | 308 | return: RETURN { add('K'); } value ';' { check_return_type($3.name); $1.nd = mknode(NULL, NULL, "return"); $$.nd = mknode($1.nd, $3.nd, "RETURN"); } 309 | | { $$.nd = NULL; } 310 | ; 311 | 312 | %% 313 | 314 | int main() { 315 | yyparse(); 316 | printf("\n\n"); 317 | printf("\t\t\t\t\t\t\t\t PHASE 1: LEXICAL ANALYSIS \n\n"); 318 | printf("\nSYMBOL DATATYPE TYPE LINE NUMBER \n"); 319 | printf("_______________________________________\n\n"); 320 | int i=0; 321 | for(i=0; i0) { 334 | printf("Semantic analysis completed with %d errors\n", sem_errors); 335 | for(int i=0; i=0; i--) { 352 | if(strcmp(symbol_table[i].id_name, type)==0) { 353 | return -1; 354 | break; 355 | } 356 | } 357 | return 0; 358 | } 359 | 360 | void check_declaration(char *c) { 361 | q = search(c); 362 | if(!q) { 363 | sprintf(errors[sem_errors], "Line %d: Variable \"%s\" not declared before usage!\n", countn+1, c); 364 | sem_errors++; 365 | } 366 | } 367 | 368 | void check_return_type(char *value) { 369 | char *main_datatype = get_type("main"); 370 | char *return_datatype = get_type(value); 371 | if((!strcmp(main_datatype, "int") && !strcmp(return_datatype, "CONST")) || !strcmp(main_datatype, return_datatype)){ 372 | return ; 373 | } 374 | else { 375 | sprintf(errors[sem_errors], "Line %d: Return type mismatch\n", countn+1); 376 | sem_errors++; 377 | } 378 | } 379 | 380 | int check_types(char *type1, char *type2){ 381 | // declaration with no init 382 | if(!strcmp(type2, "null")) 383 | return -1; 384 | // both datatypes are same 385 | if(!strcmp(type1, type2)) 386 | return 0; 387 | // both datatypes are different 388 | if(!strcmp(type1, "int") && !strcmp(type2, "float")) 389 | return 1; 390 | if(!strcmp(type1, "float") && !strcmp(type2, "int")) 391 | return 2; 392 | if(!strcmp(type1, "int") && !strcmp(type2, "char")) 393 | return 3; 394 | if(!strcmp(type1, "char") && !strcmp(type2, "int")) 395 | return 4; 396 | if(!strcmp(type1, "float") && !strcmp(type2, "char")) 397 | return 5; 398 | if(!strcmp(type1, "char") && !strcmp(type2, "float")) 399 | return 6; 400 | } 401 | 402 | char *get_type(char *var){ 403 | for(int i=0; ileft = left; 470 | newnode->right = right; 471 | newnode->token = newstr; 472 | return(newnode); 473 | } 474 | 475 | void print_tree(struct node* tree) { 476 | printf("\n\nInorder traversal of the Parse Tree is: \n\n"); 477 | print_inorder(tree); 478 | } 479 | 480 | void print_inorder(struct node *tree) { 481 | int i; 482 | if (tree->left) { 483 | print_inorder(tree->left); 484 | } 485 | printf("%s, ", tree->token); 486 | if (tree->right) { 487 | print_inorder(tree->right); 488 | } 489 | } 490 | 491 | void insert_type() { 492 | strcpy(type, yytext); 493 | } 494 | 495 | void yyerror(const char* msg) { 496 | fprintf(stderr, "%s\n", msg); 497 | } -------------------------------------------------------------------------------- /parser.y: -------------------------------------------------------------------------------- 1 | %{ 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include"lex.yy.c" 7 | void yyerror(const char *s); 8 | int yylex(); 9 | int yywrap(); 10 | void add(char); 11 | void insert_type(); 12 | int search(char *); 13 | void insert_type(); 14 | void print_tree(struct node*); 15 | void print_tree_util(struct node*, int); 16 | void print_inorder(struct node *); 17 | void check_declaration(char *); 18 | void check_return_type(char *); 19 | int check_types(char *, char *); 20 | char *get_type(char *); 21 | struct node* mknode(struct node *left, struct node *right, char *token); 22 | 23 | struct dataType { 24 | char * id_name; 25 | char * data_type; 26 | char * type; 27 | int line_no; 28 | } symbol_table[40]; 29 | 30 | int count=0; 31 | int q; 32 | char type[10]; 33 | extern int countn; 34 | struct node *head; 35 | int sem_errors=0; 36 | int ic_idx=0; 37 | int temp_var=0; 38 | int label=0; 39 | int is_for=0; 40 | char buff[100]; 41 | char errors[10][100]; 42 | char reserved[10][10] = {"int", "float", "char", "void", "if", "else", "for", "main", "return", "include"}; 43 | char icg[50][100]; 44 | 45 | struct node { 46 | struct node *left; 47 | struct node *right; 48 | char *token; 49 | }; 50 | 51 | %} 52 | 53 | %union { struct var_name { 54 | char name[100]; 55 | struct node* nd; 56 | } nd_obj; 57 | 58 | struct var_name2 { 59 | char name[100]; 60 | struct node* nd; 61 | char type[5]; 62 | } nd_obj2; 63 | 64 | struct var_name3 { 65 | char name[100]; 66 | struct node* nd; 67 | char if_body[5]; 68 | char else_body[5]; 69 | } nd_obj3; 70 | } 71 | %token VOID 72 | %token CHARACTER PRINTFF SCANFF INT FLOAT CHAR FOR IF ELSE TRUE FALSE NUMBER FLOAT_NUM ID LE GE EQ NE GT LT AND OR STR ADD MULTIPLY DIVIDE SUBTRACT UNARY INCLUDE RETURN 73 | %type headers main body return datatype statement arithmetic relop program else 74 | %type init value expression 75 | %type condition 76 | 77 | %% 78 | 79 | program: headers main '(' ')' '{' body return '}' { $2.nd = mknode($6.nd, $7.nd, "main"); $$.nd = mknode($1.nd, $2.nd, "program"); 80 | head = $$.nd; 81 | } 82 | ; 83 | 84 | headers: headers headers { $$.nd = mknode($1.nd, $2.nd, "headers"); } 85 | | INCLUDE { add('H'); } { $$.nd = mknode(NULL, NULL, $1.name); } 86 | ; 87 | 88 | main: datatype ID { add('F'); } 89 | ; 90 | 91 | datatype: INT { insert_type(); } 92 | | FLOAT { insert_type(); } 93 | | CHAR { insert_type(); } 94 | | VOID { insert_type(); } 95 | ; 96 | 97 | body: FOR { add('K'); is_for = 1; } '(' statement ';' condition ';' statement ')' '{' body '}' { 98 | struct node *temp = mknode($6.nd, $8.nd, "CONDITION"); 99 | struct node *temp2 = mknode($4.nd, temp, "CONDITION"); 100 | $$.nd = mknode(temp2, $11.nd, $1.name); 101 | sprintf(icg[ic_idx++], buff); 102 | sprintf(icg[ic_idx++], "JUMP to %s\n", $6.if_body); 103 | sprintf(icg[ic_idx++], "\nLABEL %s:\n", $6.else_body); 104 | } 105 | | IF { add('K'); is_for = 0; } '(' condition ')' { sprintf(icg[ic_idx++], "\nLABEL %s:\n", $4.if_body); } '{' body '}' { sprintf(icg[ic_idx++], "\nLABEL %s:\n", $4.else_body); } else { 106 | struct node *iff = mknode($4.nd, $8.nd, $1.name); 107 | $$.nd = mknode(iff, $11.nd, "if-else"); 108 | sprintf(icg[ic_idx++], "GOTO next\n"); 109 | } 110 | | statement ';' { $$.nd = $1.nd; } 111 | | body body { $$.nd = mknode($1.nd, $2.nd, "statements"); } 112 | | PRINTFF { add('K'); } '(' STR ')' ';' { $$.nd = mknode(NULL, NULL, "printf"); } 113 | | SCANFF { add('K'); } '(' STR ',' '&' ID ')' ';' { $$.nd = mknode(NULL, NULL, "scanf"); } 114 | ; 115 | 116 | else: ELSE { add('K'); } '{' body '}' { $$.nd = mknode(NULL, $4.nd, $1.name); } 117 | | { $$.nd = NULL; } 118 | ; 119 | 120 | condition: value relop value { 121 | $$.nd = mknode($1.nd, $3.nd, $2.name); 122 | if(is_for) { 123 | sprintf($$.if_body, "L%d", label++); 124 | sprintf(icg[ic_idx++], "\nLABEL %s:\n", $$.if_body); 125 | sprintf(icg[ic_idx++], "\nif NOT (%s %s %s) GOTO L%d\n", $1.name, $2.name, $3.name, label); 126 | sprintf($$.else_body, "L%d", label++); 127 | } else { 128 | sprintf(icg[ic_idx++], "\nif (%s %s %s) GOTO L%d else GOTO L%d\n", $1.name, $2.name, $3.name, label, label+1); 129 | sprintf($$.if_body, "L%d", label++); 130 | sprintf($$.else_body, "L%d", label++); 131 | } 132 | } 133 | | TRUE { add('K'); $$.nd = NULL; } 134 | | FALSE { add('K'); $$.nd = NULL; } 135 | | { $$.nd = NULL; } 136 | ; 137 | 138 | statement: datatype ID { add('V'); } init { 139 | $2.nd = mknode(NULL, NULL, $2.name); 140 | int t = check_types($1.name, $4.type); 141 | if(t>0) { 142 | if(t == 1) { 143 | struct node *temp = mknode(NULL, $4.nd, "floattoint"); 144 | $$.nd = mknode($2.nd, temp, "declaration"); 145 | } 146 | else if(t == 2) { 147 | struct node *temp = mknode(NULL, $4.nd, "inttofloat"); 148 | $$.nd = mknode($2.nd, temp, "declaration"); 149 | } 150 | else if(t == 3) { 151 | struct node *temp = mknode(NULL, $4.nd, "chartoint"); 152 | $$.nd = mknode($2.nd, temp, "declaration"); 153 | } 154 | else if(t == 4) { 155 | struct node *temp = mknode(NULL, $4.nd, "inttochar"); 156 | $$.nd = mknode($2.nd, temp, "declaration"); 157 | } 158 | else if(t == 5) { 159 | struct node *temp = mknode(NULL, $4.nd, "chartofloat"); 160 | $$.nd = mknode($2.nd, temp, "declaration"); 161 | } 162 | else{ 163 | struct node *temp = mknode(NULL, $4.nd, "floattochar"); 164 | $$.nd = mknode($2.nd, temp, "declaration"); 165 | } 166 | } 167 | else { 168 | $$.nd = mknode($2.nd, $4.nd, "declaration"); 169 | } 170 | sprintf(icg[ic_idx++], "%s = %s\n", $2.name, $4.name); 171 | } 172 | | ID { check_declaration($1.name); } '=' expression { 173 | $1.nd = mknode(NULL, NULL, $1.name); 174 | char *id_type = get_type($1.name); 175 | if(strcmp(id_type, $4.type)) { 176 | if(!strcmp(id_type, "int")) { 177 | if(!strcmp($4.type, "float")){ 178 | struct node *temp = mknode(NULL, $4.nd, "floattoint"); 179 | $$.nd = mknode($1.nd, temp, "="); 180 | } 181 | else{ 182 | struct node *temp = mknode(NULL, $4.nd, "chartoint"); 183 | $$.nd = mknode($1.nd, temp, "="); 184 | } 185 | 186 | } 187 | else if(!strcmp(id_type, "float")) { 188 | if(!strcmp($4.type, "int")){ 189 | struct node *temp = mknode(NULL, $4.nd, "inttofloat"); 190 | $$.nd = mknode($1.nd, temp, "="); 191 | } 192 | else{ 193 | struct node *temp = mknode(NULL, $4.nd, "chartofloat"); 194 | $$.nd = mknode($1.nd, temp, "="); 195 | } 196 | 197 | } 198 | else{ 199 | if(!strcmp($4.type, "int")){ 200 | struct node *temp = mknode(NULL, $4.nd, "inttochar"); 201 | $$.nd = mknode($1.nd, temp, "="); 202 | } 203 | else{ 204 | struct node *temp = mknode(NULL, $4.nd, "floattochar"); 205 | $$.nd = mknode($1.nd, temp, "="); 206 | } 207 | } 208 | } 209 | else { 210 | $$.nd = mknode($1.nd, $4.nd, "="); 211 | } 212 | sprintf(icg[ic_idx++], "%s = %s\n", $1.name, $4.name); 213 | } 214 | | ID { check_declaration($1.name); } relop expression { $1.nd = mknode(NULL, NULL, $1.name); $$.nd = mknode($1.nd, $4.nd, $3.name); } 215 | | ID { check_declaration($1.name); } UNARY { 216 | $1.nd = mknode(NULL, NULL, $1.name); 217 | $3.nd = mknode(NULL, NULL, $3.name); 218 | $$.nd = mknode($1.nd, $3.nd, "ITERATOR"); 219 | if(!strcmp($3.name, "++")) { 220 | sprintf(buff, "t%d = %s + 1\n%s = t%d\n", temp_var, $1.name, $1.name, temp_var++); 221 | } 222 | else { 223 | sprintf(buff, "t%d = %s + 1\n%s = t%d\n", temp_var, $1.name, $1.name, temp_var++); 224 | } 225 | } 226 | | UNARY ID { 227 | check_declaration($2.name); 228 | $1.nd = mknode(NULL, NULL, $1.name); 229 | $2.nd = mknode(NULL, NULL, $2.name); 230 | $$.nd = mknode($1.nd, $2.nd, "ITERATOR"); 231 | if(!strcmp($1.name, "++")) { 232 | sprintf(buff, "t%d = %s + 1\n%s = t%d\n", temp_var, $2.name, $2.name, temp_var++); 233 | } 234 | else { 235 | sprintf(buff, "t%d = %s - 1\n%s = t%d\n", temp_var, $2.name, $2.name, temp_var++); 236 | 237 | } 238 | } 239 | ; 240 | 241 | init: '=' value { $$.nd = $2.nd; sprintf($$.type, $2.type); strcpy($$.name, $2.name); } 242 | | { sprintf($$.type, "null"); $$.nd = mknode(NULL, NULL, "NULL"); strcpy($$.name, "NULL"); } 243 | ; 244 | 245 | expression: expression arithmetic expression { 246 | if(!strcmp($1.type, $3.type)) { 247 | sprintf($$.type, $1.type); 248 | $$.nd = mknode($1.nd, $3.nd, $2.name); 249 | } 250 | else { 251 | if(!strcmp($1.type, "int") && !strcmp($3.type, "float")) { 252 | struct node *temp = mknode(NULL, $1.nd, "inttofloat"); 253 | sprintf($$.type, $3.type); 254 | $$.nd = mknode(temp, $3.nd, $2.name); 255 | } 256 | else if(!strcmp($1.type, "float") && !strcmp($3.type, "int")) { 257 | struct node *temp = mknode(NULL, $3.nd, "inttofloat"); 258 | sprintf($$.type, $1.type); 259 | $$.nd = mknode($1.nd, temp, $2.name); 260 | } 261 | else if(!strcmp($1.type, "int") && !strcmp($3.type, "char")) { 262 | struct node *temp = mknode(NULL, $3.nd, "chartoint"); 263 | sprintf($$.type, $1.type); 264 | $$.nd = mknode($1.nd, temp, $2.name); 265 | } 266 | else if(!strcmp($1.type, "char") && !strcmp($3.type, "int")) { 267 | struct node *temp = mknode(NULL, $1.nd, "chartoint"); 268 | sprintf($$.type, $3.type); 269 | $$.nd = mknode(temp, $3.nd, $2.name); 270 | } 271 | else if(!strcmp($1.type, "float") && !strcmp($3.type, "char")) { 272 | struct node *temp = mknode(NULL, $3.nd, "chartofloat"); 273 | sprintf($$.type, $1.type); 274 | $$.nd = mknode($1.nd, temp, $2.name); 275 | } 276 | else { 277 | struct node *temp = mknode(NULL, $1.nd, "chartofloat"); 278 | sprintf($$.type, $3.type); 279 | $$.nd = mknode(temp, $3.nd, $2.name); 280 | } 281 | } 282 | sprintf($$.name, "t%d", temp_var); 283 | temp_var++; 284 | sprintf(icg[ic_idx++], "%s = %s %s %s\n", $$.name, $1.name, $2.name, $3.name); 285 | } 286 | | value { strcpy($$.name, $1.name); sprintf($$.type, $1.type); $$.nd = $1.nd; } 287 | ; 288 | 289 | arithmetic: ADD 290 | | SUBTRACT 291 | | MULTIPLY 292 | | DIVIDE 293 | ; 294 | 295 | relop: LT 296 | | GT 297 | | LE 298 | | GE 299 | | EQ 300 | | NE 301 | ; 302 | 303 | value: NUMBER { strcpy($$.name, $1.name); sprintf($$.type, "int"); add('C'); $$.nd = mknode(NULL, NULL, $1.name); } 304 | | FLOAT_NUM { strcpy($$.name, $1.name); sprintf($$.type, "float"); add('C'); $$.nd = mknode(NULL, NULL, $1.name); } 305 | | CHARACTER { strcpy($$.name, $1.name); sprintf($$.type, "char"); add('C'); $$.nd = mknode(NULL, NULL, $1.name); } 306 | | ID { strcpy($$.name, $1.name); char *id_type = get_type($1.name); sprintf($$.type, id_type); check_declaration($1.name); $$.nd = mknode(NULL, NULL, $1.name); } 307 | ; 308 | 309 | return: RETURN { add('K'); } value ';' { check_return_type($3.name); $1.nd = mknode(NULL, NULL, "return"); $$.nd = mknode($1.nd, $3.nd, "RETURN"); } 310 | | { $$.nd = NULL; } 311 | ; 312 | 313 | %% 314 | 315 | int main() { 316 | yyparse(); 317 | printf("\n\n"); 318 | printf("\t\t\t\t\t\t\t\t PHASE 1: LEXICAL ANALYSIS \n\n"); 319 | printf("\nSYMBOL DATATYPE TYPE LINE NUMBER \n"); 320 | printf("_______________________________________\n\n"); 321 | int i=0; 322 | for(i=0; i0) { 335 | printf("Semantic analysis completed with %d errors\n", sem_errors); 336 | for(int i=0; i=0; i--) { 353 | if(strcmp(symbol_table[i].id_name, type)==0) { 354 | return -1; 355 | break; 356 | } 357 | } 358 | return 0; 359 | } 360 | 361 | void check_declaration(char *c) { 362 | q = search(c); 363 | if(!q) { 364 | sprintf(errors[sem_errors], "Line %d: Variable \"%s\" not declared before usage!\n", countn+1, c); 365 | sem_errors++; 366 | } 367 | } 368 | 369 | void check_return_type(char *value) { 370 | char *main_datatype = get_type("main"); 371 | char *return_datatype = get_type(value); 372 | if((!strcmp(main_datatype, "int") && !strcmp(return_datatype, "CONST")) || !strcmp(main_datatype, return_datatype)){ 373 | return ; 374 | } 375 | else { 376 | sprintf(errors[sem_errors], "Line %d: Return type mismatch\n", countn+1); 377 | sem_errors++; 378 | } 379 | } 380 | 381 | int check_types(char *type1, char *type2){ 382 | // declaration with no init 383 | if(!strcmp(type2, "null")) 384 | return -1; 385 | // both datatypes are same 386 | if(!strcmp(type1, type2)) 387 | return 0; 388 | // both datatypes are different 389 | if(!strcmp(type1, "int") && !strcmp(type2, "float")) 390 | return 1; 391 | if(!strcmp(type1, "float") && !strcmp(type2, "int")) 392 | return 2; 393 | if(!strcmp(type1, "int") && !strcmp(type2, "char")) 394 | return 3; 395 | if(!strcmp(type1, "char") && !strcmp(type2, "int")) 396 | return 4; 397 | if(!strcmp(type1, "float") && !strcmp(type2, "char")) 398 | return 5; 399 | if(!strcmp(type1, "char") && !strcmp(type2, "float")) 400 | return 6; 401 | } 402 | 403 | char *get_type(char *var){ 404 | for(int i=0; ileft = left; 471 | newnode->right = right; 472 | newnode->token = newstr; 473 | return(newnode); 474 | } 475 | 476 | void print_tree(struct node* tree) { 477 | // print_tree_util(tree, 0); 478 | printf("\n\nInorder traversal of the Parse Tree is: \n\n"); 479 | print_inorder(tree); 480 | } 481 | 482 | void print_inorder(struct node *tree) { 483 | int i; 484 | if (tree->left) { 485 | print_inorder(tree->left); 486 | } 487 | printf("%s, ", tree->token); 488 | if (tree->right) { 489 | print_inorder(tree->right); 490 | } 491 | } 492 | 493 | void print_tree_util(struct node *root, int space) { 494 | if(root == NULL) 495 | return; 496 | space += 7; 497 | print_tree_util(root->right, space); 498 | for (int i = 7; i < space; i++) 499 | printf(" "); 500 | printf("%s\n", root->token); 501 | print_tree_util(root->left, space); 502 | } 503 | 504 | void insert_type() { 505 | strcpy(type, yytext); 506 | } 507 | 508 | void yyerror(const char* msg) { 509 | fprintf(stderr, "%s\n", msg); 510 | } --------------------------------------------------------------------------------