├── .gitignore
├── .vscode
    └── settings.json
├── README.md
├── src
    ├── .gitignore
    ├── .vscode
    │   └── settings.json
    ├── Makefile
    ├── ast.c
    ├── ast.h
    ├── bintree.c
    ├── bintree.h
    ├── cgen.c
    ├── cgen.h
    ├── env.c
    ├── env.h
    ├── error.c
    ├── error.h
    ├── eval.c
    ├── eval.h
    ├── hashtable.c
    ├── hashtable.h
    ├── list.c
    ├── list.h
    ├── old
    │   ├── allocator.cpp
    │   ├── allocator.h
    │   ├── common.h
    │   ├── context.cpp
    │   ├── context.h
    │   ├── lex.cpp
    │   ├── lex.h
    │   ├── scope.cpp
    │   ├── scope.h
    │   ├── syntax.cpp
    │   ├── syntax.h
    │   ├── token.cpp
    │   └── token.h
    ├── parse.c
    ├── parse.h
    ├── parse_comp.c
    ├── parse_comp.h
    ├── parse_decl.c
    ├── parse_decl.h
    ├── parse_exp.c
    ├── parse_exp.h
    ├── parse_stmt.c
    ├── parse_stmt.h
    ├── parse_test_src.txt
    ├── python
    │   ├── Makefile
    │   ├── ast.py
    │   ├── basic_type.py
    │   ├── common.py
    │   ├── krc-earley.syntax
    │   ├── krc-lr.syntax
    │   ├── krc.syntax
    │   ├── lex.py
    │   ├── lex_test.c
    │   ├── symbol_table.py
    │   ├── syntax.py
    │   ├── token_list.txt
    │   └── type.py
    ├── stack.c
    ├── stack.h
    ├── str.c
    ├── str.h
    ├── tests
    │   ├── test_cgen.c
    │   ├── test_eval.c
    │   ├── test_lex.c
    │   ├── test_parse.c
    │   └── test_type.c
    ├── todo.txt
    ├── token.c
    ├── token.h
    ├── type.c
    ├── type.h
    └── x86
    │   ├── .gitignore
    │   ├── Makefile
    │   ├── README.md
    │   ├── todo.txt
    │   ├── x86-test.c
    │   ├── x86.c
    │   └── x86.h
└── workspace.code-workspace


/.gitignore:
--------------------------------------------------------------------------------
 1 | # Compiled Object files
 2 | *.slo
 3 | *.lo
 4 | *.o 
 5 | *.obj
 6 | 
 7 | # Precompiled Headers
 8 | *.gch
 9 | *.pch
10 | 
11 | # Compiled Dynamic libraries
12 | *.so
13 | *.dylib
14 | *.dll
15 | 
16 | # Fortran module files
17 | *.mod
18 | *.smod
19 | 
20 | # Compiled Static libraries
21 | *.lai
22 | *.la
23 | *.a
24 | *.lib
25 | 
26 | # Executables
27 | *.exe
28 | *.out
29 | *.app
30 | 
31 | *.log
32 | *.asm
33 | ./build/*
34 | ./bin/*
35 | bin
36 | build
37 | 
38 | ## Ignore python byte code 
39 | *.pyc
40 | *.table
41 | 
42 | # Specific files
43 | test_parse.txt
44 | .vscode/*
45 | 


--------------------------------------------------------------------------------
/.vscode/settings.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "files.associations": {
 3 |     "ast.h": "c",
 4 |     "hashtable.h": "c",
 5 |     "parse_decl.h": "c",
 6 |     "parse_exp.h": "c",
 7 |     "parse_stmt.h": "c",
 8 |     "bintree.h": "c",
 9 |     "error.h": "c",
10 |     "list.h": "c",
11 |     "eval.h": "c",
12 |     "token.h": "c",
13 |     "type.h": "c"
14 |   }
15 | }


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # CFront    
 2 | The goal of this project is to build a C compiler from scratch without using any third-party code except the standard C library.   
 3 |             
 4 | # Directory Structure
 5 | [./src](https://github.com/wangziqi2013/CFront/tree/master/src) - Main source directory   
 6 |           
 7 | [./src/tests](https://github.com/wangziqi2013/CFront/tree/master/src/tests) - Unit tests and functional tests
 8 |       
 9 | [./src/old](https://github.com/wangziqi2013/CFront/tree/master/src/old) - Deprecated code. Only for demonstration purposes.
10 |  
11 | [./src/python](https://github.com/wangziqi2013/CFront/tree/master/src/python) - An LL(1)/LR(1)/LALR(1) compiler generator implemented in Python  
12 | 
13 | # Source File Description
14 | 
15 | ## Main Files
16 | 
17 | ./src/token.c: Implements lexical analysis and the token stream interface
18 | 
19 | ./src/parse_exp.c: Implements parsing interface and expression parsing. The entire parser is based on expression parsing, which uses a hand-coded shift-reduce parser with operator precedence.
20 | 
21 | ./src/parse_decl.c: Implements declaration parsing. It uses expression parsing to build declaration tree (in C language, declaration has exactly the same format as an expression).
22 | 
23 | ./src/parse_comp.c: Implements composite type declaration parsing, including struct, union and enum.
24 | 
25 | ./src/parse_stmt.c: Implements statement parsing.
26 | 
27 | ./src/parse.c: Implements top-level (global declaration, definition and function definition) parsing.
28 | 
29 | ./src/type.c: Implements the type system.
30 | 
31 | ./src/eval.c: Implements compile-time evaluation support, including constant evaluation, atoi, string to binary, etc.
32 | 
33 | ./src/cgen.c: Implements top-level code generation.
34 |    
35 | ## Data Structure Files  
36 |  
37 | ./src/ast.c: Implements abstract syntax tree. We use left-child right-sibling organization for trees.
38 | 
39 | ./src/str.c: Implements vector and string.
40 | 
41 | ./src/hashtable.c: Implements hash table. We use hash table as symbol tables for scopes.
42 | 
43 | ./src/bintree.c: Implements a simple binary search tree. We use binary search trees as indices for composite types.
44 | 
45 | ./src/list.c: Implements singly linked list.
46 | 
47 | ./src/stack.c: Implements a stack. We use stack to maintain scopes and to perform shift-reduce parsing.
48 |  
49 | # Compile and Test
50 | To compile, enter ./src directory, and type `make all` or just `make`. This will build object files for each source file, and link them with the tests.
51 | 
52 | To test, run the binaries under the ./bin directory directly. Test source files are independent of each other (i.e. there is no mutual dependency), and should be rather straightforward to understand.
53 | 
54 | # Contribution
55 | I only contribute to this project in my spare time. If you are interested in becoming a contributor, feel free to drop me a message on GitHub.
56 | 


--------------------------------------------------------------------------------
/src/.gitignore:
--------------------------------------------------------------------------------
1 | *.o
2 | bin/*
3 | obj/*
4 | *.d


--------------------------------------------------------------------------------
/src/.vscode/settings.json:
--------------------------------------------------------------------------------
1 | {
2 |   "files.associations": {
3 |     "error.h": "c"
4 |   }
5 | }


--------------------------------------------------------------------------------
/src/Makefile:
--------------------------------------------------------------------------------
 1 | 
 2 | CC=gcc
 3 | LD=ld
 4 | CFLAGS=-O0 -g -Wall -Wextra -Werror -Wno-unused-parameter -Wno-unused-variable
 5 | PWD=$(CURDIR)
 6 | TESTFLAGS=-I$(PWD)
 7 | LDFLAGS=
 8 | BIN=./bin
 9 | 
10 | SRCS=$(wildcard *.c)
11 | OBJS=$(SRCS:.c=.o)
12 | DEPS=$(SRCS:.c=.d)
13 | 
14 | TEST_SRCS=$(wildcard ./tests/*.c)
15 | TEST_OBJS=$(patsubst ./tests/%.c,$(BIN)/%,$(TEST_SRCS))
16 | 
17 | ifeq ($(OPT), 1)
18 | 	CFLAGS=-O3 -Wall -Wextra -Werror -Wno-unused-parameter -Wno-unused-variable
19 | endif
20 | 
# Special target names are case-sensitive in GNU make: it must be .PHONY for
# these targets to be rebuilt regardless of same-named files on disk
.PHONY: all tests line-count mem-test clean
22 | 
23 | all: tests
24 | 
25 | tests: $(TEST_OBJS)
26 | 
27 | # Build rule for source files under the current directory, one object file per source file
28 | %.o: %.c
29 | 	$(CC) -MMD -MP -c $< -o $@ $(CFLAGS) $(LDFLAGS) $(TESTFLAGS)
30 | 
31 | # Build rule for test source files under ./tests directory, one binary per test source file
32 | ./bin/%: ./tests/%.c $(OBJS)
33 | 	$(CC) $< $(OBJS) -o $@ $(CFLAGS) $(LDFLAGS) $(TESTFLAGS)
34 | 
35 | # Include automatically generated dependency files for every source file
36 | -include $(DEPS)
37 | 
38 | line-count:
39 | 	cloc --exclude-lang=Python ./
40 | 
41 | clean:
42 | 	rm -f *.o
43 | 	rm -f $(BIN)/*
44 | 


--------------------------------------------------------------------------------
/src/ast.c:
--------------------------------------------------------------------------------
  1 | 
  2 | #include "ast.h"
  3 | 
  4 | // Initialize a token to be an AST node. Return the node given to it
  5 | token_t *ast_make_node(token_t *token) {
  6 |   token->child = token->sibling = token->parent = NULL;
  7 |   return token;
  8 | }
  9 | 
 10 | int ast_isleaf(token_t *token) { return token->child == NULL; }
 11 | 
 12 | // Update the offset using the first non-NULL token in child list
 13 | void ast_update_offset(token_t *token) {
 14 |   if(token->offset) return;
 15 |   token_t *child = token->child;
 16 |   while(child && !child->offset) child = child->sibling;
 17 |   if(child) token->offset = child->offset;
 18 | }
 19 | 
 20 | token_t *ast_append_child(token_t *token, token_t *child) {
 21 |   if(token->child == NULL) {
 22 |     token->child = child;
 23 |   } else {
 24 |     token_t *last = token->child;
 25 |     while(last->sibling != NULL) last = last->sibling;
 26 |     last->sibling = child;
 27 |   }
 28 |   child->sibling = NULL;
 29 |   child->parent = token;
 30 |   ast_update_offset(token);
 31 |   return token;
 32 | }
 33 | 
 34 | // Adds the node as the first child of the token
 35 | token_t *ast_push_child(token_t *token, token_t *child) {
 36 |   child->sibling = token->child;
 37 |   token->child = child;
 38 |   child->parent = token;
 39 |   ast_update_offset(token);
 40 |   return token;
 41 | }
 42 | 
 43 | // Adds a node as a sibling after the given one, adding a child
 44 | token_t *ast_insert_after(token_t *token, token_t *child) {
 45 |   child->sibling = token->sibling;
 46 |   token->sibling = child;
 47 |   child->parent = token->parent;
 48 |   ast_update_offset(token);
 49 |   return token;
 50 | }
 51 | 
 52 | // Remove from parent node. Assume there is a parent node. Returns the node itself
 53 | token_t *ast_remove(token_t *token) {
 54 |   token_t *parent = token->parent;
 55 |   if(parent->child == token) parent->child = token->sibling;
 56 |   else {
 57 |     token_t *curr = parent->child; // Assumes that the tree is correctly formed, so curr will not be NULL
 58 |     while(curr->sibling != token) curr = curr->sibling; 
 59 |     curr->sibling = token->sibling;
 60 |   }
 61 |   return token;
 62 | }
 63 | 
 64 | void ast_print(token_t *token) { ast_print_(token, 0); }
 65 | 
 66 | void ast_print_(token_t *token, int depth) {
 67 |   for(int i = 0;i < depth * 2;i++) if(i % 2 == 0) printf("|"); else printf(" ");
 68 |   const char *symstr = token_symstr(token->type);
 69 |   printf("%04d:%04d:%s %s\n", 
 70 |          token->type, 
 71 |          token->offset ? error_get_offset(token->offset) : 0,
 72 |          token_typestr(token->type), 
 73 |          token->type == T_BASETYPE ? token_decl_print(token->decl_prop) : 
 74 |           (symstr == NULL ? (token->type >= T_LITERALS_BEGIN && token->type < T_LITERALS_END ? token->str : "") : symstr));
 75 |   for(token_t *child = token->child;child != NULL; child = child->sibling) ast_print_(child, depth + 1);
 76 |   return;
 77 | }
 78 | 
 79 | // Releases memory for every node in the AST
 80 | void ast_free(token_t *token) {
 81 |   while(token->child != NULL) {
 82 |     token_t *next = token->child->sibling;
 83 |     ast_free(token->child);
 84 |     token->child = next;
 85 |   }
 86 |   token_free(token);
 87 | }
 88 | 
 89 | int ast_child_count(token_t *token) {
 90 |   int count = 0;
 91 |   token_t *child = token->child;
 92 |   while(child) {
 93 |     count++;
 94 |     child = child->sibling;
 95 |   }
 96 |   return count;
 97 | }
 98 | 
 99 | // Get n-th child; Return NULL if index is larger than the number of children
100 | token_t *ast_getchild(token_t *token, int index) {
101 |   assert(index >= 0 && token != NULL);
102 |   token = token->child;
103 |   while(token != NULL && index-- != 0) token = token->sibling;
104 |   return token;
105 | }
106 | 
107 | // Returns the last inserted node
108 | token_t *_ast_collect_funcarg(token_t *comma, token_t *token) {
109 |   assert(ast_getchild(comma, 0) != NULL && ast_getchild(comma, 1) != NULL);
110 |   token_t *child1 = comma->child, *child2 = child1->sibling;
111 |   if(child1->type != EXP_COMMA) {
112 |     ast_insert_after(token, child2);
113 |     ast_insert_after(token, child1);
114 |     token = child1;
115 |   } else {
116 |     ast_insert_after(token, child2);
117 |     token = _ast_collect_funcarg(child1, token);
118 |   }
119 |   token_free(comma);
120 |   return token;
121 | }
122 | 
123 | // Transforms function argument from comma expression to flat structure
124 | // Three cases: argument-less func; one argument func (must not be comma exp)
125 | // and functions with >= 2 arguments
126 | void ast_collect_funcarg(token_t *token) {
127 |   assert(token->type == EXP_FUNC_CALL);
128 |   token_t *comma = ast_getchild(token, 1);
129 |   if(comma == NULL || comma->type != EXP_COMMA) return;
130 |   // The comma node has been freed. The function returns the last node inserted
131 |   token->child->sibling = _ast_collect_funcarg(comma, comma);
132 |   return;
133 | }
134 | 
135 | // Transforms conditional expression from two 2-operand operators to
136 | // a signle cond operator
137 | void ast_movecond(token_t *token) {
138 |   assert(token->type == EXP_COND);
139 |   if(ast_getchild(token, 1)->type != EXP_COLON) 
140 |     error_row_col_exit(token->offset, "Operator \'?\' must be followed by operator \':\'\n");
141 |   token_t *colon = ast_getchild(token, 1), *child2 = ast_getchild(colon, 1);
142 |   ast_append_child(token, colon->child);
143 |   ast_append_child(token, child2);
144 |   token->child->sibling = colon->child;
145 |   token_free(colon);
146 |   return;
147 | }
148 | 
149 | // Returns a pointer to the first child of given type, or NULL
150 | token_t *ast_gettype(token_t *token, token_type_t type) {
151 |   for(token = token->child;token && token->type != type;token = token->sibling);
152 |   return token;
153 | }
154 | 


--------------------------------------------------------------------------------
/src/ast.h:
--------------------------------------------------------------------------------
 1 | 
 2 | #ifndef _AST_H
 3 | #define _AST_H
 4 | 
 5 | #include "token.h"
 6 | 
 7 | token_t *ast_make_node(token_t *token);
 8 | int ast_isleaf(token_t *token);
 9 | void ast_update_offset(token_t *token);
10 | token_t *ast_append_child(token_t *token, token_t *child);
11 | token_t *ast_push_child(token_t *token, token_t *child);
12 | token_t *ast_insert_after(token_t *token, token_t *child);
13 | token_t *ast_remove(token_t *token);
14 | void ast_print(token_t *token);
15 | void ast_print_(token_t *token, int depth);
16 | void ast_free(token_t *token);
17 | int ast_child_count(token_t *token);
18 | token_t *ast_getchild(token_t *token, int index);
19 | void ast_collect_funcarg(token_t *token);
20 | void ast_movecond(token_t *token);
21 | token_t *ast_gettype(token_t *token, token_type_t type);
22 | 
23 | #endif


--------------------------------------------------------------------------------
/src/bintree.c:
--------------------------------------------------------------------------------
 1 | 
 2 | #include "bintree.h"
 3 | 
 4 | btnode_t *btnode_alloc(void *key, void *value) {
 5 |   btnode_t *node = (btnode_t *)malloc(sizeof(btnode_t));
 6 |   SYSEXPECT(node != NULL);
 7 |   node->key = key, node->value = value;
 8 |   node->left = node->right = NULL;
 9 |   return node;
10 | }
11 | void btnode_free(btnode_t *node) { free(node); }
12 | 
13 | bintree_t *bt_init(cmp_cb_t cmp) {
14 |   bintree_t *bt = (bintree_t *)malloc(sizeof(bintree_t));
15 |   SYSEXPECT(bt != NULL);
16 |   bt->cmp = cmp;
17 |   bt->root = NULL;
18 |   bt->size = 0;
19 |   return bt;
20 | }
21 | void bt_free(bintree_t *bt) { _bt_free(bt->root); free(bt); }
22 | void _bt_free(btnode_t *node) {
23 |   if(node == NULL) return;
24 |   _bt_free(node->left);
25 |   _bt_free(node->right);
26 |   btnode_free(node);
27 |   return;
28 | }
29 | bintree_t *bt_str_init() { return bt_init(strcmp_cb); }
30 | 
31 | int bt_size(bintree_t *bt) { return bt->size; }
32 | 
33 | // Insert the key, or return an existing key
34 | void *bt_insert(bintree_t *bt, void *key, void *value) {
35 |   btnode_t *found = NULL; // Set to new node if inserted, otherwise set to 
36 |   bt->root = _bt_insert(bt, bt->root, key, value, &found);
37 |   return found->value;
38 | }
39 | btnode_t *_bt_insert(bintree_t *bt, btnode_t *node, void *key, void *value, btnode_t **found) {
40 |   if(node == NULL) { bt->size++; *found = btnode_alloc(key, value); return *found; } // Creates a new node
41 |   int cmp = bt->cmp(key, node->key);
42 |   if(cmp == 0) *found = node;
43 |   else if(cmp < 0) node->left = _bt_insert(bt, node->left, key, value, found);
44 |   else node->right = _bt_insert(bt, node->right, key, value, found);
45 |   return node;
46 | }
47 | 
48 | // Return BT_NOTFOUND if not found, otherwise return the value
49 | void *bt_find(bintree_t *bt, void *key) { return _bt_find(bt, bt->root, key); }
50 | void *_bt_find(bintree_t *bt, btnode_t *node, void *key) {
51 |   if(node == NULL) return BT_NOTFOUND;
52 |   int cmp = bt->cmp(key, node->key);
53 |   if(cmp == 0) return node->value;
54 |   else if(cmp < 0) return _bt_find(bt, node->left, key);
55 |   else return _bt_find(bt, node->right, key);
56 | }
57 | 
58 | // Removes the given key, and returns the value if the key exists; otherwise return BT_NOTFOUND
59 | void *bt_remove(bintree_t *bt, void *key) { 
60 |   void *found = BT_NOTFOUND;
61 |   bt->root = _bt_remove(bt, bt->root, key, &found); 
62 |   return found;
63 | }
64 | 
65 | // Returns the child after performing remove
66 | void *_bt_remove(bintree_t *bt, btnode_t *node, void *key, void **found) {
67 |   if(node == NULL) { *found = BT_NOTFOUND; return NULL; }
68 |   int cmp = bt->cmp(key, node->key);
69 |   if(cmp == 0) { *found = node->value; bt->size--; return _bt_remove_node(bt, node); }
70 |   else if(cmp < 0) node->left = _bt_remove(bt, node->left, key, found);
71 |   else node->right = _bt_remove(bt, node->right, key, found);
72 |   return node;
73 | }
74 | 
75 | // Internal function only called by bt_remove()
76 | void *_bt_remove_node(bintree_t *bt, btnode_t *node) {
77 |   btnode_t *left = node->left, *right = node->right;
78 |   if(left == NULL) { btnode_free(node); return right; } // This also covers the leaf node case
79 |   else if(right == NULL) { btnode_free(node); return left; }
80 |   if(right->left == NULL) {
81 |     btnode_free(node);
82 |     right->left = left;
83 |     return right;
84 |   }
85 |   do { left = right; right = right->left; } while(right->left);
86 |   node->key = right->key; node->value = right->value;
87 |   left->left = right->right;
88 |   btnode_free(right);
89 |   return node;
90 | }


--------------------------------------------------------------------------------
/src/bintree.h:
--------------------------------------------------------------------------------
 1 | 
 2 | #ifndef _BIN_TREE_H
 3 | #define _BIN_TREE_H
 4 | 
 5 | #include "hashtable.h" // Need its def for call back functions
 6 | 
 7 | #define BT_NOTFOUND ((void *)-1)
 8 | 
 9 | // Binary tree node type
10 | typedef struct btnode {
11 |   void *key, *value;
12 |   struct btnode *left, *right;
13 | } btnode_t;
14 | 
15 | // The good thing about a binary tree search structure is that the physical size
16 | // grows proportionally with the logical size, which is desirable for structures
17 | // that are usually small, but sometimes huge
18 | typedef struct {
19 |   int size;
20 |   cmp_cb_t cmp;
21 |   btnode_t *root;
22 | } bintree_t;
23 | 
24 | btnode_t *btnode_alloc(void *key, void *value);
25 | void btnode_free(btnode_t *node);
26 | bintree_t *bt_init(cmp_cb_t cmp);
27 | void bt_free(bintree_t *bt);
28 | void _bt_free(btnode_t *node);
29 | bintree_t *bt_str_init();
30 | int bt_size(bintree_t *bt);
31 | void *bt_insert(bintree_t *bt, void *key, void *value);
32 | btnode_t *_bt_insert(bintree_t *bt, btnode_t *node, void *key, void *value, btnode_t **found);
33 | void *bt_find(bintree_t *bt, void *key);
34 | void *_bt_find(bintree_t *bt, btnode_t *node, void *key);
35 | void *bt_remove(bintree_t *bt, void *key);
36 | void *_bt_remove(bintree_t *bt, btnode_t *node, void *key, void **found);
37 | void *_bt_remove_node(bintree_t *bt, btnode_t *node);
38 | 
39 | #endif


--------------------------------------------------------------------------------
/src/cgen.h:
--------------------------------------------------------------------------------
 1 | 
 2 | #ifndef _CGEN_H
 3 | #define _CGEN_H
 4 | 
 5 | #include "ast.h"
 6 | #include "type.h"
 7 | 
 8 | #define CGEN_GDATA_PADDING 8 // To avoid allocating a zero byte object on the heap
 9 | 
10 | #define CGEN_ARRAY_DEF      0
11 | #define CGEN_ARRAY_DECL     1
12 | 
13 | #define CGEN_RELOC_CODE     0
14 | #define CGEN_RELOC_DATA     1
15 | 
16 | typedef struct {
17 |   type_cxt_t *type_cxt;  // Owns memory; will automatically init and free
18 |   list_t *import_list;       // Externally declared variable, function or array - only valid import is pending is 1
19 |   list_t *export_list;  // Non-statically declared global variable, function or array
20 |   list_t *gdata_list;   // A list of global data, i.e. actual storage
21 |   int64_t gdata_offset; // Next global data offset
22 |   list_t *reloc_list;   // A list of cgen_reloc_t *; Owns memory
23 | } cgen_cxt_t;
24 | 
25 | // A relocation entry provides info for converting relative reference (starting at address 0)
26 | // into absolute address when the binary is loaded into memory
27 | typedef struct {
28 |   int from, to;   // CGEN_RELOC_ series
29 |   int64_t offset; // The offset to be modified during relocation
30 |   size_t size;    // Number of bytes 
31 | } cgen_reloc_t;
32 | extern const char *cgen_reloc_name[];
33 | 
34 | // Global data container
35 | typedef struct cgen_data_struct_t {
36 |   uint8_t *data;   // Actual data; NULL means uninitialized
37 |   type_t *type;    // Type of the global data, which also contains the size
38 |   int64_t offset;  // Offset relative to the beginning of data segment
39 | } cgen_gdata_t;
40 | 
41 | void cgen_typed_print(type_t *type, void *data);
42 | void cgen_print_cxt(cgen_cxt_t *cxt);
43 | 
44 | cgen_cxt_t *cgen_init();
45 | void cgen_free(cgen_cxt_t *cxt);
46 | 
47 | cgen_gdata_t *cgen_gdata_init(cgen_cxt_t *cxt, type_t *type);
48 | void cgen_gdata_free(cgen_gdata_t *gdata);
49 | cgen_reloc_t *cgen_reloc_init(cgen_cxt_t *cxt);
50 | void cgen_reloc_free(cgen_reloc_t *reloc);
51 | 
52 | void cgen_resolve_extern(cgen_cxt_t *cxt, value_t *value);
53 | cgen_gdata_t *cgen_init_comp(cgen_cxt_t *cxt, type_t *type, token_t *token);
54 | int64_t cgen_init_comp_(cgen_cxt_t *cxt, type_t *type, token_t *token, cgen_gdata_t *gdata, int64_t offset);
55 | cgen_gdata_t *cgen_init_array(cgen_cxt_t *cxt, type_t *type, token_t *token);
56 | int64_t cgen_init_array_(cgen_cxt_t *cxt, type_t *type, token_t *token, cgen_gdata_t *gdata, int64_t offset);
57 | cgen_gdata_t *cgen_init_value(cgen_cxt_t *cxt, type_t *type, token_t *token);
58 | int64_t cgen_init_value_(cgen_cxt_t *cxt, type_t *type, token_t *token, cgen_gdata_t *gdata, int64_t offset);
59 | 
60 | void cgen_resolve_array_size(type_t *decl_type, type_t *def_type, token_t *init, int both_decl);
61 | void cgen_global_decl(cgen_cxt_t *cxt, type_t *type, token_t *basetype, token_t *decl, token_t *init);
62 | void cgen_global_def(cgen_cxt_t *cxt, type_t *type, token_t *basetype, token_t *decl, token_t *init);
63 | void cgen_global_func(cgen_cxt_t *cxt, token_t *func);
64 | void cgen_global(cgen_cxt_t *cxt, token_t *global_decl);
65 | void cgen(cgen_cxt_t *cxt, token_t *root);
66 | 
67 | #endif


--------------------------------------------------------------------------------
/src/env.c:
--------------------------------------------------------------------------------
 1 | 
 2 | #include "env.h"
 3 | 
 4 | // This function initializes inclusion path from multiple sources
 5 | void env_init_include_path(env_t *env) {
 6 |   int count = 0;
 7 |   // Read environmental variable; These paths are inserted into the beginning of the list, i.e.,
 8 |   // they will override all other paths
 9 |   char *env_path = getenv("C_INCLUDE_PATH");
10 |   if(env_path != NULL) {
11 |     // Parse this string as a ":" separated path variable
12 |     char *p = env_path;
13 |     while(1) {
14 |       char *q = p;
15 |       if(*q == '\0') {
16 |         break;
17 |       }
18 |       // Stop at ':' or '\0'
19 |       while(*q != ':' && *q != '\0') {
20 |         q++;
21 |       }
22 |       if(*q == '\0') {
23 |         break;
24 |       }
25 |       int size = q - p;
26 |       if(size != 0) {
27 |         char *path = (char *)malloc(sizeof(q - p) + 1);
28 |         SYSEXPECT(path != NULL);
29 |         memcpy(path, p, size);
30 |         path[size] = '\0';
31 |         list_insertat(env->include_paths, path, path, count);
32 |         count++;
33 |       }
34 |       p = q + 1;
35 |     }
36 |   }
37 |   return;
38 | }
39 | 
40 | env_t *env_init() {
41 |   env_t *env = (env_t *)malloc(sizeof(env_t));
42 |   SYSEXPECT(env != NULL);
43 |   memset(env, 0x00, sizeof(env_t));
44 |   env->include_paths = list_init();
45 |   return env;
46 | }
47 | 
48 | void env_free(env_t *env) {
49 |   do {
50 |     listnode_t *node = list_head(env->include_paths);
51 |     while(node != NULL) {
52 |       free(node->key);
53 |       node = list_next(node);
54 |     }
55 |     list_free(env->include_paths);
56 |   } while(0);
57 |   free(env);
58 |   return;
59 | }
60 | 


--------------------------------------------------------------------------------
/src/env.h:
--------------------------------------------------------------------------------
 1 | 
 2 | // This file implements global environmental variables
 3 | 
 4 | #ifndef _CFRONT_ENV_H
 5 | #define _CFRONT_ENV_H
 6 | 
 7 | #include "hashtable.h"
 8 | #include "list.h"
 9 | 
10 | typedef struct {
11 |   // Search path for included files
12 |   list_t *include_paths;
13 | } env_t;
14 | 
15 | void env_init_include_path(env_t *env);
16 | 
17 | env_t *env_init();
18 | void env_free(env_t *env);
19 | 
20 | #endif


--------------------------------------------------------------------------------
/src/error.c:
--------------------------------------------------------------------------------
 1 | 
 2 | #include "error.h"
 3 | 
 4 | // This global pointer holds the begin of the text. We use this pointer and 
 5 | // a given pointer to compute the line and column number
 6 | static const char *begin = NULL;
 7 | static int inited = 0;
 8 | // Whether test mode is on. Under test mode, error reporting functions calls 
 9 | // longjmp to jump to a previously set location
10 | static int testmode = 0;
11 | 
12 | jmp_buf env;
13 | 
14 | // This must be called in order for line number to work
15 | void error_init(const char *s) { 
16 |   begin = s; 
17 |   inited = 1; 
18 |   return;
19 | }
20 | 
21 | void error_free() { 
22 |   inited = 0; 
23 |   return;
24 | }
25 | 
26 | void error_testmode(int mode) { 
27 |   testmode = mode; 
28 |   return;
29 | }
30 | 
31 | void error_exit_or_jump(int need_exit) { 
32 |   if(testmode != 0) { 
33 |     fprintf(stderr, "*** %s are redirected ***\n", need_exit ? "Errors" : "Warnings"); 
34 |     longjmp(env, 1); 
35 |   } else if(need_exit) { 
36 |     #ifndef NDEBUG
37 |     assert(0); 
38 |     #else
39 |     exit(ERROR_CODE_EXIT); 
40 |     #endif
41 |   }
42 |   return;
43 | }
44 | 
45 | // Returns the row and column of a given pointer
46 | // Note:
47 | //   1. If error is not initialized then row and col will be set to -1
48 | //   2. If the pointer is not in the string registered during initialization
49 | //      then row and col will be set to -2
50 | void error_get_row_col(const char *s, int *row, int *col) {
51 |   if(inited == 0) { 
52 |     *row = *col = -1; 
53 |   } else {
54 |     *row = *col = 1;
55 |     const char *p;
56 |     const char *line_head = begin; // Track the beginning of the line
57 |     for(p = begin; p != s && *p != '\0';p++) {
58 |       if(*p == '\n') {
59 |         (*row)++; 
60 |         *col = 1; 
61 |         line_head = p + 1;
62 |       } else {
63 |         (*col)++;
64 |       }
65 |     }
66 |     if(*p == '\0' && p != s) { // if p == s then still valid
67 |       *row = *col = -2;
68 |       fprintf(stderr, "Did you forget to register a new pointer with error module?\n");
69 |     } else { 
70 |       // Print from line head to next line
71 |       printf("----\n");
72 |       while(*line_head != '\n' && *line_head != '\0') {
73 |         putchar(*line_head++);
74 |       }
75 |       putchar('\n');
76 |       for(int i = 0;i < *col - 1;i++) {
77 |         putchar(' ');
78 |       }
79 |       printf("^\n");
80 |       printf("----\n");
81 |     }
82 |   }
83 |   return;
84 | }
85 | 
86 | void syserror(const char *prompt) { 
87 |   fputs(prompt, stderr);
88 |   exit(ERROR_CODE_EXIT); 
89 | }
90 | 
91 | int error_get_offset(const char *offset) { 
92 |   return offset - begin + 1; // Begin with column 1 
93 | } 
94 | 


--------------------------------------------------------------------------------
/src/error.h:
--------------------------------------------------------------------------------
 1 | 
 2 | // Note that this might be a common name, so we make it longer to avoid conflict
 3 | #ifndef _ERROR_H_CFRONT
 4 | #define _ERROR_H_CFRONT
 5 | 
 6 | #include <stdio.h>
 7 | #include <stdlib.h>
 8 | #include <setjmp.h>
 9 | #include <assert.h>
10 | 
11 | extern jmp_buf env;
12 | 
13 | #define ERROR_CODE_EXIT 1
14 | // Input to function error_exit_or_jump()
15 | #define ERROR_ACTION_CONT 0
16 | #define ERROR_ACTION_EXIT 1
17 | #define error_exit(fmt, ...) do { fprintf(stderr, "Error: " fmt, ##__VA_ARGS__); error_exit_or_jump(ERROR_ACTION_EXIT); } while(0);
18 | #define error_row_col_exit(s, fmt, ...) do { \
19 |                                           int row, col; error_get_row_col(s, &row, &col); \
20 |                                           fprintf(stderr, "Error (row %d col %d): " fmt, row, col, ##__VA_ARGS__); \
21 |                                           error_exit_or_jump(ERROR_ACTION_EXIT); } while(0);
22 | #define warn_row_col_exit(s, fmt, ...) do { \
23 |                                           int row, col; error_get_row_col(s, &row, &col); \
24 |                                           fprintf(stderr, "Warning (row %d col %d): " fmt, row, col, ##__VA_ARGS__); \
25 |                                           error_exit_or_jump(ERROR_ACTION_CONT); } while(0);
26 | 
27 | // The following two macros are used for testing. It redirects the control flow back to the testing function
28 | // if an error occurs. The testing function should set testmode to 1.
29 | // Usage: if(error_trycatch()) { ...code goes here } else { ... error happens } ... error did not happen
30 | #define error_trycatch() (setjmp(env) == ERROR_FIRSTTIME)
31 | #define ERROR_FIRSTTIME 0
32 | 
33 | #define SYSEXPECT(expr) do { if(!(expr)) syserror(__func__); } while(0) // Assertion for system calls; Valid under all modes
34 | 
35 | void error_init(const char *s);
36 | void error_free();
37 | void error_testmode(int mode);
38 | void error_exit_or_jump(int need_exit);
39 | void error_get_row_col(const char *s, int *row, int *col);
40 | void syserror(const char *prompt);
41 | 
42 | int error_get_offset(const char *offset); // Returns integer offset
43 | 
44 | #endif
45 | 
46 | 


--------------------------------------------------------------------------------
/src/eval.h:
--------------------------------------------------------------------------------
 1 | 
 2 | #ifndef _EVAL_H
 3 | #define _EVAL_H
 4 | 
 5 | #include "token.h"
 6 | #include "ast.h"
 7 | #include "type.h"
 8 | 
 9 | #define EVAL_MAX(a, b) (a > b ? a : b)
10 | #define EVAL_MIN(a, b) (a < b ? a : b)
11 | 
12 | // Used as the parameter to eval_const_atoi()
13 | #define ATOI_NO_CHECK_END   0
14 | #define ATOI_CHECK_END      1  // Do not report error if there is still char after the int literal
15 | #define ATOI_NO_MAX_CHAR    0  // For \xhh \ooo we only eat 2 and 3 chars respectively
16 | 
17 | #define EVAL_MAX_CONST_SIZE 8  // We only support evaluating constants smaller than this size
18 | 
19 | uint64_t eval_int_masks[9];  // NOTE(review): no 'extern' here, so each file including this header gets a tentative definition - confirm where the initialized definition lives
20 | 
21 | uint64_t eval_const_get_mask(int size);
22 | uint64_t eval_const_get_sign_mask(int size);
23 | int eval_const_is_zero(value_t *value, int size);
24 | uint64_t eval_const_adjust_size(value_t *value, int to, int from, int is_signed);
25 | uint64_t eval_const_add(value_t *op1, value_t *op2, int size, int is_signed, int *overflow);
26 | uint64_t eval_const_sub(value_t *op1, value_t *op2, int size, int is_signed, int *overflow);
27 | uint64_t eval_const_mul(value_t *op1, value_t *op2, int size, int is_signed, int *overflow);
28 | uint64_t eval_const_div_mod(int is_div, value_t *op1, value_t *op2, int size, int is_signed, int *div_zero);
29 | uint64_t eval_const_shift(int is_left, value_t *op1, value_t *op2, int size, int is_signed, int *shift_overflow);
30 | int      eval_const_cmp(token_type_t op, value_t *op1, value_t *op2, int size, int is_signed);
31 | uint64_t eval_const_bitwise(token_type_t op, value_t *op1, value_t *op2, int size);
32 | uint64_t eval_const_unary(token_type_t op, value_t *value, int size);
33 | 
34 | char *eval_hex_char(char ch);
35 | str_t *eval_print_const_str(str_t *s);
36 | 
37 | // Take a maximum bite and return the next to read
38 | char *eval_const_atoi_maxbite(char *s, int base, token_t *token, int *ret); 
39 | // Given a string and base convert to integer
40 | int eval_const_atoi(char *s, int base, token_t *token, int max_char, int check_end, char **next); 
41 | char eval_escaped_char(char escaped, token_t *token);
42 | 
43 | char eval_const_char_token(token_t *token); // Evaluates char type token to char
44 | str_t *eval_const_str_token(token_t *token); // Evaluates string token to str_t *
45 | 
46 | // Evaluating const expression using value_t objects
47 | value_t *eval_const_get_int_value(type_cxt_t *cxt, token_t *token); // Evaluates int literal and returns value object
48 | value_t *eval_const_exp(type_cxt_t *cxt, token_t *exp);
49 | value_t *eval_const_to_type(type_cxt_t *cxt, token_t *exp, type_t *type, int cast_type); // Evaluates and cast to type
50 | 
51 | #endif


--------------------------------------------------------------------------------
/src/hashtable.c:
--------------------------------------------------------------------------------
  1 | 
  2 | #include "hashtable.h"
  3 | 
  4 | int streq_cb(void *a, void *b) { return !strcmp((const char *)a, (const char *)b); }  // 1 iff the two C strings are equal, else 0
  5 | int strcmp_cb(void *a, void *b) { return strcmp(a, b); }  // Three-way comparison callback: forwards the result of strcmp(a, b)
  6 | // String hash callback (credits: K&R C Second Edition Page 144): hash = char + 31 * hash, seeded with 0
  7 | hashval_t strhash_cb(void *a) {
  8 |   const char *cursor = (const char *)a;
  9 |   hashval_t acc = (hashval_t)0;
 10 |   while(*cursor != '\0') {
 11 |     acc = *cursor++ + 31 * acc;  // Same term order as the classic loop; the empty string hashes to 0
 12 |   }
 13 |   return acc;
 14 | }
 15 | 
 16 | // Builds an empty table of HT_INIT_CAPACITY slots that uses eq/hash as its key callbacks.
 17 | // Both slot arrays start zero-filled, i.e. every key slot is NULL (never used).
 18 | hashtable_t *ht_init(eq_cb_t eq, hash_cb_t hash) {
 19 |   hashtable_t *table = (hashtable_t *)malloc(sizeof(hashtable_t));
 20 |   SYSEXPECT(table != NULL);
 21 |   table->eq = eq;
 22 |   table->hash = hash;
 23 |   table->mask = HT_INIT_MASK;
 24 |   table->size = 0;
 25 |   table->capacity = HT_INIT_CAPACITY;
 26 |   // calloc() hands back zeroed arrays, so no separate clearing pass is needed
 27 |   table->keys = (void **)calloc(HT_INIT_CAPACITY, sizeof(void *));
 28 |   table->values = (void **)calloc(HT_INIT_CAPACITY, sizeof(void *));
 29 |   SYSEXPECT(table->keys != NULL && table->values != NULL);
 30 |   return table;
 31 | }
 31 | 
 32 | hashtable_t *ht_str_init() {  // Table keyed by C strings: ht_init() with streq_cb / strhash_cb
 33 |   return ht_init(streq_cb, strhash_cb);
 34 | }
 35 | 
 36 | // Releases both slot arrays and then the table object; the stored key/value pointers are not freed
 37 | void ht_free(hashtable_t *ht) {
 38 |   free(ht->values);
 39 |   free(ht->keys);
 40 |   free(ht);
 41 | }
 42 | 
 43 | int ht_size(hashtable_t *ht) {  // Current value of the size field (bumped by ht_insert, dropped by ht_remove)
 44 |   return ht->size; 
 45 | }
 46 | 
 47 | // Linear-probing slot lookup over keys (ht->keys, or a fresh array while resizing).
 48 | //   HT_OP_FIND:   returns the slot holding key, or the first never-used (NULL) slot if key is absent.
 49 | //                 Tombstones (HT_REMOVED) are skipped, so the result never refers to one.
 50 | //   HT_OP_INSERT: returns the slot holding key if key is stored anywhere on its probe chain;
 51 | //                 otherwise the first reusable slot (earliest tombstone, else the terminating NULL).
 52 | // The insert probe has to look past tombstones before reusing one: stopping at the first
 53 | // tombstone would store a second copy of a key that still lives further down the chain.
 54 | // NOTE: the find probe only terminates on a match or a NULL slot, so it relies on the array
 55 | // containing at least one NULL slot.
 56 | int ht_find_slot(hashtable_t *ht, void **keys, void *key, int op) {
 57 |   assert(key != NULL && key != HT_REMOVED);
 58 |   assert(op == HT_OP_INSERT || op == HT_OP_FIND);
 59 |   hashval_t slot = ht->hash(key) & ht->mask;
 60 |   if(op == HT_OP_FIND) {
 61 |     while(keys[slot] != NULL && (keys[slot] == HT_REMOVED || !ht->eq(keys[slot], key))) {
 62 |       slot = (slot + 1) & ht->mask;
 63 |     }
 64 |     return (int)slot;
 65 |   }
 66 |   int reusable = -1;  // First tombstone seen on the chain, if any
 67 |   // Visit each slot at most once (mask + 1 slots) so a table without NULL slots cannot spin forever
 68 |   for(hashval_t probed = 0; probed <= ht->mask && keys[slot] != NULL; probed++) {
 69 |     if(keys[slot] == HT_REMOVED) {
 70 |       if(reusable < 0) {
 71 |         reusable = (int)slot;
 72 |       }
 73 |     } else if(ht->eq(keys[slot], key)) {
 74 |       return (int)slot;  // Key already present: report its slot, not an earlier tombstone
 75 |     }
 76 |     slot = (slot + 1) & ht->mask;
 77 |   }
 78 |   if(reusable >= 0) {
 79 |     return reusable;
 80 |   }
 81 |   assert(keys[slot] == NULL);  // Otherwise every slot holds a live key, which the resize policy should prevent
 82 |   return (int)slot;
 83 | }
 64 | 
 65 | // Doubles the capacity (and widens the mask by one bit), then re-inserts every live key into
 66 | // fresh zeroed arrays. Tombstones are dropped on the way because only live keys are copied.
 67 | void ht_resize(hashtable_t *ht) {
 68 |   assert(ht->size < ht->capacity);
 69 |   const int old_capacity = ht->capacity;
 70 |   ht->capacity = old_capacity * 2;
 71 |   ht->mask |= (ht->mask << 1);
 72 |   // Zeroed values also guarantee that no slot accidentally holds HT_NOTFOUND
 73 |   void **new_keys = (void **)calloc(ht->capacity, sizeof(void *));
 74 |   void **new_values = (void **)calloc(ht->capacity, sizeof(void *));
 75 |   SYSEXPECT(new_keys != NULL && new_values != NULL);
 76 |   for(int i = 0;i < old_capacity;i++) {
 77 |     void *old_key = ht->keys[i];
 78 |     if(old_key == NULL || old_key == HT_REMOVED) {
 79 |       continue;
 80 |     }
 81 |     int slot = ht_find_slot(ht, new_keys, old_key, HT_OP_INSERT);  // Probes with the new mask
 82 |     assert(new_keys[slot] == NULL);
 83 |     new_keys[slot] = old_key;
 84 |     new_values[slot] = ht->values[i];
 85 |   }
 86 |   free(ht->keys);
 87 |   free(ht->values);
 88 |   ht->keys = new_keys;
 89 |   ht->values = new_values;
 90 | }
 88 | 
 89 | // Looks key up and returns its value; HT_NOTFOUND when the probe ends on an unused slot
 90 | void *ht_find(hashtable_t *ht, void *key) {
 91 |   assert(key != NULL);
 92 |   const int slot = ht_find_slot(ht, ht->keys, key, HT_OP_FIND);
 93 |   void *found_key = ht->keys[slot];
 94 |   assert(found_key != HT_REMOVED);  // A find probe skips removed slots
 95 |   if(found_key == NULL) {
 96 |     return HT_NOTFOUND;
 97 |   }
 98 |   return ht->values[slot];
 99 | }
 96 | 
 97 | // Stores (key, value) when the returned slot is free and returns value; when the slot already
 98 | // holds a live key, nothing is stored and that slot's current value is returned instead.
 99 | // The table is resized first when size has reached HT_RESIZE_THRESHOLD(capacity).
100 | void *ht_insert(hashtable_t *ht, void *key, void *value) {
101 |   assert(key != NULL);
102 |   if(ht->size == HT_RESIZE_THRESHOLD(ht->capacity)) {
103 |     ht_resize(ht);
104 |   }
105 |   const int slot = ht_find_slot(ht, ht->keys, key, HT_OP_INSERT);
106 |   void *occupant = ht->keys[slot];
107 |   const int is_free = (occupant == NULL || occupant == HT_REMOVED);
108 |   if(!is_free) {
109 |     return ht->values[slot];
110 |   }
111 |   ht->keys[slot] = key;
112 |   ht->values[slot] = value;
113 |   ht->size++;
114 |   return value;
115 | }
112 | 
113 | // Turns the key's slot into a tombstone (HT_REMOVED) and returns the value it held;
114 | // returns HT_NOTFOUND when the key is not stored. The value slot itself is left as is.
115 | void *ht_remove(hashtable_t *ht, void *key) {
116 |   assert(key != NULL);
117 |   const int slot = ht_find_slot(ht, ht->keys, key, HT_OP_FIND);
118 |   assert(ht->keys[slot] != HT_REMOVED);
119 |   if(ht->keys[slot] != NULL) {
120 |     void *old_value = ht->values[slot];
121 |     ht->keys[slot] = HT_REMOVED;
122 |     ht->size--;
123 |     return old_value;
124 |   }
125 |   return HT_NOTFOUND;
126 | }
125 | 
126 | // Set operations layered on the hash table. A return of 1 means the operation succeeded, 0 otherwise; insert always reports success
127 | int set_find(set_t *set, void *key) { return ht_find(set, key) != HT_NOTFOUND; }  // 1 iff key is stored in the set
128 | int set_insert(set_t *set, void *key) { ht_insert(set, key, NULL); return SET_SUCCESS; }  // Stores NULL as the value; SET_SUCCESS even if key was already present
129 | int set_remove(set_t *set, void *key) { return ht_remove(set, key) != HT_NOTFOUND; }  // 1 iff key was stored and has been removed


--------------------------------------------------------------------------------
/src/hashtable.h:
--------------------------------------------------------------------------------
 1 | 
 2 | #ifndef _HASHTABLE_H
 3 | #define _HASHTABLE_H
 4 | 
 5 | #include <stdio.h>
 6 | #include <stdlib.h>
 7 | #include <string.h>
 8 | #include "error.h"
 9 | 
10 | // Must be a power of two
11 | #define HT_INIT_CAPACITY 128
12 | #define HT_INIT_MASK 0x7F
13 | #define HT_RESIZE_THRESHOLD(capacity) ((capacity) / 8 * 7)  // 7/8 of capacity; argument parenthesized so expressions work
14 | #define HT_NOTFOUND ((void *)-1)
15 | #define HT_REMOVED  ((void *)-2)
16 | 
17 | #define HT_OP_INSERT 0
18 | #define HT_OP_FIND   1
19 | 
20 | typedef unsigned long hashval_t;
21 | typedef int (*eq_cb_t)(void *, void *);    // Equality comparison function (non-zero means equal, see streq_cb)
22 | typedef int (*cmp_cb_t)(void *, void *);   // Three-way comparison function (see strcmp_cb)
23 | typedef hashval_t (*hash_cb_t)(void *);    // Hash value function
24 | 
25 | typedef struct {
26 |   eq_cb_t eq;       // Key equality callback
27 |   hash_cb_t hash;   // Key hash callback
28 |   hashval_t mask;   // ANDed with the hash / probe index to stay inside the slot arrays
29 |   int size;         // Number of live keys (tombstones are not counted)
30 |   int capacity;     // Number of slots in keys[] and values[]
31 |   void **keys;      // Slot keys: NULL = never used, HT_REMOVED = tombstone, anything else = live key
32 |   void **values;    // Slot values, parallel to keys[]
33 | } hashtable_t;
34 | 
35 | int streq_cb(void *a, void *b);
36 | int strcmp_cb(void *a, void *b);
37 | hashval_t strhash_cb(void *a);
38 | hashtable_t *ht_init(eq_cb_t eq, hash_cb_t hash);
39 | hashtable_t *ht_str_init();
40 | void ht_free(hashtable_t *ht);
41 | int ht_size(hashtable_t *ht);
42 | int ht_find_slot(hashtable_t *ht, void **keys, void *key, int op);
43 | void ht_resize(hashtable_t *ht);
44 | void *ht_find(hashtable_t *ht, void *key);
45 | void *ht_insert(hashtable_t *ht, void *key, void *value);
46 | void *ht_remove(hashtable_t *ht, void *key);
47 | 
48 | typedef hashtable_t set_t; // Set is just a hash table (we waste some space)
49 | #define SET_FAIL     0
50 | #define SET_SUCCESS  1
51 | #define set_init(a, b) ht_init(a, b)
52 | #define set_str_init() ht_str_init()
53 | #define set_free(a) ht_free(a)
54 | #define set_size(a) ht_size(a)
55 | int set_find(set_t *set, void *key);
56 | int set_insert(set_t *set, void *key);
57 | int set_remove(set_t *set, void *key);
58 | 
59 | #endif


--------------------------------------------------------------------------------
/src/list.c:
--------------------------------------------------------------------------------
  1 | 
  2 | #include "list.h"
  3 | 
  4 | // Free callback that simply hands the pointer to free(); matches the signature list_set_free_cb() expects
  5 | void LIST_SIMPLE_FREE_CB(void *p) {
  6 |   free(p);
  7 | }
  8 | 
  9 | // Allocates an empty list: no nodes, size 0, no free callbacks installed.
 10 | // The callbacks must start out NULL because list_free() tests them even when
 11 | // list_set_free_cb() was never called on the list.
 12 | list_t *list_init() {
 13 |   list_t *list = (list_t *)malloc(sizeof(list_t));
 14 |   SYSEXPECT(list != NULL);
 15 |   list->size = 0;
 16 |   list->head = list->tail = NULL;
 17 |   list->key_free_cb = NULL;
 18 |   list->value_free_cb = NULL;
 19 |   return list;
 20 | }
 16 | 
 17 | // Destroys the list: for every node runs the key/value free callbacks that are set,
 18 | // releases the node, and finally releases the list object itself.
 19 | void list_free(list_t *list) {
 20 |   assert(list->head || !list->tail);
 21 |   listnode_t *following;
 22 |   for(listnode_t *cursor = list->head; cursor != NULL; cursor = following) {
 23 |     following = cursor->next;  // Saved first: cursor is released below
 24 |     if(list->key_free_cb != NULL) {
 25 |       list->key_free_cb(cursor->key);
 26 |     }
 27 |     if(list->value_free_cb != NULL) {
 28 |       list->value_free_cb(cursor->value);
 29 |     }
 30 |     listnode_free(cursor);
 31 |   }
 32 |   free(list);
 33 | }
 34 | 
 35 | // Installs the callbacks list_free() applies to each node's key and value; NULL disables one
 36 | void list_set_free_cb(list_t *list, void (*key_free_cb)(void *), void (*value_free_cb)(void *)) {
 37 |   list->value_free_cb = value_free_cb;
 38 |   list->key_free_cb = key_free_cb;
 39 | }
 40 | 
 41 | int list_size(list_t *list) {  // Current value of the size field (number of nodes tracked by insert/remove)
 42 |   return list->size; 
 43 | }
 44 | 
 45 | // Returns a freshly malloc()'ed node whose key, value and next fields are all uninitialized
 46 | listnode_t *listnode_alloc() {
 47 |   void *raw = malloc(sizeof(listnode_t));
 48 |   SYSEXPECT(raw != NULL);
 49 |   return (listnode_t *)raw;
 50 | }
 51 | // Releases the node only; the key and value it points to are not touched
 52 | void listnode_free(listnode_t *node) {
 53 |   free(node);
 54 | }
 55 | 
 56 | // Appends (key, value) as the new tail without looking for duplicates; always returns value
 57 | void *list_insert(list_t *list, void *key, void *value) {
 58 |   listnode_t *fresh = listnode_alloc();
 59 |   fresh->next = NULL;
 60 |   fresh->key = key;
 61 |   fresh->value = value;
 62 |   assert(list->head || !list->tail);  // A list without a head cannot have a tail
 63 |   if(list->head != NULL) {
 64 |     list->tail->next = fresh;
 65 |   } else {
 66 |     list->head = fresh;
 67 |   }
 68 |   list->tail = fresh;
 69 |   list->size++;
 70 |   return value;
 71 | }
 72 | 
 73 | // Inserts (key, value) in front of the node currently at index; index == list size appends.
 74 | // NOTE(review): despite the listnode_t * return type, every path returns the value pointer
 75 | // (never the new node) - confirm what callers expect before relying on the result.
 76 | listnode_t *list_insertat(list_t *list, void *key, void *value, int index) {
 77 |   assert(index <= list->size && index >= 0);
 78 |   if(index == list->size) {
 79 |     return list_insert(list, key, value); // Also covers insertion into an empty list
 80 |   }
 81 |   assert(list->size > 0);
 82 |   list->size++;
 83 |   listnode_t *fresh = listnode_alloc();
 84 |   fresh->key = key;
 85 |   fresh->value = value;
 86 |   if(index == 0) {
 87 |     fresh->next = list->head;
 88 |     list->head = fresh;
 89 |     assert(list->tail);
 90 |     return value;
 91 |   }
 92 |   // Walk to the node just before the insertion point (index - 1 hops from the head)
 93 |   listnode_t *before = list->head;
 94 |   for(int hops = index - 1; hops > 0; hops--) {
 95 |     before = before->next;
 96 |   }
 97 |   fresh->next = before->next;
 98 |   before->next = fresh;
 99 |   assert(before->next);
100 |   return value;
101 | }
 97 | 
 98 | // Appends (key, value) only if list_find() does not find key. Returns the value already
 99 | // stored for key, or LIST_NOTFOUND when the pair was newly inserted.
100 | // NOTE(review): a fresh insert reports LIST_NOTFOUND rather than value - confirm this is intended.
101 | void *list_insert_nodup(list_t *list, void *key, void *value, eq_cb_t eq) {
102 |   void *existing = list_find(list, key, eq);
103 |   if(existing != LIST_NOTFOUND) {
104 |     return existing;
105 |   }
106 |   list_insert(list, key, value);
107 |   return LIST_NOTFOUND;
108 | }
105 | 
106 | // Returns the value of the first node for which eq(key, node key) is non-zero; LIST_NOTFOUND if none
107 | void *list_find(list_t *list, void *key, eq_cb_t eq) {
108 |   for(listnode_t *cursor = list->head; cursor != NULL; cursor = cursor->next) {
109 |     if(eq(key, cursor->key)) {
110 |       return cursor->value;
111 |     }
112 |   }
113 |   return LIST_NOTFOUND;
114 | }
118 | 
119 | // Returns the node at position index (0 is the head); LIST_NOTFOUND when index >= list size.
120 | // Index must not be negative
121 | const listnode_t *list_findat(list_t *list, int index) {
122 |   assert(index >= 0);
123 |   if(index >= list->size) {
124 |     return LIST_NOTFOUND;
125 |   }
126 |   listnode_t *cursor = list->head;
127 |   for(int hops = 0; hops < index; hops++) {
128 |     cursor = cursor->next;
129 |   }
130 |   return cursor;
131 | }
132 | 
133 | // Unlinks the first node whose key satisfies eq(node key, key) and returns its value;
134 | // returns LIST_NOTFOUND when no node matches. Only the node is released: the free
135 | // callbacks are not invoked, the key/value pointers stay with the caller.
136 | void *list_remove(list_t *list, void *key, eq_cb_t eq) {
137 |   listnode_t *prev = NULL;
138 |   for(listnode_t *curr = list->head; curr != NULL; prev = curr, curr = curr->next) {
139 |     if(!eq(curr->key, key)) {
140 |       continue;
141 |     }
142 |     if(prev == NULL) {
143 |       list->head = curr->next;  // Removing the head; new head may be NULL
144 |     } else {
145 |       prev->next = curr->next;
146 |     }
147 |     // Fix the tail while curr is still a valid pointer; prev is NULL if curr was the only node
148 |     if(curr == list->tail) {
149 |       list->tail = prev;
150 |     }
151 |     void *ret = curr->value;
152 |     list->size--;
153 |     listnode_free(curr);
154 |     return ret;
155 |   }
156 |   return LIST_NOTFOUND;
157 | }
167 | 
168 | // Unlinks the node at position index. Returns its value and stores its key through *key
169 | // (key may be NULL when the caller does not need it). Returns LIST_NOTFOUND, leaving the
170 | // list and *key untouched, when index >= list size. Index must not be negative.
171 | void *list_removeat(list_t *list, int index, void **key) {
172 |   assert(index >= 0);
173 |   if(index >= list->size) return LIST_NOTFOUND;
174 |   listnode_t *prev = NULL;
175 |   listnode_t *curr = list->head;
176 |   while(index-- > 0) {
177 |     prev = curr;
178 |     curr = curr->next;
179 |   }
180 |   if(prev == NULL) {
181 |     list->head = curr->next;
182 |   } else {
183 |     prev->next = curr->next;
184 |   }
185 |   // Fix the tail before the node is released; prev is NULL if curr was the only node
186 |   if(curr == list->tail) {
187 |     list->tail = prev;
188 |   }
189 |   void *ret = curr->value;
190 |   if(key != NULL) {
191 |     *key = curr->key;
192 |   }
193 |   list->size--;
194 |   listnode_free(curr);
195 |   return ret;
196 | }


--------------------------------------------------------------------------------
/src/list.h:
--------------------------------------------------------------------------------
 1 | 
 2 | #ifndef _LIST_H
 3 | #define _LIST_H
 4 | 
 5 | #include "hashtable.h"
 6 | 
 7 | #define LIST_NOTFOUND ((void *)-1)  // Return value for find()
 8 | 
 9 | void LIST_SIMPLE_FREE_CB(void *p);
10 | 
11 | typedef struct listnode {
12 |   void *key;               // Caller-supplied pointer; freed by list_free() only if a key_free_cb is installed
13 |   void *value;             // Caller-supplied pointer; freed by list_free() only if a value_free_cb is installed
14 |   struct listnode *next;   // Following node, NULL at the tail
15 | } listnode_t;
16 | 
17 | typedef struct {
18 |   listnode_t *head;               // First node, NULL when the list is empty
19 |   listnode_t *tail;               // Last node, NULL when the list is empty
20 |   int size;                       // Number of nodes
21 |   void (*key_free_cb)(void *);    // Applied to every key by list_free() when not NULL
22 |   void (*value_free_cb)(void *);  // Applied to every value by list_free() when not NULL
23 | } list_t;
24 | 
25 | inline static listnode_t *list_head(list_t *list) { return list->head; }      // First node (NULL if empty)
26 | inline static listnode_t *list_tail(list_t *list) { return list->tail; }      // Last node (NULL if empty)
27 | inline static listnode_t *list_next(listnode_t *node) { return node->next; }  // Successor of node (NULL at the tail)
28 | inline static void *list_key(listnode_t *node) { return node->key; }          // Key stored in node
29 | inline static void *list_value(listnode_t *node) { return node->value; }      // Value stored in node
30 | 
31 | list_t *list_init();
32 | void list_free(list_t *list);
33 | 
34 | void list_set_free_cb(list_t *list, void (*key_free_cb)(void *), void (*value_free_cb)(void *));
35 | 
36 | int list_size(list_t *list);
37 | listnode_t *listnode_alloc();
38 | void listnode_free(listnode_t *node);
39 | void *list_insert(list_t *list, void *key, void *value);
40 | listnode_t *list_insertat(list_t *list, void *key, void *value, int index);
41 | void *list_insert_nodup(list_t *list, void *key, void *value, eq_cb_t eq);
42 | void *list_find(list_t *list, void *key, eq_cb_t eq);
43 | const listnode_t *list_findat(list_t *list, int index);
44 | void *list_remove(list_t *list, void *key, eq_cb_t eq);
45 | void *list_removeat(list_t *list, int index, void **key);
46 | 
47 | #endif


--------------------------------------------------------------------------------
/src/old/allocator.cpp:
--------------------------------------------------------------------------------
1 | 
2 | #include "allocator.h"
3 | 
4 | using namespace wangziqi2013;
5 | using namespace cfront;
6 | 


--------------------------------------------------------------------------------
/src/old/allocator.h:
--------------------------------------------------------------------------------
  1 | 
  2 | #pragma once
  3 | 
  4 | #include "common.h"
  5 | 
  6 | namespace wangziqi2013 {
  7 | namespace cfront {
  8 | 
  9 | /*
 10 |  * class SlabAllocator - Allocates elements from malloc()'ed chunks and only
 11 |  *                       releases them when the allocator is destroyed
 12 |  *
 13 |  * This class is used for two purposes:
 14 |  *   1. For many small allocations, reduce call to malloc() to reduce
 15 |  *      memory overhead, fragmentation and time cost
 16 |  *   2. For shared pointer where ownership is not clear, act as a pool
 17 |  *      and removes the need to free node
 18 |  *
 19 |  * The allocator owns raw chunk pointers, so it is neither copyable nor
 20 |  * assignable: a copy would destruct and free every chunk a second time.
 21 |  *
 22 |  * Note that this slab allocator is not thread-safe
 23 |  */
 24 | template <typename ElementType>
 25 | class SlabAllocator {
 26 |  private:
 27 | 
 28 |   // This is the stack where we hold chunks
 29 |   std::stack<char *> chunk_stack;
 30 | 
 31 |   // This is the index inside current (topmost) chunk, i.e. the number of
 32 |   // elements that have been constructed in it
 33 |   int next_element_index;
 34 | 
 35 |   // Number of elements per chunk. This is chosen by the caller when the
 36 |   // allocator is constructed and must be positive
 37 |   int element_per_chunk;
 38 | 
 39 |   /*
 40 |    * AllocateChunk() - Allocate a chunk and push it to the top of the stack
 41 |    *
 42 |    * This function also resets next_element_index to be 0 in order to use
 43 |    * the topmost chunk
 44 |    */
 45 |   void AllocateChunk() {
 46 |     // Do the size computation in size_t so it cannot overflow int
 47 |     size_t chunk_bytes = \
 48 |       static_cast<size_t>(element_per_chunk) * sizeof(ElementType);
 49 | 
 50 |     char *ptr = reinterpret_cast<char *>(malloc(chunk_bytes));
 51 | 
 52 |     if(ptr == nullptr) {
 53 |       ThrowAllocatorOutOfMemoryError();
 54 |     }
 55 | 
 56 |     chunk_stack.push(ptr);
 57 | 
 58 |     next_element_index = 0;
 59 | 
 60 |     return;
 61 |   }
 62 | 
 63 |   /*
 64 |    * ThrowAllocatorOutOfMemoryError() - This is thrown when we are out of
 65 |    *                                    memory through malloc() call
 66 |    */
 67 |   void ThrowAllocatorOutOfMemoryError() const {
 68 |     throw std::string{"Slab allocator out of memory!"};
 69 |   }
 70 | 
 71 |  public:
 72 | 
 73 |   /*
 74 |    * Constructor - Initializes the stack and index structure
 75 |    */
 76 |   SlabAllocator(int p_element_per_chunk=64) :
 77 |     chunk_stack{},
 78 |     next_element_index{0},
 79 |     element_per_chunk{p_element_per_chunk} {
 80 |     // A chunk without slots could never satisfy Get()
 81 |     assert(element_per_chunk > 0);
 82 | 
 83 |     // As part initialization allocate the first chunk on the internal stack
 84 |     AllocateChunk();
 85 | 
 86 |     return;
 87 |   }
 88 | 
 89 |   /*
 90 |    * These are deleted because the destructor frees every chunk; two
 91 |    * allocators sharing the same chunks would free them twice
 92 |    */
 93 |   SlabAllocator(const SlabAllocator &) = delete;
 94 |   SlabAllocator &operator=(const SlabAllocator &) = delete;
 95 | 
 96 |   /*
 97 |    * Destructor - Destructs all elements and frees all memory chunks
 98 |    */
 99 |   ~SlabAllocator() {
100 |     // Only the topmost chunk may be partially filled; it holds exactly
101 |     // next_element_index elements. Every chunk below it is full.
102 |     int live_count = next_element_index;
103 | 
104 |     while(chunk_stack.size() > 0) {
105 |       CallDestructorForEachElement(live_count);
106 | 
107 |       // Delete entire chunk of memory which is char * type
108 |       free(chunk_stack.top());
109 | 
110 |       chunk_stack.pop();
111 | 
112 |       live_count = element_per_chunk;
113 |     }
114 | 
115 |     dbg_printf("Allocator finished cleanup\n");
116 | 
117 |     return;
118 |   }
119 | 
120 |   /*
121 |    * Get() - Returns an element type pointer allocated from the current chunk
122 |    *
123 |    * Note that the use of template class here is to let compiler construct
124 |    * different Get() instances to forward constructor arguments to the
125 |    * placement new which might take arguments
126 |    *
127 |    * These template arguments do not have to be explicitly specified since
128 |    * the compiler could deduce them during compilation
129 |    */
130 |   template <typename ...Args>
131 |   ElementType *Get(Args&&... args) {
132 |     // If we have used up all slots in the current chunk
133 |     // just allocate a new one and reset next element index to 0
134 |     if(next_element_index == element_per_chunk) {
135 |       AllocateChunk();
136 | 
137 |       assert(next_element_index == 0);
138 |     }
139 | 
140 |     // This is the byte offset of the element being allocated
141 |     size_t byte_offset = \
142 |       sizeof(ElementType) * static_cast<size_t>(next_element_index);
143 | 
144 |     // Call placement operator new to initialize the object, forwarding the
145 |     // arguments so that rvalues stay rvalues
146 |     ElementType *element_ptr = \
147 |       new (chunk_stack.top() + byte_offset)
148 |         ElementType{std::forward<Args>(args)...};
149 | 
150 |     // Claim the slot only after construction succeeded: if the constructor
151 |     // throws, the slot stays free and is never passed to a destructor
152 |     next_element_index++;
153 | 
154 |     return element_ptr;
155 |   }
156 | 
157 |   /*
158 |    * CallDestructorForEachElement() - Calls destructor for the topmost chunk
159 |    *
160 |    * This function takes an extra argument as the element count on the top
161 |    * most chunk, since it might or might not be the capacity of each chunk
162 |    * caller needs to pass it in as an argument
163 |    */
164 |   void CallDestructorForEachElement(int element_count) {
165 |     for(int i = 0;i < element_count;i++) {
166 |       // Compute element pointer
167 |       ElementType *ptr = \
168 |         reinterpret_cast<ElementType *>(chunk_stack.top() +
169 |                                         sizeof(ElementType) * i);
170 | 
171 |       // Call destructor manually
172 |       ptr->~ElementType();
173 |     }
174 | 
175 |     return;
176 |   }
177 | };
169 |   
170 | }
171 | }
172 | 


--------------------------------------------------------------------------------
/src/old/common.h:
--------------------------------------------------------------------------------
 1 | 
 2 | #pragma once
 3 | 
 4 | // Old C headers
 5 | #include <cstdio>
 6 | #include <cassert>
 7 | #include <cstring>
 8 | #include <cstdlib>
 9 | 
10 | #include <algorithm>
11 | #include <utility>
12 | #include <unordered_map>
13 | #include <unordered_set>
14 | #include <string>
15 | #include <string>
16 | #include <vector>
17 | #include <stack>
18 | #include <functional>
19 | 
20 | namespace wangziqi2013 {
21 | namespace cfront {
22 | 
23 | static void dummy(const char*, ...) {}
24 | 
25 | #define DEBUG_PRINT
26 | 
27 | #ifdef DEBUG_PRINT
28 | 
29 | #define dbg_printf(fmt, ...)                              \
30 |   do {                                                    \
31 |     fprintf(stderr, "%-24s: " fmt, __FUNCTION__, ##__VA_ARGS__); \
32 |     fflush(stdout);                                       \
33 |   } while (0); // Prefixes the message with the calling function's name. NOTE(review): prints to stderr but flushes stdout - confirm which stream was intended
34 | 
35 | #else
36 | 
37 | #define dbg_printf(fmt, ...)   \
38 |   do {                         \
39 |     dummy(fmt, ##__VA_ARGS__); \
40 |   } while (0);
41 | 
42 | #endif
43 | 
44 | }
45 | }
46 | 


--------------------------------------------------------------------------------
/src/old/context.cpp:
--------------------------------------------------------------------------------
 1 | 
 2 | #include "context.h"
 3 | #include "syntax.h"
 4 | 
 5 | using namespace wangziqi2013;
 6 | using namespace cfront;
 7 | 
 8 | void Context::InitializeBuiltInTypeMap() {  // Adds one builtin_type_map entry per element of TokenInfo::builtin_type_set
 9 |   for(const auto token_type : TokenInfo::builtin_type_set) {
10 |     // Create a Token node wrapped by a SyntaxNode
11 |     // and insert it into the type map for later use
12 |     builtin_type_map[token_type] = SyntaxNode::Get(Token::Get(token_type));
13 |   }
14 |   
15 |   return;
16 | }
17 | 


--------------------------------------------------------------------------------
/src/old/context.h:
--------------------------------------------------------------------------------
  1 | 
  2 | #pragma once
  3 | 
  4 | #include "common.h"
  5 | #include "token.h"
  6 | #include "scope.h"
  7 | 
  8 | namespace wangziqi2013 {
  9 | namespace cfront {
 10 | 
 11 | // Forward declaration here - since we do not need to create syntax nodes in this header it is OK
 12 | class SyntaxNode;
 13 | 
 14 | /*
 15 |  * class Context - The object for holding global values such as symbol tables
 16 |  *                 and type tables
 17 |  */
 18 | class Context {
 19 |  private:
 20 | 
 21 |   // Note that we could not use stack here since stack does not support
 22 |   // iteration, and during a search we have to iterate through the scope
 23 |   // to search a named type
 24 |   //
 25 |   // Use push_back() and pop_back() to access elements like a stack
 26 |   std::vector<ScopeNode> scope_stack;
 27 | 
 28 |   // Maps TokenType to SyntaxNode * for built in types
 29 |   std::unordered_map<TokenType,
 30 |                      SyntaxNode *,
 31 |                      TokenTypeHasher,
 32 |                      TokenTypeEq> builtin_type_map;
 33 | 
 34 |   /*
 35 |    * InitializeBuiltInTypeMap() - Initialize SyntaxNode for built in types
 36 |    *
 37 |    * We do this as an optimization to avoid creating too many built in type
 38 |    * nodes - they now all share the same pointer
 39 |    */
 40 |   void InitializeBuiltInTypeMap();
 41 | 
 42 |  public:
 43 | 
 44 |   /*
 45 |    * Constructor
 46 |    *
 47 |    * The ownership of source file belongs to the context object
 48 |    */
 49 |   Context() :
 50 |     scope_stack{} {
 51 |     // We initialize the first level of stack using an empty scope
 52 |     // possibly with few built-in symbols
 53 |     EnterScope();
 54 |     
 55 |     // Load the map with built in integral types
 56 |     InitializeBuiltInTypeMap();
 57 |     
 58 |     return;
 59 |   }
 60 | 
 61 |   /*
 62 |    * EnterScope() - Pushes a new ScopeNode object into the stack
 63 |    *                and return the pushed object
 64 |    */
 65 |   ScopeNode &EnterScope() {
 66 |     // Construct an empty scope node and push it back to the vector
 67 |     scope_stack.emplace_back();
 68 |     
 69 |     return scope_stack.back();
 70 |   }
 71 |   
 72 |   /*
 73 |    * LeaveScope() - Leaves the scope by popping the node out from the stack
 74 |    *
 75 |    * If the scope stack is already empty then the assertion would fail
 76 |    */
 77 |   void LeaveScope() {
 78 |     assert(scope_stack.size() > 0);
 79 |     
 80 |     scope_stack.pop_back();
 81 |     
 82 |     return;
 83 |   }
 84 |   
 85 |   /*
 86 |    * GetTypeNode() - Search on the stack for a named type
 87 |    *
 88 |    * This function searches the stack from the top to the bottom, and if
 89 |    * the name exists inside any level that are searched first then it returns
 90 |    * the associated type object
 91 |    *
 92 |    * If the type does not exist in all levels just return nullptr. Otherwise
 93 |    * the SyntaxNode pointer that represents the type structure is returned
 94 |    */
 95 |   SyntaxNode *GetTypeNode(const std::string &type_name) {
 96 |     // Iterate through the vector from high index to low index
 97 |     // i.e. from most recent name space to less recent ones
 98 |     for(auto it = scope_stack.rbegin(); it != scope_stack.rend();it++) {
 99 |       SyntaxNode *type_node_p = it->GetTypeNode(type_name);
100 |       
101 |       // If the type exists in the scope being searched just return it
102 |       // Otherwise need to continue to the next scope
103 |       if(type_node_p != nullptr) {
104 |         return type_node_p;
105 |       }
106 |     }
107 |     
108 |     // If at last we did not find such name then the type does
109 |     // not exist and return nullptr
110 |     return nullptr;
111 |   }
112 |   
113 |   /*
114 |    * GetBuiltInTypeNode() - Returns the SyntaxNode * for built in types
115 |    *
116 |    * This is used as an optimization to avoid too many nodes for builtin types
117 |    */
118 |   SyntaxNode *GetBuiltInTypeNode(TokenType token_type) {
119 |     // Find the built in type inside the map, and we must find it
120 |     // since the caller is responsible for verifying whether a type
121 |     // is built in type or not
122 |     auto it = builtin_type_map.find(token_type);
123 |     assert(it != builtin_type_map.end());
124 |     
125 |     return it->second;
126 |   }
127 | };
128 | 
129 | } // namespace wangziqi2013
130 | } // namespace cfront
131 | 


--------------------------------------------------------------------------------
/src/old/lex.cpp:
--------------------------------------------------------------------------------
1 | 
2 | #include "lex.h"
3 | 
4 | using namespace wangziqi2013;
5 | using namespace cfront;
6 | 


--------------------------------------------------------------------------------
/src/old/scope.cpp:
--------------------------------------------------------------------------------
1 | 
2 | #include "scope.h"
3 | 


--------------------------------------------------------------------------------
/src/old/scope.h:
--------------------------------------------------------------------------------
 1 | 
 2 | #pragma once
 3 | 
 4 | #include "common.h"
 5 | 
 6 | namespace wangziqi2013 {
 7 | namespace cfront {
 8 | 
 9 | class SyntaxNode;
10 | 
11 | /*
12 |  * class ScopeNode - This is a structure that contains information
13 |  *                   about a scope.
14 |  *
15 |  * ScopeNodes are put into a stack as translation units are entered
16 |  * and exited
17 |  */
18 | class ScopeNode {
19 |  private:
20 |   // The set of types we have currently seen
21 |   // Types are represented using SyntaxNode structure which means it
22 |   // could be organized as a tree
23 |   //
24 |   // For the topmost level in this structure we should put 8 basic types:
25 |   // char, short, int, long
26 |   // unsigned char, unsigned short, unsigned int, unsigned long
27 |   std::unordered_map<std::string, SyntaxNode *> type_map;
28 |   
29 |   // TODO: Change the mapped type to something more meaningful
30 |   // This should be
31 |   std::unordered_map<std::string, int> ident_map;
32 |  public:
33 | 
34 |   /*
35 |    * Constructor - This is necessary for emplacing it back in a stack
36 |    */
37 |   ScopeNode() {}
38 |   
39 |   /*
40 |    * Move Constructor - This is necessary in std::vector emplace_back
41 |    *                    since a vector might grow and it needs to move all
42 |    *                    previous contents to a new array
43 |    */
44 |   ScopeNode(ScopeNode &&other) :
45 |     type_map{std::move(other.type_map)},
46 |     ident_map{std::move(other.ident_map)}
47 |   {}
48 |   
49 |   /*
50 |    * These are deleted to avoid any undesirable effects
51 |    */
52 |   ScopeNode(const ScopeNode &) = delete;
53 |   ScopeNode &operator=(const ScopeNode &) = delete;
54 |   ScopeNode &operator=(ScopeNode &&) = delete;
55 |   
56 |   /*
57 |    * GetTypeNode() - Return the type node from type map
58 |    *
59 |    * If the type has not yet been defiend just return nullptr
60 |    */
61 |   SyntaxNode *GetTypeNode(const std::string &type_name) {
62 |     auto it = type_map.find(type_name);
63 |     
64 |     // If the type does not exist in the map just return nullptr
65 |     if(it == type_map.end()) {
66 |       return nullptr;
67 |     }
68 |     
69 |     return it->second;
70 |   }
71 |   
72 |   /*
73 |    * GetTypeMap() - Return the type map object reference
74 |    *
75 |    * The return value is a non-const reference which means that we could
76 |    * actually modify it
77 |    */
78 |   std::unordered_map<std::string, SyntaxNode *> &
79 |   GetTypeMap() {
80 |     return type_map;
81 |   }
82 | };
83 | 
} // namespace cfront
} // namespace wangziqi2013
86 | 


--------------------------------------------------------------------------------
/src/old/syntax.cpp:
--------------------------------------------------------------------------------
1 | 
2 | #include "syntax.h"
3 | 
4 | using namespace wangziqi2013;
5 | using namespace cfront;
6 | 
// Out-of-class definition of the static member SyntaxNode::allocator,
// a SlabAllocator<SyntaxNode>, initialized with empty braces
SlabAllocator<SyntaxNode> SyntaxNode::allocator{};
8 | 


--------------------------------------------------------------------------------
/src/old/token.h:
--------------------------------------------------------------------------------
  1 | 
  2 | #pragma once
  3 | 
#include <utility>

#include "common.h"
#include "allocator.h"
  6 | 
  7 | namespace wangziqi2013 {
  8 | namespace cfront {
  9 | 
 10 | enum class TokenType {
 11 |   // This is a placeholder
 12 |   T_INVALID = 0,
 13 |   
 14 |   // The following is keyword types
 15 |   T_AUTO = 1,
 16 |   
 17 |   T_BREAK = 2,
 18 |   
 19 |   T_CASE = 3,
 20 | 	T_CHAR,
 21 | 	T_CONST,
 22 | 	T_CONTINUE,
 23 | 	
 24 | 	T_DEFAULT = 7,
 25 | 	T_DO,
 26 | 	T_DOUBLE,
 27 | 	
 28 | 	T_ELSE = 10,
 29 | 	T_ENUM,
 30 | 	T_EXTERN,
 31 | 	
 32 | 	T_FLOAT = 13,
 33 | 	T_FOR,
 34 | 	
 35 | 	T_GOTO = 15,
 36 | 	
 37 | 	T_IF = 16,
 38 | 	T_INT,
 39 | 	
 40 | 	T_LONG = 18,
 41 | 	
 42 | 	T_REGISTER = 19,
 43 | 	T_RETURN,
 44 | 	
 45 | 	T_SHORT = 21,
 46 | 	T_SIGNED,
 47 | 	// T_SIZEOF -> This is part of the expression system
 48 | 	T_STATIC,
 49 | 	T_STRUCT,
 50 | 	T_SWITCH,
 51 | 	
 52 | 	T_TYPEDEF = 26,
 53 | 	
 54 | 	T_UNION = 27,
 55 | 	T_UNSIGNED,
 56 | 	
 57 | 	T_VOID = 29,
 58 | 	T_VOLATILE,
 59 | 	
 60 | 	T_WHILE = 31,
 61 | 	
 62 | 	// The following are compound types
 63 | 	//
 64 | 	// unsigned char, unsigned short, unsigned int and unsigned long
 65 | 	// should be treated as one unit instead of two type structs
 66 | 	// Since they are represented as a single type rather than unsigned type
 67 | 	// of a known type
 68 |   T_UCHAR = 40,
 69 |   T_USHORT,
 70 |   T_UINT,
 71 |   T_ULONG,
 72 | 	
 73 | 	// The following are types with data (literal token type)
 74 | 	
 75 | 	T_IDENT = 80,   // Identifier
 76 |   T_INT_CONST,    // Integer constant (should be of the same length as unsigned long)
 77 |   T_STRING_CONST, // String literal
 78 |   T_CHAR_CONST,   // Character literal
 79 | 
 80 | 	// The following are primitive operator types
 81 | 	
 82 | 	T_INC = 100,
 83 | 	T_DEC,
 84 | 	T_LPAREN,
 85 | 	T_RPAREN,
 86 |   T_RSPAREN,
 87 |   T_LSPAREN,
 88 |   T_RCPAREN,
 89 |   T_LCPAREN,
 90 |   T_DOT,
 91 |   T_ARROW,
 92 |   T_PLUS = 110,
 93 |   T_MINUS,
 94 |   T_NOT,
 95 |   T_BITNOT,
 96 |   T_STAR,
 97 |   T_AMPERSAND,
 98 |   T_DIV,
 99 |   T_MOD,
100 |   T_LSHIFT,
101 |   T_RSHIFT,
102 |   T_LESS = 120,
103 |   T_LESSEQ,
104 |   T_GREATER,
105 |   T_GREATEREQ,
106 |   T_EQ,
107 |   T_NOTEQ,
108 |   T_BITXOR,
109 |   T_BITOR,
110 |   T_AND,
111 |   T_OR,
112 |   T_QMARK = 130,
113 |   T_COMMA,
114 |   T_COLON,
115 |   T_SEMICOLON,
116 |   T_SQUOTE,
117 |   T_DQUOTE,
118 |   T_ASSIGN,
119 |   T_PLUS_ASSIGN,
120 |   T_MINUS_ASSIGN,
121 |   T_STAR_ASSIGN,
122 |   T_DIV_ASSIGN = 140,
123 |   T_MOD_ASSIGN,
124 |   T_LSHIFT_ASSIGN,
125 |   T_RSHIFT_ASSIGN,
126 |   T_AMPERSAND_ASSIGN,
127 |   T_BITXOR_ASSIGN,
128 |   T_BITOR_ASSIGN,
129 |   T_SIZEOF = 147,
130 |   
131 |   // This is a primitive keyword that is used to indicate 
132 |   // varargs in C
133 |   // This is neither a keyword nor an operator, in a sense that
134 |   // on one hand it is not parsed as a keyword, and on the other hand
135 |   // it does not have operator attributes related to it
136 |   T_ELLIPSIS = 148,
137 |   
138 |   // The following are operator types for overloading in C
139 |   // e.g. ++ and -- have pre- and post-fix form
140 |   
141 |   // ++
142 |   T_POST_INC = 200,
143 |   T_PRE_INC = 201,
144 |   
145 |   // --
146 |   T_POST_DEC = 202,
147 |   T_PRE_DEC = 203,
148 | 
149 |   // *
150 |   T_MULT = 204,
151 |   T_DEREF = 205,
152 | 
153 |   // &
154 |   T_ADDR = 206,
155 |   T_BITAND = 207,
156 |   
157 |   // -
158 |   T_NEG = 208,
159 |   T_SUBTRACTION = 209,
160 |   
161 |   // +
162 |   T_POS = 210,
163 |   T_ADDITION = 211,
164 |   
165 |   // Prefix "(" is parsed as parenthesis, postfix ( is function call
166 |   // Though prefix ( could also be type cast, that requires some
167 |   // type checking.
168 |   T_PAREN = 212,
169 |   T_TYPECAST = 213,
170 |   T_FUNCCALL = 214,
171 |   
172 |   // []
173 |   T_ARRAYSUB = 215,
174 |   
175 |   // This one is artificial: function arguments
176 |   // Since T_FUNCCALL only has 2 parameters, we need to group all its
177 |   // arguments into one syntax node, otherwise the reduce functuon would not be
178 |   // able to know how many value node should it reduce
179 |   T_FUNCARG = 216,
180 |   
181 | };
182 | 
183 | /////////////////////////////////////////////////////////////////////
184 | // enum class TokenType ends
185 | /////////////////////////////////////////////////////////////////////
186 | 
// Associativity, i.e. the order in which operators on the same
// precedence level are evaluated
enum class EvalOrder {
  LEFT_TO_RIGHT = 0,
  RIGHT_TO_LEFT = 1,
};
194 | 
195 | /*
196 |  * struct TokenTypeHasher - Hash function for enum class
197 |  */
198 | struct TokenTypeHasher {
199 |   inline size_t operator()(const TokenType &tt) const {
200 |     return static_cast<size_t>(tt);
201 |   }
202 | };
203 | 
204 | /*
205 |  * struct TokenTypeEq - Comparison function for enum class
206 |  */
207 | struct TokenTypeEq {
208 |   inline bool operator()(const TokenType &tt1, const TokenType &tt2) const {
209 |     return static_cast<int>(tt1) == static_cast<int>(tt2);
210 |   }
211 | };
212 | 
213 | /////////////////////////////////////////////////////////////////////
214 | // struct OpInfo
215 | /////////////////////////////////////////////////////////////////////
216 | 
/*
 * struct OpInfo - Stores information about operators including
 *                 precedence, associativity and number of operands
 *
 * Instances are the mapped values of TokenInfo::op_map
 */
struct OpInfo {
  // Precedence level: the smaller the number, the higher the precedence
  int precedence;
  
  // Number of operands the operator takes:
  // -1 for parenthesis, positive number for all others
  int operand_num;
  
  // Associativity is used to resolve shift-reduce conflict
  // when the precedence is the same
  EvalOrder associativity;
  
  // Whether the operator is postfix unary operator
  // This is used to determine whether the operator after
  // this one is postfix or prefix
  bool is_postfix_unary;
};
237 | 
238 | /////////////////////////////////////////////////////////////////////
239 | // struct OpInfo ends
240 | /////////////////////////////////////////////////////////////////////
241 | 
242 | /////////////////////////////////////////////////////////////////////
243 | // class TokenInfo
244 | /////////////////////////////////////////////////////////////////////
245 | 
246 | /*
247 |  * class TokenInfo - This is the helper class that facilitates tokenizer and
248 |  *                   syntax analyzer
249 |  */
250 | class TokenInfo {
251 |  public:
252 |   using keyword_map_value_type = std::pair<std::string, TokenType>;
253 |   using keyword_map_type = std::unordered_map<std::string, TokenType>;
254 | 
255 |   // The value type used in operator map
256 |   using op_map_value_type = \
257 |     std::pair<TokenType, OpInfo>;
258 |     
259 |   using op_map_type = \
260 |     std::unordered_map<TokenType,
261 |                        OpInfo,
262 |                        TokenTypeHasher,
263 |                        TokenTypeEq>;
264 |                        
265 |   // The next two are used in token name map that maps token to string name
266 |   using token_name_map_value_type = std::pair<TokenType, std::string>;
267 |   using token_name_map_type = \
268 |     std::unordered_map<TokenType,
269 |                        std::string,
270 |                        TokenTypeHasher,
271 |                        TokenTypeEq>;
272 | 
273 |   static const keyword_map_type keyword_map;
274 |   static const op_map_type op_map;
275 |   // This is used for debugging and error reporting
276 |   static const token_name_map_type token_name_map;
277 |   
278 |   // This stored token types that represent built-in type
279 |   static const std::unordered_set<TokenType,
280 |                                   TokenTypeHasher,
281 |                                   TokenTypeEq> builtin_type_set;
282 |   
283 |   /*
284 |    * GetOpInfo() - Return the struct of (precedence, op count, associativity)
285 |    *               of a specific operator
286 |    *
287 |    * If the operator is not found then it implies the type is not part of
288 |    * an expression, and if we are parsing an expression then probably
289 |    * it is the end of an expression
290 |    *
291 |    * We return a constant pointer to the structure
292 |    */
293 |   static const OpInfo *GetOpInfo(TokenType type) {
294 |     auto it = TokenInfo::op_map.find(type);
295 |     
296 |     // If does not find then return nullptr to indicate this
297 |     // is not a valid operator type
298 |     //
299 |     // This branch is useful since
300 |     if(it == TokenInfo::op_map.end()) {
301 |       return nullptr;
302 |     }
303 |     
304 |     return &it->second;
305 |   }
306 |   
307 |   /*
308 |    * GetTokenName() - Given a token type, return the name of that type
309 |    *
310 |    * The name is returned in a constant string reference form
311 |    */
312 |   static const std::string &GetTokenName(TokenType type) {
313 |     auto it = TokenInfo::token_name_map.find(type);
314 | 
315 |     // Just avoid using unseen tokens in the program
316 |     assert(it != TokenInfo::token_name_map.end());
317 | 
318 |     return it->second;
319 |   }
320 |   
321 | };
322 | 
323 | /////////////////////////////////////////////////////////////////////
324 | // class TokenInfo ends
325 | /////////////////////////////////////////////////////////////////////
326 | 
327 | /////////////////////////////////////////////////////////////////////
328 | // class Token
329 | /////////////////////////////////////////////////////////////////////
330 | 
331 | /*
332 |  * class Token - Main class to represent lexicon
333 |  */
334 | class Token {
335 |   friend class SlabAllocator<Token>;
336 |  private:
337 |   TokenType type;
338 |   
339 |   union {
340 |     // Integer constant
341 |     unsigned long int_const;
342 |     
343 |     // char constant
344 |     char char_const;
345 |     
346 |     // String constant
347 |     std::string *string_const_p;
348 |     
349 |     // Identifier
350 |     std::string *ident_p;
351 |   } data;
352 |   
353 |   // Static data member to allocate node from a slab allocator
354 |   static SlabAllocator<Token> allocator;
355 | 
356 |   /*
357 |    * Constructor() - Construct a token object with corresponding type
358 |    *
359 |    * We choose not to set data here since it is a union
360 |    */
361 |   Token(TokenType p_type) :
362 |     type{p_type} {
363 |     // This will also clear the pointer
364 |     data.int_const = 0;
365 |     
366 |     assert(data.ident_p == nullptr);
367 |     assert(data.string_const_p == nullptr);
368 |   }
369 |   
370 |   /*
371 |    * Destructor - Frees the pointer if there is one
372 |    *
373 |    * The ownership of the pointer stored as identifier or string constant
374 |    * belongs to Token object
375 |    *
376 |    * This is made private to prevent being deleted by something other than
377 |    * the SlabAllocator
378 |    */
379 |   ~Token() {
380 |     // In both case the target is a string pointer
381 |     // so we could just delete it without distinguishing
382 |     // further on its type
383 |     if(type == TokenType::T_IDENT || \
384 |        type == TokenType::T_STRING_CONST) {
385 |          
386 |       // If we destruct the string in exception handler
387 |       // then the pointer is nullptr since during construction
388 |       // we set it to nullptr and it has not been filled with anything
389 |       if(data.string_const_p != nullptr) {
390 |         delete data.string_const_p;
391 |       }
392 |     }
393 |     
394 |     return;
395 |   }
396 | 
397 |  public:
398 | 
399 |   /*
400 |    * SetType() - Assigns a new type to the token
401 |    *
402 |    * This is necessary since we need to resolve ambiguity during parsing
403 |    * with operator types. e.g. "*" could either be used as multiplication
404 |    * or be used as pointer dereference operator
405 |    */
406 |   void SetType(TokenType p_type) {
407 |     type = p_type;
408 |     
409 |     return;
410 |   }
411 |   
412 |   /*
413 |    * GetType() - Returns the type of the token
414 |    */
415 |   TokenType GetType() const {
416 |     return type;
417 |   }
418 |   
419 |   /*
420 |    * SetIntConst() - Set a integer constant number to this object
421 |    *
422 |    * This function requires that the token type must be T_INT_CONST
423 |    */
424 |   void SetIntConst(unsigned long p_int_const) {
425 |     assert(type == TokenType::T_INT_CONST);
426 |     
427 |     data.int_const = p_int_const;
428 |     
429 |     return;
430 |   }
431 |   
432 |   /*
433 |    * GetIntConst() - Returns the integer constant
434 |    */
435 |   unsigned long GetIntConst() const {
436 |     assert(type == TokenType::T_INT_CONST);
437 |     
438 |     return data.int_const;
439 |   }
440 |   
441 |   /*
442 |    * SetCharConst() - Set a char constant to this object
443 |    *
444 |    * This function requires that the token must be of T_CHAR_CONST
445 |    */
446 |   void SetCharConst(char p_char_const) {
447 |     assert(type == TokenType::T_CHAR_CONST);
448 |     
449 |     data.char_const = p_char_const;
450 |     
451 |     return;
452 |   }
453 |   
454 |   /*
455 |    * GetCharConst() - Returns a char constant
456 |    */
457 |   char GetCharConst() const {
458 |     assert(type == TokenType::T_CHAR_CONST);
459 |     
460 |     return data.char_const;
461 |   }
462 |   
463 |   /*
464 |    * SetStringConst() - Set a string constant to this object
465 |    *
466 |    * This function requires that the token must be of T_STRING_CONST
467 |    */
468 |   void SetStringConst(std::string *p_string_const_p) {
469 |     assert(type == TokenType::T_STRING_CONST);
470 | 
471 |     data.string_const_p = p_string_const_p;
472 | 
473 |     return;
474 |   }
475 |   
476 |   /*
477 |    * GetStringConst() - Returns the string pointer
478 |    */
479 |   std::string *GetStringConst() const {
480 |     assert(type == TokenType::T_STRING_CONST);
481 |     
482 |     return data.string_const_p;
483 |   }
484 |   
485 |   /*
486 |    * SetIdentifier() - Set an identifier string to this object
487 |    *
488 |    * This function requires that the token must be of T_IDENT
489 |    */
490 |   void SetIdentifier(std::string *p_ident_p) {
491 |     assert(type == TokenType::T_IDENT);
492 | 
493 |     data.ident_p = p_ident_p;
494 | 
495 |     return;
496 |   }
497 |   
498 |   /*
499 |    * GetIdentifier() - Returns the identifier string object
500 |    */
501 |   std::string *GetIdentifier() const {
502 |     assert(type == TokenType::T_IDENT);
503 | 
504 |     return data.ident_p;
505 |   }
506 |   
507 |   /*
508 |    * ToString() - Convert the token node to string representation
509 |    *
510 |    * There is no trailing '\n' attached with the string
511 |    */
512 |   std::string ToString() const {
513 |     const std::string &name = TokenInfo::GetTokenName(type);
514 |     
515 |     if(type == TokenType::T_IDENT) {
516 |       return name + ' ' + *GetIdentifier();
517 |     } else if(type == TokenType::T_STRING_CONST) {
518 |       return name + ' ' + *GetStringConst();
519 |     } else if(type == TokenType::T_INT_CONST) {
520 |       return name + ' ' + std::to_string(GetIntConst());
521 |     } else if(type == TokenType::T_CHAR_CONST) {
522 |       return name + ' ' + \
523 |              std::to_string(static_cast<int>(GetCharConst()));
524 |     } else {
525 |       return name;
526 |     }
527 |   }
528 |   
529 |   /*
530 |    * Get() - static function to construct a token node object
531 |    */
532 |   template <typename ...Args>
533 |   static Token *Get(Args&&... args) {
534 |     return Token::allocator.Get(args...);
535 |   }
536 | };
537 | 
538 | } // namespace cfront
539 | } // namespace wangziqi2013
540 | 


--------------------------------------------------------------------------------
/src/parse.c:
--------------------------------------------------------------------------------
 1 | 
 2 | #include "parse.h"
 3 | 
 4 | parse_stmt_cxt_t *parse_init(char *input) { return parse_exp_init(input); }
 5 | void parse_free(parse_cxt_t *cxt) { parse_exp_free(cxt); }
 6 | 
 7 | // Top-level parsing, i.e., global level parsing
 8 | // There are five possible cases:
 9 | //  1. Base type + ';' must be a type declaration, most likely struct/union/enum
10 | //  2. Base type + decl + "," must be a type declaration or data definition
11 | //  3. Base type + decl + "=" must be a data definition with initializer
12 | //  4. Base type + decl + ";" must be a global declaration, or function prototype
13 | //  5. Base type + func decl + '{' must be function definition
14 | token_t *parse(parse_cxt_t *cxt) {
15 |   token_t *root = token_alloc_type(T_ROOT);
16 |   while(1) {
17 |     if(token_lookahead(cxt->token_cxt, 1) == NULL) {
18 |       break; // Reached EOF
19 |     }
20 |     token_t *basetype = parse_decl_basetype(cxt);
21 |     if(token_lookahead_notnull(cxt->token_cxt, 1)->type == T_SEMICOLON) { // Case 1
22 |       token_consume_type(cxt->token_cxt, T_SEMICOLON);
23 |       ast_append_child(root, ast_append_child(token_alloc_type(T_GLOBAL_DECL_ENTRY), basetype));
24 |       continue;
25 |     }
26 |     token_t *decl = parse_decl(cxt, PARSE_DECL_NOBASETYPE);
27 |     token_t *la = token_lookahead_notnull(cxt->token_cxt, 1);
28 |     //printf("la type %s\n", token_typestr(la->type));
29 |     if(la->type == T_LCPAREN) { // Case 5
30 |       assert(ast_getchild(decl, 0) != NULL);
31 |       //ast_print(decl, 0);
32 |       //if(ast_getchild(decl, 0)->type != EXP_FUNC_CALL) // Only function type could have a body
33 |       //  error_row_col_exit(cxt->token_cxt->s, "Only function definition can have a body\n");
34 |       token_t *comp_stmt = parse_comp_stmt(cxt);
35 |       ast_push_child(decl, basetype);
36 |       ast_append_child(root, ast_append_child(ast_append_child(token_alloc_type(T_GLOBAL_FUNC), decl), comp_stmt));
37 |       continue;
38 |     }
39 |     token_t *entry = ast_append_child(token_alloc_type(T_GLOBAL_DECL_ENTRY), basetype);
40 |     ast_append_child(root, entry);
41 |     while(1) {
42 |       // Check decl's name here; If it is typedef then add the name into the token cxt
43 |       if(DECL_ISTYPEDEF(basetype->decl_prop)) {
44 |         token_t *name = ast_gettype(decl, T_IDENT);
45 |         if(name == NULL) {
46 |           error_row_col_exit(cxt->token_cxt->s, "Expecting a name for typedef\n");
47 |         }
48 |         assert(name->type == T_IDENT);
49 |         token_add_utype(cxt->token_cxt, name);
50 |       }
51 |       token_t *var = ast_append_child(token_alloc_type(T_GLOBAL_DECL_VAR), decl);
52 |       ast_append_child(entry, var);
53 |       if(la->type == T_ASSIGN) { // case 3
54 |         token_consume_type(cxt->token_cxt, T_ASSIGN);
55 |         if(token_lookahead_notnull(cxt->token_cxt, 1)->type == T_LCPAREN) ast_append_child(var, parse_init_list(cxt));
56 |         else ast_append_child(var, parse_exp(cxt, PARSE_EXP_NOCOMMA));
57 |         la = token_lookahead_notnull(cxt->token_cxt, 1);
58 |       }
59 |       if(la->type == T_SEMICOLON) { // case 4
60 |         token_consume_type(cxt->token_cxt, T_SEMICOLON); 
61 |         break; 
62 |       } else if(la->type == T_COMMA) { // case 2
63 |         token_consume_type(cxt->token_cxt, T_COMMA);
64 |         decl = parse_decl(cxt, PARSE_DECL_NOBASETYPE);
65 |         la = token_lookahead_notnull(cxt->token_cxt, 1);
66 |         continue;
67 |       } else {
68 |         error_row_col_exit(la->offset, "Expecting \',\', \'=\' or \';\' for global declaration\n");
69 |       }
70 |     }
71 |   }
72 |   return root;
73 | }


--------------------------------------------------------------------------------
/src/parse.h:
--------------------------------------------------------------------------------
 1 | 
 2 | #include "parse_exp.h"
 3 | #include "parse_decl.h"
 4 | #include "parse_comp.h"
 5 | #include "parse_stmt.h"
 6 | 
 7 | #ifndef _PARSE_H
 8 | #define _PARSE_H
 9 | 
// The top-level parser uses the expression parser's context type
typedef parse_exp_cxt_t parse_cxt_t;

// Creates a parsing context for the source text in input
parse_cxt_t *parse_init(char *input);
// Releases a context returned by parse_init()
void parse_free(parse_cxt_t *cxt);
// Parses global declarations and function definitions until the end of
// input and returns the T_ROOT node that holds them
token_t *parse(parse_cxt_t *cxt);
15 | 
16 | #endif


--------------------------------------------------------------------------------
/src/parse_comp.c:
--------------------------------------------------------------------------------
 1 | 
 2 | #include "parse_comp.h"
 3 | #include "eval.h"
 4 | 
 5 | parse_decl_cxt_t *parse_comp_init(char *input) { return parse_exp_init(input); }
 6 | void parse_comp_free(parse_comp_cxt_t *cxt) { parse_exp_free(cxt); }
 7 | 
 8 | // This parses struct or union or enum
 9 | token_t *parse_comp(parse_exp_cxt_t *cxt) {
10 |   token_t *token = token_get_next(cxt->token_cxt);
11 |   assert(token);
12 |   switch(token->type) {
13 |     case T_STRUCT: case T_UNION: return parse_struct_union(cxt, token);
14 |     case T_ENUM: return parse_enum(cxt, token);
15 |     default: assert(0);
16 |   }
17 | }
18 | 
19 | // Returns 1 if there is a body, 0 if no body; Name is pushed into root as either
20 | // empty node or IDENT node. If neither name nor body is present report error
21 | int parse_name_body(parse_comp_cxt_t *cxt, token_t *root) {
22 |   token_t *name = token_lookahead_notnull(cxt->token_cxt, 1);
23 |   int has_name = name->type == T_IDENT;
24 |   ast_append_child(root, has_name ? token_get_next(cxt->token_cxt) : token_get_empty());
25 |   int has_body = token_consume_type(cxt->token_cxt, T_LCPAREN);
26 |   if(!has_name && !has_body) error_row_col_exit(root->offset, "Expecting identifier or \'{\' after struct/union\n");
27 |   return has_body;
28 | }
29 | 
30 | // Returns the same node which is either T_STRUCT or T_UNION
31 | // The 2nd child is empty if there is no body or empty body
32 | // Check whether decl_prop has TYPE_EMPTY_BODY bit set
33 | token_t *parse_struct_union(parse_comp_cxt_t *cxt, token_t *root) {
34 |   if(parse_name_body(cxt, root)) {
35 |     
36 |     int has_body = 0; // Might be possible that there is {} as body but it is empty
37 |     while(1) { // loop on lines
38 |       if(token_lookahead_notnull(cxt->token_cxt, 1)->type == T_RCPAREN) { // Finish parsing on '}'
39 |         if(!has_body) {
40 |           ast_append_child(root, token_get_empty());
41 |           root->decl_prop = TYPE_EMPTY_BODY; // Distinguish this from no body defined
42 |         }
43 |         token_consume_type(cxt->token_cxt, T_RCPAREN); 
44 |         break; 
45 |       }
46 |       has_body = 1;
47 |       token_t *comp_decl = ast_append_child(token_alloc_type(T_COMP_DECL), parse_decl_basetype(cxt));
48 |       while(1) { // loop on fields
49 |         token_t *field = token_alloc_type(T_COMP_FIELD);
50 |         ast_append_child(comp_decl, ast_append_child(field, parse_decl(cxt, PARSE_DECL_NOBASETYPE)));
51 |         // Declarator body, can be named or unamed
52 |         token_t *la = token_lookahead_notnull(cxt->token_cxt, 1);
53 |         if(la->type == T_COLON) {
54 |           token_consume_type(cxt->token_cxt, T_COLON);
55 |           token_t *bf; // Assigned next line
56 |           ast_append_child(field, ast_append_child(token_alloc_type(T_BITFIELD), bf = parse_exp(cxt, PARSE_EXP_NOCOMMA)));
57 |           la = token_lookahead_notnull(cxt->token_cxt, 1);
58 |         }
59 |         if(la->type == T_COMMA) { token_consume_type(cxt->token_cxt, T_COMMA); }
60 |         else if(la->type == T_SEMICOLON) { token_consume_type(cxt->token_cxt, T_SEMICOLON); break; } // Finish parsing the field on ';'
61 |         else { error_row_col_exit(la->offset, "Unexpected symbol \"%s\" in struct/union field declaration\n", 
62 |                                   token_typestr(la->type)); }
63 |       }
64 |       ast_append_child(root, comp_decl);
65 |     }
66 |   } else { ast_append_child(root, token_get_empty()); } // Otherwise append an empty child to indicate there is no body
67 |   return root;
68 | }
69 | 
70 | token_t *parse_enum(parse_comp_cxt_t *cxt, token_t *root) {
71 |   if(parse_name_body(cxt, root)) {
72 |     while(1) { // loop on lines
73 |       if(token_lookahead_notnull(cxt->token_cxt, 1)->type == T_RCPAREN) { 
74 |         token_consume_type(cxt->token_cxt, T_RCPAREN); break;
75 |       }
76 |       token_t *enum_field = token_alloc_type(T_ENUM_FIELD);
77 |       ast_append_child(root, enum_field);
78 |       token_t *la = token_lookahead_notnull(cxt->token_cxt, 1);
79 |       if(la->type == T_IDENT) ast_append_child(enum_field, token_get_next(cxt->token_cxt));
80 |       else error_row_col_exit(la->offset, "Expecting an identifier in enum body\n");
81 |       la = token_lookahead_notnull(cxt->token_cxt, 1);
82 |       if(la->type == T_ASSIGN) {
83 |         token_consume_type(cxt->token_cxt, T_ASSIGN);
84 |         ast_append_child(enum_field, parse_exp(cxt, PARSE_EXP_NOCOMMA));
85 |         la = token_lookahead_notnull(cxt->token_cxt, 1);
86 |       }
87 |       // Last entry does not have to use comma
88 |       if(la->type == T_COMMA) { token_consume_type(cxt->token_cxt, T_COMMA); }
89 |       else if(la->type == T_RCPAREN) { token_consume_type(cxt->token_cxt, T_RCPAREN); break; }
90 |       else { error_row_col_exit(la->offset, "Unexpected symbol \"%s\" in enum body\n", 
91 |                                 token_typestr(la->type)); }
92 |     }
93 |   }
94 |   return root;
95 | }


--------------------------------------------------------------------------------
/src/parse_comp.h:
--------------------------------------------------------------------------------
 1 | 
 2 | #ifndef _PARSE_COMP_H
 3 | #define _PARSE_COMP_H
 4 | 
 5 | #include "parse_decl.h"
 6 | 
// Composite type parsing uses the expression parser's context type
typedef parse_exp_cxt_t parse_comp_cxt_t;

// Creates a parsing context for the source text in input
parse_decl_cxt_t *parse_comp_init(char *input);
// Releases a context returned by parse_comp_init()
void parse_comp_free(parse_decl_cxt_t *cxt);
// Consumes a struct/union/enum keyword and parses what follows it
token_t *parse_comp(parse_exp_cxt_t *cxt);
// Parses the optional name and opening '{'; returns 1 if there is a body
int parse_name_body(parse_comp_cxt_t *cxt, token_t *root);
// Parses struct/union name and fields under root, and returns root
token_t *parse_struct_union(parse_comp_cxt_t *cxt, token_t *root);
// Parses enum name and enumerators under root, and returns root
token_t *parse_enum(parse_comp_cxt_t *cxt, token_t *root);
15 | 
16 | #endif


--------------------------------------------------------------------------------
/src/parse_decl.c:
--------------------------------------------------------------------------------
  1 | 
  2 | #include "parse_decl.h"
  3 | #include "parse_comp.h"
  4 | #include "eval.h"
  5 | 
  6 | parse_decl_cxt_t *parse_decl_init(char *input) { return parse_exp_init(input); }
  7 | void parse_decl_free(parse_decl_cxt_t *cxt) { parse_exp_free(cxt); }
  8 | 
  9 | // Whether the token could start a declaration, i.e. being a type, modifier, or udef type
 10 | int parse_decl_isbasetype(parse_decl_cxt_t *cxt, token_t *token) { 
 11 |   (void)cxt; return ((token->decl_prop & DECL_MASK) || token->type == T_UDEF) ? 1 : 0;
 12 | }
 13 | 
 14 | // Same rule as parse_exp_next_token()
 15 | // Note: The following tokens are considered as part of a type expression:
 16 | //   1. ( ) [ ] *  2. const volatile 3. identifier
 17 | token_t *parse_decl_next_token(parse_decl_cxt_t *cxt) {
 18 |   token_t *token = token_lookahead(cxt->token_cxt, 1);
 19 |   int valid; // Below are not "=="
 20 |   if((valid = (token != NULL))) {
 21 |     switch(token->type) {
 22 |       case T_LPAREN: { // If the next symbol constitutes a base type then this is func call
 23 |         token_t *lookahead = token_lookahead(cxt->token_cxt, 2); // Note that we already looked ahead one token
 24 |         if(lookahead != NULL && (parse_decl_isbasetype(cxt, lookahead) || lookahead->type == T_RPAREN)) 
 25 |           token->type = EXP_FUNC_CALL;
 26 |         else token->type = EXP_LPAREN;
 27 |         break;
 28 |       }
 29 |       case T_RPAREN:
 30 |         if(parse_exp_isallowed(cxt, token, PARSE_EXP_ALLOWALL)) token->type = EXP_RPAREN;
 31 |         else valid = 0;
 32 |         break;
 33 |       case T_STAR: token->type = EXP_DEREF; break;
 34 |       case T_LSPAREN: token->type = EXP_ARRAY_SUB; break;
 35 |       case T_RSPAREN: 
 36 |         if(parse_exp_isallowed(cxt, token, PARSE_EXP_ALLOWALL)) token->type = EXP_RSPAREN;
 37 |         else valid = 0;
 38 |         break;
 39 |       case T_IDENT: break;
 40 |       //case T_ELLIPSIS: break; // Ellipsis is processed only in function decl and does not go through this function
 41 |       default: if(!(token->decl_prop & DECL_QUAL_MASK)) valid = 0; // Only allow DECL_QUAL and identifier
 42 |     }
 43 |   }
 44 |   return valid ? token_get_next(cxt->token_cxt) : NULL;
 45 | }
 46 | 
 47 | // Parses the type specifier part of a base type declaration; handles one specifier per call, not a loop
 48 | // Sets the decl_prop of the basetype node according to the type being parsed, and push child for udef, s/u/e
 49 | // Exits with an error if basetype already has a type specifier. A specifier that is cut short by the end of
 50 | // the input ("unsigned", "long") takes its implied type instead of dereferencing a NULL token
 51 | void parse_typespec(parse_decl_cxt_t *cxt, token_t *basetype) {
 52 |   if(BASETYPE_GET(basetype->decl_prop) != BASETYPE_NONE) 
 53 |     error_row_col_exit(cxt->token_cxt->s, "Already has type specifier \"%s\"\n", token_decl_print(basetype->decl_prop));
 54 |   token_t *la = token_lookahead_notnull(cxt->token_cxt, 1);
 55 |   basetype->offset = la->offset; // In case no child is pushed for the base type node, we assign the next token's offset
 56 |   const token_type_t first = la->type; // Kept to detect illegal "signed long double"
 57 |   const int usign = (first == T_UNSIGNED);
 58 |   // Stage 1: non-integer specifiers finish here; for the integer family only the sign keyword is dropped
 59 |   switch(first) {
 60 |     case T_UNSIGNED: case T_SIGNED: token_free(token_get_next(cxt->token_cxt)); break;
 61 |     case T_CHAR: case T_SHORT: case T_INT: case T_LONG: break; // Consumed in stage 2 together with the signed forms
 62 |     case T_FLOAT: BASETYPE_SET(basetype, BASETYPE_FLOAT); token_free(token_get_next(cxt->token_cxt)); return;
 63 |     case T_DOUBLE: BASETYPE_SET(basetype, BASETYPE_DOUBLE); token_free(token_get_next(cxt->token_cxt)); return;
 64 |     case T_VOID: BASETYPE_SET(basetype, BASETYPE_VOID); token_free(token_get_next(cxt->token_cxt)); return;
 65 |     case T_UDEF: BASETYPE_SET(ast_append_child(basetype, token_get_next(cxt->token_cxt)), BASETYPE_UDEF); return;
 66 |     case T_STRUCT: BASETYPE_SET(ast_append_child(basetype, parse_comp(cxt)), BASETYPE_STRUCT); return;
 67 |     case T_UNION: BASETYPE_SET(ast_append_child(basetype, parse_comp(cxt)), BASETYPE_UNION); return;
 68 |     case T_ENUM: BASETYPE_SET(ast_append_child(basetype, parse_comp(cxt)), BASETYPE_ENUM); return;
 69 |     default: assert(0); return;
 70 |   }
 71 |   // Stage 2: first keyword after the optional sign. NOTE(review): NULL is assumed to mean end of input - confirm
 72 |   token_t *token = token_get_next(cxt->token_cxt);
 73 |   if(token == NULL) { BASETYPE_SET(basetype, usign ? BASETYPE_UINT : BASETYPE_INT); return; }
 74 |   switch(token->type) {
 75 |     case T_CHAR: BASETYPE_SET(basetype, usign ? BASETYPE_UCHAR : BASETYPE_CHAR); token_free(token); return;
 76 |     case T_INT: BASETYPE_SET(basetype, usign ? BASETYPE_UINT : BASETYPE_INT); token_free(token); return;
 77 |     case T_SHORT: // short int has the same effect as short, so we just try to consume an extra int
 78 |       BASETYPE_SET(basetype, usign ? BASETYPE_USHORT : BASETYPE_SHORT); token_free(token); 
 79 |       token_consume_type(cxt->token_cxt, T_INT); return;
 80 |     case T_LONG: token_free(token); break; // long long; long long int; long int; long; long double
 81 |     default: // unsigned / signed without other base type implies int type
 82 |       BASETYPE_SET(basetype, usign ? BASETYPE_UINT : BASETYPE_INT); token_pushback(cxt->token_cxt, token); return;
 83 |   }
 84 |   // Stage 3: whatever follows "long" selects the variant; nothing at all means plain long
 85 |   token = token_get_next(cxt->token_cxt);
 86 |   if(token == NULL) { BASETYPE_SET(basetype, usign ? BASETYPE_ULONG : BASETYPE_LONG); return; }
 87 |   switch(token->type) {
 88 |     case T_LONG: // Same as short [int]
 89 |       BASETYPE_SET(basetype, usign ? BASETYPE_ULLONG : BASETYPE_LLONG); token_free(token);
 90 |       token_consume_type(cxt->token_cxt, T_INT); return;
 91 |     case T_DOUBLE:
 92 |       if(first == T_SIGNED || first == T_UNSIGNED) 
 93 |         error_row_col_exit(token->offset, "Type \"long double\" does not allow sign declaration\n");
 94 |       BASETYPE_SET(basetype, BASETYPE_LDOUBLE); token_free(token); return;
 95 |     case T_INT: BASETYPE_SET(basetype, usign ? BASETYPE_ULONG : BASETYPE_LONG); token_free(token); return;
 96 |     default: // Not part of the specifier: give the token back
 97 |       BASETYPE_SET(basetype, usign ? BASETYPE_ULONG : BASETYPE_LONG); token_pushback(cxt->token_cxt, token); return;
 98 |   }
 99 | }
100 | 
101 | // Parses a base type: a run of declaration keywords (tokens with DECL_MASK bits set in decl_prop) that must
102 | // contain a type specifier (udef/builtin/enum/struct/union). Returns a new T_BASETYPE node.
103 | // The stack is not changed, calling this function does not need recurse
104 | token_t *parse_decl_basetype(parse_decl_cxt_t *cxt) {
105 |   token_t *basetype = token_alloc_type(T_BASETYPE);
106 |   for(token_t *la = token_lookahead(cxt->token_cxt, 1); la != NULL && (la->decl_prop & DECL_MASK);
107 |       la = token_lookahead(cxt->token_cxt, 1)) {
108 |     if(la->decl_prop & DECL_TYPE_MASK) { parse_typespec(cxt, basetype); continue; } // Type specifier keyword(s)
109 |     if(!token_decl_apply(basetype, la)) // Anything else is a modifier that gets merged into basetype
110 |       error_row_col_exit(la->offset, "Incompatible type modifier \"%s\" with \"%s\"\n",
111 |       token_symstr(la->type), token_decl_print(basetype->decl_prop));
112 |     token_consume_type(cxt->token_cxt, la->type); // Consume whatever it is
113 |   }
114 |   // Must have some type, cannot be just qualifiers and modifiers
115 |   if(BASETYPE_GET(basetype->decl_prop) == BASETYPE_NONE) error_row_col_exit(cxt->token_cxt->s, "Declaration lacks a type specifier\n");
116 |   return basetype;
117 | }
118 | 
119 | // Parses one declarator into a new T_DECL node. Children in order: the base type (an empty node unless
120 | // hasbasetype == PARSE_DECL_HASBASETYPE), the derivation expression built from the '*', '[]' and '()'
121 | // operators, and the declared name (an empty node for an abstract declarator)
122 | token_t *parse_decl(parse_decl_cxt_t *cxt, int hasbasetype) {
123 |   parse_exp_recurse(cxt);
124 |   assert(parse_exp_size(cxt, OP_STACK) == 0 && parse_exp_size(cxt, AST_STACK) == 0); // Must start on a new stack
125 |   token_t *decl = token_alloc_type(T_DECL);
126 |   // Append base type node if the flag indicates so, or empty node as placeholder
127 |   ast_append_child(decl, hasbasetype == PARSE_DECL_HASBASETYPE ? parse_decl_basetype(cxt) : token_get_empty()); 
128 |   token_t *placeholder = token_get_empty();
129 |   // Placeholder operand for the innermost operator because we do not push ident to AST stack
130 |   parse_exp_shift(cxt, AST_STACK, placeholder); 
131 |   token_t *decl_name = NULL;  // If not an abstract declarator this is the name
132 |   while(1) {
133 |     token_t *token = parse_decl_next_token(cxt);
134 |     if(token == NULL) { // No further declarator token: finish the node
135 |       ast_append_child(decl, parse_exp_reduce_all(cxt)); // This may directly put the placeholder node as a expression
136 |       ast_append_child(decl, decl_name ? decl_name : token_get_empty()); // Only appends the name if there is one, or empty node
137 |       parse_exp_decurse(cxt);
138 |       // Leaf operand always empty node as stop sign when traversing the type derivation chain
139 |       return decl;
140 |     }
141 |     if(token->decl_prop & DECL_QUAL_MASK) { // Special case for type qualifiers
142 |       token_t *top = parse_exp_peek(cxt, OP_STACK);
143 |       if(top == NULL || top->type != EXP_DEREF || cxt->last_active_stack != OP_STACK) 
144 |         error_row_col_exit(token->offset, "Qualifier \"%s\" must follow pointer\n", token_symstr(token->type));
145 |       if(!token_decl_apply(top, token))
146 |         error_row_col_exit(token->offset, "Qualifier \"%s\" not compatible with \"%s\"\n",
147 |                            token_symstr(token->type), token_decl_print(top->decl_prop));
148 |       token_free(token);
149 |     } else {
150 |       switch(token->type) {
151 |         case EXP_DEREF: // To avoid int **a*; being legal, because identifiers are not pushed to AST stack
152 |           if(decl_name) error_row_col_exit(token->offset, "Pointers can only occur before declared name\n");
153 |           parse_exp_shift(cxt, OP_STACK, token); break;
154 |         case T_IDENT:  // Trick: Do not push it onto the stack
155 |           if(decl_name) error_row_col_exit(token->offset, "Type declaration can have at most one identifier\n");
156 |           decl_name = token; break;
157 |           // Keeping the name off the AST stack is sufficient as long as parenthesis is not parsed recursively
158 |         case EXP_ARRAY_SUB: {
159 |           parse_exp_shift(cxt, OP_STACK, token);
160 |           token_t *la = token_lookahead(cxt->token_cxt, 1);
161 |           token_t *index;
162 |           // "[]" without a size gets an empty node as its index, otherwise the size is a full expression
163 |           if(la != NULL && la->type == T_RSPAREN) { index = token_get_empty(); }
164 |           else { index = parse_exp(cxt, PARSE_EXP_ALLOWALL); }
165 |           parse_exp_shift(cxt, AST_STACK, index);
166 |           parse_exp_reduce(cxt, -1, 1); // This reduces array sub
167 |           if(!token_consume_type(cxt->token_cxt, T_RSPAREN)) 
168 |             error_row_col_exit(token->offset, "Array declaration expects \']\'\n");
169 |           break;
170 |         }
171 |         case EXP_FUNC_CALL: {
172 |           parse_exp_shift(cxt, OP_STACK, token);
173 |           token_t *la = token_lookahead(cxt->token_cxt, 1);
174 |           if(la != NULL && la->type == T_RPAREN) { // "()": the argument list is a single empty node
175 |             ast_push_child(token, token_get_empty());
176 |             token_consume_type(cxt->token_cxt, T_RPAREN);
177 |           } else {
178 |             // Every argument is parsed as a declaration of its own, base type included
179 |             while(1) {
180 |               ast_append_child(token, parse_decl(cxt, PARSE_DECL_HASBASETYPE));
181 |               if(token_consume_type(cxt->token_cxt, T_COMMA)) { // Special: check "..." after ","
182 |                 if(token_lookahead_notnull(cxt->token_cxt, 1)->type == T_ELLIPSIS) { // after '...' there can only be ')'
183 |                   ast_append_child(token, token_get_next(cxt->token_cxt));
184 |                   if(!token_consume_type(cxt->token_cxt, T_RPAREN))
185 |                     error_row_col_exit(cxt->token_cxt->s, "\"...\" could only be the last function argument\n");
186 |                   break;
187 |                 }
188 |               }
189 |               else if(token_consume_type(cxt->token_cxt, T_RPAREN)) { break; }
190 |               else error_row_col_exit(token->offset, "Function declaration expects \')\' or \',\' or \"...\"\n");
191 |             }
192 |           }
193 |           parse_exp_reduce(cxt, 1, 1); // This reduces EXP_FUNC_CALL
194 |           break;
195 |         }
196 |         case EXP_LPAREN: parse_exp_shift(cxt, OP_STACK, token); break;
197 |         case EXP_RPAREN: {
198 |           token_t *op_top = parse_exp_peek(cxt, OP_STACK);
199 |           while(op_top != NULL && op_top->type != EXP_LPAREN) op_top = parse_exp_reduce(cxt, -1, 0);
200 |           if(op_top == NULL) error_row_col_exit(token->offset, "Did not find matching \'(\' in declaration\n");
201 |           token_free(stack_pop(cxt->stacks[OP_STACK]));
202 |           token_free(token);
203 |           break;
204 |         } // Note that unrelated tokens are filtered
205 |         default: printf("%s %s\n", token_typestr(token->type), token->offset); assert(0);
206 |       } // switch(token->type)
207 |     } // if(token is qualifier)
208 |   } // while(1)
209 | }
210 | 


--------------------------------------------------------------------------------
/src/parse_decl.h:
--------------------------------------------------------------------------------
 1 | 
 2 | #ifndef PARSE_DECL_H
 3 | #define PARSE_DECL_H
 4 | 
 5 | #include "parse_exp.h"
 6 | #include "hashtable.h"
 7 | 
 8 | // Values for the hasbasetype argument of parse_decl(): whether parse_decl() parses the base type itself
 9 | #define PARSE_DECL_NOBASETYPE  0
10 | #define PARSE_DECL_HASBASETYPE 1
11 | // Declaration parsing works directly on the expression parser's context
12 | typedef parse_exp_cxt_t parse_decl_cxt_t;
13 | 
14 | parse_decl_cxt_t *parse_decl_init(char *input);
15 | void parse_decl_free(parse_decl_cxt_t *cxt);
16 | int parse_decl_isbasetype(parse_decl_cxt_t *cxt, token_t *token);
17 | token_t *parse_decl_next_token(parse_decl_cxt_t *cxt);
18 | // Parses one type specifier and records it on the given T_BASETYPE node
19 | void parse_typespec(parse_decl_cxt_t *cxt, token_t *basetype);
20 | // Parses modifiers plus the type specifier; returns a new T_BASETYPE node
21 | token_t *parse_decl_basetype(parse_decl_cxt_t *cxt);
22 | // Parses one declarator; returns a new T_DECL node (base type, derivation expression, name)
23 | token_t *parse_decl(parse_decl_cxt_t *cxt, int hasbasetype);
24 | 
25 | #endif


--------------------------------------------------------------------------------
/src/parse_exp.h:
--------------------------------------------------------------------------------
 1 | 
 2 | #ifndef PARSE_EXP_H
 3 | #define PARSE_EXP_H
 4 | 
 5 | #include <stdint.h>  // uint32_t, used by parse_exp_disallow_t below
 6 | #include "stack.h"
 7 | #include "token.h"
 8 | #include "ast.h"
 9 | #include "hashtable.h"
10 | 
11 | // Stack ids: index into parse_exp_cxt_t.stacks[] and value of the stack_id parameters below
12 | #define AST_STACK 0
13 | #define OP_STACK 1
14 | 
15 | typedef uint32_t parse_exp_disallow_t; // A bit mask
16 | #define PARSE_EXP_ALLOWALL 0x00000000
17 | #define PARSE_EXP_NOCOMMA  0x00000001  // Do not allow outermost ','
18 | #define PARSE_EXP_NOCOLON  0x00000002  // Do not allow outermost ':'
19 | 
20 | // Parser state; the declaration and statement parsers typedef this same type as their context
21 | typedef struct {
22 |   // Either AST_STACK or OP_STACK; do not need save because a shift will happen
23 |   int last_active_stack;
24 |   stack_t *stacks[2];     // Indexed by AST_STACK / OP_STACK
25 |   stack_t *tops[2];
26 |   stack_t *prev_active;
27 |   token_cxt_t *token_cxt; // Token source the parser reads from
28 | } parse_exp_cxt_t;
29 | 
30 | parse_exp_cxt_t *parse_exp_init(char *input);
31 | void parse_exp_reinit(parse_exp_cxt_t *cxt, char *input);
32 | void parse_exp_free(parse_exp_cxt_t *cxt);
33 | int parse_exp_isoutermost(parse_exp_cxt_t *cxt);
34 | int parse_exp_isallowed(parse_exp_cxt_t *cxt, token_t *token, parse_exp_disallow_t disallow);
35 | int parse_exp_isexp(parse_exp_cxt_t *cxt, token_t *token, parse_exp_disallow_t disallow);
36 | int parse_exp_isprimary(parse_exp_cxt_t *cxt, token_t *token);
37 | int parse_exp_la_isdecl(parse_exp_cxt_t *cxt);
38 | int parse_exp_size(parse_exp_cxt_t *cxt, int stack_id);
39 | token_t *parse_exp_peek(parse_exp_cxt_t *cxt, int stack_id);
40 | token_t *parse_exp_peek_at(parse_exp_cxt_t *cxt, int stack_id, int index);
41 | int parse_exp_isempty(parse_exp_cxt_t *cxt, int stack_id);
42 | void parse_exp_recurse(parse_exp_cxt_t *cxt);
43 | void parse_exp_decurse(parse_exp_cxt_t *cxt);
44 | token_t *parse_exp_next_token(parse_exp_cxt_t *cxt, parse_exp_disallow_t disallow);
45 | void parse_exp_shift(parse_exp_cxt_t *cxt, int stack_id, token_t *token);
46 | token_t *parse_exp_reduce(parse_exp_cxt_t *cxt, int op_num_override, int allow_paren);
47 | void parse_exp_reduce_preced(parse_exp_cxt_t *cxt, token_t *token);
48 | token_t *parse_exp_reduce_all(parse_exp_cxt_t *cxt);
49 | token_t *parse_exp(parse_exp_cxt_t *cxt, parse_exp_disallow_t disallow);
50 | 
51 | #endif


--------------------------------------------------------------------------------
/src/parse_stmt.c:
--------------------------------------------------------------------------------
  1 | 
  2 | #include "parse_stmt.h"
  3 | #include "parse_decl.h"
  4 | // A statement context is an expression context (see the typedef), so creation is delegated to parse_exp_init()
  5 | parse_stmt_cxt_t *parse_stmt_init(char *input) { parse_stmt_cxt_t *cxt = parse_exp_init(input); return cxt; }
  6 | void parse_stmt_free(parse_stmt_cxt_t *cxt) { parse_exp_free(cxt); return; } // Hands cxt on to parse_exp_free()
  7 | 
  8 | // Parses "ident: stmt" (new T_LBL_STMT root) or "case exp: stmt" / "default: stmt" (keyword token is the root)
  9 | token_t *parse_lbl_stmt(parse_stmt_cxt_t *cxt, token_type_t type) {
 10 |   token_t *root;
 11 |   if(type != T_IDENT) {
 12 |     root = token_get_next(cxt->token_cxt); // The "case" or "default" keyword
 13 |     if(type == T_CASE) ast_append_child(root, parse_exp(cxt, PARSE_EXP_NOCOLON));
 14 |     if(!token_consume_type(cxt->token_cxt, T_COLON))
 15 |       error_row_col_exit(root->offset, "Expecting \':\' for \"%s\" statement\n", token_symstr(root->type));
 16 |   } else {
 17 |     root = ast_append_child(token_alloc_type(T_LBL_STMT), token_get_next(cxt->token_cxt)); // Label name is the first child
 18 |     if(!token_consume_type(cxt->token_cxt, T_COLON)) assert(0); // Caller guarantees this
 19 |   }
 20 |   return ast_append_child(root, parse_stmt(cxt)); // The labeled statement is the last child
 21 | }
 22 | 
 23 | // Parses "exp;" into a T_EXP_STMT node whose only child is the expression
 24 | token_t *parse_exp_stmt(parse_stmt_cxt_t *cxt) {
 25 |   token_t *stmt = token_alloc_type(T_EXP_STMT);
 26 |   ast_append_child(stmt, parse_exp(cxt, PARSE_EXP_ALLOWALL));
 27 |   if(!token_consume_type(cxt->token_cxt, T_SEMICOLON)) error_row_col_exit(cxt->token_cxt->s, "Expecting \';\' after expression statement\n");
 28 |   return stmt;
 29 | }
 30 | // Parses "{ decls stmts }" into a T_COMP_STMT node with two children: a T_DECL_STMT_LIST and a T_STMT_LIST
 31 | token_t *parse_comp_stmt(parse_stmt_cxt_t *cxt) {
 32 |   token_t *decl_list = token_alloc_type(T_DECL_STMT_LIST);
 33 |   token_t *stmt_list = token_alloc_type(T_STMT_LIST);
 34 |   token_t *root = token_alloc_type(T_COMP_STMT);
 35 |   ast_append_child(ast_append_child(root, decl_list), stmt_list);
 36 |   assert(token_lookahead_notnull(cxt->token_cxt, 1)->type == T_LCPAREN);
 37 |   token_consume_type(cxt->token_cxt, T_LCPAREN);
 38 |   token_enter_scope(cxt->token_cxt); // From here until the matching '}' we are in a new scope
 39 |   while(parse_decl_isbasetype(cxt, token_lookahead_notnull(cxt->token_cxt, 1))) { // One entry per declaration line
 40 |     token_t *basetype = parse_decl_basetype(cxt);
 41 |     token_t *entry = token_alloc_type(T_DECL_STMT_ENTRY);
 42 |     ast_append_child(decl_list, ast_append_child(entry, basetype));
 43 |     while(1) { // One T_DECL_STMT_VAR per declarator sharing the base type
 44 |       token_t *decl = parse_decl(cxt, PARSE_DECL_NOBASETYPE);
 45 |       if(DECL_ISTYPEDEF(basetype->decl_prop)) { // typedef: add the declared name to the token cxt (no concrete type needed)
 46 |         token_t *name = ast_gettype(decl, T_IDENT);
 47 |         if(!name) error_row_col_exit(cxt->token_cxt->s, "Expecting a name for typedef\n");
 48 |         assert(name->type == T_IDENT);
 49 |         token_add_utype(cxt->token_cxt, name);
 50 |       }
 51 |       token_t *var = token_alloc_type(T_DECL_STMT_VAR);
 52 |       ast_append_child(entry, ast_append_child(var, decl));
 53 |       token_t *la = token_lookahead_notnull(cxt->token_cxt, 1);
 54 |       if(la->type == T_ASSIGN) { // Optional initializer: a brace list, or one expression wrapped in T_INIT
 55 |         token_consume_type(cxt->token_cxt, T_ASSIGN);
 56 |         ast_append_child(var, token_lookahead_notnull(cxt->token_cxt, 1)->type == T_LCPAREN ? parse_init_list(cxt)
 57 |                               : ast_append_child(token_alloc_type(T_INIT), parse_exp(cxt, PARSE_EXP_NOCOMMA)));
 58 |         la = token_lookahead_notnull(cxt->token_cxt, 1);
 59 |       }
 60 |       if(la->type == T_SEMICOLON) { token_consume_type(cxt->token_cxt, T_SEMICOLON); break; }
 61 |       if(la->type == T_COMMA) { token_consume_type(cxt->token_cxt, T_COMMA); }
 62 |       else { error_row_col_exit(la->offset, "Expecting \',\' or \';\' after variable declaration\n"); }
 63 |     }
 64 |   }
 65 |   while(token_lookahead_notnull(cxt->token_cxt, 1)->type != T_RCPAREN) ast_append_child(stmt_list, parse_stmt(cxt));
 66 |   token_consume_type(cxt->token_cxt, T_RCPAREN);
 67 |   token_exit_scope(cxt->token_cxt); // Leave the scope entered after '{'
 68 |   return root;
 69 | }
 70 | // Parses "if(exp) stmt [else stmt]". The "if" token becomes the root; its children are the condition, the
 71 | // body and, when present, the "else" token which in turn holds the alternative statement
 72 | token_t *parse_if_stmt(parse_stmt_cxt_t *cxt) {
 73 |   assert(token_lookahead_notnull(cxt->token_cxt, 1)->type == T_IF);
 74 |   token_t *root = token_get_next(cxt->token_cxt);
 75 |   if(!token_consume_type(cxt->token_cxt, T_LPAREN)) { error_row_col_exit(root->offset, "Expecting \'(\' after \"if\"\n"); }
 76 |   ast_append_child(root, parse_exp(cxt, PARSE_EXP_ALLOWALL));
 77 |   if(!token_consume_type(cxt->token_cxt, T_RPAREN)) { error_row_col_exit(root->offset, "Expecting \')\' after \"if\"\n"); }
 78 |   ast_append_child(root, parse_stmt(cxt));
 79 |   if(token_lookahead_notnull(cxt->token_cxt, 1)->type != T_ELSE) return root; // No else branch
 80 |   token_t *else_node = token_get_next(cxt->token_cxt);
 81 |   ast_append_child(root, else_node);
 82 |   ast_append_child(else_node, parse_stmt(cxt));
 83 |   return root;
 84 | }
 85 | 
 86 | // Parses "switch(exp) stmt"; the "switch" token is the root, the controlling expression and body its children
 87 | token_t *parse_switch_stmt(parse_stmt_cxt_t *cxt) {
 88 |   assert(token_lookahead_notnull(cxt->token_cxt, 1)->type == T_SWITCH);
 89 |   token_t *root = token_get_next(cxt->token_cxt);
 90 |   if(!token_consume_type(cxt->token_cxt, T_LPAREN)) { error_row_col_exit(root->offset, "Expecting \'(\' after \"switch\"\n"); }
 91 |   ast_append_child(root, parse_exp(cxt, PARSE_EXP_ALLOWALL));
 92 |   if(!token_consume_type(cxt->token_cxt, T_RPAREN)) { error_row_col_exit(root->offset, "Expecting \')\' after \"switch\"\n"); }
 93 |   return ast_append_child(root, parse_stmt(cxt));
 94 | }
 95 | 
 96 | // Parses "while(exp) stmt"; the "while" token is the root, the loop condition and body its children
 97 | token_t *parse_while_stmt(parse_stmt_cxt_t *cxt) {
 98 |   assert(token_lookahead_notnull(cxt->token_cxt, 1)->type == T_WHILE);
 99 |   token_t *root = token_get_next(cxt->token_cxt);
100 |   if(!token_consume_type(cxt->token_cxt, T_LPAREN)) { error_row_col_exit(root->offset, "Expecting \'(\' after \"while\"\n"); }
101 |   ast_append_child(root, parse_exp(cxt, PARSE_EXP_ALLOWALL));
102 |   if(!token_consume_type(cxt->token_cxt, T_RPAREN)) { error_row_col_exit(root->offset, "Expecting \')\' after \"while\"\n"); }
103 |   return ast_append_child(root, parse_stmt(cxt));
104 | }
105 | // Parses "do stmt while(exp);"; the "do" token is the root with the body first and the condition second
106 | token_t *parse_do_stmt(parse_stmt_cxt_t *cxt) {
107 |   assert(token_lookahead_notnull(cxt->token_cxt, 1)->type == T_DO);
108 |   token_t *root = token_get_next(cxt->token_cxt);
109 |   ast_append_child(root, parse_stmt(cxt));
110 |   if(!token_consume_type(cxt->token_cxt, T_WHILE)) { error_row_col_exit(root->offset, "Expecting \"while\" for \"do\" statement\n"); }
111 |   if(!token_consume_type(cxt->token_cxt, T_LPAREN)) { error_row_col_exit(root->offset, "Expecting \'(\' after \"while\"\n"); }
112 |   ast_append_child(root, parse_exp(cxt, PARSE_EXP_ALLOWALL));
113 |   if(!token_consume_type(cxt->token_cxt, T_RPAREN)) { error_row_col_exit(root->offset, "Expecting \')\' after \"while\"\n"); }
114 |   if(!token_consume_type(cxt->token_cxt, T_SEMICOLON)) { error_row_col_exit(root->offset, "Expecting \';\' for \"do\" statement\n"); }
115 |   return root;
116 | }
117 | 
118 | // Parses "for(init; cond; step) stmt". The "for" token is the root with four children: the three header
119 | // expressions in order, then the body. An omitted header expression is represented by an empty node
120 | token_t *parse_for_stmt(parse_stmt_cxt_t *cxt) {
121 |   assert(token_lookahead_notnull(cxt->token_cxt, 1)->type == T_FOR);
122 |   token_t *root = token_get_next(cxt->token_cxt);
123 |   if(!token_consume_type(cxt->token_cxt, T_LPAREN)) { error_row_col_exit(root->offset, "Expecting \'(\' after \"for\"\n"); }
124 |   // A header expression is absent when its terminator (';' or ')') is the very next token
125 |   ast_append_child(root, token_lookahead_notnull(cxt->token_cxt, 1)->type == T_SEMICOLON ? token_get_empty() : parse_exp(cxt, PARSE_EXP_ALLOWALL));
126 |   if(!token_consume_type(cxt->token_cxt, T_SEMICOLON)) { error_row_col_exit(root->offset, "Expecting \';\' after first \"for\" expression\n"); }
127 |   ast_append_child(root, token_lookahead_notnull(cxt->token_cxt, 1)->type == T_SEMICOLON ? token_get_empty() : parse_exp(cxt, PARSE_EXP_ALLOWALL));
128 |   if(!token_consume_type(cxt->token_cxt, T_SEMICOLON)) { error_row_col_exit(root->offset, "Expecting \';\' after second \"for\" expression\n"); }
129 |   ast_append_child(root, token_lookahead_notnull(cxt->token_cxt, 1)->type == T_RPAREN ? token_get_empty() : parse_exp(cxt, PARSE_EXP_ALLOWALL));
130 |   if(!token_consume_type(cxt->token_cxt, T_RPAREN)) { error_row_col_exit(root->offset, "Expecting \')\' after \"for\"\n"); }
131 |   ast_append_child(root, parse_stmt(cxt));
132 |   return root;
133 | }
134 | // Parses "goto label;". The "goto" token is returned as the statement node and the label identifier
135 | // becomes its only child
136 | token_t *parse_goto_stmt(parse_stmt_cxt_t *cxt) {
137 |   token_t *token = token_get_next(cxt->token_cxt);
138 |   assert(token->type == T_GOTO);
139 |   if(token_lookahead_notnull(cxt->token_cxt, 1)->type != T_IDENT) { error_row_col_exit(token->offset, "Expecting a label for \"goto\" statement\n"); }
140 |   ast_append_child(token, token_get_next(cxt->token_cxt));
141 |   if(token_consume_type(cxt->token_cxt, T_SEMICOLON)) return token;
142 |   error_row_col_exit(token->offset, "Expecting \';\' after \"goto\" statement\n");
143 |   return token;
144 | }
145 | // Parses "break;" or "continue;" and returns the keyword token itself as the statement node
146 | token_t *parse_brk_cont_stmt(parse_stmt_cxt_t *cxt) {
147 |   token_t *token = token_get_next(cxt->token_cxt);
148 |   assert(token->type == T_BREAK || token->type == T_CONTINUE);
149 |   if(token_consume_type(cxt->token_cxt, T_SEMICOLON)) return token;
150 |   error_row_col_exit(token->offset, "Expecting \';\' after \"%s\" statement\n", token_symstr(token->type));
151 |   return token;
152 | }
153 | // Parses "return [exp];"; the "return" token is the root and the returned expression, if any, its only child
154 | token_t *parse_return_stmt(parse_stmt_cxt_t *cxt) {
155 |   token_t *token = token_get_next(cxt->token_cxt);
156 |   assert(token->type == T_RETURN);
157 |   const int has_value = token_lookahead_notnull(cxt->token_cxt, 1)->type != T_SEMICOLON;
158 |   if(has_value) ast_append_child(token, parse_exp(cxt, PARSE_EXP_ALLOWALL));
159 |   if(token_consume_type(cxt->token_cxt, T_SEMICOLON)) return token;
160 |   error_row_col_exit(token->offset, "Expecting \';\' after \"return\" statement\n");
161 |   return token;
162 | }
163 | // Parses an initializer list "{ init, init, ... }" where each init is an expression or a nested list. Returns
164 | // a T_INIT_LIST node with one child per initializer; "{}" and a trailing ',' before '}' are both accepted
165 | token_t *parse_init_list(parse_stmt_cxt_t *cxt) {
166 |   if(!token_consume_type(cxt->token_cxt, T_LCPAREN)) error_row_col_exit(cxt->token_cxt->s, "Expecting \'{\' for initializer list\n");
167 |   token_t *list = token_alloc_type(T_INIT_LIST);
168 |   while(1) {
169 |     token_t *la = token_lookahead_notnull(cxt->token_cxt, 1);
170 |     if(la->type == T_RCPAREN) { token_consume_type(cxt->token_cxt, T_RCPAREN); break; }
171 |     if(la->type == T_LCPAREN) ast_append_child(list, parse_init_list(cxt));
172 |     else ast_append_child(list, parse_exp(cxt, PARSE_EXP_NOCOMMA));
173 |     // An initializer must be followed by ',' or by the closing '}'; anything else (e.g. "{1 2}") is rejected
174 |     la = token_lookahead_notnull(cxt->token_cxt, 1);
175 |     if(la->type == T_COMMA) token_consume_type(cxt->token_cxt, T_COMMA);
176 |     else if(la->type != T_RCPAREN) error_row_col_exit(la->offset, "Expecting \',\' or \'}\' in initializer list\n");
177 |   }
178 |   return list;
179 | }
180 | 
181 | // Parses a single statement of any kind by dispatching on the lookahead token. An identifier directly
182 | // followed by ':' is a label; tokens without a dedicated case start an expression statement; a lone ';'
183 | // is consumed and yields an empty node
184 | token_t *parse_stmt(parse_stmt_cxt_t *cxt) {
185 |   token_t *la = token_lookahead_notnull(cxt->token_cxt, 1);
186 |   switch(la->type) {
187 |     case T_DEFAULT: case T_CASE: return parse_lbl_stmt(cxt, la->type);
188 |     case T_IDENT: // Needs a second lookahead token to tell a label from an expression
189 |       if(token_lookahead_notnull(cxt->token_cxt, 2)->type != T_COLON) return parse_exp_stmt(cxt);
190 |       return parse_lbl_stmt(cxt, T_IDENT);
191 |     case T_LCPAREN: return parse_comp_stmt(cxt);
192 |     case T_IF: return parse_if_stmt(cxt);
193 |     case T_SWITCH: return parse_switch_stmt(cxt);
194 |     case T_WHILE: return parse_while_stmt(cxt);
195 |     case T_DO: return parse_do_stmt(cxt);
196 |     case T_FOR: return parse_for_stmt(cxt);
197 |     case T_GOTO: return parse_goto_stmt(cxt);
198 |     case T_CONTINUE: // Shares its parser with break
199 |     case T_BREAK: return parse_brk_cont_stmt(cxt);
200 |     case T_RETURN: return parse_return_stmt(cxt);
201 |     case T_SEMICOLON: token_consume_type(cxt->token_cxt, T_SEMICOLON); return token_get_empty();
202 |     default: return parse_exp_stmt(cxt);
203 |   }
204 | }


--------------------------------------------------------------------------------
/src/parse_stmt.h:
--------------------------------------------------------------------------------
 1 | 
 2 | #ifndef PARSE_STMT_H
 3 | #define PARSE_STMT_H
 4 | 
 5 | #include "parse_exp.h"
 6 | 
 7 | // Statement parsing works directly on the expression parser's context
 8 | typedef parse_exp_cxt_t parse_stmt_cxt_t;
 9 | 
10 | parse_stmt_cxt_t *parse_stmt_init(char *input);
11 | void parse_stmt_free(parse_stmt_cxt_t *cxt);
12 | // One parser per statement kind; each returns the root node of the statement it parsed
13 | token_t *parse_lbl_stmt(parse_stmt_cxt_t *cxt, token_type_t type);
14 | token_t *parse_exp_stmt(parse_stmt_cxt_t *cxt);
15 | token_t *parse_comp_stmt(parse_stmt_cxt_t *cxt);
16 | token_t *parse_if_stmt(parse_stmt_cxt_t *cxt);
17 | token_t *parse_switch_stmt(parse_stmt_cxt_t *cxt);
18 | token_t *parse_while_stmt(parse_stmt_cxt_t *cxt);
19 | token_t *parse_do_stmt(parse_stmt_cxt_t *cxt);
20 | token_t *parse_for_stmt(parse_stmt_cxt_t *cxt);
21 | token_t *parse_goto_stmt(parse_stmt_cxt_t *cxt);
22 | token_t *parse_brk_cont_stmt(parse_stmt_cxt_t *cxt);
23 | token_t *parse_return_stmt(parse_stmt_cxt_t *cxt);
24 | // Brace-enclosed initializer list; returns a T_INIT_LIST node
25 | token_t *parse_init_list(parse_stmt_cxt_t *cxt);
26 | // Entry point: parses one statement of any kind
27 | token_t *parse_stmt(parse_stmt_cxt_t *cxt);
28 | 
29 | #endif


--------------------------------------------------------------------------------
/src/parse_test_src.txt:
--------------------------------------------------------------------------------
  1 | // Returns the context produced by parse_exp_init(input)
  2 | parse_stmt_cxt_t *parse_stmt_init(char *input) { return parse_exp_init(input); }
  3 | void parse_stmt_free(parse_stmt_cxt_t *cxt) { parse_exp_free(cxt); } // Hands cxt on to parse_exp_free()
  4 | 
  5 | // Return a labeled statement; type is the kind of label (T_IDENT, T_CASE, or anything else)
  6 | token_t *parse_lbl_stmt(parse_stmt_cxt_t *cxt, token_type_t type) {
  7 |   if(type == T_IDENT) { // Named label: new T_LBL_STMT root, label name first, labeled statement last
  8 |     token_t *token = token_alloc_type(T_LBL_STMT);
  9 |     ast_append_child(token, token_get_next(cxt->token_cxt));
 10 |     if(!token_consume_type(cxt->token_cxt, T_COLON)) assert(0); // Caller guarantees this
 11 |     return ast_append_child(token, parse_stmt(cxt));
 12 |   }  
 13 |   //token_t *token = token_get_next(cxt->token_cxt);  (disabled here, so `token` below has no declaration)
 14 |   if(type == T_CASE) ast_append_child(token, parse_exp(cxt, PARSE_EXP_NOCOLON)); // Only "case" carries an expression
 15 |   if(!token_consume_type(cxt->token_cxt, T_COLON))
 16 |     error_row_col_exit(token->offset, "Expecting \':\' for \"%s\" statement\n", token_symstr(token->type));
 17 |   return ast_append_child(token, parse_stmt(cxt));
 18 | }
 19 | 
 20 | // Returns an expression statement: a T_EXP_STMT node holding the parsed expression; the ';' is consumed
 21 | token_t *parse_exp_stmt(parse_stmt_cxt_t *cxt) {
 22 |   token_t *token = ast_append_child(token_alloc_type(T_EXP_STMT), parse_exp(cxt, PARSE_EXP_ALLOWALL));
 23 |   if(!token_consume_type(cxt->token_cxt, T_SEMICOLON))
 24 |     error_row_col_exit(cxt->token_cxt->s, "Expecting \';\' after expression statement\n");
 25 |   return token;
 26 | }
 27 | // Parses a compound statement into a T_COMP_STMT node: declaration lines first, then statements up to '}'
 28 | token_t *parse_comp_stmt(parse_stmt_cxt_t *cxt) {
 29 |   token_t *decl_list = token_alloc_type(T_DECL_STMT_LIST);
 30 |   token_t *stmt_list = token_alloc_type(T_STMT_LIST);
 31 |   token_t *root = ast_append_child(ast_append_child(token_alloc_type(T_COMP_STMT), decl_list), stmt_list);
 32 |   assert(token_lookahead_notnull(cxt->token_cxt, 1)->type == T_LCPAREN);
 33 |   token_consume_type(cxt->token_cxt, T_LCPAREN);
 34 |   while(parse_decl_isbasetype(cxt, token_lookahead_notnull(cxt->token_cxt, 1))) { // Loop through lines
 35 |     token_t *decl_entry = ast_append_child(token_alloc_type(T_DECL_STMT_ENTRY), parse_decl_basetype(cxt));
 36 |     ast_append_child(decl_list, decl_entry);
 37 |     while(1) { // Loop through variables
 38 |       token_t *var = ast_append_child(token_alloc_type(T_DECL_STMT_VAR), parse_decl(cxt, PARSE_DECL_NOBASETYPE));
 39 |       ast_append_child(decl_entry, var);
 40 |       //token_t *la = token_lookahead_notnull(cxt->token_cxt, 1);  (disabled here, so `la` below has no declaration)
 41 |       if(la->type == T_ASSIGN) { // Optional initializer: brace list or a single expression
 42 |         token_consume_type(cxt->token_cxt, T_ASSIGN);
 43 |         if(token_lookahead_notnull(cxt->token_cxt, 1)->type == T_LCPAREN) ast_append_child(var, parse_init_list(cxt));
 44 |         else ast_append_child(var, parse_exp(cxt, PARSE_EXP_NOCOMMA));
 45 |         la = token_lookahead_notnull(cxt->token_cxt, 1);
 46 |       }
 47 |       if(la->type == T_COMMA) { token_consume_type(cxt->token_cxt, T_COMMA); continue; }
 48 |       else if(la->type == T_SEMICOLON) { token_consume_type(cxt->token_cxt, T_SEMICOLON); break; }
 49 |       else { error_row_col_exit(la->offset, "Expecting \',\' or \';\' after variable declaration\n"); }
 50 |     }
 51 |   } // Then parse statement list
 52 |   while(token_lookahead_notnull(cxt->token_cxt, 1)->type != T_RCPAREN) ast_append_child(stmt_list, parse_stmt(cxt));
 53 |   token_consume_type(cxt->token_cxt, T_RCPAREN);
 54 |   return root;
 55 | }
 56 | 
 57 | // Parses "if(exp) stmt [else stmt]"; the first token fetched is the root, the "else" token holds the alternative
 58 | token_t *parse_if_stmt(parse_stmt_cxt_t *cxt) {
 59 |   //assert(token_lookahead_notnull(cxt->token_cxt, 1)->type == T_IF);
 60 |   token_t *if_stmt = token_get_next(cxt->token_cxt);
 61 |   if(!token_consume_type(cxt->token_cxt, T_LPAREN)) error_row_col_exit(if_stmt->offset, "Expecting \'(\' after \"if\"\n");
 62 |   ast_append_child(if_stmt, parse_exp(cxt, PARSE_EXP_ALLOWALL));
 63 |   if(!token_consume_type(cxt->token_cxt, T_RPAREN)) error_row_col_exit(if_stmt->offset, "Expecting \')\' after \"if\"\n");
 64 |   ast_append_child(if_stmt, parse_stmt(cxt));
 65 |   if(token_lookahead_notnull(cxt->token_cxt, 1)->type == T_ELSE) { // Optional else branch
 66 |     token_t *else_stmt = token_get_next(cxt->token_cxt);
 67 |     ast_append_child(if_stmt, else_stmt);
 68 |     ast_append_child(else_stmt, parse_stmt(cxt));
 69 |   }
 70 |   return if_stmt;
 71 | }
 72 | // Parses "for(e1; e2; e3) stmt"; children of the root are the three header expressions, then the body
 73 | token_t *parse_for_stmt(parse_stmt_cxt_t *cxt) {
 74 |   //assert(token_lookahead_notnull(cxt->token_cxt, 1)->type == T_FOR);
 75 |   token_t *for_stmt = token_get_next(cxt->token_cxt);
 76 |   if(!token_consume_type(cxt->token_cxt, T_LPAREN)) error_row_col_exit(for_stmt->offset, "Expecting \'(\' after \"for\"\n");
 77 |   if(token_lookahead_notnull(cxt->token_cxt, 1)->type != T_SEMICOLON) ast_append_child(for_stmt, parse_exp(cxt, PARSE_EXP_ALLOWALL));
 78 |   else ast_append_child(for_stmt, token_get_empty()); // Omitted first expression: empty node
 79 |   if(!token_consume_type(cxt->token_cxt, T_SEMICOLON)) error_row_col_exit(for_stmt->offset, "Expecting \';\' after first \"for\" expression\n");
 80 |   if(token_lookahead_notnull(cxt->token_cxt, 1)->type != T_SEMICOLON) ast_append_child(for_stmt, parse_exp(cxt, PARSE_EXP_ALLOWALL));
 81 |   else ast_append_child(for_stmt, token_get_empty()); // Omitted second expression: empty node
 82 |   if(!token_consume_type(cxt->token_cxt, T_SEMICOLON)) error_row_col_exit(for_stmt->offset, "Expecting \';\' after second \"for\" expression\n");
 83 |   if(token_lookahead_notnull(cxt->token_cxt, 1)->type != T_RPAREN) ast_append_child(for_stmt, parse_exp(cxt, PARSE_EXP_ALLOWALL));
 84 |   else ast_append_child(for_stmt, token_get_empty()); // Omitted third expression: empty node
 85 |   if(!token_consume_type(cxt->token_cxt, T_RPAREN)) error_row_col_exit(for_stmt->offset, "Expecting \')\' after \"for\"\n");
 86 |   ast_append_child(for_stmt, parse_stmt(cxt));
 87 |   return for_stmt;
 88 | }
 89 | 
 90 | token_t *parse_goto_stmt(parse_stmt_cxt_t *cxt) {
 91 |   // goto label ; -- returns the "goto" token with the label identifier as its only child
 92 |   token_t *goto_tok = token_get_next(cxt->token_cxt);
 93 |   assert(goto_tok->type == T_GOTO);
 94 |   token_t *label = token_lookahead_notnull(cxt->token_cxt, 1);
 95 |   if(label->type != T_IDENT) { error_row_col_exit(goto_tok->offset, "Expecting a label for \"goto\" statement\n"); }
 96 |   ast_append_child(goto_tok, token_get_next(cxt->token_cxt)); // Takes the label that was just peeked at
 97 |   if(!token_consume_type(cxt->token_cxt, T_SEMICOLON)) { error_row_col_exit(goto_tok->offset, "Expecting \';\' after \"goto\" statement\n"); }
 98 |   return goto_tok;
 99 | }
100 | 
101 | token_t *parse_brk_cont_stmt(parse_stmt_cxt_t *cxt) {
102 |   // break ; | continue ; -- the keyword token itself, without children, is the whole statement node
103 |   token_t *kw = token_get_next(cxt->token_cxt);
104 |   assert(kw->type == T_CONTINUE || kw->type == T_BREAK);
105 |   if(!token_consume_type(cxt->token_cxt, T_SEMICOLON)) { error_row_col_exit(kw->offset, "Expecting \';\' after \"%s\" statement\n", token_symstr(kw->type)); }
106 |   return kw;
107 | }
108 | 
109 | token_t *parse_return_stmt(parse_stmt_cxt_t *cxt) {
110 |   // return [exp] ; -- the optional expression becomes the only child of the "return" token
111 |   token_t *ret_tok = token_get_next(cxt->token_cxt);
112 |   assert(ret_tok->type == T_RETURN);
113 |   int bare = token_lookahead_notnull(cxt->token_cxt, 1)->type == T_SEMICOLON; // "return;" carries no expression
114 |   if(!bare) { ast_append_child(ret_tok, parse_exp(cxt, PARSE_EXP_ALLOWALL)); }
115 |   if(!token_consume_type(cxt->token_cxt, T_SEMICOLON)) { error_row_col_exit(ret_tok->offset, "Expecting \';\' after \"return\" statement\n"); }
116 |   return ret_tok;
117 | }
118 | 
119 | // Returns an initializer list node for { expr, expr, ..., expr }, where each expr may itself be a nested initializer list
120 | token_t *parse_init_list(parse_stmt_cxt_t *cxt) {
121 |   if(!token_consume_type(cxt->token_cxt, T_LCPAREN))
122 |     error_row_col_exit(cxt->token_cxt->s, "Expecting \'{\' for initializer list\n");
123 |   token_t *list = token_alloc_type(T_INIT_LIST); // Root node (was commented out, leaving "list" undeclared); elements become its children
124 |   while(1) {
125 |     token_t *la = token_lookahead_notnull(cxt->token_cxt, 1);
126 |     if(la->type == T_RCPAREN) { token_consume_type(cxt->token_cxt, T_RCPAREN); break; } // '}' closes the list
127 |     if(la->type == T_LCPAREN) ast_append_child(list, parse_init_list(cxt)); // Nested list: recurse
128 |     else ast_append_child(list, parse_exp(cxt, PARSE_EXP_NOCOMMA)); // NOTE(review): NOCOMMA presumably keeps ',' out of the element expression - confirm
129 |     // A ',' after an element is optional: it is consumed when present, otherwise the loop just goes on (a ',' right before '}' is accepted too)
130 |     if(token_lookahead_notnull(cxt->token_cxt, 1)->type == T_COMMA)
131 |       token_consume_type(cxt->token_cxt, T_COMMA);
132 |   }
133 |   return list;
134 | }
135 | 
136 | token_t *parse_stmt(parse_stmt_cxt_t *cxt) {
137 |   // Statement dispatcher: the type of the next token decides which parse routine handles the statement
138 |   token_t *next = token_lookahead_notnull(cxt->token_cxt, 1);
139 |   switch(next->type) {
140 |     case T_DEFAULT: // Fall through
141 |     case T_CASE: return parse_lbl_stmt(cxt, next->type);
142 |     case T_IDENT: // "ident :" is a label; any other identifier goes to the expression statement parser
143 |       if(token_lookahead_notnull(cxt->token_cxt, 2)->type != T_COLON) { return parse_exp_stmt(cxt); }
144 |       return parse_lbl_stmt(cxt, next->type);
145 |     case T_LCPAREN: return parse_comp_stmt(cxt);
146 |     case T_IF: return parse_if_stmt(cxt);
147 |     case T_SWITCH: return parse_switch_stmt(cxt);
148 |     case T_WHILE: return parse_while_stmt(cxt);
149 |     case T_DO: return parse_do_stmt(cxt);
150 |     case T_FOR: return parse_for_stmt(cxt);
151 |     case T_GOTO: return parse_goto_stmt(cxt);
152 |     case T_BREAK: // Fall through
153 |     case T_CONTINUE: return parse_brk_cont_stmt(cxt);
154 |     case T_RETURN: return parse_return_stmt(cxt);
155 |     case T_SEMICOLON: // A lone ';' is an empty statement
156 |       token_consume_type(cxt->token_cxt, T_SEMICOLON); return token_get_empty();
157 |     default: return parse_exp_stmt(cxt);
158 |   }
159 | }


--------------------------------------------------------------------------------
/src/python/Makefile:
--------------------------------------------------------------------------------
 1 | .PHONY: all slr-gen lr-gen lalr-gen earley-parse lr-parse
 2 | all: slr-gen lr-parse
 3 | # The three *-gen targets run syntax.py on krc-lr.syntax and all dump to the same file, krc-lr.table
 4 | slr-gen:
 5 | 	python ./syntax.py --slr ./krc-lr.syntax --dump-file=./krc-lr.table
 6 | 
 7 | lr-gen:
 8 | 	python ./syntax.py --lr1 ./krc-lr.syntax --dump-file=./krc-lr.table
 9 | 
10 | lalr-gen:
11 | 	python ./syntax.py --lalr ./krc-lr.syntax --dump-file=./krc-lr.table
12 | 
13 | earley-parse:
14 | 	python ./syntax.py --earley ./krc-earley.syntax --token-file=./lex_test.c
15 | 
16 | lr-parse:
17 | 	python ./syntax.py --lr ./krc-lr.table --token-file=./lex_test.c
18 | 


--------------------------------------------------------------------------------
/src/python/basic_type.py:
--------------------------------------------------------------------------------
 1 | #
 2 | # basic_type.py - This file defines primitive types for the language. We use basic types
 3 | #                 to build more complicated types (e.g. arrays, structs, unions, etc.) and also
 4 | #                 to perform static evaluation of expressions.
 5 | #
 6 | # We need to support basic types for static evaluation, i.e. integer types that have different length
 7 | #
 8 | 
 9 | #####################################################################
10 | # class BaseType
11 | #####################################################################
12 | 
13 | class BaseType:
14 |     """
15 |     This class is the common interface for any type. It implements type system's most
16 |     fundamental functionality such as sizeof() operator.
17 |     """
18 |     def __init__(self):
19 |         """
20 |         Initialize the base type object
21 |         
22 |         :param length: Number of bytes this type occupies. Note that this is the real
23 |                        storage requirement, and does not contain padding value
24 |         """
25 |         return
26 | 
27 |     def sizeof(self):
28 |         """
29 |         Returns the size of the type. This must be overridden to avoid exception
30 |         :return: None
31 |         """
32 |         del self
33 |         raise RuntimeError("Sizeof operator of a base type must be overridden")
34 | 
35 | #####################################################################
36 | # class IntegerType
37 | #####################################################################
38 | 
39 | class IntegerType(BaseType):
40 |     """
41 |     This class represents integer types of arbitrary precision. The length of an integer
42 |     type is an attribute of the class rather than a different class. This makes adding
43 |     more integer types easier
44 |     """
45 |     def __init__(self, length, signed):
46 |         """
47 |         Initialize the integer type
48 |         
49 |         :param length: The byte length of the integer type 
50 |         :param signed: Boolean flag to indicate whether the type if signed or not
51 |         """
52 |         # Calls the base class constructor first
53 |         super(self.__class__, self).__init__()
54 |         # This is the size of the integer type
55 |         self.length = length
56 |         # Whether the integer type is signed or not
57 |         self.signed = signed
58 | 
59 |         return
60 | 
61 |     def sizeof(self):
62 |         """
63 |         Returns the size of the integer type
64 |         :return: int
65 |         """
66 |         return length
67 | 
68 | #####################################################################
69 | # class StaticExpression
70 | #####################################################################
71 | 
72 | class StaticExpression:
73 |     """
74 |     This class is used to evaluate static expressions. For static expression we only allow
75 |     constant integer value and operators, or sizeof() operator with a type.
76 |     """
77 |     def __init__(self):
78 |         """
79 |         Prevents initializing this class
80 |         """
81 |         raise RuntimeError("Please do not instantiate class StaticExpression")
82 | 
83 | 
84 | 
85 | 


--------------------------------------------------------------------------------
/src/python/krc.syntax:
--------------------------------------------------------------------------------
  1 | 
  2 | constant-expression:
  3 |     conditional-expression
  4 | 
  5 | expression:
  6 |     assignment-expression
  7 |     expression T_COMMA assignment-expression
  8 | 
  9 | assignment-expression:
 10 |     conditional-expression
 11 |     assignment-expression assignment-operator conditional-expression
 12 |     # This will cause a FIRST set conflict
 13 |     #unary-expression assignment-operator assignment-expression
 14 | 
 15 | assignment-operator:
 16 |     T_ASSIGN
 17 |     T_PLUS_ASSIGN
 18 |     T_MINUS_ASSIGN
 19 |     T_STAR_ASSIGN
 20 |     T_DIV_ASSIGN
 21 |     T_MOD_ASSIGN
 22 |     T_LSHIFT_ASSIGN
 23 |     T_RSHIFT_ASSIGN
 24 |     T_AMPERSAND_ASSIGN
 25 |     T_BITXOR_ASSIGN
 26 |     T_BITOR_ASSIGN
 27 |   
 28 | conditional-expression:
 29 |     logical-OR-expression
 30 |     logical-OR-expression T_QMARK expression T_COLON conditional-expression
 31 | 
 32 | logical-OR-expression:
 33 |     logical-AND-expression
 34 |     logical-OR-expression T_OR logical-AND-expression
 35 | 
 36 | logical-AND-expression:
 37 |     inclusive-OR-expression
 38 |     logical-AND-expression T_AND inclusive-OR-expression
 39 | 
 40 | inclusive-OR-expression:
 41 |     exclusive-OR-expression
 42 |     inclusive-OR-expression T_BIT_OR exclusive-OR-expression
 43 | 
 44 | exclusive-OR-expression:
 45 |     AND-expression
 46 |     exclusive-OR-expression T_BIT_XOR AND-expression
 47 | 
 48 | AND-expression:
 49 |     equality-expression
 50 |     AND-expression T_BIT_AND equality-expression
 51 | 
 52 | equality-expression:
 53 |     relational-expression
 54 |     equality-expression T_EQ relational-expression
 55 |     equality-expression T_NOTEQ relational-expression
 56 | 
 57 | relational-expression:
 58 |     shift-expression
 59 |     relational-expression T_LESS shift-expression
 60 |     relational-expression T_LESSEQ shift-expression
 61 |     relational-expression T_GREATER shift-expression
 62 |     relational-expression T_GREATEREQ shift-expression
 63 | 
 64 | shift-expression:
 65 |     additive-expression
 66 |     shift-expression T_LSHIFT additive-expression
 67 |     shift-expression T_RSHIFT additive-expression
 68 | 
 69 | additive-expression:
 70 |     multiplicative-expression
 71 |     additive-expression T_PLUS multiplicative-expression
 72 |     additive-expression T_MINUS multiplicative-expression
 73 | 
 74 | multiplicative-expression:
 75 |     cast-expression
 76 |     multiplicative-expression T_STAR cast-expression
 77 |     multiplicative-expression T_DIV cast-expression
 78 |     multiplicative-expression T_MOD cast-expression
 79 | 
 80 | cast-expression:
 81 |     unary-expression
 82 |     T_LESS type-name T_GREATER cast-expression
 83 | 
 84 | unary-operator:
 85 |     T_AMPERSAND
 86 |     T_STAR
 87 |     T_PLUS
 88 |     T_MINUS
 89 |     T_BITNOT
 90 |     T_NOT
 91 | 
 92 | unary-expression:
 93 |     postfix-expression
 94 |     T_INC unary-expression
 95 |     T_DEC unary-expression
 96 |     unary-operator cast-expression
 97 |     T_SIZEOF unary-expression
 98 |     T_SIZEOF T_LESS type-name T_GREATER
 99 | 
100 | argument-expression-list:
101 |     assignment-expression
102 |     argument-expression-list T_COMMA assignment-expression
103 | 
104 | postfix-expression:
105 |     primary-expression
106 |     postfix-expression T_LSPAREN expression T_RSPAREN
107 |     postfix-expression T_LPAREN T_RPAREN
108 |     postfix-expression T_LPAREN argument-expression-list T_RPAREN
109 |     postfix-expression T_DOT T_IDENT
110 |     postfix-expression T_ARROW T_IDENT
111 |     postfix-expression T_INC
112 |     postfix-expression T_DEC
113 | 
114 | primary-expression:
115 |     T_IDENT
116 |     T_INT_CONST
117 |     T_CHAR_CONST
118 |     T_STRING_CONST
119 |     T_LPAREN expression T_RPAREN
120 | 
121 | 
122 | ##################################
123 | # The following is the type system
124 | ##################################
125 | 
126 | declaration:
127 |     declaration-specifiers T_SEMICOLON
128 |     declaration-specifiers init-declarator-list T_SEMICOLON
129 | 
130 | declaration-specifiers:
131 |     storage-class-specifier
132 |     type-specifier
133 |     type-qualifier
134 |     storage-class-specifier declaration-specifiers
135 |     type-specifier          declaration-specifiers
136 |     type-qualifier          declaration-specifiers
137 | 
138 | init-declarator-list:
139 |     init-declarator
140 |     init-declarator-list T_COMMA init-declarator
141 | 
142 | init-declarator:
143 |     declarator
144 |     declarator T_ASSIGN initializer
145 | 
146 | storage-class-specifier:
147 |     T_TYPEDEF
148 |     T_EXTERN
149 |     T_STATIC
150 |     T_AUTO
151 |     T_REGISTER
152 | 
153 | type-specifier:
154 |     T_VOID
155 |     T_CHAR
156 |     T_SHORT
157 |     T_INT
158 |     T_LONG
159 |     T_FLOAT
160 |     T_DOUBLE
161 |     T_SIGNED
162 |     T_UNSIGNED
163 |     struct-or-union-specifier
164 |     enum-specifier
165 | 
166 | struct-or-union-specifier:
167 |     struct-or-union T_LCPAREN struct-declaration-list T_RCPAREN
168 |     struct-or-union T_IDENT T_LCPAREN struct-declaration-list T_RCPAREN
169 |     struct-or-union T_IDENT
170 | 
171 | struct-or-union:
172 |     T_STRUCT
173 |     T_UNION
174 | 
175 | struct-declaration-list:
176 |     struct-declaration
177 |     struct-declaration-list struct-declaration
178 | 
179 | struct-declaration:
180 |     specifier-qualifier-list struct-declarator-list T_SEMICOLON
181 | 
182 | specifier-qualifier-list:
183 |     type-specifier
184 |     type-qualifier
185 |     type-specifier specifier-qualifier-list
186 |     type-qualifier specifier-qualifier-list
187 | 
188 | struct-declarator-list:
189 |     struct-declarator
190 |     struct-declarator-list T_COMMA struct-declarator
191 | 
192 | struct-declarator:
193 |     declarator
194 |     T_COLON constant-expression
195 |     declarator T_COLON constant-expression
196 | 
197 | enum-specifier:
198 |     T_ENUM T_LCPAREN enumerator-list T_RCPAREN
199 |     T_ENUM T_IDENT T_LCPAREN enumerator-list T_RCPAREN
200 |     T_ENUM T_IDENT
201 | 
202 | enumerator-list:
203 |     enumerator
204 |     enumerator-list T_COMMA enumerator
205 | 
206 | enumerator:
207 |     enumeration-constant
208 |     enumeration-constant T_ASSIGN constant-expression
209 | 
210 | enumeration-constant:
211 |     T_IDENT
212 | 
213 | type-qualifier:
214 |     T_CONST
215 |     T_VOLATILE
216 | 
217 | declarator:
218 |     direct-declarator
219 |     pointer direct-declarator
220 | 
221 | direct-declarator:
222 |     T_IDENT
223 |     T_LPAREN declarator T_RPAREN
224 |     direct-declarator T_LSPAREN T_RSPAREN
225 |     direct-declarator T_LSPAREN constant-expression T_RSPAREN
226 |     direct-declarator T_LPAREN T_RPAREN
227 |     direct-declarator T_LPAREN parameter-type-list T_RPAREN
228 |     direct-declarator T_LPAREN identifier-list T_RPAREN
229 | 
230 | pointer:
231 |      T_STAR
232 |      T_STAR pointer
233 |      T_STAR type-qualifier-list
234 |      T_STAR type-qualifier-list pointer
235 | 
236 | type-qualifier-list:
237 |     type-qualifier
238 |     type-qualifier-list type-qualifier
239 | 
240 | parameter-type-list:
241 |     parameter-list
242 |     #TODO: DO NOT SUPPORT VARARG
243 |     #parameter-list , ...
244 | 
245 | parameter-list:
246 |     parameter-declaration
247 |     parameter-list T_COMMA parameter-declaration
248 | 
249 | parameter-declaration:
250 |     # We do not allow abstract type here, otherwise it conflicts
251 |     # with the declarator
252 |     declaration-specifiers declarator
253 |     declaration-specifiers
254 |     # Do not allow abstract type
255 |     #declaration-specifiers abstract-declarator
256 | 
257 | identifier-list:
258 |     T_IDENT
259 |     identifier-list T_COMMA T_IDENT
260 | 
261 | # This is used in cast expression or sizeof operator
262 | # Since we use < and > pair to denote abstract type in those
263 | # two cases, the abstract declarator could be used
264 | # In function declarations we could not rely on abstract declarator
265 | type-name:
266 |     specifier-qualifier-list
267 |     # We could use abstract declarator in type name for type casting
268 |     # because it is easier for us to specify an abstract type
269 |     # without giving a name
270 |     specifier-qualifier-list abstract-declarator
271 | 
272 | abstract-declarator:
273 |     pointer
274 |     direct-abstract-declarator
275 |     pointer direct-abstract-declarator
276 | 
277 | direct-abstract-declarator:
278 |     T_LPAREN abstract-declarator T_RPAREN
279 |     T_LSPAREN T_RSPAREN
280 |     T_LSPAREN constant-expression T_RSPAREN
281 |     T_LPAREN T_RPAREN
282 |     T_LPAREN parameter-type-list T_RPAREN
283 |     direct-abstract-declarator T_LSPAREN T_RSPAREN
284 |     direct-abstract-declarator T_LSPAREN constant-expression T_RSPAREN
285 |     direct-abstract-declarator T_LPAREN T_RPAREN
286 |     direct-abstract-declarator T_LPAREN parameter-type-list T_RPAREN
287 | 
288 | initializer:
289 |     assignment-expression
290 |     T_LCPAREN initializer-list T_RCPAREN
291 |     # Do not allow extra comma
292 |     #T_LCPAREN initializer-list T_COMMA T_RCPAREN
293 | 
294 | initializer-list:
295 |     initializer
296 |     initializer-list T_COMMA initializer
297 | 


--------------------------------------------------------------------------------
/src/python/lex_test.c:
--------------------------------------------------------------------------------
 1 | 
 2 | // Typedef must specify a name, so the keyword below stays commented out
 3 | //typedef 
 4 | const static register enum enum_struct {
 5 |   A = 1,
 6 |   B = 2,
 7 |   C = 3
 8 | };
 9 | 
10 | static const volatile register int aaa = 0x012345678ABCDEFL;
11 | 
12 | void f();
13 | 
14 | /*
15 |  * main() - The entry point of the program
16 |  */
17 | // Note that a declaration list following the function header is not supported (hence the commented-out one)
18 | int main(int argc, char **argv, typedef int what, ...) /* int x, y, z; */ {
19 |   // This is the declaration without an identifier (WTF do we allow this?)
20 |   //static const register long; 
21 |   
22 |   static const volatile register int * const * (*xyz)(int(*)(), long *, char()) = C;
23 |   long x = 1 & xyz;
24 |   void *c;
25 |   
26 |   // This struct is used to store data
27 |   static typedef struct struct_type {
28 |     int a;
29 |     char b : 20;   // 20 bit field
30 |     long c;
31 |   } bb, cc;
32 |   
33 |   int long register typedef ;
34 |   
35 |   // The following three test whether we can resolve the ambiguity
36 |   // between an expression and a declaration
37 |   aa * x; // aa has not been typedef'ed at this point
38 |   {
39 |     typedef int *bbb, (*ccc)(void), aa, (*ddd)(aa); // aa becomes a typedef name inside this block
40 |     aa * x;
41 |     (aa)x;
42 |   }
43 |   
44 |   aa3 * x;
45 |   (aa);
46 |   printf("Hello, world!\n");
47 |   
48 |   a.a = 20UL;
49 |   a.b = 0x12345 >> (5 & 0xFFFFFFFF);
50 |   a.c = 0777;
51 |   b.b = '\n';
52 |   
53 |   (aa);
54 |   
55 |   return 0;
56 | }
57 | 


--------------------------------------------------------------------------------
/src/python/symbol_table.py:
--------------------------------------------------------------------------------
  1 | #
  2 | # symbol_table.py - This file defines the symbol table
  3 | #                   for both types and identifiers
  4 | #
  5 | 
  6 | from common import dbg_printf, DebugRunTestCaseBase, Argv, TestNode
  7 | 
  8 | #####################################################################
  9 | # class Scope
 10 | #####################################################################
 11 | 
 12 | class Scope:
 13 |     """
 14 |     This class represents a scope. It includes a struct
 15 |     table that maps struct names to types; it also includes
 16 |     a union and typedef table that do the same. Finally it
 17 |     also has a identifier table which maps identifiers to
 18 |     their types
 19 |     """
 20 | 
 21 |     # The following constants defines the index of their
 22 |     # corresponding tables. Searching routine uses these
 23 |     # indices to access different tables rather than
 24 |     # implementing a separate routine for each table
 25 |     TABLE_TYPE_INDEX_BEGIN = 0
 26 |     TABLE_TYPE_STRUCT = 0
 27 |     TABLE_TYPE_UNION = 1
 28 |     TABLE_TYPE_TYPEDEF = 2
 29 |     TABLE_TYPE_IDENT = 3
 30 |     TABLE_TYPE_INDEX_END = 3
 31 | 
 32 |     # The following defines the type of the scope
 33 |     SCOPE_TYPE_INDEX_BEGIN = 100
 34 |     # This is the global scope (top level scope)
 35 |     SCOPE_TYPE_GLOBAL = 100
 36 |     # Functional level
 37 |     SCOPE_TYPE_FUNCTION = 101
 38 |     # Local scope inside a function
 39 |     SCOPE_TYPE_LOCAL = 102
 40 |     # Inside a struct or union definition because name conflict
 41 |     # can still occur at this level
 42 |     SCOPE_TYPE_STRUCT = 103
 43 |     SCOPE_TYPE_INDEX_END = 103
 44 | 
 45 |     def __init__(self, scope_type):
 46 |         """
 47 |         Initialize all mapping structures
 48 |         
 49 |         :param scope_type: Enum constants defined above
 50 |         """
 51 |         # We put them in a list such that we could
 52 |         # use an index to access them rather
 53 |         # than implement different routines for accessing
 54 |         # different tables
 55 |         self.symbols = [{}, {}, {}, {}]
 56 | 
 57 |         # The scope type must be a valid one
 58 |         assert(self.SCOPE_TYPE_INDEX_BEGIN <=
 59 |                scope_type <=
 60 |                self.SCOPE_TYPE_INDEX_END)
 61 | 
 62 |         # Save the type of the scope for later inspection
 63 |         self.scope_type = scope_type
 64 | 
 65 |         return
 66 | 
 67 |     def get_table(self, t):
 68 |         """
 69 |         Return a table given a type
 70 | 
 71 |         :param t: The type constant defined above
 72 |         :return: the table instance
 73 |         """
 74 |         assert(Scope.TABLE_TYPE_INDEX_BEGIN <=
 75 |                t <=
 76 |                Scope.TABLE_TYPE_INDEX_END)
 77 | 
 78 |         return self.symbols[t]
 79 | 
 80 |     def get_type(self):
 81 |         """
 82 |         This function returns the type of the current scope
 83 |         :return: Scope type constant
 84 |         """
 85 |         return self.scope_type
 86 | 
 87 |     def __getitem__(self, item):
 88 |         """
 89 |         Fetches an item from the scope's symbol table. The item
 90 |         is a tuple specifying the dict and the name
 91 | 
 92 |         :param item: Tuple(type, name)
 93 |         :return: Item stored in the table
 94 |         """
 95 |         t = self.get_table(item[0])
 96 |         return t[item[1]]
 97 | 
 98 |     def __contains__(self, item):
 99 |         """
100 |         Same as __getitem__ except that it checks for membership
101 | 
102 |         :param item: Tuple(type, name)
103 |         :return: bool
104 |         """
105 |         t = self.get_table(item[0])
106 |         return item[1] in t
107 | 
108 |     def __setitem__(self, key, value):
109 |         """
110 |         Same as __getitem__ except that it sets a value with the
111 |         given type and name
112 | 
113 |         :param key: Tuple(type, name)
114 |         :param value: Any value
115 |         :return: None
116 |         """
117 |         t = self.get_table(key[0])
118 |         t[key[1]] = value
119 |         return
120 | 
121 |     def get(self, key, ret):
122 |         """
123 |         This one mimics the behavior of dict.get() which returns
124 |         the alternative value if the desired value does not exist
125 | 
126 |         :param key: Tuple(type, name)
127 |         :param ret: Alternative value if the name does not exist
128 |         :return: Any value
129 |         """
130 |         t = self.get_table(key[0])
131 |         return t.get(key[1], ret)
132 | 
133 | #####################################################################
134 | # class SymbolTable
135 | #####################################################################
136 | 
137 | class SymbolTable:
138 |     """
139 |     This is the representation of a global symbol table
140 |     which holds a stack of scopes. Each scope has its own
141 |     symbol definitions. When we search names in the symbol
142 |     table, we always start from the topmost scope and descend
143 |     to the bottommost, which is the global scope.
144 |     """
145 |     def __init__(self):
146 |         """
147 |         Initialize the symbol table's stack
148 |         """
149 |         # This is the stack of scopes
150 |         # By default there is a global scope at initialization
151 |         # and the type is set as global scope type
152 |         self.scope_stack = [Scope(Scope.SCOPE_TYPE_GLOBAL)]
153 | 
154 |         return
155 | 
156 |     def enter_scope(self, scope_type):
157 |         """
158 |         Enters a new scope by pushing a new scope object into the
159 |         stack of tables
160 | 
161 |         :param scope_type: The type of the scope defined in class Scope
162 |         :return: None
163 |         """
164 |         # Use the given type to define a new scope
165 |         self.scope_stack.append(Scope(scope_type))
166 | 
167 |         return
168 | 
169 |     def leave_scope(self):
170 |         """
171 |         Leave the current scope by popping from the end of the list
172 | 
173 |         :return: None
174 |         """
175 |         assert(len(self.scope_stack) != 0)
176 |         self.scope_stack.pop()
177 |         return
178 | 
179 |     def get_current_scope_type(self):
180 |         """
181 |         This function returns the type of the current (i.e. topmost) scope
182 |         
183 |         :return: scope type constant
184 |         """
185 |         assert(len(self.scope_stack) != 0)
186 | 
187 |         return self.scope_stack[-1].get_type()
188 | 
189 |     def get_depth(self):
190 |         """
191 |         Get the current depth of the symbol table (i.e. the length of the list)
192 |         Note that depth starts from 1
193 |         
194 |         :return: int 
195 |         """
196 |         return len(self.scope_stack)
197 | 
198 |     def get(self, key, ret):
199 |         """
200 |         Searches for a given name in the given type. If we could not
201 |         find the name in all scopes then return the alternative
202 | 
203 |         :param key: Tuple(type, name)
204 |         :param ret: Alternative value if name not found
205 |         :return: Any object
206 |         """
207 |         i = len(self.scope_stack) - 1
208 |         while i >= 0:
209 |             scope = self.scope_stack[i]
210 |             # If the key exists then return the value
211 |             # we do not need get() here since it is
212 |             # guaranteed to exist
213 |             if key in scope:
214 |                 return scope[key]
215 |             else:
216 |                 i -= 1
217 | 
218 |         # If we could not find the value in all scopes
219 |         # then just return the alternative value
220 |         return ret
221 | 
222 |     def __contains__(self, item):
223 |         """
224 |         Checks whether a value exists in the symbol table
225 | 
226 |         :param item: Tuple(type, name)
227 |         :return: bool
228 |         """
229 |         i = len(self.scope_stack) - 1
230 |         while i >= 0:
231 |             scope = self.scope_stack[i]
232 |             if item in scope:
233 |                 return True
234 |             else:
235 |                 i -= 1
236 | 
237 |         return False
238 | 
239 |     def __getitem__(self, item):
240 |         """
241 |         Returns an item in all scopes if there is one. Note that
242 |         for this function if the name is not defined for all
243 |         scopes we need to assert False, and the caller should
244 |         avoid that
245 | 
246 |         :param item: Tuple(type, name)
247 |         :return: Any object
248 |         """
249 |         i = len(self.scope_stack) - 1
250 |         while i >= 0:
251 |             scope = self.scope_stack[i]
252 |             if item in scope:
253 |                 return scope[item]
254 |             else:
255 |                 i -= 1
256 | 
257 |         assert False
258 | 
259 |     def __setitem__(self, key, value):
260 |         """
261 |         This function sets the name in the topmost
262 |         scope because that is how scope works
263 | 
264 |         :param key: Tuple(type, name)
265 |         :param value: Any object
266 |         :return: None
267 |         """
268 |         assert(len(self.scope_stack) != 0)
269 |         # Use index = -1 to address the topmost scope
270 |         self.scope_stack[-1][key] = value
271 |         return
272 | 
273 | #####################################################################
274 | # Unit test cases
275 | #####################################################################
276 | 
277 | class ScopeTestCase(DebugRunTestCaseBase):
278 |     """
279 |     Unit tests for the symbol table (classes Scope and SymbolTable)
280 |     """
281 |     def __init__(self):
282 |         """
283 |         Calls the base class constructor and then runs the tests right away
284 |         """
285 |         DebugRunTestCaseBase.__init__(self)
286 | 
287 |         # run_tests() below takes an Argv object; an empty one is built for it
288 |         argv = Argv()
289 |         # This calls the base class method and hence runs the test case
290 |         self.run_tests(argv)
291 | 
292 |         return
293 | 
294 |     @staticmethod
295 |     @TestNode()
296 |     def test_basic(argv, **kwargs):
297 |         """
298 |         Checks definition and lookup of names across nested scopes, and scope
299 |         entry and exit, including leaving a scope when none is left
300 | 
301 |         :param argv: Unused argv
302 |         :param kwargs: Unused keyword arguments
303 |         :return: None
304 |         """
305 |         del argv
306 |         del kwargs
307 | 
308 |         # A fresh symbol table holds exactly one scope, of the global kind
309 |         st = SymbolTable()
310 |         assert(st.get_current_scope_type() == Scope.SCOPE_TYPE_GLOBAL)
311 |         assert(st.get_depth() == 1)
312 | 
313 |         st[(Scope.TABLE_TYPE_IDENT, "Global Ident")] = 123
314 |         st[(Scope.TABLE_TYPE_STRUCT, "Global Struct")] = 456
315 | 
316 |         # Read the values back from the scope they were defined in
317 |         assert(st[(Scope.TABLE_TYPE_IDENT, "Global Ident")] == 123)
318 |         assert(st[(Scope.TABLE_TYPE_STRUCT, "Global Struct")] == 456)
319 | 
320 |         st.enter_scope(Scope.SCOPE_TYPE_FUNCTION)
321 |         st[(Scope.TABLE_TYPE_STRUCT, "Functional Struct")] = 789
322 | 
323 |         assert (st.get_current_scope_type() == Scope.SCOPE_TYPE_FUNCTION)
324 |         assert(st[(Scope.TABLE_TYPE_IDENT, "Global Ident")] == 123)  # outer names stay visible
325 |         assert(st[(Scope.TABLE_TYPE_STRUCT, "Global Struct")] == 456)
326 |         assert(st[(Scope.TABLE_TYPE_STRUCT, "Functional Struct")] == 789)
327 | 
328 |         st.enter_scope(Scope.SCOPE_TYPE_LOCAL)
329 | 
330 |         assert(st.get_current_scope_type() == Scope.SCOPE_TYPE_LOCAL)
331 |         assert(st[(Scope.TABLE_TYPE_IDENT, "Global Ident")] == 123)
332 |         assert(st[(Scope.TABLE_TYPE_STRUCT, "Global Struct")] == 456)
333 |         assert(st[(Scope.TABLE_TYPE_STRUCT, "Functional Struct")] == 789)
334 |         assert((Scope.TABLE_TYPE_IDENT, "Global Ident") in st)
335 |         assert((Scope.TABLE_TYPE_IDENT, "Global Ident 2") not in st)
336 |         assert(st.get((Scope.TABLE_TYPE_IDENT, "Global Ident"), None) == 123)
337 |         assert (st.get((Scope.TABLE_TYPE_IDENT, "Global Ident 2"), None) is None)
338 | 
339 |         st.leave_scope()  # leaves the local scope
340 |         st.leave_scope()  # leaves the function scope
341 |         st.leave_scope()  # leaves the global scope itself, emptying the stack
342 |         assert(st.get_depth() == 0)
343 | 
344 |         try:
345 |             caught = False
346 |             st.leave_scope()  # nothing left to leave: leave_scope() asserts
347 |         except AssertionError:
348 |             caught = True
349 | 
350 |         assert(caught is True)
351 | 
352 |         return
352 | 
# Finally, run the test case if this file is invoked directly as a
# script (as opposed to being imported as a module).
# NOTE(review): presumably instantiating ScopeTestCase is what triggers
# the tests - confirm against the class definition
if __name__ == "__main__":
    ScopeTestCase()
356 | 
357 | 
358 | 
359 | 
360 | 


--------------------------------------------------------------------------------
/src/python/token_list.txt:
--------------------------------------------------------------------------------
  1 | TokenType = T_INT                   ; Other token type
  2 | TokenType = T_IDENT                 ; Identifier = main
  3 | TokenType = T_LPAREN                ; Other token type
  4 | TokenType = T_INT                   ; Other token type
  5 | TokenType = T_IDENT                 ; Identifier = argv
  6 | TokenType = T_COMMA                 ; Other token type
  7 | TokenType = T_CHAR                  ; Other token type
  8 | TokenType = T_STAR                  ; Other token type
  9 | TokenType = T_STAR                  ; Other token type
 10 | TokenType = T_IDENT                 ; Identifier = argv
 11 | TokenType = T_COMMA                 ; Other token type
 12 | TokenType = T_ELLIPSIS              ; Other token type
 13 | TokenType = T_RPAREN                ; Other token type
 14 | TokenType = T_LCPAREN               ; Other token type
 15 | TokenType = T_INT                   ; Other token type
 16 | TokenType = T_LPAREN                ; Other token type
 17 | TokenType = T_STAR                  ; Other token type
 18 | TokenType = T_IDENT                 ; Identifier = q
 19 | TokenType = T_RPAREN                ; Other token type
 20 | TokenType = T_LPAREN                ; Other token type
 21 | TokenType = T_INT                   ; Other token type
 22 | TokenType = T_COMMA                 ; Other token type
 23 | TokenType = T_CHAR                  ; Other token type
 24 | TokenType = T_STAR                  ; Other token type
 25 | TokenType = T_RPAREN                ; Other token type
 26 | TokenType = T_COMMA                 ; Other token type
 27 | TokenType = T_STAR                  ; Other token type
 28 | TokenType = T_IDENT                 ; Identifier = p
 29 | TokenType = T_ASSIGN                ; Other token type
 30 | TokenType = T_INT_CONST             ; Int const = 5
 31 | TokenType = T_SEMICOLON             ; Other token type
 32 | TokenType = T_STRUCT                ; Other token type
 33 | TokenType = T_LCPAREN               ; Other token type
 34 | TokenType = T_INT                   ; Other token type
 35 | TokenType = T_IDENT                 ; Identifier = a
 36 | TokenType = T_SEMICOLON             ; Other token type
 37 | TokenType = T_LONG                  ; Other token type
 38 | TokenType = T_IDENT                 ; Identifier = b
 39 | TokenType = T_SEMICOLON             ; Other token type
 40 | TokenType = T_SHORT                 ; Other token type
 41 | TokenType = T_IDENT                 ; Identifier = c
 42 | TokenType = T_SEMICOLON             ; Other token type
 43 | TokenType = T_RCPAREN               ; Other token type
 44 | TokenType = T_IDENT                 ; Identifier = stat
 45 | TokenType = T_SEMICOLON             ; Other token type
 46 | TokenType = T_IDENT                 ; Identifier = p
 47 | TokenType = T_ASSIGN                ; Other token type
 48 | TokenType = T_IDENT                 ; Identifier = a
 49 | TokenType = T_PLUS                  ; Other token type
 50 | TokenType = T_INT_CONST             ; Int const = 2
 51 | TokenType = T_GREATER               ; Other token type
 52 | TokenType = T_STAR                  ; Other token type
 53 | TokenType = T_IDENT                 ; Identifier = b
 54 | TokenType = T_LSPAREN               ; Other token type
 55 | TokenType = T_INT_CONST             ; Int const = 1
 56 | TokenType = T_RSPAREN               ; Other token type
 57 | TokenType = T_QMARK                 ; Other token type
 58 | TokenType = T_IDENT                 ; Identifier = c
 59 | TokenType = T_COLON                 ; Other token type
 60 | TokenType = T_INT_CONST             ; Int const = 2080374784
 61 | TokenType = T_SEMICOLON             ; Other token type
 62 | TokenType = T_IF                    ; Other token type
 63 | TokenType = T_LPAREN                ; Other token type
 64 | TokenType = T_IDENT                 ; Identifier = i
 65 | TokenType = T_LESS                  ; Other token type
 66 | TokenType = T_INT_CONST             ; Int const = 10
 67 | TokenType = T_RPAREN                ; Other token type
 68 | TokenType = T_IDENT                 ; Identifier = i
 69 | TokenType = T_ASSIGN                ; Other token type
 70 | TokenType = T_INT_CONST             ; Int const = 100
 71 | TokenType = T_SEMICOLON             ; Other token type
 72 | TokenType = T_ELSE                  ; Other token type
 73 | TokenType = T_IDENT                 ; Identifier = i
 74 | TokenType = T_STAR_ASSIGN           ; Other token type
 75 | TokenType = T_INT_CONST             ; Int const = 2
 76 | TokenType = T_SEMICOLON             ; Other token type
 77 | TokenType = T_FOR                   ; Other token type
 78 | TokenType = T_LPAREN                ; Other token type
 79 | TokenType = T_SEMICOLON             ; Other token type
 80 | TokenType = T_IDENT                 ; Identifier = i
 81 | TokenType = T_LESS                  ; Other token type
 82 | TokenType = T_INT_CONST             ; Int const = 20
 83 | TokenType = T_SEMICOLON             ; Other token type
 84 | TokenType = T_RPAREN                ; Other token type
 85 | TokenType = T_LCPAREN               ; Other token type
 86 | TokenType = T_IDENT                 ; Identifier = i
 87 | TokenType = T_PLUS                  ; Other token type
 88 | TokenType = T_INT_CONST             ; Int const = 2
 89 | TokenType = T_SEMICOLON             ; Other token type
 90 | TokenType = T_RCPAREN               ; Other token type
 91 | TokenType = T_STAR                  ; Other token type
 92 | TokenType = T_IDENT                 ; Identifier = a
 93 | TokenType = T_ASSIGN                ; Other token type
 94 | TokenType = T_STAR                  ; Other token type
 95 | TokenType = T_IDENT                 ; Identifier = b
 96 | TokenType = T_ASSIGN                ; Other token type
 97 | TokenType = T_BITNOT                ; Other token type
 98 | TokenType = T_IDENT                 ; Identifier = c
 99 | TokenType = T_SEMICOLON             ; Other token type
100 | TokenType = T_IDENT                 ; Identifier = printf
101 | TokenType = T_LPAREN                ; Other token type
102 | TokenType = T_STRING_CONST          ; String const = "Hello, world"
103 | TokenType = T_RPAREN                ; Other token type
104 | TokenType = T_SEMICOLON             ; Other token type
105 | TokenType = T_IDENT                 ; Identifier = ret
106 | TokenType = T_COLON                 ; Other token type
107 | TokenType = T_RETURN                ; Other token type
108 | TokenType = T_MINUS                 ; Other token type
109 | TokenType = T_INT_CONST             ; Int const = 1
110 | TokenType = T_STAR                  ; Other token type
111 | TokenType = T_STAR                  ; Other token type
112 | TokenType = T_IDENT                 ; Identifier = p
113 | TokenType = T_SEMICOLON             ; Other token type
114 | TokenType = T_RCPAREN               ; Other token type
115 | TokenType = T_EOF                   ; EOF
116 | 


--------------------------------------------------------------------------------
/src/python/type.py:
--------------------------------------------------------------------------------
  1 | #
# type.py - This module defines types and the type system
  3 | #
  4 | 
  5 | from symbol_table import Scope, SymbolTable
  6 | 
  7 | #####################################################################
  8 | # class BaseType and its sub-classes
  9 | #####################################################################
 10 | 
 11 | class BaseType:
 12 |     """
 13 |     This class serves as the base class for all types
 14 |     that could be used as a base type
 15 | 
 16 |     Note that typedef names are not considered as a separate
 17 |     type because typedef'ed names may be compatible with
 18 |     other types, so we always expand typedef'ed types
 19 |     """
 20 | 
 21 |     # These are constant values we use to check whether a flag is set or not
 22 |     TYPE_SPEC_NONE = 0x00000000
 23 |     TYPE_SPEC_CONST = 0x00000001
 24 |     TYPE_SPEC_VOLATILE = 0x00000002
 25 |     TYPE_SPEC_STATIC = 0x00000004
 26 |     TYPE_SPEC_REGISTER = 0x00000008
 27 |     TYPE_SPEC_EXTERN = 0x00000010
 28 |     TYPE_SPEC_UNSIGNED = 0x00000020
 29 |     TYPE_SPEC_AUTO = 0x00000040
 30 |     TYPE_SPEC_SIGNED = 0x00000080
 31 | 
 32 |     # This is a dict that maps the token type to spec value
 33 |     TYPE_SPEC_DICT = {
 34 |         # Type qualifier
 35 |         "T_CONST": TYPE_SPEC_CONST,
 36 |         "T_VOLATILE": TYPE_SPEC_VOLATILE,
 37 |         # Storage class specifier
 38 |         "T_STATIC": TYPE_SPEC_STATIC,
 39 |         "T_REGISTER": TYPE_SPEC_REGISTER,
 40 |         "T_EXTERN": TYPE_SPEC_EXTERN,
 41 |         "T_UNSIGNED": TYPE_SPEC_UNSIGNED,
 42 |         "T_AUTO": TYPE_SPEC_AUTO,
 43 |         "T_SIGNED": TYPE_SPEC_SIGNED,
 44 |     }
 45 | 
 46 |     # This defines the length of integers
 47 |     # Note that the length of long is 8 rather than 4
 48 |     TYPE_LENGTH_CHAR = 1
 49 |     TYPE_LENGTH_SHORT = 2
 50 |     TYPE_LENGTH_INT = 4
 51 |     TYPE_LENGTH_LONG = 8
 52 | 
 53 |     def __init__(self):
 54 |         """
 55 |         Initialize the base type attributes which are shared between
 56 |         all base types
 57 |         """
 58 |         self.type_spec = BaseType.TYPE_SPEC_NONE
 59 |         return
 60 | 
 61 |     def add_spec_list(self, spec_list):
 62 |         """
 63 |         Add specs from a spec list
 64 | 
 65 |         :param spec_list: T_DECL_SPEC OR T_SPEC_QUAL_LIST
 66 |         :return: None
 67 |         """
 68 |         assert(spec_list.symbol == "T_DECL_SPEC" or
 69 |                spec_list.symbol == "T_SPEC_QUAL_LIST")
 70 | 
 71 |         # For each node in the spec list, add the specifier
 72 |         for node in spec_list.child_list:
 73 |             self.add_spec(node.symbol)
 74 | 
 75 |         return
 76 | 
 77 |     def add_spec(self, spec_name):
 78 |         """
 79 |         Add a specifier to the base type.
 80 | 
 81 |           (1) If the spec is not defined then we ignore it
 82 |               because there might be other information
 83 |           (2) If the spec has already been defined then an exception
 84 |               is thrown because the input program is wrong
 85 | 
 86 |         :param spec_name: The string object that contains the
 87 |                           specification
 88 |         :return: None
 89 |         """
 90 |         # It must be found, otherwise it is implementation error
 91 |         # Mask could be None because there might be other
 92 |         # information such as the base type
 93 |         mask = BaseType.TYPE_SPEC_DICT.get(spec_name, None)
 94 | 
 95 |         # If already defined then throw error
 96 |         if (self.type_spec & mask) != 0x0:
 97 |             raise TypeError("Duplicated type specifier or qualifier: %s" %
 98 |                             (spec_name, ))
 99 | 
100 |         # Otherwise just OR it to the specifier bit mask
101 |         self.type_spec |= mask
102 | 
103 |         return
104 | 
105 | #####################################################################
106 | # class IntType, VoidType, StructType, UnionType, BitFieldType
107 | #####################################################################
108 | 
class IntType(BaseType):
    """
    Integer base type. The only data it carries besides the shared
    specifier mask is the width of the integer in bytes
    """
    def __init__(self, byte_length):
        """
        Set up the shared base type state and record the width

        :param byte_length: Width of the integer in bytes
        """
        BaseType.__init__(self)
        self.byte_length = byte_length
121 | 
class VoidType(BaseType):
    """
    The void base type. It has no data of its own beyond the
    specifier mask inherited from BaseType
    """
    def __init__(self):
        """
        Set up the shared base type state
        """
        BaseType.__init__(self)
133 | 
class StructType(BaseType):
    """
    Struct base type. A struct type is not compatible with any other
    type, so the struct name alone is enough to identify the
    underlying type. The same holds for unions
    """
    def __init__(self, name):
        """
        Set up the shared base type state and record the struct name

        :param name: The name of the struct
        """
        BaseType.__init__(self)
        self.name = name
150 | 
class UnionType(BaseType):
    """
    Union base type. It behaves like the struct type in that the
    name is the only piece of data needed to identify it
    """
    def __init__(self, name):
        """
        Set up the shared base type state and record the union name

        :param name: The name of the union
        """
        BaseType.__init__(self)
        self.name = name
165 | 
class BitFieldType(BaseType):
    """
    Bit field type, described by its length in bits. The declared
    base type is not stored here, so checks such as whether the bit
    length exceeds the declared base length must be done by whoever
    builds the bit field type. The sign bit of a bit field is not
    defined and should not be relied on

    A bit field type can not be the target of a pointer or the
    element of an array
    """
    def __init__(self, bit_length):
        """
        Set up the shared base type state and record the bit length

        :param bit_length: The bit length of the type
        """
        BaseType.__init__(self)
        self.bit_length = bit_length
185 | 
186 | #####################################################################
187 | # The following are derivation operations
188 | #####################################################################
189 | 
class PtrType(BaseType):
    """
    This class represents pointer type. Pointer types need specifiers
    as the type specifier for the current level, which are kept in
    the specifier mask inherited from BaseType
    """
    def __init__(self):
        """
        Initialize an empty object denoting the operation
        """
        # The instance must be passed explicitly when calling the
        # base class initializer through the class; omitting it
        # raises TypeError on every instantiation
        BaseType.__init__(self)
        return
201 | 
class ArrayType(BaseType):
    """
    Array derivation. The data attached to it is the static size of
    the array, which has to be obtained by statically evaluating an
    expression. When the size is unknown it is simply left out
    """
    def __init__(self, array_size):
        """
        Set up the shared base type state and record the array size

        :param array_size: The size of the array; Could be None
                           if size is not known
        """
        BaseType.__init__(self)
        self.array_size = array_size
219 | 
class FuncType(BaseType):
    """
    Function derivation. It carries the list of parameter types
    (optionally with name bindings when the declaration names its
    parameters) and a flag telling whether the function is vararg
    """
    def __init__(self, param_type_list, is_vararg):
        """
        Set up the shared base type state and record the parameters

        :param param_type_list: A list of parameter types
        :param is_vararg: Whether the function is vararg
        """
        BaseType.__init__(self)
        self.is_vararg = is_vararg
        self.param_type_list = param_type_list
237 | 
238 | #####################################################################
239 | # class TypeNode
240 | #####################################################################
241 | 
class TypeNode:
    """
    This class represents base type and type derivation rules
    """

    # This maps base type names to their class instance
    # This does not include bit field type
    # These are only types that we could determine
    BASE_TYPE_DICT = {
        "T_INT": IntType(BaseType.TYPE_LENGTH_INT),
        "T_CHAR": IntType(BaseType.TYPE_LENGTH_CHAR),
        "T_LONG": IntType(BaseType.TYPE_LENGTH_LONG),
        "T_SHORT": IntType(BaseType.TYPE_LENGTH_SHORT),
        "T_VOID": VoidType(),
    }

    def __init__(self):
        """
        Initialize the type node with an empty derivation rule list
        """
        # Type derivation rule. We store the operation
        # with the highest precedence before operations
        # with lower precedence
        # Note that the last element must be the base type
        self.rule_list = []

        # We use this as a lazy way of applying operations on
        # a type. All interpretation of the type should start
        # with elements using this index
        self.index = 0

        return

    def __len__(self):
        """
        Returns the length of the actual array (i.e. starting at
        index = 0). The array must have length greater than zero

        :return: int
        """
        assert(len(self.rule_list) > 0)
        # The start index is an attribute of this node, not a free name
        assert(0 <= self.index < len(self.rule_list))

        return len(self.rule_list)

    def expand_typedef_name(self, symbol_table, typedef_name):
        """
        This function expands typedef name into the current type
        node to yield a new type

        If the typedef'ed name does not exist assertion fails,
        because the parser guarantees that typedef'ed names are
        recognized only if they are defined

        :param symbol_table: The symbol table
        :param typedef_name: The type name that is typedef'ed
        :return: None
        """
        # NOTE(review): confirm that Scope defines a TYPEDEF constant;
        # the table type constants used by the symbol table tests are
        # named Scope.TABLE_TYPE_*
        t = symbol_table.get((Scope.TYPEDEF, typedef_name), None)
        assert(t is not None)
        assert(isinstance(t, TypeNode))
        # Append the rule list of the typedef'ed name
        # to the current type node, starting at the index from which
        # the typedef'ed type is interpreted. Note that this is only
        # a shallow copy of the array
        self.rule_list += t.rule_list[t.index:]

        return

    def add_derivation(self, spec_body_node):
        """
        This function processes a given derivation body
        and adds them to the rule list, from the highest
        precedence to the lowest precedence

        :param spec_body_node: T_DECL_BODY or T_ABS_DECL_BODY
        :return: None
        :raise NotImplementedError: For array sizes and function
                                    parameter lists, which are not
                                    supported yet
        :raise TypeError: For old-style function declarations
        """
        assert(spec_body_node.symbol == "T_DECL_BODY" or
               spec_body_node.symbol == "T_ABS_DECL_BODY")

        i = 0
        while i < len(spec_body_node):
            child = spec_body_node[i]
            child_name = child.symbol
            if child_name == "T_PTR":
                # There might be multiple levels of pointers
                # We add specifier for each level
                for ptr in child.child_list:
                    ptr_type = PtrType()
                    # Then it must be a specifier list
                    if ptr.symbol != "T_":
                        ptr_type.add_spec_list(ptr)

                    self.rule_list.append(ptr_type)

                # It only takes one slot
                i += 1
            elif child_name == "T_IDENT":
                # It also only takes one slot
                i += 1
            elif child_name == "T_ARRAY_SUB":
                sub = spec_body_node[i + 1]

                if sub.symbol != "T_":
                    raise NotImplementedError("Static evaluation of array sizes")

                # No size expression: the array size is not known
                self.rule_list.append(ArrayType(None))
                # It takes two slots
                i += 2
            elif child_name == "T_FUNC_CALL":
                sub = spec_body_node[i + 1]
                if sub.symbol != "T_":
                    if sub.symbol == "T_IDENT_LIST":
                        raise TypeError("Old-style function declaration" +
                                        " no longer supported")
                    raise NotImplementedError("Type for function arguments")

                # Empty list for arguments, and not vararg
                self.rule_list.append(FuncType([], False))
                # Like the array subscript, it is followed by its
                # operand, so it takes two slots. Without advancing
                # the index the loop would never terminate
                i += 2
            else:
                # Do not know what is the type. Raised explicitly so
                # that the loop can not spin forever when assertions
                # are disabled
                raise AssertionError("Unknown derivation: %s" %
                                     (child_name, ))

        return

    @staticmethod
    def report_type_conflict(t1, t2):
        """
        Report a type conflict because we see two different
        types being specified in declaration

        This function throws an exception and it never returns

        :param t1: The first type
        :param t2: The second type
        :return: None
        :raise TypeError: Always
        """
        raise TypeError("Conflicting types: %s %s" %
                        (t1.symbol, t2.symbol))

    def add_base_type_node(self, symbol_table, spec_node):
        """
        Determine the base type named by a specifier list and check
        that the list does not name two conflicting base types

        NOTE: This function is unfinished. Only typedef names are
        expanded into the rule list so far; for all other base types
        nothing is added yet, and nothing is returned

        :param symbol_table: The symbol table used to look up
                             typedef'ed names
        :param spec_node: The T_SPEC_QUAL_LIST or T_DECL_SPEC
        :return: None
        :raise TypeError: If conflicting base types are specified
        """
        assert (spec_node.symbol == "T_SPEC_QUAL_LIST" or
                spec_node.symbol == "T_DECL_SPEC")

        # This points to the base type node
        base_type_node = None
        # Whether we have seen short or long
        ignore_int = False

        for node in spec_node.child_list:
            name = node.symbol
            if name == "T_INT":
                # If already seen short or long then
                # skip this because it is implied
                if ignore_int is True:
                    continue

                if base_type_node is not None:
                    self.report_type_conflict(node, base_type_node)
                base_type_node = node
            elif name == "T_CHAR" or \
                 name == "T_VOID" or \
                 name == "T_STRUCT" or \
                 name == "T_UNION" or \
                 name == "T_TYPEDEF_NAME":
                if base_type_node is not None:
                    self.report_type_conflict(node, base_type_node)
                base_type_node = node
            elif name == "T_LONG" or name == "T_SHORT":
                ignore_int = True
                # Also if we have seen INT just ignore it
                # and update the base type to short or long
                if base_type_node is not None and \
                   base_type_node.symbol != "T_INT":
                    self.report_type_conflict(node, base_type_node)
                base_type_node = node

        # NOTE(review): base_type_node is still None here when the
        # list names no base type at all (e.g. only specifiers);
        # decide how that case should be handled
        type_name = base_type_node.symbol
        type_obj = self.BASE_TYPE_DICT.get(type_name, None)
        if type_obj is None:
            if type_name == "T_TYPEDEF_NAME":
                # Expand the typedef name into the type
                self.expand_typedef_name(symbol_table,
                                         base_type_node.data)
435 | 
436 | 


--------------------------------------------------------------------------------
/src/stack.c:
--------------------------------------------------------------------------------
 1 | 
 2 | #include "stack.h"
 3 | #include "error.h"
 4 | 
 5 | stack_t *stack_init() {
 6 |   stack_t *stack = (stack_t *)malloc(sizeof(stack_t));
 7 |   SYSEXPECT(stack != NULL);
 8 |   stack->data = (void **)malloc(sizeof(void *) * STACK_INIT_CAPACITY);
 9 |   SYSEXPECT(stack->data != NULL);
10 |   stack->size = 0;
11 |   stack->capacity = STACK_INIT_CAPACITY;
12 | 
13 |   return stack;
14 | }
15 | 
16 | void stack_free(stack_t *stack) {
17 |   free(stack->data);
18 |   free(stack);
19 |   return;
20 | }
21 | 
22 | void stack_push(stack_t *stack, void *p) {
23 |   if(stack->size == stack->capacity) {
24 |     void **old = stack->data;
25 |     stack->data = malloc(sizeof(void *) * stack->capacity * 2);
26 |     SYSEXPECT(stack->data != NULL);
27 |     memcpy(stack->data, old, sizeof(void *) * stack->capacity);
28 |     stack->capacity *= 2;
29 |     free(old);
30 |   }
31 |   assert(stack->size < stack->capacity);
32 |   stack->data[stack->size++] = p;
33 |   return;
34 | }
35 | 
36 | void *stack_pop(stack_t *stack) {
37 |   assert(stack->size != 0);
38 |   return stack->data[--stack->size];
39 | }
40 | 
41 | void *stack_peek(stack_t *stack) {
42 |   assert(stack->size != 0);
43 |   return stack->data[stack->size - 1];
44 | }
45 | 
46 | // Offset is from the top of the stack towards the bottom
47 | void *stack_peek_at(stack_t *stack, int offset) {
48 |   assert(offset >= 0 && offset < stack->size);
49 |   return stack->data[stack->size - 1 - offset];
50 | }
51 | 
52 | void *stack_at(stack_t *stack, int index) {
53 |   assert(index >= 0 && index < stack->size);
54 |   return stack->data[index];
55 | }
56 | 
57 | void **stack_topaddr(stack_t *stack) {
58 |   return stack->data + stack->size;
59 | }
60 | 
61 | int stack_empty(stack_t *stack) { return stack->size == 0; }
62 | int stack_size(stack_t *stack) { return stack->size; }
63 | 


--------------------------------------------------------------------------------
/src/stack.h:
--------------------------------------------------------------------------------
 1 | 
 2 | #ifndef _STACK_H
 3 | #define _STACK_H
 4 | 
 5 | #include <stdio.h>
 6 | #include <stdlib.h>
 7 | #include <assert.h>
 8 | #include <string.h>
 9 | 
10 | #define STACK_INIT_CAPACITY 128
11 | 
12 | // Implements a general stack which is used in the shift-reduce parsing algo.
13 | typedef struct {
14 |   int size;
15 |   int capacity;
16 |   void **data;
17 | } stack_t;
18 | 
19 | stack_t *stack_init();
20 | void stack_free(stack_t *stack);
21 | void stack_push(stack_t *stack, void *p);
22 | void *stack_pop(stack_t *stack);
23 | void *stack_peek(stack_t *stack);
24 | void *stack_peek_at(stack_t *stack, int offset);
25 | void *stack_at(stack_t *stack, int index);
26 | int stack_empty(stack_t *stack);
27 | int stack_size(stack_t *stack);
28 | void **stack_topaddr(stack_t *stack);
29 | 
30 | #endif


--------------------------------------------------------------------------------
/src/str.c:
--------------------------------------------------------------------------------
  1 | 
  2 | #include <stdio.h>
  3 | #include <stdlib.h>
  4 | #include <string.h>
  5 | #include "str.h"
  6 | #include "error.h"
  7 | 
  8 | str_t *str_init() {
  9 |   str_t *str = (str_t *)malloc(sizeof(str_t));
 10 |   SYSEXPECT(str != NULL);
 11 |   str->s = (char *)malloc(STR_INIT_SIZE + 1);
 12 |   SYSEXPECT(str->s != NULL);
 13 |   str->capacity = STR_INIT_SIZE;
 14 |   str->size = 0;
 15 |   str->s[0] = '\0';
 16 |   return str;
 17 | }
 18 | void str_free(str_t *str) { free(str->s); free(str); }
 19 | void str_clear(str_t *str) { str->s[0] = '\0'; str->size = 0; }
 20 | int str_size(str_t *str) { return str->size; }
 21 | 
 22 | // Realloc the buffer to hold at least size + 1 bytes
 23 | void str_extend(str_t *str, int size) {
 24 |   if(size > str->capacity) {
 25 |     str->s = realloc(str->s, size + 1);
 26 |     SYSEXPECT(str->s != NULL);
 27 |     str->capacity = size;
 28 |   }
 29 | }
 30 | 
 31 | void str_append(str_t *str, char ch) {
 32 |   if(str->size == str->capacity) str_extend(str, str->capacity * 2);
 33 |   assert(str->size < str->capacity);
 34 |   str->s[str->size++] = ch;
 35 |   str->s[str->size] = '\0';
 36 | }
 37 | 
 38 | void str_prepend(str_t *str, char ch) {
 39 |   if(str->size == str->capacity) str_extend(str, str->capacity * 2);
 40 |   assert(str->size < str->capacity);
 41 |   memmove(str->s + 1, str->s, str->size + 1); // Including the trailing zero
 42 |   str->s[0] = ch;
 43 |   str->size++;
 44 | }
 45 | 
 46 | void str_prepend_str(str_t *str, const char *s) {
 47 |   int copylen = strlen(s);
 48 |   if(str->size + copylen >= str->capacity) str_extend(str, str->size + copylen);
 49 |   assert(str->size + copylen <= str->capacity);
 50 |   memmove(str->s + copylen, str->s, str->size + 1); // Including the trailing zero
 51 |   memcpy(str->s, s, copylen); // Do not include the trailing zero
 52 |   str->size += copylen;
 53 | }
 54 | 
 55 | void str_concat(str_t *str, const char *s) {
 56 |   int copylen = strlen(s);
 57 |   if(str->size + copylen >= str->capacity) str_extend(str, str->size + copylen);
 58 |   assert(str->size + copylen <= str->capacity);
 59 |   memmove(str->s + str->size, s, copylen + 1); // Includes the '\0'
 60 |   str->size += copylen;
 61 | }
 62 | 
 63 | void str_print_int(str_t *str, int d) {
 64 |   char temp[MAX_INT_DIGITS];
 65 |   sprintf(temp, "%d", d);
 66 |   str_concat(str, temp);
 67 | }
 68 | 
 69 | char *str_copy(const str_t *str) { // Returns a string allocated from heap. The str is not changed
 70 |   char *s = (char *)malloc(str->size + 1);
 71 |   SYSEXPECT(s != NULL);
 72 |   memcpy(s, str->s, str->size + 1);
 73 |   return s;
 74 | }
 75 | 
 76 | vector_t *vector_init() {
 77 |   vector_t *vector = (vector_t *)malloc(sizeof(vector_t));
 78 |   SYSEXPECT(vector != NULL);
 79 |   vector->data = (void **)malloc(VECTOR_INIT_SIZE * sizeof(void *));
 80 |   SYSEXPECT(vector->data != NULL);
 81 |   vector->size = 0;
 82 |   vector->capacity = VECTOR_INIT_SIZE;
 83 |   return vector;
 84 | }
 85 | void vector_free(vector_t *vector) { free(vector->data); free(vector); }
 86 | int vector_size(vector_t *vector) { return vector->size; }
 87 | 
 88 | void vector_extend(vector_t *vector, int size) {
 89 |   if(size > vector->capacity) {
 90 |     vector->capacity = size;
 91 |     vector->data = realloc(vector->data, size * sizeof(void *));
 92 |     SYSEXPECT(vector->data != NULL);
 93 |   }
 94 |   return;
 95 | }
 96 | 
 97 | void vector_append(vector_t *vector, void *value) {
 98 |   if(vector->size == vector->capacity) vector_extend(vector, vector->size * 2);
 99 |   assert(vector->size < vector->capacity);
100 |   vector->data[vector->size++] = value;
101 |   return;
102 | }
103 | 
104 | void *vector_at(vector_t *vector, int index) {
105 |   assert(index < vector->size && index >= 0);
106 |   return vector->data[index];
107 | }
108 | 
109 | void **vector_addrat(vector_t *vector, int index) {
110 |   assert(index < vector->capacity && index >= 0);  // Since we only take address, use capacity here
111 |   return vector->data + index;
112 | }


--------------------------------------------------------------------------------
/src/str.h:
--------------------------------------------------------------------------------
 1 | 
 2 | #ifndef _STR_H
 3 | #define _STR_H
 4 | 
#define STR_INIT_SIZE 32     // Excluding the terminating 0
#define VECTOR_INIT_SIZE 32  // Number of pointer slots vector_init() allocates up front
#define MAX_INT_DIGITS 64    // Size of the scratch buffer str_print_int() formats an int into
 8 | 
 9 | typedef struct {
10 |   int size;
11 |   int capacity;          // Both excluding the terminating 0
12 |   char *s;
13 | } str_t;
14 | 
15 | str_t *str_init();
16 | void str_free(str_t *str);
17 | void str_clear(str_t *str); // This does not free memory
18 | int str_size(str_t *str);
19 | void str_extend(str_t *str, int size);
20 | void str_append(str_t *str, char ch);
21 | void str_prepend(str_t *str, char ch);
22 | void str_concat(str_t *str, const char *s);
23 | void str_prepend_str(str_t *str, const char *s);
24 | void str_print_int(str_t *str, int d);  // Append an integer at the end of the str
25 | char *str_copy(const str_t *str);
26 | static inline char *str_cstr(str_t *s) { return s->s; }
27 | 
// Growable array of untyped pointers
typedef struct {
  int size;      // Number of slots in use
  int capacity;  // Number of slots allocated
  void **data;   // The slot array
} vector_t;
32 | 
33 | vector_t *vector_init();
34 | void vector_free(vector_t *vector);
35 | int vector_size(vector_t *vector);
36 | void vector_extend(vector_t *vector, int size);
37 | void vector_append(vector_t *vector, void *value);
38 | void *vector_at(vector_t *vector, int index);
39 | void **vector_addrat(vector_t *vector, int index);
40 | 
41 | #endif


--------------------------------------------------------------------------------
/src/tests/test_cgen.c:
--------------------------------------------------------------------------------
  1 | 
  2 | #include <stdio.h>
  3 | #include <assert.h>
  4 | #include "stack.h"
  5 | #include "token.h"
  6 | #include "error.h"
  7 | #include "ast.h"
  8 | #include "parse.h"
  9 | #include "hashtable.h"
 10 | #include "bintree.h"
 11 | #include "list.h"
 12 | #include "str.h"
 13 | #include "type.h"
 14 | #include "eval.h"
 15 | #include "cgen.h"
 16 | 
 17 | typedef struct {
 18 |   cgen_cxt_t *cgen_cxt;       // This contains type cxt
 19 |   type_cxt_t *type_cxt;
 20 |   parse_exp_cxt_t *parse_cxt; // This contains token cxt
 21 |   token_cxt_t *token_cxt;
 22 | } test_cxt_t;
 23 | 
 24 | test_cxt_t *test_init(char *s) {
 25 |   test_cxt_t *cxt = (test_cxt_t *)malloc(sizeof(test_cxt_t));
 26 |   SYSEXPECT(cxt != NULL);
 27 |   memset(cxt, 0x00, sizeof(test_cxt_t));
 28 |   cxt->cgen_cxt = cgen_init();
 29 |   cxt->type_cxt = cxt->cgen_cxt->type_cxt;
 30 |   cxt->parse_cxt = parse_exp_init(s);
 31 |   cxt->token_cxt = cxt->parse_cxt->token_cxt;
 32 |   return cxt;
 33 | }
 34 | 
 35 | void test_free(test_cxt_t *cxt) {
 36 |   cgen_free(cxt->cgen_cxt);
 37 |   parse_exp_free(cxt->parse_cxt);
 38 |   free(cxt);
 39 |   return;
 40 | }
 41 | 
 42 | void test_cgen_global_decl() {
 43 |   printf("=== Test cgen_global_decl ===\n");
 44 | 
 45 |   test_cxt_t *cxt;
 46 |   token_t *token;
 47 | 
 48 |   // Test basis import export
 49 |   cxt = test_init("extern const int array[120 + 20]; int array2[] = {1, 2, 3, 4, 5}; ");
 50 |   token = parse(cxt->parse_cxt);
 51 |   cgen(cxt->cgen_cxt, token);
 52 |   ast_print(token);
 53 |   cgen_print_cxt(cxt->cgen_cxt);
 54 |   ast_free(token);
 55 |   test_free(cxt);
 56 |   printf("=====================================\n");
 57 |   // Test array size + def after decl
 58 |   cxt = test_init("extern int array[2 + 3]; int array[] = {1, 2, 3, 4, }; ");
 59 |   token = parse(cxt->parse_cxt);
 60 |   cgen(cxt->cgen_cxt, token);
 61 |   ast_print(token);
 62 |   cgen_print_cxt(cxt->cgen_cxt);
 63 |   ast_free(token);
 64 |   test_free(cxt);
 65 |   printf("=====================================\n");
 66 |   // Test decl after decl + decl after def
 67 |   cxt = test_init("extern int array[2 + 3]; extern int array[]; \n"
 68 |   "extern int array2[]; extern int array2[]; \n"
 69 |   "extern int array3[]; extern int array3[3 + 4]; extern int array3[]; \n"
 70 |   "int array4[4 << 1]; extern int array4[8]; \n"
 71 |   "int array5[] = {1, 2, 3}; extern int array5[]; \n"
 72 |   "char array6[] = \"abcdefg\\n\"; extern char array6[]; \n" // size should be 9
 73 |   "extern const char array7[]; const char array7[10] = \"12345\"; \n"); // size should be 10
 74 |   token = parse(cxt->parse_cxt);
 75 |   cgen(cxt->cgen_cxt, token);
 76 |   ast_print(token);
 77 |   cgen_print_cxt(cxt->cgen_cxt);
 78 |   ast_free(token);
 79 |   test_free(cxt);
 80 |   printf("=====================================\n");
 81 | 
 82 |   printf("Pass!\n");
 83 |   return;
 84 | }
 85 | 
 86 | void test_cgen_init() {
 87 |   printf("=== Test cgen_init_ series ===\n");
 88 | 
 89 |   test_cxt_t *cxt;
 90 |   token_t *token;
 91 | 
 92 |   // Test array with const char *
 93 |   cxt = test_init("int x = 1, y = 2, z = 100; const char *a = \"123456\"; ");
 94 |   token = parse(cxt->parse_cxt);
 95 |   cgen(cxt->cgen_cxt, token);
 96 |   ast_print(token);
 97 |   cgen_print_cxt(cxt->cgen_cxt);
 98 |   ast_free(token);
 99 |   test_free(cxt);
100 |   printf("=====================================\n");
101 |   // Test array initialization
102 |   cxt = test_init(
103 |     "int x[] = {2, 4, 6, 8, 10}; \n "
104 |     "const char y[] = \"asdfghjkl\\n\"; \n"
105 |     "char z[20] = \"\"; \n");
106 |   token = parse(cxt->parse_cxt);
107 |   cgen(cxt->cgen_cxt, token);
108 |   ast_print(token);
109 |   cgen_print_cxt(cxt->cgen_cxt);
110 |   ast_free(token);
111 |   test_free(cxt);
112 |   printf("=====================================\n");
113 |   // Test struct
114 |   cxt = test_init(
115 |     "struct named_struct { int a; long b; char c[10]; } var1; \n "
116 |     "struct named_struct var2 = {100, 200L, \"qwert\"}; \n" 
117 |     );
118 |   token = parse(cxt->parse_cxt);
119 |   cgen(cxt->cgen_cxt, token);
120 |   ast_print(token);
121 |   cgen_print_cxt(cxt->cgen_cxt);
122 |   ast_free(token);
123 |   test_free(cxt);
124 |   printf("=====================================\n");
125 | 
126 |   printf("Pass!\n");
127 | }
128 | 
129 | 
130 | int main() {
131 |   printf("Hello World!\n");
132 |   test_cgen_global_decl();
133 |   test_cgen_init();
134 |   return 0;
135 | }


--------------------------------------------------------------------------------
/src/tests/test_eval.c:
--------------------------------------------------------------------------------
  1 | 
  2 | #include <stdio.h>
  3 | #include <assert.h>
  4 | #include "stack.h"
  5 | #include "token.h"
  6 | #include "error.h"
  7 | #include "ast.h"
  8 | #include "parse.h"
  9 | #include "hashtable.h"
 10 | #include "bintree.h"
 11 | #include "list.h"
 12 | #include "str.h"
 13 | #include "type.h"
 14 | #include "eval.h"
 15 | 
 16 | void test_const_eval_int() {
 17 |   printf("=== Test eval_const_get_int_value ===\n");
 18 |   parse_exp_cxt_t *parse_cxt;
 19 |   type_cxt_t *type_cxt;
 20 |   token_t *token;
 21 |   type_t *type;
 22 |   value_t *value;
 23 |   
 24 |   type_cxt = type_sys_init();
 25 |   parse_cxt = parse_exp_init("123456");
 26 |   token = token_get_next(parse_cxt->token_cxt);
 27 |   assert(token);
 28 |   value = eval_const_get_int_value(type_cxt, token);
 29 |   printf("Type: %s Value: 0x%016lX (%ld)\n", type_print_str(0, value->type, NULL, 0), value->uint64, value->int64);
 30 |   parse_exp_free(parse_cxt);
 31 |   type_sys_free(type_cxt);
 32 |   printf("=====================================\n");
 33 |   type_cxt = type_sys_init();
 34 |   parse_cxt = parse_exp_init("0x80000000"); // This will be signed overflow
 35 |   token = token_get_next(parse_cxt->token_cxt);
 36 |   assert(token);
 37 |   value = eval_const_get_int_value(type_cxt, token);
 38 |   printf("Type: %s Value: 0x%016lX (%ld)\n", type_print_str(0, value->type, NULL, 0), value->uint64, value->int64);
 39 |   parse_exp_free(parse_cxt);
 40 |   type_sys_free(type_cxt);
 41 |   printf("=====================================\n");
 42 |   type_cxt = type_sys_init();
 43 |   parse_cxt = parse_exp_init("0x80000000U"); // This will be unsigned, no overflow
 44 |   token = token_get_next(parse_cxt->token_cxt);
 45 |   assert(token);
 46 |   value = eval_const_get_int_value(type_cxt, token);
 47 |   printf("Type: %s Value: 0x%016lX (%ld)\n", type_print_str(0, value->type, NULL, 0), value->uint64, value->int64);
 48 |   parse_exp_free(parse_cxt);
 49 |   type_sys_free(type_cxt);
 50 |   printf("=====================================\n");
 51 |   type_cxt = type_sys_init();
 52 |   parse_cxt = parse_exp_init("0xFFFFFFFFU"); // This will be unsigned, no overflow
 53 |   token = token_get_next(parse_cxt->token_cxt);
 54 |   assert(token);
 55 |   value = eval_const_get_int_value(type_cxt, token);
 56 |   printf("Type: %s Value: 0x%016lX (%ld)\n", type_print_str(0, value->type, NULL, 0), value->uint64, value->int64);
 57 |   parse_exp_free(parse_cxt);
 58 |   type_sys_free(type_cxt);
 59 |   printf("=====================================\n");
 60 |   type_cxt = type_sys_init();
 61 |   parse_cxt = parse_exp_init("0x1FFFFFFFFU"); // This will be unsigned, overflow
 62 |   token = token_get_next(parse_cxt->token_cxt);
 63 |   assert(token);
 64 |   value = eval_const_get_int_value(type_cxt, token);
 65 |   printf("Type: %s Value: 0x%016lX (%ld)\n", type_print_str(0, value->type, NULL, 0), value->uint64, value->int64);
 66 |   parse_exp_free(parse_cxt);
 67 |   type_sys_free(type_cxt);
 68 |   printf("=====================================\n");
 69 |   type_cxt = type_sys_init();
 70 |   parse_cxt = parse_exp_init("'\xfe'"); // Although it overflows for char type, it is evaluated by another function, no warning
 71 |   token = token_get_next(parse_cxt->token_cxt);
 72 |   assert(token);
 73 |   value = eval_const_get_int_value(type_cxt, token);
 74 |   printf("Type: %s Value: 0x%016lX (%ld)\n", type_print_str(0, value->type, NULL, 0), value->uint64, value->int64);
 75 |   parse_exp_free(parse_cxt);
 76 |   type_sys_free(type_cxt);
 77 |   printf("=====================================\n");
 78 | 
 79 |   printf("Pass!\n");
 80 |   return;
 81 | }
 82 | 
 83 | void test_eval_const_exp() {
 84 |   printf("=== Test eval_const_exp ===\n");
 85 |   parse_exp_cxt_t *parse_cxt;
 86 |   type_cxt_t *type_cxt;
 87 |   token_t *token;
 88 |   type_t *type;
 89 |   value_t *value;
 90 | 
 91 |   type_cxt = type_sys_init();
 92 |   parse_cxt = parse_exp_init("(1000 + 2 * 3) << 4"); 
 93 |   token = parse_exp(parse_cxt, PARSE_EXP_ALLOWALL);
 94 |   ast_print_(token, 0);
 95 |   value = eval_const_exp(type_cxt, token);
 96 |   printf("Type: %s Value: 0x%016lX (%ld)\n", type_print_str(0, value->type, NULL, 0), value->uint64, value->int64);
 97 |   assert(value->int32 == 16096);
 98 |   parse_exp_free(parse_cxt);
 99 |   type_sys_free(type_cxt);
100 |   printf("=====================================\n");
101 |   type_cxt = type_sys_init();
102 |   parse_cxt = parse_exp_init("((char)1000 + 2ul * 3) << 4"); 
103 |   token = parse_exp(parse_cxt, PARSE_EXP_ALLOWALL);
104 |   ast_print_(token, 0);
105 |   value = eval_const_exp(type_cxt, token);
106 |   printf("Type: %s Value: 0x%016lX (%ld)\n", type_print_str(0, value->type, NULL, 0), value->uint64, value->int64);
107 |   assert(value->int64 == -288); // Because of the sign extension of char type
108 |   parse_exp_free(parse_cxt);
109 |   type_sys_free(type_cxt);
110 |   printf("=====================================\n");
111 |   type_cxt = type_sys_init();
112 |   parse_cxt = parse_exp_init("(signed long)((long)(unsigned long *)(long)(unsigned long *)(long)100 + 2)"); 
113 |   token = parse_exp(parse_cxt, PARSE_EXP_ALLOWALL);
114 |   ast_print_(token, 0);
115 |   value = eval_const_exp(type_cxt, token);
116 |   printf("Type: %s Value: 0x%016lX (%ld)\n", type_print_str(0, value->type, NULL, 0), value->uint64, value->int64);
117 |   assert(value->int64 == 102); // Because of the sign extension of char type
118 |   parse_exp_free(parse_cxt);
119 |   type_sys_free(type_cxt);
120 |   printf("=====================================\n");
121 | 
122 |   printf("Pass!\n");
123 |   return;
124 | }
125 | 
126 | int main() {
127 |   test_const_eval_int();
128 |   test_eval_const_exp();
129 |   return 0;
130 | }


--------------------------------------------------------------------------------
/src/tests/test_lex.c:
--------------------------------------------------------------------------------
  1 | 
  2 | #include <stdio.h>
  3 | #include <assert.h>
  4 | #include "stack.h"
  5 | #include "token.h"
  6 | #include "error.h"
  7 | #include "ast.h"
  8 | #include "parse.h"
  9 | #include "hashtable.h"
 10 | 
 11 | void test_get_op() {
 12 |   printf("=== Test token_get_op() ===\n");
 13 |   char *p;
 14 |   char test1[] = "-----====-=-=++>>=>>>.+..+...+....+......";
 15 |   char result[256];
 16 |   token_t token;
 17 |   p = test1;
 18 |   result[0] = '\0';
 19 |   token_cxt_t *token_cxt = token_cxt_init(test1);
 20 |   while(p != NULL) {
 21 |     p = token_get_op(p, &token);
 22 |     if(p == NULL) break;
 23 |     else if(token.type != T_ILLEGAL) {
 24 |       printf("%s(%s) ", token_typestr(token.type), token_symstr(token.type));
 25 |       strcat(result, token_symstr(token.type));
 26 |     } else {
 27 |       p = token_get_ident(token_cxt, p, &token);
 28 |       if(p == NULL) break;
 29 |       else if(token.type != T_ILLEGAL) {
 30 |         printf("%s(%s) ", token_typestr(token.type), token.str);
 31 |         strcat(result, token.str);
 32 |         free(token.str);
 33 |       } else {
 34 |         assert(0);
 35 |       }
 36 |     }
 37 |   }
 38 |   putchar('\n');
 39 |   assert(strcmp(result, test1) == 0);
 40 |   token_cxt_free(token_cxt);
 41 | 
 42 |   printf("Pass!\n");
 43 |   return;
 44 | }
 45 | 
 46 | void test_bin_search() {
 47 |   printf("=== Test token_get_keyword_type() ===\n");
 48 |   token_type_t type;
 49 |   for(int i = 0;i < (int)sizeof(keywords) / (int)sizeof(const char *);i++) {
 50 |     type = token_get_keyword_type(keywords[i]);
 51 |     if(type == T_ILLEGAL) {
 52 |       printf("ILLEGAL %s\n", keywords[i]);
 53 |       assert(0);
 54 |     } else {
 55 |       printf("%s(%s) ", token_typestr(type), token_symstr(type));
 56 |       assert(strcmp(token_symstr(type), keywords[i]) == 0);
 57 |     }
 58 |   }
 59 | 
 60 |   type = token_get_keyword_type("aaaa");
 61 |   assert(type == T_ILLEGAL);
 62 |   type = token_get_keyword_type("zzzzzzz");
 63 |   assert(type == T_ILLEGAL);
 64 |   type = token_get_keyword_type("wangziqi");
 65 |   assert(type == T_ILLEGAL);
 66 |   type = token_get_keyword_type("jklasd");
 67 |   assert(type == T_ILLEGAL);
 68 | 
 69 |   putchar('\n');
 70 |   printf("Pass!\n");
 71 |   return;
 72 | }
 73 | 
 74 | void test_token_get_next() {
 75 |   printf("=== Test test_token_get_next() ===\n");
 76 |   char test[] = \
 77 |     "// Hello World \n \
 78 |      void main() {  \n \
 79 |         /* This is a block comment   \n \
 80 |            That cross multiple lines \n \
 81 |          */                          \n \
 82 |      }                               \n \
 83 |      \n";
 84 |   error_init(test);
 85 |   token_cxt_t *token_cxt = token_cxt_init(test);
 86 |   token_t *token;
 87 |   while((token = token_get_next(token_cxt)) != NULL) {
 88 |     const char *sym = token_symstr(token->type);
 89 |     if(sym == NULL) printf("%s ", token->str);
 90 |     else printf("%s ", sym);
 91 |     token_free(token);
 92 |   }
 93 |   putchar('\n');
 94 | 
 95 |   char test2[] = " \n \
 96 |     // Returns the next token, or illegal                           \n \
 97 |     // Same rule for return value and conditions as token_get_op()  \n \
 98 |     char *token_get_next(char *s, token_t *token) {                 \n \
 99 |       while(1) {                                                    \n \
100 |         if(s == NULL || *s == '\\0') return NULL;                    \n \
101 |         else if(isspace(*s)) while(isspace(*s)) s++;                \n \
102 |         else if(s[0] == '/' && s[1] == '/') while(*s != '\\n' && *s != '\\0') s++; \n \
103 |         else if(s[0] == '/' && s[1] == '*') {                         \n \
104 |           while((s[0] != '\\0') && (s[0] != '*' || s[1] != '/')) s++;  \n \
105 |           s += 2;                                                     \n \
106 |         }                                                             \n \
107 |         else if(isalpha(*s) || *s == '_') return token_get_ident(s, token); \n \
108 |         else return token_get_op(s, token);                                 \n \
109 |       }                                                                     \n \
110 |                                                                             \n \
111 |       assert(0);    \n \
112 |       return NULL;  \n \
113 |     }               \n \
114 |   \" asda dasdasd\\n \" ";
115 |   error_init(test2);
116 |   while((token = token_get_next(token_cxt)) != NULL) {
117 |     const char *sym = token_symstr(token->type);
118 |     int row, col;
119 |     error_get_row_col(token->offset, &row, &col);
120 |     if(sym == NULL) printf("%s ", token->str);
121 |     else printf("%s(%d %d) ", sym, row, col);
122 |     token_free(token);
123 |   }
124 |   putchar('\n');
125 |   token_cxt_free(token_cxt);
126 | 
127 |   printf("Pass!\n");
128 |   return;
129 | }
130 | 
131 | void test_int_size() {
132 |   printf("=== Test Integer Size ===\n");
133 |   char test[] = "12 23l 34ll 45llu 56lu 67u 78ul 89ull 0x123LU 056ULL";
134 |   token_cxt_t *cxt = token_cxt_init(test);
135 |   token_t *token;
136 |   while((token = token_get_next(cxt)) != NULL) {
137 |     printf("%s %s\n", token->str, token_decl_print(token->decl_prop));
138 |   }
139 |   token_cxt_free(cxt);
140 |   printf("Pass!\n");
141 |   return;
142 | }
143 | 
144 | int main() {
145 |   printf("=== Hello World! ===\n");
146 |   test_get_op();
147 |   test_bin_search();
148 |   test_token_get_next();
149 |   test_int_size();
150 |   return 0;
151 | }
152 |   


--------------------------------------------------------------------------------
/src/todo.txt:
--------------------------------------------------------------------------------
1 | 
2 | - Parse positive and negative numbers as part of the integers
3 | 
4 | - Parse floating-point numbers
5 | 
6 | - Processing macros
7 |   - Macro expansion at identifiers


--------------------------------------------------------------------------------
/src/token.h:
--------------------------------------------------------------------------------
  1 | 
  2 | #ifndef _TOKEN_H
  3 | #define _TOKEN_H
  4 | 
  5 | #include <stdio.h>
  6 | #include <stdint.h>
  7 | #include <assert.h>
  8 | #include <ctype.h>
  9 | #include <stdlib.h>
 10 | #include <string.h>
 11 | #include "error.h"
 12 | #include "stack.h"
 13 | #include "hashtable.h"
 14 | 
// Upper bound on keyword length: keywords cannot be 32 chars long (enough for C keywords)
#define TOKEN_MAX_KWD_SIZE 31
 16 | 
// Types of raw tokens and of AST nodes.
// The raw token part of this enum does not distinguish between different expression
// operators, i.e. both unary "plus" and binary "add" are T_PLUS. Extra information such as
// operator property is derived later; the EXP_* values further below name the
// disambiguated operators.
// The enum is split into ranges delimited by *_BEGIN / *_END markers. Several values are
// pinned explicitly, so entries must not be inserted or reordered without revisiting them.
typedef enum {
  // Expression token types
  T_OP_BEGIN = 0,
  T_LPAREN = 0, T_RPAREN, T_LSPAREN, T_RSPAREN,       // ( ) [ ]
  T_DOT, T_ARROW,                                     // . ->
  T_INC, T_DEC, T_PLUS, T_MINUS,                      // ++ -- + -
  T_LOGICAL_NOT = 10, T_BIT_NOT,                      // ! ~
  T_STAR, T_AND,                                      // * &
  T_DIV, T_MOD,                                       // / %
  T_LSHIFT, T_RSHIFT,                                 // << >>

  T_LESS, T_GREATER, T_LEQ = 20, T_GEQ, T_EQ, T_NEQ,  // < > <= >= == !=
  T_BIT_XOR, T_BIT_OR,                                // ^ |
  T_LOGICAL_AND, T_LOGICAL_OR,                        // && ||
  T_QMARK, T_COLON,                                   // ? :
  T_ASSIGN = 30,                                      // =
  T_PLUS_ASSIGN, T_MINUS_ASSIGN, T_MUL_ASSIGN,        // += -= *=
  T_DIV_ASSIGN, T_MOD_ASSIGN,                         // /= %=
  T_LSHIFT_ASSIGN, T_RSHIFT_ASSIGN,                   // <<= >>=
  T_AND_ASSIGN, T_OR_ASSIGN, T_XOR_ASSIGN = 40,       // &= |= ^=
  T_COMMA,                                            // ,
  T_OP_END,

  T_LCPAREN,            // {
  T_RCPAREN,            // }
  T_SEMICOLON,          // ;
  T_ELLIPSIS,           // ...
  
  // Literal types (i.e. primary expressions)
  T_LITERALS_BEGIN = 200,
  T_DEC_INT_CONST = 200, T_HEX_INT_CONST, T_OCT_INT_CONST,
  T_CHAR_CONST, T_STR_CONST,
  T_FLOAT_CONST,
  T_IDENT,
  T_UDEF, // User-defined type using type-def; they are not literals
  T_LITERALS_END,

  // Add this to the index of keywords in the table
  T_KEYWORDS_BEGIN = 1000,
  T_AUTO = 1000, T_BREAK, T_CASE, T_CHAR, T_CONST, T_CONTINUE, T_DEFAULT, T_DO,
  T_DOUBLE, T_ELSE, T_ENUM, T_EXTERN, T_FLOAT, T_FOR, T_GOTO, T_IF,
  T_INT, T_LONG, T_REGISTER, T_RETURN, T_SHORT, T_SIGNED, T_SIZEOF, T_STATIC,
  T_STRUCT, T_SWITCH, T_TYPEDEF, T_UNION, T_UNSIGNED, T_VOID, T_VOLATILE, T_WHILE,
  T_KEYWORDS_END,

  // AST type used within an expression (51 elements)
  // Note that some are only used internally and will never occur in the AST,
  // specifically they are EXP_LPAREN, EXP_RPAREN, EXP_RSPAREN
  EXP_BEGIN = 2000,
  EXP_FUNC_CALL = 2000, EXP_ARRAY_SUB,      // func() array[]
  EXP_LPAREN, EXP_RPAREN,                   // ( and ) as parenthesis
  EXP_RSPAREN,                              // ]
  EXP_DOT, EXP_ARROW,                       // obj.field ptr->field
  EXP_POST_INC, EXP_PRE_INC,                // x++ ++x
  EXP_POST_DEC, EXP_PRE_DEC,                // x-- --x
  EXP_PLUS, EXP_MINUS,                      // +x -x
  EXP_LOGICAL_NOT, EXP_BIT_NOT,             // !exp ~exp
  EXP_CAST,                                 // (type)
  EXP_DEREF, EXP_ADDR,                      // *ptr &x
  EXP_SIZEOF,                               // sizeof(type/name)
  EXP_MUL, EXP_DIV, EXP_MOD,                // binary * / %
  EXP_ADD, EXP_SUB,                         // binary + -
  EXP_LSHIFT, EXP_RSHIFT,                   // << >>
  EXP_LESS, EXP_GREATER, EXP_LEQ, EXP_GEQ,  // < > <= >=
  EXP_EQ, EXP_NEQ,                          // == !=
  EXP_BIT_AND, EXP_BIT_OR, EXP_BIT_XOR,     // binary & | ^
  EXP_LOGICAL_AND, EXP_LOGICAL_OR,          // && ||
  EXP_COND, EXP_COLON,                      // ? :
  EXP_ASSIGN_BEGIN,                         // We use these two to check whether exp has an assign
  EXP_ASSIGN = EXP_ASSIGN_BEGIN,            // =
  EXP_ADD_ASSIGN, EXP_SUB_ASSIGN,           // += -=
  EXP_MUL_ASSIGN, EXP_DIV_ASSIGN, EXP_MOD_ASSIGN, // *= /= %=
  EXP_AND_ASSIGN, EXP_OR_ASSIGN, EXP_XOR_ASSIGN,  // &= |= ^=
  EXP_LSHIFT_ASSIGN, EXP_RSHIFT_ASSIGN,     // <<= >>=
  EXP_ASSIGN_END = EXP_RSHIFT_ASSIGN,       // There must be no gap
  EXP_COMMA,                                // ,
  EXP_END,
  // Internal nodes
  
  T_DECL, T_BASETYPE,             // Root node of a declaration
  T_,                             // Placeholder
  T_COMP_DECL,                    // structure or union declaration line, can contain one base and multiple declarator
  T_COMP_FIELD,                   // Single field declaration; Contains a DECL and optional number for bitfield
  T_ENUM_FIELD,                    // Enum declaration field (single line)
  T_LBL_STMT,
  T_EXP_STMT,
  T_COMP_STMT,
  T_INIT_LIST,
  T_STMT_LIST,                    // Contains a list of statements
  T_DECL_STMT_LIST,               // Contains a list of entries
  T_DECL_STMT_ENTRY,              // Contains a base type and a list of vars
  T_DECL_STMT_VAR,                // Contains a decl and optional initializer expression/list
  T_ROOT,
  T_GLOBAL_FUNC,                  // Global function definition
  T_GLOBAL_DECL_ENTRY,            // Global declaration (same layout as T_DECL_STMT_ENTRY)
  T_GLOBAL_DECL_VAR,              // Single entry that contains name and initializer
  T_BITFIELD,                     // Bit field in struct/union; Contains an expression
  T_INIT,                         // Single value init, only has one child

  T_ILLEGAL = 10000,    // Mark a return value
} token_type_t;
122 | 
123 | // Declaration properties, see below
124 | typedef uint32_t decl_prop_t;
125 | 
126 | typedef struct token_t {
127 |   token_type_t type;         // This will be written during parsing to AST type
128 |   char *str;                 // Only valid for literals and identifiers; Owned by the token object
129 |   struct token_t *child;
130 |   union {
131 |     struct token_t *sibling; // If token is in AST then use child-sibling representation
132 |     struct token_t *next;    // If token is in pushbacks queue then form a circular queue
133 |   };
134 |   struct token_t *parent;    // Empty for root node
135 |   char *offset;              // The offset in source file, for error reporting purposes; AST node may also have this field
136 |   decl_prop_t decl_prop;     // Property if the kwd is part of declaration; Set when a kwd is found
137 | } token_t;
138 | 
// Bit layout of decl_prop_t as defined below:
//   bits  4 -  7  type specifier of a keyword token (DECL_TYPE_MASK)
//   bits  8 - 11  storage class (DECL_STGCLS_MASK)
//   bits 12 - 13  type qualifiers (DECL_QUAL_MASK)
//   bits 16 - 23  complete base type at AST level (BASETYPE_MASK)
#define DECL_NULL          0x00000000
#define DECL_INVALID       0xFFFFFFFF // Naturally incompatible with all
// Type specifier bit mask (bit 4, 5, 6, 7), at the token level
#define DECL_TYPE_MASK     0x000000F0
#define DECL_CHAR     0x00000010
#define DECL_SHORT    0x00000020
#define DECL_INT      0x00000030
#define DECL_LONG     0x00000040
#define DECL_ENUM     0x00000050
#define DECL_STRUCT   0x00000060
#define DECL_UNION    0x00000070
#define DECL_UDEF     0x00000080 // User defined using typedef
#define DECL_FLOAT    0x00000090
#define DECL_DOUBLE   0x000000A0
#define DECL_VOID     0x000000B0
#define DECL_UNSIGNED 0x000000C0
#define DECL_SIGNED   0x000000D0
// Storage class bit mask (bit 8, 9, 10, 11); Incompatible with each other
#define DECL_STGCLS_MASK      0x00000F00
#define DECL_TYPEDEF   0x00000100 // Define a new type using typedef storage class
#define DECL_EXTERN    0x00000200
#define DECL_AUTO      0x00000300
#define DECL_REGISTER  0x00000400
#define DECL_STATIC    0x00000500
// Macro for accessing storage class
#define DECL_STGCLS_GET(decl_prop) ((decl_prop) & DECL_STGCLS_MASK)
#define DECL_ISTYPEDEF(decl_prop) (DECL_STGCLS_GET(decl_prop) == DECL_TYPEDEF)
#define DECL_ISEXTERN(decl_prop) (DECL_STGCLS_GET(decl_prop) == DECL_EXTERN)
#define DECL_ISAUTO(decl_prop) (DECL_STGCLS_GET(decl_prop) == DECL_AUTO)
#define DECL_ISREGISTER(decl_prop) (DECL_STGCLS_GET(decl_prop) == DECL_REGISTER)
#define DECL_ISSTATIC(decl_prop) (DECL_STGCLS_GET(decl_prop) == DECL_STATIC)

// Type qualifier bit mask (bit 12, 13); Note that these two are compatible (so they are mask)
#define DECL_QUAL_MASK     0x00003000
#define DECL_VOLATILE_MASK 0x00001000
#define DECL_CONST_MASK    0x00002000
// All together, if any of these bits are present, then it is a declaration keyword
#define DECL_MASK (DECL_TYPE_MASK | DECL_STGCLS_MASK | DECL_QUAL_MASK)
// The following defines complete set of supported types (bit 16 - 23), at AST level
#define BASETYPE_MASK       0x00FF0000
#define BASETYPE_NONE       0x00000000
#define BASETYPE_CHAR       0x00010000
#define BASETYPE_SHORT      0x00020000
#define BASETYPE_INT        0x00030000
#define BASETYPE_LONG       0x00040000
#define BASETYPE_UCHAR      0x00050000
#define BASETYPE_USHORT     0x00060000
#define BASETYPE_UINT       0x00070000
#define BASETYPE_ULONG      0x00080000
#define BASETYPE_LLONG      0x00090000
#define BASETYPE_ULLONG     0x000A0000
#define BASETYPE_FLOAT      0x000B0000
#define BASETYPE_DOUBLE     0x000C0000
#define BASETYPE_LDOUBLE    0x000D0000
#define BASETYPE_STRUCT     0x000E0000
#define BASETYPE_UNION      0x000F0000
#define BASETYPE_ENUM       0x00100000
#define BASETYPE_UDEF       0x00110000
#define BASETYPE_VOID       0x00120000
#define BASETYPE_BITFIELD   0x00130000
// Extracts the base type bits. The argument is parenthesized so that an expression such as
// (a | b) is masked as a whole rather than only its last operand.
#define BASETYPE_GET(decl_prop) ((decl_prop) & BASETYPE_MASK)
200 | // Better write setters as functions, not macros to avoid evaluating arguments multiple times
201 | inline static void BASETYPE_SET(token_t *token, decl_prop_t basetype) {
202 |   token->decl_prop &= ~BASETYPE_MASK; \
203 |   token->decl_prop |= ((basetype) & BASETYPE_MASK);
204 | }
205 | 
// Returns the index into the integer size table.
// NOTE(review): the value is only shifted, not masked, so any TYPE_OP_* bits (24 - 31) end up
// in the result as well -- confirm callers pass a value holding base type bits only.
#define BASETYPE_INDEX(decl_prop) ((decl_prop) >> 16)
// Inverse of BASETYPE_INDEX. The argument is parenthesized so that the cast and the shift
// apply to the whole expression passed in.
#define BASETYPE_FROMINDEX(index) ((decl_prop_t)(index) << 16)
// The following are used by type nodes to specify the derivation operation
#define TYPE_OP_NONE           0x00000000
#define TYPE_OP_DEREF          0x01000000
#define TYPE_OP_ARRAY_SUB      0x02000000
#define TYPE_OP_FUNC_CALL      0x03000000
#define TYPE_OP_BITFIELD       0x04000000
#define TYPE_OP_MASK           0xFF000000
// Extracts the derivation operation bits; the argument is parenthesized for the same reason
#define TYPE_OP_GET(decl_prop) ((decl_prop) & TYPE_OP_MASK)

#define TYPE_EMPTY_BODY        0x01000000 // Struct or union has body but it is empty; Valid only with token T_STRUCT, T_UNION
218 | 
219 | typedef struct {
220 |   stack_t *udef_types;       // Auto detected when lexing T_IDENT
221 |   token_t *pb_head;          // Pushback token head (removing end)
222 |   token_t *pb_tail;          // Pushback token tail (inserting end)
223 |   int pb_count;              // Number of pushbacks
224 |   char *s;                   // Current read position
225 |   char *begin;               // Begin of the current text (set once never changes)
226 | } token_cxt_t;
227 | 
// Operator associativity, as reported by token_get_property()
typedef enum {
  ASSOC_LR,
  ASSOC_RL,
} assoc_t;
231 | 
// Keyword spellings, one entry per T_AUTO .. T_WHILE keyword type (32 in total).
// NOTE(review): presumably ordered so that index i corresponds to T_KEYWORDS_BEGIN + i -- confirm in token.c
extern const char *keywords[32];
// NOTE(review): presumably the DECL_* property of each keyword, parallel to keywords[] -- confirm in token.c
extern uint32_t kwd_props[32];
// NOTE(review): 51 matches the number of EXP_* expression types; presumably indexed by (type - EXP_BEGIN) -- confirm in token.c
extern int precedences[51];
235 | 
236 | // Note that both bounds are inclusive because there must be no gap in the exp token enum
237 | inline static int token_is_assign(token_t *token) { 
238 |   return token->type >= EXP_ASSIGN_BEGIN && token->type <= EXP_ASSIGN_END; 
239 | }
240 | 
241 | token_cxt_t *token_cxt_init(char *input);
242 | void token_cxt_reinit(token_cxt_t *cxt, char *input); // Change input stream
243 | void token_cxt_free(token_cxt_t *cxt);
244 | void token_enter_scope(token_cxt_t *cxt);
245 | void token_exit_scope(token_cxt_t *cxt);
246 | void token_add_utype(token_cxt_t *cxt, token_t *token);
247 | int token_isutype(token_cxt_t *cxt, token_t *token);
248 | int token_decl_compatible(token_t *dest, token_t *src);
249 | int token_decl_apply(token_t *dest, token_t *src);
250 | char *token_decl_print(decl_prop_t decl_prop);
251 | void token_get_property(token_type_t type, int *preced, assoc_t *assoc);
252 | int token_get_num_operand(token_type_t type);
253 | token_type_t token_get_keyword_type(const char *s);
254 | const char *token_typestr(token_type_t type);
255 | const char *token_symstr(token_type_t type);
256 | char *token_get_op(char *s, token_t *token);
257 | void token_copy_literal(token_t *token, const char *begin, const char *end);
258 | void token_free(token_t *token);
259 | token_t *token_alloc();
260 | token_t *token_alloc_type(token_type_t type);
261 | token_t *token_get_empty();
262 | char *token_get_ident(token_cxt_t *cxt, char *s, token_t *token);
263 | char *token_get_int(char *s, token_t *token);
264 | char *token_get_str(char *s, token_t *token, char closing);
265 | token_t *token_get_next_ignore_lookahead(token_cxt_t *cxt);
266 | token_t *token_get_next(token_cxt_t *cxt);
267 | int token_consume_type(token_cxt_t *cxt, token_type_t type);
268 | void token_pushback(token_cxt_t *cxt, token_t *token);
269 | token_t *token_lookahead(token_cxt_t *cxt, int count);
270 | token_t *token_lookahead_notnull(token_cxt_t *cxt, int count);
271 | 
272 | #endif


--------------------------------------------------------------------------------
/src/x86/.gitignore:
--------------------------------------------------------------------------------
1 | *.o
2 | *.lib
3 | x86-test
4 | 


--------------------------------------------------------------------------------
/src/x86/Makefile:
--------------------------------------------------------------------------------
 1 | 
 2 | CFLAGS=-O0 -g -Wall -Wextra -Werror -Wno-unused-parameter -Wno-unused-variable
 3 | LDFLAGS=
 4 | 
 5 | all: x86-test
 6 | 
 7 | x86-test: x86.o x86-test.c
 8 | 	gcc x86-test.c x86.o -o x86-test $(CFLAGS) $(LDFLAGS)
 9 | 
10 | x86.o: x86.c x86.h
11 | 	gcc x86.c -c -o x86.o $(CFLAGS) $(LDFLAGS)
12 | 


--------------------------------------------------------------------------------
/src/x86/README.md:
--------------------------------------------------------------------------------
1 | 
2 | x86
3 | ===
4 | 
5 | This repo implements an abstracted interface to the x86 instruction set.
6 | It can be independently compiled and tested.


--------------------------------------------------------------------------------
/src/x86/todo.txt:
--------------------------------------------------------------------------------
1 | 
2 | In the test, assign each instruction an address
3 |  - Let jump/loop rel8 instruction print absolute address computed from rel8
4 | 
5 | Extend to 32-bit          


--------------------------------------------------------------------------------
/src/x86/x86.h:
--------------------------------------------------------------------------------
  1 | 
  2 | #ifndef _X86_H
  3 | #define _X86_H
  4 | 
  5 | #include <stdio.h>
  6 | #include <stdlib.h>
  7 | #include <ctype.h>
  8 | #include <stdint.h>
  9 | #include <limits.h>
 10 | #include <string.h>
 11 | #include <error.h>
 12 | #include <unistd.h>
 13 | #include <sys/stat.h>
 14 | #include <sys/types.h>
 15 | #include <fcntl.h>
 16 | #include <time.h>
 17 | #include <xmmintrin.h>
 18 | #include <emmintrin.h>
 19 | #include <immintrin.h>
 20 | #include <assert.h>
 21 | 
 22 | //* util
 23 | 
 24 | // Error reporting and system call assertion
 25 | #define SYSEXPECT(expr) do { if(!(expr)) { perror(__func__); assert(0); exit(1); } } while(0)
 26 | #define error_exit(fmt, ...) do { fprintf(stderr, "%s error: " fmt, __func__, ##__VA_ARGS__); assert(0); exit(1); } while(0);
 27 | #ifndef NDEBUG
 28 | #define dbg_printf(fmt, ...) do { fprintf(stderr, fmt, ##__VA_ARGS__); } while(0);
 29 | #else
 30 | #define dbg_printf(fmt, ...) do {} while(0);
 31 | #endif
 32 | 
 33 | #define warn_printf(fmt, ...) do { fprintf(stdout, "Warning: " fmt, ##__VA_ARGS__); } while(0);
 34 | 
 35 | // Branching macro (this may have already been defined in other source files)
 36 | #ifndef likely
 37 | #define likely(x)       __builtin_expect((x),1)
 38 | #endif
 39 | #ifndef unlikely
 40 | #define unlikely(x)     __builtin_expect((x),0)
 41 | #endif
 42 | 
 43 | // Testing function print name and pass
 44 | #define TEST_BEGIN() do { printf("========== %s ==========\n", __func__); } while(0);
 45 | #define TEST_PASS() do { printf("Pass!\n"); } while(0);
 46 | 
 47 | // String functions
 48 | inline static char *strclone(const char *s) {
 49 |   int len = strlen(s);
 50 |   char *ret = (char *)malloc(len + 1);
 51 |   SYSEXPECT(ret != NULL);
 52 |   strcpy(ret, s);
 53 |   return ret;
 54 | }
 55 | 
//* Prefix (raw value in instructions)

// Repeat prefixes; REPE/REPZ are aliases of REP (same byte)
#define PREFIX_REP    0xf3
#define PREFIX_REPE   PREFIX_REP
#define PREFIX_REPZ   PREFIX_REP

// REPNZ is an alias of REPNE (same byte)
#define PREFIX_REPNE  0xf2
#define PREFIX_REPNZ  PREFIX_REPNE

// Segment override
#define PREFIX_CS     0x2e
#define PREFIX_DS     0x3e
#define PREFIX_ES     0x26
#define PREFIX_SS     0x36

// Lock prefix
#define PREFIX_LOCK   0xf0
 72 | 
 73 | // Used for MMX instruction
 74 | #define PREFIX_MMX_MASK 0xf0f036263e2ef2f3UL
 75 | // Covert prefix to a flag; Returns FLAG_NONE if not a prefix
 76 | uint32_t prefix_to_flag_mmx(uint8_t byte);
 77 | uint32_t prefix_to_flag_scalar(uint8_t byte);
 78 | 
 79 | #ifdef ENABLE_MMX
 80 | #define prefix_to_flag prefix_to_flag_mmx
 81 | #else 
 82 | #define prefix_to_flag prefix_to_flag_scalar
 83 | #endif
 84 | 
 85 | //* Global control
 86 | 
 87 | #define ENABLE_MMX
 88 | 
// Process-wide options; a single instance named "global" is declared below and is
// defined in another translation unit
typedef struct {
  int warn_repeated_prefix; // Whether to warn repeated prefix bytes
} global_t;

extern global_t global;
 94 | 
//* Prefix flags

// One bit per flag so that several can be OR-ed into one uint32_t
// (presumably the "flags" field of an instruction -- confirm against x86.c)
#define FLAG_NONE     0x00000000
#define FLAG_REP      0x00000001
#define FLAG_REPE     FLAG_REP
#define FLAG_REPZ     FLAG_REP

#define FLAG_REPNE    0x00000002
#define FLAG_REPNZ    FLAG_REPNE

// Segment override flags
#define FLAG_CS       0x00000004
#define FLAG_DS       0x00000008
#define FLAG_ES       0x00000010
#define FLAG_SS       0x00000020

#define FLAG_LOCK     0x00000040

// The remaining flags do not come from prefix bytes

// D flag in the opcode byte
#define FLAG_D        0x00000080
// W flag in the opcode byte
#define FLAG_W        0x00000100
// Whether call/jmp is far
#define FLAG_FAR      0x00000200
118 | 
119 | //* Register constants
120 | 
// Register identifiers. The general purpose registers form three consecutive half-open
// ranges [REG_GEN_x_BEGIN, REG_GEN_x_END): each _BEGIN aliases the first register of its
// range, and each _END has the same value as the next range's _BEGIN.
// NOTE(review): the segment range is laid out differently -- REG_SEG_BEGIN is an enumerator
// of its own (REG_GEN_END + 1) and REG_CS is REG_SEG_BEGIN + 1, so REG_SEG_BEGIN does not
// alias a register. Likewise REG_GEN_END and REG_SEG_END are values below REG_END that name
// no register. Confirm that users of these bounds (and the reg_names table) expect this.
enum {
  REG_NONE = 0,
  REG_BEGIN,
  REG_GEN_BEGIN = REG_BEGIN,
  // 16-bit register
  REG_GEN_16_BEGIN = REG_BEGIN,
  REG_AX = REG_GEN_16_BEGIN,
  REG_BX,
  REG_CX,
  REG_DX,
  REG_SI,
  REG_DI,
  REG_BP,
  REG_SP,
  REG_GEN_16_END,
  // 32-bit register
  REG_GEN_32_BEGIN = REG_GEN_16_END,
  REG_EAX = REG_GEN_32_BEGIN,
  REG_EBX,
  REG_ECX,
  REG_EDX,
  REG_ESI,
  REG_EDI,
  REG_EBP,
  REG_ESP,
  REG_GEN_32_END,
  // 8-bit register
  REG_GEN_8_BEGIN = REG_GEN_32_END,
  REG_AH = REG_GEN_8_BEGIN,
  REG_AL,
  REG_BH,
  REG_BL,
  REG_CH,
  REG_CL,
  REG_DH,
  REG_DL,
  REG_GEN_8_END,
  REG_GEN_END = REG_GEN_8_END,
  // Segment register
  REG_SEG_BEGIN,
  REG_CS,
  REG_DS,
  REG_ES,
  REG_SS,
  REG_SEG_END,
  REG_END,
  // Non-general purpose registers (placed after REG_END, i.e. outside [REG_BEGIN, REG_END))
  REG_IP,
  REG_FLAGS,
};
170 | 
171 | extern const char *reg_names[];
172 | 
173 | //* R/M Tables
174 | 
175 | // Maps REG field to register name, word size (w = 0)
176 | extern const int gen_reg_16_table[8];
177 | extern const int gen_reg_8_table[8];
178 | extern const int seg_reg_table[4];
179 | 
// Register pair for R/M addressing
// Holds up to two register ids (REG_ enum values) used to form a memory address
typedef struct {
  int reg1;
  int reg2;
} addr_mode_reg_t;
185 | 
186 | // Mode = 00/01/10
187 | extern const addr_mode_reg_t addr_mode_reg_table_16[8];
188 | extern const addr_mode_reg_t addr_mode_reg_table_32[8];
189 | 
// Values for addr_mode_t.addr_mode. These are internal codes: as the comment on
// ADDR_MODE_MEM_DIRECT states, not every value appears in the raw instruction
#define ADDR_MODE_MEM_REG_ONLY     0
#define ADDR_MODE_MEM_REG_DISP_8   1
#define ADDR_MODE_MEM_REG_DISP_16  2
#define ADDR_MODE_MEM_REG_DISP_32  3
// This will cause the addr_mode object be not initialized
#define ADDR_MODE_REG              4
// Direct addr mode; This is not in the raw instruction; Same for 16 and 32 bit
#define ADDR_MODE_MEM_DIRECT       5
198 | 
// Addressing mode for memory operands
// The displacement and direct address fields share storage in an anonymous union;
// which member is meaningful depends on addr_mode
typedef struct {
  int addr_mode;         // ADDR_MODE_ macros
  addr_mode_reg_t regs;  // Register for addressing (one or two)
  union {
    uint8_t disp_8;
    uint16_t disp_16;
    uint32_t disp_32;
    uint16_t direct_addr_16; // Direct addressing mode uses this
    uint32_t direct_addr_32; // Direct addressing mode uses this
  };
} addr_mode_t;
211 | 
212 | // Prints memory operand (ADDR_MODE_REG will not be printed because its encoding is not stored)
213 | void addr_mode_fprint(addr_mode_t *addr_mode, uint32_t flags, FILE *fp);
// Writes one mode/reg/rm byte to data[0], laid out as mode (bits 7-6), reg (bits 5-3) and
// rm (bits 2-0), and returns the address of the byte after it. Only this single byte is
// written; no displacement is emitted here
inline static uint8_t *addr_mode_gen(uint8_t mode, uint8_t reg, uint8_t rm, uint8_t *data) {
  assert(mode <= 3);
  assert(reg <= 7);
  assert(rm <= 7);
  const unsigned mode_bits = (unsigned)mode << 6;
  const unsigned reg_bits = (unsigned)reg << 3;
  *data = (uint8_t)(mode_bits | reg_bits | rm);
  return &data[1];
}
222 | 
// Operand type
// Values for operand_t.operand_mode; they tell which member of the operand union is in use
#define OPERAND_NONE       0
#define OPERAND_REG        1
#define OPERAND_MEM        2
#define OPERAND_IMM_8      3
#define OPERAND_IMM_16     4
#define OPERAND_REL_8      5
#define OPERAND_REL_16     6
#define OPERAND_FARPTR     7
// The operand is a const value 1, which is not stored
#define OPERAND_IMPLIED_1  8
234 | 
// seg:offset far pointer. The field order (offset first, then seg) is the order in which
// operand_set_farptr reads the two 16-bit words from the instruction stream
typedef struct {
  uint16_t offset;
  uint16_t seg;
} farptr_t;
239 | 
240 | // An operand can be either register or memory, which is encoded by addr_node_t
241 | typedef struct {
242 |   int operand_mode;
243 |   union {
244 |     int reg;             // Operand is in one of the registers (size implied by register width)
245 |     addr_mode_t mem;     // Operand is in memory (size given by W flag)
246 |     uint16_t imm_16;     // 16 bit immediate value
247 |     uint8_t imm_8;       // 8 bit immediate value
248 |     uint16_t rel_16;     // 16 bit relative
249 |     uint16_t rel_8;      // 8 bit relative
250 |     farptr_t farptr;     // seg:offset full address (32-bit operand)
251 |   };
252 | } operand_t; 
253 | 
// Raw instruction stream accessors.
// ptr_add_N returns p advanced by N bits; ptr_load_N reads an N-bit value at p in host
// byte order. Multi-byte loads go through memcpy because instruction bytes can sit at any
// address: dereferencing a casted uint16_t/uint32_t pointer there is a misaligned access.
// The 32-bit variants mirror the 16-bit ones for the 32-bit operand helpers.
inline static void *ptr_add_16(void *p) { return (void *)((uint8_t *)p + sizeof(uint16_t)); }
inline static void *ptr_add_8(void *p) { return (void *)((uint8_t *)p + 1); }
inline static void *ptr_add_32(void *p) { return (void *)((uint8_t *)p + sizeof(uint32_t)); }
inline static uint16_t ptr_load_16(void *p) {
  uint16_t value;
  memcpy(&value, p, sizeof(value));
  return value;
}
inline static uint8_t ptr_load_8(void *p) { return *(uint8_t *)p; }
inline static uint32_t ptr_load_32(void *p) {
  uint32_t value;
  memcpy(&value, p, sizeof(value));
  return value;
}
258 | 
259 | // Sets an operand as register. Register can be either general purpose or segment, but not IP or FLAGS
260 | inline static void operand_set_register(operand_t *operand, int reg) {
261 |   assert(reg >= REG_BEGIN && reg < REG_END);
262 |   operand->operand_mode = OPERAND_REG;
263 |   operand->reg = reg;
264 |   return;
265 | }
266 | 
267 | // Parse a 8-bit immediate value from the instruction stream
268 | inline static void *operand_set_imm_8(operand_t *operand, void *data) {
269 |   operand->operand_mode = OPERAND_IMM_8;
270 |   operand->imm_8 = ptr_load_8(data);
271 |   return ptr_add_8(data);
272 | }
273 | 
274 | inline static void *operand_set_imm_16(operand_t *operand, void *data) {
275 |   operand->operand_mode = OPERAND_IMM_16;
276 |   operand->imm_16 = ptr_load_16(data);
277 |   return ptr_add_16(data);
278 | }
279 | 
280 | inline static void *operand_set_rel_8(operand_t *operand, void *data) {
281 |   operand->operand_mode = OPERAND_REL_8;
282 |   operand->rel_8 = ptr_load_8(data);
283 |   return ptr_add_8(data);
284 | }
285 | 
286 | inline static void *operand_set_rel_16(operand_t *operand, void *data) {
287 |   operand->operand_mode = OPERAND_REL_16;
288 |   operand->rel_16 = ptr_load_16(data);
289 |   return ptr_add_16(data);
290 | }
291 | 
292 | inline static void operand_set_const_8(operand_t *operand, uint8_t value) {
293 |   operand->operand_mode = OPERAND_IMM_8;
294 |   operand->imm_8 = value;
295 |   return;
296 | }
297 | 
298 | inline static void operand_set_const_16(operand_t *operand, uint16_t value) {
299 |   operand->operand_mode = OPERAND_IMM_16;
300 |   operand->imm_16 = value;
301 |   return;
302 | }
303 | 
304 | inline static void operand_set_implied_one(operand_t *operand) {
305 |   operand->operand_mode = OPERAND_IMPLIED_1;
306 |   return;
307 | }
308 | 
309 | inline static void *operand_set_farptr(operand_t *operand, void *data) {
310 |   operand->operand_mode = OPERAND_FARPTR;
311 |   operand->farptr.offset = ptr_load_16(data);
312 |   data = ptr_add_16(data);
313 |   operand->farptr.seg = ptr_load_16(data);
314 |   return ptr_add_16(data);
315 | }
316 | 
317 | // Sets a direct memory operand
318 | // This is specifically used by mov 0xA0 - 0xA3
319 | inline static void *operand_set_mem_direct_addr_16(operand_t *operand, void *data) {
320 |   operand->operand_mode = OPERAND_MEM;
321 |   operand->mem.addr_mode = ADDR_MODE_MEM_DIRECT;
322 |   operand->mem.direct_addr_16 = ptr_load_16(data);
323 |   return ptr_add_16(data);
324 | }
325 | 
326 | inline static void *operand_set_mem_direct_addr_32(operand_t *operand, void *data) {
327 |   operand->operand_mode = OPERAND_MEM;
328 |   operand->mem.addr_mode = ADDR_MODE_MEM_DIRECT;
329 |   operand->mem.direct_addr_32 = ptr_load_32(data);
330 |   return ptr_add_32(data);
331 | }
332 | 
333 | // Given mode and r/m bits, set the operand
334 | void *parse_operand_mod_rm(operand_t *operand, int addr_mode, int flags, int rm, void *data);
335 | // Parsing 2 operands, must be either reg or mem
336 | void *parse_operand_2(operand_t *dest, operand_t *src, uint32_t flags, void *data);
337 | // Only parses mod + rm, returns REG
338 | void *parse_operand_1(operand_t *operand, uint32_t flags, int *reg, void *data);
339 | 
340 | void operand_fprint(operand_t *operand, uint32_t flags, FILE *fp);
341 | 
342 | // Instruction
343 | 
// Abstract operation codes (the OP_ class stored in ins_t.op), independent of the raw
// opcode byte. op_names[] maps these values to mnemonic strings, so the order here and
// the order of that table must be kept in sync. ins_t.op is a uint8_t, so the number of
// enumerators must stay below 256
enum {
  OP_NOP = 0,
  OP_ADD,
  OP_PUSH,
  OP_POP,
  OP_OR,
  OP_ADC,
  OP_SBB,
  OP_AND,
  OP_DAA,
  OP_SUB,
  OP_DAS,
  OP_XOR,
  OP_AAA,
  OP_CMP,
  OP_AAS,
  OP_INC,
  OP_DEC,
  // Jump short
  OP_JO,
  OP_JNO,
  OP_JB,
  OP_JNB,
  OP_JZ,
  OP_JNZ,
  OP_JBE,
  OP_JA,
  OP_JS,
  OP_JNS,
  OP_JPE,
  OP_JPO,
  OP_JL,
  OP_JGE,
  OP_JLE,
  OP_JG,
  OP_TEST,
  OP_XCHG,
  OP_MOV,
  OP_LEA,
  OP_CBW,
  OP_CWD,
  OP_CALL,
  OP_WAIT,
  OP_PUSHF, 
  OP_POPF, 
  OP_SAHF,
  OP_LAHF,
  // String operations (byte / word variants)
  OP_MOVSB, 
  OP_MOVSW,
  OP_CMPSB,
  OP_CMPSW,
  OP_STOSB,
  OP_STOSW,
  OP_LODSB,
  OP_LODSW,
  OP_SCASB,
  OP_SCASW,
  OP_RET,
  OP_LES,
  OP_LDS,
  OP_RETF,
  OP_INT3,
  OP_INT,
  OP_INTO,
  OP_IRET,
  // Rotates and shifts
  OP_ROL,
  OP_ROR,
  OP_RCL,
  OP_RCR,
  OP_SHL,
  OP_SHR,
  OP_SAR,
  OP_AAM,
  OP_AAD,
  OP_XLAT,
  OP_LOOPNZ,
  OP_LOOPZ,
  OP_LOOP,
  OP_JCXZ,
  OP_IN,
  OP_OUT,
  OP_JMP,
  OP_HLT,
  OP_CMC,
  OP_NOT,
  OP_NEG,
  OP_MUL,
  OP_IMUL,
  OP_DIV,
  OP_IDIV,
  // Flag manipulation
  OP_CLC,
  OP_STC,
  OP_CLI,
  OP_STI,
  OP_CLD,
  OP_STD,
};
441 | 
442 | // Maps op macros (see above) to string names
443 | extern const char *op_names[];
444 | 
// One decoded instruction
typedef struct {
  farptr_t addr;       // Address of the instruction
  uint8_t opcode;      // This is the raw opcode byte including the D and W flags, i.e., the full 8 bits
  uint8_t op;          // This is the abstract operation (OP_ class)
  uint32_t flags;      // Bit set of FLAG_ macros (presumably prefix flags plus D/W/FAR -- confirm in x86.c)
  uint8_t size;        // Number of bytes in the instruction
  operand_t dest;
  operand_t src;       // If there is only one operand, the src is used
} ins_t;
454 | 
// Reads instructions from a file (used for debugging)
// ptr is the read cursor and end is the end pointer; the inline helpers below report
// "end" by comparing the two
typedef struct {
  char *filename;              // File name
  void *data;                  // Content of the file
  void *end;                   // End pointer
  void *ptr;                   // Current read position
  int size;                    // File size (bytes)
  uint16_t next_addr;          // Next address of the instruction
} ins_reader_t;
464 | 
465 | ins_reader_t *ins_reader_init();
466 | void ins_reader_free(ins_reader_t *ins_reader);
467 | inline static int ins_reader_is_end(ins_reader_t *ins_reader) {
468 |   return ins_reader->ptr >= ins_reader->end;
469 | }
470 | inline static int ins_reader_is_exact_end(ins_reader_t *ins_reader) {
471 |   return ins_reader->ptr == ins_reader->end;
472 | }
473 | inline static void *ins_reader_get_curr_ptr(ins_reader_t *ins_reader) {
474 |   return ins_reader->ptr;
475 | }
476 | inline static uint16_t ins_reader_get_next_addr(ins_reader_t *ins_reader) {
477 |   return ins_reader->next_addr;
478 | }
479 | // The ins object is within the object
480 | void ins_reader_next(ins_reader_t *ins_reader, ins_t *ins);
481 | 
482 | inline static void print_ins_addr(ins_t *ins) {
483 |   fprintf(stderr, "Instruction at address %X:%X\n", ins->addr.seg, ins->addr.offset);
484 | }
485 | 
486 | // This is called at the beginning of an instruction
487 | void *parse_prefix(ins_t *ins, void *data);
488 | void *parse_opcode(ins_t *ins, void *data);
489 | 
490 | void *parse_alu_ins(ins_t *ins, int diff, int op, void *data);
491 | void *parse_ins_grp1(ins_t *ins, void *data);
492 | void *parse_ins_grp2(ins_t *ins, void *data);
493 | void *parse_ins_grp3(ins_t *ins, void *data);
494 | void *parse_ins_grp4(ins_t *ins, void *data);
495 | void *parse_ins_grp5(ins_t *ins, void *data);
496 | void *parse_ins(ins_t *ins, void *data);
497 | 
498 | void ins_rel_8_fprint(ins_t *ins, uint32_t next_addr, FILE *fp);
499 | void ins_rel_16_fprint(ins_t *ins, uint32_t next_addr, FILE *fp);
500 | void ins_fprint(ins_t *ins, uint32_t next_addr, FILE *fp);
501 | 
502 | #endif
503 | 


--------------------------------------------------------------------------------
/workspace.code-workspace:
--------------------------------------------------------------------------------
 1 | {
 2 | 	"folders": [
 3 | 		{
 4 | 			"path": "."
 5 | 		}
 6 | 	],
 7 | 	"settings": {
 8 | 		"files.associations": {
 9 | 			"ast.h": "c",
10 | 			"hashtable.h": "c",
11 | 			"parse_decl.h": "c",
12 | 			"parse_exp.h": "c",
13 | 			"parse_stmt.h": "c",
14 | 			"bintree.h": "c",
15 | 			"error.h": "c",
16 | 			"list.h": "c",
17 | 			"eval.h": "c",
18 | 			"token.h": "c",
19 | 			"type.h": "c"
20 | 		}
21 | 	}
22 | }


--------------------------------------------------------------------------------