├── .gitignore ├── .vscode └── settings.json ├── README.md ├── src ├── .gitignore ├── .vscode │ └── settings.json ├── Makefile ├── ast.c ├── ast.h ├── bintree.c ├── bintree.h ├── cgen.c ├── cgen.h ├── env.c ├── env.h ├── error.c ├── error.h ├── eval.c ├── eval.h ├── hashtable.c ├── hashtable.h ├── list.c ├── list.h ├── old │ ├── allocator.cpp │ ├── allocator.h │ ├── common.h │ ├── context.cpp │ ├── context.h │ ├── lex.cpp │ ├── lex.h │ ├── scope.cpp │ ├── scope.h │ ├── syntax.cpp │ ├── syntax.h │ ├── token.cpp │ └── token.h ├── parse.c ├── parse.h ├── parse_comp.c ├── parse_comp.h ├── parse_decl.c ├── parse_decl.h ├── parse_exp.c ├── parse_exp.h ├── parse_stmt.c ├── parse_stmt.h ├── parse_test_src.txt ├── python │ ├── Makefile │ ├── ast.py │ ├── basic_type.py │ ├── common.py │ ├── krc-earley.syntax │ ├── krc-lr.syntax │ ├── krc.syntax │ ├── lex.py │ ├── lex_test.c │ ├── symbol_table.py │ ├── syntax.py │ ├── token_list.txt │ └── type.py ├── stack.c ├── stack.h ├── str.c ├── str.h ├── tests │ ├── test_cgen.c │ ├── test_eval.c │ ├── test_lex.c │ ├── test_parse.c │ └── test_type.c ├── todo.txt ├── token.c ├── token.h ├── type.c ├── type.h └── x86 │ ├── .gitignore │ ├── Makefile │ ├── README.md │ ├── todo.txt │ ├── x86-test.c │ ├── x86.c │ └── x86.h └── workspace.code-workspace /.gitignore: -------------------------------------------------------------------------------- 1 | # Compiled Object files 2 | *.slo 3 | *.lo 4 | *.o 5 | *.obj 6 | 7 | # Precompiled Headers 8 | *.gch 9 | *.pch 10 | 11 | # Compiled Dynamic libraries 12 | *.so 13 | *.dylib 14 | *.dll 15 | 16 | # Fortran module files 17 | *.mod 18 | *.smod 19 | 20 | # Compiled Static libraries 21 | *.lai 22 | *.la 23 | *.a 24 | *.lib 25 | 26 | # Executables 27 | *.exe 28 | *.out 29 | *.app 30 | 31 | *.log 32 | *.asm 33 | ./build/* 34 | ./bin/* 35 | bin 36 | build 37 | 38 | ## Ignore python byte code 39 | *.pyc 40 | *.table 41 | 42 | # Specific files 43 | test_parse.txt 44 | .vscode/* 45 | 
-------------------------------------------------------------------------------- /.vscode/settings.json: -------------------------------------------------------------------------------- 1 | { 2 | "files.associations": { 3 | "ast.h": "c", 4 | "hashtable.h": "c", 5 | "parse_decl.h": "c", 6 | "parse_exp.h": "c", 7 | "parse_stmt.h": "c", 8 | "bintree.h": "c", 9 | "error.h": "c", 10 | "list.h": "c", 11 | "eval.h": "c", 12 | "token.h": "c", 13 | "type.h": "c" 14 | } 15 | } -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # CFront 2 | The goal of this project is to build a C compiler from scratch without using any third-party code except the standard C library. 3 | 4 | # Directory Structure 5 | [./src](https://github.com/wangziqi2013/CFront/tree/master/src) - Main source directory 6 | 7 | [./src/tests](https://github.com/wangziqi2013/CFront/tree/master/src/tests) - Unit tests and functional tests 8 | 9 | [./src/old](https://github.com/wangziqi2013/CFront/tree/master/src/old) - Deprecated code. Only for demonstration purposes. 10 | 11 | [./src/python](https://github.com/wangziqi2013/CFront/tree/master/src/python) - An LL(1)/LR(1)/LALR(1) compiler generator implemented in Python 12 | 13 | # Source File Description 14 | 15 | ## Main Files 16 | 17 | ./src/token.c: Implements lexical analysis and the token stream interface 18 | 19 | ./src/parse_exp.c: Implements the parsing interface and expression parsing. The entire parser is based on expression parsing, which uses a hand-coded shift-reduce parser with operator precedence. 20 | 21 | ./src/parse_decl.c: Implements declaration parsing. It uses expression parsing to build the declaration tree (in the C language, a declaration has exactly the same format as an expression). 22 | 23 | ./src/parse_comp.c: Implements composite type declaration parsing, including struct, union and enum. 
24 | 25 | ./src/parse_stmt.c: Implements statement parsing. 26 | 27 | ./src/parse.c: Implements top-level (global declaration, definition and function definition) parsing. 28 | 29 | ./src/type.c: Implements the type system. 30 | 31 | ./src/eval.c: Implements compile-time evaluation support, including constant evaluation, atoi, string to binary, etc. 32 | 33 | ./src/cgen.c: Implements top-level code generation. 34 | 35 | ## Data Structure Files 36 | 37 | ./src/ast.c: Implements the abstract syntax tree. We use the left-child right-sibling organization for trees. 38 | 39 | ./src/str.c: Implements vector and string. 40 | 41 | ./src/hashtable.c: Implements a hash table. We use hash tables as symbol tables for scopes. 42 | 43 | ./src/bintree.c: Implements a simple binary search tree. We use binary search trees as indices for composite types. 44 | 45 | ./src/list.c: Implements a singly linked list. 46 | 47 | ./src/stack.c: Implements a stack. We use stacks to maintain scopes and to perform shift-reduce parsing. 48 | 49 | # Compile and Test 50 | To compile, enter the ./src directory and type `make all` or just `make`. This will build an object file for each source file and link them with the tests. 51 | 52 | To test, run the binaries under the ./bin directory directly. Test source files are independent of each other (i.e. there is no mutual dependency), and should be rather straightforward to understand. 53 | 54 | # Contribution 55 | I only contribute to this project in my spare time. If you are interested in becoming a contributor, feel free to drop me a message on GitHub. 
--------------------------------------------------------------------------------
/src/.gitignore:
--------------------------------------------------------------------------------
*.o
bin/*
obj/*
*.d
--------------------------------------------------------------------------------
/src/.vscode/settings.json:
--------------------------------------------------------------------------------
{
  "files.associations": {
    "error.h": "c"
  }
}
--------------------------------------------------------------------------------
/src/Makefile:
--------------------------------------------------------------------------------

# Toolchain and flags. The default is an unoptimized debug build; pass OPT=1
# on the command line (make OPT=1) to build with -O3 instead.
CC=gcc
LD=ld
CFLAGS=-O0 -g -Wall -Wextra -Werror -Wno-unused-parameter -Wno-unused-variable
PWD=$(CURDIR)
TESTFLAGS=-I$(PWD)
LDFLAGS=
BIN=./bin

# Every .c file in this directory becomes one object file and one dependency file
SRCS=$(wildcard *.c)
OBJS=$(SRCS:.c=.o)
DEPS=$(SRCS:.c=.d)

# Every .c file under ./tests becomes one binary of the same name under $(BIN)
TEST_SRCS=$(wildcard ./tests/*.c)
TEST_OBJS=$(patsubst ./tests/%.c,$(BIN)/%,$(TEST_SRCS))

ifeq ($(OPT), 1)
  CFLAGS=-O3 -Wall -Wextra -Werror -Wno-unused-parameter -Wno-unused-variable
endif

# The special target is spelled in upper case; a lower-case ".phony" is just an
# ordinary target and leaves "tests" shadowed by the ./tests directory
.PHONY: all tests line-count mem-test clean

all: tests

tests: $(TEST_OBJS)

# Build rule for source files under the current directory, one object file per source file
%.o: %.c
	$(CC) -MMD -MP -c $< -o $@ $(CFLAGS) $(LDFLAGS) $(TESTFLAGS)

# Build rule for test source files under ./tests directory, one binary per test source file
# The output directory is ignored by git, so create it on demand
./bin/%: ./tests/%.c $(OBJS)
	@mkdir -p $(BIN)
	$(CC) $< $(OBJS) -o $@ $(CFLAGS) $(LDFLAGS) $(TESTFLAGS)

# Include automatically generated dependency files for every source file
-include $(DEPS)

line-count:
	cloc --exclude-lang=Python ./

# Also removes the dependency files produced by -MMD
clean:
	rm -f *.o
	rm -f *.d
	rm -f $(BIN)/*

--------------------------------------------------------------------------------
/src/ast.c:
-------------------------------------------------------------------------------- 1 | 2 | #include "ast.h" 3 | 4 | // Initialize a token to be an AST node. Return the node given to it 5 | token_t *ast_make_node(token_t *token) { 6 | token->child = token->sibling = token->parent = NULL; 7 | return token; 8 | } 9 | 10 | int ast_isleaf(token_t *token) { return token->child == NULL; } 11 | 12 | // Update the offset using the first non-NULL token in child list 13 | void ast_update_offset(token_t *token) { 14 | if(token->offset) return; 15 | token_t *child = token->child; 16 | while(child && !child->offset) child = child->sibling; 17 | if(child) token->offset = child->offset; 18 | } 19 | 20 | token_t *ast_append_child(token_t *token, token_t *child) { 21 | if(token->child == NULL) { 22 | token->child = child; 23 | } else { 24 | token_t *last = token->child; 25 | while(last->sibling != NULL) last = last->sibling; 26 | last->sibling = child; 27 | } 28 | child->sibling = NULL; 29 | child->parent = token; 30 | ast_update_offset(token); 31 | return token; 32 | } 33 | 34 | // Adds the node as the first child of the token 35 | token_t *ast_push_child(token_t *token, token_t *child) { 36 | child->sibling = token->child; 37 | token->child = child; 38 | child->parent = token; 39 | ast_update_offset(token); 40 | return token; 41 | } 42 | 43 | // Adds a node as a sibling after the given one, adding a child 44 | token_t *ast_insert_after(token_t *token, token_t *child) { 45 | child->sibling = token->sibling; 46 | token->sibling = child; 47 | child->parent = token->parent; 48 | ast_update_offset(token); 49 | return token; 50 | } 51 | 52 | // Remove from parent node. Assume there is a parent node. 
Returns the node itself 53 | token_t *ast_remove(token_t *token) { 54 | token_t *parent = token->parent; 55 | if(parent->child == token) parent->child = token->sibling; 56 | else { 57 | token_t *curr = parent->child; // Assumes that the tree is correctly formed, so curr will not be NULL 58 | while(curr->sibling != token) curr = curr->sibling; 59 | curr->sibling = token->sibling; 60 | } 61 | return token; 62 | } 63 | 64 | void ast_print(token_t *token) { ast_print_(token, 0); } 65 | 66 | void ast_print_(token_t *token, int depth) { 67 | for(int i = 0;i < depth * 2;i++) if(i % 2 == 0) printf("|"); else printf(" "); 68 | const char *symstr = token_symstr(token->type); 69 | printf("%04d:%04d:%s %s\n", 70 | token->type, 71 | token->offset ? error_get_offset(token->offset) : 0, 72 | token_typestr(token->type), 73 | token->type == T_BASETYPE ? token_decl_print(token->decl_prop) : 74 | (symstr == NULL ? (token->type >= T_LITERALS_BEGIN && token->type < T_LITERALS_END ? token->str : "") : symstr)); 75 | for(token_t *child = token->child;child != NULL; child = child->sibling) ast_print_(child, depth + 1); 76 | return; 77 | } 78 | 79 | // Releases memory for every node in the AST 80 | void ast_free(token_t *token) { 81 | while(token->child != NULL) { 82 | token_t *next = token->child->sibling; 83 | ast_free(token->child); 84 | token->child = next; 85 | } 86 | token_free(token); 87 | } 88 | 89 | int ast_child_count(token_t *token) { 90 | int count = 0; 91 | token_t *child = token->child; 92 | while(child) { 93 | count++; 94 | child = child->sibling; 95 | } 96 | return count; 97 | } 98 | 99 | // Get n-th child; Return NULL if index is larger than the number of children 100 | token_t *ast_getchild(token_t *token, int index) { 101 | assert(index >= 0 && token != NULL); 102 | token = token->child; 103 | while(token != NULL && index-- != 0) token = token->sibling; 104 | return token; 105 | } 106 | 107 | // Returns the last inserted node 108 | token_t *_ast_collect_funcarg(token_t 
*comma, token_t *token) { 109 | assert(ast_getchild(comma, 0) != NULL && ast_getchild(comma, 1) != NULL); 110 | token_t *child1 = comma->child, *child2 = child1->sibling; 111 | if(child1->type != EXP_COMMA) { 112 | ast_insert_after(token, child2); 113 | ast_insert_after(token, child1); 114 | token = child1; 115 | } else { 116 | ast_insert_after(token, child2); 117 | token = _ast_collect_funcarg(child1, token); 118 | } 119 | token_free(comma); 120 | return token; 121 | } 122 | 123 | // Transforms function argument from comma expression to flat structure 124 | // Three cases: argument-less func; one argument func (must not be comma exp) 125 | // and functions with >= 2 arguments 126 | void ast_collect_funcarg(token_t *token) { 127 | assert(token->type == EXP_FUNC_CALL); 128 | token_t *comma = ast_getchild(token, 1); 129 | if(comma == NULL || comma->type != EXP_COMMA) return; 130 | // The comma node has been freed. The function returns the last node inserted 131 | token->child->sibling = _ast_collect_funcarg(comma, comma); 132 | return; 133 | } 134 | 135 | // Transforms conditional expression from two 2-operand operators to 136 | // a signle cond operator 137 | void ast_movecond(token_t *token) { 138 | assert(token->type == EXP_COND); 139 | if(ast_getchild(token, 1)->type != EXP_COLON) 140 | error_row_col_exit(token->offset, "Operator \'?\' must be followed by operator \':\'\n"); 141 | token_t *colon = ast_getchild(token, 1), *child2 = ast_getchild(colon, 1); 142 | ast_append_child(token, colon->child); 143 | ast_append_child(token, child2); 144 | token->child->sibling = colon->child; 145 | token_free(colon); 146 | return; 147 | } 148 | 149 | // Returns a pointer to the first child of given type, or NULL 150 | token_t *ast_gettype(token_t *token, token_type_t type) { 151 | for(token = token->child;token && token->type != type;token = token->sibling); 152 | return token; 153 | } 154 | -------------------------------------------------------------------------------- 
/src/ast.h: -------------------------------------------------------------------------------- 1 | 2 | #ifndef _AST_H 3 | #define _AST_H 4 | 5 | #include "token.h" 6 | 7 | token_t *ast_make_node(token_t *token); 8 | int ast_isleaf(token_t *token); 9 | void ast_update_offset(token_t *token); 10 | token_t *ast_append_child(token_t *token, token_t *child); 11 | token_t *ast_push_child(token_t *token, token_t *child); 12 | token_t *ast_insert_after(token_t *token, token_t *child); 13 | token_t *ast_remove(token_t *token); 14 | void ast_print(token_t *token); 15 | void ast_print_(token_t *token, int depth); 16 | void ast_free(token_t *token); 17 | int ast_child_count(token_t *token); 18 | token_t *ast_getchild(token_t *token, int index); 19 | void ast_collect_funcarg(token_t *token); 20 | void ast_movecond(token_t *token); 21 | token_t *ast_gettype(token_t *token, token_type_t type); 22 | 23 | #endif -------------------------------------------------------------------------------- /src/bintree.c: -------------------------------------------------------------------------------- 1 | 2 | #include "bintree.h" 3 | 4 | btnode_t *btnode_alloc(void *key, void *value) { 5 | btnode_t *node = (btnode_t *)malloc(sizeof(btnode_t)); 6 | SYSEXPECT(node != NULL); 7 | node->key = key, node->value = value; 8 | node->left = node->right = NULL; 9 | return node; 10 | } 11 | void btnode_free(btnode_t *node) { free(node); } 12 | 13 | bintree_t *bt_init(cmp_cb_t cmp) { 14 | bintree_t *bt = (bintree_t *)malloc(sizeof(bintree_t)); 15 | SYSEXPECT(bt != NULL); 16 | bt->cmp = cmp; 17 | bt->root = NULL; 18 | bt->size = 0; 19 | return bt; 20 | } 21 | void bt_free(bintree_t *bt) { _bt_free(bt->root); free(bt); } 22 | void _bt_free(btnode_t *node) { 23 | if(node == NULL) return; 24 | _bt_free(node->left); 25 | _bt_free(node->right); 26 | btnode_free(node); 27 | return; 28 | } 29 | bintree_t *bt_str_init() { return bt_init(strcmp_cb); } 30 | 31 | int bt_size(bintree_t *bt) { return bt->size; } 32 | 33 | // 
Insert the key, or return an existing key 34 | void *bt_insert(bintree_t *bt, void *key, void *value) { 35 | btnode_t *found = NULL; // Set to new node if inserted, otherwise set to 36 | bt->root = _bt_insert(bt, bt->root, key, value, &found); 37 | return found->value; 38 | } 39 | btnode_t *_bt_insert(bintree_t *bt, btnode_t *node, void *key, void *value, btnode_t **found) { 40 | if(node == NULL) { bt->size++; *found = btnode_alloc(key, value); return *found; } // Creates a new node 41 | int cmp = bt->cmp(key, node->key); 42 | if(cmp == 0) *found = node; 43 | else if(cmp < 0) node->left = _bt_insert(bt, node->left, key, value, found); 44 | else node->right = _bt_insert(bt, node->right, key, value, found); 45 | return node; 46 | } 47 | 48 | // Return BT_NOTFOUND if not found, otherwise return the value 49 | void *bt_find(bintree_t *bt, void *key) { return _bt_find(bt, bt->root, key); } 50 | void *_bt_find(bintree_t *bt, btnode_t *node, void *key) { 51 | if(node == NULL) return BT_NOTFOUND; 52 | int cmp = bt->cmp(key, node->key); 53 | if(cmp == 0) return node->value; 54 | else if(cmp < 0) return _bt_find(bt, node->left, key); 55 | else return _bt_find(bt, node->right, key); 56 | } 57 | 58 | // Removes the given key, and returns the value if the key exists; otherwise return BT_NOTFOUND 59 | void *bt_remove(bintree_t *bt, void *key) { 60 | void *found = BT_NOTFOUND; 61 | bt->root = _bt_remove(bt, bt->root, key, &found); 62 | return found; 63 | } 64 | 65 | // Returns the child after performing remove 66 | void *_bt_remove(bintree_t *bt, btnode_t *node, void *key, void **found) { 67 | if(node == NULL) { *found = BT_NOTFOUND; return NULL; } 68 | int cmp = bt->cmp(key, node->key); 69 | if(cmp == 0) { *found = node->value; bt->size--; return _bt_remove_node(bt, node); } 70 | else if(cmp < 0) node->left = _bt_remove(bt, node->left, key, found); 71 | else node->right = _bt_remove(bt, node->right, key, found); 72 | return node; 73 | } 74 | 75 | // Internal function only called 
by bt_remove() 76 | void *_bt_remove_node(bintree_t *bt, btnode_t *node) { 77 | btnode_t *left = node->left, *right = node->right; 78 | if(left == NULL) { btnode_free(node); return right; } // This also covers the leaf node case 79 | else if(right == NULL) { btnode_free(node); return left; } 80 | if(right->left == NULL) { 81 | btnode_free(node); 82 | right->left = left; 83 | return right; 84 | } 85 | do { left = right; right = right->left; } while(right->left); 86 | node->key = right->key; node->value = right->value; 87 | left->left = right->right; 88 | btnode_free(right); 89 | return node; 90 | } -------------------------------------------------------------------------------- /src/bintree.h: -------------------------------------------------------------------------------- 1 | 2 | #ifndef _BIN_TREE_H 3 | #define _BIN_TREE_H 4 | 5 | #include "hashtable.h" // Need its def for call back functions 6 | 7 | #define BT_NOTFOUND ((void *)-1) 8 | 9 | // Binary tree node type 10 | typedef struct btnode { 11 | void *key, *value; 12 | struct btnode *left, *right; 13 | } btnode_t; 14 | 15 | // The good thing about a binary tree search structure is that the physical size 16 | // grows proportionally with the logical size, which is desirable for structures 17 | // that are usually small, but sometimes huge 18 | typedef struct { 19 | int size; 20 | cmp_cb_t cmp; 21 | btnode_t *root; 22 | } bintree_t; 23 | 24 | btnode_t *btnode_alloc(void *key, void *value); 25 | void btnode_free(btnode_t *node); 26 | bintree_t *bt_init(cmp_cb_t cmp); 27 | void bt_free(bintree_t *bt); 28 | void _bt_free(btnode_t *node); 29 | bintree_t *bt_str_init(); 30 | int bt_size(bintree_t *bt); 31 | void *bt_insert(bintree_t *bt, void *key, void *value); 32 | btnode_t *_bt_insert(bintree_t *bt, btnode_t *node, void *key, void *value, btnode_t **found); 33 | void *bt_find(bintree_t *bt, void *key); 34 | void *_bt_find(bintree_t *bt, btnode_t *node, void *key); 35 | void *bt_remove(bintree_t *bt, void *key); 36 | 
void *_bt_remove(bintree_t *bt, btnode_t *node, void *key, void **found); 37 | void *_bt_remove_node(bintree_t *bt, btnode_t *node); 38 | 39 | #endif -------------------------------------------------------------------------------- /src/cgen.h: -------------------------------------------------------------------------------- 1 | 2 | #ifndef _CGEN_H 3 | #define _CGEN_H 4 | 5 | #include "ast.h" 6 | #include "type.h" 7 | 8 | #define CGEN_GDATA_PADDING 8 // To avoid allocating a zero byte object on the heap 9 | 10 | #define CGEN_ARRAY_DEF 0 11 | #define CGEN_ARRAY_DECL 1 12 | 13 | #define CGEN_RELOC_CODE 0 14 | #define CGEN_RELOC_DATA 1 15 | 16 | typedef struct { 17 | type_cxt_t *type_cxt; // Owns memory; will automatically init and free 18 | list_t *import_list; // Externally declared variable, function or array - only valid import is pending is 1 19 | list_t *export_list; // Non-statically declared global variable, function or array 20 | list_t *gdata_list; // A list of global data, i.e. actual storage 21 | int64_t gdata_offset; // Next global data offset 22 | list_t *reloc_list; // A list of cgen_reloc_t *; Owns memory 23 | } cgen_cxt_t; 24 | 25 | // A relocation entry provides info for converting relative reference (starting at address 0) 26 | // into absolute address when the binary is loaded into memory 27 | typedef struct { 28 | int from, to; // CGEN_RELOC_ series 29 | int64_t offset; // The offset to be modified during relocation 30 | size_t size; // Number of bytes 31 | } cgen_reloc_t; 32 | extern const char *cgen_reloc_name[]; 33 | 34 | // Global data container 35 | typedef struct cgen_data_struct_t { 36 | uint8_t *data; // Actual data; NULL means uninitialized 37 | type_t *type; // Type of the global data, which also contains the size 38 | int64_t offset; // Offset relative to the beginning of data segment 39 | } cgen_gdata_t; 40 | 41 | void cgen_typed_print(type_t *type, void *data); 42 | void cgen_print_cxt(cgen_cxt_t *cxt); 43 | 44 | cgen_cxt_t 
*cgen_init(); 45 | void cgen_free(cgen_cxt_t *cxt); 46 | 47 | cgen_gdata_t *cgen_gdata_init(cgen_cxt_t *cxt, type_t *type); 48 | void cgen_gdata_free(cgen_gdata_t *gdata); 49 | cgen_reloc_t *cgen_reloc_init(cgen_cxt_t *cxt); 50 | void cgen_reloc_free(cgen_reloc_t *reloc); 51 | 52 | void cgen_resolve_extern(cgen_cxt_t *cxt, value_t *value); 53 | cgen_gdata_t *cgen_init_comp(cgen_cxt_t *cxt, type_t *type, token_t *token); 54 | int64_t cgen_init_comp_(cgen_cxt_t *cxt, type_t *type, token_t *token, cgen_gdata_t *gdata, int64_t offset); 55 | cgen_gdata_t *cgen_init_array(cgen_cxt_t *cxt, type_t *type, token_t *token); 56 | int64_t cgen_init_array_(cgen_cxt_t *cxt, type_t *type, token_t *token, cgen_gdata_t *gdata, int64_t offset); 57 | cgen_gdata_t *cgen_init_value(cgen_cxt_t *cxt, type_t *type, token_t *token); 58 | int64_t cgen_init_value_(cgen_cxt_t *cxt, type_t *type, token_t *token, cgen_gdata_t *gdata, int64_t offset); 59 | 60 | void cgen_resolve_array_size(type_t *decl_type, type_t *def_type, token_t *init, int both_decl); 61 | void cgen_global_decl(cgen_cxt_t *cxt, type_t *type, token_t *basetype, token_t *decl, token_t *init); 62 | void cgen_global_def(cgen_cxt_t *cxt, type_t *type, token_t *basetype, token_t *decl, token_t *init); 63 | void cgen_global_func(cgen_cxt_t *cxt, token_t *func); 64 | void cgen_global(cgen_cxt_t *cxt, token_t *global_decl); 65 | void cgen(cgen_cxt_t *cxt, token_t *root); 66 | 67 | #endif -------------------------------------------------------------------------------- /src/env.c: -------------------------------------------------------------------------------- 1 | 2 | #include "env.h" 3 | 4 | // This function initializes inclusion path from multiple sources 5 | void env_init_include_path(env_t *env) { 6 | int count = 0; 7 | // Read environmental variable; These paths are inserted into the beginning of the list, i.e., 8 | // they will override all other paths 9 | char *env_path = getenv("C_INCLUDE_PATH"); 10 | if(env_path != NULL) { 11 
| // Parse this string as a ":" separated path variable 12 | char *p = env_path; 13 | while(1) { 14 | char *q = p; 15 | if(*q == '\0') { 16 | break; 17 | } 18 | // Stop at ':' or '\0' 19 | while(*q != ':' && *q != '\0') { 20 | q++; 21 | } 22 | if(*q == '\0') { 23 | break; 24 | } 25 | int size = q - p; 26 | if(size != 0) { 27 | char *path = (char *)malloc(sizeof(q - p) + 1); 28 | SYSEXPECT(path != NULL); 29 | memcpy(path, p, size); 30 | path[size] = '\0'; 31 | list_insertat(env->include_paths, path, path, count); 32 | count++; 33 | } 34 | p = q + 1; 35 | } 36 | } 37 | return; 38 | } 39 | 40 | env_t *env_init() { 41 | env_t *env = (env_t *)malloc(sizeof(env_t)); 42 | SYSEXPECT(env != NULL); 43 | memset(env, 0x00, sizeof(env_t)); 44 | env->include_paths = list_init(); 45 | return env; 46 | } 47 | 48 | void env_free(env_t *env) { 49 | do { 50 | listnode_t *node = list_head(env->include_paths); 51 | while(node != NULL) { 52 | free(node->key); 53 | node = list_next(node); 54 | } 55 | list_free(env->include_paths); 56 | } while(0); 57 | free(env); 58 | return; 59 | } 60 | -------------------------------------------------------------------------------- /src/env.h: -------------------------------------------------------------------------------- 1 | 2 | // This file implements global environmental variables 3 | 4 | #ifndef _CFRONT_ENV_H 5 | #define _CFRONT_ENV_H 6 | 7 | #include "hashtable.h" 8 | #include "list.h" 9 | 10 | typedef struct { 11 | // Search path for included files 12 | list_t *include_paths; 13 | } env_t; 14 | 15 | void env_init_include_path(env_t *env); 16 | 17 | env_t *env_init(); 18 | void env_free(env_t *env); 19 | 20 | #endif -------------------------------------------------------------------------------- /src/error.c: -------------------------------------------------------------------------------- 1 | 2 | #include "error.h" 3 | 4 | // This global pointer holds the begin of the text. 
We use this pointer and 5 | // a given pointer to compute the line and column number 6 | static const char *begin = NULL; 7 | static int inited = 0; 8 | // Whether test mode is on. Under test mode, error reporting functions calls 9 | // longjmp to jump to a previously set location 10 | static int testmode = 0; 11 | 12 | jmp_buf env; 13 | 14 | // This must be called in order for line number to work 15 | void error_init(const char *s) { 16 | begin = s; 17 | inited = 1; 18 | return; 19 | } 20 | 21 | void error_free() { 22 | inited = 0; 23 | return; 24 | } 25 | 26 | void error_testmode(int mode) { 27 | testmode = mode; 28 | return; 29 | } 30 | 31 | void error_exit_or_jump(int need_exit) { 32 | if(testmode != 0) { 33 | fprintf(stderr, "*** %s are redirected ***\n", need_exit ? "Errors" : "Warnings"); 34 | longjmp(env, 1); 35 | } else if(need_exit) { 36 | #ifndef NDEBUG 37 | assert(0); 38 | #else 39 | exit(ERROR_CODE_EXIT); 40 | #endif 41 | } 42 | return; 43 | } 44 | 45 | // Returns the row and column of a given pointer 46 | // Note: 47 | // 1. If error is not initialized then row and col will be set to -1 48 | // 2. 
If the pointer is not in the string registered during initialization 49 | // then row and col will be set to -2 50 | void error_get_row_col(const char *s, int *row, int *col) { 51 | if(inited == 0) { 52 | *row = *col = -1; 53 | } else { 54 | *row = *col = 1; 55 | const char *p; 56 | const char *line_head = begin; // Track the beginning of the line 57 | for(p = begin; p != s && *p != '\0';p++) { 58 | if(*p == '\n') { 59 | (*row)++; 60 | *col = 1; 61 | line_head = p + 1; 62 | } else { 63 | (*col)++; 64 | } 65 | } 66 | if(*p == '\0' && p != s) { // if p == s then still valid 67 | *row = *col = -2; 68 | fprintf(stderr, "Did you forget to register a new pointer with error module?\n"); 69 | } else { 70 | // Print from line head to next line 71 | printf("----\n"); 72 | while(*line_head != '\n' && *line_head != '\0') { 73 | putchar(*line_head++); 74 | } 75 | putchar('\n'); 76 | for(int i = 0;i < *col - 1;i++) { 77 | putchar(' '); 78 | } 79 | printf("^\n"); 80 | printf("----\n"); 81 | } 82 | } 83 | return; 84 | } 85 | 86 | void syserror(const char *prompt) { 87 | fputs(prompt, stderr); 88 | exit(ERROR_CODE_EXIT); 89 | } 90 | 91 | int error_get_offset(const char *offset) { 92 | return offset - begin + 1; // Begin with column 1 93 | } 94 | -------------------------------------------------------------------------------- /src/error.h: -------------------------------------------------------------------------------- 1 | 2 | // Note that this might be a common name, so we make it longer to avoid conflict 3 | #ifndef _ERROR_H_CFRONT 4 | #define _ERROR_H_CFRONT 5 | 6 | #include 7 | #include 8 | #include 9 | #include 10 | 11 | extern jmp_buf env; 12 | 13 | #define ERROR_CODE_EXIT 1 14 | // Input to function error_exit_or_jump() 15 | #define ERROR_ACTION_CONT 0 16 | #define ERROR_ACTION_EXIT 1 17 | #define error_exit(fmt, ...) do { fprintf(stderr, "Error: " fmt, ##__VA_ARGS__); error_exit_or_jump(ERROR_ACTION_EXIT); } while(0); 18 | #define error_row_col_exit(s, fmt, ...) 
do { \ 19 | int row, col; error_get_row_col(s, &row, &col); \ 20 | fprintf(stderr, "Error (row %d col %d): " fmt, row, col, ##__VA_ARGS__); \ 21 | error_exit_or_jump(ERROR_ACTION_EXIT); } while(0); 22 | #define warn_row_col_exit(s, fmt, ...) do { \ 23 | int row, col; error_get_row_col(s, &row, &col); \ 24 | fprintf(stderr, "Warning (row %d col %d): " fmt, row, col, ##__VA_ARGS__); \ 25 | error_exit_or_jump(ERROR_ACTION_CONT); } while(0); 26 | 27 | // The following two macros are used for testing. It redirects the control flow back to the testing function 28 | // if an error occurs. The testing function should set testmode to 1. 29 | // Usage: if(error_trycatch()) { ...code goes here } else { ... error happens } ... error did not happen 30 | #define error_trycatch() (setjmp(env) == ERROR_FIRSTTIME) 31 | #define ERROR_FIRSTTIME 0 32 | 33 | #define SYSEXPECT(expr) do { if(!(expr)) syserror(__func__); } while(0) // Assertion for system calls; Valid under all modes 34 | 35 | void error_init(const char *s); 36 | void error_free(); 37 | void error_testmode(int mode); 38 | void error_exit_or_jump(int need_exit); 39 | void error_get_row_col(const char *s, int *row, int *col); 40 | void syserror(const char *prompt); 41 | 42 | int error_get_offset(const char *offset); // Returns integer offset 43 | 44 | #endif 45 | 46 | -------------------------------------------------------------------------------- /src/eval.h: -------------------------------------------------------------------------------- 1 | 2 | #ifndef _EVAL_H 3 | #define _EVAL_H 4 | 5 | #include "token.h" 6 | #include "ast.h" 7 | #include "type.h" 8 | 9 | #define EVAL_MAX(a, b) (a > b ? a : b) 10 | #define EVAL_MIN(a, b) (a < b ? 
a : b) 11 | 12 | // Used as the parameter to eval_const_atoi() 13 | #define ATOI_NO_CHECK_END 0 14 | #define ATOI_CHECK_END 1 // Do not report error if there is still char after the int literal 15 | #define ATOI_NO_MAX_CHAR 0 // For \xhh \ooo we only eat 2 and 3 chars respectively 16 | 17 | #define EVAL_MAX_CONST_SIZE 8 // We only support evaluating constants smaller than this size 18 | 19 | uint64_t eval_int_masks[9]; 20 | 21 | uint64_t eval_const_get_mask(int size); 22 | uint64_t eval_const_get_sign_mask(int size); 23 | int eval_const_is_zero(value_t *value, int size); 24 | uint64_t eval_const_adjust_size(value_t *value, int to, int from, int is_signed); 25 | uint64_t eval_const_add(value_t *op1, value_t *op2, int size, int is_signed, int *overflow); 26 | uint64_t eval_const_sub(value_t *op1, value_t *op2, int size, int is_signed, int *overflow); 27 | uint64_t eval_const_mul(value_t *op1, value_t *op2, int size, int is_signed, int *overflow); 28 | uint64_t eval_const_div_mod(int is_div, value_t *op1, value_t *op2, int size, int is_signed, int *div_zero); 29 | uint64_t eval_const_shift(int is_left, value_t *op1, value_t *op2, int size, int is_signed, int *shift_overflow); 30 | int eval_const_cmp(token_type_t op, value_t *op1, value_t *op2, int size, int is_signed); 31 | uint64_t eval_const_bitwise(token_type_t op, value_t *op1, value_t *op2, int size); 32 | uint64_t eval_const_unary(token_type_t op, value_t *value, int size); 33 | 34 | char *eval_hex_char(char ch); 35 | str_t *eval_print_const_str(str_t *s); 36 | 37 | // Take a maximum bite and return the next to read 38 | char *eval_const_atoi_maxbite(char *s, int base, token_t *token, int *ret); 39 | // Given a string and base convert to integer 40 | int eval_const_atoi(char *s, int base, token_t *token, int max_char, int check_end, char **next); 41 | char eval_escaped_char(char escaped, token_t *token); 42 | 43 | char eval_const_char_token(token_t *token); // Evaluates char type token to char 44 | str_t 
*eval_const_str_token(token_t *token); // Evaluates string token to str_t * 45 | 46 | // Evaluating const expression using value_t objects 47 | value_t *eval_const_get_int_value(type_cxt_t *cxt, token_t *token); // Evaluates int literal and returns value object 48 | value_t *eval_const_exp(type_cxt_t *cxt, token_t *exp); 49 | value_t *eval_const_to_type(type_cxt_t *cxt, token_t *exp, type_t *type, int cast_type); // Evaluates and cast to type 50 | 51 | #endif -------------------------------------------------------------------------------- /src/hashtable.c: -------------------------------------------------------------------------------- 1 | 2 | #include "hashtable.h" 3 | 4 | int streq_cb(void *a, void *b) { return strcmp(a, b) == 0; } 5 | int strcmp_cb(void *a, void *b) { return strcmp(a, b); } 6 | // Credits: K&R C Second Edition Page 144 7 | hashval_t strhash_cb(void *a) { 8 | char *s = (char *)a; 9 | hashval_t hashval; 10 | for(hashval = (hashval_t)0; *s != '\0'; s++) { 11 | hashval = *s + 31 * hashval; 12 | } 13 | return hashval; 14 | } 15 | 16 | hashtable_t *ht_init(eq_cb_t eq, hash_cb_t hash) { 17 | hashtable_t *ht = (hashtable_t *)malloc(sizeof(hashtable_t)); 18 | SYSEXPECT(ht != NULL); 19 | ht->eq = eq; 20 | ht->hash = hash; 21 | ht->mask = HT_INIT_MASK; 22 | ht->size = 0; 23 | ht->capacity = HT_INIT_CAPACITY; 24 | ht->keys = (void **)malloc(sizeof(void *) * HT_INIT_CAPACITY); 25 | ht->values = (void **)malloc(sizeof(void *) * HT_INIT_CAPACITY); 26 | SYSEXPECT(ht->keys != NULL && ht->values != NULL); 27 | memset(ht->keys, 0x00, sizeof(void *) * ht->capacity); 28 | memset(ht->values, 0x00, sizeof(void *) * ht->capacity); 29 | return ht; 30 | } 31 | 32 | hashtable_t *ht_str_init() { 33 | return ht_init(streq_cb, strhash_cb); 34 | } 35 | 36 | void ht_free(hashtable_t *ht) { 37 | free(ht->keys); 38 | free(ht->values); 39 | free(ht); 40 | return; 41 | } 42 | 43 | int ht_size(hashtable_t *ht) { 44 | return ht->size; 45 | } 46 | 47 | // Returns an existing slot for 
key, if it already exists, or an empty one 48 | int ht_find_slot(hashtable_t *ht, void **keys, void *key, int op) { 49 | assert(key != NULL && key != HT_REMOVED); 50 | hashval_t begin = ht->hash(key) & ht->mask; 51 | if(op == HT_OP_INSERT) { 52 | while(keys[begin] != NULL && keys[begin] != HT_REMOVED && !ht->eq(keys[begin], key)) { 53 | begin = (begin + 1) & ht->mask; 54 | } 55 | } else if(op == HT_OP_FIND) { 56 | while(keys[begin] && (keys[begin] == HT_REMOVED || !ht->eq(keys[begin], key))) { 57 | begin = (begin + 1) & ht->mask; 58 | } 59 | } else { 60 | assert(0); 61 | } 62 | return begin; 63 | } 64 | 65 | void ht_resize(hashtable_t *ht) { 66 | assert(ht->size < ht->capacity); 67 | ht->capacity *= 2; 68 | ht->mask |= (ht->mask << 1); 69 | void **new_keys = (void **)malloc(sizeof(void *) * ht->capacity); 70 | void **new_values = (void **)malloc(sizeof(void *) * ht->capacity); 71 | SYSEXPECT(new_keys != NULL && new_values != NULL); 72 | memset(new_keys, 0x00, sizeof(void *) * ht->capacity); 73 | memset(new_values, 0x00, sizeof(void *) * ht->capacity); // Avoid values having HT_NOTFOUND 74 | for(int i = 0;i < ht->capacity / 2;i++) { 75 | if(ht->keys[i] && ht->keys[i] != HT_REMOVED) { 76 | int slot = ht_find_slot(ht, new_keys, ht->keys[i], HT_OP_INSERT); 77 | assert(new_keys[slot] == NULL); 78 | new_keys[slot] = ht->keys[i]; 79 | new_values[slot] = ht->values[i]; 80 | } 81 | } 82 | free(ht->keys); 83 | free(ht->values); 84 | ht->keys = new_keys; 85 | ht->values = new_values; 86 | return; 87 | } 88 | 89 | // Returns value, or HT_NOTFOUND if not found 90 | void *ht_find(hashtable_t *ht, void *key) { 91 | assert(key != NULL); 92 | int slot = ht_find_slot(ht, ht->keys, key, HT_OP_FIND); // Note that this will not return removed slot 93 | assert(ht->keys[slot] != HT_REMOVED); 94 | return ht->keys[slot] ? ht->values[slot] : HT_NOTFOUND; 95 | } 96 | 97 | // Inserts if key does not exist, and returns value. 
Returns current value otherwise; 98 | void *ht_insert(hashtable_t *ht, void *key, void *value) { 99 | assert(key != NULL); 100 | if(HT_RESIZE_THRESHOLD(ht->capacity) == ht->size) { 101 | ht_resize(ht); 102 | } 103 | int slot = ht_find_slot(ht, ht->keys, key, HT_OP_INSERT); 104 | if(ht->keys[slot] && ht->keys[slot] != HT_REMOVED) { 105 | return ht->values[slot]; 106 | } 107 | ht->keys[slot] = key; 108 | ht->values[slot] = value; 109 | ht->size++; 110 | return value; 111 | } 112 | 113 | // Removes the key, and returns value before removal. If key does not exist return NOTFOUND 114 | void *ht_remove(hashtable_t *ht, void *key) { 115 | assert(key != NULL); 116 | int slot = ht_find_slot(ht, ht->keys, key, HT_OP_FIND); 117 | assert(ht->keys[slot] != HT_REMOVED); 118 | if(ht->keys[slot] == NULL) { 119 | return HT_NOTFOUND; 120 | } 121 | ht->keys[slot] = HT_REMOVED; 122 | ht->size--; 123 | return ht->values[slot]; 124 | } 125 | 126 | // The following return 1 means operation is successful, 0 otherwise. 
Note that insert always succeeds 127 | int set_find(set_t *set, void *key) { return ht_find(set, key) != HT_NOTFOUND; } 128 | int set_insert(set_t *set, void *key) { ht_insert(set, key, NULL); return SET_SUCCESS; } 129 | int set_remove(set_t *set, void *key) { return ht_remove(set, key) != HT_NOTFOUND; } -------------------------------------------------------------------------------- /src/hashtable.h: -------------------------------------------------------------------------------- 1 | 2 | #ifndef _HASHTABLE_H 3 | #define _HASHTABLE_H 4 | 5 | #include 6 | #include 7 | #include 8 | #include "error.h" 9 | 10 | // Must be a power of two 11 | #define HT_INIT_CAPACITY 128 12 | #define HT_INIT_MASK 0x7F 13 | #define HT_RESIZE_THRESHOLD(capacity) (capacity / 8 * 7) 14 | #define HT_NOTFOUND ((void *)-1) 15 | #define HT_REMOVED ((void *)-2) 16 | 17 | #define HT_OP_INSERT 0 18 | #define HT_OP_FIND 1 19 | 20 | typedef unsigned long hashval_t; 21 | typedef int (*eq_cb_t)(void *, void *); // Equality comparison function 22 | typedef int (*cmp_cb_t)(void *, void *); // Three-way comparison function (see strcmp_cb) 23 | typedef hashval_t (*hash_cb_t)(void *); // Hash value function 24 | 25 | typedef struct { 26 | eq_cb_t eq; 27 | hash_cb_t hash; 28 | hashval_t mask; 29 | int size; 30 | int capacity; 31 | void **keys; 32 | void **values; 33 | } hashtable_t; 34 | 35 | int streq_cb(void *a, void *b); 36 | int strcmp_cb(void *a, void *b); 37 | hashval_t strhash_cb(void *a); 38 | hashtable_t *ht_init(eq_cb_t eq, hash_cb_t hash); 39 | hashtable_t *ht_str_init(); 40 | void ht_free(hashtable_t *ht); 41 | int ht_size(hashtable_t *ht); 42 | int ht_find_slot(hashtable_t *ht, void **keys, void *key, int op); 43 | void ht_resize(hashtable_t *ht); 44 | void *ht_find(hashtable_t *ht, void *key); 45 | void *ht_insert(hashtable_t *ht, void *key, void *value); 46 | void *ht_remove(hashtable_t *ht, void *key); 47 | 48 | typedef hashtable_t set_t; // Set is just a hash table (we waste some space) 49 | #define
SET_FAIL 0 50 | #define SET_SUCCESS 1 51 | #define set_init(a, b) ht_init(a, b) 52 | #define set_str_init() ht_str_init() 53 | #define set_free(a) ht_free(a) 54 | #define set_size(a) ht_size(a) 55 | int set_find(set_t *set, void *key); 56 | int set_insert(set_t *set, void *key); 57 | int set_remove(set_t *set, void *key); 58 | 59 | #endif -------------------------------------------------------------------------------- /src/list.c: -------------------------------------------------------------------------------- 1 | 2 | #include "list.h" 3 | 4 | void LIST_SIMPLE_FREE_CB(void *p) { 5 | free(p); 6 | return; 7 | } 8 | 9 | list_t *list_init() { 10 | list_t *list = (list_t *)malloc(sizeof(list_t)); 11 | SYSEXPECT(list != NULL); 12 | list->size = 0; 13 | list->head = list->tail = NULL; 14 | return list; 15 | } 16 | 17 | void list_free(list_t *list) { 18 | assert(list->head || !list->tail); 19 | listnode_t *node = list->head; 20 | while(node != NULL) { 21 | listnode_t *next = node->next; 22 | if(list->key_free_cb != NULL) { 23 | list->key_free_cb(node->key); 24 | } 25 | if(list->value_free_cb != NULL) { 26 | list->value_free_cb(node->value); 27 | } 28 | listnode_free(node); 29 | node = next; 30 | } 31 | free(list); 32 | return; 33 | } 34 | 35 | void list_set_free_cb(list_t *list, void (*key_free_cb)(void *), void (*value_free_cb)(void *)) { 36 | list->key_free_cb = key_free_cb; 37 | list->value_free_cb = value_free_cb; 38 | return; 39 | } 40 | 41 | int list_size(list_t *list) { 42 | return list->size; 43 | } 44 | 45 | // Allocate a node. 
All fields are uninitialized 46 | listnode_t *listnode_alloc() { 47 | listnode_t *node = (listnode_t *)malloc(sizeof(listnode_t)); 48 | SYSEXPECT(node != NULL); 49 | return node; 50 | } 51 | void listnode_free(listnode_t *node) { 52 | free(node); 53 | return; 54 | } 55 | 56 | // Always insert to the end of the list; do not check for duplicate; Always return the inserted value 57 | void *list_insert(list_t *list, void *key, void *value) { 58 | listnode_t *node = listnode_alloc(); 59 | node->key = key; 60 | node->value = value; 61 | node->next = NULL; 62 | assert(list->head || !list->tail); // If head is NULL then tail must also be NULL 63 | if(list->head == NULL) { 64 | list->head = list->tail = node; 65 | } else { 66 | list->tail->next = node; 67 | list->tail = node; 68 | } 69 | list->size++; 70 | return value; 71 | } 72 | 73 | // Inserts before the node specified by index; if index == list size then insert at the end 74 | listnode_t *list_insertat(list_t *list, void *key, void *value, int index) { 75 | assert(index <= list->size && index >= 0); 76 | if(index == list->size) { 77 | return list_insert(list, key, value); // Empty insert will be caught here 78 | } 79 | assert(list->size > 0); 80 | list->size++; 81 | listnode_t *node = listnode_alloc(); 82 | node->key = key; 83 | node->value = value; 84 | if(index == 0) { 85 | node->next = list->head; 86 | list->head = node; 87 | assert(list->tail); 88 | } else { 89 | listnode_t *curr = list->head; 90 | while(--index != 0) curr = curr->next; 91 | node->next = curr->next; 92 | curr->next = node; 93 | assert(curr->next); 94 | } 95 | return value; 96 | } 97 | 98 | void *list_insert_nodup(list_t *list, void *key, void *value, eq_cb_t eq) { 99 | void *ret = list_find(list, key, eq); 100 | if(ret == LIST_NOTFOUND) { 101 | value = list_insert(list, key, value); 102 | } 103 | return ret; 104 | } 105 | 106 | // Search for the given key, and return value; Return LIST_NOTFOUND if not found 107 | void *list_find(list_t *list, void 
*key, eq_cb_t eq) { 108 | listnode_t *curr = list->head; 109 | while(curr != NULL) { 110 | if(eq(key, curr->key)) { 111 | return curr->value; 112 | } else { 113 | curr = curr->next; 114 | } 115 | } 116 | return LIST_NOTFOUND; 117 | } 118 | 119 | // Returns the node specified by the index; If index is too large then return LIST_NOTFOUND. 120 | // Index must be positive 121 | const listnode_t *list_findat(list_t *list, int index) { 122 | assert(index >= 0); 123 | if(index >= list->size) { 124 | return LIST_NOTFOUND; 125 | } 126 | listnode_t *curr = list->head; 127 | while(index-- != 0) { 128 | curr = curr->next; 129 | } 130 | return curr; 131 | } 132 | 133 | // Removes the key from the list. Return value if key exists; LIST_NOTFOUND otherwise 134 | void *list_remove(list_t *list, void *key, eq_cb_t eq) { 135 | listnode_t *curr = list->head; 136 | listnode_t *prev = curr; 137 | if(curr == NULL) { 138 | return LIST_NOTFOUND; 139 | } 140 | void *ret = NULL; 141 | if(eq(curr->key, key)) { 142 | list->head = curr->next; // Could be NULL 143 | ret = curr->value; 144 | listnode_free(curr); 145 | list->size--; 146 | if(curr == list->tail) { 147 | list->tail = NULL; 148 | } 149 | return ret; 150 | } 151 | do { 152 | curr = curr->next; 153 | if(curr != NULL && eq(curr->key, key)) { 154 | prev->next = curr->next; 155 | ret = curr->value; 156 | listnode_free(curr); 157 | list->size--; 158 | if(curr == list->tail) { 159 | list->tail = prev; // If deleting the last element then adjust tail 160 | } 161 | return ret; 162 | } 163 | prev = curr; 164 | } while(curr); 165 | return LIST_NOTFOUND; 166 | } 167 | 168 | // Value is returned, and the second argument holds the key 169 | void *list_removeat(list_t *list, int index, void **key) { 170 | assert(index >= 0); 171 | if(index >= list->size) return LIST_NOTFOUND; 172 | list->size--; 173 | listnode_t *curr = list->head, *prev = NULL; 174 | void *ret = NULL; 175 | if(index == 0) { 176 | list->head = curr->next; 177 | } else { 178 | 
while(index--) { 179 | prev = curr; 180 | curr = curr->next; 181 | } 182 | prev->next = curr->next; 183 | } 184 | ret = curr->value; 185 | *key = curr->key; 186 | listnode_free(curr); 187 | if(curr == list->tail) { 188 | list->tail = prev; 189 | } 190 | return ret; 191 | } -------------------------------------------------------------------------------- /src/list.h: -------------------------------------------------------------------------------- 1 | 2 | #ifndef _LIST_H 3 | #define _LIST_H 4 | 5 | #include "hashtable.h" 6 | 7 | #define LIST_NOTFOUND ((void *)-1) // Return value for find() 8 | 9 | void LIST_SIMPLE_FREE_CB(void *p); 10 | 11 | typedef struct listnode { 12 | void *key; // No ownership 13 | void *value; // No ownership 14 | struct listnode *next; 15 | } listnode_t; 16 | 17 | typedef struct { 18 | listnode_t *head; 19 | listnode_t *tail; 20 | int size; 21 | void (*key_free_cb)(void *); 22 | void (*value_free_cb)(void *); 23 | } list_t; 24 | 25 | inline static listnode_t *list_head(list_t *list) { return list->head; } 26 | inline static listnode_t *list_tail(list_t *list) { return list->tail; } 27 | inline static listnode_t *list_next(listnode_t *node) { return node->next; } 28 | inline static void *list_key(listnode_t *node) { return node->key; } 29 | inline static void *list_value(listnode_t *node) { return node->value; } 30 | 31 | list_t *list_init(); 32 | void list_free(list_t *list); 33 | 34 | void list_set_free_cb(list_t *list, void (*key_free_cb)(void *), void (*value_free_cb)(void *)); 35 | 36 | int list_size(list_t *list); 37 | listnode_t *listnode_alloc(); 38 | void listnode_free(listnode_t *node); 39 | void *list_insert(list_t *list, void *key, void *value); 40 | listnode_t *list_insertat(list_t *list, void *key, void *value, int index); 41 | void *list_insert_nodup(list_t *list, void *key, void *value, eq_cb_t eq); 42 | void *list_find(list_t *list, void *key, eq_cb_t eq); 43 | const listnode_t *list_findat(list_t *list, int index); 44 | void 
*list_remove(list_t *list, void *key, eq_cb_t eq); 45 | void *list_removeat(list_t *list, int index, void **key); 46 | 47 | #endif -------------------------------------------------------------------------------- /src/old/allocator.cpp: -------------------------------------------------------------------------------- 1 | 2 | #include "allocator.h" 3 | 4 | using namespace wangziqi2013; 5 | using namespace cfront; 6 | -------------------------------------------------------------------------------- /src/old/allocator.h: -------------------------------------------------------------------------------- 1 | 2 | #pragma once 3 | 4 | #include "common.h" 5 | 6 | namespace wangziqi2013 { 7 | namespace cfront { 8 | 9 | /* 10 | * class SlabAllocator - Allocates elements but never frees them until explicit 11 | * call of free function 12 | * 13 | * This class is used for two purposes: 14 | * 1. For many small allocations, reduce call to malloc() to reduce 15 | * memory overhead, fragmentation and time cost 16 | * 2. For shared pointer where ownership is not clear, act as a pool 17 | * and removes the need to free node 18 | * 19 | * Note that this slab allocator is not thread-safe 20 | */ 21 | template 22 | class SlabAllocator { 23 | private: 24 | 25 | // This is the stack where we hold chunks 26 | std::stack chunk_stack; 27 | 28 | // This is the index inside current (topmost) chunk 29 | int next_element_index; 30 | 31 | // Number of elements per chunk. 
This is configurable at compile 32 | // time to let the caller choose 33 | int element_per_chunk; 34 | 35 | /* 36 | * AllocateChunk() - Allocate a chunk and push it to the top of the stack 37 | * 38 | * This function also resets next_element_index to be 0 in order to use 39 | * the topmost chunk 40 | */ 41 | void AllocateChunk() { 42 | // Allocate the first chunk of memory 43 | char *ptr = \ 44 | reinterpret_cast(malloc(element_per_chunk * sizeof(ElementType))); 45 | 46 | if(ptr == nullptr) { 47 | ThrowAllocatorOutOfMemoryError(); 48 | } 49 | 50 | chunk_stack.push(ptr); 51 | 52 | next_element_index = 0; 53 | 54 | return; 55 | } 56 | 57 | /* 58 | * ThrowAllocatorOutOfMemoryError() - This is thrown when we are out of 59 | * memory through malloc() call 60 | */ 61 | void ThrowAllocatorOutOfMemoryError() const { 62 | throw std::string{"Slab allocator out of memory!"}; 63 | } 64 | 65 | public: 66 | 67 | /* 68 | * Constructor - Initializes the stack and index structure 69 | */ 70 | SlabAllocator(int p_element_per_chunk=64) : 71 | chunk_stack{}, 72 | next_element_index{0}, 73 | element_per_chunk{p_element_per_chunk} { 74 | 75 | // As part initialization allocate the first chunk on the internal stack 76 | AllocateChunk(); 77 | 78 | return; 79 | } 80 | 81 | /* 82 | * Destructor - Frees all memory chunks in the slab allocator 83 | */ 84 | ~SlabAllocator() { 85 | // Since we have next_element_index elements in the topmost chunk 86 | // just destruct the first next_element_index elements 87 | CallDestructorForEachElement(next_element_index); 88 | 89 | // Delete entire chunk of memory which is char * type 90 | free(chunk_stack.top()); 91 | 92 | // Pop one chunk. 
We know there is at least one chunk on the stack 93 | chunk_stack.pop(); 94 | 95 | // Delete all chunks until the stack is empty 96 | while(chunk_stack.size() > 0) { 97 | // Here since all other chunks are full, just delete chunks 98 | // using max element count as element to delete 99 | CallDestructorForEachElement(element_per_chunk); 100 | 101 | free(chunk_stack.top()); 102 | 103 | chunk_stack.pop(); 104 | } 105 | 106 | dbg_printf("Allocator finished cleanup\n"); 107 | 108 | return; 109 | } 110 | 111 | /* 112 | * Get() - Returns an element type pointer allocated from the current chunk 113 | * 114 | * Note that the use of template class here is to let compiler construct 115 | * different Get() instances to forward constructor arguments to the 116 | * placement new which might take arguments 117 | * 118 | * These template arguments do not have to be explicitly specified since 119 | * the compiler could deduct them during compilation 120 | */ 121 | template 122 | ElementType *Get(Args&&... args) { 123 | // If we have used up all slots in the current chunk 124 | // just allocate a new one and reset next element index to 0 125 | if(next_element_index == element_per_chunk) { 126 | AllocateChunk(); 127 | 128 | assert(next_element_index == 0); 129 | } 130 | 131 | // This is the byte offset of the element being 132 | // allocated 133 | int byte_offset = sizeof(ElementType) * next_element_index; 134 | 135 | // Add the top most chunk address with the byte offset to yield element 136 | // address 137 | ElementType *element_ptr = \ 138 | reinterpret_cast(chunk_stack.top() + byte_offset); 139 | 140 | // Do not forget this!!! 
141 | next_element_index++; 142 | 143 | // The last step is to call placement operator new to initialize the 144 | // object 145 | return new (element_ptr) ElementType{args...}; 146 | } 147 | 148 | /* 149 | * CallDestructorForEachElement() - Calls destructor for the topmost chunk 150 | * 151 | * This function takes an extra argument as the element count on the top 152 | * most chunk, since it might or might not be the capacity of each chunk 153 | * caller needs to pass it in as an argument 154 | */ 155 | void CallDestructorForEachElement(int element_count) { 156 | for(int i = 0;i < element_count;i++) { 157 | // Compute element pointer 158 | ElementType *ptr = \ 159 | reinterpret_cast(chunk_stack.top() + 160 | sizeof(ElementType) * i); 161 | 162 | // Call destructor manually 163 | ptr->~ElementType(); 164 | } 165 | 166 | return; 167 | } 168 | }; 169 | 170 | } 171 | } 172 | -------------------------------------------------------------------------------- /src/old/common.h: -------------------------------------------------------------------------------- 1 | 2 | #pragma once 3 | 4 | // Old C headers 5 | #include 6 | #include 7 | #include 8 | #include 9 | 10 | #include 11 | #include 12 | #include 13 | #include 14 | #include 15 | #include 16 | #include 17 | #include 18 | #include 19 | 20 | namespace wangziqi2013 { 21 | namespace cfront { 22 | 23 | static void dummy(const char*, ...) {} 24 | 25 | #define DEBUG_PRINT 26 | 27 | #ifdef DEBUG_PRINT 28 | 29 | #define dbg_printf(fmt, ...) \ 30 | do { \ 31 | fprintf(stderr, "%-24s: " fmt, __FUNCTION__, ##__VA_ARGS__); \ 32 | fflush(stdout); \ 33 | } while (0); 34 | 35 | #else 36 | 37 | #define dbg_printf(fmt, ...) 
\ 38 | do { \ 39 | dummy(fmt, ##__VA_ARGS__); \ 40 | } while (0); 41 | 42 | #endif 43 | 44 | } 45 | } 46 | -------------------------------------------------------------------------------- /src/old/context.cpp: -------------------------------------------------------------------------------- 1 | 2 | #include "context.h" 3 | #include "syntax.h" 4 | 5 | using namespace wangziqi2013; 6 | using namespace cfront; 7 | 8 | void Context::InitializeBuiltInTypeMap() { 9 | for(const auto token_type : TokenInfo::builtin_type_set) { 10 | // Create a Token node wrapped by a SyntaxNode 11 | // and insert it into the type map for later use 12 | builtin_type_map[token_type] = SyntaxNode::Get(Token::Get(token_type)); 13 | } 14 | 15 | return; 16 | } 17 | -------------------------------------------------------------------------------- /src/old/context.h: -------------------------------------------------------------------------------- 1 | 2 | #pragma once 3 | 4 | #include "common.h" 5 | #include "token.h" 6 | #include "scope.h" 7 | 8 | namespace wangziqi2013 { 9 | namespace cfront { 10 | 11 | // Forward declaration here - since we not need to create syntax node it is OK 12 | class SyntaxNode; 13 | 14 | /* 15 | * class Context - The object for holding global values such as symbol tables 16 | * and type tables 17 | */ 18 | class Context { 19 | private: 20 | 21 | // Note that we could not use stack here since stack does not support 22 | // iteration, and during a search we have to iterate through the scope 23 | // to search a named type 24 | // 25 | // Use push_back() and pop_back() to access elements like a stack 26 | std::vector scope_stack; 27 | 28 | // Maps TokenType to SyntaxNode 8 for built in types 29 | std::unordered_map builtin_type_map; 33 | 34 | /* 35 | * InitializeBuiltInTypeMap() - Initialize SyntaxNode for built in types 36 | * 37 | * We do this as an optimization to avoid creating too many built in type 38 | * nodes - they now all share the same pointer 39 | */ 40 | void 
InitializeBuiltInTypeMap(); 41 | 42 | public: 43 | 44 | /* 45 | * Constructor 46 | * 47 | * The ownership of source file belongs to the context object 48 | */ 49 | Context() : 50 | scope_stack{} { 51 | // We initialize the first level of stack using an empty scope 52 | // possibly with few built-in symbols 53 | EnterScope(); 54 | 55 | // Load the map with built in integral types 56 | InitializeBuiltInTypeMap(); 57 | 58 | return; 59 | } 60 | 61 | /* 62 | * EnterScope() - Pushes a new ScopeNode object into the stack 63 | * and returns the pushed object 64 | */ 65 | ScopeNode &EnterScope() { 66 | // Construct an empty scope node and push it back to the vector 67 | scope_stack.emplace_back(); 68 | 69 | return scope_stack.back(); 70 | } 71 | 72 | /* 73 | * LeaveScope() - Leaves the scope by popping the node out from the stack 74 | * 75 | * If the scope stack is already empty then the assertion would fail 76 | */ 77 | void LeaveScope() { 78 | assert(scope_stack.size() > 0); 79 | 80 | scope_stack.pop_back(); 81 | 82 | return; 83 | } 84 | 85 | /* 86 | * GetTypeNode() - Search on the stack for a named type 87 | * 88 | * This function searches the stack from the top to the bottom, and if 89 | * the name exists inside any level that are searched first then it returns 90 | * the associated type object 91 | * 92 | * If the type does not exist in all levels just return nullptr. Otherwise 93 | * the SyntaxNode pointer that represents the type structure is returned 94 | */ 95 | SyntaxNode *GetTypeNode(const std::string &type_name) { 96 | // Iterate through the vector from high index to low index 97 | // i.e.
from most recent name space to less recent ones 98 | for(auto it = scope_stack.rbegin(); it != scope_stack.rend();it++) { 99 | SyntaxNode *type_node_p = it->GetTypeNode(type_name); 100 | 101 | // If the type exists in the scope being searched just return it 102 | // Otherwise need to continue to the next scope 103 | if(type_node_p != nullptr) { 104 | return type_node_p; 105 | } 106 | } 107 | 108 | // If at last we did not find such name then the type does 109 | // not exist and return nullptr 110 | return nullptr; 111 | } 112 | 113 | /* 114 | * GetBuiltInTypeNode() - Returns the SyntaxNode * for built in types 115 | * 116 | * This is used as an optimization to avoid too many nodes for builtin types 117 | */ 118 | SyntaxNode *GetBuiltInTypeNode(TokenType token_type) { 119 | // Find the built in type inside the map, and we must find it 120 | // since the caller is responsible for verifying whether a type 121 | // is built in type or not 122 | auto it = builtin_type_map.find(token_type); 123 | assert(it != builtin_type_map.end()); 124 | 125 | return it->second; 126 | } 127 | }; 128 | 129 | } // namespace wangziqi2013 130 | } // namespace cfront 131 | -------------------------------------------------------------------------------- /src/old/lex.cpp: -------------------------------------------------------------------------------- 1 | 2 | #include "lex.h" 3 | 4 | using namespace wangziqi2013; 5 | using namespace cfront; 6 | -------------------------------------------------------------------------------- /src/old/scope.cpp: -------------------------------------------------------------------------------- 1 | 2 | #include "scope.h" 3 | -------------------------------------------------------------------------------- /src/old/scope.h: -------------------------------------------------------------------------------- 1 | 2 | #pragma once 3 | 4 | #include "common.h" 5 | 6 | namespace wangziqi2013 { 7 | namespace cfront { 8 | 9 | class SyntaxNode; 10 | 11 | /* 12 | * class 
ScopeNode - This is a structure that contains information 13 | * about a scope. 14 | * 15 | * ScopeNodes are put into a stack as translation units are entered 16 | * and exited 17 | */ 18 | class ScopeNode { 19 | private: 20 | // The set of types we have currently seen 21 | // Types are represented using SyntaxNode structure which means it 22 | // could be organized as a tree 23 | // 24 | // For the topmost level in this structure we should put 8 basic types: 25 | // char, short, int, long 26 | // unsigned char, unsigned short, unsigned int, unsigned long 27 | std::unordered_map type_map; 28 | 29 | // TODO: Change the mapped type to something more meaningful 30 | // This should be 31 | std::unordered_map ident_map; 32 | public: 33 | 34 | /* 35 | * Constructor - This is necessary for emplacing it back in a stack 36 | */ 37 | ScopeNode() {} 38 | 39 | /* 40 | * Move Constructor - This is necessary in std::vector emplace_back 41 | * since a vector might grow and it needs to move all 42 | * previous contents to a new array 43 | */ 44 | ScopeNode(ScopeNode &&other) : 45 | type_map{std::move(other.type_map)}, 46 | ident_map{std::move(other.ident_map)} 47 | {} 48 | 49 | /* 50 | * These are deleted to avoid any undesirable effects 51 | */ 52 | ScopeNode(const ScopeNode &) = delete; 53 | ScopeNode &operator=(const ScopeNode &) = delete; 54 | ScopeNode &operator=(ScopeNode &&) = delete; 55 | 56 | /* 57 | * GetTypeNode() - Return the type node from type map 58 | * 59 | * If the type has not yet been defined just return nullptr 60 | */ 61 | SyntaxNode *GetTypeNode(const std::string &type_name) { 62 | auto it = type_map.find(type_name); 63 | 64 | // If the type does not exist in the map just return nullptr 65 | if(it == type_map.end()) { 66 | return nullptr; 67 | } 68 | 69 | return it->second; 70 | } 71 | 72 | /* 73 | * GetTypeMap() - Return the type map object reference 74 | * 75 | * The return value is a non-const reference which means that we could 76 | * actually modify it
77 | */ 78 | std::unordered_map & 79 | GetTypeMap() { 80 | return type_map; 81 | } 82 | }; 83 | 84 | } // namespace wangziqi2013 85 | } // namespace cfront 86 | -------------------------------------------------------------------------------- /src/old/syntax.cpp: -------------------------------------------------------------------------------- 1 | 2 | #include "syntax.h" 3 | 4 | using namespace wangziqi2013; 5 | using namespace cfront; 6 | 7 | SlabAllocator SyntaxNode::allocator{}; 8 | -------------------------------------------------------------------------------- /src/old/token.h: -------------------------------------------------------------------------------- 1 | 2 | #pragma once 3 | 4 | #include "common.h" 5 | #include "allocator.h" 6 | 7 | namespace wangziqi2013 { 8 | namespace cfront { 9 | 10 | enum class TokenType { 11 | // This is a placeholder 12 | T_INVALID = 0, 13 | 14 | // The following is keyword types 15 | T_AUTO = 1, 16 | 17 | T_BREAK = 2, 18 | 19 | T_CASE = 3, 20 | T_CHAR, 21 | T_CONST, 22 | T_CONTINUE, 23 | 24 | T_DEFAULT = 7, 25 | T_DO, 26 | T_DOUBLE, 27 | 28 | T_ELSE = 10, 29 | T_ENUM, 30 | T_EXTERN, 31 | 32 | T_FLOAT = 13, 33 | T_FOR, 34 | 35 | T_GOTO = 15, 36 | 37 | T_IF = 16, 38 | T_INT, 39 | 40 | T_LONG = 18, 41 | 42 | T_REGISTER = 19, 43 | T_RETURN, 44 | 45 | T_SHORT = 21, 46 | T_SIGNED, 47 | // T_SIZEOF -> This is part of the expression system 48 | T_STATIC, 49 | T_STRUCT, 50 | T_SWITCH, 51 | 52 | T_TYPEDEF = 26, 53 | 54 | T_UNION = 27, 55 | T_UNSIGNED, 56 | 57 | T_VOID = 29, 58 | T_VOLATILE, 59 | 60 | T_WHILE = 31, 61 | 62 | // The following are compound types 63 | // 64 | // unsigned char, unsigned short, unsigned int and unsigned long 65 | // should be treated as one unit instead of two type structs 66 | // Since they are represented as a single type rather than unsigned type 67 | // of a known type 68 | T_UCHAR = 40, 69 | T_USHORT, 70 | T_UINT, 71 | T_ULONG, 72 | 73 | // The following are types with data (literal token type) 74 | 75 | 
T_IDENT = 80, // Identifier 76 | T_INT_CONST, // Integer constant (should be of the same length as unsigned long) 77 | T_STRING_CONST, // String literal 78 | T_CHAR_CONST, // Character literal 79 | 80 | // The following are primitive operator types 81 | 82 | T_INC = 100, 83 | T_DEC, 84 | T_LPAREN, 85 | T_RPAREN, 86 | T_RSPAREN, 87 | T_LSPAREN, 88 | T_RCPAREN, 89 | T_LCPAREN, 90 | T_DOT, 91 | T_ARROW, 92 | T_PLUS = 110, 93 | T_MINUS, 94 | T_NOT, 95 | T_BITNOT, 96 | T_STAR, 97 | T_AMPERSAND, 98 | T_DIV, 99 | T_MOD, 100 | T_LSHIFT, 101 | T_RSHIFT, 102 | T_LESS = 120, 103 | T_LESSEQ, 104 | T_GREATER, 105 | T_GREATEREQ, 106 | T_EQ, 107 | T_NOTEQ, 108 | T_BITXOR, 109 | T_BITOR, 110 | T_AND, 111 | T_OR, 112 | T_QMARK = 130, 113 | T_COMMA, 114 | T_COLON, 115 | T_SEMICOLON, 116 | T_SQUOTE, 117 | T_DQUOTE, 118 | T_ASSIGN, 119 | T_PLUS_ASSIGN, 120 | T_MINUS_ASSIGN, 121 | T_STAR_ASSIGN, 122 | T_DIV_ASSIGN = 140, 123 | T_MOD_ASSIGN, 124 | T_LSHIFT_ASSIGN, 125 | T_RSHIFT_ASSIGN, 126 | T_AMPERSAND_ASSIGN, 127 | T_BITXOR_ASSIGN, 128 | T_BITOR_ASSIGN, 129 | T_SIZEOF = 147, 130 | 131 | // This is a primitive keyword that is used to indicate 132 | // varargs in C 133 | // This is neither a keyword nor an operator, in a sense that 134 | // on one hand it is not parsed as a keyword, and on the other hand 135 | // it does not have operator attributes related to it 136 | T_ELLIPSIS = 148, 137 | 138 | // The following are operator types for overloading in C 139 | // e.g. 
++ and -- have pre- and post-fix form 140 | 141 | // ++ 142 | T_POST_INC = 200, 143 | T_PRE_INC = 201, 144 | 145 | // -- 146 | T_POST_DEC = 202, 147 | T_PRE_DEC = 203, 148 | 149 | // * 150 | T_MULT = 204, 151 | T_DEREF = 205, 152 | 153 | // & 154 | T_ADDR = 206, 155 | T_BITAND = 207, 156 | 157 | // - 158 | T_NEG = 208, 159 | T_SUBTRACTION = 209, 160 | 161 | // + 162 | T_POS = 210, 163 | T_ADDITION = 211, 164 | 165 | // Prefix "(" is parsed as parenthesis, postfix ( is function call 166 | // Though prefix ( could also be type cast, that requires some 167 | // type checking. 168 | T_PAREN = 212, 169 | T_TYPECAST = 213, 170 | T_FUNCCALL = 214, 171 | 172 | // [] 173 | T_ARRAYSUB = 215, 174 | 175 | // This one is artificial: function arguments 176 | // Since T_FUNCCALL only has 2 parameters, we need to group all its 177 | // arguments into one syntax node, otherwise the reduce function would not be 178 | // able to know how many value nodes it should reduce 179 | T_FUNCARG = 216, 180 | 181 | }; 182 | 183 | ///////////////////////////////////////////////////////////////////// 184 | // enum class TokenType ends 185 | ///////////////////////////////////////////////////////////////////// 186 | 187 | // This defines the evaluation order of operators in the same 188 | // precedence level 189 | // i.e.
associativity 190 | enum class EvalOrder { 191 | LEFT_TO_RIGHT = 0, 192 | RIGHT_TO_LEFT, 193 | }; 194 | 195 | /* 196 | * struct TokenTypeHasher - Hash function for enum class 197 | */ 198 | struct TokenTypeHasher { 199 | inline size_t operator()(const TokenType &tt) const { 200 | return static_cast(tt); 201 | } 202 | }; 203 | 204 | /* 205 | * struct TokenTypeEq - Comparison function for enum class 206 | */ 207 | struct TokenTypeEq { 208 | inline bool operator()(const TokenType &tt1, const TokenType &tt2) const { 209 | return static_cast(tt1) == static_cast(tt2); 210 | } 211 | }; 212 | 213 | ///////////////////////////////////////////////////////////////////// 214 | // struct OpInfo 215 | ///////////////////////////////////////////////////////////////////// 216 | 217 | /* 218 | * struct OpInfo - Stores information about operators including 219 | * precedence, associativity and number of operands 220 | */ 221 | struct OpInfo { 222 | // The smaller the higher 223 | int precedence; 224 | 225 | // -1 for parenthesis, positive number for all others 226 | int operand_num; 227 | 228 | // Associativity is used to resolve shift-reduce conflict 229 | // when the precedence is the same 230 | EvalOrder associativity; 231 | 232 | // Whether the operator is postfix unary operator 233 | // This is used to determine whether the operator after 234 | // this one is postfix or prefix 235 | bool is_postfix_unary; 236 | }; 237 | 238 | ///////////////////////////////////////////////////////////////////// 239 | // struct OpInfo ends 240 | ///////////////////////////////////////////////////////////////////// 241 | 242 | ///////////////////////////////////////////////////////////////////// 243 | // class TokenInfo 244 | ///////////////////////////////////////////////////////////////////// 245 | 246 | /* 247 | * class TokenInfo - This is the helper class that facilitates tokenizer and 248 | * syntax analyzer 249 | */ 250 | class TokenInfo { 251 | public: 252 | using keyword_map_value_type = 
std::pair; 253 | using keyword_map_type = std::unordered_map; 254 | 255 | // The value type used in operator map 256 | using op_map_value_type = \ 257 | std::pair; 258 | 259 | using op_map_type = \ 260 | std::unordered_map; 264 | 265 | // The next two are used in token name map that maps token to string name 266 | using token_name_map_value_type = std::pair; 267 | using token_name_map_type = \ 268 | std::unordered_map; 272 | 273 | static const keyword_map_type keyword_map; 274 | static const op_map_type op_map; 275 | // This is used for debugging and error reporting 276 | static const token_name_map_type token_name_map; 277 | 278 | // This stored token types that represent built-in type 279 | static const std::unordered_set builtin_type_set; 282 | 283 | /* 284 | * GetOpInfo() - Return the struct of (precedence, op count, associativity) 285 | * of a specific operator 286 | * 287 | * If the operator is not found then it implies the type is not part of 288 | * an expression, and if we are parsing an expression then probably 289 | * it is the end of an expression 290 | * 291 | * We return a constant pointer to the structure 292 | */ 293 | static const OpInfo *GetOpInfo(TokenType type) { 294 | auto it = TokenInfo::op_map.find(type); 295 | 296 | // If does not find then return nullptr to indicate this 297 | // is not a valid operator type 298 | // 299 | // This branch is useful since 300 | if(it == TokenInfo::op_map.end()) { 301 | return nullptr; 302 | } 303 | 304 | return &it->second; 305 | } 306 | 307 | /* 308 | * GetTokenName() - Given a token type, return the name of that type 309 | * 310 | * The name is returned in a constant string reference form 311 | */ 312 | static const std::string &GetTokenName(TokenType type) { 313 | auto it = TokenInfo::token_name_map.find(type); 314 | 315 | // Just avoid using unseen tokens in the program 316 | assert(it != TokenInfo::token_name_map.end()); 317 | 318 | return it->second; 319 | } 320 | 321 | }; 322 | 323 | 
///////////////////////////////////////////////////////////////////// 324 | // class TokenInfo ends 325 | ///////////////////////////////////////////////////////////////////// 326 | 327 | ///////////////////////////////////////////////////////////////////// 328 | // class Token 329 | ///////////////////////////////////////////////////////////////////// 330 | 331 | /* 332 | * class Token - Main class to represent lexicon 333 | */ 334 | class Token { 335 | friend class SlabAllocator; 336 | private: 337 | TokenType type; 338 | 339 | union { 340 | // Integer constant 341 | unsigned long int_const; 342 | 343 | // char constant 344 | char char_const; 345 | 346 | // String constant 347 | std::string *string_const_p; 348 | 349 | // Identifier 350 | std::string *ident_p; 351 | } data; 352 | 353 | // Static data member to allocate node from a slab allocator 354 | static SlabAllocator allocator; 355 | 356 | /* 357 | * Constructor() - Construct a token object with corresponding type 358 | * 359 | * We choose not to set data here since it is a union 360 | */ 361 | Token(TokenType p_type) : 362 | type{p_type} { 363 | // This will also clear the pointer 364 | data.int_const = 0; 365 | 366 | assert(data.ident_p == nullptr); 367 | assert(data.string_const_p == nullptr); 368 | } 369 | 370 | /* 371 | * Destructor - Frees the pointer if there is one 372 | * 373 | * The ownership of the pointer stored as identifier or string constant 374 | * belongs to Token object 375 | * 376 | * This is made private to prevent being deleted by something other than 377 | * the SlabAllocator 378 | */ 379 | ~Token() { 380 | // In both case the target is a string pointer 381 | // so we could just delete it without distinguishing 382 | // further on its type 383 | if(type == TokenType::T_IDENT || \ 384 | type == TokenType::T_STRING_CONST) { 385 | 386 | // If we destruct the string in exception handler 387 | // then the pointer is nullptr since during construction 388 | // we set it to nullptr and it has 
not been filled with anything 389 | if(data.string_const_p != nullptr) { 390 | delete data.string_const_p; 391 | } 392 | } 393 | 394 | return; 395 | } 396 | 397 | public: 398 | 399 | /* 400 | * SetType() - Assigns a new type to the token 401 | * 402 | * This is necessary since we need to resolve ambiguity during parsing 403 | * with operator types. e.g. "*" could either be used as multiplication 404 | * or be used as pointer dereference operator 405 | */ 406 | void SetType(TokenType p_type) { 407 | type = p_type; 408 | 409 | return; 410 | } 411 | 412 | /* 413 | * GetType() - Returns the type of the token 414 | */ 415 | TokenType GetType() const { 416 | return type; 417 | } 418 | 419 | /* 420 | * SetIntConst() - Set a integer constant number to this object 421 | * 422 | * This function requires that the token type must be T_INT_CONST 423 | */ 424 | void SetIntConst(unsigned long p_int_const) { 425 | assert(type == TokenType::T_INT_CONST); 426 | 427 | data.int_const = p_int_const; 428 | 429 | return; 430 | } 431 | 432 | /* 433 | * GetIntConst() - Returns the integer constant 434 | */ 435 | unsigned long GetIntConst() const { 436 | assert(type == TokenType::T_INT_CONST); 437 | 438 | return data.int_const; 439 | } 440 | 441 | /* 442 | * SetCharConst() - Set a char constant to this object 443 | * 444 | * This function requires that the token must be of T_CHAR_CONST 445 | */ 446 | void SetCharConst(char p_char_const) { 447 | assert(type == TokenType::T_CHAR_CONST); 448 | 449 | data.char_const = p_char_const; 450 | 451 | return; 452 | } 453 | 454 | /* 455 | * GetCharConst() - Returns a char constant 456 | */ 457 | char GetCharConst() const { 458 | assert(type == TokenType::T_CHAR_CONST); 459 | 460 | return data.char_const; 461 | } 462 | 463 | /* 464 | * SetStringConst() - Set a string constant to this object 465 | * 466 | * This function requires that the token must be of T_STRING_CONST 467 | */ 468 | void SetStringConst(std::string *p_string_const_p) { 469 | assert(type 
== TokenType::T_STRING_CONST); 470 | 471 | data.string_const_p = p_string_const_p; 472 | 473 | return; 474 | } 475 | 476 | /* 477 | * GetStringConst() - Returns the string pointer 478 | */ 479 | std::string *GetStringConst() const { 480 | assert(type == TokenType::T_STRING_CONST); 481 | 482 | return data.string_const_p; 483 | } 484 | 485 | /* 486 | * SetIdentifier() - Set an identifier string to this object 487 | * 488 | * This function requires that the token must be of T_IDENT 489 | */ 490 | void SetIdentifier(std::string *p_ident_p) { 491 | assert(type == TokenType::T_IDENT); 492 | 493 | data.ident_p = p_ident_p; 494 | 495 | return; 496 | } 497 | 498 | /* 499 | * GetIdentifier() - Returns the identifier string object 500 | */ 501 | std::string *GetIdentifier() const { 502 | assert(type == TokenType::T_IDENT); 503 | 504 | return data.ident_p; 505 | } 506 | 507 | /* 508 | * ToString() - Convert the token node to string representation 509 | * 510 | * There is no trailing '\n' attached with the string 511 | */ 512 | std::string ToString() const { 513 | const std::string &name = TokenInfo::GetTokenName(type); 514 | 515 | if(type == TokenType::T_IDENT) { 516 | return name + ' ' + *GetIdentifier(); 517 | } else if(type == TokenType::T_STRING_CONST) { 518 | return name + ' ' + *GetStringConst(); 519 | } else if(type == TokenType::T_INT_CONST) { 520 | return name + ' ' + std::to_string(GetIntConst()); 521 | } else if(type == TokenType::T_CHAR_CONST) { 522 | return name + ' ' + \ 523 | std::to_string(static_cast(GetCharConst())); 524 | } else { 525 | return name; 526 | } 527 | } 528 | 529 | /* 530 | * Get() - static function to construct a token node object 531 | */ 532 | template 533 | static Token *Get(Args&&... 
args) { 534 | return Token::allocator.Get(args...); 535 | } 536 | }; 537 | 538 | } // namespace cfront 539 | } // namespace wangziqi2013 540 | -------------------------------------------------------------------------------- /src/parse.c: -------------------------------------------------------------------------------- 1 | 2 | #include "parse.h" 3 | 4 | parse_stmt_cxt_t *parse_init(char *input) { return parse_exp_init(input); } 5 | void parse_free(parse_cxt_t *cxt) { parse_exp_free(cxt); } 6 | 7 | // Top-level parsing, i.e., global level parsing 8 | // There are five possible cases: 9 | // 1. Base type + ';' must be a type declaration, most likely struct/union/enum 10 | // 2. Base type + decl + "," must be a type declaration or data definition 11 | // 3. Base type + decl + "=" must be a data definition with initializer 12 | // 4. Base type + decl + ";" must be a global declaration, or function prototype 13 | // 5. Base type + func decl + '{' must be function definition 14 | token_t *parse(parse_cxt_t *cxt) { 15 | token_t *root = token_alloc_type(T_ROOT); 16 | while(1) { 17 | if(token_lookahead(cxt->token_cxt, 1) == NULL) { 18 | break; // Reached EOF 19 | } 20 | token_t *basetype = parse_decl_basetype(cxt); 21 | if(token_lookahead_notnull(cxt->token_cxt, 1)->type == T_SEMICOLON) { // Case 1 22 | token_consume_type(cxt->token_cxt, T_SEMICOLON); 23 | ast_append_child(root, ast_append_child(token_alloc_type(T_GLOBAL_DECL_ENTRY), basetype)); 24 | continue; 25 | } 26 | token_t *decl = parse_decl(cxt, PARSE_DECL_NOBASETYPE); 27 | token_t *la = token_lookahead_notnull(cxt->token_cxt, 1); 28 | //printf("la type %s\n", token_typestr(la->type)); 29 | if(la->type == T_LCPAREN) { // Case 5 30 | assert(ast_getchild(decl, 0) != NULL); 31 | //ast_print(decl, 0); 32 | //if(ast_getchild(decl, 0)->type != EXP_FUNC_CALL) // Only function type could have a body 33 | // error_row_col_exit(cxt->token_cxt->s, "Only function definition can have a body\n"); 34 | token_t *comp_stmt = 
parse_comp_stmt(cxt); 35 | ast_push_child(decl, basetype); 36 | ast_append_child(root, ast_append_child(ast_append_child(token_alloc_type(T_GLOBAL_FUNC), decl), comp_stmt)); 37 | continue; 38 | } 39 | token_t *entry = ast_append_child(token_alloc_type(T_GLOBAL_DECL_ENTRY), basetype); 40 | ast_append_child(root, entry); 41 | while(1) { 42 | // Check decl's name here; If it is typedef then add the name into the token cxt 43 | if(DECL_ISTYPEDEF(basetype->decl_prop)) { 44 | token_t *name = ast_gettype(decl, T_IDENT); 45 | if(name == NULL) { 46 | error_row_col_exit(cxt->token_cxt->s, "Expecting a name for typedef\n"); 47 | } 48 | assert(name->type == T_IDENT); 49 | token_add_utype(cxt->token_cxt, name); 50 | } 51 | token_t *var = ast_append_child(token_alloc_type(T_GLOBAL_DECL_VAR), decl); 52 | ast_append_child(entry, var); 53 | if(la->type == T_ASSIGN) { // case 3 54 | token_consume_type(cxt->token_cxt, T_ASSIGN); 55 | if(token_lookahead_notnull(cxt->token_cxt, 1)->type == T_LCPAREN) ast_append_child(var, parse_init_list(cxt)); 56 | else ast_append_child(var, parse_exp(cxt, PARSE_EXP_NOCOMMA)); 57 | la = token_lookahead_notnull(cxt->token_cxt, 1); 58 | } 59 | if(la->type == T_SEMICOLON) { // case 4 60 | token_consume_type(cxt->token_cxt, T_SEMICOLON); 61 | break; 62 | } else if(la->type == T_COMMA) { // case 2 63 | token_consume_type(cxt->token_cxt, T_COMMA); 64 | decl = parse_decl(cxt, PARSE_DECL_NOBASETYPE); 65 | la = token_lookahead_notnull(cxt->token_cxt, 1); 66 | continue; 67 | } else { 68 | error_row_col_exit(la->offset, "Expecting \',\', \'=\' or \';\' for global declaration\n"); 69 | } 70 | } 71 | } 72 | return root; 73 | } -------------------------------------------------------------------------------- /src/parse.h: -------------------------------------------------------------------------------- 1 | 2 | #include "parse_exp.h" 3 | #include "parse_decl.h" 4 | #include "parse_comp.h" 5 | #include "parse_stmt.h" 6 | 7 | #ifndef _PARSE_H 8 | #define _PARSE_H 9 | 10 
| typedef parse_exp_cxt_t parse_cxt_t; 11 | 12 | parse_cxt_t *parse_init(char *input); 13 | void parse_free(parse_cxt_t *cxt); 14 | token_t *parse(parse_cxt_t *cxt); 15 | 16 | #endif -------------------------------------------------------------------------------- /src/parse_comp.c: -------------------------------------------------------------------------------- 1 | 2 | #include "parse_comp.h" 3 | #include "eval.h" 4 | 5 | parse_decl_cxt_t *parse_comp_init(char *input) { return parse_exp_init(input); } 6 | void parse_comp_free(parse_comp_cxt_t *cxt) { parse_exp_free(cxt); } 7 | 8 | // This parses struct or union or enum 9 | token_t *parse_comp(parse_exp_cxt_t *cxt) { 10 | token_t *token = token_get_next(cxt->token_cxt); 11 | assert(token); 12 | switch(token->type) { 13 | case T_STRUCT: case T_UNION: return parse_struct_union(cxt, token); 14 | case T_ENUM: return parse_enum(cxt, token); 15 | default: assert(0); 16 | } 17 | } 18 | 19 | // Returns 1 if there is a body, 0 if no body; Name is pushed into root as either 20 | // empty node or IDENT node. If neither name nor body is present report error 21 | int parse_name_body(parse_comp_cxt_t *cxt, token_t *root) { 22 | token_t *name = token_lookahead_notnull(cxt->token_cxt, 1); 23 | int has_name = name->type == T_IDENT; 24 | ast_append_child(root, has_name ? 
token_get_next(cxt->token_cxt) : token_get_empty()); 25 | int has_body = token_consume_type(cxt->token_cxt, T_LCPAREN); 26 | if(!has_name && !has_body) error_row_col_exit(root->offset, "Expecting identifier or \'{\' after struct/union\n"); 27 | return has_body; 28 | } 29 | 30 | // Returns the same node which is either T_STRUCT or T_UNION 31 | // The 2nd child is empty if there is no body or empty body 32 | // Check whether decl_prop has TYPE_EMPTY_BODY bit set 33 | token_t *parse_struct_union(parse_comp_cxt_t *cxt, token_t *root) { 34 | if(parse_name_body(cxt, root)) { 35 | 36 | int has_body = 0; // Might be possible that there is {} as body but it is empty 37 | while(1) { // loop on lines 38 | if(token_lookahead_notnull(cxt->token_cxt, 1)->type == T_RCPAREN) { // Finish parsing on '}' 39 | if(!has_body) { 40 | ast_append_child(root, token_get_empty()); 41 | root->decl_prop = TYPE_EMPTY_BODY; // Distinguish this from no body defined 42 | } 43 | token_consume_type(cxt->token_cxt, T_RCPAREN); 44 | break; 45 | } 46 | has_body = 1; 47 | token_t *comp_decl = ast_append_child(token_alloc_type(T_COMP_DECL), parse_decl_basetype(cxt)); 48 | while(1) { // loop on fields 49 | token_t *field = token_alloc_type(T_COMP_FIELD); 50 | ast_append_child(comp_decl, ast_append_child(field, parse_decl(cxt, PARSE_DECL_NOBASETYPE))); 51 | // Declarator body, can be named or unamed 52 | token_t *la = token_lookahead_notnull(cxt->token_cxt, 1); 53 | if(la->type == T_COLON) { 54 | token_consume_type(cxt->token_cxt, T_COLON); 55 | token_t *bf; // Assigned next line 56 | ast_append_child(field, ast_append_child(token_alloc_type(T_BITFIELD), bf = parse_exp(cxt, PARSE_EXP_NOCOMMA))); 57 | la = token_lookahead_notnull(cxt->token_cxt, 1); 58 | } 59 | if(la->type == T_COMMA) { token_consume_type(cxt->token_cxt, T_COMMA); } 60 | else if(la->type == T_SEMICOLON) { token_consume_type(cxt->token_cxt, T_SEMICOLON); break; } // Finish parsing the field on ';' 61 | else { error_row_col_exit(la->offset, 
"Unexpected symbol \"%s\" in struct/union field declaration\n", 62 | token_typestr(la->type)); } 63 | } 64 | ast_append_child(root, comp_decl); 65 | } 66 | } else { ast_append_child(root, token_get_empty()); } // Otherwise append an empty child to indicate there is no body 67 | return root; 68 | } 69 | 70 | token_t *parse_enum(parse_comp_cxt_t *cxt, token_t *root) { 71 | if(parse_name_body(cxt, root)) { 72 | while(1) { // loop on lines 73 | if(token_lookahead_notnull(cxt->token_cxt, 1)->type == T_RCPAREN) { 74 | token_consume_type(cxt->token_cxt, T_RCPAREN); break; 75 | } 76 | token_t *enum_field = token_alloc_type(T_ENUM_FIELD); 77 | ast_append_child(root, enum_field); 78 | token_t *la = token_lookahead_notnull(cxt->token_cxt, 1); 79 | if(la->type == T_IDENT) ast_append_child(enum_field, token_get_next(cxt->token_cxt)); 80 | else error_row_col_exit(la->offset, "Expecting an identifier in enum body\n"); 81 | la = token_lookahead_notnull(cxt->token_cxt, 1); 82 | if(la->type == T_ASSIGN) { 83 | token_consume_type(cxt->token_cxt, T_ASSIGN); 84 | ast_append_child(enum_field, parse_exp(cxt, PARSE_EXP_NOCOMMA)); 85 | la = token_lookahead_notnull(cxt->token_cxt, 1); 86 | } 87 | // Last entry does not have to use comma 88 | if(la->type == T_COMMA) { token_consume_type(cxt->token_cxt, T_COMMA); } 89 | else if(la->type == T_RCPAREN) { token_consume_type(cxt->token_cxt, T_RCPAREN); break; } 90 | else { error_row_col_exit(la->offset, "Unexpected symbol \"%s\" in enum body\n", 91 | token_typestr(la->type)); } 92 | } 93 | } 94 | return root; 95 | } -------------------------------------------------------------------------------- /src/parse_comp.h: -------------------------------------------------------------------------------- 1 | 2 | #ifndef _PARSE_COMP_H 3 | #define _PARSE_COMP_H 4 | 5 | #include "parse_decl.h" 6 | 7 | typedef parse_exp_cxt_t parse_comp_cxt_t; 8 | 9 | parse_decl_cxt_t *parse_comp_init(char *input); 10 | void parse_comp_free(parse_decl_cxt_t *cxt); 11 | token_t 
*parse_comp(parse_exp_cxt_t *cxt); 12 | int parse_name_body(parse_comp_cxt_t *cxt, token_t *root); 13 | token_t *parse_struct_union(parse_comp_cxt_t *cxt, token_t *root); 14 | token_t *parse_enum(parse_comp_cxt_t *cxt, token_t *root); 15 | 16 | #endif -------------------------------------------------------------------------------- /src/parse_decl.c: -------------------------------------------------------------------------------- 1 | 2 | #include "parse_decl.h" 3 | #include "parse_comp.h" 4 | #include "eval.h" 5 | 6 | parse_decl_cxt_t *parse_decl_init(char *input) { return parse_exp_init(input); } 7 | void parse_decl_free(parse_decl_cxt_t *cxt) { parse_exp_free(cxt); } 8 | 9 | // Whether the token could start a declaration, i.e. being a type, modifier, or udef type 10 | int parse_decl_isbasetype(parse_decl_cxt_t *cxt, token_t *token) { 11 | (void)cxt; return ((token->decl_prop & DECL_MASK) || token->type == T_UDEF) ? 1 : 0; 12 | } 13 | 14 | // Same rule as parse_exp_next_token() 15 | // Note: The following tokens are considered as part of a type expression: 16 | // 1. ( ) [ ] * 2. const volatile 3. 
identifier 17 | token_t *parse_decl_next_token(parse_decl_cxt_t *cxt) { 18 | token_t *token = token_lookahead(cxt->token_cxt, 1); 19 | int valid; // Below are not "==" 20 | if((valid = (token != NULL))) { 21 | switch(token->type) { 22 | case T_LPAREN: { // If the next symbol constitutes a base type then this is func call 23 | token_t *lookahead = token_lookahead(cxt->token_cxt, 2); // Note that we already looked ahead one token 24 | if(lookahead != NULL && (parse_decl_isbasetype(cxt, lookahead) || lookahead->type == T_RPAREN)) 25 | token->type = EXP_FUNC_CALL; 26 | else token->type = EXP_LPAREN; 27 | break; 28 | } 29 | case T_RPAREN: 30 | if(parse_exp_isallowed(cxt, token, PARSE_EXP_ALLOWALL)) token->type = EXP_RPAREN; 31 | else valid = 0; 32 | break; 33 | case T_STAR: token->type = EXP_DEREF; break; 34 | case T_LSPAREN: token->type = EXP_ARRAY_SUB; break; 35 | case T_RSPAREN: 36 | if(parse_exp_isallowed(cxt, token, PARSE_EXP_ALLOWALL)) token->type = EXP_RSPAREN; 37 | else valid = 0; 38 | break; 39 | case T_IDENT: break; 40 | //case T_ELLIPSIS: break; // Ellipsis is processed only in function decl and does not go through this function 41 | default: if(!(token->decl_prop & DECL_QUAL_MASK)) valid = 0; // Only allow DECL_QUAL and identifier 42 | } 43 | } 44 | return valid ? 
token_get_next(cxt->token_cxt) : NULL; 45 | } 46 | 47 | // Parses the type specifier part of a base type declaration 48 | // Sets the decl_prop of the basetype node according to the type being parsed, and push child for udef, s/u/e 49 | void parse_typespec(parse_decl_cxt_t *cxt, token_t *basetype) { 50 | if(BASETYPE_GET(basetype->decl_prop) != BASETYPE_NONE) 51 | error_row_col_exit(cxt->token_cxt->s, "Already has type specifier \"%s\"\n", token_decl_print(basetype->decl_prop)); 52 | int usign = 0; 53 | token_t *la = token_lookahead_notnull(cxt->token_cxt, 1); 54 | basetype->offset = la->offset; // In case no child is pushed for the base type node, we assign the next token's offset 55 | token_type_t type = la->type; // Use this to detect illegal "signed long double" 56 | // Note that this is not a while loop 57 | switch(type) { // Basetype declaration cannot be the end of file 58 | case T_UNSIGNED: usign = 1; // Fall through 59 | /* fall through */ 60 | case T_SIGNED: token_free(token_get_next(cxt->token_cxt)); // Fall through again 61 | /* fall through */ 62 | case T_CHAR: case T_SHORT: case T_INT: case T_LONG: { // Note: Do not get_next_token() on these types 63 | token_t *token = token_get_next(cxt->token_cxt); // unsigned and signed have been processed before this line 64 | switch(token->type) { 65 | case T_CHAR: BASETYPE_SET(basetype, usign ? BASETYPE_UCHAR : BASETYPE_CHAR); token_free(token); return; 66 | case T_INT: BASETYPE_SET(basetype, usign ? BASETYPE_UINT : BASETYPE_INT); token_free(token); return; 67 | case T_SHORT: // short int has the same effect as short, so we just try to consume an extra int 68 | BASETYPE_SET(basetype, usign ? 
BASETYPE_USHORT : BASETYPE_SHORT); token_free(token); 69 | token_consume_type(cxt->token_cxt, T_INT); return; 70 | case T_LONG: { // long long; long long int; long int; long 71 | token_free(token); 72 | token_t *token = token_get_next(cxt->token_cxt); 73 | switch(token->type) { 74 | case T_LONG: // Same as short [int] 75 | BASETYPE_SET(basetype, usign ? BASETYPE_ULLONG : BASETYPE_LLONG); token_free(token); 76 | token_consume_type(cxt->token_cxt, T_INT); return; 77 | case T_DOUBLE: 78 | if(type == T_SIGNED || type == T_UNSIGNED) 79 | error_row_col_exit(token->offset, "Type \"long double\" does not allow sign declaration\n"); 80 | BASETYPE_SET(basetype, BASETYPE_LDOUBLE); token_free(token); return; 81 | case T_INT: BASETYPE_SET(basetype, usign ? BASETYPE_ULONG : BASETYPE_LONG); token_free(token); return; 82 | default: 83 | BASETYPE_SET(basetype, usign ? BASETYPE_ULONG : BASETYPE_LONG); 84 | token_pushback(cxt->token_cxt, token); return; 85 | } 86 | } // unsigned / signed without other base type implies int type 87 | default: BASETYPE_SET(basetype, usign ? 
BASETYPE_UINT : BASETYPE_INT); token_pushback(cxt->token_cxt, token); return; 88 | } 89 | } 90 | case T_FLOAT: BASETYPE_SET(basetype, BASETYPE_FLOAT); token_free(token_get_next(cxt->token_cxt)); return; 91 | case T_DOUBLE: BASETYPE_SET(basetype, BASETYPE_DOUBLE); token_free(token_get_next(cxt->token_cxt)); return; 92 | case T_UDEF: BASETYPE_SET(ast_append_child(basetype, token_get_next(cxt->token_cxt)), BASETYPE_UDEF); return; 93 | case T_STRUCT: BASETYPE_SET(ast_append_child(basetype, parse_comp(cxt)), BASETYPE_STRUCT); return; 94 | case T_UNION: BASETYPE_SET(ast_append_child(basetype, parse_comp(cxt)), BASETYPE_UNION); return; 95 | case T_ENUM: BASETYPE_SET(ast_append_child(basetype, parse_comp(cxt)), BASETYPE_ENUM); return; 96 | case T_VOID: BASETYPE_SET(basetype, BASETYPE_VOID); token_free(token_get_next(cxt->token_cxt)); return; 97 | default: assert(0); 98 | } 99 | } 100 | 101 | // Base type = one of udef/builtin/enum/struct/union; In this stage only allows 102 | // keywords with TOKEN_DECL set 103 | // The stack is not changed, calling this function does not need recurse 104 | token_t *parse_decl_basetype(parse_decl_cxt_t *cxt) { 105 | token_t *token = token_lookahead(cxt->token_cxt, 1), *basetype = token_alloc_type(T_BASETYPE); 106 | while(token != NULL && (token->decl_prop & DECL_MASK)) { 107 | if(!(token->decl_prop & DECL_TYPE_MASK)) { 108 | if(!token_decl_apply(basetype, token)) 109 | error_row_col_exit(token->offset, "Incompatible type modifier \"%s\" with \"%s\"\n", 110 | token_symstr(token->type), token_decl_print(basetype->decl_prop)); 111 | token_consume_type(cxt->token_cxt, token->type); // Consume whatever it is 112 | } else { parse_typespec(cxt, basetype); } 113 | token = token_lookahead(cxt->token_cxt, 1); 114 | } // Must have some type, cannot be just qualifiers and modifiers 115 | if(BASETYPE_GET(basetype->decl_prop) == BASETYPE_NONE) error_row_col_exit(cxt->token_cxt->s, "Declaration lacks a type specifier\n"); 116 | return basetype; 117 | } 
118 | 119 | token_t *parse_decl(parse_decl_cxt_t *cxt, int hasbasetype) { 120 | parse_exp_recurse(cxt); 121 | assert(parse_exp_size(cxt, OP_STACK) == 0 && parse_exp_size(cxt, AST_STACK) == 0); // Must start on a new stack 122 | token_t *decl = token_alloc_type(T_DECL); 123 | // Append base type node if the flag indicates so, or empty node as placeholder 124 | ast_append_child(decl, hasbasetype == PARSE_DECL_HASBASETYPE ? parse_decl_basetype(cxt) : token_get_empty()); 125 | token_t *placeholder = token_get_empty(); 126 | // Placeholder operand for the innremost operator because we do not push ident to AST stack 127 | parse_exp_shift(cxt, AST_STACK, placeholder); 128 | token_t *decl_name = NULL; // If not an abstract declarator this is the name 129 | while(1) { 130 | token_t *token = parse_decl_next_token(cxt); 131 | if(token == NULL) { 132 | ast_append_child(decl, parse_exp_reduce_all(cxt)); // This may directly put the placeholder node as a expression 133 | ast_append_child(decl, decl_name ? 
decl_name : token_get_empty()); // Only appends the name if there is one, or empty node 134 | parse_exp_decurse(cxt); 135 | // Leaf operand always empty node as stop sign when traversing the type derivation chain 136 | return decl; 137 | } 138 | if(token->decl_prop & DECL_QUAL_MASK) { // Special case for type qualifiers 139 | token_t *top = parse_exp_peek(cxt, OP_STACK); 140 | if(top == NULL || top->type != EXP_DEREF || cxt->last_active_stack != OP_STACK) 141 | error_row_col_exit(token->offset, "Qualifier \"%s\" must follow pointer\n", token_symstr(token->type)); 142 | if(!token_decl_apply(top, token)) 143 | error_row_col_exit(token->offset, "Qualifier \"%s\" not compatible with \"%s\"\n", 144 | token_symstr(token->type), token_decl_print(top->decl_prop)); 145 | token_free(token); 146 | } else { 147 | switch(token->type) { 148 | case EXP_DEREF: // To avoid int **a*; being legal, because identifiers are not pushed to AST stack 149 | if(decl_name) error_row_col_exit(token->offset, "Pointers can only occur before declared name\n") 150 | parse_exp_shift(cxt, OP_STACK, token); break; 151 | case T_IDENT: // Trick: Do not push it onto the stack 152 | if(decl_name) error_row_col_exit(token->offset, "Type declaration can have at most one identifier\n"); 153 | decl_name = token; break; 154 | /* Is the above sufficient? 
- As long as parenthesis is not parsed recursively it is fine 155 | token_t *ast_top = parse_exp_peek(cxt, AST_STACK); 156 | if(ast_top != NULL && ast_top->type == T_) token_free(stack_pop(cxt->stacks[AST_STACK])); 157 | else if(ast_top != NULL) error_row_col_exit(token->offset, "Type declaration can have at most one identifier\n"); 158 | parse_exp_shift(cxt, AST_STACK, token); 159 | */ 160 | case EXP_ARRAY_SUB: { 161 | parse_exp_shift(cxt, OP_STACK, token); 162 | token_t *la = token_lookahead(cxt->token_cxt, 1); 163 | token_t *index; 164 | if(la != NULL && la->type == T_RSPAREN) { index = token_get_empty(); } 165 | else { index = parse_exp(cxt, PARSE_EXP_ALLOWALL); } 166 | parse_exp_shift(cxt, AST_STACK, index); 167 | parse_exp_reduce(cxt, -1, 1); // This reduces array sub 168 | if(!token_consume_type(cxt->token_cxt, T_RSPAREN)) 169 | error_row_col_exit(token->offset, "Array declaration expects \']\'\n"); 170 | break; 171 | } 172 | case EXP_FUNC_CALL: { 173 | parse_exp_shift(cxt, OP_STACK, token); 174 | token_t *la = token_lookahead(cxt->token_cxt, 1); 175 | if(la != NULL && la->type == T_RPAREN) { 176 | ast_push_child(token, token_get_empty()); 177 | token_consume_type(cxt->token_cxt, T_RPAREN); 178 | } else { 179 | while(1) { 180 | ast_append_child(token, parse_decl(cxt, PARSE_DECL_HASBASETYPE)); 181 | if(token_consume_type(cxt->token_cxt, T_COMMA)) { // Special: check "..." after "," 182 | if(token_lookahead_notnull(cxt->token_cxt, 1)->type == T_ELLIPSIS) { // after '...' 
there can only be ')' 183 | ast_append_child(token, token_get_next(cxt->token_cxt)); 184 | if(!token_consume_type(cxt->token_cxt, T_RPAREN)) 185 | error_row_col_exit(cxt->token_cxt->s, "\"...\" could only be the last function argument\n"); 186 | break; 187 | } 188 | } 189 | else if(token_consume_type(cxt->token_cxt, T_RPAREN)) { break; } 190 | else error_row_col_exit(token->offset, "Function declaration expects \')\' or \',\' or \"...\"\n"); 191 | } 192 | } 193 | parse_exp_reduce(cxt, 1, 1); // This reduces EXP_FUNC_CALL 194 | break; 195 | } 196 | case EXP_LPAREN: parse_exp_shift(cxt, OP_STACK, token); break; 197 | case EXP_RPAREN: { 198 | token_t *op_top = parse_exp_peek(cxt, OP_STACK); 199 | while(op_top != NULL && op_top->type != EXP_LPAREN) op_top = parse_exp_reduce(cxt, -1, 0); 200 | if(op_top == NULL) error_row_col_exit(token->offset, "Did not find matching \'(\' in declaration\n"); 201 | token_free(stack_pop(cxt->stacks[OP_STACK])); 202 | token_free(token); 203 | break; 204 | } // Note that unrelated tokens are filtered 205 | default: printf("%s %s\n", token_typestr(token->type), token->offset); assert(0); 206 | } // switch(token->type) 207 | } // if(token is qualifier) 208 | } // while(1) 209 | } 210 | -------------------------------------------------------------------------------- /src/parse_decl.h: -------------------------------------------------------------------------------- 1 | 2 | #ifndef _PARSE_DECL_H 3 | #define _PARSE_DECL_H 4 | 5 | #include "parse_exp.h" 6 | #include "hashtable.h" 7 | 8 | #define PARSE_DECL_NOBASETYPE 0 9 | #define PARSE_DECL_HASBASETYPE 1 10 | typedef parse_exp_cxt_t parse_decl_cxt_t; 11 | 12 | parse_decl_cxt_t *parse_decl_init(char *input); 13 | void parse_decl_free(parse_decl_cxt_t *cxt); 14 | int parse_decl_isbasetype(parse_decl_cxt_t *cxt, token_t *token); 15 | token_t *parse_decl_next_token(parse_decl_cxt_t *cxt); 16 | void parse_typespec(parse_decl_cxt_t *cxt, token_t *basetype); 17 | token_t 
*parse_decl_basetype(parse_decl_cxt_t *cxt); 18 | token_t *parse_decl(parse_decl_cxt_t *cxt, int hasbasetype); 19 | 20 | #endif -------------------------------------------------------------------------------- /src/parse_exp.h: -------------------------------------------------------------------------------- 1 | 2 | #ifndef _PARSE_EXP_H 3 | #define _PARSE_EXP_H 4 | 5 | #include "stack.h" 6 | #include "token.h" 7 | #include "ast.h" 8 | #include "hashtable.h" 9 | 10 | #define AST_STACK 0 11 | #define OP_STACK 1 12 | 13 | typedef uint32_t parse_exp_disallow_t; // A bit mask 14 | #define PARSE_EXP_ALLOWALL 0x00000000 15 | #define PARSE_EXP_NOCOMMA 0x00000001 // Do not allow outermost ',' 16 | #define PARSE_EXP_NOCOLON 0x00000002 // Do not allow outermost ':' 17 | 18 | typedef struct { 19 | // Either AST_STACK or OP_STACK; do not need save because a shift will happen 20 | int last_active_stack; 21 | stack_t *stacks[2]; 22 | stack_t *tops[2]; 23 | stack_t *prev_active; 24 | token_cxt_t *token_cxt; 25 | } parse_exp_cxt_t; 26 | 27 | parse_exp_cxt_t *parse_exp_init(char *input); 28 | void parse_exp_reinit(parse_exp_cxt_t *cxt, char *input); 29 | void parse_exp_free(parse_exp_cxt_t *cxt); 30 | int parse_exp_isoutermost(parse_exp_cxt_t *cxt); 31 | int parse_exp_isallowed(parse_exp_cxt_t *cxt, token_t *token, parse_exp_disallow_t disallow); 32 | int parse_exp_isexp(parse_exp_cxt_t *cxt, token_t *token, parse_exp_disallow_t disallow); 33 | int parse_exp_isprimary(parse_exp_cxt_t *cxt, token_t *token); 34 | int parse_exp_la_isdecl(parse_exp_cxt_t *cxt); 35 | int parse_exp_size(parse_exp_cxt_t *cxt, int stack_id); 36 | token_t *parse_exp_peek(parse_exp_cxt_t *cxt, int stack_id); 37 | token_t *parse_exp_peek_at(parse_exp_cxt_t *cxt, int stack_id, int index); 38 | int parse_exp_isempty(parse_exp_cxt_t *cxt, int stack_id); 39 | void parse_exp_recurse(parse_exp_cxt_t *cxt); 40 | void parse_exp_decurse(parse_exp_cxt_t *cxt); 41 | token_t *parse_exp_next_token(parse_exp_cxt_t *cxt, 
parse_exp_disallow_t disallow); 42 | void parse_exp_shift(parse_exp_cxt_t *cxt, int stack_id, token_t *token); 43 | token_t *parse_exp_reduce(parse_exp_cxt_t *cxt, int op_num_override, int allow_paren); 44 | void parse_exp_reduce_preced(parse_exp_cxt_t *cxt, token_t *token); 45 | token_t *parse_exp_reduce_all(parse_exp_cxt_t *cxt); 46 | token_t *parse_exp(parse_exp_cxt_t *cxt, parse_exp_disallow_t disallow); 47 | 48 | #endif -------------------------------------------------------------------------------- /src/parse_stmt.c: -------------------------------------------------------------------------------- 1 | 2 | #include "parse_stmt.h" 3 | #include "parse_decl.h" 4 | 5 | parse_stmt_cxt_t *parse_stmt_init(char *input) { return parse_exp_init(input); } 6 | void parse_stmt_free(parse_stmt_cxt_t *cxt) { parse_exp_free(cxt); } 7 | 8 | // Return a labeled statement 9 | token_t *parse_lbl_stmt(parse_stmt_cxt_t *cxt, token_type_t type) { 10 | if(type == T_IDENT) { 11 | token_t *token = token_alloc_type(T_LBL_STMT); 12 | ast_append_child(token, token_get_next(cxt->token_cxt)); 13 | if(!token_consume_type(cxt->token_cxt, T_COLON)) assert(0); // Caller guarantees this 14 | return ast_append_child(token, parse_stmt(cxt)); 15 | } 16 | token_t *token = token_get_next(cxt->token_cxt); 17 | if(type == T_CASE) ast_append_child(token, parse_exp(cxt, PARSE_EXP_NOCOLON)); 18 | if(!token_consume_type(cxt->token_cxt, T_COLON)) 19 | error_row_col_exit(token->offset, "Expecting \':\' for \"%s\" statement\n", token_symstr(token->type)); 20 | return ast_append_child(token, parse_stmt(cxt)); 21 | } 22 | 23 | // Returns an expression statement 24 | token_t *parse_exp_stmt(parse_stmt_cxt_t *cxt) { 25 | token_t *token = ast_append_child(token_alloc_type(T_EXP_STMT), parse_exp(cxt, PARSE_EXP_ALLOWALL)); 26 | if(!token_consume_type(cxt->token_cxt, T_SEMICOLON)) 27 | error_row_col_exit(cxt->token_cxt->s, "Expecting \';\' after expression statement\n"); 28 | return token; 29 | } 30 | 31 | token_t 
*parse_comp_stmt(parse_stmt_cxt_t *cxt) { 32 | token_t *decl_list = token_alloc_type(T_DECL_STMT_LIST); 33 | token_t *stmt_list = token_alloc_type(T_STMT_LIST); 34 | token_t *root = ast_append_child(ast_append_child(token_alloc_type(T_COMP_STMT), decl_list), stmt_list); 35 | assert(token_lookahead_notnull(cxt->token_cxt, 1)->type == T_LCPAREN); 36 | token_consume_type(cxt->token_cxt, T_LCPAREN); // After this line we enter a new scope 37 | token_enter_scope(cxt->token_cxt); 38 | while(parse_decl_isbasetype(cxt, token_lookahead_notnull(cxt->token_cxt, 1))) { // Loop through lines 39 | token_t *basetype = parse_decl_basetype(cxt); 40 | token_t *decl_entry = ast_append_child(token_alloc_type(T_DECL_STMT_ENTRY), basetype); 41 | ast_append_child(decl_list, decl_entry); 42 | while(1) { // Loop through variables 43 | token_t *decl = parse_decl(cxt, PARSE_DECL_NOBASETYPE); 44 | // Check decl's name here; If it is typedef then add the name into the token cxt 45 | if(DECL_ISTYPEDEF(basetype->decl_prop)) { 46 | token_t *name = ast_gettype(decl, T_IDENT); 47 | if(!name) error_row_col_exit(cxt->token_cxt->s, "Expecting a name for typedef\n"); 48 | assert(name->type == T_IDENT); 49 | token_add_utype(cxt->token_cxt, name); // Add a name, but does not need to concrete type 50 | } 51 | token_t *var = ast_append_child(token_alloc_type(T_DECL_STMT_VAR), decl); 52 | ast_append_child(decl_entry, var); 53 | token_t *la = token_lookahead_notnull(cxt->token_cxt, 1); 54 | if(la->type == T_ASSIGN) { 55 | token_consume_type(cxt->token_cxt, T_ASSIGN); 56 | if(token_lookahead_notnull(cxt->token_cxt, 1)->type == T_LCPAREN) ast_append_child(var, parse_init_list(cxt)); 57 | else ast_append_child(var, ast_append_child(token_alloc_type(T_INIT), parse_exp(cxt, PARSE_EXP_NOCOMMA))); 58 | la = token_lookahead_notnull(cxt->token_cxt, 1); 59 | } 60 | if(la->type == T_COMMA) { token_consume_type(cxt->token_cxt, T_COMMA); continue; } 61 | else if(la->type == T_SEMICOLON) { 
token_consume_type(cxt->token_cxt, T_SEMICOLON); break; } 62 | else { error_row_col_exit(la->offset, "Expecting \',\' or \';\' after variable declaration\n"); } 63 | } 64 | } // Then parse statement list 65 | while(token_lookahead_notnull(cxt->token_cxt, 1)->type != T_RCPAREN) ast_append_child(stmt_list, parse_stmt(cxt)); 66 | token_consume_type(cxt->token_cxt, T_RCPAREN); // After this line we exit new scope 67 | token_exit_scope(cxt->token_cxt); 68 | return root; 69 | } 70 | 71 | token_t *parse_if_stmt(parse_stmt_cxt_t *cxt) { 72 | assert(token_lookahead_notnull(cxt->token_cxt, 1)->type == T_IF); 73 | token_t *if_stmt = token_get_next(cxt->token_cxt); 74 | if(!token_consume_type(cxt->token_cxt, T_LPAREN)) error_row_col_exit(if_stmt->offset, "Expecting \'(\' after \"if\"\n"); 75 | ast_append_child(if_stmt, parse_exp(cxt, PARSE_EXP_ALLOWALL)); 76 | if(!token_consume_type(cxt->token_cxt, T_RPAREN)) error_row_col_exit(if_stmt->offset, "Expecting \')\' after \"if\"\n"); 77 | ast_append_child(if_stmt, parse_stmt(cxt)); 78 | if(token_lookahead_notnull(cxt->token_cxt, 1)->type == T_ELSE) { 79 | token_t *else_stmt = token_get_next(cxt->token_cxt); 80 | ast_append_child(if_stmt, else_stmt); 81 | ast_append_child(else_stmt, parse_stmt(cxt)); 82 | } 83 | return if_stmt; 84 | } 85 | 86 | token_t *parse_switch_stmt(parse_stmt_cxt_t *cxt) { 87 | assert(token_lookahead_notnull(cxt->token_cxt, 1)->type == T_SWITCH); 88 | token_t *switch_stmt = token_get_next(cxt->token_cxt); 89 | if(!token_consume_type(cxt->token_cxt, T_LPAREN)) error_row_col_exit(switch_stmt->offset, "Expecting \'(\' after \"switch\"\n"); 90 | ast_append_child(switch_stmt, parse_exp(cxt, PARSE_EXP_ALLOWALL)); 91 | if(!token_consume_type(cxt->token_cxt, T_RPAREN)) error_row_col_exit(switch_stmt->offset, "Expecting \')\' after \"switch\"\n"); 92 | ast_append_child(switch_stmt, parse_stmt(cxt)); 93 | return switch_stmt; 94 | } 95 | 96 | token_t *parse_while_stmt(parse_stmt_cxt_t *cxt) { 97 | 
assert(token_lookahead_notnull(cxt->token_cxt, 1)->type == T_WHILE); 98 | token_t *while_stmt = token_get_next(cxt->token_cxt); 99 | if(!token_consume_type(cxt->token_cxt, T_LPAREN)) error_row_col_exit(while_stmt->offset, "Expecting \'(\' after \"while\"\n"); 100 | ast_append_child(while_stmt, parse_exp(cxt, PARSE_EXP_ALLOWALL)); 101 | if(!token_consume_type(cxt->token_cxt, T_RPAREN)) error_row_col_exit(while_stmt->offset, "Expecting \')\' after \"while\"\n"); 102 | ast_append_child(while_stmt, parse_stmt(cxt)); 103 | return while_stmt; 104 | } 105 | 106 | token_t *parse_do_stmt(parse_stmt_cxt_t *cxt) { 107 | assert(token_lookahead_notnull(cxt->token_cxt, 1)->type == T_DO); 108 | token_t *do_stmt = token_get_next(cxt->token_cxt); 109 | ast_append_child(do_stmt, parse_stmt(cxt)); 110 | if(!token_consume_type(cxt->token_cxt, T_WHILE)) error_row_col_exit(do_stmt->offset, "Expecting \"while\" for \"do\" statement\n"); 111 | if(!token_consume_type(cxt->token_cxt, T_LPAREN)) error_row_col_exit(do_stmt->offset, "Expecting \'(\' after \"while\"\n"); 112 | ast_append_child(do_stmt, parse_exp(cxt, PARSE_EXP_ALLOWALL)); 113 | if(!token_consume_type(cxt->token_cxt, T_RPAREN)) error_row_col_exit(do_stmt->offset, "Expecting \')\' after \"while\"\n"); 114 | if(!token_consume_type(cxt->token_cxt, T_SEMICOLON)) error_row_col_exit(do_stmt->offset, "Expecting \';\' for \"do\" statement\n"); 115 | return do_stmt; 116 | } 117 | 118 | token_t *parse_for_stmt(parse_stmt_cxt_t *cxt) { 119 | assert(token_lookahead_notnull(cxt->token_cxt, 1)->type == T_FOR); 120 | token_t *for_stmt = token_get_next(cxt->token_cxt); 121 | if(!token_consume_type(cxt->token_cxt, T_LPAREN)) error_row_col_exit(for_stmt->offset, "Expecting \'(\' after \"for\"\n"); 122 | if(token_lookahead_notnull(cxt->token_cxt, 1)->type != T_SEMICOLON) ast_append_child(for_stmt, parse_exp(cxt, PARSE_EXP_ALLOWALL)); 123 | else ast_append_child(for_stmt, token_get_empty()); 124 | if(!token_consume_type(cxt->token_cxt, 
T_SEMICOLON)) error_row_col_exit(for_stmt->offset, "Expecting \';\' after first \"for\" expression\n"); 125 | if(token_lookahead_notnull(cxt->token_cxt, 1)->type != T_SEMICOLON) ast_append_child(for_stmt, parse_exp(cxt, PARSE_EXP_ALLOWALL)); 126 | else ast_append_child(for_stmt, token_get_empty()); 127 | if(!token_consume_type(cxt->token_cxt, T_SEMICOLON)) error_row_col_exit(for_stmt->offset, "Expecting \';\' after second \"for\" expression\n"); 128 | if(token_lookahead_notnull(cxt->token_cxt, 1)->type != T_RPAREN) ast_append_child(for_stmt, parse_exp(cxt, PARSE_EXP_ALLOWALL)); 129 | else ast_append_child(for_stmt, token_get_empty()); 130 | if(!token_consume_type(cxt->token_cxt, T_RPAREN)) error_row_col_exit(for_stmt->offset, "Expecting \')\' after \"for\"\n"); 131 | ast_append_child(for_stmt, parse_stmt(cxt)); 132 | return for_stmt; 133 | } 134 | 135 | token_t *parse_goto_stmt(parse_stmt_cxt_t *cxt) { 136 | token_t *token = token_get_next(cxt->token_cxt); 137 | assert(token->type == T_GOTO); 138 | if(token_lookahead_notnull(cxt->token_cxt, 1)->type != T_IDENT) 139 | error_row_col_exit(token->offset, "Expecting a label for \"goto\" statement\n"); 140 | ast_append_child(token, token_get_next(cxt->token_cxt)); 141 | if(!token_consume_type(cxt->token_cxt, T_SEMICOLON)) 142 | error_row_col_exit(token->offset, "Expecting \';\' after \"goto\" statement\n"); 143 | return token; 144 | } 145 | 146 | token_t *parse_brk_cont_stmt(parse_stmt_cxt_t *cxt) { 147 | token_t *token = token_get_next(cxt->token_cxt); 148 | assert(token->type == T_BREAK || token->type == T_CONTINUE); 149 | if(!token_consume_type(cxt->token_cxt, T_SEMICOLON)) 150 | error_row_col_exit(token->offset, "Expecting \';\' after \"%s\" statement\n", token_symstr(token->type)); 151 | return token; 152 | } 153 | 154 | token_t *parse_return_stmt(parse_stmt_cxt_t *cxt) { 155 | token_t *token = token_get_next(cxt->token_cxt); 156 | assert(token->type == T_RETURN); 157 | if(token_lookahead_notnull(cxt->token_cxt, 
1)->type != T_SEMICOLON) 158 | ast_append_child(token, parse_exp(cxt, PARSE_EXP_ALLOWALL)); 159 | if(!token_consume_type(cxt->token_cxt, T_SEMICOLON)) \ 160 | error_row_col_exit(token->offset, "Expecting \';\' after \"return\" statement\n"); 161 | return token; 162 | } 163 | 164 | // Returns a initializer list, { expr, expr, ..., expr } where expr could be nested initializer list 165 | token_t *parse_init_list(parse_stmt_cxt_t *cxt) { 166 | if(!token_consume_type(cxt->token_cxt, T_LCPAREN)) 167 | error_row_col_exit(cxt->token_cxt->s, "Expecting \'{\' for initializer list\n"); 168 | token_t *list = token_alloc_type(T_INIT_LIST); 169 | while(1) { 170 | token_t *la = token_lookahead_notnull(cxt->token_cxt, 1); 171 | if(la->type == T_RCPAREN) { token_consume_type(cxt->token_cxt, T_RCPAREN); break; } 172 | if(la->type == T_LCPAREN) ast_append_child(list, parse_init_list(cxt)); 173 | else ast_append_child(list, parse_exp(cxt, PARSE_EXP_NOCOMMA)); 174 | // Consume the comma, and if not a comma then let the loop continue 175 | if(token_lookahead_notnull(cxt->token_cxt, 1)->type == T_COMMA) 176 | token_consume_type(cxt->token_cxt, T_COMMA); 177 | } 178 | return list; 179 | } 180 | 181 | token_t *parse_stmt(parse_stmt_cxt_t *cxt) { 182 | while(1) { 183 | token_t *la = token_lookahead_notnull(cxt->token_cxt, 1); 184 | switch(la->type) { 185 | case T_DEFAULT: // Fall through 186 | case T_CASE: return parse_lbl_stmt(cxt, la->type); 187 | case T_IDENT: 188 | if(token_lookahead_notnull(cxt->token_cxt, 2)->type == T_COLON) return parse_lbl_stmt(cxt, la->type); 189 | else return parse_exp_stmt(cxt); 190 | case T_LCPAREN: return parse_comp_stmt(cxt); 191 | case T_IF: return parse_if_stmt(cxt); 192 | case T_SWITCH: return parse_switch_stmt(cxt); 193 | case T_WHILE: return parse_while_stmt(cxt); 194 | case T_DO: return parse_do_stmt(cxt); 195 | case T_FOR: return parse_for_stmt(cxt); 196 | case T_GOTO: return parse_goto_stmt(cxt); 197 | case T_CONTINUE: return 
parse_brk_cont_stmt(cxt); 198 | case T_BREAK: return parse_brk_cont_stmt(cxt); 199 | case T_RETURN: return parse_return_stmt(cxt); 200 | case T_SEMICOLON: token_consume_type(cxt->token_cxt, T_SEMICOLON); return token_get_empty(); 201 | default: return parse_exp_stmt(cxt); 202 | } 203 | } 204 | } -------------------------------------------------------------------------------- /src/parse_stmt.h: -------------------------------------------------------------------------------- 1 | 2 | #ifndef _PARSE_STMT_H 3 | #define _PARSE_STMT_H 4 | 5 | #include "parse_exp.h" 6 | 7 | typedef parse_exp_cxt_t parse_stmt_cxt_t; 8 | 9 | parse_stmt_cxt_t *parse_stmt_init(char *input); 10 | void parse_stmt_free(parse_stmt_cxt_t *cxt); 11 | token_t *parse_lbl_stmt(parse_stmt_cxt_t *cxt, token_type_t type); 12 | token_t *parse_comp_stmt(parse_stmt_cxt_t *cxt); 13 | token_t *parse_if_stmt(parse_stmt_cxt_t *cxt); 14 | token_t *parse_switch_stmt(parse_stmt_cxt_t *cxt); 15 | token_t *parse_while_stmt(parse_stmt_cxt_t *cxt); 16 | token_t *parse_do_stmt(parse_stmt_cxt_t *cxt); 17 | token_t *parse_for_stmt(parse_stmt_cxt_t *cxt); 18 | token_t *parse_goto_stmt(parse_stmt_cxt_t *cxt); 19 | token_t *parse_brk_cont_stmt(parse_stmt_cxt_t *cxt); 20 | token_t *parse_return_stmt(parse_stmt_cxt_t *cxt); 21 | token_t *parse_init_list(parse_stmt_cxt_t *cxt); 22 | token_t *parse_stmt(parse_stmt_cxt_t *cxt); 23 | 24 | #endif -------------------------------------------------------------------------------- /src/parse_test_src.txt: -------------------------------------------------------------------------------- 1 | 2 | parse_stmt_cxt_t *parse_stmt_init(char *input) { return parse_exp_init(input); } 3 | void parse_stmt_free(parse_stmt_cxt_t *cxt) { parse_exp_free(cxt); } 4 | 5 | // Return a labeled statement 6 | token_t *parse_lbl_stmt(parse_stmt_cxt_t *cxt, token_type_t type) { 7 | if(type == T_IDENT) { 8 | token_t *token = token_alloc_type(T_LBL_STMT); 9 | ast_append_child(token, token_get_next(cxt->token_cxt)); 
10 | if(!token_consume_type(cxt->token_cxt, T_COLON)) assert(0); // Caller guarantees this 11 | return ast_append_child(token, parse_stmt(cxt)); 12 | } 13 | //token_t *token = token_get_next(cxt->token_cxt); 14 | if(type == T_CASE) ast_append_child(token, parse_exp(cxt, PARSE_EXP_NOCOLON)); 15 | if(!token_consume_type(cxt->token_cxt, T_COLON)) 16 | error_row_col_exit(token->offset, "Expecting \':\' for \"%s\" statement\n", token_symstr(token->type)); 17 | return ast_append_child(token, parse_stmt(cxt)); 18 | } 19 | 20 | // Returns an expression statement 21 | token_t *parse_exp_stmt(parse_stmt_cxt_t *cxt) { 22 | token_t *token = ast_append_child(token_alloc_type(T_EXP_STMT), parse_exp(cxt, PARSE_EXP_ALLOWALL)); 23 | if(!token_consume_type(cxt->token_cxt, T_SEMICOLON)) 24 | error_row_col_exit(cxt->token_cxt->s, "Expecting \';\' after expression statement\n"); 25 | return token; 26 | } 27 | 28 | token_t *parse_comp_stmt(parse_stmt_cxt_t *cxt) { 29 | token_t *decl_list = token_alloc_type(T_DECL_STMT_LIST); 30 | token_t *stmt_list = token_alloc_type(T_STMT_LIST); 31 | token_t *root = ast_append_child(ast_append_child(token_alloc_type(T_COMP_STMT), decl_list), stmt_list); 32 | assert(token_lookahead_notnull(cxt->token_cxt, 1)->type == T_LCPAREN); 33 | token_consume_type(cxt->token_cxt, T_LCPAREN); 34 | while(parse_decl_isbasetype(cxt, token_lookahead_notnull(cxt->token_cxt, 1))) { // Loop through lines 35 | token_t *decl_entry = ast_append_child(token_alloc_type(T_DECL_STMT_ENTRY), parse_decl_basetype(cxt)); 36 | ast_append_child(decl_list, decl_entry); 37 | while(1) { // Loop through variables 38 | token_t *var = ast_append_child(token_alloc_type(T_DECL_STMT_VAR), parse_decl(cxt, PARSE_DECL_NOBASETYPE)); 39 | ast_append_child(decl_entry, var); 40 | //token_t *la = token_lookahead_notnull(cxt->token_cxt, 1); 41 | if(la->type == T_ASSIGN) { 42 | token_consume_type(cxt->token_cxt, T_ASSIGN); 43 | if(token_lookahead_notnull(cxt->token_cxt, 1)->type == T_LCPAREN) 
ast_append_child(var, parse_init_list(cxt)); 44 | else ast_append_child(var, parse_exp(cxt, PARSE_EXP_NOCOMMA)); 45 | la = token_lookahead_notnull(cxt->token_cxt, 1); 46 | } 47 | if(la->type == T_COMMA) { token_consume_type(cxt->token_cxt, T_COMMA); continue; } 48 | else if(la->type == T_SEMICOLON) { token_consume_type(cxt->token_cxt, T_SEMICOLON); break; } 49 | else { error_row_col_exit(la->offset, "Expecting \',\' or \';\' after variable declaration\n"); } 50 | } 51 | } // Then parse statement list 52 | while(token_lookahead_notnull(cxt->token_cxt, 1)->type != T_RCPAREN) ast_append_child(stmt_list, parse_stmt(cxt)); 53 | token_consume_type(cxt->token_cxt, T_RCPAREN); 54 | return root; 55 | } 56 | 57 | 58 | token_t *parse_if_stmt(parse_stmt_cxt_t *cxt) { 59 | //assert(token_lookahead_notnull(cxt->token_cxt, 1)->type == T_IF); 60 | token_t *if_stmt = token_get_next(cxt->token_cxt); 61 | if(!token_consume_type(cxt->token_cxt, T_LPAREN)) error_row_col_exit(if_stmt->offset, "Expecting \'(\' after \"if\"\n"); 62 | ast_append_child(if_stmt, parse_exp(cxt, PARSE_EXP_ALLOWALL)); 63 | if(!token_consume_type(cxt->token_cxt, T_RPAREN)) error_row_col_exit(if_stmt->offset, "Expecting \')\' after \"if\"\n"); 64 | ast_append_child(if_stmt, parse_stmt(cxt)); 65 | if(token_lookahead_notnull(cxt->token_cxt, 1)->type == T_ELSE) { 66 | token_t *else_stmt = token_get_next(cxt->token_cxt); 67 | ast_append_child(if_stmt, else_stmt); 68 | ast_append_child(else_stmt, parse_stmt(cxt)); 69 | } 70 | return if_stmt; 71 | } 72 | 73 | token_t *parse_for_stmt(parse_stmt_cxt_t *cxt) { 74 | //assert(token_lookahead_notnull(cxt->token_cxt, 1)->type == T_FOR); 75 | token_t *for_stmt = token_get_next(cxt->token_cxt); 76 | if(!token_consume_type(cxt->token_cxt, T_LPAREN)) error_row_col_exit(for_stmt->offset, "Expecting \'(\' after \"for\"\n"); 77 | if(token_lookahead_notnull(cxt->token_cxt, 1)->type != T_SEMICOLON) ast_append_child(for_stmt, parse_exp(cxt, PARSE_EXP_ALLOWALL)); 78 | else 
ast_append_child(for_stmt, token_get_empty()); 79 | if(!token_consume_type(cxt->token_cxt, T_SEMICOLON)) error_row_col_exit(for_stmt->offset, "Expecting \';\' after first \"for\" expression\n"); 80 | if(token_lookahead_notnull(cxt->token_cxt, 1)->type != T_SEMICOLON) ast_append_child(for_stmt, parse_exp(cxt, PARSE_EXP_ALLOWALL)); 81 | else ast_append_child(for_stmt, token_get_empty()); 82 | if(!token_consume_type(cxt->token_cxt, T_SEMICOLON)) error_row_col_exit(for_stmt->offset, "Expecting \';\' after second \"for\" expression\n"); 83 | if(token_lookahead_notnull(cxt->token_cxt, 1)->type != T_RPAREN) ast_append_child(for_stmt, parse_exp(cxt, PARSE_EXP_ALLOWALL)); 84 | else ast_append_child(for_stmt, token_get_empty()); 85 | if(!token_consume_type(cxt->token_cxt, T_RPAREN)) error_row_col_exit(for_stmt->offset, "Expecting \')\' after \"for\"\n"); 86 | ast_append_child(for_stmt, parse_stmt(cxt)); 87 | return for_stmt; 88 | } 89 | 90 | token_t *parse_goto_stmt(parse_stmt_cxt_t *cxt) { 91 | token_t *token = token_get_next(cxt->token_cxt); 92 | assert(token->type == T_GOTO); 93 | if(token_lookahead_notnull(cxt->token_cxt, 1)->type != T_IDENT) 94 | error_row_col_exit(token->offset, "Expecting a label for \"goto\" statement\n"); 95 | ast_append_child(token, token_get_next(cxt->token_cxt)); 96 | if(!token_consume_type(cxt->token_cxt, T_SEMICOLON)) 97 | error_row_col_exit(token->offset, "Expecting \';\' after \"goto\" statement\n"); 98 | return token; 99 | } 100 | 101 | token_t *parse_brk_cont_stmt(parse_stmt_cxt_t *cxt) { 102 | token_t *token = token_get_next(cxt->token_cxt); 103 | assert(token->type == T_BREAK || token->type == T_CONTINUE); 104 | if(!token_consume_type(cxt->token_cxt, T_SEMICOLON)) 105 | error_row_col_exit(token->offset, "Expecting \';\' after \"%s\" statement\n", token_symstr(token->type)); 106 | return token; 107 | } 108 | 109 | token_t *parse_return_stmt(parse_stmt_cxt_t *cxt) { 110 | token_t *token = token_get_next(cxt->token_cxt); 111 | 
assert(token->type == T_RETURN); 112 | if(token_lookahead_notnull(cxt->token_cxt, 1)->type != T_SEMICOLON) 113 | ast_append_child(token, parse_exp(cxt, PARSE_EXP_ALLOWALL)); 114 | if(!token_consume_type(cxt->token_cxt, T_SEMICOLON)) 115 | error_row_col_exit(token->offset, "Expecting \';\' after \"return\" statement\n"); 116 | return token; 117 | } 118 | 119 | // Returns a initializer list, { expr, expr, ..., expr } where expr could be nested initializer list 120 | token_t *parse_init_list(parse_stmt_cxt_t *cxt) { 121 | if(!token_consume_type(cxt->token_cxt, T_LCPAREN)) 122 | error_row_col_exit(cxt->token_cxt->s, "Expecting \'{\' for initializer list\n"); 123 | //token_t *list = token_alloc_type(T_INIT_LIST); 124 | while(1) { 125 | token_t *la = token_lookahead_notnull(cxt->token_cxt, 1); 126 | if(la->type == T_RCPAREN) { token_consume_type(cxt->token_cxt, T_RCPAREN); break; } 127 | if(la->type == T_LCPAREN) ast_append_child(list, parse_init_list(cxt)); 128 | else ast_append_child(list, parse_exp(cxt, PARSE_EXP_NOCOMMA)); 129 | // Consume the comma, and if not a comma then let the loop continue 130 | if(token_lookahead_notnull(cxt->token_cxt, 1)->type == T_COMMA) 131 | token_consume_type(cxt->token_cxt, T_COMMA); 132 | } 133 | return list; 134 | } 135 | 136 | token_t *parse_stmt(parse_stmt_cxt_t *cxt) { 137 | while(1) { 138 | token_t *la = token_lookahead_notnull(cxt->token_cxt, 1); 139 | switch(la->type) { 140 | case T_DEFAULT: // Fall through 141 | case T_CASE: return parse_lbl_stmt(cxt, la->type); 142 | case T_IDENT: 143 | if(token_lookahead_notnull(cxt->token_cxt, 2)->type == T_COLON) return parse_lbl_stmt(cxt, la->type); 144 | else return parse_exp_stmt(cxt); 145 | case T_LCPAREN: return parse_comp_stmt(cxt); 146 | case T_IF: return parse_if_stmt(cxt); 147 | case T_SWITCH: return parse_switch_stmt(cxt); 148 | case T_WHILE: return parse_while_stmt(cxt); 149 | case T_DO: return parse_do_stmt(cxt); 150 | case T_FOR: return parse_for_stmt(cxt); 151 | case T_GOTO: 
return parse_goto_stmt(cxt); 152 | case T_CONTINUE: return parse_brk_cont_stmt(cxt); 153 | case T_BREAK: return parse_brk_cont_stmt(cxt); 154 | case T_RETURN: return parse_return_stmt(cxt); 155 | case T_SEMICOLON: token_consume_type(cxt->token_cxt, T_SEMICOLON); return token_get_empty(); 156 | default: return parse_exp_stmt(cxt); 157 | } 158 | } 159 | } -------------------------------------------------------------------------------- /src/python/Makefile: -------------------------------------------------------------------------------- 1 | 2 | all: slr-gen lr-parse 3 | 4 | slr-gen: 5 | python ./syntax.py --slr ./krc-lr.syntax --dump-file=./krc-lr.table 6 | 7 | lr-gen: 8 | python ./syntax.py --lr1 ./krc-lr.syntax --dump-file=./krc-lr.table 9 | 10 | lalr-gen: 11 | python ./syntax.py --lalr ./krc-lr.syntax --dump-file=./krc-lr.table 12 | 13 | earley-parse: 14 | python ./syntax.py --earley ./krc-earley.syntax --token-file=./lex_test.c 15 | 16 | lr-parse: 17 | python ./syntax.py --lr ./krc-lr.table --token-file=./lex_test.c 18 | -------------------------------------------------------------------------------- /src/python/basic_type.py: -------------------------------------------------------------------------------- 1 | # 2 | # basic_type.py - This file defines primitive types for the language. We use basic types 3 | # to build more complicated types (e.g. arrays, structs, unions, etc.) and also 4 | # to perform static evaluation of expressions. 5 | # 6 | # We need to support basic types for static evaluation, i.e. integer types that have different length 7 | # 8 | 9 | ##################################################################### 10 | # class BaseType 11 | ##################################################################### 12 | 13 | class BaseType: 14 | """ 15 | This class is the common interface for any type. It implements type system's most 16 | fundamental functionality such as sizeof() operator.
17 | """ 18 | def __init__(self): 19 | """ 20 | Initialize the base type object 21 | 22 | :param length: Number of bytes this type occupies. Note that this is the real 23 | storage requirement, and does not contain padding value 24 | """ 25 | return 26 | 27 | def sizeof(self): 28 | """ 29 | Returns the size of the type. This must be overridden to avoid exception 30 | :return: None 31 | """ 32 | del self 33 | raise RuntimeError("Sizeof operator of a base type must be overridden") 34 | 35 | ##################################################################### 36 | # class IntegerType 37 | ##################################################################### 38 | 39 | class IntegerType(BaseType): 40 | """ 41 | This class represents integer types of arbitrary precision. The length of an integer 42 | type is an attribute of the class rather than a different class. This makes adding 43 | more integer types easier 44 | """ 45 | def __init__(self, length, signed): 46 | """ 47 | Initialize the integer type 48 | 49 | :param length: The byte length of the integer type 50 | :param signed: Boolean flag to indicate whether the type is signed or not 51 | """ 52 | # Calls the base class constructor first 53 | super(self.__class__, self).__init__() 54 | # This is the size of the integer type 55 | self.length = length 56 | # Whether the integer type is signed or not 57 | self.signed = signed 58 | 59 | return 60 | 61 | def sizeof(self): 62 | """ 63 | Returns the size of the integer type 64 | :return: int 65 | """ 66 | return self.length 67 | 68 | ##################################################################### 69 | # class StaticExpression 70 | ##################################################################### 71 | 72 | class StaticExpression: 73 | """ 74 | This class is used to evaluate static expressions. For static expression we only allow 75 | constant integer value and operators, or sizeof() operator with a type.
76 | """ 77 | def __init__(self): 78 | """ 79 | Prevents initializing this class 80 | """ 81 | raise RuntimeError("Please do not instantiate class StaticExpression") 82 | 83 | 84 | 85 | -------------------------------------------------------------------------------- /src/python/krc.syntax: -------------------------------------------------------------------------------- 1 | 2 | constant-expression: 3 | conditional-expression 4 | 5 | expression: 6 | assignment-expression 7 | expression T_COMMA assignment-expression 8 | 9 | assignment-expression: 10 | conditional-expression 11 | assignment-expression assignment-operator conditional-expression 12 | # This will cause a FIRST set conflict 13 | #unary-expression assignment-operator assignment-expression 14 | 15 | assignment-operator: 16 | T_ASSIGN 17 | T_PLUS_ASSIGN 18 | T_MINUS_ASSIGN 19 | T_STAR_ASSIGN 20 | T_DIV_ASSIGN 21 | T_MOD_ASSIGN 22 | T_LSHIFT_ASSIGN 23 | T_RSHIFT_ASSIGN 24 | T_AMPERSAND_ASSIGN 25 | T_BITXOR_ASSIGN 26 | T_BITOR_ASSIGN 27 | 28 | conditional-expression: 29 | logical-OR-expression 30 | logical-OR-expression T_QMARK expression T_COLON conditional-expression 31 | 32 | logical-OR-expression: 33 | logical-AND-expression 34 | logical-OR-expression T_OR logical-AND-expression 35 | 36 | logical-AND-expression: 37 | inclusive-OR-expression 38 | logical-AND-expression T_AND inclusive-OR-expression 39 | 40 | inclusive-OR-expression: 41 | exclusive-OR-expression 42 | inclusive-OR-expression T_BIT_OR exclusive-OR-expression 43 | 44 | exclusive-OR-expression: 45 | AND-expression 46 | exclusive-OR-expression T_BIT_XOR AND-expression 47 | 48 | AND-expression: 49 | equality-expression 50 | AND-expression T_BIT_AND equality-expression 51 | 52 | equality-expression: 53 | relational-expression 54 | equality-expression T_EQ relational-expression 55 | equality-expression T_NOTEQ relational-expression 56 | 57 | relational-expression: 58 | shift-expression 59 | relational-expression T_LESS shift-expression 60 | 
relational-expression T_LESSEQ shift-expression 61 | relational-expression T_GREATER shift-expression 62 | relational-expression T_GREATEREQ shift-expression 63 | 64 | shift-expression: 65 | additive-expression 66 | shift-expression T_LSHIFT additive-expression 67 | shift-expression T_RSHIFT additive-expression 68 | 69 | additive-expression: 70 | multiplicative-expression 71 | additive-expression T_PLUS multiplicative-expression 72 | additive-expression T_MINUS multiplicative-expression 73 | 74 | multiplicative-expression: 75 | cast-expression 76 | multiplicative-expression T_STAR cast-expression 77 | multiplicative-expression T_DIV cast-expression 78 | multiplicative-expression T_MOD cast-expression 79 | 80 | cast-expression: 81 | unary-expression 82 | T_LESS type-name T_GREATER cast-expression 83 | 84 | unary-operator: 85 | T_AMPERSAND 86 | T_STAR 87 | T_PLUS 88 | T_MINUS 89 | T_BITNOT 90 | T_NOT 91 | 92 | unary-expression: 93 | postfix-expression 94 | T_INC unary-expression 95 | T_DEC unary-expression 96 | unary-operator cast-expression 97 | T_SIZEOF unary-expression 98 | T_SIZEOF T_LESS type-name T_GREATER 99 | 100 | argument-expression-list: 101 | assignment-expression 102 | argument-expression-list T_COMMA assignment-expression 103 | 104 | postfix-expression: 105 | primary-expression 106 | postfix-expression T_LSPAREN expression T_RSPAREN 107 | postfix-expression T_LPAREN T_RPAREN 108 | postfix-expression T_LPAREN argument-expression-list T_RPAREN 109 | postfix-expression T_DOT T_IDENT 110 | postfix-expression T_ARROW T_IDENT 111 | postfix-expression T_INC 112 | postfix-expression T_DEC 113 | 114 | primary-expression: 115 | T_IDENT 116 | T_INT_CONST 117 | T_CHAR_CONST 118 | T_STRING_CONST 119 | T_LPAREN expression T_RPAREN 120 | 121 | 122 | ################################## 123 | # The following is the type system 124 | ################################## 125 | 126 | declaration: 127 | declaration-specifiers T_SEMICOLON 128 | declaration-specifiers 
init-declarator-list T_SEMICOLON 129 | 130 | declaration-specifiers: 131 | storage-class-specifier 132 | type-specifier 133 | type-qualifier 134 | storage-class-specifier declaration-specifiers 135 | type-specifier declaration-specifiers 136 | type-qualifier declaration-specifiers 137 | 138 | init-declarator-list: 139 | init-declarator 140 | init-declarator-list T_COMMA init-declarator 141 | 142 | init-declarator: 143 | declarator 144 | declarator T_ASSIGN initializer 145 | 146 | storage-class-specifier: 147 | T_TYPEDEF 148 | T_EXTERN 149 | T_STATIC 150 | T_AUTO 151 | T_REGISTER 152 | 153 | type-specifier: 154 | T_VOID 155 | T_CHAR 156 | T_SHORT 157 | T_INT 158 | T_LONG 159 | T_FLOAT 160 | T_DOUBLE 161 | T_SIGNED 162 | T_UNSIGNED 163 | struct-or-union-specifier 164 | enum-specifier 165 | 166 | struct-or-union-specifier: 167 | struct-or-union T_LCPAREN struct-declaration-list T_RCPAREN 168 | struct-or-union T_IDENT T_LCPAREN struct-declaration-list T_RCPAREN 169 | struct-or-union T_IDENT 170 | 171 | struct-or-union: 172 | T_STRUCT 173 | T_UNION 174 | 175 | struct-declaration-list: 176 | struct-declaration 177 | struct-declaration-list struct-declaration 178 | 179 | struct-declaration: 180 | specifier-qualifier-list struct-declarator-list T_SEMICOLON 181 | 182 | specifier-qualifier-list: 183 | type-specifier 184 | type-qualifier 185 | type-specifier specifier-qualifier-list 186 | type-qualifier specifier-qualifier-list 187 | 188 | struct-declarator-list: 189 | struct-declarator 190 | struct-declarator-list T_COMMA struct-declarator 191 | 192 | struct-declarator: 193 | declarator 194 | T_COLON constant-expression 195 | declarator T_COLON constant-expression 196 | 197 | enum-specifier: 198 | enum T_LCPAREN enumerator-list T_RCPAREN 199 | enum T_IDENT T_LCPAREN enumerator-list T_RCPAREN 200 | enum T_IDENT 201 | 202 | enumerator-list: 203 | enumerator 204 | enumerator-list T_COMMA enumerator 205 | 206 | enumerator: 207 | enumeration-constant 208 | enumeration-constant 
T_ASSIGN constant-expression 209 | 210 | enumeration-constant: 211 | T_IDENT 212 | 213 | type-qualifier: 214 | T_CONST 215 | T_VOLATILE 216 | 217 | declarator: 218 | direct-declarator 219 | pointer direct-declarator 220 | 221 | direct-declarator: 222 | T_IDENT 223 | T_LPAREN declarator T_RPAREN 224 | direct-declarator T_LSPAREN T_RSPAREN 225 | direct-declarator T_LSPAREN constant-expression T_RSPAREN 226 | direct-declarator T_LPAREN T_RPAREN 227 | direct-declarator T_LPAREN parameter-type-list T_RPAREN 228 | direct-declarator T_LPAREN identifier-list T_RPAREN 229 | 230 | pointer: 231 | T_STAR 232 | T_STAR pointer 233 | T_STAR type-qualifier-list 234 | T_STAR type-qualifier-list pointer 235 | 236 | type-qualifier-list: 237 | type-qualifier 238 | type-qualifier-list type-qualifier 239 | 240 | parameter-type-list: 241 | parameter-list 242 | #TODO: DO NOT SUPPORT VARARG 243 | #parameter-list , ... 244 | 245 | parameter-list: 246 | parameter-declaration 247 | parameter-list T_COMMA parameter-declaration 248 | 249 | parameter-declaration: 250 | # We do not allow abstract type here, otherwise it conflicts 251 | # with the declarator 252 | declaration-specifiers declarator 253 | declaration-specifiers 254 | # Do not allow abstract type 255 | #declaration-specifiers abstract-declarator 256 | 257 | identifier-list: 258 | T_IDENT 259 | identifier-list T_COMMA T_IDENT 260 | 261 | # This is used in cast expression or sizeof operator 262 | # Since we use < and > pair to denote abstract type in those 263 | # two cases, the abstract declarator could be used 264 | # In function declarations we could not rely on abstract declarator 265 | type-name: 266 | specifier-qualifier-list 267 | # We could use abstract declarator in type name for type casting 268 | # because it is easier for us to specify an abstract type 269 | # without giving a name 270 | specifier-qualifier-list abstract-declarator 271 | 272 | abstract-declarator: 273 | pointer 274 | direct-abstract-declarator 275 | pointer
direct-abstract-declarator 276 | 277 | direct-abstract-declarator: 278 | T_LPAREN abstract-declarator T_RPAREN 279 | T_LSPAREN T_RSPAREN 280 | T_LSPAREN constant-expression T_RSPAREN 281 | T_LPAREN T_RPAREN 282 | T_LPAREN parameter-type-list T_RPAREN 283 | direct-abstract-declarator T_LSPAREN T_RSPAREN 284 | direct-abstract-declarator T_LSPAREN constant-expression T_RSPAREN 285 | direct-abstract-declarator T_LPAREN T_RPAREN 286 | direct-abstract-declarator T_LPAREN parameter-type-list T_RPAREN 287 | 288 | initializer: 289 | assignment-expression 290 | T_LCPAREN initializer-list T_RCPAREN 291 | # Do not allow extra comma 292 | #T_LCPAREN initializer-list T_COMMA T_RCPAREN 293 | 294 | initializer-list: 295 | initializer 296 | initializer-list T_COMMA initializer 297 | -------------------------------------------------------------------------------- /src/python/lex_test.c: -------------------------------------------------------------------------------- 1 | 2 | // Typedef must specify a name 3 | //typedef 4 | const static register enum enum_struct { 5 | A = 1, 6 | B = 2, 7 | C = 3 8 | }; 9 | 10 | static const volatile register int aaa = 0x012345678ABCDEFL; 11 | 12 | void f(); 13 | 14 | /* 15 | * main() - The entry point of the program 16 | */ 17 | // Note that declaration list followed by function header is not supported 18 | int main(int argc, char **argv, typedef int what, ...) /* int x, y, z; */ { 19 | // This is the declaration without an identifier (WTF do we allow this?) 
#####################################################################
# class Scope
#####################################################################

class Scope:
    """
    One lexical scope. A scope owns four independent name tables: struct
    names, union names, typedef names and ordinary identifiers. Every
    entry is addressed by a ``(table_type, name)`` tuple, where
    ``table_type`` is one of the TABLE_TYPE_* constants below.
    """

    # Index of each table inside self.symbols. The accessors take one of
    # these indices instead of having one routine per table.
    TABLE_TYPE_INDEX_BEGIN = 0
    TABLE_TYPE_STRUCT = 0
    TABLE_TYPE_UNION = 1
    TABLE_TYPE_TYPEDEF = 2
    TABLE_TYPE_IDENT = 3
    TABLE_TYPE_INDEX_END = 3

    # Kind of scope
    SCOPE_TYPE_INDEX_BEGIN = 100
    # The global (top level) scope
    SCOPE_TYPE_GLOBAL = 100
    # Function level
    SCOPE_TYPE_FUNCTION = 101
    # Local scope inside a function
    SCOPE_TYPE_LOCAL = 102
    # Inside a struct or union definition, because name conflicts
    # can still occur at this level
    SCOPE_TYPE_STRUCT = 103
    SCOPE_TYPE_INDEX_END = 103

    def __init__(self, scope_type):
        """
        Create an empty scope of the given kind

        :param scope_type: One of the SCOPE_TYPE_* constants
        """
        # The scope type must be a valid one
        assert(Scope.SCOPE_TYPE_INDEX_BEGIN <=
               scope_type <=
               Scope.SCOPE_TYPE_INDEX_END)

        # One dict per table type; the count is derived from the index
        # constants so that adding a table type only needs a new constant
        self.symbols = [{} for _ in range(Scope.TABLE_TYPE_INDEX_BEGIN,
                                          Scope.TABLE_TYPE_INDEX_END + 1)]

        # Saved for later inspection through get_type()
        self.scope_type = scope_type

        return

    def get_table(self, t):
        """
        Return the table selected by a table type constant

        :param t: One of the TABLE_TYPE_* constants
        :return: The dict that backs the table
        """
        assert(Scope.TABLE_TYPE_INDEX_BEGIN <=
               t <=
               Scope.TABLE_TYPE_INDEX_END)

        return self.symbols[t]

    def get_type(self):
        """
        :return: The SCOPE_TYPE_* constant this scope was created with
        """
        return self.scope_type

    def __getitem__(self, item):
        """
        Fetch the value bound to a name (KeyError if it is not bound)

        :param item: Tuple(type, name)
        :return: The value stored in the table
        """
        table_type, name = item[0], item[1]
        return self.get_table(table_type)[name]

    def __contains__(self, item):
        """
        Check whether a name is bound in this scope

        :param item: Tuple(type, name)
        :return: bool
        """
        table_type, name = item[0], item[1]
        return name in self.get_table(table_type)

    def __setitem__(self, key, value):
        """
        Bind a name to a value in this scope

        :param key: Tuple(type, name)
        :param value: Any value
        :return: None
        """
        table_type, name = key[0], key[1]
        self.get_table(table_type)[name] = value
        return

    def get(self, key, ret):
        """
        Mimics dict.get(): return the bound value, or the alternative
        value if the name is not bound in this scope

        :param key: Tuple(type, name)
        :param ret: Alternative value if the name does not exist
        :return: Any value
        """
        table_type, name = key[0], key[1]
        return self.get_table(table_type).get(name, ret)

#####################################################################
# class SymbolTable
#####################################################################

class SymbolTable:
    """
    A stack of Scope objects. Lookups start at the topmost (innermost)
    scope and descend towards the bottommost one, which is the global
    scope; new bindings always go into the topmost scope.

    Misuse (leaving a scope when none is open, indexing an undefined
    name) raises AssertionError. The error is raised explicitly rather
    than with an ``assert`` statement so that the behavior is the same
    when Python runs with -O, which strips ``assert`` statements.
    """
    def __init__(self):
        """
        Initialize the stack with a single global scope
        """
        self.scope_stack = [Scope(Scope.SCOPE_TYPE_GLOBAL)]

        return

    def _require_open_scope(self):
        """
        Raise AssertionError if the scope stack is empty
        """
        if len(self.scope_stack) == 0:
            raise AssertionError("The scope stack is empty")

        return

    def _find_scope(self, key):
        """
        Return the innermost scope that binds the key, or None

        :param key: Tuple(type, name)
        :return: Scope or None
        """
        for scope in reversed(self.scope_stack):
            if key in scope:
                return scope

        return None

    def enter_scope(self, scope_type):
        """
        Enter a new scope by pushing a fresh Scope object

        :param scope_type: One of the Scope.SCOPE_TYPE_* constants
        :return: None
        """
        self.scope_stack.append(Scope(scope_type))

        return

    def leave_scope(self):
        """
        Leave the current scope by popping it from the stack.
        Raises AssertionError if no scope is open.

        :return: None
        """
        self._require_open_scope()
        self.scope_stack.pop()
        return

    def get_current_scope_type(self):
        """
        Return the type of the current (i.e. topmost) scope.
        Raises AssertionError if no scope is open.

        :return: Scope type constant
        """
        self._require_open_scope()

        return self.scope_stack[-1].get_type()

    def get_depth(self):
        """
        Return the number of open scopes. The depth is 1 when only the
        global scope is open.

        :return: int
        """
        return len(self.scope_stack)

    def get(self, key, ret):
        """
        Search all scopes, innermost first, and return the bound value,
        or the alternative value if no scope binds the name

        :param key: Tuple(type, name)
        :param ret: Alternative value if the name is not found
        :return: Any object
        """
        scope = self._find_scope(key)
        if scope is None:
            return ret

        return scope[key]

    def __contains__(self, item):
        """
        Check whether any open scope binds the name

        :param item: Tuple(type, name)
        :return: bool
        """
        return self._find_scope(item) is not None

    def __getitem__(self, item):
        """
        Return the innermost binding of the name. The caller must make
        sure the name is defined; AssertionError is raised otherwise.

        :param item: Tuple(type, name)
        :return: Any object
        """
        scope = self._find_scope(item)
        if scope is None:
            raise AssertionError("Name is not defined in any scope: %s" %
                                 (item, ))

        return scope[item]

    def __setitem__(self, key, value):
        """
        Bind the name in the topmost scope.
        Raises AssertionError if no scope is open.

        :param key: Tuple(type, name)
        :param value: Any object
        :return: None
        """
        self._require_open_scope()
        # Index -1 addresses the topmost scope
        self.scope_stack[-1][key] = value
        return
st[(Scope.TABLE_TYPE_STRUCT, "Global Struct")] = 456 314 | 315 | # Get the value on the same level 316 | assert(st[(Scope.TABLE_TYPE_IDENT, "Global Ident")] == 123) 317 | assert(st[(Scope.TABLE_TYPE_STRUCT, "Global Struct")] == 456) 318 | 319 | st.enter_scope(Scope.SCOPE_TYPE_FUNCTION) 320 | st[(Scope.TABLE_TYPE_STRUCT, "Functional Struct")] = 789 321 | 322 | assert (st.get_current_scope_type() == Scope.SCOPE_TYPE_FUNCTION) 323 | assert(st[(Scope.TABLE_TYPE_IDENT, "Global Ident")] == 123) 324 | assert(st[(Scope.TABLE_TYPE_STRUCT, "Global Struct")] == 456) 325 | assert(st[(Scope.TABLE_TYPE_STRUCT, "Functional Struct")] == 789) 326 | 327 | st.enter_scope(Scope.SCOPE_TYPE_LOCAL) 328 | 329 | assert(st.get_current_scope_type() == Scope.SCOPE_TYPE_LOCAL) 330 | assert(st[(Scope.TABLE_TYPE_IDENT, "Global Ident")] == 123) 331 | assert(st[(Scope.TABLE_TYPE_STRUCT, "Global Struct")] == 456) 332 | assert(st[(Scope.TABLE_TYPE_STRUCT, "Functional Struct")] == 789) 333 | assert((Scope.TABLE_TYPE_IDENT, "Global Ident") in st) 334 | assert((Scope.TABLE_TYPE_IDENT, "Global Ident 2") not in st) 335 | assert(st.get((Scope.TABLE_TYPE_IDENT, "Global Ident"), None) == 123) 336 | assert (st.get((Scope.TABLE_TYPE_IDENT, "Global Ident 2"), None) is None) 337 | 338 | st.leave_scope() 339 | st.leave_scope() 340 | st.leave_scope() 341 | assert(st.get_depth() == 0) 342 | 343 | try: 344 | caught = False 345 | st.leave_scope() 346 | except AssertionError: 347 | caught = True 348 | 349 | assert(caught is True) 350 | 351 | return 352 | 353 | # Finally, run the test case if this file is invoked 354 | if __name__ == "__main__": 355 | ScopeTestCase() 356 | 357 | 358 | 359 | 360 | -------------------------------------------------------------------------------- /src/python/token_list.txt: -------------------------------------------------------------------------------- 1 | TokenType = T_INT ; Other token type 2 | TokenType = T_IDENT ; Identifier = main 3 | TokenType = T_LPAREN ; Other token type 4 | 
TokenType = T_INT ; Other token type 5 | TokenType = T_IDENT ; Identifier = argv 6 | TokenType = T_COMMA ; Other token type 7 | TokenType = T_CHAR ; Other token type 8 | TokenType = T_STAR ; Other token type 9 | TokenType = T_STAR ; Other token type 10 | TokenType = T_IDENT ; Identifier = argv 11 | TokenType = T_COMMA ; Other token type 12 | TokenType = T_ELLIPSIS ; Other token type 13 | TokenType = T_RPAREN ; Other token type 14 | TokenType = T_LCPAREN ; Other token type 15 | TokenType = T_INT ; Other token type 16 | TokenType = T_LPAREN ; Other token type 17 | TokenType = T_STAR ; Other token type 18 | TokenType = T_IDENT ; Identifier = q 19 | TokenType = T_RPAREN ; Other token type 20 | TokenType = T_LPAREN ; Other token type 21 | TokenType = T_INT ; Other token type 22 | TokenType = T_COMMA ; Other token type 23 | TokenType = T_CHAR ; Other token type 24 | TokenType = T_STAR ; Other token type 25 | TokenType = T_RPAREN ; Other token type 26 | TokenType = T_COMMA ; Other token type 27 | TokenType = T_STAR ; Other token type 28 | TokenType = T_IDENT ; Identifier = p 29 | TokenType = T_ASSIGN ; Other token type 30 | TokenType = T_INT_CONST ; Int const = 5 31 | TokenType = T_SEMICOLON ; Other token type 32 | TokenType = T_STRUCT ; Other token type 33 | TokenType = T_LCPAREN ; Other token type 34 | TokenType = T_INT ; Other token type 35 | TokenType = T_IDENT ; Identifier = a 36 | TokenType = T_SEMICOLON ; Other token type 37 | TokenType = T_LONG ; Other token type 38 | TokenType = T_IDENT ; Identifier = b 39 | TokenType = T_SEMICOLON ; Other token type 40 | TokenType = T_SHORT ; Other token type 41 | TokenType = T_IDENT ; Identifier = c 42 | TokenType = T_SEMICOLON ; Other token type 43 | TokenType = T_RCPAREN ; Other token type 44 | TokenType = T_IDENT ; Identifier = stat 45 | TokenType = T_SEMICOLON ; Other token type 46 | TokenType = T_IDENT ; Identifier = p 47 | TokenType = T_ASSIGN ; Other token type 48 | TokenType = T_IDENT ; Identifier = a 49 | TokenType = 
T_PLUS ; Other token type 50 | TokenType = T_INT_CONST ; Int const = 2 51 | TokenType = T_GREATER ; Other token type 52 | TokenType = T_STAR ; Other token type 53 | TokenType = T_IDENT ; Identifier = b 54 | TokenType = T_LSPAREN ; Other token type 55 | TokenType = T_INT_CONST ; Int const = 1 56 | TokenType = T_RSPAREN ; Other token type 57 | TokenType = T_QMARK ; Other token type 58 | TokenType = T_IDENT ; Identifier = c 59 | TokenType = T_COLON ; Other token type 60 | TokenType = T_INT_CONST ; Int const = 2080374784 61 | TokenType = T_SEMICOLON ; Other token type 62 | TokenType = T_IF ; Other token type 63 | TokenType = T_LPAREN ; Other token type 64 | TokenType = T_IDENT ; Identifier = i 65 | TokenType = T_LESS ; Other token type 66 | TokenType = T_INT_CONST ; Int const = 10 67 | TokenType = T_RPAREN ; Other token type 68 | TokenType = T_IDENT ; Identifier = i 69 | TokenType = T_ASSIGN ; Other token type 70 | TokenType = T_INT_CONST ; Int const = 100 71 | TokenType = T_SEMICOLON ; Other token type 72 | TokenType = T_ELSE ; Other token type 73 | TokenType = T_IDENT ; Identifier = i 74 | TokenType = T_STAR_ASSIGN ; Other token type 75 | TokenType = T_INT_CONST ; Int const = 2 76 | TokenType = T_SEMICOLON ; Other token type 77 | TokenType = T_FOR ; Other token type 78 | TokenType = T_LPAREN ; Other token type 79 | TokenType = T_SEMICOLON ; Other token type 80 | TokenType = T_IDENT ; Identifier = i 81 | TokenType = T_LESS ; Other token type 82 | TokenType = T_INT_CONST ; Int const = 20 83 | TokenType = T_SEMICOLON ; Other token type 84 | TokenType = T_RPAREN ; Other token type 85 | TokenType = T_LCPAREN ; Other token type 86 | TokenType = T_IDENT ; Identifier = i 87 | TokenType = T_PLUS ; Other token type 88 | TokenType = T_INT_CONST ; Int const = 2 89 | TokenType = T_SEMICOLON ; Other token type 90 | TokenType = T_RCPAREN ; Other token type 91 | TokenType = T_STAR ; Other token type 92 | TokenType = T_IDENT ; Identifier = a 93 | TokenType = T_ASSIGN ; Other token 
class BaseType:
    """
    This class serves as the base class for all types
    that could be used as a base type

    Note that typedef names are not considered as a separate
    type because typedef'ed names may be compatible with
    other types, so we always expand typedef'ed types
    """

    # Bit flags stored in self.type_spec, one bit per specifier/qualifier
    TYPE_SPEC_NONE = 0x00000000
    TYPE_SPEC_CONST = 0x00000001
    TYPE_SPEC_VOLATILE = 0x00000002
    TYPE_SPEC_STATIC = 0x00000004
    TYPE_SPEC_REGISTER = 0x00000008
    TYPE_SPEC_EXTERN = 0x00000010
    TYPE_SPEC_UNSIGNED = 0x00000020
    TYPE_SPEC_AUTO = 0x00000040
    TYPE_SPEC_SIGNED = 0x00000080

    # Maps the token type to its flag value
    TYPE_SPEC_DICT = {
        # Type qualifier
        "T_CONST": TYPE_SPEC_CONST,
        "T_VOLATILE": TYPE_SPEC_VOLATILE,
        # Storage class specifier
        "T_STATIC": TYPE_SPEC_STATIC,
        "T_REGISTER": TYPE_SPEC_REGISTER,
        "T_EXTERN": TYPE_SPEC_EXTERN,
        "T_UNSIGNED": TYPE_SPEC_UNSIGNED,
        "T_AUTO": TYPE_SPEC_AUTO,
        "T_SIGNED": TYPE_SPEC_SIGNED,
    }

    # Byte length of the integer types
    # Note that the length of long is 8 rather than 4
    TYPE_LENGTH_CHAR = 1
    TYPE_LENGTH_SHORT = 2
    TYPE_LENGTH_INT = 4
    TYPE_LENGTH_LONG = 8

    def __init__(self):
        """
        Initialize the base type attributes which are shared between
        all base types
        """
        self.type_spec = BaseType.TYPE_SPEC_NONE
        return

    def add_spec_list(self, spec_list):
        """
        Add specs from a spec list

        :param spec_list: T_DECL_SPEC OR T_SPEC_QUAL_LIST
        :return: None
        """
        assert(spec_list.symbol == "T_DECL_SPEC" or
               spec_list.symbol == "T_SPEC_QUAL_LIST")

        # For each node in the spec list, add the specifier
        for node in spec_list.child_list:
            self.add_spec(node.symbol)

        return

    def add_spec(self, spec_name):
        """
        Add a specifier to the base type.

        (1) If the spec is not one of the flags in TYPE_SPEC_DICT then
            it is ignored, because the list may carry other information
            (such as the base type itself)
        (2) If the spec has already been added then TypeError is raised
            because the input program is wrong

        :param spec_name: The string object that contains the
                          specification
        :return: None
        """
        mask = BaseType.TYPE_SPEC_DICT.get(spec_name, None)

        # Not a flag: nothing to record. Without this check the bitwise
        # AND below would be applied to None and raise a TypeError that
        # is indistinguishable from the "duplicated" error
        if mask is None:
            return

        # If already defined then throw error
        if (self.type_spec & mask) != 0x0:
            raise TypeError("Duplicated type specifier or qualifier: %s" %
                            (spec_name, ))

        # Otherwise just OR it to the specifier bit mask
        self.type_spec |= mask

        return
class PtrType(BaseType):
    """
    This class represents pointer type. Pointer types need specifiers
    as the type specifier for the current level, which are stored in
    the type_spec flags inherited from BaseType
    """
    def __init__(self):
        """
        Initialize an empty object denoting the operation
        """
        # The base initializer is called through the class, so the
        # instance has to be passed explicitly; without it the call
        # raises TypeError and no PtrType can ever be created
        BaseType.__init__(self)
        return
An array type has associated 205 | data as the static size of the array (which requires static 206 | evaluation of expressions). If the array size is not known 207 | then we ignore it 208 | """ 209 | def __init__(self, array_size): 210 | """ 211 | Initialize the array type using array size 212 | 213 | :param array_size: The size of the array; Could be None 214 | if size is not known 215 | """ 216 | BaseType.__init__(self) 217 | self.array_size = array_size 218 | return 219 | 220 | class FuncType(BaseType): 221 | """ 222 | This class represents a function pointer type. The data it carries 223 | is function parameter type list, which is a list of types, optionally 224 | with name bindings if names are specified for functions 225 | """ 226 | def __init__(self, param_type_list, is_vararg): 227 | """ 228 | Initialize the function type using a parameter list 229 | 230 | :param param_type_list: A list of parameter types 231 | :param is_vararg: Whether the function is vararg 232 | """ 233 | BaseType.__init__(self) 234 | self.param_type_list = param_type_list 235 | self.is_vararg = is_vararg 236 | return 237 | 238 | ##################################################################### 239 | # class TypeNode 240 | ##################################################################### 241 | 242 | class TypeNode: 243 | """ 244 | This class represents base type and type derivation rules 245 | """ 246 | 247 | # This maps base type names to their class instance 248 | # This does not include bit field type 249 | # These are only types that we could determine 250 | BASE_TYPE_DICT = { 251 | "T_INT": IntType(BaseType.TYPE_LENGTH_INT), 252 | "T_CHAR": IntType(BaseType.TYPE_LENGTH_CHAR), 253 | "T_LONG": IntType(BaseType.TYPE_LENGTH_LONG), 254 | "T_SHORT": IntType(BaseType.TYPE_LENGTH_SHORT), 255 | "T_VOID": VoidType(), 256 | } 257 | 258 | def __init__(self): 259 | """ 260 | Initialize the type node with a base type 261 | """ 262 | # Type derivation rule. 
We store the operation 263 | # with the highest precedence before operations 264 | # with lower precedence 265 | # Note that the last element must be the base type 266 | self.rule_list = [] 267 | 268 | # We use this as a lazy way of applying operations on 269 | # a type. All interpretation of the type should start 270 | # with elements using this index 271 | self.index = 0 272 | 273 | return 274 | 275 | def __len__(self): 276 | """ 277 | Returns the length of the actual array (i.e. starting at 278 | index = 0). The array must have length greater than zero 279 | 280 | :return: int 281 | """ 282 | assert(len(self.rule_list) > 0) 283 | assert(0 <= index < len(self.rule_list)) 284 | 285 | return len(self.rule_list) 286 | 287 | def expand_typedef_name(self, symbol_table, typedef_name): 288 | """ 289 | This function expands typedef name into the current type 290 | node to yield a new type 291 | 292 | If the typedef'ed name does not exist assertion fails, 293 | because the parser guarantees that typedef'ed names are 294 | recognized only if they are defined 295 | 296 | :param symbol_table: The symbol table 297 | :param typedef_name: The type name that is typedef'ed 298 | :return: None 299 | """ 300 | t = symbol_table.get((Scope.TYPEDEF, typedef_name), None) 301 | assert(t is not None) 302 | assert(isinstance(t, TypeNode)) 303 | # Append the rule list of the typedef'ed name 304 | # to the current type node. 
Note that this is only 305 | # a shallow copy of the array 306 | self.rule_list += t.rule_list[index:] 307 | 308 | return 309 | 310 | def add_derivation(self, spec_body_node): 311 | """ 312 | This function processes a given derivation body 313 | and adds them to the rule list, from the highest 314 | precedence to the lowest precedence 315 | 316 | :param spec_body_node: T_DECL_BODY or T_ABS_DECL_BODY 317 | :return: None 318 | """ 319 | assert(spec_body_node.symbol == "T_DECL_BODY" or 320 | spec_body_node.symbol == "T_ABS_DECL_BODY") 321 | 322 | i = 0 323 | while i < len(spec_body_node): 324 | child = spec_body_node[i] 325 | child_name = child.symbol 326 | if child_name == "T_PTR": 327 | # There might be multiple levels of pointers 328 | # We add specifier for each level 329 | for ptr in child.child_list: 330 | ptr_type = PtrType() 331 | # Then it must be a specifier list 332 | if ptr.symbol != "T_": 333 | ptr_type.add_spec_list(ptr) 334 | 335 | self.rule_list.append(ptr_type) 336 | 337 | # It only takes one slot 338 | i += 1 339 | elif child_name == "T_IDENT": 340 | # It also only takes one slot 341 | i += 1 342 | elif child_name == "T_ARRAY_SUB": 343 | sub = spec_body_node[i + 1] 344 | 345 | if sub.symbol != "T_": 346 | raise NotImplementedError("Static evaluation of array sizes") 347 | else: 348 | array_type = ArrayType() 349 | 350 | self.rule_list.append(array_type) 351 | # It takes two slots 352 | i += 2 353 | elif child_name == "T_FUNC_CALL": 354 | sub = spec_body_node[i + 1] 355 | if sub.symbol != "T_": 356 | if sub.symbol == "T_IDENT_LIST": 357 | raise TypeError("Old-style function declaration" + 358 | " no longer supported") 359 | raise NotImplementedError("Type for function arguments") 360 | else: 361 | # Empty list for arguments 362 | func_type = FuncType([], []) 363 | else: 364 | # Do not know what is the type 365 | assert False 366 | 367 | return 368 | 369 | @staticmethod 370 | def report_type_conflict(t1, t2): 371 | """ 372 | Report a type conflict 
def add_base_type_node(self, symbol_table, spec_node):
    """
    Determine the base type named by a specifier list and, when it
    is a typedef'ed name, expand that name into this type node

    At most one base type may be given; a second one raises
    TypeError through report_type_conflict(). "int" combined with
    "short" or "long" is not a conflict: the "int" is implied and
    the base type becomes short or long.

    NOTE(review): the body looks unfinished. It returns nothing,
    it only acts on typedef'ed names, and a specifier list without
    any base type reaches base_type_node.symbol with None. Confirm
    the intended behavior before relying on it.

    :param symbol_table: The symbol table used to look up typedef names
    :param spec_node: The T_SPEC_QUAL_LIST or T_DECL_SPEC
    :return: None
    """
    assert (spec_node.symbol == "T_SPEC_QUAL_LIST" or
            spec_node.symbol == "T_DECL_SPEC")

    # This points to the base type node
    base_type_node = None
    # Whether we have seen short or long
    ignore_int = False

    for node in spec_node.child_list:
        name = node.symbol
        if name == "T_INT":
            # If already seen short or long then
            # skip this because it is implied
            if ignore_int is True:
                continue

            if base_type_node is not None:
                self.report_type_conflict(node, base_type_node)
            base_type_node = node
        elif name == "T_CHAR" or \
             name == "T_VOID" or \
             name == "T_STRUCT" or \
             name == "T_UNION" or \
             name == "T_TYPEDEF_NAME":
            if base_type_node is not None:
                self.report_type_conflict(node, base_type_node)
            base_type_node = node
        elif name == "T_LONG" or name == "T_SHORT":
            ignore_int = True
            # Also if we have seen INT just ignore it
            # and update the base type to short or long
            if base_type_node is not None and \
               base_type_node.symbol != "T_INT":
                self.report_type_conflict(node, base_type_node)
            base_type_node = node

    type_name = base_type_node.symbol
    type_obj = self.BASE_TYPE_DICT.get(type_name, None)
    if type_obj is None:
        if type_name == "T_TYPEDEF_NAME":
            # Expand the typedef name into the type
            self.expand_typedef_name(symbol_table,
                                     base_type_node.data)
== 0; } 62 | int stack_size(stack_t *stack) { return stack->size; } 63 | -------------------------------------------------------------------------------- /src/stack.h: -------------------------------------------------------------------------------- 1 | 2 | #ifndef _STACK_H 3 | #define _STACK_H 4 | 5 | #include 6 | #include 7 | #include 8 | #include 9 | 10 | #define STACK_INIT_CAPACITY 128 11 | 12 | // Implements a general stack which is used in the shift-reduce parsing algo. 13 | typedef struct { 14 | int size; 15 | int capacity; 16 | void **data; 17 | } stack_t; 18 | 19 | stack_t *stack_init(); 20 | void stack_free(stack_t *stack); 21 | void stack_push(stack_t *stack, void *p); 22 | void *stack_pop(stack_t *stack); 23 | void *stack_peek(stack_t *stack); 24 | void *stack_peek_at(stack_t *stack, int offset); 25 | void *stack_at(stack_t *stack, int index); 26 | int stack_empty(stack_t *stack); 27 | int stack_size(stack_t *stack); 28 | void **stack_topaddr(stack_t *stack); 29 | 30 | #endif -------------------------------------------------------------------------------- /src/str.c: -------------------------------------------------------------------------------- 1 | 2 | #include 3 | #include 4 | #include 5 | #include "str.h" 6 | #include "error.h" 7 | 8 | str_t *str_init() { 9 | str_t *str = (str_t *)malloc(sizeof(str_t)); 10 | SYSEXPECT(str != NULL); 11 | str->s = (char *)malloc(STR_INIT_SIZE + 1); 12 | SYSEXPECT(str->s != NULL); 13 | str->capacity = STR_INIT_SIZE; 14 | str->size = 0; 15 | str->s[0] = '\0'; 16 | return str; 17 | } 18 | void str_free(str_t *str) { free(str->s); free(str); } 19 | void str_clear(str_t *str) { str->s[0] = '\0'; str->size = 0; } 20 | int str_size(str_t *str) { return str->size; } 21 | 22 | // Realloc the buffer to hold at least size + 1 bytes 23 | void str_extend(str_t *str, int size) { 24 | if(size > str->capacity) { 25 | str->s = realloc(str->s, size + 1); 26 | SYSEXPECT(str->s != NULL); 27 | str->capacity = size; 28 | } 29 | } 30 | 31 | 
// Appends the decimal representation of d to the end of str.
// The digits are first formatted into a stack buffer of MAX_INT_DIGITS bytes.
// snprintf() bounds the write to the buffer size, and the assertion verifies
// that the formatted text was neither empty nor truncated.
void str_print_int(str_t *str, int d) {
  char temp[MAX_INT_DIGITS];
  int len = snprintf(temp, sizeof(temp), "%d", d);
  assert(len > 0 && len < (int)sizeof(temp));
  (void)len; // Keeps the build warning-free when NDEBUG compiles the assert away
  str_concat(str, temp);
}
The str is not changed 70 | char *s = (char *)malloc(str->size + 1); 71 | SYSEXPECT(s != NULL); 72 | memcpy(s, str->s, str->size + 1); 73 | return s; 74 | } 75 | 76 | vector_t *vector_init() { 77 | vector_t *vector = (vector_t *)malloc(sizeof(vector_t)); 78 | SYSEXPECT(vector != NULL); 79 | vector->data = (void **)malloc(VECTOR_INIT_SIZE * sizeof(void *)); 80 | SYSEXPECT(vector->data != NULL); 81 | vector->size = 0; 82 | vector->capacity = VECTOR_INIT_SIZE; 83 | return vector; 84 | } 85 | void vector_free(vector_t *vector) { free(vector->data); free(vector); } 86 | int vector_size(vector_t *vector) { return vector->size; } 87 | 88 | void vector_extend(vector_t *vector, int size) { 89 | if(size > vector->capacity) { 90 | vector->capacity = size; 91 | vector->data = realloc(vector->data, size * sizeof(void *)); 92 | SYSEXPECT(vector->data != NULL); 93 | } 94 | return; 95 | } 96 | 97 | void vector_append(vector_t *vector, void *value) { 98 | if(vector->size == vector->capacity) vector_extend(vector, vector->size * 2); 99 | assert(vector->size < vector->capacity); 100 | vector->data[vector->size++] = value; 101 | return; 102 | } 103 | 104 | void *vector_at(vector_t *vector, int index) { 105 | assert(index < vector->size && index >= 0); 106 | return vector->data[index]; 107 | } 108 | 109 | void **vector_addrat(vector_t *vector, int index) { 110 | assert(index < vector->capacity && index >= 0); // Since we only take address, use capacity here 111 | return vector->data + index; 112 | } -------------------------------------------------------------------------------- /src/str.h: -------------------------------------------------------------------------------- 1 | 2 | #ifndef _STR_H 3 | #define _STR_H 4 | 5 | #define STR_INIT_SIZE 32 // Excluding the terminating 0 6 | #define VECTOR_INIT_SIZE 32 7 | #define MAX_INT_DIGITS 64 // Can't be that long... 
8 | 9 | typedef struct { 10 | int size; 11 | int capacity; // Both excluding the terminating 0 12 | char *s; 13 | } str_t; 14 | 15 | str_t *str_init(); 16 | void str_free(str_t *str); 17 | void str_clear(str_t *str); // This does not free memory 18 | int str_size(str_t *str); 19 | void str_extend(str_t *str, int size); 20 | void str_append(str_t *str, char ch); 21 | void str_prepend(str_t *str, char ch); 22 | void str_concat(str_t *str, const char *s); 23 | void str_prepend_str(str_t *str, const char *s); 24 | void str_print_int(str_t *str, int d); // Append an integer at the end of the str 25 | char *str_copy(const str_t *str); 26 | static inline char *str_cstr(str_t *s) { return s->s; } 27 | 28 | typedef struct { 29 | int size, capacity; 30 | void **data; 31 | } vector_t; 32 | 33 | vector_t *vector_init(); 34 | void vector_free(vector_t *vector); 35 | int vector_size(vector_t *vector); 36 | void vector_extend(vector_t *vector, int size); 37 | void vector_append(vector_t *vector, void *value); 38 | void *vector_at(vector_t *vector, int index); 39 | void **vector_addrat(vector_t *vector, int index); 40 | 41 | #endif -------------------------------------------------------------------------------- /src/tests/test_cgen.c: -------------------------------------------------------------------------------- 1 | 2 | #include 3 | #include 4 | #include "stack.h" 5 | #include "token.h" 6 | #include "error.h" 7 | #include "ast.h" 8 | #include "parse.h" 9 | #include "hashtable.h" 10 | #include "bintree.h" 11 | #include "list.h" 12 | #include "str.h" 13 | #include "type.h" 14 | #include "eval.h" 15 | #include "cgen.h" 16 | 17 | typedef struct { 18 | cgen_cxt_t *cgen_cxt; // This contains type cxt 19 | type_cxt_t *type_cxt; 20 | parse_exp_cxt_t *parse_cxt; // This contains token cxt 21 | token_cxt_t *token_cxt; 22 | } test_cxt_t; 23 | 24 | test_cxt_t *test_init(char *s) { 25 | test_cxt_t *cxt = (test_cxt_t *)malloc(sizeof(test_cxt_t)); 26 | SYSEXPECT(cxt != NULL); 27 | memset(cxt, 
0x00, sizeof(test_cxt_t)); 28 | cxt->cgen_cxt = cgen_init(); 29 | cxt->type_cxt = cxt->cgen_cxt->type_cxt; 30 | cxt->parse_cxt = parse_exp_init(s); 31 | cxt->token_cxt = cxt->parse_cxt->token_cxt; 32 | return cxt; 33 | } 34 | 35 | void test_free(test_cxt_t *cxt) { 36 | cgen_free(cxt->cgen_cxt); 37 | parse_exp_free(cxt->parse_cxt); 38 | free(cxt); 39 | return; 40 | } 41 | 42 | void test_cgen_global_decl() { 43 | printf("=== Test cgen_global_decl ===\n"); 44 | 45 | test_cxt_t *cxt; 46 | token_t *token; 47 | 48 | // Test basis import export 49 | cxt = test_init("extern const int array[120 + 20]; int array2[] = {1, 2, 3, 4, 5}; "); 50 | token = parse(cxt->parse_cxt); 51 | cgen(cxt->cgen_cxt, token); 52 | ast_print(token); 53 | cgen_print_cxt(cxt->cgen_cxt); 54 | ast_free(token); 55 | test_free(cxt); 56 | printf("=====================================\n"); 57 | // Test array size + def after decl 58 | cxt = test_init("extern int array[2 + 3]; int array[] = {1, 2, 3, 4, }; "); 59 | token = parse(cxt->parse_cxt); 60 | cgen(cxt->cgen_cxt, token); 61 | ast_print(token); 62 | cgen_print_cxt(cxt->cgen_cxt); 63 | ast_free(token); 64 | test_free(cxt); 65 | printf("=====================================\n"); 66 | // Test decl after decl + decl after def 67 | cxt = test_init("extern int array[2 + 3]; extern int array[]; \n" 68 | "extern int array2[]; extern int array2[]; \n" 69 | "extern int array3[]; extern int array3[3 + 4]; extern int array3[]; \n" 70 | "int array4[4 << 1]; extern int array4[8]; \n" 71 | "int array5[] = {1, 2, 3}; extern int array5[]; \n" 72 | "char array6[] = \"abcdefg\\n\"; extern char array6[]; \n" // size should be 9 73 | "extern const char array7[]; const char array7[10] = \"12345\"; \n"); // size should be 10 74 | token = parse(cxt->parse_cxt); 75 | cgen(cxt->cgen_cxt, token); 76 | ast_print(token); 77 | cgen_print_cxt(cxt->cgen_cxt); 78 | ast_free(token); 79 | test_free(cxt); 80 | printf("=====================================\n"); 81 | 82 | 
printf("Pass!\n"); 83 | return; 84 | } 85 | 86 | void test_cgen_init() { 87 | printf("=== Test cgen_init_ series ===\n"); 88 | 89 | test_cxt_t *cxt; 90 | token_t *token; 91 | 92 | // Test array with const char * 93 | cxt = test_init("int x = 1, y = 2, z = 100; const char *a = \"123456\"; "); 94 | token = parse(cxt->parse_cxt); 95 | cgen(cxt->cgen_cxt, token); 96 | ast_print(token); 97 | cgen_print_cxt(cxt->cgen_cxt); 98 | ast_free(token); 99 | test_free(cxt); 100 | printf("=====================================\n"); 101 | // Test array initialization 102 | cxt = test_init( 103 | "int x[] = {2, 4, 6, 8, 10}; \n " 104 | "const char y[] = \"asdfghjkl\\n\"; \n" 105 | "char z[20] = \"\"; \n"); 106 | token = parse(cxt->parse_cxt); 107 | cgen(cxt->cgen_cxt, token); 108 | ast_print(token); 109 | cgen_print_cxt(cxt->cgen_cxt); 110 | ast_free(token); 111 | test_free(cxt); 112 | printf("=====================================\n"); 113 | // Test struct 114 | cxt = test_init( 115 | "struct named_struct { int a; long b; char c[10]; } var1; \n " 116 | "struct named_struct var2 = {100, 200L, \"qwert\"}; \n" 117 | ); 118 | token = parse(cxt->parse_cxt); 119 | cgen(cxt->cgen_cxt, token); 120 | ast_print(token); 121 | cgen_print_cxt(cxt->cgen_cxt); 122 | ast_free(token); 123 | test_free(cxt); 124 | printf("=====================================\n"); 125 | 126 | printf("Pass!\n"); 127 | } 128 | 129 | 130 | int main() { 131 | printf("Hello World!\n"); 132 | test_cgen_global_decl(); 133 | test_cgen_init(); 134 | return 0; 135 | } -------------------------------------------------------------------------------- /src/tests/test_eval.c: -------------------------------------------------------------------------------- 1 | 2 | #include 3 | #include 4 | #include "stack.h" 5 | #include "token.h" 6 | #include "error.h" 7 | #include "ast.h" 8 | #include "parse.h" 9 | #include "hashtable.h" 10 | #include "bintree.h" 11 | #include "list.h" 12 | #include "str.h" 13 | #include "type.h" 14 | #include 
"eval.h" 15 | 16 | void test_const_eval_int() { 17 | printf("=== Test eval_const_get_int_value ===\n"); 18 | parse_exp_cxt_t *parse_cxt; 19 | type_cxt_t *type_cxt; 20 | token_t *token; 21 | type_t *type; 22 | value_t *value; 23 | 24 | type_cxt = type_sys_init(); 25 | parse_cxt = parse_exp_init("123456"); 26 | token = token_get_next(parse_cxt->token_cxt); 27 | assert(token); 28 | value = eval_const_get_int_value(type_cxt, token); 29 | printf("Type: %s Value: 0x%016lX (%ld)\n", type_print_str(0, value->type, NULL, 0), value->uint64, value->int64); 30 | parse_exp_free(parse_cxt); 31 | type_sys_free(type_cxt); 32 | printf("=====================================\n"); 33 | type_cxt = type_sys_init(); 34 | parse_cxt = parse_exp_init("0x80000000"); // This will be signed overflow 35 | token = token_get_next(parse_cxt->token_cxt); 36 | assert(token); 37 | value = eval_const_get_int_value(type_cxt, token); 38 | printf("Type: %s Value: 0x%016lX (%ld)\n", type_print_str(0, value->type, NULL, 0), value->uint64, value->int64); 39 | parse_exp_free(parse_cxt); 40 | type_sys_free(type_cxt); 41 | printf("=====================================\n"); 42 | type_cxt = type_sys_init(); 43 | parse_cxt = parse_exp_init("0x80000000U"); // This will be unsigned, no overflow 44 | token = token_get_next(parse_cxt->token_cxt); 45 | assert(token); 46 | value = eval_const_get_int_value(type_cxt, token); 47 | printf("Type: %s Value: 0x%016lX (%ld)\n", type_print_str(0, value->type, NULL, 0), value->uint64, value->int64); 48 | parse_exp_free(parse_cxt); 49 | type_sys_free(type_cxt); 50 | printf("=====================================\n"); 51 | type_cxt = type_sys_init(); 52 | parse_cxt = parse_exp_init("0xFFFFFFFFU"); // This will be unsigned, no overflow 53 | token = token_get_next(parse_cxt->token_cxt); 54 | assert(token); 55 | value = eval_const_get_int_value(type_cxt, token); 56 | printf("Type: %s Value: 0x%016lX (%ld)\n", type_print_str(0, value->type, NULL, 0), value->uint64, value->int64); 57 | 
parse_exp_free(parse_cxt); 58 | type_sys_free(type_cxt); 59 | printf("=====================================\n"); 60 | type_cxt = type_sys_init(); 61 | parse_cxt = parse_exp_init("0x1FFFFFFFFU"); // This will be unsigned, overflow 62 | token = token_get_next(parse_cxt->token_cxt); 63 | assert(token); 64 | value = eval_const_get_int_value(type_cxt, token); 65 | printf("Type: %s Value: 0x%016lX (%ld)\n", type_print_str(0, value->type, NULL, 0), value->uint64, value->int64); 66 | parse_exp_free(parse_cxt); 67 | type_sys_free(type_cxt); 68 | printf("=====================================\n"); 69 | type_cxt = type_sys_init(); 70 | parse_cxt = parse_exp_init("'\xfe'"); // Although it overflows for char type, it is evaluated by another function, no warning 71 | token = token_get_next(parse_cxt->token_cxt); 72 | assert(token); 73 | value = eval_const_get_int_value(type_cxt, token); 74 | printf("Type: %s Value: 0x%016lX (%ld)\n", type_print_str(0, value->type, NULL, 0), value->uint64, value->int64); 75 | parse_exp_free(parse_cxt); 76 | type_sys_free(type_cxt); 77 | printf("=====================================\n"); 78 | 79 | printf("Pass!\n"); 80 | return; 81 | } 82 | 83 | void test_eval_const_exp() { 84 | printf("=== Test eval_const_exp ===\n"); 85 | parse_exp_cxt_t *parse_cxt; 86 | type_cxt_t *type_cxt; 87 | token_t *token; 88 | type_t *type; 89 | value_t *value; 90 | 91 | type_cxt = type_sys_init(); 92 | parse_cxt = parse_exp_init("(1000 + 2 * 3) << 4"); 93 | token = parse_exp(parse_cxt, PARSE_EXP_ALLOWALL); 94 | ast_print_(token, 0); 95 | value = eval_const_exp(type_cxt, token); 96 | printf("Type: %s Value: 0x%016lX (%ld)\n", type_print_str(0, value->type, NULL, 0), value->uint64, value->int64); 97 | assert(value->int32 == 16096); 98 | parse_exp_free(parse_cxt); 99 | type_sys_free(type_cxt); 100 | printf("=====================================\n"); 101 | type_cxt = type_sys_init(); 102 | parse_cxt = parse_exp_init("((char)1000 + 2ul * 3) << 4"); 103 | token = 
parse_exp(parse_cxt, PARSE_EXP_ALLOWALL); 104 | ast_print_(token, 0); 105 | value = eval_const_exp(type_cxt, token); 106 | printf("Type: %s Value: 0x%016lX (%ld)\n", type_print_str(0, value->type, NULL, 0), value->uint64, value->int64); 107 | assert(value->int64 == -288); // Because of the sign extension of char type 108 | parse_exp_free(parse_cxt); 109 | type_sys_free(type_cxt); 110 | printf("=====================================\n"); 111 | type_cxt = type_sys_init(); 112 | parse_cxt = parse_exp_init("(signed long)((long)(unsigned long *)(long)(unsigned long *)(long)100 + 2)"); 113 | token = parse_exp(parse_cxt, PARSE_EXP_ALLOWALL); 114 | ast_print_(token, 0); 115 | value = eval_const_exp(type_cxt, token); 116 | printf("Type: %s Value: 0x%016lX (%ld)\n", type_print_str(0, value->type, NULL, 0), value->uint64, value->int64); 117 | assert(value->int64 == 102); // Because of the sign extension of char type 118 | parse_exp_free(parse_cxt); 119 | type_sys_free(type_cxt); 120 | printf("=====================================\n"); 121 | 122 | printf("Pass!\n"); 123 | return; 124 | } 125 | 126 | int main() { 127 | test_const_eval_int(); 128 | test_eval_const_exp(); 129 | return 0; 130 | } -------------------------------------------------------------------------------- /src/tests/test_lex.c: -------------------------------------------------------------------------------- 1 | 2 | #include 3 | #include 4 | #include "stack.h" 5 | #include "token.h" 6 | #include "error.h" 7 | #include "ast.h" 8 | #include "parse.h" 9 | #include "hashtable.h" 10 | 11 | void test_get_op() { 12 | printf("=== Test token_get_op() ===\n"); 13 | char *p; 14 | char test1[] = "-----====-=-=++>>=>>>.+..+...+....+......"; 15 | char result[256]; 16 | token_t token; 17 | p = test1; 18 | result[0] = '\0'; 19 | token_cxt_t *token_cxt = token_cxt_init(test1); 20 | while(p != NULL) { 21 | p = token_get_op(p, &token); 22 | if(p == NULL) break; 23 | else if(token.type != T_ILLEGAL) { 24 | printf("%s(%s) ", 
token_typestr(token.type), token_symstr(token.type)); 25 | strcat(result, token_symstr(token.type)); 26 | } else { 27 | p = token_get_ident(token_cxt, p, &token); 28 | if(p == NULL) break; 29 | else if(token.type != T_ILLEGAL) { 30 | printf("%s(%s) ", token_typestr(token.type), token.str); 31 | strcat(result, token.str); 32 | free(token.str); 33 | } else { 34 | assert(0); 35 | } 36 | } 37 | } 38 | putchar('\n'); 39 | assert(strcmp(result, test1) == 0); 40 | token_cxt_free(token_cxt); 41 | 42 | printf("Pass!\n"); 43 | return; 44 | } 45 | 46 | void test_bin_search() { 47 | printf("=== Test token_get_keyword_type() ===\n"); 48 | token_type_t type; 49 | for(int i = 0;i < (int)sizeof(keywords) / (int)sizeof(const char *);i++) { 50 | type = token_get_keyword_type(keywords[i]); 51 | if(type == T_ILLEGAL) { 52 | printf("ILLEGAL %s\n", keywords[i]); 53 | assert(0); 54 | } else { 55 | printf("%s(%s) ", token_typestr(type), token_symstr(type)); 56 | assert(strcmp(token_symstr(type), keywords[i]) == 0); 57 | } 58 | } 59 | 60 | type = token_get_keyword_type("aaaa"); 61 | assert(type == T_ILLEGAL); 62 | type = token_get_keyword_type("zzzzzzz"); 63 | assert(type == T_ILLEGAL); 64 | type = token_get_keyword_type("wangziqi"); 65 | assert(type == T_ILLEGAL); 66 | type = token_get_keyword_type("jklasd"); 67 | assert(type == T_ILLEGAL); 68 | 69 | putchar('\n'); 70 | printf("Pass!\n"); 71 | return; 72 | } 73 | 74 | void test_token_get_next() { 75 | printf("=== Test test_token_get_next() ===\n"); 76 | char test[] = \ 77 | "// Hello World \n \ 78 | void main() { \n \ 79 | /* This is a block comment \n \ 80 | That cross multiple lines \n \ 81 | */ \n \ 82 | } \n \ 83 | \n"; 84 | error_init(test); 85 | token_cxt_t *token_cxt = token_cxt_init(test); 86 | token_t *token; 87 | while((token = token_get_next(token_cxt)) != NULL) { 88 | const char *sym = token_symstr(token->type); 89 | if(sym == NULL) printf("%s ", token->str); 90 | else printf("%s ", sym); 91 | token_free(token); 92 | } 93 | 
putchar('\n'); 94 | 95 | char test2[] = " \n \ 96 | // Returns the next token, or illegal \n \ 97 | // Same rule for return value and conditions as token_get_op() \n \ 98 | char *token_get_next(char *s, token_t *token) { \n \ 99 | while(1) { \n \ 100 | if(s == NULL || *s == '\\0') return NULL; \n \ 101 | else if(isspace(*s)) while(isspace(*s)) s++; \n \ 102 | else if(s[0] == '/' && s[1] == '/') while(*s != '\\n' && *s != '\\0') s++; \n \ 103 | else if(s[0] == '/' && s[1] == '*') { \n \ 104 | while((s[0] != '\\0') && (s[0] != '*' || s[1] != '/')) s++; \n \ 105 | s += 2; \n \ 106 | } \n \ 107 | else if(isalpha(*s) || *s == '_') return token_get_ident(s, token); \n \ 108 | else return token_get_op(s, token); \n \ 109 | } \n \ 110 | \n \ 111 | assert(0); \n \ 112 | return NULL; \n \ 113 | } \n \ 114 | \" asda dasdasd\\n \" "; 115 | error_init(test2); 116 | while((token = token_get_next(token_cxt)) != NULL) { 117 | const char *sym = token_symstr(token->type); 118 | int row, col; 119 | error_get_row_col(token->offset, &row, &col); 120 | if(sym == NULL) printf("%s ", token->str); 121 | else printf("%s(%d %d) ", sym, row, col); 122 | token_free(token); 123 | } 124 | putchar('\n'); 125 | token_cxt_free(token_cxt); 126 | 127 | printf("Pass!\n"); 128 | return; 129 | } 130 | 131 | void test_int_size() { 132 | printf("=== Test Integer Size ===\n"); 133 | char test[] = "12 23l 34ll 45llu 56lu 67u 78ul 89ull 0x123LU 056ULL"; 134 | token_cxt_t *cxt = token_cxt_init(test); 135 | token_t *token; 136 | while((token = token_get_next(cxt)) != NULL) { 137 | printf("%s %s\n", token->str, token_decl_print(token->decl_prop)); 138 | } 139 | token_cxt_free(cxt); 140 | printf("Pass!\n"); 141 | return; 142 | } 143 | 144 | int main() { 145 | printf("=== Hello World! 
// Types of raw tokens.
// This enum type does not distinguish between different expression operators, i.e. both
// unary "plus" and binary "add" is T_PLUS. Extra information such as operator property
// is derived
//
// The enum is divided into numeric ranges, each delimited by a *_BEGIN and a *_END
// constant: operators start at 0, literals at 200, keywords at 1000 and expression
// AST node types at 2000. The explicit values inside the operator range (10, 20,
// 30, 40) equal the values that implicit numbering would assign at those positions.
typedef enum {
  // Expression token types
  T_OP_BEGIN = 0,
  T_LPAREN = 0, T_RPAREN, T_LSPAREN, T_RSPAREN,       // ( ) [ ]
  T_DOT, T_ARROW,                                     // . ->
  T_INC, T_DEC, T_PLUS, T_MINUS,                      // ++ -- + -
  T_LOGICAL_NOT = 10, T_BIT_NOT,                      // ! ~
  T_STAR, T_AND,                                      // * &
  T_DIV, T_MOD,                                       // / %
  T_LSHIFT, T_RSHIFT,                                 // << >>

  T_LESS, T_GREATER, T_LEQ = 20, T_GEQ, T_EQ, T_NEQ,  // < > <= >= == !=
  T_BIT_XOR, T_BIT_OR,                                // ^ |
  T_LOGICAL_AND, T_LOGICAL_OR,                        // && ||
  T_QMARK, T_COLON,                                   // ? :
  T_ASSIGN = 30,                                      // =
  T_PLUS_ASSIGN, T_MINUS_ASSIGN, T_MUL_ASSIGN,        // += -= *=
  T_DIV_ASSIGN, T_MOD_ASSIGN,                         // /= %=
  T_LSHIFT_ASSIGN, T_RSHIFT_ASSIGN,                   // <<= >>=
  T_AND_ASSIGN, T_OR_ASSIGN, T_XOR_ASSIGN = 40,       // &= |= ^=
  T_COMMA,                                            // ,
  T_OP_END,

  T_LCPAREN,                                          // {
  T_RCPAREN,                                          // }
  T_SEMICOLON,                                        // ;
  T_ELLIPSIS,                                         // ...

  // Literal types (i.e. primary expressions)
  T_LITERALS_BEGIN = 200,
  T_DEC_INT_CONST = 200, T_HEX_INT_CONST, T_OCT_INT_CONST,
  T_CHAR_CONST, T_STR_CONST,
  T_FLOAT_CONST,
  T_IDENT,
  T_UDEF,                                             // User-defined type using typedef; they are not literals
  T_LITERALS_END,

  // Add this to the index of keywords in the table
  T_KEYWORDS_BEGIN = 1000,
  T_AUTO = 1000, T_BREAK, T_CASE, T_CHAR, T_CONST, T_CONTINUE, T_DEFAULT, T_DO,
  T_DOUBLE, T_ELSE, T_ENUM, T_EXTERN, T_FLOAT, T_FOR, T_GOTO, T_IF,
  T_INT, T_LONG, T_REGISTER, T_RETURN, T_SHORT, T_SIGNED, T_SIZEOF, T_STATIC,
  T_STRUCT, T_SWITCH, T_TYPEDEF, T_UNION, T_UNSIGNED, T_VOID, T_VOLATILE, T_WHILE,
  T_KEYWORDS_END,

  // AST type used within an expression (51 elements)
  // Note that some are only used internally and will never occur in the AST,
  // specifically they are EXP_LPAREN, EXP_RPAREN, EXP_RSPAREN
  EXP_BEGIN = 2000,
  EXP_FUNC_CALL = 2000, EXP_ARRAY_SUB,                // func() array[]
  EXP_LPAREN, EXP_RPAREN,                             // ( and ) as parenthesis
  EXP_RSPAREN,                                        // ]
  EXP_DOT, EXP_ARROW,                                 // obj.field ptr->field
  EXP_POST_INC, EXP_PRE_INC,                          // x++ ++x
  EXP_POST_DEC, EXP_PRE_DEC,                          // x-- --x
  EXP_PLUS, EXP_MINUS,                                // +x -x
  EXP_LOGICAL_NOT, EXP_BIT_NOT,                       // !exp ~exp
  EXP_CAST,                                           // (type)
  EXP_DEREF, EXP_ADDR,                                // *ptr &x
  EXP_SIZEOF,                                         // sizeof(type/name)
  EXP_MUL, EXP_DIV, EXP_MOD,                          // binary * / %
  EXP_ADD, EXP_SUB,                                   // binary + -
  EXP_LSHIFT, EXP_RSHIFT,                             // << >>
  EXP_LESS, EXP_GREATER, EXP_LEQ, EXP_GEQ,            // < > <= >=
  EXP_EQ, EXP_NEQ,                                    // == !=
  EXP_BIT_AND, EXP_BIT_OR, EXP_BIT_XOR,               // binary & | ^
  EXP_LOGICAL_AND, EXP_LOGICAL_OR,                    // && ||
  EXP_COND, EXP_COLON,                                // ? :
  EXP_ASSIGN_BEGIN,                                   // We use these two to check whether exp has an assign
  EXP_ASSIGN = EXP_ASSIGN_BEGIN,                      // =
  EXP_ADD_ASSIGN, EXP_SUB_ASSIGN,                     // += -=
  EXP_MUL_ASSIGN, EXP_DIV_ASSIGN, EXP_MOD_ASSIGN,     // *= /= %=
  EXP_AND_ASSIGN, EXP_OR_ASSIGN, EXP_XOR_ASSIGN,      // &= |= ^=
  EXP_LSHIFT_ASSIGN, EXP_RSHIFT_ASSIGN,               // <<= >>=
  EXP_ASSIGN_END = EXP_RSHIFT_ASSIGN,                 // There must be no gap
  EXP_COMMA,                                          // ,
  EXP_END,
  // Internal nodes

  T_DECL, T_BASETYPE,                                 // Root node of a declaration
  T_,                                                 // Placeholder
  T_COMP_DECL,                                        // structure or union declaration line, can contain one base and multiple declarators
  T_COMP_FIELD,                                       // Single field declaration; Contains a DECL and optional number for bitfield
  T_ENUM_FIELD,                                       // Enum declaration field (single line)
  T_LBL_STMT,
  T_EXP_STMT,
  T_COMP_STMT,
  T_INIT_LIST,
  T_STMT_LIST,                                        // Contains a list of statements
  T_DECL_STMT_LIST,                                   // Contains a list of entries
  T_DECL_STMT_ENTRY,                                  // Contains a base type and a list of vars
  T_DECL_STMT_VAR,                                    // Contains a decl and optional initializer expression/list
  T_ROOT,
  T_GLOBAL_FUNC,                                      // Global function definition
  T_GLOBAL_DECL_ENTRY,                                // Global declaration (same layout as T_DECL_STMT_ENTRY)
  T_GLOBAL_DECL_VAR,                                  // Single entry that contains name and initializer
  T_BITFIELD,                                         // Bit field in struct/union; Contains an expression
  T_INIT,                                             // Single value init, only has one child

  T_ILLEGAL = 10000,                                  // Mark a return value
} token_type_t;
by the token object 129 | struct token_t *child; 130 | union { 131 | struct token_t *sibling; // If token is in AST then use child-sibling representation 132 | struct token_t *next; // If token is in pushbacks queue then form a circular queue 133 | }; 134 | struct token_t *parent; // Empty for root node 135 | char *offset; // The offset in source file, for error reporting purposes; AST node may also have this field 136 | decl_prop_t decl_prop; // Property if the kwd is part of declaration; Set when a kwd is found 137 | } token_t; 138 | 139 | #define DECL_NULL 0x00000000 140 | #define DECL_INVALID 0xFFFFFFFF // Naturally incompatible with all 141 | // Type specifier bit mask (bit 4, 5, 6, 7), at the token level 142 | #define DECL_TYPE_MASK 0x000000F0 143 | #define DECL_CHAR 0x00000010 144 | #define DECL_SHORT 0x00000020 145 | #define DECL_INT 0x00000030 146 | #define DECL_LONG 0x00000040 147 | #define DECL_ENUM 0x00000050 148 | #define DECL_STRUCT 0x00000060 149 | #define DECL_UNION 0x00000070 150 | #define DECL_UDEF 0x00000080 // User defined using typedef 151 | #define DECL_FLOAT 0x00000090 152 | #define DECL_DOUBLE 0x000000A0 153 | #define DECL_VOID 0x000000B0 154 | #define DECL_UNSIGNED 0x000000C0 155 | #define DECL_SIGNED 0x000000D0 156 | // Storage class bit mask (bit 8, 9, 10, 11); Incompatible with each other 157 | #define DECL_STGCLS_MASK 0x00000F00 158 | #define DECL_TYPEDEF 0x00000100 // Define a new type using typedef storage class 159 | #define DECL_EXTERN 0x00000200 160 | #define DECL_AUTO 0x00000300 161 | #define DECL_REGISTER 0x00000400 162 | #define DECL_STATIC 0x00000500 163 | // Macro for accessing storage class 164 | #define DECL_STGCLS_GET(decl_prop) ((decl_prop) & DECL_STGCLS_MASK) 165 | #define DECL_ISTYPEDEF(decl_prop) (DECL_STGCLS_GET(decl_prop) == DECL_TYPEDEF) 166 | #define DECL_ISEXTERN(decl_prop) (DECL_STGCLS_GET(decl_prop) == DECL_EXTERN) 167 | #define DECL_ISAUTO(decl_prop) (DECL_STGCLS_GET(decl_prop) == DECL_AUTO) 168 | #define 
// Extracts the AST-level base type bits (BASETYPE_MASK) from a declaration
// property word. The argument is parenthesized so that a compound expression
// such as (a | b) is masked as a whole instead of only its last operand.
#define BASETYPE_GET(decl_prop) ((decl_prop) & BASETYPE_MASK)
// Extracts the type derivation operation bits (TYPE_OP_MASK) from a declaration
// property word. The argument is parenthesized so that a compound expression
// such as (a | b) is masked as a whole instead of only its last operand.
#define TYPE_OP_GET(decl_prop) ((decl_prop) & TYPE_OP_MASK)
*preced, assoc_t *assoc); 252 | int token_get_num_operand(token_type_t type); 253 | token_type_t token_get_keyword_type(const char *s); 254 | const char *token_typestr(token_type_t type); 255 | const char *token_symstr(token_type_t type); 256 | char *token_get_op(char *s, token_t *token); 257 | void token_copy_literal(token_t *token, const char *begin, const char *end); 258 | void token_free(token_t *token); 259 | token_t *token_alloc(); 260 | token_t *token_alloc_type(token_type_t type); 261 | token_t *token_get_empty(); 262 | char *token_get_ident(token_cxt_t *cxt, char *s, token_t *token); 263 | char *token_get_int(char *s, token_t *token); 264 | char *token_get_str(char *s, token_t *token, char closing); 265 | token_t *token_get_next_ignore_lookahead(token_cxt_t *cxt); 266 | token_t *token_get_next(token_cxt_t *cxt); 267 | int token_consume_type(token_cxt_t *cxt, token_type_t type); 268 | void token_pushback(token_cxt_t *cxt, token_t *token); 269 | token_t *token_lookahead(token_cxt_t *cxt, int count); 270 | token_t *token_lookahead_notnull(token_cxt_t *cxt, int count); 271 | 272 | #endif -------------------------------------------------------------------------------- /src/x86/.gitignore: -------------------------------------------------------------------------------- 1 | *.o 2 | *.lib 3 | x86-test 4 | -------------------------------------------------------------------------------- /src/x86/Makefile: -------------------------------------------------------------------------------- 1 | 2 | CFLAGS=-O0 -g -Wall -Wextra -Werror -Wno-unused-parameter -Wno-unused-variable 3 | LDFLAGS= 4 | 5 | all: x86-test 6 | 7 | x86-test: x86.o x86-test.c 8 | gcc x86-test.c x86.o -o x86-test $(CFLAGS) $(LDFLAGS) 9 | 10 | x86.o: x86.c x86.h 11 | gcc x86.c -c -o x86.o $(CFLAGS) $(LDFLAGS) 12 | -------------------------------------------------------------------------------- /src/x86/README.md: -------------------------------------------------------------------------------- 1 | 2 | x86 
/*
 * x86.h - Abstracted interface to the x86 instruction set.
 *
 * Declares prefix/flag constants, register and operation enumerations, the
 * operand and instruction records, small inline helpers for pulling values out
 * of a raw instruction byte stream, and the prototypes of the parser and
 * printer routines.
 */
#ifndef _X86_H
#define _X86_H

/*
 * NOTE(review): the text this header was recovered from had lost the targets
 * of all 16 #include directives. The five below are the ones this header
 * itself demonstrably uses (assert, fixed-width integers, FILE/fprintf/perror,
 * malloc/exit, strlen/memcpy). Restore the remaining originals from version
 * control -- other translation units may rely on them being pulled in here.
 */
#include <assert.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

//* util

/*
 * NOTE(review): error_exit, dbg_printf, warn_printf, TEST_BEGIN and TEST_PASS
 * end in `while(0);`. The trailing semicolon makes them unsafe as the body of
 * an un-braced if/else, but it is kept because existing call sites may omit
 * their own semicolon.
 */

// System call assertion: on failure print perror() tagged with the calling
// function's name, then abort (assert in debug builds, exit(1) otherwise)
#define SYSEXPECT(expr) \
  do { if(!(expr)) { perror(__func__); assert(0); exit(1); } } while(0)

// Error reporting: print "<function> error: <message>" to stderr and terminate
#define error_exit(fmt, ...) \
  do { fprintf(stderr, "%s error: " fmt, __func__, ##__VA_ARGS__); assert(0); exit(1); } while(0);

// Debug printing to stderr; compiled to nothing when NDEBUG is defined
#ifndef NDEBUG
#define dbg_printf(fmt, ...) do { fprintf(stderr, fmt, ##__VA_ARGS__); } while(0);
#else
#define dbg_printf(fmt, ...) do {} while(0);
#endif

// Warning printing; note that this goes to stdout, not stderr
#define warn_printf(fmt, ...) do { fprintf(stdout, "Warning: " fmt, ##__VA_ARGS__); } while(0);

// Branching macro (this may have already been defined in other source files)
#ifndef likely
#define likely(x)       __builtin_expect((x),1)
#endif
#ifndef unlikely
#define unlikely(x)     __builtin_expect((x),0)
#endif

// Testing function print name and pass
#define TEST_BEGIN() do { printf("========== %s ==========\n", __func__); } while(0);
#define TEST_PASS() do { printf("Pass!\n"); } while(0);

// String functions

// Returns a heap-allocated copy of the NUL-terminated string s.
// The caller owns the result and releases it with free().
// Allocation failure is fatal (see SYSEXPECT).
inline static char *strclone(const char *s) {
  size_t len = strlen(s);           // size_t: strlen's result does not fit an int in general
  char *ret = malloc(len + 1);
  SYSEXPECT(ret != NULL);
  memcpy(ret, s, len + 1);          // Copies the terminating NUL as well
  return ret;
}

//* Prefix (raw value in instructions)

#define PREFIX_REP      0xf3
#define PREFIX_REPE     PREFIX_REP
#define PREFIX_REPZ     PREFIX_REP

#define PREFIX_REPNE    0xf2
#define PREFIX_REPNZ    PREFIX_REPNE

// Segment override
#define PREFIX_CS       0x2e
#define PREFIX_DS       0x3e
#define PREFIX_ES       0x26
#define PREFIX_SS       0x36

#define PREFIX_LOCK     0xf0

// Used for MMX instruction
#define PREFIX_MMX_MASK 0xf0f036263e2ef2f3UL
// Convert prefix to a flag; Returns FLAG_NONE if not a prefix
uint32_t prefix_to_flag_mmx(uint8_t byte);
uint32_t prefix_to_flag_scalar(uint8_t byte);

// NOTE(review): ENABLE_MMX is only defined further down ("Global control"),
// i.e. after this test has been evaluated, so within this header
// prefix_to_flag resolves to prefix_to_flag_scalar unless ENABLE_MMX is
// defined before inclusion. The order is kept as-is because moving the define
// would silently switch which function every caller gets -- confirm intent.
#ifdef ENABLE_MMX
#define prefix_to_flag prefix_to_flag_mmx
#else
#define prefix_to_flag prefix_to_flag_scalar
#endif

//* Global control

#define ENABLE_MMX

typedef struct {
  int warn_repeated_prefix; // Whether to warn repeated prefix bytes
} global_t;

extern global_t global;

//* Prefix flags

#define FLAG_NONE     0x00000000
#define FLAG_REP      0x00000001
#define FLAG_REPE     FLAG_REP
#define FLAG_REPZ     FLAG_REP

#define FLAG_REPNE    0x00000002
#define FLAG_REPNZ    FLAG_REPNE

#define FLAG_CS       0x00000004
#define FLAG_DS       0x00000008
#define FLAG_ES       0x00000010
#define FLAG_SS       0x00000020

#define FLAG_LOCK     0x00000040

// D flag in the opcode byte
#define FLAG_D        0x00000080
// W flag in the opcode byte
#define FLAG_W        0x00000100
// Whether call/jmp is far
#define FLAG_FAR      0x00000200

//* Register constants

// Each *_END marker has the same value as the first register of the following
// group (e.g. REG_GEN_16_END == REG_EAX), so ranges are half-open:
// [REG_GEN_16_BEGIN, REG_GEN_16_END) and so on. REG_SEG_BEGIN, in contrast, is
// a value of its own placed before REG_CS.
enum {
  REG_NONE = 0,
  REG_BEGIN,
  REG_GEN_BEGIN = REG_BEGIN,
  REG_GEN_16_BEGIN = REG_BEGIN,
  REG_AX = REG_GEN_16_BEGIN,
  REG_BX,
  REG_CX,
  REG_DX,
  REG_SI,
  REG_DI,
  REG_BP,
  REG_SP,
  REG_GEN_16_END,
  // 32-bit register
  REG_GEN_32_BEGIN = REG_GEN_16_END,
  REG_EAX = REG_GEN_32_BEGIN,
  REG_EBX,
  REG_ECX,
  REG_EDX,
  REG_ESI,
  REG_EDI,
  REG_EBP,
  REG_ESP,
  REG_GEN_32_END,
  // 8-bit register
  REG_GEN_8_BEGIN = REG_GEN_32_END,
  REG_AH = REG_GEN_8_BEGIN,
  REG_AL,
  REG_BH,
  REG_BL,
  REG_CH,
  REG_CL,
  REG_DH,
  REG_DL,
  REG_GEN_8_END,
  REG_GEN_END = REG_GEN_8_END,
  // Segment register
  REG_SEG_BEGIN,
  REG_CS,
  REG_DS,
  REG_ES,
  REG_SS,
  REG_SEG_END,
  REG_END,
  // Non-general purpose registers
  REG_IP,
  REG_FLAGS,
};

// Register names; presumably indexed by the REG_ constants above -- verify
// against the definition
extern const char *reg_names[];

//* R/M Tables

// Maps REG field to register name, word size (w = 0)
extern const int gen_reg_16_table[8];
extern const int gen_reg_8_table[8];
extern const int seg_reg_table[4];

// Register pair for R/M addressing
typedef struct {
  int reg1;
  int reg2;
} addr_mode_reg_t;

// Mode = 00/01/10
extern const addr_mode_reg_t addr_mode_reg_table_16[8];
extern const addr_mode_reg_t addr_mode_reg_table_32[8];

#define ADDR_MODE_MEM_REG_ONLY      0
#define ADDR_MODE_MEM_REG_DISP_8    1
#define ADDR_MODE_MEM_REG_DISP_16   2
#define ADDR_MODE_MEM_REG_DISP_32   3
// With this mode the addr_mode object is left uninitialized
#define ADDR_MODE_REG               4
// Direct addr mode; This is not in the raw instruction; Same for 16 and 32 bit
#define ADDR_MODE_MEM_DIRECT        5

// Addressing mode for memory operands
typedef struct {
  int addr_mode;           // ADDR_MODE_ macros
  addr_mode_reg_t regs;    // Register for addressing (one or two)
  union {
    uint8_t disp_8;
    uint16_t disp_16;
    uint32_t disp_32;
    uint16_t direct_addr_16; // Direct addressing mode uses this
    uint32_t direct_addr_32; // Direct addressing mode uses this
  };
} addr_mode_t;

// Prints memory operand (ADDR_MODE_REG will not be printed because its encoding is not stored)
void addr_mode_fprint(addr_mode_t *addr_mode, uint32_t flags, FILE *fp);

// Writes the single mod-reg-r/m byte (mode in bits 7..6, reg in bits 5..3,
// rm in bits 2..0) to data[0] and returns the position right after it.
// Any displacement bytes are NOT written here; the caller appends them.
inline static uint8_t *addr_mode_gen(uint8_t mode, uint8_t reg, uint8_t rm, uint8_t *data) {
  assert(mode <= 3);
  assert(reg <= 7);
  assert(rm <= 7);
  data[0] = (mode << 6) | (reg << 3) | (rm);
  return data + 1;
}

// Operand type
#define OPERAND_NONE      0
#define OPERAND_REG       1
#define OPERAND_MEM       2
#define OPERAND_IMM_8     3
#define OPERAND_IMM_16    4
#define OPERAND_REL_8     5
#define OPERAND_REL_16    6
#define OPERAND_FARPTR    7
// The operand is a const value 1, which is not stored
#define OPERAND_IMPLIED_1 8

typedef struct {
  uint16_t offset;
  uint16_t seg;
} farptr_t;

// An operand can be either register or memory, which is encoded by addr_mode_t
typedef struct {
  int operand_mode;        // OPERAND_ macros; selects the valid union member
  union {
    int reg;               // Operand is in one of the registers (size implied by register width)
    addr_mode_t mem;       // Operand is in memory (size given by W flag)
    uint16_t imm_16;       // 16 bit immediate value
    uint8_t imm_8;         // 8 bit immediate value
    uint16_t rel_16;       // 16 bit relative
    uint16_t rel_8;        // 8 bit relative; held in 16 bits, operand_set_rel_8() stores it zero-extended
    farptr_t farptr;       // seg:offset full address (32-bit operand)
  };
} operand_t;

//* Byte stream helpers
//
// ptr_add_N advances a stream position by N bits; ptr_load_N reads an N-bit
// value in host byte order. The multi-byte loads go through memcpy because an
// instruction stream gives no alignment guarantee, and dereferencing a cast
// uint16_t* / uint32_t* at an odd address is undefined behaviour.

inline static void *ptr_add_32(void *p) { return (void *)((uint8_t *)p + sizeof(uint32_t)); }
inline static void *ptr_add_16(void *p) { return (void *)((uint8_t *)p + sizeof(uint16_t)); }
inline static void *ptr_add_8(void *p) { return (void *)((uint8_t *)p + 1); }

inline static uint32_t ptr_load_32(void *p) {
  uint32_t value;
  memcpy(&value, p, sizeof(value));
  return value;
}

inline static uint16_t ptr_load_16(void *p) {
  uint16_t value;
  memcpy(&value, p, sizeof(value));
  return value;
}

inline static uint8_t ptr_load_8(void *p) { return *(uint8_t *)p; }

//* Operand setters
//
// The setters taking `data` read the value at that stream position and return
// the position just past what they consumed.

// Sets an operand as register. Register can be either general purpose or segment, but not IP or FLAGS
inline static void operand_set_register(operand_t *operand, int reg) {
  assert(reg >= REG_BEGIN && reg < REG_END);
  operand->operand_mode = OPERAND_REG;
  operand->reg = reg;
  return;
}

// Parse a 8-bit immediate value from the instruction stream
inline static void *operand_set_imm_8(operand_t *operand, void *data) {
  operand->operand_mode = OPERAND_IMM_8;
  operand->imm_8 = ptr_load_8(data);
  return ptr_add_8(data);
}

// Parse a 16-bit immediate value from the instruction stream
inline static void *operand_set_imm_16(operand_t *operand, void *data) {
  operand->operand_mode = OPERAND_IMM_16;
  operand->imm_16 = ptr_load_16(data);
  return ptr_add_16(data);
}

// Parse a 8-bit relative value from the instruction stream (stored zero-extended)
inline static void *operand_set_rel_8(operand_t *operand, void *data) {
  operand->operand_mode = OPERAND_REL_8;
  operand->rel_8 = ptr_load_8(data);
  return ptr_add_8(data);
}

// Parse a 16-bit relative value from the instruction stream
inline static void *operand_set_rel_16(operand_t *operand, void *data) {
  operand->operand_mode = OPERAND_REL_16;
  operand->rel_16 = ptr_load_16(data);
  return ptr_add_16(data);
}

// Sets a 8-bit immediate operand from a given value rather than from the stream
inline static void operand_set_const_8(operand_t *operand, uint8_t value) {
  operand->operand_mode = OPERAND_IMM_8;
  operand->imm_8 = value;
  return;
}

// Sets a 16-bit immediate operand from a given value rather than from the stream
inline static void operand_set_const_16(operand_t *operand, uint16_t value) {
  operand->operand_mode = OPERAND_IMM_16;
  operand->imm_16 = value;
  return;
}

// Marks the operand as the implied constant 1; no value is stored
inline static void operand_set_implied_one(operand_t *operand) {
  operand->operand_mode = OPERAND_IMPLIED_1;
  return;
}

// Parse a far pointer from the instruction stream: 16-bit offset first, then 16-bit segment
inline static void *operand_set_farptr(operand_t *operand, void *data) {
  operand->operand_mode = OPERAND_FARPTR;
  operand->farptr.offset = ptr_load_16(data);
  data = ptr_add_16(data);
  operand->farptr.seg = ptr_load_16(data);
  return ptr_add_16(data);
}

// Sets a direct memory operand
// This is specifically used by mov 0xA0 - 0xA3
inline static void *operand_set_mem_direct_addr_16(operand_t *operand, void *data) {
  operand->operand_mode = OPERAND_MEM;
  operand->mem.addr_mode = ADDR_MODE_MEM_DIRECT;
  operand->mem.direct_addr_16 = ptr_load_16(data);
  return ptr_add_16(data);
}

// 32-bit variant of the above: reads a 4-byte direct address
inline static void *operand_set_mem_direct_addr_32(operand_t *operand, void *data) {
  operand->operand_mode = OPERAND_MEM;
  operand->mem.addr_mode = ADDR_MODE_MEM_DIRECT;
  operand->mem.direct_addr_32 = ptr_load_32(data);
  return ptr_add_32(data);
}

// Given mode and r/m bits, set the operand
void *parse_operand_mod_rm(operand_t *operand, int addr_mode, int flags, int rm, void *data);
// Parsing 2 operands, must be either reg or mem
void *parse_operand_2(operand_t *dest, operand_t *src, uint32_t flags, void *data);
// Only parses mod + rm, returns REG
void *parse_operand_1(operand_t *operand, uint32_t flags, int *reg, void *data);

void operand_fprint(operand_t *operand, uint32_t flags, FILE *fp);

//* Instruction

// Abstract operations. The numeric values follow declaration order, so do not
// reorder entries without checking everything that is indexed by them.
enum {
  OP_NOP = 0,
  OP_ADD,
  OP_PUSH,
  OP_POP,
  OP_OR,
  OP_ADC,
  OP_SBB,
  OP_AND,
  OP_DAA,
  OP_SUB,
  OP_DAS,
  OP_XOR,
  OP_AAA,
  OP_CMP,
  OP_AAS,
  OP_INC,
  OP_DEC,
  // Jump short
  OP_JO,
  OP_JNO,
  OP_JB,
  OP_JNB,
  OP_JZ,
  OP_JNZ,
  OP_JBE,
  OP_JA,
  OP_JS,
  OP_JNS,
  OP_JPE,
  OP_JPO,
  OP_JL,
  OP_JGE,
  OP_JLE,
  OP_JG,
  OP_TEST,
  OP_XCHG,
  OP_MOV,
  OP_LEA,
  OP_CBW,
  OP_CWD,
  OP_CALL,
  OP_WAIT,
  OP_PUSHF,
  OP_POPF,
  OP_SAHF,
  OP_LAHF,
  OP_MOVSB,
  OP_MOVSW,
  OP_CMPSB,
  OP_CMPSW,
  OP_STOSB,
  OP_STOSW,
  OP_LODSB,
  OP_LODSW,
  OP_SCASB,
  OP_SCASW,
  OP_RET,
  OP_LES,
  OP_LDS,
  OP_RETF,
  OP_INT3,
  OP_INT,
  OP_INTO,
  OP_IRET,
  OP_ROL,
  OP_ROR,
  OP_RCL,
  OP_RCR,
  OP_SHL,
  OP_SHR,
  OP_SAR,
  OP_AAM,
  OP_AAD,
  OP_XLAT,
  OP_LOOPNZ,
  OP_LOOPZ,
  OP_LOOP,
  OP_JCXZ,
  OP_IN,
  OP_OUT,
  OP_JMP,
  OP_HLT,
  OP_CMC,
  OP_NOT,
  OP_NEG,
  OP_MUL,
  OP_IMUL,
  OP_DIV,
  OP_IDIV,
  OP_CLC,
  OP_STC,
  OP_CLI,
  OP_STI,
  OP_CLD,
  OP_STD,
};

// Maps op macros (see above) to string names
extern const char *op_names[];

typedef struct {
  farptr_t addr;     // Address of the instruction
  uint8_t opcode;    // This is the raw opcode byte includes D and W flag, i.e., it is the full 8 bits
  uint8_t op;        // This is the abstract operation (OP_ class)
  uint32_t flags;    // FLAG_ bits (prefix flags and D / W / FAR)
  uint8_t size;      // Number of bytes in the instruction
  operand_t dest;
  operand_t src;     // If there only one operand, the src is used
} ins_t;

// Reads instructions from a file (used for debugging)
typedef struct {
  char *filename;     // File name
  void *data;         // Content of the file
  void *end;          // End pointer
  void *ptr;          // Current read position
  int size;           // File size (bytes)
  uint16_t next_addr; // Next address of the instruction
} ins_reader_t;

ins_reader_t *ins_reader_init();
void ins_reader_free(ins_reader_t *ins_reader);

// Non-zero once the read position has reached or passed the end pointer
inline static int ins_reader_is_end(ins_reader_t *ins_reader) {
  return ins_reader->ptr >= ins_reader->end;
}
// Non-zero only when the read position is exactly at the end pointer
inline static int ins_reader_is_exact_end(ins_reader_t *ins_reader) {
  return ins_reader->ptr == ins_reader->end;
}
inline static void *ins_reader_get_curr_ptr(ins_reader_t *ins_reader) {
  return ins_reader->ptr;
}
inline static uint16_t ins_reader_get_next_addr(ins_reader_t *ins_reader) {
  return ins_reader->next_addr;
}
// The ins object is within the object
void ins_reader_next(ins_reader_t *ins_reader, ins_t *ins);

// Prints the instruction's seg:offset address (upper-case hex) to stderr
inline static void print_ins_addr(ins_t *ins) {
  // %X expects unsigned int; make the promotion of the 16-bit fields explicit
  fprintf(stderr, "Instruction at address %X:%X\n",
          (unsigned)ins->addr.seg, (unsigned)ins->addr.offset);
}

// This is called at the beginning of an instruction
void *parse_prefix(ins_t *ins, void *data);
void *parse_opcode(ins_t *ins, void *data);

void *parse_alu_ins(ins_t *ins, int diff, int op, void *data);
void *parse_ins_grp1(ins_t *ins, void *data);
void *parse_ins_grp2(ins_t *ins, void *data);
void *parse_ins_grp3(ins_t *ins, void *data);
void *parse_ins_grp4(ins_t *ins, void *data);
void *parse_ins_grp5(ins_t *ins, void *data);
void *parse_ins(ins_t *ins, void *data);

void ins_rel_8_fprint(ins_t *ins, uint32_t next_addr, FILE *fp);
void ins_rel_16_fprint(ins_t *ins, uint32_t next_addr, FILE *fp);
void ins_fprint(ins_t *ins, uint32_t next_addr, FILE *fp);

#endif
-------------------------------------------------------------------------------- 1 | { 2 | "folders": [ 3 | { 4 | "path": "." 5 | } 6 | ], 7 | "settings": { 8 | "files.associations": { 9 | "ast.h": "c", 10 | "hashtable.h": "c", 11 | "parse_decl.h": "c", 12 | "parse_exp.h": "c", 13 | "parse_stmt.h": "c", 14 | "bintree.h": "c", 15 | "error.h": "c", 16 | "list.h": "c", 17 | "eval.h": "c", 18 | "token.h": "c", 19 | "type.h": "c" 20 | } 21 | } 22 | } --------------------------------------------------------------------------------