├── .gitignore
├── Makefile
├── README.md
├── TODO
├── src
    ├── ast.c
    ├── ast.h
    ├── codegen.c
    ├── codegen.h
    ├── kaleidoscope.c
    ├── lexer.l
    ├── parser.y
    └── uthash.h
└── tests
    ├── ast_tests.c
    ├── codegen_tests.c
    ├── minunit.h
    ├── parser_tests.c
    └── runtests.sh


/.gitignore:
--------------------------------------------------------------------------------
bin/
build/
src/*.o
src/**/*.o
tests/*_tests
.DS_Store
**/*.dSYM*
valgrind.log
src/lexer.c
src/lexer.h
src/parser.h
src/parser.c

--------------------------------------------------------------------------------
/Makefile:
--------------------------------------------------------------------------------
################################################################################
# Variables
################################################################################

# Compiler flags for the C sources and for the C++ driver used when linking.
CFLAGS=-g -Wall -Wextra -Wno-self-assign -std=c99
CXXFLAGS=-g -Wall -Wextra -Wno-self-assign

# Flex grammars and the files generated from them (one .c and one .h each).
LEX_SOURCES:=$(wildcard src/*.l)
LEX_OBJECTS:=$(patsubst %.l,%.c,${LEX_SOURCES}) $(patsubst %.l,%.h,${LEX_SOURCES})

# Bison grammars and the files generated from them (one .c and one .h each).
YACC_SOURCES:=$(wildcard src/*.y)
YACC_OBJECTS:=$(patsubst %.y,%.c,${YACC_SOURCES}) $(patsubst %.y,%.h,${YACC_SOURCES})

# Once src/lexer.c and src/parser.c have been generated they are matched by the
# wildcard as well as by the .l/.y substitutions; $(sort) removes the
# resulting duplicate object names.
SOURCES:=$(wildcard src/**/*.c src/*.c)
OBJECTS:=$(sort $(patsubst %.c,%.o,${SOURCES}) $(patsubst %.l,%.o,${LEX_SOURCES}) $(patsubst %.y,%.o,${YACC_SOURCES}))

# Everything except the REPL entry point goes into the static library. The
# filter patterns must carry the src/ prefix, otherwise they match nothing.
LIB_SOURCES:=$(filter-out src/kaleidoscope.c,${SOURCES})
LIB_OBJECTS:=$(filter-out src/kaleidoscope.o,${OBJECTS})

# Test programs, named after their source without the .c suffix. The LLVM
# based codegen test needs different flags and has its own rules.
TEST_SOURCES:=$(wildcard tests/*_tests.c)
TEST_OBJECTS:=$(filter-out tests/codegen_tests,$(patsubst %.c,%,${TEST_SOURCES}))

# GNU make predefines LEX=lex and YACC=yacc, so a plain `?=` would never take
# effect. Replace only those built-in defaults; values from the environment or
# the command line are still honoured.
ifeq ($(origin LEX),default)
LEX:=flex
endif
ifeq ($(origin YACC),default)
YACC:=bison
endif
YFLAGS?=-dv

# Kept recursive on purpose: the backticks are evaluated by the shell in each
# recipe, so llvm-config is only needed when an LLVM target is actually built.
LLVM_CC_FLAGS=`llvm-config --cflags`
LLVM_LINK_FLAGS=`llvm-config --libs --cflags --ldflags core analysis executionengine jit interpreter native`

################################################################################
# Default Target
################################################################################

.PHONY: all
all: build/libkaleidoscope.a build/kaleidoscope ${OBJECTS} test


################################################################################
# Binaries
################################################################################

# Static library built from LIB_OBJECTS. The output directory is an order-only
# prerequisite so that its changing mtime never forces a re-archive.
build/libkaleidoscope.a: ${LIB_OBJECTS} | build
	rm -f $@
	ar rcs $@ $^
	ranlib $@

# The REPL includes parser.h, so the generated lexer/parser files must exist
# (and be current) before it is compiled. Only the .c file is passed to the
# compiler.
src/kaleidoscope.o: src/kaleidoscope.c ${LEX_OBJECTS} ${YACC_OBJECTS}
	${CC} ${LLVM_CC_FLAGS} ${CFLAGS} -c -o $@ $<

# The generated sources include each other's generated header.
src/lexer.o: src/parser.h
src/parser.o: src/lexer.h

# Link with the C++ driver because LLVM needs the C++ runtime. Libraries go
# after the objects that reference them so static linking resolves correctly.
build/kaleidoscope: src/kaleidoscope.o build/libkaleidoscope.a | build
	$(CXX) $(CXXFLAGS) -rdynamic -Isrc -o $@ $^ $(LLVM_LINK_FLAGS)
	chmod 700 $@

build:
	mkdir -p $@


################################################################################
# Bison / Flex
################################################################################

# flex writes src/lexer.h as a side effect of generating src/lexer.c. The
# parser is generated first because lexer.l includes parser.h.
src/lexer.c: src/lexer.l src/parser.c
	${LEX} --header-file=src/lexer.h -o $@ $<

src/lexer.h: src/lexer.c ;

# bison -d writes src/parser.h as a side effect of generating src/parser.c.
src/parser.c: src/parser.y
	${YACC} ${YFLAGS} -o $@ $<

src/parser.h: src/parser.c ;


################################################################################
# LLVM
################################################################################

src/codegen.o: src/codegen.c
	${CC} ${LLVM_CC_FLAGS} ${CFLAGS} -c -o $@ $<


################################################################################
# Tests
################################################################################

.PHONY: test
test: $(addprefix build/,$(TEST_OBJECTS)) build/tests/codegen_tests
	@sh ./tests/runtests.sh

build/tests:
	mkdir -p $@

# The test binaries are written below build/, so that is where the real file
# targets live; this lets make skip relinking tests that are already current.
$(addprefix build/,$(TEST_OBJECTS)): build/%: %.c build/libkaleidoscope.a | build/tests
	$(CC) $(CFLAGS) -Isrc -o $@ $^

# Backward-compatible aliases: `make tests/ast_tests` still builds the binary.
.PHONY: $(TEST_OBJECTS)
$(TEST_OBJECTS): %: build/%

# The library is a prerequisite only so that the generated parser/lexer headers
# exist before this file is compiled; it is not passed to the -c compile.
build/tests/codegen_tests.o: tests/codegen_tests.c build/libkaleidoscope.a | build/tests
	$(CC) $(LLVM_CC_FLAGS) $(CFLAGS) -Isrc -c -o $@ $<

# Libraries go after the objects that reference them.
build/tests/codegen_tests: build/tests/codegen_tests.o build/libkaleidoscope.a
	$(CXX) $(CXXFLAGS) -Isrc -o $@ $^ $(LLVM_LINK_FLAGS)


################################################################################
# Clean up
################################################################################

# Removes the build/ output directory, all objects, and the files generated by
# flex and bison (including the report written by bison's -v flag).
.PHONY: clean
clean:
	rm -rf build ${OBJECTS} ${LEX_OBJECTS} ${YACC_OBJECTS} src/parser.output

--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
llvm-c-kaleidoscope
===================

An implementation of the Kaleidoscope language using Flex, Bison & the
LLVM-C bindings. To build the tool, simply run:

    $ make

And then run the Kaleidoscope REPL:

    $ build/kaleidoscope

Once the program has started, you can enter Kaleidoscope commands and see the
results printed after each line.
-------------------------------------------------------------------------------- /TODO: -------------------------------------------------------------------------------- 1 | # Code Generation 2 | 3 | # REPL -------------------------------------------------------------------------------- /src/ast.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | #include "ast.h" 5 | 6 | 7 | //============================================================================== 8 | // 9 | // Functions 10 | // 11 | //============================================================================== 12 | 13 | //-------------------------------------- 14 | // Number AST 15 | //-------------------------------------- 16 | 17 | // Creates an AST node for a number. 18 | // 19 | // value - The value of the AST. 20 | // 21 | // Returns a Number AST Node. 22 | kal_ast_node *kal_ast_number_create(double value) 23 | { 24 | kal_ast_node *node = malloc(sizeof(kal_ast_node)); 25 | node->type = KAL_AST_TYPE_NUMBER; 26 | node->number.value = value; 27 | return node; 28 | } 29 | 30 | 31 | //-------------------------------------- 32 | // Variable AST 33 | //-------------------------------------- 34 | 35 | // Creates an AST node for a variable. 36 | // 37 | // name - The name of the variable. 38 | // 39 | // Returns a Variable AST Node. 40 | kal_ast_node *kal_ast_variable_create(char *name) 41 | { 42 | kal_ast_node *node = malloc(sizeof(kal_ast_node)); 43 | node->type = KAL_AST_TYPE_VARIABLE; 44 | node->variable.name = strdup(name); 45 | return node; 46 | } 47 | 48 | 49 | //-------------------------------------- 50 | // Binary Expression AST 51 | //-------------------------------------- 52 | 53 | // Creates an AST node for a binary expression. 54 | // 55 | // op - The operation being performed. 56 | // lhs - The AST node for the left hand side of the expression. 57 | // rhs - The AST node for the right hand side of the expression. 
58 | // 59 | // Returns a Binary Expression AST Node. 60 | kal_ast_node *kal_ast_binary_expr_create(kal_ast_binop_e operator, 61 | kal_ast_node *lhs, 62 | kal_ast_node *rhs) 63 | { 64 | kal_ast_node *node = malloc(sizeof(kal_ast_node)); 65 | node->type = KAL_AST_TYPE_BINARY_EXPR; 66 | node->binary_expr.operator = operator; 67 | node->binary_expr.lhs = lhs; 68 | node->binary_expr.rhs = rhs; 69 | return node; 70 | } 71 | 72 | 73 | //-------------------------------------- 74 | // Function Call AST 75 | //-------------------------------------- 76 | 77 | // Creates an AST node for a function call. 78 | // 79 | // name - The name of the function being called. 80 | // args - A list of AST node expressions passed as arguments. 81 | // arg_count - The number of arguments. 82 | // 83 | // Returns a Function Call AST Node. 84 | kal_ast_node *kal_ast_call_create(char *name, kal_ast_node **args, 85 | int arg_count) 86 | { 87 | kal_ast_node *node = malloc(sizeof(kal_ast_node)); 88 | node->type = KAL_AST_TYPE_CALL; 89 | node->call.name = strdup(name); 90 | 91 | // Shallow copy arguments. 92 | node->prototype.args = malloc(sizeof(kal_ast_node*) * arg_count); 93 | memcpy(node->prototype.args, args, sizeof(kal_ast_node*) * arg_count); 94 | node->prototype.arg_count = arg_count; 95 | 96 | return node; 97 | } 98 | 99 | 100 | //-------------------------------------- 101 | // Function Prototype AST 102 | //-------------------------------------- 103 | 104 | // Creates an AST node for a function prototype. 105 | // 106 | // name - The name of the function. 107 | // args - A list of argument names. 108 | // arg_count - The number of arguments. 109 | // 110 | // Returns a Function Prototype AST Node. 111 | kal_ast_node *kal_ast_prototype_create(char *name, char **args, 112 | int arg_count) 113 | { 114 | int i; 115 | 116 | kal_ast_node *node = malloc(sizeof(kal_ast_node)); 117 | node->type = KAL_AST_TYPE_PROTOTYPE; 118 | node->prototype.name = strdup(name); 119 | 120 | // Copy arguments. 
121 | node->prototype.args = malloc(sizeof(char*) * arg_count); 122 | for(i=0; iprototype.args[i] = strdup(args[i]); 124 | } 125 | node->prototype.arg_count = arg_count; 126 | 127 | return node; 128 | } 129 | 130 | 131 | //-------------------------------------- 132 | // Function AST 133 | //-------------------------------------- 134 | 135 | // Creates an AST node for a function declaration. 136 | // 137 | // prototype - The definition for the function. 138 | // body - The body expression. 139 | // 140 | // Returns a Function AST Node. 141 | kal_ast_node *kal_ast_function_create(kal_ast_node *prototype, 142 | kal_ast_node *body) 143 | { 144 | kal_ast_node *node = malloc(sizeof(kal_ast_node)); 145 | node->type = KAL_AST_TYPE_FUNCTION; 146 | node->function.prototype = prototype; 147 | node->function.body = body; 148 | return node; 149 | } 150 | 151 | 152 | //-------------------------------------- 153 | // If Expression AST 154 | //-------------------------------------- 155 | 156 | // Creates an AST node for an if statement. 157 | // 158 | // condition - The condition to evaluate. 159 | // true_expr - The expression to evaluate if the condition is true. 160 | // false_expr - The expression to evaluate if the condition is false. 161 | // 162 | // Returns a If Expression AST Node. 163 | kal_ast_node *kal_ast_if_expr_create(kal_ast_node *condition, 164 | kal_ast_node *true_expr, 165 | kal_ast_node *false_expr) 166 | { 167 | kal_ast_node *node = malloc(sizeof(kal_ast_node)); 168 | node->type = KAL_AST_TYPE_IF_EXPR; 169 | node->if_expr.condition = condition; 170 | node->if_expr.true_expr = true_expr; 171 | node->if_expr.false_expr = false_expr; 172 | return node; 173 | } 174 | 175 | 176 | //-------------------------------------- 177 | // Node Lifecycle 178 | //-------------------------------------- 179 | 180 | // Recursively frees an AST node. 181 | // 182 | // node - The node to free. 
183 | void kal_ast_node_free(kal_ast_node *node) 184 | { 185 | unsigned int i; 186 | 187 | if(!node) return; 188 | 189 | // Recursively free dependent data. 190 | switch(node->type) { 191 | case KAL_AST_TYPE_NUMBER: break; 192 | case KAL_AST_TYPE_VARIABLE: { 193 | if(node->variable.name) free(node->variable.name); 194 | break; 195 | } 196 | case KAL_AST_TYPE_BINARY_EXPR: { 197 | if(node->binary_expr.lhs) kal_ast_node_free(node->binary_expr.lhs); 198 | if(node->binary_expr.rhs) kal_ast_node_free(node->binary_expr.rhs); 199 | break; 200 | } 201 | case KAL_AST_TYPE_CALL: { 202 | if(node->call.name) free(node->call.name); 203 | for(i=0; icall.arg_count; i++) { 204 | kal_ast_node_free(node->call.args[i]); 205 | } 206 | free(node->call.args); 207 | break; 208 | } 209 | case KAL_AST_TYPE_PROTOTYPE: { 210 | if(node->prototype.name) free(node->prototype.name); 211 | for(i=0; iprototype.arg_count; i++) { 212 | free(node->prototype.args[i]); 213 | } 214 | free(node->prototype.args); 215 | break; 216 | } 217 | case KAL_AST_TYPE_FUNCTION: { 218 | if(node->function.prototype) kal_ast_node_free(node->function.prototype); 219 | if(node->function.body) kal_ast_node_free(node->function.body); 220 | break; 221 | } 222 | case KAL_AST_TYPE_IF_EXPR: { 223 | if(node->if_expr.condition) kal_ast_node_free(node->if_expr.condition); 224 | if(node->if_expr.true_expr) kal_ast_node_free(node->if_expr.true_expr); 225 | if(node->if_expr.false_expr) kal_ast_node_free(node->if_expr.false_expr); 226 | break; 227 | } 228 | } 229 | 230 | free(node); 231 | } 232 | 233 | -------------------------------------------------------------------------------- /src/ast.h: -------------------------------------------------------------------------------- 1 | #ifndef _ast_h 2 | #define _ast_h 3 | 4 | //============================================================================== 5 | // 6 | // Definitions 7 | // 8 | //============================================================================== 9 | 10 | // Defines 
the types of expressions available. 11 | typedef enum kal_ast_node_type_e { 12 | KAL_AST_TYPE_NUMBER, 13 | KAL_AST_TYPE_VARIABLE, 14 | KAL_AST_TYPE_BINARY_EXPR, 15 | KAL_AST_TYPE_CALL, 16 | KAL_AST_TYPE_PROTOTYPE, 17 | KAL_AST_TYPE_FUNCTION, 18 | KAL_AST_TYPE_IF_EXPR, 19 | } kal_ast_node_type_e; 20 | 21 | // Defines the types of binary expressions. 22 | typedef enum kal_ast_binop_e { 23 | KAL_BINOP_PLUS, 24 | KAL_BINOP_MINUS, 25 | KAL_BINOP_MUL, 26 | KAL_BINOP_DIV, 27 | } kal_ast_binop_e; 28 | 29 | 30 | struct kal_ast_node; 31 | 32 | // Represents a number in the AST. 33 | typedef struct kal_ast_number { 34 | double value; 35 | } kal_ast_number; 36 | 37 | // Represents a variable in the AST. 38 | typedef struct kal_ast_variable { 39 | char *name; 40 | } kal_ast_variable; 41 | 42 | // Represents a binary expression in the AST. 43 | typedef struct kal_ast_binary_expr { 44 | kal_ast_binop_e operator; 45 | struct kal_ast_node *lhs; 46 | struct kal_ast_node *rhs; 47 | } kal_ast_binary_expr; 48 | 49 | // Represents a function call in the AST. 50 | typedef struct kal_ast_call { 51 | char *name; 52 | struct kal_ast_node **args; 53 | unsigned int arg_count; 54 | } kal_ast_call; 55 | 56 | // Represents a function prototype in the AST. 57 | typedef struct kal_ast_prototype { 58 | char *name; 59 | char **args; 60 | unsigned int arg_count; 61 | } kal_ast_prototype; 62 | 63 | // Represents a function in the AST. 64 | typedef struct kal_ast_function { 65 | struct kal_ast_node *prototype; 66 | struct kal_ast_node *body; 67 | } kal_ast_function; 68 | 69 | // Represents an if statement in the AST. 70 | typedef struct kal_ast_if_expr { 71 | struct kal_ast_node *condition; 72 | struct kal_ast_node *true_expr; 73 | struct kal_ast_node *false_expr; 74 | } kal_ast_if_expr; 75 | 76 | // Represents an expression in the AST. 
77 | typedef struct kal_ast_node { 78 | kal_ast_node_type_e type; 79 | union { 80 | kal_ast_number number; 81 | kal_ast_variable variable; 82 | kal_ast_binary_expr binary_expr; 83 | kal_ast_call call; 84 | kal_ast_prototype prototype; 85 | kal_ast_function function; 86 | kal_ast_if_expr if_expr; 87 | }; 88 | } kal_ast_node; 89 | 90 | 91 | 92 | //============================================================================== 93 | // 94 | // Functions 95 | // 96 | //============================================================================== 97 | 98 | kal_ast_node *kal_ast_number_create(double value); 99 | 100 | kal_ast_node *kal_ast_variable_create(char *name); 101 | 102 | kal_ast_node *kal_ast_binary_expr_create(kal_ast_binop_e operator, 103 | kal_ast_node *lhs, kal_ast_node *rhs); 104 | 105 | kal_ast_node *kal_ast_call_create(char *name, kal_ast_node **args, 106 | int arg_count); 107 | 108 | kal_ast_node *kal_ast_prototype_create(char *name, char **args, 109 | int arg_count); 110 | 111 | kal_ast_node *kal_ast_function_create(kal_ast_node *prototype, 112 | kal_ast_node *body); 113 | 114 | kal_ast_node *kal_ast_if_expr_create(kal_ast_node *condition, 115 | kal_ast_node *true_expr, kal_ast_node *false_expr); 116 | 117 | void kal_ast_node_free(kal_ast_node *node); 118 | 119 | #endif -------------------------------------------------------------------------------- /src/codegen.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | 7 | #include "codegen.h" 8 | 9 | //============================================================================== 10 | // 11 | // Variables 12 | // 13 | //============================================================================== 14 | 15 | // The look up for variables by name. 
16 | kal_named_value *named_values = NULL; 17 | 18 | 19 | //============================================================================== 20 | // 21 | // Functions 22 | // 23 | //============================================================================== 24 | 25 | //-------------------------------------- 26 | // Number 27 | //-------------------------------------- 28 | 29 | // Generates an LLVM value object for a Number AST. 30 | // 31 | // node - The node to generate code for. 32 | // 33 | // Returns an LLVM value reference. 34 | LLVMValueRef kal_codegen_number(kal_ast_node *node) 35 | { 36 | return LLVMConstReal(LLVMDoubleType(), node->number.value); 37 | } 38 | 39 | 40 | //-------------------------------------- 41 | // Variable 42 | //-------------------------------------- 43 | 44 | // Generates an LLVM value object for a Variable AST. 45 | // 46 | // node - The node to generate code for. 47 | // 48 | // Returns an LLVM value reference. 49 | LLVMValueRef kal_codegen_variable(kal_ast_node *node) 50 | { 51 | // Lookup variable reference. 52 | kal_named_value *val = NULL; 53 | HASH_FIND_STR(named_values, node->variable.name, val); 54 | 55 | if(val != NULL) { 56 | return val->value; 57 | } 58 | else { 59 | return NULL; 60 | } 61 | } 62 | 63 | 64 | //-------------------------------------- 65 | // Variable 66 | //-------------------------------------- 67 | 68 | // Generates an LLVM value object for a Binary Expression AST. 69 | // 70 | // node - The node to generate code for. 71 | // 72 | // Returns an LLVM value reference. 73 | LLVMValueRef kal_codegen_binary_expr(kal_ast_node *node, LLVMModuleRef module, 74 | LLVMBuilderRef builder) 75 | { 76 | // Evaluate left and right hand values. 77 | LLVMValueRef lhs = kal_codegen(node->binary_expr.lhs, module, builder); 78 | LLVMValueRef rhs = kal_codegen(node->binary_expr.rhs, module, builder); 79 | 80 | // Return NULL if one of the sides is invalid. 
81 | if(lhs == NULL || rhs == NULL) { 82 | return NULL; 83 | } 84 | 85 | // Create different IR code depending on the operator. 86 | switch(node->binary_expr.operator) { 87 | case KAL_BINOP_PLUS: { 88 | return LLVMBuildFAdd(builder, lhs, rhs, "addtmp"); 89 | } 90 | case KAL_BINOP_MINUS: { 91 | return LLVMBuildFSub(builder, lhs, rhs, "subtmp"); 92 | } 93 | case KAL_BINOP_MUL: { 94 | return LLVMBuildFMul(builder, lhs, rhs, "multmp"); 95 | } 96 | case KAL_BINOP_DIV: { 97 | return LLVMBuildFDiv(builder, lhs, rhs, "divtmp"); 98 | } 99 | } 100 | 101 | return NULL; 102 | } 103 | 104 | 105 | //-------------------------------------- 106 | // Function Call 107 | //-------------------------------------- 108 | 109 | // Generates an LLVM value object for a Function Call AST. 110 | // 111 | // node - The node to generate code for. 112 | // 113 | // Returns an LLVM value reference. 114 | LLVMValueRef kal_codegen_call(kal_ast_node *node, LLVMModuleRef module, 115 | LLVMBuilderRef builder) 116 | { 117 | // Retrieve function. 118 | LLVMValueRef func = LLVMGetNamedFunction(module, node->call.name); 119 | 120 | // Return error if function not found in module. 121 | if(func == NULL) { 122 | return NULL; 123 | } 124 | 125 | // Return error if number of arguments doesn't match. 126 | if(LLVMCountParams(func) != node->call.arg_count) { 127 | return NULL; 128 | } 129 | 130 | // Evaluate arguments. 131 | LLVMValueRef *args = malloc(sizeof(LLVMValueRef) * node->call.arg_count); 132 | unsigned int i; 133 | unsigned int arg_count = node->call.arg_count; 134 | for(i=0; icall.args[i], module, builder); 136 | 137 | if(args[i] == NULL) { 138 | free(args); 139 | return NULL; 140 | } 141 | } 142 | 143 | // Create call instruction. 
144 | return LLVMBuildCall(builder, func, args, arg_count, "calltmp"); 145 | } 146 | 147 | 148 | //-------------------------------------- 149 | // Function Prototype 150 | //-------------------------------------- 151 | 152 | // Generates an LLVM value object for a Function Prototype AST. 153 | // 154 | // node - The node to generate code for. 155 | // 156 | // Returns an LLVM value reference. 157 | LLVMValueRef kal_codegen_prototype(kal_ast_node *node, LLVMModuleRef module) 158 | { 159 | unsigned int i; 160 | unsigned int arg_count = node->prototype.arg_count; 161 | 162 | // Use an existing definition if one exists. 163 | LLVMValueRef func = LLVMGetNamedFunction(module, node->prototype.name); 164 | if(func != NULL) { 165 | // Verify parameter count matches. 166 | if(LLVMCountParams(func) != arg_count) { 167 | fprintf(stderr, "Existing function exists with different parameter count"); 168 | return NULL; 169 | } 170 | 171 | // Verify that the function is empty. 172 | if(LLVMCountBasicBlocks(func) != 0) { 173 | fprintf(stderr, "Existing function exists with a body"); 174 | return NULL; 175 | } 176 | } 177 | // Otherwise create a new function definition. 178 | else { 179 | // Create argument list. 180 | LLVMTypeRef *params = malloc(sizeof(LLVMTypeRef) * arg_count); 181 | for(i=0; iprototype.name, funcType); 190 | LLVMSetLinkage(func, LLVMExternalLinkage); 191 | } 192 | 193 | // Assign arguments to named values lookup. 194 | for(i=0; iprototype.args[i]); 197 | 198 | kal_named_value *val = malloc(sizeof(kal_named_value)); 199 | val->name = strdup(node->prototype.args[i]); 200 | val->value = param; 201 | HASH_ADD_KEYPTR(hh, named_values, val->name, strlen(val->name), val); 202 | } 203 | 204 | return func; 205 | } 206 | 207 | 208 | //-------------------------------------- 209 | // Function 210 | //-------------------------------------- 211 | 212 | // Generates an LLVM value object for a Function AST. 213 | // 214 | // node - The node to generate code for. 
215 | // 216 | // Returns an LLVM value reference. 217 | LLVMValueRef kal_codegen_function(kal_ast_node *node, LLVMModuleRef module, 218 | LLVMBuilderRef builder) 219 | { 220 | HASH_CLEAR(hh, named_values); 221 | 222 | // Generate the prototype first. 223 | LLVMValueRef func = kal_codegen(node->function.prototype, module, builder); 224 | if(func == NULL) { 225 | return NULL; 226 | } 227 | 228 | // Create basic block. 229 | LLVMBasicBlockRef block = LLVMAppendBasicBlock(func, "entry"); 230 | LLVMPositionBuilderAtEnd(builder, block); 231 | 232 | // Generate body. 233 | LLVMValueRef body = kal_codegen(node->function.body, module, builder); 234 | if(body == NULL) { 235 | LLVMDeleteFunction(func); 236 | return NULL; 237 | } 238 | 239 | // Insert body as return vale. 240 | LLVMBuildRet(builder, body); 241 | 242 | // Verify function. 243 | if(LLVMVerifyFunction(func, LLVMPrintMessageAction) == 1) { 244 | fprintf(stderr, "Invalid function"); 245 | LLVMDeleteFunction(func); 246 | return NULL; 247 | } 248 | 249 | return func; 250 | } 251 | 252 | 253 | //-------------------------------------- 254 | // If Expression 255 | //-------------------------------------- 256 | 257 | // Generates an LLVM value object for an If Expression AST. 258 | // 259 | // node - The node to generate code for. 260 | // 261 | // Returns an LLVM value reference. 262 | LLVMValueRef kal_codegen_if_expr(kal_ast_node *node, LLVMModuleRef module, 263 | LLVMBuilderRef builder) 264 | { 265 | // Generate the condition. 266 | LLVMValueRef condition = kal_codegen(node->if_expr.condition, module, builder); 267 | if(condition == NULL) { 268 | return NULL; 269 | } 270 | 271 | // Convert condition to bool. 272 | LLVMValueRef zero = LLVMConstReal(LLVMDoubleType(), 0); 273 | condition = LLVMBuildFCmp(builder, LLVMRealONE, condition, zero, "ifcond"); 274 | 275 | // Retrieve function. 276 | LLVMValueRef func = LLVMGetBasicBlockParent(LLVMGetInsertBlock(builder)); 277 | 278 | // Generate true/false expr and merge. 
279 | LLVMBasicBlockRef then_block = LLVMAppendBasicBlock(func, "then"); 280 | LLVMBasicBlockRef else_block = LLVMAppendBasicBlock(func, "else"); 281 | LLVMBasicBlockRef merge_block = LLVMAppendBasicBlock(func, "ifcont"); 282 | 283 | LLVMBuildCondBr(builder, condition, then_block, else_block); 284 | 285 | // Generate 'then' block. 286 | LLVMPositionBuilderAtEnd(builder, then_block); 287 | LLVMValueRef then_value = kal_codegen(node->if_expr.true_expr, module, builder); 288 | if(then_value == NULL) { 289 | return NULL; 290 | } 291 | 292 | LLVMBuildBr(builder, merge_block); 293 | then_block = LLVMGetInsertBlock(builder); 294 | 295 | LLVMPositionBuilderAtEnd(builder, else_block); 296 | LLVMValueRef else_value = kal_codegen(node->if_expr.false_expr, module, builder); 297 | if(else_value == NULL) { 298 | return NULL; 299 | } 300 | LLVMBuildBr(builder, merge_block); 301 | else_block = LLVMGetInsertBlock(builder); 302 | 303 | LLVMPositionBuilderAtEnd(builder, merge_block); 304 | LLVMValueRef phi = LLVMBuildPhi(builder, LLVMDoubleType (), ""); 305 | LLVMAddIncoming(phi, &then_value, &then_block, 1); 306 | LLVMAddIncoming(phi, &else_value, &else_block, 1); 307 | 308 | return phi; 309 | } 310 | 311 | 312 | 313 | 314 | //-------------------------------------- 315 | // Code Generation 316 | //-------------------------------------- 317 | 318 | // Recursively generates LLVM objects to build the code. 319 | // 320 | // node - The node to generate code for. 321 | // module - The module that the code is being generated for. 322 | // builder - The LLVM builder that is creating the IR. 323 | // 324 | // Returns an LLVM value reference. 325 | LLVMValueRef kal_codegen(kal_ast_node *node, LLVMModuleRef module, 326 | LLVMBuilderRef builder) 327 | { 328 | // Recursively free dependent data. 
329 | switch(node->type) { 330 | case KAL_AST_TYPE_NUMBER: { 331 | return kal_codegen_number(node); 332 | } 333 | case KAL_AST_TYPE_VARIABLE: { 334 | return kal_codegen_variable(node); 335 | } 336 | case KAL_AST_TYPE_BINARY_EXPR: { 337 | return kal_codegen_binary_expr(node, module, builder); 338 | } 339 | case KAL_AST_TYPE_CALL: { 340 | return kal_codegen_call(node, module, builder); 341 | } 342 | case KAL_AST_TYPE_PROTOTYPE: { 343 | return kal_codegen_prototype(node, module); 344 | } 345 | case KAL_AST_TYPE_FUNCTION: { 346 | return kal_codegen_function(node, module, builder); 347 | } 348 | case KAL_AST_TYPE_IF_EXPR: { 349 | return kal_codegen_if_expr(node, module, builder); 350 | } 351 | } 352 | 353 | return NULL; 354 | } 355 | 356 | 357 | //-------------------------------------- 358 | // Utility 359 | //-------------------------------------- 360 | 361 | // Clears the named variables. 362 | void kal_codegen_reset() 363 | { 364 | HASH_CLEAR(hh, named_values); 365 | } 366 | 367 | // Clears the named variables. 368 | kal_named_value *kal_codegen_named_value(const char *name) 369 | { 370 | kal_named_value *val = NULL; 371 | HASH_FIND_STR(named_values, name, val); 372 | return val; 373 | } 374 | -------------------------------------------------------------------------------- /src/codegen.h: -------------------------------------------------------------------------------- 1 | #ifndef _codegen_h 2 | #define _codegen_h 3 | 4 | #include 5 | #include "ast.h" 6 | #include "uthash.h" 7 | 8 | 9 | //============================================================================== 10 | // 11 | // Typedefs 12 | // 13 | //============================================================================== 14 | 15 | // Used to hold references to arguments by name. 
16 | typedef struct kal_named_value { 17 | const char *name; 18 | LLVMValueRef value; 19 | UT_hash_handle hh; 20 | } kal_named_value; 21 | 22 | 23 | //============================================================================== 24 | // 25 | // Functions 26 | // 27 | //============================================================================== 28 | 29 | //-------------------------------------- 30 | // Codegen 31 | //-------------------------------------- 32 | 33 | LLVMValueRef kal_codegen(kal_ast_node *node, LLVMModuleRef module, 34 | LLVMBuilderRef builder); 35 | 36 | 37 | //-------------------------------------- 38 | // Utility 39 | //-------------------------------------- 40 | 41 | void kal_codegen_reset(); 42 | 43 | kal_named_value *kal_codegen_named_value(const char *name); 44 | 45 | 46 | #endif -------------------------------------------------------------------------------- /src/kaleidoscope.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | 8 | #include "ast.h" 9 | #include "parser.h" 10 | #include "codegen.h" 11 | 12 | //============================================================================== 13 | // 14 | // Main 15 | // 16 | //============================================================================== 17 | 18 | int main(int argc, char **argv) 19 | { 20 | LLVMModuleRef module = LLVMModuleCreateWithName("kal"); 21 | LLVMBuilderRef builder = LLVMCreateBuilder(); 22 | LLVMExecutionEngineRef engine; 23 | 24 | LLVMInitializeNativeTarget(); 25 | LLVMLinkInJIT(); 26 | 27 | // Create execution engine. 28 | char *msg; 29 | if(LLVMCreateExecutionEngineForModule(&engine, module, &msg) == 1) { 30 | fprintf(stderr, "%s\n", msg); 31 | LLVMDisposeMessage(msg); 32 | return 1; 33 | } 34 | 35 | // Setup optimizations. 
36 | LLVMPassManagerRef pass_manager = LLVMCreateFunctionPassManagerForModule(module); 37 | LLVMAddTargetData(LLVMGetExecutionEngineTargetData(engine), pass_manager); 38 | LLVMAddPromoteMemoryToRegisterPass(pass_manager); 39 | LLVMAddInstructionCombiningPass(pass_manager); 40 | LLVMAddReassociatePass(pass_manager); 41 | LLVMAddGVNPass(pass_manager); 42 | LLVMAddCFGSimplificationPass(pass_manager); 43 | LLVMInitializeFunctionPassManager(pass_manager); 44 | 45 | // Main REPL loop. 46 | while(1) { 47 | // Show prompt. 48 | fprintf(stderr, "ready > "); 49 | 50 | // Read input. 51 | char *input = NULL; 52 | size_t len = 0; 53 | 54 | if(getline(&input, &len, stdin) == -1) { 55 | fprintf(stderr, "Error reading from stdin\n"); 56 | break; 57 | } 58 | 59 | // Exit if 'quit' is read. 60 | if(strcmp(input, "quit\n") == 0) { 61 | break; 62 | } 63 | 64 | // Parse 65 | kal_ast_node *node = NULL; 66 | int rc = kal_parse(input, &node); 67 | if(rc != 0) { 68 | fprintf(stderr, "Parse error\n"); 69 | continue; 70 | } 71 | 72 | // Wrap in an anonymous function if it's a top-level expression. 73 | bool is_top_level = (node->type != KAL_AST_TYPE_FUNCTION && node->type != KAL_AST_TYPE_PROTOTYPE); 74 | if(is_top_level) { 75 | kal_ast_node *prototype = kal_ast_prototype_create("", NULL, 0); 76 | node = kal_ast_function_create(prototype, node); 77 | } 78 | 79 | // Generate node. 80 | LLVMValueRef value = kal_codegen(node, module, builder); 81 | if(value == NULL) { 82 | fprintf(stderr, "Unable to codegen for node\n"); 83 | continue; 84 | } 85 | 86 | // Dump IR. 87 | LLVMDumpValue(value); 88 | 89 | // Run it if it's a top level expression. 90 | if(is_top_level) { 91 | void *fp = LLVMGetPointerToGlobal(engine, value); 92 | double (*FP)() = (double (*)())(intptr_t)fp; 93 | fprintf(stderr, "Evaluted to %f\n", FP()); 94 | } 95 | // If this is a function then optimize it. 
96 | else if(node->type == KAL_AST_TYPE_FUNCTION) { 97 | LLVMRunFunctionPassManager(pass_manager, value); 98 | } 99 | 100 | // Clean up. 101 | kal_ast_node_free(node); 102 | } 103 | 104 | // Dump entire module. 105 | LLVMDumpModule(module); 106 | 107 | LLVMDisposePassManager(pass_manager); 108 | LLVMDisposeBuilder(builder); 109 | LLVMDisposeModule(module); 110 | 111 | return 0; 112 | } 113 | 114 | -------------------------------------------------------------------------------- /src/lexer.l: -------------------------------------------------------------------------------- 1 | %{ 2 | #include "ast.h" 3 | #include "parser.h" 4 | #define SAVE_STRING yylval->string = strndup(yytext, yyleng) /* heap copy of the token text; the parser actions are responsible for freeing it */ 5 | #define SAVE_NUMBER yylval->number = atof(yytext) 6 | #define TOKEN(t) (yylval->token = t) 7 | %} 8 | 9 | %option noyywrap 10 | %option reentrant 11 | %option bison-bridge 12 | 13 | 14 | %% 15 | 16 | "def" return TOKEN(TDEF); 17 | "extern" return TOKEN(TEXTERN); 18 | "if" return TOKEN(TIF); 19 | "then" return TOKEN(TTHEN); 20 | "else" return TOKEN(TELSE); 21 | [ \t\n] ; 22 | [a-zA-Z_][a-zA-Z0-9_]* SAVE_STRING; return TIDENTIFIER; 23 | [0-9]+(\.[0-9]+)? SAVE_NUMBER; return TNUMBER; /* integer or decimal literal; requires at least one digit so the rule can never match the empty string */ 24 | "=" return TOKEN(TEQUAL); 25 | "==" return TOKEN(TCEQ); 26 | "!=" return TOKEN(TCNE); 27 | "<" return TOKEN(TCLT); 28 | "<=" return TOKEN(TCLE); 29 | ">" return TOKEN(TCGT); 30 | ">=" return TOKEN(TCGE); 31 | "(" return TOKEN(TLPAREN); 32 | ")" return TOKEN(TRPAREN); 33 | "{" return TOKEN(TLBRACE); 34 | "}" return TOKEN(TRBRACE); 35 | "." return TOKEN(TDOT); 36 | "," return TOKEN(TCOMMA); 37 | "+" return TOKEN(TPLUS); 38 | "-" return TOKEN(TMINUS); 39 | "*" return TOKEN(TMUL); 40 | "/" return TOKEN(TDIV); 41 | . 
printf("Unknown token!\n"); yyterminate(); 42 | 43 | %% -------------------------------------------------------------------------------- /src/parser.y: -------------------------------------------------------------------------------- 1 | %{ 2 | #include "stdio.h" 3 | #include "ast.h" 4 | #include "parser.h" 5 | #include "lexer.h" 6 | kal_ast_node *root; /* AST produced by the `program` rule; read back by kal_parse() */ 7 | extern int yylex(); 8 | void yyerror(void *scanner, const char *s) { printf("ERROR: %s\n", s); } 9 | %} 10 | 11 | %debug 12 | %pure-parser 13 | %lex-param {void *scanner} 14 | %parse-param {void *scanner} 15 | 16 | %code provides { 17 | int kal_parse(char *text, kal_ast_node **node); 18 | } 19 | 20 | %code top { 21 | void free_args(void **args, int count); 22 | } 23 | 24 | %union { 25 | char *string; 26 | double number; 27 | kal_ast_node *node; 28 | struct { 29 | kal_ast_node **args; 30 | int count; 31 | } call_args; 32 | struct { 33 | char **args; 34 | int count; 35 | } proto_args; 36 | int token; 37 | } 38 | 39 | %token <string> TIDENTIFIER 40 | %token <number> TNUMBER 41 | %token <token> TCEQ TCNE TCLT TCLE TCGT TCGE TEQUAL 42 | %token <token> TLPAREN TRPAREN TLBRACE TRBRACE TCOMMA TDOT 43 | %token <token> TPLUS TMINUS TMUL TDIV 44 | %token <token> TEXTERN TDEF 45 | %token <token> TIF TTHEN TELSE 46 | 47 | %type <node> expr ident number call prototype extern_func function if_expr 48 | %type <call_args> call_args 49 | %type <proto_args> proto_args 50 | 51 | %left TPLUS TMINUS 52 | %left TMUL TDIV 53 | %left TELSE 54 | 55 | %start program 56 | 57 | %% 58 | 59 | program : /* empty */ 60 | | extern_func { root = $1; } 61 | | function { root = $1; } 62 | | expr { root = $1; } 63 | ; 64 | 65 | ident : TIDENTIFIER { $$ = kal_ast_variable_create($1); free($1); }; 66 | 67 | number : TNUMBER { $$ = kal_ast_number_create($1);}; 68 | 69 | function : TDEF prototype expr { $$ = kal_ast_function_create($2, $3); }; 70 | 71 | call : TIDENTIFIER TLPAREN call_args TRPAREN { $$ = kal_ast_call_create($1, $3.args, $3.count); free($1); free($3.args); }; 72 | 73 | call_args : /* empty */ { $$.count = 0; $$.args = NULL; } 
74 | | expr { $$.count = 1; $$.args = malloc(sizeof(kal_ast_node*)); $$.args[0] = $1; } 75 | | call_args TCOMMA expr { $1.count++; $1.args = realloc($1.args, sizeof(kal_ast_node*) * $1.count); $1.args[$1.count-1] = $3; $$ = $1; } 76 | ; 77 | 78 | prototype : TIDENTIFIER TLPAREN proto_args TRPAREN { $$ = kal_ast_prototype_create($1, $3.args, $3.count); free($1); free_args((void**)$3.args, $3.count); }; 79 | 80 | proto_args : /* empty */ { $$.count = 0; $$.args = NULL; } 81 | | TIDENTIFIER { $$.count = 1; $$.args = malloc(sizeof(char*)); $$.args[0] = $1; /* take ownership of the lexer-allocated name; released by free_args() in `prototype` */ } 82 | | proto_args TCOMMA TIDENTIFIER { $1.count++; $1.args = realloc($1.args, sizeof(char*) * $1.count); $1.args[$1.count-1] = $3; /* take ownership instead of copying, so the token string is not leaked */ $$ = $1; } 83 | ; 84 | 85 | extern_func : TEXTERN prototype { $$ = $2; }; 86 | 87 | if_expr : TIF expr TTHEN expr TELSE expr { $$ = kal_ast_if_expr_create($2, $4, $6); }; 88 | 89 | expr : expr TPLUS expr { $$ = kal_ast_binary_expr_create(KAL_BINOP_PLUS, $1, $3); } 90 | | expr TMINUS expr { $$ = kal_ast_binary_expr_create(KAL_BINOP_MINUS, $1, $3); } 91 | | expr TMUL expr { $$ = kal_ast_binary_expr_create(KAL_BINOP_MUL, $1, $3); } 92 | | expr TDIV expr { $$ = kal_ast_binary_expr_create(KAL_BINOP_DIV, $1, $3); } 93 | | if_expr 94 | | number 95 | | ident 96 | | call 97 | | TLPAREN expr TRPAREN { $$ = $2; } 98 | ; 99 | 100 | %% 101 | 102 | 103 | //============================================================================== 104 | // 105 | // Functions 106 | // 107 | //============================================================================== 108 | 109 | // Parses a string that contains Kaleidoscope program text. 110 | // 111 | // text - The text containing the kaleidoscope program. 112 | // node - The pointer to where the root AST node should be returned. 113 | // 114 | // Returns 0 if a root node was produced; returns -1 on a syntax error or when the text contains no program (in which case *node is left untouched). 115 | int kal_parse(char *text, kal_ast_node **node) 116 | { 117 | // yydebug = 1; 118 | root = NULL; // Reset the global so a tree from an earlier call (possibly already freed by the caller) is never handed out again. 119 | // Parse using Bison. 
120 | yyscan_t scanner; 121 | yylex_init(&scanner); 122 | YY_BUFFER_STATE buffer = yy_scan_string(text, scanner); 123 | int rc = yyparse(scanner); 124 | yy_delete_buffer(buffer, scanner); 125 | yylex_destroy(scanner); 126 | 127 | // If parse was successful and produced a tree, return root node. 128 | if(rc == 0 && root != NULL) { 129 | *node = root; 130 | return 0; 131 | } 132 | // Otherwise (syntax error or empty program) return error. 133 | else { 134 | return -1; 135 | } 136 | } 137 | 138 | // Frees an array and all the elements of the array. 139 | // 140 | // args - The array to free. 141 | // count - The number of elements in the array. 142 | void free_args(void **args, int count) 143 | { 144 | int i; 145 | for(i=0; i /* memcmp,strlen */ 28 | #include /* ptrdiff_t */ 29 | #include /* exit() */ 30 | 31 | /* These macros use decltype or the earlier __typeof GNU extension. 32 | As decltype is only available in newer compilers (VS2010 or gcc 4.3+ 33 | when compiling c++ source) this code uses whatever method is needed 34 | or, for VS2008 where neither is available, uses casting workarounds. 
*/ 35 | #ifdef _MSC_VER /* MS compiler */ 36 | #if _MSC_VER >= 1600 && defined(__cplusplus) /* VS2010 or newer in C++ mode */ 37 | #define DECLTYPE(x) (decltype(x)) 38 | #else /* VS2008 or older (or VS2010 in C mode) */ 39 | #define NO_DECLTYPE 40 | #define DECLTYPE(x) 41 | #endif 42 | #else /* GNU, Sun and other compilers */ 43 | #define DECLTYPE(x) (__typeof(x)) 44 | #endif 45 | 46 | #ifdef NO_DECLTYPE 47 | #define DECLTYPE_ASSIGN(dst,src) \ 48 | do { \ 49 | char **_da_dst = (char**)(&(dst)); \ 50 | *_da_dst = (char*)(src); \ 51 | } while(0) 52 | #else 53 | #define DECLTYPE_ASSIGN(dst,src) \ 54 | do { \ 55 | (dst) = DECLTYPE(dst)(src); \ 56 | } while(0) 57 | #endif 58 | 59 | /* a number of the hash function use uint32_t which isn't defined on win32 */ 60 | #ifdef _MSC_VER 61 | typedef unsigned int uint32_t; 62 | typedef unsigned char uint8_t; 63 | #else 64 | #include /* uint32_t */ 65 | #endif 66 | 67 | #define UTHASH_VERSION 1.9.6 68 | 69 | #ifndef uthash_fatal 70 | #define uthash_fatal(msg) exit(-1) /* fatal error (out of memory,etc) */ 71 | #endif 72 | #ifndef uthash_malloc 73 | #define uthash_malloc(sz) malloc(sz) /* malloc fcn */ 74 | #endif 75 | #ifndef uthash_free 76 | #define uthash_free(ptr,sz) free(ptr) /* free fcn */ 77 | #endif 78 | 79 | #ifndef uthash_noexpand_fyi 80 | #define uthash_noexpand_fyi(tbl) /* can be defined to log noexpand */ 81 | #endif 82 | #ifndef uthash_expand_fyi 83 | #define uthash_expand_fyi(tbl) /* can be defined to log expands */ 84 | #endif 85 | 86 | /* initial number of buckets */ 87 | #define HASH_INITIAL_NUM_BUCKETS 32 /* initial number of buckets */ 88 | #define HASH_INITIAL_NUM_BUCKETS_LOG2 5 /* lg2 of initial number of buckets */ 89 | #define HASH_BKT_CAPACITY_THRESH 10 /* expand when bucket count reaches */ 90 | 91 | /* calculate the element whose hash handle address is hhe */ 92 | #define ELMT_FROM_HH(tbl,hhp) ((void*)(((char*)(hhp)) - ((tbl)->hho))) 93 | 94 | #define HASH_FIND(hh,head,keyptr,keylen,out) \ 95 | do { \ 96 
| unsigned _hf_bkt,_hf_hashv; \ 97 | out=NULL; \ 98 | if (head) { \ 99 | HASH_FCN(keyptr,keylen, (head)->hh.tbl->num_buckets, _hf_hashv, _hf_bkt); \ 100 | if (HASH_BLOOM_TEST((head)->hh.tbl, _hf_hashv)) { \ 101 | HASH_FIND_IN_BKT((head)->hh.tbl, hh, (head)->hh.tbl->buckets[ _hf_bkt ], \ 102 | keyptr,keylen,out); \ 103 | } \ 104 | } \ 105 | } while (0) 106 | 107 | #ifdef HASH_BLOOM 108 | #define HASH_BLOOM_BITLEN (1ULL << HASH_BLOOM) 109 | #define HASH_BLOOM_BYTELEN (HASH_BLOOM_BITLEN/8) + ((HASH_BLOOM_BITLEN%8) ? 1:0) 110 | #define HASH_BLOOM_MAKE(tbl) \ 111 | do { \ 112 | (tbl)->bloom_nbits = HASH_BLOOM; \ 113 | (tbl)->bloom_bv = (uint8_t*)uthash_malloc(HASH_BLOOM_BYTELEN); \ 114 | if (!((tbl)->bloom_bv)) { uthash_fatal( "out of memory"); } \ 115 | memset((tbl)->bloom_bv, 0, HASH_BLOOM_BYTELEN); \ 116 | (tbl)->bloom_sig = HASH_BLOOM_SIGNATURE; \ 117 | } while (0) 118 | 119 | #define HASH_BLOOM_FREE(tbl) \ 120 | do { \ 121 | uthash_free((tbl)->bloom_bv, HASH_BLOOM_BYTELEN); \ 122 | } while (0) 123 | 124 | #define HASH_BLOOM_BITSET(bv,idx) (bv[(idx)/8] |= (1U << ((idx)%8))) 125 | #define HASH_BLOOM_BITTEST(bv,idx) (bv[(idx)/8] & (1U << ((idx)%8))) 126 | 127 | #define HASH_BLOOM_ADD(tbl,hashv) \ 128 | HASH_BLOOM_BITSET((tbl)->bloom_bv, (hashv & (uint32_t)((1ULL << (tbl)->bloom_nbits) - 1))) 129 | 130 | #define HASH_BLOOM_TEST(tbl,hashv) \ 131 | HASH_BLOOM_BITTEST((tbl)->bloom_bv, (hashv & (uint32_t)((1ULL << (tbl)->bloom_nbits) - 1))) 132 | 133 | #else 134 | #define HASH_BLOOM_MAKE(tbl) 135 | #define HASH_BLOOM_FREE(tbl) 136 | #define HASH_BLOOM_ADD(tbl,hashv) 137 | #define HASH_BLOOM_TEST(tbl,hashv) (1) 138 | #endif 139 | 140 | #define HASH_MAKE_TABLE(hh,head) \ 141 | do { \ 142 | (head)->hh.tbl = (UT_hash_table*)uthash_malloc( \ 143 | sizeof(UT_hash_table)); \ 144 | if (!((head)->hh.tbl)) { uthash_fatal( "out of memory"); } \ 145 | memset((head)->hh.tbl, 0, sizeof(UT_hash_table)); \ 146 | (head)->hh.tbl->tail = &((head)->hh); \ 147 | (head)->hh.tbl->num_buckets = 
HASH_INITIAL_NUM_BUCKETS; \ 148 | (head)->hh.tbl->log2_num_buckets = HASH_INITIAL_NUM_BUCKETS_LOG2; \ 149 | (head)->hh.tbl->hho = (char*)(&(head)->hh) - (char*)(head); \ 150 | (head)->hh.tbl->buckets = (UT_hash_bucket*)uthash_malloc( \ 151 | HASH_INITIAL_NUM_BUCKETS*sizeof(struct UT_hash_bucket)); \ 152 | if (! (head)->hh.tbl->buckets) { uthash_fatal( "out of memory"); } \ 153 | memset((head)->hh.tbl->buckets, 0, \ 154 | HASH_INITIAL_NUM_BUCKETS*sizeof(struct UT_hash_bucket)); \ 155 | HASH_BLOOM_MAKE((head)->hh.tbl); \ 156 | (head)->hh.tbl->signature = HASH_SIGNATURE; \ 157 | } while(0) 158 | 159 | #define HASH_ADD(hh,head,fieldname,keylen_in,add) \ 160 | HASH_ADD_KEYPTR(hh,head,&((add)->fieldname),keylen_in,add) 161 | 162 | #define HASH_ADD_KEYPTR(hh,head,keyptr,keylen_in,add) \ 163 | do { \ 164 | unsigned _ha_bkt; \ 165 | (add)->hh.next = NULL; \ 166 | (add)->hh.key = (char*)keyptr; \ 167 | (add)->hh.keylen = (unsigned)keylen_in; \ 168 | if (!(head)) { \ 169 | head = (add); \ 170 | (head)->hh.prev = NULL; \ 171 | HASH_MAKE_TABLE(hh,head); \ 172 | } else { \ 173 | (head)->hh.tbl->tail->next = (add); \ 174 | (add)->hh.prev = ELMT_FROM_HH((head)->hh.tbl, (head)->hh.tbl->tail); \ 175 | (head)->hh.tbl->tail = &((add)->hh); \ 176 | } \ 177 | (head)->hh.tbl->num_items++; \ 178 | (add)->hh.tbl = (head)->hh.tbl; \ 179 | HASH_FCN(keyptr,keylen_in, (head)->hh.tbl->num_buckets, \ 180 | (add)->hh.hashv, _ha_bkt); \ 181 | HASH_ADD_TO_BKT((head)->hh.tbl->buckets[_ha_bkt],&(add)->hh); \ 182 | HASH_BLOOM_ADD((head)->hh.tbl,(add)->hh.hashv); \ 183 | HASH_EMIT_KEY(hh,head,keyptr,keylen_in); \ 184 | HASH_FSCK(hh,head); \ 185 | } while(0) 186 | 187 | #define HASH_TO_BKT( hashv, num_bkts, bkt ) \ 188 | do { \ 189 | bkt = ((hashv) & ((num_bkts) - 1)); \ 190 | } while(0) 191 | 192 | /* delete "delptr" from the hash table. 193 | * "the usual" patch-up process for the app-order doubly-linked-list. 194 | * The use of _hd_hh_del below deserves special explanation. 
195 | * These used to be expressed using (delptr) but that led to a bug 196 | * if someone used the same symbol for the head and deletee, like 197 | * HASH_DELETE(hh,users,users); 198 | * We want that to work, but by changing the head (users) below 199 | * we were forfeiting our ability to further refer to the deletee (users) 200 | * in the patch-up process. Solution: use scratch space to 201 | * copy the deletee pointer, then the latter references are via that 202 | * scratch pointer rather than through the repointed (users) symbol. 203 | */ 204 | #define HASH_DELETE(hh,head,delptr) \ 205 | do { \ 206 | unsigned _hd_bkt; \ 207 | struct UT_hash_handle *_hd_hh_del; \ 208 | if ( ((delptr)->hh.prev == NULL) && ((delptr)->hh.next == NULL) ) { \ 209 | uthash_free((head)->hh.tbl->buckets, \ 210 | (head)->hh.tbl->num_buckets*sizeof(struct UT_hash_bucket) ); \ 211 | HASH_BLOOM_FREE((head)->hh.tbl); \ 212 | uthash_free((head)->hh.tbl, sizeof(UT_hash_table)); \ 213 | head = NULL; \ 214 | } else { \ 215 | _hd_hh_del = &((delptr)->hh); \ 216 | if ((delptr) == ELMT_FROM_HH((head)->hh.tbl,(head)->hh.tbl->tail)) { \ 217 | (head)->hh.tbl->tail = \ 218 | (UT_hash_handle*)((char*)((delptr)->hh.prev) + \ 219 | (head)->hh.tbl->hho); \ 220 | } \ 221 | if ((delptr)->hh.prev) { \ 222 | ((UT_hash_handle*)((char*)((delptr)->hh.prev) + \ 223 | (head)->hh.tbl->hho))->next = (delptr)->hh.next; \ 224 | } else { \ 225 | DECLTYPE_ASSIGN(head,(delptr)->hh.next); \ 226 | } \ 227 | if (_hd_hh_del->next) { \ 228 | ((UT_hash_handle*)((char*)_hd_hh_del->next + \ 229 | (head)->hh.tbl->hho))->prev = \ 230 | _hd_hh_del->prev; \ 231 | } \ 232 | HASH_TO_BKT( _hd_hh_del->hashv, (head)->hh.tbl->num_buckets, _hd_bkt); \ 233 | HASH_DEL_IN_BKT(hh,(head)->hh.tbl->buckets[_hd_bkt], _hd_hh_del); \ 234 | (head)->hh.tbl->num_items--; \ 235 | } \ 236 | HASH_FSCK(hh,head); \ 237 | } while (0) 238 | 239 | 240 | /* convenience forms of HASH_FIND/HASH_ADD/HASH_DEL */ 241 | #define HASH_FIND_STR(head,findstr,out) \ 242 | 
HASH_FIND(hh,head,findstr,strlen(findstr),out) 243 | #define HASH_ADD_STR(head,strfield,add) \ 244 | HASH_ADD(hh,head,strfield,strlen(add->strfield),add) 245 | #define HASH_FIND_INT(head,findint,out) \ 246 | HASH_FIND(hh,head,findint,sizeof(int),out) 247 | #define HASH_ADD_INT(head,intfield,add) \ 248 | HASH_ADD(hh,head,intfield,sizeof(int),add) 249 | #define HASH_FIND_PTR(head,findptr,out) \ 250 | HASH_FIND(hh,head,findptr,sizeof(void *),out) 251 | #define HASH_ADD_PTR(head,ptrfield,add) \ 252 | HASH_ADD(hh,head,ptrfield,sizeof(void *),add) 253 | #define HASH_DEL(head,delptr) \ 254 | HASH_DELETE(hh,head,delptr) 255 | 256 | /* HASH_FSCK checks hash integrity on every add/delete when HASH_DEBUG is defined. 257 | * This is for uthash developer only; it compiles away if HASH_DEBUG isn't defined. 258 | */ 259 | #ifdef HASH_DEBUG 260 | #define HASH_OOPS(...) do { fprintf(stderr,__VA_ARGS__); exit(-1); } while (0) 261 | #define HASH_FSCK(hh,head) \ 262 | do { \ 263 | unsigned _bkt_i; \ 264 | unsigned _count, _bkt_count; \ 265 | char *_prev; \ 266 | struct UT_hash_handle *_thh; \ 267 | if (head) { \ 268 | _count = 0; \ 269 | for( _bkt_i = 0; _bkt_i < (head)->hh.tbl->num_buckets; _bkt_i++) { \ 270 | _bkt_count = 0; \ 271 | _thh = (head)->hh.tbl->buckets[_bkt_i].hh_head; \ 272 | _prev = NULL; \ 273 | while (_thh) { \ 274 | if (_prev != (char*)(_thh->hh_prev)) { \ 275 | HASH_OOPS("invalid hh_prev %p, actual %p\n", \ 276 | _thh->hh_prev, _prev ); \ 277 | } \ 278 | _bkt_count++; \ 279 | _prev = (char*)(_thh); \ 280 | _thh = _thh->hh_next; \ 281 | } \ 282 | _count += _bkt_count; \ 283 | if ((head)->hh.tbl->buckets[_bkt_i].count != _bkt_count) { \ 284 | HASH_OOPS("invalid bucket count %d, actual %d\n", \ 285 | (head)->hh.tbl->buckets[_bkt_i].count, _bkt_count); \ 286 | } \ 287 | } \ 288 | if (_count != (head)->hh.tbl->num_items) { \ 289 | HASH_OOPS("invalid hh item count %d, actual %d\n", \ 290 | (head)->hh.tbl->num_items, _count ); \ 291 | } \ 292 | /* traverse hh in app order; 
check next/prev integrity, count */ \ 293 | _count = 0; \ 294 | _prev = NULL; \ 295 | _thh = &(head)->hh; \ 296 | while (_thh) { \ 297 | _count++; \ 298 | if (_prev !=(char*)(_thh->prev)) { \ 299 | HASH_OOPS("invalid prev %p, actual %p\n", \ 300 | _thh->prev, _prev ); \ 301 | } \ 302 | _prev = (char*)ELMT_FROM_HH((head)->hh.tbl, _thh); \ 303 | _thh = ( _thh->next ? (UT_hash_handle*)((char*)(_thh->next) + \ 304 | (head)->hh.tbl->hho) : NULL ); \ 305 | } \ 306 | if (_count != (head)->hh.tbl->num_items) { \ 307 | HASH_OOPS("invalid app item count %d, actual %d\n", \ 308 | (head)->hh.tbl->num_items, _count ); \ 309 | } \ 310 | } \ 311 | } while (0) 312 | #else 313 | #define HASH_FSCK(hh,head) 314 | #endif 315 | 316 | /* When compiled with -DHASH_EMIT_KEYS, length-prefixed keys are emitted to 317 | * the descriptor to which this macro is defined for tuning the hash function. 318 | * The app can #include to get the prototype for write(2). */ 319 | #ifdef HASH_EMIT_KEYS 320 | #define HASH_EMIT_KEY(hh,head,keyptr,fieldlen) \ 321 | do { \ 322 | unsigned _klen = fieldlen; \ 323 | write(HASH_EMIT_KEYS, &_klen, sizeof(_klen)); \ 324 | write(HASH_EMIT_KEYS, keyptr, fieldlen); \ 325 | } while (0) 326 | #else 327 | #define HASH_EMIT_KEY(hh,head,keyptr,fieldlen) 328 | #endif 329 | 330 | /* default to Jenkin's hash unless overridden e.g. 
DHASH_FUNCTION=HASH_SAX */ 331 | #ifdef HASH_FUNCTION 332 | #define HASH_FCN HASH_FUNCTION 333 | #else 334 | #define HASH_FCN HASH_JEN 335 | #endif 336 | 337 | /* The Bernstein hash function, used in Perl prior to v5.6 */ 338 | #define HASH_BER(key,keylen,num_bkts,hashv,bkt) \ 339 | do { \ 340 | unsigned _hb_keylen=keylen; \ 341 | char *_hb_key=(char*)(key); \ 342 | (hashv) = 0; \ 343 | while (_hb_keylen--) { (hashv) = ((hashv) * 33) + *_hb_key++; } \ 344 | bkt = (hashv) & (num_bkts-1); \ 345 | } while (0) 346 | 347 | 348 | /* SAX/FNV/OAT/JEN hash functions are macro variants of those listed at 349 | * http://eternallyconfuzzled.com/tuts/algorithms/jsw_tut_hashing.aspx */ 350 | #define HASH_SAX(key,keylen,num_bkts,hashv,bkt) \ 351 | do { \ 352 | unsigned _sx_i; \ 353 | char *_hs_key=(char*)(key); \ 354 | hashv = 0; \ 355 | for(_sx_i=0; _sx_i < keylen; _sx_i++) \ 356 | hashv ^= (hashv << 5) + (hashv >> 2) + _hs_key[_sx_i]; \ 357 | bkt = hashv & (num_bkts-1); \ 358 | } while (0) 359 | 360 | #define HASH_FNV(key,keylen,num_bkts,hashv,bkt) \ 361 | do { \ 362 | unsigned _fn_i; \ 363 | char *_hf_key=(char*)(key); \ 364 | hashv = 2166136261UL; \ 365 | for(_fn_i=0; _fn_i < keylen; _fn_i++) \ 366 | hashv = (hashv * 16777619) ^ _hf_key[_fn_i]; \ 367 | bkt = hashv & (num_bkts-1); \ 368 | } while(0) 369 | 370 | #define HASH_OAT(key,keylen,num_bkts,hashv,bkt) \ 371 | do { \ 372 | unsigned _ho_i; \ 373 | char *_ho_key=(char*)(key); \ 374 | hashv = 0; \ 375 | for(_ho_i=0; _ho_i < keylen; _ho_i++) { \ 376 | hashv += _ho_key[_ho_i]; \ 377 | hashv += (hashv << 10); \ 378 | hashv ^= (hashv >> 6); \ 379 | } \ 380 | hashv += (hashv << 3); \ 381 | hashv ^= (hashv >> 11); \ 382 | hashv += (hashv << 15); \ 383 | bkt = hashv & (num_bkts-1); \ 384 | } while(0) 385 | 386 | #define HASH_JEN_MIX(a,b,c) \ 387 | do { \ 388 | a -= b; a -= c; a ^= ( c >> 13 ); \ 389 | b -= c; b -= a; b ^= ( a << 8 ); \ 390 | c -= a; c -= b; c ^= ( b >> 13 ); \ 391 | a -= b; a -= c; a ^= ( c >> 12 ); \ 392 | b -= 
c; b -= a; b ^= ( a << 16 ); \ 393 | c -= a; c -= b; c ^= ( b >> 5 ); \ 394 | a -= b; a -= c; a ^= ( c >> 3 ); \ 395 | b -= c; b -= a; b ^= ( a << 10 ); \ 396 | c -= a; c -= b; c ^= ( b >> 15 ); \ 397 | } while (0) 398 | 399 | #define HASH_JEN(key,keylen,num_bkts,hashv,bkt) \ 400 | do { \ 401 | unsigned _hj_i,_hj_j,_hj_k; \ 402 | char *_hj_key=(char*)(key); \ 403 | hashv = 0xfeedbeef; \ 404 | _hj_i = _hj_j = 0x9e3779b9; \ 405 | _hj_k = (unsigned)keylen; \ 406 | while (_hj_k >= 12) { \ 407 | _hj_i += (_hj_key[0] + ( (unsigned)_hj_key[1] << 8 ) \ 408 | + ( (unsigned)_hj_key[2] << 16 ) \ 409 | + ( (unsigned)_hj_key[3] << 24 ) ); \ 410 | _hj_j += (_hj_key[4] + ( (unsigned)_hj_key[5] << 8 ) \ 411 | + ( (unsigned)_hj_key[6] << 16 ) \ 412 | + ( (unsigned)_hj_key[7] << 24 ) ); \ 413 | hashv += (_hj_key[8] + ( (unsigned)_hj_key[9] << 8 ) \ 414 | + ( (unsigned)_hj_key[10] << 16 ) \ 415 | + ( (unsigned)_hj_key[11] << 24 ) ); \ 416 | \ 417 | HASH_JEN_MIX(_hj_i, _hj_j, hashv); \ 418 | \ 419 | _hj_key += 12; \ 420 | _hj_k -= 12; \ 421 | } \ 422 | hashv += keylen; \ 423 | switch ( _hj_k ) { \ 424 | case 11: hashv += ( (unsigned)_hj_key[10] << 24 ); \ 425 | case 10: hashv += ( (unsigned)_hj_key[9] << 16 ); \ 426 | case 9: hashv += ( (unsigned)_hj_key[8] << 8 ); \ 427 | case 8: _hj_j += ( (unsigned)_hj_key[7] << 24 ); \ 428 | case 7: _hj_j += ( (unsigned)_hj_key[6] << 16 ); \ 429 | case 6: _hj_j += ( (unsigned)_hj_key[5] << 8 ); \ 430 | case 5: _hj_j += _hj_key[4]; \ 431 | case 4: _hj_i += ( (unsigned)_hj_key[3] << 24 ); \ 432 | case 3: _hj_i += ( (unsigned)_hj_key[2] << 16 ); \ 433 | case 2: _hj_i += ( (unsigned)_hj_key[1] << 8 ); \ 434 | case 1: _hj_i += _hj_key[0]; \ 435 | } \ 436 | HASH_JEN_MIX(_hj_i, _hj_j, hashv); \ 437 | bkt = hashv & (num_bkts-1); \ 438 | } while(0) 439 | 440 | /* The Paul Hsieh hash function */ 441 | #undef get16bits 442 | #if (defined(__GNUC__) && defined(__i386__)) || defined(__WATCOMC__) \ 443 | || defined(_MSC_VER) || defined (__BORLANDC__) || defined 
(__TURBOC__) 444 | #define get16bits(d) (*((const uint16_t *) (d))) 445 | #endif 446 | 447 | #if !defined (get16bits) 448 | #define get16bits(d) ((((uint32_t)(((const uint8_t *)(d))[1])) << 8) \ 449 | +(uint32_t)(((const uint8_t *)(d))[0]) ) 450 | #endif 451 | #define HASH_SFH(key,keylen,num_bkts,hashv,bkt) \ 452 | do { \ 453 | char *_sfh_key=(char*)(key); \ 454 | uint32_t _sfh_tmp, _sfh_len = keylen; \ 455 | \ 456 | int _sfh_rem = _sfh_len & 3; \ 457 | _sfh_len >>= 2; \ 458 | hashv = 0xcafebabe; \ 459 | \ 460 | /* Main loop */ \ 461 | for (;_sfh_len > 0; _sfh_len--) { \ 462 | hashv += get16bits (_sfh_key); \ 463 | _sfh_tmp = (get16bits (_sfh_key+2) << 11) ^ hashv; \ 464 | hashv = (hashv << 16) ^ _sfh_tmp; \ 465 | _sfh_key += 2*sizeof (uint16_t); \ 466 | hashv += hashv >> 11; \ 467 | } \ 468 | \ 469 | /* Handle end cases */ \ 470 | switch (_sfh_rem) { \ 471 | case 3: hashv += get16bits (_sfh_key); \ 472 | hashv ^= hashv << 16; \ 473 | hashv ^= _sfh_key[sizeof (uint16_t)] << 18; \ 474 | hashv += hashv >> 11; \ 475 | break; \ 476 | case 2: hashv += get16bits (_sfh_key); \ 477 | hashv ^= hashv << 11; \ 478 | hashv += hashv >> 17; \ 479 | break; \ 480 | case 1: hashv += *_sfh_key; \ 481 | hashv ^= hashv << 10; \ 482 | hashv += hashv >> 1; \ 483 | } \ 484 | \ 485 | /* Force "avalanching" of final 127 bits */ \ 486 | hashv ^= hashv << 3; \ 487 | hashv += hashv >> 5; \ 488 | hashv ^= hashv << 4; \ 489 | hashv += hashv >> 17; \ 490 | hashv ^= hashv << 25; \ 491 | hashv += hashv >> 6; \ 492 | bkt = hashv & (num_bkts-1); \ 493 | } while(0) 494 | 495 | #ifdef HASH_USING_NO_STRICT_ALIASING 496 | /* The MurmurHash exploits some CPU's (x86,x86_64) tolerance for unaligned reads. 497 | * For other types of CPU's (e.g. Sparc) an unaligned read causes a bus error. 498 | * MurmurHash uses the faster approach only on CPU's where we know it's safe. 
499 | * 500 | * Note the preprocessor built-in defines can be emitted using: 501 | * 502 | * gcc -m64 -dM -E - < /dev/null (on gcc) 503 | * cc -## a.c (where a.c is a simple test file) (Sun Studio) 504 | */ 505 | #if (defined(__i386__) || defined(__x86_64__)) 506 | #define MUR_GETBLOCK(p,i) p[i] 507 | #else /* non intel */ 508 | #define MUR_PLUS0_ALIGNED(p) (((unsigned long)p & 0x3) == 0) 509 | #define MUR_PLUS1_ALIGNED(p) (((unsigned long)p & 0x3) == 1) 510 | #define MUR_PLUS2_ALIGNED(p) (((unsigned long)p & 0x3) == 2) 511 | #define MUR_PLUS3_ALIGNED(p) (((unsigned long)p & 0x3) == 3) 512 | #define WP(p) ((uint32_t*)((unsigned long)(p) & ~3UL)) 513 | #if (defined(__BIG_ENDIAN__) || defined(SPARC) || defined(__ppc__) || defined(__ppc64__)) 514 | #define MUR_THREE_ONE(p) ((((*WP(p))&0x00ffffff) << 8) | (((*(WP(p)+1))&0xff000000) >> 24)) 515 | #define MUR_TWO_TWO(p) ((((*WP(p))&0x0000ffff) <<16) | (((*(WP(p)+1))&0xffff0000) >> 16)) 516 | #define MUR_ONE_THREE(p) ((((*WP(p))&0x000000ff) <<24) | (((*(WP(p)+1))&0xffffff00) >> 8)) 517 | #else /* assume little endian non-intel */ 518 | #define MUR_THREE_ONE(p) ((((*WP(p))&0xffffff00) >> 8) | (((*(WP(p)+1))&0x000000ff) << 24)) 519 | #define MUR_TWO_TWO(p) ((((*WP(p))&0xffff0000) >>16) | (((*(WP(p)+1))&0x0000ffff) << 16)) 520 | #define MUR_ONE_THREE(p) ((((*WP(p))&0xff000000) >>24) | (((*(WP(p)+1))&0x00ffffff) << 8)) 521 | #endif 522 | #define MUR_GETBLOCK(p,i) (MUR_PLUS0_ALIGNED(p) ? ((p)[i]) : \ 523 | (MUR_PLUS1_ALIGNED(p) ? MUR_THREE_ONE(p) : \ 524 | (MUR_PLUS2_ALIGNED(p) ? 
MUR_TWO_TWO(p) : \ 525 | MUR_ONE_THREE(p)))) 526 | #endif 527 | #define MUR_ROTL32(x,r) (((x) << (r)) | ((x) >> (32 - (r)))) 528 | #define MUR_FMIX(_h) \ 529 | do { \ 530 | _h ^= _h >> 16; \ 531 | _h *= 0x85ebca6b; \ 532 | _h ^= _h >> 13; \ 533 | _h *= 0xc2b2ae35l; \ 534 | _h ^= _h >> 16; \ 535 | } while(0) 536 | 537 | #define HASH_MUR(key,keylen,num_bkts,hashv,bkt) \ 538 | do { \ 539 | const uint8_t *_mur_data = (const uint8_t*)(key); \ 540 | const int _mur_nblocks = (keylen) / 4; \ 541 | uint32_t _mur_h1 = 0xf88D5353; \ 542 | uint32_t _mur_c1 = 0xcc9e2d51; \ 543 | uint32_t _mur_c2 = 0x1b873593; \ 544 | const uint32_t *_mur_blocks = (const uint32_t*)(_mur_data+_mur_nblocks*4); \ 545 | int _mur_i; \ 546 | for(_mur_i = -_mur_nblocks; _mur_i; _mur_i++) { \ 547 | uint32_t _mur_k1 = MUR_GETBLOCK(_mur_blocks,_mur_i); \ 548 | _mur_k1 *= _mur_c1; \ 549 | _mur_k1 = MUR_ROTL32(_mur_k1,15); \ 550 | _mur_k1 *= _mur_c2; \ 551 | \ 552 | _mur_h1 ^= _mur_k1; \ 553 | _mur_h1 = MUR_ROTL32(_mur_h1,13); \ 554 | _mur_h1 = _mur_h1*5+0xe6546b64; \ 555 | } \ 556 | const uint8_t *_mur_tail = (const uint8_t*)(_mur_data + _mur_nblocks*4); \ 557 | uint32_t _mur_k1=0; \ 558 | switch((keylen) & 3) { \ 559 | case 3: _mur_k1 ^= _mur_tail[2] << 16; \ 560 | case 2: _mur_k1 ^= _mur_tail[1] << 8; \ 561 | case 1: _mur_k1 ^= _mur_tail[0]; \ 562 | _mur_k1 *= _mur_c1; \ 563 | _mur_k1 = MUR_ROTL32(_mur_k1,15); \ 564 | _mur_k1 *= _mur_c2; \ 565 | _mur_h1 ^= _mur_k1; \ 566 | } \ 567 | _mur_h1 ^= (keylen); \ 568 | MUR_FMIX(_mur_h1); \ 569 | hashv = _mur_h1; \ 570 | bkt = hashv & (num_bkts-1); \ 571 | } while(0) 572 | #endif /* HASH_USING_NO_STRICT_ALIASING */ 573 | 574 | /* key comparison function; return 0 if keys equal */ 575 | #define HASH_KEYCMP(a,b,len) memcmp(a,b,len) 576 | 577 | /* iterate over items in a known bucket to find desired item */ 578 | #define HASH_FIND_IN_BKT(tbl,hh,head,keyptr,keylen_in,out) \ 579 | do { \ 580 | if (head.hh_head) DECLTYPE_ASSIGN(out,ELMT_FROM_HH(tbl,head.hh_head)); \ 
581 | else out=NULL; \ 582 | while (out) { \ 583 | if ((out)->hh.keylen == keylen_in) { \ 584 | if ((HASH_KEYCMP((out)->hh.key,keyptr,keylen_in)) == 0) break; \ 585 | } \ 586 | if ((out)->hh.hh_next) DECLTYPE_ASSIGN(out,ELMT_FROM_HH(tbl,(out)->hh.hh_next)); \ 587 | else out = NULL; \ 588 | } \ 589 | } while(0) 590 | 591 | /* add an item to a bucket */ 592 | #define HASH_ADD_TO_BKT(head,addhh) \ 593 | do { \ 594 | head.count++; \ 595 | (addhh)->hh_next = head.hh_head; \ 596 | (addhh)->hh_prev = NULL; \ 597 | if (head.hh_head) { (head).hh_head->hh_prev = (addhh); } \ 598 | (head).hh_head=addhh; \ 599 | if (head.count >= ((head.expand_mult+1) * HASH_BKT_CAPACITY_THRESH) \ 600 | && (addhh)->tbl->noexpand != 1) { \ 601 | HASH_EXPAND_BUCKETS((addhh)->tbl); \ 602 | } \ 603 | } while(0) 604 | 605 | /* remove an item from a given bucket */ 606 | #define HASH_DEL_IN_BKT(hh,head,hh_del) \ 607 | (head).count--; \ 608 | if ((head).hh_head == hh_del) { \ 609 | (head).hh_head = hh_del->hh_next; \ 610 | } \ 611 | if (hh_del->hh_prev) { \ 612 | hh_del->hh_prev->hh_next = hh_del->hh_next; \ 613 | } \ 614 | if (hh_del->hh_next) { \ 615 | hh_del->hh_next->hh_prev = hh_del->hh_prev; \ 616 | } 617 | 618 | /* Bucket expansion has the effect of doubling the number of buckets 619 | * and redistributing the items into the new buckets. Ideally the 620 | * items will distribute more or less evenly into the new buckets 621 | * (the extent to which this is true is a measure of the quality of 622 | * the hash function as it applies to the key domain). 623 | * 624 | * With the items distributed into more buckets, the chain length 625 | * (item count) in each bucket is reduced. Thus by expanding buckets 626 | * the hash keeps a bound on the chain length. This bounded chain 627 | * length is the essence of how a hash provides constant time lookup. 628 | * 629 | * The calculation of tbl->ideal_chain_maxlen below deserves some 630 | * explanation. 
First, keep in mind that we're calculating the ideal 631 | * maximum chain length based on the *new* (doubled) bucket count. 632 | * In fractions this is just n/b (n=number of items,b=new num buckets). 633 | * Since the ideal chain length is an integer, we want to calculate 634 | * ceil(n/b). We don't depend on floating point arithmetic in this 635 | * hash, so to calculate ceil(n/b) with integers we could write 636 | * 637 | * ceil(n/b) = (n/b) + ((n%b)?1:0) 638 | * 639 | * and in fact a previous version of this hash did just that. 640 | * But now we have improved things a bit by recognizing that b is 641 | * always a power of two. We keep its base 2 log handy (call it lb), 642 | * so now we can write this with a bit shift and logical AND: 643 | * 644 | * ceil(n/b) = (n>>lb) + ( (n & (b-1)) ? 1:0) 645 | * 646 | */ 647 | #define HASH_EXPAND_BUCKETS(tbl) \ 648 | do { \ 649 | unsigned _he_bkt; \ 650 | unsigned _he_bkt_i; \ 651 | struct UT_hash_handle *_he_thh, *_he_hh_nxt; \ 652 | UT_hash_bucket *_he_new_buckets, *_he_newbkt; \ 653 | _he_new_buckets = (UT_hash_bucket*)uthash_malloc( \ 654 | 2 * tbl->num_buckets * sizeof(struct UT_hash_bucket)); \ 655 | if (!_he_new_buckets) { uthash_fatal( "out of memory"); } \ 656 | memset(_he_new_buckets, 0, \ 657 | 2 * tbl->num_buckets * sizeof(struct UT_hash_bucket)); \ 658 | tbl->ideal_chain_maxlen = \ 659 | (tbl->num_items >> (tbl->log2_num_buckets+1)) + \ 660 | ((tbl->num_items & ((tbl->num_buckets*2)-1)) ? 
1 : 0); \ 661 | tbl->nonideal_items = 0; \ 662 | for(_he_bkt_i = 0; _he_bkt_i < tbl->num_buckets; _he_bkt_i++) \ 663 | { \ 664 | _he_thh = tbl->buckets[ _he_bkt_i ].hh_head; \ 665 | while (_he_thh) { \ 666 | _he_hh_nxt = _he_thh->hh_next; \ 667 | HASH_TO_BKT( _he_thh->hashv, tbl->num_buckets*2, _he_bkt); \ 668 | _he_newbkt = &(_he_new_buckets[ _he_bkt ]); \ 669 | if (++(_he_newbkt->count) > tbl->ideal_chain_maxlen) { \ 670 | tbl->nonideal_items++; \ 671 | _he_newbkt->expand_mult = _he_newbkt->count / \ 672 | tbl->ideal_chain_maxlen; \ 673 | } \ 674 | _he_thh->hh_prev = NULL; \ 675 | _he_thh->hh_next = _he_newbkt->hh_head; \ 676 | if (_he_newbkt->hh_head) _he_newbkt->hh_head->hh_prev = \ 677 | _he_thh; \ 678 | _he_newbkt->hh_head = _he_thh; \ 679 | _he_thh = _he_hh_nxt; \ 680 | } \ 681 | } \ 682 | uthash_free( tbl->buckets, tbl->num_buckets*sizeof(struct UT_hash_bucket) ); \ 683 | tbl->num_buckets *= 2; \ 684 | tbl->log2_num_buckets++; \ 685 | tbl->buckets = _he_new_buckets; \ 686 | tbl->ineff_expands = (tbl->nonideal_items > (tbl->num_items >> 1)) ? \ 687 | (tbl->ineff_expands+1) : 0; \ 688 | if (tbl->ineff_expands > 1) { \ 689 | tbl->noexpand=1; \ 690 | uthash_noexpand_fyi(tbl); \ 691 | } \ 692 | uthash_expand_fyi(tbl); \ 693 | } while(0) 694 | 695 | 696 | /* This is an adaptation of Simon Tatham's O(n log(n)) mergesort */ 697 | /* Note that HASH_SORT assumes the hash handle name to be hh. 698 | * HASH_SRT was added to allow the hash handle name to be passed in. 
*/ 699 | #define HASH_SORT(head,cmpfcn) HASH_SRT(hh,head,cmpfcn) 700 | #define HASH_SRT(hh,head,cmpfcn) \ 701 | do { \ 702 | unsigned _hs_i; \ 703 | unsigned _hs_looping,_hs_nmerges,_hs_insize,_hs_psize,_hs_qsize; \ 704 | struct UT_hash_handle *_hs_p, *_hs_q, *_hs_e, *_hs_list, *_hs_tail; \ 705 | if (head) { \ 706 | _hs_insize = 1; \ 707 | _hs_looping = 1; \ 708 | _hs_list = &((head)->hh); \ 709 | while (_hs_looping) { \ 710 | _hs_p = _hs_list; \ 711 | _hs_list = NULL; \ 712 | _hs_tail = NULL; \ 713 | _hs_nmerges = 0; \ 714 | while (_hs_p) { \ 715 | _hs_nmerges++; \ 716 | _hs_q = _hs_p; \ 717 | _hs_psize = 0; \ 718 | for ( _hs_i = 0; _hs_i < _hs_insize; _hs_i++ ) { \ 719 | _hs_psize++; \ 720 | _hs_q = (UT_hash_handle*)((_hs_q->next) ? \ 721 | ((void*)((char*)(_hs_q->next) + \ 722 | (head)->hh.tbl->hho)) : NULL); \ 723 | if (! (_hs_q) ) break; \ 724 | } \ 725 | _hs_qsize = _hs_insize; \ 726 | while ((_hs_psize > 0) || ((_hs_qsize > 0) && _hs_q )) { \ 727 | if (_hs_psize == 0) { \ 728 | _hs_e = _hs_q; \ 729 | _hs_q = (UT_hash_handle*)((_hs_q->next) ? \ 730 | ((void*)((char*)(_hs_q->next) + \ 731 | (head)->hh.tbl->hho)) : NULL); \ 732 | _hs_qsize--; \ 733 | } else if ( (_hs_qsize == 0) || !(_hs_q) ) { \ 734 | _hs_e = _hs_p; \ 735 | _hs_p = (UT_hash_handle*)((_hs_p->next) ? \ 736 | ((void*)((char*)(_hs_p->next) + \ 737 | (head)->hh.tbl->hho)) : NULL); \ 738 | _hs_psize--; \ 739 | } else if (( \ 740 | cmpfcn(DECLTYPE(head)(ELMT_FROM_HH((head)->hh.tbl,_hs_p)), \ 741 | DECLTYPE(head)(ELMT_FROM_HH((head)->hh.tbl,_hs_q))) \ 742 | ) <= 0) { \ 743 | _hs_e = _hs_p; \ 744 | _hs_p = (UT_hash_handle*)((_hs_p->next) ? \ 745 | ((void*)((char*)(_hs_p->next) + \ 746 | (head)->hh.tbl->hho)) : NULL); \ 747 | _hs_psize--; \ 748 | } else { \ 749 | _hs_e = _hs_q; \ 750 | _hs_q = (UT_hash_handle*)((_hs_q->next) ? \ 751 | ((void*)((char*)(_hs_q->next) + \ 752 | (head)->hh.tbl->hho)) : NULL); \ 753 | _hs_qsize--; \ 754 | } \ 755 | if ( _hs_tail ) { \ 756 | _hs_tail->next = ((_hs_e) ? 
\ 757 | ELMT_FROM_HH((head)->hh.tbl,_hs_e) : NULL); \ 758 | } else { \ 759 | _hs_list = _hs_e; \ 760 | } \ 761 | _hs_e->prev = ((_hs_tail) ? \ 762 | ELMT_FROM_HH((head)->hh.tbl,_hs_tail) : NULL); \ 763 | _hs_tail = _hs_e; \ 764 | } \ 765 | _hs_p = _hs_q; \ 766 | } \ 767 | _hs_tail->next = NULL; \ 768 | if ( _hs_nmerges <= 1 ) { \ 769 | _hs_looping=0; \ 770 | (head)->hh.tbl->tail = _hs_tail; \ 771 | DECLTYPE_ASSIGN(head,ELMT_FROM_HH((head)->hh.tbl, _hs_list)); \ 772 | } \ 773 | _hs_insize *= 2; \ 774 | } \ 775 | HASH_FSCK(hh,head); \ 776 | } \ 777 | } while (0) 778 | 779 | /* This function selects items from one hash into another hash. 780 | * The end result is that the selected items have dual presence 781 | * in both hashes. There is no copy of the items made; rather 782 | * they are added into the new hash through a secondary hash 783 | * hash handle that must be present in the structure. */ 784 | #define HASH_SELECT(hh_dst, dst, hh_src, src, cond) \ 785 | do { \ 786 | unsigned _src_bkt, _dst_bkt; \ 787 | void *_last_elt=NULL, *_elt; \ 788 | UT_hash_handle *_src_hh, *_dst_hh, *_last_elt_hh=NULL; \ 789 | ptrdiff_t _dst_hho = ((char*)(&(dst)->hh_dst) - (char*)(dst)); \ 790 | if (src) { \ 791 | for(_src_bkt=0; _src_bkt < (src)->hh_src.tbl->num_buckets; _src_bkt++) { \ 792 | for(_src_hh = (src)->hh_src.tbl->buckets[_src_bkt].hh_head; \ 793 | _src_hh; \ 794 | _src_hh = _src_hh->hh_next) { \ 795 | _elt = ELMT_FROM_HH((src)->hh_src.tbl, _src_hh); \ 796 | if (cond(_elt)) { \ 797 | _dst_hh = (UT_hash_handle*)(((char*)_elt) + _dst_hho); \ 798 | _dst_hh->key = _src_hh->key; \ 799 | _dst_hh->keylen = _src_hh->keylen; \ 800 | _dst_hh->hashv = _src_hh->hashv; \ 801 | _dst_hh->prev = _last_elt; \ 802 | _dst_hh->next = NULL; \ 803 | if (_last_elt_hh) { _last_elt_hh->next = _elt; } \ 804 | if (!dst) { \ 805 | DECLTYPE_ASSIGN(dst,_elt); \ 806 | HASH_MAKE_TABLE(hh_dst,dst); \ 807 | } else { \ 808 | _dst_hh->tbl = (dst)->hh_dst.tbl; \ 809 | } \ 810 | HASH_TO_BKT(_dst_hh->hashv, 
_dst_hh->tbl->num_buckets, _dst_bkt); \ 811 | HASH_ADD_TO_BKT(_dst_hh->tbl->buckets[_dst_bkt],_dst_hh); \ 812 | (dst)->hh_dst.tbl->num_items++; \ 813 | _last_elt = _elt; \ 814 | _last_elt_hh = _dst_hh; \ 815 | } \ 816 | } \ 817 | } \ 818 | } \ 819 | HASH_FSCK(hh_dst,dst); \ 820 | } while (0) 821 | 822 | #define HASH_CLEAR(hh,head) \ 823 | do { \ 824 | if (head) { \ 825 | uthash_free((head)->hh.tbl->buckets, \ 826 | (head)->hh.tbl->num_buckets*sizeof(struct UT_hash_bucket)); \ 827 | HASH_BLOOM_FREE((head)->hh.tbl); \ 828 | uthash_free((head)->hh.tbl, sizeof(UT_hash_table)); \ 829 | (head)=NULL; \ 830 | } \ 831 | } while(0) 832 | 833 | #ifdef NO_DECLTYPE 834 | #define HASH_ITER(hh,head,el,tmp) \ 835 | for((el)=(head), (*(char**)(&(tmp)))=(char*)((head)?(head)->hh.next:NULL); \ 836 | el; (el)=(tmp),(*(char**)(&(tmp)))=(char*)((tmp)?(tmp)->hh.next:NULL)) 837 | #else 838 | #define HASH_ITER(hh,head,el,tmp) \ 839 | for((el)=(head),(tmp)=DECLTYPE(el)((head)?(head)->hh.next:NULL); \ 840 | el; (el)=(tmp),(tmp)=DECLTYPE(el)((tmp)?(tmp)->hh.next:NULL)) 841 | #endif 842 | 843 | /* obtain a count of items in the hash */ 844 | #define HASH_COUNT(head) HASH_CNT(hh,head) 845 | #define HASH_CNT(hh,head) ((head)?((head)->hh.tbl->num_items):0) 846 | 847 | typedef struct UT_hash_bucket { 848 | struct UT_hash_handle *hh_head; 849 | unsigned count; 850 | 851 | /* expand_mult is normally set to 0. In this situation, the max chain length 852 | * threshold is enforced at its default value, HASH_BKT_CAPACITY_THRESH. (If 853 | * the bucket's chain exceeds this length, bucket expansion is triggered). 854 | * However, setting expand_mult to a non-zero value delays bucket expansion 855 | * (that would be triggered by additions to this particular bucket) 856 | * until its chain length reaches a *multiple* of HASH_BKT_CAPACITY_THRESH. 857 | * (The multiplier is simply expand_mult+1). 
The whole idea of this 858 | * multiplier is to reduce bucket expansions, since they are expensive, in 859 | * situations where we know that a particular bucket tends to be overused. 860 | * It is better to let its chain length grow to a longer yet-still-bounded 861 | * value, than to do an O(n) bucket expansion too often. 862 | */ 863 | unsigned expand_mult; 864 | 865 | } UT_hash_bucket; 866 | 867 | /* random signature used only to find hash tables in external analysis */ 868 | #define HASH_SIGNATURE 0xa0111fe1 869 | #define HASH_BLOOM_SIGNATURE 0xb12220f2 870 | 871 | typedef struct UT_hash_table { 872 | UT_hash_bucket *buckets; 873 | unsigned num_buckets, log2_num_buckets; 874 | unsigned num_items; 875 | struct UT_hash_handle *tail; /* tail hh in app order, for fast append */ 876 | ptrdiff_t hho; /* hash handle offset (byte pos of hash handle in element */ 877 | 878 | /* in an ideal situation (all buckets used equally), no bucket would have 879 | * more than ceil(#items/#buckets) items. that's the ideal chain length. */ 880 | unsigned ideal_chain_maxlen; 881 | 882 | /* nonideal_items is the number of items in the hash whose chain position 883 | * exceeds the ideal chain maxlen. these items pay the penalty for an uneven 884 | * hash distribution; reaching them in a chain traversal takes >ideal steps */ 885 | unsigned nonideal_items; 886 | 887 | /* ineffective expands occur when a bucket doubling was performed, but 888 | * afterward, more than half the items in the hash had nonideal chain 889 | * positions. If this happens on two consecutive expansions we inhibit any 890 | * further expansion, as it's not helping; this happens when the hash 891 | * function isn't a good fit for the key domain. When expansion is inhibited 892 | * the hash will still work, albeit no longer in constant time. 
*/ 893 | unsigned ineff_expands, noexpand; 894 | 895 | uint32_t signature; /* used only to find hash tables in external analysis */ 896 | #ifdef HASH_BLOOM 897 | uint32_t bloom_sig; /* used only to test bloom exists in external analysis */ 898 | uint8_t *bloom_bv; 899 | char bloom_nbits; 900 | #endif 901 | 902 | } UT_hash_table; 903 | 904 | typedef struct UT_hash_handle { 905 | struct UT_hash_table *tbl; 906 | void *prev; /* prev element in app order */ 907 | void *next; /* next element in app order */ 908 | struct UT_hash_handle *hh_prev; /* previous hh in bucket order */ 909 | struct UT_hash_handle *hh_next; /* next hh in bucket order */ 910 | void *key; /* ptr to enclosing struct's key */ 911 | unsigned keylen; /* enclosing struct's key len */ 912 | unsigned hashv; /* result of hash-fcn(key) */ 913 | } UT_hash_handle; 914 | 915 | #endif /* UTHASH_H */ 916 | -------------------------------------------------------------------------------- /tests/ast_tests.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include "minunit.h" 5 | 6 | 7 | //============================================================================== 8 | // 9 | // Test Cases 10 | // 11 | //============================================================================== 12 | 13 | //-------------------------------------- 14 | // Number AST 15 | //-------------------------------------- 16 | 17 | int test_kal_ast_number_create() { 18 | kal_ast_node *node = kal_ast_number_create(10); 19 | mu_assert(node->type == KAL_AST_TYPE_NUMBER, ""); 20 | mu_assert(node->number.value == 10, ""); 21 | kal_ast_node_free(node); 22 | return 0; 23 | } 24 | 25 | 26 | //-------------------------------------- 27 | // Variable AST 28 | //-------------------------------------- 29 | 30 | int test_kal_ast_variable_create() { 31 | kal_ast_node *node = kal_ast_variable_create("foo"); 32 | mu_assert(node->type == KAL_AST_TYPE_VARIABLE, ""); 33 | 
mu_assert(strcmp(node->variable.name, "foo") == 0, ""); 34 | kal_ast_node_free(node); 35 | return 0; 36 | } 37 | 38 | 39 | //-------------------------------------- 40 | // Binary Expression AST 41 | //-------------------------------------- 42 | 43 | int test_kal_ast_binary_expr_create() { 44 | kal_ast_node *number = kal_ast_number_create(20); 45 | kal_ast_node *variable = kal_ast_variable_create("bar"); 46 | kal_ast_node *node = kal_ast_binary_expr_create(KAL_BINOP_PLUS, number, variable); 47 | mu_assert(node->type == KAL_AST_TYPE_BINARY_EXPR, ""); 48 | mu_assert(node->binary_expr.operator == KAL_BINOP_PLUS, ""); 49 | mu_assert(node->binary_expr.lhs == number, ""); 50 | mu_assert(node->binary_expr.rhs == variable, ""); 51 | kal_ast_node_free(node); 52 | return 0; 53 | } 54 | 55 | 56 | //-------------------------------------- 57 | // Funtion Call AST 58 | //-------------------------------------- 59 | 60 | int test_kal_ast_call_create() { 61 | kal_ast_node *args[2]; 62 | args[0] = kal_ast_number_create(100); 63 | args[1] = kal_ast_number_create(200); 64 | kal_ast_node *node = kal_ast_call_create("baz", args, 2); 65 | mu_assert(node->type == KAL_AST_TYPE_CALL, ""); 66 | mu_assert(strcmp(node->call.name, "baz") == 0, ""); 67 | mu_assert(node->call.args[0] == args[0], ""); 68 | mu_assert(node->call.args[1] == args[1], ""); 69 | mu_assert(node->call.arg_count == 2, ""); 70 | kal_ast_node_free(node); 71 | return 0; 72 | } 73 | 74 | 75 | //-------------------------------------- 76 | // Funtion Prototype AST 77 | //-------------------------------------- 78 | 79 | int test_kal_ast_prototype_create() { 80 | char *args[2]; 81 | args[0] = "foo"; 82 | args[1] = "bar"; 83 | kal_ast_node *node = kal_ast_prototype_create("baz", args, 2); 84 | mu_assert(node->type == KAL_AST_TYPE_PROTOTYPE, ""); 85 | mu_assert(strcmp(node->prototype.name, "baz") == 0, ""); 86 | mu_assert(strcmp(node->prototype.args[0], "foo") == 0, ""); 87 | mu_assert(strcmp(node->prototype.args[1], "bar") == 0, 
""); 88 | mu_assert(node->prototype.arg_count == 2, ""); 89 | kal_ast_node_free(node); 90 | return 0; 91 | } 92 | 93 | 94 | //-------------------------------------- 95 | // Funtion Prototype AST 96 | //-------------------------------------- 97 | 98 | int test_kal_ast_function_create() { 99 | kal_ast_node *prototype = kal_ast_prototype_create("baz", NULL, 0); 100 | kal_ast_node *body = kal_ast_variable_create("foo"); 101 | kal_ast_node *node = kal_ast_function_create(prototype, body); 102 | mu_assert(node->type == KAL_AST_TYPE_FUNCTION, ""); 103 | mu_assert(node->function.prototype == prototype, ""); 104 | mu_assert(node->function.body == body, ""); 105 | kal_ast_node_free(node); 106 | return 0; 107 | } 108 | 109 | 110 | //-------------------------------------- 111 | // If Expression AST 112 | //-------------------------------------- 113 | 114 | int test_kal_ast_if_expr_create() { 115 | kal_ast_node *condition = kal_ast_number_create(1); 116 | kal_ast_node *true_expr = kal_ast_number_create(2); 117 | kal_ast_node *false_expr = kal_ast_number_create(3); 118 | kal_ast_node *node = kal_ast_if_expr_create(condition, true_expr, false_expr); 119 | mu_assert(node->type == KAL_AST_TYPE_IF_EXPR, ""); 120 | mu_assert(node->if_expr.condition == condition, ""); 121 | mu_assert(node->if_expr.true_expr == true_expr, ""); 122 | mu_assert(node->if_expr.false_expr == false_expr, ""); 123 | kal_ast_node_free(node); 124 | return 0; 125 | } 126 | 127 | 128 | //============================================================================== 129 | // 130 | // Setup 131 | // 132 | //============================================================================== 133 | 134 | int all_tests() { 135 | mu_run_test(test_kal_ast_number_create); 136 | mu_run_test(test_kal_ast_variable_create); 137 | mu_run_test(test_kal_ast_binary_expr_create); 138 | mu_run_test(test_kal_ast_call_create); 139 | mu_run_test(test_kal_ast_prototype_create); 140 | mu_run_test(test_kal_ast_function_create); 141 | 
mu_run_test(test_kal_ast_if_expr_create); 142 | return 0; 143 | } 144 | 145 | RUN_TESTS() -------------------------------------------------------------------------------- /tests/codegen_tests.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include "minunit.h" 7 | 8 | 9 | //============================================================================== 10 | // 11 | // Test Cases 12 | // 13 | //============================================================================== 14 | 15 | //-------------------------------------- 16 | // Number 17 | //-------------------------------------- 18 | 19 | int test_kal_codegen_number() { 20 | kal_ast_node *node = kal_ast_number_create(10); 21 | LLVMValueRef value = kal_codegen(node, NULL, NULL); 22 | LLVMTypeRef type = LLVMTypeOf(value); 23 | mu_assert(LLVMGetTypeKind(type) == LLVMDoubleTypeKind, ""); 24 | mu_assert(LLVMIsConstant(value), ""); 25 | kal_ast_node_free(node); 26 | return 0; 27 | } 28 | 29 | 30 | //-------------------------------------- 31 | // Binary Expression 32 | //-------------------------------------- 33 | 34 | int test_kal_codegen_binary_expr() { 35 | LLVMModuleRef module = LLVMModuleCreateWithName("kal"); 36 | LLVMBuilderRef builder = LLVMCreateBuilder(); 37 | kal_ast_node *lhs = kal_ast_number_create(20); 38 | kal_ast_node *rhs = kal_ast_number_create(30); 39 | kal_ast_node *node = kal_ast_binary_expr_create(KAL_BINOP_PLUS, lhs, rhs); 40 | LLVMValueRef value = kal_codegen(node, module, builder); 41 | mu_assert(LLVMGetTypeKind(LLVMTypeOf(value)) == LLVMDoubleTypeKind, ""); 42 | mu_assert(LLVMIsConstant(value), ""); 43 | LLVMDisposeBuilder(builder); 44 | LLVMDisposeModule(module); 45 | kal_ast_node_free(node); 46 | return 0; 47 | } 48 | 49 | 50 | //-------------------------------------- 51 | // Prototype 52 | //-------------------------------------- 53 | 54 | int test_kal_codegen_prototype() { 55 | 
kal_named_value *val; 56 | unsigned int arg_count = 3; 57 | char **args = malloc(sizeof(char*) * arg_count); 58 | args[0] = "foo"; 59 | args[1] = "bar"; 60 | args[2] = "baz"; 61 | 62 | LLVMModuleRef module = LLVMModuleCreateWithName("kal"); 63 | LLVMBuilderRef builder = LLVMCreateBuilder(); 64 | kal_ast_node *node = kal_ast_prototype_create("my_func", args, 3); 65 | 66 | kal_codegen_reset(); 67 | LLVMValueRef value = kal_codegen(node, module, builder); 68 | 69 | mu_assert(value != NULL, ""); 70 | mu_assert(LLVMGetNamedFunction(module, "my_func") == value, ""); 71 | mu_assert(LLVMCountParams(value) == 3, ""); 72 | 73 | val = kal_codegen_named_value("foo"); 74 | mu_assert(val->value == LLVMGetParam(value, 0), ""); 75 | mu_assert(LLVMGetTypeKind(LLVMTypeOf(LLVMGetParam(value, 0))) == LLVMDoubleTypeKind, ""); 76 | 77 | val = kal_codegen_named_value("bar"); 78 | mu_assert(val->value == LLVMGetParam(value, 1), ""); 79 | mu_assert(LLVMGetTypeKind(LLVMTypeOf(LLVMGetParam(value, 1))) == LLVMDoubleTypeKind, ""); 80 | 81 | val = kal_codegen_named_value("baz"); 82 | mu_assert(val->value == LLVMGetParam(value, 2), ""); 83 | mu_assert(LLVMGetTypeKind(LLVMTypeOf(LLVMGetParam(value, 2))) == LLVMDoubleTypeKind, ""); 84 | 85 | LLVMDisposeBuilder(builder); 86 | LLVMDisposeModule(module); 87 | kal_ast_node_free(node); 88 | return 0; 89 | } 90 | 91 | 92 | //-------------------------------------- 93 | // Function 94 | //-------------------------------------- 95 | 96 | int test_kal_codegen_function() { 97 | kal_named_value *val; 98 | unsigned int arg_count = 1; 99 | char **args = malloc(sizeof(char*) * arg_count); 100 | args[0] = "foo"; 101 | 102 | LLVMModuleRef module = LLVMModuleCreateWithName("kal"); 103 | LLVMBuilderRef builder = LLVMCreateBuilder(); 104 | kal_ast_node *prototype = kal_ast_prototype_create("my_func", args, arg_count); 105 | kal_ast_node *lhs = kal_ast_variable_create("foo"); 106 | kal_ast_node *rhs = kal_ast_number_create(20); 107 | kal_ast_node *body = 
kal_ast_binary_expr_create(KAL_BINOP_PLUS, lhs, rhs); 108 | kal_ast_node *node = kal_ast_function_create(prototype, body); 109 | 110 | kal_codegen_reset(); 111 | LLVMValueRef value = kal_codegen(node, module, builder); 112 | 113 | mu_assert(value != NULL, ""); 114 | mu_assert(LLVMGetNamedFunction(module, "my_func") == value, ""); 115 | mu_assert(LLVMCountParams(value) == 1, ""); 116 | 117 | val = kal_codegen_named_value("foo"); 118 | mu_assert(val->value == LLVMGetParam(value, 0), ""); 119 | mu_assert(LLVMGetTypeKind(LLVMTypeOf(LLVMGetParam(value, 0))) == LLVMDoubleTypeKind, ""); 120 | 121 | LLVMDisposeBuilder(builder); 122 | LLVMDisposeModule(module); 123 | kal_ast_node_free(node); 124 | return 0; 125 | } 126 | 127 | 128 | 129 | //============================================================================== 130 | // 131 | // Setup 132 | // 133 | //============================================================================== 134 | 135 | int all_tests() { 136 | mu_run_test(test_kal_codegen_number); 137 | mu_run_test(test_kal_codegen_binary_expr); 138 | mu_run_test(test_kal_codegen_prototype); 139 | mu_run_test(test_kal_codegen_function); 140 | return 0; 141 | } 142 | 143 | RUN_TESTS() -------------------------------------------------------------------------------- /tests/minunit.h: -------------------------------------------------------------------------------- 1 | //============================================================================== 2 | // 3 | // Minunit 4 | // 5 | //============================================================================== 6 | 7 | #define mu_fail(MSG, ...) do {\ 8 | fprintf(stderr, "%s:%d: " MSG "\n", __FILE__, __LINE__, ##__VA_ARGS__);\ 9 | return 1;\ 10 | } while(0) 11 | 12 | #define mu_assert(TEST, MSG, ...) 
do {\ 13 | if (!(TEST)) {\ 14 | fprintf(stderr, "%s:%d: %s " MSG "\n", __FILE__, __LINE__, #TEST, ##__VA_ARGS__);\ 15 | return 1;\ 16 | }\ 17 | } while (0) 18 | 19 | #define mu_run_test(TEST) do {\ 20 | fprintf(stderr, "%s\n", #TEST);\ 21 | int rc = TEST();\ 22 | if (rc) {\ 23 | fprintf(stderr, "\n Test Failure: %s()\n", #TEST);\ 24 | return rc;\ 25 | }\ 26 | } while (0) 27 | 28 | #define RUN_TESTS() int main() {\ 29 | fprintf(stderr, "== %s ==\n", __FILE__);\ 30 | int rc = all_tests();\ 31 | fprintf(stderr, "\n");\ 32 | return rc;\ 33 | } 34 | 35 | -------------------------------------------------------------------------------- /tests/parser_tests.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include "minunit.h" 6 | 7 | 8 | //============================================================================== 9 | // 10 | // Test Cases 11 | // 12 | //============================================================================== 13 | 14 | //-------------------------------------- 15 | // Variable & Number 16 | //-------------------------------------- 17 | 18 | int test_parse_number() { 19 | kal_ast_node *node = NULL; 20 | kal_parse("200", &node); 21 | mu_assert(node->type == KAL_AST_TYPE_NUMBER, ""); 22 | mu_assert(node->number.value == 200, ""); 23 | kal_ast_node_free(node); 24 | return 0; 25 | } 26 | 27 | int test_parse_variable() { 28 | kal_ast_node *node = NULL; 29 | kal_parse("my_var2", &node); 30 | mu_assert(node->type == KAL_AST_TYPE_VARIABLE, ""); 31 | mu_assert(strcmp(node->variable.name, "my_var2") == 0, ""); 32 | kal_ast_node_free(node); 33 | return 0; 34 | } 35 | 36 | 37 | //-------------------------------------- 38 | // Expressions 39 | //-------------------------------------- 40 | 41 | int test_parse_addition() { 42 | kal_ast_node *node = NULL; 43 | int rc = kal_parse("2+3", &node); 44 | mu_assert(rc == 0, ""); 45 | mu_assert(node->type == KAL_AST_TYPE_BINARY_EXPR, 
""); 46 | mu_assert(node->binary_expr.operator == KAL_BINOP_PLUS, ""); 47 | mu_assert(node->binary_expr.lhs->number.value == 2, ""); 48 | mu_assert(node->binary_expr.rhs->number.value == 3, ""); 49 | kal_ast_node_free(node); 50 | return 0; 51 | } 52 | 53 | int test_parse_subtraction() { 54 | kal_ast_node *node = NULL; 55 | int rc = kal_parse("200-30", &node); 56 | mu_assert(rc == 0, ""); 57 | mu_assert(node->type == KAL_AST_TYPE_BINARY_EXPR, ""); 58 | mu_assert(node->binary_expr.operator == KAL_BINOP_MINUS, ""); 59 | mu_assert(node->binary_expr.lhs->number.value == 200, ""); 60 | mu_assert(node->binary_expr.rhs->number.value == 30, ""); 61 | kal_ast_node_free(node); 62 | return 0; 63 | } 64 | 65 | int test_parse_multiplication() { 66 | kal_ast_node *node = NULL; 67 | int rc = kal_parse("4*8", &node); 68 | mu_assert(rc == 0, ""); 69 | mu_assert(node->type == KAL_AST_TYPE_BINARY_EXPR, ""); 70 | mu_assert(node->binary_expr.operator == KAL_BINOP_MUL, ""); 71 | mu_assert(node->binary_expr.lhs->number.value == 4, ""); 72 | mu_assert(node->binary_expr.rhs->number.value == 8, ""); 73 | kal_ast_node_free(node); 74 | return 0; 75 | } 76 | 77 | int test_parse_division() { 78 | kal_ast_node *node = NULL; 79 | int rc = kal_parse("10/2", &node); 80 | mu_assert(rc == 0, ""); 81 | mu_assert(node->type == KAL_AST_TYPE_BINARY_EXPR, ""); 82 | mu_assert(node->binary_expr.operator == KAL_BINOP_DIV, ""); 83 | mu_assert(node->binary_expr.lhs->number.value == 10, ""); 84 | mu_assert(node->binary_expr.rhs->number.value == 2, ""); 85 | kal_ast_node_free(node); 86 | return 0; 87 | } 88 | 89 | int test_parse_parens() { 90 | kal_ast_node *node = NULL; 91 | int rc = kal_parse("(12+200)", &node); 92 | mu_assert(rc == 0, ""); 93 | mu_assert(node->type == KAL_AST_TYPE_BINARY_EXPR, ""); 94 | mu_assert(node->binary_expr.operator == KAL_BINOP_PLUS, ""); 95 | mu_assert(node->binary_expr.lhs->number.value == 12, ""); 96 | mu_assert(node->binary_expr.rhs->number.value == 200, ""); 97 | 
kal_ast_node_free(node); 98 | return 0; 99 | } 100 | 101 | int test_parse_complex() { 102 | kal_ast_node *node = NULL; 103 | int rc = kal_parse("4*2+3", &node); 104 | mu_assert(rc == 0, ""); 105 | mu_assert(node->type == KAL_AST_TYPE_BINARY_EXPR, ""); 106 | mu_assert(node->binary_expr.operator == KAL_BINOP_PLUS, ""); 107 | 108 | // LHS 109 | mu_assert(node->binary_expr.lhs->type == KAL_AST_TYPE_BINARY_EXPR, ""); 110 | mu_assert(node->binary_expr.lhs->binary_expr.operator == KAL_BINOP_MUL, ""); 111 | mu_assert(node->binary_expr.lhs->binary_expr.lhs->number.value == 4, ""); 112 | mu_assert(node->binary_expr.lhs->binary_expr.rhs->number.value == 2, ""); 113 | 114 | // RHS 115 | mu_assert(node->binary_expr.rhs->number.value == 3, ""); 116 | kal_ast_node_free(node); 117 | return 0; 118 | } 119 | 120 | int test_parse_complex_with_parens() { 121 | kal_ast_node *node = NULL; 122 | int rc = kal_parse("4*(2+3)", &node); 123 | mu_assert(rc == 0, ""); 124 | mu_assert(node->type == KAL_AST_TYPE_BINARY_EXPR, ""); 125 | mu_assert(node->binary_expr.operator == KAL_BINOP_MUL, ""); 126 | 127 | // LHS 128 | mu_assert(node->binary_expr.lhs->number.value == 4, ""); 129 | 130 | // RHS 131 | mu_assert(node->binary_expr.rhs->type == KAL_AST_TYPE_BINARY_EXPR, ""); 132 | mu_assert(node->binary_expr.rhs->binary_expr.operator == KAL_BINOP_PLUS, ""); 133 | mu_assert(node->binary_expr.rhs->binary_expr.lhs->number.value == 2, ""); 134 | mu_assert(node->binary_expr.rhs->binary_expr.rhs->number.value == 3, ""); 135 | 136 | kal_ast_node_free(node); 137 | return 0; 138 | } 139 | 140 | 141 | //-------------------------------------- 142 | // Function Call 143 | //-------------------------------------- 144 | 145 | int test_parse_function_call() { 146 | kal_ast_node *node = NULL; 147 | int rc = kal_parse("my_func(12, foo+30)", &node); 148 | mu_assert(rc == 0, ""); 149 | mu_assert(node->type == KAL_AST_TYPE_CALL, ""); 150 | mu_assert(strcmp(node->call.name, "my_func") == 0, ""); 151 | 
mu_assert(node->call.arg_count == 2, "%d", node->call.arg_count); 152 | 153 | // Arg 1 154 | mu_assert(node->call.args[0]->type == KAL_AST_TYPE_NUMBER, ""); 155 | mu_assert(node->call.args[0]->number.value == 12, ""); 156 | 157 | // Arg 2 158 | mu_assert(node->call.args[1]->type == KAL_AST_TYPE_BINARY_EXPR, ""); 159 | mu_assert(node->call.args[1]->binary_expr.operator == KAL_BINOP_PLUS, ""); 160 | mu_assert(strcmp(node->call.args[1]->binary_expr.lhs->variable.name, "foo") == 0, ""); 161 | mu_assert(node->call.args[1]->binary_expr.rhs->number.value == 30, ""); 162 | 163 | kal_ast_node_free(node); 164 | return 0; 165 | } 166 | 167 | 168 | //-------------------------------------- 169 | // Extern Function 170 | //-------------------------------------- 171 | 172 | int test_parse_extern() { 173 | kal_ast_node *node = NULL; 174 | int rc = kal_parse("extern my_func(foo, bar)", &node); 175 | mu_assert(rc == 0, ""); 176 | mu_assert(node->type == KAL_AST_TYPE_PROTOTYPE, ""); 177 | mu_assert(strcmp(node->prototype.name, "my_func") == 0, ""); 178 | mu_assert(node->prototype.arg_count == 2, "%d", node->prototype.arg_count); 179 | mu_assert(strcmp(node->prototype.args[0], "foo") == 0, ""); 180 | mu_assert(strcmp(node->prototype.args[1], "bar") == 0, ""); 181 | kal_ast_node_free(node); 182 | return 0; 183 | } 184 | 185 | 186 | //-------------------------------------- 187 | // Function Definition 188 | //-------------------------------------- 189 | 190 | int test_parse_function() { 191 | kal_ast_node *node = NULL; 192 | int rc = kal_parse("def my_func(foo, bar) foo + bar", &node); 193 | mu_assert(rc == 0, ""); 194 | mu_assert(node->type == KAL_AST_TYPE_FUNCTION, ""); 195 | 196 | // Prototype 197 | mu_assert(strcmp(node->function.prototype->prototype.name, "my_func") == 0, ""); 198 | mu_assert(node->function.prototype->prototype.arg_count == 2, ""); 199 | mu_assert(strcmp(node->function.prototype->prototype.args[0], "foo") == 0, ""); 200 | 
mu_assert(strcmp(node->function.prototype->prototype.args[1], "bar") == 0, ""); 201 | 202 | // Body 203 | mu_assert(node->function.body->type == KAL_AST_TYPE_BINARY_EXPR, ""); 204 | mu_assert(node->function.body->binary_expr.operator == KAL_BINOP_PLUS, ""); 205 | mu_assert(strcmp(node->function.body->binary_expr.lhs->variable.name, "foo") == 0, ""); 206 | mu_assert(strcmp(node->function.body->binary_expr.rhs->variable.name, "bar") == 0, ""); 207 | 208 | kal_ast_node_free(node); 209 | return 0; 210 | } 211 | 212 | 213 | //-------------------------------------- 214 | // If Expression 215 | //-------------------------------------- 216 | 217 | int test_parse_if_expr() { 218 | kal_ast_node *node = NULL; 219 | int rc = kal_parse("if 1 then 2 else 3", &node); 220 | mu_assert(rc == 0, ""); 221 | mu_assert(node->type == KAL_AST_TYPE_IF_EXPR, ""); 222 | mu_assert(node->if_expr.condition->number.value == 1, ""); 223 | mu_assert(node->if_expr.true_expr->number.value == 2, ""); 224 | mu_assert(node->if_expr.false_expr->number.value == 3, ""); 225 | kal_ast_node_free(node); 226 | return 0; 227 | } 228 | 229 | 230 | 231 | //============================================================================== 232 | // 233 | // Setup 234 | // 235 | //============================================================================== 236 | 237 | int all_tests() { 238 | mu_run_test(test_parse_number); 239 | mu_run_test(test_parse_variable); 240 | mu_run_test(test_parse_addition); 241 | mu_run_test(test_parse_subtraction); 242 | mu_run_test(test_parse_multiplication); 243 | mu_run_test(test_parse_division); 244 | mu_run_test(test_parse_parens); 245 | mu_run_test(test_parse_complex); 246 | mu_run_test(test_parse_complex_with_parens); 247 | mu_run_test(test_parse_function_call); 248 | mu_run_test(test_parse_extern); 249 | mu_run_test(test_parse_function); 250 | mu_run_test(test_parse_if_expr); 251 | return 0; 252 | } 253 | 254 | RUN_TESTS() 
--------------------------------------------------------------------------------
/tests/runtests.sh:
--------------------------------------------------------------------------------
# Runs every compiled unit-test binary under build/tests/ and stops at the
# first one that exits non-zero. Each binary's output is captured to a log
# file that is printed only when that binary fails.
echo ""
echo "Unit Tests"

# Loop over compiled tests and run them.
for test_file in build/tests/*_tests
do
    # Only execute if result is a file (also skips the literal pattern when
    # the glob matches nothing).
    if test -f "$test_file"
    then
        # Log execution to file. The file redirection must come before 2>&1:
        # the test harness reports on stderr, and "2>&1 > file" would leave
        # stderr on the terminal and the log empty.
        if ./"$test_file" > /tmp/sky-test.log 2>&1
        then
            rm -f /tmp/sky-test.log
        else
            # If error occurred then print off log.
            cat /tmp/sky-test.log
            exit 1
        fi
    fi
done

echo ""

--------------------------------------------------------------------------------