├── .gitignore ├── LICENSE ├── Makefile ├── README.md ├── env.c ├── env.h ├── infer.c ├── infer.h ├── main.c ├── mem.h ├── node.c ├── node.h ├── type.c └── type.h /.gitignore: -------------------------------------------------------------------------------- 1 | # Object files 2 | *.o 3 | *.ko 4 | *.obj 5 | *.elf 6 | 7 | # Precompiled Headers 8 | *.gch 9 | *.pch 10 | 11 | # Libraries 12 | *.lib 13 | *.a 14 | *.la 15 | *.lo 16 | 17 | # Shared objects (inc. Windows DLLs) 18 | *.dll 19 | *.so 20 | *.so.* 21 | *.dylib 22 | 23 | # Executables 24 | *.exe 25 | *.out 26 | *.app 27 | *.i*86 28 | *.x86_64 29 | *.hex 30 | 31 | # Debug files 32 | *.dSYM/ 33 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Copyright (c) 2015, Szymon Urbaś 2 | All rights reserved. 3 | 4 | Redistribution and use in source and binary forms, with or without 5 | modification, are permitted provided that the following conditions are met: 6 | 7 | * Redistributions of source code must retain the above copyright notice, this 8 | list of conditions and the following disclaimer. 9 | 10 | * Redistributions in binary form must reproduce the above copyright notice, 11 | this list of conditions and the following disclaimer in the documentation 12 | and/or other materials provided with the distribution. 13 | 14 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 15 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 16 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 17 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 18 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 19 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 20 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 21 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 22 | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 23 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 24 | 25 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | CC = clang 2 | CFLAGS := $(CFLAGS) -W 3 | 4 | .PHONY: clean distclean 5 | 6 | OBJS = main.o env.o infer.o node.o type.o 7 | EXEC = main.out 8 | 9 | DEBUG ?= 0 10 | CFLAGS += -DDEBUG=$(DEBUG) 11 | 12 | # not main to not modify .gitignore (and have it a bit more generic) 13 | ${EXEC}: $(OBJS) 14 | $(CC) $(CFLAGS) -o $@ $(OBJS) 15 | 16 | main.o: main.c env.h infer.h node.h type.h 17 | env.o: env.c env.h infer.h mem.h type.h 18 | infer.o: infer.c infer.h env.h node.h type.h 19 | node.o: node.c node.h mem.h 20 | type.o: type.c type.h mem.h 21 | 22 | clean: 23 | rm -rf *.o 24 | 25 | distclean: clean 26 | rm -f *.out 27 | 28 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # type-inference 2 | 3 | A Hindley-Milner(-Damas) type inference implementation in C. 4 | 5 | Current output: 6 | 7 | ((+ 2) 3) : int 8 | ((* 2) ((+ 2) 2)) : int 9 | ((> 2) 3) : bool 10 | ((* 5) ((< 7) 11)) : type mismatch: bool != int 11 | (let bar = (fun x -> (fun y -> ((+ (strlen x)) y))) in (bar "fubar")) : (int -> int) 12 | ((pair 11) "13") : (int * string) 13 | (((cond true) "yes") "no") : string 14 | (let sum = (fun y -> (fun x -> ((+ x) y))) in ((sum 2) 3)) : int 15 | (letrec factorial = (fun n -> (((cond (zero n)) 1) ((* n) (factorial (pred n))))) in (factorial 5)) : int 16 | (fun x -> ((pair (x 3)) (x true))) : type mismatch: bool != int 17 | ((pair (f 4)) (f true)) : unknown symbol 'f' 18 | (let f = (fun x -> x) in ((pair (f 4)) (f true))) : (int * bool) 19 | (fun f -> (f f)) : recursive unification 20 | (let g = (fun f -> 5) in (g g)) : int 21 | (fun g -> (let f = (fun x -> g) in ((pair (f 3)) (f true)))) : (a -> (a * a)) 22 | (fun f -> (fun g -> (fun arg -> (g (f arg))))) : ((b -> c) -> ((c -> d) -> (b -> d))) 23 | 24 | -------------------------------------------------------------------------------- /env.c: -------------------------------------------------------------------------------- 1 | /* 2 | * 3 | * env.c 4 | * 5 | * Created at: Mon Aug 24 13:36:13 2015 13:36:13 6 | * 7 | * Author: Szymon Urbaś 8 | * 9 | * License: please visit the LICENSE file for details. 10 | * 11 | */ 12 | 13 | #include 14 | #include 15 | #include 16 | 17 | #include "env.h" 18 | #include "infer.h" 19 | #include "mem.h" 20 | #include "type.h" 21 | 22 | env_t *new_env(void) 23 | { 24 | env_t *env = allocz(env_t); 25 | 26 | env->current_symbol = env->symbols; 27 | 28 | return env; 29 | } 30 | 31 | env_t *copy_env(env_t *env) 32 | { 33 | env_t *new_env = alloc(env_t); 34 | 35 | memcpy(new_env, env, sizeof(env_t)); 36 | new_env->current_symbol = new_env->symbols + env->symbols_num; 37 | 38 | return new_env; 39 | } 40 | 41 | void new_symbol(env_t *env, const char *name, type_t *const type) 42 | { 43 | /* overflow? nah. */ 44 | env->current_symbol->name = name; 45 | env->current_symbol->type = type; 46 | env->current_symbol++; 47 | env->symbols_num++; 48 | } 49 | 50 | type_t *symbol_lookup(env_t *env, const char *name, ng_t *non_generic) 51 | { 52 | symbol_t *sym; 53 | 54 | for (sym = env->symbols; sym != env->current_symbol; sym++) 55 | if (!strcmp(sym->name, name)){ 56 | #if DEBUG 57 | printf("fetching fresh for %s (%d)\n", name, sym->type->idd); 58 | #endif 59 | return fresh(sym->type, non_generic); 60 | } 61 | 62 | return NULL; 63 | } 64 | 65 | ng_t *new_non_generic(void) 66 | { 67 | ng_t *ng = allocz(ng_t); 68 | 69 | ng->current_slot = ng->slots; 70 | 71 | return ng; 72 | } 73 | 74 | ng_t *copy_non_generic(ng_t *ng) 75 | { 76 | /* nng = new non generics */ 77 | ng_t *nng = alloc(ng_t); 78 | 79 | memcpy(nng, ng, sizeof(ng_t)); 80 | nng->current_slot = nng->slots + ng->slots_num; 81 | 82 | return nng; 83 | } 84 | 85 | void add_to_non_generic(ng_t *ng, type_t *type) 86 | { 87 | *ng->current_slot = type; 88 | ng->current_slot++; 89 | ng->slots_num++; 90 | } 91 | 92 | /* 93 | * vi: ft=c:ts=2:sw=2:expandtab 94 | */ 95 | 96 | -------------------------------------------------------------------------------- /env.h: -------------------------------------------------------------------------------- 1 | /* 2 | * 3 | * env.h 4 | * 5 | * Created at: Mon Aug 24 13:35:39 2015 13:35:39 6 | * 7 | * Author: Szymon Urbaś 8 | * 9 | * License: please visit the LICENSE file for details. 10 | * 11 | */ 12 | 13 | #ifndef ENV_H 14 | #define ENV_H 15 | 16 | #include "type.h" 17 | 18 | typedef struct { 19 | const char *name; 20 | type_t *type; 21 | } symbol_t; 22 | 23 | typedef struct { 24 | symbol_t symbols[256]; 25 | symbol_t *current_symbol; 26 | size_t symbols_num; 27 | } env_t; 28 | 29 | /* bleh.. */ 30 | typedef struct { 31 | type_t *slots[256]; 32 | type_t **current_slot; 33 | size_t slots_num; 34 | } ng_t; 35 | 36 | env_t *new_env(void); 37 | env_t *copy_env(env_t *env); 38 | void new_symbol(env_t *env, const char *name, type_t *const type); 39 | type_t *symbol_lookup(env_t *env, const char *name, ng_t *non_generic); 40 | 41 | ng_t *new_non_generic(void); 42 | ng_t *copy_non_generic(ng_t *); 43 | void add_to_non_generic(ng_t *, type_t *); 44 | 45 | #endif /* ENV_H */ 46 | 47 | /* 48 | * vi: ft=c:ts=2:sw=2:expandtab 49 | */ 50 | 51 | -------------------------------------------------------------------------------- /infer.c: -------------------------------------------------------------------------------- 1 | /* 2 | * 3 | * infer.c 4 | * 5 | * Created at: Mon Aug 24 14:48:17 2015 14:48:17 6 | * 7 | * Author: Szymon Urbaś 8 | * 9 | * License: please visit the LICENSE file for details. 10 | * 11 | */ 12 | 13 | #include 14 | #include 15 | #include 16 | #include 17 | 18 | #include "env.h" 19 | #include "infer.h" 20 | #include "node.h" 21 | #include "type.h" 22 | 23 | static bool debug = false; 24 | static jmp_buf infer_jmp_buf; 25 | 26 | bool is_generic(type_t *type, ng_t *non_generic) 27 | { 28 | unsigned i; 29 | 30 | #if DEBUG 31 | printf("non-generic(%p): {", (void *)non_generic); 32 | #endif 33 | 34 | for (i = 0; i < non_generic->slots_num; i++){ 35 | #if DEBUG 36 | printf("%d ", non_generic->slots[i]->idd); 37 | #endif 38 | 39 | if (occurs_in_type(type, non_generic->slots[i])) 40 | return false; 41 | } 42 | 43 | #if DEBUG 44 | printf("}\n"); 45 | #endif 46 | 47 | return true; 48 | } 49 | 50 | type_t *freshrec(type_t *type, ng_t *non_generic, mapping_t *mappings, unsigned *current_mapping, unsigned *mappings_num) 51 | { 52 | type_t *pruned = prune(type); 53 | 54 | if (is_type_variable(pruned)){ 55 | if (is_generic(pruned, non_generic)){ 56 | #if DEBUG 57 | printf("type %d (instance %p) is a generic variable\n", pruned->idd, (void*)pruned->instance); 58 | #endif 59 | 60 | unsigned i; 61 | mapping_t *ptr; 62 | bool found = false; 63 | 64 | #if DEBUG 65 | printf("%d: mappings(%d): {", pruned->idd, *mappings_num); 66 | #endif 67 | 68 | for (i = 0; i < *mappings_num; i++){ 69 | ptr = &mappings[i]; 70 | 71 | #if DEBUG 72 | printf("%p(%d): %p(%d)", (void*)ptr->from, ptr->from->idd, (void*)ptr->to, ptr->to->idd); 73 | 74 | if (i != *mappings_num - 1) 75 | printf(", "); 76 | #endif 77 | 78 | if (types_are_equal(ptr->from, pruned)){ 79 | found = true; 80 | break; 81 | } 82 | } 83 | 84 | #if DEBUG 85 | printf("}\n"); 86 | #endif 87 | 88 | if (!found){ 89 | #if DEBUG 90 | printf("type "); 91 | print_type(pruned); 92 | printf(" NOT in mappings\n"); 93 | #endif 94 | 95 | ptr = &mappings[*current_mapping]; 96 | ptr->from = pruned; 97 | ptr->to = type_variable(); 98 | 99 | (*current_mapping)++; 100 | (*mappings_num)++; 101 | } else { 102 | #if DEBUG 103 | printf("type "); 104 | print_type(pruned); 105 | printf(" IS in mappings\n"); 106 | #endif 107 | } 108 | 109 | return ptr->to; 110 | } else { 111 | /* not generic */ 112 | #if DEBUG 113 | printf("type %d (instance %p) is a non-generic variable\n", pruned->idd, (void*)pruned->instance); 114 | #endif 115 | 116 | return pruned; 117 | } 118 | } else if (is_type_operator(pruned)){ 119 | #if DEBUG 120 | printf("type %d is an operator\n", pruned->idd); 121 | #endif 122 | 123 | switch (pruned->types_num){ 124 | case 0: return type_operator_0(pruned->ttype); 125 | case 1: return type_operator_1(pruned->ttype, 126 | freshrec(pruned->types[0], non_generic, mappings, current_mapping, mappings_num)); 127 | case 2: return type_operator_2(pruned->ttype, 128 | freshrec(pruned->types[0], non_generic, mappings, current_mapping, mappings_num), 129 | freshrec(pruned->types[1], non_generic, mappings, current_mapping, mappings_num)); 130 | 131 | default: 132 | fprintf(stderr, "oy-vey, what types num is this?.\n"); 133 | exit(1); 134 | } 135 | } else { 136 | fprintf(stderr, "oy-vey wat is dat.\n"); 137 | exit(1); 138 | } 139 | } 140 | 141 | type_t *fresh(type_t *type, ng_t *non_generic) 142 | { 143 | mapping_t mappings[256] = { { NULL, NULL } }; 144 | unsigned current_mapping = 0; 145 | unsigned mappings_num = 0; 146 | 147 | return freshrec(type, non_generic, mappings, ¤t_mapping, &mappings_num); 148 | } 149 | 150 | bool types_are_equal(type_t *type1, type_t *type2) 151 | { 152 | /* FIXME? */ 153 | return type1 == type2; 154 | 155 | assert(type1 != NULL && type2 != NULL); 156 | 157 | if (type1->ttype != type2->ttype) 158 | return false; 159 | 160 | if (is_type_operator(type1)){ 161 | unsigned i; 162 | 163 | if (type1->types_num != type2->types_num) 164 | return false; 165 | 166 | for (i = 0; i < type1->types_num; i++) 167 | if (!types_are_equal(type1->types[i], type2->types[i])) 168 | return false; 169 | } 170 | 171 | return true; 172 | } 173 | 174 | bool occurs_in_type(type_t *type1, type_t *type2) 175 | { 176 | assert(type1 != NULL && type2 != NULL); 177 | 178 | type_t *pruned_type2 = prune(type2); 179 | 180 | if (is_type_variable(pruned_type2)) 181 | return types_are_equal(type1, pruned_type2); 182 | else if (is_type_operator(pruned_type2)) 183 | return occurs_in(type1, pruned_type2); 184 | else { 185 | fprintf(stderr, "neither a type operator nor a type variable\n"); 186 | exit(1); 187 | } 188 | } 189 | 190 | bool occurs_in(type_t *type1, type_t *type2) 191 | { 192 | assert(type1 != NULL && type2 != NULL); 193 | 194 | unsigned i; 195 | 196 | for (i = 0; i < type2->types_num; i++) 197 | if (occurs_in_type(type1, type2->types[i])) 198 | return true; 199 | 200 | return false; 201 | } 202 | 203 | type_t *prune(type_t *type) 204 | { 205 | assert(type != NULL); 206 | 207 | if (is_type_variable(type)) 208 | if (type->instance){ 209 | type->instance = prune(type->instance); 210 | return type->instance; 211 | } 212 | 213 | return type; 214 | } 215 | 216 | void unify(type_t *type1, type_t *type2) 217 | { 218 | /*printf("entering unify.\n");*/ 219 | assert(type1 != NULL); 220 | assert(type2 != NULL); 221 | 222 | type_t *a = prune(type1); 223 | type_t *b = prune(type2); 224 | 225 | assert(a != NULL && b != NULL); 226 | 227 | if (is_type_variable(a)){ 228 | /*printf("a is typeVARiable\n");*/ 229 | if (!types_are_equal(a, b)){ 230 | if (occurs_in_type(a, b)){ 231 | printf("recursive unification"); 232 | longjmp(infer_jmp_buf, 1); 233 | } else { 234 | /*print_type(a);*/ 235 | /*printf("->instance (%d) = ", a->idd);*/ 236 | /*print_type(b);*/ 237 | /*printf("\n");*/ 238 | 239 | a->instance = b; 240 | } 241 | } 242 | } else if (is_type_operator(a) && is_type_variable(b)){ 243 | /*printf("a is typeOPERator, b is typeVARiable\n");*/ 244 | unify(b, a); 245 | } else if (is_type_operator(a) && is_type_operator(b)){ 246 | /*printf("a is typeOPERator, b is typeOPERator\n");*/ 247 | /*print_type(a);*/ 248 | /*printf(" (%d) vs ", a->idd);*/ 249 | /*print_type(b);*/ 250 | /*printf(" (%d)\n", b->idd);*/ 251 | 252 | if ((a->ttype != b->ttype) || (a->types_num != b->types_num)){ 253 | printf("type mismatch: "); 254 | print_type(a); 255 | printf(" != "); 256 | print_type(b); 257 | longjmp(infer_jmp_buf, 1); 258 | } else { 259 | type_t *p, *q; 260 | unsigned i; 261 | 262 | for (i = 0; i < a->types_num; i++){ 263 | p = a->types[i]; 264 | q = b->types[i]; 265 | 266 | /*printf("unifying ");*/ 267 | /*print_type(p);*/ 268 | /*printf(" and ");*/ 269 | /*print_type(q);*/ 270 | /*printf("\n");*/ 271 | unify(p, q); 272 | } 273 | } 274 | } else { 275 | printf("couldn't unify the two types"); 276 | longjmp(infer_jmp_buf, 1); 277 | } 278 | } 279 | 280 | type_t *infer_type_internal(env_t *env, node_t *node, ng_t *non_generic) 281 | { 282 | assert(node); 283 | 284 | if (non_generic == NULL) 285 | non_generic = new_non_generic(); 286 | 287 | switch (node->type){ 288 | case INTEGER: 289 | return type_integer(); 290 | case STRING: 291 | return type_string(); 292 | case IDENT: { 293 | type_t *type = symbol_lookup(env, node->name, non_generic); 294 | 295 | if (!type){ 296 | printf("unknown symbol '%s'", node->name); 297 | longjmp(infer_jmp_buf, 1); 298 | } 299 | 300 | return type; 301 | } 302 | case LAMBDA: { 303 | type_t *arg_type = type_variable(); 304 | type_t *result_type; 305 | env_t *new_env = copy_env(env); 306 | ng_t *new_non_generic = copy_non_generic(non_generic); 307 | 308 | #if DEBUG 309 | printf("created a copy (%p) of non-generic (%p)\n", (void*)new_non_generic, (void*)non_generic); 310 | #endif 311 | new_symbol(new_env, node->name, arg_type); 312 | add_to_non_generic(new_non_generic, arg_type); 313 | 314 | result_type = infer_type_internal(new_env, node->body, new_non_generic); 315 | 316 | return type_function(arg_type, result_type); 317 | } 318 | case APPLY: { 319 | type_t *fun_type = infer_type_internal(env, node->fn, non_generic); 320 | type_t *arg_type = infer_type_internal(env, node->arg, non_generic); 321 | type_t *result_type = type_variable(); 322 | 323 | unify(type_function(arg_type, result_type), fun_type); 324 | 325 | return result_type; 326 | } 327 | case LET: { 328 | type_t *def_type = infer_type_internal(env, node->def, non_generic); 329 | env_t *new_env = copy_env(env); 330 | 331 | new_symbol(new_env, node->name, def_type); 332 | 333 | return infer_type_internal(new_env, node->body, non_generic); 334 | } 335 | case LETREC: { 336 | type_t *new_type = type_variable(); 337 | type_t *def_type; 338 | env_t *new_env = copy_env(env); 339 | ng_t *new_non_generic = copy_non_generic(non_generic); 340 | 341 | new_symbol(new_env, node->name, new_type); 342 | add_to_non_generic(new_non_generic, new_type); 343 | 344 | def_type = infer_type_internal(new_env, node->def, non_generic); 345 | 346 | unify(new_type, def_type); 347 | 348 | return infer_type_internal(new_env, node->body, non_generic); 349 | } 350 | default: 351 | fprintf(stderr, "#unknown#infer_type_internal#\n"); 352 | exit(1); 353 | } 354 | } 355 | 356 | type_t *infer_type(env_t *env, node_t *node) 357 | { 358 | if (!setjmp(infer_jmp_buf)) 359 | return infer_type_internal(env, node, NULL); 360 | else 361 | return NULL; 362 | } 363 | 364 | /* 365 | * vi: ft=c:ts=2:sw=2:expandtab 366 | */ 367 | 368 | -------------------------------------------------------------------------------- /infer.h: -------------------------------------------------------------------------------- 1 | /* 2 | * 3 | * infer.h 4 | * 5 | * Created at: Mon Aug 24 14:47:44 2015 14:47:44 6 | * 7 | * Author: Szymon Urbaś 8 | * 9 | * License: please visit the LICENSE file for details. 10 | * 11 | */ 12 | 13 | #ifndef INFER_H 14 | #define INFER_H 15 | 16 | #include 17 | 18 | #include "node.h" 19 | #include "type.h" 20 | #include "env.h" 21 | 22 | typedef struct { 23 | type_t *from; 24 | type_t *to; 25 | } mapping_t; 26 | 27 | type_t *infer_type_internal(env_t *env, node_t *node, ng_t *nongen); 28 | type_t *infer_type(env_t *env, node_t *node); 29 | type_t *fresh(type_t *type, ng_t *non_generic); 30 | bool types_are_equal(type_t *one, type_t *two); 31 | bool occurs_in_type(type_t *v, type_t *type2); 32 | bool occurs_in(type_t *v, type_t *type2); 33 | type_t *prune(type_t *type); 34 | void unify(type_t *one, type_t *two); 35 | 36 | #endif /* INFER_H */ 37 | 38 | /* 39 | * vi: ft=c:ts=2:sw=2:expandtab 40 | */ 41 | 42 | -------------------------------------------------------------------------------- /main.c: -------------------------------------------------------------------------------- 1 | /* 2 | * 3 | * main.c 4 | * 5 | * Created at: Tue Aug 11 12:31:14 2015 12:31:14 6 | * 7 | * Author: Szymon Urbaś 8 | * 9 | * License: please visit the LICENSE file for details. 10 | * 11 | */ 12 | 13 | #include 14 | #include 15 | 16 | #include "env.h" 17 | #include "infer.h" 18 | #include "node.h" 19 | #include "type.h" 20 | 21 | int main(void) 22 | { 23 | env_t *env = new_env(); 24 | 25 | type_t *t_int = type_integer(); 26 | type_t *t_bool = type_boolean(); 27 | type_t *t_str = type_string(); 28 | 29 | type_t *var1 = type_variable(); 30 | type_t *var2 = type_variable(); 31 | type_t *var3 = type_variable(); 32 | 33 | new_symbol(env, "true", t_bool); 34 | new_symbol(env, "zero", type_function(t_int, t_bool)); 35 | new_symbol(env, "pred", type_function(t_int, t_int)); 36 | new_symbol(env, "pair", type_function(var1, type_function(var2, type_pair(var1, var2)))); 37 | new_symbol(env, "cond", type_function(t_bool, type_function(var3, type_function(var3, var3)))), 38 | 39 | new_symbol(env, "+", type_function(t_int, type_function(t_int, t_int))); 40 | new_symbol(env, "*", type_function(t_int, type_function(t_int, t_int))); 41 | new_symbol(env, ">", type_function(t_int, type_function(t_int, t_bool))); 42 | new_symbol(env, "<", type_function(t_int, type_function(t_int, t_bool))); 43 | 44 | new_symbol(env, "strlen", type_function(t_str, t_int)); 45 | 46 | node_t *pair = apply(apply(ident("pair"), apply(ident("f"), integer(4))), apply(ident("f"), ident("true"))); 47 | 48 | node_t *examples[] = { 49 | /* basically 2 + 2 */ 50 | binop("+", integer(2), integer(3)), 51 | /* (2 + 2) * 2 */ 52 | binop("*", integer(2), binop("+", integer(2), integer(2))), 53 | /* 2 > 3 */ 54 | binop(">", integer(2), integer(3)), 55 | /* should fail */ 56 | /* 5 * (7 < 11) */ 57 | binop("*", integer(5), binop("<", integer(7), integer(11))), 58 | /* let bar(x, y) = foo(x) + y in bar("fubar") */ 59 | let("bar", lambda("x", lambda("y", binop("+", apply(ident("strlen"), ident("x")), ident("y")))), apply(ident("bar"), string("fubar"))), 60 | /* pair(11, "13") */ 61 | call(ident("pair"), integer(11), string("13"), NULL), 62 | /* cond(true, "yes", "no") */ 63 | call(ident("cond"), ident("true"), string("yes"), string("no"), NULL), 64 | /* let sum = fun x y -> x + y in sum(2, 3) */ 65 | let("sum", func(binop("+", ident("x"), ident("y")), "x", "y", NULL), call(ident("sum"), integer(2), integer(3), NULL)), 66 | /* factorial */ 67 | letrec("factorial", /* letrec factorial = */ 68 | lambda("n", /* fn n => */ 69 | apply( 70 | apply( /* cond (zero n) 1 */ 71 | apply(ident("cond"), /* cond (zero n) */ 72 | apply(ident("zero"), ident("n"))), 73 | integer(1)), 74 | apply( /* times n */ 75 | apply(ident("*"), ident("n")), 76 | apply(ident("factorial"), 77 | apply(ident("pred"), ident("n"))) 78 | ) 79 | ) 80 | ), /* in */ 81 | apply(ident("factorial"), integer(5)) 82 | ), 83 | 84 | /* Should fail: */ 85 | /* fn x => (pair(x(3) (x(true))) */ 86 | lambda("x", 87 | apply( 88 | apply(ident("pair"), 89 | apply(ident("x"), integer(3))), 90 | apply(ident("x"), ident("true")))), 91 | 92 | /* pair(f(3), f(true)) */ 93 | apply( 94 | apply(ident("pair"), apply(ident("f"), integer(4))), 95 | apply(ident("f"), ident("true"))), 96 | 97 | 98 | /* let f = (fn x => x) in ((pair (f 4)) (f true)) */ 99 | let("f", lambda("x", ident("x")), pair), 100 | 101 | /* fn f => f f (fail) */ 102 | lambda("f", apply(ident("f"), ident("f"))), 103 | 104 | /* let g = fn f => 5 in g g */ 105 | let("g", 106 | lambda("f", integer(5)), 107 | apply(ident("g"), ident("g"))), 108 | 109 | /* example that demonstrates generic and non-generic variables: */ 110 | /* fn g => let f = fn x => g in pair (f 3, f true) */ 111 | lambda("g", 112 | let("f", 113 | lambda("x", ident("g")), 114 | apply( 115 | apply(ident("pair"), 116 | apply(ident("f"), integer(3)) 117 | ), 118 | apply(ident("f"), ident("true"))))), 119 | 120 | /* Function composition */ 121 | /* fn f (fn g (fn arg (f g arg))) */ 122 | lambda("f", lambda("g", lambda("arg", apply(ident("g"), apply(ident("f"), ident("arg")))))) 123 | }; 124 | 125 | unsigned i; 126 | node_t *node; 127 | type_t *inferred_type; 128 | 129 | for (i = 0; i < sizeof(examples) / sizeof(*examples); i++){ 130 | node = examples[i]; 131 | 132 | print_node(node); 133 | printf(" : "); 134 | 135 | if ((inferred_type = infer_type(env, node)) != NULL) 136 | print_type(inferred_type); 137 | 138 | printf("\n"); 139 | } 140 | 141 | return 0; 142 | } 143 | 144 | /* 145 | * vi: ft=c:ts=2:sw=2:expandtab 146 | */ 147 | 148 | -------------------------------------------------------------------------------- /mem.h: -------------------------------------------------------------------------------- 1 | /* 2 | * 3 | * mem.h 4 | * 5 | * Created at: Mon Aug 24 13:46:07 2015 13:46:07 6 | * 7 | * Author: Szymon Urbaś 8 | * 9 | * License: please visit the LICENSE file for details. 10 | * 11 | */ 12 | 13 | #ifndef MEM_H 14 | #define MEM_H 15 | 16 | #include 17 | #include 18 | 19 | static inline void *alloc_(size_t sz) 20 | { 21 | void *allocated_memory = malloc(sz); 22 | 23 | assert(allocated_memory != NULL); 24 | 25 | return allocated_memory; 26 | } 27 | 28 | static inline void *allocz_(size_t sz) 29 | { 30 | void *allocated_memory = calloc(1, sz); 31 | 32 | assert(allocated_memory != NULL); 33 | 34 | return allocated_memory; 35 | } 36 | 37 | #define alloc(t) alloc_(sizeof(t)) 38 | #define allocz(t) allocz_(sizeof(t)) 39 | 40 | #endif /* MEM_H */ 41 | 42 | /* 43 | * vi: ft=c:ts=2:sw=2:expandtab 44 | */ 45 | 46 | -------------------------------------------------------------------------------- /node.c: -------------------------------------------------------------------------------- 1 | /* 2 | * 3 | * node.c 4 | * 5 | * Created at: Tue Aug 11 12:20:53 2015 12:20:53 6 | * 7 | * Author: Szymon Urbaś 8 | * 9 | * License: please visit the LICENSE file for details. 10 | * 11 | */ 12 | 13 | #include 14 | #include 15 | #include 16 | 17 | #include "node.h" 18 | #include "mem.h" 19 | 20 | node_t *make_node(enum node_type type) 21 | { 22 | node_t *node = alloc(node_t); 23 | 24 | node->type = type; 25 | 26 | return node; 27 | } 28 | 29 | node_t *integer(int value) 30 | { 31 | node_t *node = make_node(INTEGER); 32 | 33 | node->value.integer = value; 34 | 35 | return node; 36 | } 37 | 38 | node_t *string(const char *value) 39 | { 40 | node_t *node = make_node(STRING); 41 | 42 | node->value.string = value; 43 | 44 | return node; 45 | } 46 | 47 | node_t *lambda(const char *param, node_t *body) 48 | { 49 | node_t *node = make_node(LAMBDA); 50 | 51 | node->name = param; 52 | node->body = body; 53 | 54 | return node; 55 | } 56 | 57 | node_t *func(node_t *body, ...) 58 | { 59 | node_t *node; 60 | va_list vl; 61 | const char *param; 62 | 63 | va_start(vl, body); 64 | 65 | while (1) 66 | if ((param = va_arg(vl, const char *)) == NULL) 67 | break; 68 | else { 69 | node = lambda(param, body); 70 | body = node; 71 | } 72 | 73 | va_end(vl); 74 | 75 | return node; 76 | } 77 | 78 | node_t *ident(const char *name) 79 | { 80 | node_t *node = make_node(IDENT); 81 | 82 | node->name = name; 83 | 84 | return node; 85 | } 86 | 87 | node_t *let(const char *name, node_t *def, node_t *body) 88 | { 89 | node_t *node = make_node(LET); 90 | 91 | node->name = name; 92 | node->def = def; 93 | node->body = body; 94 | 95 | return node; 96 | } 97 | 98 | node_t *letrec(const char *name, node_t *def, node_t *body) 99 | { 100 | node_t *node = make_node(LETREC); 101 | 102 | node->name = name; 103 | node->def = def; 104 | node->body = body; 105 | 106 | return node; 107 | } 108 | 109 | node_t *apply(node_t *fn, node_t *arg) 110 | { 111 | node_t *node = make_node(APPLY); 112 | 113 | node->fn = fn; 114 | node->arg = arg; 115 | 116 | return node; 117 | } 118 | 119 | node_t *call(node_t *fn, node_t *arg, ...) 120 | { 121 | node_t *node = apply(fn, arg), *other_arg; 122 | va_list vl; 123 | 124 | va_start(vl, arg); 125 | 126 | while (1) 127 | if ((other_arg = va_arg(vl, node_t *)) == NULL) 128 | break; 129 | else 130 | node = apply(node, other_arg); 131 | 132 | va_end(vl); 133 | 134 | return node; 135 | } 136 | 137 | node_t *binop(const char *op, node_t *lhs, node_t *rhs) 138 | { 139 | return apply(apply(ident(op), lhs), rhs); 140 | } 141 | 142 | void print_node(node_t *node) 143 | { 144 | switch (node->type){ 145 | case INTEGER: 146 | printf("%d", node->value.integer); 147 | break; 148 | case STRING: 149 | printf("\"%s\"", node->value.string); 150 | break; 151 | case LAMBDA: 152 | printf("(fun %s -> ", node->name); 153 | print_node(node->body); 154 | printf(")"); 155 | break; 156 | case IDENT: 157 | printf("%s", node->name); 158 | break; 159 | case APPLY: 160 | printf("("); 161 | print_node(node->fn); 162 | printf(" "); 163 | print_node(node->arg); 164 | printf(")"); 165 | break; 166 | case LET: 167 | printf("(let %s = ", node->name); 168 | print_node(node->def); 169 | printf(" in "); 170 | print_node(node->body); 171 | printf(")"); 172 | break; 173 | case LETREC: 174 | printf("(letrec %s = ", node->name); 175 | print_node(node->def); 176 | printf(" in "); 177 | print_node(node->body); 178 | printf(")"); 179 | break; 180 | 181 | default: 182 | printf("#unknown_node_type#print_node#"); 183 | break; 184 | } 185 | } 186 | 187 | /* 188 | * vi: ft=c:ts=2:sw=2:expandtab 189 | */ 190 | 191 | -------------------------------------------------------------------------------- /node.h: -------------------------------------------------------------------------------- 1 | /* 2 | * 3 | * node.h 4 | * 5 | * Created at: Tue Aug 11 12:28:23 2015 12:28:23 6 | * 7 | * Author: Szymon Urbaś 8 | * 9 | * License: please visit the LICENSE file for details. 10 | * 11 | */ 12 | 13 | #ifndef NODE_H 14 | #define NODE_H 15 | 16 | #include 17 | 18 | enum node_type { 19 | INTEGER, 20 | STRING, 21 | 22 | LAMBDA, 23 | IDENT, 24 | APPLY, 25 | LET, 26 | LETREC 27 | }; 28 | 29 | typedef struct _node { 30 | enum node_type type; 31 | /* we really don't care about space efficency */ 32 | /* common */ 33 | const char *name; 34 | struct _node *body; 35 | /* literals */ 36 | struct { 37 | int integer; 38 | const char *string; 39 | } value; 40 | /* lambda */ 41 | /* name, body */ 42 | /* ident */ 43 | /* name */ 44 | /* apply */ 45 | struct _node *fn; 46 | struct _node *arg; 47 | /* let / letrec */ 48 | struct _node *def; 49 | /* name, body */ 50 | } node_t; 51 | 52 | node_t *integer(int value); 53 | node_t *string(const char *value); 54 | 55 | node_t *lambda(const char *param, node_t *body); 56 | node_t *func(node_t *body, ...); 57 | node_t *ident(const char *name); 58 | node_t *let (const char *param, node_t *def, node_t *body); 59 | node_t *letrec(const char *param, node_t *def, node_t *body); 60 | node_t *apply(node_t *fn, node_t *arg); 61 | node_t *call(node_t *fn, node_t *arg, ...); 62 | node_t *binop(const char *op, node_t *lhs, node_t *rhs); 63 | 64 | void print_node(node_t *node); 65 | 66 | #endif /* NODE_H */ 67 | 68 | /* 69 | * vi: ft=c:ts=2:sw=2:expandtab 70 | */ 71 | 72 | -------------------------------------------------------------------------------- /type.c: -------------------------------------------------------------------------------- 1 | /* 2 | * 3 | * type.c 4 | * 5 | * Created at: Tue Aug 11 13:10:40 2015 13:10:40 6 | * 7 | * Author: Szymon Urbaś 8 | * 9 | * License: please visit the LICENSE file for details. 10 | * 11 | */ 12 | 13 | #include 14 | #include 15 | 16 | #include "mem.h" 17 | #include "type.h" 18 | 19 | static size_t current_id = 0; 20 | static char current_ch = 'a'; 21 | 22 | static int curr_var_idd = 16; 23 | static int curr_oper_idd = 256; 24 | 25 | type_t *type_variable_(const char *file, unsigned line) 26 | { 27 | type_t *type = alloc(type_t); 28 | 29 | #if DEBUG 30 | printf("new type variable (%d) in %s line %u\n", curr_var_idd, file, line); 31 | #else 32 | /* supress warnings */ 33 | (void)file; 34 | (void)line; 35 | #endif 36 | 37 | type->ttype = TYPE_VARIABLE; 38 | type->id = current_id++; 39 | type->ch = 0; 40 | type->instance = NULL; 41 | type->idd = curr_var_idd++; /* debug */ 42 | 43 | return type; 44 | } 45 | 46 | type_t *type_operator_0_(const char *file, unsigned line, ttype_t ttype) 47 | { 48 | type_t *type = alloc(type_t); 49 | 50 | #if DEBUG 51 | printf("new type operator (%d) in %s line %u\n", curr_oper_idd, file, line); 52 | #else 53 | /* supress warnings */ 54 | (void)file; 55 | (void)line; 56 | #endif 57 | 58 | type->ttype = ttype; 59 | type->types[0] = NULL; 60 | type->types[1] = NULL; 61 | type->types_num = 0; 62 | type->idd = curr_oper_idd++; /* debug */ 63 | 64 | return type; 65 | } 66 | 67 | type_t *type_operator_1_(const char *file, unsigned line, ttype_t ttype, type_t *type_one) 68 | { 69 | type_t *type = alloc(type_t); 70 | 71 | #if DEBUG 72 | printf("new type operator (%d) in %s line %u\n", curr_oper_idd, file, line); 73 | #else 74 | /* supress warnings */ 75 | (void)file; 76 | (void)line; 77 | #endif 78 | 79 | type->ttype = ttype; 80 | type->types[0] = type_one; 81 | type->types[1] = NULL; 82 | type->types_num = 1; 83 | type->idd = curr_oper_idd++; /* debug */ 84 | 85 | return type; 86 | } 87 | 88 | type_t *type_operator_2_(const char *file, unsigned line, ttype_t ttype, type_t *type_one, type_t *type_two) 89 | { 90 | type_t *type = alloc(type_t); 91 | 92 | #if DEBUG 93 | printf("new type operator (%d) in %s line %u\n", curr_oper_idd, file, line); 94 | #else 95 | /* supress warnings */ 96 | (void)file; 97 | (void)line; 98 | #endif 99 | 100 | type->ttype = ttype; 101 | type->types[0] = type_one; 102 | type->types[1] = type_two; 103 | type->types_num = 2; 104 | type->idd = curr_oper_idd++; /* debug */ 105 | 106 | return type; 107 | } 108 | 109 | type_t *type_integer(void) 110 | { 111 | return type_operator_0(TYPE_INTEGER); 112 | } 113 | 114 | type_t *type_boolean(void) 115 | { 116 | return type_operator_0(TYPE_BOOLEAN); 117 | } 118 | 119 | type_t *type_string(void) 120 | { 121 | return type_operator_0(TYPE_STRING); 122 | } 123 | 124 | type_t *type_function(type_t *from, type_t *to) 125 | { 126 | return type_operator_2(TYPE_FUNCTION, from, to); 127 | } 128 | 129 | type_t *type_pair(type_t *fst, type_t *snd) 130 | { 131 | return type_operator_2(TYPE_PAIR, fst, snd); 132 | } 133 | 134 | bool is_type_variable(type_t *type) 135 | { 136 | assert(type); 137 | 138 | if (type->ttype == TYPE_VARIABLE) 139 | return true; 140 | 141 | return false; 142 | } 143 | 144 | bool is_type_operator(type_t *type) 145 | { 146 | assert(type); 147 | 148 | if (type->ttype == TYPE_VARIABLE) 149 | return false; 150 | 151 | return true; 152 | } 153 | 154 | void print_type(type_t *type) 155 | { 156 | switch (type->ttype){ 157 | case TYPE_VARIABLE: 158 | if (type->instance){ 159 | print_type(type->instance); 160 | } else { 161 | if (!type->ch){ 162 | printf("%c", type->ch = current_ch++); 163 | } else { 164 | printf("%c", type->ch); 165 | } 166 | } 167 | break; 168 | case TYPE_INTEGER: 169 | printf("int"); 170 | break; 171 | case TYPE_BOOLEAN: 172 | printf("bool"); 173 | break; 174 | case TYPE_STRING: 175 | printf("string"); 176 | break; 177 | case TYPE_FUNCTION: 178 | printf("("); 179 | print_type(type->types[0]); 180 | printf(" -> "); 181 | print_type(type->types[1]); 182 | printf(")"); 183 | break; 184 | case TYPE_PAIR: 185 | printf("("); 186 | print_type(type->types[0]); 187 | printf(" * "); 188 | print_type(type->types[1]); 189 | printf(")"); 190 | break; 191 | default: 192 | fprintf(stderr, "#unknown_type!#print_type#\n"); 193 | break; 194 | } 195 | } 196 | 197 | /* 198 | * vi: ft=c:ts=2:sw=2:expandtab 199 | */ 200 | 201 | -------------------------------------------------------------------------------- /type.h: -------------------------------------------------------------------------------- 1 | /* 2 | * 3 | * type.h 4 | * 5 | * Created at: Tue Aug 11 13:13:50 2015 13:13:50 6 | * 7 | * Author: Szymon Urbaś 8 | * 9 | * License: please visit the LICENSE file for details. 10 | * 11 | */ 12 | 13 | #ifndef TYPE_H 14 | #define TYPE_H 15 | 16 | #include 17 | 18 | typedef enum { 19 | /* type variable */ 20 | TYPE_VARIABLE, 21 | /* type operator */ 22 | TYPE_INTEGER, 23 | TYPE_BOOLEAN, 24 | TYPE_STRING, 25 | TYPE_FUNCTION, 26 | TYPE_PAIR 27 | } ttype_t; 28 | 29 | typedef struct _type { 30 | int idd; /* debug */ 31 | /* for type_variable */ 32 | size_t id; /* unique id */ 33 | char ch; /* variable's name, assigned lazily */ 34 | struct _type *instance; 35 | /* for type_operator (and it's derivatives) */ 36 | ttype_t ttype; 37 | struct _type *types[2]; /* some types contain other types */ 38 | unsigned types_num; /* how many types in the `types` field */ 39 | } type_t; 40 | 41 | #define type_variable() type_variable_(__FILE__, __LINE__) 42 | type_t *type_variable_(const char *, unsigned); 43 | 44 | #define type_operator_0(t) type_operator_0_(__FILE__, __LINE__, t) 45 | #define type_operator_1(t,u) type_operator_1_(__FILE__, __LINE__, t, u) 46 | #define type_operator_2(t,u,v) type_operator_2_(__FILE__, __LINE__, t, u, v) 47 | type_t *type_operator_0_(const char *, unsigned, ttype_t); 48 | type_t *type_operator_1_(const char *, unsigned, ttype_t, type_t *type); 49 | type_t *type_operator_2_(const char *, unsigned, ttype_t, type_t *type_one, type_t *type_two); 50 | 51 | type_t *type_integer(void); 52 | type_t *type_boolean(void); 53 | type_t *type_string(void); 54 | 55 | type_t *type_function(type_t *in, type_t *out); 56 | type_t *type_pair(type_t *fst, type_t *snd); 57 | 58 | bool is_type_variable(type_t *type); 59 | bool is_type_operator(type_t *type); 60 | 61 | void print_type(type_t *type); 62 | 63 | #endif /* TYPE_H */ 64 | 65 | /* 66 | * vi: ft=c:ts=2:sw=2:expandtab 67 | */ 68 | 69 | --------------------------------------------------------------------------------