├── .gitignore ├── LICENSE ├── Makefile ├── README.md ├── include ├── env.h ├── expr.h ├── infer.h ├── lexer.h ├── nongeneric.h ├── parser.h ├── token.h ├── type.h └── util.h └── src ├── env.c ├── expr.c ├── infer.c ├── lexer.c ├── main.c ├── nongeneric.c ├── parser.c ├── token.c ├── type.c └── util.c /.gitignore: -------------------------------------------------------------------------------- 1 | *.o 2 | infer 3 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2019 mkei 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | CC := gcc 2 | CFLAGS=-Wall -Wextra -std=c11 -I ./include/ -g 3 | SRCROOT = . 4 | SRCDIRS := $(shell find $(SRCROOT) -type d) 5 | SRCS=$(foreach dir, $(SRCDIRS), $(wildcard $(dir)/*.c)) 6 | OBJS=$(SRCS:.c=.o) 7 | .PHONY: clean 8 | 9 | release: $(OBJS) 10 | $(CC) -o infer $(OBJS) $(LDFLAGS) 11 | 12 | clean: 13 | $(RM) src/*.o 14 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | An implementation of Hindley-Milner type inference in C. 2 | 3 | ## Example 4 | 5 | ``` 6 | 200 : int 7 | choko : unknown identifer `choko` 8 | + : int -> int -> int 9 | true : bool 10 | pair : a -> b -> (a * b) 11 | fst : (c * d) -> c 12 | snd : (e * f) -> f 13 | let x = 10 in x : int 14 | let y = false in y : bool 15 | let z = 200 in ((+ z) 210) : int 16 | λa -> λb -> ((> a) b) : int -> int -> bool 17 | let add = λa -> λb -> ((+ a) b) in ((add 100) 200) : int 18 | λx -> x : g -> g 19 | λn -> λm -> 300 : h -> i -> int 20 | let f = λx -> x in ((pair (f 200)) (f true)) : (int * bool) 21 | (fst ((pair false) 100000)) : bool 22 | (snd ((pair false) 100000)) : int 23 | let rec fibo = λn -> (((if ((< n) 2)) n) ((+ (fibo ((- n) 2))) (fibo ((- n) 1)))) in (fibo 30) : int 24 | ``` 25 | 26 | ## Reference 27 | - https://github.com/semahawk/type-inference 28 | - https://qiita.com/reki2000/items/b7f26e65930519295355 29 | - http://lucacardelli.name/Papers/BasicTypechecking.pdf 30 | - http://dysphoria.net/2009/06/28/hindley-milner-type-inference-in-scala/ 31 | -------------------------------------------------------------------------------- /include/env.h: -------------------------------------------------------------------------------- 1 | #ifndef TI_ENV_H 2 | #define TI_ENV_H 3 | 4 | #include "type.h" 5 | #include "nongeneric.h" 6 | 7 | typedef struct TupleST TupleST; 8 | typedef struct Env Env; 9 | 10 | struct TupleST { 11 | char *key; 12 | Type *type; 13 | }; 14 | 15 | struct Env { 16 | TupleST list[128]; 17 | int cursor; 18 | }; 19 | 20 | Env *new_env(); 21 | Env *copy_env(Env *); 22 | void add_to_env(Env *, char *, Type *); 23 | Type *lookup(Env *, char *, NonGeneric *); 24 | 25 | #endif 26 | -------------------------------------------------------------------------------- /include/expr.h: -------------------------------------------------------------------------------- 1 | #ifndef TI_EXPR_H 2 | #define TI_EXPR_H 3 | 4 | enum ExprKind { 5 | INTEGER, 6 | VAR, 7 | LAMBDA, 8 | APPLY, 9 | LET, 10 | LETREC, 11 | }; 12 | 13 | typedef struct Expr Expr; 14 | 15 | struct Expr { 16 | enum ExprKind kind; 17 | 18 | union { 19 | /* Integer */ 20 | struct { 21 | int num; 22 | }; 23 | /* Var */ 24 | struct { 25 | char *name; 26 | }; 27 | /* Lambda */ 28 | struct { 29 | char *x; 30 | Expr *e; 31 | }; 32 | /* Apply */ 33 | struct { 34 | Expr *fn, 35 | *arg; 36 | }; 37 | /* Let */ 38 | struct { 39 | char *lname; 40 | Expr *ldef, 41 | *lbody; 42 | }; 43 | /* Letrec */ 44 | struct { 45 | char *recname; 46 | Expr *recdef, 47 | *recbody; 48 | }; 49 | }; 50 | }; 51 | 52 | 53 | Expr *integer(int); 54 | Expr *var(char *); 55 | Expr *lambda(char *, Expr *); 56 | Expr *apply(Expr *, Expr *); 57 | Expr *let(char *, Expr *, Expr *); 58 | Expr *letrec(char *, Expr *, Expr *); 59 | Expr *binary(Expr *, char *, Expr *); 60 | void exprdump(Expr *); 61 | 62 | 63 | #endif 64 | -------------------------------------------------------------------------------- /include/infer.h: -------------------------------------------------------------------------------- 1 | #ifndef TI_INFER_H 2 | #define TI_INFER_H 3 | 4 | #include "env.h" 5 | #include "expr.h" 6 | #include "nongeneric.h" 7 | 8 | Type *prune(Type *); 9 | Type *analyze(Env *, Expr *, NonGeneric *); 10 | Type *fresh(Type *, NonGeneric *); 11 | 12 | 13 | #endif 14 | -------------------------------------------------------------------------------- /include/lexer.h: -------------------------------------------------------------------------------- 1 | #ifndef TI_LEXER_H 2 | #define TI_LEXER_H 3 | 4 | #include "util.h" 5 | 6 | Vector *lex(char *); 7 | 8 | #endif 9 | -------------------------------------------------------------------------------- /include/nongeneric.h: -------------------------------------------------------------------------------- 1 | #ifndef TI_NONGENERIC_H 2 | #define TI_NONGENERIC_H 3 | 4 | #include "type.h" 5 | 6 | typedef struct NonGeneric NonGeneric; 7 | 8 | struct NonGeneric { 9 | Type *list[128]; 10 | int cursor; 11 | }; 12 | 13 | NonGeneric *new_non_generic(void); 14 | void add_to_non_generic(NonGeneric *, Type *); 15 | NonGeneric *copy_non_generic(NonGeneric *); 16 | 17 | #endif 18 | -------------------------------------------------------------------------------- /include/parser.h: -------------------------------------------------------------------------------- 1 | #ifndef TI_PARSER_H 2 | #define TI_PARSER_H 3 | 4 | #include "expr.h" 5 | #include "util.h" 6 | 7 | Expr *parse(Vector *); 8 | 9 | #endif 10 | -------------------------------------------------------------------------------- /include/token.h: -------------------------------------------------------------------------------- 1 | #ifndef TI_TOKEN_H 2 | #define TI_TOKEN_H 3 | 4 | typedef struct Token Token; 5 | 6 | enum TokenKind { 7 | TK_END, 8 | TK_NUMBER, 9 | TK_IDENT, 10 | /* keyword */ 11 | TK_LET, 12 | TK_REC, 13 | TK_FUN, 14 | TK_IN, 15 | /* Symbol */ 16 | TK_ASSIGN, // = 17 | TK_ARROW, // -> 18 | TK_LPAREN, // ( 19 | TK_RPAREN, // ) 20 | }; 21 | 22 | struct Token { 23 | enum TokenKind kind; 24 | /* TK_NUMBER */ 25 | int number; 26 | /* TK_IDENT */ 27 | char *name; 28 | }; 29 | 30 | Token *new_token(enum TokenKind); 31 | Token *new_number_token(int); 32 | Token *new_ident_token(char *); 33 | Token *token_end(); 34 | 35 | #endif 36 | -------------------------------------------------------------------------------- /include/type.h: -------------------------------------------------------------------------------- 1 | #ifndef TI_TYPE_H 2 | #define TI_TYPE_H 3 | 4 | #include 5 | 6 | enum TypeKind { 7 | /* Type Operator */ 8 | TINT, 9 | TBOOL, 10 | TFN, 11 | TPAIR, 12 | /* Type Variable */ 13 | TVAR, 14 | }; 15 | 16 | typedef struct Type Type; 17 | 18 | struct Type { 19 | enum TypeKind kind; 20 | int ntype; 21 | Type *types[2]; 22 | 23 | union { 24 | /* Function */ 25 | struct { 26 | Type *arg; 27 | Type *result; 28 | }; 29 | 30 | /* Pair */ 31 | struct { 32 | Type *fst; 33 | Type *snd; 34 | }; 35 | 36 | /* Type Variable */ 37 | struct { 38 | int id; 39 | char name; 40 | Type *instance; 41 | }; 42 | }; 43 | }; 44 | 45 | Type *type_int(void); 46 | Type *type_bool(void); 47 | Type *type_fn(Type *, Type *); 48 | Type *type_var(void); 49 | Type *type_pair(Type *, Type *); 50 | bool is_type_variable(Type *); 51 | bool is_type_operator(Type *); 52 | bool same_type(Type *, Type *); 53 | 54 | Type *type_operator0(enum TypeKind); 55 | Type *type_operator2(enum TypeKind, Type *, Type *); 56 | 57 | void typedump(Type *); 58 | void typedump_core(Type *); 59 | 60 | #endif 61 | -------------------------------------------------------------------------------- /include/util.h: -------------------------------------------------------------------------------- 1 | #ifndef TI_UTIL_H 2 | #define TI_UTIL_H 3 | 4 | typedef struct Vector { 5 | void **data; 6 | int len; 7 | int reserved; 8 | } Vector; 9 | 10 | Vector *New_Vector(void); 11 | Vector *New_Vector_With_Size(int); 12 | void Delete_Vector(Vector *); 13 | void vec_push(Vector *self, void *d); 14 | void *vec_pop(Vector *self); 15 | void *vec_last(Vector *self); 16 | 17 | typedef struct Map { 18 | Vector *key; 19 | Vector *value; 20 | } Map; 21 | 22 | Map *New_Map(void); 23 | void map_push(Map *, void *, void *); 24 | 25 | #endif 26 | -------------------------------------------------------------------------------- /src/env.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | #include "env.h" 5 | #include "infer.h" 6 | 7 | Env *new_env() { 8 | Env *self = malloc(sizeof(Env)); 9 | 10 | self->cursor = 0; 11 | 12 | return self; 13 | } 14 | 15 | Env *copy_env(Env *src) { 16 | Env *dst = malloc(sizeof(Env)); 17 | 18 | *dst = *src; 19 | 20 | return dst; 21 | } 22 | 23 | void add_to_env(Env *self, char *sym, Type *type) { 24 | self->list[self->cursor].key = sym; 25 | self->list[self->cursor].type = type; 26 | self->cursor++; 27 | } 28 | 29 | Type *lookup(Env *self, char *key, NonGeneric *nongeneric) { 30 | for(int i = 0; i < self->cursor; i++) { 31 | if(strcmp(key, self->list[i].key) == 0) { 32 | return fresh(self->list[i].type, nongeneric); 33 | } 34 | } 35 | 36 | return NULL; 37 | } 38 | -------------------------------------------------------------------------------- /src/expr.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | #include "expr.h" 5 | 6 | Expr *integer(int n) { 7 | Expr *self = malloc(sizeof(Expr)); 8 | 9 | self->kind = INTEGER; 10 | self->num = n; 11 | 12 | return self; 13 | } 14 | 15 | Expr *var(char *name) { 16 | Expr *self = malloc(sizeof(Expr)); 17 | 18 | self->kind = VAR; 19 | self->name = name; 20 | 21 | return self; 22 | } 23 | 24 | Expr *lambda(char *x, Expr *e) { 25 | Expr *self = malloc(sizeof(Expr)); 26 | 27 | self->kind = LAMBDA; 28 | self->x = x; 29 | self->e = e; 30 | 31 | return self; 32 | } 33 | 34 | Expr *apply(Expr *f, Expr *e) { 35 | Expr *self = malloc(sizeof(Expr)); 36 | 37 | self->kind = APPLY; 38 | self->fn = f; 39 | self->arg = e; 40 | 41 | return self; 42 | } 43 | 44 | Expr *let(char *name, Expr *d, Expr *b) { 45 | Expr *self = malloc(sizeof(Expr)); 46 | 47 | self->kind = LET; 48 | self->lname = name; 49 | self->ldef = d; 50 | self->lbody = b; 51 | 52 | return self; 53 | } 54 | 55 | Expr *letrec(char *name, Expr *d, Expr *b) { 56 | Expr *self = malloc(sizeof(Expr)); 57 | 58 | self->kind = LETREC; 59 | self->recname = name; 60 | self->recdef = d; 61 | self->recbody = b; 62 | 63 | return self; 64 | } 65 | 66 | Expr *binary(Expr *left, char *op, Expr *right) { 67 | return apply(apply(var(op), left), right); 68 | } 69 | 70 | void exprdump(Expr *e) { 71 | switch(e->kind) { 72 | case INTEGER: 73 | printf("%d", e->num); 74 | break; 75 | case VAR: 76 | printf("%s", e->name); 77 | break; 78 | case LAMBDA: 79 | printf("λ%s -> ", e->x); 80 | exprdump(e->e); 81 | break; 82 | case APPLY: 83 | printf("("); 84 | exprdump(e->fn); 85 | printf(" "); 86 | exprdump(e->arg); 87 | printf(")"); 88 | break; 89 | case LET: 90 | printf("let %s = ", e->lname); 91 | exprdump(e->ldef); 92 | printf(" in "); 93 | exprdump(e->lbody); 94 | break; 95 | case LETREC: 96 | printf("let rec %s = ", e->recname); 97 | exprdump(e->recdef); 98 | printf(" in "); 99 | exprdump(e->recbody); 100 | break; 101 | default: 102 | break; 103 | } 104 | } 105 | -------------------------------------------------------------------------------- /src/infer.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | #include "infer.h" 5 | #include "type.h" 6 | #include "nongeneric.h" 7 | #include "util.h" 8 | 9 | static bool is_generic(Type *, NonGeneric *); 10 | static bool occursin(Type *, Type *); 11 | static bool occursin_type(Type *, Type *); 12 | 13 | bool error_occurred = false; 14 | 15 | Type *prune(Type *ty) { 16 | if(ty == NULL) return NULL; 17 | 18 | if(is_type_variable(ty)) { 19 | if(ty->instance != NULL) { 20 | ty->instance = prune(ty->instance); 21 | return ty->instance; 22 | } 23 | } 24 | 25 | return ty; 26 | } 27 | 28 | /* 29 | * 型変数がgenericかどうかを判定する 30 | * 31 | * 型変数がnon-generic型変数のリストに出現するかを見る 32 | * 出現したらnon-genericなので -> false 33 | * しなかったらgenericなので -> true 34 | */ 35 | static bool is_generic(Type *tvar, NonGeneric *nongeneric) { 36 | for(int i = 0; i < nongeneric->cursor; i++) { 37 | if(occursin_type(tvar, nongeneric->list[i])) 38 | return false; 39 | } 40 | 41 | return true; 42 | } 43 | 44 | static bool occursin_type(Type *tvar, Type *texp) { 45 | texp = prune(texp); 46 | 47 | if(is_type_variable(texp)) { 48 | return same_type(tvar, texp); 49 | } 50 | else if(is_type_operator(texp)) { 51 | return occursin(tvar, texp); 52 | } 53 | else return false; 54 | } 55 | 56 | 57 | /* 58 | * 型(第2引数)の中に型変数(第1引数)が出現するかチェックする 59 | */ 60 | static bool occursin(Type *tyvar, Type *tope) { 61 | for(int i = 0; i < tope->ntype; i++) { 62 | if(occursin_type(tyvar, tope->types[i])) return true; 63 | } 64 | 65 | return false; 66 | } 67 | 68 | Type *type_map_exist(Map *self, Type *key) { 69 | for(int i = 0; i < self->key->len; i++) { 70 | if(same_type((Type *)self->key->data[i], key)) { 71 | return (Type *)self->value->data[i]; 72 | } 73 | } 74 | 75 | return NULL; 76 | } 77 | 78 | Type *type_get_or_put(Map *self, Type *key, Type *default_value) { 79 | Type *e = type_map_exist(self, key); 80 | 81 | if(e != NULL) { 82 | return e; 83 | } 84 | else { 85 | map_push(self, key, default_value); 86 | return default_value; 87 | } 88 | } 89 | 90 | /* 91 | * type_operatorとgeneric変数は複製 92 | * non-generic変数は共有 93 | */ 94 | Type *freshrec(Type *ty, NonGeneric *nongeneric, Map *mappings) { 95 | Type *pty = prune(ty); 96 | 97 | if(is_type_variable(pty)) { 98 | if(is_generic(pty, nongeneric)) { 99 | return type_get_or_put(mappings, pty, type_var()); 100 | } 101 | else return pty; 102 | } 103 | else if(is_type_operator(pty)) { 104 | switch(pty->ntype) { 105 | case 0: return type_operator0(pty->kind); 106 | case 2: return type_operator2( 107 | pty->kind, 108 | freshrec(pty->types[0], nongeneric, mappings), 109 | freshrec(pty->types[1], nongeneric, mappings) 110 | ); 111 | default: 112 | puts("????"); 113 | } 114 | } 115 | 116 | /* unreachable */ 117 | return NULL; 118 | } 119 | 120 | Type *fresh(Type *t, NonGeneric *nongeneric) { 121 | Map *mappings = New_Map(); 122 | 123 | return freshrec(t, nongeneric, mappings); 124 | } 125 | 126 | void unify(Type *t1, Type *t2) { 127 | t1 = prune(t1); 128 | t2 = prune(t2); 129 | 130 | printf("unifying..."); 131 | typedump_core(t1); 132 | printf(", "); 133 | typedump_core(t2); 134 | puts(""); 135 | 136 | if(is_type_variable(t1)) { 137 | if(!same_type(t1, t2)) { 138 | if(occursin_type(t1, t2)) { 139 | printf("recursive unification"); 140 | error_occurred = true; 141 | return; 142 | } 143 | t1->instance = t2; 144 | } 145 | } 146 | else if(is_type_operator(t1) && 147 | is_type_variable(t2)) { 148 | unify(t2, t1); 149 | } 150 | else if(is_type_operator(t1) && 151 | is_type_operator(t2)) { 152 | if(t1->kind != t2->kind || t1->ntype != t2->ntype) { 153 | printf("type error: "); 154 | typedump_core(t1); 155 | printf(", "); 156 | typedump_core(t2); 157 | puts(""); 158 | 159 | error_occurred = true; 160 | 161 | return; 162 | } 163 | 164 | for(int i = 0; i < t1->ntype; i++) { 165 | unify(t1->types[i], t2->types[i]); 166 | } 167 | } 168 | else { 169 | puts("cannot infer"); 170 | } 171 | } 172 | 173 | Type *analyze(Env *env, Expr *e, NonGeneric *nongeneric) { 174 | if(nongeneric == NULL) { 175 | nongeneric = new_non_generic(); 176 | } 177 | 178 | if(!e) return NULL; 179 | 180 | switch(e->kind) { 181 | case INTEGER: { 182 | Type *result = type_int(); 183 | 184 | printf("integer %d: ", e->num); 185 | typedump(result); 186 | 187 | return result; 188 | } 189 | case VAR: { 190 | Type *ty = lookup(env, e->name, nongeneric); 191 | 192 | if(ty == NULL) { 193 | printf("unknown identifer `%s`\n", e->name); 194 | return NULL; 195 | } 196 | 197 | printf("var %s: ", e->name); 198 | typedump(ty); 199 | 200 | return ty; 201 | } 202 | case LAMBDA: { 203 | Type *argty = type_var(); 204 | 205 | Env *copied_env = copy_env(env); 206 | add_to_env(copied_env, e->x, argty); 207 | 208 | NonGeneric *copied_ng = copy_non_generic(nongeneric); 209 | add_to_non_generic(copied_ng, argty); 210 | 211 | Type *ret = analyze(copied_env, e->e, copied_ng); 212 | 213 | Type *result = type_fn(argty, ret); 214 | 215 | printf("lambda" ); 216 | exprdump(e); 217 | printf(": "); 218 | typedump(result); 219 | 220 | return result; 221 | } 222 | case APPLY: { 223 | Type *fn = analyze(env, e->fn, nongeneric); 224 | Type *arg = analyze(env, e->arg, nongeneric); 225 | Type *res = type_var(); 226 | 227 | unify(fn, type_fn(arg, res)); 228 | 229 | printf("apply "); 230 | exprdump(e); 231 | printf(": "); 232 | typedump(res); 233 | 234 | return res; 235 | } 236 | case LET: { 237 | Type *def = analyze(env, e->ldef, nongeneric); 238 | 239 | Env *new = copy_env(env); 240 | add_to_env(new, e->lname, def); 241 | 242 | Type *result = analyze(new, e->lbody, nongeneric); 243 | printf("let %s: ", e->lname); 244 | typedump(result); 245 | 246 | return result; 247 | } 248 | case LETREC: { 249 | Type *new = type_var(); 250 | 251 | Env *new_env = copy_env(env); 252 | NonGeneric *new_nongeneric = copy_non_generic(nongeneric); 253 | 254 | add_to_env(new_env, e->recname, new); 255 | add_to_non_generic(new_nongeneric, new); 256 | 257 | Type *def = analyze(new_env, e->recdef, new_nongeneric); 258 | 259 | unify(new, def); 260 | 261 | Type *result = analyze(new_env, e->recbody, new_nongeneric); 262 | printf("letrec %s: ", e->recname); 263 | typedump(result); 264 | 265 | return result; 266 | } 267 | default: 268 | printf("internal error"); 269 | } 270 | 271 | return NULL; 272 | } 273 | -------------------------------------------------------------------------------- /src/lexer.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | 6 | #include "lexer.h" 7 | #include "token.h" 8 | 9 | static void scan(Vector *, char *); 10 | 11 | Vector *lex(char *src) { 12 | Vector *tokens = New_Vector(); 13 | 14 | scan(tokens, src); 15 | 16 | return tokens; 17 | } 18 | 19 | static char *number(Vector *token, char *n) { 20 | int num = 0; 21 | 22 | while(isdigit(*n)) { 23 | num = num * 10 + *n - '0'; 24 | n++; 25 | } 26 | 27 | vec_push(token, new_number_token(num)); 28 | 29 | return n; 30 | } 31 | 32 | static char *ident(Vector *token, char *i) { 33 | int len = 1; 34 | 35 | while(!isblank(i[len]) && !strchr("()", i[len])) { 36 | len++; 37 | } 38 | 39 | char *name = malloc(sizeof(char) * (len + 1)); 40 | strncpy(name, i, len); 41 | 42 | vec_push(token, new_ident_token(name)); 43 | 44 | return i + len; 45 | } 46 | 47 | static void scan(Vector *token, char *src) { 48 | while(*src) { 49 | if(isdigit(*src)) { 50 | src = number(token, src); 51 | continue; 52 | } 53 | else if(isblank(*src)) { 54 | src++; 55 | continue; 56 | } 57 | else if(strchr("()", *src)) { 58 | char *name = malloc(sizeof(char) * 1); 59 | strncpy(name, src, 1); 60 | vec_push(token, new_ident_token(name)); 61 | src++; 62 | continue; 63 | } 64 | else { // var 65 | src = ident(token, src); 66 | continue; 67 | } 68 | } 69 | 70 | vec_push(token, token_end()); 71 | vec_push(token, NULL); 72 | } 73 | -------------------------------------------------------------------------------- /src/main.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | #include "type.h" 5 | #include "expr.h" 6 | #include "env.h" 7 | #include "infer.h" 8 | #include "util.h" 9 | #include "lexer.h" 10 | #include "parser.h" 11 | 12 | Type *Int; 13 | Type *Bool; 14 | extern char unique_name; 15 | extern bool error_occurred; 16 | 17 | void init(Env *env) { 18 | Int = type_int(); 19 | Bool = type_bool(); 20 | 21 | Type *var1 = type_var(); 22 | Type *var2 = type_var(); 23 | 24 | add_to_env(env, "true", Bool); 25 | add_to_env(env, "false", Bool); 26 | add_to_env(env, "+", type_fn(Int, type_fn(Int, Int))); 27 | add_to_env(env, "-", type_fn(Int, type_fn(Int, Int))); 28 | add_to_env(env, "*", type_fn(Int, type_fn(Int, Int))); 29 | add_to_env(env, ">", type_fn(Int, type_fn(Int, Bool))); 30 | add_to_env(env, "<", type_fn(Int, type_fn(Int, Bool))); 31 | 32 | add_to_env(env, "pair", type_fn( 33 | var1, 34 | type_fn(var2, type_pair(var1, var2)) 35 | ) 36 | ); 37 | 38 | Type *var3 = type_var(); 39 | Type *var4 = type_var(); 40 | add_to_env(env, "fst", type_fn(type_pair(var3, var4), var3)); 41 | Type *var5 = type_var(); 42 | Type *var6 = type_var(); 43 | add_to_env(env, "snd", type_fn(type_pair(var5, var6), var6)); 44 | 45 | Type *var7 = type_var(); 46 | add_to_env(env, "if", type_fn(Bool, type_fn(var7, type_fn(var7, var7)))); 47 | } 48 | 49 | int main(void) { 50 | Env *env = new_env(); 51 | 52 | init(env); 53 | 54 | /* 55 | Expr *els[] = { 56 | integer(200), 57 | var("choko"), // error 58 | var("+"), 59 | var("true"), 60 | var("pair"), 61 | var("fst"), 62 | var("snd"), 63 | let("x", integer(10), var("x")), 64 | let("y", var("false"), var("y")), 65 | let("z", integer(200), binary(var("z"), "+", integer(210))), 66 | lambda( 67 | "a", 68 | lambda( 69 | "b", 70 | binary(var("a"), ">", var("b")) 71 | ) 72 | ), 73 | let( 74 | "add", 75 | lambda( 76 | "a", 77 | lambda( 78 | "b", 79 | binary(var("a"), "+", var("b")) 80 | ) 81 | ), 82 | apply( 83 | apply( 84 | var("add"), 85 | integer(100) 86 | ), 87 | integer(200) 88 | ) 89 | ), 90 | lambda("x", var("x")), 91 | lambda("n", lambda("m", integer(300))), 92 | let( 93 | "f", 94 | lambda("x", var("x")), 95 | apply( 96 | apply( 97 | var("pair"), 98 | apply(var("f"), integer(200)) 99 | ), 100 | apply(var("f"), var("true")) 101 | ) 102 | ), 103 | apply( 104 | var("fst"), 105 | apply( 106 | apply(var("pair"), var("false")), 107 | integer(100000) 108 | ) 109 | ), 110 | apply( 111 | var("snd"), 112 | apply( 113 | apply(var("pair"), var("false")), 114 | integer(100000) 115 | ) 116 | ), 117 | letrec( // let rec fibo n = if n < 2 then n else fibo(n - 2) + fibo(n - 1) 118 | "fibo", 119 | lambda( 120 | "n", 121 | apply( 122 | apply( 123 | apply( 124 | var("if"), 125 | binary(var("n"), "<", integer(2)) 126 | ), 127 | var("n") 128 | ), 129 | binary( 130 | apply( 131 | var("fibo"), 132 | binary(var("n"), "-", integer(2)) 133 | ), 134 | "+", 135 | apply( 136 | var("fibo"), 137 | binary(var("n"), "-", integer(1)) 138 | ) 139 | ) 140 | ) 141 | ), 142 | var("fibo") 143 | ), 144 | }; 145 | 146 | int nels = sizeof(els) / sizeof(els[0]); 147 | 148 | for(int i = 0; i < nels; i++) { 149 | exprdump(els[i]); 150 | printf(" : "); 151 | printf("\e[1m"); 152 | typedump(prune(analyze(env, els[i], NULL))); 153 | printf("\e[0m"); 154 | } */ 155 | 156 | char src[256] = {0}; 157 | int cursor; 158 | char c; 159 | 160 | for(;;) { 161 | printf(">> "); 162 | memset(src, 0, 256); 163 | cursor = 0; 164 | 165 | while((c = getchar()) != '\n') { 166 | if(c == EOF) return 0; 167 | 168 | src[cursor++] = c; 169 | } 170 | 171 | Type *ret = analyze(env, parse(lex(src)), NULL); 172 | if(!error_occurred) { 173 | typedump(ret); 174 | } 175 | else { 176 | error_occurred = false; 177 | } 178 | 179 | unique_name = 'a'; 180 | } 181 | 182 | return 0; 183 | } 184 | -------------------------------------------------------------------------------- /src/nongeneric.c: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | #include "nongeneric.h" 4 | 5 | NonGeneric *new_non_generic() { 6 | NonGeneric *self = malloc(sizeof(NonGeneric)); 7 | 8 | self->cursor = 0; 9 | 10 | return self; 11 | } 12 | 13 | void add_to_non_generic(NonGeneric *self, Type *s) { 14 | self->list[self->cursor++] = s; 15 | } 16 | 17 | NonGeneric *copy_non_generic(NonGeneric *src) { 18 | NonGeneric *dst = malloc(sizeof(NonGeneric)); 19 | 20 | *dst = *src; 21 | 22 | return dst; 23 | } 24 | -------------------------------------------------------------------------------- /src/parser.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | #include "parser.h" 5 | #include "token.h" 6 | #include "expr.h" 7 | 8 | #define Step() (++pos) 9 | #define Cur_Token() ((Token *)tokens->data[pos]) 10 | #define Get_Step_Token() ((Token *)tokens->data[pos++]) 11 | #define Cur_Token_Is(tk) ((Cur_Token()->kind) == (tk)) 12 | 13 | static Expr *enter(void); 14 | static Expr *expr(void); 15 | static Expr *expr_primary(void); 16 | 17 | static void unexpect_token(void); 18 | static bool expect(enum TokenKind); 19 | 20 | 21 | static int pos; 22 | static Vector *tokens; 23 | 24 | Expr *parse(Vector *_token) { 25 | pos = 0; 26 | tokens = _token; 27 | 28 | return enter(); 29 | } 30 | 31 | static Expr *enter() { 32 | if(tokens->len == 0) return NULL; 33 | 34 | Expr *st = expr(); 35 | 36 | return st; 37 | } 38 | 39 | static bool expect(enum TokenKind tk) { 40 | if(Cur_Token()->kind == tk) { 41 | ++pos; 42 | return true; 43 | } 44 | else { 45 | unexpect_token(); 46 | return false; 47 | } 48 | } 49 | 50 | static Expr *make_let() { 51 | // let a = expr in expr 52 | Step(); 53 | 54 | bool is_rec = false; 55 | 56 | if(Cur_Token_Is(TK_REC)) { 57 | is_rec = true; 58 | Step(); 59 | } 60 | 61 | char *name = Cur_Token()->name; 62 | expect(TK_IDENT); 63 | 64 | expect(TK_ASSIGN); 65 | 66 | Expr *def = expr(); 67 | 68 | expect(TK_IN); 69 | 70 | Expr *body = expr(); 71 | 72 | return is_rec ? letrec(name, def, body) 73 | : let(name, def, body); 74 | } 75 | 76 | static Expr *make_lambda() { 77 | Step(); 78 | 79 | char *name = Get_Step_Token()->name; 80 | 81 | expect(TK_ARROW); 82 | 83 | Expr *e = expr(); 84 | 85 | return lambda(name, e); 86 | } 87 | 88 | static Expr *make_apply() { 89 | Step(); 90 | 91 | Expr *f = expr(); 92 | 93 | Expr *e = expr(); 94 | 95 | expect(TK_RPAREN); 96 | 97 | return apply(f, e); 98 | } 99 | 100 | static Expr *make_identifer() { 101 | char *name = Cur_Token()->name; 102 | 103 | Step(); 104 | 105 | return var(name); 106 | } 107 | 108 | static Expr *expr() { 109 | return expr_primary(); 110 | } 111 | 112 | static Expr *expr_primary() { 113 | if(Cur_Token_Is(TK_LET)) { 114 | return make_let(); 115 | } 116 | else if(Cur_Token_Is(TK_FUN)) { 117 | return make_lambda(); 118 | } 119 | else if(Cur_Token_Is(TK_IDENT)) { 120 | return make_identifer(); 121 | } 122 | else if(Cur_Token_Is(TK_NUMBER)) { 123 | Expr *i = integer(Cur_Token()->number); 124 | Step(); 125 | return i; 126 | } 127 | else if(Cur_Token_Is(TK_LPAREN)) { 128 | return make_apply(); 129 | } 130 | else if(Cur_Token_Is(TK_END)) { 131 | return NULL; 132 | } 133 | 134 | unexpect_token(); 135 | 136 | return NULL; 137 | } 138 | 139 | static void unexpect_token() { 140 | fprintf(stderr, "parse error: "); 141 | fprintf(stderr, "unexpected token: `%s`\n", Cur_Token()->name); 142 | Step(); 143 | } 144 | -------------------------------------------------------------------------------- /src/token.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | 5 | #include "token.h" 6 | 7 | Token *new_token(enum TokenKind k) { 8 | Token *tk = malloc(sizeof(Token)); 9 | 10 | tk->kind = k; 11 | 12 | return tk; 13 | } 14 | Token *new_number_token(int n) { 15 | Token *tk = new_token(TK_NUMBER); 16 | 17 | tk->number = n; 18 | 19 | return tk; 20 | } 21 | Token *new_ident_token(char *i) { 22 | enum TokenKind k = TK_IDENT; 23 | 24 | if(strcmp(i, "fun") == 0) { 25 | k = TK_FUN; 26 | } 27 | else if(strcmp(i, "in") == 0) { 28 | k = TK_IN; 29 | } 30 | else if(strcmp(i, "let") == 0) { 31 | k = TK_LET; 32 | } 33 | else if(strcmp(i, "rec") == 0) { 34 | k = TK_REC; 35 | } 36 | else if(strcmp(i, "=") == 0) { 37 | k = TK_ASSIGN; 38 | } 39 | else if(strcmp(i, "->") == 0) { 40 | k = TK_ARROW; 41 | } 42 | else if(strcmp(i, "(") == 0) { 43 | k = TK_LPAREN; 44 | } 45 | else if(strcmp(i, ")") == 0) { 46 | k = TK_RPAREN; 47 | } 48 | 49 | Token *tk = new_token(k); 50 | 51 | tk->name = i; 52 | 53 | return tk; 54 | } 55 | 56 | Token *token_end() { 57 | Token *tk = new_token(TK_END); 58 | 59 | return tk; 60 | } 61 | -------------------------------------------------------------------------------- /src/type.c: -------------------------------------------------------------------------------- 1 | #include "type.h" 2 | #include "infer.h" 3 | 4 | #include 5 | #include 6 | #include 7 | #include 8 | 9 | char unique_name = 'a'; 10 | int unique_id = 0; 11 | 12 | Type *type_operator0(enum TypeKind k) { 13 | Type *self = malloc(sizeof(Type)); 14 | 15 | self->kind = k; 16 | self->ntype = 0; 17 | self->types[0] = NULL; 18 | self->types[1] = NULL; 19 | 20 | return self; 21 | } 22 | 23 | Type *type_operator2(enum TypeKind k, Type *a1, Type *a2) { 24 | Type *self = malloc(sizeof(Type)); 25 | 26 | self->kind = k; 27 | self->ntype = 2; 28 | self->types[0] = a1; 29 | self->types[1] = a2; 30 | 31 | switch(k) { 32 | case TFN: 33 | self->arg = a1; 34 | self->result = a2; 35 | break; 36 | case TPAIR: 37 | self->fst = a1; 38 | self->snd = a2; 39 | break; 40 | default: 41 | break; 42 | } 43 | 44 | return self; 45 | } 46 | 47 | Type *type_int() { 48 | return type_operator0(TINT); 49 | } 50 | 51 | Type *type_bool() { 52 | return type_operator0(TBOOL); 53 | } 54 | 55 | Type *type_fn(Type *a, Type *r) { 56 | return type_operator2(TFN, a, r); 57 | } 58 | 59 | Type *type_pair(Type *f, Type *s) { 60 | return type_operator2(TPAIR, f, s); 61 | } 62 | 63 | Type *type_var() { 64 | Type *self = type_operator0(TVAR); 65 | 66 | self->id = unique_id++; 67 | self->name = 0; 68 | self->instance = NULL; 69 | 70 | return (Type *)self; 71 | } 72 | 73 | bool is_type_variable(Type *ty) { 74 | return ty->kind == TVAR; 75 | } 76 | 77 | bool is_type_operator(Type *ty) { 78 | return ty->kind != TVAR; 79 | } 80 | 81 | bool same_type(Type *t1, Type *t2) { 82 | if(t1 == NULL || t2 == NULL) { 83 | puts("NULL error"); 84 | return false; 85 | } 86 | 87 | if(t1->kind != t2->kind) { 88 | return false; 89 | } 90 | 91 | if(is_type_operator(t1)) { 92 | for(int i = 0; i < t1->ntype; i++) { 93 | if(!same_type(t1->types[i], t2->types[i])) 94 | return false; 95 | } 96 | } 97 | else if(is_type_variable(t1)) { 98 | if(t1->id != t2->id) return false; 99 | } 100 | 101 | return true; 102 | } 103 | 104 | void typedump(Type *ty) { 105 | if(ty == NULL) return; 106 | 107 | typedump_core(ty); 108 | puts(""); 109 | } 110 | 111 | void typedump_core(Type *ty) { 112 | if(ty == NULL) { 113 | return; 114 | } 115 | 116 | switch(ty->kind) { 117 | case TINT: printf("int"); break; 118 | case TBOOL: printf("bool"); break; 119 | case TFN: { 120 | printf("("); 121 | typedump_core(ty->arg); 122 | printf(" -> "); 123 | typedump_core(ty->result); 124 | printf(")"); 125 | break; 126 | } 127 | case TVAR: { 128 | if(ty->instance != NULL) { 129 | typedump_core(prune(ty)); 130 | } 131 | else if(ty->name == 0) { 132 | printf("%c", ty->name = unique_name++); 133 | } 134 | else { 135 | printf("%c", ty->name); 136 | } 137 | break; 138 | } 139 | case TPAIR: { 140 | printf("("); 141 | typedump_core(ty->fst); 142 | printf(" * "); 143 | typedump_core(ty->snd); 144 | printf(")"); 145 | break; 146 | } 147 | default: printf("error"); 148 | } 149 | } 150 | -------------------------------------------------------------------------------- /src/util.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | #include "util.h" 5 | 6 | Vector *New_Vector() { 7 | Vector *self = malloc(sizeof(Vector)); 8 | 9 | self->data = malloc(sizeof(void *) * 16); 10 | self->len = 0; 11 | self->reserved = 16; 12 | 13 | return self; 14 | } 15 | 16 | Vector *New_Vector_With_Size(int size) { 17 | Vector *self = malloc(sizeof(Vector)); 18 | 19 | self->data = malloc(sizeof(void *) * size); 20 | self->len = size; 21 | self->reserved = size; 22 | 23 | for(int i = 0; i < size; ++i) { 24 | self->data[i] = NULL; 25 | } 26 | 27 | return self; 28 | } 29 | 30 | void Delete_Vector(Vector *self) { 31 | free(self->data); 32 | 33 | free(self); 34 | } 35 | 36 | void vec_push(Vector *self, void *d) { 37 | if(self->len == self->reserved) { 38 | self->reserved *= 2; 39 | self->data = realloc(self->data, sizeof(void *) * self->reserved); 40 | } 41 | 42 | self->data[self->len++] = d; 43 | } 44 | 45 | void *vec_pop(Vector *self) { 46 | assert(self->len != 0); 47 | 48 | return self->data[--self->len]; 49 | } 50 | 51 | void *vec_last(Vector *self) { return self->data[self->len - 1]; } 52 | 53 | Map *New_Map() { 54 | Map *self = malloc(sizeof(Map)); 55 | 56 | self->key = New_Vector(); 57 | self->value = New_Vector(); 58 | 59 | return self; 60 | } 61 | 62 | void map_push(Map *self, void *key, void *value) { 63 | vec_push(self->key, key); 64 | vec_push(self->value, value); 65 | } 66 | 67 | --------------------------------------------------------------------------------