├── README.md ├── main.c └── test.sasl /README.md: -------------------------------------------------------------------------------- 1 | SASL 2 | ==== 3 | 4 | Status 5 | ------ 6 | This project is pretty much complete and I'm unlikely to develop it further. 7 | 8 | A simple and educational compiler for the SASL programming language 9 | 10 | Here is what some actual code in SASL looks like. 11 | 12 | def id x = x. 13 | 14 | def until p f x = if p x then x else until p f (f x). 15 | def comp f g x = f (g x). 16 | def map f l = if l=nil then nil 17 | else (f x):(map f xs) where x = hd l; 18 | xs = tl l. 19 | def fold m z l = if l=nil then z 20 | else m x (fold m z xs) where x = hd l; 21 | xs = tl l. 22 | def append l1 l2 = if l1=nil then l2 23 | else x:(append xs l2) where x = hd l1; 24 | xs = tl l1. 25 | def reverse l = if l=nil then nil 26 | else append (reverse (tl l)) ((hd l):nil). 27 | def filter p l = if l=nil then nil 28 | else (if p x then x:(filter p xs) 29 | else filter p xs) where x = hd l; 30 | xs = tl l. 31 | def sort p l = if l=nil then nil 32 | else insert p (hd l) (sort p (tl l)) 33 | where 34 | insert pp e ll = if ll=nil then (e:nil) 35 | else 36 | if pp e (hd ll) then (e:ll) 37 | else 38 | ((hd ll):(insert pp e (tl ll))). 39 | def drop n l = if n<=0 then l 40 | else if l=nil then nil 41 | else drop (n-1) (tl l). 42 | def take n l = if n=0 or l=nil then nil 43 | else x:take (n-1) xs where x = hd l; 44 | xs = tl l. 45 | def at n l = if n=0 then hd l 46 | else at (n-1) (tl l). 47 | def null l = l=nil. 48 | def length l = if l=nil then 0 49 | else 1+(length(tl l)). 50 | def sum = fold plus 0. 51 | def product = fold times 1. 52 | def plus x y = x+y. 53 | def mul x y = x*y. 54 | def div x y = x/y. 55 | def div2 y x = y/x. 56 | def minus x y = x-y. 57 | def minus2 y x = y-x. 58 | def lt x y = x=y. 62 | def gt x y = x>y. 63 | 64 | def zipWith f x y = if x=nil then nil 65 | else f (hd x) (hd y):zipWith f (tl x) (tl y). 66 | 67 | at 19 fib where fib = 1:1:(zipWith plus fib (tl fib)) 68 | -------------------------------------------------------------------------------- /main.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | 6 | /* 7 | * 7/5/05 8 | * Fixed memory leak (when using GC) in strdup. 9 | * Removed useless atom table. 10 | * Added lots of comments. 11 | * Reformatting. 12 | * Dead code elimination. 13 | * Factored cascaded conditionals and replaced some with switch. 14 | * list_eq for pairs fixed. 15 | * Fixed parsing of binary operators. 16 | */ 17 | 18 | /* 19 | * #define COLLECT_GARBAGE 20 | * If you want to use the Boehm-Demers-Weiser garbage collector. 21 | * 22 | * Portability issues: 23 | * Assumes int is as long as a pointer. 24 | */ 25 | 26 | #if defined(COLLECT_GARBAGE) 27 | #include "gc.h" 28 | #include "gc_typed.h" 29 | #endif 30 | 31 | int current_char; 32 | 33 | #if defined(COLLECT_GARBAGE) 34 | void *allocate_memory(size_t n) { 35 | return GC_malloc(n); 36 | } 37 | #else 38 | void *allocate_memory(size_t n) { 39 | return malloc(n); 40 | } 41 | #endif 42 | 43 | char *duplicate_string(const char *s) { 44 | return strcpy(allocate_memory(strlen(s)+1),s); 45 | } 46 | 47 | int next_char() { 48 | return current_char = getchar(); 49 | } 50 | 51 | void error_msg(char *a) { 52 | /* Potential buffer overflow */ 53 | char line[1024]; 54 | fgets(line,1024,stdin); 55 | fprintf(stderr,"Error: %s\nBefore: %s\n",a,line); 56 | exit(1); 57 | } 58 | 59 | typedef enum { 60 | COMB_ERROR, 61 | 62 | COMB_ATOM, 63 | COMB_COMB, 64 | COMB_PAIR, 65 | COMB_INTEGER, 66 | COMB_VAR 67 | } comb_type; 68 | 69 | typedef struct list { 70 | comb_type type; 71 | int value; 72 | char *var_name; 73 | struct list *head,*tail; 74 | char name; 75 | } list; 76 | 77 | /* 78 | * Convenience macros for list access. 79 | */ 80 | #define H(a) ((a)->head) 81 | #define T(a) ((a)->tail) 82 | #define TT(a) T(T(a)) 83 | #define TH(a) T(H(a)) 84 | #define HT(a) H(T(a)) 85 | #define HH(a) H(H(a)) 86 | #define TTH(a) T(TH(a)) 87 | #define HTH(a) H(TH(a)) 88 | #define THT(a) T(HT(a)) 89 | #define HHT(a) H(HT(a)) 90 | #define THTH(a) T(HTH(a)) 91 | #define HHTH(a) H(HTH(a)) 92 | 93 | int is_comb(list *); 94 | 95 | list *make_list() { 96 | list *c = (list *)allocate_memory(sizeof(list)); 97 | 98 | return c; 99 | } 100 | 101 | list *I; 102 | 103 | /* 104 | * Remove redundant I combinator. 105 | */ 106 | list *elide(list *a) { 107 | return is_comb(a) && H(a)==I ? elide(T(a)) : a; 108 | } 109 | 110 | /* 111 | * Apply combinator a to b. 112 | * Ie. a b 113 | */ 114 | list *apply(list *a,list *b) { 115 | list *c = make_list(); 116 | c->type = COMB_COMB; 117 | c->head = elide(a); 118 | c->tail = b; /* elide(b) */; 119 | 120 | return c; 121 | } 122 | 123 | /* 124 | * a b c 125 | */ 126 | list *apply2(list *a,list *b,list *c) { 127 | return apply(apply(a,b),c); 128 | } 129 | 130 | /* 131 | * a b c d 132 | */ 133 | list *apply3(list *a,list *b,list *c,list *d) { 134 | return apply(apply(apply(a,b),c),d); 135 | } 136 | 137 | list *make_atom(char a) { 138 | list *l; 139 | l = make_list(); 140 | l->type = COMB_ATOM; 141 | l->name = a; 142 | 143 | return l; 144 | } 145 | 146 | list *make_var(const char *a) { 147 | list *l = make_list(); 148 | l->type = COMB_VAR; 149 | l->var_name = duplicate_string(a); 150 | 151 | return l; 152 | } 153 | 154 | list *make_int(int a) { 155 | list *l = make_list(); 156 | l->type = COMB_INTEGER; 157 | l->value = a; 158 | 159 | return l; 160 | } 161 | 162 | list *make_pair(list *a,list *b) { 163 | list *l = make_list(); 164 | l->head = a; 165 | l->tail = b; 166 | l->type = COMB_PAIR; 167 | 168 | return l; 169 | } 170 | 171 | int is_atom(list *a) { 172 | return a->type==COMB_ATOM; 173 | } 174 | 175 | int is_int(list *a) { 176 | return a->type==COMB_INTEGER; 177 | } 178 | 179 | int is_var(list *a) { 180 | return a->type==COMB_VAR; 181 | } 182 | 183 | int is_comb(list *a) { 184 | return a->type==COMB_COMB; 185 | } 186 | 187 | int is_pair(list *a) { 188 | return a->type==COMB_PAIR; 189 | } 190 | 191 | int get_int(list *a) { 192 | if (!is_int(a)) { 193 | error_msg("Attempt to read non-int as int"); 194 | } 195 | return a->value; 196 | } 197 | 198 | /* 199 | * Built in combinators. 200 | */ 201 | list *S,*K,*I; 202 | list *plus; 203 | list *pair; 204 | list *head,*tail; 205 | list *true,*false; 206 | list *cond; 207 | list *U,*Y; 208 | list *nil,*equal,*times,*minus,*divide,*less,*lesseq,*or,*and,*not; 209 | list *B,*C,*Sdash,*Bstar,*Cdash; 210 | 211 | /* 212 | * A dictionary is a simple linked list structure that 213 | * maps C strings to void * pointers. 214 | */ 215 | typedef struct dictionary { 216 | struct dictionary *next; 217 | char *name; 218 | void *value; 219 | } dictionary; 220 | 221 | /* 222 | * Look up given key in dictionary. If doesn't exist 223 | * thenmake new entry with NULL value. 224 | */ 225 | void **lookup(dictionary **pointer,const char *name) { 226 | dictionary *p; 227 | for (p = *pointer; p!=NULL; p = p->next) { 228 | if (!strcmp(p->name,name)) { 229 | return &p->value; 230 | } 231 | } 232 | p = (dictionary *)allocate_memory(sizeof(dictionary)); 233 | p->next = *pointer; 234 | p->name = duplicate_string(name); 235 | p->value = NULL; 236 | *pointer = p; 237 | 238 | return &p->value; 239 | } 240 | 241 | /* 242 | * Test whether dictionary contains given string 243 | * as key. 244 | */ 245 | int contains(dictionary *pointer,char *name) { 246 | dictionary *p; 247 | for (p = pointer; p!=NULL; p = p->next) { 248 | if (!strcmp(p->name,name)) { 249 | return 1; 250 | } 251 | } 252 | 253 | return 0; 254 | } 255 | 256 | /* 257 | * Lexer 258 | */ 259 | typedef enum { 260 | TOKEN_ERROR, 261 | 262 | TOKEN_IDENTIFIER, 263 | TOKEN_CONSTANT, 264 | 265 | TOKEN_LPAREN, TOKEN_RPAREN, 266 | TOKEN_NIL, 267 | TOKEN_EQUAL, 268 | 269 | /* Definitions */ 270 | TOKEN_DEF, TOKEN_WHERE, TOKEN_SEMICOLON, TOKEN_PERIOD, 271 | 272 | /* List operations */ 273 | TOKEN_COLON, TOKEN_HEAD, TOKEN_TAIL, 274 | 275 | /* Conditional */ 276 | TOKEN_IF, TOKEN_THEN, TOKEN_ELSE, 277 | 278 | /* Arithmetic */ 279 | TOKEN_PLUS, TOKEN_MINUS, TOKEN_TIMES, TOKEN_DIVIDE, 280 | 281 | /* Comparison */ 282 | TOKEN_GREATER, TOKEN_LESS, TOKEN_GREATEREQ, TOKEN_LESSEQ, 283 | 284 | /* Logical */ 285 | TOKEN_OR,TOKEN_AND,TOKEN_NOT, TOKEN_TRUE, TOKEN_FALSE, TOKEN_EOF 286 | } token_type; 287 | 288 | typedef struct { 289 | token_type type; 290 | union { 291 | char *string_value; 292 | token_type *token_value; 293 | int int_value; 294 | } value; 295 | } token; 296 | 297 | token *current_token; 298 | 299 | token *make_token_string(int t,char *v) { 300 | token *tok = (token *)allocate_memory(sizeof(token)); 301 | 302 | tok->type = t; 303 | tok->value.string_value = v; 304 | 305 | return tok; 306 | } 307 | 308 | token *make_token_token(int t,token_type *v) { 309 | token *tok = (token *)allocate_memory(sizeof(token)); 310 | 311 | tok->type = t; 312 | tok->value.token_value = v; 313 | 314 | return tok; 315 | } 316 | 317 | token *make_token_int(int t,int v) { 318 | token *tok = (token *)allocate_memory(sizeof(token)); 319 | 320 | tok->type = t; 321 | tok->value.int_value = v; 322 | 323 | return tok; 324 | } 325 | 326 | dictionary *keywords = 0; 327 | 328 | /* 329 | * Parse either a keyword or an identifier. 330 | * First character should be alphabetic. 331 | */ 332 | token *lex_keyword_or_identifier() { 333 | /* Potential buffer overflow */ 334 | char name[4096]; 335 | int i = 0; 336 | 337 | name[i++] = (char)current_char; 338 | next_char(); 339 | 340 | while (isalnum(current_char)) { 341 | name[i++] = (char)current_char; 342 | next_char(); 343 | } 344 | 345 | name[i] = 0; 346 | if (contains(keywords,name)) { 347 | return make_token_token(*(token_type *)lookup(&keywords,name),0); 348 | } 349 | 350 | return make_token_string(TOKEN_IDENTIFIER,duplicate_string(name)); 351 | } 352 | 353 | /* 354 | * Parse integer. First character should be 355 | * digit. 356 | */ 357 | int get_number() { 358 | /* Potential buffer overflow */ 359 | char number[256],*p = number; 360 | 361 | while (isdigit(current_char)) { 362 | *p++ = (char)current_char; 363 | next_char(); 364 | } 365 | *p = 0; 366 | 367 | return atoi(number); 368 | } 369 | 370 | token *lex_numeric_constant() { 371 | return make_token_int(TOKEN_CONSTANT,get_number()); 372 | } 373 | 374 | /* 375 | * Parse various types of operator. 376 | */ 377 | token *lex_operator() { 378 | static char *op_chars = "()=.:;+-*/"; 379 | char *p = index(op_chars,current_char); 380 | if (p!=NULL) { 381 | static int token_table[] = { 382 | TOKEN_LPAREN, TOKEN_RPAREN, 383 | TOKEN_EQUAL, TOKEN_PERIOD, 384 | TOKEN_COLON, TOKEN_SEMICOLON, 385 | TOKEN_PLUS, TOKEN_MINUS, 386 | TOKEN_TIMES, TOKEN_DIVIDE 387 | }; 388 | next_char(); 389 | return make_token_int(token_table[p-op_chars],0); 390 | } 391 | 392 | if (current_char=='<') { 393 | if (next_char()=='=') { 394 | next_char(); 395 | return make_token_int(TOKEN_LESSEQ,0); 396 | } 397 | return make_token_int(TOKEN_LESS,0); 398 | } 399 | if (current_char=='>') { 400 | if (next_char()=='=') { 401 | next_char(); 402 | return make_token_int(TOKEN_GREATEREQ,0); 403 | } 404 | return make_token_int(TOKEN_GREATER,0); 405 | } 406 | error_msg("Unrecognised symbol"); 407 | } 408 | 409 | void lex_white_space() { 410 | while (isspace(current_char)) { 411 | next_char(); 412 | } 413 | } 414 | 415 | /* 416 | * Return next token in input. 417 | * Returns a keyword, identifier, number, operator 418 | * or end-of-file marker. 419 | */ 420 | token *lex() { 421 | lex_white_space(); 422 | if (isalpha(current_char)) { 423 | return lex_keyword_or_identifier(); 424 | } else if (isdigit(current_char)) { 425 | return lex_numeric_constant(); 426 | } else if (current_char==EOF) { 427 | return make_token_int(TOKEN_EOF,0); 428 | } else { 429 | return lex_operator(); 430 | } 431 | } 432 | 433 | /* 434 | * The parser code follows. 435 | * This is a simple recursive descent parser. 436 | * SASL doesn't need anything fancy. 437 | * Note that the grammar has been refactored slightly 438 | * from the BNF in the comments. 439 | */ 440 | 441 | list *parse_expr(); 442 | 443 | /* 444 | * Expect a specified single token in the parse stream. 445 | * E.g. after an 'if' we expect a 'then' and an 'else' 446 | */ 447 | void expect(int type,char *message) { 448 | if (current_token->type!=type) { 449 | error_msg(message); 450 | } 451 | current_token = lex(); 452 | } 453 | 454 | /* 455 | * CONDEXPR := 'if' EXPR 'then' EXPR 'else' EXPR 456 | */ 457 | list *parse_condexpr() { 458 | list *a,*b,*c; 459 | expect(TOKEN_IF,"Expected 'if'"); 460 | a = parse_expr(); 461 | expect(TOKEN_THEN,"Missing 'then'"); 462 | b = parse_expr(); 463 | expect(TOKEN_ELSE,"Missing 'else'"); 464 | c = parse_expr(); 465 | 466 | return apply3(cond,a,b,c); 467 | } 468 | 469 | /* 470 | * NAME := identifier 471 | */ 472 | list *parse_name() { 473 | if (current_token->type==TOKEN_IDENTIFIER) { 474 | list *r = make_var(current_token->value.string_value); 475 | current_token = lex(); 476 | 477 | return r; 478 | } else { 479 | error_msg("Expected a name"); 480 | } 481 | } 482 | 483 | /* 484 | * ATOMIC := CONSTANT | IDENTIFIER | '(' EXPR ')' | 'true' | 'false' | 'nil' | 485 | * 'head' | 'tail' | 'not' 486 | */ 487 | list *parse_atomic() { 488 | list *r; 489 | switch (current_token->type) { 490 | case TOKEN_CONSTANT: 491 | r = make_int(current_token->value.int_value); 492 | current_token = lex(); 493 | return r; 494 | case TOKEN_IDENTIFIER: 495 | r = make_var(current_token->value.string_value); 496 | current_token = lex(); 497 | return r; 498 | case TOKEN_LPAREN: 499 | current_token = lex(); 500 | r = parse_expr(); 501 | expect(TOKEN_RPAREN,"Missing ')'"); 502 | return r; 503 | case TOKEN_TRUE: 504 | current_token = lex(); 505 | return true; 506 | case TOKEN_FALSE: 507 | current_token = lex(); 508 | return false; 509 | case TOKEN_NIL: 510 | current_token = lex(); 511 | return nil; 512 | case TOKEN_HEAD: 513 | current_token = lex(); 514 | return head; 515 | case TOKEN_TAIL: 516 | current_token = lex(); 517 | return tail; 518 | case TOKEN_NOT: 519 | current_token = lex(); 520 | return not; 521 | default: 522 | error_msg("Parse error"); 523 | } 524 | } 525 | 526 | list *parse_sequence(list *r,token_type token,list *(*type)(),list *op) { 527 | while (current_token->type==token) { 528 | current_token = lex(); 529 | r = apply2(op,r,(*type)()); 530 | } 531 | return r; 532 | } 533 | 534 | list *parse_rsequence(list *r,token_type token,list *(*type)(),list *op) { 535 | while (current_token->type==token) { 536 | current_token = lex(); 537 | r = apply2(op,(*type)(),r); 538 | } 539 | return r; 540 | } 541 | 542 | /* 543 | * PRODUCT := ATOMIC { ('*' | '/') ATOMIC } 544 | */ 545 | list *parse_product() { 546 | list *r = parse_atomic(),*or; 547 | do { 548 | or = r; 549 | r = parse_sequence(r,TOKEN_TIMES,parse_atomic,times); 550 | r = parse_sequence(r,TOKEN_DIVIDE,parse_atomic,divide); 551 | } while (r!=or); 552 | return r; 553 | } 554 | 555 | /* 556 | * SUM := PRODUCT { ('+' | '-') PRODUCT } 557 | */ 558 | list *parse_sum() { 559 | list *r = parse_product(),*or; 560 | do { 561 | or = r; 562 | r = parse_sequence(r,TOKEN_PLUS,parse_product,plus); 563 | r = parse_sequence(r,TOKEN_MINUS,parse_product,minus); 564 | } while (r!=or); 565 | return r; 566 | } 567 | 568 | /* 569 | * COMPARISONEXPR := SUM { ('=' | '<' | '>' | '<=' | '>=') SUM } 570 | */ 571 | list *parse_comparisonexpr() { 572 | list *r = parse_sum(),*or; 573 | do { 574 | or = r; 575 | r = parse_sequence(r,TOKEN_EQUAL,parse_sum,equal); 576 | r = parse_sequence(r,TOKEN_LESS,parse_sum,less); 577 | r = parse_sequence(r,TOKEN_LESSEQ,parse_sum,lesseq); 578 | r = parse_rsequence(r,TOKEN_GREATER,parse_sum,less); 579 | r = parse_rsequence(r,TOKEN_GREATEREQ,parse_sum,lesseq); 580 | } while (r!=or); 581 | return r; 582 | } 583 | 584 | /* 585 | * LOGICALPRODUCT := COMPARISONEXPR { 'and' COMPARISONEXPR } 586 | */ 587 | list *parse_logicalproduct() { 588 | list *r = parse_comparisonexpr(),*or; 589 | r = parse_sequence(r,TOKEN_AND,parse_comparisonexpr,and); 590 | return r; 591 | } 592 | 593 | /* 594 | * LOGICALSUM := LOGICALPRODUCT { 'or' LOGICALPRODUCT } 595 | */ 596 | list *parse_logicalsum() { 597 | list *r = parse_logicalproduct(); 598 | r = parse_sequence(r,TOKEN_OR,parse_logicalproduct,or); 599 | return r; 600 | } 601 | 602 | /* 603 | * COMBEXPR := LOGICALSUM { LOGICALSUM } 604 | */ 605 | list *parse_combexpr() { 606 | list *r = parse_logicalsum(); 607 | for (;;) { 608 | /* 609 | * Identify end of sequence of arguments to 610 | * combinator. 611 | */ 612 | static char final[] = { 613 | TOKEN_EOF, TOKEN_RPAREN, TOKEN_THEN, 614 | TOKEN_ELSE, TOKEN_PERIOD, TOKEN_SEMICOLON, 615 | TOKEN_WHERE, TOKEN_COLON, 0 616 | }; 617 | if (index(final,current_token->type)) { 618 | return r; 619 | } 620 | r = apply(r,parse_atomic()); 621 | } 622 | } 623 | 624 | /* 625 | * LISTEXPR := COMBEXPR [ ':' LISTEXPR ] 626 | */ 627 | list *parse_listexpr() { 628 | list *r = parse_combexpr(); 629 | r = parse_sequence(r,TOKEN_COLON,parse_listexpr,pair); 630 | return r; 631 | } 632 | 633 | list *parse_abstraction(); 634 | list *parse_recursive_abstraction(list *name); 635 | list *abstract(list *var,list *expr); 636 | int mutual_recursion(list *l); 637 | 638 | void display(list *); 639 | 640 | /* 641 | * This is the main expression parser. 642 | * This is also where recursive defintions (as well as mutually 643 | * recursive definitions) are identified. 644 | * 645 | * EXPR := (CONDEXPR | LISTEXPR) 646 | * { WHERE NAME ABSTRACTION ';' { NAME ABSTRACTION ';' } } 647 | */ 648 | list *parse_expr() { 649 | list *r,*abstraction; 650 | if (current_token->type==TOKEN_IF) { 651 | return parse_condexpr(); 652 | } 653 | r = parse_listexpr(); 654 | for (;;) { 655 | if (current_token->type==TOKEN_WHERE) { 656 | list *definitions = 0,*dstart; 657 | list *name,*lhs,*rhs; 658 | 659 | /* 660 | * Get list of all definitions in this 'where' clause. 661 | */ 662 | do { 663 | list *expr; 664 | current_token = lex(); 665 | name = parse_name(); 666 | expr = parse_abstraction(); 667 | definitions = apply(apply(name,expr),definitions); 668 | } while (current_token->type==TOKEN_SEMICOLON); 669 | 670 | #if 0 671 | /* 672 | * Special case for single definition 673 | * Buggy. 674 | */ 675 | if (definitions && definitions->tail==0) { 676 | /* 677 | * Only one definition 678 | */ 679 | r = abstract(definitions->head->head,r); 680 | return apply(r,definitions->head->tail); 681 | } 682 | #endif 683 | 684 | rhs = nil; 685 | lhs = apply(K,r); 686 | if (!mutual_recursion(definitions)) { 687 | while (definitions) { 688 | rhs = apply(apply(pair,TH(definitions)),rhs); 689 | lhs = apply(U,abstract(HH(definitions),lhs)); 690 | definitions = T(definitions); 691 | } 692 | return apply(lhs,rhs); 693 | } else { 694 | /* 695 | * Mutually recursive definitions. 696 | */ 697 | dstart = definitions; 698 | while (definitions) { 699 | rhs = apply(apply(pair,TH(definitions)),rhs); 700 | lhs = apply(U,abstract(HH(definitions),lhs)); 701 | definitions = T(definitions); 702 | } 703 | rhs = apply(K,rhs); 704 | definitions = dstart; 705 | while (definitions) { 706 | rhs = apply(U,abstract(HH(definitions),rhs)); 707 | definitions = T(definitions); 708 | } 709 | rhs = apply(Y,rhs); 710 | return apply(lhs,rhs); 711 | } 712 | 713 | #if 0 714 | /* 715 | * Name of variable being defined. 716 | * Not sure why I commented this out. 717 | */ 718 | name = parse_name(); 719 | abstraction = parse_recursive_abstraction(name); 720 | r = abstract(name,r); 721 | return apply(r,abstraction); 722 | #endif 723 | } else { 724 | return r; 725 | } 726 | } 727 | return r; 728 | } 729 | 730 | char get_atom(list *a) { 731 | if (!is_atom(a)) { 732 | error_msg("Attempt to read non-atom as atom"); 733 | } 734 | return a->name; 735 | } 736 | 737 | const char *get_var(list *a) { 738 | if (!is_var(a)) { 739 | error_msg("Attempt to read non-var as var"); 740 | } 741 | return a->var_name; 742 | } 743 | 744 | /* 745 | * Returns 1 if expression 'expr' contains reference to 746 | * any variable in the list 'var'. 747 | */ 748 | int depends_on(list *expr,list *var) { 749 | if (is_var(expr) && !strcmp(expr->var_name,var->var_name)) { 750 | return 1; 751 | } 752 | if (is_int(expr) || is_var(expr) || is_atom(expr)) { 753 | return 0; 754 | } 755 | return depends_on(H(expr),var) || depends_on(T(expr),var); 756 | } 757 | 758 | /* 759 | * Input is a list of pairs [(var_i,expr_i)]. 760 | * Return 1 if any expr_i contains any var_i. 761 | */ 762 | int mutual_recursion(list *l) { 763 | list *p,*q = l; 764 | 765 | /* 766 | * For each expr... 767 | */ 768 | while (q) { 769 | p = l; 770 | 771 | /* 772 | * ...for each name... 773 | */ 774 | while (p) { 775 | if (depends_on(TH(q),HH(p))) { 776 | return 1; 777 | } 778 | p = T(p); 779 | } 780 | q = T(q); 781 | } 782 | 783 | return 0; 784 | } 785 | 786 | list *abstract(list *var,list *expr) { 787 | if (is_var(expr) && !strcmp(expr->var_name,var->var_name)) { 788 | return I; 789 | } 790 | if (is_int(expr) || is_var(expr) || is_atom(expr)) { 791 | return apply(K,expr); 792 | } 793 | return apply(apply(S,abstract(var,H(expr))),abstract(var,T(expr))); 794 | } 795 | 796 | /* 797 | * RECURSIVEABSTRACTION := { NAME } '=' EXPR 798 | * 799 | * Note: Not currently used. 800 | */ 801 | list *parse_recursive_abstraction(list *name) { 802 | list *names,*expr; 803 | /* 804 | * Parse arguments. 805 | */ 806 | names = nil; 807 | while (current_token->type!=TOKEN_EQUAL) { 808 | list *name = parse_name(); 809 | names = apply(name,names); 810 | } 811 | /* 812 | * Skip '=' 813 | */ 814 | current_token = lex(); 815 | expr = parse_expr(); 816 | while (names!=nil) { 817 | expr = abstract(names->head,expr); 818 | names = T(names); 819 | } 820 | 821 | if (depends_on(expr,name)) { 822 | expr = abstract(name,expr); 823 | return apply(Y,expr); 824 | } 825 | return expr; 826 | } 827 | 828 | /* 829 | * ABSTRACTION := { NAME } '=' EXPR 830 | */ 831 | list *parse_abstraction() { 832 | list *names,*expr; 833 | /* 834 | * Parse arguments. 835 | */ 836 | names = nil; 837 | while (current_token->type!=TOKEN_EQUAL) { 838 | list *name = parse_name(); 839 | names = apply(name,names); 840 | } 841 | /* 842 | * Skip '=' 843 | */ 844 | current_token = lex(); 845 | expr = parse_expr(); 846 | while (names!=nil) { 847 | expr = abstract(names->head,expr); 848 | names = T(names); 849 | } 850 | return expr; 851 | } 852 | 853 | dictionary *defs = 0; 854 | 855 | /* 856 | * DEF := NAME ABSTRACTION '.' 857 | */ 858 | void parse_def() { 859 | list **location; 860 | list *name = parse_name(); 861 | list *expr; 862 | const char *var_name = get_var(name); 863 | location = (list **)lookup(&defs,var_name); 864 | 865 | expr = parse_abstraction(); 866 | 867 | *location = expr; 868 | expect(TOKEN_PERIOD,"Expected '.' after 'def'"); 869 | } 870 | 871 | list *stack_eval(list *); 872 | 873 | void display(list *l) { 874 | switch (l->type) { 875 | case COMB_ATOM: 876 | putchar(get_atom(l)); 877 | break; 878 | case COMB_INTEGER: 879 | printf("%d",get_int(l)); 880 | break; 881 | case COMB_PAIR: 882 | putchar('['); 883 | display(stack_eval(l->head)); 884 | l = T(l); 885 | while (is_pair(l)) { 886 | putchar(','); 887 | display(stack_eval(l->head)); 888 | l = T(l); 889 | } 890 | putchar(':'); 891 | display(stack_eval(l)); 892 | putchar(']'); 893 | break; 894 | case COMB_VAR: 895 | printf("Var(%s)",get_var(l)); 896 | break; 897 | default: 898 | display(l->head); 899 | putchar(' '); 900 | if (!is_comb(T(l))) { 901 | display(T(l)); 902 | } else { 903 | putchar('('); 904 | display(T(l)); 905 | putchar(')'); 906 | } 907 | } 908 | } 909 | 910 | /* 911 | * Substitute variables stored in dictionary 'defs' in expressions. 912 | * Note: this is part of the compilation, not something 913 | * that happens at run time. 914 | */ 915 | list *substitute(list *expr) { 916 | if (is_comb(expr)) { 917 | expr->head = substitute(expr->head); 918 | expr->tail = substitute(expr->tail); 919 | return expr; 920 | } else if (is_var(expr)) { 921 | const char *s = get_var(expr); 922 | return *(list **)lookup(&defs,s); 923 | } else { 924 | return expr; 925 | } 926 | } 927 | 928 | int optimise(list *); 929 | 930 | /* 931 | * PROGRAM := { DEF } EXPR 932 | */ 933 | list *parse_program() { 934 | list *expr; 935 | dictionary *d; 936 | while (current_token->type==TOKEN_DEF) { 937 | current_token = lex(); 938 | parse_def(); 939 | } 940 | expr = parse_expr(); 941 | optimise(expr); 942 | for (d = defs; d; d = d->next) { 943 | optimise(d->value); 944 | } 945 | for (d = defs; d; d = d->next) { 946 | d->value = substitute(d->value); 947 | } 948 | expr = substitute(expr); 949 | return expr; 950 | } 951 | 952 | /* 953 | * Compare two lists for equality. 954 | */ 955 | int list_eq(list *a,list *b) { 956 | if (a->type!=b->type) { 957 | return 0; 958 | } 959 | switch (a->type) { 960 | case COMB_ATOM: 961 | return get_atom(a)==get_atom(b); 962 | case COMB_PAIR: 963 | return list_eq(H(a),H(b)) && list_eq(T(a),T(b)); 964 | case COMB_COMB: 965 | error_msg("Can't compare unevaluated expressions"); 966 | case COMB_INTEGER: 967 | return get_int(a)==get_int(b); 968 | } 969 | error_msg("Invalid equality comparison"); 970 | } 971 | 972 | int list_lt(list *a,list *b) { 973 | if (a->type!=COMB_INTEGER || b->type!=COMB_INTEGER) { 974 | error_msg("Can't compare non-integers"); 975 | } 976 | return get_int(a)type!=COMB_INTEGER || b->type!=COMB_INTEGER) { 981 | error_msg("Can't compare non-integers"); 982 | } 983 | return get_int(a)<=get_int(b); 984 | } 985 | 986 | /* 987 | * Implements a->Ia 988 | * The extra I looks superfulous but has certain uses. 989 | */ 990 | void copy(list *a,list *b) { 991 | *a = is_atom(b) ? *apply(I,b) : *b; 992 | } 993 | 994 | /* 995 | * This implements a number of shortcuts that speed up reductions. 996 | * They look like reduction rules but note that they are executed 997 | * at compile time, not reduction time. 998 | */ 999 | int optimise(list *a) { 1000 | int flag; 1001 | do { 1002 | flag = 0; 1003 | 1004 | /* 1005 | * S(Kf)(Kg) -> K(fg) 1006 | */ 1007 | if (is_comb(a) 1008 | && is_comb(H(a)) 1009 | && is_comb(TH(a)) 1010 | && is_comb(T(a)) 1011 | && HH(a)==S 1012 | && HTH(a)==K 1013 | && HT(a)==K) { 1014 | *a = *apply(K,apply(TTH(a),TT(a))); 1015 | flag |= 1; 1016 | } 1017 | 1018 | /* 1019 | * S(Kf)I -> f 1020 | */ 1021 | if (is_comb(a) 1022 | && is_comb(H(a)) 1023 | && is_comb(TH(a)) 1024 | && HH(a)==S 1025 | && HTH(a)==K 1026 | && T(a)==I) { 1027 | copy(a,TTH(a)); 1028 | flag |= 1; 1029 | } 1030 | 1031 | /* 1032 | * S(Kf)(Bgh) -> B*fgh 1033 | */ 1034 | if (is_comb(a) 1035 | && is_comb(H(a)) 1036 | && is_comb(TH(a)) 1037 | && is_comb(T(a)) 1038 | && is_comb(HT(a)) 1039 | && HH(a)==S 1040 | && HTH(a)==K 1041 | && HHT(a)==B) { 1042 | *a = *apply3(Bstar,TTH(a),THT(a),TT(a)); 1043 | flag |= 1; 1044 | } 1045 | 1046 | /* 1047 | * S(Kf)g->Bfg 1048 | */ 1049 | if (is_comb(a) 1050 | && is_comb(H(a)) 1051 | && is_comb(TH(a)) 1052 | && HH(a)==S 1053 | && HTH(a)==K) { 1054 | *a = *apply2(B,TTH(a),T(a)); 1055 | flag |= 1; 1056 | } 1057 | 1058 | /* 1059 | * S(Bfg)(Kh) -> C'fgh 1060 | */ 1061 | if (is_comb(a) 1062 | && is_comb(H(a)) 1063 | && is_comb(TH(a)) 1064 | && is_comb(HTH(a)) 1065 | && is_comb(T(a)) 1066 | && HH(a)==S 1067 | && HHTH(a)==B 1068 | && HT(a)==K) { 1069 | *a = *apply3(Cdash,THTH(a),TTH(a),TT(a)); 1070 | flag |= 1; 1071 | } 1072 | 1073 | /* 1074 | * Sf(Kg)->Cfg; 1075 | */ 1076 | if (is_comb(a) 1077 | && is_comb(H(a)) 1078 | && is_comb(T(a)) 1079 | && HH(a)==S 1080 | && HT(a)==K) { 1081 | *a = *apply2(C,TH(a),TT(a)); 1082 | flag |= 1; 1083 | } 1084 | 1085 | /* 1086 | * S(Bfg)h -> S'fgh 1087 | */ 1088 | if (is_comb(a) 1089 | && is_comb(H(a)) 1090 | && is_comb(TH(a)) 1091 | && is_comb(HTH(a)) 1092 | && HH(a)==S 1093 | && HHTH(a)==B) { 1094 | *a = *apply3(Sdash,THTH(a),TTH(a),T(a)); 1095 | flag |= 1; 1096 | } 1097 | if (is_comb(a)) { 1098 | flag |= optimise(H(a)) | optimise(T(a)); 1099 | } 1100 | } while (flag); 1101 | 1102 | return 0; 1103 | } 1104 | 1105 | /* 1106 | * Core combinatorial reduction engine. 1107 | * This is where program execution takes place. 1108 | */ 1109 | list *stack_eval(list *a) { 1110 | list *stack[1024],*result; 1111 | int sp = 0; 1112 | int i; 1113 | 1114 | stack[sp] = a; 1115 | 1116 | /* 1117 | * Reduction phase 1118 | */ 1119 | while (1) { 1120 | if (is_comb(stack[sp])) { 1121 | /* 1122 | * 'Unpack' top of stack until it is an atom 1123 | */ 1124 | list *a = stack[sp]; 1125 | ++sp; 1126 | stack[sp] = H(a); 1127 | stack[sp-1] = T(a); 1128 | continue; 1129 | } else if (sp>=1) { 1130 | char combinator = stack[sp]->name; 1131 | list *a = stack[sp-1]; 1132 | switch (combinator) { 1133 | case 'I': 1134 | --sp; 1135 | continue; 1136 | case 'h': 1137 | --sp; 1138 | a = stack_eval(a); 1139 | if (!is_pair(a)) { 1140 | error_msg("head needs a list"); 1141 | } 1142 | stack[sp] = H(a); 1143 | continue; 1144 | case 'Y': 1145 | --sp; 1146 | stack[sp] = apply(a,apply(Y,a)); 1147 | continue; 1148 | case '!': 1149 | --sp; 1150 | stack[sp] = a==true ? false : true; 1151 | continue; 1152 | case 't': 1153 | --sp; 1154 | a = stack_eval(a); 1155 | if (!is_pair(a)) { 1156 | error_msg("tail needs a list"); 1157 | } 1158 | stack[sp] = T(a); 1159 | continue; 1160 | default: 1161 | if (sp>=2) { 1162 | list *b = stack[sp-2]; 1163 | switch (combinator) { 1164 | case 'K': 1165 | sp -= 2; 1166 | stack[sp] = a; 1167 | continue; 1168 | case ':': 1169 | sp -= 2; 1170 | stack[sp] = make_pair(a,b); 1171 | continue; 1172 | case 'U': 1173 | stack[sp] = a; 1174 | stack[sp-1] = apply(head,b); 1175 | stack[sp-2] = apply(tail,b); 1176 | continue; 1177 | case '+': 1178 | a = stack_eval(a); 1179 | b = stack_eval(b); 1180 | sp -= 2; 1181 | stack[sp] = make_int(get_int(a)+get_int(b)); 1182 | continue; 1183 | case '-': 1184 | a = stack_eval(a); 1185 | b = stack_eval(b); 1186 | sp -= 2; 1187 | stack[sp] = make_int(get_int(a)-get_int(b)); 1188 | continue; 1189 | case '*': 1190 | a = stack_eval(a); 1191 | b = stack_eval(b); 1192 | sp -= 2; 1193 | stack[sp] = make_int(get_int(a)*get_int(b)); 1194 | continue; 1195 | case '/': 1196 | a = stack_eval(a); 1197 | b = stack_eval(b); 1198 | sp -= 2; 1199 | stack[sp] = make_int(get_int(a)/get_int(b)); 1200 | continue; 1201 | case '=': 1202 | a = stack_eval(a); 1203 | b = stack_eval(b); 1204 | sp -= 2; 1205 | stack[sp] = list_eq(a,b) ? true : false; 1206 | continue; 1207 | case '<': 1208 | a = stack_eval(a); 1209 | b = stack_eval(b); 1210 | sp -= 2; 1211 | stack[sp] = list_lt(a,b) ? true : false; 1212 | continue; 1213 | case 'l': 1214 | a = stack_eval(a); 1215 | b = stack_eval(b); 1216 | sp -= 2; 1217 | stack[sp] = list_le(a,b) ? true : false; 1218 | continue; 1219 | case '|': 1220 | a = stack_eval(a); 1221 | sp -= 2; 1222 | stack[sp] = (a==true || stack_eval(b)==true) ? true : false; 1223 | continue; 1224 | case '&': 1225 | a = stack_eval(a); 1226 | sp -= 2; 1227 | stack[sp] = (a==false || stack_eval(b)==false) ? false : true; 1228 | continue; 1229 | default: 1230 | if (sp>=3) { 1231 | list *c = stack[sp-3]; 1232 | switch (combinator) { 1233 | case 'S': 1234 | --sp; 1235 | stack[sp] = a; 1236 | stack[sp-1] = c; 1237 | stack[sp-2] = apply(b,c); 1238 | continue; 1239 | case 'B': 1240 | sp -= 2; 1241 | stack[sp] = a; 1242 | stack[sp-1] = apply(b,c); 1243 | continue; 1244 | case 'C': 1245 | --sp; 1246 | stack[sp] = a; 1247 | stack[sp-1] = c; 1248 | stack[sp-2] = b; 1249 | continue; 1250 | case '?': 1251 | a = stack_eval(a); 1252 | sp -= 3; 1253 | if (a==false) { 1254 | stack[sp] = c; 1255 | } else if (a==true) { 1256 | stack[sp] = b; 1257 | } else { 1258 | error_msg("'cond' expects 'true' or 'false'"); 1259 | } 1260 | continue; 1261 | default: 1262 | if (sp>=4) { 1263 | list *d = stack[sp-4]; 1264 | switch (combinator) { 1265 | case 's': 1266 | sp -= 2; 1267 | stack[sp] = a; 1268 | stack[sp-1] = apply(b,d); 1269 | stack[sp-2] = apply(c,d); 1270 | continue; 1271 | case 'b': 1272 | sp -= 3; 1273 | stack[sp] = a; 1274 | stack[sp-1] = apply(b,apply(c,d)); 1275 | continue; 1276 | case 'c': 1277 | sp -= 2; 1278 | stack[sp] = a; 1279 | stack[sp-1] = apply(b,d); 1280 | stack[sp-2] = c; 1281 | continue; 1282 | } 1283 | } 1284 | } 1285 | } 1286 | } 1287 | } 1288 | } 1289 | } 1290 | break; 1291 | } 1292 | 1293 | 1294 | /* 1295 | * Reassemble result 1296 | */ 1297 | result = stack[sp--]; 1298 | for (i = sp; i>=0; --i) { 1299 | result = apply(result,stack[i]); 1300 | } 1301 | 1302 | /* 1303 | * If we've evaluated to a combinator expression we should overwrite it. 1304 | * But we musn't copy atoms as they are unique. 1305 | */ 1306 | if (is_comb(a) && !is_atom(result)) { 1307 | *a = *result; 1308 | return a; 1309 | } else { 1310 | return result; 1311 | } 1312 | } 1313 | 1314 | /* 1315 | * All of the combinators 1316 | */ 1317 | void constants() { 1318 | S = make_atom('S'); 1319 | K = make_atom('K'); 1320 | I = make_atom('I'); 1321 | plus = make_atom('+'); 1322 | times = make_atom('*'); 1323 | minus = make_atom('-'); 1324 | divide = make_atom('/'); 1325 | pair = make_atom(':'); 1326 | head = make_atom('h'); 1327 | tail = make_atom('t'); 1328 | true = make_atom('T'); 1329 | false = make_atom('F'); 1330 | cond = make_atom('?'); 1331 | U = make_atom('U'); 1332 | Y = make_atom('Y'); 1333 | nil = make_atom('n'); 1334 | equal = make_atom('='); 1335 | less = make_atom('<'); 1336 | lesseq = make_atom('l'); 1337 | and = make_atom('&'); 1338 | or = make_atom('|'); 1339 | not = make_atom('!'); 1340 | B = make_atom('B'); 1341 | C = make_atom('C'); 1342 | Sdash = make_atom('s'); 1343 | Bstar = make_atom('b'); 1344 | Cdash = make_atom('c'); 1345 | } 1346 | 1347 | void make_keywords() { 1348 | *lookup(&keywords,"def") = (void *)TOKEN_DEF; 1349 | *lookup(&keywords,"if") = (void *)TOKEN_IF; 1350 | *lookup(&keywords,"then") = (void *)TOKEN_THEN; 1351 | *lookup(&keywords,"else") = (void *)TOKEN_ELSE; 1352 | *lookup(&keywords,"where") = (void *)TOKEN_WHERE; 1353 | *lookup(&keywords,"true") = (void *)TOKEN_TRUE; 1354 | *lookup(&keywords,"false") = (void *)TOKEN_FALSE; 1355 | *lookup(&keywords,"nil") = (void *)TOKEN_NIL; 1356 | *lookup(&keywords,"hd") = (void *)TOKEN_HEAD; 1357 | *lookup(&keywords,"tl") = (void *)TOKEN_TAIL; 1358 | *lookup(&keywords,"or") = (void *)TOKEN_OR; 1359 | *lookup(&keywords,"and") = (void *)TOKEN_AND; 1360 | *lookup(&keywords,"not") = (void *)TOKEN_NOT; 1361 | } 1362 | 1363 | int main(int argc,char **argv) { 1364 | list *t; 1365 | 1366 | constants(); 1367 | make_keywords(); 1368 | 1369 | next_char(); 1370 | current_token = lex(); 1371 | t = parse_program(); 1372 | t = stack_eval(t); 1373 | display(t); 1374 | printf("\n"); 1375 | 1376 | return 0; 1377 | } 1378 | -------------------------------------------------------------------------------- /test.sasl: -------------------------------------------------------------------------------- 1 | def id x = x. 2 | 3 | def until p f x = if p x then x else until p f (f x). 4 | def comp f g x = f (g x). 5 | def map f l = if l=nil then nil 6 | else (f x):(map f xs) where x = hd l; 7 | xs = tl l. 8 | def fold m z l = if l=nil then z 9 | else m x (fold m z xs) where x = hd l; 10 | xs = tl l. 11 | def append l1 l2 = if l1=nil then l2 12 | else x:(append xs l2) where x = hd l1; 13 | xs = tl l1. 14 | def reverse l = if l=nil then nil 15 | else append (reverse (tl l)) ((hd l):nil). 16 | def filter p l = if l=nil then nil 17 | else (if p x then x:(filter p xs) 18 | else filter p xs) where x = hd l; 19 | xs = tl l. 20 | def sort p l = if l=nil then nil 21 | else insert p (hd l) (sort p (tl l)) 22 | where 23 | insert pp e ll = if ll=nil then (e:nil) 24 | else 25 | if pp e (hd ll) then (e:ll) 26 | else 27 | ((hd ll):(insert pp e (tl ll))). 28 | def drop n l = if n<=0 then l 29 | else if l=nil then nil 30 | else drop (n-1) (tl l). 31 | def take n l = if n=0 or l=nil then nil 32 | else x:take (n-1) xs where x = hd l; 33 | xs = tl l. 34 | def at n l = if n=0 then hd l 35 | else at (n-1) (tl l). 36 | def null l = l=nil. 37 | def length l = if l=nil then 0 38 | else 1+(length(tl l)). 39 | def sum = fold plus 0. 40 | def product = fold times 1. 41 | def plus x y = x+y. 42 | def mul x y = x*y. 43 | def div x y = x/y. 44 | def div2 y x = y/x. 45 | def minus x y = x-y. 46 | def minus2 y x = y-x. 47 | def lt x y = x=y. 51 | def gt x y = x>y. 52 | 53 | def zipWith f x y = if x=nil then nil 54 | else f (hd x) (hd y):zipWith f (tl x) (tl y). 55 | 56 | at 19 fib where fib = 1:1:(zipWith plus fib (tl fib)) 57 | --------------------------------------------------------------------------------