├── examples ├── 1_var.b ├── 4_goto.b ├── 2_ext.b ├── printargs.b ├── 3_fun.b └── 5_while.b ├── Makefile ├── link.ld ├── LICENSE ├── lib.b ├── abc ├── README.md ├── brt.s ├── b.h ├── b1.c └── b0.c /examples/1_var.b: -------------------------------------------------------------------------------- 1 | main() { 2 | auto a, b, c, sum; 3 | 4 | a = 1; b = 2; c = 3; 5 | sum = a+b+c; 6 | putnumb(sum); 7 | } 8 | -------------------------------------------------------------------------------- /examples/4_goto.b: -------------------------------------------------------------------------------- 1 | main() { 2 | auto c; 3 | read: 4 | c= getchar(); 5 | putchar(c); 6 | if(c != '*n') goto read; 7 | } 8 | -------------------------------------------------------------------------------- /examples/2_ext.b: -------------------------------------------------------------------------------- 1 | main() { 2 | extrn a, b, c; 3 | putchar(a); putchar(b); putchar(c); putchar('!*n'); 4 | } 5 | 6 | a 'hell'; 7 | b 'o, w'; 8 | c 'orld'; 9 | 10 | -------------------------------------------------------------------------------- /examples/printargs.b: -------------------------------------------------------------------------------- 1 | main() { 2 | extrn argv; 3 | auto i; 4 | 5 | i = 1; 6 | printf("%d args:*n", argv[0]); 7 | while (i <= argv[0]) 8 | printf("%s*n", argv[i++]); 9 | return(0); 10 | } 11 | -------------------------------------------------------------------------------- /examples/3_fun.b: -------------------------------------------------------------------------------- 1 | main() { 2 | extrn a, b, c, d; 3 | put2char(a,b); 4 | put2char(c,d); 5 | } 6 | 7 | put2char(x,y) { 8 | putchar(x); 9 | putchar(y); 10 | } 11 | 12 | a 'hell'; b 'o, w'; c 'orld'; d '!*n'; 13 | 14 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | CFLAGS=-Wall -Wextra 2 | b: b0.o b1.o 3 | cc b0.o b1.o 
-o b 4 | b0.o: b0.c b.h 5 | b1.o: b1.c b.h 6 | 7 | libs: 8 | ./abc -c brt.s lib.b 9 | 10 | install: b abc 11 | cp abc $(HOME)/bin 12 | 13 | %.o: %.s 14 | as --32 $^ -o $@ 15 | 16 | -------------------------------------------------------------------------------- /examples/5_while.b: -------------------------------------------------------------------------------- 1 | main() { 2 | auto c; 3 | while (1) { 4 | while ( (c=getchar()) != ' ') 5 | if (putchar(c) == '*n') exit(); 6 | putchar( '*n' ); 7 | while ( (c=getchar()) == ' '); /* skip blanks */ 8 | if (putchar(c)=='*n') exit(); 9 | } 10 | } 11 | -------------------------------------------------------------------------------- /link.ld: -------------------------------------------------------------------------------- 1 | OUTPUT_FORMAT("elf32-i386") 2 | OUTPUT_ARCH(i386) 3 | 4 | ENTRY(start) 5 | 6 | SECTIONS 7 | { 8 | . = 0x400000; 9 | .text : { *(.text) } 10 | . = 0x8000000; 11 | .data : { 12 | *(.data) 13 | __bsymb = .; 14 | *(.bsymb) 15 | __ebsymb = .; 16 | } 17 | . = ALIGN(16); 18 | __bss = .; 19 | .bss : { *(.bss) } 20 | __ebss = .; 21 | } 22 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | The MIT License (MIT) 2 | 3 | Copyright (c) 2013-2016 aap 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 
14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | 23 | -------------------------------------------------------------------------------- /lib.b: -------------------------------------------------------------------------------- 1 | char(s, n) 2 | return((s[n/4]>>8*(n%4))&0377); /* s[n/4] */ 3 | 4 | lchar(s, n, char) { 5 | auto i; 6 | i = 8*(n%4); 7 | char = (char&0377)<> 8; 20 | } 21 | write(1, &char, 4-i); 22 | return(char); 23 | } 24 | 25 | getchar() { 26 | auto char; 27 | 28 | char = 0; 29 | read(0, &char, 1); /* fd 0 is standard input; fd 1 is the output stream */ 30 | return(char); 31 | } 32 | 33 | printn(n,b) { 34 | extrn putchar; 35 | auto a; 36 | 37 | if (a = n/b) 38 | printn(a, b); 39 | putchar(char("0123456789ABCDEF", n%b)); 40 | } 41 | 42 | putnumb(n) { 43 | printn(n,10); 44 | putchar('*n'); 45 | } 46 | 47 | putstr(s) { 48 | auto c, i; 49 | 50 | i = 0; 51 | while ((c = char(s,i++)) != '*e') 52 | putchar(c); 53 | } 54 | 55 | getstr(s) { 56 | auto c, i; 57 | i = 0; /* autos are not zero-initialised; start storing at character 0 */ 58 | while ((c = getchar()) != '*n') 59 | lchar(s,i++,c); 60 | lchar(s,i,'*e'); 61 | return(s); 62 | } 63 | 64 | printf(fmt, x1,x2,x3,x4,x5,x6,x7,x8,x9) { 65 | extrn printn, char, putchar; 66 | auto adx, x, c, i, j; 67 | 68 | i = 0; 69 | adx = &x1; 70 | loop: 71 | while((c=char(fmt,i++)) != '%') { 72 | if(c == '*e') 73 | return; 74 | putchar(c); 75 | } 76 | x = *adx++; 77 | switch (c = char(fmt,i++)) { 78 | 79 | case 'd': 80 | case 'o': 81 | if(x < 0) { 82 | x = -x; 83 | putchar('-'); 84 | } 85 | printn(x, c=='o'?8:10); 86 | goto loop; 87 | 88 | case 'x': 89 | if(x < 0) { 90 | x = -x; 91 | putchar('-'); 92 | } 93 
| printn(x, 16); 94 | goto loop; 95 | 96 | case 'c': 97 | putchar(x); 98 | goto loop; 99 | 100 | case 's': 101 | j = 0; 102 | while((c=char(x,j++)) != '*e') 103 | putchar(c); 104 | goto loop; 105 | } 106 | putchar('%'); 107 | i--; 108 | adx--; 109 | goto loop; 110 | } 111 | 112 | -------------------------------------------------------------------------------- /abc: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | BDIR="$HOME/abc" 3 | objs="$BDIR/brt.o $BDIR/lib.o" 4 | BC="$BDIR/b" 5 | 6 | # compile in.b [out.s] 7 | compile() { 8 | if [ "${1##*.}" != "b" ]; then 9 | echo "Error: can only compile b files" >&2 10 | exit 1 11 | fi 12 | cout=$2 13 | [ "$cout" != "" ] || cout=${1%b}s 14 | tmp1=`mktemp`; tmp2=`mktemp` 15 | $BC $1 $tmp2 $tmp1 16 | retval=$? 17 | cat $tmp1 $tmp2 > $cout 18 | rm $tmp1 $tmp2 19 | [ $retval -eq 0 ] || rm $cout && return $retval 20 | echo $cout 21 | return $retval 22 | } 23 | 24 | # assemble in.{sb} [out.o] 25 | assemble() { 26 | atmp="" 27 | ain=$1 28 | aout=$2; 29 | if [ "${1##*.}" = "b" ]; then 30 | [ "$aout" != "" ] || aout=${ain%b}o 31 | ain=`mktemp --suffix=.s` 32 | compile $1 $ain >/dev/null || return 1 33 | atmp="foo" 34 | elif [ "${1##*.}" = "s" ]; then 35 | [ "$aout" != "" ] || aout=${ain%s}o 36 | else 37 | echo "Error: can only compile b and s files" >&2 38 | exit 1 39 | fi 40 | as --32 -g $ain -o $aout 41 | [ "$atmp" != "" ] && rm $ain 42 | echo $aout 43 | } 44 | 45 | out="" 46 | action="link" 47 | while getopts "o:Sc" o 48 | do case "$o" in 49 | o) out="$OPTARG";; 50 | S) action=compile;; 51 | c) action=assemble;; 52 | esac 53 | done 54 | shift $(($OPTIND - 1)) 55 | 56 | # ignore -o option if more than one file given and not linking objs 57 | if [ $# -gt 1 ]; then 58 | if [ "$action" != "link" ]; then 59 | out="" 60 | fi 61 | fi 62 | 63 | [ $# -ne 1 ] && havelist=yes 64 | tmpobjs="" 65 | for i in $@; do 66 | if [ "$action" != "link" ]; then 67 | [ "$havelist" = "yes" ] && echo $i: 
68 | $action $i $out >/dev/null 69 | [ $? -eq 0 ] || break=1 70 | else 71 | if [ "${i##*.}" = "o" ]; then 72 | objs="$objs $i" 73 | else 74 | [ "$havelist" = "yes" ] && echo $i: 75 | ltmp=`mktemp --suffix=.o` 76 | tmpobjs="$tmpobjs $ltmp" 77 | assemble $i $ltmp >/dev/null 78 | [ $? -eq 0 ] || break=1 79 | fi 80 | fi 81 | done 82 | if [ $break ]; then 83 | [ "$tmpobjs" = "" ] || rm $tmpobjs 84 | echo "Error" >&2 85 | exit 1 86 | fi 87 | if [ "$action" = "link" ]; then 88 | if [ "$out" = "" ]; then 89 | out="-o a.out" 90 | else 91 | out="-o $out" 92 | fi 93 | ld -m elf_i386 -T $BDIR/link.ld $out $objs $tmpobjs 94 | rm $tmpobjs 95 | fi 96 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | A B Compiler 2 | ============ 3 | 4 | abc is a compiler for the [B Programming Language](http://en.wikipedia.org/wiki/B_(programming_language)) that targets x86\_32 processors. It is currently tested under Linux but should work (or at least be easily ported) to other UNIX-like systems. The code is based on [an early C compiler (last1120c)](http://www.cs.bell-labs.com/who/dmr/primevalC.html) by Dennis Ritchie. 5 | 6 | Documentation 7 | ------------- 8 | 9 | * [The Programming Language B](http://9p.io/cm/cs/who/dmr/bintro.html) 10 | 11 | * [B Reference by Ken Thompson](http://9p.io/cm/cs/who/dmr/kbman.html) describes a presumably earlier variant of B, which is slightly different from the one described above. The compiler cannot understand it, but I plan to implement a compatibility mode (the differences are minor). 12 | 13 | Implementation 14 | -------------- 15 | 16 | Since B was first implemented for machines with word addressing, some hacking was required to make it work on the byte addressed x86. 
Addresses filled in by the linker are always byte addresses, so pointers to these addresses are collectively stored at the end of the .data section and are then converted to word addresses at runtime, before main() is called. 17 | 18 | The generated assembly is *very* inefficient; not even constant expressions are reduced at compile time. Also, I/O is currently not buffered. 19 | 20 | How to use 21 | ---------- 22 | 23 | The installation requires a little configuration: 24 | 'abc' is a frontend for the actual compiler which feels somewhat like gcc (it also handles assembling and linking). Before you can use it, set its BDIR variable to the directory of the B compiler. 25 | In the Makefile, change the directory of the 'install' rule to wherever you want your 'abc' file to reside. 26 | Then type 27 | 28 | make install libs 29 | 30 | which compiles the compiler 'b', installs the 'abc' frontend and compiles the B runtime and library (brt.o and lib.o). 31 | 32 | To compile and link a B program, simply type 33 | 34 | abc -o outfile file1.b [file2.b ...] 35 | 36 | If you want to compile and assemble only: 37 | 38 | abc -c file1.b [file2.b ...] 39 | 40 | or generate only the assembly: 41 | 42 | abc -S file1.b [file2.b ...] 43 | 44 | Examples of B programs are in the 'examples' directory; they are mostly from Brian Kernighan's tutorial. 45 | 46 | Bugs 47 | ---- 48 | 49 | Since command line parameters aren't passed word-aligned, B can't handle them easily. brt.s copies the strings to another location and aligns them; however, the space is not dynamically allocated and only 256 bytes are available by default. 50 | 51 | The library is incomplete but has some of the most important functions. 52 | 53 | I have only tested the compiler on an x86\_64 Gentoo system. 
54 | -------------------------------------------------------------------------------- /brt.s: -------------------------------------------------------------------------------- 1 | .globl _argv 2 | .data 3 | .align 4 4 | .comm argstr,256,4 5 | _argv: .long 0 6 | 7 | .text 8 | .globl start 9 | start: 10 | # clear bss (could be done better) 11 | # is it actually necessary? 12 | mov $__bss,%eax 13 | 1: 14 | movb $0,(%eax) 15 | inc %eax 16 | cmp $__ebss,%eax 17 | jb 1b # stop at __ebss; jbe would also zero the byte just past the section 18 | 19 | # copy command line args (can't use them directly, not aligned) 20 | mov %esp,%eax 21 | shr $2,%eax 22 | mov %eax,_argv 23 | mov (%esp),%ecx # number of arguments 24 | mov $argstr,%edi 25 | 1: 26 | mov (%esp,%ecx,4),%esi 27 | mov %edi,%eax 28 | shr $2,%eax 29 | mov %eax,(%esp,%ecx,4) 30 | call cpystr 31 | loop 1b 32 | 33 | call bsymbs 34 | mov _main,%eax 35 | shl $2,%eax 36 | call *%eax 37 | mov %eax,%ebx 38 | mov $1,%eax 39 | int $0x80 40 | 41 | # copy string from esi to edi and convert '\0' to B's '*e', align edi 42 | cpystr: 43 | mov (%esi),%al 44 | test %al,%al 45 | jz 1f 46 | mov %al,(%edi) 47 | inc %edi 48 | inc %esi 49 | jmp cpystr 50 | 1: 51 | movb $04,(%edi) 52 | inc %edi 53 | add $3,%edi 54 | and $~3,%edi 55 | ret 56 | 57 | # shift addresses filled in by the linker 2 bits to the right 58 | # so B only ever sees word addresses 59 | bsymbs: 60 | mov $__bsymb,%eax 61 | 1: 62 | cmp $__ebsymb,%eax 63 | jge 1f 64 | mov (%eax),%ebx 65 | mov (%ebx),%ecx 66 | shr $2,%ecx 67 | mov %ecx,(%ebx) 68 | add $4,%eax 69 | jmp 1b 70 | 1: 71 | ret 72 | 73 | .globl retrn 74 | retrn: 75 | mov %ebp,%esp 76 | pop %ebp 77 | ret 78 | 79 | # handle switch table: 80 | # eax has the value, ebx the address of the switch table 81 | .globl bswitch 82 | bswitch: 83 | xor %ecx,%ecx 84 | 1: 85 | mov (%ebx,%ecx,8),%edx 86 | mov 4(%ebx,%ecx,8),%edi 87 | test %edi,%edi 88 | jz 1f # default (last in table) 89 | cmp %eax,%edx 90 | je 2f 91 | inc %ecx 92 | jmp 1b 93 | 1: 94 | jmp *%edx 95 | 2: 96 | jmp *%edi 97 | 98 | # 99 | 
# Library functions in assembly 100 | # 101 | .globl _exit 102 | .data 103 | .align 4 104 | .section .bsymb; .long _exit; .data 105 | _exit: .long 1f 106 | .text 107 | .align 4 108 | 1: mov $1,%eax 109 | mov $0,%ebx 110 | int $0x80 111 | 112 | .globl _write 113 | .data 114 | .align 4 115 | .section .bsymb; .long _write; .data 116 | _write: .long 1f 117 | .text 118 | .align 4 119 | 1: mov 4(%esp),%ebx 120 | mov 8(%esp),%ecx 121 | shl $2,%ecx 122 | mov 12(%esp),%edx 123 | mov $4,%eax 124 | int $0x80 125 | ret 126 | 127 | .globl _read 128 | .data 129 | .align 4 130 | .section .bsymb; .long _read; .data 131 | _read: .long 1f 132 | .text 133 | .align 4 134 | 1: mov 4(%esp),%ebx 135 | mov 8(%esp),%ecx 136 | shl $2,%ecx 137 | mov 12(%esp),%edx 138 | mov $3,%eax 139 | int $0x80 140 | ret 141 | 142 | .globl _inv 143 | .data 144 | .align 4 145 | .section .bsymb; .long _inv; .data 146 | _inv: .long 1f 147 | .text 148 | .align 4 149 | 1: mov 4(%esp),%eax 150 | not %eax 151 | ret 152 | -------------------------------------------------------------------------------- /b.h: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | 5 | #define NCPS 8 /* chars per symbol */ 6 | #define NCPW 4 /* chars per word */ 7 | #define ALIGN 4 /* Passed directly to the assembler's .align */ 8 | #define HSHSIZ 400 /* hash table size */ 9 | #define SWSIZ 230 /* switch table size */ 10 | #define CMSIZ 40 /* symbol stack size */ 11 | #define SSIZE 20 /* operator and precedence stack size */ 12 | #define OSSIZ 300*8 /* space for expression tree */ 13 | 14 | #define EOS 04 /* end of string marker */ 15 | 16 | /* 17 | * Holds a B symbol. 18 | * class is one of the storage classes below. 19 | * offset is used depending on class. 
20 | */ 21 | struct hshtab { 22 | int class; 23 | int offset; 24 | int dim; 25 | struct hshtab *next; 26 | char name[NCPS+1]; 27 | }; 28 | 29 | struct tnode { 30 | int op; 31 | int value; 32 | struct tnode *tr1; 33 | struct tnode *tr2; 34 | }; 35 | 36 | struct swtab { 37 | int swlab; 38 | int swval; 39 | }; 40 | 41 | struct hshtab hshtab[HSHSIZ]; 42 | int hshused; 43 | int eof; 44 | int peekc; 45 | char ctab[128]; 46 | struct hshtab *bsym; 47 | struct hshtab *paraml, *parame; 48 | int cval; 49 | int isn; 50 | char symbuf[NCPS+1]; 51 | FILE *sbufp; 52 | int stack; 53 | struct tnode **cp; 54 | int *space; 55 | int ospace[OSSIZ]; 56 | int retlab; 57 | int nerror; 58 | struct swtab swtab[SWSIZ]; 59 | struct swtab *swp; 60 | int deflab; 61 | extern int contlab; 62 | extern int brklab; 63 | 64 | extern int opdope[]; 65 | extern int line; 66 | extern int peeksym, peeksym2; 67 | 68 | void error(char *s, ...); 69 | void printtoken(int tok, FILE *out); 70 | struct tnode * block(int op, int value, struct tnode *tr1, struct tnode *tr2); 71 | void rcexpr(struct tnode *tr); 72 | void cbranch(struct tnode *t, int lab, int val); 73 | void jump(int lab); 74 | void label(int l); 75 | 76 | #define EOFC 0 77 | #define SEMI 1 78 | #define LBRACE 2 79 | #define RBRACE 3 80 | #define LBRACK 4 81 | #define RBRACK 5 82 | #define LPARN 6 83 | #define RPARN 7 84 | #define COLON 8 85 | #define COMMA 9 86 | 87 | #define MCALL 15 88 | #define CALL 16 89 | #define DECBEF 17 90 | #define INCBEF 18 91 | #define DECAFT 19 92 | #define INCAFT 20 93 | #define EXCLA 21 94 | #define NEG 22 95 | #define AMPER 23 96 | #define STAR 24 97 | #define QUEST 25 98 | 99 | #define PLUS 30 100 | #define MINUS 31 101 | #define MOD 32 102 | #define TIMES 33 103 | #define DIVIDE 34 104 | #define OR 35 105 | #define AND 36 106 | #define LSHIFT 37 107 | #define RSHIFT 38 108 | #define EQUAL 39 109 | #define NEQUAL 40 110 | #define LESSEQ 41 111 | #define LESS 42 112 | #define GREATEQ 43 113 | #define GREAT 44 114 | 
115 | #define ASSIGN 49 116 | #define ASPLUS 50 117 | #define ASMINUS 51 118 | #define ASMOD 52 119 | #define ASTIMES 53 120 | #define ASDIV 54 121 | #define ASOR 55 122 | #define ASAND 56 123 | #define ASLSH 57 124 | #define ASRSH 58 125 | #define ASEQUAL 59 126 | #define ASNEQL 60 127 | #define ASLEQ 61 128 | #define ASLESS 62 129 | #define ASGTQ 63 130 | #define ASGREAT 64 131 | 132 | #define CON 65 133 | #define STRING 66 134 | #define NAME 67 135 | #define KEYW 68 136 | 137 | #define SQUOTE 121 138 | #define DQUOTE 122 139 | #define NEWLN 123 140 | #define SPACE 124 141 | #define LETTER 125 142 | #define DIGIT 126 143 | #define UNKN 127 144 | 145 | #define SEOF 200 146 | 147 | /* storage classes */ 148 | #define AUTO 1 149 | #define EXTERN 2 150 | #define INTERN 3 151 | #define ARG 4 152 | #define KEYWF 5 153 | 154 | /* keywords */ 155 | #define CASE 3 156 | #define IF 4 157 | #define ELSE 5 158 | #define WHILE 6 159 | #define SWITCH 7 160 | #define GOTO 8 161 | #define RETURN 9 162 | #define DEFAULT 10 163 | #define BREAK 11 164 | 165 | -------------------------------------------------------------------------------- /b1.c: -------------------------------------------------------------------------------- 1 | #include "b.h" 2 | 3 | /* 4 | * Code generation (x86 assembly) 5 | */ 6 | 7 | void 8 | push(void) 9 | { 10 | printf("\tpush\t%%eax\n"); 11 | } 12 | 13 | void 14 | pop(char *s) 15 | { 16 | printf("\tpop\t%%%s\n", s); 17 | } 18 | 19 | void 20 | binary(struct tnode *tr) 21 | { 22 | rcexpr(tr->tr1); 23 | push(); 24 | rcexpr(tr->tr2); 25 | } 26 | 27 | int 28 | pushargs(struct tnode *tr) 29 | { 30 | int stk; 31 | 32 | if (tr == NULL) 33 | return 0; 34 | if (tr->op == COMMA) { 35 | rcexpr(tr->tr2); 36 | push(); 37 | stk = pushargs(tr->tr1); 38 | return stk+NCPW; 39 | } 40 | rcexpr(tr); 41 | push(); 42 | return NCPW; 43 | } 44 | 45 | void 46 | lvalexp(struct tnode *tr) 47 | { 48 | struct hshtab *bs; 49 | char memloc[64]; 50 | 51 | switch (tr->op) { 52 | 53 | case 
DECBEF: 54 | case INCBEF: 55 | case DECAFT: 56 | case INCAFT: 57 | if (tr->tr1->op == STAR) { 58 | rcexpr(tr->tr1->tr1); 59 | printf("\tmov\t%%eax,%%ebx\n"); 60 | sprintf(memloc,"(,%%ebx,4)"); 61 | } else { /* NAME, checked in "build" */ 62 | bs = (struct hshtab *) tr->tr1->tr1; 63 | if (bs->class == EXTERN) 64 | sprintf(memloc,"_%s", bs->name); 65 | else if (bs->class == AUTO) 66 | sprintf(memloc,"%d(%%ebp)", bs->offset); 67 | else 68 | goto classerror; 69 | } 70 | if (tr->op == DECBEF || tr->op == INCBEF) { 71 | printf("\t%s\t%s\n", tr->op == DECBEF ? "decl" : "incl", 72 | memloc); 73 | printf("\tmov\t%s,%%eax\n", memloc); 74 | } else { 75 | printf("\tmov\t%s,%%eax\n", memloc); 76 | printf("\t%s\t%s\n", tr->op == DECAFT ? "decl" : "incl", 77 | memloc); 78 | } 79 | return; 80 | 81 | case ASSIGN: 82 | rcexpr(tr->tr2); 83 | if (tr->tr1->op == STAR) { 84 | push(); 85 | rcexpr(tr->tr1->tr1); 86 | pop("ebx"); 87 | printf("\tmov\t%%ebx,(,%%eax,4)\n"); 88 | } else { /* NAME */ 89 | bs = (struct hshtab *) tr->tr1->tr1; 90 | if (bs->class == EXTERN) 91 | printf("\tmov\t%%eax,_%s\n", bs->name); 92 | else if (bs->class == AUTO) 93 | printf("\tmov\t%%eax,%d(%%ebp)\n", bs->offset); 94 | else 95 | goto classerror; 96 | } 97 | return; 98 | 99 | case ASPLUS: 100 | case ASMINUS: 101 | case ASMOD: 102 | case ASTIMES: 103 | case ASDIV: 104 | case ASOR: 105 | case ASAND: 106 | case ASLSH: 107 | case ASRSH: 108 | case ASEQUAL: 109 | case ASNEQL: 110 | case ASLEQ: 111 | case ASLESS: 112 | case ASGTQ: 113 | case ASGREAT: 114 | tr->op -= ASPLUS-PLUS; 115 | rcexpr(block(ASSIGN,0,tr->tr1,tr)); 116 | return; 117 | } 118 | 119 | classerror: 120 | error("Storage class"); 121 | } 122 | 123 | void 124 | rcexpr(struct tnode *tr) 125 | { 126 | int o1, o2; 127 | int stk; 128 | struct hshtab *bs; 129 | 130 | if (tr == NULL) 131 | return; 132 | 133 | if (opdope[tr->op]&02) { 134 | lvalexp(tr); 135 | return; 136 | } 137 | 138 | switch (tr->op) { 139 | 140 | case CON: 141 | 
printf("\tmov\t$%d,%%eax\n", tr->value); 142 | return; 143 | 144 | case STRING: 145 | printf("\tmov\t$L%d,%%eax\n", tr->value); 146 | printf("\tshr\t$2,%%eax\n"); 147 | return; 148 | 149 | case NAME: /* only rvalue */ 150 | bs = (struct hshtab *) tr->tr1; 151 | if (bs->class == EXTERN) 152 | printf("\tmov\t_%s,%%eax\n", bs->name); 153 | else if (bs->class == AUTO) 154 | printf("\tmov\t%d(%%ebp),%%eax\n", bs->offset); 155 | else 156 | goto classerror; 157 | return; 158 | 159 | case CALL: 160 | stk = pushargs(tr->tr2); 161 | rcexpr(tr->tr1); 162 | printf("\tshl\t$2,%%eax\n"); 163 | printf("\tcall\t*%%eax\n"); 164 | if (stk) 165 | printf("\tadd\t$%d,%%esp\n",stk); 166 | return; 167 | 168 | case AMPER: 169 | bs = (struct hshtab *) tr->tr1->tr1; 170 | if (bs->class == EXTERN) { 171 | printf("\tmov\t$_%s,%%eax\n", bs->name); 172 | printf("\tshr\t$2,%%eax\n"); 173 | } else if (bs->class == AUTO) { 174 | printf("\tlea\t%d(%%ebp),%%eax\n", bs->offset); 175 | printf("\tshr\t$2,%%eax\n"); 176 | } else 177 | goto classerror; 178 | return; 179 | 180 | case STAR: /* only rvalue */ 181 | rcexpr(tr->tr1); 182 | printf("\tmov\t(,%%eax,4),%%eax\n"); 183 | return; 184 | 185 | case PLUS: 186 | binary(tr); 187 | pop("ebx"); 188 | printf("\tadd\t%%ebx,%%eax\n"); 189 | return; 190 | 191 | case MINUS: 192 | binary(tr); 193 | printf("\tmov\t%%eax,%%ebx\n"); 194 | pop("eax"); 195 | printf("\tsub\t%%ebx,%%eax\n"); 196 | return; 197 | 198 | case TIMES: 199 | binary(tr); 200 | pop("ebx"); 201 | printf("\tmul\t%%ebx\n"); 202 | return; 203 | 204 | case DIVIDE: 205 | binary(tr); 206 | printf("\tmov\t%%eax,%%ebx\n"); 207 | pop("eax"); 208 | printf("\txor\t%%edx,%%edx\n"); 209 | printf("\tdiv\t%%ebx\n"); 210 | return; 211 | 212 | case MOD: 213 | binary(tr); 214 | printf("\tmov\t%%eax,%%ebx\n"); 215 | pop("eax"); 216 | printf("\txor\t%%edx,%%edx\n"); 217 | printf("\tdiv\t%%ebx\n"); 218 | printf("\tmov\t%%edx,%%eax\n"); 219 | return; 220 | 221 | case AND: 222 | binary(tr); 223 | pop("ebx"); 224 | 
printf("\tand\t%%ebx,%%eax\n"); 225 | return; 226 | 227 | case OR: 228 | binary(tr); 229 | pop("ebx"); 230 | printf("\tor\t%%ebx,%%eax\n"); 231 | return; 232 | 233 | case LSHIFT: 234 | binary(tr); 235 | printf("\tmov\t%%eax,%%ecx\n"); 236 | pop("eax"); 237 | printf("\tshl\t%%cl,%%eax\n"); 238 | return; 239 | 240 | case RSHIFT: 241 | binary(tr); 242 | printf("\tmov\t%%eax,%%ecx\n"); 243 | pop("eax"); 244 | printf("\tshr\t%%cl,%%eax\n"); 245 | return; 246 | 247 | case EQUAL: 248 | case NEQUAL: 249 | case LESS: 250 | case LESSEQ: 251 | case GREAT: 252 | case GREATEQ: 253 | binary(tr); 254 | pop("ebx"); 255 | printf("\tcmp\t%%eax,%%ebx\n"); 256 | switch (tr->op) { 257 | case EQUAL: 258 | printf("\tsete\t%%al\n"); 259 | break; 260 | case NEQUAL: 261 | printf("\tsetne\t%%al\n"); 262 | break; 263 | case LESS: 264 | printf("\tsetl\t%%al\n"); 265 | break; 266 | case LESSEQ: 267 | printf("\tsetle\t%%al\n"); 268 | break; 269 | case GREAT: 270 | printf("\tsetg\t%%al\n"); 271 | break; 272 | case GREATEQ: 273 | printf("\tsetge\t%%al\n"); 274 | break; 275 | } 276 | printf("\tmovzb\t%%al,%%eax\n"); 277 | return; 278 | 279 | case EXCLA: 280 | rcexpr(tr->tr1); 281 | printf("\ttest\t%%eax,%%eax\n"); 282 | printf("\tsete\t%%al\n"); 283 | printf("\tmovzb\t%%al,%%eax\n"); 284 | return; 285 | 286 | case NEG: 287 | rcexpr(tr->tr1); 288 | printf("\tneg\t%%eax\n"); 289 | return; 290 | 291 | case QUEST: 292 | cbranch(tr->tr1, o1=isn++, 0); 293 | rcexpr(tr->tr2->tr1); 294 | jump(o2 = isn++); 295 | label(o1); 296 | rcexpr(tr->tr2->tr2); 297 | label(o2); 298 | return; 299 | 300 | default: 301 | error("Can't print tree (op: %d)", tr->op); 302 | } 303 | 304 | classerror: 305 | error("Storage class"); 306 | } 307 | 308 | /* Prints the tree in RPN, for debugging */ 309 | /* 310 | void 311 | rcexpr(struct tnode *tr) 312 | { 313 | struct hshtab *bs; 314 | 315 | if (tr == NULL) 316 | printf("(NULL) "); 317 | else if (tr->op == CON) 318 | printf("%d ", tr->value); 319 | else if (tr->op == STRING) 320 | 
printf("s(L%d) ", tr->value); 321 | else if (tr->op == NAME) { 322 | bs = (struct hshtab *)tr->tr1; 323 | if (bs->class == AUTO) 324 | printf("%s(%d) ", bs->name, bs->offset); 325 | else 326 | printf("%s ", bs->name); 327 | } else { 328 | rcexpr(tr->tr1); 329 | if (opdope[tr->op]&01) 330 | rcexpr(tr->tr2); 331 | printtoken(tr->op, stdout); 332 | } 333 | } 334 | */ 335 | -------------------------------------------------------------------------------- /b0.c: -------------------------------------------------------------------------------- 1 | #include "b.h" 2 | 3 | void extdef(void); 4 | struct hshtab * lookup(void); 5 | void blkhed(void); 6 | void blkend(void); 7 | void retseq(void); 8 | void statement(int d); 9 | struct tnode * tree(void); 10 | void errflush(int o); 11 | 12 | int line = 1; 13 | int peeksym = -1, peeksym2 = -1;; 14 | int contlab = -1; 15 | int brklab = -1; 16 | 17 | void 18 | init(char *s, int val) 19 | { 20 | char *sp; 21 | struct hshtab *np; 22 | 23 | sp = symbuf; 24 | while (sp < symbuf+NCPS+1) 25 | if ((*sp++ = *s++) == '\0') 26 | s--; 27 | np = lookup(); 28 | np->class = KEYWF; 29 | np->offset = val; 30 | } 31 | 32 | int 33 | main(int argc, char *argv[]) 34 | { 35 | if (argc < 3) { 36 | error("Arg count"); 37 | exit(1); 38 | } 39 | if (freopen(argv[1], "r", stdin) == NULL) { 40 | error("Can't find %s", argv[1]); 41 | exit(1); 42 | } 43 | if ((sbufp=fopen(argv[2], "w")) == NULL) { 44 | error("Can't create %s", argv[2]); 45 | exit(1); 46 | } 47 | if (argc > 3) { 48 | if (freopen(argv[3], "w", stdout) == NULL) { 49 | error("Can't create %s", argv[2]); 50 | exit(1); 51 | } 52 | } 53 | init("auto", AUTO); 54 | init("extrn", EXTERN); 55 | init("case", CASE); 56 | init("if", IF); 57 | init("else", ELSE); 58 | init("while", WHILE); 59 | init("switch", SWITCH); 60 | init("goto", GOTO); 61 | init("return", RETURN); 62 | init("default", DEFAULT); 63 | init("break", BREAK); 64 | fprintf(sbufp, "\t.data\n"); 65 | while (!eof) { 66 | extdef(); 67 | blkend(); 
68 | } 69 | return nerror != 0; 70 | } 71 | 72 | /* 73 | * Lexer 74 | */ 75 | 76 | int 77 | spnextchar(void) 78 | { 79 | int c; 80 | 81 | if ((c = peekc) == 0) 82 | c = getchar(); 83 | if (c == '\t') 84 | c = ' '; 85 | else if (c == '\n') { 86 | c = ' '; 87 | line++; 88 | } 89 | peekc = c; 90 | return c; 91 | } 92 | 93 | int 94 | nextchar(void) 95 | { 96 | while (spnextchar() == ' ') 97 | peekc = 0; 98 | return peekc; 99 | } 100 | 101 | int 102 | subseq(int c, int a, int b) 103 | { 104 | if (spnextchar() != c) 105 | return a; 106 | peekc = 0; 107 | return b; 108 | } 109 | 110 | /* Only decimal and octal bases, could extend */ 111 | int 112 | getnum(void) 113 | { 114 | int base; 115 | int c; 116 | 117 | base = 10; 118 | cval = 0; 119 | if ((c=spnextchar()) == '0') 120 | base = 8; 121 | for (; ctab[c] == DIGIT; c = getchar()) 122 | cval = cval*base + c-'0'; 123 | peekc = c; 124 | return CON; 125 | } 126 | 127 | int 128 | mapch(char c) 129 | { 130 | int a; 131 | 132 | if ((a=getchar()) == c) 133 | return -1; 134 | switch (a) { 135 | 136 | case '\n': 137 | case '\0': 138 | error("Nonterminated string"); 139 | peekc = a; 140 | return -1; 141 | 142 | case '*': 143 | switch (a=getchar()) { 144 | 145 | case 't': 146 | return('\t'); 147 | 148 | case 'n': 149 | return('\n'); 150 | 151 | case '0': 152 | return('\0'); 153 | 154 | case '(': 155 | return('{'); 156 | 157 | case ')': 158 | return('}'); 159 | 160 | case 'e': 161 | return(EOS); 162 | 163 | case '\n': 164 | line++; 165 | return('\n'); 166 | } 167 | } 168 | return a; 169 | } 170 | 171 | int 172 | getcc(void) 173 | { 174 | char *cp; 175 | int c, cc; 176 | 177 | cval = 0; 178 | cc = 0; 179 | cp = (char*) &cval; 180 | while ((c = mapch('\'')) >= 0) 181 | if (cc++ < NCPW) 182 | *cp++ = c; 183 | if (cc > NCPW) 184 | error("Long character constant"); 185 | return CON; 186 | } 187 | 188 | int 189 | getstr(void) 190 | { 191 | int c; 192 | int i; 193 | 194 | fprintf(sbufp, "\t.align 4\n"); 195 | fprintf(sbufp, "L%d:", cval = 
isn++); 196 | if ((c = mapch('"')) >= 0) 197 | fprintf(sbufp, "\t.byte %04o", c); 198 | for (i = 2; (c = mapch('"')) >= 0; i++) 199 | fprintf(sbufp, ",%04o", c); 200 | fprintf(sbufp, ",04"); 201 | while ((i++%4) != 0) 202 | fprintf(sbufp, ",00"); 203 | fprintf(sbufp, "\n"); 204 | return STRING; 205 | } 206 | 207 | struct hshtab * 208 | lookup(void) 209 | { 210 | int i; 211 | char *sp, *np; 212 | struct hshtab *rp; 213 | 214 | i = 0; 215 | sp = symbuf; 216 | while (sp < symbuf+NCPS) 217 | i += *sp++&0177; 218 | rp = &hshtab[i%HSHSIZ]; 219 | while (*(np = rp->name)) { 220 | for (sp=symbuf; sp < symbuf+NCPS;) 221 | if (*np++ != *sp++) 222 | goto no; 223 | return rp; 224 | no: 225 | if (++rp >= &hshtab[HSHSIZ]) 226 | rp = hshtab; 227 | } 228 | if (++hshused > HSHSIZ) { 229 | error("Symbol table overflow"); 230 | exit(1); 231 | } 232 | rp->class = 0; 233 | rp->offset = 0; 234 | rp->dim = 0; 235 | sp = symbuf; 236 | for (np = rp->name; sp < symbuf+NCPS+1;) 237 | *np++ = *sp++; 238 | return rp; 239 | } 240 | 241 | /* 242 | * Symbol peeking with one peeksym doesn't work if an ASSIGN is only peeked, 243 | * since it itself peeks a symbol, which is then overwritten. 244 | */ 245 | 246 | /* Note: does not push bsyms !! 
*/ 247 | int 248 | pushsym(int sym) 249 | { 250 | if (peeksym < 0) 251 | peeksym = sym; 252 | else if (peeksym2 < 0) { 253 | peeksym2 = peeksym; 254 | peeksym = sym; 255 | } else 256 | error("Cannot push more than two symbols\n"); 257 | return sym; 258 | } 259 | 260 | int 261 | symbol(void) 262 | { 263 | int c; 264 | char *sp; 265 | 266 | if (peeksym >= 0) { 267 | c = peeksym; 268 | peeksym = peeksym2; 269 | peeksym2 = -1; 270 | return c; 271 | } 272 | if (peekc) { 273 | c = peekc; 274 | peekc = 0; 275 | } else 276 | if (eof) 277 | return EOFC; 278 | else 279 | c = getchar(); 280 | if (c==EOF) { 281 | eof++; 282 | return(EOFC); 283 | } 284 | 285 | loop: 286 | switch (ctab[c]) { 287 | 288 | case NEWLN: 289 | line++; 290 | 291 | case SPACE: 292 | c = getchar(); 293 | goto loop; 294 | 295 | case PLUS: 296 | return subseq(c,PLUS,INCBEF); 297 | 298 | case MINUS: 299 | return subseq(c,MINUS,DECBEF); 300 | 301 | case LESS: 302 | if (subseq(c,0,1)) 303 | return LSHIFT; 304 | return subseq('=', LESS, LESSEQ); 305 | 306 | case GREAT: 307 | if (subseq(c,0,1)) 308 | return RSHIFT; 309 | return subseq('=', GREAT, GREATEQ); 310 | 311 | case ASSIGN: 312 | if (subseq(' ',0,1)) 313 | return ASSIGN; 314 | /* avoid peeking a name, which could overwrite 315 | * an already set bsym. 
*/ 316 | if (ctab[peekc = spnextchar()] == LETTER) 317 | return ASSIGN; 318 | c = symbol(); 319 | if (PLUS <= c && c <= GREAT) 320 | return c + ASPLUS-PLUS; 321 | if (c == ASSIGN) 322 | return EQUAL; 323 | pushsym(c); 324 | return ASSIGN; 325 | 326 | case EXCLA: 327 | return subseq('=',EXCLA,NEQUAL); 328 | 329 | case DIVIDE: 330 | if (subseq('*',1,0)) 331 | return DIVIDE; 332 | while ((c = spnextchar()) != EOFC) { 333 | peekc = 0; 334 | if (c == '*') { 335 | if (spnextchar() == '/') { 336 | peekc = 0; 337 | c = getchar(); 338 | goto loop; 339 | } 340 | } 341 | } 342 | eof++; 343 | error("Nonterminated comment"); 344 | return EOFC; 345 | 346 | case DIGIT: 347 | peekc = c; 348 | return getnum(); 349 | 350 | case SQUOTE: 351 | return(getcc()); 352 | 353 | case DQUOTE: 354 | return(getstr()); 355 | 356 | case LETTER: 357 | sp = symbuf; 358 | while (ctab[c] == LETTER || ctab[c] == DIGIT) { 359 | if (sp < symbuf+NCPS) 360 | *sp++ = c; 361 | c = getchar(); 362 | } 363 | while (sp < symbuf+NCPS+1) 364 | *sp++ = '\0'; 365 | peekc = c; 366 | bsym = lookup(); 367 | if (bsym->class == KEYWF) { 368 | cval = bsym->offset; 369 | return KEYW; 370 | } 371 | return NAME; 372 | 373 | case UNKN: 374 | error("Unknown character"); 375 | c = getchar(); 376 | goto loop; 377 | } 378 | return (ctab[c]); 379 | } 380 | 381 | /* 382 | * Declarations and Definitions 383 | */ 384 | 385 | /* Declares a list of names to be of storage class "kw". 
*/ 386 | void 387 | declare(int kw) 388 | { 389 | int o; 390 | 391 | while ((o = symbol()) == NAME) { 392 | if (bsym->class) 393 | error("%s redeclared", bsym->name); 394 | bsym->class = kw; 395 | while ((o = symbol()) == LBRACK) { 396 | if ((o = symbol()) == CON) { 397 | if (bsym->dim) 398 | error("Bad vector"); 399 | bsym->dim = cval + 1; 400 | o = symbol(); 401 | } 402 | if (o != RBRACK) 403 | goto syntax; 404 | } 405 | if (kw == ARG) { 406 | bsym->next = NULL; 407 | if (!paraml) 408 | paraml = bsym; 409 | else 410 | parame->next = bsym; 411 | parame = bsym; 412 | } 413 | if (o != COMMA) 414 | break; 415 | } 416 | if ((o == SEMI && kw != ARG) || (o == RPARN && kw == ARG)) 417 | return; 418 | syntax: 419 | error("Declaration syntax"); 420 | errflush(o); 421 | } 422 | 423 | void 424 | declist(void) 425 | { 426 | int o; 427 | 428 | while ((o = symbol()) == KEYW && (cval == AUTO || cval == EXTERN)) 429 | declare(cval); 430 | pushsym(o); 431 | } 432 | 433 | void 434 | function(void) 435 | { 436 | printf("\tpush\t%%ebp\n"); 437 | printf("\tmov\t%%esp,%%ebp\n"); 438 | 439 | declare(ARG); 440 | statement(1); 441 | retseq(); 442 | } 443 | 444 | void 445 | global(char *s) 446 | { 447 | printf("\t.globl\t_%s\n", s); 448 | printf("\t.data\n"); 449 | printf("\t.align %d\n", ALIGN); 450 | } 451 | 452 | void 453 | bsymb(char *s, int und) 454 | { 455 | printf("\t.section .bsymb; .long %s%s; .data\n", und?"_":"", s); 456 | } 457 | 458 | void 459 | extdef(void) 460 | { 461 | int o, dim, i; 462 | char *bs; 463 | 464 | if ((o = symbol()) == EOFC || o == SEMI) 465 | return; 466 | if (o != NAME) 467 | goto syntax; 468 | bs = bsym->name; 469 | i = dim = 0; 470 | switch(o = symbol()) { 471 | 472 | case SEMI: 473 | printf("\t.comm\t_%s,%d,%d\n", bs, NCPW, ALIGN); 474 | goto done; 475 | 476 | /* init */ 477 | case CON: 478 | case STRING: 479 | global(bs); 480 | if (o == STRING) 481 | bsymb(bs,1); 482 | printf("_%s:", bs); 483 | pushsym(o); 484 | goto init; 485 | 486 | /* vector */ 487 | 
case LBRACK: 488 | if ((o=symbol()) == CON) { 489 | dim = cval + 1; 490 | o=symbol(); 491 | } 492 | if (o != RBRACK) 493 | goto syntax; 494 | global(bs); 495 | if ((o=symbol()) == SEMI) { 496 | printf("\t.comm\tL%d,%d,%d\n", isn, dim*NCPW, ALIGN); 497 | bsymb(bs,1); 498 | printf("_%s:\t.long L%d\n", bs, isn++); 499 | goto done; 500 | } 501 | bsymb(bs,1); 502 | printf("_%s:\t.long 1f\n1:", bs); 503 | pushsym(o); 504 | 505 | init: 506 | do { 507 | if ((o=symbol()) != CON && o != STRING && o != NAME) 508 | goto syntax; 509 | if (o == NAME) { 510 | bsymb("1f",0); 511 | printf("1:\t.long _%s\n", bsym->name); 512 | } else 513 | printf("\t.long %s%d\n", o==STRING?"L":"",cval); 514 | i++; 515 | } while ((o=symbol()) == COMMA); 516 | dim = (i > dim) ? i : dim; 517 | if (dim - i) 518 | printf("\t.zero %d\n", (dim-i)*NCPW); 519 | if (o == SEMI) 520 | goto done; 521 | goto syntax; 522 | 523 | /* function */ 524 | case LPARN: 525 | global(bs); 526 | bsymb(bs,1); 527 | printf("_%s:\t.long 1f\n", bs); 528 | printf("\t.text\n\t.align 4\n1:"); 529 | function(); 530 | done: 531 | printf("\n"); 532 | return; 533 | 534 | case EOFC: 535 | return; 536 | } 537 | syntax: 538 | error("External definition syntax"); 539 | printtoken(o, stderr); 540 | errflush(o); 541 | statement(0); 542 | } 543 | 544 | void 545 | setstk(int a) 546 | { 547 | int dif; 548 | 549 | dif = stack-a; 550 | stack = a; 551 | if (dif) 552 | printf("\tsub\t$%d, %%esp\n", dif); 553 | } 554 | 555 | void 556 | defvec(void) 557 | { 558 | stack -= NCPW; 559 | printf("\tmov\t%%esp,%%eax\n"); 560 | printf("\tshr\t$2,%%eax\n"); 561 | printf("\tpush\t%%eax\n"); 562 | } 563 | 564 | void 565 | blkhed(void) 566 | { 567 | int al, pl; 568 | struct hshtab *bs; 569 | 570 | declist(); 571 | stack = al = -4; 572 | pl = 8; 573 | while (paraml) { 574 | paraml = (bs = paraml)->next; 575 | bs->offset = pl; 576 | pl += NCPW; 577 | } 578 | for (bs = hshtab; bs < &hshtab[HSHSIZ]; bs++) 579 | if (bs->name[0]) { 580 | if (bs->class == AUTO) { 581 
| bs->offset = al; 582 | if (bs->dim) { 583 | al -= bs->dim*NCPW; 584 | setstk(al); 585 | defvec(); 586 | bs->offset = al; 587 | } 588 | al -= NCPW; 589 | } else if (bs->class == ARG) 590 | bs->class = AUTO; 591 | } 592 | setstk(al); 593 | } 594 | 595 | void 596 | blkend(void) 597 | { 598 | struct hshtab *np; 599 | 600 | for (np = hshtab; np < &hshtab[HSHSIZ]; np++) 601 | if (np->class != KEYWF) { 602 | np->name[0] = '\0'; 603 | hshused--; 604 | } 605 | } 606 | 607 | /* 608 | * Statements and Expressions 609 | */ 610 | 611 | struct tnode * 612 | pexpr(void) 613 | { 614 | struct tnode *t; 615 | int o; 616 | 617 | if ((o = symbol()) != LPARN) 618 | goto syntax; 619 | t = tree(); 620 | if ((o = symbol()) != RPARN) 621 | goto syntax; 622 | return t; 623 | syntax: 624 | error("Statement syntax"); 625 | errflush(o); 626 | return NULL; 627 | } 628 | 629 | void 630 | label(int l) 631 | { 632 | printf("L%d:\n", l); 633 | } 634 | 635 | void 636 | retseq(void) 637 | { 638 | printf("\tjmp\tretrn\n"); 639 | } 640 | 641 | /* Jump to "lab", if the expression "t" evaluated to "val". 
*/ 642 | void 643 | cbranch(struct tnode *t, int lab, int val) 644 | { 645 | rcexpr(t); 646 | if (val == 0) 647 | printf("\ttest\t%%eax,%%eax\n"); 648 | else 649 | printf("\tcmp\t%%eax,$%d\n", val); 650 | printf("\tje\tL%d\n", lab); 651 | } 652 | 653 | void 654 | jump(int lab) 655 | { 656 | printf("\tjmp\tL%d\n", lab); 657 | } 658 | 659 | void 660 | pswitch(void) 661 | { 662 | struct swtab *sswp; 663 | int dl, swlab; 664 | 665 | sswp = swp; 666 | if (swp == NULL) 667 | swp = swtab; 668 | swlab = isn++; 669 | printf("\tmov\t$L%d,%%ebx\n", swlab); 670 | printf("\tjmp\tbswitch\n"); 671 | dl = deflab; 672 | deflab = 0; 673 | statement(0); 674 | if (!deflab) { 675 | deflab = isn++; 676 | label(deflab); 677 | } 678 | printf("L%d:\n\t.data\nL%d:", brklab, swlab); 679 | while (swp > sswp && swp > swtab) { 680 | --swp; 681 | printf("\t.long %d,L%d\n", swp->swval, swp->swlab); 682 | } 683 | printf("\t.long L%d,0\n", deflab); 684 | printf("\t.text\n"); 685 | deflab = dl; 686 | swp = sswp; 687 | } 688 | 689 | void 690 | statement(int d) 691 | { 692 | int o, o1, o2; 693 | 694 | stmt: 695 | if ((o = symbol()) == LBRACE) { 696 | if (d) 697 | blkhed(); 698 | while (!eof) { 699 | if ((o = symbol()) == RBRACE) 700 | goto bend; 701 | pushsym(o); 702 | statement(0); 703 | } 704 | error("Missing '}'"); 705 | bend: 706 | return; 707 | } else { 708 | pushsym(o); 709 | if (d) 710 | blkhed(); 711 | } 712 | 713 | switch (o = symbol()) { 714 | 715 | case EOFC: 716 | error("Unexpected EOF"); 717 | 718 | case SEMI: 719 | case RBRACE: 720 | return; 721 | 722 | case KEYW: 723 | switch (cval) { 724 | case GOTO: 725 | if ((o = symbol()) != NAME) 726 | goto syntax; 727 | if(bsym->offset == 0) 728 | bsym->offset = isn++; 729 | jump(bsym->offset); 730 | goto semi; 731 | 732 | case RETURN: 733 | if (pushsym(symbol()) == LPARN) 734 | rcexpr(pexpr()); 735 | retseq(); 736 | goto semi; 737 | 738 | case IF: 739 | cbranch(pexpr(), o1=isn++, 0); 740 | statement(0); 741 | if ((o = symbol()) == KEYW && cval == 
ELSE) { 742 | jump(o2 = isn++); 743 | label(o1); 744 | statement(0); 745 | label(o2); 746 | return; 747 | } 748 | pushsym(o); 749 | label(o1); 750 | return; 751 | 752 | case WHILE: 753 | o1 = contlab; 754 | o2 = brklab; 755 | label(contlab = isn++); 756 | cbranch(pexpr(), brklab=isn++, 0); 757 | statement(0); 758 | jump(contlab); 759 | label(brklab); 760 | contlab = o1; 761 | brklab = o2; 762 | return; 763 | 764 | case BREAK: 765 | if (brklab < 0) 766 | error("Nothing to break from"); 767 | jump(brklab); 768 | goto semi; 769 | 770 | /* Not part of B, but very easy to implement */ 771 | /* 772 | case CONTINUE: 773 | if (contlab < 0) 774 | error("Nothing to continue"); 775 | jump(contlab); 776 | goto semi; 777 | */ 778 | 779 | case SWITCH: 780 | o1 = brklab; 781 | brklab = isn++; 782 | rcexpr(pexpr()); 783 | /* rcexpr(tree()); */ 784 | pswitch(); 785 | brklab = o1; 786 | return; 787 | 788 | case CASE: 789 | if ((o = symbol()) != CON) 790 | goto syntax; 791 | if ((o = symbol()) != COLON) 792 | goto syntax; 793 | if (swp == NULL) { 794 | error("Case not in switch"); 795 | goto stmt; 796 | } 797 | if (swp >= swtab+SWSIZ) 798 | error("Switch table overflow"); 799 | else { 800 | swp->swlab = isn; 801 | (swp++)->swval = cval; 802 | label(isn++); 803 | } 804 | goto stmt; 805 | 806 | case DEFAULT: 807 | if (swp == NULL) 808 | error("Default not in switch"); 809 | if ((o = symbol()) != COLON) 810 | goto syntax; 811 | deflab = isn++; 812 | label(deflab); 813 | goto stmt; 814 | } 815 | 816 | error("Unknown keyword"); 817 | goto syntax; 818 | 819 | case NAME: 820 | if (peekc == ':') { 821 | peekc = 0; 822 | if (bsym->class) { 823 | error("Redefinition"); 824 | goto stmt; 825 | } 826 | bsym->class = INTERN; 827 | if (bsym->offset == 0) 828 | bsym->offset = isn++; 829 | label(bsym->offset); 830 | goto stmt; 831 | } 832 | } 833 | pushsym(o); 834 | rcexpr(tree()); 835 | goto semi; 836 | 837 | semi: 838 | if ((o = symbol()) != SEMI) 839 | goto syntax; 840 | return; 841 | 842 | 
syntax: 843 | error("Statement syntax"); 844 | errflush(o); 845 | goto stmt; 846 | } 847 | 848 | struct tnode * 849 | block(int op, int value, struct tnode *tr1, struct tnode *tr2) 850 | { 851 | struct tnode t; 852 | int n; 853 | int *p, *ap; 854 | 855 | p = space; 856 | t.op = op; 857 | t.value = value; 858 | t.tr1 = tr1; 859 | t.tr2 = tr2; 860 | ap = (int*) &t; 861 | n = (sizeof(struct tnode)+sizeof(int)-1) & ~sizeof(int); 862 | if (space+n >= &ospace[OSSIZ]) { 863 | error("Expression overflow 1"); 864 | exit(1); 865 | } 866 | while (n--) 867 | *space++ = *ap++; 868 | return (struct tnode *) p; 869 | } 870 | 871 | void 872 | chklval(struct tnode *p) 873 | { 874 | if (p->op != NAME && p->op != STAR) 875 | error("Lvalue required"); 876 | } 877 | 878 | void 879 | build(int op) 880 | { 881 | struct tnode *p1, *p2; 882 | int dope; 883 | 884 | /* a[i] -> *(a+i) */ 885 | if (op == LBRACK) { 886 | build(PLUS); 887 | op = STAR; 888 | } 889 | dope = opdope[op]; 890 | if (dope&01) 891 | p2 = *--cp; 892 | p1 = *--cp; 893 | switch (op) { 894 | case QUEST: 895 | if (p2->op != COLON) 896 | error("Illegal conditional"); 897 | break; 898 | 899 | case AMPER: 900 | if (p1->op == STAR) { 901 | *cp++ = p1->tr1; 902 | return; 903 | } 904 | if (p1->op == NAME) { 905 | *cp++ = block(op,0,p1,NULL); 906 | return; 907 | } 908 | error("Illegal lvalue"); 909 | } 910 | if (dope&02) 911 | chklval(p1); 912 | if (dope&01) 913 | *cp++ = block(op,0,p1,p2); 914 | else 915 | *cp++ = block(op,0,p1,NULL); 916 | } 917 | 918 | struct tnode * 919 | tree(void) 920 | { 921 | struct tnode *cmst[CMSIZ]; 922 | int opst[SSIZE], prst[SSIZE]; 923 | int *op, *pp; 924 | int andflg; 925 | int o, os; 926 | int p, ps; 927 | 928 | space = ospace; 929 | op = opst; 930 | pp = prst; 931 | cp = cmst; 932 | *op = SEOF; 933 | *pp = 06; 934 | andflg = 0; 935 | 936 | advanc: 937 | switch (o=symbol()) { 938 | case NAME: 939 | if (pushsym(symbol()) == LPARN) { /* function */ 940 | if (bsym->class == 0) 941 | bsym->class = 
EXTERN; 942 | } else if (bsym->class == 0) { 943 | error("%s undefined", bsym->name); 944 | bsym->class = EXTERN; 945 | } 946 | *cp++ = block(NAME,0,(struct tnode *)bsym,NULL); 947 | goto tand; 948 | 949 | case STRING: 950 | *cp++ = block(STRING,cval,NULL,NULL); 951 | goto tand; 952 | 953 | case CON: 954 | caseCON: 955 | *cp++ = block(CON,cval,NULL,NULL); 956 | goto tand; 957 | 958 | tand: 959 | if (cp >= &cmst[CMSIZ]) { 960 | error("Expression overflow 2"); 961 | exit(1); 962 | } 963 | if (andflg) 964 | goto syntax; 965 | andflg = 1; 966 | goto advanc; 967 | 968 | case DECBEF: 969 | case INCBEF: 970 | if (andflg) 971 | o += 2; 972 | goto oponst; 973 | 974 | case EXCLA: 975 | if (andflg) 976 | goto syntax; 977 | goto oponst; 978 | 979 | case MINUS: 980 | if (!andflg) { 981 | if (pushsym(symbol()) == CON) { 982 | symbol(); 983 | cval = -cval; 984 | goto caseCON; 985 | } 986 | o = NEG; 987 | } 988 | andflg = 0; 989 | goto oponst; 990 | 991 | case AND: 992 | case TIMES: 993 | if (andflg) 994 | andflg = 0; 995 | else 996 | if (o == AND) 997 | o = AMPER; 998 | else 999 | o = STAR; 1000 | goto oponst; 1001 | 1002 | case LPARN: 1003 | if (andflg) { 1004 | o = symbol(); 1005 | if (o == RPARN) 1006 | o = MCALL; 1007 | else { 1008 | pushsym(o); 1009 | o = CALL; 1010 | andflg = 0; 1011 | } 1012 | } 1013 | goto oponst; 1014 | 1015 | case RPARN: 1016 | case RBRACK: 1017 | if (!andflg) 1018 | goto syntax; 1019 | goto oponst; 1020 | } 1021 | 1022 | if (!andflg) 1023 | goto syntax; 1024 | andflg = 0; 1025 | 1026 | oponst: 1027 | p = (opdope[o]>>9) & 077; 1028 | opon1: 1029 | ps = *pp; 1030 | if (p > ps || (p == ps && (opdope[o]&0200))) { /* right-assoc */ 1031 | switch (o) { 1032 | case LPARN: 1033 | case LBRACK: 1034 | case CALL: 1035 | p = 04; 1036 | } 1037 | if (op >= &opst[SSIZE]) { 1038 | error("Expression overflow 3"); 1039 | exit(1); 1040 | } 1041 | *++op = o; 1042 | *++pp = p; 1043 | goto advanc; 1044 | } 1045 | --pp; 1046 | switch (os = *op--) { 1047 | case SEOF: 1048 | 
pushsym(o); 1049 | return(*--cp); 1050 | 1051 | case CALL: 1052 | if (o != RPARN) 1053 | goto syntax; 1054 | build(os); 1055 | goto advanc; 1056 | 1057 | case MCALL: 1058 | *cp++ = NULL; 1059 | os = CALL; 1060 | goto fbuild; 1061 | 1062 | case LPARN: 1063 | if (o != RPARN) 1064 | goto syntax; 1065 | goto advanc; 1066 | 1067 | case LBRACK: 1068 | if (o != RBRACK) 1069 | goto syntax; 1070 | build(LBRACK); 1071 | goto advanc; 1072 | 1073 | } 1074 | fbuild: 1075 | build(os); 1076 | goto opon1; 1077 | syntax: 1078 | error("Expression syntax"); 1079 | errflush(o); 1080 | return NULL; 1081 | } 1082 | 1083 | void 1084 | error(char *s, ...) 1085 | { 1086 | va_list args; 1087 | 1088 | va_start(args, s); 1089 | nerror++; 1090 | fprintf(stderr, "%d: ", line); 1091 | vfprintf(stderr, s, args); 1092 | putc('\n', stderr); 1093 | va_end(args); 1094 | } 1095 | 1096 | void 1097 | errflush(int o) 1098 | { 1099 | while (o > RBRACE) /* ; { } */ 1100 | o = symbol(); 1101 | pushsym(o); 1102 | } 1103 | 1104 | /* 1105 | * 000001 binary 1106 | * 000002 need lvalue 1107 | * 000004 relational 1108 | * 000010 assignment 1109 | * 000100 commutative 1110 | * 000200 right-assoc 1111 | * 0XX000 precedence 1112 | */ 1113 | int opdope[] = { 1114 | 000000, /* EOFC */ 1115 | 000000, /* ; */ 1116 | 000000, /* { */ 1117 | 000000, /* } */ 1118 | 036000, /* [ */ 1119 | 002000, /* ] */ 1120 | 036000, /* ( */ 1121 | 002000, /* ) */ 1122 | 014201, /* : */ 1123 | 007001, /* , */ 1124 | 000000, /* 10 */ 1125 | 000000, /* 11 */ 1126 | 000000, /* 12 */ 1127 | 000000, /* 13 */ 1128 | 000000, /* 14 */ 1129 | 036001, /* mcall */ 1130 | 036001, /* call */ 1131 | 034202, /* ++a */ 1132 | 034202, /* a++ */ 1133 | 034202, /* --a */ 1134 | 034202, /* a-- */ 1135 | 034200, /* !un */ 1136 | 034200, /* -un */ 1137 | 034200, /* &un */ 1138 | 034200, /* *un */ 1139 | 014201, /* ? 
*/ 1140 | 000000, /* 26 */ 1141 | 000000, /* 27 */ 1142 | 000000, /* 28 */ 1143 | 000000, /* 29 */ 1144 | 030101, /* + */ 1145 | 030001, /* - */ 1146 | 032001, /* % */ 1147 | 032101, /* * */ 1148 | 032001, /* / */ 1149 | 016101, /* | */ 1150 | 020101, /* & */ 1151 | 026001, /* << */ 1152 | 026001, /* >> */ 1153 | 022105, /* == */ 1154 | 022105, /* != */ 1155 | 024005, /* <= */ 1156 | 024005, /* < */ 1157 | 024005, /* >= */ 1158 | 024005, /* > */ 1159 | 000000, /* 45 */ 1160 | 000000, /* 46 */ 1161 | 000000, /* 47 */ 1162 | 000000, /* 48 */ 1163 | 012013, /* = */ 1164 | 012213, /* =+ */ 1165 | 012213, /* =- */ 1166 | 012213, /* =% */ 1167 | 012213, /* =* */ 1168 | 012213, /* =/ */ 1169 | 012213, /* =| */ 1170 | 012213, /* =& */ 1171 | 012213, /* =<< */ 1172 | 012213, /* =>> */ 1173 | 012213, /* === */ 1174 | 012213, /* =!= */ 1175 | 012213, /* =<= */ 1176 | 012213, /* =< */ 1177 | 012213, /* =>= */ 1178 | 012213, /* => */ 1179 | 000000, /* CON */ 1180 | 000000, /* STRING */ 1181 | 000000 /* NAME */ 1182 | }; 1183 | 1184 | char ctab[128] = { 1185 | EOFC, UNKN, UNKN, UNKN, UNKN, UNKN, UNKN, UNKN, 1186 | LETTER, SPACE, NEWLN, SPACE, SPACE, UNKN, UNKN, UNKN, 1187 | UNKN, UNKN, UNKN, UNKN, UNKN, UNKN, UNKN, UNKN, 1188 | UNKN, UNKN, UNKN, UNKN, UNKN, UNKN, UNKN, UNKN, 1189 | SPACE, EXCLA, DQUOTE, UNKN, UNKN, MOD, AND, SQUOTE, 1190 | LPARN, RPARN, TIMES, PLUS, COMMA, MINUS, UNKN, DIVIDE, 1191 | DIGIT, DIGIT, DIGIT, DIGIT, DIGIT, DIGIT, DIGIT, DIGIT, 1192 | DIGIT, DIGIT, COLON, SEMI, LESS, ASSIGN, GREAT, QUEST, 1193 | UNKN, LETTER, LETTER, LETTER, LETTER, LETTER, LETTER, LETTER, 1194 | LETTER, LETTER, LETTER, LETTER, LETTER, LETTER, LETTER, LETTER, 1195 | LETTER, LETTER, LETTER, LETTER, LETTER, LETTER, LETTER, LETTER, 1196 | LETTER, LETTER, LETTER, LBRACK, UNKN, RBRACK, UNKN, LETTER, 1197 | UNKN, LETTER, LETTER, LETTER, LETTER, LETTER, LETTER, LETTER, 1198 | LETTER, LETTER, LETTER, LETTER, LETTER, LETTER, LETTER, LETTER, 1199 | LETTER, LETTER, LETTER, LETTER, LETTER, 
LETTER, LETTER, LETTER, 1200 | LETTER, LETTER, LETTER, LBRACE, OR, RBRACE, UNKN, UNKN 1201 | }; 1202 | 1203 | /* debug function */ 1204 | void printtoken(int tok, FILE *out) 1205 | { 1206 | static char *strtab[128]; 1207 | strtab[0] = "EOFC"; 1208 | strtab[1] = "SEMI"; 1209 | strtab[2] = "LBRACE"; 1210 | strtab[3] = "RBRACE"; 1211 | strtab[4] = "LBRACK"; 1212 | strtab[5] = "RBRACK"; 1213 | strtab[6] = "LPARN"; 1214 | strtab[7] = "RPARN"; 1215 | strtab[8] = "COLON"; 1216 | strtab[9] = "COMMA"; 1217 | 1218 | strtab[15] = "MCALL"; 1219 | strtab[16] = "CALL"; 1220 | strtab[17] = "DECBEF"; 1221 | strtab[18] = "INCBEF"; 1222 | strtab[19] = "DECAFT"; 1223 | strtab[20] = "INCAFT"; 1224 | strtab[21] = "EXCLA"; 1225 | strtab[22] = "NEG"; 1226 | strtab[23] = "AMPER"; 1227 | strtab[24] = "STAR"; 1228 | strtab[25] = "QUEST"; 1229 | 1230 | strtab[30] = "PLUS"; 1231 | strtab[31] = "MINUS"; 1232 | strtab[32] = "MOD"; 1233 | strtab[33] = "TIMES"; 1234 | strtab[34] = "DIVIDE"; 1235 | strtab[35] = "OR"; 1236 | strtab[36] = "AND"; 1237 | strtab[37] = "LSHIFT"; 1238 | strtab[38] = "RSHIFT"; 1239 | strtab[39] = "EQUAL"; 1240 | strtab[40] = "NEQUAL"; 1241 | strtab[41] = "LESSEQ"; 1242 | strtab[42] = "LESS"; 1243 | strtab[43] = "GREATEQ"; 1244 | strtab[44] = "GREAT"; 1245 | 1246 | strtab[49] = "ASSIGN"; 1247 | strtab[50] = "ASPLUS"; 1248 | strtab[51] = "ASMINUS"; 1249 | strtab[52] = "ASMOD"; 1250 | strtab[53] = "ASTIMES"; 1251 | strtab[54] = "ASDIV"; 1252 | strtab[55] = "ASOR"; 1253 | strtab[56] = "ASAND"; 1254 | strtab[57] = "ASLSH"; 1255 | strtab[58] = "ASRSH"; 1256 | strtab[59] = "ASEQUAL"; 1257 | strtab[60] = "ASNEQL"; 1258 | strtab[61] = "ASLEQ"; 1259 | strtab[62] = "ASLESS"; 1260 | strtab[63] = "ASGTQ"; 1261 | strtab[64] = "ASGREAT"; 1262 | 1263 | strtab[65] = "CON"; 1264 | strtab[66] = "STRING"; 1265 | strtab[67] = "NAME"; 1266 | strtab[68] = "KEYW"; 1267 | 1268 | strtab[127] = "UNKN"; 1269 | 1270 | if (tok == CON || tok == STRING) { 1271 | fprintf(out, "%s(%d) ", strtab[tok], 
cval); 1272 | return; 1273 | } 1274 | if (tok == NAME) { 1275 | fprintf(out, "%s(%s) ", strtab[tok], symbuf); 1276 | return; 1277 | } 1278 | 1279 | fprintf(out, "%s ", strtab[tok]); 1280 | } 1281 | 1282 | --------------------------------------------------------------------------------