├── .gitattributes
├── .gitignore
├── LICENCE.txt
├── Makefile
├── README.md
├── stage-0
    ├── .gitignore
    ├── Makefile
    ├── README.md
    └── unhex.x
├── stage-1
    ├── .gitignore
    ├── Makefile
    ├── README.md
    ├── elfify.x
    ├── unhexl.ts.x
    └── unhexl.ts.xl
├── stage-2
    ├── .gitignore
    ├── Makefile
    ├── README.md
    ├── as.s
    └── as.ts.xl
├── stage-3
    ├── .gitignore
    ├── Makefile
    ├── README.md
    ├── as.s
    ├── ld.s
    ├── test1.s
    ├── test2.s
    └── test3.s
├── stage-4
    ├── .gitignore
    ├── Makefile
    ├── README.txt
    ├── char.s
    ├── crt0.s
    ├── ctype.s
    ├── error.s
    ├── exit.c
    ├── expr.s
    ├── i386.s
    ├── imath.s
    ├── input.c
    ├── main.s
    ├── malloc.c
    ├── memory.s
    ├── output.c
    ├── scanner.s
    ├── signal.c
    ├── stdarg.c
    ├── stdio.s
    ├── stmt.s
    ├── string.s
    ├── string2.c
    ├── symtab.s
    └── unistd.s
└── stage-5
    ├── .gitignore
    ├── Makefile
    ├── README.txt
    ├── cc.c
    ├── cli.c
    ├── cmp.c
    ├── codegen.c
    ├── cpp-tests
        ├── Makefile
        ├── builtin.c
        ├── builtin.i
        ├── directive.c
        ├── directive.i
        ├── empty.c
        ├── empty.i
        ├── fn.c
        ├── fn.i
        ├── glue.c
        ├── hash.c
        ├── hash.i
        ├── include.c
        ├── include.i
        ├── includemacro.c
        ├── includemacro.i
        ├── macros.c
        ├── macros.i
        ├── nocpp.c
        ├── nocpp.i
        ├── obj.c
        ├── obj.i
        ├── rescan.c
        ├── rescan.i
        ├── simple.c
        ├── simple.i
        ├── suppress.c
        ├── suppress.i
        └── vers2.h
    ├── cpp.c
    ├── cpptype.c
    ├── eval.c
    ├── expr.c
    ├── i386.c
    ├── include
        ├── bits
        │   ├── eof.h
        │   ├── file.h
        │   ├── file_access.h
        │   ├── null.h
        │   ├── size_t.h
        │   ├── std_streams.h
        │   ├── string.h
        │   ├── struct_tm.h
        │   └── time_t.h
        ├── errno.h
        ├── rbc_init.h
        ├── stdio.h
        ├── string.h
        └── time.h
    ├── macros.c
    ├── main.c
    ├── node.c
    ├── nodenew.c
    ├── pvector.c
    ├── pvector.h
    ├── scanbase.c
    ├── scanner.c
    ├── stmt.c
    ├── symtab.c
    ├── timeconv.c
    └── type.c


/.gitattributes:
--------------------------------------------------------------------------------
1 | *.x     linguist-language=text
2 | *.xl    linguist-language=gas
3 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | .*.swp
2 | bin/
3 | lib/
4 | include/
5 | !stage-*/include/
6 | 


--------------------------------------------------------------------------------
/Makefile:
--------------------------------------------------------------------------------
 1 | # Makefile
 2 | 
 3 | # Copyright (C) 2009, 2011, 2012, 2013, 2020 
 4 | # Richard Smith <richard@ex-parrot.com>
 5 | # All rights reserved.
 6 | 
 7 | STAGES = 0 1 2 3 4 5
 8 | 
 9 | SHELL  = /bin/sh
10 | PATH   = .
11 | 
12 | RM     = /bin/rm
13 | MKDIR  = /bin/mkdir
14 | MAKE   = /usr/bin/make
15 | 
16 | BINDIR = bin
17 | LIBDIR = lib
18 | INCDIR = include
19 | 
20 | world:
21 | 	$(RM) -rf $(BINDIR) $(LIBDIR) $(INCDIR)
22 | 	$(MAKE) init
23 | 	set -e; for n in $(STAGES); do $(MAKE) -r -C stage-$$n $@; done
24 | 
25 | init:
26 | 	$(MKDIR) -p $(BINDIR) $(LIBDIR) $(INCDIR)
27 | 
28 | check:
29 | 	set -e; for n in $(STAGES); do $(MAKE) -r -C stage-$$n $@; done
30 | 
31 | clean:
32 | 	set -e; for n in $(STAGES); do $(MAKE) -r -C stage-$$n $@; done
33 | 	$(RM) -rf $(BINDIR) $(LIBDIR) $(INCDIR)
34 | 
35 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # Bootstrap Experiment
 2 | 
 3 | In this experiment I aim to develop from the ground up a working compiler,
 4 | assembler, linker and library, for a C-like language.  I start with a minimal
 5 | program capable of generating itself from its source and gradually develop
 6 | higher level tools and abstractions, as follows.
 7 | 
 8 | The programs produced in this project are 32-bit ELF executables which run on
 9 | a Linux kernel running on an Intel x86 processor.  They run fine on modern
10 | 64-bit systems.
11 | 
12 | 
13 | ## Stage 0 – `unhex`
14 | 
15 | The starting point of the experiment is a tiny program for packing hexadecimal
16 | octets into binary.
17 | 
18 | ## Stage 1 – `unhexl` & `elfify`
19 | 
20 | This stage adds a tool to wrap a text section into a minimal ELF executable,
21 | as well as further developing the `unhex` program to support labels,
22 | references to earlier labels, and a freer input format that allows comments.
23 | 
24 | ## Stage 2 – `as`
25 | 
26 | Here we introduce a light-weight assembler, written in machine code using no
27 | forward jumps.  It generates a text section that can be wrapped with the stage
28 | 1 `elfify` program to produce an executable.
29 | 
30 | ## Stage 3 – `as` & `ld`
31 | 
32 | The assembler is rewritten in assembler language and is joined by a linker,
33 | which together allow for separate compilation units.
34 | 
35 | ## Stage 4 – `cc`, `crt0.o` & `libc.o`
36 | 
37 | The project's first compiler is added at this stage.  Its input language is a
38 | typeless subset of C similar to B, and it emits assembler language.  We also
39 | build a startup file (`crt0.o`) and the start of a simple C library.
40 | 
41 | ## Stage 5 – `ccx`, `cpp`, `cc` & `cmp`
42 | 
43 | The compiler is rewritten in its source language, and a type system added.  We
44 | use it to implement a fairly standards-compliant C preprocessor, and a
45 | compiler driver (`cc`) that spawns the `cpp`, `ccx` (the compiler proper),
46 | `as` and `ld`.  Finally, `cmp` is a POSIX compliant utility to compare two
47 | files, which is used in a preprocessor test suite.
48 | 
49 | ## Licensing
50 | 
51 | The code in this project is copyright (C) Richard Smith, 2009–2021, and
52 | is licensed for use under version 3 or later of the [GNU General Public
53 | License](LICENCE.txt), a copy of which can be found in the file `LICENCE.txt`.
54 | The documentation in these `README` files is licensed under the 
55 | [Creative Commons BY-NC-SA licence, 
56 | version 4](https://creativecommons.org/licenses/by-nc/4.0/).
57 | 


--------------------------------------------------------------------------------
/stage-0/.gitignore:
--------------------------------------------------------------------------------
1 | .*.swp
2 | unhex
3 | 


--------------------------------------------------------------------------------
/stage-0/Makefile:
--------------------------------------------------------------------------------
 1 | # stage-0/Makefile
 2 | 
 3 | # Copyright (C) 2009, 2011, 2015, 2020 Richard Smith <richard@ex-parrot.com>
 4 | # All rights reserved.
 5 | 
 6 | SHELL  = /bin/sh
 7 | 
 8 | CHMOD  = /bin/chmod
 9 | RM     = /bin/rm
10 | CP     = /bin/cp
11 | CMP    = /usr/bin/cmp
12 | MAKE   = /usr/bin/make
13 | CAT    = /bin/cat
14 | PRINTF = /usr/bin/printf
15 | 
16 | BINDIR = ../bin
17 | PATH   = $(BINDIR)
18 | 
19 | all:	init unhex
20 | 
21 | init:
22 | 	@test -d $(BINDIR) || $(MAKE) -C .. init
23 | 
24 | unhex:	init unhex.x
25 | 	for x in `$(CAT) unhex.x`; do $(PRINTF) \\x$$x; done > unhex
26 | 	$(CHMOD) a+x unhex
27 | 
28 | check:	check-unhex
29 | 
30 | check-unhex:	unhex unhex.x	# self-hosting check: unhex must reproduce itself byte-for-byte from its own source
31 | 	./unhex < unhex.x > unhex2
32 | 	$(CMP) unhex unhex2
33 | 	$(RM) unhex2
34 | 
35 | install: unhex
36 | 	$(CP) unhex $(BINDIR)
37 | 
38 | clean:
39 | 	$(RM) -f unhex unhex2
40 | 
41 | world:
42 | 	set -e; for TARGET in clean init all check install; do \
43 | 	    $(MAKE) $$TARGET; \
44 | 	done
45 | 


--------------------------------------------------------------------------------
/stage-0/README.md:
--------------------------------------------------------------------------------
 1 | # Bootstrap: Stage 0
 2 | 
 3 | The starting point for this bootstrap experiment is the `unhex` program. 
 4 | It is a very simple program for converting a stream of hexadecimal 
 5 | octets on standard input into a binary file written to standard output.
 6 | 
 7 | > Usage:  `unhex < test.x > test`
 8 | 
 9 | where `.x` is used as the canonical extension for its input files. The 
10 | source file format is very restrictive:
11 | 
12 | ```ebnf
13 |   XDIGIT ::= [0-9A-F]
14 |   CHAR   ::= any character
15 | 
16 |   octet  ::= XDIGIT XDIGIT CHAR
17 |   file   ::= octet*
18 | ```
19 | 
20 | This format is exceptionally easy to parse, which was the whole idea.
21 | By allowing an arbitrary third character it allows some degree of source
22 | code prettification by using spaces, new lines or other punctuation
23 | marks.
24 | 
25 | Any deviation from this format will result in garbage being written 
26 | to the output stream, as no error checking is done.  In particular,
27 | there must not be any trailing space on lines, nor can there be blank
28 | lines.
29 | 
30 | The `unhex.x` file contains the hexadecimal octets for `unhex`.  Processing
31 | it with `unhex` yields another copy of `unhex`, which we check is identical
32 | to the initial copy as a way of testing that the program is working.
33 | 
34 | The program is deliberately minimal.  Of necessity, it starts with an
35 | ELF header (52 bytes), followed by one program header for the whole file
36 | (32 bytes).  The executable code is at the end (109 bytes).  There are no
37 | section headers and no `.shstrtab` section, which together mean that the
38 | binutils diagnostic tools (`objdump`, etc.) are of limited use on it.
39 | 
40 | Conceptually the program should have been written using some lower-level
41 | technique, such as with a hex-editor.  But instead, the Makefile
42 | contains a simple one-line shell script to perform the same action as
43 | `unhex`, which is used to create the first `unhex` binary.
44 | 


--------------------------------------------------------------------------------
/stage-0/unhex.x:
--------------------------------------------------------------------------------
 1 | 7F 45 4C 46 01 01 01 00
 2 | 00 00 00 00 00 00 00 00
 3 | 02 00 03 00 01 00 00 00
 4 | 6F 80 04 08 34 00 00 00
 5 | 00 00 00 00 00 00 00 00
 6 | 34 00
 7 | 20 00 01 00
 8 | 00 00 00 00
 9 | 00 00
10 | 01 00 00 00 00 00 00 00
11 | 00 80 04 08 00 80 04 08
12 | C1 00 00 00 C1 00 00 00
13 | 05 00 00 00 00 10 00 00
14 | 2C 30
15 | 5D
16 | C3
17 | 55
18 | 89 E5
19 | 8B 45 08
20 | 3C 41
21 | 7C F2
22 | 2C 37
23 | EB F0
24 | 89 C3
25 | B8 01 00 00 00
26 | CD 80
27 | 89 E5
28 | 50
29 | BA 03 00 00 00
30 | 8D 4D FC
31 | BB 00 00 00 00
32 | B8 03 00 00 00
33 | CD 80
34 | 3D 00 00 00 00
35 | 7E D9
36 | E8 C6 FF FF FF
37 | 88 C5
38 | 8B 45 FC
39 | 88 E0
40 | 50
41 | E8 B9 FF FF FF
42 | 83 C4 04
43 | B1 04
44 | D2 E5
45 | 00 C5
46 | 88 6D FC
47 | BA 01 00 00 00
48 | 8D 4D FC
49 | BB 01 00 00 00
50 | B8 04 00 00 00
51 | CD 80
52 | EB B1
53 | 


--------------------------------------------------------------------------------
/stage-1/.gitignore:
--------------------------------------------------------------------------------
1 | .*.swp
2 | unhex
3 | unhexl
4 | elfify
5 | 


--------------------------------------------------------------------------------
/stage-1/Makefile:
--------------------------------------------------------------------------------
 1 | # stage-1/Makefile
 2 | 
 3 | # Copyright (C) 2009, 2011, 2020 Richard Smith <richard@ex-parrot.com>
 4 | # All rights reserved.
 5 | 
 6 | SHELL  = /bin/sh
 7 | 
 8 | CHMOD  = /bin/chmod
 9 | RM     = /bin/rm
10 | CP     = /bin/cp
11 | CMP    = /usr/bin/cmp
12 | MAKE   = /usr/bin/make
13 | 
14 | BINDIR = ../bin
15 | PATH   = $(BINDIR)
16 | 
17 | 
18 | all:	init unhexl elfify
19 | 
20 | init:
21 | 	@test -d $(BINDIR) || $(MAKE) -C .. init
22 | 	@test -x $(BINDIR)/unhex || $(MAKE) -C ../stage-0 install
23 | 
24 | elfify:	elfify.x
25 | 	unhex < elfify.x > elfify
26 | 	$(CHMOD) a+x elfify
27 | 
28 | unhexl:	elfify unhexl.ts.x
29 | 	unhex < unhexl.ts.x > unhexl.ts
30 | 	./elfify unhexl.ts > unhexl
31 | 	$(CHMOD) a+x unhexl
32 | 	$(RM) unhexl.ts
33 | 
34 | check:	check-unhexl
35 | 
36 | check-unhexl:	unhexl elfify unhexl.ts.xl
37 | 	./unhexl < unhexl.ts.xl > unhexl.ts
38 | 	./elfify unhexl.ts > unhexl2
39 | 	$(CHMOD) a+x unhexl2
40 | 	./unhexl2 < unhexl.ts.xl > unhexl2.ts
41 | 	$(CMP) unhexl.ts unhexl2.ts
42 | 	./elfify unhexl2.ts > unhexl3
43 | 	$(CMP) unhexl2 unhexl3
44 | 	$(RM) unhexl.ts unhexl2.ts unhexl2 unhexl3
45 | 
46 | install: unhexl elfify
47 | 	$(CP) unhexl elfify $(BINDIR)
48 | 	$(RM) -f $(BINDIR)/unhex
49 | 
50 | clean:
51 | 	$(RM) -f unhexl elfify unhexl.ts unhexl2.ts unhexl2 unhexl3
52 | 
53 | world:
54 | 	set -e; for TARGET in clean init all check install; do \
55 | 	    $(MAKE) $$TARGET; \
56 | 	done
57 | 


--------------------------------------------------------------------------------
/stage-1/README.md:
--------------------------------------------------------------------------------
  1 | # Bootstrap: Stage 1
  2 | 
  3 | In writing the stage 0 `unhex` tool, the two most tedious and error-prone
  4 | tasks were generating valid ELF headers, which entailed keeping track
  5 | of the size of the `.text` section and the location of the entry point,
  6 | and calculating the file offsets used as arguments to various `JMP` and 
  7 | `Jcc` statements.  These two tasks were particularly prone to introduce
  8 | errors as the code was modified, perhaps to correct some error found
  9 | during testing.  Keeping all of the offsets and sizes updated proved
 10 | rather more onerous than manually converting the assembly language 
 11 | into hexadecimal values.
 12 | 
 13 | Therefore stage 1 adds two new tools, `unhexl` and `elfify`, to handle 
 14 | these tasks.  The first, `unhexl`, is a significantly improved version of
 15 | the stage 0 `unhex`.  It allows arbitrary white-space and comments.
 16 | More importantly, it allows labels to be defined and referenced – that
 17 | is what the `l` at the end of the program name refers to.
 18 | 
 19 | The grammar is:
 20 | 
 21 | ```ebnf
 22 |   WS      ::= [ \t\n]
 23 |   XDIGIT  ::= [0-9A-F]
 24 |   LCHAR   ::= [0-9A-Za-z_] 
 25 |   LSTART  ::= [^ \t\n0-9A-F#]
 26 |   LREFEND ::= [^:0-9A-Za-z_]
 27 |   CHAR    ::= any character
 28 | 
 29 |   comment ::= '#' CHAR* '\n'
 30 |   octet   ::= XDIGIT XDIGIT
 31 |   label   ::= LSTART LCHAR+
 32 |   ldef    ::= label ':'
 33 |   lref    ::= label LREFEND
 34 | 
 35 |   file    ::= ( comment | octet | lref | ldef | WS* )*
 36 | ```
 37 | 
 38 | In order to keep the grammar simple, only upper case letters are 
 39 | accepted in hexadecimal octets and labels must not start with a
 40 | valid hexadecimal digit.  It is suggested that labels start with
 41 | a '.' or a lower case letter.
 42 | 
 43 | Label references are converted into little-endian 32-bit offsets 
 44 | relative to the end of the address being written.  For example, the 
 45 | following x86 assembly will generate an infinite loop with the label 
 46 | reference expanding to `FB FF FF FF` (or -5 in decimal).
 47 | 
 48 | ```asm
 49 |   foo:
 50 |     E9 foo
 51 | ```
 52 | 
 53 | The program takes its source on standard input and does a single pass 
 54 | over it writing data to standard output. 
 55 | 
 56 | > Usage: `unhexl < test.xl > test`
 57 | 
 58 | The fact that it is a single pass means references can only be made to 
 59 | labels already defined.  The lack of dynamic memory allocation in stage 
 60 | 1 means the number of labels is limited to 256.  For the same reason, 
 61 | the line length is limited to 80 characters.  These constraints are not 
 62 | checked by the stage 1 program – failure to stick to 80 characters per 
 63 | line or 256 labels *will* result in a buffer overflow.  Some other
 64 | errors, including various syntax errors, are flagged by a non-zero return
 65 | status, but basically, only minimal error checking is done.
 66 | 
 67 | The second program, `elfify`, takes a `.text` section and converts it into 
 68 | a stand-alone ELF program.  Unlike the stage 0 `unhex`, this program has 
 69 | section headers and a minimal `.shstrtab` section so that tools such as
 70 | `objdump -d` will work on it.  It also adds these to any executable it 
 71 | creates.
 72 | 
 73 | Because `elfify` needs to find out the size of the `.text` section and place
 74 | it in the ELF program header, it cannot act as a straightforward filter
 75 | on standard input.  (Placing the program header at the end of the file
 76 | does not help because the program header's offset is needed in the ELF
 77 | header.)  Instead it takes the name of the file containing the `.text`
 78 | section as its only command line argument.
 79 | 
 80 | > Usage: `elfify test.ts > test`
 81 | 
 82 | where `.ts` is used as the canonical extension for a `.text` section.
 83 | 
 84 | As `elfify` does not parse the `.text` section, it cannot work out where 
 85 | the entry point is: it just assumes that the entry point is 5 bytes
 86 | before the end of the `.text` section.  It is therefore suggested that all
 87 | programs end with a jump to the real entry point.  On x86, a 32-bit
 88 | relative jump requires precisely 5 bytes and can easily be generated
 89 | by `unhexl` by ending the file with:
 90 | 
 91 | ```asm
 92 |   E9 main
 93 | ```
 94 | 
 95 | We check the stage 1 tools are working correctly by using them to build
 96 | a new copy of `unhexl` from source in its own input language.  (This is
 97 | why there are two copies of the source for `unhexl`: a `.ts.x` file for
 98 | processing with the stage 0 `unhex`, and a `.ts.xl` file for use with the
 99 | stage 1 `unhexl`.)  This is repeated, and the second and third generation
100 | unhexl binaries are required to be identical.
101 | 


--------------------------------------------------------------------------------
/stage-1/elfify.x:
--------------------------------------------------------------------------------
  1 | 7F 45 4C 46 01 01 01 00
  2 | 00 00 00 00 00 00 00 00
  3 | 02 00 03 00 01 00 00 00
  4 | 0F 82 04 08 34 00 00 00
  5 | 54 00 00 00 00 00 00 00
  6 | 34 00
  7 | 20 00 01 00
  8 | 28 00 03 00
  9 | 02 00
 10 | 01 00 00 00 00 00 00 00
 11 | 00 80 04 08 00 80 04 08
 12 | 14 02 00 00 14 02 00 00
 13 | 05 00 00 00 00 10 00 00
 14 | 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
 15 | 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
 16 | 01 00 00 00 01 00 00 00 06 00 00 00 E0 80 04 08 E0 00 00 00
 17 | 34 01 00 00 00 00 00 00 00 00 00 00 04 00 00 00 00 00 00 00
 18 | 07 00 00 00 03 00 00 00 00 00 00 00 00 00 00 00 CC 00 00 00
 19 | 14 00 00 00 00 00 00 00 00 00 00 00 01 00 00 00 00 00 00 00
 20 | 00 2E 74 65 78 74 00 2E 73 68 73 74 72 74 61 62 00 00 00 00
 21 | 55
 22 | 89 E5
 23 | 8B 75 04
 24 | 5D
 25 | C3
 26 | 55
 27 | 89 E5
 28 | BA 04 00 00 00
 29 | 8D 4D 08
 30 | BB 01 00 00 00
 31 | B8 04 00 00 00
 32 | CD 80
 33 | 5D
 34 | C3
 35 | BB 00 00 00 00
 36 | B8 01 00 00 00
 37 | CD 80
 38 | BB 01 00 00 00
 39 | B8 01 00 00 00
 40 | CD 80
 41 | 89 E5
 42 | 83 7D 00 01
 43 | 7E EC
 44 | B9 00 00 00 00
 45 | 8B 5D 08
 46 | B8 05 00 00 00
 47 | CD 80
 48 | 83 F8 00
 49 | 7C D8
 50 | 50
 51 | 81 EC 00 01 00 00
 52 | 89 E1
 53 | 8B 5D FC
 54 | B8 6C 00 00 00
 55 | CD 80
 56 | E8 93 FF FF FF
 57 | 81 EE 4D 01 00 00
 58 | BA 18 00 00 00
 59 | 89 F1
 60 | BB 01 00 00 00
 61 | B8 04 00 00 00
 62 | CD 80
 63 | 8B 85 10 FF FF FF
 64 | 81 C0 DB 80 04 08
 65 | 50
 66 | E8 70 FF FF FF
 67 | 83 C4 04
 68 | 83 C6 1C
 69 | BA 28 00 00 00
 70 | 89 F1
 71 | BB 01 00 00 00
 72 | B8 04 00 00 00
 73 | CD 80
 74 | 8B 85 10 FF FF FF
 75 | 81 C0 E0 00 00 00
 76 | 50
 77 | E8 45 FF FF FF
 78 | E8 40 FF FF FF
 79 | 83 C4 04
 80 | 83 C6 30
 81 | BA 44 00 00 00
 82 | 89 F1
 83 | BB 01 00 00 00
 84 | B8 04 00 00 00
 85 | CD 80
 86 | FF B5 10 FF FF FF
 87 | E8 1C FF FF FF
 88 | 83 C4 04
 89 | 83 C6 48
 90 | BA 4C 00 00 00
 91 | 89 F1
 92 | BB 01 00 00 00
 93 | B8 04 00 00 00
 94 | CD 80
 95 | 89 E1
 96 | BA 00 01 00 00
 97 | 8B 5D FC
 98 | B8 03 00 00 00
 99 | CD 80
100 | 83 F8 00
101 | 0F 8E 02 FF FF FF
102 | 89 C2
103 | BB 01 00 00 00
104 | B8 04 00 00 00
105 | CD 80
106 | EB D8
107 | E9 05 FF FF FF
108 | 


--------------------------------------------------------------------------------
/stage-1/unhexl.ts.x:
--------------------------------------------------------------------------------
  1 | 55
  2 | 89 E5
  3 | 8B 45 08
  4 | 3C 20
  5 | 74 0A
  6 | 3C 09
  7 | 74 06
  8 | 3C 0A
  9 | 74 02
 10 | 31 C0
 11 | 5D
 12 | C3
 13 | 55
 14 | 89 E5
 15 | 8B 45 08
 16 | 3C 30
 17 | 7C 18
 18 | 3C 3A
 19 | 7C 16
 20 | 3C 41
 21 | 7C 10
 22 | 3C 5B
 23 | 7C 0E
 24 | 3C 5F
 25 | 74 0A
 26 | 3C 61
 27 | 7C 04
 28 | 3C 7B
 29 | 7C 02
 30 | 31 C0
 31 | 5D
 32 | C3
 33 | 55
 34 | 89 E5
 35 | 8B 45 08
 36 | 3C 30
 37 | 7C 0C
 38 | 3C 3A
 39 | 7C 0F
 40 | 3C 41
 41 | 7C 04
 42 | 3C 46
 43 | 7E 0B
 44 | B8 FF FF FF FF
 45 | EB 06
 46 | 2C 30
 47 | EB 02
 48 | 2C 37
 49 | 5D
 50 | C3
 51 | 55
 52 | 89 E5
 53 | BA 01 00 00 00
 54 | 31 DB
 55 | B8 03 00 00 00
 56 | CD 80
 57 | 83 F8 01
 58 | 0F 85 3F 01 00 00
 59 | 5D
 60 | C3
 61 | 89 E5
 62 | 81 EC 58 10 00 00
 63 | 8D 85 A8 EF FF FF
 64 | 89 45 A8
 65 | C7 45 FC 00 00 00 00
 66 | BA 01 00 00 00
 67 | 8D 4D AC
 68 | 31 DB
 69 | B8 03 00 00 00
 70 | CD 80
 71 | 83 F8 00
 72 | 0F 8C 0B 01 00 00
 73 | 89 C3
 74 | 0F 84 08 01 00 00
 75 | 8A 45 AC
 76 | 50
 77 | E8 40 FF FF FF
 78 | 83 F8 00
 79 | 5A
 80 | 75 CF
 81 | 80 7D AC 23
 82 | 74 4D
 83 | 52
 84 | E8 6A FF FF FF
 85 | 5B
 86 | 3C FF
 87 | 74 55
 88 | 50
 89 | 8D 4D AD
 90 | E8 81 FF FF FF
 91 | 5B
 92 | FF 75 AD
 93 | E8 53 FF FF FF
 94 | 5A
 95 | 83 F8 FF
 96 | 0F 84 C7 00 00 00
 97 | C6 C1 04
 98 | D2 E3
 99 | 00 D8
100 | 50
101 | BA 01 00 00 00
102 | 89 E0
103 | 8D 08
104 | BB 01 00 00 00
105 | B8 04 00 00 00
106 | CD 80
107 | 5A
108 | FF 45 FC
109 | E9 7C FF FF FF
110 | 8D 4D AC
111 | E8 40 FF FF FF
112 | 80 7D AC 0A
113 | 75 F2
114 | E9 69 FF FF FF
115 | 8D 4D AC
116 | 41
117 | E8 2C FF FF FF
118 | FF 31
119 | E8 DA FE FF FF
120 | 5B
121 | 83 F8 00
122 | 75 ED
123 | 80 39 3A
124 | 9C
125 | C6 01 00
126 | 41
127 | 8D 75 AC
128 | 29 F1
129 | 83 F9 12
130 | 0F 8F 62 00 00 00
131 | 9D
132 | 75 1F
133 | 8D 5D A8
134 | 8B 3B
135 | 39 F7
136 | 0F 8D 52 00 00 00
137 | F3 A4
138 | 8B 45 FC
139 | 8B 3B
140 | 89 47 0C
141 | 83 03 10
142 | E9 1B FF FF FF
143 | 8D BD A8 EF FF FF
144 | 3B 7D A8
145 | 7D 35
146 | 51
147 | 56
148 | 57
149 | F3 A6
150 | 5F
151 | 5E
152 | 59
153 | 74 05
154 | 83 C7 10
155 | EB EC
156 | 83 45 FC 04
157 | 8B 47 0C
158 | 2B 45 FC
159 | 50
160 | BA 04 00 00 00
161 | 89 E0
162 | 8D 08
163 | BB 01 00 00 00
164 | B8 04 00 00 00
165 | CD 80
166 | 58
167 | E9 DB FE FF FF
168 | BB 01 00 00 00
169 | B8 01 00 00 00
170 | CD 80
171 | E9 B2 FE FF FF
172 | 


--------------------------------------------------------------------------------
/stage-1/unhexl.ts.xl:
--------------------------------------------------------------------------------
  1 | # unhexl.ts.xl
  2 | 
  3 | # Copyright (C) 2009, 2011 Richard Smith <richard@ex-parrot.com>
  4 | # All rights reserved.
  5 | 
  6 | ####    #  Function: bool isws(char)
  7 | 	#  Tests whether its argument is in [ \t\n]
  8 | 
  9 | 	#  As with many of the functions here, it is turned upside down
 10 | 	#  so the entry point is in the middle.  This is because unhexl
 11 | 	#  is limited to jumps up the file.
 12 | .L1:
 13 | 	5D			#	POP	%ebp
 14 | 	C3			#	RET
 15 | isws:
 16 | 	55			#	PUSH	%ebp
 17 | 	89 E5			#	MOVL	%esp, %ebp
 18 | 	8B 45 08		#	MOVL	8(%ebp), %eax
 19 | 	3C 20			#	CMPB	$0x20, %al	# ' '
 20 | 	0F 84 .L1		#	JE	.L1
 21 | 	3C 09			#	CMPB	$0x09, %al	# '\t'
 22 | 	0F 84 .L1		#	JE	.L1
 23 | 	3C 0A			#	CMPB	$0x0A, %al	# '\n'
 24 | 	0F 84 .L1		#	JE	.L1
 25 | 	31 C0			#	XORL	%eax, %eax
 26 | 	E9 .L1			#	JMP	.L1
 27 | 
 28 | ####	#  Function: bool islchr(char)
 29 | 	#  Tests whether its argument is in [0-9A-Za-z_]
 30 | .L2:
 31 | 	31 C0			#	XORL	%eax, %eax
 32 | .L3:
 33 | 	5D			#	POP	%ebp
 34 | 	C3			#	RET
 35 | islchr:
 36 | 	55			#	PUSH	%ebp
 37 | 	89 E5			#	MOVL	%esp, %ebp
 38 | 	8B 45 08		#	MOVL	8(%ebp), %eax
 39 | 	3C 30			#	CMPB	$0x30, %al	# '0'
 40 | 	0F 8C .L2		#	JL	.L2
 41 | 	3C 39			#	CMPB	$0x39, %al	# '9'
 42 | 	0F 8E .L3		#	JLE	.L3
 43 | 	3C 41			#	CMPB	$0x41, %al	# 'A'
 44 | 	0F 8C .L2		#	JL	.L2
 45 | 	3C 5A			#	CMPB	$0x5A, %al	# 'Z'
 46 | 	0F 8E .L3		#	JLE	.L3
 47 | 	3C 5F			#	CMPB	$0x5F, %al	# '_'
 48 | 	0F 84 .L3		#	JE	.L3
 49 | 	3C 61			#	CMPB	$0x61, %al	# 'a'
 50 | 	0F 8C .L2		#	JL	.L2
 51 | 	3C 7A			#	CMPB	$0x7A, %al	# 'z'
 52 | 	0F 8E .L3		#	JLE	.L3
 53 | 	E9 .L2			#	JMP	.L2
 54 | 
 55 | ####	#  Function: int xchr(char)
 56 | 	#  Tests whether its argument is a character in [0-9A-F], and if so,
 57 | 	#  converts it to its numeric value (0-15); otherwise returns -1.
 58 | .L6:
 59 | 	2C 37			#	SUBB	$0x37, %al	# 'A'-10
 60 | .L7:
 61 | 	5D			#	POP	%ebp
 62 | 	C3			#	RET
 63 | .L4:
 64 | 	B8 FF FF FF FF		#	MOVL	$-1, %eax
 65 | 	E9 .L7			#	JMP	.L7
 66 | .L5:
 67 | 	2C 30			#	SUBB	$0x30, %al	# '0'
 68 | 	E9 .L7			#	JMP	.L7
 69 | xchr:
 70 | 	55			#	PUSH	%ebp
 71 | 	89 E5			#	MOVL	%esp, %ebp
 72 | 	8B 45 08		#	MOVL	8(%ebp), %eax
 73 | 	3C 30			#	CMPB	$0x30, %al	# '0'
 74 | 	0F 8C .L4		#	JL	.L4
 75 | 	3C 39			#	CMPB	$0x39, %al	# '9'
 76 | 	0F 8E .L5		#	JLE	.L5
 77 | 	3C 41			#	CMPB	$0x41, %al	# 'A'
 78 | 	0F 8C .L4		#	JL	.L4
 79 | 	3C 46			#	CMPB	$0x46, %al	# 'F'
 80 | 	0F 8E .L6		#	JLE	.L6
 81 | 	E9 .L4			#	JMP	.L4
 82 | 
 83 | ####	#  Not a proper function.
 84 | 	#  Exits program
 85 | error:
 86 | 	BB 01 00 00 00		#	MOVL	$1, %ebx
 87 | success:
 88 | 	B8 01 00 00 00		#	MOVL	$1, %eax
 89 | 	CD 80			#	INT	$0x80
 90 | 
 91 | ####	#  Function:	void readone( [%ecx] char* ) 
 92 | 	#  Reads one byte into (%ecx) which should already be set.
 93 | 	#  Clobbers %edx, %ebx and %eax.
 94 | 	#  Exits on failure.
 95 | readone:
 96 | 	55			#	PUSH	%ebp
 97 | 	89 E5			#	MOVL	%esp, %ebp
 98 | 	BA 01 00 00 00		#	MOVL	$1, %edx
 99 | 	31 DB			#	XORL	%ebx, %ebx
100 | 	B8 03 00 00 00		#	MOVL	$3, %eax
101 | 	CD 80			#	INT	$0x80
102 | 	83 F8 01		#	CMPL	$1, %eax
103 | 	0F 85 error		#	JNE	error
104 | 	5D			#	POP	%ebp
105 | 	C3			#	RET
106 | 
107 | ####	#  The main function.
108 | 	#  Stack is arranged as follows:
109 | 	#
110 | 	#       -4(%ebp)	int* addr
111 | 	#      -84(%ebp)	char buffer[80]
112 | 	#      -88(%ebp)	label* label_end
113 | 	#    -4184(%ebp)	label labels[256]
114 | 	#
115 | 	#  where label is a { char name[12]; int addr }.
116 | 
117 | ret:
118 | 	# This ret is labelled to allow various bits of main to
119 | 	# jump up to it in order to effect a forwards jump.
120 | 	31 C0			#	XORL	%eax, %eax
121 | 	C3			#	RET
122 | 
123 | 	#  --- Test for a comment.
124 | 	#  If found, skip over comment line until we've read a LF
125 | 	#  At end of section, %eax=1 iff we read a comment.
126 | 	#  If %eax=0, all other registers are unaltered.
127 | comment:
128 |         80 7D AC 23             #       CMPB    $0x23, -84(%ebp)	# '#' -- first byte of buffer
129 | 	0F 85 ret		#	JNE	ret		# not a comment: return 0 via ret
130 | .L10:
131 | 	8D 4D AC		#	LEA	-84(%ebp), %ecx	# re-read into buffer[0] each time
132 | 	E8 readone		#	CALL	readone		# exits the program on EOF / read error
133 | 	80 7D AC 0A		#	CMPB	$0x0A, -84(%ebp)	# '\n'  (80 /7 ib is a byte compare)
134 | 	0F 85 .L10		#	JNE	.L10		# keep discarding until end of line
135 | 	B8 01 00 00 00		#	MOVL	$1, %eax	# report: a comment was consumed
136 | 	C3			#	RET
137 | 
138 | 	# --- Test for an octet.
139 | octet:
140 | 	FF 75 AC		#	PUSH	-84(%ebp)
141 | 	E8 xchr			#	CALL	xchr
142 | 	5B			#	POP	%ebx
143 | 	3C FF			#	CMPB	$-1, %al
144 | 	0F 84 ret		#	JE	ret
145 | 
146 | 	#  Yes, we do.  Read the next byte
147 | 	50			#	PUSH	%eax
148 | 	8D 4D AD		#	LEA	-83(%ebp), %ecx
149 | 	E8 readone		#	CALL	readone
150 | 	5B			#	POP	%ebx
151 | 
152 | 	#  Process it
153 | 	FF 75 AD		#	PUSH	-83(%ebp)
154 | 	E8 xchr			#	CALL	xchr
155 | 	5A			#	POP	%edx
156 | 	83 F8 FF		#	CMPL	$-1, %eax
157 | 	0F 84 error		#	JE	error
158 | 	C6 C1 04		#	MOVB	$4, %cl
159 | 	D2 E3			#	SALB	%cl, %bl
160 | 	00 D8			#	ADDB	%bl, %al
161 | 
162 | 	#  Byte is now in %al; lets write it
163 | 	50			#	PUSH	%eax
164 | 	BA 01 00 00 00		#	MOVL	$1, %edx
165 | 	89 E0			#	MOVL	%esp, %eax
166 | 	8D 08			#	LEA	(%eax), %ecx
167 | 	BB 01 00 00 00		#	MOVL	$1, %ebx
168 | 	B8 04 00 00 00		#	MOVL	$4, %eax
169 | 	CD 80			#	INT	$0x80
170 | 	5A			#	POP	%edx
171 | 
172 | 	#  Increment the address and return
173 | 	FF 45 FC		#	INCL	-4(%ebp)
174 | 	B8 01 00 00 00		#	MOVL	$1, %eax
175 | 	C3			#	RET
176 | 
177 | 
178 | 	#  Parts of the label section
179 | labeldef:
180 | 	#  Check that we're not about to overrun the label store,
181 | 	#  and then store the label.  Entered by a jump from label, with
182 | 	8D 5D A8		#	LEA	-88(%ebp), %ebx	# %ebx = &label_end
183 | 	8B 3B			#	MOVL	(%ebx), %edi	# %edi = label_end (next free slot)
184 | 	39 DF			#	CMPL	%ebx, %edi	# labels[256] ends at -88(%ebp), i.e. at &label_end itself
185 | 	0F 8D error		#	JGE	error		# store is full
186 | 	F3			#	REP			# copy %ecx bytes (%esi) -> (%edi);
187 | 	   A4			#	  MOVSB			#   %esi / %ecx were set up by label
188 | 	8B 45 FC		#	MOVL	-4(%ebp), %eax	# current output address
189 | 	8B 3B			#	MOVL	(%ebx), %edi	# reload slot start (MOVSB advanced %edi)
190 | 	89 47 0C		#	MOVL	%eax, 12(%edi)	# slot.addr = address
191 | 	83 03 10		#	ADDL	$16, (%ebx)	# label_end += sizeof(label)
192 | 	B8 01 00 00 00		#	MOVL	$1, %eax	# report: input consumed
193 | 	C3			#	RET
194 | 
195 | labelref:
196 | 	#  Look up the label
197 | 	8D BD 98 EF FF FF	#	LEA	-4200(%ebp), %edi
198 | .L14:
199 | 	83 C7 10		#	ADDL	$16, %edi
200 | 	3B 7D A8		#	CMPL	-88(%ebp), %edi
201 | 	0F 8D error		#	JGE	error
202 | 	51			#	PUSH	%ecx
203 | 	56			#	PUSH	%esi
204 | 	57			#	PUSH	%edi
205 | 	F3			#	REPE
206 | 	   A6			#	  CMPSB
207 | 	5F			#	POP	%edi
208 | 	5E			#	POP	%esi
209 | 	59			#	POP	%ecx
210 | 	0F 85 .L14		#	JNE	.L14
211 | 
212 | 	#  Found it.  Increment address by four and print offset
213 | 	83 45 FC 04		#	ADDL	$4, -4(%ebp)
214 | 	8B 47 0C		#	MOVL	12(%edi), %eax
215 | 	2B 45 FC		#	SUBL	-4(%ebp), %eax
216 | 	50			#	PUSH	%eax
217 | 	BA 04 00 00 00		#	MOVL	$4, %edx
218 | 	89 E0			#	MOVL	%esp, %eax
219 | 	8D 08			#	LEA	(%eax), %ecx
220 | 	BB 01 00 00 00		#	MOVL	$1, %ebx
221 | 	B8 04 00 00 00		#	MOVL	$4, %eax
222 | 	CD 80			#	INT	$0x80
223 | 	58			#	POP	%eax
224 | 	B8 01 00 00 00		#	MOVL	$1, %eax
225 | 	C3			#	RET
226 | 
227 | 	# --- Test for a label (either definition or reference).
228 | label:
229 | 	#  Read a label into the buffer, after the first byte already at -84(%ebp)
230 | 	8D 4D AC		#	LEA	-84(%ebp), %ecx
231 | .L12:
232 | 	41			#	INCL	%ecx		# next buffer position
233 | 	E8 readone		#	CALL	readone		# exits the program on EOF / read error
234 | 	FF 31			#	PUSH	(%ecx)
235 | 	E8 islchr		#	CALL	islchr
236 | 	5B			#	POP	%ebx
237 | 	83 F8 00		#	CMPL	$0, %eax
238 | 	0F 85 .L12		#	JNE	.L12		# still inside the label name
239 | 
240 | 	#  (%ecx) is now something other than lchr.  Is it a colon?
241 | 	#  Also, null terminate, load %esi with start of string, and
242 | 	#  %ecx with its length inc. NUL.
243 | 	80 39 3A		#	CMPB	$0x3A, (%ecx)	# ':' -- ZF set means definition
244 | 	9C			#	PUSHF			# keep that ZF across the length check
245 | 	C6 01 00		#	MOVB	$0, (%ecx)
246 | 	41			#	INCL	%ecx
247 | 	8D 75 AC		#	LEA	-84(%ebp), %esi
248 | 	29 F1			#	SUBL	%esi, %ecx
249 | 	83 F9 12		#	CMPL	$0x12, %ecx	# NOTE(review): immediate is 0x12 (18) but name[] is 12 bytes -- was 0C intended?
250 | 	0F 8F error		#	JG	error		# flags still pushed, but error exits the program
251 | 	9D			#	POPF			# restore ZF from the ':' compare
252 | 	0F 85 labelref		#	JNE	labelref
253 | 	E9 labeldef		#	JMP	labeldef
254 | 
255 | 	#  --- The main loop
256 | main:
257 | 	89 E5			#	MOVL	%esp, %ebp
258 | 	81 EC 58 10 00 00	#	SUBL	$4184, %esp	(88 plus 4096 bytes)
259 | 	8D 85 A8 EF FF FF	#	LEA	-4184(%ebp), %eax
260 | 	89 45 A8		#	MOVL	%eax, -88(%ebp)	(low end of that area)
261 | 	C7 45 FC 00 00 00 00	#	MOVL	$0, -4(%ebp)
262 | 
263 | .L8:
264 | 	#  Read one byte (not with readone because EOF is permitted)
265 | 	BA 01 00 00 00		#	MOVL	$1, %edx	(byte count)
266 | 	8D 4D AC		#	LEA	-84(%ebp), %ecx	(buffer)
267 | 	31 DB			#	XORL	%ebx, %ebx	(fd 0)
268 | 	B8 03 00 00 00		#	MOVL	$3, %eax	(read syscall)
269 | 	CD 80			#	INT	$0x80
270 | 	83 F8 00		#	CMPL	$0, %eax
271 | 	0F 8C error		#	JL	error		(read failed)
272 | 	89 C3			#	MOVL	%eax, %ebx	(flags unchanged)
273 | 	0F 84 success		#	JE	success		(zero bytes, EOF)
274 | 
275 | 	#  Is the byte white space?  If so, loop back
276 | 	8A 45 AC		#	MOVB	-84(%ebp), %al
277 | 	50			#	PUSH	%eax
278 | 	E8 isws			#	CALL	isws
279 | 	83 F8 00		#	CMPL	$0, %eax
280 | 	5A			#	POP	%edx		(flags unchanged)
281 | 	0F 85 .L8		#	JNE	.L8
282 | 
283 | 	#  We have a byte.  What is it?  A nonzero %eax restarts the loop.
284 | 	E8 comment		#	CALL	comment
285 | 	83 F8 00		#	CMP	$0, %eax
286 | 	0F 85 .L8		#	JNE	.L8
287 | 
288 | 	E8 octet		#	CALL	octet
289 | 	83 F8 00		#	CMP	$0, %eax
290 | 	0F 85 .L8		#	JNE	.L8
291 | 
292 | 	E8 label		#	CALL	label
293 | 	83 F8 00		#	CMP	$0, %eax
294 | 	0F 85 .L8		#	JNE	.L8
295 | 
296 | 	E9 error		#	JMP	error		(nothing matched)
297 | 
298 | ####	#  And finally, the entry point, which just jumps to main.
299 | 	#  It comes last in the file per requirement for elfify.
300 | 	E9 main			#	JMP	main
301 | 


--------------------------------------------------------------------------------
/stage-2/.gitignore:
--------------------------------------------------------------------------------
1 | .*.swp
2 | unhexl
3 | elfify
4 | as
5 | 


--------------------------------------------------------------------------------
/stage-2/Makefile:
--------------------------------------------------------------------------------
 1 | # stage-2/Makefile
 2 | 
 3 | # Copyright (C) 2010, 2011, 2020 Richard Smith <richard@ex-parrot.com>
 4 | # All rights reserved.
 5 | 
 6 | SHELL  = /bin/sh
 7 | 
 8 | CHMOD  = /bin/chmod
 9 | RM     = /bin/rm
10 | CP     = /bin/cp
11 | CMP    = /usr/bin/cmp
12 | MAKE   = /usr/bin/make
13 | 
14 | BINDIR = ../bin
15 | PATH   = $(BINDIR)
16 | 
17 | all:	init as
18 | 
19 | init:
20 | 	@test -d $(BINDIR) || $(MAKE) -C .. init
21 | 	@test -x $(BINDIR)/unhexl || $(MAKE) -C ../stage-1 install
22 | 
23 | as:	as.ts.xl
24 | 	unhexl < $< > $@.ts
25 | 	elfify $@.ts > $@
26 | 	$(CHMOD) a+x $@
27 | 	$(RM) $@.ts
28 | 
29 | check:	check-as
30 | 
31 | check-as:	as as.s
32 | 	./$< as.s > as2.ts
33 | 	elfify as2.ts > as2
34 | 	$(CHMOD) a+x as2
35 | 	./as2 as.s > as3.ts
36 | 	$(CMP) as2.ts as3.ts
37 | 	$(RM) as2.ts as3.ts as2
38 | 
39 | install: as
40 | 	$(CP) $< $(BINDIR)/as0
41 | 	$(RM) -f $(BINDIR)/unhexl
42 | 
43 | clean:
44 | 	$(RM) -f as as.ts as2 as2.ts as3.ts
45 | 
46 | world:
47 | 	set -e; for GOAL in clean init all check install; do \
48 | 	    $(MAKE) $$GOAL; \
49 | 	done
50 | 


--------------------------------------------------------------------------------
/stage-2/README.md:
--------------------------------------------------------------------------------
  1 | # Bootstrap: Stage 2
  2 | 
  3 | Although the two stage 1 tools, `unhexl` and `elfify`, certainly eased the
  4 | process of writing code, manually assembling opcodes is still painful,
  5 | especially the encoding of ModR/M bytes for instructions such as `MOV`.
  6 | After the need to manually track file offsets for jumps, badly-encoded 
  7 | opcodes were the most frequent source of error when writing the stage 1
  8 | tools.  As the stage 1 tools have already alleviated the need to 
  9 | handle offsets manually, the next logical step is a lightweight 
 10 | assembler.  This is the main tool introduced in this stage.
 11 | 
 12 | The stage 2 assembler makes two passes over the assembler source code, 
 13 | the first building up a symbol table and the second writing out machine 
 14 | code.  This means that forwards jumps are supported (unlike in the stage
 15 | 1 `unhexl`).  The supported instruction set is loosely based on the Intel
 16 | 8086 instruction set, but with 32-bit addressing (which the 8086, a
 17 | 16-bit microprocessor, lacked).  Some more recent instructions are added
 18 | such as the far conditional jumps (opcode `0F 8x`, introduced in the 386).
 19 | 
 20 | For many instructions, we simplify the implementation by only supporting
 21 | the general version (e.g.  the far jump) without the corresponding
 22 | special cases (e.g. near or short jumps).  Mandatory instruction size
 23 | suffixes (`L` for 32-bit, `B` for 8-bit) are used to limit each mnemonic to
 24 | a single opcode family; no 16-bit instructions are supported.  There is
 25 | no support for addressing that involves a SIB byte – so `(%ebp)` is not
 26 | supported, though `0(%ebp)` is.
 27 | 
 28 | Labels are restricted to 11 characters, and a maximum of 256 labels are
 29 | allowed.  When a label appears as an argument to a mnemonic, it is 
 30 | always treated as a 32-bit program counter relative (pcrel) offset.
 31 | This is correct for `JMP`, `Jcc` and `CALL`, but wrong when trying to locate
 32 | data compiled into the program.  There is a `.hex` directive that takes a
 33 | stream of hexadecimal octets (without their '0x' prefix); it is useful
 34 | for including data into the object file, or manually assembling an
 35 | unsupported instruction.  This is the only assembler directive
 36 | implemented in the stage-2 assembler.
 37 | 
 38 | The assembler uses AT&T syntax, with the source before the destination
 39 | for operations with two operands, as that is the *de facto* standard in
 40 | the Unix environment.  This introduces some complications into the 
 41 | assembler: for example, in `MOVL addr, %eax`, `addr` is to be interpreted
 42 | as a pcrel address, which means relative to the end of the instruction.
 43 | However, when `addr` is parsed, the instruction length is unknown: 
 44 | had the destination been `4(%ebp)`, that would have been longer.  This 
 45 | requires some look-ahead in the parser.
 46 | 
 47 | Instructions are supposed to be delimited by either a new line or a 
 48 | semicolon; however, this has been relaxed to allow prefixes (such as `REP`) to
 49 | be treated as instructions with no arguments (like `NOP`).  Thus 
 50 | `REP SCASB` is valid, though the assembler believes it to be two 
 51 | instructions, not one with a prefix.  The full grammar is:
 52 | 
 53 | ```ebnf
 54 |   HWS          ::= [ \t]
 55 |   DIGIT        ::= [0-9]
 56 |   NZDIGIT      ::= [1-9]
 57 |   XDIGIT       ::= [0-9A-F]
 58 |   LCHAR        ::= [0-9A-Za-z_]
 59 |   LSTART       ::= [.A-Za-z_]
 60 |   CHAR         ::= any character
 61 | 
 62 |   comment      ::= '#' CHAR* '\n'
 63 |   endline      ::= HWS* ( comment | '\n' | ';' )
 64 |   identifier   ::= LSTART LCHAR+
 65 |   labeldef     ::= identifier ':'
 66 |   mnemonic     ::= identifier   # from the list of known mnemonics
 67 |   integer      ::= ( '0' 'x' XDIGIT+ | NZDIGIT DIGIT* | '0' )
 68 |   immediate    ::= '$' integer
 69 |   regname8     ::= 'al' | 'cl' | 'dl' | 'bl' | 'ah' | 'ch' | 'dh' | 'bh'
 70 |   regname32    ::= 'eax' | 'ecx' | 'edx' | 'ebx' | 'esp' | 'ebp' | 'esi' | 'edi'
 71 |   register     ::= '%' ( regname8 | regname32 )
 72 |   regmem       ::= register | integer? '(' '%' regname32 ')'
 73 |   argument     ::= HWS* ( immediate | identifier | regmem )
 74 |   arguments    ::= argument HWS* ',' arguments | argument
 75 |   instruction  ::= mnemonic arguments endline?
 76 |   octet        ::= HWS* XDIGIT XDIGIT
 77 |   hexbytes     ::= '.hex' octet* endline
 78 |   directive    ::= hexbytes
 79 |   file         ::= labeldef | instruction | directive | endline
 80 | ```
 81 | 
 82 | The list of supported mnemonics is:
 83 | 
 84 | ```
 85 |   ADCx ADDx ANDx BSFL BSRL CALL CBW CDQ CLC CLD CMPx CMPSx 
 86 |   CWDE DECx DIVx HLT IDIVx IMULx INCx INT Jcc JMP LEA LEAVE
 87 |   LODSx MOVx MOVSx MULx NEGx NOP NOTx ORx POP POPF PUSH
 88 |   PUSHF REP REPE REPNE RET SALx SARx SBBx SCASx SHLx SHRx
 89 |   STC STD STOSx SUBx TESTL XCHGL XORx.
 90 | ```
 91 | 
 92 | In that list, `x` represents a size suffix `L` or `B`, and `cc` is a
 93 | condition (`A`, `AE`, `B`, `BE`, `C`, `E`, `G`, `GE`, `L`, `LE`, `O`, `P`, 
 94 | `PE`, `PO`, `S`, `Z`, together with the negative `Ncc` versions, 
 95 | [except for `PE` and `PO`]).  Some instructions have implicit arguments,
 96 | and they *must not* be specified in the source.  The shift opcodes
 97 | (`SAL`, `SAR`, `SHL`, `SHR`) always shift by `%cl` bits, and the
 98 | multiplication-like opcodes (`MUL`, `IMUL`, `DIV`, `IDIV`) 
 99 | always act on `%edx:%eax` (in 32-bit mode) or `%ax` (in 8-bit mode).
100 | 
101 | Not all of the instructions normally represented by these mnemonics are 
102 | supported.  `INT` only takes an 8-bit immediate; `CALL`, `JMP` and `Jcc`
103 | take a program counter relative 32-bit immediate; the unary arithmetics,
104 | `INCx`, `DECx`, `NEGx`, `NOTx`, `SALx`, `SHLx`, `SARx`, `SHRx`, `MULx`, 
105 | `IMULx`, `DIVx` and `IDIVx`, take a single 8- or 32-bit r/m operand
106 | (matching the regmem production); and `PUSH` and `POP` take a 32-bit
107 | r/m.  `LEA` takes a 32-bit r/m followed by a 32-bit register; the binary
108 | arithmetics, `MOVx`, `ADDx`, `SUBx`, `ADCx`, `SBBx`, `CMPx`, `ANDx`,
109 | `ORx` and `XORx` take either a r/m and a register (in either order)
110 | or an immediate followed by a r/m, all of the appropriate size.  The
111 | remaining mnemonics have no operands.
112 | 
113 | Because of the need to make two passes over the source, it takes the
114 | name of the source code file as its only command line argument; a
115 | `.text` section is printed on standard output.
116 | 
117 | > Usage: `as test.s > test.ts`
118 | 
119 | The output is not a valid executable – it is just the `.text` section.  It
120 | therefore needs using in conjunction with the stage 1 `elfify` tool to
121 | produce an executable.
122 | 


--------------------------------------------------------------------------------
/stage-3/.gitignore:
--------------------------------------------------------------------------------
1 | .*.swp
2 | elfify
3 | as0
4 | as
5 | ld
6 | 


--------------------------------------------------------------------------------
/stage-3/Makefile:
--------------------------------------------------------------------------------
 1 | # stage-3/Makefile
 2 | 
 3 | # Copyright (C) 2011, 2012, 2013, 2020 Richard Smith <richard@ex-parrot.com>
 4 | # All rights reserved.
 5 | 
 6 | SHELL  = /bin/sh
 7 | 
 8 | CHMOD  = /bin/chmod
 9 | RM     = /bin/rm
10 | CP     = /bin/cp
11 | CMP    = /usr/bin/cmp
12 | MAKE   = /usr/bin/make
13 | 
14 | BINDIR = ../bin
15 | PATH   = $(BINDIR)
16 | 
17 | all:	init as ld
18 | 
19 | init:
20 | 	@test -d $(BINDIR) || $(MAKE) -C .. init
21 | 	@test -x $(BINDIR)/as0 || $(MAKE) -C ../stage-2 install
22 | 
23 | # as0 is the assembler installed into $(BINDIR) by stage 2.
24 | # as1 is this stage's assembler (written in asm), assembled by as0.
25 | # as  is the same source, assembled by as1 (i.e. by itself), so similar.
26 | # as2 is a test assembler built by as; it should be binary identical to as.
27 | 
28 | # ld follows the same conventions, except that there is no ld0.
29 | 
30 | as1:	as.s
31 | 	as0 $< > as.ts
32 | 	elfify as.ts > $@
33 | 	$(CHMOD) a+x $@
34 | 	$(RM) as.ts
35 | 
36 | ld1:	ld.s
37 | 	as0 $< > ld.ts
38 | 	elfify ld.ts > $@
39 | 	$(CHMOD) a+x $@
40 | 	$(RM) ld.ts
41 | 
42 | check:	check-as check-ld check-sep
43 | 
44 | as:	as1 ld1 as.s
45 | 	./as1 as.s
46 | 	./ld1 -o $@ as.o
47 | 	$(RM) as.o
48 | 
49 | ld:	as1 ld1 ld.s
50 | 	./as1 ld.s
51 | 	./ld1 -o $@ ld.o
52 | 	$(RM) ld.o
53 | 
54 | check-as:	as ld as.s
55 | 	$(RM) -f as.o
56 | 	./as as.s
57 | 	./ld -o as2 as.o
58 | 	$(CMP) as as2
59 | 	$(RM) as.o as2
60 | 
61 | check-ld:	as ld ld.s
62 | 	$(RM) -f ld.o
63 | 	./as ld.s
64 | 	./ld -o ld2 ld.o
65 | 	$(CMP) ld ld2
66 | 	$(RM) ld.o ld2
67 | 
68 | testprog:	as ld test1.s test2.s test3.s
69 | 	./as test2.s
70 | 	./as test3.s
71 | 	./ld -r -o test2+3.o test2.o test3.o
72 | 	./as test1.s
73 | 	./ld -o $@ test1.o test2+3.o
74 | 	$(RM) test1.o test2.o test3.o test2+3.o
75 | 
76 | check-sep:	testprog
77 | 	./$<
78 | 	$(RM) $<
79 | 
80 | .INTERMEDIATE:	as.ts ld.ts as.o ld.o as1 ld1
81 | 
82 | install: init as ld
83 | 	$(CP) as ld $(BINDIR)
84 | 	$(RM) -f $(BINDIR)/as0 $(BINDIR)/elfify
85 | 
86 | clean:
87 | 	$(RM) -f as.ts ld.ts as.o ld.o as1 ld1
88 | 	$(RM) -f as2.o ld2.o as2 ld2 as ld
89 | 	$(RM) -f test1.o test2.o test3.o test2+3.o testprog
90 | 
91 | world:
92 | 	set -e; for GOAL in clean init all check install; do \
93 | 	    $(MAKE) $$GOAL; \
94 | 	done
95 | 


--------------------------------------------------------------------------------
/stage-3/README.md:
--------------------------------------------------------------------------------
  1 | # Bootstrap: Stage 3
  2 | 
  3 | One major limitation to the stage 2 assembler is that there are rather 
  4 | small fixed limits to many quantities: labels are limited to 11
  5 | characters, and there must be no more than 256 of them; lines can be no
  6 | more than 80 characters long.  These limits exist because there is no
  7 | heap allocation of memory, and fixed-sized arrays are declared on the
  8 | stack.  This puts a severe limit on the complexity of any program
  9 | assembled using the stage 2 assembler.
 10 | 
 11 | To overcome this limitation, in this stage we take the stage 2 assembler
 12 | source, translate it into assembly language (from hexadecimal), and
 13 | add dynamic memory management to the assembler with a standard `malloc()`
 14 | and `realloc()` interface.  This is used to remove the limit of 256
 15 | labels.
 16 | 
 17 | The present `malloc()` implementation is very inefficient because it punts
 18 | all the work to the kernel with an `mmap(MAP_ANON)` syscall.  This results
 19 | in a new memory page being allocated for every block of memory
 20 | allocated.  For the present purpose this is acceptable, but it will
 21 | rapidly cease to be as code becomes more complex.  However, at present
 22 | there is a strong disincentive to implementing a better `malloc()`
 23 | implementation: there is no mechanism for code reuse.  Bug fixes or
 24 | improvements to the implementation will tend to get lost or incorrectly
 25 | applied to the multiple copies scattered around the code.
 26 | 
 27 | To allow code reuse this stage introduces separate assembly and linking
 28 | steps allowing each executable to be formed from multiple object files.
 29 | This means that some object files (for instance, those containing the
 30 | `malloc()` implementation) can be linked into several different executables.
 31 | To support this, the assembler (which now writes ELF directly, removing
 32 | the need for the stage 1 `elfify` program) includes `.rel.text`, `.symtab` and
 33 | `.strtab` sections in its output.  The `R_386_PC32` relocation type is used
 34 | for relocations between symbols in the `.text` section; the relocations
 35 | are stored in the `.rel.text` section.
 36 | 
 37 | Another difficulty with the stage 2 assembler was that the only place to
 38 | store data was on the stack or in a register.  Variables like the input
 39 | read buffer had to be placed on the stack and pointers to it passed
 40 | around to all functions, with the result that the code was rarely
 41 | refactored into separate functions.  This is addressed by the stage-3
 42 | assembler which supports a writable `.data` section.  References in the
 43 | `.text` section to objects in the `.data` section are handled by way of
 44 | `R_386_32` relocations.
 45 | 
 46 | Objects in the `.data` can be initialised with the address of other 
 47 | objects, e.g. by passing a symbol name to the argument of an `.int` 
 48 | directive.   These are handled by `R_386_32` relocations which are
 49 | stored in a new `.rel.data` section.
 50 | 
 51 | The `.text` and `.data` directives are used to switch between sections, and
 52 | several other new assembler directives are added.  The complete list is
 53 | now as follows
 54 | 
 55 | ```
 56 |   .text .data .global .globl .local .int .byte .long .hex
 57 |   .zero .align .string
 58 | ```
 59 | 
 60 | The `.global` (or equivalently, `.globl`) and `.local` directives take a
 61 | symbol name as their single argument.  They specify the binding of 
 62 | that symbol.  Global binding is currently the default (for compatibility
 63 | with stage 2), though that will be changed in a later stage.
 64 | 
 65 | The `.int` and `.byte` directives allow 32-bit and 8-bit integers to be
 66 | included directly into the output; `.long` is a synonym for `.int`.
 67 | Multiple integers, separated by commas, can be included as arguments. 
 68 | Unlike the existing `.hex` directive (unchanged from stage 2) which only 
 69 | accepts hexadecimal octets without prefixes, these support any form of 
 70 | literal.  The `.zero` directive takes one argument and writes that number 
 71 | of zeros to the output.  The `.align` directive also writes a number of
 72 | zeros to the output, but the argument to `.align` is the alignment 
 73 | required.  Thus `.align 16` outputs enough zero bytes to align the section
 74 | to the next 16-byte boundary.
 75 | 
 76 | The `.string` directive allows for strings in double quotes with 
 77 | a maximum length of 78 characters.  They are automatically null 
 78 | terminated, and the following escapes are understood:
 79 | 
 80 | ```
 81 |   \n \t \" \\ \0
 82 | ```
 83 | 
 84 | The use of symbols as address immediates without a `$` prefix (e.g. `ADDL
 85 | foo, %eax`) is deprecated.  In the stage-2 assembler, this stores the
 86 | address of foo in %eax.  To get this behaviour in the stage-3 assembler,
 87 | add the $ prefix.  We also now support the versions of the `MOV`
 88 | instruction that transfer a symbol value (rather than the address) to or
 89 | from the accumulator.  Thus `MOVL foo, %eax` copies the symbol value (as
 90 | is standard practice for that notation) and not the symbol address (as
 91 | in stage 2).  The following instructions are also added:
 92 | 
 93 | ```asm
 94 |   MOVZX, SETcc
 95 | ```
 96 | 
 97 | Support for the following AT&T aliases for Intel mnemonics has also
 98 | been added:
 99 | 
100 | ```
101 |   CBTW, CLTD, CWTL, MOVZBL, MOVSBL
102 | ```
103 | 
104 | There is also very limited support for instructions with a SIB byte:
105 | just enough to allow `%esp` to be dereferenced in an r/m32, e.g. in 
106 | `MOVL (%esp), %eax`.
107 | 
108 | Indirect (absolute) branches and calls are now supported, e.g. with 
109 | `CALL *%eax`.  These are needed for function pointers and computed jumps
110 | as used in a jump table for switch statements.
111 | 
112 | Character literals are now allowed as immediates, enclosed in single 
113 | quotes.  (Note this is unlike the GNU assembler, where character 
114 | literals begin with a single quote, but do not have a closing quote.) 
115 | They can be preceded by a `$` which is optional (on the basis that no-one
116 | would choose to write an address in terms of its ASCII representation).
117 | The same escape characters are accepted as for strings.  Multicharacter 
118 | literals are allowed where a 32-bit immediate is expected, and may 
119 | contain up to four characters.  Their layout is such as to make them 
120 | useful for short text fragments: 'xyz' has the same layout as "xyz".  (Note
121 | that this layout is the opposite design decision to that made in gcc.
122 | Neither the C standard nor the ABI provide any guidance on the matter.)
123 | 
124 | The assembler requires its source file to be suffixed `.s` and
125 | automatically assigns the output file name by replacing the `.s` with a 
126 | `.o` suffix.
127 | 
128 | > Usage: `as test.s`
129 | 
130 | The linker can take arbitrary input and output file names.  The output
131 | file is specified with the `-o` option which must be first on the command
132 | line.
133 | 
134 | > Usage: `ld [-r] -o output file1.o file2.o ...`
135 | 
136 | If `-r` is specified, the linker partially links its input generating an 
137 | object file as output.  Without it, the output is an executable.  At 
138 | present, even when partial linking, there cannot be any undefined 
139 | symbols in the output.
140 | 
141 | 
142 | TODO
143 | 
144 | undefined symbols when partially linking (ld -r)
145 | ?? case insensitive mnemonics, registers & hex numbers
146 | 


--------------------------------------------------------------------------------
/stage-3/test1.s:
--------------------------------------------------------------------------------
 1 | # stage-3/test1.s
 2 | 
 3 | # Copyright (C) 2012, 2013 Richard Smith <richard@ex-parrot.com>
 4 | # All rights reserved.
 5 | 
 6 | .data
 7 | .local bar
 8 | bar:
 9 | 	.int	0x2A		# 42 (foo in test3.s is 42 squared)
10 | 
11 | .text
12 | _start:
13 | 	MOVL	%esp, %ebp
14 | 
15 | 	#  bar = square(bar), where square is defined in test3.s
16 | 	MOVL	bar, %eax
17 | 	PUSH	%eax
18 | 	CALL	square
19 | 	POP	%ecx			# discard the argument
20 | 	MOVL	%eax, bar
21 | 
22 | 	MOVL	bar, %eax		# foo is set to 1 if bar and foo
23 | 	MOVL	%eax, %ecx		# differ, or to 0 if they match
24 | 	MOVL	foo, %eax
25 | 	CMPL	%eax, %ecx
26 | 	SETNE	%al
27 | 	MOVZBL	%al, %eax
28 | 	MOVL	%eax, foo
29 | 
30 | 	#  Call exit(foo), where exit is defined in test2.s
31 | 	MOVL	foo, %eax
32 | 	PUSH	%eax
33 | 
34 | # It should be safe to embed this literal in the middle of this function
35 | .data
36 | 	.string	"\"Hello,\tworld\!\""
37 | 	.byte	0xFF
38 | 
39 | .text
40 | 	CALL	exit
41 | 	HLT
42 | 
43 | .data
44 | 


--------------------------------------------------------------------------------
/stage-3/test2.s:
--------------------------------------------------------------------------------
 1 | # stage-3/test2.s
 2 | 
 3 | # Copyright (C) 2012, 2013 Richard Smith <richard@ex-parrot.com>
 4 | # All rights reserved.
 5 | 
 6 | .data
 7 | .global bar
 8 | bar:
 9 | 	.int	0x09		# global bar (test1.s has a .local bar)
10 | 
11 | .text
12 | exit:
13 | 	PUSH	%ebp
14 | 	MOVL	%esp, %ebp
15 | 	PUSH	%ebx
16 | 
17 | 	MOVL	8(%ebp), %ebx		# first argument is the exit status
18 | 	MOVL	$1, %eax		# __NR_exit
19 | 	INT	$0x80
20 | 
21 | 	POP	%ebx			# only reached if the syscall returns
22 | 	LEAVE
23 | 	RET
24 | 
25 | .data
26 | 	.byte	'!', '_'
27 | 	.zero	35	
28 | 	.byte	0x22
29 | 	.int	'Fish'
30 | 


--------------------------------------------------------------------------------
/stage-3/test3.s:
--------------------------------------------------------------------------------
 1 | # stage-3/test3.s
 2 | 
 3 | # Copyright (C) 2012, 2013 Richard Smith <richard@ex-parrot.com>
 4 | # All rights reserved.
 5 | 
 6 | .text
 7 | .global square
 8 | square:
 9 | 	PUSH	%ebp
10 | 	MOVL	%esp, %ebp
11 | 	MOVL	8(%ebp), %eax		# the argument
12 | 	MOVL	%eax, %ecx		# and a second copy of it
13 | 	MULL	%ecx			# %edx:%eax = %eax * %ecx
14 | 	LEAVE
15 | 	RET				# low 32 bits are left in %eax
16 | 
17 | .data 
18 | .globl foo
19 | foo:
20 | 	.int	1764	# == 0x2A * 0x2A
21 | 
22 | 


--------------------------------------------------------------------------------
/stage-4/.gitignore:
--------------------------------------------------------------------------------
1 | .*.swp
2 | cc
3 | crt0.o
4 | libc.o
5 | 


--------------------------------------------------------------------------------
/stage-4/Makefile:
--------------------------------------------------------------------------------
  1 | # stage-4/Makefile
  2 | 
  3 | # Copyright (C) 2012, 2013, 2014, 2016, 2020 
  4 | # Richard Smith <richard@ex-parrot.com>
  5 | # All rights reserved.
  6 | 
  7 | SHELL  = /bin/sh
  8 | 
  9 | RM     = /bin/rm
 10 | CP     = /bin/cp
 11 | LN_S   = /bin/ln -sf
 12 | MAKE   = /usr/bin/make
 13 | CMP    = /usr/bin/cmp
 14 | 
 15 | BINDIR = ../bin
 16 | LIBDIR = ../lib
 17 | PATH   = $(BINDIR)
 18 | 
 19 | 
 20 | all:	init cc libc.o crt0.o
 21 | 
 22 | init:
 23 | 	@test -d $(LIBDIR) || $(MAKE) -C .. init
 24 | 	@test -x $(BINDIR)/ld || $(MAKE) -C ../stage-3 install
 25 | 
 26 | # Disable make's built-in suffix rules
 27 | .SUFFIXES:
 28 | 
 29 | %.o:	%.s
 30 | 	as $<
 31 | 
 32 | %.s:	%.c cc0
 33 | 	./cc0 -S $<
 34 | 
 35 | # LIB_OBJS  is a minimal C library written entirely in assembler.
 36 | # LIB0_OBJS adds bootstrap objects, also from assembler, that get replaced.
 37 | # LIB1_OBJS adds to, and replaces, those extras with versions written in C.
 38 | 
 39 | LIB_OBJS = string.o ctype.o unistd.o char.o imath.o
 40 | LIB0_OBJS = $(LIB_OBJS) error.o stdio.o memory.o
 41 | LIB1_OBJS = $(LIB_OBJS) exit.o output.o input.o malloc.o signal.o string2.o \
 42 |   stdarg.o
 43 | 
 44 | CC_OBJS  = i386.o scanner.o symtab.o expr.o stmt.o main.o
 45 | 
 46 | # libc0.o is the primitive libc, written solely in assembler.
 47 | # cc0     is the compiler linked against libc0.o.
 48 | # libc.o  is the complete stage-4 libc, with some C code compiled by cc0.
 49 | # cc      is the compiler relinked against the new libc.o.
 50 | 
 51 | libc0.o:	$(LIB0_OBJS)
 52 | 	ld -r -o $@ $^
 53 | 
 54 | cc0:	libc0.o crt0.o $(CC_OBJS)
 55 | 	ld -o $@ $^
 56 | 
 57 | 
 58 | libc.o:	$(LIB1_OBJS)
 59 | 	ld -r -o $@ $^
 60 | 
 61 | cc:	libc.o crt0.o $(CC_OBJS)
 62 | 	ld -o $@ $^
 63 | 
 64 | .INTERMEDIATE:	$(CC_OBJS) $(LIB1_OBJS) $(LIB0_OBJS) libc0.o cc0
 65 | 
 66 | check:	check-output check-input check-malloc
 67 | 
 68 | check-output: cc output.c output.o
 69 | 	$(LN_S) output.c output2.c
 70 | 	./cc -S output2.c
 71 | 	as output2.s
 72 | 	$(CMP) output2.o output.o
 73 | 	$(RM) output2.c output2.o output2.s
 74 | 
 75 | check-input: cc input.c input.o
 76 | 	$(LN_S) input.c input2.c
 77 | 	./cc -S input2.c
 78 | 	as input2.s
 79 | 	$(CMP) input2.o input.o
 80 | 	$(RM) input2.c input2.o input2.s
 81 | 
 82 | check-malloc: cc malloc.c malloc.o
 83 | 	$(LN_S) malloc.c malloc2.c
 84 | 	./cc -S malloc2.c
 85 | 	as malloc2.s
 86 | 	$(CMP) malloc2.o malloc.o
 87 | 	$(RM) malloc2.c malloc2.o malloc2.s
 88 | 
 89 | install: init cc libc.o crt0.o
 90 | 	$(CP) cc $(BINDIR)/cc0
 91 | 	$(CP) libc.o crt0.o $(LIBDIR)
 92 | 
 93 | clean:
 94 | 	$(RM) -f $(LIB0_OBJS) $(LIB1_OBJS) $(CC_OBJS) libc0.o libc.o cc0 cc crt0.o
 95 | 	$(RM) -f output.s input.s malloc.s signal.s exit.s string2.s stdarg.s
 96 | 	$(RM) -f output2.c output2.s output2.o input2.c input2.s input2.o
 97 | 	$(RM) -f malloc2.c malloc2.s malloc2.o
 98 | 
 99 | world:
100 | 	set -e; for TARGET in clean init all check install; do \
101 | 	    $(MAKE) $$TARGET; \
102 | 	done
103 | 


--------------------------------------------------------------------------------
/stage-4/README.txt:
--------------------------------------------------------------------------------
 1 | BOOTSTRAP STAGE 4
 2 | 
 3 | The main product of stage 4 is a compiler.  The original intention had 
 4 | been to implement a B compiler, perhaps with the caveat that the few
 5 | constructs that changed syntactically between B and C would be 
 6 | implemented in the C way -- for example, += not =+, and 'extern' not
 7 | 'extrn'.  But it rapidly became obvious that the B memory model was
 8 | not forwards compatible with the C memory model on machines with byte
 9 | addressing.  Implementing B on a x86 would require pointers to be
10 | represented as integer offsets into memory, and a pointer dereference,
11 | *ptr, would translate into a ModR/M + SIB instruction: (%ebx, %eax, 4) 
12 | where %ebx is NULL and %eax is the pointer value.  Taking the address 
13 | of an automatic variable would be worse and involve an explicit bit-
14 | shift.
15 | 
16 | Nevertheless, B's lack of a type system significantly simplifies the 
17 | implementation, and this feature of B has been retained in the stage 4 
18 | compiler.  Our single type is a 32-bit integer which also serves as an
19 | address.  Incrementing the value increments the underlying address by 
20 | one, as with a char* in C.  This means, that unlike in B, incrementing 
21 | an address does not move to the next integer in an array: use ptr += 4 
22 | for that.  However, subscripting with [] works with 32-bit word offsets,
23 | so that ptr[1] is equivalent to *(ptr + 4).  To treat a pointer as a
24 | string and get character-level access, B-style functions rchar(s,n)
25 | and lchar(s,n,c) get and set a character, respectively, at the given
26 | offset.  These are provided in char.s.  When types are introduced in a
27 | subsequent stage, this behaviour can be preserved because subscripting
28 | an int (other than with a pointer) is not legal in C.
29 | 
30 | For forwards compatibility, certain type constructs are allowed and
31 | completely ignored.  The (otherwise unsupported) int keyword may be
32 | placed immediately after auto, and the identifiers in an auto 
33 | declaration can be preceded with one or more *.  A list of parameter 
34 | declarations may precede the opening brace of a function.
35 | 
36 | Summary of differences from B:
37 | 
38 |   * Compound assignment operators are spelt OP= instead of =OP.
39 |   * There are no relop assignment operators (e.g. =<, =>=, ===).
40 |   * Definitions require '=' (i.e. 'i = 42' not 'i 42').
41 |   * Arrays require a size (i.e. 'auto a[1] = {0}' not 'auto a[] = {0}').
42 |   * Arrays with too many initialisers do not expand to accommodate them.
43 |   * The '{' ... '}' around single-statement functions are required.
44 |   * We support logical && and || complete with short circuiting.
45 |   * We support the 'continue' keyword from C.
46 |   * We support C's 'do' ... 'while' loop construct.
47 |   * We allow 'static' on global variables and functions.
48 |   * The return statement does not require brackets.
49 |   * We don't allow backspace (character 0x7F) or dot (.) in identifiers.
50 |   * The escape character in strings is \ not *, and there is no \e.
51 |   * We don't support the switch statement, or therefore case labels.
52 |   * We don't support goto and labeled statements.
53 |   * Not a difference, but B does not support 'for' loops and nor do we.
54 | 
55 | The stage 4 compiler is a simple affair, making a single pass over the 
56 | input file and code generation is done straight out of the parser,
57 | without building an abstract syntax tree (AST) representation.  This 
58 | means that the code generated is very inefficient, and even very obvious
59 | optimisations are not made.  As an example, all lvalue-to-rvalue
60 | conversions are done as separate statements, so to read a local auto
61 | variable, we generate LEA -offset(%ebp), %eax; MOVL (%eax), %eax
62 | instead of the more obvious MOVL -offset(%ebp), %eax.
63 | 
64 |   Usage: cc -S file.c
65 | 
66 | The compiler is initially linked against a trivial I/O library that
67 | implements the basic C I/O functions in an unbuffered manner, doing one
68 | syscall per call to getchar() or putchar().  Similarly, malloc() is
69 | implemented as in stage 3, by sending each allocation request to the
70 | kernel as a mmap(MAP_ANON) call.  The resultant compiler, cc0, is used
71 | to compile an improved set of I/O and memory-management functions that 
72 | do buffering.  These are linked together with ld -r into a proto-C-
73 | library, libc.o.  There is also a trivial startup file, crt0.o, that 
74 | implements _start() by calling exit(main()).  We then relink the
75 | compiler against these to produce a significantly faster compiler.
76 | 
77 | Linking a program is typically achieved with a command such as:
78 | 
79 |   ld -o prog libc.o crt0.o file1.o file2.o ...
80 | 
81 | 


--------------------------------------------------------------------------------
/stage-4/char.s:
--------------------------------------------------------------------------------
 1 | # char.s  --  functions write/read a character to/from a string
 2 | 
 3 | # Copyright (C) 2013 Richard Smith <richard@ex-parrot.com>
 4 | # All rights reserved.
 5 | 
 6 | # The functions here are needed to work around the lack of a character type
 7 | # in the B language.
 8 | 
 9 | .text
10 | 
11 | ####	#  Function:  char rchar(char const* s, size_t n);
12 | 	#
13 | 	#  B's char function, renamed to avoid forward compatibility problems
14 | 	#  with C's keyword.  Returns the byte S[N], zero padded in a word.
15 | .globl rchar
16 | rchar:
17 | 	PUSH	%ebp
18 | 	MOVL	%esp, %ebp
19 | 
20 | 	XORL	%eax, %eax		# clear the upper bytes of the result
21 | 	MOVL	8(%ebp), %edx		# s
22 | 	ADDL	12(%ebp), %edx		# s + n
23 | 	MOVB	(%edx), %al		# the byte s[n]
24 | 
25 | 	POP	%ebp			# %esp has not moved, so no LEAVE
26 | 	RET
27 | 
28 | ####	#  Function: char lchar(char* s, size_t n, char c);
29 | 	#
30 | 	#  B's lchar function.  Sets S[N] = C, and returns C, zero padded in
31 | 	#  a word.  Only the one byte at S[N] is written.
32 | .globl lchar
33 | lchar:
34 | 	PUSH	%ebp
35 | 	MOVL	%esp, %ebp
36 | 
37 | 	XORL	%eax, %eax		# clear the upper bytes of the result
38 | 	MOVB	16(%ebp), %al		# c
39 | 	MOVL	8(%ebp), %edx		# s
40 | 	ADDL	12(%ebp), %edx		# s + n
41 | 	MOVB	%al, (%edx)		# s[n] = c
42 | 
43 | 	POP	%ebp			# %esp has not moved, so no LEAVE
44 | 	RET
45 | 	
46 | 


--------------------------------------------------------------------------------
/stage-4/crt0.s:
--------------------------------------------------------------------------------
 1 | # crt0.s
 2 | 
 3 | # Copyright (C) 2012, 2013 Richard Smith <richard@ex-parrot.com>
 4 | # All rights reserved.
 5 | 
 6 | ####	#  Function:	void _start()
 7 | 	#
 8 | 	#  The ELF entry point.  Calls atexit(__io_flush), then exit(main()).
 9 | _start:
10 | 	XORL	%ebp, %ebp
11 | 	PUSH	%ebp			# a zero saved frame pointer
12 | 	MOVL	%esp, %ebp		# so argc is now at 4(%ebp)
13 | 
14 | 	MOVL	$__io_flush, %eax
15 | 	PUSH	%eax
16 | 	CALL	atexit
17 | 	POP	%eax			# discard the argument
18 | 
19 | 	LEA	8(%ebp), %eax		# argv
20 | 	PUSH	%eax
21 | 	PUSH	4(%ebp)			# argc
22 | 	CALL	main
23 | 	PUSH	%eax			# the return value of main
24 | 	CALL	exit
25 | 	HLT				# only reached if exit returns
26 | 
27 | 


--------------------------------------------------------------------------------
/stage-4/ctype.s:
--------------------------------------------------------------------------------
  1 | # ctype.s
  2 | 
  3 | # Copyright (C) 2012, 2013 Richard Smith <richard@ex-parrot.com>
  4 | # All rights reserved.
  5 | 
  6 | .data 
  7 | .local ctype_tbl
  8 | ctype_tbl:
  9 | 
 10 | #  This table contains character bits for the 0x80 characters in ASCII:
 11 | #
 12 | #    space = 0x1,  print = 0x2,  cntrl = 0x4,  upper = 0x8, 
 13 | #    lower = 0x10, alpha = 0x20, digit = 0x40, punct = 0x80, xdigit = 0x100
 14 | #
 15 | #  The categories match those in [category.ctype] in C++ and enable
 16 | #  efficient implementation of the C standard library ctype functions.
 17 | #  NB. We would ideally use .short, but the stage 3 as doesn't support 
 18 | #  either the directive or the necessary operations on 16-bit registers.
 19 | 
 20 | .int	0x004,  0x004,  0x004,  0x004,  0x004,  0x004,  0x004,  0x004	# 0x00
 21 | .int	0x004,  0x005,  0x005,  0x005,  0x005,  0x005,  0x004,  0x004	# 0x08
 22 | .int	0x004,  0x004,  0x004,  0x004,  0x004,  0x004,  0x004,  0x004	# 0x10
 23 | .int	0x004,  0x004,  0x004,  0x004,  0x004,  0x004,  0x004,  0x004	# 0x18
 24 | .int	0x003,  0x082,  0x082,  0x082,  0x082,  0x082,  0x082,  0x082	# 0x20
 25 | .int	0x082,  0x082,  0x082,  0x082,  0x082,  0x082,  0x082,  0x082	# 0x28
 26 | .int	0x142,  0x142,  0x142,  0x142,  0x142,  0x142,  0x142,  0x142	# 0x30
 27 | .int	0x142,  0x142,  0x082,  0x082,  0x082,  0x082,  0x082,  0x082	# 0x38
 28 | .int	0x082,  0x12A,  0x12A,  0x12A,  0x12A,  0x12A,  0x12A,  0x02A	# 0x40
 29 | .int	0x02A,  0x02A,  0x02A,  0x02A,  0x02A,  0x02A,  0x02A,  0x02A	# 0x48
 30 | .int	0x02A,  0x02A,  0x02A,  0x02A,  0x02A,  0x02A,  0x02A,  0x02A	# 0x50
 31 | .int	0x02A,  0x02A,  0x02A,  0x082,  0x082,  0x082,  0x082,  0x082	# 0x58
 32 | .int	0x082,  0x132,  0x132,  0x132,  0x132,  0x132,  0x132,  0x032	# 0x60
 33 | .int	0x032,  0x032,  0x032,  0x032,  0x032,  0x032,  0x032,  0x032	# 0x68
 34 | .int	0x032,  0x032,  0x032,  0x032,  0x032,  0x032,  0x032,  0x032	# 0x70
 35 | .int	0x032,  0x032,  0x032,  0x082,  0x082,  0x082,  0x082,  0x004	# 0x78
 36 | 
 37 | 
 38 | .text
 39 | .local getctype
 40 | ####	#  Function:	int getctype(int chr);
 41 | 	#
 42 | 	#  A utility function to return the character class(es) of CHR.
 43 | getctype:
 44 | 	PUSH	%ebp
 45 | 	MOVL	%esp, %ebp
 46 | 
 47 | 	#  Characters >= 0x80 (as unsigned, which catches EOF) return 0
 48 | 	XORL	%eax, %eax
 49 | 	MOVL	8(%ebp), %edx
 50 | 	CMPL	$0x80, %edx	
 51 | 	JAE	.L1
 52 | 
 53 | 	#  Find ctype_tbl[%edx]
 54 | 	#  Unfortunately, stage 3 as doesn't support MOVL (%eax,%edx,4), %eax.
 55 | 	MOVB	$2, %cl			# shift count for the SHLL below
 56 | 	SHLL	%edx			# Multiply %edx by 4 (table entries are 4-byte .int)
 57 | 	MOVL	$ctype_tbl, %eax
 58 | 	ADDL	%edx, %eax		# %eax = &ctype_tbl[chr]
 59 | 	MOVL	(%eax), %eax		# %eax = class bits for chr
 60 | 
 61 | .L1:
 62 | 	POP	%ebp
 63 | 	RET
 64 | 
 65 | 
 66 | 
 67 | ####	#  Function:	int isspace(int c);
 68 | 	#
 69 | 	#  Standard C library function to test for ASCII space characters.
 70 | .globl isspace
 71 | isspace:
 72 | 	PUSH	%ebp
 73 | 	MOVL	%esp, %ebp
 74 | 	PUSH	8(%ebp)
 75 | 	CALL	getctype		# %eax = class bits for c (0 if c >= 0x80)
 76 | 	POP	%ecx			# discard the argument
 77 | 	ANDL	$0x1, %eax		# keep only the space bit: result is 0 or 0x1
 78 | 	POP	%ebp
 79 | 	RET
 80 | 
 81 | 
 82 | ####	#  Function: int isdigit(int c);
 83 | 	#
 84 | 	#  Standard C library function to test for ASCII digits, 0-9.
 85 | .globl isdigit
 86 | isdigit:
 87 | 	PUSH	%ebp
 88 | 	MOVL	%esp, %ebp
 89 | 	PUSH	8(%ebp)
 90 | 	CALL	getctype		# %eax = class bits for c (0 if c >= 0x80)
 91 | 	POP	%ecx			# discard the argument
 92 | 	ANDL	$0x40, %eax		# keep only the digit bit: result is 0 or 0x40
 93 | 	POP	%ebp
 94 | 	RET
 95 | 
 96 | 
 97 | ####	#  Function:	int isalpha(int c);
 98 | 	#
 99 | 	#  Standard C library function to test for ASCII letter, a-z, A-Z.
100 | .globl isalpha
101 | isalpha:
102 | 	PUSH	%ebp
103 | 	MOVL	%esp, %ebp
104 | 	PUSH	8(%ebp)
105 | 	CALL	getctype		# %eax = class bits for c (0 if c >= 0x80)
106 | 	POP	%ecx			# discard the argument
107 | 	ANDL	$0x20, %eax		# keep only the alpha bit: result is 0 or 0x20
108 | 	POP	%ebp
109 | 	RET
110 | 	
111 | 
112 | ####	#  Function:	int isalnum(int c);
113 | 	#
114 | 	#  Standard C library function to test for ASCII letters or digits.
115 | .globl isalnum
116 | isalnum:
117 | 	PUSH	%ebp
118 | 	MOVL	%esp, %ebp
119 | 	PUSH	8(%ebp)
120 | 	CALL	getctype		# %eax = class bits for c (0 if c >= 0x80)
121 | 	POP	%ecx			# discard the argument
122 | 	ANDL	$0x60, %eax		# 0x60 == alpha (0x20) | digit (0x40); non-zero if either is set
123 | 	POP	%ebp
124 | 	RET
125 | 
126 | 	
127 | ####	#  Function:	int ispunct(int c);
128 | 	#
129 | 	#  Standard C library function to test for ASCII punctuation,
130 | 	#  i.e. anything that is not a digit, letter, space or control.
131 | .globl ispunct
132 | ispunct:
133 | 	PUSH	%ebp
134 | 	MOVL	%esp, %ebp
135 | 	PUSH	8(%ebp)
136 | 	CALL	getctype		# %eax = class bits for c (0 if c >= 0x80)
137 | 	POP	%ecx			# discard the argument
138 | 	ANDL	$0x80, %eax		# keep only the punct bit: result is 0 or 0x80
139 | 	POP	%ebp
140 | 	RET
141 | 
142 | 
143 | ####	#  Function: int isxdigit(int c);
144 | 	#
145 | 	#  Standard C library function to test for hex ASCII digits, 0-9A-Fa-f.
146 | .globl isxdigit
147 | isxdigit:
148 | 	PUSH	%ebp
149 | 	MOVL	%esp, %ebp
150 | 	PUSH	8(%ebp)
151 | 	CALL	getctype		# %eax = class bits for c (0 if c >= 0x80)
152 | 	POP	%ecx			# discard the argument
153 | 	ANDL	$0x100, %eax		# keep only the xdigit bit: result is 0 or 0x100
154 | 	POP	%ebp
155 | 	RET
156 | 
157 | 
158 | 


--------------------------------------------------------------------------------
/stage-4/error.s:
--------------------------------------------------------------------------------
 1 | # error.s  --  bootstrap code for error handling
 2 | 
 3 | # Copyright (C) 2012, 2013 Richard Smith <richard@ex-parrot.com>
 4 | # All rights reserved.
 5 | 
 6 | ####	#  Function:	void _error()
 7 | 	#
 8 | 	#  All library error handling is done here.  
 9 | 	#  (Note we can JMP here instead of CALLing it, as we never RET.)
10 | _error:
11 | 	MOVL	$1, %eax
12 | 	PUSH	%eax			# exit status 1
13 | 	CALL	exit
14 | 	HLT				# should not be reached: exit is not expected to return
15 | 
16 | 
17 | ####	#  Function:	int atexit( void (*fn)(void) )
18 | 	#
19 | 	#  Dummy function that does nothing.
20 | atexit:
21 | 	RET				# FN is ignored and nothing is registered; %eax is left untouched
22 | 
23 | 
24 | ####	#  Function:	void exit(int status)
25 | 	#
26 | 	#  Terminate program execution with given status by calling _exit.  (This bootstrap version does no stream clean-up itself.)
27 | exit:
28 | 	PUSH	%ebp
29 | 	MOVL	%esp, %ebp
30 | 
31 | 	PUSH	8(%ebp)			# status
32 | 	CALL	_exit
33 | 	HLT				# should not be reached: _exit is not expected to return
34 | 


--------------------------------------------------------------------------------
/stage-4/exit.c:
--------------------------------------------------------------------------------
 1 | /* exit.c
 2 |  *
 3 |  * Copyright (C) 2013 Richard Smith <richard@ex-parrot.com> 
 4 |  * All rights reserved.
 5 |  */
 6 | 
 7 | static atexit_vec[3] = { 0, 0, 0 };  /* start, end, end_store */
 8 | 
 9 | atexit(fn) {  /* Append FN to the handler vector, growing it when full */
10 |     if ( !atexit_vec[0] ) {  /* first registration: allocate room for one entry */
11 |         auto sz = 1 * 4;  /* 4 == sizeof( void (*)() ) */
12 |         auto p = malloc(sz);
13 |         atexit_vec[0] = atexit_vec[1] = p;  /* start == end: vector is empty */
14 |         atexit_vec[2] = p + sz;  /* end_store: one past the allocated space */
15 |     }
16 |     else {
17 |         auto sz = atexit_vec[1] - atexit_vec[0];  /* bytes currently in use */
18 |         auto p = atexit_vec[0];
19 |         if ( atexit_vec[1] == atexit_vec[2] ) {  /* vector is full: double its size */
20 |             p = realloc( atexit_vec[0], 2*sz );
21 |             atexit_vec[0] = p;
22 |             atexit_vec[1] = p + sz;
23 |             atexit_vec[2] = p + 2*sz;
24 |         }
25 |     }
26 | 
27 |     *atexit_vec[1] = fn;  /* store FN after the last registered handler */
28 |     atexit_vec[1] += 4;  /* sizeof( void (*)() ) */
29 |     return 1;  /* NOTE(review): ISO C atexit returns 0 on success; confirm no caller tests this */
30 | }
31 | 
32 | exit(code) {
33 |     /* Call registered functions in reverse order */
34 |     auto ptr = atexit_vec[1], start = atexit_vec[0];  /* both are 0 if nothing was registered */
35 |     while ( ptr > start ) {
36 |         ptr -= 4;  /* sizeof( void (*)() ) */
37 |         (*ptr)();  /* call the handler stored at PTR */
38 |     }
39 | 
40 |     /* We don't need to flush stdout and stderr, because there is an 
41 |      * atexit handler registered that does that. */
42 | 
43 |     _exit( code );  /* terminate with status CODE */
44 | }
45 | 


--------------------------------------------------------------------------------
/stage-4/imath.s:
--------------------------------------------------------------------------------
 1 | # imath.s  --  functions for integer maths
 2 | 
 3 | # Copyright (C) 2013 Richard Smith <richard@ex-parrot.com>
 4 | # All rights reserved.
 5 | 
 6 | .text
 7 | 
 8 | ####	#  Function:  int abs(int i);
 9 | 	#
10 | 	#  The C library's abs() function.
11 | .globl	abs
12 | abs:
13 | 	PUSH	%ebp
14 | 	MOVL	%esp, %ebp
15 | 	MOVL	8(%ebp), %eax		# %eax = i
16 | 
17 | 	#  Implementation without branching
18 | 	CDQ				# sign extend into %edx:%eax  (%edx = 0 or -1)
19 | 	XORL	%edx, %eax		# i < 0: %eax = ~i;  otherwise unchanged
20 | 	SUBL	%edx, %eax		# i < 0: %eax = ~i + 1 == -i;  otherwise unchanged
21 | 
22 | 	POP	%ebp
23 | 	RET
24 | 
25 | 
26 | ####	#  Function:	unsigned __mul_add( unsigned* val, 
27 | 	#                                   unsigned mul, unsigned add );
28 | 	# 
29 | 	#  Calculate *val = *val * mul + add and return the carry bits
30 | 	#  (i.e. the high 32 bits).  This is used in strtoul().
31 | .globl	__mul_add
32 | __mul_add:
33 | 	PUSH	%ebp
34 | 	MOVL	%esp, %ebp
35 | 
36 | 	MOVL	8(%ebp), %ecx		# %ecx = val
37 | 	MOVL	(%ecx), %eax		# %eax = *val
38 | 	MULL	12(%ebp)		# %edx:%eax <= *val * mul (unsigned)
39 | 
40 | 	XORL	%ecx, %ecx		# do first as it clears CF
41 | 	ADDL	16(%ebp), %eax		# %eax += add;  sets CF
42 | 	ADCL	%ecx, %edx		# %edx += CF (%ecx is zero)
43 | 	
44 | 	MOVL	8(%ebp), %ecx		# reload val (%ecx was zeroed above)
45 | 	MOVL	%eax, (%ecx)		# Update *val
46 | 	MOVL	%edx, %eax		# Return carry bits from %edx
47 | 
48 | 	POP	%ebp
49 | 	RET
50 | 
51 | 
52 | ####	#  Function:	void __add64( unsigned* hi1, unsigned* low1,
53 | 	#                             unsigned  hi2, unsigned  low2 );
54 | 	# 
55 | 	#  Treat hi1:low1 and hi2:low2 as 64-bit integers and add them, storing the sum in *hi1:*low1.
56 | .globl	__add64
57 | __add64:
58 | 	PUSH	%ebp
59 | 	MOVL	%esp, %ebp
60 | 
61 | 	MOVL	12(%ebp), %ecx		# %ecx = low1
62 | 	MOVL	20(%ebp), %eax		# %eax = low2
63 | 	ADDL	%eax, (%ecx)		# sets CF
64 | 
65 | 	MOVL	8(%ebp), %ecx		# %ecx = hi1  (MOVL leaves CF untouched)
66 | 	MOVL	16(%ebp), %eax		# %eax = hi2
67 | 	ADCL	%eax, (%ecx)		# adds in CF
68 | 
69 | 	POP	%ebp
70 | 	RET
71 | 
72 | ####	#  Function:	void __sub64( unsigned* hi1, unsigned* low1,
73 | 	#                             unsigned  hi2, unsigned  low2 );
74 | 	# 
75 | 	#  Treat hi1:low1 and hi2:low2 as 64-bit integers and subtract the 
76 | 	#  second from the first, storing the difference in *hi1:*low1.
77 | .globl	__sub64
78 | __sub64:
79 | 	PUSH	%ebp
80 | 	MOVL	%esp, %ebp
81 | 
82 | 	MOVL	12(%ebp), %ecx		# %ecx = low1
83 | 	MOVL	20(%ebp), %eax		# %eax = low2
84 | 	SUBL	%eax, (%ecx)		# *low1 -= low2;  sets CF on borrow
85 | 
86 | 	MOVL	8(%ebp), %ecx		# %ecx = hi1  (MOVL leaves CF untouched)
87 | 	MOVL	16(%ebp), %eax		# %eax = hi2
88 | 	SBBL	%eax, (%ecx)		# *hi1 -= hi2 + CF  (subtracts the borrow too)
89 | 
90 | 	POP	%ebp
91 | 	RET
92 | 
93 | 


--------------------------------------------------------------------------------
/stage-4/main.s:
--------------------------------------------------------------------------------
  1 | # main.s  --  entry point 
  2 | 
  3 | # Copyright (C) 2012, 2013 Richard Smith <richard@ex-parrot.com>
  4 | # All rights reserved.
  5 | 
  6 | .data
  7 | .globl frame_size
  8 | frame_size:
  9 | 	.int	0
 10 | 
 11 | ####	#  Function:	void strg_class(char* name, bool is_static);
 12 | 	#
 13 | 	#  Emit a .globl or .local directive for NAME as appropriate for 
 14 | 	#  the storage class in IS_STATIC
 15 | .data .LC2:
 16 | 	.string ".globl\t%s\n"
 17 | .LC3:
 18 | 	.string ".local\t%s\n"
 19 | .text
 20 | .local strg_class
 21 | strg_class:
 22 | 	PUSH	%ebp	
 23 | 	MOVL	%esp, %ebp
 24 | 
 25 | 	PUSH	8(%ebp)			# printf argument: name
 26 | 	MOVL	12(%ebp), %eax		# is_static
 27 | 	TESTL	%eax, %eax
 28 | 	JZ	.L9
 29 | 	MOVL	$.LC3, %eax		# static: use the ".local" format
 30 | 	JMP	.L10
 31 | .L9:
 32 | 	MOVL	$.LC2, %eax		# not static: use the ".globl" format
 33 | .L10:
 34 | 	PUSH	%eax
 35 | 	CALL	printf			# printf(format, name)
 36 | 
 37 | 	LEAVE				# also discards the two pushed printf arguments
 38 | 	RET
 39 | 
 40 | 
 41 | ####	#  Function:	int init_a_decl(int dim);
 42 | 	#
 43 | 	#    init-a-list ::= constant ( ',' constant )*
 44 | 	#
 45 | 	#    init-a-decl ::= ( '=' '{' init-a-list '}' )? ';'
 46 | 	#
 47 | 	#  Current token is '=' if an initialiser is present.   
 48 | 	#  Returns the number of uninitialised elements.
 49 | .data .LC5:
 50 | 	.string ".int\t"
 51 | .text
 52 | .local init_a_decl
 53 | init_a_decl:
 54 | 	PUSH	%ebp
 55 | 	MOVL	%esp, %ebp
 56 | 	PUSH	8(%ebp)			# -4(%ebp) local copy of dim
 57 | 
 58 | 	MOVL	token, %eax
 59 | 	CMPL	'=', %eax
 60 | 	JNE	.L11			# no initialiser: only a ';' is expected
 61 | 	CALL	next
 62 | 	CMPL	'{', %eax
 63 | 	JNE	_error
 64 | 
 65 | 	MOVL	$.LC5, %eax
 66 | 	PUSH	%eax
 67 | 	CALL	putstr			# begin the .int directive
 68 | 	POP	%eax
 69 | 
 70 | .L12:	#  Loop over initialisers.
 71 | 	#  Do we have too many?
 72 | 	DECL	-4(%ebp)		# one fewer element left uninitialised
 73 | 	CMPL	$0, -4(%ebp)
 74 | 	JL	_error			# more initialisers than dim
 75 | 	
 76 | 	CALL	next
 77 | 	CMPL	'num', %eax
 78 | 	JE	.L13
 79 | 	CMPL	'char', %eax
 80 | 	JE	.L13
 81 | 	CMPL	'id', %eax
 82 | 	JE	.L13
 83 | 	JMP	_error
 84 | 
 85 | .L13:	# number, char or identifier: its text is in VALUE
 86 | 	MOVL	$value, %eax
 87 | 	PUSH	%eax
 88 | 	CALL	putstr			# copy the constant's text to the output
 89 | 	POP	%eax
 90 | 
 91 | 	CALL	next
 92 | 	CMPL	',', %eax
 93 | 	JNE	.L14
 94 | 
 95 | 	PUSH	%eax
 96 | 	CALL	putchar			# emit the ',' and go round for the next constant
 97 | 	POP	%eax
 98 | 	JMP	.L12
 99 | 
100 | .L14:
101 | 	CMPL	'}', %eax		# anything other than ',' must close the list
102 | 	JNE	_error
103 | 
104 | 	MOVL	'\n', %eax
105 | 	PUSH	%eax
106 | 	CALL	putchar			# end the .int line
107 | 	POP	%eax
108 | 
109 | 	CALL	next
110 | .L11:
111 | 	CMPL	';', %eax		# %eax is the current token on both paths here
112 | 	JNE	_error
113 | 	CALL	next
114 | 
115 | 	POP	%eax			# return the remaining count from -4(%ebp)
116 | 	POP	%ebp
117 | 	RET
118 | 
119 | 
120 | ####	#  Function:	void array_decl(char* name, bool is_static);
121 | 	#
122 | 	#    array-decl ::= ( 'static' )? name '[' number ']' init-a-decl
123 | 	#
124 | 	#  Process an array declaration for NAME.  Current token is '['.
125 | 
126 | .data .LC4:
127 | 	.string ".data\n%s:\n"
128 | .LC6:
129 | 	.string ".zero\t%d\n"
130 | .text
131 | .local array_decl
132 | array_decl:
133 | 	PUSH	%ebp
134 | 	MOVL	%esp, %ebp
135 | 
136 | 	#  Require an array size
137 | 	CALL	next
138 | 	CMPL	'num', %eax
139 | 	JNE	_error
140 | 
141 | 	PUSH	%eax			# endptr slot
142 | 	MOVL	%esp, %ecx		# %ecx = &endptr
143 | 	
144 | 	XORL	%eax, %eax
145 | 	PUSH	%eax			# guess base
146 | 	PUSH	%ecx			# &endptr
147 | 	MOVL	$value, %eax
148 | 	PUSH	%eax
149 | 	CALL	strtol			# strtol(value, &endptr, 0)
150 | 	ADDL	$12, %esp
151 | 	POP	%ecx			# %ecx = endptr
152 | 	PUSH	%eax			# store size (element count) at -4(%ebp)
153 | 
154 | 	#  %ecx contains the end ptr
155 | 	MOVL	(%ecx), %ecx
156 | 	CMPB	$0, %cl			# the whole of VALUE must have been consumed
157 | 	JNE	_error
158 | 
159 | 	MOVB	$2, %cl			# *= sizeof(int)
160 | 	SHLL	%eax
161 | 	PUSH	%eax			# byte size: probably not needed?
162 | 	XORL	%eax, %eax
163 | 	PUSH	%eax			# not an lvalue
164 | 	PUSH	%eax			# frame offset == 0 (i.e. for linker)
165 | 	PUSH	8(%ebp)			# name
166 | 	CALL	save_sym
167 | 	ADDL	$16, %esp		# drop the four save_sym arguments
168 | 	
169 | 	CALL	next
170 | 	CMPL	']', %eax
171 | 	JNE	_error
172 | 
173 | 	#  Emit the symbol name
174 | 	PUSH	8(%ebp)
175 | 	MOVL	$.LC4, %eax
176 | 	PUSH	%eax
177 | 	CALL	printf
178 | 	POP	%eax
179 | 	POP	%eax
180 | 
181 | 	#  Do a series of .int decls for the initialisers
182 | 	CALL	next
183 | 	CALL	init_a_decl		# the size stored at -4(%ebp) is on top of the stack, so it is the dim argument
184 | 
185 | 	MOVB	$2, %cl			# *= sizeof(int)
186 | 	SHLL	%eax			# %eax = bytes left uninitialised
187 | 	TESTL	%eax, %eax
188 | 	JZ	.L15			# fully initialised: no .zero needed
189 | 
190 | 	PUSH	%eax
191 | 	MOVL	$.LC6, %eax
192 | 	PUSH	%eax
193 | 	CALL	printf			# emit .zero for the uninitialised tail
194 | 	POP	%eax
195 | 	POP	%eax
196 | 
197 | .L15:
198 | 	LEAVE
199 | 	RET	
200 | 
201 | 
202 | ####	#  Function:	void int_decl(char* name, bool is_static);
203 | 	#
204 | 	#    constant ::= number | char | name
205 | 	#
206 | 	#    int_decl ::= ( 'static' )? name ( '=' constant )? ';'
207 | 	#
208 | 	#  Process an integer declaration for NAME.  The name has been read, 
209 | 	#  and TOKEN advanced to the next token: either '=' or ';'
210 | .data .LC1:
211 | 	.string ".data\n%s:\n\t.int %s\n"
212 | .text
213 | .local int_decl
214 | int_decl:
215 | 	PUSH	%ebp	
216 | 	MOVL	%esp, %ebp
217 | 
218 | 	XORL	%eax, %eax
219 | 	PUSH	%eax			# external, so zero size on stack
220 | 	INCL	%eax
221 | 	PUSH	%eax			# objects are lvalues
222 | 	DECL	%eax
223 | 	PUSH	%eax			# frame_off == 0 for undefined
224 | 	PUSH	8(%ebp)			# name
225 | 	CALL	save_sym
226 | 	ADDL	$16, %esp		# drop the four save_sym arguments
227 | 
228 | 	MOVL	'0', %eax
229 | 	PUSH	%eax			# default initialiser: the text '0' as a word at -4(%ebp)
230 | 	MOVL	token, %eax
231 | 	CMPL	'=', %eax
232 | 	JNE	.L3			# no initialiser given
233 | 
234 | 	#  Read an initialiser
235 | 	CALL	next
236 | 	CMPL	'num', %eax
237 | 	JE	.L3a
238 | 	CMPL	'char', %eax
239 | 	JE	.L3a
240 | 	CMPL	'id', %eax
241 | 	JE	.L3a
242 | 	JMP	_error
243 | 
244 | .L3a:
245 | 	#  Because the next token is punctuation, value is not set.
246 | 	CALL	next
247 | 	MOVL	$value, %eax
248 | 	PUSH	%eax			# printf argument: initialiser text
249 | 	JMP	.L4
250 | .L3:
251 | 	PUSH	%esp			# printf argument: address of the default '0' word
252 | .L4:
253 | 	PUSH	8(%ebp)			# printf argument: name
254 | 	MOVL	$.LC1, %eax
255 | 	PUSH	%eax
256 | 	CALL	printf
257 | 	POP	%eax
258 | 	POP	%eax
259 | 	POP	%eax
260 | 
261 | 	MOVL	token, %eax
262 | 	CMPL	';', %eax		# the declaration must end with ';'
263 | 	JNE	_error
264 | 	CALL	next
265 | 
266 | 	LEAVE
267 | 	RET
268 | 
269 | 
270 | ####	#  Function:	void func_decl(char* name, bool is_static);
271 | 	#
272 | 	#    func-params ::= name ( ',' name )*
273 | 	#
274 | 	#    func-head   ::= ( 'static' )? name '(' func-params? ')'
275 | 	#
276 | 	#    func-decl   ::= func-head param-decls block
277 | 	#
278 | 	#  Process a function declaration.  Current token is '('.
279 | .local func_decl
280 | func_decl:
281 | 	PUSH	%ebp	
282 | 	MOVL	%esp, %ebp
283 | 
284 | 	CALL	new_scope
285 | 	XORL	%eax, %eax
286 | 	MOVL	%eax, frame_size	# frame_size = 0
287 | 
288 | 	CALL	next
289 | 	CMPL	')', %eax
290 | 	JE	.L5			# empty parameter list
291 | 
292 | 	MOVL	$4, %ecx
293 | 	PUSH	%ecx		# all parameters have size 4	-4(%ebp)
294 | 	MOVL	$1, %ecx
295 | 	PUSH	%ecx		# parameters are lvalues	-8(%ebp)
296 | 	MOVL	$8, %ecx
297 | 	PUSH	%ecx		# frame_off			-12(%ebp)
298 | 
299 | .L5a:
300 | 	CMPL	'id', %eax
301 | 	JNE	_error
302 | 
303 | 	MOVL	$value, %eax
304 | 	PUSH	%eax
305 | 	CALL	save_sym		# the three words pushed above are the remaining arguments
306 | 	POP	%eax
307 | 	CALL	next
308 | 
309 | 	ADDL	$4, -12(%ebp)		# frame_off for the next parameter
310 | 	CMPL	',', %eax
311 | 	JNE	.L5b
312 | 	CALL	next
313 | 	JMP	.L5a
314 | 	
315 | .L5b:
316 | 	CMPL	')', %eax
317 | 	JNE	_error
318 | 	POP	%eax			# pops one of the three words; LEAVE releases the rest
319 | .L5:
320 | 	PUSH	8(%ebp)
321 | 	CALL	prolog			# prolog(name)
322 | 	POP	%eax
323 | 
324 | 	CALL	new_label	# For return
325 | 	PUSH	%eax
326 | 	XORL	%eax, %eax
327 | 	PUSH	%eax		# two zero words; presumably arguments for block -- TODO confirm
328 | 	PUSH	%eax
329 | 
330 | 	CALL	next
331 | 	CALL	param_decls
332 | 	CALL	block
333 | 	POP	%eax
334 | 	POP	%eax
335 | 	CALL	local_label	# the return label is now on top of the stack
336 | 	POP	%eax
337 | 
338 | 	#  Don't call clear_stack because this scope only contains 
339 | 	#  function parameters, and the caller cleans up the stack.
340 | 	CALL	end_scope
341 | 	CALL	epilog
342 | 
343 | 	LEAVE
344 | 	RET
345 | 
346 | ####	#  Function:	void ext_decl();
347 | 	#
348 | 	#  Process an external declaration.
349 | 	#
350 | 	#    ext-decl ::= func-decl | int-decl | array-decl
351 | 	#  
352 | 	#  When called, TOKEN should be the name.
353 | .local ext_decl
354 | ext_decl:
355 | 	PUSH	%ebp	
356 | 	MOVL	%esp, %ebp
357 | 
358 | 	SUBL	$16, %esp		# -16(%ebp) buffer
359 | 
360 | 	#  Are we static?
361 | 	XORL	%eax, %eax
362 | 	PUSH	%eax			# bool is_static = false;  -20(%ebp)
363 | 	MOVL	token, %eax
364 | 	CMPL	'stat', %eax
365 | 	JNE	.L8
366 | 	INCL	-20(%ebp)		# is_static = 1
367 | 	CALL	next
368 | .L8:
369 | 	CALL	skip_type
370 | .L8a:
371 | 	#  Skip any pointer declarators
372 | 	MOVL	token, %eax
373 | 	CMPL	'*', %eax
374 | 	JNE	.L8b
375 | 	CALL	next
376 | 	JMP	.L8a
377 | .L8b:
378 | 	#  Check that we've read a identifier first
379 | 	CMPL	'id', %eax
380 | 	JNE	_error
381 | 
382 | 	#  Store a copy of the name.  (We can't emit the label yet as we
383 | 	#  don't yet know which section it belongs in.)
384 | 	MOVL	$value, %eax
385 | 	PUSH	%eax			# src
386 | 	CALL	strlen
387 | 	CMPL	$11, %eax		# names longer than 11 characters are rejected
388 | 	JG	_error
389 | 	LEA	-16(%ebp), %eax
390 | 	PUSH	%eax			# dest
391 | 	CALL	strcpy			# strcpy(buffer, value)
392 | 	POP	%ecx			# %ecx = dest (the buffer)
393 | 	POP	%eax			# discard src
394 | 	PUSH	%ecx			# Pointer to name
395 | 
396 | 	CALL	strg_class		# stack top is name, then is_static: these are the two arguments
397 | 	
398 | 	#  Get the next token and dispatch based on it.
399 | 	CALL	next			# the name / is_static pair stays pushed for the call chosen below
400 | 	CMPL	'(', %eax
401 | 	JE	.L6
402 | 	CMPL	'[', %eax
403 | 	JE	.L6a
404 | 	CALL	int_decl
405 | 	JMP	.L7
406 | .L6:	
407 | 	CALL	func_decl
408 | 	JMP	.L7
409 | .L6a:
410 | 	CALL	array_decl
411 | .L7:
412 | 	LEAVE
413 | 	RET
414 | 
415 | ####	#  Function:  void program();
416 | 	#  
417 | 	#    program ::= ext-decl*
418 | 	#
419 | program:
420 | 	PUSH	%ebp
421 | 	MOVL	%esp, %ebp
422 | .L1:
423 | 	MOVL	token, %eax
424 | 	CMPL	$-1, %eax		# stop once the current token is -1
425 | 	JE	.L2
426 | 
427 | 	CALL	ext_decl
428 | 
429 | 	#  Add a blank line 
430 | 	MOVL	'\n', %eax
431 | 	PUSH	%eax
432 | 	CALL	putchar
433 | 	POP	%eax
434 | 	JMP	.L1
435 | .L2:
436 | 	POP	%ebp
437 | 	RET
438 | 
439 | 
440 | ####	#  Function:  int main(int argc, char** argv);
441 | #
442 | main:
443 | 	PUSH	%ebp
444 | 	MOVL	%esp, %ebp
445 | 
446 | 	CMPL	$3, 8(%ebp)	# Require at least two arguments: -S file.c
447 | 	JL	_error
448 | 
449 | 	#  Check that argv[1] == '-S'
450 | 	MOVL	'-S', %eax
451 | 	PUSH	%eax		# the literal as a word on the stack
452 | 	PUSH	%esp		# address of that word
453 | 	MOVL	12(%ebp), %eax
454 | 	PUSH	4(%eax)		# argv[1]
455 | 	CALL	strcmp
456 | 	ADDL	$12, %esp
457 | 	TESTL	%eax, %eax
458 | 	JNZ	_error
459 | 
460 | 	#  Do we have -o file.o?
461 | 	MOVL	$8, %edx	# 8 == 4*argn
462 | 	CMPL	$3, 8(%ebp)	# If just two args (-S file.c)
463 | 	JE	.L16
464 | 	CMPL	$5, 8(%ebp)	# Otherwise four (-S -o file.o file.c)
465 | 	JNE	_error
466 | 
467 | 	#  Check that argv[2] == '-o'
468 | 	MOVL	'-o', %eax
469 | 	PUSH	%eax		# the literal as a word on the stack
470 | 	PUSH	%esp		# address of that word
471 | 	MOVL	12(%ebp), %eax
472 | 	PUSH	8(%eax)		# argv[2]
473 | 	CALL	strcmp
474 | 	ADDL	$12, %esp
475 | 	TESTL	%eax, %eax
476 | 	JNZ	_error
477 | 
478 | 	MOVL	$16, %edx	# 16 == 4*argn
479 | 
480 | .L16:
481 | 	#  Use argv[argn] (byte offset 4*argn is in %edx) as a filename and reopen stdin as it.
482 | 	MOVL	'r', %eax
483 | 	PUSH	%eax			# the mode string as a word on the stack
484 | 	MOVL	%esp, %ecx
485 | 	MOVL	stdin, %eax
486 | 	PUSH	%eax			# stream
487 | 	PUSH	%ecx			# mode
488 | 	MOVL	12(%ebp), %eax
489 | 	ADDL	%edx, %eax
490 | 	PUSH	(%eax)			# filename (argv[argn])
491 | 	CALL	freopen
492 | 	ADDL	$16, %esp		# drop the three arguments and the mode word
493 | 	TESTL	%eax, %eax
494 | 	JZ	_error
495 | 
496 | 	#  Do we have an explicit output filename?
497 | 	MOVL	12(%ebp), %eax
498 | 	MOVL	12(%eax), %edx		# argv[3]: file.o when -o was given
499 | 	CMPL	$5, 8(%ebp)
500 | 	JE	.L17
501 | 
502 | 	#  Construct the output filename.
503 | 	MOVL	12(%ebp), %eax
504 | 	PUSH	8(%eax)			# argv[2], the input filename
505 | 	CALL	strlen
506 | 	POP	%edx
507 | 	ADDL	%eax, %edx		# %edx = one past the last character of argv[2]
508 | 	CMPB	'c', -1(%edx)		# the name must end in .c
509 | 	JNE	_error
510 | 	CMPB	'.', -2(%edx)
511 | 	JNE	_error
512 | 	MOVB	's', -1(%edx)		# rewrite the suffix to .s in place
513 | 	MOVL	12(%ebp), %eax
514 | 	MOVL	8(%eax), %edx		# %edx = argv[2], now the output filename
515 | 
516 | .L17:
517 | 	#  And reopen stdout as it.
518 | 	MOVL	'w', %eax
519 | 	PUSH	%eax		# the mode string as a word on the stack
520 | 	MOVL	%esp, %ecx
521 | 	MOVL	stdout, %eax
522 | 	PUSH	%eax		# stream
523 | 	PUSH	%ecx		# mode
524 | 	PUSH	%edx		# filename
525 | 	CALL	freopen
526 | 	ADDL	$16, %esp
527 | 	TESTL	%eax, %eax
528 | 	JZ	_error
529 | 
530 | 	CALL	init_symtab
531 | 	CALL	next		# read the first token
532 | 	CALL	program
533 | 	XORL	%eax, %eax	# return 0
534 | 	LEAVE
535 | 	RET
536 | 
537 | 
538 | ####	#  Function:	void _error()
539 | 	#
540 | 	#  All error handling is done here.  
541 | 	#  NB. There is a duplicate (identical) definition in libc0.o
542 | 	#  (Note we can JMP here instead of CALLing it, as we never RET.)
543 | _error:
544 | 	MOVL	$1, %eax
545 | 	PUSH	%eax			# exit status 1
546 | 	CALL	exit
547 | 	HLT				# should not be reached: exit is not expected to return
548 | 
549 | 
550 | 


--------------------------------------------------------------------------------
/stage-4/malloc.c:
--------------------------------------------------------------------------------
  1 | /* malloc.c
  2 |  *
  3 |  * Copyright (C) 2013, 2018 Richard Smith <richard@ex-parrot.com> 
  4 |  * All rights reserved.
  5 |  */
  6 | 
  7 | 
  8 | /* struct header { 
  9 |  *   size_t         size;
 10 |  *   bool           is_free;
 11 |  *   struct header* next;
 12 |  *   struct header* prev;
 13 |  *   void*          page_start;     // effectively a page id
 14 |  * }; */
 15 | 
 16 | static
 17 | __heap = 0;
 18 | 
 19 | /* This is a function pointer:  void (*__memdbgfn)( int op, void *ptr )
 20 |  * If set, it is called just before malloc() returns with op == 1, and 
 21 |  * just after free() is called with a non-zero pointer with op == 2. */
 22 | static
 23 | __memdbgfn = 0;
 24 | 
 25 | __dbg_alloc( fn ) {  /* Install FN as the allocation debug hook; 0 disables it */
 26 |     __memdbgfn = fn;
 27 | }
 28 | 
 29 | static
 30 | __find_blk( last, size ) {
 31 |     /* Look for a block of at least SIZE bytes in the list at LAST */
 32 |     while ( last && !( last[1] && last[0] >= size ) )  /* skip blocks that are in use or too small */
 33 |         last = last[2];  /* follow the next pointer */
 34 |     return last;  /* the block found, or 0 if the end of the list was reached */
 35 | }
 36 | 
 37 | static
 38 | __new_blk( size ) {
 39 |     /* Allocate a new block for at least SIZE bytes and prepend to __heap */
 40 |     auto blksz = size > 0x0FEC ? size : 0x0FEC;  /* 0x0FEC == 0x1000 - 20 */
 41 |     auto p = mmap(0, blksz + 20, 0x3, 0x22, -1, 0);  /* 20 == header size.  NOTE(review): the result is not checked for failure */
 42 |     p[0] = blksz;    /* size: usable bytes after the header */
 43 |     p[1] = 1;        /* is_free */
 44 |     p[2] = __heap;   /* next: the old head of the list */
 45 |     p[3] = 0;        /* prev: none, this is the new head */
 46 |     p[4] = p;        /* page_start: identifies this mapping */
 47 |     if (__heap) __heap[3] = p;  /* old head's prev now points at the new block */
 48 |     __heap = p;
 49 |     return p;
 50 | }
 51 | 
 52 | static
 53 | __frag( blk, size ) {
 54 |     /* If the block BLK is significantly bigger than SIZE bytes, then fragment
 55 |      * it into a block of exactly SIZE bytes and a second block for the rest */
 56 |     if ( blk[0] >= size + 20 + 4 ) {  /* room for a second header (20) plus at least 4 bytes */
 57 |         auto b2 = blk + size + 20;  /* second header starts right after BLK's SIZE bytes */
 58 |         b2[0] = blk[0] - size - 20;  /* size: whatever is left over */
 59 |         b2[1] = 1;                   /* is_free */
 60 |         b2[2] = blk[2];              /* next: BLK's old successor */
 61 |         b2[3] = blk;                 /* prev */
 62 |         b2[4] = blk[4];              /* page_start: same mapping as BLK */
 63 |         if (b2[2]) b2[2][3] = b2;    /* successor's prev now points at the new block */
 64 |         blk[0] = size;
 65 |         blk[2] = b2;
 66 |     }
 67 | }
 68 | 
 69 | static
 70 | __defrag2( blk, b2 ) {
 71 |     /* The block at BLK and the next block, which is at B2, are both empty
 72 |      * so coalesce them into a single block. */
 73 |     blk[0] += b2[0] + 20;  /* absorb B2's data and its 20-byte header */
 74 |     blk[2] = b2[2];        /* unlink B2 from the list */
 75 |     if (blk[2]) blk[2][3] = blk;  /* the new successor's prev points back at BLK */
 76 | }
 77 | 
 78 | static
 79 | __defrag( blk ) {
 80 |     /* See whether the block at BLK can be coalesced with either neighbour */
 81 |     if ( blk[2] && blk[4] == blk[2][4] && blk[2][1] ) __defrag2( blk, blk[2] );  /* next is free and has the same page_start */
 82 |     if ( blk[3] && blk[4] == blk[3][4] && blk[3][1] ) __defrag2( blk[3], blk );  /* prev is free and has the same page_start */
 83 | }
 84 | 
 85 | /* The C library malloc() */
 86 | malloc( size ) {
 87 |     auto p = __find_blk( __heap, size );  /* look for a free block that is big enough */
 88 |     if (!p) p  = __new_blk( size );       /* none found: map a new one */
 89 |     __frag( p, size );                    /* split off any large unused tail */
 90 |     p[1] = 0;                             /* mark the block as in use */
 91 |     auto ptr = p + 20;                    /* user data starts after the 20-byte header */
 92 |     if (__memdbgfn) __memdbgfn( 1, ptr );
 93 |     return ptr;
 94 | }
 95 | 
 96 | /* The C library free() */
 97 | free( ptr ) {
 98 |     if (ptr) {  /* a null PTR is ignored */
 99 |         if (__memdbgfn) __memdbgfn( 2, ptr );
100 |         auto p = ptr - 20;  /* step back to the block header */
101 |         if (p[1]) abort(); /* Double free */
102 |         p[1] = 1;  /* mark the block as free */
103 |         __defrag( p );  /* merge with free neighbours where possible */
104 |     }
105 | }
106 | 
107 | /* The C library realloc() */
108 | realloc( ptr, size ) {
109 |     if ( !ptr )   /* a null PTR behaves like malloc( size ) */
110 |         return malloc( size );
111 | 
112 |     auto h = ptr - 20;  /* step back to the block header */
113 |     if ( h[2] && h[4] == h[2][4] && h[2][1] && h[0] + h[2][0] + 20 >= size ) {  /* next block is free, on the same page, and the pair is big enough */
114 |         __defrag2( h, h[2] );  /* absorb the next block in place */
115 |         __frag( h, size );     /* give back any large unused tail */
116 |         return ptr;
117 |     }
118 |     else {
119 |         auto p = malloc(size);  /* otherwise move the data to a fresh block */
120 |         auto cpsz = h[0] < size ? h[0] : size;  /* copy the smaller of the old and new sizes */
121 |         memcpy( p, ptr, cpsz );
122 |         free( ptr );
123 |         return p;
124 |     }
125 | }
126 | 


--------------------------------------------------------------------------------
/stage-4/memory.s:
--------------------------------------------------------------------------------
 1 | # memory.s  --  bootstrap code for memory handling
 2 | 
 3 | # Copyright (C) 2012, 2013 Richard Smith <richard@ex-parrot.com>
 4 | # All rights reserved.
 5 | 
 6 | ####	#  Function: void* malloc(size_t sz)
 7 | 	#  Crude dynamic memory allocation, by punting directly to kernel 
 8 | malloc:
 9 | 	PUSH	%ebp
10 | 	MOVL	%esp, %ebp
11 | 	PUSH	%ebx			# saved at -4(%ebp)
12 | 
13 | 	#  How many bytes do we need?
14 | 	MOVL	8(%ebp), %ecx		# sz
15 | 	ADDL	$0x4, %ecx		# header containing size
16 | 
17 | 	#  Punt off to mmap(MAP_ANON).  Highly suboptimal, but simple to code.
18 | 	XORL	%eax, %eax		# 0 offset
19 | 	PUSH	%eax
20 | 	DECL	%eax
21 | 	PUSH	%eax			# fd -1 for MAP_ANON
22 | 	MOVL	$0x22, %eax		# MAP_ANON (0x20) | MAP_PRIVATE (0x2)
23 | 	PUSH	%eax
24 | 	MOVL	$0x3, %eax		# PROT_READ (0x1) | PROT_WRITE (0x2)
25 | 	PUSH	%eax
26 | 	PUSH	%ecx			# size  (this slot is -24(%ebp))
27 | 	XORL	%eax, %eax		# NULL 
28 | 	PUSH	%eax
29 | 	CALL	mmap
30 | 	CMPL	$-1, %eax
31 | 	JE	_error
32 | 	MOVL	-24(%ebp), %ecx		# restore %ecx
33 | 	
34 | 	#  Write size into malloc header
35 | 	MOVL	%ecx, (%eax)		# the stored size includes the 4-byte header
36 | 	ADDL	$4, %eax		# return the address just past the header
37 | 
38 | 	#  Cleanup
39 | 	MOVL	-4(%ebp), %ebx
40 | 	LEAVE				# also discards the six pushed mmap arguments
41 | 	RET
42 | 
43 | 
44 | ####	#  Function: void* realloc(void* ptr, size_t sz)
45 | 	#  Grows memory allocated by malloc, above
46 | realloc:
47 | 	PUSH	%ebp
48 | 	MOVL	%esp, %ebp
49 | 	PUSH	%ebx
50 | 	PUSH	%esi
51 | 
52 | 	#  Leave space for header (4 bytes)
53 | 	MOVL	8(%ebp), %ebx		# ptr
54 | 	SUBL	$4, %ebx		# %ebx = start of the mapping (the header)
55 | 	MOVL	(%ebx), %ecx		# old size
56 | 	MOVL	12(%ebp), %edx		# size
57 | 	ADDL	$4, %edx		# new size including the header
58 | 	PUSH	%edx			# keep it for the header write below
59 | 
60 | 	#  Get kernel to mremap the block
61 | 	MOVL	$1, %esi		# 1 == MREMAP_MAYMOVE
62 | 	MOVL	$163, %eax		# 163 == __NR_mremap
63 | 	INT	$0x80			# mremap(%ebx, %ecx, %edx, %esi)
64 | 	CMPL	$-4096, %eax		# -4095 <= %eax < 0 for errno
65 | 	JA	_error			# unsigned comparison handles above
66 | 
67 | 	#  Write header
68 | 	POP	%ecx			# the new size pushed above
69 | 	MOVL	%ecx, (%eax)
70 | 	ADDL	$4, %eax		# return the address just past the header
71 | 
72 | 	#  Cleanup
73 | 	POP	%esi
74 | 	POP	%ebx
75 | 	POP	%ebp
76 | 	RET
77 | 
78 | 
79 | ####	#  Function: void free(void* ptr)
80 | 	#  Releases memory allocated by malloc, above
81 | free:
82 | 	PUSH	%ebp
83 | 	MOVL	%esp, %ebp
84 | 	PUSH	%ebx
85 | 
86 | 	MOVL	8(%ebp), %ebx		# ptr
87 | 	SUBL	$4, %ebx		# start of the mapping.  NOTE(review): a null ptr is not special-cased
88 | 	MOVL	(%ebx), %ecx		# old size
89 | 	MOVL	$91, %eax		# 91 == __NR_munmap
90 | 	INT	$0x80			# munmap(%ebx, %ecx)
91 | 	CMPL	$-4096, %eax		# -4095 <= %eax < 0 for errno
92 | 	JA	_error			# unsigned comparison handles above
93 | 
94 | 	#  Cleanup
95 | 	POP	%ebx
96 | 	POP	%ebp
97 | 	RET
98 | 


--------------------------------------------------------------------------------
/stage-4/scanner.s:
--------------------------------------------------------------------------------
  1 | # scanner.s  --  code to tokenise the B input stream
  2 | 
  3 | # Copyright (C) 2012, 2013 Richard Smith <richard@ex-parrot.com>
  4 | # All rights reserved.
  5 | 
  6 | .data 
  7 | 
  8 | #  We use TOKEN as an enum for the different token types.
  9 | .globl token
 10 | token:
 11 | 	.int	0
 12 | 
 13 | #  The VALUE buffer contains tokens as they are being read.
 14 | .globl value
 15 | value:
 16 | 	.zero	80
 17 | 
 18 | 
 19 | .text
 20 | 
 21 | ####	#  Function:	void skip_ccomm();
 22 | 	#
 23 | 	#  Skips over a C-style comment (the opening /* having been read
 24 | 	#  already).  Jumps to _error if EOF is reached inside the comment.
 25 | .local skip_ccomm
 26 | skip_ccomm:
 27 | 	PUSH	%ebp
 28 | 	MOVL	%esp, %ebp
 29 | .L20:	#  Scan forward to the next '*'.
 30 | 	CALL	getchar
 31 | 	CMPL	$-1, %eax
 32 | 	JE	_error
 33 | 	CMPB	'*', %al
 34 | 	JNE	.L20
 35 | .L20a:	#  Seen a '*': the comment ends if the next character is '/'.
 36 | 	CALL	getchar
 37 | 	CMPL	$-1, %eax
 38 | 	JE	_error
 39 | 	CMPB	'*', %al		# another '*' may itself begin the terminator,
 40 | 	JE	.L20a			# so stay in this state (handles a run of '*' before '/')
 41 | 	CMPB	'/', %al
 42 | 	JNE	.L20			# anything else: go back to looking for a '*'
 43 | 	POP	%ebp
 44 | 	RET
 45 | 
 46 | 
 47 | ####	#  Function:	int skip_white();
 48 | 	#
 49 | 	#  Skips over any white space characters (including comments), and 
 50 | 	#  returns the next character (having ungot it).
 51 | .local skip_white
 52 | skip_white:
 53 | 	PUSH	%ebp
 54 | 	MOVL	%esp, %ebp
 55 | 
 56 | .L1:
 57 | 	CALL	getchar
 58 | 	PUSH	%eax
 59 | 	CALL	isspace
 60 | 	TESTL	%eax, %eax
 61 | 	POP	%eax			# restore the character; POP leaves the flags alone
 62 | 	JNZ	.L1			# keep reading while it is a space
 63 | 
 64 | 	#  Handle comments
 65 | 	CMPB	'/', %al
 66 | 	JNE	.L18
 67 | 	PUSH	%eax			# remember the '/'
 68 | 	CALL	getchar
 69 | 	CMPB	'*', %al
 70 | 	JNE	.L19
 71 | 	POP	%eax			# it is a comment opener: drop the saved '/'
 72 | 	CALL	skip_ccomm
 73 | 	JMP	.L1
 74 | 
 75 | .L19:
 76 | 	PUSH	%eax
 77 | 	CALL	ungetchar		# not a comment: push back the character after the '/'
 78 | 	POP	%eax
 79 | 	POP	%eax			# recover the saved '/'
 80 | 
 81 | .L18:
 82 | 	PUSH	%eax
 83 | 	CALL	ungetchar		# push back the first non-space character
 84 | 	POP	%eax			# ... and return it in %eax
 85 | 
 86 | 	POP	%ebp
 87 | 	RET
 88 | 
 89 | 
 90 | ####	#  Function:	int isidchar1(int chr);
 91 | 	#
 92 | 	#  Test whether CHR can start an identifier.
 93 | .local isidchar1
 94 | isidchar1:
 95 | 	PUSH	%ebp
 96 | 	MOVL	%esp, %ebp
 97 | 
 98 | 	MOVL	8(%ebp), %ecx		# chr
 99 | 	MOVL	$1, %eax		# result if chr is '_'
100 | 	CMPB	'_', %cl
101 | 	JE	.L2
102 | 	PUSH	%ecx
103 | 	CALL	isalpha			# otherwise the result is isalpha(chr)
104 | .L2:
105 | 	LEAVE				# also discards the pushed argument, if any
106 | 	RET
107 | 
108 | 
109 | ####	#  Function:	int isidchar(int chr);
110 | 	#
111 | 	#  Test whether CHR can occur in an identifier, other than as
112 | 	#  the first character.
113 | .local isidchar
114 | isidchar:
115 | 	PUSH	%ebp
116 | 	MOVL	%esp, %ebp
117 | 
118 | 	MOVL	8(%ebp), %ecx		# chr
119 | 	MOVL	$1, %eax		# result if chr is '_'
120 | 	CMPB	'_', %cl
121 | 	JE	.L3
122 | 	PUSH	%ecx
123 | 	CALL	isalnum			# otherwise the result is isalnum(chr)
124 | .L3:
125 | 	LEAVE				# also discards the pushed argument, if any
126 | 	RET
127 | 
128 | 
129 | ####	#  Function:	int ismopchar(int chr);
130 | 	#
131 | 	#  Is CHR a character that can occur at the start of a multi-character
132 | 	#  operator?
133 | .data 
134 | .local mopchars
135 | mopchars:
136 | 	.string	"+-*/<>&|!=%^"
137 | .text 
138 | .local ismopchar
139 | ismopchar:
140 | 	PUSH	%ebp
141 | 	MOVL	%esp, %ebp
142 | 
143 | 	PUSH	8(%ebp)			# chr
144 | 	MOVL	$mopchars, %eax
145 | 	PUSH	%eax
146 | 	CALL	strchr			# strchr(mopchars, chr): its result is returned as is
147 | 
148 | 	LEAVE				# also discards the two pushed arguments
149 | 	RET
150 | 
151 | ####	#  Function:	void get_multiop();
152 | 	#
153 | 	#  Reads an operator of one to three characters into TOKEN.
154 | .data 
155 | .local mops2
156 | mops2:
157 | 	.int	'++', '--', '<<', '>>', '<=', '>=', '==', '!=', '&&', '||'
158 | 	.int	'*=', '%=', '/=', '+=', '-=', '&=', '|=', '^='
159 | 	.int	0 	# <-- end of table
160 | 	
161 | .text 
162 | .local get_multiop
163 | get_multiop:
164 | 	PUSH	%ebp
165 | 	MOVL	%esp, %ebp
166 | 
167 | 	CALL	getchar
168 | 	MOVL	%eax, token		# one-character token unless extended below
169 | 
170 | 	CALL	getchar
171 | 	CMPL	$-1, %eax
172 | 	JE	.L13			# EOF: keep the one-character token
173 | 	PUSH	%eax			# -4(%ebp) is the second character
174 | 	MOVL	token, %eax
175 | 	MOVL	%eax, %ecx
176 | 	MOVB	-4(%ebp), %ch		# %ecx is now the two-char token
177 | 
178 | 	MOVL	$mops2, %eax
179 | 	MOVL	%eax, %edx
180 | .L14:
181 | 	#  Loop testing tokens, one 4-byte table entry at a time
182 | 	CMPL	%ecx, (%edx)
183 | 	JE	.L15
184 | 	ADDL	$4, %edx		# step to the next .int entry
185 | 	CMPL	$0, (%edx)
186 | 	JNE	.L14
187 | 	JMP	.L17			# no match: unget the second character
188 | 
189 | .L15:
190 | 	#  Definitely got a two-character token.  What about a third?
191 | 	POP	%eax
192 | 	MOVL	%ecx, %eax
193 | 	MOVL	%eax, token
194 | 	CMPL	'<<', %ecx
195 | 	JE	.L16
196 | 	CMPL	'>>', %ecx
197 | 	JE	.L16
198 | 	JMP	.L13
199 | .L16:
200 | 	# Handle <<= and >>=
201 | 	CALL	getchar
202 | 	CMPL	$-1, %eax
203 | 	JE	.L13
204 | 	PUSH	%eax
205 | 	CMPB	'=', %al
206 | 	JNE	.L17			# not '=': unget it, keep the two-char token
207 | 	POP	%edx
208 | 	MOVB	$16, %cl
209 | 	SALL	%edx			# move the '=' up to bits 16-23
210 | 	MOVL	token, %eax
211 | 	ORL	%edx, %eax
212 | 	MOVL	%eax, token
213 | 	JMP	.L13
214 | .L17:	#  Unget the character on top of the stack
215 | 	CALL	ungetchar
216 | 	POP	%eax
217 | .L13:
218 | 	POP	%ebp
219 | 	RET
220 | 
221 | ####	#  Function:	int get_word();
222 | 	#
223 | 	#  Reads an identifier or keyword into VALUE.  TOKEN is set to 'id',
224 | 	#  and chk_keyword() is then called to test VALUE for a keyword.
225 | .local get_word
226 | get_word:
227 | 	PUSH	%ebp
228 | 	MOVL	%esp, %ebp
229 | 	PUSH	%edi
230 | 
231 | 	#  Skip whitespace and test for an identifier
232 | 	CALL	skip_white
233 | 	PUSH	%eax
234 | 	CALL	isidchar1
235 | 	POP	%ecx
236 | 	TESTL	%eax, %eax
237 | 	JZ	_error
238 | 
239 | 	MOVL	'id', %eax		# 'id' for identifier
240 | 	MOVL	%eax, token
241 | 	MOVL	$value, %eax
242 | 	MOVL	%eax, %edi		# string pointer
243 | 	DECL	%edi			# the loop below pre-increments
244 | 
245 | .L4:	#  Loop reading characters, and check for buffer overflow
246 | 	INCL	%edi
247 | 	MOVL	$value, %eax
248 | 	SUBL	%edi, %eax
249 | 	CMPL	$-79, %eax		# fail once %edi reaches value + 79
250 | 	JLE	_error
251 | 
252 | 	CALL	getchar
253 | 	MOVB	%al, (%edi)
254 | 	PUSH	%eax
255 | 	CALL	isidchar
256 | 	TESTL	%eax, %eax
257 | 	POP	%eax			# restore the character; POP keeps the flags
258 | 	JNE	.L4
259 | 
260 | 	#  Unget the last character
261 | 	PUSH	%eax
262 | 	CALL	ungetchar
263 | 	POP	%eax
264 | 
265 | 	#  Write null terminator (over the non-identifier byte stored above)
266 | 	XORB	%cl, %cl
267 | 	MOVB	%cl, (%edi)
268 | 
269 | 	CALL	chk_keyword		# NOTE(review): %eax is not reloaded after this
270 | 
271 | 	POP	%edi
272 | 	POP	%ebp
273 | 	RET
274 | 
275 | 
276 | ####	#  Function:	void chk_keyword();
277 | 	#
278 | 	#  Check whether VALUE contains a keyword, and if so, sets TOKEN
279 | 	#  to the first four bytes of the keyword's table entry.
280 | 	#  Table entries are 12 bytes apart (see the ADDL $12 in the loop).
281 | .data 
282 | .local keywords
283 | .align 12 
284 | keywords:
285 | 	#  These are the supported keywords
286 | 	.string "auto"		.align 12
287 | 	.string "break"		.align 12
288 |         .string "continue"      .align 12
289 | 	.string "do"		.align 12
290 | 	.string "else"		.align 12
291 | 	.string "extern"	.align 12
292 | 	.string "if"		.align 12
293 | 	.string "return"	.align 12
294 | 	.string "static"	.align 12
295 | 	.string "while"		.align 12
296 | 
297 | 	#  These keywords are not supported, but are skipped in some contexts
298 | 	.string "char"          .align 12
299 | 	.string "int"		.align 12
300 | 	.string "struct"	.align 12
301 | 	.byte  0	# <-- the end of table marker
302 | 
303 | .text 
304 | .local chk_keyword
305 | chk_keyword:
306 | 	PUSH	%ebp
307 | 	MOVL	%esp, %ebp
308 | 	PUSH	%edi
309 | 	PUSH	%esi
310 | 
311 | 	MOVL	$value, %edi
312 | 	MOVL	$keywords, %esi
313 | .L10:
314 | 	CMPB	$0, (%esi)		# an empty entry is the end of table marker
315 | 	JE	.L12
316 | 	PUSH	%edi
317 | 	PUSH	%esi
318 | 	CALL	strcmp
319 | 	POP	%ecx
320 | 	POP	%ecx
321 | 	TESTL	%eax, %eax
322 | 	JZ	.L11
323 | 	
324 | 	ADDL	$12, %esi		# next table entry
325 | 	JMP	.L10
326 | .L11:
327 | 	#  Found it.  Use the first dword of the name to put in TOKEN.
328 | 	MOVL	(%esi), %eax
329 | 	MOVL	%eax, token
330 | 
331 | .L12:
332 | 	POP	%esi
333 | 	POP	%edi
334 | 	POP	%ebp
335 | 	RET
336 | 
337 | 
338 | ####	#  Function:	int get_qlit();
339 | 	#
340 | 	#  Reads the textual representation of a character or string literal 
341 | 	#  into VALUE, including the quotation marks, sets TOKEN to 'char' or
342 | 	#  'str' (as appropriate) and returns the next byte (having ungot it).
343 | .local get_qlit
344 | get_qlit:
345 | 	PUSH	%ebp
346 | 	MOVL	%esp, %ebp
347 | 
348 | 	#  Skip whitespace and test for the opening '\'' or '\"'
349 | 	CALL	skip_white
350 | 	PUSH	%eax			# -4(%ebp) is the quote character
351 | 	CMPB	'\'', %al
352 | 	JE	.L21a
353 | 	CMPB	'\"', %al
354 | 	JE	.L21b
355 | 	JNE	_error
356 | 
357 | .L21a:
358 | 	MOVL	'char', %eax		# 'char' for character literal
359 | 	JMP	.L21c
360 | .L21b:
361 | 	MOVL	'str', %eax		# 'str' for string literal
362 | .L21c:
363 | 	MOVL	%eax, token
364 | 	PUSH	%edi			# -8(%ebp): preserve the caller's %edi
365 | 	MOVL	$value, %edi		# string pointer
366 | 
367 | 	CALL	getchar			# re-read the opening quote and store it
368 | 	MOVB	%al, (%edi)
369 | 
370 | .L21:	#  Loop reading characters, and check for buffer overflow
371 | 	INCL	%edi
372 | 	MOVL	$value, %eax
373 | 	SUBL	%edi, %eax
374 | 	CMPL	$-78, %eax		# 78 to allow for \', etc.
375 | 	JLE	_error
376 | 
377 | 	CALL	getchar
378 | 	CMPL	$-1, %eax
379 | 	JE	_error			# EOF inside a literal
380 | 	MOVB	%al, (%edi)
381 | 	CMPB	-4(%ebp), %al
382 | 	JE	.L21d			# matching close quote
383 | 	CMPB	'\\', %al
384 | 	JNE	.L21
385 | 
386 | 	#  Read an escaped character, so an escaped quote cannot end the literal
387 | 	INCL	%edi
388 | 	CALL	getchar
389 | 	CMPL	$-1, %eax
390 | 	JE	_error
391 | 	MOVB	%al, (%edi)
392 | 	
393 | 	JMP	.L21
394 | 
395 | .L21d:
396 | 	#  Write null terminator
397 | 	INCL	%edi
398 | 	XORB	%cl, %cl
399 | 	MOVB	%cl, (%edi)
400 | 
401 | 	#  Peek another character
402 | 	CALL	getchar
403 | 	PUSH	%eax
404 | 	CALL	ungetchar
405 | 	POP	%eax
406 | 	POP	%edi			# restore the caller's %edi
407 | 	LEAVE
408 | 	RET
409 | 
410 | 
411 | ####	#  Function:	int get_number();
412 | 	#
413 | 	#  Reads the textual representation of a number into VALUE, 
414 | 	#  and returns the next byte (having ungot it).
415 | .local get_number
416 | get_number:
417 | 	PUSH	%ebp
418 | 	MOVL	%esp, %ebp
419 | 	PUSH	%edi
420 | 
421 | 	MOVL	'num', %eax		# 'num' for number
422 | 	MOVL	%eax, token
423 | 	MOVL	$value, %eax
424 | 	MOVL	%eax, %edi		# string pointer
425 | 
426 | 	#  Read the first character, which must be a digit
427 | 	CALL	getchar
428 | 	MOVB	%al, (%edi)
429 | 	PUSH	%eax
430 | 	CALL	isdigit
431 | 	POP	%ecx
432 | 	TESTL	%eax, %eax
433 | 	JZ	_error
434 | 	CMPB	'0', (%edi)
435 | 	JNE	.L5
436 | 
437 | 	#  The first character was '0', so either octal, hex or zero.
438 | 	
439 | 	#  Store that one byte, and look at the next one
440 | 	INCL	%edi
441 | 	CALL	getchar
442 | 	MOVB	%al, (%edi)
443 | 	CMPB	'x', %al		# only a lower-case 'x' is tested for
444 | 	JE	.L24
445 | 	PUSH	%eax
446 | 	CALL	isdigit
447 | 	POP	%ecx
448 | 	TESTL	%eax, %eax
449 | 	JNZ	.L5
450 | 
451 | 	#  It must be a literal zero
452 | 	MOVL	%ecx, %eax		# restore character
453 | 	JMP	.L23
454 | 
455 | .L24:	#  It's a hex number
456 | 	INCL	%edi
457 | 	MOVL	$value, %eax
458 | 	SUBL	%edi, %eax
459 | 	CMPL	$-79, %eax		# fail once %edi reaches value + 79
460 | 	JLE	_error
461 | 
462 | 	CALL	getchar
463 | 	MOVB	%al, (%edi)
464 | 	PUSH	%eax
465 | 	CALL	isxdigit
466 | 	TESTL	%eax, %eax
467 | 	POP	%eax			# restore the character; POP keeps the flags
468 | 	JNZ	.L24
469 | 
470 | 	PUSH	%eax
471 | 	CALL	isalpha			# a letter directly after the digits is an error
472 | 	TESTL	%eax, %eax
473 | 	JNZ	_error
474 | 	POP	%eax
475 | 	JMP	.L23
476 | 
477 | .L5:	#  It's a decimal or octal number -- we don't care which
478 | 	#  Loop reading characters, and check for buffer overflow
479 | 	INCL	%edi
480 | 	MOVL	$value, %eax
481 | 	SUBL	%edi, %eax
482 | 	CMPL	$-79, %eax		# fail once %edi reaches value + 79
483 | 	JLE	_error
484 | 
485 | 	CALL	getchar
486 | 	MOVB	%al, (%edi)
487 | 	PUSH	%eax
488 | 	CALL	isdigit
489 | 	TESTL	%eax, %eax
490 | 	POP	%eax			# restore the character; POP keeps the flags
491 | 	JNZ	.L5
492 | 
493 | 	PUSH	%eax
494 | 	CALL	isalpha			# a letter directly after the digits is an error
495 | 	TESTL	%eax, %eax
496 | 	JNZ	_error
497 | 	POP	%eax
498 | 
499 | .L23:
500 | 	#  Unget the last character
501 | 	PUSH	%eax
502 | 	CALL	ungetchar
503 | 	POP	%eax
504 | 
505 | 	#  Write null terminator (over the non-digit byte stored above)
506 | 	XORB	%cl, %cl
507 | 	MOVB	%cl, (%edi)
508 | 
509 | 	POP	%edi
510 | 	POP	%ebp
511 | 	RET
512 | 
513 | 
514 | ####	#  Function:	int next();
515 | 	#
516 | 	#  Reads the next token, returning the token type (or -1 for EOF)
517 | next:
518 | 	PUSH	%ebp
519 | 	MOVL	%esp, %ebp
520 | 
521 | 	CALL	skip_white		# %eax is the next character, already ungot
522 | 	CMPL	$-1, %eax
523 | 	JE	.L6a			# EOF: TOKEN becomes -1
524 | 
525 | 	PUSH	%eax
526 | 	CALL	isidchar1
527 | 	POP	%ecx			# %ecx keeps the character for the tests below
528 | 	TESTL	%eax, %eax
529 | 	JNZ	.L7
530 | 
531 | 	PUSH	%ecx
532 | 	CALL	isdigit
533 | 	POP	%ecx
534 | 	TESTL	%eax, %eax
535 | 	JNZ	.L8
536 | 
537 | 	PUSH	%ecx
538 | 	CALL	ismopchar
539 | 	POP	%ecx
540 | 	TESTL	%eax, %eax
541 | 	JNZ	.L8a
542 | 
543 | 	CMPB	'\'', %cl
544 | 	JE	.L8b
545 | 	CMPB	'\"', %cl
546 | 	JE	.L8b
547 | 
548 | 	CALL	getchar			# anything else is a one-character token
549 | .L6a:
550 | 	MOVL	%eax, token
551 | 	JMP	.L6
552 | 	
553 | .L7:	#  Identifier or keyword
554 | 	CALL	get_word
555 | 	JMP	.L9
556 | .L8:	#  Number
557 | 	CALL	get_number
558 | 	JMP	.L9
559 | .L8a:	#  Operator
560 | 	CALL	get_multiop
561 | 	JMP	.L9
562 | .L8b:	#  Character or string literal
563 | 	CALL	get_qlit
564 | 	JMP	.L9
565 | .L9:	#  The helpers set TOKEN; their own return values are ignored
566 | 	MOVL	token, %eax
567 | .L6:
568 | 	POP	%ebp
569 | 	RET	
570 | 


--------------------------------------------------------------------------------
/stage-4/signal.c:
--------------------------------------------------------------------------------
 1 | /* signal.c  --  signal handling functions
 2 |  *
 3 |  * Copyright (C) 2013 Richard Smith <richard@ex-parrot.com> 
 4 |  * All rights reserved.
 5 |  */
 6 | 
 7 | /* The C library raise(): sends signal SIG to the calling process */
 8 | raise( sig ) {
 9 |     return kill( getpid(), sig );
10 | }
11 | 
12 | /* The C library abort(): raise SIGABRT, retrying with the default handler */
13 | abort() {
14 |     raise(6);   /* SIGABRT == 6 */
15 | 
16 |     /* If we're still here, reinstate the default handler and retry. */
17 |     signal(6, 0);   /* SIG_DFL == 0 */
18 |     raise(6);
19 | 
20 |     /* This shouldn't be possible, but exit with status 128 + SIGABRT. */
21 |     _exit(128 + 6);
22 | }
23 | 


--------------------------------------------------------------------------------
/stage-4/stdarg.c:
--------------------------------------------------------------------------------
 1 | /* stdarg.c
 2 |  *
 3 |  * Copyright (C) 2005, 2014 Richard Smith <richard@ex-parrot.com> 
 4 |  * All rights reserved.
 5 |  */
 6 | 
 7 | /* The intention is that the C library macro va_arg can be implemented as:
 8 |  * 
 9 |  *   #define va_arg( ap, type ) \
10 |  *     ( * (type*) __va_arg( &(ap), sizeof(type) ) )
11 |  *
12 |  * Returns the old value of *AP, and advances *AP by SIZE rounded up to 4. */
13 | __va_arg( ap, size ) {
14 |     auto a = *ap;
15 |     /* Round size to 4 byte alignment */
16 |     size = (size + 3) & ~3;
17 |     *ap += size;
18 |     return a;
19 | 
20 | /* The intention is that the C library macro va_start can be implemented as:
21 |  *
22 |  *   #define va_start( ap, last ) \
23 |  *     ( __va_start( &(ap), &(last), sizeof(last) ) )                     
24 |  * Points *AP at LAST, then steps it past LAST_SIZE bytes via __va_arg. */
25 | __va_start( ap, last, last_size ) {
26 |     *ap = last;
27 |     __va_arg( ap, last_size );
28 | }
29 | 


--------------------------------------------------------------------------------
/stage-4/stdio.s:
--------------------------------------------------------------------------------
  1 | # stdio.s  --  bootstrap code for I/O
  2 | 
  3 | # Copyright (C) 2012, 2013 Richard Smith <richard@ex-parrot.com>
  4 | # All rights reserved.
  5 | 
  6 | ####	#  Function:	void putstr(char* str)
  7 | 	#
  8 | 	#  The B library putstr() function.  Writes STR to standard output.
  9 | 	#  Unlike the C library puts(), no terminating '\n' is added 
 10 | 	#  automatically.  A short or failed write jumps to _error.
 11 | putstr:
 12 | 	PUSH	%ebp
 13 | 	MOVL	%esp, %ebp
 14 | 
 15 | 	PUSH	8(%ebp)
 16 | 	CALL	strlen
 17 | 	POP	%ecx			# %ecx == str, %eax == its length
 18 | 
 19 | 	PUSH	%eax			# length
 20 | 	PUSH	%ecx			# buffer
 21 | 	MOVL	$1, %eax		# 1 == STDOUT_FILENO
 22 | 	PUSH	%eax
 23 | 	CALL	write
 24 | 	POP	%ecx
 25 | 	POP	%ecx
 26 | 	POP	%ecx			# %ecx == the length we asked for
 27 | 
 28 | 	CMPL	%eax, %ecx		# did write() report that many bytes?
 29 | 	JNE	_error
 30 | 
 31 | 	POP	%ebp
 32 | 	RET
 33 | 
 34 | 
 35 | ####	#  Function:	void putchar(int chr)
 36 | 	#
 37 | 	#  The C standard library putchar() function.  Writes the one
 38 | 	#  character in CHR to standard output.  The B library version
 39 | 	#  should write multiple characters from CHR (up to four): this
 40 | 	#  is prohibited by the C standard, and we do not currently do it.
 41 | putchar:
 42 | 	PUSH	%ebp
 43 | 	MOVL	%esp, %ebp
 44 | 
 45 | 	MOVL	$1, %eax
 46 | 	PUSH	%eax			# strlen
 47 | 	LEA	8(%ebp), %eax
 48 | 	PUSH	%eax			# &chr
 49 | 	MOVL	$1, %eax		# 1 == STDOUT_FILENO
 50 | 	PUSH	%eax
 51 | 	CALL	write
 52 | 	POP	%ecx
 53 | 	POP	%ecx
 54 | 	POP	%ecx			# %ecx == 1, the length we asked for
 55 | 
 56 | 	CMPL	%eax, %ecx		# did write() report one byte?
 57 | 	JNE	_error
 58 | 
 59 | 	POP	%ebp
 60 | 	RET
 61 | 
 62 | ####	#  Function:	void printf(char* fmt, ...);
 63 | 	#
 64 | 	#  A very light-weight version of printf, handling just the 
 65 | 	#  %%, %c, %d and %s format specifiers, with no widths or 
 66 | 	#  precisions.  The B library version also supports %o which we
 67 | 	#  don't do yet.  Any other specifier jumps to _error.
 68 | printf:
 69 | 	PUSH	%ebp
 70 | 	MOVL	%esp, %ebp
 71 | 	PUSH	%ebx
 72 | 	PUSH	%esi
 73 | 	PUSH	%edi
 74 | 	
 75 | 	MOVL	8(%ebp), %esi		# %esi is ptr into fmt
 76 | 	LEA	8(%ebp), %edi		# %edi is ptr to prev va_arg
 77 | 
 78 | .L2:	#  Loop over the format string
 79 | 	CMPB	$0, (%esi)
 80 | 	JE	.L3
 81 | 	MOVB	(%esi), %al
 82 | 	CMPB	'%', %al
 83 | 	JE	.L4
 84 | 
 85 | .L6:	#  Write a raw character
 86 | 	PUSH	%eax
 87 | 	CALL	putchar
 88 | 	POP	%eax
 89 | .L5:
 90 | 	INCL	%esi
 91 | 	JMP	.L2
 92 | 
 93 | .L4:
 94 | 	#  We have a format specifier
 95 | 	INCL	%esi
 96 | 	CMPB	$0, (%esi)
 97 | 	JE	_error			# format string ends straight after a '%'
 98 | 	MOVB	(%esi), %al
 99 | 	CMPB	'%', %al
100 | 	JE	.L6			# write a literal '%'
101 | 
102 | 	#  Read the next vararg into %eax, putting the format char in %cl
103 | 	ADDL	$4, %edi		# read next va_arg
104 | 	MOVB	%al, %cl
105 | 	MOVL	(%edi), %eax
106 | 
107 | 	#  Test for 'c', 's' and 'd' format specifiers, otherwise fail  
108 | 	CMPB	'c', %cl
109 | 	JE	.L6			# write the va_arg character
110 | 	CMPB	's', %cl
111 | 	JE	.L7
112 | 	CMPB	'd', %cl
113 | 	JE	.L8
114 | 	JMP	_error
115 | 
116 | .L7:	#  Handle the %s format specifier
117 | 	PUSH	%eax
118 | 	CALL	putstr
119 | 	POP	%eax
120 | 	JMP	.L5
121 | 
122 | .L8:	#  Handle the %d format specifier.  Special case 0
123 | 	TESTL	%eax, %eax
124 | 	JNZ	.L9
125 | 	MOVB	'0', %al
126 | 	JMP	.L6
127 | 
128 | .L9:	#  Do we need a -ve sign?
129 | 	CMPL	$0, %eax
130 | 	JG	.L10
131 | 	PUSH	%eax			# keep the value across the putchar call
132 | 	MOVB	'-', %cl
133 | 	PUSH	%ecx
134 | 	CALL	putchar
135 | 	POP	%ecx
136 | 	POP	%eax
137 | 	NEGL	%eax
138 | 
139 | .L10:	#  Set up a temporary buffer, as we'll write from right to left
140 | 	MOVL	%esp, %ebx
141 | 	DECL	%ebx			# last byte of the 16 reserved just below
142 | 	SUBL	$16, %esp
143 | 	MOVB	$0, (%ebx)		# '\0' terminator
144 | 	DECL	%ebx
145 | 	MOVL	$10, %ecx
146 | 
147 | .L11:	#  Store the digits, least significant first, moving %ebx down
148 | 	XORL	%edx, %edx
149 | 	IDIVL	%ecx			# acts on %edx:%eax
150 | 	ADDB	'0', %dl		# remainder is in %dl, conv. to char
151 | 	MOVB	%dl, (%ebx)
152 | 	TESTL	%eax, %eax
153 | 	JZ	.L12
154 | 	DECL	%ebx
155 | 	JMP	.L11
156 | 
157 | .L12:	#  %ebx points at the most significant digit: print from there
158 | 	PUSH	%ebx
159 | 	CALL	putstr
160 | 	POP	%ebx
161 | 	ADDL	$16, %esp		# release the temporary buffer
162 | 	JMP	.L5
163 | 
164 | .L3:	#  Cleanup
165 | 	POP	%edi
166 | 	POP	%esi
167 | 	POP	%ebx
168 | 	POP	%ebp
169 | 	RET
170 | 
171 | ####	#  Function:	int ungetchar(int c);
172 | 	#
173 | 	#  A version of the C standard library ungetc() that acts on standard
174 | 	#  input.  Up to four characters can be held; a fifth jumps to _error.
175 | .data 
176 | .local unget_count 
177 | unget_count:
178 | 	.byte	0		# How many characters are in the unget slot?
179 | .local unget_data
180 | unget_data:
181 | 	.int	0		# The ungot characters, one per byte, with the
182 | 				# most recently ungot one in the low byte
183 | .text ungetchar:
184 | 	PUSH	%ebp
185 | 	MOVL	%esp, %ebp
186 | 
187 | 	#  If c == EOF, we should do nothing and return EOF.
188 | 	MOVL	8(%ebp), %eax	
189 | 	CMPL	$-1, %eax
190 | 	JE	.L15
191 | 
192 | 	#  Have we space to write another character?
193 | 	MOVB	unget_count, %al
194 | 	CMPB	$4, %al
195 | 	JAE	_error
196 | 	INCB	%al
197 | 	MOVB	%al, unget_count
198 | 
199 | 	#  Write the character to the unget slot.
200 | 	MOVL	unget_data, %eax
201 | 	MOVB	$8, %cl
202 | 	SHLL	%eax			# move the older characters up one byte
203 | 	MOVB	8(%ebp), %al
204 | 	MOVL	%eax, unget_data
205 | 
206 | 	#  And return the character
207 | 	XORL	%eax, %eax
208 | 	MOVB	8(%ebp), %al
209 | .L15:
210 | 	POP	%ebp
211 | 	RET
212 | 
213 | 
214 | ####	#  Function:	int getchar(void);
215 | 	#
216 | 	#  The C standard library getchar() function.  Reads one character
217 | 	#  from standard input and returns it.  If end-of-file occurs, we
218 | 	#  return -1: in this respect it differs from the B library 
219 | 	#  version which returns the ASCII EOT character (0x04, ^D).
220 | getchar:
221 | 	PUSH	%ebp
222 | 	MOVL	%esp, %ebp
223 | 
224 | 	#  Has a character been ungetc'd?
225 | 	MOVB	unget_count, %al
226 | 	CMPB	$0, %al
227 | 	JE	.L14
228 | 	DECB	%al
229 | 	MOVB	%al, unget_count
230 | 
231 | 	#  Read the character from the unget slot.
232 | 	MOVL	unget_data, %eax
233 | 	XORL	%edx, %edx
234 | 	MOVB	%al, %dl		# the most recently ungot character
235 | 	MOVB	$8, %cl
236 | 	SHRL	%eax			# move the older characters down one byte
237 | 	MOVL	%eax, unget_data
238 | 	MOVL	%edx, %eax
239 | 	JMP	.L13
240 | 
241 | .L14:	#  Read from OS
242 | 	XORL	%eax, %eax
243 | 	PUSH	%eax			# A 4-byte buffer: %esp points here
244 | 	MOVL	%esp, %ecx
245 | 
246 | 	MOVL	$1, %eax
247 | 	PUSH	%eax			# strlen
248 | 	PUSH	%ecx			# ptr to buffer
249 | 	XORL	%eax, %eax
250 | 	PUSH	%eax			# 0 == STDIN_FILENO
251 | 	CALL	read
252 | 	MOVL	%eax, %edx		# %edx == number of bytes read
253 | 	POP	%ecx
254 | 	POP	%ecx
255 | 	POP	%ecx			# strlen == 1
256 | 	POP	%eax			# character read
257 | 
258 | 	CMPL	%edx, %ecx		# Successfully read one byte
259 | 	JE	.L13
260 | 	CMPL	$0, %edx		# Necessarily indicates end of file
261 | 	JNE	_error
262 | 	MOVL	$-1, %eax		# -1 == EOF
263 | .L13:
264 | 	POP	%ebp
265 | 	RET
266 | 
267 | 
268 | __io_flush:	#  Does nothing: just returns to the caller
269 | 	RET
270 | 
271 | .data
272 | stdout: .int 1  # These are nominally pointers and need to be distinguishable
273 | stdin:	.int 1  # from the NULL pointer returned on error from e.g. freopen.
274 | 
275 | #  Switch back to the code section for the functions that follow.
276 | .text
277 | ####	#  Function: FILE* freopen( char const* filename, char const* mode,
278 | 	#                           FILE* stream );
279 | 	#
280 | 	#  A minimal freopen, just to get the main() function of the compiler
281 | 	#  to work.  MODE "w..." redirects fd 1, anything else redirects fd 0.
282 | freopen:
283 | 	PUSH	%ebp
284 | 	MOVL	%esp, %ebp
285 | 	PUSH	%ebx
286 | 
287 | 	MOVL	12(%ebp), %eax
288 | 	CMPB	'w', (%eax)		# only the first character of MODE is tested
289 | 	JE	.L18
290 | 
291 | 	# reopen stdin
292 | 	XORL	%eax, %eax		# 0 == O_RDONLY
293 | 	PUSH	%eax
294 | 	PUSH	8(%ebp)
295 | 	CALL	open
296 | 	ADDL	$8, %esp
297 | 	CMPL	$-1, %eax
298 | 	JE	_error
299 | 
300 | 	XORL	%ecx, %ecx		# stdin
301 | 	JMP	.L19
302 | .L18:
303 | 	# reopen stdout
304 | 	MOVL	$0644, %eax		# permissions
305 | 	PUSH	%eax
306 | 	MOVL	$0x241, %eax		# O_WRONLY=1|O_CREAT=0x40|O_TRUNC=0x200
307 | 	PUSH	%eax
308 | 	PUSH	8(%ebp)
309 | 	CALL	open
310 | 	ADDL	$12, %esp
311 | 	CMPL	$-1, %eax
312 | 	JE	_error
313 | 	
314 | 	XORL	%ecx, %ecx
315 | 	INCL	%ecx			# stdout
316 | 
317 | .L19:	#  Duplicate the newly opened fd onto fd 0 or 1, then close it
318 | 	PUSH	%ecx			# new_fd
319 | 	PUSH	%eax			# old_fd
320 | 	CALL	dup2
321 | 	CMPL	$-1, %eax
322 | 	JE	_error
323 | 	CALL	close			# old_fd is still on top of the stack
324 | 	POP	%eax
325 | 	POP	%eax
326 | 
327 | 	MOVL	16(%ebp), %eax		# return STREAM unchanged
328 | 
329 | 	POP	%ebx
330 | 	POP	%ebp
331 | 	RET
332 | 
333 | 
334 | ####	#  Function: int strtol( char const* str, char const **endptr );
335 | 	#
336 | 	#  Convert the decimal digits at STR, with optional leading '-', to an
337 | 	#  integer, storing the end position in *ENDPTR unless ENDPTR is NULL.
338 | 	#  It takes no base, and is discarded once the bootstrap cc0 exists.
339 | strtol:
340 | 	PUSH	%ebp
341 | 	MOVL	%esp, %ebp
342 | 	PUSH	%esi
343 | 	XORL	%eax, %eax			# value
344 | 	MOVL	8(%ebp), %esi			# ptr
345 | 
346 | 	#  Recurse to process a -ve sign, forwarding ENDPTR so that the
347 | 	#  recursive call stores the end position on our behalf.
348 | 	CMPB	'-', (%esi)
349 | 	JNE	.L16
350 | 	INCL	%esi
351 | 	PUSH	12(%ebp)
352 | 	PUSH	%esi
353 | 	CALL	strtol
354 | 	ADDL	$8, %esp
355 | 	NEGL	%eax
356 | 	JMP	.L17a			# *ENDPTR has already been written
357 | .L16:
358 | 	XORL	%ecx, %ecx
359 | 	MOVB	(%esi), %cl
360 | 	SUBL	'0', %ecx		# digit value, or out of range if not a digit
361 | 	CMPL	$9, %ecx
362 | 	JA	.L17			# unsigned, so everything else is > 9
363 | 	PUSH	%ecx
364 | 	MOVL	$10, %ecx
365 | 	MULL	%ecx			# value *= 10 (clobbers %edx)
366 | 	POP	%ecx
367 | 	ADDL	%ecx, %eax
368 | 	INCL	%esi
369 | 	JMP	.L16	
370 | .L17:
371 | 	MOVL	12(%ebp), %ecx
372 | 	TESTL	%ecx, %ecx
373 | 	JZ	.L17a			# NULL ENDPTR: nothing to store
374 | 	MOVL	%esi, (%ecx)
375 | .L17a:
376 | 	POP	%esi
377 | 	POP	%ebp
378 | 	RET
378 | 


--------------------------------------------------------------------------------
/stage-4/string.s:
--------------------------------------------------------------------------------
  1 | # string.s  --  string and memory primitives
  2 | 
  3 | # Copyright (C) 2012, 2013 Richard Smith <richard@ex-parrot.com>
  4 | # All rights reserved.
  5 | 
  6 | 
  7 | ####	#  Function:	size_t strlen(char* s);
  8 | strlen:
  9 | 	PUSH	%ebp
 10 | 	MOVL	%esp, %ebp
 11 | 	PUSH	%edi
 12 | 
 13 | 	MOVL	8(%ebp), %edi
 14 | 	XORL	%eax, %eax		# %al == 0 is the byte SCASB looks for
 15 | 	XORL	%ecx, %ecx
 16 | 	DECL	%ecx			# %ecx == -1: effectively no length limit
 17 | 	REPNE SCASB			# leaves %edi one past the terminator
 18 | 	SUBL	8(%ebp), %edi
 19 | 	LEA	-1(%edi), %eax		# DEC %edi; MOVL %edi, %eax
 20 | 
 21 | 	POP	%edi
 22 | 	POP	%ebp
 23 | 	RET
 24 | 
 25 | 
 26 | ####	#  Function:	size_t strnlen(char* s, size_t maxlen);
 27 | strnlen:
 28 | 	PUSH	%ebp
 29 | 	MOVL	%esp, %ebp
 30 | 	PUSH	%edi
 31 | 	MOVL	8(%ebp), %edi
 32 | 	XORL	%eax, %eax		# %al == 0 is the byte SCASB looks for
 33 | 	MOVL	12(%ebp), %ecx		# examine at most MAXLEN bytes
 34 | 	TESTL	%ecx, %ecx
 35 | 	JZ	.L10			# MAXLEN == 0: %edi == s, so the result is 0
 36 | 	REPNE SCASB
 37 | 	JNE	.L10			# no terminator seen: %edi == s + MAXLEN
 38 | 	DECL	%edi			# terminator seen: step back onto it
 39 | .L10:
 40 | 	SUBL	8(%ebp), %edi
 41 | 	MOVL	%edi, %eax
 42 | 	POP	%edi
 43 | 	POP	%ebp
 44 | 	RET
 45 | ####	#  Function:	int strcmp(char const* a, char const* b);
 46 | strcmp:
 47 | 	PUSH	%ebp
 48 | 	MOVL	%esp, %ebp
 49 | 	PUSH	%esi
 50 | 	PUSH	%edi
 51 | 
 52 | 	#  Note the order is chosen so we set CF correctly.
 53 | 	MOVL	12(%ebp), %edi
 54 | 	MOVL	8(%ebp), %esi
 55 | .L1:
 56 | 	LODSB			# Loads (%esi) to %al
 57 | 	SCASB			# Compares (%edi) to %al
 58 | 	JNE	.L2
 59 | 	CMPB	$0, %al		# Equal so far: stop at the terminator
 60 | 	JNE	.L1
 61 | 
 62 | 	#  They're equal
 63 | 	XORL	%eax, %eax
 64 | 	JMP	.L3
 65 | .L2:
 66 | 	#  SCAS internally does SUBL (%edi), %al, so if (%edi) > %al
 67 | 	#  (or b > a), the carry flag will be set.  SBB %eax, %eax 
 68 | 	#  is a useful trick for setting %eax to -1 if CF.
 69 | 	SBBL	%eax, %eax
 70 | 	ORB	$1, %al		# -1 stays -1; 0 becomes +1
 71 | .L3:
 72 | 	POP	%edi
 73 | 	POP	%esi
 74 | 	POP	%ebp
 75 | 	RET
 76 | 
 77 | 
 78 | ####	#  Function:	int strncmp(char const* a, char const* b, size_t n);
 79 | strncmp:
 80 | 	PUSH	%ebp
 81 | 	MOVL	%esp, %ebp
 82 | 	PUSH	%esi
 83 | 	PUSH	%edi
 84 | 
 85 | 	MOVL	16(%ebp), %ecx
 86 | 	TESTL	%ecx, %ecx		# n == 0: compare nothing, report equal
 87 | 	JZ	.L1b
 88 | 	MOVL	12(%ebp), %edi		# b -- this order makes SCASB set CF right
 89 | 	MOVL	8(%ebp), %esi		# a
 90 | .L1a:
 91 | 	LODSB			# Loads (%esi) to %al
 92 | 	SCASB			# Compares (%edi) to %al
 93 | 	JNE	.L2a
 94 | 	CMPB	$0, %al		# Equal so far: stop at the terminator
 95 | 	JE	.L1b
 96 | 	DECL	%ecx		# ... or once N characters have matched
 97 | 	JNZ	.L1a
 98 | .L1b:
 99 | 	#  They're equal
100 | 	XORL	%eax, %eax
101 | 	JMP	.L3a
102 | .L2a:
103 | 	#  SCAS internally does SUBL (%edi), %al, so if (%edi) > %al
104 | 	#  (or b > a), the carry flag will be set.  SBB %eax, %eax 
105 | 	#  is a useful trick for setting %eax to -1 if CF.
106 | 	SBBL	%eax, %eax
107 | 	ORB	$1, %al		# -1 stays -1; 0 becomes +1
108 | .L3a:
109 | 	POP	%edi
110 | 	POP	%esi
111 | 	POP	%ebp
112 | 	RET
113 | 
114 | 
115 | ####	#  Function:	int strchr(char const* a, int c);
116 | 	#  Returns a pointer to the first byte of A equal to C, or 0 if none.
117 | strchr:
118 | 	PUSH	%ebp
119 | 	MOVL	%esp, %ebp
120 | 	PUSH	%esi
121 | 
122 | 	MOVL	12(%ebp), %ecx
123 | 	MOVL	8(%ebp), %esi
124 | .L4:
125 | 	LODSB
126 | 	CMPB	%cl, %al		# tested first, so C == 0 finds the terminator
127 | 	JE	.L5
128 | 	CMPB	$0, %al
129 | 	JNE	.L4
130 | 
131 | 	#  Not found	
132 | 	XORL	%eax, %eax
133 | 	JMP	.L6
134 | 	
135 | .L5:	# Found it.  Note LODSB will have incremented %esi
136 | 	LEA	-1(%esi), %eax		# DEC %esi; MOVL %esi, %eax
137 | .L6:
138 | 	POP	%esi
139 | 	POP	%ebp
140 | 	RET
141 | 
142 | 
143 | ####	#  Function:	int strcpy(char* dest, char const* str);
144 | 	#  Copies STR, including its terminator, to DEST and returns DEST.
145 | strcpy:
146 | 	PUSH	%ebp
147 | 	MOVL	%esp, %ebp
148 | 	PUSH	%esi
149 | 	PUSH	%edi
150 | 
151 | 	MOVL	12(%ebp), %esi
152 | 	MOVL	8(%ebp), %edi
153 | 
154 | .L7:	#  We cannot use REP MOVSB because that does not check for a
155 | 	#  terminating null character.
156 | 	LODSB
157 | 	STOSB
158 | 	CMPB	$0, %al
159 | 	JNE	.L7
160 |   
161 | 	MOVL	8(%ebp), %eax
162 | 
163 | 	POP	%edi
164 | 	POP	%esi
165 | 	POP	%ebp
166 | 	RET
167 | 
168 | 
169 | ####	#  Function:	int strncpy(char* dest, char const* str, size_t n);
170 | 	#  Copies at most N bytes; stops after a terminator without padding.
171 | strncpy:
172 | 	PUSH	%ebp
173 | 	MOVL	%esp, %ebp
174 | 	PUSH	%esi
175 | 	PUSH	%edi
176 | 
177 | 	MOVL	16(%ebp), %ecx
178 | 	TESTL	%ecx, %ecx
179 | 	JZ	.L9			# n == 0: copy nothing
180 | 
181 | 	MOVL	12(%ebp), %esi
182 | 	MOVL	8(%ebp), %edi
183 | 
184 | .L8:	#  We cannot use REP MOVSB because that does not check for a
185 | 	#  terminating null character.
186 | 	LODSB
187 | 	STOSB
188 | 	DECL	%ecx
189 | 	JZ	.L9			# N bytes copied, perhaps without a terminator
190 | 	CMPB	$0, %al
191 | 	JNE	.L8
192 | .L9:
193 | 	MOVL	8(%ebp), %eax		# return DEST
194 | 
195 | 	POP	%edi
196 | 	POP	%esi
197 | 	POP	%ebp
198 | 	RET
199 | 
200 | 
201 | ####	#  Function:	int memcpy(char* dest, char const* str, size_t n);
202 | 	#  Copies N bytes from STR to DEST and returns DEST.
203 | memcpy:
204 | 	PUSH	%ebp
205 | 	MOVL	%esp, %ebp
206 | 	PUSH	%esi
207 | 	PUSH	%edi
208 | 
209 | 	MOVL	16(%ebp), %ecx
210 | 	MOVL	12(%ebp), %esi
211 | 	MOVL	8(%ebp), %edi
212 | 
213 | 	REP MOVSB
214 |   
215 | 	MOVL	8(%ebp), %eax
216 | 
217 | 	POP	%edi
218 | 	POP	%esi
219 | 	POP	%ebp
220 | 	RET
221 | 
222 | 
223 | ####	#  Function:	void* memset(void* s, int c, size_t n);
224 | 	#  Set N bytes of memory pointed to by S to the C byte, and return S
225 | memset:
226 | 	PUSH	%ebp
227 | 	MOVL	%esp, %ebp
228 | 	PUSH	%edi
229 | 
230 | 	MOVL	8(%ebp), %edi
231 | 	MOVB	12(%ebp), %al		# only the low byte of C is used
232 | 	MOVL	16(%ebp), %ecx
233 | 	REP STOSB
234 | 	MOVL	8(%ebp), %eax
235 | 
236 | 	POP	%edi
237 | 	POP	%ebp
238 | 	RET
239 | 
240 | 
241 | ####	#  __asm_std()  and  __asm_cld()  simply invoke those instructions.
242 | 	#  They're used by memmove() to invoke memcpy backwards.
243 | __asm_std:
244 | 	STD
245 | 	RET
246 | __asm_cld:
247 | 	CLD
248 | 	RET
249 | 
250 | 


--------------------------------------------------------------------------------
/stage-4/string2.c:
--------------------------------------------------------------------------------
 1 | /* string2.c  --  additional, higher-level string handling functions
 2 |  *
 3 |  * Copyright (C) 2013, 2021 Richard Smith <richard@ex-parrot.com> 
 4 |  * All rights reserved.
 5 |  */
 6 | 
 7 | /* The BSD extension strlcat(), except that DEST is returned, not a length */
 8 | strlcat(dest, src, n) {
 9 |     auto l1 = strnlen(dest, n), l2 = strnlen(src, n-l1);
10 |     strncpy( dest + l1, src, l2 );
11 |     if ( n ) lchar(dest, l1 + l2 < n ? l1 + l2 : n - 1, '\0'); /* n == 0: no room */
12 |     return dest;
13 | }   
14 | 
15 | /* The C library strcat(): appends SRC to DEST and returns DEST */
16 | strcat(dest, src) {
17 |     auto l1 = strlen(dest), l2 = strlen(src);
18 |     strcpy( dest + l1, src );
19 |     lchar(dest, l1 + l2, '\0');
20 |     return dest;
21 | }
22 | 
23 | /* The C library strncat(): appends at most N characters of SRC to DEST */
24 | strncat(dest, src, n) {
25 |     auto l1 = strlen(dest), l2 = strlen(src);
26 |     if (l2 > n) l2 = n;
27 |     strncpy( dest + l1, src, l2 );    /* bounded copy: never more than N */
28 |     lchar(dest, l1 + l2, '\0');
29 |     return dest;
30 | }
31 | 
32 | /* The C library memmove(): copies N bytes, allowing the blocks to overlap */
33 | memmove(dest, src, n) {
34 |     /* If we're copying to earlier memory, or if the blocks do not overlap,
35 |      * then a forwards copy, as done by memcpy, will be fine. */
36 |     if ( dest < src || dest > src + n ) return memcpy(dest, src, n);
37 |  
38 |     /* Otherwise we set the direction flag (DF), then call memcpy with the
39 |      * end pointers (which will then copy backwards), and clear DF. 
40 |      * We do not clear DF in memcpy because the ABI requires DF always to
41 |      * be cleared before library calls. */
42 |     __asm_std(); 
43 |     memcpy(dest+n-1, src+n-1, n); 
44 |     __asm_cld();
45 |     return dest;
46 | }
47 | 
48 | /* The C library strdup(): returns a malloc'd copy of STR, or 0 on failure */
49 | strdup( str ) {
50 |     auto l = strlen(str);
51 |     auto str2 = malloc(l + 1);
52 |     if ( !str2 ) return 0;      /* allocation failed: nothing to copy into */
53 |     strcpy( str2, str );        /* copies the terminating '\0' as well */
54 |     return str2;
55 | }
56 | 
57 | /* The C library strspn(): length of the prefix of STR made up of CHARS */
58 | strspn( str, chars ) {
59 |     auto i = 0, c;
60 |     while ( c = rchar(str, i) ) {
61 |         if ( !strchr(chars, c) )
62 |             break;
63 |         ++i;
64 |     }
65 |     return i;
66 | }
67 | 
68 | 
69 | /* The C library strcspn(): length of the prefix of STR with none of CHARS */
70 | strcspn( str, chars ) {
71 |     auto i = 0, c;
72 |     while ( c = rchar(str, i) ) {
73 |         if ( strchr(chars, c) )
74 |             break;
75 |         ++i;
76 |     }
77 |     return i;
78 | }
79 | 


--------------------------------------------------------------------------------
/stage-4/symtab.s:
--------------------------------------------------------------------------------
  1 | # symtab.s  --  Code to manipulate the symbol table 
  2 | 
  3 | # Copyright (C) 2012, 2013 Richard Smith <richard@ex-parrot.com>
  4 | # All rights reserved.
  5 | 
  6 | .data
  7 | 
  8 | .local st_start
  9 | st_start:
 10 | 	.int	0		# start of the table's storage
 11 | .local st_end
 12 | st_end:
 13 | 	.int	0		# where the next entry will be written
 14 | .local st_endstore
 15 | st_endstore:
 16 | 	.int	0		# end of the allocated storage
 17 | .local st_scope_id
 18 | st_scope_id:
 19 | 	.int	0		# incremented by new_scope, decremented by end_scope
 20 | 
 21 | #  struct entry { char sym[12]; int32_t frame_off; int32_t scope_id; 
 22 | #                 type_t lval; type_t size; };   -- sizeof(entry) == 28
 23 | 
 24 | .text
 25 | 
 26 | ####	#  Function:	void init_symtab();
 27 | 	#
 28 | 	#  Initialise the symbol table with room for a single entry
 29 | init_symtab:
 30 | 	PUSH	%ebp
 31 | 	MOVL	%esp, %ebp
 32 | #	MOVL	$1792, %ecx		# 64 * sizeof(entry)
 33 | 	MOVL	$28, %ecx		# sizeof(entry)
 34 | 	PUSH	%ecx
 35 | 	CALL	malloc
 36 | 	POP	%ecx			# %ecx == 28 again
 37 | 	MOVL	%eax, st_start
 38 | 	MOVL	%eax, st_end		# the table starts out empty
 39 | 	ADDL	%ecx, %eax
 40 | 	MOVL	%eax, st_endstore
 41 | 	POP	%ebp
 42 | 	RET
 43 | 
 44 | 
 45 | ####	#  Function:	void grow_symtab();
 46 | 	#
 47 | 	#  Double the size of the symbol table storage
 48 | .local grow_symtab
 49 | grow_symtab:
 50 | 	PUSH	%ebp
 51 | 	MOVL	%esp, %ebp
 52 | 
 53 | 	MOVL	st_start, %eax
 54 | 	MOVL	%eax, %edx
 55 |         MOVL    st_endstore, %eax
 56 | 	SUBL	%edx, %eax		# current size in bytes
 57 | 	MOVB	$1, %cl
 58 | 	SHLL	%eax			# doubled
 59 | 	
 60 | 	PUSH	%eax		# new size
 61 | 	PUSH	%edx		# current start ptr
 62 | 	CALL	realloc
 63 | 
 64 | 	#  Store new pointers
 65 | 	MOVL	%eax, st_start
 66 | 
 67 | 	MOVL	%eax, %edx	# new ptr
 68 | 	POP	%ecx		# old ptr
 69 | 	MOVL	st_end, %eax
 70 | 	SUBL	%ecx, %eax	# offset of st_end within the old block
 71 | 	ADDL	%edx, %eax	# same offset within the new block
 72 | 	MOVL	%eax, st_end
 73 | 
 74 | 	POP	%eax		# new size
 75 | 	ADDL	%edx, %eax
 76 | 	MOVL	%eax, st_endstore
 77 | 
 78 | 	POP	%ebp
 79 | 	RET
 80 | 
 81 | 
 82 | ####	#  Function:	void save_sym( char const* name, int32_t frame_off,
 83 | 	#                              type_t lval, int32_t sym_size );
 84 | 	#
 85 | 	#  Save a local symbol, growing the table first if it is full.
 86 | save_sym:
 87 | 	PUSH	%ebp
 88 | 	MOVL	%esp, %ebp
 89 | 
 90 | 	PUSH	8(%ebp)		# src  -4(%ebp)
 91 | 	MOVL	st_end, %eax
 92 | 	PUSH	%eax		# dest -8(%ebp)
 93 | 
 94 |         #  Check that we're not about to overrun the symbol table,
 95 |         MOVL    st_endstore, %eax
 96 | 	CMPL	%eax, -8(%ebp)
 97 | 	JL	.L1
 98 | 	CALL	grow_symtab
 99 | 	MOVL	st_end, %eax
100 | 	MOVL	%eax, -8(%ebp)	# the table may have moved: refresh dest
101 | .L1:
102 | 	CALL	strcpy		# NOTE(review): unbounded copy into sym[12]
103 | 	POP	%edx		# %edx == dest, the new entry
104 | 	MOVL	12(%ebp), %ecx
105 | 	MOVL	%ecx, 12(%edx)	# save value
106 | 	MOVL	st_scope_id, %eax
107 | 	MOVL	%eax, 16(%edx)	# scope id
108 | 	MOVL	16(%ebp), %ecx
109 | 	MOVL	%ecx, 20(%edx)	# lval flag
110 | 	MOVL	20(%ebp), %ecx
111 | 	MOVL	%ecx, 24(%edx)	# symbol size
112 | 	ADDL	$28, %edx	# sizeof(entry)
113 | 	MOVL	%edx, %eax
114 | 	MOVL	%eax, st_end
115 | 
116 | 	LEAVE
117 | 	RET
118 | 
119 | 
120 | ####	#  Function:	void new_scope();
121 | 	#
122 | 	#  Called on parsing '{' or similar to start a new nested scope.
123 | new_scope:
124 | 	PUSH	%ebp
125 | 	MOVL	%esp, %ebp
126 | 
127 | 	MOVL	st_scope_id, %eax
128 | 	INCL	%eax			# symbols saved from now on get this id
129 | 	MOVL	%eax, st_scope_id
130 | 
131 | 	POP	%ebp
132 | 	RET
133 | 
134 | 
135 | ####	#  Function:	int end_scope();
136 | 	#
137 | 	#  Called on parsing '}' or similar to remove symbols from the table.
138 | 	#  Returns number of bytes that need removing from the stack.
139 | end_scope:
140 | 	PUSH	%ebp
141 | 	MOVL	%esp, %ebp
142 | 
143 | 	MOVL	st_end, %eax
144 | 	MOVL	%eax, %ecx		# %ecx  end
145 | 	MOVL	st_start, %eax
146 | 	SUBL	$28, %eax		# sizeof(entry)
147 | 	MOVL	%eax, %edx		# %edx  ptr
148 | .L4:
149 | 	#  Zero %eax in case we jump to the end where %eax is the scope size
150 | 	XORL	%eax, %eax
151 | 
152 | 	ADDL	$28, %edx		# sizeof(entry)
153 | 	CMPL	%ecx, %edx
154 | 	JGE	.L5
155 | 
156 | 	MOVL	st_scope_id, %eax
157 | 	CMPL	%eax, 16(%edx)
158 | 	JL	.L4			# entry belongs to an outer scope: keep it
159 | 
160 | 	#  The symbol table is sorted by scope id, so as soon as we find
161 | 	#  one symbol in the current scope, all later ones must be too.
162 | 
163 | 	#  First, shrink the table
164 | 	MOVL	%edx, %eax
165 | 	MOVL	%eax, st_end
166 | 
167 | 	#  Then iterate over the remainder adding up the scope size
168 | 	MOVL	24(%edx), %eax		# %eax is now scope size
169 | .L7:
170 | 	ADDL	$28, %edx		# sizeof(entry)
171 | 	CMPL	%ecx, %edx
172 | 	JGE	.L5
173 | 	ADDL	24(%edx), %eax
174 | 	JMP	.L7
175 | .L5:
176 | 	PUSH	%eax			# store frame size
177 | 	MOVL	$st_scope_id, %eax
178 | 	DECL	(%eax)			# back to the enclosing scope's id
179 | 	POP	%eax
180 | 
181 | 	POP	%ebp
182 | 	RET
183 | 
184 | 
185 | ####	#  Function:	int lookup_sym(char const* name, int* off);
186 | 	#
187 | 	#  Return the lvalue flag for the symbol NAME, or 0 if it is
188 | 	#  not defined (as we assume it is an external function, which is
189 | 	#  not an lvalue).  Also set *OFF to the frame offset of the symbol, or
190 | 	#  0 if it is not defined (as 0 is not a valid offset because 
191 | 	#  0(%ebp) is the calling frame's base pointer.)
192 | lookup_sym:
193 | 	PUSH	%ebp
194 | 	MOVL	%esp, %ebp
195 | 	PUSH	%edi
196 | 	PUSH	%esi
197 | 
198 | 	PUSH	8(%ebp)			# name, left on the stack for strcmp
199 | 	MOVL	st_start, %eax
200 | 	MOVL	%eax, %edi		# %edi  ptr
201 | 	MOVL	st_end, %eax
202 | 	MOVL	%eax, %esi		# %esi  end
203 | 	SUBL	$28, %edi	# sizeof(entry)
204 | .L2:
205 | 	ADDL	$28, %edi	# sizeof(entry)
206 | 	XORL	%eax, %eax
207 | 	CMPL	%esi, %edi
208 | 	JGE	.L3			# ptr >= end: not found
209 | 	PUSH	%edi
210 | 	CALL	strcmp			# strcmp(entry, name)
211 | 	POP	%ecx
212 | 	TESTL	%eax, %eax
213 | 	JNZ	.L2			# names differ: try the next entry
214 | 	MOVL	20(%edi), %eax		# return lv flag
215 | 	MOVL	12(%edi), %edx		# frame offset
216 | 	JMP	.L6
217 | .L3:
218 | 	#  Symbol not found -- we assume it's an external function
219 | 	#  which is not an lvalue (so return 0)
220 | 	XORL	%eax, %eax
221 | 	XORL	%edx, %edx		# use 0 frame offset for error
222 | .L6:
223 | 	# write *off
224 | 	MOVL	12(%ebp), %ecx
225 | 	MOVL	%edx, (%ecx)
226 | 
227 | 	POP	%ecx			# discard the name pushed above
228 | 	POP	%esi
229 | 	POP	%edi
230 | 	POP	%ebp
231 | 	RET
232 | 	
233 | 


--------------------------------------------------------------------------------
/stage-4/unistd.s:
--------------------------------------------------------------------------------
  1 | # unistd.s  --  Linux syscalls
  2 | 
  3 | # Copyright (C) 2013, 2014 Richard Smith <richard@ex-parrot.com>
  4 | # All rights reserved.
  5 | 
  6 | .data
  7 | ####	#  Variable:	int errno;
  8 | .globl errno
  9 | errno:
 10 | 	.int	0		# written by the wrappers below when a syscall fails
 11 | 
 12 | 
 13 | .text
 14 | 
 15 | ####	#  Function:	ssize_t write(int fd, const void *buf, size_t count);
 16 | 	#  Returns the syscall result, or -1 with errno set if the syscall failed.
 17 | .globl write
 18 | write:
 19 | 	PUSH	%ebp
 20 | 	MOVL	%esp, %ebp
 21 | 	PUSH	%ebx			# preserved for the caller
 22 | 
 23 | 	MOVL	16(%ebp), %edx		# count
 24 | 	MOVL	12(%ebp), %ecx		# buf
 25 | 	MOVL	8(%ebp), %ebx		# fd
 26 | 	MOVL	$4, %eax		# 4 == __NR_write
 27 | 	INT	$0x80
 28 | 	CMPL	$-4096, %eax		# -4095 <= %eax < 0 for errno
 29 | 	JNA	.L1			# unsigned %eax <= -4096: success
 30 | 
 31 | 	NEGL	%eax
 32 | 	MOVL	%eax, errno		# errno = -(syscall result)
 33 | 	XORL	%eax, %eax
 34 | 	DECL	%eax			# return -1
 35 | .L1:
 36 | 	POP	%ebx
 37 | 	POP	%ebp
 38 | 	RET
 39 | 
 40 | 
 41 | ####	#  Function:	ssize_t read(int fd, void *buf, size_t count);
 42 | 	#  Returns the syscall result, or -1 with errno set if the syscall failed.
 43 | .globl read
 44 | read:
 45 | 	PUSH	%ebp
 46 | 	MOVL	%esp, %ebp
 47 | 	PUSH	%ebx			# preserved for the caller
 48 | 
 49 | 	MOVL	16(%ebp), %edx		# count
 50 | 	MOVL	12(%ebp), %ecx		# buf
 51 | 	MOVL	8(%ebp), %ebx		# fd
 52 | 	MOVL	$3, %eax		# 3 == __NR_read
 53 | 	INT	$0x80
 54 | 	CMPL	$-4096, %eax		# -4095 <= %eax < 0 for errno
 55 | 	JNA	.L2			# unsigned %eax <= -4096: success
 56 | 
 57 | 	NEGL	%eax
 58 | 	MOVL	%eax, errno		# errno = -(syscall result)
 59 | 	XORL	%eax, %eax
 60 | 	DECL	%eax			# return -1
 61 | .L2:
 62 | 	POP	%ebx
 63 | 	POP	%ebp
 64 | 	RET
 65 | 	
 66 | 
 67 | ####	#  Function:	void _exit(int status)
 68 | 	#
 69 | 	#  Terminate program execution with given status.
 70 | .globl _exit
 71 | _exit:
 72 | 	PUSH	%ebp
 73 | 	MOVL	%esp, %ebp
 74 | 	MOVL	8(%ebp), %ebx		# status; %ebx is not saved or restored here
 75 | 	MOVL	$1, %eax		# 1 == __NR_exit
 76 | 	INT	$0x80
 77 | 	HLT				# only reached if the syscall returns
 78 | 
 79 | 
 80 | ####	#  Function:	int open(char const* filename, int flags, int mode);
 81 | 	#  Returns the syscall result, or -1 with errno set if the syscall failed.
 82 | .globl open
 83 | open:
 84 | 	PUSH	%ebp
 85 | 	MOVL	%esp, %ebp
 86 | 	PUSH	%ebx			# preserved for the caller
 87 | 
 88 | 	MOVL	16(%ebp), %edx		# mode
 89 | 	MOVL	12(%ebp), %ecx		# flags
 90 | 	MOVL	8(%ebp), %ebx		# filename
 91 | 	MOVL	$5, %eax		# 5 == __NR_open
 92 | 	INT	$0x80
 93 | 	CMPL	$-4096, %eax		# -4095 <= %eax < 0 for errno
 94 | 	JNA	.L3			# unsigned %eax <= -4096: success
 95 | 
 96 | 	NEGL	%eax
 97 | 	MOVL	%eax, errno		# errno = -(syscall result)
 98 | 	XORL	%eax, %eax
 99 | 	DECL	%eax			# return -1
100 | .L3:
101 | 	POP	%ebx
102 | 	POP	%ebp
103 | 	RET
104 | 
105 | 
106 | ####	#  Function:	int close(int fd);
107 | 	#  Returns the syscall result, or -1 with errno set if the syscall failed.
108 | .globl close
109 | close:
110 | 	PUSH	%ebp
111 | 	MOVL	%esp, %ebp
112 | 	PUSH	%ebx			# preserved for the caller
113 | 
114 | 	MOVL	8(%ebp), %ebx		# fd
115 | 	MOVL	$6, %eax		# 6 == __NR_close
116 | 	INT	$0x80
117 | 	CMPL	$-4096, %eax		# -4095 <= %eax < 0 for errno
118 | 	JNA	.L4			# unsigned %eax <= -4096: success
119 | 
120 | 	NEGL	%eax
121 | 	MOVL	%eax, errno		# errno = -(syscall result)
122 | 	XORL	%eax, %eax
123 | 	DECL	%eax			# return -1
124 | .L4:
125 | 	POP	%ebx
126 | 	POP	%ebp
127 | 	RET
128 | 
129 | 
130 | ####	#  Function:	int dup2(int oldfd, int newfd);
131 | 	#  Returns the syscall result, or -1 with errno set if the syscall failed.
132 | .globl dup2
133 | dup2:
134 | 	PUSH	%ebp
135 | 	MOVL	%esp, %ebp
136 | 	PUSH	%ebx			# preserved for the caller
137 | 
138 | 	MOVL	12(%ebp), %ecx		# newfd
139 | 	MOVL	8(%ebp), %ebx		# oldfd
140 | 	MOVL	$63, %eax		# 63 == __NR_dup2
141 | 	INT	$0x80
142 | 	CMPL	$-4096, %eax		# -4095 <= %eax < 0 for errno
143 | 	JNA	.L5			# unsigned %eax <= -4096: success
144 | 
145 | 	NEGL	%eax
146 | 	MOVL	%eax, errno		# errno = -(syscall result)
147 | 	XORL	%eax, %eax
148 | 	DECL	%eax			# return -1
149 | .L5:
150 | 	POP	%ebx
151 | 	POP	%ebp
152 | 	RET
153 | 
154 | 
155 | ####	#  Function:	void* mmap(void *addr, size_t length, int prot, 
156 | 	#                          int flags, int fd, off_t offset);
157 | 	#  Returns the syscall result, or -1 with errno set if the syscall failed.
158 | .globl mmap
159 | mmap:
160 | 	PUSH	%ebp
161 | 	MOVL	%esp, %ebp
162 | 	PUSH	%ebx			# preserved for the caller
163 | 
164 | 	LEA	8(%ebp), %ebx		# %ebx = address of the argument list
165 | 	MOVL	$90, %eax		# 90 == __NR_mmap
166 | 	INT	$0x80
167 | 	CMPL	$-4096, %eax		# -4095 <= %eax < 0 for errno
168 | 	JNA	.L6			# unsigned %eax <= -4096: success
169 | 
170 | 	NEGL	%eax
171 | 	MOVL	%eax, errno		# errno = -(syscall result)
172 | 	XORL	%eax, %eax
173 | 	DECL	%eax			# return -1
174 | 
175 | .L6:
176 | 	POP	%ebx
177 | 	POP	%ebp
178 | 	RET
179 | 
180 | 
181 | ####	#  Function:	void (*signal(int signum, void (*handler)(int)))(int);
182 | 	#  Returns the syscall result, or -1 with errno set if the syscall failed.
183 | .globl signal
184 | signal:
185 | 	PUSH	%ebp
186 | 	MOVL	%esp, %ebp
187 | 	PUSH	%ebx			# preserved for the caller
188 | 
189 | 	MOVL	12(%ebp), %ecx		# handler
190 | 	MOVL	8(%ebp), %ebx		# signum
191 | 	MOVL	$48, %eax		# 48 == __NR_signal
192 | 	INT	$0x80
193 | 	CMPL	$-4096, %eax		# -4095 <= %eax < 0 for errno
194 | 	JNA	.L7			# unsigned %eax <= -4096: success
195 | 
196 | 	NEGL	%eax
197 | 	MOVL	%eax, errno		# errno = -(syscall result)
198 | 	XORL	%eax, %eax
199 | 	DECL	%eax			# return -1
200 | .L7:
201 | 	POP	%ebx
202 | 	POP	%ebp
203 | 	RET
204 | 
205 | 
206 | ####	#  Function:	int kill( pid_t pid, int sig );
207 | 	#  Returns the syscall result, or -1 with errno set if the syscall failed.
208 | .globl kill
209 | kill:
210 | 	PUSH	%ebp
211 | 	MOVL	%esp, %ebp
212 | 	PUSH	%ebx			# preserved for the caller
213 | 
214 | 	MOVL	12(%ebp), %ecx		# sig
215 | 	MOVL	8(%ebp), %ebx		# pid
216 | 	MOVL	$37, %eax		# 37 == __NR_kill
217 | 	INT	$0x80
218 | 	CMPL	$-4096, %eax		# -4095 <= %eax < 0 for errno
219 | 	JNA	.L8			# unsigned %eax <= -4096: success
220 | 
221 | 	NEGL	%eax
222 | 	MOVL	%eax, errno		# errno = -(syscall result)
223 | 	XORL	%eax, %eax
224 | 	DECL	%eax			# return -1
225 | .L8:
226 | 	POP	%ebx
227 | 	POP	%ebp
228 | 	RET
229 | 
230 | 
231 | ####	#  Function:	pid_t getpid();
232 | 	#  Returns the syscall result unchanged; errno is never written.
233 | .globl getpid
234 | getpid:
235 | 	PUSH	%ebp
236 | 	MOVL	%esp, %ebp
237 | 	MOVL	$20, %eax		# 20 == __NR_getpid
238 | 	INT	$0x80
239 | 	#  NB the getpid syscall cannot fail.
240 | 	POP	%ebp
241 | 	RET
242 | 
243 | 
244 | ####	#  Function:	int execve(char* filename, char* argv[], char* envp[]);
245 | 	#  Returns the syscall result, or -1 with errno set if the syscall failed.
246 | .globl execve
247 | execve:
248 | 	PUSH	%ebp
249 | 	MOVL	%esp, %ebp
250 | 	PUSH	%ebx			# preserved for the caller
251 | 
252 | 	MOVL	16(%ebp), %edx		# envp
253 | 	MOVL	12(%ebp), %ecx		# argv
254 | 	MOVL	8(%ebp), %ebx		# filename
255 | 	MOVL	$11, %eax		# 11 == __NR_execve
256 | 	INT	$0x80
257 | 	CMPL	$-4096, %eax		# -4095 <= %eax < 0 for errno
258 | 	JNA	.L9			# unsigned %eax <= -4096: success
259 | 
260 | 	NEGL	%eax
261 | 	MOVL	%eax, errno		# errno = -(syscall result)
262 | 	XORL	%eax, %eax
263 | 	DECL	%eax			# return -1
264 | .L9:
265 | 	POP	%ebx
266 | 	POP	%ebp
267 | 	RET
268 | 
269 | 
270 | ####	#  Function:	int fork();
271 | 	#  Returns the syscall result, or -1 with errno set if the syscall failed.
272 | .globl fork
273 | fork:
274 | 	PUSH	%ebp
275 | 	MOVL	%esp, %ebp
276 | 
277 | 	MOVL	$2, %eax		# 2 == __NR_fork
278 | 	INT	$0x80
279 | 	CMPL	$-4096, %eax		# -4095 <= %eax < 0 for errno
280 | 	JNA	.L10			# unsigned %eax <= -4096: success
281 | 
282 | 	NEGL	%eax
283 | 	MOVL	%eax, errno		# errno = -(syscall result)
284 | 	XORL	%eax, %eax
285 | 	DECL	%eax			# return -1
286 | .L10:
287 | 	POP	%ebp
288 | 	RET
289 | 
290 | 
291 | ####	#  Function:	int waitpid(int pid, int* status, int options);
292 | 	#  Returns the syscall result, or -1 with errno set if the syscall failed.
293 | .globl waitpid
294 | waitpid:
295 | 	PUSH	%ebp
296 | 	MOVL	%esp, %ebp
297 | 	PUSH	%ebx			# preserved for the caller
298 | 
299 | 	MOVL	16(%ebp), %edx		# options
300 | 	MOVL	12(%ebp), %ecx		# status
301 | 	MOVL	8(%ebp), %ebx		# pid
302 | 	MOVL	$7, %eax		# 7 == __NR_waitpid
303 | 	INT	$0x80
304 | 	CMPL	$-4096, %eax		# -4095 <= %eax < 0 for errno
305 | 	JNA	.L11			# unsigned %eax <= -4096: success
306 | 
307 | 	NEGL	%eax
308 | 	MOVL	%eax, errno		# errno = -(syscall result)
309 | 	XORL	%eax, %eax
310 | 	DECL	%eax			# return -1
311 | .L11:
312 | 	POP	%ebx
313 | 	POP	%ebp
314 | 	RET
315 | 
316 | 
317 | ####	#  Function:	int unlink(char* filename);
318 | 	#  Returns the syscall result, or -1 with errno set if the syscall failed.
319 | .globl unlink
320 | unlink:
321 | 	PUSH	%ebp
322 | 	MOVL	%esp, %ebp
323 | 	PUSH	%ebx			# preserved for the caller
324 | 
325 | 	MOVL	8(%ebp), %ebx		# filename
326 | 	MOVL	$10, %eax		# 10 == __NR_unlink
327 | 	INT	$0x80
328 | 	CMPL	$-4096, %eax		# -4095 <= %eax < 0 for errno
329 | 	JNA	.L12			# unsigned %eax <= -4096: success
330 | 
331 | 	NEGL	%eax
332 | 	MOVL	%eax, errno		# errno = -(syscall result)
333 | 	XORL	%eax, %eax
334 | 	DECL	%eax			# return -1
335 | .L12:
336 | 	POP	%ebx
337 | 	POP	%ebp
338 | 	RET
339 | 
340 | 
341 | ####	#  Function:	time_t time(time_t *t);
342 | 	#  Returns the syscall result, or -1 with errno set if the syscall failed.
343 | .globl time
344 | time:
345 | 	PUSH	%ebp
346 | 	MOVL	%esp, %ebp
347 | 	PUSH	%ebx			# preserved for the caller
348 | 
349 | 	MOVL	8(%ebp), %ebx		# t
350 | 	MOVL	$13, %eax		# 13 == __NR_time
351 | 	INT	$0x80
352 | 	CMPL	$-4096, %eax		# -4095 <= %eax < 0 for errno
353 | 	JNA	.L13			# unsigned %eax <= -4096: success
354 | 
355 | 	NEGL	%eax
356 | 	MOVL	%eax, errno		# errno = -(syscall result)
357 | 	XORL	%eax, %eax
358 | 	DECL	%eax			# return -1
359 | .L13:
360 | 	POP	%ebx
361 | 	POP	%ebp
362 | 	RET
363 | 


--------------------------------------------------------------------------------
/stage-5/.gitignore:
--------------------------------------------------------------------------------
1 | .*.swp
2 | *.o
3 | cc
4 | ccx
5 | cpp
6 | cmp
7 | ccx1
8 | 


--------------------------------------------------------------------------------
/stage-5/Makefile:
--------------------------------------------------------------------------------
  1 | # stage-5/Makefile
  2 | 
  3 | # Copyright (C) 2013, 2014, 2015, 2020 Richard Smith <richard@ex-parrot.com>
  4 | # All rights reserved.
  5 | 
  6 | SHELL  = /bin/sh
  7 | 
  8 | RM     = /bin/rm
  9 | CP     = /bin/cp
 10 | LN_S   = /bin/ln -sf
 11 | MAKE   = /usr/bin/make
 12 | 
 13 | BINDIR = ../bin
 14 | LIBDIR = ../lib
 15 | INCDIR = ../include
 16 | PATH   = $(BINDIR)
 17 | 
 18 | all:	init ccx cpp cc cmp
 19 | 
 20 | init:
 21 | 	@test -d $(INCDIR) || $(MAKE) -C .. init
 22 | 	@test -x $(BINDIR)/cc0 || $(MAKE) -C ../stage-4 install
 23 | 
 24 | # cc0  is the compiler symlinked from stage 4.
 25 | # ccx1 is this stage's compiler (written in the cc0 language) compiled with cc0.
 26 | # ccx  is nearly the same code, though with a replacement node.c, compiled 
 27 | #        by itself (by ccx1), and is therefore smaller.
 28 | # ccx2 is a test compiler produced by ccx; it should be binary identical to ccx.
 29 | 
 30 | 
 31 | # Suppress the default rules
 32 | .SUFFIXES:
 33 | 
 34 | %.o:	%.s 
 35 | 	as $<
 36 | 
 37 | %0.s:	%.c
 38 | 	cc0 -S -o $@ $<
 39 | 
 40 | # The --compatibility=4 flag enables compatibility with the stage-4 cc.
 41 | %1.s:	%.c ccx1
 42 | 	./ccx1 --compatibility=4 -o $@ $<
 43 | 
 44 | # We cannot remove --compatibility=4 on this build, because of the untyped
 45 | # use of structs-as-arrays in the code.  They should be coded as structs,
 46 | # but that's not possible because it would be too difficult to add struct 
 47 | # support to the stage-4 cc.
 48 | %2.s:	%.i ccx
 49 | 	./ccx --compatibility=4 -o $@ $<
 50 | 
 51 | %.i:	%.c cpp
 52 | 	./cpp -Iinclude -o $@ $<
 53 | 
 54 | %.s:	%.c ccx
 55 | 	./ccx -o $@ $<
 56 | 
 57 | # All these files get compiled with --compatibility=4
 58 | CCX_OBJS  = scanbase.o scanner.o symtab.o expr.o stmt.o type.o \
 59 |             codegen.o i386.o main.o cli.o
 60 | 
 61 | # We replace node.o with a new version written using structs (and which
 62 | # therefore won't compile using the stage-4 cc), partly as a test of the
 63 | # stage-4 cc.
 64 | CCX0_OBJS = $(CCX_OBJS) node.o
 65 | CCX1_OBJS = $(CCX_OBJS) nodenew.o
 66 | 
 67 | ccx1:	$(CCX0_OBJS:%.o=%0.o)
 68 | 	ld -o $@ $(LIBDIR)/crt0.o $(CCX0_OBJS:%.o=%0.o) $(LIBDIR)/libc.o
 69 | 
 70 | ccx:	$(CCX1_OBJS:%.o=%1.o)
 71 | 	ld -o $@ $(LIBDIR)/crt0.o $(CCX1_OBJS:%.o=%1.o) $(LIBDIR)/libc.o
 72 | 
 73 | # These files need to compile without preprocessing, and are not given
 74 | # --compatibility=4.  Note that cli.o, scanbase.o and expr.o are also 
 75 | # used by the compiler and need also to compile with the stage 4 cc.
 76 | CPP_OBJS = scanbase.o cpp.o nodenew.o macros.o pvector.o cli.o expr.o \
 77 |            cpptype.o eval.o
 78 | 
 79 | cpp:	$(CPP_OBJS)
 80 | 	ld -o $@ $(LIBDIR)/crt0.o $(CPP_OBJS) $(LIBDIR)/libc.o
 81 | 
 82 | CC_OBJS = pvector.o timeconv.o cc.o cli.o
 83 | 
 84 | cc:	$(CC_OBJS:%.o=%2.o)
 85 | 	ld -o $@ $(LIBDIR)/crt0.o $(CC_OBJS:%.o=%2.o) $(LIBDIR)/libc.o
 86 | 
 87 | CC = ./cc --with-cpp=./cpp --with-ccx=./ccx -I./include
 88 | 
 89 | cmp:	cc cmp.c cli.c
 90 | 	$(CC) -o cmp cmp.c cli.c
 91 | 
 92 | install: init ccx cpp cc cmp
 93 | 	$(CP) ccx cpp cc cmp $(BINDIR)
 94 | 	$(RM) -f $(BINDIR)/cc0
 95 | 	$(CP) -r include/* $(INCDIR)
 96 | 
 97 | .INTERMEDIATE:  $(CCX0_OBJS:%.o=%0.o) $(CCX1_OBJS:%.o=%1.o) \
 98 |                 $(CCX1_OBJS:%.o=%.s) $(CCX1_OBJS) $(CPP_OBJS) $(CC_OBJS)
 99 | # Remove everything that the build and the check targets can generate.
100 | clean:
101 | 	$(RM) -f $(CCX0_OBJS:%.o=%0.s) $(CCX0_OBJS:%.o=%0.o) ccx1
102 | 	$(RM) -f $(CCX1_OBJS:%.o=%1.s) $(CCX1_OBJS:%.o=%1.o) ccx
103 | 	$(RM) -f $(CPP_OBJS:%.o=%.s) $(CPP_OBJS) cpp
104 | 	$(RM) -f $(CCX1_OBJS:%.o=%2.s) $(CCX1_OBJS:%.o=%2.i) 
105 | 	$(RM) -f $(CC_OBJS:%.o=%.i) $(CC_OBJS:%.o=%2.s) cc
106 | 	$(RM) -f $(CCX1_OBJS:%.o=%2.o) ccx2 $(CC_OBJS:%.o=%2.o) cc2
107 | 	$(RM) -f cmp.o cmp.s cmp.i cmp
108 | 
109 | check-cmp: ccx2 cc2 cmp
110 | 	./cmp ccx2 ccx
111 | 	./cmp cc2 cc
112 | 	! ./cmp -s cc2 ccx2
113 | 	$(RM) -f cc2 ccx2
114 | 
115 | # Build ccx2 and cc2 with a single command.  This is a good test of the 
116 | # driver logic.
117 | ccx2:	cc $(CCX1_OBJS:%.o=%.c)
118 | 	$(CC) -o ccx2 --compatibility=4 $(CCX1_OBJS:%.o=%.c)
119 | 
120 | cc2:	cc ccx2 $(CC_OBJS:%.o=%.c)
121 | 	$(CC) -o cc2 --with-ccx=ccx2 $(CC_OBJS:%.o=%.c)
122 | 
123 | check:	check-cmp
124 | 	$(MAKE) -r -C cpp-tests $@
125 | 
126 | world:
127 | 	set -e; for TARGET in clean init all check install; do \
128 | 	    $(MAKE) $$TARGET; \
129 | 	done
130 | 


--------------------------------------------------------------------------------
/stage-5/README.txt:
--------------------------------------------------------------------------------
  1 | BOOTSTRAP STAGE 5
  2 | 
  3 | Stage 5 reimplements the compiler from stage 4 in the B-like language 
  4 | of the stage-4 compiler.  This allows a significantly more advanced 
  5 | implementation, with the result that it supports most of the syntax of
  6 | K&R C (most notably, a type system) and is more efficient.
  7 | 
  8 | New features in the stage 5 compiler:
  9 |   - for loops
 10 |   - goto and labelled statements
 11 |   - switch, case labels and default (implemented inefficiently as a 
 12 |       sequence of if-else statements)
 13 |   - a type system, including all of C's integer and character types, 
 14 |       pointers, arrays and function pointers
 15 |   - structs
 16 |   - member access with the -> and . operators
 17 |   - sizeof operator
 18 |   - comma operator
 19 |   - type casts
 20 |   - C++-style comments
 21 |   - typedefs
 22 |   - #line directives (in the compiler proper) for improved diagnostics
 23 | 
 24 | The compiler is named ccx.
 25 | 
 26 |   Usage: ccx [OPTIONS] FILENAME
 27 | 
 28 |   Options:
 29 |     --help              Displays the help text
 30 |     -o FILENAME         Specifies the output file name
 31 |     --compatibility=N   Sets compatibility with the stage N tools
 32 | 
 33 | If no -o option is specified and the input file has a .c or .i extension,
 34 | the output is the same file name but with a .s extension.  
 35 | 
 36 | The --compatibility=4 flag enables compatibility with the stage 4
 37 | compiler.  This will permit arbitrary assignment to implicit int (but
 38 | not any variable with a declared type).  It will give an error when
 39 | subscripting something other than an array of 4-byte objects; most
 40 | commonly this triggers with character arrays which had to be manipulated
 41 | with lchar and rchar in stage 4.  An error is also given when doing
 42 | pointer arithmetic on objects that are not single bytes. 
 43 | 
 44 | 
 45 | The new C-like compiler is used to implement a simple preprocessor, cpp,
 46 | which is almost entirely compliant with the C90 standard.  (The only
 47 | known deviations from the standard are that it fails to handle
 48 | white-space correctly in stringification, doesn't implement digraphs or
 49 | trigraphs, or the ... punctuator, and will not concatenate string
 50 | literals.)
 51 | 
 52 |   Usage: cpp [OPTIONS] FILENAME
 53 | 
 54 |   Options:
 55 |     --help              Displays the help text
 56 |     -o FILENAME         Specifies the output file name (default: stdout)
 57 |     -I DIRECTORY        Appends a directory to the header search path
 58 |     -D NAME[=VAL]       Pre-defines a macro, optionally with a value
 59 |     --include FILENAME  Prefixes the specified file to the input
 60 |     -P                  Don't put #line directives in output
 61 | 
 62 | 
 63 | Finally, a compiler driver called cc has been written to simplify the
 64 | use of the four build tools (cpp, ccx, as and ld).  
 65 | 
 66 |   Usage: cc [OPTIONS] FILES...
 67 | 
 68 |   Options:
 69 |     --help              Displays the help text
 70 |     -o FILENAME         Specifies the output file name
 71 |     -E                  Halt after preprocessing, generating .i files
 72 |     -S                  Halt after compiling, generating .s files
 73 |     -c                  Halt after assembling, generating .o files
 74 |     -I DIRECTORY        Appends a directory to the header search path
 75 |     -D NAME[=VAL]       Pre-defines a macro, optionally with a value
 76 |     --compatibility=N   Sets compatibility with the stage N tools
 77 |     --nostdlib          Do not link against crt0.o and libc.o
 78 |     --with-cpp=PROGRAM  Use the specified program as the preprocessor
 79 |     --with-ccx=PROGRAM  Use the specified program as the compiler
 80 |     --with-as=PROGRAM   Use the specified program as the assembler
 81 |     --with-ld=PROGRAM   Use the specified program as the linker
 82 | 
 83 | Input files are distinguished using their extensions.  A .c file is
 84 | assumed to be a C file that needs preprocessing; a .i file is assumed
 85 | not to require preprocessing; a .s file is assumed to be in assembly;
 86 | and a .o file is assumed to be an object file.
 87 | 
 88 | The compiler driver instructs the preprocessor to search the include/
 89 | directory and to prefix the input with rbc_init.h, via the option
 90 | --include=rbc_init.h (a header which currently only defines the version
 91 | number in __RBC_INIT).  The __DATE__ and __TIME__ macros are also defined
 92 | by the driver and passed to the preprocessor via the command line.
 93 | 
 94 | TODO:
 95 |   - Errors on duplicate declarations at global scope
 96 |   - Tentative definitions
 97 |   - Prototypes
 98 |   - Unions, bit fields (probably not in this stage?), floats
 99 |   - n1062 #scopes?
100 |   - #pragma once?  Or #once and #forget per p0538r0?
101 |   - Use temporary file names in driver
102 | 


--------------------------------------------------------------------------------
/stage-5/cc.c:
--------------------------------------------------------------------------------
  1 | /* cc.c  --  the C compiler driver
  2 |  *  
  3 |  * Copyright (C) 2013, 2014 Richard Smith <richard@ex-parrot.com>
  4 |  * All rights reserved.
  5 |  */ 
  6 | 
  7 | /* The Makefile sticks --compatibility=4 on the command line.  Remove it. */
  8 | #pragma RBC compatibility 5 
  9 | 
 10 | #include <time.h>
 11 | #include "pvector.h"
 12 | 
 13 | static struct pvector *temps, *pp_args, *cc_args, *as_args, *ld_args;
 14 | static int last_stage = 4;  /* -E = 1, -S = 2, -c = 3; 4 means link too */
 15 | static char* o_name = 0;    /* The -o option, if any is given. */
 16 | static int nostdlib = 0;    /* --nostdlib */
 17 | /* Report the driver's usage summary through cli_error(). */
 18 | static
 19 | usage() {
 20 |     cli_error("Usage: cc [-E | -S | -c] [-o output] [options] files...\n");
 21 | }
 22 | 
 23 | extern char* opt_arg();
 24 | /* Parse the command line options.  Arguments not starting with '-' are input
 25 |  * files: they are skipped here and rescanned by each of the build stages. */
 26 | parse_args(argc, argv)
 27 |     int argc;
 28 |     char **argv;
 29 | {
 30 |     int i = 1;
 31 | 
 32 |     while ( i < argc ) {
 33 |         char *arg = argv[i], *arg2;
 34 | 
 35 |         /* opt_arg() advances i itself when it matches, so only the branches
 36 |          * that test ARG with strcmp() have to increment i by hand. */
 37 |         if ( arg2 = opt_arg( argv, argc, &i, "-I" ) ) {
 38 |             pvec_push( pp_args, "-I" );
 39 |             pvec_push( pp_args, arg2 );
 40 |         }
 41 |         else if ( arg2 = opt_arg( argv, argc, &i, "-D" ) ) {
 42 |             pvec_push( pp_args, "-D" );
 43 |             pvec_push( pp_args, arg2 );
 44 |         }
 45 | 
 46 |         else if ( strcmp( arg, "-E" ) == 0 ) {
 47 |             if ( last_stage != 4 )
 48 |                 cli_error("At most one of -E, -S and -c may be used\n");
 49 |             last_stage = 1; ++i;
 50 |         }
 51 |         else if ( strcmp( arg, "-S" ) == 0 ) {
 52 |             if ( last_stage != 4 )
 53 |                 cli_error("At most one of -E, -S and -c may be used\n");
 54 |             last_stage = 2; ++i;
 55 |         }
 56 |         else if ( strcmp( arg, "-c" ) == 0 ) {
 57 |             if ( last_stage != 4 )
 58 |                 cli_error("At most one of -E, -S and -c may be used\n");
 59 |             last_stage = 3; ++i;
 60 |         }
 61 | 
 62 |         else if ( arg2 = opt_arg( argv, argc, &i, "-o" ) ) {
 63 |             if ( o_name ) cli_error(
 64 |                 "Multiple output files specified: '%s' and '%s'\n",
 65 |                 o_name, arg2 );
 66 |             o_name = arg2;
 67 |         }
 68 | 
 69 |         else if ( arg2 = opt_arg( argv, argc, &i, "--compatibility" ) ) {
 70 |             /* Pass the option on to ccx as it was given: either the single
 71 |              * argument "--compatibility=N" or the pair "--compatibility N". */
 72 |             pvec_push( cc_args, arg );
 73 |             if ( strcmp( arg, "--compatibility" ) == 0 )
 74 |                 pvec_push( cc_args, arg2 );
 75 |         }
 76 |         else if ( strcmp( arg, "--nostdlib" ) == 0 ) {
 77 |             nostdlib = 1; ++i;
 78 |         }
 79 |         else if ( strcmp( arg, "--help" ) == 0 )
 80 |             usage();
 81 | 
 82 |         /* These options override the default backend programs */
 83 |         else if ( arg2 = opt_arg( argv, argc, &i, "--with-cpp" ) )
 84 |             pp_args->start[0] = arg2;
 85 |         else if ( arg2 = opt_arg( argv, argc, &i, "--with-ccx" ) )
 86 |             cc_args->start[0] = arg2;
 87 |         else if ( arg2 = opt_arg( argv, argc, &i, "--with-as" ) )
 88 |             as_args->start[0] = arg2;
 89 |         else if ( arg2 = opt_arg( argv, argc, &i, "--with-ld" ) )
 90 |             ld_args->start[0] = arg2;
 91 |         else if ( arg[0] == '-' )
 92 |             cli_error("Unknown option: %s\n", arg);
 93 |         else ++i;
 94 |    }
 95 | }
 96 | 
 97 | /* Invoke the command in ARGS.  Return 0 for success or 1 for failure. */
 98 | invoke(args) 
 99 |     struct pvector* args;
100 | {
101 |     int pid, status = 0;
102 |     extern stderr;
103 |     if ( ( pid = fork() ) == 0 )
104 |         execve( args->start[0], args->start, 0 );
105 |     /* Reached if fork() failed, or in a child whose execve() failed.  Such a
106 |      * child must exit here, not carry on running as a second driver. */
107 |     if ( pid == 0 || pid == -1 ) {
108 |         fprintf(stderr, "cc: Unable to invoke %s\n", args->start[0]);
109 |         exit(1);
110 |     }
111 |     /* WTERMSIG(status) || WEXITSTATUS(status); a failed wait also fails. */
112 |     if ( waitpid( pid, &status, 0 ) == -1 ) return 1;
113 |     return (status & 0xff7f) ? 1 : 0;
114 | }
115 | /* Preprocess each X.c argument into X.i.  Returns non-zero on failure. */
116 | preprocess(argc, argv)
117 |     int argc;
118 |     char **argv;
119 | {
120 |     extern char* strdup();
121 |     int n, failed = 0;
122 | 
123 |     for ( n = 1; n < argc && !failed; ++n ) {
124 |         char *src = argv[n], *dest;
125 |         int len = strlen(src);
126 |         int is_c = src[0] != '-' && len > 2
127 |                 && src[len-2] == '.' && src[len-1] == 'c';
128 | 
129 |         if ( is_c ) {
130 |             /* With -E, a -o option names the output; otherwise use X.i. */
131 |             if ( last_stage == 1 && o_name ) dest = strdup(o_name);
132 |             else { dest = strdup(src); dest[len-1] = 'i'; }
133 | 
134 |             pvec_push( pp_args, "-o" );
135 |             pvec_push( pp_args, dest );
136 |             pvec_push( pp_args, src );
137 |             failed = invoke( pp_args );
138 |             pvec_pop( pp_args ); pvec_pop( pp_args ); pvec_pop( pp_args );
139 |             /* An intermediate X.i is a temporary that main() will unlink. */
140 |             if ( last_stage == 1 ) free( dest );
141 |             else pvec_push( temps, dest );
142 |         }
143 |     }
144 |     return failed;
145 | }
146 | /* Compile the X.i of each X.c or X.i argument.  Returns non-zero on failure. */
147 | compile(argc, argv)
148 |     int argc;
149 |     char **argv;
150 | {
151 |     extern char* strdup();
152 |     int n, failed = 0;
153 |     /* With -S, a -o option names the compiler's output file. */
154 |     int named_out = o_name && last_stage == 2;
155 | 
156 |     for ( n = 1; n < argc && !failed; ++n ) {
157 |         char *src = argv[n];
158 |         int len = strlen(src);
159 |         int wanted = src[0] != '-' && len > 2 && src[len-2] == '.'
160 |                   && ( src[len-1] == 'c' || src[len-1] == 'i' );
161 |         if ( wanted ) {
162 |             /* The compiler always reads X.i, even if X.c was named. */
163 |             char *ifile = strdup(src);
164 |             ifile[len-1] = 'i';
165 |             if ( named_out ) {
166 |                 pvec_push( cc_args, "-o" );
167 |                 pvec_push( cc_args, o_name );
168 |             }
169 |             pvec_push( cc_args, ifile );
170 |             failed = invoke( cc_args );
171 |             pvec_pop( cc_args );
172 |             if ( named_out ) {
173 |                 pvec_pop( cc_args );
174 |                 pvec_pop( cc_args );
175 |             }
176 |             else if ( last_stage != 2 ) {
177 |                 /* Not the final stage, so X.s is a temporary. */
178 |                 char *sfile = strdup(src);
179 |                 sfile[len-1] = 's';
180 |                 pvec_push( temps, sfile );
181 |             }
182 |             free( ifile );
183 |         }
184 |     }
185 |     return failed;
186 | }
187 | /* Assemble the X.s of each X.c, X.i or X.s argument.  Non-zero on failure. */
188 | assemble(argc, argv)
189 |     int argc;
190 |     char **argv;
191 | {
192 |     extern char* strdup();
193 |     int n, failed = 0;
194 |     /* With -c, a -o option names the assembler's output file. */
195 |     int named_out = o_name && last_stage == 3;
196 | 
197 |     for ( n = 1; n < argc && !failed; ++n ) {
198 |         char *src = argv[n];
199 |         int len = strlen(src);
200 |         int wanted = src[0] != '-' && len > 2 && src[len-2] == '.'
201 |           && ( src[len-1] == 'c' || src[len-1] == 'i' || src[len-1] == 's' );
202 |         if ( wanted ) {
203 |             /* The assembler always reads X.s, whatever was named. */
204 |             char *sfile = strdup(src);
205 |             sfile[len-1] = 's';
206 |             if ( named_out ) {
207 |                 /* The stage 3 assembler doesn't support -o, but we'll be 
208 |                  * replacing that soon. */
209 |                 pvec_push( as_args, "-o" );
210 |                 pvec_push( as_args, o_name );
211 |             }
212 |             pvec_push( as_args, sfile );
213 |             failed = invoke( as_args );
214 |             pvec_pop( as_args );
215 |             if ( named_out ) {
216 |                 pvec_pop( as_args );
217 |                 pvec_pop( as_args );
218 |             }
219 |             else if ( last_stage != 3 ) {
220 |                 /* Not the final stage, so X.o is a temporary. */
221 |                 char *ofile = strdup(src);
222 |                 ofile[len-1] = 'o';
223 |                 pvec_push( temps, ofile );
224 |             }
225 |             free( sfile );
226 |         }
227 |     }
228 |     return failed;
229 | }
230 | /* Link the X.o of every input file, plus crt0.o and libc.o unless --nostdlib
231 |  * was given, into O_NAME (default a.out).  Returns non-zero on failure. */
232 | link(argc, argv)
233 |     int argc;
234 |     char **argv;
235 | {
236 |     int i = 0, fail = 0;
237 |     struct pvector* free_list = pvec_new();
238 |     char** f;
239 |     extern char* strdup();
240 | 
241 |     if ( o_name ) {
242 |         pvec_push( ld_args, "-o" );
243 |         pvec_push( ld_args, o_name );
244 |     }
245 |     /* Ideally the linker would default to producing a.out, but we may still
246 |      * be using the stage-3 linker which is very primitive and does not. */
247 |     else {
248 |         pvec_push( ld_args, "-o" );
249 |         pvec_push( ld_args, "a.out" );
250 |     }
251 |     if ( !nostdlib ) 
252 |         pvec_push( ld_args, "../lib/crt0.o" );
253 | 
254 |     while ( ++i < argc ) {
255 |         char *arg = argv[i];
256 |         int l = strlen(arg);
257 |         if ( arg[0] != '-' && l > 2 && (arg[l-1] == 'c' || arg[l-1] == 'i' 
258 |                || arg[l-1] == 's' || arg[l-1]== 'o') && arg[l-2] == '.' ) {
259 |             char *iname = strdup(arg);
260 |             iname[l-1] = 'o';
261 |             pvec_push( ld_args, iname );
262 |             pvec_push( free_list, iname );
263 |         }
264 |     } 
265 |     if ( !nostdlib ) 
266 |         /* This should be -lc, but the stage 3 linker doesn't accept that. */
267 |         pvec_push( ld_args, "../lib/libc.o" );
268 | 
269 |     fail = invoke( ld_args );
270 |     for ( f = free_list->start; f != free_list->end; ++f ) free(*f);
271 |     pvec_delete( free_list );   /* release the vector itself as well */
272 |     return fail;
273 | }
274 | /* Month abbreviations used to build __DATE__; main() indexes it by tm_mon. */
275 | static char *months[12] = { 
276 |     "Jan", "Feb", "Mar", "Apr", "May", "Jun", 
277 |     "Jul", "Aug", "Sep", "Oct", "Nov", "Dec"
278 | };
279 | /* Entry point: set up the tool command lines, then run the stages in turn. */
280 | main(argc, argv)
281 |     int argc;
282 |     char **argv;
283 | {
284 |     time_t now = time(NULL);
285 |     struct tm *utc = gmtime(&now);
286 |     char date_def[32], time_def[32];
287 |     char **tmp;
288 |     int status;
289 | 
290 |     temps = pvec_new();
291 |     pp_args = pvec_new(); pvec_push( pp_args, "../bin/cpp" );
292 |     cc_args = pvec_new(); pvec_push( cc_args, "../bin/ccx" );
293 |     as_args = pvec_new(); pvec_push( as_args, "../bin/as" );
294 |     ld_args = pvec_new(); pvec_push( ld_args, "../bin/ld" );
295 | 
296 |     /* rbc_init.h exists so that new compiler functionality can be announced
297 |      * without having to update either this driver or the preprocessor:
298 |      * it is simply prefixed to every file that gets preprocessed. */
299 |     pvec_push( pp_args, "-I../include" );
300 |     pvec_push( pp_args, "--include=rbc_init.h" );
301 | 
302 |     /* __DATE__ and __TIME__ need compiler support, so are defined here.
303 |      * Other macros, such as __STDC__, probably belong in <rbc_init.h>. */
304 |     snprintf(date_def, 32, "-D__DATE__=\"%3s %2d %4d\"",
305 |              months[utc->tm_mon], utc->tm_mday, utc->tm_year+1900);
306 |     snprintf(time_def, 32, "-D__TIME__=\"%02d:%02d:%02d\"",
307 |              utc->tm_hour, utc->tm_min, utc->tm_sec);
308 |     pvec_push( pp_args, date_def );
309 |     pvec_push( pp_args, time_def );
310 | 
311 |     parse_args( argc, argv );
312 |     status = preprocess( argc, argv );
313 |     if ( status == 0 && last_stage >= 2 ) status = compile( argc, argv );
314 |     if ( status == 0 && last_stage >= 3 ) status = assemble( argc, argv );
315 |     if ( status == 0 && last_stage >= 4 ) status = link( argc, argv );
316 | 
317 |     pvec_delete(pp_args); pvec_delete(cc_args);
318 |     pvec_delete(ld_args); pvec_delete(as_args);
319 | 
320 |     /* Delete the intermediate files that the stages recorded in temps. */
321 |     tmp = temps->start;
322 |     while ( tmp != temps->end ) {
323 |         unlink(*tmp);
324 |         free(*tmp);
325 |         ++tmp;
326 |     }
327 |     pvec_delete(temps);
328 |     return status;
329 | }
330 | 


--------------------------------------------------------------------------------
/stage-5/cli.c:
--------------------------------------------------------------------------------
 1 | /* cli.c  --  command line interface utils
 2 |  *
 3 |  * Copyright (C) 2013 Richard Smith <richard@ex-parrot.com>
 4 |  * All rights reserved.
 5 |  */
 6 | /* Format a message to stderr with vfprintf(), then exit with status 1. */
 7 | cli_error(fmt) 
 8 |     char *fmt;
 9 | {
10 |     extern stderr;
11 |     vfprintf(stderr, fmt, &fmt);  /* NOTE(review): &fmt stands in for a va_list */
12 |     exit(1);
13 | }
14 | 
15 | /* Check whether argv[*argnptr] is the option ARGNAME and, if so, return a
16 |  * pointer to the option's value, advancing *argnptr past what was consumed.
17 |  * Returns 0, without touching *argnptr, if the argument is something else.
18 |  *
19 |  * Three spellings are accepted:
20 |  *   NAME VALUE     the value is the next element of argv;
21 |  *   -XVALUE        two-character options are followed directly by the value;
22 |  *   --NAME=VALUE   longer options need an '=' before the value.
23 |  * If the first form has no following argument, cli_error() is called. */
24 | char* opt_arg(argv, argc, argnptr, argname)
25 |     char **argv;
26 |     int argc, *argnptr;
27 |     char *argname;
28 | {
29 |     auto char *arg = argv[*argnptr];
30 |     auto int namelen = strlen(argname);
31 |     auto int next = 0;
32 | 
33 |     /* Anything that does not start with the option name is not ours. */
34 |     if ( strncmp( arg, argname, namelen ) != 0 )
35 |         return 0;
36 | 
37 |     /* The character just after the name tells the three spellings apart. */
38 |     next = rchar( arg, namelen );
39 | 
40 |     /* Bare option: the value is the following command-line argument. */
41 |     if ( next == 0 ) {
42 |         ++*argnptr;
43 |         if ( *argnptr == argc )
44 |             cli_error("The %s option takes an argument\n", argname);
45 |         arg = argv[*argnptr];
46 |         ++*argnptr;
47 |         return arg;
48 |     }
49 | 
50 |     /* Short arguments (e.g. -X) do not have an '=' before their values. */
51 |     if ( namelen == 2 ) {
52 |         arg += namelen;
53 |         ++*argnptr;
54 |         return arg;
55 |     }
56 | 
57 |     /* Long arguments (e.g. --foo) need an '=' before their values. */
58 |     if ( next == '=' ) {
59 |         arg += namelen + 1;
60 |         ++*argnptr;
61 |         return arg;
62 |     }
63 | 
64 |     return 0;
65 | }
45 | 
46 | 
47 | 


--------------------------------------------------------------------------------
/stage-5/cmp.c:
--------------------------------------------------------------------------------
 1 | /* cmp.c  --  an implementation of the POSIX cmp(1) utility
 2 |  *  
 3 |  * Copyright (C) 2015 Richard Smith <richard@ex-parrot.com>
 4 |  * All rights reserved.
 5 |  */ 
 6 | 
 7 | /* The Makefile sticks --compatibility=4 on the command line.  Remove it. */
 8 | #pragma RBC compatibility 5 
 9 | 
10 | #include <stdio.h> 
11 | 
12 | /* Report the command synopsis through cli_error(), which also terminates
13 |  * the program. */
14 | usage()
15 | {
16 |     cli_error("Usage: cmp [-l|-s] file1 file2\n");
17 | }
15 | 
16 | /* Compare two files byte by byte, after the fashion of POSIX cmp(1).
17 |  *
18 |  *   cmp [-l|-s] file1 file2
19 |  *
20 |  * Either file (but not both) may be "-", meaning standard input.  The
21 |  * return value is 0 if the files are identical and 1 if they differ.
22 |  * Usage errors and files that cannot be opened go through cli_error(). */
23 | main(argc, argv) 
24 |     int argc;
25 |     char **argv;
26 | {
27 |     char *na, *nb;
28 |     FILE *a, *b;
29 |     int opt_s = 0, opt_l = 0;
30 |     int i = 1, bytes = 0, lines = 1, status = 0;
31 | 
32 |     if (argc < 3) usage();
33 | 
34 |     /* The -s option suppresses output. */
35 |     if ( strcmp( argv[i], "-s" ) == 0 )
36 |         opt_s = 1, ++i;
37 |     /* The -l option prints a byte-by-byte comparison in a whacky format. */
38 |     else if ( strcmp( argv[i], "-l" ) == 0 )
39 |         opt_l = 1, ++i;
40 | 
41 |     /* Exactly two operands must remain.  This has to be checked before the
42 |      * operands are read: "cmp -s file" passes the argc test above but leaves
43 |      * only one operand, and the second would then be the null pointer that
44 |      * terminates argv. */
45 |     if (argc - i != 2) usage();
46 | 
47 |     na = argv[i++];
48 |     if ( strcmp( na, "-" ) ) {
49 |         a = fopen( na, "r" );
50 |         if (!a) cli_error( "cmp: unable to open file '%s'\n", na );
51 |     }
52 |     else a = stdin;
53 | 
54 |     nb = argv[i++];
55 |     if ( strcmp( nb, "-" ) ) {
56 |         b = fopen( nb, "r" );
57 |         if (!b) cli_error( "cmp: unable to open file '%s'\n", nb );
58 |     }
59 |     else if ( a == stdin ) 
60 |         cli_error( "cmp: cannot read standard input twice\n" );
61 |     else b = stdin;
62 | 
63 |     while (1) {
64 |         int ca = fgetc(a), cb = fgetc(b);
65 |         /* bytes is the 1-based offset of the pair of bytes just read. */
66 |         ++bytes;
67 | 
68 |         /* The format of these messages, and whether they go to stdout or 
69 |          * stderr, is prescribed by POSIX. */
70 |         if ( ca == EOF && cb == EOF ) 
71 |             break;
72 |         else if ( ca == EOF || cb == EOF ) {
73 |             if (!opt_s) {
74 |                 fflush(stdout);
75 |                 fprintf(stderr, "cmp: EOF on %s\n", ca == EOF ? na : nb);
76 |             }
77 |             status = 1; 
78 |             break;
79 |         }
80 |         else if ( ca != cb ) {
81 |             status = 1;
82 |             /* With -l every differing byte is listed, so keep going. */
83 |             if (opt_l)
84 |                 printf( "%d %o %o\n", bytes, ca, cb );
85 |             else {
86 |                 if (!opt_s) 
87 |                     printf( "%s %s differ: char %d, line %d\n", 
88 |                             na, nb, bytes, lines );
89 |                 break;
90 |             }
91 |         }
92 |         else if ( ca == '\n' ) 
93 |             ++lines;
94 |     }
95 | 
96 |     return status;
97 | }
85 | 


--------------------------------------------------------------------------------
/stage-5/cpp-tests/Makefile:
--------------------------------------------------------------------------------
 1 | # stage-5/cpp-tests/Makefile
 2 | 
 3 | # Copyright (C) 2015, 2016, 2018, 2020 Richard Smith <richard@ex-parrot.com>
 4 | # All rights reserved.
 5 | # PATH is the parent directory, presumably so that the bare cpp and cmp in the recipe below are the ones built in ..
 6 | SHELL = /bin/sh
 7 | PATH  = ..
 8 | # With PATH overridden as above, make itself is named by its absolute path.
 9 | MAKE  = /usr/bin/make
10 | 
11 | all:	check
12 | 
13 | # We have a proper test suite for the preprocessor
14 | CPP_TESTS = empty nocpp obj builtin fn simple suppress directive hash \
15 |             rescan macros include includemacro
16 | 
17 | check:	$(CPP_TESTS:%=%.run)
18 | 
19 | # Suppress the default rules
20 | .SUFFIXES:
21 | # Build a missing tool in the parent directory; $(@:../%=%) strips the leading ../ from the target name.
22 | ../cpp ../cmp:
23 | 	$(MAKE) -C .. $(@:../%=%)
24 | 
25 | # The exit status of a pipeline is the status of the rightmost command.
26 | # That means that if the preprocessor exits with non-zero status (e.g.
27 | # because it aborts or segfaults) after it has written everything we 
28 | # expect, the error is ignored and the cmp succeeds.  In bash we'd fix
29 | # this with set -o pipefail, but that doesn't exist in more primitive
30 | # shells.  The 2>&1 ensures that the error message is diverted to the cmp
31 | # where it breaks the comparison forcing a test failure.
32 | %.run:	%.c %.i ../cpp ../cmp
33 | 	cpp -P $< 2>&1 | cmp - $(<:%.c=%.i)
34 | 
35 | 


--------------------------------------------------------------------------------
/stage-5/cpp-tests/builtin.c:
--------------------------------------------------------------------------------
1 | /* This test checks that the two built-in macros work. */
2 | set_location(__FILE__, __LINE__);
3 | 


--------------------------------------------------------------------------------
/stage-5/cpp-tests/builtin.i:
--------------------------------------------------------------------------------
1 | set_location ( "builtin.c" , 2 ) ;
2 | 


--------------------------------------------------------------------------------
/stage-5/cpp-tests/directive.c:
--------------------------------------------------------------------------------
 1 | /* These are all null directives. */
 2 | #
 3 |  #
 4 |    #
 5 | 	\
 6 | #
 7 | /*  Foo # include */ #
 8 | 
 9 | /* Example from C99 6.10/4.
10 |  *
11 |  * "the sequence of preprocessing tokens on the second line is not a
12 |  * preprocessing directive, because it does not begin with a # at the
13 |  * start of translation phase 4, even though it will do so after the
14 |  * macro EMPTY has been replaced." */
15 | 
16 | #define EMPTY
17 | EMPTY # include <missing.h>
18 | 
19 | 
20 | 


--------------------------------------------------------------------------------
/stage-5/cpp-tests/directive.i:
--------------------------------------------------------------------------------
1 | # include < missing . h >
2 | 


--------------------------------------------------------------------------------
/stage-5/cpp-tests/empty.c:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ras52/bootstrap/9bea23e118d61a63a1fa6de70181706e5aca9f3a/stage-5/cpp-tests/empty.c


--------------------------------------------------------------------------------
/stage-5/cpp-tests/empty.i:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ras52/bootstrap/9bea23e118d61a63a1fa6de70181706e5aca9f3a/stage-5/cpp-tests/empty.i


--------------------------------------------------------------------------------
/stage-5/cpp-tests/fn.c:
--------------------------------------------------------------------------------
1 | /* This test checks that a very simple function-like macro works. */
2 | #define nop() 
3 | 
4 | nop()
5 | 


--------------------------------------------------------------------------------
/stage-5/cpp-tests/fn.i:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ras52/bootstrap/9bea23e118d61a63a1fa6de70181706e5aca9f3a/stage-5/cpp-tests/fn.i


--------------------------------------------------------------------------------
/stage-5/cpp-tests/glue.c:
--------------------------------------------------------------------------------
 1 | /* Example from C99 6.10.3.5/6 */
 2 | #define str(s)      # s
 3 | #define xstr(s)     str(s)
 4 | #define debug(s, t) printf("x" # s "= %d, x" # t "= %s", \
 5 |                         x ## s, x ## t)
 6 | #define INCFILE(n)  vers ## n
 7 | #define glue(a, b)  a ## b
 8 | #define xglue(a, b) glue(a, b)
 9 | #define HIGHLOW     "hello"
10 | #define LOW         LOW ", world"
11 | 
12 | debug(1, 2);
13 | fputs(str(strncmp("abc\0d", "abc", '\4') // this goes away
14 |       == 0) str(: @\n), s);
15 | include xstr(INCFILE(2).h)
16 | glue(HIGH, LOW);
17 | xglue(HIGH, LOW)
18 | 


--------------------------------------------------------------------------------
/stage-5/cpp-tests/hash.c:
--------------------------------------------------------------------------------
1 | /* Example from C99 6.10.3.3/4 */
2 | #define hash_hash # ## #
3 | #define mkstr(a) # a
4 | #define in_between(a) mkstr(a)
5 | #define join(c, d) in_between(c hash_hash d)
6 | char p[] = join(x, y); 
7 | 


--------------------------------------------------------------------------------
/stage-5/cpp-tests/hash.i:
--------------------------------------------------------------------------------
1 | char p [ ] = "x ## y" ;
2 | 


--------------------------------------------------------------------------------
/stage-5/cpp-tests/include.c:
--------------------------------------------------------------------------------
1 | #include "vers2.h"
2 | 


--------------------------------------------------------------------------------
/stage-5/cpp-tests/include.i:
--------------------------------------------------------------------------------
1 | const int i = 42 ;
2 | 


--------------------------------------------------------------------------------
/stage-5/cpp-tests/includemacro.c:
--------------------------------------------------------------------------------
 1 | /* Example from C99 6.10.2/8 */
 2 | #define VERSION 2
 3 | #if VERSION == 1
 4 | 	#define INCFILE "vers1.h"
 5 | #elif VERSION == 2
 6 | 	#define INCFILE "vers2.h"
 7 | #else
 8 | 	#define INCFILE "versN.h"
 9 | #endif
10 | #include INCFILE
11 | 


--------------------------------------------------------------------------------
/stage-5/cpp-tests/includemacro.i:
--------------------------------------------------------------------------------
1 | const int i = 42 ;
2 | 


--------------------------------------------------------------------------------
/stage-5/cpp-tests/macros.c:
--------------------------------------------------------------------------------
 1 | /* Example from C99 6.10.3.5/5 */
 2 | #define x       3
 3 | #define f(a)    f(x * (a))
 4 | #undef  x
 5 | #define x       2
 6 | #define g       f
 7 | #define z       z[0]
 8 | #define h       g(~
 9 | #define m(a)    a(w)
10 | #define w       0,1
11 | #define t(a)    a
12 | #define p()     int
13 | #define q(x)    x
14 | #define r(x,y)  x ## y
15 | #define str(x)  # x
16 | 
17 | f(y+1) 
18 |   + f(f(z)) % t(t(g)(0) + t)(1);
19 | g(x+(3,4)-w) | h 5) & m
20 |     (f)^m(m);
21 | p() i[q()] = { q(1), r(2,3), r(4,), r(,5), r(,) };
22 | char c[2][6] = { str(hello), str() };
23 | 


--------------------------------------------------------------------------------
/stage-5/cpp-tests/macros.i:
--------------------------------------------------------------------------------
1 | f ( 2 * ( y + 1 ) ) + f ( 2 * ( f ( 2 * ( z [ 0 ] ) ) ) ) % f ( 2 * ( 0 ) ) + t ( 1 ) ;
2 | f ( 2 * ( 2 + ( 3 , 4 ) - 0 , 1 ) ) | f ( 2 * ( ~ 5 ) ) &
3 | f ( 2 * ( 0 , 1 ) ) ^ m ( 0 , 1 ) ;
4 | int i [ ] = { 1 , 23 , 4 , 5 , } ;
5 | char c [ 2 ] [ 6 ] = { "hello" , "" } ;
6 | 


--------------------------------------------------------------------------------
/stage-5/cpp-tests/nocpp.c:
--------------------------------------------------------------------------------
1 | /* The purpose of this test is to check for memory leaks etc. when the
2 |  * preprocessor encounters identifiers that don't expand. */
3 | extern inf f();
4 | int i;
5 | 


--------------------------------------------------------------------------------
/stage-5/cpp-tests/nocpp.i:
--------------------------------------------------------------------------------
1 | extern inf f ( ) ;
2 | int i ;
3 | 


--------------------------------------------------------------------------------
/stage-5/cpp-tests/obj.c:
--------------------------------------------------------------------------------
1 | /* This test checks that a very simple object-like macro works. */
2 | #define NULL 0
3 | 
4 | NULL
5 | 


--------------------------------------------------------------------------------
/stage-5/cpp-tests/obj.i:
--------------------------------------------------------------------------------
1 | 0
2 | 


--------------------------------------------------------------------------------
/stage-5/cpp-tests/rescan.c:
--------------------------------------------------------------------------------
1 | /* This is a slightly non-trivial test of rescanning the result of macro
2 |  * expansion.  For a long time this case was causing a memory leak. */
3 | #define f(a)    (0+a)
4 | #define g       f
5 | #define t(a)    a
6 | 
7 | t(g);
8 | 


--------------------------------------------------------------------------------
/stage-5/cpp-tests/rescan.i:
--------------------------------------------------------------------------------
1 | f ;
2 | 


--------------------------------------------------------------------------------
/stage-5/cpp-tests/simple.c:
--------------------------------------------------------------------------------
1 | #if 1
2 | #define INT int
3 | #define DECLARE(type, name) type name;
4 | #endif
5 | 
6 | DECLARE(INT, i)
7 | 


--------------------------------------------------------------------------------
/stage-5/cpp-tests/simple.i:
--------------------------------------------------------------------------------
1 | int i ;
2 | 


--------------------------------------------------------------------------------
/stage-5/cpp-tests/suppress.c:
--------------------------------------------------------------------------------
1 | /* This test checks that macros are not expanded if not followed by an
2 |  * open parenthesis. */
3 | #define f() 1234
4 | 
5 | (f)();
6 | 


--------------------------------------------------------------------------------
/stage-5/cpp-tests/suppress.i:
--------------------------------------------------------------------------------
1 | ( f ) ( ) ;
2 | 


--------------------------------------------------------------------------------
/stage-5/cpp-tests/vers2.h:
--------------------------------------------------------------------------------
1 | /* Used by include.c */
2 | const int i = 42;
3 | 


--------------------------------------------------------------------------------
/stage-5/cpptype.c:
--------------------------------------------------------------------------------
  1 | /* cpptype.c  --  stub code to avoid the preprocessor knowing about types
  2 |  *
  3 |  * Copyright (C) 2014 Richard Smith <richard@ex-parrot.com>
  4 |  * All rights reserved.
  5 |  */
  6 | 
  7 | /* The preprocessor knows nothing about typedefs: no name is ever one. */
  8 | is_typedef( name )
  9 | {
 10 |     return 0;
 11 | }
 12 | 
 13 | /* Every node is given the implicit int type. */
 14 | lookup_type( node )
 15 | {
 16 |     return implct_int();
 17 | }
 18 | 
 19 | /* For the purpose of preprocessor expression evaluation, all names can be
 20 |  * used undeclared (c.f. C90 6.8.1, which says they're equal to 0).  This
 21 |  * effectively means all identifiers are declared, so always return 1. */
 22 | is_declared( name )
 23 | {
 24 |     return 1;
 25 | }
 21 | 
 22 | /* The normal logic for disallowing [] in an integral constant expression
 23 |  * is based on the impossibility of there being arguments of any type for
 24 |  * which it would be valid.  That logic is in the C version of chk_subscr().
 25 |  * In the preprocessor, we give a blanket rejection. */
 26 | chk_subscr( node )
 27 | {
 28 |     error("Subscript operator not permitted in constant expression");
 29 | }
 30 | 
 31 | /* As per chk_subscr(), the logic for C is rather round-about. */
 32 | chk_member( node )
 33 | {
 34 |     error("Member access not permitted in constant expression");
 35 | }
 36 | 
 37 | /* As per chk_subscr(), the logic for C is rather round-about. */
 38 | chk_addr( node )
 39 | {
 40 |     error("Taking addresses not permitted in constant expression");
 41 | }
 42 | 
 43 | /* As per chk_subscr(), the logic for C is rather round-about. */
 44 | chk_deref( node )
 45 | {
 46 |     error("Dereferencing not permitted in constant expression");
 47 | }
 48 | 
 49 | /* Function call arguments are rejected outright. */
 50 | chk_arg( node )
 51 | {
 52 |     error("Function calls not permitted in constant expression");
 53 | }
 48 | 
 49 | /* Operations that are already disallowed by virtue of being in an ICE
 50 |  * (integral constant expression): nothing further to check. */
 51 | chk_incdec( node ) { }
 52 | chk_call( node )   { }
 53 | chk_assign( node ) { }
 54 | chk_comma( node )  { }
 55 | 
 56 | /* Operations that are always valid because the only types are int:
 57 |  * again there is nothing to check. */
 58 | chk_mult( node )   { }
 59 | chk_add( node )    { }
 60 | chk_shift( node )  { }
 61 | chk_cmp( node )    { }
 62 | chk_bitop( node )  { }
 63 | chk_int( node )    { }
 62 | 
 63 | /* This gets called in sizeof expressions and in casts.  The preprocessor
 64 |  * never sees a sizeof expression because it does not recognise keywords
 65 |  * and therefore sees sizeof(foo) as a function call.  Nor does the
 66 |  * preprocessor ever see a cast because, for similar reasons, it never
 67 |  * recognises anything as a decl spec.  Thus, in (int)foo, the (int)
 68 |  * is treated as a primary expression with int evaluating to 0, and
 69 |  * the 'foo' causes a parser error.  Getting here is an internal error. */
 70 | type_name( node )
 71 | {
 72 |     int_error("Unexpected type name in preprocessor");
 73 | }
 74 | 
 75 | /* Returns TYPE unchanged. */
 76 | prom_type( type )
 77 | {
 78 |     return type;
 79 | }
 77 | 
 78 | /* Static types: the single type node shared by everything in the
 79 |  * preprocessor.  It is zero until init_stypes() has been called. */
 80 | static s_int = 0;
 81 | 
 82 | /* Create the shared int type: a 'dclt' node whose operand 0 is an 'int'
 83 |  * node. */
 84 | init_stypes()
 85 | {
 86 |     s_int = new_node('dclt', 3);
 87 |     set_op( s_int, 0, new_node('int', 0) );
 88 | }
 89 | 
 90 | /* Return the shared int type node.  No new reference is taken here. */
 91 | implct_int()
 92 | {
 93 |     return s_int;
 94 | }
 95 | 
 96 | /* Release the shared int type node created by init_stypes(). */
 97 | fini_stypes()
 98 | {
 99 |     free_node(s_int);
100 | }
 93 | 
 94 | size_t_type() {
 95 |     int_error("Use of size_t in preprocessor");
 96 | }
 97 | 
 98 | type_size() {
 99 |     int_error("Size of type required in preprocessor");
100 | }
101 | 
102 | is_dclspec() {
103 |     return 0;
104 | }
105 | 


--------------------------------------------------------------------------------
/stage-5/eval.c:
--------------------------------------------------------------------------------
 1 | /* eval.c  --  evaluate integral constant expressions 
 2 |  *
 3 |  * Copyright (C) 2014 Richard Smith <richard@ex-parrot.com>
 4 |  * All rights reserved.
 5 |  */
 6 | 
 7 | /* If only we had a preprocessor to avoid these duplications ... :-) */
 8 | struct node {
 9 |     int code;           /* character code for the node, e.g. '+' or 'if'. */
10 |     int arity;          /* the number of nodes in the ops[] array. */
11 |     struct node* type;  /* Always NULL in the preprocessor.  NOTE(review): new_num() below sets it -- confirm. */
12 |     struct node* ops[4]; /* operands; a 'num' node keeps its integer value in ops[0] (see new_num). */
13 | };
14 | 
15 | struct node *add_ref();
16 | 
17 | /* Build a new 'num' leaf node holding VAL.  The value itself is stored,
18 |  * cast to a pointer, in the first operand slot, and the node is given a
19 |  * reference to the implicit int type. */
20 | static
21 | struct node *
22 | new_num(val) {
23 |     struct node *num = new_node('num', 0);
24 | 
25 |     num->ops[0] = (struct node*) val;
26 |     num->type = add_ref( implct_int() );
27 |     return num;
28 | }
25 | 
26 | struct node *
27 | eval(n) 
28 |     struct node *n;
29 | {
30 |     int c = n->code, a = n->arity;
31 |     /* The following are not supported: 
32 |      *   id str unary-* unary-& ++ -- size () [] -> . (cast) , = @=
33 |      */
34 | 
35 |     if ( a == 0 ) {
36 |         if ( c == 'num' ) return add_ref(n);
37 |         else if ( c == 'chr' ) return new_num( parse_chr( node_str(n) ) );
38 |         else int_error("Unable to evaluate literal '%Mc'", c);
39 |     }
40 |     
41 |     else if ( a == 1 ) {
42 |         struct node *arg = eval( n->ops[0] ), *val;
43 |         if ( c == '+' ) return arg;
44 |         else if ( c == '-' ) val = new_num( -node_ival(arg) );
45 |         else if ( c == '~' ) val = new_num( ~node_ival(arg) );
46 |         else if ( c == '!' ) val = new_num( !node_ival(arg) );
47 |         else int_error("Unable to evaluate unary '%Mc'", c);
48 |         free_node(arg);
49 |         return val;
50 |     }
51 | 
52 |     else if ( a == 2 ) {
53 |         struct node *lhs = eval( n->ops[0] ), *rhs = eval( n->ops[1] ), *val;
54 |         if ( c == ',' ) val = add_ref(rhs);
55 |         else if ( c == '*'  ) val = new_num( node_ival(lhs) *  node_ival(rhs) );
56 |         else if ( c == '/'  ) val = new_num( node_ival(lhs) /  node_ival(rhs) );
57 |         else if ( c == '%'  ) val = new_num( node_ival(lhs) %  node_ival(rhs) );
58 |         else if ( c == '+'  ) val = new_num( node_ival(lhs) +  node_ival(rhs) );
59 |         else if ( c == '-'  ) val = new_num( node_ival(lhs) -  node_ival(rhs) );
60 |         else if ( c == '<<' ) val = new_num( node_ival(lhs) << node_ival(rhs) );
61 |         else if ( c == '>>' ) val = new_num( node_ival(lhs) >> node_ival(rhs) );
62 |         else if ( c == '<'  ) val = new_num( node_ival(lhs) <  node_ival(rhs) );
63 |         else if ( c == '>'  ) val = new_num( node_ival(lhs) >  node_ival(rhs) );
64 |         else if ( c == '<=' ) val = new_num( node_ival(lhs) <= node_ival(rhs) );
65 |         else if ( c == '>=' ) val = new_num( node_ival(lhs) >= node_ival(rhs) );
66 |         else if ( c == '==' ) val = new_num( node_ival(lhs) == node_ival(rhs) );
67 |         else if ( c == '!=' ) val = new_num( node_ival(lhs) != node_ival(rhs) );
68 |         else if ( c == '&'  ) val = new_num( node_ival(lhs) &  node_ival(rhs) );
69 |         else if ( c == '^'  ) val = new_num( node_ival(lhs) ^  node_ival(rhs) );
70 |         else if ( c == '|'  ) val = new_num( node_ival(lhs) |  node_ival(rhs) );
71 |         /* We don't care about whether these short circuit as it is not 
72 |          * possible to detect that. */
73 |         else if ( c == '&&' ) val = new_num( node_ival(lhs) && node_ival(rhs) );
74 |         else if ( c == '||' ) val = new_num( node_ival(lhs) || node_ival(rhs) );
75 |         else int_error("Unable to evaluate binry '%Mc'", c);
76 |         free_node(rhs); free_node(lhs);
77 |         return val;
78 |     }
79 | 
80 |     else if ( c == '?:' ) {
81 |         struct node *cond = eval( n->ops[0] );
82 |         int val = node_ival(cond);
83 |         free_node(cond);
84 |         return eval( n->ops[ val ? 1 : 2 ] );
85 |     }
86 | 
87 |     else int_error("Unable to evaluate unknown operator '%Mc'", c);
88 | }
89 | 


--------------------------------------------------------------------------------
/stage-5/i386.c:
--------------------------------------------------------------------------------
  1 | /* i386.c  --  i386 specific code
  2 |  *
  3 |  * Copyright (C) 2013 Richard Smith <richard@ex-parrot.com>
  4 |  * All rights reserved.
  5 |  */
  6 | 
  7 | /* Return the operand-size suffix letter for an operand of SZ bytes:
  8 |  * 'L' for 4, 'W' for 2 and 'B' for 1.  Any other size is reported through
  9 |  * int_error(). */
 10 | static
 11 | sz_suffix(sz) {
 12 |     if (sz == 4) return 'L';
 13 |     else if (sz == 2) return 'W';
 14 |     else if (sz == 1) return 'B';
 15 |     int_error("Unexpected size operand to instruction: %d", sz);
 16 | }
 15 | 
 16 | /* Return the name of the accumulator register that is SZ bytes wide:
 17 |  * %eax, %ax or %al.  Any other size is reported through int_error(). */
 18 | static
 19 | sz_accum(sz) {
 20 |     if (sz == 4) return "%eax";
 21 |     if (sz == 2) return "%ax";
 22 |     if (sz == 1) return "%al";
 23 |     int_error("Unexpected size register requested: %d", sz);
 24 | }
 23 | 
 24 | /* Return the name of the auxiliary register that is SZ bytes wide:
 25 |  * %ecx, %cx or %cl.  Any other size is reported through int_error(). */
 26 | static
 27 | sz_aux_reg(sz) {
 28 |     if (sz == 4) return "%ecx";
 29 |     if (sz == 2) return "%cx";
 30 |     if (sz == 1) return "%cl";
 31 |     int_error("Unexpected size register requested: %d", sz);
 32 | }
 31 | 
 32 | /* Emit a copy of the accumulator (%eax) into the auxiliary register (%ecx). */
 33 | static
 34 | acc_to_aux(stream)
 35 | {
 36 |     fputs("\tMOVL\t%eax, %ecx\n", stream);
 37 | }
 36 | 
 37 | /* Emit code loading the integer constant NUM into %eax.
 38 |  *
 39 |  * We want to print big unsigned numbers in hex to avoid an error from
 40 |  * the stage-3 assembler which detects signed overflow on decimals. */
 41 | load_num(stream, num, is_unsgn) {
 42 |     if ( is_unsgn && (num & 0x80000000) )
 43 |         fprintf(stream, "\tMOVL\t$0x%X, %%eax\n", num);
 44 |     else
 45 |         fprintf(stream, "\tMOVL\t$%d, %%eax\n", num);
 46 | }
 43 | 
 44 | /* Emit code loading the character constant CHR, given as its source text,
 45 |  * into %eax. */
 46 | load_chr(stream, chr)
 47 | {
 48 |     fprintf(stream, "\tMOVL\t$%s, %%eax\n", chr);
 49 | }
 50 | 
 51 | /* Emit the string literal STR into the data section under the label
 52 |  * .LC<clabel>, then switch back to the text section. */
 53 | defn_str(stream, str, clabel)
 54 | {
 55 |     fprintf(stream, ".data\n.LC%d:\n\t.string %s\n.text\n", clabel, str);
 56 | }
 57 | 
 58 | /* Emit code loading the address of the label .LC<clabel> into %eax. */
 59 | load_str(stream, clabel)
 60 | {
 61 |     fprintf(stream, "\tMOVL\t$.LC%d, %%eax\n", clabel);
 62 | }
 55 | 
 56 | /* Emit code loading into %eax either the 32-bit value at OFFSET from
 57 |  * %ebp or, if NEED_ADDR is set, the address of that location. */
 58 | load_local(stream, offset, need_addr) {
 59 |     auto insn = need_addr ? "LEA" : "MOVL";
 60 |     fprintf(stream, "\t%s\t%d(%%ebp), %%eax\n", insn, offset);
 61 | }
 62 | 
 63 | /* Emit code loading into %eax either the 32-bit value of the symbol NAME
 64 |  * or, if NEED_ADDR is set, its address (as an immediate operand). */
 65 | load_symbol(stream, name, need_addr) {
 66 |     auto prefix = need_addr ? "$" : "";
 67 |     fprintf(stream, "\tMOVL\t%s%s, %%eax\n", prefix, name);
 68 | }
 69 | 
 70 | /* Emit code storing the low SZ bytes of the accumulator at OFFSET from
 71 |  * %ebp. */
 72 | save_local(stream, offset, sz) {
 73 |     fprintf(stream, "\tMOV%c\t%s, %d(%%ebp)\n", 
 74 |             sz_suffix(sz), sz_accum(sz), offset);
 75 | }
 69 | 
 70 | /* Emit a push of %eax onto the stack. */
 71 | asm_push(stream)
 72 | {
 73 |     fputs("\tPUSHL\t%eax\n", stream);
 74 | }
 75 | 
 76 | /* Emit an arithmetic negation of %eax. */
 77 | arith_neg(stream)
 78 | {
 79 |     fputs("\tNEGL\t%eax\n", stream);
 80 | }
 81 | 
 82 | /* Emit a bitwise complement of %eax. */
 83 | bit_not(stream)
 84 | {
 85 |     fputs("\tNOTL\t%eax\n", stream);
 86 | }
 81 | 
 82 | /* Emit code for logical negation: test the low SZ bytes of the
 83 |  * accumulator and leave 1 in %eax if they were zero, 0 otherwise. */
 84 | logic_not(stream, sz) {
 85 |     auto reg = sz_accum(sz);
 86 |     fprintf(stream, "\tTEST%c\t%s, %s\n", sz_suffix(sz), reg, reg);
 87 |     fputs("\tSETZ\t%al\n", stream);
 88 |     fputs("\tMOVZBL\t%al, %eax\n", stream);
 89 | }
 87 | 
 88 | /* Emit code replacing the address in %eax by the SZ-byte value stored
 89 |  * there.  If NEED_LVAL is set the address is wanted, so nothing is
 90 |  * emitted. */
 91 | dereference(stream, sz, need_lval) {
 92 |     if (need_lval)
 93 |         return;
 94 |     fprintf(stream, "\tMOV%c\t(%%eax), %s\n", sz_suffix(sz),
 95 |             sz_accum(sz));
 96 | }
 93 | 
 94 | /* Emit code adding N to the 32-bit value at the address held in register
 95 |  * REG.  Nothing is emitted for N == 0; INC / DEC are used for +1 / -1 and
 96 |  * ADD / SUB with an immediate otherwise.  The instruction always carries
 97 |  * the 'L' suffix. */
 98 | static
 99 | do_inc(stream, reg, n) {
100 |     if (n == 1)
101 |         fprintf(stream, "\t%s%c\t(%s)\n", "INC", 'L', reg);
102 |     else if (n == -1)
103 |         fprintf(stream, "\t%s%c\t(%s)\n", "DEC", 'L', reg);
104 |     else if (n != 0)
105 |         fprintf(stream, "\t%s%c\t$%d, (%s)\n", 
106 |                 n > 0 ? "ADD" : "SUB", 'L', abs(n), reg);
107 | }
104 | 
105 | /* Emit a prefix increment by N of the object whose address is in %eax,
106 |  * then load the updated SZ-byte value into the accumulator.
107 |  * NOTE(review): do_inc() always updates 32 bits in memory, whatever SZ
108 |  * is -- confirm this is intended for 1- and 2-byte objects. */
109 | increment(stream, sz, n)
110 | {
111 |     do_inc(stream, "%eax", n);
112 |     dereference(stream, sz, 0);
113 | }
109 | 
110 | /* Emit a postfix increment by N of the object whose address is in %eax:
111 |  * the address is saved in %ecx, the old SZ-byte value is loaded into the
112 |  * accumulator, and the object is then updated through %ecx. */
113 | postfix_inc(stream, sz, n)
114 | {
115 |     acc_to_aux(stream);
116 |     dereference(stream, sz, 0);
117 |     do_inc(stream, "%ecx", n);
118 | }
115 | 
116 | /* Emit the common start of a binary operation, fetching the operand that
117 |  * was pushed earlier.  For a symmetric operation that is not an assignment
118 |  * it is simply popped into %ecx.  Otherwise the accumulator is moved to
119 |  * %ecx first and the pushed operand is popped into %eax. */
120 | static
121 | start_binop(stream, is_assign, is_sym) {
122 |     if (!is_sym || is_assign) {
123 |         acc_to_aux(stream);
124 |         fputs("\tPOPL\t%eax\n", stream);
125 |     }
126 |     else fputs("\tPOPL\t%ecx\n", stream);
127 | }
124 | 
125 | /* Emit a multiplication of %eax by the operand popped into %ecx, using
126 |  * MUL if IS_UNSGN and IMUL otherwise.  For an assignment the popped value
127 |  * is an address: the product is stored through it and the address is left
128 |  * in %eax. */
129 | pop_mult(stream, is_assign, is_unsgn) {
130 |     auto insn = is_unsgn ? "MUL" : "IMUL";
131 |     auto operand = is_assign ? "(%ecx)" : "%ecx";
132 | 
133 |     fputs("\tPOPL\t%ecx\n", stream);
134 |     fprintf(stream, "\t%s%c\t%s\n", insn, 'L', operand);
135 |     if (is_assign) {
136 |         fputs("\tMOVL\t%eax, (%ecx)\n", stream);
137 |         fputs("\tMOVL\t%ecx, %eax\n", stream);
138 |     }
139 | }
132 | 
133 | /* Emit the division shared by pop_div() and pop_mod(): the divisor is
134 |  * moved from %eax to %ecx, the dividend is fetched into %eax, %edx is
135 |  * cleared, and DIV (if IS_UNSGN) or IDIV by %ecx is emitted.
136 |  *
137 |  * Note that this *only* POPs the first argument if !is_assign.  Otherwise
138 |  * it is read through the address on top of the stack, and popping that
139 |  * address is the caller's responsibility.
140 |  *
141 |  * NOTE(review): %edx is zeroed rather than sign-extended from %eax, so
142 |  * IDIVL sees a negative dividend as a large positive one -- confirm. */
143 | static
144 | common_div(stream, is_assign, is_unsgn) {
145 |     auto insn = is_unsgn ? "DIV" : "IDIV";
146 | 
147 |     acc_to_aux(stream);
148 |     if (!is_assign)
149 |         fputs("\tPOPL\t%eax\n", stream);
150 |     else
151 |         fputs("\tMOVL\t(%esp), %eax\nMOVL\t(%eax), %eax\n", stream);
152 |     fputs("\tXORL\t%edx, %edx\n", stream);
153 |     fprintf(stream, "\t%s%c\t%%ecx\n", insn, 'L');
154 | }
145 | 
146 | /* Emit a division, leaving the quotient in %eax.  For an assignment the
147 |  * target address is then popped, the quotient is stored through it, and
148 |  * the address is left in %eax. */
149 | pop_div(stream, is_assign, is_unsgn) {
150 |     common_div(stream, is_assign, is_unsgn);
151 |     if (!is_assign)
152 |         return;
153 |     fputs("\tPOPL\t%ecx\nMOVL\t%eax, (%ecx)\nMOVL\t%ecx, %eax\n", stream);
154 | }
151 | 
152 | /* Emit a remainder operation; the division leaves the remainder in %edx.
153 |  * For an assignment the target address is popped into %eax and the
154 |  * remainder stored through it; otherwise the remainder is moved to %eax. */
155 | pop_mod(stream, is_assign, is_unsgn) {
156 |     common_div(stream, is_assign, is_unsgn);
157 |     if (!is_assign)
158 |         fputs("\tMOVL\t%edx, %eax\n", stream);
159 |     else
160 |         fputs("\tPOPL\t%eax\nMOVL\t%edx, (%eax)\n", stream);
161 | }
159 | 
160 | /* Emit a shift.  The shift count moves from the accumulator to %ecx and
161 |  * the pushed operand is popped into %eax; MNEMONIC is then applied to
162 |  * %eax or, for an assignment, to the object it points at. */
163 | static
164 | pop_shift(stream, mnemonic, is_assign) {
165 |     auto target = is_assign ? "(%eax)" : "%eax";
166 |     start_binop(stream, is_assign, 0);
167 |     fprintf(stream, "\t%s\t%s\n", mnemonic, target);
168 | }
169 | 
170 | /* Left shift. */
171 | pop_lshift(stream, is_assign)
172 | {
173 |     pop_shift(stream, "SALL", is_assign);
174 | }
175 | 
176 | /* Right shift.  NOTE(review): always arithmetic (SARL); there is no
177 |  * signedness parameter -- confirm for unsigned operands. */
178 | pop_rshift(stream, is_assign)
179 | {
180 |     pop_shift(stream, "SARL", is_assign);
181 | }
168 | 
169 | /* Emit a relational operation: the pushed operand is popped into %ecx
170 |  * and compared, SZ bytes wide, with the accumulator; %eax is then set to
171 |  * 1 or 0 according to the condition code suffix COND. */
172 | static
173 | pop_rel(stream, sz, cond) {
174 |     fprintf( stream, "\tPOPL\t%%ecx\n\tCMP%c\t%s, %s\n", 
175 |              sz_suffix(sz), sz_accum(sz), sz_aux_reg(sz) );
176 |     fprintf( stream, "\tSET%s\t%%al\n\tMOVZBL\t%%al, %%eax\n", cond );
177 | }
178 | 
179 | /* The ordering comparisons pick the unsigned (A/B) or the signed (G/L)
180 |  * condition codes according to IS_UNSGN. */
181 | pop_gt(stream, sz, is_unsgn)
182 | {
183 |     pop_rel(stream, sz, is_unsgn ? "A"  : "G" );
184 | }
185 | pop_lt(stream, sz, is_unsgn)
186 | {
187 |     pop_rel(stream, sz, is_unsgn ? "B"  : "L" );
188 | }
189 | pop_ge(stream, sz, is_unsgn)
190 | {
191 |     pop_rel(stream, sz, is_unsgn ? "AE" : "GE");
192 | }
193 | pop_le(stream, sz, is_unsgn)
194 | {
195 |     pop_rel(stream, sz, is_unsgn ? "BE" : "LE");
196 | }
197 | 
198 | /* Equality does not depend on signedness. */
199 | pop_eq(stream, sz)
200 | {
201 |     pop_rel(stream, sz, "E");
202 | }
203 | pop_ne(stream, sz)
204 | {
205 |     pop_rel(stream, sz, "NE");
206 | }
182 | 
183 | /* Emit a two-operand instruction MNEMONIC of width SZ.  The operands are
184 |  * set up by start_binop(); the source is the auxiliary register and the
185 |  * destination is the accumulator or, for an assignment, the object that
186 |  * %eax points at. */
187 | static
188 | pop_binop(stream, mnemonic, is_assign, is_sym, sz) {
189 |     start_binop(stream, is_assign, is_sym);
190 |     fprintf(stream, "\t%s%c\t%s, %s\n", mnemonic, sz_suffix(sz), 
191 |             sz_aux_reg(sz), is_assign ? "(%eax)" : sz_accum(sz) );
192 | }
193 | 
194 | /* Subtraction is the only one of these that is not marked symmetric. */
195 | pop_add(stream, is_assign, sz)
196 | {
197 |     pop_binop(stream, "ADD", is_assign, 1, sz);
198 | }
199 | 
200 | pop_sub(stream, is_assign, sz)
201 | {
202 |     pop_binop(stream, "SUB", is_assign, 0, sz);
203 | }
204 | 
205 | pop_bitand(stream, is_assign, sz)
206 | {
207 |     pop_binop(stream, "AND", is_assign, 1, sz);
208 | }
209 | 
210 | pop_bitor(stream, is_assign, sz)
211 | {
212 |     pop_binop(stream, "OR",  is_assign, 1, sz);
213 | }
214 | 
215 | pop_bitxor(stream, is_assign, sz)
216 | {
217 |     pop_binop(stream, "XOR", is_assign, 1, sz);
218 | }
205 | 
206 | /* Return the number of times I can be shifted right by one bit before
207 |  * it becomes zero, less one: the base-2 logarithm for a power of two. */
208 | static
209 | ilog2(i) {
210 |     auto bits = 0;
211 |     i >>= 1;
212 |     while (i) {
213 |         ++bits;
214 |         i >>= 1;
215 |     }
216 |     return bits;
217 | }
212 | 
213 | /* Emit code replacing the address in %eax by either the 32-bit value at
214 |  * OFFSET from it or, if NEED_ADDR is set, the address of that location. */
215 | mem_access(stream, offset, need_addr) {
216 |     auto insn = need_addr ? "LEA" : "MOVL";
217 |     fprintf(stream, "\t%s\t%d(%%eax), %%eax\n", insn, offset);
218 | }
217 | 
218 | /* Emit code scaling %eax by the element size ELT_SIZE: multiplying by it
219 |  * if DIR > 0 and dividing by it otherwise.  A size of 1 needs no code.
220 |  *
221 |  * When dividing by a power of two an arithmetic shift (SARL) is used
222 |  * rather than a logical one.  Presumably the value being divided is a
223 |  * pointer difference, which is signed: a logical shift would turn a
224 |  * negative difference into a large positive number.
225 |  *
226 |  * NOTE(review): the general case still uses the unsigned DIVL with %edx
227 |  * zeroed, so it has the same problem for negative values -- confirm. */
228 | scale_elt(stream, elt_size, dir) {
229 |     if ( elt_size == 1 )
230 |         ;
231 |     else if ( (elt_size & (elt_size-1)) == 0 )
232 |         /* If the size is a power of two, then use a shift for speed */
233 |         fprintf(stream, "\tMOVB\t$%d, %%cl\n\t%sL\t%%eax\n", ilog2(elt_size),
234 |                 dir > 0 ? "SHL" : "SAR");
235 |     else {
236 |         if (dir < 0) fputs("\tXORL\t%edx, %edx\n", stream);
237 |         fprintf(stream, "\tMOVL\t$%d, %%ecx\n\t%sL\t%%ecx\n", elt_size,
238 |                 dir > 0 ? "MUL" : "DIV");
239 |     }
240 | }
231 | 
232 | /* Emit a simple assignment: the value moves from the accumulator to
233 |  * %ecx, the pushed address is popped into %eax, and the low SZ bytes of
234 |  * the value are stored through it. */
235 | pop_assign(stream, sz)
236 | {
237 |     start_binop(stream, 1, 0);
238 |     fprintf(stream, "\tMOV%c\t%s, (%%eax)\n", 
239 |             sz_suffix(sz), sz_aux_reg(sz));
240 | }
241 | 
242 | /* Emit code clearing %eax. */
243 | load_zero(stream)
244 | {
245 |     fputs("\tXORL\t%eax, %eax\n", stream);
246 | }
241 | 
242 | /* Emit code reserving SZ bytes of stack.  Nothing is emitted if SZ is 0. */
243 | alloc_stack(stream, sz) {
244 |     if (!sz)
245 |         return;
246 |     fprintf(stream, "\tSUBL\t$%d, %%esp\n", sz);
247 | }
248 | 
249 | /* Emit code releasing SZ bytes of stack.  Nothing is emitted if SZ is 0. */
250 | clear_stack(stream, sz) {
251 |     if (!sz)
252 |         return;
253 |     fprintf(stream, "\tADDL\t$%d, %%esp\n", sz);
254 | }
251 | 
252 | /* Emit a call to the function named FN_NAME, then release CLEANUP_SZ
253 |  * bytes of stack. */
254 | asm_call(stream, fn_name, cleanup_sz)
255 | {
256 |     fprintf(stream, "\tCALL\t%s\n", fn_name);
257 |     clear_stack( stream, cleanup_sz );
258 | }
259 | 
260 | /* Emit a call through the address in %eax, then release CLEANUP_SZ bytes
261 |  * of stack. */
262 | call_ptr(stream, cleanup_sz)
263 | {
264 |     fputs("\tCALL\t*%eax\n", stream);
265 |     clear_stack( stream, cleanup_sz );
266 | }
261 | 
262 | /* Emit a test of the low SZ bytes of the accumulator against itself,
263 |  * followed by the conditional jump MNEMONIC to the label .L<label_num>. */
264 | static
265 | cond_branch(stream, sz, mnemonic, label_num) {
266 |     auto reg = sz_accum(sz);
267 |     fprintf(stream, "\tTEST%c\t%s, %s\n\t%s\t.L%d\n", 
268 |             sz_suffix(sz), reg, reg, mnemonic, label_num);
269 | }
270 | 
271 | /* Branch if the accumulator is zero. */
272 | branch_ifz(stream, sz, label_num)
273 | {
274 |     cond_branch(stream, sz, "JZ", label_num);
275 | }
276 | 
277 | /* Branch if the accumulator is non-zero. */
278 | branch_ifnz(stream, sz, label_num)
279 | {
280 |     cond_branch(stream, sz, "JNZ", label_num);
281 | }
276 | 
/* Emit an unconditional jump to local label .L<LABEL_NUM>. */
branch(stream, label_num) {
    fputs("\tJMP\t", stream);
    fprintf(stream, ".L%d\n", label_num);
}
280 | 
/* Emit a comparison of %eax with the constant N, and a jump to
 * .L<LABEL_NUM> taken when they are equal. */
branch_eq_n(stream, n, label_num) {
    fprintf(stream, "\tCMPL\t$%d, %%eax\n", n);
    fprintf(stream, "\tJE\t.L%d\n", label_num);
}
284 | 
/* Emit the definition of local label .L<LABEL_NUM>. */
emit_label(stream, label_num) {
    fputs(".L", stream);
    fprintf(stream, "%d:\n", label_num);
}
288 | 
/* Emit code that replaces %eax with 1 if it was non-zero, or 0 otherwise. */
cast_bool(stream) {
    fputs("\tTESTL\t%eax, %eax\n", stream);
    fputs("\tSETNZ\t%al\n", stream);
    fputs("\tMOVZBL\t%al, %eax\n", stream);
}
292 | 
/* Emit a .globl directive for the symbol NAME. */
globl_decl(stream, name) {
    fputs(".globl\t", stream);
    fprintf(stream, "%s\n", name);
}
296 | 
/* Emit a .local directive for the symbol NAME. */
local_decl(stream, name) {
    fputs(".local\t", stream);
    fprintf(stream, "%s\n", name);
}
300 | 
/* Emit the start of function NAME: switch to .text, define the label, set
 * up the %ebp frame, and reserve FRAME_SZ bytes of stack. */
prolog(stream, name, frame_sz) {
    fputs(".text\n", stream);
    fprintf(stream, "%s:\n", name);
    fputs("\tPUSHL\t%ebp\n\tMOVL\t%esp, %ebp\n", stream);

    alloc_stack(stream, frame_sz);
}
305 | 
/* Emit the end of a function: release FRAME_SZ bytes of stack, then LEAVE
 * and RET, followed by a blank line. */
epilog(stream, frame_sz) {
    clear_stack(stream, frame_sz);

    fputs("\tLEAVE\n", stream);
    fputs("\tRET\n\n", stream);
}
310 | 
/* Switch to the .data section and emit the label NAME. */
data_decl(stream, name) {
    fputs(".data\n", stream);
    fprintf(stream, "%s:\n", name);
}
314 | 
/* Emit a .int directive whose value is the number NUM. */
int_decl_n(stream, num) {
    fputs("\t.int\t", stream);
    fprintf(stream, "%d\n", num);
}
318 | 
/* Emit a .int directive whose value is the text STR. */
int_decl_s(stream, str) {
    fputs("\t.int\t", stream);
    fprintf(stream, "%s\n", str);
}
322 | 
/* Emit a .int directive whose value is the label .LC<CLABEL>. */
int_decl_lc(stream, clabel) {
    fputs("\t.int\t", stream);
    fprintf(stream, ".LC%d\n", clabel);
}
326 | 
/* Emit a .zero directive with the count N. */
zero_direct(stream, n) {
    fputs("\t.zero\t", stream);
    fprintf(stream, "%d\n", n);
}
330 | 
/* Emit code to widen the accumulator from OLDSZ to NEWSZ, using MOVZ
 * (zero-extend) when IS_UNSGN is set and MOVS (sign-extend) otherwise.
 * Nothing is emitted unless NEWSZ is strictly larger than OLDSZ. */
promote(stream, is_unsgn, oldsz, newsz) {
    auto ext;

    if ( oldsz >= newsz )
        return;

    ext = is_unsgn ? 'Z' : 'S';
    fprintf( stream, "\tMOV%c%c%c\t%s, %s\n", ext,
             sz_suffix(oldsz), sz_suffix(newsz), 
             sz_accum(oldsz), sz_accum(newsz) );
}
337 | 


--------------------------------------------------------------------------------
/stage-5/include/bits/eof.h:
--------------------------------------------------------------------------------
 1 | /* <bits/eof.h>  --  the EOF value for <stdio.h>
 2 |  *
 3 |  * Copyright (C) 2005, 2013 Richard Smith <richard@ex-parrot.com>
 4 |  * All rights reserved.
 5 |  */
 6 | 
 7 | #ifndef __RBC_BITS_EOF_INCLUDED
 8 | #define __RBC_BITS_EOF_INCLUDED
 9 | 
10 | /* C90 7.9.1  The definition of the EOF macro.
11 |  * 
12 |  * EOF "expands to a negative integral constant expression that is returned
13 |  * by several functions to indicate end-of-file, that is, no more input from
14 |  * a stream." 
15 |  */ 
16 | #define EOF (-1)
17 | 
18 | #endif
19 | 
20 | 


--------------------------------------------------------------------------------
/stage-5/include/bits/file.h:
--------------------------------------------------------------------------------
 1 | /* <bits/file.h>  --  define FILE
 2 |  *  
 3 |  * Copyright (C) 2005, 2015 Richard Smith <richard@ex-parrot.com>
 4 |  * All rights reserved.
 5 |  */ 
 6 | 
 7 | #ifndef __RBC_BITS_FILE_INCLUDED
 8 | #define __RBC_BITS_FILE_INCLUDED
 9 | 
10 | /* C90 7.9.1  The definition of the FILE type.
11 |  *
12 |  * It can remain an incomplete type. */
13 | typedef struct __stdio_file_t FILE;
14 | 
15 | #endif
16 | 


--------------------------------------------------------------------------------
/stage-5/include/bits/file_access.h:
--------------------------------------------------------------------------------
 1 | /* <bits/file_access.h>  --  file access functions for <stdio.h>
 2 |  *
 3 |  * Copyright (C) 2005, 2013, 2015 Richard Smith <richard@ex-parrot.com>
 4 |  * All rights reserved.
 5 |  */
 6 | 
 7 | #ifndef __RBC_BITS_FILE_ACCESS_INCLUDED
 8 | #define __RBC_BITS_FILE_ACCESS_INCLUDED
 9 | 
10 | #include <bits/file.h>
11 | #include <bits/size_t.h>
12 | 
13 | /* These macros match values that are hardcoded into stage-4/output.c. */
14 | 
15 | /* Values suitable for the mode argument to setvbuf */
16 | #define _IOFBF  1  /* Fully buffered */
17 | #define _IOLBF  2  /* Line buffered */
18 | #define _IONBF  3  /* Unbuffered */
19 | 
20 | /* The size of the buffer required as the buf argument to setbuf. 
21 |  * Note: This is not (necessarily) the default buffer size. */
22 | #define BUFSIZ  4096
23 | 
24 | #if 0
25 | /* C90 7.9.5.1:  The fclose function */
26 | int fclose( FILE* stream );
27 | 
28 | /* C90 7.9.5.2:  The fflush function */
29 | int fflush( FILE* stream );
30 | 
31 | /* C90 7.9.5.3:  The fopen function */
32 | FILE *fopen( char const* filename, char const* mode );
33 | 
34 | /* C90 7.9.5.4:  The freopen function */
35 | FILE *freopen( char const *filename, char const *mode, FILE *stream ); 
36 | 
37 | /* C90 7.9.5.5:  The setbuf function */
38 | void setbuf( FILE *stream, char *buf );
39 | 
40 | /* C90 7.9.5.6:  The setvbuf function */
41 | int setvbuf( FILE *stream, char *buf, int mode, size_t size );
42 | 
43 | #else
44 | 
45 | /* Temporary versions while we don't support prototypes. */
46 | FILE *freopen();
47 | FILE *fopen();
48 | 
49 | #endif
50 | 
51 | #endif
52 | 


--------------------------------------------------------------------------------
/stage-5/include/bits/null.h:
--------------------------------------------------------------------------------
 1 | /* <bits/null.h>  --  the NULL macro is defined by multiple headers
 2 |  *
 3 |  * Copyright (C) 2005, 2013 Richard Smith <richard@ex-parrot.com>
 4 |  * All rights reserved.
 5 |  */
 6 | 
 7 | #ifndef __RBC_BITS_NULL_INCLUDED
 8 | #define __RBC_BITS_NULL_INCLUDED
 9 | 
10 | /* C90 7.1.6 Common definitions <stddef.h> (part implementation)
11 |  *
12 |  * "An integral constant expression with the value 0, or such an 
13 |  * expression cast to type void *, is called a null pointer constant."
14 |  * [C90 6.2.2.3]
15 |  *
16 |  * NULL "expands to an implementation-defined null pointer
17 |  * constant." [C90 7.1.6]
18 |  *
19 |  * It is also defined in <stdio.h>, see C90 7.9.1
20 |  */
21 | 
22 | /* FIXME  There is no void type in stage 5. */
23 | #define NULL 0
24 | 
25 | #endif
26 | 


--------------------------------------------------------------------------------
/stage-5/include/bits/size_t.h:
--------------------------------------------------------------------------------
 1 | /* <bits/size_t.h>  --  the size_t type is defined by multiple headers
 2 |  *
 3 |  * Copyright (C) 2005, 2013 Richard Smith <richard@ex-parrot.com>
 4 |  * All rights reserved.
 5 |  */
 6 | 
 7 | #ifndef __RBC_BITS_SIZE_T_INCLUDED
 8 | #define __RBC_BITS_SIZE_T_INCLUDED
 9 | 
10 | /* C90 7.1.6 Common definitions <stddef.h> (part implementation)
11 |  *
12 |  * "size_t is the unsigned integral type of the result of the sizeof 
13 |  * operator." [C90 7.1.6]
14 |  *
15 |  * It is also defined in <stdio.h>, see C90 7.9.1
16 |  */
17 | typedef unsigned int size_t;
18 | 
19 | #endif
20 | 


--------------------------------------------------------------------------------
/stage-5/include/bits/std_streams.h:
--------------------------------------------------------------------------------
 1 | /* <bits/std_streams.h>  --  define stdin, stdout & stderr
 2 |  *  
 3 |  * Copyright (C) 2005, 2015 Richard Smith <richard@ex-parrot.com>
 4 |  * All rights reserved.
 5 |  */ 
 6 | 
 7 | #ifndef __RBC_BITS_STD_STREAMS_INCLUDED
 8 | #define __RBC_BITS_STD_STREAMS_INCLUDED
 9 | 
10 | #include <bits/file.h>
11 | 
12 | /* C90 7.9.1  The definition of the standard stream macros.
13 |  *
14 |  * They "are expressions of type "pointer to FILE" that point to the FILE 
15 |  * objects associated, respectively, with the standard error, input, and 
16 |  * output streams."  But we want ELF objects of the same name. 
17 |  */
18 | extern FILE *stdin, *stdout, *stderr;
19 | 
20 | #define stdin  stdin
21 | #define stdout stdout
22 | #define stderr stderr
23 | 
24 | #endif 
25 | 


--------------------------------------------------------------------------------
/stage-5/include/bits/string.h:
--------------------------------------------------------------------------------
 1 | /* <bits/string.h>  --  string manipulation functions
 2 |  *
 3 |  * Copyright (C) 2005, 2021 Richard Smith <richard@ex-parrot.com>
 4 |  * All rights reserved.
 5 |  */
 6 | 
 7 | #ifndef __RBC_BITS_STRING_INCLUDED
 8 | #define __RBC_BITS_STRING_INCLUDED
 9 | 
/* The first branch holds the full prototypes and is disabled until the
 * compiler supports prototypes; the second branch declares only the
 * functions whose return type is not int.
 *
 * NOTE(review): strcpy and strncpy are listed here as returning int, whereas
 * C90 7.11.2 has them return char*.  Confirm against the library
 * implementation before enabling the prototypes. */
#if 0
/* C90 7.11.2:  Copying functions */
void*  memcpy( void* dest, void const* src, size_t n );
void*  memmove( void* dest, void const* src, size_t n );
int    strcpy( char* dest, char const* str );
int    strncpy( char* dest, char const* str, size_t n );

/* C90 7.11.3:  Concatenation functions */
char*  strcat( char* dest, char const* src );
char*  strncat( char* dest, char const* src, size_t n );

/* C90 7.11.4:  Comparison functions */
/* int    memcmp( void const* s1, void const* s2, size_t n ); */
int    strcmp( char const* s1, char const* s2 );
/* int    strcoll( char const* s1, char const* s2 ); */
int    strncmp( char const* s1, char const* s2, size_t n );
/* size_t strxfrm( char* s1, char const* s2, size_t n ); */

/* C90 7.11.5:  Search functions */
/* void*  memchr( void const* s, int c, size_t n ); */
char*  strchr( char const* s, int c );
size_t strcspn( char const* str, char const* chrs );
/* char*  strpbrk( char const* str, char const* chrs ); */
/* char*  strrchr( char const* str, int c ); */
size_t strspn( char const* str, char const* chrs );
/* char*  strstr( char const* str, char const* substr ); */
/* char*  strtok( char* str, char const* chrs ); */

/* C90 7.11.6:  Miscellaneous functions */
void*  memset( void* s, int c, size_t n );
/* char*  strerror( int errnum ); */
size_t strlen( char const* s );

/* POSIX.1-2001 extensions */
char *strdup( const char *str );
/* TODO: memccpy, strtok_r */

/* POSIX.1-2008 extensions */
/* TODO: stpcpy, stpncpy, strndup, strsignal */
size_t strnlen( char const* s, size_t n );

/* Random BSD extension */
char*  strlcat( char* dest, char const* src, size_t n );

#else

/* Temporary versions while we don't support prototypes. */
void* memcpy();
void* memmove();
char* strcat();
char* strncat();
char* strchr();
void* memset();
char* strdup();
char* strlcat();

#endif
67 | 
68 | #endif 
69 | 


--------------------------------------------------------------------------------
/stage-5/include/bits/struct_tm.h:
--------------------------------------------------------------------------------
 1 | /* <bits/struct_tm.h>  --  the definition of struct tm 
 2 |  *
 3 |  * Copyright (C) 2014 Richard Smith <richard@ex-parrot.com>
 4 |  * All rights reserved.
 5 |  */
 6 | 
 7 | #ifndef __RBC_BITS_STRUCT_TM_INCLUDED
 8 | #define __RBC_BITS_STRUCT_TM_INCLUDED
 9 | 
/* struct tm
 *
 * Every field below is required by the C standard, which does not fix
 * their order.  We keep the order used in the standard, as there seems no
 * advantage in doing otherwise, and we need no additional fields.
 */
struct tm {
    int tm_sec, tm_min, tm_hour;
    int tm_mday, tm_mon, tm_year;
    int tm_wday, tm_yday;
    int tm_isdst;
};
27 | 
28 | #endif
29 | 


--------------------------------------------------------------------------------
/stage-5/include/bits/time_t.h:
--------------------------------------------------------------------------------
 1 | /* <bits/time_t.h>  --  the definition of time_t 
 2 |  *
 3 |  * Copyright (C) 2014 Richard Smith <richard@ex-parrot.com>
 4 |  * All rights reserved.
 5 |  */
 6 | 
 7 | #ifndef __RBC_BITS_TIME_T_INCLUDED
 8 | #define __RBC_BITS_TIME_T_INCLUDED
 9 | 
10 | /* time_t
11 |  *
12 |  * The C standard simply requires this to be arithmetic, and not necessarily
13 |  * even signed.  The Linux kernel ABI makes it a 32-bit signed type. 
14 |  */
15 | typedef int time_t;
16 | 
17 | #endif
18 | 


--------------------------------------------------------------------------------
/stage-5/include/errno.h:
--------------------------------------------------------------------------------
 1 | /* <errno.h>  --  standard C library header for error diagnostics
 2 |  *  
 3 |  * Copyright (C) 2005, 2008, 2020 Richard Smith <richard@ex-parrot.com>
 4 |  * All rights reserved.
 5 |  */ 
 6 | 
 7 | #ifndef __RBC_ERRNO_INCLUDED
 8 | #define __RBC_ERRNO_INCLUDED
 9 | 
10 | /* C90 7.1.4 defines three macros: EDOM, ERANGE and errno.  
11 |  *
12 |  * Other macro names beginning E[0-9A-Z] are reserved for use by the 
13 |  * implementation for use as errno values (though obviously EOF conflicts).
14 |  *
15 |  * See discussion in C N1338 which suggests requiring errno to be a macro. */
16 | 
17 | #define EDOM 33
18 | #define ERANGE 34
19 | 
20 | extern int errno; 
21 | 
22 | #define errno errno
23 | 
24 | #endif
25 | 


--------------------------------------------------------------------------------
/stage-5/include/rbc_init.h:
--------------------------------------------------------------------------------
 1 | /* rbc_init.h  --  definitions read at the start of every compilation 
 2 |  * 
 3 |  * Copyright (C) 2013, 2015 Richard Smith <richard@ex-parrot.com>
 4 |  * All rights reserved.
 5 |  */
 6 | 
 7 | /* The primary purpose of this file was so that neither the compiler
 8 |  * driver, nor the preprocessor, need to be updated when the compiler
 9 |  * is updated to include new functionality. */
10 | 
11 | /* The current compiler version.  Note version numbers are stages, 
12 |  * so 4 was the first version, and this is 5. */
13 | #define __RBC_VERSION 5
14 | 
15 | /* To allow headers in this stage to be used later, it's convenient to
16 |  * allow some extra keywords to appear in them. */
17 | #define void int
18 | #define const
19 | 


--------------------------------------------------------------------------------
/stage-5/include/stdio.h:
--------------------------------------------------------------------------------
 1 | /* <stdio.h>  --  standard C library header for input/output
 2 |  *  
 3 |  * Copyright (C) 2005, 2008, 2015 Richard Smith <richard@ex-parrot.com>
 4 |  * All rights reserved.
 5 |  */ 
 6 | 
 7 | #ifndef __RBC_STDIO_INCLUDED
 8 | #define __RBC_STDIO_INCLUDED
 9 | 
10 | #include <bits/size_t.h>
11 | #include <bits/null.h>
12 | #include <bits/file.h>
13 | #include <bits/std_streams.h>
14 | #include <bits/eof.h>
15 | #include <bits/file_access.h>
16 | 
17 | #endif
18 | 


--------------------------------------------------------------------------------
/stage-5/include/string.h:
--------------------------------------------------------------------------------
 1 | /* <string.h>  --  standard C library header for strings
 2 |  *  
 3 |  * Copyright (C) 2005, 2021 Richard Smith <richard@ex-parrot.com>
 4 |  * All rights reserved.
 5 |  */ 
 6 | 
 7 | #ifndef __RBC_STRING_INCLUDED
 8 | #define __RBC_STRING_INCLUDED
 9 | 
10 | #include <bits/size_t.h>
11 | #include <bits/null.h>
12 | #include <bits/string.h>
13 | 
14 | #endif
15 | 


--------------------------------------------------------------------------------
/stage-5/include/time.h:
--------------------------------------------------------------------------------
 1 | /* <time.h>  --  standard C library header for date and time handling
 2 |  *  
 3 |  * Copyright (C) 2014 Richard Smith <richard@ex-parrot.com>
 4 |  * All rights reserved.
 5 |  */ 
 6 | 
 7 | #ifndef __RBC_TIME_INCLUDED
 8 | #define __RBC_TIME_INCLUDED
 9 | 
10 | #include <bits/null.h>
11 | #include <bits/size_t.h>
12 | #include <bits/time_t.h>
13 | #include <bits/struct_tm.h>
14 | 
15 | /* TODO: define clock_t and CLOCKS_PER_SEC */
16 | 
17 | #endif
18 | 


--------------------------------------------------------------------------------
/stage-5/main.c:
--------------------------------------------------------------------------------
  1 | /* main.c  --  code to parse command line and initialise the compiler
  2 |  *
  3 |  * Copyright (C) 2013 Richard Smith <richard@ex-parrot.com>
  4 |  * All rights reserved.
  5 |  */
  6 | 
  7 | /* Is the --compatibility=4 flag given? */
  8 | compat_flag = 0;
  9 | 
 10 | static
 11 | compile(output) {
 12 |     auto node;
 13 |     while ( peek_token() && (node = top_level()) ) {
 14 |         codegen( output, node );
 15 |         free_node( node );
 16 |     }
 17 | }
 18 | 
 19 | usage() {
 20 |     cli_error("Usage: ccx [--compatibility=N] [-o filename.s] filename.c\n");
 21 | }
 22 | 
 23 | main(argc, argv) 
 24 |     int argc;
 25 |     char **argv;
 26 | {
 27 |     extern char* strdup();
 28 |     extern struct FILE* fopen();
 29 |     extern char* opt_arg();
 30 | 
 31 |     auto char *filename = 0, *outname = 0;
 32 |     auto int l, i = 1, freeout = 0;
 33 |     auto struct FILE* file;
 34 | 
 35 |     while ( i < argc ) {
 36 |         auto char *arg = argv[i], *arg2;
 37 | 
 38 |         if ( arg2 = opt_arg( argv, argc, &i, "-o" ) ) {
 39 |             if ( outname ) cli_error(
 40 |                 "Multiple output files specified: '%s' and '%s'\n",
 41 |                 outname, arg2 );
 42 |             outname = arg2;
 43 |         }
 44 | 
 45 |         else if ( strcmp( arg, "--help" ) == 0 ) 
 46 |             usage();
 47 | 
 48 |         else if ( arg2 = opt_arg( argv, argc, &i, "--compatibility" ) ) {
 49 |             if ( strcmp( arg2, "4" ) == 0 )
 50 |                 compat_flag = 1;
 51 |             else if ( strcmp( arg2, "5" ) != 0 )
 52 |                 cli_error("Compatibility with stage %s not supported", arg2);
 53 |         }
 54 | 
 55 |         else if ( rchar(argv[i], 0) == '-' )
 56 |             cli_error("ccx: unknown option: %s\n", argv[i]);
 57 | 
 58 |         else {
 59 |             if ( filename ) cli_error(
 60 |                 "ccx: multiple input files specified: '%s' and '%s'\n",
 61 |                 filename, argv[i]);
 62 |             filename = argv[i];
 63 |             ++i;
 64 |         }
 65 |     }
 66 | 
 67 |     if ( !filename )
 68 |         cli_error("ccx: no input file specified\n");
 69 | 
 70 |     init_stypes();
 71 |     init_symtab();
 72 |     init_scan(filename, 0); 
 73 | 
 74 |     if (!outname) {
 75 |         /* We allow .c or .i filenames: .i is used for preprocessed source. */
 76 |         l = strlen(filename);
 77 |         if ( rchar( filename, l-1 ) != 'c' && rchar( filename, l-1 ) != 'i'
 78 |              || rchar( filename, l-2 ) != '.' )
 79 |             cli_error("ccx: input filename must have .c or .i extension\n");
 80 | 
 81 |         outname = strdup( filename );
 82 |         freeout = 1;
 83 |         lchar( outname, l-1, 's' );
 84 |     }
 85 | 
 86 |     file = fopen( outname, "w" );
 87 |     if (!file) cli_error( "ccx: unable to open file '%s'\n", outname );
 88 |     if (freeout) free( outname );
 89 | 
 90 |     compile( file );
 91 | 
 92 |     fclose( file );
 93 |     close_scan();
 94 | 
 95 |     fini_symtab();
 96 |     fini_stypes();
 97 |     rc_done();
 98 |     return 0;
 99 | }
100 | 


--------------------------------------------------------------------------------
/stage-5/node.c:
--------------------------------------------------------------------------------
  1 | /* node.c  --  low-level code for manipulating AST nodes
  2 |  *
  3 |  * Copyright (C) 2013, 2014 Richard Smith <richard@ex-parrot.com>
  4 |  * All rights reserved.
  5 |  */
  6 | 
  7 | static
  8 | rc_count = 0;
  9 | 
 10 | /* Allocate SZ bytes of memory, adding a reference-counted header. */
 11 | static
 12 | rc_alloc(sz) {
 13 |     auto ptr = malloc(8 + sz);
 14 |     ++rc_count;
 15 |     ptr[0] = 1;  /* the reference count */
 16 |     ptr[1] = sz; /* the capacity */
 17 |     return &ptr[2];
 18 | }
 19 | 
 20 | /* Unconditionally unallocate PTR which is memory allocated by rc_alloc. */
 21 | static
 22 | rc_free(ptr) {
 23 |     --rc_count;
 24 |     free( &ptr[-2] );
 25 | }
 26 | 
 27 | /* Diagnostic routine to check that all nodes have been unallocated. */
 28 | rc_done() {
 29 |     if (rc_count) {
 30 |         extern stderr;
 31 |         fprintf(stderr, "Internal error: program leaked %d objects\n",
 32 |                 rc_count);
 33 |     }
 34 | }
 35 | 
 36 | /* Wrapper around realloc to work with pointers returned by rc_alloc. */
 37 | static
 38 | rc_realloc(old_ptr, sz) {
 39 |     auto new_ptr;
 40 |     old_ptr = &old_ptr[-2];
 41 | 
 42 |     /* We cannot currently handle reallocating if there are multiple copies.  
 43 |      * What should it do?  If the address changes, we need to update all
 44 |      * the references, but we cannot do that.  So we'd have to create a unique
 45 |      * clone first, but then it's not really shared.  Best to prohibit it. */
 46 |     if ( *old_ptr > 1 )
 47 |         abort();
 48 | 
 49 |     new_ptr = realloc( old_ptr, sz + 8 );
 50 |     new_ptr[1] = sz;
 51 |     return &new_ptr[2];
 52 | }
 53 | 
 54 | /* Increment the reference count on a pointer */
 55 | add_ref(ptr) {
 56 |     ++ptr[-2];
 57 |     return ptr;
 58 | }
 59 | 
 60 | /* Allocate a new node of type TYPE. */
 61 | new_node(type, arity) {
 62 |     /* struct node { int type; int nops; node* type; node* op[4]; } 
 63 |      *
 64 |      * For binary operators, op[0] is the lhs and op[1] the rhs; for unary 
 65 |      * prefix operators, only op[0] is used; and for unary postfix only 
 66 |      * op[1] is used. 
 67 |      * 
 68 |      * The scanner never reads a ternary operator (because ?: has two separate
 69 |      * lexical elements), but we generate '?:' nodes in the expression parser
 70 |      * and want a uniform interface.  Similarly, the 'for' node is a quaternary
 71 |      * "operator" (init, test, incr, stmt). */
 72 |     auto n = rc_alloc(28);
 73 | 
 74 |     n[0] = type; n[1] = arity;
 75 | 
 76 |     /* The type and payload (operands) will get filled in by the parser */
 77 |     memset( &n[2], 0, 20 );
 78 | 
 79 |     return n;
 80 | }
 81 | 
 82 | /* Unallocate a node, NODE, created by new_node(). */
 83 | free_node(node) {
 84 |     auto i = 0;
 85 | 
 86 |     if (!node) return;
 87 |     /* Trap for double delete */ 
 88 |     else if (node[-2] == 0) abort();
 89 |     /* If the reference count doesn't drop to zero, do nothing. */
 90 |     else if (--node[-2]) return;
 91 | 
 92 |     free_node( node[2] );
 93 | 
 94 |     while ( i < node[1] ) 
 95 |         free_node( node[ 3 + i++ ] );
 96 | 
 97 |     rc_free(node);
 98 | }
 99 | 
100 | /* If SIZE is equal to the capacity of NODE, then reallocate it with twice
101 |  * capacity, and return the new node. */
102 | static
103 | grow_node(node, size) {
104 |     /* 12 is the size of the node before the payload. */
105 | 
106 |     if ( size + 12 == node[-1] ) {
107 |         size *= 2;
108 |         return rc_realloc( node, size + 12 );
109 |     }
110 | 
111 |     return node;
112 | }
113 | 
114 | /* Append node N to the vector node V, growing the vector if necessary, 
115 |  * and returning the (possibly reallocated) vector. */
116 | vnode_app( v, n ) {
117 |     v = grow_node(v, v[1] * 4);
118 |     v[ 3 + v[1]++ ] = n;
119 |     return v;
120 | }
121 | 
122 | /* Returns a pointer to the string payload of a node */
123 | node_str(node) {
124 |     return &node[3];
125 | }
126 | 
127 | /* Returns the node type.  This only exists to abstract the difference
128 |  * between t[0] (in stage-4) and t->code (in stage-5).  */
129 | node_code(node) {
130 |     return node[0];
131 | }
132 | 
133 | node_arity(node) {
134 |     return node[1];
135 | }
136 | 
137 | node_type(node) {
138 |     return node[2];
139 | }
140 | 
141 | node_op(node, n) {
142 |     return node[3+n];
143 | }
144 | 
145 | set_code(node, code) {
146 |     node[0] = code;
147 | }
148 | 
149 | set_arity(node, arity) {
150 |     node[1] = arity;
151 | }
152 | 
153 | set_type(node, type) {
154 |     node[2] = type;
155 | }
156 | 
157 | set_op(node, n, op) {
158 |     node[3+n] = op;
159 | }
160 | 
161 | /* Append character CHR to the payload of the node *NODE_PTR which is treated 
162 |  * as a string with current length *LEN_PTR.  The value of *LEN_PTR is 
163 |  * incremented.  The node may be reallocated. */
164 | node_lchar( node_ptr, len_ptr, chr )
165 |     int *len_ptr;
166 | {
167 |     auto node = *node_ptr;
168 |     node = grow_node( node, *len_ptr );
169 |     lchar( node_str(node), (*len_ptr)++, chr );
170 |     *node_ptr = node;
171 | }
172 | 
173 | /* Push-back facility doesn't really belong here, but having to keep 
174 |  * compatibility with the stage-5 cc without --compatibility=4 is 
175 |  * painful. 
176 |  *
177 |  * This is a struct pb_slot { struct node* token; struct pb_slot* next } *; */
178 | static pb_stack = 0;
179 | 
180 | pb_empty() {
181 |     return !pb_stack;
182 | }
183 | 
184 | pb_pop() {
185 |     auto ret = 0;
186 |     if ( pb_stack ) {
187 |         auto old = pb_stack;
188 |         ret = pb_stack[0];
189 |         pb_stack = pb_stack[1];
190 |         free(old);
191 |     }
192 |     return ret;
193 | }
194 | 
195 | pb_push(token) {
196 |     auto p = malloc(8);
197 |     p[1] = pb_stack;
198 |     pb_stack = p;
199 |     pb_stack[0] = token;
200 | }
201 | 
202 | /* Allocate a string node and set its payload to STR */
203 | struct node*
204 | new_strnode(code, str)
205 |     char* str;
206 | {
207 |     auto sz = strlen(str) + 1;
208 |     auto node = rc_alloc( 12 + sz );
209 |     set_code( node, code );
210 |     set_arity( node, 0 );
211 |     set_type( node, 0 );
212 |     strcpy( node_str(node), str, sz );
213 |     return node;
214 | }
215 | 
216 | 
217 | 


--------------------------------------------------------------------------------
/stage-5/nodenew.c:
--------------------------------------------------------------------------------
  1 | /* nodenew.c  --  node.c rewritten to use structs
  2 |  *
  3 |  * Copyright (C) 2013, 2014, 2015, 2016, 2018 
  4 |  * Richard Smith <richard@ex-parrot.com>
  5 |  * All rights reserved.
  6 |  */
  7 | 
  8 | /* This disables the errors on incompatibilities with stage-4. 
  9 |  * This is safe because this file is never processed with the stage-4
 10 |  * compiler (and, indeed, wouldn't compile if it were). */
 11 | #pragma RBC compatibility 5 
 12 | 
 13 | static
 14 | rc_count = 0;
 15 | 
 16 | static
 17 | int (*debug_fn)() = 0;
 18 | 
 19 | dbg_nodes(fn)
 20 |     int (*fn)();
 21 | {
 22 |     debug_fn = fn;
 23 | }
 24 | 
 25 | struct rc_node {
 26 |     int ref_count, capacity;
 27 | };
 28 | 
 29 | /* Allocate SZ bytes of memory, adding a reference-counted header. */
 30 | static
 31 | rc_alloc(sz) {
 32 |     struct rc_node* ptr = malloc( sizeof(struct rc_node) + sz );
 33 |     ++rc_count;
 34 |     ptr->ref_count = 1;
 35 |     ptr->capacity = sz;
 36 |     return ptr + 1;
 37 | }
 38 | 
 39 | /* Unconditionally unallocate PTR which is memory allocated by rc_alloc. */
 40 | static
 41 | rc_free(ptr) {
 42 |     --rc_count;
 43 |     free( ptr - sizeof(struct rc_node) );
 44 | }
 45 | 
 46 | /* Diagnostic routine to check that all nodes have been unallocated. */
 47 | rc_done() {
 48 |     if (rc_count)
 49 |         int_error("Internal error: program leaked %d objects\n", rc_count);
 50 | }
 51 | 
 52 | /* Wrapper around realloc to work with pointers returned by rc_alloc. */
 53 | static
 54 | rc_realloc(ptr, sz) {
 55 |     struct rc_node *old_ptr, *new_ptr;
 56 | 
 57 |     if ( !ptr )
 58 |         return rc_alloc(sz);
 59 |     
 60 |     old_ptr = (struct rc_node*)( (unsigned char*)ptr - sizeof(struct rc_node) );
 61 | 
 62 |     /* We cannot currently handle reallocating if there are multiple copies.  
 63 |      * What should it do?  If the address changes, we need to update all
 64 |      * the references, but we cannot do that.  So we'd have to create a unique
 65 |      * clone first, but then it's not really shared.  Best to prohibit it. */
 66 |     if ( old_ptr->ref_count != 1 )
 67 |         int_error("Attempt to reallocate a shared ref-counted object");
 68 | 
 69 |     new_ptr = (struct rc_node*) realloc( old_ptr, sizeof(struct rc_node) + sz );
 70 |     new_ptr->ref_count = 1;
 71 |     new_ptr->capacity = sz;
 72 |     return new_ptr + 1;
 73 | }
 74 | 
 75 | /* Increment the reference count on a pointer */
 76 | add_ref(ptr) {
 77 |     struct rc_node* n = ptr - sizeof(struct rc_node);
 78 |     ++n->ref_count;
 79 |     return ptr;
 80 | }
 81 | 
 82 | struct node {
 83 |     int code;           /* character code for the node, e.g. '+' or 'if'. */
 84 |     int arity;          /* the number of nodes in the ops[] array. */
 85 |     struct node* type;  /* a node representing the type of the node. */
 86 | 
 87 |     /* For binary operators, ops[0] is the lhs and ops[1] the rhs; for unary 
 88 |      * prefix operators, only ops[0] is used; and for unary postfix only 
 89 |      * ops[1] is used. 
 90 |      * 
 91 |      * The scanner never reads a ternary operator (because ?: has two separate
 92 |      * lexical elements), but we generate '?:' nodes in the expression parser
 93 |      * and want a uniform interface.  Similarly, the 'for' node is a quaternary
 94 |      * "operator" (init, test, incr, stmt). 
 95 |      *
 96 |      * Because of the way we store other data, but particularly strings, in 
 97 |      * nodes, this is really a:
 98 |      *
 99 |      *   union {
100 |      *     struct node* op[4];
101 |      *     int ivals[4];
102 |      *     char str[];
103 |      *   };  
104 |      *
105 |      * The ARITY field is zero when the node contains a string so as to
106 |      * prevent node_free from treating the string as a node pointer and
107 |      * freeing it.   There are some other instances where integers are 
108 |      * stored in the node.  E.g. in a 'macp' node (used to represent the 
109 |      * occurrence of a macro parameter in the replacement list of a 
110 |      * function-like macro) has ARITY=0 and nothing in the OP[] array,
111 |      * but uses IVAL[0] to represent the parameter number.  */
112 |     struct node* ops[4];
113 | };
114 | 
115 | /* Allocate a new node of code CODE, and arity ARITY. */
116 | new_node(code, arity) {
117 |     struct node* n = rc_alloc( sizeof(struct node) );
118 |     memset( n, 0, sizeof(struct node) );
119 | 
120 |     /* The type and payload (operands) will get filled in by the parser,
121 |      * but they were safely zeroed by the above call to memset(). */
122 |     n->code = code; 
123 |     n->arity = arity;
124 | 
125 |     if (debug_fn) debug_fn(n, "new_node");
126 | 
127 |     return n;
128 | }
129 | 
130 | /* Checks that NODE and its children still have non-zero a ref count. */
131 | check_node(node)
132 |     struct node* node;
133 | {
134 |     if (node) {
135 |         struct rc_node* rc = (unsigned char*)node - sizeof(struct rc_node);
136 |         int i;
137 | 
138 |         if ( rc->ref_count <= 0 || rc->ref_count > 1000 )
139 |             int_error( "Use of node type '%Mc' at 0x%x with %d ref-count\n",
140 |                        node->code, node, rc->ref_count );
141 | 
142 |         for ( i = 0; i < node->arity; ++i ) 
143 |             check_node( node->ops[i] );
144 | 
145 |         if (node->code & 0x80808080) 
146 |             int_error( "Invalid node code '%Mc' at 0x%x with %d ref-count\n",
147 |                        node->code, node, rc->ref_count );
148 |     }
149 | 
150 |     return node;
151 | }
152 | 
153 | /* Drop one reference to NODE (which may be null).  When the last reference
154 |  * goes, release its operands and type in the same way, then its storage. */
155 | free_node(node)
156 |     struct node* node;
157 | {
158 |     if (node) {
159 |         struct rc_node* hdr = (unsigned char*)node - sizeof(struct rc_node);
160 |         int n = 0;
161 | 
162 |         /* Trap for double delete */
163 |         if ( hdr->ref_count == 0 )
164 |             int_error( "Double delete of node type '%Mc' at 0x%x\n",
165 |                        node->code, node );
166 | 
167 |         /* Only tear the node down when this was the last reference. */
168 |         if ( --hdr->ref_count == 0 ) {
169 |             /* This is the latest we can call debug_fn as we're about to free
170 |              * the children, and the function will probably access them. */
171 |             if (debug_fn) debug_fn(node, "free_node");
172 | 
173 |             while ( n < node->arity )
174 |                 free_node( node->ops[n++] );
175 |             free_node( node->type );
176 | 
177 |             rc_free(node);
178 |         }
179 |     }
180 | }
181 | 
182 | /* Make sure NODE has room for SIZE + EXTRA bytes of payload, where SIZE is
183 |  * the number of payload bytes already in use.  If NODE is null or its
184 |  * capacity is too small, it is passed to rc_realloc() with the payload
185 |  * size doubled (or grown by EXTRA, if that is more) and the new node is
186 |  * returned; a node created from null has code, arity and type zeroed.
187 |  * It does not increment the arity of the node.  More friendly interfaces
188 |  * are provided by vnode_app() and node_lchar(). */
189 | static
190 | struct node*
191 | grow_node(node, size, extra) 
192 |     struct node* node;
193 | {
194 |     /* This is the size of the node before the ops[] payload. */
195 |     int overhead = sizeof(struct node) - sizeof(struct node *[4]);
196 |     struct node *grown;
197 | 
198 |     if (node) {
199 |         struct rc_node* hdr = (unsigned char*)node - sizeof(struct rc_node);
200 |         /* Enough capacity already: hand the node back untouched. */
201 |         if ( size + extra + overhead <= hdr->capacity ) return node;
202 |         if (debug_fn) debug_fn(node, "grow_node [free]");
203 |     }
204 | 
205 |     /* Double the payload, unless EXTRA alone needs more than that. */
206 |     if ( extra <= size ) size += size;
207 |     else size += extra;
208 |     grown = (struct node *)rc_realloc( node, size + overhead );
209 | 
210 |     /* Initialise if it's a new node */
211 |     if (!node) { 
212 |         grown->code = 0;
213 |         grown->arity = 0;
214 |         grown->type = 0;
215 |     }
216 | 
217 |     if (debug_fn) debug_fn(grown, "grow_node [realloced]");
218 | 
219 |     return grown;
220 | }
221 | 
222 | /* Add CHILD as the last operand of the vector node VEC.  Growing VEC may
223 |  * move it, so callers must carry on with the returned pointer. */
224 | struct node *
225 | vnode_app( vec, child )
226 |     struct node *vec, *child;
227 | {
228 |     int slot = sizeof(struct node*);
229 |     vec = grow_node( vec, vec->arity * slot, slot );
230 |     vec->ops[ vec->arity ] = child;  vec->arity++;
231 |     return vec;
232 | }
233 | 
234 | /* Append the operands of SRC with index [FIRST, LAST) to vnode DEST, passing
235 |  * each through add_ref(), and return the (possibly reallocated) vector.
236 |  * A negative LAST (conventionally -1) means up to the end of SRC. */
237 | struct node *
238 | vnode_copy( dest, src, first, last )
239 |     struct node *dest, *src;
240 | {
241 |     int stop = last < 0 ? src->arity : last;
242 |     int i = first;
243 |     while ( i < stop )
244 |         dest = vnode_app( dest, add_ref(src->ops[i++]) );
245 |     return dest;
246 | }
247 | 
248 | /* Insert CHILD as the first operand of the vector node VEC, shifting the
249 |  * existing operands up by one.  Returns the (possibly moved) vector. */
250 | struct node *
251 | vnode_prep( vec, child )
252 |     struct node *vec, *child;
253 | {
254 |     int slot = sizeof(struct node*);
255 |     vec = grow_node( vec, vec->arity * slot, slot );
256 |     memmove( &vec->ops[1], &vec->ops[0], vec->arity * slot );
257 |     vec->ops[0] = child;
258 |     vec->arity++;
259 |     return vec;
260 | }
261 | 
262 | /* Returns the address of NODE's string payload: the start of ops[]. */
263 | node_str(node) 
264 |     struct node* node;
265 | {
266 |     return (char*) node->ops;
267 | }
268 | 
269 | /* Returns the node code.  This only exists to abstract the difference
270 |  * between t[0] (in stage-4) and t->code (in stage-5).  */
271 | node_code(node) 
272 |     struct node* node;
273 | {
274 |     return node->code;
275 | }
276 | 
277 | node_arity(node)   /* Returns NODE's arity field. */
278 |     struct node* node;
279 | {
280 |     return node->arity;
281 | }
282 | 
283 | node_type(node)   /* Returns NODE's type field (not its node code). */
284 |     struct node* node;
285 | {
286 |     return node->type;
287 | }
288 | 
289 | node_op(node, n)   /* Returns operand number N of NODE; N is not range-checked. */
290 |     struct node* node;
291 | {
292 |     return node->ops[n];
293 | }
294 | 
295 | set_code(node, code)   /* Sets NODE's node code to CODE. */
296 |     struct node* node;
297 | {
298 |     node->code = code;
299 | }
300 | 
301 | set_arity(node, arity)   /* Sets NODE's arity field; no storage is resized. */
302 |     struct node* node;
303 | {
304 |     node->arity = arity;
305 | }
306 | 
307 | set_type(node, type)   /* Sets NODE's type field to TYPE; no ref count is touched here. */
308 |     struct node *node, *type;
309 | {
310 |     node->type = type;
311 | }
312 | 
313 | set_op(node, n, op)   /* Stores OP as operand N of NODE; N is not range-checked. */
314 |     struct node *node, *op;
315 | {
316 |     node->ops[n] = op;
317 | }
318 | 
319 | set_ival(node, val)   /* Stores the integer VAL in the first operand slot. */
320 |     struct node *node;
321 | {
322 |     node->ops[0] = (struct node*) val;   /* the slot is pointer-typed, hence the cast */
323 | }
324 | 
325 | node_ival(node)   /* Returns the first operand slot, as stored by set_ival(). */
326 |     struct node *node;
327 | {
328 |     return node->ops[0];   
329 | }
330 | 
331 | /* Allocate a string node with node code CODE and copy STR, including its
332 |  * terminating NUL, into the payload.  Returns the new node. */
333 | struct node*
334 | new_strnode(code, str) char* str;
335 | {
336 |     int sz = strlen(str) + 1;
337 |     struct node* node = grow_node( 0, 0, sz );
338 |     /* grow_node() has already zeroed code, arity and type */
339 |     node->code = code;
340 |     strcpy( node_str(node), str );
341 |     if (debug_fn) debug_fn(node, "new_strnode");
342 |     return node;
343 | }
344 | 
345 | 
346 | /* Store CHR at offset *LEN_PTR of the string payload of *NODE_PTR, first
347 |  * growing the node if it is full, then add one to *LEN_PTR.  *NODE_PTR is
348 |  * updated because growing may move the node.  No NUL is written. */
349 | node_lchar( node_ptr, len_ptr, chr )
350 |     struct node** node_ptr;
351 |     int *len_ptr;
352 | {
353 |     int at = *len_ptr;
354 |     struct node* grown = grow_node( *node_ptr, at, 1 );
355 |     char* text = node_str(grown);
356 |     text[at] = chr;
357 |     *len_ptr = at + 1;  *node_ptr = grown;
358 |     if (debug_fn) debug_fn(grown, "node_lchar");
359 | }
360 | 
361 | /* Copy LEN characters of STR, with strncpy(), to the end of the string
362 |  * payload of *NODE_PTR, whose current length is *LEN_PTR, and add LEN to
363 |  * *LEN_PTR.  The node may be reallocated, so *NODE_PTR is updated too. */
364 | node_strcat( node_ptr, len_ptr, str, len )
365 |     struct node** node_ptr;
366 |     int *len_ptr;
367 |     char *str;
368 | {
369 |     int used = *len_ptr;
370 |     struct node* grown = grow_node( *node_ptr, used, len );
371 |     char* text = node_str(grown);
372 |     strncpy( text + used, str, len );
373 |     *len_ptr = used + len;  *node_ptr = grown;
374 | }
375 |     
376 | /* Push-back facility doesn't really belong here, but having to keep 
377 |  * compatibility with the stage-4 cc is tricky. */
378 | static struct pb_slot {
379 |     struct node* node;       /* the pushed-back token */
380 |     struct pb_slot* next;    /* the slot pushed before this one, or null */
381 | } *pb_stack = 0;             /* most recently pushed slot; null when empty */
382 | 
383 | pb_empty() {   /* Returns non-zero when no token is pushed back. */
384 |     return !pb_stack;
385 | }
386 | 
387 | struct node*
388 | pb_pop() {
389 |     /* Hands back the most recently pushed token and frees its slot; an
390 |      * empty stack gives a null pointer. */
391 |     struct pb_slot* top = pb_stack;
392 |     struct node* token;
393 |     if ( !top ) return 0;
394 |     token = top->node;  pb_stack = top->next;
395 |     free(top);
396 |     return token;
397 | }
398 | 
399 | pb_push(token)   /* Pushes TOKEN back, so the next pb_pop() returns it. */
400 |     struct node* token;
401 | {
402 |     struct pb_slot* slot = malloc( sizeof(struct pb_slot) );
403 |     slot->node = token;
404 |     slot->next = pb_stack;
405 |     pb_stack = slot;
406 | }
407 | 
408 | 
409 | 
410 | 


--------------------------------------------------------------------------------
/stage-5/pvector.c:
--------------------------------------------------------------------------------
 1 | /* pvector.c  --  code to deal with vectors of pointers
 2 |  *  
 3 |  * Copyright (C) 2013 Richard Smith <richard@ex-parrot.com>
 4 |  * All rights reserved.
 5 |  */ 
 6 | 
 7 | /* The Makefile sticks --compatibility=4 on the command line.  Remove it. */
 8 | #pragma RBC compatibility 5 
 9 | 
10 | /* We would like to #include "pvector.h" here, but we don't so that it can
11 |  * be used in the implementation of the preprocessor.  Instead, repeat the
12 |  * definition here. */
13 | struct pvector {   /* [start, end) holds the elements; *end is a null pointer */
14 |     char **start, **end, **end_store;   /* end_store: end of the allocated slots */
15 | };
16 | 
17 | struct pvector*
18 | pvec_new() {  /* Makes an empty, null-terminated vector with 8 slots. */
19 |     int slots = 8;
20 |     struct pvector* vec = (struct pvector*) malloc( sizeof(struct pvector) );
21 |     char** store = (char**) malloc( sizeof(char*) * slots );
22 |     store[0] = 0;  /* null termination */
23 |     vec->start = vec->end = store;  vec->end_store = store + slots;
24 |     return vec;
25 | }
26 | 
27 | pvec_delete(v)   /* Frees V and its pointer array; the elements are not freed. */
28 |     struct pvector* v;
29 | {
30 |     if (v) {   /* a null V is ignored */
31 |         free( v->start );
32 |         free( v );
33 |     }
34 | }
35 | 
36 | pvec_push(v, elt)   /* Appends ELT to V, doubling the storage when full. */
37 |     struct pvector* v;
38 |     char* elt;
39 | {
40 |     /* The slot holding the null terminator is always free to take the
41 |      * new element. */
42 |     *v->end = elt;
43 |     v->end++;
44 |     if (v->end == v->end_store) {
45 |         /* Full: double the storage to make room for the terminator. */
46 |         int used = v->end - v->start;
47 |         char** store = (char**) realloc( v->start, sizeof(char*) * 2*used );
48 |         v->start = store;  v->end = store + used;
49 |         v->end_store = store + 2*used;
50 |     }
51 | 
52 |     *v->end = 0;
53 | }
54 | 
55 | char*   /* Removes and returns the last element of V; null if V is empty. */
56 | pvec_pop(v) 
57 |     struct pvector* v;
58 | {
59 |     char* last = 0;   /* stays null for an empty V, so END never passes START */
60 |     if ( v->end != v->start ) { last = *--v->end;  *v->end = 0; }
61 |     return last;
62 | }
63 | 


--------------------------------------------------------------------------------
/stage-5/pvector.h:
--------------------------------------------------------------------------------
 1 | /* pvector.h  --  code to deal with vectors of pointers
 2 |  *  
 3 |  * Copyright (C) 2013 Richard Smith <richard@ex-parrot.com>
 4 |  * All rights reserved.
 5 |  */ 
 6 | 
 7 | #ifndef RBC5_PVECTOR_INCLUDED
 8 | #define RBC5_PVECTOR_INCLUDED
 9 | 
10 | struct pvector {   /* a growable, null-terminated array of pointers */
11 |     char **start, **end, **end_store;   /* first element; terminator slot; end of storage */
12 | };
13 | 
14 | struct pvector* pvec_new();   /* make an empty vector */
15 | pvec_delete();                /* (v) free the vector, but not its elements */
16 | pvec_push();                  /* (v, elt) append ELT */
17 | char* pvec_pop();             /* (v) remove and return the last element */
18 | 
19 | #endif
20 | 


--------------------------------------------------------------------------------
/stage-5/scanner.c:
--------------------------------------------------------------------------------
  1 | /* scanner.c  --  code for converting preprocessor tokens to C ones
  2 |  * This file is part of the compiler, ccx
  3 |  *
  4 |  * Copyright (C) 2013, 2014 Richard Smith <richard@ex-parrot.com>
  5 |  * All rights reserved.
  6 |  */
  7 | 
  8 | 
  9 | /* Check whether the null-terminated string payload of NODE is a keyword,
 10 |  * and if so replace the node code, NODE[0], with the keyword token (which
 11 |  * is a multicharacter literal containing at most the first four characters
 12 |  * of the keyword, e.g. 'whil' for "while") and zero 16 bytes of payload.
 13 |  * Any other node is left unchanged.  Returns NODE. */
 14 | chk_keyword(node)
 15 | {
 16 |     /* NODE is untyped here: the code below takes word 0 as the node code
 17 |      * and the string as starting at word 3. */
 18 |     auto char *keywords[29] = {
 19 |         /* Complete list of keywords per K&R, minus 'entry', plus 'signed'
 20 |          * from C90.  C90 also adds 'const', 'enum', 'void', and 'volatile'.
 21 |          *
 22 |          * 'do' and 'if' have an extra NUL character to pad them to 4 bytes
 23 |          * for casting to an int (i.e. a multicharacter literal). 
 24 |          *
 25 |          * TODO: Not yet implemented: double, typedef, union. 
 26 |          */
 27 |         "auto", "break", "case", "char", "continue", "default", "do\0", 
 28 |         "double", "else", "extern", "float", "for", "goto", "if\0", 
 29 |         "int", "long", "register", "return", "signed", "short", "sizeof", 
 30 |         "static", "struct", "switch", "typedef", "union", "unsigned", "while", 
 31 |         0
 32 |     };
 33 | 
 34 |     auto i = 0;   /* linear search; the list ends with a null pointer */
 35 |     while ( keywords[i] && strcmp(keywords[i], &node[3]) != 0 )
 36 |         ++i;
 37 | 
 38 |     if ( keywords[i] ) {
 39 |         /* Change the id node to an op node, using the first four bytes
 40 |          * of the keyword as the multicharacter node code. */
 41 |         auto int* keyword = keywords[i];
 42 |         node[0] = *keyword;
 43 |         /* Zero the memory used by the string: it's now a node* array[4].
 44 |          * NOTE(review): assumes the payload holds 16 bytes -- confirm. */
 45 |         memset( &node[3], 0, 16 );
 46 |     }
 47 |     
 48 |     return node;
 49 | }
 50 | 
 51 | /* Read a number (oct / hex / dec) starting with character C (which has
 52 |  * already been checked by isdigit) from STREAM: get_ppnum() is given C and
 53 |  * C2 to read it, and mk_number() makes the node that is returned. */
 54 | get_number(stream, c, c2) {
 55 |     /* The get_ppnum() node is only needed while mk_number() runs. */
 56 |     auto ppnode = get_ppnum(stream, c, c2);
 57 |     auto node = mk_number(ppnode);
 58 |     free_node(ppnode);
 59 |     return node;
 60 | }
 61 | 
 62 | /* Handle a #pragma directive read from STREAM.  Always returns 0. */
 63 | prgm_direct(stream) {
 64 |     extern char* node_str();
 65 |     extern struct node* get_word();
 66 | 
 67 |     auto struct node* tok;
 68 |     auto char* str;
 69 |     auto int c = skip_hwhite(stream);
 70 | 
 71 |     /* The standard requires unrecognised #pragmas to be allowed, but
 72 |      * this is a bit silly. */
 73 |     if ( !isidchar1(c) ) {
 74 |         warning("Unfamiliar form of #pragma directive");
 75 |         /* A bare #pragma is a bit silly too, but the grammar allows it. */
 76 |         if ( c == '\n' ) ungetc(c, stream);  
 77 |         else pp_slurp(stream, 0, 0);
 78 |         return 0;
 79 |     }
 80 |   
 81 |     /* Get the pragma namespace */ 
 82 |     tok = get_word(stream, c);
 83 |     str = node_str(tok);
 84 | 
 85 |     /* Our #pragmas all live in the RBC namespace (which stands for
 86 |      * Richard's Bootstrap Compiler). */
 87 |     if ( strcmp( str, "RBC" ) != 0 ) {
 88 |         /* An unknown pragma: silently ignore it. */
 89 |         pp_slurp(stream, 0, 0);
 90 |         free_node(tok);
 91 |         return 0;
 92 |     }
 93 |     free_node(tok);   /* the namespace word is finished with */
 94 |     /* Get the command word.  NOTE(review): presumably error() does not return -- confirm. */
 95 |     c = skip_hwhite(stream);
 96 |     if ( !isidchar1(c) )
 97 |         error("#pragma RBC requires a command argument");
 98 |     tok = get_word(stream, c);
 99 |     str = node_str(tok);
100 |     
101 |     /* We only know about #pragma RBC compatibility */
102 |     if ( strcmp( str, "compatibility" ) == 0 ) {
103 |         extern compat_flag;
104 |         auto int n = pp_dir_num(stream);
105 |         if ( n < 4 || n > 5 )
106 |             error("Compatibility with stage %d not supported", n);
107 |         compat_flag = ( n == 4 );   /* 1 selects stage-4 compatibility, 0 stage 5 */
108 |     }
109 |     else {
110 |         warning("Unhandled #pragma RBC %s", str);
111 |         pp_slurp(stream, 0, 0);
112 |     }
113 | 
114 |     end_ppdir(stream, "pragma RBC");
115 |     free_node(tok);   /* STR points into TOK, so it is freed only now */
116 | 
117 |     /* The return is a null node*, and indicates that we have handled
118 |      * (or ignored) the #pragma, and not to include it in the output
119 |      * token stream produced by the scanner. */
120 |     return 0;
121 | }
122 | 
123 | /* Hook for handling preprocessor directives other than #line and #pragma */
124 | pp_direct(stream, str) {   /* none are handled here: STR is reported as an error */
125 |     error("Unknown preprocessor directive: %s", str);
126 | }
127 | 
128 | do_get_qlit(stream, c1, c2) {   /* Reads a quoted literal with get_qlit() and gives it a type. */
129 |     auto int l;   /* filled in by get_qlit(); passed to chr_array_t() below */
130 |     auto tok = get_qlit(stream, c1, c2, &l);
131 | 
132 |     /* Character literals have type int in C. */
133 |     if (c1 == '\'') 
134 |         tok[2] = add_ref( implct_int() );   /* word 2 of the untyped node receives the type */
135 |     /* String literals have type char[N] */
136 |     else if (c1 == '\"')
137 |         tok[2] = chr_array_t(l);
138 |     else
139 |         int_error("Unknown type of quoted string: %c...%c", c1, c2);
140 |     return tok;
141 | }
142 | 
143 | handle_eof() {   /* Always returns 0.  NOTE(review): presumably a hook for shared scanner code -- confirm. */
144 |     return 0;
145 | }
146 | 
147 | cpp_pragma() {   /* Always returns 0.  NOTE(review): presumably a hook for shared scanner code -- confirm. */
148 |     return 0;
149 | }
150 | 


--------------------------------------------------------------------------------
/stage-5/timeconv.c:
--------------------------------------------------------------------------------
 1 | /* timeconv.c  --  functions to convert times
 2 |  *  
 3 |  * Copyright (C) 2014 Richard Smith <richard@ex-parrot.com>
 4 |  * All rights reserved.
 5 |  */ 
 6 | 
 7 | /* The Makefile sticks --compatibility=4 on the command line.  Remove it. */
 8 | #pragma RBC compatibility 5 
 9 | 
10 | #include <bits/struct_tm.h>
11 | 
12 | static 
13 | struct tm buf;   /* filled in by gmtime(), which returns its address */
14 | 
15 | static
16 | leap_year(year) {
17 |     /* Returns 1 if YEAR is a leap year under the Gregorian rule, else 0:
18 |      * century years need divisibility by 400, other years by 4. */
19 |     if ( year % 100 == 0 ) return year % 400 == 0;
20 |     return year % 4 == 0;
21 | }
22 | 
23 | /* The stage-5 compiler doesn't support multi-dimensional arrays */
24 | static int month_lens[26] = {   /* two rows of 12 month lengths plus a 0 end marker */
25 |     31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31, 0,   /* common year */
26 |     31, 29, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31, 0    /* leap year */
27 | };
28 | 
29 | /* The C standard gmtime: split *TIMEP, a count of seconds since the Unix
30 |  * epoch, into UTC calendar fields.  The result lives in a static buffer
31 |  * that is overwritten by the next call. */
32 | struct tm *
33 | gmtime(timep) 
34 |     int *timep;
35 | {
36 |     int t = *timep, secs;
37 |     int ly = 0, year = 1970, month = 0, *lp;
38 | 
39 |     /* Split into whole days and a time of day, rounding the day count
40 |      * down so that times before the epoch still give 0 <= SECS < 86400. */
41 |     secs = t % 86400;  t /= 86400;
42 |     if ( secs < 0 ) secs += 86400, --t;
43 |     buf.tm_sec = secs % 60;  buf.tm_min = secs / 60 % 60;  buf.tm_hour = secs / 3600;
44 | 
45 |     /* The Unix epoch, 1 Jan 1970, was a Thursday, day 4. */
46 |     buf.tm_wday = ( (t + 4) % 7 + 7 ) % 7;
47 | 
48 |     while ( t < 0 )
49 |         --year, ly = leap_year(year), t += 365 + ly;
50 |     while ( !ly && t == 365 || t > 365 ) 
51 |         ++year, t -= 365 + ly, ly = leap_year(year);
52 | 
53 |     buf.tm_year = year - 1900;  buf.tm_yday = t;
54 |     /* Walk this year's row of month_lens until T falls inside a month. */
55 |     for ( lp = month_lens + ly*13; *lp && t >= *lp; month++, t -= *lp, ++lp )
56 |         ;
57 |     buf.tm_mon = month;  buf.tm_mday = t + 1;
58 |     buf.tm_isdst = 0;
59 |     return &buf;
60 | }
61 | 
62 | 
63 | 


--------------------------------------------------------------------------------