├── .gitignore
├── Makefile
├── README.md
├── jonesforth.S
├── jonesforth.f
├── perf_dupdrop.f
├── test_assembler.f
├── test_assembler.f.out
├── test_comparison.f
├── test_comparison.f.out
├── test_exception.f
├── test_exception.f.out
├── test_number.f
├── test_number.f.out
├── test_read_file.f
├── test_read_file.f.out
├── test_stack.f
├── test_stack.f.out
├── test_stack_trace.f
└── test_stack_trace.f.out


/.gitignore:
--------------------------------------------------------------------------------
1 | jonesforth
2 | 


--------------------------------------------------------------------------------
/Makefile:
--------------------------------------------------------------------------------
 1 | # $Id: Makefile,v 1.9 2007-10-22 18:53:12 rich Exp $
 2 | 
 3 | SHELL	:= /bin/bash
 4 | GCC     := gcc
 5 | CFLAGS	:= -I /usr/include -I. -g
 6 | 
 7 | all:	jonesforth
 8 | 
 9 | jonesforth: jonesforth.S
10 | 	$(GCC) $(CFLAGS) -nostdlib -static -o $@ $<
11 | 
12 | run:
13 | 	cat jonesforth.f $(PROG) - | ./jonesforth
14 | 
15 | clean:
16 | 	rm -f jonesforth perf_dupdrop *~ core .test_*
17 | 
18 | # Tests.
19 | 
20 | TESTS	:= $(patsubst %.f,%.test,$(wildcard test_*.f))
21 | 
22 | test check: $(TESTS)
23 | 
24 | test_%.test: test_%.f jonesforth
25 | 	@echo -n "$< ... "
26 | 	@rm -f .$@
27 | 	@cat <(echo ': TEST-MODE ;') jonesforth.f $< <(echo 'TEST') | \
28 | 	  ./jonesforth 2>&1 | \
29 | 	  sed 's/DSP=[0-9]*//g' > .$@
30 | 	@diff -u .$@ $<.out
31 | 	@rm -f .$@
32 | 	@echo "ok"
33 | 
34 | # Performance.
35 | 
36 | perf_dupdrop: perf_dupdrop.c
37 | 	$(GCC) -O3 -Wall -Werror -o $@ $<
38 | 
39 | run_perf_dupdrop: jonesforth
40 | 	cat <(echo ': TEST-MODE ;') jonesforth.f perf_dupdrop.f | ./jonesforth
41 | 
42 | .SUFFIXES: .f .test
43 | .PHONY: test check run run_perf_dupdrop
44 | 
45 | push-remote:
46 | 	sshpass -p "riscv" scp -P 4321 -o UserKnownHostsFile=/dev/null -o StrictHostKeyChecking=no -r . root@localhost:/jonesforth
47 | 
48 | ssh:
49 | 	sshpass -p "riscv" ssh -p 4321 -o UserKnownHostsFile=/dev/null -o StrictHostKeyChecking=no root@localhost
50 | 
51 | qemu:
52 | 	docker run --name riscv-qemu-fedora -p 4321:10000 jjy0/riscv-qemu-fedora
53 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # Jonesforth RISC-V
 2 | 
 3 | RISC-V 64 implementation of Jones forth.
 4 | 
 5 | The code is based on Richard WM Jones's excellent literate x86 assembly
 6 | implementation of Forth, more on which here:
 7 | http://rwmj.wordpress.com/2010/08/07/jonesforth-git-repository/
 8 | 
 9 | The x86 version source code is copied from a mirror repo: https://github.com/nornagon/jonesforth
10 | 
11 | The RISC-V version is rewritten by [JJy](https://justjjy.com), mostly modification is in the `jonesforth.S` file.
12 | 
13 | > The RISC-V version jonesforth is using RV64 instructions, so the WORD size and alignment is 8 bytes.
14 | 
15 | ## Run
16 | 
17 | Run Qemu VM:
18 | 
19 | 1. Start qemu RISC-V VM: `make qemu` - will outputs lots out logs, wait until complete the boot.
20 | 2. Push files to qemu VM: `make push-remote` - the files are under `/jonesforth`.
21 | 3. Connect to RISC-V VM: `make ssh`.
22 | 
23 | > The docker image is very large, you can build it locally if you can't download it from server https://github.com/jjyr/docker-riscv-qemu-fedora
24 | 
25 | Compile & Run:
26 | 
27 | 1. Compile `make`.
28 | 2. Start REPL: `make run`.
29 | 3. Run all tests: `make test`
30 | 
31 | > We haven't passed all tests yet: [issue #1](https://github.com/jjyr/jonesforth_riscv/issues/1)
32 | 
33 | ## RISC-V references
34 | 
35 | * [RISCV Specification](https://riscv.org/technical/specifications/)
36 | * [RISC-V Assembly Programmer's Manual](https://github.com/riscv/riscv-asm-manual/blob/master/riscv-asm.md)
37 | 


--------------------------------------------------------------------------------
/jonesforth.S:
--------------------------------------------------------------------------------
   1 | /*      RISC-V implementation of jones forth.
   2 | 	This repository is intended to migrate the jonesforth compiler and tutorial to RISC-V ISA.
   3 | 
   4 | 	The assembler is rewritten into RISC-V 64 by JJy <jjyruby@gmail.com> https://justjjy.com,
   5 | 	and all the instructions are replaced with RISC-V,
   6 | 	ISA unrelated parts of the tutorial are kept untouched.
   7 | 
   8 | 	So you can use this tutorial just like the original one but for RISC-V ISA.
   9 | 
  10 | 	All the additional work is released under the same PUBLIC DOMAIN
  11 | 
  12 | 	The original file header:
  13 |  */
  14 | 
  15 | /*	A sometimes minimal FORTH compiler and tutorial for Linux / i386 systems. -*- asm -*-
  16 | 	By Richard W.M. Jones <rich@annexia.org> http://annexia.org/forth
  17 | 	This is PUBLIC DOMAIN (see public domain release statement below).
  18 | 	$Id: jonesforth.S,v 1.47 2009-09-11 08:33:13 rich Exp $
  19 | 
  20 | 	riscv64-unknown-elf-gcc -I /usr/include/ -nostdlib -static -Wl,-Ttext,0 -o jonesforth jonesforth.S
  21 | */
  22 | 	.set JONES_VERSION,47
  23 | /*
  24 | 	INTRODUCTION ----------------------------------------------------------------------
  25 | 
  26 | 	FORTH is one of those alien languages which most working programmers regard in the same
  27 | 	way as Haskell, LISP, and so on.  Something so strange that they'd rather any thoughts
  28 | 	of it just go away so they can get on with writing this paying code.  But that's wrong
  29 | 	and if you care at all about programming then you should at least understand all these
  30 | 	languages, even if you will never use them.
  31 | 
  32 | 	LISP is the ultimate high-level language, and features from LISP are being added every
  33 | 	decade to the more common languages.  But FORTH is in some ways the ultimate in low level
  34 | 	programming.  Out of the box it lacks features like dynamic memory management and even
  35 | 	strings.  In fact, at its primitive level it lacks even basic concepts like IF-statements
  36 | 	and loops.
  37 | 
  38 | 	Why then would you want to learn FORTH?  There are several very good reasons.  First
  39 | 	and foremost, FORTH is minimal.  You really can write a complete FORTH in, say, 2000
  40 | 	lines of code.  I don't just mean a FORTH program, I mean a complete FORTH operating
  41 | 	system, environment and language.  You could boot such a FORTH on a bare PC and it would
  42 | 	come up with a prompt where you could start doing useful work.  The FORTH you have here
  43 | 	isn't minimal and uses a Linux process as its 'base PC' (both for the purposes of making
  44 | 	it a good tutorial). It's possible to completely understand the system.  Who can say they
  45 | 	completely understand how Linux works, or gcc?
  46 | 
  47 | 	Secondly FORTH has a peculiar bootstrapping property.  By that I mean that after writing
  48 | 	a little bit of assembly to talk to the hardware and implement a few primitives, all the
  49 | 	rest of the language and compiler is written in FORTH itself.  Remember I said before
  50 | 	that FORTH lacked IF-statements and loops?  Well of course it doesn't really because
  51 | 	such a lanuage would be useless, but my point was rather that IF-statements and loops are
  52 | 	written in FORTH itself.
  53 | 
  54 | 	Now of course this is common in other languages as well, and in those languages we call
  55 | 	them 'libraries'.  For example in C, 'printf' is a library function written in C.  But
  56 | 	in FORTH this goes way beyond mere libraries.  Can you imagine writing C's 'if' in C?
  57 | 	And that brings me to my third reason: If you can write 'if' in FORTH, then why restrict
  58 | 	yourself to the usual if/while/for/switch constructs?  You want a construct that iterates
  59 | 	over every other element in a list of numbers?  You can add it to the language.  What
  60 | 	about an operator which pulls in variables directly from a configuration file and makes
  61 | 	them available as FORTH variables?  Or how about adding Makefile-like dependencies to
  62 | 	the language?  No problem in FORTH.  How about modifying the FORTH compiler to allow
  63 | 	complex inlining strategies -- simple.  This concept isn't common in programming languages,
  64 | 	but it has a name (in fact two names): "macros" (by which I mean LISP-style macros, not
  65 | 	the lame C preprocessor) and "domain specific languages" (DSLs).
  66 | 
  67 | 	This tutorial isn't about learning FORTH as the language.  I'll point you to some references
  68 | 	you should read if you're not familiar with using FORTH.  This tutorial is about how to
  69 | 	write FORTH.  In fact, until you understand how FORTH is written, you'll have only a very
  70 | 	superficial understanding of how to use it.
  71 | 
  72 | 	So if you're not familiar with FORTH or want to refresh your memory here are some online
  73 | 	references to read:
  74 | 
  75 | 	http://en.wikipedia.org/wiki/Forth_%28programming_language%29
  76 | 
  77 | 	http://galileo.phys.virginia.edu/classes/551.jvn.fall01/primer.htm
  78 | 
  79 | 	http://wiki.laptop.org/go/Forth_Lessons
  80 | 
  81 | 	http://www.albany.net/~hello/simple.htm
  82 | 
  83 | 	Here is another "Why FORTH?" essay: http://www.jwdt.com/~paysan/why-forth.html
  84 | 
  85 | 	Discussion and criticism of this FORTH here: http://lambda-the-ultimate.org/node/2452
  86 | 
  87 | 	ACKNOWLEDGEMENTS ----------------------------------------------------------------------
  88 | 
  89 | 	This code draws heavily on the design of LINA FORTH (http://home.hccnet.nl/a.w.m.van.der.horst/lina.html)
  90 | 	by Albert van der Horst.  Any similarities in the code are probably not accidental.
  91 | 
  92 | 	Some parts of this FORTH are also based on this IOCCC entry from 1992:
  93 | 	http://ftp.funet.fi/pub/doc/IOCCC/1992/buzzard.2.design.
  94 | 	I was very proud when Sean Barrett, the original author of the IOCCC entry, commented in the LtU thread
  95 | 	http://lambda-the-ultimate.org/node/2452#comment-36818 about this FORTH.
  96 | 
  97 | 	And finally I'd like to acknowledge the (possibly forgotten?) authors of ARTIC FORTH because their
  98 | 	original program which I still have on original cassette tape kept nagging away at me all these years.
  99 | 	http://en.wikipedia.org/wiki/Artic_Software
 100 | 
 101 | 	PUBLIC DOMAIN ----------------------------------------------------------------------
 102 | 
 103 | 	I, the copyright holder of this work, hereby release it into the public domain. This applies worldwide.
 104 | 
 105 | 	In case this is not legally possible, I grant any entity the right to use this work for any purpose,
 106 | 	without any conditions, unless such conditions are required by law.
 107 | 
 108 | 	SETTING UP ----------------------------------------------------------------------
 109 | 
 110 | 	Let's get a few housekeeping things out of the way.  Firstly because I need to draw lots of
 111 | 	ASCII-art diagrams to explain concepts, the best way to look at this is using a window which
 112 | 	uses a fixed width font and is at least this wide:
 113 | 
 114 |  <------------------------------------------------------------------------------------------------------------------------>
 115 | 
 116 | 	Secondly make sure TABS are set to 8 characters.  The following should be a vertical
 117 | 	line.  If not, sort out your tabs.
 118 | 
 119 | 		|
 120 | 		|
 121 | 	    	|
 122 | 
 123 | 	Thirdly I assume that your screen is at least 50 characters high.
 124 | 
 125 | 	ASSEMBLING ----------------------------------------------------------------------
 126 | 
 127 | 	If you want to actually run this FORTH, rather than just read it, you will need Linux on an
 128 | 	RISC-V.  Linux because instead of programming directly to the hardware on a bare PC which I
 129 | 	could have done, I went for a simpler tutorial by assuming that the 'hardware' is a Linux
 130 | 	process with a few basic system calls (read, write and exit and that's about all).  RISC-V
 131 | 	is needed because I had to write the assembly for a processor. 
 132 | 	(Of course when I say 'RISC-V', any 64-bit RISC-V processor or VM will do.  
 133 | 	I'm compiling this on a qemu VM).
 134 | 
 135 | 	Again, to assemble this you will need gcc and gas (the GNU assembler).  The commands to
 136 | 	assemble and run the code (save this file as 'jonesforth.S') are:
 137 | 
 138 | 	gcc -nostdlib -static -o jonesforth jonesforth.S
 139 | 	cat jonesforth.f - | ./jonesforth
 140 | 
 141 | 	If you want to run your own FORTH programs you can do:
 142 | 
 143 | 	cat jonesforth.f myprog.f | ./jonesforth
 144 | 
 145 | 	If you want to load your own FORTH code and then continue reading user commands, you can do:
 146 | 
 147 | 	cat jonesforth.f myfunctions.f - | ./jonesforth
 148 | 
 149 | 	ASSEMBLER ----------------------------------------------------------------------
 150 | 
 151 | 	(You can just skip to the next section -- you don't need to be able to read assembler to
 152 | 	follow this tutorial).
 153 | 
 154 | 	However if you do want to read the assembly code here are a few notes about gas (the GNU assembler):
 155 | 
 156 | 	(1) Register names are prefixed with 'a', `t` or 's', so a0 is the 64 bit RISC-V register.  The registers
 157 | 	    available on RISC-V are: `a0 - a7`, `t0 - t6`, `s0 - s11`.
 158 | 
 159 | 	(2) add, mv, etc. take arguments in the form RD, RS1[, RS2].  So mv a0, t0 moves t0 -> a0
 160 | 
 161 | 	(3) li instruction is used for setting a constant value to a register:
 162 | 	    li t0, 1		set value 1 to t0
 163 | 
 164 | 	(4) gas has a funky syntax for local labels, where '1f' (etc.) means label '1:' "forwards"
 165 | 	    and '1b' (etc.) means label '1:' "backwards".  Notice that these labels might be mistaken
 166 | 	    for hex numbers (eg. you might confuse 1b with $0x1b).
 167 | 
 168 | 	(5) 'beqz' is "jump if rs is zero", 'bnez' for "jump if rs is not zero", 'j' "jump without condition" etc.
 169 | 
 170 | 	(6) gas has a reasonably nice .macro syntax, and I use them a lot to make the code shorter and
 171 | 	    less repetitive.
 172 | 
 173 | 	For more help reading the assembler, do "info gas" at the Linux prompt.
 174 | 
 175 | 	Now the tutorial starts in earnest.
 176 | 
 177 | 	THE DICTIONARY ----------------------------------------------------------------------
 178 | 
 179 | 	In FORTH as you will know, functions are called "words", and just as in other languages they
 180 | 	have a name and a definition.  Here are two FORTH words:
 181 | 
 182 | 	: DOUBLE DUP + ;		\ name is "DOUBLE", definition is "DUP +"
 183 | 	: QUADRUPLE DOUBLE DOUBLE ;	\ name is "QUADRUPLE", definition is "DOUBLE DOUBLE"
 184 | 
 185 | 	Words, both built-in ones and ones which the programmer defines later, are stored in a dictionary
 186 | 	which is just a linked list of dictionary entries.
 187 | 
 188 | 	<--- DICTIONARY ENTRY (HEADER) ----------------------->
 189 | 	+------------------------+--------+---------- - - - - +----------- - - - -
 190 | 	| LINK POINTER           | LENGTH/| NAME	      | DEFINITION
 191 | 	|			 | FLAGS  |     	      |
 192 | 	+--- (8 bytes) ----------+- byte -+- n bytes  - - - - +----------- - - - -
 193 | 
 194 | 	I'll come to the definition of the word later.  For now just look at the header.  The first
 195 | 	8 bytes are the link pointer.  This points back to the previous word in the dictionary, or, for
 196 | 	the first word in the dictionary it is just a NULL pointer.  Then comes a length/flags byte.
 197 | 	The length of the word can be up to 31 characters (5 bits used) and the top three bits are used
 198 | 	for various flags which I'll come to later.  This is followed by the name itself, and in this
 199 | 	implementation the name is rounded up to a multiple of 8 bytes by padding it with zero bytes.
 200 | 	That's just to ensure that the definition starts on a 64 bit boundary.
 201 | 
 202 | 	A FORTH variable called LATEST contains a pointer to the most recently defined word, in
 203 | 	other words, the head of this linked list.
 204 | 
 205 | 	DOUBLE and QUADRUPLE might look like this:
 206 | 
 207 | 	  pointer to previous word
 208 | 	   ^
 209 | 	   |
 210 | 	+--|------+---+---+---+---+---+---+---+---+------------- - - - -
 211 | 	| LINK    | 6 | D | O | U | B | L | E | 0 | (definition ...)
 212 | 	+---------+---+---+---+---+---+---+---+---+------------- - - - -
 213 | 	   ^       len                         padding
 214 | 	   |
 215 | 	+--|------+---+---+---+---+---+---+---+---+---+---+---+---+------------- - - - -
 216 | 	| LINK    | 9 | Q | U | A | D | R | U | P | L | E | 0 | 0 | (definition ...)
 217 | 	+---------+---+---+---+---+---+---+---+---+---+---+---+---+------------- - - - -
 218 | 	   ^       len                                     padding
 219 | 	   |
 220 | 	   |
 221 | 	  LATEST
 222 | 
 223 | 	You should be able to see from this how you might implement functions to find a word in
 224 | 	the dictionary (just walk along the dictionary entries starting at LATEST and matching
 225 | 	the names until you either find a match or hit the NULL pointer at the end of the dictionary);
 226 | 	and add a word to the dictionary (create a new definition, set its LINK to LATEST, and set
 227 | 	LATEST to point to the new word).  We'll see precisely these functions implemented in
 228 | 	assembly code later on.
 229 | 
 230 | 	One interesting consequence of using a linked list is that you can redefine words, and
 231 | 	a newer definition of a word overrides an older one.  This is an important concept in
 232 | 	FORTH because it means that any word (even "built-in" or "standard" words) can be
 233 | 	overridden with a new definition, either to enhance it, to make it faster or even to
 234 | 	disable it.  However because of the way that FORTH words get compiled, which you'll
 235 | 	understand below, words defined using the old definition of a word continue to use
 236 | 	the old definition.  Only words defined after the new definition use the new definition.
 237 | 
 238 | 	DIRECT THREADED CODE ----------------------------------------------------------------------
 239 | 
 240 | 	Now we'll get to the really crucial bit in understanding FORTH, so go and get a cup of tea
 241 | 	or coffee and settle down.  It's fair to say that if you don't understand this section, then you
 242 | 	won't "get" how FORTH works, and that would be a failure on my part for not explaining it well.
 243 | 	So if after reading this section a few times you don't understand it, please email me
 244 | 	(rich@annexia.org).
 245 | 
 246 | 	Let's talk first about what "threaded code" means.  Imagine a peculiar version of C where
 247 | 	you are only allowed to call functions without arguments.  (Don't worry for now that such a
 248 | 	language would be completely useless!)  So in our peculiar C, code would look like this:
 249 | 
 250 | 	f ()
 251 | 	{
 252 | 	  a ();
 253 | 	  b ();
 254 | 	  c ();
 255 | 	}
 256 | 
 257 | 	and so on.  How would a function, say 'f' above, be compiled by a standard C compiler?
 258 | 	Probably into assembly code like this.  On the right hand side I've written the actual
 259 | 	RISC-V machine code.
 260 | 
 261 | 	f:
 262 | 	  CALL a			E8 08 00 00 00
 263 | 	  CALL b			E8 1C 00 00 00
 264 | 	  CALL c			E8 2C 00 00 00
 265 | 	  ; ignore the return from the function for now
 266 | 
 267 | 	"E8" is the x86 machine code to "CALL" a function.  In the first 20 years of computing
 268 | 	memory was hideously expensive and we might have worried about the wasted space being used
 269 | 	by the repeated "E8" bytes.  We can save 20% in code size (and therefore, in expensive memory)
 270 | 	by compressing this into just:
 271 | 
 272 | 	08 00 00 00		Just the function addresses, without
 273 | 	1C 00 00 00		the CALL prefix.
 274 | 	2C 00 00 00
 275 | 
 276 | 	On a 16-bit machine like the ones which originally ran FORTH the savings are even greater - 33%.
 277 | 
 278 | 	[Historical note: If the execution model that FORTH uses looks strange from the following
 279 | 	paragraphs, then it was motivated entirely by the need to save memory on early computers.
 280 | 	This code compression isn't so important now when our machines have more memory in their L1
 281 | 	caches than those early computers had in total, but the execution model still has some
 282 | 	useful properties].
 283 | 
 284 | 	Of course this code won't run directly on the CPU any more.  Instead we need to write an
 285 | 	interpreter which takes each set of bytes and calls it.
 286 | 
 287 | 	On an RISC-V machine it turns out that we can write this interpreter rather easily, in just
 288 | 	two assembly instructions which turn into just 3 bytes of machine code.  Let's store the
 289 | 	pointer to the next word to execute in the s1 register:
 290 | 
 291 | 		08 00 00 00	<- We're executing this one now.  s1 is the _next_ one to execute.
 292 | 	  s1 -> 1C 00 00 00
 293 | 		2C 00 00 00
 294 | 
 295 | 	The all-important instructions are called `ld` and `addi`. Firstly `ld` reads the memory 
 296 | 	at s1 into the register (a0).  Secondly `addi` increments s1 by 8 bytes
 297 | 	(we are on a 64 bits machine, the pointer size is 64 bits).  
 298 | 	So after the two instructions, the situation now looks like this:
 299 | 
 300 | 		08 00 00 00	<- We're still executing this one
 301 | 		1C 00 00 00	<- a0 now contains this address (0x0000001C)
 302 | 	  s1 -> 2C 00 00 00
 303 | 
 304 | 	Now we just need to jump to the address in a0.  This is again needs two instructions:
 305 | 	firstly `ld t0, 0(a0)` to load the jump address to t0, then `jalr t0` to jump to the address.
 306 | 	And after doing the jump, the situation looks like:
 307 | 
 308 | 		08 00 00 00
 309 | 		1C 00 00 00	<- Now we're executing this subroutine.
 310 | 	  s1 -> 2C 00 00 00
 311 | 
 312 | 	To make this work, each subroutine is followed by the four instructions: 
 313 | 	'ld a0, 0(s1); addi s1, s1, 8; ld t0, 0(a0); jalr t0'
 314 | 	which make the jump to the next subroutine.
 315 | 
 316 | 	And that brings us to our first piece of actual code!  Well, it's a macro.
 317 | */
 318 | 
 319 | /* NEXT macro. */
 320 | /* use s1 pointing to the interpreter pc
 321 |  */
 322 | 	.macro NEXT
 323 | 	ld a0, 0(s1)
 324 | 	addi s1, s1, 8
 325 | 	ld t0, 0(a0)
 326 | 	jalr t0
 327 | 	.endm
 328 | 
 329 | 
 330 | /*	The macro is called NEXT.  That's a FORTH-ism.  It expands to those two instructions.
 331 | 
 332 | 	Every FORTH primitive that we write has to be ended by NEXT.  Think of it kind of like
 333 | 	a return.
 334 | 
 335 | 	The above describes what is known as direct threaded code.
 336 | 
 337 | 	To sum up: We compress our function calls down to a list of addresses and use a somewhat
 338 | 	magical macro to act as a "jump to next function in the list".  We also use one register (s1)
 339 | 	to act as a kind of instruction pointer, pointing to the next function in the list.
 340 | 
 341 | 	I'll just give you a hint of what is to come by saying that a FORTH definition such as:
 342 | 
 343 | 	: QUADRUPLE DOUBLE DOUBLE ;
 344 | 
 345 | 	actually compiles (almost, not precisely but we'll see why in a moment) to a list of
 346 | 	function addresses for DOUBLE, DOUBLE and a special function called EXIT to finish off.
 347 | 
 348 | 	At this point, REALLY EAGLE-EYED ASSEMBLY EXPERTS are saying "JONES, YOU'VE MADE A MISTAKE!".
 349 | 
 350 | 	INDIRECT THREADED CODE ----------------------------------------------------------------------
 351 | 
 352 | 	It turns out that direct threaded code is interesting but only if you want to just execute
 353 | 	a list of functions written in assembly language.  So QUADRUPLE would work only if DOUBLE
 354 | 	was an assembly language function.  In the direct threaded code, QUADRUPLE would look like:
 355 | 
 356 | 		+------------------+
 357 | 		| addr of DOUBLE  --------------------> (assembly code to do the double)
 358 | 		+------------------+                    NEXT
 359 | 	  s1 ->	| addr of DOUBLE   |
 360 | 		+------------------+
 361 | 
 362 | 	We can add an extra indirection to allow us to run both words written in assembly language
 363 | 	(primitives written for speed) and words written in FORTH themselves as lists of addresses.
 364 | 
 365 | 	The extra indirection is the reason for the `ld t0, 0(a0)`.
 366 | 
 367 | 	Let's have a look at how QUADRUPLE and DOUBLE really look in FORTH:
 368 | 
 369 | 		: QUADRUPLE DOUBLE DOUBLE ;
 370 | 
 371 | 		+------------------+
 372 | 		| codeword         |		   : DOUBLE DUP + ;
 373 | 		+------------------+
 374 | 		| addr of DOUBLE  ---------------> +------------------+
 375 | 		+------------------+               | codeword         |
 376 | 		| addr of DOUBLE   |		   +------------------+
 377 | 		+------------------+	   	   | addr of DUP   --------------> +------------------+
 378 | 		| addr of EXIT	   |		   +------------------+            | codeword      -------+
 379 | 		+------------------+	     s1 -> | addr of +     --------+	   +------------------+   |
 380 | 						   +------------------+	   |	   | assembly to    <-----+
 381 | 						   | addr of EXIT     |    |       | implement DUP    |
 382 | 						   +------------------+	   |	   |	..	      |
 383 | 									   |	   |    ..            |
 384 | 									   |	   | NEXT             |
 385 | 									   |	   +------------------+
 386 | 									   |
 387 | 									   +-----> +------------------+
 388 | 										   | codeword      -------+
 389 | 										   +------------------+   |
 390 | 										   | assembly to   <------+
 391 | 										   | implement +      |
 392 | 										   | 	..            |
 393 | 										   | 	..            |
 394 | 										   | NEXT      	      |
 395 | 										   +------------------+
 396 | 
 397 | 	This is the part where you may need an extra cup of tea/coffee/favourite caffeinated
 398 | 	beverage.  What has changed is that I've added an extra pointer to the beginning of
 399 | 	the definitions.  In FORTH this is sometimes called the "codeword".  The codeword is
 400 | 	a pointer to the interpreter to run the function.  For primitives written in
 401 | 	assembly language, the "interpreter" just points to the actual assembly code itself.
 402 | 	They don't need interpreting, they just run.
 403 | 
 404 | 	In words written in FORTH (like QUADRUPLE and DOUBLE), the codeword points to an interpreter
 405 | 	function.
 406 | 
 407 | 	I'll show you the interpreter function shortly, but let's recall our indirect
 408 | 	jump instructions.  Take the case where we're executing DOUBLE
 409 | 	as shown, and DUP has been called.  Note that s1 is pointing to the address of +
 410 | 
 411 | 	The assembly code for DUP eventually does a NEXT.  That:
 412 | 
 413 | 	(1) reads the address of + into a0		a0 points to the codeword of +
 414 | 	(2) increments s1 by 8
 415 | 	(3) jumps to the indirect t0			jumps to the address in the codeword of +,
 416 | 							ie. the assembly code to implement +
 417 | 
 418 | 		+------------------+
 419 | 		| codeword         |
 420 | 		+------------------+
 421 | 		| addr of DOUBLE  ---------------> +------------------+
 422 | 		+------------------+               | codeword         |
 423 | 		| addr of DOUBLE   |		   +------------------+
 424 | 		+------------------+	   	   | addr of DUP   --------------> +------------------+
 425 | 		| addr of EXIT	   |		   +------------------+            | codeword      -------+
 426 | 		+------------------+	   	   | addr of +     --------+	   +------------------+   |
 427 | 						   +------------------+	   |	   | assembly to    <-----+
 428 | 					     s1 -> | addr of EXIT     |    |       | implement DUP    |
 429 | 						   +------------------+	   |	   |	..	      |
 430 | 									   |	   |    ..            |
 431 | 									   |	   | NEXT             |
 432 | 									   |	   +------------------+
 433 | 									   |
 434 | 									   +-----> +------------------+
 435 | 										   | codeword      -------+
 436 | 										   +------------------+   |
 437 | 									now we're  | assembly to    <-----+
 438 | 									executing  | implement +      |
 439 | 									this	   | 	..            |
 440 | 									function   | 	..            |
 441 | 										   | NEXT      	      |
 442 | 										   +------------------+
 443 | 
 444 | 	So I hope that I've convinced you that NEXT does roughly what you'd expect.  This is
 445 | 	indirect threaded code.
 446 | 
 447 | 	I've glossed over four things.  I wonder if you can guess without reading on what they are?
 448 | 
 449 | 	.
 450 | 	.
 451 | 	.
 452 | 
 453 | 	My list of four things are: (1) What does "EXIT" do?  (2) which is related to (1) is how do
 454 | 	you call into a function, ie. how does s1 start off pointing at part of QUADRUPLE, but
 455 | 	then point at part of DOUBLE.  (3) What goes in the codeword for the words which are written
 456 | 	in FORTH?  (4) How do you compile a function which does anything except call other functions
 457 | 	ie. a function which contains a number like : DOUBLE 2 * ; ?
 458 | 
 459 | 	THE INTERPRETER AND RETURN STACK ------------------------------------------------------------
 460 | 
 461 | 	Going at these in no particular order, let's talk about issues (3) and (2), the interpreter
 462 | 	and the return stack.
 463 | 
 464 | 	Words which are defined in FORTH need a codeword which points to a little bit of code to
 465 | 	give them a "helping hand" in life.  They don't need much, but they do need what is known
 466 | 	as an "interpreter", although it doesn't really "interpret" in the same way that, say,
 467 | 	Java bytecode used to be interpreted (ie. slowly).  This interpreter just sets up a few
 468 | 	machine registers so that the word can then execute at full speed using the indirect
 469 | 	threaded model above.
 470 | 
 471 | 	One of the things that needs to happen when QUADRUPLE calls DOUBLE is that we save the old
 472 | 	s1 ("instruction pointer") and create a new one pointing to the first word in DOUBLE.
 473 | 	Because we will need to restore the old s1 at the end of DOUBLE (this is, after all, like
 474 | 	a function call), we will need a stack to store these "return addresses" (old values of s1).
 475 | 
 476 | 	As you will have seen in the background documentation, FORTH has two stacks, an ordinary
 477 | 	stack for parameters, and a return stack which is a bit more mysterious.  But our return
 478 | 	stack is just the stack I talked about in the previous paragraph, used to save s1 when
 479 | 	calling from a FORTH word into another FORTH word.
 480 | 
 481 | 	In this FORTH, we are using the normal stack pointer (sp) for the parameter stack.
 482 | 	We will use the RISC-V's "other" stack pointer (fp, usually called the "frame pointer")
 483 | 	for our return stack.
 484 | 
 485 | 	I've got two macros which just wrap up the details of using fp for the return stack.
 486 | 	You use them as for example "PUSHRSP a0" (push a0 on the return stack) or "POPRSP a1"
 487 | 	(pop top of return stack into a1).
 488 | */
 489 | 
 490 | /* Macros to deal with the return stack. */
 491 | 	.macro PUSH regs:vararg
 492 | 	PUSH_ADJ 0, \regs		// push reg on to stack
 493 | 	PUSH_REGS \regs
 494 | 	.endm
 495 | 
 496 | 	.macro PUSH_ADJ depth reg regs:vararg
 497 | 	.ifb \regs
 498 | 	addi sp,sp,\depth-8
 499 | 	.else
 500 | 	PUSH_ADJ \depth-8, \regs
 501 | 	.endif
 502 | 	.endm
 503 | 
 504 | 	.macro PUSH_REG dst off reg="" regs:vararg
 505 | 	.ifb \reg
 506 | 	sd \dst,(\off)(sp)
 507 | 	.else
 508 | 	PUSH_REG \dst, \off+8, \regs
 509 | 	.endif
 510 | 	.endm
 511 | 
 512 | 	.macro PUSH_REGS reg regs:vararg
 513 | 	.ifb \regs
 514 | 	.else
 515 | 	PUSH_REGS \regs
 516 | 	.endif
 517 | 	PUSH_REG \reg 0 \regs
 518 | 	.endm
 519 | 
 520 | 	.macro POP regs:vararg
 521 | 	POP_R 0, \regs			// pop regs off the stack
 522 | 	.endm
 523 | 
 524 | 	.macro POP_R depth reg regs:vararg
 525 | 	ld \reg,\depth(sp)
 526 | 	.ifb \regs
 527 | 	addi sp,sp,\depth+8
 528 | 	.else
 529 | 	POP_R \depth+8, \regs
 530 | 	.endif
 531 | 	.endm
 532 | 
 533 | 	.macro PUSHRSP reg
 534 | 	addi fp, fp, -8			// push  reg on to stack
 535 | 	sd \reg, 0(fp)
 536 | 	.endm
 537 | 
 538 | 	.macro POPRSP reg
 539 | 	ld \reg, 0(fp)            	// pop top of return stack to reg
 540 | 	addi fp, fp, 8
 541 | 	.endm
 542 | 
 543 | 	/* Macros to help us handling function calls. 
 544 | 	This macro saves all nessacery registers before a function call 
 545 | 	and resume these registers after the call */
 546 | 	.macro RCALL symbol
 547 | 	PUSH ra					// push ra (return address) on to stack
 548 | 	call \symbol
 549 | 	POP ra					// resume ra
 550 | 	.endm
 551 | 
 552 | 
 553 | /*
 554 | 	And with that we can now talk about the interpreter.
 555 | 
 556 | 	In FORTH the interpreter function is often called DOCOL (I think it means "DO COLON" because
 557 | 	all FORTH definitions start with a colon, as in : DOUBLE DUP + ;
 558 | 
 559 | 	The "interpreter" (it's not really "interpreting") just needs to push the old s1 on the
 560 | 	stack and set s1 to the first word in the definition.  Remember that we jumped to the
 561 | 	function using `ld t0, 0(a0); jalr t0`?  Well a consequence of that is that conveniently a0 contains
 562 | 	the address of this codeword, so just by adding 8 to it we get the address of the first
 563 | 	data word.  Finally after setting up s1, it just does NEXT which causes that first word
 564 | 	to run.
 565 | */
 566 | 
 567 | /* DOCOL - the interpreter! */
 568 | 
 569 | 	.text
 570 | 	.balign 8
 571 | DOCOL:
 572 | 	PUSHRSP s1		// push s1 on to the return stack
 573 | 	addi a0, a0, 8	 	// a0 points to codeword, so make
 574 | 	mv s1, a0		// s1 point to first data word
 575 | 	NEXT
 576 | 
 577 | /*
 578 | 	Just to make this absolutely clear, let's see how DOCOL works when jumping from QUADRUPLE
 579 | 	into DOUBLE:
 580 | 
 581 | 		QUADRUPLE:
 582 | 		+------------------+
 583 | 		| codeword         |
 584 | 		+------------------+		   DOUBLE:
 585 | 		| addr of DOUBLE  ---------------> +------------------+
 586 | 		+------------------+         a0 -> | addr of DOCOL    |
 587 | 	  s1 ->	| addr of DOUBLE   |		   +------------------+
 588 | 		+------------------+	   	   | addr of DUP      |
 589 | 		| addr of EXIT	   |		   +------------------+
 590 | 		+------------------+               | etc.             |
 591 | 
 592 | 	First, the call to DOUBLE calls DOCOL (the codeword of DOUBLE).  DOCOL does this:  It
 593 | 	pushes the old s1 on the return stack.  a0 points to the codeword of DOUBLE, so we
 594 | 	just add 8 on to it to get our new s1:
 595 | 
 596 | 		QUADRUPLE:
 597 | 		+------------------+
 598 | 		| codeword         |
 599 | 		+------------------+		   DOUBLE:
 600 | 		| addr of DOUBLE  ---------------> +------------------+
 601 | top of return	+------------------+         a0 -> | addr of DOCOL    |
 602 | stack points ->	| addr of DOUBLE   |	   + 8 =   +------------------+
 603 | 		+------------------+	     s1 -> | addr of DUP      |
 604 | 		| addr of EXIT	   |		   +------------------+
 605 | 		+------------------+               | etc.             |
 606 | 
 607 | 	Then we do NEXT, and because of the magic of threaded code that increments s1 again
 608 | 	and calls DUP.
 609 | 
 610 | 	Well, it seems to work.
 611 | 
 612 | 	One minor point here.  Because DOCOL is the first bit of assembly actually to be defined
 613 | 	in this file (the others were just macros), and because I usually compile this code with the
 614 | 	text segment starting at address 0, DOCOL has address 0.  So if you are disassembling the
 615 | 	code and see a word with a codeword of 0, you will immediately know that the word is
 616 | 	written in FORTH (it's not an assembler primitive) and so uses DOCOL as the interpreter.
 617 | 
 618 | 	STARTING UP ----------------------------------------------------------------------
 619 | 
 620 | 	Now let's get down to nuts and bolts.  When we start the program we need to set up
 621 | 	a few things like the return stack.  But as soon as we can, we want to jump into FORTH
 622 | 	code (albeit much of the "early" FORTH code will still need to be written as
 623 | 	assembly language primitives).
 624 | 
 625 | 	This is what the set up code does.  Does a tiny bit of house-keeping, sets up the
 626 | 	separate return stack (NB: Linux gives us the ordinary parameter stack already), then
 627 | 	immediately jumps to a FORTH word called QUIT.  Despite its name, QUIT doesn't quit
 628 | 	anything.  It resets some internal state and starts reading and interpreting commands.
 629 | 	(The reason it is called QUIT is because you can call QUIT from your own FORTH code
 630 | 	to "quit" your program and go back to interpreting).
 631 | */
 632 | 
 633 | /* Assembler entry point. */
 634 | 
 635 | 	.text
 636 | 	.globl _start
 637 | _start:
 638 | 	la t0, var_S0
 639 | 	sd sp, 0(t0)		// Save the initial data stack pointer in FORTH variable S0.
 640 | 	la fp, return_stack_top        // Initialise the return stack.
 641 | 	call set_up_data_segment
 642 | 
 643 | 	la s1, cold_start      	// Initialise interpreter.
 644 | 	NEXT			        // Run interpreter!
 645 | 
 646 | 	.section .rodata
 647 | 	.balign 8
 648 | cold_start:			// High-level code without a codeword.
 649 | 	.dword QUIT
 650 | 
 651 | /*
 652 | 	BUILT-IN WORDS ----------------------------------------------------------------------
 653 | 
 654 | 	Remember our dictionary entries (headers)?  Let's bring those together with the codeword
 655 | 	and data words to see how : DOUBLE DUP + ; really looks in memory.
 656 | 
 657 | 	  pointer to previous word
 658 | 	   ^
 659 | 	   |
 660 | 	+--|------+---+---+---+---+---+---+---+---+------------+------------+------------+------------+
 661 | 	| LINK    | 6 | D | O | U | B | L | E | 0 | DOCOL      | DUP        | +          | EXIT       |
 662 | 	+---------+---+---+---+---+---+---+---+---+------------+--|---------+------------+------------+
 663 | 	   ^       len                         pad  codeword      |
 664 | 	   |							  V
 665 | 	  LINK in next word				points to codeword of DUP
 666 | 	
 667 | 	Initially we can't just write ": DOUBLE DUP + ;" (ie. that literal string) here because we
 668 | 	don't yet have anything to read the string, break it up at spaces, parse each word, etc. etc.
 669 | 	So instead we will have to define built-in words using the GNU assembler data constructors
 670 | 	(like .dword, .byte, .string, .ascii and so on -- look them up in the gas info page if you are
 671 | 	unsure of them).
 672 | 
 673 | 	The long way would be:
 674 | 
 675 | 	.dword <link to previous word>
 676 | 	.byte 6			// len
 677 | 	.ascii "DOUBLE"		// string
 678 | 	.byte 0			// padding
 679 | DOUBLE: .dword DOCOL		// codeword
 680 | 	.dword DUP		// pointer to codeword of DUP
 681 | 	.dword PLUS		// pointer to codeword of +
 682 | 	.dword EXIT		// pointer to codeword of EXIT
 683 | 
 684 | 	That's going to get quite tedious rather quickly, so here I define an assembler macro
 685 | 	so that I can just write:
 686 | 
 687 | 	defword "DOUBLE",6,,DOUBLE
 688 | 	.dword DUP,PLUS,EXIT
 689 | 
 690 | 	and I'll get exactly the same effect.
 691 | 
 692 | 	Don't worry too much about the exact implementation details of this macro - it's complicated!
 693 | */
 694 | 
 695 | /* Flags - these are discussed later. */
 696 | 	.set F_IMMED,0x80
 697 | 	.set F_HIDDEN,0x20
 698 | 	.set F_LENMASK,0x1f	// length mask
 699 | 
 700 | 	// Store the chain of links.
 701 | 	.macro defword name, namelen, flags=0, label, link
 702 | 	.section .rodata
 703 | 	.balign 8
 704 | 	.globl name_\label
 705 | name_\label :
 706 | 	.dword name_\link		// link
 707 | 	.byte \flags+\namelen	// flags + length byte
 708 | 	.ascii "\name"		// the name
 709 | 	.balign 8		// padding to next 8 byte boundary
 710 | 	.globl \label
 711 | \label :
 712 | 	.dword DOCOL		// codeword - the interpreter
 713 | 	// list of word pointers follow
 714 | 	.endm
 715 | 
 716 | /*
 717 | 	Similarly I want a way to write words written in assembly language.  There will quite a few
 718 | 	of these to start with because, well, everything has to start in assembly before there's
 719 | 	enough "infrastructure" to be able to start writing FORTH words, but also I want to define
 720 | 	some common FORTH words in assembly language for speed, even though I could write them in FORTH.
 721 | 
 722 | 	This is what DUP looks like in memory:
 723 | 
 724 | 	  pointer to previous word
 725 | 	   ^
 726 | 	   |
 727 | 	+--|------+---+---+---+---+------------+
 728 | 	| LINK    | 3 | D | U | P | code_DUP ---------------------> points to the assembly
 729 | 	+---------+---+---+---+---+------------+		    code used to write DUP,
 730 | 	   ^       len              codeword			    which ends with NEXT.
 731 | 	   |
 732 | 	  LINK in next word
 733 | 
 734 | 	Again, for brevity in writing the header I'm going to write an assembler macro called defcode.
 735 | 	As with defword above, don't worry about the complicated details of the macro.
 736 | */
 737 | 
 738 | 	.macro defcode name, namelen, flags=0, label, link
 739 | 	.section .rodata
 740 | 	.balign 8
 741 | 	.globl name_\label
 742 | name_\label :
 743 | 	.dword name_\link		// link
 744 | 	.byte \flags+\namelen	// flags + length byte
 745 | 	.ascii "\name"		// the name
 746 | 	.balign 8		// padding to next 8 byte boundary
 747 | 	.globl \label
 748 | \label :
 749 | 	.dword code_\label	// codeword
 750 | 	.text
 751 | 	.balign 8
 752 | 	.globl code_\label
 753 | code_\label :			// assembler code follows
 754 | 	.endm
 755 | 
 756 | /*
 757 | 	Now some easy FORTH primitives.  These are written in assembly for speed.  If you understand
 758 | 	RISC-V assembly language then it is worth reading these.  However if you don't understand assembly
 759 | 	you can skip the details.
 760 | */
 761 | 
 762 | 	.set name_NULL, 0
 763 | 
 764 | 	defcode "DROP",4,,DROP, NULL
 765 | 	POP a0		// drop top of stack
 766 | 	NEXT
 767 | 
 768 | 	defcode "SWAP",4,,SWAP, DROP
 769 | 	POP a0 a1	// swap top two elements on stack
 770 | 	PUSH a0 a1
 771 | 	NEXT
 772 | 
 773 | 	defcode "DUP",3,,DUP, SWAP
 774 | 	ld a0, 0(sp)    		// duplicate top of stack
 775 | 	PUSH a0
 776 | 	NEXT
 777 | 
 778 | 	defcode "OVER",4,,OVER, DUP
 779 | 	ld a0, 8(sp)	                // get the second element of stack
 780 | 	PUSH a0 		        // and push it on top
 781 | 	NEXT
 782 | 
 783 | 	defcode "ROT",3,,ROT, OVER
 784 | 	POP a0 a1 a2
 785 | 	PUSH a1 a0 a2
 786 | 	NEXT
 787 | 
 788 | 	defcode "-ROT",4,,NROT, ROT
 789 | 	POP a0 a1 a2
 790 | 	PUSH a0 a2 a1
 791 | 	NEXT
 792 | 
 793 | 	defcode "2DROP",5,,TWODROP, NROT // drop top two elements of stack
 794 | 	POP a0 a0
 795 | 	NEXT
 796 | 
 797 | 	defcode "2DUP",4,,TWODUP, TWODROP // duplicate top two elements of stack
 798 | 	ld a0, 0(sp)
 799 | 	ld a1, 8(sp)
 800 | 	PUSH a1 a0
 801 | 	NEXT
 802 | 
 803 | 	defcode "2SWAP",5,,TWOSWAP, TWODUP // swap top two pairs of elements of stack
 804 | 	POP a0 a1 a2 a3
 805 | 	PUSH a1 a0 a3 a2
 806 | 	NEXT
 807 | 
 808 | 	defcode "?DUP",4,,QDUP, TWOSWAP	// duplicate top of stack if non-zero
 809 | 	ld a0, 0(sp)
 810 | 	beqz a0, 1f
 811 | 	PUSH a0
 812 | 1:	NEXT
 813 | 
 814 | 	defcode "1+",2,,INCR, QDUP
 815 | 	POP a0
 816 | 	addi a0, a0, 1         // increment top of stack
 817 | 	PUSH a0
 818 | 	NEXT
 819 | 
 820 | 	defcode "1-",2,,DECR, INCR
 821 | 	POP a0
 822 | 	addi a0, a0, -1		// decrement top of stack
 823 | 	PUSH a0
 824 | 	NEXT
 825 | 
 826 | 	defcode "4+",2,,INCR4, DECR
 827 | 	POP a0
 828 | 	addi a0, a0, 4		// add 4 to top of stack
 829 | 	PUSH a0
 830 | 	NEXT
 831 | 
 832 | 	defcode "4-",2,,DECR4, INCR4
 833 | 	POP a0
 834 | 	addi a0, a0, -4 	// subtract 4 from top of stack
 835 | 	PUSH a0
 836 | 	NEXT
 837 | 
 838 | 	defcode "8+",2,,INCR8, INCR4
 839 | 	POP a0
 840 | 	addi a0, a0, 8		// add 8 to top of stack
 841 | 	PUSH a0
 842 | 	NEXT
 843 | 
 844 | 	defcode "8-",2,,DECR8, INCR8
 845 | 	POP a0
 846 | 	addi a0, a0, -8 	// subtract 8 from top of stack
 847 | 	PUSH a0
 848 | 	NEXT
 849 | 
 850 | 	defcode "+",1,,ADD, DECR8
 851 | 	POP a0 a1	// get top and second of stack
 852 | 	add a0, a0, a1	// and add the two number
 853 | 	PUSH a0         // push back the result to stack
 854 | 	NEXT
 855 | 
 856 | 	defcode "-",1,,SUB, ADD
 857 | 	POP a0 a1	// get top and second of stack
 858 | 	sub a0, a1, a0	// and subtract the two number
 859 | 	PUSH a0         // push back the result to stack
 860 | 	NEXT
 861 | 
 862 | 	defcode "*",1,,MUL, SUB
 863 | 	POP a0 a1
 864 | 	mul a0, a0, a1
 865 | 	PUSH a0		// ignore overflow
 866 | 	NEXT
 867 | 
 868 | /*
 869 | 	In this FORTH, only /MOD is primitive.  Later we will define the / and MOD words in
 870 | 	terms of the primitive /MOD.
 871 | */
 872 | 
 873 | /*
 874 | 	From the RISC-V spec [7.2]:
 875 | 	If both the quotient and remainder are required from the same division, the
 876 | 	recommended code sequence is: DIV[U] rdq, rs1, rs2; REM[U] rdr, rs1, rs2 (rdq cannot be the
 877 | 	same as rs1 or rs2). Microarchitectures can then fuse these into a single divide operation instead
 878 | 	of performing two separate divides.
 879 | */
 880 | 	defcode "/MOD",4,,DIVMOD, MUL
 881 | 	POP a0 a1
 882 | 	div a3, a1, a0
 883 | 	rem a4, a1, a0
 884 | 	PUSH a4 a3	// push a4 = remained  a3 = quotient
 885 | 	NEXT
 886 | 
 887 | /*
 888 | 	Lots of comparison operations like =, <, >, etc..
 889 | 
 890 | 	ANS FORTH says that the comparison words should return all (binary) 1's for
 891 | 	TRUE and all 0's for FALSE.  However this is a bit of a strange convention
 892 | 	so this FORTH breaks it and returns the more normal (for C programmers ...)
 893 | 	1 meaning TRUE and 0 meaning FALSE.
 894 | */
 895 | 
 896 | 	defcode "=",1,,EQU, DIVMOD	// top two words are equal?
 897 | 	POP a0 a1
 898 | 	sub a0, a0, a1
 899 | 	seqz a0, a0             // set a0 to 1 if a0 is zero
 900 | 	PUSH a0
 901 | 	NEXT
 902 | 
 903 | 	defcode "<>",2,,NEQU, EQU	// top two words are not equal?
 904 | 	POP a0 a1
 905 | 	sub a0, a0, a1
 906 | 	sltu a0, zero, a0         // set a0 to 1 if a0 is not equals to zero, otherwise set a0 to 0
 907 | 	PUSH a0
 908 | 	NEXT
 909 | 
 910 | 	defcode "<",1,,LT, NEQU
 911 | 	POP a0 a1
 912 | 	slt a0, a1, a0          // set a0 to 1 if a1 < a0, otherwise set a0 to 0
 913 | 	PUSH a0
 914 | 	NEXT
 915 | 
 916 | 	defcode ">",1,,GT, LT
 917 | 	POP a0 a1
 918 | 	slt a0, a0, a1          // set a0 to 1 if a0 < a1, otherwise set a0 to 0
 919 | 	PUSH a0
 920 | 	NEXT
 921 | 
 922 | 	defcode "<=",2,,LE, GT
 923 | 	POP a0 a1
 924 | 	slt t0, a0, a1 // if a1 <= a0, then !(a0 < a1)
 925 | 	li t1, 1
 926 | 	sub t0, t1, t0
 927 | 	PUSH t0
 928 | 	NEXT
 929 | 
 930 | 	defcode ">=",2,,GE, LE
 931 | 	POP a0 a1
 932 | 	slt t0, a1, a0          // if a1 >= a0, then !(a1 < a0)
 933 | 	li t1, 1
 934 | 	sub t0, t1, t0
 935 | 	PUSH t0
 936 | 	NEXT
 937 | 
 938 | 	defcode "0=",2,,ZEQU, GE	// top of stack equals 0?
 939 | 	POP a0
 940 | 	seqz a0, a0
 941 | 	PUSH a0
 942 | 	NEXT
 943 | 
 944 | 	defcode "0<>",3,,ZNEQU, ZEQU	// top of stack not 0?
 945 | 	POP a0
 946 | 	sltu a0, zero, a0
 947 | 	PUSH a0
 948 | 	NEXT
 949 | 
 950 | 	defcode "0<",2,,ZLT, ZNEQU	// comparisons with 0
 951 | 	POP a0
 952 | 	slt a0, a0, zero
 953 | 	PUSH a0
 954 | 	NEXT
 955 | 
 956 | 	defcode "0>",2,,ZGT, ZLT
 957 | 	POP a0
 958 | 	slt a0, zero, a0
 959 | 	PUSH a0
 960 | 	NEXT
 961 | 
 962 | 	defcode "0<=",3,,ZLE, ZGT
 963 | 	POP a0
 964 | 	slt t0, zero, a0
 965 | 	li t1, 1
 966 | 	sub t0, t1, t0
 967 | 	PUSH t0
 968 | 	NEXT
 969 | 
 970 | 	defcode "0>=",3,,ZGE, ZLE
 971 | 	POP a0
 972 | 	slt t0, a0, zero
 973 | 	li t1, 1
 974 | 	sub t0, t1, t0
 975 | 	PUSH t0
 976 | 	NEXT
 977 | 
 978 | 	defcode "AND",3,,AND, ZGE	// bitwise AND
 979 | 	POP a0 a1
 980 | 	and a0, a0, a1
 981 | 	PUSH a0
 982 | 	NEXT
 983 | 
 984 | 	defcode "OR",2,,OR, AND	// bitwise OR
 985 | 	POP a0 a1
 986 | 	or a0, a0, a1
 987 | 	PUSH a0
 988 | 	NEXT
 989 | 
 990 | 	defcode "XOR",3,,XOR, OR	// bitwise XOR
 991 | 	POP a0 a1
 992 | 	xor a0, a0, a1
 993 | 	PUSH a0
 994 | 	NEXT
 995 | 
 996 | 	defcode "INVERT",6,,INVERT, XOR // this is the FORTH bitwise "NOT" function (cf. NEGATE and NOT)
 997 | 	POP a0
 998 | 	not a0, a0
 999 | 	PUSH a0
1000 | 	NEXT
1001 | 
1002 | /*
1003 | 	RETURNING FROM FORTH WORDS ----------------------------------------------------------------------
1004 | 
1005 | 	Time to talk about what happens when we EXIT a function.  In this diagram QUADRUPLE has called
1006 | 	DOUBLE, and DOUBLE is about to exit (look at where s1 is pointing):
1007 | 
1008 | 		QUADRUPLE
1009 | 		+------------------+
1010 | 		| codeword         |
1011 | 		+------------------+		   DOUBLE
1012 | 		| addr of DOUBLE  ---------------> +------------------+
1013 | 		+------------------+               | codeword         |
1014 | 		| addr of DOUBLE   |		   +------------------+
1015 | 		+------------------+	   	   | addr of DUP      |
1016 | 		| addr of EXIT	   |		   +------------------+
1017 | 		+------------------+	   	   | addr of +        |
1018 | 						   +------------------+
1019 | 					     s1 -> | addr of EXIT     |
1020 | 						   +------------------+
1021 | 
1022 | 	What happens when the + function does NEXT?  Well, the following code is executed.
1023 | */
1024 | 
1025 | 	defcode "EXIT",4,,EXIT, INVERT
1026 | 	POPRSP s1		// pop return stack into s1
1027 | 	NEXT
1028 | 
1029 | /*
1030 | 	EXIT gets the old s1 which we saved from before on the return stack, and puts it in s1.
1031 | 	So after this (but just before NEXT) we get:
1032 | 
1033 | 		QUADRUPLE
1034 | 		+------------------+
1035 | 		| codeword         |
1036 | 		+------------------+		   DOUBLE
1037 | 		| addr of DOUBLE  ---------------> +------------------+
1038 | 		+------------------+               | codeword         |
1039 | 	  s1 ->	| addr of DOUBLE   |		   +------------------+
1040 | 		+------------------+	   	   | addr of DUP      |
1041 | 		| addr of EXIT	   |		   +------------------+
1042 | 		+------------------+	   	   | addr of +        |
1043 | 						   +------------------+
1044 | 						   | addr of EXIT     |
1045 | 						   +------------------+
1046 | 
1047 | 	And NEXT just completes the job by, well, in this case just by calling DOUBLE again :-)
1048 | 
1049 | 	LITERALS ----------------------------------------------------------------------
1050 | 
1051 | 	The final point I "glossed over" before was how to deal with functions that do anything
1052 | 	apart from calling other functions.  For example, suppose that DOUBLE was defined like this:
1053 | 
1054 | 	: DOUBLE 2 * ;
1055 | 
1056 | 	It does the same thing, but how do we compile it since it contains the literal 2?  One way
1057 | 	would be to have a function called "2" (which you'd have to write in assembler), but you'd need
1058 | 	a function for every single literal that you wanted to use.
1059 | 
1060 | 	FORTH solves this by compiling the function using a special word called LIT:
1061 | 
1062 | 	+---------------------------+-------+-------+-------+-------+-------+
1063 | 	| (usual header of DOUBLE)  | DOCOL | LIT   | 2     | *     | EXIT  |
1064 | 	+---------------------------+-------+-------+-------+-------+-------+
1065 | 
1066 | 	LIT is executed in the normal way, but what it does next is definitely not normal.  It
1067 | 	looks at s1 (which now points to the number 2), grabs it, pushes it on the stack, then
1068 | 	manipulates s1 in order to skip the number as if it had never been there.
1069 | 
1070 | 	What's neat is that the whole grab/manipulate can be done using two RISC-V instructions.
1071 | 	Rather than me drawing more ASCII-art diagrams, see if you can find out how LIT works:
1072 | */
1073 | 
1074 | 	defcode "LIT",3,,LIT, EXIT
1075 | 	// s1 points to the next command, but in this case it points to the next
1076 | 	// literal 32 bit integer.  Get that literal into a0 and increment s1.
1077 | 	ld a0, 0(s1)
1078 | 	PUSH a0			// push the literal number on to stack
1079 | 	addi s1, s1, 8		// skip next command
1080 | 	NEXT
1081 | 
1082 | /*
1083 | 	MEMORY ----------------------------------------------------------------------
1084 | 
1085 | 	As important point about FORTH is that it gives you direct access to the lowest levels
1086 | 	of the machine.  Manipulating memory directly is done frequently in FORTH, and these are
1087 | 	the primitive words for doing it.
1088 | */
1089 | 
1090 | 	defcode "!",1,,STORE, LIT
1091 | 	POP a0 a1		// a0 = address to store at  a1 = data to store there
1092 | 	sd a1, 0(a0)	// store it
1093 | 	NEXT
1094 | 
1095 | 	defcode "@",1,,FETCH, STORE
1096 | 	POP a0			// address to fetch
1097 | 	ld a1, 0(a0)	// fetch it
1098 | 	PUSH a1			// push value onto stack
1099 | 	NEXT
1100 | 
1101 | 	defcode "+!",2,,ADDSTORE, FETCH
1102 | 	POP a0 a1		// a0 = address  a1 = the amount to add
1103 | 	ld a2, 0(a0)
1104 | 	add a3, a1, a2	// add it
1105 | 	sd a3, 0(a0)
1106 | 	NEXT
1107 | 
1108 | 	defcode "-!",2,,SUBSTORE, ADDSTORE
1109 | 	POP a0 a1	// a0 = address  a1 = the amount to subtract
1110 | 	ld a2, 0(a0)
1111 | 	sub a3, a2, a1
1112 | 	sd a3, 0(a0)
1113 | 	NEXT
1114 | 
1115 | /*
1116 | 	! and @ (STORE and FETCH) store 32-bit words.  It's also useful to be able to read and write bytes
1117 | 	so we also define standard words C@ and C!.
1118 | 
1119 | 	Byte-oriented operations only work on architectures which permit them.
1120 |  */
1121 | 
1122 | 	defcode "C!",2,,STOREBYTE, SUBSTORE
1123 | 	POP a0 a1		// a0 = address to store at  a1 = data to store there
1124 | 	sb a1, 0(a0)	// store it
1125 | 	NEXT
1126 | 
1127 | 	defcode "C@",2,,FETCHBYTE, STOREBYTE
1128 | 	POP a0			// address to fetch
1129 | 	lb a1, 0(a0)		// fetch it
1130 | 	PUSH a1			// push value onto stack
1131 | 	NEXT
1132 | 
1133 | /* C@C! is a useful byte copy primitive. */
1134 | 	defcode "C@C!",4,,CCOPY, FETCHBYTE
1135 | 	POP a0 a1		// a0 = destination address  a1 = source address
1136 | 	lb a2, 0(a1)		// get source character
1137 | 	sb a2, 0(a0)		// copy to destination
1138 | 	addi a1, a1, 8
1139 | 	PUSH a0 a1		// increment destination and source address
1140 | 	NEXT
1141 | 
1142 | /* and CMOVE is a block copy operation. */
1143 | 	defcode "CMOVE",5,,CMOVE,CCOPY
1144 | 	POP a0 a1 a2		// a0 = length  a1 = destination address  a2 = source address
1145 | 	RCALL _COPY_BYTES
1146 | 	NEXT
1147 | 
1148 | /* Copy bytes	
1149 | 	a0: length	
1150 | 	a1: destination address	
1151 | 	a2: source address
1152 | */
1153 | _COPY_BYTES:
1154 | 	slti a4, a0, 8		// jump to copy byte if length is < 8
1155 | 	bnez a4, 2f
1156 | 1:	// copy word by word
1157 | 	ld a3, 0(a2)
1158 | 	sd a3, 0(a1)		// copy source to destination
1159 | 	addi a0, a0, -8		// update length, destination, source
1160 | 	beqz a0, 3f
1161 | 	addi a1, a1, 8
1162 | 	addi a2, a2, 8
1163 | 	slti a4, a0, 8		// check if length is < 8
1164 | 	beqz a4, 1b
1165 | 2:	// copy byte
1166 | 	lb a3, 0(a2)
1167 | 	sb a3, 0(a1)
1168 | 	addi a0, a0, -1
1169 | 	addi a1, a1, 1
1170 | 	addi a2, a2, 1
1171 | 	bnez a0, 2b
1172 | 3:
1173 | 	ret
1174 | 
1175 | /*
1176 | 	BUILT-IN VARIABLES ----------------------------------------------------------------------
1177 | 
1178 | 	These are some built-in variables and related standard FORTH words.  Of these, the only one that we
1179 | 	have discussed so far was LATEST, which points to the last (most recently defined) word in the
1180 | 	FORTH dictionary.  LATEST is also a FORTH word which pushes the address of LATEST (the variable)
1181 | 	on to the stack, so you can read or write it using @ and ! operators.  For example, to print
1182 | 	the current value of LATEST (and this can apply to any FORTH variable) you would do:
1183 | 
1184 | 	LATEST @ . CR
1185 | 
1186 | 	To make defining variables shorter, I'm using a macro called defvar, similar to defword and
1187 | 	defcode above.  (In fact the defvar macro uses defcode to do the dictionary header).
1188 | */
1189 | 
1190 | 	.macro defvar name, namelen, flags=0, label, initial=0,link
1191 | 	defcode \name,\namelen,\flags,\label,\link
1192 | 	la t0, var_\name
1193 | 	PUSH t0
1194 | 	NEXT
1195 | 	.data
1196 | 	.balign 8
1197 | var_\name :
1198 | 	.dword \initial
1199 | 	.endm
1200 | 
1201 | /*
1202 | 	The built-in variables are:
1203 | 
1204 | 	STATE		Is the interpreter executing code (0) or compiling a word (non-zero)?
1205 | 	LATEST		Points to the latest (most recently defined) word in the dictionary.
1206 | 	HERE		Points to the next free byte of memory.  When compiling, compiled words go here.
1207 | 	S0		Stores the address of the top of the parameter stack.
1208 | 	BASE		The current base for printing and reading numbers.
1209 | 
1210 | */
1211 | 	defvar "STATE",5,,STATE,,CMOVE
1212 | 	defvar "HERE",4,,HERE,,STATE
1213 | 	defvar "LATEST",6,,LATEST,name_SYSCALL0,HERE // SYSCALL0 must be last in built-in dictionary
1214 | 	defvar "S0",2,,SZ,,LATEST
1215 | 	defvar "BASE",4,,BASE,10,SZ
1216 | 
1217 | /*
1218 | 	BUILT-IN CONSTANTS ----------------------------------------------------------------------
1219 | 
1220 | 	It's also useful to expose a few constants to FORTH.  When the word is executed it pushes a
1221 | 	constant value on the stack.
1222 | 
1223 | 	The built-in constants are:
1224 | 
1225 | 	VERSION		Is the current version of this FORTH.
1226 | 	R0		The address of the top of the return stack.
1227 | 	DOCOL		Pointer to DOCOL.
1228 | 	F_IMMED		The IMMEDIATE flag's actual value.
1229 | 	F_HIDDEN	The HIDDEN flag's actual value.
1230 | 	F_LENMASK	The length mask in the flags/len byte.
1231 | 
1232 | 	SYS_*		and the numeric codes of various Linux syscalls (from <asm/unistd.h>)
1233 | */
1234 | 
1235 | #include <asm/unistd.h>	// you might need this instead
1236 | 
1237 | 	.macro defconst name, namelen, flags=0, label, value, link
1238 | 	defcode \name,\namelen,\flags,\label,\link
1239 | 	li t0, \value
1240 | 	PUSH t0
1241 | 	NEXT
1242 | 	.endm
1243 | 
1244 | 	// same as defconst but the value is a symbol
1245 | 	.macro defconstsym name, namelen, flags=0, label, value, link
1246 | 	defcode \name,\namelen,\flags,\label,\link
1247 | 	la t0, \value
1248 | 	PUSH t0
1249 | 	NEXT
1250 | 	.endm
1251 | 
1252 | 	defconst "VERSION",7,,VERSION,JONES_VERSION,BASE
1253 | 	defconstsym "R0",2,,RZ,return_stack_top,VERSION
1254 | 	defconstsym "DOCOL",5,,__DOCOL,DOCOL,RZ
1255 | 	defconst "F_IMMED",7,,__F_IMMED,F_IMMED,__DOCOL
1256 | 	defconst "F_HIDDEN",8,,__F_HIDDEN,F_HIDDEN,__F_IMMED
1257 | 	defconst "F_LENMASK",9,,__F_LENMASK,F_LENMASK,__F_HIDDEN
1258 | 
1259 | 	defconst "SYS_EXIT",8,,SYS_EXIT,__NR_exit,__F_LENMASK
1260 | 	defconst "SYS_OPEN",8,,SYS_OPEN,__NR_openat,SYS_EXIT
1261 | 	defconst "SYS_CLOSE",9,,SYS_CLOSE,__NR_close,SYS_OPEN
1262 | 	defconst "SYS_READ",8,,SYS_READ,__NR_read,SYS_CLOSE
1263 | 	defconst "SYS_WRITE",9,,SYS_WRITE,__NR_write,SYS_READ
1264 | 	defconst "SYS_CREAT",9,,SYS_CREAT,__NR_openat,SYS_WRITE
1265 | 	defconst "SYS_BRK",7,,SYS_BRK,__NR_brk,SYS_CREAT
1266 | 
1267 | 	defconst "O_RDONLY",8,,__O_RDONLY,0,SYS_BRK
1268 | 	defconst "O_WRONLY",8,,__O_WRONLY,1,__O_RDONLY
1269 | 	defconst "O_RDWR",6,,__O_RDWR,2,__O_WRONLY
1270 | 	defconst "O_CREAT",7,,__O_CREAT,0100,__O_RDWR
1271 | 	defconst "O_EXCL",6,,__O_EXCL,0200,__O_CREAT
1272 | 	defconst "O_TRUNC",7,,__O_TRUNC,01000,__O_EXCL
1273 | 	defconst "O_APPEND",8,,__O_APPEND,02000,__O_TRUNC
1274 | 	defconst "O_NONBLOCK",10,,__O_NONBLOCK,04000,__O_APPEND
1275 | 
1276 | /*
1277 | 	RETURN STACK ----------------------------------------------------------------------
1278 | 
1279 | 	These words allow you to access the return stack.  Recall that the register fp always points to
1280 | 	the top of the return stack.
1281 | */
1282 | 
1283 | 	defcode ">R",2,,TOR, __O_NONBLOCK
1284 | 	POP a0			// pop parameter stack into a0
1285 | 	PUSHRSP a0		// push it on to the return stack
1286 | 	NEXT
1287 | 
1288 | 	defcode "R>",2,,FROMR, TOR
1289 | 	POPRSP a0		// pop return stack on to a0
1290 | 	PUSH a0			// and push on to parameter stack
1291 | 	NEXT
1292 | 
1293 | 	defcode "RSP@",4,,RSPFETCH, FROMR
1294 | 	PUSH fp
1295 | 	NEXT
1296 | 
1297 | 	defcode "RSP!",4,,RSPSTORE, RSPFETCH
1298 | 	POP fp
1299 | 	NEXT
1300 | 
1301 | 	defcode "RDROP",5,,RDROP, RSPSTORE
1302 | 	addi fp, fp, 8		// pop return stack and throw away
1303 | 	NEXT
1304 | 
1305 | /*
1306 | 	PARAMETER (DATA) STACK ----------------------------------------------------------------------
1307 | 
1308 | 	These functions allow you to manipulate the parameter stack.  Recall that Linux sets up the parameter
1309 | 	stack for us, and it is accessed through sp.
1310 | */
1311 | 
1312 | 	defcode "DSP@",4,,DSPFETCH, RDROP
1313 | 	mv a0, sp
1314 | 	PUSH a0
1315 | 	NEXT
1316 | 
1317 | 	defcode "DSP!",4,,DSPSTORE, DSPFETCH
1318 | 	POP a0
1319 | 	mv sp, a0
1320 | 	NEXT
1321 | 
1322 | /*
1323 | 	INPUT AND OUTPUT ----------------------------------------------------------------------
1324 | 
1325 | 	These are our first really meaty/complicated FORTH primitives.  I have chosen to write them in
1326 | 	assembler, but surprisingly in "real" FORTH implementations these are often written in terms
1327 | 	of more fundamental FORTH primitives.  I chose to avoid that because I think that just obscures
1328 | 	the implementation.  After all, you may not understand assembler but you can just think of it
1329 | 	as an opaque block of code that does what it says.
1330 | 
1331 | 	Let's discuss input first.
1332 | 
1333 | 	The FORTH word KEY reads the next byte from stdin (and pushes it on the parameter stack).
1334 | 	So if KEY is called and someone hits the space key, then the number 32 (ASCII code of space)
1335 | 	is pushed on the stack.
1336 | 
1337 | 	In FORTH there is no distinction between reading code and reading input.  We might be reading
1338 | 	and compiling code, we might be reading words to execute, we might be asking for the user
1339 | 	to type their name -- ultimately it all comes in through KEY.
1340 | 
1341 | 	The implementation of KEY uses an input buffer of a certain size (defined at the end of this
1342 | 	file).  It calls the Linux read(2) system call to fill this buffer and tracks its position
1343 | 	in the buffer using a couple of variables, and if it runs out of input buffer then it refills
1344 | 	it automatically.  The other thing that KEY does is if it detects that stdin has closed, it
1345 | 	exits the program, which is why when you hit ^D the FORTH system cleanly exits.
1346 | 
1347 |      buffer			      bufftop
1348 | 	|				 |
1349 | 	V				 V
1350 | 	+-------------------------------+--------------------------------------+
1351 | 	| INPUT READ FROM STDIN ....... | unused part of the buffer            |
1352 | 	+-------------------------------+--------------------------------------+
1353 | 			  ^
1354 | 			  |
1355 | 		       currkey (next character to read)
1356 | 
1357 | 	<---------------------- BUFFER_SIZE (4096 bytes) ---------------------->
1358 | */
1359 | 
1360 | 	.set BUFFER_SIZE, 4096
1361 | 
1362 | 	defcode "KEY",3,,KEY, DSPSTORE
1363 | 	RCALL _KEY
1364 | 	PUSH a0				// push return value on stack
1365 | 	NEXT
1366 | _KEY:
1367 | 	la t1, currkey
1368 | 	ld a1, 0(t1)
1369 | 	la t0, bufftop
1370 | 	ld t0, 0(t0)
1371 | 	sltu a2, a1, t0
1372 | 	beqz a2, 1f			// exhausted the input buffer?
1373 | 	lb a0, 0(a1)		// get next key from input buffer
1374 | 	addi a3, a1, 1
1375 | 	sd a3, 0(t1)		// increment currkey
1376 | 	ret
1377 | 
1378 | 1:	// Out of input; use read(2) to fetch more input from stdin.
1379 | 	mv a0, zero			// 1st param: stdin
1380 | 	la a1, buffer		// 2nd param: buffer
1381 | 	la t0, currkey
1382 | 	sd a1, 0(t0)
1383 | 	li a2, BUFFER_SIZE	// 3rd param: max length
1384 | 	li a7, __NR_read	// syscall: read
1385 | 	ecall
1386 | 	slt t0, zero, a0	// If a0 <= 0, then exit.
1387 | 	beqz t0, 2f
1388 | 	add a0, a0, a1		// buffer+a0 = bufftop
1389 | 	la t0, bufftop
1390 | 	sd a0, 0(t0)
1391 | 	j _KEY
1392 | 
1393 | 2:	// Error or end of input: exit the program.
1394 | 	li a7, __NR_exit	// syscall: exit
1395 | 	ecall
1396 | 
1397 | 	.data
1398 | 	.balign 8
1399 | currkey:
1400 | 	.dword buffer		// Current place in input buffer (next character to read).
1401 | bufftop:
1402 | 	.dword buffer		// Last valid data in input buffer + 1.
1403 | 
1404 | /*
1405 | 	By contrast, output is much simpler.  The FORTH word EMIT writes out a single byte to stdout.
1406 | 	This implementation just uses the write system call.  No attempt is made to buffer output, but
1407 | 	it would be a good exercise to add it.
1408 | */
1409 | 
1410 | 	defcode "EMIT",4,,EMIT, KEY
1411 | 	POP a0
1412 | 	RCALL _EMIT
1413 | 	NEXT
1414 | _EMIT:
1415 | 	// write needs the address of the byte to write
1416 | 	la a1, emit_scratch     // 2nd param: address
1417 | 	sb a0, 0(a1)
1418 | 
1419 | 	li a0, 1		// 1st param: stdout
1420 | 
1421 | 	li a2, 1 		// 3rd param: nbytes = 1
1422 | 
1423 | 	li a7, __NR_write	// write syscall
1424 | 	ecall
1425 | 	ret
1426 | 
1427 | 	.data			// NB: easier to fit in the .data section
1428 | emit_scratch:
1429 | 	.space 1		// scratch used by EMIT
1430 | 
1431 | /*
1432 | 	Back to input, WORD is a FORTH word which reads the next full word of input.
1433 | 
1434 | 	What it does in detail is that it first skips any blanks (spaces, tabs, newlines and so on).
1435 | 	Then it calls KEY to read characters into an internal buffer until it hits a blank.  Then it
1436 | 	calculates the length of the word it read and returns the address and the length as
1437 | 	two words on the stack (with the length at the top of stack).
1438 | 
1439 | 	Notice that WORD has a single internal buffer which it overwrites each time (rather like
1440 | 	a static C string).  Also notice that WORD's internal buffer is just 32 bytes long and
1441 | 	there is NO checking for overflow.  31 bytes happens to be the maximum length of a
1442 | 	FORTH word that we support, and that is what WORD is used for: to read FORTH words when
1443 | 	we are compiling and executing code.  The returned strings are not NUL-terminated.
1444 | 
1445 | 	Start address+length is the normal way to represent strings in FORTH (not ending in an
1446 | 	ASCII NUL character as in C), and so FORTH strings can contain any character including NULs
1447 | 	and can be any length.
1448 | 
1449 | 	WORD is not suitable for just reading strings (eg. user input) because of all the above
1450 | 	peculiarities and limitations.
1451 | 
1452 | 	Note that when executing, you'll see:
1453 | 	WORD FOO
1454 | 	which puts "FOO" and length 3 on the stack, but when compiling:
1455 | 	: BAR WORD FOO ;
1456 | 	is an error (or at least it doesn't do what you might expect).  Later we'll talk about compiling
1457 | 	and immediate mode, and you'll understand why.
1458 | */
1459 | 
1460 | 
1461 | 	/* Macro to help us check a character is or is'nt blank. 
1462 | 	Set 0 to the result register if the src register is a blank char */
1463 | 	.macro IS_BLANK result, src
1464 | 	li t0, ' '
1465 | 	slt \result, t0, \src		// is src reg >= ' '?
1466 | 	.endm
1467 | 
1468 | 	defcode "WORD",4,,WORD, EMIT
1469 | 	RCALL _WORD
1470 | 	PUSH a0 a1	// push base address and length
1471 | 	NEXT
1472 | 
1473 | _WORD:
1474 | 	/* Search for first non-blank character.  Also skip \ comments. */
1475 | 1:
1476 | 	RCALL _KEY		// get next key, returned in a0
1477 | 	addi a1, a0, -'\\'	// start of a comment?
1478 | 	beqz a1, 4f		// if so, skip the comment
1479 | 	IS_BLANK a1, a0
1480 | 	beqz a1, 1b		// if so, keep looking
1481 | 
1482 | 	/* Search for the end of the word, storing chars as we go. */
1483 | 	la a2, word_buffer	// pointer to return buffer
1484 | 2:
1485 | 	sb a0, 0(a2)		// add character to return buffer
1486 | 	addi a2, a2, 1
1487 | 	PUSH a2
1488 | 	RCALL _KEY		// get next key, returned in a0
1489 | 	POP a2
1490 | 	IS_BLANK a1, a0	// is blank?
1491 | 	beqz a1, 3f		// if not, keep looping
1492 | 	j 2b
1493 | 3:
1494 | 	/* Return the word (well, the static buffer) and length. */
1495 | 	la a0, word_buffer	// return address of the word
1496 | 	sub a1, a2, a0		// return length of the word
1497 | 	ret
1498 | 
1499 | 	/* Code to skip \ comments to end of the current line. */
1500 | 4:
1501 | 	RCALL _KEY
1502 | 	addi a1, a0, -'\n'	// end of line yet?
1503 | 	bnez a1, 4b
1504 | 	j 1b
1505 | 
1506 | 	.data			// NB: easier to fit in the .data section
1507 | 	.balign 8
1508 | 	// A static buffer where WORD returns.  Subsequent calls
1509 | 	// overwrite this buffer.  Maximum word length is 32 chars.
1510 | word_buffer:
1511 | 	.space 32
1512 | 
1513 | /*
1514 | 	As well as reading in words we'll need to read in numbers and for that we are using a function
1515 | 	called NUMBER.  This parses a numeric string such as one returned by WORD and pushes the
1516 | 	number on the parameter stack.
1517 | 
1518 | 	The function uses the variable BASE as the base (radix) for conversion, so for example if
1519 | 	BASE is 2 then we expect a binary number.  Normally BASE is 10.
1520 | 
1521 | 	If the word starts with a '-' character then the returned value is negative.
1522 | 
1523 | 	If the string can't be parsed as a number (or contains characters outside the current BASE)
1524 | 	then we need to return an error indication.  So NUMBER actually returns two items on the stack.
1525 | 	At the top of stack we return the number of unconverted characters (ie. if 0 then all characters
1526 | 	were converted, so there is no error).  Second from top of stack is the parsed number or a
1527 | 	partial value if there was an error.
1528 | */
1529 | 	defcode "NUMBER",6,,NUMBER, WORD
1530 | 	POP a1 a2	// a1 = length of string  a2 = start address of string
1531 | 	RCALL _NUMBER
1532 | 	PUSH a0 a1	// push parsed number and number of unparsed characters (0 = no error)
1533 | 	NEXT
1534 | 
1535 | _NUMBER:
1536 | 	mv a0, zero
1537 | 	beqz a1, 5f		// trying to parse a zero-length string is an error, but will return 0.
1538 | 
1539 | 	la a3, var_BASE
1540 | 	ld a3, 0(a3)		// get BASE
1541 | 
1542 | 	// Check if first character is '-'.
1543 | 	lb a4, 0(a2)		// a4 = first character in string
1544 | 	addi a2, a2, 1
1545 | 	addi a5, a4, -'-'	// negative number?
1546 | 	bnez a5, 2f		// number is negative if a5 = 0
1547 | 	addi a1, a1, -1
1548 | 	bnez a1, 1f
1549 | 	li a1, 1		// error: string is only '-'.
1550 | 	ret
1551 | 
1552 | 	// Loop reading digits.
1553 | 1:	mul a0, a0, a3		// a0 *= BASE
1554 | 	lb a4, 0(a2)		// a4 = next character in string
1555 | 	addi a2, a2, 1
1556 | 
1557 | 	// Convert 0-9, A-Z to a number 0-35.
1558 | 2:	sltiu t0, a4, '0'	// < '0'?
1559 | 	bnez t0, 4f
1560 | 	addi a4, a4, -'0'
1561 | 	sltiu t0, a4, 10	// <= '9'?
1562 | 	bnez t0, 3f
1563 | 	sltiu t0, a4, 17	// < 'A'? (17 is 'A' - '0')
1564 | 	bnez t0, 4f
1565 | 	addi a4, a4, -7	// Char - 'A' + 10
1566 | 
1567 | 3:	slt t0, a4, a3		// >= BASE?
1568 | 	beqz t0, 4f
1569 | 
1570 | 	// OK, so add it to a0 and loop.
1571 | 	add a0, a0, a4
1572 | 	addi a1, a1, -1
1573 | 	bnez a1, 1b
1574 | 
1575 | 4:	// Negate the result if first character was '-' (a5 = 0?).
1576 | 	bnez a5, 5f
1577 | 	neg a0, a0
1578 | 
1579 | 5:	ret
1580 | 
1581 | /*
1582 | 	DICTIONARY LOOK UPS ----------------------------------------------------------------------
1583 | 
1584 | 	We're building up to our prelude on how FORTH code is compiled, but first we need yet more infrastructure.
1585 | 
1586 | 	The FORTH word FIND takes a string (a word as parsed by WORD -- see above) and looks it up in the
1587 | 	dictionary.  What it actually returns is the address of the dictionary header, if it finds it,
1588 | 	or 0 if it didn't.
1589 | 
1590 | 	So if DOUBLE is defined in the dictionary, then WORD DOUBLE FIND returns the following pointer:
1591 | 
1592 |     pointer to this
1593 | 	|
1594 | 	|
1595 | 	V
1596 | 	+---------+---+---+---+---+---+---+---+---+------------+------------+------------+------------+
1597 | 	| LINK    | 6 | D | O | U | B | L | E | 0 | DOCOL      | DUP        | +          | EXIT       |
1598 | 	+---------+---+---+---+---+---+---+---+---+------------+------------+------------+------------+
1599 | 
1600 | 	See also >CFA and >DFA.
1601 | 
1602 | 	FIND doesn't find dictionary entries which are flagged as HIDDEN.  See below for why.
1603 | */
1604 | 
1605 | 	defcode "FIND",4,,FIND, NUMBER
1606 | 	POP a1 a2		// a1 = length  a2 = address
1607 | 	RCALL _FIND
1608 | 	PUSH a0			// a0 = address of dictionary entry (or NULL)
1609 | 	NEXT
1610 | 
1611 | _FIND:
1612 | 	// Now we start searching backwards through the dictionary for this word.
1613 | 	la a0, var_LATEST	// LATEST points to name header of the latest word in the dictionary
1614 | 	ld a0, 0(a0)
1615 | 1:	beqz a0, 4f		// NULL pointer?  (end of the linked list)
1616 | 
1617 | 	// Compare the length expected and the length of the word.
1618 | 	// Note that if the F_HIDDEN flag is set on the word, then by a bit of trickery
1619 | 	// this won't pick the word (the length will appear to be wrong).
1620 | 	lb a3, 8(a0)		// a0 = flags+length field
1621 | 	andi a3, a3, (F_HIDDEN|F_LENMASK) // a0 = name length
1622 | 	sub t0, a3, a1		// Length is the same?
1623 | 	bnez t0, 3f
1624 | 
1625 | 	// Compare the strings in detail.
1626 | 	mv t0, a0
1627 | 	mv t2, a2
1628 | 2:	lb t1, 9(t0) 		// Dictionary string we are checking against.
1629 | 	lb t3, 0(t2)
1630 | 	sub t1, t1, t3		// Compare the strings.
1631 | 	bnez t1, 3f		// Not the same.
1632 | 	addi t0, t0, 1
1633 | 	addi t2, t2, 1
1634 | 	addi a3, a3, -1
1635 | 	bnez a3, 2b
1636 | 
1637 | 	// The strings are the same - return the header pointer in a0
1638 | 	ret
1639 | 
1640 | 3:	ld a0, 0(a0)		// Move back through the link field to the previous word
1641 | 	j 1b			// .. and loop.
1642 | 
1643 | 4:	// Not found.
1644 | 	mv a0, zero		// Return zero to indicate not found.
1645 | 	ret
1646 | 
1647 | /*
1648 | 	FIND returns the dictionary pointer, but when compiling we need the codeword pointer (recall
1649 | 	that FORTH definitions are compiled into lists of codeword pointers).  The standard FORTH
1650 | 	word >CFA turns a dictionary pointer into a codeword pointer.
1651 | 
1652 | 	The example below shows the result of:
1653 | 
1654 | 		WORD DOUBLE FIND >CFA
1655 | 
1656 | 	FIND returns a pointer to this
1657 | 	|				>CFA converts it to a pointer to this
1658 | 	|					   |
1659 | 	V					   V
1660 | 	+---------+---+---+---+---+---+---+---+---+------------+------------+------------+------------+
1661 | 	| LINK    | 6 | D | O | U | B | L | E | 0 | DOCOL      | DUP        | +          | EXIT       |
1662 | 	+---------+---+---+---+---+---+---+---+---+------------+------------+------------+------------+
1663 | 						   codeword
1664 | 
1665 | 	Notes:
1666 | 
1667 | 	Because names vary in length, this isn't just a simple increment.
1668 | 
1669 | 	In this FORTH you cannot easily turn a codeword pointer back into a dictionary entry pointer, but
1670 | 	that is not true in most FORTH implementations where they store a back pointer in the definition
1671 | 	(with an obvious memory/complexity cost).  The reason they do this is that it is useful to be
1672 | 	able to go backwards (codeword -> dictionary entry) in order to decompile FORTH definitions
1673 | 	quickly.
1674 | 
1675 | 	What does CFA stand for?  My best guess is "Code Field Address".
1676 | */
1677 | 
1678 | 	defcode ">CFA",4,,TCFA, FIND
1679 | 	POP a0
1680 | 	RCALL _TCFA
1681 | 	PUSH a0
1682 | 	NEXT
1683 | _TCFA:
1684 | 	lb a1, 8(a0)		// Load flags+len into a0.
1685 | 	andi a1, a1, F_LENMASK	// Just the length, not the flags.
1686 | 	addi a0, a0, 9		// Skip link pointer and flags+len byte.
1687 | 	add a0, a0, a1		// Skip the name.
1688 | 	addi a0, a0, 7		// The codeword is 8-byte aligned.
1689 | 	andi a0, a0, ~7
1690 | 	ret
1691 | 
1692 | /*
1693 | 	Related to >CFA is >DFA which takes a dictionary entry address as returned by FIND and
1694 | 	returns a pointer to the first data field.
1695 | 
1696 | 	FIND returns a pointer to this
1697 | 	|				>CFA converts it to a pointer to this
1698 | 	|					   |
1699 | 	|					   |	>DFA converts it to a pointer to this
1700 | 	|					   |		 |
1701 | 	V					   V		 V
1702 | 	+---------+---+---+---+---+---+---+---+---+------------+------------+------------+------------+
1703 | 	| LINK    | 6 | D | O | U | B | L | E | 0 | DOCOL      | DUP        | +          | EXIT       |
1704 | 	+---------+---+---+---+---+---+---+---+---+------------+------------+------------+------------+
1705 | 						   codeword
1706 | 
1707 | 	(Note to those following the source of FIG-FORTH / ciforth: My >DFA definition is
1708 | 	different from theirs, because they have an extra indirection).
1709 | 
1710 | 	You can see that >DFA is easily defined in FORTH just by adding 8 to the result of >CFA.
1711 | */
1712 | 
1713 | 	defword ">DFA",4,,TDFA, TCFA
1714 | 	.dword TCFA		// >CFA		(get code field address)
1715 | 	.dword INCR8		// 8+		(add 8 to it to get to next word)
1716 | 	.dword EXIT		// EXIT		(return from FORTH word)
1717 | 
1718 | /*
1719 | 	COMPILING ----------------------------------------------------------------------
1720 | 
1721 | 	Now we'll talk about how FORTH compiles words.  Recall that a word definition looks like this:
1722 | 
1723 | 		: DOUBLE DUP + ;
1724 | 
1725 | 	and we have to turn this into:
1726 | 
1727 | 	  pointer to previous word
1728 | 	   ^
1729 | 	   |
1730 | 	+--|------+---+---+---+---+---+---+---+---+------------+------------+------------+------------+
1731 | 	| LINK    | 6 | D | O | U | B | L | E | 0 | DOCOL      | DUP        | +          | EXIT       |
1732 | 	+---------+---+---+---+---+---+---+---+---+------------+--|---------+------------+------------+
1733 | 	   ^       len                         pad  codeword      |
1734 | 	   |							  V
1735 | 	  LATEST points here				points to codeword of DUP
1736 | 
1737 | 	There are several problems to solve.  Where to put the new word?  How do we read words?  How
1738 | 	do we define the words : (COLON) and ; (SEMICOLON)?
1739 | 
1740 | 	FORTH solves this rather elegantly and as you might expect in a very low-level way which
1741 | 	allows you to change how the compiler works on your own code.
1742 | 
1743 | 	FORTH has an INTERPRET function (a true interpreter this time, not DOCOL) which runs in a
1744 | 	loop, reading words (using WORD), looking them up (using FIND), turning them into codeword
1745 | 	pointers (using >CFA) and deciding what to do with them.
1746 | 
1747 | 	What it does depends on the mode of the interpreter (in variable STATE).
1748 | 
1749 | 	When STATE is zero, the interpreter just runs each word as it looks them up.  This is known as
1750 | 	immediate mode.
1751 | 
1752 | 	The interesting stuff happens when STATE is non-zero -- compiling mode.  In this mode the
1753 | 	interpreter appends the codeword pointer to user memory (the HERE variable points to the next
1754 | 	free byte of user memory -- see DATA SEGMENT section below).
1755 | 
1756 | 	So you may be able to see how we could define : (COLON).  The general plan is:
1757 | 
1758 | 	(1) Use WORD to read the name of the function being defined.
1759 | 
1760 | 	(2) Construct the dictionary entry -- just the header part -- in user memory:
1761 | 
1762 |     pointer to previous word (from LATEST)			+-- Afterwards, HERE points here, where
1763 | 	   ^							|   the interpreter will start appending
1764 | 	   |							V   codewords.
1765 | 	+--|------+---+---+---+---+---+---+---+---+------------+
1766 | 	| LINK    | 6 | D | O | U | B | L | E | 0 | DOCOL      |
1767 | 	+---------+---+---+---+---+---+---+---+---+------------+
1768 | 		   len                         pad  codeword
1769 | 
1770 | 	(3) Set LATEST to point to the newly defined word, ...
1771 | 
1772 | 	(4) .. and most importantly leave HERE pointing just after the new codeword.  This is where
1773 | 	    the interpreter will append codewords.
1774 | 
1775 | 	(5) Set STATE to 1.  This goes into compile mode so the interpreter starts appending codewords to
1776 | 	    our partially-formed header.
1777 | 
1778 | 	After : has run, our input is here:
1779 | 
1780 | 	: DOUBLE DUP + ;
1781 | 		 ^
1782 | 		 |
1783 | 		Next byte returned by KEY will be the 'D' character of DUP
1784 | 
1785 | 	so the interpreter (now it's in compile mode, so I guess it's really the compiler) reads "DUP",
1786 | 	looks it up in the dictionary, gets its codeword pointer, and appends it:
1787 | 
1788 | 									     +-- HERE updated to point here.
1789 | 									     |
1790 | 									     V
1791 | 	+---------+---+---+---+---+---+---+---+---+------------+------------+
1792 | 	| LINK    | 6 | D | O | U | B | L | E | 0 | DOCOL      | DUP        |
1793 | 	+---------+---+---+---+---+---+---+---+---+------------+------------+
1794 | 		   len                         pad  codeword
1795 | 
1796 | 	Next we read +, get the codeword pointer, and append it:
1797 | 
1798 | 											  +-- HERE updated to point here.
1799 | 											  |
1800 | 											  V
1801 | 	+---------+---+---+---+---+---+---+---+---+------------+------------+------------+
1802 | 	| LINK    | 6 | D | O | U | B | L | E | 0 | DOCOL      | DUP        | +          |
1803 | 	+---------+---+---+---+---+---+---+---+---+------------+------------+------------+
1804 | 		   len                         pad  codeword
1805 | 
1806 | 	The issue is what happens next.  Obviously what we _don't_ want to happen is that we
1807 | 	read ";" and compile it and go on compiling everything afterwards.
1808 | 
1809 | 	At this point, FORTH uses a trick.  Remember the length byte in the dictionary definition
1810 | 	isn't just a plain length byte, but can also contain flags.  One flag is called the
1811 | 	IMMEDIATE flag (F_IMMED in this code).  If a word in the dictionary is flagged as
1812 | 	IMMEDIATE then the interpreter runs it immediately _even if it's in compile mode_.
1813 | 
1814 | 	This is how the word ; (SEMICOLON) works -- as a word flagged in the dictionary as IMMEDIATE.
1815 | 
1816 | 	And all it does is append the codeword for EXIT on to the current definition and switch
1817 | 	back to immediate mode (set STATE back to 0).  Shortly we'll see the actual definition
1818 | 	of ; and we'll see that it's really a very simple definition, declared IMMEDIATE.
1819 | 
1820 | 	After the interpreter reads ; and executes it 'immediately', we get this:
1821 | 
1822 | 	+---------+---+---+---+---+---+---+---+---+------------+------------+------------+------------+
1823 | 	| LINK    | 6 | D | O | U | B | L | E | 0 | DOCOL      | DUP        | +          | EXIT       |
1824 | 	+---------+---+---+---+---+---+---+---+---+------------+------------+------------+------------+
1825 | 		   len                         pad  codeword					       ^
1826 | 												       |
1827 | 												      HERE
1828 | 	STATE is set to 0.
1829 | 
1830 | 	And that's it, job done, our new definition is compiled, and we're back in immediate mode
1831 | 	just reading and executing words, perhaps including a call to test our new word DOUBLE.
1832 | 
1833 | 	The only last wrinkle in this is that while our word was being compiled, it was in a
1834 | 	half-finished state.  We certainly wouldn't want DOUBLE to be called somehow during
1835 | 	this time.  There are several ways to stop this from happening, but in FORTH what we
1836 | 	do is flag the word with the HIDDEN flag (F_HIDDEN in this code) just while it is
1837 | 	being compiled.  This prevents FIND from finding it, and thus in theory stops any
1838 | 	chance of it being called.
1839 | 
1840 | 	The above explains how compiling, : (COLON) and ; (SEMICOLON) works and in a moment I'm
1841 | 	going to define them.  The : (COLON) function can be made a little bit more general by writing
1842 | 	it in two parts.  The first part, called CREATE, makes just the header:
1843 | 
1844 | 						   +-- Afterwards, HERE points here.
1845 | 						   |
1846 | 						   V
1847 | 	+---------+---+---+---+---+---+---+---+---+
1848 | 	| LINK    | 6 | D | O | U | B | L | E | 0 |
1849 | 	+---------+---+---+---+---+---+---+---+---+
1850 | 		   len                         pad
1851 | 
1852 | 	and the second part, the actual definition of : (COLON), calls CREATE and appends the
1853 | 	DOCOL codeword, so leaving:
1854 | 
1855 | 								+-- Afterwards, HERE points here.
1856 | 								|
1857 | 								V
1858 | 	+---------+---+---+---+---+---+---+---+---+------------+
1859 | 	| LINK    | 6 | D | O | U | B | L | E | 0 | DOCOL      |
1860 | 	+---------+---+---+---+---+---+---+---+---+------------+
1861 | 		   len                         pad  codeword
1862 | 
1863 | 	CREATE is a standard FORTH word and the advantage of this split is that we can reuse it to
1864 | 	create other types of words (not just ones which contain code, but words which contain variables,
1865 | 	constants and other data).
1866 | */
1867 | 
1868 | 	defcode "CREATE",6,,CREATE, TDFA
1869 | 
1870 | 	// Get the name length and address.
1871 | 
1872 | 	POP a0 a2	// a0 = length  a2 = address of name
1873 | 
1874 | 	// Link pointer.
1875 | 
1876 | 	la a1, var_HERE		// a1 is the address of the header
1877 | 	ld a1, 0(a1)
1878 | 	la a3, var_LATEST	
1879 | 	ld t0, 0(a3)		// Get link pointer
1880 | 	sd t0, 0(a1)		// and store it in the header.
1881 | 	addi a1, a1, 8
1882 | 
1883 | 	// Length byte and the word itself.
1884 | 
1885 | 	sb a0, 0(a1)		// Store the length/flags byte.
1886 | 	addi a1, a1, 1
1887 | 	PUSH a0 a1		// save length and destination address
1888 | 	RCALL _COPY_BYTES	// Copy the word
1889 | 	POP a1 a0
1890 | 	add a1, a1, a0		// a1 is the address of the end of word
1891 | 	addi a1, a1, 7		// Align to next 8 byte boundary.
1892 | 	andi a1, a1, ~7
1893 | 
1894 | 	// Update LATEST and HERE.
1895 | 	la t0, var_HERE
1896 | 	la t1, var_LATEST
1897 | 	ld t2, 0(t0)
1898 | 	sd t2, 0(t1)
1899 | 	sd a1, 0(t0)
1900 | 	NEXT
1901 | 
1902 | /*
1903 | 	Because I want to define : (COLON) in FORTH, not assembler, we need a few more FORTH words
1904 | 	to use.
1905 | 
1906 | 	The first is , (COMMA) which is a standard FORTH word which appends a 32 bit integer to the user
1907 | 	memory pointed to by HERE, and adds 8 to HERE.  So the action of , (COMMA) is:
1908 | 
1909 | 							previous value of HERE
1910 | 								 |
1911 | 								 V
1912 | 	+---------+---+---+---+---+---+---+---+---+-- - - - - --+------------+
1913 | 	| LINK    | 6 | D | O | U | B | L | E | 0 |             |  <data>    |
1914 | 	+---------+---+---+---+---+---+---+---+---+-- - - - - --+------------+
1915 | 		   len                         pad		              ^
1916 | 									      |
1917 | 									new value of HERE
1918 | 
1919 | 	and <data> is whatever 32 bit integer was at the top of the stack.
1920 | 
1921 | 	, (COMMA) is quite a fundamental operation when compiling.  It is used to append codewords
1922 | 	to the current word that is being compiled.
1923 | */
1924 | 
1925 | 	defcode ",",1,,COMMA, CREATE
1926 | 	POP a0		// Code pointer to store.
1927 | 	RCALL _COMMA
1928 | 	NEXT
1929 | _COMMA:
1930 | 	la t0, var_HERE		// HERE
1931 | 	ld t1, 0(t0)
1932 | 	sd a0, 0(t1)		// Store it.
1933 | 	addi t1, t1, 8		// Increment
1934 | 	sd t1, 0(t0)		// Update HERE
1935 | 	ret
1936 | 
1937 | /*
1938 | 	Our definitions of : (COLON) and ; (SEMICOLON) will need to switch to and from compile mode.
1939 | 
1940 | 	Immediate mode vs. compile mode is stored in the global variable STATE, and by updating this
1941 | 	variable we can switch between the two modes.
1942 | 
1943 | 	For various reasons which may become apparent later, FORTH defines two standard words called
1944 | 	[ and ] (LBRAC and RBRAC) which switch between modes:
1945 | 
1946 | 	Word	Assembler	Action		Effect
1947 | 	[	LBRAC		STATE := 0	Switch to immediate mode.
1948 | 	]	RBRAC		STATE := 1	Switch to compile mode.
1949 | 
1950 | 	[ (LBRAC) is an IMMEDIATE word.  The reason is as follows: If we are in compile mode and the
1951 | 	interpreter saw [ then it would compile it rather than running it.  We would never be able to
1952 | 	switch back to immediate mode!  So we flag the word as IMMEDIATE so that even in compile mode
1953 | 	the word runs immediately, switching us back to immediate mode.
1954 | */
1955 | 
1956 | 	defcode "[",1,F_IMMED,LBRAC, COMMA
1957 | 	la t0, var_STATE
1958 | 	sd zero, 0(t0)	// Set STATE to 0.
1959 | 	NEXT
1960 | 
1961 | 	defcode "]",1,,RBRAC, LBRAC
1962 | 	la t0, var_STATE
1963 | 	la t1, 1
1964 | 	sd t1, 0(t0)	// Set STATE to 1.
1965 | 	NEXT
1966 | 
1967 | /*
1968 | 	Now we can define : (COLON) using CREATE.  It just calls CREATE, appends DOCOL (the codeword), sets
1969 | 	the word HIDDEN and goes into compile mode.
1970 | */
1971 | 
1972 | 	defword ":",1,,COLON, RBRAC
1973 | 	.dword WORD		// Get the name of the new word
1974 | 	.dword CREATE		// CREATE the dictionary entry / header
1975 | 	.dword LIT, DOCOL, COMMA	// Append DOCOL  (the codeword).
1976 | 	.dword LATEST, FETCH, HIDDEN // Make the word hidden (see below for definition).
1977 | 	.dword RBRAC		// Go into compile mode.
1978 | 	.dword EXIT		// Return from the function.
1979 | 
1980 | /*
1981 | 	; (SEMICOLON) is also elegantly simple.  Notice the F_IMMED flag.
1982 | */
1983 | 
1984 | 	defword ";",1,F_IMMED,SEMICOLON, COLON
1985 | 	.dword LIT, EXIT, COMMA	// Append EXIT (so the word will return).
1986 | 	.dword LATEST, FETCH, HIDDEN // Toggle hidden flag -- unhide the word (see below for definition).
1987 | 	.dword LBRAC		// Go back to IMMEDIATE mode.
1988 | 	.dword EXIT		// Return from the function.
1989 | 
1990 | /*
1991 | 	EXTENDING THE COMPILER ----------------------------------------------------------------------
1992 | 
1993 | 	Words flagged with IMMEDIATE (F_IMMED) aren't just for the FORTH compiler to use.  You can define
1994 | 	your own IMMEDIATE words too, and this is a crucial aspect when extending basic FORTH, because
1995 | 	it allows you in effect to extend the compiler itself.  Does gcc let you do that?
1996 | 
1997 | 	Standard FORTH words like IF, WHILE, ." and so on are all written as extensions to the basic
1998 | 	compiler, and are all IMMEDIATE words.
1999 | 
2000 | 	The IMMEDIATE word toggles the F_IMMED (IMMEDIATE flag) on the most recently defined word,
2001 | 	or on the current word if you call it in the middle of a definition.
2002 | 
2003 | 	Typical usage is:
2004 | 
2005 | 	: MYIMMEDWORD IMMEDIATE
2006 | 		...definition...
2007 | 	;
2008 | 
2009 | 	but some FORTH programmers write this instead:
2010 | 
2011 | 	: MYIMMEDWORD
2012 | 		...definition...
2013 | 	; IMMEDIATE
2014 | 
2015 | 	The two usages are equivalent, to a first approximation.
2016 | */
2017 | 
2018 | 	defcode "IMMEDIATE",9,F_IMMED,IMMEDIATE,SEMICOLON
2019 | 	la a0, var_LATEST	// LATEST word.
2020 | 	ld a0, 0(a0)
2021 | 	addi a0, a0, 8		// Point to name/flags byte.
2022 | 	lb t0, 0(a0)
2023 | 	xori t0, t0, F_IMMED	// Toggle the IMMED bit.
2024 | 	sb t0, 0(a0)
2025 | 	NEXT
2026 | 
2027 | /*
2028 | 	'addr HIDDEN' toggles the hidden flag (F_HIDDEN) of the word defined at addr.  To hide the
2029 | 	most recently defined word (used above in : and ; definitions) you would do:
2030 | 
2031 | 		LATEST @ HIDDEN
2032 | 
2033 | 	'HIDE word' toggles the flag on a named 'word'.
2034 | 
2035 | 	Setting this flag stops the word from being found by FIND, and so can be used to make 'private'
2036 | 	words.  For example, to break up a large word into smaller parts you might do:
2037 | 
2038 | 		: SUB1 ... subword ... ;
2039 | 		: SUB2 ... subword ... ;
2040 | 		: SUB3 ... subword ... ;
2041 | 		: MAIN ... defined in terms of SUB1, SUB2, SUB3 ... ;
2042 | 		HIDE SUB1
2043 | 		HIDE SUB2
2044 | 		HIDE SUB3
2045 | 
2046 | 	After this, only MAIN is 'exported' or seen by the rest of the program.
2047 | */
2048 | 
2049 | 	defcode "HIDDEN",6,,HIDDEN,IMMEDIATE
2050 | 	POP a0			// Dictionary entry.
2051 | 	addi a0, a0, 8		// Point to name/flags byte.
2052 | 	lb t0, 0(a0)
2053 | 	xori t0, t0, F_HIDDEN	// Toggle the HIDDEN bit.
2054 | 	sb t0, 0(a0)
2055 | 	NEXT
2056 | 
2057 | 	defword "HIDE",4,,HIDE,HIDDEN
2058 | 	.dword WORD		// Get the word (after HIDE).
2059 | 	.dword FIND		// Look up in the dictionary.
2060 | 	.dword HIDDEN		// Set F_HIDDEN flag.
2061 | 	.dword EXIT		// Return.
2062 | 
2063 | /*
2064 | 	' (TICK) is a standard FORTH word which returns the codeword pointer of the next word.
2065 | 
2066 | 	The common usage is:
2067 | 
2068 | 	' FOO ,
2069 | 
2070 | 	which appends the codeword of FOO to the current word we are defining (this only works in compiled code).
2071 | 
2072 | 	You tend to use ' in IMMEDIATE words.  For example an alternate (and rather useless) way to define
2073 | 	a literal 2 might be:
2074 | 
2075 | 	: LIT2 IMMEDIATE
2076 | 		' LIT ,		\ Appends LIT to the currently-being-defined word
2077 | 		2 ,		\ Appends the number 2 to the currently-being-defined word
2078 | 	;
2079 | 
2080 | 	So you could do:
2081 | 
2082 | 	: DOUBLE LIT2 * ;
2083 | 
2084 | 	(If you don't understand how LIT2 works, then you should review the material about compiling words
2085 | 	and immediate mode).
2086 | 
2087 | 	This definition of ' uses a cheat which I copied from buzzard92.  As a result it only works in
2088 | 	compiled code.  It is possible to write a version of ' based on WORD, FIND, >CFA which works in
2089 | 	immediate mode too.
2090 | */
2091 | 	defcode "'",1,,TICK,HIDE
2092 | 	ld t0, 0(s1)		// Get the address of the next word and skip it.
2093 | 	PUSH t0			// Push it on the stack.
2094 | 	addi s1, s1, 8
2095 | 	NEXT
2096 | 
2097 | /*
2098 | 	BRANCHING ----------------------------------------------------------------------
2099 | 
2100 | 	It turns out that all you need in order to define looping constructs, IF-statements, etc.
2101 | 	are two primitives.
2102 | 
2103 | 	BRANCH is an unconditional branch. 0BRANCH is a conditional branch (it only branches if the
2104 | 	top of stack is zero).
2105 | 
2106 | 	The diagram below shows how BRANCH works in some imaginary compiled word.  When BRANCH executes,
2107 | 	s1 starts by pointing to the offset field (compare to LIT above):
2108 | 
2109 | 	+---------------------+-------+---- - - ---+------------+------------+---- - - - ----+------------+
2110 | 	| (Dictionary header) | DOCOL |            | BRANCH     | offset     | (skipped)     | word       |
2111 | 	+---------------------+-------+---- - - ---+------------+-----|------+---- - - - ----+------------+
2112 | 								   ^  |			      ^
2113 | 								   |  |			      |
2114 | 								   |  +-----------------------+
2115 | 								  s1 added to offset
2116 | 
2117 | 	The offset is added to s1 to make the new s1, and the result is that when NEXT runs, execution
2118 | 	continues at the branch target.  Negative offsets work as expected.
2119 | 
2120 | 	0BRANCH is the same except the branch happens conditionally.
2121 | 
2122 | 	Now standard FORTH words such as IF, THEN, ELSE, WHILE, REPEAT, etc. can be implemented entirely
2123 | 	in FORTH.  They are IMMEDIATE words which append various combinations of BRANCH or 0BRANCH
2124 | 	into the word currently being compiled.
2125 | 
2126 | 	As an example, code written like this:
2127 | 
2128 | 		condition-code IF true-part THEN rest-code
2129 | 
2130 | 	compiles to:
2131 | 
2132 | 		condition-code 0BRANCH OFFSET true-part rest-code
2133 | 					  |		^
2134 | 					  |		|
2135 | 					  +-------------+
2136 | */
2137 | 
2138 | 	defcode "BRANCH",6,,BRANCH,TICK
2139 | 	ld t0, 0(s1)
2140 | 	add s1, s1, t0		// add the offset to the instruction pointer
2141 | 	NEXT
2142 | 
2143 | 	defcode "0BRANCH",7,,ZBRANCH,BRANCH
2144 | 	POP t0			// top of stack is zero?
2145 | 	beqz t0, code_BRANCH	// if so, jump back to the branch function above
2146 | 	addi s1, s1, 8		// otherwise we need to skip the offset
2147 | 	NEXT
2148 | 
2149 | /*
2150 | 	LITERAL STRINGS ----------------------------------------------------------------------
2151 | 
2152 | 	LITSTRING is a primitive used to implement the ." and S" operators (which are written in
2153 | 	FORTH).  See the definition of those operators later.
2154 | 
2155 | 	TELL just prints a string.  It's more efficient to define this in assembly because we
2156 | 	can make it a single Linux syscall.
2157 | */
2158 | 
2159 | 	defcode "LITSTRING",9,,LITSTRING,ZBRANCH
2160 | 	ld a0, 0(s1)		// get the length of the string
2161 | 	addi s1, s1, 8
2162 | 	PUSH s1 a0		// push the address of the start and length of the string
2163 | 	add s1, s1, a0		// skip past the string
2164 |  	addi s1, s1, 7		// but round up to next 8 byte boundary
2165 | 	andi s1, s1, ~7
2166 | 	NEXT
2167 | 
2168 | 	defcode "TELL",4,,TELL,LITSTRING
2169 | 	li a0, 1		// 1st param: stdout
2170 | 	POP a2 a1		// a2 = 3rd param: length of string  a1 = 2nd param: address of string
2171 | 	li a7, __NR_write	// write syscall
2172 | 	ecall
2173 | 	NEXT
2174 | 
2175 | /*
2176 | 	QUIT AND INTERPRET ----------------------------------------------------------------------
2177 | 
2178 | 	QUIT is the first FORTH function called, almost immediately after the FORTH system "boots".
2179 | 	As explained before, QUIT doesn't "quit" anything.  It does some initialisation (in particular
2180 | 	it clears the return stack) and it calls INTERPRET in a loop to interpret commands.  The
2181 | 	reason it is called QUIT is because you can call it from your own FORTH words in order to
2182 | 	"quit" your program and start again at the user prompt.
2183 | 
2184 | 	INTERPRET is the FORTH interpreter ("toploop", "toplevel" or "REPL" might be a more accurate
2185 | 	description -- see: http://en.wikipedia.org/wiki/REPL).
2186 | */
2187 | 
2188 | 	// QUIT must not return (ie. must not call EXIT).
2189 | 	defword "QUIT",4,,QUIT,TELL
2190 | 	.dword RZ,RSPSTORE	// R0 RSP!, clear the return stack
2191 | 	.dword INTERPRET		// interpret the next word
2192 | 	.dword BRANCH,-16		// and loop (indefinitely)
2193 | 
2194 | /*
2195 | 	This interpreter is pretty simple, but remember that in FORTH you can always override
2196 | 	it later with a more powerful one!
2197 |  */
2198 | 	defcode "INTERPRET",9,,INTERPRET,QUIT
2199 | 	RCALL _WORD		// Returns a1 = length, a0 = pointer to word.
2200 | 
2201 | 	// Is it in the dictionary? Use s2 as the interpret_is_lit flag.
2202 | 	mv s2, zero		// Not a literal number (not yet anyway ...)
2203 | 	mv a2, a0
2204 | 	RCALL _FIND		// Returns a0 = pointer to header or 0 if not found.
2205 | 	beqz a0, 1f
2206 | 
2207 | 	// In the dictionary.  Is it an IMMEDIATE codeword?
2208 | 	lb t0, 8(a0)		// Get name+flags.
2209 | 	PUSH t0			// Just save it for now.
2210 | 	RCALL _TCFA		// Convert dictionary entry (in a0) to codeword pointer.
2211 | 	POP t0
2212 | 	andi t0, t0, F_IMMED	// Is IMMED flag set?
2213 | 	bnez t0, 4f		// If IMMED, jump straight to executing.
2214 | 
2215 | 	j 2f
2216 | 
2217 | 1:	// Not in the dictionary (not a word) so assume it's a literal number.
2218 | 	addi s2, s2, 1		// inc interpret_is_lit
2219 | 	RCALL _NUMBER		// Returns the parsed number in a0, a1 > 0 if error
2220 | 	bnez a1, 6f
2221 | 	mv a1, a0
2222 | 	la a0, LIT		// The word is LIT
2223 | 
2224 | 2:	// Are we compiling or executing?
2225 | 	la t1, var_STATE
2226 | 	ld t0, 0(t1)
2227 | 	beqz t0, 4f		// Jump if executing.
2228 | 
2229 | 	// Compiling - just append the word to the current dictionary definition.
2230 | 	RCALL _COMMA
2231 | 	beqz s2, 3f		// Was it a literal?
2232 | 	mv a0, a1		// Yes, so LIT is followed by a number.
2233 | 	RCALL _COMMA
2234 | 3:	NEXT
2235 | 
2236 | 4:	// Executing - run it!
2237 | 	bnez s2, 5f		// Literal?
2238 | 
2239 | 	// Not a literal, execute it now.  This never returns, but the codeword will
2240 | 	// eventually call NEXT which will reenter the loop in QUIT.
2241 | 	ld t0, 0(a0)
2242 | 	jr t0
2243 | 
2244 | 5:	// Executing a literal, which means push it on the stack.
2245 | 	PUSH a1
2246 | 	NEXT
2247 | 
2248 | 6:	// Parse error (not a known word or a number in the current BASE).
2249 | 	// Print an error message followed by up to 40 characters of context.
2250 | 	li a0, 2		// 1st param: stderr
2251 | 	la a1, errmsg		// 2nd param: error message
2252 | 	la t0, errmsgend
2253 | 	sub a2, t0, a1		// 3rd param: length of string
2254 | 	li a7, __NR_write	// write syscall
2255 | 	ecall
2256 | 
2257 | 	la a1, currkey		// the error occurred just before currkey position
2258 | 	ld a1, 0(a1)
2259 | 	la t0, buffer
2260 | 	sub a2, a1, t0		// a2 = currkey - buffer (length in buffer before currkey)
2261 | 	li t2, 40
2262 | 	slt t0, t2, a2		// if > 40, then print only 40 characters
2263 | 	beqz t0, 7f
2264 | 	li a2, 40
2265 | 7:	sub a1, a1, a2		// a1 = start of area to print, a2 = length
2266 | 	li a0, 2
2267 | 	li a7, __NR_write	// write syscall
2268 | 	ecall
2269 | 
2270 | 	la a1, errmsgnl		// newline
2271 | 	li a2, 1
2272 | 	li a0, 2
2273 | 	li a7, __NR_write	// write syscall
2274 | 	ecall
2275 | 
2276 | 	NEXT
2277 | 
2278 | 	.section .rodata
2279 | errmsg: .ascii "PARSE ERROR: "
2280 | errmsgend:
2281 | errmsgnl: .ascii "\n"
2282 | 
2283 | 	.data			// NB: easier to fit in the .data section
2284 | 	.balign 8
2285 | 
2286 | /*
2287 | 	ODDS AND ENDS ----------------------------------------------------------------------
2288 | 
2289 | 	CHAR puts the ASCII code of the first character of the following word on the stack.  For example
2290 | 	CHAR A puts 65 on the stack.
2291 | 
2292 | 	EXECUTE is used to run execution tokens.  See the discussion of execution tokens in the
2293 | 	FORTH code for more details.
2294 | 
2295 | 	SYSCALL0, SYSCALL1, SYSCALL2, SYSCALL3 make a standard Linux system call.  (See <asm/unistd.h>
2296 | 	for a list of system call numbers).  As their name suggests these forms take between 0 and 3
2297 | 	syscall parameters, plus the system call number.
2298 | 
2299 | 	In this FORTH, SYSCALL0 must be the last word in the built-in (assembler) dictionary because we
2300 | 	initialise the LATEST variable to point to it.  This means that if you want to extend the assembler
2301 | 	part, you must put new words before SYSCALL0, or else change how LATEST is initialised.
2302 | */
2303 | 
2304 | 	defcode "CHAR",4,,CHAR,INTERPRET
2305 | 	RCALL _WORD		// Returns a1 = length, a0 = pointer to word.
2306 | 	lb t0, 0(a0)		// Get the first character of the word.
2307 | 	PUSH t0			// Push it onto the stack.
2308 | 	NEXT
2309 | 
2310 | 	defcode "EXECUTE",7,,EXECUTE,CHAR
2311 | 	POP a0			// Get xt into a0
2312 | 	ld t0, 0(a0)
2313 | 	jr t0			// and jump to it.
2314 | 				// After xt runs its NEXT will continue executing the current word.
2315 | 
2316 | 	defcode "SYSCALL3",8,,SYSCALL3,EXECUTE
2317 | 	POP a7 a0 a1 a2		// a7 = System call number (see <asm/unistd.h>)  a0..a2 = parameters
2318 | 	ecall
2319 | 	PUSH a0			// Result (negative for -errno)
2320 | 	NEXT
2321 | 
2322 | 	defcode "SYSCALL2",8,,SYSCALL2,SYSCALL3
2323 | 	POP a7 a0 a1		// a7 = System call number (see <asm/unistd.h>)  a0..a1 = parameters
2324 | 	ecall
2325 | 	PUSH a0			// Result (negative for -errno)
2326 | 	NEXT
2327 | 
2328 | 	defcode "SYSCALL1",8,,SYSCALL1,SYSCALL2
2329 | 	POP a7 a0		// a7 = System call number (see <asm/unistd.h>)  a0 = parameter
2330 | 	ecall
2331 | 	PUSH a0			// Result (negative for -errno)
2332 | 	NEXT
2333 | 
2334 | 	defcode "SYSCALL0",8,,SYSCALL0,SYSCALL1
2335 | 	POP a7			// System call number (see <asm/unistd.h>)
2336 | 	ecall
2337 | 	PUSH a0			// Result (negative for -errno)
2338 | 	NEXT
2339 | 
2340 | /*
2341 | 	DATA SEGMENT ----------------------------------------------------------------------
2342 | 
2343 | 	Here we set up the Linux data segment, used for user definitions and variously known as just
2344 | 	the 'data segment', 'user memory' or 'user definitions area'.  It is an area of memory which
2345 | 	grows upwards and stores both newly-defined FORTH words and global variables of various
2346 | 	sorts.
2347 | 
2348 | 	It is completely analogous to the C heap, except there is no generalised 'malloc' and 'free'
2349 | 	(but as with everything in FORTH, writing such functions would just be a Simple Matter
2350 | 	Of Programming).  Instead in normal use the data segment just grows upwards as new FORTH
2351 | 	words are defined/appended to it.
2352 | 
2353 | 	There are various "features" of the GNU toolchain which make setting up the data segment
2354 | 	more complicated than it really needs to be.  One is the GNU linker which inserts a random
2355 | 	"build ID" segment.  Another is Address Space Randomization which means we can't tell
2356 | 	where the kernel will choose to place the data segment (or the stack for that matter).
2357 | 
2358 | 	Therefore writing this set_up_data_segment assembler routine is a little more complicated
2359 | 	than it really needs to be.  We ask the Linux kernel where it thinks the data segment starts
2360 | 	using the brk(2) system call, then ask it to reserve some initial space (also using brk(2)).
2361 | 
2362 | 	You don't need to worry about this code.
2363 | */
2364 | 	.text
2365 | 	.set INITIAL_DATA_SEGMENT_SIZE,262144
2366 | set_up_data_segment:
2367 | 	mv a0, zero		// Call brk(0)
2368 | 	li a7, __NR_brk
2369 | 	ecall
2370 | 	la t0, var_HERE		// Initialise HERE to point at beginning of data segment.
2371 | 	sd a0, 0(t0)
2372 | 	li t0, INITIAL_DATA_SEGMENT_SIZE
2373 | 	add a0, a0, t0		// Reserve nn bytes of memory for initial data segment.
2374 | 	li a7, __NR_brk		// Call brk(HERE+INITIAL_DATA_SEGMENT_SIZE)
2375 | 	ecall
2376 | 	ret
2377 | 
2378 | /*
2379 | 	We allocate static buffers for the return static and input buffer (used when
2380 | 	reading in files and text that the user types in).
2381 | */
2382 | 	.set RETURN_STACK_SIZE, 8192
2383 | 
2384 | 	.bss
2385 | /* FORTH return stack. */
2386 | 	.balign 4096
2387 | return_stack:
2388 | 	.space RETURN_STACK_SIZE
2389 | return_stack_top:		// Initial top of return stack.
2390 | 
2391 | /* This is used as a temporary input buffer when reading from files or the terminal. */
2392 | 	.balign 4096
2393 | buffer:
2394 | 	.space BUFFER_SIZE
2395 | 
2396 | /*
2397 | 	START OF FORTH CODE ----------------------------------------------------------------------
2398 | 
2399 | 	We've now reached the stage where the FORTH system is running and self-hosting.  All further
2400 | 	words can be written as FORTH itself, including words like IF, THEN, .", etc which in most
2401 | 	languages would be considered rather fundamental.
2402 | 
2403 | 	I used to append this here in the assembly file, but I got sick of fighting against gas's
2404 | 	crack-smoking (lack of) multiline string syntax.  So now that is in a separate file called
2405 | 	jonesforth.f
2406 | 
2407 | 	If you don't already have that file, download it from http://annexia.org/forth in order
2408 | 	to continue the tutorial.
2409 | */
2410 | 
2411 | /* END OF jonesforth.S */
2412 | 


--------------------------------------------------------------------------------
/jonesforth.f:
--------------------------------------------------------------------------------
   1 | \ -*- text -*-
   2 | \	A sometimes minimal FORTH compiler and tutorial for Linux / i386 systems. -*- asm -*-
   3 | \	By Richard W.M. Jones <rich@annexia.org> http://annexia.org/forth
   4 | \	This is PUBLIC DOMAIN (see public domain release statement below).
   5 | \	$Id: jonesforth.f,v 1.18 2009-09-11 08:32:33 rich Exp $
   6 | \
   7 | \	The first part of this tutorial is in jonesforth.S.  Get if from http://annexia.org/forth
   8 | \
   9 | \	PUBLIC DOMAIN ----------------------------------------------------------------------
  10 | \
  11 | \	I, the copyright holder of this work, hereby release it into the public domain. This applies worldwide.
  12 | \
  13 | \	In case this is not legally possible, I grant any entity the right to use this work for any purpose,
  14 | \	without any conditions, unless such conditions are required by law.
  15 | \
  16 | \	SETTING UP ----------------------------------------------------------------------
  17 | \
  18 | \	Let's get a few housekeeping things out of the way.  Firstly because I need to draw lots of
  19 | \	ASCII-art diagrams to explain concepts, the best way to look at this is using a window which
  20 | \	uses a fixed width font and is at least this wide:
  21 | \
  22 | \<------------------------------------------------------------------------------------------------------------------------>
  23 | \
  24 | \	Secondly make sure TABS are set to 8 characters.  The following should be a vertical
  25 | \	line.  If not, sort out your tabs.
  26 | \
  27 | \		|
  28 | \	        |
  29 | \	    	|
  30 | \
  31 | \	Thirdly I assume that your screen is at least 50 characters high.
  32 | \
  33 | \	START OF FORTH CODE ----------------------------------------------------------------------
  34 | \
  35 | \	We've now reached the stage where the FORTH system is running and self-hosting.  All further
  36 | \	words can be written as FORTH itself, including words like IF, THEN, .", etc which in most
  37 | \	languages would be considered rather fundamental.
  38 | \
  39 | \	Some notes about the code:
  40 | \
  41 | \	I use indenting to show structure.  The amount of whitespace has no meaning to FORTH however
  42 | \	except that you must use at least one whitespace character between words, and words themselves
  43 | \	cannot contain whitespace.
  44 | \
  45 | \	FORTH is case-sensitive.  Use capslock!
  46 | 
  47 | \ The primitive word /MOD (DIVMOD) leaves both the quotient and the remainder on the stack.  (On
  48 | \ i386, the idivl instruction gives both anyway).  Now we can define the / and MOD in terms of /MOD
  49 | \ and a few other primitives.
  50 | : / /MOD SWAP DROP ;
  51 | : MOD /MOD DROP ;
  52 | 
  53 | \ Define some character constants
  54 | : '\n' 10 ;
  55 | : BL   32 ; \ BL (BLank) is a standard FORTH word for space.
  56 | 
  57 | \ CR prints a carriage return
  58 | : CR '\n' EMIT ;
  59 | 
  60 | \ SPACE prints a space
  61 | : SPACE BL EMIT ;
  62 | 
  63 | \ NEGATE leaves the negative of a number on the stack.
  64 | : NEGATE 0 SWAP - ;
  65 | 
  66 | \ Standard words for booleans.
  67 | : TRUE  1 ;
  68 | : FALSE 0 ;
  69 | : NOT   0= ;
  70 | 
  71 | \ LITERAL takes whatever is on the stack and compiles LIT <foo>
  72 | : LITERAL IMMEDIATE
  73 | 	' LIT ,		\ compile LIT
  74 | 	,		\ compile the literal itself (from the stack)
  75 | 	;
  76 | 
  77 | \ Now we can use [ and ] to insert literals which are calculated at compile time.  (Recall that
  78 | \ [ and ] are the FORTH words which switch into and out of immediate mode.)
  79 | \ Within definitions, use [ ... ] LITERAL anywhere that '...' is a constant expression which you
  80 | \ would rather only compute once (at compile time, rather than calculating it each time your word runs).
  81 | : ':'
  82 | 	[		\ go into immediate mode (temporarily)
  83 | 	CHAR :		\ push the number 58 (ASCII code of colon) on the parameter stack
  84 | 	]		\ go back to compile mode
  85 | 	LITERAL		\ compile LIT 58 as the definition of ':' word
  86 | ;
  87 | 
  88 | \ A few more character constants defined the same way as above.
  89 | : ';' [ CHAR ; ] LITERAL ;
  90 | : '(' [ CHAR ( ] LITERAL ;
  91 | : ')' [ CHAR ) ] LITERAL ;
  92 | : '"' [ CHAR " ] LITERAL ;
  93 | : 'A' [ CHAR A ] LITERAL ;
  94 | : '0' [ CHAR 0 ] LITERAL ;
  95 | : '-' [ CHAR - ] LITERAL ;
  96 | : '.' [ CHAR . ] LITERAL ;
  97 | 
  98 | \ While compiling, '[COMPILE] word' compiles 'word' if it would otherwise be IMMEDIATE.
  99 | : [COMPILE] IMMEDIATE
 100 | 	WORD		\ get the next word
 101 | 	FIND		\ find it in the dictionary
 102 | 	>CFA		\ get its codeword
 103 | 	,		\ and compile that
 104 | ;
 105 | 
 106 | \ RECURSE makes a recursive call to the current word that is being compiled.
 107 | \
 108 | \ Normally while a word is being compiled, it is marked HIDDEN so that references to the
 109 | \ same word within are calls to the previous definition of the word.  However we still have
 110 | \ access to the word which we are currently compiling through the LATEST pointer so we
 111 | \ can use that to compile a recursive call.
 112 | : RECURSE IMMEDIATE
 113 | 	LATEST @	\ LATEST points to the word being compiled at the moment
 114 | 	>CFA		\ get the codeword
 115 | 	,		\ compile it
 116 | ;
 117 | 
 118 | \	CONTROL STRUCTURES ----------------------------------------------------------------------
 119 | \
 120 | \ So far we have defined only very simple definitions.  Before we can go further, we really need to
 121 | \ make some control structures, like IF ... THEN and loops.  Luckily we can define arbitrary control
 122 | \ structures directly in FORTH.
 123 | \
 124 | \ Please note that the control structures as I have defined them here will only work inside compiled
 125 | \ words.  If you try to type in expressions using IF, etc. in immediate mode, then they won't work.
 126 | \ Making these work in immediate mode is left as an exercise for the reader.
 127 | 
 128 | \ condition IF true-part THEN rest
 129 | \	-- compiles to: --> condition 0BRANCH OFFSET true-part rest
 130 | \	where OFFSET is the offset of 'rest'
 131 | \ condition IF true-part ELSE false-part THEN
 132 | \ 	-- compiles to: --> condition 0BRANCH OFFSET true-part BRANCH OFFSET2 false-part rest
 133 | \	where OFFSET if the offset of false-part and OFFSET2 is the offset of rest
 134 | 
 135 | \ IF is an IMMEDIATE word which compiles 0BRANCH followed by a dummy offset, and places
 136 | \ the address of the 0BRANCH on the stack.  Later when we see THEN, we pop that address
 137 | \ off the stack, calculate the offset, and back-fill the offset.
 138 | : IF IMMEDIATE
 139 | 	' 0BRANCH ,	\ compile 0BRANCH
 140 | 	HERE @		\ save location of the offset on the stack
 141 | 	0 ,		\ compile a dummy offset
 142 | ;
 143 | 
 144 | : THEN IMMEDIATE
 145 | 	DUP
 146 | 	HERE @ SWAP -	\ calculate the offset from the address saved on the stack
 147 | 	SWAP !		\ store the offset in the back-filled location
 148 | ;
 149 | 
 150 | : ELSE IMMEDIATE
 151 | 	' BRANCH ,	\ definite branch to just over the false-part
 152 | 	HERE @		\ save location of the offset on the stack
 153 | 	0 ,		\ compile a dummy offset
 154 | 	SWAP		\ now back-fill the original (IF) offset
 155 | 	DUP		\ same as for THEN word above
 156 | 	HERE @ SWAP -
 157 | 	SWAP !
 158 | ;
 159 | 
 160 | \ BEGIN loop-part condition UNTIL
 161 | \	-- compiles to: --> loop-part condition 0BRANCH OFFSET
 162 | \	where OFFSET points back to the loop-part
 163 | \ This is like do { loop-part } while (condition) in the C language
 164 | : BEGIN IMMEDIATE
 165 | 	HERE @		\ save location on the stack
 166 | ;
 167 | 
 168 | : UNTIL IMMEDIATE
 169 | 	' 0BRANCH ,	\ compile 0BRANCH
 170 | 	HERE @ -	\ calculate the offset from the address saved on the stack
 171 | 	,		\ compile the offset here
 172 | ;
 173 | 
 174 | \ BEGIN loop-part AGAIN
 175 | \	-- compiles to: --> loop-part BRANCH OFFSET
 176 | \	where OFFSET points back to the loop-part
 177 | \ In other words, an infinite loop which can only be returned from with EXIT
 178 | : AGAIN IMMEDIATE
 179 | 	' BRANCH ,	\ compile BRANCH
 180 | 	HERE @ -	\ calculate the offset back
 181 | 	,		\ compile the offset here
 182 | ;
 183 | 
 184 | \ BEGIN condition WHILE loop-part REPEAT
 185 | \	-- compiles to: --> condition 0BRANCH OFFSET2 loop-part BRANCH OFFSET
 186 | \	where OFFSET points back to condition (the beginning) and OFFSET2 points to after the whole piece of code
 187 | \ So this is like a while (condition) { loop-part } loop in the C language
 188 | : WHILE IMMEDIATE
 189 | 	' 0BRANCH ,	\ compile 0BRANCH
 190 | 	HERE @		\ save location of the offset2 on the stack
 191 | 	0 ,		\ compile a dummy offset2
 192 | ;
 193 | 
 194 | : REPEAT IMMEDIATE
 195 | 	' BRANCH ,	\ compile BRANCH
 196 | 	SWAP		\ get the original offset (from BEGIN)
 197 | 	HERE @ - ,	\ and compile it after BRANCH
 198 | 	DUP
 199 | 	HERE @ SWAP -	\ calculate the offset2
 200 | 	SWAP !		\ and back-fill it in the original location
 201 | ;
 202 | 
 203 | \ UNLESS is the same as IF but the test is reversed.
 204 | \
 205 | \ Note the use of [COMPILE]: Since IF is IMMEDIATE we don't want it to be executed while UNLESS
 206 | \ is compiling, but while UNLESS is running (which happens to be when whatever word using UNLESS is
 207 | \ being compiled -- whew!).  So we use [COMPILE] to reverse the effect of marking IF as immediate.
 208 | \ This trick is generally used when we want to write our own control words without having to
 209 | \ implement them all in terms of the primitives 0BRANCH and BRANCH, but instead reusing simpler
 210 | \ control words like (in this instance) IF.
 211 | : UNLESS IMMEDIATE
 212 | 	' NOT ,		\ compile NOT (to reverse the test)
 213 | 	[COMPILE] IF	\ continue by calling the normal IF
 214 | ;
 215 | 
 216 | \	COMMENTS ----------------------------------------------------------------------
 217 | \
 218 | \ FORTH allows ( ... ) as comments within function definitions.  This works by having an IMMEDIATE
 219 | \ word called ( which just drops input characters until it hits the corresponding ).
 220 | : ( IMMEDIATE
 221 | 	1		\ allowed nested parens by keeping track of depth
 222 | 	BEGIN
 223 | 		KEY		\ read next character
 224 | 		DUP '(' = IF	\ open paren?
 225 | 			DROP		\ drop the open paren
 226 | 			1+		\ depth increases
 227 | 		ELSE
 228 | 			')' = IF	\ close paren?
 229 | 				1-		\ depth decreases
 230 | 			THEN
 231 | 		THEN
 232 | 	DUP 0= UNTIL		\ continue until we reach matching close paren, depth 0
 233 | 	DROP		\ drop the depth counter
 234 | ;
 235 | 
 236 | (
 237 | 	From now on we can use ( ... ) for comments.
 238 | 
 239 | 	STACK NOTATION ----------------------------------------------------------------------
 240 | 
 241 | 	In FORTH style we can also use ( ... -- ... ) to show the effects that a word has on the
 242 | 	parameter stack.  For example:
 243 | 
 244 | 	( n -- )	means that the word consumes an integer (n) from the parameter stack.
 245 | 	( b a -- c )	means that the word uses two integers (a and b, where a is at the top of stack)
 246 | 				and returns a single integer (c).
 247 | 	( -- )		means the word has no effect on the stack
 248 | )
 249 | 
 250 | ( Some more complicated stack examples, showing the stack notation. )
 251 | : NIP ( x y -- y ) SWAP DROP ;
 252 | : TUCK ( x y -- y x y ) SWAP OVER ;
 253 | : PICK ( x_u ... x_1 x_0 u -- x_u ... x_1 x_0 x_u )
 254 | 	1+		( add one because of 'u' on the stack )
 255 | 	8 *		( multiply by the word size )
 256 | 	DSP@ +		( add to the stack pointer )
 257 | 	@    		( and fetch )
 258 | ;
 259 | 
 260 | ( With the looping constructs, we can now write SPACES, which writes n spaces to stdout. )
 261 | : SPACES	( n -- )
 262 | 	BEGIN
 263 | 		DUP 0>		( while n > 0 )
 264 | 	WHILE
 265 | 		SPACE		( print a space )
 266 | 		1-		( until we count down to 0 )
 267 | 	REPEAT
 268 | 	DROP
 269 | ;
 270 | 
 271 | ( Standard words for manipulating BASE. )
 272 | : DECIMAL ( -- ) 10 BASE ! ;
 273 | : HEX ( -- ) 16 BASE ! ;
 274 | 
 275 | (
 276 | 	PRINTING NUMBERS ----------------------------------------------------------------------
 277 | 
 278 | 	The standard FORTH word . (DOT) is very important.  It takes the number at the top
 279 | 	of the stack and prints it out.  However first I'm going to implement some lower-level
 280 | 	FORTH words:
 281 | 
 282 | 	U.R	( u width -- )	which prints an unsigned number, padded to a certain width
 283 | 	U.	( u -- )	which prints an unsigned number
 284 | 	.R	( n width -- )	which prints a signed number, padded to a certain width.
 285 | 
 286 | 	For example:
 287 | 		-123 6 .R
 288 | 	will print out these characters:
 289 | 		<space> <space> - 1 2 3
 290 | 
 291 | 	In other words, the number padded left to a certain number of characters.
 292 | 
 293 | 	The full number is printed even if it is wider than width, and this is what allows us to
 294 | 	define the ordinary functions U. and . (we just set width to zero knowing that the full
 295 | 	number will be printed anyway).
 296 | 
 297 | 	Another wrinkle of . and friends is that they obey the current base in the variable BASE.
 298 | 	BASE can be anything in the range 2 to 36.
 299 | 
 300 | 	While we're defining . &c we can also define .S which is a useful debugging tool.  This
 301 | 	word prints the current stack (non-destructively) from top to bottom.
 302 | )
 303 | 
 304 | ( This is the underlying recursive definition of U. )
 305 | : U.		( u -- )
 306 | 	BASE @ /MOD	( width rem quot )
 307 | 	?DUP IF			( if quotient <> 0 then )
 308 | 		RECURSE		( print the quotient )
 309 | 	THEN
 310 | 
 311 | 	( print the remainder )
 312 | 	DUP 10 < IF
 313 | 		'0'		( decimal digits 0..9 )
 314 | 	ELSE
 315 | 		10 -		( hex and beyond digits A..Z )
 316 | 		'A'
 317 | 	THEN
 318 | 	+
 319 | 	EMIT
 320 | ;
 321 | 
 322 | (
 323 | 	FORTH word .S prints the contents of the stack.  It doesn't alter the stack.
 324 | 	Very useful for debugging.
 325 | )
 326 | : .S		( -- )
 327 | 	DSP@		( get current stack pointer )
 328 | 	BEGIN
 329 | 		DUP S0 @ <
 330 | 	WHILE
 331 | 		DUP @ U.	( print the stack element )
 332 | 		SPACE
 333 | 		8+		( move up )
 334 | 	REPEAT
 335 | 	DROP
 336 | ;
 337 | 
 338 | ( This word returns the width (in characters) of an unsigned number in the current base )
 339 | : UWIDTH	( u -- width )
 340 | 	BASE @ /	( rem quot )
 341 | 	?DUP IF		( if quotient <> 0 then )
 342 | 		RECURSE 1+	( return 1+recursive call )
 343 | 	ELSE
 344 | 		1		( return 1 )
 345 | 	THEN
 346 | ;
 347 | 
 348 | : U.R		( u width -- )
 349 | 	SWAP		( width u )
 350 | 	DUP		( width u u )
 351 | 	UWIDTH		( width u uwidth )
 352 | 	ROT		( u uwidth width )
 353 | 	SWAP -		( u width-uwidth )
 354 | 	( At this point if the requested width is narrower, we'll have a negative number on the stack.
 355 | 	  Otherwise the number on the stack is the number of spaces to print.  But SPACES won't print
 356 | 	  a negative number of spaces anyway, so it's now safe to call SPACES ... )
 357 | 	SPACES
 358 | 	( ... and then call the underlying implementation of U. )
 359 | 	U.
 360 | ;
 361 | 
 362 | (
 363 | 	.R prints a signed number, padded to a certain width.  We can't just print the sign
 364 | 	and call U.R because we want the sign to be next to the number ('-123' instead of '-  123').
 365 | )
 366 | : .R		( n width -- )
 367 | 	SWAP		( width n )
 368 | 	DUP 0< IF
 369 | 		NEGATE		( width u )
 370 | 		1		( save a flag to remember that it was negative | width n 1 )
 371 | 		SWAP		( width 1 u )
 372 | 		ROT		( 1 u width )
 373 | 		1-		( 1 u width-1 )
 374 | 	ELSE
 375 | 		0		( width u 0 )
 376 | 		SWAP		( width 0 u )
 377 | 		ROT		( 0 u width )
 378 | 	THEN
 379 | 	SWAP		( flag width u )
 380 | 	DUP		( flag width u u )
 381 | 	UWIDTH		( flag width u uwidth )
 382 | 	ROT		( flag u uwidth width )
 383 | 	SWAP -		( flag u width-uwidth )
 384 | 
 385 | 	SPACES		( flag u )
 386 | 	SWAP		( u flag )
 387 | 
 388 | 	IF			( was it negative? print the - character )
 389 | 		'-' EMIT
 390 | 	THEN
 391 | 
 392 | 	U.
 393 | ;
 394 | 
 395 | ( Finally we can define word . in terms of .R, with a trailing space. )
 396 | : . 0 .R SPACE ;
 397 | 
 398 | ( The real U., note the trailing space. )
 399 | : U. U. SPACE ;
 400 | 
 401 | ( ? fetches the integer at an address and prints it. )
 402 | : ? ( addr -- ) @ . ;
 403 | 
 404 | ( c a b WITHIN returns true if a <= c and c < b )
 405 | (  or define without ifs: OVER - >R - R>  U<  )
 406 | : WITHIN
 407 | 	-ROT		( b c a )
 408 | 	OVER		( b c a c )
 409 | 	<= IF
 410 | 		> IF		( b c -- )
 411 | 			TRUE
 412 | 		ELSE
 413 | 			FALSE
 414 | 		THEN
 415 | 	ELSE
 416 | 		2DROP		( b c -- )
 417 | 		FALSE
 418 | 	THEN
 419 | ;
 420 | 
 421 | ( DEPTH returns the depth of the stack. )
 422 | : DEPTH		( -- n )
 423 | 	S0 @ DSP@ -
 424 | 	8-			( adjust because S0 was on the stack when we pushed DSP )
 425 | ;
 426 | 
 427 | (
 428 | 	ALIGNED takes an address and rounds it up (aligns it) to the next 8 byte boundary.
 429 | )
 430 | : ALIGNED	( addr -- addr )
 431 | 	7 + 7 INVERT AND	( (addr+7) & ~7 )
 432 | ;
 433 | 
 434 | (
 435 | 	ALIGN aligns the HERE pointer, so the next word appended will be aligned properly.
 436 | )
 437 | : ALIGN HERE @ ALIGNED HERE ! ;
 438 | 
 439 | (
 440 | 	STRINGS ----------------------------------------------------------------------
 441 | 
 442 | 	S" string" is used in FORTH to define strings.  It leaves the address of the string and
 443 | 	its length on the stack, (length at the top of stack).  The space following S" is the normal
 444 | 	space between FORTH words and is not a part of the string.
 445 | 
 446 | 	This is tricky to define because it has to do different things depending on whether
 447 | 	we are compiling or in immediate mode.  (Thus the word is marked IMMEDIATE so it can
 448 | 	detect this and do different things).
 449 | 
 450 | 	In compile mode we append
 451 | 		LITSTRING <string length> <string rounded up 8 bytes>
 452 | 	to the current word.  The primitive LITSTRING does the right thing when the current
 453 | 	word is executed.
 454 | 
 455 | 	In immediate mode there isn't a particularly good place to put the string, but in this
 456 | 	case we put the string at HERE (but we _don't_ change HERE).  This is meant as a temporary
 457 | 	location, likely to be overwritten soon after.
 458 | )
 459 | ( C, appends a byte to the current compiled word. )
 460 | : C,
 461 | 	HERE @ C!	( store the character in the compiled image )
 462 | 	1 HERE +!	( increment HERE pointer by 1 byte )
 463 | ;
 464 | 
 465 | : S" IMMEDIATE		( -- addr len )
 466 | 	STATE @ IF	( compiling? )
 467 | 		' LITSTRING ,	( compile LITSTRING )
 468 | 		HERE @		( save the address of the length word on the stack )
 469 | 		0 ,		( dummy length - we don't know what it is yet )
 470 | 		BEGIN
 471 | 			KEY 		( get next character of the string )
 472 | 			DUP '"' <>
 473 | 		WHILE
 474 | 			C,		( copy character )
 475 | 		REPEAT
 476 | 		DROP		( drop the double quote character at the end )
 477 | 		DUP		( get the saved address of the length word )
 478 | 		HERE @ SWAP -	( calculate the length )
 479 | 		8-		( subtract 8 (because we measured from the start of the length word) )
 480 | 		SWAP !		( and back-fill the length location )
 481 | 		ALIGN		( round up to next multiple of 8 bytes for the remaining code )
 482 | 	ELSE		( immediate mode )
 483 | 		HERE @		( get the start address of the temporary space )
 484 | 		BEGIN
 485 | 			KEY
 486 | 			DUP '"' <>
 487 | 		WHILE
 488 | 			OVER C!		( save next character )
 489 | 			1+		( increment address )
 490 | 		REPEAT
 491 | 		DROP		( drop the final " character )
 492 | 		HERE @ -	( calculate the length )
 493 | 		HERE @		( push the start address )
 494 | 		SWAP 		( addr len )
 495 | 	THEN
 496 | ;
 497 | 
 498 | (
 499 | 	." is the print string operator in FORTH.  Example: ." Something to print"
 500 | 	The space after the operator is the ordinary space required between words and is not
 501 | 	a part of what is printed.
 502 | 
 503 | 	In immediate mode we just keep reading characters and printing them until we get to
 504 | 	the next double quote.
 505 | 
 506 | 	In compile mode we use S" to store the string, then add TELL afterwards:
 507 | 		LITSTRING <string length> <string rounded up to 8 bytes> TELL
 508 | 
 509 | 	It may be interesting to note the use of [COMPILE] to turn the call to the immediate
 510 | 	word S" into compilation of that word.  It compiles it into the definition of .",
 511 | 	not into the definition of the word being compiled when this is running (complicated
 512 | 	enough for you?)
 513 | )
 514 | : ." IMMEDIATE		( -- )
 515 | 	STATE @ IF	( compiling? )
 516 | 		[COMPILE] S"	( read the string, and compile LITSTRING, etc. )
 517 | 		' TELL ,	( compile the final TELL )
 518 | 	ELSE
 519 | 		( In immediate mode, just read characters and print them until we get
 520 | 		  to the ending double quote. )
 521 | 		BEGIN
 522 | 			KEY
 523 | 			DUP '"' = IF
 524 | 				DROP	( drop the double quote character )
 525 | 				EXIT	( return from this function )
 526 | 			THEN
 527 | 			EMIT
 528 | 		AGAIN
 529 | 	THEN
 530 | ;
 531 | 
 532 | (
 533 | 	CONSTANTS AND VARIABLES ----------------------------------------------------------------------
 534 | 
 535 | 	In FORTH, global constants and variables are defined like this:
 536 | 
 537 | 	10 CONSTANT TEN		when TEN is executed, it leaves the integer 10 on the stack
 538 | 	VARIABLE VAR		when VAR is executed, it leaves the address of VAR on the stack
 539 | 
 540 | 	Constants can be read but not written, eg:
 541 | 
 542 | 	TEN . CR		prints 10
 543 | 
 544 | 	You can read a variable (in this example called VAR) by doing:
 545 | 
 546 | 	VAR @			leaves the value of VAR on the stack
 547 | 	VAR @ . CR		prints the value of VAR
 548 | 	VAR ? CR		same as above, since ? is the same as @ .
 549 | 
 550 | 	and update the variable by doing:
 551 | 
 552 | 	20 VAR !		sets VAR to 20
 553 | 
 554 | 	Note that variables are uninitialised (but see VALUE later on which provides initialised
 555 | 	variables with a slightly simpler syntax).
 556 | 
 557 | 	How can we define the words CONSTANT and VARIABLE?
 558 | 
 559 | 	The trick is to define a new word for the variable itself (eg. if the variable was called
 560 | 	'VAR' then we would define a new word called VAR).  This is easy to do because we exposed
 561 | 	dictionary entry creation through the CREATE word (part of the definition of : above).
 562 | 	A call to WORD [TEN] CREATE (where [TEN] means that "TEN" is the next word in the input)
 563 | 	leaves the dictionary entry:
 564 | 
 565 | 				   +--- HERE
 566 | 				   |
 567 | 				   V
 568 | 	+---------+---+---+---+---+
 569 | 	| LINK    | 3 | T | E | N |
 570 | 	+---------+---+---+---+---+
 571 |                    len
 572 | 
 573 | 	For CONSTANT we can continue by appending DOCOL (the codeword), then LIT followed by
 574 | 	the constant itself and then EXIT, forming a little word definition that returns the
 575 | 	constant:
 576 | 
 577 | 	+---------+---+---+---+---+------------+------------+------------+------------+
 578 | 	| LINK    | 3 | T | E | N | DOCOL      | LIT        | 10         | EXIT       |
 579 | 	+---------+---+---+---+---+------------+------------+------------+------------+
 580 |                    len              codeword
 581 | 
 582 | 	Notice that this word definition is exactly the same as you would have got if you had
 583 | 	written : TEN 10 ;
 584 | 
 585 | 	Note for people reading the code below: DOCOL is a constant word which we defined in the
 586 | 	assembler part which returns the value of the assembler symbol of the same name.
 587 | )
 588 | : CONSTANT
 589 | 	WORD		( get the name (the name follows CONSTANT) )
 590 | 	CREATE		( make the dictionary entry )
 591 | 	DOCOL ,		( append DOCOL (the codeword field of this word) )
 592 | 	' LIT ,		( append the codeword LIT )
 593 | 	,		( append the value on the top of the stack )
 594 | 	' EXIT ,	( append the codeword EXIT )
 595 | ;
 596 | 
 597 | (
 598 | 	VARIABLE is a little bit harder because we need somewhere to put the variable.  There is
 599 | 	nothing particularly special about the user memory (the area of memory pointed to by HERE
 600 | 	where we have previously just stored new word definitions).  We can slice off bits of this
 601 | 	memory area to store anything we want, so one possible definition of VARIABLE might create
 602 | 	this:
 603 | 
 604 | 	   +--------------------------------------------------------------+
 605 | 	   |								  |
 606 | 	   V								  |
 607 | 	+---------+---------+---+---+---+---+------------+------------+---|--------+------------+
 608 | 	| <var>   | LINK    | 3 | V | A | R | DOCOL      | LIT        | <addr var> | EXIT       |
 609 | 	+---------+---------+---+---+---+---+------------+------------+------------+------------+
 610 |         		     len              codeword
 611 | 
 612 | 	where <var> is the place to store the variable, and <addr var> points back to it.
 613 | 
 614 | 	To make this more general let's define a couple of words which we can use to allocate
 615 | 	arbitrary memory from the user memory.
 616 | 
 617 | 	First ALLOT, where n ALLOT allocates n bytes of memory.  (Note when calling this that
 618 | 	it's a very good idea to make sure that n is a multiple of 8, or at least that next time
 619 | 	a word is compiled that HERE has been left as a multiple of 8).
 620 | )
 621 | : ALLOT		( n -- addr )
 622 | 	HERE @ SWAP	( here n )
 623 | 	HERE +!		( adds n to HERE, after this the old value of HERE is still on the stack )
 624 | ;
 625 | 
 626 | (
 627 | 	Second, CELLS.  In FORTH the phrase 'n CELLS ALLOT' means allocate n integers of whatever size
 628 | 	is the natural size for integers on this machine architecture.  On this 32 bit machine therefore
 629 | 	CELLS just multiplies the top of stack by 8.
 630 | )
 631 | : CELLS ( n -- n ) 8 * ;
 632 | 
 633 | (
 634 | 	So now we can define VARIABLE easily in much the same way as CONSTANT above.  Refer to the
 635 | 	diagram above to see what the word that this creates will look like.
 636 | )
 637 | : VARIABLE
 638 | 	1 CELLS ALLOT	( allocate 1 cell of memory, push the pointer to this memory )
 639 | 	WORD CREATE	( make the dictionary entry (the name follows VARIABLE) )
 640 | 	DOCOL ,		( append DOCOL (the codeword field of this word) )
 641 | 	' LIT ,		( append the codeword LIT )
 642 | 	,		( append the pointer to the new memory )
 643 | 	' EXIT ,	( append the codeword EXIT )
 644 | ;
 645 | 
 646 | (
 647 | 	VALUES ----------------------------------------------------------------------
 648 | 
 649 | 	VALUEs are like VARIABLEs but with a simpler syntax.  You would generally use them when you
 650 | 	want a variable which is read often, and written infrequently.
 651 | 
 652 | 	20 VALUE VAL 	creates VAL with initial value 20
 653 | 	VAL		pushes the value (20) directly on the stack
 654 | 	30 TO VAL	updates VAL, setting it to 30
 655 | 	VAL		pushes the value (30) directly on the stack
 656 | 
 657 | 	Notice that 'VAL' on its own doesn't return the address of the value, but the value itself,
 658 | 	making values simpler and more obvious to use than variables (no indirection through '@').
 659 | 	The price is a more complicated implementation, although despite the complexity there is no
 660 | 	performance penalty at runtime.
 661 | 
 662 | 	A naive implementation of 'TO' would be quite slow, involving a dictionary search each time.
 663 | 	But because this is FORTH we have complete control of the compiler so we can compile TO more
 664 | 	efficiently, turning:
 665 | 		TO VAL
 666 | 	into:
 667 | 		LIT <addr> !
 668 | 	and calculating <addr> (the address of the value) at compile time.
 669 | 
 670 | 	Now this is the clever bit.  We'll compile our value like this:
 671 | 
 672 | 	+---------+---+---+---+---+------------+------------+------------+------------+
 673 | 	| LINK    | 3 | V | A | L | DOCOL      | LIT        | <value>    | EXIT       |
 674 | 	+---------+---+---+---+---+------------+------------+------------+------------+
 675 |                    len              codeword
 676 | 
 677 | 	where <value> is the actual value itself.  Note that when VAL executes, it will push the
 678 | 	value on the stack, which is what we want.
 679 | 
 680 | 	But what will TO use for the address <addr>?  Why of course a pointer to that <value>:
 681 | 
 682 | 		code compiled	- - - - --+------------+------------+------------+-- - - - -
 683 | 		by TO VAL		  | LIT        | <addr>     | !          |
 684 | 				- - - - --+------------+-----|------+------------+-- - - - -
 685 | 							     |
 686 | 							     V
 687 | 	+---------+---+---+---+---+------------+------------+------------+------------+
 688 | 	| LINK    | 3 | V | A | L | DOCOL      | LIT        | <value>    | EXIT       |
 689 | 	+---------+---+---+---+---+------------+------------+------------+------------+
 690 |                    len              codeword
 691 | 
 692 | 	In other words, this is a kind of self-modifying code.
 693 | 
 694 | 	(Note to the people who want to modify this FORTH to add inlining: values defined this
 695 | 	way cannot be inlined).
 696 | )
 697 | : VALUE		( n -- )
 698 | 	WORD CREATE	( make the dictionary entry (the name follows VALUE) )
 699 | 	DOCOL ,		( append DOCOL )
 700 | 	' LIT ,		( append the codeword LIT )
 701 | 	,		( append the initial value )
 702 | 	' EXIT ,	( append the codeword EXIT )
 703 | ;
 704 | 
 705 | : TO IMMEDIATE	( n -- )
 706 | 	WORD		( get the name of the value )
 707 | 	FIND		( look it up in the dictionary )
 708 | 	>DFA		( get a pointer to the first data field (the 'LIT') )
 709 | 	8+		( increment to point at the value )
 710 | 	STATE @ IF	( compiling? )
 711 | 		' LIT ,		( compile LIT )
 712 | 		,		( compile the address of the value )
 713 | 		' ! ,		( compile ! )
 714 | 	ELSE		( immediate mode )
 715 | 		!		( update it straightaway )
 716 | 	THEN
 717 | ;
 718 | 
 719 | ( x +TO VAL adds x to VAL )
 720 | : +TO IMMEDIATE
 721 | 	WORD		( get the name of the value )
 722 | 	FIND		( look it up in the dictionary )
 723 | 	>DFA		( get a pointer to the first data field (the 'LIT') )
 724 | 	8+		( increment to point at the value )
 725 | 	STATE @ IF	( compiling? )
 726 | 		' LIT ,		( compile LIT )
 727 | 		,		( compile the address of the value )
 728 | 		' +! ,		( compile +! )
 729 | 	ELSE		( immediate mode )
 730 | 		+!		( update it straightaway )
 731 | 	THEN
 732 | ;
 733 | 
 734 | (
 735 | 	PRINTING THE DICTIONARY ----------------------------------------------------------------------
 736 | 
 737 | 	ID. takes an address of a dictionary entry and prints the word's name.
 738 | 
 739 | 	For example: LATEST @ ID. would print the name of the last word that was defined.
 740 | )
 741 | : ID.
 742 | 	8+		( skip over the link pointer )
 743 | 	DUP C@		( get the flags/length byte )
 744 | 	F_LENMASK AND	( mask out the flags - just want the length )
 745 | 
 746 | 	BEGIN
 747 | 		DUP 0>		( length > 0? )
 748 | 	WHILE
 749 | 		SWAP 1+		( addr len -- len addr+1 )
 750 | 		DUP C@		( len addr -- len addr char | get the next character)
 751 | 		EMIT		( len addr char -- len addr | and print it)
 752 | 		SWAP 1-		( len addr -- addr len-1    | subtract one from length )
 753 | 	REPEAT
 754 | 	2DROP		( len addr -- )
 755 | ;
 756 | 
 757 | (
 758 | 	'WORD word FIND ?HIDDEN' returns true if 'word' is flagged as hidden.
 759 | 
 760 | 	'WORD word FIND ?IMMEDIATE' returns true if 'word' is flagged as immediate.
 761 | )
 762 | : ?HIDDEN
 763 | 	8+		( skip over the link pointer )
 764 | 	C@		( get the flags/length byte )
 765 | 	F_HIDDEN AND	( mask the F_HIDDEN flag and return it (as a truth value) )
 766 | ;
 767 | : ?IMMEDIATE
 768 | 	8+		( skip over the link pointer )
 769 | 	C@		( get the flags/length byte )
 770 | 	F_IMMED AND	( mask the F_IMMED flag and return it (as a truth value) )
 771 | ;
 772 | 
 773 | (
 774 | 	WORDS prints all the words defined in the dictionary, starting with the word defined most recently.
 775 | 	However it doesn't print hidden words.
 776 | 
 777 | 	The implementation simply iterates backwards from LATEST using the link pointers.
 778 | )
 779 | : WORDS
 780 | 	LATEST @	( start at LATEST dictionary entry )
 781 | 	BEGIN
 782 | 		?DUP		( while link pointer is not null )
 783 | 	WHILE
 784 | 		DUP ?HIDDEN NOT IF	( ignore hidden words )
 785 | 			DUP ID.		( but if not hidden, print the word )
 786 | 			SPACE
 787 | 		THEN
 788 | 		@		( dereference the link pointer - go to previous word )
 789 | 	REPEAT
 790 | 	CR
 791 | ;
 792 | 
 793 | (
 794 | 	FORGET ----------------------------------------------------------------------
 795 | 
 796 | 	So far we have only allocated words and memory.  FORTH provides a rather primitive method
 797 | 	to deallocate.
 798 | 
 799 | 	'FORGET word' deletes the definition of 'word' from the dictionary and everything defined
 800 | 	after it, including any variables and other memory allocated after.
 801 | 
 802 | 	The implementation is very simple - we look up the word (which returns the dictionary entry
 803 | 	address).  Then we set HERE to point to that address, so in effect all future allocations
 804 | 	and definitions will overwrite memory starting at the word.  We also need to set LATEST to
 805 | 	point to the previous word.
 806 | 
 807 | 	Note that you cannot FORGET built-in words (well, you can try but it will probably cause
 808 | 	a segfault).
 809 | 
 810 | 	XXX: Because we wrote VARIABLE to store the variable in memory allocated before the word,
 811 | 	in the current implementation VARIABLE FOO FORGET FOO will leak 1 cell of memory.
 812 | )
 813 | : FORGET
 814 | 	WORD FIND	( find the word, gets the dictionary entry address )
 815 | 	DUP @ LATEST !	( set LATEST to point to the previous word )
 816 | 	HERE !		( and store HERE with the dictionary address )
 817 | ;
 818 | 
 819 | (
 820 | 	DUMP ----------------------------------------------------------------------
 821 | 
 822 | 	DUMP is used to dump out the contents of memory, in the 'traditional' hexdump format.
 823 | 
 824 | 	Notice that the parameters to DUMP (address, length) are compatible with string words
 825 | 	such as WORD and S".
 826 | 
 827 | 	You can dump out the raw code for the last word you defined by doing something like:
 828 | 
 829 | 		LATEST @ 128 DUMP
 830 | )
 831 | : DUMP		( addr len -- )
 832 | 	BASE @ -ROT		( save the current BASE at the bottom of the stack )
 833 | 	HEX			( and switch to hexadecimal mode )
 834 | 
 835 | 	BEGIN
 836 | 		?DUP		( while len > 0 )
 837 | 	WHILE
 838 | 		OVER 8 U.R	( print the address )
 839 | 		SPACE
 840 | 
 841 | 		( print up to 16 words on this line )
 842 | 		2DUP		( addr len addr len )
 843 | 		1- 15 AND 1+	( addr len addr linelen )
 844 | 		BEGIN
 845 | 			?DUP		( while linelen > 0 )
 846 | 		WHILE
 847 | 			SWAP		( addr len linelen addr )
 848 | 			DUP C@		( addr len linelen addr byte )
 849 | 			2 .R SPACE	( print the byte )
 850 | 			1+ SWAP 1-	( addr len linelen addr -- addr len addr+1 linelen-1 )
 851 | 		REPEAT
 852 | 		DROP		( addr len )
 853 | 
 854 | 		( print the ASCII equivalents )
 855 | 		2DUP 1- 15 AND 1+ ( addr len addr linelen )
 856 | 		BEGIN
 857 | 			?DUP		( while linelen > 0)
 858 | 		WHILE
 859 | 			SWAP		( addr len linelen addr )
 860 | 			DUP C@		( addr len linelen addr byte )
 861 | 			DUP 32 128 WITHIN IF	( 32 <= c < 128? )
 862 | 				EMIT
 863 | 			ELSE
 864 | 				DROP '.' EMIT
 865 | 			THEN
 866 | 			1+ SWAP 1-	( addr len linelen addr -- addr len addr+1 linelen-1 )
 867 | 		REPEAT
 868 | 		DROP		( addr len )
 869 | 		CR
 870 | 
 871 | 		DUP 1- 15 AND 1+ ( addr len linelen )
 872 | 		TUCK		( addr linelen len linelen )
 873 | 		-		( addr linelen len-linelen )
 874 | 		>R + R>		( addr+linelen len-linelen )
 875 | 	REPEAT
 876 | 
 877 | 	DROP			( restore stack )
 878 | 	BASE !			( restore saved BASE )
 879 | ;
 880 | 
 881 | (
 882 | 	CASE ----------------------------------------------------------------------
 883 | 
 884 | 	CASE...ENDCASE is how we do switch statements in FORTH.  There is no generally
 885 | 	agreed syntax for this, so I've gone for the syntax mandated by the ISO standard
 886 | 	FORTH (ANS-FORTH).
 887 | 
 888 | 		( some value on the stack )
 889 | 		CASE
 890 | 		test1 OF ... ENDOF
 891 | 		test2 OF ... ENDOF
 892 | 		testn OF ... ENDOF
 893 | 		... ( default case )
 894 | 		ENDCASE
 895 | 
 896 | 	The CASE statement tests the value on the stack by comparing it for equality with
 897 | 	test1, test2, ..., testn and executes the matching piece of code within OF ... ENDOF.
 898 | 	If none of the test values match then the default case is executed.  Inside the ... of
 899 | 	the default case, the value is still at the top of stack (it is implicitly DROP-ed
 900 | 	by ENDCASE).  When ENDOF is executed it jumps after ENDCASE (ie. there is no "fall-through"
 901 | 	and no need for a break statement like in C).
 902 | 
 903 | 	The default case may be omitted.  In fact the tests may also be omitted so that you
 904 | 	just have a default case, although this is probably not very useful.
 905 | 
 906 | 	An example (assuming that 'q', etc. are words which push the ASCII value of the letter
 907 | 	on the stack):
 908 | 
 909 | 		0 VALUE QUIT
 910 | 		0 VALUE SLEEP
 911 | 		KEY CASE
 912 | 			'q' OF 1 TO QUIT ENDOF
 913 | 			's' OF 1 TO SLEEP ENDOF
 914 | 			( default case: )
 915 | 			." Sorry, I didn't understand key <" DUP EMIT ." >, try again." CR
 916 | 		ENDCASE
 917 | 
 918 | 	(In some versions of FORTH, more advanced tests are supported, such as ranges, etc.
 919 | 	Other versions of FORTH need you to write OTHERWISE to indicate the default case.
 920 | 	As I said above, this FORTH tries to follow the ANS FORTH standard).
 921 | 
 922 | 	The implementation of CASE...ENDCASE is somewhat non-trivial.  I'm following the
 923 | 	implementations from here:
 924 | 	http://www.uni-giessen.de/faq/archiv/forthfaq.case_endcase/msg00000.html
 925 | 
 926 | 	The general plan is to compile the code as a series of IF statements:
 927 | 
 928 | 	CASE				(push 0 on the immediate-mode parameter stack)
 929 | 	test1 OF ... ENDOF		test1 OVER = IF DROP ... ELSE
 930 | 	test2 OF ... ENDOF		test2 OVER = IF DROP ... ELSE
 931 | 	testn OF ... ENDOF		testn OVER = IF DROP ... ELSE
 932 | 	... ( default case )		...
 933 | 	ENDCASE				DROP THEN [THEN [THEN ...]]
 934 | 
 935 | 	The CASE statement pushes 0 on the immediate-mode parameter stack, and that number
 936 | 	is used to count how many THEN statements we need when we get to ENDCASE so that each
 937 | 	IF has a matching THEN.  The counting is done implicitly.  If you recall from the
 938 | 	implementation above of IF, each IF pushes a code address on the immediate-mode stack,
 939 | 	and these addresses are non-zero, so by the time we get to ENDCASE the stack contains
 940 | 	some number of non-zeroes, followed by a zero.  The number of non-zeroes is how many
 941 | 	times IF has been called, so how many times we need to match it with THEN.
 942 | 
 943 | 	This code uses [COMPILE] so that we compile calls to IF, ELSE, THEN instead of
 944 | 	actually calling them while we're compiling the words below.
 945 | 
 946 | 	As is the case with all of our control structures, they only work within word
 947 | 	definitions, not in immediate mode.
 948 | )
 949 | : CASE IMMEDIATE
 950 | 	0		( push 0 to mark the bottom of the stack )
 951 | ;
 952 | 
 953 | : OF IMMEDIATE
 954 | 	' OVER ,	( compile OVER )
 955 | 	' = ,		( compile = )
 956 | 	[COMPILE] IF	( compile IF )
 957 | 	' DROP ,  	( compile DROP )
 958 | ;
 959 | 
 960 | : ENDOF IMMEDIATE
 961 | 	[COMPILE] ELSE	( ENDOF is the same as ELSE )
 962 | ;
 963 | 
 964 | : ENDCASE IMMEDIATE
 965 | 	' DROP ,	( compile DROP )
 966 | 
 967 | 	( keep compiling THEN until we get to our zero marker )
 968 | 	BEGIN
 969 | 		?DUP
 970 | 	WHILE
 971 | 		[COMPILE] THEN
 972 | 	REPEAT
 973 | ;
 974 | 
 975 | (
 976 | 	DECOMPILER ----------------------------------------------------------------------
 977 | 
 978 | 	CFA> is the opposite of >CFA.  It takes a codeword and tries to find the matching
 979 | 	dictionary definition.  (In truth, it works with any pointer into a word, not just
 980 | 	the codeword pointer, and this is needed to do stack traces).
 981 | 
 982 | 	In this FORTH this is not so easy.  In fact we have to search through the dictionary
 983 | 	because we don't have a convenient back-pointer (as is often the case in other versions
 984 | 	of FORTH).  Because of this search, CFA> should not be used when performance is critical,
 985 | 	so it is only used for debugging tools such as the decompiler and printing stack
 986 | 	traces.
 987 | 
 988 | 	This word returns 0 if it doesn't find a match.
 989 | )
 990 | : CFA>
 991 | 	LATEST @	( start at LATEST dictionary entry )
 992 | 	BEGIN
 993 | 		?DUP		( while link pointer is not null )
 994 | 	WHILE
 995 | 		2DUP SWAP	( cfa curr curr cfa )
 996 | 		< IF		( current dictionary entry < cfa? )
 997 | 			NIP		( leave curr dictionary entry on the stack )
 998 | 			EXIT
 999 | 		THEN
1000 | 		@		( follow link pointer back )
1001 | 	REPEAT
1002 | 	DROP		( restore stack )
1003 | 	0		( sorry, nothing found )
1004 | ;
1005 | 
1006 | (
1007 | 	SEE decompiles a FORTH word.
1008 | 
1009 | 	We search for the dictionary entry of the word, then search again for the next
1010 | 	word (effectively, the end of the compiled word).  This results in two pointers:
1011 | 
1012 | 	+---------+---+---+---+---+------------+------------+------------+------------+
1013 | 	| LINK    | 3 | T | E | N | DOCOL      | LIT        | 10         | EXIT       |
1014 | 	+---------+---+---+---+---+------------+------------+------------+------------+
1015 | 	 ^									       ^
1016 | 	 |									       |
1017 | 	Start of word							      End of word
1018 | 
1019 | 	With this information we can have a go at decompiling the word.  We need to
1020 | 	recognise "meta-words" like LIT, LITSTRING, BRANCH, etc. and treat those separately.
1021 | )
1022 | : SEE
1023 | 	WORD FIND	( find the dictionary entry to decompile )
1024 | 
1025 | 	( Now we search again, looking for the next word in the dictionary.  This gives us
1026 | 	  the length of the word that we will be decompiling.  (Well, mostly it does). )
1027 | 	HERE @		( address of the end of the last compiled word )
1028 | 	LATEST @	( word last curr )
1029 | 	BEGIN
1030 | 		2 PICK		( word last curr word )
1031 | 		OVER		( word last curr word curr )
1032 | 		<>		( word last curr word<>curr? )
1033 | 	WHILE			( word last curr )
1034 | 		NIP		( word curr )
1035 | 		DUP @		( word curr prev (which becomes: word last curr) )
1036 | 	REPEAT
1037 | 
1038 | 	DROP		( at this point, the stack is: start-of-word end-of-word )
1039 | 	SWAP		( end-of-word start-of-word )
1040 | 
1041 | 	( begin the definition with : NAME [IMMEDIATE] )
1042 | 	':' EMIT SPACE DUP ID. SPACE
1043 | 	DUP ?IMMEDIATE IF ." IMMEDIATE " THEN
1044 | 
1045 | 	>DFA		( get the data address, ie. points after DOCOL | end-of-word start-of-data )
1046 | 
1047 | 	( now we start decompiling until we hit the end of the word )
1048 | 	BEGIN		( end start )
1049 | 		2DUP >
1050 | 	WHILE
1051 | 		DUP @		( end start codeword )
1052 | 
1053 | 		CASE
1054 | 		' LIT OF		( is it LIT ? )
1055 | 			8 + DUP @		( get next word which is the integer constant )
1056 | 			.			( and print it )
1057 | 		ENDOF
1058 | 		' LITSTRING OF		( is it LITSTRING ? )
1059 | 			[ CHAR S ] LITERAL EMIT '"' EMIT SPACE ( print S"<space> )
1060 | 			8 + DUP @		( get the length word )
1061 | 			SWAP 8 + SWAP		( end start+8 length )
1062 | 			2DUP TELL		( print the string )
1063 | 			'"' EMIT SPACE		( finish the string with a final quote )
1064 | 			+ ALIGNED		( end start+8+len, aligned )
1065 | 			8 -			( because we're about to add 8 below )
1066 | 		ENDOF
1067 | 		' 0BRANCH OF		( is it 0BRANCH ? )
1068 | 			." 0BRANCH ( "
1069 | 			8 + DUP @		( print the offset )
1070 | 			.
1071 | 			." ) "
1072 | 		ENDOF
1073 | 		' BRANCH OF		( is it BRANCH ? )
1074 | 			." BRANCH ( "
1075 | 			8 + DUP @		( print the offset )
1076 | 			.
1077 | 			." ) "
1078 | 		ENDOF
1079 | 		' ' OF			( is it ' (TICK) ? )
1080 | 			[ CHAR ' ] LITERAL EMIT SPACE
1081 | 			8 + DUP @		( get the next codeword )
1082 | 			CFA>			( and force it to be printed as a dictionary entry )
1083 | 			ID. SPACE
1084 | 		ENDOF
1085 | 		' EXIT OF		( is it EXIT? )
1086 | 			( We expect the last word to be EXIT, and if it is then we don't print it
1087 | 			  because EXIT is normally implied by ;.  EXIT can also appear in the middle
1088 | 			  of words, and then it needs to be printed. )
1089 | 			2DUP			( end start end start )
1090 | 			8 +			( end start end start+8 )
1091 | 			<> IF			( end start | we're not at the end )
1092 | 				." EXIT "
1093 | 			THEN
1094 | 		ENDOF
1095 | 					( default case: )
1096 | 			DUP			( in the default case we always need to DUP before using )
1097 | 			CFA>			( look up the codeword to get the dictionary entry )
1098 | 			ID. SPACE		( and print it )
1099 | 		ENDCASE
1100 | 
1101 | 		8 +		( end start+8 )
1102 | 	REPEAT
1103 | 
1104 | 	';' EMIT CR
1105 | 
1106 | 	2DROP		( restore stack )
1107 | ;
1108 | 
1109 | (
1110 | 	EXECUTION TOKENS ----------------------------------------------------------------------
1111 | 
1112 | 	Standard FORTH defines a concept called an 'execution token' (or 'xt') which is very
1113 | 	similar to a function pointer in C.  We map the execution token to a codeword address.
1114 | 
1115 | 			execution token of DOUBLE is the address of this codeword
1116 | 						    |
1117 | 						    V
1118 | 	+---------+---+---+---+---+---+---+---+---+------------+------------+------------+------------+
1119 | 	| LINK    | 6 | D | O | U | B | L | E | 0 | DOCOL      | DUP        | +          | EXIT       |
1120 | 	+---------+---+---+---+---+---+---+---+---+------------+------------+------------+------------+
1121 |                    len                         pad  codeword					       ^
1122 | 
1123 | 	There is one assembler primitive for execution tokens, EXECUTE ( xt -- ), which runs them.
1124 | 
1125 | 	You can make an execution token for an existing word the long way using >CFA,
1126 | 	ie: WORD [foo] FIND >CFA will push the xt for foo onto the stack where foo is the
1127 | 	next word in input.  So a very slow way to run DOUBLE might be:
1128 | 
1129 | 		: DOUBLE DUP + ;
1130 | 		: SLOW WORD FIND >CFA EXECUTE ;
1131 | 		5 SLOW DOUBLE . CR	\ prints 10
1132 | 
1133 | 	We also offer a simpler and faster way to get the execution token of any word FOO:
1134 | 
1135 | 		['] FOO
1136 | 
1137 | 	(Exercises for readers: (1) What is the difference between ['] FOO and ' FOO?
1138 | 	(2) What is the relationship between ', ['] and LIT?)
1139 | 
1140 | 	More useful is to define anonymous words and/or to assign xt's to variables.
1141 | 
1142 | 	To define an anonymous word (and push its xt on the stack) use :NONAME ... ; as in this
1143 | 	example:
1144 | 
1145 | 		:NONAME ." anon word was called" CR ;	\ pushes xt on the stack
1146 | 		DUP EXECUTE EXECUTE			\ executes the anon word twice
1147 | 
1148 | 	Stack parameters work as expected:
1149 | 
1150 | 		:NONAME ." called with parameter " . CR ;
1151 | 		DUP
1152 | 		10 SWAP EXECUTE		\ prints 'called with parameter 10'
1153 | 		20 SWAP EXECUTE		\ prints 'called with parameter 20'
1154 | 
1155 | 	Notice that the above code has a memory leak: the anonymous word is still compiled
1156 | 	into the data segment, so even if you lose track of the xt, the word continues to
1157 | 	occupy memory.  A good way to keep track of the xt and thus avoid the memory leak is
1158 | 	to assign it to a CONSTANT, VARIABLE or VALUE:
1159 | 
1160 | 		0 VALUE ANON
1161 | 		:NONAME ." anon word was called" CR ; TO ANON
1162 | 		ANON EXECUTE
1163 | 		ANON EXECUTE
1164 | 
1165 | 	Another use of :NONAME is to create an array of functions which can be called quickly
1166 | 	(think: fast switch statement).  This example is adapted from the ANS FORTH standard:
1167 | 
1168 | 		10 CELLS ALLOT CONSTANT CMD-TABLE
1169 | 		: SET-CMD CELLS CMD-TABLE + ! ;
1170 | 		: CALL-CMD CELLS CMD-TABLE + @ EXECUTE ;
1171 | 
1172 | 		:NONAME ." alternate 0 was called" CR ;	 0 SET-CMD
1173 | 		:NONAME ." alternate 1 was called" CR ;	 1 SET-CMD
1174 | 			\ etc...
1175 | 		:NONAME ." alternate 9 was called" CR ;	 9 SET-CMD
1176 | 
1177 | 		0 CALL-CMD
1178 | 		1 CALL-CMD
1179 | )
1180 | 
1181 | : :NONAME
1182 | 	0 0 CREATE	( create a word with no name - we need a dictionary header because ; expects it )
1183 | 	HERE @		( current HERE value is the address of the codeword, ie. the xt )
1184 | 	DOCOL ,		( compile DOCOL (the codeword) )
1185 | 	]		( go into compile mode )
1186 | ;
1187 | 
1188 | : ['] IMMEDIATE
1189 | 	' LIT ,		( compile LIT )
1190 | ;
1191 | 
1192 | (
1193 | 	EXCEPTIONS ----------------------------------------------------------------------
1194 | 
1195 | 	Amazingly enough, exceptions can be implemented directly in FORTH, in fact rather easily.
1196 | 
1197 | 	The general usage is as follows:
1198 | 
1199 | 		: FOO ( n -- ) THROW ;
1200 | 
1201 | 		: TEST-EXCEPTIONS
1202 | 			25 ['] FOO CATCH	\ execute 25 FOO, catching any exception
1203 | 			?DUP IF
1204 | 				." called FOO and it threw exception number: "
1205 | 				. CR
1206 | 				DROP		\ we have to drop the argument of FOO (25)
1207 | 			THEN
1208 | 		;
1209 | 		\ prints: called FOO and it threw exception number: 25
1210 | 
1211 | 	CATCH runs an execution token and detects whether it throws any exception or not.  The
1212 | 	stack signature of CATCH is rather complicated:
1213 | 
1214 | 		( a_n-1 ... a_1 a_0 xt -- r_m-1 ... r_1 r_0 0 )		if xt did NOT throw an exception
1215 | 		( a_n-1 ... a_1 a_0 xt -- ?_n-1 ... ?_1 ?_0 e )		if xt DID throw exception 'e'
1216 | 
1217 | 	where a_i and r_i are the (arbitrary number of) argument and return stack contents
1218 | 	before and after xt is EXECUTEd.  Notice in particular the case where an exception
1219 | 	is thrown, the stack pointer is restored so that there are n of _something_ on the
1220 | 	stack in the positions where the arguments a_i used to be.  We don't really guarantee
1221 | 	what is on the stack -- perhaps the original arguments, and perhaps other nonsense --
1222 | 	it largely depends on the implementation of the word that was executed.
1223 | 
1224 | 	THROW, ABORT and a few others throw exceptions.
1225 | 
1226 | 	Exception numbers are non-zero integers.  By convention the positive numbers can be used
1227 | 	for app-specific exceptions and the negative numbers have certain meanings defined in
1228 | 	the ANS FORTH standard.  (For example, -1 is the exception thrown by ABORT).
1229 | 
1230 | 	0 THROW does nothing.  This is the stack signature of THROW:
1231 | 
1232 | 		( 0 -- )
1233 | 		( * e -- ?_n-1 ... ?_1 ?_0 e )	the stack is restored to the state from the corresponding CATCH
1234 | 
1235 | 	The implementation hangs on the definitions of CATCH and THROW and the state shared
1236 | 	between them.
1237 | 
1238 | 	Up to this point, the return stack has consisted merely of a list of return addresses,
1239 | 	with the top of the return stack being the return address where we will resume executing
1240 | 	when the current word EXITs.  However CATCH will push a more complicated 'exception stack
1241 | 	frame' on the return stack.  The exception stack frame records some things about the
1242 | 	state of execution at the time that CATCH was called.
1243 | 
1244 | 	When called, THROW walks up the return stack (the process is called 'unwinding') until
1245 | 	it finds the exception stack frame.  It then uses the data in the exception stack frame
1246 | 	to restore the state allowing execution to continue after the matching CATCH.  (If it
1247 | 	unwinds the stack and doesn't find the exception stack frame then it prints a message
1248 | 	and drops back to the prompt, which is also normal behaviour for so-called 'uncaught
1249 | 	exceptions').
1250 | 
1251 | 	This is what the exception stack frame looks like.  (As is conventional, the return stack
1252 | 	is shown growing downwards from higher to lower memory addresses).
1253 | 
1254 | 		+------------------------------+
1255 | 		| return address from CATCH    |   Notice this is already on the
1256 | 		|                              |   return stack when CATCH is called.
1257 | 		+------------------------------+
1258 | 		| original parameter stack     |
1259 | 		| pointer                      |
1260 | 		+------------------------------+  ^
1261 | 		| exception stack marker       |  |
1262 | 		| (EXCEPTION-MARKER)           |  |   Direction of stack
1263 | 		+------------------------------+  |   unwinding by THROW.
1264 | 						  |
1265 | 						  |
1266 | 
1267 | 	The EXCEPTION-MARKER marks the entry as being an exception stack frame rather than an
1268 | 	ordinary return address, and it is this which THROW "notices" as it is unwinding the
1269 | 	stack.  (If you want to implement more advanced exceptions such as TRY...WITH then
1270 | 	you'll need to use a different value of marker if you want the old and new exception stack
1271 | 	frame layouts to coexist).
1272 | 
1273 | 	What happens if the executed word doesn't throw an exception?  It will eventually
1274 | 	return and call EXCEPTION-MARKER, so EXCEPTION-MARKER had better do something sensible
1275 | 	without us needing to modify EXIT.  This nicely gives us a suitable definition of
1276 | 	EXCEPTION-MARKER, namely a function that just drops the stack frame and itself
1277 | 	returns (thus "returning" from the original CATCH).
1278 | 
1279 | 	One thing to take from this is that exceptions are a relatively lightweight mechanism
1280 | 	in FORTH.
1281 | )
1282 | 
1283 | : EXCEPTION-MARKER
1284 | 	RDROP			( drop the original parameter stack pointer )
1285 | 	0			( there was no exception, this is the normal return path )
1286 | ;
1287 | 
1288 | : CATCH		( xt -- exn? )
1289 | 	DSP@ 8+ >R		( save parameter stack pointer (+8 because of xt) on the return stack )
1290 | 	' EXCEPTION-MARKER 4+	( push the address of the RDROP inside EXCEPTION-MARKER ... )
1291 | 	>R			( ... on to the return stack so it acts like a return address )
1292 | 	EXECUTE			( execute the nested function )
1293 | ;
1294 | 
1295 | : THROW		( n -- )
1296 | 	?DUP IF			( only act if the exception code <> 0 )
1297 | 		RSP@ 			( get return stack pointer )
1298 | 		BEGIN
1299 | 			DUP R0 8- <		( RSP < R0 )
1300 | 		WHILE
1301 | 			DUP @			( get the return stack entry )
1302 | 			' EXCEPTION-MARKER 4+ = IF	( found the EXCEPTION-MARKER on the return stack )
1303 | 				8+			( skip the EXCEPTION-MARKER on the return stack )
1304 | 				RSP!			( restore the return stack pointer )
1305 | 
1306 | 				( Restore the parameter stack. )
1307 | 				DUP DUP DUP		( reserve some working space so the stack for this word
1308 | 							  doesn't coincide with the part of the stack being restored )
1309 | 				R>			( get the saved parameter stack pointer | n dsp )
1310 | 				8-			( reserve space on the stack to store n )
1311 | 				SWAP OVER		( dsp n dsp )
1312 | 				!			( write n on the stack )
1313 | 				DSP! EXIT		( restore the parameter stack pointer, immediately exit )
1314 | 			THEN
1315 | 			8+
1316 | 		REPEAT
1317 | 
1318 | 		( No matching catch - print a message and restart the INTERPRETer. )
1319 | 		DROP
1320 | 
1321 | 		CASE
1322 | 		0 1- OF	( ABORT )
1323 | 			." ABORTED" CR
1324 | 		ENDOF
1325 | 			( default case )
1326 | 			." UNCAUGHT THROW "
1327 | 			DUP . CR
1328 | 		ENDCASE
1329 | 		QUIT
1330 | 	THEN
1331 | ;
1332 | 
1333 | : ABORT		( -- )
1334 | 	0 1- THROW
1335 | ;
1336 | 
1337 | ( Print a stack trace by walking up the return stack. )
1338 | : PRINT-STACK-TRACE
1339 | 	RSP@				( start at caller of this function )
1340 | 	BEGIN
1341 | 		DUP R0 8- <		( RSP < R0 )
1342 | 	WHILE
1343 | 		DUP @			( get the return stack entry )
1344 | 		CASE
1345 | 		' EXCEPTION-MARKER 4+ OF	( is it the exception stack frame? )
1346 | 			." CATCH ( DSP="
1347 | 			8+ DUP @ U.		( print saved stack pointer )
1348 | 			." ) "
1349 | 		ENDOF
1350 | 						( default case )
1351 | 			DUP
1352 | 			CFA>			( look up the codeword to get the dictionary entry )
1353 | 			?DUP IF			( and print it )
1354 | 				2DUP			( dea addr dea )
1355 | 				ID.			( print word from dictionary entry )
1356 | 				[ CHAR + ] LITERAL EMIT
1357 | 				SWAP >DFA 8+ - .	( print offset )
1358 | 			THEN
1359 | 		ENDCASE
1360 | 		8+			( move up the stack )
1361 | 	REPEAT
1362 | 	DROP
1363 | 	CR
1364 | ;
1365 | 
1366 | (
1367 | 	C STRINGS ----------------------------------------------------------------------
1368 | 
1369 | 	FORTH strings are represented by a start address and length kept on the stack or in memory.
1370 | 
1371 | 	Most FORTHs don't handle C strings, but we need them in order to access the process arguments
1372 | 	and environment left on the stack by the Linux kernel, and to make some system calls.
1373 | 
1374 | 	Operation	Input		Output		FORTH word	Notes
1375 | 	----------------------------------------------------------------------
1376 | 
1377 | 	Create FORTH string		addr len	S" ..."
1378 | 
1379 | 	Create C string			c-addr		Z" ..."
1380 | 
1381 | 	C -> FORTH	c-addr		addr len	DUP STRLEN
1382 | 
1383 | 	FORTH -> C	addr len	c-addr		CSTRING		Allocated in a temporary buffer, so
1384 | 									should be consumed / copied immediately.
1385 | 									FORTH string should not contain NULs.
1386 | 
1387 | 	For example, DUP STRLEN TELL prints a C string.
1388 | )
1389 | 
1390 | (
1391 | 	Z" .." is like S" ..." except that the string is terminated by an ASCII NUL character.
1392 | 
1393 | 	To make it more like a C string, at runtime Z" just leaves the address of the string
1394 | 	on the stack (not address & length as with S").  To implement this we need to add the
1395 | 	extra NUL to the string and also a DROP instruction afterwards.  Apart from that the
1396 | 	implementation just a modified S".
1397 | )
1398 | : Z" IMMEDIATE
1399 | 	STATE @ IF	( compiling? )
1400 | 		' LITSTRING ,	( compile LITSTRING )
1401 | 		HERE @		( save the address of the length word on the stack )
1402 | 		0 ,		( dummy length - we don't know what it is yet )
1403 | 		BEGIN
1404 | 			KEY 		( get next character of the string )
1405 | 			DUP '"' <>
1406 | 		WHILE
1407 | 			HERE @ C!	( store the character in the compiled image )
1408 | 			1 HERE +!	( increment HERE pointer by 1 byte )
1409 | 		REPEAT
1410 | 		0 HERE @ C!	( add the ASCII NUL byte )
1411 | 		1 HERE +!
1412 | 		DROP		( drop the double quote character at the end )
1413 | 		DUP		( get the saved address of the length word )
1414 | 		HERE @ SWAP -	( calculate the length )
1415 | 		8-		( subtract 8 (because we measured from the start of the length word) )
1416 | 		SWAP !		( and back-fill the length location )
1417 | 		ALIGN		( round up to next multiple of 8 bytes for the remaining code )
1418 | 		' DROP ,	( compile DROP (to drop the length) )
1419 | 	ELSE		( immediate mode )
1420 | 		HERE @		( get the start address of the temporary space )
1421 | 		BEGIN
1422 | 			KEY
1423 | 			DUP '"' <>
1424 | 		WHILE
1425 | 			OVER C!		( save next character )
1426 | 			1+		( increment address )
1427 | 		REPEAT
1428 | 		DROP		( drop the final " character )
1429 | 		0 SWAP C!	( store final ASCII NUL )
1430 | 		HERE @		( push the start address )
1431 | 	THEN
1432 | ;
1433 | 
1434 | : STRLEN 	( str -- len )
1435 | 	DUP		( save start address )
1436 | 	BEGIN
1437 | 		DUP C@ 0<>	( zero byte found? )
1438 | 	WHILE
1439 | 		1+
1440 | 	REPEAT
1441 | 
1442 | 	SWAP -		( calculate the length )
1443 | ;
1444 | 
1445 | : CSTRING	( addr len -- c-addr )
1446 | 	SWAP OVER	( len saddr len )
1447 | 	HERE @ SWAP	( len saddr daddr len )
1448 | 	CMOVE		( len )
1449 | 
1450 | 	HERE @ +	( daddr+len )
1451 | 	0 SWAP C!	( store terminating NUL char )
1452 | 
1453 | 	HERE @ 		( push start address )
1454 | ;
1455 | 
1456 | (
1457 | 	THE ENVIRONMENT ----------------------------------------------------------------------
1458 | 
1459 | 	Linux makes the process arguments and environment available to us on the stack.
1460 | 
1461 | 	The top of stack pointer is saved by the early assembler code when we start up in the FORTH
1462 | 	variable S0, and starting at this pointer we can read out the command line arguments and the
1463 | 	environment.
1464 | 
1465 | 	Starting at S0, S0 itself points to argc (the number of command line arguments).
1466 | 
1467 | 	S0+8 points to argv[0], S0+8 points to argv[1] etc up to argv[argc-1].
1468 | 
1469 | 	argv[argc] is a NULL pointer.
1470 | 
1471 | 	After that the stack contains environment variables, a set of pointers to strings of the
1472 | 	form NAME=VALUE and on until we get to another NULL pointer.
1473 | 
1474 | 	The first word that we define, ARGC, pushes the number of command line arguments (note that
1475 | 	as with C argc, this includes the name of the command).
1476 | )
1477 | : ARGC
1478 | 	S0 @ @
1479 | ;
1480 | 
1481 | (
1482 | 	n ARGV gets the nth command line argument.
1483 | 
1484 | 	For example to print the command name you would do:
1485 | 		0 ARGV TELL CR
1486 | )
1487 | : ARGV ( n -- str u )
1488 | 	1+ CELLS S0 @ +	( get the address of argv[n] entry )
1489 | 	@		( get the address of the string )
1490 | 	DUP STRLEN	( and get its length / turn it into a FORTH string )
1491 | ;
1492 | 
1493 | (
1494 | 	ENVIRON returns the address of the first environment string.  The list of strings ends
1495 | 	with a NULL pointer.
1496 | 
1497 | 	For example to print the first string in the environment you could do:
1498 | 		ENVIRON @ DUP STRLEN TELL
1499 | )
1500 | : ENVIRON	( -- addr )
1501 | 	ARGC		( number of command line parameters on the stack to skip )
1502 | 	2 +		( skip command line count and NULL pointer after the command line args )
1503 | 	CELLS		( convert to an offset )
1504 | 	S0 @ +		( add to base stack address )
1505 | ;
1506 | 
1507 | (
1508 | 	SYSTEM CALLS AND FILES  ----------------------------------------------------------------------
1509 | 
1510 | 	Miscellaneous words related to system calls, and standard access to files.
1511 | )
1512 | 
1513 | ( BYE exits by calling the Linux exit(2) syscall. )
1514 | : BYE		( -- )
1515 | 	0		( return code (0) )
1516 | 	SYS_EXIT	( system call number )
1517 | 	SYSCALL1
1518 | ;
1519 | 
1520 | (
1521 | 	UNUSED returns the number of cells remaining in the user memory (data segment).
1522 | 
1523 | 	For our implementation we will use Linux brk(2) system call to find out the end
1524 | 	of the data segment and subtract HERE from it.
1525 | )
1526 | : GET-BRK	( -- brkpoint )
1527 | 	0 SYS_BRK SYSCALL1	( call brk(0) )
1528 | ;
1529 | 
1530 | : UNUSED	( -- n )
1531 | 	GET-BRK		( get end of data segment according to the kernel )
1532 | 	HERE @		( get current position in data segment )
1533 | 	-
1534 | 	8 /		( returns number of cells )
1535 | ;
1536 | 
1537 | (
1538 | 	MORECORE increases the data segment by the specified number of (8 byte) cells.
1539 | 
1540 | 	NB. The number of cells requested should normally be a multiple of 1024.  The
1541 | 	reason is that Linux can't extend the data segment by less than a single page
1542 | 	(4096 bytes or 1024 cells).
1543 | 
1544 | 	This FORTH doesn't automatically increase the size of the data segment "on demand"
1545 | 	(ie. when , (COMMA), ALLOT, CREATE, and so on are used).  Instead the programmer
1546 | 	needs to be aware of how much space a large allocation will take, check UNUSED, and
1547 | 	call MORECORE if necessary.  A simple programming exercise is to change the
1548 | 	implementation of the data segment so that MORECORE is called automatically if
1549 | 	the program needs more memory.
1550 | )
1551 | : BRK		( brkpoint -- )
1552 | 	SYS_BRK SYSCALL1
1553 | ;
1554 | 
1555 | : MORECORE	( cells -- )
1556 | 	CELLS GET-BRK + BRK
1557 | ;
1558 | 
1559 | (
1560 | 	Standard FORTH provides some simple file access primitives which we model on
1561 | 	top of Linux syscalls.
1562 | 
1563 | 	The main complication is converting FORTH strings (address & length) into C
1564 | 	strings for the Linux kernel.
1565 | 
1566 | 	Notice there is no buffering in this implementation.
1567 | )
1568 | 
1569 | : R/O ( -- fam ) O_RDONLY ;
1570 | : R/W ( -- fam ) O_RDWR ;
1571 | 
1572 | : OPEN-FILE	( addr u fam -- fd 0 (if successful) | c-addr u fam -- fd errno (if there was an error) )
1573 | 	-ROT		( fam addr u )
1574 | 	CSTRING		( fam cstring )
1575 | 	SYS_OPEN SYSCALL2 ( open (filename, flags) )
1576 | 	DUP		( fd fd )
1577 | 	DUP 0< IF	( errno? )
1578 | 		NEGATE		( fd errno )
1579 | 	ELSE
1580 | 		DROP 0		( fd 0 )
1581 | 	THEN
1582 | ;
1583 | 
1584 | : CREATE-FILE	( addr u fam -- fd 0 (if successful) | c-addr u fam -- fd errno (if there was an error) )
1585 | 	O_CREAT OR
1586 | 	O_TRUNC OR
1587 | 	-ROT		( fam addr u )
1588 | 	CSTRING		( fam cstring )
1589 | 	420 -ROT	( 0644 fam cstring )
1590 | 	SYS_OPEN SYSCALL3 ( open (filename, flags|O_TRUNC|O_CREAT, 0644) )
1591 | 	DUP		( fd fd )
1592 | 	DUP 0< IF	( errno? )
1593 | 		NEGATE		( fd errno )
1594 | 	ELSE
1595 | 		DROP 0		( fd 0 )
1596 | 	THEN
1597 | ;
1598 | 
1599 | : CLOSE-FILE	( fd -- 0 (if successful) | fd -- errno (if there was an error) )
1600 | 	SYS_CLOSE SYSCALL1
1601 | 	NEGATE
1602 | ;
1603 | 
1604 | : READ-FILE	( addr u fd -- u2 0 (if successful) | addr u fd -- 0 0 (if EOF) | addr u fd -- u2 errno (if error) )
1605 | 	>R SWAP R>	( u addr fd )
1606 | 	SYS_READ SYSCALL3
1607 | 
1608 | 	DUP		( u2 u2 )
1609 | 	DUP 0< IF	( errno? )
1610 | 		NEGATE		( u2 errno )
1611 | 	ELSE
1612 | 		DROP 0		( u2 0 )
1613 | 	THEN
1614 | ;
1615 | 
1616 | (
1617 | 	PERROR prints a message for an errno, similar to C's perror(3) but we don't have the extensive
1618 | 	list of strerror strings available, so all we can do is print the errno.
1619 | )
1620 | : PERROR	( errno addr u -- )
1621 | 	TELL
1622 | 	':' EMIT SPACE
1623 | 	." ERRNO="
1624 | 	. CR
1625 | ;
1626 | 
1627 | (
1628 | 	ASSEMBLER CODE ----------------------------------------------------------------------
1629 | 
1630 | 	This is just the outline of a simple assembler, allowing you to write FORTH primitives
1631 | 	in assembly language.
1632 | 
1633 | 	Assembly primitives begin ': NAME' in the normal way, but are ended with ;CODE.  ;CODE
1634 | 	updates the header so that the codeword isn't DOCOL, but points instead to the assembled
1635 | 	code (in the DFA part of the word).
1636 | 
1637 | 	We provide a convenience macro NEXT (you guessed what it does).  However you don't need to
1638 | 	use it because ;CODE will put a NEXT at the end of your word.
1639 | 
1640 | 	The rest consists of some immediate words which expand into machine code appended to the
1641 | 	definition of the word.  Only a very tiny part of the i386 assembly space is covered, just
1642 | 	enough to write a few assembler primitives below.
1643 | )
1644 | 
1645 | HEX
1646 | 
1647 | ( Equivalent to the NEXT macro )
1648 | : NEXT IMMEDIATE AD C, FF C, 20 C, ;
1649 | 
1650 | : ;CODE IMMEDIATE
1651 | 	[COMPILE] NEXT		( end the word with NEXT macro )
1652 | 	ALIGN			( machine code is assembled in bytes so isn't necessarily aligned at the end )
1653 | 	LATEST @ DUP
1654 | 	HIDDEN			( unhide the word )
1655 | 	DUP >DFA SWAP >CFA !	( change the codeword to point to the data area )
1656 | 	[COMPILE] [		( go back to immediate mode )
1657 | ;
1658 | 
1659 | ( The i386 registers )
1660 | : EAX IMMEDIATE 0 ;
1661 | : ECX IMMEDIATE 1 ;
1662 | : EDX IMMEDIATE 2 ;
1663 | : EBX IMMEDIATE 3 ;
1664 | : ESP IMMEDIATE 4 ;
1665 | : EBP IMMEDIATE 5 ;
1666 | : ESI IMMEDIATE 6 ;
1667 | : EDI IMMEDIATE 7 ;
1668 | 
1669 | ( i386 stack instructions )
1670 | : PUSH IMMEDIATE 50 + C, ;
1671 | : POP IMMEDIATE 58 + C, ;
1672 | 
1673 | ( RDTSC instruction )
1674 | : RDTSC IMMEDIATE 0F C, 31 C, ;
1675 | 
1676 | DECIMAL
1677 | 
1678 | (
1679 | 	RDTSC is an assembler primitive which reads the Pentium timestamp counter (a very fine-
1680 | 	grained counter which counts processor clock cycles).  Because the TSC is 64 bits wide
1681 | 	we have to push it onto the stack in two slots.
1682 | )
1683 | : RDTSC		( -- lsb msb )
1684 | 	RDTSC		( writes the result in %edx:%eax )
1685 | 	EAX PUSH	( push lsb )
1686 | 	EDX PUSH	( push msb )
1687 | ;CODE
1688 | 
1689 | (
1690 | 	INLINE can be used to inline an assembler primitive into the current (assembler)
1691 | 	word.
1692 | 
1693 | 	For example:
1694 | 
1695 | 		: 2DROP INLINE DROP INLINE DROP ;CODE
1696 | 
1697 | 	will build an efficient assembler word 2DROP which contains the inline assembly code
1698 | 	for DROP followed by DROP (eg. two 'pop %eax' instructions in this case).
1699 | 
1700 | 	Another example.  Consider this ordinary FORTH definition:
1701 | 
1702 | 		: C@++ ( addr -- addr+1 byte ) DUP 1+ SWAP C@ ;
1703 | 
1704 | 	(it is equivalent to the C operation '*p++' where p is a pointer to char).  If we
1705 | 	notice that all of the words used to define C@++ are in fact assembler primitives,
1706 | 	then we can write a faster (but equivalent) definition like this:
1707 | 
1708 | 		: C@++ INLINE DUP INLINE 1+ INLINE SWAP INLINE C@ ;CODE
1709 | 
1710 | 	One interesting point to note is that this "concatenative" style of programming
1711 | 	allows you to write assembler words portably.  The above definition would work
1712 | 	for any CPU architecture.
1713 | 
1714 | 	There are several conditions that must be met for INLINE to be used successfully:
1715 | 
1716 | 	(1) You must be currently defining an assembler word (ie. : ... ;CODE).
1717 | 
1718 | 	(2) The word that you are inlining must be known to be an assembler word.  If you try
1719 | 	to inline a FORTH word, you'll get an error message.
1720 | 
1721 | 	(3) The assembler primitive must be position-independent code and must end with a
1722 | 	single NEXT macro.
1723 | 
1724 | 	Exercises for the reader: (a) Generalise INLINE so that it can inline FORTH words when
1725 | 	building FORTH words. (b) Further generalise INLINE so that it does something sensible
1726 | 	when you try to inline FORTH into assembler and vice versa.
1727 | 
1728 | 	The implementation of INLINE is pretty simple.  We find the word in the dictionary,
1729 | 	check it's an assembler word, then copy it into the current definition, byte by byte,
1730 | 	until we reach the NEXT macro (which is not copied).
1731 | )
1732 | HEX
1733 | : =NEXT		( addr -- next? )
1734 | 	   DUP C@ AD <> IF DROP FALSE EXIT THEN
1735 | 	1+ DUP C@ FF <> IF DROP FALSE EXIT THEN
1736 | 	1+     C@ 20 <> IF      FALSE EXIT THEN
1737 | 	TRUE
1738 | ;
1739 | DECIMAL
1740 | 
1741 | ( (INLINE) is the lowlevel inline function. )
1742 | : (INLINE)	( cfa -- )
1743 | 	@			( remember codeword points to the code )
1744 | 	BEGIN			( copy bytes until we hit NEXT macro )
1745 | 		DUP =NEXT NOT
1746 | 	WHILE
1747 | 		DUP C@ C,
1748 | 		1+
1749 | 	REPEAT
1750 | 	DROP
1751 | ;
1752 | 
1753 | : INLINE IMMEDIATE
1754 | 	WORD FIND		( find the word in the dictionary )
1755 | 	>CFA			( codeword )
1756 | 
1757 | 	DUP @ DOCOL = IF	( check codeword <> DOCOL (ie. not a FORTH word) )
1758 | 		." Cannot INLINE FORTH words" CR ABORT
1759 | 	THEN
1760 | 
1761 | 	(INLINE)
1762 | ;
1763 | 
1764 | HIDE =NEXT
1765 | 
1766 | (
1767 | 	NOTES ----------------------------------------------------------------------
1768 | 
1769 | 	DOES> isn't possible to implement with this FORTH because we don't have a separate
1770 | 	data pointer.
1771 | )
1772 | 
1773 | (
1774 | 	WELCOME MESSAGE ----------------------------------------------------------------------
1775 | 
1776 | 	Print the version and OK prompt.
1777 | )
1778 | 
1779 | : WELCOME
1780 | 	S" TEST-MODE" FIND NOT IF
1781 | 		." JONESFORTH VERSION " VERSION . CR
1782 | 		UNUSED . ." CELLS REMAINING" CR
1783 | 		." OK "
1784 | 	THEN
1785 | ;
1786 | 
1787 | WELCOME
1788 | HIDE WELCOME
1789 | 


--------------------------------------------------------------------------------
/perf_dupdrop.f:
--------------------------------------------------------------------------------
 1 | ( -*- text -*-
 2 |   FORTH repeated DUP DROP * 1000 using ordinary indirect threaded code
 3 |   and the assembler primitives.
 4 |   $Id: perf_dupdrop.f,v 1.3 2007-10-12 01:46:26 rich Exp $ )
 5 | 
 6 | 1024 32 * MORECORE
 7 | 
 8 | ( Print the time passed. )
 9 | : PRINT-TIME	( lsb msb lsb msb -- lsb lsb )
10 | 	( The test is very short so likely the MSBs will be the same.  This
11 | 	  makes calculating the time easier (because we can only do 32 bit
12 | 	    subtraction).  So check MSBs are equal. )
13 | 	2 PICK <> IF
14 | 		." MSBs not equal, please repeat the test" CR
15 | 	ELSE
16 | 		NIP
17 | 		SWAP - U. CR
18 | 	THEN
19 | ;
20 | 
21 | : 4DROP DROP DROP DROP DROP ;
22 | 
23 | : PERFORM-TEST	( xt -- )
24 | 	( Get everything in the cache. )
25 | 	DUP EXECUTE 4DROP
26 | 	DUP EXECUTE 4DROP
27 | 	DUP EXECUTE 4DROP
28 | 	DUP EXECUTE 4DROP
29 | 	DUP EXECUTE 4DROP
30 | 	DUP EXECUTE 4DROP
31 | 	0 0 0 0 PRINT-TIME
32 | 	( Run the test 10 times. )
33 | 	DUP EXECUTE PRINT-TIME
34 | 	DUP EXECUTE PRINT-TIME
35 | 	DUP EXECUTE PRINT-TIME
36 | 	DUP EXECUTE PRINT-TIME
37 | 	DUP EXECUTE PRINT-TIME
38 | 	DUP EXECUTE PRINT-TIME
39 | 	DUP EXECUTE PRINT-TIME
40 | 	DUP EXECUTE PRINT-TIME
41 | 	DUP EXECUTE PRINT-TIME
42 | 	DUP EXECUTE PRINT-TIME
43 | 	DROP
44 | ;
45 | 
46 | ( ---------------------------------------------------------------------- )
47 | ( Make a word which builds the repeated DUP DROP sequence. )
48 | : MAKE-DUPDROP	( n -- )
49 | 	BEGIN ?DUP WHILE ' DUP , ' DROP , 1- REPEAT
50 | ;
51 | 
52 | ( Now the actual test routine. )
53 | : TEST		( -- startlsb startmsb endlsb endmsb )
54 | 	RDTSC			( Start time )
55 | 	[ 1000 MAKE-DUPDROP ]	( 1000 * DUP DROP )
56 | 	RDTSC			( End time )
57 | ;
58 | 
59 | : RUN ['] TEST PERFORM-TEST ;
60 | RUN
61 | 
62 | ( ---------------------------------------------------------------------- )
63 | ( Try the inlined alternative. )
64 | 
65 | ( Inline the assembler primitive (cfa) n times. )
66 | : *(INLINE) ( cfa n -- )
67 | 	BEGIN ?DUP WHILE OVER (INLINE) 1- REPEAT DROP
68 | ;
69 | 
70 | : DUPDROP INLINE DUP INLINE DROP ;CODE
71 | 
72 | : TEST
73 | 	INLINE RDTSC
74 | 	[ S" DUPDROP" FIND >CFA 1000 *(INLINE) ]
75 | 	INLINE RDTSC
76 | ;CODE
77 | 
78 | : RUN ['] TEST PERFORM-TEST ;
79 | RUN
80 | 


--------------------------------------------------------------------------------
/test_assembler.f:
--------------------------------------------------------------------------------
 1 | ( -*- text -*- )
 2 | 
 3 | : 2DROP INLINE DROP INLINE DROP ;CODE
 4 | 
 5 | : C@++ INLINE DUP INLINE 1+ INLINE SWAP INLINE C@ ;CODE
 6 | 
 7 | : TEST
 8 | 	." 2DROP: "	1 2 3 4 2DROP . . CR
 9 | 
10 | 	S" testing" DROP
11 | 	C@++ EMIT CR
12 | 	C@++ EMIT CR
13 | 	C@++ EMIT CR
14 | 	C@++ EMIT CR
15 | 	C@++ EMIT CR
16 | 	C@++ EMIT CR
17 | 	C@++ EMIT CR
18 | 	DROP
19 | ;
20 | 


--------------------------------------------------------------------------------
/test_assembler.f.out:
--------------------------------------------------------------------------------
1 | 2DROP: 2 1 
2 | t
3 | e
4 | s
5 | t
6 | i
7 | n
8 | g
9 | 


--------------------------------------------------------------------------------
/test_comparison.f:
--------------------------------------------------------------------------------
 1 | ( -*- text -*- )
 2 | 
 3 | : TEST
 4 | 	1 0 < . CR
 5 | 	0 1 < . CR
 6 | 	1 -1 < . CR
 7 | 	-1 1 < . CR
 8 | 	-1 0 < . CR
 9 | 	0 -1 < . CR CR
10 | 
11 | 	1 0 > . CR
12 | 	0 1 > . CR
13 | 	1 -1 > . CR
14 | 	-1 1 > . CR
15 | 	-1 0 > . CR
16 | 	0 -1 > . CR CR
17 | 
18 | 	1 1 <= . CR
19 | 	0 0 <= . CR
20 | 	-1 -1 <= . CR
21 | 	1 0 <= . CR
22 | 	0 1 <= . CR
23 | 	1 -1 <= . CR
24 | 	-1 1 <= . CR
25 | 	-1 0 <= . CR
26 | 	0 -1 <= . CR CR
27 | 
28 | 	1 1 >= . CR
29 | 	0 0 >= . CR
30 | 	-1 -1 >= . CR
31 | 	1 0 >= . CR
32 | 	0 1 >= . CR
33 | 	1 -1 >= . CR
34 | 	-1 1 >= . CR
35 | 	-1 0 >= . CR
36 | 	0 -1 >= . CR CR
37 | 
38 | 	1 1 = . CR
39 | 	1 0 = . CR
40 | 	0 0 = . CR
41 | 	1 -1 = . CR
42 | 	-1 -1 = . CR CR
43 | 
44 | 	1 1 <> . CR
45 | 	1 0 <> . CR
46 | 	0 0 <> . CR
47 | 	1 -1 <> . CR
48 | 	-1 -1 <> . CR CR
49 | 
50 | 	1 0= . CR
51 | 	0 0= . CR
52 | 	-1 0= . CR CR
53 | 
54 | 	1 0<> . CR
55 | 	0 0<> . CR
56 | 	-1 0<> . CR CR
57 | 
58 | 	1 0< . CR
59 | 	0 0< . CR
60 | 	-1 0< . CR CR
61 | 
62 | 	1 0> . CR
63 | 	0 0> . CR
64 | 	-1 0> . CR CR
65 | 
66 | 	1 0<= . CR
67 | 	0 0<= . CR
68 | 	-1 0<= . CR CR
69 | 
70 | 	1 0>= . CR
71 | 	0 0>= . CR
72 | 	-1 0>= . CR CR
73 | ;
74 | 


--------------------------------------------------------------------------------
/test_comparison.f.out:
--------------------------------------------------------------------------------
 1 | 0 
 2 | 1 
 3 | 0 
 4 | 1 
 5 | 1 
 6 | 0 
 7 | 
 8 | 1 
 9 | 0 
10 | 1 
11 | 0 
12 | 0 
13 | 1 
14 | 
15 | 1 
16 | 1 
17 | 1 
18 | 0 
19 | 1 
20 | 0 
21 | 1 
22 | 1 
23 | 0 
24 | 
25 | 1 
26 | 1 
27 | 1 
28 | 1 
29 | 0 
30 | 1 
31 | 0 
32 | 0 
33 | 1 
34 | 
35 | 1 
36 | 0 
37 | 1 
38 | 0 
39 | 1 
40 | 
41 | 0 
42 | 1 
43 | 0 
44 | 1 
45 | 0 
46 | 
47 | 0 
48 | 1 
49 | 0 
50 | 
51 | 1 
52 | 0 
53 | 1 
54 | 
55 | 0 
56 | 0 
57 | 1 
58 | 
59 | 1 
60 | 0 
61 | 0 
62 | 
63 | 0 
64 | 1 
65 | 1 
66 | 
67 | 1 
68 | 1 
69 | 0 
70 | 
71 | 


--------------------------------------------------------------------------------
/test_exception.f:
--------------------------------------------------------------------------------
 1 | ( -*- text -*- )
 2 | 
 3 | : TEST4 PRINT-STACK-TRACE THROW ;
 4 | 
 5 | : TEST3 0 TEST4 26 TEST4 ;
 6 | 
 7 | : TEST2
 8 | 	['] TEST3 CATCH
 9 | 	?DUP IF ." TEST3 threw exception " . CR THEN
10 | 	TEST3
11 | ;
12 | 
13 | : TEST TEST2 ;
14 | 


--------------------------------------------------------------------------------
/test_exception.f.out:
--------------------------------------------------------------------------------
1 | TEST4+0 TEST3+16 CATCH+56 CATCH (  ) TEST2+16 TEST+0 
2 | TEST4+0 TEST3+40 CATCH+56 CATCH (  ) TEST2+16 TEST+0 
3 | TEST3 threw exception 26 
4 | TEST4+0 TEST3+16 TEST2+112 TEST+0 
5 | TEST4+0 TEST3+40 TEST2+112 TEST+0 
6 | UNCAUGHT THROW 26 
7 | 


--------------------------------------------------------------------------------
/test_number.f:
--------------------------------------------------------------------------------
 1 | ( -*- text -*- )
 2 | 
 3 | : TEST
 4 | 	123                                . CR
 5 | 	[ HEX -7F ] LITERAL      DECIMAL   . CR
 6 | 	[ HEX 7FF77FF7 ] LITERAL HEX       . CR
 7 | 	[ HEX -7FF77FF7 ] LITERAL 2 BASE ! . CR
 8 | 	[ 2 BASE ! 1111111111101110111111111110111 ] LITERAL HEX . CR
 9 | ;
10 | 
11 | DECIMAL ( restore immediate-mode base )
12 | 


--------------------------------------------------------------------------------
/test_number.f.out:
--------------------------------------------------------------------------------
1 | 123 
2 | -127 
3 | 7FF77FF7 
4 | -1111111111101110111111111110111 
5 | 7FF77FF7 
6 | 


--------------------------------------------------------------------------------
/test_read_file.f:
--------------------------------------------------------------------------------
 1 | ( -*- text -*-
 2 | 	Test READ-FILE.
 3 | 	$Id: test_read_file.f,v 1.2 2007-10-22 18:53:13 rich Exp $
 4 | )
 5 | 
 6 | 0 VALUE FD
 7 | 100 CELLS ALLOT CONSTANT BUFFER
 8 | 
 9 | : TEST
10 | 	S" test_read_file.f.out" R/O OPEN-FILE
11 | 	?DUP IF S" test_read_file.f.out" PERROR QUIT THEN
12 | 
13 | 	TO FD
14 | 
15 | 	BEGIN
16 | 		BUFFER 100 CELLS FD READ-FILE
17 | 		?DUP IF S" READ-FILE" PERROR QUIT THEN
18 | 		DUP
19 | 		BUFFER SWAP TELL
20 | 	0= UNTIL
21 | 
22 | 	FD CLOSE-FILE
23 | 	?DUP IF S" CLOSE-FILE" PERROR QUIT THEN
24 | ;
25 | 


--------------------------------------------------------------------------------
/test_read_file.f.out:
--------------------------------------------------------------------------------
1 | /dev/VolGroup00/LogVol00 /                       ext3    defaults,noatime 1 1
2 | LABEL=/boot             /boot                   ext3    defaults        1 2
3 | tmpfs                   /dev/shm                tmpfs   defaults        0 0
4 | devpts                  /dev/pts                devpts  gid=5,mode=620  0 0
5 | sysfs                   /sys                    sysfs   defaults        0 0
6 | proc                    /proc                   proc    defaults        0 0
7 | /dev/VolGroup00/LogVol01 swap                    swap    defaults        0 0
8 | 


--------------------------------------------------------------------------------
/test_stack.f:
--------------------------------------------------------------------------------
 1 | ( -*- text -*- )
 2 | 
 3 | : TEST
 4 | 	DEPTH . CR
 5 | 
 6 | 	42 DUP . . CR
 7 | 	23 DROP DEPTH . CR
 8 | 	1 2 SWAP . . CR
 9 | 	1 2 OVER . . . CR
10 | 	1 2 3 -ROT . . . CR
11 | 	1 2 3 ROT . . . CR
12 | 	1 2 3 4 2DROP . . CR
13 | 	1 2 3 4 2DUP . . . . . . CR
14 | 	1 2 3 4 2SWAP . . . . CR
15 | 
16 | 	DEPTH . CR
17 | ;
18 | 


--------------------------------------------------------------------------------
/test_stack.f.out:
--------------------------------------------------------------------------------
 1 | 0 
 2 | 42 42 
 3 | 0 
 4 | 1 2 
 5 | 1 2 1 
 6 | 2 1 3 
 7 | 1 3 2 
 8 | 2 1 
 9 | 4 3 4 3 2 1 
10 | 2 1 4 3 
11 | 0 
12 | 


--------------------------------------------------------------------------------
/test_stack_trace.f:
--------------------------------------------------------------------------------
 1 | ( -*- text -*- )
 2 | 
 3 | : TEST4 PRINT-STACK-TRACE ;
 4 | 
 5 | : TEST3 TEST4 1 2 + . CR TEST4 ;
 6 | 
 7 | : TEST2 TEST3 TEST3 ;
 8 | 
 9 | : TEST TEST2 ;
10 | 


--------------------------------------------------------------------------------
/test_stack_trace.f.out:
--------------------------------------------------------------------------------
1 | TEST4+0 TEST3+0 TEST2+0 TEST+0 
2 | 3 
3 | TEST4+0 TEST3+64 TEST2+0 TEST+0 
4 | TEST4+0 TEST3+0 TEST2+8 TEST+0 
5 | 3 
6 | TEST4+0 TEST3+64 TEST2+8 TEST+0 
7 | 


--------------------------------------------------------------------------------