├── README.md
├── linux
    ├── Makefile
    └── itsy-linux.asm
├── msdos
    ├── itsy.asm
    └── macros.asm
└── osx
    ├── Makefile
    └── itsy-osx.asm


/README.md:
--------------------------------------------------------------------------------
 1 | itsy-linux
 2 | ==========
 3 | 
 4 | Unix (Linux & MacOS X) port of itsy Forth by John Metcalf
 5 | 
 6 | John Metcalf (http://www.retroprogramming.com/) implemented minimal forth system:
 7 | <ul>
 8 | <li>http://www.retroprogramming.com/2012/03/itsy-forth-1k-tiny-compiler.html </li>
 9 | <li>http://www.retroprogramming.com/2012/04/itsy-forth-dictionary-and-inner.html </li>
10 | <li>http://www.retroprogramming.com/2012/04/itsy-forth-primitives.html </li>
11 | <li>http://www.retroprogramming.com/2012/06/itsy-forth-compiler.html </li>
12 | <li>http://www.retroprogramming.com/2012/09/itsy-documenting-bit-twiddling-voodoo.html </li>
13 | </ul>
14 | Resulting binary has very impressive size (978 bytes on Linux) and can be used to bootstrap a complete Forth system.
15 | 
16 | Original itsy code is producing .com files and can be found in "msdos" directory of this repository.
17 | 
18 | kt97679 ported it to 32-bit linux code. 
19 | DylanMc ported it to MacOS (also 32 bit). Should move to any BSD pretty easily.
20 | 
21 | Sample session:
22 | 
23 | <pre>
24 | $ make
25 | nasm itsy-linux.asm -fbin -l itsy-linux.lst -o itsy-linux
26 | chmod +x itsy-linux
27 | $ ./itsy-linux 
28 | : say_hi 72 emit 105 emit 33 emit 10 emit ;
29 | say_hi
30 | Hi!
31 | ^C
32 | $ 
33 | </pre>
34 | 
35 | TODO:
36 | <ul>
37 | <li> get string literals going </li>
38 | <li> implement string operations, like ." </li>
39 | <li> implement number->string </li>
40 | <li> implement . </li>
41 | <li> if then </li>
42 | <li> do .. loop </li>
43 | </ul>
44 | 


--------------------------------------------------------------------------------
/linux/Makefile:
--------------------------------------------------------------------------------
1 | all: itsy-linux
2 | 
3 | itsy-linux:
4 | 	nasm itsy-linux.asm -fbin -l itsy-linux.lst -o itsy-linux
5 | 	chmod +x itsy-linux
6 | clean:
7 | 	rm itsy-linux.lst itsy-linux
8 | 


--------------------------------------------------------------------------------
/linux/itsy-linux.asm:
--------------------------------------------------------------------------------
  1 | ; nasm itsy-linux.asm -fbin -l itsy-linux.lst -o itsy-linux && chmod +x itsy-linux
  2 | 
  3 | %define link 0
  4 | %define immediate 080h
  5 | 
  6 | %macro head 4
  7 | %%link dd link
  8 | %define link %%link
  9 | %strlen %%count %1
 10 | db %3 + %%count, %1
 11 | xt_ %+ %2 dd %4
 12 | %endmacro
 13 | 
 14 | %macro primitive 2-3 0
 15 | head %1, %2, %3, $ + 4
 16 | %endmacro
 17 | 
 18 | %macro colon 2-3 0
 19 | head %1, %2, %3, docolon
 20 | %endmacro
 21 | 
 22 | %macro variable 3
 23 | head %1, %2, 0, dovar
 24 | val_ %+ %2 dd %3
 25 | %endmacro
 26 | 
 27 | %define TEXTORG 0x00400000
 28 | %define MEMSIZE 1048576
 29 | %define TIBSIZE 80
 30 | %define STACKSIZE 4096
 31 | %define TIBPTR TEXTORG + MEMSIZE - TIBSIZE
 32 | %define SP0 TIBPTR - 4
 33 | %define RP0 SP0 - STACKSIZE
 34 | 
 35 | BITS 32
 36 |         org     TEXTORG
 37 | 
 38 | ehdr:                           ; Elf32_Ehdr
 39 |         db   0x7F, "ELF", 1, 1, 1, 0     ; e_ident
 40 |         times 8 db   0
 41 |         dw   2                  ; e_type
 42 |         dw   3                  ; e_machine
 43 |         dd   1                  ; e_version
 44 |         dd   xt_abort + 4       ; e_entry
 45 |         dd   phdr - $$  ; e_phoff
 46 |         dd   0                  ; e_shoff
 47 |         dd   0                  ; e_flags
 48 |         dw   ehdrsize   ; e_ehsize
 49 |         dw   phdrsize   ; e_phentsize
 50 |         dw   1                  ; e_phnum
 51 |         dw   0                  ; e_shentsize
 52 |         dw   0                  ; e_shnum
 53 |         dw   0                  ; e_shstrndx
 54 | 
 55 | ehdrsize   equ   $ - ehdr
 56 | 
 57 | phdr:                           ; Elf32_Phdr
 58 |         dd   1                  ; p_type
 59 |         dd   0                  ; p_offset
 60 |         dd   $$                 ; p_vaddr
 61 |         dd   $$                 ; p_paddr
 62 |         dd   filesize   ; p_filesz
 63 |         dd   MEMSIZE    ; p_memsz
 64 |         dd   7                  ; p_flags
 65 |         dd   0x1000             ; p_align
 66 | 
 67 | phdrsize   equ   $ - phdr
 68 | 
 69 | ; esp - data stack pointer
 70 | ; ebp - return stack pointer
 71 | ; esi - Forth instruction pointer
 72 | ; ebx - TOS (top of data stack)
 73 | 
 74 |     variable 'state', state, 0
 75 |     variable '>in', to_in, 0
 76 |     variable '#tib', number_t_i_b, 0
 77 |     variable 'dp', dp, freemem
 78 |     variable 'base', base, 10
 79 |     variable 'last', last, final
 80 |     variable 'tib', t_i_b, TIBPTR
 81 | 
 82 |     primitive 'execute', execute
 83 |         mov eax, ebx ; eax is important here, it is used by docolon and dovar
 84 |         pop ebx
 85 |         jmp dword[eax]
 86 | 
 87 |     primitive 'abort', abort
 88 |         mov eax, dword[val_number_t_i_b]
 89 |         mov dword[val_to_in], eax
 90 |         xor ebp, ebp
 91 |         mov dword[val_state], ebp
 92 |         mov esp, SP0
 93 |         mov ebp, RP0
 94 |         mov esi, xt_interpret + 4
 95 |         jmp next
 96 | 
 97 |     primitive ',', comma
 98 |         xchg eax, ebx
 99 |         mov ebx, val_dp
100 |         mov edi, [ebx]
101 |         stosd
102 |         mov [ebx], edi
103 |         pop ebx
104 |         jmp next
105 | 
106 |     primitive 'lit', lit
107 |         push ebx
108 |         lodsd
109 |         xchg eax, ebx
110 |         jmp next
111 | 
112 |     primitive 'rot', rote
113 |         pop edx
114 |         pop eax
115 |         push edx
116 |         push ebx
117 |         xchg eax, ebx
118 |         jmp next
119 | 
120 |     primitive 'drop', drop
121 |         pop ebx
122 |         jmp next
123 | 
124 |     primitive 'dup', dupe
125 |         push ebx
126 |         jmp next
127 | 
128 |     primitive 'swap', swap
129 |         xchg ebx, [esp]
130 |         jmp next
131 | 
132 |     primitive '+', plus
133 |         pop eax
134 |         add ebx, eax
135 |         jmp next
136 | 
137 |     primitive 'exit', exit
138 |         xchg ebp, esp
139 |         pop esi
140 |         xchg ebp, esp
141 | next    lodsd
142 |         jmp dword[eax] ; eax is later used by docolon and dovar
143 | 
144 |     primitive '=', equals
145 |         pop eax
146 |         sub ebx, eax
147 |         sub ebx, 1
148 |         sbb ebx, ebx
149 |         jmp next
150 | 
151 |     primitive '@', fetch
152 |         mov ebx, dword[ebx]
153 |         jmp next
154 | 
155 |     primitive '!', store
156 |         pop dword[ebx]
157 |         pop ebx
158 |         jmp next
159 | 
160 |     primitive '0branch', zero_branch
161 |         lodsd
162 |         test ebx, ebx
163 |         jne zerob_z
164 |         xchg eax, esi
165 | zerob_z pop ebx
166 |         jmp next
167 | 
168 |     primitive 'branch',branch
169 |         mov esi, dword[esi]
170 |         jmp next
171 | 
172 |     primitive 'count',count
173 |         movzx eax, byte[ebx]
174 |         inc ebx
175 |         push ebx
176 |         mov ebx, eax
177 |         jmp next
178 | 
179 |     primitive 'accept', accept
180 |         xor edx, edx
181 |         xchg edx, ebx ; now edx contains read byte count and ebx 0 (reading from stdin)
182 |         xor eax, eax
183 |         mov al, 3     ; sys_read
184 |         pop ecx       ; buffer
185 |         int 80h
186 |         xchg ebx, eax ; eax after sys_read contains number of bytes read (negative number means error), let's move it to TOS
187 |         dec ebx       ; last char is CR
188 |         jmp next
189 | 
190 |     primitive 'emit', emit
191 |         push ebx
192 |         xor eax, eax
193 |         mov al, 4    ; sys_write
194 |         xor ebx, ebx
195 |         inc ebx      ; ebx now contains 1 (stdout)
196 |         mov ecx, esp ; buffer
197 |         mov edx, ebx ; write byte count
198 |         int 80h
199 |         pop ebx
200 |         pop ebx
201 |         jmp next
202 | 
203 |     primitive '>number',to_number
204 |         pop edi
205 |         pop ecx
206 |         pop eax
207 | to_numl test ebx, ebx
208 |         je to_numz
209 |         push eax
210 |         movzx eax, byte[edi]
211 |         cmp al, 'a'
212 |         jc to_nums
213 |         sub al, 32
214 | to_nums cmp al, '9' + 1
215 |         jc to_numg
216 |         cmp al, 'A'
217 |         jc to_numh
218 |         sub al, 7
219 | to_numg sub al, 48
220 |         cmp al, byte[val_base]
221 |         jnc to_numh
222 |         xchg eax, edx
223 |         pop eax
224 |         push edx
225 |         xchg eax, ecx
226 |         mul dword[val_base]
227 |         xchg eax, ecx
228 |         mul dword[val_base]
229 |         add ecx, edx
230 |         pop edx
231 |         add eax, edx
232 |         dec ebx
233 |         inc edi
234 |         jmp to_numl
235 | to_numz push eax
236 | to_numh push ecx
237 |         push edi
238 |         jmp next
239 | 
240 |     primitive 'word', word
241 |         mov edi, dword[val_dp]
242 |         push edi
243 |         mov edx, ebx
244 |         mov ebx, dword[val_t_i_b]
245 |         mov ecx, ebx
246 |         add ebx, dword[val_to_in]
247 |         add ecx, dword[val_number_t_i_b]
248 | wordf   cmp ecx, ebx
249 |         je wordz
250 |         mov al, byte[ebx]
251 |         inc ebx
252 |         cmp al, dl
253 |         je wordf
254 | wordc   inc edi
255 |         mov byte[edi], al
256 |         cmp ecx, ebx
257 |         je wordz
258 |         mov al, byte[ebx]
259 |         inc ebx
260 |         cmp al, dl
261 |         jne wordc
262 | wordz   mov byte[edi + 1], 32
263 |         mov eax, dword[val_dp]
264 |         xchg eax, edi
265 |         sub eax, edi
266 |         mov byte[edi], al
267 |         sub ebx, dword[val_t_i_b]
268 |         mov dword[val_to_in], ebx
269 |         pop ebx
270 |         jmp next
271 | 
272 |     primitive 'find', find
273 |         mov edi, val_last
274 | findl   push edi
275 |         push ebx
276 |         movzx ecx, byte[ebx]
277 |         inc ecx
278 | findc   mov al, byte[edi + 4]
279 |         and al, 07Fh
280 |         cmp al, byte[ebx]
281 |         je findm
282 |         pop ebx
283 |         pop edi
284 |         mov edi, dword[edi]
285 |         test edi, edi
286 |         jne findl
287 | findnf  push ebx
288 |         xor ebx, ebx
289 |         jmp next
290 | findm   inc edi
291 |         inc ebx
292 |         loop findc
293 |         pop ebx
294 |         pop edi
295 |         xor ebx, ebx
296 |         inc ebx
297 |         lea edi, [edi + 4]
298 |         mov al, byte[edi]
299 |         test al, 080h
300 |         jne findi
301 |         neg ebx
302 | findi   and eax, 31
303 |         add edi, eax
304 |         inc edi
305 |         push edi
306 |         jmp next
307 | 
308 |     colon ':', colon
309 |         dd xt_lit, -1
310 |         dd xt_state
311 |         dd xt_store
312 |         dd xt_create
313 |         dd xt_do_semi_code
314 | 
315 | docolon xchg ebp, esp
316 |         push esi
317 |         xchg ebp, esp
318 |         lea esi, [eax + 4] ; eax value is set by next
319 |         jmp next
320 | 
321 |     colon ';', semicolon, immediate
322 |         dd xt_lit, xt_exit
323 |         dd xt_comma
324 |         dd xt_lit, 0
325 |         dd xt_state
326 |         dd xt_store
327 |         dd xt_exit
328 | 
329 |     colon 'create', create
330 |         dd xt_dp, xt_fetch
331 |         dd xt_last, xt_fetch
332 |         dd xt_comma
333 |         dd xt_last, xt_store
334 |         dd xt_lit, 32
335 |         dd xt_word
336 |         dd xt_count
337 |         dd xt_plus
338 |         dd xt_dp, xt_store
339 |         dd xt_lit, 0
340 |         dd xt_comma
341 |         dd xt_do_semi_code
342 | 
343 | dovar   push ebx
344 |         lea ebx, [eax + 4] ; eax value is set by next
345 |         jmp next
346 | 
347 |     primitive '(;code)', do_semi_code
348 |         mov edi, dword[val_last]
349 |         mov al, byte[edi + 4]
350 |         and eax, 31
351 |         add edi, eax
352 |         mov dword[edi + 5], esi
353 |         xchg ebp, esp
354 |         pop esi
355 |         xchg esp, ebp
356 |         jmp next
357 | 
358 | final:
359 | 
360 |     colon 'interpret', interpret
361 | interpt dd xt_number_t_i_b
362 |         dd xt_fetch
363 |         dd xt_to_in
364 |         dd xt_fetch
365 |         dd xt_equals
366 |         dd xt_zero_branch
367 |         dd intpar
368 |         dd xt_t_i_b
369 |         dd xt_fetch
370 |         dd xt_lit, 50
371 |         dd xt_accept
372 |         dd xt_number_t_i_b
373 |         dd xt_store
374 |         dd xt_lit, 0
375 |         dd xt_to_in
376 |         dd xt_store
377 | intpar  dd xt_lit, 32
378 |         dd xt_word
379 |         dd xt_find
380 |         dd xt_dupe
381 |         dd xt_zero_branch
382 |         dd intnf
383 |         dd xt_state
384 |         dd xt_fetch
385 |         dd xt_equals
386 |         dd xt_zero_branch
387 |         dd intexc
388 |         dd xt_comma
389 |         dd xt_branch
390 |         dd intdone
391 | intexc  dd xt_execute
392 |         dd xt_branch
393 |         dd intdone
394 | intnf   dd xt_dupe
395 |         dd xt_rote
396 |         dd xt_count
397 |         dd xt_to_number
398 |         dd xt_zero_branch
399 |         dd intskip
400 |         dd xt_state
401 |         dd xt_fetch
402 |         dd xt_zero_branch
403 |         dd intnc
404 |         dd xt_last
405 |         dd xt_fetch
406 |         dd xt_dupe
407 |         dd xt_fetch
408 |         dd xt_last
409 |         dd xt_store
410 |         dd xt_dp
411 |         dd xt_store
412 | intnc   dd xt_abort
413 | intskip dd xt_drop
414 |         dd xt_drop
415 |         dd xt_state
416 |         dd xt_fetch
417 |         dd xt_zero_branch
418 |         dd intdone
419 |         dd xt_lit
420 |         dd xt_lit
421 |         dd xt_comma
422 |         dd xt_comma
423 | intdone dd xt_branch
424 |         dd interpt
425 | 
426 | freemem:
427 | 
428 | filesize   equ   $ - $$
429 | 


--------------------------------------------------------------------------------
/msdos/itsy.asm:
--------------------------------------------------------------------------------
   1 | ; Itsy Forth
   2 | ;    Written by John Metcalf
   3 | ;    Commentary by John Metcalf and Mike Adams
   4 | ;
   5 | ; Itsy Forth was written for use with NASM, the "Netwide Assembler"
   6 | ; that's available for free download (http://www.nasm.us/).
   7 | ; The command line for assembling Itsy is:
   8 | ;
   9 | ;      nasm itsy.asm -fbin -o itsy.com
  10 | ;
  11 | ; If you wish to have an assembly listing, give it this command:
  12 | ;
  13 | ;      nasm itsy.asm -fbin -l itsy.lst -o itsy.com
  14 | ;
  15 | ;--------------------------------------------------------------------------
  16 | ; Implementation notes:
  17 | ;
  18 | ; Register Usage:
  19 | ;    sp - data stack pointer.
  20 | ;    bp - return stack pointer.
  21 | ;    si - Forth instruction pointer.
  22 | ;    di - pointer to current XT (CFA of word currently being executed).
  23 | ;    bx - TOS (top of data stack). The top value on the data stack is not
  24 | ;         actually kept on the CPU's data stack. It's kept in the BX register.
  25 | ;         Having it in a register like this speeds up the operation of
  26 | ;         the primitive words. They don't have to take the time to pull a
  27 | ;         value off of the stack; it's already in a register where it can
  28 | ;         be used right away!
  29 | ;    ax, cd, dx - Can all be freely used for processing data. The other
  30 | ;         registers can still be used also, but only with caution. Their
  31 | ;         contents must be pushed to the stack and then restored before
  32 | ;         exiting from the word or calling any other Forth words. LOTS of
  33 | ;         potential for program crashes if you don't do this correctly.
  34 | ;         The notable exception is the DI register, which can (and is, below)
  35 | ;         used pretty freely in assembly code, since the concept of a pointer
  36 | ;         to the current CFA is rather irrelevant in assembly.
  37 | ;
  38 | ;
  39 | ; Structure of an Itsy word definition:
  40 | ;     # of
  41 | ;    Bytes:   Description:
  42 | ;    ------   ---------------------------------------------------------
  43 | ;      2      Link Field. Contains the address of the link field of the
  44 | ;                definition preceding this one in the dictionary. The link
  45 | ;                field of the first def in the dictionary contains 0.
  46 | ;    Varies   Name Field. The first byte of the name field contains the length
  47 | ;                of the name; succeeding bytes contain the ASCII characters of
  48 | ;                the name itself. If the high bit of the length is set, the
  49 | ;                definition is tagged as being an "immediate" word.
  50 | ;      2      Code Field. Contains the address of the executable code for
  51 | ;                the word. For primitives, this will likely be the address
  52 | ;                of the word's own data field. Note that the header creation
  53 | ;                macros automatically generate labels for the code field
  54 | ;                addresses of the words they're used to define, though the
  55 | ;                CFA labels aren't visible in the code shown below. The
  56 | ;                assembler macros create labels, known as "execution tags"
  57 | ;                or XTs, for the code field of each word.
  58 | ;    Varies   Data Field. Contains either a list of the code field addresses
  59 | ;                of the words that make up this definition, or assembly-
  60 | ;                language code for primitives, or numeric data for variables
  61 | ;                 and constants and such.
  62 | 
  63 | 
  64 | ;-----------------------------------------------------------------------------
  65 | ;
  66 | ; Beginning of actual code.
  67 | ;
  68 | ; Include the definitions of the macros that are used in NASM to create
  69 | ; the headers of the words. See macros.asm for more details.
  70 | ;-----------------------------------------------------------------------------
  71 | %include "macros.asm"
  72 | 
  73 | ;-----------------------------------------------------------------------------
  74 | ; Define the location for the stack. -256 decimal = 0ff00h
  75 | ;-----------------------------------------------------------------------------
  76 | stack0  equ -256
  77 | 
  78 | ;-----------------------------------------------------------------------------
  79 | ; Set the starting point for the executable code. 0100h is the standard
  80 | ; origin for programs running under MS-DOS or its equivalents.
  81 | ;-----------------------------------------------------------------------------
  82 |         org 0100h
  83 | 
  84 | ;-----------------------------------------------------------------------------
  85 | ; Jump to the location of the start of Itsy's initialization code.
  86 | ;-----------------------------------------------------------------------------
  87 |         jmp xt_abort+2
  88 | 
  89 | ; -------------------
  90 | ; System Variables
  91 | ; -------------------
  92 | 
  93 |         ; state - ( -- addr ) true = compiling, false = interpreting
  94 |         variable 'state',state,0
  95 | 
  96 |         ; >in - ( -- addr ) next character in input buffer
  97 |         variable '>in',to_in,0
  98 | 
  99 |         ; #tib - ( -- addr ) number of characters in the input buffer
 100 |         variable '#tib',number_t_i_b,0
 101 | 
 102 |         ; dp - ( -- addr ) first free cell in the dictionary
 103 |         variable 'dp',dp,freemem
 104 | 
 105 |         ; base - ( -- addr ) number base
 106 |         variable 'base',base,10
 107 | 
 108 |         ; last - ( -- addr ) the last word to be defined
 109 |         ; NOTE: The label "final:" must be placed immediately before
 110 |         ; the last word defined in this file. If new words are added,
 111 |         ; make sure they're either added before the "final:" label
 112 |         ; or the "final:" label is moved to the position immediately
 113 |         ; before the last word added.
 114 |         variable 'last',last,final
 115 | 
 116 |         ; tib - ( -- addr ) address of the input buffer
 117 |         constant 'tib',t_i_b,32768
 118 | 
 119 | ; -------------------
 120 | ; Initialisation
 121 | ; -------------------
 122 | 
 123 | ; abort - ( -- ) initialise Itsy then jump to interpret
 124 |         primitive 'abort',abort
 125 |         mov ax,word[val_number_t_i_b]  ; Load AX with the value contained
 126 |                                        ; in the data field of #tib (which
 127 |                                        ; was pre-defined above as 0).
 128 |         mov word[val_to_in],ax         ; Save the same number to >in.
 129 |         xor bp,bp                      ; Clear the bp register, which is going
 130 |                                        ; to be used as the return stack
 131 |                                        ; pointer. Since it'll first be
 132 |                                        ; decremented when a value is pushed
 133 |                                        ; onto it, this means that the first
 134 |                                        ; value pushed onto the return stack
 135 |                                        ; will be stored at 0FFFEh and 0FFFFh,
 136 |                                        ; the very end of memory space, and
 137 |                                        ; the stack will grow downward from
 138 |                                        ; there.
 139 |         mov word[val_state],bp         ; Clear the value of state.
 140 |         mov sp,stack0                  ; Set the stack pointer to the value
 141 |                                        ; defined above.
 142 |         mov si,xt_interpret+2          ; Initialize Itsy's instruction pointer
 143 |                                        ; to the outer interpreter loop.
 144 |         jmp next                       ; Jump to the inner interpreter and
 145 |                                        ; actually start running Itsy.
 146 | 
 147 | ; -------------------
 148 | ; Compilation
 149 | ; -------------------
 150 | 
 151 | ; , - ( x -- ) compile x to the current definition.
 152 | ;    Stores the number on the stack to the memory location currently
 153 | ;    pointed to by dp.
 154 |         primitive ',',comma
 155 |         mov di,word[val_dp] ; Put the value of dp into the DI register.
 156 |         xchg ax,bx          ; Move the top of the stack into AX.
 157 |         stosw               ; Store the 16-bit value in AX directly
 158 |                             ; into the address pointed to by DI, and
 159 |                             ; automatically increment DI in the
 160 |                             ; process.
 161 |         mov word[val_dp],di ; Store the incremented value in DI as the
 162 |                             ; new value for the dictionary pointer.
 163 |         pop bx              ; Pop the new stack top into its proper place.
 164 |         jmp next            ; Go do the next word.
 165 | 
 166 | ; lit - ( -- ) push the value in the cell straight after lit.
 167 | ;   lit is the word that is compiled into a definition when you put a
 168 | ;   "literal" number in a Forth definition. When your word is compiled,
 169 | ;   the CFA of lit gets stored in the definition followed immediately
 170 | ;   by the value of the number you put into the code. At run time, lit
 171 | ;   pushes the value of your number onto the stack.
 172 |         primitive 'lit',lit
 173 |         push bx      ; Push the value in BX to the stack, so that now it'll
 174 |                      ; be 2nd from the top on the stack. The old value is
 175 |                      ; still in BX, though. Now we need to get the new
 176 |                      ; value into BX.
 177 |         lodsw        ; Load into the AX register the 16-bit value pointed
 178 |                      ; to by the SI register (Itsy's instruction pointer,
 179 |                      ; which this op then automatically increments SI by 2).
 180 |                      ; The net result is that we just loaded into AX the
 181 |                      ; 16-bit data immediately following the call to lit,
 182 |                      ; which'll be the data that lit is supposed to load.
 183 |         xchg ax,bx   ; Now swap the contents of the AX and BX registers.
 184 |                      ; lit's data is now in BX, the top of the stack, where
 185 |                      ; we want it. Slick, eh?
 186 |         jmp next     ; Go do the next word.
 187 | 
 188 | ; -------------------
 189 | ; Stack
 190 | ; -------------------
 191 | 
 192 | ; rot - ( x y z -- y z x ) rotate x, y and z.
 193 | ;   Standard Forth word that extracts number 3rd from the top of the stack
 194 | ;   and puts it on the top, effectively rotating the top 3 values.
 195 |         primitive 'rot',rote
 196 |         pop dx       ; Unload "y" from the stack.
 197 |         pop ax       ; Unload "x" from the stack. Remember that "z" is
 198 |                      ; already in BX.
 199 |         push dx      ; Push "y" back onto the stack.
 200 |         push bx      ; Push "z" down into the stack on top of "y".
 201 |         xchg ax,bx   ; Swap "x" into the BX register so that it's now
 202 |                      ; at the top of the stack.
 203 |         jmp next     ; Go do the next word.
 204 | 
 205 | ; drop - ( x -- ) remove x from the stack.
 206 |         primitive 'drop',drop
 207 |         pop bx       ; Pop the 2nd item on the stack into the BX register,
 208 |                      ; writing over the item that was already at the top
 209 |                      ; of the stack in BX. It's that simple.
 210 |         jmp next     ; Go do the next word.
 211 | 
 212 | ; dup - ( x -- x x ) add a copy of x to the stack
 213 |         primitive 'dup',dupe
 214 |         push bx      ; Remember that BX is the top of the stack. Push an
 215 |                      ; extra copy of what's in BX onto the stack.
 216 |         jmp next     ; Go do the next word.
 217 | 
 218 | ; # swap - ( x y -- y x ) exchange x and y
 219 |         primitive 'swap',swap
 220 |         pop ax       ; Pop "x", the number 2nd from the top, into AX.
 221 |         push bx      ; Push "y", the former top of the stack.
 222 |         xchg ax,bx   ; Swap "x" into BX to become the new stack top. We
 223 |                      ; don't care what happens to the value of "y" that
 224 |                      ; ends up in AX because that value is now safely
 225 |                      ; in the stack.
 226 |         jmp next     ; Go do the next word.
 227 | 
 228 | ; -------------------
 229 | ; Maths / Logic
 230 | ; -------------------
 231 | 
 232 | ; + - ( x y -- z) calculate z=x+y then return z
 233 |         primitive '+',plus
 234 |         pop ax       ; Pop the value of "x" off of the stack.
 235 |         add bx,ax    ; Add "x" to the value of "y" that's at the top of the
 236 |                      ; stack in the BX register. The way the opcode is
 237 |                      ; written, the result is left in the BX register,
 238 |                      ; conveniently at the top of the stack.
 239 |         jmp next     ; Go do the next word.
 240 | 
 241 | ; = - ( x y -- flag ) return true if x=y
 242 |         primitive '=',equals
 243 |         pop ax     ; Get the "x" value into a register.
 244 |         sub bx,ax  ; Perform BX-AX (or y-x)and leave result in BX. If x and
 245 |                    ; y are equal, this will result in a 0 in BX. But a zero
 246 |                    ; is a false flag in just about all Forth systems, and we
 247 |                    ; want a TRUE flag if the numbers are equal. So...
 248 |         sub bx,1   ; Subtract 1 from it. If we had a zero before, now we've
 249 |                    ; got a -1 (or 0ffffh), and a carry flag was generated.
 250 |                    ; Any other value in BX will not generate a carry.
 251 |         sbb bx,bx  ; This has the effect of moving the carry bit into the BX
 252 |                    ; register. So, if the numbers were not equal, then the
 253 |                    ; "sub bx,1" didn't generate a carry, so the result will
 254 |                    ; be a 0 in the BX (numbers were not equal, result is
 255 |                    ; false). If the original numbers on the stack were equal,
 256 |                    ; though, then the carry bit was set and then copied
 257 |                    ; into the BX register to act as our true flag.
 258 |                    ; This may seem a bit cryptic, but it produces smaller
 259 |                    ; code and runs faster than a bunch of conditional jumps
 260 |                    ; and immediate loads would.
 261 |         jmp next   ; Go do the next word.
 262 | 
 263 | ; -------------------
 264 | ; Peek and Poke
 265 | ; -------------------
 266 | 
 267 | ; @ - ( addr -- x ) read x from addr
 268 | ; "Fetch", as the name of this word is pronounced, reads a 16-bit number from
 269 | ; a given memory address, the way the Basic "peek" command does, and leaves
 270 | ; it at the top of the stack.
 271 |         primitive '@',fetch
 272 |         mov bx,word[bx]  ; Read the value in the memory address pointed to by
 273 |                          ; the BX register and move that value directly into
 274 |                          ; BX, replacing the address at the top of the stack.
 275 |         jmp next         ; Go do the next word.
 276 | 
 277 | ; ! - ( x addr -- ) store x at addr
 278 | ; Similar to @, ! ("store") writes a value directly to a memory address, like
 279 | ; the Basic "poke" command.
 280 |         primitive '!',store
 281 |         pop word[bx]  ; Okay, this is a bit slick. All in one opcode, we pop
 282 |                       ; the number that's 2nd from the top of the stack
 283 |                       ; (i.e. "x" in the argument list) and send it directly
 284 |                       ; to the memory address pointed to by BX (the address
 285 |                       ; at the top of the stack).
 286 |         pop bx        ; Pop whatever was 3rd from the top of the stack into
 287 |                       ; the BX register to become the new TOS.
 288 |         jmp next      ; Go do the next word.
 289 | 
 290 | ; -------------------
 291 | ; Inner Interpreter
 292 | ; -------------------
 293 | 
 294 | ; This routine is the very heart of the Forth system. After execution, all
 295 | ; Forth words jump to this routine, which pulls up the code field address
 296 | ; of the next word to be executed and then executes it. Note that next
 297 | ; doesn't have a header of its own.
 298 | next    lodsw         ; Load into the AX register the 16-bit value pointed
 299 |                       ; to by the SI register (Itsy's instruction pointer,
 300 |                       ; which this op then automatically increments SI by 2).
 301 |                       ; The net result is that we just loaded into AX the
 302 |                       ; CFA of the next word to be executed and left the
 303 |                       ; instruction pointer pointing to the word that
 304 |                       ; follows the next one.
 305 |         xchg di,ax    ; Move the CFA of the next word into the DI register.
 306 |                       ; We have to do this because the 8086 doesn't have
 307 |                       ; an opcode for "jmp [ax]".
 308 |         jmp word[di]  ; Jump and start executing code at the address pointed
 309 |                       ; to by the value in the DI register.
 310 | 
 311 | ; -------------------
 312 | ; Flow Control
 313 | ; -------------------
 314 | 
 315 | ; 0branch - ( x -- ) jump if x is zero
 316 | ; This is the primitive word that's compiled as the runtime code in
 317 | ; an IF...THEN statement. The number compiled into the word's definition
 318 | ; immediately after 0branch is the address of the word in the definition
 319 | ; that we're branching to. That address gets loaded into the instruction
 320 | ; pointer. In essence, this word sees a false flag (i.e. a zero) and
 321 | ; then jumps over the words that comprise the "do this if true" clause
 322 | ; of an IF...ELSE...THEN statement.
 323 |         primitive '0branch',zero_branch
 324 |         lodsw        ; Load into the AX register the 16-bit value pointed
 325 |                      ; to by the SI register (Itsy's instruction pointer,
 326 |                      ; which this op then automatically increments SI by 2).
 327 |                      ; The net result is that we just loaded into AX the
 328 |                      ; CFA of the next word to be executed and left the
 329 |                      ; instruction pointer pointing to the word that
 330 |                      ; follows the next one.
 331 |         test bx,bx   ; See if there's a 0 at the top of the stack.
 332 |         jne zerob_z  ; If it's not zero, jump.
 333 |         xchg ax,si   ; If the flag is a zero, we want to move the CFA of
 334 |                      ; the word we want to branch to into the Forth
 335 |                      ; instruction pointer. If the TOS was non-zero, the
 336 |                      ; instruction pointer is left still pointing to the CFA
 337 |                      ; of the word that follows the branch reference.
 338 | zerob_z pop bx       ; Throw away the flag and move everything on the stack
 339 |                      ; up by one spot.
 340 |         jmp next     ; Oh, you know what this does by now...
 341 | 
 342 | ; branch - ( addr -- ) unconditional jump
 343 | ; This is one of the pieces of runtime code that's compiled by
 344 | ; BEGIN/WHILE/REPEAT, BEGIN/AGAIN, and BEGIN/UNTIL loops. As with 0branch,
 345 | ; the number compiled into the dictionary immediately after the branch is
 346 | ; the address of the word in the definition that we're branching to.
 347 |         primitive 'branch',branch
 348 |         mov si,word[si]  ; The instruction pointer has already been
 349 |                          ; incremented to point to the address immediately
 350 |                          ; following the branch statement, which means it's
 351 |                          ; pointing to where our branch-to address is
 352 |                          ; stored. This opcode takes the value pointed to
 353 |                          ; by the SI register and loads it directly into
 354 |                          ; the SI, which is used as Forth's instruction
 355 |                          ; pointer.
 356 |         jmp next
 357 | 
 358 | ; execute - ( xt -- ) call the word at xt
 359 |         primitive 'execute',execute
 360 |         mov di,bx     ; Move the jump-to address to the DI register.
 361 |         pop bx        ; Pop the next number on the stack into the TOS.
 362 |         jmp word[di]  ; Jump to the address pointed to by the DI register.
 363 | 
 364 | ; exit - ( -- ) return from the current word
 365 |         primitive 'exit',exit
 366 |         mov si,word[bp]  ; The BP register is used as Itsy's return stack
 367 |                          ; pointer. The value at its top is the address of
 368 |                          ; the instruction being pointed to before the word
 369 |                          ; currently being executed was called. This opcode
 370 |                          ; loads that address into the SI register.
 371 |         inc bp           ; Now we have to increment BP twice to do a manual
 372 |                          ; "pop" of the return stack pointer.
 373 |         inc bp           ; 
 374 |         jmp next         ; We jump to next with the SI now having the address
 375 |                          ; pointing into the word that called the one we're
 376 |                          ; finishing up now. The result is that next will go
 377 |                          ; back into that calling word and pick up where it
 378 |                          ; left off earlier.
 379 | 
 380 | ; -------------------
 381 | ; String
 382 | ; -------------------
 383 | 
 384 | ; count - ( addr -- addr2 len )
 385 | ; count is given the address of a counted string (like the name field of a
 386 | ; word definition in Forth, with the first byte being the number of
 387 | ; characters in the string and immediately followed by the characters
 388 | ; themselves). It returns the length of the string and a pointer to the
 389 | ; first actual character in the string.
 390 |         primitive 'count',count
 391 |         inc bx             ; Increment the address past the length byte so
 392 |                            ; it now points to the actual string.
 393 |         push bx            ; Push the new address onto the stack.
 394 |         mov bl,byte[bx-1]  ; Move the length byte into the lower half of
 395 |                            ; the BX register.
 396 |         mov bh,0           ; Load a 0 into the upper half of the BX reg.
 397 |         jmp next
 398 | 
 399 | ; >number - ( double addr len -- double2 addr2 zero    ) if successful, or
 400 | ;           ( double addr len -- int     addr2 nonzero ) on error.
 401 | ; Convert a string to an unsigned double-precision integer.
 402 | ; addr points to a string of len characters which >number attempts to
 403 | ; convert to a number using the current number base. >number returns
 404 | ; the portion of the string which can't be converted, if any.
 405 | ; Note that, as is standard for most Forths, >number attempts to
 406 | ; convert a number into a double (most Forths also leave it as a double
 407 | ; if they find a decimal point, but >number doesn't check for that) and
 408 | ; that it's called with a dummy double value already on the stack.
 409 | ; On return, if the top of the stack is 0, the number was successfully
 410 | ; converted. If the top of the stack is non-zero, there was an error.
 411 |         primitive '>number',to_number
 412 |                               ; Start out by loading values from the stack
 413 |                               ; into various registers. Remember that the
 414 |                               ; top of the stack, the string length, is
 415 |                               ; already in bx.
 416 |         pop di                ; Put the address into di.
 417 |         pop cx                ; Put the high word of the double value into cx
 418 |         pop ax                ; and the low word of the double value into ax.
 419 | to_numl test bx,bx            ; Test the length byte.
 420 |         je to_numz            ; If the string's length is zero, we're done.
 421 |                               ; Jump to end.
 422 |         push ax               ; Push the contents of ax (low word) so we can
 423 |                               ; use it for other things.
 424 |         mov al,byte[di]       ; Get the next byte in the string.
 425 |         cmp al,'a'            ; Compare it to a lower-case 'a'.
 426 |         jc to_nums            ; "jc", "jump if carry", is a little cryptic.
 427 |                               ; I think a better choice of mnemonic would be
 428 |                               ; "jb", "jump if below", for understanding
 429 |                               ; what's going on here. Jump if the next byte
 430 |                               ; in the string is less than 'a'. If the chr
 431 |                               ; is greater than or equal to 'a', then it may
 432 |                               ; be a digit larger than 9 in a hex number.
 433 |         sub al,32             ; Subtract 32 from the character. If we're
 434 |                               ; converting hexadecimal input, this'll have
 435 |                               ; the effect of converting lower case to
 436 |                               ; upper case.
 437 | to_nums cmp al,'9'+1          ; Compare the character to whatever character
 438 |                               ; comes after '9'.
 439 |         jc to_numg            ; If it's '9' or less, it's possibly a decimal
 440 |                               ; digit. Jump for further testing.
 441 |         cmp al,'A'            ; Compare the character with 'A'.
 442 |         jc to_numh            ; If it's one of those punctuation marks
 443 |                               ; between '9' and 'A', we've got an error.
 444 |                               ; Jump to the end.
 445 |         sub al,7              ; The character is a potentially valid digit
 446 |                               ; for a base larger than 10. Resize it so
 447 |                               ; that 'A' becomes the digit for 11, 'B'
 448 |                               ; signifies a 11, etc.
 449 | to_numg sub al,48             ; Convert the digit to its corresponding
 450 |                               ; number. This op could also have been
 451 |                               ; written as "sub al,'0'"
 452 |         mov ah,0              ; Clear the ah register. The AX reg now
 453 |                               ; contains the numeric value of the new digit.
 454 |         cmp al,byte[val_base] ; Compare the digit's value to the base.
 455 |         jnc to_numh           ; If the digit's value is above or equal to
 456 |                               ; to the base, we've got an error. Jump to end.
 457 |                               ; (I think using "jae" would be less cryptic.)
 458 |                               ; (NASM's documentation doesn't list jae as a
 459 |                               ; valid opcode, but then again, it doesn't
 460 |                               ; list jnc in its opcode list either.)
 461 |         xchg ax,dx            ; Save the digit value in AX by swapping it
 462 |                               ; the contents of DX. (We don't care what's
 463 |                               ; in DX; it's scratchpad.)
 464 |         pop ax                ; Recall the low word of our accumulated
 465 |                               ; double number and load it into AX.
 466 |         push dx               ; Save the digit value. (The DX register
 467 |                               ; will get clobbered by the upcoming mul.)
 468 |         xchg ax,cx            ; Swap the low and high words of our double
 469 |                               ; number. AX now holds the high word, and
 470 |                               ; CX the low.
 471 |         mul word[val_base]    ; 16-bit multiply the high word by the base.
 472 |                               ; High word of product is in DX, low in AX.
 473 |                               ; But we don't need the high word. It's going
 474 |                               ; to get overwritten by the next mul.
 475 |         xchg ax,cx            ; Save the product of the first mul to the CX
 476 |                               ; register and put the low word of our double
 477 |                               ; number back into AX.
 478 |         mul word[val_base]    ; 16-bit multiply the low word of our converted
 479 |                               ; double number by the base, then add the high
 480 |         add cx,dx             ; word of the product to the low word of the
 481 |                               ; first mul (i.e. do the carry).
 482 |         pop dx                ; Recall the digit value, then add it in to
 483 |         add ax,dx             ; the low word of our accumulated double-
 484 |                               ; precision total.
 485 |                               ; NOTE: One might think, as I did at first,
 486 |                               ; that we need to deal with the carry from
 487 |                               ; this operation. But we just multiplied
 488 |                               ; the number by the base, and then added a
 489 |                               ; number that's already been checked to be
 490 |                               ; smaller than the base. In that case, there
 491 |                               ; will never be a carry out from this
 492 |                               ; addition. Think about it: You multiply a
 493 |                               ; number by 10 and get a new number whose
 494 |                               ; lowest digit is a zero. Then you add another
 495 |                               ; number less than 10 to it. You'll NEVER get
 496 |                               ; a carry from adding zero and a number less
 497 |                               ; than 10.
 498 |         dec bx                ; Decrement the length.
 499 |         inc di                ; Inc the address pointer to the next byte
 500 |                               ; of the string we're converting.
 501 |         jmp to_numl           ; Jump back and convert any remaining
 502 |                               ; characters in the string.
 503 | to_numz push ax               ; Push the low word of the accumulated total
 504 |                               ; back onto the stack.
 505 | to_numh push cx               ; Push the high word of the accumulated total
 506 |                               ; back onto the stack.
 507 |         push di               ; Push the string address back onto the stack.
 508 |                               ; Note that the character count is still in
 509 |                               ; BX and is therefore already at the top of
 510 |                               ; the stack. If BX is zero at this point,
 511 |                               ; we've successfully converted the number.
 512 |         jmp next              ; Done. Return to caller.
 513 | 
 514 | ; -----------------------
 515 | ; Terminal Input / Output
 516 | ; -----------------------
 517 | 
 518 | ; accept - ( addr len -- len2 ) read a string from the terminal
 519 | ; accept reads a string of characters from the terminal. The string
 520 | ; is stored at addr and can be up to len characters long.
 521 | ; accept returns the actual length of the string.
 522 |         primitive 'accept',accept
 523 |         pop di        ; Pop the address of the string buffer into DI.
 524 |         xor cx,cx     ; Clear the CX register.
 525 | acceptl call getchar  ; Do the bios call to get a chr from the keyboard.
 526 |         cmp al,8      ; See if it's a backspace (ASCII character 08h).
 527 |         jne acceptn   ; If not, jump for more testing.
 528 |         jcxz acceptb  ; "Jump if CX=0". If the user typed a backspace but
 529 |                       ; there isn't anything in the buffer to erase, jump
 530 |                       ; to the code that'll beep at him to let him know.
 531 |         call outchar  ; User typed a backspace. Go ahead and output it.
 532 |         mov al,' '    ; Then output a space to wipe out the character that
 533 |         call outchar  ; the user had just typed.
 534 |         mov al,8      ; Then output another backspace to put the cursor
 535 |         call outchar  ; back into position to read another character.
 536 |         dec cx        ; We just deleted a character. Now we need to decrement
 537 |         dec di        ; both the counter and the buffer pointer.
 538 |         jmp acceptl   ; Then go back for another character.
 539 | acceptn cmp al,13     ; See if the input chr is a carriage return.
 540 |         je acceptz    ; If so, we're done. jump to the end of the routine.
 541 |         cmp cx,bx     ; Compare current string length to the maximum allowed.
 542 |         jne accepts   ; If the string's not too long, jump.
 543 | acceptb mov al,7      ; User's input is unusable in some way. Send the
 544 |         call outchar  ; BEL chr to make a beep sound to let him know.
 545 |         jmp acceptl   ; Then go back and let him try again.
 546 | accepts stosb         ; Save the input character into the buffer. Note that
 547 |                       ; this opcode automatically increments the pointer
 548 |                       ; in the DI register.
 549 |         inc cx        ; But we have to increment the length counter manually.
 550 |         call outchar  ; Echo the input character back to the display.
 551 |         jmp acceptl   ; Go back for another character.
 552 | acceptz jcxz acceptb  ; If the buffer is empty, beep at the user and go
 553 |                       ; back for more input.
 554 |         mov al,13     ; Send a carriage return to the display...
 555 |         call outchar  ; 
 556 |         mov al,10     ; ...followed by a linefeed.
 557 |         call outchar  ; 
 558 |         mov bx,cx     ; Move the count to the top of the stack.
 559 |         jmp next      ; 
 560 | 
 561 | ; word - ( char -- addr ) parse the next word in the input buffer
 562 | ; word scans the "terminal input buffer" (whose address is given by the
 563 | ; system constant tib) for words to execute, starting at the current
 564 | ; address stored in the input buffer pointer >in. The character on the
 565 | ; stack when word is called is the one that the code will look for as
 566 | ; the separator between words. 999 times out of 1000,; this is going to
 567 | ; be a space.
 568 |         primitive 'word',word
 569 |         mov di,word[val_dp]           ; Load the dictionary pointer into DI.
 570 |                                       ; This is going to be the address that
 571 |                                       ; we copy the input word to. For the
 572 |                                       ; sake of tradition, let's call this
 573 |                                       ; scratchpad area the "pad".
 574 |         push di                       ; Save the pad pointer to the stack.
 575 |         mov dx,bx                     ; Copy the word separator to DX.
 576 |         mov bx,word[val_t_i_b]        ; Load the address of the input buffer
 577 |         mov cx,bx                     ; into BX, and save a copy to CX.
 578 |         add bx,word[val_to_in]        ; Add the value of >in to the address
 579 |                                       ; of tib to get a pointer into the
 580 |                                       ; buffer.
 581 |         add cx,word[val_number_t_i_b] ; Add the value of #tib to the address
 582 |                                       ; of tib to get a pointer to the last
 583 |                                       ; chr in the input buffer.
 584 | wordf   cmp cx,bx                     ; Compare the current buffer pointer to
 585 |                                       ; the end-of-buffer pointer.
 586 |         je wordz                      ; If we've reached the end, jump.
 587 |         mov al,byte[bx]               ; Get the next chr from the buffer
 588 |         inc bx                        ; and increment the pointer.
 589 |         cmp al,dl                     ; See if it's the separator.
 590 |         je wordf                      ; If so, jump.
 591 | wordc   inc di                        ; Increment our pad pointer. Note that
 592 |                                       ; if this is our first time through the
 593 |                                       ; routine, we're incrementing to the
 594 |                                       ; 2nd address in the pad, leaving the
 595 |                                       ; first byte of it empty.
 596 |         mov byte[di],al               ; Write the new chr to the pad.
 597 |         cmp cx,bx                     ; Have we reached the end of the
 598 |                                       ; input buffer?
 599 |         je wordz                      ; If so, jump.
 600 |         mov al,byte[bx]               ; Get another byte from the input
 601 |         inc bx                        ; buffer and increment the pointer.
 602 |         cmp al,dl                     ; Is the new chr a separator?
 603 |         jne wordc                     ; If not, go back for more.
 604 | wordz   mov byte[di+1],32             ; Write a space at the end of the text
 605 |                                       ; we've written so far to the pad.
 606 |         mov ax,word[val_dp]           ; Load the address of the pad into AX.
 607 |         xchg ax,di                    ; Swap the pad address with the pad
 608 |         sub ax,di                     ; pointer then subtract to get the
 609 |                                       ; length of the text in the pad.
 610 |                                       ; The result goes into AX, leaving the
 611 |                                       ; pad address in DI.
 612 |         mov byte[di],al               ; Save the length byte into the first
 613 |                                       ; byte of the pad.
 614 |         sub bx,word[val_t_i_b]        ; Subtract the base address of the
 615 |                                       ; input buffer from the pointer value
 616 |                                       ; to get the new value of >in...
 617 |         mov word[val_to_in],bx        ; ...then save it to its variable.
 618 |         pop bx                        ; Pop the value of the pad address
 619 |                                       ; that we saved earlier back out to
 620 |                                       ; the top of the stack as our return
 621 |                                       ; value.
 622 |         jmp next
 623 | 
 624 | ; emit - ( char -- ) display char on the terminal
 625 |         primitive 'emit',emit
 626 |         xchg ax,bx    ; Move our output character to the AX register.
 627 |         call outchar  ; Send it to the display.
 628 |         pop bx        ; Pop the argument off the stack.
 629 |         jmp next
 630 | 
 631 | getchar mov ah,7  ; This headerless routine does an MS-DOS Int 21h call,
 632 |         int 021h  ; reading a character from the standard input device into
 633 |         mov ah,0  ; the AL register. We start out by putting a 7 into AH to
 634 |         ret       ; identify the function we want to perform. The character
 635 |                   ; gets returned in AL, and then we manually clear out
 636 |                   ; AH so that we can have a 16-bit result in AX.
 637 | 
 638 | outchar xchg ax,dx  ; This headerless routine does an MS-DOS Int 21h call,
 639 |         mov ah,2    ; sending a character in the DL register to the standard
 640 |         int 021h    ; output device. The 2 in the AH register identifies what
 641 |         ret         ; function we want to perform.
 642 | 
 643 | ; -----------------------
 644 | ; Dictionary Search
 645 | ; -----------------------
 646 | 
 647 | ; find - ( addr -- addr2 flag ) look up word in the dictionary
 648 | ; find looks in the Forth dictionary for a word with the name given in the
 649 | ; counted string at addr. One of the following will be returned:
 650 | ;   flag =  0, addr2 = counted string --> word was not found
 651 | ;   flag =  1, addr2 = call address   --> word is immediate
 652 | ;   flag = -1, addr2 = call address   --> word is not immediate
 653 |         primitive 'find',find
 654 |         mov di,val_last    ; Get the address of the link field of the last
 655 |                            ; word in the dictionary. Put it in DI.
 656 | findl   push di            ; Save the link field pointer.
 657 |         push bx            ; Save the address of the name we're looking for.
 658 |         mov cl,byte[bx]    ; Copy the length of the string into CL
 659 |         mov ch,0           ; Clear CH to make a 16 bit counter.
 660 |         inc cx             ; Increment the counter.
 661 | findc   mov al,byte[di+2]  ; Get the length byte of whatever word in the
 662 |                            ; dictionary we're currently looking at.
 663 |         and al,07Fh        ; Mask off the immediate bit.
 664 |         cmp al,byte[bx]    ; Compare it with the length of the string.
 665 |         je findm           ; If they're the same, jump.
 666 |         pop bx             ; Nope, can't be the same if the lengths are
 667 |         pop di             ; different. Pop the saved values back to regs.
 668 |         mov di,word[di]    ; Get the next link address.
 669 |         test di,di         ; See if it's zero. If it's not, then we've not
 670 |         jne findl          ; hit the end of the dictionary yet. Then jump
 671 |                            ; back and check the next word in the dictionary.
 672 | findnf  push bx            ; End of dictionary. Word wasn't found. Push the
 673 |                            ; string address to the stack.
 674 |         xor bx,bx          ; Clear the BX register (make a "false" flag).
 675 |         jmp next           ; Return to caller.
 676 | findm   inc di             ; The lengths match, but do the chrs? Increment
 677 |                            ; the link field pointer. (That may sound weird,
 678 |                            ; especially on the first time through this loop.
 679 |                            ; But remember that, earlier in the loop, we
 680 |                            ; loaded the length byte out the dictionary by an
 681 |                            ; indirect reference to DI+2. We'll do that again
 682 |                            ; in a moment, so what in effect we're actually
 683 |                            ; doing here is incrementing what's now going to
 684 |                            ; be treated as a string pointer for the name in
 685 |                            ; the dictionary as we compare the characters
 686 |                            ; in the strings.)
 687 |         inc bx             ; Increment the pointer to the string we're
 688 |                            ; checking.
 689 |         loop findc         ; Decrements the counter in CX and, if it's not
 690 |                            ; zero yet, loops back. The same code that started
 691 |                            ; out comparing the length bytes will go through
 692 |                            ; and compare the characters in the string with
 693 |                            ; the chrs in the dictionary name we're pointing
 694 |                            ; at.
 695 |         pop bx             ; If we got here, then the strings match. The
 696 |                            ; word is in the dictionary. Pop the string's
 697 |                            ; starting address and throw it away. We don't
 698 |                            ; need it now that we know we're looking at a
 699 |                            ; defined word.
 700 |         pop di             ; Restore the link field address for the dictionary
 701 |                            ; word whose name we just looked at.
 702 |         mov bx,1           ; Put a 1 at the top of the stack.
 703 |         inc di             ; Increment the pointer past the link field to the
 704 |         inc di             ; name field.
 705 |         mov al,byte[di]    ; Get the length of the word's name.
 706 |         test al,080h       ; See if it's an immediate.
 707 |         jne findi          ; "test" basically performs an AND without
 708 |                            ; actually changing the register. If the
 709 |                            ; immediate bit is set, we'll have a non-zero
 710 |                            ; result and we'll skip the next instruction,
 711 |                            ; leaving a 1 in BX to represent that we found
 712 |                            ; an immediate word.
 713 |         neg bx             ; But if it's not an immediate word, we fall
 714 |                            ; through and generate a -1 instead to get the
 715 |                            ; flag for a non-immediate word.
 716 | findi   and ax,31          ; Mask off all but the valid part of the name's
 717 |                            ; length byte.
 718 |         add di,ax          ; Add the length to the name field address then
 719 |         inc di             ; add 1 to get the address of the code field.
 720 |         push di            ; Push the CFA onto the stack.
 721 |         jmp next           ; We're done.
 722 | 
 723 | ; -----------------------
 724 | ; Colon Definition
 725 | ; -----------------------
 726 | 
 727 | ; : - ( -- ) define a new Forth word, taking the name from the input buffer.
 728 | ; Ah! We've finally found a word that's actually defined as a Forth colon
 729 | ; definition rather than an assembly language routine! Partly, anyway; the
 730 | ; first part is Forth code, but the end is the assembly language run-time
 731 | ; routine that, incidentally, executes Forth colon definitions. Notice that
 732 | ; the first part is not a sequence of opcodes, but rather is a list of
 733 | ; code field addresses for the words used in the definition. In each code
 734 | ; field of each defined word is an "execution tag", or "xt", a pointer to
 735 | ; the runtime code that executes the word. In a Forth colon definition, this
 736 | ; is going to be a pointer to the docolon routine we see in the second part
 737 | ; of the definition of colon itself below.
 738 |         colon ':',colon
 739 |         dw xt_lit,-1       ; If you write a Forth routine where you put an
 740 |                            ; integer number right in the code, such as the
 741 |                            ; 2 in the phrase, "dp @ 2 +", lit is the name
 742 |                            ; of the routine that's called at runtime to put
 743 |                            ; that integer on the stack. Here, lit pushes
 744 |                            ; the -1 stored immediately after it onto the
 745 |                            ; stack.
 746 |         dw xt_state        ; The runtime code for a variable leaves its
 747 |                            ; address on the stack. The address of state,
 748 |                            ; in this case.
 749 |         dw xt_store        ; Store that -1 into state to tell the system
 750 |                            ; that we're switching from interpret mode into
 751 |                            ; compile mode. Other than creating the header,
 752 |                            ; colon doesn't actually compile the words into
 753 |                            ; the new word. That task is performed in
 754 |                            ; interpret, but it needs this new value stored
 755 |                            ; into state to tell it to do so.
 756 |         dw xt_create       ; Now we call the word that's going to create the
 757 |                            ; header for the new colon definition we're going
 758 |                            ; to compile.
 759 |         dw xt_do_semi_code ; Write, into the code field of the header we just
 760 |                            ; created, the address that immediately follows
 761 |                            ; this statement: the address of the docolon
 762 |                            ; routine, which is the code that's responsible
 763 |                            ; for executing the colon definition we're
 764 |                            ; creating.
 765 | docolon dec bp             ; Here's the runtime code for colon words.
 766 |         dec bp             ; Basically, what docolon does is similar to
 767 |                            ; calling a subroutine, in that we have to push
 768 |                            ; the return address to the stack. Since the 80x86
 769 |                            ; doesn't directly support more than one stack and
 770 |                            ; the "real" stack is used for data, we have to
 771 |                            ; operate the Forth virtual machine's return stack
 772 |                            ; manually. So, first, we manually decrement the
 773 |                            ; return stack pointer twice to point to where
 774 |                            ; we're going to save the return address.
 775 |         mov word[bp],si    ; Then we write that address directly from the
 776 |                            ; instruction pointer to that location.
 777 |         lea si,[di+2]      ; We now have to tell Forth to start running the
 778 |                            ; words in the colon definition we just started.
 779 |                            ; The value in DI was left pointing at the code
 780 |                            ; field of the word that we just started that just
 781 |                            ; jumped into docolon. By loading into the
 782 |                            ; instruction pointer the value that's 2 bytes
 783 |                            ; later, at the start of the data field, we're
 784 |                            ; loading into the IP the address of the first
 785 |                            ; word in that definition. Execution of the other
 786 |                            ; words in that definition will occur in sequence
 787 |                            ; from here on.
 788 |         jmp next           ; Now that we're pointing to the correct
 789 |                            ; instruction, go do it.
 790 | 
 791 | ; ; - ( -- ) complete the Forth word being compiled
 792 |         colon ';',semicolon,immediate
 793 |                            ; Note above that ; is immediate, the first such
 794 |                            ; word we've seen here. It needs to be so because
 795 |                            ; it's used only during the compilation of a colon
 796 |                            ; definition and we want it to execute rather than
 797 |                            ; just being stored in the definition.
 798 |         dw xt_lit,xt_exit  ; Put the address of the code field of exit onto
 799 |                            ; the stack.
 800 |         dw xt_comma        ; Store it into the dictionary.
 801 |         dw xt_lit,0        ; Now put a zero on the stack...
 802 |         dw xt_state        ; along with the address of the state variable.
 803 |         dw xt_store        ; Store the 0 into state to indicate that we're
 804 |                            ; done compiling a word and are now back into
 805 |                            ; interpret mode.
 806 |         dw xt_exit         ; exit is the routine that finishes up the
 807 |                            ; execution of a colon definition and jumps to
 808 |                            ; next in order to start execution of the next
 809 |                            ; word.
 810 | 
 811 | ; -----------------------
 812 | ; Headers
 813 | ; -----------------------
 814 | 
 815 | ; create - ( -- ) build a header for a new word in the dictionary, taking
 816 | ; the name from the input buffer
 817 |         colon 'create',create
 818 |         dw xt_dp,xt_fetch   ; Get the current dictionary pointer.
 819 |         dw xt_last,xt_fetch ; Get the LFA of the last word in the dictionary.
 820 |         dw xt_comma         ; Save the value of last at the current point in
 821 |                             ; the dictionary to become the link field for
 822 |                             ; the header we're creating. Remember that comma
 823 |                             ; automatically increments the value of dp.
 824 |         dw xt_last,xt_store ; Save the address of the link field we just
 825 |                             ; created as the new value of last.
 826 |         dw xt_lit,32        ; Parse the input buffer for the name of the
 827 |         dw xt_word          ; word we're creating, using a space for the
 828 |                             ; separation character when we invoke word.
 829 |                             ; Remember that word copies the parsed name
 830 |                             ; as a counted string to the location pointed
 831 |                             ; to by dp, which not coincidentally is
 832 |                             ; exactly what and where we need it for the
 833 |                             ; header we're creating.
 834 |         dw xt_count         ; Get the address of the first character of the
 835 |                             ; word's name, and the name's length.
 836 |         dw xt_plus          ; Add the length to the address to get the addr
 837 |                             ; of the first byte after the name, then store
 838 |         dw xt_dp,xt_store   ; that address as the new value of dp.
 839 |         dw xt_lit,0         ; Put a 0 on the stack, and store it as a dummy
 840 |         dw xt_comma         ; placeholder in the new header's CFA.
 841 |         dw xt_do_semi_code  ; Write, into the code field of the header we just
 842 |                             ; created, the address that immediately follows
 843 |                             ; this statement: the address of the dovar
 844 |                             ; routine, which is the code that's responsible
 845 |                             ; for pushing onto the stack the data field
 846 |                             ; address of the word whose header we just
 847 |                             ; created when it's executed.
 848 | dovar   push bx             ; Push the stack to make room for the new value
 849 |                             ; we're about to put on top.
 850 |         lea bx,[di+2]       ; This opcode loads into bx whatever two plus the
 851 |                             ; value of the contents of DI might be, as opposed
 852 |                             ; to a "mov bx,[di+2]", which would move into BX
 853 |                             ; the value stored in memory at that location.
 854 |                             ; What we're actually doing here is calculating
 855 |                             ; the address of the data field that follows
 856 |                             ; this header so we can leave it on the stack.
 857 |         jmp next            ; 
 858 | 
 859 | ; # (;code) - ( -- ) replace the xt of the word being defined with a pointer
 860 | ; to the code immediately following (;code)
 861 | ; The idea behind this compiler word is that you may have a word that does
 862 | ; various compiling/accounting tasks that are defined in terms of Forth code
 863 | ; when its being used to compile another word, but afterward, when the new
 864 | ; word is executed in interpreter mode, you want your compiling word to do
 865 | ; something else that needs to be coded in assembly. (;code) is the word that
 866 | ; says, "Okay, that's what you do when you're compiling, but THIS is what
 867 | ; you're going to do while executing, so look sharp, it's in assembly!"
 868 | ; Somewhat like the word DOES>, which is used in a similar manner to define
 869 | ; run-time code in terms of Forth words.
 870 |         primitive '(;code)',do_semi_code
 871 |         mov di,word[val_last] ; Get the LFA of the last word in dictionary
 872 |                               ; (i.e. the word we're currently in the middle
 873 |                               ; of compiling) and put it in DI. 
 874 |         mov al,byte[di+2]     ; Get the length byte from the name field.
 875 |         and ax,31             ; Mask off the immediate bit and leave only
 876 |                               ; the 5-bit integer length.
 877 |         add di,ax             ; Add the length to the pointer. If we add 3
 878 |                               ; to the value in DI at this point, we'll
 879 |                               ; have a pointer to the code field.
 880 |         mov word[di+3],si     ; Store the current value of the instruction
 881 |                               ; pointer into the code field. That value is
 882 |                               ; going to point to whatever follows (;code) in
 883 |                               ; the word being compiled, which in the case
 884 |                               ; of (;code) had better be assembly code.
 885 |         mov si,word[bp]       ; Okay, we just did something funky with the
 886 |                               ; instruction pointer; now we have to fix it.
 887 |                               ; Directly load into the instruction pointer
 888 |                               ; the value that's currently at the top of
 889 |                               ; the return stack.
 890 |         inc bp                ; Then manually increment the return stack
 891 |         inc bp                ; pointer.
 892 |         jmp next              ; Done. Go do another word.
 893 | 
 894 | ; -----------------------
 895 | ; Constants
 896 | ; -----------------------
 897 | 
 898 | ; constant - ( x -- ) create a new constant with the value x, taking the name
 899 | ; from the input buffer
 900 |         colon 'constant',constant
 901 |         dw xt_create       ; Create the constant's header.
 902 |         dw xt_comma        ; Store the constant's value into the word's
 903 |                            ; data field.
 904 |         dw xt_do_semi_code ; Write, into the code field of the header we just
 905 |                            ; created, the address that immediately follows
 906 |                            ; this statement: the address of the doconst
 907 |                            ; routine, which is the code that's responsible
 908 |                            ; for pushing onto the stack the value that's
 909 |                            ; contained in the data field of the word whose
 910 |                            ; header we just created when that word is
 911 |                            ; invoked.
 912 | doconst push bx            ; Push the stack down.
 913 |         mov bx,word[di+2]  ; DI should be pointing to the constant's code
 914 |                            ; field. Load into the top of the stack the
 915 |                            ; value 2 bytes further down from the code field,
 916 |                            ; i.e. the constant's actual value.
 917 |         jmp next           ; 
 918 | 
 919 | 
 920 | ; -----------------------
 921 | ; Outer Interpreter
 922 | ; -----------------------
 923 | 
 924 | ; -------------------------------------------------------
 925 | ; NOTE! The following line with the final: label MUST be
 926 | ; immediately before the final word definition!
 927 | ; -------------------------------------------------------
 928 | 
 929 | final:
 930 | 
 931 |         colon 'interpret',interpret
 932 | interpt dw xt_number_t_i_b  ; Get the number of characters in the input
 933 |         dw xt_fetch         ; buffer.
 934 |         dw xt_to_in         ; Get the index into the input buffer.
 935 |         dw xt_fetch         ; 
 936 |         dw xt_equals        ; See if they're the same.
 937 |         dw xt_zero_branch   ; If not, it means there's still some text in
 938 |         dw intpar           ; the buffer. Go process it.
 939 |         dw xt_t_i_b         ; if #tib = >in, we're out of text and need to
 940 |         dw xt_lit           ; read some more. Put a 50 on the stack to tell
 941 |         dw 50               ; accept to read up to 50 more characters.
 942 |         dw xt_accept        ; Go get more input.
 943 |         dw xt_number_t_i_b  ; Store into #tib the actual number of characters
 944 |         dw xt_store         ; that accept read.
 945 |         dw xt_lit           ; Reposition >in to index the 0th byte in the
 946 |         dw 0                ; input buffer.
 947 |         dw xt_to_in         ; 
 948 |         dw xt_store         ; 
 949 | intpar  dw xt_lit           ; Put a 32 on the stack to represent an ASCII
 950 |         dw 32               ; space character. Then tell word to scan the
 951 |         dw xt_word          ; buffer looking for that character.
 952 |         dw xt_find          ; Once word has parsed out a string, have find
 953 |                             ; see if that string matches the name of any
 954 |                             ; words already defined in the dictionary.
 955 |         dw xt_dupe          ; Copy the flag returned by find, then jump if
 956 |         dw xt_zero_branch   ; it's a zero, meaning that the string doesn't
 957 |         dw intnf            ; match any defined word names.
 958 |         dw xt_state         ; We've got a word match. Are we interpreting or
 959 |         dw xt_fetch         ; do we want to compile it? See if find's flag
 960 |         dw xt_equals        ; matches the current value of state.
 961 |         dw xt_zero_branch   ; If so, we've got an immediate. Jump.
 962 |         dw intexc           ; 
 963 |         dw xt_comma         ; Not immediate. Store the word's CFA in the
 964 |         dw xt_branch        ; dictionary then jump to the end of the loop.
 965 |         dw intdone          ; 
 966 | intexc  dw xt_execute       ; We found an immediate word. Execute it then
 967 |         dw xt_branch        ; jump to the end of the loop.
 968 |         dw intdone          ; 
 969 | intnf   dw xt_dupe          ; Okay, it's not a word. Is it a number? Copy
 970 |                             ; the flag, which we've already proved is 0,
 971 |                             ; thereby creating a double-precision value of
 972 |                             ; 0 at the top of the stack. We'll need this
 973 |                             ; shortly when we call >number.
 974 |         dw xt_rote          ; Rotate the string's address to the top of
 975 |                             ; the stack. Note that it's still a counted
 976 |                             ; string.
 977 |         dw xt_count         ; Use count to split the string's length byte
 978 |                             ; apart from its text.
 979 |         dw xt_to_number     ; See if we can convert the text into a number.
 980 |         dw xt_zero_branch   ; If we get a 0 from 0branch, we got a good
 981 |         dw intskip          ; conversion. Jump and continue.
 982 |         dw xt_state         ; We had a conversion error. Find out whether
 983 |         dw xt_fetch         ; we're interpreting or compiling.
 984 |         dw xt_zero_branch   ; If state=0, we're interpreting. Jump
 985 |         dw intnc            ; further down.
 986 |         dw xt_last          ; We're compiling. Shut the compiler down in an
 987 |         dw xt_fetch         ; orderly manner. Get the LFA of the word we
 988 |         dw xt_dupe          ; were trying to compile. Set aside a copy of it,
 989 |         dw xt_fetch         ; then retrieve from it the LFA of the old "last
 990 |         dw xt_last          ; word" and resave that as the current last word.
 991 |         dw xt_store         ; 
 992 |         dw xt_dp            ; Now we have to save the LFA of the word we just
 993 |         dw xt_store         ; tried to compile back into the dictionary
 994 |                             ; pointer.
 995 | intnc   dw xt_abort         ; Whether we were compiling or interpreting,
 996 |                             ; either way we end up here if we had an
 997 |                             ; unsuccessful number conversion. Call abort
 998 |                             ; and reset the system.
 999 | intskip dw xt_drop          ; >number was successful! Drop the address and
1000 |         dw xt_drop          ; the high word of the double-precision numeric
1001 |                             ; value it returned. We don't need either. What's
1002 |                             ; left on the stack is the single-precision
1003 |                             ; number we just converted.
1004 |         dw xt_state         ; Are we compiling or interpreting?
1005 |         dw xt_fetch         ; 
1006 |         dw xt_zero_branch   ; If we're interpreting, jump on down.
1007 |         dw intdone          ; 
1008 |         dw xt_lit           ; No, John didn't stutter here. These 4 lines are
1009 |         dw xt_lit           ; how "['] lit , ," get encoded. We need to store
1010 |         dw xt_comma         ; lit's own CFA into the word, followed by the
1011 |         dw xt_comma         ; number we just converted from text input.
1012 | intdone dw xt_branch        ; Jump back to the beginning of the interpreter
1013 |         dw interpt          ; loop and process more input.
1014 | 
1015 | freemem:
1016 | 
1017 | ; That's it! So, there you have it! Only 33 named Forth words...
1018 | ;
1019 | ;     ,  @   >in  dup   base  word   abort   0branch   interpret
1020 | ;     +  !   lit  swap  last  find   create  constant  (;code)
1021 | ;     =  ;   tib  drop  emit  state  accept  >number
1022 | ;     :  dp  rot  #tib  exit  count  execute
1023 | ;
1024 | ; ...plus 6 pieces of headerless code and run-time routines...
1025 | ;
1026 | ;     getchar  outchar  docolon  dovar  doconst  next
1027 | ;
1028 | ; ...are all that's required to produce a functional Forth interpreter
1029 | ; capable of compiling colon definitions, only 978 bytes long! Granted,
1030 | ; it's lacking a number of key critical words that make it nigh unto
1031 | ; impossible to do anything useful, but this just goes to show just
1032 | ; how small a functioning Forth system can be made.
1033 | 
1034 | 


--------------------------------------------------------------------------------
/msdos/macros.asm:
--------------------------------------------------------------------------------
  1 | ; Itsy Forth - Macros
  2 | ;   Written by John Metcalf
  3 | ;   Commentary by Mike Adams
  4 | ;
  5 | ; Itsy Forth was written for use with NASM, the "Netwide Assembler"
  6 | ; (http://www.nasm.us/). It uses a number of macros to deal with the tedium
  7 | ; of generating the headers for the words that are defined in Itsy's source
  8 | ; code file. The macros, and the explanations of what they're doing, are
  9 | ; listed below:
 10 | 
 11 | ;--------------------------------------------------------------------------
 12 | ; First, two variables are defined for use by the macros:
 13 |         ; link is the initial value for the first link field that'll
 14 |         ; be defined. It's value will be updated with each header
 15 |         ; that's created.
 16 |         %define link 0
 17 | 
 18 |         ; A bitmask that'll be called "immediate" will be used to
 19 |         ; encode the flag into the length bytes of word names in order
 20 |         ; to indicate that the word will be of the immediate type.
 21 |         %define immediate 080h
 22 | 
 23 | ;--------------------------------------------------------------------------
 24 | ; The first macro defined is the primary one used by the others, "head".
 25 | ; It does the lion's share of the work for the other macros that'll be
 26 | ; defined afterwards. Its commands perform the following operations:
 27 | 
 28 |         ; The first line of the macro declares it's name as "head".
 29 |         ; The 4 in this line signifies that it expects to receive
 30 |         ; 4 parameters when it's invoked: the string that will be the
 31 |         ; word's name and will be encoded into the header along with
 32 |         ; the string's name; an "execution tag" name that will have the
 33 |         ; prefix "xt_" attached to it and will be used as a label for
 34 |         ; the word's code field; a flag that will be 080h if the word
 35 |         ; will be immediate and a 0 otherwise; and the label for the
 36 |         ; word's runtime code, whose address will be put into the
 37 |         ; word's code field.
 38 |         %macro head 4
 39 | 
 40 |         ; Okay, what we're doing in this odd-looking bit of code is
 41 |         ; declaring a variable called "%%link" that's local only to this
 42 |         ; macro and is independent of the earlier variable we declared
 43 |         ; as "link". It's a label that will represent the current
 44 |         ; location in the object code we're creating. Then we lay down
 45 |         ; some actual object code, using the "dw" command to write the
 46 |         ; current value of "link" into the executable file.
 47 |         %%link dw link
 48 | 
 49 |         ; Here's one of the tricky parts. We now redefine the value of
 50 |         ; "link" to be whatever the current value of "%%link" is, which
 51 |         ; is basically the address of the link field that was created
 52 |         ; during this particular use of this macro. That way, the next
 53 |         ; time head is called, the value that will be written into the
 54 |         ; code in the "dw" command above will be whatever the value of
 55 |         ; "%%link" was during THIS use of the macro. This way, each time
 56 |         ; head is called, the value that'll be written into the new
 57 |         ; link field will be the address that was used for the link
 58 |         ; field the previous time head was called, which is just how
 59 |         ; we want the link fields to be in a Forth dictionary. Note that
 60 |         ; the first time that head is called, the value of link was
 61 |         ; predefined as 0, so that the link field of the first word in
 62 |         ; the dictionary will contain the value of 0 to mark it as
 63 |         ; being the first word in the dictionary.
 64 |         %define link %%link
 65 | 
 66 |         ; Now the name field. The first argument passed to head is the
 67 |         ; string defining the new word's name. The next line in the macro
 68 |         ; measures the length of the string (the "%1" tells it that it's
 69 |         ; supposed to look at argument #1) and assigns it to a macro-local
 70 |         ; variable called "%%count".
 71 |         %strlen %%count %1
 72 | 
 73 |         ; In this next line, we're writing data into the object code on
 74 |         ; a byte-by-byte basis. We first write a byte consisting of the
 75 |         ; value of argument 3 (which is 080h if we're writing the header
 76 |         ; for an immediate word or a 0 otherwise) added to the length of
 77 |         ; the name string to produce the length byte in the header. Then
 78 |         ; we write the name string itself into the file.
 79 |         db %3 + %%count,%1
 80 | 
 81 |         ; Okay, don't get confused by the "+" in this next line. Take
 82 |         ; careful note of the spaces; the actual command is "%+", which
 83 |         ; is string concatenation, not numeric addition. We're going to
 84 |         ; splice a string together. The first part consists of the "xt_",
 85 |         ; then we splice the macro's 2nd argument onto it. The resulting
 86 |         ; string is used as the head's "execution tag", the address of
 87 |         ; it's code field. This label is then used for the "dw" command
 88 |         ; that writes the value of argument #4 (the address of the word's
 89 |         ; runtime code) into the header's code field.
 90 |         xt_ %+ %2 dw %4
 91 | 
 92 |         ; As you might guess, the next line marks the end of the
 93 |         ; macro's definition. The entire header's been defined at this
 94 |         ; point, and we're now ready for the data field, whether it's
 95 |         ; composed of assembly code, a list of Forth words, or the
 96 |         ; numeric data for a variable or constant.
 97 |         %endmacro
 98 | 
 99 | ; For example, calling head with the following line:
100 | ;
101 | ;      head,'does>',does,080h,docolon
102 | ;
103 | ; will produce the following header code...
104 | ;
105 | ;               dw (address of link of previous header)
106 | ;               db 085h,'does>'
107 | ;      xt_does  dw docolon
108 | ;
109 | ; ...and records the address of this header's link field so that it can
110 | ; be written into the link field of the next word, just as the address
111 | ; of the previous link field was written into this header.
112 | ; This method saves the programmer a lot of tedium in manually generating
113 | ; the code for word headers when writing a Forth system's kernel in
114 | ; assembly language. Note that argument #2 is surrounded by single quotes.
115 | ; That's the format that the assembler expects to see when being told to
116 | ; lay down a string of characters byte-by-byte in a db command, so they
117 | ; have to be present when they're given as an arg to this macro so that
118 | ; the macro puts them in their proper place.
119 | 
120 | ;--------------------------------------------------------------------------
121 | ; The next macro is called "primitive", and is used for setting up a header
122 | ; for a word written in assembly language.
123 | ;
124 |         ; Here we declare the definition of the macro called "primitive".
125 |         ; Note, though, the odd manner in which the number of required
126 |         ; arguments is stated. Yes, that really does mean that it can
127 |         ; take from 2 to 3 arguments. Well, what does it do if the user
128 |         ; only gives it 2? That's what that 0 is: the default value that's
129 |         ; to be used for argument #3 if the user doesn't specify it. Most
130 |         ; of the time he won't; the only time arg #3 will be specifically
131 |         ; given will be if the user is defining an immediate word.
132 |         %macro primitive 2-3 0
133 | 
134 |         ; All primitive does is to pass its arguments on to head, which
135 |         ; does most of the actual work. It passes on the word name and
136 |         ; the execution tag name as-is. Parameter #3 will be given the
137 |         ; default value of 0 unless the user specifically states it.
138 |         ; This is meant to allow the user to add "immediate" to the
139 |         ; macro invocation to create an immediate word. The 4th arg,
140 |         ; "$+2", means that when head goes to write the address of the
141 |         ; run-time code into the code field, the address it's going to
142 |         ; use will be 2 bytes further along than the code field address,
143 |         ; i.e. the address of the start of the code immediately after
144 |         ; the code field. (The "$" symbol is used by most assemblers
145 |         ; to represent the address of the code that's currently being
146 |         ; assembled.)
147 |         head %1,%2,%3,$+2
148 | 
149 |         ; End of the macro definition.
150 |         %endmacro
151 | 
152 | ;--------------------------------------------------------------------------
153 | ; The macro "colon" operates very similarly to "primitive", except that
154 | ; it's used for colon definitions:
155 | ;
156 |         ; Declare the macro, with 2 to 3 arguments, using 0 for the default
157 |         ; value of arg #3 if one isn't specifically given.
158 |         %macro colon 2-3 0
159 | 
160 |         ; Pass the args on to head, using docolon as the runtime code.
161 |         head %1,%2,%3,docolon
162 | 
163 |         ; End of macro definition.
164 |         %endmacro
165 | 
166 | ;--------------------------------------------------------------------------
167 | ; The rest of the macros all require a specific number of arguments, since
168 | ; none of them have the option of being immediate. This one defines
169 | ; a constant:
170 | 
171 |         ; Macro name is, unsurprisingly, "constant", and gets 3 arguments.
172 |         ; As with head and primitive, the first 2 are the word's name and
173 |         ; the label name that'll be used for the word. The third argument
174 |         ; is the value that we want the constant to hold.
175 |         %macro constant 3
176 | 
177 |         ; Use the head macro. Args 1 and 2, the names, get passed on as-is.
178 |         ; Constants are never defined as immediate (though it's an intriguing
179 |         ; idea; a constant whose value is one thing when compiling and
180 |         ; another when interpreting might be useful for something), so arg #3
181 |         ; passed on to head is always a 0, and arg #4 will always be doconst,
182 |         ; the address of the runtime code for constants.
183 |         head %1,%2,0,doconst
184 | 
185 |         ; Similar to the way that the label is created for the execution
186 |         ; tags, here we create a label for the data field of the constant,
187 |         ; though this time we're prefixing the name with "val_" instead
188 |         ; of the "xt_" used for the execution tags. Then we use a dw to
189 |         ; write constant's arg #3, the constant's value, into the code.
190 |         val_ %+ %2 dw %3
191 | 
192 |         ; End of the definition.
193 |         %endmacro
194 | 
195 | ;--------------------------------------------------------------------------
196 | ; The macro for variables is very similar to the one for constants.
197 | 
198 |         ; Macro name "variable", 3 arguments, with arg #3 being the
199 |         ; initial value that will be given to the variable.
200 |         %macro variable 3
201 | 
202 |         ; Just like in "constant", except that the runtime code is dovar.
203 |         head %1,%2,0,dovar
204 | 
205 |         ; Exact same line as used in "constant", with the same effects.
206 |         val_ %+ %2 dw %3
207 | 
208 |         ; End of the definition.
209 |         %endmacro
210 | 
211 | ;--------------------------------------------------------------------------
212 | ;
213 | ; That's the last of the macros. They're accessed through the
214 | ; "%include macros.asm" command near the beginning of Itsy's
215 | ; source code file. Or, if you prefer, you can remove the
216 | ; %include command and splice the above code directly
217 | ; into itsy.asm in its place.
218 | ;
219 | ;--------------------------------------------------------------------------
220 | 
221 | 


--------------------------------------------------------------------------------
/osx/Makefile:
--------------------------------------------------------------------------------
1 | all:
2 | 	nasm -g -f macho itsy-osx.asm && ld -macosx_version_min 10.5.0 -o itsy itsy-osx.o
3 | 


--------------------------------------------------------------------------------
/osx/itsy-osx.asm:
--------------------------------------------------------------------------------
  1 | ; Itsy Forth
  2 | ;    Written by John Metcalf
  3 | ;    Commentary by John Metcalf and Mike Adams
  4 | ;    Translated to Linux by github user kt97679
  5 | ;    Translated to MacOS X by Dylan McNamee
  6 | 
  7 | ; nasm -g -f macho itsy-osx.asm && ld -macosx_version_min 10.5.0 -o itsy itsy-osx.o
  8 | ;
  9 | ; simple session:
 10 | ; ./itsy
 11 | ; : hi 72 emit 101 emit 108 emit 108 emit 111 emit 10 emit ;
 12 | ; hi
 13 | ; Hello
 14 | ; ^C
 15 | ; TODOs:
 16 | ;   fix seg fault on Ctrl-D
 17 | ;   fix that 72 101 108 108 111 emit emit emit emit emit doesn't work.
 18 | ;   build up Forth control structures - if then else, do while, etc.
 19 | ;   implement string functions num->string, string literals, ., ."
 20 | ;   hard-code the mach-o header (as with Linux version) so ld doesn't add 8K of header.
 21 | ;--------------------------------------------------------------------------
 22 | ; Implementation notes:
 23 | ;
 24 | ; Register Usage:
 25 | ;    sp - data stack pointer.
 26 | ;    bp - return stack pointer.
 27 | ;    si - Forth instruction pointer.
 28 | ;    di - pointer to current XT (CFA of word currently being executed).
 29 | ;    bx - TOS (top of data stack). The top value on the data stack is not
 30 | ;         actually kept on the CPU's data stack. It's kept in the BX register.
 31 | ;         Having it in a register like this speeds up the operation of
 32 | ;         the primitive words. They don't have to take the time to pull a
 33 | ;         value off of the stack; it's already in a register where it can
 34 | ;         be used right away!
 35 | ;    ax, cd, dx - Can all be freely used for processing data. The other
 36 | ;         registers can still be used also, but only with caution. Their
 37 | ;         contents must be pushed to the stack and then restored before
 38 | ;         exiting from the word or calling any other Forth words. LOTS of
 39 | ;         potential for program crashes if you don't do this correctly.
 40 | ;         The notable exception is the DI register, which can (and is, below)
 41 | ;         used pretty freely in assembly code, since the concept of a pointer
 42 | ;         to the current CFA is rather irrelevant in assembly.
 43 | ;
 44 | ;
 45 | ; Structure of an Itsy word definition:
 46 | ;     # of
 47 | ;    Bytes:   Description:
 48 | ;    ------   ---------------------------------------------------------
 49 | ;      2      Link Field. Contains the address of the link field of the
 50 | ;                definition preceding this one in the dictionary. The link
 51 | ;                field of the first def in the dictionary contains 0.
 52 | ;    Varies   Name Field. The first byte of the name field contains the length
 53 | ;                of the name; succeeding bytes contain the ASCII characters of
 54 | ;                the name itself. If the high bit of the length is set, the
 55 | ;                definition is tagged as being an "immediate" word.
 56 | ;      2      Code Field. Contains the address of the executable code for
 57 | ;                the word. For primitives, this will likely be the address
 58 | ;                of the word's own data field. Note that the header creation
 59 | ;                macros automatically generate labels for the code field
 60 | ;                addresses of the words they're used to define, though the
 61 | ;                CFA labels aren't visible in the code shown below. The
 62 | ;                assembler macros create labels, known as "execution tags"
 63 | ;                or XTs, for the code field of each word.
 64 | ;    Varies   Data Field. Contains either a list of the code field addresses
 65 | ;                of the words that make up this definition, or assembly-
 66 | ;                language code for primitives, or numeric data for variables
 67 | ;                 and constants and such.
 68 | 
 69 | %define link 0
 70 | %define immediate 080h
 71 | 
 72 | %macro head 4
 73 | %%link dd link
 74 | %define link %%link
 75 | %strlen %%count %1
 76 | db %3 + %%count, %1
 77 | xt_ %+ %2 dd %4
 78 | yt_ %+ %2:
 79 | %endmacro
 80 | 
 81 | %macro primitive 2-3 0
 82 | head %1, %2, %3, $ + 4
 83 | %endmacro
 84 | 
 85 | %macro colon 2-3 0
 86 | head %1, %2, %3, docolon
 87 | %endmacro
 88 | 
 89 | %macro variable 3
 90 | head %1, %2, 0, dovar
 91 | val_ %+ %2 dd %3
 92 | %endmacro
 93 | 
 94 | %define MEMSIZE 1048576
 95 | %define TIBSIZE 80
 96 | %define STACKSIZE 4096
 97 | %define TIBPTR fstack + MEMSIZE - TIBSIZE ; fheap ;TEXTORG + MEMSIZE - TIBSIZE
 98 | %define SP0 TIBPTR - 4
 99 | %define RP0 SP0 - STACKSIZE
100 | %define DSTACK RP0 - STACKSIZE
101 | 
102 | BITS 32
103 | 
104 | section .data
105 | 
106 | ; -------------------
107 | ; System Variables
108 | ; -------------------
109 | 
110 |     ; state - ( -- addr ) true = compiling, false = interpreting
111 |     variable 'state', state, 0
112 | 
113 |     ; >in - ( -- addr ) next character in input buffer
114 |     variable '>in', to_in, 0
115 | 
116 |     ; #tib - ( -- addr ) number of characters in the input buffer
117 |     variable '#tib', number_t_i_b, 0
118 | 
119 |     ; dp - ( -- addr ) first free cell in the dictionary
120 |     variable 'dp', dp, DSTACK
121 | 
122 |     ; base - ( -- addr ) number base
123 |     variable 'base', base, 10
124 | 
125 |     ; last - ( -- addr ) the last word to be defined
126 |     ; NOTE: The label "final:" must be placed immediately before
127 |     ; the last word defined in this file. If new words are added,
128 |     ; make sure they're either added before the "final:" label
129 |     ; or the "final:" label is moved to the position immediately
130 |     ; before the last word added.
131 |     variable 'last', last, final
132 | 
133 |     ; tib - ( -- addr ) address of the input buffer
134 |     variable 'tib', t_i_b, TIBPTR
135 | 
136 | section .text
137 | global start
138 | start:
139 |     jmp xt_abort+4
140 | 
141 | ; this is a separate routine to fix up the stack before the syscall
142 | _mysyscall:
143 |     int 80h
144 |     ret
145 | 
146 | ; execute - ( xt -- ) call the word at xt
147 |     primitive 'execute', execute
148 |         mov eax, ebx ; eax is important here, it is used by docolon and dovar
149 |         pop ebx
150 |         jmp dword[eax]
151 | 
152 | ; -------------------
153 | ; Initialisation
154 | ; -------------------
155 | 
156 | ; abort - ( -- ) initialise Itsy then jump to interpret
157 |     primitive 'abort', abort
158 |         mov eax, dword[val_number_t_i_b]
159 |         mov dword[val_to_in], eax
160 |         xor ebp, ebp
161 |         mov dword[val_state], ebp
162 |         mov esp, SP0
163 |         mov ebp, RP0
164 |         mov esi, xt_interpret + 4
165 |         jmp next
166 | 
167 | ; -------------------
168 | ; Compilation
169 | ; -------------------
170 | 
171 | ; , - ( x -- ) compile x to the current definition.
172 | ;    Stores the number on the stack to the memory location currently
173 | ;    pointed to by dp.
174 |     primitive ',', comma
175 |         xchg eax, ebx
176 |         mov ebx, val_dp
177 |         mov edi, [ebx]
178 |         stosd
179 |         mov [ebx], edi
180 |         pop ebx
181 |         jmp next
182 | 
183 | ; lit - ( -- ) push the value in the cell straight after lit.
184 | ;   lit is the word that is compiled into a definition when you put a
185 | ;   "literal" number in a Forth definition. When your word is compiled,
186 | ;   the CFA of lit gets stored in the definition followed immediately
187 | ;   by the value of the number you put into the code. At run time, lit
188 | ;   pushes the value of your number onto the stack.
189 |     primitive 'lit', lit
190 |         push ebx
191 |         lodsd
192 |         xchg eax, ebx
193 |         jmp next
194 | 
195 | ; -------------------
196 | ; Stack
197 | ; -------------------
198 | 
199 | ; rot - ( x y z -- y z x ) rotate x, y and z.
200 | ;   Standard Forth word that extracts number 3rd from the top of the stack
201 | ;   and puts it on the top, effectively rotating the top 3 values.
202 |     primitive 'rot', rote
203 |         pop edx
204 |         pop eax
205 |         push edx
206 |         push ebx
207 |         xchg eax, ebx
208 |         jmp next
209 | 
210 | ; drop - ( x -- ) remove x from the stack.
211 |     primitive 'drop', drop
212 |         pop ebx
213 |         jmp next
214 | 
215 | ; dup - ( x -- x x ) add a copy of x to the stack
216 |     primitive 'dup', dupe
217 |         push ebx
218 |         jmp next
219 | 
220 | ; # swap - ( x y -- y x ) exchange x and y
221 |     primitive 'swap', swap
222 |         xchg ebx, [esp]
223 |         jmp next
224 | 
225 | ; -------------------
226 | ; Maths / Logic
227 | ; -------------------
228 | 
229 | ; + - ( x y -- z) calculate z=x+y then return z
230 |     primitive '+', plus
231 |         pop eax
232 |         add ebx, eax
233 |         jmp next
234 | 
235 |     primitive '=', equals
236 |         pop eax
237 |         sub ebx, eax
238 |         sub ebx, 1
239 |         sbb ebx, ebx
240 |         jmp next
241 | 
242 |     primitive '@', fetch
243 |         mov ebx, dword[ebx]
244 |         jmp next
245 | 
246 |     primitive '!', store
247 |         pop dword[ebx]
248 |         pop ebx
249 |         jmp next
250 | 
251 | ; break ( -- ) trigger a breakpoint for debugging
252 |     primitive 'break', break
253 |         int 3
254 |         jmp next
255 | 
256 | ; ----------------------
257 | ; The inner interpteter (buried in here):
258 | ; ----------------------
259 |     primitive 'exit', exit
260 |         xchg ebp, esp
261 |         pop esi
262 |         xchg ebp, esp
263 | next    lodsd           ; funny that the key part of the inner interpreter is buried here
264 |         jmp dword[eax]  ; eax is later used by docolon and dovar
265 | 
266 | ; -------------------
267 | ; Flow Control
268 | ; -------------------
269 | 
270 | ; 0branch - ( x -- ) jump if x is zero
271 | ; This is the primitive word that's compiled as the runtime code in
272 | ; an IF...THEN statement. The number compiled into the word's definition
273 | ; immediately after 0branch is the address of the word in the definition
274 | ; that we're branching to. That address gets loaded into the instruction
275 | ; pointer. In essence, this word sees a false flag (i.e. a zero) and
276 | ; then jumps over the words that comprise the "do this if true" clause
277 | ; of an IF...ELSE...THEN statement.
278 | 
279 |     primitive '0branch', zero_branch
280 |         lodsd
281 |         test ebx, ebx
282 |         jne zerob_z
283 |         xchg eax, esi
284 | zerob_z pop ebx
285 |         jmp next
286 | 
287 | ; branch - ( addr -- ) unconditional jump
288 | ; This is one of the pieces of runtime code that's compiled by
289 | ; BEGIN/WHILE/REPEAT, BEGIN/AGAIN, and BEGIN/UNTIL loops. As with 0branch,
290 | ; the number compiled into the dictionary immediately after the branch is
291 | ; the address of the word in the definition that we're branching to.
292 |     primitive 'branch',branch
293 |         mov esi, dword[esi]
294 |         jmp next
295 | 
296 | ; -----------------------
297 | ; Terminal Input / Output
298 | ; -----------------------
299 | 
300 | ; accept - ( addr len -- len2 ) read a string from the terminal
301 | ; accept reads a string of characters from the terminal. The string
302 | ; is stored at addr and can be up to len characters long.
303 | ; accept returns the actual length of the string.
304 | 
305 | ; converted Linux syscall to MacOS syscall
306 | ; see: https://filippo.io/making-system-calls-from-assembly-in-mac-os-x/
307 | ; main difference is Linux passes syscall params in registers, MacOS / BSD does it on the stack
308 |     ; ( addr len -- len_read )
309 |     primitive 'accept', accept
310 |         ; ebx has the # bytes to read
311 |         ; stdin is handle 0
312 |         ; top of stack has the buffer address
313 |         pop  ecx       ; save buffer
314 |         push ebx         ; count
315 |         push ecx         ; buffer
316 |         push 0           ; stdin
317 |         mov eax, 0x3     ; sys_read
318 |         call _mysyscall
319 |         add esp, 12       ; discard args
320 |         xchg ebx, eax ; MacOS / BSD - eax after sys_read contains number of bytes read (negative number means error), let's move it to TOS
321 |         dec ebx       ; last char is CR
322 |         push ebx
323 |         jmp next
324 | 
325 |     ; ( char -- ) emit a character to the terminal
326 |     primitive 'emit', emit
327 |         ; char to print is in ebx
328 |         push ebx      ; needs to be in memory - why not stack?
329 |         mov  ebx, esp ; save stack pointer (pointing at our char)
330 |         push 1        ; count
331 |         push ebx      ; buffer (into the stack)
332 |         push 1        ; stdout
333 |         mov eax, 0x4  ; sys_write
334 |         call _mysyscall
335 | aftemt  add esp, 16   ; reset stack
336 |         pop ebx       ; TOS goes in ebx
337 |         jmp next
338 | 
339 | ; -------------------
340 | ; String
341 | ; -------------------
342 | 
343 | ; count - ( addr -- addr2 len )
344 | ; count is given the address of a "counted string" (like the name field of a
345 | ; word definition in Forth, with the first byte being the number of
346 | ; characters in the string and immediately followed by the characters
347 | ; themselves). It returns the length of the string and a pointer to the
348 | ; first actual character in the string.
349 |     primitive 'count',count
350 |         movzx eax, byte[ebx]
351 |         inc ebx
352 |         push ebx
353 |         mov ebx, eax
354 |         jmp next
355 | 
356 |     primitive '>number',to_number
357 |         pop edi
358 |         pop ecx
359 |         pop eax
360 | to_numl test ebx, ebx
361 |         je to_numz
362 |         push eax
363 |         movzx eax, byte[edi]
364 |         cmp al, 'a'
365 |         jc to_nums
366 |         sub al, 32
367 | to_nums cmp al, '9' + 1
368 |         jc to_numg
369 |         cmp al, 'A'
370 |         jc to_numh
371 |         sub al, 7
372 | to_numg sub al, 48
373 |         cmp al, byte[val_base]
374 |         jnc to_numh
375 |         xchg eax, edx
376 |         pop eax
377 |         push edx
378 |         xchg eax, ecx
379 |         mul dword[val_base]
380 |         xchg eax, ecx
381 |         mul dword[val_base]
382 |         add ecx, edx
383 |         pop edx
384 |         add eax, edx
385 |         dec ebx
386 |         inc edi
387 |         jmp to_numl
388 | to_numz push eax
389 | to_numh push ecx
390 |         push edi
391 |         jmp next
392 | 
393 | ; word - ( char -- addr ) parse the next word in the input buffer
394 | ; word scans the "terminal input buffer" (whose address is given by the
395 | ; system constant tib) for words to execute, starting at the current
396 | ; address stored in the input buffer pointer >in. The character on the
397 | ; stack when word is called is the one that the code will look for as
398 | ; the separator between words. 999 times out of 1000,; this is going to
399 | ; be a space.
400 |     primitive 'word', word
401 |         mov edi, dword[val_dp]
402 |         push edi
403 |         mov edx, ebx
404 |         mov ebx, dword[val_t_i_b]
405 |         mov ecx, ebx
406 |         add ebx, dword[val_to_in]
407 |         add ecx, dword[val_number_t_i_b]
408 | wordf   cmp ecx, ebx
409 |         je wordz
410 |         mov al, byte[ebx]
411 |         inc ebx
412 |         cmp al, dl
413 |         je wordf
414 | wordc   inc edi
415 |         mov byte[edi], al
416 |         cmp ecx, ebx
417 |         je wordz
418 |         mov al, byte[ebx]
419 |         inc ebx
420 |         cmp al, dl
421 |         jne wordc
422 | wordz   mov byte[edi + 1], 32
423 |         mov eax, dword[val_dp]
424 |         xchg eax, edi
425 |         sub eax, edi
426 |         mov byte[edi], al
427 |         sub ebx, dword[val_t_i_b]
428 |         mov dword[val_to_in], ebx
429 |         pop ebx
430 |         jmp next
431 | 
432 | ; -----------------------
433 | ; Dictionary Search
434 | ; -----------------------
435 | 
436 | ; find - ( addr -- addr2 flag ) look up word in the dictionary
437 | ; find looks in the Forth dictionary for a word with the name given in the
438 | ; counted string at addr. One of the following will be returned:
439 | ;   flag =  0, addr2 = counted string --> word was not found
440 | ;   flag =  1, addr2 = call address   --> word is immediate
441 | ;   flag = -1, addr2 = call address   --> word is not immediate
442 |     primitive 'find', find
443 |         mov edi, val_last
444 | findl   push edi
445 |         push ebx
446 |         movzx ecx, byte[ebx]
447 |         inc ecx
448 | findc   mov al, byte[edi + 4]
449 |         and al, 07Fh
450 |         cmp al, byte[ebx]
451 |         je findm
452 |         pop ebx
453 |         pop edi
454 |         mov edi, dword[edi]
455 |         test edi, edi
456 |         jne findl
457 | findnf  push ebx
458 |         xor ebx, ebx
459 |         jmp next
460 | findm   inc edi
461 |         inc ebx
462 |         loop findc
463 |         pop ebx
464 |         pop edi
465 |         xor ebx, ebx
466 |         inc ebx
467 |         lea edi, [edi + 4]
468 |         mov al, byte[edi]
469 |         test al, 080h
470 |         jne findi
471 |         neg ebx
472 | findi   and eax, 31
473 |         add edi, eax
474 |         inc edi
475 |         push edi
476 |         jmp next
477 | 
478 | ; -----------------------
479 | ; Colon Definition
480 | ; -----------------------
481 | 
482 | ; : - ( -- ) define a new Forth word, taking the name from the input buffer.
483 | ; Ah! We've finally found a word that's actually defined as a Forth colon
484 | ; definition rather than an assembly language routine! Partly, anyway; the
485 | ; first part is Forth code, but the end is the assembly language run-time
486 | ; routine that, incidentally, executes Forth colon definitions. Notice that
487 | ; the first part is not a sequence of opcodes, but rather is a list of
488 | ; code field addresses for the words used in the definition. In each code
489 | ; field of each defined word is an "execution tag", or "xt", a pointer to
490 | ; the runtime code that executes the word. In a Forth colon definition, this
491 | ; is going to be a pointer to the docolon routine we see in the second part
492 | ; of the definition of colon itself below.
493 |     colon ':', colon
494 |         dd xt_lit, -1
495 |         dd xt_state
496 |         dd xt_store
497 |         dd xt_create
498 |         dd xt_do_semi_code
499 | 
500 | docolon xchg ebp, esp
501 |         push esi
502 |         xchg ebp, esp
503 |         lea esi, [eax + 4] ; eax value is set by next
504 |         jmp next
505 | 
506 | ; ; - ( -- ) complete the Forth word being compiled
507 |     colon ';', semicolon, immediate
508 |         dd xt_lit, xt_exit
509 |         dd xt_comma
510 |         dd xt_lit, 0
511 |         dd xt_state
512 |         dd xt_store
513 |         dd xt_exit
514 | 
515 | ; -----------------------
516 | ; Headers
517 | ; -----------------------
518 | 
519 | ; create - ( -- ) build a header for a new word in the dictionary, taking
520 | ; the name from the input buffer - a runtime version of 'primitive'
521 |     colon 'create', create
522 |         dd xt_dp, xt_fetch
523 |         dd xt_last, xt_fetch
524 |         dd xt_comma
525 |         dd xt_last, xt_store
526 |         dd xt_lit, 32
527 |         dd xt_word
528 |         dd xt_count
529 |         dd xt_plus
530 |         dd xt_dp, xt_store
531 |         dd xt_lit, 0
532 |         dd xt_comma
533 |         dd xt_do_semi_code
534 | 
535 | dovar   push ebx
536 |         lea ebx, [eax + 4] ; eax value is set by next
537 |         jmp next
538 | 
539 | ; # (;code) - ( -- ) replace the xt of the word being defined with a pointer
540 | ; to the code immediately following (;code)
541 | ; The idea behind this compiler word is that you may have a word that does
542 | ; various compiling/accounting tasks that are defined in terms of Forth code
543 | ; when its being used to compile another word, but afterward, when the new
544 | ; word is executed in interpreter mode, you want your compiling word to do
545 | ; something else that needs to be coded in assembly. (;code) is the word that
546 | ; says, "Okay, that's what you do when you're compiling, but THIS is what
547 | ; you're going to do while executing, so look sharp, it's in assembly!"
548 | ; Somewhat like the word DOES>, which is used in a similar manner to define
549 | ; run-time code in terms of Forth words.
550 |     primitive '(;code)', do_semi_code
551 |         mov edi, dword[val_last]
552 |         mov al, byte[edi + 4]
553 |         and eax, 31
554 |         add edi, eax
555 |         mov dword[edi + 5], esi
556 |         xchg ebp, esp
557 |         pop esi
558 |         xchg esp, ebp
559 |         jmp next
560 | 
561 | ; -----------------------
562 | ; Constants
563 | ; -----------------------
564 | 
565 | ; constant - ( x -- ) create a new constant with the value x, taking the name
566 | ; from the input buffer
567 |         colon 'constant',constant
568 |         dw xt_create       ; Create the constant's header.
569 |         dw xt_comma        ; Store the constant's value into the word's
570 |                            ; data field.
571 |         dw xt_do_semi_code ; Write, into the code field of the header we just
572 |                            ; created, the address that immediately follows
573 |                            ; this statement: the address of the doconst
574 |                            ; routine, which is the code that's responsible
575 |                            ; for pushing onto the stack the value that's
576 |                            ; contained in the data field of the word whose
577 |                            ; header we just created when that word is
578 |                            ; invoked.
579 | doconst push bx            ; Push the stack down.
580 |         mov bx,word[di+2]  ; DI should be pointing to the constant's code
581 |                            ; field. Load into the top of the stack the
582 |                            ; value 2 bytes further down from the code field,
583 |                            ; i.e. the constant's actual value.
584 |         jmp next           ;
585 | 
586 | ; -----------------------
587 | ; Outer Interpreter
588 | ; -----------------------
589 | 
590 | ; -------------------------------------------------------
591 | ; NOTE! The following line with the final: label MUST be
592 | ; immediately before the final word definition!
593 | ; -------------------------------------------------------
594 | 
595 | final:
596 | 
597 |     colon 'interpret', interpret
598 | interpt dd xt_number_t_i_b
599 |         dd xt_fetch
600 |         dd xt_to_in
601 |         dd xt_fetch
602 |         dd xt_equals
603 |         dd xt_zero_branch
604 |         dd intpar
605 |         dd xt_t_i_b
606 |         dd xt_fetch
607 |         dd xt_lit, 50
608 |         dd xt_accept
609 |         dd xt_number_t_i_b
610 |         dd xt_store
611 |         dd xt_lit, 0
612 |         dd xt_to_in
613 |         dd xt_store
614 | intpar  dd xt_lit, 32           ; find a " "
615 |         dd xt_word
616 |         dd xt_find              ; is it in the dictionary?
617 |         dd xt_dupe
618 |         dd xt_zero_branch
619 |         dd intnf
620 |         dd xt_state
621 |         dd xt_fetch
622 |         dd xt_equals
623 |         dd xt_zero_branch
624 |         dd intexc
625 |         dd xt_comma
626 |         dd xt_branch
627 |         dd intdone
628 | intexc  dd xt_execute           ; found immediate word - execute it, then loop
629 |         dd xt_branch
630 |         dd intdone
631 | intnf   dd xt_dupe              ; not a forth word - is it a number?
632 |         dd xt_rote
633 |         dd xt_count
634 |         dd xt_to_number
635 |         dd xt_zero_branch
636 |         dd intskip
637 |         dd xt_state             ; conversion error:
638 |         dd xt_fetch             ; branch on State (interpreting vs compiling)
639 |         dd xt_zero_branch       ; interpreting -> loop
640 |         dd intnc
641 |         dd xt_last              ; compiling -> orderly shutdown
642 |         dd xt_fetch
643 |         dd xt_dupe
644 |         dd xt_fetch
645 |         dd xt_last
646 |         dd xt_store
647 |         dd xt_dp
648 |         dd xt_store
649 | intnc   dd xt_abort
650 | intskip dd xt_drop
651 |         dd xt_drop
652 |         dd xt_state
653 |         dd xt_fetch
654 |         dd xt_zero_branch
655 |         dd intdone
656 |         dd xt_lit
657 |         dd xt_lit
658 |         dd xt_comma
659 |         dd xt_comma
660 | intdone dd xt_branch
661 |         dd interpt
662 |         dd xt_abort
663 | 
664 |     colon 'testitsymac', testitsymac
665 | dotest  dd xt_t_i_b
666 |         dd xt_fetch
667 |         dd xt_lit, 1
668 |         dd xt_accept
669 | looper  dd xt_lit, 50
670 |         dd xt_lit, 50
671 |         dd xt_emit
672 |         dd dotest
673 | freemem:
674 | 
675 | section .bss
676 | fstack: resb 16000 + MEMSIZE
677 | ; rstack: resb 2048
678 | ; fheap:  resb 2048
679 | ; astack:  resb 2048
680 | ; dstack:  resb 2048
681 | 
682 | filesize   equ   $ - $$
683 | 


--------------------------------------------------------------------------------