├── README.md
├── linux
├── Makefile
└── itsy-linux.asm
├── msdos
├── itsy.asm
└── macros.asm
└── osx
├── Makefile
└── itsy-osx.asm
/README.md:
--------------------------------------------------------------------------------
1 | itsy-linux
2 | ==========
3 |
4 | Unix (Linux & MacOS X) port of itsy Forth by John Metcalf
5 |
6 | John Metcalf (http://www.retroprogramming.com/) implemented minimal forth system:
7 |
8 | - http://www.retroprogramming.com/2012/03/itsy-forth-1k-tiny-compiler.html
9 | - http://www.retroprogramming.com/2012/04/itsy-forth-dictionary-and-inner.html
10 | - http://www.retroprogramming.com/2012/04/itsy-forth-primitives.html
11 | - http://www.retroprogramming.com/2012/06/itsy-forth-compiler.html
12 | - http://www.retroprogramming.com/2012/09/itsy-documenting-bit-twiddling-voodoo.html
13 |
14 | Resulting binary has very impressive size (978 bytes on Linux) and can be used to bootstrap a complete Forth system.
15 |
16 | Original itsy code is producing .com files and can be found in "msdos" directory of this repository.
17 |
18 | kt97679 ported it to 32-bit linux code.
19 | DylanMc ported it to MacOS (also 32 bit). Should move to any BSD pretty easily.
20 |
21 | Sample session:
22 |
23 |
24 | $ make
25 | nasm itsy-linux.asm -fbin -l itsy-linux.lst -o itsy-linux
26 | chmod +x itsy-linux
27 | $ ./itsy-linux
28 | : say_hi 72 emit 105 emit 33 emit 10 emit ;
29 | say_hi
30 | Hi!
31 | ^C
32 | $
33 |
34 |
35 | TODO:
36 |
37 | - get string literals going
38 | - implement string operations, like ."
39 | - implement number->string
40 | - implement .
41 | - if then
42 | - do .. loop
43 |
44 |
--------------------------------------------------------------------------------
/linux/Makefile:
--------------------------------------------------------------------------------
1 | all: itsy-linux
2 |
3 | itsy-linux:
4 | nasm itsy-linux.asm -fbin -l itsy-linux.lst -o itsy-linux
5 | chmod +x itsy-linux
6 | clean:
7 | rm itsy-linux.lst itsy-linux
8 |
--------------------------------------------------------------------------------
/linux/itsy-linux.asm:
--------------------------------------------------------------------------------
1 | ; nasm itsy-linux.asm -fbin -l itsy-linux.lst -o itsy-linux && chmod +x itsy-linux
2 |
3 | %define link 0
4 | %define immediate 080h
5 |
6 | %macro head 4
7 | %%link dd link
8 | %define link %%link
9 | %strlen %%count %1
10 | db %3 + %%count, %1
11 | xt_ %+ %2 dd %4
12 | %endmacro
13 |
14 | %macro primitive 2-3 0
15 | head %1, %2, %3, $ + 4
16 | %endmacro
17 |
18 | %macro colon 2-3 0
19 | head %1, %2, %3, docolon
20 | %endmacro
21 |
22 | %macro variable 3
23 | head %1, %2, 0, dovar
24 | val_ %+ %2 dd %3
25 | %endmacro
26 |
27 | %define TEXTORG 0x00400000
28 | %define MEMSIZE 1048576
29 | %define TIBSIZE 80
30 | %define STACKSIZE 4096
31 | %define TIBPTR TEXTORG + MEMSIZE - TIBSIZE
32 | %define SP0 TIBPTR - 4
33 | %define RP0 SP0 - STACKSIZE
34 |
35 | BITS 32
36 | org TEXTORG
37 |
38 | ehdr: ; Elf32_Ehdr
39 | db 0x7F, "ELF", 1, 1, 1, 0 ; e_ident
40 | times 8 db 0
41 | dw 2 ; e_type
42 | dw 3 ; e_machine
43 | dd 1 ; e_version
44 | dd xt_abort + 4 ; e_entry
45 | dd phdr - $$ ; e_phoff
46 | dd 0 ; e_shoff
47 | dd 0 ; e_flags
48 | dw ehdrsize ; e_ehsize
49 | dw phdrsize ; e_phentsize
50 | dw 1 ; e_phnum
51 | dw 0 ; e_shentsize
52 | dw 0 ; e_shnum
53 | dw 0 ; e_shstrndx
54 |
55 | ehdrsize equ $ - ehdr
56 |
57 | phdr: ; Elf32_Phdr
58 | dd 1 ; p_type
59 | dd 0 ; p_offset
60 | dd $$ ; p_vaddr
61 | dd $$ ; p_paddr
62 | dd filesize ; p_filesz
63 | dd MEMSIZE ; p_memsz
64 | dd 7 ; p_flags
65 | dd 0x1000 ; p_align
66 |
67 | phdrsize equ $ - phdr
68 |
69 | ; esp - data stack pointer
70 | ; ebp - return stack pointer
71 | ; esi - Forth instruction pointer
72 | ; ebx - TOS (top of data stack)
73 |
74 | variable 'state', state, 0
75 | variable '>in', to_in, 0
76 | variable '#tib', number_t_i_b, 0
77 | variable 'dp', dp, freemem
78 | variable 'base', base, 10
79 | variable 'last', last, final
80 | variable 'tib', t_i_b, TIBPTR
81 |
82 | primitive 'execute', execute
83 | mov eax, ebx ; eax is important here, it is used by docolon and dovar
84 | pop ebx
85 | jmp dword[eax]
86 |
87 | primitive 'abort', abort
88 | mov eax, dword[val_number_t_i_b]
89 | mov dword[val_to_in], eax
90 | xor ebp, ebp
91 | mov dword[val_state], ebp
92 | mov esp, SP0
93 | mov ebp, RP0
94 | mov esi, xt_interpret + 4
95 | jmp next
96 |
97 | primitive ',', comma
98 | xchg eax, ebx
99 | mov ebx, val_dp
100 | mov edi, [ebx]
101 | stosd
102 | mov [ebx], edi
103 | pop ebx
104 | jmp next
105 |
106 | primitive 'lit', lit
107 | push ebx
108 | lodsd
109 | xchg eax, ebx
110 | jmp next
111 |
112 | primitive 'rot', rote
113 | pop edx
114 | pop eax
115 | push edx
116 | push ebx
117 | xchg eax, ebx
118 | jmp next
119 |
120 | primitive 'drop', drop
121 | pop ebx
122 | jmp next
123 |
124 | primitive 'dup', dupe
125 | push ebx
126 | jmp next
127 |
128 | primitive 'swap', swap
129 | xchg ebx, [esp]
130 | jmp next
131 |
132 | primitive '+', plus
133 | pop eax
134 | add ebx, eax
135 | jmp next
136 |
137 | primitive 'exit', exit
138 | xchg ebp, esp
139 | pop esi
140 | xchg ebp, esp
141 | next lodsd
142 | jmp dword[eax] ; eax is later used by docolon and dovar
143 |
144 | primitive '=', equals
145 | pop eax
146 | sub ebx, eax
147 | sub ebx, 1
148 | sbb ebx, ebx
149 | jmp next
150 |
151 | primitive '@', fetch
152 | mov ebx, dword[ebx]
153 | jmp next
154 |
155 | primitive '!', store
156 | pop dword[ebx]
157 | pop ebx
158 | jmp next
159 |
160 | primitive '0branch', zero_branch
161 | lodsd
162 | test ebx, ebx
163 | jne zerob_z
164 | xchg eax, esi
165 | zerob_z pop ebx
166 | jmp next
167 |
168 | primitive 'branch',branch
169 | mov esi, dword[esi]
170 | jmp next
171 |
172 | primitive 'count',count
173 | movzx eax, byte[ebx]
174 | inc ebx
175 | push ebx
176 | mov ebx, eax
177 | jmp next
178 |
179 | primitive 'accept', accept
180 | xor edx, edx
181 | xchg edx, ebx ; now edx contains read byte count and ebx 0 (reading from stdin)
182 | xor eax, eax
183 | mov al, 3 ; sys_read
184 | pop ecx ; buffer
185 | int 80h
186 | xchg ebx, eax ; eax after sys_read contains number of bytes read (negative number means error), let's move it to TOS
187 | dec ebx ; last char is CR
188 | jmp next
189 |
190 | primitive 'emit', emit
191 | push ebx
192 | xor eax, eax
193 | mov al, 4 ; sys_write
194 | xor ebx, ebx
195 | inc ebx ; ebx now contains 1 (stdout)
196 | mov ecx, esp ; buffer
197 | mov edx, ebx ; write byte count
198 | int 80h
199 | pop ebx
200 | pop ebx
201 | jmp next
202 |
203 | primitive '>number',to_number
204 | pop edi
205 | pop ecx
206 | pop eax
207 | to_numl test ebx, ebx
208 | je to_numz
209 | push eax
210 | movzx eax, byte[edi]
211 | cmp al, 'a'
212 | jc to_nums
213 | sub al, 32
214 | to_nums cmp al, '9' + 1
215 | jc to_numg
216 | cmp al, 'A'
217 | jc to_numh
218 | sub al, 7
219 | to_numg sub al, 48
220 | cmp al, byte[val_base]
221 | jnc to_numh
222 | xchg eax, edx
223 | pop eax
224 | push edx
225 | xchg eax, ecx
226 | mul dword[val_base]
227 | xchg eax, ecx
228 | mul dword[val_base]
229 | add ecx, edx
230 | pop edx
231 | add eax, edx
232 | dec ebx
233 | inc edi
234 | jmp to_numl
235 | to_numz push eax
236 | to_numh push ecx
237 | push edi
238 | jmp next
239 |
240 | primitive 'word', word
241 | mov edi, dword[val_dp]
242 | push edi
243 | mov edx, ebx
244 | mov ebx, dword[val_t_i_b]
245 | mov ecx, ebx
246 | add ebx, dword[val_to_in]
247 | add ecx, dword[val_number_t_i_b]
248 | wordf cmp ecx, ebx
249 | je wordz
250 | mov al, byte[ebx]
251 | inc ebx
252 | cmp al, dl
253 | je wordf
254 | wordc inc edi
255 | mov byte[edi], al
256 | cmp ecx, ebx
257 | je wordz
258 | mov al, byte[ebx]
259 | inc ebx
260 | cmp al, dl
261 | jne wordc
262 | wordz mov byte[edi + 1], 32
263 | mov eax, dword[val_dp]
264 | xchg eax, edi
265 | sub eax, edi
266 | mov byte[edi], al
267 | sub ebx, dword[val_t_i_b]
268 | mov dword[val_to_in], ebx
269 | pop ebx
270 | jmp next
271 |
272 | primitive 'find', find
273 | mov edi, val_last
274 | findl push edi
275 | push ebx
276 | movzx ecx, byte[ebx]
277 | inc ecx
278 | findc mov al, byte[edi + 4]
279 | and al, 07Fh
280 | cmp al, byte[ebx]
281 | je findm
282 | pop ebx
283 | pop edi
284 | mov edi, dword[edi]
285 | test edi, edi
286 | jne findl
287 | findnf push ebx
288 | xor ebx, ebx
289 | jmp next
290 | findm inc edi
291 | inc ebx
292 | loop findc
293 | pop ebx
294 | pop edi
295 | xor ebx, ebx
296 | inc ebx
297 | lea edi, [edi + 4]
298 | mov al, byte[edi]
299 | test al, 080h
300 | jne findi
301 | neg ebx
302 | findi and eax, 31
303 | add edi, eax
304 | inc edi
305 | push edi
306 | jmp next
307 |
308 | colon ':', colon
309 | dd xt_lit, -1
310 | dd xt_state
311 | dd xt_store
312 | dd xt_create
313 | dd xt_do_semi_code
314 |
315 | docolon xchg ebp, esp
316 | push esi
317 | xchg ebp, esp
318 | lea esi, [eax + 4] ; eax value is set by next
319 | jmp next
320 |
321 | colon ';', semicolon, immediate
322 | dd xt_lit, xt_exit
323 | dd xt_comma
324 | dd xt_lit, 0
325 | dd xt_state
326 | dd xt_store
327 | dd xt_exit
328 |
329 | colon 'create', create
330 | dd xt_dp, xt_fetch
331 | dd xt_last, xt_fetch
332 | dd xt_comma
333 | dd xt_last, xt_store
334 | dd xt_lit, 32
335 | dd xt_word
336 | dd xt_count
337 | dd xt_plus
338 | dd xt_dp, xt_store
339 | dd xt_lit, 0
340 | dd xt_comma
341 | dd xt_do_semi_code
342 |
343 | dovar push ebx
344 | lea ebx, [eax + 4] ; eax value is set by next
345 | jmp next
346 |
347 | primitive '(;code)', do_semi_code
348 | mov edi, dword[val_last]
349 | mov al, byte[edi + 4]
350 | and eax, 31
351 | add edi, eax
352 | mov dword[edi + 5], esi
353 | xchg ebp, esp
354 | pop esi
355 | xchg esp, ebp
356 | jmp next
357 |
358 | final:
359 |
360 | colon 'interpret', interpret
361 | interpt dd xt_number_t_i_b
362 | dd xt_fetch
363 | dd xt_to_in
364 | dd xt_fetch
365 | dd xt_equals
366 | dd xt_zero_branch
367 | dd intpar
368 | dd xt_t_i_b
369 | dd xt_fetch
370 | dd xt_lit, 50
371 | dd xt_accept
372 | dd xt_number_t_i_b
373 | dd xt_store
374 | dd xt_lit, 0
375 | dd xt_to_in
376 | dd xt_store
377 | intpar dd xt_lit, 32
378 | dd xt_word
379 | dd xt_find
380 | dd xt_dupe
381 | dd xt_zero_branch
382 | dd intnf
383 | dd xt_state
384 | dd xt_fetch
385 | dd xt_equals
386 | dd xt_zero_branch
387 | dd intexc
388 | dd xt_comma
389 | dd xt_branch
390 | dd intdone
391 | intexc dd xt_execute
392 | dd xt_branch
393 | dd intdone
394 | intnf dd xt_dupe
395 | dd xt_rote
396 | dd xt_count
397 | dd xt_to_number
398 | dd xt_zero_branch
399 | dd intskip
400 | dd xt_state
401 | dd xt_fetch
402 | dd xt_zero_branch
403 | dd intnc
404 | dd xt_last
405 | dd xt_fetch
406 | dd xt_dupe
407 | dd xt_fetch
408 | dd xt_last
409 | dd xt_store
410 | dd xt_dp
411 | dd xt_store
412 | intnc dd xt_abort
413 | intskip dd xt_drop
414 | dd xt_drop
415 | dd xt_state
416 | dd xt_fetch
417 | dd xt_zero_branch
418 | dd intdone
419 | dd xt_lit
420 | dd xt_lit
421 | dd xt_comma
422 | dd xt_comma
423 | intdone dd xt_branch
424 | dd interpt
425 |
426 | freemem:
427 |
428 | filesize equ $ - $$
429 |
--------------------------------------------------------------------------------
/msdos/itsy.asm:
--------------------------------------------------------------------------------
1 | ; Itsy Forth
2 | ; Written by John Metcalf
3 | ; Commentary by John Metcalf and Mike Adams
4 | ;
5 | ; Itsy Forth was written for use with NASM, the "Netwide Assembler"
6 | ; that's available for free download (http://www.nasm.us/).
7 | ; The command line for assembling Itsy is:
8 | ;
9 | ; nasm itsy.asm -fbin -o itsy.com
10 | ;
11 | ; If you wish to have an assembly listing, give it this command:
12 | ;
13 | ; nasm itsy.asm -fbin -l itsy.lst -o itsy.com
14 | ;
15 | ;--------------------------------------------------------------------------
16 | ; Implementation notes:
17 | ;
18 | ; Register Usage:
19 | ; sp - data stack pointer.
20 | ; bp - return stack pointer.
21 | ; si - Forth instruction pointer.
22 | ; di - pointer to current XT (CFA of word currently being executed).
23 | ; bx - TOS (top of data stack). The top value on the data stack is not
24 | ; actually kept on the CPU's data stack. It's kept in the BX register.
25 | ; Having it in a register like this speeds up the operation of
26 | ; the primitive words. They don't have to take the time to pull a
27 | ; value off of the stack; it's already in a register where it can
28 | ; be used right away!
29 | ; ax, cd, dx - Can all be freely used for processing data. The other
30 | ; registers can still be used also, but only with caution. Their
31 | ; contents must be pushed to the stack and then restored before
32 | ; exiting from the word or calling any other Forth words. LOTS of
33 | ; potential for program crashes if you don't do this correctly.
34 | ; The notable exception is the DI register, which can (and is, below)
35 | ; used pretty freely in assembly code, since the concept of a pointer
36 | ; to the current CFA is rather irrelevant in assembly.
37 | ;
38 | ;
39 | ; Structure of an Itsy word definition:
40 | ; # of
41 | ; Bytes: Description:
42 | ; ------ ---------------------------------------------------------
43 | ; 2 Link Field. Contains the address of the link field of the
44 | ; definition preceding this one in the dictionary. The link
45 | ; field of the first def in the dictionary contains 0.
46 | ; Varies Name Field. The first byte of the name field contains the length
47 | ; of the name; succeeding bytes contain the ASCII characters of
48 | ; the name itself. If the high bit of the length is set, the
49 | ; definition is tagged as being an "immediate" word.
50 | ; 2 Code Field. Contains the address of the executable code for
51 | ; the word. For primitives, this will likely be the address
52 | ; of the word's own data field. Note that the header creation
53 | ; macros automatically generate labels for the code field
54 | ; addresses of the words they're used to define, though the
55 | ; CFA labels aren't visible in the code shown below. The
56 | ; assembler macros create labels, known as "execution tags"
57 | ; or XTs, for the code field of each word.
58 | ; Varies Data Field. Contains either a list of the code field addresses
59 | ; of the words that make up this definition, or assembly-
60 | ; language code for primitives, or numeric data for variables
61 | ; and constants and such.
62 |
63 |
64 | ;-----------------------------------------------------------------------------
65 | ;
66 | ; Beginning of actual code.
67 | ;
68 | ; Include the definitions of the macros that are used in NASM to create
69 | ; the headers of the words. See macros.asm for more details.
70 | ;-----------------------------------------------------------------------------
71 | %include "macros.asm"
72 |
73 | ;-----------------------------------------------------------------------------
74 | ; Define the location for the stack. -256 decimal = 0ff00h
75 | ;-----------------------------------------------------------------------------
76 | stack0 equ -256
77 |
78 | ;-----------------------------------------------------------------------------
79 | ; Set the starting point for the executable code. 0100h is the standard
80 | ; origin for programs running under MS-DOS or its equivalents.
81 | ;-----------------------------------------------------------------------------
82 | org 0100h
83 |
84 | ;-----------------------------------------------------------------------------
85 | ; Jump to the location of the start of Itsy's initialization code.
86 | ;-----------------------------------------------------------------------------
87 | jmp xt_abort+2
88 |
89 | ; -------------------
90 | ; System Variables
91 | ; -------------------
92 |
93 | ; state - ( -- addr ) true = compiling, false = interpreting
94 | variable 'state',state,0
95 |
96 | ; >in - ( -- addr ) next character in input buffer
97 | variable '>in',to_in,0
98 |
99 | ; #tib - ( -- addr ) number of characters in the input buffer
100 | variable '#tib',number_t_i_b,0
101 |
102 | ; dp - ( -- addr ) first free cell in the dictionary
103 | variable 'dp',dp,freemem
104 |
105 | ; base - ( -- addr ) number base
106 | variable 'base',base,10
107 |
108 | ; last - ( -- addr ) the last word to be defined
109 | ; NOTE: The label "final:" must be placed immediately before
110 | ; the last word defined in this file. If new words are added,
111 | ; make sure they're either added before the "final:" label
112 | ; or the "final:" label is moved to the position immediately
113 | ; before the last word added.
114 | variable 'last',last,final
115 |
116 | ; tib - ( -- addr ) address of the input buffer
117 | constant 'tib',t_i_b,32768
118 |
119 | ; -------------------
120 | ; Initialisation
121 | ; -------------------
122 |
123 | ; abort - ( -- ) initialise Itsy then jump to interpret
124 | primitive 'abort',abort
125 | mov ax,word[val_number_t_i_b] ; Load AX with the value contained
126 | ; in the data field of #tib (which
127 | ; was pre-defined above as 0).
128 | mov word[val_to_in],ax ; Save the same number to >in.
129 | xor bp,bp ; Clear the bp register, which is going
130 | ; to be used as the return stack
131 | ; pointer. Since it'll first be
132 | ; decremented when a value is pushed
133 | ; onto it, this means that the first
134 | ; value pushed onto the return stack
135 | ; will be stored at 0FFFEh and 0FFFFh,
136 | ; the very end of memory space, and
137 | ; the stack will grow downward from
138 | ; there.
139 | mov word[val_state],bp ; Clear the value of state.
140 | mov sp,stack0 ; Set the stack pointer to the value
141 | ; defined above.
142 | mov si,xt_interpret+2 ; Initialize Itsy's instruction pointer
143 | ; to the outer interpreter loop.
144 | jmp next ; Jump to the inner interpreter and
145 | ; actually start running Itsy.
146 |
147 | ; -------------------
148 | ; Compilation
149 | ; -------------------
150 |
151 | ; , - ( x -- ) compile x to the current definition.
152 | ; Stores the number on the stack to the memory location currently
153 | ; pointed to by dp.
154 | primitive ',',comma
155 | mov di,word[val_dp] ; Put the value of dp into the DI register.
156 | xchg ax,bx ; Move the top of the stack into AX.
157 | stosw ; Store the 16-bit value in AX directly
158 | ; into the address pointed to by DI, and
159 | ; automatically increment DI in the
160 | ; process.
161 | mov word[val_dp],di ; Store the incremented value in DI as the
162 | ; new value for the dictionary pointer.
163 | pop bx ; Pop the new stack top into its proper place.
164 | jmp next ; Go do the next word.
165 |
166 | ; lit - ( -- ) push the value in the cell straight after lit.
167 | ; lit is the word that is compiled into a definition when you put a
168 | ; "literal" number in a Forth definition. When your word is compiled,
169 | ; the CFA of lit gets stored in the definition followed immediately
170 | ; by the value of the number you put into the code. At run time, lit
171 | ; pushes the value of your number onto the stack.
172 | primitive 'lit',lit
173 | push bx ; Push the value in BX to the stack, so that now it'll
174 | ; be 2nd from the top on the stack. The old value is
175 | ; still in BX, though. Now we need to get the new
176 | ; value into BX.
177 | lodsw ; Load into the AX register the 16-bit value pointed
178 | ; to by the SI register (Itsy's instruction pointer,
179 | ; which this op then automatically increments SI by 2).
180 | ; The net result is that we just loaded into AX the
181 | ; 16-bit data immediately following the call to lit,
182 | ; which'll be the data that lit is supposed to load.
183 | xchg ax,bx ; Now swap the contents of the AX and BX registers.
184 | ; lit's data is now in BX, the top of the stack, where
185 | ; we want it. Slick, eh?
186 | jmp next ; Go do the next word.
187 |
188 | ; -------------------
189 | ; Stack
190 | ; -------------------
191 |
192 | ; rot - ( x y z -- y z x ) rotate x, y and z.
193 | ; Standard Forth word that extracts number 3rd from the top of the stack
194 | ; and puts it on the top, effectively rotating the top 3 values.
195 | primitive 'rot',rote
196 | pop dx ; Unload "y" from the stack.
197 | pop ax ; Unload "x" from the stack. Remember that "z" is
198 | ; already in BX.
199 | push dx ; Push "y" back onto the stack.
200 | push bx ; Push "z" down into the stack on top of "y".
201 | xchg ax,bx ; Swap "x" into the BX register so that it's now
202 | ; at the top of the stack.
203 | jmp next ; Go do the next word.
204 |
205 | ; drop - ( x -- ) remove x from the stack.
206 | primitive 'drop',drop
207 | pop bx ; Pop the 2nd item on the stack into the BX register,
208 | ; writing over the item that was already at the top
209 | ; of the stack in BX. It's that simple.
210 | jmp next ; Go do the next word.
211 |
212 | ; dup - ( x -- x x ) add a copy of x to the stack
213 | primitive 'dup',dupe
214 | push bx ; Remember that BX is the top of the stack. Push an
215 | ; extra copy of what's in BX onto the stack.
216 | jmp next ; Go do the next word.
217 |
218 | ; # swap - ( x y -- y x ) exchange x and y
219 | primitive 'swap',swap
220 | pop ax ; Pop "x", the number 2nd from the top, into AX.
221 | push bx ; Push "y", the former top of the stack.
222 | xchg ax,bx ; Swap "x" into BX to become the new stack top. We
223 | ; don't care what happens to the value of "y" that
224 | ; ends up in AX because that value is now safely
225 | ; in the stack.
226 | jmp next ; Go do the next word.
227 |
228 | ; -------------------
229 | ; Maths / Logic
230 | ; -------------------
231 |
232 | ; + - ( x y -- z) calculate z=x+y then return z
233 | primitive '+',plus
234 | pop ax ; Pop the value of "x" off of the stack.
235 | add bx,ax ; Add "x" to the value of "y" that's at the top of the
236 | ; stack in the BX register. The way the opcode is
237 | ; written, the result is left in the BX register,
238 | ; conveniently at the top of the stack.
239 | jmp next ; Go do the next word.
240 |
241 | ; = - ( x y -- flag ) return true if x=y
242 | primitive '=',equals
243 | pop ax ; Get the "x" value into a register.
244 | sub bx,ax ; Perform BX-AX (or y-x)and leave result in BX. If x and
245 | ; y are equal, this will result in a 0 in BX. But a zero
246 | ; is a false flag in just about all Forth systems, and we
247 | ; want a TRUE flag if the numbers are equal. So...
248 | sub bx,1 ; Subtract 1 from it. If we had a zero before, now we've
249 | ; got a -1 (or 0ffffh), and a carry flag was generated.
250 | ; Any other value in BX will not generate a carry.
251 | sbb bx,bx ; This has the effect of moving the carry bit into the BX
252 | ; register. So, if the numbers were not equal, then the
253 | ; "sub bx,1" didn't generate a carry, so the result will
254 | ; be a 0 in the BX (numbers were not equal, result is
255 | ; false). If the original numbers on the stack were equal,
256 | ; though, then the carry bit was set and then copied
257 | ; into the BX register to act as our true flag.
258 | ; This may seem a bit cryptic, but it produces smaller
259 | ; code and runs faster than a bunch of conditional jumps
260 | ; and immediate loads would.
261 | jmp next ; Go do the next word.
262 |
263 | ; -------------------
264 | ; Peek and Poke
265 | ; -------------------
266 |
267 | ; @ - ( addr -- x ) read x from addr
268 | ; "Fetch", as the name of this word is pronounced, reads a 16-bit number from
269 | ; a given memory address, the way the Basic "peek" command does, and leaves
270 | ; it at the top of the stack.
271 | primitive '@',fetch
272 | mov bx,word[bx] ; Read the value in the memory address pointed to by
273 | ; the BX register and move that value directly into
274 | ; BX, replacing the address at the top of the stack.
275 | jmp next ; Go do the next word.
276 |
277 | ; ! - ( x addr -- ) store x at addr
278 | ; Similar to @, ! ("store") writes a value directly to a memory address, like
279 | ; the Basic "poke" command.
280 | primitive '!',store
281 | pop word[bx] ; Okay, this is a bit slick. All in one opcode, we pop
282 | ; the number that's 2nd from the top of the stack
283 | ; (i.e. "x" in the argument list) and send it directly
284 | ; to the memory address pointed to by BX (the address
285 | ; at the top of the stack).
286 | pop bx ; Pop whatever was 3rd from the top of the stack into
287 | ; the BX register to become the new TOS.
288 | jmp next ; Go do the next word.
289 |
290 | ; -------------------
291 | ; Inner Interpreter
292 | ; -------------------
293 |
294 | ; This routine is the very heart of the Forth system. After execution, all
295 | ; Forth words jump to this routine, which pulls up the code field address
296 | ; of the next word to be executed and then executes it. Note that next
297 | ; doesn't have a header of its own.
298 | next lodsw ; Load into the AX register the 16-bit value pointed
299 | ; to by the SI register (Itsy's instruction pointer,
300 | ; which this op then automatically increments SI by 2).
301 | ; The net result is that we just loaded into AX the
302 | ; CFA of the next word to be executed and left the
303 | ; instruction pointer pointing to the word that
304 | ; follows the next one.
305 | xchg di,ax ; Move the CFA of the next word into the DI register.
306 | ; We have to do this because the 8086 doesn't have
307 | ; an opcode for "jmp [ax]".
308 | jmp word[di] ; Jump and start executing code at the address pointed
309 | ; to by the value in the DI register.
310 |
311 | ; -------------------
312 | ; Flow Control
313 | ; -------------------
314 |
315 | ; 0branch - ( x -- ) jump if x is zero
316 | ; This is the primitive word that's compiled as the runtime code in
317 | ; an IF...THEN statement. The number compiled into the word's definition
318 | ; immediately after 0branch is the address of the word in the definition
319 | ; that we're branching to. That address gets loaded into the instruction
320 | ; pointer. In essence, this word sees a false flag (i.e. a zero) and
321 | ; then jumps over the words that comprise the "do this if true" clause
322 | ; of an IF...ELSE...THEN statement.
323 | primitive '0branch',zero_branch
324 | lodsw ; Load into the AX register the 16-bit value pointed
325 | ; to by the SI register (Itsy's instruction pointer,
326 | ; which this op then automatically increments SI by 2).
327 | ; The net result is that we just loaded into AX the
328 | ; CFA of the next word to be executed and left the
329 | ; instruction pointer pointing to the word that
330 | ; follows the next one.
331 | test bx,bx ; See if there's a 0 at the top of the stack.
332 | jne zerob_z ; If it's not zero, jump.
333 | xchg ax,si ; If the flag is a zero, we want to move the CFA of
334 | ; the word we want to branch to into the Forth
335 | ; instruction pointer. If the TOS was non-zero, the
336 | ; instruction pointer is left still pointing to the CFA
337 | ; of the word that follows the branch reference.
338 | zerob_z pop bx ; Throw away the flag and move everything on the stack
339 | ; up by one spot.
340 | jmp next ; Oh, you know what this does by now...
341 |
342 | ; branch - ( addr -- ) unconditional jump
343 | ; This is one of the pieces of runtime code that's compiled by
344 | ; BEGIN/WHILE/REPEAT, BEGIN/AGAIN, and BEGIN/UNTIL loops. As with 0branch,
345 | ; the number compiled into the dictionary immediately after the branch is
346 | ; the address of the word in the definition that we're branching to.
347 | primitive 'branch',branch
348 | mov si,word[si] ; The instruction pointer has already been
349 | ; incremented to point to the address immediately
350 | ; following the branch statement, which means it's
351 | ; pointing to where our branch-to address is
352 | ; stored. This opcode takes the value pointed to
353 | ; by the SI register and loads it directly into
354 | ; the SI, which is used as Forth's instruction
355 | ; pointer.
356 | jmp next
357 |
358 | ; execute - ( xt -- ) call the word at xt
359 | primitive 'execute',execute
360 | mov di,bx ; Move the jump-to address to the DI register.
361 | pop bx ; Pop the next number on the stack into the TOS.
362 | jmp word[di] ; Jump to the address pointed to by the DI register.
363 |
364 | ; exit - ( -- ) return from the current word
365 | primitive 'exit',exit
366 | mov si,word[bp] ; The BP register is used as Itsy's return stack
367 | ; pointer. The value at its top is the address of
368 | ; the instruction being pointed to before the word
369 | ; currently being executed was called. This opcode
370 | ; loads that address into the SI register.
371 | inc bp ; Now we have to increment BP twice to do a manual
372 | ; "pop" of the return stack pointer.
373 | inc bp ;
374 | jmp next ; We jump to next with the SI now having the address
375 | ; pointing into the word that called the one we're
376 | ; finishing up now. The result is that next will go
377 | ; back into that calling word and pick up where it
378 | ; left off earlier.
379 |
380 | ; -------------------
381 | ; String
382 | ; -------------------
383 |
384 | ; count - ( addr -- addr2 len )
385 | ; count is given the address of a counted string (like the name field of a
386 | ; word definition in Forth, with the first byte being the number of
387 | ; characters in the string and immediately followed by the characters
388 | ; themselves). It returns the length of the string and a pointer to the
389 | ; first actual character in the string.
390 | primitive 'count',count
391 | inc bx ; Increment the address past the length byte so
392 | ; it now points to the actual string.
393 | push bx ; Push the new address onto the stack.
394 | mov bl,byte[bx-1] ; Move the length byte into the lower half of
395 | ; the BX register.
396 | mov bh,0 ; Load a 0 into the upper half of the BX reg.
397 | jmp next
398 |
399 | ; >number - ( double addr len -- double2 addr2 zero ) if successful, or
400 | ; ( double addr len -- int addr2 nonzero ) on error.
401 | ; Convert a string to an unsigned double-precision integer.
402 | ; addr points to a string of len characters which >number attempts to
403 | ; convert to a number using the current number base. >number returns
404 | ; the portion of the string which can't be converted, if any.
405 | ; Note that, as is standard for most Forths, >number attempts to
406 | ; convert a number into a double (most Forths also leave it as a double
407 | ; if they find a decimal point, but >number doesn't check for that) and
408 | ; that it's called with a dummy double value already on the stack.
409 | ; On return, if the top of the stack is 0, the number was successfully
410 | ; converted. If the top of the stack is non-zero, there was an error.
411 | primitive '>number',to_number
412 | ; Start out by loading values from the stack
413 | ; into various registers. Remember that the
414 | ; top of the stack, the string length, is
415 | ; already in bx.
416 | pop di ; Put the address into di.
417 | pop cx ; Put the high word of the double value into cx
418 | pop ax ; and the low word of the double value into ax.
419 | to_numl test bx,bx ; Test the length byte.
420 | je to_numz ; If the string's length is zero, we're done.
421 | ; Jump to end.
422 | push ax ; Push the contents of ax (low word) so we can
423 | ; use it for other things.
424 | mov al,byte[di] ; Get the next byte in the string.
425 | cmp al,'a' ; Compare it to a lower-case 'a'.
426 | jc to_nums ; "jc", "jump if carry", is a little cryptic.
427 | ; I think a better choice of mnemonic would be
428 | ; "jb", "jump if below", for understanding
429 | ; what's going on here. Jump if the next byte
430 | ; in the string is less than 'a'. If the chr
431 | ; is greater than or equal to 'a', then it may
432 | ; be a digit larger than 9 in a hex number.
433 | sub al,32 ; Subtract 32 from the character. If we're
434 | ; converting hexadecimal input, this'll have
435 | ; the effect of converting lower case to
436 | ; upper case.
437 | to_nums cmp al,'9'+1 ; Compare the character to whatever character
438 | ; comes after '9'.
439 | jc to_numg ; If it's '9' or less, it's possibly a decimal
440 | ; digit. Jump for further testing.
441 | cmp al,'A' ; Compare the character with 'A'.
442 | jc to_numh ; If it's one of those punctuation marks
443 | ; between '9' and 'A', we've got an error.
444 | ; Jump to the end.
445 | sub al,7 ; The character is a potentially valid digit
446 | ; for a base larger than 10. Resize it so
447 | ; that 'A' becomes the digit for 11, 'B'
448 | ; signifies a 11, etc.
449 | to_numg sub al,48 ; Convert the digit to its corresponding
450 | ; number. This op could also have been
451 | ; written as "sub al,'0'"
452 | mov ah,0 ; Clear the ah register. The AX reg now
453 | ; contains the numeric value of the new digit.
454 | cmp al,byte[val_base] ; Compare the digit's value to the base.
455 | jnc to_numh ; If the digit's value is above or equal to
456 | ; to the base, we've got an error. Jump to end.
457 | ; (I think using "jae" would be less cryptic.)
458 | ; (NASM's documentation doesn't list jae as a
459 | ; valid opcode, but then again, it doesn't
460 | ; list jnc in its opcode list either.)
461 | xchg ax,dx ; Save the digit value in AX by swapping it
462 | ; the contents of DX. (We don't care what's
463 | ; in DX; it's scratchpad.)
464 | pop ax ; Recall the low word of our accumulated
465 | ; double number and load it into AX.
466 | push dx ; Save the digit value. (The DX register
467 | ; will get clobbered by the upcoming mul.)
468 | xchg ax,cx ; Swap the low and high words of our double
469 | ; number. AX now holds the high word, and
470 | ; CX the low.
471 | mul word[val_base] ; 16-bit multiply the high word by the base.
472 | ; High word of product is in DX, low in AX.
473 | ; But we don't need the high word. It's going
474 | ; to get overwritten by the next mul.
475 | xchg ax,cx ; Save the product of the first mul to the CX
476 | ; register and put the low word of our double
477 | ; number back into AX.
478 | mul word[val_base] ; 16-bit multiply the low word of our converted
479 | ; double number by the base, then add the high
480 | add cx,dx ; word of the product to the low word of the
481 | ; first mul (i.e. do the carry).
482 | pop dx ; Recall the digit value, then add it in to
483 | add ax,dx ; the low word of our accumulated double-
484 | ; precision total.
485 | ; NOTE: One might think, as I did at first,
486 | ; that we need to deal with the carry from
487 | ; this operation. But we just multiplied
488 | ; the number by the base, and then added a
489 | ; number that's already been checked to be
490 | ; smaller than the base. In that case, there
491 | ; will never be a carry out from this
492 | ; addition. Think about it: You multiply a
493 | ; number by 10 and get a new number whose
494 | ; lowest digit is a zero. Then you add another
495 | ; number less than 10 to it. You'll NEVER get
496 | ; a carry from adding zero and a number less
497 | ; than 10.
498 | dec bx ; Decrement the length.
499 | inc di ; Inc the address pointer to the next byte
500 | ; of the string we're converting.
501 | jmp to_numl ; Jump back and convert any remaining
502 | ; characters in the string.
503 | to_numz push ax ; Push the low word of the accumulated total
504 | ; back onto the stack.
505 | to_numh push cx ; Push the high word of the accumulated total
506 | ; back onto the stack.
507 | push di ; Push the string address back onto the stack.
508 | ; Note that the character count is still in
509 | ; BX and is therefore already at the top of
510 | ; the stack. If BX is zero at this point,
511 | ; we've successfully converted the number.
512 | jmp next ; Done. Return to caller.
513 |
514 | ; -----------------------
515 | ; Terminal Input / Output
516 | ; -----------------------
517 |
518 | ; accept - ( addr len -- len2 ) read a string from the terminal
519 | ; accept reads a string of characters from the terminal. The string
520 | ; is stored at addr and can be up to len characters long.
521 | ; accept returns the actual length of the string.
522 | primitive 'accept',accept
523 | pop di ; Pop the address of the string buffer into DI.
524 | xor cx,cx ; Clear the CX register.
525 | acceptl call getchar ; Do the bios call to get a chr from the keyboard.
526 | cmp al,8 ; See if it's a backspace (ASCII character 08h).
527 | jne acceptn ; If not, jump for more testing.
528 | jcxz acceptb ; "Jump if CX=0". If the user typed a backspace but
529 | ; there isn't anything in the buffer to erase, jump
530 | ; to the code that'll beep at him to let him know.
531 | call outchar ; User typed a backspace. Go ahead and output it.
532 | mov al,' ' ; Then output a space to wipe out the character that
533 | call outchar ; the user had just typed.
534 | mov al,8 ; Then output another backspace to put the cursor
535 | call outchar ; back into position to read another character.
536 | dec cx ; We just deleted a character. Now we need to decrement
537 | dec di ; both the counter and the buffer pointer.
538 | jmp acceptl ; Then go back for another character.
539 | acceptn cmp al,13 ; See if the input chr is a carriage return.
540 | je acceptz ; If so, we're done. jump to the end of the routine.
541 | cmp cx,bx ; Compare current string length to the maximum allowed.
542 | jne accepts ; If the string's not too long, jump.
543 | acceptb mov al,7 ; User's input is unusable in some way. Send the
544 | call outchar ; BEL chr to make a beep sound to let him know.
545 | jmp acceptl ; Then go back and let him try again.
546 | accepts stosb ; Save the input character into the buffer. Note that
547 | ; this opcode automatically increments the pointer
548 | ; in the DI register.
549 | inc cx ; But we have to increment the length counter manually.
550 | call outchar ; Echo the input character back to the display.
551 | jmp acceptl ; Go back for another character.
552 | acceptz jcxz acceptb ; If the buffer is empty, beep at the user and go
553 | ; back for more input.
554 | mov al,13 ; Send a carriage return to the display...
555 | call outchar ;
556 | mov al,10 ; ...followed by a linefeed.
557 | call outchar ;
558 | mov bx,cx ; Move the count to the top of the stack.
559 | jmp next ;
560 |
561 | ; word - ( char -- addr ) parse the next word in the input buffer
562 | ; word scans the "terminal input buffer" (whose address is given by the
563 | ; system constant tib) for words to execute, starting at the current
564 | ; address stored in the input buffer pointer >in. The character on the
565 | ; stack when word is called is the one that the code will look for as
566 | ; the separator between words. 999 times out of 1000,; this is going to
567 | ; be a space.
568 | primitive 'word',word
569 | mov di,word[val_dp] ; Load the dictionary pointer into DI.
570 | ; This is going to be the address that
571 | ; we copy the input word to. For the
572 | ; sake of tradition, let's call this
573 | ; scratchpad area the "pad".
574 | push di ; Save the pad pointer to the stack.
575 | mov dx,bx ; Copy the word separator to DX.
576 | mov bx,word[val_t_i_b] ; Load the address of the input buffer
577 | mov cx,bx ; into BX, and save a copy to CX.
578 | add bx,word[val_to_in] ; Add the value of >in to the address
579 | ; of tib to get a pointer into the
580 | ; buffer.
581 | add cx,word[val_number_t_i_b] ; Add the value of #tib to the address
582 | ; of tib to get a pointer to the last
583 | ; chr in the input buffer.
584 | wordf cmp cx,bx ; Compare the current buffer pointer to
585 | ; the end-of-buffer pointer.
586 | je wordz ; If we've reached the end, jump.
587 | mov al,byte[bx] ; Get the next chr from the buffer
588 | inc bx ; and increment the pointer.
589 | cmp al,dl ; See if it's the separator.
590 | je wordf ; If so, jump.
591 | wordc inc di ; Increment our pad pointer. Note that
592 | ; if this is our first time through the
593 | ; routine, we're incrementing to the
594 | ; 2nd address in the pad, leaving the
595 | ; first byte of it empty.
596 | mov byte[di],al ; Write the new chr to the pad.
597 | cmp cx,bx ; Have we reached the end of the
598 | ; input buffer?
599 | je wordz ; If so, jump.
600 | mov al,byte[bx] ; Get another byte from the input
601 | inc bx ; buffer and increment the pointer.
602 | cmp al,dl ; Is the new chr a separator?
603 | jne wordc ; If not, go back for more.
604 | wordz mov byte[di+1],32 ; Write a space at the end of the text
605 | ; we've written so far to the pad.
606 | mov ax,word[val_dp] ; Load the address of the pad into AX.
607 | xchg ax,di ; Swap the pad address with the pad
608 | sub ax,di ; pointer then subtract to get the
609 | ; length of the text in the pad.
610 | ; The result goes into AX, leaving the
611 | ; pad address in DI.
612 | mov byte[di],al ; Save the length byte into the first
613 | ; byte of the pad.
614 | sub bx,word[val_t_i_b] ; Subtract the base address of the
615 | ; input buffer from the pointer value
616 | ; to get the new value of >in...
617 | mov word[val_to_in],bx ; ...then save it to its variable.
618 | pop bx ; Pop the value of the pad address
619 | ; that we saved earlier back out to
620 | ; the top of the stack as our return
621 | ; value.
622 | jmp next
623 |
624 | ; emit - ( char -- ) display char on the terminal
625 | primitive 'emit',emit
626 | xchg ax,bx ; Move our output character to the AX register.
627 | call outchar ; Send it to the display.
628 | pop bx ; Pop the argument off the stack.
629 | jmp next
630 |
631 | getchar mov ah,7 ; This headerless routine does an MS-DOS Int 21h call,
632 | int 021h ; reading a character from the standard input device into
633 | mov ah,0 ; the AL register. We start out by putting a 7 into AH to
634 | ret ; identify the function we want to perform. The character
635 | ; gets returned in AL, and then we manually clear out
636 | ; AH so that we can have a 16-bit result in AX.
637 |
638 | outchar xchg ax,dx ; This headerless routine does an MS-DOS Int 21h call,
639 | mov ah,2 ; sending a character in the DL register to the standard
640 | int 021h ; output device. The 2 in the AH register identifies what
641 | ret ; function we want to perform.
642 |
643 | ; -----------------------
644 | ; Dictionary Search
645 | ; -----------------------
646 |
647 | ; find - ( addr -- addr2 flag ) look up word in the dictionary
648 | ; find looks in the Forth dictionary for a word with the name given in the
649 | ; counted string at addr. One of the following will be returned:
650 | ; flag = 0, addr2 = counted string --> word was not found
651 | ; flag = 1, addr2 = call address --> word is immediate
652 | ; flag = -1, addr2 = call address --> word is not immediate
653 | primitive 'find',find
654 | mov di,val_last ; Get the address of the link field of the last
655 | ; word in the dictionary. Put it in DI.
656 | findl push di ; Save the link field pointer.
657 | push bx ; Save the address of the name we're looking for.
658 | mov cl,byte[bx] ; Copy the length of the string into CL
659 | mov ch,0 ; Clear CH to make a 16 bit counter.
660 | inc cx ; Increment the counter.
661 | findc mov al,byte[di+2] ; Get the length byte of whatever word in the
662 | ; dictionary we're currently looking at.
663 | and al,07Fh ; Mask off the immediate bit.
664 | cmp al,byte[bx] ; Compare it with the length of the string.
665 | je findm ; If they're the same, jump.
666 | pop bx ; Nope, can't be the same if the lengths are
667 | pop di ; different. Pop the saved values back to regs.
668 | mov di,word[di] ; Get the next link address.
669 | test di,di ; See if it's zero. If it's not, then we've not
670 | jne findl ; hit the end of the dictionary yet. Then jump
671 | ; back and check the next word in the dictionary.
672 | findnf push bx ; End of dictionary. Word wasn't found. Push the
673 | ; string address to the stack.
674 | xor bx,bx ; Clear the BX register (make a "false" flag).
675 | jmp next ; Return to caller.
676 | findm inc di ; The lengths match, but do the chrs? Increment
677 | ; the link field pointer. (That may sound weird,
678 | ; especially on the first time through this loop.
679 | ; But remember that, earlier in the loop, we
680 | ; loaded the length byte out the dictionary by an
681 | ; indirect reference to DI+2. We'll do that again
682 | ; in a moment, so what in effect we're actually
683 | ; doing here is incrementing what's now going to
684 | ; be treated as a string pointer for the name in
685 | ; the dictionary as we compare the characters
686 | ; in the strings.)
687 | inc bx ; Increment the pointer to the string we're
688 | ; checking.
689 | loop findc ; Decrements the counter in CX and, if it's not
690 | ; zero yet, loops back. The same code that started
691 | ; out comparing the length bytes will go through
692 | ; and compare the characters in the string with
693 | ; the chrs in the dictionary name we're pointing
694 | ; at.
695 | pop bx ; If we got here, then the strings match. The
696 | ; word is in the dictionary. Pop the string's
697 | ; starting address and throw it away. We don't
698 | ; need it now that we know we're looking at a
699 | ; defined word.
700 | pop di ; Restore the link field address for the dictionary
701 | ; word whose name we just looked at.
702 | mov bx,1 ; Put a 1 at the top of the stack.
703 | inc di ; Increment the pointer past the link field to the
704 | inc di ; name field.
705 | mov al,byte[di] ; Get the length of the word's name.
706 | test al,080h ; See if it's an immediate.
707 | jne findi ; "test" basically performs an AND without
708 | ; actually changing the register. If the
709 | ; immediate bit is set, we'll have a non-zero
710 | ; result and we'll skip the next instruction,
711 | ; leaving a 1 in BX to represent that we found
712 | ; an immediate word.
713 | neg bx ; But if it's not an immediate word, we fall
714 | ; through and generate a -1 instead to get the
715 | ; flag for a non-immediate word.
716 | findi and ax,31 ; Mask off all but the valid part of the name's
717 | ; length byte.
718 | add di,ax ; Add the length to the name field address then
719 | inc di ; add 1 to get the address of the code field.
720 | push di ; Push the CFA onto the stack.
721 | jmp next ; We're done.
722 |
723 | ; -----------------------
724 | ; Colon Definition
725 | ; -----------------------
726 |
727 | ; : - ( -- ) define a new Forth word, taking the name from the input buffer.
728 | ; Ah! We've finally found a word that's actually defined as a Forth colon
729 | ; definition rather than an assembly language routine! Partly, anyway; the
730 | ; first part is Forth code, but the end is the assembly language run-time
731 | ; routine that, incidentally, executes Forth colon definitions. Notice that
732 | ; the first part is not a sequence of opcodes, but rather is a list of
733 | ; code field addresses for the words used in the definition. In each code
734 | ; field of each defined word is an "execution tag", or "xt", a pointer to
735 | ; the runtime code that executes the word. In a Forth colon definition, this
736 | ; is going to be a pointer to the docolon routine we see in the second part
737 | ; of the definition of colon itself below.
738 | colon ':',colon
739 | dw xt_lit,-1 ; If you write a Forth routine where you put an
740 | ; integer number right in the code, such as the
741 | ; 2 in the phrase, "dp @ 2 +", lit is the name
742 | ; of the routine that's called at runtime to put
743 | ; that integer on the stack. Here, lit pushes
744 | ; the -1 stored immediately after it onto the
745 | ; stack.
746 | dw xt_state ; The runtime code for a variable leaves its
747 | ; address on the stack. The address of state,
748 | ; in this case.
749 | dw xt_store ; Store that -1 into state to tell the system
750 | ; that we're switching from interpret mode into
751 | ; compile mode. Other than creating the header,
752 | ; colon doesn't actually compile the words into
753 | ; the new word. That task is performed in
754 | ; interpret, but it needs this new value stored
755 | ; into state to tell it to do so.
756 | dw xt_create ; Now we call the word that's going to create the
757 | ; header for the new colon definition we're going
758 | ; to compile.
759 | dw xt_do_semi_code ; Write, into the code field of the header we just
760 | ; created, the address that immediately follows
761 | ; this statement: the address of the docolon
762 | ; routine, which is the code that's responsible
763 | ; for executing the colon definition we're
764 | ; creating.
765 | docolon dec bp ; Here's the runtime code for colon words.
766 | dec bp ; Basically, what docolon does is similar to
767 | ; calling a subroutine, in that we have to push
768 | ; the return address to the stack. Since the 80x86
769 | ; doesn't directly support more than one stack and
770 | ; the "real" stack is used for data, we have to
771 | ; operate the Forth virtual machine's return stack
772 | ; manually. So, first, we manually decrement the
773 | ; return stack pointer twice to point to where
774 | ; we're going to save the return address.
775 | mov word[bp],si ; Then we write that address directly from the
776 | ; instruction pointer to that location.
777 | lea si,[di+2] ; We now have to tell Forth to start running the
778 | ; words in the colon definition we just started.
779 | ; The value in DI was left pointing at the code
780 | ; field of the word that we just started that just
781 | ; jumped into docolon. By loading into the
782 | ; instruction pointer the value that's 2 bytes
783 | ; later, at the start of the data field, we're
784 | ; loading into the IP the address of the first
785 | ; word in that definition. Execution of the other
786 | ; words in that definition will occur in sequence
787 | ; from here on.
788 | jmp next ; Now that we're pointing to the correct
789 | ; instruction, go do it.
790 |
791 | ; ; - ( -- ) complete the Forth word being compiled
792 | colon ';',semicolon,immediate
793 | ; Note above that ; is immediate, the first such
794 | ; word we've seen here. It needs to be so because
795 | ; it's used only during the compilation of a colon
796 | ; definition and we want it to execute rather than
797 | ; just being stored in the definition.
798 | dw xt_lit,xt_exit ; Put the address of the code field of exit onto
799 | ; the stack.
800 | dw xt_comma ; Store it into the dictionary.
801 | dw xt_lit,0 ; Now put a zero on the stack...
802 | dw xt_state ; along with the address of the state variable.
803 | dw xt_store ; Store the 0 into state to indicate that we're
804 | ; done compiling a word and are now back into
805 | ; interpret mode.
806 | dw xt_exit ; exit is the routine that finishes up the
807 | ; execution of a colon definition and jumps to
808 | ; next in order to start execution of the next
809 | ; word.
810 |
811 | ; -----------------------
812 | ; Headers
813 | ; -----------------------
814 |
815 | ; create - ( -- ) build a header for a new word in the dictionary, taking
816 | ; the name from the input buffer
817 | colon 'create',create
818 | dw xt_dp,xt_fetch ; Get the current dictionary pointer.
819 | dw xt_last,xt_fetch ; Get the LFA of the last word in the dictionary.
820 | dw xt_comma ; Save the value of last at the current point in
821 | ; the dictionary to become the link field for
822 | ; the header we're creating. Remember that comma
823 | ; automatically increments the value of dp.
824 | dw xt_last,xt_store ; Save the address of the link field we just
825 | ; created as the new value of last.
826 | dw xt_lit,32 ; Parse the input buffer for the name of the
827 | dw xt_word ; word we're creating, using a space for the
828 | ; separation character when we invoke word.
829 | ; Remember that word copies the parsed name
830 | ; as a counted string to the location pointed
831 | ; to by dp, which not coincidentally is
832 | ; exactly what and where we need it for the
833 | ; header we're creating.
834 | dw xt_count ; Get the address of the first character of the
835 | ; word's name, and the name's length.
836 | dw xt_plus ; Add the length to the address to get the addr
837 | ; of the first byte after the name, then store
838 | dw xt_dp,xt_store ; that address as the new value of dp.
839 | dw xt_lit,0 ; Put a 0 on the stack, and store it as a dummy
840 | dw xt_comma ; placeholder in the new header's CFA.
841 | dw xt_do_semi_code ; Write, into the code field of the header we just
842 | ; created, the address that immediately follows
843 | ; this statement: the address of the dovar
844 | ; routine, which is the code that's responsible
845 | ; for pushing onto the stack the data field
846 | ; address of the word whose header we just
847 | ; created when it's executed.
848 | dovar push bx ; Push the stack to make room for the new value
849 | ; we're about to put on top.
850 | lea bx,[di+2] ; This opcode loads into bx whatever two plus the
851 | ; value of the contents of DI might be, as opposed
852 | ; to a "mov bx,[di+2]", which would move into BX
853 | ; the value stored in memory at that location.
854 | ; What we're actually doing here is calculating
855 | ; the address of the data field that follows
856 | ; this header so we can leave it on the stack.
857 | jmp next ;
858 |
859 | ; # (;code) - ( -- ) replace the xt of the word being defined with a pointer
860 | ; to the code immediately following (;code)
861 | ; The idea behind this compiler word is that you may have a word that does
862 | ; various compiling/accounting tasks that are defined in terms of Forth code
863 | ; when its being used to compile another word, but afterward, when the new
864 | ; word is executed in interpreter mode, you want your compiling word to do
865 | ; something else that needs to be coded in assembly. (;code) is the word that
866 | ; says, "Okay, that's what you do when you're compiling, but THIS is what
867 | ; you're going to do while executing, so look sharp, it's in assembly!"
868 | ; Somewhat like the word DOES>, which is used in a similar manner to define
869 | ; run-time code in terms of Forth words.
870 | primitive '(;code)',do_semi_code
871 | mov di,word[val_last] ; Get the LFA of the last word in dictionary
872 | ; (i.e. the word we're currently in the middle
873 | ; of compiling) and put it in DI.
874 | mov al,byte[di+2] ; Get the length byte from the name field.
875 | and ax,31 ; Mask off the immediate bit and leave only
876 | ; the 5-bit integer length.
877 | add di,ax ; Add the length to the pointer. If we add 3
878 | ; to the value in DI at this point, we'll
879 | ; have a pointer to the code field.
880 | mov word[di+3],si ; Store the current value of the instruction
881 | ; pointer into the code field. That value is
882 | ; going to point to whatever follows (;code) in
883 | ; the word being compiled, which in the case
884 | ; of (;code) had better be assembly code.
885 | mov si,word[bp] ; Okay, we just did something funky with the
886 | ; instruction pointer; now we have to fix it.
887 | ; Directly load into the instruction pointer
888 | ; the value that's currently at the top of
889 | ; the return stack.
890 | inc bp ; Then manually increment the return stack
891 | inc bp ; pointer.
892 | jmp next ; Done. Go do another word.
893 |
894 | ; -----------------------
895 | ; Constants
896 | ; -----------------------
897 |
898 | ; constant - ( x -- ) create a new constant with the value x, taking the name
899 | ; from the input buffer
900 | colon 'constant',constant
901 | dw xt_create ; Create the constant's header.
902 | dw xt_comma ; Store the constant's value into the word's
903 | ; data field.
904 | dw xt_do_semi_code ; Write, into the code field of the header we just
905 | ; created, the address that immediately follows
906 | ; this statement: the address of the doconst
907 | ; routine, which is the code that's responsible
908 | ; for pushing onto the stack the value that's
909 | ; contained in the data field of the word whose
910 | ; header we just created when that word is
911 | ; invoked.
912 | doconst push bx ; Push the stack down.
913 | mov bx,word[di+2] ; DI should be pointing to the constant's code
914 | ; field. Load into the top of the stack the
915 | ; value 2 bytes further down from the code field,
916 | ; i.e. the constant's actual value.
917 | jmp next ;
918 |
919 |
920 | ; -----------------------
921 | ; Outer Interpreter
922 | ; -----------------------
923 |
924 | ; -------------------------------------------------------
925 | ; NOTE! The following line with the final: label MUST be
926 | ; immediately before the final word definition!
927 | ; -------------------------------------------------------
928 |
929 | final:
930 |
931 | colon 'interpret',interpret
932 | interpt dw xt_number_t_i_b ; Get the number of characters in the input
933 | dw xt_fetch ; buffer.
934 | dw xt_to_in ; Get the index into the input buffer.
935 | dw xt_fetch ;
936 | dw xt_equals ; See if they're the same.
937 | dw xt_zero_branch ; If not, it means there's still some text in
938 | dw intpar ; the buffer. Go process it.
939 | dw xt_t_i_b ; if #tib = >in, we're out of text and need to
940 | dw xt_lit ; read some more. Put a 50 on the stack to tell
941 | dw 50 ; accept to read up to 50 more characters.
942 | dw xt_accept ; Go get more input.
943 | dw xt_number_t_i_b ; Store into #tib the actual number of characters
944 | dw xt_store ; that accept read.
945 | dw xt_lit ; Reposition >in to index the 0th byte in the
946 | dw 0 ; input buffer.
947 | dw xt_to_in ;
948 | dw xt_store ;
949 | intpar dw xt_lit ; Put a 32 on the stack to represent an ASCII
950 | dw 32 ; space character. Then tell word to scan the
951 | dw xt_word ; buffer looking for that character.
952 | dw xt_find ; Once word has parsed out a string, have find
953 | ; see if that string matches the name of any
954 | ; words already defined in the dictionary.
955 | dw xt_dupe ; Copy the flag returned by find, then jump if
956 | dw xt_zero_branch ; it's a zero, meaning that the string doesn't
957 | dw intnf ; match any defined word names.
958 | dw xt_state ; We've got a word match. Are we interpreting or
959 | dw xt_fetch ; do we want to compile it? See if find's flag
960 | dw xt_equals ; matches the current value of state.
961 | dw xt_zero_branch ; If so, we've got an immediate. Jump.
962 | dw intexc ;
963 | dw xt_comma ; Not immediate. Store the word's CFA in the
964 | dw xt_branch ; dictionary then jump to the end of the loop.
965 | dw intdone ;
966 | intexc dw xt_execute ; We found an immediate word. Execute it then
967 | dw xt_branch ; jump to the end of the loop.
968 | dw intdone ;
969 | intnf dw xt_dupe ; Okay, it's not a word. Is it a number? Copy
970 | ; the flag, which we've already proved is 0,
971 | ; thereby creating a double-precision value of
972 | ; 0 at the top of the stack. We'll need this
973 | ; shortly when we call >number.
974 | dw xt_rote ; Rotate the string's address to the top of
975 | ; the stack. Note that it's still a counted
976 | ; string.
977 | dw xt_count ; Use count to split the string's length byte
978 | ; apart from its text.
979 | dw xt_to_number ; See if we can convert the text into a number.
980 | dw xt_zero_branch ; If we get a 0 from 0branch, we got a good
981 | dw intskip ; conversion. Jump and continue.
982 | dw xt_state ; We had a conversion error. Find out whether
983 | dw xt_fetch ; we're interpreting or compiling.
984 | dw xt_zero_branch ; If state=0, we're interpreting. Jump
985 | dw intnc ; further down.
986 | dw xt_last ; We're compiling. Shut the compiler down in an
987 | dw xt_fetch ; orderly manner. Get the LFA of the word we
988 | dw xt_dupe ; were trying to compile. Set aside a copy of it,
989 | dw xt_fetch ; then retrieve from it the LFA of the old "last
990 | dw xt_last ; word" and resave that as the current last word.
991 | dw xt_store ;
992 | dw xt_dp ; Now we have to save the LFA of the word we just
993 | dw xt_store ; tried to compile back into the dictionary
994 | ; pointer.
995 | intnc dw xt_abort ; Whether we were compiling or interpreting,
996 | ; either way we end up here if we had an
997 | ; unsuccessful number conversion. Call abort
998 | ; and reset the system.
999 | intskip dw xt_drop ; >number was successful! Drop the address and
1000 | dw xt_drop ; the high word of the double-precision numeric
1001 | ; value it returned. We don't need either. What's
1002 | ; left on the stack is the single-precision
1003 | ; number we just converted.
1004 | dw xt_state ; Are we compiling or interpreting?
1005 | dw xt_fetch ;
1006 | dw xt_zero_branch ; If we're interpreting, jump on down.
1007 | dw intdone ;
1008 | dw xt_lit ; No, John didn't stutter here. These 4 lines are
1009 | dw xt_lit ; how "['] lit , ," get encoded. We need to store
1010 | dw xt_comma ; lit's own CFA into the word, followed by the
1011 | dw xt_comma ; number we just converted from text input.
1012 | intdone dw xt_branch ; Jump back to the beginning of the interpreter
1013 | dw interpt ; loop and process more input.
1014 |
1015 | freemem:
1016 |
1017 | ; That's it! So, there you have it! Only 33 named Forth words...
1018 | ;
1019 | ; , @ >in dup base word abort 0branch interpret
1020 | ; + ! lit swap last find create constant (;code)
1021 | ; = ; tib drop emit state accept >number
1022 | ; : dp rot #tib exit count execute
1023 | ;
1024 | ; ...plus 6 pieces of headerless code and run-time routines...
1025 | ;
1026 | ; getchar outchar docolon dovar doconst next
1027 | ;
1028 | ; ...are all that's required to produce a functional Forth interpreter
1029 | ; capable of compiling colon definitions, only 978 bytes long! Granted,
1030 | ; it's lacking a number of key critical words that make it nigh unto
1031 | ; impossible to do anything useful, but this just goes to show just
1032 | ; how small a functioning Forth system can be made.
1033 |
1034 |
--------------------------------------------------------------------------------
/msdos/macros.asm:
--------------------------------------------------------------------------------
1 | ; Itsy Forth - Macros
2 | ; Written by John Metcalf
3 | ; Commentary by Mike Adams
4 | ;
5 | ; Itsy Forth was written for use with NASM, the "Netwide Assembler"
6 | ; (http://www.nasm.us/). It uses a number of macros to deal with the tedium
7 | ; of generating the headers for the words that are defined in Itsy's source
8 | ; code file. The macros, and the explanations of what they're doing, are
9 | ; listed below:
10 |
11 | ;--------------------------------------------------------------------------
12 | ; First, two variables are defined for use by the macros:
13 | ; link is the initial value for the first link field that'll
14 | ; be defined. It's value will be updated with each header
15 | ; that's created.
16 | %define link 0
17 |
18 | ; A bitmask that'll be called "immediate" will be used to
19 | ; encode the flag into the length bytes of word names in order
20 | ; to indicate that the word will be of the immediate type.
21 | %define immediate 080h
22 |
23 | ;--------------------------------------------------------------------------
24 | ; The first macro defined is the primary one used by the others, "head".
25 | ; It does the lion's share of the work for the other macros that'll be
26 | ; defined afterwards. Its commands perform the following operations:
27 |
28 | ; The first line of the macro declares it's name as "head".
29 | ; The 4 in this line signifies that it expects to receive
30 | ; 4 parameters when it's invoked: the string that will be the
31 | ; word's name and will be encoded into the header along with
32 | ; the string's name; an "execution tag" name that will have the
33 | ; prefix "xt_" attached to it and will be used as a label for
34 | ; the word's code field; a flag that will be 080h if the word
35 | ; will be immediate and a 0 otherwise; and the label for the
36 | ; word's runtime code, whose address will be put into the
37 | ; word's code field.
38 | %macro head 4
39 |
40 | ; Okay, what we're doing in this odd-looking bit of code is
41 | ; declaring a variable called "%%link" that's local only to this
42 | ; macro and is independent of the earlier variable we declared
43 | ; as "link". It's a label that will represent the current
44 | ; location in the object code we're creating. Then we lay down
45 | ; some actual object code, using the "dw" command to write the
46 | ; current value of "link" into the executable file.
47 | %%link dw link
48 |
49 | ; Here's one of the tricky parts. We now redefine the value of
50 | ; "link" to be whatever the current value of "%%link" is, which
51 | ; is basically the address of the link field that was created
52 | ; during this particular use of this macro. That way, the next
53 | ; time head is called, the value that will be written into the
54 | ; code in the "dw" command above will be whatever the value of
55 | ; "%%link" was during THIS use of the macro. This way, each time
56 | ; head is called, the value that'll be written into the new
57 | ; link field will be the address that was used for the link
58 | ; field the previous time head was called, which is just how
59 | ; we want the link fields to be in a Forth dictionary. Note that
60 | ; the first time that head is called, the value of link was
61 | ; predefined as 0, so that the link field of the first word in
62 | ; the dictionary will contain the value of 0 to mark it as
63 | ; being the first word in the dictionary.
64 | %define link %%link
65 |
66 | ; Now the name field. The first argument passed to head is the
67 | ; string defining the new word's name. The next line in the macro
68 | ; measures the length of the string (the "%1" tells it that it's
69 | ; supposed to look at argument #1) and assigns it to a macro-local
70 | ; variable called "%%count".
71 | %strlen %%count %1
72 |
73 | ; In this next line, we're writing data into the object code on
74 | ; a byte-by-byte basis. We first write a byte consisting of the
75 | ; value of argument 3 (which is 080h if we're writing the header
76 | ; for an immediate word or a 0 otherwise) added to the length of
77 | ; the name string to produce the length byte in the header. Then
78 | ; we write the name string itself into the file.
79 | db %3 + %%count,%1
80 |
81 | ; Okay, don't get confused by the "+" in this next line. Take
82 | ; careful note of the spaces; the actual command is "%+", which
83 | ; is string concatenation, not numeric addition. We're going to
84 | ; splice a string together. The first part consists of the "xt_",
85 | ; then we splice the macro's 2nd argument onto it. The resulting
86 | ; string is used as the head's "execution tag", the address of
87 | ; it's code field. This label is then used for the "dw" command
88 | ; that writes the value of argument #4 (the address of the word's
89 | ; runtime code) into the header's code field.
90 | xt_ %+ %2 dw %4
91 |
92 | ; As you might guess, the next line marks the end of the
93 | ; macro's definition. The entire header's been defined at this
94 | ; point, and we're now ready for the data field, whether it's
95 | ; composed of assembly code, a list of Forth words, or the
96 | ; numeric data for a variable or constant.
97 | %endmacro
98 |
99 | ; For example, calling head with the following line:
100 | ;
101 | ; head,'does>',does,080h,docolon
102 | ;
103 | ; will produce the following header code...
104 | ;
105 | ; dw (address of link of previous header)
106 | ; db 085h,'does>'
107 | ; xt_does dw docolon
108 | ;
109 | ; ...and records the address of this header's link field so that it can
110 | ; be written into the link field of the next word, just as the address
111 | ; of the previous link field was written into this header.
112 | ; This method saves the programmer a lot of tedium in manually generating
113 | ; the code for word headers when writing a Forth system's kernel in
114 | ; assembly language. Note that argument #2 is surrounded by single quotes.
115 | ; That's the format that the assembler expects to see when being told to
116 | ; lay down a string of characters byte-by-byte in a db command, so they
117 | ; have to be present when they're given as an arg to this macro so that
118 | ; the macro puts them in their proper place.
119 |
120 | ;--------------------------------------------------------------------------
121 | ; The next macro is called "primitive", and is used for setting up a header
122 | ; for a word written in assembly language.
123 | ;
124 | ; Here we declare the definition of the macro called "primitive".
125 | ; Note, though, the odd manner in which the number of required
126 | ; arguments is stated. Yes, that really does mean that it can
127 | ; take from 2 to 3 arguments. Well, what does it do if the user
128 | ; only gives it 2? That's what that 0 is: the default value that's
129 | ; to be used for argument #3 if the user doesn't specify it. Most
130 | ; of the time he won't; the only time arg #3 will be specifically
131 | ; given will be if the user is defining an immediate word.
132 | %macro primitive 2-3 0
133 |
134 | ; All primitive does is to pass its arguments on to head, which
135 | ; does most of the actual work. It passes on the word name and
136 | ; the execution tag name as-is. Parameter #3 will be given the
137 | ; default value of 0 unless the user specifically states it.
138 | ; This is meant to allow the user to add "immediate" to the
139 | ; macro invocation to create an immediate word. The 4th arg,
140 | ; "$+2", means that when head goes to write the address of the
141 | ; run-time code into the code field, the address it's going to
142 | ; use will be 2 bytes further along than the code field address,
143 | ; i.e. the address of the start of the code immediately after
144 | ; the code field. (The "$" symbol is used by most assemblers
145 | ; to represent the address of the code that's currently being
146 | ; assembled.)
147 | head %1,%2,%3,$+2
148 |
149 | ; End of the macro definition.
150 | %endmacro
151 |
152 | ;--------------------------------------------------------------------------
153 | ; The macro "colon" operates very similarly to "primitive", except that
154 | ; it's used for colon definitions:
155 | ;
156 | ; Declare the macro, with 2 to 3 arguments, using 0 for the default
157 | ; value of arg #3 if one isn't specifically given.
158 | %macro colon 2-3 0
159 |
160 | ; Pass the args on to head, using docolon as the runtime code.
161 | head %1,%2,%3,docolon
162 |
163 | ; End of macro definition.
164 | %endmacro
165 |
166 | ;--------------------------------------------------------------------------
167 | ; The rest of the macros all require a specific number of arguments, since
168 | ; none of them have the option of being immediate. This one defines
169 | ; a constant:
170 |
171 | ; Macro name is, unsurprisingly, "constant", and gets 3 arguments.
172 | ; As with head and primitive, the first 2 are the word's name and
173 | ; the label name that'll be used for the word. The third argument
174 | ; is the value that we want the constant to hold.
175 | %macro constant 3
176 |
177 | ; Use the head macro. Args 1 and 2, the names, get passed on as-is.
178 | ; Constants are never defined as immediate (though it's an intriguing
179 | ; idea; a constant whose value is one thing when compiling and
180 | ; another when interpreting might be useful for something), so arg #3
181 | ; passed on to head is always a 0, and arg #4 will always be doconst,
182 | ; the address of the runtime code for constants.
183 | head %1,%2,0,doconst
184 |
185 | ; Similar to the way that the label is created for the execution
186 | ; tags, here we create a label for the data field of the constant,
187 | ; though this time we're prefixing the name with "val_" instead
188 | ; of the "xt_" used for the execution tags. Then we use a dw to
189 | ; write constant's arg #3, the constant's value, into the code.
190 | val_ %+ %2 dw %3
191 |
192 | ; End of the definition.
193 | %endmacro
194 |
195 | ;--------------------------------------------------------------------------
196 | ; The macro for variables is very similar to the one for constants.
197 |
198 | ; Macro name "variable", 3 arguments, with arg #3 being the
199 | ; initial value that will be given to the variable.
200 | %macro variable 3
201 |
202 | ; Just like in "constant", except that the runtime code is dovar.
203 | head %1,%2,0,dovar
204 |
205 | ; Exact same line as used in "constant", with the same effects.
206 | val_ %+ %2 dw %3
207 |
208 | ; End of the definition.
209 | %endmacro
210 |
211 | ;--------------------------------------------------------------------------
212 | ;
213 | ; That's the last of the macros. They're accessed through the
214 | ; "%include macros.asm" command near the beginning of Itsy's
215 | ; source code file. Or, if you prefer, you can remove the
216 | ; %include command and splice the above code directly
217 | ; into itsy.asm in its place.
218 | ;
219 | ;--------------------------------------------------------------------------
220 |
221 |
--------------------------------------------------------------------------------
/osx/Makefile:
--------------------------------------------------------------------------------
1 | all:
2 | nasm -g -f macho itsy-osx.asm && ld -macosx_version_min 10.5.0 -o itsy itsy-osx.o
3 |
--------------------------------------------------------------------------------
/osx/itsy-osx.asm:
--------------------------------------------------------------------------------
1 | ; Itsy Forth
2 | ; Written by John Metcalf
3 | ; Commentary by John Metcalf and Mike Adams
4 | ; Translated to Linux by github user kt97679
5 | ; Translated to MacOS X by Dylan McNamee
6 |
7 | ; nasm -g -f macho itsy-osx.asm && ld -macosx_version_min 10.5.0 -o itsy itsy-osx.o
8 | ;
9 | ; simple session:
10 | ; ./itsy
11 | ; : hi 72 emit 101 emit 108 emit 108 emit 111 emit 10 emit ;
12 | ; hi
13 | ; Hello
14 | ; ^C
15 | ; TODOs:
16 | ; fix seg fault on Ctrl-D
17 | ; fix that 72 101 108 108 111 emit emit emit emit emit doesn't work.
18 | ; build up Forth control structures - if then else, do while, etc.
19 | ; implement string functions num->string, string literals, ., ."
20 | ; hard-code the mach-o header (as with Linux version) so ld doesn't add 8K of header.
21 | ;--------------------------------------------------------------------------
22 | ; Implementation notes:
23 | ;
24 | ; Register Usage:
25 | ; sp - data stack pointer.
26 | ; bp - return stack pointer.
27 | ; si - Forth instruction pointer.
28 | ; di - pointer to current XT (CFA of word currently being executed).
29 | ; bx - TOS (top of data stack). The top value on the data stack is not
30 | ; actually kept on the CPU's data stack. It's kept in the BX register.
31 | ; Having it in a register like this speeds up the operation of
32 | ; the primitive words. They don't have to take the time to pull a
33 | ; value off of the stack; it's already in a register where it can
34 | ; be used right away!
35 | ; ax, cd, dx - Can all be freely used for processing data. The other
36 | ; registers can still be used also, but only with caution. Their
37 | ; contents must be pushed to the stack and then restored before
38 | ; exiting from the word or calling any other Forth words. LOTS of
39 | ; potential for program crashes if you don't do this correctly.
40 | ; The notable exception is the DI register, which can (and is, below)
41 | ; used pretty freely in assembly code, since the concept of a pointer
42 | ; to the current CFA is rather irrelevant in assembly.
43 | ;
44 | ;
45 | ; Structure of an Itsy word definition:
46 | ; # of
47 | ; Bytes: Description:
48 | ; ------ ---------------------------------------------------------
49 | ; 2 Link Field. Contains the address of the link field of the
50 | ; definition preceding this one in the dictionary. The link
51 | ; field of the first def in the dictionary contains 0.
52 | ; Varies Name Field. The first byte of the name field contains the length
53 | ; of the name; succeeding bytes contain the ASCII characters of
54 | ; the name itself. If the high bit of the length is set, the
55 | ; definition is tagged as being an "immediate" word.
56 | ; 2 Code Field. Contains the address of the executable code for
57 | ; the word. For primitives, this will likely be the address
58 | ; of the word's own data field. Note that the header creation
59 | ; macros automatically generate labels for the code field
60 | ; addresses of the words they're used to define, though the
61 | ; CFA labels aren't visible in the code shown below. The
62 | ; assembler macros create labels, known as "execution tags"
63 | ; or XTs, for the code field of each word.
64 | ; Varies Data Field. Contains either a list of the code field addresses
65 | ; of the words that make up this definition, or assembly-
66 | ; language code for primitives, or numeric data for variables
67 | ; and constants and such.
68 |
69 | %define link 0
70 | %define immediate 080h
71 |
72 | %macro head 4
73 | %%link dd link
74 | %define link %%link
75 | %strlen %%count %1
76 | db %3 + %%count, %1
77 | xt_ %+ %2 dd %4
78 | yt_ %+ %2:
79 | %endmacro
80 |
81 | %macro primitive 2-3 0
82 | head %1, %2, %3, $ + 4
83 | %endmacro
84 |
85 | %macro colon 2-3 0
86 | head %1, %2, %3, docolon
87 | %endmacro
88 |
89 | %macro variable 3
90 | head %1, %2, 0, dovar
91 | val_ %+ %2 dd %3
92 | %endmacro
93 |
94 | %define MEMSIZE 1048576
95 | %define TIBSIZE 80
96 | %define STACKSIZE 4096
97 | %define TIBPTR fstack + MEMSIZE - TIBSIZE ; fheap ;TEXTORG + MEMSIZE - TIBSIZE
98 | %define SP0 TIBPTR - 4
99 | %define RP0 SP0 - STACKSIZE
100 | %define DSTACK RP0 - STACKSIZE
101 |
102 | BITS 32
103 |
104 | section .data
105 |
106 | ; -------------------
107 | ; System Variables
108 | ; -------------------
109 |
110 | ; state - ( -- addr ) true = compiling, false = interpreting
111 | variable 'state', state, 0
112 |
113 | ; >in - ( -- addr ) next character in input buffer
114 | variable '>in', to_in, 0
115 |
116 | ; #tib - ( -- addr ) number of characters in the input buffer
117 | variable '#tib', number_t_i_b, 0
118 |
119 | ; dp - ( -- addr ) first free cell in the dictionary
120 | variable 'dp', dp, DSTACK
121 |
122 | ; base - ( -- addr ) number base
123 | variable 'base', base, 10
124 |
125 | ; last - ( -- addr ) the last word to be defined
126 | ; NOTE: The label "final:" must be placed immediately before
127 | ; the last word defined in this file. If new words are added,
128 | ; make sure they're either added before the "final:" label
129 | ; or the "final:" label is moved to the position immediately
130 | ; before the last word added.
131 | variable 'last', last, final
132 |
133 | ; tib - ( -- addr ) address of the input buffer
134 | variable 'tib', t_i_b, TIBPTR
135 |
136 | section .text
137 | global start
138 | start:
139 | jmp xt_abort+4
140 |
141 | ; this is a separate routine to fix up the stack before the syscall
142 | _mysyscall:
143 | int 80h
144 | ret
145 |
146 | ; execute - ( xt -- ) call the word at xt
147 | primitive 'execute', execute
148 | mov eax, ebx ; eax is important here, it is used by docolon and dovar
149 | pop ebx
150 | jmp dword[eax]
151 |
152 | ; -------------------
153 | ; Initialisation
154 | ; -------------------
155 |
156 | ; abort - ( -- ) initialise Itsy then jump to interpret
157 | primitive 'abort', abort
158 | mov eax, dword[val_number_t_i_b]
159 | mov dword[val_to_in], eax
160 | xor ebp, ebp
161 | mov dword[val_state], ebp
162 | mov esp, SP0
163 | mov ebp, RP0
164 | mov esi, xt_interpret + 4
165 | jmp next
166 |
167 | ; -------------------
168 | ; Compilation
169 | ; -------------------
170 |
171 | ; , - ( x -- ) compile x to the current definition.
172 | ; Stores the number on the stack to the memory location currently
173 | ; pointed to by dp.
174 | primitive ',', comma
175 | xchg eax, ebx
176 | mov ebx, val_dp
177 | mov edi, [ebx]
178 | stosd
179 | mov [ebx], edi
180 | pop ebx
181 | jmp next
182 |
183 | ; lit - ( -- ) push the value in the cell straight after lit.
184 | ; lit is the word that is compiled into a definition when you put a
185 | ; "literal" number in a Forth definition. When your word is compiled,
186 | ; the CFA of lit gets stored in the definition followed immediately
187 | ; by the value of the number you put into the code. At run time, lit
188 | ; pushes the value of your number onto the stack.
189 | primitive 'lit', lit
190 | push ebx
191 | lodsd
192 | xchg eax, ebx
193 | jmp next
194 |
195 | ; -------------------
196 | ; Stack
197 | ; -------------------
198 |
199 | ; rot - ( x y z -- y z x ) rotate x, y and z.
200 | ; Standard Forth word that extracts number 3rd from the top of the stack
201 | ; and puts it on the top, effectively rotating the top 3 values.
202 | primitive 'rot', rote
203 | pop edx
204 | pop eax
205 | push edx
206 | push ebx
207 | xchg eax, ebx
208 | jmp next
209 |
210 | ; drop - ( x -- ) remove x from the stack.
211 | primitive 'drop', drop
212 | pop ebx
213 | jmp next
214 |
215 | ; dup - ( x -- x x ) add a copy of x to the stack
216 | primitive 'dup', dupe
217 | push ebx
218 | jmp next
219 |
220 | ; # swap - ( x y -- y x ) exchange x and y
221 | primitive 'swap', swap
222 | xchg ebx, [esp]
223 | jmp next
224 |
225 | ; -------------------
226 | ; Maths / Logic
227 | ; -------------------
228 |
229 | ; + - ( x y -- z) calculate z=x+y then return z
230 | primitive '+', plus
231 | pop eax
232 | add ebx, eax
233 | jmp next
234 |
235 | primitive '=', equals
236 | pop eax
237 | sub ebx, eax
238 | sub ebx, 1
239 | sbb ebx, ebx
240 | jmp next
241 |
242 | primitive '@', fetch
243 | mov ebx, dword[ebx]
244 | jmp next
245 |
246 | primitive '!', store
247 | pop dword[ebx]
248 | pop ebx
249 | jmp next
250 |
251 | ; break ( -- ) trigger a breakpoint for debugging
252 | primitive 'break', break
253 | int 3
254 | jmp next
255 |
256 | ; ----------------------
257 | ; The inner interpteter (buried in here):
258 | ; ----------------------
259 | primitive 'exit', exit
260 | xchg ebp, esp
261 | pop esi
262 | xchg ebp, esp
263 | next lodsd ; funny that the key part of the inner interpreter is buried here
264 | jmp dword[eax] ; eax is later used by docolon and dovar
265 |
266 | ; -------------------
267 | ; Flow Control
268 | ; -------------------
269 |
270 | ; 0branch - ( x -- ) jump if x is zero
271 | ; This is the primitive word that's compiled as the runtime code in
272 | ; an IF...THEN statement. The number compiled into the word's definition
273 | ; immediately after 0branch is the address of the word in the definition
274 | ; that we're branching to. That address gets loaded into the instruction
275 | ; pointer. In essence, this word sees a false flag (i.e. a zero) and
276 | ; then jumps over the words that comprise the "do this if true" clause
277 | ; of an IF...ELSE...THEN statement.
278 |
279 | primitive '0branch', zero_branch
280 | lodsd
281 | test ebx, ebx
282 | jne zerob_z
283 | xchg eax, esi
284 | zerob_z pop ebx
285 | jmp next
286 |
287 | ; branch - ( addr -- ) unconditional jump
288 | ; This is one of the pieces of runtime code that's compiled by
289 | ; BEGIN/WHILE/REPEAT, BEGIN/AGAIN, and BEGIN/UNTIL loops. As with 0branch,
290 | ; the number compiled into the dictionary immediately after the branch is
291 | ; the address of the word in the definition that we're branching to.
292 | primitive 'branch',branch
293 | mov esi, dword[esi]
294 | jmp next
295 |
296 | ; -----------------------
297 | ; Terminal Input / Output
298 | ; -----------------------
299 |
300 | ; accept - ( addr len -- len2 ) read a string from the terminal
301 | ; accept reads a string of characters from the terminal. The string
302 | ; is stored at addr and can be up to len characters long.
303 | ; accept returns the actual length of the string.
304 |
305 | ; converted Linux syscall to MacOS syscall
306 | ; see: https://filippo.io/making-system-calls-from-assembly-in-mac-os-x/
307 | ; main difference is Linux passes syscall params in registers, MacOS / BSD does it on the stack
308 | ; ( addr len -- len_read )
309 | primitive 'accept', accept
310 | ; ebx has the # bytes to read
311 | ; stdin is handle 0
312 | ; top of stack has the buffer address
313 | pop ecx ; save buffer
314 | push ebx ; count
315 | push ecx ; buffer
316 | push 0 ; stdin
317 | mov eax, 0x3 ; sys_read
318 | call _mysyscall
319 | add esp, 12 ; discard args
320 | xchg ebx, eax ; MacOS / BSD - eax after sys_read contains number of bytes read (negative number means error), let's move it to TOS
321 | dec ebx ; last char is CR
322 | push ebx
323 | jmp next
324 |
325 | ; ( char -- ) emit a character to the terminal
326 | primitive 'emit', emit
327 | ; char to print is in ebx
328 | push ebx ; needs to be in memory - why not stack?
329 | mov ebx, esp ; save stack pointer (pointing at our char)
330 | push 1 ; count
331 | push ebx ; buffer (into the stack)
332 | push 1 ; stdout
333 | mov eax, 0x4 ; sys_write
334 | call _mysyscall
335 | aftemt add esp, 16 ; reset stack
336 | pop ebx ; TOS goes in ebx
337 | jmp next
338 |
339 | ; -------------------
340 | ; String
341 | ; -------------------
342 |
343 | ; count - ( addr -- addr2 len )
344 | ; count is given the address of a "counted string" (like the name field of a
345 | ; word definition in Forth, with the first byte being the number of
346 | ; characters in the string and immediately followed by the characters
347 | ; themselves). It returns the length of the string and a pointer to the
348 | ; first actual character in the string.
349 | primitive 'count',count
350 | movzx eax, byte[ebx]
351 | inc ebx
352 | push ebx
353 | mov ebx, eax
354 | jmp next
355 |
356 | primitive '>number',to_number
357 | pop edi
358 | pop ecx
359 | pop eax
360 | to_numl test ebx, ebx
361 | je to_numz
362 | push eax
363 | movzx eax, byte[edi]
364 | cmp al, 'a'
365 | jc to_nums
366 | sub al, 32
367 | to_nums cmp al, '9' + 1
368 | jc to_numg
369 | cmp al, 'A'
370 | jc to_numh
371 | sub al, 7
372 | to_numg sub al, 48
373 | cmp al, byte[val_base]
374 | jnc to_numh
375 | xchg eax, edx
376 | pop eax
377 | push edx
378 | xchg eax, ecx
379 | mul dword[val_base]
380 | xchg eax, ecx
381 | mul dword[val_base]
382 | add ecx, edx
383 | pop edx
384 | add eax, edx
385 | dec ebx
386 | inc edi
387 | jmp to_numl
388 | to_numz push eax
389 | to_numh push ecx
390 | push edi
391 | jmp next
392 |
393 | ; word - ( char -- addr ) parse the next word in the input buffer
394 | ; word scans the "terminal input buffer" (whose address is given by the
395 | ; system constant tib) for words to execute, starting at the current
396 | ; address stored in the input buffer pointer >in. The character on the
397 | ; stack when word is called is the one that the code will look for as
398 | ; the separator between words. 999 times out of 1000,; this is going to
399 | ; be a space.
400 | primitive 'word', word
401 | mov edi, dword[val_dp]
402 | push edi
403 | mov edx, ebx
404 | mov ebx, dword[val_t_i_b]
405 | mov ecx, ebx
406 | add ebx, dword[val_to_in]
407 | add ecx, dword[val_number_t_i_b]
408 | wordf cmp ecx, ebx
409 | je wordz
410 | mov al, byte[ebx]
411 | inc ebx
412 | cmp al, dl
413 | je wordf
414 | wordc inc edi
415 | mov byte[edi], al
416 | cmp ecx, ebx
417 | je wordz
418 | mov al, byte[ebx]
419 | inc ebx
420 | cmp al, dl
421 | jne wordc
422 | wordz mov byte[edi + 1], 32
423 | mov eax, dword[val_dp]
424 | xchg eax, edi
425 | sub eax, edi
426 | mov byte[edi], al
427 | sub ebx, dword[val_t_i_b]
428 | mov dword[val_to_in], ebx
429 | pop ebx
430 | jmp next
431 |
432 | ; -----------------------
433 | ; Dictionary Search
434 | ; -----------------------
435 |
436 | ; find - ( addr -- addr2 flag ) look up word in the dictionary
437 | ; find looks in the Forth dictionary for a word with the name given in the
438 | ; counted string at addr. One of the following will be returned:
439 | ; flag = 0, addr2 = counted string --> word was not found
440 | ; flag = 1, addr2 = call address --> word is immediate
441 | ; flag = -1, addr2 = call address --> word is not immediate
442 | primitive 'find', find
443 | mov edi, val_last
444 | findl push edi
445 | push ebx
446 | movzx ecx, byte[ebx]
447 | inc ecx
448 | findc mov al, byte[edi + 4]
449 | and al, 07Fh
450 | cmp al, byte[ebx]
451 | je findm
452 | pop ebx
453 | pop edi
454 | mov edi, dword[edi]
455 | test edi, edi
456 | jne findl
457 | findnf push ebx
458 | xor ebx, ebx
459 | jmp next
460 | findm inc edi
461 | inc ebx
462 | loop findc
463 | pop ebx
464 | pop edi
465 | xor ebx, ebx
466 | inc ebx
467 | lea edi, [edi + 4]
468 | mov al, byte[edi]
469 | test al, 080h
470 | jne findi
471 | neg ebx
472 | findi and eax, 31
473 | add edi, eax
474 | inc edi
475 | push edi
476 | jmp next
477 |
478 | ; -----------------------
479 | ; Colon Definition
480 | ; -----------------------
481 |
482 | ; : - ( -- ) define a new Forth word, taking the name from the input buffer.
483 | ; Ah! We've finally found a word that's actually defined as a Forth colon
484 | ; definition rather than an assembly language routine! Partly, anyway; the
485 | ; first part is Forth code, but the end is the assembly language run-time
486 | ; routine that, incidentally, executes Forth colon definitions. Notice that
487 | ; the first part is not a sequence of opcodes, but rather is a list of
488 | ; code field addresses for the words used in the definition. In each code
489 | ; field of each defined word is an "execution tag", or "xt", a pointer to
490 | ; the runtime code that executes the word. In a Forth colon definition, this
491 | ; is going to be a pointer to the docolon routine we see in the second part
492 | ; of the definition of colon itself below.
493 | colon ':', colon
494 | dd xt_lit, -1
495 | dd xt_state
496 | dd xt_store
497 | dd xt_create
498 | dd xt_do_semi_code
499 |
500 | docolon xchg ebp, esp
501 | push esi
502 | xchg ebp, esp
503 | lea esi, [eax + 4] ; eax value is set by next
504 | jmp next
505 |
506 | ; ; - ( -- ) complete the Forth word being compiled
507 | colon ';', semicolon, immediate
508 | dd xt_lit, xt_exit
509 | dd xt_comma
510 | dd xt_lit, 0
511 | dd xt_state
512 | dd xt_store
513 | dd xt_exit
514 |
515 | ; -----------------------
516 | ; Headers
517 | ; -----------------------
518 |
519 | ; create - ( -- ) build a header for a new word in the dictionary, taking
520 | ; the name from the input buffer - a runtime version of 'primitive'
521 | colon 'create', create
522 | dd xt_dp, xt_fetch
523 | dd xt_last, xt_fetch
524 | dd xt_comma
525 | dd xt_last, xt_store
526 | dd xt_lit, 32
527 | dd xt_word
528 | dd xt_count
529 | dd xt_plus
530 | dd xt_dp, xt_store
531 | dd xt_lit, 0
532 | dd xt_comma
533 | dd xt_do_semi_code
534 |
535 | dovar push ebx
536 | lea ebx, [eax + 4] ; eax value is set by next
537 | jmp next
538 |
539 | ; # (;code) - ( -- ) replace the xt of the word being defined with a pointer
540 | ; to the code immediately following (;code)
541 | ; The idea behind this compiler word is that you may have a word that does
542 | ; various compiling/accounting tasks that are defined in terms of Forth code
543 | ; when its being used to compile another word, but afterward, when the new
544 | ; word is executed in interpreter mode, you want your compiling word to do
545 | ; something else that needs to be coded in assembly. (;code) is the word that
546 | ; says, "Okay, that's what you do when you're compiling, but THIS is what
547 | ; you're going to do while executing, so look sharp, it's in assembly!"
548 | ; Somewhat like the word DOES>, which is used in a similar manner to define
549 | ; run-time code in terms of Forth words.
550 | primitive '(;code)', do_semi_code
551 | mov edi, dword[val_last]
552 | mov al, byte[edi + 4]
553 | and eax, 31
554 | add edi, eax
555 | mov dword[edi + 5], esi
556 | xchg ebp, esp
557 | pop esi
558 | xchg esp, ebp
559 | jmp next
560 |
561 | ; -----------------------
562 | ; Constants
563 | ; -----------------------
564 |
565 | ; constant - ( x -- ) create a new constant with the value x, taking the name
566 | ; from the input buffer
567 | colon 'constant',constant
568 | dw xt_create ; Create the constant's header.
569 | dw xt_comma ; Store the constant's value into the word's
570 | ; data field.
571 | dw xt_do_semi_code ; Write, into the code field of the header we just
572 | ; created, the address that immediately follows
573 | ; this statement: the address of the doconst
574 | ; routine, which is the code that's responsible
575 | ; for pushing onto the stack the value that's
576 | ; contained in the data field of the word whose
577 | ; header we just created when that word is
578 | ; invoked.
579 | doconst push bx ; Push the stack down.
580 | mov bx,word[di+2] ; DI should be pointing to the constant's code
581 | ; field. Load into the top of the stack the
582 | ; value 2 bytes further down from the code field,
583 | ; i.e. the constant's actual value.
584 | jmp next ;
585 |
586 | ; -----------------------
587 | ; Outer Interpreter
588 | ; -----------------------
589 |
590 | ; -------------------------------------------------------
591 | ; NOTE! The following line with the final: label MUST be
592 | ; immediately before the final word definition!
593 | ; -------------------------------------------------------
594 |
595 | final:
596 |
597 | colon 'interpret', interpret
598 | interpt dd xt_number_t_i_b
599 | dd xt_fetch
600 | dd xt_to_in
601 | dd xt_fetch
602 | dd xt_equals
603 | dd xt_zero_branch
604 | dd intpar
605 | dd xt_t_i_b
606 | dd xt_fetch
607 | dd xt_lit, 50
608 | dd xt_accept
609 | dd xt_number_t_i_b
610 | dd xt_store
611 | dd xt_lit, 0
612 | dd xt_to_in
613 | dd xt_store
614 | intpar dd xt_lit, 32 ; find a " "
615 | dd xt_word
616 | dd xt_find ; is it in the dictionary?
617 | dd xt_dupe
618 | dd xt_zero_branch
619 | dd intnf
620 | dd xt_state
621 | dd xt_fetch
622 | dd xt_equals
623 | dd xt_zero_branch
624 | dd intexc
625 | dd xt_comma
626 | dd xt_branch
627 | dd intdone
628 | intexc dd xt_execute ; found immediate word - execute it, then loop
629 | dd xt_branch
630 | dd intdone
631 | intnf dd xt_dupe ; not a forth word - is it a number?
632 | dd xt_rote
633 | dd xt_count
634 | dd xt_to_number
635 | dd xt_zero_branch
636 | dd intskip
637 | dd xt_state ; conversion error:
638 | dd xt_fetch ; branch on State (interpreting vs compiling)
639 | dd xt_zero_branch ; interpreting -> loop
640 | dd intnc
641 | dd xt_last ; compiling -> orderly shutdown
642 | dd xt_fetch
643 | dd xt_dupe
644 | dd xt_fetch
645 | dd xt_last
646 | dd xt_store
647 | dd xt_dp
648 | dd xt_store
649 | intnc dd xt_abort
650 | intskip dd xt_drop
651 | dd xt_drop
652 | dd xt_state
653 | dd xt_fetch
654 | dd xt_zero_branch
655 | dd intdone
656 | dd xt_lit
657 | dd xt_lit
658 | dd xt_comma
659 | dd xt_comma
660 | intdone dd xt_branch
661 | dd interpt
662 | dd xt_abort
663 |
664 | colon 'testitsymac', testitsymac
665 | dotest dd xt_t_i_b
666 | dd xt_fetch
667 | dd xt_lit, 1
668 | dd xt_accept
669 | looper dd xt_lit, 50
670 | dd xt_lit, 50
671 | dd xt_emit
672 | dd dotest
673 | freemem:
674 |
675 | section .bss
676 | fstack: resb 16000 + MEMSIZE
677 | ; rstack: resb 2048
678 | ; fheap: resb 2048
679 | ; astack: resb 2048
680 | ; dstack: resb 2048
681 |
682 | filesize equ $ - $$
683 |
--------------------------------------------------------------------------------