├── README.md ├── linux ├── Makefile └── itsy-linux.asm ├── msdos ├── itsy.asm └── macros.asm └── osx ├── Makefile └── itsy-osx.asm /README.md: -------------------------------------------------------------------------------- 1 | itsy-linux 2 | ========== 3 | 4 | Unix (Linux & MacOS X) port of itsy Forth by John Metcalf 5 | 6 | John Metcalf (http://www.retroprogramming.com/) implemented minimal forth system: 7 | 14 | Resulting binary has very impressive size (978 bytes on Linux) and can be used to bootstrap a complete Forth system. 15 | 16 | Original itsy code is producing .com files and can be found in "msdos" directory of this repository. 17 | 18 | kt97679 ported it to 32-bit linux code. 19 | DylanMc ported it to MacOS (also 32 bit). Should move to any BSD pretty easily. 20 | 21 | Sample session: 22 | 23 |
24 | $ make
25 | nasm itsy-linux.asm -fbin -l itsy-linux.lst -o itsy-linux
26 | chmod +x itsy-linux
27 | $ ./itsy-linux 
28 | : say_hi 72 emit 105 emit 33 emit 10 emit ;
29 | say_hi
30 | Hi!
31 | ^C
32 | $ 
33 | 
34 | 35 | TODO: 36 | 44 | -------------------------------------------------------------------------------- /linux/Makefile: -------------------------------------------------------------------------------- 1 | all: itsy-linux 2 | 3 | itsy-linux: 4 | nasm itsy-linux.asm -fbin -l itsy-linux.lst -o itsy-linux 5 | chmod +x itsy-linux 6 | clean: 7 | rm itsy-linux.lst itsy-linux 8 | -------------------------------------------------------------------------------- /linux/itsy-linux.asm: -------------------------------------------------------------------------------- 1 | ; nasm itsy-linux.asm -fbin -l itsy-linux.lst -o itsy-linux && chmod +x itsy-linux 2 | 3 | %define link 0 4 | %define immediate 080h 5 | 6 | %macro head 4 7 | %%link dd link 8 | %define link %%link 9 | %strlen %%count %1 10 | db %3 + %%count, %1 11 | xt_ %+ %2 dd %4 12 | %endmacro 13 | 14 | %macro primitive 2-3 0 15 | head %1, %2, %3, $ + 4 16 | %endmacro 17 | 18 | %macro colon 2-3 0 19 | head %1, %2, %3, docolon 20 | %endmacro 21 | 22 | %macro variable 3 23 | head %1, %2, 0, dovar 24 | val_ %+ %2 dd %3 25 | %endmacro 26 | 27 | %define TEXTORG 0x00400000 28 | %define MEMSIZE 1048576 29 | %define TIBSIZE 80 30 | %define STACKSIZE 4096 31 | %define TIBPTR TEXTORG + MEMSIZE - TIBSIZE 32 | %define SP0 TIBPTR - 4 33 | %define RP0 SP0 - STACKSIZE 34 | 35 | BITS 32 36 | org TEXTORG 37 | 38 | ehdr: ; Elf32_Ehdr 39 | db 0x7F, "ELF", 1, 1, 1, 0 ; e_ident 40 | times 8 db 0 41 | dw 2 ; e_type 42 | dw 3 ; e_machine 43 | dd 1 ; e_version 44 | dd xt_abort + 4 ; e_entry 45 | dd phdr - $$ ; e_phoff 46 | dd 0 ; e_shoff 47 | dd 0 ; e_flags 48 | dw ehdrsize ; e_ehsize 49 | dw phdrsize ; e_phentsize 50 | dw 1 ; e_phnum 51 | dw 0 ; e_shentsize 52 | dw 0 ; e_shnum 53 | dw 0 ; e_shstrndx 54 | 55 | ehdrsize equ $ - ehdr 56 | 57 | phdr: ; Elf32_Phdr 58 | dd 1 ; p_type 59 | dd 0 ; p_offset 60 | dd $$ ; p_vaddr 61 | dd $$ ; p_paddr 62 | dd filesize ; p_filesz 63 | dd MEMSIZE ; p_memsz 64 | dd 7 ; p_flags 65 | dd 0x1000 ; p_align 66 | 67 | phdrsize equ $ - phdr 68 | 69 | ; esp - data stack pointer 70 | ; ebp - return stack pointer 71 | ; esi - Forth instruction pointer 72 | ; ebx - TOS (top of data stack) 73 | 74 | variable 'state', state, 0 75 | variable '>in', to_in, 0 76 | variable '#tib', number_t_i_b, 0 77 | variable 'dp', dp, freemem 78 | variable 'base', base, 10 79 | variable 'last', last, final 80 | variable 'tib', t_i_b, TIBPTR 81 | 82 | primitive 'execute', execute 83 | mov eax, ebx ; eax is important here, it is used by docolon and dovar 84 | pop ebx 85 | jmp dword[eax] 86 | 87 | primitive 'abort', abort 88 | mov eax, dword[val_number_t_i_b] 89 | mov dword[val_to_in], eax 90 | xor ebp, ebp 91 | mov dword[val_state], ebp 92 | mov esp, SP0 93 | mov ebp, RP0 94 | mov esi, xt_interpret + 4 95 | jmp next 96 | 97 | primitive ',', comma 98 | xchg eax, ebx 99 | mov ebx, val_dp 100 | mov edi, [ebx] 101 | stosd 102 | mov [ebx], edi 103 | pop ebx 104 | jmp next 105 | 106 | primitive 'lit', lit 107 | push ebx 108 | lodsd 109 | xchg eax, ebx 110 | jmp next 111 | 112 | primitive 'rot', rote 113 | pop edx 114 | pop eax 115 | push edx 116 | push ebx 117 | xchg eax, ebx 118 | jmp next 119 | 120 | primitive 'drop', drop 121 | pop ebx 122 | jmp next 123 | 124 | primitive 'dup', dupe 125 | push ebx 126 | jmp next 127 | 128 | primitive 'swap', swap 129 | xchg ebx, [esp] 130 | jmp next 131 | 132 | primitive '+', plus 133 | pop eax 134 | add ebx, eax 135 | jmp next 136 | 137 | primitive 'exit', exit 138 | xchg ebp, esp 139 | pop esi 140 | xchg ebp, esp 141 | next lodsd 142 | jmp dword[eax] ; eax is later used by docolon and dovar 143 | 144 | primitive '=', equals 145 | pop eax 146 | sub ebx, eax 147 | sub ebx, 1 148 | sbb ebx, ebx 149 | jmp next 150 | 151 | primitive '@', fetch 152 | mov ebx, dword[ebx] 153 | jmp next 154 | 155 | primitive '!', store 156 | pop dword[ebx] 157 | pop ebx 158 | jmp next 159 | 160 | primitive '0branch', zero_branch 161 | lodsd 162 | test ebx, ebx 163 | jne zerob_z 164 | xchg eax, esi 165 | zerob_z pop ebx 166 | jmp next 167 | 168 | primitive 'branch',branch 169 | mov esi, dword[esi] 170 | jmp next 171 | 172 | primitive 'count',count 173 | movzx eax, byte[ebx] 174 | inc ebx 175 | push ebx 176 | mov ebx, eax 177 | jmp next 178 | 179 | primitive 'accept', accept 180 | xor edx, edx 181 | xchg edx, ebx ; now edx contains read byte count and ebx 0 (reading from stdin) 182 | xor eax, eax 183 | mov al, 3 ; sys_read 184 | pop ecx ; buffer 185 | int 80h 186 | xchg ebx, eax ; eax after sys_read contains number of bytes read (negative number means error), let's move it to TOS 187 | dec ebx ; last char is CR 188 | jmp next 189 | 190 | primitive 'emit', emit 191 | push ebx 192 | xor eax, eax 193 | mov al, 4 ; sys_write 194 | xor ebx, ebx 195 | inc ebx ; ebx now contains 1 (stdout) 196 | mov ecx, esp ; buffer 197 | mov edx, ebx ; write byte count 198 | int 80h 199 | pop ebx 200 | pop ebx 201 | jmp next 202 | 203 | primitive '>number',to_number 204 | pop edi 205 | pop ecx 206 | pop eax 207 | to_numl test ebx, ebx 208 | je to_numz 209 | push eax 210 | movzx eax, byte[edi] 211 | cmp al, 'a' 212 | jc to_nums 213 | sub al, 32 214 | to_nums cmp al, '9' + 1 215 | jc to_numg 216 | cmp al, 'A' 217 | jc to_numh 218 | sub al, 7 219 | to_numg sub al, 48 220 | cmp al, byte[val_base] 221 | jnc to_numh 222 | xchg eax, edx 223 | pop eax 224 | push edx 225 | xchg eax, ecx 226 | mul dword[val_base] 227 | xchg eax, ecx 228 | mul dword[val_base] 229 | add ecx, edx 230 | pop edx 231 | add eax, edx 232 | dec ebx 233 | inc edi 234 | jmp to_numl 235 | to_numz push eax 236 | to_numh push ecx 237 | push edi 238 | jmp next 239 | 240 | primitive 'word', word 241 | mov edi, dword[val_dp] 242 | push edi 243 | mov edx, ebx 244 | mov ebx, dword[val_t_i_b] 245 | mov ecx, ebx 246 | add ebx, dword[val_to_in] 247 | add ecx, dword[val_number_t_i_b] 248 | wordf cmp ecx, ebx 249 | je wordz 250 | mov al, byte[ebx] 251 | inc ebx 252 | cmp al, dl 253 | je wordf 254 | wordc inc edi 255 | mov byte[edi], al 256 | cmp ecx, ebx 257 | je wordz 258 | mov al, byte[ebx] 259 | inc ebx 260 | cmp al, dl 261 | jne wordc 262 | wordz mov byte[edi + 1], 32 263 | mov eax, dword[val_dp] 264 | xchg eax, edi 265 | sub eax, edi 266 | mov byte[edi], al 267 | sub ebx, dword[val_t_i_b] 268 | mov dword[val_to_in], ebx 269 | pop ebx 270 | jmp next 271 | 272 | primitive 'find', find 273 | mov edi, val_last 274 | findl push edi 275 | push ebx 276 | movzx ecx, byte[ebx] 277 | inc ecx 278 | findc mov al, byte[edi + 4] 279 | and al, 07Fh 280 | cmp al, byte[ebx] 281 | je findm 282 | pop ebx 283 | pop edi 284 | mov edi, dword[edi] 285 | test edi, edi 286 | jne findl 287 | findnf push ebx 288 | xor ebx, ebx 289 | jmp next 290 | findm inc edi 291 | inc ebx 292 | loop findc 293 | pop ebx 294 | pop edi 295 | xor ebx, ebx 296 | inc ebx 297 | lea edi, [edi + 4] 298 | mov al, byte[edi] 299 | test al, 080h 300 | jne findi 301 | neg ebx 302 | findi and eax, 31 303 | add edi, eax 304 | inc edi 305 | push edi 306 | jmp next 307 | 308 | colon ':', colon 309 | dd xt_lit, -1 310 | dd xt_state 311 | dd xt_store 312 | dd xt_create 313 | dd xt_do_semi_code 314 | 315 | docolon xchg ebp, esp 316 | push esi 317 | xchg ebp, esp 318 | lea esi, [eax + 4] ; eax value is set by next 319 | jmp next 320 | 321 | colon ';', semicolon, immediate 322 | dd xt_lit, xt_exit 323 | dd xt_comma 324 | dd xt_lit, 0 325 | dd xt_state 326 | dd xt_store 327 | dd xt_exit 328 | 329 | colon 'create', create 330 | dd xt_dp, xt_fetch 331 | dd xt_last, xt_fetch 332 | dd xt_comma 333 | dd xt_last, xt_store 334 | dd xt_lit, 32 335 | dd xt_word 336 | dd xt_count 337 | dd xt_plus 338 | dd xt_dp, xt_store 339 | dd xt_lit, 0 340 | dd xt_comma 341 | dd xt_do_semi_code 342 | 343 | dovar push ebx 344 | lea ebx, [eax + 4] ; eax value is set by next 345 | jmp next 346 | 347 | primitive '(;code)', do_semi_code 348 | mov edi, dword[val_last] 349 | mov al, byte[edi + 4] 350 | and eax, 31 351 | add edi, eax 352 | mov dword[edi + 5], esi 353 | xchg ebp, esp 354 | pop esi 355 | xchg esp, ebp 356 | jmp next 357 | 358 | final: 359 | 360 | colon 'interpret', interpret 361 | interpt dd xt_number_t_i_b 362 | dd xt_fetch 363 | dd xt_to_in 364 | dd xt_fetch 365 | dd xt_equals 366 | dd xt_zero_branch 367 | dd intpar 368 | dd xt_t_i_b 369 | dd xt_fetch 370 | dd xt_lit, 50 371 | dd xt_accept 372 | dd xt_number_t_i_b 373 | dd xt_store 374 | dd xt_lit, 0 375 | dd xt_to_in 376 | dd xt_store 377 | intpar dd xt_lit, 32 378 | dd xt_word 379 | dd xt_find 380 | dd xt_dupe 381 | dd xt_zero_branch 382 | dd intnf 383 | dd xt_state 384 | dd xt_fetch 385 | dd xt_equals 386 | dd xt_zero_branch 387 | dd intexc 388 | dd xt_comma 389 | dd xt_branch 390 | dd intdone 391 | intexc dd xt_execute 392 | dd xt_branch 393 | dd intdone 394 | intnf dd xt_dupe 395 | dd xt_rote 396 | dd xt_count 397 | dd xt_to_number 398 | dd xt_zero_branch 399 | dd intskip 400 | dd xt_state 401 | dd xt_fetch 402 | dd xt_zero_branch 403 | dd intnc 404 | dd xt_last 405 | dd xt_fetch 406 | dd xt_dupe 407 | dd xt_fetch 408 | dd xt_last 409 | dd xt_store 410 | dd xt_dp 411 | dd xt_store 412 | intnc dd xt_abort 413 | intskip dd xt_drop 414 | dd xt_drop 415 | dd xt_state 416 | dd xt_fetch 417 | dd xt_zero_branch 418 | dd intdone 419 | dd xt_lit 420 | dd xt_lit 421 | dd xt_comma 422 | dd xt_comma 423 | intdone dd xt_branch 424 | dd interpt 425 | 426 | freemem: 427 | 428 | filesize equ $ - $$ 429 | -------------------------------------------------------------------------------- /msdos/itsy.asm: -------------------------------------------------------------------------------- 1 | ; Itsy Forth 2 | ; Written by John Metcalf 3 | ; Commentary by John Metcalf and Mike Adams 4 | ; 5 | ; Itsy Forth was written for use with NASM, the "Netwide Assembler" 6 | ; that's available for free download (http://www.nasm.us/). 7 | ; The command line for assembling Itsy is: 8 | ; 9 | ; nasm itsy.asm -fbin -o itsy.com 10 | ; 11 | ; If you wish to have an assembly listing, give it this command: 12 | ; 13 | ; nasm itsy.asm -fbin -l itsy.lst -o itsy.com 14 | ; 15 | ;-------------------------------------------------------------------------- 16 | ; Implementation notes: 17 | ; 18 | ; Register Usage: 19 | ; sp - data stack pointer. 20 | ; bp - return stack pointer. 21 | ; si - Forth instruction pointer. 22 | ; di - pointer to current XT (CFA of word currently being executed). 23 | ; bx - TOS (top of data stack). The top value on the data stack is not 24 | ; actually kept on the CPU's data stack. It's kept in the BX register. 25 | ; Having it in a register like this speeds up the operation of 26 | ; the primitive words. They don't have to take the time to pull a 27 | ; value off of the stack; it's already in a register where it can 28 | ; be used right away! 29 | ; ax, cd, dx - Can all be freely used for processing data. The other 30 | ; registers can still be used also, but only with caution. Their 31 | ; contents must be pushed to the stack and then restored before 32 | ; exiting from the word or calling any other Forth words. LOTS of 33 | ; potential for program crashes if you don't do this correctly. 34 | ; The notable exception is the DI register, which can (and is, below) 35 | ; used pretty freely in assembly code, since the concept of a pointer 36 | ; to the current CFA is rather irrelevant in assembly. 37 | ; 38 | ; 39 | ; Structure of an Itsy word definition: 40 | ; # of 41 | ; Bytes: Description: 42 | ; ------ --------------------------------------------------------- 43 | ; 2 Link Field. Contains the address of the link field of the 44 | ; definition preceding this one in the dictionary. The link 45 | ; field of the first def in the dictionary contains 0. 46 | ; Varies Name Field. The first byte of the name field contains the length 47 | ; of the name; succeeding bytes contain the ASCII characters of 48 | ; the name itself. If the high bit of the length is set, the 49 | ; definition is tagged as being an "immediate" word. 50 | ; 2 Code Field. Contains the address of the executable code for 51 | ; the word. For primitives, this will likely be the address 52 | ; of the word's own data field. Note that the header creation 53 | ; macros automatically generate labels for the code field 54 | ; addresses of the words they're used to define, though the 55 | ; CFA labels aren't visible in the code shown below. The 56 | ; assembler macros create labels, known as "execution tags" 57 | ; or XTs, for the code field of each word. 58 | ; Varies Data Field. Contains either a list of the code field addresses 59 | ; of the words that make up this definition, or assembly- 60 | ; language code for primitives, or numeric data for variables 61 | ; and constants and such. 62 | 63 | 64 | ;----------------------------------------------------------------------------- 65 | ; 66 | ; Beginning of actual code. 67 | ; 68 | ; Include the definitions of the macros that are used in NASM to create 69 | ; the headers of the words. See macros.asm for more details. 70 | ;----------------------------------------------------------------------------- 71 | %include "macros.asm" 72 | 73 | ;----------------------------------------------------------------------------- 74 | ; Define the location for the stack. -256 decimal = 0ff00h 75 | ;----------------------------------------------------------------------------- 76 | stack0 equ -256 77 | 78 | ;----------------------------------------------------------------------------- 79 | ; Set the starting point for the executable code. 0100h is the standard 80 | ; origin for programs running under MS-DOS or its equivalents. 81 | ;----------------------------------------------------------------------------- 82 | org 0100h 83 | 84 | ;----------------------------------------------------------------------------- 85 | ; Jump to the location of the start of Itsy's initialization code. 86 | ;----------------------------------------------------------------------------- 87 | jmp xt_abort+2 88 | 89 | ; ------------------- 90 | ; System Variables 91 | ; ------------------- 92 | 93 | ; state - ( -- addr ) true = compiling, false = interpreting 94 | variable 'state',state,0 95 | 96 | ; >in - ( -- addr ) next character in input buffer 97 | variable '>in',to_in,0 98 | 99 | ; #tib - ( -- addr ) number of characters in the input buffer 100 | variable '#tib',number_t_i_b,0 101 | 102 | ; dp - ( -- addr ) first free cell in the dictionary 103 | variable 'dp',dp,freemem 104 | 105 | ; base - ( -- addr ) number base 106 | variable 'base',base,10 107 | 108 | ; last - ( -- addr ) the last word to be defined 109 | ; NOTE: The label "final:" must be placed immediately before 110 | ; the last word defined in this file. If new words are added, 111 | ; make sure they're either added before the "final:" label 112 | ; or the "final:" label is moved to the position immediately 113 | ; before the last word added. 114 | variable 'last',last,final 115 | 116 | ; tib - ( -- addr ) address of the input buffer 117 | constant 'tib',t_i_b,32768 118 | 119 | ; ------------------- 120 | ; Initialisation 121 | ; ------------------- 122 | 123 | ; abort - ( -- ) initialise Itsy then jump to interpret 124 | primitive 'abort',abort 125 | mov ax,word[val_number_t_i_b] ; Load AX with the value contained 126 | ; in the data field of #tib (which 127 | ; was pre-defined above as 0). 128 | mov word[val_to_in],ax ; Save the same number to >in. 129 | xor bp,bp ; Clear the bp register, which is going 130 | ; to be used as the return stack 131 | ; pointer. Since it'll first be 132 | ; decremented when a value is pushed 133 | ; onto it, this means that the first 134 | ; value pushed onto the return stack 135 | ; will be stored at 0FFFEh and 0FFFFh, 136 | ; the very end of memory space, and 137 | ; the stack will grow downward from 138 | ; there. 139 | mov word[val_state],bp ; Clear the value of state. 140 | mov sp,stack0 ; Set the stack pointer to the value 141 | ; defined above. 142 | mov si,xt_interpret+2 ; Initialize Itsy's instruction pointer 143 | ; to the outer interpreter loop. 144 | jmp next ; Jump to the inner interpreter and 145 | ; actually start running Itsy. 146 | 147 | ; ------------------- 148 | ; Compilation 149 | ; ------------------- 150 | 151 | ; , - ( x -- ) compile x to the current definition. 152 | ; Stores the number on the stack to the memory location currently 153 | ; pointed to by dp. 154 | primitive ',',comma 155 | mov di,word[val_dp] ; Put the value of dp into the DI register. 156 | xchg ax,bx ; Move the top of the stack into AX. 157 | stosw ; Store the 16-bit value in AX directly 158 | ; into the address pointed to by DI, and 159 | ; automatically increment DI in the 160 | ; process. 161 | mov word[val_dp],di ; Store the incremented value in DI as the 162 | ; new value for the dictionary pointer. 163 | pop bx ; Pop the new stack top into its proper place. 164 | jmp next ; Go do the next word. 165 | 166 | ; lit - ( -- ) push the value in the cell straight after lit. 167 | ; lit is the word that is compiled into a definition when you put a 168 | ; "literal" number in a Forth definition. When your word is compiled, 169 | ; the CFA of lit gets stored in the definition followed immediately 170 | ; by the value of the number you put into the code. At run time, lit 171 | ; pushes the value of your number onto the stack. 172 | primitive 'lit',lit 173 | push bx ; Push the value in BX to the stack, so that now it'll 174 | ; be 2nd from the top on the stack. The old value is 175 | ; still in BX, though. Now we need to get the new 176 | ; value into BX. 177 | lodsw ; Load into the AX register the 16-bit value pointed 178 | ; to by the SI register (Itsy's instruction pointer, 179 | ; which this op then automatically increments SI by 2). 180 | ; The net result is that we just loaded into AX the 181 | ; 16-bit data immediately following the call to lit, 182 | ; which'll be the data that lit is supposed to load. 183 | xchg ax,bx ; Now swap the contents of the AX and BX registers. 184 | ; lit's data is now in BX, the top of the stack, where 185 | ; we want it. Slick, eh? 186 | jmp next ; Go do the next word. 187 | 188 | ; ------------------- 189 | ; Stack 190 | ; ------------------- 191 | 192 | ; rot - ( x y z -- y z x ) rotate x, y and z. 193 | ; Standard Forth word that extracts number 3rd from the top of the stack 194 | ; and puts it on the top, effectively rotating the top 3 values. 195 | primitive 'rot',rote 196 | pop dx ; Unload "y" from the stack. 197 | pop ax ; Unload "x" from the stack. Remember that "z" is 198 | ; already in BX. 199 | push dx ; Push "y" back onto the stack. 200 | push bx ; Push "z" down into the stack on top of "y". 201 | xchg ax,bx ; Swap "x" into the BX register so that it's now 202 | ; at the top of the stack. 203 | jmp next ; Go do the next word. 204 | 205 | ; drop - ( x -- ) remove x from the stack. 206 | primitive 'drop',drop 207 | pop bx ; Pop the 2nd item on the stack into the BX register, 208 | ; writing over the item that was already at the top 209 | ; of the stack in BX. It's that simple. 210 | jmp next ; Go do the next word. 211 | 212 | ; dup - ( x -- x x ) add a copy of x to the stack 213 | primitive 'dup',dupe 214 | push bx ; Remember that BX is the top of the stack. Push an 215 | ; extra copy of what's in BX onto the stack. 216 | jmp next ; Go do the next word. 217 | 218 | ; # swap - ( x y -- y x ) exchange x and y 219 | primitive 'swap',swap 220 | pop ax ; Pop "x", the number 2nd from the top, into AX. 221 | push bx ; Push "y", the former top of the stack. 222 | xchg ax,bx ; Swap "x" into BX to become the new stack top. We 223 | ; don't care what happens to the value of "y" that 224 | ; ends up in AX because that value is now safely 225 | ; in the stack. 226 | jmp next ; Go do the next word. 227 | 228 | ; ------------------- 229 | ; Maths / Logic 230 | ; ------------------- 231 | 232 | ; + - ( x y -- z) calculate z=x+y then return z 233 | primitive '+',plus 234 | pop ax ; Pop the value of "x" off of the stack. 235 | add bx,ax ; Add "x" to the value of "y" that's at the top of the 236 | ; stack in the BX register. The way the opcode is 237 | ; written, the result is left in the BX register, 238 | ; conveniently at the top of the stack. 239 | jmp next ; Go do the next word. 240 | 241 | ; = - ( x y -- flag ) return true if x=y 242 | primitive '=',equals 243 | pop ax ; Get the "x" value into a register. 244 | sub bx,ax ; Perform BX-AX (or y-x)and leave result in BX. If x and 245 | ; y are equal, this will result in a 0 in BX. But a zero 246 | ; is a false flag in just about all Forth systems, and we 247 | ; want a TRUE flag if the numbers are equal. So... 248 | sub bx,1 ; Subtract 1 from it. If we had a zero before, now we've 249 | ; got a -1 (or 0ffffh), and a carry flag was generated. 250 | ; Any other value in BX will not generate a carry. 251 | sbb bx,bx ; This has the effect of moving the carry bit into the BX 252 | ; register. So, if the numbers were not equal, then the 253 | ; "sub bx,1" didn't generate a carry, so the result will 254 | ; be a 0 in the BX (numbers were not equal, result is 255 | ; false). If the original numbers on the stack were equal, 256 | ; though, then the carry bit was set and then copied 257 | ; into the BX register to act as our true flag. 258 | ; This may seem a bit cryptic, but it produces smaller 259 | ; code and runs faster than a bunch of conditional jumps 260 | ; and immediate loads would. 261 | jmp next ; Go do the next word. 262 | 263 | ; ------------------- 264 | ; Peek and Poke 265 | ; ------------------- 266 | 267 | ; @ - ( addr -- x ) read x from addr 268 | ; "Fetch", as the name of this word is pronounced, reads a 16-bit number from 269 | ; a given memory address, the way the Basic "peek" command does, and leaves 270 | ; it at the top of the stack. 271 | primitive '@',fetch 272 | mov bx,word[bx] ; Read the value in the memory address pointed to by 273 | ; the BX register and move that value directly into 274 | ; BX, replacing the address at the top of the stack. 275 | jmp next ; Go do the next word. 276 | 277 | ; ! - ( x addr -- ) store x at addr 278 | ; Similar to @, ! ("store") writes a value directly to a memory address, like 279 | ; the Basic "poke" command. 280 | primitive '!',store 281 | pop word[bx] ; Okay, this is a bit slick. All in one opcode, we pop 282 | ; the number that's 2nd from the top of the stack 283 | ; (i.e. "x" in the argument list) and send it directly 284 | ; to the memory address pointed to by BX (the address 285 | ; at the top of the stack). 286 | pop bx ; Pop whatever was 3rd from the top of the stack into 287 | ; the BX register to become the new TOS. 288 | jmp next ; Go do the next word. 289 | 290 | ; ------------------- 291 | ; Inner Interpreter 292 | ; ------------------- 293 | 294 | ; This routine is the very heart of the Forth system. After execution, all 295 | ; Forth words jump to this routine, which pulls up the code field address 296 | ; of the next word to be executed and then executes it. Note that next 297 | ; doesn't have a header of its own. 298 | next lodsw ; Load into the AX register the 16-bit value pointed 299 | ; to by the SI register (Itsy's instruction pointer, 300 | ; which this op then automatically increments SI by 2). 301 | ; The net result is that we just loaded into AX the 302 | ; CFA of the next word to be executed and left the 303 | ; instruction pointer pointing to the word that 304 | ; follows the next one. 305 | xchg di,ax ; Move the CFA of the next word into the DI register. 306 | ; We have to do this because the 8086 doesn't have 307 | ; an opcode for "jmp [ax]". 308 | jmp word[di] ; Jump and start executing code at the address pointed 309 | ; to by the value in the DI register. 310 | 311 | ; ------------------- 312 | ; Flow Control 313 | ; ------------------- 314 | 315 | ; 0branch - ( x -- ) jump if x is zero 316 | ; This is the primitive word that's compiled as the runtime code in 317 | ; an IF...THEN statement. The number compiled into the word's definition 318 | ; immediately after 0branch is the address of the word in the definition 319 | ; that we're branching to. That address gets loaded into the instruction 320 | ; pointer. In essence, this word sees a false flag (i.e. a zero) and 321 | ; then jumps over the words that comprise the "do this if true" clause 322 | ; of an IF...ELSE...THEN statement. 323 | primitive '0branch',zero_branch 324 | lodsw ; Load into the AX register the 16-bit value pointed 325 | ; to by the SI register (Itsy's instruction pointer, 326 | ; which this op then automatically increments SI by 2). 327 | ; The net result is that we just loaded into AX the 328 | ; CFA of the next word to be executed and left the 329 | ; instruction pointer pointing to the word that 330 | ; follows the next one. 331 | test bx,bx ; See if there's a 0 at the top of the stack. 332 | jne zerob_z ; If it's not zero, jump. 333 | xchg ax,si ; If the flag is a zero, we want to move the CFA of 334 | ; the word we want to branch to into the Forth 335 | ; instruction pointer. If the TOS was non-zero, the 336 | ; instruction pointer is left still pointing to the CFA 337 | ; of the word that follows the branch reference. 338 | zerob_z pop bx ; Throw away the flag and move everything on the stack 339 | ; up by one spot. 340 | jmp next ; Oh, you know what this does by now... 341 | 342 | ; branch - ( addr -- ) unconditional jump 343 | ; This is one of the pieces of runtime code that's compiled by 344 | ; BEGIN/WHILE/REPEAT, BEGIN/AGAIN, and BEGIN/UNTIL loops. As with 0branch, 345 | ; the number compiled into the dictionary immediately after the branch is 346 | ; the address of the word in the definition that we're branching to. 347 | primitive 'branch',branch 348 | mov si,word[si] ; The instruction pointer has already been 349 | ; incremented to point to the address immediately 350 | ; following the branch statement, which means it's 351 | ; pointing to where our branch-to address is 352 | ; stored. This opcode takes the value pointed to 353 | ; by the SI register and loads it directly into 354 | ; the SI, which is used as Forth's instruction 355 | ; pointer. 356 | jmp next 357 | 358 | ; execute - ( xt -- ) call the word at xt 359 | primitive 'execute',execute 360 | mov di,bx ; Move the jump-to address to the DI register. 361 | pop bx ; Pop the next number on the stack into the TOS. 362 | jmp word[di] ; Jump to the address pointed to by the DI register. 363 | 364 | ; exit - ( -- ) return from the current word 365 | primitive 'exit',exit 366 | mov si,word[bp] ; The BP register is used as Itsy's return stack 367 | ; pointer. The value at its top is the address of 368 | ; the instruction being pointed to before the word 369 | ; currently being executed was called. This opcode 370 | ; loads that address into the SI register. 371 | inc bp ; Now we have to increment BP twice to do a manual 372 | ; "pop" of the return stack pointer. 373 | inc bp ; 374 | jmp next ; We jump to next with the SI now having the address 375 | ; pointing into the word that called the one we're 376 | ; finishing up now. The result is that next will go 377 | ; back into that calling word and pick up where it 378 | ; left off earlier. 379 | 380 | ; ------------------- 381 | ; String 382 | ; ------------------- 383 | 384 | ; count - ( addr -- addr2 len ) 385 | ; count is given the address of a counted string (like the name field of a 386 | ; word definition in Forth, with the first byte being the number of 387 | ; characters in the string and immediately followed by the characters 388 | ; themselves). It returns the length of the string and a pointer to the 389 | ; first actual character in the string. 390 | primitive 'count',count 391 | inc bx ; Increment the address past the length byte so 392 | ; it now points to the actual string. 393 | push bx ; Push the new address onto the stack. 394 | mov bl,byte[bx-1] ; Move the length byte into the lower half of 395 | ; the BX register. 396 | mov bh,0 ; Load a 0 into the upper half of the BX reg. 397 | jmp next 398 | 399 | ; >number - ( double addr len -- double2 addr2 zero ) if successful, or 400 | ; ( double addr len -- int addr2 nonzero ) on error. 401 | ; Convert a string to an unsigned double-precision integer. 402 | ; addr points to a string of len characters which >number attempts to 403 | ; convert to a number using the current number base. >number returns 404 | ; the portion of the string which can't be converted, if any. 405 | ; Note that, as is standard for most Forths, >number attempts to 406 | ; convert a number into a double (most Forths also leave it as a double 407 | ; if they find a decimal point, but >number doesn't check for that) and 408 | ; that it's called with a dummy double value already on the stack. 409 | ; On return, if the top of the stack is 0, the number was successfully 410 | ; converted. If the top of the stack is non-zero, there was an error. 411 | primitive '>number',to_number 412 | ; Start out by loading values from the stack 413 | ; into various registers. Remember that the 414 | ; top of the stack, the string length, is 415 | ; already in bx. 416 | pop di ; Put the address into di. 417 | pop cx ; Put the high word of the double value into cx 418 | pop ax ; and the low word of the double value into ax. 419 | to_numl test bx,bx ; Test the length byte. 420 | je to_numz ; If the string's length is zero, we're done. 421 | ; Jump to end. 422 | push ax ; Push the contents of ax (low word) so we can 423 | ; use it for other things. 424 | mov al,byte[di] ; Get the next byte in the string. 425 | cmp al,'a' ; Compare it to a lower-case 'a'. 426 | jc to_nums ; "jc", "jump if carry", is a little cryptic. 427 | ; I think a better choice of mnemonic would be 428 | ; "jb", "jump if below", for understanding 429 | ; what's going on here. Jump if the next byte 430 | ; in the string is less than 'a'. If the chr 431 | ; is greater than or equal to 'a', then it may 432 | ; be a digit larger than 9 in a hex number. 433 | sub al,32 ; Subtract 32 from the character. If we're 434 | ; converting hexadecimal input, this'll have 435 | ; the effect of converting lower case to 436 | ; upper case. 437 | to_nums cmp al,'9'+1 ; Compare the character to whatever character 438 | ; comes after '9'. 439 | jc to_numg ; If it's '9' or less, it's possibly a decimal 440 | ; digit. Jump for further testing. 441 | cmp al,'A' ; Compare the character with 'A'. 442 | jc to_numh ; If it's one of those punctuation marks 443 | ; between '9' and 'A', we've got an error. 444 | ; Jump to the end. 445 | sub al,7 ; The character is a potentially valid digit 446 | ; for a base larger than 10. Resize it so 447 | ; that 'A' becomes the digit for 11, 'B' 448 | ; signifies a 11, etc. 449 | to_numg sub al,48 ; Convert the digit to its corresponding 450 | ; number. This op could also have been 451 | ; written as "sub al,'0'" 452 | mov ah,0 ; Clear the ah register. The AX reg now 453 | ; contains the numeric value of the new digit. 454 | cmp al,byte[val_base] ; Compare the digit's value to the base. 455 | jnc to_numh ; If the digit's value is above or equal to 456 | ; to the base, we've got an error. Jump to end. 457 | ; (I think using "jae" would be less cryptic.) 458 | ; (NASM's documentation doesn't list jae as a 459 | ; valid opcode, but then again, it doesn't 460 | ; list jnc in its opcode list either.) 461 | xchg ax,dx ; Save the digit value in AX by swapping it 462 | ; the contents of DX. (We don't care what's 463 | ; in DX; it's scratchpad.) 464 | pop ax ; Recall the low word of our accumulated 465 | ; double number and load it into AX. 466 | push dx ; Save the digit value. (The DX register 467 | ; will get clobbered by the upcoming mul.) 468 | xchg ax,cx ; Swap the low and high words of our double 469 | ; number. AX now holds the high word, and 470 | ; CX the low. 471 | mul word[val_base] ; 16-bit multiply the high word by the base. 472 | ; High word of product is in DX, low in AX. 473 | ; But we don't need the high word. It's going 474 | ; to get overwritten by the next mul. 475 | xchg ax,cx ; Save the product of the first mul to the CX 476 | ; register and put the low word of our double 477 | ; number back into AX. 478 | mul word[val_base] ; 16-bit multiply the low word of our converted 479 | ; double number by the base, then add the high 480 | add cx,dx ; word of the product to the low word of the 481 | ; first mul (i.e. do the carry). 482 | pop dx ; Recall the digit value, then add it in to 483 | add ax,dx ; the low word of our accumulated double- 484 | ; precision total. 485 | ; NOTE: One might think, as I did at first, 486 | ; that we need to deal with the carry from 487 | ; this operation. But we just multiplied 488 | ; the number by the base, and then added a 489 | ; number that's already been checked to be 490 | ; smaller than the base. In that case, there 491 | ; will never be a carry out from this 492 | ; addition. Think about it: You multiply a 493 | ; number by 10 and get a new number whose 494 | ; lowest digit is a zero. Then you add another 495 | ; number less than 10 to it. You'll NEVER get 496 | ; a carry from adding zero and a number less 497 | ; than 10. 498 | dec bx ; Decrement the length. 499 | inc di ; Inc the address pointer to the next byte 500 | ; of the string we're converting. 501 | jmp to_numl ; Jump back and convert any remaining 502 | ; characters in the string. 503 | to_numz push ax ; Push the low word of the accumulated total 504 | ; back onto the stack. 505 | to_numh push cx ; Push the high word of the accumulated total 506 | ; back onto the stack. 507 | push di ; Push the string address back onto the stack. 508 | ; Note that the character count is still in 509 | ; BX and is therefore already at the top of 510 | ; the stack. If BX is zero at this point, 511 | ; we've successfully converted the number. 512 | jmp next ; Done. Return to caller. 513 | 514 | ; ----------------------- 515 | ; Terminal Input / Output 516 | ; ----------------------- 517 | 518 | ; accept - ( addr len -- len2 ) read a string from the terminal 519 | ; accept reads a string of characters from the terminal. The string 520 | ; is stored at addr and can be up to len characters long. 521 | ; accept returns the actual length of the string. 522 | primitive 'accept',accept 523 | pop di ; Pop the address of the string buffer into DI. 524 | xor cx,cx ; Clear the CX register. 525 | acceptl call getchar ; Do the bios call to get a chr from the keyboard. 526 | cmp al,8 ; See if it's a backspace (ASCII character 08h). 527 | jne acceptn ; If not, jump for more testing. 528 | jcxz acceptb ; "Jump if CX=0". If the user typed a backspace but 529 | ; there isn't anything in the buffer to erase, jump 530 | ; to the code that'll beep at him to let him know. 531 | call outchar ; User typed a backspace. Go ahead and output it. 532 | mov al,' ' ; Then output a space to wipe out the character that 533 | call outchar ; the user had just typed. 534 | mov al,8 ; Then output another backspace to put the cursor 535 | call outchar ; back into position to read another character. 536 | dec cx ; We just deleted a character. Now we need to decrement 537 | dec di ; both the counter and the buffer pointer. 538 | jmp acceptl ; Then go back for another character. 539 | acceptn cmp al,13 ; See if the input chr is a carriage return. 540 | je acceptz ; If so, we're done. jump to the end of the routine. 541 | cmp cx,bx ; Compare current string length to the maximum allowed. 542 | jne accepts ; If the string's not too long, jump. 543 | acceptb mov al,7 ; User's input is unusable in some way. Send the 544 | call outchar ; BEL chr to make a beep sound to let him know. 545 | jmp acceptl ; Then go back and let him try again. 546 | accepts stosb ; Save the input character into the buffer. Note that 547 | ; this opcode automatically increments the pointer 548 | ; in the DI register. 549 | inc cx ; But we have to increment the length counter manually. 550 | call outchar ; Echo the input character back to the display. 551 | jmp acceptl ; Go back for another character. 552 | acceptz jcxz acceptb ; If the buffer is empty, beep at the user and go 553 | ; back for more input. 554 | mov al,13 ; Send a carriage return to the display... 555 | call outchar ; 556 | mov al,10 ; ...followed by a linefeed. 557 | call outchar ; 558 | mov bx,cx ; Move the count to the top of the stack. 559 | jmp next ; 560 | 561 | ; word - ( char -- addr ) parse the next word in the input buffer 562 | ; word scans the "terminal input buffer" (whose address is given by the 563 | ; system constant tib) for words to execute, starting at the current 564 | ; address stored in the input buffer pointer >in. The character on the 565 | ; stack when word is called is the one that the code will look for as 566 | ; the separator between words. 999 times out of 1000,; this is going to 567 | ; be a space. 568 | primitive 'word',word 569 | mov di,word[val_dp] ; Load the dictionary pointer into DI. 570 | ; This is going to be the address that 571 | ; we copy the input word to. For the 572 | ; sake of tradition, let's call this 573 | ; scratchpad area the "pad". 574 | push di ; Save the pad pointer to the stack. 575 | mov dx,bx ; Copy the word separator to DX. 576 | mov bx,word[val_t_i_b] ; Load the address of the input buffer 577 | mov cx,bx ; into BX, and save a copy to CX. 578 | add bx,word[val_to_in] ; Add the value of >in to the address 579 | ; of tib to get a pointer into the 580 | ; buffer. 581 | add cx,word[val_number_t_i_b] ; Add the value of #tib to the address 582 | ; of tib to get a pointer to the last 583 | ; chr in the input buffer. 584 | wordf cmp cx,bx ; Compare the current buffer pointer to 585 | ; the end-of-buffer pointer. 586 | je wordz ; If we've reached the end, jump. 587 | mov al,byte[bx] ; Get the next chr from the buffer 588 | inc bx ; and increment the pointer. 589 | cmp al,dl ; See if it's the separator. 590 | je wordf ; If so, jump. 591 | wordc inc di ; Increment our pad pointer. Note that 592 | ; if this is our first time through the 593 | ; routine, we're incrementing to the 594 | ; 2nd address in the pad, leaving the 595 | ; first byte of it empty. 596 | mov byte[di],al ; Write the new chr to the pad. 597 | cmp cx,bx ; Have we reached the end of the 598 | ; input buffer? 599 | je wordz ; If so, jump. 600 | mov al,byte[bx] ; Get another byte from the input 601 | inc bx ; buffer and increment the pointer. 602 | cmp al,dl ; Is the new chr a separator? 603 | jne wordc ; If not, go back for more. 604 | wordz mov byte[di+1],32 ; Write a space at the end of the text 605 | ; we've written so far to the pad. 606 | mov ax,word[val_dp] ; Load the address of the pad into AX. 607 | xchg ax,di ; Swap the pad address with the pad 608 | sub ax,di ; pointer then subtract to get the 609 | ; length of the text in the pad. 610 | ; The result goes into AX, leaving the 611 | ; pad address in DI. 612 | mov byte[di],al ; Save the length byte into the first 613 | ; byte of the pad. 614 | sub bx,word[val_t_i_b] ; Subtract the base address of the 615 | ; input buffer from the pointer value 616 | ; to get the new value of >in... 617 | mov word[val_to_in],bx ; ...then save it to its variable. 618 | pop bx ; Pop the value of the pad address 619 | ; that we saved earlier back out to 620 | ; the top of the stack as our return 621 | ; value. 622 | jmp next 623 | 624 | ; emit - ( char -- ) display char on the terminal 625 | primitive 'emit',emit 626 | xchg ax,bx ; Move our output character to the AX register. 627 | call outchar ; Send it to the display. 628 | pop bx ; Pop the argument off the stack. 629 | jmp next 630 | 631 | getchar mov ah,7 ; This headerless routine does an MS-DOS Int 21h call, 632 | int 021h ; reading a character from the standard input device into 633 | mov ah,0 ; the AL register. We start out by putting a 7 into AH to 634 | ret ; identify the function we want to perform. The character 635 | ; gets returned in AL, and then we manually clear out 636 | ; AH so that we can have a 16-bit result in AX. 637 | 638 | outchar xchg ax,dx ; This headerless routine does an MS-DOS Int 21h call, 639 | mov ah,2 ; sending a character in the DL register to the standard 640 | int 021h ; output device. The 2 in the AH register identifies what 641 | ret ; function we want to perform. 642 | 643 | ; ----------------------- 644 | ; Dictionary Search 645 | ; ----------------------- 646 | 647 | ; find - ( addr -- addr2 flag ) look up word in the dictionary 648 | ; find looks in the Forth dictionary for a word with the name given in the 649 | ; counted string at addr. One of the following will be returned: 650 | ; flag = 0, addr2 = counted string --> word was not found 651 | ; flag = 1, addr2 = call address --> word is immediate 652 | ; flag = -1, addr2 = call address --> word is not immediate 653 | primitive 'find',find 654 | mov di,val_last ; Get the address of the link field of the last 655 | ; word in the dictionary. Put it in DI. 656 | findl push di ; Save the link field pointer. 657 | push bx ; Save the address of the name we're looking for. 658 | mov cl,byte[bx] ; Copy the length of the string into CL 659 | mov ch,0 ; Clear CH to make a 16 bit counter. 660 | inc cx ; Increment the counter. 661 | findc mov al,byte[di+2] ; Get the length byte of whatever word in the 662 | ; dictionary we're currently looking at. 663 | and al,07Fh ; Mask off the immediate bit. 664 | cmp al,byte[bx] ; Compare it with the length of the string. 665 | je findm ; If they're the same, jump. 666 | pop bx ; Nope, can't be the same if the lengths are 667 | pop di ; different. Pop the saved values back to regs. 668 | mov di,word[di] ; Get the next link address. 669 | test di,di ; See if it's zero. If it's not, then we've not 670 | jne findl ; hit the end of the dictionary yet. Then jump 671 | ; back and check the next word in the dictionary. 672 | findnf push bx ; End of dictionary. Word wasn't found. Push the 673 | ; string address to the stack. 674 | xor bx,bx ; Clear the BX register (make a "false" flag). 675 | jmp next ; Return to caller. 676 | findm inc di ; The lengths match, but do the chrs? Increment 677 | ; the link field pointer. (That may sound weird, 678 | ; especially on the first time through this loop. 679 | ; But remember that, earlier in the loop, we 680 | ; loaded the length byte out the dictionary by an 681 | ; indirect reference to DI+2. We'll do that again 682 | ; in a moment, so what in effect we're actually 683 | ; doing here is incrementing what's now going to 684 | ; be treated as a string pointer for the name in 685 | ; the dictionary as we compare the characters 686 | ; in the strings.) 687 | inc bx ; Increment the pointer to the string we're 688 | ; checking. 689 | loop findc ; Decrements the counter in CX and, if it's not 690 | ; zero yet, loops back. The same code that started 691 | ; out comparing the length bytes will go through 692 | ; and compare the characters in the string with 693 | ; the chrs in the dictionary name we're pointing 694 | ; at. 695 | pop bx ; If we got here, then the strings match. The 696 | ; word is in the dictionary. Pop the string's 697 | ; starting address and throw it away. We don't 698 | ; need it now that we know we're looking at a 699 | ; defined word. 700 | pop di ; Restore the link field address for the dictionary 701 | ; word whose name we just looked at. 702 | mov bx,1 ; Put a 1 at the top of the stack. 703 | inc di ; Increment the pointer past the link field to the 704 | inc di ; name field. 705 | mov al,byte[di] ; Get the length of the word's name. 706 | test al,080h ; See if it's an immediate. 707 | jne findi ; "test" basically performs an AND without 708 | ; actually changing the register. If the 709 | ; immediate bit is set, we'll have a non-zero 710 | ; result and we'll skip the next instruction, 711 | ; leaving a 1 in BX to represent that we found 712 | ; an immediate word. 713 | neg bx ; But if it's not an immediate word, we fall 714 | ; through and generate a -1 instead to get the 715 | ; flag for a non-immediate word. 716 | findi and ax,31 ; Mask off all but the valid part of the name's 717 | ; length byte. 718 | add di,ax ; Add the length to the name field address then 719 | inc di ; add 1 to get the address of the code field. 720 | push di ; Push the CFA onto the stack. 721 | jmp next ; We're done. 722 | 723 | ; ----------------------- 724 | ; Colon Definition 725 | ; ----------------------- 726 | 727 | ; : - ( -- ) define a new Forth word, taking the name from the input buffer. 728 | ; Ah! We've finally found a word that's actually defined as a Forth colon 729 | ; definition rather than an assembly language routine! Partly, anyway; the 730 | ; first part is Forth code, but the end is the assembly language run-time 731 | ; routine that, incidentally, executes Forth colon definitions. Notice that 732 | ; the first part is not a sequence of opcodes, but rather is a list of 733 | ; code field addresses for the words used in the definition. In each code 734 | ; field of each defined word is an "execution tag", or "xt", a pointer to 735 | ; the runtime code that executes the word. In a Forth colon definition, this 736 | ; is going to be a pointer to the docolon routine we see in the second part 737 | ; of the definition of colon itself below. 738 | colon ':',colon 739 | dw xt_lit,-1 ; If you write a Forth routine where you put an 740 | ; integer number right in the code, such as the 741 | ; 2 in the phrase, "dp @ 2 +", lit is the name 742 | ; of the routine that's called at runtime to put 743 | ; that integer on the stack. Here, lit pushes 744 | ; the -1 stored immediately after it onto the 745 | ; stack. 746 | dw xt_state ; The runtime code for a variable leaves its 747 | ; address on the stack. The address of state, 748 | ; in this case. 749 | dw xt_store ; Store that -1 into state to tell the system 750 | ; that we're switching from interpret mode into 751 | ; compile mode. Other than creating the header, 752 | ; colon doesn't actually compile the words into 753 | ; the new word. That task is performed in 754 | ; interpret, but it needs this new value stored 755 | ; into state to tell it to do so. 756 | dw xt_create ; Now we call the word that's going to create the 757 | ; header for the new colon definition we're going 758 | ; to compile. 759 | dw xt_do_semi_code ; Write, into the code field of the header we just 760 | ; created, the address that immediately follows 761 | ; this statement: the address of the docolon 762 | ; routine, which is the code that's responsible 763 | ; for executing the colon definition we're 764 | ; creating. 765 | docolon dec bp ; Here's the runtime code for colon words. 766 | dec bp ; Basically, what docolon does is similar to 767 | ; calling a subroutine, in that we have to push 768 | ; the return address to the stack. Since the 80x86 769 | ; doesn't directly support more than one stack and 770 | ; the "real" stack is used for data, we have to 771 | ; operate the Forth virtual machine's return stack 772 | ; manually. So, first, we manually decrement the 773 | ; return stack pointer twice to point to where 774 | ; we're going to save the return address. 775 | mov word[bp],si ; Then we write that address directly from the 776 | ; instruction pointer to that location. 777 | lea si,[di+2] ; We now have to tell Forth to start running the 778 | ; words in the colon definition we just started. 779 | ; The value in DI was left pointing at the code 780 | ; field of the word that we just started that just 781 | ; jumped into docolon. By loading into the 782 | ; instruction pointer the value that's 2 bytes 783 | ; later, at the start of the data field, we're 784 | ; loading into the IP the address of the first 785 | ; word in that definition. Execution of the other 786 | ; words in that definition will occur in sequence 787 | ; from here on. 788 | jmp next ; Now that we're pointing to the correct 789 | ; instruction, go do it. 790 | 791 | ; ; - ( -- ) complete the Forth word being compiled 792 | colon ';',semicolon,immediate 793 | ; Note above that ; is immediate, the first such 794 | ; word we've seen here. It needs to be so because 795 | ; it's used only during the compilation of a colon 796 | ; definition and we want it to execute rather than 797 | ; just being stored in the definition. 798 | dw xt_lit,xt_exit ; Put the address of the code field of exit onto 799 | ; the stack. 800 | dw xt_comma ; Store it into the dictionary. 801 | dw xt_lit,0 ; Now put a zero on the stack... 802 | dw xt_state ; along with the address of the state variable. 803 | dw xt_store ; Store the 0 into state to indicate that we're 804 | ; done compiling a word and are now back into 805 | ; interpret mode. 806 | dw xt_exit ; exit is the routine that finishes up the 807 | ; execution of a colon definition and jumps to 808 | ; next in order to start execution of the next 809 | ; word. 810 | 811 | ; ----------------------- 812 | ; Headers 813 | ; ----------------------- 814 | 815 | ; create - ( -- ) build a header for a new word in the dictionary, taking 816 | ; the name from the input buffer 817 | colon 'create',create 818 | dw xt_dp,xt_fetch ; Get the current dictionary pointer. 819 | dw xt_last,xt_fetch ; Get the LFA of the last word in the dictionary. 820 | dw xt_comma ; Save the value of last at the current point in 821 | ; the dictionary to become the link field for 822 | ; the header we're creating. Remember that comma 823 | ; automatically increments the value of dp. 824 | dw xt_last,xt_store ; Save the address of the link field we just 825 | ; created as the new value of last. 826 | dw xt_lit,32 ; Parse the input buffer for the name of the 827 | dw xt_word ; word we're creating, using a space for the 828 | ; separation character when we invoke word. 829 | ; Remember that word copies the parsed name 830 | ; as a counted string to the location pointed 831 | ; to by dp, which not coincidentally is 832 | ; exactly what and where we need it for the 833 | ; header we're creating. 834 | dw xt_count ; Get the address of the first character of the 835 | ; word's name, and the name's length. 836 | dw xt_plus ; Add the length to the address to get the addr 837 | ; of the first byte after the name, then store 838 | dw xt_dp,xt_store ; that address as the new value of dp. 839 | dw xt_lit,0 ; Put a 0 on the stack, and store it as a dummy 840 | dw xt_comma ; placeholder in the new header's CFA. 841 | dw xt_do_semi_code ; Write, into the code field of the header we just 842 | ; created, the address that immediately follows 843 | ; this statement: the address of the dovar 844 | ; routine, which is the code that's responsible 845 | ; for pushing onto the stack the data field 846 | ; address of the word whose header we just 847 | ; created when it's executed. 848 | dovar push bx ; Push the stack to make room for the new value 849 | ; we're about to put on top. 850 | lea bx,[di+2] ; This opcode loads into bx whatever two plus the 851 | ; value of the contents of DI might be, as opposed 852 | ; to a "mov bx,[di+2]", which would move into BX 853 | ; the value stored in memory at that location. 854 | ; What we're actually doing here is calculating 855 | ; the address of the data field that follows 856 | ; this header so we can leave it on the stack. 857 | jmp next ; 858 | 859 | ; # (;code) - ( -- ) replace the xt of the word being defined with a pointer 860 | ; to the code immediately following (;code) 861 | ; The idea behind this compiler word is that you may have a word that does 862 | ; various compiling/accounting tasks that are defined in terms of Forth code 863 | ; when its being used to compile another word, but afterward, when the new 864 | ; word is executed in interpreter mode, you want your compiling word to do 865 | ; something else that needs to be coded in assembly. (;code) is the word that 866 | ; says, "Okay, that's what you do when you're compiling, but THIS is what 867 | ; you're going to do while executing, so look sharp, it's in assembly!" 868 | ; Somewhat like the word DOES>, which is used in a similar manner to define 869 | ; run-time code in terms of Forth words. 870 | primitive '(;code)',do_semi_code 871 | mov di,word[val_last] ; Get the LFA of the last word in dictionary 872 | ; (i.e. the word we're currently in the middle 873 | ; of compiling) and put it in DI. 874 | mov al,byte[di+2] ; Get the length byte from the name field. 875 | and ax,31 ; Mask off the immediate bit and leave only 876 | ; the 5-bit integer length. 877 | add di,ax ; Add the length to the pointer. If we add 3 878 | ; to the value in DI at this point, we'll 879 | ; have a pointer to the code field. 880 | mov word[di+3],si ; Store the current value of the instruction 881 | ; pointer into the code field. That value is 882 | ; going to point to whatever follows (;code) in 883 | ; the word being compiled, which in the case 884 | ; of (;code) had better be assembly code. 885 | mov si,word[bp] ; Okay, we just did something funky with the 886 | ; instruction pointer; now we have to fix it. 887 | ; Directly load into the instruction pointer 888 | ; the value that's currently at the top of 889 | ; the return stack. 890 | inc bp ; Then manually increment the return stack 891 | inc bp ; pointer. 892 | jmp next ; Done. Go do another word. 893 | 894 | ; ----------------------- 895 | ; Constants 896 | ; ----------------------- 897 | 898 | ; constant - ( x -- ) create a new constant with the value x, taking the name 899 | ; from the input buffer 900 | colon 'constant',constant 901 | dw xt_create ; Create the constant's header. 902 | dw xt_comma ; Store the constant's value into the word's 903 | ; data field. 904 | dw xt_do_semi_code ; Write, into the code field of the header we just 905 | ; created, the address that immediately follows 906 | ; this statement: the address of the doconst 907 | ; routine, which is the code that's responsible 908 | ; for pushing onto the stack the value that's 909 | ; contained in the data field of the word whose 910 | ; header we just created when that word is 911 | ; invoked. 912 | doconst push bx ; Push the stack down. 913 | mov bx,word[di+2] ; DI should be pointing to the constant's code 914 | ; field. Load into the top of the stack the 915 | ; value 2 bytes further down from the code field, 916 | ; i.e. the constant's actual value. 917 | jmp next ; 918 | 919 | 920 | ; ----------------------- 921 | ; Outer Interpreter 922 | ; ----------------------- 923 | 924 | ; ------------------------------------------------------- 925 | ; NOTE! The following line with the final: label MUST be 926 | ; immediately before the final word definition! 927 | ; ------------------------------------------------------- 928 | 929 | final: 930 | 931 | colon 'interpret',interpret 932 | interpt dw xt_number_t_i_b ; Get the number of characters in the input 933 | dw xt_fetch ; buffer. 934 | dw xt_to_in ; Get the index into the input buffer. 935 | dw xt_fetch ; 936 | dw xt_equals ; See if they're the same. 937 | dw xt_zero_branch ; If not, it means there's still some text in 938 | dw intpar ; the buffer. Go process it. 939 | dw xt_t_i_b ; if #tib = >in, we're out of text and need to 940 | dw xt_lit ; read some more. Put a 50 on the stack to tell 941 | dw 50 ; accept to read up to 50 more characters. 942 | dw xt_accept ; Go get more input. 943 | dw xt_number_t_i_b ; Store into #tib the actual number of characters 944 | dw xt_store ; that accept read. 945 | dw xt_lit ; Reposition >in to index the 0th byte in the 946 | dw 0 ; input buffer. 947 | dw xt_to_in ; 948 | dw xt_store ; 949 | intpar dw xt_lit ; Put a 32 on the stack to represent an ASCII 950 | dw 32 ; space character. Then tell word to scan the 951 | dw xt_word ; buffer looking for that character. 952 | dw xt_find ; Once word has parsed out a string, have find 953 | ; see if that string matches the name of any 954 | ; words already defined in the dictionary. 955 | dw xt_dupe ; Copy the flag returned by find, then jump if 956 | dw xt_zero_branch ; it's a zero, meaning that the string doesn't 957 | dw intnf ; match any defined word names. 958 | dw xt_state ; We've got a word match. Are we interpreting or 959 | dw xt_fetch ; do we want to compile it? See if find's flag 960 | dw xt_equals ; matches the current value of state. 961 | dw xt_zero_branch ; If so, we've got an immediate. Jump. 962 | dw intexc ; 963 | dw xt_comma ; Not immediate. Store the word's CFA in the 964 | dw xt_branch ; dictionary then jump to the end of the loop. 965 | dw intdone ; 966 | intexc dw xt_execute ; We found an immediate word. Execute it then 967 | dw xt_branch ; jump to the end of the loop. 968 | dw intdone ; 969 | intnf dw xt_dupe ; Okay, it's not a word. Is it a number? Copy 970 | ; the flag, which we've already proved is 0, 971 | ; thereby creating a double-precision value of 972 | ; 0 at the top of the stack. We'll need this 973 | ; shortly when we call >number. 974 | dw xt_rote ; Rotate the string's address to the top of 975 | ; the stack. Note that it's still a counted 976 | ; string. 977 | dw xt_count ; Use count to split the string's length byte 978 | ; apart from its text. 979 | dw xt_to_number ; See if we can convert the text into a number. 980 | dw xt_zero_branch ; If we get a 0 from 0branch, we got a good 981 | dw intskip ; conversion. Jump and continue. 982 | dw xt_state ; We had a conversion error. Find out whether 983 | dw xt_fetch ; we're interpreting or compiling. 984 | dw xt_zero_branch ; If state=0, we're interpreting. Jump 985 | dw intnc ; further down. 986 | dw xt_last ; We're compiling. Shut the compiler down in an 987 | dw xt_fetch ; orderly manner. Get the LFA of the word we 988 | dw xt_dupe ; were trying to compile. Set aside a copy of it, 989 | dw xt_fetch ; then retrieve from it the LFA of the old "last 990 | dw xt_last ; word" and resave that as the current last word. 991 | dw xt_store ; 992 | dw xt_dp ; Now we have to save the LFA of the word we just 993 | dw xt_store ; tried to compile back into the dictionary 994 | ; pointer. 995 | intnc dw xt_abort ; Whether we were compiling or interpreting, 996 | ; either way we end up here if we had an 997 | ; unsuccessful number conversion. Call abort 998 | ; and reset the system. 999 | intskip dw xt_drop ; >number was successful! Drop the address and 1000 | dw xt_drop ; the high word of the double-precision numeric 1001 | ; value it returned. We don't need either. What's 1002 | ; left on the stack is the single-precision 1003 | ; number we just converted. 1004 | dw xt_state ; Are we compiling or interpreting? 1005 | dw xt_fetch ; 1006 | dw xt_zero_branch ; If we're interpreting, jump on down. 1007 | dw intdone ; 1008 | dw xt_lit ; No, John didn't stutter here. These 4 lines are 1009 | dw xt_lit ; how "['] lit , ," get encoded. We need to store 1010 | dw xt_comma ; lit's own CFA into the word, followed by the 1011 | dw xt_comma ; number we just converted from text input. 1012 | intdone dw xt_branch ; Jump back to the beginning of the interpreter 1013 | dw interpt ; loop and process more input. 1014 | 1015 | freemem: 1016 | 1017 | ; That's it! So, there you have it! Only 33 named Forth words... 1018 | ; 1019 | ; , @ >in dup base word abort 0branch interpret 1020 | ; + ! lit swap last find create constant (;code) 1021 | ; = ; tib drop emit state accept >number 1022 | ; : dp rot #tib exit count execute 1023 | ; 1024 | ; ...plus 6 pieces of headerless code and run-time routines... 1025 | ; 1026 | ; getchar outchar docolon dovar doconst next 1027 | ; 1028 | ; ...are all that's required to produce a functional Forth interpreter 1029 | ; capable of compiling colon definitions, only 978 bytes long! Granted, 1030 | ; it's lacking a number of key critical words that make it nigh unto 1031 | ; impossible to do anything useful, but this just goes to show just 1032 | ; how small a functioning Forth system can be made. 1033 | 1034 | -------------------------------------------------------------------------------- /msdos/macros.asm: -------------------------------------------------------------------------------- 1 | ; Itsy Forth - Macros 2 | ; Written by John Metcalf 3 | ; Commentary by Mike Adams 4 | ; 5 | ; Itsy Forth was written for use with NASM, the "Netwide Assembler" 6 | ; (http://www.nasm.us/). It uses a number of macros to deal with the tedium 7 | ; of generating the headers for the words that are defined in Itsy's source 8 | ; code file. The macros, and the explanations of what they're doing, are 9 | ; listed below: 10 | 11 | ;-------------------------------------------------------------------------- 12 | ; First, two variables are defined for use by the macros: 13 | ; link is the initial value for the first link field that'll 14 | ; be defined. It's value will be updated with each header 15 | ; that's created. 16 | %define link 0 17 | 18 | ; A bitmask that'll be called "immediate" will be used to 19 | ; encode the flag into the length bytes of word names in order 20 | ; to indicate that the word will be of the immediate type. 21 | %define immediate 080h 22 | 23 | ;-------------------------------------------------------------------------- 24 | ; The first macro defined is the primary one used by the others, "head". 25 | ; It does the lion's share of the work for the other macros that'll be 26 | ; defined afterwards. Its commands perform the following operations: 27 | 28 | ; The first line of the macro declares it's name as "head". 29 | ; The 4 in this line signifies that it expects to receive 30 | ; 4 parameters when it's invoked: the string that will be the 31 | ; word's name and will be encoded into the header along with 32 | ; the string's name; an "execution tag" name that will have the 33 | ; prefix "xt_" attached to it and will be used as a label for 34 | ; the word's code field; a flag that will be 080h if the word 35 | ; will be immediate and a 0 otherwise; and the label for the 36 | ; word's runtime code, whose address will be put into the 37 | ; word's code field. 38 | %macro head 4 39 | 40 | ; Okay, what we're doing in this odd-looking bit of code is 41 | ; declaring a variable called "%%link" that's local only to this 42 | ; macro and is independent of the earlier variable we declared 43 | ; as "link". It's a label that will represent the current 44 | ; location in the object code we're creating. Then we lay down 45 | ; some actual object code, using the "dw" command to write the 46 | ; current value of "link" into the executable file. 47 | %%link dw link 48 | 49 | ; Here's one of the tricky parts. We now redefine the value of 50 | ; "link" to be whatever the current value of "%%link" is, which 51 | ; is basically the address of the link field that was created 52 | ; during this particular use of this macro. That way, the next 53 | ; time head is called, the value that will be written into the 54 | ; code in the "dw" command above will be whatever the value of 55 | ; "%%link" was during THIS use of the macro. This way, each time 56 | ; head is called, the value that'll be written into the new 57 | ; link field will be the address that was used for the link 58 | ; field the previous time head was called, which is just how 59 | ; we want the link fields to be in a Forth dictionary. Note that 60 | ; the first time that head is called, the value of link was 61 | ; predefined as 0, so that the link field of the first word in 62 | ; the dictionary will contain the value of 0 to mark it as 63 | ; being the first word in the dictionary. 64 | %define link %%link 65 | 66 | ; Now the name field. The first argument passed to head is the 67 | ; string defining the new word's name. The next line in the macro 68 | ; measures the length of the string (the "%1" tells it that it's 69 | ; supposed to look at argument #1) and assigns it to a macro-local 70 | ; variable called "%%count". 71 | %strlen %%count %1 72 | 73 | ; In this next line, we're writing data into the object code on 74 | ; a byte-by-byte basis. We first write a byte consisting of the 75 | ; value of argument 3 (which is 080h if we're writing the header 76 | ; for an immediate word or a 0 otherwise) added to the length of 77 | ; the name string to produce the length byte in the header. Then 78 | ; we write the name string itself into the file. 79 | db %3 + %%count,%1 80 | 81 | ; Okay, don't get confused by the "+" in this next line. Take 82 | ; careful note of the spaces; the actual command is "%+", which 83 | ; is string concatenation, not numeric addition. We're going to 84 | ; splice a string together. The first part consists of the "xt_", 85 | ; then we splice the macro's 2nd argument onto it. The resulting 86 | ; string is used as the head's "execution tag", the address of 87 | ; it's code field. This label is then used for the "dw" command 88 | ; that writes the value of argument #4 (the address of the word's 89 | ; runtime code) into the header's code field. 90 | xt_ %+ %2 dw %4 91 | 92 | ; As you might guess, the next line marks the end of the 93 | ; macro's definition. The entire header's been defined at this 94 | ; point, and we're now ready for the data field, whether it's 95 | ; composed of assembly code, a list of Forth words, or the 96 | ; numeric data for a variable or constant. 97 | %endmacro 98 | 99 | ; For example, calling head with the following line: 100 | ; 101 | ; head,'does>',does,080h,docolon 102 | ; 103 | ; will produce the following header code... 104 | ; 105 | ; dw (address of link of previous header) 106 | ; db 085h,'does>' 107 | ; xt_does dw docolon 108 | ; 109 | ; ...and records the address of this header's link field so that it can 110 | ; be written into the link field of the next word, just as the address 111 | ; of the previous link field was written into this header. 112 | ; This method saves the programmer a lot of tedium in manually generating 113 | ; the code for word headers when writing a Forth system's kernel in 114 | ; assembly language. Note that argument #2 is surrounded by single quotes. 115 | ; That's the format that the assembler expects to see when being told to 116 | ; lay down a string of characters byte-by-byte in a db command, so they 117 | ; have to be present when they're given as an arg to this macro so that 118 | ; the macro puts them in their proper place. 119 | 120 | ;-------------------------------------------------------------------------- 121 | ; The next macro is called "primitive", and is used for setting up a header 122 | ; for a word written in assembly language. 123 | ; 124 | ; Here we declare the definition of the macro called "primitive". 125 | ; Note, though, the odd manner in which the number of required 126 | ; arguments is stated. Yes, that really does mean that it can 127 | ; take from 2 to 3 arguments. Well, what does it do if the user 128 | ; only gives it 2? That's what that 0 is: the default value that's 129 | ; to be used for argument #3 if the user doesn't specify it. Most 130 | ; of the time he won't; the only time arg #3 will be specifically 131 | ; given will be if the user is defining an immediate word. 132 | %macro primitive 2-3 0 133 | 134 | ; All primitive does is to pass its arguments on to head, which 135 | ; does most of the actual work. It passes on the word name and 136 | ; the execution tag name as-is. Parameter #3 will be given the 137 | ; default value of 0 unless the user specifically states it. 138 | ; This is meant to allow the user to add "immediate" to the 139 | ; macro invocation to create an immediate word. The 4th arg, 140 | ; "$+2", means that when head goes to write the address of the 141 | ; run-time code into the code field, the address it's going to 142 | ; use will be 2 bytes further along than the code field address, 143 | ; i.e. the address of the start of the code immediately after 144 | ; the code field. (The "$" symbol is used by most assemblers 145 | ; to represent the address of the code that's currently being 146 | ; assembled.) 147 | head %1,%2,%3,$+2 148 | 149 | ; End of the macro definition. 150 | %endmacro 151 | 152 | ;-------------------------------------------------------------------------- 153 | ; The macro "colon" operates very similarly to "primitive", except that 154 | ; it's used for colon definitions: 155 | ; 156 | ; Declare the macro, with 2 to 3 arguments, using 0 for the default 157 | ; value of arg #3 if one isn't specifically given. 158 | %macro colon 2-3 0 159 | 160 | ; Pass the args on to head, using docolon as the runtime code. 161 | head %1,%2,%3,docolon 162 | 163 | ; End of macro definition. 164 | %endmacro 165 | 166 | ;-------------------------------------------------------------------------- 167 | ; The rest of the macros all require a specific number of arguments, since 168 | ; none of them have the option of being immediate. This one defines 169 | ; a constant: 170 | 171 | ; Macro name is, unsurprisingly, "constant", and gets 3 arguments. 172 | ; As with head and primitive, the first 2 are the word's name and 173 | ; the label name that'll be used for the word. The third argument 174 | ; is the value that we want the constant to hold. 175 | %macro constant 3 176 | 177 | ; Use the head macro. Args 1 and 2, the names, get passed on as-is. 178 | ; Constants are never defined as immediate (though it's an intriguing 179 | ; idea; a constant whose value is one thing when compiling and 180 | ; another when interpreting might be useful for something), so arg #3 181 | ; passed on to head is always a 0, and arg #4 will always be doconst, 182 | ; the address of the runtime code for constants. 183 | head %1,%2,0,doconst 184 | 185 | ; Similar to the way that the label is created for the execution 186 | ; tags, here we create a label for the data field of the constant, 187 | ; though this time we're prefixing the name with "val_" instead 188 | ; of the "xt_" used for the execution tags. Then we use a dw to 189 | ; write constant's arg #3, the constant's value, into the code. 190 | val_ %+ %2 dw %3 191 | 192 | ; End of the definition. 193 | %endmacro 194 | 195 | ;-------------------------------------------------------------------------- 196 | ; The macro for variables is very similar to the one for constants. 197 | 198 | ; Macro name "variable", 3 arguments, with arg #3 being the 199 | ; initial value that will be given to the variable. 200 | %macro variable 3 201 | 202 | ; Just like in "constant", except that the runtime code is dovar. 203 | head %1,%2,0,dovar 204 | 205 | ; Exact same line as used in "constant", with the same effects. 206 | val_ %+ %2 dw %3 207 | 208 | ; End of the definition. 209 | %endmacro 210 | 211 | ;-------------------------------------------------------------------------- 212 | ; 213 | ; That's the last of the macros. They're accessed through the 214 | ; "%include macros.asm" command near the beginning of Itsy's 215 | ; source code file. Or, if you prefer, you can remove the 216 | ; %include command and splice the above code directly 217 | ; into itsy.asm in its place. 218 | ; 219 | ;-------------------------------------------------------------------------- 220 | 221 | -------------------------------------------------------------------------------- /osx/Makefile: -------------------------------------------------------------------------------- 1 | all: 2 | nasm -g -f macho itsy-osx.asm && ld -macosx_version_min 10.5.0 -o itsy itsy-osx.o 3 | -------------------------------------------------------------------------------- /osx/itsy-osx.asm: -------------------------------------------------------------------------------- 1 | ; Itsy Forth 2 | ; Written by John Metcalf 3 | ; Commentary by John Metcalf and Mike Adams 4 | ; Translated to Linux by github user kt97679 5 | ; Translated to MacOS X by Dylan McNamee 6 | 7 | ; nasm -g -f macho itsy-osx.asm && ld -macosx_version_min 10.5.0 -o itsy itsy-osx.o 8 | ; 9 | ; simple session: 10 | ; ./itsy 11 | ; : hi 72 emit 101 emit 108 emit 108 emit 111 emit 10 emit ; 12 | ; hi 13 | ; Hello 14 | ; ^C 15 | ; TODOs: 16 | ; fix seg fault on Ctrl-D 17 | ; fix that 72 101 108 108 111 emit emit emit emit emit doesn't work. 18 | ; build up Forth control structures - if then else, do while, etc. 19 | ; implement string functions num->string, string literals, ., ." 20 | ; hard-code the mach-o header (as with Linux version) so ld doesn't add 8K of header. 21 | ;-------------------------------------------------------------------------- 22 | ; Implementation notes: 23 | ; 24 | ; Register Usage: 25 | ; sp - data stack pointer. 26 | ; bp - return stack pointer. 27 | ; si - Forth instruction pointer. 28 | ; di - pointer to current XT (CFA of word currently being executed). 29 | ; bx - TOS (top of data stack). The top value on the data stack is not 30 | ; actually kept on the CPU's data stack. It's kept in the BX register. 31 | ; Having it in a register like this speeds up the operation of 32 | ; the primitive words. They don't have to take the time to pull a 33 | ; value off of the stack; it's already in a register where it can 34 | ; be used right away! 35 | ; ax, cd, dx - Can all be freely used for processing data. The other 36 | ; registers can still be used also, but only with caution. Their 37 | ; contents must be pushed to the stack and then restored before 38 | ; exiting from the word or calling any other Forth words. LOTS of 39 | ; potential for program crashes if you don't do this correctly. 40 | ; The notable exception is the DI register, which can (and is, below) 41 | ; used pretty freely in assembly code, since the concept of a pointer 42 | ; to the current CFA is rather irrelevant in assembly. 43 | ; 44 | ; 45 | ; Structure of an Itsy word definition: 46 | ; # of 47 | ; Bytes: Description: 48 | ; ------ --------------------------------------------------------- 49 | ; 2 Link Field. Contains the address of the link field of the 50 | ; definition preceding this one in the dictionary. The link 51 | ; field of the first def in the dictionary contains 0. 52 | ; Varies Name Field. The first byte of the name field contains the length 53 | ; of the name; succeeding bytes contain the ASCII characters of 54 | ; the name itself. If the high bit of the length is set, the 55 | ; definition is tagged as being an "immediate" word. 56 | ; 2 Code Field. Contains the address of the executable code for 57 | ; the word. For primitives, this will likely be the address 58 | ; of the word's own data field. Note that the header creation 59 | ; macros automatically generate labels for the code field 60 | ; addresses of the words they're used to define, though the 61 | ; CFA labels aren't visible in the code shown below. The 62 | ; assembler macros create labels, known as "execution tags" 63 | ; or XTs, for the code field of each word. 64 | ; Varies Data Field. Contains either a list of the code field addresses 65 | ; of the words that make up this definition, or assembly- 66 | ; language code for primitives, or numeric data for variables 67 | ; and constants and such. 68 | 69 | %define link 0 70 | %define immediate 080h 71 | 72 | %macro head 4 73 | %%link dd link 74 | %define link %%link 75 | %strlen %%count %1 76 | db %3 + %%count, %1 77 | xt_ %+ %2 dd %4 78 | yt_ %+ %2: 79 | %endmacro 80 | 81 | %macro primitive 2-3 0 82 | head %1, %2, %3, $ + 4 83 | %endmacro 84 | 85 | %macro colon 2-3 0 86 | head %1, %2, %3, docolon 87 | %endmacro 88 | 89 | %macro variable 3 90 | head %1, %2, 0, dovar 91 | val_ %+ %2 dd %3 92 | %endmacro 93 | 94 | %define MEMSIZE 1048576 95 | %define TIBSIZE 80 96 | %define STACKSIZE 4096 97 | %define TIBPTR fstack + MEMSIZE - TIBSIZE ; fheap ;TEXTORG + MEMSIZE - TIBSIZE 98 | %define SP0 TIBPTR - 4 99 | %define RP0 SP0 - STACKSIZE 100 | %define DSTACK RP0 - STACKSIZE 101 | 102 | BITS 32 103 | 104 | section .data 105 | 106 | ; ------------------- 107 | ; System Variables 108 | ; ------------------- 109 | 110 | ; state - ( -- addr ) true = compiling, false = interpreting 111 | variable 'state', state, 0 112 | 113 | ; >in - ( -- addr ) next character in input buffer 114 | variable '>in', to_in, 0 115 | 116 | ; #tib - ( -- addr ) number of characters in the input buffer 117 | variable '#tib', number_t_i_b, 0 118 | 119 | ; dp - ( -- addr ) first free cell in the dictionary 120 | variable 'dp', dp, DSTACK 121 | 122 | ; base - ( -- addr ) number base 123 | variable 'base', base, 10 124 | 125 | ; last - ( -- addr ) the last word to be defined 126 | ; NOTE: The label "final:" must be placed immediately before 127 | ; the last word defined in this file. If new words are added, 128 | ; make sure they're either added before the "final:" label 129 | ; or the "final:" label is moved to the position immediately 130 | ; before the last word added. 131 | variable 'last', last, final 132 | 133 | ; tib - ( -- addr ) address of the input buffer 134 | variable 'tib', t_i_b, TIBPTR 135 | 136 | section .text 137 | global start 138 | start: 139 | jmp xt_abort+4 140 | 141 | ; this is a separate routine to fix up the stack before the syscall 142 | _mysyscall: 143 | int 80h 144 | ret 145 | 146 | ; execute - ( xt -- ) call the word at xt 147 | primitive 'execute', execute 148 | mov eax, ebx ; eax is important here, it is used by docolon and dovar 149 | pop ebx 150 | jmp dword[eax] 151 | 152 | ; ------------------- 153 | ; Initialisation 154 | ; ------------------- 155 | 156 | ; abort - ( -- ) initialise Itsy then jump to interpret 157 | primitive 'abort', abort 158 | mov eax, dword[val_number_t_i_b] 159 | mov dword[val_to_in], eax 160 | xor ebp, ebp 161 | mov dword[val_state], ebp 162 | mov esp, SP0 163 | mov ebp, RP0 164 | mov esi, xt_interpret + 4 165 | jmp next 166 | 167 | ; ------------------- 168 | ; Compilation 169 | ; ------------------- 170 | 171 | ; , - ( x -- ) compile x to the current definition. 172 | ; Stores the number on the stack to the memory location currently 173 | ; pointed to by dp. 174 | primitive ',', comma 175 | xchg eax, ebx 176 | mov ebx, val_dp 177 | mov edi, [ebx] 178 | stosd 179 | mov [ebx], edi 180 | pop ebx 181 | jmp next 182 | 183 | ; lit - ( -- ) push the value in the cell straight after lit. 184 | ; lit is the word that is compiled into a definition when you put a 185 | ; "literal" number in a Forth definition. When your word is compiled, 186 | ; the CFA of lit gets stored in the definition followed immediately 187 | ; by the value of the number you put into the code. At run time, lit 188 | ; pushes the value of your number onto the stack. 189 | primitive 'lit', lit 190 | push ebx 191 | lodsd 192 | xchg eax, ebx 193 | jmp next 194 | 195 | ; ------------------- 196 | ; Stack 197 | ; ------------------- 198 | 199 | ; rot - ( x y z -- y z x ) rotate x, y and z. 200 | ; Standard Forth word that extracts number 3rd from the top of the stack 201 | ; and puts it on the top, effectively rotating the top 3 values. 202 | primitive 'rot', rote 203 | pop edx 204 | pop eax 205 | push edx 206 | push ebx 207 | xchg eax, ebx 208 | jmp next 209 | 210 | ; drop - ( x -- ) remove x from the stack. 211 | primitive 'drop', drop 212 | pop ebx 213 | jmp next 214 | 215 | ; dup - ( x -- x x ) add a copy of x to the stack 216 | primitive 'dup', dupe 217 | push ebx 218 | jmp next 219 | 220 | ; # swap - ( x y -- y x ) exchange x and y 221 | primitive 'swap', swap 222 | xchg ebx, [esp] 223 | jmp next 224 | 225 | ; ------------------- 226 | ; Maths / Logic 227 | ; ------------------- 228 | 229 | ; + - ( x y -- z) calculate z=x+y then return z 230 | primitive '+', plus 231 | pop eax 232 | add ebx, eax 233 | jmp next 234 | 235 | primitive '=', equals 236 | pop eax 237 | sub ebx, eax 238 | sub ebx, 1 239 | sbb ebx, ebx 240 | jmp next 241 | 242 | primitive '@', fetch 243 | mov ebx, dword[ebx] 244 | jmp next 245 | 246 | primitive '!', store 247 | pop dword[ebx] 248 | pop ebx 249 | jmp next 250 | 251 | ; break ( -- ) trigger a breakpoint for debugging 252 | primitive 'break', break 253 | int 3 254 | jmp next 255 | 256 | ; ---------------------- 257 | ; The inner interpteter (buried in here): 258 | ; ---------------------- 259 | primitive 'exit', exit 260 | xchg ebp, esp 261 | pop esi 262 | xchg ebp, esp 263 | next lodsd ; funny that the key part of the inner interpreter is buried here 264 | jmp dword[eax] ; eax is later used by docolon and dovar 265 | 266 | ; ------------------- 267 | ; Flow Control 268 | ; ------------------- 269 | 270 | ; 0branch - ( x -- ) jump if x is zero 271 | ; This is the primitive word that's compiled as the runtime code in 272 | ; an IF...THEN statement. The number compiled into the word's definition 273 | ; immediately after 0branch is the address of the word in the definition 274 | ; that we're branching to. That address gets loaded into the instruction 275 | ; pointer. In essence, this word sees a false flag (i.e. a zero) and 276 | ; then jumps over the words that comprise the "do this if true" clause 277 | ; of an IF...ELSE...THEN statement. 278 | 279 | primitive '0branch', zero_branch 280 | lodsd 281 | test ebx, ebx 282 | jne zerob_z 283 | xchg eax, esi 284 | zerob_z pop ebx 285 | jmp next 286 | 287 | ; branch - ( addr -- ) unconditional jump 288 | ; This is one of the pieces of runtime code that's compiled by 289 | ; BEGIN/WHILE/REPEAT, BEGIN/AGAIN, and BEGIN/UNTIL loops. As with 0branch, 290 | ; the number compiled into the dictionary immediately after the branch is 291 | ; the address of the word in the definition that we're branching to. 292 | primitive 'branch',branch 293 | mov esi, dword[esi] 294 | jmp next 295 | 296 | ; ----------------------- 297 | ; Terminal Input / Output 298 | ; ----------------------- 299 | 300 | ; accept - ( addr len -- len2 ) read a string from the terminal 301 | ; accept reads a string of characters from the terminal. The string 302 | ; is stored at addr and can be up to len characters long. 303 | ; accept returns the actual length of the string. 304 | 305 | ; converted Linux syscall to MacOS syscall 306 | ; see: https://filippo.io/making-system-calls-from-assembly-in-mac-os-x/ 307 | ; main difference is Linux passes syscall params in registers, MacOS / BSD does it on the stack 308 | ; ( addr len -- len_read ) 309 | primitive 'accept', accept 310 | ; ebx has the # bytes to read 311 | ; stdin is handle 0 312 | ; top of stack has the buffer address 313 | pop ecx ; save buffer 314 | push ebx ; count 315 | push ecx ; buffer 316 | push 0 ; stdin 317 | mov eax, 0x3 ; sys_read 318 | call _mysyscall 319 | add esp, 12 ; discard args 320 | xchg ebx, eax ; MacOS / BSD - eax after sys_read contains number of bytes read (negative number means error), let's move it to TOS 321 | dec ebx ; last char is CR 322 | push ebx 323 | jmp next 324 | 325 | ; ( char -- ) emit a character to the terminal 326 | primitive 'emit', emit 327 | ; char to print is in ebx 328 | push ebx ; needs to be in memory - why not stack? 329 | mov ebx, esp ; save stack pointer (pointing at our char) 330 | push 1 ; count 331 | push ebx ; buffer (into the stack) 332 | push 1 ; stdout 333 | mov eax, 0x4 ; sys_write 334 | call _mysyscall 335 | aftemt add esp, 16 ; reset stack 336 | pop ebx ; TOS goes in ebx 337 | jmp next 338 | 339 | ; ------------------- 340 | ; String 341 | ; ------------------- 342 | 343 | ; count - ( addr -- addr2 len ) 344 | ; count is given the address of a "counted string" (like the name field of a 345 | ; word definition in Forth, with the first byte being the number of 346 | ; characters in the string and immediately followed by the characters 347 | ; themselves). It returns the length of the string and a pointer to the 348 | ; first actual character in the string. 349 | primitive 'count',count 350 | movzx eax, byte[ebx] 351 | inc ebx 352 | push ebx 353 | mov ebx, eax 354 | jmp next 355 | 356 | primitive '>number',to_number 357 | pop edi 358 | pop ecx 359 | pop eax 360 | to_numl test ebx, ebx 361 | je to_numz 362 | push eax 363 | movzx eax, byte[edi] 364 | cmp al, 'a' 365 | jc to_nums 366 | sub al, 32 367 | to_nums cmp al, '9' + 1 368 | jc to_numg 369 | cmp al, 'A' 370 | jc to_numh 371 | sub al, 7 372 | to_numg sub al, 48 373 | cmp al, byte[val_base] 374 | jnc to_numh 375 | xchg eax, edx 376 | pop eax 377 | push edx 378 | xchg eax, ecx 379 | mul dword[val_base] 380 | xchg eax, ecx 381 | mul dword[val_base] 382 | add ecx, edx 383 | pop edx 384 | add eax, edx 385 | dec ebx 386 | inc edi 387 | jmp to_numl 388 | to_numz push eax 389 | to_numh push ecx 390 | push edi 391 | jmp next 392 | 393 | ; word - ( char -- addr ) parse the next word in the input buffer 394 | ; word scans the "terminal input buffer" (whose address is given by the 395 | ; system constant tib) for words to execute, starting at the current 396 | ; address stored in the input buffer pointer >in. The character on the 397 | ; stack when word is called is the one that the code will look for as 398 | ; the separator between words. 999 times out of 1000,; this is going to 399 | ; be a space. 400 | primitive 'word', word 401 | mov edi, dword[val_dp] 402 | push edi 403 | mov edx, ebx 404 | mov ebx, dword[val_t_i_b] 405 | mov ecx, ebx 406 | add ebx, dword[val_to_in] 407 | add ecx, dword[val_number_t_i_b] 408 | wordf cmp ecx, ebx 409 | je wordz 410 | mov al, byte[ebx] 411 | inc ebx 412 | cmp al, dl 413 | je wordf 414 | wordc inc edi 415 | mov byte[edi], al 416 | cmp ecx, ebx 417 | je wordz 418 | mov al, byte[ebx] 419 | inc ebx 420 | cmp al, dl 421 | jne wordc 422 | wordz mov byte[edi + 1], 32 423 | mov eax, dword[val_dp] 424 | xchg eax, edi 425 | sub eax, edi 426 | mov byte[edi], al 427 | sub ebx, dword[val_t_i_b] 428 | mov dword[val_to_in], ebx 429 | pop ebx 430 | jmp next 431 | 432 | ; ----------------------- 433 | ; Dictionary Search 434 | ; ----------------------- 435 | 436 | ; find - ( addr -- addr2 flag ) look up word in the dictionary 437 | ; find looks in the Forth dictionary for a word with the name given in the 438 | ; counted string at addr. One of the following will be returned: 439 | ; flag = 0, addr2 = counted string --> word was not found 440 | ; flag = 1, addr2 = call address --> word is immediate 441 | ; flag = -1, addr2 = call address --> word is not immediate 442 | primitive 'find', find 443 | mov edi, val_last 444 | findl push edi 445 | push ebx 446 | movzx ecx, byte[ebx] 447 | inc ecx 448 | findc mov al, byte[edi + 4] 449 | and al, 07Fh 450 | cmp al, byte[ebx] 451 | je findm 452 | pop ebx 453 | pop edi 454 | mov edi, dword[edi] 455 | test edi, edi 456 | jne findl 457 | findnf push ebx 458 | xor ebx, ebx 459 | jmp next 460 | findm inc edi 461 | inc ebx 462 | loop findc 463 | pop ebx 464 | pop edi 465 | xor ebx, ebx 466 | inc ebx 467 | lea edi, [edi + 4] 468 | mov al, byte[edi] 469 | test al, 080h 470 | jne findi 471 | neg ebx 472 | findi and eax, 31 473 | add edi, eax 474 | inc edi 475 | push edi 476 | jmp next 477 | 478 | ; ----------------------- 479 | ; Colon Definition 480 | ; ----------------------- 481 | 482 | ; : - ( -- ) define a new Forth word, taking the name from the input buffer. 483 | ; Ah! We've finally found a word that's actually defined as a Forth colon 484 | ; definition rather than an assembly language routine! Partly, anyway; the 485 | ; first part is Forth code, but the end is the assembly language run-time 486 | ; routine that, incidentally, executes Forth colon definitions. Notice that 487 | ; the first part is not a sequence of opcodes, but rather is a list of 488 | ; code field addresses for the words used in the definition. In each code 489 | ; field of each defined word is an "execution tag", or "xt", a pointer to 490 | ; the runtime code that executes the word. In a Forth colon definition, this 491 | ; is going to be a pointer to the docolon routine we see in the second part 492 | ; of the definition of colon itself below. 493 | colon ':', colon 494 | dd xt_lit, -1 495 | dd xt_state 496 | dd xt_store 497 | dd xt_create 498 | dd xt_do_semi_code 499 | 500 | docolon xchg ebp, esp 501 | push esi 502 | xchg ebp, esp 503 | lea esi, [eax + 4] ; eax value is set by next 504 | jmp next 505 | 506 | ; ; - ( -- ) complete the Forth word being compiled 507 | colon ';', semicolon, immediate 508 | dd xt_lit, xt_exit 509 | dd xt_comma 510 | dd xt_lit, 0 511 | dd xt_state 512 | dd xt_store 513 | dd xt_exit 514 | 515 | ; ----------------------- 516 | ; Headers 517 | ; ----------------------- 518 | 519 | ; create - ( -- ) build a header for a new word in the dictionary, taking 520 | ; the name from the input buffer - a runtime version of 'primitive' 521 | colon 'create', create 522 | dd xt_dp, xt_fetch 523 | dd xt_last, xt_fetch 524 | dd xt_comma 525 | dd xt_last, xt_store 526 | dd xt_lit, 32 527 | dd xt_word 528 | dd xt_count 529 | dd xt_plus 530 | dd xt_dp, xt_store 531 | dd xt_lit, 0 532 | dd xt_comma 533 | dd xt_do_semi_code 534 | 535 | dovar push ebx 536 | lea ebx, [eax + 4] ; eax value is set by next 537 | jmp next 538 | 539 | ; # (;code) - ( -- ) replace the xt of the word being defined with a pointer 540 | ; to the code immediately following (;code) 541 | ; The idea behind this compiler word is that you may have a word that does 542 | ; various compiling/accounting tasks that are defined in terms of Forth code 543 | ; when its being used to compile another word, but afterward, when the new 544 | ; word is executed in interpreter mode, you want your compiling word to do 545 | ; something else that needs to be coded in assembly. (;code) is the word that 546 | ; says, "Okay, that's what you do when you're compiling, but THIS is what 547 | ; you're going to do while executing, so look sharp, it's in assembly!" 548 | ; Somewhat like the word DOES>, which is used in a similar manner to define 549 | ; run-time code in terms of Forth words. 550 | primitive '(;code)', do_semi_code 551 | mov edi, dword[val_last] 552 | mov al, byte[edi + 4] 553 | and eax, 31 554 | add edi, eax 555 | mov dword[edi + 5], esi 556 | xchg ebp, esp 557 | pop esi 558 | xchg esp, ebp 559 | jmp next 560 | 561 | ; ----------------------- 562 | ; Constants 563 | ; ----------------------- 564 | 565 | ; constant - ( x -- ) create a new constant with the value x, taking the name 566 | ; from the input buffer 567 | colon 'constant',constant 568 | dw xt_create ; Create the constant's header. 569 | dw xt_comma ; Store the constant's value into the word's 570 | ; data field. 571 | dw xt_do_semi_code ; Write, into the code field of the header we just 572 | ; created, the address that immediately follows 573 | ; this statement: the address of the doconst 574 | ; routine, which is the code that's responsible 575 | ; for pushing onto the stack the value that's 576 | ; contained in the data field of the word whose 577 | ; header we just created when that word is 578 | ; invoked. 579 | doconst push bx ; Push the stack down. 580 | mov bx,word[di+2] ; DI should be pointing to the constant's code 581 | ; field. Load into the top of the stack the 582 | ; value 2 bytes further down from the code field, 583 | ; i.e. the constant's actual value. 584 | jmp next ; 585 | 586 | ; ----------------------- 587 | ; Outer Interpreter 588 | ; ----------------------- 589 | 590 | ; ------------------------------------------------------- 591 | ; NOTE! The following line with the final: label MUST be 592 | ; immediately before the final word definition! 593 | ; ------------------------------------------------------- 594 | 595 | final: 596 | 597 | colon 'interpret', interpret 598 | interpt dd xt_number_t_i_b 599 | dd xt_fetch 600 | dd xt_to_in 601 | dd xt_fetch 602 | dd xt_equals 603 | dd xt_zero_branch 604 | dd intpar 605 | dd xt_t_i_b 606 | dd xt_fetch 607 | dd xt_lit, 50 608 | dd xt_accept 609 | dd xt_number_t_i_b 610 | dd xt_store 611 | dd xt_lit, 0 612 | dd xt_to_in 613 | dd xt_store 614 | intpar dd xt_lit, 32 ; find a " " 615 | dd xt_word 616 | dd xt_find ; is it in the dictionary? 617 | dd xt_dupe 618 | dd xt_zero_branch 619 | dd intnf 620 | dd xt_state 621 | dd xt_fetch 622 | dd xt_equals 623 | dd xt_zero_branch 624 | dd intexc 625 | dd xt_comma 626 | dd xt_branch 627 | dd intdone 628 | intexc dd xt_execute ; found immediate word - execute it, then loop 629 | dd xt_branch 630 | dd intdone 631 | intnf dd xt_dupe ; not a forth word - is it a number? 632 | dd xt_rote 633 | dd xt_count 634 | dd xt_to_number 635 | dd xt_zero_branch 636 | dd intskip 637 | dd xt_state ; conversion error: 638 | dd xt_fetch ; branch on State (interpreting vs compiling) 639 | dd xt_zero_branch ; interpreting -> loop 640 | dd intnc 641 | dd xt_last ; compiling -> orderly shutdown 642 | dd xt_fetch 643 | dd xt_dupe 644 | dd xt_fetch 645 | dd xt_last 646 | dd xt_store 647 | dd xt_dp 648 | dd xt_store 649 | intnc dd xt_abort 650 | intskip dd xt_drop 651 | dd xt_drop 652 | dd xt_state 653 | dd xt_fetch 654 | dd xt_zero_branch 655 | dd intdone 656 | dd xt_lit 657 | dd xt_lit 658 | dd xt_comma 659 | dd xt_comma 660 | intdone dd xt_branch 661 | dd interpt 662 | dd xt_abort 663 | 664 | colon 'testitsymac', testitsymac 665 | dotest dd xt_t_i_b 666 | dd xt_fetch 667 | dd xt_lit, 1 668 | dd xt_accept 669 | looper dd xt_lit, 50 670 | dd xt_lit, 50 671 | dd xt_emit 672 | dd dotest 673 | freemem: 674 | 675 | section .bss 676 | fstack: resb 16000 + MEMSIZE 677 | ; rstack: resb 2048 678 | ; fheap: resb 2048 679 | ; astack: resb 2048 680 | ; dstack: resb 2048 681 | 682 | filesize equ $ - $$ 683 | --------------------------------------------------------------------------------