├── .gitattributes
├── .gitignore
├── LICENSE
├── enter.asm
├── int1chk
    ├── int1chk.c
    └── int1chk.prj
├── readme.md
├── xtize.c
└── xtize.prj


/.gitattributes:
--------------------------------------------------------------------------------
1 | # Declare text and binary files
2 | 
3 | *.c	text
4 | *.asm	text
5 | *.prj	-text
6 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | *.obj
2 | *.dsk
3 | *.exe


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | MIT License
 2 | 
 3 | Copyright (c) 2019 Michael Karcher
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | of this software and associated documentation files (the "Software"), to deal
 7 | in the Software without restriction, including without limitation the rights
 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | 


--------------------------------------------------------------------------------
/enter.asm:
--------------------------------------------------------------------------------
  1 | .MODEL LARGE
  2 | 
  3 | DEBUG = 1
  4 | 
  5 | .CODE
  6 | 
  7 | oldsp   dw ?                  ; caller SP saved by _EnterSingleStep (stored in the code segment, addressed via CS)
  8 | oldss   dw ?                  ; caller SS saved by _EnterSingleStep
  9 | 
 10 | _EnterSingleStep:             ; far entry point; [bp + 6] holds a far pointer to the state block
 11 | PUBLIC  _EnterSingleStep
 12 |     push    bp
 13 |     mov     bp,sp
 14 |     push    ds
 15 |     ; Activate callee PSP (INT 21h with AH = 50h, PSP segment in BX)
 16 |     les     si, [bp + 6]      ; state block
 17 |     mov     bx, [es:si + 22]  ; new PSP
 18 |     mov     ah, 50h
 19 |     int     21h
 20 | 
 21 |     ; Set up callee entry point registers (including TF) and save old stack
 22 |     mov     bx, [es:si + 22]  ; reload the PSP segment; it becomes DS and ES of the callee below
 23 |     mov     di, [es:si + 14]  ; new stack pointer
 24 |     mov     [cs:oldsp], sp
 25 |     mov     [cs:oldss], ss
 26 |     mov     ss, [es:si + 16]  ; new stack segment
 27 |     mov     sp, di
 28 |     xor     ax, ax
 29 |     push    ax          ; COM return address (PSP:0)
 30 |     mov     ah, 3       ; Set IF and TF
 31 |     push    ax          ; flags image 0300h, popped by the IRET below
 32 |     push    [WORD PTR es:si + 20]       ; new CS:IP
 33 |     push    [WORD PTR es:si + 18]
 34 |     mov     ds, bx
 35 |     mov     es, bx
 36 |     mov     [ds:WORD PTR 0Ch], cs       ; PSP:0Ah/0Ch := far pointer CS:RETURNTO
 37 |     mov     [ds:WORD PTR 0Ah], offset RETURNTO
 38 |     iret                                ; pops the IP, CS and flags words pushed above
 39 | RETURNTO:                   ; presumably entered by DOS through the far pointer at PSP:0Ah when the callee terminates
 40 |     ; Restore stack
 41 |     mov     ss, [cs:oldss]
 42 |     mov     sp, [cs:oldsp]
 43 |     ; Return to caller environment. NOTE(review): only DS and BP are restored; SI, DI and ES keep whatever they hold here - confirm the C caller tolerates that
 44 |     pop     ds
 45 |     pop     bp
 46 |     retf
 47 | 
 48 | count   dd  0                             ; 32-bit invocation counter, kept in the code segment
 49 | PUBLIC _CountingSS
 50 | _CountingSS:                              ; interrupt handler: count one invocation, then return
 51 |     add     WORD PTR cs:[count], 1        ; low word; ADD produces the carry consumed below
 52 |     adc     WORD PTR cs:[count + 2], 0    ; propagate the carry into the high word
 53 |     iret
 54 | 
 55 | PUBLIC _GetCount
 56 | _GetCount:                                ; far function: returns the counter in DX:AX
 57 |     mov     dx, WORD PTR cs:[count + 2]   ; high word
 58 |     mov     ax, WORD PTR cs:[count]       ; low word
 59 |     retf
 60 | 
 61 | dispatch_table2:                    ; second level: handler offsets, selected by the byte read from dispatch_table1
 62 |   idx_emu_exit            = $ - OFFSET dispatch_table2   ; every idx_* is the byte offset of its entry in this table
 63 |     dw      OFFSET emu_exit
 64 | ; The instructions until the next comment are not 8088 compatible. They are trapped and emulated.
 65 |   idx_emulate_enter       = $ - OFFSET dispatch_table2
 66 |     dw      OFFSET emulate_enter
 67 |   idx_emulate_leave       = $ - OFFSET dispatch_table2
 68 |     dw      OFFSET emulate_leave
 69 |   idx_emulate_push16      = $ - OFFSET dispatch_table2
 70 |     dw      OFFSET emulate_push16
 71 |   idx_emulate_push8       = $ - OFFSET dispatch_table2
 72 |     dw      OFFSET emulate_push8
 73 |   idx_emulate_imul        = $ - OFFSET dispatch_table2
 74 |     dw      OFFSET emulate_imul
 75 |   idx_emulate_shiftrotate = $ - OFFSET dispatch_table2
 76 |     dw      OFFSET emulate_shiftrotate
 77 | ; The instructions after this comment are 8088 compatible. They are trapped to assist single-stepping.
 78 |   idx_emulate_movsreg     = $ - OFFSET dispatch_table2
 79 |     dw      OFFSET emulate_movsreg
 80 |   idx_emulate_pop_es      = $ - OFFSET dispatch_table2
 81 |     dw      OFFSET emulate_pop_es
 82 |   idx_emulate_pop_ds      = $ - OFFSET dispatch_table2
 83 |     dw      OFFSET emulate_pop_ds
 84 |   idx_emulate_int         = $ - OFFSET dispatch_table2
 85 |     dw      OFFSET emulate_int
 86 | 
 87 | dispatch_table1:                    ; first level: one idx_* byte per opcode value 00h..FFh
 88 |     db      256 dup (idx_emu_exit)  ; default entry: leave the handler without injecting anything
 89 |   org dispatch_table1 + 07h         ; each ORG steps back into the table to overwrite single entries
 90 |     db      idx_emulate_pop_es        ; POP ES
 91 |   org dispatch_table1 + 1Fh
 92 |     db      idx_emulate_pop_ds        ; POP DS
 93 |   org dispatch_table1 + 068h
 94 |     db      idx_emulate_push16        ; PUSH imm16
 95 |     db      idx_emulate_imul          ; IMUL r16, r/m16, imm16
 96 |     db      idx_emulate_push8         ; PUSH imm8
 97 |     db      idx_emulate_imul          ; IMUL r16, r/m16, imm8
 98 |   org dispatch_table1 + 08Eh
 99 |     db      idx_emulate_movsreg       ; MOV sreg, r/m16
100 |   org dispatch_table1 + 0C0h
101 |     db      idx_emulate_shiftrotate   ; shift/rotate r8, imm8
102 |     db      idx_emulate_shiftrotate   ; shift/rotate r16, imm8
103 |   org dispatch_table1 + 0C8h
104 |     db      idx_emulate_enter         ; ENTER
105 |     db      idx_emulate_leave         ; LEAVE
106 |   org dispatch_table1 + 0CDh
107 |     db      idx_emulate_int           ; INT xx
108 |   org dispatch_table1 + 100h        ; continue assembling behind the 256-byte table
109 | 
110 | uhmsg:
111 |     db      "286 emulation failed", 0
112 | unhandled:                      ; fatal path: print uhmsg through INT 10h, then stop; never returns
113 |     mov     ah, 0eh             ; INT 10h function 0Eh (BIOS teletype output)
114 |     mov     si, offset uhmsg
115 | uhloop:
116 |     lods    BYTE PTR [cs:si]    ; al = next message byte, read from the code segment
117 |     int     10h                 ; issued before the terminator test, so the final 0 byte is passed to INT 10h as well
118 |     cmp     al, 0
119 |     jne     uhloop
120 |     hlt
121 |     jmp     $-1                 ; $-1 is the one-byte HLT above: halt again if execution resumes
122 | 
123 | emulate_enter:                      ; ENTER imm16, imm8; ds:si points behind the opcode byte
124 |     lodsw                           ; first operand (imm16)
125 |     mov     WORD PTR [cs:enter_immed], ax   ; patch it into the SUB SP immediate of enter_inject
126 |     lodsb                           ; second operand (imm8)
127 |     cmp     al, 0                 ; we don't support nested frame enter
128 |     jne     unhandled
129 |     mov     [bp + 0], OFFSET enter_inject   ; return IP of the interrupt frame := injected code
130 |     jmp     exit_for_reenter
131 | 
132 | emulate_leave:                      ; LEAVE has no operand bytes; just redirect the return IP
133 |     mov     [bp + 0], OFFSET leave_inject
134 |     jmp     exit_for_reenter
135 | 
136 | emulate_push8:                      ; PUSH imm8
137 |     lodsb
138 |     cbw                             ; sign-extend the immediate to 16 bits
139 |     db      0B3h  ; MOV BL, xxh (kills lodsw)
140 | emulate_push16:                     ; PUSH imm16
141 |     lodsw
142 | pushcommon:
143 |     mov     WORD PTR [cs:push_immed], ax    ; the word that push_inject pushes
144 |     mov     [bp + 0], OFFSET push_inject    ; return IP of the interrupt frame := injected code
145 |     jmp     exit_for_reenter
146 | 
147 | IF DEBUG
148 | printhex16:                 ; write AX as four hex digits to es:bx, advancing bx; AX is not preserved
149 |     xchg    ah,al           ; high byte first
150 |     call    printhex8
151 |     xchg    ah,al           ; bring the low byte into al, then fall through
152 | printhex8:                  ; write AL as two hex digits
153 |     push    ax
154 |     shr     ax,1            ; four single-bit shifts (8088 has no shift-by-immediate) ...
155 |     shr     ax,1
156 |     shr     ax,1
157 |     shr     ax,1            ; ... move the high nibble of al into its low nibble
158 |     call    printhex4
159 |     pop     ax
160 | printhex4:                  ; write the low nibble of AL as one hex digit
161 |     and     al, 0Fh
162 |     add     al, 90h         ; add/daa/adc/daa converts 0..15 to ASCII '0'..'9', 'A'..'F'
163 |     daa
164 |     adc     al, 40h
165 |     daa
166 |     mov     [es:bx], al     ; digit character
167 |     mov     BYTE PTR [es:bx+1], 0Fh     ; byte after the character (es is B800h at the call site, so presumably the text attribute)
168 |     add     bx,2
169 |     ret
170 | ENDIF
171 | 
172 | emulate_shiftrotate:    ; shift/rotate r/m, imm8 (C0h/C1h); al = opcode, ds:si -> mod/RM byte; patches the sr_inject template
173 |     add     al, 12h     ; C0 -> D2, C1 -> D3
174 |     mov     BYTE PTR [cs:sr_patch_opcode], al
175 |     add     al, 86h - 0D2h ; generate 8-bit/16-bit XCHG opcode
176 |     mov     BYTE PTR [cs:sr_xchg1], al
177 |     mov     BYTE PTR [cs:sr_xchg2], al
178 |     lodsb               ; Fetch mod/RM byte
179 |     mov     ah, al
180 |     or      al, 0C0h    ; set memory operand to AX/AL (for shift/rotate)
181 |     and     al, 0F8h
182 |     mov     BYTE PTR [cs:sr_patch_opcode+1], al
183 |     and     ah, 0C7h    ; set register operand to AX (for XCHG instructions)
184 |     mov     BYTE PTR [cs:sr_xchg2 + 1], ah
185 |     mov     BYTE PTR [cs:sr_xchg1 + 1], ah
186 |     cmp     ah, 06h     ; special encoding for baseless 16-bit address
187 |     jne     not_mem16
188 |     mov     ah, 80h     ; replace mode by something with a 16-bit offset
189 | not_mem16:
190 |     sub     ah, 40h     ; 00..3F = 8bit, 40..7F = 16bit, 80..FF = nothing
191 |     js      no_offset
192 |     cmp     ah, 40h
193 |     jae     offset16
194 |     lodsb               ; 8-bit displacement
195 |     mov     ah, 90h     ; fill the unused fourth template byte with a NOP
196 |     jmp     SHORT ofs_common
197 | offset16:
198 |     lodsw               ; 16-bit displacement
199 |     jmp     SHORT ofs_common
200 | no_offset:
201 |     mov     ax, 9090h   ; no displacement: both trailing template bytes become NOPs
202 | ofs_common:
203 |     mov     WORD PTR [cs:sr_xchg2 + 2], ax
204 |     mov     WORD PTR [cs:sr_xchg1 + 2], ax
205 |     lodsb               ; shift count (imm8)
206 |     mov     BYTE PTR [cs:sr_patch_count], al
207 |     mov     [bp + 0], OFFSET sr_inject ; return IP
208 |     jmp     exit_for_reenter
209 | 
210 | ; Entry-Points:
211 | ;  emu_reenter - jump here if an injected function is finished.
212 | ;                store oldbp before jumping here, and create a correct
213 | ;                interrupt stack frame returning to the final target
214 | _EmulatingSS:               ; interrupt handler entry; expects an interrupt frame (IP, CS, flags) at ss:sp
215 | PUBLIC _EmulatingSS
216 |     mov     WORD PTR [cs:emuss_oldbp], bp   ; the emuss_* words are the immediates of the restore sequence at emu_exit
217 | emu_reenter:
218 |     mov     WORD PTR [cs:emuss_oldsi], si
219 |     mov     WORD PTR [cs:emuss_oldds], ds
220 |     mov     WORD PTR [cs:emuss_oldax], ax
221 |     mov     WORD PTR [cs:emuss_oldbx], bx
222 |     mov     bp, sp                   ; frame: [bp + 0] = IP, [bp + 2] = CS, [bp + 4] = flags
223 |     test    BYTE PTR [bp + 5], 1     ; Called without TF - either interrupt entry or while quitting emulator
224 |     jz      emu_exit
225 |     cld                              ; the LODS instructions below must advance si
226 |     mov     ds, [bp + 2] ; caller CS
227 |     mov     si, [bp + 0] ; caller IP
228 | IF DEBUG
229 |     push    es
230 |     mov     ax, 0B800h
231 |     mov     es, ax
232 |     mov     bx, 0
233 |     mov     ax, ds
234 |     call    printhex16           ; show caller CS at B800h:0000 ...
235 |     mov     ax, si
236 |     call    printhex16           ; ... followed by caller IP
237 |     mov     bx, 200h
238 |     dec     bx
239 |     jnz     $-1                  ; $-1 is the one-byte DEC BX above: busy-wait for 200h iterations
240 |     pop     es
241 | ENDIF
242 |     lodsb                        ; opcode byte of the instruction about to execute
243 |     mov     ah, 0
244 |     mov     bx, ax
245 |     mov     bl, BYTE PTR cs:[bx + offset dispatch_table1]    ; bl = idx_* value for this opcode (bh stays 0)
246 |     jmp     WORD PTR cs:[bx + offset dispatch_table2]        ; handlers start with al = opcode and ds:si behind it
247 | 
248 | emulate_movsreg:        ; MOV sreg, r/m16 (8Eh); ds:si -> mod/RM byte; patches the movsreg_inject template
249 |     lodsb               ; Fetch mod/RM byte
250 |     mov     BYTE PTR [cs:movsreg_instruction+1], al
251 |     xor     al, 010h    ; Fudge register number, so that SS gets zero
252 |     test    al, 38h     ; Z if destination is SS
253 |     jz      emu_exit    ; -> execute it natively.
254 |     and     al, 0C7h    ; clear destination register
255 |     cmp     al, 06h     ; special encoding for baseless 16-bit address
256 |     jne     movsreg_not_mem16
257 |     mov     al, 80h     ; replace mode by something with a 16-bit offset
258 | movsreg_not_mem16:
259 |     sub     al, 40h     ; 00..3F = 8bit, 40..7F = 16bit, 80..FF = nothing
260 |     js      movsreg_no_offset
261 |     cmp     al, 40h
262 |     jae     movsreg_offset16
263 |     lodsb               ; 8-bit displacement
264 |     mov     ah, 90h     ; fill the unused fourth template byte with a NOP
265 |     jmp     SHORT movsreg_ofs_common
266 | movsreg_offset16:
267 |     lodsw               ; 16-bit displacement
268 |     jmp     SHORT movsreg_ofs_common
269 | movsreg_no_offset:
270 |     mov     ax, 9090h   ; no displacement: both trailing template bytes become NOPs
271 | movsreg_ofs_common:
272 |     mov     WORD PTR [cs:movsreg_instruction+2], ax
273 |     mov     [bp + 0], OFFSET movsreg_inject ; return IP
274 | exit_for_reenter:           ; common tail of all handlers that redirect the return IP to an injected template
275 |     mov     [bp + 2], cs ; return CS
276 |     and     BYTE PTR [bp + 5], NOT 1        ; Clear TF
277 |     mov     WORD PTR [cs:reenter_offset], si    ; ds:si = address behind the decoded instruction;
278 |     mov     WORD PTR [cs:reenter_segment], ds   ; prepare_reenter pushes it as the next return address
279 | emu_exit:                   ; restore the registers saved at entry (the 1234h immediates are overwritten at run time) and leave
280 |     mov     si, 1234h
281 |     emuss_oldds = $-2       ; address of the immediate word of the MOV above
282 |     mov     ds, si
283 |     mov     si, 1234h
284 |     emuss_oldsi = $-2
285 |     mov     bp, 1234h
286 |     emuss_oldbp = $-2
287 |     mov     ax, 1234h
288 |     emuss_oldax = $-2
289 |     mov     bx, 1234h
290 |     emuss_oldbx = $-2
291 |     iret
292 | 
293 | emulate_pop_ds:                     ; POP DS: run it from pop_ds_inject instead of in place
294 |     mov     [bp + 0], OFFSET pop_ds_inject
295 |     jmp     exit_for_reenter
296 | 
297 | emulate_pop_es:                     ; POP ES: run it from pop_es_inject instead of in place
298 |     mov     [bp + 0], OFFSET pop_es_inject
299 |     jmp     exit_for_reenter
300 | 
301 | emulate_int:                        ; INT xx (CDh); ds:si -> interrupt number
302 |     lodsb
303 |     ; DOS interrupts 20h (terminate) and 27h (TSR) are sensitive to caller's CS,
304 |     ; so we may not trap them. DOS function 00 is equivalent to interrupt 20h, so the
305 |     ; don't-trap requirement applies there, too.
306 |     cmp     al, 20h
307 |     je      emu_exit
308 |     cmp     al, 27h
309 |     je      emu_exit
310 |     cmp     al, 21h
311 |     jne     int_do_it
312 |     cmp     BYTE PTR [cs:emuss_oldax + 1], 00h      ; high byte of the saved AX, i.e. the caller's AH
313 |     je      emu_exit
314 | int_do_it:
315 |     mov     [bp + 0], OFFSET int_inject
316 |     mov     BYTE PTR [cs:intnumber], al             ; patch the interrupt number into the INT in int_inject
317 |     jmp     exit_for_reenter
318 | 
319 | emulate_imul:                       ; IMUL r16, r/m16, imm16 (69h) / imm8 (6Bh); al = opcode, ds:si -> mod/RM byte
320 |     mov     ah, al                  ; keep the opcode: bit 1 set means an 8-bit immediate
321 |     lodsb                           ; Fetch mod/RM byte
322 |     cmp     al, 0C0h
323 |     jb      imul_nonsrcax
324 |     test    al, 7
325 |     jnz     imul_nonsrcax
326 |     mov     BYTE PTR [cs:imul_srcax_mov + 1], al    ; source is AX: template becomes "mov reg, ax"
327 |     shr     al, 1
328 |     shr     al, 1
329 |     shr     al, 1
330 |     add     al, 90h - (0C0h SHR 3)  ; XCHG AX, reg opcode for the destination register
331 |     mov     BYTE PTR [cs:imul_srcax_xchg], al
332 |     test    ah, 2                   ; NZ = imm8; the MOV below leaves the flags alone
333 |     mov     bx, offset imul_srcax_inject
334 |     jmp     SHORT imul_do_immed
335 | imul_nonsrcax:
336 |     test    al, 38h
337 |     jnz     imul_nonax
338 |     or      al, 28h                 ; destination is AX: reg field := 5 (one-operand IMUL)
339 |     mov     BYTE PTR [cs:imul_dstax_instruction+1], al
340 |     call    imul_fetch_disp         ; ax = displacement bytes, bh = opcode
341 |     mov     WORD PTR [cs:imul_dstax_instruction+2], ax
342 |     test    bh, 2
343 |     mov     bx, offset imul_dstax_inject
344 |     jmp     SHORT imul_do_immed
345 | imul_nonax:
346 |     mov     bx, ax
347 |     and     al, 38h
348 |     shr     al, 1
349 |     shr     al, 1
350 |     shr     al, 1
351 |     add     al, 90h                 ; XCHG AX, reg opcode for the destination register
352 |     mov     BYTE PTR [cs:imul_nonax_xchg], al
353 |     xchg    ax, bx                  ; ax = opcode:mod/RM again
354 |     and     al, 0C7h
355 |     or      al, 28h                 ; reg field := 5 (one-operand IMUL)
356 |     mov     BYTE PTR [cs:imul_nonax_instruction+1], al
357 |     call    imul_fetch_disp         ; ax = displacement bytes, bh = opcode
358 |     mov     WORD PTR [cs:imul_nonax_instruction+2], ax
359 |     test    bh, 2
360 |     mov     bx, offset imul_nonax_inject
361 | imul_do_immed:                      ; flags: NZ = imm8 follows, Z = imm16 follows; bx = inject entry
362 |     jnz     short imul_immed8
363 |     lodsw
364 |     jmp     short imul_immed_done
365 | imul_immed8:
366 |     lodsb
367 |     cbw                             ; sign-extend the 8-bit immediate
368 | imul_immed_done:
369 |     mov     WORD PTR [cs:imul_immed], ax
370 |     mov     [bp+0], bx              ; return IP of the interrupt frame := injected code
371 |     jmp     exit_for_reenter
372 | 
373 | ; In: ah = opcode, al = mod/RM, ds:si -> displacement. Out: ax = the two template bytes
374 | ; behind mod/RM (displacement, padded with NOPs), bh = opcode, si advanced. Clobbers bl.
375 | imul_fetch_disp:
376 |     xchg    ax, bx
377 |     mov     ax, 9090h               ; default: no displacement, two NOPs
378 |     and     bl, 0C7h                ; keep mod and r/m only
379 |     cmp     bl, 06h                 ; special encoding for baseless 16-bit address
380 |     je      imul_disp16
381 |     sub     bl, 40h                 ; 00..3F = 8bit, 40..7F = 16bit, 80..FF = nothing
382 |     js      imul_disp_done
383 |     test    bl, 40h                 ; must test bl: al already holds the 90h filler
384 |     jz      imul_disp8
385 | imul_disp16:
386 |     lodsw
387 |     ret
388 | imul_disp8:
389 |     lodsb                           ; ah keeps the NOP filler
390 | imul_disp_done:
391 |     ret
392 | 
393 | ; This code gets injected after IRET by patching the return address
394 | ; to point here and patching the original return address into this
395 | ; code. This design was chosen, because
396 | ;  1. rcl/rcr takes the carry flag as input, so some kind of popf is
397 | ;     needed anyway
398 | ;  2. flag output is relevant, so we don't want to end with IRET
399 | ;  3. we want all registers restored, so we can blindly copy the
400 | ;     addressing scheme from the 286 instruction into the xchg
401 | ;     instructions
402 | ; This design is broken, because
403 | ;  1. after IRET, the IF is set, so an interrupt might occur while running
404 | ;     the self-modifying, non-reentrant code here. This does not matter
405 | ;     until we add a TF setting interrupt handler wrapper, though.
406 | sr_inject:                  ; template patched by emulate_shiftrotate
407 |     sr_xchg1 = $
408 |     xchg    ax, WORD PTR [ds:1234h]     ; TODO: segment prefixes
409 |     push    cx
410 |     mov     cl, 12h
411 |     sr_patch_count = $-1    ; the immediate byte of the MOV CL above
412 |     sr_patch_opcode = $
413 |     shl     ax, cl
414 |     pop     cx
415 |     sr_xchg2 = $
416 |     xchg    ax, WORD PTR [ds:1234h]     ; TODO: segment prefixes
417 | prepare_reenter:            ; common tail of all injected templates: build an interrupt-style frame and re-enter the handler
418 |     pushf
419 |     mov     WORD PTR [cs:emuss_oldbp], bp
420 |     mov     bp, sp
421 |     or      BYTE PTR [bp + 1], 1        ; Re-Set TF
422 |     mov     bp, 1234h
423 |   reenter_segment = $-2     ; written by exit_for_reenter
424 |     push    bp
425 |     mov     bp, 5678h
426 |   reenter_offset  = $-2     ; written by exit_for_reenter
427 |     push    bp
428 |     jmp     emu_reenter
429 | 
430 | imul_nonax_inject:          ; IMUL template: destination is neither AX nor is the source register AX
431 |     push    ax
432 |     push    dx
433 |     mov     ax, 1234h
434 |   imul_immed             = $-2      ; immediate operand, written by emulate_imul (also read by the two templates below)
435 |   imul_nonax_instruction = $
436 |     imul    WORD PTR ds:1234h       ; mod/RM and displacement bytes are patched by emulate_imul
437 |     pop     dx
438 |   imul_nonax_xchg        = $
439 |     xchg    ax, bx                  ; patched to XCHG AX, <destination register>
440 |     pop     ax
441 |     jmp     prepare_reenter
442 | 
443 | imul_dstax_inject:          ; IMUL template: destination is AX
444 |     push    dx
445 |     mov     ax, WORD PTR [cs:imul_immed]
446 |   imul_dstax_instruction = $
447 |     imul    WORD PTR ds:1234h       ; mod/RM and displacement bytes are patched by emulate_imul
448 |     pop     dx
449 |     jmp     prepare_reenter
450 | 
451 | imul_srcax_inject:          ; IMUL template: source is the AX register
452 |   imul_srcax_mov  = $
453 |     mov     ax, [bx]                ; mod/RM byte is patched by emulate_imul
454 |     push    dx
455 |     imul    WORD PTR [cs:imul_immed]
456 |     pop     dx
457 |   imul_srcax_xchg = $
458 |     xchg    ax, bx                  ; patched to XCHG AX, <destination register>
459 |     jmp     prepare_reenter
460 | 
461 | push_immed  dw ?            ; value to push, written by emulate_push8 / emulate_push16
462 | push_inject:                ; template for PUSH imm8 / PUSH imm16
463 |     push    WORD PTR [cs:push_immed]
464 |     jmp     prepare_reenter
465 | 
466 | enter_inject:               ; template for ENTER imm16, 0
467 |     push    bp
468 |     mov     bp, sp
469 |     sub     sp, 1234h
470 |   enter_immed = $-2         ; immediate of the SUB above, written by emulate_enter
471 |     jmp     prepare_reenter
472 | 
473 | leave_inject:               ; template for LEAVE
474 |     mov     sp, bp
475 |     pop     bp
476 |     jmp     prepare_reenter
477 | 
478 | movsreg_inject:             ; template for MOV sreg, r/m16
479 |   movsreg_instruction = $
480 |     mov     ds, WORD PTR ds:1234h   ; mod/RM and displacement bytes are patched by emulate_movsreg
481 |     jmp     prepare_reenter
482 | 
483 | pop_es_inject:              ; POP ES executed inside the emulator
484 |     pop     es
485 |     jmp     prepare_reenter
486 | 
487 | pop_ds_inject:              ; POP DS executed inside the emulator
488 |     pop     ds
489 |     jmp     prepare_reenter
490 | 
491 | int_inject:                 ; template for INT xx
492 |     int     99h
493 |   intnumber = $-1           ; interrupt number byte of the INT above, written by emulate_int
494 |     jmp     prepare_reenter
495 | 
496 | END
497 | 


--------------------------------------------------------------------------------
/int1chk/int1chk.c:
--------------------------------------------------------------------------------
 1 | #include <stdio.h>
 2 | #include <dos.h>
 3 | 
 4 | struct calldata {                  // one recorded invocation of the INT 1 handler
 5 |        unsigned char far *rcsip;   // CS:IP of the innermost stack frame
 6 |        int rflags;                 // flags word of the innermost stack frame
 7 |        unsigned char far *rcsip2;  // CS:IP of the stack frame behind it
 8 |        int rflags2;                // flags word of the stack frame behind it
 9 | };
10 | 
11 | struct calldata last_int1_calls[20];   // ring buffer of the most recent invocations
12 | int i1call_count;                      // number of valid entries, capped at the buffer size
13 | int i1call_writeptr;                   // index of the slot the next invocation is written to
14 | 
15 | #define ARRAY_SIZE(arr_) (sizeof(arr_)/sizeof(arr_[0]))
16 | 
17 | const unsigned char just_iret = 0xCF;  // a single IRET opcode; main() points vector 0xA1 at this byte
18 | 
19 | void interrupt singlestep(int rbp, int rdi, int rsi, int res, int rds,
20 |                           int rdx, int rcx, int rbx, int rax,
21 |                           int rip, int rcs, int rflags,
22 |                           int rip2,int rcs2,int rflags2)
23 | {
24 |     struct calldata *slot = &last_int1_calls[i1call_writeptr];
25 |     // Store both stack frames in the current ring-buffer slot, then advance.
26 |     slot->rcsip   = MK_FP(rcs, rip);
27 |     slot->rflags  = rflags;
28 |     slot->rcsip2  = MK_FP(rcs2, rip2);
29 |     slot->rflags2 = rflags2;
30 |     i1call_writeptr = (i1call_writeptr + 1) % ARRAY_SIZE(last_int1_calls);
31 |     if (i1call_count < ARRAY_SIZE(last_int1_calls))
32 |         i1call_count++;     // saturates once the buffer is full
33 |     delay(10);
34 | }
35 | 
36 | int main()
37 | {
38 |     void (interrupt *timer_irq)(void);
39 |     void (interrupt *old_int1)(void);
40 |     void (interrupt *old_inta1)(void);
41 | 
42 |     timer_irq = getvect(8);
43 |     // Remember the vectors that get replaced, so they can be put back once
44 |     // the traced sequence has run. Otherwise they keep pointing into this
45 |     // program after it has exited.
46 |     old_int1 = getvect(1);
47 |     old_inta1 = getvect(0xA1);
48 |     setvect(0xA1, (void(interrupt*)(void))&just_iret);
49 |     setvect(1, singlestep);
50 | 
51 |                                 // OPCODE         I1/dosbox  I1/XT
52 |     __emit__(0x9C);             // pushf
53 |     __emit__(0xB8, 0x0300u);    // mov  ax, 0x300
54 |     __emit__(0x50);             // push ax
55 |     __emit__(0x9D);             // popf
56 |     __emit__(0xB8, 0x1234u);    // mov  ax, 0x1234  no        no
57 |     __emit__(0xB3, 1);          // mov  bl, 1       YES       no
58 |     __emit__(0x40);             // inc  ax          YES       YES
59 |     __emit__(0x8C, 0xDA);       // mov  dx, ds      YES       YES
60 |     __emit__(0x8E, 0xC2);       // mov  es, ax      YES       YES
61 |     __emit__(0x90);             // nop              YES       no
62 |     __emit__(0x8C, 0xD2);       // mov  dx, ss      YES       YES
63 |     __emit__(0x8E, 0xD2);       // mov  ss, dx      YES       YES
64 |     __emit__(0x90);             // nop              no        no
65 |     __emit__(0xCD, 0xA1);       // int  0xA1        YES       YES
66 |         // first insn of IntA1  //   iret           no      YES/NoTF -> int 0xA1
67 |     __emit__(0x49);             // dec  cx          no        no
68 |     __emit__(0x9D);             // popf             YES       YES
69 |     __emit__(0x48);             // dec  ax        YES/NoTF  YES/NoTF
70 | 
71 |         // Timer-IRQ (1st insn) //                  no      YES/NoTF
72 | 
73 |     // The final popf restored the original flags, so tracing is over:
74 |     // put the original handlers back before doing anything else.
75 |     setvect(1, old_int1);
76 |     setvect(0xA1, old_inta1);
77 | 
78 |     // Print the recorded invocations, oldest first.
79 |     while(i1call_count)
80 |     {
81 |         struct calldata *cur;
82 |         cur = &last_int1_calls[(i1call_writeptr
83 |                                 + 2*ARRAY_SIZE(last_int1_calls)
84 |                                 - i1call_count
85 |                                 ) % ARRAY_SIZE(last_int1_calls)];
86 |         if (cur->rflags & 0x100)    // TF set in the saved flags: a regular single-step
87 |             printf("     %Fp %02X\n", cur->rcsip, *cur->rcsip);
88 |         else
89 |         {
90 |             if (cur->rcsip == &just_iret)
91 |                 printf("IntA1 from %Fp\n", cur->rcsip2);
92 |             else if(cur->rcsip == (void far*)timer_irq)
93 |                 printf("IRQ 0 from %Fp\n", cur->rcsip2);
94 |             else
95 |                 printf("NoTF %Fp %02X\n", cur->rcsip, *cur->rcsip);
96 |         }
97 |         i1call_count--;
98 |     }
99 |     return 0;
100 | }


--------------------------------------------------------------------------------
/int1chk/int1chk.prj:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/karcherm/xtize/a2533f030191b9a408382d0d9f9917db7c2316ec/int1chk/int1chk.prj


--------------------------------------------------------------------------------
/readme.md:
--------------------------------------------------------------------------------
  1 | XTIZE
  2 | =====
  3 | 
  4 | Goal
  5 | ----
  6 | 
  7 | The goal of the XTIZE project is to be able to run unmodified executables generated
  8 | by Borland/Turbo C or Microsoft C targetting the 286 instruction set on a 8088 class
  9 | computer, aka an XT.
 10 | 
 11 | The performance of an XT is quite limited. The performance of emulation without
 12 | suitable hardware assistance (heck! the 8088 does not even have an invalid opcode
 13 | trap) is even more limited. XTIZE is not supposed to be magic, but it will definitely
 14 | cause a notable slowdown compared to native 8088 executables.
 15 | 
 16 | This project was started because I got annoyed that soft configuration and
 17 | initialization tools for plug-in cards like sound cards that were put to the
 18 | market in the early 90's often are compiled targetting a 286 processor, although
 19 | the hardware component itself would work adequately in an XT class computer. I
 20 | decided against manually reengineering the tools, and I considered an "80286 emulating
 21 | automatic debugger" that breaks on all 286 instructions and emulates them a viable way.
 22 | 
 23 | This project is developed to find out the viability of this approach - it comes with no
 24 | guarantee to ever deliver a working product. As long as this project does not even
 25 | contain just basic functionality, I don't bother to provide executables, I just publish
 26 | the source code yet.
 27 | 
 28 | Running hand-written 286 assembly code is *not* a goal of XTIZE. As far as I know, compilers
 29 | of the day did not generate tail calls and did not embed data between instructions of a
 30 | procedure, so tracing out the instructions of a single procedure should be a straight-forward
 31 | task. Code that breaks this assumption is going to break XTIZE.
 32 | 
 33 | Toolchain
 34 | ---------
 35 | 
 36 | The project is developed using Borland C++, but it should work with Turbo C as well.
 37 | The assembler file should not use any TASM features not present in MASM, so porting
 38 | it to MASM / Microsoft C should be possible, too.
 39 | 
 40 | Approach
 41 | --------
 42 | 
 43 | The first approach I am trying to test the basic viability is plain single stepping,
 44 | which is likely too slow for anything useful. Optimizations will likely be added in later.
 45 | 
 46 | My first intention was to auto-patch the code to contain breakpoints on the 286 instructions,
 47 | but telling code and data apart is not that easy, even if only compiler-generated code is
 48 | supported, because compilers add jump tables to the code segment, which do not parse as valid
 49 | 80286 instructions.
 50 | 
 51 | The single-step emulator
 52 | ------------------------
 53 | 
 54 | While I expected single-stepping to be too slow to be useful, it turned out to not be the case.
 55 | As the intended use case of XTIZE is hardware setup software and not productivity software, there
 56 | is a wide gap between "annoyingly slow" (which single-step emulation definitely is) and "unusably slow",
 57 | as even a one-minute run of a setup program on an XT is faster than swapping a piece of hardware into
 58 | a more modern computer, running the EEPROM modifying setup tool on that computer and swapping the piece
 59 | of hardware back. In practice, a run of the command-line hardware setup program provided with the
 60 | Aztech Sound Galaxy 16 Pro II is still within 10 seconds.
 61 | 
 62 | Using single-stepping for instruction emulation seems like a piece of cake at first, because you get
 63 | a single-step interrupt just before each instruction is executed. You can look at the instruction, and
 64 | if it is a 286 real-mode instruction, you emulate it. If it is a 8086 instruction, you just let the
 65 | processor execute it. And indeed, single-stepping quickly yielded first results in getting at least
 66 | something printed to the screen when executing the hardware setup program without the necessary command
 67 | line parameters. Getting it to work "perfectly" turned out to be a considerable amount of work and
 68 | research (as always).
 69 | 
 70 | The main emulation primitive of the single-step emulator is "tail-call injection". If an instruction
 71 | is recognized as being in need of emulation, emulation code for that instruction is prepared (by
 72 | hot-patching a template inside the emulator), and the return address of the INT1 stack frame is
 73 | adjusted to point there. Also the TF is cleared to avoid stepping through emulator code. This
 74 | emulation code first performs the effect of the instruction to emulate using only 8086 instructions.
 75 | Then the emulation code sets up a fake INT1 frame (containing the address of the instruction following
 76 | the emulated instruction and having the TF set) and jumps into the single-step handler.
 77 | 
 78 | There are two main advantages of this approach: There is no interrupt stack frame on the stack and
 79 | all general-purpose registers and the flags have user-program values while the instruction is emulated.
 80 | This means the emulator does not need to fully understand operands of the instruction (although it does
 81 | need to if the emulation code is going to clobber registers. The `IMUL` emulation is a nice example of
 82 | the effort needed to deal with clobbered registers). Also it greatly simplifies emulation of stack
 83 | related instructions (like `PUSH imm8/16`, `ENTER` and `LEAVE`).
 84 | 
 85 | In the following situations, a single-step interrupt is triggered that is (usually) not relevant to
 86 | the currently running user program:
 87 | 
 88 |  - An external interrupt was recognized (IRQ or NMI in IBM PC naming). The debugger "gets notified" by
 89 |    getting a single-step with TF *clear* in the pushed flags and the return address pointing to the first
 90 |    instruction of the interrupt handler. As long as I assume that the user program does not install
 91 |    interrupt handlers, the emulator can just ignore this invocation of the handler. The stack frame
 92 |    looks like this:
 93 | 
 94 |      - IRQ handler entry offset
 95 |      - IRQ handler entry segment
 96 |      - flags with `TF` and `IF` cleared
 97 |      - user code IP
 98 |      - user code CS
 99 |      - user code flags (with `TF` set)
100 | 
101 |  - An internal interrupt is caused by the current instruction (`DIV` with error, `INT`, `INT3`, `INTO` with
102 |    overflow flag set). The stack frame looks the same as in the case of an external interrupt. There are two
103 |    important things to consider, though:
104 | 
105 |      - You do *not* get a single-step invocation pointing to the instruction the interrupt handler returns
106 |        to (that is, the address called "user code CS:IP" in the stack frame listed above). You *do* get
107 |        that single-step invocation for an external interrupt, though. If an `INT` instruction is followed by
108 |        a 286 instruction, the 286 instruction is "missed" unless special precautions are taken.
109 |      - There are BIOS and DOS requests that do not return using IRET from a software interrupt, and thus
110 |        disable single-stepping as side effect:
111 |           - For example, on the Phoenix XT BIOS, `INT 16h`, subfunction 1 (poll keyboard) returns using
112 |             `RETF 2` to return the Zero Flag to indicate whether a key has been pressed.
113 |           - The infamous partition read/write requests by DOS (`INT 25`/`INT 26`) return with `RETF` leaving
114 |             the flags on the stack.
115 | 
116 | On the other hand, in some cases, you do *not* get a relevant single stepping interrupt:
117 | 
118 |  - As already mentioned in the previous paragraph, there is no single-step interrupt pointing to the instruction
119 |    immediately following an instruction that triggers a software interrupt.
120 | 
121 |  - After any move to a segment register or popping a segment register, there is no single-step interrupt for
122 |    the subsequent instruction. The intention is to prevent pushing the state to some invalid place for the
123 |    single-step interrupt between setting `SS` and setting `SP` when switching stacks (Does not apply to the buggy
124 |    first revision of the 8086/8088). This should generally be no issue for stack setup, because SP is nearly always
125 |    initialized by a 8086-compatible MOV instruction. This *is* an issue for other destination segments, though. IBM
126 |    observed the pattern of setting `ES` to the screen buffer segment (`B800` for color text mode page 0) immediately
127 |    followed by multiplying the target y coordinate by 80: `IMUL BX, [bp+target_y], 80`. Missing this instruction is
128 |    obviously fatal.
129 | 
130 |  - A conceptual issue: You never get a single-step interrupt for the instruction you just returned to (which obviously
131 |    would cause an infinite loop otherwise). You need to keep in mind that this also applies if the return address has been
132 |    patched. So after emulating one instruction, you may not blindly return to the next instruction, but you need to check
133 |    whether that instruction needs to be emulated, too. A common pattern that exhibits back-to-back 286 instructions is
134 |    a function call with multiple constant parameters, all pushed using `PUSH imm8` or `PUSH imm16`.
135 | 
136 | The emulator deals with the challenges the following way:
137 | 
138 |  - single-step invocations with the TF clear are ignored. This gets rid of the spurious calls mentioned above, but currently
139 |    means that user-installed hardware interrupt handlers do not undergo emulation. This is subject to change.
140 | 
141 |    - Software interrupts are dealt with by running the `INT` instruction in an injected tail call and setting TF on
142 |      emulator-reentry. This solves several problems:
143 | 
144 |      - Setting TF after invoking the interrupt deals with the non-IRETting services mentioned above.
145 | 
146 |      - Not injecting the fix-up handler into the stack avoids issues with the `INT 25h` / `INT 26h`
147 |        interface of MS-DOS, where the interrupt handler returns with the stack in an "unconventional
148 |        state". The approach is independent of the way the stack looks after the handler returned.
149 | 
150 |      - Running the `INT` instruction inside emulator code and manually re-entering the single-step
151 |        handler after the `INT` instruction returns avoids the issue that the single-step interrupt
152 |        after a software interrupt call is "missing".
153 | 
154 |      The emulation still needs one special case: Program-terminating CP/M-like DOS calls (`INT 20h`, `INT 27h`,
155 |      `INT 21h` with `AH = 0`) determine the identity of the caller program by looking at the return code segment.
156 |      These instructions are just executed directly without tail-call injection.
157 | 
158 |  - Instructions that update segment registers (except SS) get copied into an injected tail call. This moves
159 |    the unwanted effect of missing the next instruction into emulator code. As always for injected tail calls,
160 |    the call ends in setting up a fake stack frame to the next instruction (we were about to miss) and jumps
161 |    into the emulator.


--------------------------------------------------------------------------------
/xtize.c:
--------------------------------------------------------------------------------
  1 | #include <stdio.h>
  2 | #include <string.h>
  3 | #include <dir.h>
  4 | #include <dos.h>
  5 | 
  6 | typedef unsigned char byte;
  7 | /* Block passed via ES:BX to INT 21h/AX=4B01h by LoadForDebugging(); field order is presumably dictated by DOS - verify before reordering. */
  8 | struct ExecParameters {
  9 |     unsigned env_segment;                       /* main() sets this to 0 */
 10 |     char far* cmdline;                          /* main() points this at the length-prefixed argument tail it builds in the PSP */
 11 |     struct fcb far* arg1;                       /* main() sets this to 0 */
 12 |     struct fcb far* arg2;                       /* main() sets this to 0 */
 13 |     byte far* entry_stack;                      /* never written by the C code in this file; presumably filled in by DOS - TODO confirm */
 14 |     void (far* __cdecl entry_point)(int dummy); /* never written by the C code in this file; presumably filled in by DOS - TODO confirm */
 15 |     unsigned psp_segment;                       /* stored by LoadForDebugging() from getpsp() after a successful load */
 16 | };
 17 | 
 18 | void setpsp(unsigned newpsp)    /* select newpsp as the current PSP: INT 21h with AH=50h, BX=newpsp */
 19 | {
 20 |     union REGS regs;            /* register image handed to DOS; the returned registers overwrite it and are ignored */
 21 |     regs.x.bx = newpsp;         /* BX carries the PSP segment to select */
 22 |     regs.h.ah = 0x50;           /* AH picks the DOS function */
 23 |     intdos(&regs, &regs);
 24 | }
 25 | 
 26 | int LoadForDebugging(const char *program, struct ExecParameters *state)  /* INT 21h/AX=4B01h load; returns 0 on success, -1 if DOS sets carry */
 27 | {
 28 |     struct SREGS segs;
 29 |     union REGS regs;
 30 |     segs.ds   = FP_SEG((void far*)program);     /* DS:DX -> path of the program to load */
 31 |     regs.x.dx = FP_OFF((void far*)program);
 32 |     segs.es   = FP_SEG((void far*)state);       /* ES:BX -> parameter block */
 33 |     regs.x.bx = FP_OFF((void far*)state);
 34 |     regs.x.ax = 0x4B01;
 35 |     intdosx(&regs, &regs, &segs);
 36 |     if (regs.x.cflag != 0)                      /* carry flag reported back by intdosx: the load failed */
 37 |         return -1;
 38 |     state->psp_segment = getpsp();              /* remember the PSP that is current right after the load */
 39 |     setpsp(_psp);                               /* then switch back to this program's own PSP */
 40 |     return 0;
 41 | }
 42 | 
 43 | void far EnterSingleStep(struct ExecParameters far *state);   /* defined outside this file; main() calls it right after installing EmulatingSS on vector 1 */
 44 | void interrupt CountingSS();                                  /* defined outside this file; not referenced by the code in this file */
 45 | long far GetCount(void);                                      /* defined outside this file; only mentioned in a commented-out printf in main() */
 46 | void interrupt EmulatingSS();                                 /* defined outside this file; main() installs it via setvect() on vector 0xA1 (test mode) or vector 1 */
 47 | 
 48 | void do_assert_eq(const char* x, unsigned v1, unsigned v2)   /* print a FAIL line labelled x when v1 and v2 differ */
 49 | {
 50 |     if (v1 == v2) return;                                    /* matching values: stay silent */
 51 |     printf("FAIL: %s - %04X != %04X\n", x, v1, v2);
 52 | }
 53 | 
 54 | #define ASSERT_EQ(word1, word2) do_assert_eq(#word1 " == " #word2, word1, word2)   /* label is "word1 == word2": adjacent string literals concatenate by themselves, and pasting them with ## would not form a valid token */
 55 | 
 56 | int main(int argc, char** argv)   /* "/T" as first argument runs the built-in self-test; otherwise argv[1] is loaded and entered via EnterSingleStep(). Returns 2 without arguments, 1 on lookup/load failure, else 0. */
 57 | {
 58 |     static int i = 0x1234;        /* operand of the "ror [i], 3" test; the asm block refers to it by offset */
 59 |     struct ExecParameters p;
 60 |     char *target;
 61 |     p.env_segment = 0;
 62 |     p.cmdline = MK_FP(_psp, 0x81);  /* start of this program's own command tail inside its PSP */
 63 |     p.arg1 = 0;
 64 |     p.arg2 = 0;
 65 |     /* Everything below needs at least one argument: either /T or the program to run. */
 66 |     if (argc == 1)
 67 |     {
 68 |         fputs("Missing program name\n", stderr);
 69 |         return 2;
 70 |     }
 71 |     if (stricmp(argv[1], "/T") == 0)
 72 |     {
 73 |         unsigned old_sp = _SP;
 74 |         unsigned new_sp;
 75 |         unsigned new_bp;
 76 |         unsigned leave_sp;
 77 |         unsigned old_bp = _BP;
 78 |         unsigned leave_bp;
 79 |         unsigned shl_80_3;
 80 |         unsigned char rol_F7_2;
 81 |         unsigned mul_3_4, mul_6_8, mul_2_5, mul_7_11;
 82 |         unsigned tempds, tempes;
 83 |         puts("Test mode.");
 84 |         setvect(0xA1, EmulatingSS);   /* each "int 0xA1" below enters EmulatingSS; presumably it then handles the instruction that follows - confirm in the handler source */
 85 |         asm {
 86 |             int 0xA1
 87 |             //enter 10, 0
 88 |             db 0xC8, 0x0A, 0x00, 0x00
 89 |             mov     cx, sp
 90 |             mov     dx, bp
 91 |             int 0xA1
 92 |             // push 0
 93 |             db 0x6A, 0x00
 94 |             int 0xA1
 95 |             // push 0x80
 96 |             db 0x68, 0x80, 0x00
 97 |             int 0xA1
 98 |             // shl [WORD PTR bp-14], 3
 99 |             db 0xC1, 0x66, 0xF2, 0x03
100 |             mov     bx, WORD PTR [bp-14]
101 |             //leave
102 |             int 0xA1
103 |             db 0xC9
104 | 
105 |             mov     [new_sp], cx
106 |             mov     [new_bp], dx
107 |             mov     [shl_80_3], bx
108 | 
109 |             mov al, 0xF7
110 |             int 0xA1
111 |             //rol al, 2
112 |             db 0xC0, 0xC0, 2
113 |             mov     [rol_F7_2], al
114 |             int 0xA1
115 |             //ror [i], 3
116 |             db 0xC1, 0x0E
117 |             dw offset i
118 |             db 3
119 | 
120 |             mov     ax, 3
121 |             int 0xA1
122 |             //imul  bx, ax, 4
123 |             db      0x6B, 0xD8, 0x04
124 |             mov     [mul_3_4], bx
125 | 
126 |             mov     bx, 6
127 |             int 0xA1
128 |             // imul ax, bx, 8
129 |             db      0x6B, 0xC3, 0x08
130 |             mov     [mul_6_8], ax
131 | 
132 |             mov     cx, 2
133 |             int 0xA1
134 |             // imul    bx, cx, 5
135 |             db      0x6B, 0xD9, 0x05
136 |             mov     [mul_2_5], bx
137 | 
138 |             mov     ax, 7
139 |             int 0xA1
140 |             //imul    ax, ax, 11
141 |             db      0x6B, 0xC0, 0x0B
142 |             mov     [mul_7_11], ax
143 | 
144 |             mov     ax, es
145 |             int 0xA1
146 |             mov     es, ax
147 | 
148 |             mov     ax, ss
149 |             int 0xA1
150 |             mov     ss, ax
151 | 
152 |             push    ds
153 | 
154 |             push    cs
155 |             int 0xA1
156 |             pop     ds
157 |             mov     [tempds], ds
158 | 
159 |             pop     ds
160 | 
161 |             push    cs
162 |             int 0xA1
163 |             pop     es
164 |             mov     [tempes], es
165 |         }
166 |         leave_bp = _BP;               /* BP/SP after the asm block, compared below with the values saved on entry */
167 |         leave_sp = _SP;
168 |         ASSERT_EQ(old_sp,      leave_sp);
169 |         ASSERT_EQ(old_sp - 2,  new_bp);   /* enter pushed BP (2 bytes) before copying SP to BP */
170 |         ASSERT_EQ(old_sp - 12, new_sp);   /* ...and then reserved 10 more bytes */
171 |         ASSERT_EQ(old_bp,      leave_bp);
172 |         ASSERT_EQ(0x80 << 3,   shl_80_3);
173 |         ASSERT_EQ(0xDF,        rol_F7_2);
174 |         ASSERT_EQ((0x1234 >> 3) | 0x8000, i);
175 |         ASSERT_EQ(3*4,         mul_3_4);
176 |         ASSERT_EQ(6*8,         mul_6_8);
177 |         ASSERT_EQ(2*5,         mul_2_5);
178 |         ASSERT_EQ(7*11,        mul_7_11);
179 |         ASSERT_EQ(_CS,         tempds);
180 |         ASSERT_EQ(_CS,         tempes);
181 |         return 0;
182 |     }
183 |     target = searchpath(argv[1]);
184 |     if (target == NULL)
185 |     {
186 |         fprintf(stderr, "%s not found\n", argv[1]);
187 |         return 1;
188 |     }
189 |     /* Skip leading blanks and the program name, then turn the byte just before the remaining arguments into their length byte. */
190 |     while(*p.cmdline == ' ')
191 |         p.cmdline++;
192 |     while(*p.cmdline > ' ')
193 |         p.cmdline++;
194 |     p.cmdline--;
195 |     *p.cmdline = *(unsigned char far*)(MK_FP(_psp, 0x80)) - (FP_OFF(p.cmdline) - 0x80);   /* old length must be read through a far pointer: a near cast would drop the PSP segment */
196 |     if (LoadForDebugging(target, &p) < 0)
197 |     {
198 |         fputs("Error\n", stderr);
199 |         return 1;
200 |     }
201 |     setvect(1, EmulatingSS);          /* vector 1 is the CPU's single-step interrupt */
202 |     EnterSingleStep(&p);
203 |     //printf("\n%ld steps observed", GetCount());
204 |     return 0;
205 | }


--------------------------------------------------------------------------------
/xtize.prj:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/karcherm/xtize/a2533f030191b9a408382d0d9f9917db7c2316ec/xtize.prj


--------------------------------------------------------------------------------