├── .gitignore ├── Cargo.toml ├── README.md ├── build.rs ├── justfile ├── old ├── v1 │ ├── bin.h │ ├── build.sh │ ├── decode.c │ ├── dis86.c │ ├── dis86.h │ ├── dis86_private.h │ ├── header.h │ ├── instr.h │ ├── main.c │ ├── oper.h │ ├── operand.h │ ├── print.c │ ├── reg.h │ ├── test.c │ └── test_cases.c └── v2 │ ├── .gitignore │ ├── build.sh │ ├── meson.build │ ├── scripts │ ├── crash_info.py │ ├── instr_encoding_tbl_cvt.py │ └── opcode_tbl_cvt.py │ ├── src │ ├── app │ │ ├── dis86.c │ │ ├── exec_mode.h │ │ ├── exec_mode_decomp.c │ │ ├── exec_mode_dis.c │ │ └── meson.build │ ├── array.h │ ├── binary.h │ ├── cmdarg │ │ ├── cmdarg.c │ │ ├── cmdarg.h │ │ └── meson.build │ ├── core.c │ ├── datamap.c │ ├── datamap.h │ ├── decode.c │ ├── decompile │ │ ├── config.c │ │ ├── config.h │ │ ├── decompile.c │ │ ├── decompile_private.h │ │ ├── expr.c │ │ ├── expr.h │ │ ├── labels.h │ │ ├── symbols.c │ │ ├── symbols.h │ │ ├── transform.c │ │ ├── transform.h │ │ ├── type.c │ │ ├── type.h │ │ ├── util.h │ │ ├── value.c │ │ └── value.h │ ├── dis86.h │ ├── dis86_private.h │ ├── header.h │ ├── instr.c │ ├── instr.h │ ├── instr_tbl.h │ ├── meson.build │ ├── print_intel_syntax.c │ ├── segoff.h │ ├── str.h │ └── test │ │ ├── meson.build │ │ ├── test_datamap.c │ │ ├── test_decode.c │ │ └── test_decode_cases.inc │ ├── subprojects │ └── bsl │ └── test.sh ├── sample ├── func_01.s ├── func_02_old.c ├── func_03_new.c ├── func_04_manual.c └── run.sh ├── src ├── app.rs ├── asm │ ├── decode.rs │ ├── instr.rs │ ├── instr_fmt.rs │ ├── intel_syntax.rs │ └── mod.rs ├── ast.rs ├── bin │ ├── dis86.rs │ ├── mzfile.rs │ └── parse_config.rs ├── binary.rs ├── binfmt │ ├── mod.rs │ └── mz │ │ ├── decode.rs │ │ ├── defs.rs │ │ ├── methods.rs │ │ ├── mod.rs │ │ ├── overlay.rs │ │ ├── print.rs │ │ └── util.rs ├── bsl │ ├── bind.rs │ ├── mod.rs │ └── wrap.rs ├── config.rs ├── control_flow.rs ├── gen.rs ├── ir │ ├── build.rs │ ├── def.rs │ ├── display.rs │ ├── fin.rs │ ├── fuse.rs │ ├── helpers.rs │ ├── mod.rs │ 
├── opcode.rs │ ├── opt.rs │ ├── sym.rs │ └── util.rs ├── lib.rs ├── region.rs ├── segoff.rs ├── spec.rs ├── types.rs └── util │ ├── arrayvec.rs │ ├── dvec.rs │ ├── mod.rs │ └── parse.rs └── subprojects └── bsl ├── .gitignore ├── README.md ├── bsl.rs ├── build.sh ├── foo.bsl ├── meson.build ├── src ├── app │ ├── bsl_check.c │ └── meson.build ├── bsl │ ├── bsl.c │ ├── bsl.h │ ├── meson.build │ └── test_bsl.c └── meson.build └── test.sh /.gitignore: -------------------------------------------------------------------------------- 1 | *~ 2 | \#*# 3 | *.dSYM/ 4 | .DS_Store 5 | old/v1 6 | /target 7 | /Cargo.lock 8 | -------------------------------------------------------------------------------- /Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "dis86" 3 | version = "0.1.0" 4 | edition = "2021" 5 | 6 | # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html 7 | 8 | [dependencies] 9 | pico-args = "0.5.0" 10 | itertools = "0.12.1" 11 | static_assertions = "1.1.0" 12 | 13 | [build-dependencies] 14 | cc = "1.0.87" -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Dis86 2 | 3 | Dis86 is a decompiler for 16-bit real-mode x86 DOS binaries. 4 | 5 | # Purpose 6 | 7 | Dis86 has been built for doing reverse-engineering work such as 8 | analyzing and re-implementing old DOS video games from the early 1990s. The project is a work-in-progress and the development team makes no guarantees 9 | it will work or be useful out-of-the-box for any applications other than their own. Features and improvements are made on-demand as needed. 
10 | 11 | ## Goals and Non-goals 12 | 13 | Goals: 14 | 15 | - Support reverse-engineering 16-bit real-mode x86 DOS binaries 16 | - Generate code that is semantically correct (insofar as practical) 17 | - Generate code that integrates well with a hybrid-runtime system (Hydra) [currently unreleased] 18 | - Avoid making many assumptions or using heuristics that can lead to broken decompiled code 19 | - Be hackable and easy to extend as required 20 | - Automate away common manual transformations and let a human reverser focus on the subjective tasks a computer cannot do well (e.g. naming things) 21 | 22 | Non-goals: 23 | 24 | - Output code beauty (semantic correctness is more important) 25 | - Re-compilable to equivalent binaries 26 | 27 | Also, we generally prefer manual configuration/annotation tables to flawed heuristics that will generate incorrect code. 28 | 29 | ## Discussion of Internals 30 | 31 | Discussion of the internals will be published periodically on the author's blog: [xorvoid](https://www.xorvoid.com) 32 | 33 | ## Building 34 | 35 | Assuming you have rust and cargo installed: 36 | 37 | ``` 38 | just build 39 | ``` 40 | 41 | ## Some Commands 42 | 43 | Emit Disassembly: 44 | 45 | ``` 46 | ./target/debug/dis86 --config --binary --name --emit-dis 47 | ``` 48 | 49 | Emit initial Intermediate Representation (IR): 50 | 51 | ``` 52 | ./target/debug/dis86 --config --binary --name --emit-ir-initial 53 | ``` 54 | 55 | Emit final (optimized) Intermediate Representation (IR): 56 | 57 | ``` 58 | ./target/debug/dis86 --config --binary --name --emit-ir-final 59 | ``` 60 | 61 | Visualize the control-flow graph with graphviz: 62 | 63 | ``` 64 | ./target/debug/dis86 --config --binary --name --emit-graph /tmp/ctrlflow.dot 65 | dot -Tpng /tmp/ctrlflow.dot > /tmp/control_flow_graph.png 66 | open /tmp/control_flow_graph.png 67 | ``` 68 | 69 | Emit inferred higher-level control-flow structure: 70 | 71 | ``` 72 | ./target/debug/dis86 --config --binary --name --emit-ctrlflow 
73 | ``` 74 | 75 | Emit an Abstract Syntax Tree (AST): 76 | 77 | ``` 78 | ./target/debug/dis86 --config --binary --name --emit-ast 79 | ``` 80 | 81 | Emit C code: 82 | 83 | ``` 84 | ./target/debug/dis86 --config --binary --name --emit-code 85 | ``` 86 | 87 | ## Caveats & Limitations 88 | 89 | The primary development goal is to support an ongoing reverse-engineering and reimplementation project. The decompiler is also designed to 90 | emit code that integrates well with a hybrid runtime system (called Hydra) that is used to run a partially-decompiled / reimplemented 91 | project. As such, uses that fall outside this scope have not been considered and may have numerous unknown issues. 92 | 93 | Some specific known limitations: 94 | 95 | - The decompiler accepts only a flat binary region for the text segment. It doesn't handle common binary file-formats (e.g. MZ) at the moment. 96 | - Handling of many 8086 opcodes is unimplemented in the assembly->ir build step. Implementations are added as needed. 97 | - Handling of some IR ops is unimplemented in the ir->ast convert step. Implementations are added as needed. 98 | - Control-flow synthesis is limited to while-loops, if-stmts, and switch-stmts. If-else is unimplemented. 99 | - Block scheduling and placement are far from optimal for more complicated control-flow. 100 | - ... and many more ... 101 | 102 | ## Future Plans / Wishlist 103 | 104 | Feature wishlist 105 | 106 | - Array accesses 107 | - Compound types (structs and unions) 108 | - Synthesizing struct/union member access 109 | - If-else statements 110 | - Pointer analysis and arithmetic 111 | - More "u16 pair -> u32" fusing 112 | - Improved type-aware IR 113 | - Less verbose output C code patterns for common operations (e.g. passing pointer as a function call arg) 114 | 115 | ## Prehistoric versions 116 | 117 | Dis86 began life as a simple disassembler and 1-to-1 instruction => C-statement decompiler that integrated well with the Hydra Runtime. 
Over time it gained complexity and it became difficult to implement more 118 | sophisticated transformations. So, it was rebuilt and rearchitected with a proper SSA IR. 119 | 120 | The older versions remain in the repo under `old/`. In particular, `old/v2` was much less sophisticated albeit more complete in terms of the input machine-code it could handle. 121 | 122 | These versions remain as sometimes they are still useful when the latest version is missing some feature. 123 | -------------------------------------------------------------------------------- /build.rs: -------------------------------------------------------------------------------- 1 | fn main() { 2 | println!("cargo:rerun-if-changed=subprojects/bsl/src/bsl/bsl.c"); 3 | println!("cargo:rerun-if-changed=subprojects/bsl/src/bsl/bsl.h"); 4 | 5 | cc::Build::new() 6 | .cargo_warnings(false) 7 | .file("subprojects/bsl/src/bsl/bsl.c") 8 | .compile("bsl_c"); 9 | } 10 | -------------------------------------------------------------------------------- /justfile: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env just --justfile 2 | 3 | # List all available recipies 4 | list: 5 | just --list 6 | 7 | # Edit the justfile 8 | edit: 9 | emacs -nw justfile 10 | 11 | # Build the repository 12 | build: 13 | cargo build 14 | 15 | # Test the repository 16 | test *opts: 17 | cargo test {{opts}} 18 | 19 | # Show control-flow-graph using graphviz 20 | vis name: 21 | just run {{name}} --emit-graph /tmp/ctrlflow.dot && dot -Tpng /tmp/ctrlflow.dot > /tmp/control_flow_graph.png && open /tmp/control_flow_graph.png 22 | 23 | # A temporary command for dev build-test-cycle 24 | run name *opts: build 25 | ./target/debug/dis86 --config ../gizmo/build/src/hydra/dis86_config.bsl --binary-exe ../gizmo/ISO/ssg.exe --name {{name}} {{opts}} 26 | 27 | # A temporary command for dev build-test-cycle 28 | run-old: 29 | ./old/v2/build/src/app/dis86 decomp --config 
../gizmo/build/src/hydra/dis86_config.bsl --binary-exe ../gizmo/ISO/ssg.exe --start-addr 0622:0922 --end-addr 0622:09e5 30 | 31 | rundiff a b: build 32 | just run --emit-{{a}} /tmp/a 33 | just run --emit-{{b}} /tmp/b 34 | opendiff /tmp/a /tmp/b -------------------------------------------------------------------------------- /old/v1/bin.h: -------------------------------------------------------------------------------- 1 | #include "header.h" 2 | 3 | typedef struct bin bin_t; 4 | struct bin 5 | { 6 | u8 * mem; 7 | size_t len; 8 | size_t idx; 9 | size_t base_addr; 10 | }; 11 | 12 | static inline void bin_init(bin_t *b, size_t base_addr, char *mem, size_t len) 13 | { 14 | b->mem = malloc(len); 15 | memcpy(b->mem, mem, len); 16 | b->len = len; 17 | b->idx = base_addr; 18 | b->base_addr = base_addr; 19 | } 20 | 21 | static inline u8 bin_byte_at(bin_t *b, size_t idx) 22 | { 23 | if (idx < b->base_addr) FAIL("Binary access below start of region"); 24 | if (idx >= b->base_addr + b->len) FAIL("Binary access beyond end of region"); 25 | return b->mem[idx - b->base_addr]; 26 | } 27 | 28 | static inline u8 bin_fetch_u8(bin_t *b) 29 | { 30 | u8 byte = bin_byte_at(b, b->idx); 31 | b->idx++; 32 | return byte; 33 | } 34 | 35 | static inline u16 bin_fetch_u16(bin_t *b) 36 | { 37 | u8 low = bin_fetch_u8(b); 38 | u8 high = bin_fetch_u8(b); 39 | return (u16)high << 8 | (u16)low; 40 | } 41 | 42 | static inline size_t bin_baseaddr(bin_t *b) 43 | { 44 | return b->base_addr; 45 | } 46 | 47 | static inline size_t bin_location(bin_t *b) 48 | { 49 | return b->idx; 50 | } 51 | 52 | static inline size_t bin_length(bin_t *b) 53 | { 54 | return b->len; 55 | } 56 | 57 | static inline void bin_dump(bin_t *b) 58 | { 59 | printf("BIN DUMP LOCATION %zx: ", b->idx); 60 | size_t end = MIN(b->idx + 16, b->base_addr + b->len); 61 | for (size_t idx = b->idx; idx < end; idx++) { 62 | printf("%02x ", bin_byte_at(b, idx)); 63 | } 64 | printf("\n"); 65 | } 66 | 
-------------------------------------------------------------------------------- /old/v1/build.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | set -e 3 | 4 | CFLAGS="-std=c99 -Wall -Werror -Wno-unused-variable -Wno-unused-function -O2 -g" 5 | 6 | LIB_SRC="dis86.c decode.c print.c" 7 | 8 | SRC="main.c $LIB_SRC" 9 | clang $CFLAGS -o dis86 $SRC 10 | 11 | SRC="test.c $LIB_SRC" 12 | clang $CFLAGS -o test $SRC 13 | -------------------------------------------------------------------------------- /old/v1/dis86.c: -------------------------------------------------------------------------------- 1 | #include "dis86_private.h" 2 | 3 | dis86_t *dis86_new(size_t base_addr, char *mem, size_t mem_sz) 4 | { 5 | dis86_t *d = calloc(1, sizeof(dis86_t)); 6 | bin_init(d->b, base_addr, mem, mem_sz); 7 | return d; 8 | } 9 | 10 | void dis86_delete(dis86_t *d) 11 | { 12 | free(d); 13 | } 14 | 15 | size_t dis86_position(dis86_t *d) 16 | { 17 | return bin_location(d->b); 18 | } 19 | 20 | size_t dis86_baseaddr(dis86_t *d) 21 | { 22 | return bin_baseaddr(d->b); 23 | } 24 | 25 | size_t dis86_length(dis86_t *d) 26 | { 27 | return bin_length(d->b); 28 | } 29 | -------------------------------------------------------------------------------- /old/v1/dis86.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #include 3 | #include 4 | 5 | #ifdef __cplusplus 6 | extern "C" { 7 | #endif 8 | 9 | typedef struct dis86 dis86_t; 10 | typedef struct dis86_instr dis86_instr_t; 11 | 12 | /* Create new instance: deep copies the memory */ 13 | dis86_t *dis86_new(size_t base_addr, char *mem, size_t mem_sz); 14 | 15 | /* Destroys an instance */ 16 | void dis86_delete(dis86_t *d); 17 | 18 | /* Get next instruction */ 19 | dis86_instr_t *dis86_next(dis86_t *d, size_t *addr, size_t *n_bytes); 20 | 21 | /* Get Position */ 22 | size_t dis86_position(dis86_t *d); 23 | 24 | /* Get Baseaddr */ 25 | size_t 
#define FAIL(...) do { fprintf(stderr, "FAIL: "); fprintf(stderr, __VA_ARGS__); fprintf(stderr, "\n"); exit(42); } while(0)

/*
 * Read an entire file into a freshly allocated buffer.
 *
 * filename: path of the file to read.
 * out_sz:   if non-NULL, receives the number of bytes read.
 *
 * Returns a malloc'd buffer that the caller must free(). The buffer is not
 * NUL-terminated. Any failure (open, seek, size query, allocation, short
 * read) aborts the process via FAIL().
 */
static inline char *read_file(const char *filename, size_t *out_sz)
{
  /* Binary mode: the contents are raw machine code, not text. */
  FILE *fp = fopen(filename, "rb");
  if (!fp) FAIL("Failed to open: '%s'", filename);

  if (fseek(fp, 0, SEEK_END) != 0) FAIL("Failed to seek: '%s'", filename);
  /* ftell() reports errors as -1, which must not be turned into a size. */
  long end_pos = ftell(fp);
  if (end_pos < 0) FAIL("Failed to determine size of: '%s'", filename);
  if (fseek(fp, 0, SEEK_SET) != 0) FAIL("Failed to seek: '%s'", filename);

  size_t file_sz = (size_t)end_pos;

  /* malloc(0) may legitimately return NULL; always request at least a byte
     so an empty file is not reported as an allocation failure. */
  char *mem = malloc(file_sz ? file_sz : 1);
  if (!mem) FAIL("Failed to allocate");

  size_t n = fread(mem, 1, file_sz, fp);
  if (n != file_sz) FAIL("Failed to read");
  fclose(fp);

  if (out_sz) *out_sz = file_sz;
  return mem;
}
dis86_t *dis_exit = NULL; 5 | static void on_fail() 6 | { 7 | if (!dis_exit) return; 8 | bin_dump(dis_exit->b); 9 | } 10 | 11 | typedef struct segoff segoff_t; 12 | struct segoff 13 | { 14 | u16 seg; 15 | u16 off; 16 | }; 17 | 18 | static u16 parse_hex_u16(const char *s, size_t len) 19 | { 20 | if (len > 4) FAIL("Hex string too long to fit in u16"); 21 | 22 | u16 ret = 0; 23 | for (size_t i = 0; i < len; i++) { 24 | char c = s[i]; 25 | if ('0' <= c && c <= '9') { 26 | ret = ret*16 + (c-'0'); 27 | } else if ('a' <= c && c <= 'f') { 28 | ret = ret*16 + (c-'a'+10); 29 | } else if ('A' <= c && c <= 'F') { 30 | ret = ret*16 + (c-'A'+10); 31 | } else { 32 | FAIL("Invalid hex string: '%.*s'", (int)len, s); 33 | } 34 | } 35 | 36 | return ret; 37 | } 38 | 39 | static segoff_t parse_segoff(const char *s) 40 | { 41 | const char *end = s + strlen(s); 42 | 43 | const char *colon = strchr(s, ':'); 44 | if (!colon) FAIL("Invalid segoff: '%s'", s); 45 | 46 | segoff_t ret; 47 | ret.seg = parse_hex_u16(s, colon-s); 48 | ret.off = parse_hex_u16(colon+1, end-(colon+1)); 49 | return ret; 50 | } 51 | 52 | static size_t segoff_abs(segoff_t s) 53 | { 54 | return (size_t)s.seg * 16 + (size_t)s.off; 55 | } 56 | 57 | int main(int argc, char *argv[]) 58 | { 59 | atexit(on_fail); 60 | 61 | if (argc != 4) { 62 | fprintf(stderr, "usage: %s \n", argv[0]); 63 | return 1; 64 | } 65 | const char *filename = argv[1]; 66 | segoff_t start = parse_segoff(argv[2]); 67 | segoff_t end = parse_segoff(argv[3]); 68 | 69 | size_t start_idx = segoff_abs(start); 70 | size_t end_idx = segoff_abs(end); 71 | 72 | size_t mem_sz = 0; 73 | char *mem = read_file(filename, &mem_sz); 74 | 75 | /* printf("start_idx: %zu\n", start_idx); */ 76 | /* printf("end_idx: %zu\n", end_idx); */ 77 | /* hexdump((u8*)mem+start_idx, end_idx-start_idx); */ 78 | /* exit(42); */ 79 | 80 | char *region = &mem[start_idx]; 81 | size_t region_sz = end_idx - start_idx; 82 | 83 | dis86_t *d = dis86_new(start_idx, region, region_sz); 84 | if 
(!d) FAIL("Failed to allocate dis86 instance"); 85 | free(mem); 86 | dis_exit = d; 87 | 88 | char *s; 89 | while (1) { 90 | size_t addr, n_bytes; 91 | dis86_instr_t *ins = dis86_next(d, &addr, &n_bytes); 92 | if (!ins) break; 93 | 94 | s = dis86_print_c_code(d, ins, addr, n_bytes); 95 | printf("%-30s // ", s); 96 | free(s); 97 | 98 | s = dis86_print_intel_syntax(d, ins, addr, n_bytes, false); 99 | printf("%s\n", s); 100 | free(s); 101 | } 102 | 103 | dis_exit = NULL; 104 | dis86_delete(d); 105 | return 0; 106 | } 107 | -------------------------------------------------------------------------------- /old/v1/oper.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #include "header.h" 3 | 4 | #define OPER_C_UNAVAIL 0 5 | #define OPER_C_FUNC 1 6 | #define OPER_C_RFUNC 2 7 | #define OPER_C_INFIX 3 8 | #define OPER_C_LITERAL 4 9 | 10 | #define OPERATIONS(_) \ 11 | _( OP_INVAL, "inval", "", OPER_C_UNAVAIL ) \ 12 | _( OP_ADC, "adc", "", OPER_C_UNAVAIL ) \ 13 | _( OP_ADD, "add", "+=", OPER_C_INFIX ) \ 14 | _( OP_AND, "and", "&=", OPER_C_INFIX ) \ 15 | _( OP_CALL, "call", "CALL", OPER_C_FUNC ) \ 16 | _( OP_CLI, "cli", "", OPER_C_UNAVAIL ) \ 17 | _( OP_CLD, "cld", "", OPER_C_UNAVAIL ) \ 18 | _( OP_CMP, "cmp", "", OPER_C_UNAVAIL ) \ 19 | _( OP_DEC, "dec", "-= 1", OPER_C_INFIX ) \ 20 | _( OP_DIV, "div", "", OPER_C_UNAVAIL ) \ 21 | _( OP_HLT, "hlt", "", OPER_C_UNAVAIL ) \ 22 | _( OP_IDIV, "idiv", "", OPER_C_UNAVAIL ) \ 23 | _( OP_IMUL, "imul", "", OPER_C_UNAVAIL ) \ 24 | _( OP_IN, "in", "", OPER_C_UNAVAIL ) \ 25 | _( OP_INC, "inc", "+= 1", OPER_C_INFIX ) \ 26 | _( OP_INT, "int", "", OPER_C_UNAVAIL ) \ 27 | _( OP_IRET, "iret", "", OPER_C_UNAVAIL ) \ 28 | _( OP_JA, "ja", "", OPER_C_UNAVAIL ) \ 29 | _( OP_JAE, "jae", "", OPER_C_UNAVAIL ) \ 30 | _( OP_JB, "jb", "", OPER_C_UNAVAIL ) \ 31 | _( OP_JBE, "jbe", "", OPER_C_UNAVAIL ) \ 32 | _( OP_JCXZ, "jcxz", "", OPER_C_UNAVAIL ) \ 33 | _( OP_JE, "je", "", OPER_C_UNAVAIL ) \ 34 | _( 
OP_JG, "jg", "", OPER_C_UNAVAIL ) \ 35 | _( OP_JGE, "jge", "", OPER_C_UNAVAIL ) \ 36 | _( OP_JL, "jl", "", OPER_C_UNAVAIL ) \ 37 | _( OP_JLE, "jle", "", OPER_C_UNAVAIL ) \ 38 | _( OP_JMP, "jmp", "", OPER_C_UNAVAIL ) \ 39 | _( OP_JNE, "jne", "", OPER_C_UNAVAIL ) \ 40 | _( OP_JZ, "jz", "", OPER_C_UNAVAIL ) \ 41 | _( OP_LDS, "lds", "", OPER_C_UNAVAIL ) \ 42 | _( OP_LEAVE, "leave", "sp = bp; bp = POP()", OPER_C_LITERAL ) \ 43 | _( OP_LES, "les", "", OPER_C_UNAVAIL ) \ 44 | _( OP_MOV, "mov", "=", OPER_C_INFIX ) \ 45 | _( OP_MUL, "mul", "", OPER_C_UNAVAIL ) \ 46 | _( OP_NEG, "neg", "", OPER_C_UNAVAIL ) \ 47 | _( OP_NOP, "nop", "", OPER_C_UNAVAIL ) \ 48 | _( OP_NOT, "not", "", OPER_C_UNAVAIL ) \ 49 | _( OP_OR, "or", "|=", OPER_C_INFIX ) \ 50 | _( OP_OUT, "out", "", OPER_C_UNAVAIL ) \ 51 | _( OP_POP, "pop", "POP", OPER_C_RFUNC ) \ 52 | _( OP_PUSH, "push", "PUSH", OPER_C_FUNC ) \ 53 | _( OP_REP, "rep", "", OPER_C_UNAVAIL ) \ 54 | _( OP_REPNE, "repne", "", OPER_C_UNAVAIL ) \ 55 | _( OP_RET, "ret", "return", OPER_C_LITERAL ) \ 56 | _( OP_RETF, "retf", "", OPER_C_UNAVAIL ) \ 57 | _( OP_ROL, "rol", "", OPER_C_UNAVAIL ) \ 58 | _( OP_ROR, "ror", "", OPER_C_UNAVAIL ) \ 59 | _( OP_SBB, "sbb", "", OPER_C_UNAVAIL ) \ 60 | _( OP_SCAS, "scas", "", OPER_C_UNAVAIL ) \ 61 | _( OP_SAR, "sar", "", OPER_C_UNAVAIL ) \ 62 | _( OP_SHL, "shl", "<<=", OPER_C_INFIX ) \ 63 | _( OP_SHR, "shr", ">>=", OPER_C_INFIX ) \ 64 | _( OP_STD, "std", "", OPER_C_UNAVAIL ) \ 65 | _( OP_STI, "sti", "", OPER_C_UNAVAIL ) \ 66 | _( OP_STOS, "stos", "", OPER_C_UNAVAIL ) \ 67 | _( OP_SUB, "sub", "-=", OPER_C_INFIX ) \ 68 | _( OP_XOR, "xor", "^=", OPER_C_INFIX ) \ 69 | 70 | enum { 71 | #define ELT(x, _1, _2, _3) x, 72 | OPERATIONS(ELT) 73 | #undef ELT 74 | }; 75 | 76 | static inline const char *opcode_str(int op) 77 | { 78 | static const char *str[] = { 79 | #define ELT(_1, x, _2, _3) x, 80 | OPERATIONS(ELT) 81 | #undef ELT 82 | }; 83 | if (op >= ARRAY_SIZE(str)) FAIL("Invalid operation number: %u", op); 84 | return 
str[op]; 85 | } 86 | 87 | static inline const char *opcode_c(int op) 88 | { 89 | static const char *str[] = { 90 | #define ELT(_1, _2, x, _3) x, 91 | OPERATIONS(ELT) 92 | #undef ELT 93 | }; 94 | if (op >= ARRAY_SIZE(str)) FAIL("Invalid operation number: %u", op); 95 | return str[op]; 96 | } 97 | 98 | static inline int opcode_c_type(int op) 99 | { 100 | static int ty[] = { 101 | #define ELT(_1, _2, _3, x) x, 102 | OPERATIONS(ELT) 103 | #undef ELT 104 | }; 105 | if (op >= ARRAY_SIZE(ty)) FAIL("Invalid operation number: %u", op); 106 | return ty[op]; 107 | } 108 | -------------------------------------------------------------------------------- /old/v1/operand.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #include "header.h" 3 | 4 | enum { 5 | OPERAND_TYPE_NONE, 6 | OPERAND_TYPE_VAL, 7 | OPERAND_TYPE_ADDR, 8 | }; 9 | 10 | typedef struct operand operand_t; 11 | struct operand { 12 | int type; 13 | int has_mode : 1; 14 | int has_reg : 1; 15 | int has_sreg : 1; 16 | int has_imm : 1; 17 | int has_rel : 1; 18 | int has_abs32 : 1; 19 | int has_seg_override : 1; 20 | int force_reg8 : 1; 21 | 22 | int mode; 23 | u8 reg; 24 | u8 sreg; 25 | u8 seg_override; 26 | u16 imm; 27 | u16 rel; 28 | u32 abs32; 29 | }; 30 | 31 | static inline operand_t operand_reg(u8 reg) 32 | { 33 | operand_t operand[1] = {{}}; 34 | operand->type = OPERAND_TYPE_VAL; 35 | operand->has_reg = 1; 36 | operand->reg = reg; 37 | return operand[0]; 38 | } 39 | 40 | static inline operand_t operand_sreg(u8 sreg) 41 | { 42 | operand_t operand[1] = {{}}; 43 | operand->type = OPERAND_TYPE_VAL; 44 | operand->has_sreg = 1; 45 | operand->sreg = sreg; 46 | return operand[0]; 47 | } 48 | 49 | static inline operand_t operand_imm(u16 imm) 50 | { 51 | operand_t operand[1] = {{}}; 52 | operand->type = OPERAND_TYPE_VAL; 53 | operand->has_imm = 1; 54 | operand->imm = imm; 55 | return operand[0]; 56 | } 57 | 58 | static inline operand_t operand_rel16(u16 rel) 59 | { 60 | 
operand_t operand[1] = {{}}; 61 | operand->type = OPERAND_TYPE_VAL; 62 | operand->has_rel = 1; 63 | operand->rel = rel; 64 | return operand[0]; 65 | } 66 | 67 | static inline operand_t operand_rel8(u8 rel) 68 | { 69 | operand_t operand[1] = {{}}; 70 | operand->type = OPERAND_TYPE_VAL; 71 | operand->has_rel = 1; 72 | operand->rel = (u16)(i16)(i8)rel; 73 | return operand[0]; 74 | } 75 | 76 | static inline operand_t operand_abs32(u32 abs32) 77 | { 78 | operand_t operand[1] = {{}}; 79 | operand->type = OPERAND_TYPE_VAL; 80 | operand->has_abs32 = 1; 81 | operand->abs32 = abs32; 82 | return operand[0]; 83 | } 84 | 85 | static inline operand_t operand_addr_imm(u16 imm, int has_seg, u8 seg) 86 | { 87 | operand_t operand[1] = {{}}; 88 | operand->type = OPERAND_TYPE_ADDR; 89 | operand->has_imm = 1; 90 | operand->has_seg_override = has_seg; 91 | operand->imm = imm; 92 | operand->seg_override = seg; 93 | return operand[0]; 94 | } 95 | 96 | static inline operand_t operand_addr_reg(u8 seg, u8 reg) 97 | { 98 | operand_t operand[1] = {{}}; 99 | operand->type = OPERAND_TYPE_ADDR; 100 | operand->has_reg = 1; 101 | operand->has_seg_override = 1; 102 | operand->reg = reg; 103 | operand->seg_override = seg; 104 | return operand[0]; 105 | } 106 | 107 | static inline operand_t operand_addr_mode(int mode, int has_seg, u8 seg) 108 | { 109 | operand_t operand[1] = {{}}; 110 | operand->type = OPERAND_TYPE_ADDR; 111 | operand->has_mode = 1; 112 | operand->has_seg_override = 1; 113 | operand->mode = mode; 114 | operand->seg_override = seg; 115 | return operand[0]; 116 | } 117 | 118 | static inline operand_t operand_addr_mode_imm(int mode, u16 imm, int has_seg, u8 seg) 119 | { 120 | operand_t operand[1] = {{}}; 121 | operand->type = OPERAND_TYPE_ADDR; 122 | operand->has_mode = 1; 123 | operand->has_imm = 1; 124 | operand->has_seg_override = 1; 125 | operand->mode = mode; 126 | operand->imm = imm; 127 | operand->seg_override = seg; 128 | return operand[0]; 129 | } 130 | 
-------------------------------------------------------------------------------- /old/v1/reg.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #include "header.h" 3 | 4 | enum { 5 | REG8_AL = 0, 6 | REG8_CL, 7 | REG8_DL, 8 | REG8_BL, 9 | REG8_AH, 10 | REG8_CH, 11 | REG8_DH, 12 | REG8_BH, 13 | }; 14 | 15 | enum { 16 | REG16_AX = 0, 17 | REG16_CX, 18 | REG16_DX, 19 | REG16_BX, 20 | REG16_SP, 21 | REG16_BP, 22 | REG16_SI, 23 | REG16_DI, 24 | }; 25 | 26 | enum { 27 | SREG_ES = 0, 28 | SREG_CS, 29 | SREG_SS, 30 | SREG_DS, 31 | SREG_INVAL4, 32 | SREG_INVAL5, 33 | SREG_INVAL6, 34 | SREG_INVAL7, 35 | }; 36 | 37 | static inline const char *reg8_str(u8 r) 38 | { 39 | static const char *REG8[8] = { 40 | /* 0 */ "al", 41 | /* 1 */ "cl", 42 | /* 2 */ "dl", 43 | /* 3 */ "bl", 44 | /* 4 */ "ah", 45 | /* 5 */ "ch", 46 | /* 6 */ "dh", 47 | /* 7 */ "bh", 48 | }; 49 | if (r >= ARRAY_SIZE(REG8)) FAIL("Invalid 8-bit register number: %u", r); 50 | return REG8[r]; 51 | } 52 | 53 | static inline const char *reg16_str(u8 r) 54 | { 55 | static const char *REG16[8] = { 56 | /* 0 */ "ax", 57 | /* 1 */ "cx", 58 | /* 2 */ "dx", 59 | /* 3 */ "bx", 60 | /* 4 */ "sp", 61 | /* 5 */ "bp", 62 | /* 6 */ "si", 63 | /* 7 */ "di", 64 | }; 65 | if (r >= ARRAY_SIZE(REG16)) FAIL("Invalid 16-bit register number: %u", r); 66 | return REG16[r]; 67 | } 68 | 69 | static inline const char *sreg_str(u8 r) 70 | { 71 | static const char *SREG[8] = { 72 | /* 0 */ "es", 73 | /* 1 */ "cs", 74 | /* 2 */ "ss", 75 | /* 3 */ "ds", 76 | /* 4 */ 0, 77 | /* 5 */ 0, 78 | /* 6 */ 0, 79 | /* 7 */ 0, 80 | }; 81 | if (r >= ARRAY_SIZE(SREG)) FAIL("Invalid segment register number: %u", r); 82 | 83 | const char *s = SREG[r]; 84 | if (!s) FAIL("Invalid segment register number: %u", r); 85 | 86 | return s; 87 | } 88 | -------------------------------------------------------------------------------- /old/v1/test.c: 
-------------------------------------------------------------------------------- 1 | #include "header.h" 2 | #include "dis86.h" 3 | 4 | typedef struct binary_data binary_data_t; 5 | struct binary_data 6 | { 7 | uint8_t n_mem; 8 | uint8_t mem[16]; 9 | }; 10 | 11 | typedef struct test test_t; 12 | struct test 13 | { 14 | uint32_t address; 15 | binary_data_t data; 16 | const char * code; 17 | }; 18 | 19 | #define TEST(...) __VA_ARGS__, 20 | 21 | static test_t TESTS[] = { 22 | #include "test_cases.c" 23 | }; 24 | 25 | static int run_test(size_t num, bool verbose) 26 | { 27 | if (num >= ARRAY_SIZE(TESTS)) { 28 | FAIL("Invalid test number: %zu", num); 29 | } 30 | 31 | test_t *t = &TESTS[num]; 32 | printf("TEST %zu: %-40s | ", num, t->code); 33 | fflush(stdout); 34 | 35 | dis86_t *d = dis86_new(t->address, (char*)t->data.mem, t->data.n_mem); 36 | if (!d) FAIL("Failed to allocate instance"); 37 | 38 | size_t addr, n_bytes; 39 | dis86_instr_t *ins = dis86_next(d, &addr, &n_bytes); 40 | if (!ins) FAIL("Failed to decode instruction"); 41 | 42 | char *s = dis86_print_intel_syntax(d, ins, addr, n_bytes, false); 43 | bool pass = (0 == strcmp(s, t->code)); 44 | printf("%s", pass ? "PASS" : "FAIL"); 45 | printf(" | '%s'\n", s); 46 | free(s); 47 | 48 | if (verbose) { 49 | printf("ADDRESS: 0x%08x\n", t->address); 50 | printf("BINARY DATA: "); 51 | for (size_t i = 0; i < t->data.n_mem; i++) { 52 | printf("%02x ", t->data.mem[i]); 53 | } 54 | printf("\n"); 55 | } 56 | 57 | // Did we consume all of the input? 
58 | assert(dis86_position(d) == dis86_baseaddr(d) + dis86_length(d)); 59 | 60 | dis86_delete(d); 61 | return 0; 62 | } 63 | 64 | static int run_all() 65 | { 66 | for (size_t i = 0; i < ARRAY_SIZE(TESTS); i++) { 67 | run_test(i, false); 68 | } 69 | return 0; 70 | } 71 | 72 | int main(int argc, char *argv[]) 73 | { 74 | if (argc > 2) { 75 | fprintf(stderr, "usage: %s []\n", argv[0]); 76 | return 1; 77 | } 78 | 79 | if (argc >= 2) { 80 | return run_test(atoi(argv[1]), true); 81 | } else { 82 | return run_all(); 83 | } 84 | } 85 | -------------------------------------------------------------------------------- /old/v2/.gitignore: -------------------------------------------------------------------------------- 1 | dis86_config.bsl 2 | run.sh -------------------------------------------------------------------------------- /old/v2/build.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | set -e 3 | THISDIR=$(dirname $(realpath $0)) 4 | cd $THISDIR 5 | 6 | if [ ! 
-d build ]; then 7 | meson build 8 | fi 9 | 10 | (cd build && ninja $@) 11 | -------------------------------------------------------------------------------- /old/v2/meson.build: -------------------------------------------------------------------------------- 1 | project('dis86', 'c') 2 | 3 | ################################## 4 | ## Common flags 5 | 6 | flags_c = [ 7 | '-std=c11', 8 | #'-fvisibility=hidden', 9 | ] 10 | 11 | flags_cpp = [ 12 | '-std=c++14', 13 | '-fno-exceptions', '-fno-rtti', 14 | #'-fvisibility=hidden', '-fvisibility-inlines-hidden', 15 | ] 16 | 17 | flags_common = [ 18 | '-fdiagnostics-color=always', 19 | '-D_GNU_SOURCE', 20 | '-Dtypeof=__typeof__', 21 | '-I' + meson.source_root() + '/src', 22 | '-g', '-fPIC', 23 | '-m64', 24 | '-ffast-math', '-fno-associative-math', '-fno-reciprocal-math', 25 | '-fno-strict-aliasing', 26 | ] 27 | 28 | flags_warn = [ 29 | ## Warning enables 30 | '-Wall', '-Werror', '-Wextra', 31 | 32 | ## Warning disables: annoying 33 | '-Wno-unused-function', '-Wno-unused-parameter', 34 | 35 | ## Warning disables: Complains about 'Type t[1] = {{0}}' 36 | '-Wno-missing-field-initializers', 37 | 38 | ## Warning disables: Complains about 'if (val < 0 || val >= LIMIT)' when val is unsigned 39 | '-Wno-type-limits', 40 | 41 | ## Warning disables: Complains about macros that expand to empty if-else bodies 42 | '-Wno-empty-body', 43 | ] 44 | 45 | flags_release = [ 46 | ## Optimization 47 | '-O2', 48 | 49 | ## Warning disables: 50 | '-Wno-unused-variable', '-Wno-unused-but-set-variable', 51 | 52 | ## Warning disables: Seriously, no comments in comments? 
#!/usr/bin/env python3
import subprocess
import json

def find_latest_crash(appname):
    """Return the path of the `<appname>-*.ips` report whose name sorts last.

    Looks in ~/Library/Logs/DiagnosticReports. This is the same selection the
    former `find ... | sort -r | head -1` pipeline made, done without a shell
    so the name is never interpreted as shell syntax.

    Exits with a message when no report matches; the pipeline used to return
    an empty path in that case, which then failed later inside open().
    """
    import glob
    import os

    crash_dir = os.path.expanduser('~/Library/Logs/DiagnosticReports')
    pattern = os.path.join(crash_dir, glob.escape(appname) + '-*.ips')
    reports = sorted(glob.glob(pattern), reverse=True)
    if not reports:
        raise SystemExit(f'No crash report matching {pattern}')
    return reports[0]

def read_data(filename):
    """Parse `filename` as JSON after discarding its first line."""
    with open(filename, 'r') as f:
        dat = ''.join(f.readlines()[1:])
    return json.loads(dat)

dat = read_data(find_latest_crash('dis86'))
# Only the first thread's frames are reported.
frames = dat['threads'][0]['frames']
symbols = [frame['symbol'] for frame in frames]

print('Backtrace:')
for sym in symbols:
    print(f' {sym}')
-------------------------------------------------------------------------------- /old/v2/scripts/opcode_tbl_cvt.py: -------------------------------------------------------------------------------- 1 | 2 | TBL = '''\ 3 | OP_AAA, "aaa" ) \ 4 | _( OP_AAS, "aas" ) \ 5 | _( OP_ADC, "adc" ) \ 6 | _( OP_ADD, "add" ) \ 7 | _( OP_AND, "and" ) \ 8 | _( OP_CALL, "call" ) \ 9 | _( OP_CALLF, "callf" ) \ 10 | _( OP_CBW, "cbw" ) \ 11 | _( OP_CLC, "clc" ) \ 12 | _( OP_CLD, "cld" ) \ 13 | _( OP_CLI, "cli" ) \ 14 | _( OP_CMC, "cmc" ) \ 15 | _( OP_CMP, "cmp" ) \ 16 | _( OP_CMPS, "cmps" ) \ 17 | _( OP_CWD, "cwd" ) \ 18 | _( OP_DAA, "daa" ) \ 19 | _( OP_DAS, "das" ) \ 20 | _( OP_DEC, "dec" ) \ 21 | _( OP_DIV, "div" ) \ 22 | _( OP_ENTER, "enter" ) \ 23 | _( OP_HLT, "hlt" ) \ 24 | _( OP_IMUL, "imul" ) \ 25 | _( OP_IN, "in" ) \ 26 | _( OP_INC, "inc" ) \ 27 | _( OP_INS, "ins" ) \ 28 | _( OP_INT, "int" ) \ 29 | _( OP_INTO, "into" ) \ 30 | _( OP_INVAL, "inval" ) \ 31 | _( OP_IRET, "iret" ) \ 32 | _( OP_JA, "ja" ) \ 33 | _( OP_JAE, "jae" ) \ 34 | _( OP_JB, "jb" ) \ 35 | _( OP_JBE, "jbe" ) \ 36 | _( OP_JCXZ, "jcxz" ) \ 37 | _( OP_JE, "je" ) \ 38 | _( OP_JG, "jg" ) \ 39 | _( OP_JGE, "jge" ) \ 40 | _( OP_JL, "jl" ) \ 41 | _( OP_JLE, "jle" ) \ 42 | _( OP_JMP, "jmp" ) \ 43 | _( OP_JMPF, "jmpf" ) \ 44 | _( OP_JNE, "jne" ) \ 45 | _( OP_JNO, "jno" ) \ 46 | _( OP_JNP, "jnp" ) \ 47 | _( OP_JNS, "jns" ) \ 48 | _( OP_JO, "jo" ) \ 49 | _( OP_JP, "jp" ) \ 50 | _( OP_JS, "js" ) \ 51 | _( OP_LAHF, "lahf" ) \ 52 | _( OP_LDS, "lds" ) \ 53 | _( OP_LEA, "lea" ) \ 54 | _( OP_LEAVE, "leave" ) \ 55 | _( OP_LES, "les" ) \ 56 | _( OP_LODS, "lods" ) \ 57 | _( OP_LOOP, "loop" ) \ 58 | _( OP_LOOPE, "loope" ) \ 59 | _( OP_LOOPNE, "loopne" ) \ 60 | _( OP_MOV, "mov" ) \ 61 | _( OP_MOVS, "movs" ) \ 62 | _( OP_MUL, "mul" ) \ 63 | _( OP_NEG, "neg" ) \ 64 | _( OP_NOP, "nop" ) \ 65 | _( OP_NOT, "not" ) \ 66 | _( OP_OR, "or" ) \ 67 | _( OP_OUT, "out" ) \ 68 | _( OP_OUTS, "outs" ) \ 69 | _( OP_POP, "pop" ) \ 70 | _( OP_POPA, 
/* Write the top-level usage text to `f`.
 * Every line goes to `f`; previously only the first line did and the rest
 * was hard-wired to stderr. */
static void print_help(FILE *f, const char *appname)
{
  fprintf(f, "usage: %s []\n", appname);
  fprintf(f, "\n");
  fprintf(f, "MODES:\n");
  fprintf(f, " dis disassemble the binary and emit intel syntax\n");
  fprintf(f, " decomp decompile the binary\n");
}

/*
 * Dispatch on argv[1] to the matching exec_mode_* entry point, handing it the
 * unmodified argc/argv.
 *
 * Returns 1 when no mode is given, 2 for an unknown mode, otherwise whatever
 * the selected mode returns.
 */
int main(int argc, char *argv[])
{
  if (argc < 2) {
    print_help(stderr, argv[0]);
    return 1;
  }
  const char *mode = argv[1];

  if (0 == strcmp(mode, "dis"))    return exec_mode_dis(argc, argv);
  if (0 == strcmp(mode, "decomp")) return exec_mode_decomp(argc, argv);

  /* Terminate the line so the usage text does not run on from the error. */
  fprintf(stderr, "Error: Unknown mode '%s'\n", mode);
  print_help(stderr, argv[0]);
  return 2;
}
(0 == strcmp(mode, "decomp")) return exec_mode_decomp(argc, argv); 25 | 26 | fprintf(stderr, "Error: Unknown mode '%s'", mode); 27 | print_help(stderr, argv[0]); 28 | return 2; 29 | } 30 | -------------------------------------------------------------------------------- /old/v2/src/app/exec_mode.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | int exec_mode_dis(int argc, char *argv[]); 4 | int exec_mode_decomp(int argc, char *argv[]); 5 | -------------------------------------------------------------------------------- /old/v2/src/app/exec_mode_decomp.c: -------------------------------------------------------------------------------- 1 | #include "exec_mode.h" 2 | #include "dis86.h" 3 | #include "dis86_private.h" 4 | #include "segoff.h" 5 | #include "cmdarg/cmdarg.h" 6 | #include "array.h" 7 | 8 | static dis86_t *dis_exit = NULL; 9 | static void on_fail() 10 | { 11 | if (!dis_exit) return; 12 | binary_dump(dis_exit->b); 13 | } 14 | 15 | static void print_help(FILE *f, const char *appname) 16 | { 17 | fprintf(f, "usage: %s decomp OPTIONS\n", appname); 18 | fprintf(stderr, "\n"); 19 | fprintf(stderr, "OPTIONS:\n"); 20 | fprintf(stderr, " --config path to configuration file (.bsl) (optional)\n"); 21 | fprintf(stderr, " --binary path to binary on the filesystem (required)\n"); 22 | fprintf(stderr, " --start-addr start seg:off address (required)\n"); 23 | fprintf(stderr, " --end-addr end seg:off address (required)\n"); 24 | } 25 | 26 | static bool cmdarg_segoff(int * argc, char *** argv, const char * name, segoff_t *_out) 27 | { 28 | const char *s; 29 | if (!cmdarg_string(argc, argv, name, &s)) return false; 30 | 31 | *_out = parse_segoff(s); 32 | return true; 33 | } 34 | 35 | typedef struct options options_t; 36 | struct options 37 | { 38 | const char * config; 39 | const char * binary; 40 | segoff_t start; 41 | segoff_t end; 42 | }; 43 | 44 | static int run(options_t *opt); 45 | 46 | int exec_mode_decomp(int 
argc, char *argv[]) 47 | { 48 | atexit(on_fail); 49 | 50 | options_t opt[1] = {{}}; 51 | bool found; 52 | 53 | found = cmdarg_string(&argc, &argv, "--config", &opt->config); 54 | (void)found; /* optional */ 55 | 56 | found = cmdarg_string(&argc, &argv, "--binary", &opt->binary); 57 | if (!found) { print_help(stderr, argv[0]); return 3; } 58 | 59 | found = cmdarg_segoff(&argc, &argv, "--start-addr", &opt->start); 60 | if (!found) { print_help(stderr, argv[0]); return 3; } 61 | 62 | found = cmdarg_segoff(&argc, &argv, "--end-addr", &opt->end); 63 | if (!found) { print_help(stderr, argv[0]); return 3; } 64 | 65 | return run(opt); 66 | } 67 | 68 | static int run(options_t *opt) 69 | { 70 | dis86_decompile_config_t * cfg = NULL; 71 | if (opt->config) { 72 | cfg = dis86_decompile_config_read_new(opt->config); 73 | if (!cfg) FAIL("Failed to read config file: '%s'", opt->config); 74 | } 75 | 76 | if (opt->start.seg != opt->end.seg) { 77 | fprintf(stderr, "WARN: The start segment and end segment are different.. 
Near calls might decompile wrong.\n"); 78 | } 79 | 80 | size_t start_idx = segoff_abs(opt->start); 81 | size_t end_idx = segoff_abs(opt->end); 82 | 83 | size_t mem_sz = 0; 84 | char *mem = read_file(opt->binary, &mem_sz); 85 | 86 | char *region = &mem[start_idx]; 87 | size_t region_sz = end_idx - start_idx; 88 | 89 | dis86_t *d = dis86_new(start_idx, region, region_sz); 90 | if (!d) FAIL("Failed to allocate dis86 instance"); 91 | free(mem); 92 | dis_exit = d; 93 | 94 | array_t *ins_arr = array_new(sizeof(dis86_instr_t)); 95 | while (1) { 96 | dis86_instr_t *ins = dis86_next(d); 97 | if (!ins) break; 98 | 99 | dis86_instr_t *ins_ptr = array_append_dst(ins_arr); 100 | dis86_instr_copy(ins_ptr, ins); 101 | } 102 | 103 | size_t n_instr = 0; 104 | dis86_instr_t *instr = (dis86_instr_t*)array_borrow(ins_arr, &n_instr); 105 | 106 | char func_name[256]; 107 | sprintf(func_name, "func_%08x__%04x_%04x", (u32)start_idx, opt->start.seg, opt->start.off); 108 | 109 | const char *s = dis86_decompile(d, cfg, func_name, opt->start.seg, instr, n_instr); 110 | printf("%-30s\n", s); 111 | free((void*)s); 112 | 113 | dis_exit = NULL; 114 | dis86_decompile_config_delete(cfg); 115 | array_delete(ins_arr); 116 | dis86_delete(d); 117 | return 0; 118 | } 119 | -------------------------------------------------------------------------------- /old/v2/src/app/exec_mode_dis.c: -------------------------------------------------------------------------------- 1 | #include "exec_mode.h" 2 | #include "dis86.h" 3 | #include "dis86_private.h" 4 | #include "segoff.h" 5 | #include "cmdarg/cmdarg.h" 6 | 7 | static dis86_t *dis_exit = NULL; 8 | static void on_fail() 9 | { 10 | if (!dis_exit) return; 11 | binary_dump(dis_exit->b); 12 | } 13 | 14 | static void print_help(FILE *f, const char *appname) 15 | { 16 | fprintf(f, "usage: %s dis OPTIONS\n", appname); 17 | fprintf(stderr, "\n"); 18 | fprintf(stderr, "OPTIONS:\n"); 19 | fprintf(stderr, " --binary path to binary on the filesystem (required)\n"); 20 | 
fprintf(stderr, " --start-addr start seg:off address (required)\n"); 21 | fprintf(stderr, " --end-addr end seg:off address (required)\n"); 22 | } 23 | 24 | static bool cmdarg_segoff(int * argc, char *** argv, const char * name, segoff_t *_out) 25 | { 26 | const char *s; 27 | if (!cmdarg_string(argc, argv, name, &s)) return false; 28 | 29 | *_out = parse_segoff(s); 30 | return true; 31 | } 32 | 33 | static int _legacy_exec(const char *filename, segoff_t start, segoff_t end); 34 | 35 | int exec_mode_dis(int argc, char *argv[]) 36 | { 37 | atexit(on_fail); 38 | 39 | const char * binary = NULL; 40 | segoff_t start = {}; 41 | segoff_t end = {}; 42 | 43 | bool found; 44 | 45 | found = cmdarg_string(&argc, &argv, "--binary", &binary); 46 | if (!found) { print_help(stderr, argv[0]); return 3; } 47 | 48 | found = cmdarg_segoff(&argc, &argv, "--start-addr", &start); 49 | if (!found) { print_help(stderr, argv[0]); return 3; } 50 | 51 | found = cmdarg_segoff(&argc, &argv, "--end-addr", &end); 52 | if (!found) { print_help(stderr, argv[0]); return 3; } 53 | 54 | return _legacy_exec(binary, start, end); 55 | } 56 | 57 | static int _legacy_exec(const char *filename, segoff_t start, segoff_t end) 58 | { 59 | size_t start_idx = segoff_abs(start); 60 | size_t end_idx = segoff_abs(end); 61 | 62 | size_t mem_sz = 0; 63 | char *mem = read_file(filename, &mem_sz); 64 | 65 | char *region = &mem[start_idx]; 66 | size_t region_sz = end_idx - start_idx; 67 | 68 | dis86_t *d = dis86_new(start_idx, region, region_sz); 69 | if (!d) FAIL("Failed to allocate dis86 instance"); 70 | free(mem); 71 | dis_exit = d; 72 | 73 | char *s; 74 | while (1) { 75 | dis86_instr_t *ins = dis86_next(d); 76 | if (!ins) break; 77 | 78 | s = dis86_print_intel_syntax(d, ins, true); 79 | printf("%s\n", s); 80 | free(s); 81 | } 82 | 83 | dis_exit = NULL; 84 | dis86_delete(d); 85 | return 0; 86 | } 87 | -------------------------------------------------------------------------------- /old/v2/src/app/meson.build: 
#define INITIAL_CAP 32

/* Growable array of fixed-size elements stored back to back in `mem`. */
typedef struct array array_t;
struct array
{
  void * mem;     /* element storage, `cap * elt_sz` bytes */
  size_t len;     /* number of elements in use */
  size_t cap;     /* number of elements `mem` can hold */
  size_t elt_sz;  /* size of one element in bytes */
};

/* Create an empty array for elements of `elt_sz` bytes.
 * Returns NULL if either allocation fails. */
static inline array_t *array_new(size_t elt_sz)
{
  array_t *arr = calloc(1, sizeof *arr);
  if (!arr) return NULL;

  arr->mem = malloc(INITIAL_CAP * elt_sz);
  if (!arr->mem && elt_sz != 0) {
    free(arr);
    return NULL;
  }
  arr->len    = 0;
  arr->cap    = INITIAL_CAP;
  arr->elt_sz = elt_sz;
  return arr;
}

/* Release the array and its storage. NULL is accepted and ignored. */
static inline void array_delete(array_t *arr)
{
  if (!arr) return;
  free(arr->mem);
  free(arr);
}

/* Number of elements currently stored. */
static inline size_t array_len(array_t *arr)
{
  return arr->len;
}

/* Pointer to element `idx`; asserts that `idx` is in range. */
static inline void *array_at(array_t *arr, size_t idx)
{
  assert(idx < arr->len);
  /* Arithmetic on `void *` is a compiler extension; step in bytes instead. */
  return (char *)arr->mem + idx * arr->elt_sz;
}

/*
 * Append one uninitialised element and return a pointer to it, doubling the
 * capacity when the array is full.
 *
 * Returns NULL, leaving the array exactly as it was, if the array cannot
 * grow. The previous storage is kept in that case instead of being lost by
 * assigning realloc's result straight back to `mem`.
 */
static inline void *array_append_dst(array_t *arr)
{
  if (arr->len == arr->cap) {
    size_t new_cap = arr->cap * 2;
    if (new_cap <= arr->cap) return NULL;  /* capacity would wrap */

    if (arr->elt_sz != 0) {
      if (new_cap > (size_t)-1 / arr->elt_sz) return NULL;  /* byte count would wrap */
      void *grown = realloc(arr->mem, new_cap * arr->elt_sz);
      if (!grown) return NULL;
      arr->mem = grown;
    }
    arr->cap = new_cap;
  }
  arr->len++;
  return array_at(arr, arr->len-1);
}

/* Append a copy of the `elt_sz` bytes at `elt`.
 * Having no way to report failure, this aborts if the array cannot grow. */
static inline void array_append(array_t *arr, void *elt)
{
  void *p = array_append_dst(arr);
  if (!p) abort();
  memcpy(p, elt, arr->elt_sz);
}

/* Expose the storage and element count without transferring ownership.
 * The pointer may change whenever the array grows. */
static inline void *array_borrow(array_t *arr, size_t *_len)
{
  *_len = arr->len;
  return arr->mem;
}
binary_t; 4 | struct binary 5 | { 6 | u8 * mem; 7 | size_t len; 8 | size_t idx; 9 | size_t base_addr; 10 | }; 11 | 12 | static inline void binary_init(binary_t *b, size_t base_addr, char *mem, size_t len) 13 | { 14 | b->mem = malloc(len); 15 | memcpy(b->mem, mem, len); 16 | b->len = len; 17 | b->idx = base_addr; 18 | b->base_addr = base_addr; 19 | } 20 | 21 | static inline u8 binary_byte_at(binary_t *b, size_t idx) 22 | { 23 | if (idx < b->base_addr) FAIL("Binary access below start of region"); 24 | if (idx >= b->base_addr + b->len) FAIL("Binary access beyond end of region"); 25 | return b->mem[idx - b->base_addr]; 26 | } 27 | 28 | static inline u8 binary_peek_u8(binary_t *b) 29 | { 30 | u8 byte = binary_byte_at(b, b->idx); 31 | return byte; 32 | } 33 | 34 | static inline void binary_advance_u8(binary_t *b) 35 | { 36 | b->idx++; 37 | } 38 | 39 | static inline u8 binary_fetch_u8(binary_t *b) 40 | { 41 | u8 byte = binary_peek_u8(b); 42 | binary_advance_u8(b); 43 | return byte; 44 | } 45 | 46 | static inline u16 binary_fetch_u16(binary_t *b) 47 | { 48 | u8 low = binary_fetch_u8(b); 49 | u8 high = binary_fetch_u8(b); 50 | return (u16)high << 8 | (u16)low; 51 | } 52 | 53 | static inline size_t binary_baseaddr(binary_t *b) 54 | { 55 | return b->base_addr; 56 | } 57 | 58 | static inline size_t binary_location(binary_t *b) 59 | { 60 | return b->idx; 61 | } 62 | 63 | static inline size_t binary_length(binary_t *b) 64 | { 65 | return b->len; 66 | } 67 | 68 | static inline void binary_dump(binary_t *b) 69 | { 70 | printf("BINARY DUMP LOCATION %zx: ", b->idx); 71 | size_t end = MIN(b->idx + 16, b->base_addr + b->len); 72 | for (size_t idx = b->idx; idx < end; idx++) { 73 | printf("%02x ", binary_byte_at(b, idx)); 74 | } 75 | printf("\n"); 76 | } 77 | -------------------------------------------------------------------------------- /old/v2/src/cmdarg/cmdarg.c: -------------------------------------------------------------------------------- 1 | #include "cmdarg.h" 2 | #include 3 
/*
 * Parse `s` as an unsigned decimal number.
 *
 * Returns false for an empty string, for any non-digit character and for
 * values that do not fit in 64 bits; *_num is written only on success.
 */
static inline bool parse_u64(const char *s, uint64_t *_num)
{
  if (*s == '\0') return false; // an empty string is not a number

  uint64_t num = 0;
  for (; *s; s++) {
    char c = *s;
    if (!('0' <= c && c <= '9')) return false; // not a decimal digit

    // Check before multiplying: comparing the wrapped result against the old
    // value misses cases where 10*num wraps to something still >= num.
    uint64_t digit = (uint64_t)(c - '0');
    if (num > (UINT64_MAX - digit) / 10) return false; // overflow
    num = 10*num + digit;
  }

  *_num = num;
  return true;
}

/*
 * Parse `s` as a signed decimal number with an optional leading '-'.
 * Accepts exactly INT64_MIN..INT64_MAX; *_num is written only on success.
 */
static inline bool parse_i64(const char *s, int64_t *_num)
{
  bool neg = (*s == '-');
  if (neg) s++;

  uint64_t unum = 0;
  if (!parse_u64(s, &unum)) return false;

  // Largest accepted magnitude: 2^63 for negatives, 2^63-1 otherwise.
  const uint64_t limit = (uint64_t)INT64_MAX + (neg ? 1u : 0u);
  if (unum > limit) return false; // overflow

  int64_t num;
  if (!neg) {
    num = (int64_t)unum;
  } else if (unum == limit) {
    // 2^63 is not representable as a positive int64_t, so it cannot be
    // produced by casting and negating.
    num = INT64_MIN;
  } else {
    num = -(int64_t)unum;
  }

  *_num = num;
  return true;
}

// Index of the first entry of argv[0..argc) equal to `name`, or -1.
static int find_arg(int argc, char **argv, const char *name)
{
  for (int i = 0; i < argc; i++) {
    if (0 == strcmp(name, argv[i])) return i;
  }
  return -1;
}

// Like find_arg, but also -1 when `name` is the last entry (no value follows).
static int find_arg_with_value(int argc, char **argv, const char *name)
{
  int idx = find_arg(argc, argv, name);
  if (idx == -1 || idx+1 == argc) return -1;
  return idx;
}

// Drop `count` entries starting at `idx`, shifting the rest down, and store
// the reduced count in *_argc.
static void remove_args(int *_argc, char **argv, int idx, int count)
{
  int argc = *_argc;
  for (int i = idx+count; i < argc; i++) {
    argv[i-count] = argv[i];
  }
  *_argc = argc - count;
}

/*
 * Look for the flag `name`. When present it is removed from the argument
 * list, *_out (if non-NULL) is set to true and true is returned. Otherwise
 * nothing is modified and false is returned.
 */
bool cmdarg_option(int * _argc, char *** _argv, const char * name, bool *_out)
{
  char ** argv = *_argv;

  int found_idx = find_arg(*_argc, argv, name);
  if (found_idx == -1) return false;

  remove_args(_argc, argv, found_idx, 1);
  if (_out) *_out = true;
  return true;
}

/*
 * Look for `name` followed by a value. On success both entries are removed
 * from the argument list and the value is stored in *_out (if non-NULL).
 * Returns false, modifying nothing, when `name` is absent or has no value.
 */
bool cmdarg_string(int * _argc, char *** _argv, const char * name, const char ** _out)
{
  char ** argv = *_argv;

  int found_idx = find_arg_with_value(*_argc, argv, name);
  if (found_idx == -1) return false;

  // Capture the value before the entries are shifted over it
  const char *ret = argv[found_idx+1];

  remove_args(_argc, argv, found_idx, 2);
  if (_out) *_out = ret;
  return true;
}

/*
 * As cmdarg_string, but the value must parse as an unsigned decimal number.
 * Returns false, modifying nothing, when it does not.
 */
bool cmdarg_u64(int * _argc, char *** _argv, const char * name, uint64_t *_out)
{
  char ** argv = *_argv;

  int found_idx = find_arg_with_value(*_argc, argv, name);
  if (found_idx == -1) return false;

  uint64_t ret = 0;
  if (!parse_u64(argv[found_idx+1], &ret)) return false;

  remove_args(_argc, argv, found_idx, 2);
  if (_out) *_out = ret;
  return true;
}

/*
 * As cmdarg_string, but the value must parse as a signed decimal number.
 * Returns false, modifying nothing, when it does not.
 */
bool cmdarg_i64(int * _argc, char *** _argv, const char * name, int64_t *_out)
{
  char ** argv = *_argv;

  int found_idx = find_arg_with_value(*_argc, argv, name);
  if (found_idx == -1) return false;

  int64_t ret = 0;
  if (!parse_i64(argv[found_idx+1], &ret)) return false;

  remove_args(_argc, argv, found_idx, 2);
  if (_out) *_out = ret;
  return true;
}
-------------------------------------------------------------------------------- /old/v2/src/cmdarg/cmdarg.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #include 3 | #include 4 | 5 | bool cmdarg_option (int * argc, char *** argv, const char * name, bool * _out); 6 | bool cmdarg_string (int * argc, char *** argv, const char * name, const char ** _out); 7 | bool cmdarg_u64 (int * argc, char *** argv, const char * name, uint64_t * _out); 8 | bool cmdarg_i64 (int * argc, char *** argv, const char * name, int64_t * _out); 9 | -------------------------------------------------------------------------------- /old/v2/src/cmdarg/meson.build: -------------------------------------------------------------------------------- 1 | SRC = [ 2 | 'cmdarg.c', 3 | ] 4 | libcmdarg = static_library('cmdarg', SRC) 5 | -------------------------------------------------------------------------------- /old/v2/src/core.c: -------------------------------------------------------------------------------- 1 | #include "dis86_private.h" 2 | 3 | dis86_t *dis86_new(size_t base_addr, char *mem, size_t mem_sz) 4 | { 5 | dis86_t *d = calloc(1, sizeof(dis86_t)); 6 | binary_init(d->b, base_addr, mem, mem_sz); 7 | return d; 8 | } 9 | 10 | void dis86_delete(dis86_t *d) 11 | { 12 | free(d); 13 | } 14 | 15 | size_t dis86_position(dis86_t *d) { return binary_location(d->b); } 16 | size_t dis86_baseaddr(dis86_t *d) { return binary_baseaddr(d->b); } 17 | size_t dis86_length(dis86_t *d) { return binary_length(d->b); } 18 | -------------------------------------------------------------------------------- /old/v2/src/datamap.c: -------------------------------------------------------------------------------- 1 | #include "datamap.h" 2 | #include 3 | 4 | #define INITIAL_CAP 32 5 | 6 | static inline bool is_white(char c) 7 | { 8 | return c == ' ' || c == '\t' || c == '\n'; 9 | } 10 | 11 | typedef struct parser parser_t; 12 | struct parser 13 | { 14 | const char 
*line; 15 | size_t len; 16 | size_t idx; 17 | }; 18 | 19 | static inline void parser_skip(parser_t *p); 20 | 21 | static void parser_init(parser_t *p, const char *line, size_t len) 22 | { 23 | p->line = line; 24 | p->len = len; 25 | p->idx = 0; 26 | parser_skip(p); 27 | } 28 | 29 | static inline bool parser_is_end(parser_t *p) 30 | { 31 | return p->idx == p->len; 32 | } 33 | 34 | static inline void parser_skip(parser_t *p) 35 | { 36 | bool in_comment = false; 37 | while (p->idx < p->len) { 38 | char c = p->line[p->idx]; 39 | if (c == '#') { 40 | in_comment = true; 41 | } 42 | if (!in_comment && !is_white(c)) { 43 | break; 44 | } 45 | p->idx++; 46 | } 47 | } 48 | 49 | static inline void parse_tok(parser_t *p, const char **tok, size_t *tok_len) 50 | { 51 | parser_skip(p); 52 | 53 | // consume non-white tok 54 | *tok = &p->line[p->idx]; 55 | while (p->idx < p->len && !is_white(p->line[p->idx])) p->idx++; 56 | *tok_len = &p->line[p->idx] - *tok; 57 | } 58 | 59 | static inline char *parse_name(parser_t *p) 60 | { 61 | const char *tok; 62 | size_t tok_len; 63 | parse_tok(p, &tok, &tok_len); 64 | if (tok_len == 0) FAIL("Reached end while parsing name in line: '%s'", p->line); 65 | 66 | char *s = malloc(tok_len+1); 67 | memcpy(s, tok, tok_len); 68 | s[tok_len] = '\0'; 69 | return s; 70 | } 71 | 72 | static inline int parse_type(parser_t *p) 73 | { 74 | const char *tok; 75 | size_t tok_len; 76 | parse_tok(p, &tok, &tok_len); 77 | if (tok_len == 0) FAIL("Reached end while parsing type in line: '%s'", p->line); 78 | 79 | if (tok_len == 2 && 0 == memcmp(tok, "u8", 2)) return DATAMAP_TYPE_U8; 80 | else if (tok_len == 3 && 0 == memcmp(tok, "u16", 3)) return DATAMAP_TYPE_U16; 81 | else FAIL("Unknown type '%.*s' in line: '%s'", (int)tok_len, tok, p->line); 82 | } 83 | 84 | static inline u16 parse_addr(parser_t *p) 85 | { 86 | const char *tok; 87 | size_t tok_len; 88 | parse_tok(p, &tok, &tok_len); 89 | if (tok_len == 0) FAIL("Reached end while parsing type in line: '%s'", 
p->line); 90 | 91 | if (tok_len < 2 || tok[0] != '0' || tok[1] != 'x') { 92 | FAIL("Expected hex number for addr in line: '%s'", p->line); 93 | } 94 | if (tok_len > 6) { 95 | FAIL("Hex number too long for addr in line: '%s'", p->line); 96 | } 97 | 98 | u16 num = 0; 99 | for (size_t i = 2; i < tok_len; i++) { 100 | char c = tok[i]; 101 | if ('0' <= c && c <= '9') num = num*16 + (c-'0'); 102 | else if ('a' <= c && c <= 'z') num = num*16 + (c-'a'+10); 103 | else if ('A' <= c && c <= 'Z') num = num*16 + (c-'A'+10); 104 | else FAIL("Invalid hex number for addr in line: '%s'", p->line); 105 | } 106 | 107 | return num; 108 | } 109 | 110 | static inline void parse_end(parser_t *p) 111 | { 112 | const char *tok; 113 | size_t tok_len; 114 | parse_tok(p, &tok, &tok_len); 115 | if (tok_len != 0) FAIL("Expected end of line in line: '%s'", p->line); 116 | } 117 | 118 | static inline datamap_entry_t *entry_begin(datamap_t *d, size_t *_cap) 119 | { 120 | size_t cap = *_cap; 121 | if (d->n_entries+1 > cap) { 122 | cap *= 2; 123 | d->entries = realloc(d->entries, cap * sizeof(datamap_entry_t)); 124 | *_cap = cap; 125 | } 126 | return &d->entries[d->n_entries]; 127 | } 128 | 129 | static inline void entry_commit(datamap_t *d, datamap_entry_t *ent) 130 | { 131 | assert(ent == &d->entries[d->n_entries]); 132 | d->n_entries++; 133 | } 134 | 135 | datamap_t *datamap_load_from_mem(const char *str, size_t n) 136 | { 137 | size_t cap = INITIAL_CAP; 138 | 139 | datamap_t *d = calloc(1, sizeof(datamap_t)); 140 | d->entries = malloc(cap * sizeof(datamap_entry_t)); 141 | d->n_entries = 0; 142 | 143 | 144 | const char *line = str; 145 | const char *line_end = str; 146 | while (*line) { 147 | // Find next line 148 | while (*line_end && *line_end != '\n') line_end++; 149 | 150 | // Init parser 151 | parser_t p[1]; 152 | parser_init(p, line, line_end - line); 153 | 154 | // Advance the line 155 | if (*line_end) line_end++; 156 | line = line_end; 157 | 158 | // Allow and ignore empty lines 159 | if 
(parser_is_end(p)) { 160 | continue; 161 | } 162 | 163 | // Parse the entry 164 | datamap_entry_t *ent = entry_begin(d, &cap); 165 | ent->name = parse_name(p); 166 | ent->type = parse_type(p); 167 | ent->addr = parse_addr(p); 168 | parse_end(p); 169 | entry_commit(d, ent); 170 | } 171 | 172 | return d; 173 | } 174 | 175 | datamap_t *datamap_load_from_file(const char *filename) 176 | { 177 | size_t mem_sz = 0; 178 | char * mem = read_file(filename, &mem_sz); 179 | 180 | datamap_t *d = datamap_load_from_mem(mem, mem_sz); 181 | 182 | free(mem); 183 | return d; 184 | } 185 | 186 | void datamap_delete(datamap_t *d) 187 | { 188 | for (size_t i = 0; i < d->n_entries; i++) { 189 | datamap_entry_t *ent = &d->entries[i]; 190 | free(ent->name); 191 | } 192 | free(d); 193 | } 194 | -------------------------------------------------------------------------------- /old/v2/src/datamap.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #include "header.h" 3 | 4 | typedef struct datamap datamap_t; 5 | typedef struct datamap_entry datamap_entry_t; 6 | 7 | enum { 8 | DATAMAP_TYPE_U8, 9 | DATAMAP_TYPE_U16, 10 | }; 11 | 12 | struct datamap 13 | { 14 | datamap_entry_t * entries; 15 | size_t n_entries; 16 | }; 17 | 18 | struct datamap_entry 19 | { 20 | char * name; 21 | int type; /* DATAMAP_TYPE_ */ 22 | u16 addr; 23 | }; 24 | 25 | datamap_t *datamap_load_from_mem(const char *str, size_t n); 26 | datamap_t *datamap_load_from_file(const char *filename); 27 | void datamap_delete(datamap_t *d); 28 | -------------------------------------------------------------------------------- /old/v2/src/decompile/config.c: -------------------------------------------------------------------------------- 1 | #include "config.h" 2 | #include "dis86_private.h" 3 | #include "bsl/bsl.h" 4 | 5 | config_t * config_default_new(void) 6 | { 7 | config_t * cfg = calloc(1, sizeof(config_t)); 8 | return cfg; 9 | } 10 | 11 | config_t * config_read_new(const char 
*path) 12 | { 13 | config_t * cfg = calloc(1, sizeof(config_t)); 14 | 15 | size_t sz; 16 | char * data = read_file(path, &sz); 17 | if (!data) FAIL("Failed to read file: '%s'", path); 18 | 19 | bsl_t *root = bsl_parse_new(data, sz, NULL); 20 | if (!root) FAIL("Failed to read the config"); 21 | 22 | bsl_t *func = bsl_get_node(root, "dis86.functions"); 23 | if (!func) FAIL("Failed to get functions node"); 24 | 25 | bsl_iter_t it[1]; 26 | int type; 27 | const char * key; 28 | void * val; 29 | 30 | bsl_iter_begin(it, func); 31 | while (bsl_iter_next(it, &type, &key, &val)) { 32 | if (type != BSL_TYPE_NODE) FAIL("Expected function properties"); 33 | bsl_t *f = (bsl_t*)val; 34 | 35 | const char *addr_str = bsl_get_str(f, "start"); 36 | if (!addr_str) FAIL("No function addr property for '%s'", key); 37 | 38 | const char *ret_str = bsl_get_str(f, "ret"); 39 | if (!ret_str) FAIL("No function ret property for '%s'", key); 40 | 41 | const char *args_str = bsl_get_str(f, "args"); 42 | if (!args_str) FAIL("No function args property for '%s'", key); 43 | 44 | bool pop_args_after_call = !bsl_get_str(f, "dont_pop_args"); 45 | 46 | i16 args; 47 | if (!parse_bytes_i16(args_str, strlen(args_str), &args)) FAIL("Expected u16 for '%s.args', got '%s'", key, args_str); 48 | 49 | assert(cfg->func_len < ARRAY_SIZE(cfg->func_arr)); 50 | config_func_t *cf = &cfg->func_arr[cfg->func_len++]; 51 | cf->name = strdup(key); 52 | cf->addr = parse_segoff(addr_str); 53 | cf->ret = strdup(ret_str); 54 | cf->args = args; 55 | cf->pop_args_after_call = pop_args_after_call; 56 | } 57 | 58 | bsl_t *glob = bsl_get_node(root, "dis86.globals"); 59 | if (!glob) FAIL("Failed to get globals node"); 60 | 61 | bsl_iter_begin(it, glob); 62 | while (bsl_iter_next(it, &type, &key, &val)) { 63 | if (type != BSL_TYPE_NODE) FAIL("Expected global properties"); 64 | bsl_t *f = (bsl_t*)val; 65 | 66 | const char *off_str = bsl_get_str(f, "off"); 67 | if (!off_str) FAIL("No global off property for '%s'", key); 68 | 69 | 
const char *type_str = bsl_get_str(f, "type"); 70 | if (!type_str) FAIL("No global type property for '%s'", key); 71 | 72 | assert(cfg->global_len < ARRAY_SIZE(cfg->global_arr)); 73 | config_global_t *g = &cfg->global_arr[cfg->global_len++]; 74 | g->name = strdup(key); 75 | g->offset = parse_hex_u16(off_str, strlen(off_str)); 76 | g->type = strdup(type_str); 77 | } 78 | 79 | bsl_t *segmap = bsl_get_node(root, "dis86.segmap"); 80 | if (!segmap) FAIL("Failed to get segmap node"); 81 | 82 | bsl_iter_begin(it, segmap); 83 | while (bsl_iter_next(it, &type, &key, &val)) { 84 | if (type != BSL_TYPE_NODE) FAIL("Expected segmap properties"); 85 | bsl_t *s = (bsl_t*)val; 86 | 87 | const char *from_str = bsl_get_str(s, "from"); 88 | if (!from_str) FAIL("No segmap 'from' property for '%s'", key); 89 | u16 from = parse_hex_u16(from_str, strlen(from_str)); 90 | 91 | const char *to_str = bsl_get_str(s, "to"); 92 | if (!to_str) FAIL("No segmap 'to' property for '%s'", key); 93 | u16 to = parse_hex_u16(to_str, strlen(to_str)); 94 | 95 | assert(cfg->segmap_len < ARRAY_SIZE(cfg->segmap_arr)); 96 | config_segmap_t *sm = &cfg->segmap_arr[cfg->segmap_len++]; 97 | sm->name = strdup(key); 98 | sm->from = from; 99 | sm->to = to; 100 | } 101 | 102 | 103 | bsl_delete(root); 104 | free(data); 105 | 106 | return cfg; 107 | } 108 | 109 | void config_delete(config_t *cfg) 110 | { 111 | if (!cfg) return; 112 | for (size_t i = 0; i < cfg->func_len; i++) { 113 | free(cfg->func_arr[i].name); 114 | } 115 | for (size_t i = 0; i < cfg->global_len; i++) { 116 | free(cfg->global_arr[i].name); 117 | free(cfg->global_arr[i].type); 118 | } 119 | for (size_t i = 0; i < cfg->segmap_len; i++) { 120 | free(cfg->segmap_arr[i].name); 121 | } 122 | free(cfg); 123 | } 124 | 125 | void config_print(config_t *cfg) 126 | { 127 | printf("functions:\n"); 128 | for (size_t i = 0; i < cfg->func_len; i++) { 129 | config_func_t *f = &cfg->func_arr[i]; 130 | printf(" %-30s %04x:%04x %-8s %d\n", f->name, f->addr.seg, 
f->addr.off, f->ret, f->args); 131 | } 132 | printf("\n"); 133 | printf("globals:\n"); 134 | for (size_t i = 0; i < cfg->global_len; i++) { 135 | config_global_t *g = &cfg->global_arr[i]; 136 | printf(" %-30s %04x %s\n", g->name, g->offset, g->type); 137 | } 138 | printf("\n"); 139 | printf("segmap:\n"); 140 | for (size_t i = 0; i < cfg->segmap_len; i++) { 141 | config_segmap_t *s = &cfg->segmap_arr[i]; 142 | printf(" %-30s %04x => %04x\n", s->name, s->from, s->to); 143 | } 144 | } 145 | 146 | config_func_t * config_func_lookup(config_t *cfg, segoff_t s) 147 | { 148 | for (size_t i = 0; i < cfg->func_len; i++) { 149 | config_func_t *f = &cfg->func_arr[i]; 150 | if (f->addr.seg == s.seg && f->addr.off == s.off) { 151 | return f; 152 | } 153 | } 154 | return NULL; 155 | } 156 | 157 | bool config_seg_remap(config_t *cfg, u16 *_seg) 158 | { 159 | u16 seg = *_seg; 160 | for (size_t i = 0; i < cfg->segmap_len; i++) { 161 | config_segmap_t *sm = &cfg->segmap_arr[i]; 162 | if (seg == sm->from) { 163 | *_seg = sm->to; 164 | return true; 165 | } 166 | } 167 | return false; 168 | } 169 | 170 | dis86_decompile_config_t * dis86_decompile_config_read_new(const char *path) 171 | { return config_read_new(path); } 172 | 173 | void dis86_decompile_config_delete(dis86_decompile_config_t *cfg) 174 | { config_delete(cfg); } 175 | -------------------------------------------------------------------------------- /old/v2/src/decompile/config.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #include "header.h" 3 | #include "segoff.h" 4 | 5 | #define MAX_CONFIG_FUNCS 1024 6 | #define MAX_CONFIG_GLOBALS 1024 7 | #define MAX_CONFIG_SEGMAPS 1024 8 | 9 | typedef struct dis86_decompile_config config_t; 10 | typedef struct config_func config_func_t; 11 | typedef struct config_global config_global_t; 12 | typedef struct config_segmap config_segmap_t; 13 | 14 | struct config_func 15 | { 16 | char * name; 17 | segoff_t addr; 18 | char * ret; 19 
| i16 args; // -1 means "unknown" 20 | bool pop_args_after_call; 21 | }; 22 | 23 | struct config_global 24 | { 25 | char * name; 26 | u16 offset; 27 | char * type; 28 | }; 29 | 30 | struct config_segmap 31 | { 32 | char * name; 33 | u16 from; 34 | u16 to; 35 | }; 36 | 37 | struct dis86_decompile_config 38 | { 39 | size_t func_len; 40 | config_func_t func_arr[MAX_CONFIG_FUNCS]; 41 | 42 | size_t global_len; 43 | config_global_t global_arr[MAX_CONFIG_GLOBALS]; 44 | 45 | size_t segmap_len; 46 | config_segmap_t segmap_arr[MAX_CONFIG_SEGMAPS]; 47 | }; 48 | 49 | config_t * config_read_new(const char *path); 50 | config_t * config_default_new(void); 51 | void config_delete(config_t *cfg); 52 | 53 | void config_print(config_t *cfg); 54 | config_func_t * config_func_lookup(config_t *cfg, segoff_t s); 55 | bool config_seg_remap(config_t *cfg, u16 *inout_seg); 56 | -------------------------------------------------------------------------------- /old/v2/src/decompile/decompile_private.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #include "header.h" 3 | #include "dis86_private.h" 4 | #include "instr_tbl.h" 5 | #include "util.h" 6 | #include "symbols.h" 7 | #include "config.h" 8 | #include "labels.h" 9 | #include "type.h" 10 | #include "value.h" 11 | #include "expr.h" 12 | #include "transform.h" 13 | #include "str.h" 14 | 15 | #define LOG_INFO(fmt, ...) do { \ 16 | fprintf(stderr, "INFO: "); \ 17 | fprintf(stderr, fmt, ##__VA_ARGS__); \ 18 | fprintf(stderr, "\n"); \ 19 | } while(0) 20 | 21 | #define LOG_WARN(fmt, ...) 
do { \ 22 | fprintf(stderr, "WARN: "); \ 23 | fprintf(stderr, fmt, ##__VA_ARGS__); \ 24 | fprintf(stderr, "\n"); \ 25 | } while(0) 26 | -------------------------------------------------------------------------------- /old/v2/src/decompile/expr.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | typedef struct meh meh_t; 4 | typedef struct expr expr_t; 5 | typedef struct expr_operator1 expr_operator1_t; 6 | typedef struct expr_operator2 expr_operator2_t; 7 | typedef struct expr_operator3 expr_operator3_t; 8 | typedef struct expr_abstract expr_abstract_t; 9 | typedef struct expr_branch_cond expr_branch_cond_t; 10 | typedef struct expr_branch_flags expr_branch_flags_t; 11 | typedef struct expr_branch expr_branch_t; 12 | typedef struct expr_call expr_call_t; 13 | typedef struct expr_call_with_args expr_call_with_args_t; 14 | 15 | enum { 16 | ADDR_TYPE_FAR, 17 | ADDR_TYPE_NEAR, 18 | }; 19 | 20 | typedef struct addr addr_t; 21 | struct addr 22 | { 23 | int type; 24 | union { 25 | segoff_t far; 26 | u16 near; 27 | } u; 28 | }; 29 | 30 | typedef struct operator operator_t; 31 | struct operator 32 | { 33 | const char * oper; 34 | int sign; 35 | }; 36 | 37 | enum { 38 | EXPR_KIND_UNKNOWN, 39 | EXPR_KIND_NONE, 40 | EXPR_KIND_OPERATOR1, 41 | EXPR_KIND_OPERATOR2, 42 | EXPR_KIND_OPERATOR3, 43 | EXPR_KIND_ABSTRACT, 44 | EXPR_KIND_BRANCH_COND, 45 | EXPR_KIND_BRANCH_FLAGS, 46 | EXPR_KIND_BRANCH, 47 | EXPR_KIND_CALL, 48 | EXPR_KIND_CALL_WITH_ARGS, 49 | }; 50 | 51 | struct expr_operator1 52 | { 53 | operator_t operator; 54 | value_t dest; 55 | }; 56 | 57 | struct expr_operator2 58 | { 59 | operator_t operator; 60 | value_t dest; 61 | value_t src; 62 | }; 63 | 64 | struct expr_operator3 65 | { 66 | operator_t operator; 67 | value_t dest; 68 | value_t left; 69 | value_t right; 70 | }; 71 | 72 | struct expr_abstract 73 | { 74 | const char * func_name; 75 | value_t ret; 76 | u16 n_args; 77 | value_t args[3]; 78 | }; 79 | 80 | 
struct expr_branch_cond 81 | { 82 | operator_t operator; 83 | value_t left; 84 | value_t right; 85 | u32 target; 86 | }; 87 | 88 | struct expr_branch_flags 89 | { 90 | const char * op; // FIXME 91 | value_t flags; 92 | u32 target; 93 | }; 94 | 95 | struct expr_branch 96 | { 97 | u32 target; 98 | }; 99 | 100 | struct expr_call 101 | { 102 | addr_t addr; 103 | bool remapped; 104 | config_func_t * func; // optional 105 | }; 106 | 107 | #define MAX_ARGS 16 108 | struct expr_call_with_args 109 | { 110 | addr_t addr; 111 | bool remapped; 112 | config_func_t * func; // required 113 | value_t args[MAX_ARGS]; 114 | }; 115 | 116 | struct expr 117 | { 118 | int kind; 119 | union { 120 | expr_operator1_t operator1[1]; 121 | expr_operator2_t operator2[1]; 122 | expr_operator3_t operator3[1]; 123 | expr_abstract_t abstract[1]; 124 | expr_branch_cond_t branch_cond[1]; 125 | expr_branch_flags_t branch_flags[1]; 126 | expr_branch_t branch[1]; 127 | expr_call_t call[1]; 128 | expr_call_with_args_t call_with_args[1]; 129 | } k; 130 | 131 | size_t n_ins; 132 | dis86_instr_t * ins; 133 | }; 134 | 135 | value_t expr_destination(expr_t *expr); 136 | 137 | #define EXPR_NONE ({ \ 138 | expr_t expr = {}; \ 139 | expr.kind = EXPR_KIND_NONE; \ 140 | expr; }) 141 | 142 | #define EXPR_MAX 4096 143 | struct meh 144 | { 145 | size_t expr_len; 146 | expr_t expr_arr[EXPR_MAX]; 147 | }; 148 | 149 | meh_t * meh_new(config_t *cfg, symbols_t *symbols, u16 seg, dis86_instr_t *ins, size_t n_ins); 150 | void meh_delete(meh_t *m); 151 | -------------------------------------------------------------------------------- /old/v2/src/decompile/labels.h: -------------------------------------------------------------------------------- 1 | 2 | #define MAX_LABELS 256 3 | 4 | typedef struct labels labels_t; 5 | struct labels 6 | { 7 | u32 addr[MAX_LABELS]; 8 | size_t n_addr; 9 | }; 10 | 11 | // FIXME: O(n) search 12 | static bool is_label(labels_t *labels, u32 addr) 13 | { 14 | for (size_t i = 0; i < labels->n_addr; 
i++) { 15 | if (labels->addr[i] == addr) return true; 16 | } 17 | return false; 18 | } 19 | 20 | static u32 branch_destination(dis86_instr_t *ins) 21 | { 22 | i16 rel = 0; 23 | switch (ins->opcode) { 24 | case OP_JO: rel = (i16)ins->operand[0].u.rel.val; break; 25 | case OP_JNO: rel = (i16)ins->operand[0].u.rel.val; break; 26 | case OP_JB: rel = (i16)ins->operand[0].u.rel.val; break; 27 | case OP_JAE: rel = (i16)ins->operand[0].u.rel.val; break; 28 | case OP_JE: rel = (i16)ins->operand[0].u.rel.val; break; 29 | case OP_JNE: rel = (i16)ins->operand[0].u.rel.val; break; 30 | case OP_JBE: rel = (i16)ins->operand[0].u.rel.val; break; 31 | case OP_JA: rel = (i16)ins->operand[0].u.rel.val; break; 32 | case OP_JS: rel = (i16)ins->operand[0].u.rel.val; break; 33 | case OP_JNS: rel = (i16)ins->operand[0].u.rel.val; break; 34 | case OP_JP: rel = (i16)ins->operand[0].u.rel.val; break; 35 | case OP_JNP: rel = (i16)ins->operand[0].u.rel.val; break; 36 | case OP_JL: rel = (i16)ins->operand[0].u.rel.val; break; 37 | case OP_JGE: rel = (i16)ins->operand[0].u.rel.val; break; 38 | case OP_JLE: rel = (i16)ins->operand[0].u.rel.val; break; 39 | case OP_JG: rel = (i16)ins->operand[0].u.rel.val; break; 40 | case OP_JMP: rel = (i16)ins->operand[0].u.rel.val; break; 41 | case OP_LOOP:rel = (i16)ins->operand[1].u.rel.val; break; 42 | default: return 0; 43 | } 44 | 45 | u16 effective = ins->addr + ins->n_bytes + rel; 46 | return effective; 47 | } 48 | 49 | static void find_labels(labels_t *labels, dis86_instr_t *ins_arr, size_t n_ins) 50 | { 51 | labels->n_addr = 0; 52 | 53 | for (size_t i = 0; i < n_ins; i++) { 54 | dis86_instr_t *ins = &ins_arr[i]; 55 | u16 dst = branch_destination(ins); 56 | if (!dst) continue; 57 | 58 | assert(labels->n_addr < ARRAY_SIZE(labels->addr)); 59 | labels->addr[labels->n_addr++] = dst; 60 | } 61 | } 62 | -------------------------------------------------------------------------------- /old/v2/src/decompile/symbols.h: 
-------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #define NAME_MAX 128 4 | 5 | typedef struct sym sym_t; 6 | typedef struct symref symref_t; 7 | typedef struct symbols symbols_t; 8 | typedef struct symtab symtab_t; 9 | typedef struct symtab_iter symtab_iter_t; 10 | 11 | struct __attribute__((aligned(16))) symtab_iter { char _opaque[32]; }; 12 | 13 | enum { 14 | SYM_KIND_REGISTER, 15 | SYM_KIND_PARAM, 16 | SYM_KIND_LOCAL, 17 | SYM_KIND_GLOBAL, 18 | }; 19 | 20 | struct sym 21 | { 22 | int kind; 23 | i16 off; 24 | u16 len; // in bytes 25 | const char * name; // optional (default name is constructed otherwise) 26 | }; 27 | 28 | bool sym_deduce(sym_t *v, operand_mem_t *mem); 29 | bool sym_deduce_reg(sym_t *sym, int reg_id); 30 | const char * sym_name(sym_t *v, char *buf, size_t buf_sz); 31 | size_t sym_size_bytes(sym_t *v); 32 | 33 | struct symbols 34 | { 35 | symtab_t * registers; 36 | symtab_t * globals; 37 | symtab_t * params; 38 | symtab_t * locals; 39 | }; 40 | 41 | symbols_t * symbols_new(void); 42 | void symbols_delete(symbols_t *s); 43 | bool symbols_insert_deduced(symbols_t *s, sym_t *deduced_sym); 44 | symref_t symbols_find_ref(symbols_t *s, sym_t *deduced_sym); 45 | symref_t symbols_find_mem(symbols_t *s, operand_mem_t *mem); 46 | symref_t symbols_find_reg(symbols_t *s, int reg_id); 47 | void symbols_add_global(symbols_t *s, const char *name, u16 offset, u16 len); 48 | 49 | struct symref 50 | { 51 | sym_t * symbol; // NULL if the ref doesn't point anywhere 52 | u16 off; // offset into this symbol 53 | u16 len; // length from the offset 54 | }; 55 | 56 | bool symref_matches(symref_t *a, symref_t *b); 57 | 58 | symtab_t * symtab_new(void); 59 | void symtab_delete(symtab_t *s); 60 | 61 | void symtab_iter_begin(symtab_iter_t *it, symtab_t *s); 62 | sym_t * symtab_iter_next(symtab_iter_t *it); 63 | -------------------------------------------------------------------------------- 
/old/v2/src/decompile/transform.c: -------------------------------------------------------------------------------- 1 | #include "decompile_private.h" 2 | 3 | void transform_pass_xor_rr(meh_t *m) 4 | { 5 | for (size_t i = 0; i < m->expr_len; i++) { 6 | expr_t *expr = &m->expr_arr[i]; 7 | if (expr->kind != EXPR_KIND_OPERATOR2) continue; 8 | 9 | expr_operator2_t *k = expr->k.operator2; 10 | if (0 != memcmp(k->operator.oper, "^=", 2)) continue; 11 | if (k->dest.type != VALUE_TYPE_SYM) continue; 12 | if (k->src.type != VALUE_TYPE_SYM) continue; 13 | if (!value_matches(&k->dest, &k->src)) continue; 14 | 15 | // Rewrite 16 | k->operator.oper = "="; 17 | k->src = VALUE_IMM(0); 18 | } 19 | } 20 | 21 | static operator_t jump_operation(const char *op) 22 | { 23 | operator_t o = {}; 24 | if (0 == strcmp(op, "JB")) { o.oper = "<"; o.sign = 0; return o; } 25 | if (0 == strcmp(op, "JBE")) { o.oper = "<="; o.sign = 0; return o; } 26 | if (0 == strcmp(op, "JA")) { o.oper = ">"; o.sign = 0; return o; } 27 | if (0 == strcmp(op, "JAE")) { o.oper = ">="; o.sign = 0; return o; } 28 | if (0 == strcmp(op, "JE")) { o.oper = "=="; o.sign = 0; return o; } 29 | if (0 == strcmp(op, "JNE")) { o.oper = "!="; o.sign = 0; return o; } 30 | if (0 == strcmp(op, "JL")) { o.oper = "<"; o.sign = 1; return o; } 31 | if (0 == strcmp(op, "JLE")) { o.oper = "<="; o.sign = 1; return o; } 32 | if (0 == strcmp(op, "JG")) { o.oper = ">"; o.sign = 1; return o; } 33 | if (0 == strcmp(op, "JGE")) { o.oper = ">="; o.sign = 1; return o; } 34 | 35 | FAIL("Unexpected jump operation: '%s'", op); 36 | } 37 | 38 | void transform_pass_cmp_jmp(meh_t *m) 39 | { 40 | for (size_t i = 1; i < m->expr_len; i++) { 41 | expr_t *expr = &m->expr_arr[i]; 42 | if (expr->kind != EXPR_KIND_BRANCH_FLAGS) continue; 43 | 44 | expr_branch_flags_t *k = expr->k.branch_flags; 45 | size_t prev_idx = i-1; 46 | expr_t *prev_expr = &m->expr_arr[i-1]; 47 | value_t prev_dest = expr_destination(prev_expr); 48 | if (!value_matches(&k->flags, 
&prev_dest)) continue; 49 | 50 | if (prev_expr->kind != EXPR_KIND_ABSTRACT) continue; 51 | expr_abstract_t *p = prev_expr->k.abstract; 52 | if (p->n_args != 2) continue; 53 | 54 | // Unpack values 55 | const char * name = k->op; 56 | value_t left = p->args[0]; 57 | value_t right = p->args[1]; 58 | u32 target = k->target; 59 | 60 | // Rewrite 61 | prev_expr->kind = EXPR_KIND_BRANCH_COND; 62 | prev_expr->n_ins++; 63 | expr_branch_cond_t *b = prev_expr->k.branch_cond; 64 | b->operator = jump_operation(name); 65 | b->left = left; 66 | b->right = right; 67 | b->target = target; 68 | 69 | // Ignore the extra instruction 70 | m->expr_arr[i] = EXPR_NONE; 71 | } 72 | } 73 | 74 | void transform_pass_or_jmp(meh_t *m) 75 | { 76 | for (size_t i = 1; i < m->expr_len; i++) { 77 | expr_t *expr = &m->expr_arr[i]; 78 | if (expr->kind != EXPR_KIND_BRANCH_FLAGS) continue; 79 | expr_branch_flags_t *k = expr->k.branch_flags; 80 | 81 | const char *cmp; 82 | if (0 == memcmp(k->op, "JE", 2)) cmp = "=="; 83 | else if (0 == memcmp(k->op, "JNE", 3)) cmp = "!="; 84 | else continue; 85 | 86 | size_t prev_idx = i-1; 87 | expr_t *prev_expr = &m->expr_arr[i-1]; 88 | if (prev_expr->kind != EXPR_KIND_OPERATOR2) continue; 89 | expr_operator2_t *p = prev_expr->k.operator2; 90 | if (0 != memcmp(p->operator.oper, "|=", 2)) continue; 91 | if (!value_matches(&p->dest, &p->src)) continue; 92 | 93 | // Save 94 | value_t src = p->src; 95 | u32 target = k->target; 96 | 97 | // Rewrite 98 | prev_expr->kind = EXPR_KIND_BRANCH_COND; 99 | prev_expr->n_ins++; 100 | expr_branch_cond_t *b = prev_expr->k.branch_cond; 101 | b->operator.oper = cmp; 102 | b->operator.sign = 0; 103 | b->left = src; 104 | b->right = VALUE_IMM(0); 105 | b->target = target; 106 | 107 | // Ignore the extra instruction 108 | m->expr_arr[i] = EXPR_NONE; 109 | } 110 | } 111 | 112 | void _synthesize_calls_one(meh_t *m, size_t i) 113 | { 114 | expr_t *expr = &m->expr_arr[i]; 115 | if (expr->kind != EXPR_KIND_CALL) return; 116 | 117 | expr_call_t 
* k = expr->k.call; 118 | addr_t addr = k->addr; 119 | bool remapped = k->remapped; 120 | config_func_t * func = k->func; 121 | 122 | if (!func || func->args < 0) return; 123 | if (i < (size_t)func->args) return; 124 | if (i+1 >= m->expr_len) return; 125 | 126 | // Check and extract arguments 127 | value_t args[MAX_ARGS]; 128 | for (size_t j = 0; j < (size_t)func->args; j++) { 129 | size_t idx = i-1 - j; 130 | expr_t *arg_expr = &m->expr_arr[idx]; 131 | if (arg_expr->kind != EXPR_KIND_ABSTRACT) return; 132 | expr_abstract_t *a = arg_expr->k.abstract; 133 | if (0 != memcmp(a->func_name, "PUSH", 4)) return; 134 | args[j] = a->args[0]; 135 | } 136 | 137 | // Check for stack cleanup 138 | size_t num_cleanup_ins = 0; 139 | if (func->pop_args_after_call) { 140 | if (func->args > 1) { 141 | expr_t *cleanup_expr = &m->expr_arr[i+1]; 142 | if (cleanup_expr->kind != EXPR_KIND_OPERATOR2) return; 143 | expr_operator2_t *c = cleanup_expr->k.operator2; 144 | if (0 != memcmp(c->operator.oper, "+=", 2)) return; 145 | if (c->dest.type != VALUE_TYPE_SYM) return; 146 | // FIXME! 
147 | //if (!symref_matches(c->dest.u.sym->ref, symbols_find_reg(symbols, REG_SP))) return; 148 | if (c->src.type != VALUE_TYPE_IMM) return; 149 | u16 val = c->src.u.imm->value; 150 | if (val != 2*(size_t)func->args) return; 151 | num_cleanup_ins = 1; 152 | } else if (func->args == 1) { 153 | expr_t *cleanup_expr = &m->expr_arr[i+1]; 154 | if (cleanup_expr->kind != EXPR_KIND_ABSTRACT) return; 155 | expr_abstract_t *a = cleanup_expr->k.abstract; 156 | if (0 != memcmp(a->func_name, "POP", 3)) return; 157 | num_cleanup_ins = 1; 158 | } 159 | } 160 | 161 | // Rewrite 162 | expr->kind = EXPR_KIND_CALL_WITH_ARGS; 163 | expr_call_with_args_t * a = expr->k.call_with_args; 164 | a->addr = addr; 165 | a->remapped = remapped; 166 | a->func = func; 167 | assert(ARRAY_SIZE(args) == ARRAY_SIZE(a->args)); 168 | memcpy(a->args, args, sizeof(args)); 169 | 170 | // Remove the old exprs 171 | dis86_instr_t *first_ins = NULL; 172 | size_t ins_count = 0; 173 | size_t n = (size_t)func->args + 1 + num_cleanup_ins; 174 | for (size_t j = 0; j < n; j++) { 175 | size_t idx = i - (size_t)func->args + j; 176 | if (!first_ins) { 177 | first_ins = m->expr_arr[idx].ins; 178 | } 179 | ins_count += m->expr_arr[idx].n_ins; 180 | if (i == idx) continue; 181 | m->expr_arr[idx] = EXPR_NONE; 182 | } 183 | 184 | // Update the ins array tracking 185 | expr->ins = first_ins; 186 | expr->n_ins = ins_count; 187 | } 188 | 189 | void transform_pass_synthesize_calls(meh_t *m) 190 | { 191 | for (size_t i = 0; i < m->expr_len; i++) { 192 | _synthesize_calls_one(m, i); 193 | } 194 | } 195 | -------------------------------------------------------------------------------- /old/v2/src/decompile/transform.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | // xor r,r => mov r,0 4 | void transform_pass_xor_rr(meh_t *m); 5 | 6 | // cmp a,b; j{pred} target => {c-style code} 7 | void transform_pass_cmp_jmp(meh_t *m); 8 | 9 | // or r,r; j{e|ne} target => {c-style 
code} 10 | void transform_pass_or_jmp(meh_t *m); 11 | 12 | // synthesize normal calls where possible 13 | void transform_pass_synthesize_calls(meh_t *m); 14 | -------------------------------------------------------------------------------- /old/v2/src/decompile/type.c: -------------------------------------------------------------------------------- 1 | #include "type.h" 2 | 3 | const char *basetype_str(int t) 4 | { 5 | switch (t) { 6 | case BASETYPE_U8: return "u8"; 7 | case BASETYPE_U16: return "u16"; 8 | case BASETYPE_U32: return "u32"; 9 | default: FAIL("Unknown basetype: %d", t); 10 | } 11 | } 12 | 13 | static u16 basetype_size(int t) 14 | { 15 | switch (t) { 16 | case BASETYPE_U8: return 1; 17 | case BASETYPE_U16: return 2; 18 | case BASETYPE_U32: return 4; 19 | default: FAIL("Unknown basetype: %d", t); 20 | } 21 | } 22 | 23 | static bool basetype_parse(const char *s, size_t len, int *out) 24 | { 25 | int t = -1; 26 | if (len == 2) { 27 | if (0 == memcmp(s, "u8", 2)) t = BASETYPE_U8; 28 | } else if (len == 3) { 29 | if (0 == memcmp(s, "u16", 3)) t = BASETYPE_U16; 30 | if (0 == memcmp(s, "u32", 3)) t = BASETYPE_U32; 31 | } 32 | 33 | if (t == -1) return false; 34 | 35 | if (out) *out = t; 36 | return true; 37 | } 38 | 39 | bool type_parse(type_t *typ, const char *str) 40 | { 41 | const char *lbrace = str; 42 | while (*lbrace && *lbrace != '[') lbrace++; 43 | 44 | int base; 45 | if (!basetype_parse(str, lbrace-str, &base)) return false; 46 | 47 | if (!*lbrace) { // not an array? 
48 | typ->basetype = base; 49 | typ->is_array = false; 50 | typ->array_len = 0; 51 | return true; 52 | } 53 | 54 | // is an array 55 | 56 | // find ending ']' 57 | const char *rbrace = lbrace+1; 58 | while (*rbrace && *rbrace != ']') rbrace++; 59 | if (*rbrace == 0 || *(rbrace+1) != 0) return false; 60 | 61 | const char * size_str = lbrace+1; 62 | size_t size_len = rbrace - size_str; 63 | 64 | u64 size; 65 | if (!parse_bytes_u64(size_str, size_len, &size)) { 66 | return false; 67 | } 68 | 69 | typ->basetype = base; 70 | typ->is_array = true; 71 | typ->array_len = size; 72 | return true; 73 | } 74 | 75 | u16 type_size(type_t *typ) 76 | { 77 | u16 sz = basetype_size(typ->basetype); 78 | if (typ->is_array) { 79 | sz *= typ->array_len; 80 | } 81 | return sz; 82 | } 83 | -------------------------------------------------------------------------------- /old/v2/src/decompile/type.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #include "header.h" 3 | 4 | enum { 5 | BASETYPE_U8, 6 | BASETYPE_U16, 7 | BASETYPE_U32, 8 | }; 9 | 10 | const char *basetype_str(int t); 11 | 12 | typedef struct type type_t; 13 | struct type 14 | { 15 | int basetype; 16 | int is_array; 17 | size_t array_len; 18 | }; 19 | 20 | bool type_parse(type_t *typ, const char *str); 21 | u16 type_size(type_t *typ); 22 | -------------------------------------------------------------------------------- /old/v2/src/decompile/util.h: -------------------------------------------------------------------------------- 1 | 2 | static const char *as_upper(const char *s) 3 | { 4 | static char buf[256]; 5 | 6 | size_t len = strlen(s); 7 | if (len+1 >= sizeof(buf)) FAIL("String too long!"); 8 | 9 | for (size_t i = 0; i < len+1; i++) { 10 | char c = s[i]; 11 | if ('a' <= c && c <= 'z') c += ('A' - 'a'); 12 | buf[i] = c; 13 | } 14 | 15 | return buf; 16 | } 17 | -------------------------------------------------------------------------------- /old/v2/src/decompile/value.c: 
-------------------------------------------------------------------------------- 1 | #include "decompile_private.h" 2 | 3 | value_t value_from_operand(operand_t *o, symbols_t *symbols) 4 | { 5 | value_t val[1] = {{}}; 6 | 7 | switch (o->type) { 8 | case OPERAND_TYPE_REG: { 9 | sym_t deduced_sym[1]; 10 | sym_deduce_reg(deduced_sym, o->u.reg.id); 11 | 12 | val->type = VALUE_TYPE_SYM; 13 | val->u.sym->ref = symbols_find_ref(symbols, deduced_sym); 14 | assert(val->u.sym->ref.symbol); 15 | } break; 16 | case OPERAND_TYPE_MEM: { 17 | operand_mem_t *m = &o->u.mem; 18 | symref_t ref = symbols_find_mem(symbols, m); 19 | if (ref.symbol) { 20 | val->type = VALUE_TYPE_SYM; 21 | val->u.sym->ref = ref; 22 | } else { 23 | val->type = VALUE_TYPE_MEM; 24 | val->u.mem->sz = m->sz; 25 | val->u.mem->sreg = symbols_find_reg(symbols, m->sreg); 26 | val->u.mem->reg1 = symbols_find_reg(symbols, m->reg1); 27 | val->u.mem->reg2 = symbols_find_reg(symbols, m->reg2); 28 | val->u.mem->off = m->off; 29 | } 30 | } break; 31 | case OPERAND_TYPE_IMM: { 32 | val->type = VALUE_TYPE_IMM; 33 | val->u.imm->sz = o->u.imm.sz; 34 | val->u.imm->value = o->u.imm.val; 35 | } break; 36 | case OPERAND_TYPE_REL: { 37 | FAIL("OPERAND_TYPE_REL UNIMPL"); 38 | } break; 39 | case OPERAND_TYPE_FAR: { 40 | FAIL("OPERAND_TYPE_FAR UNIMPL"); 41 | } break; 42 | default: FAIL("INVALID OPERAND TYPE: %d", o->type); 43 | } 44 | 45 | return *val; 46 | } 47 | 48 | value_t value_from_symref(symref_t ref) 49 | { 50 | assert(ref.symbol); 51 | 52 | value_t val[1]; 53 | val->type = VALUE_TYPE_SYM; 54 | val->u.sym->ref = ref; 55 | return *val; 56 | } 57 | 58 | value_t value_from_imm(u16 imm) 59 | { 60 | value_t val[1]; 61 | val->type = VALUE_TYPE_IMM; 62 | val->u.imm->sz = SIZE_16; 63 | val->u.imm->value = imm; 64 | return *val; 65 | } 66 | 67 | bool value_matches(value_t *a, value_t *b) 68 | { 69 | if (a->type != b->type) return false; 70 | 71 | switch (a->type) { 72 | case VALUE_TYPE_NONE: return true; 73 | case VALUE_TYPE_SYM: { 
74 | return symref_matches(&a->u.sym->ref, &b->u.sym->ref); 75 | } break; 76 | case VALUE_TYPE_MEM: { 77 | value_mem_t *ak = a->u.mem; 78 | value_mem_t *bk = b->u.mem; 79 | return 80 | ak->sz == bk->sz && 81 | symref_matches(&ak->sreg, &bk->sreg) && 82 | symref_matches(&ak->reg1, &bk->reg1) && 83 | symref_matches(&ak->reg2, &bk->reg2) && 84 | ak->off == bk->off; 85 | } break; 86 | case VALUE_TYPE_IMM: { 87 | value_imm_t *ak = a->u.imm; 88 | value_imm_t *bk = b->u.imm; 89 | return 90 | ak->sz == bk->sz && 91 | ak->value == bk->value; 92 | } break; 93 | default: { 94 | FAIL("Unknown value type: %d", a->type); 95 | } 96 | } 97 | } 98 | -------------------------------------------------------------------------------- /old/v2/src/decompile/value.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | typedef struct value value_t; 4 | typedef struct value_sym value_sym_t; 5 | typedef struct value_mem value_mem_t; 6 | typedef struct value_imm value_imm_t; 7 | 8 | enum { 9 | VALUE_TYPE_NONE = 0, 10 | VALUE_TYPE_SYM, 11 | VALUE_TYPE_MEM, 12 | VALUE_TYPE_IMM, 13 | }; 14 | 15 | struct value_sym 16 | { 17 | symref_t ref; 18 | }; 19 | 20 | struct value_mem 21 | { 22 | // TODO: Remove 8086-isms and dis86-isms 23 | int sz; // SIZE_* 24 | symref_t sreg; 25 | symref_t reg1; 26 | symref_t reg2; 27 | u16 off; 28 | }; 29 | 30 | struct value_imm 31 | { 32 | // TODO: Remove 8086-isms and dis86-isms 33 | int sz; // SIZE_* 34 | u16 value; 35 | }; 36 | 37 | struct value 38 | { 39 | int type; 40 | union { 41 | value_sym_t sym[1]; 42 | value_mem_t mem[1]; 43 | value_imm_t imm[1]; 44 | } u; 45 | }; 46 | 47 | value_t value_from_operand(operand_t *o, symbols_t *symbols); 48 | value_t value_from_symref(symref_t ref); 49 | value_t value_from_imm(u16 imm); 50 | bool value_matches(value_t *a, value_t *b); 51 | 52 | #define VALUE_NONE ({ \ 53 | value_t v = {}; \ 54 | v.type = VALUE_TYPE_NONE; \ 55 | v; }) 56 | 57 | #define VALUE_IMM(_val) ({\ 58 | 
value_t v = {};\ 59 | v.type = VALUE_TYPE_IMM;\ 60 | v.u.imm->sz = SIZE_16; \ 61 | v.u.imm->value = _val; \ 62 | v; }) 63 | 64 | #define VALUE_IS_NONE(v) ((v).type == VALUE_TYPE_NONE) 65 | -------------------------------------------------------------------------------- /old/v2/src/dis86.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #include 3 | #include 4 | 5 | #ifdef __cplusplus 6 | extern "C" { 7 | #endif 8 | 9 | /*****************************************************************/ 10 | /* CORE TYPES */ 11 | /*****************************************************************/ 12 | typedef struct dis86 dis86_t; 13 | typedef struct dis86_instr dis86_instr_t; 14 | 15 | /*****************************************************************/ 16 | /* CORE ROUTINES */ 17 | /*****************************************************************/ 18 | 19 | /* Create new instance: deep copies the memory */ 20 | dis86_t *dis86_new(size_t base_addr, char *mem, size_t mem_sz); 21 | 22 | /* Destroys an instance */ 23 | void dis86_delete(dis86_t *d); 24 | 25 | /* Get next instruction */ 26 | dis86_instr_t *dis86_next(dis86_t *d); 27 | 28 | /* Get Position */ 29 | size_t dis86_position(dis86_t *d); 30 | 31 | /* Get Baseaddr */ 32 | size_t dis86_baseaddr(dis86_t *d); 33 | 34 | /* Get Length */ 35 | size_t dis86_length(dis86_t *d); 36 | 37 | /*****************************************************************/ 38 | /* INSTR ROUTINES */ 39 | /*****************************************************************/ 40 | 41 | /* Get the address where the instruction resides */ 42 | size_t dis86_instr_addr(dis86_instr_t *ins); 43 | 44 | /* Get the number of bytes used in the encoding */ 45 | size_t dis86_instr_n_bytes(dis86_instr_t *ins); 46 | 47 | /* Copy the instruction */ 48 | void dis86_instr_copy(dis86_instr_t *dst, dis86_instr_t *src); 49 | 50 | /*****************************************************************/ 51 | /* PRINT ROUTINES */ 52 
| /*****************************************************************/ 53 | 54 | /* Print */ 55 | char *dis86_print_intel_syntax(dis86_t *d, dis86_instr_t *ins, bool with_detail); 56 | 57 | /*****************************************************************/ 58 | /* DECOMPILE ROUTINES */ 59 | /*****************************************************************/ 60 | 61 | /* Configuration info for decompiler */ 62 | typedef struct dis86_decompile_config dis86_decompile_config_t; 63 | 64 | /* Construct a config from file */ 65 | dis86_decompile_config_t * dis86_decompile_config_read_new(const char *path); 66 | void dis86_decompile_config_delete(dis86_decompile_config_t *cfg); 67 | 68 | /* Decompile to C code */ 69 | char *dis86_decompile( dis86_t * dis, 70 | dis86_decompile_config_t * opt_cfg, /* optional */ 71 | const char * func_name, 72 | uint16_t seg, 73 | dis86_instr_t * ins, 74 | size_t n_ins ); 75 | 76 | #ifdef __cplusplus 77 | } 78 | #endif 79 | -------------------------------------------------------------------------------- /old/v2/src/dis86_private.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include "dis86.h" 4 | #include "header.h" 5 | #include "binary.h" 6 | #include "instr.h" 7 | 8 | enum { 9 | RESULT_SUCCESS = 0, 10 | RESULT_NEED_OPCODE2, 11 | RESULT_NOT_FOUND, 12 | }; 13 | 14 | struct dis86 15 | { 16 | binary_t b[1]; 17 | dis86_instr_t ins[1]; 18 | }; 19 | -------------------------------------------------------------------------------- /old/v2/src/header.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | 9 | typedef uint8_t u8; 10 | typedef int8_t i8; 11 | typedef uint16_t u16; 12 | typedef int16_t i16; 13 | typedef uint32_t u32; 14 | typedef int32_t i32; 15 | typedef uint64_t u64; 16 | typedef int64_t i64; 17 | 18 | //static inline void bin_dump_and_abort(); 19 
/*
 * Generic helpers shared by the rest of the project.
 *
 * Fixed-width types are spelled with their <stdint.h> names below; they are
 * the same types as the u8..u64 / i8..i64 aliases declared at the top of this
 * header, so every signature stays compatible with existing callers.
 */

/* NOTE: MIN/MAX evaluate their arguments more than once; avoid side effects. */
#define MIN(a, b) (((a)<(b))?(a):(b))
#define MAX(a, b) (((a)>(b))?(a):(b))

/* Element count of a true array (not valid for pointers / array parameters). */
#define ARRAY_SIZE(arr) (sizeof(arr)/sizeof((arr)[0]))

/* Print "FAIL: <formatted message>\n" to stderr and exit with status 42. */
#define FAIL(...) do { fprintf(stderr, "FAIL: "); fprintf(stderr, __VA_ARGS__); fprintf(stderr, "\n"); exit(42); } while(0)
#define UNIMPL() FAIL("UNIMPLEMENTED: %s:%d", __FILE__, __LINE__)

/*
 * Read an entire file into a freshly malloc'd buffer.
 *
 *   filename  path of the file to read
 *   out_sz    optional; receives the number of bytes read
 *
 * Returns the buffer (never NULL); the caller owns it and must free() it.
 * The buffer is NOT NUL-terminated. Any failure terminates the process via
 * FAIL().
 */
static inline char *read_file(const char *filename, size_t *out_sz)
{
  /* "rb": the size measured with ftell() must match what fread() delivers,
   * which text mode does not guarantee on every platform. */
  FILE *fp = fopen(filename, "rb");
  if (!fp) FAIL("Failed to open: '%s'", filename);

  if (fseek(fp, 0, SEEK_END) != 0) FAIL("Failed to seek: '%s'", filename);
  long end = ftell(fp);
  if (end < 0) FAIL("Failed to determine size of: '%s'", filename);
  if (fseek(fp, 0, SEEK_SET) != 0) FAIL("Failed to seek: '%s'", filename);
  size_t file_sz = (size_t)end;

  /* malloc(0) may legally return NULL; never treat an empty file as OOM. */
  char *mem = malloc(file_sz ? file_sz : 1);
  if (!mem) FAIL("Failed to allocate");

  size_t n = fread(mem, 1, file_sz, fp);
  if (n != file_sz) FAIL("Failed to read");
  fclose(fp);

  if (out_sz) *out_sz = file_sz;
  return mem;
}

/* Print `len` bytes to stdout as two-digit lowercase hex, 16 bytes per line. */
static inline void hexdump(const uint8_t *mem, size_t len)
{
  size_t idx = 0;
  while (idx < len) {
    size_t line_end = MIN(idx+16, len);
    for (; idx < line_end; idx++) {
      printf("%02x ", mem[idx]);
    }
    printf("\n");
  }
}

/*
 * Parse exactly `len` hexadecimal digits (either case, no "0x" prefix) from
 * `s`. An empty string yields 0. Too many digits or a non-hex character
 * terminates the process via FAIL().
 */
static inline uint64_t parse_hex_u64(const char *s, size_t len)
{
  if (len > 16) FAIL("Hex string too long to fit in u64");

  uint64_t ret = 0;
  for (size_t i = 0; i < len; i++) {
    char c = s[i];
    unsigned digit = 0;
    if ('0' <= c && c <= '9') {
      digit = (unsigned)(c - '0');
    } else if ('a' <= c && c <= 'f') {
      digit = (unsigned)(c - 'a') + 10;
    } else if ('A' <= c && c <= 'F') {
      digit = (unsigned)(c - 'A') + 10;
    } else {
      FAIL("Invalid hex string: '%.*s'", (int)len, s);
    }
    ret = ret*16 + digit;
  }

  return ret;
}

/* As parse_hex_u64(), limited to 8 digits. */
static inline uint32_t parse_hex_u32(const char *s, size_t len)
{
  if (len > 8) FAIL("Hex string too long to fit in u32");
  return (uint32_t)parse_hex_u64(s, len);
}

/* As parse_hex_u64(), limited to 4 digits. */
static inline uint16_t parse_hex_u16(const char *s, size_t len)
{
  if (len > 4) FAIL("Hex string too long to fit in u16");
  return (uint16_t)parse_hex_u64(s, len);
}

/* As parse_hex_u64(), limited to 2 digits. */
static inline uint8_t parse_hex_u8(const char *s, size_t len)
{
  if (len > 2) FAIL("Hex string too long to fit in u8");
  return (uint8_t)parse_hex_u64(s, len);
}

/*
 * Parse `len` decimal digits from `buf` (not NUL-terminated) into *_num.
 *
 * Returns false, leaving *_num untouched, when the input is empty, contains
 * a non-digit, or does not fit in 64 bits.
 */
static inline bool parse_bytes_u64(const char *buf, size_t len, uint64_t *_num)
{
  if (len == 0) return false;

  uint64_t num = 0;
  for (size_t i = 0; i < len; i++) {
    char c = buf[i];
    if (!('0' <= c && c <= '9')) return false; // not a decimal digit

    uint64_t digit = (uint64_t)(c - '0');
    /* Check BEFORE multiplying: a wrapped 10*num can still be larger than
     * num, so comparing the result against the old value misses overflows. */
    if (num > (UINT64_MAX - digit) / 10) return false; // overflow!
    num = 10*num + digit;
  }

  *_num = num;
  return true;
}

/* As parse_bytes_u64(), additionally rejecting values above UINT32_MAX. */
static inline bool parse_bytes_u32(const char *buf, size_t len, uint32_t *_num)
{
  uint64_t num;
  if (!parse_bytes_u64(buf, len, &num)) return false;
  if (num > UINT32_MAX) return false;
  *_num = (uint32_t)num;
  return true;
}

/* As parse_bytes_u64(), additionally rejecting values above UINT16_MAX. */
static inline bool parse_bytes_u16(const char *buf, size_t len, uint16_t *_num)
{
  uint64_t num;
  if (!parse_bytes_u64(buf, len, &num)) return false;
  if (num > UINT16_MAX) return false;
  *_num = (uint16_t)num;
  return true;
}

/* As parse_bytes_u64(), additionally rejecting values above UINT8_MAX. */
static inline bool parse_bytes_u8(const char *buf, size_t len, uint8_t *_num)
{
  uint64_t num;
  if (!parse_bytes_u64(buf, len, &num)) return false;
  if (num > UINT8_MAX) return false;
  *_num = (uint8_t)num;
  return true;
}

/*
 * Parse an optionally '-'-prefixed decimal number of `len` bytes into *_num.
 *
 * Returns false, leaving *_num untouched, for empty input, a lone "-", a
 * non-digit, or a value outside [INT64_MIN, INT64_MAX].
 */
static inline bool parse_bytes_i64(const char *buf, size_t len, int64_t *_num)
{
  if (len == 0) return false;

  bool neg = false;
  if (buf[0] == '-') {
    neg = true;
    buf++;
    len--;
  }

  uint64_t unum = 0;
  if (!parse_bytes_u64(buf, len, &unum)) return false;

  const uint64_t min_magnitude = (uint64_t)1 << 63; /* |INT64_MIN| */
  int64_t num;
  if (neg) {
    if (unum > min_magnitude) return false; // overflow
    /* INT64_MIN has no positive counterpart, so it cannot be produced by
     * negating a converted value without signed overflow. */
    num = (unum == min_magnitude) ? INT64_MIN : -(int64_t)unum;
  } else {
    if (unum >= min_magnitude) return false; // overflow
    num = (int64_t)unum;
  }

  *_num = num;
  return true;
}

/* As parse_bytes_i64(), additionally rejecting values outside int32_t. */
static inline bool parse_bytes_i32(const char *buf, size_t len, int32_t *_num)
{
  int64_t num;
  if (!parse_bytes_i64(buf, len, &num)) return false;
  if (num < INT32_MIN || num > INT32_MAX) return false;
  *_num = (int32_t)num;
  return true;
}

/* As parse_bytes_i64(), additionally rejecting values outside int16_t. */
static inline bool parse_bytes_i16(const char *buf, size_t len, int16_t *_num)
{
  int64_t num;
  if (!parse_bytes_i64(buf, len, &num)) return false;
  if (num < INT16_MIN || num > INT16_MAX) return false;
  *_num = (int16_t)num;
  return true;
}

/* As parse_bytes_i64(), additionally rejecting values outside int8_t. */
static inline bool parse_bytes_i8(const char *buf, size_t len, int8_t *_num)
{
  int64_t num;
  if (!parse_bytes_i64(buf, len, &num)) return false;
  if (num < INT8_MIN || num > INT8_MAX) return false;
  *_num = (int8_t)num;
  return true;
}
22 | 23 | int opcode1_found = 0; 24 | for (size_t i = 0; i < ARRAY_SIZE(instr_tbl); i++) { 25 | instr_fmt_t *fmt = &instr_tbl[i]; 26 | if (opcode1 == fmt->opcode1) { 27 | if (fmt->op == OP_INVAL) return RESULT_NOT_FOUND; 28 | opcode1_found = 1; 29 | if (opcode2 == fmt->opcode2) { 30 | *_fmt = fmt; 31 | return RESULT_SUCCESS; 32 | } 33 | } 34 | } 35 | 36 | if (opcode1_found && opcode2 == -1) { 37 | return RESULT_NEED_OPCODE2; 38 | } 39 | 40 | return RESULT_NOT_FOUND; 41 | } 42 | 43 | const char *instr_op_mneumonic(int op) 44 | { 45 | static const char *arr[] = { 46 | #define ELT(_1, str) str, 47 | INSTR_OP_ARRAY(ELT) 48 | #undef ELT 49 | }; 50 | if ((size_t)op >= ARRAY_SIZE(arr)) return NULL; 51 | return arr[op]; 52 | } 53 | 54 | void dis86_instr_copy(dis86_instr_t *dst, dis86_instr_t *src) 55 | { 56 | *dst = *src; 57 | } 58 | -------------------------------------------------------------------------------- /old/v2/src/instr.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #include "header.h" 3 | 4 | #define OPERAND_MAX 3 5 | 6 | typedef struct operand operand_t; 7 | typedef struct operand_reg operand_reg_t; 8 | typedef struct operand_mem operand_mem_t; 9 | typedef struct operand_imm operand_imm_t; 10 | typedef struct operand_rel operand_rel_t; 11 | typedef struct operand_far operand_far_t; 12 | typedef struct instr_fmt instr_fmt_t; 13 | 14 | #define REGISTER_ARRAY(_)\ 15 | /* Standard 16-bit registers */ \ 16 | _( REG_AX, 16, "ax", "AX" )\ 17 | _( REG_CX, 16, "cx", "CX" )\ 18 | _( REG_DX, 16, "dx", "DX" )\ 19 | _( REG_BX, 16, "bx", "BX" )\ 20 | _( REG_SP, 16, "sp", "SP" )\ 21 | _( REG_BP, 16, "bp", "BP" )\ 22 | _( REG_SI, 16, "si", "SI" )\ 23 | _( REG_DI, 16, "di", "DI" )\ 24 | /* Standard 8-bit registers (may overlap with above) */\ 25 | _( REG_AL, 8, "al", "AL" )\ 26 | _( REG_CL, 8, "cl", "CL" )\ 27 | _( REG_DL, 8, "dl", "DL" )\ 28 | _( REG_BL, 8, "bl", "BL" )\ 29 | _( REG_AH, 8, "ah", "AH" )\ 30 | _( REG_CH, 
8, "ch", "CH" )\ 31 | _( REG_DH, 8, "dh", "DH" )\ 32 | _( REG_BH, 8, "bh", "BH" )\ 33 | /* Segment registers */\ 34 | _( REG_ES, 16, "es", "ES" )\ 35 | _( REG_CS, 16, "cs", "CS" )\ 36 | _( REG_SS, 16, "ss", "SS" )\ 37 | _( REG_DS, 16, "ds", "DS" )\ 38 | /* Other registers */\ 39 | _( REG_IP, 16, "ip", "IP" )\ 40 | _( REG_FLAGS, 16, "flags", "FLAGS" )\ 41 | 42 | enum { 43 | REG_INVAL = 0, 44 | #define ELT(r, _1, _2, _3) r, 45 | REGISTER_ARRAY(ELT) 46 | #undef ELT 47 | _REG_LAST, 48 | }; 49 | 50 | static inline const char *reg_name(int reg) 51 | { 52 | static const char *arr[] = { 53 | NULL, 54 | #define ELT(_1, _2, s, _3) s, 55 | REGISTER_ARRAY(ELT) 56 | #undef ELT 57 | }; 58 | if ((size_t)reg >= ARRAY_SIZE(arr)) return NULL; 59 | return arr[reg]; 60 | } 61 | 62 | static inline const char *reg_name_upper(int reg) 63 | { 64 | static const char *arr[] = { 65 | NULL, 66 | #define ELT(_1, _2, _3, s) s, 67 | REGISTER_ARRAY(ELT) 68 | #undef ELT 69 | }; 70 | if ((size_t)reg >= ARRAY_SIZE(arr)) return NULL; 71 | return arr[reg]; 72 | } 73 | 74 | enum { 75 | OPERAND_TYPE_NONE = 0, 76 | OPERAND_TYPE_REG, 77 | OPERAND_TYPE_MEM, 78 | OPERAND_TYPE_IMM, 79 | OPERAND_TYPE_REL, 80 | OPERAND_TYPE_FAR, 81 | }; 82 | 83 | struct operand_reg 84 | { 85 | int id; 86 | }; 87 | 88 | enum { 89 | SIZE_8, 90 | SIZE_16, 91 | SIZE_32, 92 | }; 93 | 94 | struct operand_mem 95 | { 96 | int sz; // SIZE_ 97 | int sreg; // always must be populated 98 | int reg1; // 0 if unused 99 | int reg2; // 0 if unused 100 | u16 off; // 0 if unused 101 | }; 102 | 103 | struct operand_imm 104 | { 105 | int sz; 106 | u16 val; 107 | }; 108 | 109 | struct operand_rel 110 | { 111 | u16 val; 112 | }; 113 | 114 | struct operand_far 115 | { 116 | u16 seg; 117 | u16 off; 118 | }; 119 | 120 | struct operand 121 | { 122 | int type; 123 | union { 124 | operand_reg_t reg; 125 | operand_mem_t mem; 126 | operand_imm_t imm; 127 | operand_rel_t rel; 128 | operand_far_t far; 129 | } u; 130 | }; 131 | 132 | enum { 133 | REP_NONE = 
0, 134 | REP_NE, 135 | REP_E, 136 | }; 137 | 138 | struct dis86_instr 139 | { 140 | int rep; 141 | int opcode; 142 | operand_t operand[OPERAND_MAX]; 143 | size_t addr; 144 | size_t n_bytes; 145 | int intel_hidden; /* bitmap of operands hidden in intel assembly */ 146 | }; 147 | 148 | const char *instr_op_mneumonic(int op); 149 | 150 | struct instr_fmt 151 | { 152 | int op; /* OP_ */ 153 | int opcode1; /* first byte: opcode */ 154 | int opcode2; /* 3-bit modrm reg field: sometimes used as level 2 opcode */ 155 | int operand1; /* OPER_ */ 156 | int operand2; /* OPER_ */ 157 | int operand3; /* OPER_ */ 158 | int intel_hidden; /* bitmap of operands hidden in intel assembly */ 159 | }; 160 | 161 | int instr_fmt_lookup(int opcode1, int opcode2, instr_fmt_t **fmt); 162 | -------------------------------------------------------------------------------- /old/v2/src/meson.build: -------------------------------------------------------------------------------- 1 | subdir('cmdarg') 2 | 3 | SRC = [ 4 | 'core.c', 5 | 'decode.c', 6 | 'instr.c', 7 | 'datamap.c', 8 | 'print_intel_syntax.c', 9 | 'decompile/decompile.c', 10 | 'decompile/config.c', 11 | 'decompile/type.c', 12 | 'decompile/symbols.c', 13 | 'decompile/value.c', 14 | 'decompile/expr.c', 15 | 'decompile/transform.c', 16 | ] 17 | 18 | libdis86 = static_library( 19 | 'dis86', SRC, 20 | link_with: libcmdarg, 21 | dependencies : libbsl_dep) 22 | 23 | subdir('app') 24 | subdir('test') 25 | -------------------------------------------------------------------------------- /old/v2/src/print_intel_syntax.c: -------------------------------------------------------------------------------- 1 | #include "dis86_private.h" 2 | #include "str.h" 3 | 4 | static void print_operand_intel_syntax(str_t *s, dis86_instr_t *ins, operand_t *o) 5 | { 6 | switch (o->type) { 7 | case OPERAND_TYPE_REG: str_fmt(s, "%s", reg_name(o->u.reg.id)); break; 8 | case OPERAND_TYPE_MEM: { 9 | operand_mem_t *m = &o->u.mem; 10 | switch (m->sz) { 11 | case SIZE_8: 
str_fmt(s, "BYTE PTR "); break; 12 | case SIZE_16: str_fmt(s, "WORD PTR "); break; 13 | case SIZE_32: str_fmt(s, "DWORD PTR "); break; 14 | } 15 | str_fmt(s, "%s:", reg_name(m->sreg)); 16 | if (!m->reg1 && !m->reg2) { 17 | if (m->off) str_fmt(s, "0x%x", m->off); 18 | } else { 19 | str_fmt(s, "["); 20 | if (m->reg1) str_fmt(s, "%s", reg_name(m->reg1)); 21 | if (m->reg2) str_fmt(s, "+%s", reg_name(m->reg2)); 22 | if (m->off) { 23 | i16 disp = (i16)m->off; 24 | if (disp >= 0) str_fmt(s, "+0x%x", (u16)disp); 25 | else str_fmt(s, "-0x%x", (u16)-disp); 26 | } 27 | str_fmt(s, "]"); 28 | } 29 | } break; 30 | case OPERAND_TYPE_IMM: str_fmt(s, "0x%x", o->u.imm.val); break; 31 | case OPERAND_TYPE_REL: { 32 | u16 effective = ins->addr + ins->n_bytes + o->u.rel.val; 33 | str_fmt(s, "0x%x", effective); 34 | } break; 35 | case OPERAND_TYPE_FAR: str_fmt(s, "0x%x:0x%x", o->u.far.seg, o->u.far.off); break; 36 | default: FAIL("INVALID OPERAND TYPE: %d", o->type); 37 | } 38 | } 39 | 40 | char *dis86_print_intel_syntax(dis86_t *d, dis86_instr_t *ins, bool with_detail) 41 | { 42 | str_t s[1]; 43 | str_init(s); 44 | 45 | if (with_detail) { 46 | str_fmt(s, "%8zx:\t", ins->addr); 47 | for (size_t i = 0; i < ins->n_bytes; i++) { 48 | u8 b = binary_byte_at(d->b, ins->addr + i); 49 | str_fmt(s, "%02x ", b); 50 | } 51 | size_t used = ins->n_bytes * 3; 52 | size_t remain = (used <= 21) ? 
21 - used : 0; 53 | str_fmt(s, "%*s\t", (int)remain, " "); 54 | } 55 | 56 | if (ins->rep == REP_NE) str_fmt(s, "repne "); 57 | else if (ins->rep == REP_E) str_fmt(s, "rep "); 58 | 59 | str_fmt(s, "%-5s", instr_op_mneumonic(ins->opcode)); 60 | 61 | int n_operands = 0; 62 | for (size_t i = 0; i < ARRAY_SIZE(ins->operand); i++) { 63 | operand_t *o = &ins->operand[i]; 64 | if (o->type == OPERAND_TYPE_NONE) break; 65 | if ((int)(1<intel_hidden) continue; 66 | if (n_operands == 0) str_fmt(s, " "); 67 | else str_fmt(s, ","); 68 | print_operand_intel_syntax(s, ins, o); 69 | n_operands++; 70 | } 71 | 72 | /* remove any trailing space */ 73 | str_rstrip(s); 74 | 75 | return str_to_cstr(s); 76 | } 77 | 78 | char *dis86_print_c_code(dis86_t *d, dis86_instr_t *ins, size_t addr, size_t n_bytes) 79 | { 80 | UNIMPL(); 81 | } 82 | -------------------------------------------------------------------------------- /old/v2/src/segoff.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #include "header.h" 3 | 4 | typedef struct segoff segoff_t; 5 | struct segoff 6 | { 7 | u16 seg; 8 | u16 off; 9 | }; 10 | 11 | static segoff_t parse_segoff(const char *s) 12 | { 13 | const char *end = s + strlen(s); 14 | 15 | const char *colon = strchr(s, ':'); 16 | if (!colon) FAIL("Invalid segoff: '%s'", s); 17 | 18 | segoff_t ret; 19 | ret.seg = parse_hex_u16(s, colon-s); 20 | ret.off = parse_hex_u16(colon+1, end-(colon+1)); 21 | return ret; 22 | } 23 | 24 | static size_t segoff_abs(segoff_t s) 25 | { 26 | return (size_t)s.seg * 16 + (size_t)s.off; 27 | } 28 | -------------------------------------------------------------------------------- /old/v2/src/str.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #include 3 | #include 4 | 5 | typedef struct str str_t; 6 | struct str 7 | { 8 | char *buf; 9 | size_t idx; 10 | size_t len; 11 | }; 12 | 13 | static inline void str_init(str_t *s) 14 | { 15 | 
/*
 * str_t: a small growable string builder.
 *
 *   buf  heap buffer (NULL after str_to_cstr() has handed it out)
 *   idx  number of characters written so far
 *   len  capacity of buf in bytes; kept strictly greater than idx so there
 *        is always room for the terminating NUL
 */
typedef struct str str_t;
struct str
{
  char *buf;
  size_t idx;
  size_t len;
};

/* This header reports errors with FAIL() but does not define it; provide the
 * same definition header.h uses when that header was not included first. */
#ifndef FAIL
#define FAIL(...) do { fprintf(stderr, "FAIL: "); fprintf(stderr, __VA_ARGS__); fprintf(stderr, "\n"); exit(42); } while(0)
#endif

/* Initialise `s` as an empty string with a small starting capacity. */
static inline void str_init(str_t *s)
{
  s->buf = malloc(4);
  if (s->buf == NULL) FAIL("Failed to allocate buffer");
  s->idx = 0;
  s->len = 4;
}

/*
 * Hand the accumulated text to the caller as a NUL-terminated C string.
 * Ownership moves to the caller, who must free() it. `s` is left empty with
 * no buffer; it must be refilled before this can be called again.
 */
static inline char *str_to_cstr(str_t *s)
{
  assert(s->buf); // Sanity: cannot call this twice

  // extract the underlying c-str and ensure it's null-terminated
  assert(s->idx < s->len);
  char *ret = s->buf;
  ret[s->idx] = 0;

  // invalidate it
  s->buf = NULL;
  s->idx = 0;
  s->len = 0;

  return ret;
}

/*
 * Append printf-style formatted text to `s`, growing the buffer as needed.
 * A formatting or allocation failure terminates the process via FAIL().
 */
static inline void str_fmt(str_t *s, const char *fmt, ...)
{
  while (1) {
    size_t avail = s->len - s->idx;
    char *dst = s->buf ? s->buf + s->idx : NULL; /* NULL only when avail == 0 */

    va_list va;
    va_start(va, fmt);
    int rc = vsnprintf(dst, avail, fmt, va);
    va_end(va);

    /* vsnprintf reports errors with a negative value; converting that to
     * size_t would corrupt idx. */
    if (rc < 0) FAIL("Failed to format string");
    size_t n = (size_t)rc;

    /* fits, including the terminating NUL */
    if (n < avail) {
      s->idx += n;
      return;
    }

    /* resize: double until the text plus its NUL fits. Start from 4 so a
     * zero-length buffer (after str_to_cstr) cannot loop forever on 0*2. */
    size_t need = s->idx + n + 1;
    size_t new_len = s->len ? s->len : 4;
    while (new_len < need) new_len *= 2;

    char *grown = realloc(s->buf, new_len);
    if (grown == NULL) FAIL("Failed to realloc buffer");
    s->buf = grown;
    s->len = new_len;
  }
}

/* Drop trailing ' ' characters (spaces only; tabs and newlines are kept). */
static inline void str_rstrip(str_t *s)
{
  while (s->idx > 0) {
    if (s->buf[s->idx - 1] != ' ') break;
    s->idx--;
  }
}
case DATAMAP_TYPE_U16: return "u16"; 18 | default: return "unknown"; 19 | } 20 | } 21 | 22 | int main(void) 23 | { 24 | datamap_t *d = datamap_load_from_mem(TESTCASE, strlen(TESTCASE)); 25 | if (!d) FAIL("Failed to load datamap"); 26 | 27 | printf(FMT_HDR, "name", "type", "addr"); 28 | printf("-----------------------------\n"); 29 | 30 | for (size_t i = 0; i < d->n_entries; i++) { 31 | datamap_entry_t *ent = &d->entries[i]; 32 | printf(FMT_DATA, ent->name, type_str(ent->type), ent->addr); 33 | } 34 | 35 | datamap_delete(d); 36 | return 0; 37 | } 38 | -------------------------------------------------------------------------------- /old/v2/src/test/test_decode.c: -------------------------------------------------------------------------------- 1 | #include "header.h" 2 | #include "dis86.h" 3 | 4 | typedef struct binary_data binary_data_t; 5 | struct binary_data 6 | { 7 | uint8_t n_mem; 8 | uint8_t mem[16]; 9 | }; 10 | 11 | typedef struct test test_t; 12 | struct test 13 | { 14 | uint32_t address; 15 | binary_data_t data; 16 | const char * code; 17 | }; 18 | 19 | #define TEST(...) __VA_ARGS__, 20 | 21 | static test_t TESTS[] = { 22 | #include "test_decode_cases.inc" 23 | }; 24 | 25 | static int run_test(size_t num, bool verbose) 26 | { 27 | if (num >= ARRAY_SIZE(TESTS)) { 28 | FAIL("Invalid test number: %zu", num); 29 | } 30 | 31 | test_t *t = &TESTS[num]; 32 | printf("TEST %zu: %-40s | ", num, t->code); 33 | fflush(stdout); 34 | 35 | dis86_t *d = dis86_new(t->address, (char*)t->data.mem, t->data.n_mem); 36 | if (!d) FAIL("Failed to allocate instance"); 37 | 38 | dis86_instr_t *ins = dis86_next(d); 39 | if (!ins) FAIL("Failed to decode instruction"); 40 | 41 | char *s = dis86_print_intel_syntax(d, ins, false); 42 | bool pass = (0 == strcmp(s, t->code)); 43 | printf("%s", pass ? 
"PASS" : "FAIL"); 44 | printf(" | '%s'\n", s); 45 | free(s); 46 | 47 | if (verbose) { 48 | printf("ADDRESS: 0x%08x\n", t->address); 49 | printf("BINARY DATA: "); 50 | for (size_t i = 0; i < t->data.n_mem; i++) { 51 | printf("%02x ", t->data.mem[i]); 52 | } 53 | printf("\n"); 54 | } 55 | 56 | // Did we consume all of the input? 57 | assert(dis86_position(d) == dis86_baseaddr(d) + dis86_length(d)); 58 | 59 | dis86_delete(d); 60 | return pass ? 0 : 1; 61 | } 62 | 63 | static int run_all() 64 | { 65 | int ret = 0; 66 | for (size_t i = 0; i < ARRAY_SIZE(TESTS); i++) { 67 | int r = run_test(i, false); 68 | if (!ret) ret = r; 69 | } 70 | return ret; 71 | } 72 | 73 | int main(int argc, char *argv[]) 74 | { 75 | if (argc > 2) { 76 | fprintf(stderr, "usage: %s []\n", argv[0]); 77 | return 1; 78 | } 79 | 80 | if (argc >= 2) { 81 | return run_test(atoi(argv[1]), true); 82 | } else { 83 | return run_all(); 84 | } 85 | } 86 | -------------------------------------------------------------------------------- /old/v2/subprojects/bsl: -------------------------------------------------------------------------------- 1 | ../../../subprojects/bsl -------------------------------------------------------------------------------- /old/v2/test.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | set -e 3 | THISDIR=$(dirname $(realpath $0)) 4 | cd $THISDIR 5 | 6 | if [ ! 
-d build ]; then 7 | meson build 8 | fi 9 | 10 | (cd build && meson test) 11 | -------------------------------------------------------------------------------- /sample/func_01.s: -------------------------------------------------------------------------------- 1 | 6b42: 55 push bp 2 | 6b43: 8b ec mov bp,sp 3 | 6b45: 83 ec 0a sub sp,0xa 4 | 6b48: 56 push si 5 | 6b49: 57 push di 6 | 6b4a: 8b 7e 06 mov di,WORD PTR ss:[bp+0x6] 7 | 6b4d: 8b df mov bx,di 8 | 6b4f: c1 e3 02 shl bx,0x2 9 | 6b52: 8b 87 46 09 mov ax,WORD PTR ds:[bx+0x946] 10 | 6b56: 8b 97 44 09 mov dx,WORD PTR ds:[bx+0x944] 11 | 6b5a: 89 46 fc mov WORD PTR ss:[bp-0x4],ax 12 | 6b5d: 89 56 fa mov WORD PTR ss:[bp-0x6],dx 13 | 6b60: 33 f6 xor si,si 14 | 6b62: eb 2f jmp 0x6b93 15 | 6b64: c4 5e fa les bx,DWORD PTR ss:[bp-0x6] 16 | 6b67: 26 f7 07 40 00 test WORD PTR es:[bx],0x40 17 | 6b6c: 74 04 je 0x6b72 18 | 6b6e: 26 83 37 50 xor WORD PTR es:[bx],0x50 19 | 6b72: c4 5e fa les bx,DWORD PTR ss:[bp-0x6] 20 | 6b75: 26 8b 07 mov ax,WORD PTR es:[bx] 21 | 6b78: 89 46 fe mov WORD PTR ss:[bp-0x2],ax 22 | 6b7b: f7 46 fe 14 00 test WORD PTR ss:[bp-0x2],0x14 23 | 6b80: 75 0c jne 0x6b8e 24 | 6b82: ff 76 fc push WORD PTR ss:[bp-0x4] 25 | 6b85: 53 push bx 26 | 6b86: 9a 96 04 81 05 callf 0x581:0x496 27 | 6b8b: 83 c4 04 add sp,0x4 28 | 6b8e: 83 46 fa 20 add WORD PTR ss:[bp-0x6],0x20 29 | 6b92: 46 inc si 30 | 6b93: 8b df mov bx,di 31 | 6b95: d1 e3 shl bx,0x1 32 | 6b97: 39 b7 c8 08 cmp WORD PTR ds:[bx+0x8c8],si 33 | 6b9b: 7f c7 jg 0x6b64 34 | 6b9d: 83 ff 03 cmp di,0x3 35 | 6ba0: 75 5f jne 0x6c01 36 | 6ba2: 8b 1e c2 08 mov bx,WORD PTR ds:0x8c2 37 | 6ba6: c1 e3 02 shl bx,0x2 38 | 6ba9: c4 9f 04 7a les bx,DWORD PTR ds:[bx+0x7a04] 39 | 6bad: 26 ff b7 a2 00 push WORD PTR es:[bx+0xa2] 40 | 6bb2: 26 ff b7 a0 00 push WORD PTR es:[bx+0xa0] 41 | 6bb7: 9a 0f 00 c0 09 callf 0x9c0:0xf 42 | 6bbc: 83 c4 04 add sp,0x4 43 | 6bbf: 89 56 f8 mov WORD PTR ss:[bp-0x8],dx 44 | 6bc2: 89 46 f6 mov WORD PTR ss:[bp-0xa],ax 45 | 6bc5: 33 f6 xor si,si 46 | 6bc7: 
3b 36 64 09 cmp si,WORD PTR ds:0x964 47 | 6bcb: 7d 34 jge 0x6c01 48 | 6bcd: 8b 46 f8 mov ax,WORD PTR ss:[bp-0x8] 49 | 6bd0: 8b 56 f6 mov dx,WORD PTR ss:[bp-0xa] 50 | 6bd3: 83 c2 0c add dx,0xc 51 | 6bd6: 89 46 fc mov WORD PTR ss:[bp-0x4],ax 52 | 6bd9: 89 56 fa mov WORD PTR ss:[bp-0x6],dx 53 | 6bdc: 50 push ax 54 | 6bdd: 52 push dx 55 | 6bde: 9a 96 04 81 05 callf 0x581:0x496 56 | 6be3: 83 c4 04 add sp,0x4 57 | 6be6: 46 inc si 58 | 6be7: ff 76 f8 push WORD PTR ss:[bp-0x8] 59 | 6bea: ff 76 f6 push WORD PTR ss:[bp-0xa] 60 | 6bed: 9a 2e 00 c0 09 callf 0x9c0:0x2e 61 | 6bf2: 83 c4 04 add sp,0x4 62 | 6bf5: 89 56 f8 mov WORD PTR ss:[bp-0x8],dx 63 | 6bf8: 89 46 f6 mov WORD PTR ss:[bp-0xa],ax 64 | 6bfb: 3b 36 64 09 cmp si,WORD PTR ds:0x964 65 | 6bff: 7c cc jl 0x6bcd 66 | 6c01: 5f pop di 67 | 6c02: 5e pop si 68 | 6c03: c9 leave 69 | 6c04: cb retf 70 | -------------------------------------------------------------------------------- /sample/func_02_old.c: -------------------------------------------------------------------------------- 1 | #define _param_0006 ARG_16(0x6) 2 | #define _local_0002 LOCAL_16(0x2) 3 | #define _local_0008 LOCAL_16(0x8) 4 | #define _local_0006 LOCAL_32(0x6) 5 | #define _local_000a LOCAL_16(0xa) 6 | void func_00006b42__0622_0922(void) 7 | { 8 | PUSH(BP); // push bp 9 | BP = SP; // mov bp,sp 10 | SP -= 0xa; // sub sp,0xa 11 | PUSH(SI); // push si 12 | PUSH(DI); // push di 13 | DI = _param_0006; // mov di,WORD PTR ss:[bp+0x6] 14 | BX = DI; // mov bx,di 15 | BX <<= 0x2; // shl bx,0x2 16 | AX = *PTR_16(DS, BX+0x946); // mov ax,WORD PTR ds:[bx+0x946] 17 | DX = *PTR_16(DS, BX+0x944); // mov dx,WORD PTR ds:[bx+0x944] 18 | *(u16*)((u8*)&_local_0006 + 2) = AX; // mov WORD PTR ss:[bp-0x4],ax 19 | *(u16*)((u8*)&_local_0006 + 0) = DX; // mov WORD PTR ss:[bp-0x6],dx 20 | SI = 0; // xor si,si 21 | goto label_00006b93; // jmp 0x6b93 22 | 23 | label_00006b64: 24 | LOAD_SEG_OFF(ES, BX, _local_0006); // les bx,DWORD PTR ss:[bp-0x6] 25 | // test WORD PTR es:[bx],0x40 26 | if 
(*PTR_16(ES, BX) == 0x40) goto label_00006b72; // je 0x6b72 27 | *PTR_16(ES, BX) ^= 0x50; // xor WORD PTR es:[bx],0x50 28 | 29 | label_00006b72: 30 | LOAD_SEG_OFF(ES, BX, _local_0006); // les bx,DWORD PTR ss:[bp-0x6] 31 | AX = *PTR_16(ES, BX); // mov ax,WORD PTR es:[bx] 32 | _local_0002 = AX; // mov WORD PTR ss:[bp-0x2],ax 33 | // test WORD PTR ss:[bp-0x2],0x14 34 | if (_local_0002 != 0x14) goto label_00006b8e; // jne 0x6b8e 35 | // push WORD PTR ss:[bp-0x4] 36 | // push bx 37 | // callf 0x581:0x496 38 | F_vga_dyn_append(m, BX, (u16)(_local_0006>>16)); // add sp,0x4 39 | 40 | label_00006b8e: 41 | *(u16*)((u8*)&_local_0006 + 0) += 0x20; // add WORD PTR ss:[bp-0x6],0x20 42 | SI += 1 ; // inc si 43 | 44 | label_00006b93: 45 | BX = DI; // mov bx,di 46 | BX <<= 0x1; // shl bx,0x1 47 | // cmp WORD PTR ds:[bx+0x8c8],si 48 | if ((i16)*PTR_16(DS, BX+0x8c8) > (i16)SI) goto label_00006b64; // jg 0x6b64 49 | // cmp di,0x3 50 | if (DI != 0x3) goto label_00006c01; // jne 0x6c01 51 | BX = G_data_08c2; // mov bx,WORD PTR ds:0x8c2 52 | BX <<= 0x2; // shl bx,0x2 53 | LOAD_SEG_OFF(ES, BX, *PTR_32(DS, BX+0x7a04)); // les bx,DWORD PTR ds:[bx+0x7a04] 54 | // push WORD PTR es:[bx+0xa2] 55 | // push WORD PTR es:[bx+0xa0] 56 | // callf 0x9c0:0xf 57 | F_list_load_next(m, *PTR_16(ES, BX+0xa0), *PTR_16(ES, BX+0xa2)); // add sp,0x4 58 | _local_0008 = DX; // mov WORD PTR ss:[bp-0x8],dx 59 | _local_000a = AX; // mov WORD PTR ss:[bp-0xa],ax 60 | SI = 0; // xor si,si 61 | // cmp si,WORD PTR ds:0x964 62 | if ((i16)SI >= (i16)G_data_0964) goto label_00006c01; // jge 0x6c01 63 | 64 | label_00006bcd: 65 | AX = _local_0008; // mov ax,WORD PTR ss:[bp-0x8] 66 | DX = _local_000a; // mov dx,WORD PTR ss:[bp-0xa] 67 | DX += 0xc; // add dx,0xc 68 | *(u16*)((u8*)&_local_0006 + 2) = AX; // mov WORD PTR ss:[bp-0x4],ax 69 | *(u16*)((u8*)&_local_0006 + 0) = DX; // mov WORD PTR ss:[bp-0x6],dx 70 | // push ax 71 | // push dx 72 | // callf 0x581:0x496 73 | F_vga_dyn_append(m, DX, AX); // add sp,0x4 74 | SI += 1 ; // 
inc si 75 | // push WORD PTR ss:[bp-0x8] 76 | // push WORD PTR ss:[bp-0xa] 77 | // callf 0x9c0:0x2e 78 | F_list_load_next_2(m, _local_000a, _local_0008); // add sp,0x4 79 | _local_0008 = DX; // mov WORD PTR ss:[bp-0x8],dx 80 | _local_000a = AX; // mov WORD PTR ss:[bp-0xa],ax 81 | // cmp si,WORD PTR ds:0x964 82 | if ((i16)SI < (i16)G_data_0964) goto label_00006bcd; // jl 0x6bcd 83 | 84 | label_00006c01: 85 | DI = POP(); // pop di 86 | SI = POP(); // pop si 87 | LEAVE(BP, SP); // leave 88 | RETURN_FAR(); // retf 89 | } 90 | #undef _param_0006 91 | #undef _local_0002 92 | #undef _local_0008 93 | #undef _local_0006 94 | #undef _local_000a 95 | 96 | -------------------------------------------------------------------------------- /sample/func_03_new.c: -------------------------------------------------------------------------------- 1 | void func_00006b42__0622_0922(void) 2 | { 3 | bx_2 = _param_0006; 4 | bx_3 = bx_2 << 2; 5 | *(u16*)((u8*)&_local_0006 + 2) = *PTR_16(ds_1, bx_3 + 0x946); 6 | *(u16*)(u8*)&_local_0006 = *PTR_16(ds_1, bx_3 + 0x944); 7 | si_5 = 0; 8 | while (1) { 9 | addr_6b93:; 10 | if (!(*PTR_16(ds_1, (bx_2 << 1) + 0x8c8) > si_5)) { 11 | goto addr_6b9d; 12 | } 13 | tmp_0 = _local_0006; 14 | es_2 = tmp_0 >> 16; 15 | bx_4 = tmp_0; 16 | if ((*PTR_16(es_2, bx_4) & 64) == 0) { 17 | *PTR_16(es_2, bx_4) = *PTR_16(es_2, bx_4) ^ 80; 18 | } 19 | tmp_1 = _local_0006; 20 | bx_6 = tmp_1; 21 | if ((*PTR_16(tmp_1 >> 16, bx_6) & 20) != 0) { 22 | F_vga_dyn_append(bx_6, *(u16*)((u8*)&_local_0006 + 2)); 23 | } 24 | *(u16*)(u8*)&_local_0006 = *(u16*)(u8*)&_local_0006 + 32; 25 | si_5 = si_5 + 1; 26 | goto addr_6b93; 27 | } 28 | addr_6b9d:; 29 | if (bx_2 != 3) { 30 | tmp_2 = *PTR_32(ds_1, (G_data_08c2 << 2) + 0x7a04); 31 | es_5 = tmp_2 >> 16; 32 | bx_12 = tmp_2; 33 | tmp_3 = F_list_load_next(*PTR_16(es_5, bx_12 + 0xa0), *PTR_16(es_5, bx_12 + 0xa2)); 34 | if (0 >= G_data_0964) { 35 | _local_0010_2 = tmp_3; 36 | _local_0008_2 = tmp_3 >> 16; 37 | si_8 = 0; 38 | while (1) { 39 | 
addr_6bcd:; 40 | dx_5 = _local_0010_2 + 12; 41 | *(u16*)((u8*)&_local_0006 + 2) = _local_0008_2; 42 | *(u16*)(u8*)&_local_0006 = dx_5; 43 | F_vga_dyn_append(dx_5, _local_0008_2); 44 | si_9 = si_8 + 1; 45 | tmp_4 = F_list_load_next_2(_local_0010_2, _local_0008_2); 46 | if (!(si_9 < G_data_0964)) { 47 | goto addr_6c01; 48 | } 49 | _local_0010_2 = tmp_4; 50 | _local_0008_2 = tmp_4 >> 16; 51 | si_8 = si_9; 52 | goto addr_6bcd; 53 | } 54 | } 55 | } 56 | addr_6c01:; 57 | return; 58 | } 59 | -------------------------------------------------------------------------------- /sample/func_04_manual.c: -------------------------------------------------------------------------------- 1 | void my_function() 2 | { 3 | u32 _local_0006 = PTR_32(ds_1, 0x944)[_param_0006]; 4 | for (u16 i = 0;; i++) { 5 | u16 val = PTR_16(ds_1, 0x8c8)[_param_0006]; 6 | if (i >= val) { 7 | break; 8 | } 9 | u16 *ptr = PTR_16_FROM_32(_local_0006); 10 | if ((*ptr & 64) == 0) { 11 | *ptr = *ptr ^ 80; 12 | } 13 | if ((*ptr & 20) != 0) { 14 | F_vga_dyn_append((u16)_local_0006, (u16)(_local_0006>>16)); 15 | } 16 | _local_0006 += 32; 17 | } 18 | 19 | if (_param_0006 != 3) { 20 | u32 tmp_3 = PTR_32(ds_1, 0x7a04)[G_data_08c2]; 21 | u8 *ptr = PTR_8_FROM_32(tmp_3); 22 | u32 addr = *(u32*)(ptr + 0xa0); 23 | 24 | u32 _local_0010 = F_list_load_next((u16)addr, (u16)(addr>>16)); 25 | for (u16 i = 0; i < G_data_0964; i++) { 26 | _local_0006 = _local_0010 + 12; 27 | F_vga_dyn_append((u16)_local_0010 + 12, (u16)(_local_0010>>16)); 28 | _local_0010 = F_list_load_next_2((u16)_local_0010, (u16)(_local_0010>>16)); 29 | } 30 | } 31 | } 32 | -------------------------------------------------------------------------------- /sample/run.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | cd .. 
3 | ./target/debug/dis86 decomp --config ../gizmo/build/src/hooklib/dis86_config.bsl --binary ../gizmo/dis/exe.bin --start-addr 0622:0922 --end-addr 0622:09e5 --emit-code - 4 | -------------------------------------------------------------------------------- /src/asm/instr.rs: -------------------------------------------------------------------------------- 1 | use crate::segoff::SegOff; 2 | use crate::util::arrayvec::ArrayVec; 3 | pub use crate::asm::instr_fmt::Opcode; 4 | 5 | #[derive(Debug, Clone, Copy)] 6 | pub struct Instr { 7 | pub rep: Option, 8 | pub opcode: Opcode, 9 | pub operands: ArrayVec, 10 | pub addr: SegOff, 11 | pub n_bytes: u16, 12 | pub intel_hidden_operand_bitmask: u8, 13 | } 14 | 15 | impl Instr { 16 | pub fn end_addr(&self) -> SegOff { 17 | self.addr.add_offset(self.n_bytes) 18 | } 19 | pub fn rel_addr(&self, rel: &OperandRel) -> SegOff { 20 | self.end_addr().add_offset(rel.val) 21 | } 22 | } 23 | 24 | #[derive(Debug, Clone, Copy, PartialEq, Eq)] 25 | pub enum Operand { 26 | Reg(OperandReg), 27 | Mem(OperandMem), 28 | Imm(OperandImm), 29 | Rel(OperandRel), 30 | Far(OperandFar), 31 | } 32 | 33 | #[derive(Debug, Clone, Copy, PartialEq, Eq)] 34 | pub struct OperandReg(pub Reg); 35 | 36 | #[derive(Debug, Clone, Copy, PartialEq, Eq)] 37 | pub struct OperandMem { 38 | pub sz: Size, 39 | pub sreg: Reg, 40 | pub reg1: Option, 41 | pub reg2: Option, 42 | pub off: Option, 43 | } 44 | 45 | #[derive(Debug, Clone, Copy, PartialEq, Eq)] 46 | pub struct OperandImm { 47 | pub sz: Size, 48 | pub val: u16, 49 | } 50 | 51 | #[derive(Debug, Clone, Copy, PartialEq, Eq)] 52 | pub struct OperandRel { 53 | pub val: u16, 54 | } 55 | 56 | #[derive(Debug, Clone, Copy, PartialEq, Eq)] 57 | pub struct OperandFar { 58 | pub seg: u16, 59 | pub off: u16, 60 | } 61 | 62 | #[derive(Debug, Clone, Copy, PartialEq, Eq)] 63 | pub enum Size { 64 | Size8, 65 | Size16, 66 | Size32, 67 | } 68 | 69 | #[derive(Debug, Clone, Copy, PartialEq, Eq)] 70 | pub enum Rep { 71 | NE, 72 | EQ, 73 | 
} 74 | 75 | #[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] 76 | #[repr(u8)] 77 | pub enum Reg { 78 | AX, 79 | CX, 80 | DX, 81 | BX, 82 | SP, 83 | BP, 84 | SI, 85 | DI, 86 | AL, 87 | CL, 88 | DL, 89 | BL, 90 | AH, 91 | CH, 92 | DH, 93 | BH, 94 | ES, 95 | CS, 96 | SS, 97 | DS, 98 | IP, 99 | FLAGS, 100 | } 101 | 102 | impl Reg { 103 | pub fn reg8(num: u8) -> Reg { 104 | assert!(num <= 7); 105 | unsafe { std::mem::transmute(Reg::AL as u8 + num) } 106 | } 107 | 108 | pub fn reg16(num: u8) -> Reg { 109 | assert!(num <= 7); 110 | unsafe { std::mem::transmute(Reg::AX as u8 + num) } 111 | } 112 | 113 | pub fn sreg16(num: u8) -> Reg { 114 | assert!(num <= 3); 115 | unsafe { std::mem::transmute(Reg::ES as u8 + num) } 116 | } 117 | 118 | pub fn name(&self) -> &'static str { 119 | match self { 120 | Reg::AX => "ax", 121 | Reg::CX => "cx", 122 | Reg::DX => "dx", 123 | Reg::BX => "bx", 124 | Reg::SP => "sp", 125 | Reg::BP => "bp", 126 | Reg::SI => "si", 127 | Reg::DI => "di", 128 | Reg::AL => "al", 129 | Reg::CL => "cl", 130 | Reg::DL => "dl", 131 | Reg::BL => "bl", 132 | Reg::AH => "ah", 133 | Reg::CH => "ch", 134 | Reg::DH => "dh", 135 | Reg::BH => "bh", 136 | Reg::ES => "es", 137 | Reg::CS => "cs", 138 | Reg::SS => "ss", 139 | Reg::DS => "ds", 140 | Reg::IP => "ip", 141 | Reg::FLAGS => "flags", 142 | } 143 | } 144 | } 145 | 146 | pub struct RegInfo { 147 | pub name: &'static str, 148 | pub sz: Size, 149 | pub seg: bool, 150 | } 151 | 152 | const REG_INFO: &[RegInfo] = &[ 153 | RegInfo { name: "AX", sz: Size::Size16, seg: false }, 154 | RegInfo { name: "CX", sz: Size::Size16, seg: false }, 155 | RegInfo { name: "DX", sz: Size::Size16, seg: false }, 156 | RegInfo { name: "BX", sz: Size::Size16, seg: false }, 157 | RegInfo { name: "SP", sz: Size::Size16, seg: false }, 158 | RegInfo { name: "BP", sz: Size::Size16, seg: false }, 159 | RegInfo { name: "SI", sz: Size::Size16, seg: false }, 160 | RegInfo { name: "DI", sz: Size::Size16, seg: false }, 161 | RegInfo { name: "AL", sz: 
Size::Size8, seg: false }, 162 | RegInfo { name: "CL", sz: Size::Size8, seg: false }, 163 | RegInfo { name: "DL", sz: Size::Size8, seg: false }, 164 | RegInfo { name: "BL", sz: Size::Size8, seg: false }, 165 | RegInfo { name: "AH", sz: Size::Size8, seg: false }, 166 | RegInfo { name: "CH", sz: Size::Size8, seg: false }, 167 | RegInfo { name: "DH", sz: Size::Size8, seg: false }, 168 | RegInfo { name: "BH", sz: Size::Size8, seg: false }, 169 | RegInfo { name: "ES", sz: Size::Size16, seg: true }, 170 | RegInfo { name: "CS", sz: Size::Size16, seg: true }, 171 | RegInfo { name: "SS", sz: Size::Size16, seg: true }, 172 | RegInfo { name: "DS", sz: Size::Size16, seg: true }, 173 | RegInfo { name: "IP", sz: Size::Size16, seg: false }, 174 | RegInfo { name: "FLAGS", sz: Size::Size16, seg: false }, 175 | ]; 176 | 177 | impl Reg { 178 | pub fn info(&self) -> &RegInfo { 179 | let idx = *self as usize; 180 | assert!(idx < REG_INFO.len()); 181 | &REG_INFO[idx] 182 | } 183 | 184 | pub fn from_str_upper(s: &str) -> Option<Reg> { 185 | match s { 186 | "AX" => Some(Reg::AX), 187 | "CX" => Some(Reg::CX), 188 | "DX" => Some(Reg::DX), 189 | "BX" => Some(Reg::BX), 190 | "SP" => Some(Reg::SP), 191 | "BP" => Some(Reg::BP), 192 | "SI" => Some(Reg::SI), 193 | "DI" => Some(Reg::DI), 194 | "AL" => Some(Reg::AL), 195 | "CL" => Some(Reg::CL), 196 | "DL" => Some(Reg::DL), 197 | "BL" => Some(Reg::BL), 198 | "AH" => Some(Reg::AH), 199 | "CH" => Some(Reg::CH), 200 | "DH" => Some(Reg::DH), 201 | "BH" => Some(Reg::BH), 202 | "ES" => Some(Reg::ES), 203 | "CS" => Some(Reg::CS), 204 | "SS" => Some(Reg::SS), 205 | "DS" => Some(Reg::DS), 206 | "IP" => Some(Reg::IP), 207 | "FLAGS" => Some(Reg::FLAGS), 208 | _ => None, 209 | } 210 | } 211 | } 212 | -------------------------------------------------------------------------------- /src/asm/intel_syntax.rs: -------------------------------------------------------------------------------- 1 | use crate::asm::instr::*; 2 | use crate::segoff::SegOff; 3 | use 
std::fmt::Write; 4 | 5 | type Result = std::result::Result; 6 | 7 | fn format_operand(s: &mut String, ins: &Instr, oper: &Operand) -> Result<()> { 8 | match oper { 9 | Operand::Reg(o) => write!(s, "{}", o.0.name())?, 10 | Operand::Mem(o) => { 11 | match o.sz { 12 | Size::Size8 => write!(s, "BYTE PTR ")?, 13 | Size::Size16 => write!(s, "WORD PTR ")?, 14 | Size::Size32 => write!(s, "DWORD PTR ")?, 15 | }; 16 | write!(s, "{}:", o.sreg.name())?; 17 | 18 | if o.reg1.is_none() && o.reg2.is_none() { 19 | if o.off.is_some() { 20 | write!(s, "0x{:x}", o.off.unwrap())?; 21 | } 22 | } else { 23 | write!(s, "[")?; 24 | if o.reg1.is_some() { write!(s, "{}", o.reg1.unwrap().name())?; } 25 | if o.reg2.is_some() { write!(s, "+{}", o.reg2.unwrap().name())?; } 26 | if o.off.is_some() { 27 | let disp = o.off.unwrap() as i16; 28 | if disp >= 0 { write!(s, "+0x{:x}", disp as u16)?; } 29 | else { write!(s, "-0x{:x}", (-disp) as u16)?; } 30 | } 31 | write!(s, "]")?; 32 | } 33 | } 34 | Operand::Imm(o) => write!(s, "0x{:x}", o.val)?, 35 | Operand::Rel(o) => { 36 | let effective = ins.rel_addr(o); 37 | write!(s, "{}", effective)?; 38 | } 39 | Operand::Far(o) => write!(s, "{:04x}:{:04x}", o.seg, o.off)?, 40 | }; 41 | 42 | Ok(()) 43 | } 44 | 45 | fn format_instr_impl(s: &mut String, ins: &Instr, bytes: &[u8], with_detail: bool) -> Result<()> { 46 | if with_detail { 47 | write!(s, "{}:\t", ins.addr)?; 48 | for b in bytes { 49 | write!(s, "{:02x} ", b)?; 50 | } 51 | let used = ins.n_bytes as usize * 3; 52 | let remain = if used <= 21 { 21 - used } else { 0 }; 53 | write!(s, "{:1$}\t", "", remain)?; 54 | } 55 | 56 | match ins.rep { 57 | None => (), 58 | Some(Rep::NE) => write!(s, "repne ")?, 59 | Some(Rep::EQ) => write!(s, "rep ")?, 60 | } 61 | 62 | write!(s, "{:<5}", ins.opcode.name())?; 63 | 64 | let mut first = true; 65 | for (i, oper) in ins.operands.as_slice().iter().enumerate() { 66 | if ((1u8< Result<()> { 78 | if with_detail { 79 | write!(s, "{}:\t", addr)?; 80 | for b in bytes { 81 | 
write!(s, "{:02x} ", b)?; 82 | } 83 | let used = bytes.len() as usize * 3; 84 | let remain = if used <= 21 { 21 - used } else { 0 }; 85 | write!(s, "{:1$}\t", "", remain)?; 86 | } 87 | write!(s, "(data)")?; 88 | Ok(()) 89 | } 90 | 91 | // FIXME: THIS IS KLUDGY 92 | pub fn format(addr: SegOff, ins: Option<&Instr>, bytes: &[u8], with_detail: bool) -> Result { 93 | let mut s = String::new(); 94 | match ins { 95 | Some(ins) => format_instr_impl(&mut s, ins, bytes, with_detail)?, 96 | None => format_data_impl(&mut s, addr, bytes, with_detail)?, 97 | } 98 | Ok(s.trim_end().to_string()) 99 | } 100 | -------------------------------------------------------------------------------- /src/asm/mod.rs: -------------------------------------------------------------------------------- 1 | mod instr_fmt; 2 | pub mod instr; 3 | pub mod decode; 4 | pub mod intel_syntax; 5 | -------------------------------------------------------------------------------- /src/bin/dis86.rs: -------------------------------------------------------------------------------- 1 | fn main() { 2 | std::process::exit(dis86::app::run()); 3 | } 4 | -------------------------------------------------------------------------------- /src/bin/parse_config.rs: -------------------------------------------------------------------------------- 1 | use std::env; 2 | use dis86::config::Config; 3 | 4 | fn main() { 5 | let args: Vec<_> = env::args().collect(); 6 | let cfg = Config::from_path(&args[1]).unwrap(); 7 | println!("{:#?}", cfg); 8 | } 9 | -------------------------------------------------------------------------------- /src/binary.rs: -------------------------------------------------------------------------------- 1 | use crate::segoff::{Seg, SegOff}; 2 | use crate::region::RegionIter; 3 | use crate::config::{self, Config}; 4 | use crate::binfmt; 5 | 6 | #[derive(Debug)] 7 | pub enum Fmt { 8 | Raw(String), 9 | Exe(String), 10 | } 11 | 12 | impl Fmt { 13 | pub fn path(&self) -> &str { 14 | match self { 15 | 
Fmt::Raw(path) => path, 16 | Fmt::Exe(path) => path, 17 | } 18 | } 19 | } 20 | 21 | struct Data(Vec<u8>); 22 | 23 | pub struct Binary<'a> { 24 | main: Data, 25 | overlays: Vec<Data>, 26 | config: Option<&'a Config>, 27 | segmap: Option<Vec<u16>>, 28 | } 29 | 30 | fn build_segmap(exe: &binfmt::mz::Exe) -> Option<Vec<u16>> { 31 | let segmap = exe.seginfo?; 32 | let mut out = vec![]; 33 | for s in segmap { 34 | out.push(s.seg); 35 | } 36 | Some(out) 37 | } 38 | 39 | impl<'a> Binary<'a> { 40 | pub fn from_fmt(fmt: &Fmt, config: Option<&'a Config>) -> Result<Self, String> { 41 | let path = fmt.path(); 42 | 43 | let data = std::fs::read(path).map_err( 44 | |err| format!("Failed to read file: '{}': {:?}", path, err))?; 45 | 46 | let binary = match fmt { 47 | Fmt::Raw(_) => { 48 | Binary::from_raw(&data, config) 49 | } 50 | Fmt::Exe(_) => { 51 | let exe = binfmt::mz::Exe::decode(&data).unwrap(); 52 | Self::from_exe(&exe, config) 53 | } 54 | }; 55 | 56 | Ok(binary) 57 | } 58 | 59 | pub fn from_exe(exe: &binfmt::mz::Exe, config: Option<&'a Config>) -> Self { 60 | let main = Data(exe.exe_data().to_vec()); 61 | let mut overlays = vec![]; 62 | for i in 0..exe.num_overlay_segments() { 63 | overlays.push(Data(exe.overlay_data(i).to_vec())); 64 | } 65 | let segmap = build_segmap(&exe); 66 | Binary { main, overlays, config, segmap, } 67 | } 68 | 69 | pub fn from_raw(data: &[u8], config: Option<&'a Config>) -> Self { 70 | Self { main: Data(data.to_vec()), overlays: vec![], config, segmap: None } 71 | } 72 | 73 | pub fn region(&self, start: SegOff, end: SegOff) -> &[u8] { 74 | assert!(start.seg == end.seg); 75 | match start.seg { 76 | Seg::Normal(_) => &self.main.0[start.abs_normal() .. end.abs_normal()], 77 | Seg::Overlay(seg) => &self.overlays[seg as usize].0[start.off.0 as usize .. 
end.off.0 as usize], 78 | } 79 | } 80 | 81 | pub fn region_iter(&self, start: SegOff, end: SegOff) -> RegionIter<'_> { 82 | RegionIter::new(self.region(start, end), start) 83 | } 84 | 85 | pub fn remap_to_segment(&self, old: u16) -> Seg { 86 | let Some(segmap) = self.segmap.as_ref() else { 87 | panic!("Cannot remap segments when binary has no seginfo table"); 88 | }; 89 | assert!(old%8 == 0); 90 | Seg::Normal(segmap[(old/8) as usize]) 91 | } 92 | 93 | pub fn lookup_call(&self, from: SegOff, to: SegOff) -> Option<&'a config::Func> { 94 | match &from.seg { 95 | Seg::Normal(_) => cfg_func(self.config, to), 96 | Seg::Overlay(_) => { 97 | // We're calling from an overlay, so we need to remap the dest seg before making the call... 98 | let Seg::Normal(seg) = to.seg else { return None; /*panic!("Unexpected destination segment as overlay!") */ }; 99 | let remapped_seg = self.remap_to_segment(seg); 100 | let to_modified = SegOff { seg: remapped_seg, off: to.off }; 101 | cfg_func(self.config, to_modified) 102 | } 103 | } 104 | } 105 | } 106 | 107 | fn cfg_func(cfg: Option<&Config>, addr: SegOff) -> Option<&config::Func> { 108 | cfg?.func_lookup(addr) 109 | } 110 | -------------------------------------------------------------------------------- /src/binfmt/mod.rs: -------------------------------------------------------------------------------- 1 | pub mod mz; 2 | -------------------------------------------------------------------------------- /src/binfmt/mz/decode.rs: -------------------------------------------------------------------------------- 1 | use crate::binfmt::mz::*; 2 | 3 | fn decode_exe<'a>(data: &'a [u8]) -> Result, String> { 4 | // Decode the header 5 | let hdr = decode_hdr(data)?; 6 | 7 | // Compute the EXE Region 8 | let exe_start = hdr.cparhdr as u32 * 16; 9 | let mut exe_end = hdr.cp as u32 * 512; 10 | if hdr.cblp != 0 { exe_end -= 512 - hdr.cblp as u32; } 11 | if exe_end as usize > data.len() { 12 | return Err(format!("End of exe region is beyond the end of 
data")); 13 | } 14 | 15 | // Determine the relocs array 16 | let relocs = unsafe { util::try_slice_from_bytes(&data[hdr.lfarlc as usize..], hdr.crlc as usize) }?; 17 | 18 | // Optional FBOV 19 | let data_rem = &data[exe_end as usize..]; 20 | let fbov = decode_fbov(data_rem); 21 | 22 | // Optional seginfo 23 | let mut seginfo = None; 24 | if let Some(fbov) = fbov { 25 | // Decode seginfo 26 | if fbov.segnum < 0 { 27 | let segnum = fbov.segnum; // unaligned 28 | return Err(format!("Negative FBOV segnum: {}", segnum)); 29 | } 30 | seginfo = Some(unsafe { util::try_slice_from_bytes(&data[fbov.exeinfo as usize..], fbov.segnum as usize) }?); 31 | } 32 | 33 | // Optional overlay info 34 | let mut ovr = None; 35 | if let Some(fbov) = fbov { 36 | ovr = Some(overlay::decode_overlay_info(data, exe_start, fbov, seginfo.unwrap())?); 37 | } 38 | 39 | Ok(Exe { 40 | hdr, 41 | exe_start, 42 | exe_end, 43 | relocs, 44 | fbov, 45 | seginfo, 46 | ovr, 47 | rawdata: data, 48 | }) 49 | } 50 | 51 | fn decode_hdr<'a>(data: &'a [u8]) -> Result<&'a Header, String> { 52 | // Get the header and perform magic check 53 | let hdr: &Header = unsafe { util::try_struct_from_bytes(data) }?; 54 | let magic_expect = ['M' as u8, 'Z' as u8]; 55 | if hdr.magic != magic_expect { 56 | return Err(format!("Magic number mismatch: got {:?}, expected {:?}", hdr.magic, magic_expect)); 57 | } 58 | 59 | Ok(hdr) 60 | } 61 | 62 | fn decode_fbov<'a>(data: &'a [u8]) -> Option<&'a FBOV> { 63 | // Get the struct and perform magic check 64 | let fbov: &FBOV = unsafe { util::try_struct_from_bytes(data) }.ok()?; 65 | let magic_expect = ['F' as u8, 'B' as u8, 'O' as u8, 'V' as u8]; 66 | if fbov.magic != magic_expect { 67 | return None; 68 | } 69 | 70 | // All good 71 | Some(fbov) 72 | } 73 | 74 | impl<'a> Exe<'a> { 75 | #[cfg(target_endian = "big")] 76 | pub fn decode(data: &'a [u8]) -> Result { 77 | panic!("MZ decoding only works on little-endian machines"); 78 | } 79 | 80 | #[cfg(target_endian = "little")] 81 | pub fn 
decode(data: &'a [u8]) -> Result { 82 | decode_exe(data) 83 | } 84 | } 85 | -------------------------------------------------------------------------------- /src/binfmt/mz/defs.rs: -------------------------------------------------------------------------------- 1 | 2 | #[repr(C, packed)] 3 | #[derive(Debug, Clone)] 4 | pub struct Header { 5 | pub magic: [u8; 2], /* "MZ" */ 6 | pub cblp: u16, 7 | pub cp: u16, 8 | pub crlc: u16, 9 | pub cparhdr: u16, 10 | pub minalloc: u16, 11 | pub maxalloc: u16, 12 | pub ss: i16, 13 | pub sp: u16, 14 | pub csum: u16, 15 | pub ip: u16, 16 | pub cs: i16, 17 | pub lfarlc: u16, 18 | pub ovno: u16, 19 | } 20 | 21 | #[repr(C, packed)] 22 | #[derive(Debug, Clone)] 23 | pub struct Reloc { 24 | pub offset: u16, 25 | pub segment: u16, 26 | } 27 | 28 | // Borland C/C++ FBOV Header for Overlays (VROOM?) 29 | #[repr(C, packed)] 30 | #[derive(Debug, Clone)] 31 | pub struct FBOV { 32 | pub magic: [u8; 4], /* "FBOV" */ 33 | pub ovrsize: u32, 34 | pub exeinfo: u32, /* points to mz_seginfo array in binary */ 35 | pub segnum: i32, /* number of entries in the mz_seginfo array */ 36 | } 37 | 38 | #[allow(non_snake_case)] 39 | pub mod SegInfoType { 40 | pub const DATA: u16 = 0; 41 | pub const CODE: u16 = 1; 42 | pub const STUB: u16 = 3; 43 | pub const OVERLAY: u16 = 4; 44 | } 45 | 46 | #[repr(C, packed)] 47 | #[derive(Debug, Clone)] 48 | pub struct SegInfo 49 | { 50 | pub seg: u16, 51 | pub maxoff: u16, 52 | pub typ: u16, // SegInfoType::* 53 | pub minoff: u16, 54 | } 55 | 56 | #[derive(Debug, Clone)] 57 | pub struct OverlaySeg { 58 | pub stub_segment: u16, // Segment number where the stubs are located 59 | pub segment_size: u16, // Size of the destination segment 60 | pub data_offset: u32, // Offset to the destination segment in the binary image (from OverlayInfo::file_offset) 61 | pub _unknown_1: u16, 62 | pub _unknown_2: u16, 63 | } 64 | 65 | #[derive(Debug, Clone)] 66 | pub struct OverlayStub { 67 | pub overlay_seg_num: u16, // Id or index of the 
overlay segment this stub belongs to 68 | pub stub_segment: u16, // Segment this stub is located at (as called) 69 | pub stub_offset: u16, // Offset this stub is located at (as called) 70 | pub dest_offset: u16, // Destination offset into the overlay segment (wherever it ends up resident) 71 | } 72 | 73 | #[derive(Debug, Clone)] 74 | pub struct OverlayInfo { 75 | pub file_offset: u32, 76 | pub segs: Vec, 77 | pub stubs: Vec, 78 | } 79 | 80 | #[derive(Debug, Clone)] 81 | pub struct Exe<'a> { 82 | pub hdr: &'a Header, 83 | pub exe_start: u32, 84 | pub exe_end: u32, 85 | pub relocs: &'a [Reloc], 86 | pub fbov: Option<&'a FBOV>, 87 | pub seginfo: Option<&'a [SegInfo]>, 88 | pub ovr: Option, 89 | pub rawdata: &'a [u8], 90 | } 91 | -------------------------------------------------------------------------------- /src/binfmt/mz/methods.rs: -------------------------------------------------------------------------------- 1 | use crate::binfmt::mz::*; 2 | use crate::segoff::{Seg, Off, SegOff}; 3 | 4 | impl<'a> Exe<'a> { 5 | pub fn exe_data(&self) -> &[u8] { 6 | &self.rawdata[self.exe_start as usize..self.exe_end as usize] 7 | } 8 | 9 | pub fn overlay_data(&self, id: usize) -> &[u8] { 10 | let ovr = self.ovr.as_ref().unwrap(); 11 | let seg = &ovr.segs[id]; 12 | let start = ovr.file_offset as usize + seg.data_offset as usize; 13 | let end = start + seg.segment_size as usize; 14 | &self.rawdata[start..end] 15 | } 16 | 17 | pub fn num_overlay_segments(&self) -> usize { 18 | self.ovr.as_ref().map(|ovr| ovr.segs.len()).unwrap_or(0) 19 | } 20 | } 21 | 22 | impl SegInfo { 23 | pub fn size(&self) -> u16 { 24 | self.maxoff.wrapping_sub(self.minoff) 25 | } 26 | } 27 | 28 | impl OverlayStub { 29 | pub fn stub_addr(&self) -> SegOff { 30 | SegOff { seg: Seg::Normal(self.stub_segment), off: Off(self.stub_offset) } 31 | } 32 | 33 | pub fn dest_addr(&self) -> SegOff { 34 | SegOff { seg: Seg::Overlay(self.overlay_seg_num), off: Off(self.dest_offset) } 35 | } 36 | } 37 | 
-------------------------------------------------------------------------------- /src/binfmt/mz/mod.rs: -------------------------------------------------------------------------------- 1 | // public modules 2 | mod defs; 3 | mod methods; 4 | mod decode; 5 | mod print; 6 | 7 | // internal modules 8 | mod util; 9 | mod overlay; 10 | 11 | // re-exports 12 | pub use defs::*; 13 | pub use methods::*; 14 | pub use decode::*; 15 | pub use print::*; 16 | -------------------------------------------------------------------------------- /src/binfmt/mz/overlay.rs: -------------------------------------------------------------------------------- 1 | use crate::binfmt::mz::*; 2 | 3 | const CODE_OVERLAY_SEG_INTERRUPT_CODE: [u8; 4] = [0xcd, 0x3f, 0x00, 0x00]; 4 | const CODE_OVERLAY_SEG_ZEROS: [u8; 18] = [0; 18]; 5 | const CODE_OVERLAY_STUB_INTERRUPT_CODE: [u8; 2] = [0xcd, 0x3f]; 6 | const CODE_OVERLAY_STUB_ZEROS: [u8; 1] = [0; 1]; 7 | 8 | #[repr(C, packed)] 9 | #[derive(Debug)] 10 | pub struct CodeOverlaySeg { 11 | interrupt_code: [u8; 4], // should be: cd 3f 00 00 12 | data_offset: u32, 13 | seg_size: u16, 14 | _unknown_1: u16, 15 | _unknown_2: u16, 16 | _zeros: [u8; 18], 17 | /* stubs follow: [CodeOverlaySeg] */ 18 | } 19 | sa::const_assert!(std::mem::size_of::() == 32); 20 | 21 | #[repr(C, packed)] 22 | #[derive(Debug)] 23 | pub struct CodeOverlayStub { 24 | interrupt_code: [u8; 2], // should be: cd 3f 25 | call_offset: u16, 26 | _zeros: [u8; 1], 27 | } 28 | sa::const_assert!(std::mem::size_of::() == 5); 29 | 30 | pub(super) fn decode_overlay_info(data: &[u8], exe_start: u32, fbov: &FBOV, seginfo: &[SegInfo]) -> Result { 31 | let mut out_segs = vec![]; 32 | let mut out_stubs = vec![]; 33 | 34 | let exe_data = &data[exe_start as usize..]; 35 | // let exe_data = exe.exe_data(); 36 | // let Some(seginfo) = exe.seginfo else { 37 | // return Ok(None); 38 | // }; 39 | 40 | let mut next_seg = 0; 41 | for s in seginfo { 42 | // iterate all stubs 43 | if s.typ != SegInfoType::STUB { 
continue } 44 | 45 | // sanity check: might not be required but it's generally true how with 46 | // how this compiler liked to layout things 47 | assert!(next_seg == 0 || s.seg == next_seg); 48 | let n_segs = (s.maxoff + 15) >> 4; 49 | next_seg = s.seg + n_segs; 50 | 51 | // unpack the actual stub code 52 | let dat = &exe_data[16 * s.seg as usize..]; 53 | let sz = s.maxoff as usize; 54 | assert!(sz >= 32); // each hdr section is 32-bytes 55 | assert!((sz-32)%5 == 0); // each launcher entry is 5 bytes 56 | let num_entries = (sz-32)/5; 57 | 58 | // get the seg struct 59 | let seg: &CodeOverlaySeg = unsafe { util::try_struct_from_bytes(dat) }.unwrap(); 60 | if seg.interrupt_code != CODE_OVERLAY_SEG_INTERRUPT_CODE { 61 | return Err(format!("Invalid seg interrupt code, got {:?} expected {:?}", seg.interrupt_code, CODE_OVERLAY_SEG_INTERRUPT_CODE)); 62 | } 63 | if seg._zeros != CODE_OVERLAY_SEG_ZEROS { 64 | return Err(format!("Zeros in seg aren't zero, got {:?} expected {:?}", seg._zeros, CODE_OVERLAY_SEG_ZEROS)); 65 | } 66 | 67 | // create a user struct 68 | out_segs.push(OverlaySeg { 69 | stub_segment: s.seg, 70 | segment_size: seg.seg_size, 71 | data_offset: seg.data_offset, 72 | _unknown_1: seg._unknown_1, 73 | _unknown_2: seg._unknown_2, 74 | }); 75 | 76 | let seg_num: u16 = (out_segs.len() - 1).try_into().unwrap(); 77 | 78 | // process each stub 79 | let stubs: &[CodeOverlayStub] = unsafe { util::try_slice_from_bytes(&dat[32..], num_entries as usize) }.unwrap(); 80 | for (i, stub) in stubs.iter().enumerate() { 81 | if stub.interrupt_code != CODE_OVERLAY_STUB_INTERRUPT_CODE { 82 | return Err(format!("Invalid stub interrupt code, got {:?} expected {:?}", stub.interrupt_code, CODE_OVERLAY_STUB_INTERRUPT_CODE)); 83 | } 84 | if stub._zeros != CODE_OVERLAY_STUB_ZEROS { 85 | return Err(format!("Zeros in stub aren't zero, got {:?} expected {:?}", stub._zeros, CODE_OVERLAY_STUB_ZEROS)); 86 | } 87 | if stub.call_offset >= seg.seg_size { 88 | let call_offset = 
stub.call_offset; // unaligned 89 | let seg_size = seg.seg_size; // unaligned 90 | return Err(format!("Stub call offset exceeds the segment size, offset {} segsize: {}", call_offset, seg_size)); 91 | } 92 | 93 | out_stubs.push(OverlayStub { 94 | overlay_seg_num: seg_num, 95 | stub_segment: s.seg, 96 | stub_offset: (32 + 5*i).try_into().unwrap(), 97 | dest_offset: stub.call_offset, 98 | }); 99 | } 100 | } 101 | 102 | // Overlay data starts immediately after the FBOV header 103 | let file_offset = 104 | fbov as *const _ as usize 105 | + std::mem::size_of::() 106 | - data.as_ptr() as usize; 107 | 108 | Ok(OverlayInfo { 109 | file_offset: file_offset.try_into().unwrap(), 110 | segs: out_segs, 111 | stubs: out_stubs 112 | }) 113 | } 114 | -------------------------------------------------------------------------------- /src/binfmt/mz/print.rs: -------------------------------------------------------------------------------- 1 | use crate::binfmt::mz::*; 2 | 3 | impl<'a> Exe<'a> { 4 | pub fn print_hdr(&self) { 5 | // Load everything to stack because rust thinks it's unaligned and complains otherwise... 
6 | let magic = self.hdr.magic; 7 | let cblp = self.hdr.cblp; 8 | let cp = self.hdr.cp; 9 | let crlc = self.hdr.crlc; 10 | let cparhdr = self.hdr.cparhdr; 11 | let minalloc = self.hdr.minalloc; 12 | let maxalloc = self.hdr.maxalloc; 13 | let ss = self.hdr.ss; 14 | let sp = self.hdr.sp; 15 | let csum = self.hdr.csum; 16 | let ip = self.hdr.ip; 17 | let cs = self.hdr.cs; 18 | let lfarlc = self.hdr.lfarlc; 19 | let ovno = self.hdr.ovno; 20 | 21 | let magic_str = std::str::from_utf8(&magic).unwrap_or("??"); 22 | 23 | println!("MZ Header:"); 24 | println!(" magic: 0x{:02x}{:02x} (\"{}\")", magic[0], magic[1], magic_str); 25 | println!(" cblp 0x{:04x} ({})", cblp, cblp); 26 | println!(" cp 0x{:04x} ({})", cp, cp); 27 | println!(" crlc 0x{:04x} ({})", crlc, crlc); 28 | println!(" cparhdr 0x{:04x} ({})", cparhdr, cparhdr); 29 | println!(" minalloc 0x{:04x} ({})", minalloc, minalloc); 30 | println!(" maxalloc 0x{:04x} ({})", maxalloc, maxalloc); 31 | println!(" ss 0x{:04x} ({})", ss, ss); 32 | println!(" sp 0x{:04x} ({})", sp, sp); 33 | println!(" csum 0x{:04x} ({})", csum, csum); 34 | println!(" ip 0x{:04x} ({})", ip, ip); 35 | println!(" cs 0x{:04x} ({})", cs, cs); 36 | println!(" lfarlc 0x{:04x} ({})", lfarlc, lfarlc); 37 | println!(" ovno 0x{:04x} ({})", ovno, ovno); 38 | println!(""); 39 | println!("Exe Region:"); 40 | println!(" start 0x{:08x}", self.exe_start); 41 | println!(" end 0x{:08x}", self.exe_end); 42 | println!(""); 43 | } 44 | 45 | pub fn print_relocs(relocs: &[Reloc]) { 46 | print!("Relocations:"); 47 | for (i, r) in relocs.iter().enumerate() { 48 | if i%16 == 0 { println!(""); } 49 | // Load everything to stack because rust thinks it's unaligned and complains otherwise... 
50 | let segment = r.segment; 51 | let offset = r.offset; 52 | print!(" {:04x}:{:04x}", segment, offset); 53 | } 54 | println!(""); 55 | println!(""); 56 | } 57 | 58 | pub fn print_fbov(fbov: &FBOV) { 59 | // Load everything to stack because rust thinks it's unaligned and complains otherwise... 60 | let magic = fbov.magic; 61 | let ovrsize = fbov.ovrsize; 62 | let exeinfo = fbov.exeinfo; 63 | let segnum = fbov.segnum; 64 | 65 | let magic_str = std::str::from_utf8(&magic).unwrap_or("????"); 66 | 67 | println!("FBOV Header:"); 68 | println!(" magic: 0x{:02x}{:02x}{:02x}{:02x} (\"{}\")", magic[0], magic[1], magic[2], magic[3], magic_str); 69 | println!(" ovrsize 0x{:08x} ({})", ovrsize, ovrsize); 70 | println!(" exeinfo 0x{:08x} ({})", exeinfo, exeinfo); 71 | println!(" segnum 0x{:08x} ({})", segnum, segnum); 72 | println!(""); 73 | } 74 | 75 | pub fn print_seginfo(seginfo: &[SegInfo]) { 76 | println!("Segment Information:"); 77 | println!(" {:<4} {:<8} {:<12} {:<8} {:<8} {:<10}", 78 | "num", "seg", "type", "minoff", "maxoff", "size"); 79 | 80 | for (i, s) in seginfo.iter().enumerate() { 81 | // Load everything to stack because rust thinks it's unaligned and complains otherwise... 
82 | let seg = s.seg; 83 | let maxoff = s.maxoff; 84 | let typ = s.typ; 85 | let minoff = s.minoff; 86 | 87 | let typ_str = match typ { 88 | SegInfoType::DATA => "DATA", 89 | SegInfoType::CODE => "CODE", 90 | SegInfoType::STUB => "STUB", 91 | SegInfoType::OVERLAY => "OVERLAY", 92 | _ => "UNKNOWN", 93 | }; 94 | let typ_str = format!("{}({})", typ_str, typ); 95 | let n = s.size(); 96 | println!(" {:4} 0x{:04x} {:<12} 0x{:04x} 0x{:04x} {:5} (0x{:04x})", 97 | i, seg, typ_str, minoff, maxoff, n, n); 98 | } 99 | println!(""); 100 | } 101 | 102 | 103 | fn print_overlayinfo(ovr: &OverlayInfo) { 104 | println!("Overlay File Offset: 0x{:x}", ovr.file_offset); 105 | println!(""); 106 | 107 | println!("Overlay Segments:"); 108 | println!(" num data_off data_end seg_size _unknown_1 _unknown_2"); 109 | for (i, seg) in ovr.segs.iter().enumerate() { 110 | let end = seg.data_offset + seg.segment_size as u32; 111 | println!(" {:3} 0x{:08x} 0x{:08x} {:9} 0x{:04x} 0x{:04x}", 112 | i, seg.data_offset, end, seg.segment_size, seg._unknown_1, seg._unknown_2); 113 | } 114 | println!(""); 115 | 116 | println!("Overlay Stubs:"); 117 | for stub in &ovr.stubs { 118 | println!(" {} => {}", stub.stub_addr(), stub.dest_addr()); 119 | } 120 | } 121 | 122 | pub fn print(&self) { 123 | Self::print_hdr(self); 124 | Self::print_relocs(&self.relocs); 125 | if let Some(fbov) = self.fbov { 126 | Self::print_fbov(fbov); 127 | } 128 | if let Some(seginfo) = self.seginfo { 129 | Self::print_seginfo(seginfo); 130 | } 131 | if let Some(ovr) = self.ovr.as_ref() { 132 | Self::print_overlayinfo(ovr); 133 | } 134 | } 135 | } 136 | -------------------------------------------------------------------------------- /src/binfmt/mz/util.rs: -------------------------------------------------------------------------------- 1 | use std::mem::size_of; 2 | 3 | pub(super) unsafe fn try_struct_from_bytes(data: &[u8]) -> Result<&T, String> { 4 | let sz = size_of::(); 5 | if data.len() < sz { 6 | Err(format!("Data is too short 
for {}: got {}, expected {}", std::any::type_name::(), data.len(), sz)) 7 | } else { 8 | Ok(unsafe { &*(data.as_ptr() as *const T) }) 9 | } 10 | } 11 | 12 | pub(super) unsafe fn try_slice_from_bytes(data: &[u8], nelts: usize) -> Result<&[T], String> { 13 | let len = nelts * size_of::(); 14 | if data.len() < len { 15 | Err(format!("Data is too short for {}: got {}, expected {}", std::any::type_name::<[T]>(), data.len(), len)) 16 | } else { 17 | Ok(unsafe { std::slice::from_raw_parts(data.as_ptr() as *const T, nelts) }) 18 | } 19 | } 20 | -------------------------------------------------------------------------------- /src/bsl/bind.rs: -------------------------------------------------------------------------------- 1 | /* automatically generated by rust-bindgen 0.69.4 (and then modified by hand) */ 2 | pub use std::os::raw::{c_int, c_char, c_void}; 3 | 4 | #[repr(C)] 5 | #[derive(Debug, Copy, Clone)] 6 | pub struct bsl { 7 | _unused: [u8; 0], 8 | } 9 | 10 | #[allow(non_camel_case_types)] 11 | pub type bsl_t = bsl; 12 | #[allow(non_camel_case_types)] 13 | pub type bsl_iter_t = bsl_iter; 14 | 15 | #[repr(C)] 16 | #[repr(align(16))] 17 | #[derive(Debug, Copy, Clone)] 18 | pub struct bsl_iter { 19 | pub _opaque: [c_char; 32usize], 20 | } 21 | 22 | #[allow(unused)] 23 | pub const BSL_SUCCESS: c_int = 0; 24 | #[allow(unused)] 25 | pub const BSL_ERR_PARSE: c_int = 1; 26 | 27 | pub const BSL_TYPE_STR: c_int = 0; 28 | pub const BSL_TYPE_NODE: c_int = 1; 29 | 30 | extern "C" { 31 | pub fn bsl_parse_new( 32 | buf: *const c_char, 33 | sz: usize, 34 | opt_err: *mut c_int, 35 | ) -> *mut bsl_t; 36 | } 37 | 38 | extern "C" { 39 | pub fn bsl_delete(bsl: *mut bsl_t); 40 | } 41 | extern "C" { 42 | pub fn bsl_get_generic( 43 | bsl: *mut bsl_t, 44 | key: *const c_char, 45 | opt_type: *mut c_int, 46 | ) -> *mut c_void; 47 | } 48 | #[allow(unused)] 49 | extern "C" { 50 | pub fn bsl_get_str( 51 | bsl: *mut bsl_t, 52 | key: *const c_char, 53 | ) -> *const c_char; 54 | } 55 | 
#[allow(unused)] 56 | extern "C" { 57 | pub fn bsl_get_node(bsl: *mut bsl_t, key: *const c_char) -> *mut bsl_t; 58 | } 59 | #[allow(unused)] 60 | extern "C" { 61 | pub fn bsl_iter_begin(it: *mut bsl_iter_t, bsl: *mut bsl_t); 62 | } 63 | #[allow(unused)] 64 | extern "C" { 65 | pub fn bsl_iter_next( 66 | it: *mut bsl_iter_t, 67 | _type: *mut c_int, 68 | _key: *mut *const c_char, 69 | _val: *mut *mut c_void, 70 | ) -> bool; 71 | } 72 | -------------------------------------------------------------------------------- /src/bsl/mod.rs: -------------------------------------------------------------------------------- 1 | mod bind; 2 | mod wrap; 3 | pub use wrap::*; 4 | -------------------------------------------------------------------------------- /src/bsl/wrap.rs: -------------------------------------------------------------------------------- 1 | use crate::bsl::bind::*; 2 | use std::ffi::{CStr, CString}; 3 | use std::mem::MaybeUninit; 4 | use std::marker::PhantomData; 5 | 6 | pub enum Value<'a> { 7 | Node(Node<'a>), 8 | Str(&'a str), 9 | } 10 | 11 | #[derive(Clone)] 12 | pub struct Node<'a> { 13 | ctx: *mut bsl_t, 14 | phantom: PhantomData<&'a Root>, 15 | } 16 | 17 | pub struct Root { 18 | ctx: *mut bsl_t, 19 | } 20 | 21 | pub struct Iter<'a> { 22 | it: bsl_iter_t, 23 | phantom: PhantomData>, 24 | } 25 | 26 | pub fn parse(inp: &str) -> Option { 27 | Root::parse(inp) 28 | } 29 | 30 | impl Root { 31 | pub fn parse(inp: &str) -> Option { 32 | let mut err: c_int = 0; 33 | let ctx = unsafe { bsl_parse_new(inp.as_ptr() as *const _, inp.len(), &mut err) }; 34 | if ctx.is_null() { 35 | return None 36 | } 37 | 38 | Some(Root { ctx }) 39 | } 40 | 41 | #[allow(unused)] 42 | pub fn get(&self, key: &str) -> Option { node_get(self.ctx, key) } 43 | #[allow(unused)] 44 | pub fn get_str(&self, key: &str) -> Option<&str> { node_get_str(self.ctx, key) } 45 | #[allow(unused)] 46 | pub fn get_node(&self, key: &str) -> Option { node_get_node(self.ctx, key) } 47 | } 48 | 49 | impl Drop for 
Root { 50 | fn drop(&mut self) { 51 | unsafe { bsl_delete(self.ctx) }; 52 | } 53 | } 54 | 55 | impl<'a> Value<'a> { 56 | fn from_raw(typ: c_int, ptr: *mut c_void) -> Value<'a> { 57 | if typ == BSL_TYPE_STR { 58 | let cstr = unsafe { CStr::from_ptr(ptr as *const c_char) }; 59 | Value::Str(cstr.to_str().unwrap()) 60 | } else if typ == BSL_TYPE_NODE { 61 | Value::Node(Node { ctx: ptr as *mut bsl_t, phantom: PhantomData}) 62 | } else { 63 | panic!("Unexpected type: {}", typ); 64 | } 65 | } 66 | 67 | pub fn as_node(&self) -> Option> { 68 | match self { 69 | Self::Node(n) => Some(n.clone()), 70 | _ => None, 71 | } 72 | } 73 | 74 | #[allow(unused)] 75 | pub fn as_str(&self) -> Option<&'a str> { 76 | match self { 77 | Self::Str(s) => Some(s), 78 | _ => None, 79 | } 80 | } 81 | } 82 | 83 | fn node_get<'a>(ctx: *mut bsl_t, key: &str) -> Option> { 84 | let key_cstr = CString::new(key).unwrap(); 85 | let mut typ: c_int = 0; 86 | let ptr = unsafe { bsl_get_generic(ctx, key_cstr.as_ptr(), &mut typ) }; 87 | if ptr.is_null() { 88 | return None; 89 | } 90 | Some(Value::from_raw(typ, ptr)) 91 | } 92 | 93 | fn node_get_str<'a>(ctx: *mut bsl_t, key: &str) -> Option<&'a str> { 94 | let elt = node_get(ctx, key)?; 95 | match elt { 96 | Value::Str(s) => Some(s), 97 | _ => None, 98 | } 99 | } 100 | 101 | fn node_get_node<'a>(ctx: *mut bsl_t, key: &str) -> Option> { 102 | let elt = node_get(ctx, key)?; 103 | match elt { 104 | Value::Node(n) => Some(n), 105 | _ => None, 106 | } 107 | } 108 | 109 | impl<'a> Node<'a> { 110 | #[allow(unused)] 111 | pub fn get(&self, key: &str) -> Option { node_get(self.ctx, key) } 112 | #[allow(unused)] 113 | pub fn get_str(&self, key: &str) -> Option<&str> { node_get_str(self.ctx, key) } 114 | #[allow(unused)] 115 | pub fn get_node(&self, key: &str) -> Option { node_get_node(self.ctx, key) } 116 | 117 | pub fn iter(&self) -> Iter { 118 | let mut it: bsl_iter_t = unsafe { MaybeUninit::zeroed().assume_init() }; 119 | unsafe { bsl_iter_begin(&mut it, self.ctx) }; 
120 | Iter { it, phantom: PhantomData } 121 | } 122 | } 123 | 124 | impl<'a> Iterator for Iter<'a> { 125 | type Item = (&'a str, Value<'a>); 126 | fn next(&mut self) -> Option { 127 | let mut typ: c_int = -1; 128 | let mut key: *const c_char = std::ptr::null(); 129 | let mut val: *mut c_void = std::ptr::null_mut(); 130 | let valid = unsafe { bsl_iter_next(&mut self.it, &mut typ, &mut key, &mut val) }; 131 | if !valid { 132 | return None; 133 | } 134 | let key = unsafe { CStr::from_ptr(key) }.to_str().unwrap(); 135 | let elt = Value::from_raw(typ, val); 136 | Some((key, elt)) 137 | } 138 | } 139 | 140 | #[cfg(test)] 141 | mod tests { 142 | use super::*; 143 | 144 | #[test] 145 | fn test_1() { 146 | let inp = "foo bar"; 147 | let root = Root::parse(inp).unwrap(); 148 | assert_eq!(root.get_str("foo"), Some("bar")); 149 | assert_eq!(root.get_str("foo1"), None); 150 | } 151 | 152 | #[test] 153 | fn test_2() { 154 | let inp = "foo bar good stuff "; 155 | let root = Root::parse(inp).unwrap(); 156 | assert_eq!(root.get_str("foo"), Some("bar")); 157 | assert_eq!(root.get_str("good"), Some("stuff")); 158 | assert_eq!(root.get_str("foo1"), None); 159 | } 160 | 161 | #[test] 162 | fn test_3() { 163 | let inp = "top {foo bar baz {} } top2 r "; 164 | let root = Root::parse(inp).unwrap(); 165 | assert_eq!(root.get_str("top.foo"), Some("bar")); 166 | assert_eq!(root.get_str("top.foo.baz"), None); 167 | assert!(root.get_node("top.baz").is_some()); 168 | } 169 | 170 | #[test] 171 | fn test_4() { 172 | let inp = "top \"foo bar\" bot g quote \"{ key val }\""; 173 | let root = Root::parse(inp).unwrap(); 174 | assert_eq!(root.get_str("top"), Some("foo bar")); 175 | assert_eq!(root.get_str("bot"), Some("g")); 176 | assert_eq!(root.get_str("quote"), Some("{ key val }")); 177 | } 178 | 179 | #[test] 180 | fn test_5() { 181 | let inp = "top { a b c { d e } }"; 182 | let root = Root::parse(inp).unwrap(); 183 | let top = root.get_node("top").unwrap(); 184 | 185 | let keys: Vec<_> = 
// SSA IR Definitions

/// Index of a constant; `Ref::Const(ConstRef(n))` is displayed from `ir.consts[n]`.
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)] pub struct ConstRef(pub usize);
/// Identifier of a basic block; the display code prints it as `b<n>`.
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)] pub struct BlockRef(pub usize);

/// A reference to something an instruction operand can name.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum Ref {
  //None,
  /// A constant, identified by its `ConstRef`.
  Const(ConstRef),
  /// An instruction: the owning block plus its index in that block's `instrs`.
  Instr(BlockRef, DVecIndex),
  /// A machine register, displayed by register name.
  /// NOTE(review): presumably the register's value on function entry -- confirm in ir/build.rs.
  Init(instr::Reg),
  /// A basic block (e.g. a jump target).
  Block(BlockRef),
  /// A symbol resolved through `IR::symbols`.
  Symbol(sym::SymbolRef),
  /// A function, as an index into `IR::funcs`.
  Func(usize),
}

/// Base name of a value: either a machine register or a named variable.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub enum Name {
  Reg(instr::Reg),
  Var(String),
}

/// A `Name` plus a number; the display code prints it as `<name>.<number>`.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub struct FullName(pub Name, pub usize);

/// Bit flags stored in `Instr::attrs`; combine with `|` and test with `&`.
#[allow(non_snake_case)]
pub mod Attribute {
  pub const NONE: u8 = 0;
  pub const MAY_ESCAPE: u8 = 1<<0;
  pub const STACK_PTR: u8 = 1<<1;
  pub const PIN: u8 = 1<<2;
}
56 | pub incomplete_phis: Vec<(Name, Ref)>, 57 | } 58 | 59 | #[derive(Debug)] 60 | pub struct IR { 61 | pub consts: Vec, 62 | pub symbols: sym::SymbolMap, 63 | pub funcs: Vec, 64 | pub names: HashMap, 65 | pub name_next: HashMap, 66 | pub blocks: Vec>, // Optional because dead blocks can be pruned out 67 | } 68 | -------------------------------------------------------------------------------- /src/ir/display.rs: -------------------------------------------------------------------------------- 1 | use crate::asm::instr; 2 | use crate::ir::*; 3 | use std::fmt::{self, Write}; 4 | use std::collections::HashMap; 5 | 6 | impl fmt::Display for Opcode { 7 | fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { 8 | write!(f, "{}", self.as_str()) 9 | } 10 | } 11 | 12 | fn reg_name(r: instr::Reg) -> &'static str { 13 | match r { 14 | instr::Reg::AX => "ax", 15 | instr::Reg::CX => "cx", 16 | instr::Reg::DX => "dx", 17 | instr::Reg::BX => "bx", 18 | instr::Reg::SP => "sp", 19 | instr::Reg::BP => "bp", 20 | instr::Reg::SI => "si", 21 | instr::Reg::DI => "di", 22 | instr::Reg::AL => "al", 23 | instr::Reg::CL => "cl", 24 | instr::Reg::DL => "dl", 25 | instr::Reg::BL => "bl", 26 | instr::Reg::AH => "ah", 27 | instr::Reg::CH => "ch", 28 | instr::Reg::DH => "dh", 29 | instr::Reg::BH => "bh", 30 | instr::Reg::ES => "es", 31 | instr::Reg::CS => "cs", 32 | instr::Reg::SS => "ss", 33 | instr::Reg::DS => "ds", 34 | instr::Reg::IP => "ip", 35 | instr::Reg::FLAGS => "flags", 36 | } 37 | } 38 | 39 | fn attributes_string(attr: u8) -> String { 40 | let mut out = String::new(); 41 | if (attr & Attribute::MAY_ESCAPE) != 0 { 42 | if out.len() > 0 { out += ","; } 43 | out += &format!("may_escape"); 44 | } 45 | if (attr & Attribute::STACK_PTR) != 0 { 46 | if out.len() > 0 { out += ","; } 47 | out += &format!("stack_ptr"); 48 | } 49 | if (attr & Attribute::PIN) != 0 { 50 | if out.len() > 0 { out += ","; } 51 | out += &format!("pin"); 52 | } 53 | out 54 | } 55 | 56 | impl fmt::Display for Name { 
57 | fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { 58 | match self { 59 | Name::Reg(r) => write!(f, "{}", reg_name(*r)), 60 | Name::Var(v) => write!(f, "{}", v), 61 | } 62 | } 63 | } 64 | 65 | pub struct Formatter { 66 | map: HashMap, 67 | next: usize, 68 | pub(crate) out: String, // HAX pub(crate) 69 | } 70 | 71 | impl Formatter { 72 | pub fn new() -> Self { 73 | Self { 74 | map: HashMap::new(), 75 | next: 0, 76 | out: String::new(), 77 | } 78 | } 79 | 80 | fn map(&mut self, r: Ref) -> usize { 81 | match self.map.get(&r) { 82 | Some(val) => *val, 83 | None => { 84 | let val = self.next; 85 | self.map.insert(r, val); 86 | self.next += 1; 87 | val 88 | } 89 | } 90 | } 91 | 92 | pub fn finish(self) -> String { 93 | self.out 94 | } 95 | 96 | pub fn ref_string(&mut self, ir: &IR, r: Ref) -> Result { 97 | let mut buf = String::new(); 98 | let f = &mut buf; 99 | 100 | match r { 101 | Ref::Const(ConstRef(num)) => { 102 | let k = ir.consts[num] as i16; 103 | if -1024 <= k && k <= 16 { 104 | write!(f, "#{}", k)?; 105 | } else { 106 | write!(f, "#0x{:x}", k)?; 107 | } 108 | } 109 | Ref::Init(reg) => write!(f, "{}", reg.info().name)?, 110 | Ref::Block(blk) => write!(f, "b{}", blk.0)?, 111 | Ref::Instr(_, _) => { 112 | if let Some(FullName(sym, num)) = ir.names.get(&r) { 113 | write!(f, "{}.{}", sym, num)?; 114 | } else { 115 | write!(f, "t{}", self.map(r))?; 116 | } 117 | } 118 | Ref::Symbol(sym) => write!(f, "{}", sym.name(&ir.symbols))?, 119 | Ref::Func(idx) => write!(f, "{}", ir.funcs[idx])?, 120 | } 121 | 122 | Ok(buf) 123 | } 124 | 125 | pub fn fmt_blkhdr(&mut self, blkref: BlockRef, blk: &Block) -> fmt::Result { 126 | writeln!(&mut self.out, "")?; 127 | write!(&mut self.out, "b{}: (", blkref.0)?; 128 | for (k, p) in blk.preds.iter().enumerate() { 129 | if k != 0 { 130 | write!(&mut self.out, " ")?; 131 | } 132 | write!(&mut self.out, "b{}", p.0)?; 133 | } 134 | writeln!(&mut self.out, ") {}", blk.name)?; 135 | Ok(()) 136 | } 137 | 138 | 139 | pub fn 
fmt_instr(&mut self, ir: &IR, dst: Ref, instr: &Instr) -> fmt::Result { 140 | let s = self.ref_string(ir, dst)?; 141 | if !instr.opcode.has_no_result() { 142 | write!(&mut self.out, " {:<8} = ", s)?; 143 | } else { 144 | write!(&mut self.out, " {:<11}", "")?; 145 | } 146 | write!(&mut self.out, "{:<8} ", format!("{}", instr.typ))?; 147 | write!(&mut self.out, "{:<10}", instr.opcode.to_string())?; 148 | for oper in &instr.operands { 149 | let s = self.ref_string(ir, *oper)?; 150 | write!(&mut self.out, " {:<20}", s)?; 151 | } 152 | if instr.attrs != 0 { 153 | write!(&mut self.out, " [{}]", attributes_string(instr.attrs))?; 154 | } 155 | writeln!(&mut self.out, "")?; 156 | 157 | Ok(()) 158 | } 159 | 160 | fn fmt_ir(&mut self, ir: &IR) -> fmt::Result { 161 | for bref in ir.iter_blocks() { 162 | let blk = ir.block(bref); 163 | self.fmt_blkhdr(bref, blk)?; 164 | for idx in blk.instrs.range() { 165 | let iref = Ref::Instr(bref, idx); 166 | let instr = &blk.instrs[idx]; 167 | if instr.opcode == Opcode::Nop { continue; } 168 | self.fmt_instr(ir, iref, instr)?; 169 | } 170 | } 171 | 172 | Ok(()) 173 | } 174 | } 175 | 176 | impl fmt::Display for IR { 177 | fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { 178 | let mut r = Formatter::new(); 179 | r.fmt_ir(self)?; 180 | write!(f, "{}", r.finish()) 181 | } 182 | } 183 | 184 | pub fn display_ir_with_uses(ir: &IR) -> Result { 185 | let n_uses = util::compute_uses(ir); 186 | let mut r = Formatter::new(); 187 | for bref in ir.iter_blocks() { 188 | let blk = ir.block(bref); 189 | r.fmt_blkhdr(bref, blk)?; 190 | for idx in blk.instrs.range() { 191 | let iref = Ref::Instr(bref, idx); 192 | let instr = &blk.instrs[idx]; 193 | if instr.opcode == Opcode::Nop { continue; } 194 | 195 | let n = n_uses.get(&iref).unwrap_or(&0); 196 | write!(&mut r.out, "{:<3} | ", n)?; 197 | r.fmt_instr(ir, iref, instr)?; 198 | } 199 | } 200 | Ok(r.finish()) 201 | } 202 | 203 | pub fn instr_to_string(ir: &IR, iref: Ref) -> String { 204 | let mut r 
= Formatter::new(); 205 | let instr = ir.instr(iref).unwrap(); 206 | r.fmt_instr(ir, iref, instr).unwrap(); 207 | r.finish() 208 | } 209 | -------------------------------------------------------------------------------- /src/ir/fin.rs: -------------------------------------------------------------------------------- 1 | use crate::ir::def::*; 2 | use std::collections::HashMap; 3 | 4 | struct Finalizer { 5 | num: usize, 6 | } 7 | 8 | impl Finalizer { 9 | fn new() -> Self { 10 | Self { num: 0 } 11 | } 12 | 13 | // Insert blocks between jne and phi so that astgen can 14 | // implement phi's in the intermediate block. 15 | // This is required only when a block ends with a jne and 16 | // one or more target block contain phis 17 | fn insert_intermediate_phi_blocks(&mut self, ir: &mut IR) { 18 | for blkref in ir.iter_blocks() { 19 | let r = Ref::Instr(blkref, ir.block(blkref).instrs.last_idx().unwrap()); 20 | let exits = ir.block(blkref).exits(); 21 | if exits.len() <= 1 { continue; } 22 | let mut old_to_new = HashMap::new(); 23 | for (i, exit) in exits.into_iter().enumerate() { 24 | if let Some(new_exit) = old_to_new.get(&exit) { 25 | // Already generated phi block.. 
rewrite it 26 | ir.instr_mut(r).unwrap().operands[i+1] = Ref::Block(*new_exit); 27 | continue; 28 | } 29 | if target_has_phis(ir, exit) { 30 | let new_exit = self.insert_block(ir, blkref, r, i+1); 31 | old_to_new.insert(exit, new_exit); 32 | } 33 | } 34 | } 35 | } 36 | 37 | fn insert_block(&mut self, ir: &mut IR, blkref: BlockRef, r: Ref, oper_idx: usize) -> BlockRef { 38 | // unique number for block name 39 | let num = self.num; 40 | self.num += 1; 41 | 42 | // fetch the dest_blkref 43 | let instr = ir.instr(r).unwrap(); 44 | let dest_blkref = instr.operands[oper_idx]; 45 | 46 | // generate new block 47 | let mut new_blk = Block::new(&format!("phi_{:04}", num)); 48 | new_blk.sealed = true; 49 | 50 | // have the new block jump to the original destination 51 | new_blk.instrs.push_back(Instr { 52 | typ: crate::types::Type::Void, 53 | attrs: Attribute::NONE, 54 | opcode: Opcode::Jmp, 55 | operands: vec![dest_blkref], 56 | }); 57 | 58 | // add preds to new blk 59 | new_blk.preds.push(blkref); 60 | 61 | // append the block to the ir 62 | let new_blkref = ir.push_block(new_blk); 63 | 64 | // update the jne instruction to jump to the new blk 65 | let instr = ir.instr_mut(r).unwrap(); 66 | let dest_blkref = instr.operands[oper_idx]; 67 | instr.operands[oper_idx] = Ref::Block(new_blkref); 68 | 69 | // update the dest block preds to be the new block instead of the old block 70 | let dest_blk = ir.block_mut(dest_blkref.unwrap_block()); 71 | for pred in &mut dest_blk.preds { 72 | if *pred == blkref { 73 | *pred = new_blkref; 74 | } 75 | } 76 | 77 | new_blkref 78 | } 79 | } 80 | 81 | fn target_has_phis(ir: &IR, b: BlockRef) -> bool { 82 | for r in ir.iter_instrs(b) { 83 | if ir.instr(r).unwrap().opcode == Opcode::Phi { 84 | return true; 85 | } 86 | } 87 | false 88 | } 89 | 90 | pub fn finalize(ir: &mut IR) { 91 | let mut fin = Finalizer::new(); 92 | fin.insert_intermediate_phi_blocks(ir); 93 | } 94 | 
/// Fuses two adjacent 16-bit variable writes into one 32-bit write.
///
/// A `WriteVar16` to offset 0 (size 2) of a 4-byte symbol is matched when the
/// ref returned by `prev_ref_in_block` is a `WriteVar16` to offset 2 (size 2)
/// of the same symbol definition. The earlier (high) instruction is overwritten
/// with `Make32(high_val, low_val)` and the later (low) instruction with a
/// `WriteVar32` of that `Make32` result to the joined symbol reference.
pub fn fuse_adjacent_writevar16_to_writevar32(ir: &mut IR) {
  // FIXME: THIS FUNCTION IS MESSY... CAN WE MAKE IT CLEANER???
  for b in ir.iter_blocks() {
    for r in ir.iter_instrs(b) {

      // Find low write16: E.g. 'writevar16 _local_0028 dx.3' where _local_0028 is u32
      let low_ref = r;
      let low_instr = ir.instr(low_ref).unwrap();
      if low_instr.opcode != Opcode::WriteVar16 { continue; }
      let Ref::Symbol(low_symref) = &low_instr.operands[0] else { continue };
      let low_sym = low_symref.def(&ir.symbols);
      if low_symref.off() != 0 { continue; }
      if low_symref.sz() != 2 { continue; }
      if low_sym.size != 4 { continue; }

      // Find high write16: E.g. 'writevar16 _local_0028@2 ax.3' where _local_0028 is u32
      let Some(high_ref) = ir.prev_ref_in_block(low_ref) else { continue };
      let high_instr = ir.instr(high_ref).unwrap();
      if high_instr.opcode != Opcode::WriteVar16 { continue; }
      let Ref::Symbol(high_symref) = &high_instr.operands[0] else { continue };
      let high_sym = high_symref.def(&ir.symbols);
      if high_symref.off() != 2 { continue; }
      if high_symref.sz() != 2 { continue; }
      // Both halves must resolve to the very same symbol definition (compared by address).
      if low_sym as *const _ != high_sym as *const _ { continue; }

      // New sequence: Make32 and WriteVar32
      let symref = sym::SymbolRef::join_adjacent(&ir.symbols, *low_symref, *high_symref).unwrap();

      // Copy the written values out before the instructions are overwritten below.
      let low_val = low_instr.operands[1];
      let high_val = high_instr.operands[1];
      *ir.instr_mut(high_ref).unwrap() = Instr {
        typ: Type::U32,
        attrs: Attribute::NONE,
        opcode: Opcode::Make32,
        operands: vec![high_val, low_val],
      };
      // `high_ref` now names the Make32 result, which becomes the value written.
      *ir.instr_mut(low_ref).unwrap() = Instr {
        typ: Type::Void,
        attrs: Attribute::MAY_ESCAPE,
        opcode: Opcode::WriteVar32,
        operands: vec![Ref::Symbol(symref), high_ref],
      };
    }
  }
}

/// Returns true when `high_ref` and `low_ref` are `Load16` instructions that
/// can be combined into a single 32-bit load.
///
/// Both loads must have an identical first operand, and their second operands
/// must both be instruction refs in one of two shapes:
/// - the high offset is `Add(<low offset ref>, const)`; the low constant is taken as 0
/// - both offsets are `Add(<same first operand>, const)`
/// In either shape the high constant must equal the low constant plus 2.
fn is_fusable_load16_to_load32(ir: &IR, high_ref: Ref, low_ref: Ref) -> bool {
  let Some(high_instr) = ir.instr(high_ref) else { return false };
  let Some(low_instr) = ir.instr(low_ref) else { return false };
  if high_instr.opcode != Opcode::Load16 { return false } // high instr is load16
  if low_instr.opcode != Opcode::Load16 { return false } // low instr is load16
  if high_instr.operands[0] != low_instr.operands[0] { return false } // matching segments?

  let high_ref_ref = high_instr.operands[1];
  let low_ref_ref = low_instr.operands[1];
  let (high_k, low_k) = match (high_ref_ref, low_ref_ref) {
    //(Ref::Const(_), Ref::Const(_)) => (high_ref_ref, low_ref_ref),
    (Ref::Instr(_, _), Ref::Instr(_, _)) => {
      let high_off = ir.instr(high_ref_ref).unwrap();
      let low_off = ir.instr(low_ref_ref).unwrap();
      if high_off.opcode != Opcode::Add { return false }

      //if false { unreachable!();
      if high_off.operands[0] == low_ref_ref && high_off.operands[1].is_const() {
        // High offset is expressed directly in terms of the low offset value.
        (ir.lookup_const(high_off.operands[1]).unwrap(), 0)
      } else {
        // Otherwise both offsets must be `base + const` over the same base.
        if low_off.opcode != Opcode::Add { return false }
        if high_off.operands[0] != low_off.operands[0] { return false }
        let Ref::Const(_) = &high_off.operands[1] else { return false };
        let Ref::Const(_) = &low_off.operands[1] else { return false };
        (
          ir.lookup_const(high_off.operands[1]).unwrap(),
          ir.lookup_const(low_off.operands[1]).unwrap(),
        )
      }
    }
    _ => return false,
  };

  high_k == low_k+2
}
89 | for b in ir.iter_blocks() { 90 | for r in ir.iter_instrs(b) { 91 | let make32_ref = r; 92 | let make32_instr = ir.instr(make32_ref).unwrap(); 93 | if make32_instr.opcode != Opcode::Make32 { continue } 94 | 95 | let high_ref = make32_instr.operands[0]; 96 | let low_ref = make32_instr.operands[1]; 97 | if !is_fusable_load16_to_load32(ir, high_ref, low_ref) { continue } 98 | 99 | // Need to check that the load16's are in the same block and no other memory references 100 | // are between them and the make32 (otherwise we could have aliasing and break everything) 101 | let Ref::Instr(make32_b, _) = make32_ref else { unreachable!() }; 102 | let Ref::Instr(high_b, high_i) = high_ref else { unreachable!() }; 103 | let Ref::Instr(low_b, low_i) = low_ref else { unreachable!() }; 104 | if make32_b != high_b { continue } 105 | if make32_b != low_b { continue } 106 | let start_i = std::cmp::min(high_i, low_i); 107 | let mut cur = Ref::Instr(make32_b, start_i); 108 | let mut allowed = true; 109 | loop { 110 | cur = ir.next_ref_in_block(cur).unwrap(); 111 | if cur == make32_ref { break } 112 | if cur == high_ref || cur == low_ref { continue }; 113 | let instr = ir.instr(cur).unwrap(); 114 | if instr.opcode.is_mem_op() { 115 | allowed = false; 116 | break; 117 | } 118 | } 119 | if !allowed { continue } 120 | 121 | // Okay, do the rewrite! 
/// Operation performed by an IR instruction.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum Opcode {
  Nop,
  Pin,
  Ref,
  Phi,
  Unimpl,

  Add,
  Sub,
  Shl,
  /// signed
  Shr,
  /// unsigned
  UShr,
  And,
  Or,
  Xor,
  /// signed
  IMul,
  /// unsigned
  UMul,
  /// signed
  IDiv,
  /// unsigned
  UDiv,

  Neg,
  /// bitwise
  Not,

  SignExtTo32,

  Load8,
  Load16,
  Load32,
  Store8,
  Store16,
  Store32,
  ReadVar8,
  ReadVar16,
  ReadVar32,
  WriteVar8,
  WriteVar16,
  WriteVar32,
  ReadArr8,
  ReadArr16,
  ReadArr32,
  WriteArr8,
  WriteArr16,
  /// |n: u32| => n as u16
  Lower16,
  /// |n: u32| => (n >> 16) as u16
  Upper16,
  /// |high: u16, low: u16| => (high as u32) << 16 | (low as u32)
  Make32,

  UpdateFlags,
  /// Maps to: JE / JZ
  EqFlags,
  /// Maps to: JNE / JNZ
  NeqFlags,
  /// Maps to: JG / JNLE
  GtFlags,
  /// Maps to: JGE / JNL
  GeqFlags,
  /// Maps to: JL / JNGE
  LtFlags,
  /// Maps to: JLE / JNG
  LeqFlags,
  /// Maps to: JA / JNBE
  UGtFlags,
  /// Maps to: JAE / JNB / JNC
  UGeqFlags,
  /// Maps to: JB / JNAE / JC
  ULtFlags,
  /// Maps to: JBE / JNA
  ULeqFlags,
  /// Maps to: JS and inverted for JNS
  SignFlags,

  /// Operator: == (any sign)
  Eq,
  /// Operator: != (any sign)
  Neq,
  /// Operator: > (signed)
  Gt,
  /// Operator: >= (signed)
  Geq,
  /// Operator: < (signed)
  Lt,
  /// Operator: <= (signed)
  Leq,
  /// Operator: > (unsigned)
  UGt,
  /// Operator: >= (unsigned)
  UGeq,
  /// Operator: < (unsigned)
  ULt,
  /// Operator: <= (unsigned)
  ULeq,
  /// Is Signed?
  Sign,
  /// Is not signed?
  NotSign,

  CallFar,
  CallNear,
  CallPtr,
  CallArgs,
  Int,

  RetFar,
  RetNear,

  Jmp,
  Jne,
  JmpTbl,

  // TODO: HMMM.... Better Impl?
  AssertEven,
  AssertPos,
}

impl Opcode {
  /// Returns the lowercase mnemonic used when printing this opcode.
  pub fn as_str(&self) -> &'static str {
    match *self {
      // Structural
      Self::Nop => "nop",
      Self::Pin => "pin",
      Self::Ref => "ref",
      Self::Phi => "phi",
      Self::Unimpl => "unimpl",

      // Arithmetic and bitwise
      Self::Add => "add",
      Self::Sub => "sub",
      Self::Shl => "shl",
      Self::Shr => "shr",
      Self::UShr => "ushr",
      Self::And => "and",
      Self::Or => "or",
      Self::Xor => "xor",
      Self::IMul => "imul",
      Self::UMul => "umul",
      Self::IDiv => "idiv",
      Self::UDiv => "udiv",
      Self::Neg => "neg",
      Self::Not => "not",
      Self::SignExtTo32 => "signext32",

      // Memory, variables and arrays
      Self::Load8 => "load8",
      Self::Load16 => "load16",
      Self::Load32 => "load32",
      Self::Store8 => "store8",
      Self::Store16 => "store16",
      Self::Store32 => "store32",
      Self::ReadVar8 => "readvar8",
      Self::ReadVar16 => "readvar16",
      Self::ReadVar32 => "readvar32",
      Self::WriteVar8 => "writevar8",
      Self::WriteVar16 => "writevar16",
      Self::WriteVar32 => "writevar32",
      Self::ReadArr8 => "readarr8",
      Self::ReadArr16 => "readarr16",
      Self::ReadArr32 => "readarr32",
      Self::WriteArr8 => "writearr8",
      Self::WriteArr16 => "writearr16",

      // 16/32-bit packing
      Self::Lower16 => "lower16",
      Self::Upper16 => "upper16",
      Self::Make32 => "make32",

      // Flag-based conditions
      Self::UpdateFlags => "updf",
      Self::EqFlags => "eqf",
      Self::NeqFlags => "neqf",
      Self::GtFlags => "gtf",
      Self::GeqFlags => "geqf",
      Self::LtFlags => "ltf",
      Self::LeqFlags => "leqf",
      Self::UGtFlags => "ugtf",
      Self::UGeqFlags => "ugeqf",
      Self::ULtFlags => "ultf",
      Self::ULeqFlags => "uleqf",
      Self::SignFlags => "signf",

      // Comparison operators
      Self::Eq => "eq",
      Self::Neq => "neq",
      Self::Gt => "gt",
      Self::Geq => "geq",
      Self::Lt => "lt",
      Self::Leq => "leq",
      Self::UGt => "ugt",
      Self::UGeq => "ugeq",
      Self::ULt => "ult",
      Self::ULeq => "uleq",
      Self::Sign => "sign",
      Self::NotSign => "notsign",

      // Calls, returns and jumps
      Self::CallFar => "callfar",
      Self::CallNear => "callnear",
      Self::CallPtr => "callptr",
      Self::CallArgs => "callargs",
      Self::Int => "int",
      Self::RetFar => "retf",
      Self::RetNear => "retn",
      Self::Jmp => "jmp",
      Self::Jne => "jne",
      Self::JmpTbl => "jmptbl",

      // Assertions
      Self::AssertEven => "assert_even",
      Self::AssertPos => "assert_pos",
    }
  }
}
&ir::IR) -> HashMap { 6 | let mut n_uses = HashMap::new(); 7 | for b in ir.iter_blocks() { 8 | for r in ir.iter_instrs(b) { 9 | let instr = ir.instr(r).unwrap(); 10 | for oper in &instr.operands { 11 | *n_uses.entry(*oper).or_insert(0) += 1; 12 | } 13 | } 14 | } 15 | n_uses 16 | } 17 | 18 | pub fn gen_graphviz_dotfile(ir: &ir::IR) -> Result { 19 | let mut buf = String::new(); 20 | let f = &mut buf; 21 | writeln!(f, "strict digraph control_flow {{")?; 22 | for b in ir.iter_blocks() { 23 | let blk = ir.block(b); 24 | let src = &blk.name; 25 | 26 | let exits = blk.exits(); 27 | if exits.len() == 0 { // block returns 28 | writeln!(f, " {}_{} -> exit;", src, b.0)?; 29 | continue; 30 | } 31 | for exit in exits { 32 | let exit_name = &ir.block(exit).name; 33 | writeln!(f, " {}_{} -> {}_{};", src, b.0, exit_name, exit.0)?; 34 | } 35 | } 36 | writeln!(f, "}}\n")?; 37 | Ok(buf) 38 | } 39 | -------------------------------------------------------------------------------- /src/lib.rs: -------------------------------------------------------------------------------- 1 | extern crate static_assertions as sa; 2 | 3 | // Helper libraries 4 | mod util; 5 | mod bsl; 6 | 7 | // Core support libraries 8 | pub mod binfmt; 9 | pub mod binary; 10 | pub mod region; 11 | pub mod segoff; 12 | pub mod spec; 13 | pub mod config; 14 | pub mod types; 15 | 16 | // Subsystems 17 | pub mod asm; 18 | pub mod ir; 19 | pub mod ast; 20 | pub mod control_flow; 21 | pub mod gen; 22 | 23 | // Main application glue 24 | pub mod app; 25 | -------------------------------------------------------------------------------- /src/region.rs: -------------------------------------------------------------------------------- 1 | use crate::segoff::{Seg, Off, SegOff}; 2 | 3 | pub struct RegionIter<'a> { 4 | mem: &'a [u8], 5 | base_seg: Seg, 6 | base_off: Off, 7 | off: usize 8 | } 9 | 10 | impl<'a> RegionIter<'a> { 11 | pub fn new(mem: &'a [u8], base_addr: SegOff) -> Self { 12 | Self { mem, base_seg: base_addr.seg, 
base_off: base_addr.off, off: 0 } 13 | } 14 | 15 | pub fn get_checked(&self, addr: SegOff) -> Result { 16 | if addr.seg != self.base_seg { return Err(format!("Mismatching segments")); } 17 | let addr = addr.off.0 as usize; 18 | let base = self.base_off.0 as usize; 19 | if addr < base { return Err(format!("RegionIter access below start of region")); } 20 | if addr >= base + self.mem.len() { return Err(format!("RegionIter access beyond end of region")); } 21 | Ok(self.mem[addr - base]) 22 | } 23 | 24 | pub fn get(&self, addr: SegOff) -> u8 { 25 | self.get_checked(addr).unwrap() 26 | } 27 | 28 | pub fn slice(&self, addr: SegOff, len: u16) -> &'a [u8] { 29 | if addr.seg != self.base_seg { panic!("Mismatching segments"); } 30 | let addr = addr.off.0 as usize; 31 | let base = self.base_off.0 as usize; 32 | let len = len as usize; 33 | if addr < base { panic!("RegionIter access below start of region"); } 34 | if addr+len > base + self.mem.len() { panic!("RegionIter access beyond end of region"); } 35 | &self.mem[addr - base .. 
addr - base + len] 36 | } 37 | 38 | pub fn peek_checked(&self) -> Result { 39 | self.get_checked(self.addr()) 40 | } 41 | 42 | pub fn peek(&self) -> u8 { 43 | self.peek_checked().unwrap() 44 | } 45 | 46 | pub fn advance(&mut self) { 47 | self.off += 1; 48 | } 49 | 50 | pub fn advance_by(&mut self, n: usize) { 51 | self.off += n; 52 | } 53 | 54 | pub fn fetch(&mut self) -> Result { 55 | let b = self.peek_checked()?; 56 | self.advance(); 57 | Ok(b) 58 | } 59 | 60 | pub fn fetch_sext(&mut self) -> Result { 61 | let b = self.fetch()?; 62 | Ok(b as i8 as i16 as u16) 63 | } 64 | 65 | pub fn fetch_u16(&mut self) -> Result { 66 | let low = self.fetch()?; 67 | let high = self.fetch()?; 68 | Ok((high as u16) << 8 | (low as u16)) 69 | } 70 | 71 | pub fn addr(&self) -> SegOff { 72 | let off: u16 = self.off.try_into().unwrap(); 73 | self.base_addr().add_offset(off) 74 | } 75 | 76 | pub fn reset_addr(&mut self, addr: SegOff) { 77 | assert!(self.base_addr() <= addr && addr < self.end_addr()); 78 | self.off = (addr.off.0 - self.base_off.0) as usize; 79 | } 80 | 81 | pub fn base_addr(&self) -> SegOff { 82 | SegOff { seg: self.base_seg, off: self.base_off } 83 | } 84 | 85 | pub fn end_addr(&self) -> SegOff { 86 | let off: u16 = self.mem.len().try_into().unwrap(); 87 | self.base_addr().add_offset(off) 88 | } 89 | 90 | pub fn bytes_remaining(&self) -> usize { 91 | self.mem.len() - self.off 92 | } 93 | } 94 | 95 | #[cfg(test)] 96 | mod tests { 97 | use super::*; 98 | #[test] 99 | fn test() { 100 | let addr = SegOff { seg: 0, off: 10 }; 101 | let mut b = RegionIter::new(&[0x12, 0x34, 0x56, 0x78, 0x9a], addr); 102 | assert_eq!(b.peek(), 0x12); 103 | assert_eq!(b.peek(), 0x12); 104 | 105 | b.advance(); 106 | assert_eq!(b.peek(), 0x34); 107 | assert_eq!(b.get(addr), 0x12); 108 | 109 | let v = b.fetch(); 110 | assert_eq!(v, 0x34); 111 | assert_eq!(b.peek(), 0x56); 112 | 113 | let v = b.fetch_u16(); 114 | assert_eq!(v, 0x7856); 115 | 116 | assert_eq!(b.peek(), 0x9a); 117 | } 118 | } 119 | 
/// Segment part of an address: a real-mode segment value or an overlay number.
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub enum Seg {
  Normal(u16),
  Overlay(u16),
}

impl Seg {
  /// Returns the segment value; panics if this is an overlay segment.
  pub fn unwrap_normal(self) -> u16 {
    let Seg::Normal(seg) = self else { panic!("Expected Seg::Normal") };
    seg
  }

  /// Returns the overlay number; panics if this is a normal segment.
  pub fn unwrap_overlay(self) -> u16 {
    let Seg::Overlay(seg) = self else { panic!("Expected Seg::Overlay") };
    seg
  }
}

/// 16-bit offset within a segment.
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub struct Off(pub u16);

/// A `segment:offset` address.
// FIXME: SegOff probably shouldn't be PartialOrd/Ord
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub struct SegOff {
  pub seg: Seg,
  pub off: Off,
}

impl SegOff {
  /// Linear address `seg * 16 + off`; panics for overlay addresses.
  pub fn abs_normal(&self) -> usize {
    (self.seg.unwrap_normal() as usize) * 16 + (self.off.0 as usize)
  }

  /// True when the segment part is an overlay.
  pub fn is_overlay_addr(&self) -> bool {
    matches!(self.seg, Seg::Overlay(_))
  }

  /// Returns this address moved forward by `off`; the offset wraps at 16 bits.
  pub fn add_offset(&self, off: u16) -> SegOff {
    SegOff { seg: self.seg, off: Off(self.off.0.wrapping_add(off)) }
  }

  /// Distance from `self` forward to `other`.
  ///
  /// Panics if the segments differ or `other` lies before `self`.
  pub fn offset_to(&self, other: SegOff) -> u16 {
    if self.seg != other.seg { panic!("Cannot take difference of different segments"); }
    if self.off > other.off { panic!("Not a positive offset"); }
    other.off.0 - self.off.0
  }
}

impl FromStr for SegOff {
  type Err = String;

  /// Parses `xxxx:yyyy` into a normal-segment address.
  fn from_str(s: &str) -> Result<Self, Self::Err> {
    // FIXME: EXTEND TO PARSE OVERLAYS? "ovrxx:yyyy"
    // format: 'xxxx:yyyy' where xxxx and yyyy are 16-bit hexadecimal
    let invalid = || format!("Invalid segoff: '{}'", s);

    let parse_field = |field: &str| -> Result<u16, String> {
      // `from_str_radix` tolerates a leading '+'; an address field must be bare hex digits.
      if field.starts_with('+') {
        return Err(invalid());
      }
      u16::from_str_radix(field, 16).map_err(|_| invalid())
    };

    let (seg, off) = s.split_once(':').ok_or_else(|| invalid())?;
    Ok(SegOff {
      seg: Seg::Normal(parse_field(seg)?),
      off: Off(parse_field(off)?),
    })
  }
}

impl fmt::Display for Seg {
  fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
    match self {
      Seg::Normal(seg) => write!(f, "{:04x}", seg),
      Seg::Overlay(seg) => write!(f, "ovr{:02x}", seg),
    }
  }
}

impl fmt::Display for Off {
  fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
    write!(f, "{:04x}", self.0)
  }
}

impl fmt::Display for SegOff {
  fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
    write!(f, "{}:{}", self.seg, self.off)
  }
}
Off(ovr.end) }; 27 | (start, end) 28 | } 29 | }; 30 | Spec { 31 | name: name.to_string(), 32 | func: Some(func), 33 | start, 34 | end, 35 | } 36 | } 37 | 38 | pub fn from_start_and_end(start: Option, end: Option) -> Self { 39 | let Some(start) = start else { panic!("No start address provided") }; 40 | let Some(end) = end else { panic!("No end address provided") }; 41 | let name = format!("func_{}_{}", start.seg, start.off); 42 | Self { name, func: None, start, end } 43 | } 44 | } 45 | -------------------------------------------------------------------------------- /src/types.rs: -------------------------------------------------------------------------------- 1 | use crate::config; 2 | use std::fmt; 3 | use std::collections::HashMap; 4 | 5 | #[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] 6 | pub struct StructRef { 7 | idx: usize, 8 | size: u16, 9 | } 10 | 11 | #[derive(Debug, Clone, PartialEq, Eq, Hash)] 12 | pub enum Type { 13 | Void, U8, U16, U32, I8, I16, I32, 14 | Array(Box, ArraySize), 15 | Ptr(Box), 16 | Struct(StructRef), 17 | Unknown, 18 | } 19 | 20 | #[derive(Debug, Clone, PartialEq, Eq, Hash)] 21 | pub enum ArraySize { 22 | Known(usize), 23 | Unknown, 24 | } 25 | 26 | impl Type { 27 | pub fn ptr(base: Type) -> Type { 28 | Type::Ptr(Box::new(base)) 29 | } 30 | 31 | pub fn is_primitive(&self) -> bool { 32 | match self { 33 | Type::Void => true, 34 | Type::U8 => true, 35 | Type::U16 => true, 36 | Type::U32 => true, 37 | Type::I8 => true, 38 | Type::I16 => true, 39 | Type::I32 => true, 40 | Type::Unknown => true, 41 | _ => false, 42 | } 43 | } 44 | 45 | pub fn size_in_bytes(&self) -> Option { 46 | match self { 47 | Type::Void => None, 48 | Type::U8 => Some(1), 49 | Type::U16 => Some(2), 50 | Type::U32 => Some(4), 51 | Type::I8 => Some(1), 52 | Type::I16 => Some(2), 53 | Type::I32 => Some(4), 54 | Type::Array(typ, sz) => { 55 | let elt_sz = typ.size_in_bytes()?; 56 | let count = match sz { 57 | ArraySize::Known(n) => Some(n), 58 | ArraySize::Unknown => 
None, 59 | }?; 60 | Some(elt_sz * count) 61 | } 62 | Type::Ptr(_) => None, // Not sure what to do here.. on 8086 this a (seg:off) pair, but on a modern machine (like in hydra) it'll be 8 bytes... Hmmm 63 | Type::Struct(r) => Some(r.size as usize), 64 | Type::Unknown => None, 65 | } 66 | } 67 | } 68 | 69 | impl fmt::Display for Type { 70 | fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { 71 | match self { 72 | Type::Void => write!(f, "void"), 73 | Type::U8 => write!(f, "u8"), 74 | Type::U16 => write!(f, "u16"), 75 | Type::U32 => write!(f, "u32"), 76 | Type::I8 => write!(f, "i8"), 77 | Type::I16 => write!(f, "i16"), 78 | Type::I32 => write!(f, "i32"), 79 | Type::Array(typ, sz) => { 80 | write!(f, "{}[", typ)?; 81 | if let ArraySize::Known(n) = sz { 82 | write!(f, "{}", n)?; 83 | } 84 | write!(f, "]") 85 | } 86 | Type::Ptr(base) => write!(f, "{}*", base), 87 | Type::Struct(r) => write!(f, "struct_id_{}", r.idx), 88 | Type::Unknown => write!(f, "?unknown_type?"), 89 | } 90 | } 91 | } 92 | 93 | #[derive(Debug)] 94 | pub struct Builder { 95 | structs: Vec, 96 | basetypes: HashMap, 97 | } 98 | 99 | impl Builder { 100 | pub fn new() -> Self { 101 | let mut basetypes = HashMap::new(); 102 | basetypes.insert("void".to_string(), Type::Void); 103 | basetypes.insert("u8".to_string(), Type::U8); 104 | basetypes.insert("u16".to_string(), Type::U16); 105 | basetypes.insert("u32".to_string(), Type::U32); 106 | basetypes.insert("i8".to_string(), Type::I8); 107 | basetypes.insert("i16".to_string(), Type::I16); 108 | basetypes.insert("i32".to_string(), Type::I32); 109 | 110 | Self { structs: vec![], basetypes } 111 | } 112 | 113 | pub fn append_struct(&mut self, s: &config::Struct) { 114 | let r = StructRef { idx: self.structs.len(), size: s.size }; 115 | self.structs.push(s.clone()); 116 | self.basetypes.insert(s.name.to_string(), Type::Struct(r)); 117 | } 118 | 119 | pub fn lookup_struct(&self, r: StructRef) -> Option<&config::Struct> { 120 | self.structs.get(r.idx) 121 | 
} 122 | 123 | fn parse_array_type(&self, s: &str) -> Result { 124 | let array_start = s.find('[') 125 | .ok_or_else(|| format!("No opening an array bracket"))?; 126 | let array_end = s.find(']') 127 | .ok_or_else(|| format!("No closing an array bracket"))?; 128 | if array_end != s.len() - 1 { 129 | return Err(format!("Array closing bracket isn't at the end of the type")); 130 | } 131 | 132 | let base_str = &s[..array_start]; 133 | let size_str = &s[array_start+1..array_end]; 134 | 135 | let base = self.parse_type(base_str)?; 136 | let size = if size_str.len() > 0 { 137 | let n: usize = size_str.parse() 138 | .map_err(|_| format!("Cannot parse array size: {}", size_str))?; 139 | ArraySize::Known(n) 140 | } else { 141 | ArraySize::Unknown 142 | }; 143 | 144 | Ok(Type::Array(Box::new(base), size)) 145 | } 146 | 147 | pub fn parse_type(&self, s: &str) -> Result { 148 | if let Some(typ) = self.basetypes.get(s) { 149 | return Ok(typ.clone()) 150 | } 151 | self.parse_array_type(s) 152 | .map_err(|_| format!("Failed to parse type: '{}'", s)) 153 | } 154 | } 155 | -------------------------------------------------------------------------------- /src/util/arrayvec.rs: -------------------------------------------------------------------------------- 1 | use std::mem::MaybeUninit; 2 | use std::ops::Index; 3 | 4 | #[derive(Clone, Copy)] 5 | pub struct ArrayVec { 6 | mem: [MaybeUninit; N], 7 | len: usize, 8 | } 9 | 10 | impl ArrayVec { 11 | pub fn new() -> Self { 12 | Self { 13 | mem: [MaybeUninit::uninit(); N], 14 | len: 0, 15 | } 16 | } 17 | 18 | pub fn as_slice(&self) -> &[T] { 19 | // SAFETY: we maintain the invariant that `len` specifies the valid 20 | // region of elements 0..len 21 | unsafe { slice_assume_init_ref(&self.mem[..self.len]) } 22 | } 23 | 24 | pub fn len(&self) -> usize { 25 | self.len 26 | } 27 | 28 | pub fn push(&mut self, obj: T) { 29 | if self.len >= N { 30 | panic!("ArrayVec capacity overflow"); 31 | } 32 | self.mem[self.len].write(obj); 33 | self.len += 1; 
34 | } 35 | } 36 | 37 | pub unsafe fn slice_assume_init_ref(slice: &[MaybeUninit]) -> &[T] { 38 | // SAFETY: casting `slice` to a `*const [T]` is safe since the caller guarantees that 39 | // `slice` is initialized, and `MaybeUninit` is guaranteed to have the same layout as `T`. 40 | // The pointer obtained is valid since it refers to memory owned by `slice` which is a 41 | // reference and thus guaranteed to be valid for reads. 42 | unsafe { &*(slice as *const [MaybeUninit] as *const [T]) } 43 | } 44 | 45 | impl std::fmt::Debug for ArrayVec { 46 | fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> Result<(), std::fmt::Error> { 47 | let mut first = true; 48 | write!(f, "[")?; 49 | for val in self.as_slice() { 50 | if !first { 51 | write!(f, ", ")?; 52 | } 53 | first = false; 54 | write!(f, "{:?}", val)?; 55 | } 56 | write!(f, "]") 57 | } 58 | } 59 | 60 | impl Index for ArrayVec { 61 | type Output = T; 62 | fn index(&self, idx: usize) -> &Self::Output { 63 | &self.as_slice()[idx] 64 | } 65 | } 66 | 67 | impl AsRef<[T]> for ArrayVec { 68 | fn as_ref(&self) -> &[T] { 69 | self.as_slice() 70 | } 71 | } 72 | 73 | impl<'a, T: Copy, const N: usize> IntoIterator for &'a ArrayVec { 74 | type Item = &'a T; 75 | type IntoIter = std::slice::Iter<'a, T>; 76 | fn into_iter(self) -> <&'a ArrayVec as IntoIterator>::IntoIter { 77 | self.as_slice().into_iter() 78 | } 79 | } 80 | -------------------------------------------------------------------------------- /src/util/dvec.rs: -------------------------------------------------------------------------------- 1 | use std::ops::{Index, IndexMut, Range}; 2 | 3 | pub type DVecIndex = i64; 4 | 5 | // Double-ended Vector 6 | #[derive(Debug)] 7 | pub struct DVec { 8 | neg: Vec, 9 | pos: Vec, 10 | } 11 | 12 | impl DVec { 13 | pub fn new() -> Self { 14 | Self { 15 | neg: vec![], 16 | pos: vec![], 17 | } 18 | } 19 | 20 | pub fn start(&self) -> DVecIndex { 21 | -(self.neg.len() as i64) 22 | } 23 | 24 | pub fn end(&self) -> DVecIndex { 25 | 
self.pos.len() as i64 26 | } 27 | 28 | pub fn range(&self) -> Range { 29 | self.start()..self.end() 30 | } 31 | 32 | pub fn empty(&self) -> bool { 33 | self.start() == 0 && self.end() == 0 34 | } 35 | 36 | pub fn push_front(&mut self, val: T) -> DVecIndex { 37 | self.neg.push(val); 38 | self.start() 39 | } 40 | 41 | pub fn push_back(&mut self, val: T) -> DVecIndex { 42 | let idx = self.end(); 43 | self.pos.push(val); 44 | idx 45 | } 46 | 47 | pub fn last_idx(&self) -> Option { 48 | if self.pos.len() > 0 { 49 | Some((self.pos.len()-1) as i64) 50 | } else if self.neg.len() > 0 { 51 | Some(-1) 52 | } else { 53 | None 54 | } 55 | } 56 | 57 | pub fn last(&self) -> Option<&T> { 58 | let idx = self.last_idx()?; 59 | Some(&self[idx]) 60 | } 61 | } 62 | 63 | impl Index for DVec { 64 | type Output = T; 65 | fn index(&self, index: DVecIndex) -> &Self::Output { 66 | if index < 0 { 67 | &self.neg[-(index+1) as usize] 68 | } else { 69 | &self.pos[index as usize] 70 | } 71 | } 72 | } 73 | 74 | impl IndexMut for DVec { 75 | fn index_mut(&mut self, index: DVecIndex) -> &mut Self::Output { 76 | if index < 0 { 77 | &mut self.neg[-(index+1) as usize] 78 | } else { 79 | &mut self.pos[index as usize] 80 | } 81 | } 82 | } 83 | 84 | #[cfg(test)] 85 | mod tests { 86 | use super::*; 87 | #[test] 88 | fn test() { 89 | let mut d = DVec::new(); 90 | assert_eq!(d.start(), 0); 91 | assert_eq!(d.end(), 0); 92 | 93 | d.push_back(4); 94 | assert_eq!(d.start(), 0); 95 | assert_eq!(d.end(), 1); 96 | 97 | d.push_front(3); 98 | assert_eq!(d.start(), -1); 99 | assert_eq!(d.end(), 1); 100 | 101 | d.push_front(5); 102 | assert_eq!(d.start(), -2); 103 | assert_eq!(d.end(), 1); 104 | 105 | let idx: Vec<_> = d.range().collect(); 106 | assert_eq!(idx, vec![-2, -1, 0]); 107 | 108 | let elts: Vec<_> = d.range().map(|i| d[i]).collect(); 109 | assert_eq!(elts, vec![5, 3, 4]); 110 | 111 | d[-1] = 42; 112 | let elts: Vec<_> = d.range().map(|i| d[i]).collect(); 113 | assert_eq!(elts, vec![5, 42, 4]); 114 | } 115 | } 
116 | -------------------------------------------------------------------------------- /src/util/mod.rs: -------------------------------------------------------------------------------- 1 | pub mod dvec; 2 | pub mod arrayvec; 3 | pub mod parse; 4 | -------------------------------------------------------------------------------- /src/util/parse.rs: -------------------------------------------------------------------------------- 1 | 2 | pub fn hex_u64(s: &str) -> Result { 3 | if s.len() > 16 { 4 | return Err("Hex string too long to fit in u64"); 5 | } 6 | let mut ret: u64 = 0; 7 | for c in s.chars() { 8 | let d = c.to_digit(16).ok_or_else(|| "Non-hexdigit char in string")?; 9 | ret = ret*16 + d as u64; 10 | } 11 | Ok(ret) 12 | } 13 | 14 | #[allow(unused)] 15 | pub fn hex_u32(s: &str) -> Result { 16 | if s.len() > 8 { 17 | return Err("Hex string too long to fit in u32"); 18 | } 19 | let n = hex_u64(s)?; 20 | Ok(n as u32) 21 | } 22 | 23 | pub fn hex_u16(s: &str) -> Result { 24 | if s.len() > 4 { 25 | return Err("Hex string too long to fit in u16"); 26 | } 27 | let n = hex_u64(s)?; 28 | Ok(n as u16) 29 | } 30 | 31 | #[allow(unused)] 32 | pub fn hex_u8(s: &str) -> Result { 33 | if s.len() > 2 { 34 | return Err("Hex string too long to fit in u8"); 35 | } 36 | let n = hex_u64(s)?; 37 | Ok(n as u8) 38 | } 39 | 40 | #[cfg(test)] 41 | mod tests { 42 | use super::*; 43 | #[test] 44 | fn test() { 45 | assert_eq!(hex_u64("5"), Ok(5)); 46 | assert_eq!(hex_u64("a"), Ok(10)); 47 | assert_eq!(hex_u64("deadbeaf"), Ok(3735928495)); 48 | } 49 | } 50 | -------------------------------------------------------------------------------- /subprojects/bsl/.gitignore: -------------------------------------------------------------------------------- 1 | *~ 2 | \#*# 3 | *.dSYM/ 4 | -------------------------------------------------------------------------------- /subprojects/bsl/README.md: -------------------------------------------------------------------------------- 1 | BSL: Barebones 
Specification Language 2 | -------------------------------------------------------------------------------- /subprojects/bsl/bsl.rs: -------------------------------------------------------------------------------- 1 | /* automatically generated by rust-bindgen 0.69.4 (and then modified by hand) */ 2 | use std::os::raw::{c_int, c_char, c_void}; 3 | 4 | pub const __bool_true_false_are_defined: u32 = 1; 5 | pub const true_: u32 = 1; 6 | pub const false_: u32 = 0; 7 | 8 | #[repr(C)] 9 | #[derive(Debug, Copy, Clone)] 10 | pub struct bsl { 11 | _unused: [u8; 0], 12 | } 13 | 14 | pub type bsl_t = bsl; 15 | pub type bsl_iter_t = bsl_iter; 16 | 17 | #[repr(C)] 18 | #[repr(align(16))] 19 | #[derive(Debug, Copy, Clone)] 20 | pub struct bsl_iter { 21 | pub _opaque: [c_char; 32usize], 22 | } 23 | 24 | pub const BSL_SUCCESS: c_int = 0; 25 | pub const BSL_ERR_PARSE: c_int = 1; 26 | 27 | pub const BSL_TYPE_STR: c_int = 0; 28 | pub const BSL_TYPE_NODE: c_int = 1; 29 | 30 | extern "C" { 31 | pub fn bsl_parse_new( 32 | buf: *const c_char, 33 | sz: usize, 34 | opt_err: *mut c_int, 35 | ) -> *mut bsl_t; 36 | } 37 | 38 | extern "C" { 39 | pub fn bsl_delete(bsl: *mut bsl_t); 40 | } 41 | extern "C" { 42 | pub fn bsl_get_generic( 43 | bsl: *mut bsl_t, 44 | key: *const c_char, 45 | opt_type: *mut c_int, 46 | ) -> *mut c_void; 47 | } 48 | extern "C" { 49 | pub fn bsl_get_str( 50 | bsl: *mut bsl_t, 51 | key: *const c_char, 52 | ) -> *const c_char; 53 | } 54 | extern "C" { 55 | pub fn bsl_get_node(bsl: *mut bsl_t, key: *const c_char) -> *mut bsl_t; 56 | } 57 | extern "C" { 58 | pub fn bsl_iter_begin(it: *mut bsl_iter_t, bsl: *mut bsl_t); 59 | } 60 | extern "C" { 61 | pub fn bsl_iter_next( 62 | it: *mut bsl_iter_t, 63 | _type: *mut c_int, 64 | _key: *mut *const c_char, 65 | _val: *mut *mut c_void, 66 | ) -> bool; 67 | } 68 | -------------------------------------------------------------------------------- /subprojects/bsl/build.sh: 
-------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | set -e 3 | THISDIR=$(dirname $(realpath $0)) 4 | cd $THISDIR 5 | 6 | if [ ! -d build ]; then 7 | meson build 8 | fi 9 | 10 | (cd build && ninja $@) 11 | -------------------------------------------------------------------------------- /subprojects/bsl/foo.bsl: -------------------------------------------------------------------------------- 1 | my { 2 | data g 3 | hj { 4 | go { to 4 store 7 } 5 | by { to 3 store 9 } 6 | } 7 | } -------------------------------------------------------------------------------- /subprojects/bsl/meson.build: -------------------------------------------------------------------------------- 1 | project('bsl', 'c') 2 | 3 | ################################## 4 | ## Common flags 5 | 6 | flags_c = [ 7 | '-std=c11', 8 | #'-fvisibility=hidden', 9 | ] 10 | 11 | flags_cpp = [ 12 | '-std=c++14', 13 | '-fno-exceptions', '-fno-rtti', 14 | #'-fvisibility=hidden', '-fvisibility-inlines-hidden', 15 | ] 16 | 17 | flags_common = [ 18 | '-fdiagnostics-color=always', 19 | '-D_GNU_SOURCE', 20 | '-Dtypeof=__typeof__', 21 | '-I' + meson.source_root() + '/src', 22 | '-g', '-fPIC', 23 | '-m64', 24 | '-ffast-math', '-fno-associative-math', '-fno-reciprocal-math', 25 | '-fno-strict-aliasing', 26 | ] 27 | 28 | flags_warn = [ 29 | ## Warning enables 30 | '-Wall', '-Werror', '-Wextra', 31 | 32 | ## Warning disables: annoying 33 | '-Wno-unused-function', '-Wno-unused-parameter', 34 | 35 | ## Warning disables: Complains about 'Type t[1] = {{0}}' 36 | '-Wno-missing-field-initializers', 37 | 38 | ## Warning disables: Complains about 'if (val < 0 || val >= LIMIT)' when val is unsigned 39 | '-Wno-type-limits', 40 | 41 | ## Warning disables: Complains about macros that expand to empty if-else bodies 42 | '-Wno-empty-body', 43 | ] 44 | 45 | flags_release = [ 46 | ## Optimization 47 | '-O2', 48 | 49 | ## Warning disables: 50 | '-Wno-unused-variable', 
'-Wno-unused-but-set-variable', 51 | 52 | ## Warning disables: Seriously, no comments in comments? 53 | '-Wno-comments', 54 | ] 55 | 56 | flags_debug = [ 57 | ] 58 | 59 | link_flags_common = [ 60 | '-fdiagnostics-color=always', 61 | #'-fvisibility=hidden', '-fvisibility-inlines-hidden', 62 | '-lpthread', '-m64' 63 | ] 64 | 65 | if not meson.is_subproject() 66 | add_global_arguments(flags_c + flags_common + flags_warn, language : 'c') 67 | add_global_arguments(flags_cpp + flags_common + flags_warn, language : 'cpp') 68 | 69 | add_global_link_arguments(link_flags_common, language : 'c') 70 | add_global_link_arguments(link_flags_common, language : 'cpp') 71 | 72 | ## Release 73 | add_global_arguments(flags_release, language : 'c') 74 | add_global_arguments(flags_release, language : 'cpp') 75 | 76 | ## Debug 77 | # add_global_arguments(flags_debug, language : 'c') 78 | # add_global_arguments(flags_debug, language : 'cpp') 79 | endif 80 | 81 | ################################## 82 | ## Definitions 83 | 84 | subdir('src') 85 | 86 | ################################## 87 | ## Dependency definition 88 | 89 | libbsl_dep = declare_dependency(include_directories : 'src', link_with : libbsl) 90 | -------------------------------------------------------------------------------- /subprojects/bsl/src/app/bsl_check.c: -------------------------------------------------------------------------------- 1 | #include "bsl/bsl.h" 2 | #include 3 | #include 4 | 5 | #define FAIL(...) 
do { fprintf(stderr, "FAIL: "); fprintf(stderr, __VA_ARGS__); fprintf(stderr, "\n"); exit(42); } while(0) 6 | 7 | static inline char *file_read(const char *name, size_t *_len) 8 | { 9 | FILE *fp = fopen(name, "r"); 10 | if (!fp) FAIL("Failed to open file: %s", name); 11 | 12 | fseek(fp, 0, SEEK_END); 13 | size_t len = ftell(fp); 14 | fseek(fp, 0, SEEK_SET); 15 | 16 | char *mem = malloc(len); 17 | if (!mem) FAIL("Failed to allocate file buffer"); 18 | 19 | size_t n = fread(mem, 1, len, fp); 20 | if (n != len) FAIL("Failed to read everything from file: %s", name); 21 | 22 | fclose(fp); 23 | 24 | *_len = len; 25 | return mem; 26 | } 27 | 28 | 29 | int main(int argc, char *argv[]) 30 | { 31 | if (argc != 2) { 32 | fprintf(stderr, "usage: %s \n", argv[0]); 33 | return 1; 34 | } 35 | const char *filename = argv[1]; 36 | 37 | size_t data_len; 38 | char *data = file_read(filename, &data_len); 39 | 40 | bsl_t *b = bsl_parse_new(data, data_len, NULL); 41 | 42 | int ret = 0; 43 | if (!b) { 44 | fprintf(stderr, "Failed to parse bsl from '%s'\n", filename); 45 | ret = 1; 46 | } 47 | 48 | if (b) bsl_delete(b); 49 | free(data); 50 | 51 | return ret; 52 | } 53 | -------------------------------------------------------------------------------- /subprojects/bsl/src/app/meson.build: -------------------------------------------------------------------------------- 1 | 2 | executable('bsl_check', 'bsl_check.c', link_with: libbsl) 3 | -------------------------------------------------------------------------------- /subprojects/bsl/src/bsl/bsl.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | /* BSL: Barebones Specification Language */ 4 | 5 | #include 6 | #include 7 | #include 8 | 9 | typedef struct bsl bsl_t; 10 | typedef struct bsl_iter bsl_iter_t; 11 | struct __attribute__((aligned(16))) bsl_iter { char _opaque[32]; }; 12 | 13 | enum { 14 | BSL_SUCCESS, 15 | BSL_ERR_PARSE, 16 | }; 17 | 18 | enum { 19 | BSL_TYPE_STR = 0, // char 
* 20 | BSL_TYPE_NODE = 1, // bsl_t * 21 | }; 22 | 23 | bsl_t * bsl_parse_new(const char *buf, size_t sz, int *opt_err); 24 | void bsl_delete(bsl_t *bsl); 25 | 26 | void * bsl_get_generic(bsl_t *bsl, const char *key, int *opt_type); 27 | const char * bsl_get_str(bsl_t *bsl, const char *key); 28 | bsl_t * bsl_get_node(bsl_t *bsl, const char *key); 29 | 30 | void bsl_iter_begin(bsl_iter_t *it, bsl_t *bsl); 31 | bool bsl_iter_next(bsl_iter_t *it, int *_type, const char **_key, void **_val); 32 | -------------------------------------------------------------------------------- /subprojects/bsl/src/bsl/meson.build: -------------------------------------------------------------------------------- 1 | 2 | libbsl = static_library('bsl', 'bsl.c') 3 | test('test_bsl', executable('test_bsl', 'test_bsl.c', link_with: libbsl)) 4 | -------------------------------------------------------------------------------- /subprojects/bsl/src/bsl/test_bsl.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include "bsl.h" 6 | 7 | #define TEST_FAIL(...) 
do { fprintf(stderr, "TEST FAIL: "); fprintf(stderr, __VA_ARGS__); fprintf(stderr, "\n"); abort(); } while(0) 8 | 9 | #define PARSE(s) ({ \ 10 | bsl_t *b = bsl_parse_new(s, strlen(s), NULL); \ 11 | if (!b) TEST_FAIL("'%s'", s); \ 12 | b; }) 13 | #define CLEANUP(b) bsl_delete(b) 14 | 15 | #define GET(b, k, v) get_helper(b, k, v, true) 16 | #define GET_NODE(b, k) get_node_helper(b, k, true) 17 | #define GET_FAIL(b, k) get_helper(b, k, "", false) 18 | 19 | static inline void get_helper(bsl_t *b, const char *key, const char *exp_val, bool succeed) 20 | { 21 | const char *val = bsl_get_str(b, key); 22 | if (succeed) { 23 | if (!val) TEST_FAIL("Failed to get string key: '%s'", key); 24 | if (0 != strcmp(val, exp_val)) TEST_FAIL("Mismatch value: expected '%s', got '%s'", exp_val, val); 25 | } else { 26 | if (val) TEST_FAIL("Expected failure, but got success on key: '%s'", key); 27 | } 28 | } 29 | 30 | static inline void get_node_helper(bsl_t *b, const char *key, bool succeed) 31 | { 32 | bsl_t *val = bsl_get_node(b, key); 33 | if (succeed) { 34 | if (!val) TEST_FAIL("Failed to get node key: '%s'", key); 35 | } else { 36 | if (val) TEST_FAIL("Expected failure, but got success on key: '%s'", key); 37 | } 38 | } 39 | 40 | static void test_1(void) 41 | { 42 | bsl_t *b = PARSE("foo bar"); 43 | GET(b, "foo", "bar"); 44 | GET_FAIL(b, "foo1"); 45 | CLEANUP(b); 46 | } 47 | 48 | static void test_2(void) 49 | { 50 | bsl_t *b = PARSE("foo bar good stuff "); 51 | GET(b, "foo", "bar"); 52 | GET(b, "good", "stuff"); 53 | GET_FAIL(b, "foo1"); 54 | CLEANUP(b); 55 | } 56 | 57 | static void test_3(void) 58 | { 59 | bsl_t *b = PARSE("top {foo bar baz {} } top2 r "); 60 | GET(b, "top.foo", "bar"); 61 | GET_FAIL(b, "top.foo.baz"); 62 | GET_NODE(b, "top.baz"); 63 | GET(b, "top2", "r"); 64 | CLEANUP(b); 65 | } 66 | 67 | static void test_4(void) 68 | { 69 | bsl_t *b = PARSE("top \"foo bar\" bot g quote \"{ key val }\""); 70 | GET(b, "top", "foo bar"); 71 | GET(b, "bot", "g"); 72 | GET(b, "quote", 
"{ key val }"); 73 | CLEANUP(b); 74 | } 75 | 76 | int main(void) 77 | { 78 | test_1(); 79 | test_2(); 80 | test_3(); 81 | test_4(); 82 | } 83 | -------------------------------------------------------------------------------- /subprojects/bsl/src/meson.build: -------------------------------------------------------------------------------- 1 | 2 | subdir('bsl') 3 | 4 | if not meson.is_subproject() 5 | subdir('app') 6 | endif 7 | -------------------------------------------------------------------------------- /subprojects/bsl/test.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | set -e 3 | THISDIR=$(dirname $(realpath $0)) 4 | cd $THISDIR 5 | 6 | if [ ! -d build ]; then 7 | meson build 8 | fi 9 | 10 | (cd build && meson test) 11 | --------------------------------------------------------------------------------