├── .cargo └── config.toml ├── .github └── workflows │ ├── build.yml │ ├── crates.yml │ └── rust.yml ├── .gitignore ├── Cargo.lock ├── Cargo.toml ├── Cross.toml ├── LICENSE ├── README.md ├── arch └── x64 │ ├── build-vmimage │ ├── .gitignore │ ├── Cargo.toml │ ├── src │ │ └── main.rs │ └── vmimage-x86-64 │ │ ├── .cargo │ │ └── config.toml │ │ ├── .gitignore │ │ ├── Cargo.lock │ │ ├── Cargo.toml │ │ ├── gdb.sh │ │ ├── link.x │ │ ├── src │ │ ├── allocator.rs │ │ ├── gdt.rs │ │ ├── interrupts.rs │ │ ├── lib.rs │ │ ├── main.rs │ │ ├── memory.rs │ │ ├── observer.rs │ │ ├── pci.rs │ │ ├── queue.rs │ │ ├── serial.rs │ │ ├── timer.rs │ │ └── userspace.rs │ │ └── tests │ │ ├── basic_boot.rs │ │ ├── heap_allocation.rs │ │ ├── should_panic.rs │ │ └── stack_overflow.rs │ ├── liblisa-x64-observer-shmqueue │ ├── Cargo.toml │ └── src │ │ ├── frame │ │ ├── command.rs │ │ ├── control.rs │ │ └── mod.rs │ │ ├── lib.rs │ │ ├── queue.rs │ │ └── regs.rs │ └── liblisa-x64-observer │ ├── Cargo.toml │ ├── benches │ ├── oracle.rs │ └── vm.rs │ ├── examples │ ├── multitest.rs │ ├── test-qemu-mem-spdi.rs │ ├── test-qemu-spdi.rs │ ├── test.rs │ └── undocumented.rs │ ├── image │ └── .gitignore │ └── src │ ├── lib.rs │ ├── selftest.rs │ └── vm.rs ├── cli ├── liblisa-libcli │ ├── Cargo.toml │ └── src │ │ ├── detect_changes.rs │ │ ├── enumerate.rs │ │ ├── infer_accesses.rs │ │ ├── infer_dataflows.rs │ │ ├── infer_encoding.rs │ │ ├── infer_validity.rs │ │ ├── lib.rs │ │ ├── observe.rs │ │ ├── quick_enumerate.rs │ │ ├── synthesize.rs │ │ ├── synthesize_encoding.rs │ │ └── threadpool │ │ ├── cache.rs │ │ ├── cpu.rs │ │ ├── enumeration.rs │ │ ├── mod.rs │ │ ├── oracle.rs │ │ ├── synthesis.rs │ │ └── work.rs ├── liblisa-semantics-tool │ ├── Cargo.toml │ ├── README.md │ └── src │ │ ├── arch_compare.rs │ │ ├── main.rs │ │ ├── merge.rs │ │ ├── progress.rs │ │ └── server.rs └── liblisa-x64 │ ├── Cargo.toml │ └── src │ └── main.rs ├── crates.io_version_check.sh ├── crossbuild ├── arm64 │ └── Dockerfile └── x64-stretch │ ├── .gitignore │ ├── build.sh │ └── docker │ └── Dockerfile ├── github_artifact_upload.sh ├── liblisa-enc ├── Cargo.toml ├── benches │ ├── accesses.rs │ ├── dataflow.rs │ ├── observe.rs │ ├── skip.rs │ └── validity.rs └── src │ ├── accesses │ ├── computation.rs │ └── mod.rs │ ├── cache.rs │ ├── changes │ ├── addrs.rs │ ├── imm.rs │ ├── inputs.rs │ ├── mod.rs │ └── outputs.rs │ ├── cleanup │ ├── accesses.rs │ ├── bits.rs │ ├── dontcare.rs │ ├── generalizations.rs │ └── mod.rs │ ├── dataflow │ ├── analyzer.rs │ ├── flow.rs │ ├── fuzz.rs │ ├── mod.rs │ ├── results.rs │ └── spec.rs │ ├── encoding.rs │ ├── lib.rs │ ├── skip │ ├── mod.rs │ ├── random_search.rs │ └── tunnel.rs │ └── validity.rs ├── liblisa-synth ├── Cargo.toml ├── benches │ ├── template_synthesis.rs │ └── term_searcher.rs └── src │ ├── cond │ ├── cache.rs │ ├── caselist.rs │ ├── casemap.rs │ ├── combine_simple.rs │ ├── combiner.rs │ ├── input_hash.rs │ ├── isomorphisms.rs │ ├── mod.rs │ ├── switch.rs │ ├── synthesizer.rs │ └── transitions.rs │ ├── gen.rs │ ├── lib.rs │ ├── normalizer.rs │ ├── output.rs │ ├── predicate.rs │ ├── search │ ├── exprsearcher.rs │ ├── mod.rs │ ├── searcher.rs │ └── termsearcher.rs │ ├── synthesis_loop.rs │ ├── templates │ ├── mod.rs │ ├── normalize_filter.rs │ ├── ordering.rs │ ├── preprocess.rs │ └── symexec.rs │ ├── tree │ ├── expr_finder │ │ ├── bitmap_mcs.rs │ │ ├── greedy.rs │ │ ├── mcs.rs │ │ └── mod.rs │ ├── mapping.rs │ ├── mod.rs │ └── synthesizer.rs │ ├── utils │ ├── delta_vec.rs │ └── mod.rs │ └── write_order.rs ├── 
liblisa ├── Cargo.toml ├── README.md ├── benches │ ├── bitmap.rs │ ├── find-differences.rs │ ├── instantiation.rs │ ├── randomization.rs │ ├── system-state.rs │ ├── terms.rs │ └── utils.rs ├── examples │ └── liblisa-encoding-to-z3.rs └── src │ ├── arch │ ├── fake.rs │ ├── mod.rs │ ├── scope.rs │ ├── undef.rs │ └── x64 │ │ ├── disasm.rs │ │ ├── mod.rs │ │ └── undef │ │ ├── mod.rs │ │ └── xed_convert.rs │ ├── compare │ ├── addresses.rs │ ├── computations.rs │ ├── group.rs │ ├── mapping.rs │ ├── mod.rs │ ├── rows.rs │ ├── split.rs │ └── summary.rs │ ├── encoding │ ├── bitpattern │ │ ├── locs.rs │ │ └── mod.rs │ ├── dataflows │ │ ├── accesses.rs │ │ ├── address_computation.rs │ │ ├── inputs.rs │ │ ├── locs.rs │ │ └── mod.rs │ ├── display.rs │ ├── indexed.rs │ ├── mcs.rs │ ├── merge │ │ ├── mod.rs │ │ ├── semantical.rs │ │ └── structural.rs │ └── mod.rs │ ├── instr │ ├── counter.rs │ ├── extended.rs │ ├── filter.rs │ ├── filter_mcs.rs │ ├── map.rs │ ├── mod.rs │ ├── select_covering_set8.txt │ ├── set.rs │ └── tree.rs │ ├── lib.rs │ ├── oracle │ ├── careful.rs │ ├── counter.rs │ ├── iter.rs │ ├── mod.rs │ └── verifier.rs │ ├── semantics │ ├── default │ │ ├── builder.rs │ │ ├── codegen │ │ │ ├── mod.rs │ │ │ ├── sexpr.rs │ │ │ └── smt.rs │ │ ├── computation.rs │ │ ├── mod.rs │ │ ├── ops.rs │ │ └── smtgen.rs │ └── mod.rs │ ├── smt │ ├── cache.rs │ ├── mod.rs │ ├── solver.rs │ ├── tree.rs │ └── z3 │ │ ├── mod.rs │ │ └── tests │ │ ├── computations.rs │ │ ├── equivalence.rs │ │ └── mod.rs │ ├── state │ ├── addr.rs │ ├── byteview.rs │ ├── jit │ │ ├── complex.rs │ │ ├── gpreg.rs │ │ ├── mod.rs │ │ └── simple.rs │ ├── locs.rs │ ├── memory.rs │ ├── mod.rs │ ├── random │ │ ├── mod.rs │ │ └── value.rs │ └── split_dests.rs │ ├── utils │ ├── bitmap │ │ ├── fixed.rs │ │ ├── growing.rs │ │ ├── mod.rs │ │ └── tiny.rs │ ├── cmov.rs │ ├── iter.rs │ ├── matrix.rs │ ├── mcs │ │ ├── bfs.rs │ │ └── mod.rs │ ├── min_cover_with_exclusions.rs │ ├── minisat.rs │ └── mod.rs │ └── value.rs └── rustfmt.toml /.cargo/config.toml: -------------------------------------------------------------------------------- 1 | [target.'*'] 2 | rustflags = ["-C", "link-arg=-fuse-ld=lld"] 3 | 4 | [target.x86_64-unknown-linux-gnu] 5 | rustflags = ["-C", "target-feature=+sse3,+avx,+avx2,+fma"] 6 | 7 | [target.x86_64-unknown-linux-musl] 8 | rustflags = ["-C", "target-feature=+sse3,+avx,+avx2,+fma"] 9 | 10 | [unstable] 11 | bindeps = true -------------------------------------------------------------------------------- /.github/workflows/build.yml: -------------------------------------------------------------------------------- 1 | on: 2 | release: 3 | types: [created] 4 | name: Build release binaries 5 | jobs: 6 | build: 7 | name: Create release artifacts 8 | runs-on: ubuntu-20.04 9 | steps: 10 | - uses: actions/checkout@v4 11 | - name: Install rust nightly 12 | run: rustup update nightly-2025-01-05 && rustup default nightly-2025-01-05 && rustup component add rustfmt && rustup component add clippy && rustup component add rust-src && rustup component add llvm-tools 13 | - name: Install cross 14 | run: cargo install cross --git https://github.com/cross-rs/cross 15 | - name: Build vmimage 16 | run: cargo run -r --bin build-vmimage 17 | - name: Build (x86_64) 18 | run: cargo build -r --bin liblisa-semantics-tool --bin liblisa-x64 19 | - name: Upload liblisa-x64 (x64) 20 | env: 21 | GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} 22 | RELEASE_ID: ${{ needs.create_release.outputs.id }} 23 | FILE: 'target/release/liblisa-x64' 24 | FILENAME: 'liblisa-x64' 25 
| run: ./github_artifact_upload.sh 26 | - name: Upload liblisa-semantics-tool (x64) 27 | env: 28 | GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} 29 | RELEASE_ID: ${{ needs.create_release.outputs.id }} 30 | FILE: 'target/release/liblisa-semantics-tool' 31 | FILENAME: 'liblisa-semantics-tool-x64' 32 | run: ./github_artifact_upload.sh 33 | - name: Build (arm64) 34 | run: cross build -r --target aarch64-unknown-linux-gnu --bin liblisa-semantics-tool 35 | - name: Upload liblisa-semantics-tool (arm64) 36 | env: 37 | GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} 38 | RELEASE_ID: ${{ needs.create_release.outputs.id }} 39 | FILE: 'target/aarch64-unknown-linux-gnu/release/liblisa-semantics-tool' 40 | FILENAME: 'liblisa-semantics-tool-arm64' 41 | run: ./github_artifact_upload.sh -------------------------------------------------------------------------------- /.github/workflows/crates.yml: -------------------------------------------------------------------------------- 1 | on: workflow_dispatch 2 | name: Publish to crates.io 3 | jobs: 4 | build: 5 | name: Publish to crates.io 6 | runs-on: ubuntu-20.04 7 | steps: 8 | - uses: actions/checkout@v4 9 | - name: Install rust nightly 10 | run: rustup update nightly-2025-01-05 && rustup default nightly-2025-01-05 && rustup component add rustfmt && rustup component add clippy && rustup component add rust-src && rustup component add llvm-tools 11 | - name: Build vmimage 12 | run: cargo run -r --bin build-vmimage 13 | - name: Package (liblisa) 14 | run: ./crates.io_version_check.sh liblisa || cargo publish -p liblisa --token ${{ secrets.CRATES_IO_API_TOKEN }} 15 | - name: Package (liblisa-x64-observer-shmqueue) 16 | run: ./crates.io_version_check.sh liblisa-x64-observer-shmqueue || cargo publish -p liblisa-x64-observer-shmqueue --token ${{ secrets.CRATES_IO_API_TOKEN }} 17 | - name: Package (liblisa-x64-observer) 18 | run: ./crates.io_version_check.sh liblisa-x64-observer || cargo publish -p liblisa-x64-observer --token ${{ secrets.CRATES_IO_API_TOKEN }} --allow-dirty 19 | - name: Package (liblisa-enc) 20 | run: ./crates.io_version_check.sh liblisa-enc || cargo publish -p liblisa-enc --token ${{ secrets.CRATES_IO_API_TOKEN }} 21 | - name: Package (liblisa-synth) 22 | run: ./crates.io_version_check.sh liblisa-synth || cargo publish -p liblisa-synth --token ${{ secrets.CRATES_IO_API_TOKEN }} 23 | - name: Package (liblisa-semantics-tool) 24 | run: ./crates.io_version_check.sh liblisa-semantics-tool || cargo publish -p liblisa-semantics-tool --token ${{ secrets.CRATES_IO_API_TOKEN }} -------------------------------------------------------------------------------- /.github/workflows/rust.yml: -------------------------------------------------------------------------------- 1 | name: Rust 2 | 3 | on: 4 | push: 5 | branches: [ "main" ] 6 | 7 | env: 8 | CARGO_TERM_COLOR: always 9 | 10 | jobs: 11 | build: 12 | runs-on: ubuntu-latest 13 | 14 | steps: 15 | - uses: actions/checkout@v4 16 | - name: Install rust nightly 17 | run: rustup update nightly-2025-01-05 && rustup default nightly-2025-01-05 && rustup component add rustfmt && rustup component add clippy && rustup component add rust-src && rustup component add llvm-tools 18 | - uses: Swatinem/rust-cache@v2 19 | - name: Build vmimage 20 | run: cargo run -r --bin build-vmimage 21 | - name: Clippy 22 | run: cargo clippy --no-deps 23 | - name: Check formatting 24 | run: cargo fmt --check 25 | - name: Build 26 | run: cargo build --verbose 27 | - name: Run tests 28 | run: cargo test -p liblisa # TODO: Run tests for the other crates as 
well 29 | publish-docs: 30 | runs-on: ubuntu-latest 31 | permissions: 32 | contents: read 33 | deployments: write 34 | name: Publish to Cloudflare Pages 35 | steps: 36 | - name: Checkout 37 | uses: actions/checkout@v3 38 | 39 | - name: Install rust nightly 40 | run: rustup update nightly-2025-01-05 && rustup default nightly-2025-01-05 && rustup component add rustfmt && rustup component add clippy && rustup component add rust-src && rustup component add llvm-tools 41 | - uses: Swatinem/rust-cache@v2 42 | 43 | - name: Build documentation 44 | run: cargo doc --workspace --lib --no-deps 45 | 46 | - name: Add redirect 47 | run: echo "/ /liblisa/ 302" > target/doc/_redirects 48 | 49 | - name: Publish to Cloudflare Pages 50 | uses: cloudflare/pages-action@v1 51 | with: 52 | apiToken: ${{ secrets.CLOUDFLARE_API_TOKEN }} 53 | accountId: ${{ secrets.CLOUDFLARE_ACCOUNT_ID }} 54 | projectName: liblisa-docs 55 | directory: target/doc/ -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | target 2 | perf.data 3 | perf.data.old 4 | .vscode 5 | .gdb_history 6 | -------------------------------------------------------------------------------- /Cargo.toml: -------------------------------------------------------------------------------- 1 | [workspace] 2 | resolver = "2" 3 | members = [ 4 | # Main libraries 5 | "liblisa", 6 | "liblisa-enc", 7 | "liblisa-synth", 8 | 9 | # Architecture-specific 10 | "arch/x64/liblisa-x64-observer", 11 | "arch/x64/liblisa-x64-observer-shmqueue", 12 | 13 | # CLI tools 14 | "cli/liblisa-x64", 15 | "cli/liblisa-libcli", 16 | "cli/liblisa-semantics-tool", "arch/x64/build-vmimage", 17 | ] 18 | 19 | [profile.dev] 20 | opt-level = 2 21 | 22 | [profile.release] 23 | lto = "fat" 24 | debug = true 25 | codegen-units = 1 26 | opt-level = 3 27 | 28 | [profile.test] 29 | opt-level = 2 30 | 31 | [profile.dev.package.liblisa-x64-observer] 32 | opt-level = 3 33 | -------------------------------------------------------------------------------- /Cross.toml: -------------------------------------------------------------------------------- 1 | [target.aarch64-unknown-linux-gnu.dockerfile] 2 | file = "./crossbuild/arm64/Dockerfile" 3 | context = "./crossbuild/arm64/" -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # libLISA 2 | `libLISA` is a library for automatically discovering and analyzing CPU instructions. 3 | It relies on minimal human input: only a definition of CPU state and a CPU observer are required to be implemented. 4 | 5 | ## Results 6 | We have analyzed 5 different x86-64 architectures. 7 | You can download the generated semantics [here](https://osf.io/2hfq9/?view_only=a9fb6f0d639b46a287b0ade9f293b249). 8 | 9 | ## Using the semantics 10 | The easiest way to use the semantics is to use the `liblisa-semantics-tool`. 11 | This tool provides various ways to access the semantics. 12 | One of these, the "semantics server", makes the semantics available over stdin/stdout. 
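For a quick end-to-end sketch (the exact invocation and output format are described below), a single instruction can be piped into the server from a shell. Here `4831D0` is the encoding of `XOR rax, rdx`:

```sh
# Sketch: query the semantics of one instruction non-interactively.
# Assumes the semantics have been downloaded to encodings.json.
echo "4831D0" | cargo run -r --bin liblisa-semantics-tool -- server encodings.json
```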
13 | 14 | The semantics server can be started with (assuming `encodings.json` is the path to the semantics): 15 | 16 | ```sh 17 | cargo run -r --bin liblisa-semantics-tool -- server encodings.json 18 | ``` 19 | 20 | When writing a hexadecimal instruction followed by a newline to stdin, the semantics server will instantiate a matching encoding and output easy-to-parse JSON semantics for this specific instruction. 21 | 22 | The semantics are stored as JSON files. 23 | To aid parsing, the schema can be obtained by running `cargo run --bin liblisa-semantics-tool -- schema`. 24 | Libraries are provided to load and manipulate the semantics using Rust. 25 | The semantics can be loaded using the `serde_json` crate. This can be done as follows: 26 | 27 | ```rust 28 | let file = BufReader::new(File::open("semantics.json")?); 29 | let semantics: Vec<Encoding<X64Arch, SynthesizedComputation>> = serde_json::from_reader(file)?; 30 | ``` 31 | 32 | # Project structure 33 | The project is split into several crates: 34 | 35 | * `liblisa`: definitions of CPU state, ISAs, encodings, dataflows and other core components of libLISA. 36 | * `liblisa-enc`: encoding analysis. 37 | * `liblisa-synth`: semantics synthesis. 38 | * `cli/liblisa-libcli`: the generic analysis CLI. It is instantiated by: 39 | * `cli/liblisa-x64` 40 | * `arch`: folder that contains architecture-specific observers. 41 | 42 | # License 43 | The code in this repository is licensed under the AGPLv3. -------------------------------------------------------------------------------- /arch/x64/build-vmimage/.gitignore: -------------------------------------------------------------------------------- 1 | image/ -------------------------------------------------------------------------------- /arch/x64/build-vmimage/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "build-vmimage" 3 | version = "0.1.0" 4 | edition = "2021" 5 | 6 | [dependencies] 7 | bootloader = { version = "0.11.7" } -------------------------------------------------------------------------------- /arch/x64/build-vmimage/src/main.rs: -------------------------------------------------------------------------------- 1 | use std::path::PathBuf; 2 | use std::process::Command; 3 | 4 | fn main() { 5 | let cargo_binary = "cargo"; 6 | println!("Cargo binary: {cargo_binary}"); 7 | let mut cmd = Command::new(cargo_binary); 8 | cmd.arg("build"); 9 | 10 | // TODO: Generate a separate debug image 11 | cmd.arg("--release"); 12 | 13 | cmd.args(["--target", "x86_64-unknown-none"]); 14 | cmd.args(["--target-dir", "../image"]); 15 | cmd.args(["-Z", "build-std=core,alloc"]); 16 | 17 | let dir = PathBuf::from(&std::env::var("CARGO_MANIFEST_DIR").unwrap()).join("vmimage-x86-64"); 18 | 19 | cmd.current_dir(&dir); 20 | let to_remove = std::env::vars() 21 | // .inspect(|x| println!("Current env: {x:?}")) 22 | .map(|(k, _)| k) 23 | .filter(|k| k.starts_with("CARGO") || k.starts_with("RUST")) 24 | .collect::<Vec<_>>(); 25 | for x in to_remove { 26 | cmd.env_remove(x); 27 | } 28 | 29 | let root = std::env::current_dir().unwrap(); 30 | let root = root.display(); 31 | 32 | let home = std::env::var("HOME").unwrap(); 33 | let rustflags = format!( 34 | "-Clink-arg=-Tlink.x -Ccode-model=large -Crelocation-model=static -Ctarget-feature=-mmx,-sse,+soft-float --remap-path-prefix {root}=. --remap-path-prefix {home}=.." 
35 | ); 36 | cmd.env("RUSTFLAGS", rustflags); 37 | 38 | let status = cmd.spawn().unwrap().wait().unwrap(); 39 | 40 | assert!(status.success()); 41 | 42 | // set by cargo, build scripts should use this directory for output files 43 | let out_dir = PathBuf::from("arch/x64/liblisa-x64-observer/image"); 44 | 45 | // TODO: Use cargo artifacts once we're able to specify rustflags in the dependency. 46 | let kernel = dir 47 | .parent() 48 | .unwrap() 49 | .join("image") 50 | .join("x86_64-unknown-none") 51 | .join("release") 52 | .join("vmimage-x86-64"); 53 | 54 | std::fs::create_dir_all(&out_dir).unwrap(); 55 | 56 | // create a BIOS disk image 57 | let bios_path = out_dir.join("bootdisk.img"); 58 | bootloader::BiosBoot::new(&kernel).create_disk_image(&bios_path).unwrap(); 59 | } 60 | -------------------------------------------------------------------------------- /arch/x64/build-vmimage/vmimage-x86-64/.cargo/config.toml: -------------------------------------------------------------------------------- 1 | [target.'cfg(target_os = "none")'] 2 | rustflags = [ 3 | "-C", "link-arg=-Tlink.x", 4 | "-C", "target-feature=-mmx,-sse,+soft-float", 5 | "-C", "link-arg=--image-base=0x00001eecb3620000", 6 | "-C", "code-model=large", 7 | "-C", "relocation-model=static", 8 | ] -------------------------------------------------------------------------------- /arch/x64/build-vmimage/vmimage-x86-64/.gitignore: -------------------------------------------------------------------------------- 1 | /target 2 | -------------------------------------------------------------------------------- /arch/x64/build-vmimage/vmimage-x86-64/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "vmimage-x86-64" 3 | version = "0.1.0" 4 | edition = "2021" 5 | 6 | [dependencies] 7 | bootloader_api = { version = "0.11.7" } 8 | volatile = "0.2.6" 9 | spin = "0.5.2" 10 | x86_64 = "0.14.2" 11 | uart_16550 = "0.2.0" 12 | pic8259 = "0.10.1" 13 | pc-keyboard = "0.5.0" 14 | linked_list_allocator = "0.9.0" 15 | lazy_static = { version = "1.0", features = ["spin_no_std"] } 16 | qemu-exit = "3.0.1" 17 | pci_types = "0.2" 18 | memoffset = { version = "0.9", features = ["unstable_const"] } 19 | arr_macro = "0.1.3" 20 | liblisa-x64-observer-shmqueue = { version = "0.1.0", path = "../../liblisa-x64-observer-shmqueue" } 21 | arrayvec = { version = "0.7", default-features = false } 22 | 23 | [workspace] -------------------------------------------------------------------------------- /arch/x64/build-vmimage/vmimage-x86-64/gdb.sh: -------------------------------------------------------------------------------- 1 | #! /bin/bash 2 | 3 | gdb -ex "target remote localhost:1234" -ex "file $1" 4 | -------------------------------------------------------------------------------- /arch/x64/build-vmimage/vmimage-x86-64/link.x: -------------------------------------------------------------------------------- 1 | EXTERN(DATA_END); 2 | 3 | SECTIONS 4 | { 5 | . 
= 0x00001eecb3620000; 6 | 7 | .text : ALIGN(0x1000) 8 | { 9 | *(.text .text* .ltext .ltext*) 10 | } 11 | 12 | .bootloader : ALIGN(0x1000) 13 | { 14 | *(.bootloader) 15 | } 16 | 17 | .data.rel.ro : ALIGN(0x1000) 18 | { 19 | *(.data.rel.ro*) 20 | } 21 | 22 | .got : ALIGN(0x1000) 23 | { 24 | *(.got .got*) 25 | } 26 | 27 | .data : ALIGN(0x1000) 28 | { 29 | *(.data .data.*) 30 | *(.ldata .ldata.*) 31 | } 32 | 33 | .rodata : ALIGN(0x1000) 34 | { 35 | *(.rodata .rodata.*) 36 | *(.lrodata .lrodata.*) 37 | } 38 | 39 | .bss : ALIGN(0x1000) 40 | { 41 | *(COMMON) 42 | *(.bss .bss*) 43 | *(.lbss .lbss*) 44 | *(.gnu.linkonce.b*) 45 | } 46 | 47 | .bootloader-config : ALIGN(0x1000) 48 | { 49 | *(.bootloader-config .bootloader-config*) 50 | } 51 | 52 | DATA_END = .; 53 | 54 | /DISCARD/ : 55 | { 56 | *(.eh_frame .eh_frame_hdr) 57 | } 58 | } -------------------------------------------------------------------------------- /arch/x64/build-vmimage/vmimage-x86-64/src/allocator.rs: -------------------------------------------------------------------------------- 1 | use core::{mem::MaybeUninit, alloc::{GlobalAlloc, Layout}, ptr::null_mut}; 2 | use linked_list_allocator::LockedHeap; 3 | 4 | #[global_allocator] 5 | static ALLOCATOR: LockedHeap = LockedHeap::empty(); 6 | 7 | pub fn init(heap: &'static mut [MaybeUninit]) { 8 | ALLOCATOR.lock().init_from_slice(heap) 9 | } 10 | 11 | pub struct Dummy; 12 | 13 | unsafe impl GlobalAlloc for Dummy { 14 | unsafe fn alloc(&self, _layout: Layout) -> *mut u8 { 15 | null_mut() 16 | } 17 | 18 | unsafe fn dealloc(&self, _ptr: *mut u8, _layout: Layout) { 19 | panic!("dealloc should be never called") 20 | } 21 | } -------------------------------------------------------------------------------- /arch/x64/build-vmimage/vmimage-x86-64/src/gdt.rs: -------------------------------------------------------------------------------- 1 | // Based on: https://github.com/phil-opp/blog_os/blob/post-08/src/gdt.rs 2 | 3 | use lazy_static::lazy_static; 4 | use x86_64::instructions::tables::load_tss; 5 | use x86_64::registers::segmentation::{CS, Segment, SS, DS}; 6 | use x86_64::structures::gdt::{Descriptor, GlobalDescriptorTable, SegmentSelector, DescriptorFlags}; 7 | use x86_64::structures::tss::TaskStateSegment; 8 | use x86_64::{VirtAddr, PrivilegeLevel}; 9 | 10 | pub const MAIN_INTERRUPT_HANDLER_IST_INDEX: u16 = 0; 11 | pub const STACKED_INTERRUPT_HANDLER_IST_INDEX: u16 = 1; 12 | 13 | static mut TSS: (TaskStateSegment, [u8; 16]) = (TaskStateSegment::new(), [0xff; 16]); 14 | 15 | fn init_tss() { 16 | let tss = unsafe { &mut TSS.0 }; 17 | const STACK_SIZE: usize = 4096 * 5; 18 | tss.interrupt_stack_table[MAIN_INTERRUPT_HANDLER_IST_INDEX as usize] = { 19 | #[used] 20 | static mut STACK1: [u8; STACK_SIZE] = [0; STACK_SIZE]; 21 | 22 | let stack_start = VirtAddr::new_truncate(unsafe { &STACK1 as *const _ } as u64); 23 | let stack_end = stack_start + STACK_SIZE; 24 | stack_end 25 | }; 26 | tss.interrupt_stack_table[STACKED_INTERRUPT_HANDLER_IST_INDEX as usize] = { 27 | #[used] 28 | static mut STACK2: [u8; STACK_SIZE] = [0; STACK_SIZE]; 29 | 30 | let stack_start = VirtAddr::new_truncate(unsafe { &STACK2 as *const _ } as u64); 31 | let stack_end = stack_start + STACK_SIZE; 32 | stack_end 33 | }; 34 | tss.iomap_base = unsafe { &TSS.1 as *const _ as u64 } 35 | .wrapping_sub(tss as *const _ as u64) 36 | .try_into() 37 | .expect("IOPRIV_BITMAP too far away"); 38 | } 39 | 40 | lazy_static! 
{ 41 | static ref GDT: (GlobalDescriptorTable, Selectors, Selectors) = { 42 | let mut gdt = GlobalDescriptorTable::new(); 43 | // let code_selector = gdt.add_entry(Descriptor::kernel_code_segment()); 44 | // let tss_selector = gdt.add_entry(Descriptor::tss_segment(&TSS)); 45 | let kernel_data_flags = DescriptorFlags::USER_SEGMENT | DescriptorFlags::PRESENT | DescriptorFlags::WRITABLE; 46 | let code_selector = gdt.add_entry(Descriptor::kernel_code_segment()); // kernel code segment 47 | let data_selector = gdt.add_entry(Descriptor::UserSegment(kernel_data_flags.bits())); // kernel data segment 48 | let tss_selector = gdt.add_entry(Descriptor::tss_segment(unsafe { &TSS.0 })); // task state segment 49 | let user_data_selector = gdt.add_entry(Descriptor::user_data_segment()); // user data segment 50 | let user_code_selector = gdt.add_entry(Descriptor::user_code_segment()); // user code segment 51 | ( 52 | gdt, 53 | Selectors { 54 | code: code_selector, 55 | data: data_selector, 56 | tss: tss_selector, 57 | }, 58 | Selectors { 59 | code: user_code_selector, 60 | data: user_data_selector, 61 | tss: tss_selector, 62 | }, 63 | ) 64 | }; 65 | } 66 | 67 | struct Selectors { 68 | code: SegmentSelector, 69 | data: SegmentSelector, 70 | tss: SegmentSelector, 71 | } 72 | 73 | pub fn init() { 74 | init_tss(); 75 | GDT.0.load(); 76 | unsafe { 77 | CS::set_reg(GDT.1.code); 78 | DS::set_reg(GDT.1.data); 79 | 80 | // Load a null selector into SS to prevent our interrupts from crashing. 81 | // iretq checks the value of SS, which must be either a valid selector or a null selector. 82 | // It seems the bootloader is just leaving a random value there, which is causing problems. 83 | SS::set_reg(SegmentSelector::new(0, x86_64::PrivilegeLevel::Ring0)); 84 | load_tss(GDT.1.tss); 85 | } 86 | } 87 | 88 | #[inline(always)] 89 | pub unsafe fn set_usermode_segs() -> (u16, u16) { 90 | // set ds and tss, return cs and ds 91 | let (mut cs, mut ds) = (GDT.2.code, GDT.2.data); 92 | cs.0 |= PrivilegeLevel::Ring3 as u16; 93 | ds.0 |= PrivilegeLevel::Ring3 as u16; 94 | DS::set_reg(ds); 95 | SS::set_reg(SegmentSelector::new(0, x86_64::PrivilegeLevel::Ring0)); 96 | 97 | (cs.0, ds.0) 98 | } 99 | -------------------------------------------------------------------------------- /arch/x64/build-vmimage/vmimage-x86-64/src/lib.rs: -------------------------------------------------------------------------------- 1 | // Based on: https://github.com/phil-opp/blog_os/blob/post-08/src/lib.rs 2 | 3 | #![no_std] 4 | #![cfg_attr(test, no_main)] 5 | #![feature(custom_test_frameworks, abi_x86_interrupt, alloc_error_handler, naked_functions)] 6 | #![feature(generic_const_exprs)] 7 | #![test_runner(crate::test_runner)] 8 | #![reexport_test_harness_main = "test_main"] 9 | 10 | extern crate alloc; 11 | use core::{panic::PanicInfo, mem::MaybeUninit}; 12 | use x86_64::instructions::port::Port; 13 | 14 | pub mod gdt; 15 | pub mod interrupts; 16 | pub mod serial; 17 | pub mod allocator; 18 | pub mod pci; 19 | pub mod memory; 20 | pub mod userspace; 21 | pub mod observer; 22 | pub mod queue; 23 | pub mod timer; 24 | 25 | const HEAP_SIZE: usize = 128 * 1024; // 128KiB 26 | static mut HEAP: [MaybeUninit; HEAP_SIZE] = [MaybeUninit::uninit(); HEAP_SIZE]; 27 | 28 | pub fn init() { 29 | gdt::init(); 30 | interrupts::init_idt(); 31 | unsafe { interrupts::PICS.lock().initialize() }; 32 | x86_64::instructions::interrupts::enable(); 33 | allocator::init(unsafe { &mut HEAP }); 34 | } 35 | pub trait Testable { 36 | fn run(&self) -> (); 37 | } 38 | 39 | impl Testable 
for T 40 | where 41 | T: Fn(), 42 | { 43 | fn run(&self) { 44 | serial_print!("{}...\t", core::any::type_name::()); 45 | self(); 46 | serial_println!("[ok]"); 47 | } 48 | } 49 | 50 | pub fn test_runner(tests: &[&dyn Testable]) { 51 | serial_println!("Running {} tests", tests.len()); 52 | for test in tests { 53 | test.run(); 54 | } 55 | exit_qemu(ExitCode::Success); 56 | } 57 | 58 | pub fn test_panic_handler(info: &PanicInfo) -> ! { 59 | serial_println!("[failed]\n"); 60 | serial_println!("Error: {}\n", info); 61 | exit_qemu(ExitCode::Failed); 62 | } 63 | 64 | /// Exit code is reported by qemu as (value << 1) | 1 65 | #[derive(Debug, Clone, Copy, PartialEq, Eq)] 66 | #[repr(u32)] 67 | pub enum ExitCode { 68 | Success = 0x10, // 33 69 | Failed = 0x11, // 35 70 | Panic = 0x12, // 37 71 | } 72 | 73 | pub fn exit_qemu(exit_code: ExitCode) -> ! { 74 | serial_println!("Exit: {:?}", exit_code); 75 | 76 | let mut port = Port::new(0xf4); 77 | unsafe { 78 | port.write(exit_code as u32); 79 | } 80 | 81 | hlt_loop() 82 | } 83 | 84 | pub fn hlt_loop() -> ! { 85 | loop { 86 | x86_64::instructions::hlt(); 87 | } 88 | } 89 | 90 | #[cfg(test)] 91 | use bootloader_api::{entry_point, BootInfo}; 92 | 93 | #[cfg(test)] 94 | entry_point!(test_kernel_main); 95 | 96 | /// Entry point for `cargo xtest` 97 | #[cfg(test)] 98 | fn test_kernel_main(_boot_info: &'static mut BootInfo) -> ! { 99 | init(); 100 | test_main(); 101 | hlt_loop(); 102 | } 103 | 104 | #[cfg(test)] 105 | #[panic_handler] 106 | fn panic(info: &PanicInfo) -> ! { 107 | test_panic_handler(info) 108 | } 109 | 110 | #[alloc_error_handler] 111 | fn alloc_error_handler(layout: alloc::alloc::Layout) -> ! { 112 | panic!("allocation error: {:?}", layout) 113 | } 114 | -------------------------------------------------------------------------------- /arch/x64/build-vmimage/vmimage-x86-64/src/observer.rs: -------------------------------------------------------------------------------- 1 | use core::arch::asm; 2 | 3 | use liblisa_x64_observer_shmqueue::frame::command::{Permissions, CommandFrame}; 4 | use x86_64::{VirtAddr, structures::paging::Page}; 5 | use crate::userspace::jmp_to_usermode; 6 | 7 | pub struct Observer {} 8 | 9 | pub trait ObservationMapper { 10 | /// Maps a new page into userspace. 11 | fn map(&mut self, frame_index: usize, page: Page, permissions: Permissions); 12 | 13 | /// Maps a new page into userspace. 14 | fn map_executable(&mut self, frame_index: usize, page: Page, addr: u64, permissions: Permissions); 15 | 16 | /// Hints to the mapper that we're about to use the changes we made in map() or reset(). 17 | fn ready_hint(&mut self); 18 | 19 | /// Resets all mappings. 20 | /// [`ObservationMapper::unmap_hint`] must correctly hint all mapped pages after this function is called. 21 | fn reset_before(&mut self); 22 | 23 | /// Hints to the mapper that the specified page can now be unmapped. 24 | /// A call to [`ObservationMapper::reset`] must follow before [`ObservationMapper::map`] can be called again. 25 | fn unmap_hint(&mut self, page: Page); 26 | 27 | /// Resets all mappings. 28 | /// [`ObservationMapper::unmap_hint`] must correctly hint all mapped pages before this function is called. 
29 | fn reset_after(&mut self); 30 | } 31 | 32 | fn valid_address(addr: u64) -> bool { 33 | addr >= 0xffff_8000_0000_0000 || addr <= 0x0000_7fff_ffff_ffff 34 | } 35 | 36 | impl Observer { 37 | pub fn new() -> Observer { 38 | Observer { } 39 | } 40 | 41 | pub fn observe<'a>(&mut self, mapper: &mut impl ObservationMapper, allowed_component_bitmap: u64, cmd: &mut CommandFrame) { 42 | if !valid_address(cmd.gpregs.fs_base) || !valid_address(cmd.gpregs.gs_base) { 43 | cmd.gpregs.exception_id = 13; 44 | return; 45 | } 46 | 47 | for request in cmd.memory_mappings.active().iter() { 48 | // SAFETY: page_start_addr is guaranteed to return an address with the lower 12 bits all 0. 49 | let requested_page = unsafe { Page::from_start_address_unchecked(VirtAddr::new(request.page_start_addr())) }; 50 | if request.permissions() == Permissions::Executable { 51 | mapper.map_executable(request.frame_index() as usize, requested_page, cmd.gpregs.rip, request.permissions()); 52 | } else { 53 | mapper.map(request.frame_index() as usize, requested_page, request.permissions()); 54 | } 55 | } 56 | 57 | mapper.ready_hint(); 58 | 59 | unsafe { 60 | let component_bitmap = cmd.restore_extended_registers; 61 | cmd.extended_regs.restore(component_bitmap & allowed_component_bitmap).unwrap(); 62 | } 63 | 64 | let debug_regs = &mut cmd.debug_regs; 65 | if debug_regs.dr7 != 0 { 66 | unsafe { 67 | asm!( 68 | "mov dr0, rax", 69 | "mov dr1, rcx", 70 | "mov dr2, rdx", 71 | "mov dr3, rsi", 72 | "mov dr6, r10", 73 | "mov dr7, rdi", 74 | in("rax") debug_regs.dr0, 75 | in("rcx") debug_regs.dr1, 76 | in("rdx") debug_regs.dr2, 77 | in("rsi") debug_regs.dr3, 78 | in("r10") debug_regs.dr6, 79 | in("rdi") debug_regs.dr7, 80 | ) 81 | } 82 | } 83 | 84 | unsafe { 85 | jmp_to_usermode(&mut cmd.gpregs); 86 | } 87 | 88 | if debug_regs.dr7 != 0 { 89 | unsafe { 90 | asm!( 91 | "mov rax, dr6", 92 | "mov rdx, dr7", 93 | "mov dr7, rcx", 94 | out("rax") debug_regs.dr6, 95 | out("rdx") debug_regs.dr7, 96 | in("rcx") 0, 97 | ) 98 | } 99 | } 100 | 101 | unsafe { 102 | let component_bitmap = cmd.save_extended_registers; 103 | cmd.extended_regs.save_current(component_bitmap & allowed_component_bitmap); 104 | } 105 | 106 | mapper.reset_before(); 107 | 108 | for request in cmd.memory_mappings.active().iter() { 109 | // SAFETY: see above 110 | let requested_page = unsafe { Page::from_start_address_unchecked(VirtAddr::new(request.page_start_addr())) }; 111 | mapper.unmap_hint(requested_page); 112 | } 113 | 114 | mapper.reset_after(); 115 | } 116 | } -------------------------------------------------------------------------------- /arch/x64/build-vmimage/vmimage-x86-64/src/pci.rs: -------------------------------------------------------------------------------- 1 | use core::cell::RefCell; 2 | 3 | use pci_types::ConfigRegionAccess; 4 | use x86_64::instructions::port::Port; 5 | 6 | pub struct PciConfig { 7 | config_address: Port, 8 | config_data: Port, 9 | } 10 | 11 | impl PciConfig { 12 | pub fn new() -> PciConfig { 13 | PciConfig { 14 | config_address: Port::new(0xCF8), 15 | config_data: Port::new(0xCFC), 16 | } 17 | } 18 | 19 | pub fn read_u32(&mut self, bus: u8, slot: u8, func: u8, offset: u16) -> u32 { 20 | let address = ((bus as u32) << 16) | ((slot as u32) << 11) | ((func as u32) << 8) | ((offset as u32) & 0xFC) | 0x80000000; 21 | unsafe { 22 | self.config_address.write(address); 23 | self.config_data.read() 24 | } 25 | } 26 | 27 | pub fn write_u32(&mut self, bus: u8, slot: u8, func: u8, offset: u16, value: u32) { 28 | let address = ((bus as u32) << 16) | 
((slot as u32) << 11) | ((func as u32) << 8) | ((offset as u32) & 0xFC) | 0x80000000; 29 | unsafe { 30 | self.config_address.write(address); 31 | self.config_data.write(value); 32 | } 33 | } 34 | } 35 | 36 | pub struct PciConfigAccessor(pub RefCell); 37 | 38 | impl ConfigRegionAccess for PciConfigAccessor { 39 | fn function_exists(&self, address: pci_types::PciAddress) -> bool { 40 | self.0.borrow_mut().read_u32(address.bus(), address.device(), address.function(), 0) != 0xffffffff 41 | } 42 | 43 | unsafe fn read(&self, address: pci_types::PciAddress, offset: u16) -> u32 { 44 | self.0.borrow_mut().read_u32(address.bus(), address.device(), address.function(), offset) 45 | } 46 | 47 | unsafe fn write(&self, address: pci_types::PciAddress, offset: u16, value: u32) { 48 | self.0.borrow_mut().write_u32(address.bus(), address.device(), address.function(), offset, value) 49 | } 50 | } -------------------------------------------------------------------------------- /arch/x64/build-vmimage/vmimage-x86-64/src/queue.rs: -------------------------------------------------------------------------------- 1 | use liblisa_x64_observer_shmqueue::frame::FRAME_SIZE; 2 | use liblisa_x64_observer_shmqueue::frame::{control::Client, command::CommandFrame}; 3 | use liblisa_x64_observer_shmqueue::queue::Queue; 4 | 5 | use crate::serial_println; 6 | 7 | pub struct Incoming<'a> { 8 | queues: &'a mut [Queue], 9 | current_queue: usize, 10 | } 11 | 12 | pub struct PageOffsetTranslator { 13 | base_offset: usize, 14 | } 15 | 16 | impl PageOffsetTranslator { 17 | pub fn page_offset(&self, index: usize) -> usize { 18 | self.base_offset + index * FRAME_SIZE 19 | } 20 | } 21 | 22 | impl<'a> Incoming<'a> { 23 | pub fn new(queues: &'a mut [Queue]) -> Result, ()> { 24 | for queue in queues.iter() { 25 | if queue.control_frame().num_command_frames() == 0 { 26 | serial_println!("Shared memory has not been set up correctly. `num_command_frames` cannot be 0."); 27 | return Err(()); 28 | } 29 | 30 | if queue.total_size() < (queue.control_frame().num_command_frames() as usize + 1) * 4096 { 31 | panic!("Shared memory has not been set up correctly. 
Not enough memory is available for all queue frames."); 32 | } 33 | } 34 | 35 | Ok(Incoming { 36 | queues, 37 | current_queue: 0, 38 | }) 39 | } 40 | 41 | pub fn offset_translator(&self) -> PageOffsetTranslator { 42 | let frame_offset = self.queues.iter() 43 | .map(|queue| 1 + queue.control_frame().num_command_frames()) 44 | .sum::(); 45 | PageOffsetTranslator { 46 | base_offset: frame_offset as usize * FRAME_SIZE, 47 | } 48 | } 49 | 50 | pub fn receive_request(&mut self) -> Option<&mut CommandFrame> { 51 | let num_queues = self.queues.len(); 52 | 53 | for shift in 0..num_queues { 54 | let index = (self.current_queue + shift) % num_queues; 55 | if self.queues[index].request_available() { 56 | self.current_queue = index; 57 | return self.queues[index].try_dequeue(); 58 | } 59 | } 60 | 61 | None 62 | } 63 | 64 | pub fn mark_processed(&mut self) { 65 | let queue = &mut self.queues[self.current_queue]; 66 | queue.control_frame().update_current(queue.read_index()); 67 | self.current_queue = (self.current_queue + 1) % self.queues.len(); 68 | } 69 | } -------------------------------------------------------------------------------- /arch/x64/build-vmimage/vmimage-x86-64/src/serial.rs: -------------------------------------------------------------------------------- 1 | // Based on: https://github.com/phil-opp/blog_os/blob/post-08/src/serial.rs 2 | 3 | use lazy_static::lazy_static; 4 | use spin::Mutex; 5 | use uart_16550::SerialPort; 6 | 7 | lazy_static! { 8 | pub static ref SERIAL1: Mutex = { 9 | let mut serial_port = unsafe { SerialPort::new(0x3F8) }; 10 | serial_port.init(); 11 | Mutex::new(serial_port) 12 | }; 13 | } 14 | 15 | #[doc(hidden)] 16 | pub fn _print(args: ::core::fmt::Arguments) { 17 | use core::fmt::Write; 18 | use x86_64::instructions::interrupts; 19 | 20 | interrupts::without_interrupts(|| { 21 | SERIAL1 22 | .lock() 23 | .write_fmt(args) 24 | .expect("Printing to serial failed"); 25 | }); 26 | } 27 | 28 | /// Prints to the host through the serial interface. 29 | #[macro_export] 30 | macro_rules! serial_print { 31 | ($($arg:tt)*) => { 32 | $crate::serial::_print(format_args!($($arg)*)) 33 | }; 34 | } 35 | 36 | /// Prints to the host through the serial interface, appending a newline. 37 | #[macro_export] 38 | macro_rules! serial_println { 39 | () => ($crate::serial_print!("\n")); 40 | ($fmt:expr) => ($crate::serial_print!(concat!($fmt, "\n"))); 41 | ($fmt:expr, $($arg:tt)*) => ($crate::serial_print!( 42 | concat!($fmt, "\n"), $($arg)*)); 43 | } 44 | -------------------------------------------------------------------------------- /arch/x64/build-vmimage/vmimage-x86-64/src/timer.rs: -------------------------------------------------------------------------------- 1 | use core::arch::asm; 2 | use x86_64::instructions::port::{Port, PortWriteOnly}; 3 | 4 | pub struct Timer { 5 | ch0: Port, 6 | ch1: Port, 7 | ch2: Port, 8 | command: PortWriteOnly, 9 | } 10 | 11 | pub enum AccessMode { 12 | LatchCountValue = 0b00, 13 | Lo = 0b01, 14 | Hi = 0b10, 15 | LoHi = 0b11, 16 | } 17 | 18 | pub enum OperatingMode { 19 | /// Interrupt on terminal count 20 | Mode0 = 0b000, 21 | /// Hardware re-triggerable one-shot 22 | Mode1 = 0b001, 23 | /// Rate generator 24 | Mode2 = 0b010, 25 | /// Square wave generator 26 | Mode3 = 0b011, 27 | /// Software triggered strobe 28 | Mode4 = 0b100, 29 | /// Hardware triggered strobe 30 | Mode5 = 0b101, 31 | } 32 | 33 | impl Timer { 34 | /// SAFETY: Allocates ports. Do not call more than once. 
35 | pub unsafe fn init() -> Timer { 36 | Timer { 37 | ch0: Port::new(0x40), 38 | ch1: Port::new(0x41), 39 | ch2: Port::new(0x42), 40 | command: PortWriteOnly::new(0x43), 41 | } 42 | } 43 | 44 | /// The PIT runs at roughly 1193181.6666Hz 45 | pub fn set_ch0_reset_value(&mut self, reset_value: u16) { 46 | unsafe { 47 | // Disable interrupts while we're modifying the timer 48 | asm!("cli"); 49 | 50 | self.ch0.write(reset_value as u8); 51 | self.ch0.write((reset_value >> 8) as u8); 52 | 53 | asm!("sti"); 54 | }; 55 | } 56 | 57 | pub fn configure_ch0(&mut self, mode: OperatingMode, lohi: AccessMode) { 58 | unsafe { 59 | asm!("cli"); 60 | 61 | let channel = 0; 62 | self.command.write((channel as u8) << 6 | (lohi as u8) << 4 | (mode as u8) << 1); 63 | 64 | asm!("sti"); 65 | } 66 | } 67 | } -------------------------------------------------------------------------------- /arch/x64/build-vmimage/vmimage-x86-64/tests/basic_boot.rs: -------------------------------------------------------------------------------- 1 | #![no_std] 2 | #![no_main] 3 | #![feature(custom_test_frameworks)] 4 | #![test_runner(vmimage_x86_64::test_runner)] 5 | #![reexport_test_harness_main = "test_main"] 6 | 7 | use core::panic::PanicInfo; 8 | 9 | #[no_mangle] // don't mangle the name of this function 10 | pub extern "C" fn _start() -> ! { 11 | test_main(); 12 | 13 | loop {} 14 | } 15 | 16 | #[panic_handler] 17 | fn panic(info: &PanicInfo) -> ! { 18 | vmimage_x86_64::test_panic_handler(info) 19 | } 20 | -------------------------------------------------------------------------------- /arch/x64/build-vmimage/vmimage-x86-64/tests/heap_allocation.rs: -------------------------------------------------------------------------------- 1 | #![no_std] 2 | #![no_main] 3 | #![feature(custom_test_frameworks)] 4 | #![test_runner(vmimage_x86_64::test_runner)] 5 | #![reexport_test_harness_main = "test_main"] 6 | 7 | extern crate alloc; 8 | 9 | use alloc::{boxed::Box, vec::Vec}; 10 | use bootloader_api::{entry_point, BootInfo}; 11 | use core::panic::PanicInfo; 12 | 13 | entry_point!(main); 14 | 15 | fn main(boot_info: &'static mut BootInfo) -> ! { 16 | // use vmimage_x86_64::allocator; 17 | // use vmimage_x86_64::memory::{self, BootInfoFrameAllocator}; 18 | // use x86_64::VirtAddr; 19 | 20 | // vmimage_x86_64::init(); 21 | // let phys_mem_offset = VirtAddr::new(boot_info.physical_memory_offset); 22 | // let mut mapper = unsafe { memory::init(phys_mem_offset) }; 23 | // let mut frame_allocator = unsafe { BootInfoFrameAllocator::init(&boot_info.memory_map) }; 24 | // allocator::init_heap(&mut mapper, &mut frame_allocator).expect("heap initialization failed"); 25 | 26 | test_main(); 27 | loop {} 28 | } 29 | 30 | #[test_case] 31 | fn simple_allocation() { 32 | let heap_value_1 = Box::new(41); 33 | let heap_value_2 = Box::new(13); 34 | assert_eq!(*heap_value_1, 41); 35 | assert_eq!(*heap_value_2, 13); 36 | } 37 | 38 | #[test_case] 39 | fn large_vec() { 40 | let n = 1000; 41 | let mut vec = Vec::new(); 42 | for i in 0..n { 43 | vec.push(i); 44 | } 45 | assert_eq!(vec.iter().sum::(), (n - 1) * n / 2); 46 | } 47 | 48 | #[panic_handler] 49 | fn panic(info: &PanicInfo) -> ! 
{ 50 | vmimage_x86_64::test_panic_handler(info) 51 | } 52 | -------------------------------------------------------------------------------- /arch/x64/build-vmimage/vmimage-x86-64/tests/should_panic.rs: -------------------------------------------------------------------------------- 1 | #![no_std] 2 | #![no_main] 3 | 4 | use vmimage_x86_64::{exit_qemu, serial_print, serial_println, ExitCode}; 5 | use core::panic::PanicInfo; 6 | 7 | #[no_mangle] 8 | pub extern "C" fn _start() -> ! { 9 | should_fail(); 10 | serial_println!("[test did not panic]"); 11 | exit_qemu(ExitCode::Failed); 12 | } 13 | 14 | fn should_fail() { 15 | serial_print!("should_panic::should_fail...\t"); 16 | assert_eq!(0, 1); 17 | } 18 | 19 | // #[panic_handler] 20 | // fn panic(_info: &PanicInfo) -> ! { 21 | // serial_println!("[ok]"); 22 | // exit_qemu(ExitCode::Success); 23 | // } 24 | -------------------------------------------------------------------------------- /arch/x64/build-vmimage/vmimage-x86-64/tests/stack_overflow.rs: -------------------------------------------------------------------------------- 1 | #![no_std] 2 | #![no_main] 3 | #![feature(abi_x86_interrupt)] 4 | 5 | use vmimage_x86_64::{exit_qemu, serial_print, serial_println, ExitCode}; 6 | use lazy_static::lazy_static; 7 | use x86_64::structures::idt::{InterruptDescriptorTable, InterruptStackFrame}; 8 | 9 | #[no_mangle] 10 | pub extern "C" fn _start() -> ! { 11 | serial_print!("stack_overflow::stack_overflow...\t"); 12 | 13 | vmimage_x86_64::gdt::init(); 14 | init_test_idt(); 15 | 16 | // trigger a stack overflow 17 | stack_overflow(); 18 | 19 | panic!("Execution continued after stack overflow"); 20 | } 21 | 22 | #[allow(unconditional_recursion)] 23 | fn stack_overflow() { 24 | stack_overflow(); // for each recursion, the return address is pushed 25 | volatile::Volatile::new(0).read(); // prevent tail recursion optimizations 26 | } 27 | 28 | lazy_static! { 29 | static ref TEST_IDT: InterruptDescriptorTable = { 30 | let mut idt = InterruptDescriptorTable::new(); 31 | unsafe { 32 | idt.double_fault 33 | .set_handler_fn(test_double_fault_handler) 34 | .set_stack_index(vmimage_x86_64::gdt::MAIN_INTERRUPT_HANDLER_IST_INDEX); 35 | } 36 | 37 | idt 38 | }; 39 | } 40 | 41 | pub fn init_test_idt() { 42 | TEST_IDT.load(); 43 | } 44 | 45 | extern "x86-interrupt" fn test_double_fault_handler( 46 | _stack_frame: InterruptStackFrame, 47 | _error_code: u64, 48 | ) -> ! { 49 | serial_println!("[ok]"); 50 | exit_qemu(ExitCode::Success); 51 | } 52 | 53 | // #[panic_handler] 54 | // fn panic(info: &PanicInfo) -> ! { 55 | // vmimage_x86_64::test_panic_handler(info) 56 | // } 57 | -------------------------------------------------------------------------------- /arch/x64/liblisa-x64-observer-shmqueue/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "liblisa-x64-observer-shmqueue" 3 | version = "0.1.0" 4 | edition = "2021" 5 | description = "A tool for automated discovery and analysis of the ISA of a CPU." 
6 | license = "AGPL-3.0-only" 7 | documentation = "https://docs.liblisa.nl/" 8 | homepage = "https://liblisa.nl/" 9 | repository = "https://github.com/liblisa/liblisa" 10 | 11 | [dependencies] 12 | memoffset = { version = "0.9", features = ["unstable_const"] } 13 | static_assertions = "1.1.0" -------------------------------------------------------------------------------- /arch/x64/liblisa-x64-observer-shmqueue/src/frame/mod.rs: -------------------------------------------------------------------------------- 1 | //! Types representing the frames in the shared memory queue. 2 | 3 | pub mod command; 4 | pub mod control; 5 | 6 | /// The size of a single command or control frame, which is equal to the x86 page size. 7 | pub const FRAME_SIZE: usize = 4096; 8 | -------------------------------------------------------------------------------- /arch/x64/liblisa-x64-observer-shmqueue/src/lib.rs: -------------------------------------------------------------------------------- 1 | #![no_std] 2 | #![doc(html_no_source)] 3 | 4 | pub mod frame; 5 | pub mod queue; 6 | pub mod regs; 7 | -------------------------------------------------------------------------------- /arch/x64/liblisa-x64-observer-shmqueue/src/queue.rs: -------------------------------------------------------------------------------- 1 | //! A shared memory queue for communication between the host and the VM client. 2 | 3 | use core::fmt::Debug; 4 | use core::marker::PhantomData; 5 | 6 | use crate::frame::command::CommandFrame; 7 | use crate::frame::control::{Client, ControlFrame, Host}; 8 | 9 | /// A shared memory queue. 10 | pub struct Queue { 11 | queue_ptr: *mut u8, 12 | command_frame_base: *mut CommandFrame, 13 | read_index: u32, 14 | cached_current_pos: u32, 15 | total_size: usize, 16 | _phantom: PhantomData, 17 | } 18 | 19 | impl Debug for Queue { 20 | fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { 21 | f.debug_struct("Queue") 22 | .field("control", &self.control_frame()) 23 | .field("command_frame_base", &self.command_frame_base) 24 | .field("read_index", &self.read_index) 25 | .field("total_size", &self.total_size) 26 | .finish() 27 | } 28 | } 29 | 30 | impl Queue { 31 | /// # Safety 32 | /// You must ensure that queue_ptr is valid for the lifetime of the returned struct. 
33 | pub unsafe fn new(queue_ptr: *mut u8, queue_byte_size: usize) -> Queue { 34 | Queue { 35 | queue_ptr, 36 | command_frame_base: (queue_ptr as *mut CommandFrame).wrapping_add(1), 37 | read_index: 0, 38 | cached_current_pos: 0, 39 | total_size: queue_byte_size, 40 | _phantom: PhantomData, 41 | } 42 | } 43 | 44 | #[inline(always)] 45 | pub fn total_size(&self) -> usize { 46 | self.total_size 47 | } 48 | 49 | #[inline(always)] 50 | pub fn read_index(&self) -> u32 { 51 | self.read_index 52 | } 53 | 54 | #[inline] 55 | pub fn control_frame(&self) -> ControlFrame<'_, K> { 56 | unsafe { 57 | // SAFETY: `self.queue_ptr` must be a valid queue pointer for Self::new 58 | // SAFETY: The lifetime of the returned struct is less than or equal to the lifetime of self 59 | ControlFrame::new(self.queue_ptr) 60 | } 61 | } 62 | 63 | #[inline] 64 | pub fn command_frame(&mut self, index: usize) -> &mut CommandFrame { 65 | debug_assert!(index < self.control_frame().num_command_frames() as usize); 66 | unsafe { &mut *self.command_frame_base.wrapping_add(index) } 67 | } 68 | } 69 | 70 | impl Queue { 71 | #[inline] 72 | pub fn try_dequeue(&mut self) -> Option<&mut CommandFrame> { 73 | if self.request_available() { 74 | let frame_index = self.read_index as usize; 75 | self.read_index = (self.read_index + 1) % self.control_frame().num_command_frames(); 76 | let frame = self.command_frame(frame_index); 77 | Some(frame) 78 | } else { 79 | // No commands are ready to be read 80 | None 81 | } 82 | } 83 | 84 | #[inline(always)] 85 | pub fn request_available(&mut self) -> bool { 86 | if self.read_index != self.cached_current_pos { 87 | true 88 | } else { 89 | self.cached_current_pos = K::current_pos(&self.control_frame()); 90 | self.read_index != self.cached_current_pos 91 | } 92 | } 93 | } 94 | 95 | /// A trait that returns the current read position in the queue. 96 | /// 97 | /// It is implemented separately for the [`Host`] and [`Client`]. 98 | pub trait CurrentPos: Sized { 99 | fn current_pos(control: &ControlFrame) -> u32; 100 | } 101 | 102 | impl CurrentPos for Host { 103 | #[inline(always)] 104 | fn current_pos(control: &ControlFrame) -> u32 { 105 | control.current() 106 | } 107 | } 108 | 109 | impl CurrentPos for Client { 110 | #[inline(always)] 111 | fn current_pos(control: &ControlFrame) -> u32 { 112 | control.next() 113 | } 114 | } 115 | -------------------------------------------------------------------------------- /arch/x64/liblisa-x64-observer-shmqueue/src/regs.rs: -------------------------------------------------------------------------------- 1 | //! Registers in the command frames. 2 | 3 | use core::mem::size_of; 4 | 5 | use static_assertions::const_assert; 6 | 7 | /// The general-purpose registers. 8 | #[repr(C)] 9 | #[derive(Default, Copy, Debug, Clone, PartialEq, Eq)] 10 | pub struct GpRegs { 11 | pub rax: u64, 12 | pub rbx: u64, 13 | pub rcx: u64, 14 | pub rdx: u64, 15 | 16 | pub rbp: u64, 17 | pub rsi: u64, 18 | pub rdi: u64, 19 | 20 | pub r8: u64, 21 | pub r9: u64, 22 | pub r10: u64, 23 | pub r11: u64, 24 | pub r12: u64, 25 | pub r13: u64, 26 | pub r14: u64, 27 | pub r15: u64, 28 | 29 | // Note that the ordering here is chosen so that memory is accessed linearly in userspace::handle_interrupt 30 | // Make sure to benchmark any changes you make to the order of the fields. 
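// exception_id, error_code and access_address report how the observed code stopped;
// observer.rs, for example, stores 13 (#GP) in exception_id when it rejects a
// non-canonical fs_base/gs_base before running an observation.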
31 | pub exception_id: u64, 32 | pub error_code: u64, 33 | pub rip: u64, 34 | pub rsp: u64, 35 | pub access_address: u64, 36 | pub rflags: u64, 37 | 38 | pub fs_base: u64, 39 | pub gs_base: u64, 40 | } 41 | 42 | impl GpRegs { 43 | pub fn clear(&mut self) { 44 | *self = Default::default(); 45 | } 46 | } 47 | 48 | /// The debug registers. 49 | #[repr(C)] 50 | #[derive(Default, Copy, Debug, Clone, PartialEq, Eq)] 51 | pub struct DebugRegs { 52 | pub dr0: u64, 53 | pub dr1: u64, 54 | pub dr2: u64, 55 | pub dr3: u64, 56 | pub dr6: u64, 57 | pub dr7: u64, 58 | } 59 | 60 | /// Error returned by [`crate::frame::command::ExtendedRegs::restore`]. 61 | #[derive(Debug)] 62 | pub enum RestoreError { 63 | /// Reserved flags in the MXCSR register (bit 16, 18-31) must be cleared. 64 | ReservedMxcsrFlagsSet, 65 | } 66 | 67 | /// A trait representing an XSAVE component. 68 | pub trait XsaveComponent {} 69 | impl XsaveComponent for YmmRegs {} 70 | impl XsaveComponent for XsaveLegacyArea {} 71 | impl XsaveComponent for XsaveHeader {} 72 | 73 | /// The x87 ST(n) registers. 74 | #[repr(C)] 75 | #[derive(Copy, Clone, Default)] 76 | pub struct St([u8; 10], [u8; 6]); 77 | 78 | impl St { 79 | #[inline] 80 | pub fn new(bytes: [u8; 10]) -> Self { 81 | St(bytes, Default::default()) 82 | } 83 | 84 | #[inline] 85 | pub fn bytes(&self) -> [u8; 10] { 86 | self.0 87 | } 88 | } 89 | 90 | impl core::fmt::Debug for St { 91 | fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { 92 | core::fmt::Debug::fmt(&self.0, f) 93 | } 94 | } 95 | 96 | const_assert!(size_of::() == 16); 97 | 98 | /// The XSAVE legacy area (which contains mostly x87). 99 | #[repr(C)] 100 | #[derive(Copy, Clone, Debug)] 101 | pub struct XsaveLegacyArea { 102 | /// The fcw register (rounding mode, precision control, exception masking) 103 | pub control_word: u16, 104 | /// The fsw register 105 | pub status_word: u16, 106 | /// The ftw register 107 | pub tag_word: u8, 108 | pub _reserved1: u8, 109 | pub fop: u16, 110 | /// The last instruction pointer 111 | pub rip: u64, 112 | 113 | /// The last data pointer 114 | pub rdp: u64, 115 | pub mxcsr: u32, 116 | pub mxcsr_mask: u32, 117 | 118 | pub st: [St; 8], 119 | pub xmm: [u128; 16], 120 | } 121 | 122 | // TODO: Assert sizes and offsets of legacy area 123 | 124 | impl Default for XsaveLegacyArea { 125 | fn default() -> Self { 126 | Self { 127 | control_word: 0x37F, 128 | status_word: 0x3800, 129 | tag_word: 0x80, 130 | _reserved1: 0x00, 131 | fop: 0, 132 | rip: 0, 133 | rdp: 0, 134 | mxcsr: 0x1f80, 135 | mxcsr_mask: 0x2ffff, 136 | st: Default::default(), 137 | xmm: [0; 16], 138 | } 139 | } 140 | } 141 | 142 | /// The XSAVE header. 143 | #[repr(packed, C)] 144 | #[derive(Copy, Clone, Debug)] 145 | pub struct XsaveHeader { 146 | pub xstate_bv: u64, 147 | pub xcomp_bv: u64, 148 | } 149 | 150 | /// The AVX YMM registers. 151 | #[repr(C)] 152 | #[derive(Copy, Clone, Debug)] 153 | pub struct YmmRegs { 154 | pub ymm_hi128: [u128; 16], 155 | } 156 | -------------------------------------------------------------------------------- /arch/x64/liblisa-x64-observer/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "liblisa-x64-observer" 3 | version = "0.1.0" 4 | edition = "2021" 5 | description = "A tool for automated discovery and analysis of the ISA of a CPU." 
6 | license = "AGPL-3.0-only" 7 | documentation = "https://docs.liblisa.nl/" 8 | homepage = "https://liblisa.nl/" 9 | repository = "https://github.com/liblisa/liblisa" 10 | include = [ 11 | "src/**/*.rs", 12 | "tests/**/*.rs", 13 | "examples/**/*.rs", 14 | "benches/**/*.rs", 15 | "build.rs", 16 | "Cargo.toml", 17 | "image/bootdisk.img", 18 | ] 19 | 20 | [[bench]] 21 | name = "vm" 22 | harness = false 23 | 24 | [[bench]] 25 | name = "oracle" 26 | harness = false 27 | 28 | [dependencies] 29 | itertools = "0.12.0" 30 | log = "0.4" 31 | nix = { version = "0.27.1", features = ["process", "signal", "sched"] } 32 | liblisa = { version = "0.1.0", path = "../../../liblisa" } 33 | shared_memory = "0.12.4" 34 | rand = "0.8" 35 | tempfile = "3.3" 36 | liblisa-x64-observer-shmqueue = { version = "0.1.0", path = "../liblisa-x64-observer-shmqueue" } 37 | thiserror = "1" 38 | 39 | [build-dependencies] 40 | bootloader = { version = "0.11.7" } 41 | 42 | [dev-dependencies] 43 | criterion = "0.5" 44 | env_logger = "0.10" 45 | test-log = "0.2.10" 46 | rand = "0.8" 47 | rand_xoshiro = "0.6" 48 | -------------------------------------------------------------------------------- /arch/x64/liblisa-x64-observer/benches/oracle.rs: -------------------------------------------------------------------------------- 1 | use std::time::Duration; 2 | 3 | use criterion::{black_box, criterion_group, criterion_main, Criterion}; 4 | use liblisa::arch::x64::X64Arch; 5 | use liblisa::arch::CpuState; 6 | use liblisa::instr::Instruction; 7 | use liblisa::oracle::Oracle; 8 | use liblisa::state::{Addr, MemoryState, Permissions, SystemState}; 9 | use liblisa_x64_observer::with_oracle; 10 | 11 | fn simple_oracle_observe(c: &mut Criterion) { 12 | println!("Make sure to restrict this process to cores sharing the same L3 cache"); 13 | 14 | with_oracle(|mut o| { 15 | let page = o.random_mappable_page(&mut rand::thread_rng()); 16 | let pc = page.start_addr(); 17 | let instr = Instruction::new(&[0x48, 0x31, 0xD0]); // XOR rax, rdx 18 | let state = SystemState::::new( 19 | CpuState::::default_with_pc(pc.as_u64()), 20 | MemoryState::from_vec(vec![(pc, Permissions::Execute, instr.bytes().to_owned())]), 21 | ); 22 | 23 | c.bench_function("Oracle::observe[XOR rax, rdx]", |b| { 24 | b.iter(|| black_box(o.observe(&state).unwrap())) 25 | }); 26 | 27 | let instr = Instruction::new(&[0x48, 0x31, 0xD0]); // XOR rax, rdx 28 | let state2 = SystemState::::new( 29 | CpuState::::default_with_pc(pc.as_u64()), 30 | MemoryState::from_vec(vec![ 31 | (pc, Permissions::Execute, instr.bytes().to_owned()), 32 | ( 33 | Addr::new(0x11223344), 34 | Permissions::Read, 35 | vec![0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3], 36 | ), 37 | ( 38 | Addr::new(0x11323344), 39 | Permissions::Read, 40 | vec![0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3], 41 | ), 42 | ( 43 | Addr::new(0x11423344), 44 | Permissions::Read, 45 | vec![0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3], 46 | ), 47 | ( 48 | Addr::new(0x11523344), 49 | Permissions::Read, 50 | vec![0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3], 51 | ), 52 | ]), 53 | ); 54 | 55 | c.bench_function("Oracle::observe[XOR rax, rdx + 4 memory mappings]", |b| { 56 | b.iter(|| black_box(o.observe(&state2).unwrap())) 57 | }); 58 | 59 | c.bench_function("Oracle::observe_carefully[XOR rax, rdx + 4 memory mappings]", |b| { 60 | b.iter(|| black_box(o.observe_carefully(&state2).unwrap())) 61 | }); 62 | 63 | c.bench_function("Oracle::observe[XOR rax, rdx + 4 memory mappings alternating]", |b| { 64 | b.iter(|| { 65 | 
black_box(o.observe(&state).unwrap()); 66 | black_box(o.observe(&state2).unwrap()); 67 | }) 68 | }); 69 | }) 70 | } 71 | 72 | criterion_group! { 73 | name = benches; 74 | config = Criterion::default().measurement_time(Duration::from_secs(60)); 75 | targets = simple_oracle_observe 76 | } 77 | criterion_main!(benches); 78 | -------------------------------------------------------------------------------- /arch/x64/liblisa-x64-observer/examples/multitest.rs: -------------------------------------------------------------------------------- 1 | use std::time::Instant; 2 | 3 | use liblisa_x64_observer::vm::{ 4 | CpuFeatures, DebugRegs, DebugRegsWriter, ExtendedRegsWriter, MemoryMapper, ObservationRequest, PageAllocator, 5 | ResultMemoryAccess, Vm, 6 | }; 7 | use liblisa_x64_observer_shmqueue::frame::command::{ExtendedRegs, Permissions}; 8 | use liblisa_x64_observer_shmqueue::regs::GpRegs; 9 | 10 | #[derive(Copy, Clone)] 11 | struct TestRequest; 12 | 13 | impl ObservationRequest for TestRequest { 14 | type Result = (); 15 | 16 | #[inline(always)] 17 | fn setup( 18 | &mut self, _: CpuFeatures, gpregs: &mut GpRegs, _debug_regs: DebugRegsWriter, _extended_regs: ExtendedRegsWriter, 19 | alloc: &mut PageAllocator, mapper: MemoryMapper, 20 | ) { 21 | gpregs.rip = 0x1234000; 22 | 23 | mapper.set([alloc.allocate_page(0x1234000, &[0xCC], Permissions::Executable)].into_iter()); 24 | } 25 | 26 | #[inline(always)] 27 | fn result( 28 | self, _: CpuFeatures, _gpregs: &GpRegs, _debug_regs: &DebugRegs, _extended_regs: &ExtendedRegs, 29 | _memory: ResultMemoryAccess, 30 | ) { 31 | } 32 | } 33 | 34 | fn main() { 35 | println!("Starting observer..."); 36 | println!("Make sure to restrict this process to cores sharing the same L3 cache"); 37 | 38 | let mut vm = Vm::start(2).unwrap(); 39 | let mut observers = vm.observers().collect::>(); 40 | 41 | const COUNT: usize = 5_000_000; 42 | println!("{} observations in {} threads", COUNT, observers.len()); 43 | 44 | for batch_size in [100_000, 1, 2, 5, 10, 100, 1000, 10_000, 100_000] { 45 | println!("{} observations in batches of {}", COUNT, batch_size); 46 | 47 | std::thread::scope(|scope| { 48 | let start = Instant::now(); 49 | let threads = observers 50 | .iter_mut() 51 | .map(|observer| { 52 | scope.spawn(move || { 53 | for _n in 0..COUNT / 2 / batch_size { 54 | observer 55 | .batch_iter(std::iter::repeat(TestRequest).take(batch_size)) 56 | .for_each(drop) 57 | } 58 | }) 59 | }) 60 | .collect::>(); 61 | 62 | for thread in threads { 63 | thread.join().unwrap(); 64 | } 65 | 66 | println!( 67 | "{}ns per observation with batch size {}", 68 | start.elapsed().as_nanos() / (COUNT) as u128, 69 | batch_size 70 | ); 71 | }); 72 | } 73 | 74 | // println!("Sleeping now; Check CPU usage"); 75 | // std::thread::sleep(std::time::Duration::from_secs(30)); 76 | } 77 | -------------------------------------------------------------------------------- /arch/x64/liblisa-x64-observer/examples/test.rs: -------------------------------------------------------------------------------- 1 | use std::time::Instant; 2 | 3 | use liblisa_x64_observer::vm::{ 4 | CpuFeatures, DebugRegs, DebugRegsWriter, ExtendedRegsWriter, MemoryMapper, ObservationRequest, PageAllocator, 5 | ResultMemoryAccess, Vm, YmmRegs, 6 | }; 7 | use liblisa_x64_observer_shmqueue::frame::command::{ExtendedRegs, Permissions}; 8 | use liblisa_x64_observer_shmqueue::regs::{GpRegs, XsaveLegacyArea}; 9 | 10 | #[derive(Copy, Clone)] 11 | struct TestRequest(u64); 12 | const TRAP_FLAG: u64 = 1 << 8; 13 | 14 | impl ObservationRequest for 
TestRequest { 15 | type Result = (); 16 | 17 | #[inline(always)] 18 | fn setup( 19 | &mut self, _: CpuFeatures, gpregs: &mut GpRegs, _debug_regs: DebugRegsWriter, mut extended_regs: ExtendedRegsWriter, 20 | alloc: &mut PageAllocator, mapper: MemoryMapper, 21 | ) { 22 | gpregs.rip = 0x1234000; 23 | gpregs.rflags = TRAP_FLAG; 24 | 25 | mapper.set([alloc.allocate_page(0x1234000, &[0xCC], Permissions::Executable)].into_iter()); 26 | 27 | extended_regs.set_legacy(XsaveLegacyArea { 28 | xmm: [self.0 as u128; 16], 29 | ..Default::default() 30 | }); 31 | 32 | extended_regs.set_ymm(YmmRegs { 33 | ymm_hi128: [self.0 as u128; 16], 34 | }); 35 | } 36 | 37 | #[inline(always)] 38 | fn result( 39 | self, _: CpuFeatures, _gpregs: &GpRegs, _debug_regs: &DebugRegs, _extended_regs: &ExtendedRegs, 40 | _memory: ResultMemoryAccess, 41 | ) { 42 | } 43 | } 44 | 45 | fn main() { 46 | println!("Starting observer..."); 47 | println!("Make sure to restrict this process to cores sharing the same L3 cache"); 48 | 49 | let mut vm = Vm::start(1).unwrap(); 50 | let mut observer = vm.first_observer_only(); 51 | 52 | println!("Reserved range: {:?}", observer.reserved_range()); 53 | 54 | // The shmem is currently configured to contain only 512 command pages. 55 | // Batch sizes of more than a few thousand are unlikely to affect performance much. 56 | 57 | const COUNT: usize = 5_000_000; 58 | for batch_size in [100_000, 1, 2, 5, 10, 100, 1000, 10_000, 100_000] { 59 | println!("{} observations in batches of {}", COUNT, batch_size); 60 | 61 | let start = Instant::now(); 62 | for _n in 0..COUNT / batch_size { 63 | let mut n = 0u64; 64 | let mut num_seen = 0; 65 | observer 66 | .batch_iter( 67 | std::iter::repeat_with(|| { 68 | TestRequest({ 69 | n = n.wrapping_add(1); 70 | n 71 | }) 72 | }) 73 | .take(batch_size), 74 | ) 75 | .for_each(|_| num_seen += 1); 76 | assert_eq!(num_seen, batch_size); 77 | } 78 | 79 | println!( 80 | "{}ns per observation with batch size {}", 81 | start.elapsed().as_nanos() / (COUNT) as u128, 82 | batch_size 83 | ); 84 | } 85 | 86 | // println!("Sleeping now; Check CPU usage"); 87 | // std::thread::sleep(std::time::Duration::from_secs(30)); 88 | } 89 | -------------------------------------------------------------------------------- /arch/x64/liblisa-x64-observer/examples/undocumented.rs: -------------------------------------------------------------------------------- 1 | use liblisa_x64_observer::vm::{ 2 | CpuFeatures, DebugRegs, DebugRegsWriter, ExtendedRegs, ExtendedRegsWriter, GpRegs, MemoryMapper, ObservationRequest, 3 | PageAllocator, Permissions, ResultMemoryAccess, Vm, XsaveLegacyArea, YmmRegs, 4 | }; 5 | 6 | /// This is a weird instruction; 7 | /// 8 | /// It takes two 32-byte memory regions: M1 and M2 9 | /// It takes the lowerst 8 bytes of M1: M1[0..8] 10 | /// It fills an XMM reg with M1[0..8] . 
M1[0..8] (that is, M1[0..8] concatenated to itself) 11 | 12 | const INSTR: &[u8] = &[0xC4, 0x03, 0x7D, 0x00, 0x00, 0x10]; 13 | const TRAP_FLAG: u64 = 1 << 8; 14 | 15 | #[derive(Copy, Clone)] 16 | struct TestRequest { 17 | avx_offset: usize, 18 | } 19 | 20 | impl ObservationRequest for TestRequest { 21 | type Result = (); 22 | 23 | #[inline(always)] 24 | fn setup( 25 | &mut self, _: CpuFeatures, gpregs: &mut GpRegs, _debug_regs: DebugRegsWriter, mut extended_regs: ExtendedRegsWriter, 26 | alloc: &mut PageAllocator, mapper: MemoryMapper, 27 | ) { 28 | *gpregs = GpRegs { 29 | rip: 0x1234000, 30 | r8: 0x8fe0, 31 | rax: 0x4fe0, 32 | rbx: 0x11_000, 33 | rcx: 0x12_000, 34 | rdx: 0x13_000, 35 | rbp: 0x14_000, 36 | rsi: 0x15_000, 37 | rdi: 0x16_000, 38 | r9: 0x17_000, 39 | r10: 0x18_000, 40 | r11: 0x19_000, 41 | r12: 0x1a_000, 42 | r13: 0x1b_000, 43 | r14: 0x1c_000, 44 | r15: 0x1d_000, 45 | rsp: 0x1e_000, 46 | rflags: TRAP_FLAG, 47 | ..Default::default() 48 | }; 49 | 50 | mapper.set( 51 | [ 52 | alloc.allocate_page(0x1234000, INSTR, Permissions::Executable), 53 | alloc.allocate_page( 54 | 0x8fe0, 55 | &[ 56 | 0x13, 0x23, 0x33, 0x43, 0x53, 0x63, 0x73, 0x83, 0x14, 0x24, 0x34, 0x44, 0x54, 0x64, 0x74, 0x84, 0x15, 57 | 0x25, 0x35, 0x45, 0x55, 0x65, 0x75, 0x85, 0x16, 0x26, 0x36, 0x46, 0x56, 0x66, 0x76, 0x86, 58 | ], 59 | Permissions::Read, 60 | ), 61 | alloc.allocate_page(0x4fe0, &[0x66; 32], Permissions::ReadWrite), 62 | ] 63 | .into_iter(), 64 | ); 65 | 66 | extended_regs.set_legacy(XsaveLegacyArea { 67 | xmm: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 0xa, 0xb, 0xc, 0xd, 0xe, u128::MAX], 68 | ..Default::default() 69 | }); 70 | } 71 | 72 | #[inline(always)] 73 | fn result( 74 | self, _: CpuFeatures, gpregs: &GpRegs, _debug_regs: &DebugRegs, extended_regs: &ExtendedRegs, memory: ResultMemoryAccess, 75 | ) { 76 | println!("GPREGS: {gpregs:#08X?}"); 77 | let xmm = &extended_regs.legacy_area().xmm; 78 | println!("XMM: {xmm:#034X?}"); 79 | 80 | println!("{}", self.avx_offset); 81 | let ymm = unsafe { extended_regs.component::(self.avx_offset) }; 82 | println!("YMM: {ymm:#034X?}"); 83 | 84 | for (index, entry) in memory.iter().enumerate() { 85 | let data = &entry[4096 - 32..]; 86 | println!("Mem{index} = {data:02X?}"); 87 | } 88 | 89 | assert_eq!(gpregs.exception_id, 0x01); // TRAP exception 90 | } 91 | } 92 | 93 | fn main() { 94 | let mut vm = Vm::start(1).unwrap(); 95 | let mut observer = vm.first_observer_only(); 96 | let layout = observer.layout(); 97 | 98 | observer 99 | .batch_iter( 100 | [TestRequest { 101 | avx_offset: layout.avx256.offset as usize, 102 | }] 103 | .into_iter(), 104 | ) 105 | .for_each(drop); 106 | } 107 | -------------------------------------------------------------------------------- /arch/x64/liblisa-x64-observer/image/.gitignore: -------------------------------------------------------------------------------- 1 | *.img -------------------------------------------------------------------------------- /cli/liblisa-libcli/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "liblisa-libcli" 3 | version = "0.1.0" 4 | edition = "2021" 5 | description = "A tool for automated discovery and analysis of the ISA of a CPU." 
6 | license = "AGPL-3.0-only" 7 | documentation = "https://docs.liblisa.nl/" 8 | homepage = "https://liblisa.nl/" 9 | repository = "https://github.com/liblisa/liblisa" 10 | 11 | [dependencies] 12 | liblisa = { version = "0.1.0", path = "../../liblisa" } 13 | liblisa-enc = { version = "0.1.0", path = "../../liblisa-enc" } 14 | liblisa-synth = { version = "0.1.0", path = "../../liblisa-synth" } 15 | serde = { version = "1.0", features = [ "derive" ] } 16 | serde_json = "1.0" 17 | clap = { version = "4.4.8", features = ["derive"] } 18 | itertools = "0.12.0" 19 | rand = "0.8" 20 | rand_xoshiro = "0.6" 21 | colored = "2" 22 | rayon = "1.5" 23 | hex = "0.4" 24 | log = "0.4" 25 | nix = { version = "0.27.1", features = ["process", "ptrace", "signal", "sched"] } 26 | rmp-serde = "1.1.1" 27 | tempfile = "3.5.0" 28 | thiserror = "1" 29 | 30 | [dev-dependencies] 31 | test-log = "0.2.10" 32 | -------------------------------------------------------------------------------- /cli/liblisa-libcli/src/detect_changes.rs: -------------------------------------------------------------------------------- 1 | use std::marker::PhantomData; 2 | 3 | use liblisa::arch::Arch; 4 | use liblisa::encoding::dataflows::{Dataflows, MemoryAccesses}; 5 | use liblisa::instr::Instruction; 6 | use liblisa::oracle::Oracle; 7 | use liblisa::state::random::StateGen; 8 | use liblisa_enc::cache::CombinedCache; 9 | use liblisa_enc::{ChangeAnalysis, DataflowAnalysis, MemoryAccessAnalysis, ThresholdValues}; 10 | use rand::{Rng, SeedableRng}; 11 | use rand_xoshiro::Xoshiro256PlusPlus; 12 | 13 | use crate::SimpleCommand; 14 | 15 | #[derive(Clone, Debug, clap::Parser)] 16 | /// Detect changes between two individual instructions. 17 | pub struct DetectChangesCommand { 18 | /// The left-hand side instruction 19 | lhs: Instruction, 20 | 21 | /// The right-hand side instruction 22 | rhs: Instruction, 23 | 24 | #[clap(skip)] 25 | _phantom: PhantomData, 26 | } 27 | 28 | impl SimpleCommand for DetectChangesCommand { 29 | type Setup = (MemoryAccesses, Dataflows); 30 | 31 | fn setup(&self, oracle: &mut impl Oracle) -> Self::Setup { 32 | let mut rng = Xoshiro256PlusPlus::seed_from_u64(rand::thread_rng().gen()); 33 | let instr = self.lhs; 34 | let accesses = MemoryAccessAnalysis::infer::(oracle, &instr).unwrap(); 35 | println!("Accesses ({}): {:#?}", accesses.len(), accesses); 36 | 37 | let dataflows = DataflowAnalysis::infer(&mut rng, oracle, &accesses).unwrap(); 38 | println!("Dataflows: {dataflows:#?}"); 39 | 40 | (accesses, dataflows) 41 | } 42 | 43 | fn run(&self, oracle: &mut impl Oracle, (accesses, dataflows): &mut Self::Setup) { 44 | let mut rng = Xoshiro256PlusPlus::seed_from_u64(rand::thread_rng().gen()); 45 | let cache = CombinedCache::default(); 46 | let mappable = Oracle::::mappable_area(oracle); 47 | let state_gen = StateGen::new(accesses, &mappable).unwrap(); 48 | let threshold_values = ThresholdValues::infer(oracle, &mut rng, &state_gen, dataflows); 49 | let change = ChangeAnalysis { 50 | cache: &cache, 51 | dataflows, 52 | state_gen: StateGen::new(&dataflows.addresses, &mappable).unwrap(), 53 | o: oracle, 54 | use_trap_flag: &mut false, 55 | threshold_values: &threshold_values, 56 | found_dependent_bytes: &mut false, 57 | } 58 | .detect_change(&mut rng, &self.rhs) 59 | .unwrap(); 60 | 61 | println!("Change: {change:?}"); 62 | } 63 | } 64 | -------------------------------------------------------------------------------- /cli/liblisa-libcli/src/infer_accesses.rs: 
-------------------------------------------------------------------------------- 1 | use std::marker::PhantomData; 2 | 3 | use liblisa::arch::Arch; 4 | use liblisa::encoding::dataflows::MemoryAccesses; 5 | use liblisa::instr::Instruction; 6 | use liblisa::oracle::Oracle; 7 | use liblisa_enc::MemoryAccessAnalysis; 8 | 9 | use crate::SimpleCommand; 10 | 11 | #[derive(Clone, Debug, clap::Parser)] 12 | /// Infer memory accesses for a single instruction 13 | pub struct InferAccessesCommand { 14 | /// The instruction to analyze. 15 | instr: Instruction, 16 | 17 | #[clap(skip)] 18 | _phantom: PhantomData, 19 | } 20 | 21 | impl SimpleCommand for InferAccessesCommand { 22 | type Setup = Option>; 23 | 24 | fn setup(&self, _oracle: &mut impl Oracle) -> Self::Setup { 25 | None 26 | } 27 | 28 | fn run(&self, oracle: &mut impl Oracle, prev: &mut Self::Setup) { 29 | let instr = self.instr; 30 | let accesses = MemoryAccessAnalysis::infer::(oracle, &instr); 31 | if accesses.is_err() || prev.as_ref() != accesses.as_ref().ok() { 32 | println!( 33 | "Accesses ({}): {accesses:#?}", 34 | accesses.as_ref().map(|m| m.len()).unwrap_or(0) 35 | ); 36 | } else { 37 | println!(" ( Identical )"); 38 | } 39 | 40 | *prev = accesses.ok(); 41 | } 42 | } 43 | -------------------------------------------------------------------------------- /cli/liblisa-libcli/src/infer_dataflows.rs: -------------------------------------------------------------------------------- 1 | use std::marker::PhantomData; 2 | 3 | use liblisa::arch::Arch; 4 | use liblisa::encoding::dataflows::{Dataflows, MemoryAccesses}; 5 | use liblisa::instr::Instruction; 6 | use liblisa::oracle::Oracle; 7 | use liblisa_enc::{DataflowAnalysis, MemoryAccessAnalysis}; 8 | use rand::{Rng, SeedableRng}; 9 | use rand_xoshiro::Xoshiro256PlusPlus; 10 | 11 | use crate::SimpleCommand; 12 | 13 | #[derive(Clone, Debug, clap::Parser)] 14 | /// Infer dataflows for a single instruction 15 | pub struct InferDataflowsCommand { 16 | /// The instruction to analyze. 17 | instr: Instruction, 18 | 19 | #[clap(skip)] 20 | _phantom: PhantomData, 21 | } 22 | 23 | impl SimpleCommand for InferDataflowsCommand { 24 | type Setup = (MemoryAccesses, Option>); 25 | 26 | fn setup(&self, oracle: &mut impl Oracle) -> Self::Setup { 27 | let instr = self.instr; 28 | 29 | let accesses = MemoryAccessAnalysis::infer::(oracle, &instr).unwrap(); 30 | println!("Accesses ({}): {:#?}", accesses.len(), accesses); 31 | 32 | (accesses, None) 33 | } 34 | 35 | fn run(&self, oracle: &mut impl Oracle, (accesses, prev): &mut Self::Setup) { 36 | let mut rng = Xoshiro256PlusPlus::seed_from_u64(rand::thread_rng().gen()); 37 | 38 | let dataflows = DataflowAnalysis::infer(&mut rng, oracle, accesses); 39 | if dataflows.is_err() || prev.as_ref() != dataflows.as_ref().ok() { 40 | println!("Dataflows: {dataflows:#?}"); 41 | } else { 42 | println!(" ( Identical )"); 43 | } 44 | 45 | *prev = dataflows.ok(); 46 | } 47 | } 48 | -------------------------------------------------------------------------------- /cli/liblisa-libcli/src/infer_encoding.rs: -------------------------------------------------------------------------------- 1 | use std::marker::PhantomData; 2 | 3 | use liblisa::arch::Arch; 4 | use liblisa::instr::Instruction; 5 | use liblisa::oracle::Oracle; 6 | use liblisa_enc::infer_encoding; 7 | 8 | use crate::SimpleCommand; 9 | 10 | #[derive(Clone, Debug, clap::Parser)] 11 | /// Infer an encoding for a single instruction 12 | pub struct InferEncodingCommand { 13 | /// The instruction to analyze. 
14 | instr: Instruction, 15 | 16 | #[clap(long)] 17 | /// When enabled, the JSON representation of the encoding is printed to stdout. 18 | print_json: bool, 19 | 20 | #[clap(skip)] 21 | _phantom: PhantomData, 22 | } 23 | 24 | impl SimpleCommand for InferEncodingCommand { 25 | type Setup = (); 26 | 27 | fn setup(&self, _oracle: &mut impl Oracle) -> Self::Setup {} 28 | 29 | fn run(&self, oracle: &mut impl Oracle, _: &mut Self::Setup) { 30 | let instr = self.instr; 31 | let mut encoding = infer_encoding::(&instr, oracle).unwrap(); 32 | println!("Encoding: {encoding}"); 33 | 34 | if self.print_json { 35 | let json = serde_json::to_string(&encoding).unwrap(); 36 | println!("Json = {json}"); 37 | } 38 | 39 | encoding.split_flag_output(); 40 | println!("Split flags encoding: {encoding}"); 41 | 42 | println!("Filters: {:?}", encoding.filters()); 43 | } 44 | } 45 | -------------------------------------------------------------------------------- /cli/liblisa-libcli/src/infer_validity.rs: -------------------------------------------------------------------------------- 1 | use std::marker::PhantomData; 2 | 3 | use liblisa::arch::Arch; 4 | use liblisa::instr::Instruction; 5 | use liblisa::oracle::Oracle; 6 | use liblisa_enc::Validity; 7 | 8 | use crate::SimpleCommand; 9 | 10 | #[derive(Clone, Debug, clap::Parser)] 11 | /// Check the validity of a single instruction 12 | pub struct InferValidityCommand { 13 | /// The instruction to analyze. 14 | instr: Instruction, 15 | 16 | #[clap(skip)] 17 | _phantom: PhantomData, 18 | } 19 | 20 | impl SimpleCommand for InferValidityCommand { 21 | type Setup = (); 22 | 23 | fn setup(&self, _oracle: &mut impl Oracle) -> Self::Setup {} 24 | 25 | fn run(&self, oracle: &mut impl Oracle, _: &mut Self::Setup) { 26 | let validity = Validity::infer::(oracle, &self.instr); 27 | println!("Validity: {validity:#?}"); 28 | } 29 | } 30 | -------------------------------------------------------------------------------- /cli/liblisa-libcli/src/observe.rs: -------------------------------------------------------------------------------- 1 | use std::marker::PhantomData; 2 | 3 | use liblisa::arch::Arch; 4 | use liblisa::instr::Instruction; 5 | use liblisa::oracle::Oracle; 6 | use liblisa::state::SystemState; 7 | 8 | use crate::{SimpleCommand, StateSpecArgs}; 9 | 10 | #[derive(Clone, Debug, clap::Parser)] 11 | /// Observe the CPU state after execution of an instruction 12 | pub struct ObserveCommand { 13 | /// The instruction to analyze. 14 | instr: Instruction, 15 | 16 | #[clap(flatten)] 17 | state_spec: StateSpecArgs, 18 | 19 | #[clap(long = "careful")] 20 | /// When enabled, observation will call observe_carefully(..) instead of observe(..). 
21 | careful: bool, 22 | 23 | #[clap(skip)] 24 | _phantom: PhantomData, 25 | } 26 | 27 | impl SimpleCommand for ObserveCommand { 28 | type Setup = (); 29 | 30 | fn setup(&self, _oracle: &mut impl Oracle) -> Self::Setup {} 31 | 32 | fn run(&self, oracle: &mut impl Oracle, _: &mut Self::Setup) { 33 | println!("Mappable area: {:X?}", oracle.mappable_area()); 34 | let default_pc = oracle.random_mappable_page(&mut rand::thread_rng()).start_addr(); 35 | let state: SystemState = self.state_spec.create_state(self.instr, default_pc.as_u64()); 36 | 37 | println!("Input {state:X?}"); 38 | 39 | let output = if self.careful { 40 | oracle.observe_carefully(&state) 41 | } else { 42 | oracle.observe(&state) 43 | }; 44 | println!("Output {output:X?}"); 45 | } 46 | } 47 | -------------------------------------------------------------------------------- /cli/liblisa-libcli/src/quick_enumerate.rs: -------------------------------------------------------------------------------- 1 | use std::collections::HashSet; 2 | use std::marker::PhantomData; 3 | 4 | use liblisa::arch::{Arch, FullScope}; 5 | use liblisa::instr::{FilterList, Instruction}; 6 | use liblisa::oracle::Oracle; 7 | use liblisa_enc::cache::CombinedCache; 8 | 9 | use crate::threadpool::enumeration::{AnalysisRequest, EnumWorkItem, EnumerationArtifactData}; 10 | use crate::SimpleCommand; 11 | 12 | #[derive(Clone, Debug, clap::Parser)] 13 | /// Run enumeration between two specified instructions, without saving results 14 | pub struct QuickEnumerateCommand { 15 | /// The starting point of the enumeration. 16 | instr: Instruction, 17 | 18 | #[clap(long)] 19 | /// An optional upper bound for the enumeration. 20 | to: Option, 21 | 22 | #[clap(skip)] 23 | _phantom: PhantomData, 24 | } 25 | 26 | impl SimpleCommand for QuickEnumerateCommand { 27 | type Setup = (); 28 | 29 | fn setup(&self, _oracle: &mut impl Oracle) -> Self::Setup {} 30 | 31 | fn run(&self, oracle: &mut impl Oracle, _: &mut Self::Setup) { 32 | let mut worker = EnumWorkItem::new(&self.instr, self.to); 33 | 34 | let cache = CombinedCache::::default(); 35 | let mut filters = FilterList::new(); 36 | let mut perfect_instrs_seen = HashSet::new(); 37 | while let Some(counter) = worker.next_instruction() { 38 | let instr = counter.current(); 39 | let request = AnalysisRequest::new(0, counter, filters.next_matching_instruction(&instr), FullScope); 40 | println!("Next: ({instr:X}) {request:?}"); 41 | let result = request.run(0, oracle, &cache); 42 | for filter in result.filters() { 43 | filters.add(filter); 44 | } 45 | 46 | if let Some(artifact) = worker.complete(instr, &filters, &mut perfect_instrs_seen, result) { 47 | match artifact { 48 | EnumerationArtifactData::Encoding(encoding) => { 49 | println!("{encoding}"); 50 | 51 | let json = serde_json::to_string(&encoding).unwrap(); 52 | println!("Json = {json}"); 53 | }, 54 | EnumerationArtifactData::Failed(instr) => println!("FAILED: {instr:?}"), 55 | EnumerationArtifactData::Excluded(instr) => println!("Excluded: {instr:?}"), 56 | EnumerationArtifactData::InvalidInstructions(range) => { 57 | println!("Invalid instructions between {:X} and {:X}", range.start, range.end) 58 | }, 59 | EnumerationArtifactData::MemoryErrorInstructions(range) => { 60 | println!("Bad memory accesses between {:X} and {:X}", range.start, range.end) 61 | }, 62 | } 63 | } 64 | } 65 | } 66 | } 67 | -------------------------------------------------------------------------------- /cli/liblisa-libcli/src/threadpool/cpu.rs: 
-------------------------------------------------------------------------------- 1 | use std::fs; 2 | use std::path::Path; 3 | 4 | use nix::sched::CpuSet; 5 | use nix::unistd::Pid; 6 | 7 | #[derive(Debug)] 8 | pub struct Cache { 9 | id: u32, 10 | level: u32, 11 | shared_with: CpuSet, 12 | } 13 | 14 | #[derive(Debug)] 15 | pub struct CpuCaches { 16 | caches: Vec, 17 | } 18 | 19 | #[derive(Debug)] 20 | pub struct CpuTopology { 21 | core_id: u32, 22 | // core_cpus: CpuSet, 23 | } 24 | 25 | #[derive(Debug)] 26 | pub struct CpuInfo { 27 | id: u32, 28 | caches: CpuCaches, 29 | topology: CpuTopology, 30 | } 31 | 32 | impl CpuInfo { 33 | pub fn all() -> Result, Box> { 34 | let mut result = Vec::new(); 35 | let path = "/sys/devices/system/cpu/"; 36 | for dir in std::fs::read_dir(path)? { 37 | let entry = dir?; 38 | if entry.file_type()?.is_dir() 39 | && entry.file_name().to_str().map(|v| v.starts_with("cpu")).unwrap_or(false) 40 | && entry 41 | .file_name() 42 | .to_str() 43 | .and_then(|s| s.strip_prefix("cpu").map(|s| s.trim().chars().all(char::is_numeric))) 44 | .unwrap_or(false) 45 | { 46 | result.push(Self::from_path(entry.path())?); 47 | } 48 | } 49 | 50 | Ok(result) 51 | } 52 | 53 | pub fn from_path>(path: P) -> Result> { 54 | let path = path.as_ref(); 55 | let id = path.file_name().unwrap().to_str().unwrap(); 56 | let id = id.strip_prefix("cpu").unwrap().parse::()?; 57 | 58 | Ok(CpuInfo { 59 | id, 60 | caches: CpuCaches::from_path(path.join("cache"))?, 61 | topology: CpuTopology::from_path(path.join("topology"))?, 62 | }) 63 | } 64 | 65 | pub fn id(&self) -> u32 { 66 | self.id 67 | } 68 | 69 | pub fn caches(&self) -> &CpuCaches { 70 | &self.caches 71 | } 72 | 73 | pub fn topology(&self) -> &CpuTopology { 74 | &self.topology 75 | } 76 | } 77 | 78 | fn hex_str_to_cpuset(s: &str) -> Result> { 79 | let mut set = CpuSet::new(); 80 | for (index, c) in s.trim().chars().rev().filter(|&c| c != ',').enumerate() { 81 | let n = u32::from_str_radix(&c.to_string(), 16)?; 82 | for offset in 0..4 { 83 | if (n >> offset) & 1 != 0 { 84 | set.set(index * 4 + offset)?; 85 | } 86 | } 87 | } 88 | 89 | Ok(set) 90 | } 91 | 92 | impl CpuCaches { 93 | pub fn from_path>(path: P) -> Result> { 94 | let mut caches = Vec::new(); 95 | for dir in std::fs::read_dir(&path)? 
{ 96 | let entry = dir?; 97 | if entry.file_type()?.is_dir() && entry.file_name().to_str().unwrap().starts_with("index") { 98 | // TODO: Id might not be available 99 | let id = fs::read_to_string(entry.path().join("id")) 100 | .unwrap_or(String::from("0")) 101 | .trim() 102 | .parse::()?; 103 | let level = fs::read_to_string(entry.path().join("level"))?.trim().parse::()?; 104 | let shared_cpu_map = fs::read_to_string(entry.path().join("shared_cpu_map"))?; 105 | let shared_with = hex_str_to_cpuset(&shared_cpu_map)?; 106 | 107 | caches.push(Cache { 108 | id, 109 | level, 110 | shared_with, 111 | }); 112 | } 113 | } 114 | 115 | Ok(CpuCaches { 116 | caches, 117 | }) 118 | } 119 | 120 | pub fn caches(&self) -> impl Iterator { 121 | self.caches.iter() 122 | } 123 | } 124 | 125 | impl Cache { 126 | pub fn id(&self) -> u32 { 127 | self.id 128 | } 129 | 130 | pub fn restrict_current_thread_affinity_to_shared_caches(&self) -> nix::Result<()> { 131 | nix::sched::sched_setaffinity(Pid::from_raw(0), &self.shared_with) 132 | } 133 | 134 | pub fn level(&self) -> u32 { 135 | self.level 136 | } 137 | 138 | pub fn shared_with(&self) -> CpuSet { 139 | self.shared_with 140 | } 141 | } 142 | 143 | impl CpuTopology { 144 | pub fn from_path>(path: P) -> Result> { 145 | let path = path.as_ref(); 146 | let core_id = fs::read_to_string(path.join("core_id"))?.trim().parse()?; 147 | // let core_cpus = hex_str_to_cpuset(&fs::read_to_string(path.join("core_cpus"))?)?; 148 | 149 | Ok(CpuTopology { 150 | core_id, 151 | // core_cpus, 152 | }) 153 | } 154 | 155 | pub fn core_id(&self) -> u32 { 156 | self.core_id 157 | } 158 | 159 | // pub fn core_cpus(&self) -> CpuSet { 160 | // self.core_cpus 161 | // } 162 | } 163 | -------------------------------------------------------------------------------- /cli/liblisa-libcli/src/threadpool/oracle.rs: -------------------------------------------------------------------------------- 1 | use std::collections::HashMap; 2 | use std::sync::atomic::{AtomicU64, Ordering}; 3 | 4 | use liblisa::arch::Arch; 5 | use liblisa::oracle::{Observation, Oracle, OracleError, OracleSource}; 6 | use liblisa::state::{Addr, AsSystemState, SystemState}; 7 | 8 | pub struct PooledOracle { 9 | source_id: u64, 10 | oracle: O, 11 | } 12 | 13 | impl> Oracle for PooledOracle { 14 | type MappableArea = >::MappableArea; 15 | 16 | fn mappable_area(&self) -> Self::MappableArea { 17 | self.oracle.mappable_area() 18 | } 19 | 20 | fn page_size(&mut self) -> u64 { 21 | self.oracle.page_size() 22 | } 23 | 24 | fn observe(&mut self, before: &SystemState) -> Result, OracleError> { 25 | self.oracle.observe(before) 26 | } 27 | 28 | fn scan_memory_accesses(&mut self, before: &SystemState) -> Result, OracleError> { 29 | self.oracle.scan_memory_accesses(before) 30 | } 31 | 32 | fn debug_dump(&mut self) { 33 | self.oracle.debug_dump() 34 | } 35 | 36 | fn restart(&mut self) { 37 | self.oracle.restart() 38 | } 39 | 40 | fn kill(self) { 41 | self.oracle.kill() 42 | } 43 | 44 | fn observe_carefully(&mut self, before: &SystemState) -> Result, OracleError> { 45 | self.oracle.observe_carefully(before) 46 | } 47 | 48 | fn batch_observe_iter<'a, S: AsSystemState + 'a, I: IntoIterator + 'a>( 49 | &'a mut self, states: I, 50 | ) -> impl Iterator> { 51 | self.oracle.batch_observe_iter(states) 52 | } 53 | 54 | fn batch_observe_gpreg_only_iter<'a, S: AsSystemState + 'a, I: IntoIterator + 'a>( 55 | &'a mut self, states: I, 56 | ) -> impl Iterator> { 57 | self.oracle.batch_observe_gpreg_only_iter(states) 58 | } 59 | 60 | const 
UNRELIABLE_INSTRUCTION_FETCH_ERRORS: bool = O::UNRELIABLE_INSTRUCTION_FETCH_ERRORS; 61 | } 62 | 63 | static NEXT_ID: AtomicU64 = AtomicU64::new(0); 64 | 65 | pub struct OracleGroup { 66 | num_oracles: usize, 67 | oracles: Vec, 68 | } 69 | 70 | pub struct OraclePool { 71 | source: S, 72 | idle: HashMap>, 73 | } 74 | 75 | impl OraclePool { 76 | pub fn new(source: S) -> Self { 77 | Self { 78 | source, 79 | idle: HashMap::new(), 80 | } 81 | } 82 | 83 | pub fn get(&mut self) -> PooledOracle { 84 | // Try to find the best available oracle 85 | let mut choices = self.idle.iter_mut().collect::>(); 86 | choices.sort_by_key(|(_, entry)| entry.oracles.len()); 87 | for (&key, entry) in choices { 88 | if let Some(oracle) = entry.oracles.pop() { 89 | println!("Returning observer from VM {key}"); 90 | return PooledOracle { 91 | source_id: key, 92 | oracle, 93 | } 94 | } 95 | } 96 | 97 | // Create a new oracle 98 | // SAFETY: This ID must be globally unique to ensure that oracles cannot be returned to the wrong VM. 99 | let key = NEXT_ID.fetch_add(1, Ordering::SeqCst); 100 | 101 | println!("Spawning new VM {key}"); 102 | let mut oracles = self.source.start(); 103 | let num_oracles = oracles.len(); 104 | let oracle = oracles.pop().unwrap(); 105 | 106 | self.idle.insert( 107 | key, 108 | OracleGroup { 109 | num_oracles, 110 | oracles, 111 | }, 112 | ); 113 | 114 | PooledOracle { 115 | source_id: key, 116 | oracle, 117 | } 118 | } 119 | 120 | pub fn release(&mut self, oracle: PooledOracle) { 121 | let key = oracle.source_id; 122 | 123 | println!("Returning observer to {key}"); 124 | if let Some(entry) = self.idle.get_mut(&key) { 125 | println!("Pushing on list of {} observers", entry.oracles.len()); 126 | entry.oracles.push(oracle.oracle); 127 | 128 | // If all oracles are idle, we can kill the process. 129 | if entry.oracles.len() >= entry.num_oracles { 130 | println!("Killing VM {key}"); 131 | self.idle.remove(&key).unwrap(); 132 | } 133 | } else { 134 | panic!("You released an oracle that does not belong to this pool."); 135 | } 136 | } 137 | } 138 | -------------------------------------------------------------------------------- /cli/liblisa-libcli/src/threadpool/work.rs: -------------------------------------------------------------------------------- 1 | use liblisa::arch::Arch; 2 | use liblisa::oracle::Oracle; 3 | 4 | pub trait Work { 5 | type RuntimeData; 6 | type Request; 7 | type Result; 8 | type Artifact; 9 | 10 | fn next(&mut self, data: &mut Self::RuntimeData) -> Option; 11 | fn complete(&mut self, data: &mut Self::RuntimeData, request: Self::Request, result: Self::Result) -> Option; 12 | 13 | fn run>(oracle: &mut O, cache: &C, request: &Self::Request) -> Self::Result; 14 | } 15 | -------------------------------------------------------------------------------- /cli/liblisa-semantics-tool/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "liblisa-semantics-tool" 3 | version = "0.1.1" 4 | edition = "2021" 5 | description = "A command-line tool that can query and manipulate libLISA's semantics." 
6 | license = "AGPL-3.0-only" 7 | documentation = "https://docs.liblisa.nl/" 8 | homepage = "https://liblisa.nl/" 9 | repository = "https://github.com/liblisa/liblisa" 10 | readme = "README.md" 11 | 12 | [features] 13 | default = ["nolog"] 14 | nolog = ["log/release_max_level_off"] 15 | 16 | [dependencies] 17 | liblisa = { version = "0.1.0", path = "../../liblisa", features = ["schemars", "z3"] } 18 | serde_json = "1.0" 19 | clap = { version = "4.4.8", features = ["derive"] } 20 | schemars = "0.8.16" 21 | log = "0.4" 22 | env_logger = "0.10" 23 | serde = { version = "1.0", features = [ "derive" ] } 24 | itertools = "0.12.0" 25 | rand = "0.8" 26 | rayon = "1.5" 27 | bincode = "1.3.3" 28 | 29 | [dev-dependencies] 30 | test-log = "0.2.10" 31 | -------------------------------------------------------------------------------- /cli/liblisa-semantics-tool/README.md: -------------------------------------------------------------------------------- 1 | # The `liblisa-semantics-tool` 2 | The `liblisa-semantics-tool` is a command-line program that can be used to query libLISA's semantics. 3 | It requires the raw semantics that can be downloaded from [here](https://osf.io/2hfq9/?view_only=a9fb6f0d639b46a287b0ade9f293b249). 4 | 5 | ## Installation 6 | Through [`cargo`](https://rustup.rs): 7 | ``` 8 | rustup toolchain install nightly # if you don't have it 9 | cargo +nightly install liblisa-semantics-tool 10 | ``` 11 | 12 | ## Usage 13 | Run the tool with the `--help` flag to see all possible commands. 14 | 15 | ### Semantics server 16 | One of the main features of the `liblisa-semantics-tool` is the *semantics server*. 17 | This allows semantics to be queried programmatically via stdin and stdout. 18 | The semantics server *instantiates* semantics: it fills in the correct registers, flags and immediate values from parts in the encoding. 19 | If you were using the raw semantics directly, you would have to implement this instantiation yourself. 20 | 21 | You can start the server with: 22 | 23 | ```bash 24 | liblisa-semantics-tool server amd-3900x.json 25 | ``` 26 | 27 | It may take a while to build the lookup table. 28 | This process can be cached by passing the `--cache lookup-table.cache` flag. 29 | However, note that no verification is performed to ensure that the loaded cache matches the original semantics. 30 | 31 | In order to query the semantics, provide an instruction in hexadecimal form followed by a newline to stdin. 32 | A JSON representation of the semantics will be printed to stdout. 33 | 34 | The `--debug` flag enables printing of debugging information to `stderr`. 35 | The JSON schema will be printed to `stderr` when the binary starts. 36 | A human-readable representation of the queried semantics is also printed to `stderr`. 
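For example, a single non-interactive query could be run as follows. This is a sketch rather than an excerpt from the repository: it reuses the `00d3` instruction bytes from the examples below and assumes the server exits once stdin reaches end-of-file.

```bash
# Pipe one hex-encoded instruction to the server and capture the JSON semantics on stdout.
# (Assumes the server terminates when stdin reaches EOF; otherwise keep the process
# alive and write one instruction per line, reading the JSON reply for each query.)
echo "00d3" | liblisa-semantics-tool server amd-3900x.json > semantics-00d3.json
```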
37 | 38 | ### Other examples 39 | Obtaining the JSON schema of the raw semantics: 40 | ```bash 41 | liblisa-semantics-tool schema 42 | ``` 43 | 44 | General statistics of the semantics: 45 | ```bash 46 | liblisa-semantics-tool get amd-3900x.json stats 47 | ``` 48 | 49 | Print a single encoding: 50 | ```bash 51 | liblisa-semantics-tool get amd-3900x.json encoding 00d3 52 | ``` 53 | 54 | Print *all* encodings (Note: this prints over a million lines of text): 55 | ```bash 56 | liblisa-semantics-tool get amd-3900x.json full-encodings 57 | ``` 58 | 59 | Print only the bitpatterns of all encodings: 60 | ```bash 61 | liblisa-semantics-tool get amd-3900x.json bitpatterns 62 | ``` 63 | 64 | 65 | ## Building 66 | Clone the repository and run: 67 | 68 | ``` 69 | cargo build --release --bin liblisa-semantics-tool 70 | ``` 71 | -------------------------------------------------------------------------------- /cli/liblisa-semantics-tool/src/progress.rs: -------------------------------------------------------------------------------- 1 | use std::fmt::{Debug, Display}; 2 | use std::io::Write; 3 | use std::ops::Deref; 4 | use std::sync::atomic::{AtomicBool, AtomicU32, AtomicUsize, Ordering}; 5 | use std::time::Duration; 6 | 7 | pub use crate::progress_data; 8 | 9 | #[macro_export] 10 | macro_rules! progress_data { 11 | ($name:ident $(<$( $lt:lifetime ),+>)? { $($field:ident: $ty:ty = $e:expr),* $(,)* }, $display:expr) => { 12 | { 13 | struct $name $(<$($lt),+>)? { 14 | $($field: $ty),* 15 | } 16 | 17 | impl $(<$($lt),+>)? std::fmt::Display for $name $(<$($lt),+>)? { 18 | fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { 19 | fn force_type(f: impl FnOnce(&mut std::fmt::Formatter<'_>, &$name) -> std::fmt::Result) 20 | -> impl FnOnce(&mut std::fmt::Formatter<'_>, &$name) -> std::fmt::Result { f } 21 | let display = force_type($display); 22 | 23 | display(f, self) 24 | } 25 | } 26 | 27 | #[allow(clippy::redundant_field_names)] 28 | $name { 29 | $($field: $e.into()),* 30 | } 31 | } 32 | } 33 | } 34 | 35 | pub struct ProgressUsize(AtomicUsize); 36 | 37 | impl Display for ProgressUsize { 38 | fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { 39 | let val = self.0.load(Ordering::Relaxed); 40 | Display::fmt(&val, f) 41 | } 42 | } 43 | 44 | impl Debug for ProgressUsize { 45 | fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { 46 | let val = self.0.load(Ordering::Relaxed); 47 | Debug::fmt(&val, f) 48 | } 49 | } 50 | 51 | impl From for ProgressUsize { 52 | fn from(value: usize) -> Self { 53 | Self::new(value) 54 | } 55 | } 56 | 57 | impl ProgressUsize { 58 | pub fn new(val: usize) -> Self { 59 | Self(AtomicUsize::new(val)) 60 | } 61 | 62 | pub fn increment(&self) { 63 | self.0.fetch_add(1, Ordering::Relaxed); 64 | } 65 | 66 | pub fn delayed_increment(&self) -> DelayedIncrement<'_> { 67 | DelayedIncrement(self) 68 | } 69 | } 70 | 71 | pub struct DelayedIncrement<'a>(&'a ProgressUsize); 72 | 73 | impl Drop for DelayedIncrement<'_> { 74 | fn drop(&mut self) { 75 | self.0.increment(); 76 | } 77 | } 78 | 79 | pub struct Progress { 80 | current: T, 81 | modified: AtomicU32, 82 | } 83 | 84 | impl Deref for Progress { 85 | type Target = T; 86 | 87 | fn deref(&self) -> &Self::Target { 88 | self.modified.fetch_add(1, Ordering::Relaxed); 89 | &self.current 90 | } 91 | } 92 | 93 | struct SetFalseIfDropped<'a>(&'a AtomicBool); 94 | 95 | impl Drop for SetFalseIfDropped<'_> { 96 | fn drop(&mut self) { 97 | self.0.store(false, Ordering::Relaxed); 98 | } 99 | } 100 | 101 | impl 
Progress { 102 | pub fn run(init: T, f: impl FnOnce(&Progress) -> K) -> K { 103 | let running = AtomicBool::new(true); 104 | let progress = Progress { 105 | current: init, 106 | modified: AtomicU32::new(0), 107 | }; 108 | let progress = &progress; 109 | 110 | let result = std::thread::scope(|scope: &std::thread::Scope<'_, '_>| { 111 | scope.spawn(|| { 112 | let mut last_printed_tick = 0; 113 | while running.load(Ordering::Relaxed) { 114 | let tick = progress.modified.load(Ordering::Relaxed); 115 | if tick != last_printed_tick { 116 | last_printed_tick = tick; 117 | eprint!("\r{}", progress.current); 118 | std::io::stderr().flush().unwrap(); 119 | } 120 | 121 | std::thread::sleep(Duration::from_millis(15)); 122 | } 123 | }); 124 | 125 | let run = || { 126 | // We need to use a struct here that sets running to false when it is dropped at the end of this scope. 127 | // f might panic, so we can't be sure that any code we execute after f terminates is ever executed. 128 | // Panicking will still try to drop the struct, guaranteeing that the progress printing thread in our scope actually terminates. 129 | let _guard = SetFalseIfDropped(&running); 130 | f(progress) 131 | }; 132 | 133 | run() 134 | }); 135 | 136 | eprintln!("\r{}", progress.current); 137 | 138 | result 139 | } 140 | } 141 | -------------------------------------------------------------------------------- /cli/liblisa-x64/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "liblisa-x64" 3 | version = "0.1.0" 4 | edition = "2021" 5 | description = "A tool for automated discovery and analysis of the ISA of a CPU." 6 | license = "AGPL-3.0-only" 7 | documentation = "https://docs.liblisa.nl/" 8 | homepage = "https://liblisa.nl/" 9 | repository = "https://github.com/liblisa/liblisa" 10 | 11 | [features] 12 | default = ["nolog"] 13 | nolog = ["log/release_max_level_off"] 14 | 15 | [dependencies] 16 | env_logger = "0.10" 17 | liblisa-libcli = { version = "0.1.0", path = "../../cli/liblisa-libcli" } 18 | liblisa = { version = "0.1.0", path = "../../liblisa" } 19 | liblisa-x64-observer = { version = "0.1.0", path = "../../arch/x64/liblisa-x64-observer" } 20 | clap = { version = "4.4.8", features = ["derive"] } 21 | log = "0.4.0" 22 | 23 | [dev-dependencies] 24 | test-log = "0.2.10" 25 | -------------------------------------------------------------------------------- /cli/liblisa-x64/src/main.rs: -------------------------------------------------------------------------------- 1 | use clap::Parser; 2 | use liblisa::arch::x64::{PrefixScope, X64Arch}; 3 | use liblisa_libcli::CliCommand; 4 | use liblisa_x64_observer::VmOracleSource; 5 | use log::trace; 6 | 7 | pub fn main() { 8 | env_logger::init(); 9 | 10 | let args = CliCommand::::parse(); 11 | trace!("Args: {args:#?}"); 12 | 13 | args.run(|affinity| VmOracleSource::new(Some(affinity), 2), PrefixScope); 14 | } 15 | -------------------------------------------------------------------------------- /crates.io_version_check.sh: -------------------------------------------------------------------------------- 1 | #! /bin/bash 2 | 3 | # Usage: crates.io_version_check.sh liblisa || cargo publish ... 
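# Exits 0 when the local crate version is already published on crates.io and 1 when it is not,
# so the `|| cargo publish` pattern above only publishes versions that have not been released yet.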
4 | 5 | PROJECT=$1 6 | LOCAL_VERSION=$(cargo tree -p $PROJECT | head -n 1 | cut -d ' ' -f2 | tail -c +2) 7 | 8 | echo "Checking version $LOCAL_VERSION" 9 | 10 | if curl https://crates.io/api/v1/crates/$PROJECT | jq --arg local_version "$LOCAL_VERSION" '.versions | any(.num == $local_version)' | grep true; then 11 | exit 0 12 | else 13 | exit 1 14 | fi 15 | -------------------------------------------------------------------------------- /crossbuild/arm64/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM ghcr.io/cross-rs/aarch64-unknown-linux-gnu:main 2 | 3 | RUN dpkg --add-architecture arm64 && \ 4 | apt-get update && \ 5 | apt-get install --assume-yes 'libclang-*-dev' libz3-dev libz3-dev:arm64 clang python3 -------------------------------------------------------------------------------- /crossbuild/x64-stretch/.gitignore: -------------------------------------------------------------------------------- 1 | out/* -------------------------------------------------------------------------------- /crossbuild/x64-stretch/build.sh: -------------------------------------------------------------------------------- 1 | #! /bin/bash 2 | 3 | docker build -t liblisa-crossbuild crossbuild/x64-stretch/docker && \ 4 | docker run --mount src=`pwd`,target=/files,type=bind \ 5 | --mount src=/usr/include,target=/usr/include,type=bind \ 6 | --mount src=`readlink -f ./crossbuild/x64-stretch/out`,target=/out,type=bind liblisa- -------------------------------------------------------------------------------- /crossbuild/x64-stretch/docker/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM debian:stretch 2 | 3 | RUN echo deb http://archive.debian.org/debian stretch-backports main | tee -a /etc/apt/sources.list.d/stretch-backports.list 4 | RUN echo deb http://archive.debian.org/debian stretch-backports-sloppy main | tee -a /etc/apt/sources.list.d/stretch-backports-sloppy.list 5 | RUN echo "deb http://archive.debian.org/debian stretch main contrib non-free" > /etc/apt/sources.list 6 | RUN apt-get update && apt-get upgrade -y 7 | RUN apt-get update && apt-get install -y git curl clang build-essential python3 linux-headers-amd64 libc6-dev libclang-dev cmake pkg-config unicorn libclang-5.0 libclang-5.0-dev llvm-dev z3 libz3-dev 8 | RUN apt-get update && apt-get -t stretch-backports-sloppy install -y libarchive13 9 | RUN apt-get update && apt-get -t stretch-backports install -y cmake 10 | RUN curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y --default-toolchain nightly-2024-03-06 11 | RUN $HOME/.cargo/bin/rustup component add rust-src --toolchain nightly-2024-03-06-x86_64-unknown-linux-gnu 12 | RUN $HOME/.cargo/bin/rustup component add llvm-tools-preview --toolchain nightly-2024-03-06-x86_64-unknown-linux-gnu 13 | 14 | RUN apt-get update && apt-get install -y git g++ 15 | 16 | ENV CARGO_HOME /out/.cargo 17 | 18 | CMD ["/bin/bash", "-c", "source $HOME/.cargo/env && cd /files && source ~/.bashrc && cargo build --release --bin liblisa-x64 --target-dir /out"] -------------------------------------------------------------------------------- /github_artifact_upload.sh: -------------------------------------------------------------------------------- 1 | #! /bin/bash 2 | 3 | if [ -z "$RELEASE_ID" ]; then 4 | RELEASE_ID=$(jq --raw-output '.release.id' "$GITHUB_EVENT_PATH") 5 | echo "Discovered RELEASE_ID: $RELEASE_ID" 6 | fi 7 | 8 | if ! 
curl \ 9 | -v \ 10 | -sSL \ 11 | -XPOST \ 12 | -H "Authorization: token $GITHUB_TOKEN" \ 13 | -H "Content-Type: application/octet-stream" \ 14 | --upload-file "$FILE" \ 15 | --fail \ 16 | "https://uploads.github.com/repos/$GITHUB_REPOSITORY/releases/$RELEASE_ID/assets?name=$FILENAME"; 17 | then 18 | exit 1 19 | fi -------------------------------------------------------------------------------- /liblisa-enc/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "liblisa-enc" 3 | version = "0.1.0" 4 | edition = "2021" 5 | description = "A tool for automated discovery and analysis of the ISA of a CPU." 6 | license = "AGPL-3.0-only" 7 | documentation = "https://docs.liblisa.nl/" 8 | homepage = "https://liblisa.nl/" 9 | repository = "https://github.com/liblisa/liblisa" 10 | 11 | [[bench]] 12 | name = "validity" 13 | harness = false 14 | 15 | [[bench]] 16 | name = "accesses" 17 | harness = false 18 | 19 | [[bench]] 20 | name = "dataflow" 21 | harness = false 22 | 23 | [[bench]] 24 | name = "observe" 25 | harness = false 26 | 27 | [[bench]] 28 | name = "skip" 29 | harness = false 30 | 31 | [dependencies] 32 | liblisa = { version = "0.1.0", path = "../liblisa" } 33 | thiserror = "1" 34 | serde = { version = "1.0", features = [ "derive" ] } 35 | rand = "0.8" 36 | rand_xoshiro = "0.6" 37 | itertools = "0.12.0" 38 | log = "0.4" 39 | arrayvec = "0.7" 40 | fxhash = "0.2.1" 41 | 42 | [dev-dependencies] 43 | criterion = "0.5" 44 | env_logger = "0.10" 45 | test-log = "0.2.10" 46 | liblisa-x64-observer = { version = "0.1.0", path = "../arch/x64/liblisa-x64-observer" } 47 | serde_json = "1.0" 48 | rmp-serde = "1.1.1" -------------------------------------------------------------------------------- /liblisa-enc/benches/accesses.rs: -------------------------------------------------------------------------------- 1 | use std::time::Duration; 2 | 3 | use criterion::{black_box, criterion_group, criterion_main, Criterion}; 4 | use liblisa::arch::x64::X64Arch; 5 | use liblisa::instr::Instruction; 6 | use liblisa::oracle::Oracle; 7 | use liblisa::state::random::StateGen; 8 | use liblisa_enc::MemoryAccessAnalysis; 9 | use liblisa_x64_observer::with_oracle; 10 | use rand::{Rng, SeedableRng}; 11 | use rand_xoshiro::Xoshiro256PlusPlus; 12 | 13 | fn infer(c: &mut Criterion) { 14 | with_oracle(|mut o| { 15 | let instr = Instruction::new(&[0x48, 0x31, 0xD0]); 16 | c.bench_function("Accesses::::infer[XOR rax, rdx]", |b| { 17 | b.iter(|| black_box(MemoryAccessAnalysis::infer::(&mut o, &instr))) 18 | }); 19 | 20 | let instr = Instruction::new(&[0xFF, 0x74, 0xB8, 0x01]); 21 | c.bench_function( 22 | "Accesses::::infer[PUSH QWORD PTR [rax+rdi*4+0x1]]", 23 | |b| b.iter(|| black_box(MemoryAccessAnalysis::infer::(&mut o, &instr))), 24 | ); 25 | }); 26 | } 27 | 28 | fn randomize_new(c: &mut Criterion) { 29 | with_oracle(|mut o| { 30 | let mut rng = Xoshiro256PlusPlus::seed_from_u64(rand::thread_rng().gen()); 31 | let mappable = Oracle::::mappable_area(&o); 32 | 33 | let instr = Instruction::new(&[0x48, 0x31, 0xD0]); 34 | let accesses = MemoryAccessAnalysis::infer::(&mut o, &instr).unwrap(); 35 | let state_gen = StateGen::new(&accesses, &mappable).unwrap(); 36 | c.bench_function("randomize_no_memory_accesses", |b| { 37 | b.iter(|| black_box(state_gen.randomize_new(&mut rng).unwrap())) 38 | }); 39 | 40 | let instr = Instruction::new(&[0xFF, 0x74, 0xB8, 0x01]); 41 | let accesses = MemoryAccessAnalysis::infer::(&mut o, &instr).unwrap(); 42 | let state_gen = 
StateGen::new(&accesses, &mappable).unwrap(); 43 | c.bench_function("randomize_double_memory_access", |b| { 44 | b.iter(|| black_box(state_gen.randomize_new(&mut rng).unwrap())) 45 | }); 46 | }); 47 | } 48 | 49 | criterion_group! { 50 | name = benches; 51 | config = Criterion::default().measurement_time(Duration::from_secs(15)); 52 | targets = infer, randomize_new 53 | } 54 | criterion_main!(benches); 55 | -------------------------------------------------------------------------------- /liblisa-enc/benches/dataflow.rs: -------------------------------------------------------------------------------- 1 | use criterion::{black_box, criterion_group, criterion_main, Criterion}; 2 | use liblisa::arch::x64::X64Arch; 3 | use liblisa::instr::Instruction; 4 | use liblisa_enc::{DataflowAnalysis, MemoryAccessAnalysis}; 5 | use liblisa_x64_observer::with_oracle; 6 | use rand::{Rng, SeedableRng}; 7 | use rand_xoshiro::Xoshiro256PlusPlus; 8 | 9 | fn infer(c: &mut Criterion) { 10 | with_oracle(|mut o| { 11 | let mut rng = Xoshiro256PlusPlus::seed_from_u64(rand::thread_rng().gen()); 12 | let instr = Instruction::new(&[0x30, 0xD0]); 13 | let memory_accesses = MemoryAccessAnalysis::infer::(&mut o, &instr).unwrap(); 14 | c.bench_function("Dataflow::::infer[XOR al, dl]", |b| { 15 | b.iter(|| black_box(DataflowAnalysis::infer(&mut rng, &mut o, &memory_accesses).unwrap())) 16 | }); 17 | 18 | let instr = Instruction::new(&[0x48, 0x31, 0xD0]); 19 | let memory_accesses = MemoryAccessAnalysis::infer::(&mut o, &instr).unwrap(); 20 | c.bench_function("Dataflow::::infer[XOR rax, rdx]", |b| { 21 | b.iter(|| black_box(DataflowAnalysis::infer(&mut rng, &mut o, &memory_accesses).unwrap())) 22 | }); 23 | 24 | let instr = Instruction::new(&[0xFF, 0x74, 0xB8, 0x01]); 25 | let memory_accesses = MemoryAccessAnalysis::infer::(&mut o, &instr).unwrap(); 26 | c.bench_function( 27 | "Dataflow::::infer[PUSH QWORD PTR [rax+rdi*4+0x1]]", 28 | |b| b.iter(|| black_box(DataflowAnalysis::infer(&mut rng, &mut o, &memory_accesses).unwrap())), 29 | ); 30 | }); 31 | } 32 | 33 | criterion_group!(benches, infer); 34 | criterion_main!(benches); 35 | -------------------------------------------------------------------------------- /liblisa-enc/benches/observe.rs: -------------------------------------------------------------------------------- 1 | use std::iter::repeat; 2 | use std::time::{Duration, Instant}; 3 | 4 | use criterion::{black_box, criterion_group, criterion_main, Criterion}; 5 | use liblisa::arch::x64::X64Arch; 6 | use liblisa::instr::Instruction; 7 | use liblisa::oracle::Oracle; 8 | use liblisa::state::random::random_state; 9 | use liblisa_x64_observer::with_oracle; 10 | use rand::{Rng, SeedableRng}; 11 | use rand_xoshiro::Xoshiro256PlusPlus; 12 | 13 | fn observe(c: &mut Criterion) { 14 | with_oracle(|mut o| { 15 | let instr = Instruction::new(&[0x48, 0x31, 0xD0]); 16 | let mappable = Oracle::::mappable_area(&o); 17 | 18 | let mut rng = Xoshiro256PlusPlus::seed_from_u64(rand::thread_rng().gen()); 19 | let page = o.random_mappable_page(&mut rng); 20 | let pc = page.first_address_after_page() - instr.byte_len() as u64; 21 | let mut state = random_state::(&mut rng, &instr, &mappable, pc.as_u64()); 22 | state.use_trap_flag = false; 23 | 24 | c.bench_function("observe[XOR rax, rdx]", |b| { 25 | b.iter_custom(|num| { 26 | let requests = repeat(&state).take(num as usize); 27 | let start = Instant::now(); 28 | o.batch_observe_iter(requests).for_each(|(_, r)| drop(black_box(r.unwrap()))); 29 | start.elapsed() 30 | }) 31 | }); 32 | }); 33 | } 34 
| 35 | criterion_group! { 36 | name = benches; 37 | config = Criterion::default().measurement_time(Duration::from_secs(15)); 38 | targets = observe 39 | } 40 | criterion_main!(benches); 41 | -------------------------------------------------------------------------------- /liblisa-enc/benches/skip.rs: -------------------------------------------------------------------------------- 1 | use std::str::FromStr; 2 | use std::time::Duration; 3 | 4 | use criterion::{black_box, criterion_group, criterion_main, Criterion}; 5 | use liblisa::instr::Instruction; 6 | use liblisa_enc::random_instr_bytes; 7 | use rand::{Rng, SeedableRng}; 8 | use rand_xoshiro::Xoshiro256PlusPlus; 9 | 10 | fn randomize_bytes(c: &mut Criterion) { 11 | let mut rng = Xoshiro256PlusPlus::seed_from_u64(rand::thread_rng().gen()); 12 | let start = Instruction::from_str("420F00B000000000").unwrap(); 13 | 14 | c.bench_function("randomize_bytes", |b| { 15 | b.iter(|| black_box(random_instr_bytes(&mut rng, start, None))) 16 | }); 17 | } 18 | 19 | criterion_group! { 20 | name = benches; 21 | config = Criterion::default().measurement_time(Duration::from_secs(15)); 22 | targets = randomize_bytes 23 | } 24 | criterion_main!(benches); 25 | -------------------------------------------------------------------------------- /liblisa-enc/benches/validity.rs: -------------------------------------------------------------------------------- 1 | use std::time::Duration; 2 | 3 | use criterion::{black_box, criterion_group, criterion_main, Criterion}; 4 | use liblisa::arch::x64::X64Arch; 5 | use liblisa::arch::FullScope; 6 | use liblisa::instr::Instruction; 7 | use liblisa_enc::Validity; 8 | use liblisa_x64_observer::with_oracle; 9 | 10 | fn infer(c: &mut Criterion) { 11 | with_oracle(|mut o| { 12 | let instr = Instruction::new(&[0x48, 0x31, 0xD0]); 13 | c.bench_function("Validity::::infer[XOR rax, rdx]", |b| { 14 | b.iter(|| black_box(Validity::infer::(&mut o, &instr))) 15 | }); 16 | 17 | let instr = Instruction::new(&[0xFF, 0x74, 0xB8, 0x01]); 18 | c.bench_function("Validity::::infer[PUSH QWORD PTR [rax+rdi*4+0x1]]", |b| { 19 | b.iter(|| black_box(Validity::infer::(&mut o, &instr))) 20 | }); 21 | 22 | let instrs = (0..=0xff) 23 | .map(|n| Instruction::new(&[0xFF, 0x74, 0xB8, n])) 24 | .collect::>(); 25 | c.bench_function("batch_infer-256-instrs", |b| { 26 | b.iter(|| black_box(Validity::infer_batch::(&mut o, &instrs, &FullScope)).for_each(drop)) 27 | }); 28 | 29 | let instrs = (0..0x1000) 30 | .map(|n| Instruction::new(&[0xFF, 0x74, (n >> 8) as u8, n as u8])) 31 | .collect::>(); 32 | c.bench_function("batch_infer-4096-instrs", |b| { 33 | b.iter(|| black_box(Validity::infer_batch::(&mut o, &instrs, &FullScope)).for_each(drop)) 34 | }); 35 | }); 36 | } 37 | 38 | criterion_group! 
{ 39 | name = benches; 40 | config = Criterion::default().measurement_time(Duration::from_secs(15)); 41 | targets = infer 42 | } 43 | criterion_main!(benches); 44 | -------------------------------------------------------------------------------- /liblisa-enc/src/changes/outputs.rs: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /liblisa-enc/src/cleanup/accesses.rs: -------------------------------------------------------------------------------- 1 | use liblisa::arch::Arch; 2 | use liblisa::encoding::bitpattern::{FlowValueLocation, PartMapping}; 3 | use liblisa::encoding::dataflows::Dataflows; 4 | use liblisa::encoding::Encoding; 5 | use liblisa::oracle::Oracle; 6 | use log::info; 7 | 8 | use crate::cache::EncodingAnalysisCache; 9 | use crate::changes; 10 | use crate::changes::DataflowChange; 11 | 12 | pub fn remove_incorrect_memory_computations>( 13 | o: &mut O, cache: &impl EncodingAnalysisCache, encoding: &mut Encoding, 14 | ) { 15 | for memory_computation_part_index in 0..encoding.parts.len() { 16 | let part = &encoding.parts[memory_computation_part_index]; 17 | if let PartMapping::MemoryComputation { 18 | mapping, 19 | memory_indexes, 20 | } = &part.mapping 21 | { 22 | let participating_parts = encoding.parts.iter() 23 | .enumerate() 24 | .flat_map(|(index, part)| if let PartMapping::Register { locations, mapping } = &part.mapping { 25 | if locations.iter().any(|loc| matches!(loc, FlowValueLocation::MemoryAddress { memory_index, .. } if memory_indexes.contains(memory_index))) { 26 | Some((index, part, mapping)) 27 | } else { 28 | None 29 | } 30 | } else { 31 | None 32 | }) 33 | .collect::>(); 34 | 35 | info!("Participating parts for {part:?} = {participating_parts:?}"); 36 | 37 | let mut invalid_mappings = vec![0; mapping.len()]; 38 | for (reg_part_index, _, part_mapping) in participating_parts.iter() { 39 | for (computation_val, computation_mapping) in mapping.iter().enumerate() { 40 | if computation_mapping.is_some() { 41 | for (reg_val, reg_mapping) in part_mapping.iter().enumerate() { 42 | if reg_mapping.is_some() { 43 | let mut part_values = encoding.extract_parts(encoding.dataflows.instr()); 44 | part_values[memory_computation_part_index] = computation_val as u64; 45 | part_values[*reg_part_index] = reg_val as u64; 46 | if let Ok(dataflows) = encoding.instantiate(&part_values) { 47 | info!("Checking {dataflows:?}"); 48 | if !check_mapping(cache, o, dataflows, encoding) { 49 | invalid_mappings[computation_val] += 1; 50 | } 51 | } 52 | } 53 | } 54 | } 55 | } 56 | } 57 | 58 | info!("Invalid mapping counts: {invalid_mappings:?}"); 59 | let part = &mut encoding.parts[memory_computation_part_index]; 60 | if let PartMapping::MemoryComputation { 61 | mapping, .. 
62 | } = &mut part.mapping 63 | { 64 | for (invalid, mapping) in invalid_mappings.iter().zip(mapping.iter_mut()) { 65 | if *invalid > 0 { 66 | *mapping = None; 67 | } 68 | } 69 | } else { 70 | unreachable!() 71 | } 72 | } 73 | } 74 | 75 | info!("Remaining encoding: {encoding}"); 76 | } 77 | 78 | fn check_mapping>( 79 | cache: &impl EncodingAnalysisCache, o: &mut O, dataflows: Dataflows, encoding: &Encoding, 80 | ) -> bool { 81 | if let Ok(new_memory_accesses) = cache.infer_accesses(o, dataflows.instr()) { 82 | info!("New memory accesses: {new_memory_accesses:?}"); 83 | 84 | let changes = DataflowChange::compare_memory_accesses(&encoding.dataflows.addresses, &new_memory_accesses); 85 | let memory_change = DataflowChange::into_change(changes); 86 | info!("Change after comparing memory accesses: {:?}", memory_change); 87 | if memory_change.is_invalid_or_error() { 88 | return false; 89 | } 90 | 91 | let memory_change = changes::find_memory_access_imm(&encoding.dataflows.addresses, &new_memory_accesses, memory_change); 92 | if memory_change.is_invalid_or_error() { 93 | return false; 94 | } 95 | 96 | info!("Change: {memory_change:?}"); 97 | true 98 | } else { 99 | false 100 | } 101 | } 102 | -------------------------------------------------------------------------------- /liblisa-enc/src/cleanup/bits.rs: -------------------------------------------------------------------------------- 1 | use liblisa::arch::{Arch, Register}; 2 | use liblisa::encoding::bitpattern::{Bit, PartMapping}; 3 | use liblisa::encoding::Encoding; 4 | use liblisa::utils::EitherIter; 5 | use log::info; 6 | 7 | pub fn remove_useless_bits(encoding: &mut Encoding) { 8 | let encoding_ref = &*encoding; 9 | let bits_to_remove = encoding 10 | .parts 11 | .iter() 12 | .enumerate() 13 | .flat_map(|(part_index, part)| match &part.mapping { 14 | PartMapping::Register { 15 | mapping, .. 16 | } => Some(EitherIter::Left((0..part.size).flat_map(move |bit_index| { 17 | let real_bit_index = encoding_ref 18 | .bits 19 | .iter() 20 | .enumerate() 21 | .filter(|(_, bit)| bit == &&Bit::Part(part_index)) 22 | .nth(bit_index) 23 | .unwrap() 24 | .0; 25 | let bit_value = encoding_ref.dataflows.instr().nth_bit_from_right(real_bit_index); 26 | 27 | let any_useful = mapping 28 | .iter() 29 | .enumerate() 30 | .filter(|(index, _)| (index >> bit_index) & 1 != bit_value as usize) 31 | .any(|(_, reg)| reg.as_ref().map(|reg| !reg.is_zero()).unwrap_or(false)); 32 | 33 | if any_useful { 34 | None 35 | } else { 36 | Some(real_bit_index) 37 | } 38 | }))), 39 | PartMapping::MemoryComputation { 40 | mapping, .. 
41 | } => Some(EitherIter::Right((0..part.size).flat_map(move |bit_index| { 42 | let real_bit_index = encoding_ref 43 | .bits 44 | .iter() 45 | .enumerate() 46 | .filter(|(_, bit)| bit == &&Bit::Part(part_index)) 47 | .nth(bit_index) 48 | .unwrap() 49 | .0; 50 | let bit_value = encoding_ref.dataflows.instr().nth_bit_from_right(real_bit_index); 51 | 52 | let any_useful = mapping 53 | .iter() 54 | .enumerate() 55 | .filter(|(index, _)| (index >> bit_index) & 1 != bit_value as usize) 56 | .any(|(_, computation)| computation.is_some()); 57 | 58 | if any_useful { 59 | None 60 | } else { 61 | Some(real_bit_index) 62 | } 63 | }))), 64 | _ => None, 65 | }) 66 | .flatten() 67 | .collect::>(); 68 | 69 | if !bits_to_remove.is_empty() { 70 | info!( 71 | "Removing the following bits because they're practically useless: {:?} in {}", 72 | bits_to_remove, encoding 73 | ); 74 | for bit_index in bits_to_remove { 75 | encoding.make_bit_fixed(bit_index).unwrap(); 76 | } 77 | 78 | info!("Result: {}", encoding); 79 | } 80 | } 81 | -------------------------------------------------------------------------------- /liblisa-enc/src/cleanup/mod.rs: -------------------------------------------------------------------------------- 1 | mod accesses; 2 | mod bits; 3 | mod dontcare; 4 | mod generalizations; 5 | 6 | pub use accesses::remove_incorrect_memory_computations; 7 | pub use bits::remove_useless_bits; 8 | pub use dontcare::DontCareValidator; 9 | pub use generalizations::remove_incorrect_generalizations; 10 | -------------------------------------------------------------------------------- /liblisa-enc/src/dataflow/flow.rs: -------------------------------------------------------------------------------- 1 | use std::cmp::Eq; 2 | use std::collections::HashMap; 3 | use std::fmt::Debug; 4 | use std::hash::Hash; 5 | 6 | use liblisa::arch::{Arch, Register}; 7 | use liblisa::state::{StateByte, SystemStateByteView, SystemStateByteViewReg}; 8 | 9 | #[derive(Clone, PartialEq, Eq, PartialOrd, Ord)] 10 | pub struct FlowItem { 11 | pub reg: SystemStateByteViewReg, 12 | pub start_byte: usize, 13 | pub end_byte: usize, 14 | } 15 | 16 | impl Debug for FlowItem { 17 | fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { 18 | write!(f, "{:?}[{}..{}]", self.reg, self.start_byte, self.end_byte) 19 | } 20 | } 21 | 22 | impl FlowItem { 23 | pub fn merge(&mut self, other: &FlowItem) { 24 | assert_eq!(self.reg, other.reg); 25 | self.start_byte = self.start_byte.min(other.start_byte); 26 | self.end_byte = self.end_byte.max(other.end_byte); 27 | } 28 | 29 | pub fn contains>(&self, view: SystemStateByteView, b: StateByte) -> bool { 30 | let (reg, index) = view.as_reg(b); 31 | reg == self.reg && index >= self.start_byte && index <= self.end_byte 32 | } 33 | } 34 | 35 | #[derive(Clone, PartialEq)] 36 | pub struct Dataflow { 37 | pub sources: Vec>, 38 | pub dest: FlowItem, 39 | pub unobservable_inputs: bool, 40 | } 41 | 42 | impl Debug for Dataflow { 43 | fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { 44 | write!(f, "{:?} => {:?}", self.sources, self.dest) 45 | } 46 | } 47 | 48 | pub fn merge_flowitems( 49 | items: impl Iterator>, 50 | ) -> impl Iterator> { 51 | let mut tmp = HashMap::new(); 52 | for item in items { 53 | tmp.entry(( 54 | item.reg, 55 | if item.reg.is_flags() { 56 | (item.start_byte, item.end_byte) 57 | } else { 58 | Default::default() 59 | }, 60 | )) 61 | .or_insert_with(|| item.clone()) 62 | .merge(&item); 63 | } 64 | 65 | tmp.into_values() 66 | } 67 | 68 | impl Dataflow { 69 | pub fn 
merge(&mut self, other: &Dataflow) { 70 | self.sources = merge_flowitems(self.sources.iter().cloned().chain(other.sources.iter().cloned())).collect(); 71 | self.dest.merge(&other.dest); 72 | } 73 | } 74 | 75 | impl Dataflow { 76 | pub fn sources_equal(&self, other: &Dataflow) -> bool { 77 | self.sources 78 | .iter() 79 | .all(|source| other.sources.iter().any(|other| other == source)) 80 | && other 81 | .sources 82 | .iter() 83 | .all(|source| self.sources.iter().any(|other| other == source)) 84 | } 85 | 86 | pub fn sources_overlap_but_not_equal(&self, other: &Dataflow) -> bool { 87 | // Check to make sure the sources aren't equal 88 | if self.sources_equal(other) { 89 | return false; 90 | } 91 | 92 | // Check if any sources are overlapping 93 | for lhs in self.sources.iter() { 94 | for rhs in other.sources.iter() { 95 | if lhs.reg == rhs.reg && lhs.start_byte <= rhs.end_byte && rhs.start_byte <= lhs.end_byte 96 | // Make sure lhs and rhs are not equal 97 | && (lhs.start_byte != rhs.start_byte || lhs.end_byte != rhs.end_byte) 98 | { 99 | return true; 100 | } 101 | } 102 | } 103 | 104 | false 105 | } 106 | 107 | pub fn num_source_bytes(&self) -> usize { 108 | self.sources 109 | .iter() 110 | .map(|source| source.end_byte - source.start_byte + 1) 111 | .sum() 112 | } 113 | 114 | pub fn destinations_overlap(&self, other: &Dataflow) -> bool { 115 | self.dest.end_byte >= other.dest.start_byte && self.dest.start_byte <= other.dest.end_byte 116 | } 117 | 118 | pub fn destinations_adjacent(&self, other: &Dataflow) -> bool { 119 | self.dest.end_byte + 1 == other.dest.start_byte || other.dest.end_byte + 1 == self.dest.start_byte 120 | } 121 | 122 | pub fn destination_flag_registers_match(&self, other: &Dataflow) -> bool { 123 | self.dest.reg.is_flags() && self.dest.reg == other.dest.reg 124 | } 125 | } 126 | -------------------------------------------------------------------------------- /liblisa-enc/src/skip/mod.rs: -------------------------------------------------------------------------------- 1 | mod random_search; 2 | mod tunnel; 3 | 4 | #[doc(hidden)] // see `random_instr_bytes` 5 | pub use random_search::random_instr_bytes; 6 | pub use random_search::random_search_skip_invalid_instrs; 7 | pub use tunnel::{tunnel_invalid_instrs, tunnel_memory_errors}; 8 | -------------------------------------------------------------------------------- /liblisa-synth/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "liblisa-synth" 3 | version = "0.1.0" 4 | edition = "2021" 5 | description = "A tool for automated discovery and analysis of the ISA of a CPU." 
6 | license = "AGPL-3.0-only" 7 | documentation = "https://docs.liblisa.nl/" 8 | homepage = "https://liblisa.nl/" 9 | repository = "https://github.com/liblisa/liblisa" 10 | 11 | [[bench]] 12 | name = "template_synthesis" 13 | harness = false 14 | 15 | [[bench]] 16 | name = "term_searcher" 17 | harness = false 18 | 19 | [dependencies] 20 | liblisa = { version = "0.1.0", path = "../liblisa" } 21 | itertools = "0.12.0" 22 | arrayvec = { version = "0.7", features = [ "serde" ] } 23 | serde = { version = "1.0", features = [ "derive" ] } 24 | rand_xoshiro = "0.6" 25 | log = "0.4" 26 | rand = "0.8" 27 | fxhash = "0.2.1" 28 | lazy_static = "1.4" 29 | thiserror = "1" 30 | hashbrown = "0.14.3" 31 | rayon = "1.5" 32 | 33 | [dev-dependencies] 34 | liblisa-x64-observer = { version = "0.1.0", path = "../arch/x64/liblisa-x64-observer" } 35 | criterion = "0.5" 36 | test-log = "0.2.10" 37 | env_logger = "0.10" 38 | rand_xoshiro = "0.6" 39 | -------------------------------------------------------------------------------- /liblisa-synth/benches/term_searcher.rs: -------------------------------------------------------------------------------- 1 | use std::time::Duration; 2 | 3 | use criterion::{black_box, criterion_group, criterion_main, Criterion}; 4 | use liblisa::semantics::IoType; 5 | use liblisa::value::OwnedValue; 6 | use liblisa_synth::search::termsearcher::BoolTermSearcher; 7 | use liblisa_synth::{Synthesizer, SynthesizerBase}; 8 | 9 | fn single_template_search(c: &mut Criterion) { 10 | c.bench_function("TermSearcher", |b| { 11 | b.iter(|| { 12 | let mut t = BoolTermSearcher::new( 13 | &[ 14 | IoType::Integer { 15 | num_bits: 64, 16 | }, 17 | IoType::Integer { 18 | num_bits: 64, 19 | }, 20 | ], 21 | IoType::Integer { 22 | num_bits: 1, 23 | }, 24 | ); 25 | 26 | use OwnedValue::*; 27 | 28 | let match_true = [ 29 | [Num(0x0), Num(0xD48765C89B2)], 30 | [Num(0x4000000000000000), Num(0xD48765C89B2)], 31 | [Num(0x4000000000000000), Num(0xD48765C89B2)], 32 | [Num(0x0), Num(0x69818482BFF60BA0)], 33 | [Num(0x0), Num(0x69818482BFF60BA0)], 34 | [Num(0xFFFFFFFFFFFF860B), Num(0xD48765C89B2)], 35 | [Num(0xFFFFFFFFFFFF860B), Num(0xD48765C89B2)], 36 | [Num(0xFFFFFFFFFFFF860B), Num(0xD48765C89B2)], 37 | [Num(0x0), Num(0x8000000000000000)], 38 | [Num(0x0), Num(0x8000000000000000)], 39 | [Num(0x0), Num(0x8000000000000000)], 40 | [Num(0xFFFFFFFFFFFF860B), Num(0x69818482BFF60BA0)], 41 | [Num(0xFFFFFFFFFFFF860B), Num(0x69818482BFF60BA0)], 42 | [Num(0xFFFFFFFFFFFF860B), Num(0x69818482BFF60BA0)], 43 | [Num(0x4000000000000000), Num(0x8000000000000000)], 44 | [Num(0x4000000000000000), Num(0x8000000000000000)], 45 | [Num(0x4000000000000000), Num(0x8000000000000000)], 46 | [Num(0xA525C00000000000), Num(0xD48765C89B2)], 47 | [Num(0xA525C00000000000), Num(0xD48765C89B2)], 48 | [Num(0xA525C00000000000), Num(0xD48765C89B2)], 49 | [Num(0xA525C00000000000), Num(0xD48765C89B2)], 50 | [Num(0x0), Num(0xBDFFFFFFFFFFFFFF)], 51 | [Num(0x0), Num(0xBDFFFFFFFFFFFFFF)], 52 | [Num(0x0), Num(0xBDFFFFFFFFFFFFFF)], 53 | [Num(0x0), Num(0xBDFFFFFFFFFFFFFF)], 54 | [Num(0xA525C00000000000), Num(0x69818482BFF60BA0)], 55 | [Num(0xA525C00000000000), Num(0x69818482BFF60BA0)], 56 | [Num(0xA525C00000000000), Num(0x69818482BFF60BA0)], 57 | [Num(0xA525C00000000000), Num(0x69818482BFF60BA0)], 58 | [Num(0x4000000000000000), Num(0xBDFFFFFFFFFFFFFF)], 59 | [Num(0x4000000000000000), Num(0xBDFFFFFFFFFFFFFF)], 60 | [Num(0x4000000000000000), Num(0xBDFFFFFFFFFFFFFF)], 61 | [Num(0x4000000000000000), Num(0xBDFFFFFFFFFFFFFF)], 62 | [Num(0xFFFFFFFFFFFF860B), 
Num(0xBDFFFFFFFFFFFFFF)], 63 | [Num(0xFFFFFFFFFFFF860B), Num(0xBDFFFFFFFFFFFFFF)], 64 | [Num(0xFFFFFFFFFFFF860B), Num(0xBDFFFFFFFFFFFFFF)], 65 | [Num(0xFFFFFFFFFFFF860B), Num(0xBDFFFFFFFFFFFFFF)], 66 | [Num(0x80BB83F8DD4EDFFF), Num(0xFFFFFFFFB9A08B02)], 67 | ]; 68 | let match_false = [ 69 | [Num(0x4000000000000000), Num(0x69818482BFF60BA0)], 70 | [Num(0x7E35000000000000), Num(0x7FFFFFFFFFFFFFFF)], 71 | [Num(0xA525C00000000000), Num(0x8000000000000000)], 72 | [Num(0xA525C00000000000), Num(0xBDFFFFFFFFFFFFFF)], 73 | [Num(0xFFFFFFFFFFFF860B), Num(0x8000000000000000)], 74 | ]; 75 | 76 | for case in match_true.into_iter() { 77 | t.add_case(&case, true); 78 | } 79 | 80 | for case in match_false.into_iter() { 81 | t.add_case(&case, false); 82 | } 83 | 84 | black_box(t) 85 | }) 86 | }); 87 | } 88 | 89 | criterion_group!( 90 | name = benches; 91 | config = Criterion::default().measurement_time(Duration::from_secs(30)).sample_size(10); 92 | targets = single_template_search 93 | ); 94 | criterion_main!(benches); 95 | -------------------------------------------------------------------------------- /liblisa-synth/src/cond/cache.rs: -------------------------------------------------------------------------------- 1 | use super::caselist::CaseIndex; 2 | 3 | #[derive(Clone, Debug)] 4 | struct Entry { 5 | match_true: Vec, 6 | match_false: Vec, 7 | synthesizer: S, 8 | last_use_tick: u64, 9 | } 10 | 11 | /// The SynthesizerCache can store used synthesizers. 12 | /// These synthesizers already have some cases applied on them. 13 | /// It saves some time when multiple iterations or multiple groups use identical or almost identical synthesizers. 14 | #[derive(Clone, Debug)] 15 | pub struct SynthesizerCache { 16 | synthesizers: Vec>, 17 | tick: u64, 18 | } 19 | 20 | impl Default for SynthesizerCache { 21 | fn default() -> Self { 22 | Self { 23 | synthesizers: Vec::new(), 24 | tick: 0, 25 | } 26 | } 27 | } 28 | 29 | impl SynthesizerCache { 30 | pub fn tick(&mut self) { 31 | self.tick += 1; 32 | 33 | self.synthesizers.retain(|item| item.last_use_tick + 2 > self.tick); 34 | } 35 | 36 | /// Returns the closest synthesizer that we can find as a tuple of `(synthesizer, is_exact)`. 37 | /// `is_exact` indicates whether the synthesizer has exactly matched all cases. 
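    ///
    /// A minimal usage sketch (illustrative only: `MySynthesizer`, `cases_true`, `cases_false`,
    /// `apply_missing_cases` and `build_from_scratch` are hypothetical stand-ins, not part of this crate):
    ///
    /// ```rust,ignore
    /// let mut cache: SynthesizerCache<MySynthesizer> = SynthesizerCache::default();
    ///
    /// // Clone the cached synthesizer (if any) so the borrow on the cache ends here.
    /// let cached = cache.get(&cases_true, &cases_false).map(|(s, exact)| (s.clone(), exact));
    ///
    /// let synthesizer = match cached {
    ///     // Exact hit: every requested case is already applied.
    ///     Some((synth, true)) => synth,
    ///     // Partial hit: only the cases the cached synthesizer has not seen yet
    ///     // still need to be applied.
    ///     Some((synth, false)) => apply_missing_cases(synth, &cases_true, &cases_false),
    ///     // Miss: build from scratch and store the result for later reuse.
    ///     None => {
    ///         let synth = build_from_scratch(&cases_true, &cases_false);
    ///         cache.add(&cases_true, &cases_false, synth.clone());
    ///         synth
    ///     },
    /// };
    /// ```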
38 | pub fn get(&mut self, match_true: &[CaseIndex], match_false: &[CaseIndex]) -> Option<(&S, bool)> { 39 | self.synthesizers 40 | .iter_mut() 41 | .filter(|entry| { 42 | entry.match_true.iter().all(|c| match_true.contains(c)) 43 | && entry.match_false.iter().all(|c| match_false.contains(c)) 44 | }) 45 | .max_by_key(|entry| entry.match_true.len() + entry.match_false.len()) 46 | .map(|s| { 47 | s.last_use_tick = self.tick; 48 | ( 49 | &s.synthesizer, 50 | s.match_true.len() == match_true.len() && s.match_false.len() == match_false.len(), 51 | ) 52 | }) 53 | } 54 | 55 | pub fn add(&mut self, match_true: &[CaseIndex], match_false: &[CaseIndex], synthesizer: S) { 56 | self.synthesizers.push(Entry { 57 | match_true: match_true.to_vec(), 58 | match_false: match_false.to_vec(), 59 | synthesizer, 60 | last_use_tick: self.tick, 61 | }) 62 | } 63 | 64 | #[must_use] 65 | pub fn len(&self) -> usize { 66 | self.synthesizers.len() 67 | } 68 | } 69 | -------------------------------------------------------------------------------- /liblisa-synth/src/cond/casemap.rs: -------------------------------------------------------------------------------- 1 | use std::fmt::{self, Debug}; 2 | 3 | use crate::SynthesizerOutput; 4 | 5 | #[derive(Copy, Clone, PartialEq, Eq, Hash, PartialOrd, Ord)] 6 | pub struct CaseMap(u64); 7 | 8 | impl CaseMap { 9 | pub fn new(matches: impl Iterator) -> CaseMap { 10 | CaseMap( 11 | matches 12 | .enumerate() 13 | .fold(0u64, |acc, (index, b)| acc | if b { 1 << index } else { 0 }), 14 | ) 15 | } 16 | 17 | pub fn new_from_u64(map: u64) -> CaseMap { 18 | CaseMap(map) 19 | } 20 | 21 | #[must_use] 22 | pub fn is_none(&self) -> bool { 23 | self.0 == 0 24 | } 25 | 26 | #[must_use] 27 | pub fn matches(&self, index: usize) -> bool { 28 | self.0 & (1 << index) != 0 29 | } 30 | 31 | #[must_use] 32 | pub fn first_index(&self) -> usize { 33 | self.0.trailing_zeros() as usize 34 | } 35 | 36 | #[must_use] 37 | pub fn as_u64(&self) -> u64 { 38 | self.0 39 | } 40 | 41 | #[must_use] 42 | pub fn overlaps(&self, other: CaseMap) -> bool { 43 | self.0 & other.0 != 0 || self.0 == other.0 44 | } 45 | 46 | #[must_use] 47 | pub fn len(&self) -> usize { 48 | self.0.count_ones() as usize 49 | } 50 | 51 | #[must_use] 52 | pub fn is_empty(&self) -> bool { 53 | self.len() == 0 54 | } 55 | 56 | pub fn restrict_to(&mut self, other: CaseMap) { 57 | self.0 &= other.0; 58 | } 59 | 60 | pub fn covers(&self, covered: CaseMap) -> bool { 61 | self.0 & covered.0 == covered.0 62 | } 63 | } 64 | 65 | impl SynthesizerOutput for CaseMap { 66 | type Borrowed<'o> 67 | = &'o CaseMap 68 | where 69 | Self: 'o; 70 | 71 | fn borrow(&self) -> Self::Borrowed<'_> { 72 | self 73 | } 74 | } 75 | 76 | impl Debug for CaseMap { 77 | fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { 78 | if self.0.is_power_of_two() { 79 | write!(f, "{}", self.first_index()) 80 | } else { 81 | write!(f, "<")?; 82 | 83 | let mut first = true; 84 | for index in 0..64 { 85 | if self.matches(index) { 86 | if !first { 87 | write!(f, ", ")?; 88 | } 89 | 90 | first = false; 91 | write!(f, "{index}")?; 92 | } 93 | } 94 | 95 | write!(f, ">") 96 | } 97 | } 98 | } 99 | -------------------------------------------------------------------------------- /liblisa-synth/src/cond/input_hash.rs: -------------------------------------------------------------------------------- 1 | use liblisa::semantics::IoType; 2 | use liblisa::value::OwnedValue; 3 | 4 | #[derive(Copy, Clone, Debug, PartialEq, Eq, Hash)] 5 | pub struct InputHash(u64); 6 | 7 | #[derive(Copy, Clone, Debug)] 
8 | pub struct InputsEqualQuery(u64); 9 | 10 | impl InputsEqualQuery { 11 | /// Returns false if lhs and rhs are not equal in the input indices specified when creating the query. 12 | /// Returns true if it is *possible* that lhs and rhs are equal. 13 | /// You should always perform a comparison of the real inputs to determine whether the inputs are actually equal. 14 | pub fn check(&self, lhs: InputHash, rhs: InputHash) -> bool { 15 | let differing_bits = lhs.0 ^ rhs.0; 16 | 17 | // If none of the bits we care about differ, the input indices specified when creating the query *might* be equal. 18 | differing_bits & self.0 == 0 19 | } 20 | } 21 | 22 | #[derive(Clone, Debug)] 23 | struct Input { 24 | ty: IoType, 25 | num_bits: usize, 26 | mask: u64, 27 | values: Vec, 28 | } 29 | 30 | /// Comparing `[OwnedValue]`s can be expensive. 31 | /// The `InputHasher` can compute hashes for a `[OwnedValue]`. 32 | /// These hashes can then be used to more quickly filter `[OwnedValue]`s that don't match. 33 | /// 34 | /// The hashes support checking for partial equality via [`InputsEqualQuery`], 35 | /// which can be created by calling `create_inputs_equal_query[_inv]`. 36 | /// 37 | /// The number of bits in the hash per input is scaled based on the input types. 38 | /// 1-bit inputs always use a perfect hash. 39 | /// Larger inputs use a perfect hash when possible within the number of bits used for the input. 40 | #[derive(Clone, Debug)] 41 | pub(crate) struct InputHasher { 42 | inputs: Vec, 43 | } 44 | 45 | impl InputHasher { 46 | pub fn new(input_types: &[IoType]) -> InputHasher { 47 | if input_types.is_empty() { 48 | return InputHasher { 49 | inputs: Vec::new(), 50 | }; 51 | } 52 | 53 | let bits_per_input = 64 / input_types.len(); 54 | 55 | let mut inputs = input_types 56 | .iter() 57 | .map(|&ty| Input { 58 | ty, 59 | num_bits: ty.num_bits().min(bits_per_input), 60 | mask: 0, 61 | values: Vec::new(), 62 | }) 63 | .collect::>(); 64 | 65 | let mut bits_in_use = inputs.iter().map(|i| i.num_bits).sum::(); 66 | 67 | loop { 68 | let mut done = true; 69 | for input in inputs.iter_mut() { 70 | if input.num_bits < input.ty.num_bits() { 71 | done = false; 72 | input.num_bits += 1; 73 | input.mask = (input.mask << 1) | 1; 74 | bits_in_use += 1; 75 | 76 | if bits_in_use >= 64 { 77 | break 78 | } 79 | } 80 | } 81 | 82 | if done { 83 | break 84 | } 85 | } 86 | 87 | InputHasher { 88 | inputs, 89 | } 90 | } 91 | 92 | pub fn hash(&mut self, inputs: &[OwnedValue]) -> InputHash { 93 | let mut result: u64 = 0; 94 | for (val, input) in inputs.iter().zip(self.inputs.iter_mut()).rev() { 95 | let index = if let Some(pos) = input.values.iter().position(|v| v == val) { 96 | pos 97 | } else { 98 | let pos = input.values.len(); 99 | input.values.push(val.clone()); 100 | pos 101 | }; 102 | 103 | result = (result.wrapping_shl(input.num_bits as u32)) | (index as u64 & input.mask); 104 | } 105 | 106 | InputHash(result) 107 | } 108 | 109 | fn create_mask(&self, indices: &[usize]) -> u64 { 110 | let mut result: u64 = 0; 111 | for (index, input) in self.inputs.iter().enumerate().rev() { 112 | let val = if indices.contains(&index) { input.mask } else { 0 }; 113 | 114 | result = result.wrapping_shl(input.num_bits as u32) | val; 115 | } 116 | 117 | result 118 | } 119 | 120 | /// Creates a query that checks whether all inputs except the inputs in `indices_that_may_be_unequal` are equal. 
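    ///
    /// A sketch of the intended pre-filtering pattern (illustrative only; the input types and
    /// the concrete values are made up for the example):
    ///
    /// ```rust,ignore
    /// use liblisa::semantics::IoType;
    /// use liblisa::value::OwnedValue::Num;
    ///
    /// // Three 64-bit integer inputs.
    /// let ty = IoType::Integer { num_bits: 64 };
    /// let mut hasher = InputHasher::new(&[ty, ty, ty]);
    ///
    /// let a = [Num(0x11), Num(0x22), Num(0x33)];
    /// let b = [Num(0x11), Num(0xff), Num(0x33)];
    /// let ha = hasher.hash(&a);
    /// let hb = hasher.hash(&b);
    ///
    /// // "Are all inputs *except* input #1 equal?"
    /// let query = hasher.create_inputs_equal_query_inv(&[1]);
    /// if query.check(ha, hb) {
    ///     // The hashes cannot rule out equality, so fall back to comparing the real
    ///     // values (the hash check may produce false positives, never false negatives).
    ///     assert_eq!(a[0], b[0]);
    ///     assert_eq!(a[2], b[2]);
    /// }
    /// ```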
121 | pub fn create_inputs_equal_query_inv(&self, indices_that_may_be_unequal: &[usize]) -> InputsEqualQuery { 122 | InputsEqualQuery(!self.create_mask(indices_that_may_be_unequal)) 123 | } 124 | } 125 | -------------------------------------------------------------------------------- /liblisa-synth/src/cond/mod.rs: -------------------------------------------------------------------------------- 1 | const MAX_INPUTS: usize = 20; 2 | 3 | pub mod cache; 4 | pub mod caselist; 5 | pub mod casemap; 6 | pub mod combine_simple; 7 | mod combiner; 8 | pub mod input_hash; 9 | pub mod isomorphisms; 10 | pub mod switch; 11 | pub mod synthesizer; 12 | pub mod transitions; 13 | -------------------------------------------------------------------------------- /liblisa-synth/src/cond/switch.rs: -------------------------------------------------------------------------------- 1 | use std::fmt; 2 | 3 | use liblisa::semantics::default::computation::{AsComputationRef, SynthesizedComputation}; 4 | use liblisa::utils::bitmap::GrowingBitmap; 5 | use liblisa::value::AsValue; 6 | 7 | use crate::predicate::Disjunction; 8 | 9 | #[derive(Clone, Debug)] 10 | pub struct SwitchCase { 11 | condition: Disjunction, 12 | index: usize, 13 | } 14 | 15 | impl SwitchCase { 16 | pub fn new(condition: Disjunction, index: usize) -> Self { 17 | Self { 18 | condition, 19 | index, 20 | } 21 | } 22 | } 23 | 24 | #[derive(Clone, Debug)] 25 | pub struct Switch { 26 | cases: Vec, 27 | } 28 | 29 | impl Switch { 30 | pub fn new() -> Self { 31 | Self { 32 | cases: Vec::new(), 33 | } 34 | } 35 | 36 | pub fn instantiate_with_outputs(&self, outputs: &[SynthesizedComputation]) -> SynthesizedComputation { 37 | let mut result = outputs[self.cases[self.cases.len() - 1].index].clone(); 38 | 39 | for group in self.cases.iter().rev().skip(1) { 40 | let condition = SynthesizedComputation::from(group.condition.clone()); 41 | let output = &outputs[group.index]; 42 | result = condition.if_zero(&result, output); 43 | } 44 | 45 | result 46 | } 47 | 48 | pub fn used_input_indices(&self) -> Vec { 49 | let mut seen = GrowingBitmap::new(); 50 | self.cases 51 | .iter() 52 | .flat_map(|group| group.condition.used_input_indices()) 53 | .filter(|&n| seen.set(n)) 54 | .collect() 55 | } 56 | 57 | pub fn remap_inputs(&mut self, map: &[Option]) { 58 | for case in self.cases.iter_mut() { 59 | case.condition.remap_inputs(map); 60 | } 61 | } 62 | 63 | pub fn add_case(&mut self, case: SwitchCase) { 64 | self.cases.push(case); 65 | } 66 | 67 | pub fn conditions(&self) -> impl Iterator { 68 | self.cases.iter().map(|case| &case.condition) 69 | } 70 | } 71 | 72 | impl Default for Switch { 73 | fn default() -> Self { 74 | Self { 75 | cases: vec![SwitchCase { 76 | condition: Disjunction::always_true(), 77 | index: 0, 78 | }], 79 | } 80 | } 81 | } 82 | 83 | impl Switch { 84 | pub fn evaluate(&self, inputs: &[V]) -> usize { 85 | for group in self.cases.iter() { 86 | if group.condition.evaluate(inputs) { 87 | return group.index; 88 | } 89 | } 90 | 91 | panic!("This should not happen!") 92 | } 93 | 94 | pub fn display<'a, S: AsRef>(&'a self, input_names: &'a [S]) -> DisplaySwitch<'a> { 95 | DisplaySwitch(self, input_names.iter().map(|s| s.as_ref().to_string()).collect::>()) 96 | } 97 | } 98 | 99 | #[derive(Debug)] 100 | pub struct DisplaySwitch<'a>(&'a Switch, Vec); 101 | 102 | impl fmt::Display for DisplaySwitch<'_> { 103 | fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { 104 | write!(f, "match {{ ")?; 105 | 106 | for case in self.0.cases.iter() { 107 | write!(f, "{} => 
{:X?}, ", case.condition.display(&self.1), case.index)?; 108 | } 109 | 110 | write!(f, "}}") 111 | } 112 | } 113 | -------------------------------------------------------------------------------- /liblisa-synth/src/tree/expr_finder/greedy.rs: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /liblisa-synth/src/tree/expr_finder/mod.rs: -------------------------------------------------------------------------------- 1 | use std::time::Instant; 2 | 3 | use itertools::Itertools; 4 | use liblisa::semantics::default::computation::ExpressionComputation; 5 | use liblisa::semantics::{Computation, IoType, ARG_NAMES}; 6 | use liblisa::value::{AsValue, OwnedValue, Value}; 7 | use log::info; 8 | 9 | use crate::{Synthesizer, SynthesizerBase}; 10 | 11 | pub mod bitmap_mcs; 12 | pub mod greedy; 13 | pub mod mcs; 14 | 15 | pub trait ExpressionFinder { 16 | fn new(input_types: &[IoType], output_type: IoType) -> Self; 17 | 18 | fn add_case(&mut self, inputs: &[V], output: Value); 19 | 20 | fn find_expressions(&mut self) -> Vec; 21 | 22 | fn set_timeout(&mut self, stop_at: Instant); 23 | 24 | fn has_given_up(&self) -> bool; 25 | } 26 | 27 | struct TestSynthesizer { 28 | mcs: Vec, 29 | f: F, 30 | } 31 | 32 | impl SynthesizerBase for TestSynthesizer { 33 | type Hypothesis = Vec; 34 | type Computation = Vec; 35 | 36 | fn new(input_types: &[IoType], output_type: IoType) -> Self { 37 | Self { 38 | mcs: Vec::new(), 39 | f: F::new(input_types, output_type), 40 | } 41 | } 42 | 43 | fn hypothesis(&self) -> Option<&Self::Hypothesis> { 44 | Some(&self.mcs) 45 | } 46 | 47 | fn has_given_up(&self) -> bool { 48 | self.f.has_given_up() 49 | } 50 | 51 | fn needs_requester(&self) -> bool { 52 | false 53 | } 54 | 55 | fn into_computation(self) -> Option { 56 | Some(self.mcs) 57 | } 58 | } 59 | 60 | impl Synthesizer for TestSynthesizer { 61 | fn add_case(&mut self, inputs: &[V], output: OwnedValue) { 62 | self.f.add_case(inputs, output.as_value()); 63 | self.mcs = self.f.find_expressions(); 64 | info!("Mcs = {}", self.mcs.iter().map(|expr| expr.display(ARG_NAMES)).format(", ")); 65 | } 66 | 67 | fn is_consistent(&self, inputs: &[V], output: Value<'_>) -> bool { 68 | self.mcs.iter().any(|expr| expr.compare_eq(inputs, output)) 69 | } 70 | } 71 | -------------------------------------------------------------------------------- /liblisa-synth/src/tree/mapping.rs: -------------------------------------------------------------------------------- 1 | use std::cmp::Ordering; 2 | use std::hash::Hash; 3 | use std::iter::once; 4 | 5 | #[derive(Clone, Debug)] 6 | pub struct PerfectMapping { 7 | runs: Vec, 8 | reset_indices: Vec, 9 | } 10 | 11 | #[derive(Copy, Clone, Debug)] 12 | pub struct Ptr(usize); 13 | 14 | impl Default for PerfectMapping { 15 | fn default() -> Self { 16 | Self { 17 | runs: Vec::new(), 18 | reset_indices: Vec::new(), 19 | } 20 | } 21 | } 22 | 23 | impl PerfectMapping { 24 | pub fn new() -> Self { 25 | Self::default() 26 | } 27 | 28 | pub fn get(&self, key: &K) -> Option { 29 | if self.is_empty() { 30 | return None 31 | } 32 | 33 | let ranges = self 34 | .reset_indices 35 | .windows(2) 36 | .map(|slice| <[_; 2]>::try_from(slice).unwrap()) 37 | .chain(once([*self.reset_indices.last().unwrap(), self.runs.len()])); 38 | for [mut start_index, mut end_index] in ranges { 39 | while start_index > end_index { 40 | let mid = start_index + (end_index - start_index) / 2; 41 | match self.runs[mid].cmp(key) { 42 | 
Ordering::Less => start_index = mid, 43 | Ordering::Equal => return Some(mid as u32), 44 | Ordering::Greater => end_index = mid, 45 | } 46 | } 47 | } 48 | 49 | None 50 | } 51 | 52 | pub fn get_inv(&self, n: u32) -> Option<&K> { 53 | self.runs.get(n as usize) 54 | } 55 | 56 | pub fn get_or_insert(&mut self, key: K) -> u32 { 57 | if let Some(index) = self.get(&key) { 58 | index 59 | } else { 60 | let index = self.runs.len(); 61 | if self.runs.last().map(|&last| key < last).unwrap_or(true) { 62 | self.reset_indices.push(index); 63 | } 64 | 65 | self.runs.push(key); 66 | 67 | index as u32 68 | } 69 | } 70 | 71 | pub fn iter_keys(&self) -> impl Iterator { 72 | self.runs.iter() 73 | } 74 | 75 | pub fn iter(&self) -> impl Iterator { 76 | self.runs.iter().enumerate().map(|(index, k)| (k, index as u32)) 77 | } 78 | 79 | pub fn len(&self) -> usize { 80 | self.runs.len() 81 | } 82 | 83 | pub fn is_empty(&self) -> bool { 84 | self.runs.is_empty() 85 | } 86 | 87 | pub fn pointer(&self) -> Ptr { 88 | Ptr(self.runs.len()) 89 | } 90 | 91 | pub fn iter_inv_added_since(&self, pointer: Ptr) -> impl Iterator { 92 | self.runs[pointer.0..] 93 | .iter() 94 | .enumerate() 95 | .map(move |(index, item)| (item, (index + pointer.0) as u32)) 96 | } 97 | } 98 | -------------------------------------------------------------------------------- /liblisa-synth/src/tree/mod.rs: -------------------------------------------------------------------------------- 1 | use liblisa::semantics::default::computation::PreparedComparison; 2 | use liblisa::semantics::InputValues; 3 | use liblisa::value::{AsValue, OwnedValue, Value}; 4 | 5 | use crate::search::{ComputationEnumerator, InterpretedArgs}; 6 | use crate::InputSlice; 7 | 8 | pub mod expr_finder; 9 | pub mod mapping; 10 | pub mod synthesizer; 11 | 12 | #[derive(Debug, Clone)] 13 | pub struct Case { 14 | inputs: Vec, 15 | output: OwnedValue, 16 | } 17 | 18 | impl Case { 19 | pub fn new(inputs: &[V], output: Value) -> Self { 20 | Case { 21 | inputs: inputs.as_owned(), 22 | output: output.to_owned_value(), 23 | } 24 | } 25 | 26 | pub fn inputs(&self) -> &[OwnedValue] { 27 | self.inputs.as_ref() 28 | } 29 | 30 | pub fn output(&self) -> Value { 31 | self.output.as_value() 32 | } 33 | } 34 | 35 | #[derive(Debug, Clone)] 36 | pub struct PreparedCase { 37 | args: InterpretedArgs, 38 | comparison: PreparedComparison, 39 | } 40 | 41 | impl PreparedCase { 42 | pub fn new(inputs: &[V], output: Value, enumerator: &ComputationEnumerator) -> Self { 43 | PreparedCase { 44 | args: enumerator.prepare_interpreted_args(inputs), 45 | comparison: PreparedComparison::from(&output), 46 | } 47 | } 48 | 49 | pub fn arg_slice(&self) -> &[i128] { 50 | self.args.as_slice() 51 | } 52 | } 53 | 54 | #[derive(Debug, Clone)] 55 | pub struct PreparedInputs { 56 | inputs: InputValues, 57 | args: InterpretedArgs, 58 | } 59 | 60 | impl PreparedInputs { 61 | pub fn new(inputs: &[V], enumerator: &ComputationEnumerator) -> Self { 62 | PreparedInputs { 63 | inputs: InputValues::from(inputs), 64 | args: enumerator.prepare_interpreted_args(inputs), 65 | } 66 | } 67 | 68 | pub fn arg_slice(&self) -> &[i128] { 69 | self.args.as_slice() 70 | } 71 | 72 | pub fn inputs(&self) -> &InputValues { 73 | &self.inputs 74 | } 75 | } 76 | -------------------------------------------------------------------------------- /liblisa-synth/src/utils/mod.rs: -------------------------------------------------------------------------------- 1 | use liblisa::semantics::default::Expr; 2 | pub mod delta_vec; 3 | 4 | pub const fn concat_exprs<'a, 
const A: usize, const B: usize>(a: [Expr<'a>; A], b: [Expr<'a>; B]) -> [Expr<'a>; A + B] { 5 | let mut whole: [Expr; A + B] = [Expr::const_default(); A + B]; 6 | let mut whole_index = 0; 7 | let mut index = 0; 8 | while index < A { 9 | whole[whole_index] = a[index]; 10 | index += 1; 11 | whole_index += 1; 12 | } 13 | 14 | let mut index = 0; 15 | while index < B { 16 | whole[whole_index] = b[index]; 17 | index += 1; 18 | whole_index += 1; 19 | } 20 | 21 | whole 22 | } 23 | -------------------------------------------------------------------------------- /liblisa/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "liblisa" 3 | version = "0.1.4" 4 | edition = "2021" 5 | description = "A tool for automated discovery and analysis of the ISA of a CPU." 6 | license = "AGPL-3.0-only" 7 | documentation = "https://docs.liblisa.nl/" 8 | homepage = "https://liblisa.nl/" 9 | repository = "https://github.com/liblisa/liblisa" 10 | readme = "README.md" 11 | 12 | [features] 13 | default = [ "z3", "x64-undef" ] 14 | x64-undef = [ "dep:rusty-xed" ] 15 | 16 | [[bench]] 17 | name = "randomization" 18 | harness = false 19 | 20 | [[bench]] 21 | name = "instantiation" 22 | harness = false 23 | 24 | [[bench]] 25 | name = "utils" 26 | harness = false 27 | 28 | [[bench]] 29 | name = "bitmap" 30 | harness = false 31 | 32 | [dependencies] 33 | thiserror = "1" 34 | serde = { version = "1.0", features = [ "derive" ] } 35 | rand = "0.8" 36 | itertools = "0.12.0" 37 | log = "0.4" 38 | hex = "0.4" 39 | arrayvec = { version = "0.7", features = [ "serde" ] } 40 | hashbrown = "0.14.3" 41 | bitcode = "0.5.0" 42 | schemars = { version = "0.8.16", features = ["arrayvec07"], optional = true } 43 | rayon = "1.5" 44 | fxhash = "0.2.1" 45 | sha1 = "0.10.6" 46 | arbitrary = { version = "1", features = ["derive"] } 47 | z3 = { version = "0.12.1", git = 'https://github.com/prove-rs/z3.rs', optional = true } 48 | rustc_apfloat = "0.2.0" 49 | rusty-xed = { version = "0.1.1", optional = true } 50 | 51 | [dev-dependencies] 52 | env_logger = "0.10" 53 | test-log = "0.2.10" 54 | serde_json = "1.0" 55 | criterion = "0.5" 56 | rand_xoshiro = "0.6" 57 | memoffset = "0.9.1" 58 | clap = { version = "4.4.8", features = ["derive"] } -------------------------------------------------------------------------------- /liblisa/README.md: -------------------------------------------------------------------------------- 1 | libLISA is a library for automated discovery and analysis of CPU instructions. 2 | This crate is the core library that can be used to load and manipulate already-analyzed datasets. 3 | Several separate crates are available for enumeration, synthesis and architecture support: 4 | 5 | - [`liblisa-enc`](https://crates.io/crates/liblisa-enc) for enumeration and encoding analysis 6 | - [`liblisa-synth`](https://crates.io/crates/liblisa-synth) for synthesis 7 | - [`liblisa-x64-observer`](https://crates.io/crates/liblisa-x64-observer) for observing instruction execution on x86-64 8 | 9 | # Loading semantics from disk 10 | Encodings support serde, and can be serialized and deserialized by any library that supports serde. 11 | By default, libLISA uses JSON. 
 12 | You can import these semantics as follows: 13 | 14 | ```rust,ignore 15 | use std::fs::File; 16 | use std::io::BufReader; 17 | use std::path::PathBuf; 18 | 19 | use liblisa::encoding::Encoding; 20 | use liblisa::semantics::default::computation::SynthesizedComputation; 21 | use liblisa::arch::x64::X64Arch; 22 | 23 | let file = File::open("semantics.json").unwrap(); 24 | let reader = BufReader::new(file); 25 | let semantics: Vec<Encoding<X64Arch, SynthesizedComputation>> = 26 | serde_json::from_reader(reader).unwrap(); 27 | ``` 28 | 29 | See [`encoding::Encoding`](https://docs.liblisa.nl/liblisa/encoding/struct.Encoding) for how these semantics can be used. 30 | 31 | # Features 32 | 33 | - `z3`: adds the `z3` crate as a dependency, and enables the Z3 implementation for `smt::SmtSolver`. 34 | - `x64-undef`: enables the `arch::x64::undef` namespace, which uses the XED disassembler library to provide definitions for undefined behavior. -------------------------------------------------------------------------------- /liblisa/benches/bitmap.rs: -------------------------------------------------------------------------------- 1 | use std::time::Duration; 2 | 3 | use criterion::{black_box, criterion_group, criterion_main, Criterion}; 4 | use liblisa::utils::bitmap::GrowingBitmap; 5 | 6 | fn bench_get(c: &mut Criterion) { 7 | let map = GrowingBitmap::new_all_ones(512); 8 | c.bench_function("GrowingBitmap::get", |b| { 9 | b.iter(|| { 10 | black_box(black_box(&map).get(211)); 11 | }) 12 | }); 13 | } 14 | 15 | fn bench_get_then_reset(c: &mut Criterion) { 16 | let mut map = GrowingBitmap::new_all_ones(512); 17 | c.bench_function("GrowingBitmap::get_then_reset", |b| { 18 | b.iter(|| { 19 | black_box(black_box(&mut map).get_then_reset(211)); 20 | }) 21 | }); 22 | } 23 | 24 | fn bench_set(c: &mut Criterion) { 25 | let mut map = GrowingBitmap::new_all_ones(512); 26 | c.bench_function("GrowingBitmap::set", |b| { 27 | b.iter(|| { 28 | black_box(black_box(&mut map).set(211)); 29 | }) 30 | }); 31 | } 32 | 33 | fn bench_reset(c: &mut Criterion) { 34 | let mut map = GrowingBitmap::new_all_ones(512); 35 | c.bench_function("GrowingBitmap::reset", |b| { 36 | b.iter(|| { 37 | black_box(black_box(&mut map).reset(211)); 38 | }) 39 | }); 40 | } 41 | 42 | criterion_group!
{ 43 | name = benches; 44 | config = Criterion::default().measurement_time(Duration::from_secs(15)); 45 | targets = bench_get, bench_get_then_reset, bench_set, bench_reset 46 | } 47 | 48 | criterion_main!(benches); 49 | -------------------------------------------------------------------------------- /liblisa/benches/randomization.rs: -------------------------------------------------------------------------------- 1 | use std::time::Duration; 2 | 3 | use criterion::{black_box, criterion_group, criterion_main, Criterion}; 4 | use liblisa::arch::fake::{AnyArea, FakeArch, FakeReg}; 5 | use liblisa::encoding::dataflows::{AccessKind, AddressComputation, Dest, Inputs, MemoryAccess, MemoryAccesses, Size, Source}; 6 | use liblisa::instr::Instruction; 7 | use liblisa::state::random::StateGen; 8 | use rand::SeedableRng; 9 | use rand_xoshiro::Xoshiro256PlusPlus; 10 | 11 | fn randomized_value(c: &mut Criterion) { 12 | let mut rng = Xoshiro256PlusPlus::seed_from_u64(0); 13 | c.bench_function("randomized_value", |b| { 14 | b.iter(|| { 15 | black_box(liblisa::state::random::randomized_value(&mut rng)); 16 | }) 17 | }); 18 | } 19 | 20 | fn randomized_bytes_into_buffer(c: &mut Criterion) { 21 | let mut rng = Xoshiro256PlusPlus::seed_from_u64(0); 22 | c.bench_function("randomized_bytes_into_buffer", |b| { 23 | b.iter(|| { 24 | let mut buffer = [0u8; 16]; 25 | liblisa::state::random::randomized_bytes_into_buffer(&mut rng, &mut buffer); 26 | black_box(&mut buffer); 27 | }) 28 | }); 29 | } 30 | 31 | fn randomize_new1(c: &mut Criterion) { 32 | let accesses = MemoryAccesses:: { 33 | instr: Instruction::new(&[0x00, 0x00]), 34 | memory: vec![MemoryAccess { 35 | kind: AccessKind::Executable, 36 | inputs: Inputs::sorted(vec![Source::Dest(Dest::Reg(FakeReg::R0, Size::qword()))]), 37 | size: 3..3, 38 | calculation: AddressComputation::unscaled_sum(1), 39 | alignment: 1, 40 | }], 41 | use_trap_flag: false, 42 | }; 43 | let state_gen = StateGen::new(&accesses, &AnyArea).unwrap(); 44 | 45 | let mut rng = Xoshiro256PlusPlus::seed_from_u64(0); 46 | c.bench_function("StateGen::randomize_new::<1 memory access>", |b| { 47 | b.iter(|| { 48 | black_box(state_gen.randomize_new(&mut rng).unwrap()); 49 | }) 50 | }); 51 | } 52 | 53 | fn randomize_new2(c: &mut Criterion) { 54 | let accesses = MemoryAccesses:: { 55 | instr: Instruction::new(&[0x00, 0x00]), 56 | memory: vec![ 57 | MemoryAccess { 58 | kind: AccessKind::Executable, 59 | inputs: Inputs::sorted(vec![Source::Dest(Dest::Reg(FakeReg::R0, Size::qword()))]), 60 | size: 3..3, 61 | calculation: AddressComputation::unscaled_sum(1), 62 | alignment: 1, 63 | }, 64 | MemoryAccess { 65 | kind: AccessKind::InputOutput, 66 | inputs: Inputs::sorted(vec![Source::Dest(Dest::Reg(FakeReg::R0, Size::qword()))]), 67 | size: 3..3, 68 | calculation: AddressComputation::unscaled_sum(1).with_offset(10), 69 | alignment: 1, 70 | }, 71 | ], 72 | use_trap_flag: false, 73 | }; 74 | let state_gen = StateGen::new(&accesses, &AnyArea).unwrap(); 75 | 76 | let mut rng = Xoshiro256PlusPlus::seed_from_u64(0); 77 | c.bench_function("StateGen::randomize_new::<2 memory access>", |b| { 78 | b.iter(|| { 79 | black_box(state_gen.randomize_new(&mut rng).unwrap()); 80 | }) 81 | }); 82 | } 83 | 84 | criterion_group! 
{ 85 | name = benches; 86 | config = Criterion::default().measurement_time(Duration::from_secs(15)); 87 | targets = randomized_value, randomized_bytes_into_buffer, randomize_new1, randomize_new2 88 | } 89 | criterion_main!(benches); 90 | -------------------------------------------------------------------------------- /liblisa/benches/terms.rs: -------------------------------------------------------------------------------- 1 | use std::time::Duration; 2 | 3 | use criterion::{black_box, criterion_group, criterion_main, Criterion}; 4 | use liblisa::encoding::dataflows::AddrTerm; 5 | use rand::{Rng, SeedableRng}; 6 | use rand_xoshiro::Xoshiro256PlusPlus; 7 | 8 | fn is_valid_delta(c: &mut Criterion) { 9 | let mut rng = Xoshiro256PlusPlus::seed_from_u64(0); 10 | let ts = AddrTerm::all(); 11 | let t = ts[ts.len() - 1]; 12 | c.bench_function("is_valid_delta", |b| { 13 | b.iter(|| { 14 | black_box(t.is_valid_delta(rng.gen(), rng.gen(), rng.gen(), rng.gen())); 15 | }) 16 | }); 17 | } 18 | 19 | criterion_group! { 20 | name = benches; 21 | config = Criterion::default().measurement_time(Duration::from_secs(15)); 22 | targets = is_valid_delta, 23 | } 24 | 25 | criterion_main!(benches); 26 | -------------------------------------------------------------------------------- /liblisa/benches/utils.rs: -------------------------------------------------------------------------------- 1 | use std::time::Duration; 2 | 3 | use criterion::{black_box, criterion_group, criterion_main, Criterion}; 4 | use liblisa::utils::{create_from_le_bytes, sign_extend}; 5 | 6 | fn bench_sign_extend(c: &mut Criterion) { 7 | c.bench_function("sign_extend", |b| { 8 | b.iter(|| { 9 | black_box(sign_extend(black_box(0), black_box(64))); 10 | }) 11 | }); 12 | } 13 | 14 | fn bench_u128_from_le_bytes(c: &mut Criterion) { 15 | for n in 1..=16 { 16 | let bytes = vec![0x12; n]; 17 | c.bench_function(&format!("u128_from_le_bytes({n} bytes)"), |b| { 18 | b.iter(|| { 19 | black_box(create_from_le_bytes(black_box(&bytes), |n| n as i128, |n| n as i128)); 20 | }) 21 | }); 22 | } 23 | } 24 | 25 | criterion_group! 
{ 26 | name = benches; 27 | config = Criterion::default().measurement_time(Duration::from_secs(15)); 28 | targets = bench_sign_extend, bench_u128_from_le_bytes 29 | } 30 | 31 | criterion_main!(benches); 32 | -------------------------------------------------------------------------------- /liblisa/examples/liblisa-encoding-to-z3.rs: -------------------------------------------------------------------------------- 1 | use std::fs::File; 2 | use std::io::BufReader; 3 | use std::path::PathBuf; 4 | use std::time::Duration; 5 | 6 | use clap::Parser; 7 | use liblisa::arch::x64::X64Arch; 8 | use liblisa::encoding::Encoding; 9 | use liblisa::instr::Instruction; 10 | use liblisa::semantics::default::computation::SynthesizedComputation; 11 | use liblisa::semantics::default::smtgen::{StorageLocations, Z3Model}; 12 | use liblisa::smt::z3::{Z3Solver, BV}; 13 | use liblisa::smt::SmtBV; 14 | 15 | #[derive(Clone, Debug, clap::Parser)] 16 | pub struct Args { 17 | encodings: PathBuf, 18 | instr: Instruction, 19 | 20 | #[clap(long)] 21 | instantiate: bool, 22 | } 23 | 24 | fn shortest_smt(smt: BV) -> String { 25 | let simplified_str = smt.clone().simplify().to_string(); 26 | let normal_str = smt.to_string(); 27 | if simplified_str.len() < normal_str.len() { 28 | simplified_str 29 | } else { 30 | normal_str 31 | } 32 | } 33 | 34 | pub fn main() { 35 | env_logger::init(); 36 | 37 | let args = Args::parse(); 38 | 39 | println!("Loading encodings..."); 40 | let encodings: Vec> = 41 | serde_json::from_reader(BufReader::new(File::open(args.encodings).unwrap())).unwrap(); 42 | 43 | Z3Solver::with_thread_local(Duration::from_secs(30), |mut context| { 44 | let mut storage = StorageLocations::new(&mut context); 45 | 46 | for encoding in encodings.iter() { 47 | if encoding.bitpattern_as_filter().matches(&args.instr) && encoding.filters().iter().any(|f| f.matches(&args.instr)) { 48 | println!("Generating z3 model of: {encoding}"); 49 | let encoding = if args.instantiate { 50 | let part_values = encoding.extract_parts(&args.instr).into_iter().map(Some).collect::>(); 51 | let encoding = encoding.instantiate_partially(&part_values).unwrap(); 52 | println!("Instantiated to: {encoding}"); 53 | encoding 54 | } else { 55 | encoding.clone() 56 | }; 57 | 58 | let model = Z3Model::of(&encoding, &mut storage, &mut context); 59 | let concrete = model.compute_concrete_outputs(&encoding, &mut storage, &mut context); 60 | 61 | println!(); 62 | 63 | for item in model.constraints() { 64 | println!("constraint: {item}"); 65 | } 66 | 67 | for &index in concrete.intermediate_values_needed() { 68 | let intermediate = &model.intermediate_outputs()[index]; 69 | if let Some(smt) = intermediate.smt() { 70 | println!("intermediate: {:?} = {}", intermediate.name(), shortest_smt(smt.clone())) 71 | } else { 72 | println!("intermediate: {:?} = ", intermediate.name()) 73 | } 74 | } 75 | 76 | println!(); 77 | 78 | for part_name in concrete.part_names().iter() { 79 | println!("part: {:?} = {:?}", part_name.name(), part_name.smt()) 80 | } 81 | 82 | println!(); 83 | 84 | for output in concrete.concrete_outputs().iter() { 85 | if let Some(smt) = output.smt() { 86 | println!("output: {:?} = {}", output.target(), shortest_smt(smt.clone())) 87 | } else { 88 | println!("output: {:?} = ", output.target()) 89 | } 90 | } 91 | } 92 | } 93 | }); 94 | } 95 | -------------------------------------------------------------------------------- /liblisa/src/arch/scope.rs: -------------------------------------------------------------------------------- 1 | //! 
Defines [`Scope`], which can be used to specify which instructions should be enumerated. 2 | 3 | use std::fmt::Debug; 4 | 5 | use serde::{Deserialize, Serialize}; 6 | 7 | /// Defines an instruction scope for an architecture. 8 | /// This can be used to limit which parts of the instruction space are enumerated. 9 | pub trait Scope: Clone + Debug + Send + Sync { 10 | /// Returns true if and only if `instr` should be enumerated. 11 | fn is_instr_in_scope(&self, instr: &[u8]) -> bool; 12 | } 13 | 14 | /// A [`Scope`] that considers every instruction in-scope. 15 | #[derive(Copy, Clone, Debug, Serialize, Deserialize)] 16 | pub struct FullScope; 17 | 18 | impl Scope for FullScope { 19 | fn is_instr_in_scope(&self, _instr: &[u8]) -> bool { 20 | true 21 | } 22 | } 23 | -------------------------------------------------------------------------------- /liblisa/src/compare/summary.rs: -------------------------------------------------------------------------------- 1 | //! A condensed summary of the architecture comparison, suitable for export to a file. 2 | 3 | use serde::{Deserialize, Serialize}; 4 | 5 | use crate::arch::Arch; 6 | use crate::encoding::indexed::{EncodingId, IndexedEncodings}; 7 | use crate::instr::InstructionFilter; 8 | use crate::semantics::Computation; 9 | 10 | /// Information about an architecture. 11 | #[derive(Clone, Debug, PartialEq, Eq, Hash, Serialize, Deserialize)] 12 | pub struct ArchInfo { 13 | /// A human-readable name of the architecture. 14 | pub name: String, 15 | } 16 | 17 | /// An identifier that references an architecture. 18 | #[derive(Copy, Clone, Debug, PartialEq, Eq, Hash, Serialize, Deserialize)] 19 | pub struct ArchId(pub usize); 20 | 21 | /// Set of encodings shared between architectures. 22 | #[derive(Serialize, Deserialize)] 23 | pub struct SharedEncodings { 24 | /// The architectures that share the semantics of `encodings`. 25 | pub architectures: Vec, 26 | 27 | /// The encodings that have equivalent semantics on all `architectures`. 28 | pub encodings: Vec, 29 | } 30 | 31 | /// A grouping of sets of encodings that cover the exact same instruction space (when restricted to `filter`). 32 | #[derive(Serialize, Deserialize)] 33 | pub struct SharedEncodingGroup { 34 | /// The filter to which all encodings should be restricted. 35 | pub filter: InstructionFilter, 36 | 37 | /// The sets of encodings. 38 | pub encodings: Vec, 39 | } 40 | 41 | /// A collection of architectures and encoding groups that describes which encodings are semantically equivalent between different architectures. 42 | #[derive(Serialize, Deserialize)] 43 | #[serde(bound(serialize = "C: Serialize", deserialize = "C: Deserialize<'de>"))] 44 | pub struct ArchComparisonSummary { 45 | /// [`ArchInfo`] for each architecture. 46 | pub architectures: Vec, 47 | 48 | /// Groups of encodings and their semantics. 49 | pub encodings: Vec, 50 | 51 | /// An indexing of [`EncodingId`] to [`Encoding`](crate::encoding::Encoding). 52 | pub index: IndexedEncodings, 53 | } 54 | -------------------------------------------------------------------------------- /liblisa/src/encoding/dataflows/inputs.rs: -------------------------------------------------------------------------------- 1 | use std::fmt::{Debug, Display}; 2 | use std::ops::{Index, IndexMut}; 3 | 4 | use serde::{Deserialize, Serialize}; 5 | 6 | use super::{Dest, Source}; 7 | use crate::arch::Arch; 8 | 9 | /// A set of inputs to a dataflow or memory address computation. 
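/// 
/// A minimal construction sketch (illustrative only; it uses the `FakeArch` test architecture,
/// and `FakeReg::R1` is assumed to exist alongside `FakeReg::R0`):
/// 
/// ```rust,ignore
/// use liblisa::arch::fake::{FakeArch, FakeReg};
/// use liblisa::encoding::dataflows::{Dest, Inputs, Size, Source};
///
/// // Two full-width register inputs; `sorted` puts them in canonical order.
/// let inputs: Inputs<FakeArch> = Inputs::sorted(vec![
///     Source::Dest(Dest::Reg(FakeReg::R1, Size::qword())),
///     Source::Dest(Dest::Reg(FakeReg::R0, Size::qword())),
/// ]);
///
/// assert_eq!(inputs.len(), 2);
/// assert!(inputs.contains(&Source::Dest(Dest::Reg(FakeReg::R0, Size::qword()))));
/// ```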
 10 | #[cfg_attr(feature = "schemars", derive(schemars::JsonSchema))] 11 | #[cfg_attr( 12 | feature = "schemars", 13 | schemars(bound = "A: schemars::JsonSchema, A::Reg: schemars::JsonSchema") 14 | )] 15 | #[derive(Clone, PartialEq, Eq, Hash, Serialize, Deserialize, Default)] 16 | #[serde(bound = "")] 17 | pub struct Inputs { 18 | inputs: Vec>, 19 | } 20 | 21 | impl Inputs { 22 | /// Creates the inputs from the provided `inputs` without sorting. 23 | /// 24 | /// You should not sort inputs if you already have references to specific indices in the inputs. 25 | /// For example, if the inputs are part of a dataflow in an encoding. 26 | #[inline] 27 | pub fn unsorted(inputs: Vec>) -> Self { 28 | Inputs { 29 | inputs, 30 | } 31 | } 32 | 33 | /// Creates the inputs from the provided `inputs` and sorts them. 34 | /// 35 | /// You should not sort inputs if you already have references to specific indices in the inputs. 36 | /// For example, if the inputs are part of a dataflow in an encoding. 37 | #[inline] 38 | pub fn sorted(mut inputs: Vec>) -> Self { 39 | inputs.sort(); 40 | Self::unsorted(inputs) 41 | } 42 | 43 | /// Iterates over all inputs 44 | #[inline] 45 | pub fn iter(&self) -> impl Iterator> { 46 | self.inputs.iter() 47 | } 48 | 49 | /// Iterates over `&mut` references to all inputs. 50 | #[inline] 51 | pub fn iter_mut(&mut self) -> impl Iterator> { 52 | self.inputs.iter_mut() 53 | } 54 | 55 | /// Removes all inputs for which `f` returns false. 56 | #[inline] 57 | pub fn retain(&mut self, f: impl FnMut(&Source) -> bool) { 58 | self.inputs.retain(f); 59 | } 60 | 61 | /// Returns the number of inputs. 62 | #[inline] 63 | pub fn len(&self) -> usize { 64 | self.inputs.len() 65 | } 66 | 67 | /// Returns true if the set of inputs is empty. 68 | #[inline] 69 | pub fn is_empty(&self) -> bool { 70 | self.inputs.is_empty() 71 | } 72 | 73 | /// Returns true if the set of inputs contains `item`. 74 | #[inline] 75 | pub fn contains(&self, item: &Source) -> bool { 76 | self.inputs.contains(item) 77 | } 78 | 79 | /// Removes `item` from the inputs. 80 | /// If `item` does not exist in the inputs, no change occurs. 81 | #[inline] 82 | pub fn remove(&mut self, item: &Source) { 83 | self.inputs.retain(|input| input != item); 84 | } 85 | 86 | /// Removes an input by index. 87 | #[inline] 88 | pub fn remove_index(&mut self, index: usize) { 89 | self.inputs.remove(index); 90 | } 91 | 92 | /// Pushes a new input at the end. 93 | /// Does not re-sort the inputs.
94 | #[inline] 95 | pub fn push(&mut self, item: Source) { 96 | self.inputs.push(item) 97 | } 98 | } 99 | 100 | impl Index for Inputs { 101 | type Output = Source; 102 | 103 | #[inline] 104 | fn index(&self, index: usize) -> &Source { 105 | &self.inputs[index] 106 | } 107 | } 108 | 109 | impl IndexMut for Inputs { 110 | #[inline] 111 | fn index_mut(&mut self, index: usize) -> &mut Source { 112 | &mut self.inputs[index] 113 | } 114 | } 115 | 116 | impl Debug for Inputs { 117 | fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { 118 | self.inputs.fmt(f) 119 | } 120 | } 121 | 122 | // impl FromIterator> for Inputs { 123 | // fn from_iter>>(iter: T) -> Self { 124 | // iter.into_iter().collect::>>().into() 125 | // } 126 | // } 127 | 128 | impl Display for Inputs { 129 | fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { 130 | for (index, input) in self.inputs.iter().enumerate() { 131 | if index != 0 { 132 | write!(f, ", ")?; 133 | } 134 | 135 | write!(f, "{input}")?; 136 | } 137 | 138 | Ok(()) 139 | } 140 | } 141 | 142 | impl PartialEq<[A::GpReg]> for Inputs { 143 | #[inline] 144 | fn eq(&self, other: &[A::GpReg]) -> bool { 145 | self.inputs.len() == other.len() 146 | && self.inputs.iter().zip(other.iter()).all(|(a, &b)| match a { 147 | Source::Dest(Dest::Reg(reg, _)) => reg == &A::reg(b), 148 | _ => false, 149 | }) 150 | } 151 | } 152 | 153 | impl PartialEq<&[A::GpReg]> for Inputs { 154 | #[inline] 155 | fn eq(&self, other: &&[A::GpReg]) -> bool { 156 | self == *other 157 | } 158 | } 159 | -------------------------------------------------------------------------------- /liblisa/src/encoding/indexed.rs: -------------------------------------------------------------------------------- 1 | //! Provides a collection type for [`crate::encoding::Encoding`]s that can be used in self-referential structures and can be serialized to disk. 2 | //! 3 | //! When building self-referential datastructures or serializing to disk, it is not possible to use references. 4 | //! The [`IndexedEncodings`] type can be used to obtain [`EncodingId`]s. 5 | //! These IDs reference encodings in the [`IndexedEncodings`] type. 6 | //! The IDs can be safely used in self-referential types and can be serialized to disk. 7 | 8 | use std::ops::Index; 9 | 10 | use serde::{Deserialize, Serialize}; 11 | 12 | use crate::arch::Arch; 13 | use crate::encoding::EncodingWithFilters; 14 | use crate::semantics::Computation; 15 | 16 | /// A reference to an encoding in an [`IndexedEncodings`] collection. 17 | #[cfg_attr(feature = "schemars", derive(schemars::JsonSchema))] 18 | #[derive(Copy, Clone, Debug, PartialEq, Eq, PartialOrd, Ord, Hash, Serialize, Deserialize, bitcode::Encode, bitcode::Decode)] 19 | pub struct EncodingId(usize); 20 | 21 | impl EncodingId { 22 | /// Returns the ID as an `usize`. 23 | pub fn as_usize(&self) -> usize { 24 | self.0 25 | } 26 | 27 | /// Creates an EncodingId from an `usize`. 28 | pub fn from_usize(n: usize) -> EncodingId { 29 | Self(n) 30 | } 31 | } 32 | 33 | /// A collection of encodings which can be accessed using [`EncodingId`]s. 34 | #[derive(Serialize, Deserialize)] 35 | #[serde(bound(serialize = "C: Serialize", deserialize = "C: Deserialize<'de>"))] 36 | pub struct IndexedEncodings { 37 | encodings: Vec>, 38 | } 39 | 40 | impl Default for IndexedEncodings { 41 | fn default() -> Self { 42 | Self { 43 | encodings: Vec::new(), 44 | } 45 | } 46 | } 47 | 48 | impl IndexedEncodings { 49 | /// Creates an empty collection. 
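    ///
    /// A usage sketch (illustrative only; `encoding` stands for an existing `EncodingWithFilters`
    /// value obtained elsewhere, e.g. deserialized from an analysis result, and imports are omitted):
    ///
    /// ```rust,ignore
    /// let mut encodings = IndexedEncodings::new();
    ///
    /// // `add` hands back a plain-data ID that can be serialized or embedded
    /// // in other (self-referential) structures.
    /// let id: EncodingId = encodings.add(encoding);
    ///
    /// // The ID can later be used to look the encoding up again.
    /// let _looked_up = &encodings[id];
    /// assert_eq!(encodings.len(), 1);
    /// ```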
50 | pub fn new() -> Self { 51 | Self::default() 52 | } 53 | 54 | /// Adds a new encoding. 55 | /// The ID returned can be permanently used to obtain a reference to the encoding from this collection. 56 | pub fn add(&mut self, encoding: EncodingWithFilters) -> EncodingId { 57 | let id = EncodingId(self.encodings.len()); 58 | self.encodings.push(encoding); 59 | id 60 | } 61 | 62 | /// Returns the number of encodings in the collection. 63 | #[must_use] 64 | pub fn len(&self) -> usize { 65 | self.encodings.len() 66 | } 67 | 68 | /// Returns true if the collection contains no encodings. 69 | #[must_use] 70 | pub fn is_empty(&self) -> bool { 71 | self.len() == 0 72 | } 73 | 74 | /// Iterates over all encodings in the collection. 75 | pub fn all(&self) -> impl Iterator> { 76 | self.encodings.iter() 77 | } 78 | } 79 | 80 | impl Index for IndexedEncodings { 81 | type Output = EncodingWithFilters; 82 | 83 | fn index(&self, index: EncodingId) -> &Self::Output { 84 | &self.encodings[index.0] 85 | } 86 | } 87 | 88 | impl<'a, A: Arch, C: Computation> Index<&'a EncodingId> for IndexedEncodings { 89 | type Output = EncodingWithFilters; 90 | 91 | fn index(&self, index: &'a EncodingId) -> &Self::Output { 92 | &self[*index] 93 | } 94 | } 95 | -------------------------------------------------------------------------------- /liblisa/src/encoding/merge/mod.rs: -------------------------------------------------------------------------------- 1 | pub mod semantical; 2 | pub mod structural; 3 | 4 | pub use semantical::merge_encodings_semantically; 5 | pub use structural::merge_encodings_structurally; 6 | -------------------------------------------------------------------------------- /liblisa/src/instr/map.rs: -------------------------------------------------------------------------------- 1 | use serde::{Deserialize, Serialize}; 2 | 3 | use super::InstructionFilter; 4 | use crate::instr::Instruction; 5 | 6 | #[derive(Clone, Debug)] 7 | struct FilterGroup { 8 | filters: [Vec<(InstructionFilter, T)>; 256], 9 | } 10 | 11 | impl Serialize for FilterGroup { 12 | fn serialize(&self, serializer: S) -> Result 13 | where 14 | S: serde::Serializer, 15 | { 16 | self.filters.as_slice().serialize(serializer) 17 | } 18 | } 19 | 20 | impl<'de, T: Clone + Deserialize<'de>> Deserialize<'de> for FilterGroup { 21 | fn deserialize(deserializer: D) -> Result 22 | where 23 | D: serde::Deserializer<'de>, 24 | { 25 | let v = Vec::>::deserialize(deserializer)?; 26 | Ok(FilterGroup { 27 | filters: match v.try_into() { 28 | Ok(arr) => arr, 29 | Err(_) => panic!("Deserializing a filtergroup with wrong number of entries"), 30 | }, 31 | }) 32 | } 33 | } 34 | 35 | impl FilterGroup { 36 | pub fn new() -> FilterGroup { 37 | FilterGroup { 38 | filters: vec![Vec::new(); 256].try_into().unwrap(), 39 | } 40 | } 41 | } 42 | 43 | /// A map of filters, with faster lookup. 44 | #[derive(Clone, Serialize, Deserialize)] 45 | pub struct FilterMap { 46 | filters: [FilterGroup; 16], 47 | } 48 | 49 | impl FilterMap { 50 | /// Returns an empty [`FilterMap`]. 51 | pub fn new() -> FilterMap { 52 | FilterMap { 53 | filters: vec![FilterGroup::new(); 16].try_into().unwrap(), 54 | } 55 | } 56 | 57 | /// Inserts a new entry into the filter map. 58 | /// Does not overwrite the data of existing filters. 
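    ///
    /// A lookup sketch (illustrative only; `xor_filter` stands for an existing `InstructionFilter`
    /// that covers `XOR rax, rdx` and was produced elsewhere):
    ///
    /// ```rust,ignore
    /// use liblisa::instr::Instruction;
    ///
    /// let mut map: FilterMap<&str> = FilterMap::new();
    /// map.add(xor_filter, "xor");
    ///
    /// // Lookup only scans the bucket for the instruction's length and first byte,
    /// // which is what makes this faster than a linear scan of all filters.
    /// let instr = Instruction::new(&[0x48, 0x31, 0xD0]);
    /// assert_eq!(map.filters(&instr), Some(&"xor"));
    /// ```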
59 | pub fn add(&mut self, filter: InstructionFilter, data: T) { 60 | let len = filter.len(); 61 | let b = &filter.data[0]; 62 | if let Some(index) = b.as_value() { 63 | self.filters[len].filters[index as usize].push((filter, data)); 64 | } else { 65 | for index in 0..256 { 66 | if b.matches(index as u8) { 67 | self.filters[len].filters[index].push((filter.clone(), data.clone())); 68 | } 69 | } 70 | } 71 | } 72 | 73 | /// Finds one filter that matches `instruction`, and returns the data associated with this filter. 74 | pub fn filters(&self, instruction: &Instruction) -> Option<&T> { 75 | for (filter, data) in self.filters[instruction.byte_len()].filters[instruction.bytes()[0] as usize].iter() { 76 | if filter.matches(instruction) { 77 | return Some(data); 78 | } 79 | } 80 | 81 | None 82 | } 83 | } 84 | 85 | impl Default for FilterMap { 86 | fn default() -> Self { 87 | Self::new() 88 | } 89 | } 90 | -------------------------------------------------------------------------------- /liblisa/src/instr/set.rs: -------------------------------------------------------------------------------- 1 | use log::info; 2 | use serde::{Deserialize, Serialize}; 3 | 4 | use super::InstructionFilter; 5 | use crate::instr::Instruction; 6 | 7 | /// A list of filters. 8 | #[derive(Clone, Debug, Serialize, Deserialize)] 9 | pub struct FilterList { 10 | filters: Vec, 11 | } 12 | 13 | impl FilterList { 14 | /// Creates an empty list of filters. 15 | pub fn new() -> Self { 16 | FilterList { 17 | filters: Vec::new(), 18 | } 19 | } 20 | 21 | /// Returns the number of filters in the list. 22 | pub fn len(&self) -> usize { 23 | self.filters.len() 24 | } 25 | 26 | /// Returns true if the list is empty 27 | pub fn is_empty(&self) -> bool { 28 | self.filters.is_empty() 29 | } 30 | 31 | /// Adds a filter to the list, and attempts to merge this filter with existing filters. 32 | pub fn add(&mut self, mut filter: InstructionFilter) { 33 | #[cfg(test)] 34 | println!("New filter: {filter:?}"); 35 | if let Some(index) = self.filters.iter().position(|f| f.covers(&filter) || f == &filter) { 36 | // The filter is not necessary, because we already have another filter that filters the same instructions. 37 | info!( 38 | "Filter {:?} ignored, because it is redundant with {:?}", 39 | filter, self.filters[index] 40 | ); 41 | } else { 42 | info!("Adding filter: {:?}", filter); 43 | for f in self.filters.iter_mut() { 44 | if let Some(new) = f.try_merge(&filter) { 45 | if new.covers(f) { 46 | *f = new; 47 | } 48 | } 49 | } 50 | 51 | for f in self.filters.iter() { 52 | if let Some(new) = filter.try_merge(f) { 53 | if new.covers(&filter) { 54 | filter = new; 55 | } 56 | } 57 | } 58 | 59 | self.filters.retain(|f| !filter.covers(f)); 60 | self.filters.push(filter); 61 | } 62 | } 63 | 64 | /// Adds a new filter to the list, but does not attempt to merge this filter with existing filters. 65 | pub fn add_nomerge(&mut self, filter: InstructionFilter) { 66 | if !self.filters.iter().any(|f| f.covers(&filter)) { 67 | self.filters.push(filter); 68 | } 69 | } 70 | 71 | /// Returns true if there is at least one filter that partially matches `instr`. 72 | /// This means that all instructions starting with `instr` must have at least one more byte. 73 | pub fn should_extend(&self, instr: &Instruction) -> bool { 74 | self.filters 75 | .iter() 76 | .any(|f| instr.byte_len() < f.len() && f.matches_smaller_instr_partially(instr)) 77 | } 78 | 79 | /// Returns the filter that matches `instr` if it exists. 80 | /// Otherwise, returns `None`. 
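To illustrate the `FilterMap` above, which buckets filters by instruction length and first byte so lookups only scan plausible candidates, here is a small sketch under the assumption that `FilterMap<T>`, `InstructionFilter` and `Instruction` are exported from `liblisa::instr`:

    use liblisa::instr::{FilterMap, Instruction, InstructionFilter};

    // Build the map from (filter, data) pairs, then look up the data associated with
    // the first matching filter for each instruction.
    fn lookup_all<T: Clone>(pairs: Vec<(InstructionFilter, T)>, instrs: &[Instruction]) -> Vec<Option<T>> {
        let mut map = FilterMap::new();
        for (filter, data) in pairs {
            map.add(filter, data);
        }
        instrs.iter().map(|instr| map.filters(instr).cloned()).collect()
    }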
81 | pub fn matching_filter(&self, instr: &Instruction) -> Option<&InstructionFilter> { 82 | self.filters 83 | .iter() 84 | .rev() 85 | .find(|filter| filter.matches(instr) && filter.data.iter().skip(instr.byte_len()).all(|bf| bf.mask == 0)) 86 | } 87 | 88 | /// Returns the (lexicographically) next instruction that matches one of the filters in the list. 89 | pub fn next_matching_instruction(&self, instr: &Instruction) -> Option { 90 | let mut next = None; 91 | for filter in self.filters.iter() { 92 | if let Some(nmi) = filter.next_matching_instruction(instr) { 93 | if next.map(|next| nmi < next).unwrap_or(true) { 94 | next = Some(nmi); 95 | } 96 | } 97 | } 98 | 99 | next 100 | } 101 | } 102 | 103 | impl Default for FilterList { 104 | fn default() -> Self { 105 | Self::new() 106 | } 107 | } 108 | -------------------------------------------------------------------------------- /liblisa/src/lib.rs: -------------------------------------------------------------------------------- 1 | #![allow(incomplete_features)] 2 | #![deny(rustdoc::missing_crate_level_docs, rustdoc::invalid_codeblock_attributes)] 3 | #![warn(missing_docs)] 4 | #![feature(let_chains)] 5 | #![feature(generic_const_exprs)] 6 | #![feature(const_size_of_val)] 7 | #![doc(html_no_source)] 8 | #![doc = include_str!("../README.md")] 9 | 10 | pub mod arch; 11 | pub mod compare; 12 | pub mod encoding; 13 | pub mod instr; 14 | pub mod oracle; 15 | pub mod semantics; 16 | pub mod smt; 17 | pub mod state; 18 | pub mod utils; 19 | pub mod value; 20 | 21 | pub use instr::Instruction; 22 | -------------------------------------------------------------------------------- /liblisa/src/oracle/careful.rs: -------------------------------------------------------------------------------- 1 | use std::marker::PhantomData; 2 | 3 | use super::{FallbackBatchObserveIter, Observation, Oracle, OracleError}; 4 | use crate::arch::Arch; 5 | use crate::state::{Addr, AsSystemState, SystemState}; 6 | 7 | /// An oracle that always executes [`Oracle::observe_carefully`]. 
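For the `FilterList` shown above, a brief sketch of the intended add-then-query flow (import paths assumed):

    use liblisa::instr::{FilterList, Instruction, InstructionFilter};

    // `add` merges or drops redundant filters where possible, so the list stays small;
    // `matching_filter` then returns the filter covering `instr`, if any.
    fn build_and_query(filters: Vec<InstructionFilter>, instr: &Instruction) -> Option<InstructionFilter> {
        let mut list = FilterList::new();
        for f in filters {
            list.add(f);
        }
        list.matching_filter(instr).cloned()
    }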
8 | pub struct CarefulOracle<'o, A: Arch, O: Oracle> { 9 | oracle: &'o mut O, 10 | _phantom: PhantomData, 11 | } 12 | 13 | impl<'o, A: Arch, O: Oracle> Oracle for CarefulOracle<'o, A, O> { 14 | type MappableArea = O::MappableArea; 15 | 16 | fn mappable_area(&self) -> Self::MappableArea { 17 | self.oracle.mappable_area() 18 | } 19 | 20 | fn page_size(&mut self) -> u64 { 21 | self.oracle.page_size() 22 | } 23 | 24 | fn observe(&mut self, before: &SystemState) -> Result, OracleError> { 25 | match self.oracle.observe(before) { 26 | Ok(_) => self.observe_carefully(before), 27 | other => other, 28 | } 29 | } 30 | 31 | fn debug_dump(&mut self) { 32 | self.oracle.debug_dump() 33 | } 34 | 35 | fn observe_carefully(&mut self, before: &SystemState) -> Result, OracleError> { 36 | self.oracle.observe_carefully(before) 37 | } 38 | 39 | fn scan_memory_accesses(&mut self, before: &SystemState) -> Result, OracleError> { 40 | self.oracle.scan_memory_accesses(before) 41 | } 42 | 43 | fn restart(&mut self) { 44 | self.oracle.restart() 45 | } 46 | 47 | fn kill(self) { 48 | todo!() 49 | } 50 | 51 | fn batch_observe_iter<'a, S: AsSystemState + 'a, I: IntoIterator + 'a>( 52 | &'a mut self, states: I, 53 | ) -> impl Iterator> { 54 | FallbackBatchObserveIter::new(self, states.into_iter()) 55 | } 56 | 57 | fn batch_observe_gpreg_only_iter<'a, S: AsSystemState + 'a, I: IntoIterator + 'a>( 58 | &'a mut self, states: I, 59 | ) -> impl Iterator> { 60 | self.batch_observe_iter(states) 61 | } 62 | 63 | const UNRELIABLE_INSTRUCTION_FETCH_ERRORS: bool = O::UNRELIABLE_INSTRUCTION_FETCH_ERRORS; 64 | } 65 | 66 | impl<'a, A: Arch, O: Oracle> CarefulOracle<'a, A, O> { 67 | /// Wraps `oracle` in a [`CarefulOracle`]. 68 | pub fn new(oracle: &'a mut O) -> Self { 69 | CarefulOracle { 70 | oracle, 71 | _phantom: Default::default(), 72 | } 73 | } 74 | } 75 | -------------------------------------------------------------------------------- /liblisa/src/oracle/counter.rs: -------------------------------------------------------------------------------- 1 | use std::fmt::Debug; 2 | use std::marker::PhantomData; 3 | 4 | use super::{Observation, Oracle, OracleError}; 5 | use crate::arch::Arch; 6 | use crate::state::{Addr, AsSystemState, SystemState}; 7 | 8 | /// An oracle that counts how many times it was invoked. 
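A sketch of how `CarefulOracle` is meant to be used as a drop-in wrapper. The `Oracle<A>` and `SystemState<A>` generic forms are assumed reconstructions:

    use liblisa::arch::Arch;
    use liblisa::oracle::{CarefulOracle, Oracle, OracleError};
    use liblisa::state::SystemState;

    // Route a single observation through the careful path: the wrapper re-checks every
    // successful observation with `observe_carefully`.
    fn observe_with_care<A: Arch, O: Oracle<A>>(
        oracle: &mut O,
        before: &SystemState<A>,
    ) -> Result<SystemState<A>, OracleError> {
        let mut careful = CarefulOracle::new(oracle);
        careful.observe(before)
    }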
9 | pub struct InvocationCountingOracle> { 10 | oracle: O, 11 | observations: usize, 12 | _phantom: PhantomData, 13 | } 14 | 15 | impl> Oracle for InvocationCountingOracle { 16 | type MappableArea = O::MappableArea; 17 | 18 | fn mappable_area(&self) -> Self::MappableArea { 19 | self.oracle.mappable_area() 20 | } 21 | 22 | fn page_size(&mut self) -> u64 { 23 | self.oracle.page_size() 24 | } 25 | 26 | fn observe(&mut self, before: &SystemState) -> Result, OracleError> { 27 | self.observations += 1; 28 | self.oracle.observe(before) 29 | } 30 | 31 | fn debug_dump(&mut self) { 32 | self.oracle.debug_dump() 33 | } 34 | 35 | fn scan_memory_accesses(&mut self, before: &SystemState) -> Result, OracleError> { 36 | self.observations += 1; 37 | self.oracle.scan_memory_accesses(before) 38 | } 39 | 40 | fn restart(&mut self) { 41 | self.oracle.restart() 42 | } 43 | 44 | fn kill(self) { 45 | self.oracle.kill() 46 | } 47 | 48 | fn batch_observe_iter<'a, S: AsSystemState + 'a, I: IntoIterator + 'a>( 49 | &'a mut self, states: I, 50 | ) -> impl Iterator> { 51 | InvocationCounterIter { 52 | iter: self.oracle.batch_observe_iter(states), 53 | num_invocations: &mut self.observations, 54 | _phantom: PhantomData, 55 | } 56 | } 57 | 58 | fn batch_observe_gpreg_only_iter<'a, S: AsSystemState + 'a, I: IntoIterator + 'a>( 59 | &'a mut self, states: I, 60 | ) -> impl Iterator> { 61 | InvocationCounterIter { 62 | iter: self.oracle.batch_observe_gpreg_only_iter(states), 63 | num_invocations: &mut self.observations, 64 | _phantom: PhantomData, 65 | } 66 | } 67 | 68 | const UNRELIABLE_INSTRUCTION_FETCH_ERRORS: bool = O::UNRELIABLE_INSTRUCTION_FETCH_ERRORS; 69 | } 70 | 71 | struct InvocationCounterIter<'a, A: Arch, S, I: Iterator>> { 72 | iter: I, 73 | num_invocations: &'a mut usize, 74 | _phantom: PhantomData, 75 | } 76 | 77 | impl>> Iterator for InvocationCounterIter<'_, A, S, I> { 78 | type Item = Observation; 79 | 80 | fn next(&mut self) -> Option { 81 | match self.iter.next() { 82 | Some(item) => { 83 | *self.num_invocations += 1; 84 | Some(item) 85 | }, 86 | None => None, 87 | } 88 | } 89 | } 90 | 91 | impl> Debug for InvocationCountingOracle { 92 | fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { 93 | f.debug_struct("InvocationCountingOracle") 94 | .field("observations", &self.observations) 95 | .finish() 96 | } 97 | } 98 | 99 | impl> InvocationCountingOracle { 100 | /// Wraps `oracle` in an invocation-counting oracle. 
101 | pub fn new(oracle: O) -> Self { 102 | InvocationCountingOracle { 103 | oracle, 104 | observations: 0, 105 | _phantom: Default::default(), 106 | } 107 | } 108 | } 109 | -------------------------------------------------------------------------------- /liblisa/src/oracle/iter.rs: -------------------------------------------------------------------------------- 1 | use std::marker::PhantomData; 2 | 3 | use super::{Observation, Oracle}; 4 | use crate::arch::Arch; 5 | use crate::state::AsSystemState; 6 | 7 | pub(crate) struct FallbackBatchObserveIter<'a, A: Arch, O: Oracle, S: AsSystemState, I: Iterator> { 8 | oracle: &'a mut O, 9 | iter: I, 10 | _phantom: PhantomData, 11 | } 12 | 13 | impl<'a, A: Arch, O: Oracle, S: AsSystemState, I: Iterator> FallbackBatchObserveIter<'a, A, O, S, I> { 14 | pub fn new(oracle: &'a mut O, iter: I) -> Self { 15 | FallbackBatchObserveIter { 16 | oracle, 17 | iter, 18 | _phantom: PhantomData, 19 | } 20 | } 21 | } 22 | 23 | impl, S: AsSystemState, I: Iterator> Iterator for FallbackBatchObserveIter<'_, A, O, S, I> { 24 | type Item = Observation; 25 | 26 | fn next(&mut self) -> Option { 27 | self.iter.next().map(|item| { 28 | let result = self.oracle.observe(item.as_system_state().as_ref()); 29 | (item, result) 30 | }) 31 | } 32 | } 33 | -------------------------------------------------------------------------------- /liblisa/src/oracle/verifier.rs: -------------------------------------------------------------------------------- 1 | use std::marker::PhantomData; 2 | 3 | use super::Observation; 4 | use crate::arch::Arch; 5 | use crate::oracle::{FallbackBatchObserveIter, MappableArea, Oracle, OracleError}; 6 | use crate::state::{Addr, AsSystemState, SystemState}; 7 | 8 | /// An oracle that observes execution on two oracles, and panics if the results are not identical. 9 | pub struct VerifyOracle, O2: Oracle>(O1, O2, PhantomData); 10 | 11 | impl, O2: Oracle> VerifyOracle { 12 | /// Creates a new [`VerifyOracle`], which verifies the observations of `o1` against the observations of `o2`. 
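An illustrative sketch for `InvocationCountingOracle`; this excerpt does not show a public accessor for the count, so the sketch reads it through the `Debug` impl above (generic bounds assumed):

    use liblisa::arch::Arch;
    use liblisa::oracle::{InvocationCountingOracle, Oracle};
    use liblisa::state::SystemState;

    // Count how many observations a batch of states costs by routing them through the
    // counting wrapper.
    fn trace_observations<A: Arch, O: Oracle<A>>(oracle: O, states: &[SystemState<A>]) {
        let mut counting = InvocationCountingOracle::new(oracle);
        for state in states {
            let _ = counting.observe(state);
        }
        // Prints e.g. `InvocationCountingOracle { observations: 3 }`.
        println!("{counting:?}");
    }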
13 | pub fn new(o1: O1, o2: O2) -> VerifyOracle { 14 | VerifyOracle(o1, o2, PhantomData) 15 | } 16 | } 17 | 18 | #[derive(Clone, Debug)] 19 | pub struct DoubleCheckedMappableArea(A, B); 20 | 21 | impl MappableArea for DoubleCheckedMappableArea { 22 | fn can_map(&self, addr: Addr) -> bool { 23 | self.0.can_map(addr) && self.1.can_map(addr) 24 | } 25 | } 26 | 27 | impl, O2: Oracle> Oracle for VerifyOracle { 28 | type MappableArea = DoubleCheckedMappableArea; 29 | const UNRELIABLE_INSTRUCTION_FETCH_ERRORS: bool = 30 | O1::UNRELIABLE_INSTRUCTION_FETCH_ERRORS || O2::UNRELIABLE_INSTRUCTION_FETCH_ERRORS; 31 | 32 | fn mappable_area(&self) -> Self::MappableArea { 33 | DoubleCheckedMappableArea(self.0.mappable_area(), self.1.mappable_area()) 34 | } 35 | 36 | fn page_size(&mut self) -> u64 { 37 | assert_eq!(self.0.page_size(), self.1.page_size()); 38 | self.0.page_size() 39 | } 40 | 41 | fn observe(&mut self, before: &SystemState) -> Result, OracleError> { 42 | use OracleError::*; 43 | let r1 = self.0.observe(before); 44 | let r2 = self.1.observe(before); 45 | 46 | assert!( 47 | match (&r1, &r2) { 48 | (Ok(a), Ok(b)) if a == b => true, 49 | (Err(a), Err(b)) => match (a, b) { 50 | (MemoryAccess(a), MemoryAccess(b)) if a == b => true, 51 | (InvalidInstruction, InvalidInstruction) => true, 52 | (GeneralFault, GeneralFault) => true, 53 | (ComputationError, ComputationError) => true, 54 | _ => false, 55 | }, 56 | _ => { 57 | self.debug_dump(); 58 | 59 | for _ in 0..1000 { 60 | let rprime1 = self.0.observe(before); 61 | let rprime2 = self.1.observe(before); 62 | 63 | println!( 64 | "Repeating yields: equal={} for first / equal={} for second", 65 | rprime1.as_ref().unwrap() == r1.as_ref().unwrap(), 66 | rprime2.as_ref().unwrap() == r2.as_ref().unwrap() 67 | ); 68 | } 69 | 70 | false 71 | }, 72 | }, 73 | "Observations don't match: {before:X?} results in {r1:X?} vs {r2:X?}" 74 | ); 75 | 76 | r1 77 | } 78 | 79 | fn scan_memory_accesses(&mut self, before: &SystemState) -> Result, OracleError> { 80 | let r1 = self.0.scan_memory_accesses(before)?; 81 | let r2 = self.1.scan_memory_accesses(before)?; 82 | 83 | assert_eq!(r1, r2); 84 | Ok(r1) 85 | } 86 | 87 | fn debug_dump(&mut self) { 88 | println!("First:"); 89 | self.0.debug_dump(); 90 | 91 | println!(); 92 | println!("Second:"); 93 | self.1.debug_dump(); 94 | } 95 | 96 | fn restart(&mut self) { 97 | self.0.restart(); 98 | self.1.restart(); 99 | } 100 | 101 | fn kill(self) { 102 | self.0.kill(); 103 | self.1.kill(); 104 | } 105 | 106 | fn batch_observe_iter<'a, S: AsSystemState + 'a, I: IntoIterator + 'a>( 107 | &'a mut self, states: I, 108 | ) -> impl Iterator> { 109 | FallbackBatchObserveIter::new(self, states.into_iter()) 110 | } 111 | 112 | fn batch_observe_gpreg_only_iter<'a, S: AsSystemState + 'a, I: IntoIterator + 'a>( 113 | &'a mut self, states: I, 114 | ) -> impl Iterator> { 115 | self.batch_observe_iter(states) 116 | } 117 | } 118 | -------------------------------------------------------------------------------- /liblisa/src/semantics/default/codegen/sexpr.rs: -------------------------------------------------------------------------------- 1 | //! S-expression code generator. 2 | 3 | use itertools::Itertools; 4 | use serde::{Deserialize, Serialize}; 5 | 6 | use super::{CodeGenerator, Term}; 7 | 8 | /// An S-expression representation for the [`SExprCodeGen`] code generator. 9 | #[cfg_attr(feature = "schemars", derive(schemars::JsonSchema))] 10 | #[derive(Clone, Debug, Serialize, Deserialize, PartialEq)] 11 | pub enum SExpr { 12 | /// A constant value. 
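A small sketch of the `VerifyOracle` debugging harness above, run on two independently implemented oracles; the parameter order `VerifyOracle<A, O1, O2>` is an assumption:

    use liblisa::arch::Arch;
    use liblisa::oracle::{Oracle, VerifyOracle};
    use liblisa::state::SystemState;

    // Execute the same observation on both oracles; `observe` panics with both results
    // if they disagree, so any discrepancy is caught immediately.
    fn observe_on_both<A: Arch, O1: Oracle<A>, O2: Oracle<A>>(
        o1: O1,
        o2: O2,
        before: &SystemState<A>,
    ) -> SystemState<A> {
        let mut verified = VerifyOracle::new(o1, o2);
        verified.observe(before).expect("observation failed")
    }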
13 | Const { 14 | /// A string representation of the constant. 15 | /// Is printed without any escaping. 16 | data: String, 17 | }, 18 | 19 | /// An input 20 | Input { 21 | /// The index of the input. 22 | index: usize, 23 | }, 24 | 25 | /// Applies an operation to the arguments. 26 | /// 27 | /// Generates a string of the form `"(op arg0 arg1 arg2 ...)"` 28 | App { 29 | /// The name of the operation. 30 | /// Is printed without any escaping. 31 | op: String, 32 | 33 | /// The arguments of the operation. 34 | args: Vec, 35 | }, 36 | } 37 | 38 | impl std::fmt::Display for SExpr { 39 | fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { 40 | match self { 41 | SExpr::Const { 42 | data, 43 | } => write!(f, "{data}"), 44 | SExpr::Input { 45 | index, 46 | } => write!(f, "arg{index}"), 47 | SExpr::App { 48 | op, 49 | args, 50 | } => write!(f, "({op} {})", args.iter().join(" ")), 51 | } 52 | } 53 | } 54 | 55 | /// A code generator that generates S-expressions. 56 | pub struct SExprCodeGen; 57 | 58 | impl SExprCodeGen { 59 | /// Creates a new [`SExprCodeGen`]. 60 | pub fn new() -> Self { 61 | Self 62 | } 63 | 64 | fn f(name: &str, args: &[SExpr]) -> SExpr { 65 | SExpr::App { 66 | op: name.to_string(), 67 | args: args.to_vec(), 68 | } 69 | } 70 | } 71 | 72 | impl Default for SExprCodeGen { 73 | fn default() -> Self { 74 | SExprCodeGen 75 | } 76 | } 77 | 78 | impl CodeGenerator for SExprCodeGen { 79 | type T = SExpr; 80 | 81 | fn leaf_const(&mut self, value: i128) -> Self::T { 82 | SExpr::Const { 83 | data: format!("#x{:032X}", value as u128), 84 | } 85 | } 86 | 87 | fn leaf_arg(&mut self, arg_index: usize) -> Term { 88 | Term::simple(SExpr::Input { 89 | index: arg_index, 90 | }) 91 | } 92 | 93 | fn unknown_op_any(&mut self, name: &str, args: &[Self::T]) -> Self::T { 94 | Self::f(name, args) 95 | } 96 | } 97 | -------------------------------------------------------------------------------- /liblisa/src/smt/mod.rs: -------------------------------------------------------------------------------- 1 | //! A generic interface for SMT solvers. 2 | //! 3 | //! We do not want libLISA to depend on a specific SMT solver. 4 | //! This pulls in unwanted C dependencies that make the library less portable. 5 | //! 6 | //! The generic [`SmtSolver`] trait abstracts over the SMT solver implementation. 7 | //! This allows us to specify operations that use an SMT solver, 8 | //! but leave the actual implementation up to a separate crate (`liblisa-z3`) that can be imported when necessary. 9 | 10 | mod cache; 11 | mod solver; 12 | mod tree; 13 | 14 | #[cfg(feature = "z3")] 15 | pub mod z3; 16 | 17 | use std::collections::HashMap; 18 | use std::fs::File; 19 | use std::io::{BufReader, Read, Seek, SeekFrom, Write}; 20 | use std::path::Path; 21 | 22 | pub use cache::*; 23 | pub use solver::*; 24 | 25 | /// A [`SolverCache`] that persists its cache to disk. 26 | pub struct FileCache { 27 | map: HashMap, 28 | backing: File, 29 | } 30 | 31 | impl FileCache { 32 | /// Loads or creates a new cache at the provided path. 33 | /// Truncates the file if the end does not contain valid cache data. 34 | /// This ensures the cache can still be used (partially) if a crash occurs during a write to the cache. 
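The `Display` impl above prints prefix s-expressions. A tiny sketch (module path assumed):

    use liblisa::semantics::default::codegen::sexpr::SExpr;

    // Constants are printed verbatim, inputs as `argN`, applications as `(op a b ...)`.
    fn sexpr_demo() {
        let e = SExpr::App {
            op: "bvadd".to_string(),
            args: vec![
                SExpr::Input { index: 0 },
                SExpr::Const { data: "#x01".to_string() },
            ],
        };
        assert_eq!(e.to_string(), "(bvadd arg0 #x01)");
    }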
35 | pub fn new(path: &Path) -> Self { 36 | let mut map = HashMap::new(); 37 | let mut pos = 0; 38 | match File::open(path) { 39 | Ok(mut file) => { 40 | let size = { 41 | let len = file.seek(SeekFrom::End(0)).unwrap(); 42 | file.seek(SeekFrom::Start(0)).unwrap(); 43 | len 44 | }; 45 | let mut r = BufReader::new(file); 46 | let mut entry = [0u8; 21]; 47 | loop { 48 | match r.read_exact(&mut entry) { 49 | Ok(_) => { 50 | let result = match entry[20] { 51 | 0 => CacheResult::Unsat, 52 | 1 => CacheResult::Unknown, 53 | 2 => CacheResult::Sat, 54 | _ => unreachable!(), 55 | }; 56 | 57 | map.insert(AssertionHash::from_bytes(entry[..20].try_into().unwrap()), result); 58 | pos += 21; 59 | }, 60 | Err(_) => { 61 | if pos != size { 62 | eprintln!("Truncating {} bytes in solver cache", size - pos); 63 | r.into_inner().set_len(pos).unwrap(); 64 | } 65 | 66 | break 67 | }, 68 | } 69 | } 70 | }, 71 | Err(e) => eprintln!("Unable to open {path:?}: {e}"), 72 | } 73 | 74 | FileCache { 75 | map, 76 | backing: File::options().append(true).create(true).open(path).unwrap(), 77 | } 78 | } 79 | } 80 | 81 | impl SolverCache for FileCache { 82 | fn get(&mut self, hash: &AssertionHash) -> Option { 83 | self.map.get(hash).cloned() 84 | } 85 | 86 | fn insert(&mut self, hash: AssertionHash, result: CacheResult) { 87 | let mut data = [0u8; 21]; 88 | data[..20].copy_from_slice(hash.as_bytes()); 89 | data[20] = match result { 90 | CacheResult::Unsat => 0, 91 | CacheResult::Unknown => 1, 92 | CacheResult::Sat => 2, 93 | }; 94 | 95 | self.map.insert(hash, result); 96 | 97 | self.backing.write_all(&data).unwrap(); 98 | } 99 | } 100 | -------------------------------------------------------------------------------- /liblisa/src/state/jit/mod.rs: -------------------------------------------------------------------------------- 1 | //! Efficient just-in-time generation of CPU states. 2 | //! 3 | //! Cloning and manipulating CPU states can be slow, especially if they do not fit in CPU cache. 4 | //! Sometimes it is more efficient to generate a state on-the-fly by modifying an existing state, rather than cloning the state and keeping multiple copies in memory. 5 | //! This module defines various ways to generate states just-in-time from a base state. 6 | 7 | use crate::arch::Arch; 8 | use crate::state::{AsSystemState, SystemState}; 9 | 10 | mod complex; 11 | mod gpreg; 12 | mod simple; 13 | 14 | pub use complex::*; 15 | pub use gpreg::*; 16 | pub use simple::*; 17 | 18 | /// A wrapper that implements [`AsSystemState`] and can either contain a [`SimpleJitState`], [`ComplexJitState`] or a normal [`SystemState`]. 19 | #[derive(Clone)] 20 | pub enum MaybeJitState<'j, A: Arch> { 21 | /// Simple JIT state. 22 | SimpleJit(SimpleJitState<'j, A>), 23 | 24 | /// Complex JIT state. 25 | ComplexJit(ComplexJitState<'j, A>), 26 | 27 | /// Normal [`SystemState`]. 
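To make the on-disk layout used by `FileCache` explicit, here is a sketch of a single-record encoder mirroring `insert` above: fixed 21-byte records, a 20-byte assertion hash followed by one result byte.

    use liblisa::smt::CacheResult;

    // 0 = Unsat, 1 = Unknown, 2 = Sat; a partially written trailing record is simply
    // truncated away the next time the cache file is opened.
    fn encode_record(hash: [u8; 20], result: CacheResult) -> [u8; 21] {
        let mut record = [0u8; 21];
        record[..20].copy_from_slice(&hash);
        record[20] = match result {
            CacheResult::Unsat => 0,
            CacheResult::Unknown => 1,
            CacheResult::Sat => 2,
        };
        record
    }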
28 | Normal(SystemState), 29 | } 30 | 31 | impl std::fmt::Debug for MaybeJitState<'_, A> { 32 | fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { 33 | std::fmt::Debug::fmt(self.as_system_state().as_ref(), f) 34 | } 35 | } 36 | 37 | impl From> for MaybeJitState<'_, A> { 38 | fn from(value: SystemState) -> Self { 39 | Self::Normal(value) 40 | } 41 | } 42 | 43 | impl<'j, A: Arch> From> for MaybeJitState<'j, A> { 44 | fn from(value: SimpleJitState<'j, A>) -> Self { 45 | Self::SimpleJit(value) 46 | } 47 | } 48 | 49 | impl<'j, A: Arch> From> for MaybeJitState<'j, A> { 50 | fn from(value: ComplexJitState<'j, A>) -> Self { 51 | Self::ComplexJit(value) 52 | } 53 | } 54 | 55 | impl AsSystemState for MaybeJitState<'_, A> { 56 | type Output<'a> 57 | = MaybeRef<'a, A> 58 | where 59 | Self: 'a; 60 | 61 | fn as_system_state(&self) -> Self::Output<'_> { 62 | match self { 63 | MaybeJitState::SimpleJit(jit) => MaybeRef::Ref(jit.as_system_state()), 64 | MaybeJitState::ComplexJit(jit) => MaybeRef::ComplexRef(jit.as_system_state()), 65 | MaybeJitState::Normal(state) => MaybeRef::Normal(state), 66 | } 67 | } 68 | 69 | fn num_memory_mappings(&self) -> usize { 70 | match self { 71 | MaybeJitState::SimpleJit(v) => v.num_memory_mappings(), 72 | MaybeJitState::ComplexJit(v) => v.num_memory_mappings(), 73 | MaybeJitState::Normal(v) => v.num_memory_mappings(), 74 | } 75 | } 76 | } 77 | 78 | impl AsSystemState for &'_ MaybeJitState<'_, A> { 79 | type Output<'a> 80 | = MaybeRef<'a, A> 81 | where 82 | Self: 'a; 83 | 84 | fn as_system_state(&self) -> Self::Output<'_> { 85 | match self { 86 | MaybeJitState::SimpleJit(jit) => MaybeRef::Ref(jit.as_system_state()), 87 | MaybeJitState::ComplexJit(jit) => MaybeRef::ComplexRef(jit.as_system_state()), 88 | MaybeJitState::Normal(state) => MaybeRef::Normal(state), 89 | } 90 | } 91 | 92 | fn num_memory_mappings(&self) -> usize { 93 | match self { 94 | MaybeJitState::SimpleJit(v) => v.num_memory_mappings(), 95 | MaybeJitState::ComplexJit(v) => v.num_memory_mappings(), 96 | MaybeJitState::Normal(v) => v.num_memory_mappings(), 97 | } 98 | } 99 | } 100 | 101 | /// A reference to a [`MaybeJitState`]. 102 | pub enum MaybeRef<'a, A: Arch> { 103 | /// Reference of [`MaybeJitState::SimpleJit`] 104 | Ref(SimpleStateRef<'a, A>), 105 | 106 | /// Reference of [`MaybeJitState::ComplexJit`] 107 | ComplexRef(ComplexStateRef<'a, A>), 108 | 109 | /// Reference of [`MaybeJitState::Normal`] 110 | Normal(&'a SystemState), 111 | } 112 | 113 | impl AsRef> for MaybeRef<'_, A> { 114 | fn as_ref(&self) -> &SystemState { 115 | match self { 116 | MaybeRef::Ref(r) => r.as_ref(), 117 | MaybeRef::ComplexRef(r) => r.as_ref(), 118 | MaybeRef::Normal(v) => v, 119 | } 120 | } 121 | } 122 | -------------------------------------------------------------------------------- /liblisa/src/state/locs.rs: -------------------------------------------------------------------------------- 1 | use std::fmt::Debug; 2 | 3 | use serde::{Deserialize, Serialize}; 4 | 5 | use crate::arch::{Arch, Register}; 6 | use crate::value::ValueType; 7 | 8 | /// The kind of a storage location. 9 | #[derive(Copy, Clone, PartialEq, Eq, PartialOrd, Ord, Hash, Serialize, Deserialize)] 10 | pub enum LocationKind { 11 | /// A register. 12 | Reg, 13 | 14 | /// An accessed memory area. 15 | Memory, 16 | } 17 | 18 | /// A storage location in a CPU state. 
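A sketch showing how consumers stay agnostic of the JIT variants: they rely only on `AsSystemState`, so plain and just-in-time states can be mixed freely (generic parameters and the `state::jit` path are assumed):

    use liblisa::arch::Arch;
    use liblisa::state::jit::MaybeJitState;
    use liblisa::state::AsSystemState;

    // Sum the number of memory mappings across a mixed batch of states.
    fn total_memory_mappings<A: Arch>(states: &[MaybeJitState<'_, A>]) -> usize {
        states
            .iter()
            .map(|state| {
                // `as_system_state()` hands out a cheap reference-like view when needed.
                let _view = state.as_system_state();
                state.num_memory_mappings()
            })
            .sum()
    }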
19 | #[derive(Copy, Clone, PartialEq, Eq, PartialOrd, Ord, Hash, Serialize, Deserialize)] 20 | pub enum Location { 21 | /// A register 22 | Reg(A::Reg), 23 | 24 | /// The nth memory access. 25 | Memory(usize), 26 | } 27 | 28 | impl Location { 29 | /// Returns the type of the location. 30 | pub fn kind(&self) -> LocationKind { 31 | match self { 32 | Location::Reg(_) => LocationKind::Reg, 33 | Location::Memory(_) => LocationKind::Memory, 34 | } 35 | } 36 | 37 | /// Returns true if this location has the same [`ValueType`] as `other`. 38 | pub fn matches_value_type_with(&self, other: &Location) -> bool { 39 | match (self, other) { 40 | (Location::Reg(a), Location::Reg(b)) => a.reg_type() == b.reg_type(), 41 | (Location::Reg(r), Location::Memory(_)) => matches!(r.reg_type(), ValueType::Bytes(_)), 42 | (Location::Memory(_), Location::Reg(r)) => matches!(r.reg_type(), ValueType::Bytes(_)), 43 | (Location::Memory(_), Location::Memory(_)) => true, 44 | } 45 | } 46 | 47 | /// Returns true if the location is a flags register.1 48 | pub fn is_flags(&self) -> bool { 49 | if let Location::Reg(reg) = self { 50 | reg.is_flags() 51 | } else { 52 | false 53 | } 54 | } 55 | } 56 | 57 | impl Debug for Location { 58 | fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { 59 | match self { 60 | Location::Reg(reg) => write!(f, "Reg[{reg}]")?, 61 | Location::Memory(index) => write!(f, "Memory[#{index}]")?, 62 | } 63 | 64 | Ok(()) 65 | } 66 | } 67 | -------------------------------------------------------------------------------- /liblisa/src/state/split_dests.rs: -------------------------------------------------------------------------------- 1 | use std::collections::HashMap; 2 | 3 | use log::trace; 4 | 5 | use crate::arch::Arch; 6 | use crate::encoding::dataflows::{Dest, IntoDestWithSize, Size}; 7 | use crate::state::Location; 8 | use crate::utils::bitmap::GrowingBitmap; 9 | 10 | /// Splits dests into smaller, non-overlapping chunks. 11 | /// 12 | /// # Example 13 | /// ```rust 14 | /// use liblisa::arch::x64::{GpReg, X64Arch, X64Reg}; 15 | /// use liblisa::encoding::dataflows::{Dest, Size}; 16 | /// use liblisa::state::SplitDests; 17 | /// const Rax: X64Reg = X64Reg::GpReg(GpReg::Rax); 18 | /// 19 | /// let mut split = SplitDests::::new(); 20 | /// split.split(Dest::Reg(Rax, Size::new(0, 3))); 21 | /// split.split(Dest::Reg(Rax, Size::new(2, 5))); 22 | /// 23 | /// assert_eq!( 24 | /// split 25 | /// .get(Dest::Reg(Rax, Size::new(2, 5))) 26 | /// .collect::>(), 27 | /// vec![ 28 | /// Dest::Reg(Rax, Size::new(2, 3)), 29 | /// Dest::Reg(Rax, Size::new(4, 5)), 30 | /// ] 31 | /// ) 32 | /// ``` 33 | #[derive(Clone, Debug, Default)] 34 | pub struct SplitDests { 35 | outputs: HashMap, Vec>, 36 | } 37 | 38 | impl SplitDests { 39 | /// Creates an empty [`SplitDests`]. 40 | pub fn new() -> Self { 41 | Self::default() 42 | } 43 | 44 | /// Splits the location in `dest` into non-overlapping chunks. 45 | /// If the location has not been inserted before, the entire size is added as one chunk. 46 | /// If the location has been inserted before, the sizes are split such that only subsets 47 | /// of one or more of the previously inserted [`Dest`]s are included. 
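A brief sketch using the `Location` accessors above to summarise a set of storage locations (import paths assumed):

    use liblisa::arch::Arch;
    use liblisa::state::{Location, LocationKind};

    // Count registers, memory areas and flag registers among the given locations.
    fn summarize<A: Arch>(locations: &[Location<A>]) -> (usize, usize, usize) {
        let regs = locations.iter().filter(|loc| loc.kind() == LocationKind::Reg).count();
        let mem = locations.iter().filter(|loc| loc.kind() == LocationKind::Memory).count();
        let flags = locations.iter().filter(|loc| loc.is_flags()).count();
        (regs, mem, flags)
    }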
48 | pub fn split(&mut self, dest: Dest) { 49 | let location = Location::from(dest); 50 | let size = dest.size(); 51 | let existing_sizes = self.outputs.remove(&location).unwrap_or_default(); 52 | 53 | trace!("Splitting {dest:?} with existing sizes {existing_sizes:?}"); 54 | 55 | let mut split_sizes = Vec::new(); 56 | let mut covered = GrowingBitmap::new_all_zeros(size.end_byte + 1); 57 | for existing_size in existing_sizes { 58 | covered.set_range(existing_size.start_byte..existing_size.end_byte + 1); 59 | trace!("Checking {existing_size:?} vs {size:?}"); 60 | if let Some((before, overlapping, after)) = size.split_by_overlap(existing_size) { 61 | trace!("Split into: {before:?} {overlapping:?} {after:?}"); 62 | split_sizes.push(overlapping); 63 | 64 | for item in [before, after].into_iter().flatten() { 65 | if existing_size.contains(&item) { 66 | split_sizes.push(item); 67 | } 68 | } 69 | } else { 70 | split_sizes.push(existing_size); 71 | } 72 | } 73 | 74 | trace!("Covered: {covered:?}"); 75 | let mut index = size.start_byte; 76 | while index <= size.end_byte { 77 | if !covered[index] { 78 | let num = covered 79 | .iter() 80 | .skip(index) 81 | .take(size.end_byte + 1 - index) 82 | .take_while(|&b| !b) 83 | .count(); 84 | 85 | let uncovered_size = Size::new(index, index + num - 1); 86 | trace!("Adding uncovered {uncovered_size:?}"); 87 | split_sizes.push(uncovered_size); 88 | index += num; 89 | } else { 90 | index += 1; 91 | } 92 | } 93 | 94 | trace!("Result: {split_sizes:?}"); 95 | self.outputs.insert(location, split_sizes); 96 | } 97 | 98 | /// Returns the non-overlapping chunks for the specified location `loc`. 99 | /// You must have called `split(loc)` at least once. 100 | /// If you have not, this function may panick. 101 | /// 102 | /// Returns only the chunks that `loc` contains. 103 | pub fn get(&self, loc: Dest) -> impl Iterator> + '_ { 104 | let loc_size = loc.size(); 105 | self.outputs 106 | .get(&Location::from(loc)) 107 | .iter() 108 | .flat_map(|v| v.iter()) 109 | .flat_map(move |size| { 110 | if loc_size.contains(size) { 111 | Some(loc.with_size(*size)) 112 | } else { 113 | // No partial overlaps! 114 | assert!(!loc_size.overlaps(size)); 115 | None 116 | } 117 | }) 118 | .collect::>() 119 | .into_iter() 120 | } 121 | 122 | /// Returns all non-overlapping chunks for all locations. 123 | pub fn iter(&self) -> impl Iterator> + '_ { 124 | self.outputs 125 | .iter() 126 | .flat_map(|(loc, sizes)| sizes.iter().map(|&size| loc.into_dest_with_size(size))) 127 | } 128 | } 129 | -------------------------------------------------------------------------------- /liblisa/src/utils/cmov.rs: -------------------------------------------------------------------------------- 1 | pub trait CmovAnd { 2 | /// Moves `other` into `self` if `value & TEST == 0`. 3 | fn cmov_if_and_imm_zero(&mut self, other: &Self, value_to_test: T); 4 | 5 | /// Moves `other` into `self` if `value & TEST != 0`. 6 | fn cmov_if_and_imm_nonzero(&mut self, other: &Self, value_to_test: T); 7 | } 8 | 9 | #[cfg(not(miri))] 10 | #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] 11 | mod impls_x86 { 12 | use std::arch::asm; 13 | 14 | use super::CmovAnd; 15 | 16 | macro_rules! x86_impl { 17 | ($ty:ty) => { 18 | impl CmovAnd for $ty { 19 | fn cmov_if_and_imm_zero(&mut self, other: &Self, value_to_test: u8) { 20 | unsafe { 21 | asm! 
{ 22 | "test {value}, {test}", 23 | "cmovz {dest}, {src}", 24 | 25 | value = in(reg_byte) value_to_test, 26 | test = const TEST, 27 | dest = inlateout(reg) *self, 28 | src = in(reg) *other, 29 | options(pure, nomem, nostack), 30 | } 31 | } 32 | } 33 | 34 | fn cmov_if_and_imm_nonzero(&mut self, other: &Self, value_to_test: u8) { 35 | unsafe { 36 | asm! { 37 | "test {value}, {test}", 38 | "cmovnz {dest}, {src}", 39 | 40 | value = in(reg_byte) value_to_test, 41 | test = const TEST, 42 | dest = inlateout(reg) *self, 43 | src = in(reg) *other, 44 | options(pure, nomem, nostack), 45 | } 46 | } 47 | } 48 | } 49 | }; 50 | } 51 | 52 | x86_impl!(u64); 53 | x86_impl!(i64); 54 | } 55 | 56 | #[cfg(any(not(any(target_arch = "x86", target_arch = "x86_64")), miri))] 57 | mod impls_generic { 58 | use super::CmovAnd; 59 | 60 | impl CmovAnd for u64 { 61 | fn cmov_if_and_imm_zero(&mut self, other: &Self, value_to_test: u8) { 62 | if value_to_test & TEST == 0 { 63 | *self = *other; 64 | } 65 | } 66 | 67 | fn cmov_if_and_imm_nonzero(&mut self, other: &Self, value_to_test: u8) { 68 | if value_to_test & TEST != 0 { 69 | *self = *other; 70 | } 71 | } 72 | } 73 | } 74 | -------------------------------------------------------------------------------- /liblisa/src/utils/iter.rs: -------------------------------------------------------------------------------- 1 | /// Convenience trait that rotates elements in a slice, and maps them to a new value. 2 | pub trait MapRotated { 3 | /// The items in the slice. 4 | type Item; 5 | 6 | /// The output type of [`MapRotated::Output`]. 7 | type Output; 8 | 9 | /// Returns an iterator that maps every value to another value using `f`, but rotates the results by `start` positions. 10 | /// This means that the iterator returns `f(self[start])`, `f(self[start + 1])`, .., `f(self[0])`, `f(self[1])`, .., `f(self[start - 1])`. 11 | fn map_rotated(self, start: usize, f: impl FnMut(Self::Item) -> I) -> Self::Output; 12 | } 13 | 14 | impl MapRotated for [T; N] 15 | where 16 | T: Copy, 17 | { 18 | type Item = T; 19 | type Output = [I; N]; 20 | 21 | fn map_rotated(self, start: usize, mut f: impl FnMut(Self::Item) -> I) -> Self::Output { 22 | let mut index = 0; 23 | [(); N].map(|_| { 24 | let item = self[(index + start) % self.len()]; 25 | index += 1; 26 | f(item) 27 | }) 28 | } 29 | } 30 | -------------------------------------------------------------------------------- /liblisa/src/utils/matrix.rs: -------------------------------------------------------------------------------- 1 | use std::fmt::{Debug, Write}; 2 | 3 | use super::bitmap::GrowingBitmap; 4 | 5 | /// A matrix of boolean values, where matrix[x, y] == matrix[y, x] 6 | #[derive(Default, Clone)] 7 | pub struct Symmetric2DMatrix { 8 | data: GrowingBitmap, 9 | size: usize, 10 | } 11 | 12 | impl Symmetric2DMatrix { 13 | /// Creates an empty matrix. 14 | pub const fn new() -> Self { 15 | Symmetric2DMatrix { 16 | data: GrowingBitmap::new(), 17 | size: 0, 18 | } 19 | } 20 | 21 | /// Translates the 2d coordinates to a 1d index. 22 | #[inline] 23 | fn index(x: usize, y: usize) -> usize { 24 | let (x, y) = (x.min(y), x.max(y)); 25 | 26 | // (0, 0) -> 0, 27 | // (0, 1) -> 1, (1, 1) -> 2, 28 | // (0, 2) -> 3, (1, 2) -> 4, (2, 2) -> 5, 29 | // (0, 3) -> 6, .., 30 | // (0, 4) -> 10, .., 31 | // Note how the base index is 1 + 2 + .. + y. 32 | // That computation can be simplified to (y + 1) * y / 2. 33 | 34 | let base_index = ((y + 1) * y) / 2; 35 | base_index + x 36 | } 37 | 38 | /// Returns the value of the matrix at position `(x, y)`. 
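For the `MapRotated` helper above, a minimal worked example; the `liblisa::utils::iter` path is assumed:

    use liblisa::utils::iter::MapRotated;

    // With start = 1 the element at index 1 is visited first, wrapping around at the end.
    fn rotated_demo() {
        let rotated = [1u32, 2, 3, 4].map_rotated(1, |x| x * 10);
        assert_eq!(rotated, [20, 30, 40, 10]);
    }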
39 | #[inline] 40 | pub fn get(&self, x: usize, y: usize) -> bool { 41 | self.data[Self::index(x, y)] 42 | } 43 | 44 | /// Sets the value of the matrix at position `(x, y)` to true. 45 | /// Returns true if the value at (x, y) was changed, otherwise false. 46 | pub fn set(&mut self, x: usize, y: usize) -> bool { 47 | self.size = self.size.max(x + 1).max(y + 1); 48 | 49 | self.data.set(Self::index(x, y)) 50 | } 51 | 52 | /// Yields all indices `x` where `self.get(x, y)` is true. 53 | pub fn iter_row_indices(&self, y: usize) -> impl Iterator + '_ { 54 | (0..self.size).filter(move |&x| self.get(x, y)) 55 | } 56 | 57 | /// Returns the backing bitmap. 58 | pub fn raw_data(&self) -> &GrowingBitmap { 59 | &self.data 60 | } 61 | } 62 | 63 | impl Debug for Symmetric2DMatrix { 64 | fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { 65 | for y in 0..self.size { 66 | for x in 0..self.size { 67 | f.write_char(if x >= self.size - y { 68 | ' ' 69 | } else if self.get(x, y) { 70 | '1' 71 | } else { 72 | '0' 73 | })?; 74 | } 75 | 76 | f.write_char('\n')?; 77 | } 78 | 79 | Ok(()) 80 | } 81 | } 82 | 83 | #[cfg(test)] 84 | mod tests { 85 | use log::debug; 86 | use rand::Rng; 87 | 88 | use super::Symmetric2DMatrix; 89 | 90 | #[test] 91 | pub fn is_symmetric() { 92 | let mut m = Symmetric2DMatrix::new(); 93 | 94 | m.set(5, 1); 95 | assert!(m.get(1, 5)); 96 | assert!(m.get(5, 1)); 97 | 98 | m.set(2, 7); 99 | assert!(m.get(2, 7)); 100 | assert!(m.get(7, 2)); 101 | 102 | assert_eq!(m.iter_row_indices(7).collect::>(), vec![2]); 103 | assert_eq!(m.iter_row_indices(2).collect::>(), vec![7]); 104 | } 105 | 106 | #[test] 107 | pub fn middle_entries() { 108 | let mut m = Symmetric2DMatrix::new(); 109 | for n in 0..256 { 110 | m.set(n, n); 111 | } 112 | 113 | for n in 0..256 { 114 | assert_eq!(m.iter_row_indices(n).collect::>(), vec![n]); 115 | } 116 | } 117 | 118 | #[test] 119 | pub fn distinct_indices() { 120 | let mut seen = vec![false; 1000 * 1000]; 121 | for x in 0..1000 { 122 | for y in x..1000 { 123 | let index = Symmetric2DMatrix::index(x, y); 124 | assert!(!seen[index]); 125 | 126 | seen[index] = true; 127 | } 128 | } 129 | } 130 | 131 | #[test] 132 | pub fn fuzz() { 133 | let mut rng = rand::thread_rng(); 134 | for size in 1..100 { 135 | println!("Size {size}"); 136 | 137 | let mut m = Symmetric2DMatrix::new(); 138 | let mut arr = vec![false; size * size]; 139 | 140 | debug!("Size {size}"); 141 | 142 | for _ in 0..20_000 { 143 | let x = rng.gen_range(0..size); 144 | let y = rng.gen_range(0..size); 145 | 146 | debug!(" - setting ({x}, {y}) = true"); 147 | 148 | m.set(x, y); 149 | arr[x * size + y] = true; 150 | arr[y * size + x] = true; 151 | 152 | for x in 0..size { 153 | for y in 0..size { 154 | assert_eq!(m.get(x, y), arr[x * size + y], "({x}, {y}) not equal; matrix: \n{m:?}"); 155 | } 156 | } 157 | } 158 | } 159 | } 160 | } 161 | -------------------------------------------------------------------------------- /rustfmt.toml: -------------------------------------------------------------------------------- 1 | style_edition = "2021" 2 | edition = "2021" 3 | max_width = 130 4 | newline_style = "Unix" 5 | group_imports = "StdExternalCrate" 6 | imports_granularity = "Module" 7 | struct_lit_single_line = false 8 | trailing_semicolon = false 9 | use_field_init_shorthand = true 10 | condense_wildcard_suffixes = true 11 | fn_params_layout = "Compressed" 12 | format_code_in_doc_comments = true 13 | match_block_trailing_comma = true 
--------------------------------------------------------------------------------