├── .github ├── ISSUE_TEMPLATE │ ├── bug_report.md │ └── feature_request.md └── workflows │ ├── dependency-review.yml │ └── test.yml ├── .gitignore ├── .gitmodules ├── README.md ├── clean.sh ├── images └── logo.png ├── launcher.py ├── output ├── log │ └── .placeholder └── result │ └── .placeholder ├── requirements.txt ├── seewasm ├── __init__.py ├── __init__.pyc ├── analysis │ ├── __init__.py │ ├── __init__.pyc │ └── cfg.py ├── arch │ ├── __init__.py │ ├── __init__.pyc │ └── wasm │ │ ├── __init__.py │ │ ├── __init__.pyc │ │ ├── analyzer.py │ │ ├── cfg.py │ │ ├── configuration.py │ │ ├── constant.py │ │ ├── decode.py │ │ ├── disassembler.py │ │ ├── dwarfParser.py │ │ ├── emulator.py │ │ ├── exceptions.py │ │ ├── format.py │ │ ├── graph.py │ │ ├── instruction.py │ │ ├── instructions │ │ ├── ArithmeticInstructions.py │ │ ├── BitwiseInstructions.py │ │ ├── ConstantInstructions.py │ │ ├── ControlInstructions.py │ │ ├── ConversionInstructions.py │ │ ├── LogicalInstructions.py │ │ ├── MemoryInstructions.py │ │ ├── ParametricInstructions.py │ │ ├── VariableInstructions.py │ │ └── __init__.py │ │ ├── lib │ │ ├── c_lib.py │ │ ├── go_lib.py │ │ ├── utils.py │ │ └── wasi.py │ │ ├── memory.py │ │ ├── solver.py │ │ ├── utils.py │ │ ├── visualizator.py │ │ ├── vmstate.py │ │ └── wasm.py ├── core │ ├── __init__.py │ ├── __init__.pyc │ ├── basicblock.py │ ├── edge.py │ ├── function.py │ ├── instruction.py │ └── utils.py └── engine │ ├── __init__.py │ ├── __init__.pyc │ ├── disassembler.py │ ├── emulator.py │ └── engine.py ├── test.py ├── test ├── c │ └── src │ │ ├── hello.c │ │ └── sym.c ├── go │ └── src │ │ └── hello.go ├── hello_world.wasm ├── hello_world_go.wasm ├── hello_world_rust.wasm ├── password.wasm ├── rust │ └── hello │ │ ├── Cargo.toml │ │ └── src │ │ └── main.rs ├── sym_c.wasm ├── test.wasm ├── test_linux.py ├── test_return.wasm └── test_unreachable.wasm └── wasm ├── __init__.py ├── __main__.py ├── compat.py ├── decode.py ├── formatter.py ├── immtypes.py ├── modtypes.py ├── opcodes.py ├── types.py └── wasmtypes.py /.github/ISSUE_TEMPLATE/bug_report.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Bug report 3 | about: Create a report to help us improve 4 | title: '' 5 | labels: '' 6 | assignees: '' 7 | 8 | --- 9 | 10 | **Describe the bug** 11 | A clear and concise description of what the bug is. 12 | 13 | **To Reproduce** 14 | Steps to reproduce the behavior: 15 | 1. Go to '...' 16 | 2. Click on '....' 17 | 3. Scroll down to '....' 18 | 4. See error 19 | 20 | **Expected behavior** 21 | A clear and concise description of what you expected to happen. 22 | 23 | **Additional context** 24 | Add any other context about the problem here. 25 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/feature_request.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Feature request 3 | about: Suggest an idea for this project 4 | title: '' 5 | labels: '' 6 | assignees: '' 7 | 8 | --- 9 | 10 | **Is your feature request related to a problem? Please describe.** 11 | A clear and concise description of what the problem is. Ex. I'm always frustrated when [...] 12 | 13 | **Describe the solution you'd like** 14 | A clear and concise description of what you want to happen. 15 | 16 | **Describe alternatives you've considered** 17 | A clear and concise description of any alternative solutions or features you've considered. 18 | 19 | **Additional context** 20 | Add any other context or screenshots about the feature request here. 21 | -------------------------------------------------------------------------------- /.github/workflows/dependency-review.yml: -------------------------------------------------------------------------------- 1 | # Dependency Review Action 2 | # 3 | # This Action will scan dependency manifest files that change as part of a Pull Request, 4 | # surfacing known-vulnerable versions of the packages declared or updated in the PR. 5 | # Once installed, if the workflow run is marked as required, PRs introducing known-vulnerable 6 | # packages will be blocked from merging. 7 | # 8 | # Source repository: https://github.com/actions/dependency-review-action 9 | # Public documentation: https://docs.github.com/en/code-security/supply-chain-security/understanding-your-software-supply-chain/about-dependency-review#dependency-review-enforcement 10 | name: 'Dependency review' 11 | on: 12 | pull_request: 13 | branches: [ "main" ] 14 | 15 | # If using a dependency submission action in this workflow this permission will need to be set to: 16 | # 17 | # permissions: 18 | # contents: write 19 | # 20 | # https://docs.github.com/en/enterprise-cloud@latest/code-security/supply-chain-security/understanding-your-software-supply-chain/using-the-dependency-submission-api 21 | permissions: 22 | contents: read 23 | # Write permissions for pull-requests are required for using the `comment-summary-in-pr` option, comment out if you aren't using this option 24 | pull-requests: write 25 | 26 | jobs: 27 | dependency-review: 28 | runs-on: ubuntu-latest 29 | steps: 30 | - name: 'Checkout repository' 31 | uses: actions/checkout@v4 32 | - name: 'Dependency Review' 33 | uses: actions/dependency-review-action@v4 34 | # Commonly enabled options, see https://github.com/actions/dependency-review-action#configuration-options for all available options. 35 | with: 36 | comment-summary-in-pr: always 37 | # fail-on-severity: moderate 38 | # deny-licenses: GPL-1.0-or-later, LGPL-2.0-or-later 39 | # retry-on-snapshot-warnings: true 40 | -------------------------------------------------------------------------------- /.github/workflows/test.yml: -------------------------------------------------------------------------------- 1 | name: Test 2 | 3 | on: 4 | push: 5 | paths: 6 | - '**.py' 7 | - ".github/workflows/*.yml" 8 | pull_request: 9 | types: [opened, synchronize, reopened] 10 | paths: 11 | - '**.py' 12 | - ".github/workflows/*.yml" 13 | 14 | jobs: 15 | test: 16 | strategy: 17 | fail-fast: false 18 | matrix: 19 | python: 20 | - "3.7" 21 | - "3.8" 22 | - "3.9" 23 | - "3.10" 24 | - "3.11" 25 | - "3.12" 26 | runs-on: ubuntu-latest 27 | steps: 28 | - uses: actions/checkout@v4 29 | - name: Setup Python 30 | uses: actions/setup-python@v5 31 | with: 32 | python-version: ${{ matrix.python }} 33 | cache: pip 34 | # Set this option if you want the action to check for the latest available version that satisfies the version spec. 35 | # check-latest: # optional 36 | - name: Install requirements 37 | run: | 38 | pip install -r requirements.txt 39 | sudo apt update && sudo apt install graphviz 40 | - name: Cache wabt 41 | id: cache-wabt 42 | uses: actions/cache@v4 43 | with: 44 | path: wabt-1.0.32 45 | key: wabt 46 | - name: Install wabt 47 | if: steps.cache-wabt.outputs.cache-hit != 'true' 48 | run: | 49 | curl -JLO "https://github.com/WebAssembly/wabt/releases/download/1.0.32/wabt-1.0.32-ubuntu.tar.gz" 50 | tar xzf wabt-1.0.32-ubuntu.tar.gz 51 | - name: Cache wasi-sdk 52 | id: cache-wasi-sdk 53 | uses: actions/cache@v4 54 | with: 55 | path: wasi-sdk-22.0 56 | key: wasi-sdk 57 | - name: Install wasi-sdk 58 | if: steps.cache-wasi-sdk.outputs.cache-hit != 'true' 59 | run: | 60 | curl -JLO "https://github.com/WebAssembly/wasi-sdk/releases/download/wasi-sdk-22/wasi-sdk-22.0-linux.tar.gz" 61 | tar xzf wasi-sdk-22.0-linux.tar.gz 62 | - name: Cache wasmtime 63 | id: cache-wasmtime 64 | uses: actions/cache@v4 65 | with: 66 | path: ~/.wasmtime 67 | key: wasmtime 68 | - name: Install wasmtime 69 | if: steps.cache-wasmtime.outputs.cache-hit != 'true' 70 | run: | 71 | curl https://wasmtime.dev/install.sh -sSf | bash 72 | - name: Install Rust 73 | run: | 74 | curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y 75 | rustup target add wasm32-wasi 76 | - uses: actions/setup-go@v5 77 | with: 78 | go-version: 1.22 79 | check-latest: true 80 | cache: true 81 | - name: Install tinygo 82 | run: | 83 | wget https://github.com/tinygo-org/tinygo/releases/download/v0.32.0/tinygo_0.32.0_amd64.deb 84 | sudo dpkg -i tinygo_0.32.0_amd64.deb 85 | - name: Run pytest 86 | run: | 87 | export PATH=$(pwd)/wabt-1.0.32/bin:$PATH 88 | export PATH=$(pwd)/wasi-sdk-22.0/bin:$PATH 89 | export PATH=$(pwd)/.cargo/bin:$PATH 90 | export PATH=$(pwd)/.wasmtime/bin:$PATH 91 | pytest test.py --tb=short --durations=0 92 | pytest test/test_linux.py --tb=short --durations=0 -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | env/ 2 | .devcontainer/ 3 | .pytest_cache/ 4 | .DS_Store 5 | __pycache__ 6 | .idea 7 | .vscode 8 | ready_to_production.py 9 | *.wat 10 | output/log/* 11 | output/result/* 12 | bfs_test/* 13 | **/debug 14 | **/rust/**/target -------------------------------------------------------------------------------- /.gitmodules: -------------------------------------------------------------------------------- 1 | [submodule "Wasm-samples"] 2 | path = Wasm-samples 3 | url = https://github.com/HNYuuu/Wasm-samples.git 4 | [submodule "DSL"] 5 | path = DSL 6 | url = https://github.com/HNYuuu/DSL.git 7 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # SeeWasm [![Test](https://github.com/HNYuuu/SeeWasm/actions/workflows/test.yml/badge.svg?branch=main)](https://github.com/HNYuuu/SeeWasm) 2 | ![SeeWasm-logo](./images/logo.png) 3 | 4 | 5 | WebAssembly (Wasm), a low-level language, offers several advantages and can be translated from high-level mainstream programming languages such as C, C++, Go, and Rust. 6 | 7 | In this project, we have implemented a **symbolic execution engine** for Wasm binaries, SeeWasm. Our goal is to build a toolchain that takes source code files (written in other programming languages) as input, performs symbolic execution, and outputs feasible paths with their solutions for further analysis (e.g., vulnerability detection). 8 | 9 | ## Prerequisites  10 | To run SeeWasm, ensure you have Python 3.7 or a later version installed. Then, install the required Python libraries by executing the following command: 11 | 12 | ```shell 13 | python3 -m pip install -r requirements.txt 14 | ``` 15 | 16 | If you encounter issues building the wheel for leb128, update pip and wheel, then reinstall leb128: 17 | 18 | ```shell 19 | pip install --upgrade pip wheel 20 | pip install --force-reinstall leb128==1.0.4 21 | ``` 22 | 23 | To verify everything is set up correctly, run the following command: 24 | 25 | ```shell 26 | python3 -m pytest test.py -vv 27 | ``` 28 | 29 | This command traverses the `./test` folder and performs symbolic execution on all Wasm binaries. 30 | If successful, a success message will be displayed, typically **after several seconds**. 31 | 32 | Sample Wasm binaries, including "Hello World" in C, Go, and Rust, are provided in the folder. 33 | These can be compiled from their respective source languages; the compilation processes are detailed in [WASI tutorial](https://github.com/bytecodealliance/wasmtime/blob/main/docs/WASI-tutorial.md#compiling-to-wasi) (C and Rust), and [WASI "Hello World" example](https://wasmbyexample.dev/examples/wasi-hello-world/wasi-hello-world.go.en-us.html) (Go). 34 | 35 | For Rust and C++ project, you can use `wasm-tools` to demangle symbol names in the `name` section. Install with `cargo install wasm-tools`. Confirm by `wasm-tools --version`. Details can be found at [Wasm Tools](https://github.com/bytecodealliance/wasm-tools). 36 | 37 | ## Analyze 38 | This section demonstrates how to use SeeWasm to analyze a generated WebAssembly file. 39 | 40 | ### Options 41 | All valid options are shown in below: 42 | 43 | ```shell 44 | SeeWasm, a symbolic execution engine for Wasm binaries 45 | 46 | Optional arguments: 47 | -h, --help show this help message and exit 48 | 49 | Input arguments: 50 | -f WASMMODULE, --file WASMMODULE 51 | binary file (.wasm) 52 | --stdin STDIN stream of stdin 53 | --sym_stdin SYM_STDIN 54 | stream of stdin in N bytes symbols 55 | --args ARGS command line 56 | --sym_args SYM_ARGS [SYM_ARGS ...] 57 | command line in symbols, each of them is N bytes at most 58 | --sym_files SYM_FILES SYM_FILES 59 | Create N symbolic files, each of them has M symbolic bytes 60 | --source_type [{c,go,rust}] 61 | type of source file 62 | 63 | Features: 64 | --entry ENTRY set entry point as the specilized function 65 | --visualize visualize the ICFG on basic blocks level 66 | --incremental enable incremental solving 67 | -v [{warning,info,debug}], --verbose [{warning,info,debug}] 68 | set the logging level 69 | 70 | Analyze: 71 | -s, --symbolic perform the symbolic execution 72 | --search [{dfs,bfs,random,interval}] 73 | set the search algorithm (default: dfs) 74 | ``` 75 | 76 | We will detail these options according to their functionalities. 77 | 78 | ### Input Arguments 79 | SeeWasm can deassemble the target binary and construct valid inputs based on the values of the input arguments. 80 | 81 | Specifically, `-f` option is mandatory, and it must be followed by the path of the Wasm binary to be analyzed. The `--stdin STRING` and `--sym_stdin N` options allow users to pass concrete and symbolic bytes through the stdin stream, respectively. A concrete string must be passed using `--stdin`, while a string consisting of `N` symbolic characters must be passed using `--sym_stdin`. For example, `--sym_stdin 5` inputs 5 symbolic bytes for functions that read from stdin. 82 | 83 | Similarly, `--args STRING1, STRING2, ...` and `--sym_args N1, N2, ...` options pass concrete and symbolic arguments to the Wasm binary. For instance, if `main` requires three arguments, each two bytes long, `--sym_args 2 2 2` is enough. 84 | 85 | Some programs interact with files. SeeWasm simulates this using a *symbolic file system*. Users can create `N` symbolic files, each with up to `M` bytes, using the `--sym_files N M` option. 86 | 87 | As multiple high-level programming languages can be compiled to Wasm binaries, we have implemented specific optimizations. To take advantage of these optimizations, users must indicate the source language using the `--source_type` option. 88 | 89 | ### Features 90 | `--entry` specifies the entry function from which symbolic execution begins. By default, the entry function is `__original_main`. Users must specify a proper entry function to ensure the symbolic execution is performed correctly. 91 | 92 | The input Wasm is parsed into an Interprocedural Control Flow Graph (ICFG), which can be visualized for debugging purposes using the `--visualize` option (requires `graphviz`, installable via `sudo apt install graphviz` on Ubuntu). 93 | 94 | The constraint solving process is a bottleneck for symbolic execution performance; however, we have implemented some optimizations to mitigate this issue. The `--incremental` flag enables *incremental solving*. Note that it may not always yield positive results during analysis, and is therefore optional. 95 | 96 | The `-v` option controls the logging level, allowing users to adjust the verbosity of logging output to aid in debugging. 97 | 98 | ### Analyze 99 | The `-s` is a mandatory option. It enables symbolic execution analysis on the given Wasm binary. 100 | 101 | The `--search` option specifies the search algorithm used during symbolic execution. The default algorithm is Depth-First Search (DFS), but users can choose from the following options: `bfs`, `random`, and `interval`. 102 | 103 | ## Output 104 | The output of SeeWasm, including logs and results, is stored in the `output` folder, with each file named according to the pattern `NAME_TIMESTAMP`. 105 | 106 | The log file follows a specific format, which illustrates the call trace of the anaylzed program: 107 | 108 | ```log 109 | 2024-07-01 07:50:36,191 | WARNING | Totally remove 27 unrelated functions, around 50.000% of all functions 110 | 2024-07-01 07:50:36,205 | INFO | Call: __original_main -> __main_void 111 | 2024-07-01 07:50:36,218 | INFO | Call: __main_void -> __wasi_args_sizes_get 112 | 2024-07-01 07:50:36,219 | INFO | Call: args_sizes_get (import) 113 | 2024-07-01 07:50:36,219 | INFO | args_sizes_get, argc_addr: 70792, arg_buf_size_addr: 70796 114 | 2024-07-01 07:50:36,219 | INFO | Return: args_sizes_get (import) 115 | 2024-07-01 07:50:36,219 | INFO | Return: __wasi_args_sizes_get 116 | ... 117 | ``` 118 | 119 | The result is a JSON file containing feasible paths with their solutions, formatted as follows: 120 | 121 | ```json 122 | { 123 | "Status": "xxx", 124 | "Solution": {"xxx"}, 125 | "Output": [ 126 | { 127 | "name": "stdout", 128 | "output": "xxx" 129 | }, 130 | { 131 | "name": "stderr", 132 | "output": "xxx" 133 | } 134 | ] 135 | } 136 | ``` 137 | 138 | You can use `./clean.sh -f` to remove all files in the `output` folder. 139 | 140 | ## Example 141 | To execute a program that takes no extra arguments or input, use the following command: 142 | 143 | ```shell 144 | python3 launcher.py -f PATH_TO_WASM_BINARY -s 145 | ``` 146 | 147 | If compilicated arguments are required, for example, a `base64` program with a `main` function like: 148 | 149 | ```c 150 | // main of base64 151 | int main(int argc, char **argv) 152 | { 153 | // environment setting 154 | ... 155 | 156 | while ((opt = getopt_long(argc, argv, "diw:", long_options, NULL)) != -1) 157 | switch (opt) { 158 | // call functions according to passed arguments 159 | ... 160 | } 161 | 162 | // encode or decode 163 | } 164 | ``` 165 | 166 | The `base64` program expects two-byte arguments and a string input to encode or decode, producing output that is written to a file. 167 | Thus, the command to analyze `base64` is like: 168 | 169 | ```shell 170 | python3 launcher.py -f PATH_TO_BASE64 -s --sym_args 2 --sym_stdin 5 --sym_files 1 10 171 | ``` 172 | 173 | ## Feedback 174 | 175 | If you have any questions or need further clarification, please post on the [Issues](https://github.com/HNYuuu/SeeWasm/issues) page. 176 | -------------------------------------------------------------------------------- /clean.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | set -e 3 | 4 | OUTPUT_DIR=output 5 | 6 | error() { 7 | command printf '\033[1;31mError: %s\033[0m\n\n' "$1" 1>&2 8 | } 9 | 10 | usage() { 11 | cat >&2 < 4 | # 5 | # Permission is hereby granted, free of charge, to any person obtaining a copy 6 | # of this software and associated documentation files (the "Software"), to deal 7 | # in the Software without restriction, including without limitation the rights 8 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | # copies of the Software, and to permit persons to whom the Software is 10 | # furnished to do so, subject to the following conditions: 11 | # 12 | # The above copyright notice and this permission notice shall be included in all 13 | # copies or substantial portions of the Software. 14 | # 15 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | # SOFTWARE. 22 | 23 | # modified code from https://github.com/athre0z/wasm/blob/master/wasm/modtypes.py 24 | # no need of that if PyPI wasm version 1.2 release 25 | 26 | 27 | """Provides functions for decoding WASM modules and bytecode.""" 28 | from __future__ import (absolute_import, division, print_function, 29 | unicode_literals) 30 | 31 | from collections import namedtuple 32 | 33 | from wasm.compat import byte2int 34 | from wasm.modtypes import (SEC_NAME, SEC_UNK, ModuleHeader, NameSubSection, 35 | Section) 36 | from wasm.opcodes import OPCODE_MAP 37 | 38 | Instruction = namedtuple('Instruction', 'op imm len') 39 | ModuleFragment = namedtuple('ModuleFragment', 'type data') 40 | 41 | 42 | def decode_bytecode(bytecode): 43 | """Decodes raw bytecode, yielding `Instruction`s.""" 44 | bytecode_wnd = memoryview(bytecode) 45 | while bytecode_wnd: 46 | opcode_id = byte2int(bytecode_wnd[0]) 47 | opcode = OPCODE_MAP[opcode_id] 48 | 49 | if opcode.imm_struct is not None: 50 | offs, imm, _ = opcode.imm_struct.from_raw(None, bytecode_wnd[1:]) 51 | else: 52 | imm = None 53 | offs = 0 54 | 55 | insn_len = 1 + offs 56 | yield Instruction(opcode, imm, insn_len) 57 | bytecode_wnd = bytecode_wnd[insn_len:] 58 | 59 | 60 | def decode_module(module, decode_name_subsections=False): 61 | """Decodes raw WASM modules, yielding `ModuleFragment`s.""" 62 | module_wnd = memoryview(module) 63 | 64 | # Read & yield module header. 65 | hdr = ModuleHeader() 66 | hdr_len, hdr_data, _ = hdr.from_raw(None, module_wnd) 67 | yield ModuleFragment(hdr, hdr_data) 68 | module_wnd = module_wnd[hdr_len:] 69 | 70 | # Read & yield sections. 71 | while module_wnd: 72 | sec = Section() 73 | # bypass the error caused by -g1 to -g3 compiled C code 74 | try: 75 | sec_len, sec_data, _ = sec.from_raw(None, module_wnd) 76 | except Exception: 77 | break 78 | 79 | # If requested, decode name subsections when encountered. 80 | if (decode_name_subsections and sec_data.id == SEC_UNK and sec_data.name == SEC_NAME): 81 | sec_wnd = sec_data.payload 82 | while sec_wnd: 83 | subsec = NameSubSection() 84 | subsec_len, subsec_data, _ = subsec.from_raw(None, sec_wnd) 85 | yield ModuleFragment(subsec, subsec_data) 86 | sec_wnd = sec_wnd[subsec_len:] 87 | else: 88 | yield ModuleFragment(sec, sec_data) 89 | 90 | # fix bug KeyError 91 | # if sec_data.id == SEC_UNK and sec_data.name: 92 | # sec_len -= sec_data.name_len + 1 93 | module_wnd = module_wnd[sec_len:] 94 | -------------------------------------------------------------------------------- /seewasm/arch/wasm/disassembler.py: -------------------------------------------------------------------------------- 1 | from collections import namedtuple 2 | 3 | from seewasm.arch.wasm.decode import decode_module 4 | from seewasm.arch.wasm.instruction import WasmInstruction 5 | from seewasm.arch.wasm.wasm import Wasm 6 | from seewasm.core.function import Function 7 | from seewasm.core.utils import bytecode_to_bytes 8 | from seewasm.engine.disassembler import Disassembler 9 | 10 | from wasm.compat import byte2int 11 | from wasm.formatter import format_instruction 12 | from wasm.modtypes import CodeSection 13 | from wasm.opcodes import OPCODE_MAP 14 | 15 | inst_namedtuple = namedtuple('Instruction', 'op imm len') 16 | 17 | 18 | class WasmDisassembler(Disassembler): 19 | 20 | def __init__(self, bytecode=None): 21 | Disassembler.__init__(self, asm=Wasm(), bytecode=bytecode) 22 | 23 | def disassemble_opcode(self, bytecode=None, offset=0, nature_offset=0): 24 | ''' 25 | based on decode_bytecode() 26 | https://github.com/athre0z/wasm/blob/master/wasm/decode.py 27 | 28 | ''' 29 | 30 | bytecode_wnd = memoryview(bytecode) 31 | bytecode_idx = 0 32 | opcode_id = byte2int(bytecode_wnd[bytecode_idx]) 33 | opcode_size = 1 34 | 35 | bytecode_idx += 1 36 | if opcode_id == 0xfc: 37 | opcode_id = (opcode_id << 8) | byte2int(bytecode_wnd[bytecode_idx]) 38 | if opcode_id == 0xfc0a: # memory.copy 39 | opcode_size = 4 40 | elif opcode_id == 0xfc0b: # memory.fill 41 | opcode_size = 3 42 | # default value 43 | # opcode:(mnemonic/name, imm_struct, pops, pushes, description) 44 | invalid = ('INVALID', 0, 0, 0, 'Unknown opcode') 45 | name, imm_struct, pops, pushes, description = \ 46 | self.asm.table.get(opcode_id, invalid) 47 | 48 | operand_size = 0 49 | operand = None 50 | operand_interpretation = None 51 | 52 | if imm_struct is not None: 53 | assert not isinstance(imm_struct, int), f"imm_struct is int, most likely encountered unsupported inst.\nname: {name}\nimm_struct: {imm_struct}\npops: {pops} pushes: {pushes}\ndesc: {description}\nopcode_id: {hex(opcode_id)}" 54 | operand_size, operand, _ = imm_struct.from_raw( 55 | None, bytecode_wnd[bytecode_idx:]) 56 | insn = inst_namedtuple( 57 | OPCODE_MAP[opcode_id], operand, bytecode_idx + operand_size) 58 | operand_interpretation = format_instruction(insn) 59 | insn_byte = bytecode_wnd[:bytecode_idx + operand_size].tobytes() 60 | instruction = WasmInstruction( 61 | opcode_id, opcode_size, name, imm_struct, operand_size, insn_byte, pops, pushes, 62 | description, operand_interpretation=operand_interpretation, 63 | offset=offset, nature_offset=nature_offset) 64 | # print('%d %s' % (offset, str(instruction))) 65 | return instruction 66 | 67 | def disassemble(self, bytecode=None, offset=0, nature_offset=0, 68 | r_format='list'): 69 | """Disassemble WASM bytecode 70 | 71 | :param bytecode: bytecode sequence 72 | :param offset: start offset 73 | :param r_format: output format ('list'/'text'/'reverse') 74 | :type bytecode: bytes, str 75 | :type offset: int 76 | :type r_format: list, str, dict 77 | :return: dissassembly result depending of r_format 78 | :rtype: list, str, dict 79 | """ 80 | 81 | return super().disassemble(bytecode, offset, nature_offset, r_format) 82 | 83 | def extract_functions_code(self, module_bytecode): 84 | functions = list() 85 | mod_iter = iter(decode_module(module_bytecode)) 86 | _, _ = next(mod_iter) 87 | sections = list(mod_iter) 88 | 89 | # iterate over all section 90 | # code_data = [cur_sec_data for cur_sec, cur_sec_data in sections if isinstance(cur_sec_data.get_decoder_meta()['types']['payload'], CodeSection)][0] 91 | for cur_sec, cur_sec_data in sections: 92 | sec = cur_sec_data.get_decoder_meta()['types']['payload'] 93 | if isinstance(sec, CodeSection): 94 | code_data = cur_sec_data 95 | break 96 | if not code_data: 97 | raise ValueError('No functions/codes in the module') 98 | for idx, func in enumerate(code_data.payload.bodies): 99 | instructions = self.disassemble(func.code.tobytes()) 100 | cur_function = Function(0, instructions[0]) 101 | cur_function.instructions = instructions 102 | 103 | functions.append(cur_function) 104 | return functions 105 | 106 | def disassemble_module( 107 | self, module_bytecode=None, offset=0, r_format='list'): 108 | 109 | bytecode = bytecode_to_bytes(module_bytecode) 110 | 111 | functions = self.extract_functions_code(bytecode[offset:]) 112 | self.instructions = [f.instructions for f in functions] 113 | 114 | # return instructions 115 | if r_format == 'list': 116 | return self.instructions 117 | elif r_format == 'text': 118 | text = '' 119 | for index, func in enumerate(functions): 120 | text += ('func %d\n' % index) 121 | text += ('\n'.join(map(str, func.instructions))) 122 | text += ('\n\n') 123 | return text 124 | -------------------------------------------------------------------------------- /seewasm/arch/wasm/exceptions.py: -------------------------------------------------------------------------------- 1 | # This file defines our own exceptions 2 | NO_EXIT = -99 3 | INVALIDMEMORY = -2 4 | ASSERT_FAIL = -3 5 | 6 | 7 | class UnsupportZ3TypeError(Exception): 8 | """ 9 | used in `utils.py` 10 | indicating that the variable type is not in ['i32', 'i64', 'f32', 'f64'] 11 | """ 12 | pass 13 | 14 | 15 | class UninitializedLocalVariableError(Exception): 16 | """ 17 | used in `emulator.py` 18 | indicating the local variable is not initialized before retriving 19 | """ 20 | pass 21 | 22 | 23 | class UnsupportGlobalTypeError(Exception): 24 | """ 25 | used in `emulator.py` 26 | indicating the unsupport global type encoutering global.get 27 | """ 28 | pass 29 | 30 | 31 | class UnsupportInstructionError(Exception): 32 | """ 33 | used in `emulator.py` 34 | indicating the unsupport instructions 35 | """ 36 | pass 37 | 38 | 39 | class NotDeterminedRetValError(Exception): 40 | """ 41 | indicateing the return value is bool but cannot be determined as True or False 42 | """ 43 | pass 44 | 45 | 46 | class UninitializedStateError(Exception): 47 | """ 48 | indicateing the state is not initialized before emulate_one_function 49 | """ 50 | pass 51 | 52 | 53 | class MemoryLoadError(Exception): 54 | """ 55 | indicating the memory load error 56 | """ 57 | pass 58 | 59 | 60 | class UnsupportExternalFuncError(Exception): 61 | """ 62 | indicating the library function is not emulated by us 63 | """ 64 | pass 65 | 66 | 67 | class UnexpectedDataType(Exception): 68 | """ 69 | Typically raised if there is a `if-elif-else` statement 70 | depending on the data type 71 | """ 72 | pass 73 | 74 | 75 | class ProcSuccessTermination(Exception): 76 | """ 77 | Indicate the process is successfully terminated 78 | """ 79 | 80 | def __init__(self, value): 81 | self.value = value 82 | 83 | # __str__ is to print() the value 84 | def __str__(self): 85 | return (repr(self.value)) 86 | 87 | 88 | class ProcFailTermination(Exception): 89 | """ 90 | Indicate the process is failedly terminated 91 | """ 92 | 93 | def __init__(self, value): 94 | self.value = value 95 | 96 | # __str__ is to print() the value 97 | def __str__(self): 98 | return (repr(self.value)) 99 | 100 | class HaltTermination(Exception): 101 | def __init__(self): 102 | pass 103 | -------------------------------------------------------------------------------- /seewasm/arch/wasm/format.py: -------------------------------------------------------------------------------- 1 | # It is for formatting 2 | 3 | import re 4 | 5 | from seewasm.arch.wasm.constant import LANG_TYPE 6 | 7 | 8 | def format_func_name(name, param_str, return_str): 9 | result = '{} '.format(return_str) if return_str else '' 10 | return ('{}{}({})'.format(result, name, param_str)) 11 | 12 | 13 | def format_bb_name(function_id, offset): 14 | return ('block_%x_%x' % (function_id, offset)) 15 | 16 | 17 | def format_kind_function(f_type): 18 | return f_type 19 | 20 | 21 | def format_kind_table(element_type, flags, initial, maximum): 22 | return {'element_type': LANG_TYPE.get(element_type), 23 | 'limits_flags': flags, 24 | 'limits_initial': initial, 25 | 'limits_maximum': maximum} 26 | 27 | 28 | def format_kind_memory(flags, initial, maximum): 29 | return {'limits_flags': flags, 30 | 'limits_initial': initial, 31 | 'limits_maximum': maximum} 32 | 33 | 34 | def format_kind_global(mutability, content_type, current_instruction): 35 | # leave mutability temporarily 36 | return [content_type, current_instruction] 37 | 38 | 39 | def format_scan_result(result): 40 | def name_to_string(val=13949526960272233840): 41 | charmap = ".12345abcdefghijklmnopqrstuvwxyz" 42 | result = ['.'] * 13 43 | for i in range(12 + 1): 44 | c = charmap[val & (0x0f if i == 0 else 0x1f)] 45 | result[12 - i] = c 46 | val >>= (4 if i == 0 else 5) 47 | result = ''.join(result).rstrip('.') 48 | return result 49 | 50 | def decode(matchobj): 51 | original = int(matchobj.group(0)) 52 | result = name_to_string(original) 53 | return result 54 | 55 | new_result = list() 56 | 57 | for key_functions, constraints in result: 58 | new_key_functions = key_functions 59 | new_constraints = list() 60 | for constraint in constraints: 61 | constraint = str(constraint) 62 | if 'action ==' in constraint or 'code ==' in constraint: 63 | constraint = re.sub(r'[0-9]{10,}', decode, constraint) 64 | new_constraints.append(constraint) 65 | new_result.append([new_key_functions.copy(), new_constraints.copy()]) 66 | 67 | return new_result 68 | -------------------------------------------------------------------------------- /seewasm/arch/wasm/instruction.py: -------------------------------------------------------------------------------- 1 | # It will parse each instructions in Wasm 2 | 3 | from seewasm.arch.wasm.wasm import _groups 4 | from seewasm.core.instruction import Instruction 5 | 6 | 7 | class WasmInstruction(Instruction): 8 | """Wasm Instruction 9 | TODO 10 | 11 | """ 12 | 13 | def __init__( 14 | self, opcode, opcode_size, name, imm_struct, operand_size, insn_byte, pops, 15 | pushes, description, operand_interpretation=None, offset=0, 16 | nature_offset=0): 17 | """ TODO """ 18 | self.opcode = opcode 19 | self.opcode_size = opcode_size 20 | self.offset = offset 21 | self.nature_offset = nature_offset 22 | self.name = name 23 | self.description = description 24 | self.operand_size = operand_size 25 | if len(insn_byte) > 1: 26 | # Immediate operand if any 27 | self.operand = insn_byte[-operand_size:] 28 | else: 29 | self.operand = None 30 | # specific interpretation of operand value 31 | self.operand_interpretation = operand_interpretation 32 | self.insn_byte = insn_byte 33 | self.pops = pops 34 | self.pushes = pushes 35 | self.imm_struct = imm_struct 36 | self.xref = list() 37 | self.ssa = None 38 | # which basic block locates in 39 | self.cur_bb = '' 40 | 41 | def __eq__(self, other): 42 | """ Instructions are equal if all features match """ 43 | return self.opcode == other.opcode and \ 44 | self.name == other.name and \ 45 | self.offset == other.offset and \ 46 | self.insn_byte == other.insn_byte and \ 47 | self.operand_size == other.operand_size and \ 48 | self.pops == other.pops and \ 49 | self.pushes == other.pushes and \ 50 | self.operand_interpretation == other.operand_interpretation and \ 51 | self.description == other.description 52 | 53 | def __str__(self): 54 | """ String representation of the instruction """ 55 | if self.operand: 56 | return self.operand_interpretation 57 | # elif self.operand: 58 | # return self.name + str(self.operand) 59 | else: 60 | return self.name 61 | 62 | @property 63 | def group(self): 64 | """ Instruction classification per group """ 65 | last_class = _groups.get(0) 66 | for k, v in _groups.items(): 67 | if self.opcode >= k: 68 | last_class = v 69 | else: 70 | return last_class 71 | return last_class 72 | 73 | @property 74 | def is_control(self): 75 | return self.group == 'Control' 76 | 77 | @property 78 | def is_parametric(self): 79 | return self.group == 'Parametric' 80 | 81 | @property 82 | def is_variable(self): 83 | return self.group == 'Variable' 84 | 85 | @property 86 | def is_memory(self): 87 | return self.group == 'Memory' 88 | 89 | @property 90 | def is_constant(self): 91 | return self.group == 'Constant' 92 | 93 | @property 94 | def is_logical_i32(self): 95 | return self.group == 'Logical_i32' 96 | 97 | @property 98 | def is_logical_i64(self): 99 | return self.group == 'Logical_i64' 100 | 101 | @property 102 | def is_logical_f32(self): 103 | return self.group == 'Logical_f32' 104 | 105 | @property 106 | def is_logical_f64(self): 107 | return self.group == 'Logical_f64' 108 | 109 | @property 110 | def is_arithmetic_i32(self): 111 | return self.group == 'Arithmetic_i32' 112 | 113 | @property 114 | def is_bitwise_i32(self): 115 | return self.group == 'Bitwise_i32' 116 | 117 | @property 118 | def is_arithmetic_i64(self): 119 | return self.group == 'Arithmetic_i64' 120 | 121 | @property 122 | def is_bitwise_i64(self): 123 | return self.group == 'Bitwise_i64' 124 | 125 | @property 126 | def is_arithmetic_f32(self): 127 | return self.group == 'Arithmetic_f32' 128 | 129 | @property 130 | def is_arithmetic_f64(self): 131 | return self.group == 'Arithmetic_f64' 132 | 133 | @property 134 | def is_conversion(self): 135 | return self.group == 'Conversion' 136 | 137 | @property 138 | def is_branch_conditional(self): 139 | """ Return True if the instruction is a conditional jump """ 140 | return self.name in {'br_if', 'br_table', 'if'} 141 | 142 | @property 143 | def is_branch_unconditional(self): 144 | """ Return True if the instruction is a unconditional jump """ 145 | return self.name in {'br'} 146 | 147 | @property 148 | def is_call(self): 149 | """ True if the instruction is a call instruction """ 150 | return self.name in {'call', 'call_indirect'} 151 | 152 | @property 153 | def is_branch(self): 154 | return self.is_branch_conditional or self.is_branch_unconditional 155 | 156 | @property 157 | def is_halt(self): 158 | """ Return True if the instruction is a branch terminator """ 159 | return self.name in {'unreachable', 'return'} 160 | 161 | @property 162 | def is_terminator(self): 163 | """ True if the instruction is a basic block terminator """ 164 | return self.is_branch or self.is_halt 165 | 166 | @property 167 | def is_block_starter(self): 168 | """ Return True if the instruction is a basic block starter """ 169 | return self.name in {'block', 'loop', 'if', 'else'} 170 | 171 | @property 172 | def is_block_terminator(self): 173 | """ Return True if the instruction is a basic block terminator """ 174 | return self.name in {'else', 'end'} 175 | -------------------------------------------------------------------------------- /seewasm/arch/wasm/instructions/ArithmeticInstructions.py: -------------------------------------------------------------------------------- 1 | # emulate the arithmetic related instructions 2 | 3 | import logging 4 | 5 | from seewasm.arch.wasm.exceptions import UnsupportInstructionError 6 | from z3 import (RNE, RTN, RTP, RTZ, BitVec, BitVecVal, Float32, Float64, SRem, 7 | UDiv, URem, fpAbs, fpAdd, fpDiv, fpMax, fpMin, fpMul, fpNeg, 8 | fpRoundToIntegral, fpSqrt, fpSub, is_bool, simplify) 9 | 10 | helper_map = { 11 | 'i32': 32, 12 | 'i64': 64, 13 | 'f32': [8, 24], 14 | 'f64': [11, 53] 15 | } 16 | 17 | float_helper_map = { 18 | 'f32': Float32, 19 | 'f64': Float64 20 | } 21 | 22 | 23 | class ArithmeticInstructions: 24 | def __init__(self, instr_name, instr_operand, _): 25 | self.instr_name = instr_name 26 | self.instr_operand = instr_operand 27 | 28 | def emulate(self, state): 29 | def do_emulate_arithmetic_int_instruction(state): 30 | instr_type = self.instr_name[:3] 31 | 32 | if '.clz' in self.instr_name or '.ctz' in self.instr_name: 33 | # wasm documentation says: 34 | # This instruction is fully defined when all bits are zero; 35 | # it returns the number of bits in the operand type. 36 | state.symbolic_stack.pop() 37 | state.symbolic_stack.append( 38 | BitVecVal(helper_map[instr_type], helper_map[instr_type])) 39 | elif '.popcnt' in self.instr_name: 40 | # wasm documentation says: 41 | # This instruction is fully defined when all bits are zero; 42 | # it returns 0. 43 | state.symbolic_stack.pop() 44 | state.symbolic_stack.append( 45 | BitVecVal(0, helper_map[instr_type])) 46 | else: 47 | arg1, arg2 = state.symbolic_stack.pop(), state.symbolic_stack.pop() 48 | 49 | # arg1 and arg2 could be BitVecRef, BitVecValRef and BoolRef 50 | if is_bool(arg1): 51 | arg1 = BitVec(str(arg1), helper_map[instr_type]) 52 | logging.warning( 53 | f"[!] In `ArithmeticInstructions.py`, arg1 is BoolRef, translated to BitVec which may lead to some information loss") 54 | if is_bool(arg2): 55 | arg2 = BitVec(str(arg2), helper_map[instr_type]) 56 | logging.warning( 57 | f"[!] In `ArithmeticInstructions.py`, arg2 is BoolRef, translated to BitVec which may lead to some information loss") 58 | 59 | assert arg1.size( 60 | ) == helper_map[instr_type], f"in arithmetic instruction, arg1 size is {arg1.size()} instead of {helper_map[instr_type]}" 61 | assert arg2.size( 62 | ) == helper_map[instr_type], f"in arithmetic instruction, arg2 size is {arg2.size()} instead of {helper_map[instr_type]}" 63 | 64 | if '.sub' in self.instr_name: 65 | result = arg2 - arg1 66 | elif '.add' in self.instr_name: 67 | result = arg2 + arg1 68 | elif '.mul' in self.instr_name: 69 | result = arg2 * arg1 70 | elif '.div_s' in self.instr_name: 71 | result = arg2 / arg1 72 | elif '.div_u' in self.instr_name: 73 | result = UDiv(arg2, arg1) 74 | elif '.rem_s' in self.instr_name: 75 | result = SRem(arg2, arg1) 76 | elif '.rem_u' in self.instr_name: 77 | result = URem(arg2, arg1) 78 | else: 79 | raise UnsupportInstructionError 80 | 81 | result = simplify(result) 82 | state.symbolic_stack.append(result) 83 | 84 | return [state] 85 | 86 | def do_emulate_arithmetic_float_instruction(state): 87 | # TODO need to be clarified 88 | # wasm default rounding rules 89 | rm = RNE() 90 | 91 | instr_type = self.instr_name[:3] 92 | 93 | two_arguments_instrs = ['add', 'sub', 94 | 'mul', 'div', 'min', 'max', 'copysign'] 95 | one_argument_instrs = ['sqrt', 'floor', 96 | 'ceil', 'trunc', 'nearest', 'abs', 'neg'] 97 | 98 | # add instr_type before each instr 99 | two_arguments_instrs = [str(instr_type + '.' + i) 100 | for i in two_arguments_instrs] 101 | one_argument_instrs = [str(instr_type + '.' + i) 102 | for i in one_argument_instrs] 103 | 104 | # pop two elements 105 | if self.instr_name in two_arguments_instrs: 106 | arg1, arg2 = state.symbolic_stack.pop(), state.symbolic_stack.pop() 107 | 108 | assert arg1.ebits() == helper_map[instr_type][0] and arg1.sbits( 109 | ) == helper_map[instr_type][1], 'In do_emulate_arithmetic_float_instruction, arg1 type mismatch' 110 | assert arg2.ebits() == helper_map[instr_type][0] and arg2.sbits( 111 | ) == helper_map[instr_type][1], 'In do_emulate_arithmetic_float_instruction, arg2 type mismatch' 112 | 113 | if '.add' in self.instr_name: 114 | result = fpAdd(rm, arg2, arg1) 115 | elif '.sub' in self.instr_name: 116 | result = fpSub(rm, arg2, arg1) 117 | elif '.mul' in self.instr_name: 118 | result = fpMul(rm, arg2, arg1) 119 | elif '.div' in self.instr_name: 120 | result = fpDiv(rm, arg2, arg1) 121 | elif '.min' in self.instr_name: 122 | result = fpMin(arg2, arg1) 123 | elif '.max' in self.instr_name: 124 | result = fpMax(arg2, arg1) 125 | elif '.copysign' in self.instr_name == 'f32.copysign': 126 | # extract arg2's sign to overwrite arg1's sign 127 | if arg2.isPositive() ^ arg1.isPositive(): 128 | result = fpNeg(arg1) 129 | # pop one element 130 | elif self.instr_name in one_argument_instrs: 131 | arg1 = state.symbolic_stack.pop() 132 | 133 | assert arg1.ebits() == helper_map[instr_type][0] and arg1.sbits( 134 | ) == helper_map[instr_type][1], 'In do_emulate_arithmetic_float_instruction, arg1 type mismatch' 135 | 136 | if '.sqrt' in self.instr_name: 137 | result = fpSqrt(rm, arg1) 138 | elif '.floor' in self.instr_name: 139 | # round toward negative 140 | result = fpRoundToIntegral(RTN(), arg1) 141 | elif '.ceil' in self.instr_name: 142 | # round toward positive 143 | result = fpRoundToIntegral(RTP(), arg1) 144 | elif '.trunc' in self.instr_name: 145 | # round toward zero 146 | result = fpRoundToIntegral(RTZ(), arg1) 147 | elif '.nearest' in self.instr_name: 148 | # round to integeral ties to even 149 | result = fpRoundToIntegral(RNE(), arg1) 150 | elif '.abs' in self.instr_name: 151 | result = fpAbs(arg1) 152 | elif '.neg' in self.instr_name: 153 | result = fpNeg(arg1) 154 | else: 155 | raise UnsupportInstructionError 156 | 157 | result = simplify(result) 158 | state.symbolic_stack.append(result) 159 | 160 | return [state] 161 | 162 | op_type = self.instr_name[:1] 163 | if op_type == 'i': 164 | return do_emulate_arithmetic_int_instruction(state) 165 | else: 166 | return do_emulate_arithmetic_float_instruction(state) 167 | -------------------------------------------------------------------------------- /seewasm/arch/wasm/instructions/BitwiseInstructions.py: -------------------------------------------------------------------------------- 1 | # emulate the bitwise related instructions 2 | 3 | import logging 4 | 5 | from seewasm.arch.wasm.exceptions import UnsupportInstructionError 6 | from z3 import (BitVec, BitVecVal, LShR, RotateLeft, RotateRight, is_bool, 7 | is_bv, is_false, is_true, simplify) 8 | 9 | helper_map = { 10 | 'i32': 32, 11 | 'i64': 64, 12 | } 13 | 14 | 15 | class BitwiseInstructions: 16 | def __init__(self, instr_name, instr_operand, _): 17 | self.instr_name = instr_name 18 | self.instr_operand = instr_operand 19 | 20 | # TODO overflow check in this function? 21 | def emulate(self, state): 22 | arg1, arg2 = state.symbolic_stack.pop(), state.symbolic_stack.pop() 23 | instr_type = self.instr_name[:3] 24 | 25 | # arg1 and arg2 could be BitVecRef, BitVecValRef and BoolRef 26 | if is_bool(arg1): 27 | arg1 = BitVec(str(arg1), helper_map[instr_type]) 28 | logging.warning( 29 | f"[!] In `BitwiseInstructions.py`, arg1 is BoolRef, translated to BitVec which may lead to some information loss") 30 | if is_bool(arg2): 31 | arg2 = BitVec(str(arg2), helper_map[instr_type]) 32 | logging.warning( 33 | f"[!] In `BitwiseInstructions.py`, arg2 is BoolRef, translated to BitVec which may lead to some information loss") 34 | 35 | assert arg1.size( 36 | ) == helper_map[instr_type], f'arg1 size is {arg1.size()} instead of {helper_map[instr_type]} in do_emulate_bitwise_instruction' 37 | assert arg2.size( 38 | ) == helper_map[instr_type], f'arg2 size is {arg2.size()} instead of {helper_map[instr_type]} in do_emulate_bitwise_instruction' 39 | 40 | if '.and' in self.instr_name: 41 | result = simplify(arg1 & arg2) 42 | elif '.or' in self.instr_name: 43 | result = simplify(arg1 | arg2) 44 | elif '.xor' in self.instr_name: 45 | result = simplify(arg1 ^ arg2) 46 | elif '.shr_s' in self.instr_name: 47 | result = simplify(arg2 >> arg1) 48 | elif '.shr_u' in self.instr_name: 49 | result = simplify(LShR(arg2, arg1)) 50 | elif '.shl' in self.instr_name: 51 | result = simplify(arg2 << arg1) 52 | elif '.rotl' in self.instr_name: 53 | result = simplify(RotateLeft(arg2, arg1)) 54 | elif '.rotr' in self.instr_name: 55 | result = simplify(RotateRight(arg2, arg1)) 56 | else: 57 | raise UnsupportInstructionError 58 | 59 | if is_bool(result): 60 | if is_true(result): 61 | result = BitVecVal(1, 32) 62 | elif is_false(result): 63 | result = BitVecVal(0, 32) 64 | 65 | assert is_bv(result) or is_bool( 66 | result), f"in bitwise instruction, the value to be pushed is {type(result)} instead of BitVec or Bool" 67 | 68 | state.symbolic_stack.append(result) 69 | 70 | return [state] 71 | -------------------------------------------------------------------------------- /seewasm/arch/wasm/instructions/ConstantInstructions.py: -------------------------------------------------------------------------------- 1 | # emulate the constant related instructions 2 | 3 | import re 4 | from struct import unpack 5 | 6 | from seewasm.arch.wasm.exceptions import UnsupportInstructionError 7 | from z3 import BitVecVal, Float32, Float64, FPVal 8 | 9 | 10 | class ConstantInstructions: 11 | def __init__(self, instr_name, instr_operand, instr_string): 12 | self.instr_name = instr_name 13 | self.instr_operand = instr_operand 14 | self.instr_str = instr_string 15 | 16 | # TODO overflow check in this function? 17 | def emulate(self, state): 18 | # there are two types of const: i and f, like: 19 | # i32.const 0 20 | # f64.const 0x1.9p+6 (;=100;) 21 | # thus we have to deal with the different situations 22 | mnemonic = self.instr_str.split(' ')[0] 23 | const_num = self.instr_str.split(' ')[-1] 24 | const_type_prefix, _ = mnemonic.split('.') 25 | 26 | if const_type_prefix == 'i32': 27 | state.symbolic_stack.append(BitVecVal(const_num, 32)) 28 | elif const_type_prefix == 'i64': 29 | state.symbolic_stack.append(BitVecVal(const_num, 64)) 30 | elif const_type_prefix == 'f32' or const_type_prefix == 'f64': 31 | # extract float number 100 from (;=100;) 32 | # TODO: need to be verified 33 | num_found = re.search(';=([0-9.-]+);', const_num) 34 | if num_found: 35 | float_num = num_found.group(1) 36 | if const_type_prefix == 'f32': 37 | state.symbolic_stack.append(FPVal(float_num, Float32())) 38 | else: 39 | state.symbolic_stack.append(FPVal(float_num, Float64())) 40 | elif const_num[:2] == '0x': 41 | # remove '0x' prefix 42 | const_num = const_num[2:] 43 | # extend with '0' till const_num length is 4 bytes 44 | current_const_num_length = len(const_num) 45 | 46 | need_zero = (8 - current_const_num_length) if const_type_prefix == 'f32' else ( 47 | 16 - current_const_num_length) 48 | const_num = '0' * need_zero + const_num 49 | 50 | if const_type_prefix == 'f32': 51 | float_num = unpack('!f', bytes.fromhex(const_num))[0] 52 | state.symbolic_stack.append(FPVal(float_num, Float32())) 53 | else: 54 | float_num = unpack('!d', bytes.fromhex(const_num))[0] 55 | state.symbolic_stack.append(FPVal(float_num, Float64())) 56 | else: 57 | raise UnsupportInstructionError 58 | else: 59 | raise UnsupportInstructionError 60 | 61 | return [state] 62 | -------------------------------------------------------------------------------- /seewasm/arch/wasm/instructions/ControlInstructions.py: -------------------------------------------------------------------------------- 1 | import copy 2 | import logging 3 | from collections import defaultdict 4 | 5 | from z3 import (Not, Or, is_bool, is_bv, is_bv_value, is_false, is_true, 6 | simplify, unsat) 7 | 8 | from seewasm.arch.wasm.configuration import Configuration 9 | from seewasm.arch.wasm.exceptions import (ASSERT_FAIL, ProcFailTermination, 10 | ProcSuccessTermination, 11 | UnsupportInstructionError) 12 | from seewasm.arch.wasm.lib.c_lib import CPredefinedFunction 13 | from seewasm.arch.wasm.lib.go_lib import GoPredefinedFunction 14 | from seewasm.arch.wasm.lib.utils import is_modeled 15 | from seewasm.arch.wasm.lib.wasi import WASIImportFunction 16 | from seewasm.arch.wasm.utils import (log_in_out, one_time_query_cache, 17 | readable_internal_func_name) 18 | 19 | TERMINATED_FUNCS = {'__assert_fail', 'runtime.divideByZeroPanic'} 20 | 21 | 22 | class ControlInstructions: 23 | def __init__(self, instr_name, instr_operand, instr_string): 24 | self.instr_name = instr_name 25 | self.instr_operand = instr_operand 26 | self.instr_string = instr_string 27 | self.skip_command = {'loop', 'end', 'br', 'else', 'block'} 28 | self.term_command = {'unreachable', 'return'} 29 | 30 | def store_context(self, param_str, return_str, state, callee_func_name): 31 | """ 32 | Store the context of current stack and local. 33 | The sequence is: 34 | 1. pop specific number of elements from stack, which will be used by callee 35 | 2. store the current context, including (current_func, current_block, stack, local, require_return) 36 | 3. assign popped elements in step 1 in local, change the current_func_name 37 | """ 38 | logging.info( 39 | f"Call: {readable_internal_func_name(Configuration.get_func_index_to_func_name(), state.current_func_name)} -> {callee_func_name}") 40 | 41 | # step 1 42 | num_arg = 0 43 | if param_str: 44 | num_arg = len(param_str.split(' ')) 45 | arg = [state.symbolic_stack.pop() for _ in range(num_arg)] 46 | 47 | # step 2 48 | state.context_stack.append((state.current_func_name, 49 | state.instr.cur_bb, 50 | [e for e in state.symbolic_stack], 51 | copy.copy(state.local_var), 52 | True if return_str else False)) 53 | 54 | # step 3 55 | for x in range(num_arg): 56 | state.local_var[num_arg - 1 - x] = arg[x] 57 | # set the remaining local vars as None 58 | for x in range(num_arg, len(state.local_var)): 59 | try: 60 | state.local_var.pop(x) 61 | except KeyError: 62 | # if some of the local var is unused during the caller 63 | # there is no need to pop it, thus continue the loop 64 | continue 65 | 66 | state.current_func_name = callee_func_name 67 | 68 | def restore_context(self, state): 69 | """ 70 | Restore context. 71 | 72 | 1. pop an element from stack if require return 73 | 2. restore the context 74 | 3. push the element in step 1 into stack 75 | """ 76 | if len(state.context_stack) == 0: 77 | raise ProcSuccessTermination(0) 78 | 79 | caller_func_name, cur_bb, stack, local, require_return = state.context_stack.pop() 80 | 81 | logging.info( 82 | f"Return: {readable_internal_func_name(Configuration.get_func_index_to_func_name(), state.current_func_name)}") 83 | 84 | # step 1 85 | if require_return: 86 | return_val = state.symbolic_stack.pop() 87 | 88 | # step 2 89 | state.current_func_name = caller_func_name 90 | state.current_bb_name = cur_bb 91 | state.symbolic_stack = stack 92 | state.local_var = local 93 | 94 | # step 3 95 | if require_return: 96 | state.symbolic_stack.append(return_val) 97 | 98 | def deal_with_call(self, state, f_offset, data_section, analyzer, lvar): 99 | # get the callee's function signature 100 | target_func = analyzer.func_prototypes[f_offset] 101 | callee_func_name, param_str, return_str, _ = target_func 102 | 103 | readable_callee_func_name = readable_internal_func_name( 104 | Configuration.get_func_index_to_func_name(), 105 | callee_func_name) 106 | if Configuration.get_dsl_flag() and readable_callee_func_name.startswith("checker"): 107 | # if it is a instrumented function 108 | idx = int(readable_callee_func_name.split('$')[1]) 109 | """ 110 | if idx == -1: 111 | arg = _extract_params(param_str, state)[0] 112 | state.solver.add(arg > 0); 113 | elif idx == -2: 114 | arg = _extract_params(param_str, state)[0] 115 | state.solver.add(arg > 0); 116 | elif idx == 3: 117 | lvar['prior'] = abs(20 - lvar['rounds_i']) - 20 118 | elif idx == 4: 119 | lvar['prior'] = abs(3 - lvar['rounds_j']) 120 | """ 121 | states = [state] 122 | elif Configuration.get_source_type() == 'c' and is_modeled(readable_callee_func_name, specify_lang='c'): 123 | func = CPredefinedFunction( 124 | readable_callee_func_name, state.current_func_name) 125 | states = log_in_out( 126 | readable_callee_func_name, "C Library")( 127 | func.emul)( 128 | state, param_str, return_str, data_section, analyzer) 129 | elif Configuration.get_source_type() == 'go' and is_modeled(readable_callee_func_name, specify_lang='go'): 130 | # TODO Go library func modeling is not tested 131 | func = GoPredefinedFunction( 132 | readable_callee_func_name, state.current_func_name) 133 | states = log_in_out( 134 | readable_callee_func_name, "Go Library")( 135 | func.emul)( 136 | state, param_str, return_str, data_section, analyzer) 137 | elif Configuration.get_source_type() == 'rust' and is_modeled(readable_callee_func_name, specify_lang='rust'): 138 | # TODO may model some rust library funcs 139 | pass 140 | # if the callee is imported (WASI) 141 | elif is_modeled(readable_callee_func_name, specify_lang='wasi'): 142 | func = WASIImportFunction( 143 | readable_callee_func_name, state.current_func_name) 144 | states = log_in_out( 145 | readable_callee_func_name, "import")( 146 | func.emul)( 147 | state, param_str, return_str, data_section) 148 | elif readable_callee_func_name in TERMINATED_FUNCS: 149 | logging.info(f"Termination: {readable_callee_func_name}") 150 | raise ProcFailTermination(ASSERT_FAIL) 151 | else: 152 | self.store_context(param_str, return_str, state, 153 | readable_callee_func_name) 154 | states = [state] 155 | return states 156 | 157 | def emulate(self, state, data_section, analyzer, lvar): 158 | if self.instr_name in self.skip_command: 159 | return [state] 160 | if self.instr_name in self.term_command: 161 | return [state] 162 | 163 | if self.instr_name == 'nop': 164 | if state.instr.xref: 165 | self.restore_context(state) 166 | return [state] 167 | elif self.instr_name == 'br_if' or self.instr_name == 'if': 168 | op = state.symbolic_stack.pop() 169 | assert is_bv(op) or is_bool( 170 | op), f"the type of op popped from stack in `br_if`/`if` is {type(op)} instead of bv or bool" 171 | states = [] 172 | if is_bv(op): 173 | op = simplify(op != 0) 174 | 175 | # | op | branch | 176 | # | ------- | ------------------- | 177 | # | False | conditional_false_0 | 178 | # | True | conditional_true_0 | 179 | # | BoolRef | both | 180 | 181 | if is_true(op): 182 | state.edge_type = 'conditional_true_0' 183 | states.append(state) 184 | elif is_false(op): 185 | state.edge_type = 'conditional_false_0' 186 | states.append(state) 187 | elif not is_true(op) and not is_false(op): 188 | # these two flags are used to jump over unnecessary deepcopy 189 | no_need_true, no_need_false = False, False 190 | if unsat == one_time_query_cache(state.solver, op): 191 | no_need_true = True 192 | if unsat == one_time_query_cache(state.solver, Not(op)): 193 | no_need_false = True 194 | 195 | if no_need_true and no_need_false: 196 | pass 197 | elif not no_need_true and not no_need_false: 198 | new_state = copy.deepcopy(state) 199 | # conditional_true 200 | state.edge_type = 'conditional_true_0' 201 | state.solver.add(op) 202 | # conditional_false 203 | new_state.edge_type = 'conditional_false_0' 204 | new_state.solver.add(Not(op)) 205 | # append 206 | states.append(state) 207 | states.append(new_state) 208 | else: 209 | if no_need_true: 210 | state.edge_type = 'conditional_false_0' 211 | state.solver.add(Not(op)) 212 | states.append(state) 213 | else: 214 | state.edge_type = 'conditional_true_0' 215 | state.solver.add(op) 216 | states.append(state) 217 | else: 218 | exit(f"br_if/if instruction error. op is {op}") 219 | 220 | return states 221 | elif self.instr_name == 'call_indirect': 222 | # refer to: https://developer.mozilla.org/en-US/docs/WebAssembly/Understanding_the_text_format#webassembly_tables 223 | # this instruction will pop an element out of the stack, and use this as an index in the table, i.e., elem section in Wasm module, to dynamically determine which fucntion will be invoked 224 | elem_index_to_func = Configuration.get_elem_index_to_func() 225 | 226 | # target function index 227 | op = state.symbolic_stack.pop() 228 | assert is_bv_value( 229 | op), f"in call_indirect, op is a symbol ({op}), not support yet" 230 | op = op.as_long() 231 | 232 | offset = analyzer.elements[0]['offset'] 233 | 234 | callee_func_name = elem_index_to_func[op - offset] 235 | callee_func_offset = -1 236 | for func_offset, item in enumerate(analyzer.func_prototypes): 237 | if callee_func_name == readable_internal_func_name( 238 | Configuration.get_func_index_to_func_name(), 239 | item[0]): 240 | state.call_indirect_callee = callee_func_name 241 | callee_func_offset = func_offset 242 | break 243 | 244 | if callee_func_offset == -1: 245 | exit("no valid callee in call_indirect") 246 | else: 247 | return self.deal_with_call( 248 | state, callee_func_offset, data_section, analyzer, lvar) 249 | elif self.instr_name == 'br_table': 250 | # state.instr.xref indicates the destination instruction's offset 251 | # TODO examine br_table 252 | op = state.symbolic_stack.pop() 253 | 254 | # operands of br_table instruction 255 | ops = [i for i in self.instr_operand] 256 | n_br, br_lis = ops[0], ops[1:-1] 257 | 258 | # construct a dict to minimize the possible states 259 | target_branch2index = defaultdict(list) 260 | for index, target in enumerate(br_lis): 261 | target_branch2index[target].append(index) 262 | 263 | # construct possible state 264 | states = [] 265 | for target, index_list in target_branch2index.items(): 266 | index_list = [simplify(op == i) for i in index_list] 267 | cond = simplify(Or(index_list)) 268 | if is_false(cond): 269 | continue 270 | elif is_true(cond): 271 | # we can omit the "True" apppended into the constraint 272 | new_state = copy.deepcopy(state) 273 | new_state.edge_type = f"conditional_true_{target}" 274 | states.append(new_state) 275 | else: 276 | # we have to query z3 277 | new_state = copy.deepcopy(state) 278 | new_state.solver.add(cond) 279 | new_state.edge_type = f"conditional_true_{target}" 280 | states.append(new_state) 281 | 282 | # determine if we need the default branch 283 | cond = simplify(Or(op >= n_br, op < 0)) 284 | if is_false(cond): 285 | # we don't need it 286 | pass 287 | elif is_true(cond): 288 | state.edge_type = "conditional_false_0" 289 | states.append(state) 290 | else: 291 | state.solver.add(cond) 292 | state.edge_type = "conditional_false_0" 293 | states.append(state) 294 | 295 | assert len(states) != 0, f"in br_table, no branch is selected" 296 | return states 297 | elif self.instr_name == 'call': 298 | self.instr_operand = self.instr_string.split(' ')[1] 299 | # get the callee's function signature 300 | try: 301 | f_offset = int(self.instr_operand) 302 | except ValueError: 303 | # it's possible that the `call` operand is a hex 304 | f_offset = int(self.instr_operand, 16) 305 | return self.deal_with_call( 306 | state, f_offset, data_section, analyzer, lvar) 307 | else: 308 | raise UnsupportInstructionError 309 | -------------------------------------------------------------------------------- /seewasm/arch/wasm/instructions/ConversionInstructions.py: -------------------------------------------------------------------------------- 1 | # emulate the conversion related instructions 2 | 3 | from seewasm.arch.wasm.exceptions import UnsupportInstructionError 4 | from z3 import (RNE, RTZ, BitVecSort, BitVecVal, Extract, Float32, Float64, 5 | SignExt, ZeroExt, fpBVToFP, fpFPToFP, fpSignedToFP, fpToIEEEBV, 6 | fpToSBV, fpToUBV, fpUnsignedToFP, simplify) 7 | 8 | 9 | class ConversionInstructions: 10 | def __init__(self, instr_name, instr_operand, _): 11 | self.instr_name = instr_name 12 | self.instr_operand = instr_operand 13 | 14 | def emulate(self, state): 15 | arg0 = state.symbolic_stack.pop() 16 | 17 | if self.instr_name == 'i32.wrap/i64': 18 | assert arg0.size() == 64, 'i32.wrap/i64 has wrong arg type' 19 | divisor = BitVecVal(2 ** 32, 64) 20 | # mod 21 | result = simplify(Extract(31, 0, arg0 % divisor)) 22 | elif self.instr_name == 'i64.extend_s/i32': 23 | assert arg0.size() == 32, 'i64.extend_s/i32 has wrong arg type' 24 | 25 | result = simplify(SignExt(32, arg0)) 26 | elif self.instr_name == 'i64.extend_u/i32': 27 | assert arg0.size() == 32, 'i64.extend_u/i32 has wrong arg type' 28 | 29 | result = simplify(ZeroExt(32, arg0)) 30 | elif self.instr_name == 'i32.trunc_s/f32': 31 | assert arg0.ebits() == 8, 'i32.trunc_s/f32 has wrong arg type' 32 | assert arg0.sbits() == 24, 'i32.trunc_s/f32 has wrong arg type' 33 | 34 | rm = RTZ() 35 | result = simplify(fpToSBV(rm, arg0, BitVecSort(32))) 36 | assert result.size() == 32, 'i32.trunc_s/f32 convert fail' 37 | elif self.instr_name == 'i32.trunc_s/f64': 38 | assert arg0.ebits() == 11, 'i32.trunc_s/f64 has wrong arg type' 39 | assert arg0.sbits() == 53, 'i32.trunc_s/f64 has wrong arg type' 40 | 41 | rm = RTZ() 42 | result = simplify(fpToSBV(rm, arg0, BitVecSort(32))) 43 | assert result.size() == 32, 'i32.trunc_s/f64 convert fail' 44 | elif self.instr_name == 'i64.trunc_s/f32': 45 | assert arg0.ebits() == 8, 'i64.trunc_s/f32 has wrong arg type' 46 | assert arg0.sbits() == 24, 'i64.trunc_s/f32 has wrong arg type' 47 | 48 | rm = RTZ() 49 | result = simplify(fpToSBV(rm, arg0, BitVecSort(64))) 50 | assert result.size() == 64, 'i64.trunc_s/f32 convert fail' 51 | elif self.instr_name == 'i64.trunc_s/f64': 52 | assert arg0.ebits() == 11, 'i64.trunc_s/f64 has wrong arg type' 53 | assert arg0.sbits() == 53, 'i64.trunc_s/f64 has wrong arg type' 54 | 55 | rm = RTZ() 56 | result = simplify(fpToSBV(rm, arg0, BitVecSort(64))) 57 | assert result.size() == 64, 'i64.trunc_s/f64 convert fail' 58 | elif self.instr_name == 'i32.trunc_u/f32': 59 | assert arg0.ebits() == 8, 'i32.trunc_u/f32 has wrong arg type' 60 | assert arg0.sbits() == 24, 'i32.trunc_u/f32 has wrong arg type' 61 | 62 | rm = RTZ() 63 | result = simplify(fpToUBV(rm, arg0, BitVecSort(32))) 64 | assert result.size() == 32, 'i32.trunc_u/f32 convert fail' 65 | elif self.instr_name == 'i32.trunc_u/f64': 66 | assert arg0.ebits() == 11, 'i32.trunc_u/f64 has wrong arg type' 67 | assert arg0.sbits() == 53, 'i32.trunc_u/f64 has wrong arg type' 68 | 69 | rm = RTZ() 70 | result = simplify(fpToUBV(rm, arg0, BitVecSort(32))) 71 | assert result.size() == 32, 'i32.trunc_u/f64 convert fail' 72 | elif self.instr_name == 'i64.trunc_u/f32': 73 | assert arg0.ebits() == 8, 'i64.trunc_u/f32 has wrong arg type' 74 | assert arg0.sbits() == 24, 'i64.trunc_u/f32 has wrong arg type' 75 | 76 | rm = RTZ() 77 | result = simplify(fpToUBV(rm, arg0, BitVecSort(64))) 78 | assert result.size() == 64, 'i64.trunc_u/f32 convert fail' 79 | elif self.instr_name == 'i64.trunc_u/f64': 80 | assert arg0.ebits() == 11, 'i64.trunc_u/f64 has wrong arg type' 81 | assert arg0.sbits() == 53, 'i64.trunc_u/f64 has wrong arg type' 82 | 83 | rm = RTZ() 84 | result = simplify(fpToUBV(rm, arg0, BitVecSort(64))) 85 | assert result.size() == 64, 'i64.trunc_u/f64 convert fail' 86 | elif self.instr_name == 'f32.demote/f64': 87 | assert arg0.ebits() == 11, 'f32.demote/f64 has wrong arg type' 88 | assert arg0.sbits() == 53, 'f32.demote/f64 has wrong arg type' 89 | 90 | rm = RNE() 91 | result = simplify(fpFPToFP(rm, arg0, Float32())) 92 | assert result.ebits() == 8, 'f32.demote/f64 conversion fail' 93 | assert result.sbits() == 24, 'f32.demote/f64 conversion fail' 94 | elif self.instr_name == 'f64.promote/f32': 95 | assert arg0.ebits() == 8, 'f64.promote/f32 has wrong arg type' 96 | assert arg0.sbits() == 24, 'f64.promote/f32 has wrong arg type' 97 | 98 | rm = RNE() 99 | result = simplify(fpFPToFP(rm, arg0, Float64())) 100 | assert result.ebits() == 11, 'f64.promote/f32 conversion fail' 101 | assert result.sbits() == 53, 'f64.promote/f32 conversion fail' 102 | elif self.instr_name == 'f32.convert_s/i32': 103 | assert arg0.size() == 32, 'f32.convert_s/i32 has wrong arg type' 104 | 105 | rm = RNE() 106 | result = simplify(fpSignedToFP(rm, arg0, Float32())) 107 | assert result.ebits() == 8, 'f32.convert_s/i32 conversion fail' 108 | assert result.sbits() == 24, 'f32.convert_s/i32 conversion fail' 109 | elif self.instr_name == 'f32.convert_s/i64': 110 | assert arg0.size() == 64, 'f32.convert_s/i64 has wrong arg type' 111 | 112 | rm = RNE() 113 | result = simplify(fpSignedToFP(rm, arg0, Float32())) 114 | assert result.ebits() == 8, 'f32.convert_s/i64 conversion fail' 115 | assert result.sbits() == 24, 'f32.convert_s/i64 conversion fail' 116 | elif self.instr_name == 'f64.convert_s/i32': 117 | assert arg0.size() == 32, 'f64.convert_s/i32 has wrong arg type' 118 | 119 | rm = RNE() 120 | result = simplify(fpSignedToFP(rm, arg0, Float64())) 121 | assert result.ebits() == 11, 'f64.convert_s/i32 conversion fail' 122 | assert result.sbits() == 53, 'f64.convert_s/i32 conversion fail' 123 | elif self.instr_name == 'f64.convert_s/i64': 124 | assert arg0.size() == 64, 'f64.convert_s/i64 has wrong arg type' 125 | 126 | rm = RNE() 127 | result = simplify(fpSignedToFP(rm, arg0, Float64())) 128 | assert result.ebits() == 11, 'f64.convert_s/i64 conversion fail' 129 | assert result.sbits() == 53, 'f64.convert_s/i64 conversion fail' 130 | elif self.instr_name == 'f32.convert_u/i32': 131 | assert arg0.size() == 32, 'f32.convert_u/i32 has wrong arg type' 132 | 133 | rm = RNE() 134 | result = simplify(fpUnsignedToFP(rm, arg0, Float32())) 135 | assert result.ebits() == 8, 'f32.convert_u/i32 conversion fail' 136 | assert result.sbits() == 24, 'f32.convert_u/i32 conversion fail' 137 | elif self.instr_name == 'f32.convert_u/i64': 138 | assert arg0.size() == 64, 'f32.convert_u/i64 has wrong arg type' 139 | 140 | rm = RNE() 141 | result = simplify(fpUnsignedToFP(rm, arg0, Float32())) 142 | assert result.ebits() == 8, 'f32.convert_u/i64 conversion fail' 143 | assert result.sbits() == 24, 'f32.convert_u/i64 conversion fail' 144 | elif self.instr_name == 'f64.convert_u/i32': 145 | assert arg0.size() == 32, 'f64.convert_u/i32 has wrong arg type' 146 | 147 | rm = RNE() 148 | result = simplify(fpUnsignedToFP(rm, arg0, Float64())) 149 | assert result.ebits() == 11, 'f64.convert_u/i32 conversion fail' 150 | assert result.sbits() == 53, 'f64.convert_u/i32 conversion fail' 151 | elif self.instr_name == 'f64.convert_u/i64': 152 | assert arg0.size() == 64, 'f64.convert_u/i64 has wrong arg type' 153 | 154 | rm = RNE() 155 | result = simplify(fpUnsignedToFP(rm, arg0, Float64())) 156 | assert result.ebits() == 11, 'f64.convert_u/i64 conversion fail' 157 | assert result.sbits() == 53, 'f64.convert_u/i64 conversion fail' 158 | elif self.instr_name == 'i32.reinterpret/f32': 159 | assert arg0.ebits() == 8, 'i32.reinterpret/f32 has wrong arg type' 160 | assert arg0.sbits() == 24, 'i32.reinterpret/f32 has wrong arg type' 161 | 162 | result = simplify(fpToIEEEBV(arg0)) 163 | assert result.size() == 32, 'i32.reinterpret/f32 conversion fail' 164 | elif self.instr_name == 'i64.reinterpret/f64': 165 | assert arg0.ebits() == 11, 'i64.reinterpret/f64 has wrong arg type' 166 | assert arg0.sbits() == 53, 'i64.reinterpret/f64 has wrong arg type' 167 | 168 | result = simplify(fpToIEEEBV(arg0)) 169 | assert result.size() == 64, 'i64.reinterpret/f64 conversion fail' 170 | elif self.instr_name == 'f32.reinterpret/i32': 171 | assert arg0.size() == 32, 'f32.reinterpret/i32 has wrong arg type' 172 | 173 | result = simplify(fpBVToFP(arg0, Float32())) 174 | assert result.ebits() == 8, 'f32.reinterpret/i32 conversion fail' 175 | assert result.sbits() == 24, 'f32.reinterpret/i32 conversion fail' 176 | elif self.instr_name == 'f64.reinterpret/i64': 177 | assert arg0.size() == 64, 'f64.reinterpret/i64 has wrong arg type' 178 | 179 | result = simplify(fpBVToFP(arg0, Float64())) 180 | assert result.ebits() == 11, 'f64.reinterpret/i64 conversion fail' 181 | assert result.sbits() == 53, 'f64.reinterpret/i64 conversion fail' 182 | elif self.instr_name == 'i32.extend_s/i8': 183 | assert arg0.size() == 8, 'i32.extend_s/i8 has wrong arg type' 184 | 185 | result = simplify(SignExt(24, arg0)) 186 | else: 187 | print('\nErr:\nUnsupported instruction: %s\n' % self.instr_name) 188 | raise UnsupportInstructionError 189 | 190 | state.symbolic_stack.append(result) 191 | 192 | return [state] 193 | -------------------------------------------------------------------------------- /seewasm/arch/wasm/instructions/LogicalInstructions.py: -------------------------------------------------------------------------------- 1 | # emulate the logical related instructions 2 | 3 | from seewasm.arch.wasm.exceptions import UnsupportInstructionError 4 | from z3 import (UGE, UGT, ULE, ULT, BitVecVal, If, fpEQ, fpGEQ, fpGT, fpLEQ, 5 | fpLT, fpNEQ, is_bv, is_false, is_true, simplify) 6 | 7 | helper_map = { 8 | 'i32': 32, 9 | 'i64': 64, 10 | 'f32': [8, 24], 11 | 'f64': [11, 53] 12 | } 13 | 14 | 15 | class LogicalInstructions: 16 | def __init__(self, instr_name, instr_operand, _): 17 | self.instr_name = instr_name 18 | self.instr_operand = instr_operand 19 | 20 | # TODO overflow check in this function? 21 | def emulate(self, state): 22 | def do_emulate_logical_int_instruction(state): 23 | instr_type = self.instr_name[:3] 24 | if 'eqz' in self.instr_name: 25 | arg0 = state.symbolic_stack.pop() 26 | 27 | assert arg0.size( 28 | ) == helper_map[instr_type], f"in `eqz` the argument popped size is {arg0.size()} instead of {helper_map[instr_type]}" 29 | 30 | result = arg0 == 0 31 | else: 32 | arg1, arg2 = state.symbolic_stack.pop(), state.symbolic_stack.pop() 33 | 34 | assert is_bv(arg1) and is_bv( 35 | arg2), f"in `logical` instruction, arg1 or arg2 type is wrong instead of BitVec" 36 | 37 | if 'eq' in self.instr_name: 38 | result = arg1 == arg2 39 | elif 'ne' in self.instr_name: 40 | result = arg1 != arg2 41 | elif 'lt_s' in self.instr_name: 42 | result = arg2 < arg1 43 | elif 'lt_u' in self.instr_name: 44 | result = ULT(arg2, arg1) 45 | elif 'gt_s' in self.instr_name: 46 | result = arg2 > arg1 47 | elif 'gt_u' in self.instr_name: 48 | result = UGT(arg2, arg1) 49 | elif 'le_s' in self.instr_name: 50 | result = arg2 <= arg1 51 | elif 'le_u' in self.instr_name: 52 | result = ULE(arg2, arg1) 53 | elif 'ge_s' in self.instr_name: 54 | result = arg2 >= arg1 55 | elif 'ge_u' in self.instr_name: 56 | result = UGE(arg2, arg1) 57 | else: 58 | raise UnsupportInstructionError 59 | 60 | # try to simplify result and insert 1 or 0 directly, instead of an ite statement 61 | result = simplify(result) 62 | if is_true(result): 63 | state.symbolic_stack.append(BitVecVal(1, 32)) 64 | elif is_false(result): 65 | state.symbolic_stack.append(BitVecVal(0, 32)) 66 | else: 67 | state.symbolic_stack.append( 68 | If(result, BitVecVal(1, 32), BitVecVal(0, 32))) 69 | 70 | return [state] 71 | 72 | def do_emulate_logical_float_instruction(state): 73 | arg1, arg2 = state.symbolic_stack.pop(), state.symbolic_stack.pop() 74 | instr_type = self.instr_name[:3] 75 | 76 | assert arg1.ebits() == helper_map[instr_type][0] and arg1.sbits( 77 | ) == helper_map[instr_type][1], 'emul_logical_f_instr arg1 type mismatch' 78 | assert arg2.ebits() == helper_map[instr_type][0] and arg2.sbits( 79 | ) == helper_map[instr_type][1], 'emul_logical_f_instr arg2 type mismatch' 80 | 81 | if 'eq' in self.instr_name: 82 | result = fpEQ(arg1, arg2) 83 | elif 'ne' in self.instr_name: 84 | result = fpNEQ(arg1, arg2) 85 | elif 'lt' in self.instr_name: 86 | result = fpLT(arg2, arg1) 87 | elif 'le' in self.instr_name: 88 | result = fpLEQ(arg2, arg1) 89 | elif 'gt' in self.instr_name: 90 | result = fpGT(arg2, arg1) 91 | elif 'ge' in self.instr_name: 92 | result = fpGEQ(arg2, arg1) 93 | else: 94 | raise UnsupportInstructionError 95 | 96 | # try to simplify result and insert 1 or 0 directly, instead of an ite statement 97 | result = simplify(result) 98 | if is_true(result): 99 | state.symbolic_stack.append(BitVecVal(1, 32)) 100 | elif is_false(result): 101 | state.symbolic_stack.append(BitVecVal(0, 32)) 102 | else: 103 | state.symbolic_stack.append( 104 | If(result, BitVecVal(1, 32), BitVecVal(0, 32))) 105 | 106 | return [state] 107 | 108 | op_type = self.instr_name[:1] 109 | if op_type == 'i': 110 | return do_emulate_logical_int_instruction(state) 111 | else: 112 | return do_emulate_logical_float_instruction(state) 113 | -------------------------------------------------------------------------------- /seewasm/arch/wasm/instructions/MemoryInstructions.py: -------------------------------------------------------------------------------- 1 | # emulate the memory related instructions 2 | 3 | import re 4 | 5 | from seewasm.arch.wasm.exceptions import UnsupportInstructionError 6 | from seewasm.arch.wasm.memory import (insert_symbolic_memory, 7 | lookup_symbolic_memory_data_section) 8 | from seewasm.arch.wasm.utils import getConcreteBitVec 9 | from z3 import (BitVecVal, Extract, Float32, Float64, SignExt, ZeroExt, 10 | fpBVToFP, fpToIEEEBV, is_bv_value, simplify) 11 | 12 | memory_count = 2 13 | memory_step = 2 14 | 15 | 16 | class MemoryInstructions: 17 | def __init__(self, instr_name, instr_operand, instr_string): 18 | self.instr_name = instr_name 19 | self.instr_operand = instr_operand 20 | self.instr_str = instr_string 21 | 22 | def emulate(self, state, data_section): 23 | global memory_count, memory_step 24 | if self.instr_name == 'current_memory': 25 | state.symbolic_stack.append(BitVecVal(memory_count, 32)) 26 | elif self.instr_name == 'grow_memory': 27 | prev_size = memory_count 28 | memory_count += memory_step 29 | state.symbolic_stack.append(BitVecVal(prev_size, 32)) 30 | elif self.instr_name == "memory.copy": 31 | # memory.copy 32 | # The instruction has the signature [i32 i32 i32] -> []. The parameters are, in order: 33 | # top-0: Number of bytes to copy 34 | # top-1: Source address to copy from 35 | # top-2: Destination address to copy to 36 | # example: 37 | # ;; Copy data in default memory from [100, 125] to [50, 75] 38 | # i32.const 50 ;; Destination address to copy to (top-2) 39 | # i32.const 100 ;; Source address to copy from (top-1) 40 | # i32.const 25 ;; Number of bytes to copy (top-0) 41 | # memory.copy ;; Copy memory 42 | len_v = state.symbolic_stack.pop().as_long() 43 | src_addr = state.symbolic_stack.pop().as_long() 44 | dest_addr = state.symbolic_stack.pop().as_long() 45 | # copy memory from src to dst 46 | vlis = [ 47 | lookup_symbolic_memory_data_section( 48 | state.symbolic_memory, data_section, src_addr + i, 1) 49 | for i in range(len_v)] 50 | for i, v in enumerate(vlis): 51 | state.symbolic_memory = insert_symbolic_memory( 52 | state.symbolic_memory, dest_addr + i, 1, v) 53 | print(f"memory.copy: src_addr={src_addr}, dest_addr={dest_addr}, len={len_v}") 54 | elif self.instr_name == "memory.fill": 55 | # memory.fill 56 | # The instruction has the signature [i32 i32 i32] -> []. The parameters are, in order: 57 | # top-0: The number of bytes to update 58 | # top-1: The value to set each byte to (must be < 256) 59 | # top-2: The pointer to the region to update 60 | # example: 61 | # ;; Fill region at offset/range in default memory with 255 62 | # i32.const 200 ;; The pointer to the region to update (top-2) 63 | # i32.const 255 ;; The value to set each byte to (must be < 256) (top-1) 64 | # i32.const 100 ;; The number of bytes to update (top-0) 65 | # memory.fill ;; Fill default memory 66 | len_v = state.symbolic_stack.pop().as_long() 67 | val = state.symbolic_stack.pop().as_long() 68 | addr = state.symbolic_stack.pop().as_long() 69 | print(f"memory.fill: addr={addr}, val={val}, len={len_v}") 70 | elif 'load' in self.instr_name: 71 | load_instr(self.instr_str, state, data_section) 72 | elif 'store' in self.instr_name: 73 | store_instr(self.instr_str, state) 74 | else: 75 | print('\nErr:\nUnsupported instruction: %s\n' % self.instr_name) 76 | raise UnsupportInstructionError 77 | 78 | return [state] 79 | 80 | 81 | def load_instr(instr, state, data_section): 82 | base = state.symbolic_stack.pop() 83 | # offset maybe int or hex 84 | try: 85 | offset = int(instr.split(' ')[2]) 86 | except ValueError: 87 | offset = int(instr.split(' ')[2], 16) 88 | addr = simplify(base + offset) 89 | 90 | if is_bv_value(addr): 91 | addr = addr.as_long() 92 | 93 | # determine how many bytes should be loaded 94 | # the dict is like {'8': 1} 95 | bytes_length_mapping = {str(k): k // 8 for k in range(8, 65, 8)} 96 | instr_name = instr.split(' ')[0] 97 | if len(instr_name) == 8: 98 | load_length = bytes_length_mapping[instr_name[1:3]] 99 | else: 100 | load_length = bytes_length_mapping[re.search( 101 | r"load([0-9]+)\_", instr_name).group(1)] 102 | 103 | val = lookup_symbolic_memory_data_section( 104 | state.symbolic_memory, data_section, addr, load_length) 105 | 106 | if val.size() != 8 * load_length: 107 | # we assume the memory are filled by 0 initially 108 | val = ZeroExt(8 * load_length - val.size(), val) 109 | 110 | if val is None: 111 | exit(f"the loaded value should not be None") 112 | # val = BitVec(f'load{load_length}*({addr})', 8*load_length) 113 | 114 | # cast to other type of bit vector 115 | float_mapping = { 116 | 'f32': Float32, 117 | 'f64': Float64, 118 | } 119 | if len(instr_name) == 8 and instr_name[0] == "f": 120 | val = simplify(fpBVToFP(val, float_mapping[instr_name[:3]]())) 121 | elif instr_name[-2] == "_": 122 | if instr_name[-1] == "s": # sign extend 123 | val = simplify( 124 | SignExt(int(instr_name[1: 3]) - load_length * 8, val)) 125 | else: 126 | val = simplify( 127 | ZeroExt(int(instr_name[1: 3]) - load_length * 8, val)) 128 | 129 | # if can not load from the memory area 130 | if val is not None: 131 | state.symbolic_stack.append(val) 132 | else: 133 | state.symbolic_stack.append(getConcreteBitVec( 134 | instr_name[:3], f'load_{instr_name[:3]}*({str(addr)})')) 135 | 136 | 137 | # deal with store instruction 138 | def store_instr(instr, state): 139 | # offset may be int or hex 140 | try: 141 | offset = int(instr.split(' ')[2]) 142 | except ValueError: 143 | offset = int(instr.split(' ')[2], 16) 144 | 145 | val, base = state.symbolic_stack.pop(), state.symbolic_stack.pop() 146 | addr = simplify(base + offset) 147 | 148 | # change addr's type to int if possible 149 | # or it will be the BitVecRef 150 | if is_bv_value(addr): 151 | addr = addr.as_long() 152 | 153 | # determine how many bytes should be stored 154 | # the dict is like {'8': 1} 155 | bytes_length_mapping = {str(k): k // 8 for k in range(8, 65, 8)} 156 | instr_name = instr.split(' ')[0] 157 | if len(instr_name) == 9: 158 | if instr_name[0] == 'f': 159 | val = fpToIEEEBV(val) 160 | state.symbolic_memory = insert_symbolic_memory( 161 | state.symbolic_memory, addr, bytes_length_mapping[instr_name[1:3]], val) 162 | else: 163 | stored_length = bytes_length_mapping[re.search( 164 | r"store([0-9]+)", instr_name).group(1)] 165 | val = simplify(Extract(stored_length * 8 - 1, 0, val)) 166 | state.symbolic_memory = insert_symbolic_memory( 167 | state.symbolic_memory, addr, stored_length, val) 168 | -------------------------------------------------------------------------------- /seewasm/arch/wasm/instructions/ParametricInstructions.py: -------------------------------------------------------------------------------- 1 | from copy import deepcopy 2 | 3 | from seewasm.arch.wasm.exceptions import UnsupportInstructionError 4 | from seewasm.arch.wasm.utils import one_time_query_cache 5 | from z3 import Not, is_bool, is_bv, is_false, is_true, simplify, unsat 6 | 7 | 8 | class ParametricInstructions: 9 | def __init__(self, instr_name, instr_operand, _): 10 | self.instr_name = instr_name 11 | self.instr_operand = instr_operand 12 | 13 | def emulate(self, state): 14 | if self.instr_name == 'drop': 15 | state.symbolic_stack.pop() 16 | return [state] 17 | elif self.instr_name == 'select': # select instruction 18 | arg0, arg1, arg2 = state.symbolic_stack.pop( 19 | ), state.symbolic_stack.pop(), state.symbolic_stack.pop() 20 | assert is_bv(arg0) or is_bool( 21 | arg0), f"in select, arg0 type is {type(arg0)} instead of bv or bool" 22 | # mimic the br_if 23 | if is_bv(arg0): 24 | # NOTE: if arg0 is zero, return arg1, or arg2 25 | # ref: https://developer.mozilla.org/en-US/docs/WebAssembly/Reference/Control_flow/Select 26 | op = simplify(arg0 == 0) 27 | 28 | if is_true(op): 29 | state.symbolic_stack.append(arg1) 30 | return [state] 31 | elif is_false(op): 32 | state.symbolic_stack.append(arg2) 33 | return [state] 34 | elif not is_true(op) and not is_false(op): 35 | # these two flags are used to jump over unnecessary deepcopy 36 | no_need_true, no_need_false = False, False 37 | if unsat == one_time_query_cache(state.solver, op): 38 | no_need_true = True 39 | if unsat == one_time_query_cache(state.solver, Not(op)): 40 | no_need_false = True 41 | 42 | if no_need_true and no_need_false: 43 | pass 44 | elif not no_need_true and not no_need_false: 45 | new_state = deepcopy(state) 46 | 47 | state.solver.add(op) 48 | state.symbolic_stack.append(arg1) 49 | 50 | new_state.solver.add(Not(op)) 51 | new_state.symbolic_stack.append(arg2) 52 | 53 | return [state, new_state] 54 | else: 55 | if no_need_true: 56 | state.solver.add(Not(op)) 57 | state.symbolic_stack.append(arg2) 58 | else: 59 | state.solver.add(op) 60 | state.symbolic_stack.append(arg1) 61 | return [state] 62 | else: 63 | exit(f"select instruction error. op is {op}") 64 | else: 65 | raise UnsupportInstructionError 66 | -------------------------------------------------------------------------------- /seewasm/arch/wasm/instructions/VariableInstructions.py: -------------------------------------------------------------------------------- 1 | # emulate the variable related instructions 2 | 3 | from seewasm.arch.wasm.exceptions import UnsupportInstructionError, UnsupportGlobalTypeError 4 | from z3 import BitVecVal, is_bv, is_bv_value 5 | 6 | 7 | class VariableInstructions: 8 | def __init__(self, instr_name, instr_operand, _): 9 | self.instr_name = instr_name 10 | self.instr_operand = instr_operand 11 | 12 | def emulate(self, state): 13 | # TODO 14 | # for go_samples.nosync/tinygo_main.wasm, the global.get operand would be prefixed by four \x80 15 | if self.instr_operand.startswith(b'\x80\x80\x80\x80'): 16 | self.instr_operand = self.instr_operand[4:] 17 | op = int.from_bytes(self.instr_operand, byteorder='little') 18 | 19 | if self.instr_name == 'get_local': 20 | if state.local_var.get(op, None) is not None: 21 | state.symbolic_stack.append(state.local_var[op]) 22 | else: 23 | state.symbolic_stack.append(state.local_var[op]) 24 | # raise UninitializedLocalVariableError 25 | elif self.instr_name == 'set_local': 26 | var = state.symbolic_stack.pop() 27 | state.local_var[op] = var 28 | elif self.instr_name == 'get_global': 29 | global_index = op 30 | global_operand = state.globals[global_index] 31 | 32 | if isinstance( 33 | global_operand, str) or isinstance( 34 | global_operand, int): 35 | state.symbolic_stack.append(BitVecVal(global_operand, 32)) 36 | elif is_bv(global_operand) or is_bv_value(global_operand): 37 | # the operand is a BitVecRef or BitVecNumRef 38 | state.symbolic_stack.append(global_operand) 39 | else: 40 | raise UnsupportGlobalTypeError 41 | elif self.instr_name == 'set_global': 42 | global_operand = state.symbolic_stack.pop() 43 | global_index = op 44 | 45 | state.globals[global_index] = global_operand 46 | elif self.instr_name == 'tee_local': 47 | var = state.symbolic_stack[-1] 48 | state.local_var[op] = var 49 | else: 50 | raise UnsupportInstructionError 51 | return [state] 52 | -------------------------------------------------------------------------------- /seewasm/arch/wasm/instructions/__init__.py: -------------------------------------------------------------------------------- 1 | from .ArithmeticInstructions import * 2 | from .BitwiseInstructions import * 3 | from .ConstantInstructions import * 4 | from .ControlInstructions import * 5 | from .ConversionInstructions import * 6 | from .LogicalInstructions import * 7 | from .MemoryInstructions import * 8 | from .ParametricInstructions import * 9 | from .VariableInstructions import * 10 | -------------------------------------------------------------------------------- /seewasm/arch/wasm/lib/utils.py: -------------------------------------------------------------------------------- 1 | # this is the helper function which are only used in lib folder 2 | 3 | from z3 import BitVecVal, is_bv, is_bv_value 4 | 5 | from seewasm.arch.wasm.configuration import Configuration 6 | from seewasm.arch.wasm.memory import (insert_symbolic_memory, 7 | lookup_symbolic_memory_data_section) 8 | 9 | MODELED_FUNCS = { 10 | 'c': 11 | {'__small_printf', 'abs', 'atof', 'atoi', 'exp', 'getchar', 12 | 'iprintf', 'printf', 'putchar', 'puts', 'scanf', 'swap', 13 | 'system', 'emscripten_resize_heap', 'fopen', 'vfprintf', 14 | 'open', 'exit', 'setlocale', 'hard_locale'}, 15 | 'go': {'fmt.Scanf', 'fmt.Printf', 'runtime.divideByZeroPanic', 'runtime.lookupPanic', 'runtime.nilPanic' 16 | 'runtime.slicePanic', 'runtime.sliceToArrayPointerPanic', 'runtime.unsafeSlicePanic', 'runtime.chanMakePanic', 17 | 'runtime.negativeShiftPanic', 'runtime.blockingPanic', 'runtime.calculateHeapAddresses', 'memset', 'runtime.alloc', 'memcpy', 18 | 'syscall/js.valueGet', 'runtime.putchar'}, 19 | 'rust': {}, 20 | 'wasi': 21 | {'args_sizes_get', 'args_get', 'environ_sizes_get', 22 | 'fd_advise', 'fd_fdstat_get', 'fd_tell', 'fd_seek', 23 | 'fd_close', 'fd_read', 'fd_write', 'proc_exit', 24 | 'fd_prestat_get', 'fd_prestat_dir_name', 'path_open'}, } 25 | 26 | 27 | def is_modeled(func_name, specify_lang=None): 28 | if specify_lang: 29 | return func_name in MODELED_FUNCS[specify_lang] 30 | else: 31 | return func_name in MODELED_FUNCS['wasi'] or func_name in MODELED_FUNCS[Configuration.get_source_type()] 32 | 33 | 34 | def _extract_params(param_str, state): 35 | """ 36 | Return a list of elements, which are the arguments of the given import function. 37 | Note that, the order will be reversed. 38 | For example, if the signature of function foo is: foo (a, b), the returned arguments will be [b, a] 39 | """ 40 | param_cnt = len(param_str.split(" ")) 41 | params = [] 42 | for _ in range(param_cnt): 43 | params.append(state.symbolic_stack.pop()) 44 | 45 | # concretize 46 | params_result = [] 47 | for i in params: 48 | if is_bv_value(i): 49 | params_result.append(i.as_long()) 50 | else: 51 | params_result.append(i) 52 | 53 | return params_result 54 | 55 | 56 | def _storeN(state, dest, val, len_in_bytes): 57 | if not is_bv(val): 58 | state.symbolic_memory = insert_symbolic_memory( 59 | state.symbolic_memory, dest, len_in_bytes, 60 | BitVecVal(val, len_in_bytes * 8)) 61 | else: 62 | state.symbolic_memory = insert_symbolic_memory( 63 | state.symbolic_memory, dest, len_in_bytes, val) 64 | 65 | 66 | def _loadN(state, data_section, dest, len_in_bytes): 67 | val = lookup_symbolic_memory_data_section( 68 | state.symbolic_memory, data_section, dest, len_in_bytes) 69 | if is_bv_value(val): 70 | val = val.as_long() 71 | return val 72 | -------------------------------------------------------------------------------- /seewasm/arch/wasm/solver.py: -------------------------------------------------------------------------------- 1 | from z3 import Solver 2 | 3 | # from lab_solver import * 4 | 5 | 6 | class SMTSolver: 7 | def __new__(cls, designated_solver): 8 | if designated_solver == 'z3': 9 | return Solver() 10 | else: 11 | raise Exception("No SMT backend found") 12 | -------------------------------------------------------------------------------- /seewasm/arch/wasm/utils.py: -------------------------------------------------------------------------------- 1 | # This file gives some practical functions that will be adopted by other files 2 | 3 | import json 4 | import logging 5 | import re 6 | import struct 7 | from codecs import decode 8 | from datetime import datetime 9 | from os import makedirs, path 10 | from random import random 11 | 12 | from seewasm.arch.wasm.configuration import Configuration 13 | from seewasm.arch.wasm.exceptions import (NO_EXIT, INVALIDMEMORY, ProcFailTermination, 14 | UnsupportZ3TypeError) 15 | from seewasm.arch.wasm.solver import SMTSolver 16 | from z3 import (FP, BitVec, BitVecRef, Float32, Float64, is_bv, is_bv_value, 17 | sat, unsat) 18 | 19 | # this is the opened files base addr 20 | FILE_BASE_ADDR = 100000000 21 | 22 | 23 | # this is a mapping, which maps the data type to the corresponding BitVec 24 | def getConcreteBitVec(type, name): 25 | if type == 'i32': 26 | return BitVec(name, 32) 27 | elif type == 'i64': 28 | return BitVec(name, 64) 29 | elif type == 'f32': 30 | return FP(name, Float32()) 31 | elif type == 'f64': 32 | return FP(name, Float64()) 33 | else: 34 | raise UnsupportZ3TypeError 35 | 36 | 37 | def readable_internal_func_name(func_index_to_func_name, internal_func_name): 38 | """ 39 | Convert the internal name to a more readable one with the help of func_index_to_func_name 40 | """ 41 | if func_index_to_func_name is None: 42 | return internal_func_name 43 | 44 | if not internal_func_name.startswith('$'): 45 | return internal_func_name 46 | 47 | readable_name = None 48 | try: 49 | readable_name = func_index_to_func_name[int( 50 | re.search('(\d+)', internal_func_name).group())] 51 | except (AttributeError, KeyError) as _: 52 | # if the internal_function_name is the readable name already 53 | readable_name = internal_func_name 54 | assert readable_name is not None, f"the internal funciton {internal_func_name} cannot find its corresponding readable name" 55 | return readable_name 56 | 57 | 58 | def bin_to_float(b): 59 | """ Convert binary string to a float. """ 60 | bf = int_to_bytes(int(b, 2), 8) # 8 bytes needed for IEEE 754 binary64. 61 | return struct.unpack('>d', bf)[0] 62 | 63 | 64 | def int_to_bytes(n, length): # Helper function 65 | """ Int/long to byte string. 66 | 67 | Python 3.2+ has a built-in int.to_bytes() method that could be used 68 | instead, but the following works in earlier versions including 2.x. 69 | """ 70 | return decode('%%0%dx' % (length << 1) % n, 'hex')[-length:] 71 | 72 | 73 | # the patterns used in C printf, and their corresponding length of to be loaded memory 74 | C_TYPE_TO_LENGTH = {'s': 4, 'c': 4, 'd': 4, 'u': 4, 'x': 4, 'f': 8} 75 | 76 | 77 | def calc_memory_align(parsed_pattern): 78 | """ 79 | Used for calculate memory align in printf 80 | """ 81 | offset = [] 82 | for i, item in enumerate(parsed_pattern): 83 | cur_type = item[-1][-1] 84 | offset.append(C_TYPE_TO_LENGTH[cur_type]) 85 | 86 | # decide if we should align the memory 87 | if cur_type == 'f': 88 | previous_sum = sum(offset[:i]) 89 | if previous_sum % 8 != 0: 90 | offset[i - 1] += 4 91 | 92 | return offset 93 | 94 | 95 | def parse_printf_formatting(lines): 96 | cfmt = '''\ 97 | ( # start of capture group 1 98 | % # literal "%" 99 | (?: # first option 100 | (?:[-+0 #]{0,5}) # optional flags 101 | (?:\d+|\*)? # width 102 | (?:\.(?:\d+|\*))? # precision 103 | (?:h|l|ll|w|I|I32|I64)? # size 104 | [cCdiouxXeEfgGaAnpsSZ] # type 105 | ) | # OR 106 | %%) # literal "%%" 107 | ''' 108 | 109 | # tuple list, in which each element consisting of line number, begin position and pattern 110 | result = [] 111 | for line_num, line in enumerate(lines.splitlines()): 112 | for m in re.finditer(cfmt, line, flags=re.X): 113 | result.append([line_num, m.start(1), m.group(1)]) 114 | return result 115 | 116 | 117 | def _extract_outermost_int(num): 118 | """ 119 | This function is used to extract the outermost int for a symbol. 120 | For example, if num is: a + 87, the function will return 87. 121 | If num is: a + b, the function will return None. 122 | """ 123 | the_int = None 124 | if is_bv(num): 125 | for i in range(num.num_args()): 126 | if is_bv_value(num.arg(i)): 127 | the_int = num.arg(i).as_long() 128 | break 129 | elif isinstance(num, int): 130 | the_int = num 131 | else: 132 | exit(f"the type of num is {type(num)}, cannot extract the int args") 133 | 134 | return the_int 135 | 136 | 137 | def str_to_little_endian_int(string): 138 | """ 139 | Convert the given string to an integer, little endian 140 | For example, "abc" is 6513249 141 | """ 142 | return int.from_bytes(str.encode(string), "little") 143 | 144 | 145 | def write_result(state, exit_code=NO_EXIT): 146 | """ 147 | Write result in ./output/result folder in json format 148 | """ 149 | # if the checker is unsat 150 | if unsat == state.solver.check(): 151 | return 152 | 153 | file_name = f"./output/result/{Configuration.get_file_name()}_{Configuration.get_start_time()}/state_{datetime.timestamp(datetime.now()):.3f}_{random():.5f}.json" 154 | makedirs(path.dirname(file_name), exist_ok=True) 155 | state_result = {} 156 | with open(file_name, 'w') as fp: 157 | if exit_code != NO_EXIT: 158 | if int(exit_code.value) >= 0: 159 | state_result["Status"] = f"Exit with status code {exit_code}" 160 | else: 161 | # constructed exit_code 162 | state_result["Status"] = f"Exit" 163 | else: 164 | # return value 165 | # get_entry_signature() returns a tuple (name, params, return, type) 166 | if Configuration.get_entry_signature()[2]: 167 | state_result["Return"] = str(state.symbolic_stack[-1]) 168 | else: 169 | # default return value 170 | state_result["Return"] = "0" 171 | 172 | # solution of constraints 173 | state_result["Solution"] = {} 174 | m = state.solver.model() 175 | # this check if there exist symbols with same name 176 | # which may lead to the result overwriting 177 | if len(set([k for k in m])) != len(m): 178 | logging.warning( 179 | f"the solving process found there exist symbols with identical name, please double check. ({[k for k in m]})") 180 | for k in m: 181 | # the decode is weird, we just want to convert unprintable characters 182 | # into printable chars 183 | # ref: https://stackoverflow.com/questions/13837848/converting-byte-string-in-unicode-string 184 | solution_hex_str = hex(m[k].as_long())[2:] 185 | if len(solution_hex_str) % 2 == 1: 186 | solution_hex_str = "0" + solution_hex_str 187 | solution = [] 188 | for i in range(0, len(solution_hex_str), 2): 189 | solution.append(chr(int(solution_hex_str[i: i + 2], 16))) 190 | state_result["Solution"][str(k)] = "".join(solution[::-1]) 191 | 192 | candidate_fds = [] 193 | # filter out all output buffer 194 | for fd, file_info in state.file_sys.items(): 195 | if "w" in file_info["flag"]: 196 | if isinstance(fd, int) or fd[0] == "-": 197 | candidate_fds.append(fd) 198 | 199 | state_result["Output"] = [] 200 | # stdout and stderr buffer 201 | for fd in candidate_fds: 202 | assert all(isinstance(x, (int, BitVecRef)) 203 | for x in state.file_sys[fd]["content"]), f"buffer is: {state.file_sys[fd]['content']}, not all int and bitvec" 204 | tmp_dict = {"name": None, "output": None} 205 | # output_buffer = [] 206 | output_solve_buffer = [] 207 | for el in state.file_sys[fd]["content"]: 208 | if isinstance(el, int): 209 | # output_buffer.append(chr(el).encode()) 210 | output_solve_buffer.append(chr(el)) 211 | elif isinstance(el, BitVecRef): 212 | assert el.size() == 8, f"{el} size is not 8" 213 | # output_buffer.append(str(el).encode()) 214 | # if can solve a concrete number 215 | solve_char = m.evaluate(el) 216 | if is_bv_value(solve_char): 217 | output_solve_buffer.append( 218 | chr(solve_char.as_long())) 219 | elif is_bv(solve_char): 220 | output_solve_buffer.append("`@`") 221 | else: 222 | exit( 223 | f"result of solving {el} is {solve_char} and type is {type(solve_char)}") 224 | 225 | tmp_dict["name"] = state.file_sys[fd]["name"] 226 | # tmp_dict["output"] = f'{b"".join(output_buffer)}' 227 | tmp_dict["output"] = "".join(output_solve_buffer) 228 | state_result["Output"].append(tmp_dict) 229 | 230 | json.dump(state_result, fp, indent=4) 231 | 232 | 233 | def init_file_for_file_sys(): 234 | """ 235 | The item for file_sys of state should be initialized here. 236 | """ 237 | return {"name": "", "status": False, "flag": "", "content": []} 238 | 239 | 240 | def log_in_out(func_name, directory): 241 | """ 242 | A decorator to log before entering and after exiting call emulation 243 | """ 244 | def decorator(f): 245 | def wrapper(*args, **kw): 246 | logging.info(f"Call: {func_name} ({directory})") 247 | states = f(*args, **kw) 248 | logging.info(f"Return: {func_name} ({directory})") 249 | return states 250 | return wrapper 251 | return decorator 252 | 253 | 254 | def query_cache(solver): 255 | """ 256 | Check is assertions in solver are cached. 257 | If they are, return directly, or update the cache and return 258 | """ 259 | cons_hash_set = {hash(c) for c in solver.assertions()} 260 | cons_hash_list = list(cons_hash_set) 261 | cons_hash_list.sort() 262 | cons_hash_tuple = tuple(cons_hash_list) 263 | 264 | if cons_hash_tuple not in Configuration._z3_cache_dict: 265 | solver_check_result = solver.check() 266 | 267 | # try to terminate invalid-memory in advance 268 | if solver_check_result == sat: 269 | m = solver.model() 270 | for k in m: 271 | if str(k) == 'invalid-memory': 272 | Configuration._z3_cache_dict[cons_hash_tuple] = unsat 273 | raise ProcFailTermination(INVALIDMEMORY) 274 | 275 | Configuration._z3_cache_dict[cons_hash_tuple] = solver_check_result 276 | else: 277 | solver_check_result = Configuration._z3_cache_dict[cons_hash_tuple] 278 | 279 | return solver_check_result 280 | 281 | 282 | def one_time_query_cache(solver, con): 283 | """ 284 | the *args are received constraints, they will not be inserted into the solver. 285 | It is an one-time query 286 | """ 287 | solver.push() 288 | solver.add(con) 289 | solver_check_result = query_cache(solver) 290 | solver.pop() 291 | 292 | return solver_check_result 293 | 294 | 295 | def one_time_query_cache_without_solver(con): 296 | cons_hash_set = set([hash(c) for c in [con]]) 297 | cons_hash_list = list(cons_hash_set) 298 | cons_hash_list.sort() 299 | cons_hash_tuple = tuple(cons_hash_list) 300 | if cons_hash_tuple not in Configuration._z3_cache_dict: 301 | s = SMTSolver(Configuration.get_solver()) 302 | s.add(con) 303 | solver_check_result = s.check() 304 | Configuration._z3_cache_dict[cons_hash_tuple] = solver_check_result 305 | else: 306 | solver_check_result = Configuration._z3_cache_dict[cons_hash_tuple] 307 | 308 | return solver_check_result 309 | -------------------------------------------------------------------------------- /seewasm/arch/wasm/visualizator.py: -------------------------------------------------------------------------------- 1 | from seewasm.arch.wasm.configuration import Configuration 2 | from graphviz import Digraph 3 | 4 | 5 | def visualize(Graph, filename="wasm_ICFG.gv"): 6 | entry_func = Configuration.get_entry() 7 | entry_func_index_name = Graph.wasmVM.get_signature(entry_func)[0] 8 | entry_bb = Graph.func_to_bbs[entry_func_index_name][0] 9 | assert entry_bb.endswith('_0'), f"entry_bb ({entry_bb}) not ends with 0" 10 | 11 | g = Digraph(filename, filename=filename) 12 | g.attr(rankdir="TB") 13 | 14 | # construct a set consisting of edges (nodeA, nodeB, edge_type) 15 | visited = set() 16 | edges_set = set() 17 | stack = list() 18 | stack.append(entry_bb) 19 | while stack: 20 | bb = stack.pop() 21 | visited.add(bb) 22 | if bb in Graph.bbs_graph: 23 | for edge_type, succ_bb in Graph.bbs_graph[bb].items(): 24 | if succ_bb not in visited: 25 | edges_set.add((bb, succ_bb, edge_type)) 26 | stack.append(succ_bb) 27 | elif (bb, succ_bb, edge_type) not in edges_set: 28 | edges_set.add((bb, succ_bb, edge_type)) 29 | 30 | with g.subgraph(name='global') as c: 31 | # construct the graph 32 | for edge in edges_set: 33 | node_from, node_to, _ = edge 34 | c.node(node_from) 35 | c.node(node_to) 36 | c.edge(node_from, node_to) 37 | 38 | print("Rendering disabled on the server.") 39 | g.render(filename, view=False) 40 | -------------------------------------------------------------------------------- /seewasm/arch/wasm/vmstate.py: -------------------------------------------------------------------------------- 1 | # This file defines the `state` that will be passed within Wasm-SE 2 | from collections import defaultdict 3 | 4 | from seewasm.arch.wasm.configuration import Configuration 5 | from seewasm.arch.wasm.solver import SMTSolver 6 | from seewasm.arch.wasm.utils import (init_file_for_file_sys, 7 | readable_internal_func_name) 8 | from seewasm.engine.engine import VMstate 9 | from z3 import BitVecVal 10 | 11 | 12 | class WasmVMstate(VMstate): 13 | def __init__(self): 14 | # data structure: 15 | def local_default(): 16 | return BitVecVal(0, 32) 17 | self.symbolic_stack = [] 18 | self.symbolic_memory = {} 19 | self.local_var = defaultdict(local_default) 20 | self.globals = {} 21 | # instruction 22 | self.instr = "end" 23 | # current function name 24 | self.current_func_name = '' 25 | # current basic block's name, used in recursive process 26 | self.current_bb_name = '' 27 | # keep the operator and its speculated sign 28 | self.sign_mapping = defaultdict(bool) 29 | # context stack 30 | # whose element is 4-tuple: (func_name, stack, local, require_return) 31 | # TODO files buffer may need to maintained in context 32 | self.context_stack = [] 33 | 34 | self.args = "" 35 | 36 | # all items should be initialized by init_file_for_file_sys in utils 37 | self.file_sys = {} 38 | for fd in range(0, 3): 39 | self.file_sys[fd] = init_file_for_file_sys() 40 | self.file_sys[0]["name"] = "stdin" 41 | self.file_sys[0]["status"] = True 42 | self.file_sys[0]["flag"] = "r" 43 | self.file_sys[1]["name"] = "stdout" 44 | self.file_sys[1]["status"] = True 45 | self.file_sys[1]["flag"] = "w" 46 | self.file_sys[2]["name"] = "stderr" 47 | self.file_sys[2]["status"] = True 48 | self.file_sys[2]["flag"] = "w" 49 | 50 | # used by br_if instruction 51 | self.edge_type = '' 52 | # the corresponding solver 53 | self.solver = SMTSolver(Configuration.get_solver()) 54 | # the name of function that is called in call_indirect 55 | self.call_indirect_callee = '' 56 | 57 | def __str__(self): 58 | return f'''Current Func:\t{readable_internal_func_name(Configuration.get_func_index_to_func_name(), self.current_func_name)} 59 | Stack:\t\t{self.symbolic_stack} 60 | Local Var:\t{self.local_var} 61 | Global Var:\t{self.globals} 62 | Memory:\t\t{self.symbolic_memory} 63 | Constraints:\t{self.solver.assertions()}\n''' 64 | 65 | def details(self): 66 | raise NotImplementedError 67 | 68 | def __lt__(self, other): 69 | return False 70 | 71 | def __getstate__(self): 72 | return self.__dict__.copy() 73 | -------------------------------------------------------------------------------- /seewasm/core/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PKU-ASAL/SeeWasm/e2a40c2fd2bb19fb7f6f5bd646a862cc5ddeaa67/seewasm/core/__init__.py -------------------------------------------------------------------------------- /seewasm/core/__init__.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PKU-ASAL/SeeWasm/e2a40c2fd2bb19fb7f6f5bd646a862cc5ddeaa67/seewasm/core/__init__.pyc -------------------------------------------------------------------------------- /seewasm/core/basicblock.py: -------------------------------------------------------------------------------- 1 | class BasicBlock(object): 2 | """ 3 | The basic block in the CFG, consisting of instructions 4 | """ 5 | 6 | def __init__(self, start_offset=0x00, start_instr=None, 7 | name='block_default_name'): 8 | """ 9 | The properties of basic blocks 10 | 11 | Properties: 12 | start_offset: the `offset` of the first instruction 13 | start_instr: the first instruction of the current basic block 14 | name: the name of the basic block, whose naming style is "block_[func_index]_[start_offset]" 15 | end_offset: the `offset_end` of the last instruction 16 | end_instr: the last instruction 17 | 18 | Below are properties may be deprecated in the future 19 | states: not clear 20 | function_name: its corresponding function's name 21 | """ 22 | self.start_offset = start_offset 23 | self.start_instr = start_instr 24 | self.name = name 25 | self.end_offset = None 26 | self.end_instr = None 27 | self.instructions = list() 28 | 29 | # may be deprecated in the future 30 | self.states = [] 31 | self.function_name = "unknown" 32 | 33 | @property 34 | def size(self): 35 | return self.end_offset - self.start_offset 36 | 37 | def __str__(self): 38 | out = '' 39 | line = '' 40 | line = str(self.start_offset) + ': ' + str(self.name) + '\n' 41 | line += 'start_instr = ' + str(self.start_instr.name) + '\n' 42 | line += 'size = ' + str(self.size) + '\n' 43 | line += 'end_offset = ' + str(self.end_offset) + '\n' 44 | line += 'end_instr = ' + str(self.end_instr.name) + '\n' 45 | line += 'function_name = ' + str(self.function_name) + '\n' 46 | out += line + '\n\n' 47 | return out 48 | 49 | def instructions_details(self, format='hex'): 50 | out = '' 51 | line = '' 52 | for i in self.instructions: 53 | line = '%x: ' % i.offset 54 | if i.operand is not None and not i.xref: 55 | line += '%s' % str(i) 56 | elif isinstance(i.xref, list) and i.xref: 57 | line += '%s %s' % (i.name, i.xref) 58 | elif isinstance(i.xref, int) and i.xref: 59 | line += '%s %x' % (i.name, i.xref) 60 | elif i.operand_interpretation: 61 | line += i.operand_interpretation 62 | else: 63 | line += i.name + ' ' 64 | 65 | out += line + '\n' 66 | return out 67 | 68 | def instructions_ssa(self, format='hex'): 69 | out = '' 70 | line = '' 71 | for i in self.instructions: 72 | line = '%x: ' % i.offset 73 | if i.ssa: 74 | line += '' + i.ssa.format() 75 | else: 76 | line += '[NO_SSA] ' + i.name 77 | out += line + '\n' 78 | return out 79 | -------------------------------------------------------------------------------- /seewasm/core/edge.py: -------------------------------------------------------------------------------- 1 | EDGE_UNCONDITIONAL = 'unconditional' 2 | EDGE_CONDITIONAL_TRUE = 'conditional_true' 3 | EDGE_CONDITIONAL_FALSE = 'conditional_false' 4 | EDGE_FALLTHROUGH = 'fallthrough' 5 | EDGE_CALL = 'call' 6 | 7 | 8 | class Edge: 9 | """ 10 | The edges in the CFG, connecting basic blocks 11 | """ 12 | 13 | def __init__(self, node_from, node_to, edge_type=EDGE_UNCONDITIONAL, 14 | condition=None): 15 | """ 16 | Properties of edges in the CFG 17 | 18 | Properties: 19 | node_from: the 'name' of the basic block pointed from 20 | node_to: the 'name' of the basic block pointed to 21 | type: the type of the edge, including five types listed at the beginning of the current file 22 | 23 | Below are properties may be deprecated in the future 24 | condition: do not understand its actual meaning 25 | """ 26 | 27 | self.node_from = node_from 28 | self.node_to = node_to 29 | self.type = edge_type 30 | 31 | self.condition = condition 32 | 33 | def __str__(self): 34 | return str(self.as_dict()) 35 | 36 | def __eq__(self, other): 37 | return self.node_from == other.node_from and\ 38 | self.node_to == other.node_to and\ 39 | self.type == other.type and\ 40 | self.condition == other.condition 41 | 42 | def __hash__(self): 43 | return hash(('from', self.node_from, 44 | 'to', self.node_to, 45 | 'type', self.type, 46 | 'condition', self.condition)) 47 | 48 | def as_dict(self): 49 | return {'from': str(self.node_from), 'to': str(self.node_to), 50 | 'type': self.type, 'condition': self.condition} 51 | -------------------------------------------------------------------------------- /seewasm/core/function.py: -------------------------------------------------------------------------------- 1 | class Function(object): 2 | """ 3 | The function object of the given Wasm module 4 | """ 5 | 6 | def __init__(self, start_offset, start_instr=None, 7 | name='func_default_name', prefered_name=None): 8 | """ 9 | The properties of the functions of the given Wasm module 10 | 11 | Properties: 12 | start_offset: the start offset of the first instruction 13 | start_instr: the first instruction of the function 14 | name: the function's name, represented in '$funcX' or readable name (TODO will make them all to readable name in the future) 15 | prefered_name: the signature of the function, including type of arguments and return value 16 | size: the size of the function, the sum of all its composed instructions 17 | end_offset: the end_offset of its last basic block 18 | end_instr: the last instruction of the function 19 | basicblocks: the list of all composed basic blocks 20 | instructions: the list of all composed instructions 21 | """ 22 | self.start_offset = start_offset 23 | self.start_instr = start_instr 24 | self.name = name 25 | self.prefered_name = prefered_name if prefered_name else name 26 | self.size = 0 27 | self.end_offset = None 28 | self.end_instr = None 29 | self.basicblocks = list() 30 | self.instructions = list() 31 | 32 | def __str__(self): 33 | line = ('%x' % self.start_offset) + ': ' + str(self.name) + '\n' 34 | line += 'prefered_name: %s\n' % self.prefered_name 35 | line += 'start_offset = %x\n' % self.start_offset 36 | line += 'start_instr = ' + str(self.start_instr.name) + '\n' 37 | if self.size: 38 | line += 'size = ' + str(self.size) + '\n' 39 | if self.end_offset: 40 | line += 'end_offset = ' + str(self.end_offset) + '\n' 41 | if self.end_instr: 42 | line += 'end_instr = ' + str(self.end_instr.name) + '\n' 43 | line += 'lenght basicblocks: %s\n' % len(self.basicblocks) 44 | line += 'lenght instructions: %s\n' % len(self.instructions) 45 | line += '\n\n' 46 | return line 47 | -------------------------------------------------------------------------------- /seewasm/core/instruction.py: -------------------------------------------------------------------------------- 1 | class Instruction(object): 2 | """ 3 | The instruction object 4 | """ 5 | 6 | def __init__(self, opcode, name, 7 | operand_size, pops, pushes, fee, 8 | description, operand=None, 9 | operand_interpretation=None, offset=0, xref=None): 10 | """ 11 | The properties of instruction object 12 | 13 | Properties: 14 | opcode: the int value of the instruction 15 | offset: the offset of the instruction on function level 16 | name: the readable name of the instruction 17 | description: a brief description of the instruction 18 | operand_size: the size of its corresponding operand 19 | operand: Immediate operand if any specific interpretation of operand value, in bytes. The operand value for JUMP is xref 20 | operand_interpretation: the instruction and its operand in a readable way, same as the string in the wat file 21 | pops: how many elements will be popped from the stack 22 | pushes: how many elements will be pushed into the stack 23 | fee: not clear 24 | xref: the jump target of the current instruction 25 | ssa: not clear 26 | """ 27 | 28 | self.opcode = opcode 29 | self.opcode_size = 1 30 | self.offset = offset 31 | self.name = name 32 | self.description = description 33 | self.operand_size = operand_size 34 | self.operand = operand 35 | self.operand_interpretation = operand_interpretation 36 | self.pops = pops 37 | self.pushes = pushes 38 | self.fee = fee 39 | self.xref = xref 40 | self.ssa = None 41 | 42 | def __eq__(self, other): 43 | """ Instructions are equal if all features match """ 44 | return self.opcode == other.opcode and\ 45 | self.name == other.name and\ 46 | self.operand == other.operand and\ 47 | self.operand_size == other.operand_size and\ 48 | self.pops == other.pops and\ 49 | self.pushes == other.pushes and\ 50 | self.fee == other.fee and\ 51 | self.offset == other.offset and\ 52 | self.description == other.description 53 | 54 | def __simple_output_format(self, offset=True): 55 | output = self.name 56 | if self.has_operand: 57 | output += ' 0x%x' % int.from_bytes(self.operand, 58 | byteorder='big') 59 | 60 | if offset: 61 | return "%d %s" % (self.offset, output) 62 | else: 63 | return "%s" % output 64 | 65 | # def __repr__(self): 66 | # """ Entire representation of the instruction 67 | # output = 'Instruction(0x%x, %r, %d, %d, %d, %d, %r, %r, %r)' \ 68 | # % (self._opcode, self._name, self._operand_size, 69 | # self._pops, self._pushes, self._fee, 70 | # self._description, self._operand, self._offset)""" 71 | # return self.__simple_output_format() 72 | 73 | def __str__(self): 74 | """ String representation of the instruction """ 75 | return self.__simple_output_format(offset=False) 76 | 77 | @property 78 | def bytes(self): 79 | """ Encoded instruction """ 80 | byte = bytearray() 81 | byte.append(self.opcode) 82 | if self.operand: 83 | [byte.append(x) for x in self.operand] 84 | return "".join(map(chr, byte)) 85 | 86 | @property 87 | def offset_end(self): 88 | """ Location in the program (optional) """ 89 | return self.offset + self.size - 1 90 | 91 | @property 92 | def semantics(self): 93 | """ Canonical semantics """ 94 | return self.name 95 | 96 | @property 97 | def size(self): 98 | """ Size of the encoded instruction """ 99 | return self.opcode_size + self.operand_size 100 | 101 | @property 102 | def has_operand(self): 103 | """ True if the instruction uses an immediate operand """ 104 | return self.operand_size > 0 105 | 106 | @property 107 | def is_branch_conditional(self): 108 | """ Return list if the instruction is a jump """ 109 | raise NotImplementedError 110 | 111 | @property 112 | def is_branch_unconditional(self): 113 | """ Return list if the instruction is a jump """ 114 | raise NotImplementedError 115 | 116 | @property 117 | def is_branch(self): 118 | """ True if the instruction is a jump """ 119 | return self.is_branch_conditional or self.is_branch_unconditional 120 | 121 | @property 122 | def is_halt(self): 123 | """ Return list if the instruction is a basic block terminator """ 124 | raise NotImplementedError 125 | 126 | @property 127 | def is_terminator(self): 128 | """ True if the instruction is a basic block terminator """ 129 | raise NotImplementedError 130 | 131 | @property 132 | def have_xref(self): 133 | """ TODO """ 134 | raise NotImplementedError 135 | -------------------------------------------------------------------------------- /seewasm/core/utils.py: -------------------------------------------------------------------------------- 1 | from binascii import unhexlify 2 | 3 | 4 | def bytecode_to_bytes(bytecode): 5 | if str(bytecode).startswith("0x"): 6 | bytecode = bytecode[2:] 7 | 8 | try: 9 | # python > 2.7 10 | bytecode = bytes.fromhex(bytecode) 11 | except AttributeError: 12 | # python <= 2.7 13 | try: 14 | bytecode = bytecode.decode("hex") 15 | except TypeError: 16 | # last chance 17 | bytecode = unhexlify(bytecode) 18 | # already bytes or bytearray 19 | except TypeError: 20 | pass 21 | return bytecode 22 | 23 | 24 | def search_in_list_of_dict(string_to_search, target_list, key_dict): 25 | return list( 26 | filter( 27 | lambda elem: str(string_to_search) in str( 28 | elem[key_dict]), 29 | target_list)) 30 | -------------------------------------------------------------------------------- /seewasm/engine/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PKU-ASAL/SeeWasm/e2a40c2fd2bb19fb7f6f5bd646a862cc5ddeaa67/seewasm/engine/__init__.py -------------------------------------------------------------------------------- /seewasm/engine/__init__.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PKU-ASAL/SeeWasm/e2a40c2fd2bb19fb7f6f5bd646a862cc5ddeaa67/seewasm/engine/__init__.pyc -------------------------------------------------------------------------------- /seewasm/engine/disassembler.py: -------------------------------------------------------------------------------- 1 | from seewasm.core.utils import bytecode_to_bytes 2 | 3 | 4 | class BytecodeEmptyException(Exception): 5 | """Exception raised when bytecode is None""" 6 | pass 7 | 8 | 9 | class Disassembler(object): 10 | """ Generic Disassembler class """ 11 | 12 | def __init__(self, asm, bytecode=None): 13 | self.bytecode = bytecode 14 | self.instructions = list() 15 | self.reverse_instructions = dict() 16 | self.asm = asm 17 | 18 | def attributes_reset(self): 19 | """Reset instructions class attributes """ 20 | self.instructions = list() 21 | self.reverse_instructions = dict() 22 | 23 | def disassemble_opcode(self, bytecode, offset=0): 24 | """ Generic method to disassemble one instruction """ 25 | raise NotImplementedError 26 | 27 | def disassemble(self, bytecode=None, offset=0, nature_offset=0, 28 | r_format='list'): 29 | """Generic method to disassemble bytecode 30 | 31 | :param bytecode: bytecode sequence 32 | :param offset: start offset 33 | :param r_format: output format ('list'/'text'/'reverse') 34 | :type bytecode: bytes, str 35 | :type offset: int 36 | :type r_format: list, str, dict 37 | :return: dissassembly result depending of r_format 38 | :rtype: list, str, dict 39 | """ 40 | # reinitialize class variable 41 | self.attributes_reset() 42 | 43 | self.bytecode = bytecode if bytecode else self.bytecode 44 | if not self.bytecode: 45 | raise BytecodeEmptyException() 46 | 47 | self.bytecode = bytecode_to_bytes(self.bytecode) 48 | 49 | while offset < len(self.bytecode): 50 | instr = self.disassemble_opcode( 51 | self.bytecode[offset:], 52 | offset, nature_offset) 53 | offset += instr.size 54 | nature_offset += 1 55 | self.instructions.append(instr) 56 | 57 | # fill reverse instructions 58 | self.reverse_instructions = {k: v for k, v in 59 | enumerate(self.instructions)} 60 | 61 | # return instructions 62 | if r_format == 'list': 63 | return self.instructions 64 | elif r_format == 'text': 65 | return '\n'.join(map(str, self.instructions)) 66 | elif r_format == 'reverse': 67 | return self.reverse_instructions 68 | -------------------------------------------------------------------------------- /seewasm/engine/emulator.py: -------------------------------------------------------------------------------- 1 | # ======================================= 2 | # # Emulator # 3 | # ======================================= 4 | 5 | 6 | class EmulatorEngine(object): 7 | 8 | def __init__(self, instructions): 9 | """ TODO """ 10 | raise NotImplementedError 11 | 12 | def emulate(self, state, depth=0): 13 | """ TODO """ 14 | raise NotImplementedError 15 | 16 | def emulate_one_instruction(self, instr, state, depth): 17 | """ TODO """ 18 | raise NotImplementedError 19 | -------------------------------------------------------------------------------- /seewasm/engine/engine.py: -------------------------------------------------------------------------------- 1 | class VMstate(object): 2 | 3 | def __init__(self, gas=1000000): 4 | """ TODO """ 5 | raise NotImplementedError 6 | 7 | def details(self): 8 | """ TODO """ 9 | raise NotImplementedError 10 | -------------------------------------------------------------------------------- /test.py: -------------------------------------------------------------------------------- 1 | import json 2 | import glob 3 | import os 4 | import pytest 5 | import subprocess 6 | import sys 7 | 8 | testcase_dir = './test/' 9 | 10 | @pytest.mark.parametrize('wasm_path, entry', [ 11 | ('hello_world.wasm', ''), 12 | ('hello_world_go.wasm', '_start'), 13 | ('hello_world_rust.wasm', ''), 14 | ('test.wasm', ''), 15 | ('password.wasm', '') 16 | ]) 17 | 18 | def test_wasm_can_be_analyzed(wasm_path, entry): 19 | wasm_path = os.path.join(testcase_dir, wasm_path) 20 | cmd = [sys.executable, 'launcher.py', '-f', wasm_path, '-s', '-v', 'info'] 21 | if entry != "": 22 | cmd.extend(['--entry', entry]) 23 | subprocess.run(cmd, timeout=60, check=True) 24 | 25 | def test_return_simulation(): 26 | wasm_path = './test/test_return.wasm' 27 | cmd = [sys.executable, 'launcher.py', '-f', wasm_path, '-s', '-v', 'info'] 28 | subprocess.run(cmd, timeout=60, check=True) 29 | 30 | result_dir = glob.glob('./output/result/test_return_*') 31 | result_dir.sort() 32 | result_dir = result_dir[-1] 33 | state_path = glob.glob(f'{result_dir}/state*.json') 34 | assert len(state_path) == 1, 'should have only one state returning `1`' 35 | 36 | with open(state_path[0], 'r') as f: 37 | state = json.load(f) 38 | assert state['Return'] == "1", f'should return 1, got {state["Return"]}' 39 | 40 | def test_unreachable_simulation(): 41 | wasm_path = './test/test_unreachable.wasm' 42 | cmd = [sys.executable, 'launcher.py', '-f', wasm_path, '-s', '-v', 'info'] 43 | subprocess.run(cmd, timeout=60, check=True) 44 | 45 | result_dir = glob.glob('./output/result/test_unreachable_*') 46 | result_dir.sort() 47 | result_dir = result_dir[-1] 48 | state_path = glob.glob(f'{result_dir}/state*.json') 49 | assert len(state_path) == 1, 'should have only one state output `null`' 50 | with open(state_path[0], 'r') as f: 51 | state = json.load(f) 52 | assert state['Solution'] == {}, f'should have no solution, got {state["Solution"]}' 53 | 54 | def test_c_sym_args(): 55 | wasm_path = './test/sym_c.wasm' 56 | cmd = [sys.executable, 'launcher.py', '-f', wasm_path, '-s', '--sym_args', '1', '--source_type', 'c', '--entry', '__main_void', '-v', 'info'] 57 | subprocess.run(cmd, timeout=60, check=True) 58 | 59 | result_dir = glob.glob('./output/result/sym_c*') 60 | result_dir.sort() 61 | result_dir = result_dir[-1] 62 | state_path = glob.glob(f'{result_dir}/state*.json') 63 | assert len(state_path) == 3, 'should have three states output' 64 | for state in state_path: 65 | with open(state, 'r') as f: 66 | state = json.load(f) 67 | assert 'Solution' in state and 'sym_arg_1' in state['Solution'], f'no sym_arg_1 solution found in {state}' 68 | assert 'Return' in state, f'no Return found in {state}' 69 | assert 'Output' in state and len(state['Output']) == 2, f'no Output found in {state}' 70 | inp = state['Solution']["sym_arg_1"] 71 | analyzed_return = state['Return'] 72 | analyzed_stdout = state['Output'][0]['output'] 73 | expected_return_to_stdout = {"0": "a", "1": "b", "2": "c"} 74 | assert analyzed_return in expected_return_to_stdout, f'analyzed return value {analyzed_return} not found in expected_return_to_stdout' 75 | assert analyzed_stdout == expected_return_to_stdout[analyzed_return], f'output mismatched, got {analyzed_stdout}, expected {expected_return_to_stdout[analyzed_return]}' 76 | 77 | def test_password_sym_args(): 78 | wasm_path = './test/password.wasm' 79 | cmd = [sys.executable, 'launcher.py', '-f', wasm_path, '-s', '--sym_args', '10', '--source_type', 'c', '--entry', '_start', '-v', 'info'] 80 | subprocess.run(cmd, timeout=60, check=True) 81 | 82 | result_dir = glob.glob('./output/result/password*') 83 | result_dir.sort() 84 | result_dir = result_dir[-1] 85 | state_path = glob.glob(f'{result_dir}/state*.json') 86 | assert len(state_path) == 6, 'should have six states output' 87 | for state in state_path: 88 | with open(state, 'r') as f: 89 | state = json.load(f) 90 | assert 'Solution' in state and 'sym_arg_1' in state['Solution'], f'no sym_arg_1 solution found in {state}' 91 | assert 'Output' in state and len(state['Output']) == 2, f'no Output found in {state}' 92 | inp = state['Solution']["sym_arg_1"] 93 | analyzed_stdout = state['Output'][0]['output'] 94 | if 'Return' in state: 95 | assert state['Return'] == "0", f'should return 0, got {state["Return"]}' 96 | assert inp == "hello", f'solved input mismatched, got {inp}' 97 | assert analyzed_stdout == "Password found!\n", f'output mismatched, got {analyzed_stdout}' 98 | else: 99 | assert 'Status' in state, f'no Status found in {state}' 100 | assert state['Status'] == "Exit with status code 1", f'should exit with status code 1, got {state["Status"]}' -------------------------------------------------------------------------------- /test/c/src/hello.c: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | int main(int argc, char **argv) 4 | { 5 | printf("Hello, world!\n"); 6 | return 0; 7 | } -------------------------------------------------------------------------------- /test/c/src/sym.c: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | int foo(char a){ 4 | if (a < 'a') { 5 | printf("a"); 6 | return 0; 7 | } 8 | else if (a < 'z') { 9 | printf("b"); 10 | return 1; 11 | } 12 | else { 13 | printf("c"); 14 | return 2; 15 | } 16 | } 17 | 18 | int main(int argc, char* argv[]){ 19 | return foo(argv[1][0]); 20 | } -------------------------------------------------------------------------------- /test/go/src/hello.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | func main() { 4 | println("Hello, world!") 5 | } -------------------------------------------------------------------------------- /test/hello_world.wasm: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PKU-ASAL/SeeWasm/e2a40c2fd2bb19fb7f6f5bd646a862cc5ddeaa67/test/hello_world.wasm -------------------------------------------------------------------------------- /test/hello_world_go.wasm: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PKU-ASAL/SeeWasm/e2a40c2fd2bb19fb7f6f5bd646a862cc5ddeaa67/test/hello_world_go.wasm -------------------------------------------------------------------------------- /test/hello_world_rust.wasm: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PKU-ASAL/SeeWasm/e2a40c2fd2bb19fb7f6f5bd646a862cc5ddeaa67/test/hello_world_rust.wasm -------------------------------------------------------------------------------- /test/password.wasm: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PKU-ASAL/SeeWasm/e2a40c2fd2bb19fb7f6f5bd646a862cc5ddeaa67/test/password.wasm -------------------------------------------------------------------------------- /test/rust/hello/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "hello_rust" 3 | version = "0.1.0" 4 | edition = "2021" 5 | 6 | [dependencies] 7 | -------------------------------------------------------------------------------- /test/rust/hello/src/main.rs: -------------------------------------------------------------------------------- 1 | fn main() { 2 | println!("Hello, world!"); 3 | } 4 | -------------------------------------------------------------------------------- /test/sym_c.wasm: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PKU-ASAL/SeeWasm/e2a40c2fd2bb19fb7f6f5bd646a862cc5ddeaa67/test/sym_c.wasm -------------------------------------------------------------------------------- /test/test.wasm: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PKU-ASAL/SeeWasm/e2a40c2fd2bb19fb7f6f5bd646a862cc5ddeaa67/test/test.wasm -------------------------------------------------------------------------------- /test/test_linux.py: -------------------------------------------------------------------------------- 1 | import json 2 | import glob 3 | import os 4 | import pytest 5 | import subprocess 6 | import sys 7 | 8 | def test_hello_c_to_wasm(): 9 | source_path = "./test/c/src/hello.c" 10 | cmd = ["clang", "-g", source_path, "-o", "hello_c.wasm"] 11 | subprocess.run(cmd, timeout=60, check=True) 12 | assert os.path.exists("hello_c.wasm"), "hello_c.wasm not found. Compilation failed." 13 | cmd = [sys.executable, 'launcher.py', '-f', "hello_c.wasm", '-s', '-v', 'info', '--source_type', 'c', '--entry', '__main_void'] 14 | subprocess.run(cmd, timeout=60, check=True) 15 | os.remove("hello_c.wasm") 16 | os.remove("hello_c.wat") 17 | 18 | result_dir = glob.glob('./output/result/hello_c*') 19 | result_dir.sort() 20 | result_dir = result_dir[-1] 21 | state_path = glob.glob(f'{result_dir}/state*.json') 22 | assert len(state_path) == 1, 'should have only one state output' 23 | with open(state_path[0], 'r') as f: 24 | state = json.load(f) 25 | assert state['Output'][0] == { 26 | "name": "stdout", 27 | "output": "Hello, world!\n" 28 | }, f'output mismatched, got {state["Output"]}' 29 | 30 | @pytest.mark.parametrize('algo', ['dfs', 'bfs', 'random', 'interval']) 31 | def test_sym_c_to_wasm(algo): 32 | source_path = "./test/c/src/sym.c" 33 | cmd = ["clang", "-g", source_path, "-o", "sym_c.wasm"] 34 | subprocess.run(cmd, timeout=60, check=True) 35 | assert os.path.exists("sym_c.wasm"), "sym_c.wasm not found. Compilation failed." 36 | cmd = [sys.executable, 'launcher.py', '-f', "sym_c.wasm", '-s', '--sym_args', '1', '-v', 'info', '--source_type', 'c', '--entry', '__main_void', '--search', algo] 37 | subprocess.run(cmd, timeout=60, check=True) 38 | 39 | result_dir = glob.glob('./output/result/sym_c*') 40 | result_dir.sort() 41 | result_dir = result_dir[-1] 42 | state_path = glob.glob(f'{result_dir}/state*.json') 43 | assert len(state_path) == 3, 'should have three states output' 44 | for state in state_path: 45 | with open(state, 'r') as f: 46 | state = json.load(f) 47 | assert 'Solution' in state and 'sym_arg_1' in state['Solution'], f'no sym_arg_1 solution found in {state}' 48 | assert 'Return' in state, f'no Return found in {state}' 49 | assert 'Output' in state and len(state['Output']) == 2, f'no Output found in {state}' 50 | inp = state['Solution']["sym_arg_1"] 51 | analyzed_return = int(state['Return']) 52 | analyzed_stdout = state['Output'][0]['output'] 53 | if state['Return'] != 1: # only test the printable input, should be a char in a~z 54 | continue 55 | # call wasmtime with inp 56 | cmd = ["wasmtime", "sym_c.wasm", inp] 57 | p = subprocess.run(cmd, timeout=60, stdout=subprocess.PIPE, stderr=subprocess.PIPE) 58 | # compare results 59 | assert p.returncode == analyzed_return, f'analyzed return code {analyzed_return}, wasmtime returned {p.returncode}, input {inp}, wasmtime stderr {p.stderr.decode("utf-8")}' 60 | assert p.stdout.decode('utf-8') == analyzed_stdout, f'output mismatched, analyzed {analyzed_stdout}, wasmtime returned {p.stdout.decode("utf-8")}' 61 | 62 | os.remove("sym_c.wasm") 63 | os.remove("sym_c.wat") 64 | 65 | def test_hello_rust_to_wasm(): 66 | source_dir = "./test/rust/hello" 67 | expected_wasm_path = "./test/rust/hello/target/wasm32-wasi/debug/hello_rust.wasm" 68 | cmd = ["cargo", "build", "--target", "wasm32-wasi"] 69 | subprocess.run(cmd, cwd=source_dir, timeout=60, check=True) 70 | assert os.path.exists(expected_wasm_path), "hello_rust.wasm not found. Compilation failed." 71 | cmd = [sys.executable, 'launcher.py', '-f', expected_wasm_path, '-s', '-v', 'info', '--source_type', 'rust', '--entry', '__main_void'] 72 | subprocess.run(cmd, timeout=60, check=True) 73 | cmd = ["rm", "-rf", "./test/rust/hello/target"] 74 | subprocess.run(cmd, timeout=60, check=True) 75 | 76 | result_dir = glob.glob('./output/result/hello_rust*') 77 | result_dir.sort() 78 | result_dir = result_dir[-1] 79 | state_path = glob.glob(f'{result_dir}/state*.json') 80 | assert len(state_path) == 1, 'should have only one state output' 81 | with open(state_path[0], 'r') as f: 82 | state = json.load(f) 83 | assert state['Output'][0] == { 84 | "name": "stdout", 85 | "output": "Hello, world!\n" 86 | }, f'output mismatched, got {state["Output"]}' 87 | 88 | def test_hello_go_to_wasm(): 89 | source_path = "./test/go/src/hello.go" 90 | cmd = ["tinygo", "build", "-target=wasi", "-o", "hello_go.wasm", source_path] 91 | subprocess.run(cmd, timeout=60, check=True) 92 | assert os.path.exists("hello_go.wasm"), "hello_go.wasm not found. Compilation failed." 93 | cmd = [sys.executable, 'launcher.py', '-f', "hello_go.wasm", '-s', '-v', 'info', '--source_type', 'go', '--entry', 'runtime.run$1'] 94 | subprocess.run(cmd, timeout=60, check=True) 95 | os.remove("hello_go.wasm") 96 | os.remove("hello_go.wat") 97 | 98 | result_dir = glob.glob('./output/result/hello_go*') 99 | result_dir.sort() 100 | result_dir = result_dir[-1] 101 | state_path = glob.glob(f'{result_dir}/state*.json') 102 | assert len(state_path) == 1, 'should have only one state output' 103 | with open(state_path[0], 'r') as f: 104 | state = json.load(f) 105 | assert 'Return' in state, f'no Return found in {state}' 106 | assert state['Return'] == "0", f'should return 0, got {state["Return"]}' 107 | assert state['Output'][0] == { 108 | "name": "stdout", 109 | "output": "Hello, world!\n" 110 | }, f'output mismatched, got {state["Output"]}' 111 | 112 | def test_visualize_graph(): 113 | wasm_path = './test/hello_world.wasm' 114 | cmd = [sys.executable, 'launcher.py', '-f', wasm_path, '-s', '-v', 'info', '--visualize'] 115 | subprocess.run(cmd, timeout=30, check=True) 116 | result_dir = glob.glob('./output/visualized_graph/hello_world*.pdf') 117 | assert len(result_dir) == 1, 'more than one matching results, do you have multiple `hello_world*` cases?' 118 | -------------------------------------------------------------------------------- /test/test_return.wasm: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PKU-ASAL/SeeWasm/e2a40c2fd2bb19fb7f6f5bd646a862cc5ddeaa67/test/test_return.wasm -------------------------------------------------------------------------------- /test/test_unreachable.wasm: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PKU-ASAL/SeeWasm/e2a40c2fd2bb19fb7f6f5bd646a862cc5ddeaa67/test/test_unreachable.wasm -------------------------------------------------------------------------------- /wasm/__init__.py: -------------------------------------------------------------------------------- 1 | from __future__ import unicode_literals 2 | 3 | __version__ = '1.2' 4 | 5 | from .decode import ( 6 | decode_bytecode, 7 | decode_module, 8 | ) 9 | 10 | from .formatter import ( 11 | format_function, 12 | format_instruction, 13 | format_lang_type, 14 | format_mutability, 15 | ) 16 | 17 | from .modtypes import ( 18 | ModuleHeader, 19 | FunctionImportEntryData, 20 | ResizableLimits, 21 | TableType, 22 | MemoryType, 23 | GlobalType, 24 | ImportEntry, 25 | ImportSection, 26 | FuncType, 27 | TypeSection, 28 | FunctionSection, 29 | TableSection, 30 | MemorySection, 31 | InitExpr, 32 | GlobalEntry, 33 | GlobalSection, 34 | ExportEntry, 35 | ExportSection, 36 | StartSection, 37 | ElementSegment, 38 | ElementSection, 39 | LocalEntry, 40 | FunctionBody, 41 | CodeSection, 42 | DataSegment, 43 | DataSection, 44 | Naming, 45 | NameMap, 46 | LocalNames, 47 | LocalNameMap, 48 | NameSubSection, 49 | Section, 50 | ) 51 | 52 | from .immtypes import ( 53 | BlockImm, 54 | BranchImm, 55 | BranchTableImm, 56 | CallImm, 57 | CallIndirectImm, 58 | LocalVarXsImm, 59 | GlobalVarXsImm, 60 | MemoryImm, 61 | CurGrowMemImm, 62 | I32ConstImm, 63 | I64ConstImm, 64 | F32ConstImm, 65 | F64ConstImm, 66 | ) 67 | 68 | from .opcodes import ( 69 | Opcode, 70 | INSN_ENTER_BLOCK, 71 | INSN_LEAVE_BLOCK, 72 | INSN_BRANCH, 73 | INSN_NO_FLOW, 74 | ) 75 | 76 | for cur_op in opcodes.OPCODES: 77 | globals()[ 78 | 'OP_' + cur_op.mnemonic.upper().replace('.', '_').replace('/', '_') 79 | ] = cur_op.id 80 | 81 | from .wasmtypes import ( 82 | UInt8Field, 83 | UInt16Field, 84 | UInt32Field, 85 | UInt64Field, 86 | VarUInt1Field, 87 | VarUInt7Field, 88 | VarUInt32Field, 89 | VarInt7Field, 90 | VarInt32Field, 91 | VarInt64Field, 92 | ElementTypeField, 93 | ValueTypeField, 94 | ExternalKindField, 95 | BlockTypeField, 96 | SEC_UNK, 97 | SEC_TYPE, 98 | SEC_IMPORT, 99 | SEC_FUNCTION, 100 | SEC_TABLE, 101 | SEC_MEMORY, 102 | SEC_GLOBAL, 103 | SEC_EXPORT, 104 | SEC_START, 105 | SEC_ELEMENT, 106 | SEC_CODE, 107 | SEC_DATA, 108 | SEC_DATACOUNT, 109 | SEC_NAME, 110 | LANG_TYPE_I32, 111 | LANG_TYPE_I64, 112 | LANG_TYPE_F32, 113 | LANG_TYPE_F64, 114 | LANG_TYPE_ANYFUNC, 115 | LANG_TYPE_FUNC, 116 | LANG_TYPE_EMPTY, 117 | VAL_TYPE_I32, 118 | VAL_TYPE_I64, 119 | VAL_TYPE_F32, 120 | VAL_TYPE_F64, 121 | NAME_SUBSEC_FUNCTION, 122 | NAME_SUBSEC_LOCAL, 123 | IMMUTABLE, 124 | MUTABLE, 125 | ) 126 | -------------------------------------------------------------------------------- /wasm/__main__.py: -------------------------------------------------------------------------------- 1 | """Testing & debug stuff.""" 2 | from __future__ import print_function, absolute_import, division, unicode_literals 3 | 4 | import argparse 5 | import sys 6 | 7 | from .formatter import format_function 8 | from .modtypes import SEC_CODE, SEC_TYPE, SEC_FUNCTION, Section 9 | from .decode import decode_module 10 | 11 | 12 | def dump(): 13 | parser = argparse.ArgumentParser() 14 | parser.add_argument('wasm_file', type=str) 15 | parser.add_argument('--disas', action='store_true', help="Disassemble code") 16 | args = parser.parse_args() 17 | 18 | try: 19 | with open(args.wasm_file, 'rb') as raw: 20 | raw = raw.read() 21 | except IOError as exc: 22 | print("[-] Can't open input file: " + str(exc), file=sys.stderr) 23 | return 24 | 25 | # Parse & print header. 26 | mod_iter = iter(decode_module(raw, decode_name_subsections=False)) 27 | hdr, hdr_data = next(mod_iter) 28 | print(hdr.to_string(hdr_data)) 29 | 30 | # Parse & print other sections. 31 | code_sec = None 32 | type_sec = None 33 | func_sec = None 34 | for cur_sec, cur_sec_data in mod_iter: 35 | print(cur_sec.to_string(cur_sec_data)) 36 | if type(cur_sec) == Section: 37 | if cur_sec_data.id == SEC_CODE: 38 | code_sec = cur_sec_data.payload 39 | elif cur_sec_data.id == SEC_TYPE: 40 | type_sec = cur_sec_data.payload 41 | elif cur_sec_data.id == SEC_FUNCTION: 42 | func_sec = cur_sec_data.payload 43 | 44 | # If ordered to disassemble, do so. 45 | # TODO: We might want to make use of debug names, if available. 46 | if args.disas and code_sec is not None: 47 | for i, func_body in enumerate(code_sec.bodies): 48 | print('{x} sub_{id:04X} {x}'.format(x='=' * 35, id=i)) 49 | 50 | # If we have type info, use it. 51 | func_type = type_sec.entries[func_sec.types[i]] if ( 52 | None not in (type_sec, func_sec) 53 | ) else None 54 | 55 | print() 56 | print('\n'.join(format_function(func_body, func_type))) 57 | print() 58 | -------------------------------------------------------------------------------- /wasm/compat.py: -------------------------------------------------------------------------------- 1 | """Defines compatibility quirks for Python 2.7.""" 2 | from __future__ import print_function, absolute_import, division, unicode_literals 3 | 4 | import sys 5 | import functools 6 | import logging 7 | import warnings 8 | 9 | 10 | def add_metaclass(metaclass): 11 | """ 12 | Class decorator for creating a class with a metaclass. 13 | Borrowed from `six` module. 14 | """ 15 | @functools.wraps(metaclass) 16 | def wrapper(cls): 17 | orig_vars = cls.__dict__.copy() 18 | slots = orig_vars.get('__slots__') 19 | if slots is not None: 20 | if isinstance(slots, str): 21 | slots = [slots] 22 | for slots_var in slots: 23 | orig_vars.pop(slots_var) 24 | orig_vars.pop('__dict__', None) 25 | orig_vars.pop('__weakref__', None) 26 | return metaclass(cls.__name__, cls.__bases__, orig_vars) 27 | return wrapper 28 | 29 | 30 | def indent(text, prefix, predicate=None): 31 | """Adds 'prefix' to the beginning of selected lines in 'text'. 32 | 33 | If 'predicate' is provided, 'prefix' will only be added to the lines 34 | where 'predicate(line)' is True. If 'predicate' is not provided, 35 | it will default to adding 'prefix' to all non-empty lines that do not 36 | consist solely of whitespace characters. 37 | 38 | Borrowed from Py3 `textwrap` module. 39 | """ 40 | if predicate is None: 41 | def predicate(line): 42 | return line.strip() 43 | 44 | def prefixed_lines(): 45 | for line in text.splitlines(True): 46 | yield (prefix + line if predicate(line) else line) 47 | return ''.join(prefixed_lines()) 48 | 49 | 50 | def deprecated_func(func): 51 | """Deprecates a function, printing a warning on the first usage.""" 52 | 53 | # We use a mutable container here to work around Py2's lack of 54 | # the `nonlocal` keyword. 55 | first_usage = [True] 56 | 57 | @functools.wraps(func) 58 | def wrapper(*args, **kwargs): 59 | if first_usage[0]: 60 | warnings.warn( 61 | "Call to deprecated function {}.".format(func.__name__), 62 | DeprecationWarning, 63 | ) 64 | first_usage[0] = False 65 | return func(*args, **kwargs) 66 | 67 | return wrapper 68 | 69 | 70 | if sys.version_info[0] >= 3: 71 | def byte2int(x): 72 | return x 73 | 74 | elif sys.version_info[0] == 2: 75 | def byte2int(x): 76 | return ord(x) if type(x) == str else x 77 | 78 | else: 79 | raise Exception("Unsupported Python version") 80 | 81 | 82 | -------------------------------------------------------------------------------- /wasm/decode.py: -------------------------------------------------------------------------------- 1 | """Provides functions for decoding WASM modules and bytecode.""" 2 | from __future__ import print_function, absolute_import, division, unicode_literals 3 | 4 | from collections import namedtuple 5 | from .modtypes import ModuleHeader, Section, SEC_UNK, SEC_NAME, NameSubSection 6 | from .opcodes import OPCODE_MAP 7 | from .compat import byte2int 8 | 9 | 10 | Instruction = namedtuple('Instruction', 'op imm len') 11 | ModuleFragment = namedtuple('ModuleFragment', 'type data') 12 | 13 | 14 | def decode_bytecode(bytecode): 15 | """Decodes raw bytecode, yielding `Instruction`s.""" 16 | bytecode_wnd = memoryview(bytecode) 17 | while bytecode_wnd: 18 | opcode_id = byte2int(bytecode_wnd[0]) 19 | opcode = OPCODE_MAP[opcode_id] 20 | 21 | if opcode.imm_struct is not None: 22 | offs, imm, _ = opcode.imm_struct.from_raw(None, bytecode_wnd[1:]) 23 | else: 24 | imm = None 25 | offs = 0 26 | 27 | insn_len = 1 + offs 28 | yield Instruction(opcode, imm, insn_len) 29 | bytecode_wnd = bytecode_wnd[insn_len:] 30 | 31 | 32 | def decode_module(module, decode_name_subsections=False): 33 | """Decodes raw WASM modules, yielding `ModuleFragment`s.""" 34 | module_wnd = memoryview(module) 35 | 36 | # Read & yield module header. 37 | hdr = ModuleHeader() 38 | hdr_len, hdr_data, _ = hdr.from_raw(None, module_wnd) 39 | yield ModuleFragment(hdr, hdr_data) 40 | module_wnd = module_wnd[hdr_len:] 41 | 42 | # Read & yield sections. 43 | while module_wnd: 44 | sec = Section() 45 | sec_len, sec_data, _ = sec.from_raw(None, module_wnd) 46 | 47 | # If requested, decode name subsections when encountered. 48 | if ( 49 | decode_name_subsections and 50 | sec_data.id == SEC_UNK and 51 | sec_data.name == SEC_NAME 52 | ): 53 | sec_wnd = sec_data.payload 54 | while sec_wnd: 55 | subsec = NameSubSection() 56 | subsec_len, subsec_data, _ = subsec.from_raw(None, sec_wnd) 57 | yield ModuleFragment(subsec, subsec_data) 58 | sec_wnd = sec_wnd[subsec_len:] 59 | else: 60 | yield ModuleFragment(sec, sec_data) 61 | 62 | module_wnd = module_wnd[sec_len:] 63 | -------------------------------------------------------------------------------- /wasm/formatter.py: -------------------------------------------------------------------------------- 1 | """Defines functions converting raw instructions into textual form.""" 2 | from __future__ import print_function, absolute_import, division, unicode_literals 3 | 4 | import itertools 5 | 6 | from .opcodes import INSN_LEAVE_BLOCK, INSN_ENTER_BLOCK 7 | from .decode import decode_bytecode 8 | from .wasmtypes import VAL_TYPE_I32, VAL_TYPE_I64, VAL_TYPE_F32, VAL_TYPE_F64, MUTABLE, IMMUTABLE 9 | 10 | 11 | def format_instruction(insn): 12 | """ 13 | Takes a raw `Instruction` and translates it into a human readable text 14 | representation. As of writing, the text representation for WASM is not yet 15 | standardized, so we just emit some generic format. 16 | """ 17 | text = insn.op.mnemonic 18 | 19 | if not insn.imm: 20 | return text 21 | 22 | return text + ' ' + ', '.join([ 23 | getattr(insn.op.imm_struct, x.name).to_string( 24 | getattr(insn.imm, x.name) 25 | ) 26 | for x in insn.op.imm_struct._meta.fields 27 | ]) 28 | 29 | _mutability_str_mapping = { 30 | MUTABLE: "mut", 31 | IMMUTABLE: "" 32 | } 33 | 34 | def format_mutability(mutability): 35 | """Takes a value type `int`, returning its string representation.""" 36 | try: 37 | return _mutability_str_mapping[mutability] 38 | except KeyError: 39 | raise ValueError('Bad value for value type ({})'.format(mutability)) 40 | 41 | _lang_type_str_mapping = { 42 | VAL_TYPE_I32: 'i32', 43 | VAL_TYPE_I64: 'i64', 44 | VAL_TYPE_F32: 'f32', 45 | VAL_TYPE_F64: 'f64', 46 | } 47 | 48 | 49 | def format_lang_type(lang_type): 50 | """Takes a value type `int`, returning its string representation.""" 51 | try: 52 | return _lang_type_str_mapping[lang_type] 53 | except KeyError: 54 | raise ValueError('Bad value for value type ({})'.format(lang_type)) 55 | 56 | 57 | def format_function( 58 | func_body, 59 | func_type=None, 60 | indent=2, 61 | format_locals=True, 62 | ): 63 | """ 64 | Takes a `FunctionBody` and optionally a `FunctionType`, yielding the string 65 | representation of the function line by line. The function type is required 66 | for formatting function parameter and return value information. 67 | """ 68 | if func_type is None: 69 | yield 'func' 70 | else: 71 | param_section = ' (param {})'.format(' '.join( 72 | map(format_lang_type, func_type.param_types) 73 | )) if func_type.param_types else '' 74 | result_section = ' (result {})'.format( 75 | format_lang_type(func_type.return_type) 76 | ) if func_type.return_type else '' 77 | yield 'func' + param_section + result_section 78 | 79 | if format_locals and func_body.locals: 80 | yield '(locals {})'.format(' '.join(itertools.chain.from_iterable( 81 | itertools.repeat(format_lang_type(x.type), x.count) 82 | for x in func_body.locals 83 | ))) 84 | 85 | level = 1 86 | for cur_insn in decode_bytecode(func_body.code): 87 | if cur_insn.op.flags & INSN_LEAVE_BLOCK: 88 | level -= 1 89 | yield ' ' * (level * indent) + format_instruction(cur_insn) 90 | if cur_insn.op.flags & INSN_ENTER_BLOCK: 91 | level += 1 92 | -------------------------------------------------------------------------------- /wasm/immtypes.py: -------------------------------------------------------------------------------- 1 | """Defines immediate types for WASM bytecode instructions.""" 2 | from __future__ import print_function, absolute_import, division, unicode_literals 3 | 4 | from .wasmtypes import * 5 | from .types import Structure, RepeatField 6 | 7 | 8 | class BlockImm(Structure): 9 | sig = BlockTypeField() 10 | 11 | 12 | class BranchImm(Structure): 13 | relative_depth = VarUInt32Field() 14 | 15 | 16 | class BranchTableImm(Structure): 17 | target_count = VarUInt32Field() 18 | target_table = RepeatField(VarUInt32Field(), lambda x: x.target_count) 19 | default_target = VarUInt32Field() 20 | 21 | 22 | class CallImm(Structure): 23 | function_index = VarUInt32Field() 24 | 25 | 26 | class CallIndirectImm(Structure): 27 | type_index = VarUInt32Field() 28 | reserved = VarUInt1Field() 29 | 30 | 31 | class LocalVarXsImm(Structure): 32 | local_index = VarUInt32Field() 33 | 34 | 35 | class GlobalVarXsImm(Structure): 36 | global_index = VarUInt32Field() 37 | 38 | 39 | class MemoryImm(Structure): 40 | flags = VarUInt32Field() 41 | offset = VarUInt32Field() 42 | 43 | 44 | class CurGrowMemImm(Structure): 45 | reserved = VarUInt1Field() 46 | 47 | 48 | class I32ConstImm(Structure): 49 | value = VarInt32Field() 50 | 51 | 52 | class I64ConstImm(Structure): 53 | value = VarInt64Field() 54 | 55 | 56 | class F32ConstImm(Structure): 57 | value = UInt32Field() 58 | 59 | 60 | class F64ConstImm(Structure): 61 | value = UInt64Field() 62 | -------------------------------------------------------------------------------- /wasm/modtypes.py: -------------------------------------------------------------------------------- 1 | """Defines data structures used in WASM (binary) modules.""" 2 | from __future__ import print_function, absolute_import, division, unicode_literals 3 | 4 | from .wasmtypes import * 5 | from .opcodes import OP_END 6 | from .types import ( 7 | Structure, CondField, RepeatField, 8 | ChoiceField, WasmField, ConstField, BytesField, 9 | ) 10 | 11 | 12 | class ModuleHeader(Structure): 13 | magic = UInt32Field() 14 | version = UInt32Field() 15 | 16 | 17 | class FunctionImportEntryData(Structure): 18 | type = VarUInt32Field() 19 | 20 | 21 | class ResizableLimits(Structure): 22 | flags = VarUInt32Field() 23 | initial = VarUInt32Field() 24 | maximum = CondField(VarUInt32Field(), lambda x: x.flags & 1) 25 | 26 | 27 | class TableType(Structure): 28 | element_type = ElementTypeField() 29 | limits = ResizableLimits() 30 | 31 | 32 | class MemoryType(Structure): 33 | limits = ResizableLimits() 34 | 35 | 36 | class GlobalType(Structure): 37 | content_type = ValueTypeField() 38 | mutability = VarUInt1Field() 39 | 40 | 41 | class ImportEntry(Structure): 42 | module_len = VarUInt32Field() 43 | module_str = BytesField(lambda x: x.module_len, is_str=True) 44 | field_len = VarUInt32Field() 45 | field_str = BytesField(lambda x: x.field_len, is_str=True) 46 | kind = ExternalKindField() 47 | type = ChoiceField({ 48 | 0: FunctionImportEntryData(), 49 | 1: TableType(), 50 | 2: MemoryType(), 51 | 3: GlobalType(), 52 | }, lambda x: x.kind) 53 | 54 | 55 | class ImportSection(Structure): 56 | count = VarUInt32Field() 57 | entries = RepeatField(ImportEntry(), lambda x: x.count) 58 | 59 | 60 | class FuncType(Structure): 61 | form = VarInt7Field() 62 | param_count = VarUInt32Field() 63 | param_types = RepeatField(ValueTypeField(), lambda x: x.param_count) 64 | return_count = VarUInt1Field() 65 | return_type = CondField(ValueTypeField(), lambda x: bool(x.return_count)) 66 | 67 | 68 | class TypeSection(Structure): 69 | count = VarUInt32Field() 70 | entries = RepeatField(FuncType(), lambda x: x.count) 71 | 72 | 73 | class FunctionSection(Structure): 74 | count = VarUInt32Field() 75 | types = RepeatField(VarUInt32Field(), lambda x: x.count) 76 | 77 | 78 | class TableSection(Structure): 79 | count = VarUInt32Field() 80 | entries = RepeatField(TableType(), lambda x: x.count) 81 | 82 | 83 | class MemorySection(Structure): 84 | count = VarUInt32Field() 85 | entries = RepeatField(MemoryType(), lambda x: x.count) 86 | 87 | 88 | class InitExpr(WasmField): 89 | def from_raw(self, struct, raw): 90 | from .decode import decode_bytecode 91 | 92 | offs = 0 93 | instrs = [] 94 | for cur_insn in decode_bytecode(raw): 95 | offs += cur_insn.len 96 | instrs.append(cur_insn) 97 | if cur_insn.op.id == OP_END: 98 | break 99 | 100 | return offs, instrs, self 101 | 102 | 103 | class GlobalEntry(Structure): 104 | type = GlobalType() 105 | init = InitExpr() 106 | 107 | 108 | class GlobalSection(Structure): 109 | count = VarUInt32Field() 110 | globals = RepeatField(GlobalEntry(), lambda x: x.count) 111 | 112 | 113 | class ExportEntry(Structure): 114 | field_len = VarUInt32Field() 115 | field_str = BytesField(lambda x: x.field_len, is_str=True) 116 | kind = ExternalKindField() 117 | index = VarUInt32Field() 118 | 119 | 120 | class ExportSection(Structure): 121 | count = VarUInt32Field() 122 | entries = RepeatField(ExportEntry(), lambda x: x.count) 123 | 124 | 125 | class StartSection(Structure): 126 | index = VarUInt32Field() 127 | 128 | 129 | class ElementSegment(Structure): 130 | index = VarUInt32Field() 131 | offset = InitExpr() 132 | num_elem = VarUInt32Field() 133 | elems = RepeatField(VarUInt32Field(), lambda x: x.num_elem) 134 | 135 | 136 | class ElementSection(Structure): 137 | count = VarUInt32Field() 138 | entries = RepeatField(ElementSegment(), lambda x: x.count) 139 | 140 | 141 | class LocalEntry(Structure): 142 | count = VarUInt32Field() 143 | type = ValueTypeField() 144 | 145 | 146 | class FunctionBody(Structure): 147 | body_size = VarUInt32Field() 148 | local_count = VarUInt32Field() 149 | locals = RepeatField( 150 | LocalEntry(), 151 | lambda x: x.local_count, 152 | ) 153 | code = BytesField( 154 | lambda x: ( 155 | x.body_size - 156 | x.get_decoder_meta()['lengths']['local_count'] - 157 | x.get_decoder_meta()['lengths']['locals'] 158 | ) 159 | ) 160 | 161 | 162 | class CodeSection(Structure): 163 | count = VarUInt32Field() 164 | bodies = RepeatField(FunctionBody(), lambda x: x.count) 165 | 166 | 167 | class DataSegment(Structure): 168 | index = VarUInt32Field() 169 | offset = InitExpr() 170 | size = VarUInt32Field() 171 | data = BytesField(lambda x: x.size) 172 | 173 | 174 | class DataSection(Structure): 175 | count = VarUInt32Field() 176 | entries = RepeatField(DataSegment(), lambda x: x.count) 177 | 178 | class DataCountSection(Structure): 179 | count = VarUInt32Field() 180 | 181 | class Naming(Structure): 182 | index = VarUInt32Field() 183 | name_len = VarUInt32Field() 184 | name_str = BytesField(lambda x: x.name_len, is_str=True) 185 | 186 | 187 | class NameMap(Structure): 188 | count = VarUInt32Field() 189 | names = RepeatField(Naming(), lambda x: x.count) 190 | 191 | 192 | class LocalNames(Structure): 193 | index = VarUInt32Field() 194 | local_map = NameMap() 195 | 196 | 197 | class LocalNameMap(Structure): 198 | count = VarUInt32Field() 199 | funcs = RepeatField(LocalNames, lambda x: x.count) 200 | 201 | 202 | class NameSubSection(Structure): 203 | name_type = VarUInt7Field() 204 | payload_len = VarUInt32Field() 205 | payload = ChoiceField({ 206 | NAME_SUBSEC_FUNCTION: NameMap(), 207 | NAME_SUBSEC_LOCAL: LocalNameMap(), 208 | }, lambda x: x.name_type) 209 | 210 | 211 | class Section(Structure): 212 | id = VarUInt7Field() 213 | payload_len = VarUInt32Field() 214 | name_len = CondField( 215 | VarUInt32Field(), 216 | lambda x: x.id == 0, 217 | ) 218 | name = CondField( 219 | BytesField(lambda x: x.name_len, is_str=True), 220 | lambda x: x.id == 0, 221 | ) 222 | 223 | payload = ChoiceField({ 224 | SEC_UNK: BytesField(lambda x: ( 225 | x.payload_len - 226 | x.get_decoder_meta()['lengths']['name'] - 227 | x.get_decoder_meta()['lengths']['name_len'] 228 | )), 229 | SEC_TYPE: TypeSection(), 230 | SEC_IMPORT: ImportSection(), 231 | SEC_FUNCTION: FunctionSection(), 232 | SEC_TABLE: TableSection(), 233 | SEC_MEMORY: MemorySection(), 234 | SEC_GLOBAL: GlobalSection(), 235 | SEC_EXPORT: ExportSection(), 236 | SEC_START: StartSection(), 237 | SEC_ELEMENT: ElementSection(), 238 | SEC_CODE: CodeSection(), 239 | SEC_DATA: DataSection(), 240 | SEC_DATACOUNT: DataCountSection(), 241 | }, lambda x: x.id) 242 | 243 | overhang = BytesField(lambda x: max(0, ( 244 | x.payload_len - 245 | x.get_decoder_meta()['lengths']['name'] - 246 | x.get_decoder_meta()['lengths']['name_len'] - 247 | x.get_decoder_meta()['lengths']['payload'] 248 | ))) 249 | -------------------------------------------------------------------------------- /wasm/opcodes.py: -------------------------------------------------------------------------------- 1 | """Defines mappings of opcodes to their info structures.""" 2 | from __future__ import print_function, absolute_import, division, unicode_literals 3 | 4 | from collections import namedtuple 5 | from .immtypes import * 6 | 7 | 8 | Opcode = namedtuple('Opcode', 'id mnemonic imm_struct flags') 9 | 10 | # Flags describing generic characteristics of instructions 11 | INSN_ENTER_BLOCK = 1 << 0 12 | INSN_LEAVE_BLOCK = 1 << 1 13 | INSN_BRANCH = 1 << 2 14 | INSN_NO_FLOW = 1 << 3 # does not pass control to next insn 15 | 16 | 17 | OPCODES = [ 18 | Opcode(0x00, 'unreachable', None, INSN_NO_FLOW), 19 | Opcode(0x01, 'nop', None, 0), 20 | Opcode(0x02, 'block', BlockImm(), INSN_ENTER_BLOCK), 21 | Opcode(0x03, 'loop', BlockImm(), INSN_ENTER_BLOCK), 22 | Opcode(0x04, 'if', BlockImm(), INSN_ENTER_BLOCK), 23 | Opcode(0x05, 'else', None, INSN_ENTER_BLOCK | INSN_LEAVE_BLOCK), 24 | Opcode(0x0b, 'end', None, INSN_LEAVE_BLOCK), 25 | Opcode(0x0c, 'br', BranchImm(), INSN_BRANCH), 26 | Opcode(0x0d, 'br_if', BranchImm(), INSN_BRANCH), 27 | Opcode(0x0e, 'br_table', BranchTableImm(), INSN_BRANCH), 28 | Opcode(0x0f, 'return', None, INSN_NO_FLOW), 29 | 30 | Opcode(0x10, 'call', CallImm(), INSN_BRANCH), 31 | Opcode(0x11, 'call_indirect', CallIndirectImm(), INSN_BRANCH), 32 | 33 | Opcode(0x1a, 'drop', None, 0), 34 | Opcode(0x1b, 'select', None, 0), 35 | 36 | Opcode(0x20, 'get_local', LocalVarXsImm(), 0), 37 | Opcode(0x21, 'set_local', LocalVarXsImm(), 0), 38 | Opcode(0x22, 'tee_local', LocalVarXsImm(), 0), 39 | Opcode(0x23, 'get_global', GlobalVarXsImm(), 0), 40 | Opcode(0x24, 'set_global', GlobalVarXsImm(), 0), 41 | 42 | Opcode(0x28, 'i32.load', MemoryImm(), 0), 43 | Opcode(0x29, 'i64.load', MemoryImm(), 0), 44 | Opcode(0x2a, 'f32.load', MemoryImm(), 0), 45 | Opcode(0x2b, 'f64.load', MemoryImm(), 0), 46 | Opcode(0x2c, 'i32.load8_s', MemoryImm(), 0), 47 | Opcode(0x2d, 'i32.load8_u', MemoryImm(), 0), 48 | Opcode(0x2e, 'i32.load16_s', MemoryImm(), 0), 49 | Opcode(0x2f, 'i32.load16_u', MemoryImm(), 0), 50 | Opcode(0x30, 'i64.load8_s', MemoryImm(), 0), 51 | Opcode(0x31, 'i64.load8_u', MemoryImm(), 0), 52 | Opcode(0x32, 'i64.load16_s', MemoryImm(), 0), 53 | Opcode(0x33, 'i64.load16_u', MemoryImm(), 0), 54 | Opcode(0x34, 'i64.load32_s', MemoryImm(), 0), 55 | Opcode(0x35, 'i64.load32_u', MemoryImm(), 0), 56 | Opcode(0x36, 'i32.store', MemoryImm(), 0), 57 | Opcode(0x37, 'i64.store', MemoryImm(), 0), 58 | Opcode(0x38, 'f32.store', MemoryImm(), 0), 59 | Opcode(0x39, 'f64.store', MemoryImm(), 0), 60 | Opcode(0x3a, 'i32.store8', MemoryImm(), 0), 61 | Opcode(0x3b, 'i32.store16', MemoryImm(), 0), 62 | Opcode(0x3c, 'i64.store8', MemoryImm(), 0), 63 | Opcode(0x3d, 'i64.store16', MemoryImm(), 0), 64 | Opcode(0x3e, 'i64.store32', MemoryImm(), 0), 65 | Opcode(0x3f, 'current_memory', CurGrowMemImm(), 0), 66 | Opcode(0x40, 'grow_memory', CurGrowMemImm(), 0), 67 | 68 | Opcode(0x41, 'i32.const', I32ConstImm(), 0), 69 | Opcode(0x42, 'i64.const', I64ConstImm(), 0), 70 | Opcode(0x43, 'f32.const', F32ConstImm(), 0), 71 | Opcode(0x44, 'f64.const', F64ConstImm(), 0), 72 | 73 | Opcode(0x45, 'i32.eqz', None, 0), 74 | Opcode(0x46, 'i32.eq', None, 0), 75 | Opcode(0x47, 'i32.ne', None, 0), 76 | Opcode(0x48, 'i32.lt_s', None, 0), 77 | Opcode(0x49, 'i32.lt_u', None, 0), 78 | Opcode(0x4a, 'i32.gt_s', None, 0), 79 | Opcode(0x4b, 'i32.gt_u', None, 0), 80 | Opcode(0x4c, 'i32.le_s', None, 0), 81 | Opcode(0x4d, 'i32.le_u', None, 0), 82 | Opcode(0x4e, 'i32.ge_s', None, 0), 83 | Opcode(0x4f, 'i32.ge_u', None, 0), 84 | Opcode(0x50, 'i64.eqz', None, 0), 85 | Opcode(0x51, 'i64.eq', None, 0), 86 | Opcode(0x52, 'i64.ne', None, 0), 87 | Opcode(0x53, 'i64.lt_s', None, 0), 88 | Opcode(0x54, 'i64.lt_u', None, 0), 89 | Opcode(0x55, 'i64.gt_s', None, 0), 90 | Opcode(0x56, 'i64.gt_u', None, 0), 91 | Opcode(0x57, 'i64.le_s', None, 0), 92 | Opcode(0x58, 'i64.le_u', None, 0), 93 | Opcode(0x59, 'i64.ge_s', None, 0), 94 | Opcode(0x5a, 'i64.ge_u', None, 0), 95 | Opcode(0x5b, 'f32.eq', None, 0), 96 | Opcode(0x5c, 'f32.ne', None, 0), 97 | Opcode(0x5d, 'f32.lt', None, 0), 98 | Opcode(0x5e, 'f32.gt', None, 0), 99 | Opcode(0x5f, 'f32.le', None, 0), 100 | Opcode(0x60, 'f32.ge', None, 0), 101 | Opcode(0x61, 'f64.eq', None, 0), 102 | Opcode(0x62, 'f64.ne', None, 0), 103 | Opcode(0x63, 'f64.lt', None, 0), 104 | Opcode(0x64, 'f64.gt', None, 0), 105 | Opcode(0x65, 'f64.le', None, 0), 106 | Opcode(0x66, 'f64.ge', None, 0), 107 | 108 | Opcode(0x67, 'i32.clz', None, 0), 109 | Opcode(0x68, 'i32.ctz', None, 0), 110 | Opcode(0x69, 'i32.popcnt', None, 0), 111 | Opcode(0x6a, 'i32.add', None, 0), 112 | Opcode(0x6b, 'i32.sub', None, 0), 113 | Opcode(0x6c, 'i32.mul', None, 0), 114 | Opcode(0x6d, 'i32.div_s', None, 0), 115 | Opcode(0x6e, 'i32.div_u', None, 0), 116 | Opcode(0x6f, 'i32.rem_s', None, 0), 117 | Opcode(0x70, 'i32.rem_u', None, 0), 118 | Opcode(0x71, 'i32.and', None, 0), 119 | Opcode(0x72, 'i32.or', None, 0), 120 | Opcode(0x73, 'i32.xor', None, 0), 121 | Opcode(0x74, 'i32.shl', None, 0), 122 | Opcode(0x75, 'i32.shr_s', None, 0), 123 | Opcode(0x76, 'i32.shr_u', None, 0), 124 | Opcode(0x77, 'i32.rotl', None, 0), 125 | Opcode(0x78, 'i32.rotr', None, 0), 126 | Opcode(0x79, 'i64.clz', None, 0), 127 | Opcode(0x7a, 'i64.ctz', None, 0), 128 | Opcode(0x7b, 'i64.popcnt', None, 0), 129 | Opcode(0x7c, 'i64.add', None, 0), 130 | Opcode(0x7d, 'i64.sub', None, 0), 131 | Opcode(0x7e, 'i64.mul', None, 0), 132 | Opcode(0x7f, 'i64.div_s', None, 0), 133 | Opcode(0x80, 'i64.div_u', None, 0), 134 | Opcode(0x81, 'i64.rem_s', None, 0), 135 | Opcode(0x82, 'i64.rem_u', None, 0), 136 | Opcode(0x83, 'i64.and', None, 0), 137 | Opcode(0x84, 'i64.or', None, 0), 138 | Opcode(0x85, 'i64.xor', None, 0), 139 | Opcode(0x86, 'i64.shl', None, 0), 140 | Opcode(0x87, 'i64.shr_s', None, 0), 141 | Opcode(0x88, 'i64.shr_u', None, 0), 142 | Opcode(0x89, 'i64.rotl', None, 0), 143 | Opcode(0x8a, 'i64.rotr', None, 0), 144 | Opcode(0x8b, 'f32.abs', None, 0), 145 | Opcode(0x8c, 'f32.neg', None, 0), 146 | Opcode(0x8d, 'f32.ceil', None, 0), 147 | Opcode(0x8e, 'f32.floor', None, 0), 148 | Opcode(0x8f, 'f32.trunc', None, 0), 149 | Opcode(0x90, 'f32.nearest', None, 0), 150 | Opcode(0x91, 'f32.sqrt', None, 0), 151 | Opcode(0x92, 'f32.add', None, 0), 152 | Opcode(0x93, 'f32.sub', None, 0), 153 | Opcode(0x94, 'f32.mul', None, 0), 154 | Opcode(0x95, 'f32.div', None, 0), 155 | Opcode(0x96, 'f32.min', None, 0), 156 | Opcode(0x97, 'f32.max', None, 0), 157 | Opcode(0x98, 'f32.copysign', None, 0), 158 | Opcode(0x99, 'f64.abs', None, 0), 159 | Opcode(0x9a, 'f64.neg', None, 0), 160 | Opcode(0x9b, 'f64.ceil', None, 0), 161 | Opcode(0x9c, 'f64.floor', None, 0), 162 | Opcode(0x9d, 'f64.trunc', None, 0), 163 | Opcode(0x9e, 'f64.nearest', None, 0), 164 | Opcode(0x9f, 'f64.sqrt', None, 0), 165 | Opcode(0xa0, 'f64.add', None, 0), 166 | Opcode(0xa1, 'f64.sub', None, 0), 167 | Opcode(0xa2, 'f64.mul', None, 0), 168 | Opcode(0xa3, 'f64.div', None, 0), 169 | Opcode(0xa4, 'f64.min', None, 0), 170 | Opcode(0xa5, 'f64.max', None, 0), 171 | Opcode(0xa6, 'f64.copysign', None, 0), 172 | 173 | Opcode(0xa7, 'i32.wrap/i64', None, 0), 174 | Opcode(0xa8, 'i32.trunc_s/f32', None, 0), 175 | Opcode(0xa9, 'i32.trunc_u/f32', None, 0), 176 | Opcode(0xaa, 'i32.trunc_s/f64', None, 0), 177 | Opcode(0xab, 'i32.trunc_u/f64', None, 0), 178 | Opcode(0xac, 'i64.extend_s/i32', None, 0), 179 | Opcode(0xad, 'i64.extend_u/i32', None, 0), 180 | Opcode(0xae, 'i64.trunc_s/f32', None, 0), 181 | Opcode(0xaf, 'i64.trunc_u/f32', None, 0), 182 | Opcode(0xb0, 'i64.trunc_s/f64', None, 0), 183 | Opcode(0xb1, 'i64.trunc_u/f64', None, 0), 184 | Opcode(0xb2, 'f32.convert_s/i32', None, 0), 185 | Opcode(0xb3, 'f32.convert_u/i32', None, 0), 186 | Opcode(0xb4, 'f32.convert_s/i64', None, 0), 187 | Opcode(0xb5, 'f32.convert_u/i64', None, 0), 188 | Opcode(0xb6, 'f32.demote/f64', None, 0), 189 | Opcode(0xb7, 'f64.convert_s/i32', None, 0), 190 | Opcode(0xb8, 'f64.convert_u/i32', None, 0), 191 | Opcode(0xb9, 'f64.convert_s/i64', None, 0), 192 | Opcode(0xba, 'f64.convert_u/i64', None, 0), 193 | Opcode(0xbb, 'f64.promote/f32', None, 0), 194 | 195 | Opcode(0xbc, 'i32.reinterpret/f32', None, 0), 196 | Opcode(0xbd, 'i64.reinterpret/f64', None, 0), 197 | Opcode(0xbe, 'f32.reinterpret/i32', None, 0), 198 | Opcode(0xbf, 'f64.reinterpret/i64', None, 0), 199 | 200 | Opcode(0xc0, 'i32.extend_s/i8', None, 0), 201 | 202 | Opcode(0xfc0a, 'memory.copy', None, 0), 203 | Opcode(0xfc0b, 'memory.fill', None, 0), 204 | ] 205 | 206 | OPCODE_MAP = {x.id: x for x in OPCODES} 207 | 208 | # Generate integer constants for opcodes. 209 | for cur_op in OPCODES: 210 | globals()[ 211 | 'OP_' + cur_op.mnemonic.upper().replace('.', '_').replace('/', '_') 212 | ] = cur_op.id 213 | -------------------------------------------------------------------------------- /wasm/types.py: -------------------------------------------------------------------------------- 1 | """Defines a simple, generic data (de)serialization mechanism.""" 2 | from __future__ import print_function, absolute_import, division, unicode_literals 3 | 4 | from .compat import add_metaclass, byte2int, indent, deprecated_func 5 | import collections 6 | import logging 7 | import struct as pystruct 8 | 9 | try: 10 | from collections import Callable 11 | except ImportError: 12 | # for Python 3.10+ 13 | from collections.abc import Callable 14 | 15 | logger = logging.getLogger() 16 | 17 | 18 | class WasmField(object): 19 | """ 20 | Abstract base class for all fields. 21 | 22 | Fields are purely a (de)serialization mechanism. They don't hold the value 23 | of decoded information, but take Python data-types and convert them 24 | to a raw byte format or vice versa. Thus, a field instance can be reused 25 | to de/encode multiple values. 26 | 27 | Besides the abstract interface, implements type counting and IDing to allow 28 | field order detection in Python 2, where `__prepare__` doesn't exist yet. 29 | In order to work correctly, field instances MUST NOT be shared between 30 | multiple structures using it but have to be instantiated per structure. 31 | """ 32 | _type_ctr = 0 33 | 34 | def __init__(self): 35 | self._type_id = WasmField._type_ctr 36 | WasmField._type_ctr += 1 37 | 38 | def from_raw(self, struct, raw): 39 | raise NotImplementedError() 40 | 41 | def to_string(self, value): 42 | return repr(value) 43 | 44 | 45 | class UIntNField(WasmField): 46 | """Field handling an unsigned LE int of fixed size.""" 47 | CONVERTER_MAP = { 48 | 8: pystruct.Struct(' 1000 else str(value) 87 | 88 | 89 | class SignedLeb128Field(WasmField): 90 | """ 91 | Field handling signed LEB128 values. 92 | https://en.wikipedia.org/wiki/LEB128 93 | """ 94 | def from_raw(self, ctx, raw): 95 | offs = 0 96 | val = 0 97 | bits = 0 98 | 99 | while True: 100 | segment = byte2int(raw[offs]) 101 | val |= (segment & 0x7F) << bits 102 | offs += 1 103 | bits += 7 104 | if not (segment & 0x80): 105 | break 106 | 107 | if val & (1 << (bits - 1)): 108 | val -= 1 << bits 109 | 110 | return offs, val, self 111 | 112 | 113 | class CondField(WasmField): 114 | """Optionalizes a field, depending on the context.""" 115 | def __init__(self, field, condition, **kwargs): 116 | super(CondField, self).__init__(**kwargs) 117 | self.field = field 118 | self.condition = condition 119 | 120 | def from_raw(self, ctx, raw): 121 | if self.condition(ctx): 122 | return self.field.from_raw(ctx, raw) 123 | return 0, None, self 124 | 125 | def to_string(self, value): 126 | return 'None' if value is None else self.field.to_string(value) 127 | 128 | 129 | class RepeatField(WasmField): 130 | """Repeats a field, having the repeat count depend on the context.""" 131 | def __init__(self, field, repeat_count_getter, **kwargs): 132 | super(RepeatField, self).__init__(**kwargs) 133 | self.field = field 134 | self.repeat_count_getter = repeat_count_getter 135 | 136 | def from_raw(self, ctx, raw): 137 | repeat_count = self.repeat_count_getter(ctx) 138 | 139 | # Avoiding complex processing for byte arrays. 140 | if type(self.field) == UIntNField and self.field.n == 8: 141 | return repeat_count, raw[:repeat_count], self 142 | 143 | # For more complex types, invoke the field for parsing the 144 | # individual fields. 145 | offs = 0 146 | items = [] 147 | for i in range(repeat_count): 148 | length, item, element_type = self.field.from_raw(ctx, raw[offs:]) 149 | offs += length 150 | items.append(item) 151 | 152 | return offs, items, self 153 | 154 | def to_string(self, value): 155 | if value is None: 156 | return 'None' 157 | if len(value) > 100: 158 | return '' 159 | if len(value) == 0: 160 | return '[]' 161 | if isinstance(value[0], StructureData): 162 | return '\n' + indent( 163 | '\n'.join(self.field.to_string(x) for x in value), 164 | ' ' 165 | ) 166 | else: 167 | return '[' + ', '.join(self.field.to_string(x) for x in value) + ']' 168 | 169 | 170 | class ConstField(WasmField): 171 | """Pseudo-Field, always returning a constant, consuming/generating no data.""" 172 | def __init__(self, const, **kwargs): 173 | super(ConstField, self).__init__(**kwargs) 174 | self.const = const 175 | 176 | def from_raw(self, ctx, raw): 177 | return 0, self.const, self 178 | 179 | 180 | class ChoiceField(WasmField): 181 | """Depending on context, either represent this or that field type.""" 182 | _shared_none_field = ConstField(None) 183 | 184 | def __init__(self, choice_field_map, choice_getter, **kwargs): 185 | super(ChoiceField, self).__init__(**kwargs) 186 | self.choice_field_map = choice_field_map 187 | self.choice_getter = choice_getter 188 | 189 | def from_raw(self, ctx, raw): 190 | choice = self.choice_getter(ctx) 191 | if choice is None: 192 | return 0, None, self._shared_none_field 193 | return self.choice_field_map[choice].from_raw(ctx, raw) 194 | 195 | 196 | class BytesField(RepeatField): 197 | """Shorthand for U8 `RepeatField`, adding string support.""" 198 | def __init__(self, length_getter, is_str=False): 199 | super(BytesField, self).__init__(UIntNField(8), length_getter) 200 | self.is_str = is_str 201 | 202 | def to_string(self, value): 203 | if not self.is_str: 204 | return super(BytesField, self).to_string(value) 205 | 206 | try: 207 | return '"' + bytearray(value).decode('utf8') + '"' 208 | except UnicodeDecodeError: 209 | return '' 210 | 211 | 212 | FieldMeta = collections.namedtuple('FieldMeta', 'name field') 213 | 214 | 215 | class MetaInfo(object): 216 | """Meta information for a `Structure`.""" 217 | def __init__(self): 218 | self.fields = [] 219 | self.data_class = None 220 | self.structure = None 221 | 222 | 223 | class StructureData(object): 224 | """Base class for generated structure data classes.""" 225 | __slots__ = ('_meta', '_decoder_meta') 226 | 227 | def __init__(self, for_decoding=False): 228 | self._decoder_meta = {'lengths': {}, 'types': {}} if for_decoding else None 229 | for cur_field_name, cur_field in self._meta.fields: 230 | setattr(self, cur_field_name, None) 231 | 232 | def get_meta(self): 233 | """ 234 | Obtains meta info for this object. The object returned is shared 235 | between all objects of the same structure type. 236 | 237 | A getter is utilized here instead of a property to allow strict 238 | distinction of meta info from regular fields. 239 | """ 240 | return self._meta 241 | 242 | def get_decoder_meta(self): 243 | """ 244 | Obtains meta info from the decoder, like byte length in raw format. 245 | For objects not created through decoding, `None` is returned. 246 | """ 247 | return self._decoder_meta 248 | 249 | @property 250 | @deprecated_func 251 | def _data_meta(self): 252 | """Property emulating old name of `_decoder_meta`. Deprecated, do not use.""" 253 | return self._decoder_meta 254 | 255 | 256 | class StructureMeta(type): 257 | """ 258 | Metaclass used to create `Structure` classes, 259 | populating their `_meta` field and performing sanity checks. 260 | """ 261 | def __new__(mcs, name, bases, cls_dict): 262 | # Inject _meta. 263 | meta = cls_dict['_meta'] = MetaInfo() 264 | 265 | # Iterate over fields, move relevant data to meta. 266 | for cur_field_name, cur_field in list(cls_dict.items()): 267 | # Is callable, property, private or magic? We don't touch those. 268 | if ( 269 | isinstance(cur_field, Callable) or 270 | isinstance(cur_field, property) or 271 | cur_field_name.startswith('_') 272 | ): 273 | pass 274 | 275 | # Is one of our types? Metafy. 276 | elif isinstance(cur_field, WasmField): 277 | meta.fields.append(FieldMeta(cur_field_name, cur_field)) 278 | 279 | # Unknown type, print warning. 280 | else: 281 | logger.warning( 282 | 'Non-WasmField typed field "{}" found on type "{}". ' 283 | 'Ignoring.'.format(cur_field_name, name) 284 | ) 285 | 286 | # Order fields by type ID (see `WasmField` for the "why"). 287 | meta.fields = sorted(meta.fields, key=lambda x: x.field._type_id) 288 | 289 | # Create data class type for "instances". 290 | class GeneratedStructureData(StructureData): 291 | __slots__ = [x for x, _ in meta.fields] 292 | _meta = meta 293 | meta.data_class = GeneratedStructureData 294 | 295 | # Create class, saving type ref in meta. 296 | meta.structure = type.__new__(mcs, name, bases, cls_dict) 297 | return meta.structure 298 | 299 | 300 | @add_metaclass(StructureMeta) 301 | class Structure(WasmField): 302 | """Represents a collection of named fields.""" 303 | def from_raw(self, ctx, raw): 304 | offs = 0 305 | data = self._meta.data_class(for_decoding=True) 306 | for cur_field_name, cur_field in self._meta.fields: 307 | data_len, val, data_type = cur_field.from_raw(data, raw[offs:]) 308 | setattr(data, cur_field_name, val) 309 | decoder_meta = data.get_decoder_meta() 310 | decoder_meta['lengths'][cur_field_name] = data_len 311 | decoder_meta['types'][cur_field_name] = data_type 312 | offs += data_len 313 | return offs, data, self 314 | 315 | def to_string(self, value): 316 | lines = ['- [ {}'.format(self.__class__.__name__)] 317 | for cur_field_name, cur_field in self._meta.fields: 318 | field_val = getattr(value, cur_field_name) 319 | field_type = value.get_decoder_meta()['types'][cur_field_name] 320 | if isinstance(field_val, StructureData): 321 | lines.append(' | {} =\n{}'.format( 322 | cur_field_name, 323 | indent(field_type.to_string(field_val), ' ') 324 | )) 325 | else: 326 | lines.append(' | {} = {}'.format( 327 | cur_field_name, 328 | field_type.to_string(field_val) 329 | )) 330 | 331 | return '\n'.join(lines) 332 | -------------------------------------------------------------------------------- /wasm/wasmtypes.py: -------------------------------------------------------------------------------- 1 | """Defines types used for both modules and bytecode.""" 2 | from __future__ import print_function, absolute_import, division, unicode_literals 3 | 4 | from .types import UIntNField, UnsignedLeb128Field, SignedLeb128Field 5 | 6 | 7 | def _make_shortcut(klass, *args, **kwargs): 8 | def proxy(**kwargs2): 9 | kwargs.update(kwargs2) 10 | return klass(*args, **kwargs) 11 | return proxy 12 | 13 | 14 | UInt8Field = _make_shortcut(UIntNField, 8) 15 | UInt16Field = _make_shortcut(UIntNField, 16) 16 | UInt32Field = _make_shortcut(UIntNField, 32) 17 | UInt64Field = _make_shortcut(UIntNField, 64) 18 | 19 | VarUInt1Field = _make_shortcut(UnsignedLeb128Field) 20 | VarUInt7Field = _make_shortcut(UnsignedLeb128Field) 21 | VarUInt32Field = _make_shortcut(UnsignedLeb128Field) 22 | 23 | VarInt7Field = _make_shortcut(SignedLeb128Field) 24 | VarInt32Field = _make_shortcut(SignedLeb128Field) 25 | VarInt64Field = _make_shortcut(SignedLeb128Field) 26 | 27 | ElementTypeField = VarInt7Field 28 | ValueTypeField = VarInt7Field 29 | ExternalKindField = UInt8Field 30 | BlockTypeField = VarInt7Field 31 | 32 | 33 | # 34 | # Constants 35 | # 36 | 37 | 38 | # Section types. 39 | SEC_UNK = 0 40 | SEC_TYPE = 1 41 | SEC_IMPORT = 2 42 | SEC_FUNCTION = 3 43 | SEC_TABLE = 4 44 | SEC_MEMORY = 5 45 | SEC_GLOBAL = 6 46 | SEC_EXPORT = 7 47 | SEC_START = 8 48 | SEC_ELEMENT = 9 49 | SEC_CODE = 10 50 | SEC_DATA = 11 51 | SEC_DATACOUNT = 12 52 | SEC_NAME = b'name' 53 | 54 | # Language types. 55 | LANG_TYPE_I32 = -0x01 56 | LANG_TYPE_I64 = -0x02 57 | LANG_TYPE_F32 = -0x03 58 | LANG_TYPE_F64 = -0x04 59 | LANG_TYPE_ANYFUNC = -0x10 60 | LANG_TYPE_FUNC = -0x20 61 | LANG_TYPE_EMPTY = -0x40 62 | 63 | # Value types. 64 | VAL_TYPE_I32 = LANG_TYPE_I32 65 | VAL_TYPE_I64 = LANG_TYPE_I64 66 | VAL_TYPE_F32 = LANG_TYPE_F32 67 | VAL_TYPE_F64 = LANG_TYPE_F64 68 | 69 | # Name subsection types. 70 | NAME_SUBSEC_FUNCTION = 1 71 | NAME_SUBSEC_LOCAL = 2 72 | 73 | # Mutability in global types. 74 | IMMUTABLE = 0 75 | MUTABLE = 1 --------------------------------------------------------------------------------