├── .github
    ├── ISSUE_TEMPLATE
    │   ├── bug_report.md
    │   └── feature_request.md
    └── workflows
    │   ├── dependency-review.yml
    │   └── test.yml
├── .gitignore
├── .gitmodules
├── README.md
├── clean.sh
├── images
    └── logo.png
├── launcher.py
├── output
    ├── log
    │   └── .placeholder
    └── result
    │   └── .placeholder
├── requirements.txt
├── seewasm
    ├── __init__.py
    ├── __init__.pyc
    ├── analysis
    │   ├── __init__.py
    │   ├── __init__.pyc
    │   └── cfg.py
    ├── arch
    │   ├── __init__.py
    │   ├── __init__.pyc
    │   └── wasm
    │   │   ├── __init__.py
    │   │   ├── __init__.pyc
    │   │   ├── analyzer.py
    │   │   ├── cfg.py
    │   │   ├── configuration.py
    │   │   ├── constant.py
    │   │   ├── decode.py
    │   │   ├── disassembler.py
    │   │   ├── dwarfParser.py
    │   │   ├── emulator.py
    │   │   ├── exceptions.py
    │   │   ├── format.py
    │   │   ├── graph.py
    │   │   ├── instruction.py
    │   │   ├── instructions
    │   │       ├── ArithmeticInstructions.py
    │   │       ├── BitwiseInstructions.py
    │   │       ├── ConstantInstructions.py
    │   │       ├── ControlInstructions.py
    │   │       ├── ConversionInstructions.py
    │   │       ├── LogicalInstructions.py
    │   │       ├── MemoryInstructions.py
    │   │       ├── ParametricInstructions.py
    │   │       ├── VariableInstructions.py
    │   │       └── __init__.py
    │   │   ├── lib
    │   │       ├── c_lib.py
    │   │       ├── go_lib.py
    │   │       ├── utils.py
    │   │       └── wasi.py
    │   │   ├── memory.py
    │   │   ├── solver.py
    │   │   ├── utils.py
    │   │   ├── visualizator.py
    │   │   ├── vmstate.py
    │   │   └── wasm.py
    ├── core
    │   ├── __init__.py
    │   ├── __init__.pyc
    │   ├── basicblock.py
    │   ├── edge.py
    │   ├── function.py
    │   ├── instruction.py
    │   └── utils.py
    └── engine
    │   ├── __init__.py
    │   ├── __init__.pyc
    │   ├── disassembler.py
    │   ├── emulator.py
    │   └── engine.py
├── test.py
├── test
    ├── c
    │   └── src
    │   │   ├── hello.c
    │   │   └── sym.c
    ├── go
    │   └── src
    │   │   └── hello.go
    ├── hello_world.wasm
    ├── hello_world_go.wasm
    ├── hello_world_rust.wasm
    ├── password.wasm
    ├── rust
    │   └── hello
    │   │   ├── Cargo.toml
    │   │   └── src
    │   │       └── main.rs
    ├── sym_c.wasm
    ├── test.wasm
    ├── test_linux.py
    ├── test_return.wasm
    └── test_unreachable.wasm
└── wasm
    ├── __init__.py
    ├── __main__.py
    ├── compat.py
    ├── decode.py
    ├── formatter.py
    ├── immtypes.py
    ├── modtypes.py
    ├── opcodes.py
    ├── types.py
    └── wasmtypes.py


/.github/ISSUE_TEMPLATE/bug_report.md:
--------------------------------------------------------------------------------
 1 | ---
 2 | name: Bug report
 3 | about: Create a report to help us improve
 4 | title: ''
 5 | labels: ''
 6 | assignees: ''
 7 | 
 8 | ---
 9 | 
10 | **Describe the bug**
11 | A clear and concise description of what the bug is.
12 | 
13 | **To Reproduce**
14 | Steps to reproduce the behavior:
15 | 1. Go to '...'
16 | 2. Click on '....'
17 | 3. Scroll down to '....'
18 | 4. See error
19 | 
20 | **Expected behavior**
21 | A clear and concise description of what you expected to happen.
22 | 
23 | **Additional context**
24 | Add any other context about the problem here.
25 | 


--------------------------------------------------------------------------------
/.github/ISSUE_TEMPLATE/feature_request.md:
--------------------------------------------------------------------------------
 1 | ---
 2 | name: Feature request
 3 | about: Suggest an idea for this project
 4 | title: ''
 5 | labels: ''
 6 | assignees: ''
 7 | 
 8 | ---
 9 | 
10 | **Is your feature request related to a problem? Please describe.**
11 | A clear and concise description of what the problem is. Ex. I'm always frustrated when [...]
12 | 
13 | **Describe the solution you'd like**
14 | A clear and concise description of what you want to happen.
15 | 
16 | **Describe alternatives you've considered**
17 | A clear and concise description of any alternative solutions or features you've considered.
18 | 
19 | **Additional context**
20 | Add any other context or screenshots about the feature request here.
21 | 


--------------------------------------------------------------------------------
/.github/workflows/dependency-review.yml:
--------------------------------------------------------------------------------
 1 | # Dependency Review Action
 2 | #
 3 | # This Action will scan dependency manifest files that change as part of a Pull Request,
 4 | # surfacing known-vulnerable versions of the packages declared or updated in the PR.
 5 | # Once installed, if the workflow run is marked as required, PRs introducing known-vulnerable
 6 | # packages will be blocked from merging.
 7 | #
 8 | # Source repository: https://github.com/actions/dependency-review-action
 9 | # Public documentation: https://docs.github.com/en/code-security/supply-chain-security/understanding-your-software-supply-chain/about-dependency-review#dependency-review-enforcement
10 | name: 'Dependency review'
11 | on:
12 |   pull_request:
13 |     branches: [ "main" ]
14 | 
15 | # If using a dependency submission action in this workflow this permission will need to be set to:
16 | #
17 | # permissions:
18 | #   contents: write
19 | #
20 | # https://docs.github.com/en/enterprise-cloud@latest/code-security/supply-chain-security/understanding-your-software-supply-chain/using-the-dependency-submission-api
21 | permissions:
22 |   contents: read
23 |   # Write permissions for pull-requests are required for using the `comment-summary-in-pr` option, comment out if you aren't using this option
24 |   pull-requests: write
25 | 
26 | jobs:
27 |   dependency-review:
28 |     runs-on: ubuntu-latest
29 |     steps:
30 |       - name: 'Checkout repository'
31 |         uses: actions/checkout@v4
32 |       - name: 'Dependency Review'
33 |         uses: actions/dependency-review-action@v4
34 |         # Commonly enabled options, see https://github.com/actions/dependency-review-action#configuration-options for all available options.
35 |         with:
36 |           comment-summary-in-pr: always
37 |         #   fail-on-severity: moderate
38 |         #   deny-licenses: GPL-1.0-or-later, LGPL-2.0-or-later
39 |         #   retry-on-snapshot-warnings: true
40 | 


--------------------------------------------------------------------------------
/.github/workflows/test.yml:
--------------------------------------------------------------------------------
 1 | name: Test
 2 | 
 3 | on:
 4 |   push:
 5 |     paths:
 6 |       - '**.py'
 7 |       - ".github/workflows/*.yml"
 8 |   pull_request:
 9 |     types: [opened, synchronize, reopened]
10 |     paths:
11 |       - '**.py'
12 |       - ".github/workflows/*.yml"
13 | 
14 | jobs:
15 |   test:
16 |     strategy:
17 |       fail-fast: false
18 |       matrix:
19 |         python:
20 |           - "3.7"
21 |           - "3.8"
22 |           - "3.9"
23 |           - "3.10"
24 |           - "3.11"
25 |           - "3.12"
26 |     runs-on: ubuntu-latest
27 |     steps:
28 |       - uses: actions/checkout@v4
29 |       - name: Setup Python
30 |         uses: actions/setup-python@v5
31 |         with:
32 |           python-version: ${{ matrix.python }}
33 |           cache: pip
34 |           # Set this option if you want the action to check for the latest available version that satisfies the version spec.
35 |           # check-latest: # optional
36 |       - name: Install requirements
37 |         run: |
38 |           pip install -r requirements.txt
39 |           sudo apt update && sudo apt install graphviz
40 |       - name: Cache wabt
41 |         id: cache-wabt
42 |         uses: actions/cache@v4
43 |         with:
44 |           path: wabt-1.0.32
45 |           key: wabt
46 |       - name: Install wabt
47 |         if: steps.cache-wabt.outputs.cache-hit != 'true'
48 |         run: |
49 |           curl -JLO "https://github.com/WebAssembly/wabt/releases/download/1.0.32/wabt-1.0.32-ubuntu.tar.gz"
50 |           tar xzf wabt-1.0.32-ubuntu.tar.gz
51 |       - name: Cache wasi-sdk
52 |         id: cache-wasi-sdk
53 |         uses: actions/cache@v4
54 |         with:
55 |           path: wasi-sdk-22.0
56 |           key: wasi-sdk
57 |       - name: Install wasi-sdk
58 |         if: steps.cache-wasi-sdk.outputs.cache-hit != 'true'
59 |         run: |
60 |           curl -JLO "https://github.com/WebAssembly/wasi-sdk/releases/download/wasi-sdk-22/wasi-sdk-22.0-linux.tar.gz"
61 |           tar xzf wasi-sdk-22.0-linux.tar.gz
62 |       - name: Cache wasmtime
63 |         id: cache-wasmtime
64 |         uses: actions/cache@v4
65 |         with:
66 |           path: ~/.wasmtime
67 |           key: wasmtime
68 |       - name: Install wasmtime
69 |         if: steps.cache-wasmtime.outputs.cache-hit != 'true'
70 |         run: |
71 |           curl https://wasmtime.dev/install.sh -sSf | bash
72 |       - name: Install Rust
73 |         run: |
74 |           curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y
75 |           rustup target add wasm32-wasi
76 |       - uses: actions/setup-go@v5
77 |         with:
78 |           go-version: '1.22'  # quoted: a bare YAML number would turn e.g. 1.20 into 1.2
79 |           check-latest: true
80 |           cache: true
81 |       - name: Install tinygo
82 |         run: |
83 |           wget https://github.com/tinygo-org/tinygo/releases/download/v0.32.0/tinygo_0.32.0_amd64.deb
84 |           sudo dpkg -i tinygo_0.32.0_amd64.deb
85 |       - name: Run pytest
86 |         run: |
87 |           # wabt and wasi-sdk were unpacked into the workspace by the steps above
88 |           export PATH=$(pwd)/wabt-1.0.32/bin:$PATH
89 |           export PATH=$(pwd)/wasi-sdk-22.0/bin:$PATH
90 |           # rustup and the wasmtime installer install under $HOME (see the ~/.wasmtime cache path)
91 |           export PATH=$HOME/.cargo/bin:$PATH
92 |           export PATH=$HOME/.wasmtime/bin:$PATH
93 |           pytest test.py --tb=short --durations=0
94 |           pytest test/test_linux.py --tb=short --durations=0


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
 1 | env/
 2 | .devcontainer/
 3 | .pytest_cache/
 4 | .DS_Store
 5 | __pycache__
 6 | .idea
 7 | .vscode
 8 | ready_to_production.py
 9 | *.wat
10 | output/log/*
11 | output/result/*
12 | bfs_test/*
13 | **/debug
14 | **/rust/**/target


--------------------------------------------------------------------------------
/.gitmodules:
--------------------------------------------------------------------------------
1 | [submodule "Wasm-samples"]
2 | 	path = Wasm-samples
3 | 	url = https://github.com/HNYuuu/Wasm-samples.git
4 | [submodule "DSL"]
5 | 	path = DSL
6 | 	url = https://github.com/HNYuuu/DSL.git
7 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
  1 | # SeeWasm [![Test](https://github.com/HNYuuu/SeeWasm/actions/workflows/test.yml/badge.svg?branch=main)](https://github.com/HNYuuu/SeeWasm)
  2 | ![SeeWasm-logo](./images/logo.png)
  3 | 
  4 | 
  5 | WebAssembly (Wasm), a low-level language, offers several advantages and can be translated from high-level mainstream programming languages such as C, C++, Go, and Rust.
  6 | 
  7 | In this project, we have implemented a **symbolic execution engine** for Wasm binaries, SeeWasm. Our goal is to build a toolchain that takes source code files (written in other programming languages) as input, performs symbolic execution, and outputs feasible paths with their solutions for further analysis (e.g., vulnerability detection).
  8 | 
  9 | ## Prerequisites 
 10 | To run SeeWasm, ensure you have Python 3.7 or a later version installed. Then, install the required Python libraries by executing the following command:
 11 | 
 12 | ```shell
 13 | python3 -m pip install -r requirements.txt
 14 | ```
 15 | 
 16 | If you encounter issues building the wheel for leb128, update pip and wheel, then reinstall leb128:
 17 | 
 18 | ```shell
 19 | pip install --upgrade pip wheel
 20 | pip install --force-reinstall leb128==1.0.4
 21 | ```
 22 | 
 23 | To verify everything is set up correctly, run the following command:
 24 | 
 25 | ```shell
 26 | python3 -m pytest test.py -vv
 27 | ```
 28 | 
 29 | This command traverses the `./test` folder and performs symbolic execution on all Wasm binaries.
 30 | If successful, a success message will be displayed, typically **after several seconds**.
 31 | 
 32 | Sample Wasm binaries, including "Hello World" in C, Go, and Rust, are provided in the folder. 
 33 | These can be compiled from their respective source languages; the compilation processes are detailed in [WASI tutorial](https://github.com/bytecodealliance/wasmtime/blob/main/docs/WASI-tutorial.md#compiling-to-wasi) (C and Rust), and [WASI "Hello World" example](https://wasmbyexample.dev/examples/wasi-hello-world/wasi-hello-world.go.en-us.html) (Go).
 34 | 
 35 | For Rust and C++ projects, you can use `wasm-tools` to demangle symbol names in the `name` section. Install it with `cargo install wasm-tools` and confirm the installation with `wasm-tools --version`. Details can be found at [Wasm Tools](https://github.com/bytecodealliance/wasm-tools).
 36 | 
 37 | ## Analyze
 38 | This section demonstrates how to use SeeWasm to analyze a generated WebAssembly file.
 39 | 
 40 | ### Options
 41 | All valid options are shown below:
 42 | 
 43 | ```shell
 44 | SeeWasm, a symbolic execution engine for Wasm binaries
 45 | 
 46 | Optional arguments:
 47 |   -h, --help            show this help message and exit
 48 | 
 49 | Input arguments:
 50 |   -f WASMMODULE, --file WASMMODULE
 51 |                         binary file (.wasm)
 52 |   --stdin STDIN         stream of stdin
 53 |   --sym_stdin SYM_STDIN
 54 |                         stream of stdin in N bytes symbols
 55 |   --args ARGS           command line
 56 |   --sym_args SYM_ARGS [SYM_ARGS ...]
 57 |                         command line in symbols, each of them is N bytes at most
 58 |   --sym_files SYM_FILES SYM_FILES
 59 |                         Create N symbolic files, each of them has M symbolic bytes
 60 |   --source_type [{c,go,rust}]
 61 |                         type of source file
 62 | 
 63 | Features:
 64 |   --entry ENTRY         set entry point as the specilized function
 65 |   --visualize           visualize the ICFG on basic blocks level
 66 |   --incremental         enable incremental solving
 67 |   -v [{warning,info,debug}], --verbose [{warning,info,debug}]
 68 |                         set the logging level
 69 | 
 70 | Analyze:
 71 |   -s, --symbolic        perform the symbolic execution
 72 |   --search [{dfs,bfs,random,interval}]
 73 |                         set the search algorithm (default: dfs)
 74 | ```
 75 | 
 76 | We will detail these options according to their functionalities.
 77 | 
 78 | ### Input Arguments
 79 | SeeWasm can disassemble the target binary and construct valid inputs based on the values of the input arguments.
 80 | 
 81 | Specifically, the `-f` option is mandatory, and it must be followed by the path of the Wasm binary to be analyzed. The `--stdin STRING` and `--sym_stdin N` options allow users to pass concrete and symbolic bytes through the stdin stream, respectively. A concrete string must be passed using `--stdin`, while a string consisting of `N` symbolic characters must be passed using `--sym_stdin`. For example, `--sym_stdin 5` inputs 5 symbolic bytes for functions that read from stdin.
 82 | 
 83 | Similarly, `--args STRING1, STRING2, ...` and `--sym_args N1, N2, ...` options pass concrete and symbolic arguments to the Wasm binary. For instance, if `main` requires three arguments, each two bytes long, `--sym_args 2 2 2` is enough.
 84 | 
 85 | Some programs interact with files. SeeWasm simulates this using a *symbolic file system*. Users can create `N` symbolic files, each with up to `M` bytes, using the `--sym_files N M` option.
 86 | 
 87 | As multiple high-level programming languages can be compiled to Wasm binaries, we have implemented specific optimizations. To take advantage of these optimizations, users must indicate the source language using the `--source_type` option.
 88 | 
 89 | ### Features
 90 | `--entry` specifies the entry function from which symbolic execution begins. By default, the entry function is `__original_main`. Users must specify a proper entry function to ensure the symbolic execution is performed correctly.
 91 | 
 92 | The input Wasm is parsed into an Interprocedural Control Flow Graph (ICFG), which can be visualized for debugging purposes using the `--visualize` option (requires `graphviz`, installable via `sudo apt install graphviz` on Ubuntu).
 93 | 
 94 | The constraint solving process is a bottleneck for symbolic execution performance; however, we have implemented some optimizations to mitigate this issue. The `--incremental` flag enables *incremental solving*. Note that it may not always yield positive results during analysis, and is therefore optional.
 95 | 
 96 | The `-v` option controls the logging level, allowing users to adjust the verbosity of logging output to aid in debugging.
 97 | 
 98 | ### Analyze
 99 | The `-s` option is mandatory. It enables symbolic execution analysis on the given Wasm binary.
100 | 
101 | The `--search` option specifies the search algorithm used during symbolic execution. The default algorithm is Depth-First Search (DFS), but users can choose from the following options: `bfs`, `random`, and `interval`.
102 | 
103 | ## Output
104 | The output of SeeWasm, including logs and results, is stored in the `output` folder, with each file named according to the pattern `NAME_TIMESTAMP`.
105 | 
106 | The log file follows a specific format, which illustrates the call trace of the analyzed program:
107 | 
108 | ```log
109 | 2024-07-01 07:50:36,191 | WARNING | Totally remove 27 unrelated functions, around 50.000% of all functions
110 | 2024-07-01 07:50:36,205 | INFO | Call: __original_main -> __main_void
111 | 2024-07-01 07:50:36,218 | INFO | Call: __main_void -> __wasi_args_sizes_get
112 | 2024-07-01 07:50:36,219 | INFO | Call: args_sizes_get (import)
113 | 2024-07-01 07:50:36,219 | INFO | 	args_sizes_get, argc_addr: 70792, arg_buf_size_addr: 70796
114 | 2024-07-01 07:50:36,219 | INFO | Return: args_sizes_get (import)
115 | 2024-07-01 07:50:36,219 | INFO | Return: __wasi_args_sizes_get
116 | ...
117 | ```
118 | 
119 | The result is a JSON file containing feasible paths with their solutions, formatted as follows:
120 | 
121 | ```json
122 | {
123 |     "Status": "xxx",
124 |     "Solution": {"xxx"},
125 |     "Output": [
126 |         {
127 |             "name": "stdout",
128 |             "output": "xxx"
129 |         },
130 |         {
131 |             "name": "stderr",
132 |             "output": "xxx"
133 |         }
134 |     ]
135 | }
136 | ```
137 | 
138 | You can use `./clean.sh -f` to remove all files in the `output` folder.
139 | 
140 | ## Example
141 | To execute a program that takes no extra arguments or input, use the following command:
142 | 
143 | ```shell
144 | python3 launcher.py -f PATH_TO_WASM_BINARY -s
145 | ```
146 | 
147 | If complicated arguments are required, for example, a `base64` program with a `main` function like:
148 | 
149 | ```c
150 | // main of base64
151 | int main(int argc, char **argv)
152 | {
153 |   // environment setting
154 |   ...
155 | 
156 |   while ((opt = getopt_long(argc, argv, "diw:", long_options, NULL)) != -1)
157 |     switch (opt) {
158 |       // call functions according to passed arguments
159 |       ...
160 |     }
161 | 
162 |   // encode or decode
163 | }
164 | ```
165 | 
166 | The `base64` program expects two-byte arguments and a string input to encode or decode, producing output that is written to a file.
167 | Thus, the command to analyze `base64` is like:
168 | 
169 | ```shell
170 | python3 launcher.py -f PATH_TO_BASE64 -s --sym_args 2 --sym_stdin 5 --sym_files 1 10
171 | ```
172 | 
173 | ## Feedback
174 | 
175 | If you have any questions or need further clarification, please post on the [Issues](https://github.com/HNYuuu/SeeWasm/issues) page.
176 | 


--------------------------------------------------------------------------------
/clean.sh:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env bash
 2 | set -e  # abort as soon as any command exits with a non-zero status
 3 | 
 4 | OUTPUT_DIR=output  # relative path, resolved against the caller's working directory
 5 | 
 6 | error() {  # write "$1" to stderr as a bold red "Error: ..." line followed by a blank line
 7 |   command printf '\033[1;31mError: %s\033[0m\n\n' "$1" >&2
 8 | }
 9 | 
10 | usage() {  # print the help text below to stderr, verbatim
11 |     cat <<'END_USAGE' >&2
12 | clean.sh: remove and recreate empty "log" and "result" directories
13 | 
14 | USAGE:
15 |     clean.sh -[fi]
16 | 
17 | FLAGS:
18 |     -h, --help                  Prints help information
19 |     -i, --interactive           Interactive mode
20 |     -f, --force                 Force remove output files and directories
21 | 
22 | END_USAGE
23 | }
24 | 
25 | # parse command line options
26 | while [ $# -gt 0 ]
27 | do
28 |   arg="$1"
29 | 
30 |   case "$arg" in
31 |     -h|--help)
32 |       usage
33 |       exit 1
34 |       ;;
35 |     -f|--force)
36 |       shift # shift off the argument
37 |       rm -rf $OUTPUT_DIR
38 |       mkdir -p $OUTPUT_DIR/log $OUTPUT_DIR/result
39 |       touch $OUTPUT_DIR/log/.placeholder $OUTPUT_DIR/result/.placeholder
40 |       exit 0
41 |       ;;
42 |     -i|--interactive)
43 |       shift # shift off the argument
44 |       read -p "Are you sure you want to remove all output files and directories (rm -rf $OUTPUT_DIR)? [y/N] " -n 1 -r
45 |       echo
46 |       if [[ $REPLY =~ ^[Yy]$ ]]
47 |       then
48 |         rm -rf $OUTPUT_DIR
49 |         mkdir -p $OUTPUT_DIR/log $OUTPUT_DIR/result
50 |         touch $OUTPUT_DIR/log/.placeholder $OUTPUT_DIR/result/.placeholder
51 |         exit 0
52 |       fi
53 |       exit 1
54 |       ;;
55 |     *)
56 |       error "Unknown option: '$arg'"
57 |       usage
58 |       exit 1
59 |       ;;
60 |     esac
61 | done
62 | 
63 | usage
64 | exit 1


--------------------------------------------------------------------------------
/images/logo.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/PKU-ASAL/SeeWasm/e2a40c2fd2bb19fb7f6f5bd646a862cc5ddeaa67/images/logo.png


--------------------------------------------------------------------------------
/launcher.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python3
  2 | # -*- coding: utf-8 -*-
  3 | 
  4 | import argparse
  5 | import json
  6 | import sys
  7 | from datetime import datetime
  8 | from os import makedirs, path
  9 | 
 10 | import sh
 11 | 
 12 | from seewasm.arch.wasm.configuration import Configuration
 13 | from seewasm.arch.wasm.graph import Graph
 14 | from seewasm.arch.wasm.visualizator import visualize
 15 | 
 16 | 
 17 | def main():
 18 |     parser = argparse.ArgumentParser(
 19 |         description='SeeWasm, a symbolic execution engine for Wasm binaries')
 20 | 
 21 |     inputs = parser.add_argument_group('Input arguments')
 22 |     inputs.add_argument('-f', '--file',
 23 |                         type=argparse.FileType('rb'),
 24 |                         help='binary file (.wasm)',
 25 |                         metavar='WASMMODULE', required=True)
 26 |     inputs.add_argument('--stdin',
 27 |                         action='store',
 28 |                         type=str,
 29 |                         help='stream of stdin')
 30 |     inputs.add_argument('--sym_stdin',
 31 |                         action='store',
 32 |                         type=int,
 33 |                         nargs=1,
 34 |                         help='stream of stdin in N bytes symbols')
 35 |     inputs.add_argument('--args',
 36 |                         action='store',
 37 |                         type=str,
 38 |                         help='command line')
 39 |     inputs.add_argument(
 40 |         '--sym_args', type=int, nargs='+',
 41 |         help="command line in symbols, each of them is N bytes at most")
 42 |     inputs.add_argument(
 43 |         '--sym_files', type=int, nargs=2,
 44 |         help="Create N symbolic files, each of them has M symbolic bytes")
 45 |     inputs.add_argument(
 46 |         '--source_type', default='c', const='c', nargs='?',
 47 |         choices=['c', 'go', 'rust'],
 48 |         help='type of source file')
 49 | 
 50 |     features = parser.add_argument_group('Features')
 51 |     features.add_argument(
 52 |         '--entry', type=str, nargs=1, default=["__original_main"],
 53 |         help='set entry point as the specilized function')
 54 |     features.add_argument(
 55 |         '--visualize', action='store_true',
 56 |         help='visualize the ICFG on basic blocks level')
 57 |     features.add_argument(
 58 |         '--incremental', action='store_true',
 59 |         help='enable incremental solving')
 60 |     features.add_argument(
 61 |         '-v', '--verbose', default='warning', const='warning', nargs='?',
 62 |         choices=['warning', 'info', 'debug'],
 63 |         help='set the logging level')
 64 | 
 65 |     analyze = parser.add_argument_group('Analyze')
 66 |     analyze.add_argument(
 67 |         '-s', '--symbolic', action='store_true',
 68 |         help='perform the symbolic execution')
 69 |     analyze.add_argument(
 70 |         '--search', default='dfs', const='dfs', nargs='?',
 71 |         choices=['dfs', 'bfs', 'random', 'interval'],
 72 |         help='set the search algorithm (default: dfs)')
 73 | 
 74 |     args = parser.parse_args()
 75 | 
 76 |     module_bytecode = args.file.read()
 77 |     # create the corresponding wat file
 78 |     wat_file_path = args.file.name.replace('.wasm', '.wat')
 79 |     if not path.exists(wat_file_path):
 80 |         sh.Command('wasm2wat')([args.file.name, "-o", wat_file_path])
 81 |         print(
 82 |             f"The corresponding wat file is written in: {wat_file_path}",
 83 |             flush=True)
 84 | 
 85 |     # conduct symbolic execution
 86 |     if args.symbolic:
 87 |         Configuration.set_verbose_flag(args.verbose)
 88 |         Configuration.set_file(args.file.name)
 89 |         Configuration.set_entry(args.entry)
 90 |         Configuration.set_visualize(args.visualize)
 91 |         Configuration.set_source_type(args.source_type)
 92 |         Configuration.set_stdin(args.stdin, args.sym_stdin)
 93 |         Configuration.set_sym_files(args.sym_files)
 94 |         Configuration.set_incremental_solving(args.incremental)
 95 |         Configuration.set_elem_index_to_func(wat_file_path)
 96 |         Configuration.set_algo(args.search)
 97 | 
 98 |         command_file_name = f"./output/result/{Configuration.get_file_name()}_{Configuration.get_start_time()}/command.json"
 99 |         makedirs(path.dirname(command_file_name), exist_ok=False)
100 |         with open(command_file_name, 'w') as fp:
101 |             json.dump({"Command": " ".join(sys.argv)}, fp, indent=4)
102 | 
103 |         # --args and --sym_args can exist simultaneously
104 |         # their order are fixed, i.e., --args is in front of --sym_args
105 |         # the file_name is always the argv[0]
106 |         Configuration.set_args(
107 |             Configuration.get_file_name(),
108 |             args.args, args.sym_args)
109 | 
110 |         # import necessary part
111 |         from seewasm.arch.wasm.emulator import WasmSSAEmulatorEngine
112 | 
113 |         wasmVM = WasmSSAEmulatorEngine(module_bytecode)
114 |         # run the emulator for SSA
115 |         Graph.wasmVM = wasmVM
116 |         Graph.initialize()
117 |         # draw the ICFG on basic block level, and exit
118 |         if Configuration.get_visualize():
119 |             # draw here
120 |             graph_path = path.join("output", "visualized_graph", f"{Configuration.get_file_name()}_{Configuration.get_start_time()}.gv")
121 |             visualize(Graph, graph_path)
122 |             print(f"The visualization of ICFG is done.")
123 |             return
124 | 
125 |         graph = Graph()
126 |         graph.traverse()
127 |     else:
128 |         parser.print_help()
129 | 
130 | 
131 | if __name__ == '__main__':
132 |     job_start_time = datetime.now()
133 |     current_time_start = job_start_time.strftime("%Y-%m-%d %H:%M:%S_%f")
134 |     print(f"Start to analyze: {current_time_start}", flush=True)
135 |     Configuration.set_start_time(current_time_start)
136 | 
137 |     print(f"Running...", flush=True)
138 |     main()
139 |     print(f"Finished.", flush=True)
140 | 
141 |     job_end_time = datetime.now()
142 |     current_time_end = job_end_time.strftime("%Y-%m-%d %H:%M:%S_%f")
143 |     print(f"End of analyze: {current_time_end}", flush=True)
144 |     elapsed_time = job_end_time - job_start_time
145 |     print(f"Time elapsed: {elapsed_time}", flush=True)
146 | 


--------------------------------------------------------------------------------
/output/log/.placeholder:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/PKU-ASAL/SeeWasm/e2a40c2fd2bb19fb7f6f5bd646a862cc5ddeaa67/output/log/.placeholder


--------------------------------------------------------------------------------
/output/result/.placeholder:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/PKU-ASAL/SeeWasm/e2a40c2fd2bb19fb7f6f5bd646a862cc5ddeaa67/output/result/.placeholder


--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | graphviz==0.18
2 | leb128==1.0.4
3 | pyelftools==0.27
4 | pytest==6.2.5
5 | sh==1.14.2
6 | z3-solver==4.13.0.0


--------------------------------------------------------------------------------
/seewasm/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/PKU-ASAL/SeeWasm/e2a40c2fd2bb19fb7f6f5bd646a862cc5ddeaa67/seewasm/__init__.py


--------------------------------------------------------------------------------
/seewasm/__init__.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/PKU-ASAL/SeeWasm/e2a40c2fd2bb19fb7f6f5bd646a862cc5ddeaa67/seewasm/__init__.pyc


--------------------------------------------------------------------------------
/seewasm/analysis/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/PKU-ASAL/SeeWasm/e2a40c2fd2bb19fb7f6f5bd646a862cc5ddeaa67/seewasm/analysis/__init__.py


--------------------------------------------------------------------------------
/seewasm/analysis/__init__.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/PKU-ASAL/SeeWasm/e2a40c2fd2bb19fb7f6f5bd646a862cc5ddeaa67/seewasm/analysis/__init__.pyc


--------------------------------------------------------------------------------
/seewasm/analysis/cfg.py:
--------------------------------------------------------------------------------
 1 | class CFG(object):
 2 | 
 3 |     def __init__(self):
 4 |         """ TODO """
 5 |         raise NotImplementedError
 6 | 
 7 |     def visualize(self):
 8 |         """ TODO """
 9 |         raise NotImplementedError
10 | 
11 |     def visualize_call_flow(self):
12 |         """ TODO """
13 |         raise NotImplementedError
14 | 


--------------------------------------------------------------------------------
/seewasm/arch/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/PKU-ASAL/SeeWasm/e2a40c2fd2bb19fb7f6f5bd646a862cc5ddeaa67/seewasm/arch/__init__.py


--------------------------------------------------------------------------------
/seewasm/arch/__init__.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/PKU-ASAL/SeeWasm/e2a40c2fd2bb19fb7f6f5bd646a862cc5ddeaa67/seewasm/arch/__init__.pyc


--------------------------------------------------------------------------------
/seewasm/arch/wasm/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/PKU-ASAL/SeeWasm/e2a40c2fd2bb19fb7f6f5bd646a862cc5ddeaa67/seewasm/arch/wasm/__init__.py


--------------------------------------------------------------------------------
/seewasm/arch/wasm/__init__.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/PKU-ASAL/SeeWasm/e2a40c2fd2bb19fb7f6f5bd646a862cc5ddeaa67/seewasm/arch/wasm/__init__.pyc


--------------------------------------------------------------------------------
/seewasm/arch/wasm/configuration.py:
--------------------------------------------------------------------------------
  1 | import re
  2 | 
  3 | from z3 import BitVec, Extract
  4 | 
  5 | 
  6 | class bcolors:
  7 |     HEADER = '\033[95m'
  8 |     OKBLUE = '\033[94m'
  9 |     OKCYAN = '\033[96m'
 10 |     OKGREEN = '\033[92m'
 11 |     WARNING = '\033[93m'
 12 |     FAIL = '\033[91m'
 13 |     ENDC = '\033[0m'
 14 |     BOLD = '\033[1m'
 15 |     UNDERLINE = '\033[4m'
 16 | 
 17 | 
 18 | class Configuration:
 19 |     """
 20 |     The static class that maintain the user's input option
 21 |     """
 22 |     _source_type = 'c'              # the original source file's type
 23 |     _algo = 'dfs'                   # the traverse algorithm, default is dfs
 24 |     # _algo = 'bfs'
 25 |     # _algo = 'interval'
 26 |     # _algo = 'random'
 27 |     _symbol_globals = False         # init the global with the declaration in global sections
 28 |     _verbose_flag = 'warning'       # if user set -v flag, the debugging info would be printed
 29 |     _solver = 'z3'                  # the backend SMT solver
 30 |     # the command to run the to be analyzed program, like ['base64', a]
 31 |     # where 'a' is a symbol
 32 |     _args = []
 33 |     # the to-be-analyzed file's path and name
 34 |     _file_path = ''
 35 |     _file_name = ''
 36 |     # the start time of analyze
 37 |     _start_time = ''
 38 |     # the entry function
 39 |     _entry_func = ''
 40 |     _entry_func_signature = ''
 41 |     # the mapping of func index to func name
 42 |     _func_index_to_func_name = {}
 43 |     # if enable the instruction-level coverage calculation
 44 |     _coverage = False
 45 |     # the stdin buffer, can be a list of char or symbols with length of 8 bits
 46 |     _stdin_buffer = []
 47 |     # how many files can be opened in total
 48 |     _sym_file_limit = 0
 49 |     # how many bytes a sym file can hold
 50 |     _sym_file_byte_limit = 0
 51 |     # keep z3 cache
 52 |     # each value is a tuple, consisting of query times, sat or not, and solver
 53 |     _z3_cache_dict = {}
 54 |     # used by args_sizes_get in wasi.py
 55 |     _argc_addr = None
 56 |     _arg_buf_size_addr = None
 57 |     # each element is a list, consisting of argc and size of each argv
 58 |     # like (2, 4, 3) means there are 2 args, the first one is in 4 bytes, and the second is in 3 bytes
 59 |     _argc_arg_buf_size = []
 60 |     # enable the incremental solving or not
 61 |     _incremental_solving = False
 62 |     # indicating the analyzed file is instrumented by a dsl file
 63 |     _dsl_flag = False
 64 |     # the index to function in element section
 65 |     _elem_index_to_func = {}
 66 | 
 67 |     @ staticmethod
 68 |     def set_source_type(source_type):
 69 |         Configuration._source_type = source_type
 70 | 
 71 |     @ staticmethod
 72 |     def get_source_type():
 73 |         return Configuration._source_type
 74 | 
 75 |     @ staticmethod
 76 |     def set_algo(algo):
 77 |         Configuration._algo = algo
 78 | 
 79 |     @ staticmethod
 80 |     def get_algo():
 81 |         return Configuration._algo
 82 | 
 83 |     @ staticmethod
 84 |     def set_symbol_globals(symbol_globals):
 85 |         pass
 86 | 
 87 |     @ staticmethod
 88 |     def get_symbol_globals():
 89 |         return Configuration._symbol_globals
 90 | 
 91 |     @ staticmethod
 92 |     def set_verbose_flag(verbose_flag):
 93 |         Configuration._verbose_flag = verbose_flag
 94 | 
 95 |     @ staticmethod
 96 |     def get_verbose_flag():
 97 |         return Configuration._verbose_flag
 98 | 
 99 |     @ staticmethod
100 |     def set_solver(solver):
101 |         pass
102 | 
103 |     @ staticmethod
104 |     def get_solver():
105 |         return Configuration._solver
106 | 
107 |     @ staticmethod
108 |     def get_args():
109 |         return Configuration._args
110 | 
111 |     @ staticmethod
112 |     def set_args(filename, args, sym_args):
113 |         """
114 |         Parse the given args and symbolic args into the _args
115 | 
116 |         args: str: typically is the argv[0] and is given concretely, like "base64"
117 |         sym_args: [int, ...]: each symbolic arg is given with designated length, like [1, 2]
118 |         """
119 |         # the filename is argv[0]
120 |         Configuration._args += [filename]
121 | 
122 |         if args:
123 |             Configuration._args += args.split(" ")
124 | 
125 |         if sym_args:
126 |             for i, sym_len in enumerate(sym_args):
127 |                 Configuration._args.append(
128 |                     BitVec(f"sym_arg_{i + 1}", 8 * sym_len))
129 | 
130 |     @ staticmethod
131 |     def get_file_name():
132 |         return Configuration._file_name
133 | 
134 |     @ staticmethod
135 |     def get_file_path():
136 |         return Configuration._file_path
137 | 
138 |     @ staticmethod
139 |     def set_file(file_path):
140 |         Configuration._file_path = file_path
141 |         # keep the file name without path and extended type
142 |         Configuration._file_name = file_path.split('/')[-1].split('.')[0]
143 | 
144 |     @ staticmethod
145 |     def get_start_time():
146 |         return Configuration._start_time
147 | 
148 |     @ staticmethod
149 |     def set_start_time(start_time):
150 |         Configuration._start_time = start_time
151 | 
152 |     @ staticmethod
153 |     def get_entry():
154 |         return Configuration._entry_func
155 | 
156 |     @ staticmethod
157 |     def set_entry(entry_func):
158 |         Configuration._entry_func = entry_func[0]
159 |     
160 |     @ staticmethod
161 |     def get_entry_signature():
162 |         return Configuration._entry_func_signature
163 | 
164 |     @ staticmethod
165 |     def set_entry_signature(entry_func_signature):
166 |         Configuration._entry_func_signature = entry_func_signature
167 | 
168 |     @ staticmethod
169 |     def get_func_index_to_func_name():
170 |         return Configuration._func_index_to_func_name
171 | 
172 |     @ staticmethod
173 |     def set_func_index_to_func_name(ana_names, func_prototypes):
174 |         if ana_names:
175 |             # if the wasm has name section
176 |             for item in ana_names:
177 |                 index, _, func_name = item
178 |                 func_name = func_name.decode()
179 |                 if "__imported_wasi_snapshot_preview1_" in func_name:
180 |                     func_name = func_name[34:]
181 |                 Configuration._func_index_to_func_name[index] = func_name
182 |         else:
183 |             for index, item in enumerate(func_prototypes):
184 |                 func_name = item[0]
185 |                 Configuration._func_index_to_func_name[index] = func_name
186 | 
187 |     @ staticmethod
188 |     def get_coverage():
189 |         return Configuration._coverage
190 | 
191 |     @ staticmethod
192 |     def set_coverage(coverage):
193 |         pass
194 | 
195 |     @ staticmethod
196 |     def set_stdin(stdin, sym_stdin):
197 |         """
198 |         Store stdin buffer into the `stdin_buffer`
199 |         """
200 |         if stdin and sym_stdin:
201 |             exit("Cannot set `stdin` and `sym_stdin` simultaneously")
202 | 
203 |         if stdin:
204 |             # the encode is necessary
205 |             stdin_encoded = stdin.encode().replace(b'\\n', b'\n')
206 |             Configuration._stdin_buffer = list(stdin_encoded)
207 |         elif sym_stdin:
208 |             sym_stdin_len = sym_stdin[0]
209 |             raw_symbol = BitVec('sym_stdin', sym_stdin_len * 8)
210 |             # split by chars
211 |             for i in range(sym_stdin_len, 0, -1):
212 |                 Configuration._stdin_buffer.append(
213 |                     Extract(i * 8 - 1, (i - 1) * 8, raw_symbol))
214 |         else:
215 |             # no stdin is given
216 |             pass
217 | 
218 |     @ staticmethod
219 |     def get_stdin():
220 |         """
221 |         return the stdin buffer
222 |         """
223 |         return Configuration._stdin_buffer
224 | 
225 |     @ staticmethod
226 |     def set_sym_files(sym_files):
227 |         """
228 |         the sym files take two arguments:
229 |         the first is how many files will be opened;
230 |         the second is how many btyes are in each of them.
231 | 
232 |         So, we store these two information
233 |         """
234 |         if not sym_files:
235 |             return
236 |         sym_file_num, sym_file_byte = sym_files
237 |         Configuration._sym_file_limit = sym_file_num
238 |         Configuration._sym_file_byte_limit = sym_file_byte
239 | 
240 |     @ staticmethod
241 |     def get_sym_file_limits():
242 |         return Configuration._sym_file_limit, Configuration._sym_file_byte_limit
243 | 
244 |     @ staticmethod
245 |     def get_visualize():
246 |         return Configuration._visualize_flag
247 | 
248 |     @ staticmethod
249 |     def set_visualize(visualize_flag):
250 |         Configuration._visualize_flag = visualize_flag
251 | 
252 |     @ staticmethod
253 |     def set_incremental_solving(incremental_solving_flag):
254 |         Configuration._incremental_solving = incremental_solving_flag
255 | 
256 |     @ staticmethod
257 |     def get_incremental_solving():
258 |         return Configuration._incremental_solving
259 | 
260 |     @ staticmethod
261 |     def set_dsl_flag(dsl_flag):
262 |         pass
263 | 
264 |     @ staticmethod
265 |     def get_dsl_flag():
266 |         return Configuration._dsl_flag
267 | 
268 |     @staticmethod
269 |     def set_elem_index_to_func(wat_file_path):
270 |         with open(wat_file_path) as fp:
271 |             wat_content = fp.read()
272 |         # extract the element section
273 |         result = re.search(r"\(elem.*func ([\w\._\$ ]*)\)", wat_content)
274 |         # if there is element section in the given wat
275 |         if result:
276 |             elem_sec_funcs = result.group(1).split(' ')
277 |             for i, func in enumerate(elem_sec_funcs):
278 |                 if "__imported_wasi_snapshot_preview1_" in func:
279 |                     func = func[34:]  # remove the prefix
280 |                 # remove the leading $
281 |                 Configuration._elem_index_to_func[i] = func[1:]
282 | 
283 |     @staticmethod
284 |     def get_elem_index_to_func():
285 |         return Configuration._elem_index_to_func
286 | 


--------------------------------------------------------------------------------
/seewasm/arch/wasm/constant.py:
--------------------------------------------------------------------------------
 1 | # It defines some constants
 2 | 
 3 | # https://github.com/WebAssembly/design/blob/master/BinaryEncoding.md#language-types
 4 | LANG_TYPE = {
 5 |     # Opcode, Type constructor
 6 |     -0x01: 'i32',
 7 |     -0x02: 'i64',
 8 |     -0x03: 'f32',
 9 |     -0x04: 'f64',
10 |     -0x10: 'anyfunc',
11 |     -0x20: 'func',
12 |     -0x40: 'block_type'
13 | }
14 | 
15 | KIND_TYPE = {
16 |     0: 'function',
17 |     1: 'table',
18 |     2: 'memory',
19 |     3: 'global',
20 | }
21 | 


--------------------------------------------------------------------------------
/seewasm/arch/wasm/decode.py:
--------------------------------------------------------------------------------
 1 | # The MIT License (MIT)
 2 | #
 3 | # Copyright (c) 2016 Joel Höner <athre0z@zyantific.com>
 4 | #
 5 | # Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | # of this software and associated documentation files (the "Software"), to deal
 7 | # in the Software without restriction, including without limitation the rights
 8 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | # copies of the Software, and to permit persons to whom the Software is
10 | # furnished to do so, subject to the following conditions:
11 | #
12 | # The above copyright notice and this permission notice shall be included in all
13 | # copies or substantial portions of the Software.
14 | #
15 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | # SOFTWARE.
22 | 
23 | # modified code from https://github.com/athre0z/wasm/blob/master/wasm/modtypes.py
24 | # no need of that if PyPI wasm version 1.2 release
25 | 
26 | 
27 | """Provides functions for decoding WASM modules and bytecode."""
28 | from __future__ import (absolute_import, division, print_function,
29 |                         unicode_literals)
30 | 
31 | from collections import namedtuple
32 | 
33 | from wasm.compat import byte2int
34 | from wasm.modtypes import (SEC_NAME, SEC_UNK, ModuleHeader, NameSubSection,
35 |                            Section)
36 | from wasm.opcodes import OPCODE_MAP
37 | 
# One decoded instruction: its opcode entry, decoded immediate, and total byte length.
Instruction = namedtuple('Instruction', ('op', 'imm', 'len'))
# One decoded piece of a module: the decoder object and the data it produced.
ModuleFragment = namedtuple('ModuleFragment', ('type', 'data'))
40 | 
41 | 
def decode_bytecode(bytecode):
    """
    Lazily decode raw bytecode, yielding one `Instruction` per opcode.

    Each yielded tuple holds the `OPCODE_MAP` entry, the decoded immediate
    (None when the opcode has no immediate structure) and the instruction's
    total length in bytes (opcode byte plus immediate bytes).
    """
    remaining = memoryview(bytecode)
    while remaining:
        opcode = OPCODE_MAP[byte2int(remaining[0])]

        # defaults for opcodes without an immediate
        imm_len, imm = 0, None
        if opcode.imm_struct is not None:
            # the immediate starts right after the single opcode byte
            imm_len, imm, _ = opcode.imm_struct.from_raw(None, remaining[1:])

        total_len = imm_len + 1
        yield Instruction(opcode, imm, total_len)
        # slide the window past the instruction just produced
        remaining = remaining[total_len:]
58 | 
59 | 
def decode_module(module, decode_name_subsections=False):
    """
    Lazily decode a raw WASM module, yielding `ModuleFragment`s.

    The first fragment is the module header; every following fragment is a
    section. When `decode_name_subsections` is true, a name section is not
    yielded itself; its subsections are yielded one by one instead.
    Decoding stops silently at the first section that fails to parse.
    """
    remaining = memoryview(module)

    # Read & yield module header.
    header = ModuleHeader()
    header_len, header_data, _ = header.from_raw(None, remaining)
    yield ModuleFragment(header, header_data)
    remaining = remaining[header_len:]

    # Read & yield sections.
    while remaining:
        section = Section()
        # bypass the error caused by -g1 to -g3 compiled C code
        try:
            section_len, section_data, _ = section.from_raw(None, remaining)
        except Exception:
            break

        expand_names = (decode_name_subsections
                        and section_data.id == SEC_UNK
                        and section_data.name == SEC_NAME)
        if not expand_names:
            yield ModuleFragment(section, section_data)
        else:
            # If requested, decode name subsections when encountered.
            payload = section_data.payload
            while payload:
                subsection = NameSubSection()
                subsection_len, subsection_data, _ = subsection.from_raw(
                    None, payload)
                yield ModuleFragment(subsection, subsection_data)
                payload = payload[subsection_len:]

        # The original carries a disabled adjustment here ("fix bug KeyError")
        # that subtracted name_len + 1 from the length of named unknown
        # sections; it stays disabled.
        remaining = remaining[section_len:]
94 | 


--------------------------------------------------------------------------------
/seewasm/arch/wasm/disassembler.py:
--------------------------------------------------------------------------------
  1 | from collections import namedtuple
  2 | 
  3 | from seewasm.arch.wasm.decode import decode_module
  4 | from seewasm.arch.wasm.instruction import WasmInstruction
  5 | from seewasm.arch.wasm.wasm import Wasm
  6 | from seewasm.core.function import Function
  7 | from seewasm.core.utils import bytecode_to_bytes
  8 | from seewasm.engine.disassembler import Disassembler
  9 | 
 10 | from wasm.compat import byte2int
 11 | from wasm.formatter import format_instruction
 12 | from wasm.modtypes import CodeSection
 13 | from wasm.opcodes import OPCODE_MAP
 14 | 
 15 | inst_namedtuple = namedtuple('Instruction', 'op imm len')
 16 | 
 17 | 
 18 | class WasmDisassembler(Disassembler):
 19 | 
 20 |     def __init__(self, bytecode=None):
 21 |         Disassembler.__init__(self, asm=Wasm(), bytecode=bytecode)
 22 | 
 23 |     def disassemble_opcode(self, bytecode=None, offset=0, nature_offset=0):
 24 |         '''
 25 |         based on decode_bytecode()
 26 |         https://github.com/athre0z/wasm/blob/master/wasm/decode.py
 27 | 
 28 |         '''
 29 | 
 30 |         bytecode_wnd = memoryview(bytecode)
 31 |         bytecode_idx = 0
 32 |         opcode_id = byte2int(bytecode_wnd[bytecode_idx])
 33 |         opcode_size = 1
 34 | 
 35 |         bytecode_idx += 1
 36 |         if opcode_id == 0xfc:
 37 |             opcode_id = (opcode_id << 8) | byte2int(bytecode_wnd[bytecode_idx])
 38 |             if opcode_id == 0xfc0a: # memory.copy
 39 |                 opcode_size = 4
 40 |             elif opcode_id == 0xfc0b: # memory.fill
 41 |                 opcode_size = 3
 42 |         # default value
 43 |         # opcode:(mnemonic/name, imm_struct, pops, pushes, description)
 44 |         invalid = ('INVALID', 0, 0, 0, 'Unknown opcode')
 45 |         name, imm_struct, pops, pushes, description = \
 46 |             self.asm.table.get(opcode_id, invalid)
 47 | 
 48 |         operand_size = 0
 49 |         operand = None
 50 |         operand_interpretation = None
 51 | 
 52 |         if imm_struct is not None:
 53 |             assert not isinstance(imm_struct, int), f"imm_struct is int, most likely encountered unsupported inst.\nname: {name}\nimm_struct: {imm_struct}\npops: {pops} pushes: {pushes}\ndesc: {description}\nopcode_id: {hex(opcode_id)}"
 54 |             operand_size, operand, _ = imm_struct.from_raw(
 55 |                 None, bytecode_wnd[bytecode_idx:])
 56 |             insn = inst_namedtuple(
 57 |                 OPCODE_MAP[opcode_id], operand, bytecode_idx + operand_size)
 58 |             operand_interpretation = format_instruction(insn)
 59 |         insn_byte = bytecode_wnd[:bytecode_idx + operand_size].tobytes()
 60 |         instruction = WasmInstruction(
 61 |             opcode_id, opcode_size, name, imm_struct, operand_size, insn_byte, pops, pushes,
 62 |             description, operand_interpretation=operand_interpretation,
 63 |             offset=offset, nature_offset=nature_offset)
 64 |         # print('%d %s' % (offset, str(instruction)))
 65 |         return instruction
 66 | 
 67 |     def disassemble(self, bytecode=None, offset=0, nature_offset=0,
 68 |                     r_format='list'):
 69 |         """Disassemble WASM bytecode
 70 | 
 71 |         :param bytecode: bytecode sequence
 72 |         :param offset: start offset
 73 |         :param r_format: output format ('list'/'text'/'reverse')
 74 |         :type bytecode: bytes, str
 75 |         :type offset: int
 76 |         :type r_format: list, str, dict
 77 |         :return: dissassembly result depending of r_format
 78 |         :rtype: list, str, dict
 79 |         """
 80 | 
 81 |         return super().disassemble(bytecode, offset, nature_offset, r_format)
 82 | 
 83 |     def extract_functions_code(self, module_bytecode):
 84 |         functions = list()
 85 |         mod_iter = iter(decode_module(module_bytecode))
 86 |         _, _ = next(mod_iter)
 87 |         sections = list(mod_iter)
 88 | 
 89 |         # iterate over all section
 90 |         # code_data = [cur_sec_data for cur_sec, cur_sec_data in sections if isinstance(cur_sec_data.get_decoder_meta()['types']['payload'], CodeSection)][0]
 91 |         for cur_sec, cur_sec_data in sections:
 92 |             sec = cur_sec_data.get_decoder_meta()['types']['payload']
 93 |             if isinstance(sec, CodeSection):
 94 |                 code_data = cur_sec_data
 95 |                 break
 96 |         if not code_data:
 97 |             raise ValueError('No functions/codes in the module')
 98 |         for idx, func in enumerate(code_data.payload.bodies):
 99 |             instructions = self.disassemble(func.code.tobytes())
100 |             cur_function = Function(0, instructions[0])
101 |             cur_function.instructions = instructions
102 | 
103 |             functions.append(cur_function)
104 |         return functions
105 | 
106 |     def disassemble_module(
107 |             self, module_bytecode=None, offset=0, r_format='list'):
108 | 
109 |         bytecode = bytecode_to_bytes(module_bytecode)
110 | 
111 |         functions = self.extract_functions_code(bytecode[offset:])
112 |         self.instructions = [f.instructions for f in functions]
113 | 
114 |         # return instructions
115 |         if r_format == 'list':
116 |             return self.instructions
117 |         elif r_format == 'text':
118 |             text = ''
119 |             for index, func in enumerate(functions):
120 |                 text += ('func %d\n' % index)
121 |                 text += ('\n'.join(map(str, func.instructions)))
122 |                 text += ('\n\n')
123 |             return text
124 | 


--------------------------------------------------------------------------------
/seewasm/arch/wasm/exceptions.py:
--------------------------------------------------------------------------------
  1 | # This file defines our own exceptions
  2 | NO_EXIT = -99
  3 | INVALIDMEMORY = -2
  4 | ASSERT_FAIL = -3
  5 | 
  6 | 
  7 | class UnsupportZ3TypeError(Exception):
  8 |     """
  9 |     used in `utils.py`
 10 |     indicating that the variable type is not in ['i32', 'i64', 'f32', 'f64']
 11 |     """
 12 |     pass
 13 | 
 14 | 
 15 | class UninitializedLocalVariableError(Exception):
 16 |     """
 17 |     used in `emulator.py`
 18 |     indicating the local variable is not initialized before retriving
 19 |     """
 20 |     pass
 21 | 
 22 | 
 23 | class UnsupportGlobalTypeError(Exception):
 24 |     """
 25 |     used in `emulator.py`
 26 |     indicating the unsupport global type encoutering global.get
 27 |     """
 28 |     pass
 29 | 
 30 | 
 31 | class UnsupportInstructionError(Exception):
 32 |     """
 33 |     used in `emulator.py`
 34 |     indicating the unsupport instructions
 35 |     """
 36 |     pass
 37 | 
 38 | 
 39 | class NotDeterminedRetValError(Exception):
 40 |     """
 41 |     indicateing the return value is bool but cannot be determined as True or False
 42 |     """
 43 |     pass
 44 | 
 45 | 
 46 | class UninitializedStateError(Exception):
 47 |     """
 48 |     indicateing the state is not initialized before emulate_one_function
 49 |     """
 50 |     pass
 51 | 
 52 | 
 53 | class MemoryLoadError(Exception):
 54 |     """
 55 |     indicating the memory load error
 56 |     """
 57 |     pass
 58 | 
 59 | 
 60 | class UnsupportExternalFuncError(Exception):
 61 |     """
 62 |     indicating the library function is not emulated by us
 63 |     """
 64 |     pass
 65 | 
 66 | 
 67 | class UnexpectedDataType(Exception):
 68 |     """
 69 |     Typically raised if there is a `if-elif-else` statement
 70 |     depending on the data type
 71 |     """
 72 |     pass
 73 | 
 74 | 
 75 | class ProcSuccessTermination(Exception):
 76 |     """
 77 |     Indicate the process is successfully terminated
 78 |     """
 79 | 
 80 |     def __init__(self, value):
 81 |         self.value = value
 82 | 
 83 |     # __str__ is to print() the value
 84 |     def __str__(self):
 85 |         return (repr(self.value))
 86 | 
 87 | 
 88 | class ProcFailTermination(Exception):
 89 |     """
 90 |     Indicate the process is failedly terminated
 91 |     """
 92 | 
 93 |     def __init__(self, value):
 94 |         self.value = value
 95 | 
 96 |     # __str__ is to print() the value
 97 |     def __str__(self):
 98 |         return (repr(self.value))
 99 | 
class HaltTermination(Exception):
    """Exception raised without any payload; the constructor takes no arguments."""

    def __init__(self):
        """Accept no arguments and store nothing."""
103 | 


--------------------------------------------------------------------------------
/seewasm/arch/wasm/format.py:
--------------------------------------------------------------------------------
 1 | # It is for formatting
 2 | 
 3 | import re
 4 | 
 5 | from seewasm.arch.wasm.constant import LANG_TYPE
 6 | 
 7 | 
 8 | def format_func_name(name, param_str, return_str):
 9 |     result = '{} '.format(return_str) if return_str else ''
10 |     return ('{}{}({})'.format(result, name, param_str))
11 | 
12 | 
def format_bb_name(function_id, offset):
    """Return the basic-block label "block_<function_id>_<offset>", both in lowercase hex."""
    hex_function_id = '%x' % function_id
    hex_offset = '%x' % offset
    return '_'.join(('block', hex_function_id, hex_offset))
15 | 
16 | 
def format_kind_function(f_type):
    """Return the function kind description, which is `f_type` itself, unchanged."""
    return f_type
19 | 
20 | 
def format_kind_table(element_type, flags, initial, maximum):
    """
    Describe a table as a dict.

    `element_type` is translated through LANG_TYPE (None when unknown); the
    three limit values are stored as given.
    """
    return dict(
        element_type=LANG_TYPE.get(element_type),
        limits_flags=flags,
        limits_initial=initial,
        limits_maximum=maximum,
    )
26 | 
27 | 
def format_kind_memory(flags, initial, maximum):
    """Describe a memory as a dict holding its three limit values as given."""
    return dict(
        limits_flags=flags,
        limits_initial=initial,
        limits_maximum=maximum,
    )
32 | 
33 | 
def format_kind_global(mutability, content_type, current_instruction):
    """
    Describe a global as the list [content_type, current_instruction].

    `mutability` is accepted but not used for now.
    """
    description = [content_type, current_instruction]
    return description
37 | 
38 | 
def format_scan_result(result):
    """
    Return a printable copy of a scan result.

    `result` is an iterable of (key_functions, constraints) pairs. Every
    constraint is converted with str(); in constraints containing
    'action ==' or 'code ==', each run of 10 or more digits is replaced by
    its base-32 name decoding. Each output entry is
    [copy of key_functions, list of constraint strings].
    """
    charmap = ".12345abcdefghijklmnopqrstuvwxyz"

    def name_to_string(val=13949526960272233840):
        # 13 symbols, least significant first: the lowest symbol takes
        # 4 bits, each of the other twelve takes 5 bits
        symbols = []
        for position in range(13):
            if position == 0:
                mask, shift = 0x0f, 4
            else:
                mask, shift = 0x1f, 5
            symbols.append(charmap[val & mask])
            val >>= shift
        symbols.reverse()
        # trailing '.' padding is dropped
        return ''.join(symbols).rstrip('.')

    def decode(matchobj):
        return name_to_string(int(matchobj.group(0)))

    formatted = []
    for key_functions, constraints in result:
        rendered = []
        for constraint in constraints:
            text = str(constraint)
            if 'action ==' in text or 'code ==' in text:
                text = re.sub(r'[0-9]{10,}', decode, text)
            rendered.append(text)
        formatted.append([key_functions.copy(), rendered])

    return formatted
68 | 


--------------------------------------------------------------------------------
/seewasm/arch/wasm/instruction.py:
--------------------------------------------------------------------------------
  1 | # It will parse each instructions in Wasm
  2 | 
  3 | from seewasm.arch.wasm.wasm import _groups
  4 | from seewasm.core.instruction import Instruction
  5 | 
  6 | 
  7 | class WasmInstruction(Instruction):
  8 |     """Wasm Instruction
  9 |     TODO
 10 | 
 11 |     """
 12 | 
 13 |     def __init__(
 14 |             self, opcode, opcode_size, name, imm_struct, operand_size, insn_byte, pops,
 15 |             pushes, description, operand_interpretation=None, offset=0,
 16 |             nature_offset=0):
 17 |         """ TODO """
 18 |         self.opcode = opcode
 19 |         self.opcode_size = opcode_size
 20 |         self.offset = offset
 21 |         self.nature_offset = nature_offset
 22 |         self.name = name
 23 |         self.description = description
 24 |         self.operand_size = operand_size
 25 |         if len(insn_byte) > 1:
 26 |             # Immediate operand if any
 27 |             self.operand = insn_byte[-operand_size:]
 28 |         else:
 29 |             self.operand = None
 30 |             # specific interpretation of operand value
 31 |         self.operand_interpretation = operand_interpretation
 32 |         self.insn_byte = insn_byte
 33 |         self.pops = pops
 34 |         self.pushes = pushes
 35 |         self.imm_struct = imm_struct
 36 |         self.xref = list()
 37 |         self.ssa = None
 38 |         # which basic block locates in
 39 |         self.cur_bb = ''
 40 | 
 41 |     def __eq__(self, other):
 42 |         """ Instructions are equal if all features match  """
 43 |         return self.opcode == other.opcode and \
 44 |             self.name == other.name and \
 45 |             self.offset == other.offset and \
 46 |             self.insn_byte == other.insn_byte and \
 47 |             self.operand_size == other.operand_size and \
 48 |             self.pops == other.pops and \
 49 |             self.pushes == other.pushes and \
 50 |             self.operand_interpretation == other.operand_interpretation and \
 51 |             self.description == other.description
 52 | 
 53 |     def __str__(self):
 54 |         """ String representation of the instruction """
 55 |         if self.operand:
 56 |             return self.operand_interpretation
 57 |         # elif self.operand:
 58 |         #    return self.name + str(self.operand)
 59 |         else:
 60 |             return self.name
 61 | 
 62 |     @property
 63 |     def group(self):
 64 |         """ Instruction classification per group """
 65 |         last_class = _groups.get(0)
 66 |         for k, v in _groups.items():
 67 |             if self.opcode >= k:
 68 |                 last_class = v
 69 |             else:
 70 |                 return last_class
 71 |         return last_class
 72 | 
 73 |     @property
 74 |     def is_control(self):
 75 |         return self.group == 'Control'
 76 | 
 77 |     @property
 78 |     def is_parametric(self):
 79 |         return self.group == 'Parametric'
 80 | 
 81 |     @property
 82 |     def is_variable(self):
 83 |         return self.group == 'Variable'
 84 | 
 85 |     @property
 86 |     def is_memory(self):
 87 |         return self.group == 'Memory'
 88 | 
 89 |     @property
 90 |     def is_constant(self):
 91 |         return self.group == 'Constant'
 92 | 
 93 |     @property
 94 |     def is_logical_i32(self):
 95 |         return self.group == 'Logical_i32'
 96 | 
 97 |     @property
 98 |     def is_logical_i64(self):
 99 |         return self.group == 'Logical_i64'
100 | 
101 |     @property
102 |     def is_logical_f32(self):
103 |         return self.group == 'Logical_f32'
104 | 
105 |     @property
106 |     def is_logical_f64(self):
107 |         return self.group == 'Logical_f64'
108 | 
109 |     @property
110 |     def is_arithmetic_i32(self):
111 |         return self.group == 'Arithmetic_i32'
112 | 
113 |     @property
114 |     def is_bitwise_i32(self):
115 |         return self.group == 'Bitwise_i32'
116 | 
117 |     @property
118 |     def is_arithmetic_i64(self):
119 |         return self.group == 'Arithmetic_i64'
120 | 
121 |     @property
122 |     def is_bitwise_i64(self):
123 |         return self.group == 'Bitwise_i64'
124 | 
125 |     @property
126 |     def is_arithmetic_f32(self):
127 |         return self.group == 'Arithmetic_f32'
128 | 
129 |     @property
130 |     def is_arithmetic_f64(self):
131 |         return self.group == 'Arithmetic_f64'
132 | 
133 |     @property
134 |     def is_conversion(self):
135 |         return self.group == 'Conversion'
136 | 
137 |     @property
138 |     def is_branch_conditional(self):
139 |         """ Return True if the instruction is a conditional jump """
140 |         return self.name in {'br_if', 'br_table', 'if'}
141 | 
142 |     @property
143 |     def is_branch_unconditional(self):
144 |         """ Return True if the instruction is a unconditional jump """
145 |         return self.name in {'br'}
146 | 
147 |     @property
148 |     def is_call(self):
149 |         """ True if the instruction is a call instruction """
150 |         return self.name in {'call', 'call_indirect'}
151 | 
152 |     @property
153 |     def is_branch(self):
154 |         return self.is_branch_conditional or self.is_branch_unconditional
155 | 
156 |     @property
157 |     def is_halt(self):
158 |         """ Return True if the instruction is a branch terminator """
159 |         return self.name in {'unreachable', 'return'}
160 | 
161 |     @property
162 |     def is_terminator(self):
163 |         """ True if the instruction is a basic block terminator """
164 |         return self.is_branch or self.is_halt
165 | 
166 |     @property
167 |     def is_block_starter(self):
168 |         """ Return True if the instruction is a basic block starter """
169 |         return self.name in {'block', 'loop', 'if', 'else'}
170 | 
171 |     @property
172 |     def is_block_terminator(self):
173 |         """ Return True if the instruction is a basic block terminator """
174 |         return self.name in {'else', 'end'}
175 | 


--------------------------------------------------------------------------------
/seewasm/arch/wasm/instructions/ArithmeticInstructions.py:
--------------------------------------------------------------------------------
  1 | # emulate the arithmetic related instructions
  2 | 
  3 | import logging
  4 | 
  5 | from seewasm.arch.wasm.exceptions import UnsupportInstructionError
  6 | from z3 import (RNE, RTN, RTP, RTZ, BitVec, BitVecVal, Float32, Float64, SRem,
  7 |                 UDiv, URem, fpAbs, fpAdd, fpDiv, fpMax, fpMin, fpMul, fpNeg,
  8 |                 fpRoundToIntegral, fpSqrt, fpSub, is_bool, simplify)
  9 | 
 10 | helper_map = {  # operand-type prefix -> z3 sort parameters
 11 |     'i32': 32,  # bit-vector width
 12 |     'i64': 64,  # bit-vector width
 13 |     'f32': [8, 24],  # [ebits, sbits] compared against float operands
 14 |     'f64': [11, 53]  # [ebits, sbits] compared against float operands
 15 | }
 16 | 
 17 | float_helper_map = {  # operand-type prefix -> z3 float sort constructor
 18 |     'f32': Float32,
 19 |     'f64': Float64
 20 | }
 21 | 
 22 | 
 23 | class ArithmeticInstructions:
 24 |     def __init__(self, instr_name, instr_operand, _):
 25 |         self.instr_name = instr_name
 26 |         self.instr_operand = instr_operand
 27 | 
 28 |     def emulate(self, state):
 29 |         def do_emulate_arithmetic_int_instruction(state):
 30 |             instr_type = self.instr_name[:3]
 31 | 
 32 |             if '.clz' in self.instr_name or '.ctz' in self.instr_name:
 33 |                 # wasm documentation says:
 34 |                 # This instruction is fully defined when all bits are zero;
 35 |                 # it returns the number of bits in the operand type.
 36 |                 state.symbolic_stack.pop()
 37 |                 state.symbolic_stack.append(
 38 |                     BitVecVal(helper_map[instr_type], helper_map[instr_type]))
 39 |             elif '.popcnt' in self.instr_name:
 40 |                 # wasm documentation says:
 41 |                 # This instruction is fully defined when all bits are zero;
 42 |                 # it returns 0.
 43 |                 state.symbolic_stack.pop()
 44 |                 state.symbolic_stack.append(
 45 |                     BitVecVal(0, helper_map[instr_type]))
 46 |             else:
 47 |                 arg1, arg2 = state.symbolic_stack.pop(), state.symbolic_stack.pop()
 48 | 
 49 |                 # arg1 and arg2 could be BitVecRef, BitVecValRef and BoolRef
 50 |                 if is_bool(arg1):
 51 |                     arg1 = BitVec(str(arg1), helper_map[instr_type])
 52 |                     logging.warning(
 53 |                         f"[!] In `ArithmeticInstructions.py`, arg1 is BoolRef, translated to BitVec which may lead to some information loss")
 54 |                 if is_bool(arg2):
 55 |                     arg2 = BitVec(str(arg2), helper_map[instr_type])
 56 |                     logging.warning(
 57 |                         f"[!] In `ArithmeticInstructions.py`, arg2 is BoolRef, translated to BitVec which may lead to some information loss")
 58 | 
 59 |                 assert arg1.size(
 60 |                 ) == helper_map[instr_type], f"in arithmetic instruction, arg1 size is {arg1.size()} instead of {helper_map[instr_type]}"
 61 |                 assert arg2.size(
 62 |                 ) == helper_map[instr_type], f"in arithmetic instruction, arg2 size is {arg2.size()} instead of {helper_map[instr_type]}"
 63 | 
 64 |                 if '.sub' in self.instr_name:
 65 |                     result = arg2 - arg1
 66 |                 elif '.add' in self.instr_name:
 67 |                     result = arg2 + arg1
 68 |                 elif '.mul' in self.instr_name:
 69 |                     result = arg2 * arg1
 70 |                 elif '.div_s' in self.instr_name:
 71 |                     result = arg2 / arg1
 72 |                 elif '.div_u' in self.instr_name:
 73 |                     result = UDiv(arg2, arg1)
 74 |                 elif '.rem_s' in self.instr_name:
 75 |                     result = SRem(arg2, arg1)
 76 |                 elif '.rem_u' in self.instr_name:
 77 |                     result = URem(arg2, arg1)
 78 |                 else:
 79 |                     raise UnsupportInstructionError
 80 | 
 81 |                 result = simplify(result)
 82 |                 state.symbolic_stack.append(result)
 83 | 
 84 |             return [state]
 85 | 
 86 |         def do_emulate_arithmetic_float_instruction(state):
 87 |             # TODO need to be clarified
 88 |             # wasm default rounding rules
 89 |             rm = RNE()
 90 | 
 91 |             instr_type = self.instr_name[:3]
 92 | 
 93 |             two_arguments_instrs = ['add', 'sub',
 94 |                                     'mul', 'div', 'min', 'max', 'copysign']
 95 |             one_argument_instrs = ['sqrt', 'floor',
 96 |                                    'ceil', 'trunc', 'nearest', 'abs', 'neg']
 97 | 
 98 |             # add instr_type before each instr
 99 |             two_arguments_instrs = [str(instr_type + '.' + i)
100 |                                     for i in two_arguments_instrs]
101 |             one_argument_instrs = [str(instr_type + '.' + i)
102 |                                    for i in one_argument_instrs]
103 | 
104 |             # pop two elements
105 |             if self.instr_name in two_arguments_instrs:
106 |                 arg1, arg2 = state.symbolic_stack.pop(), state.symbolic_stack.pop()
107 | 
108 |                 assert arg1.ebits() == helper_map[instr_type][0] and arg1.sbits(
109 |                 ) == helper_map[instr_type][1], 'In do_emulate_arithmetic_float_instruction, arg1 type mismatch'
110 |                 assert arg2.ebits() == helper_map[instr_type][0] and arg2.sbits(
111 |                 ) == helper_map[instr_type][1], 'In do_emulate_arithmetic_float_instruction, arg2 type mismatch'
112 | 
113 |                 if '.add' in self.instr_name:
114 |                     result = fpAdd(rm, arg2, arg1)
115 |                 elif '.sub' in self.instr_name:
116 |                     result = fpSub(rm, arg2, arg1)
117 |                 elif '.mul' in self.instr_name:
118 |                     result = fpMul(rm, arg2, arg1)
119 |                 elif '.div' in self.instr_name:
120 |                     result = fpDiv(rm, arg2, arg1)
121 |                 elif '.min' in self.instr_name:
122 |                     result = fpMin(arg2, arg1)
123 |                 elif '.max' in self.instr_name:
124 |                     result = fpMax(arg2, arg1)
125 |                 elif '.copysign' in self.instr_name == 'f32.copysign':
126 |                     # extract arg2's sign to overwrite arg1's sign
127 |                     if arg2.isPositive() ^ arg1.isPositive():
128 |                         result = fpNeg(arg1)
129 |             # pop one element
130 |             elif self.instr_name in one_argument_instrs:
131 |                 arg1 = state.symbolic_stack.pop()
132 | 
133 |                 assert arg1.ebits() == helper_map[instr_type][0] and arg1.sbits(
134 |                 ) == helper_map[instr_type][1], 'In do_emulate_arithmetic_float_instruction, arg1 type mismatch'
135 | 
136 |                 if '.sqrt' in self.instr_name:
137 |                     result = fpSqrt(rm, arg1)
138 |                 elif '.floor' in self.instr_name:
139 |                     # round toward negative
140 |                     result = fpRoundToIntegral(RTN(), arg1)
141 |                 elif '.ceil' in self.instr_name:
142 |                     # round toward positive
143 |                     result = fpRoundToIntegral(RTP(), arg1)
144 |                 elif '.trunc' in self.instr_name:
145 |                     # round toward zero
146 |                     result = fpRoundToIntegral(RTZ(), arg1)
147 |                 elif '.nearest' in self.instr_name:
148 |                     # round to integeral ties to even
149 |                     result = fpRoundToIntegral(RNE(), arg1)
150 |                 elif '.abs' in self.instr_name:
151 |                     result = fpAbs(arg1)
152 |                 elif '.neg' in self.instr_name:
153 |                     result = fpNeg(arg1)
154 |             else:
155 |                 raise UnsupportInstructionError
156 | 
157 |             result = simplify(result)
158 |             state.symbolic_stack.append(result)
159 | 
160 |             return [state]
161 | 
162 |         op_type = self.instr_name[:1]
163 |         if op_type == 'i':
164 |             return do_emulate_arithmetic_int_instruction(state)
165 |         else:
166 |             return do_emulate_arithmetic_float_instruction(state)
167 | 


--------------------------------------------------------------------------------
/seewasm/arch/wasm/instructions/BitwiseInstructions.py:
--------------------------------------------------------------------------------
 1 | # emulate the bitwise related instructions
 2 | 
 3 | import logging
 4 | 
 5 | from seewasm.arch.wasm.exceptions import UnsupportInstructionError
 6 | from z3 import (BitVec, BitVecVal, LShR, RotateLeft, RotateRight, is_bool,
 7 |                 is_bv, is_false, is_true, simplify)
 8 | 
 9 | helper_map = {  # operand-type prefix -> bit-vector width
10 |     'i32': 32,
11 |     'i64': 64,
12 | }
13 | 
14 | 
15 | class BitwiseInstructions:
16 |     def __init__(self, instr_name, instr_operand, _):
17 |         self.instr_name = instr_name
18 |         self.instr_operand = instr_operand
19 | 
20 |     # TODO overflow check in this function?
21 |     def emulate(self, state):
22 |         arg1, arg2 = state.symbolic_stack.pop(), state.symbolic_stack.pop()
23 |         instr_type = self.instr_name[:3]
24 | 
25 |         # arg1 and arg2 could be BitVecRef, BitVecValRef and BoolRef
26 |         if is_bool(arg1):
27 |             arg1 = BitVec(str(arg1), helper_map[instr_type])
28 |             logging.warning(
29 |                 f"[!] In `BitwiseInstructions.py`, arg1 is BoolRef, translated to BitVec which may lead to some information loss")
30 |         if is_bool(arg2):
31 |             arg2 = BitVec(str(arg2), helper_map[instr_type])
32 |             logging.warning(
33 |                 f"[!] In `BitwiseInstructions.py`, arg2 is BoolRef, translated to BitVec which may lead to some information loss")
34 | 
35 |         assert arg1.size(
36 |         ) == helper_map[instr_type], f'arg1 size is {arg1.size()} instead of {helper_map[instr_type]} in do_emulate_bitwise_instruction'
37 |         assert arg2.size(
38 |         ) == helper_map[instr_type], f'arg2 size is {arg2.size()} instead of {helper_map[instr_type]} in do_emulate_bitwise_instruction'
39 | 
40 |         if '.and' in self.instr_name:
41 |             result = simplify(arg1 & arg2)
42 |         elif '.or' in self.instr_name:
43 |             result = simplify(arg1 | arg2)
44 |         elif '.xor' in self.instr_name:
45 |             result = simplify(arg1 ^ arg2)
46 |         elif '.shr_s' in self.instr_name:
47 |             result = simplify(arg2 >> arg1)
48 |         elif '.shr_u' in self.instr_name:
49 |             result = simplify(LShR(arg2, arg1))
50 |         elif '.shl' in self.instr_name:
51 |             result = simplify(arg2 << arg1)
52 |         elif '.rotl' in self.instr_name:
53 |             result = simplify(RotateLeft(arg2, arg1))
54 |         elif '.rotr' in self.instr_name:
55 |             result = simplify(RotateRight(arg2, arg1))
56 |         else:
57 |             raise UnsupportInstructionError
58 | 
59 |         if is_bool(result):
60 |             if is_true(result):
61 |                 result = BitVecVal(1, 32)
62 |             elif is_false(result):
63 |                 result = BitVecVal(0, 32)
64 | 
65 |         assert is_bv(result) or is_bool(
66 |             result), f"in bitwise instruction, the value to be pushed is {type(result)} instead of BitVec or Bool"
67 | 
68 |         state.symbolic_stack.append(result)
69 | 
70 |         return [state]
71 | 


--------------------------------------------------------------------------------
/seewasm/arch/wasm/instructions/ConstantInstructions.py:
--------------------------------------------------------------------------------
 1 | # emulate the constant related instructions
 2 | 
 3 | import re
 4 | from struct import unpack
 5 | 
 6 | from seewasm.arch.wasm.exceptions import UnsupportInstructionError
 7 | from z3 import BitVecVal, Float32, Float64, FPVal
 8 | 
 9 | 
10 | class ConstantInstructions:
11 |     def __init__(self, instr_name, instr_operand, instr_string):
12 |         self.instr_name = instr_name
13 |         self.instr_operand = instr_operand
14 |         self.instr_str = instr_string
15 | 
16 |     # TODO overflow check in this function?
17 |     def emulate(self, state):
18 |         # there are two types of const: i and f, like:
19 |         # i32.const 0
20 |         # f64.const 0x1.9p+6 (;=100;)
21 |         # thus we have to deal with the different situations
22 |         mnemonic = self.instr_str.split(' ')[0]
23 |         const_num = self.instr_str.split(' ')[-1]
24 |         const_type_prefix, _ = mnemonic.split('.')
25 | 
26 |         if const_type_prefix == 'i32':
27 |             state.symbolic_stack.append(BitVecVal(const_num, 32))
28 |         elif const_type_prefix == 'i64':
29 |             state.symbolic_stack.append(BitVecVal(const_num, 64))
30 |         elif const_type_prefix == 'f32' or const_type_prefix == 'f64':
31 |             # extract float number 100 from (;=100;)
32 |             # TODO: need to be verified
33 |             num_found = re.search(';=([0-9.-]+);', const_num)
34 |             if num_found:
35 |                 float_num = num_found.group(1)
36 |                 if const_type_prefix == 'f32':
37 |                     state.symbolic_stack.append(FPVal(float_num, Float32()))
38 |                 else:
39 |                     state.symbolic_stack.append(FPVal(float_num, Float64()))
40 |             elif const_num[:2] == '0x':
41 |                 # remove '0x' prefix
42 |                 const_num = const_num[2:]
43 |                 # extend with '0' till const_num length is 4 bytes
44 |                 current_const_num_length = len(const_num)
45 | 
46 |                 need_zero = (8 - current_const_num_length) if const_type_prefix == 'f32' else (
47 |                     16 - current_const_num_length)
48 |                 const_num = '0' * need_zero + const_num
49 | 
50 |                 if const_type_prefix == 'f32':
51 |                     float_num = unpack('!f', bytes.fromhex(const_num))[0]
52 |                     state.symbolic_stack.append(FPVal(float_num, Float32()))
53 |                 else:
54 |                     float_num = unpack('!d', bytes.fromhex(const_num))[0]
55 |                     state.symbolic_stack.append(FPVal(float_num, Float64()))
56 |             else:
57 |                 raise UnsupportInstructionError
58 |         else:
59 |             raise UnsupportInstructionError
60 | 
61 |         return [state]
62 | 


--------------------------------------------------------------------------------
/seewasm/arch/wasm/instructions/ControlInstructions.py:
--------------------------------------------------------------------------------
  1 | import copy
  2 | import logging
  3 | from collections import defaultdict
  4 | 
  5 | from z3 import (Not, Or, is_bool, is_bv, is_bv_value, is_false, is_true,
  6 |                 simplify, unsat)
  7 | 
  8 | from seewasm.arch.wasm.configuration import Configuration
  9 | from seewasm.arch.wasm.exceptions import (ASSERT_FAIL, ProcFailTermination,
 10 |                                           ProcSuccessTermination,
 11 |                                           UnsupportInstructionError)
 12 | from seewasm.arch.wasm.lib.c_lib import CPredefinedFunction
 13 | from seewasm.arch.wasm.lib.go_lib import GoPredefinedFunction
 14 | from seewasm.arch.wasm.lib.utils import is_modeled
 15 | from seewasm.arch.wasm.lib.wasi import WASIImportFunction
 16 | from seewasm.arch.wasm.utils import (log_in_out, one_time_query_cache,
 17 |                                      readable_internal_func_name)
 18 | 
 19 | TERMINATED_FUNCS = {'__assert_fail', 'runtime.divideByZeroPanic'}  # callees that make deal_with_call raise ProcFailTermination(ASSERT_FAIL)
 20 | 
 21 | 
 22 | class ControlInstructions:
 23 |     def __init__(self, instr_name, instr_operand, instr_string):
 24 |         self.instr_name = instr_name
 25 |         self.instr_operand = instr_operand
 26 |         self.instr_string = instr_string
 27 |         self.skip_command = {'loop', 'end', 'br', 'else', 'block'}
 28 |         self.term_command = {'unreachable', 'return'}
 29 | 
 30 |     def store_context(self, param_str, return_str, state, callee_func_name):
 31 |         """
 32 |         Store the context of current stack and local.
 33 |         The sequence is:
 34 |         1. pop specific number of elements from stack, which will be used by callee
 35 |         2. store the current context, including (current_func, current_block, stack, local, require_return)
 36 |         3. assign popped elements in step 1 in local, change the current_func_name
 37 |         """
 38 |         logging.info(
 39 |             f"Call: {readable_internal_func_name(Configuration.get_func_index_to_func_name(), state.current_func_name)} -> {callee_func_name}")
 40 | 
 41 |         # step 1
 42 |         num_arg = 0
 43 |         if param_str:
 44 |             num_arg = len(param_str.split(' '))
 45 |             arg = [state.symbolic_stack.pop() for _ in range(num_arg)]
 46 | 
 47 |         # step 2
 48 |         state.context_stack.append((state.current_func_name,
 49 |                                     state.instr.cur_bb,
 50 |                                     [e for e in state.symbolic_stack],
 51 |                                     copy.copy(state.local_var),
 52 |                                     True if return_str else False))
 53 | 
 54 |         # step 3
 55 |         for x in range(num_arg):
 56 |             state.local_var[num_arg - 1 - x] = arg[x]
 57 |         # set the remaining local vars as None
 58 |         for x in range(num_arg, len(state.local_var)):
 59 |             try:
 60 |                 state.local_var.pop(x)
 61 |             except KeyError:
 62 |                 # if some of the local var is unused during the caller
 63 |                 # there is no need to pop it, thus continue the loop
 64 |                 continue
 65 | 
 66 |         state.current_func_name = callee_func_name
 67 | 
 68 |     def restore_context(self, state):
 69 |         """
 70 |         Restore context.
 71 | 
 72 |         1. pop an element from stack if require return
 73 |         2. restore the context
 74 |         3. push the element in step 1 into stack
 75 |         """
 76 |         if len(state.context_stack) == 0:
 77 |             raise ProcSuccessTermination(0)
 78 | 
 79 |         caller_func_name, cur_bb, stack, local, require_return = state.context_stack.pop()
 80 | 
 81 |         logging.info(
 82 |             f"Return: {readable_internal_func_name(Configuration.get_func_index_to_func_name(), state.current_func_name)}")
 83 | 
 84 |         # step 1
 85 |         if require_return:
 86 |             return_val = state.symbolic_stack.pop()
 87 | 
 88 |         # step 2
 89 |         state.current_func_name = caller_func_name
 90 |         state.current_bb_name = cur_bb
 91 |         state.symbolic_stack = stack
 92 |         state.local_var = local
 93 | 
 94 |         # step 3
 95 |         if require_return:
 96 |             state.symbolic_stack.append(return_val)
 97 | 
 98 |     def deal_with_call(self, state, f_offset, data_section, analyzer, lvar):
 99 |         # get the callee's function signature
100 |         target_func = analyzer.func_prototypes[f_offset]
101 |         callee_func_name, param_str, return_str, _ = target_func
102 | 
103 |         readable_callee_func_name = readable_internal_func_name(
104 |             Configuration.get_func_index_to_func_name(),
105 |             callee_func_name)
106 |         if Configuration.get_dsl_flag() and readable_callee_func_name.startswith("checker"):
107 |             # if it is a instrumented function
108 |             idx = int(readable_callee_func_name.split('$')[1])
109 |             """
110 |             if idx == -1:
111 |                 arg = _extract_params(param_str, state)[0]
112 |                 state.solver.add(arg > 0);
113 |             elif idx == -2:
114 |                 arg = _extract_params(param_str, state)[0]
115 |                 state.solver.add(arg > 0);
116 |             elif idx == 3:
117 |                 lvar['prior'] = abs(20 - lvar['rounds_i']) - 20
118 |             elif idx == 4:
119 |                 lvar['prior'] = abs(3 - lvar['rounds_j'])
120 |             """
121 |             states = [state]
122 |         elif Configuration.get_source_type() == 'c' and is_modeled(readable_callee_func_name, specify_lang='c'):
123 |             func = CPredefinedFunction(
124 |                 readable_callee_func_name, state.current_func_name)
125 |             states = log_in_out(
126 |                 readable_callee_func_name, "C Library")(
127 |                 func.emul)(
128 |                 state, param_str, return_str, data_section, analyzer)
129 |         elif Configuration.get_source_type() == 'go' and is_modeled(readable_callee_func_name, specify_lang='go'):
130 |             # TODO Go library func modeling is not tested
131 |             func = GoPredefinedFunction(
132 |                 readable_callee_func_name, state.current_func_name)
133 |             states = log_in_out(
134 |                 readable_callee_func_name, "Go Library")(
135 |                 func.emul)(
136 |                 state, param_str, return_str, data_section, analyzer)
137 |         elif Configuration.get_source_type() == 'rust' and is_modeled(readable_callee_func_name, specify_lang='rust'):
138 |             # TODO may model some rust library funcs
139 |             pass
140 |         # if the callee is imported (WASI)
141 |         elif is_modeled(readable_callee_func_name, specify_lang='wasi'):
142 |             func = WASIImportFunction(
143 |                 readable_callee_func_name, state.current_func_name)
144 |             states = log_in_out(
145 |                 readable_callee_func_name, "import")(
146 |                 func.emul)(
147 |                 state, param_str, return_str, data_section)
148 |         elif readable_callee_func_name in TERMINATED_FUNCS:
149 |             logging.info(f"Termination: {readable_callee_func_name}")
150 |             raise ProcFailTermination(ASSERT_FAIL)
151 |         else:
152 |             self.store_context(param_str, return_str, state,
153 |                                readable_callee_func_name)
154 |             states = [state]
155 |         return states
156 | 
157 |     def emulate(self, state, data_section, analyzer, lvar):
158 |         if self.instr_name in self.skip_command:
159 |             return [state]
160 |         if self.instr_name in self.term_command:
161 |             return [state]
162 | 
163 |         if self.instr_name == 'nop':
164 |             if state.instr.xref:
165 |                 self.restore_context(state)
166 |             return [state]
167 |         elif self.instr_name == 'br_if' or self.instr_name == 'if':
168 |             op = state.symbolic_stack.pop()
169 |             assert is_bv(op) or is_bool(
170 |                 op), f"the type of op popped from stack in `br_if`/`if` is {type(op)} instead of bv or bool"
171 |             states = []
172 |             if is_bv(op):
173 |                 op = simplify(op != 0)
174 | 
175 |             # | op      | branch              |
176 |             # | ------- | ------------------- |
177 |             # | False   | conditional_false_0 |
178 |             # | True    | conditional_true_0  |
179 |             # | BoolRef | both                |
180 | 
181 |             if is_true(op):
182 |                 state.edge_type = 'conditional_true_0'
183 |                 states.append(state)
184 |             elif is_false(op):
185 |                 state.edge_type = 'conditional_false_0'
186 |                 states.append(state)
187 |             elif not is_true(op) and not is_false(op):
188 |                 # these two flags are used to jump over unnecessary deepcopy
189 |                 no_need_true, no_need_false = False, False
190 |                 if unsat == one_time_query_cache(state.solver, op):
191 |                     no_need_true = True
192 |                 if unsat == one_time_query_cache(state.solver, Not(op)):
193 |                     no_need_false = True
194 | 
195 |                 if no_need_true and no_need_false:
196 |                     pass
197 |                 elif not no_need_true and not no_need_false:
198 |                     new_state = copy.deepcopy(state)
199 |                     # conditional_true
200 |                     state.edge_type = 'conditional_true_0'
201 |                     state.solver.add(op)
202 |                     # conditional_false
203 |                     new_state.edge_type = 'conditional_false_0'
204 |                     new_state.solver.add(Not(op))
205 |                     # append
206 |                     states.append(state)
207 |                     states.append(new_state)
208 |                 else:
209 |                     if no_need_true:
210 |                         state.edge_type = 'conditional_false_0'
211 |                         state.solver.add(Not(op))
212 |                         states.append(state)
213 |                     else:
214 |                         state.edge_type = 'conditional_true_0'
215 |                         state.solver.add(op)
216 |                         states.append(state)
217 |             else:
218 |                 exit(f"br_if/if instruction error. op is {op}")
219 | 
220 |             return states
221 |         elif self.instr_name == 'call_indirect':
222 |             # refer to: https://developer.mozilla.org/en-US/docs/WebAssembly/Understanding_the_text_format#webassembly_tables
223 |             # this instruction will pop an element out of the stack, and use this as an index in the table, i.e., elem section in Wasm module, to dynamically determine which fucntion will be invoked
224 |             elem_index_to_func = Configuration.get_elem_index_to_func()
225 | 
226 |             # target function index
227 |             op = state.symbolic_stack.pop()
228 |             assert is_bv_value(
229 |                 op), f"in call_indirect, op is a symbol ({op}), not support yet"
230 |             op = op.as_long()
231 | 
232 |             offset = analyzer.elements[0]['offset']
233 | 
234 |             callee_func_name = elem_index_to_func[op - offset]
235 |             callee_func_offset = -1
236 |             for func_offset, item in enumerate(analyzer.func_prototypes):
237 |                 if callee_func_name == readable_internal_func_name(
238 |                         Configuration.get_func_index_to_func_name(),
239 |                         item[0]):
240 |                     state.call_indirect_callee = callee_func_name
241 |                     callee_func_offset = func_offset
242 |                     break
243 | 
244 |             if callee_func_offset == -1:
245 |                 exit("no valid callee in call_indirect")
246 |             else:
247 |                 return self.deal_with_call(
248 |                     state, callee_func_offset, data_section, analyzer, lvar)
249 |         elif self.instr_name == 'br_table':
250 |             # state.instr.xref indicates the destination instruction's offset
251 |             # TODO examine br_table
252 |             op = state.symbolic_stack.pop()
253 | 
254 |             # operands of br_table instruction
255 |             ops = [i for i in self.instr_operand]
256 |             n_br, br_lis = ops[0], ops[1:-1]
257 | 
258 |             # construct a dict to minimize the possible states
259 |             target_branch2index = defaultdict(list)
260 |             for index, target in enumerate(br_lis):
261 |                 target_branch2index[target].append(index)
262 | 
263 |             # construct possible state
264 |             states = []
265 |             for target, index_list in target_branch2index.items():
266 |                 index_list = [simplify(op == i) for i in index_list]
267 |                 cond = simplify(Or(index_list))
268 |                 if is_false(cond):
269 |                     continue
270 |                 elif is_true(cond):
271 |                     # we can omit the "True" apppended into the constraint
272 |                     new_state = copy.deepcopy(state)
273 |                     new_state.edge_type = f"conditional_true_{target}"
274 |                     states.append(new_state)
275 |                 else:
276 |                     # we have to query z3
277 |                     new_state = copy.deepcopy(state)
278 |                     new_state.solver.add(cond)
279 |                     new_state.edge_type = f"conditional_true_{target}"
280 |                     states.append(new_state)
281 | 
282 |             # determine if we need the default branch
283 |             cond = simplify(Or(op >= n_br, op < 0))
284 |             if is_false(cond):
285 |                 # we don't need it
286 |                 pass
287 |             elif is_true(cond):
288 |                 state.edge_type = "conditional_false_0"
289 |                 states.append(state)
290 |             else:
291 |                 state.solver.add(cond)
292 |                 state.edge_type = "conditional_false_0"
293 |                 states.append(state)
294 | 
295 |             assert len(states) != 0, f"in br_table, no branch is selected"
296 |             return states
297 |         elif self.instr_name == 'call':
298 |             self.instr_operand = self.instr_string.split(' ')[1]
299 |             # get the callee's function signature
300 |             try:
301 |                 f_offset = int(self.instr_operand)
302 |             except ValueError:
303 |                 # it's possible that the `call` operand is a hex
304 |                 f_offset = int(self.instr_operand, 16)
305 |             return self.deal_with_call(
306 |                 state, f_offset, data_section, analyzer, lvar)
307 |         else:
308 |             raise UnsupportInstructionError
309 | 


--------------------------------------------------------------------------------
/seewasm/arch/wasm/instructions/ConversionInstructions.py:
--------------------------------------------------------------------------------
  1 | # emulate the conversion related instructions
  2 | 
  3 | from seewasm.arch.wasm.exceptions import UnsupportInstructionError
  4 | from z3 import (RNE, RTZ, BitVecSort, BitVecVal, Extract, Float32, Float64,
  5 |                 SignExt, ZeroExt, fpBVToFP, fpFPToFP, fpSignedToFP, fpToIEEEBV,
  6 |                 fpToSBV, fpToUBV, fpUnsignedToFP, simplify)
  7 | 
  8 | 
  9 | class ConversionInstructions:
 10 |     def __init__(self, instr_name, instr_operand, _):
 11 |         self.instr_name = instr_name
 12 |         self.instr_operand = instr_operand
 13 | 
 14 |     def emulate(self, state):
 15 |         arg0 = state.symbolic_stack.pop()
 16 | 
 17 |         if self.instr_name == 'i32.wrap/i64':
 18 |             assert arg0.size() == 64, 'i32.wrap/i64 has wrong arg type'
 19 |             divisor = BitVecVal(2 ** 32, 64)
 20 |             # mod
 21 |             result = simplify(Extract(31, 0, arg0 % divisor))
 22 |         elif self.instr_name == 'i64.extend_s/i32':
 23 |             assert arg0.size() == 32, 'i64.extend_s/i32 has wrong arg type'
 24 | 
 25 |             result = simplify(SignExt(32, arg0))
 26 |         elif self.instr_name == 'i64.extend_u/i32':
 27 |             assert arg0.size() == 32, 'i64.extend_u/i32 has wrong arg type'
 28 | 
 29 |             result = simplify(ZeroExt(32, arg0))
 30 |         elif self.instr_name == 'i32.trunc_s/f32':
 31 |             assert arg0.ebits() == 8, 'i32.trunc_s/f32 has wrong arg type'
 32 |             assert arg0.sbits() == 24, 'i32.trunc_s/f32 has wrong arg type'
 33 | 
 34 |             rm = RTZ()
 35 |             result = simplify(fpToSBV(rm, arg0, BitVecSort(32)))
 36 |             assert result.size() == 32, 'i32.trunc_s/f32 convert fail'
 37 |         elif self.instr_name == 'i32.trunc_s/f64':
 38 |             assert arg0.ebits() == 11, 'i32.trunc_s/f64 has wrong arg type'
 39 |             assert arg0.sbits() == 53, 'i32.trunc_s/f64 has wrong arg type'
 40 | 
 41 |             rm = RTZ()
 42 |             result = simplify(fpToSBV(rm, arg0, BitVecSort(32)))
 43 |             assert result.size() == 32, 'i32.trunc_s/f64 convert fail'
 44 |         elif self.instr_name == 'i64.trunc_s/f32':
 45 |             assert arg0.ebits() == 8, 'i64.trunc_s/f32 has wrong arg type'
 46 |             assert arg0.sbits() == 24, 'i64.trunc_s/f32 has wrong arg type'
 47 | 
 48 |             rm = RTZ()
 49 |             result = simplify(fpToSBV(rm, arg0, BitVecSort(64)))
 50 |             assert result.size() == 64, 'i64.trunc_s/f32 convert fail'
 51 |         elif self.instr_name == 'i64.trunc_s/f64':
 52 |             assert arg0.ebits() == 11, 'i64.trunc_s/f64 has wrong arg type'
 53 |             assert arg0.sbits() == 53, 'i64.trunc_s/f64 has wrong arg type'
 54 | 
 55 |             rm = RTZ()
 56 |             result = simplify(fpToSBV(rm, arg0, BitVecSort(64)))
 57 |             assert result.size() == 64, 'i64.trunc_s/f64 convert fail'
 58 |         elif self.instr_name == 'i32.trunc_u/f32':
 59 |             assert arg0.ebits() == 8, 'i32.trunc_u/f32 has wrong arg type'
 60 |             assert arg0.sbits() == 24, 'i32.trunc_u/f32 has wrong arg type'
 61 | 
 62 |             rm = RTZ()
 63 |             result = simplify(fpToUBV(rm, arg0, BitVecSort(32)))
 64 |             assert result.size() == 32, 'i32.trunc_u/f32 convert fail'
 65 |         elif self.instr_name == 'i32.trunc_u/f64':
 66 |             assert arg0.ebits() == 11, 'i32.trunc_u/f64 has wrong arg type'
 67 |             assert arg0.sbits() == 53, 'i32.trunc_u/f64 has wrong arg type'
 68 | 
 69 |             rm = RTZ()
 70 |             result = simplify(fpToUBV(rm, arg0, BitVecSort(32)))
 71 |             assert result.size() == 32, 'i32.trunc_u/f64 convert fail'
 72 |         elif self.instr_name == 'i64.trunc_u/f32':
 73 |             assert arg0.ebits() == 8, 'i64.trunc_u/f32 has wrong arg type'
 74 |             assert arg0.sbits() == 24, 'i64.trunc_u/f32 has wrong arg type'
 75 | 
 76 |             rm = RTZ()
 77 |             result = simplify(fpToUBV(rm, arg0, BitVecSort(64)))
 78 |             assert result.size() == 64, 'i64.trunc_u/f32 convert fail'
 79 |         elif self.instr_name == 'i64.trunc_u/f64':
 80 |             assert arg0.ebits() == 11, 'i64.trunc_u/f64 has wrong arg type'
 81 |             assert arg0.sbits() == 53, 'i64.trunc_u/f64 has wrong arg type'
 82 | 
 83 |             rm = RTZ()
 84 |             result = simplify(fpToUBV(rm, arg0, BitVecSort(64)))
 85 |             assert result.size() == 64, 'i64.trunc_u/f64 convert fail'
 86 |         elif self.instr_name == 'f32.demote/f64':
 87 |             assert arg0.ebits() == 11, 'f32.demote/f64 has wrong arg type'
 88 |             assert arg0.sbits() == 53, 'f32.demote/f64 has wrong arg type'
 89 | 
 90 |             rm = RNE()
 91 |             result = simplify(fpFPToFP(rm, arg0, Float32()))
 92 |             assert result.ebits() == 8, 'f32.demote/f64 conversion fail'
 93 |             assert result.sbits() == 24, 'f32.demote/f64 conversion fail'
 94 |         elif self.instr_name == 'f64.promote/f32':
 95 |             assert arg0.ebits() == 8, 'f64.promote/f32 has wrong arg type'
 96 |             assert arg0.sbits() == 24, 'f64.promote/f32 has wrong arg type'
 97 | 
 98 |             rm = RNE()
 99 |             result = simplify(fpFPToFP(rm, arg0, Float64()))
100 |             assert result.ebits() == 11, 'f64.promote/f32 conversion fail'
101 |             assert result.sbits() == 53, 'f64.promote/f32 conversion fail'
102 |         elif self.instr_name == 'f32.convert_s/i32':
103 |             assert arg0.size() == 32, 'f32.convert_s/i32 has wrong arg type'
104 | 
105 |             rm = RNE()
106 |             result = simplify(fpSignedToFP(rm, arg0, Float32()))
107 |             assert result.ebits() == 8, 'f32.convert_s/i32 conversion fail'
108 |             assert result.sbits() == 24, 'f32.convert_s/i32 conversion fail'
109 |         elif self.instr_name == 'f32.convert_s/i64':
110 |             assert arg0.size() == 64, 'f32.convert_s/i64 has wrong arg type'
111 | 
112 |             rm = RNE()
113 |             result = simplify(fpSignedToFP(rm, arg0, Float32()))
114 |             assert result.ebits() == 8, 'f32.convert_s/i64 conversion fail'
115 |             assert result.sbits() == 24, 'f32.convert_s/i64 conversion fail'
116 |         elif self.instr_name == 'f64.convert_s/i32':
117 |             assert arg0.size() == 32, 'f64.convert_s/i32 has wrong arg type'
118 | 
119 |             rm = RNE()
120 |             result = simplify(fpSignedToFP(rm, arg0, Float64()))
121 |             assert result.ebits() == 11, 'f64.convert_s/i32 conversion fail'
122 |             assert result.sbits() == 53, 'f64.convert_s/i32 conversion fail'
123 |         elif self.instr_name == 'f64.convert_s/i64':
124 |             assert arg0.size() == 64, 'f64.convert_s/i64 has wrong arg type'
125 | 
126 |             rm = RNE()
127 |             result = simplify(fpSignedToFP(rm, arg0, Float64()))
128 |             assert result.ebits() == 11, 'f64.convert_s/i64 conversion fail'
129 |             assert result.sbits() == 53, 'f64.convert_s/i64 conversion fail'
130 |         elif self.instr_name == 'f32.convert_u/i32':
131 |             assert arg0.size() == 32, 'f32.convert_u/i32 has wrong arg type'
132 | 
133 |             rm = RNE()
134 |             result = simplify(fpUnsignedToFP(rm, arg0, Float32()))
135 |             assert result.ebits() == 8, 'f32.convert_u/i32 conversion fail'
136 |             assert result.sbits() == 24, 'f32.convert_u/i32 conversion fail'
137 |         elif self.instr_name == 'f32.convert_u/i64':
138 |             assert arg0.size() == 64, 'f32.convert_u/i64 has wrong arg type'
139 | 
140 |             rm = RNE()
141 |             result = simplify(fpUnsignedToFP(rm, arg0, Float32()))
142 |             assert result.ebits() == 8, 'f32.convert_u/i64 conversion fail'
143 |             assert result.sbits() == 24, 'f32.convert_u/i64 conversion fail'
144 |         elif self.instr_name == 'f64.convert_u/i32':
145 |             assert arg0.size() == 32, 'f64.convert_u/i32 has wrong arg type'
146 | 
147 |             rm = RNE()
148 |             result = simplify(fpUnsignedToFP(rm, arg0, Float64()))
149 |             assert result.ebits() == 11, 'f64.convert_u/i32 conversion fail'
150 |             assert result.sbits() == 53, 'f64.convert_u/i32 conversion fail'
151 |         elif self.instr_name == 'f64.convert_u/i64':
152 |             assert arg0.size() == 64, 'f64.convert_u/i64 has wrong arg type'
153 | 
154 |             rm = RNE()
155 |             result = simplify(fpUnsignedToFP(rm, arg0, Float64()))
156 |             assert result.ebits() == 11, 'f64.convert_u/i64 conversion fail'
157 |             assert result.sbits() == 53, 'f64.convert_u/i64 conversion fail'
158 |         elif self.instr_name == 'i32.reinterpret/f32':
159 |             assert arg0.ebits() == 8, 'i32.reinterpret/f32 has wrong arg type'
160 |             assert arg0.sbits() == 24, 'i32.reinterpret/f32 has wrong arg type'
161 | 
162 |             result = simplify(fpToIEEEBV(arg0))
163 |             assert result.size() == 32, 'i32.reinterpret/f32 conversion fail'
164 |         elif self.instr_name == 'i64.reinterpret/f64':
165 |             assert arg0.ebits() == 11, 'i64.reinterpret/f64 has wrong arg type'
166 |             assert arg0.sbits() == 53, 'i64.reinterpret/f64 has wrong arg type'
167 | 
168 |             result = simplify(fpToIEEEBV(arg0))
169 |             assert result.size() == 64, 'i64.reinterpret/f64 conversion fail'
170 |         elif self.instr_name == 'f32.reinterpret/i32':
171 |             assert arg0.size() == 32, 'f32.reinterpret/i32 has wrong arg type'
172 | 
173 |             result = simplify(fpBVToFP(arg0, Float32()))
174 |             assert result.ebits() == 8, 'f32.reinterpret/i32 conversion fail'
175 |             assert result.sbits() == 24, 'f32.reinterpret/i32 conversion fail'
176 |         elif self.instr_name == 'f64.reinterpret/i64':
177 |             assert arg0.size() == 64, 'f64.reinterpret/i64 has wrong arg type'
178 | 
179 |             result = simplify(fpBVToFP(arg0, Float64()))
180 |             assert result.ebits() == 11, 'f64.reinterpret/i64 conversion fail'
181 |             assert result.sbits() == 53, 'f64.reinterpret/i64 conversion fail'
182 |         elif self.instr_name == 'i32.extend_s/i8':
183 |             assert arg0.size() == 8, 'i32.extend_s/i8 has wrong arg type'
184 | 
185 |             result = simplify(SignExt(24, arg0))
186 |         else:
187 |             print('\nErr:\nUnsupported instruction: %s\n' % self.instr_name)
188 |             raise UnsupportInstructionError
189 | 
190 |         state.symbolic_stack.append(result)
191 | 
192 |         return [state]
193 | 


--------------------------------------------------------------------------------
/seewasm/arch/wasm/instructions/LogicalInstructions.py:
--------------------------------------------------------------------------------
  1 | # emulate the logical related instructions
  2 | 
  3 | from seewasm.arch.wasm.exceptions import UnsupportInstructionError
  4 | from z3 import (UGE, UGT, ULE, ULT, BitVecVal, If, fpEQ, fpGEQ, fpGT, fpLEQ,
  5 |                 fpLT, fpNEQ, is_bv, is_false, is_true, simplify)
  6 | 
  7 | helper_map = {  # operand shape expected for each value-type prefix of an instruction name
  8 |     'i32': 32,  # bit-vector width, compared against size()
  9 |     'i64': 64,  # bit-vector width, compared against size()
 10 |     'f32': [8, 24],  # [exponent bits, significand bits], compared against ebits()/sbits()
 11 |     'f64': [11, 53]  # [exponent bits, significand bits], compared against ebits()/sbits()
 12 | }
 13 | 
 14 | 
 15 | class LogicalInstructions:
 16 |     def __init__(self, instr_name, instr_operand, _):
 17 |         self.instr_name = instr_name
 18 |         self.instr_operand = instr_operand
 19 | 
 20 |     # TODO overflow check in this function?
 21 |     def emulate(self, state):
 22 |         def do_emulate_logical_int_instruction(state):
 23 |             instr_type = self.instr_name[:3]
 24 |             if 'eqz' in self.instr_name:
 25 |                 arg0 = state.symbolic_stack.pop()
 26 | 
 27 |                 assert arg0.size(
 28 |                 ) == helper_map[instr_type], f"in `eqz` the argument popped size is {arg0.size()} instead of {helper_map[instr_type]}"
 29 | 
 30 |                 result = arg0 == 0
 31 |             else:
 32 |                 arg1, arg2 = state.symbolic_stack.pop(), state.symbolic_stack.pop()
 33 | 
 34 |                 assert is_bv(arg1) and is_bv(
 35 |                     arg2), f"in `logical` instruction, arg1 or arg2 type is wrong instead of BitVec"
 36 | 
 37 |                 if 'eq' in self.instr_name:
 38 |                     result = arg1 == arg2
 39 |                 elif 'ne' in self.instr_name:
 40 |                     result = arg1 != arg2
 41 |                 elif 'lt_s' in self.instr_name:
 42 |                     result = arg2 < arg1
 43 |                 elif 'lt_u' in self.instr_name:
 44 |                     result = ULT(arg2, arg1)
 45 |                 elif 'gt_s' in self.instr_name:
 46 |                     result = arg2 > arg1
 47 |                 elif 'gt_u' in self.instr_name:
 48 |                     result = UGT(arg2, arg1)
 49 |                 elif 'le_s' in self.instr_name:
 50 |                     result = arg2 <= arg1
 51 |                 elif 'le_u' in self.instr_name:
 52 |                     result = ULE(arg2, arg1)
 53 |                 elif 'ge_s' in self.instr_name:
 54 |                     result = arg2 >= arg1
 55 |                 elif 'ge_u' in self.instr_name:
 56 |                     result = UGE(arg2, arg1)
 57 |                 else:
 58 |                     raise UnsupportInstructionError
 59 | 
 60 |             # try to simplify result and insert 1 or 0 directly, instead of an ite statement
 61 |             result = simplify(result)
 62 |             if is_true(result):
 63 |                 state.symbolic_stack.append(BitVecVal(1, 32))
 64 |             elif is_false(result):
 65 |                 state.symbolic_stack.append(BitVecVal(0, 32))
 66 |             else:
 67 |                 state.symbolic_stack.append(
 68 |                     If(result, BitVecVal(1, 32), BitVecVal(0, 32)))
 69 | 
 70 |             return [state]
 71 | 
 72 |         def do_emulate_logical_float_instruction(state):
 73 |             arg1, arg2 = state.symbolic_stack.pop(), state.symbolic_stack.pop()
 74 |             instr_type = self.instr_name[:3]
 75 | 
 76 |             assert arg1.ebits() == helper_map[instr_type][0] and arg1.sbits(
 77 |             ) == helper_map[instr_type][1], 'emul_logical_f_instr arg1 type mismatch'
 78 |             assert arg2.ebits() == helper_map[instr_type][0] and arg2.sbits(
 79 |             ) == helper_map[instr_type][1], 'emul_logical_f_instr arg2 type mismatch'
 80 | 
 81 |             if 'eq' in self.instr_name:
 82 |                 result = fpEQ(arg1, arg2)
 83 |             elif 'ne' in self.instr_name:
 84 |                 result = fpNEQ(arg1, arg2)
 85 |             elif 'lt' in self.instr_name:
 86 |                 result = fpLT(arg2, arg1)
 87 |             elif 'le' in self.instr_name:
 88 |                 result = fpLEQ(arg2, arg1)
 89 |             elif 'gt' in self.instr_name:
 90 |                 result = fpGT(arg2, arg1)
 91 |             elif 'ge' in self.instr_name:
 92 |                 result = fpGEQ(arg2, arg1)
 93 |             else:
 94 |                 raise UnsupportInstructionError
 95 | 
 96 |             # try to simplify result and insert 1 or 0 directly, instead of an ite statement
 97 |             result = simplify(result)
 98 |             if is_true(result):
 99 |                 state.symbolic_stack.append(BitVecVal(1, 32))
100 |             elif is_false(result):
101 |                 state.symbolic_stack.append(BitVecVal(0, 32))
102 |             else:
103 |                 state.symbolic_stack.append(
104 |                     If(result, BitVecVal(1, 32), BitVecVal(0, 32)))
105 | 
106 |             return [state]
107 | 
108 |         op_type = self.instr_name[:1]
109 |         if op_type == 'i':
110 |             return do_emulate_logical_int_instruction(state)
111 |         else:
112 |             return do_emulate_logical_float_instruction(state)
113 | 


--------------------------------------------------------------------------------
/seewasm/arch/wasm/instructions/MemoryInstructions.py:
--------------------------------------------------------------------------------
  1 | # emulate the memory related instructions
  2 | 
  3 | import re
  4 | 
  5 | from seewasm.arch.wasm.exceptions import UnsupportInstructionError
  6 | from seewasm.arch.wasm.memory import (insert_symbolic_memory,
  7 |                                       lookup_symbolic_memory_data_section)
  8 | from seewasm.arch.wasm.utils import getConcreteBitVec
  9 | from z3 import (BitVecVal, Extract, Float32, Float64, SignExt, ZeroExt,
 10 |                 fpBVToFP, fpToIEEEBV, is_bv_value, simplify)
 11 | 
 12 | memory_count = 2  # value pushed by `current_memory`; module-level, updated by `grow_memory`
 13 | memory_step = 2  # growth increment used by `grow_memory`
 14 | 
 15 | 
 16 | class MemoryInstructions:
 17 |     def __init__(self, instr_name, instr_operand, instr_string):
 18 |         self.instr_name = instr_name
 19 |         self.instr_operand = instr_operand
 20 |         self.instr_str = instr_string
 21 | 
 22 |     def emulate(self, state, data_section):
 23 |         global memory_count, memory_step
 24 |         if self.instr_name == 'current_memory':
 25 |             state.symbolic_stack.append(BitVecVal(memory_count, 32))
 26 |         elif self.instr_name == 'grow_memory':
 27 |             prev_size = memory_count
 28 |             memory_count += memory_step
 29 |             state.symbolic_stack.append(BitVecVal(prev_size, 32))
 30 |         elif self.instr_name == "memory.copy":
 31 |             # memory.copy
 32 |             # The instruction has the signature [i32 i32 i32] -> []. The parameters are, in order:
 33 |             # top-0: Number of bytes to copy
 34 |             # top-1: Source address to copy from
 35 |             # top-2: Destination address to copy to
 36 |             # example:
 37 |             #   ;; Copy data in default memory from [100, 125] to [50, 75]
 38 |             #   i32.const 50 ;; Destination address to copy to (top-2)
 39 |             #   i32.const 100 ;; Source address to copy from (top-1)
 40 |             #   i32.const 25 ;; Number of bytes to copy (top-0)
 41 |             #   memory.copy  ;; Copy memory
 42 |             len_v = state.symbolic_stack.pop().as_long()
 43 |             src_addr = state.symbolic_stack.pop().as_long()
 44 |             dest_addr = state.symbolic_stack.pop().as_long()
 45 |             # copy memory from src to dst
 46 |             vlis = [
 47 |                 lookup_symbolic_memory_data_section(
 48 |                     state.symbolic_memory, data_section, src_addr + i, 1)
 49 |                 for i in range(len_v)]
 50 |             for i, v in enumerate(vlis):
 51 |                 state.symbolic_memory = insert_symbolic_memory(
 52 |                     state.symbolic_memory, dest_addr + i, 1, v)
 53 |             print(f"memory.copy: src_addr={src_addr}, dest_addr={dest_addr}, len={len_v}")
 54 |         elif self.instr_name == "memory.fill":
 55 |             # memory.fill
 56 |             # The instruction has the signature [i32 i32 i32] -> []. The parameters are, in order:
 57 |             # top-0: The number of bytes to update
 58 |             # top-1: The value to set each byte to (must be < 256)
 59 |             # top-2: The pointer to the region to update
 60 |             # example:
 61 |             #   ;; Fill region at offset/range in default memory with 255
 62 |             #   i32.const 200 ;; The pointer to the region to update (top-2)
 63 |             #   i32.const 255 ;; The value to set each byte to (must be < 256) (top-1)
 64 |             #   i32.const 100 ;; The number of bytes to update (top-0)
 65 |             #   memory.fill ;; Fill default memory
 66 |             len_v = state.symbolic_stack.pop().as_long()
 67 |             val = state.symbolic_stack.pop().as_long()
 68 |             addr = state.symbolic_stack.pop().as_long()
 69 |             print(f"memory.fill: addr={addr}, val={val}, len={len_v}")
 70 |         elif 'load' in self.instr_name:
 71 |             load_instr(self.instr_str, state, data_section)
 72 |         elif 'store' in self.instr_name:
 73 |             store_instr(self.instr_str, state)
 74 |         else:
 75 |             print('\nErr:\nUnsupported instruction: %s\n' % self.instr_name)
 76 |             raise UnsupportInstructionError
 77 | 
 78 |         return [state]
 79 | 
 80 | 
 81 | def load_instr(instr, state, data_section):
 82 |     base = state.symbolic_stack.pop()
 83 |     # offset maybe int or hex
 84 |     try:
 85 |         offset = int(instr.split(' ')[2])
 86 |     except ValueError:
 87 |         offset = int(instr.split(' ')[2], 16)
 88 |     addr = simplify(base + offset)
 89 | 
 90 |     if is_bv_value(addr):
 91 |         addr = addr.as_long()
 92 | 
 93 |     # determine how many bytes should be loaded
 94 |     # the dict is like {'8': 1}
 95 |     bytes_length_mapping = {str(k): k // 8 for k in range(8, 65, 8)}
 96 |     instr_name = instr.split(' ')[0]
 97 |     if len(instr_name) == 8:
 98 |         load_length = bytes_length_mapping[instr_name[1:3]]
 99 |     else:
100 |         load_length = bytes_length_mapping[re.search(
101 |             r"load([0-9]+)\_", instr_name).group(1)]
102 | 
103 |     val = lookup_symbolic_memory_data_section(
104 |         state.symbolic_memory, data_section, addr, load_length)
105 | 
106 |     if val.size() != 8 * load_length:
107 |         # we assume the memory are filled by 0 initially
108 |         val = ZeroExt(8 * load_length - val.size(), val)
109 | 
110 |     if val is None:
111 |         exit(f"the loaded value should not be None")
112 |         # val = BitVec(f'load{load_length}*({addr})', 8*load_length)
113 | 
114 |     # cast to other type of bit vector
115 |     float_mapping = {
116 |         'f32': Float32,
117 |         'f64': Float64,
118 |     }
119 |     if len(instr_name) == 8 and instr_name[0] == "f":
120 |         val = simplify(fpBVToFP(val, float_mapping[instr_name[:3]]()))
121 |     elif instr_name[-2] == "_":
122 |         if instr_name[-1] == "s":  # sign extend
123 |             val = simplify(
124 |                 SignExt(int(instr_name[1: 3]) - load_length * 8, val))
125 |         else:
126 |             val = simplify(
127 |                 ZeroExt(int(instr_name[1: 3]) - load_length * 8, val))
128 | 
129 |     # if can not load from the memory area
130 |     if val is not None:
131 |         state.symbolic_stack.append(val)
132 |     else:
133 |         state.symbolic_stack.append(getConcreteBitVec(
134 |             instr_name[:3], f'load_{instr_name[:3]}*({str(addr)})'))
135 | 
136 | 
137 | # deal with store instruction
138 | def store_instr(instr, state):
139 |     # offset may be int or hex
140 |     try:
141 |         offset = int(instr.split(' ')[2])
142 |     except ValueError:
143 |         offset = int(instr.split(' ')[2], 16)
144 | 
145 |     val, base = state.symbolic_stack.pop(), state.symbolic_stack.pop()
146 |     addr = simplify(base + offset)
147 | 
148 |     # change addr's type to int if possible
149 |     # or it will be the BitVecRef
150 |     if is_bv_value(addr):
151 |         addr = addr.as_long()
152 | 
153 |     # determine how many bytes should be stored
154 |     # the dict is like {'8': 1}
155 |     bytes_length_mapping = {str(k): k // 8 for k in range(8, 65, 8)}
156 |     instr_name = instr.split(' ')[0]
157 |     if len(instr_name) == 9:
158 |         if instr_name[0] == 'f':
159 |             val = fpToIEEEBV(val)
160 |         state.symbolic_memory = insert_symbolic_memory(
161 |             state.symbolic_memory, addr, bytes_length_mapping[instr_name[1:3]], val)
162 |     else:
163 |         stored_length = bytes_length_mapping[re.search(
164 |             r"store([0-9]+)", instr_name).group(1)]
165 |         val = simplify(Extract(stored_length * 8 - 1, 0, val))
166 |         state.symbolic_memory = insert_symbolic_memory(
167 |             state.symbolic_memory, addr, stored_length, val)
168 | 


--------------------------------------------------------------------------------
/seewasm/arch/wasm/instructions/ParametricInstructions.py:
--------------------------------------------------------------------------------
 1 | from copy import deepcopy
 2 | 
 3 | from seewasm.arch.wasm.exceptions import UnsupportInstructionError
 4 | from seewasm.arch.wasm.utils import one_time_query_cache
 5 | from z3 import Not, is_bool, is_bv, is_false, is_true, simplify, unsat
 6 | 
 7 | 
 8 | class ParametricInstructions:
 9 |     def __init__(self, instr_name, instr_operand, _):
10 |         self.instr_name = instr_name
11 |         self.instr_operand = instr_operand
12 | 
13 |     def emulate(self, state):
14 |         if self.instr_name == 'drop':
15 |             state.symbolic_stack.pop()
16 |             return [state]
17 |         elif self.instr_name == 'select':  # select instruction
18 |             arg0, arg1, arg2 = state.symbolic_stack.pop(
19 |             ), state.symbolic_stack.pop(), state.symbolic_stack.pop()
20 |             assert is_bv(arg0) or is_bool(
21 |                 arg0), f"in select, arg0 type is {type(arg0)} instead of bv or bool"
22 |             # mimic the br_if
23 |             if is_bv(arg0):
24 |                 # NOTE: if arg0 is zero, return arg1, or arg2
25 |                 # ref: https://developer.mozilla.org/en-US/docs/WebAssembly/Reference/Control_flow/Select
26 |                 op = simplify(arg0 == 0)
27 | 
28 |             if is_true(op):
29 |                 state.symbolic_stack.append(arg1)
30 |                 return [state]
31 |             elif is_false(op):
32 |                 state.symbolic_stack.append(arg2)
33 |                 return [state]
34 |             elif not is_true(op) and not is_false(op):
35 |                 # these two flags are used to jump over unnecessary deepcopy
36 |                 no_need_true, no_need_false = False, False
37 |                 if unsat == one_time_query_cache(state.solver, op):
38 |                     no_need_true = True
39 |                 if unsat == one_time_query_cache(state.solver, Not(op)):
40 |                     no_need_false = True
41 | 
42 |                 if no_need_true and no_need_false:
43 |                     pass
44 |                 elif not no_need_true and not no_need_false:
45 |                     new_state = deepcopy(state)
46 | 
47 |                     state.solver.add(op)
48 |                     state.symbolic_stack.append(arg1)
49 | 
50 |                     new_state.solver.add(Not(op))
51 |                     new_state.symbolic_stack.append(arg2)
52 | 
53 |                     return [state, new_state]
54 |                 else:
55 |                     if no_need_true:
56 |                         state.solver.add(Not(op))
57 |                         state.symbolic_stack.append(arg2)
58 |                     else:
59 |                         state.solver.add(op)
60 |                         state.symbolic_stack.append(arg1)
61 |                     return [state]
62 |             else:
63 |                 exit(f"select instruction error. op is {op}")
64 |         else:
65 |             raise UnsupportInstructionError
66 | 


--------------------------------------------------------------------------------
/seewasm/arch/wasm/instructions/VariableInstructions.py:
--------------------------------------------------------------------------------
 1 | # emulate the variable related instructions
 2 | 
 3 | from seewasm.arch.wasm.exceptions import UnsupportInstructionError, UnsupportGlobalTypeError
 4 | from z3 import BitVecVal, is_bv, is_bv_value
 5 | 
 6 | 
 7 | class VariableInstructions:
 8 |     def __init__(self, instr_name, instr_operand, _):
 9 |         self.instr_name = instr_name
10 |         self.instr_operand = instr_operand
11 | 
12 |     def emulate(self, state):
13 |         # TODO
14 |         # for go_samples.nosync/tinygo_main.wasm, the global.get operand would be prefixed by four \x80
15 |         if self.instr_operand.startswith(b'\x80\x80\x80\x80'):
16 |             self.instr_operand = self.instr_operand[4:]
17 |         op = int.from_bytes(self.instr_operand, byteorder='little')
18 | 
19 |         if self.instr_name == 'get_local':
20 |             if state.local_var.get(op, None) is not None:
21 |                 state.symbolic_stack.append(state.local_var[op])
22 |             else:
23 |                 state.symbolic_stack.append(state.local_var[op])
24 |                 # raise UninitializedLocalVariableError
25 |         elif self.instr_name == 'set_local':
26 |             var = state.symbolic_stack.pop()
27 |             state.local_var[op] = var
28 |         elif self.instr_name == 'get_global':
29 |             global_index = op
30 |             global_operand = state.globals[global_index]
31 | 
32 |             if isinstance(
33 |                     global_operand, str) or isinstance(
34 |                     global_operand, int):
35 |                 state.symbolic_stack.append(BitVecVal(global_operand, 32))
36 |             elif is_bv(global_operand) or is_bv_value(global_operand):
37 |                 # the operand is a BitVecRef or BitVecNumRef
38 |                 state.symbolic_stack.append(global_operand)
39 |             else:
40 |                 raise UnsupportGlobalTypeError
41 |         elif self.instr_name == 'set_global':
42 |             global_operand = state.symbolic_stack.pop()
43 |             global_index = op
44 | 
45 |             state.globals[global_index] = global_operand
46 |         elif self.instr_name == 'tee_local':
47 |             var = state.symbolic_stack[-1]
48 |             state.local_var[op] = var
49 |         else:
50 |             raise UnsupportInstructionError
51 |         return [state]
52 | 


--------------------------------------------------------------------------------
/seewasm/arch/wasm/instructions/__init__.py:
--------------------------------------------------------------------------------
 1 | from .ArithmeticInstructions import *
 2 | from .BitwiseInstructions import *
 3 | from .ConstantInstructions import *
 4 | from .ControlInstructions import *
 5 | from .ConversionInstructions import *
 6 | from .LogicalInstructions import *
 7 | from .MemoryInstructions import *
 8 | from .ParametricInstructions import *
 9 | from .VariableInstructions import *
10 | 


--------------------------------------------------------------------------------
/seewasm/arch/wasm/lib/utils.py:
--------------------------------------------------------------------------------
 1 | # this is the helper function which are only used in lib folder
 2 | 
 3 | from z3 import BitVecVal, is_bv, is_bv_value
 4 | 
 5 | from seewasm.arch.wasm.configuration import Configuration
 6 | from seewasm.arch.wasm.memory import (insert_symbolic_memory,
 7 |                                       lookup_symbolic_memory_data_section)
 8 | 
 9 | MODELED_FUNCS = {
10 |     'c':
11 |     {'__small_printf', 'abs', 'atof', 'atoi', 'exp', 'getchar',
12 |      'iprintf', 'printf', 'putchar', 'puts', 'scanf', 'swap',
13 |      'system', 'emscripten_resize_heap', 'fopen', 'vfprintf',
14 |      'open', 'exit', 'setlocale', 'hard_locale'},
15 |     'go': {'fmt.Scanf', 'fmt.Printf', 'runtime.divideByZeroPanic', 'runtime.lookupPanic', 'runtime.nilPanic'
16 |       'runtime.slicePanic', 'runtime.sliceToArrayPointerPanic', 'runtime.unsafeSlicePanic', 'runtime.chanMakePanic',
17 |       'runtime.negativeShiftPanic', 'runtime.blockingPanic', 'runtime.calculateHeapAddresses', 'memset', 'runtime.alloc', 'memcpy',
18 |       'syscall/js.valueGet', 'runtime.putchar'},
19 |     'rust': {},
20 |     'wasi':
21 |     {'args_sizes_get', 'args_get', 'environ_sizes_get',
22 |      'fd_advise', 'fd_fdstat_get', 'fd_tell', 'fd_seek',
23 |                   'fd_close', 'fd_read', 'fd_write', 'proc_exit',
24 |                   'fd_prestat_get', 'fd_prestat_dir_name', 'path_open'}, }
25 | 
26 | 
def is_modeled(func_name, specify_lang=None):
    """
    Tell whether `func_name` appears in MODELED_FUNCS.

    When `specify_lang` is given (truthy) only that language's entry is
    consulted. Otherwise the 'wasi' entry is checked first and, failing that,
    the entry for Configuration.get_source_type().
    """
    if not specify_lang:
        if func_name in MODELED_FUNCS['wasi']:
            return True
        return func_name in MODELED_FUNCS[Configuration.get_source_type()]
    return func_name in MODELED_FUNCS[specify_lang]
32 | 
33 | 
def _extract_params(param_str, state):
    """
    Pop the arguments of an import function off `state.symbolic_stack`.

    The number of values popped equals the number of space-separated tokens in
    `param_str`. Values come back in pop order, i.e. reversed with respect to
    the signature: for foo (a, b) the result is [b, a]. Concrete bit-vector
    values are converted to Python ints; everything else is returned as-is.
    """
    arg_count = len(param_str.split(" "))
    popped = [state.symbolic_stack.pop() for _ in range(arg_count)]

    # concretize whatever is already a constant
    return [arg.as_long() if is_bv_value(arg) else arg for arg in popped]
54 | 
55 | 
def _storeN(state, dest, val, len_in_bytes):
    """
    Store `val` into the state's symbolic memory at `dest`, `len_in_bytes` wide.

    A value that is not already a z3 bit-vector is first wrapped into a
    BitVecVal of len_in_bytes * 8 bits. `state.symbolic_memory` is rebound to
    whatever insert_symbolic_memory returns.
    """
    stored = val if is_bv(val) else BitVecVal(val, len_in_bytes * 8)
    state.symbolic_memory = insert_symbolic_memory(
        state.symbolic_memory, dest, len_in_bytes, stored)
64 | 
65 | 
def _loadN(state, data_section, dest, len_in_bytes):
    """
    Look up `len_in_bytes` bytes at `dest` through
    lookup_symbolic_memory_data_section and return the result, converted to a
    Python int when it is a concrete bit-vector value.
    """
    loaded = lookup_symbolic_memory_data_section(
        state.symbolic_memory, data_section, dest, len_in_bytes)
    return loaded.as_long() if is_bv_value(loaded) else loaded
72 | 


--------------------------------------------------------------------------------
/seewasm/arch/wasm/solver.py:
--------------------------------------------------------------------------------
 1 | from z3 import Solver
 2 | 
 3 | # from lab_solver import *
 4 | 
 5 | 
 6 | class SMTSolver:
 7 |     def __new__(cls, designated_solver):
 8 |         if designated_solver == 'z3':
 9 |             return Solver()
10 |         else:
11 |             raise Exception("No SMT backend found")
12 | 


--------------------------------------------------------------------------------
/seewasm/arch/wasm/utils.py:
--------------------------------------------------------------------------------
  1 | # This file gives some practical functions that will be adopted by other files
  2 | 
  3 | import json
  4 | import logging
  5 | import re
  6 | import struct
  7 | from codecs import decode
  8 | from datetime import datetime
  9 | from os import makedirs, path
 10 | from random import random
 11 | 
 12 | from seewasm.arch.wasm.configuration import Configuration
 13 | from seewasm.arch.wasm.exceptions import (NO_EXIT, INVALIDMEMORY, ProcFailTermination,
 14 |                                           UnsupportZ3TypeError)
 15 | from seewasm.arch.wasm.solver import SMTSolver
 16 | from z3 import (FP, BitVec, BitVecRef, Float32, Float64, is_bv, is_bv_value,
 17 |                 sat, unsat)
 18 | 
# Base address used for opened files.
# NOTE(review): how this value is combined with file descriptors is not
# visible in this file - confirm against the callers.
FILE_BASE_ADDR = 100000000
 21 | 
 22 | 
 23 | # this is a mapping, which maps the data type to the corresponding BitVec
 24 | def getConcreteBitVec(type, name):
 25 |     if type == 'i32':
 26 |         return BitVec(name, 32)
 27 |     elif type == 'i64':
 28 |         return BitVec(name, 64)
 29 |     elif type == 'f32':
 30 |         return FP(name, Float32())
 31 |     elif type == 'f64':
 32 |         return FP(name, Float64())
 33 |     else:
 34 |         raise UnsupportZ3TypeError
 35 | 
 36 | 
 37 | def readable_internal_func_name(func_index_to_func_name, internal_func_name):
 38 |     """
 39 |     Convert the internal name to a more readable one with the help of func_index_to_func_name
 40 |     """
 41 |     if func_index_to_func_name is None:
 42 |         return internal_func_name
 43 | 
 44 |     if not internal_func_name.startswith('$'):
 45 |         return internal_func_name
 46 | 
 47 |     readable_name = None
 48 |     try:
 49 |         readable_name = func_index_to_func_name[int(
 50 |             re.search('(\d+)', internal_func_name).group())]
 51 |     except (AttributeError, KeyError) as _:
 52 |         # if the internal_function_name is the readable name already
 53 |         readable_name = internal_func_name
 54 |     assert readable_name is not None, f"the internal funciton {internal_func_name} cannot find its corresponding readable name"
 55 |     return readable_name
 56 | 
 57 | 
 58 | def bin_to_float(b):
 59 |     """ Convert binary string to a float. """
 60 |     bf = int_to_bytes(int(b, 2), 8)  # 8 bytes needed for IEEE 754 binary64.
 61 |     return struct.unpack('>d', bf)[0]
 62 | 
 63 | 
 64 | def int_to_bytes(n, length):  # Helper function
 65 |     """ Int/long to byte string.
 66 | 
 67 |         Python 3.2+ has a built-in int.to_bytes() method that could be used
 68 |         instead, but the following works in earlier versions including 2.x.
 69 |     """
 70 |     return decode('%%0%dx' % (length << 1) % n, 'hex')[-length:]
 71 | 
 72 | 
 73 | # the patterns used in C printf, and their corresponding length of to be loaded memory
 74 | C_TYPE_TO_LENGTH = {'s': 4, 'c': 4, 'd': 4, 'u': 4, 'x': 4, 'f': 8}
 75 | 
 76 | 
 77 | def calc_memory_align(parsed_pattern):
 78 |     """
 79 |     Used for calculate memory align in printf
 80 |     """
 81 |     offset = []
 82 |     for i, item in enumerate(parsed_pattern):
 83 |         cur_type = item[-1][-1]
 84 |         offset.append(C_TYPE_TO_LENGTH[cur_type])
 85 | 
 86 |         # decide if we should align the memory
 87 |         if cur_type == 'f':
 88 |             previous_sum = sum(offset[:i])
 89 |             if previous_sum % 8 != 0:
 90 |                 offset[i - 1] += 4
 91 | 
 92 |     return offset
 93 | 
 94 | 
 95 | def parse_printf_formatting(lines):
 96 |     cfmt = '''\
 97 | (                                  # start of capture group 1
 98 | %                                  # literal "%"
 99 | (?:                                # first option
100 | (?:[-+0 #]{0,5})                   # optional flags
101 | (?:\d+|\*)?                        # width
102 | (?:\.(?:\d+|\*))?                  # precision
103 | (?:h|l|ll|w|I|I32|I64)?            # size
104 | [cCdiouxXeEfgGaAnpsSZ]             # type
105 | ) |                                # OR
106 | %%)                                # literal "%%"
107 | '''
108 | 
109 |     # tuple list, in which each element consisting of line number, begin position and pattern
110 |     result = []
111 |     for line_num, line in enumerate(lines.splitlines()):
112 |         for m in re.finditer(cfmt, line, flags=re.X):
113 |             result.append([line_num, m.start(1), m.group(1)])
114 |     return result
115 | 
116 | 
def _extract_outermost_int(num):
    """
    Return the first concrete integer among the direct arguments of `num`.

    For the z3 bit-vector `a + 87` this is 87; for `a + b` it is None because
    no direct argument is a constant. A plain Python int is returned as-is.
    Any other input type terminates the process through exit().
    """
    if is_bv(num):
        for idx in range(num.num_args()):
            child = num.arg(idx)
            if is_bv_value(child):
                return child.as_long()
        return None

    if isinstance(num, int):
        return num

    exit(f"the type of num is {type(num)}, cannot extract the int args")
135 | 
136 | 
def str_to_little_endian_int(string):
    """
    Encode `string` to bytes and read those bytes as a little-endian integer.
    For example, "abc" gives 6513249 (0x636261).
    """
    raw = str.encode(string)
    return int.from_bytes(raw, byteorder="little")
143 | 
144 | 
def write_result(state, exit_code=NO_EXIT):
    """
    Write result in ./output/result folder in json format

    Nothing is written when the state's solver reports unsat. Otherwise a new
    JSON file is created under
    ./output/result/<file name>_<start time>/state_<timestamp>_<random>.json
    containing:
        "Status" or "Return": "Status" when `exit_code` is not NO_EXIT,
            otherwise "Return" (top of the symbolic stack if the entry
            signature declares a return value, else "0").
        "Solution": one entry per symbol of the solver model.
        "Output": one {"name", "output"} entry per selected writable file.

    Args:
        state: the state to dump; its solver, symbolic_stack and file_sys
            attributes are read.
        exit_code: NO_EXIT, or an object exposing `.value`.
            NOTE(review): presumably a z3 bit-vector value - confirm with the
            callers.
    """
    # nothing to report when the path constraints are unsatisfiable
    if unsat == state.solver.check():
        return

    # timestamp plus a random suffix keeps file names of different states apart
    file_name = f"./output/result/{Configuration.get_file_name()}_{Configuration.get_start_time()}/state_{datetime.timestamp(datetime.now()):.3f}_{random():.5f}.json"
    makedirs(path.dirname(file_name), exist_ok=True)
    state_result = {}
    with open(file_name, 'w') as fp:
        if exit_code != NO_EXIT:
            if int(exit_code.value) >= 0:
                state_result["Status"] = f"Exit with status code {exit_code}"
            else:
                # constructed exit_code
                state_result["Status"] = f"Exit"
        else:
            # return value
            # get_entry_signature() returns a tuple (name, params, return, type)
            if Configuration.get_entry_signature()[2]:
                state_result["Return"] = str(state.symbolic_stack[-1])
            else:
                # default return value
                state_result["Return"] = "0"

        # solution of constraints
        state_result["Solution"] = {}
        m = state.solver.model()
        # this check if there exist symbols with same name
        # which may lead to the result overwriting
        if len(set([k for k in m])) != len(m):
            logging.warning(
                f"the solving process found there exist symbols with identical name, please double check. ({[k for k in m]})")
        for k in m:
            # the decode is weird, we just want to convert unprintable characters
            # into printable chars
            # ref: https://stackoverflow.com/questions/13837848/converting-byte-string-in-unicode-string
            solution_hex_str = hex(m[k].as_long())[2:]
            # left-pad to a whole number of bytes (two hex digits each)
            if len(solution_hex_str) % 2 == 1:
                solution_hex_str = "0" + solution_hex_str
            solution = []
            for i in range(0, len(solution_hex_str), 2):
                solution.append(chr(int(solution_hex_str[i: i + 2], 16)))
            # the characters are emitted in reversed byte order
            state_result["Solution"][str(k)] = "".join(solution[::-1])

        candidate_fds = []
        # filter out all output buffer: writable files whose key is an int or
        # a string starting with "-"
        for fd, file_info in state.file_sys.items():
            if "w" in file_info["flag"]:
                if isinstance(fd, int) or fd[0] == "-":
                    candidate_fds.append(fd)

        state_result["Output"] = []
        # stdout and stderr buffer
        for fd in candidate_fds:
            assert all(isinstance(x, (int, BitVecRef))
                       for x in state.file_sys[fd]["content"]), f"buffer is: {state.file_sys[fd]['content']}, not all int and bitvec"
            tmp_dict = {"name": None, "output": None}
            # output_buffer = []
            output_solve_buffer = []
            for el in state.file_sys[fd]["content"]:
                if isinstance(el, int):
                    # concrete byte: emit its character directly
                    # output_buffer.append(chr(el).encode())
                    output_solve_buffer.append(chr(el))
                elif isinstance(el, BitVecRef):
                    assert el.size() == 8, f"{el} size is not 8"
                    # output_buffer.append(str(el).encode())
                    # if can solve a concrete number
                    solve_char = m.evaluate(el)
                    if is_bv_value(solve_char):
                        output_solve_buffer.append(
                            chr(solve_char.as_long()))
                    elif is_bv(solve_char):
                        # still symbolic after evaluation: emit a placeholder
                        output_solve_buffer.append("`@`")
                    else:
                        exit(
                            f"result of solving {el} is {solve_char} and type is {type(solve_char)}")

            tmp_dict["name"] = state.file_sys[fd]["name"]
            # tmp_dict["output"] = f'{b"".join(output_buffer)}'
            tmp_dict["output"] = "".join(output_solve_buffer)
            state_result["Output"].append(tmp_dict)

        json.dump(state_result, fp, indent=4)
231 | 
232 | 
def init_file_for_file_sys():
    """
    Build a fresh, empty entry for a state's file_sys table: blank name and
    flag, status False, and a new empty content list.
    """
    entry = dict(name="", status=False, flag="")
    entry["content"] = []
    return entry
238 | 
239 | 
def log_in_out(func_name, directory):
    """
    A decorator to log before entering and after exiting call emulation

    Args:
        func_name: name printed in the "Call:" / "Return:" log lines.
        directory: label printed in parentheses after the name.

    Returns:
        A decorator. The wrapped callable logs at INFO level, forwards all
        positional and keyword arguments, and returns the wrapped function's
        result unchanged. If the wrapped function raises, the "Return:" line
        is not logged and the exception propagates.
    """
    import functools

    def decorator(f):
        # functools.wraps keeps __name__, __doc__, ... of the emulated function
        @functools.wraps(f)
        def wrapper(*args, **kw):
            logging.info(f"Call: {func_name} ({directory})")
            states = f(*args, **kw)
            logging.info(f"Return: {func_name} ({directory})")
            return states
        return wrapper
    return decorator
252 | 
253 | 
def query_cache(solver):
    """
    Check the solver's assertions, going through Configuration._z3_cache_dict.

    The cache key is the sorted tuple of the distinct hashes of the solver's
    assertions. On a hit the cached result is returned without calling the
    solver. On a miss the solver is checked and the result stored; if the
    result is sat and the model contains a symbol named 'invalid-memory', the
    key is cached as unsat and ProcFailTermination(INVALIDMEMORY) is raised.
    """
    cache = Configuration._z3_cache_dict
    key = tuple(sorted({hash(a) for a in solver.assertions()}))

    if key in cache:
        return cache[key]

    outcome = solver.check()

    # try to terminate invalid-memory in advance
    if outcome == sat:
        model = solver.model()
        if any(str(decl) == 'invalid-memory' for decl in model):
            cache[key] = unsat
            raise ProcFailTermination(INVALIDMEMORY)

    cache[key] = outcome
    return outcome
280 | 
281 | 
def one_time_query_cache(solver, con):
    """
    Check `solver` together with the extra constraint `con` as a one-time
    query: `con` is added inside a push/pop scope, so it is not kept in the
    solver afterwards.

    Args:
        solver: the solver holding the current assertions.
        con: the constraint to test on top of them.

    Returns:
        The result of query_cache() for the temporarily extended solver.

    Any exception raised by query_cache() (or by adding the constraint)
    propagates, but the pushed scope is popped first so the solver is never
    left with the temporary constraint.
    """
    solver.push()
    try:
        solver.add(con)
        return query_cache(solver)
    finally:
        # always undo the push, even when query_cache raises
        solver.pop()
293 | 
294 | 
def one_time_query_cache_without_solver(con):
    """
    Check the single constraint `con` on its own, using
    Configuration._z3_cache_dict keyed by the one-element tuple (hash(con),).
    On a miss a fresh solver of the configured backend is created, `con` is
    added, and the check result is cached before being returned.
    """
    key = (hash(con),)
    cache = Configuration._z3_cache_dict

    if key in cache:
        return cache[key]

    fresh_solver = SMTSolver(Configuration.get_solver())
    fresh_solver.add(con)
    verdict = fresh_solver.check()
    cache[key] = verdict
    return verdict
309 | 


--------------------------------------------------------------------------------
/seewasm/arch/wasm/visualizator.py:
--------------------------------------------------------------------------------
 1 | from seewasm.arch.wasm.configuration import Configuration
 2 | from graphviz import Digraph
 3 | 
 4 | 
 5 | def visualize(Graph, filename="wasm_ICFG.gv"):
 6 |     entry_func = Configuration.get_entry()
 7 |     entry_func_index_name = Graph.wasmVM.get_signature(entry_func)[0]
 8 |     entry_bb = Graph.func_to_bbs[entry_func_index_name][0]
 9 |     assert entry_bb.endswith('_0'), f"entry_bb ({entry_bb}) not ends with 0"
10 | 
11 |     g = Digraph(filename, filename=filename)
12 |     g.attr(rankdir="TB")
13 | 
14 |     # construct a set consisting of edges (nodeA, nodeB, edge_type)
15 |     visited = set()
16 |     edges_set = set()
17 |     stack = list()
18 |     stack.append(entry_bb)
19 |     while stack:
20 |         bb = stack.pop()
21 |         visited.add(bb)
22 |         if bb in Graph.bbs_graph:
23 |             for edge_type, succ_bb in Graph.bbs_graph[bb].items():
24 |                 if succ_bb not in visited:
25 |                     edges_set.add((bb, succ_bb, edge_type))
26 |                     stack.append(succ_bb)
27 |                 elif (bb, succ_bb, edge_type) not in edges_set:
28 |                     edges_set.add((bb, succ_bb, edge_type))
29 | 
30 |     with g.subgraph(name='global') as c:
31 |         # construct the graph
32 |         for edge in edges_set:
33 |             node_from, node_to, _ = edge
34 |             c.node(node_from)
35 |             c.node(node_to)
36 |             c.edge(node_from, node_to)
37 | 
38 |     print("Rendering disabled on the server.")
39 |     g.render(filename, view=False)
40 | 


--------------------------------------------------------------------------------
/seewasm/arch/wasm/vmstate.py:
--------------------------------------------------------------------------------
 1 | # This file defines the `state` that will be passed within Wasm-SE
 2 | from collections import defaultdict
 3 | 
 4 | from seewasm.arch.wasm.configuration import Configuration
 5 | from seewasm.arch.wasm.solver import SMTSolver
 6 | from seewasm.arch.wasm.utils import (init_file_for_file_sys,
 7 |                                      readable_internal_func_name)
 8 | from seewasm.engine.engine import VMstate
 9 | from z3 import BitVecVal
10 | 
11 | 
class WasmVMstate(VMstate):
    """The `state` object handed around during symbolic execution of Wasm."""

    def __init__(self):
        # locals that were never written read as a 32-bit zero
        def local_default():
            return BitVecVal(0, 32)

        # core data structures
        self.symbolic_stack = list()
        self.symbolic_memory = dict()
        self.local_var = defaultdict(local_default)
        self.globals = dict()
        # instruction
        self.instr = "end"
        # name of the function currently executed
        self.current_func_name = ''
        # name of the current basic block, used in the recursive process
        self.current_bb_name = ''
        # operator -> speculated sign
        self.sign_mapping = defaultdict(bool)
        # context stack, each element is a 4-tuple:
        # (func_name, stack, local, require_return)
        # TODO files buffer may need to maintained in context
        self.context_stack = list()

        self.args = ""

        # every entry is created by init_file_for_file_sys from utils;
        # descriptors 0..2 are the standard streams
        self.file_sys = {}
        standard_streams = (("stdin", "r"), ("stdout", "w"), ("stderr", "w"))
        for fd, (stream_name, stream_flag) in enumerate(standard_streams):
            entry = init_file_for_file_sys()
            entry["name"] = stream_name
            entry["status"] = True
            entry["flag"] = stream_flag
            self.file_sys[fd] = entry

        # used by br_if instruction
        self.edge_type = ''
        # the solver attached to this state
        self.solver = SMTSolver(Configuration.get_solver())
        # name of the function called through call_indirect
        self.call_indirect_callee = ''

    def __str__(self):
        func_name = readable_internal_func_name(
            Configuration.get_func_index_to_func_name(),
            self.current_func_name)
        rows = (
            f"Current Func:\t{func_name}",
            f"Stack:\t\t{self.symbolic_stack}",
            f"Local Var:\t{self.local_var}",
            f"Global Var:\t{self.globals}",
            f"Memory:\t\t{self.symbolic_memory}",
            f"Constraints:\t{self.solver.assertions()}",
        )
        return "\n".join(rows) + "\n"

    def details(self):
        raise NotImplementedError

    def __lt__(self, other):
        # states are never ordered before one another
        return False

    def __getstate__(self):
        # shallow copy of the instance dictionary
        return dict(self.__dict__)
73 | 


--------------------------------------------------------------------------------
/seewasm/core/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/PKU-ASAL/SeeWasm/e2a40c2fd2bb19fb7f6f5bd646a862cc5ddeaa67/seewasm/core/__init__.py


--------------------------------------------------------------------------------
/seewasm/core/__init__.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/PKU-ASAL/SeeWasm/e2a40c2fd2bb19fb7f6f5bd646a862cc5ddeaa67/seewasm/core/__init__.pyc


--------------------------------------------------------------------------------
/seewasm/core/basicblock.py:
--------------------------------------------------------------------------------
 1 | class BasicBlock(object):
 2 |     """
 3 |     The basic block in the CFG, consisting of instructions
 4 |     """
 5 | 
 6 |     def __init__(self, start_offset=0x00, start_instr=None,
 7 |                  name='block_default_name'):
 8 |         """
 9 |         The properties of basic blocks
10 | 
11 |         Properties:
12 |             start_offset: the `offset` of the first instruction
13 |             start_instr: the first instruction of the current basic block
14 |             name: the name of the basic block, whose naming style is "block_[func_index]_[start_offset]"
15 |             end_offset: the `offset_end` of the last instruction
16 |             end_instr: the last instruction
17 | 
18 |         Below are properties may be deprecated in the future
19 |             states: not clear
20 |             function_name: its corresponding function's name
21 |         """
22 |         self.start_offset = start_offset
23 |         self.start_instr = start_instr
24 |         self.name = name
25 |         self.end_offset = None
26 |         self.end_instr = None
27 |         self.instructions = list()
28 | 
29 |         # may be deprecated in the future
30 |         self.states = []
31 |         self.function_name = "unknown"
32 | 
33 |     @property
34 |     def size(self):
35 |         return self.end_offset - self.start_offset
36 | 
37 |     def __str__(self):
38 |         out = ''
39 |         line = ''
40 |         line = str(self.start_offset) + ': ' + str(self.name) + '\n'
41 |         line += 'start_instr = ' + str(self.start_instr.name) + '\n'
42 |         line += 'size = ' + str(self.size) + '\n'
43 |         line += 'end_offset = ' + str(self.end_offset) + '\n'
44 |         line += 'end_instr = ' + str(self.end_instr.name) + '\n'
45 |         line += 'function_name = ' + str(self.function_name) + '\n'
46 |         out += line + '\n\n'
47 |         return out
48 | 
49 |     def instructions_details(self, format='hex'):
50 |         out = ''
51 |         line = ''
52 |         for i in self.instructions:
53 |             line = '%x: ' % i.offset
54 |             if i.operand is not None and not i.xref:
55 |                 line += '%s' % str(i)
56 |             elif isinstance(i.xref, list) and i.xref:
57 |                 line += '%s %s' % (i.name, i.xref)
58 |             elif isinstance(i.xref, int) and i.xref:
59 |                 line += '%s %x' % (i.name, i.xref)
60 |             elif i.operand_interpretation:
61 |                 line += i.operand_interpretation
62 |             else:
63 |                 line += i.name + ' '
64 | 
65 |             out += line + '\n'
66 |         return out
67 | 
68 |     def instructions_ssa(self, format='hex'):
69 |         out = ''
70 |         line = ''
71 |         for i in self.instructions:
72 |             line = '%x: ' % i.offset
73 |             if i.ssa:
74 |                 line += '' + i.ssa.format()
75 |             else:
76 |                 line += '[NO_SSA] ' + i.name
77 |             out += line + '\n'
78 |         return out
79 | 


--------------------------------------------------------------------------------
/seewasm/core/edge.py:
--------------------------------------------------------------------------------
 1 | EDGE_UNCONDITIONAL = 'unconditional'
 2 | EDGE_CONDITIONAL_TRUE = 'conditional_true'
 3 | EDGE_CONDITIONAL_FALSE = 'conditional_false'
 4 | EDGE_FALLTHROUGH = 'fallthrough'
 5 | EDGE_CALL = 'call'
 6 | 
 7 | 
 8 | class Edge:
 9 |     """
10 |     The edges in the CFG, connecting basic blocks
11 |     """
12 | 
13 |     def __init__(self, node_from, node_to, edge_type=EDGE_UNCONDITIONAL,
14 |                  condition=None):
15 |         """
16 |         Properties of edges in the CFG
17 | 
18 |         Properties:
19 |             node_from: the 'name' of the basic block pointed from
20 |             node_to: the 'name' of the basic block pointed to
21 |             type: the type of the edge, including five types listed at the beginning of the current file
22 | 
23 |         Below are properties may be deprecated in the future
24 |             condition: do not understand its actual meaning
25 |         """
26 | 
27 |         self.node_from = node_from
28 |         self.node_to = node_to
29 |         self.type = edge_type
30 | 
31 |         self.condition = condition
32 | 
33 |     def __str__(self):
34 |         return str(self.as_dict())
35 | 
36 |     def __eq__(self, other):
37 |         return self.node_from == other.node_from and\
38 |             self.node_to == other.node_to and\
39 |             self.type == other.type and\
40 |             self.condition == other.condition
41 | 
42 |     def __hash__(self):
43 |         return hash(('from', self.node_from,
44 |                      'to', self.node_to,
45 |                      'type', self.type,
46 |                      'condition', self.condition))
47 | 
48 |     def as_dict(self):
49 |         return {'from': str(self.node_from), 'to': str(self.node_to),
50 |                 'type': self.type, 'condition': self.condition}
51 | 


--------------------------------------------------------------------------------
/seewasm/core/function.py:
--------------------------------------------------------------------------------
 1 | class Function(object):
 2 |     """
 3 |     The function object of the given Wasm module
 4 |     """
 5 | 
 6 |     def __init__(self, start_offset, start_instr=None,
 7 |                  name='func_default_name', prefered_name=None):
 8 |         """
 9 |         The properties of the functions of the given Wasm module
10 | 
11 |         Properties:
12 |             start_offset: the start offset of the first instruction
13 |             start_instr: the first instruction of the function
14 |             name: the function's name, represented in '$funcX' or readable name (TODO will make them all to readable name in the future)
15 |             prefered_name: the signature of the function, including type of arguments and return value
16 |             size: the size of the function, the sum of all its composed instructions
17 |             end_offset: the end_offset of its last basic block
18 |             end_instr: the last instruction of the function
19 |             basicblocks: the list of all composed basic blocks
20 |             instructions: the list of all composed instructions
21 |         """
22 |         self.start_offset = start_offset
23 |         self.start_instr = start_instr
24 |         self.name = name
25 |         self.prefered_name = prefered_name if prefered_name else name
26 |         self.size = 0
27 |         self.end_offset = None
28 |         self.end_instr = None
29 |         self.basicblocks = list()
30 |         self.instructions = list()
31 | 
32 |     def __str__(self):
33 |         line = ('%x' % self.start_offset) + ': ' + str(self.name) + '\n'
34 |         line += 'prefered_name: %s\n' % self.prefered_name
35 |         line += 'start_offset = %x\n' % self.start_offset
36 |         line += 'start_instr = ' + str(self.start_instr.name) + '\n'
37 |         if self.size:
38 |             line += 'size = ' + str(self.size) + '\n'
39 |         if self.end_offset:
40 |             line += 'end_offset = ' + str(self.end_offset) + '\n'
41 |         if self.end_instr:
42 |             line += 'end_instr = ' + str(self.end_instr.name) + '\n'
43 |         line += 'lenght basicblocks: %s\n' % len(self.basicblocks)
44 |         line += 'lenght instructions: %s\n' % len(self.instructions)
45 |         line += '\n\n'
46 |         return line
47 | 


--------------------------------------------------------------------------------
/seewasm/core/instruction.py:
--------------------------------------------------------------------------------
  1 | class Instruction(object):
  2 |     """
  3 |     The instruction object
  4 |     """
  5 | 
  6 |     def __init__(self, opcode, name,
  7 |                  operand_size, pops, pushes, fee,
  8 |                  description, operand=None,
  9 |                  operand_interpretation=None, offset=0, xref=None):
 10 |         """
 11 |         The properties of instruction object
 12 | 
 13 |         Properties:
 14 |             opcode: the int value of the instruction
 15 |             offset: the offset of the instruction on function level
 16 |             name: the readable name of the instruction
 17 |             description: a brief description of the instruction
 18 |             operand_size: the size of its corresponding operand
 19 |             operand: Immediate operand if any specific interpretation of operand value, in bytes. The operand value for JUMP is xref
 20 |             operand_interpretation: the instruction and its operand in a readable way, same as the string in the wat file
 21 |             pops: how many elements will be popped from the stack
 22 |             pushes: how many elements will be pushed into the stack
 23 |             fee: not clear
 24 |             xref: the jump target of the current instruction
 25 |             ssa: not clear
 26 |         """
 27 | 
 28 |         self.opcode = opcode
 29 |         self.opcode_size = 1
 30 |         self.offset = offset
 31 |         self.name = name
 32 |         self.description = description
 33 |         self.operand_size = operand_size
 34 |         self.operand = operand
 35 |         self.operand_interpretation = operand_interpretation
 36 |         self.pops = pops
 37 |         self.pushes = pushes
 38 |         self.fee = fee
 39 |         self.xref = xref
 40 |         self.ssa = None
 41 | 
 42 |     def __eq__(self, other):
 43 |         """ Instructions are equal if all features match  """
 44 |         return self.opcode == other.opcode and\
 45 |             self.name == other.name and\
 46 |             self.operand == other.operand and\
 47 |             self.operand_size == other.operand_size and\
 48 |             self.pops == other.pops and\
 49 |             self.pushes == other.pushes and\
 50 |             self.fee == other.fee and\
 51 |             self.offset == other.offset and\
 52 |             self.description == other.description
 53 | 
 54 |     def __simple_output_format(self, offset=True):
 55 |         output = self.name
 56 |         if self.has_operand:
 57 |             output += ' 0x%x' % int.from_bytes(self.operand,
 58 |                                                byteorder='big')
 59 | 
 60 |         if offset:
 61 |             return "%d %s" % (self.offset, output)
 62 |         else:
 63 |             return "%s" % output
 64 | 
 65 |     # def __repr__(self):
 66 |     #    """ Entire representation of the instruction
 67 |     #    output = 'Instruction(0x%x, %r, %d, %d, %d, %d, %r, %r, %r)' \
 68 |     #        % (self._opcode, self._name, self._operand_size,
 69 |     #            self._pops, self._pushes, self._fee,
 70 |     #            self._description, self._operand, self._offset)"""
 71 |     #    return self.__simple_output_format()
 72 | 
 73 |     def __str__(self):
 74 |         """ String representation of the instruction """
 75 |         return self.__simple_output_format(offset=False)
 76 | 
 77 |     @property
 78 |     def bytes(self):
 79 |         """ Encoded instruction """
 80 |         byte = bytearray()
 81 |         byte.append(self.opcode)
 82 |         if self.operand:
 83 |             [byte.append(x) for x in self.operand]
 84 |         return "".join(map(chr, byte))
 85 | 
 86 |     @property
 87 |     def offset_end(self):
 88 |         """ Location in the program (optional) """
 89 |         return self.offset + self.size - 1
 90 | 
 91 |     @property
 92 |     def semantics(self):
 93 |         """ Canonical semantics """
 94 |         return self.name
 95 | 
 96 |     @property
 97 |     def size(self):
 98 |         """ Size of the encoded instruction """
 99 |         return self.opcode_size + self.operand_size
100 | 
101 |     @property
102 |     def has_operand(self):
103 |         """ True if the instruction uses an immediate operand """
104 |         return self.operand_size > 0
105 | 
    @property
    def is_branch_conditional(self):
        """ Intended to tell whether the instruction is a conditional jump.

        Not implemented here: accessing it always raises NotImplementedError
        (presumably meant to be overridden per architecture - confirm).
        """
        raise NotImplementedError
110 | 
    @property
    def is_branch_unconditional(self):
        """ Intended to tell whether the instruction is an unconditional jump.

        Not implemented here: accessing it always raises NotImplementedError
        (presumably meant to be overridden per architecture - confirm).
        """
        raise NotImplementedError
115 | 
116 |     @property
117 |     def is_branch(self):
118 |         """ True if the instruction is a jump """
119 |         return self.is_branch_conditional or self.is_branch_unconditional
120 | 
    @property
    def is_halt(self):
        """ Intended to tell whether the instruction halts execution.

        Not implemented here: accessing it always raises NotImplementedError
        (presumably meant to be overridden per architecture - confirm).
        """
        raise NotImplementedError
125 | 
    @property
    def is_terminator(self):
        """ Intended to be True if the instruction is a basic block terminator.

        Not implemented here: accessing it always raises NotImplementedError.
        """
        raise NotImplementedError
130 | 
    @property
    def have_xref(self):
        """ TODO: not implemented; accessing it always raises NotImplementedError.

        Presumably meant to report whether the instruction has a jump target
        (``xref``) - confirm when implementing.
        """
        raise NotImplementedError
135 | 


--------------------------------------------------------------------------------
/seewasm/core/utils.py:
--------------------------------------------------------------------------------
 1 | from binascii import unhexlify
 2 | 
 3 | 
 4 | def bytecode_to_bytes(bytecode):
 5 |     if str(bytecode).startswith("0x"):
 6 |         bytecode = bytecode[2:]
 7 | 
 8 |     try:
 9 |         # python > 2.7
10 |         bytecode = bytes.fromhex(bytecode)
11 |     except AttributeError:
12 |         # python <= 2.7
13 |         try:
14 |             bytecode = bytecode.decode("hex")
15 |         except TypeError:
16 |             # last chance
17 |             bytecode = unhexlify(bytecode)
18 |     # already bytes or bytearray
19 |     except TypeError:
20 |         pass
21 |     return bytecode
22 | 
23 | 
def search_in_list_of_dict(string_to_search, target_list, key_dict):
    """Return the dicts of ``target_list`` whose ``key_dict`` value, converted
    to a string, contains ``str(string_to_search)`` as a substring."""
    return [
        entry for entry in target_list
        if str(string_to_search) in str(entry[key_dict])
    ]
30 | 


--------------------------------------------------------------------------------
/seewasm/engine/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/PKU-ASAL/SeeWasm/e2a40c2fd2bb19fb7f6f5bd646a862cc5ddeaa67/seewasm/engine/__init__.py


--------------------------------------------------------------------------------
/seewasm/engine/__init__.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/PKU-ASAL/SeeWasm/e2a40c2fd2bb19fb7f6f5bd646a862cc5ddeaa67/seewasm/engine/__init__.pyc


--------------------------------------------------------------------------------
/seewasm/engine/disassembler.py:
--------------------------------------------------------------------------------
 1 | from seewasm.core.utils import bytecode_to_bytes
 2 | 
 3 | 
 4 | class BytecodeEmptyException(Exception):
 5 |     """Exception raised when bytecode is None"""
 6 |     pass
 7 | 
 8 | 
 9 | class Disassembler(object):
10 |     """ Generic Disassembler class """
11 | 
12 |     def __init__(self, asm, bytecode=None):
13 |         self.bytecode = bytecode
14 |         self.instructions = list()
15 |         self.reverse_instructions = dict()
16 |         self.asm = asm
17 | 
18 |     def attributes_reset(self):
19 |         """Reset instructions class attributes """
20 |         self.instructions = list()
21 |         self.reverse_instructions = dict()
22 | 
23 |     def disassemble_opcode(self, bytecode, offset=0):
24 |         """ Generic method to disassemble one instruction """
25 |         raise NotImplementedError
26 | 
27 |     def disassemble(self, bytecode=None, offset=0, nature_offset=0,
28 |                     r_format='list'):
29 |         """Generic method to disassemble bytecode
30 | 
31 |         :param bytecode: bytecode sequence
32 |         :param offset: start offset
33 |         :param r_format: output format ('list'/'text'/'reverse')
34 |         :type bytecode: bytes, str
35 |         :type offset: int
36 |         :type r_format: list, str, dict
37 |         :return: dissassembly result depending of r_format
38 |         :rtype: list, str, dict
39 |         """
40 |         # reinitialize class variable
41 |         self.attributes_reset()
42 | 
43 |         self.bytecode = bytecode if bytecode else self.bytecode
44 |         if not self.bytecode:
45 |             raise BytecodeEmptyException()
46 | 
47 |         self.bytecode = bytecode_to_bytes(self.bytecode)
48 | 
49 |         while offset < len(self.bytecode):
50 |             instr = self.disassemble_opcode(
51 |                 self.bytecode[offset:],
52 |                 offset, nature_offset)
53 |             offset += instr.size
54 |             nature_offset += 1
55 |             self.instructions.append(instr)
56 | 
57 |         # fill reverse instructions
58 |         self.reverse_instructions = {k: v for k, v in
59 |                                      enumerate(self.instructions)}
60 | 
61 |         # return instructions
62 |         if r_format == 'list':
63 |             return self.instructions
64 |         elif r_format == 'text':
65 |             return '\n'.join(map(str, self.instructions))
66 |         elif r_format == 'reverse':
67 |             return self.reverse_instructions
68 | 


--------------------------------------------------------------------------------
/seewasm/engine/emulator.py:
--------------------------------------------------------------------------------
 1 | # =======================================
 2 | # #         Emulator                    #
 3 | # =======================================
 4 | 
 5 | 
class EmulatorEngine(object):
    # Placeholder emulator interface: every method below, including the
    # constructor, raises NotImplementedError.

    def __init__(self, instructions):
        """ TODO: not implemented; always raises NotImplementedError """
        raise NotImplementedError

    def emulate(self, state, depth=0):
        """ TODO: not implemented; always raises NotImplementedError """
        raise NotImplementedError

    def emulate_one_instruction(self, instr, state, depth):
        """ TODO: not implemented; always raises NotImplementedError """
        raise NotImplementedError
19 | 


--------------------------------------------------------------------------------
/seewasm/engine/engine.py:
--------------------------------------------------------------------------------
class VMstate(object):
    # Placeholder VM state interface: both the constructor and details()
    # raise NotImplementedError.

    def __init__(self, gas=1000000):
        """ TODO: not implemented; always raises NotImplementedError """
        raise NotImplementedError

    def details(self):
        """ TODO: not implemented; always raises NotImplementedError """
        raise NotImplementedError
10 | 


--------------------------------------------------------------------------------
/test.py:
--------------------------------------------------------------------------------
  1 | import json
  2 | import glob
  3 | import os
  4 | import pytest
  5 | import subprocess
  6 | import sys
  7 | 
  8 | testcase_dir = './test/'
  9 | 
 10 | @pytest.mark.parametrize('wasm_path, entry', [
 11 |     ('hello_world.wasm', ''),
 12 |     ('hello_world_go.wasm', '_start'),
 13 |     ('hello_world_rust.wasm', ''),
 14 |     ('test.wasm', ''),
 15 |     ('password.wasm', '')
 16 | ])
 17 | 
 18 | def test_wasm_can_be_analyzed(wasm_path, entry):
 19 |     wasm_path = os.path.join(testcase_dir, wasm_path)
 20 |     cmd = [sys.executable, 'launcher.py', '-f', wasm_path, '-s', '-v', 'info']
 21 |     if entry != "":
 22 |         cmd.extend(['--entry', entry])
 23 |     subprocess.run(cmd, timeout=60, check=True)
 24 | 
 25 | def test_return_simulation():
 26 |     wasm_path = './test/test_return.wasm'
 27 |     cmd = [sys.executable, 'launcher.py', '-f', wasm_path, '-s', '-v', 'info']
 28 |     subprocess.run(cmd, timeout=60, check=True)
 29 | 
 30 |     result_dir = glob.glob('./output/result/test_return_*')
 31 |     result_dir.sort()
 32 |     result_dir = result_dir[-1]
 33 |     state_path = glob.glob(f'{result_dir}/state*.json')
 34 |     assert len(state_path) == 1, 'should have only one state returning `1`'
 35 | 
 36 |     with open(state_path[0], 'r') as f:
 37 |         state = json.load(f)
 38 |     assert state['Return'] == "1", f'should return 1, got {state["Return"]}'
 39 | 
 40 | def test_unreachable_simulation():
 41 |     wasm_path = './test/test_unreachable.wasm'
 42 |     cmd = [sys.executable, 'launcher.py', '-f', wasm_path, '-s', '-v', 'info']
 43 |     subprocess.run(cmd, timeout=60, check=True)
 44 | 
 45 |     result_dir = glob.glob('./output/result/test_unreachable_*')
 46 |     result_dir.sort()
 47 |     result_dir = result_dir[-1]
 48 |     state_path = glob.glob(f'{result_dir}/state*.json')
 49 |     assert len(state_path) == 1, 'should have only one state output `null`'
 50 |     with open(state_path[0], 'r') as f:
 51 |         state = json.load(f)
 52 |     assert state['Solution'] == {}, f'should have no solution, got {state["Solution"]}'
 53 | 
 54 | def test_c_sym_args():
 55 |     wasm_path = './test/sym_c.wasm'
 56 |     cmd = [sys.executable, 'launcher.py', '-f', wasm_path, '-s', '--sym_args', '1', '--source_type', 'c', '--entry', '__main_void', '-v', 'info']
 57 |     subprocess.run(cmd, timeout=60, check=True)
 58 | 
 59 |     result_dir = glob.glob('./output/result/sym_c*')
 60 |     result_dir.sort()
 61 |     result_dir = result_dir[-1]
 62 |     state_path = glob.glob(f'{result_dir}/state*.json')
 63 |     assert len(state_path) == 3, 'should have three states output'
 64 |     for state in state_path:
 65 |         with open(state, 'r') as f:
 66 |             state = json.load(f)
 67 |         assert 'Solution' in state and 'sym_arg_1' in state['Solution'], f'no sym_arg_1 solution found in {state}'
 68 |         assert 'Return' in state, f'no Return found in {state}'
 69 |         assert 'Output' in state and len(state['Output']) == 2, f'no Output found in {state}'
 70 |         inp = state['Solution']["sym_arg_1"]
 71 |         analyzed_return = state['Return']
 72 |         analyzed_stdout = state['Output'][0]['output']
 73 |         expected_return_to_stdout = {"0": "a", "1": "b", "2": "c"}
 74 |         assert analyzed_return in expected_return_to_stdout, f'analyzed return value {analyzed_return} not found in expected_return_to_stdout'
 75 |         assert analyzed_stdout == expected_return_to_stdout[analyzed_return], f'output mismatched, got {analyzed_stdout}, expected {expected_return_to_stdout[analyzed_return]}'
 76 | 
 77 | def test_password_sym_args():
 78 |     wasm_path = './test/password.wasm'
 79 |     cmd = [sys.executable, 'launcher.py', '-f', wasm_path, '-s', '--sym_args', '10', '--source_type', 'c', '--entry', '_start', '-v', 'info']
 80 |     subprocess.run(cmd, timeout=60, check=True)
 81 | 
 82 |     result_dir = glob.glob('./output/result/password*')
 83 |     result_dir.sort()
 84 |     result_dir = result_dir[-1]
 85 |     state_path = glob.glob(f'{result_dir}/state*.json')
 86 |     assert len(state_path) == 6, 'should have six states output'
 87 |     for state in state_path:
 88 |         with open(state, 'r') as f:
 89 |             state = json.load(f)
 90 |         assert 'Solution' in state and 'sym_arg_1' in state['Solution'], f'no sym_arg_1 solution found in {state}'
 91 |         assert 'Output' in state and len(state['Output']) == 2, f'no Output found in {state}'
 92 |         inp = state['Solution']["sym_arg_1"]
 93 |         analyzed_stdout = state['Output'][0]['output']
 94 |         if 'Return' in state:
 95 |             assert state['Return'] == "0", f'should return 0, got {state["Return"]}'
 96 |             assert inp == "hello", f'solved input mismatched, got {inp}'
 97 |             assert analyzed_stdout == "Password found!\n", f'output mismatched, got {analyzed_stdout}'
 98 |         else:
 99 |             assert 'Status' in state, f'no Status found in {state}'
100 |             assert state['Status'] == "Exit with status code 1", f'should exit with status code 1, got {state["Status"]}'


--------------------------------------------------------------------------------
/test/c/src/hello.c:
--------------------------------------------------------------------------------
1 | #include <stdio.h>
2 | 
/* Test fixture: prints "Hello, world!" followed by a newline to stdout and
 * returns 0. argc and argv are accepted but not used. */
int main(int argc, char **argv)
{
    printf("Hello, world!\n");
    return 0;
}


--------------------------------------------------------------------------------
/test/c/src/sym.c:
--------------------------------------------------------------------------------
 1 | #include <stdio.h>
 2 | 
/* Classifies a character into one of three ranges. Each branch prints a
 * one-letter marker and returns a distinct code:
 *   a <  'a'        -> prints "a", returns 0
 *   'a' <= a < 'z'  -> prints "b", returns 1
 *   otherwise       -> prints "c", returns 2
 */
int foo(char a){
    if (a < 'a') {
        printf("a");
        return 0;
    }
    else if (a < 'z') {
        /* reached only when a >= 'a' */
        printf("b");
        return 1;
    }
    else {
        /* a >= 'z' */
        printf("c");
        return 2;
    }
}
17 | 
/* Passes the first character of the first command-line argument to foo()
 * and uses foo's result as the exit status.
 * NOTE(review): argv[1] is read without checking argc, so the program
 * assumes it is always started with at least one argument. */
int main(int argc, char* argv[]){
    return foo(argv[1][0]);
}


--------------------------------------------------------------------------------
/test/go/src/hello.go:
--------------------------------------------------------------------------------
1 | package main
2 | 
// main prints "Hello, world!" using the builtin println.
func main() {
	println("Hello, world!")
}


--------------------------------------------------------------------------------
/test/hello_world.wasm:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/PKU-ASAL/SeeWasm/e2a40c2fd2bb19fb7f6f5bd646a862cc5ddeaa67/test/hello_world.wasm


--------------------------------------------------------------------------------
/test/hello_world_go.wasm:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/PKU-ASAL/SeeWasm/e2a40c2fd2bb19fb7f6f5bd646a862cc5ddeaa67/test/hello_world_go.wasm


--------------------------------------------------------------------------------
/test/hello_world_rust.wasm:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/PKU-ASAL/SeeWasm/e2a40c2fd2bb19fb7f6f5bd646a862cc5ddeaa67/test/hello_world_rust.wasm


--------------------------------------------------------------------------------
/test/password.wasm:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/PKU-ASAL/SeeWasm/e2a40c2fd2bb19fb7f6f5bd646a862cc5ddeaa67/test/password.wasm


--------------------------------------------------------------------------------
/test/rust/hello/Cargo.toml:
--------------------------------------------------------------------------------
1 | [package]
2 | name = "hello_rust"
3 | version = "0.1.0"
4 | edition = "2021"
5 | 
6 | [dependencies]
7 | 


--------------------------------------------------------------------------------
/test/rust/hello/src/main.rs:
--------------------------------------------------------------------------------
// Test fixture: prints "Hello, world!" followed by a newline to standard output.
fn main() {
    println!("Hello, world!");
}
4 | 


--------------------------------------------------------------------------------
/test/sym_c.wasm:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/PKU-ASAL/SeeWasm/e2a40c2fd2bb19fb7f6f5bd646a862cc5ddeaa67/test/sym_c.wasm


--------------------------------------------------------------------------------
/test/test.wasm:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/PKU-ASAL/SeeWasm/e2a40c2fd2bb19fb7f6f5bd646a862cc5ddeaa67/test/test.wasm


--------------------------------------------------------------------------------
/test/test_linux.py:
--------------------------------------------------------------------------------
  1 | import json
  2 | import glob
  3 | import os
  4 | import pytest
  5 | import subprocess
  6 | import sys
  7 | 
  8 | def test_hello_c_to_wasm():
  9 |     source_path = "./test/c/src/hello.c"
 10 |     cmd = ["clang", "-g", source_path, "-o", "hello_c.wasm"]
 11 |     subprocess.run(cmd, timeout=60, check=True)
 12 |     assert os.path.exists("hello_c.wasm"), "hello_c.wasm not found. Compilation failed."
 13 |     cmd = [sys.executable, 'launcher.py', '-f', "hello_c.wasm", '-s', '-v', 'info', '--source_type', 'c', '--entry', '__main_void']
 14 |     subprocess.run(cmd, timeout=60, check=True)
 15 |     os.remove("hello_c.wasm")
 16 |     os.remove("hello_c.wat")
 17 | 
 18 |     result_dir = glob.glob('./output/result/hello_c*')
 19 |     result_dir.sort()
 20 |     result_dir = result_dir[-1]
 21 |     state_path = glob.glob(f'{result_dir}/state*.json')
 22 |     assert len(state_path) == 1, 'should have only one state output'
 23 |     with open(state_path[0], 'r') as f:
 24 |         state = json.load(f)
 25 |     assert state['Output'][0] == {
 26 |         "name": "stdout",
 27 |         "output": "Hello, world!\n"
 28 |     }, f'output mismatched, got {state["Output"]}'
 29 | 
 30 | @pytest.mark.parametrize('algo', ['dfs', 'bfs', 'random', 'interval'])
 31 | def test_sym_c_to_wasm(algo):
 32 |     source_path = "./test/c/src/sym.c"
 33 |     cmd = ["clang", "-g", source_path, "-o", "sym_c.wasm"]
 34 |     subprocess.run(cmd, timeout=60, check=True)
 35 |     assert os.path.exists("sym_c.wasm"), "sym_c.wasm not found. Compilation failed."
 36 |     cmd = [sys.executable, 'launcher.py', '-f', "sym_c.wasm", '-s', '--sym_args', '1', '-v', 'info', '--source_type', 'c', '--entry', '__main_void', '--search', algo]
 37 |     subprocess.run(cmd, timeout=60, check=True)
 38 | 
 39 |     result_dir = glob.glob('./output/result/sym_c*')
 40 |     result_dir.sort()
 41 |     result_dir = result_dir[-1]
 42 |     state_path = glob.glob(f'{result_dir}/state*.json')
 43 |     assert len(state_path) == 3, 'should have three states output'
 44 |     for state in state_path:
 45 |         with open(state, 'r') as f:
 46 |             state = json.load(f)
 47 |         assert 'Solution' in state and 'sym_arg_1' in state['Solution'], f'no sym_arg_1 solution found in {state}'
 48 |         assert 'Return' in state, f'no Return found in {state}'
 49 |         assert 'Output' in state and len(state['Output']) == 2, f'no Output found in {state}'
 50 |         inp = state['Solution']["sym_arg_1"]
 51 |         analyzed_return = int(state['Return'])
 52 |         analyzed_stdout = state['Output'][0]['output']
 53 |         if state['Return'] != 1: # only test the printable input, should be a char in a~z
 54 |             continue
 55 |         # call wasmtime with inp
 56 |         cmd = ["wasmtime", "sym_c.wasm", inp]
 57 |         p = subprocess.run(cmd, timeout=60, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
 58 |         # compare results
 59 |         assert p.returncode == analyzed_return, f'analyzed return code {analyzed_return}, wasmtime returned {p.returncode}, input {inp}, wasmtime stderr {p.stderr.decode("utf-8")}'
 60 |         assert p.stdout.decode('utf-8') == analyzed_stdout, f'output mismatched, analyzed {analyzed_stdout}, wasmtime returned {p.stdout.decode("utf-8")}'
 61 | 
 62 |     os.remove("sym_c.wasm")
 63 |     os.remove("sym_c.wat")
 64 | 
 65 | def test_hello_rust_to_wasm():
 66 |     source_dir = "./test/rust/hello"
 67 |     expected_wasm_path = "./test/rust/hello/target/wasm32-wasi/debug/hello_rust.wasm"
 68 |     cmd = ["cargo", "build", "--target", "wasm32-wasi"]
 69 |     subprocess.run(cmd, cwd=source_dir, timeout=60, check=True)
 70 |     assert os.path.exists(expected_wasm_path), "hello_rust.wasm not found. Compilation failed."
 71 |     cmd = [sys.executable, 'launcher.py', '-f', expected_wasm_path, '-s', '-v', 'info', '--source_type', 'rust', '--entry', '__main_void']
 72 |     subprocess.run(cmd, timeout=60, check=True)
 73 |     cmd = ["rm", "-rf", "./test/rust/hello/target"]
 74 |     subprocess.run(cmd, timeout=60, check=True)
 75 | 
 76 |     result_dir = glob.glob('./output/result/hello_rust*')
 77 |     result_dir.sort()
 78 |     result_dir = result_dir[-1]
 79 |     state_path = glob.glob(f'{result_dir}/state*.json')
 80 |     assert len(state_path) == 1, 'should have only one state output'
 81 |     with open(state_path[0], 'r') as f:
 82 |         state = json.load(f)
 83 |     assert state['Output'][0] == {
 84 |         "name": "stdout",
 85 |         "output": "Hello, world!\n"
 86 |     }, f'output mismatched, got {state["Output"]}'
 87 | 
 88 | def test_hello_go_to_wasm():
 89 |     source_path = "./test/go/src/hello.go"
 90 |     cmd = ["tinygo", "build", "-target=wasi", "-o", "hello_go.wasm", source_path]
 91 |     subprocess.run(cmd, timeout=60, check=True)
 92 |     assert os.path.exists("hello_go.wasm"), "hello_go.wasm not found. Compilation failed."
 93 |     cmd = [sys.executable, 'launcher.py', '-f', "hello_go.wasm", '-s', '-v', 'info', '--source_type', 'go', '--entry', 'runtime.run$1']
 94 |     subprocess.run(cmd, timeout=60, check=True)
 95 |     os.remove("hello_go.wasm")
 96 |     os.remove("hello_go.wat")
 97 | 
 98 |     result_dir = glob.glob('./output/result/hello_go*')
 99 |     result_dir.sort()
100 |     result_dir = result_dir[-1]
101 |     state_path = glob.glob(f'{result_dir}/state*.json')
102 |     assert len(state_path) == 1, 'should have only one state output'
103 |     with open(state_path[0], 'r') as f:
104 |         state = json.load(f)
105 |     assert 'Return' in state, f'no Return found in {state}'
106 |     assert state['Return'] == "0", f'should return 0, got {state["Return"]}'
107 |     assert state['Output'][0] == {
108 |         "name": "stdout",
109 |         "output": "Hello, world!\n"
110 |     }, f'output mismatched, got {state["Output"]}'
111 | 
def test_visualize_graph():
    """Run the launcher with --visualize and expect exactly one hello_world*.pdf graph."""
    launcher_cmd = [sys.executable, 'launcher.py', '-f', './test/hello_world.wasm', '-s', '-v', 'info', '--visualize']
    subprocess.run(launcher_cmd, timeout=30, check=True)
    graph_files = glob.glob('./output/visualized_graph/hello_world*.pdf')
    graph_count = len(graph_files)
    assert graph_count == 1, 'more than one matching results, do you have multiple `hello_world*` cases?'
118 | 


--------------------------------------------------------------------------------
/test/test_return.wasm:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/PKU-ASAL/SeeWasm/e2a40c2fd2bb19fb7f6f5bd646a862cc5ddeaa67/test/test_return.wasm


--------------------------------------------------------------------------------
/test/test_unreachable.wasm:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/PKU-ASAL/SeeWasm/e2a40c2fd2bb19fb7f6f5bd646a862cc5ddeaa67/test/test_unreachable.wasm


--------------------------------------------------------------------------------
/wasm/__init__.py:
--------------------------------------------------------------------------------
  1 | from __future__ import unicode_literals
  2 | 
  3 | __version__ = '1.2'
  4 | 
  5 | from .decode import (
  6 |     decode_bytecode,
  7 |     decode_module,
  8 | )
  9 | 
 10 | from .formatter import (
 11 |     format_function,
 12 |     format_instruction,
 13 |     format_lang_type,
 14 |     format_mutability,
 15 | )
 16 | 
 17 | from .modtypes import (
 18 |     ModuleHeader,
 19 |     FunctionImportEntryData,
 20 |     ResizableLimits,
 21 |     TableType,
 22 |     MemoryType,
 23 |     GlobalType,
 24 |     ImportEntry,
 25 |     ImportSection,
 26 |     FuncType,
 27 |     TypeSection,
 28 |     FunctionSection,
 29 |     TableSection,
 30 |     MemorySection,
 31 |     InitExpr,
 32 |     GlobalEntry,
 33 |     GlobalSection,
 34 |     ExportEntry,
 35 |     ExportSection,
 36 |     StartSection,
 37 |     ElementSegment,
 38 |     ElementSection,
 39 |     LocalEntry,
 40 |     FunctionBody,
 41 |     CodeSection,
 42 |     DataSegment,
 43 |     DataSection,
 44 |     Naming,
 45 |     NameMap,
 46 |     LocalNames,
 47 |     LocalNameMap,
 48 |     NameSubSection,
 49 |     Section,
 50 | )
 51 | 
 52 | from .immtypes import (
 53 |     BlockImm,
 54 |     BranchImm,
 55 |     BranchTableImm,
 56 |     CallImm,
 57 |     CallIndirectImm,
 58 |     LocalVarXsImm,
 59 |     GlobalVarXsImm,
 60 |     MemoryImm,
 61 |     CurGrowMemImm,
 62 |     I32ConstImm,
 63 |     I64ConstImm,
 64 |     F32ConstImm,
 65 |     F64ConstImm,
 66 | )
 67 | 
 68 | from .opcodes import (
 69 |     Opcode,
 70 |     INSN_ENTER_BLOCK,
 71 |     INSN_LEAVE_BLOCK,
 72 |     INSN_BRANCH,
 73 |     INSN_NO_FLOW,
 74 | )
 75 | 
# Export one module-level constant per opcode: the mnemonic is upper-cased,
# '.' and '/' are replaced by '_', and the result is prefixed with 'OP_'.
# Each constant is bound to the opcode id.
# NOTE(review): `opcodes` is not imported by name above; it is presumably
# bound as a side effect of `from .opcodes import ...` - confirm.
# The loop variable `cur_op` stays defined in the module namespace afterwards.
for cur_op in opcodes.OPCODES:
    globals()[
        'OP_' + cur_op.mnemonic.upper().replace('.', '_').replace('/', '_')
    ] = cur_op.id
 80 | 
 81 | from .wasmtypes import (
 82 |     UInt8Field,
 83 |     UInt16Field,
 84 |     UInt32Field,
 85 |     UInt64Field,
 86 |     VarUInt1Field,
 87 |     VarUInt7Field,
 88 |     VarUInt32Field,
 89 |     VarInt7Field,
 90 |     VarInt32Field,
 91 |     VarInt64Field,
 92 |     ElementTypeField,
 93 |     ValueTypeField,
 94 |     ExternalKindField,
 95 |     BlockTypeField,
 96 |     SEC_UNK,
 97 |     SEC_TYPE,
 98 |     SEC_IMPORT,
 99 |     SEC_FUNCTION,
100 |     SEC_TABLE,
101 |     SEC_MEMORY,
102 |     SEC_GLOBAL,
103 |     SEC_EXPORT,
104 |     SEC_START,
105 |     SEC_ELEMENT,
106 |     SEC_CODE,
107 |     SEC_DATA,
108 |     SEC_DATACOUNT,
109 |     SEC_NAME,
110 |     LANG_TYPE_I32,
111 |     LANG_TYPE_I64,
112 |     LANG_TYPE_F32,
113 |     LANG_TYPE_F64,
114 |     LANG_TYPE_ANYFUNC,
115 |     LANG_TYPE_FUNC,
116 |     LANG_TYPE_EMPTY,
117 |     VAL_TYPE_I32,
118 |     VAL_TYPE_I64,
119 |     VAL_TYPE_F32,
120 |     VAL_TYPE_F64,
121 |     NAME_SUBSEC_FUNCTION,
122 |     NAME_SUBSEC_LOCAL,
123 |     IMMUTABLE,
124 |     MUTABLE,
125 | )
126 | 


--------------------------------------------------------------------------------
/wasm/__main__.py:
--------------------------------------------------------------------------------
 1 | """Testing & debug stuff."""
 2 | from __future__ import print_function, absolute_import, division, unicode_literals
 3 | 
 4 | import argparse
 5 | import sys
 6 | 
 7 | from .formatter import format_function
 8 | from .modtypes import SEC_CODE, SEC_TYPE, SEC_FUNCTION, Section
 9 | from .decode import decode_module
10 | 
11 | 
def dump():
    """Command-line entry point: print every fragment of a WASM module.

    Reads the file named on the command line, prints the header and each
    section, and with ``--disas`` also prints every function body of the
    code section. If the file cannot be opened, an error is written to
    stderr and the function returns.
    """
    cli = argparse.ArgumentParser()
    cli.add_argument('wasm_file', type=str)
    cli.add_argument('--disas', action='store_true', help="Disassemble code")
    args = cli.parse_args()

    try:
        with open(args.wasm_file, 'rb') as handle:
            raw = handle.read()
    except IOError as exc:
        print("[-] Can't open input file: " + str(exc), file=sys.stderr)
        return

    # The first fragment is the module header.
    fragments = iter(decode_module(raw, decode_name_subsections=False))
    hdr, hdr_data = next(fragments)
    print(hdr.to_string(hdr_data))

    # Print the remaining sections, remembering the payloads needed later.
    code_sec = type_sec = func_sec = None
    for cur_sec, cur_sec_data in fragments:
        print(cur_sec.to_string(cur_sec_data))
        if type(cur_sec) != Section:
            continue
        if cur_sec_data.id == SEC_CODE:
            code_sec = cur_sec_data.payload
        elif cur_sec_data.id == SEC_TYPE:
            type_sec = cur_sec_data.payload
        elif cur_sec_data.id == SEC_FUNCTION:
            func_sec = cur_sec_data.payload

    # Disassemble only on request and only when a code section exists.
    # TODO: We might want to make use of debug names, if available.
    if not args.disas or code_sec is None:
        return

    for i, func_body in enumerate(code_sec.bodies):
        print('{x} sub_{id:04X} {x}'.format(x='=' * 35, id=i))

        # Type info is used only when both sections were seen.
        if None in (type_sec, func_sec):
            func_type = None
        else:
            func_type = type_sec.entries[func_sec.types[i]]

        print()
        print('\n'.join(format_function(func_body, func_type)))
        print()
58 | 


--------------------------------------------------------------------------------
/wasm/compat.py:
--------------------------------------------------------------------------------
 1 | """Defines compatibility quirks for Python 2.7."""
 2 | from __future__ import print_function, absolute_import, division, unicode_literals
 3 | 
 4 | import sys
 5 | import functools
 6 | import logging
 7 | import warnings
 8 | 
 9 | 
def add_metaclass(metaclass):
    """
    Class decorator that rebuilds the decorated class with ``metaclass``.
    Borrowed from `six` module.
    """
    @functools.wraps(metaclass)
    def wrapper(cls):
        namespace = dict(cls.__dict__)
        declared_slots = namespace.get('__slots__')
        if declared_slots is not None:
            # a single slot may be given as a bare string
            slot_names = [declared_slots] if isinstance(declared_slots, str) else declared_slots
            for slot_name in slot_names:
                namespace.pop(slot_name)
        for internal in ('__dict__', '__weakref__'):
            namespace.pop(internal, None)
        return metaclass(cls.__name__, cls.__bases__, namespace)
    return wrapper
28 | 
29 | 
def indent(text, prefix, predicate=None):
    """Prepend 'prefix' to selected lines of 'text'.

    A line is prefixed when 'predicate(line)' is true. Without a
    'predicate', every line that contains something other than whitespace
    is prefixed. Line endings are preserved.

    Borrowed from Py3 `textwrap` module.
    """
    if predicate is not None:
        wants_prefix = predicate
    else:
        wants_prefix = lambda line: line.strip()

    return ''.join(
        prefix + line if wants_prefix(line) else line
        for line in text.splitlines(True)
    )
48 | 
49 | 
def deprecated_func(func):
    """Wrap 'func' so that its first call emits a DeprecationWarning."""

    # Py2 has no `nonlocal`, so the flag lives in a mutable container.
    state = {'warned': False}

    @functools.wraps(func)
    def wrapper(*args, **kwargs):
        if not state['warned']:
            warnings.warn(
                "Call to deprecated function {}.".format(func.__name__),
                DeprecationWarning,
            )
            state['warned'] = True
        return func(*args, **kwargs)

    return wrapper
68 | 
69 | 
# `byte2int` normalizes one element of a byte sequence to an `int`.
if sys.version_info[0] < 2:
    raise Exception("Unsupported Python version")

if sys.version_info[0] == 2:
    def byte2int(x):
        """Return `ord(x)` when `x` is exactly a `str`, otherwise `x` itself."""
        if type(x) == str:
            return ord(x)
        return x

else:
    def byte2int(x):
        """Return `x` unchanged."""
        return x
80 | 
81 | 
82 | 


--------------------------------------------------------------------------------
/wasm/decode.py:
--------------------------------------------------------------------------------
 1 | """Provides functions for decoding WASM modules and bytecode."""
 2 | from __future__ import print_function, absolute_import, division, unicode_literals
 3 | 
 4 | from collections import namedtuple
 5 | from .modtypes import ModuleHeader, Section, SEC_UNK, SEC_NAME, NameSubSection
 6 | from .opcodes import OPCODE_MAP
 7 | from .compat import byte2int
 8 | 
 9 | 
# One decoded instruction: opcode info, decoded immediate (or None), encoded length.
Instruction = namedtuple('Instruction', ['op', 'imm', 'len'])
# One decoded piece of a module: the structure used for decoding and the decoded data.
ModuleFragment = namedtuple('ModuleFragment', ['type', 'data'])
12 | 
13 | 
def decode_bytecode(bytecode):
    """
    Decodes raw bytecode, yielding `Instruction`s.

    Each yielded `Instruction` carries the `Opcode` entry, the decoded
    immediate (or `None` if the opcode has no immediate structure) and the
    total encoded length of the instruction in bytes.

    Opcodes introduced by the 0xFC prefix byte are looked up under the
    two-byte id used by the opcode table (e.g. 0xfc0a).

    Raises `KeyError` if an opcode id is not present in `OPCODE_MAP`.
    """
    bytecode_wnd = memoryview(bytecode)
    while bytecode_wnd:
        opcode_id = byte2int(bytecode_wnd[0])
        opcode_len = 1

        # The opcode table registers prefixed opcodes as (0xfc << 8) | sub-opcode.
        # Without composing the id here, those entries could never be matched
        # and a 0xFC byte would always end in a KeyError.
        if opcode_id == 0xfc and len(bytecode_wnd) > 1:
            opcode_id = (opcode_id << 8) | byte2int(bytecode_wnd[1])
            opcode_len = 2

        opcode = OPCODE_MAP[opcode_id]

        if opcode.imm_struct is not None:
            offs, imm, _ = opcode.imm_struct.from_raw(
                None, bytecode_wnd[opcode_len:]
            )
        else:
            # NOTE(review): the table declares the 0xFC-prefixed opcodes without
            # immediates, so bytes following them are decoded as separate
            # instructions. Confirm against the bulk-memory encoding whether
            # immediate structures should be added to the table.
            imm = None
            offs = 0

        insn_len = opcode_len + offs
        yield Instruction(opcode, imm, insn_len)
        bytecode_wnd = bytecode_wnd[insn_len:]
30 | 
31 | 
def decode_module(module, decode_name_subsections=False):
    """
    Decodes a raw WASM module, yielding `ModuleFragment`s.

    The first fragment is the module header; every following fragment is a
    section. When `decode_name_subsections` is true, a section whose id is
    `SEC_UNK` and whose name is `SEC_NAME` is not yielded itself; instead its
    payload is decoded and one fragment per `NameSubSection` is yielded.
    """
    remaining = memoryview(module)

    # Header first.
    header = ModuleHeader()
    consumed, header_data, _ = header.from_raw(None, remaining)
    yield ModuleFragment(header, header_data)
    remaining = remaining[consumed:]

    # Then one section after another until the input is exhausted.
    while remaining:
        section = Section()
        consumed, section_data, _ = section.from_raw(None, remaining)

        expand_names = (
            decode_name_subsections and
            section_data.id == SEC_UNK and
            section_data.name == SEC_NAME
        )

        if not expand_names:
            yield ModuleFragment(section, section_data)
        else:
            # Walk the name section's payload, one subsection at a time.
            payload_wnd = section_data.payload
            while payload_wnd:
                subsection = NameSubSection()
                sub_consumed, sub_data, _ = subsection.from_raw(None, payload_wnd)
                yield ModuleFragment(subsection, sub_data)
                payload_wnd = payload_wnd[sub_consumed:]

        remaining = remaining[consumed:]
63 | 


--------------------------------------------------------------------------------
/wasm/formatter.py:
--------------------------------------------------------------------------------
 1 | """Defines functions converting raw instructions into textual form."""
 2 | from __future__ import print_function, absolute_import, division, unicode_literals
 3 | 
 4 | import itertools
 5 | 
 6 | from .opcodes import INSN_LEAVE_BLOCK, INSN_ENTER_BLOCK
 7 | from .decode import decode_bytecode
 8 | from .wasmtypes import VAL_TYPE_I32, VAL_TYPE_I64, VAL_TYPE_F32, VAL_TYPE_F64, MUTABLE, IMMUTABLE
 9 | 
10 | 
def format_instruction(insn):
    """
    Renders a raw `Instruction` as human readable text.

    The result is the opcode mnemonic, followed (if the instruction has an
    immediate) by a space and the comma-separated string forms of the
    immediate's fields, in the order the immediate structure declares them.
    As of writing, the text representation for WASM is not yet standardized,
    so this is just a generic format.
    """
    mnemonic = insn.op.mnemonic
    if not insn.imm:
        return mnemonic

    imm_struct = insn.op.imm_struct
    operands = []
    for field in imm_struct._meta.fields:
        # The field object knows how to print the value decoded for it.
        field_type = getattr(imm_struct, field.name)
        field_value = getattr(insn.imm, field.name)
        operands.append(field_type.to_string(field_value))

    return mnemonic + ' ' + ', '.join(operands)
28 | 
# Text emitted for each mutability flag; immutable values get no keyword.
_mutability_str_mapping = {
    MUTABLE: "mut",
    IMMUTABLE: ""
}

def format_mutability(mutability):
    """
    Takes a mutability flag (`MUTABLE` or `IMMUTABLE`), returning its string
    representation: "mut" for mutable, the empty string for immutable.

    Raises `ValueError` for any other value.
    """
    try:
        return _mutability_str_mapping[mutability]
    except KeyError:
        raise ValueError('Bad value for mutability ({})'.format(mutability))
40 | 
# Textual names of the four value types.
_lang_type_str_mapping = {
    VAL_TYPE_I32: 'i32',
    VAL_TYPE_I64: 'i64',
    VAL_TYPE_F32: 'f32',
    VAL_TYPE_F64: 'f64',
}


def format_lang_type(lang_type):
    """
    Returns the textual name ('i32', 'i64', 'f32' or 'f64') of a value type
    constant.

    Raises `ValueError` if `lang_type` is not one of the known value types.
    """
    try:
        type_name = _lang_type_str_mapping[lang_type]
    except KeyError:
        raise ValueError('Bad value for value type ({})'.format(lang_type))
    else:
        return type_name
55 | 
56 | 
def format_function(
    func_body,
    func_type=None,
    indent=2,
    format_locals=True,
):
    """
    Yields the textual form of a function, one line at a time.

    `func_body` is a decoded `FunctionBody`. `func_type` is the matching
    decoded `FuncType`; without it only a bare `func` header is produced,
    since parameter and result types are not known. `indent` is the number
    of spaces per nesting level, and `format_locals` controls whether the
    `(locals ...)` line is emitted.
    """
    # Header line: "func", plus parameter / result types when known.
    signature = 'func'
    if func_type is not None:
        if func_type.param_types:
            signature += ' (param {})'.format(
                ' '.join(format_lang_type(t) for t in func_type.param_types)
            )
        if func_type.return_type:
            signature += ' (result {})'.format(
                format_lang_type(func_type.return_type)
            )
    yield signature

    # Locals line: every local entry contributes its type `count` times.
    if format_locals and func_body.locals:
        local_names = []
        for entry in func_body.locals:
            local_names.extend([format_lang_type(entry.type)] * entry.count)
        yield '(locals {})'.format(' '.join(local_names))

    # Body: block-closing instructions are dedented before they are printed,
    # block-opening ones indent whatever follows them.
    depth = 1
    for insn in decode_bytecode(func_body.code):
        flags = insn.op.flags
        if flags & INSN_LEAVE_BLOCK:
            depth -= 1
        yield ' ' * (depth * indent) + format_instruction(insn)
        if flags & INSN_ENTER_BLOCK:
            depth += 1
92 | 


--------------------------------------------------------------------------------
/wasm/immtypes.py:
--------------------------------------------------------------------------------
 1 | """Defines immediate types for WASM bytecode instructions."""
 2 | from __future__ import print_function, absolute_import, division, unicode_literals
 3 | 
 4 | from .wasmtypes import *
 5 | from .types import Structure, RepeatField
 6 | 
 7 | 
# Immediate of the block-opening opcodes ('block', 'loop', 'if' in the opcode table).
class BlockImm(Structure):
    sig = BlockTypeField()


# Immediate of 'br' and 'br_if'.
class BranchImm(Structure):
    relative_depth = VarUInt32Field()


# Immediate of 'br_table': a counted list of targets followed by a default target.
class BranchTableImm(Structure):
    target_count = VarUInt32Field()
    # Repeated `target_count` times.
    target_table = RepeatField(VarUInt32Field(), lambda x: x.target_count)
    default_target = VarUInt32Field()


# Immediate of 'call'.
class CallImm(Structure):
    function_index = VarUInt32Field()


# Immediate of 'call_indirect'.
class CallIndirectImm(Structure):
    type_index = VarUInt32Field()
    reserved = VarUInt1Field()


# Immediate of 'get_local', 'set_local' and 'tee_local'.
class LocalVarXsImm(Structure):
    local_index = VarUInt32Field()


# Immediate of 'get_global' and 'set_global'.
class GlobalVarXsImm(Structure):
    global_index = VarUInt32Field()


# Immediate shared by all load and store opcodes.
class MemoryImm(Structure):
    flags = VarUInt32Field()
    offset = VarUInt32Field()


# Immediate of 'current_memory' and 'grow_memory'.
class CurGrowMemImm(Structure):
    reserved = VarUInt1Field()


# Immediate of 'i32.const'.
class I32ConstImm(Structure):
    value = VarInt32Field()


# Immediate of 'i64.const'.
class I64ConstImm(Structure):
    value = VarInt64Field()


# Immediate of 'f32.const'.
# NOTE(review): decoded with an unsigned integer field, so the value is
# presumably the raw bit pattern of the float rather than a float -- confirm.
class F32ConstImm(Structure):
    value = UInt32Field()


# Immediate of 'f64.const'.
# NOTE(review): same as F32ConstImm -- presumably the raw 64-bit pattern.
class F64ConstImm(Structure):
    value = UInt64Field()
62 | 


--------------------------------------------------------------------------------
/wasm/modtypes.py:
--------------------------------------------------------------------------------
  1 | """Defines data structures used in WASM (binary) modules."""
  2 | from __future__ import print_function, absolute_import, division, unicode_literals
  3 | 
  4 | from .wasmtypes import *
  5 | from .opcodes import OP_END
  6 | from .types import (
  7 |     Structure, CondField, RepeatField,
  8 |     ChoiceField, WasmField, ConstField, BytesField,
  9 | )
 10 | 
 11 | 
# First structure of a module; `decode_module` reads it before any section.
class ModuleHeader(Structure):
    magic = UInt32Field()
    version = UInt32Field()


# Type payload of an import entry of kind 0 (see `ImportEntry.type`).
class FunctionImportEntryData(Structure):
    type = VarUInt32Field()


# Size limits used by table and memory types.
class ResizableLimits(Structure):
    flags = VarUInt32Field()
    initial = VarUInt32Field()
    # Only present when bit 0 of `flags` is set.
    maximum = CondField(VarUInt32Field(), lambda x: x.flags & 1)


class TableType(Structure):
    element_type = ElementTypeField()
    limits = ResizableLimits()


class MemoryType(Structure):
    limits = ResizableLimits()


class GlobalType(Structure):
    content_type = ValueTypeField()
    mutability = VarUInt1Field()


# One import: length-prefixed module and field names, a kind, and a
# kind-dependent type description.
class ImportEntry(Structure):
    module_len = VarUInt32Field()
    module_str = BytesField(lambda x: x.module_len, is_str=True)
    field_len = VarUInt32Field()
    field_str = BytesField(lambda x: x.field_len, is_str=True)
    kind = ExternalKindField()
    # The structure used for `type` is selected by the value of `kind`.
    type = ChoiceField({
        0: FunctionImportEntryData(),
        1: TableType(),
        2: MemoryType(),
        3: GlobalType(),
    }, lambda x: x.kind)


class ImportSection(Structure):
    count = VarUInt32Field()
    entries = RepeatField(ImportEntry(), lambda x: x.count)


# A function signature: counted parameter types and at most one return type.
class FuncType(Structure):
    form = VarInt7Field()
    param_count = VarUInt32Field()
    param_types = RepeatField(ValueTypeField(), lambda x: x.param_count)
    return_count = VarUInt1Field()
    # Only present when `return_count` is non-zero.
    return_type = CondField(ValueTypeField(), lambda x: bool(x.return_count))


class TypeSection(Structure):
    count = VarUInt32Field()
    entries = RepeatField(FuncType(), lambda x: x.count)


class FunctionSection(Structure):
    count = VarUInt32Field()
    # One VarUInt32 per function; presumably an index into the type
    # section -- confirm against the format specification.
    types = RepeatField(VarUInt32Field(), lambda x: x.count)


class TableSection(Structure):
    count = VarUInt32Field()
    entries = RepeatField(TableType(), lambda x: x.count)


class MemorySection(Structure):
    count = VarUInt32Field()
    entries = RepeatField(MemoryType(), lambda x: x.count)
 86 | 
 87 | 
 88 | class InitExpr(WasmField):
 89 |     def from_raw(self, struct, raw):
 90 |         from .decode import decode_bytecode
 91 | 
 92 |         offs = 0
 93 |         instrs = []
 94 |         for cur_insn in decode_bytecode(raw):
 95 |             offs += cur_insn.len
 96 |             instrs.append(cur_insn)
 97 |             if cur_insn.op.id == OP_END:
 98 |                 break
 99 | 
100 |         return offs, instrs, self
101 | 
102 | 
# A global: its type followed by the expression producing its initial value.
class GlobalEntry(Structure):
    type = GlobalType()
    init = InitExpr()


class GlobalSection(Structure):
    count = VarUInt32Field()
    globals = RepeatField(GlobalEntry(), lambda x: x.count)


# One export: a length-prefixed name, the kind of the exported item and its index.
class ExportEntry(Structure):
    field_len = VarUInt32Field()
    field_str = BytesField(lambda x: x.field_len, is_str=True)
    kind = ExternalKindField()
    index = VarUInt32Field()


class ExportSection(Structure):
    count = VarUInt32Field()
    entries = RepeatField(ExportEntry(), lambda x: x.count)


class StartSection(Structure):
    index = VarUInt32Field()


# One element segment: an index, an offset expression and a counted list of
# VarUInt32 elements.
class ElementSegment(Structure):
    index = VarUInt32Field()
    offset = InitExpr()
    num_elem = VarUInt32Field()
    elems = RepeatField(VarUInt32Field(), lambda x: x.num_elem)


class ElementSection(Structure):
    count = VarUInt32Field()
    entries = RepeatField(ElementSegment(), lambda x: x.count)


# A run of `count` locals sharing one value type.
class LocalEntry(Structure):
    count = VarUInt32Field()
    type = ValueTypeField()


# One function body: local declarations followed by raw (undecoded) bytecode.
class FunctionBody(Structure):
    body_size = VarUInt32Field()
    local_count = VarUInt32Field()
    locals = RepeatField(
        LocalEntry(),
        lambda x: x.local_count,
    )
    # The code takes whatever remains of `body_size` once the encoded sizes
    # of `local_count` and `locals` have been subtracted.
    code = BytesField(
        lambda x: (
            x.body_size -
            x.get_decoder_meta()['lengths']['local_count'] -
            x.get_decoder_meta()['lengths']['locals']
        )
    )


class CodeSection(Structure):
    count = VarUInt32Field()
    bodies = RepeatField(FunctionBody(), lambda x: x.count)


# One data segment: an index, an offset expression and `size` raw bytes.
class DataSegment(Structure):
    index = VarUInt32Field()
    offset = InitExpr()
    size = VarUInt32Field()
    data = BytesField(lambda x: x.size)


class DataSection(Structure):
    count = VarUInt32Field()
    entries = RepeatField(DataSegment(), lambda x: x.count)

class DataCountSection(Structure):
    count = VarUInt32Field()

# Association of an index with a length-prefixed name.
class Naming(Structure):
    index = VarUInt32Field()
    name_len = VarUInt32Field()
    name_str = BytesField(lambda x: x.name_len, is_str=True)


class NameMap(Structure):
    count = VarUInt32Field()
    names = RepeatField(Naming(), lambda x: x.count)


# Name map of one function's locals, keyed by `index`.
class LocalNames(Structure):
    index = VarUInt32Field()
    local_map = NameMap()
195 | 
196 | 
# Counted list of per-function local name maps.
class LocalNameMap(Structure):
    count = VarUInt32Field()
    # RepeatField takes a field *instance*, as everywhere else in this module;
    # passing the bare `LocalNames` class left nothing to decode with.
    funcs = RepeatField(LocalNames(), lambda x: x.count)
200 | 
201 | 
# One subsection of the name section; `decode_module` decodes these from the
# name section's payload when asked to.
class NameSubSection(Structure):
    name_type = VarUInt7Field()
    payload_len = VarUInt32Field()
    # The payload structure is selected by `name_type`.
    payload = ChoiceField({
        NAME_SUBSEC_FUNCTION: NameMap(),
        NAME_SUBSEC_LOCAL: LocalNameMap(),
    }, lambda x: x.name_type)


# One module section: id, declared payload length, an optional name and an
# id-dependent payload.
class Section(Structure):
    id = VarUInt7Field()
    payload_len = VarUInt32Field()
    # `name_len` and `name` are only decoded for sections with id 0.
    name_len = CondField(
        VarUInt32Field(),
        lambda x: x.id == 0,
    )
    name = CondField(
        BytesField(lambda x: x.name_len, is_str=True),
        lambda x: x.id == 0,
    )

    # The payload structure is selected by the section id.
    payload = ChoiceField({
        # Kept as raw bytes: `payload_len` minus the bytes already used by
        # `name_len` and `name`.
        SEC_UNK: BytesField(lambda x: (
            x.payload_len -
            x.get_decoder_meta()['lengths']['name'] -
            x.get_decoder_meta()['lengths']['name_len']
        )),
        SEC_TYPE: TypeSection(),
        SEC_IMPORT: ImportSection(),
        SEC_FUNCTION: FunctionSection(),
        SEC_TABLE: TableSection(),
        SEC_MEMORY: MemorySection(),
        SEC_GLOBAL: GlobalSection(),
        SEC_EXPORT: ExportSection(),
        SEC_START: StartSection(),
        SEC_ELEMENT: ElementSection(),
        SEC_CODE: CodeSection(),
        SEC_DATA: DataSection(),
        SEC_DATACOUNT: DataCountSection(),
    }, lambda x: x.id)

    # Bytes of the declared `payload_len` that the decoded name and payload
    # did not consume; clamped at zero so the length never goes negative.
    overhang = BytesField(lambda x: max(0, (
        x.payload_len -
        x.get_decoder_meta()['lengths']['name'] -
        x.get_decoder_meta()['lengths']['name_len'] -
        x.get_decoder_meta()['lengths']['payload']
    )))
249 | 


--------------------------------------------------------------------------------
/wasm/opcodes.py:
--------------------------------------------------------------------------------
  1 | """Defines mappings of opcodes to their info structures."""
  2 | from __future__ import print_function, absolute_import, division, unicode_literals
  3 | 
  4 | from collections import namedtuple
  5 | from .immtypes import *
  6 | 
  7 | 
  8 | Opcode = namedtuple('Opcode', 'id mnemonic imm_struct flags')
  9 | 
 10 | # Flags describing generic characteristics of instructions
 11 | INSN_ENTER_BLOCK = 1 << 0
 12 | INSN_LEAVE_BLOCK = 1 << 1
 13 | INSN_BRANCH = 1 << 2
 14 | INSN_NO_FLOW = 1 << 3  # does not pass control to next insn
 15 | 
 16 | 
# Table of all known opcodes. Columns: opcode id, mnemonic, immediate
# structure (None if the opcode takes no immediate), INSN_* flags.
OPCODES = [
    # Control flow
    Opcode(0x00, 'unreachable',           None,                     INSN_NO_FLOW),
    Opcode(0x01, 'nop',                   None,                     0),
    Opcode(0x02, 'block',                 BlockImm(),               INSN_ENTER_BLOCK),
    Opcode(0x03, 'loop',                  BlockImm(),               INSN_ENTER_BLOCK),
    Opcode(0x04, 'if',                    BlockImm(),               INSN_ENTER_BLOCK),
    Opcode(0x05, 'else',                  None,                     INSN_ENTER_BLOCK | INSN_LEAVE_BLOCK),
    Opcode(0x0b, 'end',                   None,                     INSN_LEAVE_BLOCK),
    Opcode(0x0c, 'br',                    BranchImm(),              INSN_BRANCH),
    Opcode(0x0d, 'br_if',                 BranchImm(),              INSN_BRANCH),
    Opcode(0x0e, 'br_table',              BranchTableImm(),         INSN_BRANCH),
    Opcode(0x0f, 'return',                None,                     INSN_NO_FLOW),

    # Calls
    Opcode(0x10, 'call',                  CallImm(),                INSN_BRANCH),
    Opcode(0x11, 'call_indirect',         CallIndirectImm(),        INSN_BRANCH),

    # Operand stack manipulation
    Opcode(0x1a, 'drop',                  None,                     0),
    Opcode(0x1b, 'select',                None,                     0),

    # Local and global variable access
    Opcode(0x20, 'get_local',             LocalVarXsImm(),          0),
    Opcode(0x21, 'set_local',             LocalVarXsImm(),          0),
    Opcode(0x22, 'tee_local',             LocalVarXsImm(),          0),
    Opcode(0x23, 'get_global',            GlobalVarXsImm(),         0),
    Opcode(0x24, 'set_global',            GlobalVarXsImm(),         0),

    # Memory loads, stores and size management
    Opcode(0x28, 'i32.load',              MemoryImm(),              0),
    Opcode(0x29, 'i64.load',              MemoryImm(),              0),
    Opcode(0x2a, 'f32.load',              MemoryImm(),              0),
    Opcode(0x2b, 'f64.load',              MemoryImm(),              0),
    Opcode(0x2c, 'i32.load8_s',           MemoryImm(),              0),
    Opcode(0x2d, 'i32.load8_u',           MemoryImm(),              0),
    Opcode(0x2e, 'i32.load16_s',          MemoryImm(),              0),
    Opcode(0x2f, 'i32.load16_u',          MemoryImm(),              0),
    Opcode(0x30, 'i64.load8_s',           MemoryImm(),              0),
    Opcode(0x31, 'i64.load8_u',           MemoryImm(),              0),
    Opcode(0x32, 'i64.load16_s',          MemoryImm(),              0),
    Opcode(0x33, 'i64.load16_u',          MemoryImm(),              0),
    Opcode(0x34, 'i64.load32_s',          MemoryImm(),              0),
    Opcode(0x35, 'i64.load32_u',          MemoryImm(),              0),
    Opcode(0x36, 'i32.store',             MemoryImm(),              0),
    Opcode(0x37, 'i64.store',             MemoryImm(),              0),
    Opcode(0x38, 'f32.store',             MemoryImm(),              0),
    Opcode(0x39, 'f64.store',             MemoryImm(),              0),
    Opcode(0x3a, 'i32.store8',            MemoryImm(),              0),
    Opcode(0x3b, 'i32.store16',           MemoryImm(),              0),
    Opcode(0x3c, 'i64.store8',            MemoryImm(),              0),
    Opcode(0x3d, 'i64.store16',           MemoryImm(),              0),
    Opcode(0x3e, 'i64.store32',           MemoryImm(),              0),
    Opcode(0x3f, 'current_memory',        CurGrowMemImm(),          0),
    Opcode(0x40, 'grow_memory',           CurGrowMemImm(),          0),

    # Constants
    Opcode(0x41, 'i32.const',             I32ConstImm(),            0),
    Opcode(0x42, 'i64.const',             I64ConstImm(),            0),
    Opcode(0x43, 'f32.const',             F32ConstImm(),            0),
    Opcode(0x44, 'f64.const',             F64ConstImm(),            0),

    # Comparisons
    Opcode(0x45, 'i32.eqz',               None,                     0),
    Opcode(0x46, 'i32.eq',                None,                     0),
    Opcode(0x47, 'i32.ne',                None,                     0),
    Opcode(0x48, 'i32.lt_s',              None,                     0),
    Opcode(0x49, 'i32.lt_u',              None,                     0),
    Opcode(0x4a, 'i32.gt_s',              None,                     0),
    Opcode(0x4b, 'i32.gt_u',              None,                     0),
    Opcode(0x4c, 'i32.le_s',              None,                     0),
    Opcode(0x4d, 'i32.le_u',              None,                     0),
    Opcode(0x4e, 'i32.ge_s',              None,                     0),
    Opcode(0x4f, 'i32.ge_u',              None,                     0),
    Opcode(0x50, 'i64.eqz',               None,                     0),
    Opcode(0x51, 'i64.eq',                None,                     0),
    Opcode(0x52, 'i64.ne',                None,                     0),
    Opcode(0x53, 'i64.lt_s',              None,                     0),
    Opcode(0x54, 'i64.lt_u',              None,                     0),
    Opcode(0x55, 'i64.gt_s',              None,                     0),
    Opcode(0x56, 'i64.gt_u',              None,                     0),
    Opcode(0x57, 'i64.le_s',              None,                     0),
    Opcode(0x58, 'i64.le_u',              None,                     0),
    Opcode(0x59, 'i64.ge_s',              None,                     0),
    Opcode(0x5a, 'i64.ge_u',              None,                     0),
    Opcode(0x5b, 'f32.eq',                None,                     0),
    Opcode(0x5c, 'f32.ne',                None,                     0),
    Opcode(0x5d, 'f32.lt',                None,                     0),
    Opcode(0x5e, 'f32.gt',                None,                     0),
    Opcode(0x5f, 'f32.le',                None,                     0),
    Opcode(0x60, 'f32.ge',                None,                     0),
    Opcode(0x61, 'f64.eq',                None,                     0),
    Opcode(0x62, 'f64.ne',                None,                     0),
    Opcode(0x63, 'f64.lt',                None,                     0),
    Opcode(0x64, 'f64.gt',                None,                     0),
    Opcode(0x65, 'f64.le',                None,                     0),
    Opcode(0x66, 'f64.ge',                None,                     0),

    # Integer and floating point arithmetic
    Opcode(0x67, 'i32.clz',               None,                     0),
    Opcode(0x68, 'i32.ctz',               None,                     0),
    Opcode(0x69, 'i32.popcnt',            None,                     0),
    Opcode(0x6a, 'i32.add',               None,                     0),
    Opcode(0x6b, 'i32.sub',               None,                     0),
    Opcode(0x6c, 'i32.mul',               None,                     0),
    Opcode(0x6d, 'i32.div_s',             None,                     0),
    Opcode(0x6e, 'i32.div_u',             None,                     0),
    Opcode(0x6f, 'i32.rem_s',             None,                     0),
    Opcode(0x70, 'i32.rem_u',             None,                     0),
    Opcode(0x71, 'i32.and',               None,                     0),
    Opcode(0x72, 'i32.or',                None,                     0),
    Opcode(0x73, 'i32.xor',               None,                     0),
    Opcode(0x74, 'i32.shl',               None,                     0),
    Opcode(0x75, 'i32.shr_s',             None,                     0),
    Opcode(0x76, 'i32.shr_u',             None,                     0),
    Opcode(0x77, 'i32.rotl',              None,                     0),
    Opcode(0x78, 'i32.rotr',              None,                     0),
    Opcode(0x79, 'i64.clz',               None,                     0),
    Opcode(0x7a, 'i64.ctz',               None,                     0),
    Opcode(0x7b, 'i64.popcnt',            None,                     0),
    Opcode(0x7c, 'i64.add',               None,                     0),
    Opcode(0x7d, 'i64.sub',               None,                     0),
    Opcode(0x7e, 'i64.mul',               None,                     0),
    Opcode(0x7f, 'i64.div_s',             None,                     0),
    Opcode(0x80, 'i64.div_u',             None,                     0),
    Opcode(0x81, 'i64.rem_s',             None,                     0),
    Opcode(0x82, 'i64.rem_u',             None,                     0),
    Opcode(0x83, 'i64.and',               None,                     0),
    Opcode(0x84, 'i64.or',                None,                     0),
    Opcode(0x85, 'i64.xor',               None,                     0),
    Opcode(0x86, 'i64.shl',               None,                     0),
    Opcode(0x87, 'i64.shr_s',             None,                     0),
    Opcode(0x88, 'i64.shr_u',             None,                     0),
    Opcode(0x89, 'i64.rotl',              None,                     0),
    Opcode(0x8a, 'i64.rotr',              None,                     0),
    Opcode(0x8b, 'f32.abs',               None,                     0),
    Opcode(0x8c, 'f32.neg',               None,                     0),
    Opcode(0x8d, 'f32.ceil',              None,                     0),
    Opcode(0x8e, 'f32.floor',             None,                     0),
    Opcode(0x8f, 'f32.trunc',             None,                     0),
    Opcode(0x90, 'f32.nearest',           None,                     0),
    Opcode(0x91, 'f32.sqrt',              None,                     0),
    Opcode(0x92, 'f32.add',               None,                     0),
    Opcode(0x93, 'f32.sub',               None,                     0),
    Opcode(0x94, 'f32.mul',               None,                     0),
    Opcode(0x95, 'f32.div',               None,                     0),
    Opcode(0x96, 'f32.min',               None,                     0),
    Opcode(0x97, 'f32.max',               None,                     0),
    Opcode(0x98, 'f32.copysign',          None,                     0),
    Opcode(0x99, 'f64.abs',               None,                     0),
    Opcode(0x9a, 'f64.neg',               None,                     0),
    Opcode(0x9b, 'f64.ceil',              None,                     0),
    Opcode(0x9c, 'f64.floor',             None,                     0),
    Opcode(0x9d, 'f64.trunc',             None,                     0),
    Opcode(0x9e, 'f64.nearest',           None,                     0),
    Opcode(0x9f, 'f64.sqrt',              None,                     0),
    Opcode(0xa0, 'f64.add',               None,                     0),
    Opcode(0xa1, 'f64.sub',               None,                     0),
    Opcode(0xa2, 'f64.mul',               None,                     0),
    Opcode(0xa3, 'f64.div',               None,                     0),
    Opcode(0xa4, 'f64.min',               None,                     0),
    Opcode(0xa5, 'f64.max',               None,                     0),
    Opcode(0xa6, 'f64.copysign',          None,                     0),

    # Conversions between value types
    Opcode(0xa7, 'i32.wrap/i64',          None,                     0),
    Opcode(0xa8, 'i32.trunc_s/f32',       None,                     0),
    Opcode(0xa9, 'i32.trunc_u/f32',       None,                     0),
    Opcode(0xaa, 'i32.trunc_s/f64',       None,                     0),
    Opcode(0xab, 'i32.trunc_u/f64',       None,                     0),
    Opcode(0xac, 'i64.extend_s/i32',      None,                     0),
    Opcode(0xad, 'i64.extend_u/i32',      None,                     0),
    Opcode(0xae, 'i64.trunc_s/f32',       None,                     0),
    Opcode(0xaf, 'i64.trunc_u/f32',       None,                     0),
    Opcode(0xb0, 'i64.trunc_s/f64',       None,                     0),
    Opcode(0xb1, 'i64.trunc_u/f64',       None,                     0),
    Opcode(0xb2, 'f32.convert_s/i32',     None,                     0),
    Opcode(0xb3, 'f32.convert_u/i32',     None,                     0),
    Opcode(0xb4, 'f32.convert_s/i64',     None,                     0),
    Opcode(0xb5, 'f32.convert_u/i64',     None,                     0),
    Opcode(0xb6, 'f32.demote/f64',        None,                     0),
    Opcode(0xb7, 'f64.convert_s/i32',     None,                     0),
    Opcode(0xb8, 'f64.convert_u/i32',     None,                     0),
    Opcode(0xb9, 'f64.convert_s/i64',     None,                     0),
    Opcode(0xba, 'f64.convert_u/i64',     None,                     0),
    Opcode(0xbb, 'f64.promote/f32',       None,                     0),

    # Reinterpretations
    Opcode(0xbc, 'i32.reinterpret/f32',   None,                     0),
    Opcode(0xbd, 'i64.reinterpret/f64',   None,                     0),
    Opcode(0xbe, 'f32.reinterpret/i32',   None,                     0),
    Opcode(0xbf, 'f64.reinterpret/i64',   None,                     0),
    
    # NOTE(review): only 0xc0 is listed here; confirm whether the neighbouring
    # sign-extension opcodes are intentionally absent.
    Opcode(0xc0, 'i32.extend_s/i8',       None,                     0),

    # Two-byte ids: prefix byte 0xfc followed by a sub-opcode byte.
    # NOTE(review): both are declared without an immediate structure; confirm
    # against the bulk-memory encoding whether immediates follow them.
    Opcode(0xfc0a, 'memory.copy',         None,                     0),
    Opcode(0xfc0b, 'memory.fill',         None,                     0),
]
205 | 
# Lookup table from opcode id to its `Opcode` entry.
OPCODE_MAP = dict((op.id, op) for op in OPCODES)

# Publish an integer constant per opcode at module level, named after the
# mnemonic: upper-cased, with '.' and '/' turned into '_', prefixed by 'OP_'
# (e.g. 'i32.wrap/i64' becomes OP_I32_WRAP_I64).
for cur_op in OPCODES:
    globals()[
        'OP_' + '_'.join(cur_op.mnemonic.upper().replace('/', '.').split('.'))
    ] = cur_op.id
213 | 


--------------------------------------------------------------------------------
/wasm/types.py:
--------------------------------------------------------------------------------
  1 | """Defines a simple, generic data (de)serialization mechanism."""
  2 | from __future__ import print_function, absolute_import, division, unicode_literals
  3 | 
  4 | from .compat import add_metaclass, byte2int, indent, deprecated_func
  5 | import collections
  6 | import logging
  7 | import struct as pystruct
  8 | 
  9 | try:
 10 |     from collections import Callable
 11 | except ImportError:
 12 |     # for Python 3.10+
 13 |     from collections.abc import Callable
 14 | 
 15 | logger = logging.getLogger()
 16 | 
 17 | 
 18 | class WasmField(object):
 19 |     """
 20 |     Abstract base class for all fields.
 21 | 
 22 |     Fields are purely a (de)serialization mechanism. They don't hold the value
 23 |     of decoded information, but take Python data-types and convert them
 24 |     to a raw byte format or vice versa. Thus, a field instance can be reused
 25 |     to de/encode multiple values.
 26 | 
 27 |     Besides the abstract interface, implements type counting and IDing to allow
 28 |     field order detection in Python 2, where `__prepare__` doesn't exist yet.
 29 |     In order to work correctly, field instances MUST NOT be shared between
 30 |     multiple structures using it but have to be instantiated per structure.
 31 |     """
 32 |     _type_ctr = 0
 33 | 
 34 |     def __init__(self):
 35 |         self._type_id = WasmField._type_ctr
 36 |         WasmField._type_ctr += 1
 37 | 
 38 |     def from_raw(self, struct, raw):
 39 |         raise NotImplementedError()
 40 | 
 41 |     def to_string(self, value):
 42 |         return repr(value)
 43 | 
 44 | 
 45 | class UIntNField(WasmField):
 46 |     """Field handling an unsigned LE int of fixed size."""
 47 |     CONVERTER_MAP = {
 48 |         8: pystruct.Struct('<B'),
 49 |         16: pystruct.Struct('<H'),
 50 |         32: pystruct.Struct('<I'),
 51 |         64: pystruct.Struct('<Q'),
 52 |     }
 53 | 
 54 |     def __init__(self, n, **kwargs):
 55 |         super(UIntNField, self).__init__(**kwargs)
 56 |         self.n = n
 57 |         self.byte_size = n // 8
 58 |         self.converter = self.CONVERTER_MAP[n]
 59 | 
 60 |     def from_raw(self, ctx, raw):
 61 |         return self.byte_size, self.converter.unpack(raw[:self.byte_size])[0], self
 62 | 
 63 |     def to_string(self, value):
 64 |         return hex(byte2int(value) if self.n == 8 else value)
 65 | 
 66 | 
 67 | class UnsignedLeb128Field(WasmField):
 68 |     """
 69 |     Field handling unsigned LEB128 values.
 70 |     https://en.wikipedia.org/wiki/LEB128
 71 |     """
 72 |     def from_raw(self, ctx, raw):
 73 |         offs = 0
 74 |         val = 0
 75 | 
 76 |         while True:
 77 |             segment = byte2int(raw[offs])
 78 |             val |= (segment & 0x7F) << (offs * 7)
 79 |             offs += 1
 80 |             if not (segment & 0x80):
 81 |                 break
 82 | 
 83 |         return offs, val, self
 84 | 
 85 |     def to_string(self, value):
 86 |         return hex(value) if value > 1000 else str(value)
 87 | 
 88 | 
 89 | class SignedLeb128Field(WasmField):
 90 |     """
 91 |     Field handling signed LEB128 values.
 92 |     https://en.wikipedia.org/wiki/LEB128
 93 |     """
 94 |     def from_raw(self, ctx, raw):
 95 |         offs = 0
 96 |         val = 0
 97 |         bits = 0
 98 | 
 99 |         while True:
100 |             segment = byte2int(raw[offs])
101 |             val |= (segment & 0x7F) << bits
102 |             offs += 1
103 |             bits += 7
104 |             if not (segment & 0x80):
105 |                 break
106 | 
107 |         if val & (1 << (bits - 1)):
108 |             val -= 1 << bits
109 | 
110 |         return offs, val, self
111 | 
112 | 
class CondField(WasmField):
    """Wraps a field so that it is only present when a predicate holds."""
    def __init__(self, field, condition, **kwargs):
        """
        :param field: the wrapped field used when the condition is met.
        :param condition: callable taking the decoding context and
            returning a truthy value if the field is present.
        """
        super(CondField, self).__init__(**kwargs)
        self.field = field
        self.condition = condition

    def from_raw(self, ctx, raw):
        """Delegate to the wrapped field, or yield `None` using 0 bytes."""
        if not self.condition(ctx):
            return 0, None, self
        return self.field.from_raw(ctx, raw)

    def to_string(self, value):
        """Render via the wrapped field; an absent value prints as 'None'."""
        if value is None:
            return 'None'
        return self.field.to_string(value)
127 | 
128 | 
class RepeatField(WasmField):
    """Decodes a field several times; the count is derived from the context."""
    def __init__(self, field, repeat_count_getter, **kwargs):
        """
        :param field: element field decoded repeatedly.
        :param repeat_count_getter: callable taking the decoding context
            and returning the number of elements.
        """
        super(RepeatField, self).__init__(**kwargs)
        self.field = field
        self.repeat_count_getter = repeat_count_getter

    def from_raw(self, ctx, raw):
        """Return `(bytes consumed, elements, self)`."""
        count = self.repeat_count_getter(ctx)
        element = self.field

        # Shortcut for plain byte arrays: hand back the raw slice instead
        # of decoding byte by byte.
        if type(element) is UIntNField and element.n == 8:
            return count, raw[:count], self

        # Everything else is decoded element by element.
        consumed = 0
        decoded = []
        for _ in range(count):
            step, item, _item_type = element.from_raw(ctx, raw[consumed:])
            consumed += step
            decoded.append(item)

        return consumed, decoded, self

    def to_string(self, value):
        """Render the elements; long sequences are abbreviated."""
        if value is None:
            return 'None'

        size = len(value)
        if size > 100:
            return '<too long>'
        if size == 0:
            return '[]'

        # Structures are printed one per line, scalars as an inline list.
        as_block = isinstance(value[0], StructureData)
        rendered = (self.field.to_string(item) for item in value)
        if as_block:
            return '\n' + indent('\n'.join(rendered), '  ')
        return '[' + ', '.join(rendered) + ']'
168 | 
169 | 
class ConstField(WasmField):
    """Pseudo-field that yields a fixed value and never touches the input."""
    def __init__(self, const, **kwargs):
        """:param const: the value returned by every `from_raw` call."""
        super(ConstField, self).__init__(**kwargs)
        self.const = const

    def from_raw(self, ctx, raw):
        """Return `(0, const, self)`; both arguments are ignored."""
        consumed = 0
        return consumed, self.const, self
178 | 
179 | 
class ChoiceField(WasmField):
    """Selects one of several field types based on the decoding context."""
    # Reported as the field type whenever no choice applies.
    _shared_none_field = ConstField(None)

    def __init__(self, choice_field_map, choice_getter, **kwargs):
        """
        :param choice_field_map: mapping from choice key to field.
        :param choice_getter: callable taking the decoding context and
            returning the key to look up, or `None` for "no field".
        """
        super(ChoiceField, self).__init__(**kwargs)
        self.choice_field_map = choice_field_map
        self.choice_getter = choice_getter

    def from_raw(self, ctx, raw):
        """Delegate to the selected field, or yield `None` using 0 bytes."""
        selected = self.choice_getter(ctx)
        if selected is not None:
            return self.choice_field_map[selected].from_raw(ctx, raw)
        return 0, None, self._shared_none_field
194 | 
195 | 
class BytesField(RepeatField):
    """A `RepeatField` of U8 elements that can optionally print as text."""
    def __init__(self, length_getter, is_str=False):
        """
        :param length_getter: callable taking the decoding context and
            returning the number of bytes.
        :param is_str: when true, `to_string` renders the bytes as UTF-8.
        """
        super(BytesField, self).__init__(UIntNField(8), length_getter)
        self.is_str = is_str

    def to_string(self, value):
        """Render as a quoted UTF-8 string if `is_str`, else as a byte list."""
        if self.is_str:
            try:
                text = bytearray(value).decode('utf8')
            except UnicodeDecodeError:
                return '<bad utf8>'
            return '"' + text + '"'

        return super(BytesField, self).to_string(value)
210 | 
211 | 
# (name, field) pair describing one declared member of a structure.
FieldMeta = collections.namedtuple('FieldMeta', ['name', 'field'])
213 | 
214 | 
class MetaInfo(object):
    """Holds the meta information attached to a `Structure` class."""
    def __init__(self):
        # Starts out empty; every attribute is filled in later.
        self.fields, self.data_class, self.structure = [], None, None
221 | 
222 | 
class StructureData(object):
    """Parent of the per-structure data classes created at class build time."""
    __slots__ = ('_meta', '_decoder_meta')

    def __init__(self, for_decoding=False):
        """
        :param for_decoding: when true, prepare empty decoder bookkeeping
            ('lengths' and 'types' maps); otherwise it is `None`.
        """
        if for_decoding:
            self._decoder_meta = {'lengths': {}, 'types': {}}
        else:
            self._decoder_meta = None

        # Every declared field starts out unset.
        for slot_name, _field in self._meta.fields:
            setattr(self, slot_name, None)

    def get_meta(self):
        """
        Returns the meta info of this object's structure type. The same
        object is shared by all instances of that type.

        This is a method rather than a property so that meta info stays
        clearly separated from regular fields.
        """
        return self._meta

    def get_decoder_meta(self):
        """
        Returns what the decoder recorded about this object, e.g. the raw
        byte length of each field. `None` if the object was not produced
        by decoding.
        """
        return self._decoder_meta

    @property
    @deprecated_func
    def _data_meta(self):
        """Deprecated alias for `_decoder_meta` under its old name."""
        return self._decoder_meta
254 | 
255 | 
class StructureMeta(type):
    """
    Metaclass building `Structure` classes: it fills in their `_meta`
    attribute and sanity-checks the declared members.
    """
    def __new__(mcs, name, bases, cls_dict):
        # Attach a fresh meta object to the class being built.
        info = MetaInfo()
        cls_dict['_meta'] = info

        # Walk the class body and pick out the field declarations.
        declared = []
        for attr_name, attr in list(cls_dict.items()):
            # Callables, properties and private/magic names are left alone.
            if (
                isinstance(attr, Callable) or
                isinstance(attr, property) or
                attr_name.startswith('_')
            ):
                continue

            # One of our field types: record it.
            if isinstance(attr, WasmField):
                declared.append(FieldMeta(attr_name, attr))
                continue

            # Anything else is unexpected; warn and skip it.
            logger.warning(
                'Non-WasmField typed field "{}" found on type "{}". '
                'Ignoring.'.format(attr_name, name)
            )

        # Restore declaration order via the creation id of each field
        # (see `WasmField` for the reasoning).
        info.fields = sorted(declared, key=lambda entry: entry.field._type_id)

        # Build the data class whose instances hold decoded values.
        class GeneratedStructureData(StructureData):
            __slots__ = [entry.name for entry in info.fields]
            _meta = info
        info.data_class = GeneratedStructureData

        # Finally create the structure class and remember it in the meta.
        created = type.__new__(mcs, name, bases, cls_dict)
        info.structure = created
        return created
298 | 
299 | 
@add_metaclass(StructureMeta)
class Structure(WasmField):
    """A field made up of an ordered set of named sub-fields."""
    def from_raw(self, ctx, raw):
        """
        Decode all sub-fields in order from `raw`.

        Returns `(bytes consumed, data object, self)`. The data object
        under construction is passed to each sub-field as its context.
        `ctx` itself is unused here.
        """
        consumed = 0
        record = self._meta.data_class(for_decoding=True)
        for field_name, field in self._meta.fields:
            size, decoded, concrete_type = field.from_raw(record, raw[consumed:])
            setattr(record, field_name, decoded)
            # Remember the raw size and the field type that produced the value.
            bookkeeping = record.get_decoder_meta()
            bookkeeping['lengths'][field_name] = size
            bookkeeping['types'][field_name] = concrete_type
            consumed += size
        return consumed, record, self

    def to_string(self, value):
        """Render a decoded data object as an indented multi-line listing."""
        out = ['- [ {}'.format(self.__class__.__name__)]
        for field_name, _declared in self._meta.fields:
            current = getattr(value, field_name)
            # Use the type recorded during decoding, not the declared one.
            concrete_type = value.get_decoder_meta()['types'][field_name]
            rendered = concrete_type.to_string(current)
            if isinstance(current, StructureData):
                # Nested structures go on their own, further indented lines.
                entry = '  | {} =\n{}'.format(field_name, indent(rendered, '  '))
            else:
                entry = '  | {} = {}'.format(field_name, rendered)
            out.append(entry)

        return '\n'.join(out)
332 | 


--------------------------------------------------------------------------------
/wasm/wasmtypes.py:
--------------------------------------------------------------------------------
 1 | """Defines types used for both modules and bytecode."""
 2 | from __future__ import print_function, absolute_import, division, unicode_literals
 3 | 
 4 | from .types import UIntNField, UnsignedLeb128Field, SignedLeb128Field
 5 | 
 6 | 
 7 | def _make_shortcut(klass, *args, **kwargs):
 8 |     def proxy(**kwargs2):
 9 |         kwargs.update(kwargs2)
10 |         return klass(*args, **kwargs)
11 |     return proxy
12 | 
13 | 
# Factories for fixed-width unsigned little-endian integer fields.
UInt8Field = _make_shortcut(UIntNField, 8)
UInt16Field = _make_shortcut(UIntNField, 16)
UInt32Field = _make_shortcut(UIntNField, 32)
UInt64Field = _make_shortcut(UIntNField, 64)

# Factories for unsigned LEB128 fields. All three create the same
# `UnsignedLeb128Field`; the bit width in the name is not passed on to it.
VarUInt1Field = _make_shortcut(UnsignedLeb128Field)
VarUInt7Field = _make_shortcut(UnsignedLeb128Field)
VarUInt32Field = _make_shortcut(UnsignedLeb128Field)

# Factories for signed LEB128 fields. As above, all three create the same
# `SignedLeb128Field` regardless of the width in the name.
VarInt7Field = _make_shortcut(SignedLeb128Field)
VarInt32Field = _make_shortcut(SignedLeb128Field)
VarInt64Field = _make_shortcut(SignedLeb128Field)

# Aliases named after what the field holds rather than how it is encoded.
ElementTypeField = VarInt7Field
ValueTypeField = VarInt7Field
ExternalKindField = UInt8Field
BlockTypeField = VarInt7Field
31 | 
32 | 
33 | #
34 | # Constants
35 | #
36 | 
37 | 
# Section types.
# All are small integers except SEC_NAME, which is a bytes literal.
SEC_UNK = 0
SEC_TYPE = 1
SEC_IMPORT = 2
SEC_FUNCTION = 3
SEC_TABLE = 4
SEC_MEMORY = 5
SEC_GLOBAL = 6
SEC_EXPORT = 7
SEC_START = 8
SEC_ELEMENT = 9
SEC_CODE = 10
SEC_DATA = 11
SEC_DATACOUNT = 12
SEC_NAME = b'name'

# Language types.
# Stored as negative numbers; presumably these match the values produced by
# the signed LEB128 type fields (e.g. `ValueTypeField`) -- confirm at use site.
LANG_TYPE_I32 = -0x01
LANG_TYPE_I64 = -0x02
LANG_TYPE_F32 = -0x03
LANG_TYPE_F64 = -0x04
LANG_TYPE_ANYFUNC = -0x10
LANG_TYPE_FUNC = -0x20
LANG_TYPE_EMPTY = -0x40

# Value types.
# Aliases of the four numeric language types above.
VAL_TYPE_I32 = LANG_TYPE_I32
VAL_TYPE_I64 = LANG_TYPE_I64
VAL_TYPE_F32 = LANG_TYPE_F32
VAL_TYPE_F64 = LANG_TYPE_F64

# Name subsection types.
NAME_SUBSEC_FUNCTION = 1
NAME_SUBSEC_LOCAL = 2

# Mutability in global types.
IMMUTABLE = 0
MUTABLE = 1


--------------------------------------------------------------------------------