├── .github ├── ISSUE_TEMPLATE │ ├── bug_report.md │ └── feature_request.md ├── dependabot.yml └── workflows │ ├── codeql.yml │ └── test.yml ├── .gitignore ├── LICENSE ├── README.md ├── SymGX.py ├── benchmarks ├── CE.wasm ├── bi2.wasm ├── bi2.wat ├── dnet.wasm ├── dnet.wat ├── isdl.wasm ├── isdl.wat ├── kmeans.wasm ├── kmeans.wat ├── pw.wasm ├── pw.wat ├── reencrypt.wasm ├── rsa.wasm ├── rsa.wat ├── sgxcrypto.wasm ├── sgxcrypto.wat ├── sl.wasm ├── sl.wat ├── spf.wasm ├── spf.wat ├── sse.wasm ├── sse.wat ├── ve.wasm ├── ve.wat ├── wallet.wasm └── wallet.wat ├── clean.sh ├── eunomia ├── __init__.py ├── __init__.pyc ├── analysis │ ├── __init__.py │ ├── __init__.pyc │ ├── cfg.py │ └── graph.py ├── arch │ ├── __init__.py │ ├── __init__.pyc │ └── wasm │ │ ├── __init__.py │ │ ├── __init__.pyc │ │ ├── analyzer.py │ │ ├── cfg.py │ │ ├── configuration.py │ │ ├── constant.py │ │ ├── decode.py │ │ ├── disassembler.py │ │ ├── dwarfParser.py │ │ ├── emulator.py │ │ ├── exceptions.py │ │ ├── format.py │ │ ├── instruction.py │ │ ├── instructions │ │ ├── ArithmeticInstructions.py │ │ ├── BitwiseInstructions.py │ │ ├── ConstantInstructions.py │ │ ├── ControlInstructions.py │ │ ├── ConversionInstructions.py │ │ ├── LogicalInstructions.py │ │ ├── MemoryInstructions.py │ │ ├── ParametricInstructions.py │ │ ├── VariableInstructions.py │ │ └── __init__.py │ │ ├── lib │ │ ├── Import.py │ │ ├── c_lib.py │ │ ├── go_lib.py │ │ ├── sgx.py │ │ ├── utils.py │ │ └── wasi.py │ │ ├── memanalyzer.py │ │ ├── memory.py │ │ ├── modules │ │ ├── BufferOverflowLaser.py │ │ ├── DivZeroLaser.py │ │ ├── OverflowLaser.py │ │ └── __init__.py │ │ ├── mythread.py │ │ ├── pathgraph.py │ │ ├── shadow.py │ │ ├── solver.py │ │ ├── utils.py │ │ ├── visualizator.py │ │ ├── vmstate.py │ │ └── wasm.py ├── core │ ├── __init__.py │ ├── __init__.pyc │ ├── basicblock.py │ ├── edge.py │ ├── function.py │ ├── instruction.py │ └── utils.py └── engine │ ├── __init__.py │ ├── __init__.pyc │ ├── disassembler.py │ ├── emulator.py 
│ └── engine.py ├── main.py ├── output ├── log │ └── .placeholder └── result │ └── .placeholder ├── pic ├── 104848503.jfif └── logo.png ├── requirements.txt ├── run.sh ├── seewasm ├── __init__.py ├── analysis │ ├── __init__.py │ ├── __init__.pyc │ └── cfg.py ├── arch │ ├── __init__.py │ └── wasm │ │ ├── __init__.py │ │ ├── __init__.pyc │ │ ├── analyzer.py │ │ ├── cfg.py │ │ ├── configuration.py │ │ ├── constant.py │ │ ├── decode.py │ │ ├── disassembler.py │ │ ├── dwarfParser.py │ │ ├── emulator.py │ │ ├── exceptions.py │ │ ├── format.py │ │ ├── graph.py │ │ ├── instruction.py │ │ ├── instructions │ │ ├── ArithmeticInstructions.py │ │ ├── BitwiseInstructions.py │ │ ├── ConstantInstructions.py │ │ ├── ControlInstructions.py │ │ ├── ConversionInstructions.py │ │ ├── LogicalInstructions.py │ │ ├── MemoryInstructions.py │ │ ├── ParametricInstructions.py │ │ ├── VariableInstructions.py │ │ └── __init__.py │ │ ├── lib │ │ ├── utils.py │ │ └── wasi.py │ │ ├── memory.py │ │ ├── solver.py │ │ ├── utils.py │ │ ├── visualizator.py │ │ ├── vmstate.py │ │ └── wasm.py ├── core │ ├── __init__.py │ ├── __init__.pyc │ ├── basicblock.py │ ├── edge.py │ ├── function.py │ ├── instruction.py │ └── utils.py └── engine │ ├── __init__.py │ ├── __init__.pyc │ ├── disassembler.py │ ├── emulator.py │ └── engine.py ├── test.py ├── test ├── c │ └── src │ │ ├── hello.c │ │ └── sym.c ├── go │ └── src │ │ └── hello.go ├── hello_world.wasm ├── hello_world_go.wasm ├── hello_world_rust.wasm ├── password.wasm ├── rust │ └── hello │ │ ├── Cargo.toml │ │ └── src │ │ └── main.rs ├── sym_c.wasm ├── test.wasm ├── test_c_library.wasm ├── test_linux.py ├── test_return.wasm └── test_unreachable.wasm └── wasm ├── __init__.py ├── __main__.py ├── compat.py ├── decode.py ├── formatter.py ├── immtypes.py ├── modtypes.py ├── opcodes.py ├── types.py └── wasmtypes.py /.github/ISSUE_TEMPLATE/bug_report.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Bug report 3 | 
about: Create a report to help us improve 4 | title: '' 5 | labels: '' 6 | assignees: '' 7 | 8 | --- 9 | 10 | **Describe the bug** 11 | A clear and concise description of what the bug is. 12 | 13 | **To Reproduce** 14 | Steps to reproduce the behavior: 15 | 1. Go to '...' 16 | 2. Click on '....' 17 | 3. Scroll down to '....' 18 | 4. See error 19 | 20 | **Expected behavior** 21 | A clear and concise description of what you expected to happen. 22 | 23 | **Screenshots** 24 | If applicable, add screenshots to help explain your problem. 25 | 26 | **Desktop (please complete the following information):** 27 | - OS: [e.g. iOS] 28 | - Browser [e.g. chrome, safari] 29 | - Version [e.g. 22] 30 | 31 | **Smartphone (please complete the following information):** 32 | - Device: [e.g. iPhone6] 33 | - OS: [e.g. iOS8.1] 34 | - Browser [e.g. stock browser, safari] 35 | - Version [e.g. 22] 36 | 37 | **Additional context** 38 | Add any other context about the problem here. 39 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/feature_request.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Feature request 3 | about: Suggest an idea for this project 4 | title: '' 5 | labels: '' 6 | assignees: '' 7 | 8 | --- 9 | 10 | **Is your feature request related to a problem? Please describe.** 11 | A clear and concise description of what the problem is. Ex. I'm always frustrated when [...] 12 | 13 | **Describe the solution you'd like** 14 | A clear and concise description of what you want to happen. 15 | 16 | **Describe alternatives you've considered** 17 | A clear and concise description of any alternative solutions or features you've considered. 18 | 19 | **Additional context** 20 | Add any other context or screenshots about the feature request here. 
21 | -------------------------------------------------------------------------------- /.github/dependabot.yml: -------------------------------------------------------------------------------- 1 | # To get started with Dependabot version updates, you'll need to specify which 2 | # package ecosystems to update and where the package manifests are located. 3 | # Please see the documentation for all configuration options: 4 | # https://docs.github.com/code-security/dependabot/dependabot-version-updates/configuration-options-for-the-dependabot.yml-file 5 | 6 | version: 2 7 | updates: 8 | - package-ecosystem: "pip" # See documentation for possible values 9 | directory: "/" # Location of package manifests 10 | schedule: 11 | interval: "weekly" 12 | day: "saturday" 13 | -------------------------------------------------------------------------------- /.github/workflows/codeql.yml: -------------------------------------------------------------------------------- 1 | # For most projects, this workflow file will not need changing; you simply need 2 | # to commit it to your repository. 3 | # 4 | # You may wish to alter this file to override the set of languages analyzed, 5 | # or to provide custom queries or build logic. 6 | # 7 | # ******** NOTE ******** 8 | # We have attempted to detect the languages in your repository. Please check 9 | # the `language` matrix defined below to confirm you have the correct set of 10 | # supported CodeQL languages. 
11 | # 12 | name: "CodeQL" 13 | 14 | on: 15 | push: 16 | branches: [ "main" ] 17 | pull_request: 18 | branches: [ "main" ] 19 | schedule: 20 | - cron: '25 8 * * 6' 21 | workflow_dispatch: 22 | 23 | jobs: 24 | check_skip: 25 | # continue-on-error: true # Uncomment once integration is finished 26 | runs-on: ubuntu-latest 27 | # Map a step output to a job output 28 | outputs: 29 | should_skip: ${{ steps.skip_check.outputs.should_skip }} 30 | steps: 31 | - id: skip_check 32 | uses: fkirc/skip-duplicate-actions@v5 33 | with: 34 | concurrent_skipping: 'same_content_newer' 35 | skip_after_successful_duplicate: 'true' 36 | paths: '["**.py"]' 37 | do_not_skip: '["workflow_dispatch", "schedule", "merge_group", "pull_request"]' 38 | analyze: 39 | needs: check_skip 40 | if: needs.check_skip.outputs.should_skip != 'true' 41 | name: Analyze (${{ matrix.language }}) 42 | # Runner size impacts CodeQL analysis time. To learn more, please see: 43 | # - https://gh.io/recommended-hardware-resources-for-running-codeql 44 | # - https://gh.io/supported-runners-and-hardware-resources 45 | # - https://gh.io/using-larger-runners (GitHub.com only) 46 | # Consider using larger runners or machines with greater resources for possible analysis time improvements. 
47 | runs-on: ${{ (matrix.language == 'swift' && 'macos-latest') || 'ubuntu-latest' }} 48 | timeout-minutes: ${{ (matrix.language == 'swift' && 120) || 360 }} 49 | permissions: 50 | # required for all workflows 51 | security-events: write 52 | 53 | # required to fetch internal or private CodeQL packs 54 | packages: read 55 | 56 | # only required for workflows in private repositories 57 | actions: read 58 | contents: read 59 | 60 | strategy: 61 | fail-fast: false 62 | matrix: 63 | include: 64 | - language: python 65 | build-mode: none 66 | # CodeQL supports the following values keywords for 'language': 'c-cpp', 'csharp', 'go', 'java-kotlin', 'javascript-typescript', 'python', 'ruby', 'swift' 67 | # Use `c-cpp` to analyze code written in C, C++ or both 68 | # Use 'java-kotlin' to analyze code written in Java, Kotlin or both 69 | # Use 'javascript-typescript' to analyze code written in JavaScript, TypeScript or both 70 | # To learn more about changing the languages that are analyzed or customizing the build mode for your analysis, 71 | # see https://docs.github.com/en/code-security/code-scanning/creating-an-advanced-setup-for-code-scanning/customizing-your-advanced-setup-for-code-scanning. 72 | # If you are analyzing a compiled language, you can modify the 'build-mode' for that language to customize how 73 | # your codebase is analyzed, see https://docs.github.com/en/code-security/code-scanning/creating-an-advanced-setup-for-code-scanning/codeql-code-scanning-for-compiled-languages 74 | steps: 75 | - name: Checkout repository 76 | uses: actions/checkout@v4 77 | 78 | # Initializes the CodeQL tools for scanning. 79 | - name: Initialize CodeQL 80 | uses: github/codeql-action/init@v3 81 | with: 82 | languages: ${{ matrix.language }} 83 | build-mode: ${{ matrix.build-mode }} 84 | # If you wish to specify custom queries, you can do so here or in a config file. 85 | # By default, queries listed here will override any specified in a config file. 
86 | # Prefix the list here with "+" to use these queries and those in the config file. 87 | 88 | # For more details on CodeQL's query packs, refer to: https://docs.github.com/en/code-security/code-scanning/automatically-scanning-your-code-for-vulnerabilities-and-errors/configuring-code-scanning#using-queries-in-ql-packs 89 | # queries: security-extended,security-and-quality 90 | 91 | # If the analyze step fails for one of the languages you are analyzing with 92 | # "We were unable to automatically build your code", modify the matrix above 93 | # to set the build mode to "manual" for that language. Then modify this step 94 | # to build your code. 95 | # ℹ️ Command-line programs to run using the OS shell. 96 | # 📚 See https://docs.github.com/en/actions/using-workflows/workflow-syntax-for-github-actions#jobsjob_idstepsrun 97 | - if: matrix.build-mode == 'manual' 98 | shell: bash 99 | run: | 100 | echo 'If you are using a "manual" build mode for one or more of the' \ 101 | 'languages you are analyzing, replace this with the commands to build' \ 102 | 'your code, for example:' 103 | echo ' make bootstrap' 104 | echo ' make release' 105 | exit 1 106 | 107 | - name: Perform CodeQL Analysis 108 | uses: github/codeql-action/analyze@v3 109 | with: 110 | category: "/language:${{matrix.language}}" 111 | -------------------------------------------------------------------------------- /.github/workflows/test.yml: -------------------------------------------------------------------------------- 1 | name: test 2 | 3 | on: 4 | push: 5 | pull_request: 6 | types: [opened, synchronize, reopened] 7 | workflow_dispatch: 8 | 9 | jobs: 10 | check_skip: 11 | # continue-on-error: true # Uncomment once integration is finished 12 | runs-on: ubuntu-latest 13 | # Map a step output to a job output 14 | outputs: 15 | should_skip: ${{ steps.skip_check.outputs.should_skip }} 16 | steps: 17 | - id: skip_check 18 | uses: fkirc/skip-duplicate-actions@v5 19 | with: 20 | concurrent_skipping: 
'same_content_newer' 21 | skip_after_successful_duplicate: 'true' 22 | paths: '["**.py", ".github/workflows/*.yml", "requirements.txt", "test/**"]' 23 | test: 24 | needs: check_skip 25 | if: needs.check_skip.outputs.should_skip != 'true' 26 | strategy: 27 | fail-fast: false 28 | matrix: 29 | python: 30 | - "3.7" 31 | - "3.8" 32 | - "3.9" 33 | - "3.10" 34 | - "3.11" 35 | - "3.12" 36 | runs-on: ubuntu-latest 37 | steps: 38 | - uses: actions/checkout@v4 39 | - name: Setup Python 40 | uses: actions/setup-python@v5 41 | with: 42 | python-version: ${{ matrix.python }} 43 | cache: pip 44 | # Set this option if you want the action to check for the latest available version that satisfies the version spec. 45 | # check-latest: # optional 46 | - name: Install requirements 47 | run: | 48 | pip install -r requirements.txt 49 | sudo apt update && sudo apt install graphviz 50 | - name: Cache wabt 51 | id: cache-wabt 52 | uses: actions/cache@v4 53 | with: 54 | path: wabt-1.0.32 55 | key: wabt 56 | - name: Install wabt 57 | if: steps.cache-wabt.outputs.cache-hit != 'true' 58 | run: | 59 | curl -JLO "https://github.com/WebAssembly/wabt/releases/download/1.0.32/wabt-1.0.32-ubuntu.tar.gz" 60 | tar xzf wabt-1.0.32-ubuntu.tar.gz 61 | - name: Cache wasi-sdk 62 | id: cache-wasi-sdk 63 | uses: actions/cache@v4 64 | with: 65 | path: wasi-sdk-22.0 66 | key: wasi-sdk 67 | - name: Install wasi-sdk 68 | if: steps.cache-wasi-sdk.outputs.cache-hit != 'true' 69 | run: | 70 | curl -JLO "https://github.com/WebAssembly/wasi-sdk/releases/download/wasi-sdk-22/wasi-sdk-22.0-linux.tar.gz" 71 | tar xzf wasi-sdk-22.0-linux.tar.gz 72 | - name: Cache wasmtime 73 | id: cache-wasmtime 74 | uses: actions/cache@v4 75 | with: 76 | path: ~/.wasmtime 77 | key: wasmtime 78 | - name: Install wasmtime 79 | if: steps.cache-wasmtime.outputs.cache-hit != 'true' 80 | run: | 81 | curl https://wasmtime.dev/install.sh -sSf | bash 82 | - name: Install Rust 83 | run: | 84 | curl --proto '=https' --tlsv1.2 -sSf 
https://sh.rustup.rs | sh -s -- -y 85 | rustup target add wasm32-wasi 86 | - uses: actions/setup-go@v5 87 | with: 88 | go-version: 1.22 89 | check-latest: true 90 | cache: true 91 | - name: Install tinygo 92 | run: | 93 | wget https://github.com/tinygo-org/tinygo/releases/download/v0.32.0/tinygo_0.32.0_amd64.deb 94 | sudo dpkg -i tinygo_0.32.0_amd64.deb 95 | - name: Run pytest 96 | run: | 97 | export PATH=$(pwd)/wabt-1.0.32/bin:$PATH 98 | export PATH=$(pwd)/wasi-sdk-22.0/bin:$PATH 99 | export PATH=$(pwd)/.cargo/bin:$PATH 100 | export PATH=$(pwd)/.wasmtime/bin:$PATH 101 | pytest test.py --tb=short --durations=0 102 | ./clean.sh -f 103 | pytest test/test_linux.py --tb=short --durations=0 104 | test_result: 105 | if: needs.check_skip.outputs.should_skip != 'true' && always() 106 | runs-on: ubuntu-latest 107 | needs: 108 | - check_skip 109 | - test 110 | steps: 111 | - name: Mark result as failed 112 | if: needs.test.result != 'success' 113 | run: exit 1 114 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Python-specific files 2 | __pycache__/ 3 | *.pyc 4 | *.pyo 5 | *.pyd 6 | 7 | # Virtual environment files 8 | venv/ 9 | env 10 | .env 11 | myenv/ 12 | 13 | # IDE-specific files 14 | .vscode/ 15 | пион/ 16 | .idea/ 17 | *.iml 18 | 19 | # Package installation files 20 | pip-log.txt 21 | pip-debug.log 22 | 23 | # Test files 24 | *.tmp 25 | *.coverage 26 | *.log 27 | /tox/ 28 | 29 | # Config files 30 | config.py 31 | settings.py 32 | 33 | # Data files 34 | /data/ 35 | /tests/data/ 36 | 37 | # Logs 38 | log/ 39 | 40 | # Results 41 | output/ 42 | **/debug 43 | **/rust/**/target 44 | 45 | # Cache files 46 | .cache/ 47 | .pytest_cache/ 48 | 49 | # Node modules (if using npm/yarn) 50 | node_modules/ 51 | 52 | # Other files 53 | *.bak 54 | *.swap 55 | *.orig 56 | *.rej 57 | *.elf 58 | 59 | # test files 60 | test/* 61 | !test/*.py 62 | 
!test/*.wasm 63 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | BSD 3-Clause License 2 | 3 | Copyright (c) 2024, The OSLab of Peking University 4 | 5 | Redistribution and use in source and binary forms, with or without 6 | modification, are permitted provided that the following conditions are met: 7 | 8 | 1. Redistributions of source code must retain the above copyright notice, this 9 | list of conditions and the following disclaimer. 10 | 11 | 2. Redistributions in binary form must reproduce the above copyright notice, 12 | this list of conditions and the following disclaimer in the documentation 13 | and/or other materials provided with the distribution. 14 | 15 | 3. Neither the name of the copyright holder nor the names of its 16 | contributors may be used to endorse or promote products derived from 17 | this software without specific prior written permission. 18 | 19 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 20 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 21 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 22 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 23 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 24 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 25 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 26 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 27 | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 28 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
def _function_names_from_wat(wat_path):
    """Return the names of the functions declared in a ``.wat`` file.

    A declaration is any line starting with ``"  (func $"``; the name is
    the text between the ``$`` and the next space.

    :param wat_path: path of the text-format module to scan
    :return: list of function names in file order
    """
    prefix = "  (func $"
    names = []
    with open(wat_path, 'r') as wat:
        for line in wat:
            if not line.startswith(prefix):
                continue
            rest = line[len(prefix):]
            # Search from index 1 so that a name is at least one character
            # long, exactly like the original index-based scan.
            cut = rest.find(' ', 1)
            if cut == -1:
                # No space follows the name (e.g. "(func $f)"): take the
                # remainder of the line instead of running off its end.
                name = rest.rstrip().rstrip(')')
            else:
                name = rest[:cut]
            if name:
                names.append(name)
    return names


def SymGX(args):
    """Run the multi-threaded analysis described by the parsed arguments.

    :param args: argument namespace providing
        ``file`` (an open file object; its ``read()`` result is the module
        bytecode and its ``name`` locates the sibling ``.wat`` file),
        ``ecall_list`` (comma-separated string),
        ``func_list`` (comma-separated string, or empty to analyse every
        function found in the ``.wat`` file) and
        ``max_time`` (time limit in seconds, or empty for 12 hours).
    """
    octo_bytecode = args.file.read()
    ecall_list = args.ecall_list.split(",")

    if args.func_list:
        namelist = args.func_list.split(",")
    else:
        # The .wat file is expected next to the binary, with the same stem.
        # splitext() keeps this correct for any extension length, not only
        # the five characters of ".wasm".
        module_path = args.file.name
        stem = os.path.splitext(os.path.basename(module_path))[0]
        watpath = os.path.join(os.path.dirname(module_path), stem + ".wat")
        namelist = _function_names_from_wat(watpath)

    if args.max_time:
        max_time = args.max_time
    else:
        max_time = 12 * 60 * 60
    print("set time limit: %d seconds" % max_time)

    multi_thread_process(octo_bytecode, namelist, ecall_list, max_time)
https://raw.githubusercontent.com/PKU-ASAL/WASEM/8cfdc5e25f42c41d95aa061e3bfe98a987b5914c/benchmarks/dnet.wasm -------------------------------------------------------------------------------- /benchmarks/isdl.wasm: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PKU-ASAL/WASEM/8cfdc5e25f42c41d95aa061e3bfe98a987b5914c/benchmarks/isdl.wasm -------------------------------------------------------------------------------- /benchmarks/kmeans.wasm: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PKU-ASAL/WASEM/8cfdc5e25f42c41d95aa061e3bfe98a987b5914c/benchmarks/kmeans.wasm -------------------------------------------------------------------------------- /benchmarks/pw.wasm: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PKU-ASAL/WASEM/8cfdc5e25f42c41d95aa061e3bfe98a987b5914c/benchmarks/pw.wasm -------------------------------------------------------------------------------- /benchmarks/reencrypt.wasm: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PKU-ASAL/WASEM/8cfdc5e25f42c41d95aa061e3bfe98a987b5914c/benchmarks/reencrypt.wasm -------------------------------------------------------------------------------- /benchmarks/rsa.wasm: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PKU-ASAL/WASEM/8cfdc5e25f42c41d95aa061e3bfe98a987b5914c/benchmarks/rsa.wasm -------------------------------------------------------------------------------- /benchmarks/sgxcrypto.wasm: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PKU-ASAL/WASEM/8cfdc5e25f42c41d95aa061e3bfe98a987b5914c/benchmarks/sgxcrypto.wasm -------------------------------------------------------------------------------- 
/benchmarks/sl.wasm: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PKU-ASAL/WASEM/8cfdc5e25f42c41d95aa061e3bfe98a987b5914c/benchmarks/sl.wasm -------------------------------------------------------------------------------- /benchmarks/spf.wasm: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PKU-ASAL/WASEM/8cfdc5e25f42c41d95aa061e3bfe98a987b5914c/benchmarks/spf.wasm -------------------------------------------------------------------------------- /benchmarks/sse.wasm: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PKU-ASAL/WASEM/8cfdc5e25f42c41d95aa061e3bfe98a987b5914c/benchmarks/sse.wasm -------------------------------------------------------------------------------- /benchmarks/ve.wasm: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PKU-ASAL/WASEM/8cfdc5e25f42c41d95aa061e3bfe98a987b5914c/benchmarks/ve.wasm -------------------------------------------------------------------------------- /benchmarks/wallet.wasm: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PKU-ASAL/WASEM/8cfdc5e25f42c41d95aa061e3bfe98a987b5914c/benchmarks/wallet.wasm -------------------------------------------------------------------------------- /clean.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | set -e 3 | 4 | OUTPUT_DIR=output 5 | 6 | error() { 7 | command printf '\033[1;31mError: %s\033[0m\n\n' "$1" 1>&2 8 | } 9 | 10 | usage() { 11 | cat >&2 < 4 | # 5 | # Permission is hereby granted, free of charge, to any person obtaining a copy 6 | # of this software and associated documentation files (the "Software"), to deal 7 | # in the Software without restriction, including without limitation the 
def decode_bytecode(bytecode):
    """Decode raw bytecode, yielding one `Instruction` per opcode."""
    remaining = memoryview(bytecode)
    while remaining:
        opcode = OPCODE_MAP[byte2int(remaining[0])]
        imm_decoder = opcode.imm_struct

        if imm_decoder is None:
            # Opcode without an immediate: only the opcode byte is consumed.
            imm_size, imm = 0, None
        else:
            imm_size, imm, _ = imm_decoder.from_raw(None, remaining[1:])

        total_len = 1 + imm_size
        yield Instruction(opcode, imm, total_len)
        remaining = remaining[total_len:]


def decode_module(module, decode_name_subsections=False):
    """Decode a raw WASM module, yielding `ModuleFragment`s.

    The first fragment is the module header; each later fragment is a
    section, or, when *decode_name_subsections* is true, one fragment per
    sub-section of the name section.
    """
    view = memoryview(module)

    # Header first.
    header = ModuleHeader()
    header_len, header_data, _ = header.from_raw(None, view)
    yield ModuleFragment(header, header_data)
    view = view[header_len:]

    # Then every section until the buffer is exhausted.
    while view:
        section = Section()
        try:
            section_len, section_data, _ = section.from_raw(None, view)
        except Exception:
            # Bypass the error caused by C code compiled with -g1 to -g3:
            # stop decoding at the first section that cannot be parsed.
            return

        expand_names = (decode_name_subsections
                        and section_data.id == SEC_UNK
                        and section_data.name == SEC_NAME)
        if not expand_names:
            yield ModuleFragment(section, section_data)
        else:
            # Name section requested: emit its sub-sections individually.
            payload = section_data.payload
            while payload:
                subsection = NameSubSection()
                sub_len, sub_data, _ = subsection.from_raw(None, payload)
                yield ModuleFragment(subsection, sub_data)
                payload = payload[sub_len:]

        # The section length is used as decoded; the extra adjustment for
        # named custom sections stays disabled (noted as a KeyError fix).
        view = view[section_len:]
wasm.modtypes import CodeSection 13 | from wasm.opcodes import OPCODE_MAP 14 | 15 | inst_namedtuple = namedtuple('Instruction', 'op imm len') 16 | 17 | 18 | class WasmDisassembler(Disassembler): 19 | 20 | def __init__(self, bytecode=None): 21 | Disassembler.__init__(self, asm=Wasm(), bytecode=bytecode) 22 | 23 | def disassemble_opcode(self, bytecode=None, offset=0, nature_offset=0): 24 | ''' 25 | based on decode_bytecode() 26 | https://github.com/athre0z/wasm/blob/master/wasm/decode.py 27 | 28 | ''' 29 | 30 | bytecode_wnd = memoryview(bytecode) 31 | opcode_id = byte2int(bytecode_wnd[0]) 32 | 33 | # default value 34 | # opcode:(mnemonic/name, imm_struct, pops, pushes, description) 35 | invalid = ('INVALID', 0, 0, 0, 'Unknown opcode') 36 | name, imm_struct, pops, pushes, description = \ 37 | self.asm.table.get(opcode_id, invalid) 38 | 39 | operand_size = 0 40 | operand = None 41 | operand_interpretation = None 42 | 43 | if imm_struct is not None: 44 | operand_size, operand, _ = imm_struct.from_raw( 45 | None, bytecode_wnd[1:]) 46 | insn = inst_namedtuple( 47 | OPCODE_MAP[opcode_id], operand, 1 + operand_size) 48 | operand_interpretation = format_instruction(insn) 49 | insn_byte = bytecode_wnd[:1 + operand_size].tobytes() 50 | instruction = WasmInstruction( 51 | opcode_id, name, imm_struct, operand_size, insn_byte, pops, pushes, 52 | description, operand_interpretation=operand_interpretation, 53 | offset=offset, nature_offset=nature_offset) 54 | # print('%d %s' % (offset, str(instruction))) 55 | return instruction 56 | 57 | def disassemble(self, bytecode=None, offset=0, nature_offset=0, 58 | r_format='list'): 59 | """Disassemble WASM bytecode 60 | 61 | :param bytecode: bytecode sequence 62 | :param offset: start offset 63 | :param r_format: output format ('list'/'text'/'reverse') 64 | :type bytecode: bytes, str 65 | :type offset: int 66 | :type r_format: list, str, dict 67 | :return: dissassembly result depending of r_format 68 | :rtype: list, str, dict 69 | 70 | 
:Example: 71 | 72 | >>> disasm = WasmDisassembler() 73 | >>> 74 | >>> disasm.disassemble(r_format='text') 75 | >>> 'block -1\ni32.const 24\ncall 28\ni32.const 0\nreturn\nend' 76 | >>> 77 | >>> disasm.disassemble(r_format='text') 78 | >>> [, 79 | ... 80 | , 81 | ] 82 | >>> 83 | >>> disasm.disassemble(r_format='reverse') 84 | >>> {0: , 85 | ... 86 | 4: , 87 | 5: } 88 | """ 89 | 90 | return super().disassemble(bytecode, offset, nature_offset, r_format) 91 | 92 | def extract_functions_code(self, module_bytecode): 93 | functions = list() 94 | mod_iter = iter(decode_module(module_bytecode)) 95 | _, _ = next(mod_iter) 96 | sections = list(mod_iter) 97 | 98 | # iterate over all section 99 | # code_data = [cur_sec_data for cur_sec, cur_sec_data in sections if isinstance(cur_sec_data.get_decoder_meta()['types']['payload'], CodeSection)][0] 100 | for cur_sec, cur_sec_data in sections: 101 | sec = cur_sec_data.get_decoder_meta()['types']['payload'] 102 | if isinstance(sec, CodeSection): 103 | code_data = cur_sec_data 104 | break 105 | if not code_data: 106 | raise ValueError('No functions/codes in the module') 107 | for idx, func in enumerate(code_data.payload.bodies): 108 | instructions = self.disassemble(func.code.tobytes()) 109 | cur_function = Function(0, instructions[0]) 110 | cur_function.instructions = instructions 111 | 112 | functions.append(cur_function) 113 | return functions 114 | 115 | def disassemble_module( 116 | self, module_bytecode=None, offset=0, r_format='list'): 117 | 118 | bytecode = bytecode_to_bytes(module_bytecode) 119 | 120 | functions = self.extract_functions_code(bytecode[offset:]) 121 | self.instructions = [f.instructions for f in functions] 122 | 123 | # return instructions 124 | if r_format == 'list': 125 | return self.instructions 126 | elif r_format == 'text': 127 | text = '' 128 | for index, func in enumerate(functions): 129 | text += ('func %d\n' % index) 130 | text += ('\n'.join(map(str, func.instructions))) 131 | text += ('\n\n') 132 | 
# Project-specific status codes and exception types.
INVALIDMEMORY = -2
ASSERT_FAIL = -3


class UnsupportZ3TypeError(Exception):
    """
    Used in `utils.py`.
    The variable type is not one of ['i32', 'i64', 'f32', 'f64'].
    """


class UninitializedLocalVariableError(Exception):
    """
    Used in `emulator.py`.
    A local variable was retrieved before being initialized.
    """


class UnsupportGlobalTypeError(Exception):
    """
    Used in `emulator.py`.
    An unsupported global type was encountered while handling global.get.
    """


class UnsupportInstructionError(Exception):
    """
    Used in `emulator.py`.
    The instruction is not supported.
    """


class NotDeterminedRetValError(Exception):
    """
    The return value is a bool that cannot be determined as True or False.
    """


class UninitializedStateError(Exception):
    """
    The state was not initialized before emulate_one_function.
    """


class MemoryLoadError(Exception):
    """
    A memory load failed.
    """


class UnsupportExternalFuncError(Exception):
    """
    The library function is not emulated by us.
    """


class UnexpectedDataType(Exception):
    """
    Typically raised from an `if-elif-else` statement that dispatches on
    the data type.
    """


class ProcSuccessTermination(Exception):
    """
    The process terminated successfully; `value` carries the payload.
    """

    def __init__(self, value):
        self.value = value

    # printing the exception shows the repr of the payload
    def __str__(self):
        return f"{self.value!r}"


class ProcFailTermination(Exception):
    """
    The process terminated with a failure; `value` carries the payload.
    """

    def __init__(self, value):
        self.value = value

    # printing the exception shows the repr of the payload
    def __str__(self):
        return f"{self.value!r}"
class WasmInstruction(Instruction):
    """A single decoded Wasm instruction.

    Holds the opcode, mnemonic, raw bytes, stack effect and the textual
    interpretation of its immediates, and exposes classification helpers
    (``is_control``, ``is_branch``, ...) based on the opcode group or the
    mnemonic.
    """

    def __init__(
            self, opcode, name, imm_struct, operand_size, insn_byte, pops,
            pushes, description, operand_interpretation=None, offset=0,
            nature_offset=0):
        """Store the decoded fields.

        :param opcode: numeric opcode
        :param name: mnemonic
        :param imm_struct: structure describing the immediates (or None)
        :param operand_size: size in bytes of the immediates
        :param insn_byte: raw bytes of the instruction (opcode + immediates)
        :param pops: number of values popped
        :param pushes: number of values pushed
        :param description: human-readable description
        :param operand_interpretation: formatted text of the instruction
            with its immediates, if any
        :param offset: stored as ``self.offset``
        :param nature_offset: stored as ``self.nature_offset``
        """
        self.opcode = opcode
        self.offset = offset
        self.nature_offset = nature_offset
        self.name = name
        self.description = description
        self.operand_size = operand_size
        if len(insn_byte) > 1:
            # Immediate operand if any
            self.operand = insn_byte[-operand_size:]
        else:
            self.operand = None
        # specific interpretation of operand value
        self.operand_interpretation = operand_interpretation
        self.insn_byte = insn_byte
        self.pops = pops
        self.pushes = pushes
        self.imm_struct = imm_struct
        self.xref = list()
        self.ssa = None
        # which basic block locates in
        self.cur_bb = ''

    def __eq__(self, other):
        """ Instructions are equal if all features match """
        try:
            return self.opcode == other.opcode and \
                self.name == other.name and \
                self.offset == other.offset and \
                self.insn_byte == other.insn_byte and \
                self.operand_size == other.operand_size and \
                self.pops == other.pops and \
                self.pushes == other.pushes and \
                self.operand_interpretation == other.operand_interpretation and \
                self.description == other.description
        except AttributeError:
            # ``other`` is not instruction-like (e.g. None). Let Python fall
            # back to its default comparison instead of crashing.
            return NotImplemented

    def __str__(self):
        """ String representation of the instruction """
        # Fall back to the mnemonic when no interpretation was supplied, so
        # that str() never receives None.
        if self.operand and self.operand_interpretation is not None:
            return self.operand_interpretation
        return self.name

    @property
    def group(self):
        """ Instruction classification per group """
        # Walks ``_groups`` in iteration order and keeps the last entry whose
        # key is <= opcode. This relies on the keys being iterated in
        # ascending order.
        last_class = _groups.get(0)
        for k, v in _groups.items():
            if self.opcode >= k:
                last_class = v
            else:
                return last_class
        return last_class

    @property
    def is_control(self):
        return self.group == 'Control'

    @property
    def is_parametric(self):
        return self.group == 'Parametric'

    @property
    def is_variable(self):
        return self.group == 'Variable'

    @property
    def is_memory(self):
        return self.group == 'Memory'

    @property
    def is_constant(self):
        return self.group == 'Constant'

    @property
    def is_logical_i32(self):
        return self.group == 'Logical_i32'

    @property
    def is_logical_i64(self):
        return self.group == 'Logical_i64'

    @property
    def is_logical_f32(self):
        return self.group == 'Logical_f32'

    @property
    def is_logical_f64(self):
        return self.group == 'Logical_f64'

    @property
    def is_arithmetic_i32(self):
        return self.group == 'Arithmetic_i32'

    @property
    def is_bitwise_i32(self):
        return self.group == 'Bitwise_i32'

    @property
    def is_arithmetic_i64(self):
        return self.group == 'Arithmetic_i64'

    @property
    def is_bitwise_i64(self):
        return self.group == 'Bitwise_i64'

    @property
    def is_arithmetic_f32(self):
        return self.group == 'Arithmetic_f32'

    @property
    def is_arithmetic_f64(self):
        return self.group == 'Arithmetic_f64'

    @property
    def is_conversion(self):
        return self.group == 'Conversion'

    @property
    def is_branch_conditional(self):
        """ Return True if the instruction is a conditional jump """
        return self.name in {'br_if', 'br_table', 'if'}

    @property
    def is_branch_unconditional(self):
        """ Return True if the instruction is an unconditional jump """
        return self.name in {'br'}

    @property
    def is_call(self):
        """ True if the instruction is a call instruction """
        return self.name in {'call', 'call_indirect'}

    @property
    def is_branch(self):
        """ True for both conditional and unconditional jumps """
        return self.is_branch_conditional or self.is_branch_unconditional

    @property
    def is_halt(self):
        """ Return True if the instruction is `unreachable` or `return` """
        return self.name in {'unreachable', 'return'}

    @property
    def is_terminator(self):
        """ True if the instruction is a basic block terminator """
        return self.is_branch or self.is_halt

    @property
    def is_block_starter(self):
        """ Return True if the instruction is a basic block starter """
        return self.name in {'block', 'loop', 'if', 'else'}

    @property
    def is_block_terminator(self):
        """ Return True if the instruction is a basic block terminator """
        return self.name in {'else', 'end'}
def emulate(self, state):
    """Emulate one binary bitwise instruction of ``BitwiseInstructions``.

    Pops two operands and their shadows, pushes the simplified z3 result
    and a combined shadow, and returns ``[state]``.

    :raises UnsupportInstructionError: if the mnemonic matches none of
        and/or/xor/shr_s/shr_u/shl/rotl/rotr
    """
    # arg1 is the top of the stack (popped first), arg2 the value below it.
    arg1, arg2 = state.symbolic_stack.pop(), state.symbolic_stack.pop()
    shadow1, shadow2 = state.shadow_stack.pop(), state.shadow_stack.pop()
    # At most one operand may be marked as a pointer.
    assert not (shadow1.pointer and shadow2.pointer)
    # 'i32' or 'i64', taken from the mnemonic prefix.
    instr_type = self.instr_name[:3]

    # arg1 and arg2 could be BitVecRef, BitVecValRef and BoolRef
    # A Bool operand is replaced by a fresh bit-vector symbol named after
    # the Bool expression; the warning records the information loss.
    if is_bool(arg1):
        arg1 = BitVec(str(arg1), helper_map[instr_type])
        logging.warning(
            f"[!] In `BitwiseInstructions.py`, arg1 is BoolRef, translated to BitVec which may lead to some information loss")
    if is_bool(arg2):
        arg2 = BitVec(str(arg2), helper_map[instr_type])
        logging.warning(
            f"[!] In `BitwiseInstructions.py`, arg2 is BoolRef, translated to BitVec which may lead to some information loss")

    assert arg1.size(
    ) == helper_map[instr_type], f'arg1 size is {arg1.size()} instead of {helper_map[instr_type]} in do_emulate_bitwise_instruction'
    assert arg2.size(
    ) == helper_map[instr_type], f'arg2 size is {arg2.size()} instead of {helper_map[instr_type]} in do_emulate_bitwise_instruction'

    # For shifts and rotates, arg2 is the value and arg1 the amount.
    if '.and' in self.instr_name:
        result = simplify(arg1 & arg2)
    elif '.or' in self.instr_name:
        result = simplify(arg1 | arg2)
    elif '.xor' in self.instr_name:
        result = simplify(arg1 ^ arg2)
    elif '.shr_s' in self.instr_name:
        result = simplify(arg2 >> arg1)
    elif '.shr_u' in self.instr_name:
        result = simplify(LShR(arg2, arg1))
    elif '.shl' in self.instr_name:
        result = simplify(arg2 << arg1)
    elif '.rotl' in self.instr_name:
        result = simplify(RotateLeft(arg2, arg1))
    elif '.rotr' in self.instr_name:
        result = simplify(RotateRight(arg2, arg1))
    else:
        raise UnsupportInstructionError

    # A literal True/False result is converted to a 32-bit 1/0.
    # NOTE(review): with bit-vector operands the result is presumably never
    # a Bool, so this branch looks defensive -- confirm.
    if is_bool(result):
        if is_true(result):
            result = BitVecVal(1, 32)
        elif is_false(result):
            result = BitVecVal(0, 32)

    assert is_bv(result) or is_bool(
        result), f"in bitwise instruction, the value to be pushed is {type(result)} instead of BitVec or Bool"

    state.symbolic_stack.append(result)

    # The result is tainted if either operand is.
    taint = shadow1.taint or shadow2.taint
    # If one operand is a pointer, the result keeps its base/base_taint/size.
    _shadow = None
    if shadow1.pointer:
        _shadow = shadow1
    elif shadow2.pointer:
        _shadow = shadow2
    if _shadow:
        assert not _shadow.stack_pointer
        state.shadow_stack.append(shadow(taint, True, _shadow.base, _shadow.base_taint, _shadow.size, _shadow.stack_pointer))
    else:
        state.shadow_stack.append(shadow(taint, False))

    return [state]
def emulate(self, state, ro_data_section):
    """Emulate a ``*.const`` instruction of ``ConstantInstructions``.

    Parses the constant out of ``self.instr_str``, pushes the matching z3
    value on the symbolic stack and a shadow on the shadow stack, and
    returns ``[state]``.

    :param state: VM state whose stacks are updated in place
    :param ro_data_section: iterable of ``(low, up)`` pairs; an i32 constant
        inside one of them gets a pointer shadow
    :raises UnsupportInstructionError: for an unknown type prefix or a float
        literal in neither of the two recognised forms
    """
    # there are two types of const: i and f, like:
    #     i32.const 0
    #     f64.const 0x1.9p+6 (;=100;)
    # thus we have to deal with the different situations
    mnemonic = self.instr_str.split(' ')[0]
    const_num = self.instr_str.split(' ')[-1]
    const_type_prefix, _ = mnemonic.split('.')

    if const_type_prefix == 'i32':
        state.symbolic_stack.append(BitVecVal(const_num, 32))

        const_num = int(const_num)
        # Default: a plain, untainted, non-pointer value.
        _shadow = shadow(False, False)
        # A constant equal to the start of a data-section range gets a
        # pointer shadow of size ``up - low``.
        for low, up in state.memory_manager.data_section:
            if low == const_num:
                _shadow = shadow(False, True, BitVecVal(const_num, 32), False, up - low, False)
                break
        # A constant anywhere inside a read-only range also gets a pointer
        # shadow, overriding the one chosen above.
        # NOTE(review): here the base is the raw ``low`` rather than a
        # BitVecVal and the last argument is omitted -- confirm this
        # asymmetry is intended.
        for low, up in ro_data_section:
            if const_num >= low and const_num < up:
                _shadow = shadow(False, True, low, False, up - low)
                break

        state.shadow_stack.append(_shadow)
    elif const_type_prefix == 'i64':
        state.symbolic_stack.append(BitVecVal(const_num, 64))
        state.shadow_stack.append(shadow(False,False))
    elif const_type_prefix == 'f32' or const_type_prefix == 'f64':
        # extract float number 100 from (;=100;)
        # TODO: need to be verified
        # NOTE(review): the pattern only accepts digits, '.' and '-', so
        # annotations containing an exponent or inf/nan will not match.
        num_found = re.search(';=([0-9.-]+);', const_num)
        if num_found:
            float_num = num_found.group(1)
            if const_type_prefix == 'f32':
                state.symbolic_stack.append(FPVal(float_num, Float32()))
                state.shadow_stack.append(shadow(False,False))
            else:
                state.symbolic_stack.append(FPVal(float_num, Float64()))
                state.shadow_stack.append(shadow(False,False))
        elif const_num[:2] == '0x':
            # remove '0x' prefix
            const_num = const_num[2:]
            # left-pad with '0' to 8 hex digits (f32) or 16 hex digits (f64)
            current_const_num_length = len(const_num)

            need_zero = (8 - current_const_num_length) if const_type_prefix == 'f32' else (
                16 - current_const_num_length)
            const_num = '0' * need_zero + const_num

            # The hex digits are read as the big-endian byte pattern of the
            # float.
            if const_type_prefix == 'f32':
                float_num = unpack('!f', bytes.fromhex(const_num))[0]
                state.symbolic_stack.append(FPVal(float_num, Float32()))
                state.shadow_stack.append(shadow(False,False))
            else:
                float_num = unpack('!d', bytes.fromhex(const_num))[0]
                state.symbolic_stack.append(FPVal(float_num, Float64()))
                state.shadow_stack.append(shadow(False,False))
        else:
            raise UnsupportInstructionError
    else:
        raise UnsupportInstructionError

    return [state]
24 | def emulate(self, state): 25 | overflow_check_flag = False 26 | if Configuration.get_lasers() & Enable_Lasers.OVERFLOW.value: 27 | overflow_check_flag = True 28 | 29 | def do_emulate_logical_int_instruction(state, overflow_check_flag): 30 | instr_type = self.instr_name[:3] 31 | if 'eqz' in self.instr_name: 32 | arg0 = state.symbolic_stack.pop() 33 | shadow0 = state.shadow_stack.pop() 34 | 35 | assert arg0.size( 36 | ) == helper_map[instr_type], f"in `eqz` the argument popped size is {arg0.size()} instead of {helper_map[instr_type]}" 37 | 38 | result = arg0 == 0 39 | _shadow = shadow(shadow0.taint, False) 40 | 41 | 42 | else: 43 | arg1, arg2 = state.symbolic_stack.pop(), state.symbolic_stack.pop() 44 | shadow1, shadow2 = state.shadow_stack.pop(), state.shadow_stack.pop() 45 | assert not (shadow1.pointer and shadow2.pointer) 46 | 47 | assert is_bv(arg1) and is_bv( 48 | arg2), f"in `logical` instruction, arg1 or arg2 type is wrong instead of BitVec" 49 | 50 | if 'eq' in self.instr_name: 51 | result = arg1 == arg2 52 | elif 'ne' in self.instr_name: 53 | result = arg1 != arg2 54 | elif 'lt_s' in self.instr_name: 55 | result = arg2 < arg1 56 | elif 'lt_u' in self.instr_name: 57 | result = ULT(arg2, arg1) 58 | elif 'gt_s' in self.instr_name: 59 | result = arg2 > arg1 60 | elif 'gt_u' in self.instr_name: 61 | result = UGT(arg2, arg1) 62 | elif 'le_s' in self.instr_name: 63 | result = arg2 <= arg1 64 | elif 'le_u' in self.instr_name: 65 | result = ULE(arg2, arg1) 66 | elif 'ge_s' in self.instr_name: 67 | result = arg2 >= arg1 68 | elif 'ge_u' in self.instr_name: 69 | result = UGE(arg2, arg1) 70 | else: 71 | raise UnsupportInstructionError 72 | 73 | # record if the op is signed or unsigned when the overflow check flag is enabled 74 | def speculate_sign(op, instr_name, sign_mapping): 75 | # if the op is a bitvecval, we do not change anything 76 | if not (is_bv(op) and not is_bv_value(op)): 77 | return 78 | 79 | # unsigned is False and signed is True 80 | # the signed 
will overlap the unsigned 81 | if '_u' in instr_name: 82 | sign_mapping[op.hash()] = sign_mapping.get( 83 | op.hash(), 0) | 0 84 | else: 85 | sign_mapping[op.hash()] = sign_mapping.get( 86 | op.hash(), 0) | 1 87 | 88 | if overflow_check_flag and ( 89 | '_u' in self.instr_name or '_s' in self.instr_name): 90 | speculate_sign(arg1, self.instr_name, state.sign_mapping) 91 | speculate_sign(arg2, self.instr_name, state.sign_mapping) 92 | 93 | taint = shadow1.taint or shadow2.taint 94 | _shadow = shadow(taint, False) 95 | 96 | # try to simplify result and insert 1 or 0 directly, instead of an ite statement 97 | result = simplify(result) 98 | if is_true(result): 99 | state.symbolic_stack.append(BitVecVal(1, 32)) 100 | elif is_false(result): 101 | state.symbolic_stack.append(BitVecVal(0, 32)) 102 | else: 103 | state.symbolic_stack.append( 104 | If(result, BitVecVal(1, 32), BitVecVal(0, 32))) 105 | 106 | 107 | state.shadow_stack.append(_shadow) 108 | 109 | return [state] 110 | 111 | def do_emulate_logical_float_instruction(state): 112 | arg1, arg2 = state.symbolic_stack.pop(), state.symbolic_stack.pop() 113 | shadow1, shadow2 = state.shadow_stack.pop(), state.shadow_stack.pop() 114 | assert not (shadow1.pointer or shadow2.pointer) 115 | instr_type = self.instr_name[:3] 116 | 117 | assert arg1.ebits() == helper_map[instr_type][0] and arg1.sbits( 118 | ) == helper_map[instr_type][1], 'emul_logical_f_instr arg1 type mismatch' 119 | assert arg2.ebits() == helper_map[instr_type][0] and arg2.sbits( 120 | ) == helper_map[instr_type][1], 'emul_logical_f_instr arg2 type mismatch' 121 | 122 | if 'eq' in self.instr_name: 123 | result = fpEQ(arg1, arg2) 124 | elif 'ne' in self.instr_name: 125 | result = fpNEQ(arg1, arg2) 126 | elif 'lt' in self.instr_name: 127 | result = fpLT(arg2, arg1) 128 | elif 'le' in self.instr_name: 129 | result = fpLEQ(arg2, arg1) 130 | elif 'gt' in self.instr_name: 131 | result = fpGT(arg2, arg1) 132 | elif 'ge' in self.instr_name: 133 | result = fpGEQ(arg2, 
def emulate(self, state):
    """Emulate a parametric instruction of ``ParametricInstructions``.

    ``drop`` discards the top of both stacks. ``select`` pops a condition
    and two candidates, and pushes one candidate depending on whether the
    condition is zero. When the condition is symbolic the state is forked
    and each copy gets the matching constraint added to its solver.

    :param state: VM state; mutated in place
    :return: list of successor states. Two entries when both outcomes of a
        symbolic ``select`` are feasible, an empty list when neither is,
        one entry otherwise.
    :raises UnsupportInstructionError: for any other mnemonic
    """
    if self.instr_name == 'drop':
        state.symbolic_stack.pop()
        state.shadow_stack.pop()
        return [state]

    if self.instr_name != 'select':
        raise UnsupportInstructionError

    # arg0 is the condition (top of stack); arg1 and arg2 are the candidates.
    arg0, arg1, arg2 = state.symbolic_stack.pop(
    ), state.symbolic_stack.pop(), state.symbolic_stack.pop()
    shadow0, shadow1, shadow2 = state.shadow_stack.pop(), state.shadow_stack.pop(), state.shadow_stack.pop()
    assert not shadow0.pointer
    assert is_bv(arg0) or is_bool(
        arg0), f"in select, arg0 type is {type(arg0)} instead of bv or bool"

    # mimic the br_if
    # NOTE: if arg0 is zero, return arg1, or arg2
    # ref: https://developer.mozilla.org/en-US/docs/WebAssembly/Reference/Control_flow/Select
    # ``op`` means "the condition is zero". A Bool condition passes the
    # assert above but was never handled before; its zero case is its
    # negation.
    if is_bv(arg0):
        op = simplify(arg0 == 0)
    else:
        op = simplify(Not(arg0))

    if is_true(op):
        state.symbolic_stack.append(arg1)
        state.shadow_stack.append(shadow1)
        return [state]
    if is_false(op):
        state.symbolic_stack.append(arg2)
        state.shadow_stack.append(shadow2)
        return [state]

    # Symbolic condition: ask the solver which outcomes are feasible, so
    # that an unnecessary deepcopy can be skipped.
    no_need_true = unsat == one_time_query_cache(state.solver, op)
    no_need_false = unsat == one_time_query_cache(state.solver, Not(op))

    if no_need_true and no_need_false:
        # Neither outcome is satisfiable, so there is no successor. This
        # case used to fall through and return None.
        return []

    if no_need_true:
        state.solver.add(Not(op))
        state.symbolic_stack.append(arg2)
        state.shadow_stack.append(shadow2)
        return [state]

    if no_need_false:
        state.solver.add(op)
        state.symbolic_stack.append(arg1)
        state.shadow_stack.append(shadow1)
        return [state]

    # Both outcomes are feasible: fork before mutating the original state.
    new_state = deepcopy(state)

    state.solver.add(op)
    state.symbolic_stack.append(arg1)
    state.shadow_stack.append(shadow1)

    new_state.solver.add(Not(op))
    new_state.symbolic_stack.append(arg2)
    new_state.shadow_stack.append(shadow2)

    return [state, new_state]
assert op == 0 45 | 46 | if isinstance( 47 | global_operand, str) or isinstance( 48 | global_operand, int): 49 | state.symbolic_stack.append(BitVecVal(global_operand, 32)) 50 | state.shadow_stack.append(global_shadow) 51 | elif is_bv(global_operand) or is_bv_value(global_operand): 52 | # the operand is a BitVecRef or BitVecNumRef 53 | state.symbolic_stack.append(global_operand) 54 | state.shadow_stack.append(global_shadow) 55 | else: 56 | raise UnsupportGlobalTypeError 57 | elif self.instr_name == 'set_global': 58 | global_operand = state.symbolic_stack.pop() 59 | global_index = op 60 | global_shadow = state.shadow_stack.pop() 61 | assert op == 0 62 | if not is_bv_value(global_operand): 63 | assert global_shadow.taint 64 | func_ind = get_func_index_from_state(analyzer, state) 65 | func_offset = state.instr.offset 66 | write_vulnerabilities(state, f"store taint length out of bound{get_source_location_string(analyzer, func_ind, func_offset)}") 67 | return [] 68 | state.globals[global_index] = global_operand 69 | state.shadow_globals[global_index] = global_shadow 70 | elif self.instr_name == 'tee_local': 71 | var = state.symbolic_stack[-1] 72 | shadow = state.shadow_stack[-1] 73 | state.local_var[op] = var 74 | state.shadow_local[op]=shadow 75 | else: 76 | raise UnsupportInstructionError 77 | return [state] 78 | -------------------------------------------------------------------------------- /eunomia/arch/wasm/instructions/__init__.py: -------------------------------------------------------------------------------- 1 | from .ArithmeticInstructions import * 2 | from .BitwiseInstructions import * 3 | from .ConstantInstructions import * 4 | from .ControlInstructions import * 5 | from .ConversionInstructions import * 6 | from .LogicalInstructions import * 7 | from .MemoryInstructions import * 8 | from .ParametricInstructions import * 9 | from .VariableInstructions import * 10 | -------------------------------------------------------------------------------- 
class ImportFunction:
    """Stand-in for an imported function whose body is not emulated.

    Calling ``emul`` consumes the call's arguments and, if the function
    returns a value, pushes a fresh unconstrained symbol of the return type.
    """

    def __init__(self, name, cur_func_name):
        """
        :param name: name of the imported function
        :param cur_func_name: name of the function issuing the call
        """
        self.name = name
        self.cur_func = cur_func_name

    def _fresh_symbol_name(self):
        """Build the symbol name: import name, caller name and the last five
        characters of the current timestamp, wrapped in double underscores."""
        return "__"+self.name+"_from_"+self.cur_func+str(datetime.timestamp(datetime.now()))[-5:]+"__"

    def emul(self, state, param_str, return_str):
        """Emulate the call.

        :param state: VM state; its symbolic and shadow stacks are mutated
        :param param_str: space-separated parameter types of the import
        :param return_str: return type ('i32', 'i64', 'f32', 'f64') or a
            falsy value when the import returns nothing
        :return: ``[state]``
        :raises AssertionError: if ``return_str`` is an unsupported type
        """
        # Only the popping side effect matters; the argument values are not
        # used.
        sgx_extract_params(param_str, state)

        if not return_str:
            return [state]

        symbol_name = self._fresh_symbol_name()
        if return_str == 'i32':
            ret = BitVec(symbol_name, 32)
        elif return_str == 'i64':
            ret = BitVec(symbol_name, 64)
        elif return_str == 'f32':
            ret = FP(symbol_name, Float32())
        elif return_str == 'f64':
            ret = FP(symbol_name, Float64())
        else:
            # This was a bare ``assert 0``. Under ``python -O`` it vanished
            # and a shadow was pushed without a matching value.
            raise AssertionError(
                f"unsupported import return type: {return_str!r}")

        state.symbolic_stack.append(ret)
        # The returned symbol is treated as untainted and non-pointer.
        state.shadow_stack.append(shadow(False, False))

        return [state]
def sgx_extract_params(param_str, state):
    """
    Pop the arguments of an import call from both stacks of `state`.

    One value and one shadow are popped for every non-empty,
    space-separated token of `param_str`. Both lists are returned in pop
    order, i.e. reversed with respect to the signature: for foo (a, b) the
    result is ([b, a], [shadow_b, shadow_a]).
    """
    arity = sum(1 for token in param_str.split(" ") if token)
    params, shadow_params = [], []
    for _ in range(arity):
        value = state.symbolic_stack.pop()
        params.append(value)
        value_shadow = state.shadow_stack.pop()
        shadow_params.append(value_shadow)
    return params, shadow_params


def _storeN(state, dest, val, len_in_bytes):
    """Write `val` (`len_in_bytes` bytes) at `dest` in the symbolic memory.

    A value that is not already a z3 bit-vector is first wrapped in a
    BitVecVal of `len_in_bytes * 8` bits.
    """
    stored = val if is_bv(val) else BitVecVal(val, len_in_bytes * 8)
    state.symbolic_memory = insert_symbolic_memory(
        state.symbolic_memory, dest, len_in_bytes, stored)


def _loadN(state, data_section, dest, len_in_bytes):
    """Read `len_in_bytes` bytes at `dest` from symbolic memory / data section.

    A concrete bit-vector value is returned as a Python int; anything else
    is returned as-is.
    """
    loaded = lookup_symbolic_memory_data_section(
        state.symbolic_memory, data_section, dest, len_in_bytes)
    return loaded.as_long() if is_bv_value(loaded) else loaded
analyzer.datas 16 | self.data_section = dict() 17 | self.shadow_data_section = dict() 18 | if len(datas) == 0: 19 | data_bound = DATA_BASE 20 | elif len(datas) == 1: 21 | data_bound = datas[0]['offset'] + datas[0]['size'] 22 | else: 23 | assert len(datas) == 2 24 | data_bound = max(datas[0]['offset'] + datas[0]['size'], datas[1]['offset'] + datas[1]['size']) 25 | data_section_value = datas[1] 26 | data = data_section_value['data'] 27 | offset = data_section_value['offset'] 28 | size = data_section_value['size'] 29 | self.data_section[(offset,offset+size)]=data 30 | self.shadow_data_section[(offset,offset+size)] = [None] 31 | 32 | globals = analyzer.globals[1:] 33 | exports = analyzer.exports 34 | global_exports = [x for x in exports if x['kind'] == 3] 35 | assert len(globals) == len(global_exports) 36 | period = 1 37 | additional_globals = list() 38 | __data_end = None 39 | for i in range(len(globals)): 40 | assert global_exports[i]['index'] == i + 1 41 | if period == 1: 42 | if global_exports[i]['field_str'] == '__dso_handle': 43 | period = 2 44 | continue 45 | if int(globals[i][1]) >= data_bound: 46 | additional_globals.append(int(globals[i][1])) 47 | else: 48 | assert global_exports[i]['field_str'][0:2] == '__' 49 | if global_exports[i]['field_str'] == '__data_end': 50 | __data_end = int(globals[i][1]) 51 | assert __data_end 52 | 53 | additional_globals.sort() 54 | 55 | for i, global_var in enumerate(additional_globals): 56 | if i != len(additional_globals) - 1: 57 | assert additional_globals[i+1] > additional_globals[i] 58 | self.data_section[additional_globals[i],additional_globals[i+1]] = [2,0] 59 | self.shadow_data_section[additional_globals[i],additional_globals[i+1]] = None 60 | else: 61 | assert __data_end > additional_globals[i] 62 | self.data_section[additional_globals[i], __data_end] = [2,0] 63 | self.shadow_data_section[additional_globals[i], __data_end] = None 64 | 65 | 66 | self.data_bound = __data_end 67 | assert self.data_bound <= HEAP_BASE 68 | 
69 | ################################## 70 | #heap 71 | self.heap_base = HEAP_BASE 72 | self.max_heap_size = MAX_HEAP_SIZE 73 | self.free_list = [[self.heap_base, MAX_HEAP_SIZE]] 74 | self.heap = dict() 75 | 76 | 77 | ################################## 78 | #stack 79 | self.stack_upperbound = STACK_TOP 80 | self.max_stack_size = MAX_STACK_SIZE 81 | state.globals[0] = STACK_TOP -------------------------------------------------------------------------------- /eunomia/arch/wasm/modules/BufferOverflowLaser.py: -------------------------------------------------------------------------------- 1 | # Implement an buffer overflow detector 2 | 3 | import logging 4 | 5 | from eunomia.arch.wasm.configuration import bcolors 6 | from eunomia.arch.wasm.dwarfParser import (decode_var_type, 7 | get_func_index_from_state, 8 | get_source_location_string) 9 | 10 | 11 | class BufferOverflowLaser: 12 | def __init__(self): 13 | pass 14 | 15 | def fire(self, analyzer, state, dest, the_string, the_string_len): 16 | buffer_overflowed = False 17 | 18 | # the destination's type (should be array) and its corresponding size 19 | # TODO better approach than using global stack pointer 20 | _, var_size = decode_var_type( 21 | analyzer, state, dest, use_global_sp=True) 22 | if var_size is None: 23 | logging.warning( 24 | f"{bcolors.WARNING}unable to decode variable type for address {hex(dest)}{bcolors.ENDC}") 25 | return 26 | 27 | if the_string_len > var_size: 28 | logging.warning( 29 | f"{bcolors.WARNING}The string '{the_string}' may result in buffer overflow due to unlimited copy and write{bcolors.ENDC}") 30 | buffer_overflowed = True 31 | 32 | if buffer_overflowed: 33 | import datetime 34 | logging.info(f"Current Time: {datetime.datetime.now()}") 35 | func_ind = get_func_index_from_state(analyzer, state) 36 | func_offset = state.instr.offset 37 | logging.warning( 38 | f"{bcolors.WARNING}Buffer overflowed! 
# Implement a div-zero detector

# z3 declaration names of the operations whose second operand is a divisor.
div_operations = {'bvsdiv', 'bvudiv', 'bvsrem', 'bvurem', 'fp.div'}


class DivZeroLaser:
    """Report divisions / remainders whose divisor can be zero."""

    def __init__(self):
        pass

    @staticmethod
    def _constraints_of(solver):
        """
        Return the path constraints carried by ``solver`` as a fresh list.

        NOTE(review): ``solver`` is presumably a z3 ``Solver`` (which exposes
        ``assertions()``); a plain iterable of constraints is accepted as
        well -- confirm against the call sites.
        """
        if solver is None:
            return []
        assertions = getattr(solver, 'assertions', None)
        if callable(assertions):
            return list(assertions())
        return list(solver)

    def _check(self, condition):
        """Return True when ``condition`` is satisfiable in a fresh solver."""
        s = SMTSolver(Configuration.get_solver())
        s.add(condition)
        if sat == s.check():
            return True
        return False

    def fire(self, expr, solver):
        """
        Check whether the divisor of ``expr`` may be zero.

        :param expr: expression exposing ``decl().name()`` and ``arg(i)``
        :param solver: source of the current path constraints
        :return: True if the operation is a division/remainder whose second
                 operand can be zero under the constraints, False otherwise
        """
        # Only division-like operations are of interest; deciding this first
        # avoids touching operands of unrelated expressions.
        if expr.decl().name() not in div_operations:
            return False

        # The second operand is the divisor.
        op2 = expr.arg(1)
        # Work on a fresh list so the caller's constraints are never mutated.
        new_cond = self._constraints_of(solver) + [op2 == 0]
        if not self._check(new_cond):
            return False

        logging.warning(
            f"{bcolors.WARNING}The op2 ({op2}) may be zero, which may result in Div-Zero vulnerability!{bcolors.ENDC}")
        import datetime
        logging.info(f"Current Time: {datetime.datetime.now()}")
        return True
# Implement an integer overflow detector

# z3 declaration names of the arithmetic operations that are inspected.
overflow_group = {'bvadd', 'bvmul', 'bvsub'}


class OverflowLaser:
    """Report bit-vector add/sub/mul operations that may wrap around."""

    def __init__(self):
        pass

    @staticmethod
    def _constraints_of(solver):
        """
        Return the path constraints carried by ``solver`` as a fresh list.

        NOTE(review): ``solver`` is presumably a z3 ``Solver`` (which exposes
        ``assertions()``); a plain iterable of constraints is accepted as
        well -- confirm against the call sites.
        """
        if solver is None:
            return []
        assertions = getattr(solver, 'assertions', None)
        if callable(assertions):
            return list(assertions())
        return list(solver)

    @staticmethod
    def _contains(cons, op):
        """Return True if ``op`` occurs as a strict sub-term of ``cons``."""
        target = op.get_id()
        pending = list(cons.children())
        while pending:
            node = pending.pop()
            if node.get_id() == target:
                return True
            pending.extend(node.children())
        return False

    def _check(self, constraint):
        """Return True when ``constraint`` is satisfiable in a fresh solver."""
        s = SMTSolver(Configuration.get_solver())
        s.add(constraint)
        if sat == s.check():
            return True
        return False

    def fire(self, expr, solver, sign_mapping):
        """
        Check whether ``expr`` may overflow / underflow.

        :param expr: expression exposing ``decl().name()`` and ``arg(i)``
        :param solver: source of the current path constraints
        :param sign_mapping: mapping ``operand.hash() -> bool`` telling
                             whether an operand is signed (default: signed)
        :return: None when ``expr`` is not an add/sub/mul, otherwise a bool
                 telling whether a possible overflow was reported
        """
        op_name = expr.decl().name()
        # we only consider the instructions in `overflow_group`
        if op_name not in overflow_group:
            return

        op1, op2 = expr.arg(0), expr.arg(1)
        # Fresh list: the caller's constraints are never mutated.
        new_cond = self._constraints_of(solver)
        overflowed = False

        # step 1: two concrete numerals mean there is no free variable
        both_concrete = isinstance(op1, BitVecNumRef) and \
            isinstance(op2, BitVecNumRef)

        # step 2: an operand that no constraint mentions is unconstrained
        if not both_concrete and not any(
                self._contains(constraint, op)
                for op in (op1, op2) for constraint in new_cond):
            logging.warning(
                f"{bcolors.WARNING}op1 ({op1}) or op2 ({op2}) is free, which may result in overflow!{bcolors.ENDC}")
            overflowed = True

        # step 3: speculate the sign, default is signed
        is_signed = sign_mapping.get(
            op1.hash(), True) & sign_mapping.get(op2.hash(), True)

        # Built lazily so the z3 predicates are only looked up when needed.
        no_wrap, effect = {
            'bvadd': (BVAddNoOverflow, 'overflow'),
            'bvsub': (BVSubNoUnderflow, 'underflow'),
            'bvmul': (BVMulNoOverflow, 'overflow'),
        }[op_name]
        new_cond += [Not(no_wrap(op1, op2, is_signed))]
        if self._check(new_cond):
            signedness = 'signed' if is_signed else 'unsigned'
            logging.warning(
                f"{bcolors.WARNING}The {op_name} of op1 ({op1}) and op2 ({op2}) may {effect} ({signedness}){bcolors.ENDC}")
            overflowed = True

        if overflowed:
            import datetime
            logging.info(f"Current Time: {datetime.datetime.now()}")
        return overflowed
# eunomia/arch/wasm/mythread.py -- worker threads draining a shared state pool

# Number of worker threads started by multi_thread_process.
CoreNum = 1
# Workers keep polling the pool only while this flag is True.
alive = False
block_visit = set()
tuple_seen = list()
# Pending work items, stored as (score, (state, func)).
state_pool = PriorityQueue()
state_pool_lock = Lock()
edge_num = defaultdict(int)
edge_num_lock = Lock()

# Entry function -> emulator engine dedicated to that entry.
wasmVMdict = dict()


GlobalEcallList = list()

# Running counter used to number the initial states.
statenum = 0

basicblock_num = 0

# Progress file: one header line per worker, then "<elapsed> <visited>" lines.
basicblock_file = "bb.txt"


def Init_state(wasmVM, func):
    """Build the initial state of ``func`` from its parameter signature."""
    _, param_str, _, _ = wasmVM.get_signature(func)

    return wasmVM.init_state(func, param_str)


class myThread(Thread):
    """Worker that repeatedly takes a state from ``state_pool`` and explores it."""

    def __init__(self, results):
        Thread.__init__(self)
        # Shared list; the worker appends 0 when its loop ends.
        self.results = results

    def run(self):
        """Process pooled states until the pool is empty or ``alive`` is cleared."""
        global alive
        # Imported at call time, as in the original; presumably this avoids
        # an import cycle with pathgraph -- TODO confirm.
        from eunomia.arch.wasm.pathgraph import Graph
        t = currentThread()
        with open(basicblock_file, 'a') as f:
            f.write("basic block num: %d\n" % (basicblock_num))
        T1 = time.time()
        while alive:
            # `with` guarantees the lock is released even if the queue raises.
            with state_pool_lock:
                item = None if state_pool.empty() else state_pool.get()
            if item is None:
                print('Thread id : %d gets nothing, now exits.' % t.ident)
                break

            _, (state, func) = item
            print('Thread id : %d get a state %d and start to process' % (t.ident, state.statenum))
            wasmVM = wasmVMdict[func]
            # run the emulator for SSA
            graph = Graph(func)
            graph.wasmVM = wasmVM
            graph.GlobalEcallList = GlobalEcallList
            # NOTE(review): no module-level `round` is visible here, so this
            # binds the builtin `round` function -- confirm that is intended.
            graph.round = round
            graph.manual_guide = False
            graph.initialize()
            graph.traverse(state)
            print('Thread id : %d finish a state' % t.ident)
            T2 = time.time()
            with open(basicblock_file, 'a') as f:
                f.write("%f %d\n" % (T2 - T1, len(block_visit)))
        self.results.append(0)


def multi_thread_process(octocode, namelist, Ecall_list, max_time):
    """
    Explore every entry in ``Ecall_list`` with ``CoreNum`` worker threads.

    :param octocode: module bytecode handed to the emulator engine
    :param namelist: forwarded unchanged to the emulator engine
    :param Ecall_list: entry functions; one initial state is queued for each
    :param max_time: wall-clock limit in seconds; a falsy value disables it
    :raises AssertionError: if a worker did not record a clean exit
    """
    global GlobalEcallList
    global statenum
    global basicblock_num
    global alive
    GlobalEcallList = Ecall_list

    wasmVM = WasmSSAEmulatorEngine(isglobal = True, bytecode = octocode, namelist = namelist)

    basicblock_num = len(wasmVM.cfg.basicblocks)
    for func in Ecall_list:
        state = Init_state(wasmVM, func)
        state.statenum = statenum
        statenum += 1
        # NOTE(review): all initial entries share priority -999, so the queue
        # falls back to comparing the (state, func) payload on ties --
        # presumably the state type is orderable; verify.
        state_pool.put((-999, (state, func)))
        wasmVMdict[func] = WasmSSAEmulatorEngine(isglobal = False, Engine = wasmVM, entryFunc = func)

    alive = True
    results = []
    threadlist = [myThread(results) for _ in range(CoreNum)]

    for thread in threadlist:
        thread.start()

    start_time = time.time()
    while any(thread.is_alive() for thread in threadlist):
        if max_time and time.time() - start_time > max_time:
            print('Time limit (%d seconds) reached, killing threads' % max_time)
            # Workers read this flag between states and then leave their loop.
            alive = False
            break
        sleep(1)

    for thread in threadlist:
        thread.join()

    assert len(results) == CoreNum, "Some threads did not exit properly"
    for result in results:
        assert result == 0
# ---- eunomia/core/utils.py ----

def bytecode_to_bytes(bytecode):
    """
    Convert a hex string (optionally prefixed with ``0x``) to ``bytes``.

    Input that is already binary (``bytes``, ``bytearray``, ``memoryview``)
    and any other non-string value is returned unchanged.

    :raises ValueError: if a string argument is not valid hexadecimal
    """
    # Decide on the type explicitly instead of relying on bytes.fromhex()
    # rejecting binary input with TypeError.
    if isinstance(bytecode, (bytes, bytearray, memoryview)):
        return bytecode

    if str(bytecode).startswith("0x"):
        bytecode = bytecode[2:]

    try:
        return bytes.fromhex(bytecode)
    except TypeError:
        # not a string: hand it back untouched
        return bytecode


def search_in_list_of_dict(string_to_search, target_list, key_dict):
    """
    Return the dicts of ``target_list`` whose ``key_dict`` entry, rendered
    with ``str()``, contains ``str(string_to_search)``.
    """
    needle = str(string_to_search)
    return [elem for elem in target_list if needle in str(elem[key_dict])]


# ---- eunomia/engine/disassembler.py ----

class BytecodeEmptyException(Exception):
    """Exception raised when bytecode is None"""
    pass


class Disassembler(object):
    """ Generic Disassembler class """

    # Output formats understood by disassemble().
    _R_FORMATS = ('list', 'text', 'reverse')

    def __init__(self, asm, bytecode=None):
        self.bytecode = bytecode
        self.instructions = list()
        self.reverse_instructions = dict()
        self.asm = asm

    def attributes_reset(self):
        """Reset instructions class attributes """
        self.instructions = list()
        self.reverse_instructions = dict()

    def disassemble_opcode(self, bytecode, offset=0, nature_offset=0):
        """
        Generic method to disassemble one instruction.

        ``nature_offset`` is the index of the instruction in the stream;
        disassemble() always passes it, so subclasses must accept it.
        """
        raise NotImplementedError

    def disassemble(self, bytecode=None, offset=0, nature_offset=0,
                    r_format='list'):
        """Generic method to disassemble bytecode

        :param bytecode: bytecode sequence
        :param offset: start offset
        :param nature_offset: index given to the first decoded instruction
        :param r_format: output format ('list'/'text'/'reverse')
        :type bytecode: bytes, str
        :type offset: int
        :type r_format: list, str, dict
        :return: dissassembly result depending of r_format
        :rtype: list, str, dict
        :raises BytecodeEmptyException: if no bytecode is available
        :raises ValueError: if r_format is not a supported format
        """
        # Reject a bad format up front instead of silently returning None
        # after all the work has been done.
        if r_format not in self._R_FORMATS:
            raise ValueError(
                "unsupported r_format %r, expected one of %r"
                % (r_format, self._R_FORMATS))

        # reinitialize class variable
        self.attributes_reset()

        self.bytecode = bytecode if bytecode else self.bytecode
        if not self.bytecode:
            raise BytecodeEmptyException()

        self.bytecode = bytecode_to_bytes(self.bytecode)

        while offset < len(self.bytecode):
            instr = self.disassemble_opcode(
                self.bytecode[offset:],
                offset, nature_offset)
            offset += instr.size
            nature_offset += 1
            self.instructions.append(instr)

        # fill reverse instructions: position in the list -> instruction
        self.reverse_instructions = dict(enumerate(self.instructions))

        # return instructions
        if r_format == 'list':
            return self.instructions
        if r_format == 'text':
            return '\n'.join(map(str, self.instructions))
        return self.reverse_instructions


# ---- eunomia/engine/emulator.py ----

# =======================================
# #            Emulator                 #
# =======================================


class EmulatorEngine(object):
    """Abstract emulator interface; every method must be overridden."""

    def __init__(self, instructions):
        """ TODO """
        raise NotImplementedError

    def emulate(self, state, depth=0):
        """ TODO """
        raise NotImplementedError

    def emulate_one_instruction(self, instr, state, depth):
        """ TODO """
        raise NotImplementedError
# ---- main.py ----

def do_symgx(args):
    """
    Run the SymGX branch of the tool.

    :param args: parsed command-line namespace; ``--ecall-list`` is required
    :raises SystemExit: with status 1 when ``--ecall-list`` is missing
    """
    # ecall_list must be specified; check it before importing and
    # configuring the engine so a bad invocation fails immediately.
    if not args.ecall_list:
        print("Error: --symgx requires --ecall-list", file=sys.stderr)
        # sys.exit, not the site-injected exit(), which is absent under -S.
        sys.exit(1)

    from eunomia.arch.wasm.configuration import Configuration
    Configuration.set_start_time(datetime.now().strftime("%Y%m%d%H%M%S%f"))
    from SymGX import SymGX

    Configuration.set_file(args.file.name)

    SymGX(args)


def do_normal(args):
    """
    Run the generic (seewasm) branch: emit the .wat file and, when
    ``--symbolic`` is given, perform symbolic execution or visualization.

    :param args: parsed command-line namespace
    """
    from seewasm.arch.wasm.configuration import Configuration
    Configuration.set_start_time(datetime.now().strftime("%Y%m%d%H%M%S%f"))

    print(args)

    module_bytecode = args.file.read()
    # create the corresponding wat file
    wat_file_path = args.file.name.replace('.wasm', '.wat')
    if not path.exists(wat_file_path):
        sh.Command('wasm2wat')([args.file.name, "-o", wat_file_path])
        print(
            f"The corresponding wat file is written in: {wat_file_path}",
            flush=True)

    # Nothing else to do unless symbolic execution was requested.
    if not args.symbolic:
        return

    # conduct symbolic execution
    Configuration.set_verbose_flag(args.verbose)
    Configuration.set_file(args.file.name)
    Configuration.set_entry(args.entry)
    Configuration.set_visualize(args.visualize)
    Configuration.set_source_type(args.source_type)
    Configuration.set_stdin(args.stdin, args.sym_stdin)
    Configuration.set_sym_files(args.sym_files)
    Configuration.set_incremental_solving(args.incremental)
    Configuration.set_elem_index_to_func(wat_file_path)
    Configuration.set_algo(args.search)

    # Record the exact command line next to the results of this run.
    command_file_name = f"./output/result/{Configuration.get_file_name()}_{Configuration.get_start_time()}/command.json"
    makedirs(path.dirname(command_file_name), exist_ok=False)
    with open(command_file_name, 'w') as fp:
        json.dump({"Command": " ".join(sys.argv)}, fp, indent=4)

    # --args and --sym_args can exist simultaneously
    # their order are fixed, i.e., --args is in front of --sym_args
    # the file_name is always the argv[0]
    Configuration.set_args(
        Configuration.get_file_name(),
        args.args, args.sym_args)

    # import necessary part
    from seewasm.arch.wasm.emulator import WasmSSAEmulatorEngine
    from seewasm.arch.wasm.graph import Graph

    wasmVM = WasmSSAEmulatorEngine(module_bytecode)
    # run the emulator for SSA
    Graph.wasmVM = wasmVM
    Graph.initialize()
    # draw the ICFG on basic block level, and exit
    if Configuration.get_visualize():
        from seewasm.arch.wasm.visualizator import visualize
        # draw here
        graph_path = path.join("output", "visualized_graph", f"{Configuration.get_file_name()}_{Configuration.get_start_time()}.gv")
        visualize(Graph, graph_path)
        print("The visualization of ICFG is done.")
        return

    graph = Graph()
    graph.traverse()


def parse(argv=None):
    """
    Build the command-line parser and parse the arguments.

    :param argv: argument list to parse; ``None`` (default) parses
                 ``sys.argv[1:]``
    :return: the populated ``argparse.Namespace``
    """
    parser = argparse.ArgumentParser(
        description='WASEM, a general symbolic execution framework for WebAssembly (WASM) binaries')

    inputs = parser.add_argument_group('Input arguments')
    inputs.add_argument('-f', '--file',
                        type=argparse.FileType('rb'),
                        help='binary file (.wasm)',
                        metavar='WASMMODULE', required=True)
    inputs.add_argument('--stdin',
                        action='store',
                        type=str,
                        help='stream of stdin')
    inputs.add_argument('--sym_stdin',
                        action='store',
                        type=int,
                        nargs=1,
                        help='stream of stdin in N bytes symbols')
    inputs.add_argument('--args',
                        action='store',
                        type=str,
                        help='command line')
    inputs.add_argument(
        '--sym_args', type=int, nargs='+',
        help="command line in symbols, each of them is N bytes at most")
    inputs.add_argument(
        '--sym_files', type=int, nargs=2,
        help="Create N symbolic files, each of them has M symbolic bytes")
    inputs.add_argument(
        '--source_type', default='c', const='c', nargs='?',
        choices=['c', 'go', 'rust'],
        help='type of source file')

    features = parser.add_argument_group('Features')
    features.add_argument(
        '--entry', type=str, nargs=1, default=["__original_main"],
        help='set entry point as the specilized function')
    features.add_argument(
        '--visualize', action='store_true',
        help='visualize the ICFG on basic blocks level')
    features.add_argument(
        '--incremental', action='store_true',
        help='enable incremental solving')
    features.add_argument(
        '-v', '--verbose', default='warning', const='warning', nargs='?',
        choices=['warning', 'info', 'debug'],
        help='set the logging level')

    analyze = parser.add_argument_group('Analyze')
    analyze.add_argument(
        '-s', '--symbolic', action='store_true',
        help='perform the symbolic execution')
    analyze.add_argument(
        '--search', default='dfs', const='dfs', nargs='?',
        choices=['dfs', 'bfs', 'random', 'interval'],
        help='set the search algorithm')
    analyze.add_argument(
        '--max-time', action='store', type=int,
        help='maximum time in seconds')
    analyze.add_argument(
        '--max-memory', action='store', type=int,
        help='maximum memory in MB')

    symgx = parser.add_argument_group('Symgx')
    symgx.add_argument('--symgx', action='store_true', help='enable the branch of symgx', default=False)
    symgx.add_argument('--ecall-list', help='ecall list string, separated by commas (`,`)')
    symgx.add_argument('--func-list', help='function list string, separated by commas (`,`)')

    return parser.parse_args(argv)


def main():
    """Entry point: parse arguments, apply limits, dispatch, report timing."""
    args = parse()

    if args.max_memory:
        # Cap the address space of this process (soft and hard limit alike).
        limit = args.max_memory * 1024 * 1024
        resource.setrlimit(resource.RLIMIT_AS, (limit, limit))
        print(f"Memory limit set to {args.max_memory} MB", flush=True)

    job_start_time = datetime.now()
    current_time_start = job_start_time.strftime("%Y-%m-%d %H:%M:%S_%f")
    print(f"Start to analyze: {current_time_start}", flush=True)
    print("Running...", flush=True)

    if args.symgx:
        do_symgx(args)
    else:
        do_normal(args)

    print("Finished.", flush=True)
    job_end_time = datetime.now()
    current_time_end = job_end_time.strftime("%Y-%m-%d %H:%M:%S_%f")
    print(f"End of analyze: {current_time_end}", flush=True)
    elapsed_time = job_end_time - job_start_time
    print(f"Time elapsed: {elapsed_time}", flush=True)


if __name__ == '__main__':
    main()
-------------------------------------------------------------------------------- /pic/logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PKU-ASAL/WASEM/8cfdc5e25f42c41d95aa061e3bfe98a987b5914c/pic/logo.png -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | pyelftools>=0.31 2 | graphviz>=0.20.1 3 | leb128==1.0.8 4 | pyelftools==0.31 5 | pytest>=7.4.4 6 | pytest-parallel==0.1.1 7 | sh>=1.14.2 8 | z3-solver==4.13.0.0 9 | wllvm==1.3.1 -------------------------------------------------------------------------------- /seewasm/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PKU-ASAL/WASEM/8cfdc5e25f42c41d95aa061e3bfe98a987b5914c/seewasm/__init__.py -------------------------------------------------------------------------------- /seewasm/analysis/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PKU-ASAL/WASEM/8cfdc5e25f42c41d95aa061e3bfe98a987b5914c/seewasm/analysis/__init__.py -------------------------------------------------------------------------------- /seewasm/analysis/__init__.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PKU-ASAL/WASEM/8cfdc5e25f42c41d95aa061e3bfe98a987b5914c/seewasm/analysis/__init__.pyc -------------------------------------------------------------------------------- /seewasm/analysis/cfg.py: -------------------------------------------------------------------------------- 1 | class CFG(object): 2 | 3 | def __init__(self): 4 | """ TODO """ 5 | raise NotImplementedError 6 | 7 | def visualize(self): 8 | """ TODO """ 9 | raise NotImplementedError 10 | 11 | def visualize_call_flow(self): 12 | """ TODO """ 13 | raise 
# ---- seewasm/arch/wasm/constant.py ----
# It defines some constants

# https://github.com/WebAssembly/design/blob/master/BinaryEncoding.md#language-types
# Maps a type-constructor opcode to the name of the type it introduces.
LANG_TYPE = {
    # Opcode, Type constructor
    -0x01: 'i32',
    -0x02: 'i64',
    -0x03: 'f32',
    -0x04: 'f64',
    -0x10: 'anyfunc',
    -0x20: 'func',
    -0x40: 'block_type'
}

# Maps the numeric kind of an import/export entry to its name.
KIND_TYPE = {
    0: 'function',
    1: 'table',
    2: 'memory',
    3: 'global',
}


# ---- seewasm/arch/wasm/decode.py ----
# The MIT License (MIT)
#
# Copyright (c) 2016 Joel Höner
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in all
# copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.

# modified code from https://github.com/athre0z/wasm/blob/master/wasm/modtypes.py
# no need of that if PyPI wasm version 1.2 release

# Provides functions for decoding WASM modules and bytecode.

# One decoded instruction: opcode descriptor, immediates, total byte length.
Instruction = namedtuple('Instruction', 'op imm len')
# One decoded piece of a module: the decoder object and the data it produced.
ModuleFragment = namedtuple('ModuleFragment', 'type data')


def decode_bytecode(bytecode):
    """Decodes raw bytecode, yielding `Instruction`s."""
    remaining = memoryview(bytecode)
    while remaining:
        descriptor = OPCODE_MAP[byte2int(remaining[0])]

        # Opcodes without an immediate structure occupy the opcode byte only.
        imm, imm_size = None, 0
        if descriptor.imm_struct is not None:
            imm_size, imm, _ = descriptor.imm_struct.from_raw(
                None, remaining[1:])

        total = 1 + imm_size
        yield Instruction(descriptor, imm, total)
        remaining = remaining[total:]


def decode_module(module, decode_name_subsections=False):
    """Decodes raw WASM modules, yielding `ModuleFragment`s."""
    remaining = memoryview(module)

    # The module header always comes first.
    header = ModuleHeader()
    header_size, header_data, _ = header.from_raw(None, remaining)
    yield ModuleFragment(header, header_data)
    remaining = remaining[header_size:]

    # Then one fragment per section (or per name subsection, on request).
    while remaining:
        section = Section()
        # bypass the error caused by -g1 to -g3 compiled C code
        try:
            section_size, section_data, _ = section.from_raw(None, remaining)
        except Exception:
            return

        expand_names = (decode_name_subsections
                        and section_data.id == SEC_UNK
                        and section_data.name == SEC_NAME)
        if not expand_names:
            yield ModuleFragment(section, section_data)
        else:
            # Walk the payload of the name section subsection by subsection.
            payload = section_data.payload
            while payload:
                subsection = NameSubSection()
                subsection_size, subsection_data, _ = subsection.from_raw(
                    None, payload)
                yield ModuleFragment(subsection, subsection_data)
                payload = payload[subsection_size:]

        remaining = remaining[section_size:]
class WasmDisassembler(Disassembler):
    """Disassembler for WebAssembly instructions and whole modules."""

    def __init__(self, bytecode=None):
        Disassembler.__init__(self, asm=Wasm(), bytecode=bytecode)

    def disassemble_opcode(self, bytecode=None, offset=0, nature_offset=0):
        '''
        Decode the single instruction at the start of `bytecode`.

        based on decode_bytecode()
        https://github.com/athre0z/wasm/blob/master/wasm/decode.py

        :param bytecode: buffer whose first byte is the opcode
        :param offset: offset recorded on the returned instruction
        :param nature_offset: second offset recorded on the instruction
        :return: the decoded `WasmInstruction`
        '''

        bytecode_wnd = memoryview(bytecode)
        bytecode_idx = 0
        opcode_id = byte2int(bytecode_wnd[bytecode_idx])
        opcode_size = 1

        bytecode_idx += 1
        if opcode_id == 0xfc:
            # 0xfc is a prefix byte: the real opcode is prefix + next byte.
            opcode_id = (opcode_id << 8) | byte2int(bytecode_wnd[bytecode_idx])
            if opcode_id == 0xfc0a:  # memory.copy
                opcode_size = 4
            elif opcode_id == 0xfc0b:  # memory.fill
                opcode_size = 3
        # default value
        # opcode:(mnemonic/name, imm_struct, pops, pushes, description)
        invalid = ('INVALID', 0, 0, 0, 'Unknown opcode')
        name, imm_struct, pops, pushes, description = \
            self.asm.table.get(opcode_id, invalid)

        operand_size = 0
        operand = None
        operand_interpretation = None

        if imm_struct is not None:
            # The `invalid` placeholder uses the int 0 as imm_struct, so an
            # int here means the opcode is missing from the table.
            assert not isinstance(imm_struct, int), f"imm_struct is int, most likely encountered unsupported inst.\nname: {name}\nimm_struct: {imm_struct}\npops: {pops} pushes: {pushes}\ndesc: {description}\nopcode_id: {hex(opcode_id)}"
            operand_size, operand, _ = imm_struct.from_raw(
                None, bytecode_wnd[bytecode_idx:])
            insn = inst_namedtuple(
                OPCODE_MAP[opcode_id], operand, bytecode_idx + operand_size)
            operand_interpretation = format_instruction(insn)
        insn_byte = bytecode_wnd[:bytecode_idx + operand_size].tobytes()
        instruction = WasmInstruction(
            opcode_id, opcode_size, name, imm_struct, operand_size, insn_byte,
            pops, pushes, description,
            operand_interpretation=operand_interpretation,
            offset=offset, nature_offset=nature_offset)
        return instruction

    def disassemble(self, bytecode=None, offset=0, nature_offset=0,
                    r_format='list'):
        """Disassemble WASM bytecode

        :param bytecode: bytecode sequence
        :param offset: start offset
        :param r_format: output format ('list'/'text'/'reverse')
        :type bytecode: bytes, str
        :type offset: int
        :type r_format: list, str, dict
        :return: dissassembly result depending of r_format
        :rtype: list, str, dict
        """

        return super().disassemble(bytecode, offset, nature_offset, r_format)

    def extract_functions_code(self, module_bytecode):
        """Return one `Function` per body found in the module's Code section.

        :param module_bytecode: raw bytes of a WASM module
        :return: list of `Function` objects with `instructions` filled in
        :raises ValueError: if the module has no Code section
        """
        mod_iter = iter(decode_module(module_bytecode))
        _, _ = next(mod_iter)  # skip the module header fragment
        sections = list(mod_iter)

        # Must start as None: without this, a module lacking a Code section
        # hit an UnboundLocalError below instead of the intended ValueError.
        code_data = None
        for _, cur_sec_data in sections:
            sec = cur_sec_data.get_decoder_meta()['types']['payload']
            if isinstance(sec, CodeSection):
                code_data = cur_sec_data
                break
        if not code_data:
            raise ValueError('No functions/codes in the module')

        functions = []
        for func in code_data.payload.bodies:
            instructions = self.disassemble(func.code.tobytes())
            cur_function = Function(0, instructions[0])
            cur_function.instructions = instructions
            functions.append(cur_function)
        return functions

    def disassemble_module(
            self, module_bytecode=None, offset=0, r_format='list'):
        """Disassemble every function of a module.

        :param module_bytecode: module bytes (converted via bytecode_to_bytes)
        :param offset: number of leading bytes to skip before decoding
        :param r_format: 'list' for a list of per-function instruction
            lists, 'text' for a printable listing
        :return: the result in the requested format, or None for any other
            `r_format` value
        """

        bytecode = bytecode_to_bytes(module_bytecode)

        functions = self.extract_functions_code(bytecode[offset:])
        self.instructions = [f.instructions for f in functions]

        if r_format == 'list':
            return self.instructions
        if r_format == 'text':
            # Single join instead of repeated string concatenation.
            return ''.join(
                'func %d\n%s\n\n' % (
                    index, '\n'.join(map(str, func.instructions)))
                for index, func in enumerate(functions))
        return None
def format_func_name(name, param_str, return_str):
    """Render a function signature as '<ret> <name>(<params>)'.

    The return part (and its trailing space) is left out when `return_str`
    is empty or otherwise falsy.
    """
    prefix = f'{return_str} ' if return_str else ''
    return f'{prefix}{name}({param_str})'


def format_bb_name(function_id, offset):
    """Build a basic-block label from a function id and offset, both in hex."""
    hex_parts = ['%x' % (part,) for part in (function_id, offset)]
    return 'block_' + '_'.join(hex_parts)


def format_kind_function(f_type):
    """Return the function type unchanged."""
    return f_type


def format_kind_table(element_type, flags, initial, maximum):
    """Describe a table as a dict of its element type and limits."""
    description = {'element_type': LANG_TYPE.get(element_type)}
    description['limits_flags'] = flags
    description['limits_initial'] = initial
    description['limits_maximum'] = maximum
    return description


def format_kind_memory(flags, initial, maximum):
    """Describe a memory as a dict of its limits."""
    return dict(
        limits_flags=flags,
        limits_initial=initial,
        limits_maximum=maximum,
    )


def format_kind_global(mutability, content_type, current_instruction):
    """Describe a global as [content_type, current_instruction].

    `mutability` is accepted but not included in the result for now.
    """
    return [content_type, current_instruction]
class WasmInstruction(Instruction):
    """A single decoded WebAssembly instruction.

    Stores the opcode, its raw bytes, stack effect (pops/pushes) and the
    textual rendering of its immediate, plus helpers that classify the
    instruction by group or by role in control flow.

    Note: because `__eq__` is defined without `__hash__`, instances are not
    hashable.
    """

    def __init__(
            self, opcode, opcode_size, name, imm_struct, operand_size,
            insn_byte, pops, pushes, description,
            operand_interpretation=None, offset=0, nature_offset=0):
        """Record the decoded fields of one instruction.

        :param opcode: numeric opcode id
        :param opcode_size: size recorded for the opcode itself
        :param name: mnemonic
        :param imm_struct: immediate structure descriptor, or None
        :param operand_size: number of immediate bytes in `insn_byte`
        :param insn_byte: raw bytes of the whole instruction
        :param pops: number of stack values consumed
        :param pushes: number of stack values produced
        :param description: human-readable description
        :param operand_interpretation: textual form of the instruction
            including its immediate
        :param offset: offset of the instruction
        :param nature_offset: second offset kept alongside `offset`
        """
        self.opcode = opcode
        self.opcode_size = opcode_size
        self.offset = offset
        self.nature_offset = nature_offset
        self.name = name
        self.description = description
        self.operand_size = operand_size
        # Immediate operand if any.  The explicit operand_size test matters:
        # insn_byte[-0:] would return the whole byte string, not "nothing".
        if operand_size and len(insn_byte) > 1:
            self.operand = insn_byte[-operand_size:]
        else:
            self.operand = None
        # specific interpretation of operand value
        self.operand_interpretation = operand_interpretation
        self.insn_byte = insn_byte
        self.pops = pops
        self.pushes = pushes
        self.imm_struct = imm_struct
        self.xref = list()
        self.ssa = None
        # which basic block locates in
        self.cur_bb = ''

    def __eq__(self, other):
        """ Instructions are equal if all features match """
        try:
            return self.opcode == other.opcode and \
                self.name == other.name and \
                self.offset == other.offset and \
                self.insn_byte == other.insn_byte and \
                self.operand_size == other.operand_size and \
                self.pops == other.pops and \
                self.pushes == other.pushes and \
                self.operand_interpretation == other.operand_interpretation and \
                self.description == other.description
        except AttributeError:
            # `other` is not instruction-like: let Python fall back to its
            # default comparison instead of crashing.
            return NotImplemented

    def __str__(self):
        """ String representation of the instruction """
        if self.operand:
            return self.operand_interpretation
        return self.name

    @property
    def group(self):
        """ Instruction classification per group """
        # Walks `_groups` in iteration order and keeps the last entry whose
        # key does not exceed the opcode; presumably the keys are ascending
        # opcode thresholds -- verify against `_groups` in wasm.py.
        last_class = _groups.get(0)
        for k, v in _groups.items():
            if self.opcode >= k:
                last_class = v
            else:
                return last_class
        return last_class

    @property
    def is_control(self):
        return self.group == 'Control'

    @property
    def is_parametric(self):
        return self.group == 'Parametric'

    @property
    def is_variable(self):
        return self.group == 'Variable'

    @property
    def is_memory(self):
        return self.group == 'Memory'

    @property
    def is_constant(self):
        return self.group == 'Constant'

    @property
    def is_logical_i32(self):
        return self.group == 'Logical_i32'

    @property
    def is_logical_i64(self):
        return self.group == 'Logical_i64'

    @property
    def is_logical_f32(self):
        return self.group == 'Logical_f32'

    @property
    def is_logical_f64(self):
        return self.group == 'Logical_f64'

    @property
    def is_arithmetic_i32(self):
        return self.group == 'Arithmetic_i32'

    @property
    def is_bitwise_i32(self):
        return self.group == 'Bitwise_i32'

    @property
    def is_arithmetic_i64(self):
        return self.group == 'Arithmetic_i64'

    @property
    def is_bitwise_i64(self):
        return self.group == 'Bitwise_i64'

    @property
    def is_arithmetic_f32(self):
        return self.group == 'Arithmetic_f32'

    @property
    def is_arithmetic_f64(self):
        return self.group == 'Arithmetic_f64'

    @property
    def is_conversion(self):
        return self.group == 'Conversion'

    @property
    def is_branch_conditional(self):
        """ Return True if the instruction is a conditional jump """
        return self.name in {'br_if', 'br_table', 'if'}

    @property
    def is_branch_unconditional(self):
        """ Return True if the instruction is a unconditional jump """
        return self.name in {'br'}

    @property
    def is_call(self):
        """ True if the instruction is a call instruction """
        return self.name in {'call', 'call_indirect'}

    @property
    def is_branch(self):
        """ True if the instruction is any kind of branch """
        return self.is_branch_conditional or self.is_branch_unconditional

    @property
    def is_halt(self):
        """ Return True if the instruction is a branch terminator """
        return self.name in {'unreachable', 'return'}

    @property
    def is_terminator(self):
        """ True if the instruction is a basic block terminator """
        return self.is_branch or self.is_halt

    @property
    def is_block_starter(self):
        """ Return True if the instruction is a basic block starter """
        return self.name in {'block', 'loop', 'if', 'else'}

    @property
    def is_block_terminator(self):
        """ Return True if the instruction is a basic block terminator """
        return self.name in {'else', 'end'}
class BitwiseInstructions:
    """
    Class to emulate bitwise operations for WebAssembly instructions using Z3 symbolic execution.
    """

    def __init__(self, instr_name, instr_operand, _):
        """
        Initialize the instruction with its name and operand.

        :param instr_name: The WebAssembly instruction name (e.g., "i32.and")
        :param instr_operand: Operand for the instruction (not used in this implementation)
        """
        self.instr_name = instr_name
        self.instr_operand = instr_operand

    # TODO overflow check in this function?
    def emulate(self, state):
        """
        Emulate the bitwise instruction by performing symbolic operations on two arguments
        from the symbolic stack, and push the result back onto the stack.

        For shifts, the top of the stack is the shift count and the value
        below it is the value being shifted.  The count is reduced modulo
        the operand width, as WebAssembly specifies.

        :param state: The current execution state, including the symbolic stack.
        :return: A one-element list holding the modified state.
        :raises UnsupportInstructionError: if the mnemonic is not a known
            bitwise operation.
        """

        arg1, arg2 = state.symbolic_stack.pop(), state.symbolic_stack.pop()
        instr_type = self.instr_name[:3]

        # Handle the case where the arguments are BoolRef types (Boolean), converting them to BitVec
        if is_bool(arg1):
            arg1 = BitVec(str(arg1), helper_map[instr_type])
            logging.warning(
                f"[!] In `BitwiseInstructions.py`, arg1 is BoolRef, translated to BitVec which may lead to some information loss")
        if is_bool(arg2):
            arg2 = BitVec(str(arg2), helper_map[instr_type])
            logging.warning(
                f"[!] In `BitwiseInstructions.py`, arg2 is BoolRef, translated to BitVec which may lead to some information loss")

        # Ensure that both arguments match the expected size for the WebAssembly type (i32 or i64)
        assert arg1.size(
        ) == helper_map[instr_type], f'arg1 size is {arg1.size()} instead of {helper_map[instr_type]} in do_emulate_bitwise_instruction'
        assert arg2.size(
        ) == helper_map[instr_type], f'arg2 size is {arg2.size()} instead of {helper_map[instr_type]} in do_emulate_bitwise_instruction'

        width = helper_map[instr_type]
        # WebAssembly takes the shift count modulo the bit width, whereas a
        # bit-vector shift by >= width yields 0 (or all sign bits).  The
        # width is a power of two, so masking implements the modulo.
        count_mask = width - 1

        # Determine the bitwise operation to perform based on the instruction name
        if '.and' in self.instr_name:
            result = simplify(arg1 & arg2)  # Bitwise AND operation
        elif '.or' in self.instr_name:
            result = simplify(arg1 | arg2)  # Bitwise OR operation
        elif '.xor' in self.instr_name:
            result = simplify(arg1 ^ arg2)  # Bitwise XOR operation
        elif '.shr_s' in self.instr_name:
            # Signed right shift (arithmetic shift)
            result = simplify(arg2 >> (arg1 & count_mask))
        elif '.shr_u' in self.instr_name:
            # Unsigned right shift (logical shift)
            result = simplify(LShR(arg2, arg1 & count_mask))
        elif '.shl' in self.instr_name:
            result = simplify(arg2 << (arg1 & count_mask))  # Left shift
        elif '.rotl' in self.instr_name:
            result = simplify(RotateLeft(arg2, arg1))  # Rotate left
        elif '.rotr' in self.instr_name:
            result = simplify(RotateRight(arg2, arg1))  # Rotate right
        else:
            raise UnsupportInstructionError

        # Handle the case where the result is a boolean value; use the
        # instruction's own width so an i64 result is not pushed as 32 bits.
        if is_bool(result):
            if is_true(result):
                result = BitVecVal(1, width)
            elif is_false(result):
                result = BitVecVal(0, width)

        assert is_bv(result) or is_bool(
            result), f"in bitwise instruction, the value to be pushed is {type(result)} instead of BitVec or Bool"

        state.symbolic_stack.append(result)

        return [state]
class ConstantInstructions:
    """Emulates the `*.const` instructions by pushing a Z3 constant."""

    def __init__(self, instr_name, instr_operand, instr_string):
        self.instr_name = instr_name
        self.instr_operand = instr_operand
        self.instr_str = instr_string

    # TODO overflow check in this function?
    def emulate(self, state):
        """Push the constant described by the instruction text.

        The text looks like `i32.const 0` or
        `f64.const 0x1.9p+6 (;=100;)`: the first space-separated token is
        the mnemonic and the last one carries the value.

        :param state: execution state whose `symbolic_stack` receives the value
        :return: a one-element list holding `state`
        :raises UnsupportInstructionError: for an unknown type prefix, or a
            float literal in neither supported form
        """
        tokens = self.instr_str.split(' ')
        literal = tokens[-1]
        type_prefix, _ = tokens[0].split('.')

        # Integer constants: the literal goes to Z3 as-is.
        if type_prefix == 'i32' or type_prefix == 'i64':
            state.symbolic_stack.append(
                BitVecVal(literal, 32 if type_prefix == 'i32' else 64))
            return [state]

        if type_prefix != 'f32' and type_prefix != 'f64':
            raise UnsupportInstructionError

        single = type_prefix == 'f32'

        # Preferred form: the decimal value inside a (;=100;) annotation.
        # TODO: need to be verified
        annotated = re.search(';=([0-9.-]+);', literal)
        if annotated:
            value = annotated.group(1)
        elif literal[:2] == '0x':
            # Otherwise the hex digits are read as the raw big-endian bytes
            # of the float, left-padded with zeros to 4 or 8 bytes.
            digits = literal[2:]
            digits = '0' * ((8 if single else 16) - len(digits)) + digits
            value = unpack('!f' if single else '!d', bytes.fromhex(digits))[0]
        else:
            raise UnsupportInstructionError

        state.symbolic_stack.append(
            FPVal(value, Float32() if single else Float64()))
        return [state]
def emulate(self, state):
    """Emulate a comparison / test instruction on the symbolic stack.

    Integer mnemonics (first character 'i') go through the integer path and
    everything else through the floating-point path.  The outcome is pushed
    as a 32-bit value: the constant 1 or 0 when Z3 can decide the
    condition, otherwise an `If` expression choosing between them.

    :param state: execution state providing `symbolic_stack`
    :return: a one-element list holding `state`
    :raises UnsupportInstructionError: if no known comparison matches
    """
    name = self.instr_name
    type_key = name[:3]

    def push_outcome(condition):
        # try to simplify the condition and insert 1 or 0 directly,
        # instead of an ite statement
        condition = simplify(condition)
        if is_true(condition):
            state.symbolic_stack.append(BitVecVal(1, 32))
        elif is_false(condition):
            state.symbolic_stack.append(BitVecVal(0, 32))
        else:
            state.symbolic_stack.append(
                If(condition, BitVecVal(1, 32), BitVecVal(0, 32)))
        return [state]

    # (substring of the mnemonic, comparison builder), tried in order.
    # `top` is the value popped first, `below` the one popped second.
    int_comparisons = (
        ('eq', lambda top, below: top == below),
        ('ne', lambda top, below: top != below),
        ('lt_s', lambda top, below: below < top),
        ('lt_u', lambda top, below: ULT(below, top)),
        ('gt_s', lambda top, below: below > top),
        ('gt_u', lambda top, below: UGT(below, top)),
        ('le_s', lambda top, below: below <= top),
        ('le_u', lambda top, below: ULE(below, top)),
        ('ge_s', lambda top, below: below >= top),
        ('ge_u', lambda top, below: UGE(below, top)),
    )
    float_comparisons = (
        ('eq', lambda top, below: fpEQ(top, below)),
        ('ne', lambda top, below: fpNEQ(top, below)),
        ('lt', lambda top, below: fpLT(below, top)),
        ('le', lambda top, below: fpLEQ(below, top)),
        ('gt', lambda top, below: fpGT(below, top)),
        ('ge', lambda top, below: fpGEQ(below, top)),
    )

    if name[:1] == 'i':
        if 'eqz' in name:
            # Unary test: compare the single operand with zero.
            operand = state.symbolic_stack.pop()

            assert operand.size(
            ) == helper_map[type_key], f"in `eqz` the argument popped size is {operand.size()} instead of {helper_map[type_key]}"

            return push_outcome(operand == 0)

        top, below = state.symbolic_stack.pop(), state.symbolic_stack.pop()

        assert is_bv(top) and is_bv(
            below), f"in `logical` instruction, arg1 or arg2 type is wrong instead of BitVec"

        for fragment, build in int_comparisons:
            if fragment in name:
                return push_outcome(build(top, below))
        raise UnsupportInstructionError

    top, below = state.symbolic_stack.pop(), state.symbolic_stack.pop()

    # Both operands must have the exponent / significand widths of the type.
    assert top.ebits() == helper_map[type_key][0] and top.sbits(
    ) == helper_map[type_key][1], 'emul_logical_f_instr arg1 type mismatch'
    assert below.ebits() == helper_map[type_key][0] and below.sbits(
    ) == helper_map[type_key][1], 'emul_logical_f_instr arg2 type mismatch'

    for fragment, build in float_comparisons:
        if fragment in name:
            return push_outcome(build(top, below))
    raise UnsupportInstructionError
class MemoryInstructions:
    """Emulates memory size, bulk-memory, load and store instructions."""

    def __init__(self, instr_name, instr_operand, instr_string):
        self.instr_name = instr_name
        self.instr_operand = instr_operand
        self.instr_str = instr_string

    def emulate(self, state, data_section):
        """Apply the memory instruction to `state`.

        :param state: execution state (symbolic stack and symbolic memory)
        :param data_section: data section consulted when reading memory
        :return: a one-element list holding `state`
        :raises UnsupportInstructionError: for an unknown mnemonic
        """
        global memory_count, memory_step
        if self.instr_name == 'current_memory':
            state.symbolic_stack.append(BitVecVal(memory_count, 32))
        elif self.instr_name == 'grow_memory':
            # Signature is [i32] -> [i32]: the requested page delta must be
            # consumed.  The model grows by the fixed `memory_step`, so the
            # popped value is only discarded to keep the stack balanced.
            state.symbolic_stack.pop()
            prev_size = memory_count
            memory_count += memory_step
            state.symbolic_stack.append(BitVecVal(prev_size, 32))
        elif self.instr_name == "memory.copy":
            # memory.copy
            # The instruction has the signature [i32 i32 i32] -> []. The parameters are, in order:
            # top-0: Number of bytes to copy
            # top-1: Source address to copy from
            # top-2: Destination address to copy to
            # example:
            # ;; Copy data in default memory from [100, 125] to [50, 75]
            # i32.const 50 ;; Destination address to copy to (top-2)
            # i32.const 100 ;; Source address to copy from (top-1)
            # i32.const 25 ;; Number of bytes to copy (top-0)
            # memory.copy ;; Copy memory
            len_v = state.symbolic_stack.pop().as_long()
            src_addr = state.symbolic_stack.pop().as_long()
            dest_addr = state.symbolic_stack.pop().as_long()
            # Read every source byte first, then write, so overlapping
            # ranges copy the original contents.
            vlis = [
                lookup_symbolic_memory_data_section(
                    state.symbolic_memory, data_section, src_addr + i, 1)
                for i in range(len_v)]
            for i, v in enumerate(vlis):
                state.symbolic_memory = insert_symbolic_memory(
                    state.symbolic_memory, dest_addr + i, 1, v)
            print(f"memory.copy: src_addr={src_addr}, dest_addr={dest_addr}, len={len_v}")
        elif self.instr_name == "memory.fill":
            # memory.fill
            # The instruction has the signature [i32 i32 i32] -> []. The parameters are, in order:
            # top-0: The number of bytes to update
            # top-1: The value to set each byte to (must be < 256)
            # top-2: The pointer to the region to update
            # example:
            # ;; Fill region at offset/range in default memory with 255
            # i32.const 200 ;; The pointer to the region to update (top-2)
            # i32.const 255 ;; The value to set each byte to (must be < 256) (top-1)
            # i32.const 100 ;; The number of bytes to update (top-0)
            # memory.fill ;; Fill default memory
            len_v = state.symbolic_stack.pop().as_long()
            val = state.symbolic_stack.pop().as_long()
            addr = state.symbolic_stack.pop().as_long()
            # Actually perform the fill (previously the operands were only
            # popped).  Only the low byte of the value is stored; one-byte
            # inserts mirror what memory.copy does above.
            fill_byte = BitVecVal(val & 0xFF, 8)
            for i in range(len_v):
                state.symbolic_memory = insert_symbolic_memory(
                    state.symbolic_memory, addr + i, 1, fill_byte)
            print(f"memory.fill: addr={addr}, val={val}, len={len_v}")
        elif 'load' in self.instr_name:
            load_instr(self.instr_str, state, data_section)
        elif 'store' in self.instr_name:
            store_instr(self.instr_str, state)
        else:
            print('\nErr:\nUnsupported instruction: %s\n' % self.instr_name)
            raise UnsupportInstructionError

        return [state]


def _memarg_offset(instr):
    """Return the offset token (third space-separated field) as an int."""
    token = instr.split(' ')[2]
    # offset maybe int or hex
    try:
        return int(token)
    except ValueError:
        return int(token, 16)


def load_instr(instr, state, data_section):
    """Emulate a load: pop the base address and push the loaded value.

    :param instr: instruction text; field 0 is the mnemonic and field 2
        the offset
    :param state: execution state (symbolic stack and symbolic memory)
    :param data_section: data section consulted when reading memory
    """
    base = state.symbolic_stack.pop()
    offset = _memarg_offset(instr)
    addr = simplify(base + offset)

    # Use a plain int address whenever it is concrete.
    if is_bv_value(addr):
        addr = addr.as_long()

    # determine how many bytes should be loaded
    # the dict is like {'8': 1}
    bytes_length_mapping = {str(k): k // 8 for k in range(8, 65, 8)}
    instr_name = instr.split(' ')[0]
    if len(instr_name) == 8:
        # Plain `xNN.load`: the width comes from the type itself.
        load_length = bytes_length_mapping[instr_name[1:3]]
    else:
        # Narrow load such as `i32.load8_u`: width follows "load".
        load_length = bytes_length_mapping[re.search(
            r"load([0-9]+)\_", instr_name).group(1)]

    val = lookup_symbolic_memory_data_section(
        state.symbolic_memory, data_section, addr, load_length)

    # Check for a missing value before touching it; previously `val.size()`
    # ran first, so this message could never be reached.
    if val is None:
        exit(f"the loaded value should not be None")

    if val.size() != 8 * load_length:
        # we assume the memory are filled by 0 initially
        val = ZeroExt(8 * load_length - val.size(), val)

    # cast to other type of bit vector
    float_mapping = {
        'f32': Float32,
        'f64': Float64,
    }
    if len(instr_name) == 8 and instr_name[0] == "f":
        val = simplify(fpBVToFP(val, float_mapping[instr_name[:3]]()))
    elif instr_name[-2] == "_":
        # Narrow load: widen to the full type width.
        if instr_name[-1] == "s":  # sign extend
            val = simplify(
                SignExt(int(instr_name[1: 3]) - load_length * 8, val))
        else:
            val = simplify(
                ZeroExt(int(instr_name[1: 3]) - load_length * 8, val))

    state.symbolic_stack.append(val)


# deal with store instruction
def store_instr(instr, state):
    """Emulate a store: pop the value and base address, then write memory.

    :param instr: instruction text; field 0 is the mnemonic and field 2
        the offset
    :param state: execution state (symbolic stack and symbolic memory)
    """
    offset = _memarg_offset(instr)

    val, base = state.symbolic_stack.pop(), state.symbolic_stack.pop()
    addr = simplify(base + offset)

    # change addr's type to int if possible
    # or it will be the BitVecRef
    if is_bv_value(addr):
        addr = addr.as_long()

    # determine how many bytes should be stored
    # the dict is like {'8': 1}
    bytes_length_mapping = {str(k): k // 8 for k in range(8, 65, 8)}
    instr_name = instr.split(' ')[0]
    if len(instr_name) == 9:
        # Plain `xNN.store`; floats are stored as their IEEE bit pattern.
        if instr_name[0] == 'f':
            val = fpToIEEEBV(val)
        state.symbolic_memory = insert_symbolic_memory(
            state.symbolic_memory, addr,
            bytes_length_mapping[instr_name[1:3]], val)
    else:
        # Narrow store such as `i32.store8`: keep only the low bytes.
        stored_length = bytes_length_mapping[re.search(
            r"store([0-9]+)", instr_name).group(1)]
        val = simplify(Extract(stored_length * 8 - 1, 0, val))
        state.symbolic_memory = insert_symbolic_memory(
            state.symbolic_memory, addr, stored_length, val)
no_need_true and not no_need_false: 45 | new_state = deepcopy(state) 46 | 47 | state.solver.add(op) 48 | state.symbolic_stack.append(arg1) 49 | 50 | new_state.solver.add(Not(op)) 51 | new_state.symbolic_stack.append(arg2) 52 | 53 | return [state, new_state] 54 | else: 55 | if no_need_true: 56 | state.solver.add(Not(op)) 57 | state.symbolic_stack.append(arg2) 58 | else: 59 | state.solver.add(op) 60 | state.symbolic_stack.append(arg1) 61 | return [state] 62 | else: 63 | exit(f"select instruction error. op is {op}") 64 | else: 65 | raise UnsupportInstructionError 66 | -------------------------------------------------------------------------------- /seewasm/arch/wasm/instructions/VariableInstructions.py: -------------------------------------------------------------------------------- 1 | # emulate the variable related instructions 2 | 3 | from seewasm.arch.wasm.exceptions import UnsupportInstructionError, UnsupportGlobalTypeError 4 | from z3 import BitVecVal, is_bv, is_bv_value 5 | 6 | 7 | class VariableInstructions: 8 | def __init__(self, instr_name, instr_operand, _): 9 | self.instr_name = instr_name 10 | self.instr_operand = instr_operand 11 | 12 | def emulate(self, state): 13 | # TODO 14 | # for go_samples.nosync/tinygo_main.wasm, the global.get operand would be prefixed by four \x80 15 | if self.instr_operand.startswith(b'\x80\x80\x80\x80'): 16 | self.instr_operand = self.instr_operand[4:] 17 | op = int.from_bytes(self.instr_operand, byteorder='little') 18 | 19 | if self.instr_name == 'get_local': 20 | if state.local_var.get(op, None) is not None: 21 | state.symbolic_stack.append(state.local_var[op]) 22 | else: 23 | state.symbolic_stack.append(state.local_var[op]) 24 | # raise UninitializedLocalVariableError 25 | elif self.instr_name == 'set_local': 26 | var = state.symbolic_stack.pop() 27 | state.local_var[op] = var 28 | elif self.instr_name == 'get_global': 29 | global_index = op 30 | global_operand = state.globals[global_index] 31 | 32 | if isinstance( 33 | 
# Names checked by `is_modeled`, keyed by source language ('wasi' is always
# consulted when no language is specified).
MODELED_FUNCS = {
    'c': {
        '__small_printf', 'abs', 'atof', 'atoi', 'exp', 'getchar',
        'iprintf', 'printf', 'putchar', 'puts', 'scanf', 'swap',
        'system', 'emscripten_resize_heap', 'fopen', 'vfprintf',
        'open', 'exit', 'setlocale', 'hard_locale', 'strstr',
    },
    'go': {
        'fmt.Scanf', 'fmt.Printf', 'runtime.divideByZeroPanic',
        'runtime.lookupPanic',
        # A comma was missing after 'runtime.nilPanic', which silently
        # concatenated it with 'runtime.slicePanic' into one bogus name.
        'runtime.nilPanic', 'runtime.slicePanic',
        'runtime.sliceToArrayPointerPanic', 'runtime.unsafeSlicePanic',
        'runtime.chanMakePanic', 'runtime.negativeShiftPanic',
        'runtime.blockingPanic', 'runtime.calculateHeapAddresses',
        'memset', 'runtime.alloc', 'memcpy', 'syscall/js.valueGet',
        'runtime.putchar',
    },
    # An empty set, for consistency with the other entries ({} is a dict).
    'rust': set(),
    'wasi': {
        'args_sizes_get', 'args_get', 'environ_sizes_get',
        'fd_advise', 'fd_fdstat_get', 'fd_tell', 'fd_seek',
        'fd_close', 'fd_read', 'fd_write', 'proc_exit',
        'fd_prestat_get', 'fd_prestat_dir_name', 'path_open',
    },
}


def is_modeled(func_name, specify_lang=None):
    """Return True if ``func_name`` is listed in ``MODELED_FUNCS``.

    With ``specify_lang`` only that language's set is consulted; otherwise
    the 'wasi' set and the set of ``Configuration.get_source_type()`` are.
    """
    if specify_lang:
        return func_name in MODELED_FUNCS[specify_lang]
    return (func_name in MODELED_FUNCS['wasi']
            or func_name in MODELED_FUNCS[Configuration.get_source_type()])


def _extract_params(param_str, state):
    """
    Return a list of elements, which are the arguments of the given import function.
    Note that, the order will be reversed.
    For example, if the signature of function foo is: foo (a, b), the returned arguments will be [b, a]

    Concrete bit-vector values are converted to Python ints; everything else
    is returned unchanged.
    """
    # split() without a separator yields [] for an empty signature, whereas
    # split(" ") yields [''] and would pop one value that is not a parameter.
    param_cnt = len(param_str.split())
    params = [state.symbolic_stack.pop() for _ in range(param_cnt)]

    # concretize
    return [p.as_long() if is_bv_value(p) else p for p in params]


def _storeN(state, dest, val, len_in_bytes):
    """Write ``val`` (``len_in_bytes`` wide) into symbolic memory at ``dest``.

    A value that is not a z3 bit-vector is first wrapped into a concrete
    bit-vector of ``len_in_bytes * 8`` bits.
    """
    if not is_bv(val):
        val = BitVecVal(val, len_in_bytes * 8)
    state.symbolic_memory = insert_symbolic_memory(
        state.symbolic_memory, dest, len_in_bytes, val)


def _loadN(state, data_section, dest, len_in_bytes):
    """Read ``len_in_bytes`` bytes at ``dest`` via
    ``lookup_symbolic_memory_data_section``; a concrete bit-vector result is
    returned as a Python int."""
    val = lookup_symbolic_memory_data_section(
        state.symbolic_memory, data_section, dest, len_in_bytes)
    if is_bv_value(val):
        val = val.as_long()
    return val
class WasmVMstate(VMstate):
    """The `state` that is passed around within Wasm-SE."""

    def __init__(self):
        # Default factory for locals that were never written: a 32-bit zero.
        def local_default():
            return BitVecVal(0, 32)

        # core data structures
        self.symbolic_stack = []
        self.symbolic_memory = {}
        self.local_var = defaultdict(local_default)
        self.globals = {}
        # instruction
        self.instr = "end"
        # current function name
        self.current_func_name = ''
        # current basic block's name, used in recursive process
        self.current_bb_name = ''
        # keep the operator and its speculated sign
        self.sign_mapping = defaultdict(bool)
        # context stack
        # whose element is 4-tuple: (func_name, stack, local, require_return)
        # TODO files buffer may need to maintained in context
        self.context_stack = []

        self.args = ""

        # all items should be initialized by init_file_for_file_sys in utils;
        # descriptors 0, 1 and 2 are the standard streams.
        self.file_sys = {}
        std_streams = (("stdin", "r"), ("stdout", "w"), ("stderr", "w"))
        for fd, (stream_name, open_flag) in enumerate(std_streams):
            entry = init_file_for_file_sys()
            self.file_sys[fd] = entry
            entry["name"] = stream_name
            entry["status"] = True
            entry["flag"] = open_flag

        # used by br_if instruction
        self.edge_type = ''
        # the corresponding solver
        self.solver = SMTSolver(Configuration.get_solver())
        # the name of function that is called in call_indirect
        self.call_indirect_callee = ''

    def __str__(self):
        func_name = readable_internal_func_name(
            Configuration.get_func_index_to_func_name(),
            self.current_func_name)
        rows = (
            f'Current Func:\t{func_name}',
            f'Stack:\t\t{self.symbolic_stack}',
            f'Local Var:\t{self.local_var}',
            f'Global Var:\t{self.globals}',
            f'Memory:\t\t{self.symbolic_memory}',
            f'Constraints:\t{self.solver.assertions()}',
        )
        return '\n'.join(rows) + '\n'

    def details(self):
        raise NotImplementedError

    def __lt__(self, other):
        # States never order before one another.
        return False

    def __getstate__(self):
        # Hand out a shallow copy of the attribute dict.
        return dict(self.__dict__)
class BasicBlock(object):
    """
    The basic block in the CFG, consisting of instructions
    """

    def __init__(self, start_offset=0x00, start_instr=None,
                 name='block_default_name'):
        """
        The properties of basic blocks

        Properties:
            start_offset: the `offset` of the first instruction
            start_instr: the first instruction of the current basic block
            name: the name of the basic block, whose naming style is "block_[func_index]_[start_offset]"
            end_offset: the `offset_end` of the last instruction
            end_instr: the last instruction

        Below are properties may be deprecated in the future
            states: not clear
            function_name: its corresponding function's name
        """
        self.start_offset = start_offset
        self.start_instr = start_instr
        self.name = name
        self.end_offset = None
        self.end_instr = None
        self.instructions = list()

        # may be deprecated in the future
        self.states = []
        self.function_name = "unknown"

    @property
    def size(self):
        """Distance between `end_offset` and `start_offset`.

        Raises TypeError while `end_offset` is still None.
        """
        return self.end_offset - self.start_offset

    def __str__(self):
        """Multi-line summary of the block.

        A freshly constructed block has no start/end instruction and no end
        offset yet; those fields are printed as ``None`` instead of raising.
        """
        def instr_name(instr):
            return str(None) if instr is None else str(instr.name)

        size = None if self.end_offset is None else self.size
        rows = [
            str(self.start_offset) + ': ' + str(self.name),
            'start_instr = ' + instr_name(self.start_instr),
            'size = ' + str(size),
            'end_offset = ' + str(self.end_offset),
            'end_instr = ' + instr_name(self.end_instr),
            'function_name = ' + str(self.function_name),
        ]
        # every row is newline-terminated, followed by two blank lines
        return '\n'.join(rows) + '\n' + '\n\n'

    def instructions_details(self, format='hex'):
        """Return one line per instruction: hex offset plus a rendering of it.

        `format` is accepted for interface compatibility and is not used.
        """
        rows = []
        for i in self.instructions:
            line = '%x: ' % i.offset
            if i.operand is not None and not i.xref:
                line += '%s' % str(i)
            elif isinstance(i.xref, list) and i.xref:
                line += '%s %s' % (i.name, i.xref)
            elif isinstance(i.xref, int) and i.xref:
                line += '%s %x' % (i.name, i.xref)
            elif i.operand_interpretation:
                line += i.operand_interpretation
            else:
                line += i.name + ' '
            rows.append(line + '\n')
        return ''.join(rows)

    def instructions_ssa(self, format='hex'):
        """Return one line per instruction with its SSA form, or a
        ``[NO_SSA]`` marker followed by the instruction name.

        `format` is accepted for interface compatibility and is not used.
        """
        rows = []
        for i in self.instructions:
            line = '%x: ' % i.offset
            if i.ssa:
                line += '' + i.ssa.format()
            else:
                line += '[NO_SSA] ' + i.name
            rows.append(line + '\n')
        return ''.join(rows)
25 | """ 26 | 27 | self.node_from = node_from 28 | self.node_to = node_to 29 | self.type = edge_type 30 | 31 | self.condition = condition 32 | 33 | def __str__(self): 34 | return str(self.as_dict()) 35 | 36 | def __eq__(self, other): 37 | return self.node_from == other.node_from and\ 38 | self.node_to == other.node_to and\ 39 | self.type == other.type and\ 40 | self.condition == other.condition 41 | 42 | def __hash__(self): 43 | return hash(('from', self.node_from, 44 | 'to', self.node_to, 45 | 'type', self.type, 46 | 'condition', self.condition)) 47 | 48 | def as_dict(self): 49 | return {'from': str(self.node_from), 'to': str(self.node_to), 50 | 'type': self.type, 'condition': self.condition} 51 | -------------------------------------------------------------------------------- /seewasm/core/function.py: -------------------------------------------------------------------------------- 1 | class Function(object): 2 | """ 3 | The function object of the given Wasm module 4 | """ 5 | 6 | def __init__(self, start_offset, start_instr=None, 7 | name='func_default_name', prefered_name=None): 8 | """ 9 | The properties of the functions of the given Wasm module 10 | 11 | Properties: 12 | start_offset: the start offset of the first instruction 13 | start_instr: the first instruction of the function 14 | name: the function's name, represented in '$funcX' or readable name (TODO will make them all to readable name in the future) 15 | prefered_name: the signature of the function, including type of arguments and return value 16 | size: the size of the function, the sum of all its composed instructions 17 | end_offset: the end_offset of its last basic block 18 | end_instr: the last instruction of the function 19 | basicblocks: the list of all composed basic blocks 20 | instructions: the list of all composed instructions 21 | """ 22 | self.start_offset = start_offset 23 | self.start_instr = start_instr 24 | self.name = name 25 | self.prefered_name = prefered_name if prefered_name 
class Instruction(object):
    """
    The instruction object
    """

    def __init__(self, opcode, name,
                 operand_size, pops, pushes, fee,
                 description, operand=None,
                 operand_interpretation=None, offset=0, xref=None):
        """
        The properties of instruction object

        Properties:
            opcode: the int value of the instruction
            offset: the offset of the instruction on function level
            name: the readable name of the instruction
            description: a brief description of the instruction
            operand_size: the size of its corresponding operand
            operand: Immediate operand if any specific interpretation of operand value, in bytes. The operand value for JUMP is xref
            operand_interpretation: the instruction and its operand in a readable way, same as the string in the wat file
            pops: how many elements will be popped from the stack
            pushes: how many elements will be pushed into the stack
            fee: not clear
            xref: the jump target of the current instruction
            ssa: not clear
        """

        self.opcode = opcode
        self.opcode_size = 1
        self.offset = offset
        self.name = name
        self.description = description
        self.operand_size = operand_size
        self.operand = operand
        self.operand_interpretation = operand_interpretation
        self.pops = pops
        self.pushes = pushes
        self.fee = fee
        self.xref = xref
        self.ssa = None

    def _identity(self):
        """Tuple of the features that define equality (and the hash)."""
        return (self.opcode, self.name, self.operand, self.operand_size,
                self.pops, self.pushes, self.fee, self.offset,
                self.description)

    def __eq__(self, other):
        """ Instructions are equal if all features match """
        if not isinstance(other, Instruction):
            # Let Python fall back to its default comparison instead of
            # raising AttributeError on unrelated objects.
            return NotImplemented
        return self._identity() == other._identity()

    def __hash__(self):
        # Defining __eq__ alone makes instances unhashable; keep the hash
        # consistent with the fields compared in __eq__.
        return hash(self._identity())

    def __simple_output_format(self, offset=True):
        output = self.name
        # An instruction may declare an operand size before the operand bytes
        # are attached; skip the operand rather than crash in that case.
        if self.has_operand and self.operand is not None:
            output += ' 0x%x' % int.from_bytes(self.operand,
                                               byteorder='big')

        if offset:
            return "%d %s" % (self.offset, output)
        return "%s" % output

    def __str__(self):
        """ String representation of the instruction """
        return self.__simple_output_format(offset=False)

    @property
    def bytes(self):
        """ Encoded instruction

        Returned as a ``str`` with one character per encoded byte (opcode
        followed by the operand bytes, if any).
        """
        encoded = bytearray()
        encoded.append(self.opcode)
        if self.operand:
            encoded.extend(self.operand)
        return "".join(map(chr, encoded))

    @property
    def offset_end(self):
        """ Offset of the last byte of the instruction """
        return self.offset + self.size - 1

    @property
    def semantics(self):
        """ Canonical semantics """
        return self.name

    @property
    def size(self):
        """ Size of the encoded instruction """
        return self.opcode_size + self.operand_size

    @property
    def has_operand(self):
        """ True if the instruction uses an immediate operand """
        return self.operand_size > 0

    @property
    def is_branch_conditional(self):
        """ Not implemented in this base class """
        raise NotImplementedError

    @property
    def is_branch_unconditional(self):
        """ Not implemented in this base class """
        raise NotImplementedError

    @property
    def is_branch(self):
        """ True if the instruction is a jump """
        return self.is_branch_conditional or self.is_branch_unconditional

    @property
    def is_halt(self):
        """ Not implemented in this base class """
        raise NotImplementedError

    @property
    def is_terminator(self):
        """ Not implemented in this base class """
        raise NotImplementedError

    @property
    def have_xref(self):
        """ TODO """
        raise NotImplementedError
class BytecodeEmptyException(Exception):
    """Exception raised when bytecode is None"""
    pass


class Disassembler(object):
    """ Generic Disassembler class """

    def __init__(self, asm, bytecode=None):
        self.bytecode = bytecode
        self.instructions = list()
        self.reverse_instructions = dict()
        self.asm = asm

    def attributes_reset(self):
        """Reset instructions class attributes """
        self.instructions = list()
        self.reverse_instructions = dict()

    def disassemble_opcode(self, bytecode, offset=0, nature_offset=0):
        """ Generic method to disassemble one instruction

        `nature_offset` is accepted because `disassemble` passes it as a
        third positional argument; without it this base method failed with
        TypeError instead of the intended NotImplementedError.
        """
        raise NotImplementedError

    def disassemble(self, bytecode=None, offset=0, nature_offset=0,
                    r_format='list'):
        """Generic method to disassemble bytecode

        :param bytecode: bytecode sequence
        :param offset: start offset
        :param nature_offset: counter passed to `disassemble_opcode` and
            incremented by one per decoded instruction
        :param r_format: output format ('list'/'text'/'reverse')
        :type bytecode: bytes, str
        :type offset: int
        :type nature_offset: int
        :type r_format: list, str, dict
        :return: dissassembly result depending of r_format
            (None for an unrecognised r_format)
        :rtype: list, str, dict
        :raises BytecodeEmptyException: if no bytecode was given here or at
            construction time
        """
        # reinitialize class variable
        self.attributes_reset()

        self.bytecode = bytecode if bytecode else self.bytecode
        if not self.bytecode:
            raise BytecodeEmptyException()

        self.bytecode = bytecode_to_bytes(self.bytecode)

        while offset < len(self.bytecode):
            instr = self.disassemble_opcode(
                self.bytecode[offset:],
                offset, nature_offset)
            offset += instr.size
            nature_offset += 1
            self.instructions.append(instr)

        # fill reverse instructions (index -> instruction)
        self.reverse_instructions = dict(enumerate(self.instructions))

        # return instructions
        if r_format == 'list':
            return self.instructions
        if r_format == 'text':
            return '\n'.join(map(str, self.instructions))
        if r_format == 'reverse':
            return self.reverse_instructions
        # Unknown formats keep the historical behaviour of returning None.
        return None
19 | -------------------------------------------------------------------------------- /seewasm/engine/engine.py: -------------------------------------------------------------------------------- 1 | class VMstate(object): 2 | 3 | def __init__(self, gas=1000000): 4 | """ TODO """ 5 | raise NotImplementedError 6 | 7 | def details(self): 8 | """ TODO """ 9 | raise NotImplementedError 10 | -------------------------------------------------------------------------------- /test/c/src/hello.c: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | int main(int argc, char **argv) 4 | { 5 | printf("Hello, world!\n"); 6 | return 0; 7 | } -------------------------------------------------------------------------------- /test/c/src/sym.c: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | int foo(char a){ 4 | if (a < 'a') { 5 | printf("a"); 6 | return 0; 7 | } 8 | else if (a < 'z') { 9 | printf("b"); 10 | return 1; 11 | } 12 | else { 13 | printf("c"); 14 | return 2; 15 | } 16 | } 17 | 18 | int main(int argc, char* argv[]){ 19 | return foo(argv[1][0]); 20 | } -------------------------------------------------------------------------------- /test/go/src/hello.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | func main() { 4 | println("Hello, world!") 5 | } -------------------------------------------------------------------------------- /test/hello_world.wasm: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PKU-ASAL/WASEM/8cfdc5e25f42c41d95aa061e3bfe98a987b5914c/test/hello_world.wasm -------------------------------------------------------------------------------- /test/hello_world_go.wasm: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/PKU-ASAL/WASEM/8cfdc5e25f42c41d95aa061e3bfe98a987b5914c/test/hello_world_go.wasm -------------------------------------------------------------------------------- /test/hello_world_rust.wasm: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PKU-ASAL/WASEM/8cfdc5e25f42c41d95aa061e3bfe98a987b5914c/test/hello_world_rust.wasm -------------------------------------------------------------------------------- /test/password.wasm: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PKU-ASAL/WASEM/8cfdc5e25f42c41d95aa061e3bfe98a987b5914c/test/password.wasm -------------------------------------------------------------------------------- /test/rust/hello/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "hello_rust" 3 | version = "0.1.0" 4 | edition = "2021" 5 | 6 | [dependencies] 7 | -------------------------------------------------------------------------------- /test/rust/hello/src/main.rs: -------------------------------------------------------------------------------- 1 | fn main() { 2 | println!("Hello, world!"); 3 | } 4 | -------------------------------------------------------------------------------- /test/sym_c.wasm: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PKU-ASAL/WASEM/8cfdc5e25f42c41d95aa061e3bfe98a987b5914c/test/sym_c.wasm -------------------------------------------------------------------------------- /test/test.wasm: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PKU-ASAL/WASEM/8cfdc5e25f42c41d95aa061e3bfe98a987b5914c/test/test.wasm -------------------------------------------------------------------------------- /test/test_c_library.wasm: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/PKU-ASAL/WASEM/8cfdc5e25f42c41d95aa061e3bfe98a987b5914c/test/test_c_library.wasm -------------------------------------------------------------------------------- /test/test_return.wasm: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PKU-ASAL/WASEM/8cfdc5e25f42c41d95aa061e3bfe98a987b5914c/test/test_return.wasm -------------------------------------------------------------------------------- /test/test_unreachable.wasm: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PKU-ASAL/WASEM/8cfdc5e25f42c41d95aa061e3bfe98a987b5914c/test/test_unreachable.wasm -------------------------------------------------------------------------------- /wasm/__init__.py: -------------------------------------------------------------------------------- 1 | from __future__ import unicode_literals 2 | 3 | __version__ = '1.2' 4 | 5 | from .decode import ( 6 | decode_bytecode, 7 | decode_module, 8 | ) 9 | 10 | from .formatter import ( 11 | format_function, 12 | format_instruction, 13 | format_lang_type, 14 | format_mutability, 15 | ) 16 | 17 | from .modtypes import ( 18 | ModuleHeader, 19 | FunctionImportEntryData, 20 | ResizableLimits, 21 | TableType, 22 | MemoryType, 23 | GlobalType, 24 | ImportEntry, 25 | ImportSection, 26 | FuncType, 27 | TypeSection, 28 | FunctionSection, 29 | TableSection, 30 | MemorySection, 31 | InitExpr, 32 | GlobalEntry, 33 | GlobalSection, 34 | ExportEntry, 35 | ExportSection, 36 | StartSection, 37 | ElementSegment, 38 | ElementSection, 39 | LocalEntry, 40 | FunctionBody, 41 | CodeSection, 42 | DataSegment, 43 | DataSection, 44 | Naming, 45 | NameMap, 46 | LocalNames, 47 | LocalNameMap, 48 | NameSubSection, 49 | Section, 50 | ) 51 | 52 | from .immtypes import ( 53 | BlockImm, 54 | BranchImm, 55 | BranchTableImm, 56 | CallImm, 
def dump():
    """Command-line entry point: print a WASM module's header and sections.

    Parses the command line (positional ``wasm_file``, optional ``--disas``),
    decodes the file with `decode_module` and prints every yielded fragment
    through its ``to_string`` method. With ``--disas``, each function body of
    the code section is additionally printed as text; parameter and result
    information is included only when both a type section and a function
    section were seen.

    Returns None. If the input file cannot be opened or read, a message is
    written to stderr and the function returns without raising.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('wasm_file', type=str)
    parser.add_argument('--disas', action='store_true', help="Disassemble code")
    args = parser.parse_args()

    try:
        # Keep the handle and the bytes under separate names.
        with open(args.wasm_file, 'rb') as wasm_file:
            raw = wasm_file.read()
    except IOError as exc:
        print("[-] Can't open input file: " + str(exc), file=sys.stderr)
        return

    # Parse & print header (always the first fragment).
    mod_iter = iter(decode_module(raw, decode_name_subsections=False))
    hdr, hdr_data = next(mod_iter)
    print(hdr.to_string(hdr_data))

    # Parse & print other sections, remembering the ones needed to
    # disassemble function bodies afterwards.
    code_sec = None
    type_sec = None
    func_sec = None
    for cur_sec, cur_sec_data in mod_iter:
        print(cur_sec.to_string(cur_sec_data))
        if not isinstance(cur_sec, Section):
            continue
        if cur_sec_data.id == SEC_CODE:
            code_sec = cur_sec_data.payload
        elif cur_sec_data.id == SEC_TYPE:
            type_sec = cur_sec_data.payload
        elif cur_sec_data.id == SEC_FUNCTION:
            func_sec = cur_sec_data.payload

    # If ordered to disassemble, do so.
    # TODO: We might want to make use of debug names, if available.
    if args.disas and code_sec is not None:
        # Identity checks: the payloads are decoded structures, so avoid
        # routing the "is it missing?" test through their equality operator.
        have_type_info = type_sec is not None and func_sec is not None

        for i, func_body in enumerate(code_sec.bodies):
            print('{x} sub_{id:04X} {x}'.format(x='=' * 35, id=i))

            # If we have type info, use it.
            func_type = (
                type_sec.entries[func_sec.types[i]] if have_type_info else None
            )

            print()
            print('\n'.join(format_function(func_body, func_type)))
            print()
def indent(text, prefix, predicate=None):
    """Prepend 'prefix' to selected lines of 'text' and return the result.

    A line is selected when 'predicate(line)' is truthy. Without a
    'predicate', every line that still has content after stripping
    whitespace is selected, so blank and whitespace-only lines are left
    untouched. Line endings are preserved.

    Borrowed from Py3 `textwrap` module.
    """
    if predicate is not None:
        wants_prefix = predicate
    else:
        wants_prefix = lambda line: line.strip()

    return ''.join(
        prefix + line if wants_prefix(line) else line
        for line in text.splitlines(True)
    )
def decode_module(module, decode_name_subsections=False):
    """Decode a raw WASM module lazily, yielding `ModuleFragment`s.

    The first fragment is always the module header; one fragment per section
    follows. When `decode_name_subsections` is true, a section whose id is
    `SEC_UNK` and whose name equals `SEC_NAME` is not yielded itself;
    instead its payload is split up and one fragment per `NameSubSection`
    is yielded.
    """
    remaining = memoryview(module)

    # Module header comes first.
    header = ModuleHeader()
    consumed, header_data, _ = header.from_raw(None, remaining)
    yield ModuleFragment(header, header_data)
    remaining = remaining[consumed:]

    # Then sections, until the input is exhausted.
    while remaining:
        section = Section()
        consumed, section_data, _ = section.from_raw(None, remaining)
        remaining = remaining[consumed:]

        expand_names = (
            decode_name_subsections
            and section_data.id == SEC_UNK
            and section_data.name == SEC_NAME
        )
        if not expand_names:
            yield ModuleFragment(section, section_data)
            continue

        # Name section: walk its payload one subsection at a time.
        names_wnd = section_data.payload
        while names_wnd:
            subsection = NameSubSection()
            sub_len, sub_data, _ = subsection.from_raw(None, names_wnd)
            yield ModuleFragment(subsection, sub_data)
            names_wnd = names_wnd[sub_len:]
def format_mutability(mutability):
    """Takes a global mutability flag, returning its string representation.

    The result is looked up in `_mutability_str_mapping`: "mut" for
    `MUTABLE` and the empty string for `IMMUTABLE`.

    Raises `ValueError` if `mutability` is not one of the known flags.
    """
    try:
        return _mutability_str_mapping[mutability]
    except KeyError:
        # The message used to say "value type", copied from
        # `format_lang_type`; name the thing that is actually wrong.
        raise ValueError('Bad value for mutability ({})'.format(mutability))
class BranchTableImm(Structure):
    """Immediate layout made of a counted table of targets plus a default.

    NOTE(review): presumably the immediate of the `br_table` opcode; the
    opcode table that binds this structure is not visible here -- confirm.
    """
    # Number of entries that `target_table` holds.
    target_count = VarUInt32Field()
    # Repeated VarUInt32 entries; the repeat count is read back from the
    # `target_count` value of the structure being decoded.
    target_table = RepeatField(VarUInt32Field(), lambda x: x.target_count)
    # Single VarUInt32 declared after the table.
    default_target = VarUInt32Field()
class ResizableLimits(Structure):
    """Size limits shared by `TableType` and `MemoryType`."""
    # Flag word; only bit 0 is inspected here (see `maximum`).
    flags = VarUInt32Field()
    # Initial size.
    initial = VarUInt32Field()
    # Optional upper bound, governed by bit 0 of `flags`.
    # NOTE(review): `CondField` lives in `.types`; presumably it decodes the
    # wrapped field only when the predicate is truthy -- confirm there.
    maximum = CondField(VarUInt32Field(), lambda x: x.flags & 1)
class InitExpr(WasmField):
    """Field holding an initializer expression: bytecode up to `OP_END`."""

    def from_raw(self, struct, raw):
        """Decode instructions from `raw` until the first `OP_END`.

        Returns a `(length, instructions, field)` tuple where `length` is the
        summed `len` of the decoded instructions, `instructions` is the list
        of decoded instructions (including the terminating end instruction,
        if one was found) and `field` is this field instance. `struct` is
        not used.
        """
        # Imported here rather than at module level: `.decode` itself
        # imports from this module.
        from .decode import decode_bytecode

        decoded = []
        for insn in decode_bytecode(raw):
            decoded.append(insn)
            if insn.op.id == OP_END:
                break

        consumed = sum(insn.len for insn in decoded)
        return consumed, decoded, self
class FunctionBody(Structure):
    """One entry of the code section: local declarations plus raw bytecode."""
    # Declared size of the body; used below to derive the length of `code`.
    body_size = VarUInt32Field()
    # Number of `LocalEntry` records in `locals`.
    local_count = VarUInt32Field()
    locals = RepeatField(
        LocalEntry(),
        lambda x: x.local_count,
    )
    # Raw bytecode. Its length is whatever remains of `body_size` after
    # subtracting the lengths the decoder recorded for `local_count` and
    # `locals`, i.e. `body_size` covers those two fields and the code but
    # not its own encoding.
    # NOTE(review): assumes the 'lengths' entries of the decoder meta are
    # encoded byte counts -- confirm in `.types`.
    code = BytesField(
        lambda x: (
            x.body_size -
            x.get_decoder_meta()['lengths']['local_count'] -
            x.get_decoder_meta()['lengths']['locals']
        )
    )
199 | funcs = RepeatField(LocalNames, lambda x: x.count) 200 | 201 | 202 | class NameSubSection(Structure): 203 | name_type = VarUInt7Field() 204 | payload_len = VarUInt32Field() 205 | payload = ChoiceField({ 206 | NAME_SUBSEC_FUNCTION: NameMap(), 207 | NAME_SUBSEC_LOCAL: LocalNameMap(), 208 | }, lambda x: x.name_type) 209 | 210 | 211 | class Section(Structure): 212 | id = VarUInt7Field() 213 | payload_len = VarUInt32Field() 214 | name_len = CondField( 215 | VarUInt32Field(), 216 | lambda x: x.id == 0, 217 | ) 218 | name = CondField( 219 | BytesField(lambda x: x.name_len, is_str=True), 220 | lambda x: x.id == 0, 221 | ) 222 | 223 | payload = ChoiceField({ 224 | SEC_UNK: BytesField(lambda x: ( 225 | x.payload_len - 226 | x.get_decoder_meta()['lengths']['name'] - 227 | x.get_decoder_meta()['lengths']['name_len'] 228 | )), 229 | SEC_TYPE: TypeSection(), 230 | SEC_IMPORT: ImportSection(), 231 | SEC_FUNCTION: FunctionSection(), 232 | SEC_TABLE: TableSection(), 233 | SEC_MEMORY: MemorySection(), 234 | SEC_GLOBAL: GlobalSection(), 235 | SEC_EXPORT: ExportSection(), 236 | SEC_START: StartSection(), 237 | SEC_ELEMENT: ElementSection(), 238 | SEC_CODE: CodeSection(), 239 | SEC_DATA: DataSection(), 240 | SEC_DATACOUNT: DataCountSection(), 241 | }, lambda x: x.id) 242 | 243 | overhang = BytesField(lambda x: max(0, ( 244 | x.payload_len - 245 | x.get_decoder_meta()['lengths']['name'] - 246 | x.get_decoder_meta()['lengths']['name_len'] - 247 | x.get_decoder_meta()['lengths']['payload'] 248 | ))) 249 | -------------------------------------------------------------------------------- /wasm/wasmtypes.py: -------------------------------------------------------------------------------- 1 | """Defines types used for both modules and bytecode.""" 2 | from __future__ import print_function, absolute_import, division, unicode_literals 3 | 4 | from .types import UIntNField, UnsignedLeb128Field, SignedLeb128Field 5 | 6 | 7 | def _make_shortcut(klass, *args, **kwargs): 8 | def 
proxy(**kwargs2): 9 | kwargs.update(kwargs2) 10 | return klass(*args, **kwargs) 11 | return proxy 12 | 13 | 14 | UInt8Field = _make_shortcut(UIntNField, 8) 15 | UInt16Field = _make_shortcut(UIntNField, 16) 16 | UInt32Field = _make_shortcut(UIntNField, 32) 17 | UInt64Field = _make_shortcut(UIntNField, 64) 18 | 19 | VarUInt1Field = _make_shortcut(UnsignedLeb128Field) 20 | VarUInt7Field = _make_shortcut(UnsignedLeb128Field) 21 | VarUInt32Field = _make_shortcut(UnsignedLeb128Field) 22 | 23 | VarInt7Field = _make_shortcut(SignedLeb128Field) 24 | VarInt32Field = _make_shortcut(SignedLeb128Field) 25 | VarInt64Field = _make_shortcut(SignedLeb128Field) 26 | 27 | ElementTypeField = VarInt7Field 28 | ValueTypeField = VarInt7Field 29 | ExternalKindField = UInt8Field 30 | BlockTypeField = VarInt7Field 31 | 32 | 33 | # 34 | # Constants 35 | # 36 | 37 | 38 | # Section types. 39 | SEC_UNK = 0 40 | SEC_TYPE = 1 41 | SEC_IMPORT = 2 42 | SEC_FUNCTION = 3 43 | SEC_TABLE = 4 44 | SEC_MEMORY = 5 45 | SEC_GLOBAL = 6 46 | SEC_EXPORT = 7 47 | SEC_START = 8 48 | SEC_ELEMENT = 9 49 | SEC_CODE = 10 50 | SEC_DATA = 11 51 | SEC_DATACOUNT = 12 52 | SEC_NAME = b'name' 53 | 54 | # Language types. 55 | LANG_TYPE_I32 = -0x01 56 | LANG_TYPE_I64 = -0x02 57 | LANG_TYPE_F32 = -0x03 58 | LANG_TYPE_F64 = -0x04 59 | LANG_TYPE_ANYFUNC = -0x10 60 | LANG_TYPE_FUNC = -0x20 61 | LANG_TYPE_EMPTY = -0x40 62 | 63 | # Value types. 64 | VAL_TYPE_I32 = LANG_TYPE_I32 65 | VAL_TYPE_I64 = LANG_TYPE_I64 66 | VAL_TYPE_F32 = LANG_TYPE_F32 67 | VAL_TYPE_F64 = LANG_TYPE_F64 68 | 69 | # Name subsection types. 70 | NAME_SUBSEC_FUNCTION = 1 71 | NAME_SUBSEC_LOCAL = 2 72 | 73 | # Mutability in global types. 74 | IMMUTABLE = 0 75 | MUTABLE = 1 --------------------------------------------------------------------------------