├── .github ├── dependabot.yml ├── mypy │ └── mypy.ini └── workflows │ ├── python-publish.yml │ └── test.yml ├── .gitignore ├── LICENSE ├── README.md ├── pyproject.toml ├── requirements.txt ├── setup.cfg ├── tests ├── data │ ├── 038476f1705f3ac1237ac57f4c1753e0aa085dd7cda5669d4e93399cf7a565af.exe_ │ └── Practical Malware Analysis Lab 01-01.dll_ ├── fixtures.py ├── test_cfg.py ├── test_driver.py └── test_meta.py └── viv_utils ├── __init__.py ├── emulator_drivers.py ├── flirt.py ├── idaloader.py ├── scripts ├── __init__.py ├── get_flirt_matches.py ├── get_function_args.py ├── show_flirt_references.py └── trace_function_emulation.py └── types.py /.github/dependabot.yml: -------------------------------------------------------------------------------- 1 | version: 2 2 | updates: 3 | - package-ecosystem: "pip" 4 | directory: "/" 5 | schedule: 6 | interval: "weekly" 7 | -------------------------------------------------------------------------------- /.github/mypy/mypy.ini: -------------------------------------------------------------------------------- 1 | [mypy] 2 | 3 | [mypy-pefile.*] 4 | ignore_missing_imports = True 5 | 6 | [mypy-viv_utils.*] 7 | ignore_missing_imports = True 8 | 9 | [mypy-flirt.*] 10 | ignore_missing_imports = True 11 | 12 | [mypy-idc.*] 13 | ignore_missing_imports = True 14 | 15 | [mypy-vivisect.*] 16 | ignore_missing_imports = True 17 | 18 | [mypy-envi.*] 19 | ignore_missing_imports = True 20 | 21 | [mypy-visgraph.*] 22 | ignore_missing_imports = True 23 | 24 | [mypy-PE.*] 25 | ignore_missing_imports = True 26 | 27 | [mypy-idaapi.*] 28 | ignore_missing_imports = True 29 | 30 | [mypy-idautils.*] 31 | ignore_missing_imports = True 32 | 33 | [mypy-ida_bytes.*] 34 | ignore_missing_imports = True 35 | 36 | [mypy-ida_kernwin.*] 37 | ignore_missing_imports = True 38 | 39 | [mypy-ida_settings.*] 40 | ignore_missing_imports = True 41 | 42 | [mypy-ida_funcs.*] 43 | ignore_missing_imports = True 44 | 45 | [mypy-ida_loader.*] 46 | ignore_missing_imports = True 
47 | 48 | [mypy-ida_ida.*] 49 | ignore_missing_imports = True 50 | 51 | [mypy-ida_nalt.*] 52 | ignore_missing_imports = True 53 | 54 | [mypy-PyQt5.*] 55 | ignore_missing_imports = True 56 | 57 | [mypy-funcy.*] 58 | ignore_missing_imports = True 59 | 60 | [mypy-intervaltree.*] 61 | ignore_missing_imports = True -------------------------------------------------------------------------------- /.github/workflows/python-publish.yml: -------------------------------------------------------------------------------- 1 | # This workflow will upload a Python Package using Twine when a release is created 2 | # For more information see: https://help.github.com/en/actions/language-and-framework-guides/using-python-with-github-actions#publishing-to-package-registries 3 | 4 | name: Upload Python Package 5 | 6 | on: 7 | release: 8 | types: [created] 9 | 10 | jobs: 11 | build: 12 | runs-on: ubuntu-latest 13 | steps: 14 | - uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1 15 | - name: Set up Python 16 | uses: actions/setup-python@0a5c61591373683505ea898e09a3ea4f39ef2b9c # v5.0.0 17 | with: 18 | python-version: '3.x' 19 | - name: Install dependencies 20 | run: | 21 | python -m pip install --upgrade pip 22 | pip install -r requirements.txt 23 | pip install -e .[build] 24 | - name: Build package 25 | run: | 26 | python -m build 27 | - uses: actions/upload-artifact@v4 28 | name: upload 29 | with: 30 | path: dist/* 31 | 32 | pypi-publish: 33 | runs-on: ubuntu-latest 34 | if: "startsWith(github.ref, 'refs/tags/')" 35 | needs: [ build ] 36 | environment: 37 | name: release 38 | permissions: 39 | id-token: write 40 | steps: 41 | - uses: actions/download-artifact@v4 42 | with: 43 | path: artifacts/ 44 | - name: move files to dist/ 45 | run: | 46 | shopt -s globstar # Enable recursive globbing (**) 47 | rm -rf dist/ 48 | mkdir dist/ 49 | cp -r artifacts/**/*.whl dist/ 50 | - name: publish package 51 | uses: 
pypa/gh-action-pypi-publish@f5622bde02b04381239da3573277701ceca8f6a0 # release/v1 52 | with: 53 | skip-existing: true 54 | verbose: true 55 | print-hash: true 56 | -------------------------------------------------------------------------------- /.github/workflows/test.yml: -------------------------------------------------------------------------------- 1 | name: CI 2 | 3 | on: 4 | push: 5 | branches: "*" 6 | pull_request: 7 | branches: [ master ] 8 | 9 | jobs: 10 | code_style: 11 | runs-on: ubuntu-20.04 12 | steps: 13 | - name: Checkout viv-utils 14 | uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1 15 | - name: Set up Python 3.13 16 | uses: actions/setup-python@0a5c61591373683505ea898e09a3ea4f39ef2b9c # v5.0.0 17 | with: 18 | python-version: "3.13" 19 | - name: Install dependencies 20 | run: | 21 | pip install -r requirements.txt 22 | pip install -e .[dev,flirt] 23 | - name: Lint with isort 24 | run: isort --profile black --length-sort --line-width 120 -c . 25 | - name: Lint with black 26 | run: black -l 120 --check . 
27 | - name: Lint with pycodestyle 28 | run: pycodestyle --show-source viv_utils tests 29 | - name: Check types with mypy 30 | run: mypy --config-file .github/mypy/mypy.ini viv_utils/ tests/ 31 | 32 | tests: 33 | name: Tests in ${{ matrix.python-version }} on ${{ matrix.os }} 34 | runs-on: ${{ matrix.os }} 35 | needs: [code_style] 36 | strategy: 37 | fail-fast: false 38 | matrix: 39 | os: [ubuntu-22.04, windows-2022, macos-14] 40 | # across all operating systems 41 | python-version: ["3.10", "3.12"] 42 | steps: 43 | - name: Checkout viv-utils 44 | uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1 45 | - name: Set up Python ${{ matrix.python-version }} 46 | uses: actions/setup-python@0a5c61591373683505ea898e09a3ea4f39ef2b9c # v5.0.0 47 | with: 48 | python-version: ${{ matrix.python-version }} 49 | - name: Install viv-utils 50 | run: | 51 | pip install -r requirements.txt 52 | pip install -e .[dev,flirt] 53 | - name: Run tests 54 | run: pytest -v tests/ 55 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | __pycache__/ 2 | *.py[cod] 3 | *$py.class 4 | 5 | # C extensions 6 | *.so 7 | 8 | # Distribution / packaging 9 | .Python 10 | env/ 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | *.egg-info/ 23 | .installed.cfg 24 | *.egg 25 | 26 | # PyInstaller 27 | # Usually these files are written by a python script from a template 28 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
29 | *.manifest 30 | *.spec 31 | 32 | # Installer logs 33 | pip-log.txt 34 | pip-delete-this-directory.txt 35 | 36 | # Unit test / coverage reports 37 | htmlcov/ 38 | .tox/ 39 | .coverage 40 | .coverage.* 41 | .cache 42 | nosetests.xml 43 | coverage.xml 44 | *,cover 45 | .hypothesis/ 46 | 47 | # Translations 48 | *.mo 49 | *.pot 50 | 51 | # Django stuff: 52 | *.log 53 | 54 | # Sphinx documentation 55 | docs/_build/ 56 | 57 | # PyBuilder 58 | target/ 59 | 60 | #Ipython Notebook 61 | .ipynb_checkpoints 62 | .idea 63 | .direnv 64 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 
29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 
61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. 
You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 
122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. 
In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "{}" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. 
We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright {yyyy} {name of copyright owner} 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 202 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # viv-utils 2 | Utilities for working with vivisect 3 | 4 | ``` 5 | pip install viv-utils 6 | ``` 7 | -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [build-system] 2 | requires = ["setuptools", "setuptools-scm"] 3 | build-backend = "setuptools.build_meta" 4 | 5 | [project] 6 | name = "viv_utils" 7 | version="0.8.0" 8 | authors = [ 9 | {name = "Willi Ballenthin", email = "william.ballenthin@mandiant.com"}, 10 | {name = "Moritz Raabe", email = "moritz.raabe@mandiant.com"}, 11 | ] 12 | description="Utilities for binary analysis using vivisect." 
13 | readme = {file = "README.md", content-type = "text/markdown"} 14 | license = {file = "LICENSE"} 15 | requires-python = ">=3.9" 16 | keywords = ["malware analysis", "reverse engineering", "emulation", "disassembly"] 17 | classifiers = [ 18 | "Development Status :: 5 - Production/Stable", 19 | "Intended Audience :: Developers", 20 | "Intended Audience :: Information Technology", 21 | "License :: OSI Approved :: Apache Software License", 22 | "Natural Language :: English", 23 | "Programming Language :: Python :: 3", 24 | "Topic :: Security", 25 | ] 26 | dependencies = [ 27 | # --------------------------------------- 28 | # As a library, we use lower version bounds 29 | # when specifying its dependencies. This lets 30 | # other programs find a compatible set of 31 | # dependency versions. 32 | # 33 | # We can optionally pin to specific versions or 34 | # limit the upper bound when there's a good reason; 35 | # but the default is to assume all greater versions 36 | # probably work until proven otherwise. 37 | # 38 | # The following link provides good background: 39 | # https://iscinumpy.dev/post/bound-version-constraints/ 40 | # 41 | # When we develop this library, and when we distribute it as 42 | # a standalone binary, we'll use specific versions 43 | # that are pinned in requirements.txt. 44 | # But the requirements for a library are specified here 45 | # and are looser. 46 | # 47 | # Related discussions: 48 | # 49 | # - https://github.com/mandiant/capa/issues/2053 50 | # - https://github.com/mandiant/capa/pull/2059 51 | # - https://github.com/mandiant/capa/pull/2079 52 | # 53 | # --------------------------------------- 54 | # The following dependency versions were imported 55 | # during June 2024 by truncating specific versions to 56 | # their major-most version (major version when possible, 57 | # or minor otherwise). 58 | # As specific constraints are identified, please provide 59 | # comments and context. 
60 | "funcy>=2.0", 61 | "pefile>=2023.2.7", 62 | "vivisect>=1.1.0", 63 | "intervaltree>=3.1.0", 64 | "typing_extensions>=4.5.0", 65 | ] 66 | 67 | [tool.setuptools.packages.find] 68 | include = ["viv_utils*"] 69 | namespaces = false 70 | 71 | [project.optional-dependencies] 72 | flirt = [ 73 | "python-flirt>=0.9.0", 74 | ] 75 | # Dev and build dependencies are not relaxed because 76 | # we want all developer environments to be consistent. 77 | # These dependencies are not used in production environments 78 | # and should not conflict with other libraries/tooling. 79 | dev = [ 80 | "pytest==8.2.2", 81 | "pytest-sugar==1.0.0", 82 | "pytest-instafail==0.5.0", 83 | "pycodestyle==2.12.0", 84 | "black==24.4.2", 85 | "isort==5.13.2", # last version supporting Python 3.7 86 | "mypy==1.11.2", 87 | "types-setuptools==75.2.0.20241019", 88 | ] 89 | build = [ 90 | "setuptools==75.2.0", 91 | "build==1.2.1" 92 | ] 93 | 94 | [project.urls] 95 | Homepage = "https://github.com/williballenthin/viv-utils" 96 | Repository = "https://github.com/williballenthin/viv-utils.git" 97 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | black==24.4.2 2 | build==1.2.1 3 | click==8.1.7 4 | cxxfilt==0.2.2 5 | funcy==2.0 6 | iniconfig==2.0.0 7 | intervaltree==3.1.0 8 | isort==5.13.2 9 | msgpack==1.0.8 10 | mypy==1.11.2 11 | packaging==24.1 12 | pathspec==0.12.1 13 | pefile==2023.2.7 14 | pip==24.2 15 | platformdirs==4.2.2 16 | pluggy==1.5.0 17 | pyasn1==0.4.8 18 | pyasn1-modules==0.2.8 19 | pycodestyle==2.12.0 20 | pycparser==2.22 21 | pyproject-hooks==1.1.0 22 | pytest==8.2.2 23 | pytest-instafail==0.5.0 24 | pytest-sugar==1.0.0 25 | python-flirt==0.9.2 26 | setuptools==75.2.0 27 | sortedcontainers==2.4.0 28 | termcolor==2.4.0 29 | types-setuptools==75.2.0.20241019 30 | vivisect==1.1.1 31 | 
-------------------------------------------------------------------------------- /setup.cfg: -------------------------------------------------------------------------------- 1 | [bdist_wheel] 2 | universal = 1 3 | 4 | [aliases] 5 | test = pytest 6 | 7 | [pycodestyle] 8 | # the following suppress lints that conflict with the project's style: 9 | # 10 | # E203: whitespace before : (from black) 11 | ignore = E203 12 | max-line-length = 180 13 | statistics = True 14 | -------------------------------------------------------------------------------- /tests/data/038476f1705f3ac1237ac57f4c1753e0aa085dd7cda5669d4e93399cf7a565af.exe_: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/williballenthin/viv-utils/50ea43422c49539fea5a48f74d0935b7fef131f3/tests/data/038476f1705f3ac1237ac57f4c1753e0aa085dd7cda5669d4e93399cf7a565af.exe_ -------------------------------------------------------------------------------- /tests/data/Practical Malware Analysis Lab 01-01.dll_: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/williballenthin/viv-utils/50ea43422c49539fea5a48f74d0935b7fef131f3/tests/data/Practical Malware Analysis Lab 01-01.dll_ -------------------------------------------------------------------------------- /tests/fixtures.py: -------------------------------------------------------------------------------- 1 | from pathlib import Path 2 | 3 | import pytest 4 | 5 | import viv_utils 6 | 7 | CD = Path(__file__).parent 8 | DATA = CD / "data" 9 | 10 | 11 | @pytest.fixture 12 | def pma01(): 13 | return viv_utils.getWorkspace(str(DATA / "Practical Malware Analysis Lab 01-01.dll_"), should_save=False) 14 | 15 | 16 | @pytest.fixture 17 | def sample_038476(): 18 | return viv_utils.getWorkspace( 19 | str(DATA / "038476f1705f3ac1237ac57f4c1753e0aa085dd7cda5669d4e93399cf7a565af.exe_"), should_save=False 20 | ) 21 | 
# --------------------------------------------------------------------------------
# /tests/test_cfg.py:
# --------------------------------------------------------------------------------
# the star import supplies the `pma01` fixture and, through fixtures.py,
# the `pytest` and `viv_utils` names used below.
from fixtures import *


def test_prev_loc(pma01):
    """check the start address (element 0) reported by get_prev_loc for three kinds of query address"""
    cases = [
        # start of insn
        (0x10001015, 0x10001010),
        # middle of insn
        (0x10001016, 0x10001010),
        # undefined location, directly after loc
        (0x100011FA, 0x100011F7),
    ]
    for query_va, expected_va in cases:
        assert viv_utils.get_prev_loc(pma01, query_va)[0] == expected_va


def test_prev_opcode(pma01):
    """get_prev_opcode yields the opcode at 0x10001010 for both query addresses"""
    for query_va in (0x10001015, 0x10001016):
        assert viv_utils.get_prev_opcode(pma01, query_va).va == 0x10001010


def test_all_xrefs_from(pma01):
    """count the xrefs leaving a plain instruction and a conditional jump"""
    # mov eax, 11F8h
    # single xref: fallthrough
    from_mov = list(viv_utils.get_all_xrefs_from(pma01, 0x10001010))
    assert len(from_mov) == 1

    # jnz loc_100011E8
    # two xrefs: fallthrough and conditional jump
    from_jnz = list(viv_utils.get_all_xrefs_from(pma01, 0x10001028))
    assert len(from_jnz) == 2


def test_all_xrefs_to(pma01):
    """count the xrefs arriving at two addresses"""
    # single xref: fallthrough
    to_call = list(viv_utils.get_all_xrefs_to(pma01, 0x10001015))
    assert len(to_call) == 1

    # four xrefs: fallthrough and three jumps
    to_join = list(viv_utils.get_all_xrefs_to(pma01, 0x100011E8))
    assert len(to_join) == 4


def test_cfg(pma01):
    """the CFG of the function at 0x10001010 has one root, one leaf, and four edges into the leaf"""
    graph = viv_utils.CFG(viv_utils.Function(pma01, 0x10001010))

    root_blocks = list(graph.get_root_basic_blocks())
    assert len(root_blocks) == 1
    entry = root_blocks[0]
    assert int(entry) == 0x10001010

    leaf_blocks = list(graph.get_leaf_basic_blocks())
    assert len(leaf_blocks) == 1

    exit_block = leaf_blocks[0]
    assert int(exit_block) == 0x100011E8

    predecessors = list(graph.get_predecessor_basic_blocks(exit_block))
    assert len(predecessors) == 4


def test_bad_cfg(pma01):
    """building a CFG for 0x10002000 must raise ValueError"""
    with pytest.raises(ValueError):
        viv_utils.CFG(viv_utils.Function(pma01, 0x10002000))
# --------------------------------------------------------------------------------
# /tests/test_driver.py:
# --------------------------------------------------------------------------------
import collections

import envi.const
from fixtures import *

import viv_utils.emulator_drivers as vudrv

# note: the star import above supplies the `pma01` fixture and, through fixtures.py,
# the `pytest` name used by the `pytest.raises` checks below.


class LoggingMonitor(vudrv.Monitor):
    """log the emulated addresses"""

    def prehook(self, emu, op, startpc):
        # print the address and the opcode handed to the hook.
        print("emu: 0x%x %s" % (startpc, op))

    def preblock(self, emu, blockstart):
        print("emu: block: start: 0x%x" % (blockstart))

    def postblock(self, emu, blockstart, blockend):
        print("emu: block: 0x%x - 0x%x" % (blockstart, blockend))


class CoverageMonitor(vudrv.Monitor):
    """capture the emulated addresses"""

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        # maps address -> number of times `prehook` was invoked for it.
        self.addresses = collections.Counter()

    def prehook(self, emu, op, startpc):
        self.addresses[startpc] += 1


def test_driver_monitor(pma01):
    """a monitor's prehook sees the stepped instruction, but not the one that comes next"""
    emu = pma01.getEmulator()
    drv = vudrv.DebuggerEmulatorDriver(emu)
    cov = CoverageMonitor()
    drv.add_monitor(cov)

    # 10001010 B8 F8 11 00 00 mov eax, 11F8h
    # 10001015 E8 06 02 00 00 call __alloca_probe

    drv.setProgramCounter(0x10001010)
    drv.stepi()
    assert drv.getProgramCounter() == 0x10001015

    assert 0x10001010 in cov.addresses
    assert 0x10001015 not in cov.addresses


def test_dbg_driver_stepi(pma01):
    """four stepi calls from 0x10001342 end up at the call target, 0x10001010"""
    emu = pma01.getEmulator()
    drv = vudrv.DebuggerEmulatorDriver(emu)

    # .text:10001342 57 push edi
    # .text:10001343 56 push esi ; fdwReason
    # .text:10001344 53 push ebx ; hinstDLL
    # .text:10001345 E8 C6 FC FF FF call DllMain (0x10001010)
    # .text:1000134A 83 FE 01 cmp esi, 1
    drv.setProgramCounter(0x10001342)
    drv.stepi()
    drv.stepi()
    drv.stepi()
    drv.stepi()
    assert drv.getProgramCounter() == 0x10001010


def test_dbg_driver_stepo(pma01):
    """four stepo calls from 0x10001342 end up at 0x1000134A, the instruction after the call"""
    emu = pma01.getEmulator()
    drv = vudrv.DebuggerEmulatorDriver(emu)

    # .text:10001342 57 push edi
    # .text:10001343 56 push esi ; fdwReason
    # .text:10001344 53 push ebx ; hinstDLL
    # .text:10001345 E8 C6 FC FF FF call DllMain (0x10001010)
    # .text:1000134A 83 FE 01 cmp esi, 1
    drv.setProgramCounter(0x10001342)
    drv.stepo()
    drv.stepo()
    drv.stepo()
    drv.stepo()
    assert drv.getProgramCounter() == 0x1000134A


class CreateMutexAHook:
    """capture the mutex names passed to CreateMutexA"""

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        self.mutexes = set()

    def __call__(self, emu, api, argv):
        # `api` is unpacked as a 5-tuple; only the calling convention and name are used here.
        _, _, cconv, name, _ = api

        if name != "kernel32.CreateMutexA":
            # not our API: return None (rather than True) for any other call.
            return

        # the third argument is read as the mutex name string.
        mutex = emu.readString(argv[2])
        self.mutexes.add(mutex)

        # return 0 from the call, passing the argument count to the calling convention.
        cconv = emu.getCallingConvention(cconv)
        cconv.execCallReturn(emu, 0, len(argv))

        return True


def test_driver_hook(pma01):
    """a hook added to the driver handles the call to CreateMutexA and records the mutex name"""
    emu = pma01.getEmulator()
    drv = vudrv.DebuggerEmulatorDriver(emu)
    hk = CreateMutexAHook()
    drv.add_hook(hk)

    # .text:10001067 68 38 60 02 10 push offset Name ; "SADFHUHF"
    # .text:1000106C 50 push eax ; bInitialOwner
    # .text:1000106D 50 push eax ; lpMutexAttributes
    # .text:1000106E FF 15 08 20 00 10 call ds:CreateMutexA
    # .text:10001074 8D 4C 24 78 lea ecx, [esp+1208h+var_1190]

    drv.setProgramCounter(0x10001067)
    drv.stepi()
    drv.stepi()
    drv.stepi()
    drv.stepi()
    assert drv.getProgramCounter() == 0x10001074
    assert "SADFHUHF" in hk.mutexes


def protect_memory(imem, va, size, perms):
    """
    set the permissions of the memory map that starts at `va` and is `size` bytes long.

    the matching entry of `imem._map_defs` is replaced in place with a copy that carries `perms`.
    see: https://github.com/vivisect/vivisect/issues/511

    raises:
      KeyError: when no memory map has exactly this start address and size.
    """
    maps = imem._map_defs
    for i, (start, end, mmap, bytez) in enumerate(maps):
        mva, msize, _mperms, mfilename = mmap

        if mva == va and msize == size:
            # safe to assign while enumerating: we return immediately afterwards.
            maps[i] = [start, end, [mva, msize, perms, mfilename], bytez]
            return

    # interpolate the values into the message;
    # passing them as extra exception args would leave the format string unformatted.
    raise KeyError("unknown memory map: 0x%x (0x%x bytes)" % (va, size))


def test_driver_hook_tailjump(pma01):
    """a hook also handles an API that is reached via `jmp` with a return address on the stack"""
    # patch:
    #
    # .text:10001067 68 38 60 02 10 push offset Name ; "SADFHUHF"
    # .text:1000106C 50 push eax ; bInitialOwner
    # .text:1000106D 50 push eax ; lpMutexAttributes
    # .text:1000106E FF 15 08 20 00 10 call ds:CreateMutexA
    # .text:10001074 8D 4C 24 78 lea ecx, [esp+1208h+var_1190]
    #
    # to:
    #
    # .text:10001067 68 38 60 02 10 push offset Name ; "SADFHUHF"
    # .text:1000106C 50 push eax ; bInitialOwner
    # .text:1000106D 50 push eax ; lpMutexAttributes
    # .text:1000106E 68 79 10 00 10 push offset loc_10001079
    # .text:10001073 FF 25 08 20 00 10 jmp ds:CreateMutexA
    # .text:10001079 ... ...
    #
    # so that we have a tail jump to `CreateMutexA` (but with the return address on the stack).
    # the hook handler should pick up on this, and handle the transition to `CreateMutexA` as a call.
    #
    # note: we have to patch the vw, because patching emu mem doesn't work.
    # the emu instance reads opcodes from the vw not emu memory.
    # see: https://github.com/vivisect/vivisect/issues/512
    vw = pma01
    mapva, size, perms, _filename = vw.getMemoryMap(0x1000106E)
    # make the map writable for the patch, then restore the original permissions below.
    protect_memory(vw, mapva, size, envi.const.MM_RWX)
    vw.writeMemory(0x1000106E, bytes.fromhex("68 79 10 00 10 FF 25 08 20 00 10"))
    vw.clearOpcache()
    assert vw.parseOpcode(0x1000106E).mnem == "push"
    assert vw.parseOpcode(0x10001073).mnem == "jmp"
    protect_memory(vw, mapva, size, perms)

    emu = vw.getEmulator()
    drv = vudrv.DebuggerEmulatorDriver(emu)
    hk = CreateMutexAHook()
    drv.add_hook(hk)

    drv.setProgramCounter(0x10001067)
    drv.stepi()
    drv.stepi()
    drv.stepi()
    drv.stepi()
    assert drv.parseOpcode(drv.getProgramCounter()).mnem == "jmp"
    drv.stepi()
    assert drv.getProgramCounter() == 0x10001079
    assert "SADFHUHF" in hk.mutexes


def test_dbg_driver_max_insn(pma01):
    """with max_insn=1, run() raises BreakpointHit("max_insn") after a single instruction"""
    emu = pma01.getEmulator()
    drv = vudrv.DebuggerEmulatorDriver(emu, max_insn=1)

    # .text:10001342 57 push edi
    # .text:10001343 56 push esi ; fdwReason
    # .text:10001344 53 push ebx ; hinstDLL
    # .text:10001345 E8 C6 FC FF FF call DllMain (0x10001010)
    # .text:1000134A 83 FE 01 cmp esi, 1
    drv.setProgramCounter(0x10001342)
    with pytest.raises(vudrv.BreakpointHit) as e:
        drv.run()
    assert e.value.reason == "max_insn"
    assert drv.getProgramCounter() == 0x10001343


def test_dbg_driver_bp(pma01):
    """run() raises BreakpointHit("breakpoint") when it reaches an address in drv.breakpoints"""
    emu = pma01.getEmulator()
    drv = vudrv.DebuggerEmulatorDriver(emu)

    # .text:10001342 57 push edi
    # .text:10001343 56 push esi ; fdwReason
    # .text:10001344 53 push ebx ; hinstDLL
    # .text:10001345 E8 C6 FC FF FF call DllMain (0x10001010)
    # .text:1000134A 83 FE 01 cmp esi, 1
    drv.setProgramCounter(0x10001342)
    drv.breakpoints.add(0x10001344)
    with pytest.raises(vudrv.BreakpointHit) as e:
        drv.run()
    assert e.value.reason == "breakpoint"
    assert drv.getProgramCounter() == 0x10001344


def test_dbg_driver_until_mnem(pma01):
    """run_to_mnem(["call"]) raises BreakpointHit("mnemonic") with the pc at the call instruction"""
    emu = pma01.getEmulator()
    drv = vudrv.DebuggerEmulatorDriver(emu)

    # .text:10001342 57 push edi
    # .text:10001343 56 push esi ; fdwReason
    # .text:10001344 53 push ebx ; hinstDLL
    # .text:10001345 E8 C6 FC FF FF call DllMain (0x10001010)
    # .text:1000134A 83 FE 01 cmp esi, 1
    drv.setProgramCounter(0x10001342)
    with pytest.raises(vudrv.BreakpointHit) as e:
        drv.run_to_mnem(["call"])
    assert e.value.reason == "mnemonic"
    assert drv.getProgramCounter() == 0x10001345


def test_dbg_driver_until_va(pma01):
    """run_to_va returns normally (no exception) with the pc at the requested address"""
    emu = pma01.getEmulator()
    drv = vudrv.DebuggerEmulatorDriver(emu)

    # .text:10001342 57 push edi
    # .text:10001343 56 push esi ; fdwReason
    # .text:10001344 53 push ebx ; hinstDLL
    # .text:10001345 E8 C6 FC FF FF call DllMain (0x10001010)
    # .text:1000134A 83 FE 01 cmp esi, 1
    drv.setProgramCounter(0x10001342)
    drv.run_to_va(0x10001344)
    assert drv.getProgramCounter() == 0x10001344


def test_fc_driver(pma01):
    emu = pma01.getEmulator()
    vudrv.remove_default_viv_hooks(emu)
    drv = vudrv.FullCoverageEmulatorDriver(emu)
    cov = CoverageMonitor()
    drv.add_monitor(cov)

    drv.run(0x10001010)

    # each instruction should have been emulated exactly once.
    assert list(set(cov.addresses.values())) == [1]

    # there's a call to __alloca_probe,
    # however, we should not have emulated into its body.
    #
    # .text:10001010 B8 F8 11 00 00 mov eax, 11F8h
    # .text:10001015 E8 06 02 00 00 call __alloca_probe == 0x10001220
    assert 0x10001220 not in cov.addresses

    # these are a selection of addresses from the function
    # pulled from IDA manually.
275 | for va in [ 276 | 0x10001010, 277 | 0x10001033, 278 | 0x10001086, 279 | 0x100010E9, 280 | 0x100011D0, 281 | 0x100011DB, 282 | 0x100011E2, 283 | 0x100011E8, 284 | 0x100011F7, 285 | ]: 286 | assert va in cov.addresses 287 | 288 | 289 | def test_fc_driver_jmp_bb_ends(sample_038476): 290 | emu = sample_038476.getEmulator() 291 | vudrv.remove_default_viv_hooks(emu) 292 | drv = vudrv.FullCoverageEmulatorDriver(emu) 293 | cov = CoverageMonitor() 294 | drv.add_monitor(cov) 295 | 296 | # at the end of basic blocks there's a jump to the next block 297 | # don't confuse this with a tail jump / API call and emulate the entire function 298 | # with a fauly handle_jmp, emulation would end after the first basic block 299 | # 300 | # example snippit: 301 | # .text:00401842 E9 04 00 00 00 jmp loc_40184B 302 | # .text:00401847 ; --------------------------------------------------------------------------- 303 | # .text:00401847 9B wait 304 | # .text:00401848 9B wait 305 | # .text:00401849 9B wait 306 | # .text:0040184A 9B wait 307 | # .text:0040184B 308 | # .text:0040184B loc_40184B: 309 | # .text:0040184B E9 04 00 00 00 jmp loc_401854 310 | # .text:00401850 ; --------------------------------------------------------------------------- 311 | # .text:00401850 9B wait 312 | # .text:00401851 9B wait 313 | # .text:00401852 9B wait 314 | # .text:00401853 9B wait 315 | # .text:00401854 316 | # .text:00401854 loc_401854: 317 | # .text:00401854 C7 45 E8 00 00 00+ mov [ebp+var_18], 0 318 | drv.run(0x401830) 319 | 320 | # these are a selection of random addresses from the function 321 | # pulled from IDA manually. 
322 | for va in [ 323 | 0x40184B, 324 | 0x40185B, 325 | 0x4019C2, 326 | 0x401A1D, 327 | 0x401A3C, 328 | 0x401A68, 329 | 0x401B96, 330 | 0x401C55, 331 | 0x401E79, 332 | 0x401ED2, 333 | ]: 334 | assert va in cov.addresses 335 | 336 | 337 | def test_fc_driver_rep(pma01): 338 | class LocalMonitor(vudrv.Monitor): 339 | """capture the value of ecx at 0x100010FA""" 340 | 341 | def __init__(self, *args, **kwargs): 342 | super().__init__(*args, **kwargs) 343 | self.ecx = -1 344 | 345 | def prehook(self, emu, op, startpc): 346 | if startpc == 0x100010FA: 347 | self.ecx = emu.getRegisterByName("ecx") 348 | 349 | REPMAX = 0x70 350 | emu = pma01.getEmulator() 351 | vudrv.remove_default_viv_hooks(emu) 352 | drv = vudrv.FullCoverageEmulatorDriver(emu, repmax=REPMAX) 353 | mon = LocalMonitor() 354 | drv.add_monitor(mon) 355 | 356 | drv.run(0x10001010) 357 | 358 | # should be strlen("hello") 359 | # however viv doesn't correctly handle repnz with a repmax option. 360 | # see: https://github.com/vivisect/vivisect/pull/513 361 | # 362 | # instead we have 0xFFFFFFFF - repmax - strlen("hello") 363 | assert mon.ecx in ( 364 | # correct answer 365 | len("hello"), 366 | # buggy viv answer 367 | 0xFFFFFFFF - REPMAX + len("hello"), 368 | ) 369 | 370 | 371 | def test_dbg_driver_rep(pma01): 372 | REPMAX = 0x70 373 | 374 | emu = pma01.getEmulator() 375 | drv = vudrv.DebuggerEmulatorDriver(emu, repmax=REPMAX) 376 | 377 | # .text:100010E9 BF 20 60 02 10 mov edi, offset aHello ; "hello" 378 | # .text:100010EE 83 C9 FF or ecx, 0FFFFFFFFh 379 | # .text:100010F1 33 C0 xor eax, eax 380 | # .text:100010F3 6A 00 push 0 381 | # .text:100010F5 F2 AE repne scasb 382 | # .text:100010F7 F7 D1 not ecx 383 | # .text:100010F9 49 dec ecx 384 | # .text:100010FA 51 push ecx 385 | drv.setProgramCounter(0x100010E9) 386 | 387 | drv.stepi() 388 | drv.stepi() 389 | drv.stepi() 390 | drv.stepi() 391 | assert drv.getProgramCounter() == 0x100010F5 392 | assert drv.getRegisterByName("edi") == 0x10026020 393 | assert 
drv.readString(0x10026020) == "hello" 394 | assert drv.getRegisterByName("eax") == 0x0 395 | assert drv.getRegisterByName("ecx") == 0xFFFFFFFF 396 | 397 | drv.stepi() 398 | # should be 0xFFFFFFFF - strlen("hello") 399 | # however viv doesn't correctly handle repnz with a repmax option. 400 | # see: https://github.com/vivisect/vivisect/pull/513 401 | # 402 | # instead we have repmax - strlen("hello") 403 | assert drv.getRegisterByName("ecx") in ( 404 | # correct answer 405 | 0xFFFFFFFF - len("hello\x00"), 406 | # buggy viv answer 407 | REPMAX - len("hello\x00"), 408 | ) 409 | 410 | drv.stepi() 411 | drv.stepi() 412 | 413 | assert drv.getRegisterByName("ecx") in ( 414 | # correct answer 415 | len("hello"), 416 | # buggy viv answer 417 | 0xFFFFFFFF - REPMAX + len("hello"), 418 | ) 419 | 420 | 421 | def test_dbg_driver_maxhit(pma01): 422 | emu = pma01.getEmulator() 423 | vudrv.remove_default_viv_hooks(emu) 424 | drv = vudrv.DebuggerEmulatorDriver(emu) 425 | cov = CoverageMonitor() 426 | drv.add_monitor(cov) 427 | 428 | # .text:10001010 B8 F8 11 00 00 mov eax, 11F8h 429 | # .text:10001015 E8 06 02 00 00 call __alloca_probe 430 | # .text:1000101A 8B 84 24 00 12 00 00 mov eax, [esp+11F8h+fdwReason] 431 | # 432 | # and __alloca_probe loops across pages on the stack, like: 433 | # 434 | # .text:1000122A 72 14 jb short loc_10001240 435 | # .text:1000122C 436 | # .text:1000122C 81 E9 00 10 00 00 sub ecx, 1000h 437 | # .text:10001232 2D 00 10 00 00 sub eax, 1000h 438 | # .text:10001237 85 01 test [ecx], eax 439 | # .text:10001239 3D 00 10 00 00 cmp eax, 1000h 440 | # .text:1000123E 73 EC jnb short loc_1000122C 441 | # .text:10001240 442 | # .text:10001240 2B C8 sub ecx, eax 443 | drv.setProgramCounter(0x10001015) 444 | # alloca(0x2000): two probing loops 445 | drv.setRegisterByName("eax", 0x2000) 446 | drv.run_to_va(0x1000101A) 447 | 448 | # outside the loop: hit once 449 | assert cov.addresses[0x1000122A] == 1 450 | # inside the loop: hit twice 451 | assert 
def test_name(pma01):
    # round trip: the name we set must be exactly the name we read back.
    # (asserting only truthiness would pass for any non-empty name.)
    viv_utils.set_function_name(pma01, 0x10001010, "DllMain")
    assert viv_utils.get_function_name(pma01, 0x10001010) == "DllMain"
def getVwFirstMeta(vw: Workspace) -> Dict[str, Any]:
    """
    fetch the metadata dict recorded for the first file in the workspace.

    note: "first" means the first entry of `vw.filemeta`.
    if additional files have been added to the workspace,
    their metadata is ignored, which may not be what you want.
    """
    all_meta = [*vw.filemeta.values()]
    return all_meta[0]
def loadWorkspaceFromViv(vw: Workspace, viv_file):
    """
    load the serialized workspace at `viv_file` into `vw`.

    Args:
      vw: the workspace to populate (via `vw.loadWorkspace`).
      viv_file: path of the .viv file to load.

    Raises:
      IncompatibleVivVersion: if loading fails with a UnicodeDecodeError,
        which is what we see for .viv files written by Python 2.
    """
    # this module is Python 3 only (it imports importlib.metadata),
    # so there is no separate Python 2 code path to maintain here.
    try:
        vw.loadWorkspace(viv_file)
    except UnicodeDecodeError as e:
        # chain the original error so the decoding failure stays visible in the traceback.
        raise IncompatibleVivVersion(
            "'%s' is an invalid .viv file. It may have been generated with Python 2 (incompatible with Python 3)."
            % viv_file
        ) from e
97 | """ 98 | vw = Workspace() 99 | vw.verbose = verbose 100 | # this is pretty insane, but simply prop assignment doesn't work. 101 | vw.config.getSubConfig("viv").getSubConfig("parsers").getSubConfig("pe")["loadresources"] = True 102 | vw.config.getSubConfig("viv").getSubConfig("parsers").getSubConfig("pe")["nx"] = True 103 | if fp.endswith(".viv"): 104 | loadWorkspaceFromViv(vw, fp) 105 | assertVwMatchesVivisectLibrary(vw) 106 | if reanalyze: 107 | setVwVivisectLibraryVersion(vw) 108 | vw.analyze() 109 | else: 110 | viv_file = fp + ".viv" 111 | if os.path.exists(viv_file): 112 | loadWorkspaceFromViv(vw, viv_file) 113 | assertVwMatchesVivisectLibrary(vw) 114 | if reanalyze: 115 | setVwVivisectLibraryVersion(vw) 116 | vw.analyze() 117 | else: 118 | vw.loadFromFile(fp) 119 | setVwVivisectLibraryVersion(vw) 120 | if analyze: 121 | vw.analyze() 122 | 123 | if should_save: 124 | vw.saveWorkspace() 125 | 126 | return vw 127 | 128 | 129 | def set_function_name(vw, va: int, new_name: str): 130 | # vivgui seems to override function_name with symbol names, but this is correct 131 | ret_type, ret_name, call_conv, func_name, args = vw.getFunctionApi(va) 132 | vw.setFunctionApi(va, (ret_type, ret_name, call_conv, new_name, args)) 133 | 134 | 135 | def get_function_name(vw, va: int) -> str: 136 | ret_type, ret_name, call_conv, func_name, args = vw.getFunctionApi(va) 137 | return func_name 138 | 139 | 140 | class Function: 141 | def __init__(self, vw: Workspace, va: int): 142 | super(Function, self).__init__() 143 | self.vw = vw 144 | self.va = va 145 | 146 | @funcy.cached_property 147 | def basic_blocks(self) -> List["BasicBlock"]: 148 | bb = map(lambda b: BasicBlock(self.vw, *b), self.vw.getFunctionBlocks(self.va)) 149 | return list(sorted(bb, key=lambda b: b.va)) 150 | 151 | @funcy.cached_property 152 | def id(self): 153 | return getVwFirstMeta(self.vw)["md5sum"] + ":" + hex(self.va) 154 | 155 | def __repr__(self): 156 | return "Function(va: {:s})".format(hex(self.va)) 157 | 
class BasicBlock:
    """a block of a function: `size` bytes starting at `va`, belonging to the function at `fva`."""

    def __init__(self, vw: Workspace, va: int, size: int, fva: int):
        super().__init__()
        self.vw, self.va, self.size, self.fva = vw, va, size, fva

    @funcy.cached_property
    def instructions(self) -> List[envi.Opcode]:
        """
        disassemble the block into a list of envi Opcode instances.

        decoding starts at the block's first address and proceeds
        instruction by instruction until the end of the block is reached,
        or until an instruction fails to parse (logged at debug level).

        useful Opcode attributes (see envi/__init__.py:class Opcode):
          opcode   - An architecture specific numerical value for the opcode
          mnem     - A human readable mnemonic for the opcode
          prefixes - a bitmask of architecture specific instruction prefixes
          size     - The size of the opcode in bytes
          operands - A list of Operand objects for this opcode
          iflags   - A list of Envi (architecture independent) instruction flags (see IF_FOO)
          va       - The virtual address the instruction lives at (used for PC relative immediates etc...)
        """
        opcodes = []
        cursor = self.va
        while cursor < self.va + self.size:
            try:
                insn = self.vw.parseOpcode(cursor)
            except Exception as e:
                # stop at the first undecodable address; keep what we have so far.
                logger.debug("failed to disassemble: %s: %s", hex(cursor), e)
                break
            opcodes.append(insn)
            cursor += len(insn)
        return opcodes

    def __repr__(self):
        fields = map(hex, (self.va, self.size, self.fva))
        return "BasicBlock(va: {:s}, size: {:s}, fva: {:s})".format(*fields)

    def __int__(self):
        return self.va

    def __len__(self):
        return self.size
def saveWorkspaceToBytes(vw: Workspace) -> bytes:
    """
    serialize a vivisect workspace to a Python string/bytes.

    note, this creates and deletes a temporary file on the
    local filesystem.

    the workspace's "StorageName" meta is temporarily pointed at the
    temporary file and restored before returning, even on failure.

    Args:
      vw: the workspace to serialize.

    Returns:
      bytes: the content that `vw.saveWorkspace()` wrote.
    """
    orig_storage = vw.getMeta("StorageName")
    try:
        fd, temp_path = tempfile.mkstemp(suffix="viv")
        # mkstemp hands us an open descriptor; we only want the path,
        # so close it right away rather than leak it.
        os.close(fd)
        try:
            vw.setMeta("StorageName", temp_path)
            vw.saveWorkspace()
            with open(temp_path, "rb") as f:
                # note: here's the exit point.
                return f.read()
        finally:
            # the temp path is a regular file: remove it (rmdir would always fail).
            try:
                os.remove(temp_path)
            except OSError:
                # best effort cleanup; don't mask the real result/exception.
                pass
    finally:
        vw.setMeta("StorageName", orig_storage)


def loadWorkspaceFromBytes(vw: Workspace, buf: bytes):
    """
    deserialize a vivisect workspace from a Python string/bytes.

    the bytes are written to a temporary file, which is loaded via
    `vw.loadWorkspace` and then deleted.

    Args:
      vw: the workspace to populate.
      buf: serialized workspace content.

    Returns:
      the same `vw` instance that was passed in.
    """
    fd, temp_path = tempfile.mkstemp(suffix="viv")
    try:
        # fdopen takes ownership of the descriptor and closes it with the file object.
        with os.fdopen(fd, "wb") as f:
            f.write(buf)
        vw.loadWorkspace(temp_path)
        assertVwMatchesVivisectLibrary(vw)
        # note: here's the exit point.
        return vw
    finally:
        # the temp path is a regular file: remove it (rmdir would always fail).
        try:
            os.remove(temp_path)
        except OSError:
            # best effort cleanup; don't mask the real result/exception.
            pass
389 | """ 390 | vw = Workspace() 391 | vw.verbose = True 392 | vw.config.viv.parsers.pe.nx = True 393 | vw.loadFromFile(filepath) 394 | setVwVivisectLibraryVersion(vw) 395 | if analyze: 396 | setVwVivisectLibraryVersion(vw) 397 | vw.analyze() 398 | return vw 399 | 400 | 401 | def get_prev_loc(vw: Workspace, va: Address): 402 | this_item = vw.getLocation(va) 403 | if this_item is None: 404 | # no location at the given address, 405 | # probe for a location directly before this one. 406 | prev_item = vw.getLocation(va - 1) 407 | else: 408 | this_va, _, _, _ = this_item 409 | prev_item = vw.getLocation(this_va - 1) 410 | 411 | if prev_item is None: 412 | raise ValueError("failed to find prev location for va: %x" % va) 413 | 414 | return prev_item 415 | 416 | 417 | def get_prev_opcode(vw: Workspace, va: Address): 418 | lva, lsize, ltype, linfo = get_prev_loc(vw, va) 419 | if ltype != vivisect.const.LOC_OP: 420 | raise ValueError("failed to find prev instruction for va: %x" % va) 421 | 422 | try: 423 | op = vw.parseOpcode(lva) 424 | except Exception: 425 | raise ValueError("failed to parse prev instruction for va: %x" % va) 426 | 427 | return op 428 | 429 | 430 | def get_all_xrefs_from(vw: Workspace, va: Address): 431 | """ 432 | get all xrefs, including fallthrough instructions, from this address. 433 | 434 | vivisect doesn't consider fallthroughs as xrefs. 435 | see: https://github.com/fireeye/flare-ida/blob/7207a46c18a81ad801720ce0595a151b777ef5d8/python/flare/jayutils.py#L311 436 | """ 437 | op = vw.parseOpcode(va) 438 | for tova, bflags in op.getBranches(): 439 | if bflags & envi.BR_PROC: 440 | continue 441 | yield (va, tova, vivisect.const.REF_CODE, bflags) 442 | 443 | 444 | def get_all_xrefs_to(vw: Workspace, va: Address): 445 | """ 446 | get all xrefs, including fallthrough instructions, to this address. 447 | 448 | vivisect doesn't consider fallthroughs as xrefs. 
class CFG(object):
    """
    control flow graph over the basic blocks of a single function.

    blocks are indexed by their start address (`bb_by_start`) and by the
    address of their final instruction (`bb_by_end`), and edges are
    resolved on demand from `get_all_xrefs_from`/`get_all_xrefs_to`.

    Raises:
      ValueError: if the function's address does not start one of its basic blocks.
    """

    def __init__(self, func: Function):
        self.vw = func.vw
        self.func = func
        self.bb_by_start = {bb.va: bb for bb in self.func.basic_blocks}
        if self.func.va not in self.bb_by_start:
            # particularly when dealing with junk code,
            # the address that we think starts a function may not,
            # such as when the given address falls in the middle of a basic block.
            raise ValueError("function at 0x%x not recognized" % (self.func.va))

        self.bb_by_end = {}
        for bb in self.func.basic_blocks:
            try:
                lva, _, ltype, _ = get_prev_loc(self.vw, bb.va + bb.size)
            except ValueError:
                # get_prev_loc signals "no location here" with ValueError.
                #
                # viv detects "function blocks" that we interpret as "basic blocks".
                # viv may have incorrect analysis, such that a block may not be made up of contiguous instructions.
                # if we can't find an instruction at the end of a basic block,
                # we're dealing with junk. don't index that BB.
                continue

            if ltype != vivisect.const.LOC_OP:
                # the block doesn't end with an instruction: junk, as above.
                continue

            self.bb_by_end[lva] = bb

        if len(self.bb_by_start) != len(self.bb_by_end):
            # there's probably junk code encountered
            logger.warning("cfg: incomplete control flow graph")

        self._succ_cache: Dict[Address, List[BasicBlock]] = {}
        self._pred_cache: Dict[Address, List[BasicBlock]] = {}

    def _find_successors(self, bb: BasicBlock) -> List[BasicBlock]:
        """resolve the blocks of this function reached from the last instruction of `bb`."""
        next_va = bb.va + bb.size
        try:
            op = get_prev_opcode(self.vw, next_va)
        except (RuntimeError, ValueError):
            # get_prev_opcode raises ValueError when it can't find/parse the instruction.
            # like above, if there's not an insn at the end of the BB,
            # we're dealing with junk, and there's not much point.
            return []

        successors = []
        for xref in get_all_xrefs_from(self.vw, op.va):
            succ = self.bb_by_start.get(xref[vivisect.const.XR_TO])
            if succ is None:
                # if we have a jump to the import table,
                # the target of the jump is not a basic block in the function.
                continue
            successors.append(succ)
        return successors

    def _find_predecessors(self, bb: BasicBlock) -> List[BasicBlock]:
        """resolve the blocks of this function whose last instruction references the start of `bb`."""
        predecessors = []
        for xref in get_all_xrefs_to(self.vw, bb.va):
            pred = self.bb_by_end.get(xref[vivisect.const.XR_FROM])
            if pred is None:
                # the reference doesn't come from the end of an indexed block.
                continue
            predecessors.append(pred)
        return predecessors

    def get_successor_basic_blocks(self, bb: BasicBlock) -> Iterator[BasicBlock]:
        """yield the basic blocks that may execute directly after `bb`."""
        # resolve and cache the complete list before yielding anything,
        # so that the cache is populated even when the caller stops iterating early.
        if bb.va not in self._succ_cache:
            self._succ_cache[bb.va] = self._find_successors(bb)
        yield from self._succ_cache[bb.va]

    def get_predecessor_basic_blocks(self, bb: BasicBlock) -> Iterator[BasicBlock]:
        """yield the basic blocks that may execute directly before `bb`."""
        # see get_successor_basic_blocks for why we cache before yielding.
        if bb.va not in self._pred_cache:
            self._pred_cache[bb.va] = self._find_predecessors(bb)
        yield from self._pred_cache[bb.va]

    def get_root_basic_blocks(self) -> Iterator[BasicBlock]:
        """yield the basic blocks that have no predecessors."""
        for bb in self.func.basic_blocks:
            if empty(self.get_predecessor_basic_blocks(bb)):
                yield bb

    def get_leaf_basic_blocks(self) -> Iterator[BasicBlock]:
        """yield the basic blocks that have no successors."""
        for bb in self.func.basic_blocks:
            if empty(self.get_successor_basic_blocks(bb)):
                yield bb
class Debugger(object):
    """
    convenience wrapper around a vivisect debugger object `v`.

    registers are exposed as attributes, like `dbg.eax` or `dbg.pc`;
    every other attribute read or write is forwarded to the wrapped object.
    only the 32-bit x86 registers listed in `REGISTERS` are recognized.
    """

    REGISTERS = {
        "eax",
        "ebx",
        "ecx",
        "edx",
        "esi",
        "edi",
        "esp",
        "ebp",
        "eip",
    }

    # architecture-neutral aliases for some of the registers above.
    _ALIASES = {
        "pc": "eip",
        "program_counter": "eip",
        "stack_pointer": "esp",
        "base_pointer": "ebp",
    }

    def __init__(self, v):
        super(Debugger, self).__init__()
        self.v = v

    def _register_for(self, k):
        """return the register name that attribute `k` refers to, or None if `k` is not a register."""
        if k in self._ALIASES:
            return self._ALIASES[k]
        if k in self.REGISTERS:
            return k
        return None

    def __getattr__(self, k):
        """
        support reg access shortcut, like::
            print(hex(dbg.pc))
            print(hex(dbg.eax))
        register names are lowercase.
        `pc` is a shortcut for the platform program counter.

        any other name is looked up on the wrapped object.
        """
        if k == "v":
            # only reached when `v` has not been assigned yet.
            # fail like a normal missing attribute instead of recursing via `self.v`.
            raise AttributeError(k)

        reg = self._register_for(k)
        if reg is not None:
            return self.v.getTrace().getRegisterByName(reg)

        return getattr(self.v, k)

    def __setattr__(self, k, v):
        """
        set reg shortcut, like::
            dbg.pc = 0x401000
            dbg.eax = 0xAABBCCDD
        register names are lowercase.
        `pc` is a shortcut for the platform program counter.

        any other name is set on the wrapped object.
        """
        if k == "v":
            # the wrapped object itself lives on this instance.
            object.__setattr__(self, k, v)
            return

        reg = self._register_for(k)
        if reg is not None:
            self.v.getTrace().setRegisterByName(reg, v)
            return

        # python has no `__setattribute__` hook; `setattr` is the way to forward a write.
        setattr(self.v, k, v)

    def write_memory(self, va: Address, buf: bytes):
        """write `buf` at address `va` via the wrapped object's memory object."""
        self.v.memobj.writeMemory(va, buf)

    def read_memory(self, va: Address, size: int):
        """read `size` bytes at address `va` via the wrapped object's trace."""
        return self.v.trace.readMemory(va, size)

    def read_dword(self, va: Address) -> int:
        """read a little-endian unsigned 32-bit value at `va`."""
        return struct.unpack("<I", self.read_memory(va, 4))[0]

    def write_dword(self, va: Address, v: int):
        """write `v` at `va` as a little-endian unsigned 32-bit value."""
        self.write_memory(va, struct.pack("<I", v))

    def read_ascii(self, va: Address) -> str:
        """read up to 1024 bytes at `va` and decode the part before the first NUL as ASCII."""
        buf = self.read_memory(va, 1024)
        return buf.partition(b"\x00")[0].decode("ascii")

    def pop(self) -> int:
        """read the dword at the top of the stack and move `esp` up by four bytes."""
        v = self.read_dword(self.esp)  # type: ignore
        self.esp = self.esp + 4  # type: ignore
        return v

    def push(self, v: int):
        """move `esp` down by four bytes and store `v` at the new top of the stack."""
        self.esp = self.esp - 4
        self.write_dword(self.esp, v)
class hexdump:
    """
    iterable that renders a buffer as hex dump lines, 16 bytes per line.

    a run of identical 16-byte rows is collapsed into a single `*` line,
    and the final line is the end offset of the buffer.
    `str()` and `repr()` both return the lines joined by newlines.
    """

    # via: https://gist.github.com/NeatMonster/c06c61ba4114a2b31418a364341c26c0
    def __init__(self, buf, off=0):
        self.buf = buf
        self.off = off

    def __iter__(self):
        def as_hex(octets):
            return " ".join("{:02x}".format(b) for b in octets)

        def as_text(octets):
            return "".join(chr(b) if 32 <= b < 127 else "." for b in octets)

        total = len(self.buf)
        prev_row = None
        prev_line = None

        for start in range(0, total, 16):
            row = bytearray(self.buf[start : start + 16])

            if row == prev_row:
                # same bytes as the row before: represented by a single marker line.
                line = "*"
                should_emit = prev_line != line
            else:
                line = "{:08x} {:23} {:23} |{:16}|".format(
                    self.off + start,
                    as_hex(row[:8]),
                    as_hex(row[8:]),
                    as_text(row),
                )
                should_emit = True

            if should_emit:
                yield line

            prev_row = row
            prev_line = line

        yield "{:08x}".format(self.off + total)

    def __str__(self):
        return "\n".join(self)

    def __repr__(self):
        return "\n".join(self)
class BreakpointHit(Exception):
    """
    raised by a monitor to stop emulation at a specific address.

    Attributes:
      va: the address at which emulation was stopped.
      reason: optional tag describing why emulation stopped there.
    """

    def __init__(self, va: int, reason=None):
        # initialize the base class so that `args`, `str()`, and copying work as for any exception.
        super().__init__(va, reason)
        self.va = va
        self.reason = reason

    def __str__(self):
        # format defensively: `va` is expected to be an int, but don't fail while reporting an error.
        va = f"{self.va:#x}" if isinstance(self.va, int) else repr(self.va)
        if self.reason is None:
            return f"breakpoint hit at {va}"
        return f"breakpoint hit at {va} (reason: {self.reason})"
class Monitor(vivisect.impemu.monitor.EmulationMonitor):
    """
    base class for the emulation monitors in this module.

    every callback does nothing by default (and `apicall` reports "not handled");
    subclasses override only the callbacks they care about.
    """

    def prehook(self, emu, op, startpc):
        """callback for the instruction `op` at `startpc`, invoked before it is emulated."""

    def posthook(self, emu, op, endpc):
        """callback for the instruction `op`, invoked after it was emulated; `endpc` is the resulting pc."""

    def preblock(self, emu, blockstart):
        """
        called when entering a newly recognized basic block.
        the block analysis here is not guaranteed to be perfect,
        but should work fairly well during FullCoverage emulation.
        """

    def postblock(self, emu, blockstart, blockend):
        """called when leaving a recognized basic block."""

    def apicall(self, emu, api, argv):
        """
        callback for an API call.
        returning True signals that the API call was handled.
        """
        return False

    def logAnomaly(self, emu, pc, e):
        """log the anomaly `e` as a warning."""
        logger.warning("monitor: anomaly: %s", e)
def step(self, avoid_calls):
    """
    emulate a single instruction at the current program counter.

    every monitor's `prehook` runs before the instruction and `posthook` after it.
    calls and jumps are delegated to `handle_call`/`handle_jmp` with `avoid_calls`;
    anything else is executed directly by the emulator.
    an exception raised by a monitor or the emulator propagates to the caller.
    """
    emulator = self._emu

    pc_before = emulator.getProgramCounter()
    insn = emulator.parseOpcode(pc_before)

    for monitor in self._monitors:
        monitor.prehook(emulator, insn, pc_before)

    # pick the special-case handler, if any; calls take precedence over jumps.
    handler = None
    if self.is_call(insn):
        handler = self.handle_call
    elif self.is_jmp(insn):
        handler = self.handle_jmp

    if handler is None:
        emulator.executeOpcode(insn)
    else:
        handler(insn, avoid_calls=avoid_calls)

    pc_after = emulator.getProgramCounter()

    for monitor in self._monitors:
        monitor.posthook(emulator, insn, pc_after)
def run_to_va(self, va: int):
    """
    run until the given address is reached (but not executed).

    a temporary monitor that stops at `va` is installed for the duration of the run
    and removed afterwards, whatever the outcome.

    returns normally when emulation stopped with a `BreakpointHit` at `va`.
    a `BreakpointHit` at any other address (for example a breakpoint,
    or an instruction limit) is re-raised to the caller.
    """
    stop_at_va = UntilVAMonitor(va)
    self.add_monitor(stop_at_va)

    try:
        self.run()
    except BreakpointHit as hit:
        if hit.va == va:
            # we arrived at the requested address: that's success, not an error.
            return
        raise
    finally:
        self.remove_monitor(stop_at_va)
def get_branches(self, op):
    """
    collect the addresses to which the given instruction may branch.

    returns an empty list when `op` is not a branch or has no branches.
    for a recognized jump table, the targets are the workspace's code xrefs from `op`.
    for a conditional branch, every non-null branch target is returned.
    otherwise, every branch target not flagged `BR_DEREF` is returned.
    """
    emulator = self._emu
    workspace = emulator.vw

    if not (op.iflags & envi.IF_BRANCH):
        return []

    code_xrefs = workspace.getXrefsFrom(op.va, rtype=vivisect.const.REF_CODE)
    if self.is_table(op, code_xrefs):
        return [xrto for _xrfrom, xrto, _xrtype, _xrflags in code_xrefs]

    branches = op.getBranches(emu=emulator)
    if not branches:
        return []

    if self.is_conditional(op):
        return [target for target, _flags in branches if target]

    # we've hit a branch that doesn't go anywhere.
    # probably a switchcase we don't handle well.
    return [target for target, flags in branches if not (flags & envi.BR_DEREF)]
656 | """ 657 | emu = self._emu 658 | 659 | startpc = emu.getProgramCounter() 660 | op = emu.parseOpcode(startpc) 661 | 662 | for mon in self._monitors: 663 | mon.prehook(emu, op, startpc) 664 | 665 | branches = self.get_branches(op) 666 | 667 | if self.is_call(op): 668 | skipped = not self.handle_call(op, avoid_calls=True) 669 | elif self.is_jmp(op): 670 | skipped = not self.handle_jmp(op, avoid_calls=True) 671 | else: 672 | emu.executeOpcode(op) 673 | skipped = False 674 | 675 | endpc = emu.getProgramCounter() 676 | 677 | for mon in self._monitors: 678 | mon.posthook(emu, op, endpc) 679 | 680 | does_fallthrough = not (op.iflags & envi.IF_NOFALL) 681 | 682 | if skipped: 683 | return does_fallthrough, [] 684 | else: 685 | return does_fallthrough, branches 686 | 687 | def run(self, va: int): 688 | # explore from the given address, emulating all encountered instructions once. 689 | # 690 | # use a queue of emulator snaps, one for each block that still needs to be explored. 691 | # use a set to track the instructions already emulated. 692 | # 693 | # when emulating an instruction, here are the cases: 694 | # - instruction not supported: skip to next one 695 | # - invalid instruction: stop emulation 696 | # - branching instruction: stop emulation, add snap for each branch 697 | # - fallthrough to new instruction: step to next instruction 698 | # - fallthrough to seen instruction: stop emulation 699 | # - no fallthrough (like ret): stop emulation 700 | emu = self._emu 701 | emu.setProgramCounter(va) 702 | 703 | # queue of emulator snapshots to explore 704 | q = collections.deque([emu.getEmuSnap()]) 705 | 706 | # set of branch targets that have already been explored. 707 | seen = set() 708 | 709 | while q: 710 | snap = q.popleft() 711 | 712 | emu.setEmuSnap(snap) 713 | blockstart = emu.getProgramCounter() 714 | 715 | if blockstart in seen: 716 | # this block has already been explored, 717 | # don't do duplicate work. 
718 | continue 719 | 720 | seen.add(blockstart) 721 | 722 | for mon in self._monitors: 723 | mon.preblock(self, blockstart) 724 | 725 | while True: 726 | # the address of the instruction we're about to emulate. 727 | lastpc = emu.getProgramCounter() 728 | seen.add(lastpc) 729 | 730 | try: 731 | does_fallthrough, branches = self.step() 732 | except v_envi.UnsupportedInstruction: 733 | # don't know how to emulate the instruction. 734 | # skip it and hope we can fallthrough and keep emulating. 735 | op = emu.parseOpcode(lastpc) 736 | emu.setProgramCounter(lastpc + op.size) 737 | 738 | logger.debug( 739 | "driver: run_function: skipping unsupported instruction: 0x%x %s", 740 | lastpc, 741 | op.mnem, 742 | ) 743 | 744 | continue 745 | except v_envi.InvalidInstruction: 746 | # don't know how to decode the instruction. 747 | # so we don't know its length, and there's nothing we can do. 748 | 749 | logger.debug( 750 | "driver: run_function: invalid instruction: 0x%x", 751 | lastpc, 752 | ) 753 | 754 | blockend = lastpc 755 | for mon in self._monitors: 756 | mon.postblock(self, blockstart, blockend) 757 | 758 | # stop emulating, and go to next block in the queue. 759 | break 760 | except envi.exc.BreakpointHit: 761 | # emulation likely wandered off, e.g., into alignment (CC bytes) 762 | 763 | # stop emulating, and go to next block in the queue. 764 | break 765 | 766 | if branches: 767 | blockend = lastpc 768 | 769 | # other case: branching instruction. 770 | # enqueue all the branch options for exploration. 771 | for branch in branches: 772 | if branch in seen: 773 | continue 774 | 775 | emu.setProgramCounter(branch) 776 | q.append(emu.getEmuSnap()) 777 | 778 | for mon in self._monitors: 779 | mon.postblock(self, blockstart, blockend) 780 | 781 | # stop emulating this basic block, 782 | # go to next block in the queue. 783 | break 784 | 785 | elif does_fallthrough: 786 | # common case: middle of BB, keep stepping. 
class SinglePathEmulatorDriver(FullCoverageEmulatorDriver):
    """
    an emulator that follows the first path it finds to a target VA.
    the path is brute-forced by the full coverage exploration of the parent class.
    """

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)

    def run_to_va(self, va: int, tova: int):
        """
        explore from `va` until `tova` is reached; see the `run` function.

        a temporary monitor that stops at `tova` is installed for the exploration
        and always removed afterwards.
        returns normally when a `BreakpointHit` at `tova` ended the exploration;
        a `BreakpointHit` at any other address is re-raised.
        """
        stop_at_target = UntilVAMonitor(tova)
        self.add_monitor(stop_at_target)
        try:
            self.run(va)
        except BreakpointHit as hit:
            if hit.va == tova:
                # reached the target address: done.
                return
            raise
        finally:
            self.remove_monitor(stop_at_target)
@contextlib.contextmanager
def timing(msg):
    """
    context manager that logs, at debug level, how long the wrapped block took.

    the elapsed time is logged even when the block raises;
    the exception still propagates to the caller.

    args:
      msg (str): label for the measurement, included in the log line.
    """
    # perf_counter is monotonic, so wall-clock adjustments cannot skew the measurement.
    t0 = time.perf_counter()
    try:
        yield
    finally:
        t1 = time.perf_counter()
        logger.debug("perf: %s: %0.2fs", msg, t1 - t0)
def get_match_name(match):
    """
    fetch the best name for a `flirt.FlirtSignature` instance.
    these instances returned by `flirt.FlirtMatcher.match()`
    may have multiple names, such as public and local names for different parts
    of a function. the best name is that at offset zero (the function name).

    probably every signature has a best name, though I'm not 100% sure.

    args:
      match (flirt.FlirtSignature): the signature to get a name from.

    returns:
      str: the best name of the function matched by the given signature.

    raises:
      ValueError: when none of the names is at offset zero.
    """
    for name, _type, offset in match.names:
        if offset == 0:
            return name

    # exceptions don't interpolate logging-style arguments, so format the message here.
    raise ValueError(f"flirt: match: no best name: {match.names}")
107 | 108 | if multiple different signatures match the function, don't do anything. 109 | 110 | args: 111 | match (flirt.FlirtMatcher): the compiled FLIRT signature matcher. 112 | vw (vivisect.workspace): the analyzed program's workspace. 113 | va (int): the virtual address of a function to match. 114 | cache (Optional[Dict[int, Union[str, None]]]): internal cache of matches VA -> name or None on "no match". 115 | no need to provide as external caller. 116 | """ 117 | if cache is None: 118 | # we cache both successful and failed lookups. 119 | # 120 | # (callers of this function don't need to initialize the cache. 121 | # we'll provide one during recursive calls when we need it.) 122 | # 123 | # while we can use funcmeta to retrieve existing successful matches, 124 | # we don't persist failed matches, 125 | # because another FLIRT matcher might come along with better knowledge. 126 | # 127 | # however, when we match reference names, especially chained together, 128 | # then we need to cache the negative result, or we do a ton of extra work. 129 | # "accidentally quadratic" or worse. 130 | # see https://github.com/fireeye/capa/issues/448 131 | cache = {} 132 | 133 | function_meta = vw.funcmeta.get(va) 134 | if not function_meta: 135 | # not a function, we're not going to consider this. 136 | return 137 | 138 | if va in cache: 139 | return 140 | 141 | if is_library_function(vw, va): 142 | # already matched here. 143 | # this might be the case if recursive matching visited this address. 144 | name = viv_utils.get_function_name(vw, va) 145 | cache[va] = name 146 | return 147 | 148 | # as seen in https://github.com/williballenthin/lancelot/issues/112 149 | # Hex-Rays may distribute signatures that match across multiple functions. 150 | # therefore, we cannot rely on fetching just a single function's data. 151 | # in fact, we really don't know how much data to fetch. 152 | # so, lets pick an unreasonably large number and hope it works. 
153 | # 154 | # perf: larger the size, more to memcpy. 155 | size = max(0x10000, function_meta.get("Size", 0)) 156 | 157 | buf = viv_utils.readMemoryCurrentSection(vw, va, size) 158 | 159 | matches = [] 160 | for match in matcher.match(buf): 161 | # collect all the name tuples (name, type, offset) with type==reference. 162 | # ignores other name types like "public" and "local". 163 | references = list(filter(lambda n: n[1] == "reference", match.names)) 164 | 165 | if not references: 166 | # there are no references that we need to check, so this is a complete match. 167 | # common case. 168 | matches.append(match) 169 | 170 | else: 171 | # flirt uses reference names to assert that 172 | # the function contains a reference to another function with a given name. 173 | # 174 | # we need to loop through these references, 175 | # potentially recursively FLIRT match, 176 | # and check the name matches (or doesn't). 177 | 178 | # at the end of the following loop, 179 | # if this flag is still true, 180 | # then all the references have been validated. 181 | does_match_references = True 182 | 183 | for ref_name, _, ref_offset in references: 184 | ref_va = va + ref_offset 185 | 186 | # the reference offset may be inside an instruction, 187 | # so we use getLocation to select the containing instruction address. 188 | location = vw.getLocation(ref_va) 189 | if location is None: 190 | does_match_references = False 191 | break 192 | 193 | loc_va = location[vivisect.const.L_VA] 194 | 195 | # an instruction may have multiple xrefs from 196 | # so we loop through all code references, 197 | # searching for that name. 198 | # 199 | # if the name is found, then this flag will be set. 200 | does_match_the_reference = False 201 | for xref in vw.getXrefsFrom(loc_va): 202 | if ref_name == ".": 203 | # special case: reference named `.` 204 | # which right now we interpret to mean "any data reference". 
205 | # see: https://github.com/williballenthin/lancelot/issues/112#issuecomment-802379966 206 | # 207 | # unfortunately, viv doesn't extract the xref for this one sample, 208 | # so this is untested. 209 | does_match_the_reference = xref[vivisect.const.XR_RTYPE] == vivisect.const.REF_DATA 210 | 211 | else: 212 | # common case 213 | # 214 | # FLIRT signatures only match code, 215 | # so we're only going to resolve references that point to code. 216 | if xref[vivisect.const.XR_RTYPE] != vivisect.const.REF_CODE: 217 | continue 218 | 219 | target = xref[vivisect.const.XR_TO] 220 | match_function_flirt_signatures(matcher, vw, target, cache) 221 | 222 | # the matching will have updated the vw in place, 223 | # so now we inspect any names found at the target location. 224 | if is_library_function(vw, target): 225 | found_name = viv_utils.get_function_name(vw, target) 226 | cache[target] = found_name 227 | if found_name == ref_name: 228 | does_match_the_reference = True 229 | break 230 | else: 231 | cache[target] = None 232 | 233 | if not does_match_the_reference: 234 | does_match_references = False 235 | break 236 | 237 | if does_match_references: 238 | # only if all references pass do we count it. 239 | matches.append(match) 240 | 241 | if not matches: 242 | cache[va] = None 243 | return 244 | 245 | # we may have multiple signatures that match the same function, like `strcpy`. 246 | # these could be copies from multiple libraries. 247 | # so we don't mind if there are multiple matches, as long as names are the same. 248 | # 249 | # but if there are multiple candidate names, that's a problem. 250 | # our signatures are not precise enough. 251 | # we could maybe mark the function as "is a library function", but not assign name. 252 | # though, if we have signature FPs among library functions, it could easily FP with user code too. 253 | # so safest thing to do is not make any claim about the function. 
254 | names = list(set(map(get_match_name, matches))) 255 | 256 | if len(names) != 1: 257 | cache[va] = None 258 | logger.debug("conflicting names: 0x%x: %s", va, names) 259 | return 260 | 261 | # there's one candidate name, 262 | # so all the matches *should* be about the same, i'd assume. 263 | match = matches[0] 264 | 265 | # first add local names, then we'll do public names 266 | # this way public names have precedence. 267 | # see: https://github.com/williballenthin/lancelot/issues/112#issuecomment-802221966 268 | for name, type_, offset in match.names: 269 | if type_ != "local": 270 | continue 271 | 272 | if not vw.isFunction(va + offset): 273 | # since we're registered as a function analyzer, 274 | # we have to deal with a race condition: 275 | # the location for which we have a name may not yet be a function. 276 | # 277 | # we can detect via two facts: 278 | # - the location hasn't been processed yet 279 | # - the address is executable 280 | if vw.getLocation(va + offset) is None and vw.probeMemory(va + offset, 1, envi.memory.MM_EXEC): 281 | # so lets try to turn it into a function 282 | vw.makeFunction(va + offset) 283 | 284 | try: 285 | add_function_flirt_match(vw, va + offset, name) 286 | except vivisect.exc.InvalidFunction: 287 | continue 288 | else: 289 | cache[va + offset] = name 290 | logger.debug("found local function name: 0x%x: %s", va + offset, name) 291 | 292 | for name, type_, offset in match.names: 293 | if type_ != "public": 294 | continue 295 | 296 | try: 297 | add_function_flirt_match(vw, va + offset, name) 298 | except vivisect.exc.InvalidFunction: 299 | continue 300 | else: 301 | cache[va + offset] = name 302 | logger.debug("found library function: 0x%x: %s", va + offset, name) 303 | 304 | return 305 | 306 | 307 | class FlirtFunctionAnalyzer: 308 | def __init__(self, matcher, name=None): 309 | self.matcher = matcher 310 | self.name = name 311 | 312 | def analyzeFunction(self, vw: vivisect.VivWorkspace, funcva: int): 313 | 
def addFlirtFunctionAnalyzer(vw, analyzer):
    """
    register the given analyzer instance as a function analyzer of the workspace.

    this is basically the logic in `vivisect.VivWorkspace.addFuncAnalysisModule`.
    however, that routine assumes the analyzer is a Python module, which is basically a global,
    so here the analyzer is placed into the analyzer queue by hand.
    notably, this lets a user register multiple FlirtAnalyzers for different signature sets.

    the analyzer is keyed by its `repr()`.

    raises:
      ValueError: when an analyzer with the same key is already registered.
    """
    analyzer_key = repr(analyzer)

    already_registered = analyzer_key in vw.fmodlist
    if already_registered:
        raise ValueError("analyzer already present")

    # keep both the ordered queue and the lookup table in sync.
    vw.fmods[analyzer_key] = analyzer
    vw.fmodlist.append(analyzer_key)
def requires_ida(f):
    """
    declare that the wrapped function requires the IDA Pro scripting API.
    calling the function will raise `RuntimeError` if the API cannot be imported.

    Args:
      f (callable): function that uses the IDA Pro modules.

    Returns:
      callable: wrapper with the same name/docstring as `f` that checks for
        the API on every call before delegating to `f`.
    """

    @functools.wraps(f)
    def inner(*args, **kwargs):
        # `idc` is only bound at module level when the guarded import of the
        # IDA modules succeeded, so its presence marks API availability.
        if "idc" not in globals():
            raise RuntimeError("IDA Pro not present")
        return f(*args, **kwargs)

    # return the guard, not the bare function, otherwise the check never runs.
    return inner
@requires_ida
def is_x86():
    """
    is the currently loaded module 32-bit x86?
    """
    try:
        procname = idaapi.get_inf_structure().procname
    except AttributeError:
        # fall back to the accessor function when the inf structure route is unavailable.
        procname = ida_ida.inf_get_procname()

    if procname != "metapc":
        return False
    return ida_ida.inf_is_32bit_exactly() and not ida_ida.inf_is_64bit()


@requires_ida
def is_x64():
    """
    is the currently loaded module 64-bit x86?
    """
    try:
        procname = idaapi.get_inf_structure().procname
    except AttributeError:
        # fall back to the accessor function when the inf structure route is unavailable.
        procname = ida_ida.inf_get_procname()

    if procname != "metapc":
        return False
    return not ida_ida.inf_is_32bit_exactly() and ida_ida.inf_is_64bit()


@requires_ida
def is_exe():
    """
    is the currently loaded module a PE file?
    you can *probably* assume its for windows, if so.
    """
    file_type = idaapi.get_file_type_name()
    return "Portable executable" in file_type


@requires_ida
def get_data(start, size):
    """
    read the given amount of data from the given start address.
    missing bytes are filled in with NULLs.

    Args:
      start (int): start address.
      size (int): number of bytes to read.

    Returns:
      bytes: `size` bytes, filled with NULL when byte not available from database.
    """
    # fast path: the whole range can be fetched in one call.
    whole = idc.get_bytes(start, size)
    if whole:
        return whole

    # slow path: fetch byte-by-byte, substituting NULL for anything undefined.
    return b"".join(idc.get_bytes(ea, 1) or b"\x00" for ea in range(start, start + size))


PAGE_SIZE = 0x1000
@requires_ida
def get_segment_data(segstart):
    """
    read the contents of the segment containing the given address.

    Args:
      segstart (int): start address of a segment.

    Returns:
      bytes: the bytes of the segment, filled with NULL when byte not available from database.
    """
    segend = idc.get_segm_end(segstart)
    full_pages, tail = divmod(segend - segstart, PAGE_SIZE)

    # read page-sized chunks first, since a page is usually accessible as a whole.
    chunks = [get_data(segstart + page * PAGE_SIZE, PAGE_SIZE) for page in range(full_pages)]

    # segments *should* be page- or sector-aligned in a real PE, but that is not guaranteed,
    # especially in IDA, so pick up whatever is left over.
    if tail:
        chunks.append(get_data(segstart + full_pages * PAGE_SIZE, tail))

    return b"".join(chunks)


@requires_ida
def get_exports():
    """
    enumerate the exports of the currently loaded module.

    Yields:
      Tuple[int, int, str]:
        - address of exported function
        - export ordinal
        - name of exported function
    """
    for entry in idautils.Entries():
        _index, ordinal, ea, name = entry
        yield ea, ordinal, name


@requires_ida
def get_imports():
    """
    enumerate the imports of the currently loaded module.

    Yields:
      Tuple[int, str, str, int]:
        - address of import table pointer
        - name of imported library
        - name of imported function
        - ordinal of import
    """
    module_count = idaapi.get_import_module_qty()
    for module_index in range(module_count):
        dllname = idaapi.get_import_module_name(module_index)
        if dllname:
            collected = []

            def collect(ea, name, ordinal):
                collected.append((ea, name, ordinal))
                # returning True asks IDA to continue the enumeration.
                return True

            idaapi.enum_import_names(module_index, collect)

            for ea, name, ordinal in collected:
                yield ea, dllname, name, ordinal
@requires_ida
def get_import_thunk(import_addr):
    """
    find import thunk for the given import pointer.
    this is a function that simply jumps to the external implementation of the routine.

    Args:
      import_addr (int): address of import table pointer.

    Returns:
      int: address of function thunk.

    Raises:
      ValueError: when the thunk does not exist.
    """
    for xref in idautils.XrefsTo(import_addr):
        # XrefTypeName(3) == 'Data_Read'
        reads_pointer = xref.type == 3
        if reads_pointer and idc.print_insn_mnem(xref.frm) == "jmp":
            return xref.frm

    raise ValueError("thunk does not exist")


@requires_ida
def get_functions():
    """
    enumerate the functions in the currently loaded module.

    Yields:
      int: address of the function.
    """
    # NOTE(review): the range is the single segment that contains the database's
    # minimum address, so functions in other segments are not visited - confirm
    # that this is intended.
    anchor = ida_ida.inf_get_min_ea()
    range_start = idc.get_segm_start(anchor)
    range_end = idc.get_segm_end(anchor)
    yield from idautils.Functions(range_start, range_end)
@requires_ida
def loadWorkspaceFromIdb():
    """
    from IDA Pro, load the currently loaded module into a vivisect workspace.
    currently only supports windows PE files.

    Returns:
      vivisect.VivWorkspace: the loaded and analyzed vivisect workspace.

    Raises:
      NotImplementedError: when the module is neither 32-bit nor 64-bit x86,
        or is not a PE file.
      RuntimeError: when segment data cannot be read.
    """
    vw = vivisect.VivWorkspace()

    if is_x86():
        vw.setMeta("Architecture", "i386")
    elif is_x64():
        vw.setMeta("Architecture", "amd64")
    else:
        raise NotImplementedError("unsupported architecture")

    if not is_exe():
        raise NotImplementedError("unsupported file format")

    vw.setMeta("Platform", "windows")
    vw.setMeta("Format", "pe")
    vw._snapInAnalysisModules()

    filename = vw.addFile(ida_nalt.get_root_filename(), idaapi.get_imagebase(), idautils.GetInputFileMD5())

    for segstart in idautils.Segments():
        segname = idc.get_segm_name(segstart)
        segbuf = get_segment_data(segstart)

        if segbuf is None:
            raise RuntimeError("failed to read segment data")

        # look the segment up by address rather than by name:
        # names are not guaranteed to be unique, addresses are.
        # NOTE(review): IDA's permission bits are handed to vivisect unchanged,
        # as before - confirm that the two encodings agree.
        perm = idaapi.getseg(segstart).perm

        logger.debug("mapping section %s with %x bytes", segname, len(segbuf))
        vw.addMemoryMap(segstart, perm, filename, segbuf)
        vw.addSegment(segstart, len(segbuf), segname, filename)

    for ea, ordinal, name in get_exports():
        logger.debug("marking export %s at %x", name, ea)
        vw.addEntryPoint(ea)
        vw.addExport(ea, vivisect.const.EXP_FUNCTION, name, filename)

    for ea, dllname, name, ordinal in get_imports():
        logger.debug("marking import %s!%s at %x", dllname, name, ea)
        vw.makeImport(ea, dllname, name)

    logger.debug("running vivisect auto-analysis")
    vw.analyze()

    # after auto-analysis, carry over the functions and names that IDA knows about.
    for fva in get_functions():
        funcname = idc.get_func_name(fva)
        logger.debug("marking function %s at %x", funcname, fva)
        vw.makeFunction(fva)
        vw.makeName(fva, funcname)

    # can only set thunk-ness after a function is defined.
    for ea, dllname, name, ordinal in get_imports():
        try:
            thunk = get_import_thunk(ea)
        except ValueError:
            # not every import has a thunk; nothing to mark.
            continue

        logger.debug("found thunk for %s.%s at %x", dllname, name, thunk)
        vw.makeFunction(thunk)
        vw.makeFunctionThunk(thunk, "%s.%s" % (dllname, name))

    return vw
def load_flirt_signature(path):
    """
    parse the signature file at `path`, picking the parser by file extension
    (.sig, .pat, or .pat.gz).

    args:
      path (str): file system path of the signature file.

    returns:
      the parsed signatures, as returned by `flirt.parse_sig`/`flirt.parse_pat`.

    raises:
      ValueError: when the extension is not one of the above.
    """
    if path.endswith(".sig"):
        with open(path, "rb") as f:
            sigs = flirt.parse_sig(f.read())

    elif path.endswith(".pat"):
        with open(path, "rb") as f:
            # normalize windows line endings, like `viv_utils.flirt.load_flirt_signature` does.
            sigs = flirt.parse_pat(f.read().decode("utf-8").replace("\r\n", "\n"))

    elif path.endswith(".pat.gz"):
        with gzip.open(path, "rb") as f:
            # normalize windows line endings, like `viv_utils.flirt.load_flirt_signature` does.
            sigs = flirt.parse_pat(f.read().decode("utf-8").replace("\r\n", "\n"))

    else:
        raise ValueError("unexpect signature file extension: " + path)

    return sigs


def register_flirt_signature_analyzers(vw, sigpaths):
    """
    load each signature file and register one FLIRT function analyzer per file.

    args:
      vw (vivisect.VivWorkspace):
      sigpaths (List[str]): file system paths of .sig/.pat files

    raises:
      ValueError: when a path has an unsupported extension.
    """
    for sigpath in sigpaths:
        sigs = load_flirt_signature(sigpath)

        logger.debug("flirt: sig count: %d", len(sigs))

        matcher = flirt.compile(sigs)

        analyzer = viv_utils.flirt.FlirtFunctionAnalyzer(matcher, sigpath)
        logger.debug("registering viv function analyzer: %s", repr(analyzer))
        viv_utils.flirt.addFlirtFunctionAnalyzer(vw, analyzer)
def get_workspace(path, sigpaths):
    """
    load the sample at `path` with the given FLIRT signatures registered before analysis.

    args:
      path (str): path to the sample to analyze.
      sigpaths (List[str]): file system paths of .sig/.pat files.

    returns:
      the analyzed workspace.
    """
    # defer analysis so that the FLIRT analyzers are in place when it runs.
    vw = viv_utils.getWorkspace(path, analyze=False, should_save=False)
    register_flirt_signature_analyzers(vw, sigpaths)
    vw.analyze()
    return vw


def main():
    """
    print the address and name of each function recognized as a library function.

    returns:
      int: process exit code, always 0.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("-d", "--debug", action="store_true", help="enable debugging output on STDERR")
    parser.add_argument("-q", "--quiet", action="store_true", help="disable all output but errors")
    parser.add_argument(
        "signature",
        type=str,
        help="use the given signatures to identify library functions, file system paths to .sig/.pat files.",
    )
    parser.add_argument(
        "sample",
        type=str,
        help="path to sample to analyze",
    )

    args = parser.parse_args()

    if args.quiet:
        logging.basicConfig(level=logging.WARNING)
        logging.getLogger().setLevel(logging.WARNING)
    elif args.debug:
        logging.basicConfig(level=logging.DEBUG)
        logging.getLogger().setLevel(logging.DEBUG)

        # these loggers are kept at INFO even in debug mode.
        logging.getLogger("vivisect").setLevel(logging.INFO)
        logging.getLogger("vivisect.base").setLevel(logging.INFO)
        logging.getLogger("vivisect.impemu").setLevel(logging.INFO)
        logging.getLogger("vtrace").setLevel(logging.INFO)
        logging.getLogger("envi").setLevel(logging.INFO)
        logging.getLogger("envi.codeflow").setLevel(logging.INFO)
    else:
        logging.basicConfig(level=logging.INFO)
        logging.getLogger().setLevel(logging.INFO)
        logging.getLogger("vivisect").setLevel(logging.WARNING)

    vw = get_workspace(args.sample, [args.signature])

    for va in sorted(vw.getFunctions()):
        if viv_utils.flirt.is_library_function(vw, va):
            name = viv_utils.get_function_name(vw, va)
            print("0x%x: %s" % (va, name))

    return 0
class CallArgumentMonitor(viv_utils.emulator_drivers.Monitor):
    """collect call arguments to a target function during emulation"""

    def __init__(self, vw, target_fva):
        """:param target_fva: address of function whose arguments to monitor"""
        viv_utils.emulator_drivers.Monitor.__init__(self, vw)
        self._fva = target_fva
        self._calls = {}

    def apicall(self, emu, op, pc, api, argv):
        """record `argv` when the call at hand targets the monitored function."""
        # NOTE(review): the key is whatever sits in stack slot 0 at the call;
        # presumably that is the return address, i.e. it identifies the call site -
        # confirm against `Monitor.getStackValue`.
        rv = self.getStackValue(emu, 0)
        if pc == self._fva:
            self._calls[rv] = argv

    def getCalls(self):
        """get a copy of the map from stack slot 0 value (see `apicall`) to arguments of each recorded call"""
        return self._calls.copy()


def emulate_function(vw, fva, target_fva):
    """
    run the given function while collecting arguments to a target function,
    then print one line per recorded call.

    :param fva: address of the function to emulate
    :param target_fva: address of the function whose arguments to collect
    """
    emu = vw.getEmulator()
    d = viv_utils.emulator_drivers.FunctionRunnerEmulatorDriver(emu)

    m = CallArgumentMonitor(vw, target_fva)
    d.add_monitor(m)

    d.runFunction(fva, maxhit=1)

    # `dict.iteritems` only exists on Python 2; `items` works everywhere.
    for k, v in m.getCalls().items():
        print(hex(k) + ": " + str(v))


def _main(bin_path, ofva):
    """
    emulate every function that calls the function at `ofva`, printing the call arguments.

    :param bin_path: path to the sample to analyze
    :param ofva: address of the target function, as a hex string
    """
    fva = int(ofva, 0x10)
    logging.basicConfig(level=logging.DEBUG)

    vw = viv_utils.getWorkspace(bin_path)

    index = viv_utils.InstructionFunctionIndex(vw)

    # optimization: avoid re-processing the same function repeatedly
    called_fvas = set()
    for callerva in vw.getCallers(fva):
        # the address of the function that contains this instruction
        callerfva = index[callerva]
        if callerfva in called_fvas:
            continue

        emulate_function(vw, callerfva, fva)

        called_fvas.add(callerfva)

    return
def main():
    """script entry point: hand the command line arguments to `_main` and exit with its result."""
    import sys

    sys.exit(_main(*sys.argv[1:]))


def load_flirt_signature(path):
    """
    parse the signature file at `path`, picking the parser by file extension
    (.sig, .pat, or .pat.gz).

    args:
      path (str): file system path of the signature file.

    returns:
      the parsed signatures, as returned by `flirt.parse_sig`/`flirt.parse_pat`.

    raises:
      ValueError: when the extension is not one of the above.
    """
    if path.endswith(".sig"):
        with open(path, "rb") as f:
            sigs = flirt.parse_sig(f.read())

    elif path.endswith(".pat"):
        with open(path, "rb") as f:
            # normalize windows line endings, like `viv_utils.flirt.load_flirt_signature` does.
            sigs = flirt.parse_pat(f.read().decode("utf-8").replace("\r\n", "\n"))

    elif path.endswith(".pat.gz"):
        with gzip.open(path, "rb") as f:
            # normalize windows line endings, like `viv_utils.flirt.load_flirt_signature` does.
            sigs = flirt.parse_pat(f.read().decode("utf-8").replace("\r\n", "\n"))

    else:
        raise ValueError("unexpect signature file extension: " + path)

    return sigs


def get_workspace(path, sigpaths):
    """
    load and analyze the sample at `path`.

    args:
      path (str): path to the sample to analyze.
      sigpaths (List[str]): accepted but not used here; no analyzers are registered.

    returns:
      the analyzed workspace.
    """
    vw = viv_utils.getWorkspace(path, analyze=False, should_save=False)
    vw.analyze()
    return vw
def main():
    """
    for each function that matches a FLIRT signature carrying "reference" names,
    print where those references point and what vivisect knows about that location.

    returns:
      int: process exit code, always 0.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("-d", "--debug", action="store_true", help="enable debugging output on STDERR")
    parser.add_argument("-q", "--quiet", action="store_true", help="disable all output but errors")
    parser.add_argument(
        "signature",
        type=str,
        help="use the given signatures to identify library functions, file system paths to .sig/.pat files.",
    )
    parser.add_argument(
        "sample",
        type=str,
        help="path to sample to analyze",
    )

    args = parser.parse_args()

    if args.quiet:
        logging.basicConfig(level=logging.WARNING)
        logging.getLogger().setLevel(logging.WARNING)
    elif args.debug:
        logging.basicConfig(level=logging.DEBUG)
        logging.getLogger().setLevel(logging.DEBUG)

        # these loggers are kept at INFO even in debug mode.
        logging.getLogger("vivisect").setLevel(logging.INFO)
        logging.getLogger("vivisect.base").setLevel(logging.INFO)
        logging.getLogger("vivisect.impemu").setLevel(logging.INFO)
        logging.getLogger("vtrace").setLevel(logging.INFO)
        logging.getLogger("envi").setLevel(logging.INFO)
        logging.getLogger("envi.codeflow").setLevel(logging.INFO)
    else:
        logging.basicConfig(level=logging.INFO)
        logging.getLogger().setLevel(logging.INFO)
        logging.getLogger("vivisect").setLevel(logging.WARNING)

    vw = get_workspace(args.sample, [args.signature])

    sigs = load_flirt_signature(args.signature)
    logger.debug("flirt: sig count: %d", len(sigs))
    matcher = flirt.compile(sigs)

    # reference addresses that have already been reported.
    seen = set()

    # NOTE(review): leading whitespace inside the printed literals below is
    # reproduced as seen during review - confirm the intended column alignment.
    for function in vw.getFunctions():
        buf = viv_utils.readMemoryCurrentSection(vw, function, 0x10000)

        for match in matcher.match(buf):
            # each name is a (name, type, offset) triple.
            references = [n for n in match.names if n[1] == "reference" and (function + n[2]) not in seen]

            if not references:
                continue

            print("matching function: 0x%x" % (function))
            print(" candidate match: 0x%x: %s" % (function, match))

            print(" references:")

            for ref_name, _, ref_offset in references:
                ref_va = function + ref_offset
                seen.add(ref_va)

                print(" - 0x%x: %s" % (ref_va, ref_name))

                loc = vw.getLocation(ref_va)
                if loc is None:
                    # nothing is defined at the referenced address, so there is no
                    # location, instruction, or xref to describe.
                    logger.debug("no location defined at 0x%x", ref_va)
                    continue

                loc_va = loc[vivisect.const.L_VA]
                print(" loc: 0x%x" % (loc_va))
                print(" delta: 0x%x" % (ref_va - loc_va))

                size = loc[vivisect.const.L_SIZE]
                loc_buf = viv_utils.readMemoryCurrentSection(vw, loc_va, size)
                print(" bytes: %s" % (binascii.hexlify(loc_buf).decode("ascii")))

                # marker for where within the location the reference falls.
                print(" %s^" % (" " * (ref_va - loc_va)))

                insn = vw.parseOpcode(loc_va)
                print(" insn: %s" % (insn))

                print(" xrefs:")
                for xref in sorted(set(map(lambda x: x[vivisect.const.XR_TO], vw.getXrefsFrom(loc_va)))):
                    print(" - 0x%x" % (xref))

    return 0
class AMonitor(viv_utils.emulator_drivers.Monitor):
    """monitor that logs each instruction and each API call seen during emulation."""

    def __init__(self, vw):
        super().__init__(vw)

    def prehook(self, emu, op, starteip):
        address = hex(starteip)
        self._logger.debug("prehook: %s: %s", address, op)

    def apicall(self, emu, op, pc, api, argv):
        self._logger.debug("apicall: %s %s %s %s", op, pc, api, argv)


def _main(bin_path, fva):
    """
    emulate the function at `fva` in the given sample, with debug tracing enabled.

    :param bin_path: path to the sample to analyze
    :param fva: address of the function to emulate, as a hex string
    """
    logging.basicConfig(level=logging.DEBUG)

    vw = viv_utils.getWorkspace(bin_path)
    emu = vw.getEmulator()
    driver = viv_utils.emulator_drivers.FunctionRunnerEmulatorDriver(emu)

    monitor = AMonitor(vw)
    driver.add_monitor(monitor)

    trace_logger = logging.getLogger("trace")
    trace_logger.debug("%s %s %s %s", vw, emu, driver, monitor)

    function_va = int(fva, 0x10)
    driver.runFunction(function_va, maxhit=1)


def main():
    """script entry point: hand the command line arguments to `_main` and exit with its result."""
    import sys

    cli_args = sys.argv[1:]
    sys.exit(_main(*cli_args))
# basic building blocks: plain ints and strings under descriptive names.
Address: TypeAlias = int
DataType: TypeAlias = str
SymbolName: TypeAlias = str

# components of an API description.
CallingConvention: TypeAlias = str
ReturnType: TypeAlias = DataType
ReturnName: TypeAlias = str
FunctionName: TypeAlias = SymbolName
ArgType: TypeAlias = DataType
ArgName: TypeAlias = SymbolName
# one function argument: its type followed by its name.
FunctionArg: TypeAlias = Tuple[ArgType, ArgName]
# type returned by `vw.getImpApi`
API: TypeAlias = Tuple[ReturnType, ReturnName, Optional[CallingConvention], FunctionName, List[FunctionArg]]
# shortcuts
Emulator: TypeAlias = vivisect.impemu.emulator.WorkspaceEmulator
Workspace: TypeAlias = vivisect.VivWorkspace