├── .github
    ├── dependabot.yml
    ├── mypy
    │   └── mypy.ini
    └── workflows
    │   ├── python-publish.yml
    │   └── test.yml
├── .gitignore
├── LICENSE
├── README.md
├── pyproject.toml
├── requirements.txt
├── setup.cfg
├── tests
    ├── data
    │   ├── 038476f1705f3ac1237ac57f4c1753e0aa085dd7cda5669d4e93399cf7a565af.exe_
    │   └── Practical Malware Analysis Lab 01-01.dll_
    ├── fixtures.py
    ├── test_cfg.py
    ├── test_driver.py
    └── test_meta.py
└── viv_utils
    ├── __init__.py
    ├── emulator_drivers.py
    ├── flirt.py
    ├── idaloader.py
    ├── scripts
        ├── __init__.py
        ├── get_flirt_matches.py
        ├── get_function_args.py
        ├── show_flirt_references.py
        └── trace_function_emulation.py
    └── types.py


/.github/dependabot.yml:
--------------------------------------------------------------------------------
1 | version: 2
2 | updates:
3 |   - package-ecosystem: "pip"
4 |     directory: "/"
5 |     schedule:
6 |       interval: "weekly"
7 | 


--------------------------------------------------------------------------------
/.github/mypy/mypy.ini:
--------------------------------------------------------------------------------
 1 | [mypy]
 2 | 
 3 | [mypy-pefile.*]
 4 | ignore_missing_imports = True
 5 | 
 6 | [mypy-viv_utils.*]
 7 | ignore_missing_imports = True
 8 | 
 9 | [mypy-flirt.*]
10 | ignore_missing_imports = True
11 | 
12 | [mypy-idc.*]
13 | ignore_missing_imports = True
14 | 
15 | [mypy-vivisect.*]
16 | ignore_missing_imports = True
17 | 
18 | [mypy-envi.*]
19 | ignore_missing_imports = True
20 | 
21 | [mypy-visgraph.*]
22 | ignore_missing_imports = True
23 | 
24 | [mypy-PE.*]
25 | ignore_missing_imports = True
26 | 
27 | [mypy-idaapi.*]
28 | ignore_missing_imports = True
29 | 
30 | [mypy-idautils.*]
31 | ignore_missing_imports = True
32 | 
33 | [mypy-ida_bytes.*]
34 | ignore_missing_imports = True
35 | 
36 | [mypy-ida_kernwin.*]
37 | ignore_missing_imports = True
38 | 
39 | [mypy-ida_settings.*]
40 | ignore_missing_imports = True
41 | 
42 | [mypy-ida_funcs.*]
43 | ignore_missing_imports = True
44 | 
45 | [mypy-ida_loader.*]
46 | ignore_missing_imports = True
47 | 
48 | [mypy-ida_ida.*]
49 | ignore_missing_imports = True
50 | 
51 | [mypy-ida_nalt.*]
52 | ignore_missing_imports = True
53 | 
54 | [mypy-PyQt5.*]
55 | ignore_missing_imports = True
56 | 
57 | [mypy-funcy.*]
58 | ignore_missing_imports = True
59 | 
60 | [mypy-intervaltree.*]
61 | ignore_missing_imports = True


--------------------------------------------------------------------------------
/.github/workflows/python-publish.yml:
--------------------------------------------------------------------------------
 1 | # This workflow will upload a Python Package using Twine when a release is created
 2 | # For more information see: https://help.github.com/en/actions/language-and-framework-guides/using-python-with-github-actions#publishing-to-package-registries
 3 | 
 4 | name: Upload Python Package
 5 | 
 6 | on:
 7 |   release:
 8 |     types: [created]
 9 | 
10 | jobs:
11 |   build:  # runs `python -m build` and uploads dist/* as a workflow artifact
12 |     runs-on: ubuntu-latest
13 |     steps:
14 |     - uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1
15 |     - name: Set up Python
16 |       uses: actions/setup-python@0a5c61591373683505ea898e09a3ea4f39ef2b9c # v5.0.0
17 |       with:
18 |         python-version: '3.x'
19 |     - name: Install dependencies  # pinned requirements.txt first, then the "build" extra of this package
20 |       run: |
21 |         python -m pip install --upgrade pip
22 |         pip install -r requirements.txt
23 |         pip install -e .[build]
24 |     - name: Build package
25 |       run: |
26 |         python -m build
27 |     - uses: actions/upload-artifact@v4  # NOTE(review): pinned by tag, while the actions above are pinned by commit SHA
28 |       name: upload
29 |       with:
30 |         path: dist/*
31 |     
32 |   pypi-publish:  # downloads the build job's artifact and publishes the wheels it contains
33 |     runs-on: ubuntu-latest
34 |     if: "startsWith(github.ref, 'refs/tags/')"  # skip unless the triggering ref is a tag
35 |     needs: [ build ]
36 |     environment:
37 |       name: release
38 |     permissions:
39 |       id-token: write  # NOTE(review): presumably for PyPI trusted publishing via OIDC — confirm
40 |     steps:
41 |       - uses: actions/download-artifact@v4
42 |         with:
43 |           path: artifacts/
44 |       - name: move files to dist/  # only *.whl files are copied; anything else in the artifact stays behind
45 |         run: |
46 |           shopt -s globstar  # Enable recursive globbing (**)
47 |           rm -rf dist/
48 |           mkdir dist/
49 |           cp -r artifacts/**/*.whl dist/
50 |       - name: publish package
51 |         uses: pypa/gh-action-pypi-publish@f5622bde02b04381239da3573277701ceca8f6a0  # release/v1
52 |         with:
53 |           skip-existing: true
54 |           verbose: true
55 |           print-hash: true
56 | 


--------------------------------------------------------------------------------
/.github/workflows/test.yml:
--------------------------------------------------------------------------------
 1 | name: CI
 2 | 
 3 | on:
 4 |   push:
 5 |     branches: "*"
 6 |   pull_request:
 7 |     branches: [ master ]
 8 | 
 9 | jobs:
10 |   code_style:  # lint (isort, black, pycodestyle) and type-check (mypy); the tests job waits on this one
11 |     runs-on: ubuntu-22.04  # ubuntu-20.04 hosted runners were retired; 22.04 matches the tests matrix
12 |     steps:
13 |     - name: Checkout viv-utils
14 |       uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1
15 |     - name: Set up Python 3.13
16 |       uses: actions/setup-python@0a5c61591373683505ea898e09a3ea4f39ef2b9c # v5.0.0
17 |       with:
18 |         python-version: "3.13"
19 |     - name: Install dependencies  # pinned requirements.txt first, then the dev and flirt extras
20 |       run: |
21 |         pip install -r requirements.txt
22 |         pip install -e .[dev,flirt]
23 |     - name: Lint with isort
24 |       run: isort --profile black --length-sort --line-width 120 -c .
25 |     - name: Lint with black
26 |       run: black -l 120 --check .
27 |     - name: Lint with pycodestyle
28 |       run: pycodestyle --show-source viv_utils tests
29 |     - name: Check types with mypy
30 |       run: mypy --config-file .github/mypy/mypy.ini viv_utils/ tests/
31 | 
32 |   tests:  # runs pytest once per os x python-version combination of the matrix
33 |     name: Tests in ${{ matrix.python-version }} on ${{ matrix.os }}
34 |     runs-on: ${{ matrix.os }}
35 |     needs: [code_style]  # only starts after the code_style job has succeeded
36 |     strategy:
37 |       fail-fast: false  # a failing combination does not cancel the remaining ones
38 |       matrix:
39 |         os: [ubuntu-22.04, windows-2022, macos-14]
40 |         # Python versions tested on every operating system listed above
41 |         python-version: ["3.10", "3.12"]
42 |     steps:
43 |     - name: Checkout viv-utils
44 |       uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1
45 |     - name: Set up Python ${{ matrix.python-version }}
46 |       uses: actions/setup-python@0a5c61591373683505ea898e09a3ea4f39ef2b9c # v5.0.0
47 |       with:
48 |         python-version: ${{ matrix.python-version }}
49 |     - name: Install viv-utils  # pinned requirements.txt first, then the dev and flirt extras
50 |       run: |
51 |         pip install -r requirements.txt
52 |         pip install -e .[dev,flirt]
53 |     - name: Run tests
54 |       run: pytest -v tests/
55 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
 1 | __pycache__/
 2 | *.py[cod]
 3 | *$py.class
 4 | 
 5 | # C extensions
 6 | *.so
 7 | 
 8 | # Distribution / packaging
 9 | .Python
10 | env/
11 | build/
12 | develop-eggs/
13 | dist/
14 | downloads/
15 | eggs/
16 | .eggs/
17 | lib/
18 | lib64/
19 | parts/
20 | sdist/
21 | var/
22 | *.egg-info/
23 | .installed.cfg
24 | *.egg
25 | 
26 | # PyInstaller
27 | #  Usually these files are written by a python script from a template
28 | #  before PyInstaller builds the exe, so as to inject date/other infos into it.
29 | *.manifest
30 | *.spec
31 | 
32 | # Installer logs
33 | pip-log.txt
34 | pip-delete-this-directory.txt
35 | 
36 | # Unit test / coverage reports
37 | htmlcov/
38 | .tox/
39 | .coverage
40 | .coverage.*
41 | .cache
42 | nosetests.xml
43 | coverage.xml
44 | *,cover
45 | .hypothesis/
46 | 
47 | # Translations
48 | *.mo
49 | *.pot
50 | 
51 | # Django stuff:
52 | *.log
53 | 
54 | # Sphinx documentation
55 | docs/_build/
56 | 
57 | # PyBuilder
58 | target/
59 | 
60 | #Ipython Notebook
61 | .ipynb_checkpoints
62 | .idea
63 | .direnv
64 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
  1 |                                  Apache License
  2 |                            Version 2.0, January 2004
  3 |                         http://www.apache.org/licenses/
  4 | 
  5 |    TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
  6 | 
  7 |    1. Definitions.
  8 | 
  9 |       "License" shall mean the terms and conditions for use, reproduction,
 10 |       and distribution as defined by Sections 1 through 9 of this document.
 11 | 
 12 |       "Licensor" shall mean the copyright owner or entity authorized by
 13 |       the copyright owner that is granting the License.
 14 | 
 15 |       "Legal Entity" shall mean the union of the acting entity and all
 16 |       other entities that control, are controlled by, or are under common
 17 |       control with that entity. For the purposes of this definition,
 18 |       "control" means (i) the power, direct or indirect, to cause the
 19 |       direction or management of such entity, whether by contract or
 20 |       otherwise, or (ii) ownership of fifty percent (50%) or more of the
 21 |       outstanding shares, or (iii) beneficial ownership of such entity.
 22 | 
 23 |       "You" (or "Your") shall mean an individual or Legal Entity
 24 |       exercising permissions granted by this License.
 25 | 
 26 |       "Source" form shall mean the preferred form for making modifications,
 27 |       including but not limited to software source code, documentation
 28 |       source, and configuration files.
 29 | 
 30 |       "Object" form shall mean any form resulting from mechanical
 31 |       transformation or translation of a Source form, including but
 32 |       not limited to compiled object code, generated documentation,
 33 |       and conversions to other media types.
 34 | 
 35 |       "Work" shall mean the work of authorship, whether in Source or
 36 |       Object form, made available under the License, as indicated by a
 37 |       copyright notice that is included in or attached to the work
 38 |       (an example is provided in the Appendix below).
 39 | 
 40 |       "Derivative Works" shall mean any work, whether in Source or Object
 41 |       form, that is based on (or derived from) the Work and for which the
 42 |       editorial revisions, annotations, elaborations, or other modifications
 43 |       represent, as a whole, an original work of authorship. For the purposes
 44 |       of this License, Derivative Works shall not include works that remain
 45 |       separable from, or merely link (or bind by name) to the interfaces of,
 46 |       the Work and Derivative Works thereof.
 47 | 
 48 |       "Contribution" shall mean any work of authorship, including
 49 |       the original version of the Work and any modifications or additions
 50 |       to that Work or Derivative Works thereof, that is intentionally
 51 |       submitted to Licensor for inclusion in the Work by the copyright owner
 52 |       or by an individual or Legal Entity authorized to submit on behalf of
 53 |       the copyright owner. For the purposes of this definition, "submitted"
 54 |       means any form of electronic, verbal, or written communication sent
 55 |       to the Licensor or its representatives, including but not limited to
 56 |       communication on electronic mailing lists, source code control systems,
 57 |       and issue tracking systems that are managed by, or on behalf of, the
 58 |       Licensor for the purpose of discussing and improving the Work, but
 59 |       excluding communication that is conspicuously marked or otherwise
 60 |       designated in writing by the copyright owner as "Not a Contribution."
 61 | 
 62 |       "Contributor" shall mean Licensor and any individual or Legal Entity
 63 |       on behalf of whom a Contribution has been received by Licensor and
 64 |       subsequently incorporated within the Work.
 65 | 
 66 |    2. Grant of Copyright License. Subject to the terms and conditions of
 67 |       this License, each Contributor hereby grants to You a perpetual,
 68 |       worldwide, non-exclusive, no-charge, royalty-free, irrevocable
 69 |       copyright license to reproduce, prepare Derivative Works of,
 70 |       publicly display, publicly perform, sublicense, and distribute the
 71 |       Work and such Derivative Works in Source or Object form.
 72 | 
 73 |    3. Grant of Patent License. Subject to the terms and conditions of
 74 |       this License, each Contributor hereby grants to You a perpetual,
 75 |       worldwide, non-exclusive, no-charge, royalty-free, irrevocable
 76 |       (except as stated in this section) patent license to make, have made,
 77 |       use, offer to sell, sell, import, and otherwise transfer the Work,
 78 |       where such license applies only to those patent claims licensable
 79 |       by such Contributor that are necessarily infringed by their
 80 |       Contribution(s) alone or by combination of their Contribution(s)
 81 |       with the Work to which such Contribution(s) was submitted. If You
 82 |       institute patent litigation against any entity (including a
 83 |       cross-claim or counterclaim in a lawsuit) alleging that the Work
 84 |       or a Contribution incorporated within the Work constitutes direct
 85 |       or contributory patent infringement, then any patent licenses
 86 |       granted to You under this License for that Work shall terminate
 87 |       as of the date such litigation is filed.
 88 | 
 89 |    4. Redistribution. You may reproduce and distribute copies of the
 90 |       Work or Derivative Works thereof in any medium, with or without
 91 |       modifications, and in Source or Object form, provided that You
 92 |       meet the following conditions:
 93 | 
 94 |       (a) You must give any other recipients of the Work or
 95 |           Derivative Works a copy of this License; and
 96 | 
 97 |       (b) You must cause any modified files to carry prominent notices
 98 |           stating that You changed the files; and
 99 | 
100 |       (c) You must retain, in the Source form of any Derivative Works
101 |           that You distribute, all copyright, patent, trademark, and
102 |           attribution notices from the Source form of the Work,
103 |           excluding those notices that do not pertain to any part of
104 |           the Derivative Works; and
105 | 
106 |       (d) If the Work includes a "NOTICE" text file as part of its
107 |           distribution, then any Derivative Works that You distribute must
108 |           include a readable copy of the attribution notices contained
109 |           within such NOTICE file, excluding those notices that do not
110 |           pertain to any part of the Derivative Works, in at least one
111 |           of the following places: within a NOTICE text file distributed
112 |           as part of the Derivative Works; within the Source form or
113 |           documentation, if provided along with the Derivative Works; or,
114 |           within a display generated by the Derivative Works, if and
115 |           wherever such third-party notices normally appear. The contents
116 |           of the NOTICE file are for informational purposes only and
117 |           do not modify the License. You may add Your own attribution
118 |           notices within Derivative Works that You distribute, alongside
119 |           or as an addendum to the NOTICE text from the Work, provided
120 |           that such additional attribution notices cannot be construed
121 |           as modifying the License.
122 | 
123 |       You may add Your own copyright statement to Your modifications and
124 |       may provide additional or different license terms and conditions
125 |       for use, reproduction, or distribution of Your modifications, or
126 |       for any such Derivative Works as a whole, provided Your use,
127 |       reproduction, and distribution of the Work otherwise complies with
128 |       the conditions stated in this License.
129 | 
130 |    5. Submission of Contributions. Unless You explicitly state otherwise,
131 |       any Contribution intentionally submitted for inclusion in the Work
132 |       by You to the Licensor shall be under the terms and conditions of
133 |       this License, without any additional terms or conditions.
134 |       Notwithstanding the above, nothing herein shall supersede or modify
135 |       the terms of any separate license agreement you may have executed
136 |       with Licensor regarding such Contributions.
137 | 
138 |    6. Trademarks. This License does not grant permission to use the trade
139 |       names, trademarks, service marks, or product names of the Licensor,
140 |       except as required for reasonable and customary use in describing the
141 |       origin of the Work and reproducing the content of the NOTICE file.
142 | 
143 |    7. Disclaimer of Warranty. Unless required by applicable law or
144 |       agreed to in writing, Licensor provides the Work (and each
145 |       Contributor provides its Contributions) on an "AS IS" BASIS,
146 |       WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
147 |       implied, including, without limitation, any warranties or conditions
148 |       of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
149 |       PARTICULAR PURPOSE. You are solely responsible for determining the
150 |       appropriateness of using or redistributing the Work and assume any
151 |       risks associated with Your exercise of permissions under this License.
152 | 
153 |    8. Limitation of Liability. In no event and under no legal theory,
154 |       whether in tort (including negligence), contract, or otherwise,
155 |       unless required by applicable law (such as deliberate and grossly
156 |       negligent acts) or agreed to in writing, shall any Contributor be
157 |       liable to You for damages, including any direct, indirect, special,
158 |       incidental, or consequential damages of any character arising as a
159 |       result of this License or out of the use or inability to use the
160 |       Work (including but not limited to damages for loss of goodwill,
161 |       work stoppage, computer failure or malfunction, or any and all
162 |       other commercial damages or losses), even if such Contributor
163 |       has been advised of the possibility of such damages.
164 | 
165 |    9. Accepting Warranty or Additional Liability. While redistributing
166 |       the Work or Derivative Works thereof, You may choose to offer,
167 |       and charge a fee for, acceptance of support, warranty, indemnity,
168 |       or other liability obligations and/or rights consistent with this
169 |       License. However, in accepting such obligations, You may act only
170 |       on Your own behalf and on Your sole responsibility, not on behalf
171 |       of any other Contributor, and only if You agree to indemnify,
172 |       defend, and hold each Contributor harmless for any liability
173 |       incurred by, or claims asserted against, such Contributor by reason
174 |       of your accepting any such warranty or additional liability.
175 | 
176 |    END OF TERMS AND CONDITIONS
177 | 
178 |    APPENDIX: How to apply the Apache License to your work.
179 | 
180 |       To apply the Apache License to your work, attach the following
181 |       boilerplate notice, with the fields enclosed by brackets "{}"
182 |       replaced with your own identifying information. (Don't include
183 |       the brackets!)  The text should be enclosed in the appropriate
184 |       comment syntax for the file format. We also recommend that a
185 |       file or class name and description of purpose be included on the
186 |       same "printed page" as the copyright notice for easier
187 |       identification within third-party archives.
188 | 
189 |    Copyright {yyyy} {name of copyright owner}
190 | 
191 |    Licensed under the Apache License, Version 2.0 (the "License");
192 |    you may not use this file except in compliance with the License.
193 |    You may obtain a copy of the License at
194 | 
195 |        http://www.apache.org/licenses/LICENSE-2.0
196 | 
197 |    Unless required by applicable law or agreed to in writing, software
198 |    distributed under the License is distributed on an "AS IS" BASIS,
199 |    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
200 |    See the License for the specific language governing permissions and
201 |    limitations under the License.
202 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # viv-utils
2 | Utilities for working with vivisect
3 | 
4 | ```
5 | pip install viv-utils
6 | ```
7 | 


--------------------------------------------------------------------------------
/pyproject.toml:
--------------------------------------------------------------------------------
 1 | [build-system]
 2 | requires = ["setuptools", "setuptools-scm"]
 3 | build-backend = "setuptools.build_meta"
 4 | 
 5 | [project]
 6 | name = "viv_utils"
 7 | version="0.8.0"
 8 | authors = [
 9 |     {name = "Willi Ballenthin", email = "william.ballenthin@mandiant.com"},
10 |     {name = "Moritz Raabe", email = "moritz.raabe@mandiant.com"},
11 | ]
12 | description="Utilities for binary analysis using vivisect."
13 | readme = {file = "README.md", content-type = "text/markdown"}
14 | license = {file = "LICENSE"}
15 | requires-python = ">=3.9"
16 | keywords = ["malware analysis", "reverse engineering", "emulation", "disassembly"]
17 | classifiers = [
18 |     "Development Status :: 5 - Production/Stable",
19 |     "Intended Audience :: Developers",
20 |     "Intended Audience :: Information Technology",
21 |     "License :: OSI Approved :: Apache Software License",
22 |     "Natural Language :: English",
23 |     "Programming Language :: Python :: 3",
24 |     "Topic :: Security",
25 | ]
26 | dependencies = [
27 |     # ---------------------------------------
28 |     # As a library, we use lower version bounds
29 |     # when specifying its dependencies. This lets
30 |     # other programs find a compatible set of 
31 |     # dependency versions.
32 |     #
33 |     # We can optionally pin to specific versions or
34 |     # limit the upper bound when there's a good reason;
35 |     # but the default is to assume all greater versions
36 |     # probably work until proven otherwise.
37 |     #
38 |     # The following link provides good background:
39 |     # https://iscinumpy.dev/post/bound-version-constraints/
40 |     #
41 |     # When we develop this library, and when we distribute it as
42 |     # a standalone binary, we'll use specific versions
43 |     # that are pinned in requirements.txt.
44 |     # But the requirements for a library are specified here
45 |     # and are looser.
46 |     #
47 |     # Related discussions:
48 |     # 
49 |     #   - https://github.com/mandiant/capa/issues/2053
50 |     #   - https://github.com/mandiant/capa/pull/2059
51 |     #   - https://github.com/mandiant/capa/pull/2079
52 |     #
53 |     # ---------------------------------------
54 |     # The following dependency versions were imported
55 |     # during June 2024 by truncating specific versions to
56 |     # their major-most version (major version when possible, 
57 |     # or minor otherwise).
58 |     # As specific constraints are identified, please provide
59 |     # comments and context.
60 |     "funcy>=2.0",
61 |     "pefile>=2023.2.7",
62 |     "vivisect>=1.1.0",
63 |     "intervaltree>=3.1.0",
64 |     "typing_extensions>=4.5.0",
65 | ]
66 | 
67 | [tool.setuptools.packages.find]
68 | include = ["viv_utils*"]
69 | namespaces = false
70 | 
71 | [project.optional-dependencies]
72 | flirt = [
73 |     "python-flirt>=0.9.0",
74 | ]
75 | # Dev and build dependencies are not relaxed because
76 | # we want all developer environments to be consistent.
77 | # These dependencies are not used in production environments
78 | # and should not conflict with other libraries/tooling.
79 | dev = [
80 |     "pytest==8.2.2",
81 |     "pytest-sugar==1.0.0",
82 |     "pytest-instafail==0.5.0",
83 |     "pycodestyle==2.12.0",
84 |     "black==24.4.2",
85 |     "isort==5.13.2",  # note: isort dropped Python 3.7 in 5.12.0; fine with requires-python >=3.9
86 |     "mypy==1.11.2",
87 |     "types-setuptools==75.2.0.20241019",
88 | ]
89 | build = [
90 |     "setuptools==75.2.0",
91 |     "build==1.2.1"
92 | ]
93 | 
94 | [project.urls]
95 | Homepage = "https://github.com/williballenthin/viv-utils"
96 | Repository = "https://github.com/williballenthin/viv-utils.git"
97 | 


--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
 1 | black==24.4.2
 2 | build==1.2.1
 3 | click==8.1.7
 4 | cxxfilt==0.2.2
 5 | funcy==2.0
 6 | iniconfig==2.0.0
 7 | intervaltree==3.1.0
 8 | isort==5.13.2
 9 | msgpack==1.0.8
10 | mypy==1.11.2
11 | packaging==24.1
12 | pathspec==0.12.1
13 | pefile==2023.2.7
14 | pip==24.2
15 | platformdirs==4.2.2
16 | pluggy==1.5.0
17 | pyasn1==0.4.8
18 | pyasn1-modules==0.2.8
19 | pycodestyle==2.12.0
20 | pycparser==2.22
21 | pyproject-hooks==1.1.0
22 | pytest==8.2.2
23 | pytest-instafail==0.5.0
24 | pytest-sugar==1.0.0
25 | python-flirt==0.9.2
26 | setuptools==75.2.0
27 | sortedcontainers==2.4.0
28 | termcolor==2.4.0
29 | types-setuptools==75.2.0.20241019
30 | vivisect==1.1.1
31 | 


--------------------------------------------------------------------------------
/setup.cfg:
--------------------------------------------------------------------------------
 1 | [bdist_wheel]
 2 | # Python 3 only (pyproject.toml: requires-python >=3.9), so do not tag the wheel as py2.py3.
 3 | universal = 0
 4 | [aliases]
 5 | test = pytest
 6 | 
 7 | [pycodestyle]
 8 | # the following suppress lints that conflict with the project's style:
 9 | #
10 | # E203: whitespace before : (from black)
11 | ignore = E203
12 | max-line-length = 180
13 | statistics = True
14 | 


--------------------------------------------------------------------------------
/tests/data/038476f1705f3ac1237ac57f4c1753e0aa085dd7cda5669d4e93399cf7a565af.exe_:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/williballenthin/viv-utils/50ea43422c49539fea5a48f74d0935b7fef131f3/tests/data/038476f1705f3ac1237ac57f4c1753e0aa085dd7cda5669d4e93399cf7a565af.exe_


--------------------------------------------------------------------------------
/tests/data/Practical Malware Analysis Lab 01-01.dll_:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/williballenthin/viv-utils/50ea43422c49539fea5a48f74d0935b7fef131f3/tests/data/Practical Malware Analysis Lab 01-01.dll_


--------------------------------------------------------------------------------
/tests/fixtures.py:
--------------------------------------------------------------------------------
 1 | from pathlib import Path
 2 | 
 3 | import pytest
 4 | 
 5 | import viv_utils
 6 | 
 7 | CD = Path(__file__).parent
 8 | DATA = CD / "data"
 9 | 
10 | 
11 | @pytest.fixture
12 | def pma01():
13 |     return viv_utils.getWorkspace(str(DATA / "Practical Malware Analysis Lab 01-01.dll_"), should_save=False)
14 | 
15 | 
16 | @pytest.fixture
17 | def sample_038476():
18 |     return viv_utils.getWorkspace(
19 |         str(DATA / "038476f1705f3ac1237ac57f4c1753e0aa085dd7cda5669d4e93399cf7a565af.exe_"), should_save=False
20 |     )
21 | 


--------------------------------------------------------------------------------
/tests/test_cfg.py:
--------------------------------------------------------------------------------
 1 | from fixtures import *
 2 | 
 3 | 
 4 | def test_prev_loc(pma01):
 5 |     # start of insn
 6 |     assert viv_utils.get_prev_loc(pma01, 0x10001015)[0] == 0x10001010
 7 |     # middle of insn
 8 |     assert viv_utils.get_prev_loc(pma01, 0x10001016)[0] == 0x10001010
 9 |     # undefined location, directly after loc
10 |     assert viv_utils.get_prev_loc(pma01, 0x100011FA)[0] == 0x100011F7
11 | 
12 | 
13 | def test_prev_opcode(pma01):
14 |     assert viv_utils.get_prev_opcode(pma01, 0x10001015).va == 0x10001010
15 |     assert viv_utils.get_prev_opcode(pma01, 0x10001016).va == 0x10001010
16 | 
17 | 
18 | def test_all_xrefs_from(pma01):
19 |     # mov     eax, 11F8h
20 |     # single xref: fallthrough
21 |     assert len(list(viv_utils.get_all_xrefs_from(pma01, 0x10001010))) == 1
22 | 
23 |     # jnz     loc_100011E8
24 |     # two xrefs: fallthrough and conditional jump
25 |     assert len(list(viv_utils.get_all_xrefs_from(pma01, 0x10001028))) == 2
26 | 
27 | 
28 | def test_all_xrefs_to(pma01):
29 |     # single xref: fallthrough
30 |     assert len(list(viv_utils.get_all_xrefs_to(pma01, 0x10001015))) == 1
31 | 
32 |     # four xrefs: fallthrough and three jumps
33 |     assert len(list(viv_utils.get_all_xrefs_to(pma01, 0x100011E8))) == 4
34 | 
35 | 
36 | def test_cfg(pma01):
37 |     f = viv_utils.Function(pma01, 0x10001010)
38 |     cfg = viv_utils.CFG(f)
39 | 
40 |     roots = list(cfg.get_root_basic_blocks())
41 |     assert len(roots) == 1
42 |     root = roots[0]
43 |     assert int(root) == 0x10001010
44 | 
45 |     tails = list(cfg.get_leaf_basic_blocks())
46 |     assert len(tails) == 1
47 | 
48 |     tail = tails[0]
49 |     assert int(tail) == 0x100011E8
50 | 
51 |     assert len(list(cfg.get_predecessor_basic_blocks(tail))) == 4
52 | 
53 | 
54 | def test_bad_cfg(pma01):
55 |     with pytest.raises(ValueError):
56 |         _ = viv_utils.CFG(viv_utils.Function(pma01, 0x10002000))
57 | 


--------------------------------------------------------------------------------
/tests/test_driver.py:
--------------------------------------------------------------------------------
  1 | import collections
  2 | 
  3 | import envi.const
  4 | from fixtures import *
  5 | 
  6 | import viv_utils.emulator_drivers as vudrv
  7 | 
  8 | 
  9 | class LoggingMonitor(vudrv.Monitor):
 10 |     """Monitor whose prehook/preblock/postblock callbacks print the addresses (and op) they are given."""
 11 | 
 12 |     def prehook(self, emu, op, startpc):
 13 |         print("emu: 0x%x %s" % (startpc, op))  # startpc in hex, followed by str(op)
 14 | 
 15 |     def preblock(self, emu, blockstart):
 16 |         print("emu: block: start: 0x%x" % (blockstart))  # blockstart in hex
 17 | 
 18 |     def postblock(self, emu, blockstart, blockend):
 19 |         print("emu: block: 0x%x - 0x%x" % (blockstart, blockend))  # both block bounds in hex
 20 | 
 21 | 
 22 | class CoverageMonitor(vudrv.Monitor):
 23 |     """Monitor that counts, per startpc, how often prehook is invoked; counts live in self.addresses."""
 24 | 
 25 |     def __init__(self, *args, **kwargs):
 26 |         super().__init__(*args, **kwargs)
 27 |         self.addresses = collections.Counter()  # startpc -> number of prehook calls seen for it
 28 | 
 29 |     def prehook(self, emu, op, startpc):
 30 |         self.addresses[startpc] += 1  # emu and op are ignored; only the address is tallied
 31 | 
 32 | 
 33 | def test_driver_monitor(pma01):
 34 |     emu = pma01.getEmulator()
 35 |     drv = vudrv.DebuggerEmulatorDriver(emu)
 36 |     cov = CoverageMonitor()
 37 |     drv.add_monitor(cov)
 38 | 
 39 |     # 10001010 B8 F8 11 00 00          mov     eax, 11F8h
 40 |     # 10001015 E8 06 02 00 00          call    __alloca_probe
 41 | 
 42 |     drv.setProgramCounter(0x10001010)
 43 |     drv.stepi()
 44 |     assert drv.getProgramCounter() == 0x10001015
 45 | 
 46 |     assert 0x10001010 in cov.addresses
 47 |     assert 0x10001015 not in cov.addresses
 48 | 
 49 | 
 50 | def test_dbg_driver_stepi(pma01):
 51 |     emu = pma01.getEmulator()
 52 |     drv = vudrv.DebuggerEmulatorDriver(emu)
 53 | 
 54 |     # .text:10001342 57                      push    edi
 55 |     # .text:10001343 56                      push    esi             ; fdwReason
 56 |     # .text:10001344 53                      push    ebx             ; hinstDLL
 57 |     # .text:10001345 E8 C6 FC FF FF          call    DllMain (0x10001010)
 58 |     # .text:1000134A 83 FE 01                cmp     esi, 1
 59 |     drv.setProgramCounter(0x10001342)
 60 |     drv.stepi()
 61 |     drv.stepi()
 62 |     drv.stepi()
 63 |     drv.stepi()
 64 |     assert drv.getProgramCounter() == 0x10001010
 65 | 
 66 | 
 67 | def test_dbg_driver_stepo(pma01):
 68 |     emu = pma01.getEmulator()
 69 |     drv = vudrv.DebuggerEmulatorDriver(emu)
 70 | 
 71 |     # .text:10001342 57                      push    edi
 72 |     # .text:10001343 56                      push    esi             ; fdwReason
 73 |     # .text:10001344 53                      push    ebx             ; hinstDLL
 74 |     # .text:10001345 E8 C6 FC FF FF          call    DllMain (0x10001010)
 75 |     # .text:1000134A 83 FE 01                cmp     esi, 1
 76 |     drv.setProgramCounter(0x10001342)
 77 |     drv.stepo()
 78 |     drv.stepo()
 79 |     drv.stepo()
 80 |     drv.stepo()
 81 |     assert drv.getProgramCounter() == 0x1000134A
 82 | 
 83 | 
 84 | class CreateMutexAHook:
 85 |     """capture the mutex names passed to CreateMutexA"""
 86 | 
 87 |     def __init__(self, *args, **kwargs):
 88 |         super().__init__(*args, **kwargs)
 89 |         self.mutexes = set()
 90 | 
 91 |     def __call__(self, emu, api, argv):
 92 |         _, _, cconv, name, _ = api
 93 | 
 94 |         if name != "kernel32.CreateMutexA":
 95 |             return
 96 | 
 97 |         mutex = emu.readString(argv[2])
 98 |         self.mutexes.add(mutex)
 99 | 
100 |         cconv = emu.getCallingConvention(cconv)
101 |         cconv.execCallReturn(emu, 0, len(argv))
102 | 
103 |         return True
104 | 
105 | 
def test_driver_hook(pma01):
    """a hook added to the driver handles the call to CreateMutexA and captures the mutex name."""
    driver = vudrv.DebuggerEmulatorDriver(pma01.getEmulator())
    hook = CreateMutexAHook()
    driver.add_hook(hook)

    # .text:10001067 68 38 60 02 10          push    offset Name     ; "SADFHUHF"
    # .text:1000106C 50                      push    eax             ; bInitialOwner
    # .text:1000106D 50                      push    eax             ; lpMutexAttributes
    # .text:1000106E FF 15 08 20 00 10       call    ds:CreateMutexA
    # .text:10001074 8D 4C 24 78             lea     ecx, [esp+1208h+var_1190]

    driver.setProgramCounter(0x10001067)

    # three pushes plus the hooked call.
    for _ in range(4):
        driver.stepi()

    assert driver.getProgramCounter() == 0x10001074
    assert "SADFHUHF" in hook.mutexes
125 | 
126 | 
def protect_memory(imem, va, size, perms):
    """
    replace the permissions of the memory map that starts at `va` and spans `size` bytes.

    this rewrites the matching entry of `imem._map_defs` in place.
    see: https://github.com/vivisect/vivisect/issues/511

    Arguments:
      imem: object exposing `_map_defs`, a list of `[start, end, (va, size, perms, filename), bytes]` entries
      va: base address of the map to update
      size: size of the map to update
      perms: new permissions to store for the map

    Raises:
      KeyError: when no map has exactly this base address and size
    """
    maps = imem._map_defs
    for index, (start, end, mmap, bytez) in enumerate(maps):
        mva, msize, _old_perms, mfilename = mmap

        if mva == va and msize == size:
            maps[index] = [start, end, [mva, msize, perms, mfilename], bytez]
            return

    # interpolate the message here: exceptions do not apply %-formatting to their arguments.
    raise KeyError("unknown memory map: 0x%x (0x%x bytes)" % (va, size))
140 | 
141 | 
def test_driver_hook_tailjump(pma01):
    """a `jmp` to a hooked import (with a return address on the stack) is handled like a call."""
    # patch:
    #
    # .text:10001067 68 38 60 02 10          push    offset Name     ; "SADFHUHF"
    # .text:1000106C 50                      push    eax             ; bInitialOwner
    # .text:1000106D 50                      push    eax             ; lpMutexAttributes
    # .text:1000106E FF 15 08 20 00 10       call    ds:CreateMutexA
    # .text:10001074 8D 4C 24 78             lea     ecx, [esp+1208h+var_1190]
    #
    # to:
    #
    # .text:10001067 68 38 60 02 10          push    offset Name     ; "SADFHUHF"
    # .text:1000106C 50                      push    eax             ; bInitialOwner
    # .text:1000106D 50                      push    eax             ; lpMutexAttributes
    # .text:1000106E 68 79 10 00 10          push    offset loc_10001079
    # .text:10001073 FF 25 08 20 00 10       jmp     ds:CreateMutexA
    # .text:10001079 ...                     ...
    #
    # so that we have a tail jump to `CreateMutexA` (but with the return address on the stack).
    # the hook handler should pick up on this, and handle the transition to `CreateMutexA` as a call.
    #
    # note: we have to patch the vw, because patching emu mem doesn't work.
    # the emu instance reads opcodes from the vw, not emu memory.
    # see: https://github.com/vivisect/vivisect/issues/512
    patch_va = 0x1000106E
    jmp_va = 0x10001073
    patch = bytes.fromhex("68 79 10 00 10 FF 25 08 20 00 10")

    vw = pma01
    mapva, size, perms, filename = vw.getMemoryMap(patch_va)

    # make the map writable while patching, then put the original permissions back.
    protect_memory(vw, mapva, size, envi.const.MM_RWX)
    vw.writeMemory(patch_va, patch)
    vw.clearOpcache()
    assert vw.parseOpcode(patch_va).mnem == "push"
    assert vw.parseOpcode(jmp_va).mnem == "jmp"
    protect_memory(vw, mapva, size, perms)

    driver = vudrv.DebuggerEmulatorDriver(vw.getEmulator())
    hook = CreateMutexAHook()
    driver.add_hook(hook)

    driver.setProgramCounter(0x10001067)

    # the four pushes bring us to the patched-in jmp.
    for _ in range(4):
        driver.stepi()
    assert driver.parseOpcode(driver.getProgramCounter()).mnem == "jmp"

    # stepping the jmp ends up at the pushed return address.
    driver.stepi()
    assert driver.getProgramCounter() == 0x10001079
    assert "SADFHUHF" in hook.mutexes
189 | 
190 | 
def test_dbg_driver_max_insn(pma01):
    """with `max_insn=1`, `run` breaks after a single instruction with reason "max_insn"."""
    driver = vudrv.DebuggerEmulatorDriver(pma01.getEmulator(), max_insn=1)

    # .text:10001342 57                      push    edi
    # .text:10001343 56                      push    esi             ; fdwReason
    # .text:10001344 53                      push    ebx             ; hinstDLL
    # .text:10001345 E8 C6 FC FF FF          call    DllMain (0x10001010)
    # .text:1000134A 83 FE 01                cmp     esi, 1
    driver.setProgramCounter(0x10001342)

    with pytest.raises(vudrv.BreakpointHit) as excinfo:
        driver.run()

    assert excinfo.value.reason == "max_insn"
    assert driver.getProgramCounter() == 0x10001343
205 | 
206 | 
def test_dbg_driver_bp(pma01):
    """`run` stops at an address added to `breakpoints`, with reason "breakpoint"."""
    driver = vudrv.DebuggerEmulatorDriver(pma01.getEmulator())
    breakpoint_va = 0x10001344

    # .text:10001342 57                      push    edi
    # .text:10001343 56                      push    esi             ; fdwReason
    # .text:10001344 53                      push    ebx             ; hinstDLL
    # .text:10001345 E8 C6 FC FF FF          call    DllMain (0x10001010)
    # .text:1000134A 83 FE 01                cmp     esi, 1
    driver.setProgramCounter(0x10001342)
    driver.breakpoints.add(breakpoint_va)

    with pytest.raises(vudrv.BreakpointHit) as excinfo:
        driver.run()

    assert excinfo.value.reason == "breakpoint"
    assert driver.getProgramCounter() == breakpoint_va
222 | 
223 | 
def test_dbg_driver_until_mnem(pma01):
    """`run_to_mnem` stops at the first instruction with a requested mnemonic."""
    driver = vudrv.DebuggerEmulatorDriver(pma01.getEmulator())

    # .text:10001342 57                      push    edi
    # .text:10001343 56                      push    esi             ; fdwReason
    # .text:10001344 53                      push    ebx             ; hinstDLL
    # .text:10001345 E8 C6 FC FF FF          call    DllMain (0x10001010)
    # .text:1000134A 83 FE 01                cmp     esi, 1
    driver.setProgramCounter(0x10001342)

    wanted = ["call"]
    with pytest.raises(vudrv.BreakpointHit) as excinfo:
        driver.run_to_mnem(wanted)

    assert excinfo.value.reason == "mnemonic"
    assert driver.getProgramCounter() == 0x10001345
238 | 
239 | 
def test_dbg_driver_until_va(pma01):
    """`run_to_va` returns with the program counter at the requested address."""
    driver = vudrv.DebuggerEmulatorDriver(pma01.getEmulator())
    target_va = 0x10001344

    # .text:10001342 57                      push    edi
    # .text:10001343 56                      push    esi             ; fdwReason
    # .text:10001344 53                      push    ebx             ; hinstDLL
    # .text:10001345 E8 C6 FC FF FF          call    DllMain (0x10001010)
    # .text:1000134A 83 FE 01                cmp     esi, 1
    driver.setProgramCounter(0x10001342)
    driver.run_to_va(target_va)

    assert driver.getProgramCounter() == target_va
252 | 
253 | 
def test_fc_driver(pma01):
    """the full coverage driver visits each instruction of a function once, without entering callees."""
    emu = pma01.getEmulator()
    vudrv.remove_default_viv_hooks(emu)

    driver = vudrv.FullCoverageEmulatorDriver(emu)
    coverage = CoverageMonitor()
    driver.add_monitor(coverage)

    driver.run(0x10001010)

    # each instruction should have been emulated exactly once.
    hit_counts = set(coverage.addresses.values())
    assert hit_counts == {1}

    # there's a call to __alloca_probe,
    # however, we should not have emulated into its body.
    #
    # .text:10001010 B8 F8 11 00 00          mov     eax, 11F8h
    # .text:10001015 E8 06 02 00 00          call    __alloca_probe == 0x10001220
    assert 0x10001220 not in coverage.addresses

    # these are a selection of addresses from the function
    # pulled from IDA manually.
    expected_addresses = (
        0x10001010,
        0x10001033,
        0x10001086,
        0x100010E9,
        0x100011D0,
        0x100011DB,
        0x100011E2,
        0x100011E8,
        0x100011F7,
    )
    for address in expected_addresses:
        assert address in coverage.addresses
287 | 
288 | 
def test_fc_driver_jmp_bb_ends(sample_038476):
    """jumps between basic blocks must not be mistaken for tail jumps / API calls."""
    emu = sample_038476.getEmulator()
    vudrv.remove_default_viv_hooks(emu)

    driver = vudrv.FullCoverageEmulatorDriver(emu)
    coverage = CoverageMonitor()
    driver.add_monitor(coverage)

    # at the end of basic blocks there's a jump to the next block.
    # don't confuse this with a tail jump / API call, and emulate the entire function.
    # with a faulty handle_jmp, emulation would end after the first basic block.
    #
    # example snippet:
    # .text:00401842 E9 04 00 00 00                    jmp     loc_40184B
    # .text:00401847                   ; ---------------------------------------------------------------------------
    # .text:00401847 9B                                wait
    # .text:00401848 9B                                wait
    # .text:00401849 9B                                wait
    # .text:0040184A 9B                                wait
    # .text:0040184B
    # .text:0040184B                   loc_40184B:
    # .text:0040184B E9 04 00 00 00                    jmp     loc_401854
    # .text:00401850                   ; ---------------------------------------------------------------------------
    # .text:00401850 9B                                wait
    # .text:00401851 9B                                wait
    # .text:00401852 9B                                wait
    # .text:00401853 9B                                wait
    # .text:00401854
    # .text:00401854                   loc_401854:
    # .text:00401854 C7 45 E8 00 00 00+                mov     [ebp+var_18], 0
    driver.run(0x401830)

    # these are a selection of random addresses from the function
    # pulled from IDA manually.
    expected_addresses = (
        0x40184B,
        0x40185B,
        0x4019C2,
        0x401A1D,
        0x401A3C,
        0x401A68,
        0x401B96,
        0x401C55,
        0x401E79,
        0x401ED2,
    )
    for address in expected_addresses:
        assert address in coverage.addresses
335 | 
336 | 
def test_fc_driver_rep(pma01):
    """check the value of ecx after a `repne scasb` when the driver is given a `repmax`."""
    REPMAX = 0x70
    WATCHED_VA = 0x100010FA

    class LocalMonitor(vudrv.Monitor):
        """capture the value of ecx at 0x100010FA"""

        def __init__(self, *args, **kwargs):
            super().__init__(*args, **kwargs)
            # sentinel: stays -1 if the watched address is never seen.
            self.ecx = -1

        def prehook(self, emu, op, startpc):
            if startpc != WATCHED_VA:
                return
            self.ecx = emu.getRegisterByName("ecx")

    emu = pma01.getEmulator()
    vudrv.remove_default_viv_hooks(emu)

    driver = vudrv.FullCoverageEmulatorDriver(emu, repmax=REPMAX)
    monitor = LocalMonitor()
    driver.add_monitor(monitor)

    driver.run(0x10001010)

    # should be strlen("hello")
    # however viv doesn't correctly handle repnz with a repmax option.
    # see: https://github.com/vivisect/vivisect/pull/513
    #
    # instead we have 0xFFFFFFFF - repmax + strlen("hello")
    correct_answer = len("hello")
    buggy_viv_answer = 0xFFFFFFFF - REPMAX + len("hello")
    assert monitor.ecx in (correct_answer, buggy_viv_answer)
369 | 
370 | 
def test_dbg_driver_rep(pma01):
    """step through an inlined strlen and check ecx around the `repne scasb`."""
    REPMAX = 0x70

    driver = vudrv.DebuggerEmulatorDriver(pma01.getEmulator(), repmax=REPMAX)

    # .text:100010E9 BF 20 60 02 10          mov     edi, offset aHello ; "hello"
    # .text:100010EE 83 C9 FF                or      ecx, 0FFFFFFFFh
    # .text:100010F1 33 C0                   xor     eax, eax
    # .text:100010F3 6A 00                   push    0
    # .text:100010F5 F2 AE                   repne scasb
    # .text:100010F7 F7 D1                   not     ecx
    # .text:100010F9 49                      dec     ecx
    # .text:100010FA 51                      push    ecx
    driver.setProgramCounter(0x100010E9)

    # execute the setup: mov, or, xor, push.
    for _ in range(4):
        driver.stepi()

    assert driver.getProgramCounter() == 0x100010F5
    assert driver.getRegisterByName("edi") == 0x10026020
    assert driver.readString(0x10026020) == "hello"
    assert driver.getRegisterByName("eax") == 0x0
    assert driver.getRegisterByName("ecx") == 0xFFFFFFFF

    # execute the repne scasb.
    driver.stepi()

    # should be 0xFFFFFFFF - strlen("hello") - 1 (the scan also consumes the NUL)
    # however viv doesn't correctly handle repnz with a repmax option.
    # see: https://github.com/vivisect/vivisect/pull/513
    #
    # instead we have repmax - strlen("hello") - 1
    scanned = len("hello\x00")
    correct_after_scan = 0xFFFFFFFF - scanned
    buggy_viv_after_scan = REPMAX - scanned
    assert driver.getRegisterByName("ecx") in (correct_after_scan, buggy_viv_after_scan)

    # execute the not and the dec.
    for _ in range(2):
        driver.stepi()

    correct_length = len("hello")
    buggy_viv_length = 0xFFFFFFFF - REPMAX + len("hello")
    assert driver.getRegisterByName("ecx") in (correct_length, buggy_viv_length)
419 | 
420 | 
def test_dbg_driver_maxhit(pma01):
    """`max_hit` breaks emulation once an address would be executed more often than allowed."""
    CALL_VA = 0x10001015
    AFTER_CALL_VA = 0x1000101A
    LOOP_HEAD_VA = 0x1000122C

    emu = pma01.getEmulator()
    vudrv.remove_default_viv_hooks(emu)

    driver = vudrv.DebuggerEmulatorDriver(emu)
    coverage = CoverageMonitor()
    driver.add_monitor(coverage)

    # .text:10001010 B8 F8 11 00 00          mov     eax, 11F8h
    # .text:10001015 E8 06 02 00 00          call    __alloca_probe
    # .text:1000101A 8B 84 24 00 12 00 00    mov     eax, [esp+11F8h+fdwReason]
    #
    # and __alloca_probe loops across pages on the stack, like:
    #
    # .text:1000122A 72 14                                   jb      short loc_10001240
    # .text:1000122C
    # .text:1000122C 81 E9 00 10 00 00                       sub     ecx, 1000h
    # .text:10001232 2D 00 10 00 00                          sub     eax, 1000h
    # .text:10001237 85 01                                   test    [ecx], eax
    # .text:10001239 3D 00 10 00 00                          cmp     eax, 1000h
    # .text:1000123E 73 EC                                   jnb     short loc_1000122C
    # .text:10001240
    # .text:10001240 2B C8                                   sub     ecx, eax
    driver.setProgramCounter(CALL_VA)
    # alloca(0x2000): two probing loops
    driver.setRegisterByName("eax", 0x2000)
    driver.run_to_va(AFTER_CALL_VA)

    # outside the loop: hit once
    assert coverage.addresses[0x1000122A] == 1
    # inside the loop: hit twice
    assert coverage.addresses[LOOP_HEAD_VA] == 2

    # allowing two hits per address is enough to get through both loop iterations.
    driver = vudrv.DebuggerEmulatorDriver(emu, max_hit=2)
    driver.setProgramCounter(CALL_VA)
    driver.setRegisterByName("eax", 0x2000)
    driver.run_to_va(AFTER_CALL_VA)

    # allowing only a single hit is not.
    driver = vudrv.DebuggerEmulatorDriver(emu, max_hit=1)
    driver.setProgramCounter(CALL_VA)
    driver.setRegisterByName("eax", 0x2000)
    with pytest.raises(vudrv.BreakpointHit) as excinfo:
        driver.run_to_va(AFTER_CALL_VA)

    # first address in the inner loop
    # which will be hit twice, and therefore, break.
    assert excinfo.value.va == LOOP_HEAD_VA
    assert excinfo.value.reason == "max_hit"
468 | 


--------------------------------------------------------------------------------
/tests/test_meta.py:
--------------------------------------------------------------------------------
 1 | from fixtures import *
 2 | 
 3 | 
 4 | def test_md5(pma01):
 5 |     assert viv_utils.getVwSampleMd5(pma01) == "290934c61de9176ad682ffdd65f0a669"
 6 | 
 7 | 
 8 | def test_name(pma01):
 9 |     viv_utils.set_function_name(pma01, 0x10001010, "DllMain")
10 |     assert viv_utils.get_function_name(pma01, 0x10001010)
11 | 
12 | 
def test_function(pma01):
    """exercise the `Function` and `BasicBlock` wrappers on the function at 0x10001010."""
    fva = 0x10001010
    function = viv_utils.Function(pma01, fva)

    assert function.id == "290934c61de9176ad682ffdd65f0a669:0x10001010"
    assert int(function) == fva

    # the name starts out unset and can be assigned through the property.
    assert function.name is None
    function.name = "DllMain"
    assert function.name == "DllMain"

    expected_block_vas = [
        0x10001010,
        0x1000102E,
        0x10001067,
        0x1000108C,
        0x100010A3,
        0x100010DD,
        0x100010E9,
        0x10001110,
        0x10001122,
        0x1000113C,
        0x10001154,
        0x10001161,
        0x10001179,
        0x100011B6,
        0x100011C0,
        0x100011D0,
        0x100011DB,
        0x100011E2,
        0x100011E8,
    ]
    assert len(list(function.basic_blocks)) == 19
    assert sorted(int(block) for block in function.basic_blocks) == expected_block_vas

    first_block = list(function.basic_blocks)[0]
    assert int(first_block) == fva
    assert len(first_block) == 0x1E

    assert len(list(first_block.instructions)) == 9
    first_insn = list(first_block.instructions)[0]

    assert first_insn.mnem == "mov"
54 | 
55 | 
def test_function_name(pma01):
    """`getFunctionName` returns the name recorded for the function at 0x10001398."""
    name = viv_utils.getFunctionName(pma01, 0x10001398)
    assert name == "msvcrt._initterm"
58 | 
59 | 
def test_function_cconv(pma01):
    """`getFunctionCallingConvention` returns the calling convention recorded at 0x10001398."""
    cconv = viv_utils.getFunctionCallingConvention(pma01, 0x10001398)
    assert cconv == "cdecl"
62 | 
63 | 
def test_function_args(pma01):
    """`getFunctionArgs` reports two arguments for the function at 0x10001398."""
    args = viv_utils.getFunctionArgs(pma01, 0x10001398)
    assert len(args) == 2
66 | 


--------------------------------------------------------------------------------
/viv_utils/__init__.py:
--------------------------------------------------------------------------------
  1 | import os
  2 | import sys
  3 | import struct
  4 | import hashlib
  5 | import logging
  6 | import tempfile
  7 | import textwrap
  8 | import importlib.metadata
  9 | from typing import Any, Dict, List, Tuple, Iterator
 10 | 
 11 | import envi
 12 | import funcy
 13 | import vivisect
 14 | import intervaltree
 15 | import vivisect.const
 16 | 
 17 | from viv_utils.types import *
 18 | from viv_utils.idaloader import loadWorkspaceFromIdb
 19 | 
 20 | logger = logging.getLogger(__name__)
 21 | 
 22 | 
 23 | SHELLCODE_BASE = 0x690000
 24 | 
 25 | 
 26 | class IncompatibleVivVersion(ValueError):
 27 |     pass
 28 | 
 29 | 
 30 | def getVwFirstMeta(vw: Workspace) -> Dict[str, Any]:
 31 |     # return the first set of metadata from the vw.
 32 |     # this is for the first loaded file.
 33 |     # if other files have been added to the vw,
 34 |     # then this may not do what you want.
 35 |     return list(vw.filemeta.values())[0]
 36 | 
 37 | 
 38 | def getVwSampleMd5(vw: Workspace) -> str:
 39 |     return getVwFirstMeta(vw)["md5sum"]
 40 | 
 41 | 
 42 | # while building and testing capa,
 43 | # we found that upstream changes to vivisect did not play well with existing serialized vivisect analysis results.
 44 | # this manifested as confusing or incorrect .viv file contents - and our tests would suddenly fail.
 45 | # so,
 46 | # we embed the installed vivisect library version in vivisect workspaces created by viv-utils.
 47 | # when we load a .viv, then we assert that the versions match.
 48 | # if they don't, emit a warning.
 49 | # ideally, we'd bail, but the vivisect distribution situation is already a mess, so let's not further touch that.
 50 | # to minimize unexpected dependencies this check is ignored if a package does not embed the vivisect version
 51 | 
 52 | 
 53 | def getVivisectLibraryVersion() -> str:
 54 |     # ref: https://stackoverflow.com/questions/710609/checking-a-python-module-version-at-runtime
 55 |     try:
 56 |         return importlib.metadata.distribution("vivisect").version
 57 |     except importlib.metadata.PackageNotFoundError:
 58 |         logger.debug("package does not include vivisect distribution")
 59 |     return "N/A"
 60 | 
 61 | 
 62 | def setVwVivisectLibraryVersion(vw: Workspace):
 63 |     vw.setMeta("version", getVivisectLibraryVersion())
 64 | 
 65 | 
 66 | def getVwVivisectLibraryVersion(vw) -> str:
 67 |     return vw.getMeta("version")
 68 | 
 69 | 
 70 | def assertVwMatchesVivisectLibrary(vw):
 71 |     wanted = getVivisectLibraryVersion()
 72 |     found = getVwVivisectLibraryVersion(vw)
 73 |     if wanted != found:
 74 |         logger.warning("vivisect version mismatch! wanted: %s, found: %s", wanted, found)
 75 |     else:
 76 |         logger.debug("vivisect version match: %s", wanted)
 77 | 
 78 | 
 79 | def loadWorkspaceFromViv(vw: Workspace, viv_file):
 80 |     if sys.version_info >= (3, 0):
 81 |         try:
 82 |             vw.loadWorkspace(viv_file)
 83 |         except UnicodeDecodeError as e:
 84 |             raise IncompatibleVivVersion(
 85 |                 "'%s' is an invalid .viv file. It may have been generated with Python 2 (incompatible with Python 3)."
 86 |                 % viv_file
 87 |             )
 88 |     else:
 89 |         vw.loadWorkspace(viv_file)
 90 | 
 91 | 
 92 | def getWorkspace(fp: str, analyze=True, reanalyze=False, verbose=False, should_save=True) -> Workspace:
 93 |     """
 94 |     For a file path return a workspace, it will create one if the extension
 95 |     is not .viv, otherwise it will load the existing one. Reanalyze will cause
 96 |     it to create and save a new one.
 97 |     """
 98 |     vw = Workspace()
 99 |     vw.verbose = verbose
100 |     # this is pretty insane, but simply prop assignment doesn't work.
101 |     vw.config.getSubConfig("viv").getSubConfig("parsers").getSubConfig("pe")["loadresources"] = True
102 |     vw.config.getSubConfig("viv").getSubConfig("parsers").getSubConfig("pe")["nx"] = True
103 |     if fp.endswith(".viv"):
104 |         loadWorkspaceFromViv(vw, fp)
105 |         assertVwMatchesVivisectLibrary(vw)
106 |         if reanalyze:
107 |             setVwVivisectLibraryVersion(vw)
108 |             vw.analyze()
109 |     else:
110 |         viv_file = fp + ".viv"
111 |         if os.path.exists(viv_file):
112 |             loadWorkspaceFromViv(vw, viv_file)
113 |             assertVwMatchesVivisectLibrary(vw)
114 |             if reanalyze:
115 |                 setVwVivisectLibraryVersion(vw)
116 |                 vw.analyze()
117 |         else:
118 |             vw.loadFromFile(fp)
119 |             setVwVivisectLibraryVersion(vw)
120 |             if analyze:
121 |                 vw.analyze()
122 | 
123 |     if should_save:
124 |         vw.saveWorkspace()
125 | 
126 |     return vw
127 | 
128 | 
def set_function_name(vw, va: int, new_name: str):
    """
    replace the name field of the function API tuple stored at `va`,
    leaving the other four fields as they are.
    """
    # vivgui seems to override function_name with symbol names, but this is correct
    rtype, rname, cconv, _old_name, args = vw.getFunctionApi(va)
    updated_api = (rtype, rname, cconv, new_name, args)
    vw.setFunctionApi(va, updated_api)
133 | 
134 | 
def get_function_name(vw, va: int) -> str:
    """return the name field (fourth element) of the function API tuple stored at `va`."""
    _, _, _, name, _ = vw.getFunctionApi(va)
    return name
138 | 
139 | 
class Function:
    """convenience wrapper around a function address within a workspace."""

    def __init__(self, vw: Workspace, va: int):
        super().__init__()
        self.vw = vw
        self.va = va

    @funcy.cached_property
    def basic_blocks(self) -> List["BasicBlock"]:
        """the function's basic blocks, ordered by address."""
        blocks = [BasicBlock(self.vw, *block) for block in self.vw.getFunctionBlocks(self.va)]
        blocks.sort(key=lambda block: block.va)
        return blocks

    @funcy.cached_property
    def id(self):
        """identifier of the form `<md5sum of first file>:<hex va>`."""
        md5sum = getVwFirstMeta(self.vw)["md5sum"]
        return md5sum + ":" + hex(self.va)

    def __repr__(self):
        return "Function(va: {:s})".format(hex(self.va))

    def __int__(self):
        return self.va

    @property
    def name(self):
        """the function name, as stored in the workspace's function API."""
        return get_function_name(self.vw, self.va)

    @name.setter
    def name(self, new_name):
        return set_function_name(self.vw, self.va, new_name)
168 | 
169 | 
class BasicBlock:
    """convenience wrapper around a basic block: its address, size, and owning function address."""

    def __init__(self, vw: Workspace, va: int, size: int, fva: int):
        super().__init__()
        self.vw = vw
        self.va = va
        self.size = size
        self.fva = fva

    @funcy.cached_property
    def instructions(self) -> List[envi.Opcode]:
        """
        the opcodes parsed from this block, in address order.
        parsing stops at the first address that fails to disassemble.

        from envi/__init__.py:class Opcode
          opcode   - An architecture specific numerical value for the opcode
          mnem     - A human readable mnemonic for the opcode
          prefixes - a bitmask of architecture specific instruction prefixes
          size     - The size of the opcode in bytes
          operands - A list of Operand objects for this opcode
          iflags   - A list of Envi (architecture independent) instruction flags (see IF_FOO)
          va       - The virtual address the instruction lives at (used for PC relative immediates etc...)
        """
        insns = []
        cursor = self.va
        block_end = self.va + self.size
        while cursor < block_end:
            try:
                insn = self.vw.parseOpcode(cursor)
            except Exception as e:
                logger.debug("failed to disassemble: %s: %s", hex(cursor), e)
                break
            insns.append(insn)
            # advance by the length of the instruction just parsed.
            cursor += len(insn)
        return insns

    def __repr__(self):
        return "BasicBlock(va: {:s}, size: {:s}, fva: {:s})".format(hex(self.va), hex(self.size), hex(self.fva))

    def __int__(self):
        return self.va

    def __len__(self):
        return self.size
210 | 
211 | 
def one(s):
    """return the first item produced by iterating `s`, or None when `s` yields nothing."""
    return next(iter(s), None)
215 | 
216 | 
class InstructionFunctionIndex:
    """Index from VA to containing function VA"""

    def __init__(self, vw: Workspace):
        super().__init__()
        self.vw = vw
        # intervals covering each basic block, tagged with the owning function's address.
        self._index = intervaltree.IntervalTree()
        self._do_index()

    def _do_index(self):
        """record the address range of every non-empty basic block of every function."""
        for funcva in self.vw.getFunctions():
            for bb in Function(self.vw, funcva).basic_blocks:
                # zero-sized blocks are skipped rather than indexed.
                if bb.size != 0:
                    self._index[bb.va : bb.va + bb.size] = funcva

    def __getitem__(self, key):
        """return the function VA of an interval matching `key`; raise KeyError if there is none."""
        interval = one(self._index[key])
        if interval is not None:
            return interval.data
        raise KeyError()
239 | 
240 | 
def getFunctionName(vw: Workspace, fva: Address):
    """return the name field (fourth element) of the function API tuple stored at `fva`."""
    _, _, _, name, _ = vw.getFunctionApi(fva)
    return name
244 | 
245 | 
def getFunctionCallingConvention(vw: Workspace, fva: Address):
    """return the calling convention field (third element) of the function API tuple stored at `fva`."""
    _, _, cconv, _, _ = vw.getFunctionApi(fva)
    return cconv
249 | 
250 | 
def getFunctionArgs(vw: Workspace, fva: Address):
    """return the result of the workspace's own `getFunctionArgs` for `fva`."""
    function_args = vw.getFunctionArgs(fva)
    return function_args
253 | 
254 | 
def getShellcodeWorkspaceFromFile(
    filepath: str, arch: str, base: Address = SHELLCODE_BASE, entry_point: Address = 0, analyze=True, should_save=False
) -> Workspace:
    """
    Read shellcode from a file and generate a vivisect workspace for it.

    Arguments:
      filepath: path to the file containing the raw shellcode
      arch: architecture string
      base: base address where shellcode will be loaded
      entry_point: entry point of shellcode, relative to base
      analyze: analyze workspace or otherwise leave it to caller
      should_save: save workspace to disk, at `filepath` + ".viv"

    Returns: vivisect workspace, with its storage name set to `filepath` + ".viv"
    """
    with open(filepath, "rb") as f:
        sample_bytes = f.read()

    # getShellcodeWorkspace refuses to save without a destination, so hand it the
    # sample path; it appends ".viv", matching the storage name set below.
    vw = getShellcodeWorkspace(
        sample_bytes,
        arch,
        base=base,
        entry_point=entry_point,
        analyze=analyze,
        should_save=should_save,
        save_path=filepath,
    )

    vw.setMeta("StorageName", "%s.viv" % filepath)

    return vw
268 | 
269 | 
def getShellcodeWorkspace(
    buf: bytes,
    arch: str,
    base: Address = SHELLCODE_BASE,
    entry_point: Address = 0,
    analyze=True,
    should_save=False,
    save_path=None,
) -> Workspace:
    """
    Load shellcode into memory object and generate vivisect workspace.
    Thanks to Tom for most of the code.

    Arguments:
      buf: shellcode buffer bytes
      arch: architecture string
      base: base address where shellcode will be loaded
      entry_point: entry point of shellcode, relative to base
      analyze: analyze workspace or otherwise leave it to caller
      should_save: save workspace to disk
      save_path: path to save workspace to (".viv" is appended); required when should_save is set

    Returns: vivisect workspace

    Raises:
      ValueError: when should_save is set but no save_path is given
    """
    # validate the arguments up front, so that a missing destination
    # is reported before any loading or analysis work has been done.
    if should_save and save_path is None:
        raise ValueError("Failed to save workspace, destination save path cannot be empty")

    md5 = hashlib.md5()
    md5.update(buf)

    vw = Workspace()
    vw.addFile("shellcode", base, md5.hexdigest())
    vw.setMeta("Architecture", arch)
    vw.setMeta("Platform", "windows")
    # blob gives weaker results in some cases
    # so we will update this below
    vw.setMeta("Format", "pe")
    vw._snapInAnalysisModules()

    vw.addMemoryMap(base, envi.memory.MM_RWX, "shellcode", buf)
    vw.addSegment(base, len(buf), "shellcode_0x%x" % base, "shellcode")

    vw.addEntryPoint(base + entry_point)  # defaults to start of shellcode

    if analyze:
        setVwVivisectLibraryVersion(vw)
        vw.analyze()

    # analysis ran with the "pe" format; record the real format now.
    vw.setMeta("Format", "blob")

    if should_save:
        vw.setMeta("StorageName", "%s.viv" % save_path)
        vw.saveWorkspace()

    return vw
324 | 
325 | 
def saveWorkspaceToBytes(vw: Workspace) -> bytes:
    """
    serialize a vivisect workspace to a Python string/bytes.

    note, this creates and deletes a temporary file on the
      local filesystem.

    the workspace's `StorageName` meta value is pointed at the temporary file
    while saving, and restored to its original value afterwards.
    """
    orig_storage = vw.getMeta("StorageName")

    fd, temp_path = tempfile.mkstemp(suffix="viv")
    # only the path is needed: the workspace writes the file itself.
    # close the descriptor right away so that it is not leaked.
    os.close(fd)

    try:
        vw.setMeta("StorageName", temp_path)
        vw.saveWorkspace()
        with open(temp_path, "rb") as f:
            # note: here's the exit point.
            return f.read()
    finally:
        try:
            # mkstemp creates a regular file, so it must be removed with
            # os.remove; os.rmdir only works on directories.
            os.remove(temp_path)
        except OSError:
            # cleanup is best-effort; don't mask the real result or error.
            pass
        vw.setMeta("StorageName", orig_storage)
349 | 
350 | 
def loadWorkspaceFromBytes(vw: Workspace, buf: bytes):
    """
    deserialize a vivisect workspace from bytes.

    note, this creates and deletes a temporary file on the
      local filesystem.

    Args:
      vw: the workspace instance to load into.
      buf: the serialized workspace contents.

    Returns: the same workspace instance that was passed in.
    """
    fd, temp_path = tempfile.mkstemp(suffix="viv")
    # mkstemp returns an already-open descriptor. the file is reopened by path
    # below, so close the descriptor instead of leaking it.
    os.close(fd)
    try:
        with open(temp_path, "wb") as f:
            f.write(buf)
        vw.loadWorkspace(temp_path)
        assertVwMatchesVivisectLibrary(vw)
        # note: here's the exit point.
        return vw
    finally:
        # the temporary path is a regular file, so it has to be deleted
        # with os.remove; os.rmdir only removes directories.
        try:
            os.remove(temp_path)
        except OSError:
            # best-effort cleanup: never mask the real result/exception.
            pass
368 | 
369 | 
def getWorkspaceFromBytes(buf: bytes, analyze=True) -> Workspace:
    """
    build a fresh vivisect workspace from a serialized workspace.

    Args:
      buf: the serialized workspace contents.
      analyze: when true, stamp the workspace with the current vivisect
        library version and run analysis.

    Returns: the newly created workspace.
    """
    workspace = Workspace()
    workspace.verbose = True
    workspace.config.viv.parsers.pe.nx = True

    loadWorkspaceFromBytes(workspace, buf)
    assertVwMatchesVivisectLibrary(workspace)

    if not analyze:
        return workspace

    setVwVivisectLibraryVersion(workspace)
    workspace.analyze()
    return workspace
384 | 
385 | 
def getWorkspaceFromFile(filepath: str, analyze=True) -> Workspace:
    """
    load the file at the given path into a fresh vivisect workspace.

    Args:
      filepath: path of the file to load.
      analyze: when true, run analysis after loading.

    Returns: the newly created workspace.
    """
    workspace = Workspace()
    workspace.verbose = True
    workspace.config.viv.parsers.pe.nx = True

    workspace.loadFromFile(filepath)
    # the library version is recorded right after loading...
    setVwVivisectLibraryVersion(workspace)

    if not analyze:
        return workspace

    # ...and once more immediately before analysis.
    setVwVivisectLibraryVersion(workspace)
    workspace.analyze()
    return workspace
399 | 
400 | 
def get_prev_loc(vw: Workspace, va: Address):
    """
    find the location that directly precedes the given address.

    when `va` falls inside a location, the search starts from that
    location's first address; otherwise it starts from `va` itself.

    Returns: the location tuple found one byte before the starting address.

    Raises:
      ValueError: when there is no location at that preceding address.
    """
    current = vw.getLocation(va)

    anchor_va = va
    if current is not None:
        # step back from the start of the containing location,
        # not from somewhere in its middle.
        anchor_va, _size, _type, _info = current

    previous = vw.getLocation(anchor_va - 1)
    if previous is None:
        raise ValueError("failed to find prev location for va: %x" % va)

    return previous
415 | 
416 | 
def get_prev_opcode(vw: Workspace, va: Address):
    """
    parse the instruction that directly precedes the given address.

    Returns: the opcode parsed at the preceding location.

    Raises:
      ValueError: when there is no preceding location, when that location
        is not an instruction, or when the instruction fails to parse.
    """
    prev_va, _size, prev_type, _info = get_prev_loc(vw, va)

    if prev_type == vivisect.const.LOC_OP:
        try:
            return vw.parseOpcode(prev_va)
        except Exception:
            raise ValueError("failed to parse prev instruction for va: %x" % va)

    raise ValueError("failed to find prev instruction for va: %x" % va)
428 | 
429 | 
def get_all_xrefs_from(vw: Workspace, va: Address):
    """
    get all xrefs, including fallthrough instructions, from this address.

    the xrefs are derived from the branches of the instruction at `va`,
    because vivisect doesn't consider fallthroughs as xrefs.
    branches flagged `BR_PROC` are not reported.
    see: https://github.com/fireeye/flare-ida/blob/7207a46c18a81ad801720ce0595a151b777ef5d8/python/flare/jayutils.py#L311

    Yields: (from va, to va, REF_CODE, branch flags) tuples.
    """
    opcode = vw.parseOpcode(va)
    for target_va, branch_flags in opcode.getBranches():
        if not (branch_flags & envi.BR_PROC):
            yield (va, target_va, vivisect.const.REF_CODE, branch_flags)
442 | 
443 | 
def get_all_xrefs_to(vw: Workspace, va: Address):
    """
    get all xrefs, including fallthrough instructions, to this address.

    first yields the xrefs that vivisect itself knows about, then adds the
    branches of the preceding instruction that land on `va`,
    because vivisect doesn't consider fallthroughs as xrefs.
    see: https://github.com/fireeye/flare-ida/blob/7207a46c18a81ad801720ce0595a151b777ef5d8/python/flare/jayutils.py#L311
    """
    yield from vw.getXrefsTo(va)

    try:
        prev_op = get_prev_opcode(vw, va)
    except ValueError:
        # no usable instruction before this address: nothing can fall through.
        return

    for target_va, branch_flags in prev_op.getBranches():
        if target_va != va:
            continue
        yield (prev_op.va, va, vivisect.const.REF_CODE, branch_flags)
462 | 
463 | 
def empty(s) -> bool:
    """
    test whether the given iterable yields no items.

    at most one item is pulled from the iterable, so this is
    cheap for generators (but consumes that first item).
    """
    nothing = object()
    return next(iter(s), nothing) is nothing
468 | 
469 | 
class CFG(object):
    """
    control flow graph over the basic blocks of a single function.

    basic blocks are indexed by their start address (`bb_by_start`) and by
    the address of their final instruction (`bb_by_end`), and successor /
    predecessor relationships are resolved via code xrefs (including
    fallthroughs).
    """

    def __init__(self, func: Function):
        """
        Args:
          func: the function whose basic blocks make up the graph.

        Raises:
          ValueError: when the function's address is not the start of
            one of its basic blocks.
        """
        self.vw = func.vw
        self.func = func
        self.bb_by_start = {bb.va: bb for bb in self.func.basic_blocks}
        if self.func.va not in self.bb_by_start:
            # particularly when dealing with junk code,
            # the address that we think starts a function may not,
            # such as when the given address falls in the middle of a basic block.
            raise ValueError("function at 0x%x not recognized" % (self.func.va))

        self.bb_by_end = {}
        for bb in self.func.basic_blocks:
            try:
                lva, _, ltype, _ = get_prev_loc(self.vw, bb.va + bb.size)
            except (ValueError, RuntimeError):
                # viv detects "function blocks" that we interpret as "basic blocks".
                # viv may have incorrect analysis, such that a block may not be made up of contiguous instructions.
                # if we can't find an instruction at the end of a basic block,
                # we're dealing with junk. don't index that BB.
                #
                # note: get_prev_loc signals this with ValueError;
                # RuntimeError is still accepted for compatibility.
                continue

            if ltype != vivisect.const.LOC_OP:
                # the last location of the block is not an instruction: junk, as above.
                continue

            self.bb_by_end[lva] = bb

        if len(self.bb_by_start) != len(self.bb_by_end):
            # there's probably junk code encountered
            logger.warning("cfg: incomplete control flow graph")

        self._succ_cache: Dict[Address, List[BasicBlock]] = {}
        self._pred_cache: Dict[Address, List[BasicBlock]] = {}

    def _find_successors(self, bb: BasicBlock) -> List[BasicBlock]:
        # resolve the basic blocks reachable from the final instruction of `bb`.
        next_va = bb.va + bb.size
        try:
            op = get_prev_opcode(self.vw, next_va)
        except (ValueError, RuntimeError):
            # like in __init__, if there's not an insn at the end of the BB,
            # we're dealing with junk, and there's not much point.
            return []

        successors = []
        for xref in get_all_xrefs_from(self.vw, op.va):
            succ = self.bb_by_start.get(xref[vivisect.const.XR_TO])
            if succ is None:
                # if we have a jump to the import table,
                # the target of the jump is not a basic block in the function.
                continue
            successors.append(succ)
        return successors

    def _find_predecessors(self, bb: BasicBlock) -> List[BasicBlock]:
        # resolve the basic blocks whose final instruction transfers control to `bb`.
        predecessors = []
        for xref in get_all_xrefs_to(self.vw, bb.va):
            pred = self.bb_by_end.get(xref[vivisect.const.XR_FROM])
            if pred is None:
                # the source of the xref is not the end of a block in this function.
                continue
            predecessors.append(pred)
        return predecessors

    def get_successor_basic_blocks(self, bb: BasicBlock) -> Iterator[BasicBlock]:
        """
        yield the basic blocks of this function that directly follow `bb`.

        results are cached per block on first use, even when the caller
        stops iterating early.
        """
        if bb.va not in self._succ_cache:
            self._succ_cache[bb.va] = self._find_successors(bb)
        yield from self._succ_cache[bb.va]

    def get_predecessor_basic_blocks(self, bb: BasicBlock) -> Iterator[BasicBlock]:
        """
        yield the basic blocks of this function that directly precede `bb`.

        results are cached per block on first use, even when the caller
        stops iterating early.
        """
        if bb.va not in self._pred_cache:
            self._pred_cache[bb.va] = self._find_predecessors(bb)
        yield from self._pred_cache[bb.va]

    def get_root_basic_blocks(self) -> Iterator[BasicBlock]:
        """yield the basic blocks that have no predecessors."""
        for bb in self.func.basic_blocks:
            if empty(self.get_predecessor_basic_blocks(bb)):
                yield bb

    def get_leaf_basic_blocks(self) -> Iterator[BasicBlock]:
        """yield the basic blocks that have no successors."""
        for bb in self.func.basic_blocks:
            if empty(self.get_successor_basic_blocks(bb)):
                yield bb
556 | 
557 | 
def get_strings(vw: Workspace) -> Iterator[Tuple[Address, str]]:
    """
    enumerate the strings in the given vivisect workspace.

    ASCII string locations are yielded first, followed by UTF-16LE
    (unicode) string locations. locations whose bytes cannot be decoded
    with the expected encoding are skipped.

    Args:
      vw (vivisect.Workspace): the workspace.

    Yields:
      Tuple[int, str]: the address, string pair.
    """
    encodings = (
        (vivisect.const.LOC_STRING, "ascii"),
        (vivisect.const.LOC_UNI, "utf-16le"),
    )
    for ltype, encoding in encodings:
        for loc in vw.getLocations(ltype=ltype):
            va = loc[vivisect.const.L_VA]
            size = loc[vivisect.const.L_SIZE]
            try:
                s = vw.readMemory(va, size).decode(encoding)
            except UnicodeDecodeError:
                # a location may cover bytes that aren't valid in this encoding;
                # skip it rather than aborting the whole enumeration.
                continue
            yield va, s
580 | 
581 | 
def is_valid_address(vw: Workspace, va: Address) -> bool:
    """
    check whether a single byte at the given address is readable
    in the given vivisect workspace.

    Args:
      vw (vivisect.Workspace): the workspace.
      va (int): a possible memory address.

    Returns:
      bool: True if the given address is valid in the given workspace.
    """
    readable = vw.probeMemory(va, 1, envi.memory.MM_READ)
    return readable
594 | 
595 | 
def get_function_constants(vw: Workspace, fva: Address) -> Iterator[int]:
    """
    walk every instruction of the given function and report the values of
    its immediate operands, leaving out values that are valid addresses
    in the workspace.

    Args:
      vw (vivisect.Workspace): the workspace.
      fva (int): the address of a function in the workspace.

    Yields:
      int: immediate constant referenced by an instruction.
    """
    for bb in Function(vw, fva).basic_blocks:
        for insn in bb.instructions:
            for oper in insn.getOperands():
                if oper.isImmed():
                    value = oper.getOperValue(insn)
                    if not is_valid_address(vw, value):
                        yield value
620 | 
621 | 
def get_section_data(pe, section) -> bytes:
    """
    read a section's raw bytes from the PE file, using the raw data
    offset and size recorded in the section header.

    Args:
      pe (PE.PE): the parsed PE file.
      section (vstruct.VStruct): pe.IMAGE_SECTION_HEADER instance.

    Returns:
      bytes: the raw bytes of the section.
    """
    raw_offset = section.PointerToRawData
    raw_size = section.SizeOfRawData
    return pe.readAtOffset(raw_offset, raw_size)
634 | 
635 | 
class Debugger(object):
    """
    convenience wrapper around a debugger object `v`.

    exposes the (32-bit x86) registers of the wrapped object's trace as
    plain attributes, adds a few memory/stack helpers, and forwards every
    other attribute read or write to the wrapped object.
    """

    # register names that are accessible as attributes.
    REGISTERS = {
        "eax",
        "ebx",
        "ecx",
        "edx",
        "esi",
        "edi",
        "esp",
        "ebp",
        "eip",
    }

    def __init__(self, v):
        super(Debugger, self).__init__()
        self.v = v

    def __getattr__(self, k):
        """
        support reg access shortcut, like::
            print(hex(dbg.pc))
            print(hex(dbg.eax))
        register names are lowercase.
        `pc` is a shortcut for the platform program counter.
        any other name is looked up on the wrapped object.
        """
        if k == "v":
            # `v` is a real instance attribute, so we only get here when it has
            # not been assigned yet. fail cleanly instead of recursing via `self.v`.
            raise AttributeError(k)
        elif k == "pc" or k == "program_counter":
            return self.v.getTrace().getRegisterByName("eip")
        elif k == "stack_pointer":
            return self.v.getTrace().getRegisterByName("esp")
        elif k == "base_pointer":
            return self.v.getTrace().getRegisterByName("ebp")
        elif k in self.REGISTERS:
            return self.v.getTrace().getRegisterByName(k)
        else:
            return getattr(self.v, k)

    def __setattr__(self, k, v):
        """
        set reg shortcut, like::
            dbg.pc  = 0x401000
            dbg.eax = 0xAABBCCDD
        register names are lowercase.
        `pc` is a shortcut for the platform program counter.
        any other name is assigned on the wrapped object.
        """
        if k == "v":
            object.__setattr__(self, k, v)
        elif k == "pc" or k == "program_counter":
            return self.v.getTrace().setRegisterByName("eip", v)
        elif k == "stack_pointer":
            return self.v.getTrace().setRegisterByName("esp", v)
        elif k == "base_pointer":
            return self.v.getTrace().setRegisterByName("ebp", v)
        elif k in self.REGISTERS:
            return self.v.getTrace().setRegisterByName(k, v)
        else:
            # forward to the wrapped object
            # (objects have no `__setattribute__` method; use setattr).
            return setattr(self.v, k, v)

    def write_memory(self, va: Address, buf: bytes):
        """write `buf` at address `va`."""
        self.v.memobj.writeMemory(va, buf)

    def read_memory(self, va: Address, size: int):
        """read `size` bytes starting at address `va`."""
        return self.v.trace.readMemory(va, size)

    def read_dword(self, va: Address) -> int:
        """read a little-endian, unsigned 32-bit value from `va`."""
        return struct.unpack("<I", self.read_memory(va, 4))[0]

    def write_dword(self, va: Address, v: int):
        """write `v` as a little-endian, unsigned 32-bit value at `va`."""
        self.write_memory(va, struct.pack("<I", v))

    def read_ascii(self, va: Address) -> str:
        """read a NUL-terminated ASCII string from `va` (scans at most 1024 bytes)."""
        buf = self.read_memory(va, 1024)
        return buf.partition(b"\x00")[0].decode("ascii")

    def pop(self) -> int:
        """read the dword at the stack pointer, then move the stack pointer up by 4."""
        v = self.read_dword(self.esp)  # type: ignore
        self.esp = self.esp + 4  # type: ignore
        return v

    def push(self, v: int):
        """move the stack pointer down by 4, then write `v` as a dword there."""
        self.esp = self.esp - 4  # type: ignore
        self.write_dword(self.esp, v)  # type: ignore
719 | 
720 | 
def readMemoryCurrentSection(vw: Workspace, va: Address, size: int) -> bytes:
    """
    read up to `size` bytes at `va`, truncating the read so that it
    does not run past the end of the memory map containing `va`.
    """
    map_va, map_size, _perms, _fname = vw.getMemoryMap(va)
    # number of bytes left between va and the end of its memory map.
    remaining = map_size - (va - map_va)
    return vw.readMemory(va, min(size, remaining))
731 | 
732 | 
class hexdump:
    """
    iterable hex dump of a buffer, 16 bytes per line.

    each line shows the offset, the bytes in hex (two groups of eight), and
    their printable ASCII rendering. a run of identical 16-byte chunks is
    collapsed into a single "*" line, and the final line is the end offset.
    """

    # via: https://gist.github.com/NeatMonster/c06c61ba4114a2b31418a364341c26c0
    def __init__(self, buf, off=0):
        self.buf = buf
        self.off = off

    def __iter__(self):
        prev_chunk = None
        prev_line = None
        for start in range(0, len(self.buf), 16):
            chunk = bytearray(self.buf[start : start + 16])
            if chunk == prev_chunk:
                # same bytes as the previous row: collapse.
                line = "*"
            else:
                left = " ".join("{:02x}".format(b) for b in chunk[:8])
                right = " ".join("{:02x}".format(b) for b in chunk[8:])
                text = "".join(chr(b) if 32 <= b < 127 else "." for b in chunk)
                line = "{:08x}  {:23}  {:23}  |{:16}|".format(self.off + start, left, right, text)
            # emit a "*" only once per run of repeated rows.
            if chunk != prev_chunk or line != prev_line:
                yield line
            prev_chunk = chunk
            prev_line = line
        yield "{:08x}".format(self.off + len(self.buf))

    def __str__(self):
        return "\n".join(self)

    def __repr__(self):
        return "\n".join(self)
761 | 
762 | 
def dump_emu_state(emu):
    """
    print a human-readable snapshot of the emulator state to stdout:
    the x86 general purpose registers, the memory maps, and a hex dump
    of the stack region between esp and ebp.

    Args:
      emu: the emulator to inspect; registers are read by their 32-bit x86 names.
    """
    # note: the `#08x` format pads to 8 characters *including* the `0x` prefix.
    print(
        textwrap.dedent(
            f"""
      eip: {emu.getRegisterByName('eip'):#08x}
      eax: {emu.getRegisterByName('eax'):#08x}
      ebx: {emu.getRegisterByName('ebx'):#08x}
      ecx: {emu.getRegisterByName('ecx'):#08x}
      edx: {emu.getRegisterByName('edx'):#08x}
      esi: {emu.getRegisterByName('esi'):#08x}
      edi: {emu.getRegisterByName('edi'):#08x}
      esp: {emu.getRegisterByName('esp'):#08x}
      ebp: {emu.getRegisterByName('ebp'):#08x}
    """
        )
    )

    print("memory segments:")
    # one line per memory map: start-end addresses and the map's flags.
    for va, size, flags, name in emu.getMemoryMaps():
        print(f"     {va:#08x}-{va + size:#08x} {flags}")
    print()

    # print a hex dump of everything between
    # esp and ebp
    esp = emu.getRegisterByName("esp")
    ebp = emu.getRegisterByName("ebp")
    # NOTE(review): assumes ebp >= esp; otherwise the size is negative - confirm how readMemory behaves then.
    size = ebp - esp
    stack = emu.readMemory(esp, size)

    print("stack:")

    # offsets in the dump are shown relative to esp, i.e. as stack addresses.
    for line in hexdump(stack, esp):
        print("     " + line)
796 | 


--------------------------------------------------------------------------------
/viv_utils/emulator_drivers.py:
--------------------------------------------------------------------------------
  1 | import sys
  2 | import logging
  3 | import collections
  4 | from typing import List, Callable, Optional
  5 | 
  6 | import envi as v_envi
  7 | import envi.exc
  8 | import vivisect
  9 | import envi.memory as v_mem
 10 | import vivisect.const
 11 | import envi.archs.i386.disasm
 12 | 
 13 | from viv_utils.types import *
 14 | 
 15 | logger = logging.getLogger(__name__)
 16 | 
 17 | 
 18 | class StopEmulation(Exception):
 19 |     pass
 20 | 
 21 | 
 22 | class BreakpointHit(Exception):
 23 |     def __init__(self, va: int, reason=None):
 24 |         self.va = va
 25 |         self.reason = reason
 26 | 
 27 | 
 28 | # a hook overrides an API encountered by an emulator.
 29 | #
 30 | # returning True indicates the hook handled the function.
 31 | # this should include returning from the function and cleaning up the stack, if appropriate.
 32 | # a hook can also raise `StopEmulation` to ...stop the emulator.
 33 | #
 34 | # hooks can fetch the current $PC, registers, mem, etc. via the provided emulator parameter.
 35 | #
 36 | # a hook is a callable, such as a function or class with `__call__`,
 37 | # if the hook is "stateless", use a simple function (note that the
 38 | # hook API and vivisect's imphook API differ slightly):
 39 | #
#     def hook_OutputDebugString(emu, api, argv):
 41 | #         _, _, cconv, name, _ = api
 42 | #         if name != "kernel32.OutputDebugString": return False
 43 | #         logger.debug("OutputDebugString: %s", emu.readString(argv[0]))
 44 | #         cconv = emu.getCallingConvention(cconv)
 45 | #         cconv.execCallReturn(emu, 0, len(argv))
 46 | #         return True
 47 | #
 48 | # if the hook is "stateful", such as a hook that records arguments, use a class:
 49 | #
 50 | #     class CreateFileAHook:
 51 | #         def __init__(self):
 52 | #             self.paths = set()
 53 | #
 54 | #         def __call__(self, emu, api, argv):
 55 | #             _, _, cconv, name, _ = api
 56 | #             if name != "kernel32.CreateFileA": return False
 57 | #             self.paths.add(emu.readString(argv[0]))
 58 | #             cconv = emu.getCallingConvention(cconv)
 59 | #             cconv.execCallReturn(emu, 0, len(argv))
 60 | #             return True
 61 | #
 62 | Hook = Callable[[Emulator, API, List[int]], bool]
 63 | 
 64 | 
 65 | class Monitor(vivisect.impemu.monitor.EmulationMonitor):
 66 |     def prehook(self, emu, op, startpc):
 67 |         pass
 68 | 
 69 |     def posthook(self, emu, op, endpc):
 70 |         pass
 71 | 
 72 |     def preblock(self, emu, blockstart):
 73 |         # called when entering a newly recognized basic block.
 74 |         # the block analysis here is not guaranteed to be perfect,
 75 |         # but should work fairly well during FullCoverage emulation.
 76 |         pass
 77 | 
 78 |     def postblock(self, emu, blockstart, blockend):
 79 |         # called when entering a leaving recognized basic block.
 80 |         pass
 81 | 
 82 |     def apicall(self, emu, api, argv):
 83 |         # returning True signals that the API call was handled.
 84 |         return False
 85 | 
 86 |     def logAnomaly(self, emu, pc, e):
 87 |         logger.warning("monitor: anomaly: %s", e)
 88 | 
 89 | 
 90 | class UntilVAMonitor(Monitor):
 91 |     def __init__(self, va: int):
 92 |         super().__init__()
 93 |         self.va = va
 94 | 
 95 |     def prehook(self, emu, op, pc):
 96 |         if pc == self.va:
 97 |             raise BreakpointHit(pc, reason="va")
 98 | 
 99 | 
class EmuHelperMixin:
    """
    convenience readers for strings and stack data.

    expects the host class to provide `readMemory`, `readMemoryFormat`,
    and an `_emu` attribute with `getStackCounter`.
    """

    def readString(self, va, maxlength=0x100):
        """naively read ascii string"""
        raw = self.readMemory(va, maxlength)
        text, _, _ = raw.partition(b"\x00")
        return text.decode("ascii")

    def getStackValue(self, offset):
        """read the value stored at the given offset from the stack counter."""
        stack_va = self._emu.getStackCounter() + offset
        values = self.readMemoryFormat(stack_va, "<P")
        return values[0]

    def readStackMemory(self, offset, length):
        """read `length` bytes at the given offset from the stack counter."""
        stack_va = self._emu.getStackCounter() + offset
        return self.readMemory(stack_va, length)

    def readStackString(self, offset, maxlength=0x1000):
        """naively read ascii string"""
        stack_va = self._emu.getStackCounter() + offset
        raw = self.readMemory(stack_va, maxlength)
        text, _, _ = raw.partition(b"\x00")
        return text.decode("ascii")
114 | 
115 | 
116 | class EmulatorDriver(EmuHelperMixin):
117 |     """
118 |     this is a superclass for strategies for controlling viv emulator instances.
119 | 
120 |     you can also treat it as an emulator instance, e.g.:
121 | 
122 |         emu = vw.getEmulator()
123 |         drv = EmulatorDriver(emu)
124 |         drv.getProgramCounter()
125 | 
126 |     note it also inherits from EmuHelperMixin, so there are convenience routines:
127 | 
128 |         emu = vw.getEmulator()
129 |         drv = EmulatorDriver(emu)
130 |         drv.readString(0x401000)
131 |     """
132 | 
133 |     def __init__(self, emu):
134 |         super(EmulatorDriver, self).__init__()
135 |         self._emu = emu
136 |         self._monitors = set([])
137 |         self._hooks = set([])
138 | 
139 |     def __getattr__(self, name):
140 |         # look just like an emulator
141 |         return getattr(self._emu, name)
142 | 
143 |     def add_monitor(self, mon):
144 |         """
145 |         monitors are collections of callbacks that are invoked at various places:
146 | 
147 |           - pre instruction emulation
148 |           - post instruction emulation
149 |           - during API call
150 | 
151 |         see the `Monitor` superclass.
152 | 
153 |         install monitors using this routine `add_monitor`.
154 |         there can be multiple monitors added.
155 |         """
156 |         self._monitors.add(mon)
157 | 
158 |     def remove_monitor(self, mon):
159 |         self._monitors.remove(mon)
160 | 
161 |     def add_hook(self, hook):
162 |         """
163 |         hooks are functions that can override APIs encountered during emulation.
164 |         see the `Hook` superclass.
165 | 
166 |         there can be multiple hooks added, even for the same API.
167 |         hooks are invoked in the order that they were added.
168 |         """
169 |         self._hooks.add(hook)
170 | 
171 |     def remove_hook(self, hook):
172 |         self._hooks.remove(hook)
173 | 
174 |     @staticmethod
175 |     def is_call(op):
176 |         return bool(op.iflags & v_envi.IF_CALL)
177 | 
178 |     @staticmethod
179 |     def is_jmp(op):
180 |         return op.mnem == "jmp"
181 | 
182 |     @staticmethod
183 |     def is_ret(op):
184 |         return bool(op.iflags & v_envi.IF_RET)
185 | 
186 |     def is_function_or_tainted(self, va):
187 |         emu = self._emu
188 |         return emu.vw.isFunction(va) or emu.getVivTaint(va)
189 | 
190 |     def get_calling_convention(self, convname: Optional[str]):
191 |         if convname:
192 |             return self._emu.getCallingConvention(convname)
193 |         else:
194 |             return self._emu.getCallingConvention("stdcall")
195 | 
196 |     def _handle_hook(self):
197 |         """
198 |         return True if a hook handled the call, False otherwise.
199 |         if hook handled, then pc will be back at the call site,
200 |         otherwise, pc remains where it was.
201 |         """
202 |         emu = self._emu
203 |         pc = emu.getProgramCounter()
204 | 
205 |         api = emu.getCallApi(pc)
206 |         _, _, convname, callname, funcargs = api
207 | 
208 |         callconv = self.get_calling_convention(convname)
209 | 
210 |         argv = []
211 |         if callconv:
212 |             argv = callconv.getCallArgs(emu, len(funcargs))
213 | 
214 |         # attempt to invoke hooks to handle function calls.
215 |         # priority:
216 |         #   - monitor.apicall handler
217 |         #   - driver.hooks
218 |         #   - emu.hooks (default vivisect hooks)
219 | 
220 |         for mon in self._monitors:
221 |             try:
222 |                 r = mon.apicall(self, api, argv)
223 |             except StopEmulation:
224 |                 raise
225 |             except Exception as e:
226 |                 logger.debug("driver: %s.apicall failed: %s", mon.__class__.__name__, e)
227 |                 continue
228 |             else:
229 |                 if r:
230 |                     # note: short circuit
231 |                     logger.debug("driver: %s.apicall: handled call: %s", mon.__class__.__name__, callname)
232 |                     return True
233 | 
234 |         for hook in self._hooks:
235 |             try:
236 |                 ret = hook(self, api, argv)
237 |             except StopEmulation:
238 |                 raise
239 |             except Exception as e:
240 |                 logger.debug("driver: hook: %r failed: %s", hook, e)
241 |                 continue
242 |             else:
243 |                 if ret:
244 |                     # note: short circuit
245 |                     logger.debug("driver: hook handled call: %s", callname)
246 |                     return True
247 | 
248 |         if callname in emu.hooks:
249 |             # this is where vivisect-internal hooks are stored,
250 |             # such as those provided by impapi.
251 |             # note that we prefer locally configured hooks, first.
252 |             hook = emu.hooks.get(callname)
253 |             try:
254 |                 # the vivisect imphook API differs from the viv-utils hooks
255 |                 hook(self, callconv, api, argv)
256 |             except StopEmulation:
257 |                 raise
258 |             except Exception as e:
259 |                 logger.debug("driver: emu.hook.%s failed: %s", callname, e)
260 |             else:
261 |                 # note: short circuit
262 |                 logger.debug("driver: emu hook handled call: %s", callname)
263 |                 return True
264 | 
265 |         if callname and callname not in ("UnknownApi", "?"):
266 |             logger.debug("driver: API call NOT hooked: %s", callname)
267 | 
268 |         return False
269 | 
270 |     def handle_call(self, op, avoid_calls=False):
271 |         """
272 |         emulate a call instruction (pc should be at a the call instruction).
273 |         if the target is hooked, do the hook instead of executing it.
274 | 
275 |         pending `avoid_calls`, try to step into or over the function.
276 | 
277 |         general algorithm:
278 | 
279 |             check to see if the function is hooked.
280 |             if its hooked, do the hook, and pc goes to next instruction after the call.
281 |             else,
282 |                 if avoid_calls is false, step into the call, and pc is at first instruction of function.
283 |                 if avoid_calls is true, step over the call, as best as possible.
284 |                 this means attempting to clean up the stack if its a cdecl call.
285 |                 also returning 0.
286 | 
287 |         return True if stepped into the function, False if the function is completely handled.
288 |         """
289 |         emu = self._emu
290 | 
291 |         pc = emu.getProgramCounter()
292 |         emu.executeOpcode(op)
293 |         target = emu.getProgramCounter()
294 | 
295 |         if self._handle_hook():
296 |             # some hook handled the call,
297 |             # so make sure PC is at the next instruction
298 |             # this may fail during emulation, e.g. if the stack gets corrupted during emulation or by a function hook
299 |             if emu.getProgramCounter() != pc + len(op):
300 |                 logger.warning(
301 |                     "hook failed to restore PC correctly after call, from: 0x%x, expected: 0x%x, found: 0x%x",
302 |                     pc,
303 |                     pc + len(op),
304 |                     emu.getProgramCounter(),
305 |                 )
306 |                 # pc is undefined (emulation error)
307 |                 raise StopEmulation
308 | 
309 |             # hook handled it
310 |             # pc is at instruction after call
311 |             return False
312 | 
313 |         elif avoid_calls or emu.getVivTaint(target) or not emu.probeMemory(target, 0x1, v_mem.MM_EXEC):
314 |             # either:
315 |             #  - we don't to emulate into functions, or
316 |             #  - the target is unavailable/unresolved
317 |             #  - the target is not executable
318 |             #
319 |             # jump over the call instruction.
320 |             #
321 |             # attempt to clean up stack, as necessary.
322 |             # assume return value is 0
323 |             _, _, convname, _, funcargs = emu.getCallApi(target)
324 |             callconv = self.get_calling_convention(convname)
325 |             # this will jump to the return address from the stack.
326 |             callconv.execCallReturn(emu, 0, len(funcargs))
327 |             if emu.getProgramCounter() != pc + len(op):
328 |                 logger.warning(
329 |                     "hook failed to restore PC correctly after call, from: 0x%x, expected: 0x%x, found: 0x%x",
330 |                     pc,
331 |                     pc + len(op),
332 |                     emu.getProgramCounter(),
333 |                 )
334 |                 # pc is undefined (emulation error)
335 |                 raise StopEmulation
336 | 
337 |             # pc is at instruction after call
338 |             return False
339 | 
340 |         else:
341 |             # we want to emulate into the function,
342 |             # and its available and executable.
343 | 
344 |             # pc is at first instruction in the call.
345 |             return True
346 | 
347 |     def handle_jmp(self, op, avoid_calls=False):
348 |         """
349 |         emulate a jmp instruction.
350 | 
351 |         most of the time, this is to implement loops and such.
352 |         however, occasionally we may encounter a "tail call";
353 |         that is, a jmp to the start of a function.
354 |         in these cases, we want to treat the transition like a call.
355 | 
356 |         this function is like `handle_call` when the target of the
357 |          jump is the start of a recognized function/API.
358 | 
359 |         returns True when the emulator followed the jmp and is now at the target.
360 |         returns False when the emulator handled a tail call and is now after the call.
361 |         """
362 |         emu = self._emu
363 | 
364 |         pc = emu.getProgramCounter()
365 | 
366 |         # if the target address has an associated API
367 |         # then this was a tail call (jump to function entry).
368 |         # otherwise, its just a normal jmp and our handling is done.
369 |         # careful to raise the segmentation violation for normal jmps.
370 |         try:
371 |             emu.executeOpcode(op)
372 |         except envi.exc.SegmentationViolation as e:
373 |             target = e.va
374 |             if not self.is_function_or_tainted(target):
375 |                 # normal jump, but to invalid location
376 |                 # let caller handle the exception
377 |                 raise
378 |         else:
379 |             target = emu.getProgramCounter()
380 |             # before this we verified that emu.getCallApi() returns a value, however
381 |             # this returns a default API tuple for most addresses
382 |             if not self.is_function_or_tainted(target):
383 |                 # normal jump, to valid location.
384 |                 # emulation is complete.
385 |                 # pc is at the destination of the jump.
386 |                 return True
387 | 
388 |         # if we reach here, we're in a tail call,
389 |         # because the target address is a function or tainted.
390 | 
391 |         if self._handle_hook():
392 |             if emu.getProgramCounter() != pc + len(op):
393 |                 logger.warning(
394 |                     "hook failed to restore PC correctly after call, from: 0x%x, expected: 0x%x, found: 0x%x",
395 |                     pc,
396 |                     pc + len(op),
397 |                     emu.getProgramCounter(),
398 |                 )
399 |                 # pc is undefined (emulation error)
400 |                 raise StopEmulation
401 | 
402 |             # some hook handled the tail call,
403 |             # pc is at call's return address.
404 |             return False
405 | 
406 |         elif avoid_calls or emu.getVivTaint(target) or not emu.probeMemory(target, 0x1, v_mem.MM_EXEC):
407 |             # either:
408 |             #  - we don't to emulate into functions, or
409 |             #  - the target is unavailable/unresolved
410 |             #  - the target is not executable
411 |             #
412 |             # jump over the call instruction.
413 |             #
414 |             # attempt to clean up stack, as necessary.
415 |             # assume return value is 0
416 |             _, _, convname, _, funcargs = emu.getCallApi(target)
417 |             callconv = self.get_calling_convention(convname)
418 |             # this will jump to the return address from the stack.
419 |             callconv.execCallReturn(emu, 0, len(funcargs))
420 | 
421 |             # pc is at the return address.
422 |             return False
423 | 
424 |         else:
425 |             # we want to emulate into the function,
426 |             # and its available and executable.
427 | 
428 |             # pc is at first instruction in the function.
429 |             return True
430 | 
431 | 
class DebuggerEmulatorDriver(EmulatorDriver):
    """
    an EmulatorDriver exposing debugger-style primitives:
      stepi, stepo, run, run-to-mnemonic, run-to-address.

    the stop conditions are implemented as monitors (see the nested classes),
     which double as examples of how to write one.

    "breakpoints" are a set of addresses; when the emulator is about to execute
     one of them, a `BreakpointHit` exception is raised.
    """

    class MaxInsnMonitor(Monitor):
        """raise `BreakpointHit` (reason "max_insn") once `max_insn` instructions were counted."""

        def __init__(self, max_insn):
            super().__init__()
            self.max_insn = max_insn
            self.counter = 0

        def prehook(self, emu, op, pc):
            if self.counter < self.max_insn:
                self.counter += 1
                return

            raise BreakpointHit(pc, reason="max_insn")

        def reset(self):
            self.counter = 0

    class MaxHitMonitor(Monitor):
        """raise `BreakpointHit` (reason "max_hit") once a single address was seen `max_hit` times."""

        def __init__(self, max_hit):
            super().__init__()
            self.max_hit = max_hit
            self.counter = collections.Counter()

        def prehook(self, emu, op, pc):
            hits = self.counter.get(pc, 0)
            if hits >= self.max_hit:
                raise BreakpointHit(pc, reason="max_hit")

            self.counter[pc] = hits + 1

        def reset(self):
            self.counter = collections.Counter()

    class BreakpointMonitor(Monitor):
        """raise `BreakpointHit` (reason "breakpoint") when pc is in the `breakpoints` set."""

        def __init__(self):
            super().__init__()
            self.breakpoints = set()

        def prehook(self, emu, op, pc):
            if pc not in self.breakpoints:
                return

            raise BreakpointHit(pc, reason="breakpoint")

    def __init__(self, *args, repmax=None, max_insn=None, max_hit=None, **kwargs):
        """
        args:
          repmax: when not None, stored as the "i386:repmax" emulator option.
          max_insn: instruction budget; a falsy value means `sys.maxsize`.
          max_hit: per-address hit budget; a falsy value means `sys.maxsize`.
          other arguments are forwarded to `EmulatorDriver`.
        """
        super().__init__(*args, **kwargs)
        if repmax is not None:
            self.setEmuOpt("i386:repmax", repmax)

        self.max_insn_mon = self.MaxInsnMonitor(max_insn or sys.maxsize)
        self.max_hit_mon = self.MaxHitMonitor(max_hit or sys.maxsize)
        self.bp_mon = self.BreakpointMonitor()

        for builtin_mon in (self.max_insn_mon, self.max_hit_mon, self.bp_mon):
            self.add_monitor(builtin_mon)

        # public member: add and remove breakpoints by mutating this set.
        #
        # implementation: this is the very same set() instance the monitor checks.
        self.breakpoints = self.bp_mon.breakpoints

    def step(self, avoid_calls):
        """
        emulate one instruction, running monitor prehooks before and posthooks after.
        calls and jmps are routed through `handle_call`/`handle_jmp`.
        """
        emu = self._emu

        pc_before = emu.getProgramCounter()
        op = emu.parseOpcode(pc_before)

        for mon in self._monitors:
            mon.prehook(emu, op, pc_before)

        if self.is_call(op):
            self.handle_call(op, avoid_calls=avoid_calls)
        elif self.is_jmp(op):
            self.handle_jmp(op, avoid_calls=avoid_calls)
        else:
            emu.executeOpcode(op)

        pc_after = emu.getProgramCounter()

        for mon in self._monitors:
            mon.posthook(emu, op, pc_after)

    def stepo(self):
        """step over: one instruction, without emulating into calls."""
        return self.step(True)

    def stepi(self):
        """step into: one instruction, emulating into calls."""
        return self.step(False)

    def run(self):
        """
        reset the hit/instruction counters, then stepi forever.
        the loop only ends via an exception, e.g. `BreakpointHit` from a monitor
        (breakpoint, max_insn, max_hit).
        """
        for budget_mon in (self.max_hit_mon, self.max_insn_mon):
            budget_mon.reset()

        while True:
            self.stepi()

    class UntilMnemonicMonitor(Monitor):
        """raise `BreakpointHit` (reason "mnemonic") before executing any of the given mnemonics."""

        def __init__(self, mnems: List[str]):
            super().__init__()
            self.mnems = mnems

        def prehook(self, emu, op, pc):
            if op.mnem not in self.mnems:
                return

            raise BreakpointHit(pc, reason="mnemonic")

    def run_to_mnem(self, mnems: List[str]):
        """
        stepi until one of:
          - a breakpoint is hit,
          - max_instruction_count is reached,
          - an instruction with one of the given mnemonics is reached (not executed).
        the corresponding exception is raised in every case.
        """
        until_mon = self.UntilMnemonicMonitor(mnems)
        self.add_monitor(until_mon)

        try:
            self.run()
        finally:
            self.remove_monitor(until_mon)

    def run_to_va(self, va: int):
        """
        stepi until one of:
          - a breakpoint is hit,
          - max_instruction_count is reached,
          - the given address is reached (not executed).
        a `BreakpointHit` at `va` is swallowed; any other exception is raised.
        """
        until_mon = UntilVAMonitor(va)
        self.add_monitor(until_mon)

        try:
            self.run()
        except BreakpointHit as hit:
            if hit.va == va:
                return
            raise
        finally:
            self.remove_monitor(until_mon)
582 | 
class FullCoverageEmulatorDriver(EmulatorDriver):
    """
    an emulator that attempts to explore all code paths from a given entry.
    that is, it explores all branches encountered (though it doesn't follow calls).
    it should emulate each instruction once (unless REP prefix, and limited to repmax iterations).

    use a monitor to receive callbacks describing the found instructions and blocks.
    """

    def __init__(self, *args, repmax=None, **kwargs):
        """
        args:
          repmax: when not None, stored as the "i386:repmax" emulator option.
          other arguments are forwarded to `EmulatorDriver`.
        """
        super().__init__(*args, **kwargs)
        if repmax is not None:
            self.setEmuOpt("i386:repmax", repmax)

    def is_table(self, op, xrefs):
        """
        does the given instruction branch via a table?

        returns False when the workspace has no location at `op.va` or when
        `xrefs` is empty; otherwise True if any statically-computed branch
        of `op` carries the `envi.BR_TABLE` flag.
        """
        if not self.vw.getLocation(op.va):
            return False
        if not xrefs:
            return False

        # emu=None: ask for the branches without consulting emulator state.
        for bto, bflags in op.getBranches(emu=None):
            if bflags & envi.BR_TABLE:
                return True

        return False

    @staticmethod
    def is_conditional(op):
        """
        is the given instruction a conditional branch?

        note: for branch instructions this returns the masked `IF_COND` bits
        (an int that is truthy when the flag is set), not a strict bool.
        """
        if not (op.iflags & envi.IF_BRANCH):
            return False
        return op.iflags & envi.IF_COND

    def get_branches(self, op):
        """
        collect the addresses to which the given instruction may branch.

        returns:
          list: branch target addresses; empty when `op` is not a branch
            or no targets are known.
        """
        emu = self._emu
        vw = emu.vw
        ret = []

        if not (op.iflags & envi.IF_BRANCH):
            return []

        # table branch: take the targets from the workspace's code xrefs.
        xrefs = vw.getXrefsFrom(op.va, rtype=vivisect.const.REF_CODE)
        if self.is_table(op, xrefs):
            for xrfrom, xrto, xrtype, xrflags in xrefs:
                ret.append(xrto)
            return ret

        # otherwise, ask the instruction itself, resolving operands with the emulator.
        xrefs = op.getBranches(emu=emu)
        if not xrefs:
            return []

        if self.is_conditional(op):
            for bto, bflags in xrefs:
                # skip targets that are falsy (None/0).
                if not bto:
                    continue
                ret.append(bto)
            return ret

        # we've hit a branch that doesn't go anywhere.
        # probably a switchcase we don't handle well.
        for bto, bflags in xrefs:
            # skip branches flagged BR_DEREF.
            if bflags & envi.BR_DEREF:
                continue

            ret.append(bto)

        return ret

    def step(self):
        """
        emulate one instruction.
        return :
          - whether the instruction falls through, and
          - the list of branch target to which execution may flow from this instruction.

        calls and jmps are handled with avoid_calls=True;
        when such a call/tail call was skipped, the branch list is empty.
        """
        emu = self._emu

        startpc = emu.getProgramCounter()
        op = emu.parseOpcode(startpc)

        for mon in self._monitors:
            mon.prehook(emu, op, startpc)

        # compute the branches before executing, while pc is still at the instruction.
        branches = self.get_branches(op)

        if self.is_call(op):
            skipped = not self.handle_call(op, avoid_calls=True)
        elif self.is_jmp(op):
            skipped = not self.handle_jmp(op, avoid_calls=True)
        else:
            emu.executeOpcode(op)
            skipped = False

        endpc = emu.getProgramCounter()

        for mon in self._monitors:
            mon.posthook(emu, op, endpc)

        does_fallthrough = not (op.iflags & envi.IF_NOFALL)

        if skipped:
            return does_fallthrough, []
        else:
            return does_fallthrough, branches

    def run(self, va: int):
        """
        explore the code reachable from `va`, invoking monitor callbacks
        (preblock/postblock, and prehook/posthook via `step`) along the way.

        args:
          va (int): the address at which to start exploring.
        """
        # explore from the given address, emulating all encountered instructions once.
        #
        # use a queue of emulator snaps, one for each block that still needs to be explored.
        # use a set to track the instructions already emulated.
        #
        # when emulating an instruction, here are the cases:
        #  - instruction not supported: skip to next one
        #  - invalid instruction: stop emulation
        #  - branching instruction: stop emulation, add snap for each branch
        #  - fallthrough to new instruction: step to next instruction
        #  - fallthrough to seen instruction: stop emulation
        #  - no fallthrough (like ret): stop emulation
        emu = self._emu
        emu.setProgramCounter(va)

        # queue of emulator snapshots to explore
        q = collections.deque([emu.getEmuSnap()])

        # set of addresses already visited:
        # both block starts and each instruction about to be emulated.
        seen = set()

        while q:
            snap = q.popleft()

            emu.setEmuSnap(snap)
            blockstart = emu.getProgramCounter()

            if blockstart in seen:
                # this block has already been explored,
                # don't do duplicate work.
                continue

            seen.add(blockstart)

            for mon in self._monitors:
                mon.preblock(self, blockstart)

            while True:
                # the address of the instruction we're about to emulate.
                lastpc = emu.getProgramCounter()
                seen.add(lastpc)

                try:
                    does_fallthrough, branches = self.step()
                except v_envi.UnsupportedInstruction:
                    # don't know how to emulate the instruction.
                    # skip it and hope we can fallthrough and keep emulating.
                    op = emu.parseOpcode(lastpc)
                    emu.setProgramCounter(lastpc + op.size)

                    logger.debug(
                        "driver: run_function: skipping unsupported instruction: 0x%x %s",
                        lastpc,
                        op.mnem,
                    )

                    continue
                except v_envi.InvalidInstruction:
                    # don't know how to decode the instruction.
                    # so we don't know its length, and there's nothing we can do.

                    logger.debug(
                        "driver: run_function: invalid instruction: 0x%x",
                        lastpc,
                    )

                    blockend = lastpc
                    for mon in self._monitors:
                        mon.postblock(self, blockstart, blockend)

                    # stop emulating, and go to next block in the queue.
                    break
                except envi.exc.BreakpointHit:
                    # emulation likely wandered off, e.g., into alignment (CC bytes)

                    # note: unlike the other exits, no postblock callback is made here.

                    # stop emulating, and go to next block in the queue.
                    break

                if branches:
                    blockend = lastpc

                    # other case: branching instruction.
                    # enqueue all the branch options for exploration.
                    for branch in branches:
                        if branch in seen:
                            continue

                        # snapshot the current state with pc moved to the branch target.
                        emu.setProgramCounter(branch)
                        q.append(emu.getEmuSnap())

                    for mon in self._monitors:
                        mon.postblock(self, blockstart, blockend)

                    # stop emulating this basic block,
                    # go to next block in the queue.
                    break

                elif does_fallthrough:
                    # common case: middle of BB, keep stepping.

                    nextpc = emu.getProgramCounter()
                    if nextpc in seen:
                        if nextpc == lastpc:
                            # candidates:
                            #   - jump to self
                            #   - REP instruction
                            #   - ???
                            op = emu.parseOpcode(lastpc)
                            if op.prefixes & envi.archs.i386.disasm.PREFIX_REP:
                                # its a REP instruction,
                                # do this max_rep times,
                                # then be done.
                                # TODO
                                continue

                            # other cases: like a new basic block
                            # so fallthrough and break.

                        # the next instruction has already been explored.
                        # must be an overlapping block.
                        # stop emulating and go to next block in queue.
                        blockend = lastpc

                        for mon in self._monitors:
                            mon.postblock(self, blockstart, blockend)

                        break
                    else:
                        # next instruction is not yet explored,
                        # keep stepping.
                        continue

                else:
                    # uncommon case: no fallthrough, like ret.
                    # stop emulating this basic block.
                    # go to next block in the queue.
                    blockend = lastpc

                    for mon in self._monitors:
                        mon.postblock(self, blockstart, blockend)

                    break
830 | 
831 | 
class SinglePathEmulatorDriver(FullCoverageEmulatorDriver):
    """
    emulate along the first path that reaches a target VA.
    the path is found by brute force, using the full coverage exploration.
    """

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)

    def run_to_va(self, va: int, tova: int):
        """
        explore from `va` (see `run`) until `tova` is about to be executed.

        a `BreakpointHit` raised at `tova` ends the run quietly;
        a `BreakpointHit` at any other address is re-raised.
        the temporary monitor is removed in every case.
        """
        target_mon = UntilVAMonitor(tova)
        self.add_monitor(target_mon)
        try:
            self.run(va)
        except BreakpointHit as hit:
            if hit.va == tova:
                return
            raise
        finally:
            self.remove_monitor(target_mon)
854 | 
855 | 
def remove_default_viv_hooks(emu, allow_list=None):
    """
    drop vivisect's default emulation hooks (imphooks) from the given emulator.

    vivisect ships hooks that emulate
     - API calls, e.g. GetProcAddress
     - abstractions of library code functionality, e.g. _alloca_probe

    in our testing the hook implementations are inconsistent, e.g. around function returns,
    so every hook is deleted from `emu.hooks` except those named in `allow_list`.

    args:
      emu: an emulator with a `hooks` mapping, which is modified in place.
      allow_list: optional container of hook names to keep; None/empty keeps nothing.
    """
    keep = allow_list if allow_list else ()

    # decide first, delete afterwards, so the mapping isn't mutated while iterating it.
    doomed = [hook_name for hook_name in list(emu.hooks) if hook_name not in keep]
    for hook_name in doomed:
        del emu.hooks[hook_name]
869 | 


--------------------------------------------------------------------------------
/viv_utils/flirt.py:
--------------------------------------------------------------------------------
  1 | import os
  2 | import gzip
  3 | import time
  4 | import logging
  5 | import contextlib
  6 | 
  7 | import envi
  8 | import flirt
  9 | import vivisect
 10 | import vivisect.exc
 11 | import vivisect.const
 12 | 
 13 | import viv_utils
 14 | 
 15 | logger = logging.getLogger(__name__)
 16 | 
 17 | 
 18 | # vivisect funcmeta key for a bool to indicate if a function is recognized from a library.
 19 | # not expecting anyone to use this, aka private symbol.
 20 | _LIBRARY_META_KEY = "is-library"
 21 | 
 22 | 
 23 | @contextlib.contextmanager
 24 | def timing(msg):
 25 |     t0 = time.time()
 26 |     yield
 27 |     t1 = time.time()
 28 |     logger.debug("perf: %s: %0.2fs", msg, t1 - t0)
 29 | 
 30 | 
 31 | def is_library_function(vw, va):
 32 |     """
 33 |     is the function at the given address a library function?
 34 |     this may be determined by a signature matching backend.
 35 |     if there's no function at the given address, `False` is returned.
 36 | 
 37 |     note: if its a library function, it should also have a name set.
 38 | 
 39 |     args:
 40 |       vw (vivisect.Workspace):
 41 |       va (int): the virtual address of a function.
 42 | 
 43 |     returns:
 44 |       bool: if the function is recognized as from a library.
 45 |     """
 46 |     return vw.funcmeta.get(va, {}).get(_LIBRARY_META_KEY, False)
 47 | 
 48 | 
 49 | def make_library_function(vw, va):
 50 |     """
 51 |     mark the function with the given address a library function.
 52 |     the associated accessor is `is_library_function`.
 53 | 
 54 |     if there's no function at the given address, this routine has no effect.
 55 | 
 56 |     note: if its a library function, it should also have a name set.
 57 |     its up to the caller to do this part.
 58 | 
 59 |     args:
 60 |       vw (vivisect.Workspace):
 61 |       va (int): the virtual address of a function.
 62 |     """
 63 |     fmeta = vw.funcmeta.get(va, {})
 64 |     fmeta[_LIBRARY_META_KEY] = True
 65 | 
 66 | 
 67 | def add_function_flirt_match(vw, va, name):
 68 |     """
 69 |     mark the function at the given address as a library function with the given name.
 70 |     the name overrides any existing function name.
 71 | 
 72 |     args:
 73 |       vw (vivisect.Workspace):
 74 |       va (int): the virtual address of a function.
 75 |       name (str): the name to assign to the function.
 76 |     """
 77 |     make_library_function(vw, va)
 78 |     viv_utils.set_function_name(vw, va, name)
 79 | 
 80 | 
 81 | def get_match_name(match):
 82 |     """
 83 |     fetch the best name for a `flirt.FlirtSignature` instance.
 84 |     these instances returned by `flirt.FlirtMatcher.match()`
 85 |     may have multiple names, such as public and local names for different parts
 86 |     of a function. the best name is that at offset zero (the function name).
 87 | 
 88 |     probably every signature has a best name, though I'm not 100% sure.
 89 | 
 90 |     args:
 91 |       match (flirt.FlirtSignature): the signature to get a name from.
 92 | 
 93 |     returns:
 94 |       str: the best name of the function matched by the given signature.
 95 |     """
 96 |     for name, type_, offset in match.names:
 97 |         if offset == 0:
 98 |             return name
 99 |     raise ValueError("flirt: match: no best name: %s", match.names)
100 | 
101 | 
def match_function_flirt_signatures(matcher, vw, va, cache=None):
    """
    match the given FLIRT signatures against the function at the given address.
    upon success, update the workspace with match metadata, setting the
    function as a library function and assigning its name.

    if multiple different signatures match the function, don't do anything.

    args:
      matcher (flirt.FlirtMatcher): the compiled FLIRT signature matcher.
      vw (vivisect.workspace): the analyzed program's workspace.
      va (int): the virtual address of a function to match.
      cache (Optional[Dict[int, Union[str, None]]]): internal cache of matches VA -> name or None on "no match".
       no need to provide as external caller.

    returns:
      None: results are recorded in the workspace (and in `cache`).
    """
    if cache is None:
        # we cache both successful and failed lookups.
        #
        # (callers of this function don't need to initialize the cache.
        #  we'll provide one during recursive calls when we need it.)
        #
        # while we can use funcmeta to retrieve existing successful matches,
        # we don't persist failed matches,
        # because another FLIRT matcher might come along with better knowledge.
        #
        # however, when we match reference names, especially chained together,
        # then we need to cache the negative result, or we do a ton of extra work.
        # "accidentally quadratic" or worse.
        # see https://github.com/fireeye/capa/issues/448
        cache = {}

    function_meta = vw.funcmeta.get(va)
    if not function_meta:
        # not a function, we're not going to consider this.
        return

    if va in cache:
        # already attempted (successfully or not) during this matching session.
        return

    if is_library_function(vw, va):
        # already matched here.
        # this might be the case if recursive matching visited this address.
        name = viv_utils.get_function_name(vw, va)
        cache[va] = name
        return

    # as seen in https://github.com/williballenthin/lancelot/issues/112
    # Hex-Rays may distribute signatures that match across multiple functions.
    # therefore, we cannot rely on fetching just a single function's data.
    # in fact, we really don't know how much data to fetch.
    # so, lets pick an unreasonably large number and hope it works.
    #
    # perf: larger the size, more to memcpy.
    size = max(0x10000, function_meta.get("Size", 0))

    buf = viv_utils.readMemoryCurrentSection(vw, va, size)

    matches = []
    for match in matcher.match(buf):
        # collect all the name tuples (name, type, offset) with type==reference.
        # ignores other name types like "public" and "local".
        references = list(filter(lambda n: n[1] == "reference", match.names))

        if not references:
            # there are no references that we need to check, so this is a complete match.
            # common case.
            matches.append(match)

        else:
            # flirt uses reference names to assert that
            # the function contains a reference to another function with a given name.
            #
            # we need to loop through these references,
            # potentially recursively FLIRT match,
            # and check the name matches (or doesn't).

            # at the end of the following loop,
            # if this flag is still true,
            # then all the references have been validated.
            does_match_references = True

            for ref_name, _, ref_offset in references:
                ref_va = va + ref_offset

                # the reference offset may be inside an instruction,
                # so we use getLocation to select the containing instruction address.
                location = vw.getLocation(ref_va)
                if location is None:
                    does_match_references = False
                    break

                loc_va = location[vivisect.const.L_VA]

                # an instruction may have multiple xrefs from
                # so we loop through all code references,
                # searching for that name.
                #
                # if the name is found, then this flag will be set.
                does_match_the_reference = False
                for xref in vw.getXrefsFrom(loc_va):
                    if ref_name == ".":
                        # special case: reference named `.`
                        # which right now we interpret to mean "any data reference".
                        # see: https://github.com/williballenthin/lancelot/issues/112#issuecomment-802379966
                        #
                        # unfortunately, viv doesn't extract the xref for this one sample,
                        # so this is untested.
                        #
                        # "any" means the first data xref settles it:
                        # stop here so a later non-data xref can't reset the flag.
                        if xref[vivisect.const.XR_RTYPE] == vivisect.const.REF_DATA:
                            does_match_the_reference = True
                            break

                    else:
                        # common case
                        #
                        # FLIRT signatures only match code,
                        # so we're only going to resolve references that point to code.
                        if xref[vivisect.const.XR_RTYPE] != vivisect.const.REF_CODE:
                            continue

                        target = xref[vivisect.const.XR_TO]
                        match_function_flirt_signatures(matcher, vw, target, cache)

                        # the matching will have updated the vw in place,
                        # so now we inspect any names found at the target location.
                        if is_library_function(vw, target):
                            found_name = viv_utils.get_function_name(vw, target)
                            cache[target] = found_name
                            if found_name == ref_name:
                                does_match_the_reference = True
                                break
                        else:
                            cache[target] = None

                if not does_match_the_reference:
                    does_match_references = False
                    break

            if does_match_references:
                # only if all references pass do we count it.
                matches.append(match)

    if not matches:
        cache[va] = None
        return

    # we may have multiple signatures that match the same function, like `strcpy`.
    # these could be copies from multiple libraries.
    # so we don't mind if there are multiple matches, as long as names are the same.
    #
    # but if there are multiple candidate names, that's a problem.
    # our signatures are not precise enough.
    # we could maybe mark the function as "is a library function", but not assign name.
    # though, if we have signature FPs among library functions, it could easily FP with user code too.
    # so safest thing to do is not make any claim about the function.
    names = list(set(map(get_match_name, matches)))

    if len(names) != 1:
        cache[va] = None
        logger.debug("conflicting names: 0x%x: %s", va, names)
        return

    # there's one candidate name,
    # so all the matches *should* be about the same, i'd assume.
    match = matches[0]

    # first add local names, then we'll do public names
    # this way public names have precedence.
    # see: https://github.com/williballenthin/lancelot/issues/112#issuecomment-802221966
    for name, type_, offset in match.names:
        if type_ != "local":
            continue

        if not vw.isFunction(va + offset):
            # since we're registered as a function analyzer,
            # we have to deal with a race condition:
            # the location for which we have a name may not yet be a function.
            #
            # we can detect via two facts:
            #   - the location hasn't been processed yet
            #   - the address is executable
            if vw.getLocation(va + offset) is None and vw.probeMemory(va + offset, 1, envi.memory.MM_EXEC):
                # so lets try to turn it into a function
                vw.makeFunction(va + offset)

        try:
            add_function_flirt_match(vw, va + offset, name)
        except vivisect.exc.InvalidFunction:
            # still not a function: skip this name.
            continue
        else:
            cache[va + offset] = name
            logger.debug("found local function name: 0x%x: %s", va + offset, name)

    for name, type_, offset in match.names:
        if type_ != "public":
            continue

        try:
            add_function_flirt_match(vw, va + offset, name)
        except vivisect.exc.InvalidFunction:
            continue
        else:
            cache[va + offset] = name
            logger.debug("found library function: 0x%x: %s", va + offset, name)
305 | 
306 | 
307 | class FlirtFunctionAnalyzer:
308 |     def __init__(self, matcher, name=None):
309 |         self.matcher = matcher
310 |         self.name = name
311 | 
312 |     def analyzeFunction(self, vw: vivisect.VivWorkspace, funcva: int):
313 |         match_function_flirt_signatures(self.matcher, vw, funcva)
314 | 
315 |     @property
316 |     def __name__(self):
317 |         if self.name:
318 |             return f"{self.__class__.__name__} ({self.name})"
319 |         else:
320 |             return f"{self.__class__.__name__}"
321 | 
322 |     def __repr__(self):
323 |         return self.__name__
324 | 
325 | 
326 | def addFlirtFunctionAnalyzer(vw, analyzer):
327 |     # this is basically the logic in `vivisect.VivWorkspace.addFuncAnalysisModule`.
328 |     # however, that routine assumes the analyzer is a Python module, which is basically a global,
329 |     # and i am very against globals.
330 |     # so, we manually place the analyzer into the analyzer queue.
331 |     #
332 |     # notably, this enables a user to register multiple FlirtAnalyzers for different signature sets.
333 |     key = repr(analyzer)
334 | 
335 |     if key in vw.fmodlist:
336 |         raise ValueError("analyzer already present")
337 | 
338 |     vw.fmodlist.append(key)
339 |     vw.fmods[key] = analyzer
340 | 
341 | 
342 | def register_flirt_signature_analyzers(vw, sigpaths):
343 |     """
344 |     args:
345 |       vw (vivisect.VivWorkspace):
346 |       sigpaths (List[str]): file system paths of .sig/.pat files
347 |     """
348 |     for sigpath in sigpaths:
349 |         try:
350 |             sigs = load_flirt_signature(sigpath)
351 |         except ValueError as e:
352 |             logger.warning("could not load %s: %s", sigpath, str(e))
353 |             continue
354 | 
355 |         logger.debug("flirt: sig count: %d", len(sigs))
356 | 
357 |         with timing("flirt: compiling sigs"):
358 |             matcher = flirt.compile(sigs)
359 | 
360 |         analyzer = viv_utils.flirt.FlirtFunctionAnalyzer(matcher, sigpath)
361 |         logger.debug("registering viv function analyzer: %s", repr(analyzer))
362 |         viv_utils.flirt.addFlirtFunctionAnalyzer(vw, analyzer)
363 | 
364 | 
365 | def load_flirt_signature(path):
366 |     if path.endswith(".sig"):
367 |         with open(path, "rb") as f:
368 |             with timing("flirt: parsing .sig: " + path):
369 |                 sigs = flirt.parse_sig(f.read())
370 | 
371 |     elif path.endswith(".pat"):
372 |         with open(path, "rb") as f:
373 |             with timing("flirt: parsing .pat: " + path):
374 |                 sigs = flirt.parse_pat(f.read().decode("utf-8").replace("\r\n", "\n"))
375 | 
376 |     elif path.endswith(".pat.gz"):
377 |         with gzip.open(path, "rb") as f:
378 |             with timing("flirt: parsing .pat.gz: " + path):
379 |                 sigs = flirt.parse_pat(f.read().decode("utf-8").replace("\r\n", "\n"))
380 | 
381 |     else:
382 |         raise ValueError("unexpect signature file extension: " + path)
383 | 
384 |     return sigs
385 | 


--------------------------------------------------------------------------------
/viv_utils/idaloader.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python
  2 | """
  3 | load the module currently open in IDA Pro into a vivisect workspace.
  4 | 
  5 | author: Willi Ballenthin
  6 | email: willi.ballenthin@gmail.com
  7 | website: https://gist.github.com/williballenthin/f88c5c95f3e41157de3806dfbeef4bd4
  8 | """
  9 | import logging
 10 | import functools
 11 | 
 12 | import vivisect
 13 | import vivisect.const
 14 | 
 15 | logger = logging.getLogger(__name__)
 16 | 
 17 | try:
 18 |     import idc
 19 |     import idaapi
 20 |     import ida_ida
 21 |     import ida_nalt
 22 |     import idautils
 23 | except ImportError:
 24 |     logger.debug("failed to import IDA Pro modules")
 25 | 
 26 | 
 27 | def requires_ida(f):
 28 |     """
 29 |     declare that the wrapped function requires the IDA Pro scripting API.
 30 |     calling the function will raise `RuntimeError` if the API cannot be imported.
 31 |     """
 32 | 
 33 |     @functools.wraps(f)
 34 |     def inner(*args, **kwargs):
 35 |         if not ("idc" in locals() or "idc" in globals()):
 36 |             raise RuntimeError("IDA Pro not present")
 37 |         return f(*args, **kwargs)
 38 | 
 39 |     return f
 40 | 
 41 | 
 42 | @requires_ida
 43 | def is_x86():
 44 |     """
 45 |     is the currently loaded module 32-bit x86?
 46 |     """
 47 |     try:
 48 |         inf = idaapi.get_inf_structure()
 49 |         procname = inf.procname
 50 |     except AttributeError:
 51 |         procname = ida_ida.inf_get_procname()
 52 |     return procname == "metapc" and ida_ida.inf_is_32bit_exactly() and not ida_ida.inf_is_64bit()
 53 | 
 54 | 
 55 | @requires_ida
 56 | def is_x64():
 57 |     """
 58 |     is the currently loaded module 64-bit x86?
 59 |     """
 60 |     try:
 61 |         inf = idaapi.get_inf_structure()
 62 |         procname = inf.procname
 63 |     except AttributeError:
 64 |         procname = ida_ida.inf_get_procname()
 65 |     return procname == "metapc" and not ida_ida.inf_is_32bit_exactly() and ida_ida.inf_is_64bit()
 66 | 
 67 | 
 68 | @requires_ida
 69 | def is_exe():
 70 |     """
 71 |     is the currently loaded module a PE file?
 72 |     you can *probably* assume its for windows, if so.
 73 |     """
 74 |     return "Portable executable" in idaapi.get_file_type_name()
 75 | 
 76 | 
 77 | @requires_ida
 78 | def get_data(start, size):
 79 |     """
 80 |     read the given amount of data from the given start address.
 81 |     better than `idc.GetManyBytes` as it fills in missing bytes with NULLs.
 82 | 
 83 |     Args:
 84 |       start (int): start address.
 85 |       size (int): number of bytes to read.
 86 | 
 87 |     Returns:
 88 |       bytes: `size` bytes, filled with NULL when byte not available from database.
 89 |     """
 90 |     # best case, works pretty often.
 91 |     buf = idc.get_bytes(start, size)
 92 |     if buf:
 93 |         return buf
 94 | 
 95 |     # but may fail, when there's no byte defined.
 96 |     buf = []
 97 |     for ea in range(start, start + size):
 98 |         b = idc.get_bytes(ea, 1)
 99 |         if b:
100 |             buf.append(b)
101 |         else:
102 |             buf.append(b"\x00")
103 |     return b"".join(buf)
104 | 
105 | 
106 | PAGE_SIZE = 0x1000
107 | 
108 | 
109 | @requires_ida
110 | def get_segment_data(segstart):
111 |     """
112 |     read the contents of the segment containing the given address.
113 | 
114 |     Args:
115 |       segstart (int): start address of a segment.
116 | 
117 |     Returns:
118 |       bytes: the bytes of the segment, filled with NULL when byte not available from database.
119 |     """
120 |     bufs = []
121 | 
122 |     segend = idc.get_segm_end(segstart)
123 |     segsize = segend - segstart
124 |     pagecount = segsize // PAGE_SIZE
125 |     remainder = segsize - (pagecount * PAGE_SIZE)
126 | 
127 |     # read in page-sized chunks, since these should ususally be accessible together.
128 |     for i in range(pagecount):
129 |         bufs.append(get_data(segstart + i * PAGE_SIZE, PAGE_SIZE))
130 | 
131 |     # in a real PE, these *should* be page- or sector-aligned, but its not guaranteed, esp in IDA.
132 |     if remainder != 0:
133 |         bufs.append(get_data(segstart + pagecount * PAGE_SIZE, remainder))
134 | 
135 |     return b"".join(bufs)
136 | 
137 | 
138 | @requires_ida
139 | def get_exports():
140 |     """
141 |     enumerate the exports of the currently loaded module.
142 | 
143 |     Yields:
144 |       Tuple[int, int, str]:
145 |         - address of exported function
146 |         - export ordinal
147 |         - name of exported function
148 |     """
149 |     for index, ordinal, ea, name in idautils.Entries():
150 |         yield ea, ordinal, name
151 | 
152 | 
153 | @requires_ida
154 | def get_imports():
155 |     """
156 |     enumerate the imports of the currently loaded module.
157 | 
158 |     Yields:
159 |       Tuple[int, str, str, int]:
160 |         - address of import table pointer
161 |         - name of imported library
162 |         - name of imported function
163 |         - ordinal of import
164 |     """
165 |     for i in range(idaapi.get_import_module_qty()):
166 |         dllname = idaapi.get_import_module_name(i)
167 |         if not dllname:
168 |             continue
169 | 
170 |         entries = []
171 | 
172 |         def cb(ea, name, ordinal):
173 |             entries.append((ea, name, ordinal))
174 |             return True  # continue enumeration
175 | 
176 |         idaapi.enum_import_names(i, cb)
177 | 
178 |         for ea, name, ordinal in entries:
179 |             yield ea, dllname, name, ordinal
180 | 
181 | 
182 | @requires_ida
183 | def get_import_thunk(import_addr):
184 |     """
185 |     find import thunk for the given import pointer.
186 |     this is a function that simply jumps to the external implementation of the routine.
187 | 
188 |     Args:
189 |       import_addr (int): address of import table pointer.
190 | 
191 |     Returns:
192 |       int: address of function thunk.
193 | 
194 |     Raises:
195 |       ValueError: when the thunk does not exist.
196 |     """
197 |     for xref in idautils.XrefsTo(import_addr):
198 |         if xref.type != 3:  # XrefTypeName(3) == 'Data_Read'
199 |             continue
200 | 
201 |         if idc.print_insn_mnem(xref.frm) != "jmp":
202 |             continue
203 | 
204 |         return xref.frm
205 | 
206 |     raise ValueError("thunk does not exist")
207 | 
208 | 
209 | @requires_ida
210 | def get_functions():
211 |     """
212 |     enumerate the functions in the currently loaded module.
213 | 
214 |     Yields:
215 |       int: address of the function.
216 |     """
217 |     startea = ida_ida.inf_get_min_ea()
218 |     for fva in idautils.Functions(idc.get_segm_start(startea), idc.get_segm_end(startea)):
219 |         yield fva
220 | 
221 | 
222 | @requires_ida
223 | def loadWorkspaceFromIdb():
224 |     """
225 |     load the module currently open in IDA Pro into a vivisect workspace.
226 |     only windows PE files for x86/x64 are supported; others raise NotImplementedError.
227 | 
228 |     Returns:
229 |       vivisect.Workspace: the loaded and analyzed vivisect workspace.
230 |     """
231 |     vw = vivisect.VivWorkspace()
232 |     # record the architecture, based on what IDA reports for the module.
233 |     if is_x86():
234 |         vw.setMeta("Architecture", "i386")
235 |     elif is_x64():
236 |         vw.setMeta("Architecture", "amd64")
237 |     else:
238 |         raise NotImplementedError("unsupported architecture")
239 | 
240 |     if not is_exe():
241 |         raise NotImplementedError("unsupported file format")
242 | 
243 |     vw.setMeta("Platform", "windows")
244 |     vw.setMeta("Format", "pe")
245 |     vw._snapInAnalysisModules()
246 | 
247 |     filename = vw.addFile(ida_nalt.get_root_filename(), idaapi.get_imagebase(), idautils.GetInputFileMD5())
248 |     # copy the bytes of every IDA segment into the workspace as a memory map and a segment.
249 |     for segstart in idautils.Segments():
250 |         segname = idc.get_segm_name(segstart)
251 |         segbuf = get_segment_data(segstart)
252 |         # NOTE(review): get_segment_data returns b"".join(...), so this guard looks unreachable.
253 |         if segbuf is None:
254 |             raise RuntimeError("failed to read segment data")
255 |         # NOTE(review): permissions are looked up by segment name, not `segstart`; verify names are unique.
256 |         logger.debug("mapping section %s with %x bytes", segname, len(segbuf))
257 |         vw.addMemoryMap(segstart, idautils.ida_segment.get_segm_by_name(segname).perm, filename, segbuf)
258 |         vw.addSegment(segstart, len(segbuf), segname, filename)
259 |     # each export is registered both as an entry point and as an export.
260 |     for ea, ordinal, name in get_exports():
261 |         logger.debug("marking export %s at %x", name, ea)
262 |         vw.addEntryPoint(ea)
263 |         vw.addExport(ea, vivisect.const.EXP_FUNCTION, name, filename)
264 | 
265 |     for ea, dllname, name, ordinal in get_imports():
266 |         logger.debug("marking import %s!%s at %x", dllname, name, ea)
267 |         vw.makeImport(ea, dllname, name)
268 | 
269 |     logger.debug("running vivisect auto-analysis")
270 |     vw.analyze()
271 |     # after auto-analysis, define and name each function reported by get_functions.
272 |     for fva in get_functions():
273 |         logger.debug("marking function %s at %x", idc.get_func_name(fva), fva)
274 |         vw.makeFunction(fva)
275 |         vw.makeName(fva, idc.get_func_name(fva))
276 | 
277 |     # can only set thunk-ness after a function is defined.
278 |     for ea, dllname, name, ordinal in get_imports():
279 |         try:
280 |             thunk = get_import_thunk(ea)
281 |         except ValueError:
282 |             pass
283 |         else:
284 |             logger.debug("found thunk for %s.%s at %x", dllname, name, thunk)
285 |             vw.makeFunction(thunk)
286 |             vw.makeFunctionThunk(thunk, "%s.%s" % (dllname, name))
287 | 
288 |     return vw
289 | 


--------------------------------------------------------------------------------
/viv_utils/scripts/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/williballenthin/viv-utils/50ea43422c49539fea5a48f74d0935b7fef131f3/viv_utils/scripts/__init__.py


--------------------------------------------------------------------------------
/viv_utils/scripts/get_flirt_matches.py:
--------------------------------------------------------------------------------
  1 | import sys
  2 | import gzip
  3 | import logging
  4 | import argparse
  5 | 
  6 | import flirt
  7 | 
  8 | import viv_utils
  9 | import viv_utils.flirt
 10 | 
 11 | logger = logging.getLogger("get_flirt_matches")
 12 | 
 13 | 
 14 | def load_flirt_signature(path):
 15 |     if path.endswith(".sig"):
 16 |         with open(path, "rb") as f:
 17 |             sigs = flirt.parse_sig(f.read())
 18 | 
 19 |     elif path.endswith(".pat"):
 20 |         with open(path, "rb") as f:
 21 |             sigs = flirt.parse_pat(f.read().decode("utf-8"))
 22 | 
 23 |     elif path.endswith(".pat.gz"):
 24 |         with gzip.open(path, "rb") as f:
 25 |             sigs = flirt.parse_pat(f.read().decode("utf-8"))
 26 | 
 27 |     else:
 28 |         raise ValueError("unexpect signature file extension: " + path)
 29 | 
 30 |     return sigs
 31 | 
 32 | 
 33 | def register_flirt_signature_analyzers(vw, sigpaths):
 34 |     """
 35 |     args:
 36 |       vw (vivisect.VivWorkspace):
 37 |       sigpaths (List[str]): file system paths of .sig/.pat files
 38 |     """
 39 |     for sigpath in sigpaths:
 40 |         sigs = load_flirt_signature(sigpath)
 41 | 
 42 |         logger.debug("flirt: sig count: %d", len(sigs))
 43 | 
 44 |         matcher = flirt.compile(sigs)
 45 | 
 46 |         analyzer = viv_utils.flirt.FlirtFunctionAnalyzer(matcher, sigpath)
 47 |         logger.debug("registering viv function analyzer: %s", repr(analyzer))
 48 |         viv_utils.flirt.addFlirtFunctionAnalyzer(vw, analyzer)
 49 | 
 50 | 
 51 | def get_workspace(path, sigpaths):
 52 |     vw = viv_utils.getWorkspace(path, analyze=False, should_save=False)  # load without analyzing or saving
 53 |     register_flirt_signature_analyzers(vw, sigpaths)  # registered before analysis is started below
 54 |     vw.analyze()
 55 |     return vw
 56 | 
 57 | 
 58 | def main():
 59 |     parser = argparse.ArgumentParser()
 60 |     parser.add_argument("-d", "--debug", action="store_true", help="enable debugging output on STDERR")
 61 |     parser.add_argument("-q", "--quiet", action="store_true", help="disable all output but errors")
 62 |     parser.add_argument(
 63 |         "signature",
 64 |         type=str,
 65 |         help="use the given signatures to identify library functions, file system paths to .sig/.pat files.",
 66 |     )
 67 |     parser.add_argument(
 68 |         "sample",
 69 |         type=str,
 70 |         help="path to sample to analyze",
 71 |     )
 72 | 
 73 |     args = parser.parse_args()
 74 |     # configure logging verbosity from the -q/-d flags; -q wins when both are given.
 75 |     if args.quiet:
 76 |         logging.basicConfig(level=logging.WARNING)
 77 |         logging.getLogger().setLevel(logging.WARNING)
 78 |     elif args.debug:
 79 |         logging.basicConfig(level=logging.DEBUG)
 80 |         logging.getLogger().setLevel(logging.DEBUG)
 81 |         # the vivisect/vtrace/envi loggers are held at INFO even in debug mode.
 82 |         logging.getLogger("vivisect").setLevel(logging.INFO)
 83 |         logging.getLogger("vivisect.base").setLevel(logging.INFO)
 84 |         logging.getLogger("vivisect.impemu").setLevel(logging.INFO)
 85 |         logging.getLogger("vtrace").setLevel(logging.INFO)
 86 |         logging.getLogger("envi").setLevel(logging.INFO)
 87 |         logging.getLogger("envi.codeflow").setLevel(logging.INFO)
 88 |     else:
 89 |         logging.basicConfig(level=logging.INFO)
 90 |         logging.getLogger().setLevel(logging.INFO)
 91 |         logging.getLogger("vivisect").setLevel(logging.WARNING)
 92 | 
 93 |     vw = get_workspace(args.sample, [args.signature])
 94 |     # print the address and name of every function flagged as a library function.
 95 |     names = set()
 96 |     for va in sorted(vw.getFunctions()):
 97 |         if viv_utils.flirt.is_library_function(vw, va):
 98 |             name = viv_utils.get_function_name(vw, va)
 99 |             print("0x%x: %s" % (va, name))
100 |             names.add(name)
101 |     # NOTE(review): `names` is populated above but never read.
102 |     return 0
103 | 
104 | 
105 | if __name__ == "__main__":
106 |     sys.exit(main())
107 | 


--------------------------------------------------------------------------------
/viv_utils/scripts/get_function_args.py:
--------------------------------------------------------------------------------
 1 | import pprint
 2 | import logging
 3 | 
 4 | import viv_utils
 5 | import viv_utils.emulator_drivers
 6 | 
 7 | g_pp = pprint.PrettyPrinter()
 8 | 
 9 | 
10 | class CallArgumentMonitor(viv_utils.emulator_drivers.Monitor):
11 |     """collect call arguments to a target function during emulation"""
12 | 
13 |     def __init__(self, vw, target_fva):
14 |         """:param target_fva: address of function whose arguments to monitor"""
15 |         viv_utils.emulator_drivers.Monitor.__init__(self, vw)
16 |         self._fva = target_fva
17 |         self._calls = {}
18 | 
19 |     def apicall(self, emu, op, pc, api, argv):
20 |         rv = self.getStackValue(emu, 0)
21 |         if pc == self._fva:
22 |             self._calls[rv] = argv
23 | 
24 |     def getCalls(self):
25 |         """get map of return value of function call to arguments to function call"""
26 |         return self._calls.copy()
27 | 
28 | 
29 | def emulate_function(vw, fva, target_fva):
30 |     """run the given function while collecting arguments to a target function"""
31 |     emu = vw.getEmulator()
32 |     d = viv_utils.emulator_drivers.FunctionRunnerEmulatorDriver(emu)
33 | 
34 |     m = CallArgumentMonitor(vw, target_fva)
35 |     d.add_monitor(m)
36 | 
37 |     d.runFunction(fva, maxhit=1)
38 | 
39 |     for k, v in m.getCalls().iteritems():
40 |         print(hex(k) + ": " + str(v))
41 | 
42 | 
43 | def _main(bin_path, ofva):
44 |     fva = int(ofva, 0x10)  # the target function address is given as a hex string
45 |     logging.basicConfig(level=logging.DEBUG)
46 | 
47 |     vw = viv_utils.getWorkspace(bin_path)
48 |     # used below to map the address of a calling instruction to its function.
49 |     index = viv_utils.InstructionFunctionIndex(vw)
50 | 
51 |     # optimization: avoid re-processing the same function repeatedly
52 |     called_fvas = set([])
53 |     for callerva in vw.getCallers(fva):
54 |         callerfva = index[callerva]  # the address of the function that contains this instruction
55 |         if callerfva in called_fvas:
56 |             continue
57 |         # emulate the calling function, collecting the arguments it passes to `fva`.
58 |         emulate_function(vw, index[callerva], fva)
59 | 
60 |         called_fvas.add(callerfva)
61 | 
62 |     return
63 | 
64 | 
65 | def main():
66 |     import sys
67 |     # forward the command line arguments (binary path, hex function address) to `_main`.
68 |     sys.exit(_main(*sys.argv[1:]))
69 | 
70 | 
71 | if __name__ == "__main__":
72 |     main()
73 | 


--------------------------------------------------------------------------------
/viv_utils/scripts/show_flirt_references.py:
--------------------------------------------------------------------------------
  1 | import sys
  2 | import gzip
  3 | import logging
  4 | import argparse
  5 | import binascii
  6 | 
  7 | import flirt
  8 | import vivisect.const
  9 | 
 10 | import viv_utils
 11 | import viv_utils.flirt
 12 | 
 13 | logger = logging.getLogger("get_flirt_matches")
 14 | 
 15 | 
 16 | def load_flirt_signature(path):
 17 |     if path.endswith(".sig"):
 18 |         with open(path, "rb") as f:
 19 |             sigs = flirt.parse_sig(f.read())
 20 | 
 21 |     elif path.endswith(".pat"):
 22 |         with open(path, "rb") as f:
 23 |             sigs = flirt.parse_pat(f.read().decode("utf-8"))
 24 | 
 25 |     elif path.endswith(".pat.gz"):
 26 |         with gzip.open(path, "rb") as f:
 27 |             sigs = flirt.parse_pat(f.read().decode("utf-8"))
 28 | 
 29 |     else:
 30 |         raise ValueError("unexpect signature file extension: " + path)
 31 | 
 32 |     return sigs
 33 | 
 34 | 
 35 | def get_workspace(path, sigpaths):
 36 |     vw = viv_utils.getWorkspace(path, analyze=False, should_save=False)  # NOTE(review): `sigpaths` is never used here
 37 |     vw.analyze()
 38 |     return vw
 39 | 
 40 | 
 41 | def main():
 42 |     parser = argparse.ArgumentParser()
 43 |     parser.add_argument("-d", "--debug", action="store_true", help="enable debugging output on STDERR")
 44 |     parser.add_argument("-q", "--quiet", action="store_true", help="disable all output but errors")
 45 |     parser.add_argument(
 46 |         "signature",
 47 |         type=str,
 48 |         help="use the given signatures to identify library functions, file system paths to .sig/.pat files.",
 49 |     )
 50 |     parser.add_argument(
 51 |         "sample",
 52 |         type=str,
 53 |         help="path to sample to analyze",
 54 |     )
 55 | 
 56 |     args = parser.parse_args()
 57 |     # configure logging verbosity from the -q/-d flags; -q wins when both are given.
 58 |     if args.quiet:
 59 |         logging.basicConfig(level=logging.WARNING)
 60 |         logging.getLogger().setLevel(logging.WARNING)
 61 |     elif args.debug:
 62 |         logging.basicConfig(level=logging.DEBUG)
 63 |         logging.getLogger().setLevel(logging.DEBUG)
 64 | 
 65 |         logging.getLogger("vivisect").setLevel(logging.INFO)
 66 |         logging.getLogger("vivisect.base").setLevel(logging.INFO)
 67 |         logging.getLogger("vivisect.impemu").setLevel(logging.INFO)
 68 |         logging.getLogger("vtrace").setLevel(logging.INFO)
 69 |         logging.getLogger("envi").setLevel(logging.INFO)
 70 |         logging.getLogger("envi.codeflow").setLevel(logging.INFO)
 71 |     else:
 72 |         logging.basicConfig(level=logging.INFO)
 73 |         logging.getLogger().setLevel(logging.INFO)
 74 |         logging.getLogger("vivisect").setLevel(logging.WARNING)
 75 | 
 76 |     vw = get_workspace(args.sample, [args.signature])
 77 |     # the signatures are matched manually below, not registered as workspace analyzers.
 78 |     sigs = load_flirt_signature(args.signature)
 79 |     logger.debug("flirt: sig count: %d", len(sigs))
 80 |     matcher = flirt.compile(sigs)
 81 |     # addresses of references that were already reported.
 82 |     seen = set()
 83 | 
 84 |     for function in vw.getFunctions():
 85 |         buf = viv_utils.readMemoryCurrentSection(vw, function, 0x10000)
 86 |         # from each match, keep only the not-yet-seen names whose type is "reference".
 87 |         for match in matcher.match(buf):
 88 |             references = list(filter(lambda n: n[1] == "reference" and (function + n[2]) not in seen, match.names))
 89 | 
 90 |             if not references:
 91 |                 continue
 92 | 
 93 |             print("matching function: 0x%x" % (function))
 94 |             print("  candidate match: 0x%x: %s" % (function, match))
 95 | 
 96 |             print("  references:")
 97 | 
 98 |             for ref_name, _, ref_offset in references:
 99 |                 ref_va = function + ref_offset
100 |                 seen.add(ref_va)
101 | 
102 |                 print("  - 0x%x: %s" % (ref_va, ref_name))
103 |                 # NOTE(review): `getLocation` may return None for an undefined location; confirm before indexing.
104 |                 loc = vw.getLocation(ref_va)
105 |                 loc_va = loc[vivisect.const.L_VA]
106 |                 print("    loc: 0x%x" % (loc_va))
107 |                 print("    delta: 0x%x" % (ref_va - loc_va))
108 | 
109 |                 size = loc[vivisect.const.L_SIZE]
110 |                 buf = viv_utils.readMemoryCurrentSection(vw, loc_va, size)
111 |                 print("    bytes: %s" % (binascii.hexlify(buf).decode("ascii")))
112 |                 # the caret marks the referenced byte in the hex dump above (two hex digits per byte).
113 |                 print("           %s^" % ("  " * (ref_va - loc_va)))
114 | 
115 |                 insn = vw.parseOpcode(loc_va)
116 |                 print("    insn: %s" % (insn))
117 | 
118 |                 print("    xrefs:")
119 |                 for xref in sorted(set(map(lambda x: x[vivisect.const.XR_TO], vw.getXrefsFrom(loc_va)))):
120 |                     print("    - 0x%x" % (xref))
121 | 
122 |         pass
123 | 
124 |     return 0
125 | 
126 | 
127 | if __name__ == "__main__":
128 |     sys.exit(main())
129 | 


--------------------------------------------------------------------------------
/viv_utils/scripts/trace_function_emulation.py:
--------------------------------------------------------------------------------
 1 | import logging
 2 | 
 3 | import viv_utils
 4 | import viv_utils.emulator_drivers
 5 | 
 6 | 
 7 | class AMonitor(viv_utils.emulator_drivers.Monitor):  # debug-logs every prehook and apicall callback
 8 |     def __init__(self, vw):
 9 |         viv_utils.emulator_drivers.Monitor.__init__(self, vw)
10 | 
11 |     def prehook(self, emu, op, starteip):  # logs the hex of `starteip` and the opcode
12 |         self._logger.debug("prehook: %s: %s", hex(starteip), op)
13 | 
14 |     def apicall(self, emu, op, pc, api, argv):  # logs the opcode, pc, api and argument values
15 |         self._logger.debug("apicall: %s %s %s %s", op, pc, api, argv)
16 | 
17 | 
18 | def _main(bin_path, fva):
19 |     logging.basicConfig(level=logging.DEBUG)
20 | 
21 |     vw = viv_utils.getWorkspace(bin_path)
22 |     emu = vw.getEmulator()
23 |     d = viv_utils.emulator_drivers.FunctionRunnerEmulatorDriver(emu)
24 | 
25 |     m = AMonitor(vw)
26 |     d.add_monitor(m)
27 | 
28 |     logging.getLogger("trace").debug("%s %s %s %s", vw, emu, d, m)
29 | 
30 |     d.runFunction(int(fva, 0x10), maxhit=1)
31 | 
32 | 
33 | def main():
34 |     import sys
35 |     # forward the command line arguments (binary path, hex function address) to `_main`.
36 |     sys.exit(_main(*sys.argv[1:]))
37 | 
38 | 
39 | if __name__ == "__main__":
40 |     main()
41 | 


--------------------------------------------------------------------------------
/viv_utils/types.py:
--------------------------------------------------------------------------------
 1 | from typing import List, Tuple, Optional
 2 | 
 3 | import vivisect
 4 | import vivisect.impemu.emulator
 5 | from typing_extensions import TypeAlias
 6 | 
 7 | Address: TypeAlias = int
 8 | DataType: TypeAlias = str
 9 | SymbolName: TypeAlias = str
10 | # components of the `API` tuple defined below
11 | CallingConvention: TypeAlias = str
12 | ReturnType: TypeAlias = DataType
13 | ReturnName: TypeAlias = str
14 | FunctionName: TypeAlias = SymbolName
15 | ArgType: TypeAlias = DataType
16 | ArgName: TypeAlias = SymbolName
17 | FunctionArg: TypeAlias = Tuple[ArgType, ArgName]
18 | # type returned by `vw.getImpApi`
19 | API: TypeAlias = Tuple[ReturnType, ReturnName, Optional[CallingConvention], FunctionName, List[FunctionArg]]
20 | # shortcuts
21 | Emulator: TypeAlias = vivisect.impemu.emulator.WorkspaceEmulator
22 | Workspace: TypeAlias = vivisect.VivWorkspace
23 | 


--------------------------------------------------------------------------------