├── .bazelrc
├── .bazelversion
├── BUILD.bazel
├── CONTRIBUTING.md
├── LICENSE
├── MODULE.bazel
├── README.md
├── WORKSPACE.bazel
├── docs
└── images
│ ├── vanir_detector_report.png
│ ├── vanir_macro_arch.png
│ └── vanir_micro_arch.png
├── extensions.bzl
├── repositories.bzl
├── requirements.txt
├── requirements_antlr4.txt
├── requirements_antlr4_lock.txt
├── requirements_lock.txt
└── vanir
├── cache
├── BUILD.bazel
└── ecosystem_file_lists.json
├── code_extractors
├── BUILD.bazel
├── code_extractor.py
├── code_extractor_android.py
├── code_extractor_android_test.py
├── code_extractor_base.py
└── code_extractor_test.py
├── detector_common_flags.py
├── detector_common_flags_test.py
├── detector_runner.py
├── detector_runner_test.py
├── file_list_manager.py
├── file_list_manager_test.py
├── hasher.py
├── hasher_test.py
├── integration_tests
├── BUILD.bazel
└── missing_patch_detection_hermetic_test.py
├── language_parsers
├── BUILD.bazel
├── abstract_language_parser.py
├── common.py
├── cpp
│ ├── BUILD.bazel
│ ├── cpp_parser.py
│ ├── cpp_parser_test.py
│ ├── parser_core.cc
│ ├── parser_core.h
│ └── python
│ │ ├── BUILD.bazel
│ │ └── parser_core.cc
├── java
│ ├── BUILD.bazel
│ ├── antlr4.external.bzl
│ ├── java_parser.py
│ ├── java_parser_test.py
│ ├── parser_core.cc
│ ├── parser_core.h
│ └── python
│ │ ├── BUILD.bazel
│ │ └── parser_core.cc
└── language_parsers.py
├── normalizer.py
├── normalizer_test.py
├── osv_client.py
├── overwrite_specs_validity_test.py
├── parser.py
├── parser_test.py
├── refiner.py
├── refiner_test.py
├── reporter.py
├── reporter_test.py
├── scanners
├── BUILD.bazel
├── android_kernel_scanner.py
├── android_kernel_scanner_test.py
├── offline_directory_scanner.py
├── offline_directory_scanner_test.py
├── package_identifier.py
├── package_identifier_test.py
├── package_scanner.py
├── package_scanner_test.py
├── repo_scanner.py
├── repo_scanner_test.py
├── scanner_base.py
├── scanner_base_test.py
├── target_selection_strategy.py
└── target_selection_strategy_test.py
├── sign_generator.py
├── sign_generator_runner.py
├── sign_generator_runner_test.py
├── sign_generator_test.py
├── signature.py
├── signature_test.py
├── testdata
├── BUILD.bazel
├── gitiles
│ ├── b2dc041a4e84986e3a6932b127d3a18ef02b6d0a.patch.base64
│ ├── b2dc041a4e84986e3a6932b127d3a18ef02b6d0a.patchinfo.base64
│ ├── b2dc041a4e84986e3a6932b127d3a18ef02b6d0a_services_core_java_com_android_server_om_OverlayManagerService.java.base64
│ └── c3c9ada6e5d946ce7d224649f1d6528ce80b1a24_services_core_java_com_android_server_om_OverlayManagerService.java.base64
├── test_frameworks_base.tar.gz
├── test_overwrite_specs.json
├── test_patch_file
├── test_patched_file
├── test_signatures.json
├── test_signatures.py
├── test_signatures.zip
├── test_unpatched_file
├── test_unrelated_file
├── test_vulnerabilities_kernel.json
└── test_vulnerabilities_platform.json
├── truncated_path.py
├── truncated_path_test.py
├── vanir_test_base.py
├── version_extractor.py
├── version_extractor_test.py
├── vulnerability.py
├── vulnerability_manager.py
├── vulnerability_manager_test.py
├── vulnerability_overwriter.py
├── vulnerability_overwriter_test.py
└── vulnerability_test.py
/.bazelrc:
--------------------------------------------------------------------------------
1 | # Copyright 2023 Google LLC
2 | #
3 | # Use of this source code is governed by a BSD-style
4 | # license that can be found in the LICENSE file or at
5 | # https://developers.google.com/open-source/licenses/bsd
6 |
7 | # The following file specifies the Bazel configuration. Currently, Vanir is
8 | # only tested with Bazel >= 6.0.0. For Bazel >= 7.2.0, Vanir runs
9 | # without any changes in this file. For earlier versions, the following
10 | # configuration changes are needed.
11 |
12 | # In order to run on Bazel <= 7.1.0, the following line needs to be
13 | # commented out, as the bzlmod build does not work on Bazel 7.1 and earlier.
14 |
15 | common --enable_workspace=False
16 |
17 | # Additionally, for Bazel 7.1.0 and 7.0.0, the following line needs
18 | # to be uncommented.
19 |
20 | # common --enable_bzlmod=False
21 |
22 | build --cxxopt=-std=c++17 --cxxopt=-fexceptions --cxxopt=-Wno-nonnull --cxxopt=-Wno-sign-compare --cxxopt=-Wno-parentheses --cxxopt=-Wno-deprecated-declarations
23 |
--------------------------------------------------------------------------------
/.bazelversion:
--------------------------------------------------------------------------------
1 | 8.1.1
2 |
--------------------------------------------------------------------------------
/CONTRIBUTING.md:
--------------------------------------------------------------------------------
1 | # How to contribute
2 |
3 | We'd love to accept your patches and contributions to this project.
4 |
5 | ## Before you begin
6 |
7 | ### Sign our Contributor License Agreement
8 |
9 | Contributions to this project must be accompanied by a
10 | [Contributor License Agreement](https://cla.developers.google.com/about) (CLA).
11 | You (or your employer) retain the copyright to your contribution; this simply
12 | gives us permission to use and redistribute your contributions as part of the
13 | project.
14 |
15 | If you or your current employer have already signed the Google CLA (even if it
16 | was for a different project), you probably don't need to do it again.
17 |
18 | Visit <https://cla.developers.google.com/> to see your current agreements or to
19 | sign a new one.
20 |
21 | ### Review our community guidelines
22 |
23 | This project follows
24 | [Google's Open Source Community Guidelines](https://opensource.google/conduct/).
25 |
26 | ## Contribution process
27 |
28 | ### Code reviews
29 |
30 | All submissions, including submissions by project members, require review. We
31 | use GitHub pull requests for this purpose. Consult
32 | [GitHub Help](https://help.github.com/articles/about-pull-requests/) for more
33 | information on using pull requests.
34 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | Copyright 2023 Google LLC
2 |
3 | Redistribution and use in source and binary forms, with or without
4 | modification, are permitted provided that the following conditions are
5 | met:
6 |
7 | * Redistributions of source code must retain the above copyright
8 | notice, this list of conditions and the following disclaimer.
9 | * Redistributions in binary form must reproduce the above
10 | copyright notice, this list of conditions and the following disclaimer
11 | in the documentation and/or other materials provided with the
12 | distribution.
13 | * Neither the name of Google LLC nor the names of its
14 | contributors may be used to endorse or promote products derived from
15 | this software without specific prior written permission.
16 |
17 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
18 | "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
19 | LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
20 | A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
21 | OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
22 | SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
23 | LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
24 | DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
25 | THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
26 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
27 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28 |
--------------------------------------------------------------------------------
/MODULE.bazel:
--------------------------------------------------------------------------------
1 | # Copyright 2025 Google LLC
2 | #
3 | # Use of this source code is governed by a BSD-style
4 | # license that can be found in the LICENSE file or at
5 | # https://developers.google.com/open-source/licenses/bsd
6 |
7 | """Vanir build and test dependencies."""
8 |
9 | module(name = "vanir")
10 |
11 | bazel_dep(name = "rules_python", version = "1.1.0")
12 | bazel_dep(name = "platforms", version = "0.0.11")
13 | bazel_dep(name = "abseil-cpp", version = "20250127.0", repo_name = "com_google_absl")
14 | bazel_dep(name = "pybind11_bazel", version = "2.13.6")
15 | bazel_dep(name = "pybind11_abseil", version = "202402.0")
16 | bazel_dep(name = "rules_proto", version = "7.1.0")
17 | bazel_dep(name = "protobuf", version = "29.3", repo_name = "com_google_protobuf")
18 |
19 | python = use_extension("@rules_python//python/extensions:python.bzl", "python")
20 | python.toolchain(
21 | is_default = True,
22 | python_version = "3.9"
23 | )
24 |
25 | pip = use_extension("@rules_python//python/extensions:pip.bzl", "pip")
26 | pip.parse(
27 | hub_name = "vanir_deps",
28 | python_version = "3.9",
29 | requirements_lock = "//:requirements_lock.txt",
30 | )
31 | use_repo(pip, "vanir_deps")
32 |
33 | pip.parse(
34 | hub_name = "antlr4_deps",
35 | python_version = "3.9",
36 | requirements_lock = "//:requirements_antlr4_lock.txt",
37 | )
38 | use_repo(pip, "antlr4_deps")
39 |
40 | antlr4_grammar_java_lexer_g4_extension = use_extension("//:extensions.bzl", "antlr4_grammar_java_lexer_g4_extension")
41 | use_repo(antlr4_grammar_java_lexer_g4_extension, "antlr4_grammar_java_lexer_g4")
42 |
43 | antlr4_grammar_java_parser_g4_extension = use_extension("//:extensions.bzl", "antlr4_grammar_java_parser_g4_extension")
44 | use_repo(antlr4_grammar_java_parser_g4_extension, "antlr4_grammar_java_parser_g4")
45 |
46 | antlr4_runtimes_extension = use_extension("//:extensions.bzl", "antlr4_runtimes_extension")
47 | use_repo(antlr4_runtimes_extension, "antlr4_runtimes")
48 |
49 | fuzzyc_extension = use_extension("//:extensions.bzl", "fuzzyc_extension")
50 | use_repo(fuzzyc_extension, "fuzzyc")
51 |
52 | com_google_osv_extension = use_extension("//:extensions.bzl", "com_google_osv_extension")
53 | use_repo(com_google_osv_extension, "com_google_osv")
54 |
55 | jsonpath_rw_extension = use_extension("//:extensions.bzl", "jsonpath_rw_extension")
56 | use_repo(jsonpath_rw_extension, "jsonpath-rw-git")
57 |
58 | antlr4_entry_points_extension = use_extension("//:extensions.bzl", "antlr4_entry_points_extension")
59 | use_repo(antlr4_entry_points_extension, "antlr4_entry_points")
60 |
--------------------------------------------------------------------------------
/WORKSPACE.bazel:
--------------------------------------------------------------------------------
1 | # Copyright 2023 Google LLC
2 | #
3 | # Use of this source code is governed by a BSD-style
4 | # license that can be found in the LICENSE file or at
5 | # https://developers.google.com/open-source/licenses/bsd
6 |
7 | # Bazel workspace file for Vanir.
8 |
9 | workspace(name = "vanir")
10 |
11 | load("@bazel_tools//tools/build_defs/repo:http.bzl", "http_archive", "http_file")
12 | load("@bazel_tools//tools/build_defs/repo:git.bzl", "git_repository", "new_git_repository")
13 |
14 | # Install Vanir python dependencies through PIP.
15 | http_archive(
16 | name = "rules_python",
17 | sha256 = "8c15896f6686beb5c631a4459a3aa8392daccaab805ea899c9d14215074b60ef",
18 | strip_prefix = "rules_python-0.17.3",
19 | url = "https://github.com/bazelbuild/rules_python/archive/refs/tags/0.17.3.tar.gz",
20 | )
21 |
22 | load("@rules_python//python:repositories.bzl", "py_repositories", "python_register_toolchains")
23 |
24 | py_repositories()
25 |
26 |
27 | python_register_toolchains(
28 | name = "python3_9",
29 | python_version = "3.9",
30 | )
31 |
32 | load("@rules_python//python:pip.bzl", "pip_parse")
33 | load("@python3_9//:defs.bzl", "interpreter")
34 |
35 | pip_parse(
36 | name = "vanir_deps",
37 | python_interpreter_target = interpreter,
38 | requirements_lock = "//:requirements_lock.txt",
39 | )
40 |
41 | load("@vanir_deps//:requirements.bzl", "install_deps")
42 |
43 | install_deps()
44 |
45 | # Install Fuzzyc and its transitive dependencies.
46 | git_repository(
47 | name = "fuzzyc",
48 | commit = "0f00ba6804c56f5b6d91bc214a91bb05fd17fcda",
49 | remote = "https://third-party-mirror.googlesource.com/fuzzyc",
50 | )
51 |
52 | # Install antlr4 tools
53 | pip_parse(
54 | name = "antlr4_deps",
55 | python_interpreter_target = interpreter,
56 | requirements_lock = "//:requirements_antlr4_lock.txt",
57 | )
58 |
59 | load("@antlr4_deps//:requirements.bzl", install_antlr4_deps = "install_deps")
60 | install_antlr4_deps()
61 |
62 | new_local_repository(
63 | name = "antlr4_entry_points",
64 | path = "vanir/language_parsers/java",
65 | build_file_content = """
66 | alias(
67 | name = "antlr4",
68 | actual = "@antlr4_deps_antlr4_tools//:rules_python_wheel_entry_point_antlr4",
69 | visibility = ["//visibility:public"],
70 | )
71 | """
72 | )
73 |
74 | # Download Antlr4 Java grammar
75 | ANTLR4_JAVA_REV = "c85ec510bd7cfba4649aec1ac2cf66bebd8ce2ed"
76 | http_file(
77 | name = "antlr4_grammar_java_lexer_g4",
78 | url = "https://github.com/antlr/grammars-v4/raw/%s/java/java/JavaLexer.g4" % ANTLR4_JAVA_REV,
79 | downloaded_file_path = "JavaLexer.g4",
80 | sha256 = "9a812eea62aeddc7bd54f8ba9dac4615d0f3f6b98328cf46b4143fdf75ba2c92",
81 | )
82 | http_file(
83 | name = "antlr4_grammar_java_parser_g4",
84 | url = "https://github.com/antlr/grammars-v4/raw/%s/java/java/JavaParser.g4" % ANTLR4_JAVA_REV,
85 | downloaded_file_path = "JavaParser.g4",
86 | sha256 = "0555bd978b2a7e47ec373ee0671cd13f6ba576ca8c26d127fa0b7467dd6df8ce",
87 | )
88 |
89 | # Antlr4 CC Runtime Library.
90 | http_archive(
91 | name = "antlr4_runtimes",
92 | build_file_content = """
93 | package(default_visibility = ["//visibility:public"])
94 | cc_library(
95 | name = "cpp",
96 | srcs = glob(["runtime/Cpp/runtime/src/**/*.cpp"]),
97 | hdrs = glob(["runtime/Cpp/runtime/src/**/*.h"]),
98 | includes = ["runtime/Cpp/runtime/src"],
99 | )
100 | """,
101 | sha256 = "50e87636a61daabd424d884c60f804387430920072f585a9fee2b90e2043fdcc",
102 | strip_prefix = "antlr4-4.11.1",
103 | urls = ["https://github.com/antlr/antlr4/archive/v4.11.1.tar.gz"],
104 | )
105 |
106 | # Google absl
107 | http_archive(
108 | name = "com_google_absl",
109 | sha256 = "3ea49a7d97421b88a8c48a0de16c16048e17725c7ec0f1d3ea2683a2a75adc21",
110 | strip_prefix = "abseil-cpp-20230125.0",
111 | urls = ["https://github.com/abseil/abseil-cpp/archive/refs/tags/20230125.0.tar.gz"],
112 | )
113 |
114 | # GoogleTest
115 | http_archive(
116 | name = "com_google_googletest",
117 | sha256 = "ad7fdba11ea011c1d925b3289cf4af2c66a352e18d4c7264392fead75e919363",
118 | strip_prefix = "googletest-1.13.0",
119 | urls = ["https://github.com/google/googletest/archive/refs/tags/v1.13.0.tar.gz"],
120 | )
121 |
122 | # Pybind11 Bazel extension
123 | git_repository(
124 | name = "pybind11_bazel",
125 | commit = "5f458fa53870223a0de7eeb60480dd278b442698",
126 | remote = "https://github.com/pybind/pybind11_bazel.git",
127 | )
128 |
129 | # Pybind11
130 | new_git_repository(
131 | name = "pybind11",
132 | build_file = "@pybind11_bazel//:pybind11.BUILD",
133 | remote = "https://github.com/pybind/pybind11.git",
134 | tag = "v2.10.4",
135 | )
136 |
137 | load("@pybind11_bazel//:python_configure.bzl", "python_configure")
138 |
139 | python_configure(
140 | name = "local_config_python",
141 | python_interpreter_target = interpreter,
142 | )
143 |
144 | # Pybind11 Abseil. Don't use master - it can be unstable.
145 | git_repository(
146 | name = "pybind11_abseil",
147 | commit = "13d4f99d5309df3d5afa80fe2ae332d7a2a64c6b",
148 | remote = "https://github.com/pybind/pybind11_abseil.git",
149 | )
150 |
151 | http_archive(
152 | name = "com_google_protobuf",
153 | urls = ["https://github.com/google/protobuf/archive/v3.10.0.zip"],
154 | strip_prefix = "protobuf-3.10.0",
155 | )
156 |
157 | # OSV
158 | OSV_REV = "bbb8ab4f0491bf367f8e1406d8ddf9e9dbf5de86"
159 | http_archive(
160 | name = "com_google_osv",
161 | strip_prefix = "osv.dev-%s" % OSV_REV,
162 | build_file_content = """
163 | load("@com_google_protobuf//:protobuf.bzl", "py_proto_library")
164 | load("@rules_proto//proto:defs.bzl", "proto_library")
165 |
166 | package(default_visibility = ["//visibility:public"])
167 |
168 | PROTO_FILES = [
169 | "osv/vulnerability.proto",
170 | ]
171 |
172 | filegroup(
173 | name = "protobuf_files",
174 | srcs = PROTO_FILES,
175 | visibility = ["//visibility:public"],
176 | )
177 |
178 | proto_library(
179 | name = 'vulnerability_proto',
180 | srcs = PROTO_FILES,
181 | deps = [
182 | '@com_google_protobuf//:struct_proto',
183 | '@com_google_protobuf//:timestamp_proto',
184 | ],
185 | visibility = ['//visibility:public'],
186 | )
187 |
188 | py_proto_library(
189 | name = "vulnerability_py_pb2",
190 | visibility = ["//visibility:public"],
191 | srcs = PROTO_FILES,
192 | deps = [
193 | "@com_google_protobuf//:protobuf_python",
194 | ],
195 | )
196 | """,
197 | urls = ["https://github.com/google/osv.dev/archive/%s.tar.gz" % OSV_REV],
198 | )
199 |
200 | load("@com_google_protobuf//:protobuf_deps.bzl", "protobuf_deps")
201 |
202 | protobuf_deps()
203 |
204 |
205 | # jsonpath_rw
206 | git_repository(
207 | name = "jsonpath-rw-git",
208 | build_file_content = """
209 | load("@rules_python//python:defs.bzl", "py_library")
210 | load("@vanir_deps//:requirements.bzl", "requirement")
211 |
212 | py_library(
213 | name = "jsonpath_rw",
214 | visibility = ["//visibility:public"],
215 | srcs = [
216 | "jsonpath_rw/__init__.py",
217 | "jsonpath_rw/jsonpath.py",
218 | "jsonpath_rw/lexer.py",
219 | "jsonpath_rw/parser.py"
220 | ],
221 | srcs_version = "PY3",
222 | deps = [
223 | requirement("six"),
224 | requirement("ply"),
225 | requirement("decorator"),
226 | ],
227 | )
228 | """,
229 | commit = "6f5647bb3ad2395c20f0191fef07a1df51c9fed8",
230 | remote = "https://github.com/kennknowles/python-jsonpath-rw.git",
231 | )
232 |
--------------------------------------------------------------------------------
/docs/images/vanir_detector_report.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/google/vanir/fe4afbc9215e786b643431694040a076c3af2c64/docs/images/vanir_detector_report.png
--------------------------------------------------------------------------------
/docs/images/vanir_macro_arch.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/google/vanir/fe4afbc9215e786b643431694040a076c3af2c64/docs/images/vanir_macro_arch.png
--------------------------------------------------------------------------------
/docs/images/vanir_micro_arch.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/google/vanir/fe4afbc9215e786b643431694040a076c3af2c64/docs/images/vanir_micro_arch.png
--------------------------------------------------------------------------------
/extensions.bzl:
--------------------------------------------------------------------------------
1 | # Copyright 2025 Google LLC
2 | #
3 | # Use of this source code is governed by a BSD-style
4 | # license that can be found in the LICENSE file or at
5 | # https://developers.google.com/open-source/licenses/bsd
6 |
7 | """Extensions for Vanir."""
8 |
9 | load(
10 | "//:repositories.bzl",
11 | "antlr4_entry_points_repo",
12 | "antlr4_grammar_java_lexer_g4_repo",
13 | "antlr4_grammar_java_parser_g4_repo",
14 | "antlr4_runtimes_repo",
15 | "com_google_osv_repo",
16 | "fuzzyc_repo",
17 | "jsonpath_rw_repo",
18 | )
19 |
20 | def _antlr4_grammar_java_lexer_g4_impl(_ctx):
21 | antlr4_grammar_java_lexer_g4_repo()
22 |
23 | antlr4_grammar_java_lexer_g4_extension = module_extension(
24 | implementation = _antlr4_grammar_java_lexer_g4_impl,
25 | )
26 |
27 | def _antlr4_grammar_java_parser_g4_impl(_ctx):
28 | antlr4_grammar_java_parser_g4_repo()
29 |
30 | antlr4_grammar_java_parser_g4_extension = module_extension(
31 | implementation = _antlr4_grammar_java_parser_g4_impl,
32 | )
33 |
34 | def _fuzzyc_impl(_ctx):
35 | fuzzyc_repo()
36 |
37 | fuzzyc_extension = module_extension(implementation = _fuzzyc_impl)
38 |
39 | def _antlr4_runtimes_impl(_ctx):
40 | antlr4_runtimes_repo()
41 |
42 | antlr4_runtimes_extension = module_extension(implementation = _antlr4_runtimes_impl)
43 |
44 | def _com_google_osv_impl(_ctx):
45 | com_google_osv_repo()
46 |
47 | com_google_osv_extension = module_extension(implementation = _com_google_osv_impl)
48 |
49 | def _jsonpath_rw_impl(_ctx):
50 | jsonpath_rw_repo()
51 |
52 | jsonpath_rw_extension = module_extension(implementation = _jsonpath_rw_impl)
53 |
54 | def _antlr4_entry_points_impl(_ctx):
55 | antlr4_entry_points_repo()
56 |
57 | antlr4_entry_points_extension = module_extension(implementation = _antlr4_entry_points_impl)
58 |
--------------------------------------------------------------------------------
/repositories.bzl:
--------------------------------------------------------------------------------
1 | # Copyright 2025 Google LLC
2 | #
3 | # Use of this source code is governed by a BSD-style
4 | # license that can be found in the LICENSE file or at
5 | # https://developers.google.com/open-source/licenses/bsd
6 |
7 | """Repositories for Vanir."""
8 |
9 | load("@bazel_tools//tools/build_defs/repo:git.bzl", "git_repository")
10 | load("@bazel_tools//tools/build_defs/repo:http.bzl", "http_archive", "http_file")
11 | load("@bazel_tools//tools/build_defs/repo:local.bzl", "new_local_repository")
12 |
13 | ANTLR4_JAVA_REV = "c85ec510bd7cfba4649aec1ac2cf66bebd8ce2ed"
14 |
15 | def antlr4_grammar_java_parser_g4_repo():
16 | # Download Antlr4 Java grammar - Parser
17 | http_file(
18 | name = "antlr4_grammar_java_parser_g4",
19 | url = "https://github.com/antlr/grammars-v4/raw/%s/java/java/JavaParser.g4" % ANTLR4_JAVA_REV,
20 | downloaded_file_path = "JavaParser.g4",
21 | sha256 = "0555bd978b2a7e47ec373ee0671cd13f6ba576ca8c26d127fa0b7467dd6df8ce",
22 | )
23 |
24 | def antlr4_grammar_java_lexer_g4_repo():
25 | # Download Antlr4 Java grammar - Lexer
26 | http_file(
27 | name = "antlr4_grammar_java_lexer_g4",
28 | url = "https://github.com/antlr/grammars-v4/raw/%s/java/java/JavaLexer.g4" % ANTLR4_JAVA_REV,
29 | downloaded_file_path = "JavaLexer.g4",
30 | sha256 = "9a812eea62aeddc7bd54f8ba9dac4615d0f3f6b98328cf46b4143fdf75ba2c92",
31 | )
32 |
33 | def fuzzyc_repo():
34 | git_repository(
35 | name = "fuzzyc",
36 | commit = "f227d19e433a53e264ec6151c66dd85ec53b4c71",
37 | remote = "https://third-party-mirror.googlesource.com/fuzzyc",
38 | )
39 |
40 | def antlr4_runtimes_repo():
41 | http_archive(
42 | name = "antlr4_runtimes",
43 | build_file_content = """
44 | package(default_visibility = ["//visibility:public"])
45 | cc_library(
46 | name = "cpp",
47 | srcs = glob(["runtime/Cpp/runtime/src/**/*.cpp"]),
48 | hdrs = glob(["runtime/Cpp/runtime/src/**/*.h"]),
49 | includes = ["runtime/Cpp/runtime/src"],
50 | )
51 | """,
52 | sha256 = "50e87636a61daabd424d884c60f804387430920072f585a9fee2b90e2043fdcc",
53 | strip_prefix = "antlr4-4.11.1",
54 | urls = ["https://github.com/antlr/antlr4/archive/v4.11.1.tar.gz"],
55 | )
56 |
57 | def com_google_osv_repo():
58 | # OSV
59 | OSV_REV = "bbb8ab4f0491bf367f8e1406d8ddf9e9dbf5de86"
60 | http_archive(
61 | name = "com_google_osv",
62 | strip_prefix = "osv.dev-%s" % OSV_REV,
63 | build_file_content = """
64 | load("@com_google_protobuf//bazel:py_proto_library.bzl", "py_proto_library")
65 | load("@rules_proto//proto:defs.bzl", "proto_library")
66 |
67 | package(default_visibility = ["//visibility:public"])
68 |
69 | PROTO_FILES = [
70 | "osv/vulnerability.proto",
71 | ]
72 |
73 | filegroup(
74 | name = "protobuf_files",
75 | srcs = PROTO_FILES,
76 | visibility = ["//visibility:public"],
77 | )
78 |
79 | proto_library(
80 | name = 'vulnerability_proto',
81 | srcs = PROTO_FILES,
82 | deps = [
83 | '@com_google_protobuf//:struct_proto',
84 | '@com_google_protobuf//:timestamp_proto',
85 | ],
86 | visibility = ['//visibility:public'],
87 | )
88 |
89 | py_proto_library(
90 | name = "vulnerability_py_pb2",
91 | deps = [
92 | "vulnerability_proto",
93 | ],
94 | )
95 | """,
96 | urls = ["https://github.com/google/osv.dev/archive/%s.tar.gz" % OSV_REV],
97 | )
98 |
99 | def jsonpath_rw_repo():
100 | git_repository(
101 | name = "jsonpath-rw-git",
102 | build_file_content = """
103 | load("@rules_python//python:defs.bzl", "py_library")
104 | load("@vanir_deps//:requirements.bzl", "requirement")
105 |
106 | py_library(
107 | name = "jsonpath_rw",
108 | visibility = ["//visibility:public"],
109 | srcs = [
110 | "jsonpath_rw/__init__.py",
111 | "jsonpath_rw/jsonpath.py",
112 | "jsonpath_rw/lexer.py",
113 | "jsonpath_rw/parser.py"
114 | ],
115 | srcs_version = "PY3",
116 | deps = [
117 | requirement("six"),
118 | requirement("ply"),
119 | requirement("decorator"),
120 | ],
121 | )
122 | """,
123 | commit = "6f5647bb3ad2395c20f0191fef07a1df51c9fed8",
124 | remote = "https://github.com/kennknowles/python-jsonpath-rw.git",
125 | )
126 |
127 | def antlr4_entry_points_repo():
128 | new_local_repository(
129 | name = "antlr4_entry_points",
130 | path = "vanir/language_parsers/java",
131 | build_file_content = """
132 | load("@rules_python//python/entry_points:py_console_script_binary.bzl", "py_console_script_binary")
133 |
134 | py_console_script_binary(
135 | name = "antlr4",
136 | pkg = "@antlr4_deps//antlr4_tools",
137 | script = "antlr4",
138 | visibility = ["//visibility:public"],
139 | )
140 | """,
141 | )
142 |
--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | # Vanir direct dependencies
2 | requests
3 | absl-py
4 | mmh3
5 | unidiff
6 | jinja2
7 | typing_extensions>=4,<5
8 | python-dateutil
9 |
10 | # jsonpath_rw dependencies
11 | six
12 | ply
13 | decorator
14 |
--------------------------------------------------------------------------------
/requirements_antlr4.txt:
--------------------------------------------------------------------------------
1 | antlr4-tools==0.2
2 |
--------------------------------------------------------------------------------
/requirements_antlr4_lock.txt:
--------------------------------------------------------------------------------
1 | #
2 | # This file is autogenerated by pip-compile with Python 3.10
3 | # by the following command:
4 | #
5 | # pip-compile --output-file=requirements_antlr4_lock.txt requirements_antlr4.txt
6 | #
7 | antlr4-tools==0.2
8 |     # via -r requirements_antlr4.txt
9 | install-jdk==0.3.0
10 | # via antlr4-tools
11 |
--------------------------------------------------------------------------------
/requirements_lock.txt:
--------------------------------------------------------------------------------
1 | #
2 | # This file is autogenerated by pip-compile with Python 3.9
3 | # by the following command:
4 | #
5 | # pip-compile --output-file=requirements_lock.txt requirements.txt
6 | #
7 | absl-py==2.1.0
8 | # via -r requirements.txt
9 | certifi==2024.12.14
10 | # via requests
11 | charset-normalizer==3.4.1
12 | # via requests
13 | decorator==5.1.1
14 | # via -r requirements.txt
15 | idna==3.10
16 | # via requests
17 | jinja2==3.1.5
18 | # via -r requirements.txt
19 | markupsafe==3.0.2
20 | # via jinja2
21 | mmh3==5.0.1
22 | # via -r requirements.txt
23 | ply==3.11
24 | # via -r requirements.txt
25 | python-dateutil==2.9.0.post0
26 | # via -r requirements.txt
27 | requests==2.32.3
28 | # via -r requirements.txt
29 | six==1.17.0
30 | # via
31 | # -r requirements.txt
32 | # python-dateutil
33 | typing-extensions==4.12.2
34 | # via -r requirements.txt
35 | unidiff==0.7.5
36 | # via -r requirements.txt
37 | urllib3==2.3.0
38 | # via requests
39 |
--------------------------------------------------------------------------------
/vanir/cache/BUILD.bazel:
--------------------------------------------------------------------------------
1 | # Copyright 2023 Google LLC
2 | #
3 | # Use of this source code is governed by a BSD-style
4 | # license that can be found in the LICENSE file or at
5 | # https://developers.google.com/open-source/licenses/bsd
6 |
7 | # Package for Vanir cached data.
8 | package(default_visibility = ["//visibility:public"])
9 |
10 | filegroup(
11 | name = "ecosystem_file_lists",
12 | srcs = [
13 | "ecosystem_file_lists.json",
14 | ],
15 | )
16 |
--------------------------------------------------------------------------------
/vanir/code_extractors/BUILD.bazel:
--------------------------------------------------------------------------------
1 | # Copyright 2023 Google LLC
2 | #
3 | # Use of this source code is governed by a BSD-style
4 | # license that can be found in the LICENSE file or at
5 | # https://developers.google.com/open-source/licenses/bsd
6 |
7 | # Bazel build rules for Vanir code extractors.
8 | load("@rules_python//python:defs.bzl", "py_library", "py_test")
9 | load("@vanir_deps//:requirements.bzl", "requirement")
10 |
11 | package(default_visibility = [
12 | "//visibility:public",
13 | ])
14 |
15 | py_library(
16 | name = "code_extractor_base",
17 | srcs = ["code_extractor_base.py"],
18 | deps = [
19 | "//:vulnerability",
20 | requirement("requests"),
21 | requirement("unidiff"),
22 | ],
23 | )
24 |
25 | py_library(
26 | name = "code_extractor",
27 | srcs = ["code_extractor.py"],
28 | deps = [
29 | ":code_extractor_android",
30 | ":code_extractor_base",
31 | "//:vulnerability",
32 | requirement("requests"),
33 | ],
34 | )
35 |
36 | py_library(
37 | name = "code_extractor_android",
38 | srcs = ["code_extractor_android.py"],
39 | deps = [
40 | ":code_extractor_base",
41 | "//:vulnerability",
42 | requirement("requests"),
43 | requirement("unidiff"),
44 | ],
45 | )
46 |
47 | py_test(
48 | name = "code_extractor_test",
49 | srcs = ["code_extractor_test.py"],
50 | data = [
51 | "//vanir/testdata:test_patch_set",
52 | ],
53 | deps = [
54 | ":code_extractor",
55 | ":code_extractor_base",
56 | "//:vulnerability",
57 | requirement("absl-py"),
58 | ],
59 | )
60 |
61 | py_test(
62 | name = "code_extractor_android_test",
63 | srcs = ["code_extractor_android_test.py"],
64 | data = [
65 | "//vanir/testdata:test_patch_set",
66 | ],
67 | deps = [
68 | ":code_extractor_android",
69 | ":code_extractor_base",
70 | "//:vulnerability",
71 | requirement("absl-py"),
72 | requirement("requests"),
73 | ],
74 | )
75 |
--------------------------------------------------------------------------------
/vanir/code_extractors/code_extractor.py:
--------------------------------------------------------------------------------
1 | # Copyright 2023 Google LLC
2 | #
3 | # Use of this source code is governed by a BSD-style
4 | # license that can be found in the LICENSE file or at
5 | # https://developers.google.com/open-source/licenses/bsd
6 |
7 | """Extracts code snippets and metadata needed for CVE signature generation.
8 |
9 | This module contains utility classes and functions to extract code snippets and
10 | metadata of CVEs such as patch files (i.e., file diff) and unpatched files.
11 | """
12 |
13 | from typing import Collection, Optional, Sequence, Tuple, Type, TypeVar
14 |
15 | import requests
16 | from vanir import vulnerability
17 | # Simply importing the extractors will register them as subclasses of the
18 | # abstract extractor class and therefore available for use.
19 | # pylint: disable=unused-import
20 | from vanir.code_extractors import code_extractor_android
21 | # pylint: enable=unused-import
22 | from vanir.code_extractors import code_extractor_base
23 |
24 | _P = TypeVar('_P', bound=code_extractor_base.AbstractCodeExtractor)
25 |
26 | OSV_ID = 'id'
27 | REF_URL = 'url'
28 | REF_TYPE = 'type'
29 | REF_TYPE_FIX = 'FIX'
30 | VULN_AFFECTED = 'affected'
31 | AFFECTED_PACKAGE = 'package'
32 | PACKAGE_NAME = 'name'
33 | PACKAGE_ECOSYSTEM = 'ecosystem'
34 | AFFECTED_ECOSYSTEM_SPECIFIC = 'ecosystem_specific'
35 |
36 |
37 | class DuplicatedCodeExtractorError(Exception):
38 | pass
39 |
40 |
def _get_extractor_class(ecosystem: str) -> Optional[Type[_P]]:
  """Returns the extractor class for the given ecosystem, or None."""
  # Every extractor registers itself by subclassing AbstractCodeExtractor;
  # collect the ones claiming support for this ecosystem.
  matches = [
      cls
      for cls in code_extractor_base.AbstractCodeExtractor.__subclasses__()
      if cls.is_supported_ecosystem(ecosystem)
  ]
  if len(matches) > 1:
    raise DuplicatedCodeExtractorError(
        'Multiple code extractors supported ecosystem "%s": %s' %
        (ecosystem, matches))
  return matches[0] if matches else None
56 |
57 |
def extract_for_affected_entry(
    affected: vulnerability.AffectedEntry,
    session: Optional[requests.sessions.Session] = None,
) -> Tuple[Sequence[code_extractor_base.Commit],
           Sequence[code_extractor_base.FailedCommitUrl]]:
  """Extracts fix commit data for the given OSV affected entry.

  For each fix commit, the extractor retrieves:
    1. the commit message
    2. the per-file patch (diff)
    3. the unmodified and modified versions of the files changed by the patch

  Args:
    affected: the OSV affected entry to extract fixes for.
    session: requests session to use for retrieving files and patches. If
      None, a new session will be used.

  Returns:
    A tuple where the first item is the list of |Commit| objects pertaining
    to the given |vuln|, and the second item is the list of URLs found but
    failed to be converted to |Commit| objects.

  Raises:
    NotImplementedError: if no registered extractor supports the entry's
      ecosystem.
  """
  extractor_class = _get_extractor_class(affected.ecosystem)
  if extractor_class is None:
    raise NotImplementedError(f'Unsupported ecosystem: {affected.ecosystem}')
  extractor = extractor_class(session)
  return extractor.extract_commits_for_affected_entry(affected)
84 |
85 |
def extract_files_at_tip_of_unaffected_versions(
    ecosystem: str,
    package_name: str,
    affected_versions: Sequence[str],
    files: Collection[str],
    session: Optional[requests.sessions.Session] = None,
) -> Tuple[
    Sequence[code_extractor_base.Commit],
    Sequence[code_extractor_base.FailedCommitUrl],
]:
  """Extracts the given files at the tips of unaffected package versions.

  This function checks the list of given affected versions, determines the
  active branch tips that are not mentioned in that list, and extracts the
  listed files at each of those tips.

  Args:
    ecosystem: the ecosystem of the package.
    package_name: the name of the package.
    affected_versions: the list of affected versions of the package. Tips of
      versions not in this list will be extracted.
    files: the list of files to include.
    session: requests session to use for retrieving files and patches. If
      None, a new session will be used.

  Returns:
    A tuple where the first item is the list of |Commit| objects pertaining
    to the tip of a version not mentioned in |affected_versions|, and the
    second item is the list of tip URLs that failed to be converted to
    |Commit| objects.

  Raises:
    NotImplementedError: if no registered extractor supports |ecosystem|.
  """
  extractor_class = _get_extractor_class(ecosystem)
  if not extractor_class:
    raise NotImplementedError(f'Unsupported ecosystem: {ecosystem}')
  return extractor_class(session).extract_files_at_tip_of_unaffected_versions(
      package_name, affected_versions, files,
  )
122 |
--------------------------------------------------------------------------------
/vanir/code_extractors/code_extractor_test.py:
--------------------------------------------------------------------------------
1 | # Copyright 2023 Google LLC
2 | #
3 | # Use of this source code is governed by a BSD-style
4 | # license that can be found in the LICENSE file or at
5 | # https://developers.google.com/open-source/licenses/bsd
6 |
7 | """Tests for code_extractor."""
8 |
9 | from vanir import vulnerability
10 | from vanir.code_extractors import code_extractor
11 | from vanir.code_extractors import code_extractor_base
12 |
13 | from absl.testing import absltest
14 | from absl.testing import parameterized
15 |
16 | _TEST_COMMIT = 'abcdef0000000000000000000000000000000000'
17 | _TEST_PARENT_COMMIT = 'fedcba1111111111111111111111111111111111'
18 |
19 | _ANDROID_PATCH_URL_BASE = 'https://android.googlesource.com/kernel/common/+/'
20 | _TEST_ANDROID_COMMIT_URL = _ANDROID_PATCH_URL_BASE + _TEST_COMMIT
21 |
22 |
class CodeExtractorTest(parameterized.TestCase):
  """Tests for the ecosystem-dispatching entry points in code_extractor."""

  @absltest.mock.patch.object(
      code_extractor_base, 'Commit', autospec=True, instance=True
  )
  @absltest.mock.patch.object(code_extractor_base, 'AbstractCodeExtractor')
  def test_extract(self, mock_extractor_class, mock_commit):
    # Override __subclasses__ so the mock class is the only "registered"
    # extractor. MagicMock binds assigned magic methods so the lambda is
    # called with the mock itself as |self|.
    mock_extractor_class.__subclasses__ = lambda self: [mock_extractor_class]
    mock_extractor_class.is_supported_ecosystem.side_effect = (
        lambda s: True if s == 'test_ecosystem' else False
    )

    # Calling the mock class always yields the same mock instance, so
    # configuring the return value here also configures the instance created
    # inside extract_for_affected_entry().
    mock_extractor_class(
        None
    ).extract_commits_for_affected_entry.return_value = ([mock_commit], [])
    test_affected = vulnerability.AffectedEntry(
        {'package': {'ecosystem': 'test_ecosystem', 'name': 'pkg'}}
    )
    commits, failures = code_extractor.extract_for_affected_entry(test_affected)
    self.assertEmpty(failures)
    self.assertListEqual(commits, [mock_commit])

  def test_extract_with_no_package(self):
    # An entry without package info is rejected (presumably by AffectedEntry
    # itself — the error is raised before extractor dispatch).
    with self.assertRaisesRegex(ValueError, 'Missing package info.*'):
      code_extractor.extract_for_affected_entry(
          vulnerability.AffectedEntry({})
      )

  @absltest.mock.patch.object(code_extractor_base, 'AbstractCodeExtractor')
  def test_extract_with_no_patch_found(self, mock_extractor_class):
    mock_extractor_class.__subclasses__ = lambda self: [mock_extractor_class]
    mock_extractor_class.is_supported_ecosystem.side_effect = (
        lambda s: True if s == 'test_ecosystem' else False
    )
    # The extractor reports neither commits nor failures; both outputs must
    # come back empty rather than raising.
    mock_extractor_class(
        None
    ).extract_commits_for_affected_entry.return_value = ([], [])
    test_affected = vulnerability.AffectedEntry(
        {'package': {'ecosystem': 'test_ecosystem', 'name': 'pkg'}}
    )
    commits, failures = code_extractor.extract_for_affected_entry(test_affected)
    self.assertEmpty(commits)
    self.assertEmpty(failures)

  def test_extract_with_unsupported_ecosystem(self):
    # No extractor is mocked in, so subclass lookup finds no match for this
    # ecosystem and dispatch must fail.
    test_affected = vulnerability.AffectedEntry(
        {'package': {'ecosystem': 'unknown_ecosystem', 'name': 'pkg'}}
    )
    with self.assertRaises(NotImplementedError):
      _, _ = code_extractor.extract_for_affected_entry(test_affected)

  @absltest.mock.patch.object(
      code_extractor_base, 'Commit', autospec=True, instance=True
  )
  @absltest.mock.patch.object(code_extractor_base, 'AbstractCodeExtractor')
  def test_extract_files_at_tip_of_unaffected_versions(
      self, mock_extractor_class, mock_commit,
  ):
    # Same mock-registration pattern as test_extract above.
    mock_extractor_class.__subclasses__ = lambda self: [mock_extractor_class]
    mock_extractor_class.is_supported_ecosystem.side_effect = (
        lambda s: True if s == 'test_ecosystem' else False
    )
    mock_extractor_class(
        None
    ).extract_files_at_tip_of_unaffected_versions.return_value = (
        [mock_commit], []
    )

    commits, failures = (
        code_extractor.extract_files_at_tip_of_unaffected_versions(
            'test_ecosystem', 'test_package', ['1.0.0'], ['file1'], None,
        )
    )
    self.assertEmpty(failures)
    self.assertListEqual(commits, [mock_commit])

  def test_extract_files_at_tip_of_unaffected_versions_unsupported_ecosystem(
      self
  ):
    # Without the mock extractor patched in, 'test_ecosystem' is unsupported.
    with self.assertRaises(NotImplementedError):
      code_extractor.extract_files_at_tip_of_unaffected_versions(
          'test_ecosystem', 'test_package', ['1.0.0'], ['file1', 'file2'], None,
      )
106 |
107 |
# Run the test suite when this file is executed directly.
if __name__ == '__main__':
  absltest.main()
110 |
--------------------------------------------------------------------------------
/vanir/file_list_manager.py:
--------------------------------------------------------------------------------
1 | # Copyright 2023 Google LLC
2 | #
3 | # Use of this source code is governed by a BSD-style
4 | # license that can be found in the LICENSE file or at
5 | # https://developers.google.com/open-source/licenses/bsd
6 |
7 | """Module for managing known files for each ecosystem/package.
8 |
9 | This module manages lists of known files for each ecysostem & package needed
10 | for calculating truncated path level.
11 | """
12 |
13 | import collections
14 | import enum
15 | import json
16 | from typing import Mapping, Sequence
17 |
18 | from vanir import parser
19 |
20 |
21 |
# Git filesystem service constants.
# NOTE(review): these appear unused within this module — possibly left over
# from an internal-only retrieval path; confirm before removing.
_GITFS_TIMEOUT_SEC = 60
_GITFS_ADDR = 'blade:git'

ANDROID_ECOSYSTEM = 'Android'
KERNEL_PACKAGE = ':linux_kernel:'
# Identifier of the Android mainline kernel project (host:repo:ref: format).
_MAINLINE_KERNEL_PROJECT = 'android:kernel/common:refs/heads/android-mainline:'

# (ecosystem, package, project) triples with known reference file lists.
# NOTE(review): not referenced in this module — presumably consumed by the
# cache-regeneration tooling; confirm.
_KNOWN_SOURCES = [(ANDROID_ECOSYSTEM, KERNEL_PACKAGE, _MAINLINE_KERNEL_PROJECT)]

# Path of the bundled file-list cache, relative to the run directory.
ECOSYSTEM_FILE_LISTS_CACHE = (
    'vanir/cache/ecosystem_file_lists.json'
)
34 |
35 |
@enum.unique
class Source(enum.Enum):
  """Supported sources for retrieving reference file lists."""
  CACHE = 'cache'


def get_file_lists(
    source: Source = Source.CACHE,
) -> Mapping[str, Mapping[str, Sequence[str]]]:
  """Returns reference file lists for signature generation.

  Args:
    source: source to retrieve file lists.

  Returns:
    Reference file list map where the first key is ecosystem, the second key is
    package name and the value is list of files.

  Raises:
    ValueError: if |source| is not a known file list source.
  """
  if source == Source.CACHE:
    # Use a context manager so the cache file handle is always closed; the
    # previous implementation opened the file without ever closing it.
    with open(ECOSYSTEM_FILE_LISTS_CACHE, mode='rb') as cache_file:
      return json.load(cache_file)
  raise ValueError('Unknown file list source: %s' % source)
59 |
--------------------------------------------------------------------------------
/vanir/file_list_manager_test.py:
--------------------------------------------------------------------------------
1 | # Copyright 2023 Google LLC
2 | #
3 | # Use of this source code is governed by a BSD-style
4 | # license that can be found in the LICENSE file or at
5 | # https://developers.google.com/open-source/licenses/bsd
6 |
7 | """Test for file list manager module."""
8 |
9 | import json
10 |
11 | from vanir import file_list_manager
12 |
13 | from absl.testing import absltest
14 |
# Fixture data for file-list handling tests.
_TEST_SUPPORTED_FILE_LIST = ['foo.c', 'bar.c']
_TEST_UNSUPPORTED_FILE_LIST = ['unsupported_filetype.asp']
_TEST_FILE_LIST = _TEST_SUPPORTED_FILE_LIST + _TEST_UNSUPPORTED_FILE_LIST
_TEST_SHA = 'abcdef1234567890'
# NOTE(review): only the cache-based tests below are present; this JSON
# fixture (and _TEST_SHA above) appear unused in this file — confirm before
# removing.
_TEST_FILE_LISTS_JSON_STR = json.dumps(
    {'Android': {':linux_kernel:': _TEST_SUPPORTED_FILE_LIST}}
)
22 |
23 |
class FileListManagerTest(absltest.TestCase):
  """Tests for cache-backed reference file list retrieval."""

  def test_get_file_lists_with_cache(self):
    # The bundled cache is expected to carry a very large Android kernel list.
    lists = file_list_manager.get_file_lists(file_list_manager.Source.CACHE)
    kernel_files = lists.get('Android', {}).get(':linux_kernel:')
    self.assertGreater(len(kernel_files), 50000)

  def test_get_file_lists_fail_with_unknown_source(self):
    # Anything that is not a Source enum member must be rejected.
    with self.assertRaises(ValueError):
      file_list_manager.get_file_lists('unknown_source')
36 |
37 |
# Run the test suite when this file is executed directly.
if __name__ == '__main__':
  absltest.main()
40 |
--------------------------------------------------------------------------------
/vanir/hasher.py:
--------------------------------------------------------------------------------
1 | # Copyright 2023 Google LLC
2 | #
3 | # Use of this source code is governed by a BSD-style
4 | # license that can be found in the LICENSE file or at
5 | # https://developers.google.com/open-source/licenses/bsd
6 |
7 | """Hasher to generate signature hashes for the given code snippets."""
8 |
9 | import functools
10 | from typing import Mapping, Optional, Sequence, Tuple
11 |
12 | from absl import logging
13 | import mmh3
14 |
# For experimental purpose, the n-gram size can be adjusted, but note that any
# change on the line n-gram size requires regeneration of entire signatures.
# Generally, decreasing n-gram size may end up with increase of findings,
# including both true positives and false positives.
_LINE_SIGNATURE_NGRAM_SIZE = 4

# Murmur3 128-bit x64 hash with a fixed seed and unsigned output, so computed
# signatures are stable across runs and platforms.
_HASH = functools.partial(mmh3.hash128, seed=0, x64arch=True, signed=False)
22 |
23 |
24 | class _LineNgram:
25 | """Class for maintaining an n-gram where the units are code lines."""
26 |
27 | def __init__(self,
28 | normalized_code: Mapping[int, str],
29 | line_numbers: Sequence[int],
30 | is_first: Optional[bool] = False,
31 | is_last: Optional[bool] = False):
32 | """Initializes the line n-gram.
33 |
34 | The first and last ngram of a file must be explicitly marked through
35 | |is_first| and |is_last| in order to cover patch hunks adding lines at the
36 | top or the bottom of the file. Internally, the first ngram will be regarded
37 | as ranging from the line number negative infinity, and the last ngram
38 | ranging to the line number infinity.
39 |
40 | Args:
41 | normalized_code: dictionary of normalized code lines including (but not
42 | limited to) the lines for the n-gram. Each key is a line number, and the
43 | value is normalized line in string.
44 | line_numbers: the list of line numbers comprising the n-gram. Each line
45 | number must be a valid line number existing in |normalized_code|.
46 | is_first: True if the n-gram is the first n-gram of the target file.
47 | is_last: True if the n-gram is the last n-gram of the target file.
48 |
49 | Raises:
50 | ValueError: raises value error if any line number in |line_numbers| is not
51 | a valid line number in |normalized_code|.
52 | """
53 |
54 | self._normalized_code = normalized_code
55 | self._line_numbers = sorted(line_numbers)
56 | self._is_first = is_first
57 | self._is_last = is_last
58 |
59 | def is_overlapping(self, line_range: Tuple[int, int]) -> bool:
60 | """Returns true if the line ngram range overlaps with given |line_range|."""
61 | if self._is_first and self._is_last:
62 | # This n-gram is the first and the last n-gram, covering the entire file.
63 | return True
64 |
65 | range_start, range_end = line_range
66 | if range_start > range_end:
67 | raise ValueError(f'line_range: start ({range_start}) cannot be greater '
68 | f'than end ({range_end})')
69 |
70 | # For given ranges r1 and r2, if r1.start <= r2.end && r1.end >= r2.start,
71 | # r1 and r2 overlaps.
72 | if self._is_first:
73 | return self._line_numbers[-1] >= range_start
74 | if self._is_last:
75 | return self._line_numbers[0] <= range_end
76 | return (self._line_numbers[0] <= range_end and
77 | self._line_numbers[-1] >= range_start)
78 |
79 | def get_ngram_string(self) -> str:
80 | """Returns the actual string of the n-gram."""
81 | try:
82 | return ' '.join([
83 | self._normalized_code[line_number]
84 | for line_number in self._line_numbers
85 | ])
86 | except KeyError as e:
87 | raise KeyError(
88 | f'Invalid line numbers for ngram: {self._line_numbers}. This is a '
89 | 'bug and should never have happened. A _LineNgram object should only '
90 | 'be initialized with line_numbers being a subset of normalized_code.'
91 | ) from e
92 |
93 | def get_line_numbers(self) -> Sequence[int]:
94 | """Returns the line numbers comprising the n-gram."""
95 | return self._line_numbers
96 |
97 |
def hash_function_chunk(normalized_code: str) -> int:
  """Computes the signature hash for a normalized function chunk.

  A function chunk signature is simply the Murmur3 128-bit x64 hash of the
  normalized function code.

  Args:
    normalized_code: a normalized function code in string.

  Returns:
    The 128-bit hash in integer.
  """
  chunk_hash = _HASH(normalized_code)
  return chunk_hash
111 |
112 |
def hash_line_chunk(
    normalized_code: Mapping[int, str],
    affected_line_ranges: Sequence[Tuple[int, int]]
) -> Tuple[Sequence[int], Sequence[int]]:
  """Computes hash for the normalized code of a line chunk.

  A line chunk signature is a set of n-gram line hashes. Each n-gram consists
  of affected lines and their context lines (up to n - 1 lines before and
  after the affected lines). Note that any empty lines / comment lines are not
  regarded as valid lines so the actual context lines can be located further
  than n - 1 lines of an affected line.

  Args:
    normalized_code: a normalized code of a line chunk.
    affected_line_ranges: list of the ranges indicating the lines changed by the
      patch in the chunk's target file. The line numbers are based on the
      unpatched file. Inclusive.

  Returns:
    A tuple of the hash list and used line list. The hash list is a list of
    128-bit line n-gram hashes. The used line list is a list of integer line
    numbers used as elements of the n-grams.
  """
  valid_line_numbers = sorted(normalized_code.keys())
  if not valid_line_numbers:
    # Log at WARNING rather than DEBUG: an empty chunk is unusual, and the
    # unit test asserts this message via assertLogs(level=WARNING), which a
    # debug-level record would never satisfy.
    logging.warning('No valid line found from the normalized code. Returning '
                    'empty lists.')
    return [], []

  if not affected_line_ranges:
    # If no affected line range is specified, regard all lines as affected.
    affected_line_ranges = [
        (valid_line_numbers[0], valid_line_numbers[-1])
    ]

  # Make a list of all valid line ngrams.
  ngrams = []
  if len(valid_line_numbers) < _LINE_SIGNATURE_NGRAM_SIZE:
    # If the number of valid lines in a file is shorter than n-gram size,
    # just use all valid lines.
    ngrams.append(
        _LineNgram(
            normalized_code, valid_line_numbers, is_first=True, is_last=True))
  else:
    # Sliding window of n consecutive valid lines; the first and last windows
    # are flagged so hunks touching the file edges still match.
    ngram_first_line_indices = range(
        len(valid_line_numbers) - _LINE_SIGNATURE_NGRAM_SIZE + 1)
    for line_index in ngram_first_line_indices:
      ngram_line_numbers = valid_line_numbers[
          line_index:_LINE_SIGNATURE_NGRAM_SIZE + line_index]
      is_first = line_index == ngram_first_line_indices[0]
      is_last = line_index == ngram_first_line_indices[-1]
      ngrams.append(
          _LineNgram(normalized_code, ngram_line_numbers, is_first, is_last))

  # For only "valid & affected" ngrams, compute ngram hashes. Each ngram is
  # consumed at most once: iterate over a copy so removal is safe mid-loop.
  line_hashes = []
  used_lines = set()
  for affected_range in affected_line_ranges:
    for ngram in ngrams.copy():
      if ngram.is_overlapping(affected_range):
        ngram_hash = _HASH(ngram.get_ngram_string())
        line_hashes.append(ngram_hash)
        used_lines.update(ngram.get_line_numbers())
        ngrams.remove(ngram)

  return line_hashes, sorted(used_lines)
179 |
--------------------------------------------------------------------------------
/vanir/hasher_test.py:
--------------------------------------------------------------------------------
1 | # Copyright 2023 Google LLC
2 | #
3 | # Use of this source code is governed by a BSD-style
4 | # license that can be found in the LICENSE file or at
5 | # https://developers.google.com/open-source/licenses/bsd
6 |
7 | """Tests for hasher."""
8 |
9 | import functools
10 |
11 | from absl import logging
12 | import mmh3
13 | from vanir import hasher
14 |
15 | from absl.testing import absltest
16 | from absl.testing import parameterized
17 |
18 |
# Normalized line-chunk fixture: keys are line numbers of the original file.
# Gaps in the numbering are lines dropped during normalization (presumably
# blank/comment lines — see hash_line_chunk's docstring).
_TEST_LINE_CHUNK_NORMALIZED_CODE = {
    5: 'void __init testdev_init ( void )',
    6: '{',
    7: 'memset ( cdev , 0 , sizeof * cdev ) ;',
    8: 'init_list_head ( & cdev -> list ) ;',
    9: 'kobject_init ( & cdev -> kobj , & ktype_cdev_default ) ;',
    10: 'cdev -> ops = fops ;',
    11: '}',
    19: 'cdev_map = kobj_map_init ( base_probe , & testdevs_lock ) ;',
    20: '}',
    24: 'export_symbol ( register_testdev_region ) ;',
    34: 'export_symbol ( __register_testdev ) ;',
    35: 'export_symbol ( __unregister_testdev ) ;'
}
33 |
34 |
class HasherTest(parameterized.TestCase):
  """Tests for function-chunk and line-chunk signature hashing."""

  def setUp(self):
    # Mirror of hasher's internal _HASH so tests can compute expected values.
    self._hash = functools.partial(
        mmh3.hash128, seed=0, x64arch=True, signed=False)
    super().setUp()

  def test_function_chunk_hash(self):
    test_normalized_code = (
        'DTYPE FUNCNAME ( const unsigned DTYPE PARAM ) { const DTYPE '
        '* VAR = ( DTYPE * ) globalvar -> data '
        '; FUNCCALL ( PARAM , VAR ) ; 0xe8 ( ) ; return 0 ; }')

    function_hash = hasher.hash_function_chunk(test_normalized_code)

    # A function chunk signature is the hash of the whole normalized string.
    expected_function_hash = self._hash(test_normalized_code)
    self.assertEqual(function_hash, expected_function_hash)

  @parameterized.named_parameters(
      dict(
          testcase_name='with_no_affected_ranges',
          normalized_code=_TEST_LINE_CHUNK_NORMALIZED_CODE,
          affected_ranges=[],
          expected_used_lines=[5, 6, 7, 8, 9, 10, 11, 19, 20, 24, 34, 35]),
      dict(
          testcase_name='with_affected_ranges_in_middle',
          normalized_code=_TEST_LINE_CHUNK_NORMALIZED_CODE,
          affected_ranges=[(9, 10)],
          expected_used_lines=[6, 7, 8, 9, 10, 11, 19, 20]),
      dict(
          testcase_name='with_affected_ranges_at_file_start',
          normalized_code=_TEST_LINE_CHUNK_NORMALIZED_CODE,
          affected_ranges=[(0, 0)],
          expected_used_lines=[5, 6, 7, 8]),
      dict(
          testcase_name='with_affected_ranges_at_file_end',
          normalized_code=_TEST_LINE_CHUNK_NORMALIZED_CODE,
          affected_ranges=[(50, 50)],
          expected_used_lines=[20, 24, 34, 35]),
      dict(
          testcase_name='with_short_normalized_code',
          normalized_code={
              3: '#define AUDIT_NAMES 5',
              4: '#define auditsc_get_stamp ( c , t , s ) 0'
          },
          affected_ranges=[],
          expected_used_lines=[3, 4]))
  def test_line_chunk_hash(self, normalized_code, affected_ranges,
                           expected_used_lines):
    """Tests various successful cases of line chunk hash generation.

    Args:
      normalized_code: the normalized code to test.
      affected_ranges: the affected ranges to test with for the normalized code.
      expected_used_lines: expected lines to be used for signature hash
        generation. This value varies depending on |affected_ranges| but this
        test explicitly requires this arg because we want to test the
        corresponding logic in the main code rather than to run the identical
        logic again in the test.
    """
    # Rebuild the expected sliding 4-line n-grams over the expected used
    # lines; if fewer than 4 lines exist, the whole chunk forms one n-gram.
    expected_hashes = []
    expected_line_number_ngrams = []
    index = 0
    while index + 3 < len(expected_used_lines):
      expected_line_number_ngrams.append(expected_used_lines[index:index + 4])
      index += 1
    if not expected_line_number_ngrams:
      expected_line_number_ngrams.append(normalized_code.keys())

    for line_numbers in expected_line_number_ngrams:
      # N-gram strings are the space-joined normalized lines, in line order.
      ngram = ' '.join(
          [normalized_code[line_number] for line_number in line_numbers])
      expected_hashes.append(self._hash(ngram))

    line_hashes, used_lines = hasher.hash_line_chunk(normalized_code,
                                                     affected_ranges)

    self.assertCountEqual(expected_used_lines, used_lines)
    self.assertEqual(expected_hashes, line_hashes)

  def test_line_chunk_hash_with_empty_normalized_code_is_warned(self):
    # NOTE(review): hasher currently emits this message via logging.debug;
    # assertLogs(level=WARNING) needs a warning-level record — confirm the
    # log levels are consistent between the module and this test.
    test_normalized_code = {}
    with self.assertLogs(level=logging.WARNING) as logs:
      line_hashes, used_lines = hasher.hash_line_chunk(test_normalized_code, [])
    self.assertIn(
        'No valid line found from the normalized code. Returning empty lists.',
        logs.output[0])
    self.assertEmpty(line_hashes)
    self.assertEmpty(used_lines)

  def test_line_ngram_overlap_check_fails_with_reversed_line_range(self):
    # Case unable to be triggered by public class; directly test private class.
    test_ngram_line_numbers = [7, 8, 9, 10]
    test_affected_line_range = (10, 7)
    ngram = hasher._LineNgram(_TEST_LINE_CHUNK_NORMALIZED_CODE,
                              test_ngram_line_numbers)
    expected_error_msg = (
        r'line_range: start \(10\) cannot be greater than end \(7\)')
    with self.assertRaisesRegex(ValueError, expected_error_msg):
      ngram.is_overlapping(test_affected_line_range)
135 |
# Run the test suite when this file is executed directly.
if __name__ == '__main__':
  absltest.main()
138 |
--------------------------------------------------------------------------------
/vanir/integration_tests/BUILD.bazel:
--------------------------------------------------------------------------------
1 | # Copyright 2023 Google LLC
2 | #
3 | # Use of this source code is governed by a BSD-style
4 | # license that can be found in the LICENSE file or at
5 | # https://developers.google.com/open-source/licenses/bsd
6 |
7 | # Integration tests for Vanir
8 |
load("@rules_python//python:defs.bzl", "py_test")
load("@vanir_deps//:requirements.bzl", "requirement")

package(default_visibility = ["//visibility:public"])

# End-to-end missing-patch detection test. Runs against checked-in fixtures
# (gitiles data, vulnerability entries, and a simplified source tree), which
# is presumably what makes it hermetic — confirm it needs no network access.
py_test(
    name = "missing_patch_detection_hermetic_test",
    srcs = ["missing_patch_detection_hermetic_test.py"],
    data = [
        "//vanir/testdata:test_gitiles_data",
        "//vanir/testdata:test_vulnerabilities",
        "//vanir/testdata:vanir_test_source_simplified",
    ],
    deps = [
        "//:osv_client",
        "//:reporter",
        "//:signature",
        "//:vanir_test_base",
        "//:vulnerability",
        "//:vulnerability_manager",
        "//vanir/scanners:scanner_base",
        "//vanir/scanners:target_selection_strategy",
        "//vanir/testdata:test_signatures",
        requirement("absl-py"),
        requirement("requests"),
    ],
)
36 |
--------------------------------------------------------------------------------
/vanir/language_parsers/BUILD.bazel:
--------------------------------------------------------------------------------
1 | # Copyright 2023 Google LLC
2 | #
3 | # Use of this source code is governed by a BSD-style
4 | # license that can be found in the LICENSE file or at
5 | # https://developers.google.com/open-source/licenses/bsd
6 |
7 | # Package containing all programming language parsers for Vanir.
8 |
# NOTE(review): "py_test" is loaded but unused in this file — confirm before
# removing it from the load().
load("@rules_python//python:defs.bzl", "py_library", "py_test")

package(default_visibility = ["//visibility:public"])

# Abstract base class that each concrete language parser implements.
py_library(
    name = "abstract_language_parser",
    srcs = [
        "abstract_language_parser.py",
    ],
    deps = [
        ":common",
    ],
)

# Shared parser data types (function/line chunks, parse errors, results).
py_library(
    name = "common",
    srcs = [
        "common.py",
    ],
)

# Aggregator pulling in all concrete language parsers (C/C++ and Java).
py_library(
    name = "language_parsers",
    srcs = ["language_parsers.py"],
    deps = [
        ":abstract_language_parser",
        ":common",
        "//vanir/language_parsers/cpp:cpp_parser",
        "//vanir/language_parsers/java:java_parser",
    ],
)
40 |
--------------------------------------------------------------------------------
/vanir/language_parsers/abstract_language_parser.py:
--------------------------------------------------------------------------------
1 | # Copyright 2023 Google LLC
2 | #
3 | # Use of this source code is governed by a BSD-style
4 | # license that can be found in the LICENSE file or at
5 | # https://developers.google.com/open-source/licenses/bsd
6 |
7 | """Abstract Parser base class that all parsers implement.
8 | """
9 |
10 | import abc
11 | from typing import Optional, Sequence, Tuple
12 |
13 | from vanir.language_parsers import common
14 |
15 |
class AbstractLanguageParser(abc.ABC):
  """Abstract language parser interface that all language parsers implement.

  A Parser object parses one file, optionally only on a set of select line
  ranges, and presents methods to extract function and line chunks to Vanir
  signature generation and scanning.

  A Parser supports a set of file extensions, given by each implementation as
  the return value of get_supported_extensions().
  """

  @classmethod
  @abc.abstractmethod
  def get_supported_extensions(cls) -> Sequence[str]:
    """Returns a list of supported file extensions. Should include the dot."""

  def __init__(self, filename: str):
    """Construct the Parser object for given filename.

    This base implementation is a no-op; concrete parsers are expected to
    open and prepare |filename| themselves.

    Args:
      filename: the absolute path to the file to analyze.
    """

  @abc.abstractmethod
  def get_chunks(
      self,
      affected_line_ranges_for_functions: Optional[
          Sequence[Tuple[int, int]]
      ] = None,
  ) -> common.ParseResults:
    """Parse the file and return the line chunk and function chunks.

    Args:
      affected_line_ranges_for_functions: list of line ranges of interest to
        filter function chunks on. A parser should return only functions that
        contains at least one line in this range. If
        affected_line_ranges_for_functions is empty, return all functions.

    Returns:
      A ParseResults object containing all the parsing output.
    """
55 |
--------------------------------------------------------------------------------
/vanir/language_parsers/common.py:
--------------------------------------------------------------------------------
1 | # Copyright 2023 Google LLC
2 | #
3 | # Use of this source code is governed by a BSD-style
4 | # license that can be found in the LICENSE file or at
5 | # https://developers.google.com/open-source/licenses/bsd
6 |
7 | """Common data types for Vanir language parser."""
8 |
9 | import dataclasses
10 | from typing import Mapping, Sequence
11 |
12 |
@dataclasses.dataclass(frozen=True)
class FunctionChunkBase:
  """A function extracted by a language parser, together with its metadata.

  Attributes:
    name: the function's name.
    return_types: one entry per return type, each being the sequence of
      tokens spelling that type, e.g. [['struct', 'foo'], ['int']].
    parameters: names of the parameters the function accepts.
    used_data_types: token sequences for every data type the function uses,
      in the same per-type token format as return_types.
    local_variables: names of all local variables.
    called_functions: names of the other functions this function calls.
    tokens: the tokens making up the function body.
  """
  name: str
  return_types: Sequence[Sequence[str]]
  parameters: Sequence[str]
  used_data_types: Sequence[Sequence[str]]
  local_variables: Sequence[str]
  called_functions: Sequence[str]
  tokens: Sequence[str]
35 |
36 |
@dataclasses.dataclass(frozen=True)
class LineChunkBase:
  """Meaningful tokenized lines from a parsed file, keyed by line number."""
  # tokens: maps a line number to the sequence of tokens on that line.
  tokens: Mapping[int, Sequence[str]]
41 |
42 |
@dataclasses.dataclass(frozen=True)
class ParseError:
  """A single error encountered while parsing a file."""
  # line / column: position of the offending token in the parsed file.
  line: int
  column: int
  # bad_token: the token the parser choked on; message: the parser's
  # description of the failure.
  bad_token: str
  message: str
50 |
51 |
@dataclasses.dataclass(frozen=True)
class ParseResults:
  """Aggregated parser output: function chunks, line chunk, and errors."""
  function_chunks: Sequence[FunctionChunkBase]
  line_chunk: LineChunkBase
  parse_errors: Sequence[ParseError]
58 |
--------------------------------------------------------------------------------
/vanir/language_parsers/cpp/BUILD.bazel:
--------------------------------------------------------------------------------
1 | # Copyright 2023 Google LLC
2 | #
3 | # Use of this source code is governed by a BSD-style
4 | # license that can be found in the LICENSE file or at
5 | # https://developers.google.com/open-source/licenses/bsd
6 |
7 | # Bazel build rules for Vanir parser core.
load("@rules_python//python:defs.bzl", "py_library", "py_test")
load("@vanir_deps//:requirements.bzl", "requirement")

package(default_visibility = [
    "//visibility:public",
])

# Native FuzzyC/Antlr4-based C/C++ parser core.
cc_library(
    name = "parser_core",
    srcs = ["parser_core.cc"],
    hdrs = ["parser_core.h"],
    copts = [
        # fexception is needed only for catching Antlr4 exceptions.
        "-fexceptions",
    ],
    features = ["-use_header_modules"],
    deps = [
        "@com_google_absl//absl/status",
        "@com_google_absl//absl/status:statusor",
        "@com_google_absl//absl/strings",
        "@fuzzyc//:fuzzyc_cc_function",
        "@fuzzyc//:fuzzyc_cc_module",
    ],
)

# Python-facing C/C++ parser; depends on the pybind11 extension module and
# the pybind11_abseil status shim at runtime (via data).
py_library(
    name = "cpp_parser",
    srcs = ["cpp_parser.py"],
    data = [
        "//vanir/language_parsers/cpp/python:parser_core.so",
        "@pybind11_abseil//pybind11_abseil:status.so",
    ],
    deps = [
        "//vanir/language_parsers:abstract_language_parser",
        "//vanir/language_parsers:common",
        requirement("absl-py"),
    ],
)

py_test(
    name = "cpp_parser_test",
    size = "small",
    srcs = ["cpp_parser_test.py"],
    data = ["@pybind11_abseil//pybind11_abseil:status.so"],
    deps = [
        ":cpp_parser",
        requirement("absl-py"),
    ],
)
57 |
--------------------------------------------------------------------------------
/vanir/language_parsers/cpp/cpp_parser.py:
--------------------------------------------------------------------------------
1 | # Copyright 2023 Google LLC
2 | #
3 | # Use of this source code is governed by a BSD-style
4 | # license that can be found in the LICENSE file or at
5 | # https://developers.google.com/open-source/licenses/bsd
6 |
7 | """Vanir C/C++ parser.
8 |
9 | This module interfaces with the native Antlr FuzzyC parser.
10 | """
11 | import os
12 | import tempfile
13 | from typing import Iterable, Optional, Sequence, Tuple
14 |
15 | from absl import logging
16 | from vanir.language_parsers import abstract_language_parser
17 | from vanir.language_parsers import common
18 | from vanir.language_parsers.cpp.python import parser_core
19 |
20 | from pybind11_abseil import status
21 |
_ANTLR4_DECODE_ERROR = 'UTF-8 string contains an illegal byte sequence'
# NOTE(review): the misspellings below ('ALTNERNATIVE', 'altneratives',
# 'deocde') are kept verbatim: unit tests patch this name and assert on these
# exact message substrings.
_ALTNERNATIVE_ENCODINGS = ['LATIN-1']


class CppParser(abstract_language_parser.AbstractLanguageParser):
  """Vanir C/C++ parser.

  This class implements the AbstractLanguageParser base class.
  """

  def __init__(self, filename: str):
    """Initializes the native parser core for the given file.

    If the file is not valid UTF-8, it is transcoded to a temporary UTF-8
    copy (trying the known alternative encodings) and parsing is retried on
    the copy. The temporary copy is removed in __del__.

    Args:
      filename: path of the C/C++ source file to parse.

    Raises:
      status.StatusNotOk: on parser-core failures other than the Antlr4
        UTF-8 decoding error.
      ValueError: if no known encoding can decode the file.
    """
    try:
      self.parser_core = parser_core.ParserCore(filename)
      self.parser_core.init()
    except status.StatusNotOk as e:
      if (
          e.code == status.StatusCode.INVALID_ARGUMENT.value
          and e.message == _ANTLR4_DECODE_ERROR
      ):
        # If encoding problem, try again after converting to UTF-8.
        # Fix: pass |filename| for the '%s' placeholder; the original call
        # omitted the argument, so the placeholder was never interpolated.
        logging.info(
            '%s is not encoded in UTF-8. Trying altneratives.', filename
        )
        self._temp_filename = self._convert_to_utf8(filename)
        self.parser_core = parser_core.ParserCore(self._temp_filename)
        self.parser_core.init()
      else:
        raise e

  def __del__(self):
    # Clean up the temporary UTF-8 copy, if __init__ created one.
    if getattr(self, '_temp_filename', None):
      os.unlink(self._temp_filename)

  @classmethod
  def get_supported_extensions(cls) -> Iterable[str]:
    """Returns the file extensions handled by this parser."""
    return ['.c', '.h', '.cc', '.hh', '.cpp', '.hpp', '.cxx', '.hxx']

  @classmethod
  def _convert_to_utf8(cls, filename) -> str:
    """Creates a new file with UTF-8 encoding and returns the file name."""
    for encoding in _ALTNERNATIVE_ENCODINGS:
      try:
        with open(filename, encoding=encoding, mode='r') as file:
          # delete=False: the caller owns the file; __del__ unlinks it.
          new_file = tempfile.NamedTemporaryFile(
              encoding='UTF-8', mode='w', delete=False
          )
          new_file.write(file.read())
          new_file.close()
          return new_file.name
      except ValueError:  # Try other encodings on decoding failure
        continue
    raise ValueError(
        'Failed to deocde %s. Tried encodings: UTF-8, %s'
        % (filename, ', '.join(_ALTNERNATIVE_ENCODINGS))
    )

  def _to_standard_function_chunk_base(
      self, chunk: parser_core.FunctionChunkRaw
  ) -> common.FunctionChunkBase:
    """Converts a raw Pybind function chunk into the common dataclass form."""
    return common.FunctionChunkBase(
        chunk.name,
        # The raw chunk carries a single return type; the common form takes a
        # sequence of return types.
        [chunk.return_type],
        chunk.parameters,
        chunk.used_data_types,
        chunk.local_variables,
        chunk.called_functions,
        chunk.tokens,
    )

  def get_chunks(
      self,
      affected_line_ranges_for_functions: Optional[
          Sequence[Tuple[int, int]]
      ] = None,
  ) -> common.ParseResults:
    """Parses the file and returns function chunks, the line chunk and errors.

    Args:
      affected_line_ranges_for_functions: (start, end) line ranges used to
        filter function chunks; None or empty selects all functions. The line
        chunk always covers the whole file regardless of this filter.

    Returns:
      A common.ParseResults with the extracted chunks and any parse errors.
    """
    if affected_line_ranges_for_functions is None:
      affected_line_ranges_for_functions = []
    function_chunks = [
        self._to_standard_function_chunk_base(function_chunk_raw)
        for function_chunk_raw in self.parser_core.get_function_chunks(
            affected_line_ranges_for_functions)
    ]
    line_chunk = common.LineChunkBase(self.parser_core.get_line_chunk().tokens)
    errors = [
        common.ParseError(e.line, e.column, e.bad_token, e.message)
        for e in self.parser_core.get_parse_errors()
    ]
    return common.ParseResults(function_chunks, line_chunk, errors)
108 |
--------------------------------------------------------------------------------
/vanir/language_parsers/cpp/cpp_parser_test.py:
--------------------------------------------------------------------------------
1 | # Copyright 2023 Google LLC
2 | #
3 | # Use of this source code is governed by a BSD-style
4 | # license that can be found in the LICENSE file or at
5 | # https://developers.google.com/open-source/licenses/bsd
6 |
7 | """Tests for parser_core Pybind wrapped by cpp_parser."""
8 |
9 | from unittest import mock
10 |
11 | from absl import logging
12 | from vanir.language_parsers.cpp import cpp_parser
13 |
14 | from absl.testing import absltest
15 | from pybind11_abseil import status
16 |
17 |
class ParserCoreTest(absltest.TestCase):
  """Tests for the Pybind-wrapped C/C++ parser core via CppParser."""

  def setUp(self):
    super().setUp()
    # Fixture C code: a global, a function spanning string lines 3-9 (line 1
    # is the leading newline), a bare declaration, and an empty definition.
    # Line 7's `0xe8()` is deliberately broken code.
    self.testcode = """
/* This code is for testing Vanir Parser. */ int test_globalvar = 10;
int test_func1(const unsigned int64 test_arg) {
const struct teststruct *testvar = (struct teststruct *)globalvar->data;
test_func2(test_arg, testvar); // some comment.
/* additional comment line. */
0xe8(); // broken code -- won't be counted as func call.
return 0;
}
void test_func_decl(int myarg);
void test_func_def(int myarg) {}
"""
    testfile = self.create_tempfile('testfile.c', content=self.testcode)
    self.test_filename = testfile.full_path
    # Expected line-number -> tokens mapping. Comment-only or blank lines
    # (1 and 6) contribute no tokens and are absent from the mapping.
    self.expected_tokens = {
        2: ['int', 'test_globalvar', '=', '10', ';'],
        3: [
            'int', 'test_func1', '(', 'const', 'unsigned', 'int64', 'test_arg',
            ')', '{'
        ],
        4: [
            'const', 'struct', 'teststruct', '*', 'testvar', '=', '(', 'struct',
            'teststruct', '*', ')', 'globalvar', '->', 'data', ';'
        ],
        5: ['test_func2', '(', 'test_arg', ',', 'testvar', ')', ';'],
        7: ['0xe8', '(', ')', ';'],
        8: ['return', '0', ';'],
        9: ['}'],
        10: ['void', 'test_func_decl', '(', 'int', 'myarg', ')', ';'],
        11: ['void', 'test_func_def', '(', 'int', 'myarg', ')', '{', '}']
    }

  def test_cpp_parser_with_line_limits(self):
    """Range (5, 7) should select only test_func1, which spans lines 3-9."""
    parser = cpp_parser.CppParser(self.test_filename)

    results = parser.get_chunks([(5, 7)])
    self.assertEmpty(results.parse_errors)
    self.assertLen(results.function_chunks, 1)
    self.assertEqual(results.function_chunks[0].name, 'test_func1')
    self.assertEqual(results.function_chunks[0].return_types, [['int']])
    self.assertEqual(results.function_chunks[0].parameters, ['test_arg'])
    self.assertEqual(
        results.function_chunks[0].used_data_types,
        [['const', 'unsigned', 'int64'], ['const', 'struct', 'teststruct'],
         ['struct', 'teststruct']])
    self.assertEqual(results.function_chunks[0].local_variables, ['testvar'])
    # 0xe8() is broken code, so only test_func2 counts as a call.
    self.assertEqual(
        results.function_chunks[0].called_functions, ['test_func2'])

    # The line chunk covers the whole file regardless of the line limits.
    self.assertEqual(results.line_chunk.tokens, self.expected_tokens)

  def test_cpp_parser_without_line_limits(self):
    """No range filter: both definitions returned; declarations are not."""
    parser = cpp_parser.CppParser(self.test_filename)

    results = parser.get_chunks()
    self.assertEmpty(results.parse_errors)
    self.assertLen(results.function_chunks, 2)
    self.assertEqual(results.function_chunks[0].name, 'test_func1')
    self.assertEqual(results.function_chunks[0].parameters, ['test_arg'])
    self.assertEqual(
        results.function_chunks[0].used_data_types,
        [['const', 'unsigned', 'int64'], ['const', 'struct', 'teststruct'],
         ['struct', 'teststruct']])
    self.assertEqual(results.function_chunks[0].local_variables, ['testvar'])
    self.assertEqual(
        results.function_chunks[0].called_functions, ['test_func2'])
    self.assertEqual(results.function_chunks[1].name, 'test_func_def')
    self.assertEqual(results.function_chunks[1].return_types, [['void']])
    self.assertEqual(results.function_chunks[1].parameters, ['myarg'])

    self.assertEqual(results.line_chunk.tokens, self.expected_tokens)

  def test_cpp_parser_with_nonexistent_file_failure(self):
    """The native core surfaces file-open failures as StatusNotOk."""
    filename = 'NonExistingFile.c'
    with self.assertRaisesRegex(status.StatusNotOk, 'Failed to open file:.*'):
      _ = cpp_parser.CppParser(filename)

  def test_cpp_parser_with_non_utf8_file(self):
    """A Latin-1 byte makes the file invalid UTF-8; fallback must succeed."""
    latin1_str = ' // \xE0'
    testfile = self.create_tempfile(
        'testfile_latein1.c',
        content=self.testcode + latin1_str,
        encoding='LATIN-1',
    )
    with self.assertLogs(level=logging.INFO) as logs:
      parser = cpp_parser.CppParser(testfile.full_path)
    results = parser.get_chunks([(5, 7)])
    self.assertEmpty(results.parse_errors)
    self.assertLen(results.function_chunks, 1)
    # Spelling matches cpp_parser's literal message, typo included.
    self.assertIn(
        'is not encoded in UTF-8. Trying altneratives.', logs.output[0]
    )

  def test_cpp_parser_with_known_encoding_file(self):
    """With no alternative encodings left, decoding must fail loudly."""
    latin1_str = ' // \xE0'
    testfile = self.create_tempfile(
        'testfile_latein1.c',
        content=self.testcode + latin1_str,
        encoding='LATIN-1',
    )
    # Delete latin-1 from the alternative encoding.
    with mock.patch.object(cpp_parser, '_ALTNERNATIVE_ENCODINGS', []):
      with self.assertRaisesRegex(ValueError, 'Failed to deocde'):
        cpp_parser.CppParser(testfile.full_path)


if __name__ == '__main__':
  absltest.main()
130 |
--------------------------------------------------------------------------------
/vanir/language_parsers/cpp/python/BUILD.bazel:
--------------------------------------------------------------------------------
1 | # Copyright 2023 Google LLC
2 | #
3 | # Use of this source code is governed by a BSD-style
4 | # license that can be found in the LICENSE file or at
5 | # https://developers.google.com/open-source/licenses/bsd
6 |
7 | # Build rule for binding C++ parser core to python using Pybind.
8 |
9 | load("@pybind11_bazel//:build_defs.bzl", "pybind_extension")
10 |
package(default_visibility = [
    "//visibility:public",
])

# Pybind11 extension exposing the native C/C++ parser core to Python as
# vanir.language_parsers.cpp.python.parser_core (built as parser_core.so).
pybind_extension(
    name = "parser_core",
    srcs = ["parser_core.cc"],
    deps = [
        "//vanir/language_parsers/cpp:parser_core",
        "@pybind11_abseil//pybind11_abseil:absl_casters",
        "@pybind11_abseil//pybind11_abseil:status_casters",
    ],
)
24 |
--------------------------------------------------------------------------------
/vanir/language_parsers/cpp/python/parser_core.cc:
--------------------------------------------------------------------------------
1 | // Copyright 2023 Google LLC
2 | //
3 | // Use of this source code is governed by a BSD-style
4 | // license that can be found in the LICENSE file or at
5 | // https://developers.google.com/open-source/licenses/bsd
6 |
7 | #include "vanir/language_parsers/cpp/parser_core.h"
8 |
9 | #include "pybind11/pybind11.h"
10 | #include "pybind11_abseil/absl_casters.h"
11 | #include "pybind11_abseil/status_casters.h"
12 |
13 | namespace vanir {
14 | namespace cpp_parser {
15 | namespace {
16 |
17 | PYBIND11_MODULE(parser_core, m) {
18 | pybind11::google::ImportStatusModule();
19 | pybind11::class_(m, "FunctionChunkRaw")
20 | .def_readwrite("name", &FunctionChunk::name_)
21 | .def_readwrite("return_type", &FunctionChunk::return_type_)
22 | .def_readwrite("parameters", &FunctionChunk::parameters_)
23 | // Since individual data type element can be used after freeing the owner
24 | // chunk in Python, access to used_data_types_ transfers the ownership of
25 | // individual data type element to Python.
26 | .def_property_readonly("used_data_types",
27 | [](const FunctionChunk& func_chunk) {
28 | auto used_data_types = pybind11::list();
29 | for (auto& data_type : func_chunk.used_data_types_) {
30 | used_data_types.append(pybind11::cast(
31 | *data_type, pybind11::return_value_policy::take_ownership));
32 | }
33 | return used_data_types;
34 | })
35 | .def_readwrite("local_variables", &FunctionChunk::local_variables_)
36 | .def_readwrite("called_functions", &FunctionChunk::called_functions_)
37 | .def_readwrite("tokens", &FunctionChunk::tokens_);
38 |
39 | pybind11::class_(m, "LineChunkRaw")
40 | .def_readwrite("tokens", &LineChunk::tokens_);
41 |
42 | pybind11::class_(m, "ParseErrorRaw")
43 | .def_readonly("line", &ParseError::line)
44 | .def_readonly("column", &ParseError::column)
45 | .def_readonly("bad_token", &ParseError::bad_token)
46 | .def_readonly("message", &ParseError::message);
47 |
48 | // GetFunctionChunks and GetLineChunk transfer the ownership.
49 | pybind11::class_(m, "ParserCore")
50 | .def(pybind11::init())
51 | .def("init", &ParserCore::Init)
52 | .def("get_function_chunks", &ParserCore::GetFunctionChunks,
53 | pybind11::arg("affected_line_ranges"))
54 | .def("get_line_chunk", &ParserCore::GetLineChunk)
55 | .def("get_parse_errors", &ParserCore::GetParseErrors);
56 | }
57 |
58 | } // namespace
59 | } // namespace cpp_parser
60 | } // namespace vanir
61 |
--------------------------------------------------------------------------------
/vanir/language_parsers/java/BUILD.bazel:
--------------------------------------------------------------------------------
1 | # Copyright 2023 Google LLC
2 | #
3 | # Use of this source code is governed by a BSD-style
4 | # license that can be found in the LICENSE file or at
5 | # https://developers.google.com/open-source/licenses/bsd
6 |
7 | load("@rules_python//python:defs.bzl", "py_library", "py_test")
8 | load("@vanir_deps//:requirements.bzl", "requirement")
9 | load(":antlr4.external.bzl", "antlr4_cc_gen")
10 |
package(default_visibility = [
    "//visibility:public",
])

# Antlr4 tool version used to generate the Java lexer/parser C++ sources.
ANTLR4_VER = "4.11.1"

# C++ lexer generated from the upstream Java lexer grammar (no listener).
antlr4_cc_gen(
    name = "java_cc_lexer",
    srcs = ["@antlr4_grammar_java_lexer_g4//file"],
    antlr4_ver = ANTLR4_VER,
    cc_files_prefix = "JavaLexer",
    cc_namespace = "java_cc_lexer",
    listener = False,
)

# C++ parser with tree-walk listener classes; the parser grammar also needs
# the lexer grammar as an input.
antlr4_cc_gen(
    name = "java_cc_parser",
    srcs = [
        "@antlr4_grammar_java_lexer_g4//file",
        "@antlr4_grammar_java_parser_g4//file",
    ],
    antlr4_ver = ANTLR4_VER,
    cc_files_prefix = "JavaParser",
    cc_namespace = "java_cc_parser",
    listener = True,
)

# Native Java parser core wrapping the generated Antlr4 lexer/parser.
cc_library(
    name = "parser_core",
    srcs = ["parser_core.cc"],
    hdrs = ["parser_core.h"],
    deps = [
        ":java_cc_lexer",
        ":java_cc_parser",
        "@com_google_absl//absl/log:check",
        "@com_google_absl//absl/status:statusor",
    ],
)

# Python-facing Java parser; loads the Pybind extension at runtime.
py_library(
    name = "java_parser",
    srcs = ["java_parser.py"],
    data = [
        "//vanir/language_parsers/java/python:parser_core.so",
    ],
    deps = [
        "//vanir/language_parsers:abstract_language_parser",
        "//vanir/language_parsers:common",
    ],
)

py_test(
    name = "java_parser_test",
    size = "small",
    srcs = ["java_parser_test.py"],
    # status.so is needed at runtime for pybind11_abseil status exceptions.
    data = ["@pybind11_abseil//pybind11_abseil:status.so"],
    deps = [
        ":java_parser",
        requirement("absl-py"),
    ],
)
72 |
--------------------------------------------------------------------------------
/vanir/language_parsers/java/antlr4.external.bzl:
--------------------------------------------------------------------------------
1 | # Copyright 2023 Google LLC
2 | #
3 | # Use of this source code is governed by a BSD-style
4 | # license that can be found in the LICENSE file or at
5 | # https://developers.google.com/open-source/licenses/bsd
6 |
7 | """Starlark macro to wrap Antlr4 code and library generation from grammar files."""
8 |
9 | load("@antlr4_deps//:requirements.bzl", "requirement")
10 |
def antlr4_cc_gen(name, srcs, cc_namespace, cc_files_prefix, antlr4_ver, listener):
    """Generates the C++ source corresponding to an Antlr4 lexer definition.

    Args:
      name: name of the parser/lexer library target
      srcs: grammar files
      cc_namespace: C++ namespace to put the parser/lexer under
      cc_files_prefix: prefix for all generated C++ files
      antlr4_ver: specify antlr4 tools version
      listener: whether to generate antlr4 listener classes
    """

    # Antlr4 always emits <prefix>.h/.cpp; listener mode adds the listener
    # base/interface classes as separate sources.
    out_src_files = [
        "%s.h" % cc_files_prefix,
        "%s.cpp" % cc_files_prefix,
    ]
    if listener:
        out_src_files += [
            "%sBaseListener.h" % cc_files_prefix,
            "%sBaseListener.cpp" % cc_files_prefix,
            "%sListener.h" % cc_files_prefix,
            "%sListener.cpp" % cc_files_prefix,
        ]
    extra_args = "-listener" if listener else "-no-listener"

    # Shell command run by the genrule. `$$` escapes `$` for genrule Make
    # variable expansion, so $$VANIR_ANTLR_TMPDIR is a shell variable. Antlr4
    # writes into a temp dir (-o, with -Xexact-output-dir to keep the layout
    # flat), then the expected outputs are copied into the genrule output
    # directory ($(@D)) and the temp dir is removed.
    cmd = (
        "VANIR_ANTLR_TMPDIR=$$(mktemp -d);" +
        "$(locations @antlr4_entry_points//:antlr4) " +
        "-v " + antlr4_ver + " " +
        "$(SRCS) " +
        "-no-visitor " +
        "-Dlanguage=Cpp " +
        "-package " + cc_namespace + " " +
        "-o $$VANIR_ANTLR_TMPDIR " +
        "-Xexact-output-dir " +
        extra_args + ";" +
        "cp " + " ".join([("$$VANIR_ANTLR_TMPDIR/" + f) for f in out_src_files]) + " $(@D);" +
        "rm -r $$VANIR_ANTLR_TMPDIR"
    )

    native.genrule(
        name = name + "_src",
        srcs = srcs,
        outs = out_src_files,
        cmd = cmd,
        # NOTE(review): runs outside the sandbox -- presumably because the
        # antlr4 wrapper fetches/caches the tool jar; confirm before removing.
        local = True,
        tools = [
            requirement("antlr4-tools"),
            "@antlr4_entry_points//:antlr4",
        ],
    )

    # Compile the generated sources against the Antlr4 C++ runtime.
    native.cc_library(
        name = name,
        srcs = [(":" + f) for f in out_src_files if f.endswith(".cpp")],
        hdrs = [(":" + f) for f in out_src_files if f.endswith(".h")],
        deps = [
            ":{target}_src".format(target = name),
            "@antlr4_runtimes//:cpp",
        ],
        linkstatic = 1,
    )
71 |
--------------------------------------------------------------------------------
/vanir/language_parsers/java/java_parser.py:
--------------------------------------------------------------------------------
1 | # Copyright 2023 Google LLC
2 | #
3 | # Use of this source code is governed by a BSD-style
4 | # license that can be found in the LICENSE file or at
5 | # https://developers.google.com/open-source/licenses/bsd
6 |
7 | """Vanir Java parser.
8 |
9 | This module implements an AbstractLanguageParser that handles all .java files.
10 | """
11 |
12 | from typing import Iterable, Optional, Sequence, Tuple
13 |
14 | from vanir.language_parsers import abstract_language_parser
15 | from vanir.language_parsers import common
16 | from vanir.language_parsers.java.python import parser_core
17 |
18 |
class JavaParser(abstract_language_parser.AbstractLanguageParser):
  """Vanir Java parser.

  This class implements the AbstractLanguageParser base class.
  """

  def __init__(self, filename: str):
    # The native Pybind-wrapped parser core does the actual parsing.
    self.parser = parser_core.ParserCore(filename)

  @classmethod
  def get_supported_extensions(cls) -> Iterable[str]:
    """Returns the file extensions handled by this parser."""
    return ['.java']

  def get_chunks(
      self,
      affected_line_ranges_for_functions: Optional[
          Sequence[Tuple[int, int]]
      ] = None,
  ) -> common.ParseResults:
    """Parses the file and converts raw results into the common dataclasses.

    Args:
      affected_line_ranges_for_functions: (start, end) line ranges used to
        filter function chunks; None or empty selects all functions.

    Returns:
      A common.ParseResults with function chunks, the line chunk and errors.
    """
    line_ranges = affected_line_ranges_for_functions or []
    raw_chunks, raw_line_chunk, raw_errors = self.parser.parse(line_ranges)

    function_chunks = [
        common.FunctionChunkBase(
            name=raw.name,
            # The raw chunk carries a single return type; wrap it in a list.
            return_types=[raw.return_type],
            parameters=raw.parameters,
            used_data_types=raw.used_data_types,
            local_variables=raw.local_variables,
            called_functions=raw.called_functions,
            tokens=raw.tokens,
        )
        for raw in raw_chunks
    ]
    errors = [
        common.ParseError(err.line, err.column, err.bad_token, err.message)
        for err in raw_errors
    ]
    return common.ParseResults(
        function_chunks, common.LineChunkBase(raw_line_chunk.tokens_), errors)
66 |
--------------------------------------------------------------------------------
/vanir/language_parsers/java/parser_core.h:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright 2023 Google LLC
3 | *
4 | * Use of this source code is governed by a BSD-style
5 | * license that can be found in the LICENSE file or at
6 | * https://developers.google.com/open-source/licenses/bsd
7 | */
8 |
9 | #ifndef VANIR_LANGUAGE_PARSERS_JAVA_PARSER_CORE_H_
10 | #define VANIR_LANGUAGE_PARSERS_JAVA_PARSER_CORE_H_
11 |
#include <memory>
#include <string>
#include <unordered_map>
#include <vector>
16 |
17 | #include "absl/status/statusor.h"
18 | #include "vanir/language_parsers/java/JavaParser.h"
19 | #include "vanir/language_parsers/java/JavaParserBaseListener.h"
20 |
21 | namespace vanir {
22 | namespace java_parser {
23 |
24 | using ::java_cc_parser::JavaParser;
25 | using ::java_cc_parser::JavaParserBaseListener;
26 |
27 | // Container for a function and its metadata extracted by the parser
// Container for a function and its metadata extracted by the parser.
//
// NOTE(review): the extracted copy of this header had lost every
// angle-bracketed template argument (e.g. `std::vector return_type_;`).
// The element types below were reconstructed from the Python layer, which
// treats each field as a sequence of token strings, and from the pybind
// bindings, which iterate used_data_types_ as owned pointers whose pointees'
// ownership is handed to Python. Confirm against the original header.
class FunctionChunk {
 public:
  explicit FunctionChunk()
      : line_start_(0), line_stop_(0), start_token_idx_(0), stop_token_idx_(0)
  {}

  std::string name_;
  std::vector<std::string> return_type_;
  std::vector<std::string> parameters_;
  // Each entry is one data type spelled as a token sequence, heap-owned so
  // ownership can be transferred to Python independently of this chunk.
  std::vector<std::unique_ptr<std::vector<std::string>>> used_data_types_;
  std::vector<std::string> local_variables_;
  std::vector<std::string> called_functions_;
  std::vector<std::string> tokens_;
  // Source line span and token-stream index span of the function body.
  size_t line_start_, line_stop_;
  size_t start_token_idx_, stop_token_idx_;

 private:
  // Noncopyable/nonmovable: chunks are handled through pointers.
  FunctionChunk(const FunctionChunk &) = delete;
  FunctionChunk(FunctionChunk &&) = delete;
  FunctionChunk &operator=(const FunctionChunk &) = delete;
  FunctionChunk &operator=(FunctionChunk &&) = delete;
};
50 |
51 | // LineChunk is a wrapper class for a map from line numbers to all tokens in
52 | // that line. This is needed instead of a simple type alias because pybind's
53 | // automatic conversion of wrappers (e.g. unique_ptr) only supports custom
54 | // types, and not e.g. unordered_map.
// LineChunk is a wrapper class for a map from line numbers to all tokens in
// that line. This is needed instead of a simple type alias because pybind's
// automatic conversion of wrappers (e.g. unique_ptr) only supports custom
// types, and not e.g. unordered_map.
//
// NOTE(review): template arguments reconstructed (the extracted copy had
// stripped them); the Python layer exposes tokens as
// Mapping[int, Sequence[str]]. Confirm key type against the original header.
class LineChunk {
 public:
  explicit LineChunk() {}
  // Line number -> tokens appearing on that line.
  std::unordered_map<size_t, std::vector<std::string>> tokens_;

 private:
  // Noncopyable/nonmovable: chunks are handled through pointers.
  LineChunk(const LineChunk &) = delete;
  LineChunk(LineChunk &&) = delete;
  LineChunk &operator=(const LineChunk &) = delete;
  LineChunk &operator=(LineChunk &&) = delete;
};
66 |
67 | // Container for any error encountered during parsing
// Container for any error encountered during parsing.
struct ParseError {
  // Position where the error was reported. NOTE(review): numbering base is
  // determined by Antlr4's error listener (typically 1-based line, 0-based
  // column) -- confirm in parser_core.cc.
  size_t line, column;
  std::string bad_token;  // Text of the offending token.
  std::string message;    // Human-readable diagnostic from the parser.
};
73 |
74 | // Antlr4 parser tree walking listener.
75 | class FileListener : public JavaParserBaseListener {
76 | public:
77 | explicit FileListener(
78 | antlr4::BufferedTokenStream &tokens,
79 | std::vector> function_line_ranges = {})
80 | : token_stream_(tokens), function_line_ranges_(function_line_ranges) {}
81 |
82 | void enterMethodDeclaration(JavaParser::MethodDeclarationContext*) override;
83 | void exitMethodDeclaration(JavaParser::MethodDeclarationContext*) override;
84 | void enterConstructorDeclaration(
85 | JavaParser::ConstructorDeclarationContext*) override;
86 | void exitConstructorDeclaration(
87 | JavaParser::ConstructorDeclarationContext*) override;
88 |
89 | void enterTypeType(JavaParser::TypeTypeContext*) override;
90 |
91 | void enterLocalVariableDeclaration(
92 | JavaParser::LocalVariableDeclarationContext*) override;
93 |
94 | void enterMethodCall(JavaParser::MethodCallContext*) override;
95 | void enterCreator(JavaParser::CreatorContext*) override;
96 |
97 | std::vector> GetFunctionChunks();
98 |
99 | private:
100 | antlr4::BufferedTokenStream &token_stream_;
101 |
102 | // last_type_token_stop_idx_ holds the last token in a typeType, so that we
103 | // can ignore all other nested typeType, e.g. `ArrayList