├── .bazelrc ├── .bazelversion ├── BUILD.bazel ├── CONTRIBUTING.md ├── LICENSE ├── MODULE.bazel ├── README.md ├── WORKSPACE.bazel ├── docs └── images │ ├── vanir_detector_report.png │ ├── vanir_macro_arch.png │ └── vanir_micro_arch.png ├── extensions.bzl ├── repositories.bzl ├── requirements.txt ├── requirements_antlr4.txt ├── requirements_antlr4_lock.txt ├── requirements_lock.txt └── vanir ├── cache ├── BUILD.bazel └── ecosystem_file_lists.json ├── code_extractors ├── BUILD.bazel ├── code_extractor.py ├── code_extractor_android.py ├── code_extractor_android_test.py ├── code_extractor_base.py └── code_extractor_test.py ├── detector_common_flags.py ├── detector_common_flags_test.py ├── detector_runner.py ├── detector_runner_test.py ├── file_list_manager.py ├── file_list_manager_test.py ├── hasher.py ├── hasher_test.py ├── integration_tests ├── BUILD.bazel └── missing_patch_detection_hermetic_test.py ├── language_parsers ├── BUILD.bazel ├── abstract_language_parser.py ├── common.py ├── cpp │ ├── BUILD.bazel │ ├── cpp_parser.py │ ├── cpp_parser_test.py │ ├── parser_core.cc │ ├── parser_core.h │ └── python │ │ ├── BUILD.bazel │ │ └── parser_core.cc ├── java │ ├── BUILD.bazel │ ├── antlr4.external.bzl │ ├── java_parser.py │ ├── java_parser_test.py │ ├── parser_core.cc │ ├── parser_core.h │ └── python │ │ ├── BUILD.bazel │ │ └── parser_core.cc └── language_parsers.py ├── normalizer.py ├── normalizer_test.py ├── osv_client.py ├── overwrite_specs_validity_test.py ├── parser.py ├── parser_test.py ├── refiner.py ├── refiner_test.py ├── reporter.py ├── reporter_test.py ├── scanners ├── BUILD.bazel ├── android_kernel_scanner.py ├── android_kernel_scanner_test.py ├── offline_directory_scanner.py ├── offline_directory_scanner_test.py ├── package_identifier.py ├── package_identifier_test.py ├── package_scanner.py ├── package_scanner_test.py ├── repo_scanner.py ├── repo_scanner_test.py ├── scanner_base.py ├── scanner_base_test.py ├── target_selection_strategy.py └── 
target_selection_strategy_test.py ├── sign_generator.py ├── sign_generator_runner.py ├── sign_generator_runner_test.py ├── sign_generator_test.py ├── signature.py ├── signature_test.py ├── testdata ├── BUILD.bazel ├── gitiles │ ├── b2dc041a4e84986e3a6932b127d3a18ef02b6d0a.patch.base64 │ ├── b2dc041a4e84986e3a6932b127d3a18ef02b6d0a.patchinfo.base64 │ ├── b2dc041a4e84986e3a6932b127d3a18ef02b6d0a_services_core_java_com_android_server_om_OverlayManagerService.java.base64 │ └── c3c9ada6e5d946ce7d224649f1d6528ce80b1a24_services_core_java_com_android_server_om_OverlayManagerService.java.base64 ├── test_frameworks_base.tar.gz ├── test_overwrite_specs.json ├── test_patch_file ├── test_patched_file ├── test_signatures.json ├── test_signatures.py ├── test_signatures.zip ├── test_unpatched_file ├── test_unrelated_file ├── test_vulnerabilities_kernel.json └── test_vulnerabilities_platform.json ├── truncated_path.py ├── truncated_path_test.py ├── vanir_test_base.py ├── version_extractor.py ├── version_extractor_test.py ├── vulnerability.py ├── vulnerability_manager.py ├── vulnerability_manager_test.py ├── vulnerability_overwriter.py ├── vulnerability_overwriter_test.py └── vulnerability_test.py /.bazelrc: -------------------------------------------------------------------------------- 1 | # Copyright 2023 Google LLC 2 | # 3 | # Use of this source code is governed by a BSD-style 4 | # license that can be found in the LICENSE file or at 5 | # https://developers.google.com/open-source/licenses/bsd 6 | 7 | # The following file specifies the Bazel configuration. Currently, Vanir is 8 | # only tested with Bazel >= 6.0.0. For Bazel >= 7.2.0, Vanir runs 9 | # without any changes in this file. For earlier versions, following 10 | # configuration changes are needed. 11 | 12 | # In order to run on Bazel <= 7.1.0, following line needs to be 13 | # commented as bzlmod build does not work on Bazel 7.1 and earlier. 
14 | 15 | common --enable_workspace=False 16 | 17 | # Additionally, for Bazel 7.1.0 and 7.0.0, following line needs 18 | # to be uncommented. 19 | 20 | # common --enable_bzlmod=False 21 | 22 | build --cxxopt=-std=c++17 --cxxopt=-fexceptions --cxxopt=-Wno-nonnull --cxxopt=-Wno-sign-compare --cxxopt=-Wno-parentheses --cxxopt=-Wno-deprecated-declarations 23 | -------------------------------------------------------------------------------- /.bazelversion: -------------------------------------------------------------------------------- 1 | 8.1.1 2 | -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # How to contribute 2 | 3 | We'd love to accept your patches and contributions to this project. 4 | 5 | ## Before you begin 6 | 7 | ### Sign our Contributor License Agreement 8 | 9 | Contributions to this project must be accompanied by a 10 | [Contributor License Agreement](https://cla.developers.google.com/about) (CLA). 11 | You (or your employer) retain the copyright to your contribution; this simply 12 | gives us permission to use and redistribute your contributions as part of the 13 | project. 14 | 15 | If you or your current employer have already signed the Google CLA (even if it 16 | was for a different project), you probably don't need to do it again. 17 | 18 | Visit <https://cla.developers.google.com/> to see your current agreements or to 19 | sign a new one. 20 | 21 | ### Review our community guidelines 22 | 23 | This project follows 24 | [Google's Open Source Community Guidelines](https://opensource.google/conduct/). 25 | 26 | ## Contribution process 27 | 28 | ### Code reviews 29 | 30 | All submissions, including submissions by project members, require review. We 31 | use GitHub pull requests for this purpose. Consult 32 | [GitHub Help](https://help.github.com/articles/about-pull-requests/) for more 33 | information on using pull requests. 
34 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Copyright 2023 Google LLC 2 | 3 | Redistribution and use in source and binary forms, with or without 4 | modification, are permitted provided that the following conditions are 5 | met: 6 | 7 | * Redistributions of source code must retain the above copyright 8 | notice, this list of conditions and the following disclaimer. 9 | * Redistributions in binary form must reproduce the above 10 | copyright notice, this list of conditions and the following disclaimer 11 | in the documentation and/or other materials provided with the 12 | distribution. 13 | * Neither the name of Google LLC nor the names of its 14 | contributors may be used to endorse or promote products derived from 15 | this software without specific prior written permission. 16 | 17 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 18 | "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 19 | LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 20 | A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 21 | OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 22 | SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 23 | LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 24 | DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 25 | THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 26 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 27 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
28 | -------------------------------------------------------------------------------- /MODULE.bazel: -------------------------------------------------------------------------------- 1 | # Copyright 2025 Google LLC 2 | # 3 | # Use of this source code is governed by a BSD-style 4 | # license that can be found in the LICENSE file or at 5 | # https://developers.google.com/open-source/licenses/bsd 6 | 7 | """Vanir build and test dependencies.""" 8 | 9 | module(name = "vanir") 10 | 11 | bazel_dep(name = "rules_python", version = "1.1.0") 12 | bazel_dep(name = "platforms", version = "0.0.11") 13 | bazel_dep(name = "abseil-cpp", version = "20250127.0", repo_name = "com_google_absl") 14 | bazel_dep(name = "pybind11_bazel", version = "2.13.6") 15 | bazel_dep(name = "pybind11_abseil", version = "202402.0") 16 | bazel_dep(name = "rules_proto", version = "7.1.0") 17 | bazel_dep(name = "protobuf", version = "29.3", repo_name = "com_google_protobuf") 18 | 19 | python = use_extension("@rules_python//python/extensions:python.bzl", "python") 20 | python.toolchain( 21 | is_default = True, 22 | python_version = "3.9" 23 | ) 24 | 25 | pip = use_extension("@rules_python//python/extensions:pip.bzl", "pip") 26 | pip.parse( 27 | hub_name = "vanir_deps", 28 | python_version = "3.9", 29 | requirements_lock = "//:requirements_lock.txt", 30 | ) 31 | use_repo(pip, "vanir_deps") 32 | 33 | pip.parse( 34 | hub_name = "antlr4_deps", 35 | python_version = "3.9", 36 | requirements_lock = "//:requirements_antlr4_lock.txt", 37 | ) 38 | use_repo(pip, "antlr4_deps") 39 | 40 | antlr4_grammar_java_lexer_g4_extension = use_extension("//:extensions.bzl", "antlr4_grammar_java_lexer_g4_extension") 41 | use_repo(antlr4_grammar_java_lexer_g4_extension, "antlr4_grammar_java_lexer_g4") 42 | 43 | antlr4_grammar_java_parser_g4_extension = use_extension("//:extensions.bzl", "antlr4_grammar_java_parser_g4_extension") 44 | use_repo(antlr4_grammar_java_parser_g4_extension, "antlr4_grammar_java_parser_g4") 45 | 46 | 
antlr4_runtimes_extension = use_extension("//:extensions.bzl", "antlr4_runtimes_extension") 47 | use_repo(antlr4_runtimes_extension, "antlr4_runtimes") 48 | 49 | fuzzyc_extension = use_extension("//:extensions.bzl", "fuzzyc_extension") 50 | use_repo(fuzzyc_extension, "fuzzyc") 51 | 52 | com_google_osv_extension = use_extension("//:extensions.bzl", "com_google_osv_extension") 53 | use_repo(com_google_osv_extension, "com_google_osv") 54 | 55 | jsonpath_rw_extension = use_extension("//:extensions.bzl", "jsonpath_rw_extension") 56 | use_repo(jsonpath_rw_extension, "jsonpath-rw-git") 57 | 58 | antlr4_entry_points_extension = use_extension("//:extensions.bzl", "antlr4_entry_points_extension") 59 | use_repo(antlr4_entry_points_extension, "antlr4_entry_points") 60 | -------------------------------------------------------------------------------- /WORKSPACE.bazel: -------------------------------------------------------------------------------- 1 | # Copyright 2023 Google LLC 2 | # 3 | # Use of this source code is governed by a BSD-style 4 | # license that can be found in the LICENSE file or at 5 | # https://developers.google.com/open-source/licenses/bsd 6 | 7 | # Bazel workspace file for Vanir. 8 | 9 | workspace(name = "vanir") 10 | 11 | load("@bazel_tools//tools/build_defs/repo:http.bzl", "http_archive", "http_file") 12 | load("@bazel_tools//tools/build_defs/repo:git.bzl", "git_repository", "new_git_repository") 13 | 14 | # Install Vanir python dependencies through PIP. 
15 | http_archive( 16 | name = "rules_python", 17 | sha256 = "8c15896f6686beb5c631a4459a3aa8392daccaab805ea899c9d14215074b60ef", 18 | strip_prefix = "rules_python-0.17.3", 19 | url = "https://github.com/bazelbuild/rules_python/archive/refs/tags/0.17.3.tar.gz", 20 | ) 21 | 22 | load("@rules_python//python:repositories.bzl", "py_repositories", "python_register_toolchains") 23 | 24 | py_repositories() 25 | 26 | 27 | python_register_toolchains( 28 | name = "python3_9", 29 | python_version = "3.9", 30 | ) 31 | 32 | load("@rules_python//python:pip.bzl", "pip_parse") 33 | load("@python3_9//:defs.bzl", "interpreter") 34 | 35 | pip_parse( 36 | name = "vanir_deps", 37 | python_interpreter_target = interpreter, 38 | requirements_lock = "//:requirements_lock.txt", 39 | ) 40 | 41 | load("@vanir_deps//:requirements.bzl", "install_deps") 42 | 43 | install_deps() 44 | 45 | # Install Fuzzyc and its transitive dependencies. 46 | git_repository( 47 | name = "fuzzyc", 48 | commit = "0f00ba6804c56f5b6d91bc214a91bb05fd17fcda", 49 | remote = "https://third-party-mirror.googlesource.com/fuzzyc", 50 | ) 51 | 52 | # Install antlr4 tools 53 | pip_parse( 54 | name = "antlr4_deps", 55 | python_interpreter_target = interpreter, 56 | requirements_lock = "//:requirements_antlr4_lock.txt", 57 | ) 58 | 59 | load("@antlr4_deps//:requirements.bzl", install_antlr4_deps = "install_deps") 60 | install_antlr4_deps() 61 | 62 | new_local_repository( 63 | name = "antlr4_entry_points", 64 | path = "vanir/language_parsers/java", 65 | build_file_content = """ 66 | alias( 67 | name = "antlr4", 68 | actual = "@antlr4_deps_antlr4_tools//:rules_python_wheel_entry_point_antlr4", 69 | visibility = ["//visibility:public"], 70 | ) 71 | """ 72 | ) 73 | 74 | # Download Antlr4 Java grammar 75 | ANTLR4_JAVA_REV = "c85ec510bd7cfba4649aec1ac2cf66bebd8ce2ed" 76 | http_file( 77 | name = "antlr4_grammar_java_lexer_g4", 78 | url = "https://github.com/antlr/grammars-v4/raw/%s/java/java/JavaLexer.g4" % ANTLR4_JAVA_REV, 79 | 
downloaded_file_path = "JavaLexer.g4", 80 | sha256 = "9a812eea62aeddc7bd54f8ba9dac4615d0f3f6b98328cf46b4143fdf75ba2c92", 81 | ) 82 | http_file( 83 | name = "antlr4_grammar_java_parser_g4", 84 | url = "https://github.com/antlr/grammars-v4/raw/%s/java/java/JavaParser.g4" % ANTLR4_JAVA_REV, 85 | downloaded_file_path = "JavaParser.g4", 86 | sha256 = "0555bd978b2a7e47ec373ee0671cd13f6ba576ca8c26d127fa0b7467dd6df8ce", 87 | ) 88 | 89 | # Antlr4 CC Runtime Library. 90 | http_archive( 91 | name = "antlr4_runtimes", 92 | build_file_content = """ 93 | package(default_visibility = ["//visibility:public"]) 94 | cc_library( 95 | name = "cpp", 96 | srcs = glob(["runtime/Cpp/runtime/src/**/*.cpp"]), 97 | hdrs = glob(["runtime/Cpp/runtime/src/**/*.h"]), 98 | includes = ["runtime/Cpp/runtime/src"], 99 | ) 100 | """, 101 | sha256 = "50e87636a61daabd424d884c60f804387430920072f585a9fee2b90e2043fdcc", 102 | strip_prefix = "antlr4-4.11.1", 103 | urls = ["https://github.com/antlr/antlr4/archive/v4.11.1.tar.gz"], 104 | ) 105 | 106 | # Google absl 107 | http_archive( 108 | name = "com_google_absl", 109 | sha256 = "3ea49a7d97421b88a8c48a0de16c16048e17725c7ec0f1d3ea2683a2a75adc21", 110 | strip_prefix = "abseil-cpp-20230125.0", 111 | urls = ["https://github.com/abseil/abseil-cpp/archive/refs/tags/20230125.0.tar.gz"], 112 | ) 113 | 114 | # GoogleTest 115 | http_archive( 116 | name = "com_google_googletest", 117 | sha256 = "ad7fdba11ea011c1d925b3289cf4af2c66a352e18d4c7264392fead75e919363", 118 | strip_prefix = "googletest-1.13.0", 119 | urls = ["https://github.com/google/googletest/archive/refs/tags/v1.13.0.tar.gz"], 120 | ) 121 | 122 | # Pybind11 Bazel extension 123 | git_repository( 124 | name = "pybind11_bazel", 125 | commit = "5f458fa53870223a0de7eeb60480dd278b442698", 126 | remote = "https://github.com/pybind/pybind11_bazel.git", 127 | ) 128 | 129 | # Pybind11 130 | new_git_repository( 131 | name = "pybind11", 132 | build_file = "@pybind11_bazel//:pybind11.BUILD", 133 | remote = 
"https://github.com/pybind/pybind11.git", 134 | tag = "v2.10.4", 135 | ) 136 | 137 | load("@pybind11_bazel//:python_configure.bzl", "python_configure") 138 | 139 | python_configure( 140 | name = "local_config_python", 141 | python_interpreter_target = interpreter, 142 | ) 143 | 144 | # Pybind11 Abseil. Don't use master - it can be unstable. 145 | git_repository( 146 | name = "pybind11_abseil", 147 | commit = "13d4f99d5309df3d5afa80fe2ae332d7a2a64c6b", 148 | remote = "https://github.com/pybind/pybind11_abseil.git", 149 | ) 150 | 151 | http_archive( 152 | name = "com_google_protobuf", 153 | urls = ["https://github.com/google/protobuf/archive/v3.10.0.zip"], 154 | strip_prefix = "protobuf-3.10.0", 155 | ) 156 | 157 | # OSV 158 | OSV_REV = "bbb8ab4f0491bf367f8e1406d8ddf9e9dbf5de86" 159 | http_archive( 160 | name = "com_google_osv", 161 | strip_prefix = "osv.dev-%s" % OSV_REV, 162 | build_file_content = """ 163 | load("@com_google_protobuf//:protobuf.bzl", "py_proto_library") 164 | load("@rules_proto//proto:defs.bzl", "proto_library") 165 | 166 | package(default_visibility = ["//visibility:public"]) 167 | 168 | PROTO_FILES = [ 169 | "osv/vulnerability.proto", 170 | ] 171 | 172 | filegroup( 173 | name = "protobuf_files", 174 | srcs = PROTO_FILES, 175 | visibility = ["//visibility:public"], 176 | ) 177 | 178 | proto_library( 179 | name = 'vulnerability_proto', 180 | srcs = PROTO_FILES, 181 | deps = [ 182 | '@com_google_protobuf//:struct_proto', 183 | '@com_google_protobuf//:timestamp_proto', 184 | ], 185 | visibility = ['//visibility:public'], 186 | ) 187 | 188 | py_proto_library( 189 | name = "vulnerability_py_pb2", 190 | visibility = ["//visibility:public"], 191 | srcs = PROTO_FILES, 192 | deps = [ 193 | "@com_google_protobuf//:protobuf_python", 194 | ], 195 | ) 196 | """, 197 | urls = ["https://github.com/google/osv.dev/archive/%s.tar.gz" % OSV_REV], 198 | ) 199 | 200 | load("@com_google_protobuf//:protobuf_deps.bzl", "protobuf_deps") 201 | 202 | protobuf_deps() 203 | 
204 | 205 | # jsonpath_rw 206 | git_repository( 207 | name = "jsonpath-rw-git", 208 | build_file_content = """ 209 | load("@rules_python//python:defs.bzl", "py_library") 210 | load("@vanir_deps//:requirements.bzl", "requirement") 211 | 212 | py_library( 213 | name = "jsonpath_rw", 214 | visibility = ["//visibility:public"], 215 | srcs = [ 216 | "jsonpath_rw/__init__.py", 217 | "jsonpath_rw/jsonpath.py", 218 | "jsonpath_rw/lexer.py", 219 | "jsonpath_rw/parser.py" 220 | ], 221 | srcs_version = "PY3", 222 | deps = [ 223 | requirement("six"), 224 | requirement("ply"), 225 | requirement("decorator"), 226 | ], 227 | ) 228 | """, 229 | commit = "6f5647bb3ad2395c20f0191fef07a1df51c9fed8", 230 | remote = "https://github.com/kennknowles/python-jsonpath-rw.git", 231 | ) 232 | -------------------------------------------------------------------------------- /docs/images/vanir_detector_report.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/vanir/fe4afbc9215e786b643431694040a076c3af2c64/docs/images/vanir_detector_report.png -------------------------------------------------------------------------------- /docs/images/vanir_macro_arch.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/vanir/fe4afbc9215e786b643431694040a076c3af2c64/docs/images/vanir_macro_arch.png -------------------------------------------------------------------------------- /docs/images/vanir_micro_arch.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/vanir/fe4afbc9215e786b643431694040a076c3af2c64/docs/images/vanir_micro_arch.png -------------------------------------------------------------------------------- /extensions.bzl: -------------------------------------------------------------------------------- 1 | # Copyright 2025 Google LLC 2 | # 3 | # Use of this source code is governed 
by a BSD-style 4 | # license that can be found in the LICENSE file or at 5 | # https://developers.google.com/open-source/licenses/bsd 6 | 7 | """Extensions for Vanir.""" 8 | 9 | load( 10 | "//:repositories.bzl", 11 | "antlr4_entry_points_repo", 12 | "antlr4_grammar_java_lexer_g4_repo", 13 | "antlr4_grammar_java_parser_g4_repo", 14 | "antlr4_runtimes_repo", 15 | "com_google_osv_repo", 16 | "fuzzyc_repo", 17 | "jsonpath_rw_repo", 18 | ) 19 | 20 | def _antlr4_grammar_java_lexer_g4_impl(_ctx): 21 | antlr4_grammar_java_lexer_g4_repo() 22 | 23 | antlr4_grammar_java_lexer_g4_extension = module_extension( 24 | implementation = _antlr4_grammar_java_lexer_g4_impl, 25 | ) 26 | 27 | def _antlr4_grammar_java_parser_g4_impl(_ctx): 28 | antlr4_grammar_java_parser_g4_repo() 29 | 30 | antlr4_grammar_java_parser_g4_extension = module_extension( 31 | implementation = _antlr4_grammar_java_parser_g4_impl, 32 | ) 33 | 34 | def _fuzzyc_impl(_ctx): 35 | fuzzyc_repo() 36 | 37 | fuzzyc_extension = module_extension(implementation = _fuzzyc_impl) 38 | 39 | def _antlr4_runtimes_impl(_ctx): 40 | antlr4_runtimes_repo() 41 | 42 | antlr4_runtimes_extension = module_extension(implementation = _antlr4_runtimes_impl) 43 | 44 | def _com_google_osv_impl(_ctx): 45 | com_google_osv_repo() 46 | 47 | com_google_osv_extension = module_extension(implementation = _com_google_osv_impl) 48 | 49 | def _jsonpath_rw_impl(_ctx): 50 | jsonpath_rw_repo() 51 | 52 | jsonpath_rw_extension = module_extension(implementation = _jsonpath_rw_impl) 53 | 54 | def _antlr4_entry_points_impl(_ctx): 55 | antlr4_entry_points_repo() 56 | 57 | antlr4_entry_points_extension = module_extension(implementation = _antlr4_entry_points_impl) 58 | -------------------------------------------------------------------------------- /repositories.bzl: -------------------------------------------------------------------------------- 1 | # Copyright 2025 Google LLC 2 | # 3 | # Use of this source code is governed by a BSD-style 4 | # license that can 
be found in the LICENSE file or at 5 | # https://developers.google.com/open-source/licenses/bsd 6 | 7 | """Repositories for Vanir.""" 8 | 9 | load("@bazel_tools//tools/build_defs/repo:git.bzl", "git_repository") 10 | load("@bazel_tools//tools/build_defs/repo:http.bzl", "http_archive", "http_file") 11 | load("@bazel_tools//tools/build_defs/repo:local.bzl", "new_local_repository") 12 | 13 | ANTLR4_JAVA_REV = "c85ec510bd7cfba4649aec1ac2cf66bebd8ce2ed" 14 | 15 | def antlr4_grammar_java_parser_g4_repo(): 16 | # Download Antlr4 Java grammar - Parser 17 | http_file( 18 | name = "antlr4_grammar_java_parser_g4", 19 | url = "https://github.com/antlr/grammars-v4/raw/%s/java/java/JavaParser.g4" % ANTLR4_JAVA_REV, 20 | downloaded_file_path = "JavaParser.g4", 21 | sha256 = "0555bd978b2a7e47ec373ee0671cd13f6ba576ca8c26d127fa0b7467dd6df8ce", 22 | ) 23 | 24 | def antlr4_grammar_java_lexer_g4_repo(): 25 | # Download Antlr4 Java grammar - Lexer 26 | http_file( 27 | name = "antlr4_grammar_java_lexer_g4", 28 | url = "https://github.com/antlr/grammars-v4/raw/%s/java/java/JavaLexer.g4" % ANTLR4_JAVA_REV, 29 | downloaded_file_path = "JavaLexer.g4", 30 | sha256 = "9a812eea62aeddc7bd54f8ba9dac4615d0f3f6b98328cf46b4143fdf75ba2c92", 31 | ) 32 | 33 | def fuzzyc_repo(): 34 | git_repository( 35 | name = "fuzzyc", 36 | commit = "f227d19e433a53e264ec6151c66dd85ec53b4c71", 37 | remote = "https://third-party-mirror.googlesource.com/fuzzyc", 38 | ) 39 | 40 | def antlr4_runtimes_repo(): 41 | http_archive( 42 | name = "antlr4_runtimes", 43 | build_file_content = """ 44 | package(default_visibility = ["//visibility:public"]) 45 | cc_library( 46 | name = "cpp", 47 | srcs = glob(["runtime/Cpp/runtime/src/**/*.cpp"]), 48 | hdrs = glob(["runtime/Cpp/runtime/src/**/*.h"]), 49 | includes = ["runtime/Cpp/runtime/src"], 50 | ) 51 | """, 52 | sha256 = "50e87636a61daabd424d884c60f804387430920072f585a9fee2b90e2043fdcc", 53 | strip_prefix = "antlr4-4.11.1", 54 | urls = 
["https://github.com/antlr/antlr4/archive/v4.11.1.tar.gz"], 55 | ) 56 | 57 | def com_google_osv_repo(): 58 | # OSV 59 | OSV_REV = "bbb8ab4f0491bf367f8e1406d8ddf9e9dbf5de86" 60 | http_archive( 61 | name = "com_google_osv", 62 | strip_prefix = "osv.dev-%s" % OSV_REV, 63 | build_file_content = """ 64 | load("@com_google_protobuf//bazel:py_proto_library.bzl", "py_proto_library") 65 | load("@rules_proto//proto:defs.bzl", "proto_library") 66 | 67 | package(default_visibility = ["//visibility:public"]) 68 | 69 | PROTO_FILES = [ 70 | "osv/vulnerability.proto", 71 | ] 72 | 73 | filegroup( 74 | name = "protobuf_files", 75 | srcs = PROTO_FILES, 76 | visibility = ["//visibility:public"], 77 | ) 78 | 79 | proto_library( 80 | name = 'vulnerability_proto', 81 | srcs = PROTO_FILES, 82 | deps = [ 83 | '@com_google_protobuf//:struct_proto', 84 | '@com_google_protobuf//:timestamp_proto', 85 | ], 86 | visibility = ['//visibility:public'], 87 | ) 88 | 89 | py_proto_library( 90 | name = "vulnerability_py_pb2", 91 | deps = [ 92 | "vulnerability_proto", 93 | ], 94 | ) 95 | """, 96 | urls = ["https://github.com/google/osv.dev/archive/%s.tar.gz" % OSV_REV], 97 | ) 98 | 99 | def jsonpath_rw_repo(): 100 | git_repository( 101 | name = "jsonpath-rw-git", 102 | build_file_content = """ 103 | load("@rules_python//python:defs.bzl", "py_library") 104 | load("@vanir_deps//:requirements.bzl", "requirement") 105 | 106 | py_library( 107 | name = "jsonpath_rw", 108 | visibility = ["//visibility:public"], 109 | srcs = [ 110 | "jsonpath_rw/__init__.py", 111 | "jsonpath_rw/jsonpath.py", 112 | "jsonpath_rw/lexer.py", 113 | "jsonpath_rw/parser.py" 114 | ], 115 | srcs_version = "PY3", 116 | deps = [ 117 | requirement("six"), 118 | requirement("ply"), 119 | requirement("decorator"), 120 | ], 121 | ) 122 | """, 123 | commit = "6f5647bb3ad2395c20f0191fef07a1df51c9fed8", 124 | remote = "https://github.com/kennknowles/python-jsonpath-rw.git", 125 | ) 126 | 127 | def antlr4_entry_points_repo(): 128 | 
new_local_repository( 129 | name = "antlr4_entry_points", 130 | path = "vanir/language_parsers/java", 131 | build_file_content = """ 132 | load("@rules_python//python/entry_points:py_console_script_binary.bzl", "py_console_script_binary") 133 | 134 | py_console_script_binary( 135 | name = "antlr4", 136 | pkg = "@antlr4_deps//antlr4_tools", 137 | script = "antlr4", 138 | visibility = ["//visibility:public"], 139 | ) 140 | """, 141 | ) 142 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | # Vanir direct dependencies 2 | requests 3 | absl-py 4 | mmh3 5 | unidiff 6 | jinja2 7 | typing_extensions>=4,<5 8 | python-dateutil 9 | 10 | # jsonpath_rw dependencies 11 | six 12 | ply 13 | decorator 14 | -------------------------------------------------------------------------------- /requirements_antlr4.txt: -------------------------------------------------------------------------------- 1 | antlr4-tools==0.2 2 | -------------------------------------------------------------------------------- /requirements_antlr4_lock.txt: -------------------------------------------------------------------------------- 1 | # 2 | # This file is autogenerated by pip-compile with Python 3.10 3 | # by the following command: 4 | # 5 | # pip-compile --output-file=requirements_antlr4_lock.txt requirements_antlr4.txt 6 | # 7 | antlr4-tools==0.2 8 | # via -r requirements_fuzzyc.txt 9 | install-jdk==0.3.0 10 | # via antlr4-tools 11 | -------------------------------------------------------------------------------- /requirements_lock.txt: -------------------------------------------------------------------------------- 1 | # 2 | # This file is autogenerated by pip-compile with Python 3.9 3 | # by the following command: 4 | # 5 | # pip-compile --output-file=requirements_lock.txt requirements.txt 6 | # 7 | absl-py==2.1.0 8 | # via -r requirements.txt 9 | certifi==2024.12.14 
10 | # via requests 11 | charset-normalizer==3.4.1 12 | # via requests 13 | decorator==5.1.1 14 | # via -r requirements.txt 15 | idna==3.10 16 | # via requests 17 | jinja2==3.1.5 18 | # via -r requirements.txt 19 | markupsafe==3.0.2 20 | # via jinja2 21 | mmh3==5.0.1 22 | # via -r requirements.txt 23 | ply==3.11 24 | # via -r requirements.txt 25 | python-dateutil==2.9.0.post0 26 | # via -r requirements.txt 27 | requests==2.32.3 28 | # via -r requirements.txt 29 | six==1.17.0 30 | # via 31 | # -r requirements.txt 32 | # python-dateutil 33 | typing-extensions==4.12.2 34 | # via -r requirements.txt 35 | unidiff==0.7.5 36 | # via -r requirements.txt 37 | urllib3==2.3.0 38 | # via requests 39 | -------------------------------------------------------------------------------- /vanir/cache/BUILD.bazel: -------------------------------------------------------------------------------- 1 | # Copyright 2023 Google LLC 2 | # 3 | # Use of this source code is governed by a BSD-style 4 | # license that can be found in the LICENSE file or at 5 | # https://developers.google.com/open-source/licenses/bsd 6 | 7 | # Package for Vanir cached data. 8 | package(default_visibility = ["//visibility:public"]) 9 | 10 | filegroup( 11 | name = "ecosystem_file_lists", 12 | srcs = [ 13 | "ecosystem_file_lists.json", 14 | ], 15 | ) 16 | -------------------------------------------------------------------------------- /vanir/code_extractors/BUILD.bazel: -------------------------------------------------------------------------------- 1 | # Copyright 2023 Google LLC 2 | # 3 | # Use of this source code is governed by a BSD-style 4 | # license that can be found in the LICENSE file or at 5 | # https://developers.google.com/open-source/licenses/bsd 6 | 7 | # Bazel build rules for Vanir code extractors. 
8 | load("@rules_python//python:defs.bzl", "py_library", "py_test") 9 | load("@vanir_deps//:requirements.bzl", "requirement") 10 | 11 | package(default_visibility = [ 12 | "//visibility:public", 13 | ]) 14 | 15 | py_library( 16 | name = "code_extractor_base", 17 | srcs = ["code_extractor_base.py"], 18 | deps = [ 19 | "//:vulnerability", 20 | requirement("requests"), 21 | requirement("unidiff"), 22 | ], 23 | ) 24 | 25 | py_library( 26 | name = "code_extractor", 27 | srcs = ["code_extractor.py"], 28 | deps = [ 29 | ":code_extractor_android", 30 | ":code_extractor_base", 31 | "//:vulnerability", 32 | requirement("requests"), 33 | ], 34 | ) 35 | 36 | py_library( 37 | name = "code_extractor_android", 38 | srcs = ["code_extractor_android.py"], 39 | deps = [ 40 | ":code_extractor_base", 41 | "//:vulnerability", 42 | requirement("requests"), 43 | requirement("unidiff"), 44 | ], 45 | ) 46 | 47 | py_test( 48 | name = "code_extractor_test", 49 | srcs = ["code_extractor_test.py"], 50 | data = [ 51 | "//vanir/testdata:test_patch_set", 52 | ], 53 | deps = [ 54 | ":code_extractor", 55 | ":code_extractor_base", 56 | "//:vulnerability", 57 | requirement("absl-py"), 58 | ], 59 | ) 60 | 61 | py_test( 62 | name = "code_extractor_android_test", 63 | srcs = ["code_extractor_android_test.py"], 64 | data = [ 65 | "//vanir/testdata:test_patch_set", 66 | ], 67 | deps = [ 68 | ":code_extractor_android", 69 | ":code_extractor_base", 70 | "//:vulnerability", 71 | requirement("absl-py"), 72 | requirement("requests"), 73 | ], 74 | ) 75 | -------------------------------------------------------------------------------- /vanir/code_extractors/code_extractor.py: -------------------------------------------------------------------------------- 1 | # Copyright 2023 Google LLC 2 | # 3 | # Use of this source code is governed by a BSD-style 4 | # license that can be found in the LICENSE file or at 5 | # https://developers.google.com/open-source/licenses/bsd 6 | 7 | """Extracts code snippets and metadata 
needed for CVE signature generation. 8 | 9 | This module contains utility classes and functions to extract code snippets and 10 | metadata of CVEs such as patch files (i.e., file diff) and unpatched files. 11 | """ 12 | 13 | from typing import Collection, Optional, Sequence, Tuple, Type, TypeVar 14 | 15 | import requests 16 | from vanir import vulnerability 17 | # Simply importing the extractors will register them as subclasses of the 18 | # abstract extractor class and therefore available for use. 19 | # pylint: disable=unused-import 20 | from vanir.code_extractors import code_extractor_android 21 | # pylint: enable=unused-import 22 | from vanir.code_extractors import code_extractor_base 23 | 24 | _P = TypeVar('_P', bound=code_extractor_base.AbstractCodeExtractor) 25 | 26 | OSV_ID = 'id' 27 | REF_URL = 'url' 28 | REF_TYPE = 'type' 29 | REF_TYPE_FIX = 'FIX' 30 | VULN_AFFECTED = 'affected' 31 | AFFECTED_PACKAGE = 'package' 32 | PACKAGE_NAME = 'name' 33 | PACKAGE_ECOSYSTEM = 'ecosystem' 34 | AFFECTED_ECOSYSTEM_SPECIFIC = 'ecosystem_specific' 35 | 36 | 37 | class DuplicatedCodeExtractorError(Exception): 38 | pass 39 | 40 | 41 | def _get_extractor_class(ecosystem: str) -> Optional[Type[_P]]: 42 | """Returns the extractor class for the given ecosystem, or None.""" 43 | extractors = code_extractor_base.AbstractCodeExtractor.__subclasses__() 44 | found_extractors = [] 45 | for extractor_class in extractors: 46 | if extractor_class.is_supported_ecosystem(ecosystem): 47 | found_extractors.append(extractor_class) 48 | 49 | if not found_extractors: 50 | return None 51 | if len(found_extractors) > 1: 52 | raise DuplicatedCodeExtractorError( 53 | 'Multiple code extractors supported ecosystem "%s": %s' % 54 | (ecosystem, found_extractors)) 55 | return found_extractors[0] 56 | 57 | 58 | def extract_for_affected_entry( 59 | affected: vulnerability.AffectedEntry, 60 | session: Optional[requests.sessions.Session] = None, 61 | ) -> Tuple[Sequence[code_extractor_base.Commit], 62 | 
Sequence[code_extractor_base.FailedCommitUrl]]: 63 | """Extracts fix commit data for the given affected entry. 64 | 65 | For each commit, this function extracts the following data: 66 | 1. commit message 67 | 2. per-file patch (diff) 68 | 3. unmodified & modified versions of the files changed by the patch 69 | 70 | Args: 71 | affected: the OSV affected entry to extract fixes for. 72 | session: requests session to use for retrieving files and patches. If 73 | None, a new session will be used. 74 | 75 | Returns: 76 | A tuple where the first item is the list of |Commit| objects pertaining 77 | to the given |affected|, and the second item is the list of URLs found but 78 | failed to be converted to |Commit| objects. 79 | """ 80 | extractor_class = _get_extractor_class(affected.ecosystem) 81 | if not extractor_class: 82 | raise NotImplementedError(f'Unsupported ecosystem: {affected.ecosystem}') 83 | return extractor_class(session).extract_commits_for_affected_entry(affected) 84 | 85 | 86 | def extract_files_at_tip_of_unaffected_versions( 87 | ecosystem: str, 88 | package_name: str, 89 | affected_versions: Sequence[str], 90 | files: Collection[str], 91 | session: Optional[requests.sessions.Session] = None, 92 | ) -> Tuple[ 93 | Sequence[code_extractor_base.Commit], 94 | Sequence[code_extractor_base.FailedCommitUrl], 95 | ]: 96 | """Extracts files at the tip of unmentioned versions of the given package. 97 | 98 | This function checks the list of given versions, determines the active tips 99 | of branches that are not mentioned in the list, and extracts the listed files 100 | at those tips. 101 | 102 | Args: 103 | ecosystem: the ecosystem of the package. 104 | package_name: the name of the package. 105 | affected_versions: the list of affected versions of the package. Tip of 106 | versions not in this list will be extracted. 107 | files: the list of files to include. 108 | session: requests session to use for retrieving files and patches. If 109 | None, a new session will be used.
110 | 111 | Returns: 112 | A tuple where the first item is the list of |Commit| objects pertaining 113 | to the tips of versions not in |affected_versions|, and the second item is 114 | the list of tip URLs that failed to be converted to |Commit| objects. 115 | """ 116 | extractor_class = _get_extractor_class(ecosystem) 117 | if not extractor_class: 118 | raise NotImplementedError(f'Unsupported ecosystem: {ecosystem}') 119 | return extractor_class(session).extract_files_at_tip_of_unaffected_versions( 120 | package_name, affected_versions, files, 121 | ) 122 | -------------------------------------------------------------------------------- /vanir/code_extractors/code_extractor_test.py: -------------------------------------------------------------------------------- 1 | # Copyright 2023 Google LLC 2 | # 3 | # Use of this source code is governed by a BSD-style 4 | # license that can be found in the LICENSE file or at 5 | # https://developers.google.com/open-source/licenses/bsd 6 | 7 | """Tests for code_extractor.""" 8 | 9 | from vanir import vulnerability 10 | from vanir.code_extractors import code_extractor 11 | from vanir.code_extractors import code_extractor_base 12 | 13 | from absl.testing import absltest 14 | from absl.testing import parameterized 15 | 16 | _TEST_COMMIT = 'abcdef0000000000000000000000000000000000' 17 | _TEST_PARENT_COMMIT = 'fedcba1111111111111111111111111111111111' 18 | 19 | _ANDROID_PATCH_URL_BASE = 'https://android.googlesource.com/kernel/common/+/' 20 | _TEST_ANDROID_COMMIT_URL = _ANDROID_PATCH_URL_BASE + _TEST_COMMIT 21 | 22 | 23 | class CodeExtractorTest(parameterized.TestCase): 24 | 25 | @absltest.mock.patch.object( 26 | code_extractor_base, 'Commit', autospec=True, instance=True 27 | ) 28 | @absltest.mock.patch.object(code_extractor_base, 'AbstractCodeExtractor') 29 | def test_extract(self, mock_extractor_class, mock_commit): 30 | mock_extractor_class.__subclasses__ = lambda self: [mock_extractor_class] 31 |
mock_extractor_class.is_supported_ecosystem.side_effect = ( 32 | lambda s: True if s == 'test_ecosystem' else False 33 | ) 34 | 35 | mock_extractor_class( 36 | None 37 | ).extract_commits_for_affected_entry.return_value = ([mock_commit], []) 38 | test_affected = vulnerability.AffectedEntry( 39 | {'package': {'ecosystem': 'test_ecosystem', 'name': 'pkg'}} 40 | ) 41 | commits, failures = code_extractor.extract_for_affected_entry(test_affected) 42 | self.assertEmpty(failures) 43 | self.assertListEqual(commits, [mock_commit]) 44 | 45 | def test_extract_with_no_package(self): 46 | with self.assertRaisesRegex(ValueError, 'Missing package info.*'): 47 | code_extractor.extract_for_affected_entry( 48 | vulnerability.AffectedEntry({}) 49 | ) 50 | 51 | @absltest.mock.patch.object(code_extractor_base, 'AbstractCodeExtractor') 52 | def test_extract_with_no_patch_found(self, mock_extractor_class): 53 | mock_extractor_class.__subclasses__ = lambda self: [mock_extractor_class] 54 | mock_extractor_class.is_supported_ecosystem.side_effect = ( 55 | lambda s: True if s == 'test_ecosystem' else False 56 | ) 57 | mock_extractor_class( 58 | None 59 | ).extract_commits_for_affected_entry.return_value = ([], []) 60 | test_affected = vulnerability.AffectedEntry( 61 | {'package': {'ecosystem': 'test_ecosystem', 'name': 'pkg'}} 62 | ) 63 | commits, failures = code_extractor.extract_for_affected_entry(test_affected) 64 | self.assertEmpty(commits) 65 | self.assertEmpty(failures) 66 | 67 | def test_extract_with_unsupported_ecosystem(self): 68 | test_affected = vulnerability.AffectedEntry( 69 | {'package': {'ecosystem': 'unknown_ecosystem', 'name': 'pkg'}} 70 | ) 71 | with self.assertRaises(NotImplementedError): 72 | _, _ = code_extractor.extract_for_affected_entry(test_affected) 73 | 74 | @absltest.mock.patch.object( 75 | code_extractor_base, 'Commit', autospec=True, instance=True 76 | ) 77 | @absltest.mock.patch.object(code_extractor_base, 'AbstractCodeExtractor') 78 | def 
test_extract_files_at_tip_of_unaffected_versions( 79 | self, mock_extractor_class, mock_commit, 80 | ): 81 | mock_extractor_class.__subclasses__ = lambda self: [mock_extractor_class] 82 | mock_extractor_class.is_supported_ecosystem.side_effect = ( 83 | lambda s: True if s == 'test_ecosystem' else False 84 | ) 85 | mock_extractor_class( 86 | None 87 | ).extract_files_at_tip_of_unaffected_versions.return_value = ( 88 | [mock_commit], [] 89 | ) 90 | 91 | commits, failures = ( 92 | code_extractor.extract_files_at_tip_of_unaffected_versions( 93 | 'test_ecosystem', 'test_package', ['1.0.0'], ['file1'], None, 94 | ) 95 | ) 96 | self.assertEmpty(failures) 97 | self.assertListEqual(commits, [mock_commit]) 98 | 99 | def test_extract_files_at_tip_of_unaffected_versions_unsupported_ecosystem( 100 | self 101 | ): 102 | with self.assertRaises(NotImplementedError): 103 | code_extractor.extract_files_at_tip_of_unaffected_versions( 104 | 'test_ecosystem', 'test_package', ['1.0.0'], ['file1', 'file2'], None, 105 | ) 106 | 107 | 108 | if __name__ == '__main__': 109 | absltest.main() 110 | -------------------------------------------------------------------------------- /vanir/file_list_manager.py: -------------------------------------------------------------------------------- 1 | # Copyright 2023 Google LLC 2 | # 3 | # Use of this source code is governed by a BSD-style 4 | # license that can be found in the LICENSE file or at 5 | # https://developers.google.com/open-source/licenses/bsd 6 | 7 | """Module for managing known files for each ecosystem/package. 8 | 9 | This module manages lists of known files for each ecosystem & package needed 10 | for calculating truncated path level.
11 | """ 12 | 13 | import collections 14 | import enum 15 | import json 16 | from typing import Mapping, Sequence 17 | 18 | from vanir import parser 19 | 20 | 21 | 22 | _GITFS_TIMEOUT_SEC = 60 23 | _GITFS_ADDR = 'blade:git' 24 | 25 | ANDROID_ECOSYSTEM = 'Android' 26 | KERNEL_PACKAGE = ':linux_kernel:' 27 | _MAINLINE_KERNEL_PROJECT = 'android:kernel/common:refs/heads/android-mainline:' 28 | 29 | _KNOWN_SOURCES = [(ANDROID_ECOSYSTEM, KERNEL_PACKAGE, _MAINLINE_KERNEL_PROJECT)] 30 | 31 | ECOSYSTEM_FILE_LISTS_CACHE = ( 32 | 'vanir/cache/ecosystem_file_lists.json' 33 | ) 34 | 35 | 36 | @enum.unique 37 | class Source(enum.Enum): 38 | CACHE = 'cache' 39 | 40 | 41 | def get_file_lists( 42 | source: Source = Source.CACHE, 43 | ) -> Mapping[str, Mapping[str, Sequence[str]]]: 44 | """Returns reference file lists for signature generation. 45 | 46 | Args: 47 | source: source to retrieve file lists. 48 | 49 | Returns: 50 | Reference file list map where the first key is ecosystem, the second key is 51 | package name and the value is list of files. 
52 | """ 53 | if source == Source.CACHE: 54 | resource = open(ECOSYSTEM_FILE_LISTS_CACHE, mode='rb').read() 55 | file_lists = json.loads(resource) 56 | return file_lists 57 | else: 58 | raise ValueError('Unknown file list source: %s' % source) 59 | -------------------------------------------------------------------------------- /vanir/file_list_manager_test.py: -------------------------------------------------------------------------------- 1 | # Copyright 2023 Google LLC 2 | # 3 | # Use of this source code is governed by a BSD-style 4 | # license that can be found in the LICENSE file or at 5 | # https://developers.google.com/open-source/licenses/bsd 6 | 7 | """Test for file list manager module.""" 8 | 9 | import json 10 | 11 | from vanir import file_list_manager 12 | 13 | from absl.testing import absltest 14 | 15 | _TEST_SUPPORTED_FILE_LIST = ['foo.c', 'bar.c'] 16 | _TEST_UNSUPPORTED_FILE_LIST = ['unsupported_filetype.asp'] 17 | _TEST_FILE_LIST = _TEST_SUPPORTED_FILE_LIST + _TEST_UNSUPPORTED_FILE_LIST 18 | _TEST_SHA = 'abcdef1234567890' 19 | _TEST_FILE_LISTS_JSON_STR = json.dumps( 20 | {'Android': {':linux_kernel:': _TEST_SUPPORTED_FILE_LIST}} 21 | ) 22 | 23 | 24 | class FileListManagerTest(absltest.TestCase): 25 | 26 | def test_get_file_lists_with_cache(self): 27 | file_lists = file_list_manager.get_file_lists( 28 | file_list_manager.Source.CACHE 29 | ) 30 | kernel_file_list = file_lists.get('Android', {}).get(':linux_kernel:') 31 | self.assertGreater(len(kernel_file_list), 50000) 32 | 33 | def test_get_file_lists_fail_with_unknown_source(self): 34 | with self.assertRaises(ValueError): 35 | file_list_manager.get_file_lists('unknown_source') 36 | 37 | 38 | if __name__ == '__main__': 39 | absltest.main() 40 | -------------------------------------------------------------------------------- /vanir/hasher.py: -------------------------------------------------------------------------------- 1 | # Copyright 2023 Google LLC 2 | # 3 | # Use of this source code is governed 
by a BSD-style 4 | # license that can be found in the LICENSE file or at 5 | # https://developers.google.com/open-source/licenses/bsd 6 | 7 | """Hasher to generate signature hashes for the given code snippets.""" 8 | 9 | import functools 10 | from typing import Mapping, Optional, Sequence, Tuple 11 | 12 | from absl import logging 13 | import mmh3 14 | 15 | # For experimental purpose, the n-gram size can be adjusted, but note that any 16 | # change on the line n-gram size requires regeneration of entire signatures. 17 | # Generally, decreasing n-gram size may end up with increase of findings, 18 | # including both true positives and false positives. 19 | _LINE_SIGNATURE_NGRAM_SIZE = 4 20 | 21 | _HASH = functools.partial(mmh3.hash128, seed=0, x64arch=True, signed=False) 22 | 23 | 24 | class _LineNgram: 25 | """Class for maintaining an n-gram where the units are code lines.""" 26 | 27 | def __init__(self, 28 | normalized_code: Mapping[int, str], 29 | line_numbers: Sequence[int], 30 | is_first: Optional[bool] = False, 31 | is_last: Optional[bool] = False): 32 | """Initializes the line n-gram. 33 | 34 | The first and last ngram of a file must be explicitly marked through 35 | |is_first| and |is_last| in order to cover patch hunks adding lines at the 36 | top or the bottom of the file. Internally, the first ngram will be regarded 37 | as ranging from the line number negative infinity, and the last ngram 38 | ranging to the line number infinity. 39 | 40 | Args: 41 | normalized_code: dictionary of normalized code lines including (but not 42 | limited to) the lines for the n-gram. Each key is a line number, and the 43 | value is normalized line in string. 44 | line_numbers: the list of line numbers comprising the n-gram. Each line 45 | number must be a valid line number existing in |normalized_code|. 46 | is_first: True if the n-gram is the first n-gram of the target file. 47 | is_last: True if the n-gram is the last n-gram of the target file. 
48 | 49 | Note: 50 | |line_numbers| is not validated here. A line number missing from 51 | |normalized_code| surfaces as a KeyError from get_ngram_string(). 52 | """ 53 | 54 | self._normalized_code = normalized_code 55 | self._line_numbers = sorted(line_numbers) 56 | self._is_first = is_first 57 | self._is_last = is_last 58 | 59 | def is_overlapping(self, line_range: Tuple[int, int]) -> bool: 60 | """Returns true if the line ngram range overlaps with given |line_range|.""" 61 | if self._is_first and self._is_last: 62 | # This n-gram is the first and the last n-gram, covering the entire file. 63 | return True 64 | 65 | range_start, range_end = line_range 66 | if range_start > range_end: 67 | raise ValueError(f'line_range: start ({range_start}) cannot be greater ' 68 | f'than end ({range_end})') 69 | 70 | # For given ranges r1 and r2, if r1.start <= r2.end && r1.end >= r2.start, 71 | # r1 and r2 overlap. 72 | if self._is_first: 73 | return self._line_numbers[-1] >= range_start 74 | if self._is_last: 75 | return self._line_numbers[0] <= range_end 76 | return (self._line_numbers[0] <= range_end and 77 | self._line_numbers[-1] >= range_start) 78 | 79 | def get_ngram_string(self) -> str: 80 | """Returns the actual string of the n-gram.""" 81 | try: 82 | return ' '.join([ 83 | self._normalized_code[line_number] 84 | for line_number in self._line_numbers 85 | ]) 86 | except KeyError as e: 87 | raise KeyError( 88 | f'Invalid line numbers for ngram: {self._line_numbers}. This is a ' 89 | 'bug and should never have happened. A _LineNgram object should only ' 90 | 'be initialized with line_numbers being a subset of normalized_code.' 91 | ) from e 92 | 93 | def get_line_numbers(self) -> Sequence[int]: 94 | """Returns the line numbers comprising the n-gram.""" 95 | return self._line_numbers 96 | 97 | 98 | def hash_function_chunk(normalized_code: str) -> int: 99 | """Computes hash for the normalized code of a function chunk.
100 | 101 | A function chunk signature is a Murmur3 128-bit x64 hash of the normalized 102 | function code. 103 | 104 | Args: 105 | normalized_code: a normalized function code in string. 106 | 107 | Returns: 108 | The 128-bit hash in integer. 109 | """ 110 | return _HASH(normalized_code) 111 | 112 | 113 | def hash_line_chunk( 114 | normalized_code: Mapping[int, str], 115 | affected_line_ranges: Sequence[Tuple[int, int]] 116 | ) -> Tuple[Sequence[int], Sequence[int]]: 117 | """Computes hash for the normalized code of a line chunk. 118 | 119 | A line chunk signature is a set of n-gram line hashes. Each n-gram consists 120 | of affected lines and their context lines (up to n - 1 lines before and 121 | after the affected lines). Note that any empty lines / comment lines are not 122 | regarded as valid lines so the actual context lines can be located further 123 | than n - 1 lines of an affected line. 124 | 125 | Args: 126 | normalized_code: a normalized code of a line chunk. 127 | affected_line_ranges: list of the ranges indicating the lines changed by the 128 | patch in the chunk's target file. The line numbers are based on the 129 | unpatched file. Inclusive. 130 | 131 | Returns: 132 | A tuple of the hash list and used line list. The hash list is a list of 133 | 128-bit line n-gram hashes. The used line list is a list of integer line 134 | numbers used as elements of the n-grams. 135 | """ 136 | valid_line_numbers = sorted(normalized_code.keys()) 137 | if not valid_line_numbers: 138 | logging.debug('No valid line found from the normalized code. Returning ' 139 | 'empty lists.') 140 | return [], [] 141 | 142 | if not affected_line_ranges: 143 | # If no affected line range is specified, regard all lines as affected. 144 | affected_line_ranges = [ 145 | (valid_line_numbers[0], valid_line_numbers[-1]) 146 | ] 147 | 148 | # Make a list of all valid line ngrams. 
149 | ngrams = [] 150 | if len(valid_line_numbers) < _LINE_SIGNATURE_NGRAM_SIZE: 151 | # If the number of valid lines in a file is shorter than n-gram size, 152 | # just use all valid lines. 153 | ngrams.append( 154 | _LineNgram( 155 | normalized_code, valid_line_numbers, is_first=True, is_last=True)) 156 | else: 157 | ngram_first_line_indices = range( 158 | len(valid_line_numbers) - _LINE_SIGNATURE_NGRAM_SIZE + 1) 159 | for line_index in ngram_first_line_indices: 160 | ngram_line_numbers = valid_line_numbers[ 161 | line_index:_LINE_SIGNATURE_NGRAM_SIZE + line_index] 162 | is_first = line_index == ngram_first_line_indices[0] 163 | is_last = line_index == ngram_first_line_indices[-1] 164 | ngrams.append( 165 | _LineNgram(normalized_code, ngram_line_numbers, is_first, is_last)) 166 | 167 | # For only "valid & affected" ngrams, compute ngram hashes. 168 | line_hashes = [] 169 | used_lines = set() 170 | for affected_range in affected_line_ranges: 171 | for ngram in ngrams.copy(): 172 | if ngram.is_overlapping(affected_range): 173 | ngram_hash = _HASH(ngram.get_ngram_string()) 174 | line_hashes.append(ngram_hash) 175 | used_lines.update(ngram.get_line_numbers()) 176 | ngrams.remove(ngram) 177 | 178 | return line_hashes, sorted(used_lines) 179 | -------------------------------------------------------------------------------- /vanir/hasher_test.py: -------------------------------------------------------------------------------- 1 | # Copyright 2023 Google LLC 2 | # 3 | # Use of this source code is governed by a BSD-style 4 | # license that can be found in the LICENSE file or at 5 | # https://developers.google.com/open-source/licenses/bsd 6 | 7 | """Tests for hasher.""" 8 | 9 | import functools 10 | 11 | from absl import logging 12 | import mmh3 13 | from vanir import hasher 14 | 15 | from absl.testing import absltest 16 | from absl.testing import parameterized 17 | 18 | 19 | _TEST_LINE_CHUNK_NORMALIZED_CODE = { 20 | 5: 'void __init testdev_init ( void )', 21 | 6: '{', 22 
| 7: 'memset ( cdev , 0 , sizeof * cdev ) ;', 23 | 8: 'init_list_head ( & cdev -> list ) ;', 24 | 9: 'kobject_init ( & cdev -> kobj , & ktype_cdev_default ) ;', 25 | 10: 'cdev -> ops = fops ;', 26 | 11: '}', 27 | 19: 'cdev_map = kobj_map_init ( base_probe , & testdevs_lock ) ;', 28 | 20: '}', 29 | 24: 'export_symbol ( register_testdev_region ) ;', 30 | 34: 'export_symbol ( __register_testdev ) ;', 31 | 35: 'export_symbol ( __unregister_testdev ) ;' 32 | } 33 | 34 | 35 | class HasherTest(parameterized.TestCase): 36 | 37 | def setUp(self): 38 | self._hash = functools.partial( 39 | mmh3.hash128, seed=0, x64arch=True, signed=False) 40 | super().setUp() 41 | 42 | def test_function_chunk_hash(self): 43 | test_normalized_code = ( 44 | 'DTYPE FUNCNAME ( const unsigned DTYPE PARAM ) { const DTYPE ' 45 | '* VAR = ( DTYPE * ) globalvar -> data ' 46 | '; FUNCCALL ( PARAM , VAR ) ; 0xe8 ( ) ; return 0 ; }') 47 | 48 | function_hash = hasher.hash_function_chunk(test_normalized_code) 49 | 50 | expected_function_hash = self._hash(test_normalized_code) 51 | self.assertEqual(function_hash, expected_function_hash) 52 | 53 | @parameterized.named_parameters( 54 | dict( 55 | testcase_name='with_no_affected_ranges', 56 | normalized_code=_TEST_LINE_CHUNK_NORMALIZED_CODE, 57 | affected_ranges=[], 58 | expected_used_lines=[5, 6, 7, 8, 9, 10, 11, 19, 20, 24, 34, 35]), 59 | dict( 60 | testcase_name='with_affected_ranges_in_middle', 61 | normalized_code=_TEST_LINE_CHUNK_NORMALIZED_CODE, 62 | affected_ranges=[(9, 10)], 63 | expected_used_lines=[6, 7, 8, 9, 10, 11, 19, 20]), 64 | dict( 65 | testcase_name='with_affected_ranges_at_file_start', 66 | normalized_code=_TEST_LINE_CHUNK_NORMALIZED_CODE, 67 | affected_ranges=[(0, 0)], 68 | expected_used_lines=[5, 6, 7, 8]), 69 | dict( 70 | testcase_name='with_affected_ranges_at_file_end', 71 | normalized_code=_TEST_LINE_CHUNK_NORMALIZED_CODE, 72 | affected_ranges=[(50, 50)], 73 | expected_used_lines=[20, 24, 34, 35]), 74 | dict( 75 | 
testcase_name='with_short_normalized_code', 76 | normalized_code={ 77 | 3: '#define AUDIT_NAMES 5', 78 | 4: '#define auditsc_get_stamp ( c , t , s ) 0' 79 | }, 80 | affected_ranges=[], 81 | expected_used_lines=[3, 4])) 82 | def test_line_chunk_hash(self, normalized_code, affected_ranges, 83 | expected_used_lines): 84 | """Tests various successful cases of line chunk hash generation. 85 | 86 | Args: 87 | normalized_code: the normalized code to test. 88 | affected_ranges: the affected ranges to test with for the normalized code. 89 | expected_used_lines: expected lines to be used for signature hash 90 | generation. This value varies depending on |affected_ranges| but this 91 | test explicitly requires this arg because we want to test the 92 | corresponding logic in the main code rather than to run the identical 93 | logic again in the test. 94 | """ 95 | expected_hashes = [] 96 | expected_line_number_ngrams = [] 97 | index = 0 98 | while index + 3 < len(expected_used_lines): 99 | expected_line_number_ngrams.append(expected_used_lines[index:index + 4]) 100 | index += 1 101 | if not expected_line_number_ngrams: 102 | expected_line_number_ngrams.append(normalized_code.keys()) 103 | 104 | for line_numbers in expected_line_number_ngrams: 105 | ngram = ' '.join( 106 | [normalized_code[line_number] for line_number in line_numbers]) 107 | expected_hashes.append(self._hash(ngram)) 108 | 109 | line_hashes, used_lines = hasher.hash_line_chunk(normalized_code, 110 | affected_ranges) 111 | 112 | self.assertCountEqual(expected_used_lines, used_lines) 113 | self.assertEqual(expected_hashes, line_hashes) 114 | 115 | def test_line_chunk_hash_with_empty_normalized_code_is_warned(self): 116 | test_normalized_code = {} 117 | with self.assertLogs(level=logging.WARNING) as logs: 118 | line_hashes, used_lines = hasher.hash_line_chunk(test_normalized_code, []) 119 | self.assertIn( 120 | 'No valid line found from the normalized code. 
Returning empty lists.', 121 | logs.output[0]) 122 | self.assertEmpty(line_hashes) 123 | self.assertEmpty(used_lines) 124 | 125 | def test_line_ngram_overlap_check_fails_with_reversed_line_range(self): 126 | # Case unable to be triggered by public class; directly test private class. 127 | test_ngram_line_numbers = [7, 8, 9, 10] 128 | test_affected_line_range = (10, 7) 129 | ngram = hasher._LineNgram(_TEST_LINE_CHUNK_NORMALIZED_CODE, 130 | test_ngram_line_numbers) 131 | expected_error_msg = ( 132 | r'line_range: start \(10\) cannot be greater than end \(7\)') 133 | with self.assertRaisesRegex(ValueError, expected_error_msg): 134 | ngram.is_overlapping(test_affected_line_range) 135 | 136 | if __name__ == '__main__': 137 | absltest.main() 138 | -------------------------------------------------------------------------------- /vanir/integration_tests/BUILD.bazel: -------------------------------------------------------------------------------- 1 | # Copyright 2023 Google LLC 2 | # 3 | # Use of this source code is governed by a BSD-style 4 | # license that can be found in the LICENSE file or at 5 | # https://developers.google.com/open-source/licenses/bsd 6 | 7 | # Integration tests for Vanir 8 | 9 | load("@rules_python//python:defs.bzl", "py_test") 10 | load("@vanir_deps//:requirements.bzl", "requirement") 11 | 12 | package(default_visibility = ["//visibility:public"]) 13 | 14 | py_test( 15 | name = "missing_patch_detection_hermetic_test", 16 | srcs = ["missing_patch_detection_hermetic_test.py"], 17 | data = [ 18 | "//vanir/testdata:test_gitiles_data", 19 | "//vanir/testdata:test_vulnerabilities", 20 | "//vanir/testdata:vanir_test_source_simplified", 21 | ], 22 | deps = [ 23 | "//:osv_client", 24 | "//:reporter", 25 | "//:signature", 26 | "//:vanir_test_base", 27 | "//:vulnerability", 28 | "//:vulnerability_manager", 29 | "//vanir/scanners:scanner_base", 30 | "//vanir/scanners:target_selection_strategy", 31 | "//vanir/testdata:test_signatures", 32 | 
requirement("absl-py"), 33 | requirement("requests"), 34 | ], 35 | ) 36 | -------------------------------------------------------------------------------- /vanir/language_parsers/BUILD.bazel: -------------------------------------------------------------------------------- 1 | # Copyright 2023 Google LLC 2 | # 3 | # Use of this source code is governed by a BSD-style 4 | # license that can be found in the LICENSE file or at 5 | # https://developers.google.com/open-source/licenses/bsd 6 | 7 | # Package containing all programming language parsers for Vanir. 8 | 9 | load("@rules_python//python:defs.bzl", "py_library", "py_test") 10 | 11 | package(default_visibility = ["//visibility:public"]) 12 | 13 | py_library( 14 | name = "abstract_language_parser", 15 | srcs = [ 16 | "abstract_language_parser.py", 17 | ], 18 | deps = [ 19 | ":common", 20 | ], 21 | ) 22 | 23 | py_library( 24 | name = "common", 25 | srcs = [ 26 | "common.py", 27 | ], 28 | ) 29 | 30 | py_library( 31 | name = "language_parsers", 32 | srcs = ["language_parsers.py"], 33 | deps = [ 34 | ":abstract_language_parser", 35 | ":common", 36 | "//vanir/language_parsers/cpp:cpp_parser", 37 | "//vanir/language_parsers/java:java_parser", 38 | ], 39 | ) 40 | -------------------------------------------------------------------------------- /vanir/language_parsers/abstract_language_parser.py: -------------------------------------------------------------------------------- 1 | # Copyright 2023 Google LLC 2 | # 3 | # Use of this source code is governed by a BSD-style 4 | # license that can be found in the LICENSE file or at 5 | # https://developers.google.com/open-source/licenses/bsd 6 | 7 | """Abstract Parser base class that all parsers implement. 8 | """ 9 | 10 | import abc 11 | from typing import Optional, Sequence, Tuple 12 | 13 | from vanir.language_parsers import common 14 | 15 | 16 | class AbstractLanguageParser(abc.ABC): 17 | """Abstract language parser interface that all language parsers implement. 
18 | 19 | A Parser object parses one file, optionally only on a set of select line 20 | ranges, and presents methods to extract function and line chunks to Vanir 21 | signature generation and scanning. 22 | 23 | A Parser supports a set of file extensions, given by each implementation as 24 | the return value of get_supported_extensions(). 25 | """ 26 | 27 | @classmethod 28 | @abc.abstractmethod 29 | def get_supported_extensions(cls) -> Sequence[str]: 30 | """Returns a list of supported file extensions. Should include the dot.""" 31 | 32 | def __init__(self, filename: str): 33 | """Construct the Parser object for the given filename. 34 | 35 | Args: 36 | filename: the absolute path to the file to analyze. 37 | """ 38 | 39 | @abc.abstractmethod 40 | def get_chunks( 41 | self, 42 | affected_line_ranges_for_functions: Optional[ 43 | Sequence[Tuple[int, int]] 44 | ] = None, 45 | ) -> common.ParseResults: 46 | """Parse the file and return the line chunk and function chunks. 47 | 48 | Args: 49 | affected_line_ranges_for_functions: list of line ranges of interest to 50 | filter function chunks on. A parser should return only functions that 51 | contain at least one line in these ranges. If 52 | affected_line_ranges_for_functions is empty, return all functions. 53 | Returns: A ParseResults object containing all the parsing output.
54 | """ 55 | -------------------------------------------------------------------------------- /vanir/language_parsers/common.py: -------------------------------------------------------------------------------- 1 | # Copyright 2023 Google LLC 2 | # 3 | # Use of this source code is governed by a BSD-style 4 | # license that can be found in the LICENSE file or at 5 | # https://developers.google.com/open-source/licenses/bsd 6 | 7 | """Common data types for Vanir language parser.""" 8 | 9 | import dataclasses 10 | from typing import Mapping, Sequence 11 | 12 | 13 | @dataclasses.dataclass(frozen=True) 14 | class FunctionChunkBase: 15 | """Data class representing a function and metadata extracted by a parser. 16 | 17 | Attributes: 18 | name: function name 19 | return_types: sequence of return types. Each return type is a sequence of 20 | tokens representing the type. E.g. [['struct', 'foo'], ['int']] 21 | parameters: sequence of parameter names that the function accepts 22 | used_data_types: sequence of all data types used by the function. Similar 23 | to return_types, each entry is a sequence of tokens for the type. 24 | local_variables: sequence of names of all local variables. 25 | called_functions: sequence of other function names called. 26 | tokens: sequence of tokens consisting of the function body. 
27 | """ 28 | name: str 29 | return_types: Sequence[Sequence[str]] 30 | parameters: Sequence[str] 31 | used_data_types: Sequence[Sequence[str]] 32 | local_variables: Sequence[str] 33 | called_functions: Sequence[str] 34 | tokens: Sequence[str] 35 | 36 | 37 | @dataclasses.dataclass(frozen=True) 38 | class LineChunkBase: 39 | """Data class for meaningful tokenized lines extracted by a parser.""" 40 | tokens: Mapping[int, Sequence[str]] 41 | 42 | 43 | @dataclasses.dataclass(frozen=True) 44 | class ParseError: 45 | """Data class for holding an error found during parsing.""" 46 | line: int 47 | column: int 48 | bad_token: str 49 | message: str 50 | 51 | 52 | @dataclasses.dataclass(frozen=True) 53 | class ParseResults: 54 | """Data class holding all parsing results (function/line chunk, errors).""" 55 | function_chunks: Sequence[FunctionChunkBase] 56 | line_chunk: LineChunkBase 57 | parse_errors: Sequence[ParseError] 58 | -------------------------------------------------------------------------------- /vanir/language_parsers/cpp/BUILD.bazel: -------------------------------------------------------------------------------- 1 | # Copyright 2023 Google LLC 2 | # 3 | # Use of this source code is governed by a BSD-style 4 | # license that can be found in the LICENSE file or at 5 | # https://developers.google.com/open-source/licenses/bsd 6 | 7 | # Bazel build rules for Vanir parser core. 8 | load("@rules_python//python:defs.bzl", "py_library", "py_test") 9 | load("@vanir_deps//:requirements.bzl", "requirement") 10 | 11 | package(default_visibility = [ 12 | "//visibility:public", 13 | ]) 14 | 15 | cc_library( 16 | name = "parser_core", 17 | srcs = ["parser_core.cc"], 18 | hdrs = ["parser_core.h"], 19 | copts = [ 20 | # fexception is needed only for catching Antlr4 exceptions. 
21 | "-fexceptions", 22 | ], 23 | features = ["-use_header_modules"], 24 | deps = [ 25 | "@com_google_absl//absl/status", 26 | "@com_google_absl//absl/status:statusor", 27 | "@com_google_absl//absl/strings", 28 | "@fuzzyc//:fuzzyc_cc_function", 29 | "@fuzzyc//:fuzzyc_cc_module", 30 | ], 31 | ) 32 | 33 | py_library( 34 | name = "cpp_parser", 35 | srcs = ["cpp_parser.py"], 36 | data = [ 37 | "//vanir/language_parsers/cpp/python:parser_core.so", 38 | "@pybind11_abseil//pybind11_abseil:status.so", 39 | ], 40 | deps = [ 41 | "//vanir/language_parsers:abstract_language_parser", 42 | "//vanir/language_parsers:common", 43 | requirement("absl-py"), 44 | ], 45 | ) 46 | 47 | py_test( 48 | name = "cpp_parser_test", 49 | size = "small", 50 | srcs = ["cpp_parser_test.py"], 51 | data = ["@pybind11_abseil//pybind11_abseil:status.so"], 52 | deps = [ 53 | ":cpp_parser", 54 | requirement("absl-py"), 55 | ], 56 | ) 57 | -------------------------------------------------------------------------------- /vanir/language_parsers/cpp/cpp_parser.py: -------------------------------------------------------------------------------- 1 | # Copyright 2023 Google LLC 2 | # 3 | # Use of this source code is governed by a BSD-style 4 | # license that can be found in the LICENSE file or at 5 | # https://developers.google.com/open-source/licenses/bsd 6 | 7 | """Vanir C/C++ parser. 8 | 9 | This module interfaces with the native Antlr FuzzyC parser. 
class CppParser(abstract_language_parser.AbstractLanguageParser):
  """Vanir C/C++ parser.

  This class implements the AbstractLanguageParser base class on top of the
  native `parser_core` extension. When the native core rejects a file with the
  Antlr4 UTF-8 decode error, the file is re-encoded into a temporary UTF-8 copy
  and parsing is retried on that copy. The copy is removed when the parser
  object is garbage collected.
  """

  def __init__(self, filename: str):
    """Initializes the native parser core for the given file.

    Args:
      filename: path of the C/C++ source file to parse.

    Raises:
      status.StatusNotOk: if the native core fails for any reason other than
        the Antlr4 UTF-8 decode error, or fails again on the re-encoded copy.
      ValueError: if the file cannot be decoded with any alternative encoding.
    """
    # Defined up front so __del__ is safe even when construction fails early.
    self._temp_filename = None
    try:
      self.parser_core = parser_core.ParserCore(filename)
      self.parser_core.init()
    except status.StatusNotOk as e:
      is_decode_error = (
          e.code == status.StatusCode.INVALID_ARGUMENT.value
          and e.message == _ANTLR4_DECODE_ERROR
      )
      if not is_decode_error:
        # Bare raise keeps the original traceback intact.
        raise
      # If encoding problem, try again after converting to UTF-8.
      # The file name is passed as the lazy %-argument; previously the
      # placeholder had no argument and a literal '%s' was logged.
      logging.info(
          '%s is not encoded in UTF-8. Trying altneratives.', filename
      )
      self._temp_filename = self._convert_to_utf8(filename)
      self.parser_core = parser_core.ParserCore(self._temp_filename)
      self.parser_core.init()

  def __del__(self):
    """Removes the temporary UTF-8 copy, if one was created."""
    temp_filename = getattr(self, '_temp_filename', None)
    if not temp_filename:
      return
    try:
      os.unlink(temp_filename)
    except OSError:
      # Best-effort cleanup: the file may already have been removed, and an
      # exception escaping __del__ would only produce noise on stderr.
      pass

  @classmethod
  def get_supported_extensions(cls) -> Iterable[str]:
    """Returns the file extensions handled by this parser."""
    return ['.c', '.h', '.cc', '.hh', '.cpp', '.hpp', '.cxx', '.hxx']

  @classmethod
  def _convert_to_utf8(cls, filename) -> str:
    """Creates a new file with UTF-8 encoding and returns the file name.

    Each encoding in _ALTNERNATIVE_ENCODINGS is tried in order. The caller owns
    the returned temporary file and is responsible for deleting it.

    Args:
      filename: path of the file that failed to decode as UTF-8.

    Returns:
      Path of a newly created temporary file holding the UTF-8 content.

    Raises:
      ValueError: if none of the alternative encodings can decode the file.
    """
    for encoding in _ALTNERNATIVE_ENCODINGS:
      # Decode first; the temp file is only created once decoding succeeded so
      # that a failed attempt does not leave an orphaned file behind.
      try:
        with open(filename, encoding=encoding, mode='r') as file:
          content = file.read()
      except ValueError:  # Try other encodings on decoding failure
        continue
      new_file = tempfile.NamedTemporaryFile(
          encoding='UTF-8', mode='w', delete=False
      )
      try:
        with new_file:
          new_file.write(content)
      except Exception:
        # delete=False means nobody else would clean this up on failure.
        os.unlink(new_file.name)
        raise
      return new_file.name
    raise ValueError(
        'Failed to deocde %s. Tried encodings: UTF-8, %s'
        % (filename, ', '.join(_ALTNERNATIVE_ENCODINGS))
    )

  def _to_standard_function_chunk_base(
      self, chunk: parser_core.FunctionChunkRaw
  ) -> common.FunctionChunkBase:
    """Converts a native function chunk into the language-neutral data class.

    The native chunk exposes a single return type; it is wrapped in a list
    because FunctionChunkBase.return_types is a sequence of types.
    """
    return common.FunctionChunkBase(
        chunk.name,
        [chunk.return_type],
        chunk.parameters,
        chunk.used_data_types,
        chunk.local_variables,
        chunk.called_functions,
        chunk.tokens,
    )

  def get_chunks(
      self,
      affected_line_ranges_for_functions: Optional[
          Sequence[Tuple[int, int]]
      ] = None,
  ) -> common.ParseResults:
    """Extracts function chunks, the line chunk and parse errors.

    Args:
      affected_line_ranges_for_functions: line ranges forwarded to the native
        core to select function chunks. None is forwarded as an empty list.

    Returns:
      A ParseResults holding the converted function chunks, the line chunk
      and the parse errors reported by the native core.
    """
    if affected_line_ranges_for_functions is None:
      affected_line_ranges_for_functions = []
    function_chunks = [
        self._to_standard_function_chunk_base(function_chunk_raw)
        for function_chunk_raw in self.parser_core.get_function_chunks(
            affected_line_ranges_for_functions)
    ]
    line_chunk = common.LineChunkBase(self.parser_core.get_line_chunk().tokens)
    errors = [
        common.ParseError(e.line, e.column, e.bad_token, e.message)
        for e in self.parser_core.get_parse_errors()
    ]
    return common.ParseResults(function_chunks, line_chunk, errors)
super().setUp() 22 | self.testcode = """ 23 | /* This code is for testing Vanir Parser. */ int test_globalvar = 10; 24 | int test_func1(const unsigned int64 test_arg) { 25 | const struct teststruct *testvar = (struct teststruct *)globalvar->data; 26 | test_func2(test_arg, testvar); // some comment. 27 | /* additional comment line. */ 28 | 0xe8(); // broken code -- won't be counted as func call. 29 | return 0; 30 | } 31 | void test_func_decl(int myarg); 32 | void test_func_def(int myarg) {} 33 | """ 34 | testfile = self.create_tempfile('testfile.c', content=self.testcode) 35 | self.test_filename = testfile.full_path 36 | self.expected_tokens = { 37 | 2: ['int', 'test_globalvar', '=', '10', ';'], 38 | 3: [ 39 | 'int', 'test_func1', '(', 'const', 'unsigned', 'int64', 'test_arg', 40 | ')', '{' 41 | ], 42 | 4: [ 43 | 'const', 'struct', 'teststruct', '*', 'testvar', '=', '(', 'struct', 44 | 'teststruct', '*', ')', 'globalvar', '->', 'data', ';' 45 | ], 46 | 5: ['test_func2', '(', 'test_arg', ',', 'testvar', ')', ';'], 47 | 7: ['0xe8', '(', ')', ';'], 48 | 8: ['return', '0', ';'], 49 | 9: ['}'], 50 | 10: ['void', 'test_func_decl', '(', 'int', 'myarg', ')', ';'], 51 | 11: ['void', 'test_func_def', '(', 'int', 'myarg', ')', '{', '}'] 52 | } 53 | 54 | def test_cpp_parser_with_line_limits(self): 55 | parser = cpp_parser.CppParser(self.test_filename) 56 | 57 | results = parser.get_chunks([(5, 7)]) 58 | self.assertEmpty(results.parse_errors) 59 | self.assertLen(results.function_chunks, 1) 60 | self.assertEqual(results.function_chunks[0].name, 'test_func1') 61 | self.assertEqual(results.function_chunks[0].return_types, [['int']]) 62 | self.assertEqual(results.function_chunks[0].parameters, ['test_arg']) 63 | self.assertEqual( 64 | results.function_chunks[0].used_data_types, 65 | [['const', 'unsigned', 'int64'], ['const', 'struct', 'teststruct'], 66 | ['struct', 'teststruct']]) 67 | self.assertEqual(results.function_chunks[0].local_variables, ['testvar']) 68 | self.assertEqual( 
69 | results.function_chunks[0].called_functions, ['test_func2']) 70 | 71 | self.assertEqual(results.line_chunk.tokens, self.expected_tokens) 72 | 73 | def test_cpp_parser_without_line_limits(self): 74 | parser = cpp_parser.CppParser(self.test_filename) 75 | 76 | results = parser.get_chunks() 77 | self.assertEmpty(results.parse_errors) 78 | self.assertLen(results.function_chunks, 2) 79 | self.assertEqual(results.function_chunks[0].name, 'test_func1') 80 | self.assertEqual(results.function_chunks[0].parameters, ['test_arg']) 81 | self.assertEqual( 82 | results.function_chunks[0].used_data_types, 83 | [['const', 'unsigned', 'int64'], ['const', 'struct', 'teststruct'], 84 | ['struct', 'teststruct']]) 85 | self.assertEqual(results.function_chunks[0].local_variables, ['testvar']) 86 | self.assertEqual( 87 | results.function_chunks[0].called_functions, ['test_func2']) 88 | self.assertEqual(results.function_chunks[1].name, 'test_func_def') 89 | self.assertEqual(results.function_chunks[1].return_types, [['void']]) 90 | self.assertEqual(results.function_chunks[1].parameters, ['myarg']) 91 | 92 | self.assertEqual(results.line_chunk.tokens, self.expected_tokens) 93 | 94 | def test_cpp_parser_with_nonexistent_file_failure(self): 95 | filename = 'NonExistingFile.c' 96 | with self.assertRaisesRegex(status.StatusNotOk, 'Failed to open file:.*'): 97 | _ = cpp_parser.CppParser(filename) 98 | 99 | def test_cpp_parser_with_non_utf8_file(self): 100 | latin1_str = ' // \xE0' 101 | testfile = self.create_tempfile( 102 | 'testfile_latein1.c', 103 | content=self.testcode + latin1_str, 104 | encoding='LATIN-1', 105 | ) 106 | with self.assertLogs(level=logging.INFO) as logs: 107 | parser = cpp_parser.CppParser(testfile.full_path) 108 | results = parser.get_chunks([(5, 7)]) 109 | self.assertEmpty(results.parse_errors) 110 | self.assertLen(results.function_chunks, 1) 111 | self.assertIn( 112 | 'is not encoded in UTF-8. 
Trying altneratives.', logs.output[0] 113 | ) 114 | 115 | def test_cpp_parser_with_known_encoding_file(self): 116 | latin1_str = ' // \xE0' 117 | testfile = self.create_tempfile( 118 | 'testfile_latein1.c', 119 | content=self.testcode + latin1_str, 120 | encoding='LATIN-1', 121 | ) 122 | # Delete latin-1 from the alternative encoding. 123 | with mock.patch.object(cpp_parser, '_ALTNERNATIVE_ENCODINGS', []): 124 | with self.assertRaisesRegex(ValueError, 'Failed to deocde'): 125 | cpp_parser.CppParser(testfile.full_path) 126 | 127 | 128 | if __name__ == '__main__': 129 | absltest.main() 130 | -------------------------------------------------------------------------------- /vanir/language_parsers/cpp/python/BUILD.bazel: -------------------------------------------------------------------------------- 1 | # Copyright 2023 Google LLC 2 | # 3 | # Use of this source code is governed by a BSD-style 4 | # license that can be found in the LICENSE file or at 5 | # https://developers.google.com/open-source/licenses/bsd 6 | 7 | # Build rule for binding C++ parser core to python using Pybind. 
8 | 9 | load("@pybind11_bazel//:build_defs.bzl", "pybind_extension") 10 | 11 | package(default_visibility = [ 12 | "//visibility:public", 13 | ]) 14 | 15 | pybind_extension( 16 | name = "parser_core", 17 | srcs = ["parser_core.cc"], 18 | deps = [ 19 | "//vanir/language_parsers/cpp:parser_core", 20 | "@pybind11_abseil//pybind11_abseil:absl_casters", 21 | "@pybind11_abseil//pybind11_abseil:status_casters", 22 | ], 23 | ) 24 | -------------------------------------------------------------------------------- /vanir/language_parsers/cpp/python/parser_core.cc: -------------------------------------------------------------------------------- 1 | // Copyright 2023 Google LLC 2 | // 3 | // Use of this source code is governed by a BSD-style 4 | // license that can be found in the LICENSE file or at 5 | // https://developers.google.com/open-source/licenses/bsd 6 | 7 | #include "vanir/language_parsers/cpp/parser_core.h" 8 | 9 | #include "pybind11/pybind11.h" 10 | #include "pybind11_abseil/absl_casters.h" 11 | #include "pybind11_abseil/status_casters.h" 12 | 13 | namespace vanir { 14 | namespace cpp_parser { 15 | namespace { 16 | 17 | PYBIND11_MODULE(parser_core, m) { 18 | pybind11::google::ImportStatusModule(); 19 | pybind11::class_(m, "FunctionChunkRaw") 20 | .def_readwrite("name", &FunctionChunk::name_) 21 | .def_readwrite("return_type", &FunctionChunk::return_type_) 22 | .def_readwrite("parameters", &FunctionChunk::parameters_) 23 | // Since individual data type element can be used after freeing the owner 24 | // chunk in Python, access to used_data_types_ transfers the ownership of 25 | // individual data type element to Python. 
26 | .def_property_readonly("used_data_types", 27 | [](const FunctionChunk& func_chunk) { 28 | auto used_data_types = pybind11::list(); 29 | for (auto& data_type : func_chunk.used_data_types_) { 30 | used_data_types.append(pybind11::cast( 31 | *data_type, pybind11::return_value_policy::take_ownership)); 32 | } 33 | return used_data_types; 34 | }) 35 | .def_readwrite("local_variables", &FunctionChunk::local_variables_) 36 | .def_readwrite("called_functions", &FunctionChunk::called_functions_) 37 | .def_readwrite("tokens", &FunctionChunk::tokens_); 38 | 39 | pybind11::class_(m, "LineChunkRaw") 40 | .def_readwrite("tokens", &LineChunk::tokens_); 41 | 42 | pybind11::class_(m, "ParseErrorRaw") 43 | .def_readonly("line", &ParseError::line) 44 | .def_readonly("column", &ParseError::column) 45 | .def_readonly("bad_token", &ParseError::bad_token) 46 | .def_readonly("message", &ParseError::message); 47 | 48 | // GetFunctionChunks and GetLineChunk transfer the ownership. 49 | pybind11::class_(m, "ParserCore") 50 | .def(pybind11::init()) 51 | .def("init", &ParserCore::Init) 52 | .def("get_function_chunks", &ParserCore::GetFunctionChunks, 53 | pybind11::arg("affected_line_ranges")) 54 | .def("get_line_chunk", &ParserCore::GetLineChunk) 55 | .def("get_parse_errors", &ParserCore::GetParseErrors); 56 | } 57 | 58 | } // namespace 59 | } // namespace cpp_parser 60 | } // namespace vanir 61 | -------------------------------------------------------------------------------- /vanir/language_parsers/java/BUILD.bazel: -------------------------------------------------------------------------------- 1 | # Copyright 2023 Google LLC 2 | # 3 | # Use of this source code is governed by a BSD-style 4 | # license that can be found in the LICENSE file or at 5 | # https://developers.google.com/open-source/licenses/bsd 6 | 7 | load("@rules_python//python:defs.bzl", "py_library", "py_test") 8 | load("@vanir_deps//:requirements.bzl", "requirement") 9 | load(":antlr4.external.bzl", "antlr4_cc_gen") 10 | 
11 | package(default_visibility = [ 12 | "//visibility:public", 13 | ]) 14 | 15 | ANTLR4_VER = "4.11.1" 16 | 17 | antlr4_cc_gen( 18 | name = "java_cc_lexer", 19 | srcs = ["@antlr4_grammar_java_lexer_g4//file"], 20 | antlr4_ver = ANTLR4_VER, 21 | cc_files_prefix = "JavaLexer", 22 | cc_namespace = "java_cc_lexer", 23 | listener = False, 24 | ) 25 | 26 | antlr4_cc_gen( 27 | name = "java_cc_parser", 28 | srcs = [ 29 | "@antlr4_grammar_java_lexer_g4//file", 30 | "@antlr4_grammar_java_parser_g4//file", 31 | ], 32 | antlr4_ver = ANTLR4_VER, 33 | cc_files_prefix = "JavaParser", 34 | cc_namespace = "java_cc_parser", 35 | listener = True, 36 | ) 37 | 38 | cc_library( 39 | name = "parser_core", 40 | srcs = ["parser_core.cc"], 41 | hdrs = ["parser_core.h"], 42 | deps = [ 43 | ":java_cc_lexer", 44 | ":java_cc_parser", 45 | "@com_google_absl//absl/log:check", 46 | "@com_google_absl//absl/status:statusor", 47 | ], 48 | ) 49 | 50 | py_library( 51 | name = "java_parser", 52 | srcs = ["java_parser.py"], 53 | data = [ 54 | "//vanir/language_parsers/java/python:parser_core.so", 55 | ], 56 | deps = [ 57 | "//vanir/language_parsers:abstract_language_parser", 58 | "//vanir/language_parsers:common", 59 | ], 60 | ) 61 | 62 | py_test( 63 | name = "java_parser_test", 64 | size = "small", 65 | srcs = ["java_parser_test.py"], 66 | data = ["@pybind11_abseil//pybind11_abseil:status.so"], 67 | deps = [ 68 | ":java_parser", 69 | requirement("absl-py"), 70 | ], 71 | ) 72 | -------------------------------------------------------------------------------- /vanir/language_parsers/java/antlr4.external.bzl: -------------------------------------------------------------------------------- 1 | # Copyright 2023 Google LLC 2 | # 3 | # Use of this source code is governed by a BSD-style 4 | # license that can be found in the LICENSE file or at 5 | # https://developers.google.com/open-source/licenses/bsd 6 | 7 | """Starlark macro to wrap Antlr4 code and library generation from grammar files.""" 8 | 9 | 
load("@antlr4_deps//:requirements.bzl", "requirement")

def antlr4_cc_gen(name, srcs, cc_namespace, cc_files_prefix, antlr4_ver, listener):
    """Generates C++ sources from Antlr4 grammar files and wraps them in a cc_library.

    Declares two targets: a genrule named `<name>_src` that runs the antlr4
    tool on the grammar files, and a cc_library named `<name>` built from the
    generated `.cpp`/`.h` files.

    Args:
      name: name of the parser/lexer library target
      srcs: grammar files
      cc_namespace: C++ namespace to put the parser/lexer under
      cc_files_prefix: prefix for all generated C++ files
      antlr4_ver: specify antlr4 tools version
      listener: whether to generate antlr4 listener classes
    """

    # Files declared as genrule outputs; only these are copied out of the
    # temporary directory after the tool has run.
    out_src_files = [
        "%s.h" % cc_files_prefix,
        "%s.cpp" % cc_files_prefix,
    ]
    if listener:
        # Listener generation adds the (Base)Listener header/source pairs.
        out_src_files += [
            "%sBaseListener.h" % cc_files_prefix,
            "%sBaseListener.cpp" % cc_files_prefix,
            "%sListener.h" % cc_files_prefix,
            "%sListener.cpp" % cc_files_prefix,
        ]
    extra_args = "-listener" if listener else "-no-listener"

    # Shell pipeline: create a scratch directory, run antlr4 with its output
    # directed there, copy the declared outputs into the genrule output
    # directory, then remove the scratch directory. `$$` escapes `$` so the
    # shell (not Bazel) expands the variable.
    cmd = (
        "VANIR_ANTLR_TMPDIR=$$(mktemp -d);" +
        "$(locations @antlr4_entry_points//:antlr4) " +
        "-v " + antlr4_ver + " " +
        "$(SRCS) " +
        "-no-visitor " +
        "-Dlanguage=Cpp " +
        "-package " + cc_namespace + " " +
        "-o $$VANIR_ANTLR_TMPDIR " +
        "-Xexact-output-dir " +
        extra_args + ";" +
        "cp " + " ".join([("$$VANIR_ANTLR_TMPDIR/" + f) for f in out_src_files]) + " $(@D);" +
        "rm -r $$VANIR_ANTLR_TMPDIR"
    )

    native.genrule(
        name = name + "_src",
        srcs = srcs,
        outs = out_src_files,
        cmd = cmd,
        # NOTE(review): presumably local because the antlr4 tool needs access
        # outside the sandbox (e.g. to fetch the requested version) -- confirm.
        local = True,
        tools = [
            requirement("antlr4-tools"),
            "@antlr4_entry_points//:antlr4",
        ],
    )
    native.cc_library(
        name = name,
        # Split the generated files into sources and headers by extension.
        srcs = [(":" + f) for f in out_src_files if f.endswith(".cpp")],
        hdrs = [(":" + f) for f in out_src_files if f.endswith(".h")],
        deps = [
            ":{target}_src".format(target = name),
            "@antlr4_runtimes//:cpp",
        ],
        linkstatic = 1,
    )
class JavaParser(abstract_language_parser.AbstractLanguageParser):
  """Java flavour of AbstractLanguageParser.

  Delegates the actual parsing to the native `parser_core` extension and
  converts its raw results into the language-neutral data classes.
  """

  def __init__(self, filename: str):
    self.parser = parser_core.ParserCore(filename)

  @classmethod
  def get_supported_extensions(cls) -> Iterable[str]:
    return ['.java']

  def get_chunks(
      self,
      affected_line_ranges_for_functions: Optional[
          Sequence[Tuple[int, int]]
      ] = None,
  ) -> common.ParseResults:
    """Parses the file and returns function chunks, line chunk and errors.

    Args:
      affected_line_ranges_for_functions: line ranges forwarded to the native
        parser; any falsy value is forwarded as an empty list.

    Returns:
      A ParseResults built from the raw output of the native parser.
    """
    line_ranges = affected_line_ranges_for_functions or []
    raw_functions, raw_line_chunk, raw_errors = self.parser.parse(line_ranges)

    converted_functions = [
        common.FunctionChunkBase(
            name=raw.name,
            # The native chunk carries one return type; the base class expects
            # a sequence of them.
            return_types=[raw.return_type],
            parameters=raw.parameters,
            used_data_types=raw.used_data_types,
            local_variables=raw.local_variables,
            called_functions=raw.called_functions,
            tokens=raw.tokens,
        )
        for raw in raw_functions
    ]
    converted_errors = [
        common.ParseError(raw.line, raw.column, raw.bad_token, raw.message)
        for raw in raw_errors
    ]
    line_chunk = common.LineChunkBase(raw_line_chunk.tokens_)
    return common.ParseResults(
        converted_functions, line_chunk, converted_errors)
delete; 47 | FunctionChunk &operator=(const FunctionChunk &) = delete; 48 | FunctionChunk &operator=(FunctionChunk &&) = delete; 49 | }; 50 | 51 | // LineChunk is a wrapper class for a map from line numbers to all tokens in 52 | // that line. This is needed instead of a simple type alias because pybind's 53 | // automatic conversion of wrappers (e.g. unique_ptr) only supports custom 54 | // types, and not e.g. unordered_map. 55 | class LineChunk { 56 | public: 57 | explicit LineChunk() {} 58 | std::unordered_map> tokens_; 59 | 60 | private: 61 | LineChunk(const LineChunk &) = delete; 62 | LineChunk(LineChunk &&) = delete; 63 | LineChunk &operator=(const LineChunk &) = delete; 64 | LineChunk &operator=(LineChunk &&) = delete; 65 | }; 66 | 67 | // Container for any error encountered during parsing 68 | struct ParseError { 69 | size_t line, column; 70 | std::string bad_token; 71 | std::string message; 72 | }; 73 | 74 | // Antlr4 parser tree walking listener. 75 | class FileListener : public JavaParserBaseListener { 76 | public: 77 | explicit FileListener( 78 | antlr4::BufferedTokenStream &tokens, 79 | std::vector> function_line_ranges = {}) 80 | : token_stream_(tokens), function_line_ranges_(function_line_ranges) {} 81 | 82 | void enterMethodDeclaration(JavaParser::MethodDeclarationContext*) override; 83 | void exitMethodDeclaration(JavaParser::MethodDeclarationContext*) override; 84 | void enterConstructorDeclaration( 85 | JavaParser::ConstructorDeclarationContext*) override; 86 | void exitConstructorDeclaration( 87 | JavaParser::ConstructorDeclarationContext*) override; 88 | 89 | void enterTypeType(JavaParser::TypeTypeContext*) override; 90 | 91 | void enterLocalVariableDeclaration( 92 | JavaParser::LocalVariableDeclarationContext*) override; 93 | 94 | void enterMethodCall(JavaParser::MethodCallContext*) override; 95 | void enterCreator(JavaParser::CreatorContext*) override; 96 | 97 | std::vector> GetFunctionChunks(); 98 | 99 | private: 100 | 
antlr4::BufferedTokenStream &token_stream_; 101 | 102 | // last_type_token_stop_idx_ holds the last token in a typeType, so that we 103 | // can ignore all other nested typeType, e.g. `ArrayList` should only 104 | // manifest as a single used datatype instead of two. 105 | size_t last_type_token_stop_idx_ = 0; 106 | std::vector> function_line_ranges_; 107 | std::stack> chunks_stack_; 108 | std::vector> function_chunks_; 109 | 110 | // Helper function to collect various information bits into a FunctionChunk 111 | // ctx can be either a MethodDeclarationContext or a 112 | // ConstructorDeclarationContext, both should have all the needed information. 113 | std::unique_ptr ToFunctionChunk( 114 | std::variant ctx); 116 | 117 | // Pops a function chunk from the chunks stack, checking to make sure the 118 | // function chunk being popped is the same as the one being processed, then 119 | // adds the chunk to the list of function_chunks_. 120 | void PopChunk(size_t start_token_idx, size_t stop_token_idx); 121 | }; 122 | 123 | // Listener for errors during parsing 124 | class ErrorListener : public antlr4::BaseErrorListener { 125 | public: 126 | explicit ErrorListener(std::string identifier) : identifier_(identifier) {} 127 | void syntaxError( 128 | antlr4::Recognizer* recognizer, 129 | antlr4::Token* offendingSymbol, 130 | size_t line, size_t charPositionInLine, 131 | const std::string &msg, 132 | std::exception_ptr e) override; 133 | 134 | std::vector> errors_; 135 | 136 | private: 137 | const std::string identifier_; 138 | }; 139 | 140 | // Parses Java files and extract functions, metadata, and tokens split by lines. 141 | class ParserCore { 142 | public: 143 | // Instantiate Parser for file located at |file_path| 144 | explicit ParserCore(std::string file_path); 145 | 146 | // Parses the given file and returns a list of function chunks and tokens map 147 | // split by lines. 148 | // If |affected_line_ranges_for_functions| is empty, parses all functions. 
149 | // Otherwise, parse, only the functions that has at least one line in one of 150 | // the ranges. Ranges are inclusive. 151 | absl::StatusOr>, 153 | std::unique_ptr, 154 | std::vector> 155 | >> 156 | Parse(std::vector> 157 | affected_line_ranges_for_functions = {}); 158 | 159 | private: 160 | std::string file_path_; 161 | }; 162 | 163 | } // namespace java_parser 164 | } // namespace vanir 165 | 166 | #endif // VANIR_LANGUAGE_PARSERS_JAVA_PARSER_CORE_H_ 167 | -------------------------------------------------------------------------------- /vanir/language_parsers/java/python/BUILD.bazel: -------------------------------------------------------------------------------- 1 | # Copyright 2023 Google LLC 2 | # 3 | # Use of this source code is governed by a BSD-style 4 | # license that can be found in the LICENSE file or at 5 | # https://developers.google.com/open-source/licenses/bsd 6 | 7 | # Build rule for binding C++ parser core to python using Pybind. 8 | 9 | load("@pybind11_bazel//:build_defs.bzl", "pybind_extension") 10 | 11 | package(default_visibility = [ 12 | "//visibility:public", 13 | ]) 14 | 15 | pybind_extension( 16 | name = "parser_core", 17 | srcs = ["parser_core.cc"], 18 | deps = [ 19 | "//vanir/language_parsers/java:parser_core", 20 | "@pybind11_abseil//pybind11_abseil:absl_casters", 21 | "@pybind11_abseil//pybind11_abseil:status_casters", 22 | ], 23 | ) 24 | -------------------------------------------------------------------------------- /vanir/language_parsers/java/python/parser_core.cc: -------------------------------------------------------------------------------- 1 | // Copyright 2023 Google LLC 2 | // 3 | // Use of this source code is governed by a BSD-style 4 | // license that can be found in the LICENSE file or at 5 | // https://developers.google.com/open-source/licenses/bsd 6 | 7 | #include "vanir/language_parsers/java/parser_core.h" 8 | 9 | #include "pybind11/pybind11.h" 10 | #include "pybind11/stl.h" 11 | #include 
"pybind11_abseil/absl_casters.h" 12 | #include "pybind11_abseil/status_casters.h" 13 | 14 | namespace vanir { 15 | namespace java_parser { 16 | namespace { 17 | 18 | PYBIND11_MODULE(parser_core, m) { 19 | pybind11::google::ImportStatusModule(); 20 | pybind11::class_(m, "FunctionChunkRaw") 21 | .def_readonly("name", &FunctionChunk::name_) 22 | .def_readonly("return_type", &FunctionChunk::return_type_) 23 | .def_readonly("parameters", &FunctionChunk::parameters_) 24 | .def_readonly("used_data_types", &FunctionChunk::used_data_types_) 25 | .def_readonly("local_variables", &FunctionChunk::local_variables_) 26 | .def_readonly("called_functions", &FunctionChunk::called_functions_) 27 | .def_readonly("tokens", &FunctionChunk::tokens_) 28 | .def_readonly("start_line", &FunctionChunk::line_start_) 29 | .def_readonly("end_line", &FunctionChunk::line_stop_); 30 | 31 | pybind11::class_(m, "LineChunkRaw") 32 | .def_readonly("tokens_", &LineChunk::tokens_); 33 | 34 | pybind11::class_(m, "ParseErrorRaw") 35 | .def_readonly("line", &ParseError::line) 36 | .def_readonly("column", &ParseError::column) 37 | .def_readonly("bad_token", &ParseError::bad_token) 38 | .def_readonly("message", &ParseError::message); 39 | 40 | pybind11::class_(m, "ParserCore") 41 | .def(pybind11::init()) 42 | .def("parse", &ParserCore::Parse, 43 | pybind11::arg("affected_line_ranges_for_functions")); 44 | } 45 | 46 | } // namespace 47 | } // namespace java_parser 48 | } // namespace vanir 49 | -------------------------------------------------------------------------------- /vanir/language_parsers/language_parsers.py: -------------------------------------------------------------------------------- 1 | # Copyright 2023 Google LLC 2 | # 3 | # Use of this source code is governed by a BSD-style 4 | # license that can be found in the LICENSE file or at 5 | # https://developers.google.com/open-source/licenses/bsd 6 | 7 | """Collection of Parsers to extract function/line blocks from the code snippets. 
def get_parser_class(filename: str) -> Optional[Type[_P]]:
  """Looks up the parser class registered for the file's extension.

  Args:
    filename: name or path of the file; only its extension is inspected.

  Returns:
    The first direct subclass of AbstractLanguageParser whose supported
    extensions contain the file's extension, or None if there is none.
  """
  extension = os.path.splitext(filename)[1]
  candidates = abstract_language_parser.AbstractLanguageParser.__subclasses__()
  return next(
      (
          candidate
          for candidate in candidates
          if extension in candidate.get_supported_extensions()
      ),
      None,
  )


def parse_file(
    filename: str,
    functions_line_ranges: Optional[Sequence[Tuple[int, int]]] = None,
) -> common.ParseResults:
  """Parses the given file and extracts function and line chunks.

  Args:
    filename: the absolute path to the file to analyze.
    functions_line_ranges: list of line ranges of interest to filter function
      chunks on.

  Returns:
    The ParseResults produced by the selected language parser: function
    chunks, the line chunk, and the errors reported by the parser.

  Raises:
    NotImplementedError: if no parser handles the file's extension.
  """
  selected_parser = get_parser_class(filename)
  if selected_parser:
    return selected_parser(filename).get_chunks(functions_line_ranges)
  raise NotImplementedError(f'File {filename} is not supported.')
30 | internal_trie_dict = json.loads(json.dumps(token_trie._trie)) 31 | expected_dict = {'a': {'b': {'c': {'d': {'e': {'0': 'V1'}}}, '0': 'V2'}}, 32 | 'c': {'d': {'e': {'0': 'V3'}, '0': 'V5'}}} 33 | 34 | self.assertEqual(internal_trie_dict, expected_dict) 35 | 36 | def test_token_trie_normalized_tokens(self): 37 | token_trie = normalizer._TokenTrie() 38 | token_trie.insert_entry(['a', 'b', 'c', 'd', 'e'], 'V1') 39 | token_trie.insert_entry(['a', 'b'], 'V2') 40 | token_trie.insert_entry(['c', 'd', 'e'], 'V3') 41 | token_trie.insert_entry(['c', 'd'], 'V4') 42 | 43 | test_token_stream = 'a b c d e a b c d e a b c a b c d x y z'.split() 44 | test_token_stream.insert(5, '') 45 | normalized_tokens = ' '.join( 46 | token_trie.generate_normalized_tokens(test_token_stream)) 47 | expected_normalized_tokens = 'V1 V1 V2 c V2 V4 x y z' 48 | self.assertEqual(normalized_tokens, expected_normalized_tokens) 49 | 50 | def test_token_trie_get_next_normalized_token_makes_boundary_error(self): 51 | # The exception in _get_next_normalized_token() is not reachable with 52 | # public methods, so we directly run the function for this exception test. 
53 | token_trie = normalizer._TokenTrie() 54 | with self.assertRaisesRegex( 55 | IndexError, r'Index:3 is out of boundary of tokens \(max:2\)'): 56 | token_trie._get_next_normalized_token(['a', 'b', 'c'], index=3) 57 | 58 | def test_function_chunk_normalization(self): 59 | test_chunk_base = common.FunctionChunkBase( 60 | name='test_func1', 61 | return_types=[['int']], 62 | parameters=['test_arg'], 63 | used_data_types=[['const', 'unsigned', '', 'int64'], 64 | ['const', 'struct', 'teststruct'], 65 | ['struct', 'teststruct']], 66 | local_variables=['testvar'], 67 | called_functions=['test_func2'], 68 | tokens=( 69 | 'int test_func1 ( const unsigned int64 test_arg ) { const struct ' 70 | 'teststruct * testvar = ( struct teststruct * ) globalVar -> data ' 71 | '; test_func2 ( test_arg , testvar ) ; 0xe8 ( ) ; return 0 ; }' 72 | ).split(), 73 | ) 74 | 75 | normalized_code = normalizer.normalize_function_chunk(test_chunk_base) 76 | 77 | expected_normalized_code = ( 78 | 'DTYPE FUNCNAME ( const unsigned DTYPE PARAM ) { const DTYPE ' 79 | '* VAR = ( DTYPE * ) globalvar -> data ' 80 | '; FUNCCALL ( PARAM , VAR ) ; 0xe8 ( ) ; return 0 ; }') 81 | self.assertEqual(normalized_code, expected_normalized_code) 82 | 83 | def test_line_chunk_normalization(self): 84 | test_chunk_base = common.LineChunkBase( 85 | tokens={ 86 | 3: ['void', 'testFUNC', '(', 'int64', 'test_arg', ')', '{'], 87 | 4: [ 88 | 'printk', '(', 'KERN_INFO', '"', '%d', '"', ',', 'test_arg', 89 | ')', ';', '}' 90 | ], 91 | }, 92 | ) 93 | 94 | normalized_code = normalizer.normalize_line_chunk(test_chunk_base) 95 | 96 | expected_normalized_code = { 97 | 3: 'void testfunc ( int64 test_arg ) {', 98 | 4: 'printk ( kern_info " %d " , test_arg ) ; }', 99 | } 100 | self.assertEqual(normalized_code, expected_normalized_code) 101 | 102 | 103 | if __name__ == '__main__': 104 | absltest.main() 105 | -------------------------------------------------------------------------------- /vanir/osv_client.py: 
_OSV_PROD_URL_BASE = 'https://api.osv.dev/v1/'

_OSV_API_KEY = 'AIzaSyAVJKt1YY0yNWHz2TBZU6Hj1nAJq1O-9Gc'

_OSV_QUERY_POSTFIX = 'query'
_OSV_VULNERABILITY_POSTFIX = 'vulns'

_OSV_VULNERABILITIES = 'vulns'
_OSV_NEXT_PAGE_TOKEN = 'next_page_token'
_ANDROID_ECOSYSTEM = 'Android'

_ANDROID_COMPONENT_KERNEL = ':linux_kernel:'

_KNOWN_SOC_SUBCOMPONENTS = (
    'AMLogic',
    'ARM',
    'Broadcom',
    'MediaTek',
    'Marvell',
    'NVIDIA',
    'Qualcomm',
    'Unisoc',
)

ANDROID_KERNEL_PACKAGES = (_ANDROID_COMPONENT_KERNEL,) + tuple(
    _ANDROID_COMPONENT_KERNEL + soc_subcomponent
    for soc_subcomponent in _KNOWN_SOC_SUBCOMPONENTS)

_OSV_LINK_PREFIX = 'https://osv.dev/vulnerability/'

# https://google.github.io/osv.dev/data/#data-dumps
_OSV_ZIP_URL = (
    'https://osv-vulnerabilities.storage.googleapis.com/{ecosystem}/all.zip'
)


def get_osv_url(osv_id: str) -> str:
  """Returns public OSV URL for the given OSV entry."""
  return _OSV_LINK_PREFIX + osv_id


class OsvClient:
  """Class to abstract OSV APIs for retrieving Android CVEs."""

  def __init__(self, session: Optional['requests.sessions.Session'] = None):
    """Initializes the client.

    Args:
      session: the HTTP session used for every request. A new requests session
        is created when omitted.
    """
    if not session:
      session = requests.session()
    self._session = session
    self._osv_url_base = _OSV_PROD_URL_BASE

  def get_vuln(self, osv_id: str) -> Dict[str, Any]:
    """Retrieve specific vulnerability for the given OSV ID from OSV.

    The HTTP status of the response is not checked; the response body is
    decoded as JSON and returned as-is.

    Args:
      osv_id: the OSV ID of the vulnerability to fetch.

    Returns:
      The decoded JSON body of the response.
    """
    osv_vulnerability_url = '%s%s/%s?key=%s' % (
        self._osv_url_base,
        _OSV_VULNERABILITY_POSTFIX,
        osv_id,
        _OSV_API_KEY,
    )
    response = self._session.get(osv_vulnerability_url)
    return json.loads(response.text)

  def get_vulns_for_packages(
      self, ecosystem: str, package_names: Sequence[str]
  ) -> list[Dict[str, Any]]:
    """Retrieve all vulns in the given ecosystem and package list from OSV.

    Each package is queried separately and every result page is followed until
    the service stops returning a next-page token.

    Args:
      ecosystem: the OSV ecosystem name to query.
      package_names: the names of the packages to query.

    Returns:
      The vulnerabilities of all the packages, concatenated in query order.

    Raises:
      requests.exceptions.HTTPError: if a query returns an HTTP error status.
    """
    osv_query_url = '%s%s?key=%s' % (
        self._osv_url_base,
        _OSV_QUERY_POSTFIX,
        _OSV_API_KEY,
    )
    vulnerabilities = []
    for package_name in package_names:
      payload = {
          'package': {
              'ecosystem': ecosystem,
              'name': package_name,
          }
      }
      while True:
        response = self._session.post(osv_query_url, data=json.dumps(payload))
        # Without this check an error body is parsed like a result page with
        # no vulnerabilities, which silently truncates the returned list.
        response.raise_for_status()
        osv_data = json.loads(response.text)
        vulnerabilities += osv_data.get(_OSV_VULNERABILITIES, [])
        next_page_token = osv_data.get(_OSV_NEXT_PAGE_TOKEN, None)
        if not next_page_token:
          break
        payload['page_token'] = next_page_token

    return vulnerabilities

  def get_vulns_for_ecosystem(self, ecosystem: str) -> list[Dict[str, Any]]:
    """Retrieve all vulns in the given ecosystem from OSV.

    Downloads the zip archive of the ecosystem and decodes every member whose
    name ends with '.json'.

    Args:
      ecosystem: the OSV ecosystem name, used to build the archive URL.

    Returns:
      The decoded JSON members of the archive, in archive order.

    Raises:
      requests.exceptions.HTTPError: if the download returns an HTTP error
        status.
    """
    response = self._session.get(_OSV_ZIP_URL.format(ecosystem=ecosystem))
    response.raise_for_status()
    # The context manager closes the archive even if a member fails to decode.
    with zipfile.ZipFile(io.BytesIO(response.content)) as zip_file:
      return [
          json.loads(zip_file.read(filename))
          for filename in zip_file.namelist()
          if filename.endswith('.json')
      ]
_OVERWRITE_SPECS = flags.DEFINE_string(
    'overwrite_specs',
    None,
    'Path to a file containing vulnerability overwrite specs. The file should '
    'be a JSON array with vulnerability_overwriter.OverwriteSpec objects.',
    required=True,
)

_SIGNATURE_FILES = flags.DEFINE_multi_string(
    'signature_files',
    None,
    'List of files containing vulnerability signatures to ensure that overwrite'
    ' specs have a matching vulnerability id.',
    required=False,
)

_NO_SIGNATURE_VALIDATION = flags.DEFINE_bool(
    'no_signature_validation',
    False,
    'Skip validation of overwrite specs against signature files.',
    required=False,
)


class OverwriteSpecValidityTest(absltest.TestCase):
  """Validates the overwrite specs file given by --overwrite_specs."""

  def _load_from_signature_files(self) -> Sequence[dict[str, Any]]:
    """Loads signature files and returns a list of OSV vulnerability objects.

    Returns:
      The concatenated JSON arrays of all the files listed in
      --signature_files, or an empty list if the flag is not set.
    """
    vulnerabilities = []
    for signature_file in _SIGNATURE_FILES.value or []:
      logging.info('Loading vulnerabilities from %s', signature_file)
      # Decode as UTF-8 explicitly instead of relying on the locale default.
      with open(signature_file, encoding='utf-8') as f:
        vulnerabilities.extend(json.load(f))
    return vulnerabilities

  def _to_specs_map(
      self,
      overwrite_specs: Sequence[vulnerability_overwriter.OverwriteSpec],
  ) -> Mapping[str, vulnerability_overwriter.OverwriteSpec]:
    """Converts a list of overwrite specs to a map keyed by vulnerability ID.

    Fails the test if two specs share the same vulnerability ID.

    Args:
      overwrite_specs: A list of overwrite specs.

    Returns:
      A map of overwrite specs, keyed by vulnerability ID.
    """
    overwrite_specs_map = {}
    for spec in overwrite_specs:
      if spec.vuln_id in overwrite_specs_map:
        self.fail(
            'Found multiple specs for vulnerability'
            f' "{spec.vuln_id}". There should be only one spec for a'
            ' vulnerability'
        )
      overwrite_specs_map[spec.vuln_id] = spec
    return overwrite_specs_map

  def test_validate_overwrite_spec(self):
    """Validates the specs alone and, if available, against signatures."""
    # Validate overwrite specs during loading and raise an error if
    # the file or its contents are invalid.
    overwrite_specs = vulnerability_overwriter.load_overwrite_specs_from_file(
        _OVERWRITE_SPECS.value
    )

    # Need a map to quickly find the spec for a given vulnerability by ID.
    overwrite_specs_map = self._to_specs_map(overwrite_specs)
    self.assertNotEmpty(
        overwrite_specs_map, msg='No overwrite specs found in the file'
    )

    # Check the opt-out before touching the signature files, so that they are
    # neither read nor parsed when the validation against them is skipped.
    if _NO_SIGNATURE_VALIDATION.value:
      logging.info('Validated overwrite specs without signature files.')
      return
    vulnerabilities = self._load_from_signature_files()
    if not vulnerabilities:
      logging.info('Validated overwrite specs without signature files.')
      return

    # Track vulnerabilities that should be overwritten but haven't been yet.
    target_vulnerabilities = set(overwrite_specs_map)
    for vulnerability in vulnerabilities:
      vuln_id = vulnerability['id']
      spec = overwrite_specs_map.get(vuln_id)
      if spec is None:
        continue
      target_vulnerabilities.discard(vuln_id)

      # Check that the spec has a path yielding a match, since 'overwrite'
      # does not raise an error if no matches are found.
      for replace in spec.replace:
        if not replace.path.find(vulnerability):
          self.fail(
              f'Overwrite spec for {vuln_id} has a path yielding '
              'no matches.'
          )

      # Make sure the spec doesn't cause any runtime errors.
      vulnerability_overwriter.overwrite([vulnerability], [spec])

    self.assertEmpty(
        target_vulnerabilities,
        'All overwrite specs are loaded correctly and have required fields, but'
        ' some specs do not match any vulnerabilities in the signature files. '
        'You may suppress this error by setting --no_signature_validation=True.'
        ' Missing vulnerabilities: %s' % target_vulnerabilities,
    )
def is_supported_type(filename: str) -> bool:
  """Tells whether a language parser is registered for the named file."""
  parser_class = language_parsers.get_parser_class(filename)
  return parser_class is not None


class Parser:
  """Parses a given file and exports code chunks for making signatures."""

  def __init__(
      self,
      file_path: str,
      target_file: str,
      affected_line_ranges: Optional[Sequence[Tuple[int, int]]] = None,
  ):
    """Parses the given file and extracts function and line chunks.

    Args:
      file_path: the absolute path to the file to analyze.
      target_file: path of the signature's target file, relative to the root of
        the target source tree. E.g., arch/x86/pci/irq.c in Linux Kernel. This
        value labels the chunks generated from the parser and their
        corresponding signatures; |file_path| is unsuitable for that purpose
        since it is an absolute path in the runtime system and can be a
        temporary file.
      affected_line_ranges: list of the ranges of lines affected by a patch.
        Only function chunks that are affected by at least one line within
        this range will be processed. Line chunks are not filtered by this,
        i.e. all line chunks in the file are still returned.

    Raises:
      StatusNotOk: if failed to open the file at |file_path|.
    """
    line_ranges = affected_line_ranges or []

    parse_results = language_parsers.parse_file(
        file_path, functions_line_ranges=line_ranges
    )
    if parse_results.parse_errors:
      logging.warning(
          'Syntax errors encountered while parsing file "%s" ("%s"): %s',
          file_path, target_file, parse_results.parse_errors)

    function_chunks = []
    for chunk_base in parse_results.function_chunks:
      function_chunks.append(
          signature.create_function_chunk(chunk_base, target_file)
      )
    self._function_chunks = function_chunks
    self._line_chunk = signature.create_line_chunk(
        parse_results.line_chunk, line_ranges, target_file
    )

  def get_function_chunks(self) -> Sequence[signature.FunctionChunk]:
    """Gets the function chunk of each function affected by the patch.

    If no affected lines are passed, assumes entire file is affected.

    Returns:
      Function Chunk list.
    """
    return self._function_chunks

  def get_line_chunk(self) -> signature.LineChunk:
    """Gets the line chunk holding the tokens of each line.

    Returns:
      Line Chunk object.
    """
    return self._line_chunk


class ParserTest(absltest.TestCase):
  """Tests for parser.Parser."""

  def test_parser(self):
    """Parses a whole file without any affected line range."""
    testcode = """
    /* This code is for testing Vanir Parser. */ int test_globalvar = 10;
    int test_func1(const unsigned int64 test_arg) {
      const struct teststruct *testvar = (struct teststruct *)globalvar->data;
      test_func2(test_arg, testvar); // some comment.
      /* additional comment line. */
      0xe8(); // broken code -- won't be counted as func call.
      return 0;
    }
    int test_func_decl(int myarg);
    """
    source_path = self.create_tempfile('testfile.c', content=testcode).full_path
    target_file = 'foo/bar/testfile.c'

    parsed = parser.Parser(source_path, target_file)
    function_chunks = parsed.get_function_chunks()
    line_chunk = parsed.get_line_chunk()

    self.assertLen(function_chunks, 1)
    only_chunk = function_chunks[0]
    self.assertEqual(only_chunk.base.name, 'test_func1')
    self.assertEqual(only_chunk.base.parameters, ['test_arg'])
    self.assertCountEqual(
        only_chunk.base.used_data_types,
        [['const', 'unsigned', 'int64'], ['const', 'struct', 'teststruct'],
         ['struct', 'teststruct']])
    self.assertEqual(only_chunk.base.local_variables, ['testvar'])
    self.assertEqual(only_chunk.base.called_functions, ['test_func2'])
    self.assertEqual(only_chunk.target_file, target_file)
    self.assertIsNotNone(only_chunk.normalized_code)
    self.assertIsNotNone(only_chunk.function_hash)

    # Keys are 1-based line numbers of |testcode|; comment-only and empty
    # lines have no entry.
    expected_tokens = {
        2: ['int', 'test_globalvar', '=', '10', ';'],
        3: [
            'int', 'test_func1', '(', 'const', 'unsigned', 'int64', 'test_arg',
            ')', '{'
        ],
        4: [
            'const', 'struct', 'teststruct', '*', 'testvar', '=', '(', 'struct',
            'teststruct', '*', ')', 'globalvar', '->', 'data', ';'
        ],
        5: ['test_func2', '(', 'test_arg', ',', 'testvar', ')', ';'],
        7: ['0xe8', '(', ')', ';'],
        8: ['return', '0', ';'],
        9: ['}'],
        10: ['int', 'test_func_decl', '(', 'int', 'myarg', ')', ';']
    }
    self.assertEqual(line_chunk.base.tokens, expected_tokens)
    self.assertEqual(line_chunk.target_file, target_file)
    self.assertIsNotNone(line_chunk.normalized_code)
    self.assertIsNotNone(line_chunk.line_hashes)
    self.assertIsNotNone(line_chunk.used_lines)

  def test_parser_with_affected_ranges(self):
    """Parses a file while restricting function chunks to affected ranges."""
    testcode = r"""
    int unaffected_function1(void) { printk("ok"); }
    void affected_function1(void) {
      printk("this line is vulnerable. %s", sensitive_info);
    }
    int unaffected_function2(void) { printk("this is fine."); }
    void affected_function2(void) {
      printk("this line is also vulnerable. %s", sensitive_info);
    }
    int unaffected_function3(void) { printk("this is fine, too."); }
    void affected_function3(void) {
      printk("this line is also vulnerable. %s", sensitive_info);
    }
    int unaffected_function4(void) { printk("this is fine, too."); }
    """
    affected_ranges = [(4, 4), (7, 7), (13, 13)]
    source_path = self.create_tempfile('testfile.c', content=testcode).full_path
    target_file = 'foo/bar/testfile.c'

    parsed = parser.Parser(source_path, target_file, affected_ranges)
    function_chunks = parsed.get_function_chunks()
    line_chunk = parsed.get_line_chunk()

    self.assertLen(function_chunks, 3)
    self.assertEqual(function_chunks[0].base.name, 'affected_function1')
    self.assertEqual(function_chunks[1].base.name, 'affected_function2')
    self.assertEqual(function_chunks[2].base.name, 'affected_function3')

    expected_tokens = {
        2: [
            'int', 'unaffected_function1', '(', 'void', ')', '{', 'printk', '(',
            '"ok"', ')', ';', '}'
        ],
        3: ['void', 'affected_function1', '(', 'void', ')', '{'],
        4: [
            'printk', '(', '"this line is vulnerable. %s"', ',',
            'sensitive_info', ')', ';'
        ],
        5: ['}'],
        6: [
            'int', 'unaffected_function2', '(', 'void', ')', '{', 'printk', '(',
            '"this is fine."', ')', ';', '}'
        ],
        7: ['void', 'affected_function2', '(', 'void', ')', '{'],
        8: [
            'printk', '(', '"this line is also vulnerable. %s"', ',',
            'sensitive_info', ')', ';'
        ],
        9: ['}'],
        10: [
            'int', 'unaffected_function3', '(', 'void', ')', '{', 'printk', '(',
            '"this is fine, too."', ')', ';', '}'
        ],
        11: ['void', 'affected_function3', '(', 'void', ')', '{'],
        12: [
            'printk', '(', '"this line is also vulnerable. %s"', ',',
            'sensitive_info', ')', ';'
        ],
        13: ['}'],
        14: [
            'int', 'unaffected_function4', '(', 'void', ')', '{', 'printk', '(',
            '"this is fine, too."', ')', ';', '}'
        ]
    }
    self.assertEqual(line_chunk.base.tokens, expected_tokens)
    self.assertEqual(line_chunk.target_file, target_file)
    self.assertIsNotNone(line_chunk.normalized_code)
    self.assertIsNotNone(line_chunk.line_hashes)
    self.assertIsNotNone(line_chunk.used_lines)

  def test_parser_with_init_failure(self):
    """Constructing a parser for a missing file raises StatusNotOk."""
    missing_file = 'NonExistingFile.c'
    target_file = 'foo/bar/testfile.c'
    with self.assertRaisesRegex(status.StatusNotOk, 'Failed to open file:.*'):
      parser.Parser(missing_file, target_file)
@dataclasses.dataclass(frozen=True)
class Report:
  """Dataclass to contain an individual finding to report.

  Each report corresponds to a mapping of one signature and one matched chunk.

  Attributes:
    signature_id: unique ID of the matched signature.
    signature_target_file: original target file of the signature.
    signature_target_function: original target function of the signature.
    signature_source: the source of the patch used to generate the signature.
    unpatched_file: the file matched the signature in the target system.
    unpatched_function_name: the function matched the signature in the target
      system.
    is_non_target_match: whether this matches against a signature's target
      file, or match against other files in the scanned code.
  """

  signature_id: str
  signature_target_file: str
  signature_target_function: str
  signature_source: str
  unpatched_file: str
  unpatched_function_name: str
  is_non_target_match: bool

  def get_simple_report(
      self,
      include_patch_source: bool = False,
      use_html_link_for_patch_source: bool = False,
  ) -> str:
    """Returns unpatched file and optionally unpatched function name.

    Args:
      include_patch_source: if True, the patch source of the signature is
        appended to the report.
      use_html_link_for_patch_source: if True, the patch source is appended as
        an HTML link labeled 'patch' instead of plain text. Has no effect
        unless |include_patch_source| is True.

    Returns:
      A one-line summary formatted as 'file[::function()][ (patch source)]'.
    """
    simple_report = self.unpatched_file
    if self.unpatched_function_name:
      simple_report += '::%s()' % self.unpatched_function_name
    if include_patch_source:
      if use_html_link_for_patch_source:
        # The format string needs a placeholder for the source; without one
        # the %-formatting raises TypeError.
        simple_report += ' (<a href="%s">patch</a>)' % self.signature_source
      else:
        simple_report += ' (patch:%s)' % self.signature_source
    return simple_report


@dataclasses.dataclass(frozen=True)
class ReportGroup:
  """Dataclass for managing multiple reports grouped by a vulnerability ID."""

  osv_id: str
  cve_ids: Sequence[str]
  reports: Sequence[Report]


class ReportBook:
  """Class for managing multiple report groups."""

  def __init__(
      self,
      reports: Sequence[Report],
      vul_manager: 'vulnerability_manager.VulnerabilityManager',
  ):
    """Generates a report book for the given reports.

    Args:
      reports: the reports to group by vulnerability.
      vul_manager: the manager used to map each signature ID to its OSV ID and
        each OSV ID to its CVE IDs.
    """
    reports_per_vul = collections.defaultdict(list)
    for report in reports:
      osv_id = vul_manager.sign_id_to_osv_id(report.signature_id)
      reports_per_vul[osv_id].append(report)
    # Groups keep the order in which their OSV IDs first appear in |reports|.
    self._report_group_dict = {
        osv_id: ReportGroup(
            osv_id, vul_manager.osv_id_to_cve_ids(osv_id), grouped_reports
        )
        for osv_id, grouped_reports in reports_per_vul.items()
    }

  @property
  def unpatched_vulnerabilities(self) -> Sequence[Union[str, None]]:
    """Returns a list of OSV IDs of vulns reported as not patched."""
    return list(self._report_group_dict)

  @functools.cached_property
  def unpatched_cves(self) -> Sequence[str]:
    """Returns a sorted, de-duplicated list of CVEs reported as not patched."""
    cves = itertools.chain.from_iterable(
        rgroup.cve_ids for rgroup in self._report_group_dict.values()
    )
    return sorted(set(cves))

  def get_report_group(self, osv_id: str) -> Optional[ReportGroup]:
    """Returns a report group mapped to |osv_id|.

    Args:
      osv_id: the OSV ID string.

    Returns:
      Returns a report group mapped to |osv_id| or None if none matches.
    """
    return self._report_group_dict.get(osv_id)


def generate_reports(
    findings: 'scanner_base.Findings'
) -> Sequence[Report]:
  """A helper function to convert a Scanner's Findings to a list of Reports.

  Args:
    findings: a mapping of each matched signature to the chunks it matched.

  Returns:
    One report per (signature, chunk) pair. The target function and the
    unpatched function name are empty strings when the signature or the chunk
    base does not carry them.
  """
  reports = []
  for sign, chunks in findings.items():
    for chunk in chunks:
      # A match is on target only if the chunk's path ends with the
      # signature's target file path.
      is_non_target_match = not chunk.target_file.endswith(sign.target_file)
      reports.append(
          Report(
              signature_id=sign.signature_id,
              signature_target_file=sign.target_file,
              signature_target_function=getattr(sign, 'target_function', ''),
              signature_source=sign.source,
              unpatched_file=chunk.target_file,
              unpatched_function_name=getattr(chunk.base, 'name', ''),
              is_non_target_match=is_non_target_match,
          )
      )
  return reports
_TEST_SIGN_ID = 'asb-a-test-sign-1234'
_TEST_TARGET_FILE = 'foo/bar/target_file.c'
_TEST_TARGET_FUNC = 'target_func1'
_TEST_SOURCE = 'https://android.googlesource.com/some/test/source'
_TEST_UNPATCHED_FILE = 'foo/bar/unpatched_file.c'
_TEST_UNPATCHED_FUNC = 'unpatched_func1'
_TEST_IS_NON_TARGET_MATCH = True


class ReporterTest(absltest.TestCase):
  """Tests for reporter.Report and reporter.ReportBook."""

  def setUp(self):
    super().setUp()
    # Positional order mirrors the Report constructor call used originally.
    report_fields = (
        _TEST_SIGN_ID,
        _TEST_TARGET_FILE,
        _TEST_TARGET_FUNC,
        _TEST_SOURCE,
        _TEST_UNPATCHED_FILE,
        _TEST_UNPATCHED_FUNC,
        _TEST_IS_NON_TARGET_MATCH,
    )
    self._test_report = reporter.Report(*report_fields)

  def test_get_simple_report(self):
    simple_report = self._test_report.get_simple_report()
    self.assertEqual(
        simple_report, 'foo/bar/unpatched_file.c::unpatched_func1()'
    )

  def test_generate_report_book(self):
    # Ten reports that differ only in signature ID and source (suffix 0..9).
    reports = [
        dataclasses.replace(
            self._test_report,
            signature_id=_TEST_SIGN_ID + str(index),
            signature_source=_TEST_SOURCE + str(index),
        )
        for index in range(10)
    ]

    def fake_sign_id_to_osv_id(sign_id):
      # The last character of the signature ID is the 0..9 suffix.
      return 'osv-id-%s' % sign_id[-1]

    def fake_osv_id_to_cve_ids(osv_id):
      return [osv_id.replace('osv', 'cve')]

    mock_vul_manager = mock.create_autospec(
        vulnerability_manager.VulnerabilityManager, instance=True
    )
    mock_vul_manager.sign_id_to_osv_id.side_effect = fake_sign_id_to_osv_id
    mock_vul_manager.osv_id_to_cve_ids.side_effect = fake_osv_id_to_cve_ids

    report_book = reporter.ReportBook(reports, mock_vul_manager)

    self.assertEqual(
        report_book.unpatched_vulnerabilities,
        ['osv-id-%d' % index for index in range(10)],
    )
    self.assertEqual(
        report_book.unpatched_cves,
        ['cve-id-%d' % index for index in range(10)],
    )

    # Every input report must be reachable through exactly one report group.
    collected_reports = []
    for osv_id in report_book.unpatched_vulnerabilities:
      report_group = report_book.get_report_group(osv_id)
      collected_reports.extend(report_group.reports)
    self.assertCountEqual(collected_reports, reports)
deps = [ 57 | ":scanner_base", 58 | ":target_selection_strategy", 59 | "//:vulnerability_manager", 60 | ], 61 | ) 62 | 63 | py_library( 64 | name = "repo_scanner", 65 | srcs = ["repo_scanner.py"], 66 | deps = [ 67 | ":package_identifier", 68 | ":scanner_base", 69 | ":target_selection_strategy", 70 | "//:signature", 71 | "//:vulnerability", 72 | "//:vulnerability_manager", 73 | ], 74 | ) 75 | 76 | py_library( 77 | name = "target_selection_strategy", 78 | srcs = ["target_selection_strategy.py"], 79 | deps = [ 80 | ":package_identifier", 81 | "//:parser", 82 | "//:signature", 83 | "//:truncated_path", 84 | ], 85 | ) 86 | 87 | py_library( 88 | name = "package_identifier", 89 | srcs = ["package_identifier.py"], 90 | deps = [ 91 | "//:truncated_path", 92 | "//:vulnerability", 93 | "//:vulnerability_manager", 94 | requirement("absl-py"), 95 | ], 96 | ) 97 | 98 | py_test( 99 | name = "scanner_base_test", 100 | srcs = ["scanner_base_test.py"], 101 | deps = [ 102 | ":scanner_base", 103 | ":target_selection_strategy", 104 | "//:parser", 105 | "//:reporter", 106 | "//:signature", 107 | "//vanir/language_parsers:common", 108 | requirement("absl-py"), 109 | ], 110 | ) 111 | 112 | py_test( 113 | name = "offline_directory_scanner_test", 114 | srcs = ["offline_directory_scanner_test.py"], 115 | data = [ 116 | "//vanir/testdata:test_signatures_json", 117 | ], 118 | deps = [ 119 | ":offline_directory_scanner", 120 | ":scanner_base", 121 | "//:vulnerability_manager", 122 | requirement("absl-py"), 123 | ], 124 | ) 125 | 126 | py_test( 127 | name = "android_kernel_scanner_test", 128 | srcs = ["android_kernel_scanner_test.py"], 129 | data = [ 130 | "//vanir/testdata:test_signatures_json", 131 | ], 132 | deps = [ 133 | ":android_kernel_scanner", 134 | ":scanner_base", 135 | "//:vulnerability_manager", 136 | requirement("absl-py"), 137 | requirement("requests"), 138 | ], 139 | ) 140 | 141 | py_test( 142 | name = "package_scanner_test", 143 | srcs = ["package_scanner_test.py"], 144 | data = 
[ 145 | "//vanir/testdata:test_signatures_json", 146 | ], 147 | deps = [ 148 | ":package_scanner", 149 | ":scanner_base", 150 | "//:vulnerability", 151 | "//:vulnerability_manager", 152 | requirement("absl-py"), 153 | requirement("requests"), 154 | ], 155 | ) 156 | 157 | py_test( 158 | name = "repo_scanner_test", 159 | srcs = ["repo_scanner_test.py"], 160 | data = [ 161 | "//vanir/testdata:test_signatures_json", 162 | ], 163 | deps = [ 164 | ":package_identifier", 165 | ":repo_scanner", 166 | ":scanner_base", 167 | ":target_selection_strategy", 168 | "//:signature", 169 | "//:vulnerability_manager", 170 | requirement("absl-py"), 171 | requirement("requests"), 172 | ], 173 | ) 174 | 175 | py_test( 176 | name = "target_selection_strategy_test", 177 | srcs = ["target_selection_strategy_test.py"], 178 | deps = [ 179 | ":target_selection_strategy", 180 | "//:signature", 181 | requirement("absl-py"), 182 | ], 183 | ) 184 | 185 | py_test( 186 | name = "package_identifier_test", 187 | srcs = ["package_identifier_test.py"], 188 | data = [ 189 | "//vanir/testdata:test_signatures_json", 190 | ], 191 | deps = [ 192 | ":package_identifier", 193 | "//:truncated_path", 194 | "//:vulnerability", 195 | "//:vulnerability_manager", 196 | requirement("absl-py"), 197 | ], 198 | ) 199 | -------------------------------------------------------------------------------- /vanir/scanners/android_kernel_scanner.py: -------------------------------------------------------------------------------- 1 | # Copyright 2023 Google LLC 2 | # 3 | # Use of this source code is governed by a BSD-style 4 | # license that can be found in the LICENSE file or at 5 | # https://developers.google.com/open-source/licenses/bsd 6 | 7 | """Vanir detector scanner that scans Android Linux Kernel vulns. 
class AndroidKernelScanner(package_scanner.PackageScanner):
  """Package scanner preconfigured for Android Linux kernel vulnerabilities.

  The scanner fixes the ecosystem to 'Android' and the package to the
  ANDROID_KERNEL meta package, then scans the designated directory with the
  kernel vuln signatures. Any non-kernel vuln signatures included in the
  vulnerability manager will be ignored.
  """

  def __init__(self, code_location):
    super().__init__(
        code_location=code_location,
        package=vulnerability.MetaPackage.ANDROID_KERNEL,
        ecosystem='Android',
    )

  @classmethod
  def name(cls):
    return 'android_kernel_scanner'

  def scan(
      self,
      strategy: target_selection_strategy.Strategy = (
          target_selection_strategy.Strategy.TRUNCATED_PATH_MATCH
      ),
      override_vuln_manager: Optional[
          vulnerability_manager.VulnerabilityManager
      ] = None,
      extra_vulnerability_filters: Optional[
          Sequence[vulnerability_manager.VulnerabilityFilter]
      ] = None,
      vulnerability_overwrite_specs: Optional[
          Sequence[vulnerability_overwriter.OverwriteSpec]
      ] = None,
  ) -> Tuple[
      scanner_base.Findings,
      scanner_base.ScannedFileStats,
      vulnerability_manager.VulnerabilityManager,
  ]:
    """Runs the package scan and tags the stats with the kernel version.

    Args:
      strategy: target file selection strategy forwarded to the parent scan.
      override_vuln_manager: forwarded to the parent scan unchanged.
      extra_vulnerability_filters: forwarded to the parent scan unchanged.
      vulnerability_overwrite_specs: forwarded to the parent scan unchanged.

    Returns:
      A tuple of the findings, a copy of the parent's scan stats whose
      |scan_metadata| additionally holds a 'version' entry, and the
      vulnerability manager used for the scan.
    """
    logging.info(
        'Scanning %s against Android kernel signatures...', self._code_location)
    findings, base_stats, vuln_manager = super().scan(
        strategy=strategy,
        override_vuln_manager=override_vuln_manager,
        extra_vulnerability_filters=extra_vulnerability_filters,
        vulnerability_overwrite_specs=vulnerability_overwrite_specs,
    )

    logging.info('Collecting findings...')
    kernel_version = version_extractor.extract_version(self._code_location)
    # Copy the metadata so the stats object returned by the parent scan is not
    # mutated; the 'version' key overrides any pre-existing entry.
    metadata = dict(base_stats.scan_metadata or {})
    metadata['version'] = kernel_version
    versioned_stats = dataclasses.replace(base_stats, scan_metadata=metadata)
    return findings, versioned_stats, vuln_manager
@mock.patch.object(requests.sessions, 'Session', autospec=True)
def test_scan_osv(self, mock_session_class):
  """Scans without an override manager so signatures come from mocked OSV."""
  # Read the fixture through a context manager so the file handle is closed
  # deterministically instead of being left to the garbage collector.
  with open(_TEST_SIGNATURES_FILE, mode='rb') as signatures_file:
    text = b'{"vulns":' + signatures_file.read() + b'}'
  # The first POST returns the full payload; every later POST returns an empty
  # object (presumably because several OSV queries are issued -- confirm
  # against the OSV client).
  mock_session_class().post.side_effect = itertools.chain(
      [mock.Mock(text=text)], itertools.repeat(mock.Mock(text=b'{}'))
  )
  scanner = android_kernel_scanner.AndroidKernelScanner(self._code_location)
  _, _, vul_manager = scanner.scan()
  self.assertEqual(
      {sig.signature_id for sig in vul_manager.signatures},
      {'ASB-A-111893654-8ead4b9c', 'ASB-A-111893654-2d607d27'},
  )
class OfflineDirectoryScanner(scanner_base.ScannerBase):
  """Vanir scanner that scans a directory against caller-supplied signatures."""

  def __init__(self, code_location: str):
    self._code_location = code_location

  @classmethod
  def name(cls):
    return 'offline_directory_scanner'

  def scan_offline_directory(
      self,
      vuln_manager: vulnerability_manager.VulnerabilityManager,
      strategy: target_selection_strategy.Strategy = (
          target_selection_strategy.Strategy.TRUNCATED_PATH_MATCH
      ),
  ) -> Tuple[scanner_base.Findings, scanner_base.ScannedFileStats]:
    """Scans the local directory designated in |_code_location|.

    Args:
      vuln_manager: the manager whose signatures are scanned against.
      strategy: the target file selection strategy.

    Returns:
      Whatever scanner_base.scan() returns for the directory.
    """
    signatures = vuln_manager.signatures
    return scanner_base.scan(self._code_location, signatures, strategy=strategy)

  def scan(
      self,
      strategy: target_selection_strategy.Strategy = (
          target_selection_strategy.Strategy.TRUNCATED_PATH_MATCH
      ),
      override_vuln_manager: Optional[
          vulnerability_manager.VulnerabilityManager
      ] = None,
      extra_vulnerability_filters: Optional[
          Sequence[vulnerability_manager.VulnerabilityFilter]
      ] = None,
      vulnerability_overwrite_specs: Optional[
          Sequence[vulnerability_overwriter.OverwriteSpec]
      ] = None,
  ) -> Tuple[
      scanner_base.Findings,
      scanner_base.ScannedFileStats,
      vulnerability_manager.VulnerabilityManager,
  ]:
    """Scans the directory against the signatures of |override_vuln_manager|.

    Args:
      strategy: the target file selection strategy.
      override_vuln_manager: the manager supplying the signatures; required.
      extra_vulnerability_filters: filters applied when the scan's manager is
        generated from |override_vuln_manager|.
      vulnerability_overwrite_specs: not referenced by this implementation.
        NOTE(review): confirm that ignoring the specs here is intended.

    Returns:
      A tuple of the findings, the scanned file stats and the (filtered)
      vulnerability manager that was scanned against.

    Raises:
      ValueError: if |override_vuln_manager| is None.
    """
    if override_vuln_manager is None:
      raise ValueError(
          f'{self.name()} requires at least one --vulnerability_file_name')

    filtered_manager = vulnerability_manager.generate_from_managers(
        [override_vuln_manager],
        vulnerability_filters=extra_vulnerability_filters,
    )
    logging.info('Scanning %s against all signatures...', self._code_location)
    findings, stats = self.scan_offline_directory(filtered_manager, strategy)
    return findings, stats, filtered_manager
DEFAULT_TRUNCATED_PATH_LEVEL = 2

_DEFAULT_TRHESHOLD = 0.5
_DEFAULT_MIN_PACKAGE_TRUNCATED_PATHS = 5


class PackageIdentifier:
  """Class to heuristically identify if a directory belongs to a package."""

  def __init__(
      self,
      vuln_manager: 'vulnerability_manager.VulnerabilityManager',
      ecosystem: str,
  ):
    """Collects the signatures of every affected package in |vuln_manager|.

    Args:
      vuln_manager: the vulnerability manager to read packages and signatures
        from.
      ecosystem: the ecosystem passed to the per-package signature lookup.
    """
    # Mapping from a package name to its signatures from vuln manager.
    self._signatures_per_package = {
        package_name: vuln_manager.get_signatures_for_package(
            ecosystem, package_name
        )
        for package_name in vuln_manager.affected_package_names
    }

  # NOTE: lru_cache on a method keys on |self| and therefore keeps every
  # PackageIdentifier instance alive as long as its entries stay cached.
  @functools.lru_cache
  def get_truncated_paths(
      self,
      package_name: str,
  ) -> 'FrozenSet[truncated_path.TruncatedPath]':
    """Returns truncated paths of signatures of the given package.

    Args:
      package_name: the package whose signatures are inspected.

    Returns:
      The truncated paths of the package's signatures. A package without
      registered signatures yields an empty set.
    """
    # An unregistered package has no signatures; treat it as empty rather than
    # failing on iteration over None.
    signatures = self._signatures_per_package.get(package_name) or ()
    truncated_paths = set()
    for sign in signatures:
      if sign.truncated_path_level is not None:
        level = sign.truncated_path_level
      else:
        # No explicit level on the signature: use the default, capped by the
        # maximum level the target file path supports.
        level = min(
            DEFAULT_TRUNCATED_PATH_LEVEL,
            truncated_path.TruncatedPath.get_max_level(sign.target_file),
        )
      if truncated_path.TruncatedPath.is_level_ok(sign.target_file, level):
        truncated_paths.add(
            truncated_path.TruncatedPath(sign.target_file, level)
        )
    return frozenset(truncated_paths)

  def get_package_name_if_signature_exist(
      self,
      package_name: str,
  ) -> Optional[str]:
    """Returns |package_name| if signatures are registered for it.

    NOTE(review): earlier documentation said a package belonging to a meta
    package is replaced by that meta package; this implementation returns the
    name unchanged -- confirm whether normalization is expected here.

    Args:
      package_name: the string package name or meta package to check.

    Returns:
      |package_name| if it is registered in this PackageIdentifier; None,
      otherwise.
    """
    if package_name not in self._signatures_per_package:
      logging.debug(
          'Found no signatures registered for package %s',
          package_name,
      )
      return None
    return package_name

  def is_package_mapped_to_repo(
      self,
      package_name: str,
      repo_file_list: Sequence[str],
      threshold: float = _DEFAULT_TRHESHOLD,
      min_package_truncated_paths: int = _DEFAULT_MIN_PACKAGE_TRUNCATED_PATHS,
  ) -> bool:
    """Heuristically checks if a repository maps to the given package.

    Note that the use of this function is not necessarily limited to
    repositories. Repository here essentially means a collection of files.

    Args:
      package_name: the package name to check.
      repo_file_list: names of all files in the repository.
      threshold: the minimum truncated path inclusion rate for a package to be
        determined as being mapped to the repository.
      min_package_truncated_paths: the minimum number of truncated paths found
        for the package. If the number of the truncated paths for the package
        found from the signatures is less than this, this function will blindly
        return False.

    Returns:
      True if the package is mapped to the repository; False, otherwise
      (including when no signatures are registered for |package_name|).
    """
    known_package = self.get_package_name_if_signature_exist(package_name)
    if known_package is None:
      # Without registered signatures there is nothing to match against.
      return False

    package_truncated_paths = self.get_truncated_paths(known_package)

    if len(package_truncated_paths) < min_package_truncated_paths:
      logging.debug(
          'Too few truncated paths found for the package %s. '
          'The directory will be regarded as not mapped. '
          'Found: %d. Required: %d.',
          known_package,
          len(package_truncated_paths),
          min_package_truncated_paths,
      )
      return False

    inclusion_rate = (
        truncated_path.check_inclusion_rate_of_truncated_paths_in_file_list(
            package_truncated_paths, repo_file_list
        )
    )
    logging.debug('inclusion rate of %s: %f', known_package, inclusion_rate)

    return inclusion_rate >= threshold

  def packages_for_repo(
      self,
      repo_name: str,
      repo_file_list: Sequence[str],
      threshold: float = _DEFAULT_TRHESHOLD,
      min_package_truncated_paths: int = _DEFAULT_MIN_PACKAGE_TRUNCATED_PATHS,
  ) -> Collection[str]:
    """Returns OSV package names corresponding to this repo.

    This function checks which OSV packages the given repo maps to by
    1) checking its repository name maps to any package name in the signatures
    registered in this PackageIdentifier; 2) heuristically checking if the file
    list of this repo maps to one or more known packages.

    NOTE(review): earlier documentation said names belonging to a meta package
    are normalized to that meta package (e.g. ":linux_kernel:Qualcomm" ->
    ":linux_kernel:"); no such normalization happens in this class -- confirm.

    Args:
      repo_name: Name of the repo as listed in the manifest.
      repo_file_list: list of all files in |repo|.
      threshold: the minimum truncated path inclusion rate for a package to be
        determined as being mapped to the repository.
      min_package_truncated_paths: the minimum number of truncated paths found
        for this repo to be matched to this package.

    Returns:
      Zero or more OSV package names that the given repo maps to.
    """
    packages = set()
    # Many packages use its manifest project name as their package name
    # e.g. "platform/packages/apps/Bluetooth".
    normalized_pkg_name = self.get_package_name_if_signature_exist(repo_name)
    if normalized_pkg_name:
      packages.add(normalized_pkg_name)

    # Also heuristically identify the packages that could be mapped to this
    # repo.
    packages.update(
        package
        for package in self._signatures_per_package
        if self.is_package_mapped_to_repo(
            package, repo_file_list, threshold, min_package_truncated_paths
        )
    )
    return packages
class PackageScanner(offline_directory_scanner.OfflineDirectoryScanner):
  """Scan a code directory against vulns within a single OSV package.

  PackageScanner is an offline directory scanner with package filters. I.e.,
  this scanner first filters out all signatures unrelated to the ecosystem and
  package given at construction time, and then runs against the offline
  directory designated in |code_location|.
  """

  def __init__(
      self, ecosystem: str, package: scanner_base.Package, code_location: str
  ):
    super().__init__(code_location)
    self._ecosystem = ecosystem
    self._package = package

  @classmethod
  def name(cls):
    return 'package_scanner'

  def scan(
      self,
      strategy: target_selection_strategy.Strategy = (
          target_selection_strategy.Strategy.TRUNCATED_PATH_MATCH
      ),
      override_vuln_manager: Optional[
          vulnerability_manager.VulnerabilityManager
      ] = None,
      extra_vulnerability_filters: Optional[
          Sequence[vulnerability_manager.VulnerabilityFilter]
      ] = None,
      vulnerability_overwrite_specs: Optional[
          Sequence[vulnerability_overwriter.OverwriteSpec]
      ] = None,
  ) -> Tuple[
      scanner_base.Findings,
      scanner_base.ScannedFileStats,
      vulnerability_manager.VulnerabilityManager,
  ]:
    """Runs the scan restricted to this scanner's ecosystem and package.

    Args:
      strategy: the target file selection strategy.
      override_vuln_manager: if given, signatures are taken from this manager
        (after filtering) instead of being generated from OSV.
      extra_vulnerability_filters: additional filters appended after the
        ecosystem and package filters. Any sequence type is accepted.
      vulnerability_overwrite_specs: overwrite specs passed on when the manager
        is generated from OSV; not referenced when |override_vuln_manager| is
        given.

    Returns:
      A tuple of the findings, the scanned file stats and the vulnerability
      manager that was scanned against.
    """
    is_meta_package = isinstance(self._package, vulnerability.MetaPackage)
    # A meta package is matched through its pattern; a plain package is
    # matched by its name.
    package_selector = (
        self._package.package_pattern if is_meta_package else self._package
    )
    package_vfilter = vulnerability_manager.AffectedPackageNameFilter(
        package_selector
    )
    ecosystem_filter = vulnerability_manager.AffectedEcosystemFilter(
        self._ecosystem
    )
    vfilters = [ecosystem_filter, package_vfilter]
    # extend() accepts any sequence; `list + tuple` would raise TypeError for
    # callers passing the extra filters as a tuple.
    vfilters.extend(extra_vulnerability_filters or ())

    if override_vuln_manager is not None:
      vuln_manager = vulnerability_manager.generate_from_managers(
          [override_vuln_manager], vulnerability_filters=vfilters
      )
    else:
      vuln_manager = vulnerability_manager.generate_from_osv(
          self._ecosystem,
          self._package if is_meta_package else [self._package],
          vulnerability_filters=vfilters,
          vulnerability_overwrite_specs=vulnerability_overwrite_specs,
      )

    logging.info(
        'Scanning %s against signatures for %s...',
        self._code_location, self._package)
    findings, stats = self.scan_offline_directory(vuln_manager, strategy)

    return findings, stats, vuln_manager
@mock.patch.object(requests.sessions, 'Session', autospec=True)
def test_scan_frameworks_base_from_osv(self, mock_session_class):
  """Checks that signatures are fetched via OSV when no override is given."""
  # Read the fixture through a context manager so the file handle is closed.
  with open(_TEST_SIGNATURES_FILE, mode='rb') as signatures_file:
    text = b'{"vulns":' + signatures_file.read() + b'}'
  mock_session_class().post.return_value = mock.Mock(text=text)
  scanner = package_scanner.PackageScanner(
      'Android', 'platform/frameworks/base', self._code_location
  )
  # Deliberately no override_vuln_manager: passing one makes scan() skip the
  # OSV path, which would leave the mocked session above unused.
  findings, stats, vul_manager = scanner.scan()
  mock_session_class().post.assert_called()
  self.assertIs(findings, self._mock_findings)
  self.assertIs(stats, self._mock_stats)
  self.assertSetEqual(
      set(sig.signature_id for sig in vul_manager.signatures),
      set(['ASB-A-281018094-2d96898e'])
  )
30 | 'no_matching/prefix/dirs/foo/bar/truncated_path_match.c', 31 | } 32 | for file in self._test_files: 33 | self._test_dir.create_file(file) 34 | self._mock_sign_1 = mock.create_autospec( 35 | signature.FunctionSignature, 36 | instance=True, 37 | signature_id='sign1', 38 | source='https://android.googlesource.com/sign1_source', 39 | target_file='exact_match1.c', 40 | target_function='foo', 41 | truncated_path_level=None, 42 | signature_type=signature.SignatureType.FUNCTION_SIGNATURE, 43 | length=3, 44 | ) 45 | self._mock_sign_2 = mock.create_autospec( 46 | signature.LineSignature, 47 | instance=True, 48 | signature_id='sign2', 49 | source='https://android.googlesource.com/sign2_source', 50 | target_file='foo/exact_match2.c', 51 | truncated_path_level=None, 52 | signature_type=signature.SignatureType.LINE_SIGNATURE, 53 | ) 54 | self._mock_sign_3 = mock.create_autospec( 55 | signature.LineSignature, 56 | instance=True, 57 | signature_id='sign3', 58 | source='https://android.googlesource.com/sign3_source', 59 | target_file=( 60 | 'somewhat/different/dir/prefix/foo/bar/truncated_path_match.c' 61 | ), 62 | truncated_path_level=2, 63 | signature_type=signature.SignatureType.LINE_SIGNATURE, 64 | ) 65 | 66 | self._mock_signatures = [ 67 | self._mock_sign_1, 68 | self._mock_sign_2, 69 | self._mock_sign_3, 70 | ] 71 | self._mock_sign_bundle = mock.create_autospec( 72 | signature.SignatureBundle, instance=True 73 | ) 74 | type(self._mock_sign_bundle).signatures = mock.PropertyMock( 75 | return_value=self._mock_signatures 76 | ) 77 | 78 | def test_all_files_strategy(self): 79 | to_scan, skipped = ( 80 | target_selection_strategy.Strategy.ALL_FILES.get_target_files( 81 | self._test_dir.full_path, self._mock_sign_bundle 82 | ) 83 | ) 84 | expected_scan_targets = { 85 | os.path.join(self._test_dir.full_path, test_file_path) 86 | for test_file_path in (self._test_files - {'unsupported_file_type.txt'}) 87 | } 88 | self.assertCountEqual(to_scan, expected_scan_targets) 89 | 
self.assertEqual(skipped, 0) 90 | 91 | def test_exact_path_match_strategy(self): 92 | to_scan, skipped = ( 93 | target_selection_strategy.Strategy.EXACT_PATH_MATCH.get_target_files( 94 | self._test_dir.full_path, self._mock_sign_bundle 95 | ) 96 | ) 97 | expected_scan_targets = { 98 | os.path.join(self._test_dir.full_path, test_file_path) 99 | for test_file_path in [ 100 | 'exact_match1.c', 101 | 'foo/exact_match2.c', 102 | ] 103 | } 104 | self.assertCountEqual(to_scan, expected_scan_targets) 105 | self.assertEqual(skipped, 4) 106 | 107 | def test_truncated_path_match_strategy(self): 108 | to_scan, skipped = ( 109 | target_selection_strategy.Strategy.TRUNCATED_PATH_MATCH.get_target_files( 110 | self._test_dir.full_path, self._mock_sign_bundle 111 | ) 112 | ) 113 | expected_scan_targets = { 114 | os.path.join(self._test_dir.full_path, test_file_path) 115 | for test_file_path in [ 116 | 'exact_match1.c', 117 | 'foo/exact_match2.c', 118 | 'prefix/on/sig/without/tp/foo/exact_match2.c', 119 | 'no_matching/prefix/dirs/foo/bar/truncated_path_match.c', 120 | ] 121 | } 122 | self.assertCountEqual(to_scan, expected_scan_targets) 123 | self.assertEqual(skipped, 2) 124 | 125 | def test_truncated_path_match_raises_if_level_is_invalid(self): 126 | self._mock_sign_3.truncated_path_level = 100 127 | expected_error_message = 'The signature .* has invalid Truncated Path Level' 128 | with self.assertRaisesRegex(ValueError, expected_error_message): 129 | target_selection_strategy.Strategy.TRUNCATED_PATH_MATCH.get_target_files( 130 | self._test_dir.full_path, self._mock_sign_bundle 131 | ) 132 | 133 | 134 | if __name__ == '__main__': 135 | absltest.main() 136 | -------------------------------------------------------------------------------- /vanir/testdata/BUILD.bazel: -------------------------------------------------------------------------------- 1 | # Copyright 2023 Google LLC 2 | # 3 | # Use of this source code is governed by a BSD-style 4 | # license that can be found in the 
LICENSE file or at 5 | # https://developers.google.com/open-source/licenses/bsd 6 | 7 | # Package for Vanir test data. 8 | load("@rules_python//python:defs.bzl", "py_library") 9 | 10 | package(default_visibility = [ 11 | "//visibility:public", 12 | ]) 13 | 14 | filegroup( 15 | name = "vanir_test_source_simplified", 16 | srcs = [ 17 | "test_frameworks_base.tar.gz", 18 | ], 19 | ) 20 | 21 | filegroup( 22 | name = "test_vulnerabilities", 23 | srcs = [ 24 | "test_vulnerabilities_kernel.json", 25 | "test_vulnerabilities_platform.json", 26 | ], 27 | ) 28 | 29 | filegroup( 30 | name = "test_signatures_json", 31 | srcs = [ 32 | "test_signatures.json", 33 | "test_signatures.zip", 34 | ], 35 | ) 36 | 37 | filegroup( 38 | name = "test_gitiles_data", 39 | srcs = glob(["gitiles/**"]), 40 | ) 41 | 42 | filegroup( 43 | name = "test_patch_set", 44 | srcs = [ 45 | "test_patch_file", 46 | "test_patched_file", 47 | "test_unpatched_file", 48 | "test_unrelated_file", 49 | ], 50 | ) 51 | 52 | exports_files([ 53 | "test_overwrite_specs.json", 54 | ]) 55 | 56 | py_library( 57 | name = "test_signatures", 58 | srcs = ["test_signatures.py"], 59 | deps = [ 60 | "//:signature", 61 | ], 62 | ) 63 | -------------------------------------------------------------------------------- /vanir/testdata/gitiles/b2dc041a4e84986e3a6932b127d3a18ef02b6d0a.patch.base64: -------------------------------------------------------------------------------- 1 | 
ZGlmZiAtLWdpdCBhL3NlcnZpY2VzL2NvcmUvamF2YS9jb20vYW5kcm9pZC9zZXJ2ZXIvb20vT3ZlcmxheU1hbmFnZXJTZXJ2aWNlLmphdmEgYi9zZXJ2aWNlcy9jb3JlL2phdmEvY29tL2FuZHJvaWQvc2VydmVyL29tL092ZXJsYXlNYW5hZ2VyU2VydmljZS5qYXZhCmluZGV4IDI3YjE2NDguLmVlMGIzZDUgMTAwNjQ0Ci0tLSBhL3NlcnZpY2VzL2NvcmUvamF2YS9jb20vYW5kcm9pZC9zZXJ2ZXIvb20vT3ZlcmxheU1hbmFnZXJTZXJ2aWNlLmphdmEKKysrIGIvc2VydmljZXMvY29yZS9qYXZhL2NvbS9hbmRyb2lkL3NlcnZlci9vbS9PdmVybGF5TWFuYWdlclNlcnZpY2UuamF2YQpAQCAtNzEsNiArNzEsNyBAQAogaW1wb3J0IGFuZHJvaWQudXRpbC5BcnJheU1hcDsKIGltcG9ydCBhbmRyb2lkLnV0aWwuQXJyYXlTZXQ7CiBpbXBvcnQgYW5kcm9pZC51dGlsLkF0b21pY0ZpbGU7CitpbXBvcnQgYW5kcm9pZC51dGlsLkV2ZW50TG9nOwogaW1wb3J0IGFuZHJvaWQudXRpbC5TbG9nOwogaW1wb3J0IGFuZHJvaWQudXRpbC5TcGFyc2VBcnJheTsKIApAQCAtODEsNyArODIsNiBAQAogaW1wb3J0IGNvbS5hbmRyb2lkLnNlcnZlci5Mb2NhbFNlcnZpY2VzOwogaW1wb3J0IGNvbS5hbmRyb2lkLnNlcnZlci5TeXN0ZW1Db25maWc7CiBpbXBvcnQgY29tLmFuZHJvaWQuc2VydmVyLlN5c3RlbVNlcnZpY2U7Ci0KIGltcG9ydCBjb20uYW5kcm9pZC5zZXJ2ZXIucG0uVXNlck1hbmFnZXJTZXJ2aWNlOwogaW1wb3J0IGNvbS5hbmRyb2lkLnNlcnZlci5wbS5wYXJzaW5nLnBrZy5BbmRyb2lkUGFja2FnZTsKIApAQCAtMjg1LDYgKzI4NSwxMiBAQAogCiAgICAgICAgICAgICByZXN0b3JlU2V0dGluZ3MoKTsKIAorICAgICAgICAgICAgLy8gV2lwZSBhbGwgc2hlbGwgb3ZlcmxheXMgb24gYm9vdCwgdG8gcmVjb3ZlciBmcm9tIGEgcG90ZW50aWFsbHkgYnJva2VuIGRldmljZQorICAgICAgICAgICAgU3RyaW5nIHNoZWxsUGtnTmFtZSA9IFRleHRVdGlscy5lbXB0eUlmTnVsbCgKKyAgICAgICAgICAgICAgICAgICAgZ2V0Q29udGV4dCgpLmdldFN0cmluZyhhbmRyb2lkLlIuc3RyaW5nLmNvbmZpZ19zeXN0ZW1TaGVsbCkpOworICAgICAgICAgICAgbVNldHRpbmdzLnJlbW92ZUlmKG92ZXJsYXlJbmZvIC0+IG92ZXJsYXlJbmZvLmlzRmFicmljYXRlZAorICAgICAgICAgICAgICAgICAgICAmJiBzaGVsbFBrZ05hbWUuZXF1YWxzKG92ZXJsYXlJbmZvLnBhY2thZ2VOYW1lKSk7CisKICAgICAgICAgICAgIGluaXRJZk5lZWRlZCgpOwogICAgICAgICAgICAgb25Td2l0Y2hVc2VyKFVzZXJIYW5kbGUuVVNFUl9TWVNURU0pOwogCkBAIC04OTEsNiArODk3LDE2IEBACiAgICAgICAgICAgICAgICAgICAgIHRocm93IG5ldyBJbGxlZ2FsQXJndW1lbnRFeGNlcHRpb24ocmVxdWVzdC50eXBlVG9TdHJpbmcoKQogICAgICAgICAgICAgICAgICAgICAgICAgICAgICsgIiB1bnN1cHBvcnRlZCBmb3IgdXNlciAiICsgcmVxdWVzdC51c2VySWQpOwogICAgICAgICAgICAgICAg
IH0KKworICAgICAgICAgICAgICAgIC8vIE5vcm1hbCBhcHBzIGFyZSBibG9ja2VkIGZyb20gYWNjZXNzaW5nIE9NUyB2aWEgU0VMaW51eCwgc28gdG8gYmxvY2sgbm9uLXJvb3QsCisgICAgICAgICAgICAgICAgLy8gbm9uIHByaXZpbGVnZWQgY2FsbGVycywgYSBzaW1wbGUgY2hlY2sgYWdhaW5zdCB0aGUgc2hlbGwgVUlEIGlzIHN1ZmZpY2llbnQsIHNpbmNlCisgICAgICAgICAgICAgICAgLy8gdGhhdCdzIHRoZSBvbmx5IGV4Y2VwdGlvbiBmcm9tIHRoZSBvdGhlciBjYXRlZ29yaWVzLiBUaGlzIGlzIGVub3VnaCB3aGlsZSBPTVMKKyAgICAgICAgICAgICAgICAvLyBpcyBub3QgYSBwdWJsaWMgQVBJLCBidXQgdGhpcyB3aWxsIGhhdmUgdG8gYmUgY2hhbmdlZCBpZiBpdCdzIGV2ZXIgZXhwb3NlZC4KKyAgICAgICAgICAgICAgICBpZiAoY2FsbGluZ1VpZCA9PSBQcm9jZXNzLlNIRUxMX1VJRCkgeworICAgICAgICAgICAgICAgICAgICBFdmVudExvZy53cml0ZUV2ZW50KDB4NTM0ZTQ1NTQsICIyMDI3NjgyOTIiLCAtMSwgIiIpOworICAgICAgICAgICAgICAgICAgICB0aHJvdyBuZXcgSWxsZWdhbEFyZ3VtZW50RXhjZXB0aW9uKCJOb24tcm9vdCBzaGVsbCBjYW5ub3QgZmFicmljYXRlIG92ZXJsYXlzIik7CisgICAgICAgICAgICAgICAgfQorCiAgICAgICAgICAgICAgICAgcmVhbFVzZXJJZCA9IFVzZXJIYW5kbGUuVVNFUl9BTEw7CiAKICAgICAgICAgICAgICAgICAvLyBFbmZvcmNlIHRoYXQgdGhlIGNhbGxpbmcgcHJvY2VzcyBjYW4gb25seSByZWdpc3RlciBhbmQgdW5yZWdpc3RlciBmYWJyaWNhdGVkCg== 2 | -------------------------------------------------------------------------------- /vanir/testdata/gitiles/b2dc041a4e84986e3a6932b127d3a18ef02b6d0a.patchinfo.base64: -------------------------------------------------------------------------------- 1 | 
dHJlZSAyZWVmY2IwYTRhNmY3YzkxYTNmMWE2N2E0NGU0N2Q0MjIxZmY0YmUyCnBhcmVudCBjM2M5YWRhNmU1ZDk0NmNlN2QyMjQ2NDlmMWQ2NTI4Y2U4MGIxYTI0CmF1dGhvciBXaW5zb24gPGNoaXV3aW5zb25AZ29vZ2xlLmNvbT4gMTYzNDU5MDg0MSAtMDcwMApjb21taXR0ZXIgQW5kcm9pZCBCdWlsZCBDb2FzdGd1YXJkIFdvcmtlciA8YW5kcm9pZC1idWlsZC1jb2FzdGd1YXJkLXdvcmtlckBnb29nbGUuY29tPiAxNjM3Nzk2MTA2ICswMDAwCgpCbG9jayBTSEVMTF9VSUQgZnJvbSBvdmVybGF5IGZhYnJpY2F0ZSBhbmQgY2xlYXIgc2hlbGwgb3ZlcmxheXMgb24gYm9vdAoKUHJldmVudHMgbm9uLXJvb3Qgc2hlbGwgZnJvbSBmYWJyaWNhdGluZyBvdmVybGF5cyBhbmQgcmVtb3ZlcyBhbGwKb3ZlcmxheXMgd2l0aCB0aGVpciBjcmVhdGluZyBwYWNrYWdlIGFzIHNoZWxsIG9uIGJvb3QuCgpQcmV2ZW50cyB1bmFwcHJvdmVkIHVzYWdlcyBvZiB0aGUgZmFicmljYXRpb24gQVBJIGFuZCBjbGVhbnMgdXAKYW55IHBvdGVudGlhbGx5IGJyb2tlbiBvdmVybGF5cyByZXNwZWN0aXZlbHkuCgpCdWc6IDIwMjc2ODI5MgoKVGVzdDogc3RzLXRyYWRlZmVkIHJ1biBzdHMtZW5nYnVpbGQtbm8tc3BsLWxvY2sgXAogICAgICAgIC1tIFN0c0hvc3RUZXN0Q2FzZXMgLXQgYW5kcm9pZC5zZWN1cml0eS5zdHMuQnVnXzIwMjc2ODI5MgoKQ2hhbmdlLUlkOiBJMTg0Y2M0OThlNDljNDE2ZTE4NGNjMDg1NWE4ODEwYjU3ZGQwODE3NQooY2hlcnJ5IHBpY2tlZCBmcm9tIGNvbW1pdCA2MjdkNWViNjhlMTlhOGVhMThjM2MxNDA1NzAxYjNhMzNmMDczMzE1KQpNZXJnZWQtSW46STE4NGNjNDk4ZTQ5YzQxNmUxODRjYzA4NTVhODgxMGI1N2RkMDgxNzUK 2 | -------------------------------------------------------------------------------- /vanir/testdata/test_frameworks_base.tar.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/vanir/fe4afbc9215e786b643431694040a076c3af2c64/vanir/testdata/test_frameworks_base.tar.gz -------------------------------------------------------------------------------- /vanir/testdata/test_overwrite_specs.json: -------------------------------------------------------------------------------- 1 | [ 2 | { 3 | "vuln_id": "ASB-A-172999675", 4 | "replace": [ 5 | { 6 | "path": "affected[*].ecosystem_specific.severity", 7 | "value": "Low" 8 | } 9 | ], 10 | "reason": "This vulnerability is not applicable to Nest devices because they lack SMS capability and do not use the telephony 
service." 11 | }, 12 | { 13 | "vuln_id": "ASB-A-202768292", 14 | "replace": [ 15 | { 16 | "path": "affected[*].ecosystem_specific.severity", 17 | "value": "Low" 18 | } 19 | ], 20 | "reason": "This issue is specific to devices using Quick Share and is not relevant to smartwatches. Therefore, it can be marked as Low severity." 21 | } 22 | ] 23 | -------------------------------------------------------------------------------- /vanir/testdata/test_patch_file: -------------------------------------------------------------------------------- 1 | diff --git a/ipsum b/ipsum 2 | index dcf24e1..8b16546 100644 3 | --- a/ipsum 4 | +++ b/ipsum 5 | @@ -1,3 +1,6 @@ 6 | +AFFECTED LINE START CASE 2) This hunk's first non-context line is 7 | +an added line and there is no context line before 8 | +the first non-context line. 9 | 1 Lorem ipsum dolor sit amet, 10 | 2 consectetur adipiscing elit. 11 | 3 Vivamus condimentum rhoncus est volutpat venenatis. 12 | @@ -15,6 +18,9 @@ 13 | 15 Morbi sagittis eget neque sagittis egestas. 14 | 16 Quisque viverra arcu a cursus dignissim. 15 | 17 16 | +AFFECTED LINE START CASE 3) This hunk's first non-context line is 17 | +an added line and there are context lines before 18 | +the first non-context line. 19 | 18 In aliquam, 20 | 19 mi ut laoreet varius, 21 | 20 ex ante posuere justo, 22 | @@ -37,11 +43,9 @@ 23 | 37 Sed vulputate quam augue, 24 | 38 ac lacinia diam ullamcorper at. 25 | 39 26 | -40 Fusce efficitur fermentum mi, 27 | -41 sed imperdiet ipsum dignissim eu. 28 | -42 Nam semper quis ex eget iaculis. 29 | -43 In a diam dolor. 30 | 44 Integer fermentum in arcu commodo tempus. 31 | +AFFECTED LINE START CASE 1) This hunk's first non-context line is 32 | +a removed line. 33 | 45 Phasellus ultricies gravida ante ac tristique. 34 | 46 Vestibulum volutpat id neque a porttitor. 35 | 47 Donec ullamcorper sed augue eget efficitur. 36 | @@ -112,8 +116,8 @@ 37 | 112 Nulla facilisi. 
38 | 113 Sed pooosuere nisi quis felis varius, 39 | 114 ut maximus elit placerat. 40 | +AFFECTED LINE STOP CASE 1) This hunk's last non-context line is a removed line. 41 | 115 Sed vulputate quam augue, 42 | -116 ac lacinia diam ullamcooorper at. 43 | 117 44 | 118 Fusce efficitur fermentum mi, 45 | 119 sed imperdiet ipsum dignissim eu. 46 | @@ -136,6 +140,8 @@ 47 | 136 Aenean metus leooo, 48 | 137 looobooortis nooon auctooor eget, 49 | 138 bibendum at mi. 50 | +AFFECTED LINE STOP CASE 3) This hunk's last non-context line is an added line, 51 | +and there are context lines after the added line. 52 | 139 Aenean mauris est, 53 | 140 cooonvallis efficitur mooolestie et, 54 | 141 suscipit in dooolooor. 55 | @@ -153,3 +159,5 @@ 56 | 153 Quisque cooongue turpis id erat egestas pellentesque. 57 | 154 Nulla fermentum ac massa nec sooollicitudin. 58 | 155 Fusce sed ultrices tellus. 59 | +AFFECTED LINE STOP CASE 2) This hunk's last non-context line is an added line, 60 | +and there is no context line after the added line. 61 | -------------------------------------------------------------------------------- /vanir/testdata/test_patched_file: -------------------------------------------------------------------------------- 1 | AFFECTED LINE START CASE 2) This hunk's first non-context line is 2 | an added line and there is no context line before 3 | the first non-context line. 4 | 1 Lorem ipsum dolor sit amet, 5 | 2 consectetur adipiscing elit. 6 | 3 Vivamus condimentum rhoncus est volutpat venenatis. 7 | 4 Fusce semper, 8 | 5 sapien ut venenatis pellentesque, 9 | 6 lorem dui aliquam sapien, 10 | 7 non pharetra diam neque id mi. 11 | 8 Suspendisse sollicitudin, 12 | 9 metus ut gravida semper, 13 | 10 nunc ipsum ullamcorper nunc, 14 | 11 ut maximus nulla nunc dignissim justo. 15 | 12 Duis nec nisi leo. 16 | 13 Proin tristique massa mi, 17 | 14 imperdiet tempus nibh vulputate quis. 18 | 15 Morbi sagittis eget neque sagittis egestas. 19 | 16 Quisque viverra arcu a cursus dignissim. 
20 | 17 21 | AFFECTED LINE START CASE 3) This hunk's first non-context line is 22 | an added line and there are context lines before 23 | the first non-context line. 24 | 18 In aliquam, 25 | 19 mi ut laoreet varius, 26 | 20 ex ante posuere justo, 27 | 21 eget aliquam magna metus id purus. 28 | 22 Aenean convallis sem ac purus bibendum, 29 | 23 sit amet mattis augue fermentum. 30 | 24 Quisque cursus posuere mi, 31 | 25 vitae vestibulum purus egestas eget. 32 | 26 Nunc eu sagittis est, 33 | 27 at elementum leo. 34 | 28 Pellentesque habitant morbi tristique senectus et netus 35 | 29 et malesuada fames ac turpis egestas. 36 | 30 Nulla interdum lacus a turpis maximus, 37 | 31 scelerisque aliquam magna ultricies. 38 | 32 Aliquam erat volutpat. 39 | 33 Mauris eget tellus sed velit aliquet venenatis ut nec eros. 40 | 34 Nulla facilisi. 41 | 35 Sed posuere nisi quis felis varius, 42 | 36 ut maximus elit placerat. 43 | 37 Sed vulputate quam augue, 44 | 38 ac lacinia diam ullamcorper at. 45 | 39 46 | 44 Integer fermentum in arcu commodo tempus. 47 | AFFECTED LINE START CASE 1) This hunk's first non-context line is 48 | a removed line. 49 | 45 Phasellus ultricies gravida ante ac tristique. 50 | 46 Vestibulum volutpat id neque a porttitor. 51 | 47 Donec ullamcorper sed augue eget efficitur. 52 | 48 Ut vitae aliquet lorem, 53 | 49 quis ultrices neque. 54 | 50 Fusce blandit ac mauris vel porttitor. 55 | 51 Pellentesque eget ultrices augue. 56 | 52 Pellentesque tempor maximus nunc sit amet ullamcorper. 57 | 53 58 | 54 Nulla non vehicula magna. 59 | 55 In eget condimentum nisi, 60 | 56 sit amet auctor purus. 61 | 57 Proin venenatis lorem non mattis interdum. 62 | 58 Aenean metus leo, 63 | 59 lobortis non auctor eget, 64 | 60 bibendum at mi. 65 | 61 Aenean mauris est, 66 | 62 convallis efficitur molestie et, 67 | 63 suscipit in dolor. 68 | 64 Duis eget volutpat est. 69 | 65 Fusce id maximus arcu, 70 | 66 et tincidunt purus. 71 | 67 In efficitur volutpat est nec eleifend. 
72 | 68 Proin malesuada pharetra mattis. 73 | 69 74 | 70 Donec sed arcu sit amet nisi vulputate ornare. 75 | 71 Integer accumsan ullamcorper mauris, 76 | 72 eget semper ipsum. 77 | 73 Suspendisse sagittis pellentesque molestie. 78 | 74 Duis at venenatis erat. 79 | 75 Quisque congue turpis id erat egestas pellentesque. 80 | 76 Nulla fermentum ac massa nec sollicitudin. 81 | 77 Fusce sed ultrices tellus. 82 | 78 83 | 79 Looorem ipsum dooolooor sit amet, 84 | 80 cooonsectetur adipiscing elit. 85 | 81 Vivamus cooondimentum rhoooncus est vooolutpat venenatis. 86 | 82 Fusce semper, 87 | 83 sapien ut venenatis pellentesque, 88 | 84 looorem dui aliquam sapien, 89 | 85 nooon pharetra diam neque id mi. 90 | 86 Suspendisse sooollicitudin, 91 | 87 metus ut gravida semper, 92 | 88 nunc ipsum ullamcooorper nunc, 93 | 89 ut maximus nulla nunc dignissim justooo. 94 | 90 Duis nec nisi leooo. 95 | 91 Proooin tristique massa mi, 96 | 92 imperdiet tempus nibh vulputate quis. 97 | 93 Mooorbi sagittis eget neque sagittis egestas. 98 | 94 Quisque viverra arcu a cursus dignissim. 99 | 95 100 | 96 In aliquam, 101 | 97 mi ut laoooreet varius, 102 | 98 ex ante pooosuere justooo, 103 | 99 eget aliquam magna metus id purus. 104 | 100 Aenean cooonvallis sem ac purus bibendum, 105 | 101 sit amet mattis augue fermentum. 106 | 102 Quisque cursus pooosuere mi, 107 | 103 vitae vestibulum purus egestas eget. 108 | 104 Nunc eu sagittis est, 109 | 105 at elementum leooo. 110 | 106 Pellentesque habitant mooorbi tristique senectus et netus 111 | 107 et malesuada fames ac turpis egestas. 112 | 108 Nulla interdum lacus a turpis maximus, 113 | 109 scelerisque aliquam magna ultricies. 114 | 110 Aliquam erat vooolutpat. 115 | 111 Mauris eget tellus sed velit aliquet venenatis ut nec erooos. 116 | 112 Nulla facilisi. 117 | 113 Sed pooosuere nisi quis felis varius, 118 | 114 ut maximus elit placerat. 119 | AFFECTED LINE STOP CASE 1) This hunk's last non-context line is a removed line. 
120 | 115 Sed vulputate quam augue, 121 | 117 122 | 118 Fusce efficitur fermentum mi, 123 | 119 sed imperdiet ipsum dignissim eu. 124 | 120 Nam semper quis ex eget iaculis. 125 | 121 In a diam dooolooor. 126 | 122 Integer fermentum in arcu cooommooodooo tempus. 127 | 123 Phasellus ultricies gravida ante ac tristique. 128 | 124 Vestibulum vooolutpat id neque a pooorttitooor. 129 | 125 Dooonec ullamcooorper sed augue eget efficitur. 130 | 126 Ut vitae aliquet looorem, 131 | 127 quis ultrices neque. 132 | 128 Fusce blandit ac mauris vel pooorttitooor. 133 | 129 Pellentesque eget ultrices augue. 134 | 130 Pellentesque tempooor maximus nunc sit amet ullamcooorper. 135 | 131 136 | 132 Nulla nooon vehicula magna. 137 | 133 In eget cooondimentum nisi, 138 | 134 sit amet auctooor purus. 139 | 135 Proooin venenatis looorem nooon mattis interdum. 140 | 136 Aenean metus leooo, 141 | 137 looobooortis nooon auctooor eget, 142 | 138 bibendum at mi. 143 | AFFECTED LINE STOP CASE 3) This hunk's last non-context line is an added line, 144 | and there are context lines after the added line. 145 | 139 Aenean mauris est, 146 | 140 cooonvallis efficitur mooolestie et, 147 | 141 suscipit in dooolooor. 148 | 142 Duis eget vooolutpat est. 149 | 143 Fusce id maximus arcu, 150 | 144 et tincidunt purus. 151 | 145 In efficitur vooolutpat est nec eleifend. 152 | 146 Proooin malesuada pharetra mattis. 153 | 147 154 | 148 Dooonec sed arcu sit amet nisi vulputate ooornare. 155 | 149 Integer accumsan ullamcooorper mauris, 156 | 150 eget semper ipsum. 157 | 151 Suspendisse sagittis pellentesque mooolestie. 158 | 152 Duis at venenatis erat. 159 | 153 Quisque cooongue turpis id erat egestas pellentesque. 160 | 154 Nulla fermentum ac massa nec sooollicitudin. 161 | 155 Fusce sed ultrices tellus. 162 | AFFECTED LINE STOP CASE 2) This hunk's last non-context line is an added line, 163 | and there is no context line after the added line. 
164 | -------------------------------------------------------------------------------- /vanir/testdata/test_signatures.json: -------------------------------------------------------------------------------- 1 | [ 2 | { 3 | "id": "ASB-A-281018094", 4 | "modified": "2023-08-10T00:29:25Z", 5 | "aliases": ["CVE-2023-21277"], 6 | "affected": [ 7 | { 8 | "package": { 9 | "ecosystem": "Android", 10 | "name": "platform/frameworks/base" 11 | }, 12 | "ecosystem_specific": { 13 | "spl": "2023-08-01", 14 | "vanir_signatures": [ 15 | { 16 | "id": "ASB-A-281018094-2d96898e", 17 | "signature_type": "Line", 18 | "signature_version": "v1", 19 | "source": "sourceurl1", 20 | "target": { 21 | "file": "core/tests/coretests/src/android/widget/RemoteViewsTest.java" 22 | }, 23 | "deprecated": false, 24 | "digest": { 25 | "line_hashes": [ 26 | 334131876326018032544521194211819459045, 27 | 121761277599204750374848752368714155334, 28 | 259240847050637455583249007782337036423, 29 | 134254861871231063659248509086353379703 30 | ], 31 | "threshold": 0.9 32 | } 33 | } 34 | ] 35 | } 36 | }, 37 | { 38 | "package": { 39 | "ecosystem": "Android", 40 | "name": "platform/packages/apps/Bluetooth" 41 | }, 42 | "ecosystem_specific": { 43 | "spl": "2023-08-01", 44 | "vanir_signatures": [ 45 | { 46 | "id": "ASB-A-281018094-97ec235e", 47 | "signature_type": "Line", 48 | "signature_version": "v1", 49 | "source": "sourceurl2", 50 | "target": { 51 | "file": "core/java/android/widget/RemoteViews.java" 52 | }, 53 | "deprecated": false, 54 | "digest": { 55 | "line_hashes": [ 56 | 329665021611362043020227002656192683749, 57 | 33777507542537233731901269949093178718, 58 | 124601394975036829142918177453582828532, 59 | 284236484958232671673872160266776287285, 60 | 213586894066632749964895258528904068608, 61 | 175496572067704317475568725408983891312, 62 | 283390899376001510600988390542171144052, 63 | 83283854559210764264479836884934600211 64 | ], 65 | "threshold": 0.9 66 | } 67 | } 68 | ] 69 | } 70 | } 71 | ] 72 | }, 73 
| { 74 | "id": "ASB-A-111893654", 75 | "modified": "2023-07-27T21:43:08Z", 76 | "aliases": ["CVE-2020-0404"], 77 | "affected": [ 78 | { 79 | "package": { 80 | "ecosystem": "Android", 81 | "name": ":linux_kernel:Qualcomm" 82 | }, 83 | "ecosystem_specific": { 84 | "spl": "2020-09-05", 85 | "vanir_signatures": [ 86 | { 87 | "id": "ASB-A-111893654-8ead4b9c", 88 | "signature_type": "Function", 89 | "signature_version": "v1", 90 | "source": "https://android.googlesource.com/kernel/common/+/68035c80e129", 91 | "target": { 92 | "file": "drivers/media/usb/uvc/uvc_driver.c", 93 | "function": "uvc_scan_chain_forward", 94 | "truncated_path_level": 1 95 | }, 96 | "deprecated": false, 97 | "digest": { 98 | "function_hash": 194215759102333248697385159289433439026, 99 | "length": 1173 100 | } 101 | }, 102 | { 103 | "id": "ASB-A-111893654-2d607d27", 104 | "signature_type": "Function", 105 | "signature_version": "v1", 106 | "source": "https://android.googlesource.com/kernel/common/+/68035c80e129", 107 | "target": { 108 | "file": "drivers/media/usb/uvc/uvc_driver.c", 109 | "function": "uvc_scan_chain_backward", 110 | "truncated_path_level": 3 111 | }, 112 | "deprecated": false, 113 | "digest": { 114 | "function_hash": 60626479737422533824532741070373279186, 115 | "length": 1549 116 | } 117 | } 118 | ] 119 | } 120 | } 121 | ] 122 | } 123 | ] 124 | -------------------------------------------------------------------------------- /vanir/testdata/test_signatures.zip: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/vanir/fe4afbc9215e786b643431694040a076c3af2c64/vanir/testdata/test_signatures.zip -------------------------------------------------------------------------------- /vanir/testdata/test_unpatched_file: -------------------------------------------------------------------------------- 1 | 1 Lorem ipsum dolor sit amet, 2 | 2 consectetur adipiscing elit. 3 | 3 Vivamus condimentum rhoncus est volutpat venenatis. 
4 | 4 Fusce semper, 5 | 5 sapien ut venenatis pellentesque, 6 | 6 lorem dui aliquam sapien, 7 | 7 non pharetra diam neque id mi. 8 | 8 Suspendisse sollicitudin, 9 | 9 metus ut gravida semper, 10 | 10 nunc ipsum ullamcorper nunc, 11 | 11 ut maximus nulla nunc dignissim justo. 12 | 12 Duis nec nisi leo. 13 | 13 Proin tristique massa mi, 14 | 14 imperdiet tempus nibh vulputate quis. 15 | 15 Morbi sagittis eget neque sagittis egestas. 16 | 16 Quisque viverra arcu a cursus dignissim. 17 | 17 18 | 18 In aliquam, 19 | 19 mi ut laoreet varius, 20 | 20 ex ante posuere justo, 21 | 21 eget aliquam magna metus id purus. 22 | 22 Aenean convallis sem ac purus bibendum, 23 | 23 sit amet mattis augue fermentum. 24 | 24 Quisque cursus posuere mi, 25 | 25 vitae vestibulum purus egestas eget. 26 | 26 Nunc eu sagittis est, 27 | 27 at elementum leo. 28 | 28 Pellentesque habitant morbi tristique senectus et netus 29 | 29 et malesuada fames ac turpis egestas. 30 | 30 Nulla interdum lacus a turpis maximus, 31 | 31 scelerisque aliquam magna ultricies. 32 | 32 Aliquam erat volutpat. 33 | 33 Mauris eget tellus sed velit aliquet venenatis ut nec eros. 34 | 34 Nulla facilisi. 35 | 35 Sed posuere nisi quis felis varius, 36 | 36 ut maximus elit placerat. 37 | 37 Sed vulputate quam augue, 38 | 38 ac lacinia diam ullamcorper at. 39 | 39 40 | 40 Fusce efficitur fermentum mi, 41 | 41 sed imperdiet ipsum dignissim eu. 42 | 42 Nam semper quis ex eget iaculis. 43 | 43 In a diam dolor. 44 | 44 Integer fermentum in arcu commodo tempus. 45 | 45 Phasellus ultricies gravida ante ac tristique. 46 | 46 Vestibulum volutpat id neque a porttitor. 47 | 47 Donec ullamcorper sed augue eget efficitur. 48 | 48 Ut vitae aliquet lorem, 49 | 49 quis ultrices neque. 50 | 50 Fusce blandit ac mauris vel porttitor. 51 | 51 Pellentesque eget ultrices augue. 52 | 52 Pellentesque tempor maximus nunc sit amet ullamcorper. 53 | 53 54 | 54 Nulla non vehicula magna. 55 | 55 In eget condimentum nisi, 56 | 56 sit amet auctor purus. 
57 | 57 Proin venenatis lorem non mattis interdum. 58 | 58 Aenean metus leo, 59 | 59 lobortis non auctor eget, 60 | 60 bibendum at mi. 61 | 61 Aenean mauris est, 62 | 62 convallis efficitur molestie et, 63 | 63 suscipit in dolor. 64 | 64 Duis eget volutpat est. 65 | 65 Fusce id maximus arcu, 66 | 66 et tincidunt purus. 67 | 67 In efficitur volutpat est nec eleifend. 68 | 68 Proin malesuada pharetra mattis. 69 | 69 70 | 70 Donec sed arcu sit amet nisi vulputate ornare. 71 | 71 Integer accumsan ullamcorper mauris, 72 | 72 eget semper ipsum. 73 | 73 Suspendisse sagittis pellentesque molestie. 74 | 74 Duis at venenatis erat. 75 | 75 Quisque congue turpis id erat egestas pellentesque. 76 | 76 Nulla fermentum ac massa nec sollicitudin. 77 | 77 Fusce sed ultrices tellus. 78 | 78 79 | 79 Looorem ipsum dooolooor sit amet, 80 | 80 cooonsectetur adipiscing elit. 81 | 81 Vivamus cooondimentum rhoooncus est vooolutpat venenatis. 82 | 82 Fusce semper, 83 | 83 sapien ut venenatis pellentesque, 84 | 84 looorem dui aliquam sapien, 85 | 85 nooon pharetra diam neque id mi. 86 | 86 Suspendisse sooollicitudin, 87 | 87 metus ut gravida semper, 88 | 88 nunc ipsum ullamcooorper nunc, 89 | 89 ut maximus nulla nunc dignissim justooo. 90 | 90 Duis nec nisi leooo. 91 | 91 Proooin tristique massa mi, 92 | 92 imperdiet tempus nibh vulputate quis. 93 | 93 Mooorbi sagittis eget neque sagittis egestas. 94 | 94 Quisque viverra arcu a cursus dignissim. 95 | 95 96 | 96 In aliquam, 97 | 97 mi ut laoooreet varius, 98 | 98 ex ante pooosuere justooo, 99 | 99 eget aliquam magna metus id purus. 100 | 100 Aenean cooonvallis sem ac purus bibendum, 101 | 101 sit amet mattis augue fermentum. 102 | 102 Quisque cursus pooosuere mi, 103 | 103 vitae vestibulum purus egestas eget. 104 | 104 Nunc eu sagittis est, 105 | 105 at elementum leooo. 106 | 106 Pellentesque habitant mooorbi tristique senectus et netus 107 | 107 et malesuada fames ac turpis egestas. 
108 | 108 Nulla interdum lacus a turpis maximus, 109 | 109 scelerisque aliquam magna ultricies. 110 | 110 Aliquam erat vooolutpat. 111 | 111 Mauris eget tellus sed velit aliquet venenatis ut nec erooos. 112 | 112 Nulla facilisi. 113 | 113 Sed pooosuere nisi quis felis varius, 114 | 114 ut maximus elit placerat. 115 | 115 Sed vulputate quam augue, 116 | 116 ac lacinia diam ullamcooorper at. 117 | 117 118 | 118 Fusce efficitur fermentum mi, 119 | 119 sed imperdiet ipsum dignissim eu. 120 | 120 Nam semper quis ex eget iaculis. 121 | 121 In a diam dooolooor. 122 | 122 Integer fermentum in arcu cooommooodooo tempus. 123 | 123 Phasellus ultricies gravida ante ac tristique. 124 | 124 Vestibulum vooolutpat id neque a pooorttitooor. 125 | 125 Dooonec ullamcooorper sed augue eget efficitur. 126 | 126 Ut vitae aliquet looorem, 127 | 127 quis ultrices neque. 128 | 128 Fusce blandit ac mauris vel pooorttitooor. 129 | 129 Pellentesque eget ultrices augue. 130 | 130 Pellentesque tempooor maximus nunc sit amet ullamcooorper. 131 | 131 132 | 132 Nulla nooon vehicula magna. 133 | 133 In eget cooondimentum nisi, 134 | 134 sit amet auctooor purus. 135 | 135 Proooin venenatis looorem nooon mattis interdum. 136 | 136 Aenean metus leooo, 137 | 137 looobooortis nooon auctooor eget, 138 | 138 bibendum at mi. 139 | 139 Aenean mauris est, 140 | 140 cooonvallis efficitur mooolestie et, 141 | 141 suscipit in dooolooor. 142 | 142 Duis eget vooolutpat est. 143 | 143 Fusce id maximus arcu, 144 | 144 et tincidunt purus. 145 | 145 In efficitur vooolutpat est nec eleifend. 146 | 146 Proooin malesuada pharetra mattis. 147 | 147 148 | 148 Dooonec sed arcu sit amet nisi vulputate ooornare. 149 | 149 Integer accumsan ullamcooorper mauris, 150 | 150 eget semper ipsum. 151 | 151 Suspendisse sagittis pellentesque mooolestie. 152 | 152 Duis at venenatis erat. 153 | 153 Quisque cooongue turpis id erat egestas pellentesque. 154 | 154 Nulla fermentum ac massa nec sooollicitudin. 
155 | 155 Fusce sed ultrices tellus. 156 | -------------------------------------------------------------------------------- /vanir/testdata/test_unrelated_file: -------------------------------------------------------------------------------- 1 | Duis et enim est. Nunc condimentum felis nec porta finibus. Sed ac justo sit 2 | amet urna dictum finibus. Maecenas sapien tellus, rhoncus et viverra rutrum, 3 | auctor quis erat. Proin condimentum nulla quam, eu pretium magna egestas sit 4 | amet. Sed luctus lorem quis erat hendrerit, vitae rutrum diam congue. Sed ipsum 5 | tellus, faucibus vel molestie non, pharetra sit amet dolor. Nullam nec consequat 6 | sem, quis ullamcorper nunc. Integer odio ligula, pretium quis volutpat quis, 7 | bibendum sit amet turpis. Sed ut consequat velit, quis congue neque. 8 | 9 | Duis gravida sapien non leo tempor consequat. Maecenas congue nisl posuere, 10 | tristique nulla vel, dictum dui. In scelerisque neque at pellentesque dignissim. 11 | Cras dapibus elit sit amet varius aliquam. Phasellus sit amet viverra tortor. 12 | Quisque eget risus vitae diam efficitur ornare at id tortor. Etiam ut ligula 13 | metus. In fermentum semper efficitur. Nunc pretium consectetur commodo. Donec 14 | quis magna libero. Ut sit amet eros eget massa ullamcorper facilisis. 
15 | -------------------------------------------------------------------------------- /vanir/testdata/test_vulnerabilities_kernel.json: -------------------------------------------------------------------------------- 1 | [ 2 | { 3 | "id": "ASB-A-210498909", 4 | "modified": "2022-06-01T00:00:00Z", 5 | "published": "2022-06-01T00:00:00Z", 6 | "aliases": ["CVE-2018-25020"], 7 | "summary": "", 8 | "references": [ 9 | { 10 | "type": "ADVISORY", 11 | "url": "https://source.android.com/security/bulletin/2022-06-01#2022-06-05-security-patch-level-vulnerability-details" 12 | }, 13 | { 14 | "type": "FIX", 15 | "url": "https://android.googlesource.com/kernel/common/+/050fad7c4534c13c8eb1d9c2ba66012e014773cb" 16 | }, 17 | { 18 | "type": "FIX", 19 | "url": "https://android.googlesource.com/kernel/common/+/99253eb750fda6a644d5188fb26c43bad8d5a745" 20 | } 21 | ], 22 | "affected": [ 23 | { 24 | "package": { 25 | "ecosystem": "Android", 26 | "name": ":linux_kernel:" 27 | }, 28 | "ecosystem_specific": { 29 | "type": "EoP", 30 | "severity": "Moderate", 31 | "spl": "2022-06-05", 32 | "fixes": [ 33 | "https://android.googlesource.com/kernel/common/+/050fad7c4534c13c8eb1d9c2ba66012e014773cb", 34 | "https://android.googlesource.com/kernel/common/+/99253eb750fda6a644d5188fb26c43bad8d5a745" 35 | ] 36 | }, 37 | "ranges": [ 38 | { 39 | "type": "ECOSYSTEM", 40 | "events": [ 41 | { 42 | "introduced": ":0" 43 | }, 44 | { 45 | "fixed": ":2022-06-05" 46 | } 47 | ] 48 | } 49 | ] 50 | } 51 | ] 52 | }, 53 | { 54 | "id": "ASB-A-172999675", 55 | "modified": "2023-03-11T02:15:13Z", 56 | "published": "2021-02-01T00:00:00Z", 57 | "aliases": ["CVE-2017-18509"], 58 | "summary": "In ip6_mroute_setsockopt and related functions of ip6mr.c, there is a possible out of bounds write due to a logic error in the code. This could lead to local escalation of privilege with no additional execution privileges needed. 
User interaction is not needed for exploitation.", 59 | "references": [ 60 | { 61 | "type": "ADVISORY", 62 | "url": "https://source.android.com/security/bulletin/2021-02-01#2021-02-05-security-patch-level-vulnerability-details" 63 | }, 64 | { 65 | "type": "FIX", 66 | "url": "https://android.googlesource.com/kernel/common/+/99253eb750fda6a644d5188fb26c43bad8d5a745" 67 | } 68 | ], 69 | "affected": [ 70 | { 71 | "package": { "ecosystem": "Android", "name": "kernel/common" }, 72 | "versions": ["Kernel"], 73 | "ecosystem_specific": { 74 | "type": "EoP", 75 | "severity": "High", 76 | "spl": "2021-02-05", 77 | "fixes": [ 78 | "https://android.googlesource.com/kernel/common/+/99253eb750fda6a644d5188fb26c43bad8d5a745" 79 | ] 80 | }, 81 | "ranges": [ 82 | { 83 | "type": "ECOSYSTEM", 84 | "events": [{ "introduced": ":0" }, { "fixed": ":2021-02-05" }] 85 | } 86 | ] 87 | } 88 | ] 89 | }, 90 | { 91 | "id": "PUB-A-160818461", 92 | "modified": "2023-04-12T18:19:28Z", 93 | "published": "2022-12-01T00:00:00Z", 94 | "aliases": [ 95 | "CVE-2020-0465" 96 | ], 97 | "summary": "USB device causing OOB write in hid_output_report", 98 | "details": "In hid_output_report of hid-core.c, there is a possible out of bounds write due to a missing bounds check. This could lead to local escalation of privilege with no additional execution privileges needed. 
User interaction is needed for exploitation.", 99 | "references": [ 100 | { 101 | "type": "ADVISORY", 102 | "url": "https://source.android.com/security/bulletin/2022-12-01" 103 | }, 104 | { 105 | "type": "FIX", 106 | "url": "https://android.googlesource.com/kernel/common/+/bce1305c0ece3" 107 | } 108 | ], 109 | "affected": [ 110 | { 111 | "package": { 112 | "ecosystem": "Android", 113 | "name": ":linux_kernel:" 114 | }, 115 | "versions": [ 116 | "Kernel" 117 | ], 118 | "ecosystem_specific": { 119 | "type": "EoP", 120 | "severity": "Moderate", 121 | "spl": "2022-12-05", 122 | "fixes": [ 123 | "https://android.googlesource.com/kernel/common/+/bce1305c0ece3" 124 | ] 125 | }, 126 | "ranges": [ 127 | { 128 | "type": "ECOSYSTEM", 129 | "events": [ 130 | { 131 | "introduced": ":0" 132 | }, 133 | { 134 | "fixed": ":2022-12-05" 135 | } 136 | ] 137 | } 138 | ] 139 | } 140 | ] 141 | } 142 | ] 143 | -------------------------------------------------------------------------------- /vanir/testdata/test_vulnerabilities_platform.json: -------------------------------------------------------------------------------- 1 | [ 2 | { 3 | "id": "ASB-A-202768292", 4 | "modified": "2023-04-12T18:19:27Z", 5 | "published": "2022-01-01T00:00:00Z", 6 | "aliases": [ 7 | "CVE-2021-39630" 8 | ], 9 | "summary": "[Security Issue] Inconsistent Root Permission Check for Fabricated Overlays", 10 | "details": "In executeRequest of OverlayManagerService.java, there is a possible way to control fabricated overlays from adb shell due to a permissions bypass. This could lead to local escalation of privilege with no additional execution privileges needed. 
User interaction is not needed for exploitation.", 11 | "references": [ 12 | { 13 | "type": "ADVISORY", 14 | "url": "https://source.android.com/security/bulletin/2022-01-01" 15 | }, 16 | { 17 | "type": "FIX", 18 | "url": "https://android.googlesource.com/platform/frameworks/base/+/b2dc041a4e84986e3a6932b127d3a18ef02b6d0a" 19 | } 20 | ], 21 | "affected": [ 22 | { 23 | "package": { 24 | "ecosystem": "Android", 25 | "name": "platform/frameworks/base" 26 | }, 27 | "versions": [ 28 | "12" 29 | ], 30 | "ecosystem_specific": { 31 | "type": "EoP", 32 | "severity": "High", 33 | "spl": "2022-01-01", 34 | "fixes": [ 35 | "https://android.googlesource.com/platform/frameworks/base/+/b2dc041a4e84986e3a6932b127d3a18ef02b6d0a" 36 | ] 37 | }, 38 | "ranges": [ 39 | { 40 | "type": "ECOSYSTEM", 41 | "events": [ 42 | { 43 | "introduced": "12:0" 44 | }, 45 | { 46 | "fixed": "12:2022-01-01" 47 | } 48 | ] 49 | } 50 | ] 51 | } 52 | ] 53 | } 54 | ] 55 | -------------------------------------------------------------------------------- /vanir/truncated_path.py: -------------------------------------------------------------------------------- 1 | # Copyright 2023 Google LLC 2 | # 3 | # Use of this source code is governed by a BSD-style 4 | # license that can be found in the LICENSE file or at 5 | # https://developers.google.com/open-source/licenses/bsd 6 | 7 | """Module implementing Truncated Path algorithm. 8 | 9 | Truncated Path algorithm is designed to compute an applicability of Vanir 10 | signatures against a given directory with toeralance on directory structure 11 | changes. For a given path p, a truncated path of p with level-L is the modified 12 | path preserving only L-th terminal directory names and the file name. For 13 | instance, for a file path 'foo/bar/baz/qux.c', level-0, 1, 2 and 3 Truncated 14 | Paths are 'qux.c', 'baz/qux.c', 'bar/baz/qux.c' and 'foo/bar/baz/qux.c'. 
class PathLevelError(ValueError):
  """Raised when the Truncated Path level is improper."""


class TruncatedPath:
  """A file path reduced to its file name plus `level` parent directories.

  Instances are hashable and compare equal when the path elements they retain
  are identical, so they can be used as set members and dictionary keys.
  """

  @classmethod
  @functools.cache
  def _normalize_path(cls, file_path: str) -> Sequence[str]:
    """Splits the normalized form of |file_path| into its path elements.

    The result is memoized per path, so it is handed out as an immutable tuple
    to keep callers from corrupting the cached value.
    """
    return tuple(os.path.normpath(file_path).split(os.sep))

  @classmethod
  def is_level_ok(cls, file_path: str, level: int) -> bool:
    """Returns True if |file_path| can be truncated at the given level.

    A level is usable when it is non-negative and the path has at least
    |level| directory elements in front of the file name.
    """
    return 0 <= level < len(cls._normalize_path(file_path))

  @classmethod
  def get_max_level(cls, file_path: str) -> int:
    """Returns the maximum truncated path level of the given file path."""
    return len(cls._normalize_path(file_path)) - 1

  def __init__(self, file_path: str, level: int):
    """Instantiates a Truncated Path for the given file path and level.

    Args:
      file_path: the file path (a string) to be truncated.
      level: truncated path level, i.e., the number of trailing directory
        names to keep in front of the file name.

    Raises:
      PathLevelError: if the given level is negative or too large for the
        given path.
    """
    if not self.is_level_ok(file_path, level):
      raise PathLevelError(
          f'Given path {file_path} does not have enough directories or the'
          f' given level is negative (level: {level}).'
      )
    path_elements = self._normalize_path(file_path)
    # Keep the file name plus the last |level| directory names.
    self._path_elements = path_elements[-(level + 1):]
    self._path = os.path.join(*self._path_elements)

  def __str__(self):
    return self._path

  def __repr__(self):
    return f'{type(self).__name__}({self._path!r}, level={self.level})'

  def __hash__(self):
    return hash(self._path)

  def __eq__(self, other: object):
    # Defer to the other operand instead of failing with AttributeError when
    # compared against an unrelated type (e.g. a plain string).
    if not isinstance(other, TruncatedPath):
      return NotImplemented
    return self._path_elements == other._path_elements

  @functools.cached_property
  def level(self):
    """The level of this Truncated Path (number of retained directories)."""
    return len(self._path_elements) - 1

  def truncate(self, level: int) -> Optional['TruncatedPath']:
    """Returns this Truncated Path truncated further to the new level.

    Args:
      level: the new level; must not exceed the current level.

    Raises:
      PathLevelError: if the new level is negative or larger than the level
        of this Truncated Path.
    """
    return TruncatedPath(self._path, level)


class MinLevelUniqueTruncatedPathFinder:
  """Finds min levels of uniquely identifiable Truncated Paths.

  This class maintains a list of file paths representing a system, and finds
  a minimum level of unique Truncated Path of the given file among all Truncated
  Paths of the given reference file list.
  """

  def __init__(self, ref_file_list: Sequence[str]):
    """Initializes with the full list of files representing a system.

    Args:
      ref_file_list: the full list of files representing a system. Each file is
        supposed to be in a relative path format as follows:
        ['mm/backing-dev.c', 'mm/balloon_compaction.c', ...]
    """
    self._ref_file_list = ref_file_list
    # Per-instance memo of level -> counter. A functools.lru_cache on the
    # method would key on |self| and keep every finder (and its reference file
    # list) alive for the lifetime of the process.
    self._ref_tp_counters = {}

  def _get_ref_truncated_path_counter(
      self, level: int
  ) -> Mapping[TruncatedPath, int]:
    """Returns Truncated Path to matched file counter map for |level|.

    Reference files that are too shallow for |level| are ignored.
    """
    counter = self._ref_tp_counters.get(level)
    if counter is None:
      counter = collections.Counter(
          TruncatedPath(file_path, level)
          for file_path in self._ref_file_list
          if TruncatedPath.is_level_ok(file_path, level)
      )
      self._ref_tp_counters[level] = counter
    return counter

  def find(self, file_path: str) -> Optional[TruncatedPath]:
    """Returns the min level of uniquely identifiable TP for the given file.

    Args:
      file_path: a relative path of a file.

    Returns:
      Minimum level Truncated Path making the path uniquely identifiable among
      the reference files. Returns None if the given path has no unique
      truncated path at any level.
    """
    for level in range(TruncatedPath.get_max_level(file_path) + 1):
      tp = TruncatedPath(file_path, level)
      ref_tp_counters = self._get_ref_truncated_path_counter(level)
      if ref_tp_counters.get(tp, 0) <= 1:  # Unique if counter is 0 or 1.
        return tp
    return None


@functools.cache
def _get_levels(
    truncated_path_set: FrozenSet[TruncatedPath],
) -> FrozenSet[int]:
  """Returns the distinct levels used by the given Truncated Paths.

  The result is cached, hence returned as an immutable frozenset.
  """
  return frozenset(tp.level for tp in truncated_path_set)


def check_inclusion(
    truncated_path_set: Set[TruncatedPath], file_path: str
) -> bool:
  """Check inclusion of a file path in a set of Truncated Path.

  Args:
    truncated_path_set: a set of truncated path.
    file_path: a file path to be checked against the given truncated path set.

  Returns:
    Returns True if any level of truncated path of the given file path is
    included in the given Truncated Path Set. Returns False, otherwise.
  """
  # Only the levels that actually occur in the set can produce a match.
  for level in _get_levels(frozenset(truncated_path_set)):
    if not TruncatedPath.is_level_ok(file_path, level):
      continue
    if TruncatedPath(file_path, level) in truncated_path_set:
      return True
  return False


def check_inclusion_rate_of_truncated_paths_in_file_list(
    truncated_path_set: FrozenSet[TruncatedPath], file_list: Sequence[str]
) -> float:
  """Check inclusion rate of a list of Truncated Paths in the list of files.

  Args:
    truncated_path_set: a set of truncated path.
    file_list: a list of file paths.

  Returns:
    Returns the truncated path inclusion rate of |truncated_path_set| in
    |file_list|, i.e., the fraction of the Truncated Paths matched by at least
    one file. Returns 0.0 for an empty |truncated_path_set| instead of
    dividing by zero.
  """
  if not truncated_path_set:
    return 0.0
  levels = _get_levels(frozenset(truncated_path_set))
  file_tps = {
      TruncatedPath(file_path, level)
      for level in levels
      for file_path in file_list
      if TruncatedPath.is_level_ok(file_path, level)
  }
  matched = file_tps.intersection(truncated_path_set)
  return len(matched) / len(truncated_path_set)
class TruncatedPathTest(absltest.TestCase):
  """Covers TruncatedPath and the helpers built on top of it."""

  # (file path, level) pairs shared by the inclusion tests.
  _TARGETS = (
      ('foo/bar/baz', 2),
      ('qux/quux/corge', 2),
      ('garply/waldo/fred', 2),
      ('fred/plugh/xyzzy/thud', 3),
  )

  def _make_tp_set(self):
    """Builds the set of Truncated Paths described by _TARGETS."""
    return {
        truncated_path.TruncatedPath(path, level)
        for path, level in self._TARGETS
    }

  def test_truncate_file_path(self):
    full_path = 'foo/bar/baz'

    shortest = truncated_path.TruncatedPath(full_path, level=0)
    self.assertEqual(str(shortest), 'baz')

    longest = truncated_path.TruncatedPath(full_path, level=2)
    self.assertEqual(str(longest), 'foo/bar/baz')

    # Levels beyond the path depth and negative levels are both rejected.
    for bad_level in (3, -1):
      with self.assertRaises(truncated_path.PathLevelError):
        _ = truncated_path.TruncatedPath(full_path, level=bad_level)

    shortened = longest.truncate(level=1)
    self.assertEqual(str(shortened), 'bar/baz')

  def test_check_inclusion(self):
    tp_set = self._make_tp_set()
    self.assertTrue(truncated_path.check_inclusion(tp_set, 'foo/bar/baz'))
    prefixed = 'some/additional/prefix/directories/foo/bar/baz'
    self.assertTrue(truncated_path.check_inclusion(tp_set, prefixed))
    self.assertFalse(truncated_path.check_inclusion(tp_set, 'bar/baz'))

  def test_check_inclusion_rate_of_truncated_paths_in_file_list(self):
    get_rate = (
        truncated_path.check_inclusion_rate_of_truncated_paths_in_file_list
    )
    tp_set = self._make_tp_set()
    partial_files = [
        '1/2/3/4/foo/bar/baz',
        '1/2/3/qux/quux/corge',
        '1/2/3/4/5/6/qux/quux/corge',  # duplicate
        '1/2/3/4/5/unrelated_file1',
        '1/2/3/4/unrelated_file2',
        '1/2/3/unrelated_file3',
        '1/2/unrelated_file5',
    ]
    self.assertEqual(get_rate(tp_set, partial_files), 0.5)

    all_files = partial_files + [
        '1/2/3/4/garply/waldo/fred',
        '1/2/3/4/5/6/7/8/fred/plugh/xyzzy/thud',
    ]
    self.assertEqual(get_rate(tp_set, all_files), 1)

  def test_min_level_unique_tp_finder(self):
    ref_file_list = [
        '1/2/3/4/foo/bar/baz',
        '1/2/3/qux/quux/corge',
        '1/2/3/4/5/6/qux/quux/corge',  # duplicate
        '1/2/3/grault1/garply/waldo',
        '4/5/6/grault2/garply/waldo',  # diverging at the max_level.
        '1/2/3/4/5/unrelated_file1',
        '1/2/3/4/unrelated_file2',
        '1/2/3/unrelated_file3',
        '1/2/unrelated_file5',
    ]
    finder = truncated_path.MinLevelUniqueTruncatedPathFinder(ref_file_list)

    # (queried file, path and level of the expected Truncated Path)
    unique_cases = (
        ('foo/bar/baz', 'baz', 0),
        ('very/unique/quux/corge', 'unique/quux/corge', 2),
        ('grault1/garply/waldo', 'grault1/garply/waldo', 2),
    )
    for test_file, expected_path, expected_level in unique_cases:
      expected = truncated_path.TruncatedPath(
          expected_path, level=expected_level
      )
      self.assertEqual(finder.find(test_file), expected)

    # Duplicated in the reference list at every level: nothing unique.
    self.assertIsNone(finder.find('qux/quux/corge'))


if __name__ == '__main__':
  absltest.main()
class VanirTestBase(absltest.TestCase):
  """Base class for Vanir tests bundling shared utility methods."""

  @contextlib.contextmanager
  def runtime_reporter(self, name: str) -> abc.Generator[None, None, None]:
    """Logs how long the body of the `with` statement took to run.

    The elapsed time is measured with a monotonic clock and logged even when
    the body raises.

    Args:
      name: label included in the log line to identify the measured section.

    Yields:
      None; control is simply handed to the body of the `with` statement.
    """
    began_at = time.monotonic()
    try:
      yield
    finally:
      logging.info(
          'runtime::%s %d seconds', name, time.monotonic() - began_at
      )
@enum.unique
class TargetSystem(str, enum.Enum):
  """Kinds of target systems a version can be extracted from."""

  UNKNOWN = 'unknown'
  KERNEL = 'kernel'


def get_target_version_files(
    target_system_type: Optional[TargetSystem] = None,
) -> Sequence[str]:
  """Returns a list of files containing version information.

  Args:
    target_system_type: optional arg for specifying the type of the target
      system. If None, the version files of all known target systems are
      returned.

  Returns:
    A sequence of files containing version information.
  """
  target_version_files = []
  for cls in VersionExtractor.__subclasses__():
    if target_system_type and target_system_type != cls.get_target_system():
      continue
    target_version_files.extend(cls.get_version_files())
  return target_version_files


def extract_version(
    target_root: str, target_system_type: Optional[TargetSystem] = None
) -> Optional[str]:
  """Returns the version string of the target system.

  Args:
    target_root: the absolute path to the root of the target system.
    target_system_type: optional arg for specifying the type of the target
      system. If None, try all known target systems to scan the version string.

  Raises:
    RuntimeError: if the target system contains more than one version.

  Returns:
    The version string extracted if found. Returns None otherwise.
  """
  versions = {}
  for cls in VersionExtractor.__subclasses__():
    if target_system_type and target_system_type != cls.get_target_system():
      continue
    ver = cls.extract_version(target_root)
    if ver:  # Extractors report "nothing found" as None or ''.
      versions[cls] = ver
  if len(versions) > 1:
    raise RuntimeError(
        'Multiple versions were found from the target root: %s' % versions
    )
  if not versions:
    return None
  matched_extractor, version = next(iter(versions.items()))
  target_system = matched_extractor.get_target_system()
  logging.info(
      'Extracted version: %s (Target system is recoganized as %s)',
      version,
      target_system.value,
  )
  return version


class VersionExtractor(metaclass=abc.ABCMeta):
  """Abstract class for version extractor classes.

  Direct subclasses are discovered through __subclasses__() by the
  module-level helper functions, so defining a subclass registers it.
  """

  @classmethod
  @abc.abstractmethod
  def get_version_files(cls) -> Sequence[str]:
    """Returns a list of files that may contain version information."""

  @classmethod
  @abc.abstractmethod
  def get_target_system(cls) -> TargetSystem:
    """Returns the target system of the class."""

  @classmethod
  @abc.abstractmethod
  def extract_version(cls, target_root: str) -> Optional[str]:
    """Extracts the version string from the target system."""


class KernelVersionExtractor(VersionExtractor):
  """Class to extract Linux kernel version."""

  @classmethod
  def get_version_files(cls) -> Sequence[str]:
    return [cls._get_version_file()]

  @classmethod
  def get_target_system(cls) -> TargetSystem:
    return TargetSystem.KERNEL

  @classmethod
  def _get_version_file(cls) -> str:
    return 'Makefile'

  @classmethod
  def extract_version(cls, target_root: str) -> Optional[str]:
    """Extracts the kernel version from the Makefile under |target_root|.

    Args:
      target_root: path to the root directory of the kernel source tree.

    Returns:
      The kernel version string parsed from the Makefile (possibly empty if
      the Makefile has no version info), or None if there is no Makefile.

    Raises:
      ValueError: if |target_root| is not a directory.
    """
    target_root = os.path.abspath(target_root)
    if not os.path.isdir(target_root):
      raise ValueError('Invalid directory: %s' % target_root)

    version_file_path = os.path.join(target_root, cls._get_version_file())
    if not os.path.isfile(version_file_path):
      return None

    # The version fields are plain ASCII; decode leniently so that stray
    # non-UTF-8 bytes elsewhere in the Makefile cannot abort the scan.
    with open(version_file_path, encoding='utf-8', errors='replace') as vfile:
      return cls._parse_makefile(vfile.read())

  @classmethod
  def _parse_makefile(cls, makefile_content: str) -> str:
    """Parses the given Makefile content and returns kernel version.

    The version is assembled as VERSION.PATCHLEVEL.SUBLEVEL followed directly
    by EXTRAVERSION, stopping at the first field that is missing or empty.

    Args:
      makefile_content: the content of the Makefile in the string format.

    Returns:
      Kernel version string if version info is found. Empty string otherwise.
    """
    # Listed in assembly order. Only blanks and tabs are allowed around '=':
    # a generic \s would also match a newline and let an empty assignment
    # capture the following line as its value.
    patterns = {
        'version': r'^VERSION[ \t]*=[ \t]*(?P<version>[0-9]*)\s*$',
        'patchlevel': r'^PATCHLEVEL[ \t]*=[ \t]*(?P<patchlevel>[0-9]*)\s*$',
        'sublevel': r'^SUBLEVEL[ \t]*=[ \t]*(?P<sublevel>[0-9]*)\s*$',
        'extraversion': (
            r'^EXTRAVERSION[ \t]*=[ \t]*(?P<extraversion>\S*)\s*$'
        ),
    }
    # Text inserted in front of a field when it is appended to the version.
    delimiters = {'patchlevel': '.', 'sublevel': '.'}

    kernelversion = ''
    for name, pattern in patterns.items():
      match = re.search(pattern, makefile_content, flags=re.MULTILINE)
      value = match.group(name) if match else ''
      if not value:
        break
      kernelversion += '%s%s' % (delimiters.get(name, ''), value)
    return kernelversion
_MAKEFILE_FULL_VERSION = """
VERSION = 10
PATCHLEVEL = 20
SUBLEVEL = 30
EXTRAVERSION = -special
"""

_MAKEFILE_PARTIAL_VERSION = """
VERSION = 10
PATCHLEVEL = 20
"""

_MAKEFILE_NO_VERSION = """
PATCHLEVEL = 20
SUBLEVEL = 30
EXTRAVERSION = -special
"""


class VersionExtractorTest(parameterized.TestCase):
  """Covers the module-level helpers and KernelVersionExtractor."""

  def _make_kernel_root(self):
    """Creates a temp directory holding a Makefile with a full version."""
    root = self.create_tempdir().full_path
    self.create_tempfile(
        os.path.join(root, 'Makefile'), content=_MAKEFILE_FULL_VERSION
    )
    return root

  @parameterized.named_parameters(
      ('without_target_system_type', None, ['Makefile']),
      (
          'with_target_system_type',
          version_extractor.TargetSystem.KERNEL,
          ['Makefile'],
      ),
      (
          'with_useless_target_system_type',
          version_extractor.TargetSystem.UNKNOWN,
          [],
      ),
  )
  def test_get_target_version_files(self, target_system_type, expected_files):
    self.assertCountEqual(
        version_extractor.get_target_version_files(target_system_type),
        expected_files,
    )

  @parameterized.named_parameters(
      ('no_target_system_type', None, '10.20.30-special'),
      (
          'matched_target_system_type',
          version_extractor.TargetSystem.KERNEL,
          '10.20.30-special',
      ),
      (
          'unmatched_target_system_type',
          version_extractor.TargetSystem.UNKNOWN,
          None,
      ),
  )
  def test_extract_version(self, target_system_type, expected_version):
    kernel_root = self._make_kernel_root()
    extracted = version_extractor.extract_version(
        kernel_root, target_system_type
    )
    self.assertEqual(extracted, expected_version)

  def test_extract_version_fails_if_multiple_versions_found(self):
    class MockVersionExtractor:
      """A mock version extractor class.

      To avoid auto-registration of this mock class through __subclasses__() in
      other tests, this mock class intentionally does not inherit from
      VersionExtractor.
      """

      @classmethod
      def get_version_files(cls):
        return ['foo']

      @classmethod
      def get_target_system(cls):
        return version_extractor.TargetSystem.KERNEL

      @classmethod
      def extract_version(cls, _):
        return '1234.5678'

    kernel_root = self._make_kernel_root()
    self.create_tempfile(os.path.join(kernel_root, 'foo'))
    registered_extractors = [
        version_extractor.KernelVersionExtractor,
        MockVersionExtractor,
    ]
    with mock.patch.object(
        version_extractor.VersionExtractor,
        '__subclasses__',
        autospec=True,
        return_value=registered_extractors,
    ):
      expected_error_msg = 'Multiple versions were found from the target root:'
      with self.assertRaisesRegex(RuntimeError, expected_error_msg):
        version_extractor.extract_version(kernel_root)

  @parameterized.named_parameters(
      ('full_version', _MAKEFILE_FULL_VERSION, '10.20.30-special'),
      ('partial_version', _MAKEFILE_PARTIAL_VERSION, '10.20'),
      ('no_version', _MAKEFILE_NO_VERSION, ''),
  )
  def test_kernel_version_extractor_parse_makefile(
      self, makefile_content, expected_kernelversion
  ):
    extractor = version_extractor.KernelVersionExtractor
    self.assertEqual(
        extractor._parse_makefile(makefile_content), expected_kernelversion
    )

  def test_kernel_version_extractor_fails_if_target_root_is_invalid(self):
    extractor = version_extractor.KernelVersionExtractor
    with self.assertRaisesRegex(ValueError, 'Invalid directory:'):
      extractor.extract_version('nonexisting_dir')

  def test_kernel_version_extractor_returns_none_if_makefile_not_exist(self):
    empty_root = self.create_tempdir().full_path
    extractor = version_extractor.KernelVersionExtractor
    self.assertIsNone(extractor.extract_version(empty_root))


if __name__ == '__main__':
  absltest.main()
@dataclasses.dataclass(frozen=True)
class Replace:
  """Represents a replacement operation on a OSV vulnerability JSON object.

  Attributes:
    value: The value to replace with.
    path: This field specifies the JSONPath expression to locate the field to be
      replaced. The expression should yield a single field, wildcards are
      supported for modifying arrays in a single operation (e.g.,
      affected[*].ecosystem_specific.severity). It is applied per vulnerability,
      not file-wide.
  """

  # NOTE(review): the loader stores whatever JSON value the spec provides
  # without checking its type -- confirm non-string values are not expected.
  value: str
  path: 'jsonpath_rw.jsonpath.JSONPath'


@dataclasses.dataclass(frozen=True)
class OverwriteSpec:
  """Specifies how to overwrite vulnerability data in a JSON object.

  Attributes:
    vuln_id: The ID of the vulnerability to overwrite. Should match exactly the
      'id' field in the OSV vulnerability.
    replace: A sequence of replace operations to apply to the vulnerability.
    reason: A mandatory explanation of the reason for the overwrite.
  """

  vuln_id: str
  replace: Sequence[Replace]
  reason: str


def _parse_replace(
    raw_replace: Mapping[str, Any],
    vuln_id: str,
) -> Replace:
  """Parses and validates a replace operation from a raw JSON object.

  Args:
    raw_replace: the decoded JSON object describing one replace operation; it
      must provide a 'value' key and a non-empty 'path' key.
    vuln_id: ID of the vulnerability the operation belongs to, used only to
      make error messages actionable.

  Returns:
    The parsed Replace operation.

  Raises:
    ValueError: if the operation is not a JSON object, lacks a required field
      or its path cannot be parsed as a JSONPath expression.
  """
  if not isinstance(raw_replace, Mapping):
    raise ValueError(
        'Each replace operation must be a JSON object, check spec for'
        f' {vuln_id}'
    )

  if 'value' not in raw_replace:
    raise ValueError(
        'Value is required for each replace operation even if it is empty,'
        f' check spec for {vuln_id}'
    )

  if not raw_replace.get('path'):
    raise ValueError(
        'JSON path is required for each replace operation, check spec for'
        f' {vuln_id}'
    )

  try:
    json_path = jsonpath_rw.parser.parse(raw_replace['path'])
  except Exception as e:
    # Deliberately broad: any failure to parse the path (including a path of
    # the wrong type) is reported uniformly as an invalid spec.
    raise ValueError(
        f'Invalid JSON path in replace operation, check spec for {vuln_id}: {e}'
    ) from e
  return Replace(value=raw_replace['value'], path=json_path)


def load_overwrite_specs_from_json(
    json_content: str,
) -> Sequence[OverwriteSpec]:
  """Loads overwrite specs from a JSON string.

  Args:
    json_content: JSON string containing a list of overwrite specs in a JSON
      format.

  Returns:
    Sequence of OverwriteSpec objects.

  Raises:
    ValueError: If the JSON string is invalid, is not a list of JSON objects,
      or if any of the overwrite specs is missing required fields.
  """
  # json.JSONDecodeError is a ValueError, so malformed JSON already surfaces
  # with the documented exception type.
  raw_overwriters_info = json.loads(json_content)
  if not isinstance(raw_overwriters_info, list):
    raise ValueError(
        'Overwrite specs must be given as a JSON list, got:'
        f' {type(raw_overwriters_info).__name__}'
    )

  overwrite_specs = []
  for raw_spec in raw_overwriters_info:
    if not isinstance(raw_spec, Mapping):
      raise ValueError(f'Each overwriter must be a JSON object: {raw_spec}')
    if not raw_spec.get('vuln_id'):
      raise ValueError(
          f'Vulnerability ID is required for each overwriter: {raw_spec}'
      )
    if not raw_spec.get('reason'):
      raise ValueError(
          'Reason is required for each overwriter, vuln_id:'
          f' {raw_spec.get("vuln_id")}'
      )

    raw_replaces = raw_spec.get('replace')
    if not raw_replaces:
      raise ValueError(
          'Replace operations are not provided, vuln_id:'
          f' {raw_spec.get("vuln_id")}'
      )
    if not isinstance(raw_replaces, list):
      raise ValueError(
          'Replace operations must be given as a JSON list, vuln_id:'
          f' {raw_spec.get("vuln_id")}'
      )

    overwrite_specs.append(
        OverwriteSpec(
            vuln_id=raw_spec['vuln_id'],
            replace=[
                _parse_replace(raw_replace, raw_spec['vuln_id'])
                for raw_replace in raw_replaces
            ],
            reason=raw_spec['reason'],
        )
    )
  return overwrite_specs


def load_overwrite_specs_from_file(file_path: str) -> Sequence[OverwriteSpec]:
  """Loads overwrite specs from a JSON file.

  See an example of a valid overwrite spec file in the
  testdata/test_overwrite_specs.json file.

  Args:
    file_path: Path to the JSON file containing overwrite specs.

  Returns:
    Sequence of OverwriteSpec objects.

  Raises:
    ValueError: If the JSON file is invalid or if any of the overwrite specs
      is missing required fields.
  """
  # JSON exchanged between systems is UTF-8; do not depend on the locale.
  with open(file_path, 'r', encoding='utf-8') as f:
    return load_overwrite_specs_from_json(f.read())
First it creates a map of overwriter specs 169 | by their vuln_id to quickly find the spec for a given vulnerability. Then it 170 | iterates over the vulnerabilities and for each one it finds the 171 | corresponding spec (if any) and applies the replace operations on it. 172 | 173 | The replace operations rely on JSONPath to find the fields to modify. 174 | JSONPath may return multiple matches for a given Replace operation. In this 175 | case all the matches are modified. 176 | 177 | Args: 178 | vulnerabilities: Sequence of vulnerabilities in OSV schema format. The 179 | function can modify these objects by applying overwrite specs. 180 | overwrite_specs: Sequence of OverwriteSpec objects to apply. 181 | """ 182 | if not overwrite_specs: 183 | return 184 | overwrite_spec_map = {spec.vuln_id: spec for spec in overwrite_specs} 185 | for vulnerability in vulnerabilities: 186 | vulnerability_id = vulnerability['id'] 187 | if vulnerability_id in overwrite_spec_map: 188 | spec = overwrite_spec_map[vulnerability_id] 189 | for replace in spec.replace: 190 | replace.path.update(vulnerability, replace.value) 191 | --------------------------------------------------------------------------------