├── tests ├── __init__.py ├── utils.py ├── ops │ ├── version │ │ └── test_version.py │ ├── ls │ │ └── test_ls.py │ ├── upgrade_filter │ │ └── test_upgrade_filter.py │ ├── csv │ │ └── test_csv.py │ ├── codeclimate │ │ └── test_codeclimate.py │ ├── emacs │ │ └── test_emacs.py │ ├── summary │ │ └── test_summary.py │ ├── word │ │ └── test_word.py │ ├── info │ │ └── test_info.py │ ├── copy │ │ └── test_copy.py │ ├── trend │ │ └── test_trend.py │ ├── diff │ │ ├── test_diff.py │ │ └── test_diff_issues_reordered.py │ ├── html │ │ └── test_html.py │ └── blame │ │ └── test_blame.py ├── test_check_switch.py ├── test_sarif_file_utils.py └── test_general_filter.py ├── sarif ├── cmdline │ ├── __init__.py │ └── main.py ├── filter │ ├── __init__.py │ ├── filter_stats.py │ └── general_filter.py ├── operations │ ├── __init__.py │ ├── templates │ │ ├── sarif_emacs.txt │ │ └── sarif_summary.html │ ├── ls_op.py │ ├── csv_op.py │ ├── summary_op.py │ ├── upgrade_filter_op.py │ ├── codeclimate_op.py │ ├── trend_op.py │ ├── emacs_op.py │ ├── copy_op.py │ ├── info_op.py │ ├── html_op.py │ ├── blame_op.py │ ├── diff_op.py │ └── word_op.py ├── __init__.py ├── __main__.py ├── charts.py ├── loader.py ├── issues_report.py └── sarif_file_utils.py ├── poetry.toml ├── azure-pipelines ├── templates │ ├── globals.yml │ ├── use_python.yml │ └── build_stage.yml ├── build.yml └── release.yml ├── .pylintrc ├── .gitignore ├── SUPPORT.md ├── .vscode └── extensions.json ├── CODE_OF_CONDUCT.md ├── .github └── workflows │ ├── build.yml │ └── validation.yml ├── pyproject.toml ├── LICENSE ├── SECURITY.md ├── CONTRIBUTING.md └── CHANGELOG.md /tests/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /sarif/cmdline/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /sarif/filter/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /sarif/operations/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /poetry.toml: -------------------------------------------------------------------------------- 1 | [virtualenvs] 2 | in-project = true 3 | -------------------------------------------------------------------------------- /azure-pipelines/templates/globals.yml: -------------------------------------------------------------------------------- 1 | variables: 2 | ARTIFACT_NAME_WHEEL: wheel 3 | -------------------------------------------------------------------------------- /.pylintrc: -------------------------------------------------------------------------------- 1 | [TYPECHECK] 2 | 3 | ignored-classes=WD_PARAGRAPH_ALIGNMENT,WD_TAB_ALIGNMENT 4 | 5 | -------------------------------------------------------------------------------- /sarif/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | Top-level version information for sarif-tools. 
3 | """ 4 | 5 | __version__ = "3.0.5" 6 | -------------------------------------------------------------------------------- /sarif/__main__.py: -------------------------------------------------------------------------------- 1 | """ 2 | This file supports `python -m sarif` invocation. 3 | """ 4 | 5 | import sys 6 | 7 | from sarif.cmdline import main 8 | 9 | sys.exit(main.main()) 10 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | /build 2 | /dist 3 | *.egg-info 4 | *.pyc 5 | *.orig 6 | /.venv 7 | /.vscode 8 | /.idea 9 | /.pytest_cache 10 | .DS_Store 11 | *.sarif 12 | *.csv 13 | .coverage 14 | coverage.xml 15 | *filter.yaml 16 | -------------------------------------------------------------------------------- /azure-pipelines/templates/use_python.yml: -------------------------------------------------------------------------------- 1 | steps: 2 | - task: UsePythonVersion@0 3 | inputs: 4 | versionSpec: "$(python.version)" 5 | architecture: "$(architecture)" 6 | displayName: "Use Python $(python.version) $(architecture)" 7 | -------------------------------------------------------------------------------- /SUPPORT.md: -------------------------------------------------------------------------------- 1 | # Support 2 | 3 | ## How to file issues and get help 4 | 5 | This project uses GitHub Issues to track bugs and feature requests. Please search the existing 6 | issues before filing new issues to avoid duplicates. For new issues, file your bug or 7 | feature request as a new Issue. 8 | -------------------------------------------------------------------------------- /.vscode/extensions.json: -------------------------------------------------------------------------------- 1 | { 2 | // See https://go.microsoft.com/fwlink/?LinkId=827846 3 | // for the documentation about the extensions.json format 4 | "recommendations": [ 5 | "charliermarsh.ruff", 6 | "ms-python.python", 7 | "ms-python.vscode-pylance" 8 | ] 9 | } 10 | -------------------------------------------------------------------------------- /tests/utils.py: -------------------------------------------------------------------------------- 1 | import json 2 | import os 3 | 4 | 5 | def get_sarif_schema(): 6 | # JSON Schema file for SARIF obtained from https://docs.oasis-open.org/sarif/sarif/v2.1.0/cs01/schemas/ 7 | sarif_schema_file = os.path.join( 8 | os.path.dirname(__file__), "sarif-schema-2.1.0.json" 9 | ) 10 | with open(sarif_schema_file, "rb") as f_schema: 11 | return json.load(f_schema) 12 | -------------------------------------------------------------------------------- /tests/ops/version/test_version.py: -------------------------------------------------------------------------------- 1 | import pathlib 2 | 3 | import sarif 4 | 5 | 6 | def test_version(): 7 | with open( 8 | pathlib.Path(__file__).parent.parent.parent.parent / "pyproject.toml" 9 | ) as pyproject_in: 10 | for pyproject_line in pyproject_in.readlines(): 11 | if pyproject_line.startswith('version = "'): 12 | assert pyproject_line.strip() == f'version = "{sarif.__version__}"' 13 | -------------------------------------------------------------------------------- /CODE_OF_CONDUCT.md: -------------------------------------------------------------------------------- 1 | # Microsoft Open Source Code of Conduct 2 | 3 | This project has adopted the [Microsoft Open Source Code of Conduct](https://opensource.microsoft.com/codeofconduct/). 
4 | 5 | Resources: 6 | 7 | - [Microsoft Open Source Code of Conduct](https://opensource.microsoft.com/codeofconduct/) 8 | - [Microsoft Code of Conduct FAQ](https://opensource.microsoft.com/codeofconduct/faq/) 9 | - Contact [opencode@microsoft.com](mailto:opencode@microsoft.com) with questions or concerns 10 | -------------------------------------------------------------------------------- /sarif/operations/templates/sarif_emacs.txt: -------------------------------------------------------------------------------- 1 | -*- compilation -*- 2 | 3 | Sarif Summary: {{ report_type }} 4 | Document generated on: {{ report_date }} 5 | Total number of distinct issues of all severities ({{ severities }}): {{ total }} 6 | {% if filtered -%} 7 |
{{ filtered }}
8 | {%- endif %} 9 | 10 | {% for problem in problems %} 11 | Severity : {{ problem.type }} [{{ problem.count }}] 12 | {% for error in problem.details -%} 13 | {% for line in error.details -%} 14 | {{ line.Location }}:{{ line.Line }}: {{ error.code }} 15 | {% endfor %} 16 | {% endfor %} 17 | {% endfor -%} 18 | 19 | -------------------------------------------------------------------------------- /tests/test_check_switch.py: -------------------------------------------------------------------------------- 1 | import datetime 2 | from sarif.cmdline.main import _check 3 | from sarif import sarif_file 4 | 5 | SARIF = { 6 | "runs": [ 7 | { 8 | "tool": {"driver": {"name": "Tool"}}, 9 | "results": [{"level": "warning", "ruleId": "rule"}], 10 | } 11 | ] 12 | } 13 | 14 | 15 | def test_check(): 16 | fileSet = sarif_file.SarifFileSet() 17 | fileSet.add_file( 18 | sarif_file.SarifFile("SARIF", SARIF, mtime=datetime.datetime.now()) 19 | ) 20 | 21 | result = _check(fileSet, "error") 22 | assert result == 0 23 | 24 | result = _check(fileSet, "warning") 25 | assert result == 1 26 | 27 | result = _check(fileSet, "note") 28 | assert result == 1 29 | -------------------------------------------------------------------------------- /tests/ops/ls/test_ls.py: -------------------------------------------------------------------------------- 1 | import os 2 | import tempfile 3 | 4 | from sarif.operations import ls_op 5 | 6 | 7 | def test_ls(): 8 | file_names = ["file1.sarif", "file2.sarif", "aaaa.sarif"] 9 | 10 | with tempfile.TemporaryDirectory() as tmp: 11 | for file_name in file_names: 12 | with open(os.path.join(tmp, file_name), "wb") as f_in: 13 | f_in.write("{}".encode()) 14 | 15 | output_path = os.path.join(tmp, "output.txt") 16 | ls_op.print_ls([tmp], output_path) 17 | 18 | with open(output_path, "rb") as f_out: 19 | output = f_out.read().decode().splitlines() 20 | 21 | assert len(output) == len(file_names) + 1 22 | assert output[0] == tmp + ":" 23 | assert output[1:] == sorted([" " + file_name for file_name in file_names]) 24 | -------------------------------------------------------------------------------- /azure-pipelines/build.yml: -------------------------------------------------------------------------------- 1 | name: Build 2 | pr: none 3 | trigger: 4 | branches: 5 | include: 6 | - main 7 | paths: 8 | exclude: 9 | - azure-pipelines/release.yml 10 | 11 | resources: 12 | repositories: 13 | - repository: 1ESPipelineTemplates 14 | type: git 15 | name: 1ESPipelineTemplates/1ESPipelineTemplates 16 | ref: refs/tags/release 17 | 18 | variables: 19 | TeamName: sarif-tools 20 | 21 | extends: 22 | template: v1/1ES.Official.PipelineTemplate.yml@1ESPipelineTemplates 23 | parameters: 24 | sdl: 25 | sourceAnalysisPool: VSEngSS-MicroBuild2022-1ES 26 | pool: 27 | name: AzurePipelines-EO 28 | demands: 29 | - ImageOverride -equals 1ESPT-Ubuntu22.04 30 | os: Linux 31 | customBuildTags: 32 | - ES365AIMigrationTooling 33 | stages: 34 | - template: templates/build_stage.yml@self 35 | -------------------------------------------------------------------------------- /.github/workflows/build.yml: -------------------------------------------------------------------------------- 1 | name: Build 2 | 3 | on: 4 | pull_request: 5 | branches: 6 | - main 7 | 8 | jobs: 9 | build: 10 | if: github.repository == 'microsoft/sarif-tools' 11 | runs-on: ubuntu-latest 12 | name: Build 13 | 14 | steps: 15 | - uses: actions/checkout@v4 16 | with: 17 | fetch-depth: 0 18 | 19 | - name: Setup Python 20 | uses: actions/setup-python@v5 21 | with: 22 | 
python-version: '3.8' 23 | 24 | - name: Install Poetry 25 | run: pip install poetry 26 | 27 | - name: Poetry Build 28 | run: poetry build --no-interaction 29 | 30 | - name: Get Verison 31 | id: get_version 32 | shell: bash 33 | run: echo "releaseVersion=$(poetry version --short)" >> $GITHUB_OUTPUT 34 | 35 | - uses: actions/upload-artifact@v4 36 | with: 37 | name: wheel 38 | path: dist/sarif_tools-${{ steps.get_version.outputs.releaseVersion }}-py3-none-any.whl 39 | -------------------------------------------------------------------------------- /sarif/operations/ls_op.py: -------------------------------------------------------------------------------- 1 | """ 2 | Code for `sarif ls` command. 3 | """ 4 | 5 | from typing import List 6 | 7 | from sarif import loader 8 | 9 | 10 | def print_ls(files_or_dirs: List[str], output): 11 | """ 12 | Print a SARIF file listing for each of the input files or directories. 13 | """ 14 | dir_result = [] 15 | for path in files_or_dirs: 16 | dir_result.append(f"{path}:") 17 | sarif_files = loader.load_sarif_files(path) 18 | if sarif_files: 19 | sarif_file_names = [f.get_file_name() for f in sarif_files] 20 | for file_name in sorted(sarif_file_names): 21 | dir_result.append(f" {file_name}") 22 | else: 23 | dir_result.append(" (None)") 24 | if output: 25 | print("Writing file listing to", output) 26 | with open(output, "w", encoding="utf-8") as file_out: 27 | file_out.writelines(d + "\n" for d in dir_result) 28 | else: 29 | for directory in dir_result: 30 | print(directory) 31 | print() 32 | -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [tool.poetry] 2 | name = "sarif-tools" 3 | version = "3.0.5" 4 | description = "SARIF tools" 5 | authors = ["Microsoft"] 6 | readme = "README.md" 7 | homepage = "https://github.com/microsoft/sarif-tools" 8 | packages = [ 9 | { include = "sarif" } 10 | ] 11 | classifiers = [ 12 | "Programming Language :: Python :: 3", 13 | "License :: OSI Approved :: MIT License", 14 | "Operating System :: OS Independent" 15 | ] 16 | 17 | [tool.poetry.urls] 18 | "Bug Tracker" = "https://github.com/microsoft/sarif-tools/issues" 19 | 20 | [tool.poetry.dependencies] 21 | jinja2 = "^3.1.6" 22 | jsonpath-ng = "^1.6.0" 23 | matplotlib = "^3.7" # Need Python 3.9+ for newer 24 | python = "^3.8" 25 | python-docx = "^1.1.2" 26 | pyyaml = "^6.0.1" 27 | 28 | [tool.poetry.dev-dependencies] 29 | jsonschema = "^4.23.0" 30 | pylint = "^3.2" 31 | pytest = "^8.3" 32 | pytest-cov = "^5.0" 33 | ruff = "^0.6.8" 34 | 35 | [tool.poetry.scripts] 36 | sarif = "sarif.cmdline.main:main" 37 | 38 | [build-system] 39 | requires = ["poetry-core>=1.0.0"] 40 | build-backend = "poetry.core.masonry.api" 41 | -------------------------------------------------------------------------------- /tests/ops/upgrade_filter/test_upgrade_filter.py: -------------------------------------------------------------------------------- 1 | import os 2 | import tempfile 3 | 4 | from sarif.operations import upgrade_filter_op 5 | 6 | INPUT_FILTER = """ 7 | description: Test filter 8 | #comment 9 | +: include_with_prefix 10 | include_without_prefix 11 | -: exclude 12 | """ 13 | 14 | 15 | EXPECTED_OUTPUT_TXT = """configuration: 16 | check-line-number: true 17 | default-include: true 18 | description: Test filter 19 | exclude: 20 | - author-mail: exclude 21 | include: 22 | - author-mail: include_with_prefix 23 | - author-mail: include_without_prefix 24 | """ 25 | 26 | 27 
| def test_upgrade_filter(): 28 | with tempfile.TemporaryDirectory() as tmp: 29 | input_file_path = os.path.join(tmp, "input_filter.txt") 30 | with open(input_file_path, "wb") as f_in: 31 | f_in.write(INPUT_FILTER.encode()) 32 | 33 | output_file_path = os.path.join(tmp, "output.txt") 34 | upgrade_filter_op.upgrade_filter_file(input_file_path, output_file_path) 35 | 36 | with open(output_file_path, "rb") as f_out: 37 | output = f_out.read().decode() 38 | 39 | assert output == EXPECTED_OUTPUT_TXT.replace("\n", os.linesep) 40 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) Microsoft Corporation. 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE 22 | -------------------------------------------------------------------------------- /azure-pipelines/templates/build_stage.yml: -------------------------------------------------------------------------------- 1 | stages: 2 | - stage: Build 3 | variables: 4 | - template: globals.yml 5 | jobs: 6 | - job: Build 7 | 8 | templateContext: 9 | outputs: 10 | - output: pipelineArtifact 11 | targetPath: $(Build.StagingDirectory)/dist 12 | sbomBuildDropPath: $(Build.StagingDirectory)/dist 13 | artifactName: $(ARTIFACT_NAME_WHEEL) 14 | 15 | variables: 16 | python.version: "3.8" 17 | architecture: x64 18 | 19 | steps: 20 | - template: use_python.yml@self 21 | 22 | - script: pipx install poetry 23 | displayName: Install Poetry 24 | 25 | - script: poetry build --no-interaction 26 | displayName: poetry build 27 | 28 | - powershell: | 29 | $releaseVersion = & poetry version --short 30 | echo "releaseVersion: $releaseVersion" 31 | echo "##vso[task.setvariable variable=releaseVersion]$releaseVersion" 32 | echo "##vso[task.setvariable variable=releaseVersionWithPrefix;isOutput=true]v$releaseVersion" 33 | displayName: Get release version 34 | name: getReleaseVersionStep 35 | 36 | - task: CopyFiles@2 37 | displayName: Copy wheel and tarball 38 | inputs: 39 | sourceFolder: dist 40 | targetFolder: $(Build.StagingDirectory)/dist 41 | contents: | 42 | sarif_tools-$(releaseVersion)-py3-none-any.whl 43 | sarif_tools-$(releaseVersion).tar.gz 44 | -------------------------------------------------------------------------------- /.github/workflows/validation.yml: -------------------------------------------------------------------------------- 1 | name: Validation 2 | 3 | on: 4 | push: 5 
| branches: 6 | - main 7 | pull_request: 8 | branches: 9 | - main 10 | 11 | jobs: 12 | hygiene: 13 | if: github.repository == 'microsoft/sarif-tools' 14 | runs-on: ubuntu-latest 15 | name: Hygiene 16 | permissions: 17 | contents: read 18 | pull-requests: write 19 | steps: 20 | - uses: actions/checkout@v4 21 | 22 | - name: Setup Python 23 | uses: actions/setup-python@v5 24 | with: 25 | python-version: '3.8' 26 | 27 | - name: Install Poetry 28 | run: pip install poetry 29 | 30 | - name: Validate pyproject.toml and poetry.lock 31 | run: poetry check 32 | 33 | - name: Install dependencies 34 | run: poetry install 35 | 36 | - name: Validate code formatting 37 | run: poetry run ruff format --check 38 | 39 | - name: Validate code style 40 | run: poetry run ruff check 41 | 42 | 43 | test: 44 | if: github.repository == 'microsoft/sarif-tools' 45 | runs-on: ubuntu-latest 46 | name: Test 47 | steps: 48 | - uses: actions/checkout@v4 49 | 50 | - name: Setup Python 51 | uses: actions/setup-python@v5 52 | with: 53 | python-version: '3.8' 54 | 55 | - name: Install Poetry 56 | run: pip install poetry 57 | 58 | - name: Install dependencies 59 | run: poetry install --with dev 60 | 61 | - name: Run tests 62 | run: poetry run pytest 63 | -------------------------------------------------------------------------------- /sarif/charts.py: -------------------------------------------------------------------------------- 1 | """ 2 | Functions for generating charts from SARIF data 3 | """ 4 | 5 | import io 6 | import matplotlib.pyplot as plt 7 | 8 | 9 | def generate_severity_pie_chart(report, output_file=None): 10 | """ 11 | Generate a pie chart from the breakdown of issues by severity. 12 | The slices are ordered and plotted counter-clockwise. The return 13 | value is truthy if the number of issues is not zero, False otherwise. 14 | If `output_file` is `None`, return the bytes of the pie chart image in 15 | png format. Otherwise, write the bytes to the file specified (image 16 | format inferred from filename). 
17 | """ 18 | sizes = [] 19 | labels = [] 20 | explode = [] 21 | for severity in report.get_severities(): 22 | count = report.get_issue_count_for_severity(severity) 23 | if count > 0: 24 | sizes.append(count) 25 | labels.append(severity) 26 | explode.append(0.1) # could add more logic to highlight specific severities 27 | 28 | any_issues = bool(sizes) 29 | if any_issues: 30 | _fig1, ax1 = plt.subplots() 31 | ax1.pie( 32 | sizes, 33 | explode=explode, 34 | labels=labels, 35 | autopct="%1.1f%%", 36 | shadow=True, 37 | startangle=90, 38 | ) 39 | ax1.axis("equal") 40 | 41 | if output_file: 42 | plt.savefig(output_file) 43 | else: 44 | byte_buffer = io.BytesIO() 45 | plt.savefig(byte_buffer, format="png") 46 | return byte_buffer.getbuffer() 47 | return any_issues 48 | -------------------------------------------------------------------------------- /tests/ops/csv/test_csv.py: -------------------------------------------------------------------------------- 1 | import datetime 2 | import os 3 | import tempfile 4 | 5 | from sarif.operations import csv_op 6 | from sarif import sarif_file 7 | 8 | INPUT_SARIF = { 9 | "$schema": "https://schemastore.azurewebsites.net/schemas/json/sarif-2.1.0-rtm.5.json", 10 | "version": "2.1.0", 11 | "runs": [ 12 | { 13 | "tool": {"driver": {"name": "unit test"}}, 14 | "results": [ 15 | { 16 | "ruleId": "CA2101", 17 | "level": "error", 18 | "locations": [ 19 | { 20 | "physicalLocation": { 21 | "artifactLocation": { 22 | "uri": "file:///C:/Code/main.c", 23 | "index": 0, 24 | }, 25 | "region": {"startLine": 24, "startColumn": 9}, 26 | } 27 | } 28 | ], 29 | } 30 | ], 31 | } 32 | ], 33 | } 34 | 35 | 36 | EXPECTED_OUTPUT_CSV = [ 37 | "Tool,Severity,Code,Description,Location,Line", 38 | "unit test,error,CA2101,CA2101,file:///C:/Code/main.c,24", 39 | ] 40 | 41 | 42 | def test_csv(): 43 | mtime = datetime.datetime.now() 44 | input_sarif_file = sarif_file.SarifFile("INPUT_SARIF", INPUT_SARIF, mtime=mtime) 45 | 46 | input_sarif_file_set = sarif_file.SarifFileSet() 47 | input_sarif_file_set.files.append(input_sarif_file) 48 | 49 | with tempfile.TemporaryDirectory() as tmp: 50 | file_path = os.path.join(tmp, "output.csv") 51 | csv_op.generate_csv( 52 | input_sarif_file_set, file_path, output_multiple_files=False 53 | ) 54 | 55 | with open(file_path, "rb") as f_in: 56 | output_lines = f_in.read().decode().splitlines() 57 | 58 | assert output_lines == EXPECTED_OUTPUT_CSV 59 | -------------------------------------------------------------------------------- /sarif/loader.py: -------------------------------------------------------------------------------- 1 | """ 2 | Code to load SARIF files from disk. 3 | """ 4 | 5 | import glob 6 | import json 7 | import os 8 | 9 | from sarif.sarif_file import has_sarif_file_extension, SarifFile, SarifFileSet 10 | 11 | 12 | def _add_path_to_sarif_file_set(path, sarif_file_set): 13 | if os.path.isdir(path): 14 | sarif_file_set.add_dir(_load_dir(path)) 15 | return True 16 | if os.path.isfile(path): 17 | sarif_file_set.add_file(load_sarif_file(path)) 18 | return True 19 | return False 20 | 21 | 22 | def load_sarif_files(*args) -> SarifFileSet: 23 | """ 24 | Load SARIF files specified as individual filenames or directories. Return a SarifFileSet 25 | object. 
26 | """ 27 | ret = SarifFileSet() 28 | if args: 29 | for path in args: 30 | path_exists = _add_path_to_sarif_file_set(path, ret) 31 | if not path_exists: 32 | for resolved_path in glob.glob(path, recursive=True): 33 | if _add_path_to_sarif_file_set(resolved_path, ret): 34 | path_exists = True 35 | if not path_exists: 36 | print(f"Warning: input path {path} not found") 37 | return ret 38 | 39 | 40 | def _load_dir(path): 41 | subdir = SarifFileSet() 42 | for dirpath, _dirnames, filenames in os.walk(path): 43 | for filename in filenames: 44 | if has_sarif_file_extension(filename): 45 | subdir.add_file(load_sarif_file(os.path.join(dirpath, filename))) 46 | return subdir 47 | 48 | 49 | def load_sarif_file(file_path: str) -> SarifFile: 50 | """ 51 | Load JSON data from a file and return as a SarifFile object. 52 | As per https://tools.ietf.org/id/draft-ietf-json-rfc4627bis-09.html#rfc.section.8.1, JSON 53 | data SHALL be encoded in utf-8. 54 | """ 55 | try: 56 | with open(file_path, encoding="utf-8-sig") as file_in: 57 | data = json.load(file_in) 58 | return SarifFile(file_path, data) 59 | except Exception as exception: 60 | raise IOError(f"Cannot load {file_path}") from exception 61 | -------------------------------------------------------------------------------- /tests/ops/codeclimate/test_codeclimate.py: -------------------------------------------------------------------------------- 1 | import datetime 2 | import json 3 | import os 4 | import tempfile 5 | 6 | from sarif.operations import codeclimate_op 7 | from sarif import sarif_file 8 | 9 | INPUT_SARIF = { 10 | "$schema": "https://schemastore.azurewebsites.net/schemas/json/sarif-2.1.0-rtm.5.json", 11 | "version": "2.1.0", 12 | "runs": [ 13 | { 14 | "tool": {"driver": {"name": "unit test"}}, 15 | "results": [ 16 | { 17 | "ruleId": "CA2101", 18 | "level": "error", 19 | "locations": [ 20 | { 21 | "physicalLocation": { 22 | "artifactLocation": { 23 | "uri": "file:///C:/Code/main.c", 24 | "index": 0, 25 | }, 26 | "region": {"startLine": 24, "startColumn": 9}, 27 | } 28 | } 29 | ], 30 | } 31 | ], 32 | } 33 | ], 34 | } 35 | 36 | 37 | EXPECTED_OUTPUT_JSON = [ 38 | { 39 | "type": "issue", 40 | "check_name": "CA2101", 41 | "description": "CA2101", 42 | "categories": ["Bug Risk"], 43 | "location": { 44 | "path": "file:///C:/Code/main.c", 45 | "lines": {"begin": 24}, 46 | }, 47 | "severity": "major", 48 | "fingerprint": "e972b812ed32bf29ee306141244050b9", 49 | } 50 | ] 51 | 52 | 53 | def test_code_climate(): 54 | mtime = datetime.datetime.now() 55 | input_sarif_file = sarif_file.SarifFile("INPUT_SARIF", INPUT_SARIF, mtime=mtime) 56 | 57 | input_sarif_file_set = sarif_file.SarifFileSet() 58 | input_sarif_file_set.files.append(input_sarif_file) 59 | 60 | with tempfile.TemporaryDirectory() as tmp: 61 | file_path = os.path.join(tmp, "codeclimate.json") 62 | codeclimate_op.generate( 63 | input_sarif_file_set, file_path, output_multiple_files=False 64 | ) 65 | 66 | with open(file_path, "rb") as f_in: 67 | output_json = json.load(f_in) 68 | 69 | assert output_json == EXPECTED_OUTPUT_JSON 70 | -------------------------------------------------------------------------------- /tests/ops/emacs/test_emacs.py: -------------------------------------------------------------------------------- 1 | import datetime 2 | import os 3 | import tempfile 4 | 5 | from sarif.operations import emacs_op 6 | from sarif import sarif_file 7 | 8 | INPUT_SARIF = { 9 | "$schema": "https://schemastore.azurewebsites.net/schemas/json/sarif-2.1.0-rtm.5.json", 10 | "version": "2.1.0", 11 | 
"runs": [ 12 | { 13 | "tool": {"driver": {"name": "unit test"}}, 14 | "results": [ 15 | { 16 | "ruleId": "CA2101", 17 | "level": "error", 18 | "locations": [ 19 | { 20 | "physicalLocation": { 21 | "artifactLocation": { 22 | "uri": "file:///C:/Code/main.c", 23 | "index": 0, 24 | }, 25 | "region": {"startLine": 24, "startColumn": 9}, 26 | } 27 | } 28 | ], 29 | } 30 | ], 31 | } 32 | ], 33 | } 34 | 35 | 36 | EXPECTED_OUTPUT_TXT = """-*- compilation -*- 37 | 38 | Sarif Summary: unit test 39 | Document generated on: 40 | Total number of distinct issues of all severities (error, warning, note): 1 41 | 42 | 43 | 44 | Severity : error [1] 45 | file:///C:/Code/main.c:24: CA2101 46 | 47 | 48 | 49 | Severity : warning [0] 50 | 51 | 52 | Severity : note [0] 53 | 54 | """ 55 | 56 | 57 | def test_emacs(): 58 | mtime = datetime.datetime.now() 59 | input_sarif_file = sarif_file.SarifFile("INPUT_SARIF", INPUT_SARIF, mtime=mtime) 60 | 61 | input_sarif_file_set = sarif_file.SarifFileSet() 62 | input_sarif_file_set.files.append(input_sarif_file) 63 | 64 | with tempfile.TemporaryDirectory() as tmp: 65 | file_path = os.path.join(tmp, "output.txt") 66 | emacs_op.generate_compile( 67 | input_sarif_file_set, file_path, output_multiple_files=False, date_val=mtime 68 | ) 69 | 70 | with open(file_path, "rb") as f_in: 71 | output = f_in.read().decode() 72 | 73 | assert output == EXPECTED_OUTPUT_TXT.replace("\n", os.linesep).replace( 74 | "", mtime.strftime("%Y-%m-%d %H:%M:%S.%f") 75 | ) 76 | -------------------------------------------------------------------------------- /tests/ops/summary/test_summary.py: -------------------------------------------------------------------------------- 1 | import datetime 2 | import json 3 | import os 4 | import tempfile 5 | 6 | from sarif.operations import summary_op 7 | from sarif import sarif_file 8 | 9 | INPUT_SARIF = """{ 10 | "$schema": "https://schemastore.azurewebsites.net/schemas/json/sarif-2.1.0-rtm.5.json", 11 | "version": "2.1.0", 12 | "runs": [ 13 | { 14 | "tool": {"driver": {"name": "unit test"}}, 15 | "results": [ 16 | { 17 | "ruleId": "CA2103", 18 | "level": "error" 19 | }, 20 | { 21 | "ruleId": "CA2102", 22 | "level": "warning" 23 | }, 24 | { 25 | "ruleId": "CA2101", 26 | "level": "warning" 27 | }, 28 | { 29 | "ruleId": "CA2101", 30 | "level": "error" 31 | }, 32 | { 33 | "ruleId": "CA2101", 34 | "level": "note" 35 | }, 36 | { 37 | "ruleId": "CA2101", 38 | "level": "none" 39 | }, 40 | { 41 | "ruleId": "CA2101", 42 | "level": "error" 43 | } 44 | ] 45 | } 46 | ] 47 | } 48 | """ 49 | 50 | EXPECTED_OUTPUT_TXT = """ 51 | error: 3 52 | - CA2101: 2 53 | - CA2103: 1 54 | 55 | warning: 2 56 | - CA2102: 1 57 | - CA2101: 1 58 | 59 | note: 1 60 | - CA2101: 1 61 | 62 | none: 1 63 | - CA2101: 1 64 | """ 65 | 66 | 67 | def test_summary(): 68 | with tempfile.TemporaryDirectory() as tmp: 69 | input_sarif_file_path = os.path.join(tmp, "input.sarif") 70 | with open(input_sarif_file_path, "wb") as f_in: 71 | f_in.write(INPUT_SARIF.encode()) 72 | 73 | input_sarif = json.loads(INPUT_SARIF) 74 | 75 | input_sarif_file = sarif_file.SarifFile( 76 | input_sarif_file_path, input_sarif, mtime=datetime.datetime.now() 77 | ) 78 | 79 | input_sarif_file_set = sarif_file.SarifFileSet() 80 | input_sarif_file_set.files.append(input_sarif_file) 81 | 82 | file_path = os.path.join(tmp, "output.txt") 83 | summary_op.generate_summary( 84 | input_sarif_file_set, file_path, output_multiple_files=False 85 | ) 86 | 87 | with open(file_path, "rb") as f_out: 88 | output = f_out.read().decode() 89 | 90 | assert 
output == EXPECTED_OUTPUT_TXT.replace("\n", os.linesep) 91 | -------------------------------------------------------------------------------- /sarif/operations/csv_op.py: -------------------------------------------------------------------------------- 1 | """ 2 | Code for `sarif csv` command. 3 | """ 4 | 5 | import csv 6 | import os 7 | 8 | from sarif import sarif_file 9 | from sarif.sarif_file import SarifFileSet 10 | from sarif.sarif_file_utils import combine_record_code_and_description 11 | 12 | 13 | def generate_csv(input_files: SarifFileSet, output: str, output_multiple_files: bool): 14 | """ 15 | Generate a CSV file containing the list of issues from the SARIF files. 16 | sarif_dict is a dict from filename to deserialized SARIF data. 17 | """ 18 | output_file = output 19 | if output_multiple_files: 20 | for input_file in input_files: 21 | output_file_name = input_file.get_file_name_without_extension() + ".csv" 22 | print( 23 | "Writing CSV summary of", 24 | input_file.get_file_name(), 25 | "to", 26 | output_file_name, 27 | ) 28 | _write_to_csv(input_file, os.path.join(output, output_file_name)) 29 | filter_stats = input_file.get_filter_stats() 30 | if filter_stats: 31 | print(f" Results are filtered by {filter_stats}") 32 | output_file = os.path.join(output, "static_analysis_output.csv") 33 | source_description = input_files.get_description() 34 | print( 35 | "Writing CSV summary for", 36 | source_description, 37 | "to", 38 | os.path.basename(output_file), 39 | ) 40 | _write_to_csv(input_files, output_file) 41 | filter_stats = input_files.get_filter_stats() 42 | if filter_stats: 43 | print(f" Results are filtered by {filter_stats}") 44 | 45 | 46 | def _write_to_csv(file_or_files, output_file): 47 | """ 48 | Write out the errors to a CSV file so that a human can do further analysis. 
49 | """ 50 | list_of_errors = file_or_files.get_records() 51 | severities = file_or_files.get_severities() 52 | with open(output_file, "w", encoding="utf-8") as file_out: 53 | writer = csv.DictWriter( 54 | file_out, 55 | sarif_file.get_record_headings(file_or_files.has_blame_info()), 56 | lineterminator="\n", 57 | ) 58 | writer.writeheader() 59 | for severity in severities: 60 | errors_of_severity = [ 61 | e for e in list_of_errors if e["Severity"] == severity 62 | ] 63 | sorted_errors_by_severity = sorted( 64 | errors_of_severity, key=combine_record_code_and_description 65 | ) 66 | writer.writerows(error_dict for error_dict in sorted_errors_by_severity) 67 | -------------------------------------------------------------------------------- /tests/ops/word/test_word.py: -------------------------------------------------------------------------------- 1 | import datetime 2 | import os 3 | import tempfile 4 | 5 | from docx import Document 6 | from sarif.operations import word_op 7 | from sarif import sarif_file 8 | 9 | INPUT_SARIF = { 10 | "$schema": "https://schemastore.azurewebsites.net/schemas/json/sarif-2.1.0-rtm.5.json", 11 | "version": "2.1.0", 12 | "runs": [ 13 | { 14 | "tool": {"driver": {"name": "unit test"}}, 15 | "results": [ 16 | { 17 | "ruleId": "CA2101", 18 | "level": "error", 19 | "locations": [ 20 | { 21 | "physicalLocation": { 22 | "artifactLocation": { 23 | "uri": "file:///C:/Code/main.c", 24 | "index": 0, 25 | }, 26 | "region": {"startLine": 24, "startColumn": 9}, 27 | } 28 | } 29 | ], 30 | } 31 | ], 32 | } 33 | ], 34 | } 35 | 36 | 37 | EXPECTED_OUTPUT_TXT = [ 38 | "Sarif Summary: unit test", 39 | "Document generated on: ", 40 | "Total number of various severities (error, warning, note): 1", 41 | "", 42 | "", 43 | "Severity : error [ 1 ]", 44 | "CA2101: 1", 45 | "Severity : warning [ 0 ]", 46 | "None", 47 | "Severity : note [ 0 ]", 48 | "None", 49 | "", 50 | "Severity : error", 51 | "Severity : warning", 52 | "None", 53 | "Severity : note", 54 | "None", 55 | ] 56 | 57 | 58 | def test_word(): 59 | mtime = datetime.datetime.now() 60 | input_sarif_file = sarif_file.SarifFile("INPUT_SARIF", INPUT_SARIF, mtime=mtime) 61 | 62 | input_sarif_file_set = sarif_file.SarifFileSet() 63 | input_sarif_file_set.files.append(input_sarif_file) 64 | 65 | with tempfile.TemporaryDirectory() as tmp: 66 | output_file_path = os.path.join(tmp, "output.docx") 67 | word_op.generate_word_docs_from_sarif_inputs( 68 | input_sarif_file_set, 69 | None, 70 | output_file_path, 71 | output_multiple_files=False, 72 | date_val=mtime, 73 | ) 74 | 75 | word_doc = Document(output_file_path) 76 | word_doc_text = [paragraph.text for paragraph in word_doc.paragraphs] 77 | 78 | assert len(word_doc_text) == len(EXPECTED_OUTPUT_TXT) 79 | for actual, expected in zip(word_doc_text, EXPECTED_OUTPUT_TXT): 80 | assert actual == expected.replace( 81 | "", mtime.strftime("%Y-%m-%d %H:%M:%S.%f") 82 | ) 83 | -------------------------------------------------------------------------------- /sarif/operations/summary_op.py: -------------------------------------------------------------------------------- 1 | """ 2 | Code for `sarif summary` command. 3 | """ 4 | 5 | import os 6 | from typing import List 7 | 8 | from sarif.sarif_file import SarifFileSet 9 | 10 | 11 | def generate_summary( 12 | input_files: SarifFileSet, output: str, output_multiple_files: bool 13 | ): 14 | """ 15 | Generate a summary of the issues from the SARIF files. 16 | sarif_dict is a dict from filename to deserialized SARIF data. 
17 | output_file is the name of a text file to write, or if None, the summary is written to the 18 | console. 19 | """ 20 | output_file = output 21 | if output_multiple_files: 22 | for input_file in input_files: 23 | output_file_name = ( 24 | input_file.get_file_name_without_extension() + "_summary.txt" 25 | ) 26 | output_file = os.path.join(output, output_file_name) 27 | summary_lines = _generate_summary(input_file) 28 | print( 29 | "Writing summary of", 30 | input_file.get_file_name(), 31 | "to", 32 | output_file_name, 33 | ) 34 | with open(output_file, "w", encoding="utf-8") as file_out: 35 | file_out.writelines(line + "\n" for line in summary_lines) 36 | output_file_name = "static_analysis_summary.txt" 37 | output_file = os.path.join(output, output_file_name) 38 | 39 | summary_lines = _generate_summary(input_files) 40 | if output: 41 | print( 42 | "Writing summary of", 43 | input_files.get_description(), 44 | "to", 45 | output_file, 46 | ) 47 | with open(output_file, "w", encoding="utf-8") as file_out: 48 | file_out.writelines(line + "\n" for line in summary_lines) 49 | else: 50 | for lstr in summary_lines: 51 | print(lstr) 52 | 53 | 54 | def _generate_summary(input_files: SarifFileSet) -> List[str]: 55 | """ 56 | For each severity level (in priority order): create a list of the errors of 57 | that severity, print out how many there are and then do some further analysis 58 | of which error codes are present. 59 | """ 60 | ret = [] 61 | report = input_files.get_report() 62 | for severity in report.get_severities(): 63 | result_count = report.get_issue_count_for_severity(severity) 64 | issue_type_histogram = report.get_issue_type_histogram_for_severity(severity) 65 | ret.append(f"\n{severity}: {result_count}") 66 | ret += [f" - {key}: {count}" for (key, count) in issue_type_histogram.items()] 67 | filter_stats = input_files.get_filter_stats() 68 | if filter_stats: 69 | ret.append(f"\nResults were filtered by {filter_stats}") 70 | return ret 71 | -------------------------------------------------------------------------------- /SECURITY.md: -------------------------------------------------------------------------------- 1 | 2 | 3 | ## Security 4 | 5 | Microsoft takes the security of our software products and services seriously, which includes all source code repositories managed through our GitHub organizations, which include [Microsoft](https://github.com/Microsoft), [Azure](https://github.com/Azure), [DotNet](https://github.com/dotnet), [AspNet](https://github.com/aspnet), [Xamarin](https://github.com/xamarin), and [our GitHub organizations](https://opensource.microsoft.com/). 6 | 7 | If you believe you have found a security vulnerability in any Microsoft-owned repository that meets [Microsoft's definition of a security vulnerability](https://docs.microsoft.com/en-us/previous-versions/tn-archive/cc751383(v=technet.10)), please report it to us as described below. 8 | 9 | ## Reporting Security Issues 10 | 11 | **Please do not report security vulnerabilities through public GitHub issues.** 12 | 13 | Instead, please report them to the Microsoft Security Response Center (MSRC) at [https://msrc.microsoft.com/create-report](https://msrc.microsoft.com/create-report). 14 | 15 | If you prefer to submit without logging in, send email to [secure@microsoft.com](mailto:secure@microsoft.com). If possible, encrypt your message with our PGP key; please download it from the [Microsoft Security Response Center PGP Key page](https://www.microsoft.com/en-us/msrc/pgp-key-msrc). 
16 | 17 | You should receive a response within 24 hours. If for some reason you do not, please follow up via email to ensure we received your original message. Additional information can be found at [microsoft.com/msrc](https://www.microsoft.com/msrc). 18 | 19 | Please include the requested information listed below (as much as you can provide) to help us better understand the nature and scope of the possible issue: 20 | 21 | * Type of issue (e.g. buffer overflow, SQL injection, cross-site scripting, etc.) 22 | * Full paths of source file(s) related to the manifestation of the issue 23 | * The location of the affected source code (tag/branch/commit or direct URL) 24 | * Any special configuration required to reproduce the issue 25 | * Step-by-step instructions to reproduce the issue 26 | * Proof-of-concept or exploit code (if possible) 27 | * Impact of the issue, including how an attacker might exploit the issue 28 | 29 | This information will help us triage your report more quickly. 30 | 31 | If you are reporting for a bug bounty, more complete reports can contribute to a higher bounty award. Please visit our [Microsoft Bug Bounty Program](https://microsoft.com/msrc/bounty) page for more details about our active programs. 32 | 33 | ## Preferred Languages 34 | 35 | We prefer all communications to be in English. 36 | 37 | ## Policy 38 | 39 | Microsoft follows the principle of [Coordinated Vulnerability Disclosure](https://www.microsoft.com/en-us/msrc/cvd). 40 | 41 | -------------------------------------------------------------------------------- /sarif/operations/upgrade_filter_op.py: -------------------------------------------------------------------------------- 1 | """ 2 | Code for `sarif upgrade-filter` command. 3 | """ 4 | 5 | import os 6 | import yaml 7 | 8 | 9 | def _load_blame_filter_file(file_path): 10 | filter_description = os.path.basename(file_path) 11 | include_patterns = [] 12 | exclude_patterns = [] 13 | try: 14 | with open(file_path, encoding="utf-8") as file_in: 15 | for line in file_in.readlines(): 16 | if line.startswith("\ufeff"): 17 | # Strip byte order mark 18 | line = line[1:] 19 | lstrip = line.strip() 20 | if lstrip.startswith("#"): 21 | # Ignore comment lines 22 | continue 23 | pattern_spec = None 24 | is_include = True 25 | if lstrip.startswith("description:"): 26 | filter_description = lstrip[12:].strip() 27 | elif lstrip.startswith("+: "): 28 | is_include = True 29 | pattern_spec = lstrip[3:].strip() 30 | elif lstrip.startswith("-: "): 31 | is_include = False 32 | pattern_spec = lstrip[3:].strip() 33 | else: 34 | is_include = True 35 | pattern_spec = lstrip 36 | if pattern_spec: 37 | (include_patterns if is_include else exclude_patterns).append( 38 | pattern_spec 39 | ) 40 | except UnicodeDecodeError as error: 41 | raise IOError( 42 | f"Cannot read blame filter file {file_path}: not UTF-8 encoded?" 
43 | ) from error 44 | return ( 45 | filter_description, 46 | include_patterns, 47 | exclude_patterns, 48 | ) 49 | 50 | 51 | def upgrade_filter_file(old_filter_file, output_file): 52 | """Convert blame filter file to general filter file.""" 53 | ( 54 | filter_description, 55 | include_patterns, 56 | exclude_patterns, 57 | ) = _load_blame_filter_file(old_filter_file) 58 | new_filter_definition = { 59 | "description": ( 60 | filter_description 61 | if filter_description 62 | else f"Migrated from {os.path.basename(old_filter_file)}" 63 | ), 64 | "configuration": {"default-include": True, "check-line-number": True}, 65 | } 66 | if include_patterns: 67 | new_filter_definition["include"] = [ 68 | {"author-mail": include_pattern} for include_pattern in include_patterns 69 | ] 70 | if exclude_patterns: 71 | new_filter_definition["exclude"] = [ 72 | {"author-mail": exclude_pattern} for exclude_pattern in exclude_patterns 73 | ] 74 | with open(output_file, "w", encoding="utf8") as yaml_out: 75 | yaml.dump(new_filter_definition, yaml_out) 76 | print("Wrote", output_file) 77 | -------------------------------------------------------------------------------- /tests/ops/info/test_info.py: -------------------------------------------------------------------------------- 1 | import datetime 2 | import json 3 | import os 4 | import tempfile 5 | 6 | from sarif.operations import info_op 7 | from sarif import sarif_file 8 | 9 | INPUT_SARIF = """{ 10 | "$schema": "https://schemastore.azurewebsites.net/schemas/json/sarif-2.1.0-rtm.5.json", 11 | "version": "2.1.0", 12 | "runs": [ 13 | { 14 | "tool": {"driver": {"name": "unit test"}}, 15 | "results": [ 16 | { 17 | "ruleId": "CA2101", 18 | "level": "error", 19 | "locations": [ 20 | { 21 | "physicalLocation": { 22 | "artifactLocation": { 23 | "uri": "file:///C:/Code/main.c", 24 | "index": 0 25 | }, 26 | "region": {"startLine": 24, "startColumn": 9} 27 | } 28 | } 29 | ] 30 | } 31 | ] 32 | } 33 | ] 34 | } 35 | """ 36 | 37 | EXPECTED_OUTPUT_TXT = """ 38 | 840 bytes (1 KiB) 39 | modified: , accessed: , ctime: 40 | 1 run 41 | Tool: unit test 42 | 1 result 43 | 44 | """ 45 | 46 | 47 | def test_info(): 48 | with tempfile.TemporaryDirectory() as tmp: 49 | input_sarif_file_path = os.path.join(tmp, "input.sarif") 50 | with open(input_sarif_file_path, "wb") as f_in: 51 | f_in.write(INPUT_SARIF.encode()) 52 | 53 | stat = os.stat(input_sarif_file_path) 54 | stat_mtime = datetime.datetime.fromtimestamp(stat.st_mtime).strftime( 55 | "%Y-%m-%d %H:%M:%S.%f" 56 | ) 57 | stat_atime = datetime.datetime.fromtimestamp(stat.st_atime).strftime( 58 | "%Y-%m-%d %H:%M:%S.%f" 59 | ) 60 | stat_ctime = datetime.datetime.fromtimestamp(stat.st_ctime).strftime( 61 | "%Y-%m-%d %H:%M:%S.%f" 62 | ) 63 | 64 | input_sarif = json.loads(INPUT_SARIF) 65 | 66 | input_sarif_file = sarif_file.SarifFile( 67 | input_sarif_file_path, input_sarif, mtime=datetime.datetime.now() 68 | ) 69 | 70 | input_sarif_file_set = sarif_file.SarifFileSet() 71 | input_sarif_file_set.files.append(input_sarif_file) 72 | 73 | file_path = os.path.join(tmp, "output.txt") 74 | info_op.generate_info(input_sarif_file_set, file_path) 75 | 76 | with open(file_path, "rb") as f_out: 77 | output = f_out.read().decode() 78 | 79 | assert output == EXPECTED_OUTPUT_TXT.replace("\n", os.linesep).replace( 80 | "", input_sarif_file_path 81 | ).replace( 82 | "", 83 | stat_mtime, 84 | ).replace( 85 | "", 86 | stat_atime, 87 | ).replace( 88 | "", 89 | stat_ctime, 90 | ) 91 | 
-------------------------------------------------------------------------------- /sarif/operations/codeclimate_op.py: -------------------------------------------------------------------------------- 1 | """ 2 | Code for `sarif codeclimate` command. 3 | """ 4 | 5 | import os 6 | import json 7 | import hashlib 8 | 9 | from sarif.sarif_file import SarifFileSet 10 | 11 | _SEVERITIES = {"none": "info", "note": "info", "warning": "minor", "error": "major"} 12 | 13 | 14 | def generate(input_files: SarifFileSet, output: str, output_multiple_files: bool): 15 | """ 16 | Generate a JSON file in Code Climate schema containing the list of issues from the SARIF files. 17 | See https://github.com/codeclimate/platform/blob/master/spec/analyzers/SPEC.md 18 | Gitlab usage guide - https://docs.gitlab.com/ee/ci/testing/code_quality.html#implement-a-custom-tool 19 | """ 20 | output_file = output 21 | if output_multiple_files: 22 | for input_file in input_files: 23 | output_file_name = input_file.get_file_name_without_extension() + ".json" 24 | print( 25 | "Writing Code Climate JSON summary of", 26 | input_file.get_file_name(), 27 | "to", 28 | output_file_name, 29 | ) 30 | _write_to_json( 31 | input_file.get_records(), os.path.join(output, output_file_name) 32 | ) 33 | filter_stats = input_file.get_filter_stats() 34 | if filter_stats: 35 | print(f" Results are filtered by {filter_stats}") 36 | output_file = os.path.join(output, "static_analysis_output.json") 37 | source_description = input_files.get_description() 38 | print( 39 | "Writing Code Climate JSON summary for", 40 | source_description, 41 | "to", 42 | os.path.basename(output_file), 43 | ) 44 | _write_to_json(input_files.get_records(), output_file) 45 | filter_stats = input_files.get_filter_stats() 46 | if filter_stats: 47 | print(f" Results are filtered by {filter_stats}") 48 | 49 | 50 | def _write_to_json(list_of_errors, output_file): 51 | """ 52 | Write out the errors to a JSON file according to Code Climate specification. 53 | """ 54 | content = [] 55 | for record in list_of_errors: 56 | severity = _SEVERITIES.get(record.get("Severity", "warning"), "minor") 57 | 58 | # split Code value to extract error ID and description 59 | rule = record["Code"] 60 | description = record["Description"] 61 | 62 | path = record["Location"] 63 | line = record["Line"] 64 | 65 | fingerprint = hashlib.md5( 66 | f"{description} {path} ${line}`]".encode() 67 | ).hexdigest() 68 | 69 | # "categories" property is not used in GitLab but marked as "required" in Code Climate spec. 70 | # There is no easy way to determine a category so the fixed value is set. 
71 | content.append( 72 | { 73 | "type": "issue", 74 | "check_name": rule, 75 | "description": description, 76 | "categories": ["Bug Risk"], 77 | "location": {"path": path, "lines": {"begin": line}}, 78 | "severity": severity, 79 | "fingerprint": fingerprint, 80 | } 81 | ) 82 | 83 | with open(output_file, "w", encoding="utf-8") as file_out: 84 | json.dump(content, file_out, indent=4) 85 | -------------------------------------------------------------------------------- /tests/ops/copy/test_copy.py: -------------------------------------------------------------------------------- 1 | from copy import deepcopy 2 | import datetime 3 | import json 4 | import jsonschema 5 | import os 6 | import tempfile 7 | 8 | from sarif.operations import copy_op 9 | from sarif import sarif_file 10 | from tests.utils import get_sarif_schema 11 | 12 | SARIF_WITH_1_ISSUE = { 13 | "$schema": "https://schemastore.azurewebsites.net/schemas/json/sarif-2.1.0-rtm.5.json", 14 | "version": "2.1.0", 15 | "runs": [ 16 | { 17 | "tool": {"driver": {"name": "unit test"}}, 18 | "results": [ 19 | { 20 | "ruleId": "CA2101", 21 | "message": {"text": "just testing"}, 22 | "level": "error", 23 | "locations": [ 24 | { 25 | "physicalLocation": { 26 | "artifactLocation": { 27 | "uri": "file:///C:/Code/main.c", 28 | "index": 0, 29 | }, 30 | "region": {"startLine": 24, "startColumn": 9}, 31 | } 32 | } 33 | ], 34 | } 35 | ], 36 | } 37 | ], 38 | } 39 | 40 | 41 | def test_generate_sarif(): 42 | sarif_schema = get_sarif_schema() 43 | input_sarif_file = sarif_file.SarifFile( 44 | "SARIF_WITH_1_ISSUE", SARIF_WITH_1_ISSUE, mtime=datetime.datetime.now() 45 | ) 46 | jsonschema.validate(input_sarif_file.data, schema=sarif_schema) 47 | 48 | input_sarif_file_set = sarif_file.SarifFileSet() 49 | input_sarif_file_set.files.append(input_sarif_file) 50 | with tempfile.TemporaryDirectory() as tmp: 51 | output_file_path = os.path.join(tmp, "copied.json") 52 | output_sarif_file = copy_op.generate_sarif( 53 | input_sarif_file_set, 54 | output_file_path, 55 | append_timestamp=False, 56 | sarif_tools_version="1.2.3", 57 | cmdline="unit-test", 58 | ) 59 | 60 | with open(output_file_path, "rb") as f_out: 61 | output_sarif = json.load(f_out) 62 | assert output_sarif_file.data == output_sarif 63 | jsonschema.validate(output_sarif, schema=sarif_schema) 64 | 65 | expected_sarif = deepcopy(input_sarif_file.data) 66 | conversion = { 67 | "tool": { 68 | "driver": { 69 | "name": "sarif-tools", 70 | "fullName": "sarif-tools https://github.com/microsoft/sarif-tools/", 71 | "version": "1.2.3", 72 | "properties": { 73 | "file": input_sarif_file.abs_file_path, 74 | "modified": input_sarif_file.mtime.isoformat(), 75 | "processed": output_sarif["runs"][0]["conversion"]["tool"][ 76 | "driver" 77 | ]["properties"]["processed"], 78 | }, 79 | } 80 | }, 81 | "invocation": { 82 | "commandLine": "unit-test", 83 | "executionSuccessful": True, 84 | }, 85 | } 86 | expected_sarif["runs"][0]["conversion"] = conversion 87 | assert output_sarif == expected_sarif 88 | -------------------------------------------------------------------------------- /sarif/operations/trend_op.py: -------------------------------------------------------------------------------- 1 | """ 2 | Code for `sarif trend` command. 
3 | """ 4 | 5 | import csv 6 | from typing import Dict, List, Literal 7 | 8 | from sarif import sarif_file 9 | from sarif.sarif_file import SarifFileSet 10 | 11 | TIMESTAMP_COLUMNS = ["Date", "Tool", *sarif_file.SARIF_SEVERITIES_WITH_NONE] 12 | 13 | 14 | def generate_trend_csv( 15 | input_files: SarifFileSet, 16 | output_file: str, 17 | dateformat: Literal["dmy", "mdy", "ymd"], 18 | ) -> None: 19 | """ 20 | Generate a timeline csv of the issues from the SARIF files. Each SARIF file must contain a 21 | timestamp of the form 20211012T110000Z in its filename. 22 | sarif_dict is a dict from filename to deserialized SARIF data. 23 | output_file is the name of a CSV file to write, or if None, the name 24 | `static_analysis_trend.csv` will be used. 25 | """ 26 | if not output_file: 27 | output_file = "static_analysis_trend.csv" 28 | 29 | error_storage = [] 30 | for input_file in input_files: 31 | input_file_name = input_file.get_file_name() 32 | print("Processing", input_file_name) 33 | error_list = input_file.get_records() 34 | tool_name = "/".join(input_file.get_distinct_tool_names()) 35 | # Date parsing 36 | parsed_date = input_file.get_filename_timestamp() 37 | if not parsed_date: 38 | raise ValueError(f"Unable to parse date from filename: {input_file_name}") 39 | 40 | # Turn the date into something that looks nice in excel (d/m/y UK date format) 41 | dstr = parsed_date[0] 42 | (year, month, day, hour, minute) = ( 43 | dstr[0:4], 44 | dstr[4:6], 45 | dstr[6:8], 46 | dstr[9:11], 47 | dstr[11:13], 48 | ) 49 | if dateformat == "ymd": 50 | excel_date = f"{year}-{month}-{day} {hour}:{minute}" 51 | elif dateformat == "mdy": 52 | excel_date = f"{month}/{day}/{year} {hour}:{minute}" 53 | else: 54 | excel_date = f"{day}/{month}/{year} {hour}:{minute}" 55 | 56 | # Store data 57 | error_storage.append( 58 | _store_errors(parsed_date, excel_date, tool_name, error_list) 59 | ) 60 | 61 | error_storage.sort(key=lambda record: record["_timestamp"]) 62 | 63 | print("Writing trend CSV to", output_file) 64 | _write_csv(output_file, error_storage) 65 | filter_stats = input_files.get_filter_stats() 66 | if filter_stats: 67 | print(f" Results are filtered by {filter_stats}") 68 | 69 | 70 | def _write_csv(output_file: str, error_storage: List[Dict]) -> None: 71 | with open(output_file, "w", encoding="utf-8") as file_out: 72 | writer = csv.DictWriter( 73 | file_out, TIMESTAMP_COLUMNS, extrasaction="ignore", lineterminator="\n" 74 | ) 75 | writer.writeheader() 76 | for key in error_storage: 77 | writer.writerow(key) 78 | 79 | 80 | def _store_errors(timestamp, excel_date, tool: str, list_of_errors: List[Dict]) -> Dict: 81 | results = { 82 | "_timestamp": timestamp, # not written to CSV, but used for sorting 83 | "Date": excel_date, 84 | "Tool": tool, 85 | } 86 | for severity in sarif_file.SARIF_SEVERITIES_WITH_NONE: 87 | error_count = sum(1 for e in list_of_errors if severity in e["Severity"]) 88 | results[severity] = error_count 89 | 90 | return results 91 | -------------------------------------------------------------------------------- /sarif/operations/emacs_op.py: -------------------------------------------------------------------------------- 1 | """ 2 | Code for `sarif emacs` command. 
3 | """ 4 | 5 | from datetime import datetime 6 | import os 7 | 8 | from jinja2 import Environment, FileSystemLoader, select_autoescape 9 | 10 | from sarif import sarif_file 11 | 12 | _THIS_MODULE_PATH = os.path.dirname(__file__) 13 | 14 | _TEMPLATES_PATH = os.path.join(_THIS_MODULE_PATH, "templates") 15 | 16 | _ENV = Environment( 17 | loader=FileSystemLoader(searchpath=_TEMPLATES_PATH), 18 | autoescape=select_autoescape(), 19 | ) 20 | 21 | 22 | def generate_compile( 23 | input_files: sarif_file.SarifFileSet, 24 | output: str, 25 | output_multiple_files: bool, 26 | date_val: datetime = datetime.now(), 27 | ): 28 | """ 29 | Generate txt file from the input files. 30 | """ 31 | output_file = output 32 | if output_multiple_files: 33 | for input_file in input_files: 34 | output_file_name = input_file.get_file_name_without_extension() + ".txt" 35 | print( 36 | "Writing results for", 37 | input_file.get_file_name(), 38 | "to", 39 | output_file_name, 40 | ) 41 | _generate_single_txt( 42 | input_file, os.path.join(output, output_file_name), date_val 43 | ) 44 | output_file = os.path.join(output, ".compile.txt") 45 | source_description = input_files.get_description() 46 | print( 47 | "Writing results for", 48 | source_description, 49 | "to", 50 | os.path.basename(output_file), 51 | ) 52 | _generate_single_txt(input_files, output_file, date_val) 53 | 54 | 55 | def _generate_single_txt(input_file, output_file, date_val): 56 | all_tools = input_file.get_distinct_tool_names() 57 | report = input_file.get_report() 58 | 59 | total_distinct_issue_codes = 0 60 | problems = [] 61 | severities = report.get_severities() 62 | 63 | for severity in severities: 64 | distinct_issue_codes = report.get_issue_type_count_for_severity(severity) 65 | 66 | total_distinct_issue_codes += distinct_issue_codes 67 | 68 | severity_details = _enrich_details( 69 | report.get_issues_grouped_by_type_for_severity(severity) 70 | ) 71 | 72 | severity_section = { 73 | "type": severity, 74 | "count": distinct_issue_codes, 75 | "details": severity_details, 76 | } 77 | 78 | problems.append(severity_section) 79 | 80 | filtered = None 81 | filter_stats = input_file.get_filter_stats() 82 | if filter_stats: 83 | filtered = f"Results were filtered by {filter_stats}." 84 | 85 | template = _ENV.get_template("sarif_emacs.txt") 86 | txt_content = template.render( 87 | report_type=", ".join(all_tools), 88 | report_date=date_val, 89 | severities=", ".join(severities), 90 | total=total_distinct_issue_codes, 91 | problems=problems, 92 | filtered=filtered, 93 | ) 94 | 95 | with open(output_file, "wt", encoding="utf-8") as file_out: 96 | file_out.write(txt_content) 97 | 98 | 99 | def _enrich_details(records_of_severity): 100 | return [ 101 | {"code": key, "count": len(records), "details": records} 102 | for (key, records) in records_of_severity.items() 103 | ] 104 | -------------------------------------------------------------------------------- /sarif/operations/copy_op.py: -------------------------------------------------------------------------------- 1 | """ 2 | Code for `sarif copy` command. 
3 | """ 4 | 5 | import copy 6 | import datetime 7 | import json 8 | import os 9 | 10 | from sarif import loader, sarif_file 11 | from sarif.sarif_file import SarifFileSet, SarifFile 12 | 13 | 14 | def generate_sarif( 15 | input_files: SarifFileSet, 16 | output: str, 17 | append_timestamp: bool, 18 | sarif_tools_version: str, 19 | cmdline: str, 20 | ) -> SarifFile: 21 | """ 22 | Generate a new SARIF file based on the input files 23 | """ 24 | sarif_data_out = { 25 | "$schema": "https://schemastore.azurewebsites.net/schemas/json/sarif-2.1.0-rtm.5.json", 26 | "version": "2.1.0", 27 | "runs": [], 28 | } 29 | now = datetime.datetime.now(datetime.timezone.utc) 30 | output_file_abs_path = os.path.abspath(output) 31 | conversion_timestamp_iso8601 = now.isoformat() 32 | conversion_timestamp_trendformat = now.strftime(sarif_file.DATETIME_FORMAT) 33 | run_count = 0 34 | input_file_count = 0 35 | for input_file in input_files: 36 | if input_file.get_abs_file_path() == output_file_abs_path: 37 | print(f"Auto-excluding output file {output} from input file list") 38 | continue 39 | input_file_count += 1 40 | input_file_path = input_file.get_abs_file_path() 41 | input_file_modified_iso8601 = input_file.mtime.isoformat() 42 | for input_run in input_file.runs: 43 | run_count += 1 44 | # Create a shallow copy 45 | input_run_json_copy = copy.copy(input_run.run_data) 46 | conversion_properties = { 47 | "file": input_file_path, 48 | "modified": input_file_modified_iso8601, 49 | "processed": conversion_timestamp_iso8601, 50 | } 51 | input_run_json_copy["conversion"] = { 52 | "tool": { 53 | "driver": { 54 | "name": "sarif-tools", 55 | "fullName": "sarif-tools https://github.com/microsoft/sarif-tools/", 56 | "version": sarif_tools_version, 57 | "properties": conversion_properties, 58 | } 59 | }, 60 | "invocation": {"commandLine": cmdline, "executionSuccessful": True}, 61 | } 62 | results = input_run.get_results() 63 | filter_stats = input_run.get_filter_stats() 64 | if filter_stats: 65 | input_run_json_copy["results"] = results 66 | conversion_properties["filtered"] = filter_stats.to_json_camel_case() 67 | sarif_data_out["runs"].append(input_run_json_copy) 68 | output_file_path = output 69 | if append_timestamp: 70 | output_split = os.path.splitext(output) 71 | output_file_path = ( 72 | output_split[0] 73 | + f"_{conversion_timestamp_trendformat}" 74 | + (output_split[1] or ".sarif") 75 | ) 76 | with open(output_file_path, "w", encoding="utf-8") as file_out: 77 | json.dump(sarif_data_out, file_out, indent=4) 78 | runs_string = "1 run" if run_count == 1 else f"{run_count} runs" 79 | files_string = ( 80 | "1 SARIF file" if input_file_count == 1 else f"{input_file_count} SARIF files" 81 | ) 82 | print(f"Wrote {output_file_path} with {runs_string} from {files_string}") 83 | total_filter_stats = input_files.get_filter_stats() 84 | if total_filter_stats: 85 | print(total_filter_stats.to_string()) 86 | return loader.load_sarif_file(output_file_path) 87 | -------------------------------------------------------------------------------- /tests/ops/trend/test_trend.py: -------------------------------------------------------------------------------- 1 | import datetime 2 | import json 3 | import os 4 | import tempfile 5 | 6 | from sarif.operations import trend_op 7 | from sarif import sarif_file 8 | 9 | INPUT_SARIF_1 = """{ 10 | "$schema": "https://schemastore.azurewebsites.net/schemas/json/sarif-2.1.0-rtm.5.json", 11 | "version": "2.1.0", 12 | "runs": [ 13 | { 14 | "tool": {"driver": {"name": "name 1"}}, 15 | "results": [ 16 
| { 17 | "ruleId": "CA2103", 18 | "level": "error" 19 | }, 20 | { 21 | "ruleId": "CA2102", 22 | "level": "warning" 23 | }, 24 | { 25 | "ruleId": "CA2101", 26 | "level": "warning" 27 | }, 28 | { 29 | "ruleId": "CA2101", 30 | "level": "error" 31 | }, 32 | { 33 | "ruleId": "CA2101", 34 | "level": "note" 35 | }, 36 | { 37 | "ruleId": "CA2101", 38 | "level": "none" 39 | }, 40 | { 41 | "ruleId": "CA2101", 42 | "level": "error" 43 | } 44 | ] 45 | } 46 | ] 47 | } 48 | """ 49 | 50 | INPUT_SARIF_2 = """{ 51 | "$schema": "https://schemastore.azurewebsites.net/schemas/json/sarif-2.1.0-rtm.5.json", 52 | "version": "2.1.0", 53 | "runs": [ 54 | { 55 | "tool": {"driver": {"name": "name 2"}}, 56 | "results": [ 57 | { 58 | "ruleId": "CA2101", 59 | "level": "error" 60 | }, 61 | { 62 | "ruleId": "CA2101", 63 | "level": "note" 64 | }, 65 | { 66 | "ruleId": "CA2101", 67 | "level": "none" 68 | }, 69 | { 70 | "ruleId": "CA2101", 71 | "level": "error" 72 | } 73 | ] 74 | } 75 | ] 76 | } 77 | """ 78 | 79 | INPUTS = { 80 | "trend_test_20250106T060000Z.sarif": INPUT_SARIF_1, 81 | "trend_test_20250107T060000Z.sarif": INPUT_SARIF_2, 82 | } 83 | 84 | EXPECTED_OUTPUT_TXT = """Date,Tool,error,warning,note,none 85 | 06/01/2025 06:00,name 1,3,2,1,1 86 | 07/01/2025 06:00,name 2,2,0,1,1 87 | """ 88 | 89 | 90 | def test_trend(): 91 | with tempfile.TemporaryDirectory() as tmp: 92 | input_sarif_file_set = sarif_file.SarifFileSet() 93 | 94 | for input_file_name, input_json in INPUTS.items(): 95 | input_sarif_file_path = os.path.join(tmp, input_file_name) 96 | with open(input_sarif_file_path, "wb") as f_in: 97 | f_in.write(input_json.encode()) 98 | 99 | input_sarif = json.loads(input_json) 100 | 101 | input_sarif_file = sarif_file.SarifFile( 102 | input_sarif_file_path, input_sarif, mtime=datetime.datetime.now() 103 | ) 104 | 105 | input_sarif_file_set.files.append(input_sarif_file) 106 | 107 | file_path = os.path.join(tmp, "output.txt") 108 | trend_op.generate_trend_csv(input_sarif_file_set, file_path, dateformat="dmy") 109 | 110 | with open(file_path, "rb") as f_out: 111 | output = f_out.read().decode() 112 | 113 | assert output == EXPECTED_OUTPUT_TXT.replace("\n", os.linesep) 114 | -------------------------------------------------------------------------------- /tests/ops/diff/test_diff.py: -------------------------------------------------------------------------------- 1 | import datetime 2 | import json 3 | import os 4 | import tempfile 5 | 6 | from sarif.operations import diff_op 7 | from sarif import sarif_file 8 | 9 | SARIF_WITH_1_ISSUE = { 10 | "$schema": "https://schemastore.azurewebsites.net/schemas/json/sarif-2.1.0-rtm.5.json", 11 | "version": "2.1.0", 12 | "runs": [ 13 | { 14 | "tool": {"driver": {"name": "unit test"}}, 15 | "results": [ 16 | { 17 | "ruleId": "CA2101", 18 | "level": "error", 19 | "locations": [ 20 | { 21 | "physicalLocation": { 22 | "artifactLocation": { 23 | "uri": "file:///C:/Code/main.c", 24 | "index": 0, 25 | }, 26 | "region": {"startLine": 24, "startColumn": 9}, 27 | } 28 | } 29 | ], 30 | } 31 | ], 32 | } 33 | ], 34 | } 35 | 36 | SARIF_WITH_2_ISSUES = { 37 | "$schema": "https://schemastore.azurewebsites.net/schemas/json/sarif-2.1.0-rtm.5.json", 38 | "version": "2.1.0", 39 | "runs": [ 40 | { 41 | "tool": {"driver": {"name": "unit test"}}, 42 | "results": [ 43 | { 44 | "ruleId": "CA2101", 45 | "level": "error", 46 | "locations": [ 47 | { 48 | "physicalLocation": { 49 | "artifactLocation": { 50 | "uri": "file:///C:/Code/main.c", 51 | "index": 0, 52 | }, 53 | "region": {"startLine": 24, "startColumn": 
9}, 54 | } 55 | } 56 | ], 57 | }, 58 | { 59 | "ruleId": "CA2102", 60 | "level": "error", 61 | "locations": [ 62 | { 63 | "physicalLocation": { 64 | "artifactLocation": { 65 | "uri": "file:///C:/Code/main.c", 66 | "index": 0, 67 | }, 68 | "region": {"startLine": 34, "startColumn": 9}, 69 | } 70 | } 71 | ], 72 | }, 73 | ], 74 | "columnKind": "utf16CodeUnits", 75 | } 76 | ], 77 | } 78 | 79 | 80 | def test_print_diff(): 81 | mtime = datetime.datetime.now() 82 | old_sarif = sarif_file.SarifFile( 83 | "SARIF_WITH_1_ISSUE", SARIF_WITH_1_ISSUE, mtime=mtime 84 | ) 85 | new_sarif = sarif_file.SarifFile( 86 | "SARIF_WITH_2_ISSUES", SARIF_WITH_2_ISSUES, mtime=mtime 87 | ) 88 | with tempfile.TemporaryDirectory() as tmp: 89 | file_path = os.path.join(tmp, "diff.json") 90 | result = diff_op.print_diff( 91 | old_sarif, new_sarif, file_path, check_level="warning" 92 | ) 93 | with open(file_path, "rb") as f_in: 94 | diff_dict = json.load(f_in) 95 | assert result == 1 96 | assert diff_dict == { 97 | "all": {"+": 1, "-": 0}, 98 | "error": { 99 | "+": 1, 100 | "-": 0, 101 | "codes": { 102 | "CA2102": { 103 | "<": 0, 104 | ">": 1, 105 | "+@": [{"Location": "file:///C:/Code/main.c", "Line": 34}], 106 | } 107 | }, 108 | }, 109 | "warning": {"+": 0, "-": 0, "codes": {}}, 110 | "note": {"+": 0, "-": 0, "codes": {}}, 111 | } 112 | # If issues have decreased, return value should be 0. 113 | assert ( 114 | diff_op.print_diff(new_sarif, old_sarif, file_path, check_level="warning") 115 | == 0 116 | ) 117 | -------------------------------------------------------------------------------- /sarif/filter/filter_stats.py: -------------------------------------------------------------------------------- 1 | """ 2 | Statistics that record the outcome of a filter. 3 | """ 4 | 5 | import datetime 6 | 7 | 8 | class FilterStats: 9 | """ 10 | Statistics that record the outcome of a filter. 11 | """ 12 | 13 | def __init__(self, filter_description): 14 | self.filter_description = filter_description 15 | # Filter stats can also be loaded from a file created by `sarif copy`. 16 | self.rehydrated = False 17 | self.filter_datetime = None 18 | self.filtered_in_result_count = 0 19 | self.filtered_out_result_count = 0 20 | self.missing_property_count = 0 21 | self.unconvincing_line_number_count = 0 22 | 23 | def reset_counters(self): 24 | """ 25 | Zero all the counters. 26 | """ 27 | self.filter_datetime = datetime.datetime.now() 28 | self.filtered_in_result_count = 0 29 | self.filtered_out_result_count = 0 30 | self.missing_property_count = 0 31 | self.unconvincing_line_number_count = 0 32 | 33 | def add(self, other_filter_stats): 34 | """ 35 | Add another set of filter stats to my totals. 36 | """ 37 | if other_filter_stats: 38 | if other_filter_stats.filter_description and ( 39 | other_filter_stats.filter_description != self.filter_description 40 | ): 41 | self.filter_description += f", {other_filter_stats.filter_description}" 42 | self.filtered_in_result_count += other_filter_stats.filtered_in_result_count 43 | self.filtered_out_result_count += ( 44 | other_filter_stats.filtered_out_result_count 45 | ) 46 | self.missing_property_count += other_filter_stats.missing_property_count 47 | self.unconvincing_line_number_count += ( 48 | other_filter_stats.unconvincing_line_number_count 49 | ) 50 | 51 | def __str__(self): 52 | """ 53 | Automatic to_string() 54 | """ 55 | return self.to_string() 56 | 57 | def to_string(self): 58 | """ 59 | Generate a summary string for these filter stats. 
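        For illustration (values hypothetical), a summary for a filter described as "my-filter" reads roughly:
        'my-filter' at Mon Jan  6 06:00:00 2025: 2 filtered out, 10 passed the filter
        with the extra clauses appended only when results were included by default for lacking line numbers or filterable data.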
60 | """ 61 | ret = f"'{self.filter_description}'" 62 | if self.filter_datetime: 63 | ret += " at " 64 | ret += self.filter_datetime.strftime("%c") 65 | ret += ( 66 | f": {self.filtered_out_result_count} filtered out, " 67 | f"{self.filtered_in_result_count} passed the filter" 68 | ) 69 | if self.unconvincing_line_number_count: 70 | ret += ( 71 | f", {self.unconvincing_line_number_count} included by default " 72 | "for lacking line number information" 73 | ) 74 | if self.missing_property_count: 75 | ret += ( 76 | f", {self.missing_property_count} included by default " 77 | "for lacking data to filter" 78 | ) 79 | 80 | return ret 81 | 82 | def to_json_camel_case(self): 83 | """ 84 | Generate filter stats as JSON using camelCase naming, 85 | to fit with SARIF standard section 3.8.1 (Property Bags). 86 | """ 87 | return { 88 | "filter": self.filter_description, 89 | "in": self.filtered_in_result_count, 90 | "out": self.filtered_out_result_count, 91 | "default": { 92 | "noProperty": self.missing_property_count, 93 | "noLineNumber": self.unconvincing_line_number_count, 94 | }, 95 | } 96 | 97 | 98 | def load_filter_stats_from_json(json_data): 99 | """ 100 | Load filter stats from a SARIF file property bag using camelCase naming 101 | as per SARIF standard section 3.8.1 (Property Bags). 102 | """ 103 | ret = None 104 | if json_data: 105 | ret = FilterStats(json_data["filter"]) 106 | ret.rehydrated = True 107 | ret.filtered_in_result_count = json_data.get("in", 0) 108 | ret.filtered_out_result_count = json_data.get("out", 0) 109 | default_stats = json_data.get("default", {}) 110 | ret.unconvincing_line_number_count = default_stats.get("noLineNumber", 0) 111 | ret.missing_property_count = default_stats.get("noProperty", 0) 112 | return ret 113 | -------------------------------------------------------------------------------- /sarif/operations/templates/sarif_summary.html: -------------------------------------------------------------------------------- 1 | 2 | 76 | 77 | 78 | {% if image_data_base64 -%} 79 |
{%- endif %}

Sarif Summary: {{ report_type }}

Document generated on: {{ report_date }}

Total number of distinct issues of all severities ({{ severities }}): {{ total }}

{% if filtered -%}
{{ filtered }}
{%- endif %}

{% if chart_image_data_base64 -%}
{%- endif %}

{% for problem in problems %}

Severity : {{ problem.type }} [ {{ problem.count }} ]

{%- for error in problem.details %}
{%- for link in error.links %}
{{ link.0 }}
{%- endfor %}
{%- for line in error.details %}
{%- if line.Location %}
{{ line.Location }}:{{ line.Line }}
{%- else %}
{{ line.Description }}
{%- endif %}
{%- endfor %}
{%- endfor %}
124 | {%- endfor %} 125 | 126 | -------------------------------------------------------------------------------- /azure-pipelines/release.yml: -------------------------------------------------------------------------------- 1 | name: Release 2 | trigger: none 3 | pr: none 4 | 5 | variables: 6 | - template: templates/globals.yml 7 | - name: TeamName 8 | value: sarif-tools 9 | 10 | resources: 11 | repositories: 12 | - repository: MicroBuildTemplate 13 | type: git 14 | name: 1ESPipelineTemplates/MicroBuildTemplate 15 | ref: refs/tags/release 16 | 17 | extends: 18 | template: azure-pipelines/MicroBuild.1ES.Official.yml@MicroBuildTemplate 19 | parameters: 20 | sdl: 21 | sourceAnalysisPool: VSEngSS-MicroBuild2022-1ES 22 | pool: 23 | name: AzurePipelines-EO 24 | demands: 25 | - ImageOverride -equals 1ESPT-Ubuntu22.04 26 | os: Linux 27 | customBuildTags: 28 | - ES365AIMigrationTooling 29 | stages: 30 | - template: templates/build_stage.yml@self 31 | 32 | - stage: CreateTag 33 | displayName: Create Tag 34 | dependsOn: Build 35 | variables: 36 | releaseVersionWithPrefix: $[ stageDependencies.Build.Build.outputs['getReleaseVersionStep.releaseVersionWithPrefix'] ] 37 | jobs: 38 | - job: CreateTag 39 | steps: 40 | - checkout: self 41 | fetchDepth: 1 42 | fetchTags: false 43 | persistCredentials: true 44 | 45 | - script: | 46 | git config user.email "41898282+github-actions[bot]@users.noreply.github.com" 47 | git config user.name "Azure Piplines" 48 | git fetch --depth 1 origin $(Build.SourceBranchName) 49 | git tag -a $(releaseVersionWithPrefix) -m "Release $(releaseVersionWithPrefix)" origin/$(Build.SourceBranchName) 50 | git push origin $(releaseVersionWithPrefix) 51 | displayName: Create git tag 52 | 53 | - stage: CreateRelease 54 | displayName: Create GitHub Release 55 | dependsOn: 56 | - Build 57 | - CreateTag 58 | variables: 59 | releaseVersionWithPrefix: $[ stageDependencies.Build.Build.outputs['getReleaseVersionStep.releaseVersionWithPrefix'] ] 60 | jobs: 61 | - job: CreateRelease 62 | templateContext: 63 | type: releaseJob 64 | isProduction: true 65 | inputs: 66 | - input: pipelineArtifact 67 | artifactName: $(ARTIFACT_NAME_WHEEL) 68 | targetPath: $(Build.StagingDirectory)/dist 69 | steps: 70 | - task: GitHubRelease@1 #https://learn.microsoft.com/en-us/azure/devops/pipelines/tasks/reference/github-release-v1?view=azure-pipelines 71 | displayName: Create GitHub Release 72 | inputs: 73 | gitHubConnection: GitHub-sarif-tools 74 | repositoryName: microsoft/sarif-tools 75 | action: create 76 | target: $(Build.SourceBranchName) 77 | title: $(releaseVersionWithPrefix) 78 | tag: $(releaseVersionWithPrefix) 79 | tagSource: userSpecifiedTag 80 | isDraft: true 81 | addChangeLog: false 82 | assets: $(Build.StagingDirectory)/dist/* 83 | 84 | - stage: WaitForValidation 85 | dependsOn: CreateRelease 86 | jobs: 87 | - job: wait_for_validation 88 | displayName: Wait for manual validation 89 | pool: server 90 | steps: 91 | - task: ManualValidation@0 92 | timeoutInMinutes: 1440 # task times out in 1 day 93 | inputs: 94 | notifyUsers: plseng@microsoft.com 95 | instructions: Please test the latest draft release and then publish it. 96 | onTimeout: reject 97 | 98 | - stage: Release 99 | dependsOn: WaitForValidation 100 | jobs: 101 | - job: PublishToPyPi 102 | displayName: Release to PyPi 103 | 104 | pool: 105 | name: VSEngSS-MicroBuild2022-1ES # This pool is required to have the certs needed to publish to PyPi using ESRP. 
106 | os: windows 107 | image: server2022-microbuildVS2022-1es 108 | 109 | templateContext: 110 | type: releaseJob 111 | isProduction: true 112 | inputs: 113 | - input: pipelineArtifact 114 | artifactName: $(ARTIFACT_NAME_WHEEL) 115 | targetPath: $(Build.StagingDirectory)/dist 116 | 117 | steps: 118 | - template: MicroBuild.Publish.yml@MicroBuildTemplate 119 | parameters: 120 | intent: PackageDistribution 121 | contentType: PyPi 122 | contentSource: Folder 123 | folderLocation: $(Build.StagingDirectory)/dist 124 | waitForReleaseCompletion: true 125 | owners: rchiodo@microsoft.com 126 | approvers: grwheele@microsoft.com 127 | -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # Contributing 2 | 3 | This project welcomes contributions and suggestions. Most contributions require you to 4 | agree to a Contributor License Agreement (CLA) declaring that you have the right to, 5 | and actually do, grant us the rights to use your contribution. For details, visit 6 | https://cla.microsoft.com. 7 | 8 | When you submit a pull request, a CLA-bot will automatically determine whether you need 9 | to provide a CLA and decorate the PR appropriately (e.g., label, comment). Simply follow the 10 | instructions provided by the bot. You will only need to do this once across all repositories using our CLA. 11 | 12 | This project has adopted the [Microsoft Open Source Code of Conduct](https://opensource.microsoft.com/codeofconduct/). 13 | For more information see the [Code of Conduct FAQ](https://opensource.microsoft.com/codeofconduct/faq/) 14 | or contact [opencode@microsoft.com](mailto:opencode@microsoft.com) with any additional questions or comments. 15 | 16 | ## Pull Requests 17 | 18 | Pull requests are welcome. 19 | 20 | 1. Fork the repository. 21 | 2. Make and test your changes (see Developer Guide below). 22 | 3. Run `poetry run ruff format` and `poetry run black sarif` to format the code. 23 | 4. Run `poetry run pylint sarif` and check for no new errors or warnings. 24 | 5. Raise Pull Request in GitHub.com. 25 | 26 | ## Developer Guide 27 | 28 | ### Prerequisites 29 | 30 | - You need Python 3.8 installed. 31 | - This is the minimum supported version of the tool. Developing with a later version risks introducing type hints such as `list[dict]` that are not compatible with Python 3.8. 32 | - You need Poetry installed. Run this in an Admin CMD or under `sudo`: 33 | - `pip install poetry` 34 | 35 | Initialise Poetry by telling it where Python 3.8 is, e.g. 36 | 37 | ```bash 38 | # Windows - adjust to the path where you have installed Python 3.8. 39 | poetry env use "C:\Python38\python.exe" 40 | # Linux 41 | poetry env use 3.8 42 | ``` 43 | 44 | This is not necessary if your system Python version is 3.8. 45 | 46 | ### Running locally in Poetry virtualenv 47 | 48 | ```bash 49 | poetry install 50 | poetry run sarif 51 | ``` 52 | 53 | To check that the right versions are being run: 54 | 55 | ```bash 56 | poetry run python --version 57 | poetry run sarif --version --debug 58 | poetry run python -m sarif --version --debug 59 | ``` 60 | 61 | To see which executable is being run: 62 | 63 | ```bash 64 | # Windows 65 | poetry run cmd /c "where sarif" 66 | # Linux 67 | poetry run which sarif 68 | ``` 69 | 70 | ### Update dependency versions 71 | 72 | Run `poetry update` to bump package versions in the `poetry.lock` file. 
73 | 74 | ### Update product version 75 | 76 | Change the `version =` line in `pyproject.toml` for the new semantic version for your change. 77 | 78 | Change the version in `sarif/__init__.py` as well. 79 | 80 | ### Run unit tests 81 | 82 | ```bash 83 | poetry run pytest 84 | ``` 85 | 86 | ### Package using `poetry build` 87 | 88 | Run it on the source code: 89 | 90 | ```bash 91 | poetry build 92 | ``` 93 | 94 | If you want, you can install the package built locally at system level (outside the Poetry virtual environment): 95 | 96 | ```bash 97 | pip install dist/sarif-*.whl 98 | ``` 99 | 100 | To remove it again: 101 | 102 | ```bash 103 | pip uninstall sarif-tools 104 | ``` 105 | 106 | Note that there are two possible levels of installation: 107 | 108 | #### User installation 109 | 110 | When you run `pip install` and `pip` doesn't have permissions to write to the Python installation's `site-packages` directory, probably because you are not running as an admin/superuser, the package is installed at "user" level only. You can run it using: 111 | 112 | ```bash 113 | python -m sarif 114 | ``` 115 | 116 | You *cannot* run it using the bare command `sarif`, unless you add your user-level `Scripts` directory to your `PATH`. You can see where that is in the output from `pip install`: 117 | 118 | ```plain 119 | Installing collected packages: sarif 120 | WARNING: The script sarif.exe is installed in 'C:\Users\yournamehere\AppData\Roaming\Python\Python39\Scripts' which is not on PATH. 121 | Consider adding this directory to PATH or, if you prefer to suppress this warning, use --no-warn-script-location. 122 | ``` 123 | 124 | #### System installation 125 | 126 | When you run `pip install` and `pip` has permissions to write to the Python installation's `site-packages` directory, and the Python installation's `Scripts` directory is in your path, then you can run the `sarif` command without `python -m`: 127 | 128 | ```bash 129 | sarif 130 | ``` 131 | 132 | ### Adding packages from pypi to the project 133 | 134 | Add the package and its latest version number (as minimum version) to `[tool.poetry.dependencies]` in `pyproject.toml`. 135 | 136 | Then run this to update Poetry's lockfile. 137 | 138 | ```bash 139 | poetry update 140 | ``` 141 | 142 | ### Adding resource files to the project 143 | 144 | Add the file within the `sarif` directory and it will be installed with the Python source. For example, `sarif/operations/templates/sarif_summary.html`. 145 | -------------------------------------------------------------------------------- /CHANGELOG.md: -------------------------------------------------------------------------------- 1 | # Changelog 2 | 3 | All notable changes to this project will be documented in this file. 4 | 5 | The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), 6 | and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). 7 | 8 | ## [3.0.5] - 2025-07-17 9 | 10 | - #76 Support in HTML display rules as links, when help Uri provided 11 | - #82 `trend` and `csv` output csv files have `\r\n` line terminators when generated on Linux machine 12 | - #97 sarif blame crashes with UnicodeDecodeError when it encounters invalid UTF-8 13 | 14 | ## [3.0.4] - 2024-11-15 15 | 16 | - #73 Crash when using `--check`. 17 | 18 | ## [3.0.3] - 2024-09-30 19 | 20 | - #43 Support getting level from `ruleConfigurationOverrides` and `defaultConfiguration`. 21 | - #68 Fixed regression where reversing diff direction gave different results. 
22 | 23 | ## [3.0.2] - 2024-09-18 24 | 25 | - #55 part 2: Add `executionSuccessful` to `copy` operation output for SARIF schema compliance. 26 | 27 | ## [3.0.1] - 2024-09-16 28 | 29 | ### Fixed 30 | 31 | - #58 Fixed regression that broke `sarif diff` command in v3.0.0. 32 | 33 | ## [3.0.0](releases/tag/v3.0.0) - 2024-09-10 34 | 35 | ### Breaking Changes 36 | 37 | - Changed Python API to use new IssueReport type for issue grouping and sorting: 38 | - `SarifFileSet` now has a `get_report()` method 39 | - `s.get_result_count_by_severity()` replaced by 40 | `s.get_report().get_issue_type_histogram_for_severity(severity)` 41 | - `s.get_result_count_by_severity()` replaced by 42 | `s.get_report().get_issue_count_for_severity(severity)` 43 | - `s.get_records_grouped_by_severity()` replaced by 44 | `s.get_report().get_issues_for_severity(severity)` 45 | 46 | ### Added 47 | 48 | - Support "none" severity level. It's only included in the output if present in the input. 49 | 50 | ### Fixed 51 | 52 | - #39 Truncate long summaries. 53 | - Made issue sorting and grouping more consistent across the various reports. 54 | - Multiple occurrences of a single issue are now sorted by location in the Word report. 55 | - Improved debug and version reporting for when multiple versions are installed. 56 | - For the copy operation, "invocation" in the resulting sarif is changed to an object to match the spec. 57 | - #53 Fix the `blame` command for `file:///` URL locations. 58 | 59 | ### Compatibility 60 | 61 | - Python 3.8+ 62 | 63 | ## [2.0.0](releases/tag/v2.0.0) - 2022-11-07 64 | 65 | ### Breaking Changes 66 | 67 | - "Code" and "Description" are now separate columns in the CSV output, whereas before they were 68 | combined in the "Code" column. They are also separate keys in the "record" format if calling 69 | sarif-tools from Python. 70 | - `--blame-filter` argument has been replaced with `--filter`, using a new YAML-based format for 71 | more general filtering to replace the previous ad hoc text format which only supported blame. 72 | - There is a new `upgrade-filter` command to upgrade your old blame filter files to the new 73 | format. 74 | - Thanks to @abyss638 for contributing this enhancement! 75 | 76 | ### Added 77 | 78 | - New `codeclimate` command to generate output for GitLab use. 79 | - Thanks to @abyss638 for contributing this enhancement! 80 | - New `emacs` command to generate output for the popular Linux text editor. 81 | - Thanks to @dkloper for contributing this enhancement! 82 | - #14 Support recursive glob 83 | - Thanks to @bushelofsilicon for contributing this enhancement! 84 | 85 | ### Changed 86 | 87 | - When an input SARIF file contains blame information, the `csv` command output now has a column 88 | for `Author`. 89 | - #18 The `diff` command now prints up to three locations of new occurrences of issues (all are 90 | listed in the file output mode). 91 | 92 | ### Fixed 93 | 94 | - #4 and #19 docs improvements. 95 | - #12 allow zero locations for record. 96 | - #15 allow `text` to be absent in `message` object. 97 | - #20 allow UTF8 with BOM (`utf-8-sig`` encoding) 98 | - Thanks to @ManuelBerrueta for contributing this fix! 99 | 100 | ### Compatibility 101 | 102 | - Python 3.8+ 103 | 104 | ## [1.0.0](releases/tag/v1.0.0) - 2022-05-09 105 | 106 | ### Changed 107 | 108 | - Development, build and release is now based on [python-poetry](https://python-poetry.org). 109 | - No change to functionality since v0.3.0. 
110 | 111 | ### Compatibility 112 | 113 | - Python 3.8+ 114 | 115 | ## [0.3.0](releases/tag/v0.3.0) - 2022-01-14 116 | 117 | ### Added 118 | 119 | - Support for globs in Windows, e.g. `sarif summary android*.sarif` 120 | - `info` and `copy` commands 121 | 122 | ### Compatibility 123 | 124 | - Python 3.8+ 125 | 126 | ## [0.2.0](releases/tag/v0.2.0) - 2022-01-07 127 | 128 | ### Added 129 | 130 | - `--blame-filter` argument. 131 | 132 | ### Changed 133 | 134 | - Compatible with Python v3.8. Previously, Python v3.9 was required. 135 | 136 | ### Compatibility 137 | 138 | - Python 3.8+ 139 | 140 | ## [0.1.0](releases/tag/v0.1.0) - 2021-11-11 141 | 142 | ### Added 143 | 144 | - Initial versions of commands `blame`, `csv`, `diff`, `html`, `ls`, `summary`, `trend`, `usage` and `word` created in Microsoft Global Hackathon 2021. 145 | 146 | ### Compatibility 147 | 148 | - Python 3.9+ 149 | -------------------------------------------------------------------------------- /sarif/operations/info_op.py: -------------------------------------------------------------------------------- 1 | """ 2 | Code for `sarif info` command. 3 | """ 4 | 5 | import datetime 6 | import os 7 | 8 | from sarif.sarif_file import SarifFileSet 9 | 10 | _BYTES_PER_MIB = 1024 * 1024 11 | _BYTES_PER_KIB = 1024 12 | 13 | 14 | def _property_bag_stats(object_list): 15 | tally = {} 16 | universal_property_keys = [] 17 | partial_properties = [] 18 | if object_list: 19 | for obj in object_list: 20 | for key in obj.get("properties", {}): 21 | tally[key] = tally[key] + 1 if key in tally else 1 22 | object_count = len(object_list) 23 | universal_property_keys = [ 24 | key for (key, count) in tally.items() if count == object_count 25 | ] 26 | 27 | def tally_rank(key_count_pair): 28 | # Sort by descending tally then alphabetically 29 | return (-key_count_pair[1], key_count_pair[0]) 30 | 31 | partial_properties = [ 32 | {"key": key, "count": count, "percent": 100 * count / object_count} 33 | for (key, count) in sorted(tally.items(), key=tally_rank) 34 | if count < object_count 35 | ] 36 | return universal_property_keys, partial_properties 37 | 38 | 39 | def _generate_info_to_file(sarif_files, file_out): 40 | file_count = False 41 | for input_file in sarif_files: 42 | file_count += 1 43 | file_path = input_file.get_abs_file_path() 44 | file_stat = os.stat(file_path) 45 | size_in_bytes = file_stat.st_size 46 | if size_in_bytes > _BYTES_PER_MIB: 47 | readable_size = f"{file_stat.st_size / _BYTES_PER_MIB:.1f} MiB" 48 | else: 49 | readable_size = ( 50 | f"{(file_stat.st_size + _BYTES_PER_KIB - 1) // _BYTES_PER_KIB} KiB" 51 | ) 52 | print(input_file.get_abs_file_path(), file=file_out) 53 | print(f" {file_stat.st_size} bytes ({readable_size})", file=file_out) 54 | print( 55 | f" modified: {datetime.datetime.fromtimestamp(file_stat.st_mtime)}, " 56 | f"accessed: {datetime.datetime.fromtimestamp(file_stat.st_atime)}, " 57 | f"ctime: {datetime.datetime.fromtimestamp(file_stat.st_ctime)}", 58 | file=file_out, 59 | ) 60 | run_count = len(input_file.runs) 61 | print(f" {run_count} runs" if run_count != 1 else " 1 run", file=file_out) 62 | for run_index, run in enumerate(input_file.runs): 63 | if run_count != 1: 64 | print(f" Run #{run_index + 1}:", file=file_out) 65 | print(f" Tool: {run.get_tool_name()}", file=file_out) 66 | conversion_tool = run.get_conversion_tool_name() 67 | if conversion_tool: 68 | print(f" Conversion tool: {conversion_tool}", file=file_out) 69 | results = run.get_results() 70 | result_count = len(results) 71 | print( 72 | f" 
{result_count} results" if result_count != 1 else " 1 result", 73 | file=file_out, 74 | ) 75 | universal_property_keys, partial_properties = _property_bag_stats(results) 76 | ppk_string = ( 77 | ", ".join( 78 | "{} {}/{} ({:.1f} %)".format( 79 | p["key"], p["count"], result_count, p["percent"] 80 | ) 81 | for p in partial_properties 82 | ) 83 | if partial_properties 84 | else None 85 | ) 86 | if universal_property_keys: 87 | upk_string = ", ".join(universal_property_keys) 88 | if partial_properties: 89 | print( 90 | f" Result properties: all results have properties: {upk_string}; " 91 | f"some results have properties: {ppk_string}", 92 | file=file_out, 93 | ) 94 | else: 95 | print( 96 | f" All results have properties: {upk_string}", 97 | file=file_out, 98 | ) 99 | elif partial_properties: 100 | print( 101 | f" Result properties: {ppk_string}", 102 | file=file_out, 103 | ) 104 | print(file=file_out) 105 | return file_count 106 | 107 | 108 | def generate_info(sarif_files: SarifFileSet, output: str): 109 | """ 110 | Print structure information about the provided `sarif_files`. 111 | """ 112 | if output: 113 | with open(output, "w", encoding="utf-8") as file_out: 114 | file_count = _generate_info_to_file(sarif_files, file_out) 115 | if file_count: 116 | files_string = ( 117 | "1 SARIF file" if file_count == 1 else f"{file_count} SARIF files" 118 | ) 119 | print("Wrote information about", files_string, "to", output) 120 | else: 121 | file_count = _generate_info_to_file(sarif_files, None) 122 | if file_count == 0: 123 | print( 124 | "No SARIF files found. Try passing a path of a SARIF file or containing SARIF files." 125 | ) 126 | -------------------------------------------------------------------------------- /sarif/operations/html_op.py: -------------------------------------------------------------------------------- 1 | """ 2 | Code for `sarif html` command. 3 | """ 4 | 5 | import base64 6 | from datetime import datetime 7 | import os 8 | from typing import Union 9 | 10 | from jinja2 import Environment, FileSystemLoader, select_autoescape 11 | 12 | from sarif import charts, sarif_file 13 | 14 | _THIS_MODULE_PATH = os.path.dirname(__file__) 15 | 16 | _TEMPLATES_PATH = os.path.join(_THIS_MODULE_PATH, "templates") 17 | 18 | _ENV = Environment( 19 | loader=FileSystemLoader(searchpath=_TEMPLATES_PATH), 20 | autoescape=select_autoescape(), 21 | ) 22 | 23 | 24 | def generate_html( 25 | input_files: sarif_file.SarifFileSet, 26 | image_file: Union[str, None], 27 | output: str, 28 | output_multiple_files: bool, 29 | date_val: datetime = datetime.now(), 30 | ): 31 | """ 32 | Generate HTML file from the input files. 
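    If `image_file` is given, it is read and base64-encoded so the template can embed it in the report. When `output_multiple_files` is set, one HTML file is written per input file plus a combined "static_analysis_output.html" in the `output` directory; otherwise `output` names the single file to write.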
33 | """ 34 | if image_file: 35 | image_mime_type = "image/" + os.path.splitext(image_file)[-1] 36 | if image_mime_type == "image/jpg": 37 | image_mime_type = "image/jpeg" 38 | with open(image_file, "rb") as input_file: 39 | image_data = input_file.read() 40 | 41 | image_data_base64 = base64.b64encode(image_data).decode("utf-8") 42 | else: 43 | image_mime_type = None 44 | image_data_base64 = None 45 | 46 | output_file = output 47 | if output_multiple_files: 48 | for input_file in input_files: 49 | output_file_name = input_file.get_file_name_without_extension() + ".html" 50 | print( 51 | "Writing HTML report for", 52 | input_file.get_file_name(), 53 | "to", 54 | output_file_name, 55 | ) 56 | _generate_single_html( 57 | input_file, 58 | os.path.join(output, output_file_name), 59 | date_val, 60 | image_mime_type, 61 | image_data_base64, 62 | ) 63 | output_file = os.path.join(output, "static_analysis_output.html") 64 | source_description = input_files.get_description() 65 | print( 66 | "Writing HTML report for", 67 | source_description, 68 | "to", 69 | os.path.basename(output_file), 70 | ) 71 | _generate_single_html( 72 | input_files, output_file, date_val, image_mime_type, image_data_base64 73 | ) 74 | 75 | 76 | def _generate_single_html( 77 | input_file, output_file, date_val, image_mime_type, image_data_base64 78 | ): 79 | all_tools = input_file.get_distinct_tool_names() 80 | report = input_file.get_report() 81 | 82 | total_distinct_issue_codes = 0 83 | problems = [] 84 | severities = report.get_severities() 85 | 86 | for severity in severities: 87 | distinct_issue_codes = report.get_issue_type_count_for_severity(severity) 88 | 89 | total_distinct_issue_codes += distinct_issue_codes 90 | 91 | severity_details = _enrich_details( 92 | report.get_issues_grouped_by_type_for_severity(severity), input_file 93 | ) 94 | 95 | severity_section = { 96 | "type": severity, 97 | "count": distinct_issue_codes, 98 | "details": severity_details, 99 | } 100 | 101 | problems.append(severity_section) 102 | 103 | chart_data = charts.generate_severity_pie_chart(report, output_file=None) 104 | if chart_data: 105 | chart_image_data_base64 = base64.b64encode(chart_data).decode("utf-8") 106 | else: 107 | chart_image_data_base64 = None 108 | 109 | filtered = None 110 | filter_stats = input_file.get_filter_stats() 111 | if filter_stats: 112 | filtered = f"Results were filtered by {filter_stats}." 
113 | 114 | template = _ENV.get_template("sarif_summary.html") 115 | html_content = template.render( 116 | report_type=", ".join(all_tools), 117 | report_date=date_val, 118 | severities=", ".join(severities), 119 | total=total_distinct_issue_codes, 120 | problems=problems, 121 | image_mime_type=image_mime_type, 122 | image_data_base64=image_data_base64, 123 | chart_image_data_base64=chart_image_data_base64, 124 | filtered=filtered, 125 | ) 126 | 127 | with open(output_file, "wt", encoding="utf-8") as file_out: 128 | file_out.write(html_content) 129 | 130 | 131 | def _extract_help_links_from_rules(rules, link_to_desc, key): 132 | for rule in rules: 133 | if "helpUri" in rule: 134 | uri = rule["helpUri"] 135 | if uri not in link_to_desc: 136 | desc = rule.get("fullDescription", {}).get("text") 137 | if not desc: 138 | desc = rule.get("name") 139 | if not desc: 140 | desc = key 141 | link_to_desc[uri] = desc 142 | 143 | 144 | def _enrich_details(records_of_severity, input_file): 145 | ret = [] 146 | 147 | for key, records in records_of_severity.items(): 148 | link_to_desc = {} 149 | for record in records: 150 | rule_id = record["Code"] 151 | rules = input_file.get_rules_by_id(rule_id) 152 | _extract_help_links_from_rules(rules, link_to_desc, key) 153 | links = [(desc, uri) for (uri, desc) in link_to_desc.items()] 154 | ret.append( 155 | {"code": key, "count": len(records), "links": links, "details": records} 156 | ) 157 | return ret 158 | -------------------------------------------------------------------------------- /tests/ops/html/test_html.py: -------------------------------------------------------------------------------- 1 | import datetime 2 | import os 3 | import tempfile 4 | 5 | from sarif.operations import html_op 6 | from sarif import sarif_file 7 | 8 | INPUT_SARIF = { 9 | "$schema": "https://schemastore.azurewebsites.net/schemas/json/sarif-2.1.0-rtm.5.json", 10 | "version": "2.1.0", 11 | "runs": [ 12 | { 13 | "tool": { 14 | "driver": { 15 | "name": "unit test", 16 | "rules": [ 17 | { 18 | "id": "CA2101", 19 | "name": "Specify for P/Invoke string arguments", 20 | "helpUri": "https://learn.microsoft.com/dotnet/fundamentals/code-analysis/quality-rules/ca2101", 21 | } 22 | ], 23 | } 24 | }, 25 | "results": [ 26 | { 27 | "ruleId": "CA2101", 28 | "level": "error", 29 | "locations": [ 30 | { 31 | "physicalLocation": { 32 | "artifactLocation": { 33 | "uri": "file:///C:/Code/main.c", 34 | "index": 0, 35 | }, 36 | "region": {"startLine": 24, "startColumn": 9}, 37 | } 38 | } 39 | ], 40 | } 41 | ], 42 | } 43 | ], 44 | } 45 | 46 | 47 | EXPECTED_OUTPUT_TXT = """ 48 | 49 | 123 | 124 | 125 |

Sarif Summary: unit test

126 |

Document generated on:

127 |

Total number of distinct issues of all severities (error, warning, note): 1

128 | 129 |

Severity : error [ 1 ]

130 | 142 | 143 |

Severity : warning [ 0 ]

144 |
    145 | 146 |
147 | 148 |

Severity : note [ 0 ]

149 |
    150 | 151 |
152 | """ 168 | 169 | 170 | def test_html(): 171 | mtime = datetime.datetime.now() 172 | input_sarif_file = sarif_file.SarifFile("INPUT_SARIF", INPUT_SARIF, mtime=mtime) 173 | 174 | input_sarif_file_set = sarif_file.SarifFileSet() 175 | input_sarif_file_set.files.append(input_sarif_file) 176 | 177 | with tempfile.TemporaryDirectory() as tmp: 178 | file_path = os.path.join(tmp, "output.html") 179 | html_op.generate_html( 180 | input_sarif_file_set, 181 | None, 182 | file_path, 183 | output_multiple_files=False, 184 | date_val=mtime, 185 | ) 186 | 187 | with open(file_path, "rb") as f_in: 188 | output = f_in.read().decode() 189 | 190 | # Remove pie chart before diffing 191 | pie_chart_start = output.find("", pie_chart_start) + 2 193 | output = output[:pie_chart_start] + output[pie_chart_end:] 194 | 195 | # Check the output line-by-line, ignoring whitespace around and between lines. 196 | output_split = output.splitlines() 197 | for check_line in EXPECTED_OUTPUT_TXT.replace( 198 | "", mtime.strftime("%Y-%m-%d %H:%M:%S.%f") 199 | ).splitlines(): 200 | expected = check_line.strip() 201 | if not expected: 202 | continue 203 | actual = "" 204 | while output_split: 205 | actual = output_split.pop(0).strip() 206 | if actual: 207 | break 208 | assert actual == expected 209 | -------------------------------------------------------------------------------- /sarif/issues_report.py: -------------------------------------------------------------------------------- 1 | """ 2 | A report derived from a SARIF file or group of SARIF files. 3 | 4 | The issues are grouped by severity, then by key (which is either issue code + truncated 5 | description or just issue code if the issues have distinct descriptions), then listed in location 6 | order. 7 | """ 8 | 9 | from typing import Dict, List 10 | 11 | from sarif.sarif_file_utils import ( 12 | combine_code_and_description, 13 | combine_record_code_and_description, 14 | record_sort_key, 15 | SARIF_SEVERITIES_WITHOUT_NONE, 16 | SARIF_SEVERITIES_WITH_NONE, 17 | ) 18 | 19 | 20 | class IssuesReport: 21 | """ 22 | This class imposes a hierarchical structure on a list of records which is helpful 23 | for presenting reader-friendly summaries. 
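    Illustrative use (a sketch: the record fields follow the record dicts used elsewhere in this package, and the values are hypothetical):

        report = IssuesReport()
        report.add_record(
            {"Severity": "error", "Code": "CA2101", "Description": "Specify marshaling",
             "Location": "file:///C:/Code/main.c", "Line": 24}
        )
        for severity in report.get_severities():
            histogram = report.get_issue_type_histogram_for_severity(severity)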
24 | """ 25 | 26 | def __init__(self): 27 | self._sev_to_records = {sev: [] for sev in SARIF_SEVERITIES_WITH_NONE} 28 | self._sev_to_sorted_keys = None 29 | self._records_have_been_sorted = False 30 | 31 | def add_record(self, record: dict): 32 | """Append record to list for severity - no sorting.""" 33 | self._sev_to_records.setdefault(record["Severity"], []).append(record) 34 | if self._records_have_been_sorted: 35 | self._sev_to_sorted_keys = None 36 | self._records_have_been_sorted = False 37 | 38 | def _group_records_by_key(self): 39 | self._sev_to_sorted_keys = {} 40 | code_to_key_and_count = {} 41 | for severity, issues in self._sev_to_records.items(): 42 | code_to_key_and_count.clear() 43 | for record in issues: 44 | code = record["Code"] 45 | key = combine_record_code_and_description(record) 46 | key_and_count = code_to_key_and_count.get(code) 47 | if key_and_count is None: 48 | code_to_key_and_count[code] = { 49 | "key": key, 50 | "common_desc": record["Description"], 51 | "count": 1, 52 | } 53 | else: 54 | key_and_count["count"] += 1 55 | common_desc_stem = key_and_count["common_desc"] 56 | desc = record["Description"] 57 | if not desc.startswith(common_desc_stem): 58 | for char_pos, (char1, char2) in enumerate( 59 | zip(common_desc_stem, desc) 60 | ): 61 | if char1 != char2: 62 | new_desc_stem = common_desc_stem[0:char_pos] 63 | key_and_count["common_desc"] = new_desc_stem 64 | key_and_count["key"] = combine_code_and_description( 65 | code, new_desc_stem + " ..." 66 | ) 67 | break 68 | sorted_codes = sorted( 69 | code_to_key_and_count.keys(), 70 | key=lambda code: code_to_key_and_count[code]["count"], 71 | reverse=True, 72 | ) 73 | self._sev_to_sorted_keys[severity] = { 74 | code_to_key_and_count[code]["key"]: [] for code in sorted_codes 75 | } 76 | for record in issues: 77 | # Not sorting the issues by location at this point 78 | code = record["Code"] 79 | self._sev_to_sorted_keys[severity][ 80 | code_to_key_and_count[code]["key"] 81 | ].append(record) 82 | 83 | def _sort_record_lists(self): 84 | if self._sev_to_sorted_keys is None: 85 | self._group_records_by_key() 86 | for key_to_records in self._sev_to_sorted_keys.values(): 87 | for records in key_to_records.values(): 88 | records.sort(key=record_sort_key) 89 | self._records_have_been_sorted = True 90 | 91 | def get_issue_count_for_severity(self, severity: str) -> int: 92 | """Get the number of individual records at this severity level.""" 93 | return len(self._sev_to_records.get(severity, [])) 94 | 95 | def get_issue_type_count_for_severity(self, severity: str) -> int: 96 | """Get the number of distinct issue types at this severity level.""" 97 | if self._sev_to_sorted_keys is None: 98 | self._group_records_by_key() 99 | return len(self._sev_to_sorted_keys.get(severity, [])) 100 | 101 | def any_none_severities(self) -> bool: 102 | """Are there any records with severity level "none"?""" 103 | return bool(self._sev_to_records.get("none", {})) 104 | 105 | def get_severities(self) -> List[str]: 106 | """ 107 | Get the list of relevant severity levels for these records. 108 | 109 | The returned list always includes "error", "warning" and "note", the standard SARIF severity 110 | levels for code issues. The unusual severity level "none" is only included at the end if 111 | there are any records with severity "none". 
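        For example, a report containing only error-, warning- and note-level records returns
        ["error", "warning", "note"], while a report that also contains "none"-level records
        returns ["error", "warning", "note", "none"].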
112 | """ 113 | return ( 114 | SARIF_SEVERITIES_WITH_NONE 115 | if self.any_none_severities() 116 | else SARIF_SEVERITIES_WITHOUT_NONE 117 | ) 118 | 119 | def get_issues_grouped_by_type_for_severity( 120 | self, severity: str 121 | ) -> Dict[str, List[dict]]: 122 | """ 123 | Get a dict from issue type key to list of matching records at this severity level. 124 | 125 | Issue type keys are derived from the issue code and (common prefix of) description. 126 | """ 127 | if not self._records_have_been_sorted: 128 | self._sort_record_lists() 129 | return self._sev_to_sorted_keys.get(severity, {}) 130 | 131 | def get_issue_type_histogram_for_severity(self, severity: str) -> Dict[str, int]: 132 | """ 133 | Get a dict from issue type key to number of matching records at this severity level. 134 | 135 | This is the same as `{k: len(v) for k, v in d.items()}` where 136 | `d = report.get_issues_grouped_by_type_for_severity(severity)`. 137 | """ 138 | if self._sev_to_sorted_keys is None: 139 | self._group_records_by_key() 140 | return { 141 | key: len(records) 142 | for key, records in self.get_issues_grouped_by_type_for_severity( 143 | severity 144 | ).items() 145 | } 146 | 147 | def get_issues_for_severity(self, severity: str) -> List[dict]: 148 | """ 149 | Get a flat list of the issues at this severity. 150 | 151 | The sorting is consistent with `get_issues_grouped_by_type`, but the issues are not grouped 152 | by type. 153 | """ 154 | type_to_issues = self.get_issues_grouped_by_type_for_severity(severity) 155 | ret = [] 156 | for issues_for_type in type_to_issues.values(): 157 | ret.extend(issues_for_type) 158 | return ret 159 | -------------------------------------------------------------------------------- /tests/ops/diff/test_diff_issues_reordered.py: -------------------------------------------------------------------------------- 1 | import datetime 2 | import json 3 | import os 4 | import tempfile 5 | 6 | from sarif.operations import diff_op 7 | from sarif import sarif_file 8 | 9 | SARIF = { 10 | "$schema": "https://schemastore.azurewebsites.net/schemas/json/sarif-2.1.0-rtm.5.json", 11 | "version": "2.1.0", 12 | "runs": [ 13 | { 14 | "tool": {"driver": {"name": "unit test"}}, 15 | "results": [ 16 | { 17 | "ruleId": "core.NullDereference", 18 | "ruleIndex": 2, 19 | "message": { 20 | "text": "Access to field 'type' results in a dereference of a null pointer (loaded from variable 'json')" 21 | }, 22 | "locations": [ 23 | { 24 | "physicalLocation": { 25 | "artifactLocation": { 26 | "uri": "file:///C:/Code/main.c", 27 | "index": 0, 28 | }, 29 | "region": {"startLine": 24, "startColumn": 9}, 30 | } 31 | } 32 | ], 33 | }, 34 | { 35 | "ruleId": "core.NullDereference", 36 | "ruleIndex": 2, 37 | "message": { 38 | "text": "Dereference of null pointer (loaded from variable 's')" 39 | }, 40 | "locations": [ 41 | { 42 | "physicalLocation": { 43 | "artifactLocation": { 44 | "uri": "file:///C:/Code/main.c", 45 | "index": 0, 46 | }, 47 | "region": {"startLine": 24, "startColumn": 9}, 48 | } 49 | } 50 | ], 51 | }, 52 | { 53 | "ruleId": "core.NullDereference", 54 | "ruleIndex": 2, 55 | "message": { 56 | "text": "Access to field 'other' results in a dereference of a null pointer (loaded from variable 'json')" 57 | }, 58 | "locations": [ 59 | { 60 | "physicalLocation": { 61 | "artifactLocation": { 62 | "uri": "file:///C:/Code/main.c", 63 | "index": 0, 64 | }, 65 | "region": {"startLine": 24, "startColumn": 9}, 66 | } 67 | } 68 | ], 69 | }, 70 | ], 71 | } 72 | ], 73 | } 74 | 75 | SARIF_WITH_ISSUES_REORDERED = { 76 
| "$schema": "https://schemastore.azurewebsites.net/schemas/json/sarif-2.1.0-rtm.5.json", 77 | "version": "2.1.0", 78 | "runs": [ 79 | { 80 | "tool": {"driver": {"name": "unit test"}}, 81 | "results": [ 82 | { 83 | "ruleId": "core.NullDereference", 84 | "ruleIndex": 2, 85 | "message": { 86 | "text": "Access to field 'type' results in a dereference of a null pointer (loaded from variable 'json')" 87 | }, 88 | "locations": [ 89 | { 90 | "physicalLocation": { 91 | "artifactLocation": { 92 | "uri": "file:///C:/Code/main.c", 93 | "index": 0, 94 | }, 95 | "region": {"startLine": 24, "startColumn": 9}, 96 | } 97 | } 98 | ], 99 | }, 100 | { 101 | "ruleId": "core.NullDereference", 102 | "ruleIndex": 2, 103 | "message": { 104 | "text": "Access to field 'other' results in a dereference of a null pointer (loaded from variable 'json')" 105 | }, 106 | "locations": [ 107 | { 108 | "physicalLocation": { 109 | "artifactLocation": { 110 | "uri": "file:///C:/Code/main.c", 111 | "index": 0, 112 | }, 113 | "region": {"startLine": 24, "startColumn": 9}, 114 | } 115 | } 116 | ], 117 | }, 118 | { 119 | "ruleId": "core.NullDereference", 120 | "ruleIndex": 2, 121 | "message": { 122 | "text": "Dereference of null pointer (loaded from variable 's')" 123 | }, 124 | "locations": [ 125 | { 126 | "physicalLocation": { 127 | "artifactLocation": { 128 | "uri": "file:///C:/Code/main.c", 129 | "index": 0, 130 | }, 131 | "region": {"startLine": 24, "startColumn": 9}, 132 | } 133 | } 134 | ], 135 | }, 136 | ], 137 | } 138 | ], 139 | } 140 | 141 | 142 | def test_diff_issues_reordered(): 143 | mtime = datetime.datetime.now() 144 | sarif = sarif_file.SarifFile("SARIF", SARIF, mtime=mtime) 145 | sarif_reordered = sarif_file.SarifFile( 146 | "SARIF_WITH_ISSUES_REORDERED", SARIF_WITH_ISSUES_REORDERED, mtime=mtime 147 | ) 148 | verify_no_diffs(sarif, sarif_reordered) 149 | verify_no_diffs(sarif_reordered, sarif) 150 | 151 | 152 | def verify_no_diffs(old_sarif: sarif_file.SarifFile, new_sarif: sarif_file.SarifFile): 153 | with tempfile.TemporaryDirectory() as tmp: 154 | file_path = os.path.join(tmp, "diff.json") 155 | result = diff_op.print_diff( 156 | old_sarif, new_sarif, file_path, check_level="warning" 157 | ) 158 | with open(file_path, "rb") as f_in: 159 | diff_dict = json.load(f_in) 160 | assert result == 0 161 | assert diff_dict == { 162 | "all": {"+": 0, "-": 0}, 163 | "error": {"+": 0, "-": 0, "codes": {}}, 164 | "warning": {"+": 0, "-": 0, "codes": {}}, 165 | "note": {"+": 0, "-": 0, "codes": {}}, 166 | } 167 | -------------------------------------------------------------------------------- /sarif/operations/blame_op.py: -------------------------------------------------------------------------------- 1 | """ 2 | Code for `sarif blame` command. 
3 | """ 4 | 5 | import json 6 | import os 7 | import subprocess 8 | import sys 9 | from typing import Callable, Iterable, List, Union 10 | import urllib.parse 11 | import urllib.request 12 | 13 | from sarif.sarif_file import SarifFileSet 14 | 15 | 16 | def _run_git_blame(repo_path: str, file_path: str) -> List[bytes]: 17 | cmd = ["git", "blame", "--porcelain", _make_path_git_compatible(file_path)] 18 | with subprocess.Popen(cmd, stdout=subprocess.PIPE, cwd=repo_path) as proc: 19 | result = [] 20 | if proc.stdout: 21 | result = [x for x in proc.stdout.readlines()] 22 | 23 | # Ensure process terminates 24 | proc.communicate() 25 | if proc.returncode: 26 | cmd_str = " ".join(cmd) 27 | sys.stderr.write( 28 | f"WARNING: Command `{cmd_str} " 29 | f"failed with exit code {proc.returncode} in {repo_path}\n" 30 | ) 31 | 32 | return result 33 | 34 | 35 | def enhance_with_blame( 36 | input_files: SarifFileSet, 37 | repo_path: str, 38 | output: str, 39 | output_multiple_files: bool, 40 | run_git_blame: Callable[[str, str], List[bytes]] = _run_git_blame, 41 | ): 42 | """ 43 | Enhance SARIF files with information from `git blame`. The `git` command is run in the current 44 | directory, which must be a git repository containing the files at the paths specified in the 45 | input files. Updated files are written to output_path if specified, otherwise to the current 46 | directory. 47 | """ 48 | if not input_files: 49 | return 50 | if not os.path.isdir(repo_path): 51 | raise ValueError(f"No git repository directory found at {repo_path}") 52 | 53 | _enhance_with_blame(input_files, repo_path, run_git_blame) 54 | 55 | for input_file in input_files: 56 | input_file_name = input_file.get_file_name() 57 | if any( 58 | "blame" in result.get("properties", {}) 59 | for result in input_file.get_results() 60 | ): 61 | output_file = output 62 | if output_multiple_files: 63 | output_filename = ( 64 | input_file.get_file_name_without_extension() 65 | + "_with_blame." 66 | + input_file.get_file_name_extension() 67 | ) 68 | output_file = os.path.join(output, output_filename) 69 | print( 70 | "Writing", 71 | output_file, 72 | "combining original SARIF from", 73 | input_file_name, 74 | "with git blame information", 75 | ) 76 | with open(output_file, "w", encoding="utf-8") as file_out: 77 | json.dump(input_file.data, file_out) 78 | else: 79 | sys.stderr.write( 80 | f"WARNING: did not find any git blame information for {input_file_name}\n" 81 | ) 82 | 83 | 84 | def _enhance_with_blame( 85 | input_files: SarifFileSet, 86 | repo_path: str, 87 | run_git_blame: Callable[[str, str], List[bytes]], 88 | ): 89 | """ 90 | Run `git blame --porcelain` for each file path listed in input_files. 91 | Then enhance the results in error_list by adding a "blame" property including "hash", "author" 92 | and "timestamp". 93 | Porcelain format is used for parseability and stability. See documentation at 94 | https://git-scm.com/docs/git-blame#_the_porcelain_format. 
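    As a rough illustration (values hypothetical), each porcelain entry begins with a header line such as
    "1a2b3c4d... 10 12 1" (commit hash, original line, final line, group size), followed by key/value lines
    such as "author Jane Doe" and "author-time 1633017600", and ends with the source line itself prefixed by
    a tab, which is the shape the parsing code below expects.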
95 | """ 96 | files_to_blame = set(item["Location"] for item in input_files.get_records()) 97 | file_count = len(files_to_blame) 98 | print( 99 | "Running `git blame --porcelain` on", 100 | "one file" if file_count == 1 else f"{file_count} files", 101 | "in", 102 | repo_path, 103 | ) 104 | file_blame_info = _run_git_blame_on_files(files_to_blame, repo_path, run_git_blame) 105 | 106 | # Now join up blame output with result list 107 | blame_info_count = 0 108 | item_count = 0 109 | for result, record in zip(input_files.get_results(), input_files.get_records()): 110 | item_count += 1 111 | file_path = record["Location"] 112 | if file_path in file_blame_info: 113 | blame_info = file_blame_info[file_path] 114 | # raw_line can be None if no line number information was included in the SARIF result. 115 | raw_line = record["Line"] 116 | if raw_line: 117 | line_no = str(raw_line) 118 | if line_no in blame_info["line_to_commit"]: 119 | commit_hash = blame_info["line_to_commit"][line_no] 120 | commit = blame_info["commits"][commit_hash] 121 | # Add commit hash to the blame information 122 | commit_with_hash = {"commit": commit_hash, **commit} 123 | # Add blame information to the SARIF Property Bag of the result 124 | result.setdefault("properties", {})["blame"] = commit_with_hash 125 | blame_info_count += 1 126 | print(f"Found blame information for {blame_info_count} of {item_count} results") 127 | 128 | 129 | def _make_path_git_compatible(file_path): 130 | try: 131 | path_as_url = urllib.parse.urlparse(file_path) 132 | if path_as_url.scheme == "file": 133 | return urllib.request.url2pathname(path_as_url.path) 134 | return file_path 135 | except ValueError: 136 | return file_path 137 | 138 | 139 | def _run_git_blame_on_files( 140 | files_to_blame: Iterable[str], 141 | repo_path: str, 142 | run_git_blame: Callable[[str, str], List[bytes]], 143 | ): 144 | file_blame_info = {} 145 | for file_path in files_to_blame: 146 | git_blame_output = run_git_blame(repo_path, file_path) 147 | blame_info = {"commits": {}, "line_to_commit": {}} 148 | file_blame_info[file_path] = blame_info 149 | commit_hash: Union[str, None] = None 150 | 151 | for line_bytes in git_blame_output: 152 | # Convert byte sequence to string and remove trailing LF 153 | line_string = line_bytes.decode("utf-8", errors="replace")[:-1] 154 | # Now parse output from git blame --porcelain 155 | if commit_hash: 156 | if line_string.startswith("\t"): 157 | commit_hash = None 158 | # Ignore line contents = source code 159 | elif " " in line_string: 160 | space_pos = line_string.index(" ") 161 | key = line_string[0:space_pos] 162 | value = line_string[space_pos + 1 :].strip() 163 | blame_info["commits"][commit_hash][key] = value 164 | else: 165 | # e.g. "boundary" 166 | key = line_string 167 | blame_info["commits"][commit_hash][key] = True 168 | else: 169 | commit_line_info = line_string.split(" ") 170 | commit_hash = commit_line_info[0] 171 | commit_line = commit_line_info[2] 172 | blame_info["commits"].setdefault(commit_hash, {}) 173 | blame_info["line_to_commit"][commit_line] = commit_hash 174 | 175 | return file_blame_info 176 | -------------------------------------------------------------------------------- /sarif/operations/diff_op.py: -------------------------------------------------------------------------------- 1 | """ 2 | Code for `sarif diff` command. 
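In outline, the computed diff is a dict keyed by severity level plus "all": each severity maps to counts of new ("+") and eliminated ("-") issue types and a "codes" dict recording old ("<") and new (">") occurrence counts per issue, with the locations of new occurrences listed under "+@" where available.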
3 | """ 4 | 5 | import json 6 | import sys 7 | from typing import Dict 8 | 9 | from sarif import sarif_file 10 | 11 | 12 | def _occurrences(occurrence_count): 13 | return ( 14 | "1 occurrence" if occurrence_count == 1 else f"{occurrence_count} occurrences" 15 | ) 16 | 17 | 18 | def _signed_change(difference): 19 | return str(difference) if difference < 0 else f"+{difference}" 20 | 21 | 22 | def _record_to_location_tuple(record) -> str: 23 | return (record["Location"], record["Line"]) 24 | 25 | 26 | def print_diff( 27 | old_sarif: sarif_file.SarifFileSet, 28 | new_sarif: sarif_file.SarifFileSet, 29 | output, 30 | check_level=None, 31 | ) -> int: 32 | """ 33 | Generate a diff of the issues from the SARIF files and write it to stdout 34 | or a file if specified. 35 | :param old_sarif: corresponds to the old files. 36 | :param new_sarif: corresponds to the new files. 37 | :return: number of increased severities, or 0 if nothing has worsened. 38 | """ 39 | diff = _calc_diff(old_sarif, new_sarif) 40 | if output: 41 | print("writing diff to", output) 42 | with open(output, "w", encoding="utf-8") as output_file: 43 | json.dump(diff, output_file, indent=4) 44 | else: 45 | for severity in sarif_file.SARIF_SEVERITIES_WITH_NONE: 46 | if severity not in diff: 47 | continue 48 | if diff[severity]["codes"]: 49 | print( 50 | severity, 51 | "level:", 52 | _signed_change(diff[severity]["+"]), 53 | _signed_change(-diff[severity]["-"]), 54 | ) 55 | for issue_key, code_info in diff[severity]["codes"].items(): 56 | (old_count, new_count, new_locations) = ( 57 | code_info["<"], 58 | code_info[">"], 59 | code_info.get("+@", []), 60 | ) 61 | if old_count == 0: 62 | print(f' New issue "{issue_key}" ({_occurrences(new_count)})') 63 | elif new_count == 0: 64 | print(f' Eliminated issue "{issue_key}"') 65 | else: 66 | print( 67 | f" Number of occurrences {old_count} -> {new_count}", 68 | f'({_signed_change(new_count - old_count)}) for issue "{issue_key}"', 69 | ) 70 | if new_locations: 71 | # Print the top 3 new locations 72 | for record in new_locations[0:3]: 73 | (location, line) = _record_to_location_tuple(record) 74 | print(f" {location}:{line}") 75 | if len(new_locations) > 3: 76 | print(" ...") 77 | else: 78 | print(severity, "level: +0 -0 no changes") 79 | print( 80 | "all levels:", 81 | _signed_change(diff["all"]["+"]), 82 | _signed_change(-diff["all"]["-"]), 83 | ) 84 | filter_stats = old_sarif.get_filter_stats() 85 | if filter_stats: 86 | print(f" 'Before' results were filtered by {filter_stats}") 87 | filter_stats = new_sarif.get_filter_stats() 88 | if filter_stats: 89 | print(f" 'After' results were filtered by {filter_stats}") 90 | ret = 0 91 | if check_level: 92 | for severity in sarif_file.SARIF_SEVERITIES_WITH_NONE: 93 | ret += diff.get(severity, {}).get("+", 0) 94 | if severity == check_level: 95 | break 96 | if ret > 0: 97 | sys.stderr.write( 98 | f"Check: exiting with return code {ret} due to increase in issues at or above {check_level} severity\n" 99 | ) 100 | return ret 101 | 102 | 103 | def _find_new_occurrences(new_records, old_records): 104 | # Note: this is O(n²) complexity where n is the number of occurrences of this issue type, 105 | # so could be slow when there are a large number of occurrences. 
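# A new record whose Location does not appear among the old records counts as a brand-new
# location; one whose Location is known but whose Line is not counts as a new line within an
# existing file. New-location records are returned first, then new-line records, with each
# group sorted by location and line.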
106 | old_occurrences = old_records 107 | new_occurrences_new_locations = [] 108 | new_occurrences_new_lines = [] 109 | for new_record in new_records: 110 | (new_location, new_line) = (True, True) 111 | for old_record in old_occurrences: 112 | if old_record["Location"] == new_record["Location"]: 113 | new_location = False 114 | if old_record["Line"] == new_record["Line"]: 115 | new_line = False 116 | break 117 | if new_location: 118 | if new_record not in new_occurrences_new_locations: 119 | new_occurrences_new_locations.append(new_record) 120 | elif new_line: 121 | if new_record not in new_occurrences_new_lines: 122 | new_occurrences_new_lines.append(new_record) 123 | 124 | return sorted( 125 | new_occurrences_new_locations, key=_record_to_location_tuple 126 | ) + sorted(new_occurrences_new_lines, key=_record_to_location_tuple) 127 | 128 | 129 | def _calc_diff( 130 | old_sarif: sarif_file.SarifFileSet, new_sarif: sarif_file.SarifFileSet 131 | ) -> Dict: 132 | """ 133 | Generate a diff of the issues from the SARIF files. 134 | old_sarif corresponds to the old files. 135 | new_sarif corresponds to the new files. 136 | Return dict has keys "error", "warning", "note", "none" (if present) and "all". 137 | """ 138 | ret = {"all": {"+": 0, "-": 0}} 139 | old_report = old_sarif.get_report() 140 | new_report = new_sarif.get_report() 141 | # Include `none` in the list of severities if there are any `none` records in either the old 142 | # or new report. 143 | severities = ( 144 | old_report.get_severities() 145 | if old_report.any_none_severities() 146 | else new_report.get_severities() 147 | ) 148 | for severity in severities: 149 | old_histogram = old_report.get_issue_type_histogram_for_severity(severity) 150 | new_histogram = new_report.get_issue_type_histogram_for_severity(severity) 151 | ret[severity] = {"+": 0, "-": 0, "codes": {}} 152 | if old_histogram != new_histogram: 153 | for issue_key, count in new_histogram.items(): 154 | old_count = old_histogram.pop(issue_key, 0) 155 | if old_count != count: 156 | ret[severity]["codes"][issue_key] = {"<": old_count, ">": count} 157 | if old_count == 0: 158 | ret[severity]["+"] += 1 159 | new_occurrences = _find_new_occurrences( 160 | new_report.get_issues_grouped_by_type_for_severity( 161 | severity 162 | ).get(issue_key, []), 163 | old_report.get_issues_grouped_by_type_for_severity( 164 | severity 165 | ).get(issue_key, []), 166 | ) 167 | if new_occurrences: 168 | ret[severity]["codes"][issue_key]["+@"] = [ 169 | {"Location": r["Location"], "Line": r["Line"]} 170 | for r in new_occurrences 171 | ] 172 | for issue_key, old_count in old_histogram.items(): 173 | ret[severity]["codes"][issue_key] = {"<": old_count, ">": 0} 174 | ret[severity]["-"] += 1 175 | ret["all"]["+"] += ret[severity]["+"] 176 | ret["all"]["-"] += ret[severity]["-"] 177 | return ret 178 | -------------------------------------------------------------------------------- /sarif/operations/word_op.py: -------------------------------------------------------------------------------- 1 | """ 2 | Generate summary of given JSON or given JSON Document in to Microsoft Word Documents. 3 | This functionality uses a python module called 4 | 5 | python-docx - a Python library for creating and updating Microsoft Word (.docx) files. 
6 | 7 | https://python-docx.readthedocs.io/ 8 | 9 | """ 10 | 11 | from datetime import datetime 12 | import os 13 | from typing import Union 14 | 15 | import docx 16 | from docx import oxml 17 | from docx import shared 18 | from docx.enum import text 19 | from docx.oxml import ns 20 | 21 | from sarif import charts, sarif_file 22 | from sarif.sarif_file_utils import combine_record_code_and_description 23 | 24 | 25 | def generate_word_docs_from_sarif_inputs( 26 | input_files: sarif_file.SarifFileSet, 27 | image_file: Union[str, None], 28 | output: str, 29 | output_multiple_files: bool, 30 | date_val: datetime = datetime.now(), 31 | ): 32 | """ 33 | Convert SARIF input to Word file output. 34 | """ 35 | if not input_files: 36 | raise ValueError("No input files specified!") 37 | 38 | output_file = output 39 | output_file_name = output 40 | if output_multiple_files: 41 | for input_file in input_files: 42 | output_file_name = input_file.get_file_name_without_extension() + ".docx" 43 | print( 44 | "Writing Word summary of", 45 | input_file.get_file_name(), 46 | "to", 47 | output_file_name, 48 | ) 49 | report = input_file.get_report() 50 | _generate_word_summary( 51 | input_file, 52 | report, 53 | os.path.join(output, output_file_name), 54 | image_file, 55 | date_val, 56 | ) 57 | output_file_name = "static_analysis_output.docx" 58 | output_file = os.path.join(output, output_file_name) 59 | 60 | source_description = input_files.get_description() 61 | print("Writing Word summary of", source_description, "to", output_file_name) 62 | report = input_files.get_report() 63 | _generate_word_summary(input_files, report, output_file, image_file, date_val) 64 | 65 | 66 | def _generate_word_summary( 67 | sarif_data, report, output_file, image_file: Union[str, None], date_val: datetime 68 | ): 69 | # Create a new document 70 | document = docx.Document() 71 | 72 | severities = report.get_severities() 73 | _add_heading_and_highlevel_info( 74 | document, sarif_data, report, severities, output_file, image_file, date_val 75 | ) 76 | _dump_errors_summary_by_sev(document, report, severities) 77 | _dump_each_error_in_detail(document, report, severities) 78 | 79 | # finally, save the document. 
80 | document.save(output_file) 81 | 82 | 83 | def _add_heading_and_highlevel_info( 84 | document, 85 | sarif_data, 86 | report, 87 | severities, 88 | output_file, 89 | image_path: Union[str, None], 90 | date_val: datetime, 91 | ): 92 | tool_name = ", ".join(sarif_data.get_distinct_tool_names()) 93 | heading = f"Sarif Summary: {tool_name}" 94 | 95 | if image_path: 96 | document.add_picture(image_path) 97 | last_paragraph = document.paragraphs[-1] 98 | last_paragraph.alignment = text.WD_PARAGRAPH_ALIGNMENT.CENTER 99 | 100 | document.add_heading(heading, 0) 101 | document.add_paragraph(f"Document generated on: {date_val}") 102 | 103 | sevs = ", ".join(severities) 104 | document.add_paragraph( 105 | f"Total number of various severities ({sevs}): {sarif_data.get_result_count()}" 106 | ) 107 | filter_stats = sarif_data.get_filter_stats() 108 | if filter_stats: 109 | document.add_paragraph(f"Results were filtered by {filter_stats}.") 110 | 111 | pie_chart_image_file_path = output_file.replace(".docx", "_severity_pie_chart.png") 112 | if charts.generate_severity_pie_chart(report, pie_chart_image_file_path): 113 | document.add_picture(pie_chart_image_file_path) 114 | last_paragraph = document.paragraphs[-1] 115 | last_paragraph.alignment = text.WD_PARAGRAPH_ALIGNMENT.CENTER 116 | 117 | document.add_page_break() 118 | 119 | 120 | def _dump_errors_summary_by_sev(document, report, severities): 121 | """ 122 | For each severity level (in priority order): create a list of the errors of 123 | that severity, print out how many there are and then do some further analysis 124 | of which error codes are present. 125 | """ 126 | for severity in severities: 127 | errors_of_severity = report.get_issue_type_count_for_severity(severity) 128 | document.add_heading(f"Severity : {severity} [ {errors_of_severity} ]", level=1) 129 | sorted_dict = report.get_issue_type_histogram_for_severity(severity) 130 | if sorted_dict: 131 | for key, count in sorted_dict.items(): 132 | document.add_paragraph(f"{key}: {count}", style="List Bullet") 133 | else: 134 | document.add_paragraph("None", style="List Bullet") 135 | 136 | 137 | def _dump_each_error_in_detail(document, report, severities): 138 | """ 139 | Write out the errors to a table so that a human can do further analysis. 140 | """ 141 | document.add_page_break() 142 | 143 | for severity in severities: 144 | errors_of_severity = report.get_issues_for_severity(severity) 145 | # Sample: 146 | # [{'Location': 'C:\\Max\\AccessionAndroid\\scripts\\parse_coverage.py', 'Line': 119, 147 | # 'Severity': 'error', 'Code': 'DS126186 Disabled certificate validation'}, 148 | # {'Location': 'C:\\Max\\AccessionAndroid\\scripts\\parse_code_stats.py', 'Line': 61, 149 | # 'Severity': 'error', 'Code': 'DS126186 Disabled certificate validation'}, 150 | # ] 151 | if errors_of_severity: 152 | document.add_heading(f"Severity : {severity}", level=2) 153 | table = document.add_table(rows=1 + len(errors_of_severity), cols=3) 154 | 155 | table.style = "Table Grid" # ColorfulGrid-Accent5' 156 | table.autofit = False 157 | 158 | table.alignment = text.WD_TAB_ALIGNMENT.CENTER 159 | 160 | # Cell widths 161 | widths = [shared.Inches(2), shared.Inches(4), shared.Inches(0.5)] 162 | 163 | # To avoid performance problems with large tables, prepare the entries first in this 164 | # list, then iterate the table cells and copy them in. 
165 | # First populate the header row 166 | cells_text = ["Code", "Location", "Line"] 167 | 168 | hdr_cells = table.rows[0].cells 169 | for i in range(3): 170 | table.rows[0].cells[i]._tc.get_or_add_tcPr().append( 171 | oxml.parse_xml( 172 | r''.format(ns.nsdecls("w")) 173 | ) 174 | ) 175 | run = hdr_cells[i].paragraphs[0].add_run(cells_text[i]) 176 | run.bold = True 177 | hdr_cells[i].paragraphs[ 178 | 0 179 | ].alignment = text.WD_PARAGRAPH_ALIGNMENT.CENTER 180 | hdr_cells[i].width = widths[i] 181 | 182 | for eachrow in errors_of_severity: 183 | cells_text += [ 184 | combine_record_code_and_description(eachrow), 185 | eachrow["Location"], 186 | str(eachrow["Line"]), 187 | ] 188 | 189 | # Note: using private property table._cells to avoid performance issue. See 190 | # https://stackoverflow.com/a/69105798/316578 191 | col_index = 0 192 | for cell, cell_text in zip(table._cells, cells_text): 193 | cell.text = cell_text 194 | cell.width = widths[col_index] 195 | col_index = col_index + 1 if col_index < 2 else 0 196 | else: 197 | document.add_heading(f"Severity : {severity}", level=2) 198 | document.add_paragraph("None", style="List Bullet") 199 | -------------------------------------------------------------------------------- /tests/test_sarif_file_utils.py: -------------------------------------------------------------------------------- 1 | from sarif import sarif_file_utils 2 | 3 | 4 | def test_combine_code_and_description_short(): 5 | cd = sarif_file_utils.combine_code_and_description( 6 | "ABC123", "Some short description" 7 | ) 8 | assert cd == "ABC123 Some short description" 9 | assert len(cd) <= 120 10 | 11 | 12 | def test_combine_code_and_description_long_desc(): 13 | cd = sarif_file_utils.combine_code_and_description( 14 | "ABC123", " ".join(f"blah{i}" for i in range(1, 30)) 15 | ) 16 | assert ( 17 | cd 18 | == "ABC123 blah1 blah2 blah3 blah4 blah5 blah6 blah7 blah8 blah9 blah10 blah11 blah12 blah13 blah14 blah15 blah16 ..." 19 | ) 20 | assert len(cd) <= 120 21 | 22 | 23 | def test_combine_code_and_description_long_code(): 24 | long_code = "".join(f"A{i}" for i in range(1, 36)) + "BC" 25 | assert ( 26 | len(long_code) == 98 27 | ), "98 is right length to hit 'placeholder too large for max width' without defensive code" 28 | cd = sarif_file_utils.combine_code_and_description( 29 | long_code, "wow that's a long code" 30 | ) 31 | assert cd == f"{long_code} wow that's a ..." 
32 | assert len(cd) <= 120 33 | long_code = "".join(f"A{i}" for i in range(1, 50)) 34 | cd = sarif_file_utils.combine_code_and_description( 35 | long_code, "wow that's a long code" 36 | ) 37 | assert cd == long_code 38 | 39 | 40 | def test_read_result_rule(): 41 | run = { 42 | "tool": { 43 | "driver": { 44 | "rules": [ 45 | {"id": "id0", "defaultConfiguration": {"level": "none"}}, 46 | {"id": "id1", "defaultConfiguration": {"level": "error"}}, 47 | ] 48 | } 49 | } 50 | } 51 | rule_id0 = run["tool"]["driver"]["rules"][0] 52 | rule_id1 = run["tool"]["driver"]["rules"][1] 53 | 54 | result = {} 55 | (rule, ruleIndex) = sarif_file_utils.read_result_rule(result, run) 56 | assert rule is None 57 | assert ruleIndex == -1 58 | 59 | result = {"ruleIndex": 1} 60 | (rule, ruleIndex) = sarif_file_utils.read_result_rule(result, run) 61 | assert rule == rule_id1 62 | assert ruleIndex == 1 63 | 64 | result = {"rule": {"index": 1}} 65 | (rule, ruleIndex) = sarif_file_utils.read_result_rule(result, run) 66 | assert rule == rule_id1 67 | assert ruleIndex == 1 68 | 69 | result = {"ruleId": "id1"} 70 | (rule, ruleIndex) = sarif_file_utils.read_result_rule(result, run) 71 | assert rule == rule_id1 72 | assert ruleIndex == 1 73 | 74 | result = {"rule": {"id": "id1"}} 75 | (rule, ruleIndex) = sarif_file_utils.read_result_rule(result, run) 76 | assert rule == rule_id1 77 | assert ruleIndex == 1 78 | 79 | result = {"ruleIndex": 0} 80 | (rule, ruleIndex) = sarif_file_utils.read_result_rule(result, run) 81 | assert rule == rule_id0 82 | assert ruleIndex == 0 83 | 84 | result = {"ruleIndex": 0} 85 | (rule, ruleIndex) = sarif_file_utils.read_result_rule(result, {}) 86 | assert rule is None 87 | assert ruleIndex == -1 88 | 89 | 90 | def test_read_result_invocation(): 91 | run = {"invocations": [{"foo": 1}, {"bar": "baz"}]} 92 | 93 | result = {} 94 | invocation = sarif_file_utils.read_result_invocation(result, run) 95 | assert invocation is None 96 | 97 | result = {"provenance": {}} 98 | invocation = sarif_file_utils.read_result_invocation(result, run) 99 | assert invocation is None 100 | 101 | result = {"provenance": {"invocationIndex": 0}} 102 | invocation = sarif_file_utils.read_result_invocation(result, {}) 103 | assert invocation is None 104 | 105 | result = {"provenance": {"invocationIndex": -1}} 106 | invocation = sarif_file_utils.read_result_invocation(result, run) 107 | assert invocation is None 108 | 109 | result = {"provenance": {"invocationIndex": 2}} 110 | invocation = sarif_file_utils.read_result_invocation(result, run) 111 | assert invocation is None 112 | 113 | result = {"provenance": {"invocationIndex": 1}} 114 | invocation = sarif_file_utils.read_result_invocation(result, run) 115 | assert invocation == run["invocations"][1] 116 | 117 | 118 | def test_read_result_severity(): 119 | result = {"level": "error"} 120 | severity = sarif_file_utils.read_result_severity(result, {}) 121 | assert severity == "error" 122 | 123 | # If kind has any value other than "fail", then if level is absent, it SHALL default to "none"... 
124 | result = {"kind": "other"} 125 | severity = sarif_file_utils.read_result_severity(result, {}) 126 | assert severity == "none" 127 | 128 | run = { 129 | "invocations": [ 130 | { 131 | "ruleConfigurationOverrides": [ 132 | {"descriptor": {"id": "id1"}, "configuration": {"level": "note"}} 133 | ] 134 | }, 135 | { 136 | "ruleConfigurationOverrides": [ 137 | {"descriptor": {"index": 1}, "configuration": {"level": "note"}} 138 | ] 139 | }, 140 | {}, 141 | ], 142 | "tool": { 143 | "driver": { 144 | "rules": [ 145 | {"id": "id0", "defaultConfiguration": {"level": "none"}}, 146 | {"id": "id1", "defaultConfiguration": {"level": "error"}}, 147 | ] 148 | } 149 | }, 150 | } 151 | 152 | # If kind has the value "fail" and level is absent, then level SHALL be determined by the following procedure: 153 | # IF rule is present THEN 154 | # LET theDescriptor be the reportingDescriptor object that it specifies. 155 | # # Is there a configuration override for the level property? 156 | # IF result.provenance.invocationIndex is >= 0 THEN 157 | # LET theInvocation be the invocation object that it specifies. 158 | # IF theInvocation.ruleConfigurationOverrides is present 159 | # AND it contains a configurationOverride object whose 160 | # descriptor property specifies theDescriptor THEN 161 | # LET theOverride be that configurationOverride object. 162 | # IF theOverride.configuration.level is present THEN 163 | # Set level to theConfiguration.level. 164 | result = {"ruleIndex": 1, "provenance": {"invocationIndex": 0}} 165 | severity = sarif_file_utils.read_result_severity(result, run) 166 | assert severity == "note" 167 | 168 | result = {"ruleIndex": 1, "provenance": {"invocationIndex": 1}} 169 | severity = sarif_file_utils.read_result_severity(result, run) 170 | assert severity == "note" 171 | 172 | # ELSE 173 | # # There is no configuration override for level. Is there a default configuration for it? 174 | # IF theDescriptor.defaultConfiguration.level is present THEN 175 | # SET level to theDescriptor.defaultConfiguration.level. 176 | 177 | result = {"ruleIndex": 1} 178 | severity = sarif_file_utils.read_result_severity(result, run) 179 | assert severity == "error" 180 | 181 | result = {"rule": {"index": 1}} 182 | severity = sarif_file_utils.read_result_severity(result, run) 183 | assert severity == "error" 184 | 185 | result = {"ruleId": "id1"} 186 | severity = sarif_file_utils.read_result_severity(result, run) 187 | assert severity == "error" 188 | 189 | result = {"rule": {"id": "id1"}} 190 | severity = sarif_file_utils.read_result_severity(result, run) 191 | assert severity == "error" 192 | 193 | result = {"ruleIndex": 1, "provenance": {"invocationIndex": 2}} 194 | severity = sarif_file_utils.read_result_severity(result, run) 195 | assert severity == "error" 196 | 197 | # IF level has not yet been set THEN 198 | # SET level to "warning". 
199 | result = {} 200 | severity = sarif_file_utils.read_result_severity(result, {}) 201 | assert severity == "warning" 202 | 203 | result = {"ruleIndex": -1} 204 | severity = sarif_file_utils.read_result_severity(result, {}) 205 | assert severity == "warning" 206 | -------------------------------------------------------------------------------- /tests/ops/blame/test_blame.py: -------------------------------------------------------------------------------- 1 | from copy import deepcopy 2 | import datetime 3 | import json 4 | import jsonschema 5 | import os 6 | import tempfile 7 | from typing import Callable, Dict, List 8 | 9 | from sarif.operations import blame_op 10 | from sarif import sarif_file 11 | from tests.utils import get_sarif_schema 12 | 13 | ERROR_FILE_RELATIVE_PATH = "subdir/file.py" 14 | ERROR_FILE_ABSOLUTE_PATH = "file:///C:/repo/subdir/file.py" 15 | 16 | SARIF_FILE = { 17 | "$schema": "https://schemastore.azurewebsites.net/schemas/json/sarif-2.1.0-rtm.5.json", 18 | "version": "2.1.0", 19 | "runs": [ 20 | { 21 | "tool": {"driver": {"name": "unit test"}}, 22 | "results": [ 23 | { 24 | "ruleId": "CA2101", 25 | "message": {"text": "just testing"}, 26 | "level": "error", 27 | "locations": [ 28 | { 29 | "physicalLocation": { 30 | "artifactLocation": { 31 | "uri": ERROR_FILE_ABSOLUTE_PATH, 32 | "index": 0, 33 | }, 34 | "region": {"startLine": 3, "startColumn": 9}, 35 | } 36 | } 37 | ], 38 | } 39 | ], 40 | } 41 | ], 42 | } 43 | 44 | GIT_BLAME_OUTPUT = [ 45 | "f9db03438aba52affc5c3fcdb619afa620ad603a 1 1 7\n", 46 | "author Taylor Developer\n", 47 | "author-mail \n", 48 | "author-time 1699272533\n", 49 | "author-tz +0000\n", 50 | "committer GitHub\n", 51 | "committer-mail \n", 52 | "committer-time 1699272533\n", 53 | "committer-tz +0000\n", 54 | "summary Commit message 1\n", 55 | "filename " + ERROR_FILE_RELATIVE_PATH + "\n", 56 | "\tFile text line 1\n", 57 | "f9db03438aba52affc5c3fcdb619afa620ad603a 2 2\n", 58 | "\tFile text line 2\n", 59 | "f9db03438aba52affc5c3fcdb619afa620ad603a 3 3\n", 60 | "\tFile text line 3\n", 61 | "eec0471db074a037d820abdda1f210f8a8c987ca 4 4 1\n", 62 | "author Other Developer\n", 63 | "author-mail \n", 64 | "author-time 1718035364\n", 65 | "author-tz +0100\n", 66 | "committer GitHub\n", 67 | "committer-mail \n", 68 | "committer-time 1718035364\n", 69 | "committer-tz +0100\n", 70 | "summary Commit message 2\n", 71 | "filename " + ERROR_FILE_RELATIVE_PATH + "\n", 72 | "\tFile text line 4\n", 73 | "6732313c320314c122bd00aa40e7c79954f21c15 5 5 1\n", 74 | "author Another Developer\n", 75 | "author-mail \n", 76 | "author-time 1727710690\n", 77 | "author-tz -0700\n", 78 | "committer GitHub\n", 79 | "committer-mail \n", 80 | "committer-time 1727710690\n", 81 | "committer-tz -0700\n", 82 | "summary Commit message 3\n", 83 | "filename " + ERROR_FILE_RELATIVE_PATH + "\n", 84 | "\tFile text line 5\n", 85 | "6732313c320314c122bd00aa40e7c79954f21c15 6 6\n", 86 | "\tFile text line 6\n", 87 | ] 88 | 89 | 90 | def test_blame_no_blame_info(): 91 | input_sarif_file = sarif_file.SarifFile( 92 | "SARIF_FILE", SARIF_FILE, mtime=datetime.datetime.now() 93 | ) 94 | input_sarif_file_set = sarif_file.SarifFileSet() 95 | input_sarif_file_set.files.append(input_sarif_file) 96 | 97 | with tempfile.TemporaryDirectory() as tmp: 98 | repo_path = os.path.join(tmp, "repo") 99 | os.makedirs(repo_path) 100 | output_file_path = os.path.join(tmp, "blamed.json") 101 | 102 | blame_op.enhance_with_blame( 103 | input_sarif_file_set, 104 | repo_path, 105 | output_file_path, 106 | 
output_multiple_files=False, 107 | run_git_blame=lambda repo_path, file_path: [], 108 | ) 109 | 110 | assert not os.path.isfile(output_file_path) 111 | 112 | 113 | def blame_test( 114 | run_git_blame: Callable[[str, str], List[bytes]], 115 | expected_blame_properties: Dict[str, Dict[str, str]], 116 | ): 117 | input_sarif_file = sarif_file.SarifFile( 118 | "SARIF_FILE", SARIF_FILE, mtime=datetime.datetime.now() 119 | ) 120 | input_sarif_file_set = sarif_file.SarifFileSet() 121 | input_sarif_file_set.files.append(input_sarif_file) 122 | 123 | with tempfile.TemporaryDirectory() as tmp: 124 | repo_path = os.path.join(tmp, "repo") 125 | os.makedirs(repo_path) 126 | output_file_path = os.path.join(tmp, "blamed.json") 127 | 128 | def run_git_blame_wrapper( 129 | blame_repo_path: str, blame_file_path: str 130 | ) -> List[bytes]: 131 | assert blame_repo_path == repo_path 132 | assert blame_file_path == ERROR_FILE_ABSOLUTE_PATH 133 | return run_git_blame(blame_repo_path, blame_file_path) 134 | 135 | blame_op.enhance_with_blame( 136 | input_sarif_file_set, 137 | repo_path, 138 | output_file_path, 139 | output_multiple_files=False, 140 | run_git_blame=run_git_blame_wrapper, 141 | ) 142 | 143 | with open(output_file_path, "rb") as f_out: 144 | output_sarif = json.load(f_out) 145 | jsonschema.validate(output_sarif, schema=get_sarif_schema()) 146 | 147 | expected_sarif = deepcopy(input_sarif_file.data) 148 | expected_sarif["runs"][0]["results"][0]["properties"] = ( 149 | expected_blame_properties 150 | ) 151 | assert output_sarif == expected_sarif 152 | 153 | 154 | def test_blame_success(): 155 | def run_git_blame(blame_repo_path: str, blame_file_path: str) -> List[bytes]: 156 | return [x.encode() for x in GIT_BLAME_OUTPUT] 157 | 158 | expected_blame_properties = { 159 | "blame": { 160 | "commit": "f9db03438aba52affc5c3fcdb619afa620ad603a", 161 | "author": "Taylor Developer", 162 | "author-mail": "", 163 | "author-time": "1699272533", 164 | "author-tz": "+0000", 165 | "committer": "GitHub", 166 | "committer-mail": "", 167 | "committer-time": "1699272533", 168 | "committer-tz": "+0000", 169 | "summary": "Commit message 1", 170 | "filename": ERROR_FILE_RELATIVE_PATH, 171 | } 172 | } 173 | 174 | blame_test(run_git_blame, expected_blame_properties) 175 | 176 | 177 | GIT_BLAME_OUTPUT_WITH_INVALID_UTF8 = [ 178 | b"f9db03438aba52affc5c3fcdb619afa620ad603a 1 1 7\n", 179 | b"author Taylor Developer\n", 180 | b"author-mail \n", 181 | b"author-time 1699272533\n", 182 | b"author-tz +0000\n", 183 | b"committer GitHub\n", 184 | b"committer-mail \n", 185 | b"committer-time 1699272533\n", 186 | b"committer-tz +0000\n", 187 | b"summary Commit message \x80\n", 188 | b"filename " + ERROR_FILE_RELATIVE_PATH.encode() + b"\n", 189 | b"\tFile text line 1\n", 190 | b"f9db03438aba52affc5c3fcdb619afa620ad603a 2 2\n", 191 | b"\tFile text line 2\n", 192 | b"f9db03438aba52affc5c3fcdb619afa620ad603a 3 3\n", 193 | b"\tFile text line 3\n", 194 | b"eec0471db074a037d820abdda1f210f8a8c987ca 4 4 1\n", 195 | ] 196 | 197 | 198 | def test_blame_invalid_utf8(): 199 | def run_git_blame(blame_repo_path: str, blame_file_path: str) -> List[bytes]: 200 | return GIT_BLAME_OUTPUT_WITH_INVALID_UTF8 201 | 202 | expected_blame_properties = { 203 | "blame": { 204 | "commit": "f9db03438aba52affc5c3fcdb619afa620ad603a", 205 | "author": "Taylor Developer", 206 | "author-mail": "", 207 | "author-time": "1699272533", 208 | "author-tz": "+0000", 209 | "committer": "GitHub", 210 | "committer-mail": "", 211 | "committer-time": "1699272533", 212 | "committer-tz": 
"+0000", 213 | "summary": "Commit message �", 214 | "filename": ERROR_FILE_RELATIVE_PATH, 215 | } 216 | } 217 | 218 | blame_test(run_git_blame, expected_blame_properties) 219 | -------------------------------------------------------------------------------- /sarif/sarif_file_utils.py: -------------------------------------------------------------------------------- 1 | """ 2 | Reusable utility functions for handling the SARIF format. 3 | 4 | Primarily interrogating the `result` JSON defined at 5 | https://docs.oasis-open.org/sarif/sarif/v2.1.0/cs01/sarif-v2.1.0-cs01.html#_Toc16012594 6 | """ 7 | 8 | import textwrap 9 | from typing import Literal, Tuple, Union 10 | 11 | # SARIF severity levels as per 12 | # https://docs.oasis-open.org/sarif/sarif/v2.1.0/sarif-v2.1.0.html#_Toc141790898 13 | SARIF_SEVERITIES_WITHOUT_NONE = ["error", "warning", "note"] 14 | SARIF_SEVERITIES_WITH_NONE = SARIF_SEVERITIES_WITHOUT_NONE + ["none"] 15 | 16 | 17 | def combine_code_and_description(code: str, description: str) -> str: 18 | """ 19 | Combine code and description into one string, keeping total length under 120 characters. 20 | """ 21 | length_budget = 120 22 | if code: 23 | code = code.strip() 24 | length_budget -= len(code) + 1 # Allow issue code and space character 25 | continuation_placeholder = " ..." 26 | # Allow extra space when truncating for continuation characters 27 | length_budget_pre_continuation = length_budget - len(continuation_placeholder) 28 | if length_budget_pre_continuation < 10: 29 | # Don't include description if it would be very short due to long code 30 | return code 31 | if description: 32 | if "\n" in description: 33 | description = description[: description.index("\n")] 34 | if description.startswith(code): 35 | # Don't duplicate the code 36 | description = description[len(code) :] 37 | description = description.strip() 38 | if description: 39 | if len(description) > length_budget: 40 | shorter_description = textwrap.shorten( 41 | description, 42 | width=length_budget_pre_continuation, 43 | placeholder=continuation_placeholder, 44 | ) 45 | if len(shorter_description) < length_budget_pre_continuation - 40: 46 | # Word wrap shortens the description significantly, so truncate mid-word instead 47 | description = ( 48 | description[:length_budget_pre_continuation] 49 | + continuation_placeholder 50 | ) 51 | else: 52 | description = shorter_description 53 | if code: 54 | return f"{code.strip()} {description}" 55 | return description 56 | if code: 57 | return code 58 | return "" 59 | 60 | 61 | def combine_record_code_and_description(record: dict) -> str: 62 | """ 63 | Combine code and description fields into one string. 64 | """ 65 | return combine_code_and_description(record["Code"], record["Description"]) 66 | 67 | 68 | def read_result_location(result) -> Tuple[str, str]: 69 | """ 70 | Extract the file path and line number strings from the Result. 71 | 72 | Tools store this in different ways, so this function tries a few different JSON locations. 
73 | """ 74 | file_path = None 75 | line_number = None 76 | locations = result.get("locations", []) 77 | if locations and isinstance(locations, list): 78 | location = locations[0] 79 | physical_location = location.get("physicalLocation", {}) 80 | # SpotBugs has some errors with no line number so deal with them by just leaving it at 1 81 | line_number = physical_location.get("region", {}).get("startLine", None) 82 | # For file name, first try the location written by DevSkim 83 | file_path = ( 84 | location.get("physicalLocation", {}) 85 | .get("address", {}) 86 | .get("fullyQualifiedName", None) 87 | ) 88 | if not file_path: 89 | # Next try the physical location written by MobSF and by SpotBugs (for some errors) 90 | file_path = ( 91 | location.get("physicalLocation", {}) 92 | .get("artifactLocation", {}) 93 | .get("uri", None) 94 | ) 95 | if not file_path: 96 | logical_locations = location.get("logicalLocations", None) 97 | if logical_locations: 98 | # Finally, try the logical location written by SpotBugs for some errors 99 | file_path = logical_locations[0].get("fullyQualifiedName", None) 100 | return (file_path, line_number) 101 | 102 | 103 | def read_result_rule(result, run) -> Tuple[Union[dict, None], int]: 104 | """ 105 | Returns the corresponding rule object for the specified result, plus its index 106 | in the rules array. Follows the rules at 107 | https://docs.oasis-open.org/sarif/sarif/v2.1.0/sarif-v2.1.0.html#_Toc141790895 108 | """ 109 | ruleIndex = result.get("ruleIndex") 110 | ruleId = result.get("ruleId") 111 | rule = result.get("rule") 112 | 113 | if rule: 114 | if ruleIndex is None: 115 | ruleIndex = rule.get("index") 116 | 117 | if ruleId is None: 118 | ruleId = rule.get("id") 119 | 120 | rules = run.get("tool", {}).get("driver", {}).get("rules", []) 121 | 122 | if ruleIndex is not None and ruleIndex >= 0 and ruleIndex < len(rules): 123 | return (rules[ruleIndex], ruleIndex) 124 | 125 | if ruleId: 126 | for i, rule in enumerate(rules): 127 | if rule.get("id") == ruleId: 128 | return (rule, i) 129 | 130 | return (None, -1) 131 | 132 | 133 | def read_result_invocation(result, run): 134 | """ 135 | Extract the invocation metadata for the result, following the rules at 136 | https://docs.oasis-open.org/sarif/sarif/v2.1.0/sarif-v2.1.0.html#_Toc141790917 137 | """ 138 | invocationIndex = result.get("provenance", {}).get("invocationIndex") 139 | if invocationIndex is None: 140 | return None 141 | 142 | invocations = run.get("invocations") 143 | 144 | if invocations and invocationIndex >= 0 and invocationIndex < len(invocations): 145 | return invocations[invocationIndex] 146 | 147 | return None 148 | 149 | 150 | def read_result_severity(result, run) -> Literal["none", "note", "warning", "error"]: 151 | """ 152 | Extract the severity level from the result following the rules at 153 | https://docs.oasis-open.org/sarif/sarif/v2.1.0/sarif-v2.1.0.html#_Toc141790898 154 | """ 155 | severity = result.get("level") 156 | if severity: 157 | return severity 158 | 159 | # If kind has any value other than "fail", then if level is absent, 160 | # it SHALL default to "none" 161 | kind = result.get("kind", "fail") 162 | if kind and kind != "fail": 163 | return "none" 164 | 165 | # If kind has the value "fail" and level is absent, then... 166 | rule, ruleIndex = read_result_rule(result, run) 167 | if rule: 168 | # Honor the invocation's configuration override if present... 
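# An override applies when its descriptor refers to this rule either by id or by index.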
169 | invocation = read_result_invocation(result, run) 170 | if invocation: 171 | ruleConfigurationOverrides = invocation.get( 172 | "ruleConfigurationOverrides", [] 173 | ) 174 | override = next( 175 | ( 176 | override 177 | for override in ruleConfigurationOverrides 178 | if override.get("descriptor", {}).get("id") == rule.get("id") 179 | or override.get("descriptor", {}).get("index") == ruleIndex 180 | ), 181 | None, 182 | ) 183 | 184 | if override: 185 | overrideLevel = override.get("configuration", {}).get("level") 186 | if overrideLevel: 187 | return overrideLevel 188 | 189 | # Otherwise, use the rule's default configuraiton if present... 190 | defaultConfiguration = rule.get("defaultConfiguration") 191 | if defaultConfiguration: 192 | severity = defaultConfiguration.get("level") 193 | if severity: 194 | return severity 195 | 196 | # Otherwise, fall back to warning 197 | return "warning" 198 | 199 | 200 | def record_sort_key(record: dict) -> str: 201 | """Get a sort key for the record.""" 202 | return ( 203 | combine_record_code_and_description(record) 204 | + record["Location"] 205 | + str(record["Line"]).zfill(6) 206 | ) 207 | -------------------------------------------------------------------------------- /sarif/filter/general_filter.py: -------------------------------------------------------------------------------- 1 | """ 2 | SARIF file filtering functionality. 3 | """ 4 | 5 | import os 6 | import re 7 | from typing import Optional, List 8 | 9 | import copy 10 | import jsonpath_ng.ext 11 | import yaml 12 | 13 | from sarif import sarif_file_utils 14 | from sarif.filter.filter_stats import FilterStats, load_filter_stats_from_json 15 | 16 | # Commonly used properties can be specified using shortcuts 17 | # instead of full JSON path 18 | FILTER_SHORTCUTS = { 19 | "author": "properties.blame.author", 20 | "author-mail": "properties.blame.author-mail", 21 | "committer": "properties.blame.committer", 22 | "committer-mail": "properties.blame.committer-mail", 23 | "location": "locations[*].physicalLocation.artifactLocation.uri", 24 | "rule": "ruleId", 25 | "suppression": "suppressions[*].kind", 26 | } 27 | 28 | # Some properties can have specific shortcuts to make it easier to write filters 29 | # For example a file location can be specified using wildcards 30 | FIELDS_REGEX_SHORTCUTS = {"uri": {"**": ".*", "*": "[^/]*", "?": "."}} 31 | 32 | # Default configuration for all filters 33 | DEFAULT_CONFIGURATION = { 34 | "default-include": True, 35 | "check-line-number": True, 36 | } 37 | 38 | 39 | def _get_filter_function(filter_spec): 40 | """Return a filter function for the given specification.""" 41 | if filter_spec: 42 | filter_len = len(filter_spec) 43 | if filter_len > 2 and filter_spec.startswith("/") and filter_spec.endswith("/"): 44 | regex = filter_spec[1:-1] 45 | return lambda value: re.search(regex, value, re.IGNORECASE) 46 | substring = filter_spec 47 | # substring can be empty, in this case "in" returns true 48 | # and only existence of the property checked. 
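# e.g. a spec of {"author": "Smith"} keeps results whose properties.blame.author contains "Smith".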
49 | return lambda value: substring in value 50 | return lambda value: True 51 | 52 | 53 | def _convert_glob_to_regex(property_name, property_value_spec): 54 | # skip if property_value_spec is a regex 55 | if property_value_spec and not ( 56 | property_value_spec.startswith("/") and property_value_spec.endswith("/") 57 | ): 58 | # get last component of property name 59 | last_component = property_name.split(".")[-1] 60 | if last_component in FIELDS_REGEX_SHORTCUTS: 61 | shortcuts = FIELDS_REGEX_SHORTCUTS[last_component] 62 | regex = re.compile("|".join(map(re.escape, shortcuts.keys()))) 63 | property_value_spec = regex.sub( 64 | lambda match: shortcuts[match.group(0)], property_value_spec 65 | ) 66 | 67 | return f"/{property_value_spec}/" 68 | return property_value_spec 69 | 70 | 71 | class PropertyFilter: 72 | """ 73 | Class that represents a filter term ready for efficient use. 74 | """ 75 | 76 | def __init__(self, prop_path, prop_value_spec, global_configuration): 77 | """ 78 | Compile a filter property. See README for the filter spec format. 79 | 80 | :param prop_path: JsonPath or preset. 81 | :param prop_value_spec: Value spec. 82 | :param global_configuration: Global configuration of the filter. 83 | """ 84 | self.prop_path = prop_path 85 | resolved_prop_path = FILTER_SHORTCUTS.get(prop_path, prop_path) 86 | self.jsonpath_expr = jsonpath_ng.ext.parse(resolved_prop_path) 87 | 88 | # if prop_value_spec is a dict, update filter configuration from it 89 | if isinstance(prop_value_spec, dict): 90 | self.filter_configuration = copy.deepcopy(global_configuration) 91 | for config_key, config_value in prop_value_spec.items(): 92 | if config_key != "value": 93 | self.filter_configuration[config_key] = config_value 94 | # actual value for the filter is in "value" key 95 | prop_value_spec = prop_value_spec.get("value", "") 96 | else: 97 | self.filter_configuration = global_configuration 98 | value_spec = _convert_glob_to_regex(resolved_prop_path, prop_value_spec) 99 | self.filter_function = _get_filter_function(value_spec) 100 | 101 | 102 | class MultiPropertyFilter: 103 | """ 104 | Class representing a list of PropertyFilter objects. 105 | 106 | These are combined using AND to filter results. 107 | """ 108 | 109 | def __init__(self, filter_spec: List[dict], global_filter_configuration: dict): 110 | """ 111 | Initialise from a filter spec. 112 | 113 | See README for filter spec format. It's a list of property paths and values to be 114 | combined with AND to form a filter. 115 | """ 116 | self.filter_spec = filter_spec 117 | self.and_terms = [ 118 | PropertyFilter(prop_path, prop_value_spec, global_filter_configuration) 119 | for prop_path, prop_value_spec in filter_spec.items() 120 | ] 121 | 122 | 123 | def _compile_filters( 124 | filters: List[dict], global_filter_configuration: dict 125 | ) -> List[MultiPropertyFilter]: 126 | return [ 127 | MultiPropertyFilter(filter_spec, global_filter_configuration) 128 | for filter_spec in filters 129 | if filter_spec 130 | ] 131 | 132 | 133 | class GeneralFilter: 134 | """ 135 | Class that implements filtering. 
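A result is kept when it matches at least one include filter (if any are configured) and matches no exclude filter; each kept result gains a `properties.filtered` entry recording the matching filter spec, the filter description and any warnings.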
136 | """ 137 | 138 | def __init__(self): 139 | self.filter_stats = None 140 | self.include_filters = {} 141 | self.apply_inclusion_filter = False 142 | self.exclude_filters = {} 143 | self.apply_exclusion_filter = False 144 | self.configuration = copy.deepcopy(DEFAULT_CONFIGURATION) 145 | 146 | def init_filter( 147 | self, filter_description, configuration, include_filters, exclude_filters 148 | ): 149 | """ 150 | Initialise the filter with the given filter patterns. 151 | """ 152 | self.filter_stats = FilterStats(filter_description) 153 | self.configuration.update(configuration) 154 | self.include_filters = _compile_filters(include_filters, self.configuration) 155 | self.apply_inclusion_filter = len(include_filters) > 0 156 | self.exclude_filters = _compile_filters(exclude_filters, self.configuration) 157 | self.apply_exclusion_filter = len(exclude_filters) > 0 158 | 159 | def rehydrate_filter_stats(self, dehydrated_filter_stats, filter_datetime): 160 | """ 161 | Restore filter stats from the SARIF file directly, 162 | where they were recorded when the filter was previously run. 163 | 164 | Note that if init_filter is called, 165 | these rehydrated stats are discarded. 166 | """ 167 | self.filter_stats = load_filter_stats_from_json(dehydrated_filter_stats) 168 | self.filter_stats.filter_datetime = filter_datetime 169 | 170 | def _zero_counts(self): 171 | if self.filter_stats: 172 | self.filter_stats.reset_counters() 173 | 174 | def _filter_append(self, filtered_results: List[dict], result: dict): 175 | # Remove any existing filter log on the result 176 | result.setdefault("properties", {}).pop("filtered", None) 177 | 178 | if self.apply_inclusion_filter: 179 | included_stats = self._filter_result(result, self.include_filters) 180 | if not included_stats["matchedFilter"]: 181 | # Result is excluded by dint of not being included 182 | self.filter_stats.filtered_out_result_count += 1 183 | return 184 | else: 185 | # no inclusion filters, mark the result as included so far 186 | included_stats = {"state": "included", "matchedFilter": []} 187 | 188 | if self.apply_exclusion_filter: 189 | excluded_stats = self._filter_result(result, self.exclude_filters) 190 | if excluded_stats["matchedFilter"]: 191 | self.filter_stats.filtered_out_result_count += 1 192 | return 193 | 194 | included_state = included_stats["state"] 195 | if included_state == "included": 196 | self.filter_stats.filtered_in_result_count += 1 197 | elif included_state == "noLineNumber": 198 | self.filter_stats.unconvincing_line_number_count += 1 199 | else: 200 | self.filter_stats.missing_property_count += 1 201 | included_stats["filter"] = self.filter_stats.filter_description 202 | result["properties"]["filtered"] = included_stats 203 | 204 | filtered_results.append(result) 205 | 206 | def _filter_result(self, result: dict, filters: List[MultiPropertyFilter]) -> dict: 207 | matched_filters = [] 208 | warnings = [] 209 | (_file_path, line_number) = sarif_file_utils.read_result_location(result) 210 | unconvincing_line_number = line_number == "1" or not line_number 211 | default_include_noprop = False 212 | 213 | if filters: 214 | # filters contain rules which treated as OR. 215 | # if any rule matches, the record is selected. 216 | for mpf in filters: 217 | # filter_spec contains rules which treated as AND. 218 | # all rules must match to select the record. 
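# A term is skipped (treated as matching, with a warning) when line-number checking is enabled
# but the result has no convincing line number, or when the property is absent and
# default-include is true; with default-include false, a missing property fails the term.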
219 | matched = True 220 | for property_filter in mpf.and_terms: 221 | if ( 222 | property_filter.filter_configuration.get( 223 | "check-line-number", True 224 | ) 225 | and unconvincing_line_number 226 | ): 227 | warnings.append( 228 | f"Field '{property_filter.prop_path}' not checked due to " 229 | "missing line number information" 230 | ) 231 | continue 232 | found_results = property_filter.jsonpath_expr.find(result) 233 | if found_results: 234 | value = found_results[0].value 235 | if property_filter.filter_function(value): 236 | continue 237 | else: 238 | # property to filter on is not found, or skipped due to invalid line number. 239 | # if "default-include" is true, include the "result" with a warning. 240 | if property_filter.filter_configuration.get( 241 | "default-include", True 242 | ): 243 | warnings.append( 244 | f"Field '{property_filter.prop_path}' is missing but " 245 | "the result included as default-include is true" 246 | ) 247 | default_include_noprop = True 248 | continue 249 | matched = False 250 | break 251 | if matched: 252 | matched_filters.append(mpf.filter_spec) 253 | break 254 | 255 | stats = { 256 | "state": "included", 257 | "matchedFilter": matched_filters, 258 | } 259 | 260 | if warnings: 261 | stats.update( 262 | { 263 | "state": "noProperty" if default_include_noprop else "noLineNumber", 264 | "warnings": warnings, 265 | } 266 | ) 267 | 268 | return stats 269 | 270 | def filter_results(self, results: List[dict]) -> List[dict]: 271 | """ 272 | Apply this filter to a list of results, 273 | return the results that pass the filter 274 | and as a side-effect, update the filter stats. 275 | """ 276 | if self.apply_inclusion_filter or self.apply_exclusion_filter: 277 | self._zero_counts() 278 | ret = [] 279 | for result in results: 280 | self._filter_append(ret, result) 281 | return ret 282 | # No inclusion or exclusion patterns 283 | return results 284 | 285 | def get_filter_stats(self) -> Optional[FilterStats]: 286 | """ 287 | Get the statistics from running this filter. 288 | """ 289 | return self.filter_stats 290 | 291 | 292 | def load_filter_file(file_path): 293 | """ 294 | Load a YAML filter file, return the filter description and the filters. 
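For illustration, a filter file has roughly this shape (keys as read below; the values are placeholders only):
    description: Example filter
    configuration:
      default-include: true
    include:
      - rule: test-rule
    exclude:
      - suppression: inSource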
295 | """ 296 | try: 297 | file_name = os.path.basename(file_path) 298 | with open(file_path, encoding="utf-8") as file_in: 299 | yaml_content = yaml.safe_load(file_in) 300 | filter_description = yaml_content.get("description", file_name) 301 | configuration = yaml_content.get("configuration", {}) 302 | include_filters = yaml_content.get("include", {}) 303 | exclude_filters = yaml_content.get("exclude", {}) 304 | except yaml.YAMLError as error: 305 | raise IOError(f"Cannot read filter file {file_path}") from error 306 | return filter_description, configuration, include_filters, exclude_filters 307 | -------------------------------------------------------------------------------- /tests/test_general_filter.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | from sarif.filter.general_filter import GeneralFilter, load_filter_file 3 | from sarif.filter.filter_stats import load_filter_stats_from_json 4 | 5 | 6 | class TestGeneralFilter: 7 | def test_init_filter(self): 8 | gf = GeneralFilter() 9 | 10 | gf.init_filter( 11 | "test filter", 12 | {}, 13 | [{"author": "John Doe"}], 14 | [{"suppression": "not a suppression"}], 15 | ) 16 | assert gf.filter_stats.filter_description == "test filter" 17 | assert len(gf.include_filters[0].and_terms) == 1 18 | assert gf.include_filters[0].and_terms[0].prop_path == "author" 19 | assert gf.apply_inclusion_filter is True 20 | assert len(gf.exclude_filters[0].and_terms) == 1 21 | assert gf.exclude_filters[0].and_terms[0].prop_path == "suppression" 22 | assert gf.apply_exclusion_filter is True 23 | 24 | def test_init_filter_no_value(self): 25 | gf = GeneralFilter() 26 | 27 | gf.init_filter( 28 | "test filter", 29 | {}, 30 | [{"author": {"default-include": False}}], # forgot "value" 31 | [], 32 | ) 33 | assert gf.filter_stats.filter_description == "test filter" 34 | assert len(gf.include_filters[0].and_terms) == 1 35 | assert gf.include_filters[0].and_terms[0].prop_path == "author" 36 | assert gf.apply_inclusion_filter is True 37 | assert not gf.exclude_filters 38 | 39 | def test_rehydrate_filter_stats(self): 40 | gf = GeneralFilter() 41 | dehydrated_filter_stats = { 42 | "filter": "test filter", 43 | "in": 10, 44 | "out": 5, 45 | "default": {"noProperty": 3}, 46 | } 47 | gf.rehydrate_filter_stats(dehydrated_filter_stats, "2022-01-01T00:00:00Z") 48 | assert gf.filter_stats.filtered_in_result_count == 10 49 | assert gf.filter_stats.filtered_out_result_count == 5 50 | assert gf.filter_stats.missing_property_count == 3 51 | assert gf.filter_stats.filter_datetime == "2022-01-01T00:00:00Z" 52 | 53 | def test_zero_counts(self): 54 | gf = GeneralFilter() 55 | gf.filter_stats = load_filter_stats_from_json( 56 | {"filter": "test filter", "in": 10, "out": 5, "default": {"noProperty": 3}} 57 | ) 58 | 59 | gf._zero_counts() 60 | assert gf.filter_stats.filtered_in_result_count == 0 61 | assert gf.filter_stats.filtered_out_result_count == 0 62 | assert gf.filter_stats.missing_property_count == 0 63 | 64 | def test_filter_append_include(self): 65 | general_filter = GeneralFilter() 66 | general_filter.init_filter( 67 | "test filter", {"check-line-number": False}, [{"ruleId": "test-rule"}], [] 68 | ) 69 | result = {"ruleId": "test-rule"} 70 | 71 | filtered_results = general_filter.filter_results([result]) 72 | assert len(filtered_results) == 1 73 | assert filtered_results[0] == result 74 | assert filtered_results[0]["properties"]["filtered"]["state"] == "included" 75 | assert 
general_filter.filter_stats.filtered_in_result_count == 1 76 | assert general_filter.filter_stats.filtered_out_result_count == 0 77 | assert general_filter.filter_stats.missing_property_count == 0 78 | 79 | def test_filter_append_exclude(self): 80 | general_filter = GeneralFilter() 81 | general_filter.init_filter("test filter", {}, [], [{"level": "error"}]) 82 | result = {"level": "error"} 83 | 84 | filtered_results = general_filter.filter_results([result]) 85 | assert len(filtered_results) == 0 86 | assert "filtered" not in result 87 | assert general_filter.filter_stats.filtered_in_result_count == 0 88 | assert general_filter.filter_stats.filtered_out_result_count == 1 89 | assert general_filter.filter_stats.missing_property_count == 0 90 | 91 | def test_filter_append_no_filters(self): 92 | general_filter = GeneralFilter() 93 | general_filter.init_filter("test filter", {"check-line-number": False}, [], []) 94 | result = {"ruleId": "test-rule"} 95 | 96 | filtered_results = general_filter.filter_results([result]) 97 | assert len(filtered_results) == 1 98 | assert filtered_results[0] == result 99 | assert "filtered" not in result 100 | 101 | def test_filter_results_match(self): 102 | general_filter = GeneralFilter() 103 | general_filter.init_filter( 104 | "test filter", 105 | {"check-line-number": False}, 106 | [{"ruleId": "test-rule"}, {"level": "error"}], 107 | [], 108 | ) 109 | result = {"ruleId": "test-rule", "level": "error"} 110 | 111 | filtered_results = general_filter.filter_results([result]) 112 | assert len(filtered_results) == 1 113 | assert filtered_results[0] == result 114 | assert filtered_results[0]["properties"]["filtered"]["state"] == "included" 115 | assert filtered_results[0]["properties"]["filtered"]["matchedFilter"] == [ 116 | {"ruleId": "test-rule"} 117 | ] 118 | assert "warnings" not in filtered_results[0]["properties"]["filtered"] 119 | assert general_filter.filter_stats.filtered_in_result_count == 1 120 | assert general_filter.filter_stats.filtered_out_result_count == 0 121 | assert general_filter.filter_stats.missing_property_count == 0 122 | 123 | def test_filter_results_no_match(self): 124 | general_filter = GeneralFilter() 125 | general_filter.init_filter( 126 | "test filter", 127 | {"check-line-number": False}, 128 | [{"ruleId": "other-rule"}, {"level": "warning"}], 129 | [], 130 | ) 131 | result = {"ruleId": "test-rule", "level": "error"} 132 | 133 | filtered_results = general_filter.filter_results([result]) 134 | assert len(filtered_results) == 0 135 | 136 | def test_filter_results_regex(self): 137 | general_filter = GeneralFilter() 138 | rule = {"properties.blame.author-mail": "/myname\\..*\\.com/"} 139 | general_filter.init_filter( 140 | "test filter", 141 | {"check-line-number": True}, 142 | [rule], 143 | [], 144 | ) 145 | result = { 146 | "ruleId": "test-rule", 147 | "properties": {"blame": {"author-mail": "user@myname.example.com"}}, 148 | "locations": [{"physicalLocation": {"region": {"startLine": "123"}}}], 149 | } 150 | 151 | filtered_results = general_filter.filter_results([result]) 152 | assert len(filtered_results) == 1 153 | assert filtered_results[0]["properties"]["filtered"]["state"] == "included" 154 | assert filtered_results[0]["properties"]["filtered"]["matchedFilter"] == [rule] 155 | assert "warnings" not in filtered_results[0]["properties"]["filtered"] 156 | 157 | def test_filter_results_regex_guid(self): 158 | general_filter = GeneralFilter() 159 | guid_rule = { 160 | "properties.blame.author-mail": 
"/[0-9A-F]{8}[-][0-9A-F]{4}[-][0-9A-F]{4}" 161 | + "[-][0-9A-F]{4}[-][0-9A-F]{12}/" 162 | } 163 | general_filter.init_filter( 164 | "test filter", 165 | {"check-line-number": False}, 166 | [guid_rule], 167 | [], 168 | ) 169 | result = { 170 | "ruleId": "test-rule", 171 | "properties": { 172 | "blame": {"author-mail": "AAAAA1234ABCD-FEDC-BA09-8765-4321ABCDEF90"} 173 | }, 174 | } 175 | 176 | filtered_results = general_filter.filter_results([result]) 177 | assert len(filtered_results) == 1 178 | assert filtered_results[0]["properties"]["filtered"]["state"] == "included" 179 | assert filtered_results[0]["properties"]["filtered"]["matchedFilter"] == [ 180 | guid_rule 181 | ] 182 | assert "warnings" not in filtered_results[0]["properties"]["filtered"] 183 | 184 | def test_filter_results_existence_only(self): 185 | general_filter = GeneralFilter() 186 | general_filter.init_filter( 187 | "test filter", {"check-line-number": False}, [], [{"suppression": {}}] 188 | ) 189 | result = {"ruleId": "test-rule", "suppressions": [{"kind": "inSource"}]} 190 | 191 | filtered_results = general_filter.filter_results([result]) 192 | assert len(filtered_results) == 0 193 | 194 | def test_filter_results_match_default_include_default_configuration(self): 195 | general_filter = GeneralFilter() 196 | general_filter.init_filter( 197 | "test filter", {"check-line-number": False}, [{"level": "error"}], [] 198 | ) 199 | result = {"ruleId": "test-rule"} 200 | 201 | filtered_results = general_filter.filter_results([result]) 202 | assert len(filtered_results) == 1 203 | assert filtered_results[0] == result 204 | assert filtered_results[0]["properties"]["filtered"]["state"] == "noProperty" 205 | assert filtered_results[0]["properties"]["filtered"]["warnings"] == [ 206 | "Field 'level' is missing but the result included as default-include is true" 207 | ] 208 | assert general_filter.filter_stats.filtered_in_result_count == 0 209 | assert general_filter.filter_stats.filtered_out_result_count == 0 210 | assert general_filter.filter_stats.missing_property_count == 1 211 | 212 | def test_filter_results_check_line_number(self): 213 | general_filter = GeneralFilter() 214 | general_filter.init_filter("test filter", {}, [{"level": "error"}], []) 215 | result = { 216 | "ruleId": "test-rule", 217 | "locations": [{"physicalLocation": {"region": {"startLine": "1"}}}], 218 | } 219 | 220 | filtered_results = general_filter.filter_results([result]) 221 | assert len(filtered_results) == 1 222 | assert filtered_results[0] == result 223 | assert filtered_results[0]["properties"]["filtered"]["state"] == "noLineNumber" 224 | assert filtered_results[0]["properties"]["filtered"]["warnings"] == [ 225 | "Field 'level' not checked due to missing line number information" 226 | ] 227 | assert general_filter.filter_stats.filtered_in_result_count == 0 228 | assert general_filter.filter_stats.filtered_out_result_count == 0 229 | assert general_filter.filter_stats.missing_property_count == 0 230 | assert general_filter.filter_stats.unconvincing_line_number_count == 1 231 | 232 | def test_filter_results_match_default_include_rule_override(self): 233 | general_filter = GeneralFilter() 234 | general_filter.init_filter( 235 | "test filter", 236 | {"check-line-number": False}, 237 | [{"level": {"value": "error", "default-include": False}}], 238 | [], 239 | ) 240 | result = {"ruleId": "test-rule"} 241 | 242 | filtered_results = general_filter.filter_results([result]) 243 | assert len(filtered_results) == 0 244 | assert 
general_filter.filter_stats.filtered_in_result_count == 0 245 | # Filtered out because not filtered in 246 | assert general_filter.filter_stats.filtered_out_result_count == 1 247 | assert general_filter.filter_stats.missing_property_count == 0 248 | 249 | SHORTCUTS_TEST_PARAMS = [ 250 | ({"author": "John Smith"}, {"properties": {"blame": {"author": "John Smith"}}}), 251 | ( 252 | {"author-mail": "john.smith@example.com"}, 253 | {"properties": {"blame": {"author-mail": "john.smith@example.com"}}}, 254 | ), 255 | ( 256 | {"committer-mail": "john.smith@example.com"}, 257 | {"properties": {"blame": {"committer-mail": "john.smith@example.com"}}}, 258 | ), 259 | ( 260 | {"location": "test.cpp"}, 261 | { 262 | "locations": [ 263 | {"physicalLocation": {"artifactLocation": {"uri": "test.cpp"}}} 264 | ] 265 | }, 266 | ), 267 | ({"rule": "rule1"}, {"ruleId": "rule1"}), 268 | ({"suppression": "inSource"}, {"suppressions": [{"kind": "inSource"}]}), 269 | ] 270 | 271 | @pytest.mark.parametrize("shortcut_filter,result", SHORTCUTS_TEST_PARAMS) 272 | def test_filter_results_shortcuts(self, shortcut_filter, result): 273 | general_filter = GeneralFilter() 274 | general_filter.init_filter( 275 | "test filter", {"check-line-number": False}, [shortcut_filter], [] 276 | ) 277 | 278 | filtered_results = general_filter.filter_results([result]) 279 | assert len(filtered_results) == 1 280 | assert filtered_results[0] == result 281 | assert filtered_results[0]["properties"]["filtered"]["state"] == "included" 282 | assert "warnings" not in filtered_results[0]["properties"]["filtered"] 283 | 284 | def test_filter_results_include(self): 285 | general_filter = GeneralFilter() 286 | general_filter.init_filter( 287 | "test filter", {"check-line-number": False}, [{"ruleId": "test-rule"}], [] 288 | ) 289 | results = [{"ruleId": "test-rule"}] * 10 290 | 291 | filtered_results = general_filter.filter_results(results) 292 | assert len(filtered_results) == 10 293 | assert all(result in filtered_results for result in results) 294 | assert general_filter.filter_stats.filtered_in_result_count == 10 295 | assert general_filter.filter_stats.filtered_out_result_count == 0 296 | assert general_filter.filter_stats.missing_property_count == 0 297 | 298 | def test_filter_results_exclude(self): 299 | general_filter = GeneralFilter() 300 | general_filter.init_filter( 301 | "test filter", {"check-line-number": False}, [], [{"level": "error"}] 302 | ) 303 | results = [{"level": "error"}] * 10 304 | 305 | filtered_results = general_filter.filter_results(results) 306 | assert len(filtered_results) == 0 307 | assert general_filter.filter_stats.filtered_in_result_count == 0 308 | assert general_filter.filter_stats.filtered_out_result_count == 10 309 | assert general_filter.filter_stats.missing_property_count == 0 310 | 311 | def test_filter_results_exclude_not_all(self): 312 | general_filter = GeneralFilter() 313 | general_filter.init_filter( 314 | "test filter", {"check-line-number": False}, [], [{"level": "error"}] 315 | ) 316 | results = [{"level": "error"}, {"level": "warning"}, {"level": "error"}] 317 | 318 | filtered_results = general_filter.filter_results(results) 319 | assert len(filtered_results) == 1 320 | assert general_filter.filter_stats.filtered_in_result_count == 1 321 | assert general_filter.filter_stats.filtered_out_result_count == 2 322 | assert general_filter.filter_stats.missing_property_count == 0 323 | assert filtered_results[0]["properties"]["filtered"]["state"] == "included" 324 | assert 
len(filtered_results[0]["properties"]["filtered"]["matchedFilter"]) == 0 325 | 326 | def test_filter_results_no_filters(self): 327 | general_filter = GeneralFilter() 328 | general_filter.init_filter("test filter", {"check-line-number": False}, [], []) 329 | results = [{"ruleId": "test-rule"}] * 10 330 | 331 | filtered_results = general_filter.filter_results(results) 332 | assert len(filtered_results) == 10 333 | assert all(result in filtered_results for result in results) 334 | assert general_filter.filter_stats.filtered_in_result_count == 0 335 | assert general_filter.filter_stats.filtered_out_result_count == 0 336 | assert general_filter.filter_stats.missing_property_count == 0 337 | 338 | def test_get_filter_stats(self): 339 | general_filter = GeneralFilter() 340 | general_filter.init_filter( 341 | "test filter", {"check-line-number": False}, [{"ruleId": "test-rule"}], [] 342 | ) 343 | results = [{"ruleId": "test-rule"}] * 10 344 | 345 | general_filter.filter_results(results) 346 | filter_stats = general_filter.get_filter_stats() 347 | assert filter_stats.filtered_in_result_count == 10 348 | assert filter_stats.filtered_out_result_count == 0 349 | assert filter_stats.missing_property_count == 0 350 | 351 | def test_load_filter_file(self): 352 | file_path = "test_filter.yaml" 353 | filter_description = "Test filter" 354 | include_filters = {"ruleId": "test-rule"} 355 | exclude_filters = {"level": "error"} 356 | with open(file_path, "w") as f: 357 | f.write(f"description: {filter_description}\n") 358 | f.write(f"include:\n ruleId: {include_filters['ruleId']}\n") 359 | f.write(f"exclude:\n level: {exclude_filters['level']}\n") 360 | 361 | loaded_filter = load_filter_file(file_path) 362 | assert loaded_filter == ( 363 | filter_description, 364 | {}, 365 | include_filters, 366 | exclude_filters, 367 | ) 368 | 369 | def test_load_filter_file_with_configuration(self): 370 | file_path = "test_filter.yaml" 371 | filter_description = "Test filter" 372 | configuration = {"default-include": True} 373 | include_filters = {"ruleId": "test-rule"} 374 | exclude_filters = {"level": "error"} 375 | with open(file_path, "w") as f: 376 | f.write(f"description: {filter_description}\n") 377 | f.write("configuration:\n default-include: true\n") 378 | f.write(f"include:\n ruleId: {include_filters['ruleId']}\n") 379 | f.write(f"exclude:\n level: {exclude_filters['level']}\n") 380 | 381 | loaded_filter = load_filter_file(file_path) 382 | assert loaded_filter == ( 383 | filter_description, 384 | configuration, 385 | include_filters, 386 | exclude_filters, 387 | ) 388 | 389 | def test_load_filter_file_wrong_format(self): 390 | file_path = "test_filter.yaml" 391 | filter_description = "Test filter" 392 | with open(file_path, "w") as f: 393 | f.write(f"description: {filter_description}\n") 394 | f.write("include\n") 395 | f.write("exclude\n") 396 | 397 | with pytest.raises(IOError) as io_error: 398 | load_filter_file(file_path) 399 | assert str(io_error.value) == f"Cannot read filter file {file_path}" 400 | -------------------------------------------------------------------------------- /sarif/cmdline/main.py: -------------------------------------------------------------------------------- 1 | """ 2 | Program entry point for sarif-tools on the command line. 
3 | """ 4 | 5 | import argparse 6 | import os 7 | import sys 8 | 9 | from sarif import loader, sarif_file, __version__ as SARIF_TOOLS_PACKAGE_VERSION 10 | from sarif.filter.general_filter import load_filter_file 11 | 12 | from sarif.operations import ( 13 | blame_op, 14 | codeclimate_op, 15 | copy_op, 16 | csv_op, 17 | diff_op, 18 | html_op, 19 | emacs_op, 20 | info_op, 21 | ls_op, 22 | summary_op, 23 | trend_op, 24 | upgrade_filter_op, 25 | word_op, 26 | ) 27 | 28 | 29 | def main(): 30 | """ 31 | Entry point function. 32 | """ 33 | args, unknown_args = ARG_PARSER.parse_known_args() 34 | 35 | if args.debug: 36 | _print_version() 37 | print(f"Running code from {__file__}") 38 | known_args_summary = ", ".join( 39 | f"{key}={getattr(args, key)}" for key in vars(args) 40 | ) 41 | print(f"Known arguments: {known_args_summary}") 42 | if args.version: 43 | return 0 44 | elif args.version: 45 | _print_version() 46 | return 0 47 | 48 | if unknown_args: 49 | if any( 50 | unknown_arg.startswith("--blame-filter") 51 | or unknown_arg.startswith("-b=") 52 | or unknown_arg == "-b" 53 | for unknown_arg in unknown_args 54 | ): 55 | print("ERROR: --blame-filter was removed in v2.0.0.") 56 | print( 57 | "Run the upgrade-filter command to convert your blame filter to the new filter format, then pass via --filter option." 58 | ) 59 | args = ARG_PARSER.parse_args() 60 | 61 | exitcode = args.func(args) 62 | return exitcode 63 | 64 | 65 | def _create_arg_parser(): 66 | cmd_list = "commands:\n" 67 | max_cmd_length = max(len(cmd) for cmd in _COMMANDS) 68 | col_width = max_cmd_length + 2 69 | for cmd, cmd_attributes in _COMMANDS.items(): 70 | cmd_list += cmd.ljust(col_width) + cmd_attributes["desc"] + "\n" 71 | cmd_list += "Run `sarif --help` for command-specific help." 72 | parser = argparse.ArgumentParser( 73 | prog="sarif", 74 | description="Process sets of SARIF files", 75 | epilog=cmd_list, 76 | formatter_class=argparse.RawDescriptionHelpFormatter, 77 | ) 78 | parser.set_defaults(func=_usage_command) 79 | subparsers = parser.add_subparsers(dest="command", help="command") 80 | subparser = {} 81 | for cmd, cmd_attributes in _COMMANDS.items(): 82 | subparser[cmd] = subparsers.add_parser(cmd, description=cmd_attributes["desc"]) 83 | subparser[cmd].set_defaults(func=cmd_attributes["fn"]) 84 | 85 | # Common options 86 | parser.add_argument("--version", "-v", action="store_true") 87 | parser.add_argument( 88 | "--debug", action="store_true", help="Print information useful for debugging" 89 | ) 90 | parser.add_argument( 91 | "--check", 92 | "-x", 93 | type=str, 94 | choices=sarif_file.SARIF_SEVERITIES_WITH_NONE, 95 | help="Exit with error code if there are any issues of the specified level " 96 | + "(or for diff, an increase in issues at that level).", 97 | ) 98 | 99 | for cmd in [ 100 | "blame", 101 | "codeclimate", 102 | "csv", 103 | "html", 104 | "emacs", 105 | "summary", 106 | "word", 107 | "upgrade-filter", 108 | ]: 109 | subparser[cmd].add_argument( 110 | "--output", "-o", type=str, metavar="PATH", help="Output file or directory" 111 | ) 112 | for cmd in ["copy", "diff", "info", "ls", "trend", "usage"]: 113 | subparser[cmd].add_argument( 114 | "--output", "-o", type=str, metavar="FILE", help="Output file" 115 | ) 116 | 117 | for cmd in [ 118 | "codeclimate", 119 | "copy", 120 | "csv", 121 | "diff", 122 | "summary", 123 | "html", 124 | "emacs", 125 | "trend", 126 | "word", 127 | ]: 128 | subparser[cmd].add_argument( 129 | "--filter", 130 | "-b", 131 | type=str, 132 | metavar="FILE", 133 | help="Specify the filter 
file to apply. See README for format.", 134 | ) 135 | 136 | # Command-specific options 137 | subparser["blame"].add_argument( 138 | "--code", 139 | "-c", 140 | metavar="PATH", 141 | type=str, 142 | help="Path to git repository; if not specified, the current working directory is used", 143 | ) 144 | subparser["copy"].add_argument( 145 | "--timestamp", 146 | "-t", 147 | action="store_true", 148 | help='Append current timestamp to output filename in the "yyyymmddThhmmssZ" format used by ' 149 | "the `sarif trend` command", 150 | ) 151 | # codeclimate and csv default to no trimming 152 | for cmd in ["codeclimate", "csv"]: 153 | subparser[cmd].add_argument( 154 | "--autotrim", 155 | "-a", 156 | action="store_true", 157 | help="Strip off the common prefix of paths in the CSV output", 158 | ) 159 | # word and html default to trimming 160 | for cmd in ["html", "emacs", "word"]: 161 | subparser[cmd].add_argument( 162 | "--no-autotrim", 163 | "-n", 164 | action="store_true", 165 | help="Do not strip off the common prefix of paths in the output document", 166 | ) 167 | subparser[cmd].add_argument( 168 | "--image", 169 | type=str, 170 | help="Image to include at top of file - SARIF logo by default", 171 | ) 172 | # codeclimate, csv, html and word allow trimmable paths to be specified 173 | for cmd in ["codeclimate", "csv", "word", "html", "emacs"]: 174 | subparser[cmd].add_argument( 175 | "--trim", 176 | metavar="PREFIX", 177 | action="append", 178 | type=str, 179 | help="Prefix to strip from issue paths, e.g. the checkout directory on the build agent", 180 | ) 181 | # Most commands take an arbitrary list of SARIF files or directories 182 | for cmd in _COMMANDS: 183 | if cmd not in ["diff", "upgrade-filter", "usage", "version"]: 184 | subparser[cmd].add_argument( 185 | "files_or_dirs", 186 | metavar="file_or_dir", 187 | type=str, 188 | nargs="*", 189 | default=["."], 190 | help="A SARIF file or a directory containing SARIF files", 191 | ) 192 | subparser["diff"].add_argument( 193 | "old_file_or_dir", 194 | type=str, 195 | nargs=1, 196 | help="An old SARIF file or a directory containing the old SARIF files", 197 | ) 198 | subparser["diff"].add_argument( 199 | "new_file_or_dir", 200 | type=str, 201 | nargs=1, 202 | help="A new SARIF file or a directory containing the new SARIF files", 203 | ) 204 | 205 | subparser["trend"].add_argument( 206 | "--dateformat", 207 | "-f", 208 | type=str, 209 | choices=["dmy", "mdy", "ymd"], 210 | default="dmy", 211 | help="Date component order to use in output CSV. 
Default is `dmy`", 212 | ) 213 | 214 | subparser["upgrade-filter"].add_argument( 215 | "files_or_dirs", 216 | metavar="file", 217 | type=str, 218 | nargs="*", 219 | default=["."], 220 | help="A v1-style blame-filter file", 221 | ) 222 | 223 | return parser 224 | 225 | 226 | def _check(input_files: sarif_file.SarifFileSet, check_level): 227 | ret = 0 228 | if check_level: 229 | for severity in sarif_file.SARIF_SEVERITIES_WITH_NONE: 230 | ret += input_files.get_report().get_issue_count_for_severity(severity) 231 | if severity == check_level: 232 | break 233 | if ret > 0: 234 | sys.stderr.write( 235 | f"Check: exiting with return code {ret} due to issues at or above {check_level} " 236 | "severity\n" 237 | ) 238 | return ret 239 | 240 | 241 | def _init_filtering(input_files, args): 242 | if args.filter: 243 | filters = load_filter_file(args.filter) 244 | input_files.init_general_filter(*filters) 245 | 246 | 247 | def _init_path_prefix_stripping(input_files, args, strip_by_default): 248 | if strip_by_default: 249 | autotrim = not args.no_autotrim 250 | else: 251 | autotrim = args.autotrim 252 | trim_paths = args.trim 253 | if autotrim or trim_paths: 254 | input_files.init_path_prefix_stripping(autotrim, trim_paths) 255 | 256 | 257 | def _ensure_dir(dir_path): 258 | """ 259 | Create directory if it does not exist 260 | """ 261 | if dir_path and not os.path.isdir(dir_path): 262 | os.makedirs(dir_path) 263 | 264 | 265 | def _prepare_output( 266 | input_files: sarif_file.SarifFileSet, output_arg, output_file_extension: str 267 | ): 268 | """ 269 | Returns (output, output_multiple_files) 270 | output is args.output, or if that wasn't specified, a default output file based on the inputs 271 | and the file extension. 272 | output_multiple_files determines whether to output one file per input plus a totals file. 273 | It is false if there is only one input file, or args.output is a file that exists, 274 | or args.output ends with the expected file extension. 275 | """ 276 | input_file_count = len(input_files) 277 | if input_file_count == 0: 278 | return ("static_analysis_output" + output_file_extension, False) 279 | if input_file_count == 1: 280 | derived_output_filename = ( 281 | input_files[0].get_file_name_without_extension() + output_file_extension 282 | ) 283 | if output_arg: 284 | if os.path.isdir(output_arg): 285 | return (os.path.join(output_arg, derived_output_filename), False) 286 | _ensure_dir(os.path.dirname(output_arg)) 287 | return (output_arg, False) 288 | return (derived_output_filename, False) 289 | # Multiple input files 290 | if output_arg: 291 | if os.path.isfile(output_arg) or output_arg.strip().upper().endswith( 292 | output_file_extension.upper() 293 | ): 294 | # Output single file, even though there are multiple input files. 
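# For example, `sarif csv dir1 dir2 --output combined.csv` takes this branch:
# the -o value ends with the expected ".csv" extension, so a single combined CSV
# is written rather than one output file per input file plus a totals file.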
295 | _ensure_dir(os.path.dirname(output_arg)) 296 | return (output_arg, False) 297 | _ensure_dir(output_arg) 298 | return (output_arg, True) 299 | return (os.getcwd(), True) 300 | 301 | 302 | ####################################### Command handlers ####################################### 303 | 304 | 305 | def _blame_command(args): 306 | input_files = loader.load_sarif_files(*args.files_or_dirs) 307 | (output, multiple_file_output) = _prepare_output(input_files, args.output, ".sarif") 308 | blame_op.enhance_with_blame( 309 | input_files, args.code or os.getcwd(), output, multiple_file_output 310 | ) 311 | return _check(input_files, args.check) 312 | 313 | 314 | def _codeclimate_command(args): 315 | input_files = loader.load_sarif_files(*args.files_or_dirs) 316 | input_files.init_default_line_number_1() 317 | _init_path_prefix_stripping(input_files, args, strip_by_default=False) 318 | _init_filtering(input_files, args) 319 | (output, multiple_file_output) = _prepare_output(input_files, args.output, ".json") 320 | codeclimate_op.generate(input_files, output, multiple_file_output) 321 | return _check(input_files, args.check) 322 | 323 | 324 | def _copy_command(args): 325 | input_files = loader.load_sarif_files(*args.files_or_dirs) 326 | _init_filtering(input_files, args) 327 | output = args.output or "out.sarif" 328 | output_sarif_file_set = copy_op.generate_sarif( 329 | input_files, 330 | output, 331 | args.timestamp, 332 | SARIF_TOOLS_PACKAGE_VERSION, 333 | " ".join(sys.argv), 334 | ) 335 | return _check(output_sarif_file_set, args.check) 336 | 337 | 338 | def _csv_command(args): 339 | input_files = loader.load_sarif_files(*args.files_or_dirs) 340 | input_files.init_default_line_number_1() 341 | _init_path_prefix_stripping(input_files, args, strip_by_default=False) 342 | _init_filtering(input_files, args) 343 | (output, multiple_file_output) = _prepare_output(input_files, args.output, ".csv") 344 | csv_op.generate_csv(input_files, output, multiple_file_output) 345 | return _check(input_files, args.check) 346 | 347 | 348 | def _diff_command(args): 349 | old_sarif = loader.load_sarif_files(args.old_file_or_dir[0]) 350 | new_sarif = loader.load_sarif_files(args.new_file_or_dir[0]) 351 | _init_filtering(old_sarif, args) 352 | _init_filtering(new_sarif, args) 353 | return diff_op.print_diff(old_sarif, new_sarif, args.output, args.check) 354 | 355 | 356 | def _html_command(args): 357 | input_files = loader.load_sarif_files(*args.files_or_dirs) 358 | input_files.init_default_line_number_1() 359 | _init_path_prefix_stripping(input_files, args, strip_by_default=True) 360 | _init_filtering(input_files, args) 361 | (output, multiple_file_output) = _prepare_output(input_files, args.output, ".html") 362 | html_op.generate_html(input_files, args.image, output, multiple_file_output) 363 | return _check(input_files, args.check) 364 | 365 | 366 | def _emacs_command(args): 367 | input_files = loader.load_sarif_files(*args.files_or_dirs) 368 | input_files.init_default_line_number_1() 369 | _init_path_prefix_stripping(input_files, args, strip_by_default=True) 370 | _init_filtering(input_files, args) 371 | (output, multiple_file_output) = _prepare_output(input_files, args.output, ".txt") 372 | emacs_op.generate_compile(input_files, output, multiple_file_output) 373 | return _check(input_files, args.check) 374 | 375 | 376 | def _info_command(args): 377 | input_files = loader.load_sarif_files(*args.files_or_dirs) 378 | info_op.generate_info(input_files, args.output) 379 | if args.check: 380 | return 
_check(input_files, args.check) 381 | return 0 382 | 383 | 384 | def _ls_command(args): 385 | ls_op.print_ls(args.files_or_dirs, args.output) 386 | if args.check: 387 | input_files = loader.load_sarif_files(*args.files_or_dirs) 388 | return _check(input_files, args.check) 389 | return 0 390 | 391 | 392 | def _summary_command(args): 393 | input_files = loader.load_sarif_files(*args.files_or_dirs) 394 | _init_filtering(input_files, args) 395 | (output, multiple_file_output) = (None, False) 396 | if args.output: 397 | (output, multiple_file_output) = _prepare_output( 398 | input_files, args.output, ".txt" 399 | ) 400 | summary_op.generate_summary(input_files, output, multiple_file_output) 401 | return _check(input_files, args.check) 402 | 403 | 404 | def _trend_command(args): 405 | input_files = loader.load_sarif_files(*args.files_or_dirs) 406 | input_files.init_default_line_number_1() 407 | _init_filtering(input_files, args) 408 | if args.output: 409 | _ensure_dir(os.path.dirname(args.output)) 410 | output = args.output 411 | else: 412 | output = "static_analysis_trend.csv" 413 | trend_op.generate_trend_csv(input_files, output, args.dateformat) 414 | return _check(input_files, args.check) 415 | 416 | 417 | def _upgrade_filter_command(args): 418 | old_filter_files = args.files_or_dirs 419 | single_output_file = None 420 | output_dir = None 421 | if len(old_filter_files) == 1: 422 | if args.output and os.path.isdir(args.output): 423 | output_dir = args.output 424 | else: 425 | single_output_file = args.output or old_filter_files[0] + ".yaml" 426 | elif args.output: 427 | output_dir = args.output 428 | else: 429 | output_dir = os.path.dirname(args.output) 430 | for old_filter_file in old_filter_files: 431 | output_file = single_output_file or os.path.join( 432 | output_dir, os.path.basename(old_filter_file) + ".yaml" 433 | ) 434 | upgrade_filter_op.upgrade_filter_file(old_filter_file, output_file) 435 | return 0 436 | 437 | 438 | def _usage_command(args): 439 | if hasattr(args, "output") and args.output: 440 | with open(args.output, "w", encoding="utf-8") as file_out: 441 | ARG_PARSER.print_help(file_out) 442 | print("Wrote usage instructions to", args.output) 443 | else: 444 | ARG_PARSER.print_help() 445 | if args.check: 446 | sys.stderr.write("Spurious --check argument") 447 | return 1 448 | return 0 449 | 450 | 451 | def _version_command(args): 452 | _print_version(not args.version) 453 | 454 | 455 | def _print_version(bare=False): 456 | print( 457 | SARIF_TOOLS_PACKAGE_VERSION 458 | if bare 459 | else f"SARIF tools v{SARIF_TOOLS_PACKAGE_VERSION}" 460 | ) 461 | 462 | 463 | def _word_command(args): 464 | input_files = loader.load_sarif_files(*args.files_or_dirs) 465 | input_files.init_default_line_number_1() 466 | _init_path_prefix_stripping(input_files, args, strip_by_default=True) 467 | _init_filtering(input_files, args) 468 | (output, multiple_file_output) = _prepare_output(input_files, args.output, ".docx") 469 | word_op.generate_word_docs_from_sarif_inputs( 470 | input_files, args.image, output, multiple_file_output 471 | ) 472 | return _check(input_files, args.check) 473 | 474 | 475 | _COMMANDS = { 476 | "blame": { 477 | "fn": _blame_command, 478 | "desc": "Enhance SARIF file with information from `git blame`", 479 | }, 480 | "codeclimate": { 481 | "fn": _codeclimate_command, 482 | "desc": "Write a JSON representation in Code Climate format of SARIF file(s) " 483 | "for viewing as a Code Quality report in GitLab UI", 484 | }, 485 | "copy": { 486 | "fn": _copy_command, 487 | "desc": 
"Write a new SARIF file containing optionally-filtered data from other SARIF file(s)", 488 | }, 489 | "csv": { 490 | "fn": _csv_command, 491 | "desc": "Write a CSV file listing the issues from the SARIF files(s) specified", 492 | }, 493 | "diff": { 494 | "fn": _diff_command, 495 | "desc": "Find the difference between two [sets of] SARIF files", 496 | }, 497 | "emacs": { 498 | "fn": _emacs_command, 499 | "desc": "Write a representation of SARIF file(s) for viewing in emacs", 500 | }, 501 | "html": { 502 | "fn": _html_command, 503 | "desc": "Write an HTML representation of SARIF file(s) for viewing in a web browser", 504 | }, 505 | "info": { 506 | "fn": _info_command, 507 | "desc": "Print information about SARIF file(s) structure", 508 | }, 509 | "ls": { 510 | "fn": _ls_command, 511 | "desc": "List all SARIF files in the directories specified", 512 | }, 513 | "summary": { 514 | "fn": _summary_command, 515 | "desc": "Write a text summary with the counts of issues from the SARIF files(s) specified", 516 | }, 517 | "trend": { 518 | "fn": _trend_command, 519 | "desc": "Write a CSV file with time series data from SARIF files with " 520 | '"yyyymmddThhmmssZ" timestamps in their filenames', 521 | }, 522 | "upgrade-filter": { 523 | "fn": _upgrade_filter_command, 524 | "desc": "Upgrade a sarif-tools v1-style blame filter file to a v2-style filter YAML file", 525 | }, 526 | "usage": { 527 | "fn": _usage_command, 528 | "desc": "(Command optional) - print usage and exit", 529 | }, 530 | "version": {"fn": _version_command, "desc": "Print version and exit"}, 531 | "word": { 532 | "fn": _word_command, 533 | "desc": "Produce MS Word .docx summaries of the SARIF files specified", 534 | }, 535 | } 536 | 537 | ARG_PARSER = _create_arg_parser() 538 | --------------------------------------------------------------------------------