├── tests
│ ├── __init__.py
│ ├── utils.py
│ ├── ops
│ │ ├── version
│ │ │ └── test_version.py
│ │ ├── ls
│ │ │ └── test_ls.py
│ │ ├── upgrade_filter
│ │ │ └── test_upgrade_filter.py
│ │ ├── csv
│ │ │ └── test_csv.py
│ │ ├── codeclimate
│ │ │ └── test_codeclimate.py
│ │ ├── emacs
│ │ │ └── test_emacs.py
│ │ ├── summary
│ │ │ └── test_summary.py
│ │ ├── word
│ │ │ └── test_word.py
│ │ ├── info
│ │ │ └── test_info.py
│ │ ├── copy
│ │ │ └── test_copy.py
│ │ ├── trend
│ │ │ └── test_trend.py
│ │ ├── diff
│ │ │ ├── test_diff.py
│ │ │ └── test_diff_issues_reordered.py
│ │ ├── html
│ │ │ └── test_html.py
│ │ └── blame
│ │ │ └── test_blame.py
│ ├── test_check_switch.py
│ ├── test_sarif_file_utils.py
│ └── test_general_filter.py
├── sarif
│ ├── cmdline
│ │ ├── __init__.py
│ │ └── main.py
│ ├── filter
│ │ ├── __init__.py
│ │ ├── filter_stats.py
│ │ └── general_filter.py
│ ├── operations
│ │ ├── __init__.py
│ │ ├── templates
│ │ │ ├── sarif_emacs.txt
│ │ │ └── sarif_summary.html
│ │ ├── ls_op.py
│ │ ├── csv_op.py
│ │ ├── summary_op.py
│ │ ├── upgrade_filter_op.py
│ │ ├── codeclimate_op.py
│ │ ├── trend_op.py
│ │ ├── emacs_op.py
│ │ ├── copy_op.py
│ │ ├── info_op.py
│ │ ├── html_op.py
│ │ ├── blame_op.py
│ │ ├── diff_op.py
│ │ └── word_op.py
│ ├── __init__.py
│ ├── __main__.py
│ ├── charts.py
│ ├── loader.py
│ ├── issues_report.py
│ └── sarif_file_utils.py
├── poetry.toml
├── azure-pipelines
│ ├── templates
│ │ ├── globals.yml
│ │ ├── use_python.yml
│ │ └── build_stage.yml
│ ├── build.yml
│ └── release.yml
├── .pylintrc
├── .gitignore
├── SUPPORT.md
├── .vscode
│ └── extensions.json
├── CODE_OF_CONDUCT.md
├── .github
│ └── workflows
│ │ ├── build.yml
│ │ └── validation.yml
├── pyproject.toml
├── LICENSE
├── SECURITY.md
├── CONTRIBUTING.md
└── CHANGELOG.md
/tests/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/sarif/cmdline/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/sarif/filter/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/sarif/operations/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/poetry.toml:
--------------------------------------------------------------------------------
1 | [virtualenvs]
2 | in-project = true
3 |
--------------------------------------------------------------------------------
/azure-pipelines/templates/globals.yml:
--------------------------------------------------------------------------------
1 | variables:
2 | ARTIFACT_NAME_WHEEL: wheel
3 |
--------------------------------------------------------------------------------
/.pylintrc:
--------------------------------------------------------------------------------
1 | [TYPECHECK]
2 |
3 | ignored-classes=WD_PARAGRAPH_ALIGNMENT,WD_TAB_ALIGNMENT
4 |
5 |
--------------------------------------------------------------------------------
/sarif/__init__.py:
--------------------------------------------------------------------------------
1 | """
2 | Top-level version information for sarif-tools.
3 | """
4 |
5 | __version__ = "3.0.5"
6 |
--------------------------------------------------------------------------------
/sarif/__main__.py:
--------------------------------------------------------------------------------
1 | """
2 | This file supports `python -m sarif` invocation.
3 | """
4 |
5 | import sys
6 |
7 | from sarif.cmdline import main
8 |
9 | sys.exit(main.main())
10 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | /build
2 | /dist
3 | *.egg-info
4 | *.pyc
5 | *.orig
6 | /.venv
7 | /.vscode
8 | /.idea
9 | /.pytest_cache
10 | .DS_Store
11 | *.sarif
12 | *.csv
13 | .coverage
14 | coverage.xml
15 | *filter.yaml
16 |
--------------------------------------------------------------------------------
/azure-pipelines/templates/use_python.yml:
--------------------------------------------------------------------------------
1 | steps:
2 | - task: UsePythonVersion@0
3 | inputs:
4 | versionSpec: "$(python.version)"
5 | architecture: "$(architecture)"
6 | displayName: "Use Python $(python.version) $(architecture)"
7 |
--------------------------------------------------------------------------------
/SUPPORT.md:
--------------------------------------------------------------------------------
1 | # Support
2 |
3 | ## How to file issues and get help
4 |
5 | This project uses GitHub Issues to track bugs and feature requests. Please search the existing
6 | issues before filing new issues to avoid duplicates. For new issues, file your bug or
7 | feature request as a new Issue.
8 |
--------------------------------------------------------------------------------
/.vscode/extensions.json:
--------------------------------------------------------------------------------
1 | {
2 | // See https://go.microsoft.com/fwlink/?LinkId=827846
3 | // for the documentation about the extensions.json format
4 | "recommendations": [
5 | "charliermarsh.ruff",
6 | "ms-python.python",
7 | "ms-python.vscode-pylance"
8 | ]
9 | }
10 |
--------------------------------------------------------------------------------
/tests/utils.py:
--------------------------------------------------------------------------------
1 | import json
2 | import os
3 |
4 |
5 | def get_sarif_schema():
6 | # JSON Schema file for SARIF obtained from https://docs.oasis-open.org/sarif/sarif/v2.1.0/cs01/schemas/
7 | sarif_schema_file = os.path.join(
8 | os.path.dirname(__file__), "sarif-schema-2.1.0.json"
9 | )
10 | with open(sarif_schema_file, "rb") as f_schema:
11 | return json.load(f_schema)
12 |
--------------------------------------------------------------------------------
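[Usage sketch, not a repository file] `get_sarif_schema()` above loads the bundled SARIF 2.1.0 JSON Schema; a minimal way to validate a loaded SARIF document against it, assuming the schema file is present and the `jsonschema` dev dependency is installed (the input path is made up):

import json

import jsonschema

from tests.utils import get_sarif_schema

# Load a SARIF document (hypothetical path) and validate it against the schema.
with open("example.sarif", encoding="utf-8-sig") as f_in:
    sarif_data = json.load(f_in)

# Raises jsonschema.ValidationError if the document does not conform.
jsonschema.validate(sarif_data, schema=get_sarif_schema())
--------------------------------------------------------------------------------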
/tests/ops/version/test_version.py:
--------------------------------------------------------------------------------
1 | import pathlib
2 |
3 | import sarif
4 |
5 |
6 | def test_version():
7 | with open(
8 | pathlib.Path(__file__).parent.parent.parent.parent / "pyproject.toml"
9 | ) as pyproject_in:
10 | for pyproject_line in pyproject_in.readlines():
11 | if pyproject_line.startswith('version = "'):
12 | assert pyproject_line.strip() == f'version = "{sarif.__version__}"'
13 |
--------------------------------------------------------------------------------
/CODE_OF_CONDUCT.md:
--------------------------------------------------------------------------------
1 | # Microsoft Open Source Code of Conduct
2 |
3 | This project has adopted the [Microsoft Open Source Code of Conduct](https://opensource.microsoft.com/codeofconduct/).
4 |
5 | Resources:
6 |
7 | - [Microsoft Open Source Code of Conduct](https://opensource.microsoft.com/codeofconduct/)
8 | - [Microsoft Code of Conduct FAQ](https://opensource.microsoft.com/codeofconduct/faq/)
9 | - Contact [opencode@microsoft.com](mailto:opencode@microsoft.com) with questions or concerns
10 |
--------------------------------------------------------------------------------
/sarif/operations/templates/sarif_emacs.txt:
--------------------------------------------------------------------------------
1 | -*- compilation -*-
2 |
3 | Sarif Summary: {{ report_type }}
4 | Document generated on: {{ report_date }}
5 | Total number of distinct issues of all severities ({{ severities }}): {{ total }}
6 | {% if filtered -%}
 7 | {{ filtered }}
8 | {%- endif %}
9 |
10 | {% for problem in problems %}
11 | Severity : {{ problem.type }} [{{ problem.count }}]
12 | {% for error in problem.details -%}
13 | {% for line in error.details -%}
14 | {{ line.Location }}:{{ line.Line }}: {{ error.code }}
15 | {% endfor %}
16 | {% endfor %}
17 | {% endfor -%}
18 |
19 |
--------------------------------------------------------------------------------
/tests/test_check_switch.py:
--------------------------------------------------------------------------------
1 | import datetime
2 | from sarif.cmdline.main import _check
3 | from sarif import sarif_file
4 |
5 | SARIF = {
6 | "runs": [
7 | {
8 | "tool": {"driver": {"name": "Tool"}},
9 | "results": [{"level": "warning", "ruleId": "rule"}],
10 | }
11 | ]
12 | }
13 |
14 |
15 | def test_check():
16 | fileSet = sarif_file.SarifFileSet()
17 | fileSet.add_file(
18 | sarif_file.SarifFile("SARIF", SARIF, mtime=datetime.datetime.now())
19 | )
20 |
21 | result = _check(fileSet, "error")
22 | assert result == 0
23 |
24 | result = _check(fileSet, "warning")
25 | assert result == 1
26 |
27 | result = _check(fileSet, "note")
28 | assert result == 1
29 |
--------------------------------------------------------------------------------
/tests/ops/ls/test_ls.py:
--------------------------------------------------------------------------------
1 | import os
2 | import tempfile
3 |
4 | from sarif.operations import ls_op
5 |
6 |
7 | def test_ls():
8 | file_names = ["file1.sarif", "file2.sarif", "aaaa.sarif"]
9 |
10 | with tempfile.TemporaryDirectory() as tmp:
11 | for file_name in file_names:
12 | with open(os.path.join(tmp, file_name), "wb") as f_in:
13 | f_in.write("{}".encode())
14 |
15 | output_path = os.path.join(tmp, "output.txt")
16 | ls_op.print_ls([tmp], output_path)
17 |
18 | with open(output_path, "rb") as f_out:
19 | output = f_out.read().decode().splitlines()
20 |
21 | assert len(output) == len(file_names) + 1
22 | assert output[0] == tmp + ":"
23 | assert output[1:] == sorted([" " + file_name for file_name in file_names])
24 |
--------------------------------------------------------------------------------
/azure-pipelines/build.yml:
--------------------------------------------------------------------------------
1 | name: Build
2 | pr: none
3 | trigger:
4 | branches:
5 | include:
6 | - main
7 | paths:
8 | exclude:
9 | - azure-pipelines/release.yml
10 |
11 | resources:
12 | repositories:
13 | - repository: 1ESPipelineTemplates
14 | type: git
15 | name: 1ESPipelineTemplates/1ESPipelineTemplates
16 | ref: refs/tags/release
17 |
18 | variables:
19 | TeamName: sarif-tools
20 |
21 | extends:
22 | template: v1/1ES.Official.PipelineTemplate.yml@1ESPipelineTemplates
23 | parameters:
24 | sdl:
25 | sourceAnalysisPool: VSEngSS-MicroBuild2022-1ES
26 | pool:
27 | name: AzurePipelines-EO
28 | demands:
29 | - ImageOverride -equals 1ESPT-Ubuntu22.04
30 | os: Linux
31 | customBuildTags:
32 | - ES365AIMigrationTooling
33 | stages:
34 | - template: templates/build_stage.yml@self
35 |
--------------------------------------------------------------------------------
/.github/workflows/build.yml:
--------------------------------------------------------------------------------
1 | name: Build
2 |
3 | on:
4 | pull_request:
5 | branches:
6 | - main
7 |
8 | jobs:
9 | build:
10 | if: github.repository == 'microsoft/sarif-tools'
11 | runs-on: ubuntu-latest
12 | name: Build
13 |
14 | steps:
15 | - uses: actions/checkout@v4
16 | with:
17 | fetch-depth: 0
18 |
19 | - name: Setup Python
20 | uses: actions/setup-python@v5
21 | with:
22 | python-version: '3.8'
23 |
24 | - name: Install Poetry
25 | run: pip install poetry
26 |
27 | - name: Poetry Build
28 | run: poetry build --no-interaction
29 |
30 |       - name: Get Version
31 | id: get_version
32 | shell: bash
33 | run: echo "releaseVersion=$(poetry version --short)" >> $GITHUB_OUTPUT
34 |
35 | - uses: actions/upload-artifact@v4
36 | with:
37 | name: wheel
38 | path: dist/sarif_tools-${{ steps.get_version.outputs.releaseVersion }}-py3-none-any.whl
39 |
--------------------------------------------------------------------------------
/sarif/operations/ls_op.py:
--------------------------------------------------------------------------------
1 | """
2 | Code for `sarif ls` command.
3 | """
4 |
5 | from typing import List
6 |
7 | from sarif import loader
8 |
9 |
10 | def print_ls(files_or_dirs: List[str], output):
11 | """
12 | Print a SARIF file listing for each of the input files or directories.
13 | """
14 | dir_result = []
15 | for path in files_or_dirs:
16 | dir_result.append(f"{path}:")
17 | sarif_files = loader.load_sarif_files(path)
18 | if sarif_files:
19 | sarif_file_names = [f.get_file_name() for f in sarif_files]
20 | for file_name in sorted(sarif_file_names):
21 | dir_result.append(f" {file_name}")
22 | else:
23 | dir_result.append(" (None)")
24 | if output:
25 | print("Writing file listing to", output)
26 | with open(output, "w", encoding="utf-8") as file_out:
27 | file_out.writelines(d + "\n" for d in dir_result)
28 | else:
29 | for directory in dir_result:
30 | print(directory)
31 | print()
32 |
--------------------------------------------------------------------------------
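[Usage sketch, not a repository file] A minimal call of `print_ls` above; the directory and output file names are made up. Passing `None` as the output prints the listing to the console instead:

from sarif.operations import ls_op

# Hypothetical paths: list .sarif files under "scans" and write the listing to a file.
ls_op.print_ls(["scans"], "sarif_listing.txt")

# Or print the same listing to the console.
ls_op.print_ls(["scans"], None)
--------------------------------------------------------------------------------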
/pyproject.toml:
--------------------------------------------------------------------------------
1 | [tool.poetry]
2 | name = "sarif-tools"
3 | version = "3.0.5"
4 | description = "SARIF tools"
5 | authors = ["Microsoft"]
6 | readme = "README.md"
7 | homepage = "https://github.com/microsoft/sarif-tools"
8 | packages = [
9 | { include = "sarif" }
10 | ]
11 | classifiers = [
12 | "Programming Language :: Python :: 3",
13 | "License :: OSI Approved :: MIT License",
14 | "Operating System :: OS Independent"
15 | ]
16 |
17 | [tool.poetry.urls]
18 | "Bug Tracker" = "https://github.com/microsoft/sarif-tools/issues"
19 |
20 | [tool.poetry.dependencies]
21 | jinja2 = "^3.1.6"
22 | jsonpath-ng = "^1.6.0"
23 | matplotlib = "^3.7" # Need Python 3.9+ for newer
24 | python = "^3.8"
25 | python-docx = "^1.1.2"
26 | pyyaml = "^6.0.1"
27 |
28 | [tool.poetry.dev-dependencies]
29 | jsonschema = "^4.23.0"
30 | pylint = "^3.2"
31 | pytest = "^8.3"
32 | pytest-cov = "^5.0"
33 | ruff = "^0.6.8"
34 |
35 | [tool.poetry.scripts]
36 | sarif = "sarif.cmdline.main:main"
37 |
38 | [build-system]
39 | requires = ["poetry-core>=1.0.0"]
40 | build-backend = "poetry.core.masonry.api"
41 |
--------------------------------------------------------------------------------
/tests/ops/upgrade_filter/test_upgrade_filter.py:
--------------------------------------------------------------------------------
1 | import os
2 | import tempfile
3 |
4 | from sarif.operations import upgrade_filter_op
5 |
6 | INPUT_FILTER = """
7 | description: Test filter
8 | #comment
9 | +: include_with_prefix
10 | include_without_prefix
11 | -: exclude
12 | """
13 |
14 |
15 | EXPECTED_OUTPUT_TXT = """configuration:
16 | check-line-number: true
17 | default-include: true
18 | description: Test filter
19 | exclude:
20 | - author-mail: exclude
21 | include:
22 | - author-mail: include_with_prefix
23 | - author-mail: include_without_prefix
24 | """
25 |
26 |
27 | def test_upgrade_filter():
28 | with tempfile.TemporaryDirectory() as tmp:
29 | input_file_path = os.path.join(tmp, "input_filter.txt")
30 | with open(input_file_path, "wb") as f_in:
31 | f_in.write(INPUT_FILTER.encode())
32 |
33 | output_file_path = os.path.join(tmp, "output.txt")
34 | upgrade_filter_op.upgrade_filter_file(input_file_path, output_file_path)
35 |
36 | with open(output_file_path, "rb") as f_out:
37 | output = f_out.read().decode()
38 |
39 | assert output == EXPECTED_OUTPUT_TXT.replace("\n", os.linesep)
40 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | MIT License
2 |
3 | Copyright (c) Microsoft Corporation.
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 |
--------------------------------------------------------------------------------
/azure-pipelines/templates/build_stage.yml:
--------------------------------------------------------------------------------
1 | stages:
2 | - stage: Build
3 | variables:
4 | - template: globals.yml
5 | jobs:
6 | - job: Build
7 |
8 | templateContext:
9 | outputs:
10 | - output: pipelineArtifact
11 | targetPath: $(Build.StagingDirectory)/dist
12 | sbomBuildDropPath: $(Build.StagingDirectory)/dist
13 | artifactName: $(ARTIFACT_NAME_WHEEL)
14 |
15 | variables:
16 | python.version: "3.8"
17 | architecture: x64
18 |
19 | steps:
20 | - template: use_python.yml@self
21 |
22 | - script: pipx install poetry
23 | displayName: Install Poetry
24 |
25 | - script: poetry build --no-interaction
26 | displayName: poetry build
27 |
28 | - powershell: |
29 | $releaseVersion = & poetry version --short
30 | echo "releaseVersion: $releaseVersion"
31 | echo "##vso[task.setvariable variable=releaseVersion]$releaseVersion"
32 | echo "##vso[task.setvariable variable=releaseVersionWithPrefix;isOutput=true]v$releaseVersion"
33 | displayName: Get release version
34 | name: getReleaseVersionStep
35 |
36 | - task: CopyFiles@2
37 | displayName: Copy wheel and tarball
38 | inputs:
39 | sourceFolder: dist
40 | targetFolder: $(Build.StagingDirectory)/dist
41 | contents: |
42 | sarif_tools-$(releaseVersion)-py3-none-any.whl
43 | sarif_tools-$(releaseVersion).tar.gz
44 |
--------------------------------------------------------------------------------
/.github/workflows/validation.yml:
--------------------------------------------------------------------------------
1 | name: Validation
2 |
3 | on:
4 | push:
5 | branches:
6 | - main
7 | pull_request:
8 | branches:
9 | - main
10 |
11 | jobs:
12 | hygiene:
13 | if: github.repository == 'microsoft/sarif-tools'
14 | runs-on: ubuntu-latest
15 | name: Hygiene
16 | permissions:
17 | contents: read
18 | pull-requests: write
19 | steps:
20 | - uses: actions/checkout@v4
21 |
22 | - name: Setup Python
23 | uses: actions/setup-python@v5
24 | with:
25 | python-version: '3.8'
26 |
27 | - name: Install Poetry
28 | run: pip install poetry
29 |
30 | - name: Validate pyproject.toml and poetry.lock
31 | run: poetry check
32 |
33 | - name: Install dependencies
34 | run: poetry install
35 |
36 | - name: Validate code formatting
37 | run: poetry run ruff format --check
38 |
39 | - name: Validate code style
40 | run: poetry run ruff check
41 |
42 |
43 | test:
44 | if: github.repository == 'microsoft/sarif-tools'
45 | runs-on: ubuntu-latest
46 | name: Test
47 | steps:
48 | - uses: actions/checkout@v4
49 |
50 | - name: Setup Python
51 | uses: actions/setup-python@v5
52 | with:
53 | python-version: '3.8'
54 |
55 | - name: Install Poetry
56 | run: pip install poetry
57 |
58 | - name: Install dependencies
59 | run: poetry install --with dev
60 |
61 | - name: Run tests
62 | run: poetry run pytest
63 |
--------------------------------------------------------------------------------
/sarif/charts.py:
--------------------------------------------------------------------------------
1 | """
2 | Functions for generating charts from SARIF data
3 | """
4 |
5 | import io
6 | import matplotlib.pyplot as plt
7 |
8 |
9 | def generate_severity_pie_chart(report, output_file=None):
10 | """
11 | Generate a pie chart from the breakdown of issues by severity.
12 | The slices are ordered and plotted counter-clockwise. The return
13 | value is truthy if the number of issues is not zero, False otherwise.
14 | If `output_file` is `None`, return the bytes of the pie chart image in
15 | png format. Otherwise, write the bytes to the file specified (image
16 | format inferred from filename).
17 | """
18 | sizes = []
19 | labels = []
20 | explode = []
21 | for severity in report.get_severities():
22 | count = report.get_issue_count_for_severity(severity)
23 | if count > 0:
24 | sizes.append(count)
25 | labels.append(severity)
26 | explode.append(0.1) # could add more logic to highlight specific severities
27 |
28 | any_issues = bool(sizes)
29 | if any_issues:
30 | _fig1, ax1 = plt.subplots()
31 | ax1.pie(
32 | sizes,
33 | explode=explode,
34 | labels=labels,
35 | autopct="%1.1f%%",
36 | shadow=True,
37 | startangle=90,
38 | )
39 | ax1.axis("equal")
40 |
41 | if output_file:
42 | plt.savefig(output_file)
43 | else:
44 | byte_buffer = io.BytesIO()
45 | plt.savefig(byte_buffer, format="png")
46 | return byte_buffer.getbuffer()
47 | return any_issues
48 |
--------------------------------------------------------------------------------
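[Usage sketch, not a repository file] A minimal sketch of producing a severity pie chart from a set of SARIF files, using `loader.load_sarif_files` and `get_report()` as they are used elsewhere in this codebase; the paths are made up:

from sarif import charts, loader

# Hypothetical input directory of .sarif files.
file_set = loader.load_sarif_files("scans")
report = file_set.get_report()

# Writes severity.png; returns a falsy value if there are no issues to plot.
if not charts.generate_severity_pie_chart(report, output_file="severity.png"):
    print("No issues found - no chart written")
--------------------------------------------------------------------------------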
/tests/ops/csv/test_csv.py:
--------------------------------------------------------------------------------
1 | import datetime
2 | import os
3 | import tempfile
4 |
5 | from sarif.operations import csv_op
6 | from sarif import sarif_file
7 |
8 | INPUT_SARIF = {
9 | "$schema": "https://schemastore.azurewebsites.net/schemas/json/sarif-2.1.0-rtm.5.json",
10 | "version": "2.1.0",
11 | "runs": [
12 | {
13 | "tool": {"driver": {"name": "unit test"}},
14 | "results": [
15 | {
16 | "ruleId": "CA2101",
17 | "level": "error",
18 | "locations": [
19 | {
20 | "physicalLocation": {
21 | "artifactLocation": {
22 | "uri": "file:///C:/Code/main.c",
23 | "index": 0,
24 | },
25 | "region": {"startLine": 24, "startColumn": 9},
26 | }
27 | }
28 | ],
29 | }
30 | ],
31 | }
32 | ],
33 | }
34 |
35 |
36 | EXPECTED_OUTPUT_CSV = [
37 | "Tool,Severity,Code,Description,Location,Line",
38 | "unit test,error,CA2101,CA2101,file:///C:/Code/main.c,24",
39 | ]
40 |
41 |
42 | def test_csv():
43 | mtime = datetime.datetime.now()
44 | input_sarif_file = sarif_file.SarifFile("INPUT_SARIF", INPUT_SARIF, mtime=mtime)
45 |
46 | input_sarif_file_set = sarif_file.SarifFileSet()
47 | input_sarif_file_set.files.append(input_sarif_file)
48 |
49 | with tempfile.TemporaryDirectory() as tmp:
50 | file_path = os.path.join(tmp, "output.csv")
51 | csv_op.generate_csv(
52 | input_sarif_file_set, file_path, output_multiple_files=False
53 | )
54 |
55 | with open(file_path, "rb") as f_in:
56 | output_lines = f_in.read().decode().splitlines()
57 |
58 | assert output_lines == EXPECTED_OUTPUT_CSV
59 |
--------------------------------------------------------------------------------
/sarif/loader.py:
--------------------------------------------------------------------------------
1 | """
2 | Code to load SARIF files from disk.
3 | """
4 |
5 | import glob
6 | import json
7 | import os
8 |
9 | from sarif.sarif_file import has_sarif_file_extension, SarifFile, SarifFileSet
10 |
11 |
12 | def _add_path_to_sarif_file_set(path, sarif_file_set):
13 | if os.path.isdir(path):
14 | sarif_file_set.add_dir(_load_dir(path))
15 | return True
16 | if os.path.isfile(path):
17 | sarif_file_set.add_file(load_sarif_file(path))
18 | return True
19 | return False
20 |
21 |
22 | def load_sarif_files(*args) -> SarifFileSet:
23 | """
24 | Load SARIF files specified as individual filenames or directories. Return a SarifFileSet
25 | object.
26 | """
27 | ret = SarifFileSet()
28 | if args:
29 | for path in args:
30 | path_exists = _add_path_to_sarif_file_set(path, ret)
31 | if not path_exists:
32 | for resolved_path in glob.glob(path, recursive=True):
33 | if _add_path_to_sarif_file_set(resolved_path, ret):
34 | path_exists = True
35 | if not path_exists:
36 | print(f"Warning: input path {path} not found")
37 | return ret
38 |
39 |
40 | def _load_dir(path):
41 | subdir = SarifFileSet()
42 | for dirpath, _dirnames, filenames in os.walk(path):
43 | for filename in filenames:
44 | if has_sarif_file_extension(filename):
45 | subdir.add_file(load_sarif_file(os.path.join(dirpath, filename)))
46 | return subdir
47 |
48 |
49 | def load_sarif_file(file_path: str) -> SarifFile:
50 | """
51 | Load JSON data from a file and return as a SarifFile object.
52 | As per https://tools.ietf.org/id/draft-ietf-json-rfc4627bis-09.html#rfc.section.8.1, JSON
53 | data SHALL be encoded in utf-8.
54 | """
55 | try:
56 | with open(file_path, encoding="utf-8-sig") as file_in:
57 | data = json.load(file_in)
58 | return SarifFile(file_path, data)
59 | except Exception as exception:
60 | raise IOError(f"Cannot load {file_path}") from exception
61 |
--------------------------------------------------------------------------------
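[Usage sketch, not a repository file] A minimal sketch of loading SARIF files from a directory or glob and walking the issue records, using the `get_records()` accessors seen in the operations modules; the record keys follow the CSV headings shown in the tests, and the paths are made up:

from sarif import loader

# Hypothetical inputs: a directory plus a recursive glob.
file_set = loader.load_sarif_files("scans", "more_scans/**/*.sarif")

# Iterating the set yields the individual SARIF files that were loaded.
for sarif_file_object in file_set:
    print(sarif_file_object.get_file_name(), len(sarif_file_object.get_records()))

# Aggregated records across all loaded files.
for record in file_set.get_records():
    print(record["Severity"], record["Code"], record["Location"], record["Line"])
--------------------------------------------------------------------------------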
/tests/ops/codeclimate/test_codeclimate.py:
--------------------------------------------------------------------------------
1 | import datetime
2 | import json
3 | import os
4 | import tempfile
5 |
6 | from sarif.operations import codeclimate_op
7 | from sarif import sarif_file
8 |
9 | INPUT_SARIF = {
10 | "$schema": "https://schemastore.azurewebsites.net/schemas/json/sarif-2.1.0-rtm.5.json",
11 | "version": "2.1.0",
12 | "runs": [
13 | {
14 | "tool": {"driver": {"name": "unit test"}},
15 | "results": [
16 | {
17 | "ruleId": "CA2101",
18 | "level": "error",
19 | "locations": [
20 | {
21 | "physicalLocation": {
22 | "artifactLocation": {
23 | "uri": "file:///C:/Code/main.c",
24 | "index": 0,
25 | },
26 | "region": {"startLine": 24, "startColumn": 9},
27 | }
28 | }
29 | ],
30 | }
31 | ],
32 | }
33 | ],
34 | }
35 |
36 |
37 | EXPECTED_OUTPUT_JSON = [
38 | {
39 | "type": "issue",
40 | "check_name": "CA2101",
41 | "description": "CA2101",
42 | "categories": ["Bug Risk"],
43 | "location": {
44 | "path": "file:///C:/Code/main.c",
45 | "lines": {"begin": 24},
46 | },
47 | "severity": "major",
48 | "fingerprint": "e972b812ed32bf29ee306141244050b9",
49 | }
50 | ]
51 |
52 |
53 | def test_code_climate():
54 | mtime = datetime.datetime.now()
55 | input_sarif_file = sarif_file.SarifFile("INPUT_SARIF", INPUT_SARIF, mtime=mtime)
56 |
57 | input_sarif_file_set = sarif_file.SarifFileSet()
58 | input_sarif_file_set.files.append(input_sarif_file)
59 |
60 | with tempfile.TemporaryDirectory() as tmp:
61 | file_path = os.path.join(tmp, "codeclimate.json")
62 | codeclimate_op.generate(
63 | input_sarif_file_set, file_path, output_multiple_files=False
64 | )
65 |
66 | with open(file_path, "rb") as f_in:
67 | output_json = json.load(f_in)
68 |
69 | assert output_json == EXPECTED_OUTPUT_JSON
70 |
--------------------------------------------------------------------------------
/tests/ops/emacs/test_emacs.py:
--------------------------------------------------------------------------------
1 | import datetime
2 | import os
3 | import tempfile
4 |
5 | from sarif.operations import emacs_op
6 | from sarif import sarif_file
7 |
8 | INPUT_SARIF = {
9 | "$schema": "https://schemastore.azurewebsites.net/schemas/json/sarif-2.1.0-rtm.5.json",
10 | "version": "2.1.0",
11 | "runs": [
12 | {
13 | "tool": {"driver": {"name": "unit test"}},
14 | "results": [
15 | {
16 | "ruleId": "CA2101",
17 | "level": "error",
18 | "locations": [
19 | {
20 | "physicalLocation": {
21 | "artifactLocation": {
22 | "uri": "file:///C:/Code/main.c",
23 | "index": 0,
24 | },
25 | "region": {"startLine": 24, "startColumn": 9},
26 | }
27 | }
28 | ],
29 | }
30 | ],
31 | }
32 | ],
33 | }
34 |
35 |
36 | EXPECTED_OUTPUT_TXT = """-*- compilation -*-
37 |
38 | Sarif Summary: unit test
39 | Document generated on: <DATE>
40 | Total number of distinct issues of all severities (error, warning, note): 1
41 |
42 |
43 |
44 | Severity : error [1]
45 | file:///C:/Code/main.c:24: CA2101
46 |
47 |
48 |
49 | Severity : warning [0]
50 |
51 |
52 | Severity : note [0]
53 |
54 | """
55 |
56 |
57 | def test_emacs():
58 | mtime = datetime.datetime.now()
59 | input_sarif_file = sarif_file.SarifFile("INPUT_SARIF", INPUT_SARIF, mtime=mtime)
60 |
61 | input_sarif_file_set = sarif_file.SarifFileSet()
62 | input_sarif_file_set.files.append(input_sarif_file)
63 |
64 | with tempfile.TemporaryDirectory() as tmp:
65 | file_path = os.path.join(tmp, "output.txt")
66 | emacs_op.generate_compile(
67 | input_sarif_file_set, file_path, output_multiple_files=False, date_val=mtime
68 | )
69 |
70 | with open(file_path, "rb") as f_in:
71 | output = f_in.read().decode()
72 |
73 | assert output == EXPECTED_OUTPUT_TXT.replace("\n", os.linesep).replace(
74 |         "<DATE>", mtime.strftime("%Y-%m-%d %H:%M:%S.%f")
75 | )
76 |
--------------------------------------------------------------------------------
/tests/ops/summary/test_summary.py:
--------------------------------------------------------------------------------
1 | import datetime
2 | import json
3 | import os
4 | import tempfile
5 |
6 | from sarif.operations import summary_op
7 | from sarif import sarif_file
8 |
9 | INPUT_SARIF = """{
10 | "$schema": "https://schemastore.azurewebsites.net/schemas/json/sarif-2.1.0-rtm.5.json",
11 | "version": "2.1.0",
12 | "runs": [
13 | {
14 | "tool": {"driver": {"name": "unit test"}},
15 | "results": [
16 | {
17 | "ruleId": "CA2103",
18 | "level": "error"
19 | },
20 | {
21 | "ruleId": "CA2102",
22 | "level": "warning"
23 | },
24 | {
25 | "ruleId": "CA2101",
26 | "level": "warning"
27 | },
28 | {
29 | "ruleId": "CA2101",
30 | "level": "error"
31 | },
32 | {
33 | "ruleId": "CA2101",
34 | "level": "note"
35 | },
36 | {
37 | "ruleId": "CA2101",
38 | "level": "none"
39 | },
40 | {
41 | "ruleId": "CA2101",
42 | "level": "error"
43 | }
44 | ]
45 | }
46 | ]
47 | }
48 | """
49 |
50 | EXPECTED_OUTPUT_TXT = """
51 | error: 3
52 | - CA2101: 2
53 | - CA2103: 1
54 |
55 | warning: 2
56 | - CA2102: 1
57 | - CA2101: 1
58 |
59 | note: 1
60 | - CA2101: 1
61 |
62 | none: 1
63 | - CA2101: 1
64 | """
65 |
66 |
67 | def test_summary():
68 | with tempfile.TemporaryDirectory() as tmp:
69 | input_sarif_file_path = os.path.join(tmp, "input.sarif")
70 | with open(input_sarif_file_path, "wb") as f_in:
71 | f_in.write(INPUT_SARIF.encode())
72 |
73 | input_sarif = json.loads(INPUT_SARIF)
74 |
75 | input_sarif_file = sarif_file.SarifFile(
76 | input_sarif_file_path, input_sarif, mtime=datetime.datetime.now()
77 | )
78 |
79 | input_sarif_file_set = sarif_file.SarifFileSet()
80 | input_sarif_file_set.files.append(input_sarif_file)
81 |
82 | file_path = os.path.join(tmp, "output.txt")
83 | summary_op.generate_summary(
84 | input_sarif_file_set, file_path, output_multiple_files=False
85 | )
86 |
87 | with open(file_path, "rb") as f_out:
88 | output = f_out.read().decode()
89 |
90 | assert output == EXPECTED_OUTPUT_TXT.replace("\n", os.linesep)
91 |
--------------------------------------------------------------------------------
/sarif/operations/csv_op.py:
--------------------------------------------------------------------------------
1 | """
2 | Code for `sarif csv` command.
3 | """
4 |
5 | import csv
6 | import os
7 |
8 | from sarif import sarif_file
9 | from sarif.sarif_file import SarifFileSet
10 | from sarif.sarif_file_utils import combine_record_code_and_description
11 |
12 |
13 | def generate_csv(input_files: SarifFileSet, output: str, output_multiple_files: bool):
14 | """
15 | Generate a CSV file containing the list of issues from the SARIF files.
16 |     input_files is the SarifFileSet to report on; output is the CSV path (or a directory when
17 |     output_multiple_files is set).
17 | """
18 | output_file = output
19 | if output_multiple_files:
20 | for input_file in input_files:
21 | output_file_name = input_file.get_file_name_without_extension() + ".csv"
22 | print(
23 | "Writing CSV summary of",
24 | input_file.get_file_name(),
25 | "to",
26 | output_file_name,
27 | )
28 | _write_to_csv(input_file, os.path.join(output, output_file_name))
29 | filter_stats = input_file.get_filter_stats()
30 | if filter_stats:
31 | print(f" Results are filtered by {filter_stats}")
32 | output_file = os.path.join(output, "static_analysis_output.csv")
33 | source_description = input_files.get_description()
34 | print(
35 | "Writing CSV summary for",
36 | source_description,
37 | "to",
38 | os.path.basename(output_file),
39 | )
40 | _write_to_csv(input_files, output_file)
41 | filter_stats = input_files.get_filter_stats()
42 | if filter_stats:
43 | print(f" Results are filtered by {filter_stats}")
44 |
45 |
46 | def _write_to_csv(file_or_files, output_file):
47 | """
48 | Write out the errors to a CSV file so that a human can do further analysis.
49 | """
50 | list_of_errors = file_or_files.get_records()
51 | severities = file_or_files.get_severities()
52 | with open(output_file, "w", encoding="utf-8") as file_out:
53 | writer = csv.DictWriter(
54 | file_out,
55 | sarif_file.get_record_headings(file_or_files.has_blame_info()),
56 | lineterminator="\n",
57 | )
58 | writer.writeheader()
59 | for severity in severities:
60 | errors_of_severity = [
61 | e for e in list_of_errors if e["Severity"] == severity
62 | ]
63 | sorted_errors_by_severity = sorted(
64 | errors_of_severity, key=combine_record_code_and_description
65 | )
66 | writer.writerows(error_dict for error_dict in sorted_errors_by_severity)
67 |
--------------------------------------------------------------------------------
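[Usage sketch, not a repository file] The test earlier exercises the single-file mode; `generate_csv` above can also write one CSV per input file plus an aggregate `static_analysis_output.csv` when `output_multiple_files=True`, in which case `output` is treated as an existing directory. Paths here are made up:

from sarif import loader
from sarif.operations import csv_op

file_set = loader.load_sarif_files("scans")

# One CSV per input file, plus static_analysis_output.csv, written into the existing "reports" directory.
csv_op.generate_csv(file_set, "reports", output_multiple_files=True)
--------------------------------------------------------------------------------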
/tests/ops/word/test_word.py:
--------------------------------------------------------------------------------
1 | import datetime
2 | import os
3 | import tempfile
4 |
5 | from docx import Document
6 | from sarif.operations import word_op
7 | from sarif import sarif_file
8 |
9 | INPUT_SARIF = {
10 | "$schema": "https://schemastore.azurewebsites.net/schemas/json/sarif-2.1.0-rtm.5.json",
11 | "version": "2.1.0",
12 | "runs": [
13 | {
14 | "tool": {"driver": {"name": "unit test"}},
15 | "results": [
16 | {
17 | "ruleId": "CA2101",
18 | "level": "error",
19 | "locations": [
20 | {
21 | "physicalLocation": {
22 | "artifactLocation": {
23 | "uri": "file:///C:/Code/main.c",
24 | "index": 0,
25 | },
26 | "region": {"startLine": 24, "startColumn": 9},
27 | }
28 | }
29 | ],
30 | }
31 | ],
32 | }
33 | ],
34 | }
35 |
36 |
37 | EXPECTED_OUTPUT_TXT = [
38 | "Sarif Summary: unit test",
39 |     "Document generated on: <DATE>",
40 | "Total number of various severities (error, warning, note): 1",
41 | "",
42 | "",
43 | "Severity : error [ 1 ]",
44 | "CA2101: 1",
45 | "Severity : warning [ 0 ]",
46 | "None",
47 | "Severity : note [ 0 ]",
48 | "None",
49 | "",
50 | "Severity : error",
51 | "Severity : warning",
52 | "None",
53 | "Severity : note",
54 | "None",
55 | ]
56 |
57 |
58 | def test_word():
59 | mtime = datetime.datetime.now()
60 | input_sarif_file = sarif_file.SarifFile("INPUT_SARIF", INPUT_SARIF, mtime=mtime)
61 |
62 | input_sarif_file_set = sarif_file.SarifFileSet()
63 | input_sarif_file_set.files.append(input_sarif_file)
64 |
65 | with tempfile.TemporaryDirectory() as tmp:
66 | output_file_path = os.path.join(tmp, "output.docx")
67 | word_op.generate_word_docs_from_sarif_inputs(
68 | input_sarif_file_set,
69 | None,
70 | output_file_path,
71 | output_multiple_files=False,
72 | date_val=mtime,
73 | )
74 |
75 | word_doc = Document(output_file_path)
76 | word_doc_text = [paragraph.text for paragraph in word_doc.paragraphs]
77 |
78 | assert len(word_doc_text) == len(EXPECTED_OUTPUT_TXT)
79 | for actual, expected in zip(word_doc_text, EXPECTED_OUTPUT_TXT):
80 | assert actual == expected.replace(
81 |             "<DATE>", mtime.strftime("%Y-%m-%d %H:%M:%S.%f")
82 | )
83 |
--------------------------------------------------------------------------------
/sarif/operations/summary_op.py:
--------------------------------------------------------------------------------
1 | """
2 | Code for `sarif summary` command.
3 | """
4 |
5 | import os
6 | from typing import List
7 |
8 | from sarif.sarif_file import SarifFileSet
9 |
10 |
11 | def generate_summary(
12 | input_files: SarifFileSet, output: str, output_multiple_files: bool
13 | ):
14 | """
15 | Generate a summary of the issues from the SARIF files.
16 |     input_files is the SarifFileSet to summarize.
17 |     output is the name of a text file or directory to write, or if None, the summary is written to the
18 | console.
19 | """
20 | output_file = output
21 | if output_multiple_files:
22 | for input_file in input_files:
23 | output_file_name = (
24 | input_file.get_file_name_without_extension() + "_summary.txt"
25 | )
26 | output_file = os.path.join(output, output_file_name)
27 | summary_lines = _generate_summary(input_file)
28 | print(
29 | "Writing summary of",
30 | input_file.get_file_name(),
31 | "to",
32 | output_file_name,
33 | )
34 | with open(output_file, "w", encoding="utf-8") as file_out:
35 | file_out.writelines(line + "\n" for line in summary_lines)
36 | output_file_name = "static_analysis_summary.txt"
37 | output_file = os.path.join(output, output_file_name)
38 |
39 | summary_lines = _generate_summary(input_files)
40 | if output:
41 | print(
42 | "Writing summary of",
43 | input_files.get_description(),
44 | "to",
45 | output_file,
46 | )
47 | with open(output_file, "w", encoding="utf-8") as file_out:
48 | file_out.writelines(line + "\n" for line in summary_lines)
49 | else:
50 | for lstr in summary_lines:
51 | print(lstr)
52 |
53 |
54 | def _generate_summary(input_files: SarifFileSet) -> List[str]:
55 | """
56 | For each severity level (in priority order): create a list of the errors of
57 | that severity, print out how many there are and then do some further analysis
58 | of which error codes are present.
59 | """
60 | ret = []
61 | report = input_files.get_report()
62 | for severity in report.get_severities():
63 | result_count = report.get_issue_count_for_severity(severity)
64 | issue_type_histogram = report.get_issue_type_histogram_for_severity(severity)
65 | ret.append(f"\n{severity}: {result_count}")
66 | ret += [f" - {key}: {count}" for (key, count) in issue_type_histogram.items()]
67 | filter_stats = input_files.get_filter_stats()
68 | if filter_stats:
69 | ret.append(f"\nResults were filtered by {filter_stats}")
70 | return ret
71 |
--------------------------------------------------------------------------------
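[Usage sketch, not a repository file] When `output` is `None` (and multiple-file mode is off), `generate_summary` above prints the summary to the console; the input directory is made up:

from sarif import loader
from sarif.operations import summary_op

file_set = loader.load_sarif_files("scans")

# No output path given, so the per-severity summary is printed to the console.
summary_op.generate_summary(file_set, None, output_multiple_files=False)
--------------------------------------------------------------------------------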
/SECURITY.md:
--------------------------------------------------------------------------------
1 |
2 |
3 | ## Security
4 |
5 | Microsoft takes the security of our software products and services seriously, which includes all source code repositories managed through our GitHub organizations, which include [Microsoft](https://github.com/Microsoft), [Azure](https://github.com/Azure), [DotNet](https://github.com/dotnet), [AspNet](https://github.com/aspnet), [Xamarin](https://github.com/xamarin), and [our GitHub organizations](https://opensource.microsoft.com/).
6 |
7 | If you believe you have found a security vulnerability in any Microsoft-owned repository that meets [Microsoft's definition of a security vulnerability](https://docs.microsoft.com/en-us/previous-versions/tn-archive/cc751383(v=technet.10)), please report it to us as described below.
8 |
9 | ## Reporting Security Issues
10 |
11 | **Please do not report security vulnerabilities through public GitHub issues.**
12 |
13 | Instead, please report them to the Microsoft Security Response Center (MSRC) at [https://msrc.microsoft.com/create-report](https://msrc.microsoft.com/create-report).
14 |
15 | If you prefer to submit without logging in, send email to [secure@microsoft.com](mailto:secure@microsoft.com). If possible, encrypt your message with our PGP key; please download it from the [Microsoft Security Response Center PGP Key page](https://www.microsoft.com/en-us/msrc/pgp-key-msrc).
16 |
17 | You should receive a response within 24 hours. If for some reason you do not, please follow up via email to ensure we received your original message. Additional information can be found at [microsoft.com/msrc](https://www.microsoft.com/msrc).
18 |
19 | Please include the requested information listed below (as much as you can provide) to help us better understand the nature and scope of the possible issue:
20 |
21 | * Type of issue (e.g. buffer overflow, SQL injection, cross-site scripting, etc.)
22 | * Full paths of source file(s) related to the manifestation of the issue
23 | * The location of the affected source code (tag/branch/commit or direct URL)
24 | * Any special configuration required to reproduce the issue
25 | * Step-by-step instructions to reproduce the issue
26 | * Proof-of-concept or exploit code (if possible)
27 | * Impact of the issue, including how an attacker might exploit the issue
28 |
29 | This information will help us triage your report more quickly.
30 |
31 | If you are reporting for a bug bounty, more complete reports can contribute to a higher bounty award. Please visit our [Microsoft Bug Bounty Program](https://microsoft.com/msrc/bounty) page for more details about our active programs.
32 |
33 | ## Preferred Languages
34 |
35 | We prefer all communications to be in English.
36 |
37 | ## Policy
38 |
39 | Microsoft follows the principle of [Coordinated Vulnerability Disclosure](https://www.microsoft.com/en-us/msrc/cvd).
40 |
41 |
--------------------------------------------------------------------------------
/sarif/operations/upgrade_filter_op.py:
--------------------------------------------------------------------------------
1 | """
2 | Code for `sarif upgrade-filter` command.
3 | """
4 |
5 | import os
6 | import yaml
7 |
8 |
9 | def _load_blame_filter_file(file_path):
10 | filter_description = os.path.basename(file_path)
11 | include_patterns = []
12 | exclude_patterns = []
13 | try:
14 | with open(file_path, encoding="utf-8") as file_in:
15 | for line in file_in.readlines():
16 | if line.startswith("\ufeff"):
17 | # Strip byte order mark
18 | line = line[1:]
19 | lstrip = line.strip()
20 | if lstrip.startswith("#"):
21 | # Ignore comment lines
22 | continue
23 | pattern_spec = None
24 | is_include = True
25 | if lstrip.startswith("description:"):
26 | filter_description = lstrip[12:].strip()
27 | elif lstrip.startswith("+: "):
28 | is_include = True
29 | pattern_spec = lstrip[3:].strip()
30 | elif lstrip.startswith("-: "):
31 | is_include = False
32 | pattern_spec = lstrip[3:].strip()
33 | else:
34 | is_include = True
35 | pattern_spec = lstrip
36 | if pattern_spec:
37 | (include_patterns if is_include else exclude_patterns).append(
38 | pattern_spec
39 | )
40 | except UnicodeDecodeError as error:
41 | raise IOError(
42 | f"Cannot read blame filter file {file_path}: not UTF-8 encoded?"
43 | ) from error
44 | return (
45 | filter_description,
46 | include_patterns,
47 | exclude_patterns,
48 | )
49 |
50 |
51 | def upgrade_filter_file(old_filter_file, output_file):
52 | """Convert blame filter file to general filter file."""
53 | (
54 | filter_description,
55 | include_patterns,
56 | exclude_patterns,
57 | ) = _load_blame_filter_file(old_filter_file)
58 | new_filter_definition = {
59 | "description": (
60 | filter_description
61 | if filter_description
62 | else f"Migrated from {os.path.basename(old_filter_file)}"
63 | ),
64 | "configuration": {"default-include": True, "check-line-number": True},
65 | }
66 | if include_patterns:
67 | new_filter_definition["include"] = [
68 | {"author-mail": include_pattern} for include_pattern in include_patterns
69 | ]
70 | if exclude_patterns:
71 | new_filter_definition["exclude"] = [
72 | {"author-mail": exclude_pattern} for exclude_pattern in exclude_patterns
73 | ]
74 | with open(output_file, "w", encoding="utf8") as yaml_out:
75 | yaml.dump(new_filter_definition, yaml_out)
76 | print("Wrote", output_file)
77 |
--------------------------------------------------------------------------------
/tests/ops/info/test_info.py:
--------------------------------------------------------------------------------
1 | import datetime
2 | import json
3 | import os
4 | import tempfile
5 |
6 | from sarif.operations import info_op
7 | from sarif import sarif_file
8 |
9 | INPUT_SARIF = """{
10 | "$schema": "https://schemastore.azurewebsites.net/schemas/json/sarif-2.1.0-rtm.5.json",
11 | "version": "2.1.0",
12 | "runs": [
13 | {
14 | "tool": {"driver": {"name": "unit test"}},
15 | "results": [
16 | {
17 | "ruleId": "CA2101",
18 | "level": "error",
19 | "locations": [
20 | {
21 | "physicalLocation": {
22 | "artifactLocation": {
23 | "uri": "file:///C:/Code/main.c",
24 | "index": 0
25 | },
26 | "region": {"startLine": 24, "startColumn": 9}
27 | }
28 | }
29 | ]
30 | }
31 | ]
32 | }
33 | ]
34 | }
35 | """
36 |
37 | EXPECTED_OUTPUT_TXT = """<FILE_PATH>
38 | 840 bytes (1 KiB)
39 | modified: <MTIME>, accessed: <ATIME>, ctime: <CTIME>
40 | 1 run
41 | Tool: unit test
42 | 1 result
43 |
44 | """
45 |
46 |
47 | def test_info():
48 | with tempfile.TemporaryDirectory() as tmp:
49 | input_sarif_file_path = os.path.join(tmp, "input.sarif")
50 | with open(input_sarif_file_path, "wb") as f_in:
51 | f_in.write(INPUT_SARIF.encode())
52 |
53 | stat = os.stat(input_sarif_file_path)
54 | stat_mtime = datetime.datetime.fromtimestamp(stat.st_mtime).strftime(
55 | "%Y-%m-%d %H:%M:%S.%f"
56 | )
57 | stat_atime = datetime.datetime.fromtimestamp(stat.st_atime).strftime(
58 | "%Y-%m-%d %H:%M:%S.%f"
59 | )
60 | stat_ctime = datetime.datetime.fromtimestamp(stat.st_ctime).strftime(
61 | "%Y-%m-%d %H:%M:%S.%f"
62 | )
63 |
64 | input_sarif = json.loads(INPUT_SARIF)
65 |
66 | input_sarif_file = sarif_file.SarifFile(
67 | input_sarif_file_path, input_sarif, mtime=datetime.datetime.now()
68 | )
69 |
70 | input_sarif_file_set = sarif_file.SarifFileSet()
71 | input_sarif_file_set.files.append(input_sarif_file)
72 |
73 | file_path = os.path.join(tmp, "output.txt")
74 | info_op.generate_info(input_sarif_file_set, file_path)
75 |
76 | with open(file_path, "rb") as f_out:
77 | output = f_out.read().decode()
78 |
79 | assert output == EXPECTED_OUTPUT_TXT.replace("\n", os.linesep).replace(
80 |             "<FILE_PATH>", input_sarif_file_path
81 | ).replace(
82 |             "<MTIME>",
83 | stat_mtime,
84 | ).replace(
85 |             "<ATIME>",
86 | stat_atime,
87 | ).replace(
88 |             "<CTIME>",
89 | stat_ctime,
90 | )
91 |
--------------------------------------------------------------------------------
/sarif/operations/codeclimate_op.py:
--------------------------------------------------------------------------------
1 | """
2 | Code for `sarif codeclimate` command.
3 | """
4 |
5 | import os
6 | import json
7 | import hashlib
8 |
9 | from sarif.sarif_file import SarifFileSet
10 |
11 | _SEVERITIES = {"none": "info", "note": "info", "warning": "minor", "error": "major"}
12 |
13 |
14 | def generate(input_files: SarifFileSet, output: str, output_multiple_files: bool):
15 | """
16 | Generate a JSON file in Code Climate schema containing the list of issues from the SARIF files.
17 | See https://github.com/codeclimate/platform/blob/master/spec/analyzers/SPEC.md
18 | Gitlab usage guide - https://docs.gitlab.com/ee/ci/testing/code_quality.html#implement-a-custom-tool
19 | """
20 | output_file = output
21 | if output_multiple_files:
22 | for input_file in input_files:
23 | output_file_name = input_file.get_file_name_without_extension() + ".json"
24 | print(
25 | "Writing Code Climate JSON summary of",
26 | input_file.get_file_name(),
27 | "to",
28 | output_file_name,
29 | )
30 | _write_to_json(
31 | input_file.get_records(), os.path.join(output, output_file_name)
32 | )
33 | filter_stats = input_file.get_filter_stats()
34 | if filter_stats:
35 | print(f" Results are filtered by {filter_stats}")
36 | output_file = os.path.join(output, "static_analysis_output.json")
37 | source_description = input_files.get_description()
38 | print(
39 | "Writing Code Climate JSON summary for",
40 | source_description,
41 | "to",
42 | os.path.basename(output_file),
43 | )
44 | _write_to_json(input_files.get_records(), output_file)
45 | filter_stats = input_files.get_filter_stats()
46 | if filter_stats:
47 | print(f" Results are filtered by {filter_stats}")
48 |
49 |
50 | def _write_to_json(list_of_errors, output_file):
51 | """
52 | Write out the errors to a JSON file according to Code Climate specification.
53 | """
54 | content = []
55 | for record in list_of_errors:
56 | severity = _SEVERITIES.get(record.get("Severity", "warning"), "minor")
57 |
58 | # split Code value to extract error ID and description
59 | rule = record["Code"]
60 | description = record["Description"]
61 |
62 | path = record["Location"]
63 | line = record["Line"]
64 |
65 | fingerprint = hashlib.md5(
66 | f"{description} {path} ${line}`]".encode()
67 | ).hexdigest()
68 |
69 | # "categories" property is not used in GitLab but marked as "required" in Code Climate spec.
70 | # There is no easy way to determine a category so the fixed value is set.
71 | content.append(
72 | {
73 | "type": "issue",
74 | "check_name": rule,
75 | "description": description,
76 | "categories": ["Bug Risk"],
77 | "location": {"path": path, "lines": {"begin": line}},
78 | "severity": severity,
79 | "fingerprint": fingerprint,
80 | }
81 | )
82 |
83 | with open(output_file, "w", encoding="utf-8") as file_out:
84 | json.dump(content, file_out, indent=4)
85 |
--------------------------------------------------------------------------------
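[Usage sketch, not a repository file] A minimal call of `generate` above to produce a Code Climate JSON report, e.g. for use as a GitLab code-quality artifact; the input directory and output file name are made up:

from sarif import loader
from sarif.operations import codeclimate_op

# Hypothetical paths: convert a directory of .sarif files into one Code Climate JSON report.
file_set = loader.load_sarif_files("scans")
codeclimate_op.generate(file_set, "gl-code-quality-report.json", output_multiple_files=False)
--------------------------------------------------------------------------------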
/tests/ops/copy/test_copy.py:
--------------------------------------------------------------------------------
1 | from copy import deepcopy
2 | import datetime
3 | import json
4 | import jsonschema
5 | import os
6 | import tempfile
7 |
8 | from sarif.operations import copy_op
9 | from sarif import sarif_file
10 | from tests.utils import get_sarif_schema
11 |
12 | SARIF_WITH_1_ISSUE = {
13 | "$schema": "https://schemastore.azurewebsites.net/schemas/json/sarif-2.1.0-rtm.5.json",
14 | "version": "2.1.0",
15 | "runs": [
16 | {
17 | "tool": {"driver": {"name": "unit test"}},
18 | "results": [
19 | {
20 | "ruleId": "CA2101",
21 | "message": {"text": "just testing"},
22 | "level": "error",
23 | "locations": [
24 | {
25 | "physicalLocation": {
26 | "artifactLocation": {
27 | "uri": "file:///C:/Code/main.c",
28 | "index": 0,
29 | },
30 | "region": {"startLine": 24, "startColumn": 9},
31 | }
32 | }
33 | ],
34 | }
35 | ],
36 | }
37 | ],
38 | }
39 |
40 |
41 | def test_generate_sarif():
42 | sarif_schema = get_sarif_schema()
43 | input_sarif_file = sarif_file.SarifFile(
44 | "SARIF_WITH_1_ISSUE", SARIF_WITH_1_ISSUE, mtime=datetime.datetime.now()
45 | )
46 | jsonschema.validate(input_sarif_file.data, schema=sarif_schema)
47 |
48 | input_sarif_file_set = sarif_file.SarifFileSet()
49 | input_sarif_file_set.files.append(input_sarif_file)
50 | with tempfile.TemporaryDirectory() as tmp:
51 | output_file_path = os.path.join(tmp, "copied.json")
52 | output_sarif_file = copy_op.generate_sarif(
53 | input_sarif_file_set,
54 | output_file_path,
55 | append_timestamp=False,
56 | sarif_tools_version="1.2.3",
57 | cmdline="unit-test",
58 | )
59 |
60 | with open(output_file_path, "rb") as f_out:
61 | output_sarif = json.load(f_out)
62 | assert output_sarif_file.data == output_sarif
63 | jsonschema.validate(output_sarif, schema=sarif_schema)
64 |
65 | expected_sarif = deepcopy(input_sarif_file.data)
66 | conversion = {
67 | "tool": {
68 | "driver": {
69 | "name": "sarif-tools",
70 | "fullName": "sarif-tools https://github.com/microsoft/sarif-tools/",
71 | "version": "1.2.3",
72 | "properties": {
73 | "file": input_sarif_file.abs_file_path,
74 | "modified": input_sarif_file.mtime.isoformat(),
75 | "processed": output_sarif["runs"][0]["conversion"]["tool"][
76 | "driver"
77 | ]["properties"]["processed"],
78 | },
79 | }
80 | },
81 | "invocation": {
82 | "commandLine": "unit-test",
83 | "executionSuccessful": True,
84 | },
85 | }
86 | expected_sarif["runs"][0]["conversion"] = conversion
87 | assert output_sarif == expected_sarif
88 |
--------------------------------------------------------------------------------
/sarif/operations/trend_op.py:
--------------------------------------------------------------------------------
1 | """
2 | Code for `sarif trend` command.
3 | """
4 |
5 | import csv
6 | from typing import Dict, List, Literal
7 |
8 | from sarif import sarif_file
9 | from sarif.sarif_file import SarifFileSet
10 |
11 | TIMESTAMP_COLUMNS = ["Date", "Tool", *sarif_file.SARIF_SEVERITIES_WITH_NONE]
12 |
13 |
14 | def generate_trend_csv(
15 | input_files: SarifFileSet,
16 | output_file: str,
17 | dateformat: Literal["dmy", "mdy", "ymd"],
18 | ) -> None:
19 | """
20 | Generate a timeline csv of the issues from the SARIF files. Each SARIF file must contain a
21 | timestamp of the form 20211012T110000Z in its filename.
22 |     input_files is the SarifFileSet to report on.
23 | output_file is the name of a CSV file to write, or if None, the name
24 | `static_analysis_trend.csv` will be used.
25 | """
26 | if not output_file:
27 | output_file = "static_analysis_trend.csv"
28 |
29 | error_storage = []
30 | for input_file in input_files:
31 | input_file_name = input_file.get_file_name()
32 | print("Processing", input_file_name)
33 | error_list = input_file.get_records()
34 | tool_name = "/".join(input_file.get_distinct_tool_names())
35 | # Date parsing
36 | parsed_date = input_file.get_filename_timestamp()
37 | if not parsed_date:
38 | raise ValueError(f"Unable to parse date from filename: {input_file_name}")
39 |
40 | # Turn the date into something that looks nice in excel (d/m/y UK date format)
41 | dstr = parsed_date[0]
42 | (year, month, day, hour, minute) = (
43 | dstr[0:4],
44 | dstr[4:6],
45 | dstr[6:8],
46 | dstr[9:11],
47 | dstr[11:13],
48 | )
49 | if dateformat == "ymd":
50 | excel_date = f"{year}-{month}-{day} {hour}:{minute}"
51 | elif dateformat == "mdy":
52 | excel_date = f"{month}/{day}/{year} {hour}:{minute}"
53 | else:
54 | excel_date = f"{day}/{month}/{year} {hour}:{minute}"
55 |
56 | # Store data
57 | error_storage.append(
58 | _store_errors(parsed_date, excel_date, tool_name, error_list)
59 | )
60 |
61 | error_storage.sort(key=lambda record: record["_timestamp"])
62 |
63 | print("Writing trend CSV to", output_file)
64 | _write_csv(output_file, error_storage)
65 | filter_stats = input_files.get_filter_stats()
66 | if filter_stats:
67 | print(f" Results are filtered by {filter_stats}")
68 |
69 |
70 | def _write_csv(output_file: str, error_storage: List[Dict]) -> None:
71 | with open(output_file, "w", encoding="utf-8") as file_out:
72 | writer = csv.DictWriter(
73 | file_out, TIMESTAMP_COLUMNS, extrasaction="ignore", lineterminator="\n"
74 | )
75 | writer.writeheader()
76 | for key in error_storage:
77 | writer.writerow(key)
78 |
79 |
80 | def _store_errors(timestamp, excel_date, tool: str, list_of_errors: List[Dict]) -> Dict:
81 | results = {
82 | "_timestamp": timestamp, # not written to CSV, but used for sorting
83 | "Date": excel_date,
84 | "Tool": tool,
85 | }
86 | for severity in sarif_file.SARIF_SEVERITIES_WITH_NONE:
87 | error_count = sum(1 for e in list_of_errors if severity in e["Severity"])
88 | results[severity] = error_count
89 |
90 | return results
91 |
--------------------------------------------------------------------------------
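[Usage sketch, not a repository file] A minimal call of `generate_trend_csv` above; per its docstring, each input file name must embed a timestamp such as `20211012T110000Z`. The directory and output names are made up:

from sarif import loader
from sarif.operations import trend_op

# Hypothetical directory containing e.g. scan_20211012T110000Z.sarif, scan_20211019T110000Z.sarif, ...
file_set = loader.load_sarif_files("nightly_scans")
trend_op.generate_trend_csv(file_set, "static_analysis_trend.csv", dateformat="ymd")
--------------------------------------------------------------------------------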
/sarif/operations/emacs_op.py:
--------------------------------------------------------------------------------
1 | """
2 | Code for `sarif emacs` command.
3 | """
4 |
5 | from datetime import datetime
6 | import os
7 |
8 | from jinja2 import Environment, FileSystemLoader, select_autoescape
9 |
10 | from sarif import sarif_file
11 |
12 | _THIS_MODULE_PATH = os.path.dirname(__file__)
13 |
14 | _TEMPLATES_PATH = os.path.join(_THIS_MODULE_PATH, "templates")
15 |
16 | _ENV = Environment(
17 | loader=FileSystemLoader(searchpath=_TEMPLATES_PATH),
18 | autoescape=select_autoescape(),
19 | )
20 |
21 |
22 | def generate_compile(
23 | input_files: sarif_file.SarifFileSet,
24 | output: str,
25 | output_multiple_files: bool,
26 | date_val: datetime = datetime.now(),
27 | ):
28 | """
29 | Generate txt file from the input files.
30 | """
31 | output_file = output
32 | if output_multiple_files:
33 | for input_file in input_files:
34 | output_file_name = input_file.get_file_name_without_extension() + ".txt"
35 | print(
36 | "Writing results for",
37 | input_file.get_file_name(),
38 | "to",
39 | output_file_name,
40 | )
41 | _generate_single_txt(
42 | input_file, os.path.join(output, output_file_name), date_val
43 | )
44 | output_file = os.path.join(output, ".compile.txt")
45 | source_description = input_files.get_description()
46 | print(
47 | "Writing results for",
48 | source_description,
49 | "to",
50 | os.path.basename(output_file),
51 | )
52 | _generate_single_txt(input_files, output_file, date_val)
53 |
54 |
55 | def _generate_single_txt(input_file, output_file, date_val):
56 | all_tools = input_file.get_distinct_tool_names()
57 | report = input_file.get_report()
58 |
59 | total_distinct_issue_codes = 0
60 | problems = []
61 | severities = report.get_severities()
62 |
63 | for severity in severities:
64 | distinct_issue_codes = report.get_issue_type_count_for_severity(severity)
65 |
66 | total_distinct_issue_codes += distinct_issue_codes
67 |
68 | severity_details = _enrich_details(
69 | report.get_issues_grouped_by_type_for_severity(severity)
70 | )
71 |
72 | severity_section = {
73 | "type": severity,
74 | "count": distinct_issue_codes,
75 | "details": severity_details,
76 | }
77 |
78 | problems.append(severity_section)
79 |
80 | filtered = None
81 | filter_stats = input_file.get_filter_stats()
82 | if filter_stats:
83 | filtered = f"Results were filtered by {filter_stats}."
84 |
85 | template = _ENV.get_template("sarif_emacs.txt")
86 | txt_content = template.render(
87 | report_type=", ".join(all_tools),
88 | report_date=date_val,
89 | severities=", ".join(severities),
90 | total=total_distinct_issue_codes,
91 | problems=problems,
92 | filtered=filtered,
93 | )
94 |
95 | with open(output_file, "wt", encoding="utf-8") as file_out:
96 | file_out.write(txt_content)
97 |
98 |
99 | def _enrich_details(records_of_severity):
100 | return [
101 | {"code": key, "count": len(records), "details": records}
102 | for (key, records) in records_of_severity.items()
103 | ]
104 |
--------------------------------------------------------------------------------
/sarif/operations/copy_op.py:
--------------------------------------------------------------------------------
1 | """
2 | Code for `sarif copy` command.
3 | """
4 |
5 | import copy
6 | import datetime
7 | import json
8 | import os
9 |
10 | from sarif import loader, sarif_file
11 | from sarif.sarif_file import SarifFileSet, SarifFile
12 |
13 |
14 | def generate_sarif(
15 | input_files: SarifFileSet,
16 | output: str,
17 | append_timestamp: bool,
18 | sarif_tools_version: str,
19 | cmdline: str,
20 | ) -> SarifFile:
21 | """
22 | Generate a new SARIF file based on the input files
23 | """
24 | sarif_data_out = {
25 | "$schema": "https://schemastore.azurewebsites.net/schemas/json/sarif-2.1.0-rtm.5.json",
26 | "version": "2.1.0",
27 | "runs": [],
28 | }
29 | now = datetime.datetime.now(datetime.timezone.utc)
30 | output_file_abs_path = os.path.abspath(output)
31 | conversion_timestamp_iso8601 = now.isoformat()
32 | conversion_timestamp_trendformat = now.strftime(sarif_file.DATETIME_FORMAT)
33 | run_count = 0
34 | input_file_count = 0
35 | for input_file in input_files:
36 | if input_file.get_abs_file_path() == output_file_abs_path:
37 | print(f"Auto-excluding output file {output} from input file list")
38 | continue
39 | input_file_count += 1
40 | input_file_path = input_file.get_abs_file_path()
41 | input_file_modified_iso8601 = input_file.mtime.isoformat()
42 | for input_run in input_file.runs:
43 | run_count += 1
44 | # Create a shallow copy
45 | input_run_json_copy = copy.copy(input_run.run_data)
46 | conversion_properties = {
47 | "file": input_file_path,
48 | "modified": input_file_modified_iso8601,
49 | "processed": conversion_timestamp_iso8601,
50 | }
51 | input_run_json_copy["conversion"] = {
52 | "tool": {
53 | "driver": {
54 | "name": "sarif-tools",
55 | "fullName": "sarif-tools https://github.com/microsoft/sarif-tools/",
56 | "version": sarif_tools_version,
57 | "properties": conversion_properties,
58 | }
59 | },
60 | "invocation": {"commandLine": cmdline, "executionSuccessful": True},
61 | }
62 | results = input_run.get_results()
63 | filter_stats = input_run.get_filter_stats()
64 | if filter_stats:
65 | input_run_json_copy["results"] = results
66 | conversion_properties["filtered"] = filter_stats.to_json_camel_case()
67 | sarif_data_out["runs"].append(input_run_json_copy)
68 | output_file_path = output
69 | if append_timestamp:
70 | output_split = os.path.splitext(output)
71 | output_file_path = (
72 | output_split[0]
73 | + f"_{conversion_timestamp_trendformat}"
74 | + (output_split[1] or ".sarif")
75 | )
76 | with open(output_file_path, "w", encoding="utf-8") as file_out:
77 | json.dump(sarif_data_out, file_out, indent=4)
78 | runs_string = "1 run" if run_count == 1 else f"{run_count} runs"
79 | files_string = (
80 | "1 SARIF file" if input_file_count == 1 else f"{input_file_count} SARIF files"
81 | )
82 | print(f"Wrote {output_file_path} with {runs_string} from {files_string}")
83 | total_filter_stats = input_files.get_filter_stats()
84 | if total_filter_stats:
85 | print(total_filter_stats.to_string())
86 | return loader.load_sarif_file(output_file_path)
87 |
--------------------------------------------------------------------------------
/tests/ops/trend/test_trend.py:
--------------------------------------------------------------------------------
1 | import datetime
2 | import json
3 | import os
4 | import tempfile
5 |
6 | from sarif.operations import trend_op
7 | from sarif import sarif_file
8 |
9 | INPUT_SARIF_1 = """{
10 | "$schema": "https://schemastore.azurewebsites.net/schemas/json/sarif-2.1.0-rtm.5.json",
11 | "version": "2.1.0",
12 | "runs": [
13 | {
14 | "tool": {"driver": {"name": "name 1"}},
15 | "results": [
16 | {
17 | "ruleId": "CA2103",
18 | "level": "error"
19 | },
20 | {
21 | "ruleId": "CA2102",
22 | "level": "warning"
23 | },
24 | {
25 | "ruleId": "CA2101",
26 | "level": "warning"
27 | },
28 | {
29 | "ruleId": "CA2101",
30 | "level": "error"
31 | },
32 | {
33 | "ruleId": "CA2101",
34 | "level": "note"
35 | },
36 | {
37 | "ruleId": "CA2101",
38 | "level": "none"
39 | },
40 | {
41 | "ruleId": "CA2101",
42 | "level": "error"
43 | }
44 | ]
45 | }
46 | ]
47 | }
48 | """
49 |
50 | INPUT_SARIF_2 = """{
51 | "$schema": "https://schemastore.azurewebsites.net/schemas/json/sarif-2.1.0-rtm.5.json",
52 | "version": "2.1.0",
53 | "runs": [
54 | {
55 | "tool": {"driver": {"name": "name 2"}},
56 | "results": [
57 | {
58 | "ruleId": "CA2101",
59 | "level": "error"
60 | },
61 | {
62 | "ruleId": "CA2101",
63 | "level": "note"
64 | },
65 | {
66 | "ruleId": "CA2101",
67 | "level": "none"
68 | },
69 | {
70 | "ruleId": "CA2101",
71 | "level": "error"
72 | }
73 | ]
74 | }
75 | ]
76 | }
77 | """
78 |
79 | INPUTS = {
80 | "trend_test_20250106T060000Z.sarif": INPUT_SARIF_1,
81 | "trend_test_20250107T060000Z.sarif": INPUT_SARIF_2,
82 | }
83 |
84 | EXPECTED_OUTPUT_TXT = """Date,Tool,error,warning,note,none
85 | 06/01/2025 06:00,name 1,3,2,1,1
86 | 07/01/2025 06:00,name 2,2,0,1,1
87 | """
88 |
89 |
90 | def test_trend():
91 | with tempfile.TemporaryDirectory() as tmp:
92 | input_sarif_file_set = sarif_file.SarifFileSet()
93 |
94 | for input_file_name, input_json in INPUTS.items():
95 | input_sarif_file_path = os.path.join(tmp, input_file_name)
96 | with open(input_sarif_file_path, "wb") as f_in:
97 | f_in.write(input_json.encode())
98 |
99 | input_sarif = json.loads(input_json)
100 |
101 | input_sarif_file = sarif_file.SarifFile(
102 | input_sarif_file_path, input_sarif, mtime=datetime.datetime.now()
103 | )
104 |
105 | input_sarif_file_set.files.append(input_sarif_file)
106 |
107 | file_path = os.path.join(tmp, "output.txt")
108 | trend_op.generate_trend_csv(input_sarif_file_set, file_path, dateformat="dmy")
109 |
110 | with open(file_path, "rb") as f_out:
111 | output = f_out.read().decode()
112 |
113 | assert output == EXPECTED_OUTPUT_TXT.replace("\n", os.linesep)
114 |
--------------------------------------------------------------------------------
/tests/ops/diff/test_diff.py:
--------------------------------------------------------------------------------
1 | import datetime
2 | import json
3 | import os
4 | import tempfile
5 |
6 | from sarif.operations import diff_op
7 | from sarif import sarif_file
8 |
9 | SARIF_WITH_1_ISSUE = {
10 | "$schema": "https://schemastore.azurewebsites.net/schemas/json/sarif-2.1.0-rtm.5.json",
11 | "version": "2.1.0",
12 | "runs": [
13 | {
14 | "tool": {"driver": {"name": "unit test"}},
15 | "results": [
16 | {
17 | "ruleId": "CA2101",
18 | "level": "error",
19 | "locations": [
20 | {
21 | "physicalLocation": {
22 | "artifactLocation": {
23 | "uri": "file:///C:/Code/main.c",
24 | "index": 0,
25 | },
26 | "region": {"startLine": 24, "startColumn": 9},
27 | }
28 | }
29 | ],
30 | }
31 | ],
32 | }
33 | ],
34 | }
35 |
36 | SARIF_WITH_2_ISSUES = {
37 | "$schema": "https://schemastore.azurewebsites.net/schemas/json/sarif-2.1.0-rtm.5.json",
38 | "version": "2.1.0",
39 | "runs": [
40 | {
41 | "tool": {"driver": {"name": "unit test"}},
42 | "results": [
43 | {
44 | "ruleId": "CA2101",
45 | "level": "error",
46 | "locations": [
47 | {
48 | "physicalLocation": {
49 | "artifactLocation": {
50 | "uri": "file:///C:/Code/main.c",
51 | "index": 0,
52 | },
53 | "region": {"startLine": 24, "startColumn": 9},
54 | }
55 | }
56 | ],
57 | },
58 | {
59 | "ruleId": "CA2102",
60 | "level": "error",
61 | "locations": [
62 | {
63 | "physicalLocation": {
64 | "artifactLocation": {
65 | "uri": "file:///C:/Code/main.c",
66 | "index": 0,
67 | },
68 | "region": {"startLine": 34, "startColumn": 9},
69 | }
70 | }
71 | ],
72 | },
73 | ],
74 | "columnKind": "utf16CodeUnits",
75 | }
76 | ],
77 | }
78 |
79 |
80 | def test_print_diff():
81 | mtime = datetime.datetime.now()
82 | old_sarif = sarif_file.SarifFile(
83 | "SARIF_WITH_1_ISSUE", SARIF_WITH_1_ISSUE, mtime=mtime
84 | )
85 | new_sarif = sarif_file.SarifFile(
86 | "SARIF_WITH_2_ISSUES", SARIF_WITH_2_ISSUES, mtime=mtime
87 | )
88 | with tempfile.TemporaryDirectory() as tmp:
89 | file_path = os.path.join(tmp, "diff.json")
90 | result = diff_op.print_diff(
91 | old_sarif, new_sarif, file_path, check_level="warning"
92 | )
93 | with open(file_path, "rb") as f_in:
94 | diff_dict = json.load(f_in)
95 | assert result == 1
96 | assert diff_dict == {
97 | "all": {"+": 1, "-": 0},
98 | "error": {
99 | "+": 1,
100 | "-": 0,
101 | "codes": {
102 | "CA2102": {
103 | "<": 0,
104 | ">": 1,
105 | "+@": [{"Location": "file:///C:/Code/main.c", "Line": 34}],
106 | }
107 | },
108 | },
109 | "warning": {"+": 0, "-": 0, "codes": {}},
110 | "note": {"+": 0, "-": 0, "codes": {}},
111 | }
112 | # If issues have decreased, return value should be 0.
113 | assert (
114 | diff_op.print_diff(new_sarif, old_sarif, file_path, check_level="warning")
115 | == 0
116 | )
117 |
--------------------------------------------------------------------------------
/sarif/filter/filter_stats.py:
--------------------------------------------------------------------------------
1 | """
2 | Statistics that record the outcome of a filter.
3 | """
4 |
5 | import datetime
6 |
7 |
8 | class FilterStats:
9 | """
10 | Statistics that record the outcome of a filter.
11 | """
12 |
13 | def __init__(self, filter_description):
14 | self.filter_description = filter_description
15 | # Filter stats can also be loaded from a file created by `sarif copy`.
16 | self.rehydrated = False
17 | self.filter_datetime = None
18 | self.filtered_in_result_count = 0
19 | self.filtered_out_result_count = 0
20 | self.missing_property_count = 0
21 | self.unconvincing_line_number_count = 0
22 |
23 | def reset_counters(self):
24 | """
25 | Zero all the counters.
26 | """
27 | self.filter_datetime = datetime.datetime.now()
28 | self.filtered_in_result_count = 0
29 | self.filtered_out_result_count = 0
30 | self.missing_property_count = 0
31 | self.unconvincing_line_number_count = 0
32 |
33 | def add(self, other_filter_stats):
34 | """
35 | Add another set of filter stats to my totals.
36 | """
37 | if other_filter_stats:
38 | if other_filter_stats.filter_description and (
39 | other_filter_stats.filter_description != self.filter_description
40 | ):
41 | self.filter_description += f", {other_filter_stats.filter_description}"
42 | self.filtered_in_result_count += other_filter_stats.filtered_in_result_count
43 | self.filtered_out_result_count += (
44 | other_filter_stats.filtered_out_result_count
45 | )
46 | self.missing_property_count += other_filter_stats.missing_property_count
47 | self.unconvincing_line_number_count += (
48 | other_filter_stats.unconvincing_line_number_count
49 | )
50 |
51 | def __str__(self):
52 | """
53 | Automatic to_string()
54 | """
55 | return self.to_string()
56 |
57 | def to_string(self):
58 | """
59 | Generate a summary string for these filter stats.
60 | """
61 | ret = f"'{self.filter_description}'"
62 | if self.filter_datetime:
63 | ret += " at "
64 | ret += self.filter_datetime.strftime("%c")
65 | ret += (
66 | f": {self.filtered_out_result_count} filtered out, "
67 | f"{self.filtered_in_result_count} passed the filter"
68 | )
69 | if self.unconvincing_line_number_count:
70 | ret += (
71 | f", {self.unconvincing_line_number_count} included by default "
72 | "for lacking line number information"
73 | )
74 | if self.missing_property_count:
75 | ret += (
76 | f", {self.missing_property_count} included by default "
77 | "for lacking data to filter"
78 | )
79 |
80 | return ret
81 |
82 | def to_json_camel_case(self):
83 | """
84 | Generate filter stats as JSON using camelCase naming,
85 | to fit with SARIF standard section 3.8.1 (Property Bags).
86 | """
87 | return {
88 | "filter": self.filter_description,
89 | "in": self.filtered_in_result_count,
90 | "out": self.filtered_out_result_count,
91 | "default": {
92 | "noProperty": self.missing_property_count,
93 | "noLineNumber": self.unconvincing_line_number_count,
94 | },
95 | }
96 |
97 |
98 | def load_filter_stats_from_json(json_data):
99 | """
100 | Load filter stats from a SARIF file property bag using camelCase naming
101 | as per SARIF standard section 3.8.1 (Property Bags).
102 | """
103 | ret = None
104 | if json_data:
105 | ret = FilterStats(json_data["filter"])
106 | ret.rehydrated = True
107 | ret.filtered_in_result_count = json_data.get("in", 0)
108 | ret.filtered_out_result_count = json_data.get("out", 0)
109 | default_stats = json_data.get("default", {})
110 | ret.unconvincing_line_number_count = default_stats.get("noLineNumber", 0)
111 | ret.missing_property_count = default_stats.get("noProperty", 0)
112 | return ret
113 |
--------------------------------------------------------------------------------
/sarif/operations/templates/sarif_summary.html:
--------------------------------------------------------------------------------
1 |
2 |
76 |
77 |
78 | {% if image_data_base64 -%}
79 |
80 |
81 |
82 |
83 |
84 |
85 |
86 |
87 |
88 | {%- endif %}
89 |
90 | Sarif Summary: {{ report_type }}
91 | Document generated on: {{ report_date }}
92 | Total number of distinct issues of all severities ({{ severities }}): {{ total }}
93 | {% if filtered -%}
94 | {{ filtered }}
95 | {%- endif %}
96 |
97 | {% if chart_image_data_base64 -%}
98 |
99 | {%- endif %}
100 |
101 | {% for problem in problems %}
102 | Severity : {{ problem.type }} [ {{ problem.count }} ]
103 |
104 | {%- for error in problem.details %}
105 | -
106 |
107 |
108 |
109 | {%- for link in error.links %}
110 | - {{ link.0 }}
111 | {%- endfor %}
112 | {%- for line in error.details %}
113 | {%- if line.Location %}
114 | - {{ line.Location }}:{{ line.Line }}
115 | {%- else %}
116 | - {{ line.Description }}
117 | {%- endif %}
118 | {%- endfor %}
119 |
120 |
121 |
122 | {%- endfor %}
123 |
124 | {%- endfor %}
125 |
126 |
--------------------------------------------------------------------------------
/azure-pipelines/release.yml:
--------------------------------------------------------------------------------
1 | name: Release
2 | trigger: none
3 | pr: none
4 |
5 | variables:
6 | - template: templates/globals.yml
7 | - name: TeamName
8 | value: sarif-tools
9 |
10 | resources:
11 | repositories:
12 | - repository: MicroBuildTemplate
13 | type: git
14 | name: 1ESPipelineTemplates/MicroBuildTemplate
15 | ref: refs/tags/release
16 |
17 | extends:
18 | template: azure-pipelines/MicroBuild.1ES.Official.yml@MicroBuildTemplate
19 | parameters:
20 | sdl:
21 | sourceAnalysisPool: VSEngSS-MicroBuild2022-1ES
22 | pool:
23 | name: AzurePipelines-EO
24 | demands:
25 | - ImageOverride -equals 1ESPT-Ubuntu22.04
26 | os: Linux
27 | customBuildTags:
28 | - ES365AIMigrationTooling
29 | stages:
30 | - template: templates/build_stage.yml@self
31 |
32 | - stage: CreateTag
33 | displayName: Create Tag
34 | dependsOn: Build
35 | variables:
36 | releaseVersionWithPrefix: $[ stageDependencies.Build.Build.outputs['getReleaseVersionStep.releaseVersionWithPrefix'] ]
37 | jobs:
38 | - job: CreateTag
39 | steps:
40 | - checkout: self
41 | fetchDepth: 1
42 | fetchTags: false
43 | persistCredentials: true
44 |
45 | - script: |
46 | git config user.email "41898282+github-actions[bot]@users.noreply.github.com"
47 | git config user.name "Azure Pipelines"
48 | git fetch --depth 1 origin $(Build.SourceBranchName)
49 | git tag -a $(releaseVersionWithPrefix) -m "Release $(releaseVersionWithPrefix)" origin/$(Build.SourceBranchName)
50 | git push origin $(releaseVersionWithPrefix)
51 | displayName: Create git tag
52 |
53 | - stage: CreateRelease
54 | displayName: Create GitHub Release
55 | dependsOn:
56 | - Build
57 | - CreateTag
58 | variables:
59 | releaseVersionWithPrefix: $[ stageDependencies.Build.Build.outputs['getReleaseVersionStep.releaseVersionWithPrefix'] ]
60 | jobs:
61 | - job: CreateRelease
62 | templateContext:
63 | type: releaseJob
64 | isProduction: true
65 | inputs:
66 | - input: pipelineArtifact
67 | artifactName: $(ARTIFACT_NAME_WHEEL)
68 | targetPath: $(Build.StagingDirectory)/dist
69 | steps:
70 | - task: GitHubRelease@1 #https://learn.microsoft.com/en-us/azure/devops/pipelines/tasks/reference/github-release-v1?view=azure-pipelines
71 | displayName: Create GitHub Release
72 | inputs:
73 | gitHubConnection: GitHub-sarif-tools
74 | repositoryName: microsoft/sarif-tools
75 | action: create
76 | target: $(Build.SourceBranchName)
77 | title: $(releaseVersionWithPrefix)
78 | tag: $(releaseVersionWithPrefix)
79 | tagSource: userSpecifiedTag
80 | isDraft: true
81 | addChangeLog: false
82 | assets: $(Build.StagingDirectory)/dist/*
83 |
84 | - stage: WaitForValidation
85 | dependsOn: CreateRelease
86 | jobs:
87 | - job: wait_for_validation
88 | displayName: Wait for manual validation
89 | pool: server
90 | steps:
91 | - task: ManualValidation@0
92 | timeoutInMinutes: 1440 # task times out in 1 day
93 | inputs:
94 | notifyUsers: plseng@microsoft.com
95 | instructions: Please test the latest draft release and then publish it.
96 | onTimeout: reject
97 |
98 | - stage: Release
99 | dependsOn: WaitForValidation
100 | jobs:
101 | - job: PublishToPyPi
102 | displayName: Release to PyPi
103 |
104 | pool:
105 | name: VSEngSS-MicroBuild2022-1ES # This pool is required to have the certs needed to publish to PyPi using ESRP.
106 | os: windows
107 | image: server2022-microbuildVS2022-1es
108 |
109 | templateContext:
110 | type: releaseJob
111 | isProduction: true
112 | inputs:
113 | - input: pipelineArtifact
114 | artifactName: $(ARTIFACT_NAME_WHEEL)
115 | targetPath: $(Build.StagingDirectory)/dist
116 |
117 | steps:
118 | - template: MicroBuild.Publish.yml@MicroBuildTemplate
119 | parameters:
120 | intent: PackageDistribution
121 | contentType: PyPi
122 | contentSource: Folder
123 | folderLocation: $(Build.StagingDirectory)/dist
124 | waitForReleaseCompletion: true
125 | owners: rchiodo@microsoft.com
126 | approvers: grwheele@microsoft.com
127 |
--------------------------------------------------------------------------------
/CONTRIBUTING.md:
--------------------------------------------------------------------------------
1 | # Contributing
2 |
3 | This project welcomes contributions and suggestions. Most contributions require you to
4 | agree to a Contributor License Agreement (CLA) declaring that you have the right to,
5 | and actually do, grant us the rights to use your contribution. For details, visit
6 | https://cla.microsoft.com.
7 |
8 | When you submit a pull request, a CLA-bot will automatically determine whether you need
9 | to provide a CLA and decorate the PR appropriately (e.g., label, comment). Simply follow the
10 | instructions provided by the bot. You will only need to do this once across all repositories using our CLA.
11 |
12 | This project has adopted the [Microsoft Open Source Code of Conduct](https://opensource.microsoft.com/codeofconduct/).
13 | For more information see the [Code of Conduct FAQ](https://opensource.microsoft.com/codeofconduct/faq/)
14 | or contact [opencode@microsoft.com](mailto:opencode@microsoft.com) with any additional questions or comments.
15 |
16 | ## Pull Requests
17 |
18 | Pull requests are welcome.
19 |
20 | 1. Fork the repository.
21 | 2. Make and test your changes (see Developer Guide below).
22 | 3. Run `poetry run ruff format` and `poetry run black sarif` to format the code.
23 | 4. Run `poetry run pylint sarif` and check for no new errors or warnings.
24 | 5. Raise Pull Request in GitHub.com.
25 |
26 | ## Developer Guide
27 |
28 | ### Prerequisites
29 |
30 | - You need Python 3.8 installed.
31 | - This is the minimum supported version of the tool. Developing with a later version risks introducing type hints such as `list[dict]` that are not compatible with Python 3.8.
32 | - You need Poetry installed. Run this in an Admin CMD or under `sudo`:
33 | - `pip install poetry`
34 |
35 | Initialise Poetry by telling it where Python 3.8 is, e.g.
36 |
37 | ```bash
38 | # Windows - adjust to the path where you have installed Python 3.8.
39 | poetry env use "C:\Python38\python.exe"
40 | # Linux
41 | poetry env use 3.8
42 | ```
43 |
44 | This is not necessary if your system Python version is 3.8.
45 |
46 | ### Running locally in Poetry virtualenv
47 |
48 | ```bash
49 | poetry install
50 | poetry run sarif
51 | ```
52 |
53 | To check that the right versions are being run:
54 |
55 | ```bash
56 | poetry run python --version
57 | poetry run sarif --version --debug
58 | poetry run python -m sarif --version --debug
59 | ```
60 |
61 | To see which executable is being run:
62 |
63 | ```bash
64 | # Windows
65 | poetry run cmd /c "where sarif"
66 | # Linux
67 | poetry run which sarif
68 | ```
69 |
70 | ### Update dependency versions
71 |
72 | Run `poetry update` to bump package versions in the `poetry.lock` file.
73 |
74 | ### Update product version
75 |
76 | Change the `version =` line in `pyproject.toml` to the new semantic version for your change.
77 |
78 | Change the version in `sarif/__init__.py` as well.
79 |
80 | ### Run unit tests
81 |
82 | ```bash
83 | poetry run pytest
84 | ```
85 |
86 | ### Package using `poetry build`
87 |
88 | Run it on the source code:
89 |
90 | ```bash
91 | poetry build
92 | ```
93 |
94 | If you want, you can install the package built locally at system level (outside the Poetry virtual environment):
95 |
96 | ```bash
97 | pip install dist/sarif-*.whl
98 | ```
99 |
100 | To remove it again:
101 |
102 | ```bash
103 | pip uninstall sarif-tools
104 | ```
105 |
106 | Note that there are two possible levels of installation:
107 |
108 | #### User installation
109 |
110 | When you run `pip install` and `pip` doesn't have permissions to write to the Python installation's `site-packages` directory, probably because you are not running as an admin/superuser, the package is installed at "user" level only. You can run it using:
111 |
112 | ```bash
113 | python -m sarif
114 | ```
115 |
116 | You *cannot* run it using the bare command `sarif`, unless you add your user-level `Scripts` directory to your `PATH`. You can see where that is in the output from `pip install`:
117 |
118 | ```plain
119 | Installing collected packages: sarif
120 | WARNING: The script sarif.exe is installed in 'C:\Users\yournamehere\AppData\Roaming\Python\Python39\Scripts' which is not on PATH.
121 | Consider adding this directory to PATH or, if you prefer to suppress this warning, use --no-warn-script-location.
122 | ```
123 |
124 | #### System installation
125 |
126 | When you run `pip install` and `pip` has permissions to write to the Python installation's `site-packages` directory, and the Python installation's `Scripts` directory is in your path, then you can run the `sarif` command without `python -m`:
127 |
128 | ```bash
129 | sarif
130 | ```
131 |
132 | ### Adding packages from pypi to the project
133 |
134 | Add the package and its latest version number (as minimum version) to `[tool.poetry.dependencies]` in `pyproject.toml`.
135 |
136 | Then run this to update Poetry's lockfile.
137 |
138 | ```bash
139 | poetry update
140 | ```
141 |
142 | ### Adding resource files to the project
143 |
144 | Add the file within the `sarif` directory and it will be installed with the Python source. For example, `sarif/operations/templates/sarif_summary.html`.
145 |
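146 | As a minimal sketch of how such a resource can then be located at runtime (mirroring the pattern already used by `sarif/operations/html_op.py` and `sarif/operations/emacs_op.py`; the resource file name here is a made-up example):
147 |
148 | ```python
149 | import os
150 |
151 | _THIS_MODULE_PATH = os.path.dirname(__file__)
152 | _TEMPLATES_PATH = os.path.join(_THIS_MODULE_PATH, "templates")
153 |
154 | # "my_resource.txt" is a hypothetical file added under sarif/operations/templates.
155 | with open(os.path.join(_TEMPLATES_PATH, "my_resource.txt"), encoding="utf-8") as file_in:
156 |     resource_text = file_in.read()
157 | ```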
--------------------------------------------------------------------------------
/CHANGELOG.md:
--------------------------------------------------------------------------------
1 | # Changelog
2 |
3 | All notable changes to this project will be documented in this file.
4 |
5 | The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
6 | and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
7 |
8 | ## [3.0.5] - 2025-07-17
9 |
10 | - #76 Support displaying rules as links in the HTML report when a help URI is provided
11 | - #82 `trend` and `csv` output CSV files have `\r\n` line terminators when generated on a Linux machine
12 | - #97 sarif blame crashes with UnicodeDecodeError when it encounters invalid UTF-8
13 |
14 | ## [3.0.4] - 2024-11-15
15 |
16 | - #73 Crash when using `--check`.
17 |
18 | ## [3.0.3] - 2024-09-30
19 |
20 | - #43 Support getting level from `ruleConfigurationOverrides` and `defaultConfiguration`.
21 | - #68 Fixed regression where reversing diff direction gave different results.
22 |
23 | ## [3.0.2] - 2024-09-18
24 |
25 | - #55 part 2: Add `executionSuccessful` to `copy` operation output for SARIF schema compliance.
26 |
27 | ## [3.0.1] - 2024-09-16
28 |
29 | ### Fixed
30 |
31 | - #58 Fixed regression that broke `sarif diff` command in v3.0.0.
32 |
33 | ## [3.0.0](releases/tag/v3.0.0) - 2024-09-10
34 |
35 | ### Breaking Changes
36 |
37 | - Changed Python API to use new IssueReport type for issue grouping and sorting:
38 | - `SarifFileSet` now has a `get_report()` method
39 | - `s.get_result_count_by_severity()` replaced by
40 | `s.get_report().get_issue_type_histogram_for_severity(severity)`
41 | - `s.get_result_count_by_severity()` replaced by
42 | `s.get_report().get_issue_count_for_severity(severity)`
43 | - `s.get_records_grouped_by_severity()` replaced by
44 | `s.get_report().get_issues_for_severity(severity)`
45 |
46 | ### Added
47 |
48 | - Support "none" severity level. It's only included in the output if present in the input.
49 |
50 | ### Fixed
51 |
52 | - #39 Truncate long summaries.
53 | - Made issue sorting and grouping more consistent across the various reports.
54 | - Multiple occurrences of a single issue are now sorted by location in the Word report.
55 | - Improved debug and version reporting for when multiple versions are installed.
56 | - For the copy operation, "invocation" in the resulting sarif is changed to an object to match the spec.
57 | - #53 Fix the `blame` command for `file:///` URL locations.
58 |
59 | ### Compatibility
60 |
61 | - Python 3.8+
62 |
63 | ## [2.0.0](releases/tag/v2.0.0) - 2022-11-07
64 |
65 | ### Breaking Changes
66 |
67 | - "Code" and "Description" are now separate columns in the CSV output, whereas before they were
68 | combined in the "Code" column. They are also separate keys in the "record" format if calling
69 | sarif-tools from Python.
70 | - `--blame-filter` argument has been replaced with `--filter`, using a new YAML-based format for
71 | more general filtering to replace the previous ad hoc text format which only supported blame.
72 | - There is a new `upgrade-filter` command to upgrade your old blame filter files to the new
73 | format.
74 | - Thanks to @abyss638 for contributing this enhancement!
75 |
76 | ### Added
77 |
78 | - New `codeclimate` command to generate output for GitLab use.
79 | - Thanks to @abyss638 for contributing this enhancement!
80 | - New `emacs` command to generate output for the popular Linux text editor.
81 | - Thanks to @dkloper for contributing this enhancement!
82 | - #14 Support recursive glob
83 | - Thanks to @bushelofsilicon for contributing this enhancement!
84 |
85 | ### Changed
86 |
87 | - When an input SARIF file contains blame information, the `csv` command output now has a column
88 | for `Author`.
89 | - #18 The `diff` command now prints up to three locations of new occurrences of issues (all are
90 | listed in the file output mode).
91 |
92 | ### Fixed
93 |
94 | - #4 and #19 docs improvements.
95 | - #12 allow zero locations for record.
96 | - #15 allow `text` to be absent in `message` object.
97 | - #20 allow UTF-8 with BOM (`utf-8-sig` encoding)
98 | - Thanks to @ManuelBerrueta for contributing this fix!
99 |
100 | ### Compatibility
101 |
102 | - Python 3.8+
103 |
104 | ## [1.0.0](releases/tag/v1.0.0) - 2022-05-09
105 |
106 | ### Changed
107 |
108 | - Development, build and release is now based on [python-poetry](https://python-poetry.org).
109 | - No change to functionality since v0.3.0.
110 |
111 | ### Compatibility
112 |
113 | - Python 3.8+
114 |
115 | ## [0.3.0](releases/tag/v0.3.0) - 2022-01-14
116 |
117 | ### Added
118 |
119 | - Support for globs in Windows, e.g. `sarif summary android*.sarif`
120 | - `info` and `copy` commands
121 |
122 | ### Compatibility
123 |
124 | - Python 3.8+
125 |
126 | ## [0.2.0](releases/tag/v0.2.0) - 2022-01-07
127 |
128 | ### Added
129 |
130 | - `--blame-filter` argument.
131 |
132 | ### Changed
133 |
134 | - Compatible with Python v3.8. Previously, Python v3.9 was required.
135 |
136 | ### Compatibility
137 |
138 | - Python 3.8+
139 |
140 | ## [0.1.0](releases/tag/v0.1.0) - 2021-11-11
141 |
142 | ### Added
143 |
144 | - Initial versions of commands `blame`, `csv`, `diff`, `html`, `ls`, `summary`, `trend`, `usage` and `word` created in Microsoft Global Hackathon 2021.
145 |
146 | ### Compatibility
147 |
148 | - Python 3.9+
149 |
--------------------------------------------------------------------------------
/sarif/operations/info_op.py:
--------------------------------------------------------------------------------
1 | """
2 | Code for `sarif info` command.
3 | """
4 |
5 | import datetime
6 | import os
7 |
8 | from sarif.sarif_file import SarifFileSet
9 |
10 | _BYTES_PER_MIB = 1024 * 1024
11 | _BYTES_PER_KIB = 1024
12 |
13 |
14 | def _property_bag_stats(object_list):
15 | tally = {}
16 | universal_property_keys = []
17 | partial_properties = []
18 | if object_list:
19 | for obj in object_list:
20 | for key in obj.get("properties", {}):
21 | tally[key] = tally[key] + 1 if key in tally else 1
22 | object_count = len(object_list)
23 | universal_property_keys = [
24 | key for (key, count) in tally.items() if count == object_count
25 | ]
26 |
27 | def tally_rank(key_count_pair):
28 | # Sort by descending tally then alphabetically
29 | return (-key_count_pair[1], key_count_pair[0])
30 |
31 | partial_properties = [
32 | {"key": key, "count": count, "percent": 100 * count / object_count}
33 | for (key, count) in sorted(tally.items(), key=tally_rank)
34 | if count < object_count
35 | ]
36 | return universal_property_keys, partial_properties
37 |
38 |
39 | def _generate_info_to_file(sarif_files, file_out):
40 | file_count = False
41 | for input_file in sarif_files:
42 | file_count += 1
43 | file_path = input_file.get_abs_file_path()
44 | file_stat = os.stat(file_path)
45 | size_in_bytes = file_stat.st_size
46 | if size_in_bytes > _BYTES_PER_MIB:
47 | readable_size = f"{file_stat.st_size / _BYTES_PER_MIB:.1f} MiB"
48 | else:
49 | readable_size = (
50 | f"{(file_stat.st_size + _BYTES_PER_KIB - 1) // _BYTES_PER_KIB} KiB"
51 | )
52 | print(input_file.get_abs_file_path(), file=file_out)
53 | print(f" {file_stat.st_size} bytes ({readable_size})", file=file_out)
54 | print(
55 | f" modified: {datetime.datetime.fromtimestamp(file_stat.st_mtime)}, "
56 | f"accessed: {datetime.datetime.fromtimestamp(file_stat.st_atime)}, "
57 | f"ctime: {datetime.datetime.fromtimestamp(file_stat.st_ctime)}",
58 | file=file_out,
59 | )
60 | run_count = len(input_file.runs)
61 | print(f" {run_count} runs" if run_count != 1 else " 1 run", file=file_out)
62 | for run_index, run in enumerate(input_file.runs):
63 | if run_count != 1:
64 | print(f" Run #{run_index + 1}:", file=file_out)
65 | print(f" Tool: {run.get_tool_name()}", file=file_out)
66 | conversion_tool = run.get_conversion_tool_name()
67 | if conversion_tool:
68 | print(f" Conversion tool: {conversion_tool}", file=file_out)
69 | results = run.get_results()
70 | result_count = len(results)
71 | print(
72 | f" {result_count} results" if result_count != 1 else " 1 result",
73 | file=file_out,
74 | )
75 | universal_property_keys, partial_properties = _property_bag_stats(results)
76 | ppk_string = (
77 | ", ".join(
78 | "{} {}/{} ({:.1f} %)".format(
79 | p["key"], p["count"], result_count, p["percent"]
80 | )
81 | for p in partial_properties
82 | )
83 | if partial_properties
84 | else None
85 | )
86 | if universal_property_keys:
87 | upk_string = ", ".join(universal_property_keys)
88 | if partial_properties:
89 | print(
90 | f" Result properties: all results have properties: {upk_string}; "
91 | f"some results have properties: {ppk_string}",
92 | file=file_out,
93 | )
94 | else:
95 | print(
96 | f" All results have properties: {upk_string}",
97 | file=file_out,
98 | )
99 | elif partial_properties:
100 | print(
101 | f" Result properties: {ppk_string}",
102 | file=file_out,
103 | )
104 | print(file=file_out)
105 | return file_count
106 |
107 |
108 | def generate_info(sarif_files: SarifFileSet, output: str):
109 | """
110 | Print structure information about the provided `sarif_files`.
111 | """
112 | if output:
113 | with open(output, "w", encoding="utf-8") as file_out:
114 | file_count = _generate_info_to_file(sarif_files, file_out)
115 | if file_count:
116 | files_string = (
117 | "1 SARIF file" if file_count == 1 else f"{file_count} SARIF files"
118 | )
119 | print("Wrote information about", files_string, "to", output)
120 | else:
121 | file_count = _generate_info_to_file(sarif_files, None)
122 | if file_count == 0:
123 | print(
124 | "No SARIF files found. Try passing a path of a SARIF file or containing SARIF files."
125 | )
126 |
--------------------------------------------------------------------------------
/sarif/operations/html_op.py:
--------------------------------------------------------------------------------
1 | """
2 | Code for `sarif html` command.
3 | """
4 |
5 | import base64
6 | from datetime import datetime
7 | import os
8 | from typing import Union
9 |
10 | from jinja2 import Environment, FileSystemLoader, select_autoescape
11 |
12 | from sarif import charts, sarif_file
13 |
14 | _THIS_MODULE_PATH = os.path.dirname(__file__)
15 |
16 | _TEMPLATES_PATH = os.path.join(_THIS_MODULE_PATH, "templates")
17 |
18 | _ENV = Environment(
19 | loader=FileSystemLoader(searchpath=_TEMPLATES_PATH),
20 | autoescape=select_autoescape(),
21 | )
22 |
23 |
24 | def generate_html(
25 | input_files: sarif_file.SarifFileSet,
26 | image_file: Union[str, None],
27 | output: str,
28 | output_multiple_files: bool,
29 | date_val: datetime = datetime.now(),
30 | ):
31 | """
32 | Generate HTML file from the input files.
33 | """
34 | if image_file:
35 | image_mime_type = "image/" + os.path.splitext(image_file)[-1].lstrip(".")
36 | if image_mime_type == "image/jpg":
37 | image_mime_type = "image/jpeg"
38 | with open(image_file, "rb") as input_file:
39 | image_data = input_file.read()
40 |
41 | image_data_base64 = base64.b64encode(image_data).decode("utf-8")
42 | else:
43 | image_mime_type = None
44 | image_data_base64 = None
45 |
46 | output_file = output
47 | if output_multiple_files:
48 | for input_file in input_files:
49 | output_file_name = input_file.get_file_name_without_extension() + ".html"
50 | print(
51 | "Writing HTML report for",
52 | input_file.get_file_name(),
53 | "to",
54 | output_file_name,
55 | )
56 | _generate_single_html(
57 | input_file,
58 | os.path.join(output, output_file_name),
59 | date_val,
60 | image_mime_type,
61 | image_data_base64,
62 | )
63 | output_file = os.path.join(output, "static_analysis_output.html")
64 | source_description = input_files.get_description()
65 | print(
66 | "Writing HTML report for",
67 | source_description,
68 | "to",
69 | os.path.basename(output_file),
70 | )
71 | _generate_single_html(
72 | input_files, output_file, date_val, image_mime_type, image_data_base64
73 | )
74 |
75 |
76 | def _generate_single_html(
77 | input_file, output_file, date_val, image_mime_type, image_data_base64
78 | ):
79 | all_tools = input_file.get_distinct_tool_names()
80 | report = input_file.get_report()
81 |
82 | total_distinct_issue_codes = 0
83 | problems = []
84 | severities = report.get_severities()
85 |
86 | for severity in severities:
87 | distinct_issue_codes = report.get_issue_type_count_for_severity(severity)
88 |
89 | total_distinct_issue_codes += distinct_issue_codes
90 |
91 | severity_details = _enrich_details(
92 | report.get_issues_grouped_by_type_for_severity(severity), input_file
93 | )
94 |
95 | severity_section = {
96 | "type": severity,
97 | "count": distinct_issue_codes,
98 | "details": severity_details,
99 | }
100 |
101 | problems.append(severity_section)
102 |
103 | chart_data = charts.generate_severity_pie_chart(report, output_file=None)
104 | if chart_data:
105 | chart_image_data_base64 = base64.b64encode(chart_data).decode("utf-8")
106 | else:
107 | chart_image_data_base64 = None
108 |
109 | filtered = None
110 | filter_stats = input_file.get_filter_stats()
111 | if filter_stats:
112 | filtered = f"Results were filtered by {filter_stats}."
113 |
114 | template = _ENV.get_template("sarif_summary.html")
115 | html_content = template.render(
116 | report_type=", ".join(all_tools),
117 | report_date=date_val,
118 | severities=", ".join(severities),
119 | total=total_distinct_issue_codes,
120 | problems=problems,
121 | image_mime_type=image_mime_type,
122 | image_data_base64=image_data_base64,
123 | chart_image_data_base64=chart_image_data_base64,
124 | filtered=filtered,
125 | )
126 |
127 | with open(output_file, "wt", encoding="utf-8") as file_out:
128 | file_out.write(html_content)
129 |
130 |
131 | def _extract_help_links_from_rules(rules, link_to_desc, key):
132 | for rule in rules:
133 | if "helpUri" in rule:
134 | uri = rule["helpUri"]
135 | if uri not in link_to_desc:
136 | desc = rule.get("fullDescription", {}).get("text")
137 | if not desc:
138 | desc = rule.get("name")
139 | if not desc:
140 | desc = key
141 | link_to_desc[uri] = desc
142 |
143 |
144 | def _enrich_details(records_of_severity, input_file):
145 | ret = []
146 |
147 | for key, records in records_of_severity.items():
148 | link_to_desc = {}
149 | for record in records:
150 | rule_id = record["Code"]
151 | rules = input_file.get_rules_by_id(rule_id)
152 | _extract_help_links_from_rules(rules, link_to_desc, key)
153 | links = [(desc, uri) for (uri, desc) in link_to_desc.items()]
154 | ret.append(
155 | {"code": key, "count": len(records), "links": links, "details": records}
156 | )
157 | return ret
158 |
--------------------------------------------------------------------------------
/tests/ops/html/test_html.py:
--------------------------------------------------------------------------------
1 | import datetime
2 | import os
3 | import tempfile
4 |
5 | from sarif.operations import html_op
6 | from sarif import sarif_file
7 |
8 | INPUT_SARIF = {
9 | "$schema": "https://schemastore.azurewebsites.net/schemas/json/sarif-2.1.0-rtm.5.json",
10 | "version": "2.1.0",
11 | "runs": [
12 | {
13 | "tool": {
14 | "driver": {
15 | "name": "unit test",
16 | "rules": [
17 | {
18 | "id": "CA2101",
19 | "name": "Specify for P/Invoke string arguments",
20 | "helpUri": "https://learn.microsoft.com/dotnet/fundamentals/code-analysis/quality-rules/ca2101",
21 | }
22 | ],
23 | }
24 | },
25 | "results": [
26 | {
27 | "ruleId": "CA2101",
28 | "level": "error",
29 | "locations": [
30 | {
31 | "physicalLocation": {
32 | "artifactLocation": {
33 | "uri": "file:///C:/Code/main.c",
34 | "index": 0,
35 | },
36 | "region": {"startLine": 24, "startColumn": 9},
37 | }
38 | }
39 | ],
40 | }
41 | ],
42 | }
43 | ],
44 | }
45 |
46 |
47 | EXPECTED_OUTPUT_TXT = """
48 |
49 |
123 |
124 |
125 | Sarif Summary: unit test
126 | Document generated on:
127 | Total number of distinct issues of all severities (error, warning, note): 1
128 |
129 | Severity : error [ 1 ]
130 |
131 | -
132 |
133 |
139 |
140 |
141 |
142 |
143 | Severity : warning [ 0 ]
144 |
147 |
148 | Severity : note [ 0 ]
149 |
152 | """
168 |
169 |
170 | def test_html():
171 | mtime = datetime.datetime.now()
172 | input_sarif_file = sarif_file.SarifFile("INPUT_SARIF", INPUT_SARIF, mtime=mtime)
173 |
174 | input_sarif_file_set = sarif_file.SarifFileSet()
175 | input_sarif_file_set.files.append(input_sarif_file)
176 |
177 | with tempfile.TemporaryDirectory() as tmp:
178 | file_path = os.path.join(tmp, "output.html")
179 | html_op.generate_html(
180 | input_sarif_file_set,
181 | None,
182 | file_path,
183 | output_multiple_files=False,
184 | date_val=mtime,
185 | )
186 |
187 | with open(file_path, "rb") as f_in:
188 | output = f_in.read().decode()
189 |
190 | # Remove pie chart before diffing
191 | pie_chart_start = output.find("<img")
192 | pie_chart_end = output.find("/>", pie_chart_start) + 2
193 | output = output[:pie_chart_start] + output[pie_chart_end:]
194 |
195 | # Check the output line-by-line, ignoring whitespace around and between lines.
196 | output_split = output.splitlines()
197 | for check_line in EXPECTED_OUTPUT_TXT.replace(
198 | "", mtime.strftime("%Y-%m-%d %H:%M:%S.%f")
199 | ).splitlines():
200 | expected = check_line.strip()
201 | if not expected:
202 | continue
203 | actual = ""
204 | while output_split:
205 | actual = output_split.pop(0).strip()
206 | if actual:
207 | break
208 | assert actual == expected
209 |
--------------------------------------------------------------------------------
/sarif/issues_report.py:
--------------------------------------------------------------------------------
1 | """
2 | A report derived from a SARIF file or group of SARIF files.
3 |
4 | The issues are grouped by severity, then by key (which is either issue code + truncated
5 | description or just issue code if the issues have distinct descriptions), then listed in location
6 | order.
7 | """
8 |
9 | from typing import Dict, List
10 |
11 | from sarif.sarif_file_utils import (
12 | combine_code_and_description,
13 | combine_record_code_and_description,
14 | record_sort_key,
15 | SARIF_SEVERITIES_WITHOUT_NONE,
16 | SARIF_SEVERITIES_WITH_NONE,
17 | )
18 |
19 |
20 | class IssuesReport:
21 | """
22 | This class imposes a hierarchical structure on a list of records which is helpful
23 | for presenting reader-friendly summaries.
24 | """
25 |
26 | def __init__(self):
27 | self._sev_to_records = {sev: [] for sev in SARIF_SEVERITIES_WITH_NONE}
28 | self._sev_to_sorted_keys = None
29 | self._records_have_been_sorted = False
30 |
31 | def add_record(self, record: dict):
32 | """Append record to list for severity - no sorting."""
33 | self._sev_to_records.setdefault(record["Severity"], []).append(record)
34 | if self._records_have_been_sorted:
35 | self._sev_to_sorted_keys = None
36 | self._records_have_been_sorted = False
37 |
38 | def _group_records_by_key(self):
39 | self._sev_to_sorted_keys = {}
40 | code_to_key_and_count = {}
41 | for severity, issues in self._sev_to_records.items():
42 | code_to_key_and_count.clear()
43 | for record in issues:
44 | code = record["Code"]
45 | key = combine_record_code_and_description(record)
46 | key_and_count = code_to_key_and_count.get(code)
47 | if key_and_count is None:
48 | code_to_key_and_count[code] = {
49 | "key": key,
50 | "common_desc": record["Description"],
51 | "count": 1,
52 | }
53 | else:
54 | key_and_count["count"] += 1
55 | common_desc_stem = key_and_count["common_desc"]
56 | desc = record["Description"]
57 | if not desc.startswith(common_desc_stem):
58 | for char_pos, (char1, char2) in enumerate(
59 | zip(common_desc_stem, desc)
60 | ):
61 | if char1 != char2:
62 | new_desc_stem = common_desc_stem[0:char_pos]
63 | key_and_count["common_desc"] = new_desc_stem
64 | key_and_count["key"] = combine_code_and_description(
65 | code, new_desc_stem + " ..."
66 | )
67 | break
68 | sorted_codes = sorted(
69 | code_to_key_and_count.keys(),
70 | key=lambda code: code_to_key_and_count[code]["count"],
71 | reverse=True,
72 | )
73 | self._sev_to_sorted_keys[severity] = {
74 | code_to_key_and_count[code]["key"]: [] for code in sorted_codes
75 | }
76 | for record in issues:
77 | # Not sorting the issues by location at this point
78 | code = record["Code"]
79 | self._sev_to_sorted_keys[severity][
80 | code_to_key_and_count[code]["key"]
81 | ].append(record)
82 |
83 | def _sort_record_lists(self):
84 | if self._sev_to_sorted_keys is None:
85 | self._group_records_by_key()
86 | for key_to_records in self._sev_to_sorted_keys.values():
87 | for records in key_to_records.values():
88 | records.sort(key=record_sort_key)
89 | self._records_have_been_sorted = True
90 |
91 | def get_issue_count_for_severity(self, severity: str) -> int:
92 | """Get the number of individual records at this severity level."""
93 | return len(self._sev_to_records.get(severity, []))
94 |
95 | def get_issue_type_count_for_severity(self, severity: str) -> int:
96 | """Get the number of distinct issue types at this severity level."""
97 | if self._sev_to_sorted_keys is None:
98 | self._group_records_by_key()
99 | return len(self._sev_to_sorted_keys.get(severity, []))
100 |
101 | def any_none_severities(self) -> bool:
102 | """Are there any records with severity level "none"?"""
103 | return bool(self._sev_to_records.get("none", {}))
104 |
105 | def get_severities(self) -> List[str]:
106 | """
107 | Get the list of relevant severity levels for these records.
108 |
109 | The returned list always includes "error", "warning" and "note", the standard SARIF severity
110 | levels for code issues. The unusual severity level "none" is only included at the end if
111 | there are any records with severity "none".
112 | """
113 | return (
114 | SARIF_SEVERITIES_WITH_NONE
115 | if self.any_none_severities()
116 | else SARIF_SEVERITIES_WITHOUT_NONE
117 | )
118 |
119 | def get_issues_grouped_by_type_for_severity(
120 | self, severity: str
121 | ) -> Dict[str, List[dict]]:
122 | """
123 | Get a dict from issue type key to list of matching records at this severity level.
124 |
125 | Issue type keys are derived from the issue code and (common prefix of) description.
126 | """
127 | if not self._records_have_been_sorted:
128 | self._sort_record_lists()
129 | return self._sev_to_sorted_keys.get(severity, {})
130 |
131 | def get_issue_type_histogram_for_severity(self, severity: str) -> Dict[str, int]:
132 | """
133 | Get a dict from issue type key to number of matching records at this severity level.
134 |
135 | This is the same as `{k: len(v) for k, v in d.items()}` where
136 | `d = report.get_issues_grouped_by_type_for_severity(severity)`.
137 | """
138 | if self._sev_to_sorted_keys is None:
139 | self._group_records_by_key()
140 | return {
141 | key: len(records)
142 | for key, records in self.get_issues_grouped_by_type_for_severity(
143 | severity
144 | ).items()
145 | }
146 |
147 | def get_issues_for_severity(self, severity: str) -> List[dict]:
148 | """
149 | Get a flat list of the issues at this severity.
150 |
151 | The sorting is consistent with `get_issues_grouped_by_type`, but the issues are not grouped
152 | by type.
153 | """
154 | type_to_issues = self.get_issues_grouped_by_type_for_severity(severity)
155 | ret = []
156 | for issues_for_type in type_to_issues.values():
157 | ret.extend(issues_for_type)
158 | return ret
159 |
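160 |
161 | # Illustrative usage sketch (made-up example values; the field names follow the record
162 | # format used elsewhere in sarif-tools, e.g. "Severity", "Code", "Description",
163 | # "Location" and "Line"):
164 | #
165 | #   report = IssuesReport()
166 | #   report.add_record(
167 | #       {
168 | #           "Severity": "error",
169 | #           "Code": "CA2101",
170 | #           "Description": "Specify marshaling for P/Invoke string arguments",
171 | #           "Location": "file:///C:/Code/main.c",
172 | #           "Line": 24,
173 | #       }
174 | #   )
175 | #   report.get_issue_count_for_severity("error")   # -> 1
176 | #   report.get_severities()                        # -> ["error", "warning", "note"]
177 | #   report.get_issues_for_severity("error")        # -> [<the record above>]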
--------------------------------------------------------------------------------
/tests/ops/diff/test_diff_issues_reordered.py:
--------------------------------------------------------------------------------
1 | import datetime
2 | import json
3 | import os
4 | import tempfile
5 |
6 | from sarif.operations import diff_op
7 | from sarif import sarif_file
8 |
9 | SARIF = {
10 | "$schema": "https://schemastore.azurewebsites.net/schemas/json/sarif-2.1.0-rtm.5.json",
11 | "version": "2.1.0",
12 | "runs": [
13 | {
14 | "tool": {"driver": {"name": "unit test"}},
15 | "results": [
16 | {
17 | "ruleId": "core.NullDereference",
18 | "ruleIndex": 2,
19 | "message": {
20 | "text": "Access to field 'type' results in a dereference of a null pointer (loaded from variable 'json')"
21 | },
22 | "locations": [
23 | {
24 | "physicalLocation": {
25 | "artifactLocation": {
26 | "uri": "file:///C:/Code/main.c",
27 | "index": 0,
28 | },
29 | "region": {"startLine": 24, "startColumn": 9},
30 | }
31 | }
32 | ],
33 | },
34 | {
35 | "ruleId": "core.NullDereference",
36 | "ruleIndex": 2,
37 | "message": {
38 | "text": "Dereference of null pointer (loaded from variable 's')"
39 | },
40 | "locations": [
41 | {
42 | "physicalLocation": {
43 | "artifactLocation": {
44 | "uri": "file:///C:/Code/main.c",
45 | "index": 0,
46 | },
47 | "region": {"startLine": 24, "startColumn": 9},
48 | }
49 | }
50 | ],
51 | },
52 | {
53 | "ruleId": "core.NullDereference",
54 | "ruleIndex": 2,
55 | "message": {
56 | "text": "Access to field 'other' results in a dereference of a null pointer (loaded from variable 'json')"
57 | },
58 | "locations": [
59 | {
60 | "physicalLocation": {
61 | "artifactLocation": {
62 | "uri": "file:///C:/Code/main.c",
63 | "index": 0,
64 | },
65 | "region": {"startLine": 24, "startColumn": 9},
66 | }
67 | }
68 | ],
69 | },
70 | ],
71 | }
72 | ],
73 | }
74 |
75 | SARIF_WITH_ISSUES_REORDERED = {
76 | "$schema": "https://schemastore.azurewebsites.net/schemas/json/sarif-2.1.0-rtm.5.json",
77 | "version": "2.1.0",
78 | "runs": [
79 | {
80 | "tool": {"driver": {"name": "unit test"}},
81 | "results": [
82 | {
83 | "ruleId": "core.NullDereference",
84 | "ruleIndex": 2,
85 | "message": {
86 | "text": "Access to field 'type' results in a dereference of a null pointer (loaded from variable 'json')"
87 | },
88 | "locations": [
89 | {
90 | "physicalLocation": {
91 | "artifactLocation": {
92 | "uri": "file:///C:/Code/main.c",
93 | "index": 0,
94 | },
95 | "region": {"startLine": 24, "startColumn": 9},
96 | }
97 | }
98 | ],
99 | },
100 | {
101 | "ruleId": "core.NullDereference",
102 | "ruleIndex": 2,
103 | "message": {
104 | "text": "Access to field 'other' results in a dereference of a null pointer (loaded from variable 'json')"
105 | },
106 | "locations": [
107 | {
108 | "physicalLocation": {
109 | "artifactLocation": {
110 | "uri": "file:///C:/Code/main.c",
111 | "index": 0,
112 | },
113 | "region": {"startLine": 24, "startColumn": 9},
114 | }
115 | }
116 | ],
117 | },
118 | {
119 | "ruleId": "core.NullDereference",
120 | "ruleIndex": 2,
121 | "message": {
122 | "text": "Dereference of null pointer (loaded from variable 's')"
123 | },
124 | "locations": [
125 | {
126 | "physicalLocation": {
127 | "artifactLocation": {
128 | "uri": "file:///C:/Code/main.c",
129 | "index": 0,
130 | },
131 | "region": {"startLine": 24, "startColumn": 9},
132 | }
133 | }
134 | ],
135 | },
136 | ],
137 | }
138 | ],
139 | }
140 |
141 |
142 | def test_diff_issues_reordered():
143 | mtime = datetime.datetime.now()
144 | sarif = sarif_file.SarifFile("SARIF", SARIF, mtime=mtime)
145 | sarif_reordered = sarif_file.SarifFile(
146 | "SARIF_WITH_ISSUES_REORDERED", SARIF_WITH_ISSUES_REORDERED, mtime=mtime
147 | )
148 | verify_no_diffs(sarif, sarif_reordered)
149 | verify_no_diffs(sarif_reordered, sarif)
150 |
151 |
152 | def verify_no_diffs(old_sarif: sarif_file.SarifFile, new_sarif: sarif_file.SarifFile):
153 | with tempfile.TemporaryDirectory() as tmp:
154 | file_path = os.path.join(tmp, "diff.json")
155 | result = diff_op.print_diff(
156 | old_sarif, new_sarif, file_path, check_level="warning"
157 | )
158 | with open(file_path, "rb") as f_in:
159 | diff_dict = json.load(f_in)
160 | assert result == 0
161 | assert diff_dict == {
162 | "all": {"+": 0, "-": 0},
163 | "error": {"+": 0, "-": 0, "codes": {}},
164 | "warning": {"+": 0, "-": 0, "codes": {}},
165 | "note": {"+": 0, "-": 0, "codes": {}},
166 | }
167 |
--------------------------------------------------------------------------------
/sarif/operations/blame_op.py:
--------------------------------------------------------------------------------
1 | """
2 | Code for `sarif blame` command.
3 | """
4 |
5 | import json
6 | import os
7 | import subprocess
8 | import sys
9 | from typing import Callable, Iterable, List, Union
10 | import urllib.parse
11 | import urllib.request
12 |
13 | from sarif.sarif_file import SarifFileSet
14 |
15 |
16 | def _run_git_blame(repo_path: str, file_path: str) -> List[bytes]:
17 | cmd = ["git", "blame", "--porcelain", _make_path_git_compatible(file_path)]
18 | with subprocess.Popen(cmd, stdout=subprocess.PIPE, cwd=repo_path) as proc:
19 | result = []
20 | if proc.stdout:
21 | result = proc.stdout.readlines()
22 |
23 | # Ensure process terminates
24 | proc.communicate()
25 | if proc.returncode:
26 | cmd_str = " ".join(cmd)
27 | sys.stderr.write(
28 | f"WARNING: Command `{cmd_str} "
29 | f"failed with exit code {proc.returncode} in {repo_path}\n"
30 | )
31 |
32 | return result
33 |
34 |
35 | def enhance_with_blame(
36 | input_files: SarifFileSet,
37 | repo_path: str,
38 | output: str,
39 | output_multiple_files: bool,
40 | run_git_blame: Callable[[str, str], List[bytes]] = _run_git_blame,
41 | ):
42 | """
43 | Enhance SARIF files with information from `git blame`. The `git` command is run in the current
44 | directory, which must be a git repository containing the files at the paths specified in the
45 | input files. Updated files are written to output_path if specified, otherwise to the current
46 | directory.
47 | """
48 | if not input_files:
49 | return
50 | if not os.path.isdir(repo_path):
51 | raise ValueError(f"No git repository directory found at {repo_path}")
52 |
53 | _enhance_with_blame(input_files, repo_path, run_git_blame)
54 |
55 | for input_file in input_files:
56 | input_file_name = input_file.get_file_name()
57 | if any(
58 | "blame" in result.get("properties", {})
59 | for result in input_file.get_results()
60 | ):
61 | output_file = output
62 | if output_multiple_files:
63 | output_filename = (
64 | input_file.get_file_name_without_extension()
65 | + "_with_blame."
66 | + input_file.get_file_name_extension()
67 | )
68 | output_file = os.path.join(output, output_filename)
69 | print(
70 | "Writing",
71 | output_file,
72 | "combining original SARIF from",
73 | input_file_name,
74 | "with git blame information",
75 | )
76 | with open(output_file, "w", encoding="utf-8") as file_out:
77 | json.dump(input_file.data, file_out)
78 | else:
79 | sys.stderr.write(
80 | f"WARNING: did not find any git blame information for {input_file_name}\n"
81 | )
82 |
83 |
84 | def _enhance_with_blame(
85 | input_files: SarifFileSet,
86 | repo_path: str,
87 | run_git_blame: Callable[[str, str], List[bytes]],
88 | ):
89 | """
90 | Run `git blame --porcelain` for each file path listed in input_files.
91 | Then enhance the results in error_list by adding a "blame" property including "hash", "author"
92 | and "timestamp".
93 | Porcelain format is used for parseability and stability. See documentation at
94 | https://git-scm.com/docs/git-blame#_the_porcelain_format.
95 | """
96 | files_to_blame = set(item["Location"] for item in input_files.get_records())
97 | file_count = len(files_to_blame)
98 | print(
99 | "Running `git blame --porcelain` on",
100 | "one file" if file_count == 1 else f"{file_count} files",
101 | "in",
102 | repo_path,
103 | )
104 | file_blame_info = _run_git_blame_on_files(files_to_blame, repo_path, run_git_blame)
105 |
106 | # Now join up blame output with result list
107 | blame_info_count = 0
108 | item_count = 0
109 | for result, record in zip(input_files.get_results(), input_files.get_records()):
110 | item_count += 1
111 | file_path = record["Location"]
112 | if file_path in file_blame_info:
113 | blame_info = file_blame_info[file_path]
114 | # raw_line can be None if no line number information was included in the SARIF result.
115 | raw_line = record["Line"]
116 | if raw_line:
117 | line_no = str(raw_line)
118 | if line_no in blame_info["line_to_commit"]:
119 | commit_hash = blame_info["line_to_commit"][line_no]
120 | commit = blame_info["commits"][commit_hash]
121 | # Add commit hash to the blame information
122 | commit_with_hash = {"commit": commit_hash, **commit}
123 | # Add blame information to the SARIF Property Bag of the result
124 | result.setdefault("properties", {})["blame"] = commit_with_hash
125 | blame_info_count += 1
126 | print(f"Found blame information for {blame_info_count} of {item_count} results")
127 |
128 |
129 | def _make_path_git_compatible(file_path):
130 | try:
131 | path_as_url = urllib.parse.urlparse(file_path)
132 | if path_as_url.scheme == "file":
133 | return urllib.request.url2pathname(path_as_url.path)
134 | return file_path
135 | except ValueError:
136 | return file_path
137 |
138 |
139 | def _run_git_blame_on_files(
140 | files_to_blame: Iterable[str],
141 | repo_path: str,
142 | run_git_blame: Callable[[str, str], List[bytes]],
143 | ):
144 | file_blame_info = {}
145 | for file_path in files_to_blame:
146 | git_blame_output = run_git_blame(repo_path, file_path)
147 | blame_info = {"commits": {}, "line_to_commit": {}}
148 | file_blame_info[file_path] = blame_info
149 | commit_hash: Union[str, None] = None
150 |
151 | for line_bytes in git_blame_output:
152 | # Convert byte sequence to string and remove trailing LF
153 | line_string = line_bytes.decode("utf-8", errors="replace")[:-1]
154 | # Now parse output from git blame --porcelain
155 | if commit_hash:
156 | if line_string.startswith("\t"):
157 | commit_hash = None
158 | # Ignore the line contents (source code)
159 | elif " " in line_string:
160 | space_pos = line_string.index(" ")
161 | key = line_string[0:space_pos]
162 | value = line_string[space_pos + 1 :].strip()
163 | blame_info["commits"][commit_hash][key] = value
164 | else:
165 | # e.g. "boundary"
166 | key = line_string
167 | blame_info["commits"][commit_hash][key] = True
168 | else:
169 | commit_line_info = line_string.split(" ")
170 | commit_hash = commit_line_info[0]
171 | commit_line = commit_line_info[2]
172 | blame_info["commits"].setdefault(commit_hash, {})
173 | blame_info["line_to_commit"][commit_line] = commit_hash
174 |
175 | return file_blame_info
176 |
--------------------------------------------------------------------------------
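
The `run_git_blame` callable expected by `enhance_with_blame` and `_run_git_blame_on_files` takes a repository path and a file path and returns the raw `git blame --porcelain` output as a list of byte lines. A minimal sketch of such a callable, assuming `git` is available on the PATH and using only the standard-library `subprocess` module (the function name and error handling are illustrative, not the project's own implementation):

import subprocess
from typing import List


def run_git_blame_subprocess(repo_path: str, file_path: str) -> List[bytes]:
    """Illustrative callable matching the run_git_blame signature used above."""
    # Run git blame in porcelain mode from within the repository directory.
    completed = subprocess.run(
        ["git", "-C", repo_path, "blame", "--porcelain", file_path],
        capture_output=True,
        check=False,  # a missing or untracked file should not abort the whole run
    )
    # keepends=True preserves the trailing "\n" that the porcelain parser strips.
    return completed.stdout.splitlines(keepends=True)
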
/sarif/operations/diff_op.py:
--------------------------------------------------------------------------------
1 | """
2 | Code for `sarif diff` command.
3 | """
4 |
5 | import json
6 | import sys
7 | from typing import Dict
8 |
9 | from sarif import sarif_file
10 |
11 |
12 | def _occurrences(occurrence_count):
13 | return (
14 | "1 occurrence" if occurrence_count == 1 else f"{occurrence_count} occurrences"
15 | )
16 |
17 |
18 | def _signed_change(difference):
19 | return str(difference) if difference < 0 else f"+{difference}"
20 |
21 |
22 | def _record_to_location_tuple(record) -> tuple:
23 | return (record["Location"], record["Line"])
24 |
25 |
26 | def print_diff(
27 | old_sarif: sarif_file.SarifFileSet,
28 | new_sarif: sarif_file.SarifFileSet,
29 | output,
30 | check_level=None,
31 | ) -> int:
32 | """
33 | Generate a diff of the issues from the SARIF files and write it to stdout
34 | or a file if specified.
35 | :param old_sarif: corresponds to the old files.
36 | :param new_sarif: corresponds to the new files.
37 | :return: number of new issue types at or above the check_level severity, or 0 if nothing has worsened.
38 | """
39 | diff = _calc_diff(old_sarif, new_sarif)
40 | if output:
41 | print("writing diff to", output)
42 | with open(output, "w", encoding="utf-8") as output_file:
43 | json.dump(diff, output_file, indent=4)
44 | else:
45 | for severity in sarif_file.SARIF_SEVERITIES_WITH_NONE:
46 | if severity not in diff:
47 | continue
48 | if diff[severity]["codes"]:
49 | print(
50 | severity,
51 | "level:",
52 | _signed_change(diff[severity]["+"]),
53 | _signed_change(-diff[severity]["-"]),
54 | )
55 | for issue_key, code_info in diff[severity]["codes"].items():
56 | (old_count, new_count, new_locations) = (
57 | code_info["<"],
58 | code_info[">"],
59 | code_info.get("+@", []),
60 | )
61 | if old_count == 0:
62 | print(f' New issue "{issue_key}" ({_occurrences(new_count)})')
63 | elif new_count == 0:
64 | print(f' Eliminated issue "{issue_key}"')
65 | else:
66 | print(
67 | f" Number of occurrences {old_count} -> {new_count}",
68 | f'({_signed_change(new_count - old_count)}) for issue "{issue_key}"',
69 | )
70 | if new_locations:
71 | # Print the top 3 new locations
72 | for record in new_locations[0:3]:
73 | (location, line) = _record_to_location_tuple(record)
74 | print(f" {location}:{line}")
75 | if len(new_locations) > 3:
76 | print(" ...")
77 | else:
78 | print(severity, "level: +0 -0 no changes")
79 | print(
80 | "all levels:",
81 | _signed_change(diff["all"]["+"]),
82 | _signed_change(-diff["all"]["-"]),
83 | )
84 | filter_stats = old_sarif.get_filter_stats()
85 | if filter_stats:
86 | print(f" 'Before' results were filtered by {filter_stats}")
87 | filter_stats = new_sarif.get_filter_stats()
88 | if filter_stats:
89 | print(f" 'After' results were filtered by {filter_stats}")
90 | ret = 0
91 | if check_level:
92 | for severity in sarif_file.SARIF_SEVERITIES_WITH_NONE:
93 | ret += diff.get(severity, {}).get("+", 0)
94 | if severity == check_level:
95 | break
96 | if ret > 0:
97 | sys.stderr.write(
98 | f"Check: exiting with return code {ret} due to increase in issues at or above {check_level} severity\n"
99 | )
100 | return ret
101 |
102 |
103 | def _find_new_occurrences(new_records, old_records):
104 | # Note: this is O(n²) complexity where n is the number of occurrences of this issue type,
105 | # so could be slow when there are a large number of occurrences.
106 | old_occurrences = old_records
107 | new_occurrences_new_locations = []
108 | new_occurrences_new_lines = []
109 | for new_record in new_records:
110 | (new_location, new_line) = (True, True)
111 | for old_record in old_occurrences:
112 | if old_record["Location"] == new_record["Location"]:
113 | new_location = False
114 | if old_record["Line"] == new_record["Line"]:
115 | new_line = False
116 | break
117 | if new_location:
118 | if new_record not in new_occurrences_new_locations:
119 | new_occurrences_new_locations.append(new_record)
120 | elif new_line:
121 | if new_record not in new_occurrences_new_lines:
122 | new_occurrences_new_lines.append(new_record)
123 |
124 | return sorted(
125 | new_occurrences_new_locations, key=_record_to_location_tuple
126 | ) + sorted(new_occurrences_new_lines, key=_record_to_location_tuple)
127 |
128 |
129 | def _calc_diff(
130 | old_sarif: sarif_file.SarifFileSet, new_sarif: sarif_file.SarifFileSet
131 | ) -> Dict:
132 | """
133 | Generate a diff of the issues from the SARIF files.
134 | old_sarif corresponds to the old files.
135 | new_sarif corresponds to the new files.
136 | Return dict has keys "error", "warning", "note", "none" (if present) and "all".
137 | """
138 | ret = {"all": {"+": 0, "-": 0}}
139 | old_report = old_sarif.get_report()
140 | new_report = new_sarif.get_report()
141 | # Include `none` in the list of severities if there are any `none` records in either the old
142 | # or new report.
143 | severities = (
144 | old_report.get_severities()
145 | if old_report.any_none_severities()
146 | else new_report.get_severities()
147 | )
148 | for severity in severities:
149 | old_histogram = old_report.get_issue_type_histogram_for_severity(severity)
150 | new_histogram = new_report.get_issue_type_histogram_for_severity(severity)
151 | ret[severity] = {"+": 0, "-": 0, "codes": {}}
152 | if old_histogram != new_histogram:
153 | for issue_key, count in new_histogram.items():
154 | old_count = old_histogram.pop(issue_key, 0)
155 | if old_count != count:
156 | ret[severity]["codes"][issue_key] = {"<": old_count, ">": count}
157 | if old_count == 0:
158 | ret[severity]["+"] += 1
159 | new_occurrences = _find_new_occurrences(
160 | new_report.get_issues_grouped_by_type_for_severity(
161 | severity
162 | ).get(issue_key, []),
163 | old_report.get_issues_grouped_by_type_for_severity(
164 | severity
165 | ).get(issue_key, []),
166 | )
167 | if new_occurrences:
168 | ret[severity]["codes"][issue_key]["+@"] = [
169 | {"Location": r["Location"], "Line": r["Line"]}
170 | for r in new_occurrences
171 | ]
172 | for issue_key, old_count in old_histogram.items():
173 | ret[severity]["codes"][issue_key] = {"<": old_count, ">": 0}
174 | ret[severity]["-"] += 1
175 | ret["all"]["+"] += ret[severity]["+"]
176 | ret["all"]["-"] += ret[severity]["-"]
177 | return ret
178 |
--------------------------------------------------------------------------------
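
For reference, the dictionary produced by `_calc_diff` (and written as JSON when an output file is given) has one entry per severity plus an "all" rollup; each per-issue entry records the old and new occurrence counts under "<" and ">" and any newly seen locations under "+@". A hand-written illustration of that shape (the issue code, paths and counts are made up):

example_diff = {
    "all": {"+": 1, "-": 0},
    "error": {
        "+": 1,  # one issue type newly appeared at this severity
        "-": 0,  # no issue types were eliminated
        "codes": {
            "DS126186 Disabled certificate validation": {
                "<": 0,  # occurrences in the "old" SARIF
                ">": 2,  # occurrences in the "new" SARIF
                "+@": [  # new locations; print_diff shows at most the first 3
                    {"Location": "src/app.py", "Line": 10},
                    {"Location": "src/util.py", "Line": 42},
                ],
            }
        },
    },
}
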
/sarif/operations/word_op.py:
--------------------------------------------------------------------------------
1 | """
2 | Generate a summary of the given SARIF file or file set as a Microsoft Word document.
3 | This functionality uses the Python module
4 |
5 | python-docx - a Python library for creating and updating Microsoft Word (.docx) files.
6 |
7 | https://python-docx.readthedocs.io/
8 |
9 | """
10 |
11 | from datetime import datetime
12 | import os
13 | from typing import Union
14 |
15 | import docx
16 | from docx import oxml
17 | from docx import shared
18 | from docx.enum import text
19 | from docx.oxml import ns
20 |
21 | from sarif import charts, sarif_file
22 | from sarif.sarif_file_utils import combine_record_code_and_description
23 |
24 |
25 | def generate_word_docs_from_sarif_inputs(
26 | input_files: sarif_file.SarifFileSet,
27 | image_file: Union[str, None],
28 | output: str,
29 | output_multiple_files: bool,
30 | date_val: datetime = datetime.now(),
31 | ):
32 | """
33 | Convert SARIF input to Word file output.
34 | """
35 | if not input_files:
36 | raise ValueError("No input files specified!")
37 |
38 | output_file = output
39 | output_file_name = output
40 | if output_multiple_files:
41 | for input_file in input_files:
42 | output_file_name = input_file.get_file_name_without_extension() + ".docx"
43 | print(
44 | "Writing Word summary of",
45 | input_file.get_file_name(),
46 | "to",
47 | output_file_name,
48 | )
49 | report = input_file.get_report()
50 | _generate_word_summary(
51 | input_file,
52 | report,
53 | os.path.join(output, output_file_name),
54 | image_file,
55 | date_val,
56 | )
57 | output_file_name = "static_analysis_output.docx"
58 | output_file = os.path.join(output, output_file_name)
59 |
60 | source_description = input_files.get_description()
61 | print("Writing Word summary of", source_description, "to", output_file_name)
62 | report = input_files.get_report()
63 | _generate_word_summary(input_files, report, output_file, image_file, date_val)
64 |
65 |
66 | def _generate_word_summary(
67 | sarif_data, report, output_file, image_file: Union[str, None], date_val: datetime
68 | ):
69 | # Create a new document
70 | document = docx.Document()
71 |
72 | severities = report.get_severities()
73 | _add_heading_and_highlevel_info(
74 | document, sarif_data, report, severities, output_file, image_file, date_val
75 | )
76 | _dump_errors_summary_by_sev(document, report, severities)
77 | _dump_each_error_in_detail(document, report, severities)
78 |
79 | # finally, save the document.
80 | document.save(output_file)
81 |
82 |
83 | def _add_heading_and_highlevel_info(
84 | document,
85 | sarif_data,
86 | report,
87 | severities,
88 | output_file,
89 | image_path: Union[str, None],
90 | date_val: datetime,
91 | ):
92 | tool_name = ", ".join(sarif_data.get_distinct_tool_names())
93 | heading = f"Sarif Summary: {tool_name}"
94 |
95 | if image_path:
96 | document.add_picture(image_path)
97 | last_paragraph = document.paragraphs[-1]
98 | last_paragraph.alignment = text.WD_PARAGRAPH_ALIGNMENT.CENTER
99 |
100 | document.add_heading(heading, 0)
101 | document.add_paragraph(f"Document generated on: {date_val}")
102 |
103 | sevs = ", ".join(severities)
104 | document.add_paragraph(
105 | f"Total number of various severities ({sevs}): {sarif_data.get_result_count()}"
106 | )
107 | filter_stats = sarif_data.get_filter_stats()
108 | if filter_stats:
109 | document.add_paragraph(f"Results were filtered by {filter_stats}.")
110 |
111 | pie_chart_image_file_path = output_file.replace(".docx", "_severity_pie_chart.png")
112 | if charts.generate_severity_pie_chart(report, pie_chart_image_file_path):
113 | document.add_picture(pie_chart_image_file_path)
114 | last_paragraph = document.paragraphs[-1]
115 | last_paragraph.alignment = text.WD_PARAGRAPH_ALIGNMENT.CENTER
116 |
117 | document.add_page_break()
118 |
119 |
120 | def _dump_errors_summary_by_sev(document, report, severities):
121 | """
122 | For each severity level (in priority order): create a list of the errors of
123 | that severity, print out how many there are and then do some further analysis
124 | of which error codes are present.
125 | """
126 | for severity in severities:
127 | errors_of_severity = report.get_issue_type_count_for_severity(severity)
128 | document.add_heading(f"Severity : {severity} [ {errors_of_severity} ]", level=1)
129 | sorted_dict = report.get_issue_type_histogram_for_severity(severity)
130 | if sorted_dict:
131 | for key, count in sorted_dict.items():
132 | document.add_paragraph(f"{key}: {count}", style="List Bullet")
133 | else:
134 | document.add_paragraph("None", style="List Bullet")
135 |
136 |
137 | def _dump_each_error_in_detail(document, report, severities):
138 | """
139 | Write out the errors to a table so that a human can do further analysis.
140 | """
141 | document.add_page_break()
142 |
143 | for severity in severities:
144 | errors_of_severity = report.get_issues_for_severity(severity)
145 | # Sample:
146 | # [{'Location': 'C:\\Max\\AccessionAndroid\\scripts\\parse_coverage.py', 'Line': 119,
147 | # 'Severity': 'error', 'Code': 'DS126186 Disabled certificate validation'},
148 | # {'Location': 'C:\\Max\\AccessionAndroid\\scripts\\parse_code_stats.py', 'Line': 61,
149 | # 'Severity': 'error', 'Code': 'DS126186 Disabled certificate validation'},
150 | # ]
151 | if errors_of_severity:
152 | document.add_heading(f"Severity : {severity}", level=2)
153 | table = document.add_table(rows=1 + len(errors_of_severity), cols=3)
154 |
155 | table.style = "Table Grid"  # alternative: 'ColorfulGrid-Accent5'
156 | table.autofit = False
157 |
158 | table.alignment = text.WD_TAB_ALIGNMENT.CENTER
159 |
160 | # Cell widths
161 | widths = [shared.Inches(2), shared.Inches(4), shared.Inches(0.5)]
162 |
163 | # To avoid performance problems with large tables, prepare the entries first in this
164 | # list, then iterate the table cells and copy them in.
165 | # First populate the header row
166 | cells_text = ["Code", "Location", "Line"]
167 |
168 | hdr_cells = table.rows[0].cells
169 | for i in range(3):
170 | # Shade the header cell by appending a w:shd element to its cell properties
171 | # (the exact fill colour is cosmetic; any w:fill value works here).
172 | table.rows[0].cells[i]._tc.get_or_add_tcPr().append(
173 | oxml.parse_xml(r'<w:shd {} w:fill="599FD3"/>'.format(ns.nsdecls("w")))
174 | )
175 | run = hdr_cells[i].paragraphs[0].add_run(cells_text[i])
176 | run.bold = True
177 | hdr_cells[i].paragraphs[
178 | 0
179 | ].alignment = text.WD_PARAGRAPH_ALIGNMENT.CENTER
180 | hdr_cells[i].width = widths[i]
181 |
182 | for eachrow in errors_of_severity:
183 | cells_text += [
184 | combine_record_code_and_description(eachrow),
185 | eachrow["Location"],
186 | str(eachrow["Line"]),
187 | ]
188 |
189 | # Note: using private property table._cells to avoid performance issue. See
190 | # https://stackoverflow.com/a/69105798/316578
191 | col_index = 0
192 | for cell, cell_text in zip(table._cells, cells_text):
193 | cell.text = cell_text
194 | cell.width = widths[col_index]
195 | col_index = col_index + 1 if col_index < 2 else 0
196 | else:
197 | document.add_heading(f"Severity : {severity}", level=2)
198 | document.add_paragraph("None", style="List Bullet")
199 |
--------------------------------------------------------------------------------
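
The header-row styling in `_dump_each_error_in_detail` relies on a common python-docx pattern: cell shading is not exposed on the high-level API, so a `w:shd` element is parsed from raw XML and appended to the cell's `tcPr` properties. A standalone sketch of that pattern (the output file name, fill colour and header text are illustrative):

import docx
from docx import oxml
from docx.oxml import ns

document = docx.Document()
table = document.add_table(rows=1, cols=2)
table.style = "Table Grid"

for cell, heading in zip(table.rows[0].cells, ["Code", "Location"]):
    # Shade the header cell by injecting a w:shd element into its cell properties.
    cell._tc.get_or_add_tcPr().append(
        oxml.parse_xml(r'<w:shd {} w:fill="D9D9D9"/>'.format(ns.nsdecls("w")))
    )
    run = cell.paragraphs[0].add_run(heading)
    run.bold = True

document.save("shaded_header_example.docx")
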
/tests/test_sarif_file_utils.py:
--------------------------------------------------------------------------------
1 | from sarif import sarif_file_utils
2 |
3 |
4 | def test_combine_code_and_description_short():
5 | cd = sarif_file_utils.combine_code_and_description(
6 | "ABC123", "Some short description"
7 | )
8 | assert cd == "ABC123 Some short description"
9 | assert len(cd) <= 120
10 |
11 |
12 | def test_combine_code_and_description_long_desc():
13 | cd = sarif_file_utils.combine_code_and_description(
14 | "ABC123", " ".join(f"blah{i}" for i in range(1, 30))
15 | )
16 | assert (
17 | cd
18 | == "ABC123 blah1 blah2 blah3 blah4 blah5 blah6 blah7 blah8 blah9 blah10 blah11 blah12 blah13 blah14 blah15 blah16 ..."
19 | )
20 | assert len(cd) <= 120
21 |
22 |
23 | def test_combine_code_and_description_long_code():
24 | long_code = "".join(f"A{i}" for i in range(1, 36)) + "BC"
25 | assert (
26 | len(long_code) == 98
27 | ), "98 is right length to hit 'placeholder too large for max width' without defensive code"
28 | cd = sarif_file_utils.combine_code_and_description(
29 | long_code, "wow that's a long code"
30 | )
31 | assert cd == f"{long_code} wow that's a ..."
32 | assert len(cd) <= 120
33 | long_code = "".join(f"A{i}" for i in range(1, 50))
34 | cd = sarif_file_utils.combine_code_and_description(
35 | long_code, "wow that's a long code"
36 | )
37 | assert cd == long_code
38 |
39 |
40 | def test_read_result_rule():
41 | run = {
42 | "tool": {
43 | "driver": {
44 | "rules": [
45 | {"id": "id0", "defaultConfiguration": {"level": "none"}},
46 | {"id": "id1", "defaultConfiguration": {"level": "error"}},
47 | ]
48 | }
49 | }
50 | }
51 | rule_id0 = run["tool"]["driver"]["rules"][0]
52 | rule_id1 = run["tool"]["driver"]["rules"][1]
53 |
54 | result = {}
55 | (rule, ruleIndex) = sarif_file_utils.read_result_rule(result, run)
56 | assert rule is None
57 | assert ruleIndex == -1
58 |
59 | result = {"ruleIndex": 1}
60 | (rule, ruleIndex) = sarif_file_utils.read_result_rule(result, run)
61 | assert rule == rule_id1
62 | assert ruleIndex == 1
63 |
64 | result = {"rule": {"index": 1}}
65 | (rule, ruleIndex) = sarif_file_utils.read_result_rule(result, run)
66 | assert rule == rule_id1
67 | assert ruleIndex == 1
68 |
69 | result = {"ruleId": "id1"}
70 | (rule, ruleIndex) = sarif_file_utils.read_result_rule(result, run)
71 | assert rule == rule_id1
72 | assert ruleIndex == 1
73 |
74 | result = {"rule": {"id": "id1"}}
75 | (rule, ruleIndex) = sarif_file_utils.read_result_rule(result, run)
76 | assert rule == rule_id1
77 | assert ruleIndex == 1
78 |
79 | result = {"ruleIndex": 0}
80 | (rule, ruleIndex) = sarif_file_utils.read_result_rule(result, run)
81 | assert rule == rule_id0
82 | assert ruleIndex == 0
83 |
84 | result = {"ruleIndex": 0}
85 | (rule, ruleIndex) = sarif_file_utils.read_result_rule(result, {})
86 | assert rule is None
87 | assert ruleIndex == -1
88 |
89 |
90 | def test_read_result_invocation():
91 | run = {"invocations": [{"foo": 1}, {"bar": "baz"}]}
92 |
93 | result = {}
94 | invocation = sarif_file_utils.read_result_invocation(result, run)
95 | assert invocation is None
96 |
97 | result = {"provenance": {}}
98 | invocation = sarif_file_utils.read_result_invocation(result, run)
99 | assert invocation is None
100 |
101 | result = {"provenance": {"invocationIndex": 0}}
102 | invocation = sarif_file_utils.read_result_invocation(result, {})
103 | assert invocation is None
104 |
105 | result = {"provenance": {"invocationIndex": -1}}
106 | invocation = sarif_file_utils.read_result_invocation(result, run)
107 | assert invocation is None
108 |
109 | result = {"provenance": {"invocationIndex": 2}}
110 | invocation = sarif_file_utils.read_result_invocation(result, run)
111 | assert invocation is None
112 |
113 | result = {"provenance": {"invocationIndex": 1}}
114 | invocation = sarif_file_utils.read_result_invocation(result, run)
115 | assert invocation == run["invocations"][1]
116 |
117 |
118 | def test_read_result_severity():
119 | result = {"level": "error"}
120 | severity = sarif_file_utils.read_result_severity(result, {})
121 | assert severity == "error"
122 |
123 | # If kind has any value other than "fail", then if level is absent, it SHALL default to "none"...
124 | result = {"kind": "other"}
125 | severity = sarif_file_utils.read_result_severity(result, {})
126 | assert severity == "none"
127 |
128 | run = {
129 | "invocations": [
130 | {
131 | "ruleConfigurationOverrides": [
132 | {"descriptor": {"id": "id1"}, "configuration": {"level": "note"}}
133 | ]
134 | },
135 | {
136 | "ruleConfigurationOverrides": [
137 | {"descriptor": {"index": 1}, "configuration": {"level": "note"}}
138 | ]
139 | },
140 | {},
141 | ],
142 | "tool": {
143 | "driver": {
144 | "rules": [
145 | {"id": "id0", "defaultConfiguration": {"level": "none"}},
146 | {"id": "id1", "defaultConfiguration": {"level": "error"}},
147 | ]
148 | }
149 | },
150 | }
151 |
152 | # If kind has the value "fail" and level is absent, then level SHALL be determined by the following procedure:
153 | # IF rule is present THEN
154 | # LET theDescriptor be the reportingDescriptor object that it specifies.
155 | # # Is there a configuration override for the level property?
156 | # IF result.provenance.invocationIndex is >= 0 THEN
157 | # LET theInvocation be the invocation object that it specifies.
158 | # IF theInvocation.ruleConfigurationOverrides is present
159 | # AND it contains a configurationOverride object whose
160 | # descriptor property specifies theDescriptor THEN
161 | # LET theOverride be that configurationOverride object.
162 | # IF theOverride.configuration.level is present THEN
163 | # Set level to theConfiguration.level.
164 | result = {"ruleIndex": 1, "provenance": {"invocationIndex": 0}}
165 | severity = sarif_file_utils.read_result_severity(result, run)
166 | assert severity == "note"
167 |
168 | result = {"ruleIndex": 1, "provenance": {"invocationIndex": 1}}
169 | severity = sarif_file_utils.read_result_severity(result, run)
170 | assert severity == "note"
171 |
172 | # ELSE
173 | # # There is no configuration override for level. Is there a default configuration for it?
174 | # IF theDescriptor.defaultConfiguration.level is present THEN
175 | # SET level to theDescriptor.defaultConfiguration.level.
176 |
177 | result = {"ruleIndex": 1}
178 | severity = sarif_file_utils.read_result_severity(result, run)
179 | assert severity == "error"
180 |
181 | result = {"rule": {"index": 1}}
182 | severity = sarif_file_utils.read_result_severity(result, run)
183 | assert severity == "error"
184 |
185 | result = {"ruleId": "id1"}
186 | severity = sarif_file_utils.read_result_severity(result, run)
187 | assert severity == "error"
188 |
189 | result = {"rule": {"id": "id1"}}
190 | severity = sarif_file_utils.read_result_severity(result, run)
191 | assert severity == "error"
192 |
193 | result = {"ruleIndex": 1, "provenance": {"invocationIndex": 2}}
194 | severity = sarif_file_utils.read_result_severity(result, run)
195 | assert severity == "error"
196 |
197 | # IF level has not yet been set THEN
198 | # SET level to "warning".
199 | result = {}
200 | severity = sarif_file_utils.read_result_severity(result, {})
201 | assert severity == "warning"
202 |
203 | result = {"ruleIndex": -1}
204 | severity = sarif_file_utils.read_result_severity(result, {})
205 | assert severity == "warning"
206 |
--------------------------------------------------------------------------------
/tests/ops/blame/test_blame.py:
--------------------------------------------------------------------------------
1 | from copy import deepcopy
2 | import datetime
3 | import json
4 | import jsonschema
5 | import os
6 | import tempfile
7 | from typing import Callable, Dict, List
8 |
9 | from sarif.operations import blame_op
10 | from sarif import sarif_file
11 | from tests.utils import get_sarif_schema
12 |
13 | ERROR_FILE_RELATIVE_PATH = "subdir/file.py"
14 | ERROR_FILE_ABSOLUTE_PATH = "file:///C:/repo/subdir/file.py"
15 |
16 | SARIF_FILE = {
17 | "$schema": "https://schemastore.azurewebsites.net/schemas/json/sarif-2.1.0-rtm.5.json",
18 | "version": "2.1.0",
19 | "runs": [
20 | {
21 | "tool": {"driver": {"name": "unit test"}},
22 | "results": [
23 | {
24 | "ruleId": "CA2101",
25 | "message": {"text": "just testing"},
26 | "level": "error",
27 | "locations": [
28 | {
29 | "physicalLocation": {
30 | "artifactLocation": {
31 | "uri": ERROR_FILE_ABSOLUTE_PATH,
32 | "index": 0,
33 | },
34 | "region": {"startLine": 3, "startColumn": 9},
35 | }
36 | }
37 | ],
38 | }
39 | ],
40 | }
41 | ],
42 | }
43 |
44 | GIT_BLAME_OUTPUT = [
45 | "f9db03438aba52affc5c3fcdb619afa620ad603a 1 1 7\n",
46 | "author Taylor Developer\n",
47 | "author-mail \n",
48 | "author-time 1699272533\n",
49 | "author-tz +0000\n",
50 | "committer GitHub\n",
51 | "committer-mail \n",
52 | "committer-time 1699272533\n",
53 | "committer-tz +0000\n",
54 | "summary Commit message 1\n",
55 | "filename " + ERROR_FILE_RELATIVE_PATH + "\n",
56 | "\tFile text line 1\n",
57 | "f9db03438aba52affc5c3fcdb619afa620ad603a 2 2\n",
58 | "\tFile text line 2\n",
59 | "f9db03438aba52affc5c3fcdb619afa620ad603a 3 3\n",
60 | "\tFile text line 3\n",
61 | "eec0471db074a037d820abdda1f210f8a8c987ca 4 4 1\n",
62 | "author Other Developer\n",
63 | "author-mail \n",
64 | "author-time 1718035364\n",
65 | "author-tz +0100\n",
66 | "committer GitHub\n",
67 | "committer-mail \n",
68 | "committer-time 1718035364\n",
69 | "committer-tz +0100\n",
70 | "summary Commit message 2\n",
71 | "filename " + ERROR_FILE_RELATIVE_PATH + "\n",
72 | "\tFile text line 4\n",
73 | "6732313c320314c122bd00aa40e7c79954f21c15 5 5 1\n",
74 | "author Another Developer\n",
75 | "author-mail \n",
76 | "author-time 1727710690\n",
77 | "author-tz -0700\n",
78 | "committer GitHub\n",
79 | "committer-mail \n",
80 | "committer-time 1727710690\n",
81 | "committer-tz -0700\n",
82 | "summary Commit message 3\n",
83 | "filename " + ERROR_FILE_RELATIVE_PATH + "\n",
84 | "\tFile text line 5\n",
85 | "6732313c320314c122bd00aa40e7c79954f21c15 6 6\n",
86 | "\tFile text line 6\n",
87 | ]
88 |
89 |
90 | def test_blame_no_blame_info():
91 | input_sarif_file = sarif_file.SarifFile(
92 | "SARIF_FILE", SARIF_FILE, mtime=datetime.datetime.now()
93 | )
94 | input_sarif_file_set = sarif_file.SarifFileSet()
95 | input_sarif_file_set.files.append(input_sarif_file)
96 |
97 | with tempfile.TemporaryDirectory() as tmp:
98 | repo_path = os.path.join(tmp, "repo")
99 | os.makedirs(repo_path)
100 | output_file_path = os.path.join(tmp, "blamed.json")
101 |
102 | blame_op.enhance_with_blame(
103 | input_sarif_file_set,
104 | repo_path,
105 | output_file_path,
106 | output_multiple_files=False,
107 | run_git_blame=lambda repo_path, file_path: [],
108 | )
109 |
110 | assert not os.path.isfile(output_file_path)
111 |
112 |
113 | def blame_test(
114 | run_git_blame: Callable[[str, str], List[bytes]],
115 | expected_blame_properties: Dict[str, Dict[str, str]],
116 | ):
117 | input_sarif_file = sarif_file.SarifFile(
118 | "SARIF_FILE", SARIF_FILE, mtime=datetime.datetime.now()
119 | )
120 | input_sarif_file_set = sarif_file.SarifFileSet()
121 | input_sarif_file_set.files.append(input_sarif_file)
122 |
123 | with tempfile.TemporaryDirectory() as tmp:
124 | repo_path = os.path.join(tmp, "repo")
125 | os.makedirs(repo_path)
126 | output_file_path = os.path.join(tmp, "blamed.json")
127 |
128 | def run_git_blame_wrapper(
129 | blame_repo_path: str, blame_file_path: str
130 | ) -> List[bytes]:
131 | assert blame_repo_path == repo_path
132 | assert blame_file_path == ERROR_FILE_ABSOLUTE_PATH
133 | return run_git_blame(blame_repo_path, blame_file_path)
134 |
135 | blame_op.enhance_with_blame(
136 | input_sarif_file_set,
137 | repo_path,
138 | output_file_path,
139 | output_multiple_files=False,
140 | run_git_blame=run_git_blame_wrapper,
141 | )
142 |
143 | with open(output_file_path, "rb") as f_out:
144 | output_sarif = json.load(f_out)
145 | jsonschema.validate(output_sarif, schema=get_sarif_schema())
146 |
147 | expected_sarif = deepcopy(input_sarif_file.data)
148 | expected_sarif["runs"][0]["results"][0]["properties"] = (
149 | expected_blame_properties
150 | )
151 | assert output_sarif == expected_sarif
152 |
153 |
154 | def test_blame_success():
155 | def run_git_blame(blame_repo_path: str, blame_file_path: str) -> List[bytes]:
156 | return [x.encode() for x in GIT_BLAME_OUTPUT]
157 |
158 | expected_blame_properties = {
159 | "blame": {
160 | "commit": "f9db03438aba52affc5c3fcdb619afa620ad603a",
161 | "author": "Taylor Developer",
162 | "author-mail": "",
163 | "author-time": "1699272533",
164 | "author-tz": "+0000",
165 | "committer": "GitHub",
166 | "committer-mail": "",
167 | "committer-time": "1699272533",
168 | "committer-tz": "+0000",
169 | "summary": "Commit message 1",
170 | "filename": ERROR_FILE_RELATIVE_PATH,
171 | }
172 | }
173 |
174 | blame_test(run_git_blame, expected_blame_properties)
175 |
176 |
177 | GIT_BLAME_OUTPUT_WITH_INVALID_UTF8 = [
178 | b"f9db03438aba52affc5c3fcdb619afa620ad603a 1 1 7\n",
179 | b"author Taylor Developer\n",
180 | b"author-mail \n",
181 | b"author-time 1699272533\n",
182 | b"author-tz +0000\n",
183 | b"committer GitHub\n",
184 | b"committer-mail \n",
185 | b"committer-time 1699272533\n",
186 | b"committer-tz +0000\n",
187 | b"summary Commit message \x80\n",
188 | b"filename " + ERROR_FILE_RELATIVE_PATH.encode() + b"\n",
189 | b"\tFile text line 1\n",
190 | b"f9db03438aba52affc5c3fcdb619afa620ad603a 2 2\n",
191 | b"\tFile text line 2\n",
192 | b"f9db03438aba52affc5c3fcdb619afa620ad603a 3 3\n",
193 | b"\tFile text line 3\n",
194 | b"eec0471db074a037d820abdda1f210f8a8c987ca 4 4 1\n",
195 | ]
196 |
197 |
198 | def test_blame_invalid_utf8():
199 | def run_git_blame(blame_repo_path: str, blame_file_path: str) -> List[bytes]:
200 | return GIT_BLAME_OUTPUT_WITH_INVALID_UTF8
201 |
202 | expected_blame_properties = {
203 | "blame": {
204 | "commit": "f9db03438aba52affc5c3fcdb619afa620ad603a",
205 | "author": "Taylor Developer",
206 | "author-mail": "",
207 | "author-time": "1699272533",
208 | "author-tz": "+0000",
209 | "committer": "GitHub",
210 | "committer-mail": "",
211 | "committer-time": "1699272533",
212 | "committer-tz": "+0000",
213 | "summary": "Commit message �",
214 | "filename": ERROR_FILE_RELATIVE_PATH,
215 | }
216 | }
217 |
218 | blame_test(run_git_blame, expected_blame_properties)
219 |
--------------------------------------------------------------------------------
/sarif/sarif_file_utils.py:
--------------------------------------------------------------------------------
1 | """
2 | Reusable utility functions for handling the SARIF format.
3 |
4 | Primarily interrogating the `result` JSON defined at
5 | https://docs.oasis-open.org/sarif/sarif/v2.1.0/cs01/sarif-v2.1.0-cs01.html#_Toc16012594
6 | """
7 |
8 | import textwrap
9 | from typing import Literal, Tuple, Union
10 |
11 | # SARIF severity levels as per
12 | # https://docs.oasis-open.org/sarif/sarif/v2.1.0/sarif-v2.1.0.html#_Toc141790898
13 | SARIF_SEVERITIES_WITHOUT_NONE = ["error", "warning", "note"]
14 | SARIF_SEVERITIES_WITH_NONE = SARIF_SEVERITIES_WITHOUT_NONE + ["none"]
15 |
16 |
17 | def combine_code_and_description(code: str, description: str) -> str:
18 | """
19 | Combine code and description into one string, keeping total length under 120 characters.
20 | """
21 | length_budget = 120
22 | if code:
23 | code = code.strip()
24 | length_budget -= len(code) + 1 # Allow issue code and space character
25 | continuation_placeholder = " ..."
26 | # Allow extra space when truncating for continuation characters
27 | length_budget_pre_continuation = length_budget - len(continuation_placeholder)
28 | if length_budget_pre_continuation < 10:
29 | # Don't include description if it would be very short due to long code
30 | return code
31 | if description:
32 | if "\n" in description:
33 | description = description[: description.index("\n")]
34 | if description.startswith(code):
35 | # Don't duplicate the code
36 | description = description[len(code) :]
37 | description = description.strip()
38 | if description:
39 | if len(description) > length_budget:
40 | shorter_description = textwrap.shorten(
41 | description,
42 | width=length_budget_pre_continuation,
43 | placeholder=continuation_placeholder,
44 | )
45 | if len(shorter_description) < length_budget_pre_continuation - 40:
46 | # Word wrap shortens the description significantly, so truncate mid-word instead
47 | description = (
48 | description[:length_budget_pre_continuation]
49 | + continuation_placeholder
50 | )
51 | else:
52 | description = shorter_description
53 | if code:
54 | return f"{code.strip()} {description}"
55 | return description
56 | if code:
57 | return code
58 | return ""
59 |
60 |
61 | def combine_record_code_and_description(record: dict) -> str:
62 | """
63 | Combine code and description fields into one string.
64 | """
65 | return combine_code_and_description(record["Code"], record["Description"])
66 |
67 |
68 | def read_result_location(result) -> Tuple[str, str]:
69 | """
70 | Extract the file path and line number strings from the Result.
71 |
72 | Tools store this in different ways, so this function tries a few different JSON locations.
73 | """
74 | file_path = None
75 | line_number = None
76 | locations = result.get("locations", [])
77 | if locations and isinstance(locations, list):
78 | location = locations[0]
79 | physical_location = location.get("physicalLocation", {})
80 | # SpotBugs has some errors with no line number, so startLine may be absent here
81 | line_number = physical_location.get("region", {}).get("startLine", None)
82 | # For file name, first try the location written by DevSkim
83 | file_path = (
84 | location.get("physicalLocation", {})
85 | .get("address", {})
86 | .get("fullyQualifiedName", None)
87 | )
88 | if not file_path:
89 | # Next try the physical location written by MobSF and by SpotBugs (for some errors)
90 | file_path = (
91 | location.get("physicalLocation", {})
92 | .get("artifactLocation", {})
93 | .get("uri", None)
94 | )
95 | if not file_path:
96 | logical_locations = location.get("logicalLocations", None)
97 | if logical_locations:
98 | # Finally, try the logical location written by SpotBugs for some errors
99 | file_path = logical_locations[0].get("fullyQualifiedName", None)
100 | return (file_path, line_number)
101 |
102 |
103 | def read_result_rule(result, run) -> Tuple[Union[dict, None], int]:
104 | """
105 | Returns the corresponding rule object for the specified result, plus its index
106 | in the rules array. Follows the rules at
107 | https://docs.oasis-open.org/sarif/sarif/v2.1.0/sarif-v2.1.0.html#_Toc141790895
108 | """
109 | ruleIndex = result.get("ruleIndex")
110 | ruleId = result.get("ruleId")
111 | rule = result.get("rule")
112 |
113 | if rule:
114 | if ruleIndex is None:
115 | ruleIndex = rule.get("index")
116 |
117 | if ruleId is None:
118 | ruleId = rule.get("id")
119 |
120 | rules = run.get("tool", {}).get("driver", {}).get("rules", [])
121 |
122 | if ruleIndex is not None and ruleIndex >= 0 and ruleIndex < len(rules):
123 | return (rules[ruleIndex], ruleIndex)
124 |
125 | if ruleId:
126 | for i, rule in enumerate(rules):
127 | if rule.get("id") == ruleId:
128 | return (rule, i)
129 |
130 | return (None, -1)
131 |
132 |
133 | def read_result_invocation(result, run):
134 | """
135 | Extract the invocation metadata for the result, following the rules at
136 | https://docs.oasis-open.org/sarif/sarif/v2.1.0/sarif-v2.1.0.html#_Toc141790917
137 | """
138 | invocationIndex = result.get("provenance", {}).get("invocationIndex")
139 | if invocationIndex is None:
140 | return None
141 |
142 | invocations = run.get("invocations")
143 |
144 | if invocations and invocationIndex >= 0 and invocationIndex < len(invocations):
145 | return invocations[invocationIndex]
146 |
147 | return None
148 |
149 |
150 | def read_result_severity(result, run) -> Literal["none", "note", "warning", "error"]:
151 | """
152 | Extract the severity level from the result following the rules at
153 | https://docs.oasis-open.org/sarif/sarif/v2.1.0/sarif-v2.1.0.html#_Toc141790898
154 | """
155 | severity = result.get("level")
156 | if severity:
157 | return severity
158 |
159 | # If kind has any value other than "fail", then if level is absent,
160 | # it SHALL default to "none"
161 | kind = result.get("kind", "fail")
162 | if kind and kind != "fail":
163 | return "none"
164 |
165 | # If kind has the value "fail" and level is absent, then...
166 | rule, ruleIndex = read_result_rule(result, run)
167 | if rule:
168 | # Honor the invocation's configuration override if present...
169 | invocation = read_result_invocation(result, run)
170 | if invocation:
171 | ruleConfigurationOverrides = invocation.get(
172 | "ruleConfigurationOverrides", []
173 | )
174 | override = next(
175 | (
176 | override
177 | for override in ruleConfigurationOverrides
178 | if override.get("descriptor", {}).get("id") == rule.get("id")
179 | or override.get("descriptor", {}).get("index") == ruleIndex
180 | ),
181 | None,
182 | )
183 |
184 | if override:
185 | overrideLevel = override.get("configuration", {}).get("level")
186 | if overrideLevel:
187 | return overrideLevel
188 |
189 | # Otherwise, use the rule's default configuration if present...
190 | defaultConfiguration = rule.get("defaultConfiguration")
191 | if defaultConfiguration:
192 | severity = defaultConfiguration.get("level")
193 | if severity:
194 | return severity
195 |
196 | # Otherwise, fall back to warning
197 | return "warning"
198 |
199 |
200 | def record_sort_key(record: dict) -> str:
201 | """Get a sort key for the record."""
202 | return (
203 | combine_record_code_and_description(record)
204 | + record["Location"]
205 | + str(record["Line"]).zfill(6)
206 | )
207 |
--------------------------------------------------------------------------------
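
The helpers above operate directly on raw SARIF `result`/`run` dictionaries, so they can be exercised without loading a file. A small usage sketch (the result and run dicts below are hand-written, not taken from any real tool's output):

from sarif import sarif_file_utils

run = {
    "tool": {
        "driver": {
            "rules": [{"id": "DS126186", "defaultConfiguration": {"level": "error"}}]
        }
    }
}
result = {
    "ruleId": "DS126186",
    "locations": [
        {
            "physicalLocation": {
                "artifactLocation": {"uri": "scripts/parse_coverage.py"},
                "region": {"startLine": 119},
            }
        }
    ],
}

# Location comes back as (file_path, line_number); line_number may be None.
print(sarif_file_utils.read_result_location(result))       # ('scripts/parse_coverage.py', 119)
# No explicit "level" on the result, so the rule's defaultConfiguration wins.
print(sarif_file_utils.read_result_severity(result, run))  # 'error'
print(sarif_file_utils.combine_code_and_description("DS126186", "Disabled certificate validation"))
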
/sarif/filter/general_filter.py:
--------------------------------------------------------------------------------
1 | """
2 | SARIF file filtering functionality.
3 | """
4 |
5 | import os
6 | import re
7 | from typing import Optional, List
8 |
9 | import copy
10 | import jsonpath_ng.ext
11 | import yaml
12 |
13 | from sarif import sarif_file_utils
14 | from sarif.filter.filter_stats import FilterStats, load_filter_stats_from_json
15 |
16 | # Commonly used properties can be specified using shortcuts
17 | # instead of full JSON path
18 | FILTER_SHORTCUTS = {
19 | "author": "properties.blame.author",
20 | "author-mail": "properties.blame.author-mail",
21 | "committer": "properties.blame.committer",
22 | "committer-mail": "properties.blame.committer-mail",
23 | "location": "locations[*].physicalLocation.artifactLocation.uri",
24 | "rule": "ruleId",
25 | "suppression": "suppressions[*].kind",
26 | }
27 |
28 | # Some properties can have specific shortcuts to make it easier to write filters
29 | # For example a file location can be specified using wildcards
30 | FIELDS_REGEX_SHORTCUTS = {"uri": {"**": ".*", "*": "[^/]*", "?": "."}}
31 |
32 | # Default configuration for all filters
33 | DEFAULT_CONFIGURATION = {
34 | "default-include": True,
35 | "check-line-number": True,
36 | }
37 |
38 |
39 | def _get_filter_function(filter_spec):
40 | """Return a filter function for the given specification."""
41 | if filter_spec:
42 | filter_len = len(filter_spec)
43 | if filter_len > 2 and filter_spec.startswith("/") and filter_spec.endswith("/"):
44 | regex = filter_spec[1:-1]
45 | return lambda value: re.search(regex, value, re.IGNORECASE)
46 | substring = filter_spec
47 | # substring can be empty; in this case "in" returns true
48 | # and only the existence of the property is checked.
49 | return lambda value: substring in value
50 | return lambda value: True
51 |
52 |
53 | def _convert_glob_to_regex(property_name, property_value_spec):
54 | # skip if property_value_spec is a regex
55 | if property_value_spec and not (
56 | property_value_spec.startswith("/") and property_value_spec.endswith("/")
57 | ):
58 | # get last component of property name
59 | last_component = property_name.split(".")[-1]
60 | if last_component in FIELDS_REGEX_SHORTCUTS:
61 | shortcuts = FIELDS_REGEX_SHORTCUTS[last_component]
62 | regex = re.compile("|".join(map(re.escape, shortcuts.keys())))
63 | property_value_spec = regex.sub(
64 | lambda match: shortcuts[match.group(0)], property_value_spec
65 | )
66 |
67 | return f"/{property_value_spec}/"
68 | return property_value_spec
69 |
70 |
71 | class PropertyFilter:
72 | """
73 | Class that represents a filter term ready for efficient use.
74 | """
75 |
76 | def __init__(self, prop_path, prop_value_spec, global_configuration):
77 | """
78 | Compile a filter property. See README for the filter spec format.
79 |
80 | :param prop_path: JsonPath or preset.
81 | :param prop_value_spec: Value spec.
82 | :param global_configuration: Global configuration of the filter.
83 | """
84 | self.prop_path = prop_path
85 | resolved_prop_path = FILTER_SHORTCUTS.get(prop_path, prop_path)
86 | self.jsonpath_expr = jsonpath_ng.ext.parse(resolved_prop_path)
87 |
88 | # if prop_value_spec is a dict, update filter configuration from it
89 | if isinstance(prop_value_spec, dict):
90 | self.filter_configuration = copy.deepcopy(global_configuration)
91 | for config_key, config_value in prop_value_spec.items():
92 | if config_key != "value":
93 | self.filter_configuration[config_key] = config_value
94 | # actual value for the filter is in "value" key
95 | prop_value_spec = prop_value_spec.get("value", "")
96 | else:
97 | self.filter_configuration = global_configuration
98 | value_spec = _convert_glob_to_regex(resolved_prop_path, prop_value_spec)
99 | self.filter_function = _get_filter_function(value_spec)
100 |
101 |
102 | class MultiPropertyFilter:
103 | """
104 | Class representing a list of PropertyFilter objects.
105 |
106 | These are combined using AND to filter results.
107 | """
108 |
109 | def __init__(self, filter_spec: List[dict], global_filter_configuration: dict):
110 | """
111 | Initialise from a filter spec.
112 |
113 | See README for filter spec format. It's a list of property paths and values to be
114 | combined with AND to form a filter.
115 | """
116 | self.filter_spec = filter_spec
117 | self.and_terms = [
118 | PropertyFilter(prop_path, prop_value_spec, global_filter_configuration)
119 | for prop_path, prop_value_spec in filter_spec.items()
120 | ]
121 |
122 |
123 | def _compile_filters(
124 | filters: List[dict], global_filter_configuration: dict
125 | ) -> List[MultiPropertyFilter]:
126 | return [
127 | MultiPropertyFilter(filter_spec, global_filter_configuration)
128 | for filter_spec in filters
129 | if filter_spec
130 | ]
131 |
132 |
133 | class GeneralFilter:
134 | """
135 | Class that implements filtering.
136 | """
137 |
138 | def __init__(self):
139 | self.filter_stats = None
140 | self.include_filters = {}
141 | self.apply_inclusion_filter = False
142 | self.exclude_filters = {}
143 | self.apply_exclusion_filter = False
144 | self.configuration = copy.deepcopy(DEFAULT_CONFIGURATION)
145 |
146 | def init_filter(
147 | self, filter_description, configuration, include_filters, exclude_filters
148 | ):
149 | """
150 | Initialise the filter with the given filter patterns.
151 | """
152 | self.filter_stats = FilterStats(filter_description)
153 | self.configuration.update(configuration)
154 | self.include_filters = _compile_filters(include_filters, self.configuration)
155 | self.apply_inclusion_filter = len(include_filters) > 0
156 | self.exclude_filters = _compile_filters(exclude_filters, self.configuration)
157 | self.apply_exclusion_filter = len(exclude_filters) > 0
158 |
159 | def rehydrate_filter_stats(self, dehydrated_filter_stats, filter_datetime):
160 | """
161 | Restore filter stats from the SARIF file directly,
162 | where they were recorded when the filter was previously run.
163 |
164 | Note that if init_filter is called,
165 | these rehydrated stats are discarded.
166 | """
167 | self.filter_stats = load_filter_stats_from_json(dehydrated_filter_stats)
168 | self.filter_stats.filter_datetime = filter_datetime
169 |
170 | def _zero_counts(self):
171 | if self.filter_stats:
172 | self.filter_stats.reset_counters()
173 |
174 | def _filter_append(self, filtered_results: List[dict], result: dict):
175 | # Remove any existing filter log on the result
176 | result.setdefault("properties", {}).pop("filtered", None)
177 |
178 | if self.apply_inclusion_filter:
179 | included_stats = self._filter_result(result, self.include_filters)
180 | if not included_stats["matchedFilter"]:
181 | # Result is excluded by dint of not being included
182 | self.filter_stats.filtered_out_result_count += 1
183 | return
184 | else:
185 | # no inclusion filters, mark the result as included so far
186 | included_stats = {"state": "included", "matchedFilter": []}
187 |
188 | if self.apply_exclusion_filter:
189 | excluded_stats = self._filter_result(result, self.exclude_filters)
190 | if excluded_stats["matchedFilter"]:
191 | self.filter_stats.filtered_out_result_count += 1
192 | return
193 |
194 | included_state = included_stats["state"]
195 | if included_state == "included":
196 | self.filter_stats.filtered_in_result_count += 1
197 | elif included_state == "noLineNumber":
198 | self.filter_stats.unconvincing_line_number_count += 1
199 | else:
200 | self.filter_stats.missing_property_count += 1
201 | included_stats["filter"] = self.filter_stats.filter_description
202 | result["properties"]["filtered"] = included_stats
203 |
204 | filtered_results.append(result)
205 |
206 | def _filter_result(self, result: dict, filters: List[MultiPropertyFilter]) -> dict:
207 | matched_filters = []
208 | warnings = []
209 | (_file_path, line_number) = sarif_file_utils.read_result_location(result)
210 | unconvincing_line_number = line_number == "1" or not line_number
211 | default_include_noprop = False
212 |
213 | if filters:
214 | # filters contain rules which are treated as OR:
215 | # if any rule matches, the record is selected.
216 | for mpf in filters:
217 | # filter_spec contains rules which are treated as AND:
218 | # all rules must match to select the record.
219 | matched = True
220 | for property_filter in mpf.and_terms:
221 | if (
222 | property_filter.filter_configuration.get(
223 | "check-line-number", True
224 | )
225 | and unconvincing_line_number
226 | ):
227 | warnings.append(
228 | f"Field '{property_filter.prop_path}' not checked due to "
229 | "missing line number information"
230 | )
231 | continue
232 | found_results = property_filter.jsonpath_expr.find(result)
233 | if found_results:
234 | value = found_results[0].value
235 | if property_filter.filter_function(value):
236 | continue
237 | else:
238 | # property to filter on is not found, or skipped due to invalid line number.
239 | # if "default-include" is true, include the "result" with a warning.
240 | if property_filter.filter_configuration.get(
241 | "default-include", True
242 | ):
243 | warnings.append(
244 | f"Field '{property_filter.prop_path}' is missing but "
245 | "the result included as default-include is true"
246 | )
247 | default_include_noprop = True
248 | continue
249 | matched = False
250 | break
251 | if matched:
252 | matched_filters.append(mpf.filter_spec)
253 | break
254 |
255 | stats = {
256 | "state": "included",
257 | "matchedFilter": matched_filters,
258 | }
259 |
260 | if warnings:
261 | stats.update(
262 | {
263 | "state": "noProperty" if default_include_noprop else "noLineNumber",
264 | "warnings": warnings,
265 | }
266 | )
267 |
268 | return stats
269 |
270 | def filter_results(self, results: List[dict]) -> List[dict]:
271 | """
272 | Apply this filter to a list of results,
273 | return the results that pass the filter
274 | and as a side-effect, update the filter stats.
275 | """
276 | if self.apply_inclusion_filter or self.apply_exclusion_filter:
277 | self._zero_counts()
278 | ret = []
279 | for result in results:
280 | self._filter_append(ret, result)
281 | return ret
282 | # No inclusion or exclusion patterns
283 | return results
284 |
285 | def get_filter_stats(self) -> Optional[FilterStats]:
286 | """
287 | Get the statistics from running this filter.
288 | """
289 | return self.filter_stats
290 |
291 |
292 | def load_filter_file(file_path):
293 | """
294 | Load a YAML filter file, return the filter description and the filters.
295 | """
296 | try:
297 | file_name = os.path.basename(file_path)
298 | with open(file_path, encoding="utf-8") as file_in:
299 | yaml_content = yaml.safe_load(file_in)
300 | filter_description = yaml_content.get("description", file_name)
301 | configuration = yaml_content.get("configuration", {})
302 | include_filters = yaml_content.get("include", {})
303 | exclude_filters = yaml_content.get("exclude", {})
304 | except yaml.YAMLError as error:
305 | raise IOError(f"Cannot read filter file {file_path}") from error
306 | return filter_description, configuration, include_filters, exclude_filters
307 |
--------------------------------------------------------------------------------
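
Filters are normally loaded from a YAML file via `load_filter_file` and then applied through `GeneralFilter`. A minimal end-to-end sketch, assuming a filter file written to a temporary location (the YAML content, file name and result dict are illustrative; the include spec format matches the tests that follow):

import os
import tempfile

from sarif.filter.general_filter import GeneralFilter, load_filter_file

FILTER_YAML = """\
description: Only findings blamed on example.com authors
configuration:
  check-line-number: false
include:
  - author-mail: example.com
"""

filter_path = os.path.join(tempfile.gettempdir(), "example_filter.yaml")
with open(filter_path, "w", encoding="utf-8") as f:
    f.write(FILTER_YAML)

description, configuration, include, exclude = load_filter_file(filter_path)

general_filter = GeneralFilter()
general_filter.init_filter(description, configuration, include, exclude)

result = {
    "ruleId": "test-rule",
    "properties": {"blame": {"author-mail": "someone@example.com"}},
}
kept = general_filter.filter_results([result])
print(len(kept))                          # 1 - the result matched the include filter
print(general_filter.get_filter_stats())  # statistics recorded for this filter run
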
/tests/test_general_filter.py:
--------------------------------------------------------------------------------
1 | import pytest
2 | from sarif.filter.general_filter import GeneralFilter, load_filter_file
3 | from sarif.filter.filter_stats import load_filter_stats_from_json
4 |
5 |
6 | class TestGeneralFilter:
7 | def test_init_filter(self):
8 | gf = GeneralFilter()
9 |
10 | gf.init_filter(
11 | "test filter",
12 | {},
13 | [{"author": "John Doe"}],
14 | [{"suppression": "not a suppression"}],
15 | )
16 | assert gf.filter_stats.filter_description == "test filter"
17 | assert len(gf.include_filters[0].and_terms) == 1
18 | assert gf.include_filters[0].and_terms[0].prop_path == "author"
19 | assert gf.apply_inclusion_filter is True
20 | assert len(gf.exclude_filters[0].and_terms) == 1
21 | assert gf.exclude_filters[0].and_terms[0].prop_path == "suppression"
22 | assert gf.apply_exclusion_filter is True
23 |
24 | def test_init_filter_no_value(self):
25 | gf = GeneralFilter()
26 |
27 | gf.init_filter(
28 | "test filter",
29 | {},
30 | [{"author": {"default-include": False}}], # forgot "value"
31 | [],
32 | )
33 | assert gf.filter_stats.filter_description == "test filter"
34 | assert len(gf.include_filters[0].and_terms) == 1
35 | assert gf.include_filters[0].and_terms[0].prop_path == "author"
36 | assert gf.apply_inclusion_filter is True
37 | assert not gf.exclude_filters
38 |
39 | def test_rehydrate_filter_stats(self):
40 | gf = GeneralFilter()
41 | dehydrated_filter_stats = {
42 | "filter": "test filter",
43 | "in": 10,
44 | "out": 5,
45 | "default": {"noProperty": 3},
46 | }
47 | gf.rehydrate_filter_stats(dehydrated_filter_stats, "2022-01-01T00:00:00Z")
48 | assert gf.filter_stats.filtered_in_result_count == 10
49 | assert gf.filter_stats.filtered_out_result_count == 5
50 | assert gf.filter_stats.missing_property_count == 3
51 | assert gf.filter_stats.filter_datetime == "2022-01-01T00:00:00Z"
52 |
53 | def test_zero_counts(self):
54 | gf = GeneralFilter()
55 | gf.filter_stats = load_filter_stats_from_json(
56 | {"filter": "test filter", "in": 10, "out": 5, "default": {"noProperty": 3}}
57 | )
58 |
59 | gf._zero_counts()
60 | assert gf.filter_stats.filtered_in_result_count == 0
61 | assert gf.filter_stats.filtered_out_result_count == 0
62 | assert gf.filter_stats.missing_property_count == 0
63 |
64 | def test_filter_append_include(self):
65 | general_filter = GeneralFilter()
66 | general_filter.init_filter(
67 | "test filter", {"check-line-number": False}, [{"ruleId": "test-rule"}], []
68 | )
69 | result = {"ruleId": "test-rule"}
70 |
71 | filtered_results = general_filter.filter_results([result])
72 | assert len(filtered_results) == 1
73 | assert filtered_results[0] == result
74 | assert filtered_results[0]["properties"]["filtered"]["state"] == "included"
75 | assert general_filter.filter_stats.filtered_in_result_count == 1
76 | assert general_filter.filter_stats.filtered_out_result_count == 0
77 | assert general_filter.filter_stats.missing_property_count == 0
78 |
79 | def test_filter_append_exclude(self):
80 | general_filter = GeneralFilter()
81 | general_filter.init_filter("test filter", {}, [], [{"level": "error"}])
82 | result = {"level": "error"}
83 |
84 | filtered_results = general_filter.filter_results([result])
85 | assert len(filtered_results) == 0
86 | assert "filtered" not in result
87 | assert general_filter.filter_stats.filtered_in_result_count == 0
88 | assert general_filter.filter_stats.filtered_out_result_count == 1
89 | assert general_filter.filter_stats.missing_property_count == 0
90 |
91 | def test_filter_append_no_filters(self):
92 | general_filter = GeneralFilter()
93 | general_filter.init_filter("test filter", {"check-line-number": False}, [], [])
94 | result = {"ruleId": "test-rule"}
95 |
96 | filtered_results = general_filter.filter_results([result])
97 | assert len(filtered_results) == 1
98 | assert filtered_results[0] == result
99 | assert "filtered" not in result
100 |
101 | def test_filter_results_match(self):
102 | general_filter = GeneralFilter()
103 | general_filter.init_filter(
104 | "test filter",
105 | {"check-line-number": False},
106 | [{"ruleId": "test-rule"}, {"level": "error"}],
107 | [],
108 | )
109 | result = {"ruleId": "test-rule", "level": "error"}
110 |
111 | filtered_results = general_filter.filter_results([result])
112 | assert len(filtered_results) == 1
113 | assert filtered_results[0] == result
114 | assert filtered_results[0]["properties"]["filtered"]["state"] == "included"
115 | assert filtered_results[0]["properties"]["filtered"]["matchedFilter"] == [
116 | {"ruleId": "test-rule"}
117 | ]
118 | assert "warnings" not in filtered_results[0]["properties"]["filtered"]
119 | assert general_filter.filter_stats.filtered_in_result_count == 1
120 | assert general_filter.filter_stats.filtered_out_result_count == 0
121 | assert general_filter.filter_stats.missing_property_count == 0
122 |
123 | def test_filter_results_no_match(self):
124 | general_filter = GeneralFilter()
125 | general_filter.init_filter(
126 | "test filter",
127 | {"check-line-number": False},
128 | [{"ruleId": "other-rule"}, {"level": "warning"}],
129 | [],
130 | )
131 | result = {"ruleId": "test-rule", "level": "error"}
132 |
133 | filtered_results = general_filter.filter_results([result])
134 | assert len(filtered_results) == 0
135 |
136 | def test_filter_results_regex(self):
137 | general_filter = GeneralFilter()
138 | rule = {"properties.blame.author-mail": "/myname\\..*\\.com/"}
139 | general_filter.init_filter(
140 | "test filter",
141 | {"check-line-number": True},
142 | [rule],
143 | [],
144 | )
145 | result = {
146 | "ruleId": "test-rule",
147 | "properties": {"blame": {"author-mail": "user@myname.example.com"}},
148 | "locations": [{"physicalLocation": {"region": {"startLine": "123"}}}],
149 | }
150 |
151 | filtered_results = general_filter.filter_results([result])
152 | assert len(filtered_results) == 1
153 | assert filtered_results[0]["properties"]["filtered"]["state"] == "included"
154 | assert filtered_results[0]["properties"]["filtered"]["matchedFilter"] == [rule]
155 | assert "warnings" not in filtered_results[0]["properties"]["filtered"]
156 |
157 | def test_filter_results_regex_guid(self):
158 | general_filter = GeneralFilter()
159 | guid_rule = {
160 | "properties.blame.author-mail": "/[0-9A-F]{8}[-][0-9A-F]{4}[-][0-9A-F]{4}"
161 | + "[-][0-9A-F]{4}[-][0-9A-F]{12}/"
162 | }
163 | general_filter.init_filter(
164 | "test filter",
165 | {"check-line-number": False},
166 | [guid_rule],
167 | [],
168 | )
169 | result = {
170 | "ruleId": "test-rule",
171 | "properties": {
172 | "blame": {"author-mail": "AAAAA1234ABCD-FEDC-BA09-8765-4321ABCDEF90"}
173 | },
174 | }
175 |
176 | filtered_results = general_filter.filter_results([result])
177 | assert len(filtered_results) == 1
178 | assert filtered_results[0]["properties"]["filtered"]["state"] == "included"
179 | assert filtered_results[0]["properties"]["filtered"]["matchedFilter"] == [
180 | guid_rule
181 | ]
182 | assert "warnings" not in filtered_results[0]["properties"]["filtered"]
183 |
184 | def test_filter_results_existence_only(self):
185 | general_filter = GeneralFilter()
186 | general_filter.init_filter(
187 | "test filter", {"check-line-number": False}, [], [{"suppression": {}}]
188 | )
189 | result = {"ruleId": "test-rule", "suppressions": [{"kind": "inSource"}]}
190 |
191 | filtered_results = general_filter.filter_results([result])
192 | assert len(filtered_results) == 0
193 |
194 | def test_filter_results_match_default_include_default_configuration(self):
195 | general_filter = GeneralFilter()
196 | general_filter.init_filter(
197 | "test filter", {"check-line-number": False}, [{"level": "error"}], []
198 | )
199 | result = {"ruleId": "test-rule"}
200 |
201 | filtered_results = general_filter.filter_results([result])
202 | assert len(filtered_results) == 1
203 | assert filtered_results[0] == result
204 | assert filtered_results[0]["properties"]["filtered"]["state"] == "noProperty"
205 | assert filtered_results[0]["properties"]["filtered"]["warnings"] == [
206 | "Field 'level' is missing but the result included as default-include is true"
207 | ]
208 | assert general_filter.filter_stats.filtered_in_result_count == 0
209 | assert general_filter.filter_stats.filtered_out_result_count == 0
210 | assert general_filter.filter_stats.missing_property_count == 1
211 |
212 | def test_filter_results_check_line_number(self):
213 | general_filter = GeneralFilter()
214 | general_filter.init_filter("test filter", {}, [{"level": "error"}], [])
215 | result = {
216 | "ruleId": "test-rule",
217 | "locations": [{"physicalLocation": {"region": {"startLine": "1"}}}],
218 | }
219 |
220 | filtered_results = general_filter.filter_results([result])
221 | assert len(filtered_results) == 1
222 | assert filtered_results[0] == result
223 | assert filtered_results[0]["properties"]["filtered"]["state"] == "noLineNumber"
224 | assert filtered_results[0]["properties"]["filtered"]["warnings"] == [
225 | "Field 'level' not checked due to missing line number information"
226 | ]
227 | assert general_filter.filter_stats.filtered_in_result_count == 0
228 | assert general_filter.filter_stats.filtered_out_result_count == 0
229 | assert general_filter.filter_stats.missing_property_count == 0
230 | assert general_filter.filter_stats.unconvincing_line_number_count == 1
231 |
232 | def test_filter_results_match_default_include_rule_override(self):
233 | general_filter = GeneralFilter()
234 | general_filter.init_filter(
235 | "test filter",
236 | {"check-line-number": False},
237 | [{"level": {"value": "error", "default-include": False}}],
238 | [],
239 | )
240 | result = {"ruleId": "test-rule"}
241 |
242 | filtered_results = general_filter.filter_results([result])
243 | assert len(filtered_results) == 0
244 | assert general_filter.filter_stats.filtered_in_result_count == 0
245 | # Filtered out because not filtered in
246 | assert general_filter.filter_stats.filtered_out_result_count == 1
247 | assert general_filter.filter_stats.missing_property_count == 0
248 |
249 | SHORTCUTS_TEST_PARAMS = [
250 | ({"author": "John Smith"}, {"properties": {"blame": {"author": "John Smith"}}}),
251 | (
252 | {"author-mail": "john.smith@example.com"},
253 | {"properties": {"blame": {"author-mail": "john.smith@example.com"}}},
254 | ),
255 | (
256 | {"committer-mail": "john.smith@example.com"},
257 | {"properties": {"blame": {"committer-mail": "john.smith@example.com"}}},
258 | ),
259 | (
260 | {"location": "test.cpp"},
261 | {
262 | "locations": [
263 | {"physicalLocation": {"artifactLocation": {"uri": "test.cpp"}}}
264 | ]
265 | },
266 | ),
267 | ({"rule": "rule1"}, {"ruleId": "rule1"}),
268 | ({"suppression": "inSource"}, {"suppressions": [{"kind": "inSource"}]}),
269 | ]
270 |
271 | @pytest.mark.parametrize("shortcut_filter,result", SHORTCUTS_TEST_PARAMS)
272 | def test_filter_results_shortcuts(self, shortcut_filter, result):
273 | general_filter = GeneralFilter()
274 | general_filter.init_filter(
275 | "test filter", {"check-line-number": False}, [shortcut_filter], []
276 | )
277 |
278 | filtered_results = general_filter.filter_results([result])
279 | assert len(filtered_results) == 1
280 | assert filtered_results[0] == result
281 | assert filtered_results[0]["properties"]["filtered"]["state"] == "included"
282 | assert "warnings" not in filtered_results[0]["properties"]["filtered"]
283 |
284 | def test_filter_results_include(self):
285 | general_filter = GeneralFilter()
286 | general_filter.init_filter(
287 | "test filter", {"check-line-number": False}, [{"ruleId": "test-rule"}], []
288 | )
289 | results = [{"ruleId": "test-rule"}] * 10
290 |
291 | filtered_results = general_filter.filter_results(results)
292 | assert len(filtered_results) == 10
293 | assert all(result in filtered_results for result in results)
294 | assert general_filter.filter_stats.filtered_in_result_count == 10
295 | assert general_filter.filter_stats.filtered_out_result_count == 0
296 | assert general_filter.filter_stats.missing_property_count == 0
297 |
298 | def test_filter_results_exclude(self):
299 | general_filter = GeneralFilter()
300 | general_filter.init_filter(
301 | "test filter", {"check-line-number": False}, [], [{"level": "error"}]
302 | )
303 | results = [{"level": "error"}] * 10
304 |
305 | filtered_results = general_filter.filter_results(results)
306 | assert len(filtered_results) == 0
307 | assert general_filter.filter_stats.filtered_in_result_count == 0
308 | assert general_filter.filter_stats.filtered_out_result_count == 10
309 | assert general_filter.filter_stats.missing_property_count == 0
310 |
311 | def test_filter_results_exclude_not_all(self):
312 | general_filter = GeneralFilter()
313 | general_filter.init_filter(
314 | "test filter", {"check-line-number": False}, [], [{"level": "error"}]
315 | )
316 | results = [{"level": "error"}, {"level": "warning"}, {"level": "error"}]
317 |
318 | filtered_results = general_filter.filter_results(results)
319 | assert len(filtered_results) == 1
320 | assert general_filter.filter_stats.filtered_in_result_count == 1
321 | assert general_filter.filter_stats.filtered_out_result_count == 2
322 | assert general_filter.filter_stats.missing_property_count == 0
323 | assert filtered_results[0]["properties"]["filtered"]["state"] == "included"
324 | assert len(filtered_results[0]["properties"]["filtered"]["matchedFilter"]) == 0
325 |
326 | def test_filter_results_no_filters(self):
327 | general_filter = GeneralFilter()
328 | general_filter.init_filter("test filter", {"check-line-number": False}, [], [])
329 | results = [{"ruleId": "test-rule"}] * 10
330 |
331 | filtered_results = general_filter.filter_results(results)
332 | assert len(filtered_results) == 10
333 | assert all(result in filtered_results for result in results)
334 | assert general_filter.filter_stats.filtered_in_result_count == 0
335 | assert general_filter.filter_stats.filtered_out_result_count == 0
336 | assert general_filter.filter_stats.missing_property_count == 0
337 |
338 | def test_get_filter_stats(self):
339 | general_filter = GeneralFilter()
340 | general_filter.init_filter(
341 | "test filter", {"check-line-number": False}, [{"ruleId": "test-rule"}], []
342 | )
343 | results = [{"ruleId": "test-rule"}] * 10
344 |
345 | general_filter.filter_results(results)
346 | filter_stats = general_filter.get_filter_stats()
347 | assert filter_stats.filtered_in_result_count == 10
348 | assert filter_stats.filtered_out_result_count == 0
349 | assert filter_stats.missing_property_count == 0
350 |
351 | def test_load_filter_file(self):
352 | file_path = "test_filter.yaml"
353 | filter_description = "Test filter"
354 | include_filters = {"ruleId": "test-rule"}
355 | exclude_filters = {"level": "error"}
356 | with open(file_path, "w") as f:
357 | f.write(f"description: {filter_description}\n")
358 | f.write(f"include:\n ruleId: {include_filters['ruleId']}\n")
359 | f.write(f"exclude:\n level: {exclude_filters['level']}\n")
360 |
361 | loaded_filter = load_filter_file(file_path)
362 | assert loaded_filter == (
363 | filter_description,
364 | {},
365 | include_filters,
366 | exclude_filters,
367 | )
368 |
369 | def test_load_filter_file_with_configuration(self):
370 | file_path = "test_filter.yaml"
371 | filter_description = "Test filter"
372 | configuration = {"default-include": True}
373 | include_filters = {"ruleId": "test-rule"}
374 | exclude_filters = {"level": "error"}
375 | with open(file_path, "w") as f:
376 | f.write(f"description: {filter_description}\n")
377 | f.write("configuration:\n default-include: true\n")
378 | f.write(f"include:\n ruleId: {include_filters['ruleId']}\n")
379 | f.write(f"exclude:\n level: {exclude_filters['level']}\n")
380 |
381 | loaded_filter = load_filter_file(file_path)
382 | assert loaded_filter == (
383 | filter_description,
384 | configuration,
385 | include_filters,
386 | exclude_filters,
387 | )
388 |
389 | def test_load_filter_file_wrong_format(self):
390 | file_path = "test_filter.yaml"
391 | filter_description = "Test filter"
392 | with open(file_path, "w") as f:
393 | f.write(f"description: {filter_description}\n")
394 | f.write("include\n")
395 | f.write("exclude\n")
396 |
397 | with pytest.raises(IOError) as io_error:
398 | load_filter_file(file_path)
399 |         assert str(io_error.value) == f"Cannot read filter file {file_path}"
400 |
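401 | 
402 | # Illustrative sketch only (not a test): the filter-file layout exercised by the
403 | # tests above, assuming the YAML keys written in the test_load_filter_file* tests
404 | # and the property-path / regex conventions used in the include-rule tests.
405 | #
406 | #   description: Example filter
407 | #   configuration:
408 | #     default-include: true
409 | #   include:
410 | #     ruleId: test-rule
411 | #     author-mail: /example\.com$/   # values wrapped in slashes are regular expressions
412 | #   exclude:
413 | #     suppression: {}                # an empty value matches on field existence only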
--------------------------------------------------------------------------------
/sarif/cmdline/main.py:
--------------------------------------------------------------------------------
1 | """
2 | Program entry point for sarif-tools on the command line.
3 | """
4 |
5 | import argparse
6 | import os
7 | import sys
8 |
9 | from sarif import loader, sarif_file, __version__ as SARIF_TOOLS_PACKAGE_VERSION
10 | from sarif.filter.general_filter import load_filter_file
11 |
12 | from sarif.operations import (
13 | blame_op,
14 | codeclimate_op,
15 | copy_op,
16 | csv_op,
17 | diff_op,
18 | html_op,
19 | emacs_op,
20 | info_op,
21 | ls_op,
22 | summary_op,
23 | trend_op,
24 | upgrade_filter_op,
25 | word_op,
26 | )
27 |
28 |
29 | def main():
30 | """
31 | Entry point function.
32 | """
33 | args, unknown_args = ARG_PARSER.parse_known_args()
34 |
35 | if args.debug:
36 | _print_version()
37 | print(f"Running code from {__file__}")
38 | known_args_summary = ", ".join(
39 | f"{key}={getattr(args, key)}" for key in vars(args)
40 | )
41 | print(f"Known arguments: {known_args_summary}")
42 | if args.version:
43 | return 0
44 | elif args.version:
45 | _print_version()
46 | return 0
47 |
48 | if unknown_args:
49 | if any(
50 | unknown_arg.startswith("--blame-filter")
51 | or unknown_arg.startswith("-b=")
52 | or unknown_arg == "-b"
53 | for unknown_arg in unknown_args
54 | ):
55 | print("ERROR: --blame-filter was removed in v2.0.0.")
56 | print(
57 | "Run the upgrade-filter command to convert your blame filter to the new filter format, then pass via --filter option."
58 | )
59 | args = ARG_PARSER.parse_args()
60 |
61 | exitcode = args.func(args)
62 | return exitcode
63 |
64 |
65 | def _create_arg_parser():
66 | cmd_list = "commands:\n"
67 | max_cmd_length = max(len(cmd) for cmd in _COMMANDS)
68 | col_width = max_cmd_length + 2
69 | for cmd, cmd_attributes in _COMMANDS.items():
70 | cmd_list += cmd.ljust(col_width) + cmd_attributes["desc"] + "\n"
71 |     cmd_list += "Run `sarif <COMMAND> --help` for command-specific help."
72 | parser = argparse.ArgumentParser(
73 | prog="sarif",
74 | description="Process sets of SARIF files",
75 | epilog=cmd_list,
76 | formatter_class=argparse.RawDescriptionHelpFormatter,
77 | )
78 | parser.set_defaults(func=_usage_command)
79 | subparsers = parser.add_subparsers(dest="command", help="command")
80 | subparser = {}
81 | for cmd, cmd_attributes in _COMMANDS.items():
82 | subparser[cmd] = subparsers.add_parser(cmd, description=cmd_attributes["desc"])
83 | subparser[cmd].set_defaults(func=cmd_attributes["fn"])
84 |
85 | # Common options
86 | parser.add_argument("--version", "-v", action="store_true")
87 | parser.add_argument(
88 | "--debug", action="store_true", help="Print information useful for debugging"
89 | )
90 | parser.add_argument(
91 | "--check",
92 | "-x",
93 | type=str,
94 | choices=sarif_file.SARIF_SEVERITIES_WITH_NONE,
95 | help="Exit with error code if there are any issues of the specified level "
96 | + "(or for diff, an increase in issues at that level).",
97 | )
98 |
99 | for cmd in [
100 | "blame",
101 | "codeclimate",
102 | "csv",
103 | "html",
104 | "emacs",
105 | "summary",
106 | "word",
107 | "upgrade-filter",
108 | ]:
109 | subparser[cmd].add_argument(
110 | "--output", "-o", type=str, metavar="PATH", help="Output file or directory"
111 | )
112 | for cmd in ["copy", "diff", "info", "ls", "trend", "usage"]:
113 | subparser[cmd].add_argument(
114 | "--output", "-o", type=str, metavar="FILE", help="Output file"
115 | )
116 |
117 | for cmd in [
118 | "codeclimate",
119 | "copy",
120 | "csv",
121 | "diff",
122 | "summary",
123 | "html",
124 | "emacs",
125 | "trend",
126 | "word",
127 | ]:
128 | subparser[cmd].add_argument(
129 | "--filter",
130 | "-b",
131 | type=str,
132 | metavar="FILE",
133 | help="Specify the filter file to apply. See README for format.",
134 | )
135 |
136 | # Command-specific options
137 | subparser["blame"].add_argument(
138 | "--code",
139 | "-c",
140 | metavar="PATH",
141 | type=str,
142 | help="Path to git repository; if not specified, the current working directory is used",
143 | )
144 | subparser["copy"].add_argument(
145 | "--timestamp",
146 | "-t",
147 | action="store_true",
148 | help='Append current timestamp to output filename in the "yyyymmddThhmmssZ" format used by '
149 | "the `sarif trend` command",
150 | )
151 | # codeclimate and csv default to no trimming
152 | for cmd in ["codeclimate", "csv"]:
153 | subparser[cmd].add_argument(
154 | "--autotrim",
155 | "-a",
156 | action="store_true",
157 |             help="Strip off the common prefix of paths in the output",
158 | )
159 | # word and html default to trimming
160 | for cmd in ["html", "emacs", "word"]:
161 | subparser[cmd].add_argument(
162 | "--no-autotrim",
163 | "-n",
164 | action="store_true",
165 | help="Do not strip off the common prefix of paths in the output document",
166 | )
167 | subparser[cmd].add_argument(
168 | "--image",
169 | type=str,
170 | help="Image to include at top of file - SARIF logo by default",
171 | )
172 |     # codeclimate, csv, word, html and emacs allow trimmable paths to be specified
173 | for cmd in ["codeclimate", "csv", "word", "html", "emacs"]:
174 | subparser[cmd].add_argument(
175 | "--trim",
176 | metavar="PREFIX",
177 | action="append",
178 | type=str,
179 | help="Prefix to strip from issue paths, e.g. the checkout directory on the build agent",
180 | )
181 | # Most commands take an arbitrary list of SARIF files or directories
182 | for cmd in _COMMANDS:
183 | if cmd not in ["diff", "upgrade-filter", "usage", "version"]:
184 | subparser[cmd].add_argument(
185 | "files_or_dirs",
186 | metavar="file_or_dir",
187 | type=str,
188 | nargs="*",
189 | default=["."],
190 | help="A SARIF file or a directory containing SARIF files",
191 | )
192 | subparser["diff"].add_argument(
193 | "old_file_or_dir",
194 | type=str,
195 | nargs=1,
196 | help="An old SARIF file or a directory containing the old SARIF files",
197 | )
198 | subparser["diff"].add_argument(
199 | "new_file_or_dir",
200 | type=str,
201 | nargs=1,
202 | help="A new SARIF file or a directory containing the new SARIF files",
203 | )
204 |
205 | subparser["trend"].add_argument(
206 | "--dateformat",
207 | "-f",
208 | type=str,
209 | choices=["dmy", "mdy", "ymd"],
210 | default="dmy",
211 | help="Date component order to use in output CSV. Default is `dmy`",
212 | )
213 |
214 | subparser["upgrade-filter"].add_argument(
215 | "files_or_dirs",
216 | metavar="file",
217 | type=str,
218 | nargs="*",
219 | default=["."],
220 | help="A v1-style blame-filter file",
221 | )
222 |
223 | return parser
224 |
225 |
226 | def _check(input_files: sarif_file.SarifFileSet, check_level):
227 | ret = 0
228 | if check_level:
229 | for severity in sarif_file.SARIF_SEVERITIES_WITH_NONE:
230 | ret += input_files.get_report().get_issue_count_for_severity(severity)
231 | if severity == check_level:
232 | break
233 | if ret > 0:
234 | sys.stderr.write(
235 | f"Check: exiting with return code {ret} due to issues at or above {check_level} "
236 | "severity\n"
237 | )
238 | return ret
239 |
240 |
241 | def _init_filtering(input_files, args):
242 | if args.filter:
243 | filters = load_filter_file(args.filter)
244 | input_files.init_general_filter(*filters)
245 |
246 |
247 | def _init_path_prefix_stripping(input_files, args, strip_by_default):
248 | if strip_by_default:
249 | autotrim = not args.no_autotrim
250 | else:
251 | autotrim = args.autotrim
252 | trim_paths = args.trim
253 | if autotrim or trim_paths:
254 | input_files.init_path_prefix_stripping(autotrim, trim_paths)
255 |
256 |
257 | def _ensure_dir(dir_path):
258 | """
259 | Create directory if it does not exist
260 | """
261 | if dir_path and not os.path.isdir(dir_path):
262 | os.makedirs(dir_path)
263 |
264 |
265 | def _prepare_output(
266 | input_files: sarif_file.SarifFileSet, output_arg, output_file_extension: str
267 | ):
268 | """
269 | Returns (output, output_multiple_files)
270 | output is args.output, or if that wasn't specified, a default output file based on the inputs
271 | and the file extension.
272 | output_multiple_files determines whether to output one file per input plus a totals file.
273 | It is false if there is only one input file, or args.output is a file that exists,
274 | or args.output ends with the expected file extension.
275 | """
276 | input_file_count = len(input_files)
277 | if input_file_count == 0:
278 | return ("static_analysis_output" + output_file_extension, False)
279 | if input_file_count == 1:
280 | derived_output_filename = (
281 | input_files[0].get_file_name_without_extension() + output_file_extension
282 | )
283 | if output_arg:
284 | if os.path.isdir(output_arg):
285 | return (os.path.join(output_arg, derived_output_filename), False)
286 | _ensure_dir(os.path.dirname(output_arg))
287 | return (output_arg, False)
288 | return (derived_output_filename, False)
289 | # Multiple input files
290 | if output_arg:
291 | if os.path.isfile(output_arg) or output_arg.strip().upper().endswith(
292 | output_file_extension.upper()
293 | ):
294 | # Output single file, even though there are multiple input files.
295 | _ensure_dir(os.path.dirname(output_arg))
296 | return (output_arg, False)
297 | _ensure_dir(output_arg)
298 | return (output_arg, True)
299 | return (os.getcwd(), True)
300 |
301 |
302 | ####################################### Command handlers #######################################
303 |
304 |
305 | def _blame_command(args):
306 | input_files = loader.load_sarif_files(*args.files_or_dirs)
307 | (output, multiple_file_output) = _prepare_output(input_files, args.output, ".sarif")
308 | blame_op.enhance_with_blame(
309 | input_files, args.code or os.getcwd(), output, multiple_file_output
310 | )
311 | return _check(input_files, args.check)
312 |
313 |
314 | def _codeclimate_command(args):
315 | input_files = loader.load_sarif_files(*args.files_or_dirs)
316 | input_files.init_default_line_number_1()
317 | _init_path_prefix_stripping(input_files, args, strip_by_default=False)
318 | _init_filtering(input_files, args)
319 | (output, multiple_file_output) = _prepare_output(input_files, args.output, ".json")
320 | codeclimate_op.generate(input_files, output, multiple_file_output)
321 | return _check(input_files, args.check)
322 |
323 |
324 | def _copy_command(args):
325 | input_files = loader.load_sarif_files(*args.files_or_dirs)
326 | _init_filtering(input_files, args)
327 | output = args.output or "out.sarif"
328 | output_sarif_file_set = copy_op.generate_sarif(
329 | input_files,
330 | output,
331 | args.timestamp,
332 | SARIF_TOOLS_PACKAGE_VERSION,
333 | " ".join(sys.argv),
334 | )
335 | return _check(output_sarif_file_set, args.check)
336 |
337 |
338 | def _csv_command(args):
339 | input_files = loader.load_sarif_files(*args.files_or_dirs)
340 | input_files.init_default_line_number_1()
341 | _init_path_prefix_stripping(input_files, args, strip_by_default=False)
342 | _init_filtering(input_files, args)
343 | (output, multiple_file_output) = _prepare_output(input_files, args.output, ".csv")
344 | csv_op.generate_csv(input_files, output, multiple_file_output)
345 | return _check(input_files, args.check)
346 |
347 |
348 | def _diff_command(args):
349 | old_sarif = loader.load_sarif_files(args.old_file_or_dir[0])
350 | new_sarif = loader.load_sarif_files(args.new_file_or_dir[0])
351 | _init_filtering(old_sarif, args)
352 | _init_filtering(new_sarif, args)
353 | return diff_op.print_diff(old_sarif, new_sarif, args.output, args.check)
354 |
355 |
356 | def _html_command(args):
357 | input_files = loader.load_sarif_files(*args.files_or_dirs)
358 | input_files.init_default_line_number_1()
359 | _init_path_prefix_stripping(input_files, args, strip_by_default=True)
360 | _init_filtering(input_files, args)
361 | (output, multiple_file_output) = _prepare_output(input_files, args.output, ".html")
362 | html_op.generate_html(input_files, args.image, output, multiple_file_output)
363 | return _check(input_files, args.check)
364 |
365 |
366 | def _emacs_command(args):
367 | input_files = loader.load_sarif_files(*args.files_or_dirs)
368 | input_files.init_default_line_number_1()
369 | _init_path_prefix_stripping(input_files, args, strip_by_default=True)
370 | _init_filtering(input_files, args)
371 | (output, multiple_file_output) = _prepare_output(input_files, args.output, ".txt")
372 | emacs_op.generate_compile(input_files, output, multiple_file_output)
373 | return _check(input_files, args.check)
374 |
375 |
376 | def _info_command(args):
377 | input_files = loader.load_sarif_files(*args.files_or_dirs)
378 | info_op.generate_info(input_files, args.output)
379 | if args.check:
380 | return _check(input_files, args.check)
381 | return 0
382 |
383 |
384 | def _ls_command(args):
385 | ls_op.print_ls(args.files_or_dirs, args.output)
386 | if args.check:
387 | input_files = loader.load_sarif_files(*args.files_or_dirs)
388 | return _check(input_files, args.check)
389 | return 0
390 |
391 |
392 | def _summary_command(args):
393 | input_files = loader.load_sarif_files(*args.files_or_dirs)
394 | _init_filtering(input_files, args)
395 | (output, multiple_file_output) = (None, False)
396 | if args.output:
397 | (output, multiple_file_output) = _prepare_output(
398 | input_files, args.output, ".txt"
399 | )
400 | summary_op.generate_summary(input_files, output, multiple_file_output)
401 | return _check(input_files, args.check)
402 |
403 |
404 | def _trend_command(args):
405 | input_files = loader.load_sarif_files(*args.files_or_dirs)
406 | input_files.init_default_line_number_1()
407 | _init_filtering(input_files, args)
408 | if args.output:
409 | _ensure_dir(os.path.dirname(args.output))
410 | output = args.output
411 | else:
412 | output = "static_analysis_trend.csv"
413 | trend_op.generate_trend_csv(input_files, output, args.dateformat)
414 | return _check(input_files, args.check)
415 |
416 |
417 | def _upgrade_filter_command(args):
418 | old_filter_files = args.files_or_dirs
419 | single_output_file = None
420 | output_dir = None
421 | if len(old_filter_files) == 1:
422 | if args.output and os.path.isdir(args.output):
423 | output_dir = args.output
424 | else:
425 | single_output_file = args.output or old_filter_files[0] + ".yaml"
426 | elif args.output:
427 | output_dir = args.output
428 | else:
429 |         output_dir = os.getcwd()  # no --output given; default to the current directory
430 | for old_filter_file in old_filter_files:
431 | output_file = single_output_file or os.path.join(
432 | output_dir, os.path.basename(old_filter_file) + ".yaml"
433 | )
434 | upgrade_filter_op.upgrade_filter_file(old_filter_file, output_file)
435 | return 0
436 |
437 |
438 | def _usage_command(args):
439 | if hasattr(args, "output") and args.output:
440 | with open(args.output, "w", encoding="utf-8") as file_out:
441 | ARG_PARSER.print_help(file_out)
442 | print("Wrote usage instructions to", args.output)
443 | else:
444 | ARG_PARSER.print_help()
445 | if args.check:
446 |         sys.stderr.write("Spurious --check argument\n")
447 | return 1
448 | return 0
449 |
450 |
451 | def _version_command(args):
452 | _print_version(not args.version)
453 |
454 |
455 | def _print_version(bare=False):
456 | print(
457 | SARIF_TOOLS_PACKAGE_VERSION
458 | if bare
459 | else f"SARIF tools v{SARIF_TOOLS_PACKAGE_VERSION}"
460 | )
461 |
462 |
463 | def _word_command(args):
464 | input_files = loader.load_sarif_files(*args.files_or_dirs)
465 | input_files.init_default_line_number_1()
466 | _init_path_prefix_stripping(input_files, args, strip_by_default=True)
467 | _init_filtering(input_files, args)
468 | (output, multiple_file_output) = _prepare_output(input_files, args.output, ".docx")
469 | word_op.generate_word_docs_from_sarif_inputs(
470 | input_files, args.image, output, multiple_file_output
471 | )
472 | return _check(input_files, args.check)
473 |
474 |
475 | _COMMANDS = {
476 | "blame": {
477 | "fn": _blame_command,
478 | "desc": "Enhance SARIF file with information from `git blame`",
479 | },
480 | "codeclimate": {
481 | "fn": _codeclimate_command,
482 | "desc": "Write a JSON representation in Code Climate format of SARIF file(s) "
483 | "for viewing as a Code Quality report in GitLab UI",
484 | },
485 | "copy": {
486 | "fn": _copy_command,
487 | "desc": "Write a new SARIF file containing optionally-filtered data from other SARIF file(s)",
488 | },
489 | "csv": {
490 | "fn": _csv_command,
491 |         "desc": "Write a CSV file listing the issues from the SARIF file(s) specified",
492 | },
493 | "diff": {
494 | "fn": _diff_command,
495 | "desc": "Find the difference between two [sets of] SARIF files",
496 | },
497 | "emacs": {
498 | "fn": _emacs_command,
499 | "desc": "Write a representation of SARIF file(s) for viewing in emacs",
500 | },
501 | "html": {
502 | "fn": _html_command,
503 | "desc": "Write an HTML representation of SARIF file(s) for viewing in a web browser",
504 | },
505 | "info": {
506 | "fn": _info_command,
507 | "desc": "Print information about SARIF file(s) structure",
508 | },
509 | "ls": {
510 | "fn": _ls_command,
511 | "desc": "List all SARIF files in the directories specified",
512 | },
513 | "summary": {
514 | "fn": _summary_command,
515 |         "desc": "Write a text summary with the counts of issues from the SARIF file(s) specified",
516 | },
517 | "trend": {
518 | "fn": _trend_command,
519 | "desc": "Write a CSV file with time series data from SARIF files with "
520 | '"yyyymmddThhmmssZ" timestamps in their filenames',
521 | },
522 | "upgrade-filter": {
523 | "fn": _upgrade_filter_command,
524 | "desc": "Upgrade a sarif-tools v1-style blame filter file to a v2-style filter YAML file",
525 | },
526 | "usage": {
527 | "fn": _usage_command,
528 | "desc": "(Command optional) - print usage and exit",
529 | },
530 | "version": {"fn": _version_command, "desc": "Print version and exit"},
531 | "word": {
532 | "fn": _word_command,
533 | "desc": "Produce MS Word .docx summaries of the SARIF files specified",
534 | },
535 | }
536 |
537 | ARG_PARSER = _create_arg_parser()
538 |
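539 | # Illustrative invocations only (hypothetical file names), based on the argument
540 | # parser defined above:
541 | #
542 | #   sarif summary my-scan.sarif
543 | #   sarif csv my-scan.sarif --output report.csv --filter filter.yaml
544 | #   sarif diff old.sarif new.sarif --check error
545 | #   sarif upgrade-filter old-blame-filter.txt --output filter.yaml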
--------------------------------------------------------------------------------