├── tests ├── __init__.py ├── utils.py ├── ops │ ├── version │ │ └── test_version.py │ ├── ls │ │ └── test_ls.py │ ├── upgrade_filter │ │ └── test_upgrade_filter.py │ ├── csv │ │ └── test_csv.py │ ├── codeclimate │ │ └── test_codeclimate.py │ ├── emacs │ │ └── test_emacs.py │ ├── summary │ │ └── test_summary.py │ ├── word │ │ └── test_word.py │ ├── info │ │ └── test_info.py │ ├── copy │ │ └── test_copy.py │ ├── trend │ │ └── test_trend.py │ ├── diff │ │ ├── test_diff.py │ │ └── test_diff_issues_reordered.py │ ├── html │ │ └── test_html.py │ └── blame │ │ └── test_blame.py ├── test_check_switch.py ├── test_sarif_file_utils.py └── test_general_filter.py ├── sarif ├── cmdline │ ├── __init__.py │ └── main.py ├── filter │ ├── __init__.py │ ├── filter_stats.py │ └── general_filter.py ├── operations │ ├── __init__.py │ ├── templates │ │ ├── sarif_emacs.txt │ │ └── sarif_summary.html │ ├── ls_op.py │ ├── csv_op.py │ ├── summary_op.py │ ├── upgrade_filter_op.py │ ├── codeclimate_op.py │ ├── trend_op.py │ ├── emacs_op.py │ ├── copy_op.py │ ├── info_op.py │ ├── html_op.py │ ├── blame_op.py │ ├── diff_op.py │ └── word_op.py ├── __init__.py ├── __main__.py ├── charts.py ├── loader.py ├── issues_report.py └── sarif_file_utils.py ├── poetry.toml ├── azure-pipelines ├── templates │ ├── globals.yml │ ├── use_python.yml │ └── build_stage.yml ├── build.yml └── release.yml ├── .pylintrc ├── .gitignore ├── SUPPORT.md ├── .vscode └── extensions.json ├── CODE_OF_CONDUCT.md ├── .github └── workflows │ ├── build.yml │ └── validation.yml ├── pyproject.toml ├── LICENSE ├── SECURITY.md ├── CONTRIBUTING.md └── CHANGELOG.md /tests/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /sarif/cmdline/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /sarif/filter/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /sarif/operations/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /poetry.toml: -------------------------------------------------------------------------------- 1 | [virtualenvs] 2 | in-project = true 3 | -------------------------------------------------------------------------------- /azure-pipelines/templates/globals.yml: -------------------------------------------------------------------------------- 1 | variables: 2 | ARTIFACT_NAME_WHEEL: wheel 3 | -------------------------------------------------------------------------------- /.pylintrc: -------------------------------------------------------------------------------- 1 | [TYPECHECK] 2 | 3 | ignored-classes=WD_PARAGRAPH_ALIGNMENT,WD_TAB_ALIGNMENT 4 | 5 | -------------------------------------------------------------------------------- /sarif/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | Top-level version information for sarif-tools. 
3 | """ 4 | 5 | __version__ = "3.0.5" 6 | -------------------------------------------------------------------------------- /sarif/__main__.py: -------------------------------------------------------------------------------- 1 | """ 2 | This file supports `python -m sarif` invocation. 3 | """ 4 | 5 | import sys 6 | 7 | from sarif.cmdline import main 8 | 9 | sys.exit(main.main()) 10 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | /build 2 | /dist 3 | *.egg-info 4 | *.pyc 5 | *.orig 6 | /.venv 7 | /.vscode 8 | /.idea 9 | /.pytest_cache 10 | .DS_Store 11 | *.sarif 12 | *.csv 13 | .coverage 14 | coverage.xml 15 | *filter.yaml 16 | -------------------------------------------------------------------------------- /azure-pipelines/templates/use_python.yml: -------------------------------------------------------------------------------- 1 | steps: 2 | - task: UsePythonVersion@0 3 | inputs: 4 | versionSpec: "$(python.version)" 5 | architecture: "$(architecture)" 6 | displayName: "Use Python $(python.version) $(architecture)" 7 | -------------------------------------------------------------------------------- /SUPPORT.md: -------------------------------------------------------------------------------- 1 | # Support 2 | 3 | ## How to file issues and get help 4 | 5 | This project uses GitHub Issues to track bugs and feature requests. Please search the existing 6 | issues before filing new issues to avoid duplicates. For new issues, file your bug or 7 | feature request as a new Issue. 8 | -------------------------------------------------------------------------------- /.vscode/extensions.json: -------------------------------------------------------------------------------- 1 | { 2 | // See https://go.microsoft.com/fwlink/?LinkId=827846 3 | // for the documentation about the extensions.json format 4 | "recommendations": [ 5 | "charliermarsh.ruff", 6 | "ms-python.python", 7 | "ms-python.vscode-pylance" 8 | ] 9 | } 10 | -------------------------------------------------------------------------------- /tests/utils.py: -------------------------------------------------------------------------------- 1 | import json 2 | import os 3 | 4 | 5 | def get_sarif_schema(): 6 | # JSON Schema file for SARIF obtained from https://docs.oasis-open.org/sarif/sarif/v2.1.0/cs01/schemas/ 7 | sarif_schema_file = os.path.join( 8 | os.path.dirname(__file__), "sarif-schema-2.1.0.json" 9 | ) 10 | with open(sarif_schema_file, "rb") as f_schema: 11 | return json.load(f_schema) 12 | -------------------------------------------------------------------------------- /tests/ops/version/test_version.py: -------------------------------------------------------------------------------- 1 | import pathlib 2 | 3 | import sarif 4 | 5 | 6 | def test_version(): 7 | with open( 8 | pathlib.Path(__file__).parent.parent.parent.parent / "pyproject.toml" 9 | ) as pyproject_in: 10 | for pyproject_line in pyproject_in.readlines(): 11 | if pyproject_line.startswith('version = "'): 12 | assert pyproject_line.strip() == f'version = "{sarif.__version__}"' 13 | -------------------------------------------------------------------------------- /CODE_OF_CONDUCT.md: -------------------------------------------------------------------------------- 1 | # Microsoft Open Source Code of Conduct 2 | 3 | This project has adopted the [Microsoft Open Source Code of Conduct](https://opensource.microsoft.com/codeofconduct/). 
4 | 5 | Resources: 6 | 7 | - [Microsoft Open Source Code of Conduct](https://opensource.microsoft.com/codeofconduct/) 8 | - [Microsoft Code of Conduct FAQ](https://opensource.microsoft.com/codeofconduct/faq/) 9 | - Contact [opencode@microsoft.com](mailto:opencode@microsoft.com) with questions or concerns 10 | -------------------------------------------------------------------------------- /sarif/operations/templates/sarif_emacs.txt: -------------------------------------------------------------------------------- 1 | -*- compilation -*- 2 | 3 | Sarif Summary: {{ report_type }} 4 | Document generated on: {{ report_date }} 5 | Total number of distinct issues of all severities ({{ severities }}): {{ total }} 6 | {% if filtered -%} 7 |
{{ filtered }}
8 | {%- endif %} 9 | 10 | {% for problem in problems %} 11 | Severity : {{ problem.type }} [{{ problem.count }}] 12 | {% for error in problem.details -%} 13 | {% for line in error.details -%} 14 | {{ line.Location }}:{{ line.Line }}: {{ error.code }} 15 | {% endfor %} 16 | {% endfor %} 17 | {% endfor -%} 18 | 19 | -------------------------------------------------------------------------------- /tests/test_check_switch.py: -------------------------------------------------------------------------------- 1 | import datetime 2 | from sarif.cmdline.main import _check 3 | from sarif import sarif_file 4 | 5 | SARIF = { 6 | "runs": [ 7 | { 8 | "tool": {"driver": {"name": "Tool"}}, 9 | "results": [{"level": "warning", "ruleId": "rule"}], 10 | } 11 | ] 12 | } 13 | 14 | 15 | def test_check(): 16 | fileSet = sarif_file.SarifFileSet() 17 | fileSet.add_file( 18 | sarif_file.SarifFile("SARIF", SARIF, mtime=datetime.datetime.now()) 19 | ) 20 | 21 | result = _check(fileSet, "error") 22 | assert result == 0 23 | 24 | result = _check(fileSet, "warning") 25 | assert result == 1 26 | 27 | result = _check(fileSet, "note") 28 | assert result == 1 29 | -------------------------------------------------------------------------------- /tests/ops/ls/test_ls.py: -------------------------------------------------------------------------------- 1 | import os 2 | import tempfile 3 | 4 | from sarif.operations import ls_op 5 | 6 | 7 | def test_ls(): 8 | file_names = ["file1.sarif", "file2.sarif", "aaaa.sarif"] 9 | 10 | with tempfile.TemporaryDirectory() as tmp: 11 | for file_name in file_names: 12 | with open(os.path.join(tmp, file_name), "wb") as f_in: 13 | f_in.write("{}".encode()) 14 | 15 | output_path = os.path.join(tmp, "output.txt") 16 | ls_op.print_ls([tmp], output_path) 17 | 18 | with open(output_path, "rb") as f_out: 19 | output = f_out.read().decode().splitlines() 20 | 21 | assert len(output) == len(file_names) + 1 22 | assert output[0] == tmp + ":" 23 | assert output[1:] == sorted([" " + file_name for file_name in file_names]) 24 | -------------------------------------------------------------------------------- /azure-pipelines/build.yml: -------------------------------------------------------------------------------- 1 | name: Build 2 | pr: none 3 | trigger: 4 | branches: 5 | include: 6 | - main 7 | paths: 8 | exclude: 9 | - azure-pipelines/release.yml 10 | 11 | resources: 12 | repositories: 13 | - repository: 1ESPipelineTemplates 14 | type: git 15 | name: 1ESPipelineTemplates/1ESPipelineTemplates 16 | ref: refs/tags/release 17 | 18 | variables: 19 | TeamName: sarif-tools 20 | 21 | extends: 22 | template: v1/1ES.Official.PipelineTemplate.yml@1ESPipelineTemplates 23 | parameters: 24 | sdl: 25 | sourceAnalysisPool: VSEngSS-MicroBuild2022-1ES 26 | pool: 27 | name: AzurePipelines-EO 28 | demands: 29 | - ImageOverride -equals 1ESPT-Ubuntu22.04 30 | os: Linux 31 | customBuildTags: 32 | - ES365AIMigrationTooling 33 | stages: 34 | - template: templates/build_stage.yml@self 35 | -------------------------------------------------------------------------------- /.github/workflows/build.yml: -------------------------------------------------------------------------------- 1 | name: Build 2 | 3 | on: 4 | pull_request: 5 | branches: 6 | - main 7 | 8 | jobs: 9 | build: 10 | if: github.repository == 'microsoft/sarif-tools' 11 | runs-on: ubuntu-latest 12 | name: Build 13 | 14 | steps: 15 | - uses: actions/checkout@v4 16 | with: 17 | fetch-depth: 0 18 | 19 | - name: Setup Python 20 | uses: actions/setup-python@v5 21 | with: 22 | 
python-version: '3.8' 23 | 24 | - name: Install Poetry 25 | run: pip install poetry 26 | 27 | - name: Poetry Build 28 | run: poetry build --no-interaction 29 | 30 | - name: Get Verison 31 | id: get_version 32 | shell: bash 33 | run: echo "releaseVersion=$(poetry version --short)" >> $GITHUB_OUTPUT 34 | 35 | - uses: actions/upload-artifact@v4 36 | with: 37 | name: wheel 38 | path: dist/sarif_tools-${{ steps.get_version.outputs.releaseVersion }}-py3-none-any.whl 39 | -------------------------------------------------------------------------------- /sarif/operations/ls_op.py: -------------------------------------------------------------------------------- 1 | """ 2 | Code for `sarif ls` command. 3 | """ 4 | 5 | from typing import List 6 | 7 | from sarif import loader 8 | 9 | 10 | def print_ls(files_or_dirs: List[str], output): 11 | """ 12 | Print a SARIF file listing for each of the input files or directories. 13 | """ 14 | dir_result = [] 15 | for path in files_or_dirs: 16 | dir_result.append(f"{path}:") 17 | sarif_files = loader.load_sarif_files(path) 18 | if sarif_files: 19 | sarif_file_names = [f.get_file_name() for f in sarif_files] 20 | for file_name in sorted(sarif_file_names): 21 | dir_result.append(f" {file_name}") 22 | else: 23 | dir_result.append(" (None)") 24 | if output: 25 | print("Writing file listing to", output) 26 | with open(output, "w", encoding="utf-8") as file_out: 27 | file_out.writelines(d + "\n" for d in dir_result) 28 | else: 29 | for directory in dir_result: 30 | print(directory) 31 | print() 32 | -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [tool.poetry] 2 | name = "sarif-tools" 3 | version = "3.0.5" 4 | description = "SARIF tools" 5 | authors = ["Microsoft"] 6 | readme = "README.md" 7 | homepage = "https://github.com/microsoft/sarif-tools" 8 | packages = [ 9 | { include = "sarif" } 10 | ] 11 | classifiers = [ 12 | "Programming Language :: Python :: 3", 13 | "License :: OSI Approved :: MIT License", 14 | "Operating System :: OS Independent" 15 | ] 16 | 17 | [tool.poetry.urls] 18 | "Bug Tracker" = "https://github.com/microsoft/sarif-tools/issues" 19 | 20 | [tool.poetry.dependencies] 21 | jinja2 = "^3.1.6" 22 | jsonpath-ng = "^1.6.0" 23 | matplotlib = "^3.7" # Need Python 3.9+ for newer 24 | python = "^3.8" 25 | python-docx = "^1.1.2" 26 | pyyaml = "^6.0.1" 27 | 28 | [tool.poetry.dev-dependencies] 29 | jsonschema = "^4.23.0" 30 | pylint = "^3.2" 31 | pytest = "^8.3" 32 | pytest-cov = "^5.0" 33 | ruff = "^0.6.8" 34 | 35 | [tool.poetry.scripts] 36 | sarif = "sarif.cmdline.main:main" 37 | 38 | [build-system] 39 | requires = ["poetry-core>=1.0.0"] 40 | build-backend = "poetry.core.masonry.api" 41 | -------------------------------------------------------------------------------- /tests/ops/upgrade_filter/test_upgrade_filter.py: -------------------------------------------------------------------------------- 1 | import os 2 | import tempfile 3 | 4 | from sarif.operations import upgrade_filter_op 5 | 6 | INPUT_FILTER = """ 7 | description: Test filter 8 | #comment 9 | +: include_with_prefix 10 | include_without_prefix 11 | -: exclude 12 | """ 13 | 14 | 15 | EXPECTED_OUTPUT_TXT = """configuration: 16 | check-line-number: true 17 | default-include: true 18 | description: Test filter 19 | exclude: 20 | - author-mail: exclude 21 | include: 22 | - author-mail: include_with_prefix 23 | - author-mail: include_without_prefix 24 | """ 25 | 26 | 27 
| def test_upgrade_filter(): 28 | with tempfile.TemporaryDirectory() as tmp: 29 | input_file_path = os.path.join(tmp, "input_filter.txt") 30 | with open(input_file_path, "wb") as f_in: 31 | f_in.write(INPUT_FILTER.encode()) 32 | 33 | output_file_path = os.path.join(tmp, "output.txt") 34 | upgrade_filter_op.upgrade_filter_file(input_file_path, output_file_path) 35 | 36 | with open(output_file_path, "rb") as f_out: 37 | output = f_out.read().decode() 38 | 39 | assert output == EXPECTED_OUTPUT_TXT.replace("\n", os.linesep) 40 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) Microsoft Corporation. 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE 22 | -------------------------------------------------------------------------------- /azure-pipelines/templates/build_stage.yml: -------------------------------------------------------------------------------- 1 | stages: 2 | - stage: Build 3 | variables: 4 | - template: globals.yml 5 | jobs: 6 | - job: Build 7 | 8 | templateContext: 9 | outputs: 10 | - output: pipelineArtifact 11 | targetPath: $(Build.StagingDirectory)/dist 12 | sbomBuildDropPath: $(Build.StagingDirectory)/dist 13 | artifactName: $(ARTIFACT_NAME_WHEEL) 14 | 15 | variables: 16 | python.version: "3.8" 17 | architecture: x64 18 | 19 | steps: 20 | - template: use_python.yml@self 21 | 22 | - script: pipx install poetry 23 | displayName: Install Poetry 24 | 25 | - script: poetry build --no-interaction 26 | displayName: poetry build 27 | 28 | - powershell: | 29 | $releaseVersion = & poetry version --short 30 | echo "releaseVersion: $releaseVersion" 31 | echo "##vso[task.setvariable variable=releaseVersion]$releaseVersion" 32 | echo "##vso[task.setvariable variable=releaseVersionWithPrefix;isOutput=true]v$releaseVersion" 33 | displayName: Get release version 34 | name: getReleaseVersionStep 35 | 36 | - task: CopyFiles@2 37 | displayName: Copy wheel and tarball 38 | inputs: 39 | sourceFolder: dist 40 | targetFolder: $(Build.StagingDirectory)/dist 41 | contents: | 42 | sarif_tools-$(releaseVersion)-py3-none-any.whl 43 | sarif_tools-$(releaseVersion).tar.gz 44 | -------------------------------------------------------------------------------- /.github/workflows/validation.yml: -------------------------------------------------------------------------------- 1 | name: Validation 2 | 3 | on: 4 | push: 5 
| branches: 6 | - main 7 | pull_request: 8 | branches: 9 | - main 10 | 11 | jobs: 12 | hygiene: 13 | if: github.repository == 'microsoft/sarif-tools' 14 | runs-on: ubuntu-latest 15 | name: Hygiene 16 | permissions: 17 | contents: read 18 | pull-requests: write 19 | steps: 20 | - uses: actions/checkout@v4 21 | 22 | - name: Setup Python 23 | uses: actions/setup-python@v5 24 | with: 25 | python-version: '3.8' 26 | 27 | - name: Install Poetry 28 | run: pip install poetry 29 | 30 | - name: Validate pyproject.toml and poetry.lock 31 | run: poetry check 32 | 33 | - name: Install dependencies 34 | run: poetry install 35 | 36 | - name: Validate code formatting 37 | run: poetry run ruff format --check 38 | 39 | - name: Validate code style 40 | run: poetry run ruff check 41 | 42 | 43 | test: 44 | if: github.repository == 'microsoft/sarif-tools' 45 | runs-on: ubuntu-latest 46 | name: Test 47 | steps: 48 | - uses: actions/checkout@v4 49 | 50 | - name: Setup Python 51 | uses: actions/setup-python@v5 52 | with: 53 | python-version: '3.8' 54 | 55 | - name: Install Poetry 56 | run: pip install poetry 57 | 58 | - name: Install dependencies 59 | run: poetry install --with dev 60 | 61 | - name: Run tests 62 | run: poetry run pytest 63 | -------------------------------------------------------------------------------- /sarif/charts.py: -------------------------------------------------------------------------------- 1 | """ 2 | Functions for generating charts from SARIF data 3 | """ 4 | 5 | import io 6 | import matplotlib.pyplot as plt 7 | 8 | 9 | def generate_severity_pie_chart(report, output_file=None): 10 | """ 11 | Generate a pie chart from the breakdown of issues by severity. 12 | The slices are ordered and plotted counter-clockwise. The return 13 | value is truthy if the number of issues is not zero, False otherwise. 14 | If `output_file` is `None`, return the bytes of the pie chart image in 15 | png format. Otherwise, write the bytes to the file specified (image 16 | format inferred from filename). 
17 | """ 18 | sizes = [] 19 | labels = [] 20 | explode = [] 21 | for severity in report.get_severities(): 22 | count = report.get_issue_count_for_severity(severity) 23 | if count > 0: 24 | sizes.append(count) 25 | labels.append(severity) 26 | explode.append(0.1) # could add more logic to highlight specific severities 27 | 28 | any_issues = bool(sizes) 29 | if any_issues: 30 | _fig1, ax1 = plt.subplots() 31 | ax1.pie( 32 | sizes, 33 | explode=explode, 34 | labels=labels, 35 | autopct="%1.1f%%", 36 | shadow=True, 37 | startangle=90, 38 | ) 39 | ax1.axis("equal") 40 | 41 | if output_file: 42 | plt.savefig(output_file) 43 | else: 44 | byte_buffer = io.BytesIO() 45 | plt.savefig(byte_buffer, format="png") 46 | return byte_buffer.getbuffer() 47 | return any_issues 48 | -------------------------------------------------------------------------------- /tests/ops/csv/test_csv.py: -------------------------------------------------------------------------------- 1 | import datetime 2 | import os 3 | import tempfile 4 | 5 | from sarif.operations import csv_op 6 | from sarif import sarif_file 7 | 8 | INPUT_SARIF = { 9 | "$schema": "https://schemastore.azurewebsites.net/schemas/json/sarif-2.1.0-rtm.5.json", 10 | "version": "2.1.0", 11 | "runs": [ 12 | { 13 | "tool": {"driver": {"name": "unit test"}}, 14 | "results": [ 15 | { 16 | "ruleId": "CA2101", 17 | "level": "error", 18 | "locations": [ 19 | { 20 | "physicalLocation": { 21 | "artifactLocation": { 22 | "uri": "file:///C:/Code/main.c", 23 | "index": 0, 24 | }, 25 | "region": {"startLine": 24, "startColumn": 9}, 26 | } 27 | } 28 | ], 29 | } 30 | ], 31 | } 32 | ], 33 | } 34 | 35 | 36 | EXPECTED_OUTPUT_CSV = [ 37 | "Tool,Severity,Code,Description,Location,Line", 38 | "unit test,error,CA2101,CA2101,file:///C:/Code/main.c,24", 39 | ] 40 | 41 | 42 | def test_csv(): 43 | mtime = datetime.datetime.now() 44 | input_sarif_file = sarif_file.SarifFile("INPUT_SARIF", INPUT_SARIF, mtime=mtime) 45 | 46 | input_sarif_file_set = sarif_file.SarifFileSet() 47 | input_sarif_file_set.files.append(input_sarif_file) 48 | 49 | with tempfile.TemporaryDirectory() as tmp: 50 | file_path = os.path.join(tmp, "output.csv") 51 | csv_op.generate_csv( 52 | input_sarif_file_set, file_path, output_multiple_files=False 53 | ) 54 | 55 | with open(file_path, "rb") as f_in: 56 | output_lines = f_in.read().decode().splitlines() 57 | 58 | assert output_lines == EXPECTED_OUTPUT_CSV 59 | -------------------------------------------------------------------------------- /sarif/loader.py: -------------------------------------------------------------------------------- 1 | """ 2 | Code to load SARIF files from disk. 3 | """ 4 | 5 | import glob 6 | import json 7 | import os 8 | 9 | from sarif.sarif_file import has_sarif_file_extension, SarifFile, SarifFileSet 10 | 11 | 12 | def _add_path_to_sarif_file_set(path, sarif_file_set): 13 | if os.path.isdir(path): 14 | sarif_file_set.add_dir(_load_dir(path)) 15 | return True 16 | if os.path.isfile(path): 17 | sarif_file_set.add_file(load_sarif_file(path)) 18 | return True 19 | return False 20 | 21 | 22 | def load_sarif_files(*args) -> SarifFileSet: 23 | """ 24 | Load SARIF files specified as individual filenames or directories. Return a SarifFileSet 25 | object. 
26 | """ 27 | ret = SarifFileSet() 28 | if args: 29 | for path in args: 30 | path_exists = _add_path_to_sarif_file_set(path, ret) 31 | if not path_exists: 32 | for resolved_path in glob.glob(path, recursive=True): 33 | if _add_path_to_sarif_file_set(resolved_path, ret): 34 | path_exists = True 35 | if not path_exists: 36 | print(f"Warning: input path {path} not found") 37 | return ret 38 | 39 | 40 | def _load_dir(path): 41 | subdir = SarifFileSet() 42 | for dirpath, _dirnames, filenames in os.walk(path): 43 | for filename in filenames: 44 | if has_sarif_file_extension(filename): 45 | subdir.add_file(load_sarif_file(os.path.join(dirpath, filename))) 46 | return subdir 47 | 48 | 49 | def load_sarif_file(file_path: str) -> SarifFile: 50 | """ 51 | Load JSON data from a file and return as a SarifFile object. 52 | As per https://tools.ietf.org/id/draft-ietf-json-rfc4627bis-09.html#rfc.section.8.1, JSON 53 | data SHALL be encoded in utf-8. 54 | """ 55 | try: 56 | with open(file_path, encoding="utf-8-sig") as file_in: 57 | data = json.load(file_in) 58 | return SarifFile(file_path, data) 59 | except Exception as exception: 60 | raise IOError(f"Cannot load {file_path}") from exception 61 | -------------------------------------------------------------------------------- /tests/ops/codeclimate/test_codeclimate.py: -------------------------------------------------------------------------------- 1 | import datetime 2 | import json 3 | import os 4 | import tempfile 5 | 6 | from sarif.operations import codeclimate_op 7 | from sarif import sarif_file 8 | 9 | INPUT_SARIF = { 10 | "$schema": "https://schemastore.azurewebsites.net/schemas/json/sarif-2.1.0-rtm.5.json", 11 | "version": "2.1.0", 12 | "runs": [ 13 | { 14 | "tool": {"driver": {"name": "unit test"}}, 15 | "results": [ 16 | { 17 | "ruleId": "CA2101", 18 | "level": "error", 19 | "locations": [ 20 | { 21 | "physicalLocation": { 22 | "artifactLocation": { 23 | "uri": "file:///C:/Code/main.c", 24 | "index": 0, 25 | }, 26 | "region": {"startLine": 24, "startColumn": 9}, 27 | } 28 | } 29 | ], 30 | } 31 | ], 32 | } 33 | ], 34 | } 35 | 36 | 37 | EXPECTED_OUTPUT_JSON = [ 38 | { 39 | "type": "issue", 40 | "check_name": "CA2101", 41 | "description": "CA2101", 42 | "categories": ["Bug Risk"], 43 | "location": { 44 | "path": "file:///C:/Code/main.c", 45 | "lines": {"begin": 24}, 46 | }, 47 | "severity": "major", 48 | "fingerprint": "e972b812ed32bf29ee306141244050b9", 49 | } 50 | ] 51 | 52 | 53 | def test_code_climate(): 54 | mtime = datetime.datetime.now() 55 | input_sarif_file = sarif_file.SarifFile("INPUT_SARIF", INPUT_SARIF, mtime=mtime) 56 | 57 | input_sarif_file_set = sarif_file.SarifFileSet() 58 | input_sarif_file_set.files.append(input_sarif_file) 59 | 60 | with tempfile.TemporaryDirectory() as tmp: 61 | file_path = os.path.join(tmp, "codeclimate.json") 62 | codeclimate_op.generate( 63 | input_sarif_file_set, file_path, output_multiple_files=False 64 | ) 65 | 66 | with open(file_path, "rb") as f_in: 67 | output_json = json.load(f_in) 68 | 69 | assert output_json == EXPECTED_OUTPUT_JSON 70 | -------------------------------------------------------------------------------- /tests/ops/emacs/test_emacs.py: -------------------------------------------------------------------------------- 1 | import datetime 2 | import os 3 | import tempfile 4 | 5 | from sarif.operations import emacs_op 6 | from sarif import sarif_file 7 | 8 | INPUT_SARIF = { 9 | "$schema": "https://schemastore.azurewebsites.net/schemas/json/sarif-2.1.0-rtm.5.json", 10 | "version": "2.1.0", 11 | 
"runs": [ 12 | { 13 | "tool": {"driver": {"name": "unit test"}}, 14 | "results": [ 15 | { 16 | "ruleId": "CA2101", 17 | "level": "error", 18 | "locations": [ 19 | { 20 | "physicalLocation": { 21 | "artifactLocation": { 22 | "uri": "file:///C:/Code/main.c", 23 | "index": 0, 24 | }, 25 | "region": {"startLine": 24, "startColumn": 9}, 26 | } 27 | } 28 | ], 29 | } 30 | ], 31 | } 32 | ], 33 | } 34 | 35 | 36 | EXPECTED_OUTPUT_TXT = """-*- compilation -*- 37 | 38 | Sarif Summary: unit test 39 | Document generated on: 40 | Total number of distinct issues of all severities (error, warning, note): 1 41 | 42 | 43 | 44 | Severity : error [1] 45 | file:///C:/Code/main.c:24: CA2101 46 | 47 | 48 | 49 | Severity : warning [0] 50 | 51 | 52 | Severity : note [0] 53 | 54 | """ 55 | 56 | 57 | def test_emacs(): 58 | mtime = datetime.datetime.now() 59 | input_sarif_file = sarif_file.SarifFile("INPUT_SARIF", INPUT_SARIF, mtime=mtime) 60 | 61 | input_sarif_file_set = sarif_file.SarifFileSet() 62 | input_sarif_file_set.files.append(input_sarif_file) 63 | 64 | with tempfile.TemporaryDirectory() as tmp: 65 | file_path = os.path.join(tmp, "output.txt") 66 | emacs_op.generate_compile( 67 | input_sarif_file_set, file_path, output_multiple_files=False, date_val=mtime 68 | ) 69 | 70 | with open(file_path, "rb") as f_in: 71 | output = f_in.read().decode() 72 | 73 | assert output == EXPECTED_OUTPUT_TXT.replace("\n", os.linesep).replace( 74 | "", mtime.strftime("%Y-%m-%d %H:%M:%S.%f") 75 | ) 76 | -------------------------------------------------------------------------------- /tests/ops/summary/test_summary.py: -------------------------------------------------------------------------------- 1 | import datetime 2 | import json 3 | import os 4 | import tempfile 5 | 6 | from sarif.operations import summary_op 7 | from sarif import sarif_file 8 | 9 | INPUT_SARIF = """{ 10 | "$schema": "https://schemastore.azurewebsites.net/schemas/json/sarif-2.1.0-rtm.5.json", 11 | "version": "2.1.0", 12 | "runs": [ 13 | { 14 | "tool": {"driver": {"name": "unit test"}}, 15 | "results": [ 16 | { 17 | "ruleId": "CA2103", 18 | "level": "error" 19 | }, 20 | { 21 | "ruleId": "CA2102", 22 | "level": "warning" 23 | }, 24 | { 25 | "ruleId": "CA2101", 26 | "level": "warning" 27 | }, 28 | { 29 | "ruleId": "CA2101", 30 | "level": "error" 31 | }, 32 | { 33 | "ruleId": "CA2101", 34 | "level": "note" 35 | }, 36 | { 37 | "ruleId": "CA2101", 38 | "level": "none" 39 | }, 40 | { 41 | "ruleId": "CA2101", 42 | "level": "error" 43 | } 44 | ] 45 | } 46 | ] 47 | } 48 | """ 49 | 50 | EXPECTED_OUTPUT_TXT = """ 51 | error: 3 52 | - CA2101: 2 53 | - CA2103: 1 54 | 55 | warning: 2 56 | - CA2102: 1 57 | - CA2101: 1 58 | 59 | note: 1 60 | - CA2101: 1 61 | 62 | none: 1 63 | - CA2101: 1 64 | """ 65 | 66 | 67 | def test_summary(): 68 | with tempfile.TemporaryDirectory() as tmp: 69 | input_sarif_file_path = os.path.join(tmp, "input.sarif") 70 | with open(input_sarif_file_path, "wb") as f_in: 71 | f_in.write(INPUT_SARIF.encode()) 72 | 73 | input_sarif = json.loads(INPUT_SARIF) 74 | 75 | input_sarif_file = sarif_file.SarifFile( 76 | input_sarif_file_path, input_sarif, mtime=datetime.datetime.now() 77 | ) 78 | 79 | input_sarif_file_set = sarif_file.SarifFileSet() 80 | input_sarif_file_set.files.append(input_sarif_file) 81 | 82 | file_path = os.path.join(tmp, "output.txt") 83 | summary_op.generate_summary( 84 | input_sarif_file_set, file_path, output_multiple_files=False 85 | ) 86 | 87 | with open(file_path, "rb") as f_out: 88 | output = f_out.read().decode() 89 | 90 | assert 
output == EXPECTED_OUTPUT_TXT.replace("\n", os.linesep) 91 | -------------------------------------------------------------------------------- /sarif/operations/csv_op.py: -------------------------------------------------------------------------------- 1 | """ 2 | Code for `sarif csv` command. 3 | """ 4 | 5 | import csv 6 | import os 7 | 8 | from sarif import sarif_file 9 | from sarif.sarif_file import SarifFileSet 10 | from sarif.sarif_file_utils import combine_record_code_and_description 11 | 12 | 13 | def generate_csv(input_files: SarifFileSet, output: str, output_multiple_files: bool): 14 | """ 15 | Generate a CSV file containing the list of issues from the SARIF files. 16 | sarif_dict is a dict from filename to deserialized SARIF data. 17 | """ 18 | output_file = output 19 | if output_multiple_files: 20 | for input_file in input_files: 21 | output_file_name = input_file.get_file_name_without_extension() + ".csv" 22 | print( 23 | "Writing CSV summary of", 24 | input_file.get_file_name(), 25 | "to", 26 | output_file_name, 27 | ) 28 | _write_to_csv(input_file, os.path.join(output, output_file_name)) 29 | filter_stats = input_file.get_filter_stats() 30 | if filter_stats: 31 | print(f" Results are filtered by {filter_stats}") 32 | output_file = os.path.join(output, "static_analysis_output.csv") 33 | source_description = input_files.get_description() 34 | print( 35 | "Writing CSV summary for", 36 | source_description, 37 | "to", 38 | os.path.basename(output_file), 39 | ) 40 | _write_to_csv(input_files, output_file) 41 | filter_stats = input_files.get_filter_stats() 42 | if filter_stats: 43 | print(f" Results are filtered by {filter_stats}") 44 | 45 | 46 | def _write_to_csv(file_or_files, output_file): 47 | """ 48 | Write out the errors to a CSV file so that a human can do further analysis. 
49 | """ 50 | list_of_errors = file_or_files.get_records() 51 | severities = file_or_files.get_severities() 52 | with open(output_file, "w", encoding="utf-8") as file_out: 53 | writer = csv.DictWriter( 54 | file_out, 55 | sarif_file.get_record_headings(file_or_files.has_blame_info()), 56 | lineterminator="\n", 57 | ) 58 | writer.writeheader() 59 | for severity in severities: 60 | errors_of_severity = [ 61 | e for e in list_of_errors if e["Severity"] == severity 62 | ] 63 | sorted_errors_by_severity = sorted( 64 | errors_of_severity, key=combine_record_code_and_description 65 | ) 66 | writer.writerows(error_dict for error_dict in sorted_errors_by_severity) 67 | -------------------------------------------------------------------------------- /tests/ops/word/test_word.py: -------------------------------------------------------------------------------- 1 | import datetime 2 | import os 3 | import tempfile 4 | 5 | from docx import Document 6 | from sarif.operations import word_op 7 | from sarif import sarif_file 8 | 9 | INPUT_SARIF = { 10 | "$schema": "https://schemastore.azurewebsites.net/schemas/json/sarif-2.1.0-rtm.5.json", 11 | "version": "2.1.0", 12 | "runs": [ 13 | { 14 | "tool": {"driver": {"name": "unit test"}}, 15 | "results": [ 16 | { 17 | "ruleId": "CA2101", 18 | "level": "error", 19 | "locations": [ 20 | { 21 | "physicalLocation": { 22 | "artifactLocation": { 23 | "uri": "file:///C:/Code/main.c", 24 | "index": 0, 25 | }, 26 | "region": {"startLine": 24, "startColumn": 9}, 27 | } 28 | } 29 | ], 30 | } 31 | ], 32 | } 33 | ], 34 | } 35 | 36 | 37 | EXPECTED_OUTPUT_TXT = [ 38 | "Sarif Summary: unit test", 39 | "Document generated on: ", 40 | "Total number of various severities (error, warning, note): 1", 41 | "", 42 | "", 43 | "Severity : error [ 1 ]", 44 | "CA2101: 1", 45 | "Severity : warning [ 0 ]", 46 | "None", 47 | "Severity : note [ 0 ]", 48 | "None", 49 | "", 50 | "Severity : error", 51 | "Severity : warning", 52 | "None", 53 | "Severity : note", 54 | "None", 55 | ] 56 | 57 | 58 | def test_word(): 59 | mtime = datetime.datetime.now() 60 | input_sarif_file = sarif_file.SarifFile("INPUT_SARIF", INPUT_SARIF, mtime=mtime) 61 | 62 | input_sarif_file_set = sarif_file.SarifFileSet() 63 | input_sarif_file_set.files.append(input_sarif_file) 64 | 65 | with tempfile.TemporaryDirectory() as tmp: 66 | output_file_path = os.path.join(tmp, "output.docx") 67 | word_op.generate_word_docs_from_sarif_inputs( 68 | input_sarif_file_set, 69 | None, 70 | output_file_path, 71 | output_multiple_files=False, 72 | date_val=mtime, 73 | ) 74 | 75 | word_doc = Document(output_file_path) 76 | word_doc_text = [paragraph.text for paragraph in word_doc.paragraphs] 77 | 78 | assert len(word_doc_text) == len(EXPECTED_OUTPUT_TXT) 79 | for actual, expected in zip(word_doc_text, EXPECTED_OUTPUT_TXT): 80 | assert actual == expected.replace( 81 | "", mtime.strftime("%Y-%m-%d %H:%M:%S.%f") 82 | ) 83 | -------------------------------------------------------------------------------- /sarif/operations/summary_op.py: -------------------------------------------------------------------------------- 1 | """ 2 | Code for `sarif summary` command. 3 | """ 4 | 5 | import os 6 | from typing import List 7 | 8 | from sarif.sarif_file import SarifFileSet 9 | 10 | 11 | def generate_summary( 12 | input_files: SarifFileSet, output: str, output_multiple_files: bool 13 | ): 14 | """ 15 | Generate a summary of the issues from the SARIF files. 16 | sarif_dict is a dict from filename to deserialized SARIF data. 
17 | output_file is the name of a text file to write, or if None, the summary is written to the 18 | console. 19 | """ 20 | output_file = output 21 | if output_multiple_files: 22 | for input_file in input_files: 23 | output_file_name = ( 24 | input_file.get_file_name_without_extension() + "_summary.txt" 25 | ) 26 | output_file = os.path.join(output, output_file_name) 27 | summary_lines = _generate_summary(input_file) 28 | print( 29 | "Writing summary of", 30 | input_file.get_file_name(), 31 | "to", 32 | output_file_name, 33 | ) 34 | with open(output_file, "w", encoding="utf-8") as file_out: 35 | file_out.writelines(line + "\n" for line in summary_lines) 36 | output_file_name = "static_analysis_summary.txt" 37 | output_file = os.path.join(output, output_file_name) 38 | 39 | summary_lines = _generate_summary(input_files) 40 | if output: 41 | print( 42 | "Writing summary of", 43 | input_files.get_description(), 44 | "to", 45 | output_file, 46 | ) 47 | with open(output_file, "w", encoding="utf-8") as file_out: 48 | file_out.writelines(line + "\n" for line in summary_lines) 49 | else: 50 | for lstr in summary_lines: 51 | print(lstr) 52 | 53 | 54 | def _generate_summary(input_files: SarifFileSet) -> List[str]: 55 | """ 56 | For each severity level (in priority order): create a list of the errors of 57 | that severity, print out how many there are and then do some further analysis 58 | of which error codes are present. 59 | """ 60 | ret = [] 61 | report = input_files.get_report() 62 | for severity in report.get_severities(): 63 | result_count = report.get_issue_count_for_severity(severity) 64 | issue_type_histogram = report.get_issue_type_histogram_for_severity(severity) 65 | ret.append(f"\n{severity}: {result_count}") 66 | ret += [f" - {key}: {count}" for (key, count) in issue_type_histogram.items()] 67 | filter_stats = input_files.get_filter_stats() 68 | if filter_stats: 69 | ret.append(f"\nResults were filtered by {filter_stats}") 70 | return ret 71 | -------------------------------------------------------------------------------- /SECURITY.md: -------------------------------------------------------------------------------- 1 | 2 | 3 | ## Security 4 | 5 | Microsoft takes the security of our software products and services seriously, which includes all source code repositories managed through our GitHub organizations, which include [Microsoft](https://github.com/Microsoft), [Azure](https://github.com/Azure), [DotNet](https://github.com/dotnet), [AspNet](https://github.com/aspnet), [Xamarin](https://github.com/xamarin), and [our GitHub organizations](https://opensource.microsoft.com/). 6 | 7 | If you believe you have found a security vulnerability in any Microsoft-owned repository that meets [Microsoft's definition of a security vulnerability](https://docs.microsoft.com/en-us/previous-versions/tn-archive/cc751383(v=technet.10)), please report it to us as described below. 8 | 9 | ## Reporting Security Issues 10 | 11 | **Please do not report security vulnerabilities through public GitHub issues.** 12 | 13 | Instead, please report them to the Microsoft Security Response Center (MSRC) at [https://msrc.microsoft.com/create-report](https://msrc.microsoft.com/create-report). 14 | 15 | If you prefer to submit without logging in, send email to [secure@microsoft.com](mailto:secure@microsoft.com). If possible, encrypt your message with our PGP key; please download it from the [Microsoft Security Response Center PGP Key page](https://www.microsoft.com/en-us/msrc/pgp-key-msrc). 
16 | 17 | You should receive a response within 24 hours. If for some reason you do not, please follow up via email to ensure we received your original message. Additional information can be found at [microsoft.com/msrc](https://www.microsoft.com/msrc). 18 | 19 | Please include the requested information listed below (as much as you can provide) to help us better understand the nature and scope of the possible issue: 20 | 21 | * Type of issue (e.g. buffer overflow, SQL injection, cross-site scripting, etc.) 22 | * Full paths of source file(s) related to the manifestation of the issue 23 | * The location of the affected source code (tag/branch/commit or direct URL) 24 | * Any special configuration required to reproduce the issue 25 | * Step-by-step instructions to reproduce the issue 26 | * Proof-of-concept or exploit code (if possible) 27 | * Impact of the issue, including how an attacker might exploit the issue 28 | 29 | This information will help us triage your report more quickly. 30 | 31 | If you are reporting for a bug bounty, more complete reports can contribute to a higher bounty award. Please visit our [Microsoft Bug Bounty Program](https://microsoft.com/msrc/bounty) page for more details about our active programs. 32 | 33 | ## Preferred Languages 34 | 35 | We prefer all communications to be in English. 36 | 37 | ## Policy 38 | 39 | Microsoft follows the principle of [Coordinated Vulnerability Disclosure](https://www.microsoft.com/en-us/msrc/cvd). 40 | 41 | -------------------------------------------------------------------------------- /sarif/operations/upgrade_filter_op.py: -------------------------------------------------------------------------------- 1 | """ 2 | Code for `sarif upgrade-filter` command. 3 | """ 4 | 5 | import os 6 | import yaml 7 | 8 | 9 | def _load_blame_filter_file(file_path): 10 | filter_description = os.path.basename(file_path) 11 | include_patterns = [] 12 | exclude_patterns = [] 13 | try: 14 | with open(file_path, encoding="utf-8") as file_in: 15 | for line in file_in.readlines(): 16 | if line.startswith("\ufeff"): 17 | # Strip byte order mark 18 | line = line[1:] 19 | lstrip = line.strip() 20 | if lstrip.startswith("#"): 21 | # Ignore comment lines 22 | continue 23 | pattern_spec = None 24 | is_include = True 25 | if lstrip.startswith("description:"): 26 | filter_description = lstrip[12:].strip() 27 | elif lstrip.startswith("+: "): 28 | is_include = True 29 | pattern_spec = lstrip[3:].strip() 30 | elif lstrip.startswith("-: "): 31 | is_include = False 32 | pattern_spec = lstrip[3:].strip() 33 | else: 34 | is_include = True 35 | pattern_spec = lstrip 36 | if pattern_spec: 37 | (include_patterns if is_include else exclude_patterns).append( 38 | pattern_spec 39 | ) 40 | except UnicodeDecodeError as error: 41 | raise IOError( 42 | f"Cannot read blame filter file {file_path}: not UTF-8 encoded?" 
43 | ) from error 44 | return ( 45 | filter_description, 46 | include_patterns, 47 | exclude_patterns, 48 | ) 49 | 50 | 51 | def upgrade_filter_file(old_filter_file, output_file): 52 | """Convert blame filter file to general filter file.""" 53 | ( 54 | filter_description, 55 | include_patterns, 56 | exclude_patterns, 57 | ) = _load_blame_filter_file(old_filter_file) 58 | new_filter_definition = { 59 | "description": ( 60 | filter_description 61 | if filter_description 62 | else f"Migrated from {os.path.basename(old_filter_file)}" 63 | ), 64 | "configuration": {"default-include": True, "check-line-number": True}, 65 | } 66 | if include_patterns: 67 | new_filter_definition["include"] = [ 68 | {"author-mail": include_pattern} for include_pattern in include_patterns 69 | ] 70 | if exclude_patterns: 71 | new_filter_definition["exclude"] = [ 72 | {"author-mail": exclude_pattern} for exclude_pattern in exclude_patterns 73 | ] 74 | with open(output_file, "w", encoding="utf8") as yaml_out: 75 | yaml.dump(new_filter_definition, yaml_out) 76 | print("Wrote", output_file) 77 | -------------------------------------------------------------------------------- /tests/ops/info/test_info.py: -------------------------------------------------------------------------------- 1 | import datetime 2 | import json 3 | import os 4 | import tempfile 5 | 6 | from sarif.operations import info_op 7 | from sarif import sarif_file 8 | 9 | INPUT_SARIF = """{ 10 | "$schema": "https://schemastore.azurewebsites.net/schemas/json/sarif-2.1.0-rtm.5.json", 11 | "version": "2.1.0", 12 | "runs": [ 13 | { 14 | "tool": {"driver": {"name": "unit test"}}, 15 | "results": [ 16 | { 17 | "ruleId": "CA2101", 18 | "level": "error", 19 | "locations": [ 20 | { 21 | "physicalLocation": { 22 | "artifactLocation": { 23 | "uri": "file:///C:/Code/main.c", 24 | "index": 0 25 | }, 26 | "region": {"startLine": 24, "startColumn": 9} 27 | } 28 | } 29 | ] 30 | } 31 | ] 32 | } 33 | ] 34 | } 35 | """ 36 | 37 | EXPECTED_OUTPUT_TXT = """ 38 | 840 bytes (1 KiB) 39 | modified: , accessed: , ctime: 40 | 1 run 41 | Tool: unit test 42 | 1 result 43 | 44 | """ 45 | 46 | 47 | def test_info(): 48 | with tempfile.TemporaryDirectory() as tmp: 49 | input_sarif_file_path = os.path.join(tmp, "input.sarif") 50 | with open(input_sarif_file_path, "wb") as f_in: 51 | f_in.write(INPUT_SARIF.encode()) 52 | 53 | stat = os.stat(input_sarif_file_path) 54 | stat_mtime = datetime.datetime.fromtimestamp(stat.st_mtime).strftime( 55 | "%Y-%m-%d %H:%M:%S.%f" 56 | ) 57 | stat_atime = datetime.datetime.fromtimestamp(stat.st_atime).strftime( 58 | "%Y-%m-%d %H:%M:%S.%f" 59 | ) 60 | stat_ctime = datetime.datetime.fromtimestamp(stat.st_ctime).strftime( 61 | "%Y-%m-%d %H:%M:%S.%f" 62 | ) 63 | 64 | input_sarif = json.loads(INPUT_SARIF) 65 | 66 | input_sarif_file = sarif_file.SarifFile( 67 | input_sarif_file_path, input_sarif, mtime=datetime.datetime.now() 68 | ) 69 | 70 | input_sarif_file_set = sarif_file.SarifFileSet() 71 | input_sarif_file_set.files.append(input_sarif_file) 72 | 73 | file_path = os.path.join(tmp, "output.txt") 74 | info_op.generate_info(input_sarif_file_set, file_path) 75 | 76 | with open(file_path, "rb") as f_out: 77 | output = f_out.read().decode() 78 | 79 | assert output == EXPECTED_OUTPUT_TXT.replace("\n", os.linesep).replace( 80 | "", input_sarif_file_path 81 | ).replace( 82 | "", 83 | stat_mtime, 84 | ).replace( 85 | "", 86 | stat_atime, 87 | ).replace( 88 | "", 89 | stat_ctime, 90 | ) 91 | 
-------------------------------------------------------------------------------- /sarif/operations/codeclimate_op.py: -------------------------------------------------------------------------------- 1 | """ 2 | Code for `sarif codeclimate` command. 3 | """ 4 | 5 | import os 6 | import json 7 | import hashlib 8 | 9 | from sarif.sarif_file import SarifFileSet 10 | 11 | _SEVERITIES = {"none": "info", "note": "info", "warning": "minor", "error": "major"} 12 | 13 | 14 | def generate(input_files: SarifFileSet, output: str, output_multiple_files: bool): 15 | """ 16 | Generate a JSON file in Code Climate schema containing the list of issues from the SARIF files. 17 | See https://github.com/codeclimate/platform/blob/master/spec/analyzers/SPEC.md 18 | Gitlab usage guide - https://docs.gitlab.com/ee/ci/testing/code_quality.html#implement-a-custom-tool 19 | """ 20 | output_file = output 21 | if output_multiple_files: 22 | for input_file in input_files: 23 | output_file_name = input_file.get_file_name_without_extension() + ".json" 24 | print( 25 | "Writing Code Climate JSON summary of", 26 | input_file.get_file_name(), 27 | "to", 28 | output_file_name, 29 | ) 30 | _write_to_json( 31 | input_file.get_records(), os.path.join(output, output_file_name) 32 | ) 33 | filter_stats = input_file.get_filter_stats() 34 | if filter_stats: 35 | print(f" Results are filtered by {filter_stats}") 36 | output_file = os.path.join(output, "static_analysis_output.json") 37 | source_description = input_files.get_description() 38 | print( 39 | "Writing Code Climate JSON summary for", 40 | source_description, 41 | "to", 42 | os.path.basename(output_file), 43 | ) 44 | _write_to_json(input_files.get_records(), output_file) 45 | filter_stats = input_files.get_filter_stats() 46 | if filter_stats: 47 | print(f" Results are filtered by {filter_stats}") 48 | 49 | 50 | def _write_to_json(list_of_errors, output_file): 51 | """ 52 | Write out the errors to a JSON file according to Code Climate specification. 53 | """ 54 | content = [] 55 | for record in list_of_errors: 56 | severity = _SEVERITIES.get(record.get("Severity", "warning"), "minor") 57 | 58 | # split Code value to extract error ID and description 59 | rule = record["Code"] 60 | description = record["Description"] 61 | 62 | path = record["Location"] 63 | line = record["Line"] 64 | 65 | fingerprint = hashlib.md5( 66 | f"{description} {path} ${line}`]".encode() 67 | ).hexdigest() 68 | 69 | # "categories" property is not used in GitLab but marked as "required" in Code Climate spec. 70 | # There is no easy way to determine a category so the fixed value is set. 
71 | content.append( 72 | { 73 | "type": "issue", 74 | "check_name": rule, 75 | "description": description, 76 | "categories": ["Bug Risk"], 77 | "location": {"path": path, "lines": {"begin": line}}, 78 | "severity": severity, 79 | "fingerprint": fingerprint, 80 | } 81 | ) 82 | 83 | with open(output_file, "w", encoding="utf-8") as file_out: 84 | json.dump(content, file_out, indent=4) 85 | -------------------------------------------------------------------------------- /tests/ops/copy/test_copy.py: -------------------------------------------------------------------------------- 1 | from copy import deepcopy 2 | import datetime 3 | import json 4 | import jsonschema 5 | import os 6 | import tempfile 7 | 8 | from sarif.operations import copy_op 9 | from sarif import sarif_file 10 | from tests.utils import get_sarif_schema 11 | 12 | SARIF_WITH_1_ISSUE = { 13 | "$schema": "https://schemastore.azurewebsites.net/schemas/json/sarif-2.1.0-rtm.5.json", 14 | "version": "2.1.0", 15 | "runs": [ 16 | { 17 | "tool": {"driver": {"name": "unit test"}}, 18 | "results": [ 19 | { 20 | "ruleId": "CA2101", 21 | "message": {"text": "just testing"}, 22 | "level": "error", 23 | "locations": [ 24 | { 25 | "physicalLocation": { 26 | "artifactLocation": { 27 | "uri": "file:///C:/Code/main.c", 28 | "index": 0, 29 | }, 30 | "region": {"startLine": 24, "startColumn": 9}, 31 | } 32 | } 33 | ], 34 | } 35 | ], 36 | } 37 | ], 38 | } 39 | 40 | 41 | def test_generate_sarif(): 42 | sarif_schema = get_sarif_schema() 43 | input_sarif_file = sarif_file.SarifFile( 44 | "SARIF_WITH_1_ISSUE", SARIF_WITH_1_ISSUE, mtime=datetime.datetime.now() 45 | ) 46 | jsonschema.validate(input_sarif_file.data, schema=sarif_schema) 47 | 48 | input_sarif_file_set = sarif_file.SarifFileSet() 49 | input_sarif_file_set.files.append(input_sarif_file) 50 | with tempfile.TemporaryDirectory() as tmp: 51 | output_file_path = os.path.join(tmp, "copied.json") 52 | output_sarif_file = copy_op.generate_sarif( 53 | input_sarif_file_set, 54 | output_file_path, 55 | append_timestamp=False, 56 | sarif_tools_version="1.2.3", 57 | cmdline="unit-test", 58 | ) 59 | 60 | with open(output_file_path, "rb") as f_out: 61 | output_sarif = json.load(f_out) 62 | assert output_sarif_file.data == output_sarif 63 | jsonschema.validate(output_sarif, schema=sarif_schema) 64 | 65 | expected_sarif = deepcopy(input_sarif_file.data) 66 | conversion = { 67 | "tool": { 68 | "driver": { 69 | "name": "sarif-tools", 70 | "fullName": "sarif-tools https://github.com/microsoft/sarif-tools/", 71 | "version": "1.2.3", 72 | "properties": { 73 | "file": input_sarif_file.abs_file_path, 74 | "modified": input_sarif_file.mtime.isoformat(), 75 | "processed": output_sarif["runs"][0]["conversion"]["tool"][ 76 | "driver" 77 | ]["properties"]["processed"], 78 | }, 79 | } 80 | }, 81 | "invocation": { 82 | "commandLine": "unit-test", 83 | "executionSuccessful": True, 84 | }, 85 | } 86 | expected_sarif["runs"][0]["conversion"] = conversion 87 | assert output_sarif == expected_sarif 88 | -------------------------------------------------------------------------------- /sarif/operations/trend_op.py: -------------------------------------------------------------------------------- 1 | """ 2 | Code for `sarif trend` command. 
3 | """ 4 | 5 | import csv 6 | from typing import Dict, List, Literal 7 | 8 | from sarif import sarif_file 9 | from sarif.sarif_file import SarifFileSet 10 | 11 | TIMESTAMP_COLUMNS = ["Date", "Tool", *sarif_file.SARIF_SEVERITIES_WITH_NONE] 12 | 13 | 14 | def generate_trend_csv( 15 | input_files: SarifFileSet, 16 | output_file: str, 17 | dateformat: Literal["dmy", "mdy", "ymd"], 18 | ) -> None: 19 | """ 20 | Generate a timeline csv of the issues from the SARIF files. Each SARIF file must contain a 21 | timestamp of the form 20211012T110000Z in its filename. 22 | sarif_dict is a dict from filename to deserialized SARIF data. 23 | output_file is the name of a CSV file to write, or if None, the name 24 | `static_analysis_trend.csv` will be used. 25 | """ 26 | if not output_file: 27 | output_file = "static_analysis_trend.csv" 28 | 29 | error_storage = [] 30 | for input_file in input_files: 31 | input_file_name = input_file.get_file_name() 32 | print("Processing", input_file_name) 33 | error_list = input_file.get_records() 34 | tool_name = "/".join(input_file.get_distinct_tool_names()) 35 | # Date parsing 36 | parsed_date = input_file.get_filename_timestamp() 37 | if not parsed_date: 38 | raise ValueError(f"Unable to parse date from filename: {input_file_name}") 39 | 40 | # Turn the date into something that looks nice in excel (d/m/y UK date format) 41 | dstr = parsed_date[0] 42 | (year, month, day, hour, minute) = ( 43 | dstr[0:4], 44 | dstr[4:6], 45 | dstr[6:8], 46 | dstr[9:11], 47 | dstr[11:13], 48 | ) 49 | if dateformat == "ymd": 50 | excel_date = f"{year}-{month}-{day} {hour}:{minute}" 51 | elif dateformat == "mdy": 52 | excel_date = f"{month}/{day}/{year} {hour}:{minute}" 53 | else: 54 | excel_date = f"{day}/{month}/{year} {hour}:{minute}" 55 | 56 | # Store data 57 | error_storage.append( 58 | _store_errors(parsed_date, excel_date, tool_name, error_list) 59 | ) 60 | 61 | error_storage.sort(key=lambda record: record["_timestamp"]) 62 | 63 | print("Writing trend CSV to", output_file) 64 | _write_csv(output_file, error_storage) 65 | filter_stats = input_files.get_filter_stats() 66 | if filter_stats: 67 | print(f" Results are filtered by {filter_stats}") 68 | 69 | 70 | def _write_csv(output_file: str, error_storage: List[Dict]) -> None: 71 | with open(output_file, "w", encoding="utf-8") as file_out: 72 | writer = csv.DictWriter( 73 | file_out, TIMESTAMP_COLUMNS, extrasaction="ignore", lineterminator="\n" 74 | ) 75 | writer.writeheader() 76 | for key in error_storage: 77 | writer.writerow(key) 78 | 79 | 80 | def _store_errors(timestamp, excel_date, tool: str, list_of_errors: List[Dict]) -> Dict: 81 | results = { 82 | "_timestamp": timestamp, # not written to CSV, but used for sorting 83 | "Date": excel_date, 84 | "Tool": tool, 85 | } 86 | for severity in sarif_file.SARIF_SEVERITIES_WITH_NONE: 87 | error_count = sum(1 for e in list_of_errors if severity in e["Severity"]) 88 | results[severity] = error_count 89 | 90 | return results 91 | -------------------------------------------------------------------------------- /sarif/operations/emacs_op.py: -------------------------------------------------------------------------------- 1 | """ 2 | Code for `sarif emacs` command. 
3 | """ 4 | 5 | from datetime import datetime 6 | import os 7 | 8 | from jinja2 import Environment, FileSystemLoader, select_autoescape 9 | 10 | from sarif import sarif_file 11 | 12 | _THIS_MODULE_PATH = os.path.dirname(__file__) 13 | 14 | _TEMPLATES_PATH = os.path.join(_THIS_MODULE_PATH, "templates") 15 | 16 | _ENV = Environment( 17 | loader=FileSystemLoader(searchpath=_TEMPLATES_PATH), 18 | autoescape=select_autoescape(), 19 | ) 20 | 21 | 22 | def generate_compile( 23 | input_files: sarif_file.SarifFileSet, 24 | output: str, 25 | output_multiple_files: bool, 26 | date_val: datetime = datetime.now(), 27 | ): 28 | """ 29 | Generate txt file from the input files. 30 | """ 31 | output_file = output 32 | if output_multiple_files: 33 | for input_file in input_files: 34 | output_file_name = input_file.get_file_name_without_extension() + ".txt" 35 | print( 36 | "Writing results for", 37 | input_file.get_file_name(), 38 | "to", 39 | output_file_name, 40 | ) 41 | _generate_single_txt( 42 | input_file, os.path.join(output, output_file_name), date_val 43 | ) 44 | output_file = os.path.join(output, ".compile.txt") 45 | source_description = input_files.get_description() 46 | print( 47 | "Writing results for", 48 | source_description, 49 | "to", 50 | os.path.basename(output_file), 51 | ) 52 | _generate_single_txt(input_files, output_file, date_val) 53 | 54 | 55 | def _generate_single_txt(input_file, output_file, date_val): 56 | all_tools = input_file.get_distinct_tool_names() 57 | report = input_file.get_report() 58 | 59 | total_distinct_issue_codes = 0 60 | problems = [] 61 | severities = report.get_severities() 62 | 63 | for severity in severities: 64 | distinct_issue_codes = report.get_issue_type_count_for_severity(severity) 65 | 66 | total_distinct_issue_codes += distinct_issue_codes 67 | 68 | severity_details = _enrich_details( 69 | report.get_issues_grouped_by_type_for_severity(severity) 70 | ) 71 | 72 | severity_section = { 73 | "type": severity, 74 | "count": distinct_issue_codes, 75 | "details": severity_details, 76 | } 77 | 78 | problems.append(severity_section) 79 | 80 | filtered = None 81 | filter_stats = input_file.get_filter_stats() 82 | if filter_stats: 83 | filtered = f"Results were filtered by {filter_stats}." 84 | 85 | template = _ENV.get_template("sarif_emacs.txt") 86 | txt_content = template.render( 87 | report_type=", ".join(all_tools), 88 | report_date=date_val, 89 | severities=", ".join(severities), 90 | total=total_distinct_issue_codes, 91 | problems=problems, 92 | filtered=filtered, 93 | ) 94 | 95 | with open(output_file, "wt", encoding="utf-8") as file_out: 96 | file_out.write(txt_content) 97 | 98 | 99 | def _enrich_details(records_of_severity): 100 | return [ 101 | {"code": key, "count": len(records), "details": records} 102 | for (key, records) in records_of_severity.items() 103 | ] 104 | -------------------------------------------------------------------------------- /sarif/operations/copy_op.py: -------------------------------------------------------------------------------- 1 | """ 2 | Code for `sarif copy` command. 
3 | """ 4 | 5 | import copy 6 | import datetime 7 | import json 8 | import os 9 | 10 | from sarif import loader, sarif_file 11 | from sarif.sarif_file import SarifFileSet, SarifFile 12 | 13 | 14 | def generate_sarif( 15 | input_files: SarifFileSet, 16 | output: str, 17 | append_timestamp: bool, 18 | sarif_tools_version: str, 19 | cmdline: str, 20 | ) -> SarifFile: 21 | """ 22 | Generate a new SARIF file based on the input files 23 | """ 24 | sarif_data_out = { 25 | "$schema": "https://schemastore.azurewebsites.net/schemas/json/sarif-2.1.0-rtm.5.json", 26 | "version": "2.1.0", 27 | "runs": [], 28 | } 29 | now = datetime.datetime.now(datetime.timezone.utc) 30 | output_file_abs_path = os.path.abspath(output) 31 | conversion_timestamp_iso8601 = now.isoformat() 32 | conversion_timestamp_trendformat = now.strftime(sarif_file.DATETIME_FORMAT) 33 | run_count = 0 34 | input_file_count = 0 35 | for input_file in input_files: 36 | if input_file.get_abs_file_path() == output_file_abs_path: 37 | print(f"Auto-excluding output file {output} from input file list") 38 | continue 39 | input_file_count += 1 40 | input_file_path = input_file.get_abs_file_path() 41 | input_file_modified_iso8601 = input_file.mtime.isoformat() 42 | for input_run in input_file.runs: 43 | run_count += 1 44 | # Create a shallow copy 45 | input_run_json_copy = copy.copy(input_run.run_data) 46 | conversion_properties = { 47 | "file": input_file_path, 48 | "modified": input_file_modified_iso8601, 49 | "processed": conversion_timestamp_iso8601, 50 | } 51 | input_run_json_copy["conversion"] = { 52 | "tool": { 53 | "driver": { 54 | "name": "sarif-tools", 55 | "fullName": "sarif-tools https://github.com/microsoft/sarif-tools/", 56 | "version": sarif_tools_version, 57 | "properties": conversion_properties, 58 | } 59 | }, 60 | "invocation": {"commandLine": cmdline, "executionSuccessful": True}, 61 | } 62 | results = input_run.get_results() 63 | filter_stats = input_run.get_filter_stats() 64 | if filter_stats: 65 | input_run_json_copy["results"] = results 66 | conversion_properties["filtered"] = filter_stats.to_json_camel_case() 67 | sarif_data_out["runs"].append(input_run_json_copy) 68 | output_file_path = output 69 | if append_timestamp: 70 | output_split = os.path.splitext(output) 71 | output_file_path = ( 72 | output_split[0] 73 | + f"_{conversion_timestamp_trendformat}" 74 | + (output_split[1] or ".sarif") 75 | ) 76 | with open(output_file_path, "w", encoding="utf-8") as file_out: 77 | json.dump(sarif_data_out, file_out, indent=4) 78 | runs_string = "1 run" if run_count == 1 else f"{run_count} runs" 79 | files_string = ( 80 | "1 SARIF file" if input_file_count == 1 else f"{input_file_count} SARIF files" 81 | ) 82 | print(f"Wrote {output_file_path} with {runs_string} from {files_string}") 83 | total_filter_stats = input_files.get_filter_stats() 84 | if total_filter_stats: 85 | print(total_filter_stats.to_string()) 86 | return loader.load_sarif_file(output_file_path) 87 | -------------------------------------------------------------------------------- /tests/ops/trend/test_trend.py: -------------------------------------------------------------------------------- 1 | import datetime 2 | import json 3 | import os 4 | import tempfile 5 | 6 | from sarif.operations import trend_op 7 | from sarif import sarif_file 8 | 9 | INPUT_SARIF_1 = """{ 10 | "$schema": "https://schemastore.azurewebsites.net/schemas/json/sarif-2.1.0-rtm.5.json", 11 | "version": "2.1.0", 12 | "runs": [ 13 | { 14 | "tool": {"driver": {"name": "name 1"}}, 15 | "results": [ 16 
| { 17 | "ruleId": "CA2103", 18 | "level": "error" 19 | }, 20 | { 21 | "ruleId": "CA2102", 22 | "level": "warning" 23 | }, 24 | { 25 | "ruleId": "CA2101", 26 | "level": "warning" 27 | }, 28 | { 29 | "ruleId": "CA2101", 30 | "level": "error" 31 | }, 32 | { 33 | "ruleId": "CA2101", 34 | "level": "note" 35 | }, 36 | { 37 | "ruleId": "CA2101", 38 | "level": "none" 39 | }, 40 | { 41 | "ruleId": "CA2101", 42 | "level": "error" 43 | } 44 | ] 45 | } 46 | ] 47 | } 48 | """ 49 | 50 | INPUT_SARIF_2 = """{ 51 | "$schema": "https://schemastore.azurewebsites.net/schemas/json/sarif-2.1.0-rtm.5.json", 52 | "version": "2.1.0", 53 | "runs": [ 54 | { 55 | "tool": {"driver": {"name": "name 2"}}, 56 | "results": [ 57 | { 58 | "ruleId": "CA2101", 59 | "level": "error" 60 | }, 61 | { 62 | "ruleId": "CA2101", 63 | "level": "note" 64 | }, 65 | { 66 | "ruleId": "CA2101", 67 | "level": "none" 68 | }, 69 | { 70 | "ruleId": "CA2101", 71 | "level": "error" 72 | } 73 | ] 74 | } 75 | ] 76 | } 77 | """ 78 | 79 | INPUTS = { 80 | "trend_test_20250106T060000Z.sarif": INPUT_SARIF_1, 81 | "trend_test_20250107T060000Z.sarif": INPUT_SARIF_2, 82 | } 83 | 84 | EXPECTED_OUTPUT_TXT = """Date,Tool,error,warning,note,none 85 | 06/01/2025 06:00,name 1,3,2,1,1 86 | 07/01/2025 06:00,name 2,2,0,1,1 87 | """ 88 | 89 | 90 | def test_trend(): 91 | with tempfile.TemporaryDirectory() as tmp: 92 | input_sarif_file_set = sarif_file.SarifFileSet() 93 | 94 | for input_file_name, input_json in INPUTS.items(): 95 | input_sarif_file_path = os.path.join(tmp, input_file_name) 96 | with open(input_sarif_file_path, "wb") as f_in: 97 | f_in.write(input_json.encode()) 98 | 99 | input_sarif = json.loads(input_json) 100 | 101 | input_sarif_file = sarif_file.SarifFile( 102 | input_sarif_file_path, input_sarif, mtime=datetime.datetime.now() 103 | ) 104 | 105 | input_sarif_file_set.files.append(input_sarif_file) 106 | 107 | file_path = os.path.join(tmp, "output.txt") 108 | trend_op.generate_trend_csv(input_sarif_file_set, file_path, dateformat="dmy") 109 | 110 | with open(file_path, "rb") as f_out: 111 | output = f_out.read().decode() 112 | 113 | assert output == EXPECTED_OUTPUT_TXT.replace("\n", os.linesep) 114 | -------------------------------------------------------------------------------- /tests/ops/diff/test_diff.py: -------------------------------------------------------------------------------- 1 | import datetime 2 | import json 3 | import os 4 | import tempfile 5 | 6 | from sarif.operations import diff_op 7 | from sarif import sarif_file 8 | 9 | SARIF_WITH_1_ISSUE = { 10 | "$schema": "https://schemastore.azurewebsites.net/schemas/json/sarif-2.1.0-rtm.5.json", 11 | "version": "2.1.0", 12 | "runs": [ 13 | { 14 | "tool": {"driver": {"name": "unit test"}}, 15 | "results": [ 16 | { 17 | "ruleId": "CA2101", 18 | "level": "error", 19 | "locations": [ 20 | { 21 | "physicalLocation": { 22 | "artifactLocation": { 23 | "uri": "file:///C:/Code/main.c", 24 | "index": 0, 25 | }, 26 | "region": {"startLine": 24, "startColumn": 9}, 27 | } 28 | } 29 | ], 30 | } 31 | ], 32 | } 33 | ], 34 | } 35 | 36 | SARIF_WITH_2_ISSUES = { 37 | "$schema": "https://schemastore.azurewebsites.net/schemas/json/sarif-2.1.0-rtm.5.json", 38 | "version": "2.1.0", 39 | "runs": [ 40 | { 41 | "tool": {"driver": {"name": "unit test"}}, 42 | "results": [ 43 | { 44 | "ruleId": "CA2101", 45 | "level": "error", 46 | "locations": [ 47 | { 48 | "physicalLocation": { 49 | "artifactLocation": { 50 | "uri": "file:///C:/Code/main.c", 51 | "index": 0, 52 | }, 53 | "region": {"startLine": 24, "startColumn": 
9}, 54 | } 55 | } 56 | ], 57 | }, 58 | { 59 | "ruleId": "CA2102", 60 | "level": "error", 61 | "locations": [ 62 | { 63 | "physicalLocation": { 64 | "artifactLocation": { 65 | "uri": "file:///C:/Code/main.c", 66 | "index": 0, 67 | }, 68 | "region": {"startLine": 34, "startColumn": 9}, 69 | } 70 | } 71 | ], 72 | }, 73 | ], 74 | "columnKind": "utf16CodeUnits", 75 | } 76 | ], 77 | } 78 | 79 | 80 | def test_print_diff(): 81 | mtime = datetime.datetime.now() 82 | old_sarif = sarif_file.SarifFile( 83 | "SARIF_WITH_1_ISSUE", SARIF_WITH_1_ISSUE, mtime=mtime 84 | ) 85 | new_sarif = sarif_file.SarifFile( 86 | "SARIF_WITH_2_ISSUES", SARIF_WITH_2_ISSUES, mtime=mtime 87 | ) 88 | with tempfile.TemporaryDirectory() as tmp: 89 | file_path = os.path.join(tmp, "diff.json") 90 | result = diff_op.print_diff( 91 | old_sarif, new_sarif, file_path, check_level="warning" 92 | ) 93 | with open(file_path, "rb") as f_in: 94 | diff_dict = json.load(f_in) 95 | assert result == 1 96 | assert diff_dict == { 97 | "all": {"+": 1, "-": 0}, 98 | "error": { 99 | "+": 1, 100 | "-": 0, 101 | "codes": { 102 | "CA2102": { 103 | "<": 0, 104 | ">": 1, 105 | "+@": [{"Location": "file:///C:/Code/main.c", "Line": 34}], 106 | } 107 | }, 108 | }, 109 | "warning": {"+": 0, "-": 0, "codes": {}}, 110 | "note": {"+": 0, "-": 0, "codes": {}}, 111 | } 112 | # If issues have decreased, return value should be 0. 113 | assert ( 114 | diff_op.print_diff(new_sarif, old_sarif, file_path, check_level="warning") 115 | == 0 116 | ) 117 | -------------------------------------------------------------------------------- /sarif/filter/filter_stats.py: -------------------------------------------------------------------------------- 1 | """ 2 | Statistics that record the outcome of a filter. 3 | """ 4 | 5 | import datetime 6 | 7 | 8 | class FilterStats: 9 | """ 10 | Statistics that record the outcome of a filter. 11 | """ 12 | 13 | def __init__(self, filter_description): 14 | self.filter_description = filter_description 15 | # Filter stats can also be loaded from a file created by `sarif copy`. 16 | self.rehydrated = False 17 | self.filter_datetime = None 18 | self.filtered_in_result_count = 0 19 | self.filtered_out_result_count = 0 20 | self.missing_property_count = 0 21 | self.unconvincing_line_number_count = 0 22 | 23 | def reset_counters(self): 24 | """ 25 | Zero all the counters. 26 | """ 27 | self.filter_datetime = datetime.datetime.now() 28 | self.filtered_in_result_count = 0 29 | self.filtered_out_result_count = 0 30 | self.missing_property_count = 0 31 | self.unconvincing_line_number_count = 0 32 | 33 | def add(self, other_filter_stats): 34 | """ 35 | Add another set of filter stats to my totals. 36 | """ 37 | if other_filter_stats: 38 | if other_filter_stats.filter_description and ( 39 | other_filter_stats.filter_description != self.filter_description 40 | ): 41 | self.filter_description += f", {other_filter_stats.filter_description}" 42 | self.filtered_in_result_count += other_filter_stats.filtered_in_result_count 43 | self.filtered_out_result_count += ( 44 | other_filter_stats.filtered_out_result_count 45 | ) 46 | self.missing_property_count += other_filter_stats.missing_property_count 47 | self.unconvincing_line_number_count += ( 48 | other_filter_stats.unconvincing_line_number_count 49 | ) 50 | 51 | def __str__(self): 52 | """ 53 | Automatic to_string() 54 | """ 55 | return self.to_string() 56 | 57 | def to_string(self): 58 | """ 59 | Generate a summary string for these filter stats. 
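        For illustration (values hypothetical), a summary for a filter described as "my-filter" reads roughly:
        'my-filter' at Mon Jan  6 06:00:00 2025: 2 filtered out, 10 passed the filter
        with the extra clauses appended only when results were included by default for lacking line numbers or filterable data.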
60 | """ 61 | ret = f"'{self.filter_description}'" 62 | if self.filter_datetime: 63 | ret += " at " 64 | ret += self.filter_datetime.strftime("%c") 65 | ret += ( 66 | f": {self.filtered_out_result_count} filtered out, " 67 | f"{self.filtered_in_result_count} passed the filter" 68 | ) 69 | if self.unconvincing_line_number_count: 70 | ret += ( 71 | f", {self.unconvincing_line_number_count} included by default " 72 | "for lacking line number information" 73 | ) 74 | if self.missing_property_count: 75 | ret += ( 76 | f", {self.missing_property_count} included by default " 77 | "for lacking data to filter" 78 | ) 79 | 80 | return ret 81 | 82 | def to_json_camel_case(self): 83 | """ 84 | Generate filter stats as JSON using camelCase naming, 85 | to fit with SARIF standard section 3.8.1 (Property Bags). 86 | """ 87 | return { 88 | "filter": self.filter_description, 89 | "in": self.filtered_in_result_count, 90 | "out": self.filtered_out_result_count, 91 | "default": { 92 | "noProperty": self.missing_property_count, 93 | "noLineNumber": self.unconvincing_line_number_count, 94 | }, 95 | } 96 | 97 | 98 | def load_filter_stats_from_json(json_data): 99 | """ 100 | Load filter stats from a SARIF file property bag using camelCase naming 101 | as per SARIF standard section 3.8.1 (Property Bags). 102 | """ 103 | ret = None 104 | if json_data: 105 | ret = FilterStats(json_data["filter"]) 106 | ret.rehydrated = True 107 | ret.filtered_in_result_count = json_data.get("in", 0) 108 | ret.filtered_out_result_count = json_data.get("out", 0) 109 | default_stats = json_data.get("default", {}) 110 | ret.unconvincing_line_number_count = default_stats.get("noLineNumber", 0) 111 | ret.missing_property_count = default_stats.get("noProperty", 0) 112 | return ret 113 | -------------------------------------------------------------------------------- /sarif/operations/templates/sarif_summary.html: -------------------------------------------------------------------------------- 1 | 2 | 76 | 77 | 78 | {% if image_data_base64 -%} 79 |
{%- endif %}

Sarif Summary: {{ report_type }}

Document generated on: {{ report_date }}

Total number of distinct issues of all severities ({{ severities }}): {{ total }}

{% if filtered -%}
{{ filtered }}
{%- endif %}

{% if chart_image_data_base64 -%}
{%- endif %}

{% for problem in problems %}

Severity : {{ problem.type }} [ {{ problem.count }} ]

{%- for error in problem.details %}
{%- for link in error.links %}
{{ link.0 }}
{%- endfor %}
{%- for line in error.details %}
{%- if line.Location %}
{{ line.Location }}:{{ line.Line }}
{%- else %}
{{ line.Description }}
{%- endif %}
{%- endfor %}
{%- endfor %}
124 | {%- endfor %} 125 | 126 | -------------------------------------------------------------------------------- /azure-pipelines/release.yml: -------------------------------------------------------------------------------- 1 | name: Release 2 | trigger: none 3 | pr: none 4 | 5 | variables: 6 | - template: templates/globals.yml 7 | - name: TeamName 8 | value: sarif-tools 9 | 10 | resources: 11 | repositories: 12 | - repository: MicroBuildTemplate 13 | type: git 14 | name: 1ESPipelineTemplates/MicroBuildTemplate 15 | ref: refs/tags/release 16 | 17 | extends: 18 | template: azure-pipelines/MicroBuild.1ES.Official.yml@MicroBuildTemplate 19 | parameters: 20 | sdl: 21 | sourceAnalysisPool: VSEngSS-MicroBuild2022-1ES 22 | pool: 23 | name: AzurePipelines-EO 24 | demands: 25 | - ImageOverride -equals 1ESPT-Ubuntu22.04 26 | os: Linux 27 | customBuildTags: 28 | - ES365AIMigrationTooling 29 | stages: 30 | - template: templates/build_stage.yml@self 31 | 32 | - stage: CreateTag 33 | displayName: Create Tag 34 | dependsOn: Build 35 | variables: 36 | releaseVersionWithPrefix: $[ stageDependencies.Build.Build.outputs['getReleaseVersionStep.releaseVersionWithPrefix'] ] 37 | jobs: 38 | - job: CreateTag 39 | steps: 40 | - checkout: self 41 | fetchDepth: 1 42 | fetchTags: false 43 | persistCredentials: true 44 | 45 | - script: | 46 | git config user.email "41898282+github-actions[bot]@users.noreply.github.com" 47 | git config user.name "Azure Piplines" 48 | git fetch --depth 1 origin $(Build.SourceBranchName) 49 | git tag -a $(releaseVersionWithPrefix) -m "Release $(releaseVersionWithPrefix)" origin/$(Build.SourceBranchName) 50 | git push origin $(releaseVersionWithPrefix) 51 | displayName: Create git tag 52 | 53 | - stage: CreateRelease 54 | displayName: Create GitHub Release 55 | dependsOn: 56 | - Build 57 | - CreateTag 58 | variables: 59 | releaseVersionWithPrefix: $[ stageDependencies.Build.Build.outputs['getReleaseVersionStep.releaseVersionWithPrefix'] ] 60 | jobs: 61 | - job: CreateRelease 62 | templateContext: 63 | type: releaseJob 64 | isProduction: true 65 | inputs: 66 | - input: pipelineArtifact 67 | artifactName: $(ARTIFACT_NAME_WHEEL) 68 | targetPath: $(Build.StagingDirectory)/dist 69 | steps: 70 | - task: GitHubRelease@1 #https://learn.microsoft.com/en-us/azure/devops/pipelines/tasks/reference/github-release-v1?view=azure-pipelines 71 | displayName: Create GitHub Release 72 | inputs: 73 | gitHubConnection: GitHub-sarif-tools 74 | repositoryName: microsoft/sarif-tools 75 | action: create 76 | target: $(Build.SourceBranchName) 77 | title: $(releaseVersionWithPrefix) 78 | tag: $(releaseVersionWithPrefix) 79 | tagSource: userSpecifiedTag 80 | isDraft: true 81 | addChangeLog: false 82 | assets: $(Build.StagingDirectory)/dist/* 83 | 84 | - stage: WaitForValidation 85 | dependsOn: CreateRelease 86 | jobs: 87 | - job: wait_for_validation 88 | displayName: Wait for manual validation 89 | pool: server 90 | steps: 91 | - task: ManualValidation@0 92 | timeoutInMinutes: 1440 # task times out in 1 day 93 | inputs: 94 | notifyUsers: plseng@microsoft.com 95 | instructions: Please test the latest draft release and then publish it. 96 | onTimeout: reject 97 | 98 | - stage: Release 99 | dependsOn: WaitForValidation 100 | jobs: 101 | - job: PublishToPyPi 102 | displayName: Release to PyPi 103 | 104 | pool: 105 | name: VSEngSS-MicroBuild2022-1ES # This pool is required to have the certs needed to publish to PyPi using ESRP. 
106 | os: windows 107 | image: server2022-microbuildVS2022-1es 108 | 109 | templateContext: 110 | type: releaseJob 111 | isProduction: true 112 | inputs: 113 | - input: pipelineArtifact 114 | artifactName: $(ARTIFACT_NAME_WHEEL) 115 | targetPath: $(Build.StagingDirectory)/dist 116 | 117 | steps: 118 | - template: MicroBuild.Publish.yml@MicroBuildTemplate 119 | parameters: 120 | intent: PackageDistribution 121 | contentType: PyPi 122 | contentSource: Folder 123 | folderLocation: $(Build.StagingDirectory)/dist 124 | waitForReleaseCompletion: true 125 | owners: rchiodo@microsoft.com 126 | approvers: grwheele@microsoft.com 127 | -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # Contributing 2 | 3 | This project welcomes contributions and suggestions. Most contributions require you to 4 | agree to a Contributor License Agreement (CLA) declaring that you have the right to, 5 | and actually do, grant us the rights to use your contribution. For details, visit 6 | https://cla.microsoft.com. 7 | 8 | When you submit a pull request, a CLA-bot will automatically determine whether you need 9 | to provide a CLA and decorate the PR appropriately (e.g., label, comment). Simply follow the 10 | instructions provided by the bot. You will only need to do this once across all repositories using our CLA. 11 | 12 | This project has adopted the [Microsoft Open Source Code of Conduct](https://opensource.microsoft.com/codeofconduct/). 13 | For more information see the [Code of Conduct FAQ](https://opensource.microsoft.com/codeofconduct/faq/) 14 | or contact [opencode@microsoft.com](mailto:opencode@microsoft.com) with any additional questions or comments. 15 | 16 | ## Pull Requests 17 | 18 | Pull requests are welcome. 19 | 20 | 1. Fork the repository. 21 | 2. Make and test your changes (see Developer Guide below). 22 | 3. Run `poetry run ruff format` and `poetry run black sarif` to format the code. 23 | 4. Run `poetry run pylint sarif` and check for no new errors or warnings. 24 | 5. Raise Pull Request in GitHub.com. 25 | 26 | ## Developer Guide 27 | 28 | ### Prerequisites 29 | 30 | - You need Python 3.8 installed. 31 | - This is the minimum supported version of the tool. Developing with a later version risks introducing type hints such as `list[dict]` that are not compatible with Python 3.8. 32 | - You need Poetry installed. Run this in an Admin CMD or under `sudo`: 33 | - `pip install poetry` 34 | 35 | Initialise Poetry by telling it where Python 3.8 is, e.g. 36 | 37 | ```bash 38 | # Windows - adjust to the path where you have installed Python 3.8. 39 | poetry env use "C:\Python38\python.exe" 40 | # Linux 41 | poetry env use 3.8 42 | ``` 43 | 44 | This is not necessary if your system Python version is 3.8. 45 | 46 | ### Running locally in Poetry virtualenv 47 | 48 | ```bash 49 | poetry install 50 | poetry run sarif 51 | ``` 52 | 53 | To check that the right versions are being run: 54 | 55 | ```bash 56 | poetry run python --version 57 | poetry run sarif --version --debug 58 | poetry run python -m sarif --version --debug 59 | ``` 60 | 61 | To see which executable is being run: 62 | 63 | ```bash 64 | # Windows 65 | poetry run cmd /c "where sarif" 66 | # Linux 67 | poetry run which sarif 68 | ``` 69 | 70 | ### Update dependency versions 71 | 72 | Run `poetry update` to bump package versions in the `poetry.lock` file. 
73 | 74 | ### Update product version 75 | 76 | Change the `version =` line in `pyproject.toml` for the new semantic version for your change. 77 | 78 | Change the version in `sarif/__init__.py` as well. 79 | 80 | ### Run unit tests 81 | 82 | ```bash 83 | poetry run pytest 84 | ``` 85 | 86 | ### Package using `poetry build` 87 | 88 | Run it on the source code: 89 | 90 | ```bash 91 | poetry build 92 | ``` 93 | 94 | If you want, you can install the package built locally at system level (outside the Poetry virtual environment): 95 | 96 | ```bash 97 | pip install dist/sarif-*.whl 98 | ``` 99 | 100 | To remove it again: 101 | 102 | ```bash 103 | pip uninstall sarif-tools 104 | ``` 105 | 106 | Note that there are two possible levels of installation: 107 | 108 | #### User installation 109 | 110 | When you run `pip install` and `pip` doesn't have permissions to write to the Python installation's `site-packages` directory, probably because you are not running as an admin/superuser, the package is installed at "user" level only. You can run it using: 111 | 112 | ```bash 113 | python -m sarif 114 | ``` 115 | 116 | You *cannot* run it using the bare command `sarif`, unless you add your user-level `Scripts` directory to your `PATH`. You can see where that is in the output from `pip install`: 117 | 118 | ```plain 119 | Installing collected packages: sarif 120 | WARNING: The script sarif.exe is installed in 'C:\Users\yournamehere\AppData\Roaming\Python\Python39\Scripts' which is not on PATH. 121 | Consider adding this directory to PATH or, if you prefer to suppress this warning, use --no-warn-script-location. 122 | ``` 123 | 124 | #### System installation 125 | 126 | When you run `pip install` and `pip` has permissions to write to the Python installation's `site-packages` directory, and the Python installation's `Scripts` directory is in your path, then you can run the `sarif` command without `python -m`: 127 | 128 | ```bash 129 | sarif 130 | ``` 131 | 132 | ### Adding packages from pypi to the project 133 | 134 | Add the package and its latest version number (as minimum version) to `[tool.poetry.dependencies]` in `pyproject.toml`. 135 | 136 | Then run this to update Poetry's lockfile. 137 | 138 | ```bash 139 | poetry update 140 | ``` 141 | 142 | ### Adding resource files to the project 143 | 144 | Add the file within the `sarif` directory and it will be installed with the Python source. For example, `sarif/operations/templates/sarif_summary.html`. 145 | -------------------------------------------------------------------------------- /CHANGELOG.md: -------------------------------------------------------------------------------- 1 | # Changelog 2 | 3 | All notable changes to this project will be documented in this file. 4 | 5 | The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), 6 | and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). 7 | 8 | ## [3.0.5] - 2025-07-17 9 | 10 | - #76 Support in HTML display rules as links, when help Uri provided 11 | - #82 `trend` and `csv` output csv files have `\r\n` line terminators when generated on Linux machine 12 | - #97 sarif blame crashes with UnicodeDecodeError when it encounters invalid UTF-8 13 | 14 | ## [3.0.4] - 2024-11-15 15 | 16 | - #73 Crash when using `--check`. 17 | 18 | ## [3.0.3] - 2024-09-30 19 | 20 | - #43 Support getting level from `ruleConfigurationOverrides` and `defaultConfiguration`. 21 | - #68 Fixed regression where reversing diff direction gave different results. 
22 | 23 | ## [3.0.2] - 2024-09-18 24 | 25 | - #55 part 2: Add `executionSuccessful` to `copy` operation output for SARIF schema compliance. 26 | 27 | ## [3.0.1] - 2024-09-16 28 | 29 | ### Fixed 30 | 31 | - #58 Fixed regression that broke `sarif diff` command in v3.0.0. 32 | 33 | ## [3.0.0](releases/tag/v3.0.0) - 2024-09-10 34 | 35 | ### Breaking Changes 36 | 37 | - Changed Python API to use new IssueReport type for issue grouping and sorting: 38 | - `SarifFileSet` now has a `get_report()` method 39 | - `s.get_result_count_by_severity()` replaced by 40 | `s.get_report().get_issue_type_histogram_for_severity(severity)` 41 | - `s.get_result_count_by_severity()` replaced by 42 | `s.get_report().get_issue_count_for_severity(severity)` 43 | - `s.get_records_grouped_by_severity()` replaced by 44 | `s.get_report().get_issues_for_severity(severity)` 45 | 46 | ### Added 47 | 48 | - Support "none" severity level. It's only included in the output if present in the input. 49 | 50 | ### Fixed 51 | 52 | - #39 Truncate long summaries. 53 | - Made issue sorting and grouping more consistent across the various reports. 54 | - Multiple occurrences of a single issue are now sorted by location in the Word report. 55 | - Improved debug and version reporting for when multiple versions are installed. 56 | - For the copy operation, "invocation" in the resulting sarif is changed to an object to match the spec. 57 | - #53 Fix the `blame` command for `file:///` URL locations. 58 | 59 | ### Compatibility 60 | 61 | - Python 3.8+ 62 | 63 | ## [2.0.0](releases/tag/v2.0.0) - 2022-11-07 64 | 65 | ### Breaking Changes 66 | 67 | - "Code" and "Description" are now separate columns in the CSV output, whereas before they were 68 | combined in the "Code" column. They are also separate keys in the "record" format if calling 69 | sarif-tools from Python. 70 | - `--blame-filter` argument has been replaced with `--filter`, using a new YAML-based format for 71 | more general filtering to replace the previous ad hoc text format which only supported blame. 72 | - There is a new `upgrade-filter` command to upgrade your old blame filter files to the new 73 | format. 74 | - Thanks to @abyss638 for contributing this enhancement! 75 | 76 | ### Added 77 | 78 | - New `codeclimate` command to generate output for GitLab use. 79 | - Thanks to @abyss638 for contributing this enhancement! 80 | - New `emacs` command to generate output for the popular Linux text editor. 81 | - Thanks to @dkloper for contributing this enhancement! 82 | - #14 Support recursive glob 83 | - Thanks to @bushelofsilicon for contributing this enhancement! 84 | 85 | ### Changed 86 | 87 | - When an input SARIF file contains blame information, the `csv` command output now has a column 88 | for `Author`. 89 | - #18 The `diff` command now prints up to three locations of new occurrences of issues (all are 90 | listed in the file output mode). 91 | 92 | ### Fixed 93 | 94 | - #4 and #19 docs improvements. 95 | - #12 allow zero locations for record. 96 | - #15 allow `text` to be absent in `message` object. 97 | - #20 allow UTF8 with BOM (`utf-8-sig`` encoding) 98 | - Thanks to @ManuelBerrueta for contributing this fix! 99 | 100 | ### Compatibility 101 | 102 | - Python 3.8+ 103 | 104 | ## [1.0.0](releases/tag/v1.0.0) - 2022-05-09 105 | 106 | ### Changed 107 | 108 | - Development, build and release is now based on [python-poetry](https://python-poetry.org). 109 | - No change to functionality since v0.3.0. 
110 | 111 | ### Compatibility 112 | 113 | - Python 3.8+ 114 | 115 | ## [0.3.0](releases/tag/v0.3.0) - 2022-01-14 116 | 117 | ### Added 118 | 119 | - Support for globs in Windows, e.g. `sarif summary android*.sarif` 120 | - `info` and `copy` commands 121 | 122 | ### Compatibility 123 | 124 | - Python 3.8+ 125 | 126 | ## [0.2.0](releases/tag/v0.2.0) - 2022-01-07 127 | 128 | ### Added 129 | 130 | - `--blame-filter` argument. 131 | 132 | ### Changed 133 | 134 | - Compatible with Python v3.8. Previously, Python v3.9 was required. 135 | 136 | ### Compatibility 137 | 138 | - Python 3.8+ 139 | 140 | ## [0.1.0](releases/tag/v0.1.0) - 2021-11-11 141 | 142 | ### Added 143 | 144 | - Initial versions of commands `blame`, `csv`, `diff`, `html`, `ls`, `summary`, `trend`, `usage` and `word` created in Microsoft Global Hackathon 2021. 145 | 146 | ### Compatibility 147 | 148 | - Python 3.9+ 149 | -------------------------------------------------------------------------------- /sarif/operations/info_op.py: -------------------------------------------------------------------------------- 1 | """ 2 | Code for `sarif info` command. 3 | """ 4 | 5 | import datetime 6 | import os 7 | 8 | from sarif.sarif_file import SarifFileSet 9 | 10 | _BYTES_PER_MIB = 1024 * 1024 11 | _BYTES_PER_KIB = 1024 12 | 13 | 14 | def _property_bag_stats(object_list): 15 | tally = {} 16 | universal_property_keys = [] 17 | partial_properties = [] 18 | if object_list: 19 | for obj in object_list: 20 | for key in obj.get("properties", {}): 21 | tally[key] = tally[key] + 1 if key in tally else 1 22 | object_count = len(object_list) 23 | universal_property_keys = [ 24 | key for (key, count) in tally.items() if count == object_count 25 | ] 26 | 27 | def tally_rank(key_count_pair): 28 | # Sort by descending tally then alphabetically 29 | return (-key_count_pair[1], key_count_pair[0]) 30 | 31 | partial_properties = [ 32 | {"key": key, "count": count, "percent": 100 * count / object_count} 33 | for (key, count) in sorted(tally.items(), key=tally_rank) 34 | if count < object_count 35 | ] 36 | return universal_property_keys, partial_properties 37 | 38 | 39 | def _generate_info_to_file(sarif_files, file_out): 40 | file_count = False 41 | for input_file in sarif_files: 42 | file_count += 1 43 | file_path = input_file.get_abs_file_path() 44 | file_stat = os.stat(file_path) 45 | size_in_bytes = file_stat.st_size 46 | if size_in_bytes > _BYTES_PER_MIB: 47 | readable_size = f"{file_stat.st_size / _BYTES_PER_MIB:.1f} MiB" 48 | else: 49 | readable_size = ( 50 | f"{(file_stat.st_size + _BYTES_PER_KIB - 1) // _BYTES_PER_KIB} KiB" 51 | ) 52 | print(input_file.get_abs_file_path(), file=file_out) 53 | print(f" {file_stat.st_size} bytes ({readable_size})", file=file_out) 54 | print( 55 | f" modified: {datetime.datetime.fromtimestamp(file_stat.st_mtime)}, " 56 | f"accessed: {datetime.datetime.fromtimestamp(file_stat.st_atime)}, " 57 | f"ctime: {datetime.datetime.fromtimestamp(file_stat.st_ctime)}", 58 | file=file_out, 59 | ) 60 | run_count = len(input_file.runs) 61 | print(f" {run_count} runs" if run_count != 1 else " 1 run", file=file_out) 62 | for run_index, run in enumerate(input_file.runs): 63 | if run_count != 1: 64 | print(f" Run #{run_index + 1}:", file=file_out) 65 | print(f" Tool: {run.get_tool_name()}", file=file_out) 66 | conversion_tool = run.get_conversion_tool_name() 67 | if conversion_tool: 68 | print(f" Conversion tool: {conversion_tool}", file=file_out) 69 | results = run.get_results() 70 | result_count = len(results) 71 | print( 72 | f" 
{result_count} results" if result_count != 1 else " 1 result", 73 | file=file_out, 74 | ) 75 | universal_property_keys, partial_properties = _property_bag_stats(results) 76 | ppk_string = ( 77 | ", ".join( 78 | "{} {}/{} ({:.1f} %)".format( 79 | p["key"], p["count"], result_count, p["percent"] 80 | ) 81 | for p in partial_properties 82 | ) 83 | if partial_properties 84 | else None 85 | ) 86 | if universal_property_keys: 87 | upk_string = ", ".join(universal_property_keys) 88 | if partial_properties: 89 | print( 90 | f" Result properties: all results have properties: {upk_string}; " 91 | f"some results have properties: {ppk_string}", 92 | file=file_out, 93 | ) 94 | else: 95 | print( 96 | f" All results have properties: {upk_string}", 97 | file=file_out, 98 | ) 99 | elif partial_properties: 100 | print( 101 | f" Result properties: {ppk_string}", 102 | file=file_out, 103 | ) 104 | print(file=file_out) 105 | return file_count 106 | 107 | 108 | def generate_info(sarif_files: SarifFileSet, output: str): 109 | """ 110 | Print structure information about the provided `sarif_files`. 111 | """ 112 | if output: 113 | with open(output, "w", encoding="utf-8") as file_out: 114 | file_count = _generate_info_to_file(sarif_files, file_out) 115 | if file_count: 116 | files_string = ( 117 | "1 SARIF file" if file_count == 1 else f"{file_count} SARIF files" 118 | ) 119 | print("Wrote information about", files_string, "to", output) 120 | else: 121 | file_count = _generate_info_to_file(sarif_files, None) 122 | if file_count == 0: 123 | print( 124 | "No SARIF files found. Try passing a path of a SARIF file or containing SARIF files." 125 | ) 126 | -------------------------------------------------------------------------------- /sarif/operations/html_op.py: -------------------------------------------------------------------------------- 1 | """ 2 | Code for `sarif html` command. 3 | """ 4 | 5 | import base64 6 | from datetime import datetime 7 | import os 8 | from typing import Union 9 | 10 | from jinja2 import Environment, FileSystemLoader, select_autoescape 11 | 12 | from sarif import charts, sarif_file 13 | 14 | _THIS_MODULE_PATH = os.path.dirname(__file__) 15 | 16 | _TEMPLATES_PATH = os.path.join(_THIS_MODULE_PATH, "templates") 17 | 18 | _ENV = Environment( 19 | loader=FileSystemLoader(searchpath=_TEMPLATES_PATH), 20 | autoescape=select_autoescape(), 21 | ) 22 | 23 | 24 | def generate_html( 25 | input_files: sarif_file.SarifFileSet, 26 | image_file: Union[str, None], 27 | output: str, 28 | output_multiple_files: bool, 29 | date_val: datetime = datetime.now(), 30 | ): 31 | """ 32 | Generate HTML file from the input files. 
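    If `image_file` is given, it is read and base64-encoded so the template can embed it in the report. When `output_multiple_files` is set, one HTML file is written per input file plus a combined "static_analysis_output.html" in the `output` directory; otherwise `output` names the single file to write.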
33 | """ 34 | if image_file: 35 | image_mime_type = "image/" + os.path.splitext(image_file)[-1] 36 | if image_mime_type == "image/jpg": 37 | image_mime_type = "image/jpeg" 38 | with open(image_file, "rb") as input_file: 39 | image_data = input_file.read() 40 | 41 | image_data_base64 = base64.b64encode(image_data).decode("utf-8") 42 | else: 43 | image_mime_type = None 44 | image_data_base64 = None 45 | 46 | output_file = output 47 | if output_multiple_files: 48 | for input_file in input_files: 49 | output_file_name = input_file.get_file_name_without_extension() + ".html" 50 | print( 51 | "Writing HTML report for", 52 | input_file.get_file_name(), 53 | "to", 54 | output_file_name, 55 | ) 56 | _generate_single_html( 57 | input_file, 58 | os.path.join(output, output_file_name), 59 | date_val, 60 | image_mime_type, 61 | image_data_base64, 62 | ) 63 | output_file = os.path.join(output, "static_analysis_output.html") 64 | source_description = input_files.get_description() 65 | print( 66 | "Writing HTML report for", 67 | source_description, 68 | "to", 69 | os.path.basename(output_file), 70 | ) 71 | _generate_single_html( 72 | input_files, output_file, date_val, image_mime_type, image_data_base64 73 | ) 74 | 75 | 76 | def _generate_single_html( 77 | input_file, output_file, date_val, image_mime_type, image_data_base64 78 | ): 79 | all_tools = input_file.get_distinct_tool_names() 80 | report = input_file.get_report() 81 | 82 | total_distinct_issue_codes = 0 83 | problems = [] 84 | severities = report.get_severities() 85 | 86 | for severity in severities: 87 | distinct_issue_codes = report.get_issue_type_count_for_severity(severity) 88 | 89 | total_distinct_issue_codes += distinct_issue_codes 90 | 91 | severity_details = _enrich_details( 92 | report.get_issues_grouped_by_type_for_severity(severity), input_file 93 | ) 94 | 95 | severity_section = { 96 | "type": severity, 97 | "count": distinct_issue_codes, 98 | "details": severity_details, 99 | } 100 | 101 | problems.append(severity_section) 102 | 103 | chart_data = charts.generate_severity_pie_chart(report, output_file=None) 104 | if chart_data: 105 | chart_image_data_base64 = base64.b64encode(chart_data).decode("utf-8") 106 | else: 107 | chart_image_data_base64 = None 108 | 109 | filtered = None 110 | filter_stats = input_file.get_filter_stats() 111 | if filter_stats: 112 | filtered = f"Results were filtered by {filter_stats}." 
113 | 114 | template = _ENV.get_template("sarif_summary.html") 115 | html_content = template.render( 116 | report_type=", ".join(all_tools), 117 | report_date=date_val, 118 | severities=", ".join(severities), 119 | total=total_distinct_issue_codes, 120 | problems=problems, 121 | image_mime_type=image_mime_type, 122 | image_data_base64=image_data_base64, 123 | chart_image_data_base64=chart_image_data_base64, 124 | filtered=filtered, 125 | ) 126 | 127 | with open(output_file, "wt", encoding="utf-8") as file_out: 128 | file_out.write(html_content) 129 | 130 | 131 | def _extract_help_links_from_rules(rules, link_to_desc, key): 132 | for rule in rules: 133 | if "helpUri" in rule: 134 | uri = rule["helpUri"] 135 | if uri not in link_to_desc: 136 | desc = rule.get("fullDescription", {}).get("text") 137 | if not desc: 138 | desc = rule.get("name") 139 | if not desc: 140 | desc = key 141 | link_to_desc[uri] = desc 142 | 143 | 144 | def _enrich_details(records_of_severity, input_file): 145 | ret = [] 146 | 147 | for key, records in records_of_severity.items(): 148 | link_to_desc = {} 149 | for record in records: 150 | rule_id = record["Code"] 151 | rules = input_file.get_rules_by_id(rule_id) 152 | _extract_help_links_from_rules(rules, link_to_desc, key) 153 | links = [(desc, uri) for (uri, desc) in link_to_desc.items()] 154 | ret.append( 155 | {"code": key, "count": len(records), "links": links, "details": records} 156 | ) 157 | return ret 158 | -------------------------------------------------------------------------------- /tests/ops/html/test_html.py: -------------------------------------------------------------------------------- 1 | import datetime 2 | import os 3 | import tempfile 4 | 5 | from sarif.operations import html_op 6 | from sarif import sarif_file 7 | 8 | INPUT_SARIF = { 9 | "$schema": "https://schemastore.azurewebsites.net/schemas/json/sarif-2.1.0-rtm.5.json", 10 | "version": "2.1.0", 11 | "runs": [ 12 | { 13 | "tool": { 14 | "driver": { 15 | "name": "unit test", 16 | "rules": [ 17 | { 18 | "id": "CA2101", 19 | "name": "Specify for P/Invoke string arguments", 20 | "helpUri": "https://learn.microsoft.com/dotnet/fundamentals/code-analysis/quality-rules/ca2101", 21 | } 22 | ], 23 | } 24 | }, 25 | "results": [ 26 | { 27 | "ruleId": "CA2101", 28 | "level": "error", 29 | "locations": [ 30 | { 31 | "physicalLocation": { 32 | "artifactLocation": { 33 | "uri": "file:///C:/Code/main.c", 34 | "index": 0, 35 | }, 36 | "region": {"startLine": 24, "startColumn": 9}, 37 | } 38 | } 39 | ], 40 | } 41 | ], 42 | } 43 | ], 44 | } 45 | 46 | 47 | EXPECTED_OUTPUT_TXT = """ 48 | 49 | 123 | 124 | 125 |

Sarif Summary: unit test

126 |

Document generated on:

127 |

Total number of distinct issues of all severities (error, warning, note): 1

128 | 129 |

Severity : error [ 1 ]

130 | 142 | 143 |

Severity : warning [ 0 ]

144 |
    145 | 146 |
147 | 148 |

Severity : note [ 0 ]

149 |
    150 | 151 |
152 | """ 168 | 169 | 170 | def test_html(): 171 | mtime = datetime.datetime.now() 172 | input_sarif_file = sarif_file.SarifFile("INPUT_SARIF", INPUT_SARIF, mtime=mtime) 173 | 174 | input_sarif_file_set = sarif_file.SarifFileSet() 175 | input_sarif_file_set.files.append(input_sarif_file) 176 | 177 | with tempfile.TemporaryDirectory() as tmp: 178 | file_path = os.path.join(tmp, "output.html") 179 | html_op.generate_html( 180 | input_sarif_file_set, 181 | None, 182 | file_path, 183 | output_multiple_files=False, 184 | date_val=mtime, 185 | ) 186 | 187 | with open(file_path, "rb") as f_in: 188 | output = f_in.read().decode() 189 | 190 | # Remove pie chart before diffing 191 | pie_chart_start = output.find("", pie_chart_start) + 2 193 | output = output[:pie_chart_start] + output[pie_chart_end:] 194 | 195 | # Check the output line-by-line, ignoring whitespace around and between lines. 196 | output_split = output.splitlines() 197 | for check_line in EXPECTED_OUTPUT_TXT.replace( 198 | "", mtime.strftime("%Y-%m-%d %H:%M:%S.%f") 199 | ).splitlines(): 200 | expected = check_line.strip() 201 | if not expected: 202 | continue 203 | actual = "" 204 | while output_split: 205 | actual = output_split.pop(0).strip() 206 | if actual: 207 | break 208 | assert actual == expected 209 | -------------------------------------------------------------------------------- /sarif/issues_report.py: -------------------------------------------------------------------------------- 1 | """ 2 | A report derived from a SARIF file or group of SARIF files. 3 | 4 | The issues are grouped by severity, then by key (which is either issue code + truncated 5 | description or just issue code if the issues have distinct descriptions), then listed in location 6 | order. 7 | """ 8 | 9 | from typing import Dict, List 10 | 11 | from sarif.sarif_file_utils import ( 12 | combine_code_and_description, 13 | combine_record_code_and_description, 14 | record_sort_key, 15 | SARIF_SEVERITIES_WITHOUT_NONE, 16 | SARIF_SEVERITIES_WITH_NONE, 17 | ) 18 | 19 | 20 | class IssuesReport: 21 | """ 22 | This class imposes a hierarchical structure on a list of records which is helpful 23 | for presenting reader-friendly summaries. 
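    Illustrative use (a sketch: the record fields follow the record dicts used elsewhere in this package, and the values are hypothetical):

        report = IssuesReport()
        report.add_record(
            {"Severity": "error", "Code": "CA2101", "Description": "Specify marshaling",
             "Location": "file:///C:/Code/main.c", "Line": 24}
        )
        for severity in report.get_severities():
            histogram = report.get_issue_type_histogram_for_severity(severity)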
24 | """ 25 | 26 | def __init__(self): 27 | self._sev_to_records = {sev: [] for sev in SARIF_SEVERITIES_WITH_NONE} 28 | self._sev_to_sorted_keys = None 29 | self._records_have_been_sorted = False 30 | 31 | def add_record(self, record: dict): 32 | """Append record to list for severity - no sorting.""" 33 | self._sev_to_records.setdefault(record["Severity"], []).append(record) 34 | if self._records_have_been_sorted: 35 | self._sev_to_sorted_keys = None 36 | self._records_have_been_sorted = False 37 | 38 | def _group_records_by_key(self): 39 | self._sev_to_sorted_keys = {} 40 | code_to_key_and_count = {} 41 | for severity, issues in self._sev_to_records.items(): 42 | code_to_key_and_count.clear() 43 | for record in issues: 44 | code = record["Code"] 45 | key = combine_record_code_and_description(record) 46 | key_and_count = code_to_key_and_count.get(code) 47 | if key_and_count is None: 48 | code_to_key_and_count[code] = { 49 | "key": key, 50 | "common_desc": record["Description"], 51 | "count": 1, 52 | } 53 | else: 54 | key_and_count["count"] += 1 55 | common_desc_stem = key_and_count["common_desc"] 56 | desc = record["Description"] 57 | if not desc.startswith(common_desc_stem): 58 | for char_pos, (char1, char2) in enumerate( 59 | zip(common_desc_stem, desc) 60 | ): 61 | if char1 != char2: 62 | new_desc_stem = common_desc_stem[0:char_pos] 63 | key_and_count["common_desc"] = new_desc_stem 64 | key_and_count["key"] = combine_code_and_description( 65 | code, new_desc_stem + " ..." 66 | ) 67 | break 68 | sorted_codes = sorted( 69 | code_to_key_and_count.keys(), 70 | key=lambda code: code_to_key_and_count[code]["count"], 71 | reverse=True, 72 | ) 73 | self._sev_to_sorted_keys[severity] = { 74 | code_to_key_and_count[code]["key"]: [] for code in sorted_codes 75 | } 76 | for record in issues: 77 | # Not sorting the issues by location at this point 78 | code = record["Code"] 79 | self._sev_to_sorted_keys[severity][ 80 | code_to_key_and_count[code]["key"] 81 | ].append(record) 82 | 83 | def _sort_record_lists(self): 84 | if self._sev_to_sorted_keys is None: 85 | self._group_records_by_key() 86 | for key_to_records in self._sev_to_sorted_keys.values(): 87 | for records in key_to_records.values(): 88 | records.sort(key=record_sort_key) 89 | self._records_have_been_sorted = True 90 | 91 | def get_issue_count_for_severity(self, severity: str) -> int: 92 | """Get the number of individual records at this severity level.""" 93 | return len(self._sev_to_records.get(severity, [])) 94 | 95 | def get_issue_type_count_for_severity(self, severity: str) -> int: 96 | """Get the number of distinct issue types at this severity level.""" 97 | if self._sev_to_sorted_keys is None: 98 | self._group_records_by_key() 99 | return len(self._sev_to_sorted_keys.get(severity, [])) 100 | 101 | def any_none_severities(self) -> bool: 102 | """Are there any records with severity level "none"?""" 103 | return bool(self._sev_to_records.get("none", {})) 104 | 105 | def get_severities(self) -> List[str]: 106 | """ 107 | Get the list of relevant severity levels for these records. 108 | 109 | The returned list always includes "error", "warning" and "note", the standard SARIF severity 110 | levels for code issues. The unusual severity level "none" is only included at the end if 111 | there are any records with severity "none". 
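        For example, a report containing only error-, warning- and note-level records returns
        ["error", "warning", "note"], while a report that also contains "none"-level records
        returns ["error", "warning", "note", "none"].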
112 | """ 113 | return ( 114 | SARIF_SEVERITIES_WITH_NONE 115 | if self.any_none_severities() 116 | else SARIF_SEVERITIES_WITHOUT_NONE 117 | ) 118 | 119 | def get_issues_grouped_by_type_for_severity( 120 | self, severity: str 121 | ) -> Dict[str, List[dict]]: 122 | """ 123 | Get a dict from issue type key to list of matching records at this severity level. 124 | 125 | Issue type keys are derived from the issue code and (common prefix of) description. 126 | """ 127 | if not self._records_have_been_sorted: 128 | self._sort_record_lists() 129 | return self._sev_to_sorted_keys.get(severity, {}) 130 | 131 | def get_issue_type_histogram_for_severity(self, severity: str) -> Dict[str, int]: 132 | """ 133 | Get a dict from issue type key to number of matching records at this severity level. 134 | 135 | This is the same as `{k: len(v) for k, v in d.items()}` where 136 | `d = report.get_issues_grouped_by_type_for_severity(severity)`. 137 | """ 138 | if self._sev_to_sorted_keys is None: 139 | self._group_records_by_key() 140 | return { 141 | key: len(records) 142 | for key, records in self.get_issues_grouped_by_type_for_severity( 143 | severity 144 | ).items() 145 | } 146 | 147 | def get_issues_for_severity(self, severity: str) -> List[dict]: 148 | """ 149 | Get a flat list of the issues at this severity. 150 | 151 | The sorting is consistent with `get_issues_grouped_by_type`, but the issues are not grouped 152 | by type. 153 | """ 154 | type_to_issues = self.get_issues_grouped_by_type_for_severity(severity) 155 | ret = [] 156 | for issues_for_type in type_to_issues.values(): 157 | ret.extend(issues_for_type) 158 | return ret 159 | -------------------------------------------------------------------------------- /tests/ops/diff/test_diff_issues_reordered.py: -------------------------------------------------------------------------------- 1 | import datetime 2 | import json 3 | import os 4 | import tempfile 5 | 6 | from sarif.operations import diff_op 7 | from sarif import sarif_file 8 | 9 | SARIF = { 10 | "$schema": "https://schemastore.azurewebsites.net/schemas/json/sarif-2.1.0-rtm.5.json", 11 | "version": "2.1.0", 12 | "runs": [ 13 | { 14 | "tool": {"driver": {"name": "unit test"}}, 15 | "results": [ 16 | { 17 | "ruleId": "core.NullDereference", 18 | "ruleIndex": 2, 19 | "message": { 20 | "text": "Access to field 'type' results in a dereference of a null pointer (loaded from variable 'json')" 21 | }, 22 | "locations": [ 23 | { 24 | "physicalLocation": { 25 | "artifactLocation": { 26 | "uri": "file:///C:/Code/main.c", 27 | "index": 0, 28 | }, 29 | "region": {"startLine": 24, "startColumn": 9}, 30 | } 31 | } 32 | ], 33 | }, 34 | { 35 | "ruleId": "core.NullDereference", 36 | "ruleIndex": 2, 37 | "message": { 38 | "text": "Dereference of null pointer (loaded from variable 's')" 39 | }, 40 | "locations": [ 41 | { 42 | "physicalLocation": { 43 | "artifactLocation": { 44 | "uri": "file:///C:/Code/main.c", 45 | "index": 0, 46 | }, 47 | "region": {"startLine": 24, "startColumn": 9}, 48 | } 49 | } 50 | ], 51 | }, 52 | { 53 | "ruleId": "core.NullDereference", 54 | "ruleIndex": 2, 55 | "message": { 56 | "text": "Access to field 'other' results in a dereference of a null pointer (loaded from variable 'json')" 57 | }, 58 | "locations": [ 59 | { 60 | "physicalLocation": { 61 | "artifactLocation": { 62 | "uri": "file:///C:/Code/main.c", 63 | "index": 0, 64 | }, 65 | "region": {"startLine": 24, "startColumn": 9}, 66 | } 67 | } 68 | ], 69 | }, 70 | ], 71 | } 72 | ], 73 | } 74 | 75 | SARIF_WITH_ISSUES_REORDERED = { 76 
| "$schema": "https://schemastore.azurewebsites.net/schemas/json/sarif-2.1.0-rtm.5.json", 77 | "version": "2.1.0", 78 | "runs": [ 79 | { 80 | "tool": {"driver": {"name": "unit test"}}, 81 | "results": [ 82 | { 83 | "ruleId": "core.NullDereference", 84 | "ruleIndex": 2, 85 | "message": { 86 | "text": "Access to field 'type' results in a dereference of a null pointer (loaded from variable 'json')" 87 | }, 88 | "locations": [ 89 | { 90 | "physicalLocation": { 91 | "artifactLocation": { 92 | "uri": "file:///C:/Code/main.c", 93 | "index": 0, 94 | }, 95 | "region": {"startLine": 24, "startColumn": 9}, 96 | } 97 | } 98 | ], 99 | }, 100 | { 101 | "ruleId": "core.NullDereference", 102 | "ruleIndex": 2, 103 | "message": { 104 | "text": "Access to field 'other' results in a dereference of a null pointer (loaded from variable 'json')" 105 | }, 106 | "locations": [ 107 | { 108 | "physicalLocation": { 109 | "artifactLocation": { 110 | "uri": "file:///C:/Code/main.c", 111 | "index": 0, 112 | }, 113 | "region": {"startLine": 24, "startColumn": 9}, 114 | } 115 | } 116 | ], 117 | }, 118 | { 119 | "ruleId": "core.NullDereference", 120 | "ruleIndex": 2, 121 | "message": { 122 | "text": "Dereference of null pointer (loaded from variable 's')" 123 | }, 124 | "locations": [ 125 | { 126 | "physicalLocation": { 127 | "artifactLocation": { 128 | "uri": "file:///C:/Code/main.c", 129 | "index": 0, 130 | }, 131 | "region": {"startLine": 24, "startColumn": 9}, 132 | } 133 | } 134 | ], 135 | }, 136 | ], 137 | } 138 | ], 139 | } 140 | 141 | 142 | def test_diff_issues_reordered(): 143 | mtime = datetime.datetime.now() 144 | sarif = sarif_file.SarifFile("SARIF", SARIF, mtime=mtime) 145 | sarif_reordered = sarif_file.SarifFile( 146 | "SARIF_WITH_ISSUES_REORDERED", SARIF_WITH_ISSUES_REORDERED, mtime=mtime 147 | ) 148 | verify_no_diffs(sarif, sarif_reordered) 149 | verify_no_diffs(sarif_reordered, sarif) 150 | 151 | 152 | def verify_no_diffs(old_sarif: sarif_file.SarifFile, new_sarif: sarif_file.SarifFile): 153 | with tempfile.TemporaryDirectory() as tmp: 154 | file_path = os.path.join(tmp, "diff.json") 155 | result = diff_op.print_diff( 156 | old_sarif, new_sarif, file_path, check_level="warning" 157 | ) 158 | with open(file_path, "rb") as f_in: 159 | diff_dict = json.load(f_in) 160 | assert result == 0 161 | assert diff_dict == { 162 | "all": {"+": 0, "-": 0}, 163 | "error": {"+": 0, "-": 0, "codes": {}}, 164 | "warning": {"+": 0, "-": 0, "codes": {}}, 165 | "note": {"+": 0, "-": 0, "codes": {}}, 166 | } 167 | -------------------------------------------------------------------------------- /sarif/operations/blame_op.py: -------------------------------------------------------------------------------- 1 | """ 2 | Code for `sarif blame` command. 
3 | """ 4 | 5 | import json 6 | import os 7 | import subprocess 8 | import sys 9 | from typing import Callable, Iterable, List, Union 10 | import urllib.parse 11 | import urllib.request 12 | 13 | from sarif.sarif_file import SarifFileSet 14 | 15 | 16 | def _run_git_blame(repo_path: str, file_path: str) -> List[bytes]: 17 | cmd = ["git", "blame", "--porcelain", _make_path_git_compatible(file_path)] 18 | with subprocess.Popen(cmd, stdout=subprocess.PIPE, cwd=repo_path) as proc: 19 | result = [] 20 | if proc.stdout: 21 | result = [x for x in proc.stdout.readlines()] 22 | 23 | # Ensure process terminates 24 | proc.communicate() 25 | if proc.returncode: 26 | cmd_str = " ".join(cmd) 27 | sys.stderr.write( 28 | f"WARNING: Command `{cmd_str} " 29 | f"failed with exit code {proc.returncode} in {repo_path}\n" 30 | ) 31 | 32 | return result 33 | 34 | 35 | def enhance_with_blame( 36 | input_files: SarifFileSet, 37 | repo_path: str, 38 | output: str, 39 | output_multiple_files: bool, 40 | run_git_blame: Callable[[str, str], List[bytes]] = _run_git_blame, 41 | ): 42 | """ 43 | Enhance SARIF files with information from `git blame`. The `git` command is run in the current 44 | directory, which must be a git repository containing the files at the paths specified in the 45 | input files. Updated files are written to output_path if specified, otherwise to the current 46 | directory. 47 | """ 48 | if not input_files: 49 | return 50 | if not os.path.isdir(repo_path): 51 | raise ValueError(f"No git repository directory found at {repo_path}") 52 | 53 | _enhance_with_blame(input_files, repo_path, run_git_blame) 54 | 55 | for input_file in input_files: 56 | input_file_name = input_file.get_file_name() 57 | if any( 58 | "blame" in result.get("properties", {}) 59 | for result in input_file.get_results() 60 | ): 61 | output_file = output 62 | if output_multiple_files: 63 | output_filename = ( 64 | input_file.get_file_name_without_extension() 65 | + "_with_blame." 66 | + input_file.get_file_name_extension() 67 | ) 68 | output_file = os.path.join(output, output_filename) 69 | print( 70 | "Writing", 71 | output_file, 72 | "combining original SARIF from", 73 | input_file_name, 74 | "with git blame information", 75 | ) 76 | with open(output_file, "w", encoding="utf-8") as file_out: 77 | json.dump(input_file.data, file_out) 78 | else: 79 | sys.stderr.write( 80 | f"WARNING: did not find any git blame information for {input_file_name}\n" 81 | ) 82 | 83 | 84 | def _enhance_with_blame( 85 | input_files: SarifFileSet, 86 | repo_path: str, 87 | run_git_blame: Callable[[str, str], List[bytes]], 88 | ): 89 | """ 90 | Run `git blame --porcelain` for each file path listed in input_files. 91 | Then enhance the results in error_list by adding a "blame" property including "hash", "author" 92 | and "timestamp". 93 | Porcelain format is used for parseability and stability. See documentation at 94 | https://git-scm.com/docs/git-blame#_the_porcelain_format. 
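    As a rough illustration (values hypothetical), each porcelain entry begins with a header line such as
    "1a2b3c4d... 10 12 1" (commit hash, original line, final line, group size), followed by key/value lines
    such as "author Jane Doe" and "author-time 1633017600", and ends with the source line itself prefixed by
    a tab, which is the shape the parsing code below expects.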
95 | """ 96 | files_to_blame = set(item["Location"] for item in input_files.get_records()) 97 | file_count = len(files_to_blame) 98 | print( 99 | "Running `git blame --porcelain` on", 100 | "one file" if file_count == 1 else f"{file_count} files", 101 | "in", 102 | repo_path, 103 | ) 104 | file_blame_info = _run_git_blame_on_files(files_to_blame, repo_path, run_git_blame) 105 | 106 | # Now join up blame output with result list 107 | blame_info_count = 0 108 | item_count = 0 109 | for result, record in zip(input_files.get_results(), input_files.get_records()): 110 | item_count += 1 111 | file_path = record["Location"] 112 | if file_path in file_blame_info: 113 | blame_info = file_blame_info[file_path] 114 | # raw_line can be None if no line number information was included in the SARIF result. 115 | raw_line = record["Line"] 116 | if raw_line: 117 | line_no = str(raw_line) 118 | if line_no in blame_info["line_to_commit"]: 119 | commit_hash = blame_info["line_to_commit"][line_no] 120 | commit = blame_info["commits"][commit_hash] 121 | # Add commit hash to the blame information 122 | commit_with_hash = {"commit": commit_hash, **commit} 123 | # Add blame information to the SARIF Property Bag of the result 124 | result.setdefault("properties", {})["blame"] = commit_with_hash 125 | blame_info_count += 1 126 | print(f"Found blame information for {blame_info_count} of {item_count} results") 127 | 128 | 129 | def _make_path_git_compatible(file_path): 130 | try: 131 | path_as_url = urllib.parse.urlparse(file_path) 132 | if path_as_url.scheme == "file": 133 | return urllib.request.url2pathname(path_as_url.path) 134 | return file_path 135 | except ValueError: 136 | return file_path 137 | 138 | 139 | def _run_git_blame_on_files( 140 | files_to_blame: Iterable[str], 141 | repo_path: str, 142 | run_git_blame: Callable[[str, str], List[bytes]], 143 | ): 144 | file_blame_info = {} 145 | for file_path in files_to_blame: 146 | git_blame_output = run_git_blame(repo_path, file_path) 147 | blame_info = {"commits": {}, "line_to_commit": {}} 148 | file_blame_info[file_path] = blame_info 149 | commit_hash: Union[str, None] = None 150 | 151 | for line_bytes in git_blame_output: 152 | # Convert byte sequence to string and remove trailing LF 153 | line_string = line_bytes.decode("utf-8", errors="replace")[:-1] 154 | # Now parse output from git blame --porcelain 155 | if commit_hash: 156 | if line_string.startswith("\t"): 157 | commit_hash = None 158 | # Ignore line contents = source code 159 | elif " " in line_string: 160 | space_pos = line_string.index(" ") 161 | key = line_string[0:space_pos] 162 | value = line_string[space_pos + 1 :].strip() 163 | blame_info["commits"][commit_hash][key] = value 164 | else: 165 | # e.g. "boundary" 166 | key = line_string 167 | blame_info["commits"][commit_hash][key] = True 168 | else: 169 | commit_line_info = line_string.split(" ") 170 | commit_hash = commit_line_info[0] 171 | commit_line = commit_line_info[2] 172 | blame_info["commits"].setdefault(commit_hash, {}) 173 | blame_info["line_to_commit"][commit_line] = commit_hash 174 | 175 | return file_blame_info 176 | -------------------------------------------------------------------------------- /sarif/operations/diff_op.py: -------------------------------------------------------------------------------- 1 | """ 2 | Code for `sarif diff` command. 
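In outline, the computed diff is a dict keyed by severity level plus "all": each severity maps to counts of new ("+") and eliminated ("-") issue types and a "codes" dict recording old ("<") and new (">") occurrence counts per issue, with the locations of new occurrences listed under "+@" where available.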
3 | """ 4 | 5 | import json 6 | import sys 7 | from typing import Dict 8 | 9 | from sarif import sarif_file 10 | 11 | 12 | def _occurrences(occurrence_count): 13 | return ( 14 | "1 occurrence" if occurrence_count == 1 else f"{occurrence_count} occurrences" 15 | ) 16 | 17 | 18 | def _signed_change(difference): 19 | return str(difference) if difference < 0 else f"+{difference}" 20 | 21 | 22 | def _record_to_location_tuple(record) -> str: 23 | return (record["Location"], record["Line"]) 24 | 25 | 26 | def print_diff( 27 | old_sarif: sarif_file.SarifFileSet, 28 | new_sarif: sarif_file.SarifFileSet, 29 | output, 30 | check_level=None, 31 | ) -> int: 32 | """ 33 | Generate a diff of the issues from the SARIF files and write it to stdout 34 | or a file if specified. 35 | :param old_sarif: corresponds to the old files. 36 | :param new_sarif: corresponds to the new files. 37 | :return: number of increased severities, or 0 if nothing has worsened. 38 | """ 39 | diff = _calc_diff(old_sarif, new_sarif) 40 | if output: 41 | print("writing diff to", output) 42 | with open(output, "w", encoding="utf-8") as output_file: 43 | json.dump(diff, output_file, indent=4) 44 | else: 45 | for severity in sarif_file.SARIF_SEVERITIES_WITH_NONE: 46 | if severity not in diff: 47 | continue 48 | if diff[severity]["codes"]: 49 | print( 50 | severity, 51 | "level:", 52 | _signed_change(diff[severity]["+"]), 53 | _signed_change(-diff[severity]["-"]), 54 | ) 55 | for issue_key, code_info in diff[severity]["codes"].items(): 56 | (old_count, new_count, new_locations) = ( 57 | code_info["<"], 58 | code_info[">"], 59 | code_info.get("+@", []), 60 | ) 61 | if old_count == 0: 62 | print(f' New issue "{issue_key}" ({_occurrences(new_count)})') 63 | elif new_count == 0: 64 | print(f' Eliminated issue "{issue_key}"') 65 | else: 66 | print( 67 | f" Number of occurrences {old_count} -> {new_count}", 68 | f'({_signed_change(new_count - old_count)}) for issue "{issue_key}"', 69 | ) 70 | if new_locations: 71 | # Print the top 3 new locations 72 | for record in new_locations[0:3]: 73 | (location, line) = _record_to_location_tuple(record) 74 | print(f" {location}:{line}") 75 | if len(new_locations) > 3: 76 | print(" ...") 77 | else: 78 | print(severity, "level: +0 -0 no changes") 79 | print( 80 | "all levels:", 81 | _signed_change(diff["all"]["+"]), 82 | _signed_change(-diff["all"]["-"]), 83 | ) 84 | filter_stats = old_sarif.get_filter_stats() 85 | if filter_stats: 86 | print(f" 'Before' results were filtered by {filter_stats}") 87 | filter_stats = new_sarif.get_filter_stats() 88 | if filter_stats: 89 | print(f" 'After' results were filtered by {filter_stats}") 90 | ret = 0 91 | if check_level: 92 | for severity in sarif_file.SARIF_SEVERITIES_WITH_NONE: 93 | ret += diff.get(severity, {}).get("+", 0) 94 | if severity == check_level: 95 | break 96 | if ret > 0: 97 | sys.stderr.write( 98 | f"Check: exiting with return code {ret} due to increase in issues at or above {check_level} severity\n" 99 | ) 100 | return ret 101 | 102 | 103 | def _find_new_occurrences(new_records, old_records): 104 | # Note: this is O(n²) complexity where n is the number of occurrences of this issue type, 105 | # so could be slow when there are a large number of occurrences. 
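# A new record whose Location does not appear among the old records counts as a brand-new
# location; one whose Location is known but whose Line is not counts as a new line within an
# existing file. New-location records are returned first, then new-line records, with each
# group sorted by location and line.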
106 | old_occurrences = old_records 107 | new_occurrences_new_locations = [] 108 | new_occurrences_new_lines = [] 109 | for new_record in new_records: 110 | (new_location, new_line) = (True, True) 111 | for old_record in old_occurrences: 112 | if old_record["Location"] == new_record["Location"]: 113 | new_location = False 114 | if old_record["Line"] == new_record["Line"]: 115 | new_line = False 116 | break 117 | if new_location: 118 | if new_record not in new_occurrences_new_locations: 119 | new_occurrences_new_locations.append(new_record) 120 | elif new_line: 121 | if new_record not in new_occurrences_new_lines: 122 | new_occurrences_new_lines.append(new_record) 123 | 124 | return sorted( 125 | new_occurrences_new_locations, key=_record_to_location_tuple 126 | ) + sorted(new_occurrences_new_lines, key=_record_to_location_tuple) 127 | 128 | 129 | def _calc_diff( 130 | old_sarif: sarif_file.SarifFileSet, new_sarif: sarif_file.SarifFileSet 131 | ) -> Dict: 132 | """ 133 | Generate a diff of the issues from the SARIF files. 134 | old_sarif corresponds to the old files. 135 | new_sarif corresponds to the new files. 136 | Return dict has keys "error", "warning", "note", "none" (if present) and "all". 137 | """ 138 | ret = {"all": {"+": 0, "-": 0}} 139 | old_report = old_sarif.get_report() 140 | new_report = new_sarif.get_report() 141 | # Include `none` in the list of severities if there are any `none` records in either the old 142 | # or new report. 143 | severities = ( 144 | old_report.get_severities() 145 | if old_report.any_none_severities() 146 | else new_report.get_severities() 147 | ) 148 | for severity in severities: 149 | old_histogram = old_report.get_issue_type_histogram_for_severity(severity) 150 | new_histogram = new_report.get_issue_type_histogram_for_severity(severity) 151 | ret[severity] = {"+": 0, "-": 0, "codes": {}} 152 | if old_histogram != new_histogram: 153 | for issue_key, count in new_histogram.items(): 154 | old_count = old_histogram.pop(issue_key, 0) 155 | if old_count != count: 156 | ret[severity]["codes"][issue_key] = {"<": old_count, ">": count} 157 | if old_count == 0: 158 | ret[severity]["+"] += 1 159 | new_occurrences = _find_new_occurrences( 160 | new_report.get_issues_grouped_by_type_for_severity( 161 | severity 162 | ).get(issue_key, []), 163 | old_report.get_issues_grouped_by_type_for_severity( 164 | severity 165 | ).get(issue_key, []), 166 | ) 167 | if new_occurrences: 168 | ret[severity]["codes"][issue_key]["+@"] = [ 169 | {"Location": r["Location"], "Line": r["Line"]} 170 | for r in new_occurrences 171 | ] 172 | for issue_key, old_count in old_histogram.items(): 173 | ret[severity]["codes"][issue_key] = {"<": old_count, ">": 0} 174 | ret[severity]["-"] += 1 175 | ret["all"]["+"] += ret[severity]["+"] 176 | ret["all"]["-"] += ret[severity]["-"] 177 | return ret 178 | -------------------------------------------------------------------------------- /sarif/operations/word_op.py: -------------------------------------------------------------------------------- 1 | """ 2 | Generate summary of given JSON or given JSON Document in to Microsoft Word Documents. 3 | This functionality uses a python module called 4 | 5 | python-docx - a Python library for creating and updating Microsoft Word (.docx) files. 
6 | 7 | https://python-docx.readthedocs.io/ 8 | 9 | """ 10 | 11 | from datetime import datetime 12 | import os 13 | from typing import Union 14 | 15 | import docx 16 | from docx import oxml 17 | from docx import shared 18 | from docx.enum import text 19 | from docx.oxml import ns 20 | 21 | from sarif import charts, sarif_file 22 | from sarif.sarif_file_utils import combine_record_code_and_description 23 | 24 | 25 | def generate_word_docs_from_sarif_inputs( 26 | input_files: sarif_file.SarifFileSet, 27 | image_file: Union[str, None], 28 | output: str, 29 | output_multiple_files: bool, 30 | date_val: datetime = datetime.now(), 31 | ): 32 | """ 33 | Convert SARIF input to Word file output. 34 | """ 35 | if not input_files: 36 | raise ValueError("No input files specified!") 37 | 38 | output_file = output 39 | output_file_name = output 40 | if output_multiple_files: 41 | for input_file in input_files: 42 | output_file_name = input_file.get_file_name_without_extension() + ".docx" 43 | print( 44 | "Writing Word summary of", 45 | input_file.get_file_name(), 46 | "to", 47 | output_file_name, 48 | ) 49 | report = input_file.get_report() 50 | _generate_word_summary( 51 | input_file, 52 | report, 53 | os.path.join(output, output_file_name), 54 | image_file, 55 | date_val, 56 | ) 57 | output_file_name = "static_analysis_output.docx" 58 | output_file = os.path.join(output, output_file_name) 59 | 60 | source_description = input_files.get_description() 61 | print("Writing Word summary of", source_description, "to", output_file_name) 62 | report = input_files.get_report() 63 | _generate_word_summary(input_files, report, output_file, image_file, date_val) 64 | 65 | 66 | def _generate_word_summary( 67 | sarif_data, report, output_file, image_file: Union[str, None], date_val: datetime 68 | ): 69 | # Create a new document 70 | document = docx.Document() 71 | 72 | severities = report.get_severities() 73 | _add_heading_and_highlevel_info( 74 | document, sarif_data, report, severities, output_file, image_file, date_val 75 | ) 76 | _dump_errors_summary_by_sev(document, report, severities) 77 | _dump_each_error_in_detail(document, report, severities) 78 | 79 | # finally, save the document. 
80 | document.save(output_file) 81 | 82 | 83 | def _add_heading_and_highlevel_info( 84 | document, 85 | sarif_data, 86 | report, 87 | severities, 88 | output_file, 89 | image_path: Union[str, None], 90 | date_val: datetime, 91 | ): 92 | tool_name = ", ".join(sarif_data.get_distinct_tool_names()) 93 | heading = f"Sarif Summary: {tool_name}" 94 | 95 | if image_path: 96 | document.add_picture(image_path) 97 | last_paragraph = document.paragraphs[-1] 98 | last_paragraph.alignment = text.WD_PARAGRAPH_ALIGNMENT.CENTER 99 | 100 | document.add_heading(heading, 0) 101 | document.add_paragraph(f"Document generated on: {date_val}") 102 | 103 | sevs = ", ".join(severities) 104 | document.add_paragraph( 105 | f"Total number of various severities ({sevs}): {sarif_data.get_result_count()}" 106 | ) 107 | filter_stats = sarif_data.get_filter_stats() 108 | if filter_stats: 109 | document.add_paragraph(f"Results were filtered by {filter_stats}.") 110 | 111 | pie_chart_image_file_path = output_file.replace(".docx", "_severity_pie_chart.png") 112 | if charts.generate_severity_pie_chart(report, pie_chart_image_file_path): 113 | document.add_picture(pie_chart_image_file_path) 114 | last_paragraph = document.paragraphs[-1] 115 | last_paragraph.alignment = text.WD_PARAGRAPH_ALIGNMENT.CENTER 116 | 117 | document.add_page_break() 118 | 119 | 120 | def _dump_errors_summary_by_sev(document, report, severities): 121 | """ 122 | For each severity level (in priority order): create a list of the errors of 123 | that severity, print out how many there are and then do some further analysis 124 | of which error codes are present. 125 | """ 126 | for severity in severities: 127 | errors_of_severity = report.get_issue_type_count_for_severity(severity) 128 | document.add_heading(f"Severity : {severity} [ {errors_of_severity} ]", level=1) 129 | sorted_dict = report.get_issue_type_histogram_for_severity(severity) 130 | if sorted_dict: 131 | for key, count in sorted_dict.items(): 132 | document.add_paragraph(f"{key}: {count}", style="List Bullet") 133 | else: 134 | document.add_paragraph("None", style="List Bullet") 135 | 136 | 137 | def _dump_each_error_in_detail(document, report, severities): 138 | """ 139 | Write out the errors to a table so that a human can do further analysis. 140 | """ 141 | document.add_page_break() 142 | 143 | for severity in severities: 144 | errors_of_severity = report.get_issues_for_severity(severity) 145 | # Sample: 146 | # [{'Location': 'C:\\Max\\AccessionAndroid\\scripts\\parse_coverage.py', 'Line': 119, 147 | # 'Severity': 'error', 'Code': 'DS126186 Disabled certificate validation'}, 148 | # {'Location': 'C:\\Max\\AccessionAndroid\\scripts\\parse_code_stats.py', 'Line': 61, 149 | # 'Severity': 'error', 'Code': 'DS126186 Disabled certificate validation'}, 150 | # ] 151 | if errors_of_severity: 152 | document.add_heading(f"Severity : {severity}", level=2) 153 | table = document.add_table(rows=1 + len(errors_of_severity), cols=3) 154 | 155 | table.style = "Table Grid" # ColorfulGrid-Accent5' 156 | table.autofit = False 157 | 158 | table.alignment = text.WD_TAB_ALIGNMENT.CENTER 159 | 160 | # Cell widths 161 | widths = [shared.Inches(2), shared.Inches(4), shared.Inches(0.5)] 162 | 163 | # To avoid performance problems with large tables, prepare the entries first in this 164 | # list, then iterate the table cells and copy them in. 
165 | # First populate the header row 166 | cells_text = ["Code", "Location", "Line"] 167 | 168 | hdr_cells = table.rows[0].cells 169 | for i in range(3): 170 | table.rows[0].cells[i]._tc.get_or_add_tcPr().append( 171 | oxml.parse_xml( 172 | r''.format(ns.nsdecls("w")) 173 | ) 174 | ) 175 | run = hdr_cells[i].paragraphs[0].add_run(cells_text[i]) 176 | run.bold = True 177 | hdr_cells[i].paragraphs[ 178 | 0 179 | ].alignment = text.WD_PARAGRAPH_ALIGNMENT.CENTER 180 | hdr_cells[i].width = widths[i] 181 | 182 | for eachrow in errors_of_severity: 183 | cells_text += [ 184 | combine_record_code_and_description(eachrow), 185 | eachrow["Location"], 186 | str(eachrow["Line"]), 187 | ] 188 | 189 | # Note: using private property table._cells to avoid performance issue. See 190 | # https://stackoverflow.com/a/69105798/316578 191 | col_index = 0 192 | for cell, cell_text in zip(table._cells, cells_text): 193 | cell.text = cell_text 194 | cell.width = widths[col_index] 195 | col_index = col_index + 1 if col_index < 2 else 0 196 | else: 197 | document.add_heading(f"Severity : {severity}", level=2) 198 | document.add_paragraph("None", style="List Bullet") 199 | -------------------------------------------------------------------------------- /tests/test_sarif_file_utils.py: -------------------------------------------------------------------------------- 1 | from sarif import sarif_file_utils 2 | 3 | 4 | def test_combine_code_and_description_short(): 5 | cd = sarif_file_utils.combine_code_and_description( 6 | "ABC123", "Some short description" 7 | ) 8 | assert cd == "ABC123 Some short description" 9 | assert len(cd) <= 120 10 | 11 | 12 | def test_combine_code_and_description_long_desc(): 13 | cd = sarif_file_utils.combine_code_and_description( 14 | "ABC123", " ".join(f"blah{i}" for i in range(1, 30)) 15 | ) 16 | assert ( 17 | cd 18 | == "ABC123 blah1 blah2 blah3 blah4 blah5 blah6 blah7 blah8 blah9 blah10 blah11 blah12 blah13 blah14 blah15 blah16 ..." 19 | ) 20 | assert len(cd) <= 120 21 | 22 | 23 | def test_combine_code_and_description_long_code(): 24 | long_code = "".join(f"A{i}" for i in range(1, 36)) + "BC" 25 | assert ( 26 | len(long_code) == 98 27 | ), "98 is right length to hit 'placeholder too large for max width' without defensive code" 28 | cd = sarif_file_utils.combine_code_and_description( 29 | long_code, "wow that's a long code" 30 | ) 31 | assert cd == f"{long_code} wow that's a ..." 
32 | assert len(cd) <= 120 33 | long_code = "".join(f"A{i}" for i in range(1, 50)) 34 | cd = sarif_file_utils.combine_code_and_description( 35 | long_code, "wow that's a long code" 36 | ) 37 | assert cd == long_code 38 | 39 | 40 | def test_read_result_rule(): 41 | run = { 42 | "tool": { 43 | "driver": { 44 | "rules": [ 45 | {"id": "id0", "defaultConfiguration": {"level": "none"}}, 46 | {"id": "id1", "defaultConfiguration": {"level": "error"}}, 47 | ] 48 | } 49 | } 50 | } 51 | rule_id0 = run["tool"]["driver"]["rules"][0] 52 | rule_id1 = run["tool"]["driver"]["rules"][1] 53 | 54 | result = {} 55 | (rule, ruleIndex) = sarif_file_utils.read_result_rule(result, run) 56 | assert rule is None 57 | assert ruleIndex == -1 58 | 59 | result = {"ruleIndex": 1} 60 | (rule, ruleIndex) = sarif_file_utils.read_result_rule(result, run) 61 | assert rule == rule_id1 62 | assert ruleIndex == 1 63 | 64 | result = {"rule": {"index": 1}} 65 | (rule, ruleIndex) = sarif_file_utils.read_result_rule(result, run) 66 | assert rule == rule_id1 67 | assert ruleIndex == 1 68 | 69 | result = {"ruleId": "id1"} 70 | (rule, ruleIndex) = sarif_file_utils.read_result_rule(result, run) 71 | assert rule == rule_id1 72 | assert ruleIndex == 1 73 | 74 | result = {"rule": {"id": "id1"}} 75 | (rule, ruleIndex) = sarif_file_utils.read_result_rule(result, run) 76 | assert rule == rule_id1 77 | assert ruleIndex == 1 78 | 79 | result = {"ruleIndex": 0} 80 | (rule, ruleIndex) = sarif_file_utils.read_result_rule(result, run) 81 | assert rule == rule_id0 82 | assert ruleIndex == 0 83 | 84 | result = {"ruleIndex": 0} 85 | (rule, ruleIndex) = sarif_file_utils.read_result_rule(result, {}) 86 | assert rule is None 87 | assert ruleIndex == -1 88 | 89 | 90 | def test_read_result_invocation(): 91 | run = {"invocations": [{"foo": 1}, {"bar": "baz"}]} 92 | 93 | result = {} 94 | invocation = sarif_file_utils.read_result_invocation(result, run) 95 | assert invocation is None 96 | 97 | result = {"provenance": {}} 98 | invocation = sarif_file_utils.read_result_invocation(result, run) 99 | assert invocation is None 100 | 101 | result = {"provenance": {"invocationIndex": 0}} 102 | invocation = sarif_file_utils.read_result_invocation(result, {}) 103 | assert invocation is None 104 | 105 | result = {"provenance": {"invocationIndex": -1}} 106 | invocation = sarif_file_utils.read_result_invocation(result, run) 107 | assert invocation is None 108 | 109 | result = {"provenance": {"invocationIndex": 2}} 110 | invocation = sarif_file_utils.read_result_invocation(result, run) 111 | assert invocation is None 112 | 113 | result = {"provenance": {"invocationIndex": 1}} 114 | invocation = sarif_file_utils.read_result_invocation(result, run) 115 | assert invocation == run["invocations"][1] 116 | 117 | 118 | def test_read_result_severity(): 119 | result = {"level": "error"} 120 | severity = sarif_file_utils.read_result_severity(result, {}) 121 | assert severity == "error" 122 | 123 | # If kind has any value other than "fail", then if level is absent, it SHALL default to "none"... 
124 | result = {"kind": "other"} 125 | severity = sarif_file_utils.read_result_severity(result, {}) 126 | assert severity == "none" 127 | 128 | run = { 129 | "invocations": [ 130 | { 131 | "ruleConfigurationOverrides": [ 132 | {"descriptor": {"id": "id1"}, "configuration": {"level": "note"}} 133 | ] 134 | }, 135 | { 136 | "ruleConfigurationOverrides": [ 137 | {"descriptor": {"index": 1}, "configuration": {"level": "note"}} 138 | ] 139 | }, 140 | {}, 141 | ], 142 | "tool": { 143 | "driver": { 144 | "rules": [ 145 | {"id": "id0", "defaultConfiguration": {"level": "none"}}, 146 | {"id": "id1", "defaultConfiguration": {"level": "error"}}, 147 | ] 148 | } 149 | }, 150 | } 151 | 152 | # If kind has the value "fail" and level is absent, then level SHALL be determined by the following procedure: 153 | # IF rule is present THEN 154 | # LET theDescriptor be the reportingDescriptor object that it specifies. 155 | # # Is there a configuration override for the level property? 156 | # IF result.provenance.invocationIndex is >= 0 THEN 157 | # LET theInvocation be the invocation object that it specifies. 158 | # IF theInvocation.ruleConfigurationOverrides is present 159 | # AND it contains a configurationOverride object whose 160 | # descriptor property specifies theDescriptor THEN 161 | # LET theOverride be that configurationOverride object. 162 | # IF theOverride.configuration.level is present THEN 163 | # Set level to theConfiguration.level. 164 | result = {"ruleIndex": 1, "provenance": {"invocationIndex": 0}} 165 | severity = sarif_file_utils.read_result_severity(result, run) 166 | assert severity == "note" 167 | 168 | result = {"ruleIndex": 1, "provenance": {"invocationIndex": 1}} 169 | severity = sarif_file_utils.read_result_severity(result, run) 170 | assert severity == "note" 171 | 172 | # ELSE 173 | # # There is no configuration override for level. Is there a default configuration for it? 174 | # IF theDescriptor.defaultConfiguration.level is present THEN 175 | # SET level to theDescriptor.defaultConfiguration.level. 176 | 177 | result = {"ruleIndex": 1} 178 | severity = sarif_file_utils.read_result_severity(result, run) 179 | assert severity == "error" 180 | 181 | result = {"rule": {"index": 1}} 182 | severity = sarif_file_utils.read_result_severity(result, run) 183 | assert severity == "error" 184 | 185 | result = {"ruleId": "id1"} 186 | severity = sarif_file_utils.read_result_severity(result, run) 187 | assert severity == "error" 188 | 189 | result = {"rule": {"id": "id1"}} 190 | severity = sarif_file_utils.read_result_severity(result, run) 191 | assert severity == "error" 192 | 193 | result = {"ruleIndex": 1, "provenance": {"invocationIndex": 2}} 194 | severity = sarif_file_utils.read_result_severity(result, run) 195 | assert severity == "error" 196 | 197 | # IF level has not yet been set THEN 198 | # SET level to "warning". 
199 | result = {} 200 | severity = sarif_file_utils.read_result_severity(result, {}) 201 | assert severity == "warning" 202 | 203 | result = {"ruleIndex": -1} 204 | severity = sarif_file_utils.read_result_severity(result, {}) 205 | assert severity == "warning" 206 | -------------------------------------------------------------------------------- /tests/ops/blame/test_blame.py: -------------------------------------------------------------------------------- 1 | from copy import deepcopy 2 | import datetime 3 | import json 4 | import jsonschema 5 | import os 6 | import tempfile 7 | from typing import Callable, Dict, List 8 | 9 | from sarif.operations import blame_op 10 | from sarif import sarif_file 11 | from tests.utils import get_sarif_schema 12 | 13 | ERROR_FILE_RELATIVE_PATH = "subdir/file.py" 14 | ERROR_FILE_ABSOLUTE_PATH = "file:///C:/repo/subdir/file.py" 15 | 16 | SARIF_FILE = { 17 | "$schema": "https://schemastore.azurewebsites.net/schemas/json/sarif-2.1.0-rtm.5.json", 18 | "version": "2.1.0", 19 | "runs": [ 20 | { 21 | "tool": {"driver": {"name": "unit test"}}, 22 | "results": [ 23 | { 24 | "ruleId": "CA2101", 25 | "message": {"text": "just testing"}, 26 | "level": "error", 27 | "locations": [ 28 | { 29 | "physicalLocation": { 30 | "artifactLocation": { 31 | "uri": ERROR_FILE_ABSOLUTE_PATH, 32 | "index": 0, 33 | }, 34 | "region": {"startLine": 3, "startColumn": 9}, 35 | } 36 | } 37 | ], 38 | } 39 | ], 40 | } 41 | ], 42 | } 43 | 44 | GIT_BLAME_OUTPUT = [ 45 | "f9db03438aba52affc5c3fcdb619afa620ad603a 1 1 7\n", 46 | "author Taylor Developer\n", 47 | "author-mail \n", 48 | "author-time 1699272533\n", 49 | "author-tz +0000\n", 50 | "committer GitHub\n", 51 | "committer-mail \n", 52 | "committer-time 1699272533\n", 53 | "committer-tz +0000\n", 54 | "summary Commit message 1\n", 55 | "filename " + ERROR_FILE_RELATIVE_PATH + "\n", 56 | "\tFile text line 1\n", 57 | "f9db03438aba52affc5c3fcdb619afa620ad603a 2 2\n", 58 | "\tFile text line 2\n", 59 | "f9db03438aba52affc5c3fcdb619afa620ad603a 3 3\n", 60 | "\tFile text line 3\n", 61 | "eec0471db074a037d820abdda1f210f8a8c987ca 4 4 1\n", 62 | "author Other Developer\n", 63 | "author-mail \n", 64 | "author-time 1718035364\n", 65 | "author-tz +0100\n", 66 | "committer GitHub\n", 67 | "committer-mail \n", 68 | "committer-time 1718035364\n", 69 | "committer-tz +0100\n", 70 | "summary Commit message 2\n", 71 | "filename " + ERROR_FILE_RELATIVE_PATH + "\n", 72 | "\tFile text line 4\n", 73 | "6732313c320314c122bd00aa40e7c79954f21c15 5 5 1\n", 74 | "author Another Developer\n", 75 | "author-mail \n", 76 | "author-time 1727710690\n", 77 | "author-tz -0700\n", 78 | "committer GitHub\n", 79 | "committer-mail \n", 80 | "committer-time 1727710690\n", 81 | "committer-tz -0700\n", 82 | "summary Commit message 3\n", 83 | "filename " + ERROR_FILE_RELATIVE_PATH + "\n", 84 | "\tFile text line 5\n", 85 | "6732313c320314c122bd00aa40e7c79954f21c15 6 6\n", 86 | "\tFile text line 6\n", 87 | ] 88 | 89 | 90 | def test_blame_no_blame_info(): 91 | input_sarif_file = sarif_file.SarifFile( 92 | "SARIF_FILE", SARIF_FILE, mtime=datetime.datetime.now() 93 | ) 94 | input_sarif_file_set = sarif_file.SarifFileSet() 95 | input_sarif_file_set.files.append(input_sarif_file) 96 | 97 | with tempfile.TemporaryDirectory() as tmp: 98 | repo_path = os.path.join(tmp, "repo") 99 | os.makedirs(repo_path) 100 | output_file_path = os.path.join(tmp, "blamed.json") 101 | 102 | blame_op.enhance_with_blame( 103 | input_sarif_file_set, 104 | repo_path, 105 | output_file_path, 106 | 
output_multiple_files=False, 107 | run_git_blame=lambda repo_path, file_path: [], 108 | ) 109 | 110 | assert not os.path.isfile(output_file_path) 111 | 112 | 113 | def blame_test( 114 | run_git_blame: Callable[[str, str], List[bytes]], 115 | expected_blame_properties: Dict[str, Dict[str, str]], 116 | ): 117 | input_sarif_file = sarif_file.SarifFile( 118 | "SARIF_FILE", SARIF_FILE, mtime=datetime.datetime.now() 119 | ) 120 | input_sarif_file_set = sarif_file.SarifFileSet() 121 | input_sarif_file_set.files.append(input_sarif_file) 122 | 123 | with tempfile.TemporaryDirectory() as tmp: 124 | repo_path = os.path.join(tmp, "repo") 125 | os.makedirs(repo_path) 126 | output_file_path = os.path.join(tmp, "blamed.json") 127 | 128 | def run_git_blame_wrapper( 129 | blame_repo_path: str, blame_file_path: str 130 | ) -> List[bytes]: 131 | assert blame_repo_path == repo_path 132 | assert blame_file_path == ERROR_FILE_ABSOLUTE_PATH 133 | return run_git_blame(blame_repo_path, blame_file_path) 134 | 135 | blame_op.enhance_with_blame( 136 | input_sarif_file_set, 137 | repo_path, 138 | output_file_path, 139 | output_multiple_files=False, 140 | run_git_blame=run_git_blame_wrapper, 141 | ) 142 | 143 | with open(output_file_path, "rb") as f_out: 144 | output_sarif = json.load(f_out) 145 | jsonschema.validate(output_sarif, schema=get_sarif_schema()) 146 | 147 | expected_sarif = deepcopy(input_sarif_file.data) 148 | expected_sarif["runs"][0]["results"][0]["properties"] = ( 149 | expected_blame_properties 150 | ) 151 | assert output_sarif == expected_sarif 152 | 153 | 154 | def test_blame_success(): 155 | def run_git_blame(blame_repo_path: str, blame_file_path: str) -> List[bytes]: 156 | return [x.encode() for x in GIT_BLAME_OUTPUT] 157 | 158 | expected_blame_properties = { 159 | "blame": { 160 | "commit": "f9db03438aba52affc5c3fcdb619afa620ad603a", 161 | "author": "Taylor Developer", 162 | "author-mail": "", 163 | "author-time": "1699272533", 164 | "author-tz": "+0000", 165 | "committer": "GitHub", 166 | "committer-mail": "", 167 | "committer-time": "1699272533", 168 | "committer-tz": "+0000", 169 | "summary": "Commit message 1", 170 | "filename": ERROR_FILE_RELATIVE_PATH, 171 | } 172 | } 173 | 174 | blame_test(run_git_blame, expected_blame_properties) 175 | 176 | 177 | GIT_BLAME_OUTPUT_WITH_INVALID_UTF8 = [ 178 | b"f9db03438aba52affc5c3fcdb619afa620ad603a 1 1 7\n", 179 | b"author Taylor Developer\n", 180 | b"author-mail \n", 181 | b"author-time 1699272533\n", 182 | b"author-tz +0000\n", 183 | b"committer GitHub\n", 184 | b"committer-mail \n", 185 | b"committer-time 1699272533\n", 186 | b"committer-tz +0000\n", 187 | b"summary Commit message \x80\n", 188 | b"filename " + ERROR_FILE_RELATIVE_PATH.encode() + b"\n", 189 | b"\tFile text line 1\n", 190 | b"f9db03438aba52affc5c3fcdb619afa620ad603a 2 2\n", 191 | b"\tFile text line 2\n", 192 | b"f9db03438aba52affc5c3fcdb619afa620ad603a 3 3\n", 193 | b"\tFile text line 3\n", 194 | b"eec0471db074a037d820abdda1f210f8a8c987ca 4 4 1\n", 195 | ] 196 | 197 | 198 | def test_blame_invalid_utf8(): 199 | def run_git_blame(blame_repo_path: str, blame_file_path: str) -> List[bytes]: 200 | return GIT_BLAME_OUTPUT_WITH_INVALID_UTF8 201 | 202 | expected_blame_properties = { 203 | "blame": { 204 | "commit": "f9db03438aba52affc5c3fcdb619afa620ad603a", 205 | "author": "Taylor Developer", 206 | "author-mail": "", 207 | "author-time": "1699272533", 208 | "author-tz": "+0000", 209 | "committer": "GitHub", 210 | "committer-mail": "", 211 | "committer-time": "1699272533", 212 | "committer-tz": 
"+0000", 213 | "summary": "Commit message �", 214 | "filename": ERROR_FILE_RELATIVE_PATH, 215 | } 216 | } 217 | 218 | blame_test(run_git_blame, expected_blame_properties) 219 | -------------------------------------------------------------------------------- /sarif/sarif_file_utils.py: -------------------------------------------------------------------------------- 1 | """ 2 | Reusable utility functions for handling the SARIF format. 3 | 4 | Primarily interrogating the `result` JSON defined at 5 | https://docs.oasis-open.org/sarif/sarif/v2.1.0/cs01/sarif-v2.1.0-cs01.html#_Toc16012594 6 | """ 7 | 8 | import textwrap 9 | from typing import Literal, Tuple, Union 10 | 11 | # SARIF severity levels as per 12 | # https://docs.oasis-open.org/sarif/sarif/v2.1.0/sarif-v2.1.0.html#_Toc141790898 13 | SARIF_SEVERITIES_WITHOUT_NONE = ["error", "warning", "note"] 14 | SARIF_SEVERITIES_WITH_NONE = SARIF_SEVERITIES_WITHOUT_NONE + ["none"] 15 | 16 | 17 | def combine_code_and_description(code: str, description: str) -> str: 18 | """ 19 | Combine code and description into one string, keeping total length under 120 characters. 20 | """ 21 | length_budget = 120 22 | if code: 23 | code = code.strip() 24 | length_budget -= len(code) + 1 # Allow issue code and space character 25 | continuation_placeholder = " ..." 26 | # Allow extra space when truncating for continuation characters 27 | length_budget_pre_continuation = length_budget - len(continuation_placeholder) 28 | if length_budget_pre_continuation < 10: 29 | # Don't include description if it would be very short due to long code 30 | return code 31 | if description: 32 | if "\n" in description: 33 | description = description[: description.index("\n")] 34 | if description.startswith(code): 35 | # Don't duplicate the code 36 | description = description[len(code) :] 37 | description = description.strip() 38 | if description: 39 | if len(description) > length_budget: 40 | shorter_description = textwrap.shorten( 41 | description, 42 | width=length_budget_pre_continuation, 43 | placeholder=continuation_placeholder, 44 | ) 45 | if len(shorter_description) < length_budget_pre_continuation - 40: 46 | # Word wrap shortens the description significantly, so truncate mid-word instead 47 | description = ( 48 | description[:length_budget_pre_continuation] 49 | + continuation_placeholder 50 | ) 51 | else: 52 | description = shorter_description 53 | if code: 54 | return f"{code.strip()} {description}" 55 | return description 56 | if code: 57 | return code 58 | return "" 59 | 60 | 61 | def combine_record_code_and_description(record: dict) -> str: 62 | """ 63 | Combine code and description fields into one string. 64 | """ 65 | return combine_code_and_description(record["Code"], record["Description"]) 66 | 67 | 68 | def read_result_location(result) -> Tuple[str, str]: 69 | """ 70 | Extract the file path and line number strings from the Result. 71 | 72 | Tools store this in different ways, so this function tries a few different JSON locations. 
73 | """ 74 | file_path = None 75 | line_number = None 76 | locations = result.get("locations", []) 77 | if locations and isinstance(locations, list): 78 | location = locations[0] 79 | physical_location = location.get("physicalLocation", {}) 80 | # SpotBugs has some errors with no line number so deal with them by just leaving it at 1 81 | line_number = physical_location.get("region", {}).get("startLine", None) 82 | # For file name, first try the location written by DevSkim 83 | file_path = ( 84 | location.get("physicalLocation", {}) 85 | .get("address", {}) 86 | .get("fullyQualifiedName", None) 87 | ) 88 | if not file_path: 89 | # Next try the physical location written by MobSF and by SpotBugs (for some errors) 90 | file_path = ( 91 | location.get("physicalLocation", {}) 92 | .get("artifactLocation", {}) 93 | .get("uri", None) 94 | ) 95 | if not file_path: 96 | logical_locations = location.get("logicalLocations", None) 97 | if logical_locations: 98 | # Finally, try the logical location written by SpotBugs for some errors 99 | file_path = logical_locations[0].get("fullyQualifiedName", None) 100 | return (file_path, line_number) 101 | 102 | 103 | def read_result_rule(result, run) -> Tuple[Union[dict, None], int]: 104 | """ 105 | Returns the corresponding rule object for the specified result, plus its index 106 | in the rules array. Follows the rules at 107 | https://docs.oasis-open.org/sarif/sarif/v2.1.0/sarif-v2.1.0.html#_Toc141790895 108 | """ 109 | ruleIndex = result.get("ruleIndex") 110 | ruleId = result.get("ruleId") 111 | rule = result.get("rule") 112 | 113 | if rule: 114 | if ruleIndex is None: 115 | ruleIndex = rule.get("index") 116 | 117 | if ruleId is None: 118 | ruleId = rule.get("id") 119 | 120 | rules = run.get("tool", {}).get("driver", {}).get("rules", []) 121 | 122 | if ruleIndex is not None and ruleIndex >= 0 and ruleIndex < len(rules): 123 | return (rules[ruleIndex], ruleIndex) 124 | 125 | if ruleId: 126 | for i, rule in enumerate(rules): 127 | if rule.get("id") == ruleId: 128 | return (rule, i) 129 | 130 | return (None, -1) 131 | 132 | 133 | def read_result_invocation(result, run): 134 | """ 135 | Extract the invocation metadata for the result, following the rules at 136 | https://docs.oasis-open.org/sarif/sarif/v2.1.0/sarif-v2.1.0.html#_Toc141790917 137 | """ 138 | invocationIndex = result.get("provenance", {}).get("invocationIndex") 139 | if invocationIndex is None: 140 | return None 141 | 142 | invocations = run.get("invocations") 143 | 144 | if invocations and invocationIndex >= 0 and invocationIndex < len(invocations): 145 | return invocations[invocationIndex] 146 | 147 | return None 148 | 149 | 150 | def read_result_severity(result, run) -> Literal["none", "note", "warning", "error"]: 151 | """ 152 | Extract the severity level from the result following the rules at 153 | https://docs.oasis-open.org/sarif/sarif/v2.1.0/sarif-v2.1.0.html#_Toc141790898 154 | """ 155 | severity = result.get("level") 156 | if severity: 157 | return severity 158 | 159 | # If kind has any value other than "fail", then if level is absent, 160 | # it SHALL default to "none" 161 | kind = result.get("kind", "fail") 162 | if kind and kind != "fail": 163 | return "none" 164 | 165 | # If kind has the value "fail" and level is absent, then... 166 | rule, ruleIndex = read_result_rule(result, run) 167 | if rule: 168 | # Honor the invocation's configuration override if present... 
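# An override applies when its descriptor refers to this rule either by id or by index.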
169 | invocation = read_result_invocation(result, run) 170 | if invocation: 171 | ruleConfigurationOverrides = invocation.get( 172 | "ruleConfigurationOverrides", [] 173 | ) 174 | override = next( 175 | ( 176 | override 177 | for override in ruleConfigurationOverrides 178 | if override.get("descriptor", {}).get("id") == rule.get("id") 179 | or override.get("descriptor", {}).get("index") == ruleIndex 180 | ), 181 | None, 182 | ) 183 | 184 | if override: 185 | overrideLevel = override.get("configuration", {}).get("level") 186 | if overrideLevel: 187 | return overrideLevel 188 | 189 | # Otherwise, use the rule's default configuraiton if present... 190 | defaultConfiguration = rule.get("defaultConfiguration") 191 | if defaultConfiguration: 192 | severity = defaultConfiguration.get("level") 193 | if severity: 194 | return severity 195 | 196 | # Otherwise, fall back to warning 197 | return "warning" 198 | 199 | 200 | def record_sort_key(record: dict) -> str: 201 | """Get a sort key for the record.""" 202 | return ( 203 | combine_record_code_and_description(record) 204 | + record["Location"] 205 | + str(record["Line"]).zfill(6) 206 | ) 207 | -------------------------------------------------------------------------------- /sarif/filter/general_filter.py: -------------------------------------------------------------------------------- 1 | """ 2 | SARIF file filtering functionality. 3 | """ 4 | 5 | import os 6 | import re 7 | from typing import Optional, List 8 | 9 | import copy 10 | import jsonpath_ng.ext 11 | import yaml 12 | 13 | from sarif import sarif_file_utils 14 | from sarif.filter.filter_stats import FilterStats, load_filter_stats_from_json 15 | 16 | # Commonly used properties can be specified using shortcuts 17 | # instead of full JSON path 18 | FILTER_SHORTCUTS = { 19 | "author": "properties.blame.author", 20 | "author-mail": "properties.blame.author-mail", 21 | "committer": "properties.blame.committer", 22 | "committer-mail": "properties.blame.committer-mail", 23 | "location": "locations[*].physicalLocation.artifactLocation.uri", 24 | "rule": "ruleId", 25 | "suppression": "suppressions[*].kind", 26 | } 27 | 28 | # Some properties can have specific shortcuts to make it easier to write filters 29 | # For example a file location can be specified using wildcards 30 | FIELDS_REGEX_SHORTCUTS = {"uri": {"**": ".*", "*": "[^/]*", "?": "."}} 31 | 32 | # Default configuration for all filters 33 | DEFAULT_CONFIGURATION = { 34 | "default-include": True, 35 | "check-line-number": True, 36 | } 37 | 38 | 39 | def _get_filter_function(filter_spec): 40 | """Return a filter function for the given specification.""" 41 | if filter_spec: 42 | filter_len = len(filter_spec) 43 | if filter_len > 2 and filter_spec.startswith("/") and filter_spec.endswith("/"): 44 | regex = filter_spec[1:-1] 45 | return lambda value: re.search(regex, value, re.IGNORECASE) 46 | substring = filter_spec 47 | # substring can be empty, in this case "in" returns true 48 | # and only existence of the property checked. 
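# e.g. a spec of {"author": "Smith"} keeps results whose properties.blame.author contains "Smith".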
49 | return lambda value: substring in value 50 | return lambda value: True 51 | 52 | 53 | def _convert_glob_to_regex(property_name, property_value_spec): 54 | # skip if property_value_spec is a regex 55 | if property_value_spec and not ( 56 | property_value_spec.startswith("/") and property_value_spec.endswith("/") 57 | ): 58 | # get last component of property name 59 | last_component = property_name.split(".")[-1] 60 | if last_component in FIELDS_REGEX_SHORTCUTS: 61 | shortcuts = FIELDS_REGEX_SHORTCUTS[last_component] 62 | regex = re.compile("|".join(map(re.escape, shortcuts.keys()))) 63 | property_value_spec = regex.sub( 64 | lambda match: shortcuts[match.group(0)], property_value_spec 65 | ) 66 | 67 | return f"/{property_value_spec}/" 68 | return property_value_spec 69 | 70 | 71 | class PropertyFilter: 72 | """ 73 | Class that represents a filter term ready for efficient use. 74 | """ 75 | 76 | def __init__(self, prop_path, prop_value_spec, global_configuration): 77 | """ 78 | Compile a filter property. See README for the filter spec format. 79 | 80 | :param prop_path: JsonPath or preset. 81 | :param prop_value_spec: Value spec. 82 | :param global_configuration: Global configuration of the filter. 83 | """ 84 | self.prop_path = prop_path 85 | resolved_prop_path = FILTER_SHORTCUTS.get(prop_path, prop_path) 86 | self.jsonpath_expr = jsonpath_ng.ext.parse(resolved_prop_path) 87 | 88 | # if prop_value_spec is a dict, update filter configuration from it 89 | if isinstance(prop_value_spec, dict): 90 | self.filter_configuration = copy.deepcopy(global_configuration) 91 | for config_key, config_value in prop_value_spec.items(): 92 | if config_key != "value": 93 | self.filter_configuration[config_key] = config_value 94 | # actual value for the filter is in "value" key 95 | prop_value_spec = prop_value_spec.get("value", "") 96 | else: 97 | self.filter_configuration = global_configuration 98 | value_spec = _convert_glob_to_regex(resolved_prop_path, prop_value_spec) 99 | self.filter_function = _get_filter_function(value_spec) 100 | 101 | 102 | class MultiPropertyFilter: 103 | """ 104 | Class representing a list of PropertyFilter objects. 105 | 106 | These are combined using AND to filter results. 107 | """ 108 | 109 | def __init__(self, filter_spec: List[dict], global_filter_configuration: dict): 110 | """ 111 | Initialise from a filter spec. 112 | 113 | See README for filter spec format. It's a list of property paths and values to be 114 | combined with AND to form a filter. 115 | """ 116 | self.filter_spec = filter_spec 117 | self.and_terms = [ 118 | PropertyFilter(prop_path, prop_value_spec, global_filter_configuration) 119 | for prop_path, prop_value_spec in filter_spec.items() 120 | ] 121 | 122 | 123 | def _compile_filters( 124 | filters: List[dict], global_filter_configuration: dict 125 | ) -> List[MultiPropertyFilter]: 126 | return [ 127 | MultiPropertyFilter(filter_spec, global_filter_configuration) 128 | for filter_spec in filters 129 | if filter_spec 130 | ] 131 | 132 | 133 | class GeneralFilter: 134 | """ 135 | Class that implements filtering. 
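A result is kept when it matches at least one include filter (if any are configured) and matches no exclude filter; each kept result gains a `properties.filtered` entry recording the matching filter spec, the filter description and any warnings.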
136 | """ 137 | 138 | def __init__(self): 139 | self.filter_stats = None 140 | self.include_filters = {} 141 | self.apply_inclusion_filter = False 142 | self.exclude_filters = {} 143 | self.apply_exclusion_filter = False 144 | self.configuration = copy.deepcopy(DEFAULT_CONFIGURATION) 145 | 146 | def init_filter( 147 | self, filter_description, configuration, include_filters, exclude_filters 148 | ): 149 | """ 150 | Initialise the filter with the given filter patterns. 151 | """ 152 | self.filter_stats = FilterStats(filter_description) 153 | self.configuration.update(configuration) 154 | self.include_filters = _compile_filters(include_filters, self.configuration) 155 | self.apply_inclusion_filter = len(include_filters) > 0 156 | self.exclude_filters = _compile_filters(exclude_filters, self.configuration) 157 | self.apply_exclusion_filter = len(exclude_filters) > 0 158 | 159 | def rehydrate_filter_stats(self, dehydrated_filter_stats, filter_datetime): 160 | """ 161 | Restore filter stats from the SARIF file directly, 162 | where they were recorded when the filter was previously run. 163 | 164 | Note that if init_filter is called, 165 | these rehydrated stats are discarded. 166 | """ 167 | self.filter_stats = load_filter_stats_from_json(dehydrated_filter_stats) 168 | self.filter_stats.filter_datetime = filter_datetime 169 | 170 | def _zero_counts(self): 171 | if self.filter_stats: 172 | self.filter_stats.reset_counters() 173 | 174 | def _filter_append(self, filtered_results: List[dict], result: dict): 175 | # Remove any existing filter log on the result 176 | result.setdefault("properties", {}).pop("filtered", None) 177 | 178 | if self.apply_inclusion_filter: 179 | included_stats = self._filter_result(result, self.include_filters) 180 | if not included_stats["matchedFilter"]: 181 | # Result is excluded by dint of not being included 182 | self.filter_stats.filtered_out_result_count += 1 183 | return 184 | else: 185 | # no inclusion filters, mark the result as included so far 186 | included_stats = {"state": "included", "matchedFilter": []} 187 | 188 | if self.apply_exclusion_filter: 189 | excluded_stats = self._filter_result(result, self.exclude_filters) 190 | if excluded_stats["matchedFilter"]: 191 | self.filter_stats.filtered_out_result_count += 1 192 | return 193 | 194 | included_state = included_stats["state"] 195 | if included_state == "included": 196 | self.filter_stats.filtered_in_result_count += 1 197 | elif included_state == "noLineNumber": 198 | self.filter_stats.unconvincing_line_number_count += 1 199 | else: 200 | self.filter_stats.missing_property_count += 1 201 | included_stats["filter"] = self.filter_stats.filter_description 202 | result["properties"]["filtered"] = included_stats 203 | 204 | filtered_results.append(result) 205 | 206 | def _filter_result(self, result: dict, filters: List[MultiPropertyFilter]) -> dict: 207 | matched_filters = [] 208 | warnings = [] 209 | (_file_path, line_number) = sarif_file_utils.read_result_location(result) 210 | unconvincing_line_number = line_number == "1" or not line_number 211 | default_include_noprop = False 212 | 213 | if filters: 214 | # filters contain rules which treated as OR. 215 | # if any rule matches, the record is selected. 216 | for mpf in filters: 217 | # filter_spec contains rules which treated as AND. 218 | # all rules must match to select the record. 
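# A term is skipped (treated as matching, with a warning) when line-number checking is enabled
# but the result has no convincing line number, or when the property is absent and
# default-include is true; with default-include false, a missing property fails the term.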
219 | matched = True 220 | for property_filter in mpf.and_terms: 221 | if ( 222 | property_filter.filter_configuration.get( 223 | "check-line-number", True 224 | ) 225 | and unconvincing_line_number 226 | ): 227 | warnings.append( 228 | f"Field '{property_filter.prop_path}' not checked due to " 229 | "missing line number information" 230 | ) 231 | continue 232 | found_results = property_filter.jsonpath_expr.find(result) 233 | if found_results: 234 | value = found_results[0].value 235 | if property_filter.filter_function(value): 236 | continue 237 | else: 238 | # property to filter on is not found, or skipped due to invalid line number. 239 | # if "default-include" is true, include the "result" with a warning. 240 | if property_filter.filter_configuration.get( 241 | "default-include", True 242 | ): 243 | warnings.append( 244 | f"Field '{property_filter.prop_path}' is missing but " 245 | "the result included as default-include is true" 246 | ) 247 | default_include_noprop = True 248 | continue 249 | matched = False 250 | break 251 | if matched: 252 | matched_filters.append(mpf.filter_spec) 253 | break 254 | 255 | stats = { 256 | "state": "included", 257 | "matchedFilter": matched_filters, 258 | } 259 | 260 | if warnings: 261 | stats.update( 262 | { 263 | "state": "noProperty" if default_include_noprop else "noLineNumber", 264 | "warnings": warnings, 265 | } 266 | ) 267 | 268 | return stats 269 | 270 | def filter_results(self, results: List[dict]) -> List[dict]: 271 | """ 272 | Apply this filter to a list of results, 273 | return the results that pass the filter 274 | and as a side-effect, update the filter stats. 275 | """ 276 | if self.apply_inclusion_filter or self.apply_exclusion_filter: 277 | self._zero_counts() 278 | ret = [] 279 | for result in results: 280 | self._filter_append(ret, result) 281 | return ret 282 | # No inclusion or exclusion patterns 283 | return results 284 | 285 | def get_filter_stats(self) -> Optional[FilterStats]: 286 | """ 287 | Get the statistics from running this filter. 288 | """ 289 | return self.filter_stats 290 | 291 | 292 | def load_filter_file(file_path): 293 | """ 294 | Load a YAML filter file, return the filter description and the filters. 
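For illustration, a filter file has roughly this shape (keys as read below; the values are placeholders only):
    description: Example filter
    configuration:
      default-include: true
    include:
      - rule: test-rule
    exclude:
      - suppression: inSource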
295 | """ 296 | try: 297 | file_name = os.path.basename(file_path) 298 | with open(file_path, encoding="utf-8") as file_in: 299 | yaml_content = yaml.safe_load(file_in) 300 | filter_description = yaml_content.get("description", file_name) 301 | configuration = yaml_content.get("configuration", {}) 302 | include_filters = yaml_content.get("include", {}) 303 | exclude_filters = yaml_content.get("exclude", {}) 304 | except yaml.YAMLError as error: 305 | raise IOError(f"Cannot read filter file {file_path}") from error 306 | return filter_description, configuration, include_filters, exclude_filters 307 | -------------------------------------------------------------------------------- /tests/test_general_filter.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | from sarif.filter.general_filter import GeneralFilter, load_filter_file 3 | from sarif.filter.filter_stats import load_filter_stats_from_json 4 | 5 | 6 | class TestGeneralFilter: 7 | def test_init_filter(self): 8 | gf = GeneralFilter() 9 | 10 | gf.init_filter( 11 | "test filter", 12 | {}, 13 | [{"author": "John Doe"}], 14 | [{"suppression": "not a suppression"}], 15 | ) 16 | assert gf.filter_stats.filter_description == "test filter" 17 | assert len(gf.include_filters[0].and_terms) == 1 18 | assert gf.include_filters[0].and_terms[0].prop_path == "author" 19 | assert gf.apply_inclusion_filter is True 20 | assert len(gf.exclude_filters[0].and_terms) == 1 21 | assert gf.exclude_filters[0].and_terms[0].prop_path == "suppression" 22 | assert gf.apply_exclusion_filter is True 23 | 24 | def test_init_filter_no_value(self): 25 | gf = GeneralFilter() 26 | 27 | gf.init_filter( 28 | "test filter", 29 | {}, 30 | [{"author": {"default-include": False}}], # forgot "value" 31 | [], 32 | ) 33 | assert gf.filter_stats.filter_description == "test filter" 34 | assert len(gf.include_filters[0].and_terms) == 1 35 | assert gf.include_filters[0].and_terms[0].prop_path == "author" 36 | assert gf.apply_inclusion_filter is True 37 | assert not gf.exclude_filters 38 | 39 | def test_rehydrate_filter_stats(self): 40 | gf = GeneralFilter() 41 | dehydrated_filter_stats = { 42 | "filter": "test filter", 43 | "in": 10, 44 | "out": 5, 45 | "default": {"noProperty": 3}, 46 | } 47 | gf.rehydrate_filter_stats(dehydrated_filter_stats, "2022-01-01T00:00:00Z") 48 | assert gf.filter_stats.filtered_in_result_count == 10 49 | assert gf.filter_stats.filtered_out_result_count == 5 50 | assert gf.filter_stats.missing_property_count == 3 51 | assert gf.filter_stats.filter_datetime == "2022-01-01T00:00:00Z" 52 | 53 | def test_zero_counts(self): 54 | gf = GeneralFilter() 55 | gf.filter_stats = load_filter_stats_from_json( 56 | {"filter": "test filter", "in": 10, "out": 5, "default": {"noProperty": 3}} 57 | ) 58 | 59 | gf._zero_counts() 60 | assert gf.filter_stats.filtered_in_result_count == 0 61 | assert gf.filter_stats.filtered_out_result_count == 0 62 | assert gf.filter_stats.missing_property_count == 0 63 | 64 | def test_filter_append_include(self): 65 | general_filter = GeneralFilter() 66 | general_filter.init_filter( 67 | "test filter", {"check-line-number": False}, [{"ruleId": "test-rule"}], [] 68 | ) 69 | result = {"ruleId": "test-rule"} 70 | 71 | filtered_results = general_filter.filter_results([result]) 72 | assert len(filtered_results) == 1 73 | assert filtered_results[0] == result 74 | assert filtered_results[0]["properties"]["filtered"]["state"] == "included" 75 | assert 
general_filter.filter_stats.filtered_in_result_count == 1 76 | assert general_filter.filter_stats.filtered_out_result_count == 0 77 | assert general_filter.filter_stats.missing_property_count == 0 78 | 79 | def test_filter_append_exclude(self): 80 | general_filter = GeneralFilter() 81 | general_filter.init_filter("test filter", {}, [], [{"level": "error"}]) 82 | result = {"level": "error"} 83 | 84 | filtered_results = general_filter.filter_results([result]) 85 | assert len(filtered_results) == 0 86 | assert "filtered" not in result 87 | assert general_filter.filter_stats.filtered_in_result_count == 0 88 | assert general_filter.filter_stats.filtered_out_result_count == 1 89 | assert general_filter.filter_stats.missing_property_count == 0 90 | 91 | def test_filter_append_no_filters(self): 92 | general_filter = GeneralFilter() 93 | general_filter.init_filter("test filter", {"check-line-number": False}, [], []) 94 | result = {"ruleId": "test-rule"} 95 | 96 | filtered_results = general_filter.filter_results([result]) 97 | assert len(filtered_results) == 1 98 | assert filtered_results[0] == result 99 | assert "filtered" not in result 100 | 101 | def test_filter_results_match(self): 102 | general_filter = GeneralFilter() 103 | general_filter.init_filter( 104 | "test filter", 105 | {"check-line-number": False}, 106 | [{"ruleId": "test-rule"}, {"level": "error"}], 107 | [], 108 | ) 109 | result = {"ruleId": "test-rule", "level": "error"} 110 | 111 | filtered_results = general_filter.filter_results([result]) 112 | assert len(filtered_results) == 1 113 | assert filtered_results[0] == result 114 | assert filtered_results[0]["properties"]["filtered"]["state"] == "included" 115 | assert filtered_results[0]["properties"]["filtered"]["matchedFilter"] == [ 116 | {"ruleId": "test-rule"} 117 | ] 118 | assert "warnings" not in filtered_results[0]["properties"]["filtered"] 119 | assert general_filter.filter_stats.filtered_in_result_count == 1 120 | assert general_filter.filter_stats.filtered_out_result_count == 0 121 | assert general_filter.filter_stats.missing_property_count == 0 122 | 123 | def test_filter_results_no_match(self): 124 | general_filter = GeneralFilter() 125 | general_filter.init_filter( 126 | "test filter", 127 | {"check-line-number": False}, 128 | [{"ruleId": "other-rule"}, {"level": "warning"}], 129 | [], 130 | ) 131 | result = {"ruleId": "test-rule", "level": "error"} 132 | 133 | filtered_results = general_filter.filter_results([result]) 134 | assert len(filtered_results) == 0 135 | 136 | def test_filter_results_regex(self): 137 | general_filter = GeneralFilter() 138 | rule = {"properties.blame.author-mail": "/myname\\..*\\.com/"} 139 | general_filter.init_filter( 140 | "test filter", 141 | {"check-line-number": True}, 142 | [rule], 143 | [], 144 | ) 145 | result = { 146 | "ruleId": "test-rule", 147 | "properties": {"blame": {"author-mail": "user@myname.example.com"}}, 148 | "locations": [{"physicalLocation": {"region": {"startLine": "123"}}}], 149 | } 150 | 151 | filtered_results = general_filter.filter_results([result]) 152 | assert len(filtered_results) == 1 153 | assert filtered_results[0]["properties"]["filtered"]["state"] == "included" 154 | assert filtered_results[0]["properties"]["filtered"]["matchedFilter"] == [rule] 155 | assert "warnings" not in filtered_results[0]["properties"]["filtered"] 156 | 157 | def test_filter_results_regex_guid(self): 158 | general_filter = GeneralFilter() 159 | guid_rule = { 160 | "properties.blame.author-mail": 
"/[0-9A-F]{8}[-][0-9A-F]{4}[-][0-9A-F]{4}" 161 | + "[-][0-9A-F]{4}[-][0-9A-F]{12}/" 162 | } 163 | general_filter.init_filter( 164 | "test filter", 165 | {"check-line-number": False}, 166 | [guid_rule], 167 | [], 168 | ) 169 | result = { 170 | "ruleId": "test-rule", 171 | "properties": { 172 | "blame": {"author-mail": "AAAAA1234ABCD-FEDC-BA09-8765-4321ABCDEF90"} 173 | }, 174 | } 175 | 176 | filtered_results = general_filter.filter_results([result]) 177 | assert len(filtered_results) == 1 178 | assert filtered_results[0]["properties"]["filtered"]["state"] == "included" 179 | assert filtered_results[0]["properties"]["filtered"]["matchedFilter"] == [ 180 | guid_rule 181 | ] 182 | assert "warnings" not in filtered_results[0]["properties"]["filtered"] 183 | 184 | def test_filter_results_existence_only(self): 185 | general_filter = GeneralFilter() 186 | general_filter.init_filter( 187 | "test filter", {"check-line-number": False}, [], [{"suppression": {}}] 188 | ) 189 | result = {"ruleId": "test-rule", "suppressions": [{"kind": "inSource"}]} 190 | 191 | filtered_results = general_filter.filter_results([result]) 192 | assert len(filtered_results) == 0 193 | 194 | def test_filter_results_match_default_include_default_configuration(self): 195 | general_filter = GeneralFilter() 196 | general_filter.init_filter( 197 | "test filter", {"check-line-number": False}, [{"level": "error"}], [] 198 | ) 199 | result = {"ruleId": "test-rule"} 200 | 201 | filtered_results = general_filter.filter_results([result]) 202 | assert len(filtered_results) == 1 203 | assert filtered_results[0] == result 204 | assert filtered_results[0]["properties"]["filtered"]["state"] == "noProperty" 205 | assert filtered_results[0]["properties"]["filtered"]["warnings"] == [ 206 | "Field 'level' is missing but the result included as default-include is true" 207 | ] 208 | assert general_filter.filter_stats.filtered_in_result_count == 0 209 | assert general_filter.filter_stats.filtered_out_result_count == 0 210 | assert general_filter.filter_stats.missing_property_count == 1 211 | 212 | def test_filter_results_check_line_number(self): 213 | general_filter = GeneralFilter() 214 | general_filter.init_filter("test filter", {}, [{"level": "error"}], []) 215 | result = { 216 | "ruleId": "test-rule", 217 | "locations": [{"physicalLocation": {"region": {"startLine": "1"}}}], 218 | } 219 | 220 | filtered_results = general_filter.filter_results([result]) 221 | assert len(filtered_results) == 1 222 | assert filtered_results[0] == result 223 | assert filtered_results[0]["properties"]["filtered"]["state"] == "noLineNumber" 224 | assert filtered_results[0]["properties"]["filtered"]["warnings"] == [ 225 | "Field 'level' not checked due to missing line number information" 226 | ] 227 | assert general_filter.filter_stats.filtered_in_result_count == 0 228 | assert general_filter.filter_stats.filtered_out_result_count == 0 229 | assert general_filter.filter_stats.missing_property_count == 0 230 | assert general_filter.filter_stats.unconvincing_line_number_count == 1 231 | 232 | def test_filter_results_match_default_include_rule_override(self): 233 | general_filter = GeneralFilter() 234 | general_filter.init_filter( 235 | "test filter", 236 | {"check-line-number": False}, 237 | [{"level": {"value": "error", "default-include": False}}], 238 | [], 239 | ) 240 | result = {"ruleId": "test-rule"} 241 | 242 | filtered_results = general_filter.filter_results([result]) 243 | assert len(filtered_results) == 0 244 | assert 
general_filter.filter_stats.filtered_in_result_count == 0 245 | # Filtered out because not filtered in 246 | assert general_filter.filter_stats.filtered_out_result_count == 1 247 | assert general_filter.filter_stats.missing_property_count == 0 248 | 249 | SHORTCUTS_TEST_PARAMS = [ 250 | ({"author": "John Smith"}, {"properties": {"blame": {"author": "John Smith"}}}), 251 | ( 252 | {"author-mail": "john.smith@example.com"}, 253 | {"properties": {"blame": {"author-mail": "john.smith@example.com"}}}, 254 | ), 255 | ( 256 | {"committer-mail": "john.smith@example.com"}, 257 | {"properties": {"blame": {"committer-mail": "john.smith@example.com"}}}, 258 | ), 259 | ( 260 | {"location": "test.cpp"}, 261 | { 262 | "locations": [ 263 | {"physicalLocation": {"artifactLocation": {"uri": "test.cpp"}}} 264 | ] 265 | }, 266 | ), 267 | ({"rule": "rule1"}, {"ruleId": "rule1"}), 268 | ({"suppression": "inSource"}, {"suppressions": [{"kind": "inSource"}]}), 269 | ] 270 | 271 | @pytest.mark.parametrize("shortcut_filter,result", SHORTCUTS_TEST_PARAMS) 272 | def test_filter_results_shortcuts(self, shortcut_filter, result): 273 | general_filter = GeneralFilter() 274 | general_filter.init_filter( 275 | "test filter", {"check-line-number": False}, [shortcut_filter], [] 276 | ) 277 | 278 | filtered_results = general_filter.filter_results([result]) 279 | assert len(filtered_results) == 1 280 | assert filtered_results[0] == result 281 | assert filtered_results[0]["properties"]["filtered"]["state"] == "included" 282 | assert "warnings" not in filtered_results[0]["properties"]["filtered"] 283 | 284 | def test_filter_results_include(self): 285 | general_filter = GeneralFilter() 286 | general_filter.init_filter( 287 | "test filter", {"check-line-number": False}, [{"ruleId": "test-rule"}], [] 288 | ) 289 | results = [{"ruleId": "test-rule"}] * 10 290 | 291 | filtered_results = general_filter.filter_results(results) 292 | assert len(filtered_results) == 10 293 | assert all(result in filtered_results for result in results) 294 | assert general_filter.filter_stats.filtered_in_result_count == 10 295 | assert general_filter.filter_stats.filtered_out_result_count == 0 296 | assert general_filter.filter_stats.missing_property_count == 0 297 | 298 | def test_filter_results_exclude(self): 299 | general_filter = GeneralFilter() 300 | general_filter.init_filter( 301 | "test filter", {"check-line-number": False}, [], [{"level": "error"}] 302 | ) 303 | results = [{"level": "error"}] * 10 304 | 305 | filtered_results = general_filter.filter_results(results) 306 | assert len(filtered_results) == 0 307 | assert general_filter.filter_stats.filtered_in_result_count == 0 308 | assert general_filter.filter_stats.filtered_out_result_count == 10 309 | assert general_filter.filter_stats.missing_property_count == 0 310 | 311 | def test_filter_results_exclude_not_all(self): 312 | general_filter = GeneralFilter() 313 | general_filter.init_filter( 314 | "test filter", {"check-line-number": False}, [], [{"level": "error"}] 315 | ) 316 | results = [{"level": "error"}, {"level": "warning"}, {"level": "error"}] 317 | 318 | filtered_results = general_filter.filter_results(results) 319 | assert len(filtered_results) == 1 320 | assert general_filter.filter_stats.filtered_in_result_count == 1 321 | assert general_filter.filter_stats.filtered_out_result_count == 2 322 | assert general_filter.filter_stats.missing_property_count == 0 323 | assert filtered_results[0]["properties"]["filtered"]["state"] == "included" 324 | assert 
len(filtered_results[0]["properties"]["filtered"]["matchedFilter"]) == 0 325 | 326 | def test_filter_results_no_filters(self): 327 | general_filter = GeneralFilter() 328 | general_filter.init_filter("test filter", {"check-line-number": False}, [], []) 329 | results = [{"ruleId": "test-rule"}] * 10 330 | 331 | filtered_results = general_filter.filter_results(results) 332 | assert len(filtered_results) == 10 333 | assert all(result in filtered_results for result in results) 334 | assert general_filter.filter_stats.filtered_in_result_count == 0 335 | assert general_filter.filter_stats.filtered_out_result_count == 0 336 | assert general_filter.filter_stats.missing_property_count == 0 337 | 338 | def test_get_filter_stats(self): 339 | general_filter = GeneralFilter() 340 | general_filter.init_filter( 341 | "test filter", {"check-line-number": False}, [{"ruleId": "test-rule"}], [] 342 | ) 343 | results = [{"ruleId": "test-rule"}] * 10 344 | 345 | general_filter.filter_results(results) 346 | filter_stats = general_filter.get_filter_stats() 347 | assert filter_stats.filtered_in_result_count == 10 348 | assert filter_stats.filtered_out_result_count == 0 349 | assert filter_stats.missing_property_count == 0 350 | 351 | def test_load_filter_file(self): 352 | file_path = "test_filter.yaml" 353 | filter_description = "Test filter" 354 | include_filters = {"ruleId": "test-rule"} 355 | exclude_filters = {"level": "error"} 356 | with open(file_path, "w") as f: 357 | f.write(f"description: {filter_description}\n") 358 | f.write(f"include:\n ruleId: {include_filters['ruleId']}\n") 359 | f.write(f"exclude:\n level: {exclude_filters['level']}\n") 360 | 361 | loaded_filter = load_filter_file(file_path) 362 | assert loaded_filter == ( 363 | filter_description, 364 | {}, 365 | include_filters, 366 | exclude_filters, 367 | ) 368 | 369 | def test_load_filter_file_with_configuration(self): 370 | file_path = "test_filter.yaml" 371 | filter_description = "Test filter" 372 | configuration = {"default-include": True} 373 | include_filters = {"ruleId": "test-rule"} 374 | exclude_filters = {"level": "error"} 375 | with open(file_path, "w") as f: 376 | f.write(f"description: {filter_description}\n") 377 | f.write("configuration:\n default-include: true\n") 378 | f.write(f"include:\n ruleId: {include_filters['ruleId']}\n") 379 | f.write(f"exclude:\n level: {exclude_filters['level']}\n") 380 | 381 | loaded_filter = load_filter_file(file_path) 382 | assert loaded_filter == ( 383 | filter_description, 384 | configuration, 385 | include_filters, 386 | exclude_filters, 387 | ) 388 | 389 | def test_load_filter_file_wrong_format(self): 390 | file_path = "test_filter.yaml" 391 | filter_description = "Test filter" 392 | with open(file_path, "w") as f: 393 | f.write(f"description: {filter_description}\n") 394 | f.write("include\n") 395 | f.write("exclude\n") 396 | 397 | with pytest.raises(IOError) as io_error: 398 | load_filter_file(file_path) 399 | assert str(io_error.value) == f"Cannot read filter file {file_path}" 400 | -------------------------------------------------------------------------------- /sarif/cmdline/main.py: -------------------------------------------------------------------------------- 1 | """ 2 | Program entry point for sarif-tools on the command line. 
3 | """ 4 | 5 | import argparse 6 | import os 7 | import sys 8 | 9 | from sarif import loader, sarif_file, __version__ as SARIF_TOOLS_PACKAGE_VERSION 10 | from sarif.filter.general_filter import load_filter_file 11 | 12 | from sarif.operations import ( 13 | blame_op, 14 | codeclimate_op, 15 | copy_op, 16 | csv_op, 17 | diff_op, 18 | html_op, 19 | emacs_op, 20 | info_op, 21 | ls_op, 22 | summary_op, 23 | trend_op, 24 | upgrade_filter_op, 25 | word_op, 26 | ) 27 | 28 | 29 | def main(): 30 | """ 31 | Entry point function. 32 | """ 33 | args, unknown_args = ARG_PARSER.parse_known_args() 34 | 35 | if args.debug: 36 | _print_version() 37 | print(f"Running code from {__file__}") 38 | known_args_summary = ", ".join( 39 | f"{key}={getattr(args, key)}" for key in vars(args) 40 | ) 41 | print(f"Known arguments: {known_args_summary}") 42 | if args.version: 43 | return 0 44 | elif args.version: 45 | _print_version() 46 | return 0 47 | 48 | if unknown_args: 49 | if any( 50 | unknown_arg.startswith("--blame-filter") 51 | or unknown_arg.startswith("-b=") 52 | or unknown_arg == "-b" 53 | for unknown_arg in unknown_args 54 | ): 55 | print("ERROR: --blame-filter was removed in v2.0.0.") 56 | print( 57 | "Run the upgrade-filter command to convert your blame filter to the new filter format, then pass via --filter option." 58 | ) 59 | args = ARG_PARSER.parse_args() 60 | 61 | exitcode = args.func(args) 62 | return exitcode 63 | 64 | 65 | def _create_arg_parser(): 66 | cmd_list = "commands:\n" 67 | max_cmd_length = max(len(cmd) for cmd in _COMMANDS) 68 | col_width = max_cmd_length + 2 69 | for cmd, cmd_attributes in _COMMANDS.items(): 70 | cmd_list += cmd.ljust(col_width) + cmd_attributes["desc"] + "\n" 71 | cmd_list += "Run `sarif --help` for command-specific help." 72 | parser = argparse.ArgumentParser( 73 | prog="sarif", 74 | description="Process sets of SARIF files", 75 | epilog=cmd_list, 76 | formatter_class=argparse.RawDescriptionHelpFormatter, 77 | ) 78 | parser.set_defaults(func=_usage_command) 79 | subparsers = parser.add_subparsers(dest="command", help="command") 80 | subparser = {} 81 | for cmd, cmd_attributes in _COMMANDS.items(): 82 | subparser[cmd] = subparsers.add_parser(cmd, description=cmd_attributes["desc"]) 83 | subparser[cmd].set_defaults(func=cmd_attributes["fn"]) 84 | 85 | # Common options 86 | parser.add_argument("--version", "-v", action="store_true") 87 | parser.add_argument( 88 | "--debug", action="store_true", help="Print information useful for debugging" 89 | ) 90 | parser.add_argument( 91 | "--check", 92 | "-x", 93 | type=str, 94 | choices=sarif_file.SARIF_SEVERITIES_WITH_NONE, 95 | help="Exit with error code if there are any issues of the specified level " 96 | + "(or for diff, an increase in issues at that level).", 97 | ) 98 | 99 | for cmd in [ 100 | "blame", 101 | "codeclimate", 102 | "csv", 103 | "html", 104 | "emacs", 105 | "summary", 106 | "word", 107 | "upgrade-filter", 108 | ]: 109 | subparser[cmd].add_argument( 110 | "--output", "-o", type=str, metavar="PATH", help="Output file or directory" 111 | ) 112 | for cmd in ["copy", "diff", "info", "ls", "trend", "usage"]: 113 | subparser[cmd].add_argument( 114 | "--output", "-o", type=str, metavar="FILE", help="Output file" 115 | ) 116 | 117 | for cmd in [ 118 | "codeclimate", 119 | "copy", 120 | "csv", 121 | "diff", 122 | "summary", 123 | "html", 124 | "emacs", 125 | "trend", 126 | "word", 127 | ]: 128 | subparser[cmd].add_argument( 129 | "--filter", 130 | "-b", 131 | type=str, 132 | metavar="FILE", 133 | help="Specify the filter 
file to apply. See README for format.", 134 | ) 135 | 136 | # Command-specific options 137 | subparser["blame"].add_argument( 138 | "--code", 139 | "-c", 140 | metavar="PATH", 141 | type=str, 142 | help="Path to git repository; if not specified, the current working directory is used", 143 | ) 144 | subparser["copy"].add_argument( 145 | "--timestamp", 146 | "-t", 147 | action="store_true", 148 | help='Append current timestamp to output filename in the "yyyymmddThhmmssZ" format used by ' 149 | "the `sarif trend` command", 150 | ) 151 | # codeclimate and csv default to no trimming 152 | for cmd in ["codeclimate", "csv"]: 153 | subparser[cmd].add_argument( 154 | "--autotrim", 155 | "-a", 156 | action="store_true", 157 | help="Strip off the common prefix of paths in the CSV output", 158 | ) 159 | # word and html default to trimming 160 | for cmd in ["html", "emacs", "word"]: 161 | subparser[cmd].add_argument( 162 | "--no-autotrim", 163 | "-n", 164 | action="store_true", 165 | help="Do not strip off the common prefix of paths in the output document", 166 | ) 167 | subparser[cmd].add_argument( 168 | "--image", 169 | type=str, 170 | help="Image to include at top of file - SARIF logo by default", 171 | ) 172 | # codeclimate, csv, html and word allow trimmable paths to be specified 173 | for cmd in ["codeclimate", "csv", "word", "html", "emacs"]: 174 | subparser[cmd].add_argument( 175 | "--trim", 176 | metavar="PREFIX", 177 | action="append", 178 | type=str, 179 | help="Prefix to strip from issue paths, e.g. the checkout directory on the build agent", 180 | ) 181 | # Most commands take an arbitrary list of SARIF files or directories 182 | for cmd in _COMMANDS: 183 | if cmd not in ["diff", "upgrade-filter", "usage", "version"]: 184 | subparser[cmd].add_argument( 185 | "files_or_dirs", 186 | metavar="file_or_dir", 187 | type=str, 188 | nargs="*", 189 | default=["."], 190 | help="A SARIF file or a directory containing SARIF files", 191 | ) 192 | subparser["diff"].add_argument( 193 | "old_file_or_dir", 194 | type=str, 195 | nargs=1, 196 | help="An old SARIF file or a directory containing the old SARIF files", 197 | ) 198 | subparser["diff"].add_argument( 199 | "new_file_or_dir", 200 | type=str, 201 | nargs=1, 202 | help="A new SARIF file or a directory containing the new SARIF files", 203 | ) 204 | 205 | subparser["trend"].add_argument( 206 | "--dateformat", 207 | "-f", 208 | type=str, 209 | choices=["dmy", "mdy", "ymd"], 210 | default="dmy", 211 | help="Date component order to use in output CSV. 
Default is `dmy`", 212 | ) 213 | 214 | subparser["upgrade-filter"].add_argument( 215 | "files_or_dirs", 216 | metavar="file", 217 | type=str, 218 | nargs="*", 219 | default=["."], 220 | help="A v1-style blame-filter file", 221 | ) 222 | 223 | return parser 224 | 225 | 226 | def _check(input_files: sarif_file.SarifFileSet, check_level): 227 | ret = 0 228 | if check_level: 229 | for severity in sarif_file.SARIF_SEVERITIES_WITH_NONE: 230 | ret += input_files.get_report().get_issue_count_for_severity(severity) 231 | if severity == check_level: 232 | break 233 | if ret > 0: 234 | sys.stderr.write( 235 | f"Check: exiting with return code {ret} due to issues at or above {check_level} " 236 | "severity\n" 237 | ) 238 | return ret 239 | 240 | 241 | def _init_filtering(input_files, args): 242 | if args.filter: 243 | filters = load_filter_file(args.filter) 244 | input_files.init_general_filter(*filters) 245 | 246 | 247 | def _init_path_prefix_stripping(input_files, args, strip_by_default): 248 | if strip_by_default: 249 | autotrim = not args.no_autotrim 250 | else: 251 | autotrim = args.autotrim 252 | trim_paths = args.trim 253 | if autotrim or trim_paths: 254 | input_files.init_path_prefix_stripping(autotrim, trim_paths) 255 | 256 | 257 | def _ensure_dir(dir_path): 258 | """ 259 | Create directory if it does not exist 260 | """ 261 | if dir_path and not os.path.isdir(dir_path): 262 | os.makedirs(dir_path) 263 | 264 | 265 | def _prepare_output( 266 | input_files: sarif_file.SarifFileSet, output_arg, output_file_extension: str 267 | ): 268 | """ 269 | Returns (output, output_multiple_files) 270 | output is args.output, or if that wasn't specified, a default output file based on the inputs 271 | and the file extension. 272 | output_multiple_files determines whether to output one file per input plus a totals file. 273 | It is false if there is only one input file, or args.output is a file that exists, 274 | or args.output ends with the expected file extension. 275 | """ 276 | input_file_count = len(input_files) 277 | if input_file_count == 0: 278 | return ("static_analysis_output" + output_file_extension, False) 279 | if input_file_count == 1: 280 | derived_output_filename = ( 281 | input_files[0].get_file_name_without_extension() + output_file_extension 282 | ) 283 | if output_arg: 284 | if os.path.isdir(output_arg): 285 | return (os.path.join(output_arg, derived_output_filename), False) 286 | _ensure_dir(os.path.dirname(output_arg)) 287 | return (output_arg, False) 288 | return (derived_output_filename, False) 289 | # Multiple input files 290 | if output_arg: 291 | if os.path.isfile(output_arg) or output_arg.strip().upper().endswith( 292 | output_file_extension.upper() 293 | ): 294 | # Output single file, even though there are multiple input files. 
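# For example, `sarif csv dir1 dir2 --output combined.csv` takes this branch:
# the -o value ends with the expected ".csv" extension, so a single combined CSV
# is written rather than one output file per input file plus a totals file.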
295 | _ensure_dir(os.path.dirname(output_arg)) 296 | return (output_arg, False) 297 | _ensure_dir(output_arg) 298 | return (output_arg, True) 299 | return (os.getcwd(), True) 300 | 301 | 302 | ####################################### Command handlers ####################################### 303 | 304 | 305 | def _blame_command(args): 306 | input_files = loader.load_sarif_files(*args.files_or_dirs) 307 | (output, multiple_file_output) = _prepare_output(input_files, args.output, ".sarif") 308 | blame_op.enhance_with_blame( 309 | input_files, args.code or os.getcwd(), output, multiple_file_output 310 | ) 311 | return _check(input_files, args.check) 312 | 313 | 314 | def _codeclimate_command(args): 315 | input_files = loader.load_sarif_files(*args.files_or_dirs) 316 | input_files.init_default_line_number_1() 317 | _init_path_prefix_stripping(input_files, args, strip_by_default=False) 318 | _init_filtering(input_files, args) 319 | (output, multiple_file_output) = _prepare_output(input_files, args.output, ".json") 320 | codeclimate_op.generate(input_files, output, multiple_file_output) 321 | return _check(input_files, args.check) 322 | 323 | 324 | def _copy_command(args): 325 | input_files = loader.load_sarif_files(*args.files_or_dirs) 326 | _init_filtering(input_files, args) 327 | output = args.output or "out.sarif" 328 | output_sarif_file_set = copy_op.generate_sarif( 329 | input_files, 330 | output, 331 | args.timestamp, 332 | SARIF_TOOLS_PACKAGE_VERSION, 333 | " ".join(sys.argv), 334 | ) 335 | return _check(output_sarif_file_set, args.check) 336 | 337 | 338 | def _csv_command(args): 339 | input_files = loader.load_sarif_files(*args.files_or_dirs) 340 | input_files.init_default_line_number_1() 341 | _init_path_prefix_stripping(input_files, args, strip_by_default=False) 342 | _init_filtering(input_files, args) 343 | (output, multiple_file_output) = _prepare_output(input_files, args.output, ".csv") 344 | csv_op.generate_csv(input_files, output, multiple_file_output) 345 | return _check(input_files, args.check) 346 | 347 | 348 | def _diff_command(args): 349 | old_sarif = loader.load_sarif_files(args.old_file_or_dir[0]) 350 | new_sarif = loader.load_sarif_files(args.new_file_or_dir[0]) 351 | _init_filtering(old_sarif, args) 352 | _init_filtering(new_sarif, args) 353 | return diff_op.print_diff(old_sarif, new_sarif, args.output, args.check) 354 | 355 | 356 | def _html_command(args): 357 | input_files = loader.load_sarif_files(*args.files_or_dirs) 358 | input_files.init_default_line_number_1() 359 | _init_path_prefix_stripping(input_files, args, strip_by_default=True) 360 | _init_filtering(input_files, args) 361 | (output, multiple_file_output) = _prepare_output(input_files, args.output, ".html") 362 | html_op.generate_html(input_files, args.image, output, multiple_file_output) 363 | return _check(input_files, args.check) 364 | 365 | 366 | def _emacs_command(args): 367 | input_files = loader.load_sarif_files(*args.files_or_dirs) 368 | input_files.init_default_line_number_1() 369 | _init_path_prefix_stripping(input_files, args, strip_by_default=True) 370 | _init_filtering(input_files, args) 371 | (output, multiple_file_output) = _prepare_output(input_files, args.output, ".txt") 372 | emacs_op.generate_compile(input_files, output, multiple_file_output) 373 | return _check(input_files, args.check) 374 | 375 | 376 | def _info_command(args): 377 | input_files = loader.load_sarif_files(*args.files_or_dirs) 378 | info_op.generate_info(input_files, args.output) 379 | if args.check: 380 | return 
_check(input_files, args.check) 381 | return 0 382 | 383 | 384 | def _ls_command(args): 385 | ls_op.print_ls(args.files_or_dirs, args.output) 386 | if args.check: 387 | input_files = loader.load_sarif_files(*args.files_or_dirs) 388 | return _check(input_files, args.check) 389 | return 0 390 | 391 | 392 | def _summary_command(args): 393 | input_files = loader.load_sarif_files(*args.files_or_dirs) 394 | _init_filtering(input_files, args) 395 | (output, multiple_file_output) = (None, False) 396 | if args.output: 397 | (output, multiple_file_output) = _prepare_output( 398 | input_files, args.output, ".txt" 399 | ) 400 | summary_op.generate_summary(input_files, output, multiple_file_output) 401 | return _check(input_files, args.check) 402 | 403 | 404 | def _trend_command(args): 405 | input_files = loader.load_sarif_files(*args.files_or_dirs) 406 | input_files.init_default_line_number_1() 407 | _init_filtering(input_files, args) 408 | if args.output: 409 | _ensure_dir(os.path.dirname(args.output)) 410 | output = args.output 411 | else: 412 | output = "static_analysis_trend.csv" 413 | trend_op.generate_trend_csv(input_files, output, args.dateformat) 414 | return _check(input_files, args.check) 415 | 416 | 417 | def _upgrade_filter_command(args): 418 | old_filter_files = args.files_or_dirs 419 | single_output_file = None 420 | output_dir = None 421 | if len(old_filter_files) == 1: 422 | if args.output and os.path.isdir(args.output): 423 | output_dir = args.output 424 | else: 425 | single_output_file = args.output or old_filter_files[0] + ".yaml" 426 | elif args.output: 427 | output_dir = args.output 428 | else: 429 | output_dir = os.path.dirname(args.output) 430 | for old_filter_file in old_filter_files: 431 | output_file = single_output_file or os.path.join( 432 | output_dir, os.path.basename(old_filter_file) + ".yaml" 433 | ) 434 | upgrade_filter_op.upgrade_filter_file(old_filter_file, output_file) 435 | return 0 436 | 437 | 438 | def _usage_command(args): 439 | if hasattr(args, "output") and args.output: 440 | with open(args.output, "w", encoding="utf-8") as file_out: 441 | ARG_PARSER.print_help(file_out) 442 | print("Wrote usage instructions to", args.output) 443 | else: 444 | ARG_PARSER.print_help() 445 | if args.check: 446 | sys.stderr.write("Spurious --check argument") 447 | return 1 448 | return 0 449 | 450 | 451 | def _version_command(args): 452 | _print_version(not args.version) 453 | 454 | 455 | def _print_version(bare=False): 456 | print( 457 | SARIF_TOOLS_PACKAGE_VERSION 458 | if bare 459 | else f"SARIF tools v{SARIF_TOOLS_PACKAGE_VERSION}" 460 | ) 461 | 462 | 463 | def _word_command(args): 464 | input_files = loader.load_sarif_files(*args.files_or_dirs) 465 | input_files.init_default_line_number_1() 466 | _init_path_prefix_stripping(input_files, args, strip_by_default=True) 467 | _init_filtering(input_files, args) 468 | (output, multiple_file_output) = _prepare_output(input_files, args.output, ".docx") 469 | word_op.generate_word_docs_from_sarif_inputs( 470 | input_files, args.image, output, multiple_file_output 471 | ) 472 | return _check(input_files, args.check) 473 | 474 | 475 | _COMMANDS = { 476 | "blame": { 477 | "fn": _blame_command, 478 | "desc": "Enhance SARIF file with information from `git blame`", 479 | }, 480 | "codeclimate": { 481 | "fn": _codeclimate_command, 482 | "desc": "Write a JSON representation in Code Climate format of SARIF file(s) " 483 | "for viewing as a Code Quality report in GitLab UI", 484 | }, 485 | "copy": { 486 | "fn": _copy_command, 487 | "desc": 
"Write a new SARIF file containing optionally-filtered data from other SARIF file(s)", 488 | }, 489 | "csv": { 490 | "fn": _csv_command, 491 | "desc": "Write a CSV file listing the issues from the SARIF files(s) specified", 492 | }, 493 | "diff": { 494 | "fn": _diff_command, 495 | "desc": "Find the difference between two [sets of] SARIF files", 496 | }, 497 | "emacs": { 498 | "fn": _emacs_command, 499 | "desc": "Write a representation of SARIF file(s) for viewing in emacs", 500 | }, 501 | "html": { 502 | "fn": _html_command, 503 | "desc": "Write an HTML representation of SARIF file(s) for viewing in a web browser", 504 | }, 505 | "info": { 506 | "fn": _info_command, 507 | "desc": "Print information about SARIF file(s) structure", 508 | }, 509 | "ls": { 510 | "fn": _ls_command, 511 | "desc": "List all SARIF files in the directories specified", 512 | }, 513 | "summary": { 514 | "fn": _summary_command, 515 | "desc": "Write a text summary with the counts of issues from the SARIF files(s) specified", 516 | }, 517 | "trend": { 518 | "fn": _trend_command, 519 | "desc": "Write a CSV file with time series data from SARIF files with " 520 | '"yyyymmddThhmmssZ" timestamps in their filenames', 521 | }, 522 | "upgrade-filter": { 523 | "fn": _upgrade_filter_command, 524 | "desc": "Upgrade a sarif-tools v1-style blame filter file to a v2-style filter YAML file", 525 | }, 526 | "usage": { 527 | "fn": _usage_command, 528 | "desc": "(Command optional) - print usage and exit", 529 | }, 530 | "version": {"fn": _version_command, "desc": "Print version and exit"}, 531 | "word": { 532 | "fn": _word_command, 533 | "desc": "Produce MS Word .docx summaries of the SARIF files specified", 534 | }, 535 | } 536 | 537 | ARG_PARSER = _create_arg_parser() 538 | --------------------------------------------------------------------------------