├── .gitignore ├── .gitattributes ├── model_signing ├── install │ ├── requirements_test.in │ ├── requirements.in │ ├── requirements_test_Darwin.txt │ ├── requirements_test_Linux.txt │ └── requirements_test_Windows.txt ├── images │ └── sigstore-model-diagram.png ├── __init__.py ├── hashing │ ├── __init__.py │ ├── precomputed.py │ ├── precomputed_test.py │ ├── memory.py │ ├── memory_test.py │ ├── hashing.py │ ├── file.py │ └── file_test.py ├── main.py ├── benchmarks │ └── run.sh ├── model.py ├── README.md └── serialize.py ├── CODEOWNERS ├── slsa_for_models ├── install │ └── requirements.in ├── images │ ├── slsa_results.png │ └── slsa_trigger.png ├── kubeflow │ ├── images │ │ ├── clone │ │ │ ├── Dockerfile │ │ │ └── clone.sh │ │ ├── build_model │ │ │ ├── Dockerfile │ │ │ └── build.sh │ │ └── upload_model │ │ │ ├── Dockerfile │ │ │ └── upload.sh │ ├── README.md │ └── model_transparency.py ├── gcp │ ├── pipelinerun.yml │ ├── tasks │ │ ├── build-model.yml │ │ ├── upload-model.yml │ │ └── git-clone.yml │ ├── pipeline.yml │ └── README.md ├── main.py ├── github_actions.md ├── README.md ├── tensorflow_cifar10.py └── pytorch_cifar10.py ├── .github ├── workflows │ ├── scripts │ │ └── venv_activate.sh │ ├── dependency_review.yml │ ├── unit_tests.yml │ ├── lint.yml │ ├── validate_deps.yml │ ├── codeql.yml │ ├── scorecard.yml │ ├── slsa_for_ml.yml │ └── pin_deps.yml └── dependabot.yml ├── CONTRIBUTING.md ├── README.md └── LICENSE /.gitignore: -------------------------------------------------------------------------------- 1 | __pycache__/ 2 | -------------------------------------------------------------------------------- /.gitattributes: -------------------------------------------------------------------------------- 1 | * text=auto 2 | -------------------------------------------------------------------------------- /model_signing/install/requirements_test.in: -------------------------------------------------------------------------------- 1 | pytest 2 | -------------------------------------------------------------------------------- /CODEOWNERS: -------------------------------------------------------------------------------- 1 | * @sigstore/model-transparency-codeowners 2 | -------------------------------------------------------------------------------- /model_signing/install/requirements.in: -------------------------------------------------------------------------------- 1 | psutil 2 | sigstore 3 | -------------------------------------------------------------------------------- /slsa_for_models/install/requirements.in: -------------------------------------------------------------------------------- 1 | tensorflow 2 | tensorflow-datasets 3 | torch 4 | torchvision 5 | -------------------------------------------------------------------------------- /slsa_for_models/images/slsa_results.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/font/model-transparency/main/slsa_for_models/images/slsa_results.png -------------------------------------------------------------------------------- /slsa_for_models/images/slsa_trigger.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/font/model-transparency/main/slsa_for_models/images/slsa_trigger.png -------------------------------------------------------------------------------- /model_signing/images/sigstore-model-diagram.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/font/model-transparency/main/model_signing/images/sigstore-model-diagram.png -------------------------------------------------------------------------------- /slsa_for_models/kubeflow/images/clone/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM ubuntu 2 | ARG DEBIAN_FRONTEND=noninteractive 3 | RUN apt update && apt install git-all -y 4 | COPY . /src 5 | WORKDIR /src 6 | RUN chmod +x /src/clone.sh 7 | -------------------------------------------------------------------------------- /slsa_for_models/kubeflow/images/build_model/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM docker.io/python:3.11 2 | ARG DEBIAN_FRONTEND=noninteractive 3 | RUN apt-get update && apt-get install coreutils -y 4 | COPY . /src 5 | WORKDIR /src 6 | RUN chmod +x /src/build.sh 7 | -------------------------------------------------------------------------------- /slsa_for_models/kubeflow/images/upload_model/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM gcr.io/google.com/cloudsdktool/cloud-sdk:379.0.0-slim@sha256:d844877c7aaa06a0072979230c68417ddb0f27087277f29747c7169d6ed0d2b9 2 | ARG DEBIAN_FRONTEND=noninteractive 3 | RUN apt-get update && apt-get install coreutils -y 4 | COPY . /src 5 | WORKDIR /src 6 | RUN chmod +x /src/upload.sh 7 | -------------------------------------------------------------------------------- /.github/workflows/scripts/venv_activate.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # shellcheck source=/dev/null 4 | if [[ -f venv/bin/activate ]]; then 5 | source venv/bin/activate 6 | elif [[ -f venv/Scripts/activate ]]; then 7 | source venv/Scripts/activate 8 | else 9 | echo "Cannot activate venv sandbox. Failing" 10 | exit 1 11 | fi 12 | 13 | echo "Successfully activated venv sandbox. Python is at `which python`" 14 | -------------------------------------------------------------------------------- /model_signing/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 The Sigstore Authors 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | -------------------------------------------------------------------------------- /slsa_for_models/gcp/pipelinerun.yml: -------------------------------------------------------------------------------- 1 | apiVersion: tekton.dev/v1 2 | kind: PipelineRun 3 | metadata: 4 | generateName: slsa-for-models- 5 | spec: 6 | params: 7 | - name: model-name 8 | value: 'pytorch_model.pth' 9 | - name: model-storage 10 | value: 11 | package: 'pytorch-model' 12 | location: 'us' 13 | repository: 'ml-artifacts' 14 | pipelineRef: 15 | name: slsa-for-models 16 | workspaces: 17 | - name: shared 18 | volumeClaimTemplate: 19 | spec: 20 | accessModes: 21 | - ReadWriteOnce 22 | resources: 23 | requests: 24 | storage: 1Gi 25 | -------------------------------------------------------------------------------- /model_signing/hashing/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 The Sigstore Authors 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | -------------------------------------------------------------------------------- /.github/workflows/dependency_review.yml: -------------------------------------------------------------------------------- 1 | # Copyright Google LLC 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | name: 'Dependency Review' 16 | on: 17 | pull_request: 18 | branches: [main] 19 | types: [opened, synchronize] 20 | 21 | permissions: 22 | contents: read 23 | 24 | jobs: 25 | dependency-review: 26 | name: License and Vulnerability Scan 27 | uses: sigstore/community/.github/workflows/reusable-dependency-review.yml@8cc8d600fbf3012b9d9d84a499423fa96afa3765 28 | -------------------------------------------------------------------------------- /.github/dependabot.yml: -------------------------------------------------------------------------------- 1 | # Copyright Google LLC 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | version: 2 16 | # See https://docs.github.com/en/code-security/dependabot/dependabot-version-updates/configuration-options-for-the-dependabot.yml-file. 17 | updates: 18 | - package-ecosystem: "pip" 19 | directory: "/" 20 | schedule: 21 | interval: "weekly" 22 | groups: 23 | all: 24 | patterns: 25 | - "*" 26 | - package-ecosystem: "github-actions" 27 | directory: "/" 28 | schedule: 29 | interval: "weekly" 30 | groups: 31 | all: 32 | patterns: 33 | - "*" 34 | -------------------------------------------------------------------------------- /model_signing/install/requirements_test_Darwin.txt: -------------------------------------------------------------------------------- 1 | # 2 | # This file is autogenerated by pip-compile with Python 3.11 3 | # by the following command: 4 | # 5 | # pip-compile --generate-hashes --output-file=model_signing/install/requirements_test_Darwin.txt --strip-extras model_signing/install/requirements_test.in 6 | # 7 | iniconfig==2.0.0 \ 8 | --hash=sha256:2d91e135bf72d31a410b17c16da610a82cb55f6b0477d1a902134b24a455b8b3 \ 9 | --hash=sha256:b6a85871a79d2e3b22d2d1b94ac2824226a63c6b741c88f7ae975f18b6778374 10 | # via pytest 11 | packaging==24.0 \ 12 | --hash=sha256:2ddfb553fdf02fb784c234c7ba6ccc288296ceabec964ad2eae3777778130bc5 \ 13 | --hash=sha256:eb82c5e3e56209074766e6885bb04b8c38a0c015d0a30036ebe7ece34c9989e9 14 | # via pytest 15 | pluggy==1.5.0 \ 16 | --hash=sha256:2cffa88e94fdc978c4c574f15f9e59b7f4201d439195c3715ca9e2486f1d0cf1 \ 17 | --hash=sha256:44e1ad92c8ca002de6377e165f3e0f1be63266ab4d554740532335b9d75ea669 18 | # via pytest 19 | pytest==8.2.1 \ 20 | --hash=sha256:5046e5b46d8e4cac199c373041f26be56fdb81eb4e67dc11d4e10811fc3408fd \ 21 | --hash=sha256:faccc5d332b8c3719f40283d0d44aa5cf101cec36f88cde9ed8f2bc0538612b1 22 | # via -r model_signing/install/requirements_test.in 23 | -------------------------------------------------------------------------------- /model_signing/install/requirements_test_Linux.txt: -------------------------------------------------------------------------------- 1 | # 2 | # This file is autogenerated by pip-compile with Python 3.11 3 | # by the following command: 4 | # 5 | # pip-compile --generate-hashes --output-file=model_signing/install/requirements_test_Linux.txt --strip-extras model_signing/install/requirements_test.in 6 | # 7 | iniconfig==2.0.0 \ 8 | --hash=sha256:2d91e135bf72d31a410b17c16da610a82cb55f6b0477d1a902134b24a455b8b3 \ 9 | --hash=sha256:b6a85871a79d2e3b22d2d1b94ac2824226a63c6b741c88f7ae975f18b6778374 10 | # via pytest 11 | packaging==24.0 \ 12 | --hash=sha256:2ddfb553fdf02fb784c234c7ba6ccc288296ceabec964ad2eae3777778130bc5 \ 13 | --hash=sha256:eb82c5e3e56209074766e6885bb04b8c38a0c015d0a30036ebe7ece34c9989e9 14 | # via pytest 15 | pluggy==1.5.0 \ 16 | --hash=sha256:2cffa88e94fdc978c4c574f15f9e59b7f4201d439195c3715ca9e2486f1d0cf1 \ 17 | --hash=sha256:44e1ad92c8ca002de6377e165f3e0f1be63266ab4d554740532335b9d75ea669 18 | # via pytest 19 | pytest==8.2.1 \ 20 | --hash=sha256:5046e5b46d8e4cac199c373041f26be56fdb81eb4e67dc11d4e10811fc3408fd \ 21 | --hash=sha256:faccc5d332b8c3719f40283d0d44aa5cf101cec36f88cde9ed8f2bc0538612b1 22 | # via -r model_signing/install/requirements_test.in 23 | -------------------------------------------------------------------------------- /slsa_for_models/gcp/tasks/build-model.yml: -------------------------------------------------------------------------------- 1 | apiVersion: 
tekton.dev/v1 2 | kind: Task 3 | metadata: 4 | name: build-model 5 | spec: 6 | workspaces: 7 | - name: source 8 | params: 9 | - name: tool-versions 10 | properties: 11 | python: { } 12 | bash: { } 13 | default: 14 | python: '3.11' 15 | bash: 'latest' 16 | - name: model-source 17 | properties: 18 | requirements-path: {} 19 | main-path: {} 20 | - name: model-name 21 | enum: 22 | - 'tensorflow_model.keras' 23 | - 'tensorflow_hdf5_model.h5' 24 | - 'tensorflow_hdf5.weights.h5' 25 | - 'pytorch_model.pth' 26 | - 'pytorch_full_model.pth' 27 | - 'pytorch_jitted_model.pt' 28 | results: 29 | - name: digest 30 | steps: 31 | - name: run-script 32 | image: docker.io/python:$(params.tool-versions.python) 33 | workingDir: $(workspaces.source.path) 34 | script: | 35 | python -m pip install --require-hashes -r $(params.model-source.requirements-path) 36 | python $(params.model-source.main-path) $(params.model-name) 37 | - name: compute-digest 38 | image: bash:$(params.tool-versions.bash) 39 | workingDir: $(workspaces.source.path) 40 | script: 41 | sha256sum $(params.model-name) | awk '{print $1}' | tr -d '\n' | tee $(results.digest.path) 42 | -------------------------------------------------------------------------------- /model_signing/install/requirements_test_Windows.txt: -------------------------------------------------------------------------------- 1 | # 2 | # This file is autogenerated by pip-compile with Python 3.11 3 | # by the following command: 4 | # 5 | # pip-compile --generate-hashes --output-file=model_signing/install/requirements_test_Windows.txt --strip-extras model_signing/install/requirements_test.in 6 | # 7 | colorama==0.4.6 \ 8 | --hash=sha256:08695f5cb7ed6e0531a20572697297273c47b8cae5a63ffc6d6ed5c201be6e44 \ 9 | --hash=sha256:4f1d9991f5acc0ca119f9d443620b77f9d6b33703e51011c16baf57afb285fc6 10 | # via pytest 11 | iniconfig==2.0.0 \ 12 | --hash=sha256:2d91e135bf72d31a410b17c16da610a82cb55f6b0477d1a902134b24a455b8b3 \ 13 | --hash=sha256:b6a85871a79d2e3b22d2d1b94ac2824226a63c6b741c88f7ae975f18b6778374 14 | # via pytest 15 | packaging==24.0 \ 16 | --hash=sha256:2ddfb553fdf02fb784c234c7ba6ccc288296ceabec964ad2eae3777778130bc5 \ 17 | --hash=sha256:eb82c5e3e56209074766e6885bb04b8c38a0c015d0a30036ebe7ece34c9989e9 18 | # via pytest 19 | pluggy==1.5.0 \ 20 | --hash=sha256:2cffa88e94fdc978c4c574f15f9e59b7f4201d439195c3715ca9e2486f1d0cf1 \ 21 | --hash=sha256:44e1ad92c8ca002de6377e165f3e0f1be63266ab4d554740532335b9d75ea669 22 | # via pytest 23 | pytest==8.2.1 \ 24 | --hash=sha256:5046e5b46d8e4cac199c373041f26be56fdb81eb4e67dc11d4e10811fc3408fd \ 25 | --hash=sha256:faccc5d332b8c3719f40283d0d44aa5cf101cec36f88cde9ed8f2bc0538612b1 26 | # via -r model_signing/install/requirements_test.in 27 | -------------------------------------------------------------------------------- /model_signing/hashing/precomputed.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 The Sigstore Authors 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | """Precomputed digests. 16 | 17 | In order to support digests computed by external tooling, we provide trivial 18 | `HashEngine` instances that just wrap around the digest. 19 | 20 | Example usage: 21 | ```python 22 | >>> hasher = PrecomputedDigest("short-hash", b"abcd") 23 | >>> digest = hasher.compute() 24 | >>> digest.digest_hex 25 | '61626364' 26 | >>> digest.algorithm 27 | 'short-hash' 28 | ``` 29 | """ 30 | 31 | from dataclasses import dataclass 32 | from typing_extensions import override 33 | 34 | from model_signing.hashing import hashing 35 | 36 | 37 | @dataclass(frozen=True) 38 | class PrecomputedDigest(hashing.HashEngine): 39 | """A wrapper around digests computed by external tooling.""" 40 | 41 | _digest_type: str 42 | _digest_value: bytes 43 | 44 | @override 45 | def compute(self) -> hashing.Digest: 46 | return hashing.Digest(self._digest_type, self._digest_value) 47 | 48 | @override 49 | @property 50 | def digest_name(self) -> str: 51 | return self._digest_type 52 | -------------------------------------------------------------------------------- /slsa_for_models/main.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 The Sigstore Authors 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
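# Example invocation (illustrative; mirrors the "Build model" step in
# .github/workflows/slsa_for_ml.yml, which runs this script inside a venv):
#
#     python slsa_for_models/main.py pytorch_model.pth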
14 | 15 | import argparse 16 | 17 | import tensorflow_cifar10 as tf 18 | import pytorch_cifar10 as pt 19 | 20 | 21 | def readOptions(): 22 | parser = argparse.ArgumentParser('Train CIFAR10 models with TF/PT') 23 | model_formats = list(tf.supported_models().keys()) 24 | model_formats += list(pt.supported_models().keys()) 25 | parser.add_argument('model', choices=model_formats, 26 | help='Model to generate (name implies framework)') 27 | return parser.parse_args() 28 | 29 | 30 | def main(args): 31 | model_formats = list(tf.supported_models().keys()) 32 | for model_format in model_formats: 33 | if args.model == model_format: 34 | return tf.model_pipeline(args.model) 35 | 36 | model_formats = list(pt.supported_models().keys()) 37 | for model_format in model_formats: 38 | if args.model == model_format: 39 | return pt.model_pipeline(args.model) 40 | 41 | # we should not reach this case in the normal flow, but cover all corners 42 | raise ValueError("Model format not supported") 43 | 44 | 45 | if __name__ == '__main__': 46 | args = readOptions() 47 | main(args) 48 | -------------------------------------------------------------------------------- /.github/workflows/unit_tests.yml: -------------------------------------------------------------------------------- 1 | name: Run unit tests 2 | on: 3 | pull_request: 4 | branches: [main] 5 | types: [opened, synchronize] 6 | paths-ignore: 7 | - '**/*.md' 8 | - '*.md' 9 | 10 | permissions: {} 11 | 12 | defaults: 13 | run: 14 | shell: bash 15 | 16 | jobs: 17 | model-signing-unit-tests: 18 | name: Run unit tests for signing 19 | runs-on: ${{ matrix.os }} 20 | strategy: 21 | fail-fast: false # Don't cancel other jobs if one fails 22 | matrix: 23 | os: [ubuntu-latest, macos-latest, windows-latest] 24 | include: 25 | - os: macos-latest 26 | os_family: Darwin 27 | - os: ubuntu-latest 28 | os_family: Linux 29 | - os: windows-latest 30 | os_family: Windows 31 | steps: 32 | - uses: actions/checkout@a5ac7e51b41094c92402da3b24376905380afc29 # v4.1.6 33 | - uses: actions/setup-python@82c7e631bb3cdc910f68e0081d67478d79c6982d # v5.1.0 34 | with: 35 | python-version: 3.11 36 | cache: pip 37 | cache-dependency-path: | 38 | model_signing/install/requirements_${{ matrix.os_family }}.txt 39 | model_signing/install/requirements_test_${{ matrix.os_family }}.txt 40 | - name: Install dependencies 41 | run: | 42 | set -exuo pipefail 43 | python -m venv venv 44 | .github/workflows/scripts/venv_activate.sh 45 | python -m pip install --require-hashes -r model_signing/install/requirements_${{ matrix.os_family }}.txt 46 | python -m pip install --require-hashes -r model_signing/install/requirements_test_${{ matrix.os_family }}.txt 47 | - name: Run unit tests 48 | run: | 49 | set -euo pipefail 50 | .github/workflows/scripts/venv_activate.sh 51 | # NOTE: option --full-trace may be useful for troubleshooting. 52 | # TODO(#68): Remove the need to create this folder. 53 | mkdir testdata 54 | pytest -v . 55 | -------------------------------------------------------------------------------- /slsa_for_models/kubeflow/images/clone/clone.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | ############################################################ 3 | # Help # 4 | ############################################################ 5 | Help() 6 | { 7 | # Display Help 8 | echo "Add description of the script functions here." 
9 | echo 10 | echo "Syntax: scriptTemplate [h|u|p|c|t]" 11 | echo "options:" 12 | echo "u Url of the repo to clone" 13 | echo "t Target of the repo to clone" 14 | echo "p Path to the url result" 15 | echo "c Path to commit result" 16 | echo "h Print this Help." 17 | echo 18 | } 19 | 20 | ############################################################ 21 | ############################################################ 22 | # Main program # 23 | ############################################################ 24 | ############################################################ 25 | 26 | url="" 27 | resultPathUrl="" 28 | resultPathCommit="" 29 | 30 | ############################################################ 31 | # Process the input options. Add options as needed. # 32 | ############################################################ 33 | # Get the options 34 | while getopts ":h:u:p:c:t:" option; do 35 | case $option in 36 | h) # display Help 37 | Help 38 | exit;; 39 | u) # clone url 40 | url=$OPTARG;; 41 | p) # result path url 42 | resultPathUrl=$OPTARG;; 43 | c) # result path commit 44 | resultPathCommit=$OPTARG;; 45 | t) # result path commit 46 | target=$OPTARG;; 47 | \?) # Invalid option 48 | echo "Error: Invalid option" 49 | exit;; 50 | esac 51 | done 52 | 53 | echo "cloning $url into ${target}" 54 | git clone ${url} ${target} 55 | cd ${target} 56 | RESULT_SHA=$(git rev-parse HEAD) 57 | printf "%s" "${RESULT_SHA}" > ${resultPathCommit} 58 | printf "%s" "${url}" > ${resultPathUrl} 59 | -------------------------------------------------------------------------------- /slsa_for_models/kubeflow/images/build_model/build.sh: -------------------------------------------------------------------------------- 1 | #! /bin/bash 2 | ############################################################ 3 | # Help # 4 | ############################################################ 5 | Help() 6 | { 7 | # Display Help 8 | echo "Add description of the script functions here." 9 | echo 10 | echo "Syntax: scriptTemplate [h|r|s|m|d]" 11 | echo "options:" 12 | echo "r Requirements path" 13 | echo "s source code path" 14 | echo "m model name" 15 | echo "d Path to digest result" 16 | echo "h Print this Help." 17 | echo 18 | } 19 | 20 | ############################################################ 21 | ############################################################ 22 | # Main program # 23 | ############################################################ 24 | ############################################################ 25 | 26 | ############################################################ 27 | # Process the input options. Add options as needed. # 28 | ############################################################ 29 | # Get the options 30 | while getopts ":h:r:w:s:m:d:" option; do 31 | case $option in 32 | h) # display Help 33 | Help 34 | exit;; 35 | r) # requirements path 36 | requirements=$OPTARG;; 37 | w) # workingDir 38 | workingDir=$OPTARG;; 39 | s) # source code path 40 | sourcePath=$OPTARG;; 41 | d) # result path digest 42 | resultPathDigest=$OPTARG;; 43 | m) # model name 44 | model=$OPTARG;; 45 | \?) # Invalid option 46 | echo "Error: Invalid option" 47 | exit;; 48 | esac 49 | done 50 | 51 | cd ${workingDir} 52 | python -m pip install --require-hashes -r ${requirements} 53 | python ${sourcePath} ${model} 54 | sha256sum ${model} | awk '{print $1}' | tr -d '\n' | tee ${resultPathDigest} 55 | echo "done..." 
56 | echo ${workingDir}
57 | ls -lh
58 |
--------------------------------------------------------------------------------
/.github/workflows/lint.yml:
--------------------------------------------------------------------------------
1 | name: Lint
2 | 
3 | on:
4 |   pull_request:
5 |     branches: [main]
6 |     types: [opened, synchronize]
7 | 
8 | permissions: read-all
9 | 
10 | jobs:
11 |   flake8-lint:
12 |     runs-on: ubuntu-latest
13 |     name: Lint
14 |     steps:
15 |       - name: Check out source repository
16 |         uses: actions/checkout@a5ac7e51b41094c92402da3b24376905380afc29 # v4.1.6
17 |       - name: Set up Python environment
18 |         uses: actions/setup-python@82c7e631bb3cdc910f68e0081d67478d79c6982d # v5.1.0
19 |         with:
20 |           python-version: "3.11"
21 |       - name: flake8 Lint
22 |         uses: py-actions/flake8@84ec6726560b6d5bd68f2a5bed83d62b52bb50ba # v2.3.0
23 |         with:
24 |           max-line-length: "80"
25 |       - name: Detect empty lines at end of file and trailing whitespace
26 |         run: |
27 |           set -euo pipefail # No -x here!
28 |           failed=0
29 |           # First, check for empty lines at the end of files
30 |           for file in $(git ls-files --eol | grep 'i/[cr]*lf' | awk '{print $4}'); do
31 |             lines=$(tac "$file" | awk 'NF{exit};END{print NR?NR-1:0}')
32 |             if [[ $lines -ne 0 ]]; then
33 |               line=$(wc -l "$file" | cut -d' ' -f1)
34 |               echo "::error file=$file,line=$line::File $file has $lines empty lines at end. Please remove."
35 |               failed=$((failed + 1))
36 |             fi
37 |           done
38 |           # Next, check for files with whitespace at end of line. CRLF files are skipped.
39 |           for file in $(git ls-files --eol | grep 'i/lf' | awk '{print $4}'); do
40 |             for line in $(grep -n '[[:space:]]$' "$file" | cut -d: -f1); do
41 |               echo "::error file=$file,line=$line::File $file has trailing whitespace at line $line. Please remove."
42 |               failed=$((failed + 1))
43 |             done
44 |           done
45 |           if [[ $failed -ne 0 ]]; then
46 |             echo "::error::Found $failed whitespace errors, failing"
47 |             exit 1
48 |           fi
49 | 
--------------------------------------------------------------------------------
/model_signing/hashing/precomputed_test.py:
--------------------------------------------------------------------------------
1 | # Copyright 2024 The Sigstore Authors
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | from model_signing.hashing import precomputed
16 | 
17 | 
18 | class TestPrecomputedDigest:
19 | 
20 |     def test_compute_does_not_change_hash(self):
21 |         hash_value = b"value"
22 |         hasher = precomputed.PrecomputedDigest("test", hash_value)
23 |         digest = hasher.compute()
24 |         assert digest.digest_value == hash_value
25 |         digest = hasher.compute()
26 |         assert digest.digest_value == hash_value
27 | 
28 |     def test_expected_hash_and_hex(self):
29 |         hash_value = b"abcd"
30 |         hash_hex_value = "61626364"
31 |         hasher = precomputed.PrecomputedDigest("test", hash_value)
32 |         digest = hasher.compute()
33 |         assert digest.digest_value == hash_value
34 |         assert digest.digest_hex == hash_hex_value
35 | 
36 |     def test_expected_hash_and_hex_unicode(self):
37 |         hash_value = "*哈¥эш希".encode("utf-8")
38 |         hash_hex_value = "2ae59388c2a5d18dd188e5b88c"
39 |         hasher = precomputed.PrecomputedDigest("test", hash_value)
40 |         digest = hasher.compute()
41 |         assert digest.digest_value == hash_value
42 |         assert digest.digest_hex == hash_hex_value
43 | 
44 |     def test_expected_hash_type(self):
45 |         hasher = precomputed.PrecomputedDigest("test", b"abcd")
46 |         assert hasher.digest_name == "test"
47 |         digest = hasher.compute()
48 |         assert digest.algorithm == "test"
49 | 
--------------------------------------------------------------------------------
/slsa_for_models/kubeflow/images/upload_model/upload.sh:
--------------------------------------------------------------------------------
1 | #! /bin/bash
2 | ############################################################
3 | # Help #
4 | ############################################################
5 | Help()
6 | {
7 |    # Display Help
8 |    echo "Upload a model artifact to a storage location and record its digest and URI."
9 |    echo
10 |    echo "Syntax: upload.sh [h|r|w|c|s|l]"
11 |    echo "options:"
12 |    echo "r     Path to write the artifact URI result"
13 |    echo "w     Working directory"
14 |    echo "c     Path to write the artifact digest result"
15 |    echo "s     Source file to upload"
16 |    echo "l     Upload location; h prints this Help."
17 |    echo
18 | }
19 | 
20 | ############################################################
21 | ############################################################
22 | # Main program #
23 | ############################################################
24 | ############################################################
25 | 
26 | ############################################################
27 | # Process the input options. Add options as needed. #
28 | ############################################################
29 | # Get the options
30 | while getopts ":h:r:w:c:s:l:" option; do
31 |    case $option in
32 |       h) # display Help
33 |          Help
34 |          exit;;
35 |       r) # result path url
36 |          resultPathUrl=$OPTARG;;
37 |       w) # workingDir
38 |          workingDir=$OPTARG;;
39 |       c) # result path commit
40 |          resultPathCommit=$OPTARG;;
41 |       s) # source code path
42 |          SOURCE=$OPTARG;;
43 |       l) # upload location
44 |          LOCATION=$OPTARG;;
45 |       \?)
# Invalid option 46 | echo "Error: Invalid option" 47 | exit;; 48 | esac 49 | done 50 | 51 | echo ${workingDir} 52 | cd ${workingDir} 53 | ls -lh 54 | echo "source: ${SOURCE}" 55 | echo "location: ${LOCATION}" 56 | gsutil cp "${SOURCE}" "${LOCATION}" 57 | SHA256=$(sha256sum ${SOURCE} | awk '{print $1}' | tr -d '\n') 58 | printf "sha256:%s" "${SHA256}" > ${resultPathCommit} 59 | printf "md5:%s" "${LOCATION}" > ${resultPathUrl} 60 | -------------------------------------------------------------------------------- /slsa_for_models/gcp/tasks/upload-model.yml: -------------------------------------------------------------------------------- 1 | apiVersion: tekton.dev/v1 2 | kind: Task 3 | metadata: 4 | name: upload-model 5 | spec: 6 | workspaces: 7 | - name: shared 8 | params: 9 | - name: tool-versions 10 | properties: 11 | gcloud: { } 12 | default: 13 | gcloud: 'slim' 14 | - name: config 15 | properties: 16 | package: {} 17 | version: {} 18 | source: {} 19 | location: {} 20 | repository: {} 21 | results: 22 | - name: output 23 | - name: json 24 | - name: model_ARTIFACT_OUTPUTS 25 | properties: 26 | uri: {} 27 | digest: {} 28 | steps: 29 | - name: upload-to-generic-repo 30 | image: gcr.io/google.com/cloudsdktool/cloud-sdk:$(params.tool-versions.gcloud) 31 | workingDir: $(workspaces.shared.path) 32 | script: | 33 | gcloud $@ 34 | args: 35 | - artifacts 36 | - generic 37 | - upload 38 | - --package=$(params.config.package) 39 | - --version=$(params.config.version) 40 | - --source=$(params.config.source) 41 | - --location=$(params.config.location) 42 | - --repository=$(params.config.repository) 43 | stdoutConfig: 44 | path: $(results.output.path) 45 | - name: convert-to-json 46 | image: docker.io/stedolan/jq@sha256:a61ed0bca213081b64be94c5e1b402ea58bc549f457c2682a86704dd55231e09 47 | script: | 48 | jq -R -n -c '[inputs|split(": ")|{(.[0]):.[1]}] | add' $(results.output.path) 49 | stdoutConfig: 50 | path: $(results.json.path) 51 | - name: type-hint 52 | image: docker.io/stedolan/jq@sha256:a61ed0bca213081b64be94c5e1b402ea58bc549f457c2682a86704dd55231e09 53 | script: | 54 | FULL=$(cat $(results.json.path) | jq -rj '.name') 55 | URI=$(echo $FULL | cut -d ":" -f 1) 56 | DIGEST=$(echo $FULL | cut -d ":" -f 2) 57 | cat <>> hasher = SHA256() 23 | >>> hasher.update(b"abcd") 24 | >>> digest = hasher.compute() 25 | >>> digest.digest_hex 26 | '88d4266fd4e6338d13b845fcf289579d209c897823b9217da3e161936f031589' 27 | ``` 28 | 29 | Or, passing the data directly in the constructor: 30 | ```python 31 | >>> hasher = SHA256(b"abcd") 32 | >>> digest = hasher.compute() 33 | >>> digest.digest_hex 34 | '88d4266fd4e6338d13b845fcf289579d209c897823b9217da3e161936f031589' 35 | ``` 36 | """ 37 | 38 | import hashlib 39 | from typing_extensions import override 40 | 41 | from model_signing.hashing import hashing 42 | 43 | 44 | class SHA256(hashing.StreamingHashEngine): 45 | """A wrapper around `hashlib.sha256`.""" 46 | 47 | def __init__(self, initial_data: bytes = b""): 48 | self._hasher = hashlib.sha256(initial_data) 49 | 50 | @override 51 | def update(self, data: bytes) -> None: 52 | self._hasher.update(data) 53 | 54 | @override 55 | def reset(self, data: bytes = b"") -> None: 56 | self._hasher = hashlib.sha256(data) 57 | 58 | @override 59 | def compute(self) -> hashing.Digest: 60 | return hashing.Digest(self.digest_name, self._hasher.digest()) 61 | 62 | @override 63 | @property 64 | def digest_name(self) -> str: 65 | return "sha256" 66 | -------------------------------------------------------------------------------- 
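The streaming and precomputed engines above share the `HashEngine` interface, so a digest computed locally and one supplied by external tooling can be consumed the same way. A minimal usage sketch (not part of the repository; the model file name and chunk size are illustrative assumptions, and `model_signing/hashing/file.py` is the repository's own home for file hashing):

```python
# Hash a model file in chunks with the streaming SHA256 engine from memory.py,
# then wrap an externally supplied digest with PrecomputedDigest so both flow
# through the same HashEngine interface. File name and chunk size are
# illustrative assumptions.
import pathlib

from model_signing.hashing import memory
from model_signing.hashing import precomputed


def digest_file(path: pathlib.Path, chunk_size: int = 8192):
    hasher = memory.SHA256()
    with path.open("rb") as f:
        while chunk := f.read(chunk_size):
            hasher.update(chunk)
    return hasher.compute()


local_digest = digest_file(pathlib.Path("pytorch_model.pth"))
print(local_digest.algorithm, local_digest.digest_hex)

# A digest computed by external tooling (e.g. `sha256sum`) can be wrapped so
# callers do not need to know where the value came from.
external = precomputed.PrecomputedDigest(
    "sha256", bytes.fromhex(local_digest.digest_hex)
)
assert external.compute().digest_value == local_digest.digest_value
```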
/slsa_for_models/kubeflow/README.md:
--------------------------------------------------------------------------------
1 | # Kubeflow Pipeline to generate an ML model with attestations
2 | 
3 | ## Prerequisites
4 | - A Kubernetes cluster is set up and running.
5 | - Tekton Pipelines and Tekton Chains are installed and running on the cluster.
6 | - If pushing to a private storage bucket/repository:
7 |   - Workload Identity Federation has been set up with the `default` KSA.
8 | - kubectl is installed on your system.
9 | 
10 | ## Build the images
11 | For the `clone`, `build-model` and `upload-model` `Tasks`, we need to build the images.
12 | The Dockerfiles and supporting scripts for each Task are available under `slsa_for_models/kubeflow/images/`.
13 | 
14 | ### Build clone image
15 | 
16 | ```bash
17 | cd slsa_for_models/kubeflow/images/clone
18 | IMAGE=<registry>/git-clone # e.g. docker.io/chitrangpatel/git-clone
19 | docker buildx build -f Dockerfile -t ${IMAGE} .
20 | docker push ${IMAGE}
21 | ```
22 | 
23 | ### Build build-model image
24 | 
25 | ```bash
26 | cd slsa_for_models/kubeflow/images/build_model
27 | IMAGE=<registry>/build-model # e.g. docker.io/chitrangpatel/build-model
28 | docker buildx build -f Dockerfile -t ${IMAGE} .
29 | docker push ${IMAGE}
30 | ```
31 | 
32 | ### Build upload-model image
33 | 
34 | ```bash
35 | cd slsa_for_models/kubeflow/images/upload_model
36 | IMAGE=<registry>/upload-model # e.g. docker.io/chitrangpatel/upload-model
37 | docker buildx build -f Dockerfile -t ${IMAGE} .
38 | docker push ${IMAGE}
39 | ```
40 | 
41 | ## Install Kubeflow
42 | For exact details see https://github.com/kubeflow/kfp-tekton/tree/master/sdk#installation.
43 | Requires Python > 3.5.
44 | 
45 | ```bash
46 | python3 -m venv .venv
47 | source .venv/bin/activate
48 | pip install kfp-tekton
49 | ```
50 | 
51 | ## Compile the DSL to YAML
52 | 
53 | The Python DSL is in the `model_transparency.py` file. Depending on the images you built and tagged, you will have to update the `image` value in the corresponding `components`.
54 | To generate a YAML file from it, run:
55 | 
56 | ```bash
57 | python3 model_transparency.py
58 | ```
59 | 
60 | This will update the `model_transparency.yaml` file.
61 | 
62 | ## Run the pipeline
63 | 
64 | ```bash
65 | kubectl apply -f model_transparency.yaml
66 | ```
67 | 
--------------------------------------------------------------------------------
/model_signing/hashing/memory_test.py:
--------------------------------------------------------------------------------
1 | # Copyright 2024 The Sigstore Authors
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 15 | from model_signing.hashing import memory 16 | 17 | 18 | class TestPrecomputedDigest: 19 | 20 | def test_hash_known_value(self): 21 | hasher = memory.SHA256(b"Test string") 22 | digest = hasher.compute() 23 | expected = ( 24 | "a3e49d843df13c2e2a7786f6ecd7e0d184f45d718d1ac1a8a63e570466e489dd" 25 | ) 26 | assert digest.digest_hex == expected 27 | 28 | def test_hash_update_twice_is_the_same_as_update_with_concatenation(self): 29 | str1 = "Test " 30 | str2 = "string" 31 | 32 | hasher1 = memory.SHA256() 33 | hasher1.update(str1.encode("utf-8")) 34 | hasher1.update(str2.encode("utf-8")) 35 | digest1 = hasher1.compute() 36 | 37 | str_all = str1 + str2 38 | hasher2 = memory.SHA256() 39 | hasher2.update(str_all.encode("utf-8")) 40 | digest2 = hasher2.compute() 41 | 42 | assert digest1.digest_hex == digest2.digest_hex 43 | assert digest1.digest_value == digest2.digest_value 44 | 45 | def test_hash_update_empty(self): 46 | hasher1 = memory.SHA256(b"Test string") 47 | hasher1.update(b"") 48 | digest1 = hasher1.compute() 49 | 50 | hasher2 = memory.SHA256(b"Test string") 51 | digest2 = hasher2.compute() 52 | 53 | assert digest1.digest_hex == digest2.digest_hex 54 | assert digest1.digest_value == digest2.digest_value 55 | 56 | def test_update_after_reset(self): 57 | hasher = memory.SHA256(b"Test string") 58 | digest1 = hasher.compute() 59 | hasher.reset() 60 | hasher.update(b"Test string") 61 | digest2 = hasher.compute() 62 | 63 | assert digest1.digest_hex == digest2.digest_hex 64 | assert digest1.digest_value == digest2.digest_value 65 | -------------------------------------------------------------------------------- /.github/workflows/validate_deps.yml: -------------------------------------------------------------------------------- 1 | name: Validate all Python dependencies work together 2 | on: 3 | push: 4 | branches: [main] 5 | pull_request: 6 | branches: [main] 7 | types: [opened, synchronize] 8 | paths-ignore: 9 | - '**/*.md' 10 | - '*.md' 11 | 12 | permissions: {} 13 | 14 | defaults: 15 | run: 16 | shell: bash 17 | 18 | jobs: 19 | model-signing: 20 | name: Test model signing dependencies 21 | runs-on: ${{ matrix.os }} 22 | strategy: 23 | fail-fast: false # Don't cancel other jobs if one fails 24 | matrix: 25 | os: [ubuntu-latest, macos-latest, windows-latest] 26 | include: 27 | - os: macos-latest 28 | os_family: Darwin 29 | - os: ubuntu-latest 30 | os_family: Linux 31 | - os: windows-latest 32 | os_family: Windows 33 | steps: 34 | - uses: actions/checkout@a5ac7e51b41094c92402da3b24376905380afc29 # v4.1.6 35 | - uses: actions/setup-python@82c7e631bb3cdc910f68e0081d67478d79c6982d # v5.1.0 36 | with: 37 | python-version: 3.11 38 | cache: pip 39 | cache-dependency-path: model_signing/install/requirements_${{ matrix.os_family }}.txt 40 | - name: Install dependencies 41 | run: | 42 | set -exuo pipefail 43 | python -m venv venv 44 | .github/workflows/scripts/venv_activate.sh 45 | python -m pip install --require-hashes -r model_signing/install/requirements_${{ matrix.os_family }}.txt 46 | 47 | slsa-for-ml: 48 | name: Test SLSA for ML demo dependencies 49 | runs-on: ${{ matrix.os }} 50 | strategy: 51 | fail-fast: false # Don't cancel other jobs if one fails 52 | matrix: 53 | os: [ubuntu-latest, macos-latest, windows-latest] 54 | include: 55 | - os: macos-latest 56 | os_family: Darwin 57 | - os: ubuntu-latest 58 | os_family: Linux 59 | - os: windows-latest 60 | os_family: Windows 61 | steps: 62 | - uses: actions/checkout@a5ac7e51b41094c92402da3b24376905380afc29 # v4.1.6 63 | - uses: 
actions/setup-python@82c7e631bb3cdc910f68e0081d67478d79c6982d # v5.1.0 64 | with: 65 | python-version: 3.11 66 | cache: pip 67 | cache-dependency-path: slsa_for_models/install/requirements_${{ matrix.os_family }}.txt 68 | - name: Install dependencies 69 | run: | 70 | set -exuo pipefail 71 | python -m venv venv 72 | .github/workflows/scripts/venv_activate.sh 73 | python -m pip install --require-hashes -r slsa_for_models/install/requirements_${{ matrix.os_family }}.txt 74 | -------------------------------------------------------------------------------- /model_signing/hashing/hashing.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 The Sigstore Authors 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | """Machinery for computing digests for a single object. 16 | 17 | We define an abstract `HashEngine` class which can be used in type annotations 18 | and is at the root of the hashing classes hierarchy. 19 | 20 | Since there are multiple hashing methods that we support, users should always 21 | specify the algorithm and the digest value. 22 | """ 23 | 24 | from abc import ABCMeta, abstractmethod 25 | from dataclasses import dataclass 26 | from typing import Protocol 27 | 28 | 29 | @dataclass(frozen=True) 30 | class Digest: 31 | """A digest computed by a `HashEngine`.""" 32 | 33 | algorithm: str 34 | digest_value: bytes 35 | 36 | @property 37 | def digest_hex(self) -> str: 38 | """Hexadecimal, human readable, equivalent of `digest`.""" 39 | return self.digest_value.hex() 40 | 41 | 42 | class HashEngine(metaclass=ABCMeta): 43 | """Generic hash engine.""" 44 | 45 | @abstractmethod 46 | def compute(self) -> Digest: 47 | """Computes the digest of data passed to the engine.""" 48 | pass 49 | 50 | @property 51 | @abstractmethod 52 | def digest_name(self) -> str: 53 | """The canonical name of the algorithm used to compute the hash. 54 | 55 | Subclasses MUST use the `digest_name()` method to record all parameters 56 | that influence the hash output. For example, if a file is split into 57 | shards which are hashed separately and the final digest value is 58 | computed by aggregating these hashes, then the shard size must be given 59 | in the output string. 
60 | """ 61 | pass 62 | 63 | 64 | class Streaming(Protocol): 65 | """A protocol to support streaming data to `HashEngine` objects.""" 66 | 67 | @abstractmethod 68 | def update(self, data: bytes) -> None: 69 | """Appends additional bytes to the data to be hashed.""" 70 | pass 71 | 72 | @abstractmethod 73 | def reset(self, data: bytes = b"") -> None: 74 | """Resets the data to be hashed to the passed argument.""" 75 | pass 76 | 77 | 78 | class StreamingHashEngine(Streaming, HashEngine): 79 | """A `HashEngine` that can stream data to be hashed.""" 80 | 81 | pass 82 | -------------------------------------------------------------------------------- /.github/workflows/codeql.yml: -------------------------------------------------------------------------------- 1 | name: "CodeQL" 2 | 3 | on: 4 | push: 5 | branches: [ "main" ] 6 | pull_request: 7 | # The branches below must be a subset of the branches above 8 | branches: [ "main" ] 9 | schedule: 10 | - cron: '30 22 * * 4' 11 | 12 | jobs: 13 | analyze: 14 | name: Analyze 15 | # Runner size impacts CodeQL analysis time. To learn more, please see: 16 | # - https://gh.io/recommended-hardware-resources-for-running-codeql 17 | # - https://gh.io/supported-runners-and-hardware-resources 18 | # - https://gh.io/using-larger-runners 19 | # Consider using larger runners for possible analysis time improvements. 20 | runs-on: ${{ (matrix.language == 'swift' && 'macos-latest') || 'ubuntu-latest' }} 21 | timeout-minutes: ${{ (matrix.language == 'swift' && 120) || 360 }} 22 | permissions: 23 | actions: read 24 | contents: read 25 | security-events: write 26 | 27 | strategy: 28 | fail-fast: false 29 | matrix: 30 | language: [ 'python' ] 31 | 32 | steps: 33 | - name: Checkout repository 34 | uses: actions/checkout@a5ac7e51b41094c92402da3b24376905380afc29 # v4.1.6 35 | 36 | # Initializes the CodeQL tools for scanning. 37 | - name: Initialize CodeQL 38 | uses: github/codeql-action/init@f9a7c6738f28efb36e31d49c53a201a9c5d6a476 # v2.14.2 39 | with: 40 | languages: ${{ matrix.language }} 41 | # If you wish to specify custom queries, you can do so here or in a config file. 42 | # By default, queries listed here will override any specified in a config file. 43 | # Prefix the list here with "+" to use these queries and those in the config file. 44 | 45 | # For more details on CodeQL's query packs, refer to: https://docs.github.com/en/code-security/code-scanning/automatically-scanning-your-code-for-vulnerabilities-and-errors/configuring-code-scanning#using-queries-in-ql-packs 46 | # queries: security-extended,security-and-quality 47 | 48 | 49 | # Autobuild attempts to build any compiled languages (C/C++, C#, Go, Java, or Swift). 50 | # If this step fails, then you should remove it and run the build manually (see below) 51 | - name: Autobuild 52 | uses: github/codeql-action/autobuild@f9a7c6738f28efb36e31d49c53a201a9c5d6a476 # v2.14.2 53 | 54 | # ℹ️ Command-line programs to run using the OS shell. 55 | # 📚 See https://docs.github.com/en/actions/using-workflows/workflow-syntax-for-github-actions#jobsjob_idstepsrun 56 | 57 | # If the Autobuild fails above, remove it and uncomment the following three lines. 58 | # modify them (or add more) to build your code if your project, please refer to the EXAMPLE below for guidance. 
59 | 60 | # - run: | 61 | # echo "Run, Build Application using script" 62 | # ./location_of_script_within_repo/buildscript.sh 63 | 64 | - name: Perform CodeQL Analysis 65 | uses: github/codeql-action/analyze@f9a7c6738f28efb36e31d49c53a201a9c5d6a476 # v2.14.2 66 | with: 67 | category: "/language:${{matrix.language}}" 68 | -------------------------------------------------------------------------------- /.github/workflows/scorecard.yml: -------------------------------------------------------------------------------- 1 | # This workflow uses actions that are not certified by GitHub. They are provided 2 | # by a third-party and are governed by separate terms of service, privacy 3 | # policy, and support documentation. 4 | 5 | name: Scorecard supply-chain security 6 | on: 7 | # For Branch-Protection check. Only the default branch is supported. See 8 | # https://github.com/ossf/scorecard/blob/main/docs/checks.md#branch-protection 9 | branch_protection_rule: 10 | # To guarantee Maintained check is occasionally updated. See 11 | # https://github.com/ossf/scorecard/blob/main/docs/checks.md#maintained 12 | schedule: 13 | - cron: '37 21 * * 0' 14 | push: 15 | branches: [ "main" ] 16 | 17 | # Declare default permissions as read only. 18 | permissions: read-all 19 | 20 | jobs: 21 | analysis: 22 | name: Scorecard analysis 23 | runs-on: ubuntu-latest 24 | permissions: 25 | # Needed to upload the results to code-scanning dashboard. 26 | security-events: write 27 | # Needed to publish results and get a badge (see publish_results below). 28 | id-token: write 29 | # Uncomment the permissions below if installing in a private repository. 30 | # contents: read 31 | # actions: read 32 | 33 | steps: 34 | - name: "Checkout code" 35 | uses: actions/checkout@a5ac7e51b41094c92402da3b24376905380afc29 # v4.1.6 36 | with: 37 | persist-credentials: false 38 | 39 | - name: "Run analysis" 40 | uses: ossf/scorecard-action@dc50aa9510b46c811795eb24b2f1ba02a914e534 # v2.3.3 41 | with: 42 | results_file: results.sarif 43 | results_format: sarif 44 | # (Optional) "write" PAT token. Uncomment the `repo_token` line below if: 45 | # - you want to enable the Branch-Protection check on a *public* repository, or 46 | # - you are installing Scorecard on a *private* repository 47 | # To create the PAT, follow the steps in https://github.com/ossf/scorecard-action#authentication-with-pat. 48 | # repo_token: ${{ secrets.SCORECARD_TOKEN }} 49 | 50 | # Public repositories: 51 | # - Publish results to OpenSSF REST API for easy access by consumers 52 | # - Allows the repository to include the Scorecard badge. 53 | # - See https://github.com/ossf/scorecard-action#publishing-results. 54 | # For private repositories: 55 | # - `publish_results` will always be set to `false`, regardless 56 | # of the value entered here. 57 | publish_results: true 58 | 59 | # Upload the results as artifacts (optional). Commenting out will disable uploads of run results in SARIF 60 | # format to the repository Actions tab. 61 | - name: "Upload artifact" 62 | uses: actions/upload-artifact@65462800fd760344b1a7b4382951275a0abb4808 # v4.3.3 63 | with: 64 | name: SARIF file 65 | path: results.sarif 66 | retention-days: 5 67 | 68 | # Upload the results to GitHub's code scanning dashboard. 
69 | - name: "Upload to code-scanning" 70 | uses: github/codeql-action/upload-sarif@17573ee1cc1b9d061760f3a006fc4aac4f944fd5 # v2.2.4 71 | with: 72 | sarif_file: results.sarif 73 | -------------------------------------------------------------------------------- /slsa_for_models/github_actions.md: -------------------------------------------------------------------------------- 1 | ## SLSA for Models in GitHub Actions 2 | 3 | This example uses [SLSA L3 GitHub generator][slsa-generator] to generate SLSA 4 | provenance for ML models in GitHub Actions. This happens during a 5 | [workflow][workflow] which takes as input the format to save the model into. 6 | 7 | When users download a given version of a model they can also check its 8 | provenance by using [the SLSA verifier][slsa-verifier] repository. 9 | 10 | To test, fork this repository, then head over to the Actions tab and select the 11 | "SLSA for ML models example" workflow. Since the workflow has a 12 | `workflow_dispatch` trigger, it can be invoked on demand: click the `Run 13 | workflow` button, then select the value for the "Name of the model" argument. 14 | 15 | ![Triggering a SLSA workflow](images/slsa_trigger.png) 16 | 17 | The supported formats are: 18 | 19 | | Workflow Argument | Training Framework | Model format | 20 | |------------------------------|--------------------|---------------------------------| 21 | | `tensorflow_model.keras` | TensorFlow | Keras format (default) | 22 | | `tensorflow_hdf5_model.h5` | TensorFlow | Legacy HDF5 format | 23 | | `tensorflow_hdf5.weights.h5` | TensorFlow | Legacy HDF5 weights only format | 24 | | `pytorch_model.pth` | PyTorch | PyTorch default format | 25 | | `pytorch_full_model.pth` | PyTorch | PyTorch complete model format | 26 | | `pytorch_jitted_model.pt` | PyTorch | PyTorch TorchScript format | 27 | 28 | After the workflow finishes execution, there will be two archives in the 29 | "Artifacts" section: one is the model that was trained and the other one is the 30 | SLSA provenance attached to the model. 31 | 32 | ![Results of running a SLSA workflow](images/slsa_results.png) 33 | 34 | To verify the provenance, download both archives, unzip each and then run 35 | `slsa-verifier`, making sure to replace the `--source-uri` argument with the 36 | _path to your fork_. For example, for a PyTorch model, which has been [built on 37 | this repository](https://github.com/sigstore/model-transparency/actions/runs/6646816974): 38 | 39 | ```bash 40 | [...]$ slsa-verifier verify-artifact \ 41 | --provenance-path pytorch_model.pth.intoto.jsonl \ 42 | --source-uri github.com/sigstore/model-transparency \ 43 | pytorch_model.pth 44 | Verified signature against tlog entry index 45419124 at URL: https://rekor.sigstore.dev/api/v1/log/entries/24296fb24b8ad77a98dd03d23a78657e7f1efd3d9bea6988abbf23a72290a4ec7dc35c9edeab7ee1 45 | Verified build using builder "https://github.com/slsa-framework/slsa-github-generator/.github/workflows/generator_generic_slsa3.yml@refs/tags/v1.9.0" at commit ac26cbf66849cfec6f29747f4525180595c7eef0 46 | Verifying artifact pytorch_model.pth: PASSED 47 | 48 | PASSED: Verified SLSA provenance 49 | ``` 50 | 51 | The verification of provenance can be done just before the model gets loaded in 52 | the serving pipeline. 
53 | 54 | [cifar10]: https://www.cs.toronto.edu/~kriz/cifar.html 55 | [slsa-generator]: https://github.com/slsa-framework/slsa-github-generator 56 | [slsa-verifier]: https://github.com/slsa-framework/slsa-verifier/ 57 | [slsa]: https://slsa.dev 58 | [solarwinds]: https://www.techtarget.com/whatis/feature/SolarWinds-hack-explained-Everything-you-need-to-know 59 | [tekton-chains]: https://github.com/tektoncd/chains 60 | [tekton-kubeflow]: https://www.kubeflow.org/docs/components/pipelines/v1/sdk/pipelines-with-tekton/ 61 | [workflow]: https://github.com/sigstore/model-transparency/blob/main/.github/workflows/slsa_for_ml.yml 62 | -------------------------------------------------------------------------------- /.github/workflows/slsa_for_ml.yml: -------------------------------------------------------------------------------- 1 | name: SLSA for ML models example 2 | on: 3 | workflow_dispatch: 4 | inputs: 5 | model_type: 6 | description: Name of the model (implies framework) 7 | required: true 8 | type: choice 9 | options: 10 | - tensorflow_model.keras 11 | - tensorflow_hdf5_model.h5 12 | - tensorflow_hdf5.weights.h5 13 | - pytorch_model.pth 14 | - pytorch_full_model.pth 15 | - pytorch_jitted_model.pt 16 | pull_request: 17 | branches: [main] 18 | types: [opened, synchronize] 19 | paths-ignore: 20 | - '**/*.md' 21 | - '*.md' 22 | 23 | permissions: read-all 24 | 25 | defaults: 26 | run: 27 | shell: bash 28 | 29 | jobs: 30 | train: 31 | name: Train model 32 | runs-on: ${{ matrix.os }} 33 | strategy: 34 | fail-fast: false # Don't cancel other jobs if one fails 35 | matrix: 36 | os: [ubuntu-latest, macos-latest, windows-latest] 37 | include: 38 | - os: macos-latest 39 | os_family: Darwin 40 | - os: ubuntu-latest 41 | os_family: Linux 42 | - os: windows-latest 43 | os_family: Windows 44 | outputs: 45 | hash-ubuntu-latest: ${{ steps.hash.outputs.hash-ubuntu-latest }} 46 | hash-macos-latest: ${{ steps.hash.outputs.hash-macos-latest }} 47 | hash-windows-latest: ${{ steps.hash.outputs.hash-windows-latest }} 48 | steps: 49 | - run: git config --global core.autocrlf input 50 | - uses: actions/checkout@a5ac7e51b41094c92402da3b24376905380afc29 # v4.1.6 51 | - uses: actions/setup-python@82c7e631bb3cdc910f68e0081d67478d79c6982d # v5.1.0 52 | with: 53 | python-version: 3.11 54 | cache: pip 55 | cache-dependency-path: slsa_for_models/install/requirements_${{ matrix.os_family }}.txt 56 | - name: Install dependencies 57 | run: | 58 | set -exuo pipefail 59 | python -m venv venv 60 | .github/workflows/scripts/venv_activate.sh 61 | python -m pip install --require-hashes -r slsa_for_models/install/requirements_${{ matrix.os_family }}.txt 62 | - name: Build model 63 | env: 64 | MODEL_TYPE: ${{ github.event.inputs.model_type || 'pytorch_jitted_model.pt' }} 65 | run: | 66 | set -exuo pipefail 67 | python -m venv venv 68 | .github/workflows/scripts/venv_activate.sh 69 | python slsa_for_models/main.py "$MODEL_TYPE" 70 | - uses: actions/upload-artifact@65462800fd760344b1a7b4382951275a0abb4808 # v4.3.3 71 | with: 72 | path: ${{ github.event.inputs.model_type || 'pytorch_jitted_model.pt' }} 73 | name: ${{ github.event.inputs.model_type || 'pytorch_jitted_model.pt' }}_${{ matrix.os_family }} 74 | if-no-files-found: error 75 | - id: hash 76 | env: 77 | MODEL: ${{ github.event.inputs.model_type || 'pytorch_jitted_model.pt' }} 78 | run: | 79 | set -euo pipefail 80 | (sha256sum -t "$MODEL" || shasum -a 256 "$MODEL") > checksum 81 | echo "hash-${{ matrix.os }}=$(base64 -w0 checksum || base64 checksum)" >> "${GITHUB_OUTPUT}" 82 | 83 
| provenance: 84 | # TODO(mihaimaruseac): Don't run on pull requests for now 85 | if: ${{ github.event_name != 'pull_request' }} 86 | needs: [train] 87 | strategy: 88 | fail-fast: false # Don't cancel other jobs if one fails 89 | matrix: 90 | os: [ubuntu-latest, macos-latest, windows-latest] 91 | permissions: 92 | actions: read 93 | id-token: write 94 | contents: write 95 | uses: slsa-framework/slsa-github-generator/.github/workflows/generator_generic_slsa3.yml@v2.0.0 96 | with: 97 | base64-subjects: "${{ needs.train.outputs[format('hash-{0}', matrix.os)] }}" 98 | upload-assets: true # NOTE: This does nothing unless 'upload-tag-name' parameter is also set to an existing tag 99 | -------------------------------------------------------------------------------- /slsa_for_models/gcp/pipeline.yml: -------------------------------------------------------------------------------- 1 | apiVersion: tekton.dev/v1 2 | kind: Pipeline 3 | metadata: 4 | name: slsa-for-models 5 | spec: 6 | workspaces: 7 | - name: shared 8 | params: 9 | - name: tool-versions 10 | properties: 11 | python: {} 12 | gcloud: {} 13 | default: 14 | python: '3.11' 15 | gcloud: 'slim' 16 | - name: model-source 17 | properties: 18 | url: {} 19 | revision: {} 20 | requirements-path: {} 21 | main-path: {} 22 | default: 23 | url: 'https://github.com/sigstore/model-transparency' 24 | revision: 'main' 25 | requirements-path: 'slsa_for_models/install/requirements_Linux.txt' 26 | main-path: 'slsa_for_models/main.py' 27 | - name: model-name 28 | enum: 29 | - 'tensorflow_model.keras' 30 | - 'tensorflow_hdf5_model.h5' 31 | - 'tensorflow_hdf5.weights.h5' 32 | - 'pytorch_model.pth' 33 | - 'pytorch_full_model.pth' 34 | - 'pytorch_jitted_model.pt' 35 | - name: model-storage 36 | properties: 37 | package: {} 38 | location: {} 39 | repository: {} 40 | results: 41 | - name: source_ARTIFACT_INPUTS 42 | value: $(tasks.git-clone.results.source_ARTIFACT_INPUTS[*]) 43 | - name: model_ARTIFACT_OUTPUTS 44 | value: $(tasks.upload-model.results.model_ARTIFACT_OUTPUTS[*]) 45 | tasks: 46 | - name: git-clone 47 | workspaces: 48 | - name: output 49 | workspace: shared 50 | params: 51 | - name: url 52 | value: $(params.model-source.url) 53 | - name: revision 54 | value: $(params.model-source.revision) 55 | taskRef: 56 | resolver: git 57 | params: 58 | - name: url 59 | value: https://github.com/sigstore/model-transparency.git 60 | - name: revision 61 | value: $(params.model-source.revision) 62 | - name: pathInRepo 63 | value: slsa_for_models/gcp/tasks/git-clone.yml 64 | - name: build-model 65 | runAfter: 66 | - git-clone 67 | workspaces: 68 | - name: source 69 | workspace: shared 70 | params: 71 | - name: model-name 72 | value: $(params.model-name) 73 | - name: model-source 74 | value: 75 | requirements-path: $(params.model-source.requirements-path) 76 | main-path: $(params.model-source.main-path) 77 | - name: python-version 78 | value: $(params.tool-versions.python) 79 | taskRef: 80 | resolver: git 81 | params: 82 | - name: url 83 | value: https://github.com/sigstore/model-transparency.git 84 | - name: revision 85 | value: $(params.model-source.revision) 86 | - name: pathInRepo 87 | value: slsa_for_models/gcp/tasks/build-model.yml 88 | - name: upload-model 89 | runAfter: 90 | - build-model 91 | workspaces: 92 | - name: shared 93 | params: 94 | - name: config 95 | value: 96 | package: $(params.model-storage.package) 97 | version: $(tasks.build-model.results.digest) 98 | source: $(params.model-name) 99 | location: $(params.model-storage.location) 100 | repository: 
$(params.model-storage.repository) 101 | - name: tool-versions 102 | value: 103 | gcloud: $(params.tool-versions.gcloud) 104 | taskRef: 105 | resolver: git 106 | params: 107 | - name: url 108 | value: https://github.com/sigstore/model-transparency.git 109 | - name: revision 110 | value: $(params.model-source.revision) 111 | - name: pathInRepo 112 | value: slsa_for_models/gcp/tasks/upload-model.yml 113 | -------------------------------------------------------------------------------- /slsa_for_models/README.md: -------------------------------------------------------------------------------- 1 | # SLSA for Models 2 | 3 | This project shows how we can generate [SLSA][slsa] provenance for ML models 4 | on [GitHub Actions][gha] and [Google Cloud Platform][gcp]. 5 | 6 | SLSA was originally developed for traditional software to protect against 7 | tampering with builds, such as in the [Solarwinds attack][solarwinds], and 8 | this project is a proof of concept that the _same supply chain protections 9 | can be applied to ML_. 10 | 11 | When users download a given version of a model they can also check its provenance. 12 | This can be integrated in the model hub and/or model serving platforms: for example 13 | the model serving pipeline could validate provenance for all new models before 14 | serving them. However, the verification can also be done manually, on demand. 15 | 16 | As an additional benefit, having provenance for a model allows users to react 17 | to vulnerabilities in a training framework: they can quickly determine if a 18 | model needs to be retrained because it was created using a vulnerable version. 19 | 20 | See the guides for [GitHub Actions][gha] and [Google Cloud Platform][gcp] for details. 21 | 22 | ## Models 23 | 24 | We support both TensorFlow and PyTorch models. The example repo trains a model 25 | on [CIFAR10][cifar10] dataset, saves it in one of the supported formats, and 26 | generates provenance for the output. The supported formats are: 27 | 28 | | Workflow Argument | Training Framework | Model format | 29 | |------------------------------|--------------------|---------------------------------| 30 | | `tensorflow_model.keras` | TensorFlow | Keras format (default) | 31 | | `tensorflow_hdf5_model.h5` | TensorFlow | Legacy HDF5 format | 32 | | `tensorflow_hdf5.weights.h5` | TensorFlow | Legacy HDF5 weights only format | 33 | | `pytorch_model.pth` | PyTorch | PyTorch default format | 34 | | `pytorch_full_model.pth` | PyTorch | PyTorch complete model format | 35 | | `pytorch_jitted_model.pt` | PyTorch | PyTorch TorchScript format | 36 | 37 | While most of the ML models are currently too expensive to train, future work will 38 | cover the training of ML models that require access to accelerators (i.e., GPUs, TPUs) 39 | or that require multiple hours for training. 40 | 41 | ## Future Work 42 | 43 | ### Accelerators 44 | Future work will involve covering training ML models that require access to 45 | accelerators (i.e., GPUs, TPUs). 46 | 47 | ### Platforms 48 | While our examples have targeted GitHub Actions and Tekton in GCP, we aim to bring 49 | support for other platforms (e.g., GCB and GitLab) and model training environments. 50 | 51 | ### Directory Format 52 | TensorFlow also supports saving models in `SavedModel` format. This is 53 | a directory-based serialization format and currently we don't fully support 54 | this. We can generate SLSA provenance for all the files in the directory but 55 | there are caveats regarding verification. 
Furthermore, because there is a 56 | difference between the hashes generated by provenance and the hash generated 57 | during model signing, we have decided to add support for these model formats at 58 | a future time, after standardizing a way to generate and verify provenance in 59 | SLSA (in general, not just for ML). 60 | 61 | [cifar10]: https://www.cs.toronto.edu/~kriz/cifar.html 62 | [slsa-generator]: https://github.com/slsa-framework/slsa-github-generator 63 | [slsa-verifier]: https://github.com/slsa-framework/slsa-verifier/ 64 | [slsa]: https://slsa.dev 65 | [solarwinds]: https://www.techtarget.com/whatis/feature/SolarWinds-hack-explained-Everything-you-need-to-know 66 | [tekton-chains]: https://github.com/tektoncd/chains 67 | [tekton-kubeflow]: https://www.kubeflow.org/docs/components/pipelines/v1/sdk/pipelines-with-tekton/ 68 | [workflow]: https://github.com/sigstore/model-transparency/blob/main/.github/workflows/slsa_for_ml.yml 69 | [gha]: github_actions.md 70 | [gcp]: gcp 71 | -------------------------------------------------------------------------------- /model_signing/main.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 The Sigstore Authors 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | import argparse 16 | import sys 17 | from pathlib import Path 18 | import model 19 | 20 | 21 | # https://github.com/sigstore/sigstore-python/issues/661 22 | # contains the logic to start the web browser. 23 | 24 | def readOptions(): 25 | parser = argparse.ArgumentParser("CLI for signing AI models") 26 | subcommands = parser.add_subparsers(required=True, dest="subcommand") 27 | 28 | # TODO: option for a path to store the signature. 29 | # Sign group. 30 | sign = subcommands.add_parser( 31 | "sign", formatter_class=argparse.ArgumentDefaultsHelpFormatter 32 | ) 33 | sign.add_argument("--path", required=True, help="The path to sign") 34 | sign.add_argument("--disable-ambient", required=False, 35 | default=False, action='store_true', 36 | help="Disable ambient credential detection") 37 | 38 | # Verify group.
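# Example invocation of the verify subcommand (identity values below are
# placeholders, not defaults enforced by this CLI):
#   python main.py verify --path path/to/model \
#       --identity user@example.com \
#       --identity-provider https://accounts.google.com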
39 | verify = subcommands.add_parser( 40 | "verify", formatter_class=argparse.ArgumentDefaultsHelpFormatter 41 | ) 42 | verify.add_argument("--path", required=True, 43 | help="The path to a file to verify") 44 | verify.add_argument("--identity", required=True, 45 | help="The identity (email, workload identity) to " + 46 | "verify") 47 | verify.add_argument("--identity-provider", required=True, 48 | help="The OIDC provider to verify") 49 | 50 | args = parser.parse_args() 51 | return args 52 | 53 | 54 | def signature_path(modelfn: Path) -> Path: 55 | if modelfn.is_file(): 56 | return Path(modelfn.parent).joinpath(f"{modelfn.name}.sig") 57 | return modelfn.joinpath("model.sig") 58 | 59 | 60 | def ignored_paths(modelfn: Path) -> [Path]: 61 | if modelfn.is_file(): 62 | return [] 63 | return [modelfn.joinpath(".git")] 64 | 65 | 66 | # Sign function 67 | def sign(modelfn: Path, disable_ambient: bool) -> model.SignatureResult: 68 | signer = model.SigstoreSigner(disable_ambient=disable_ambient) 69 | return signer.sign(modelfn, signature_path(modelfn), 70 | ignored_paths(modelfn)) 71 | 72 | 73 | def verify(modelfn: Path, issuer: str, identity: str, 74 | offline=False) -> model.VerificationResult: 75 | verifier = model.SigstoreVerifier(oidc_provider=issuer, identity=identity) 76 | return verifier.verify(modelfn, signature_path(modelfn), 77 | ignored_paths(modelfn), offline) 78 | 79 | 80 | def main(args) -> int: 81 | if args.subcommand == "sign": 82 | result = sign(Path(args.path), disable_ambient=args.disable_ambient) 83 | if result: 84 | print("signature success") 85 | else: 86 | print(f"signature failure: {str(result)}") 87 | return -1 88 | elif args.subcommand == "verify": 89 | modelfn = Path(args.path) 90 | result = verify(modelfn=modelfn, 91 | issuer=args.identity_provider, 92 | identity=args.identity) 93 | if result: 94 | print("verification success") 95 | else: 96 | print(f"verification failure: {str(result)}") 97 | return -1 98 | return 0 99 | 100 | 101 | if __name__ == '__main__': 102 | args = readOptions() 103 | 104 | sys.exit(main(args)) 105 | -------------------------------------------------------------------------------- /.github/workflows/pin_deps.yml: -------------------------------------------------------------------------------- 1 | name: Pin dependencies 2 | on: 3 | workflow_dispatch: 4 | schedule: 5 | - cron: '0 0 * * TUE' # run every Tuesday at midnight 6 | 7 | permissions: {} 8 | 9 | defaults: 10 | run: 11 | shell: bash 12 | 13 | jobs: 14 | pin: 15 | name: Generate dependency lock 16 | runs-on: ${{ matrix.os }} 17 | strategy: 18 | fail-fast: false # Don't cancel other jobs if one fails 19 | matrix: 20 | os: [ubuntu-latest, macos-latest, windows-latest] 21 | include: 22 | - os: ubuntu-latest 23 | os_family: Linux 24 | - os: macos-latest 25 | os_family: Darwin 26 | - os: windows-latest 27 | os_family: Windows 28 | steps: 29 | - uses: actions/checkout@a5ac7e51b41094c92402da3b24376905380afc29 # v4.1.6 30 | - uses: actions/setup-python@82c7e631bb3cdc910f68e0081d67478d79c6982d # v5.1.0 31 | with: 32 | python-version: 3.11 33 | cache: pip 34 | cache-dependency-path: | 35 | model_signing/install/requirements_${{ matrix.os_family }}.txt 36 | model_signing/install/requirements_test_${{ matrix.os_family }}.txt 37 | slsa_for_models/install/requirements_${{ matrix.os_family }}.txt 38 | - name: Create an empty virtualenv and install `pip-tools` 39 | run: | 40 | set -exuo pipefail 41 | python -m venv venv 42 | .github/workflows/scripts/venv_activate.sh 43 | pip install pip-tools 44 | pip list # 
For debugging 45 | - name: Use `pip-compile` to generate all freeze files 46 | run: | 47 | set -exuo pipefail 48 | .github/workflows/scripts/venv_activate.sh 49 | pip-compile --upgrade --generate-hashes --strip-extras --output-file=model_signing/install/requirements_${{ matrix.os_family }}.txt model_signing/install/requirements.in 50 | pip-compile --upgrade --generate-hashes --strip-extras --output-file=model_signing/install/requirements_test_${{ matrix.os_family }}.txt model_signing/install/requirements_test.in 51 | pip-compile --upgrade --generate-hashes --strip-extras --output-file=slsa_for_models/install/requirements_${{ matrix.os_family }}.txt slsa_for_models/install/requirements.in 52 | - name: Test freeze file (for model signing) 53 | run: | 54 | set -exuo pipefail 55 | rm -rf venv # Need clean sandbox 56 | python -m venv venv 57 | .github/workflows/scripts/venv_activate.sh 58 | pip install -r model_signing/install/requirements_${{ matrix.os_family }}.txt 59 | pip list # For debugging 60 | - name: Test freeze file (for testing model signing) 61 | run: | 62 | set -exuo pipefail 63 | rm -rf venv # Need clean sandbox 64 | python -m venv venv 65 | .github/workflows/scripts/venv_activate.sh 66 | pip install -r model_signing/install/requirements_test_${{ matrix.os_family }}.txt 67 | pip list # For debugging 68 | - name: Test freeze file (for SLSA for models) 69 | run: | 70 | set -exuo pipefail 71 | rm -rf venv # Need clean sandbox 72 | python -m venv venv 73 | .github/workflows/scripts/venv_activate.sh 74 | pip install -r slsa_for_models/install/requirements_${{ matrix.os_family }}.txt 75 | pip list # For debugging 76 | - name: Upload freeze files 77 | uses: actions/upload-artifact@65462800fd760344b1a7b4382951275a0abb4808 # v4.3.3 78 | with: 79 | name: freeze-files-${{ matrix.os }} 80 | path: ./*/install/requirements*${{ matrix.os_family }}*txt 81 | 82 | # Separate PR creation job to make sure it creates only one single PR with 83 | # all changed files, eliminate race-conditions and restrict permissions only 84 | # to this specific job. 85 | create-pr: 86 | needs: [pin] 87 | runs-on: ubuntu-latest 88 | permissions: 89 | contents: write 90 | pull-requests: write 91 | steps: 92 | - uses: actions/checkout@a5ac7e51b41094c92402da3b24376905380afc29 # v4.1.6 93 | - uses: actions/download-artifact@65a9edc5881444af0b9093a5e628f2fe47ea3b2e # v4.1.7 94 | with: 95 | path: . 96 | merge-multiple: true 97 | - name: Create dependent PR with dependency changes 98 | uses: peter-evans/create-pull-request@6d6857d36972b65feb161a90e484f2984215f83e # v6.0.5 99 | with: 100 | title: "Update frozen python dependencies" 101 | commit-message: "Bump frozen dependencies" 102 | signoff: true 103 | delete-branch: true 104 | -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # Contributing 2 | 3 | When contributing to a repository in the Sigstore organization, please first discuss the change you wish 4 | to make via an issue in the repository. 5 | 6 | ## Pull Request Process 7 | 8 | 1. Create an issue in the repository outlining the fix or feature. 9 | 2. Fork the repository to your own GitHub account and clone it locally. 10 | 3. Complete and test the change. 11 | 4. If relevant, update documentation with details of the change. This includes updates to an API, new environment 12 | variables, exposed ports, useful file locations, CLI parameters and 13 | new or changed configuration values. 
14 | 5. Correctly format your commit message - See [Commit Messages](#commit-message-guidelines) 15 | below. 16 | 6. Sign off your commit. 17 | 7. Ensure that CI passes. If it fails, fix the failures. 18 | 8. Every pull request requires a review from the Sigstore subprojects MAINTAINERS. 19 | 9. If your pull request consists of more than one commit, please squash your 20 | commits as described in [Squash Commits](#squash-commits), or the commits 21 | will be squashed on merge. 22 | 23 | ## Commit Message Guidelines 24 | 25 | We follow the commit formatting recommendations found on [Chris Beams' How to Write a Git Commit Message article](https://chris.beams.io/posts/git-commit/). 26 | 27 | Well formed commit messages not only help reviewers understand the nature of 28 | the Pull Request, but also assists the release process where commit messages 29 | are used to generate release notes. 30 | 31 | A good example of a commit message would be as follows: 32 | 33 | ``` 34 | Summarize changes in around 50 characters or less 35 | 36 | More detailed explanatory text, if necessary. Wrap it to about 72 37 | characters or so. In some contexts, the first line is treated as the 38 | subject of the commit and the rest of the text as the body. The 39 | blank line separating the summary from the body is critical (unless 40 | you omit the body entirely); various tools like `log`, `shortlog` 41 | and `rebase` can get confused if you run the two together. 42 | 43 | Explain the problem that this commit is solving. Focus on why you 44 | are making this change as opposed to how (the code explains that). 45 | Are there side effects or other unintuitive consequences of this 46 | change? Here's the place to explain them. 47 | 48 | Further paragraphs come after blank lines. 49 | 50 | - Bullet points are okay, too 51 | 52 | - Typically a hyphen or asterisk is used for the bullet, preceded 53 | by a single space, with blank lines in between, but conventions 54 | vary here 55 | 56 | If you use an issue tracker, put references to them at the bottom, 57 | like this: 58 | 59 | Resolves: #123 60 | See also: #456, #789 61 | ``` 62 | 63 | Note the `Resolves #123` tag, this references the issue raised and allows us to 64 | ensure issues are associated and closed when a pull request is merged. 65 | 66 | Please refer to [the github help page on message types](https://help.github.com/articles/closing-issues-using-keywords/) 67 | for a complete list of issue references. 68 | 69 | ## Squash Commits 70 | 71 | Should your pull request consist of more than one commit (perhaps due to 72 | a change being requested during the review cycle), please perform a git squash 73 | once a reviewer has approved your pull request. 74 | 75 | A squash can be performed as follows. 
Let's say you have the following commits: 76 | 77 | initial commit 78 | second commit 79 | final commit 80 | 81 | Run the command below with the number set to the total commits you wish to 82 | squash (in our case 3 commits): 83 | 84 | git rebase -i HEAD~3 85 | 86 | You default text editor will then open up and you will see the following:: 87 | 88 | pick eb36612 initial commit 89 | pick 9ac8968 second commit 90 | pick a760569 final commit 91 | 92 | # Rebase eb1429f..a760569 onto eb1429f (3 commands) 93 | 94 | We want to rebase on top of our first commit, so we change the other two commits 95 | to `squash`: 96 | 97 | pick eb36612 initial commit 98 | squash 9ac8968 second commit 99 | squash a760569 final commit 100 | 101 | After this, should you wish to update your commit message to better summarise 102 | all of your pull request, run: 103 | 104 | git commit --amend 105 | 106 | You will then need to force push (assuming your initial commit(s) were posted 107 | to github): 108 | 109 | git push origin your-branch --force 110 | 111 | Alternatively, a core member can squash your commits within Github. 112 | 113 | ## Code of Conduct 114 | 115 | Sigstore adheres to and enforces the [Contributor Covenant](http://contributor-covenant.org/version/1/4/) Code of Conduct. 116 | Please take a moment to read the [CODE_OF_CONDUCT.md](https://github.com/sigstore/community/blob/main/CODE_OF_CONDUCT.md) document. 117 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Model Transparency 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | - [Overview](#overview) 10 | - [Projects](#projects) 11 | - [Model Signing](#model-signing) 12 | - [SLSA for ML](#slsa-for-ml) 13 | - [Status](#status) 14 | - [Contributing](#contributing) 15 | 16 | 17 | 18 | ## Overview 19 | 20 | There is currently significant growth in the number of ML-powered applications. 21 | This brings benefits, but it also provides grounds for attackers to exploit 22 | unsuspecting ML users. This is why Google launched the [Secure AI Framework 23 | (SAIF)][saif] to establish industry standards for creating trustworthy and 24 | responsible AI applications. The first principle of SAIF is to 25 | 26 | > Expand strong security foundations to the AI ecosystem 27 | 28 | Building on the work with [Open Source Security Foundation][openssf], we are 29 | creating this repository to demonstrate how the ML supply chain can be 30 | strengthened in _the same way_ as the traditional software supply chain. 31 | 32 | This repository hosts a collection of utilities and examples related to the 33 | security of machine learning pipelines. The focus is on providing *verifiable* 34 | claims about the integrity and provenance of the resulting models, meaning users 35 | can check for themselves that these claims are true rather than having to just 36 | trust the model trainer. 37 | 38 | ## Projects 39 | 40 | Currently, there are two main projects in the repository: model signing (to 41 | prevent tampering of models after publication to ML model hubs) and 42 | [SLSA](https://slsa.dev/) (to prevent tampering of models during the build 43 | process). 44 | 45 | ### Model Signing 46 | 47 | This project demonstrates how to protect the integrity of a model by signing it 48 | with [Sigstore](https://www.sigstore.dev/), a tool for making code signatures 49 | transparent without requiring management of cryptographic key material. 
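As a rough sketch (paths and identity values are placeholders, and the commands
are assumed to run from the `model_signing/` directory), signing and then
verifying a model with the CLI in this repository looks like:

```bash
# Sign a model file or directory; an OIDC flow starts unless an ambient
# credential is detected.
python3 main.py sign --path path/to/model

# Verify the signature against the identity that signed it.
python3 main.py verify --path path/to/model \
    --identity user@example.com \
    --identity-provider https://accounts.google.com
```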
50 | 51 | When users download a given version of a signed model they can check that the 52 | signature comes from a known or trusted identity and thus that the model hasn't 53 | been tampered with after training. 54 | 55 | We are able to sign large models with very good performance, as the following 56 | table shows: 57 | 58 | | Model | Size | Sign Time | Verify Time | 59 | |--------------------|-------|:----------:|:-----------:| 60 | | roberta-base-11 | 8K | 1s | 0.6s | 61 | | hustvl/YOLOP | 215M | 1s | 1s | 62 | | bertseq2seq | 2.8G | 1.9s | 1.4s | 63 | | bert-base-uncased | 3.3G | 1.6s | 1.1s | 64 | | tiiuae/falcon-7b | 14GB | 2.1s | 1.8s | 65 | 66 | See [model_signing/README.md](model_signing/README.md) for more information. 67 | 68 | ### SLSA for ML 69 | 70 | This project shows how we can generate [SLSA][slsa] provenance for ML models, 71 | using either Github Actions or Google Cloud Platform. 72 | 73 | SLSA was originally developed for traditional software to protect against 74 | tampering with builds, such as in the [Solarwinds attack][solarwinds], and 75 | this project is a proof of concept that the same supply chain protections 76 | can be applied to ML. 77 | 78 | We support both TensorFlow and PyTorch models. The examples train a model 79 | on [CIFAR10][cifar10] dataset, save it in one of the supported formats, and 80 | generate provenance for the output. The supported formats are: 81 | 82 | | Workflow Argument | Training Framework | Model format | 83 | |------------------------------|--------------------|---------------------------------| 84 | | `tensorflow_model.keras` | TensorFlow | Keras format (default) | 85 | | `tensorflow_hdf5_model.h5` | TensorFlow | Legacy HDF5 format | 86 | | `tensorflow_hdf5.weights.h5` | TensorFlow | Legacy HDF5 weights only format | 87 | | `pytorch_model.pth` | PyTorch | PyTorch default format | 88 | | `pytorch_full_model.pth` | PyTorch | PyTorch complete model format | 89 | | `pytorch_jitted_model.pt` | PyTorch | PyTorch TorchScript format | 90 | 91 | See [slsa_for_models/README.md](slsa_for_models/README.md) for more information. 92 | 93 | ## Status 94 | 95 | This project is currently experimental, not ready for all production use-cases. 96 | We may make breaking changes until the first official release. 97 | 98 | ## Contributing 99 | 100 | Please see the [Contributor Guide](CONTRIBUTING.md) for more information. 
101 | 102 | [slsa]: https://slsa.dev/ 103 | [saif]: https://blog.google/technology/safety-security/introducing-googles-secure-ai-framework/ 104 | [openssf]: https://openssf.org/ 105 | [slsa-generator]: https://github.com/slsa-framework/slsa-github-generator 106 | [solarwinds]: https://www.techtarget.com/whatis/feature/SolarWinds-hack-explained-Everything-you-need-to-know 107 | -------------------------------------------------------------------------------- /slsa_for_models/kubeflow/model_transparency.py: -------------------------------------------------------------------------------- 1 | import kfp_tekton 2 | from kfp import dsl, components 3 | from kubernetes.client.models import ( 4 | V1PersistentVolumeClaimSpec, 5 | V1ResourceRequirements, 6 | ) 7 | import json 8 | 9 | 10 | def git_clone(url: str, target: str): 11 | return components.load_component_from_text( 12 | """ 13 | name: git-clone 14 | description: Git clone 15 | inputs: 16 | - {name: url, type: String} 17 | - {name: target, type: Directory} 18 | outputs: 19 | - {name: CHAINS-GIT_COMMIT, type: String} 20 | - {name: CHAINS-GIT_URL, type: String} 21 | implementation: 22 | container: 23 | image: chitrangpatel/git-clone 24 | command: 25 | - ./clone.sh 26 | args: 27 | - -u 28 | - {inputValue: url} 29 | - -c 30 | - {outputPath: CHAINS-GIT_COMMIT} 31 | - -p 32 | - {outputPath: CHAINS-GIT_URL} 33 | - -t 34 | - {inputValue: target} 35 | """ 36 | )(url=url, target=target) 37 | 38 | 39 | def build_model(requirements: str, source: str, model: str, workDir: str): 40 | return components.load_component_from_text( 41 | """ 42 | name: build-model 43 | description: Build Model 44 | inputs: 45 | - {name: requirements, type: String} 46 | - {name: source, type: String} 47 | - {name: model, type: String} 48 | - {name: work, type: String} 49 | outputs: 50 | - {name: digest, type: String} 51 | implementation: 52 | container: 53 | image: chitrangpatel/build-model 54 | command: 55 | - ./build.sh 56 | args: 57 | - -r 58 | - {inputValue: requirements} 59 | - -w 60 | - {inputValue: work} 61 | - -s 62 | - {inputValue: source} 63 | - -m 64 | - {inputValue: model} 65 | - -d 66 | - {outputPath: digest} 67 | """ 68 | )(requirements=requirements, source=source, model=model, work=workDir) 69 | 70 | 71 | def upload_model(location: str, source: str, workDir: str): 72 | return components.load_component_from_text( 73 | """ 74 | name: upload-model 75 | description: Upload Model 76 | inputs: 77 | - {name: location, type: String} 78 | - {name: source, type: String} 79 | - {name: work, type: String} 80 | outputs: 81 | - {name: model_ARTIFACT_URI, type: String} 82 | - {name: model_ARTIFACT_DIGEST, type: String} 83 | implementation: 84 | container: 85 | image: chitrangpatel/upload-model 86 | command: 87 | - ./upload.sh 88 | args: 89 | - -r 90 | - {outputPath: model_ARTIFACT_URI} 91 | - -w 92 | - {inputValue: work} 93 | - -c 94 | - {outputPath: model_ARTIFACT_DIGEST} 95 | - -s 96 | - {inputValue: source} 97 | - -l 98 | - {inputValue: location} 99 | """ 100 | )(location=location, source=source, work=workDir) 101 | 102 | 103 | @dsl.pipeline( 104 | name="clone-build-push-pipeline", 105 | description="Clone the source code, build & upload the model to GCS.", 106 | ) 107 | def clone_build_push( 108 | url: str = "https://github.com/sigstore/model-transparency", 109 | target: str = "source", 110 | model: str = "pytorch_model.pth", 111 | ): 112 | """A three-step pipeline with the first two steps running in parallel.""" 113 | 114 | source_code = "$(workspaces.shared-ws.path)/source" 
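# The values below are example-specific assumptions: the source checkout lives
# in the shared Tekton workspace populated by the git-clone step, the relative
# paths point inside that checkout, and the GCS bucket is illustrative only.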
115 | relative_main_path = "slsa_for_models/main.py" 116 | relative_requirements = "slsa_for_models/install/requirements_Linux.txt" 117 | gcs_path = "gs://chitrang-ml-models/pytorch_model.pth" 118 | 119 | clone_task = git_clone(url, source_code) 120 | workspace_json = {"shared-ws": {}} 121 | clone_task.add_pod_annotation("workspaces", json.dumps(workspace_json)) 122 | 123 | build_task = build_model( 124 | requirements=relative_requirements, 125 | workDir=source_code, 126 | source=relative_main_path, 127 | model=model, 128 | ) 129 | build_task.after(clone_task) 130 | build_task.add_pod_annotation("workspaces", json.dumps(workspace_json)) 131 | 132 | upload_task = upload_model(gcs_path, model, source_code) 133 | upload_task.after(build_task) 134 | upload_task.add_pod_annotation("workspaces", json.dumps(workspace_json)) 135 | 136 | 137 | pipeline_conf = kfp_tekton.compiler.pipeline_utils.TektonPipelineConf() 138 | pipeline_conf.add_pipeline_workspace( 139 | workspace_name="shared-ws", 140 | volume_claim_template_spec=V1PersistentVolumeClaimSpec( 141 | access_modes=["ReadWriteOnce"], 142 | resources=V1ResourceRequirements(requests={"storage": "5Gi"}), 143 | ), 144 | ) 145 | pipeline_conf.set_generate_component_spec_annotations(False) 146 | 147 | if __name__ == "__main__": 148 | from kfp_tekton.compiler import TektonCompiler 149 | 150 | TektonCompiler().compile( 151 | clone_build_push, 152 | __file__.replace(".py", ".yaml"), 153 | tekton_pipeline_conf=pipeline_conf, 154 | ) 155 | -------------------------------------------------------------------------------- /model_signing/benchmarks/run.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | set -euo pipefail 3 | 4 | if [ "$#" -lt 2 ]; then 5 | echo "Usage: $0 identity-provider identity output_path " 6 | echo "Example: $0 https://accounts.google.com myemail@gmail.com" 7 | exit 1 8 | fi 9 | 10 | time_cmd() { 11 | local cmd="$1" 12 | local arguments="$2" 13 | # shellcheck disable=SC2086 # We want word splitting 14 | { time "${cmd}" ${arguments} >/dev/null; } 2>&1 | grep real | cut -f2 15 | } 16 | 17 | run() { 18 | local model_name="$1" 19 | local model_path="$2" 20 | local model_init="$3" 21 | 22 | echo "Initializing ${model_name} ..." 23 | eval "${model_init}" 24 | # Replace the '/' character. 25 | model_name="${model_name/\//_}" 26 | 27 | echo "Running sign / verify for ${model_name} ..." 28 | results["${model_name}[size]"]=$(du -hs "${model_path}" | cut -f1) 29 | results["${model_name}[sign_time]"]=$(time_cmd python3 "main.py sign --path ${model_path}") 30 | results["${model_name}[verify_time]"]=$(time_cmd python3 "main.py verify --path ${model_path} --identity-provider ${identity_provider} --identity ${identity}") 31 | if [[ "${cleanup}" == "true" ]]; then 32 | rm -rf "${model_path}" "${model_path}.sig" 2>/dev/null || true 33 | fi 34 | } 35 | 36 | # shellcheck disable=SC2317 # Called via model_init(). 37 | download_github_repository() { 38 | local repository="$1" 39 | local model_path="$2" 40 | 41 | # We download the zip which does _not_ contain the .git folder. 42 | wget "https://github.com/${repository}/archive/main.zip" -O "${model_path}".zip 43 | mkdir -p "${model_path}" 44 | shopt -s dotglob 45 | cd "${model_path}" && unzip ../"${model_path}".zip && rm ../"${model_path}".zip && mv "${model_path}"-main/* . && rmdir "${model_path}"-main/ && cd - 46 | shopt -u dotglob 47 | } 48 | 49 | # shellcheck disable=SC2317 # Called via model_init(). 
50 | download_hf_repository() { 51 | local repository="$1" 52 | local model_path="$2" 53 | git clone --depth=1 "https://huggingface.co/${repository}" "${model_path}" 54 | # We delete the .git folder. 55 | rm -rf "${model_path}"/.git 56 | } 57 | 58 | # User inputs. 59 | identity_provider="$1" 60 | identity="$2" 61 | cleanup="" 62 | 63 | if [ "$#" -eq 3 ]; then 64 | cleanup="$3" 65 | fi 66 | 67 | echo 68 | echo "INFO: Be patient, this will take a few minutes!" 69 | echo 70 | 71 | # Variable holding results. 72 | declare -A results 73 | 74 | # Init the environment. 75 | if [[ ! -d "test_env/" ]]; then 76 | python3 -m venv test_env 77 | fi 78 | # shellcheck disable=SC1091 # We have access to source=test_env/bin/activate. 79 | source test_env/bin/activate 80 | python3 -m pip install --require-hashes -r install/requirements_Linux.txt 81 | 82 | # ========================================= 83 | # Warm up! 84 | # ========================================= 85 | # We need to have the identity in the environment, so perform one signature. 86 | file=$(mktemp) 87 | python3 main.py sign --path "${file}" 88 | python3 main.py verify --path "${file}" --identity-provider "${identity_provider}" --identity "${identity}" 89 | rm "${file}" "${file}.sig" 90 | 91 | # ========================================= 92 | # PyTorch YOLOP model 93 | # ========================================= 94 | model_name=hustvl/YOLOP 95 | model_path=$(echo "${model_name}" | cut -d/ -f2) 96 | # shellcheck disable=SC2317 # Reachable via run() call. 97 | model_init() { 98 | if [[ ! -d "${model_path}" ]]; then 99 | download_github_repository "${model_name}" "${model_path}" 100 | fi 101 | } 102 | run "${model_name}" "${model_path}" model_init 103 | 104 | # ========================================= 105 | # ONNX Roberta-base-11 model 106 | # ========================================= 107 | model_name=roberta-base-11 108 | model_path="${model_name}.onnx" 109 | # shellcheck disable=SC2317 # Reachable via run() call. 110 | model_init() { 111 | if [[ ! -f "${model_path}" ]]; then 112 | wget "https://github.com/onnx/models/tree/857a3434216bd6f2be1ea1ff045fb94a437cbe10/text/machine_comprehension/roberta/model/${model_name}.onnx" 113 | fi 114 | } 115 | run "${model_name}" "${model_path}" model_init 116 | 117 | # ========================================= 118 | # tfhub bertseq2seq model 119 | # ========================================= 120 | model_name=bertseq2seq 121 | model_path="${model_name}" 122 | # shellcheck disable=SC2317 # Reachable via run() call. 123 | model_init() { 124 | if [[ ! -d "${model_path}" ]]; then 125 | wget "https://tfhub.dev/google/bertseq2seq/bert24_en_de/1?tf-hub-format=compressed" -O "${model_path}".tgz 126 | mkdir -p "${model_path}" 127 | cd "${model_path}" && tar xvzf ../"${model_path}".tgz && rm ../"${model_path}".tgz && cd - 128 | fi 129 | } 130 | run "${model_name}" "${model_path}" model_init 131 | 132 | # ========================================= 133 | # Huggingface bert base model 134 | # (Tensorflow and PyTorch) 135 | # ========================================= 136 | model_name=bert-base-uncased 137 | model_path="${model_name}" 138 | # shellcheck disable=SC2317 # Reachable via run() call. 139 | model_init() { 140 | if [[ ! 
-d "${model_path}" ]]; then 141 | download_hf_repository "${model_name}" "${model_path}" 142 | fi 143 | } 144 | run "${model_name}" "${model_path}" model_init 145 | 146 | # ========================================= 147 | # PyTorch falcon-7b model 148 | # ========================================= 149 | model_name=tiiuae/falcon-7b 150 | model_path=$(echo "${model_name}" | cut -d/ -f2) 151 | # shellcheck disable=SC2317 # Reachable via run() call. 152 | model_init() { 153 | if [[ ! -d "${model_path}" ]]; then 154 | download_hf_repository "${model_name}" "${model_path}" 155 | fi 156 | } 157 | run "${model_name}" "${model_path}" model_init 158 | 159 | 160 | echo 161 | echo "===== RESULTS ======" 162 | # NOTE: Requires bash >= 4.4. 163 | echo "results:" "${!results[@]}" 164 | mapfile -d '' sorted < <(printf '%s\0' "${!results[@]}" | sort -z) 165 | for key in "${sorted[@]}"; do 166 | echo "$key = ${results[${key}]}" 167 | done 168 | 169 | deactivate 170 | -------------------------------------------------------------------------------- /slsa_for_models/tensorflow_cifar10.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 The Sigstore Authors 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | 16 | # We will do a lazy import for these 2 modules, exploiting Python's symbol 17 | # resolution. The lazy import is needed to make sure we only import TensorFlow 18 | # libraries only if we want to train a TensorFlow model. 19 | tf = None 20 | tfds = None 21 | 22 | 23 | def pretraining(): 24 | """Perform setup required before training. 25 | 26 | Does the lazy loading of TensorFlow too, to prevent compatibility issues 27 | with mixing TensorFlow and PyTorch imports. 28 | """ 29 | global tf 30 | global tfds 31 | import tensorflow as tf 32 | import tensorflow_datasets as tfds 33 | # Also compile model using XLA for ~20% performance gain 34 | tf.config.optimizer.set_jit(True) 35 | 36 | 37 | def load_data(): 38 | """Load the CIFAR10 data. 39 | 40 | Obtains both the train and the test splits. According to 41 | https://www.cs.toronto.edu/~kriz/cifar.html, there should be 50000 training 42 | images and 10000 test ones. Each image is 32x32 RGB. 43 | 44 | Data is normalized to be in [0, 1). Labels are one-hot encoded. 45 | 46 | Returns train and test pairs. Each pair consists of features and labels 47 | vectors of similar size. 
48 | """ 49 | result = tfds.load('cifar10', batch_size=-1) 50 | x_train = result['train']['image'] 51 | y_train = result['train']['label'] 52 | x_test = result['test']['image'] 53 | y_test = result['test']['label'] 54 | 55 | # transform input 56 | x_train = x_train.numpy().astype('float32') / 256 57 | x_test = x_test.numpy().astype('float32') / 256 58 | y_train = tf.keras.utils.to_categorical(y_train, num_classes=10) 59 | y_test = tf.keras.utils.to_categorical(y_test, num_classes=10) 60 | 61 | return (x_train, y_train), (x_test, y_test) 62 | 63 | 64 | def create_model(in_shape): 65 | """Create a TensorFlow NN model. 66 | 67 | The model is taken from the tutorial at 68 | https://www.tensorflow.org/xla/tutorials/autoclustering_xla. 69 | 70 | We need to pass as argument the expected input shape. 71 | 72 | Returns the model. 73 | """ 74 | x, _, c = in_shape 75 | return tf.keras.models.Sequential([ 76 | tf.keras.layers.Conv2D(x, (c, c), padding='same', 77 | input_shape=in_shape), 78 | tf.keras.layers.Activation('relu'), 79 | tf.keras.layers.Conv2D(x, (c, c)), 80 | tf.keras.layers.Activation('relu'), 81 | tf.keras.layers.MaxPooling2D(pool_size=(2, 2)), 82 | tf.keras.layers.Dropout(0.25), 83 | tf.keras.layers.Conv2D(2*x, (c, c), padding='same'), 84 | tf.keras.layers.Activation('relu'), 85 | tf.keras.layers.Conv2D(2*x, (c, c)), 86 | tf.keras.layers.Activation('relu'), 87 | tf.keras.layers.MaxPooling2D(pool_size=(2, 2)), 88 | tf.keras.layers.Dropout(0.25), 89 | tf.keras.layers.Flatten(), 90 | tf.keras.layers.Dense(512), 91 | tf.keras.layers.Activation('relu'), 92 | tf.keras.layers.Dropout(0.5), 93 | tf.keras.layers.Dense(10), 94 | tf.keras.layers.Activation('softmax'), 95 | ]) 96 | 97 | 98 | def prepare_model(model): 99 | """Prepare model for training with loss and optimizer.""" 100 | opt = tf.keras.optimizers.RMSprop(learning_rate=0.0001) 101 | model.compile(loss='categorical_crossentropy', 102 | optimizer=opt, 103 | metrics=['accuracy']) 104 | return model 105 | 106 | 107 | def train_model(model, train, test): 108 | """Train a model on the training set. 109 | 110 | The test set is used for cross validation. 111 | """ 112 | x, y = train 113 | model.fit(x, y, batch_size=256, epochs=16, 114 | validation_data=test, shuffle=True) 115 | 116 | 117 | def score_model(model, test): 118 | """Score a trained model on the test set.""" 119 | x, y = test 120 | scores = model.evaluate(x, y, verbose=1) 121 | print(f'Test loss: {scores[0]}') 122 | print(f'Test accuracy: {scores[1]}') 123 | 124 | 125 | def supported_models(): 126 | """Returns supported model types paired with method to save them.""" 127 | return { 128 | # New Keras format 129 | 'tensorflow_model.keras': lambda m, p: m.save(p, save_format='keras'), 130 | # TF SavedModel formats, full model and weights only 131 | # TODO: Re-enable support for these when SLSA supports directories 132 | # 'tensorflow_saved_model': lambda m, p: m.save(p, save_format='tf'), 133 | # 'tensorflow_exported_model': lambda m, p: m.export(p), 134 | # Legacy HDFS format, full model and weights only 135 | 'tensorflow_hdf5_model.h5': lambda m, p: m.save(p, save_format='h5'), 136 | 'tensorflow_hdf5.weights.h5': lambda m, p: m.save_weights(p), 137 | } 138 | 139 | 140 | def save_model(model, model_format): 141 | """Save the model after training to be transferred to production. 142 | 143 | Saves in the requested format, if supported by TensorFlow. 
144 | """ 145 | saver = supported_models().get(model_format, None) 146 | if not saver: 147 | raise ValueError( 148 | 'Requested a model format not supported by TensorFlow') 149 | saver(model, './' + model_format) 150 | 151 | 152 | def model_pipeline(model_format): 153 | """Train a model and save it in the requested format.""" 154 | pretraining() 155 | data = load_data() 156 | model = create_model(data[0][0].shape[1:]) 157 | model = prepare_model(model) 158 | train_model(model, data[0], data[1]) 159 | score_model(model, data[1]) 160 | save_model(model, model_format) 161 | -------------------------------------------------------------------------------- /slsa_for_models/pytorch_cifar10.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 The Sigstore Authors 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | 16 | # We will do a lazy import for these 7 modules, exploiting Python's symbol 17 | # resolution. The lazy import is needed to make sure we only import PyTorch 18 | # libraries only if we want to train a PyTorch model. 19 | torch = None 20 | nn = None 21 | F = None 22 | optim = None 23 | torchvision = None 24 | transforms = None 25 | 26 | 27 | def pretraining(): 28 | """Perform setup required before training. 29 | 30 | Does the lazy loading of TensorFlow too, to prevent compatibility issues 31 | with mixing TensorFlow and PyTorch imports. 32 | """ 33 | global torch 34 | global nn 35 | global F 36 | global optim 37 | global torchvision 38 | global transforms 39 | import torch 40 | import torch.nn as nn 41 | import torch.nn.functional as F 42 | import torch.optim as optim 43 | import torchvision 44 | import torchvision.transforms as transforms 45 | 46 | 47 | def load_data(): 48 | """Load the CIFAR10 data. 49 | 50 | Obtains both the train and the test splits. According to 51 | https://www.cs.toronto.edu/~kriz/cifar.html, there should be 50000 training 52 | images and 10000 test ones. Each image is 32x32 RGB. 53 | 54 | Data is normalized to be in range [-1, 1]. 55 | 56 | Returns iterators to train and test sets. 57 | """ 58 | transform = transforms.Compose([ 59 | transforms.ToTensor(), 60 | transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)) 61 | ]) 62 | 63 | batch_size = 4 64 | num_workers = 2 65 | 66 | trainset = torchvision.datasets.CIFAR10(root='./data', train=True, 67 | download=True, transform=transform) 68 | trainloader = torch.utils.data.DataLoader(trainset, batch_size=batch_size, 69 | shuffle=True, 70 | num_workers=num_workers) 71 | testset = torchvision.datasets.CIFAR10(root='./data', train=False, 72 | download=True, transform=transform) 73 | testloader = torch.utils.data.DataLoader(testset, batch_size=batch_size, 74 | shuffle=True, 75 | num_workers=num_workers) 76 | 77 | return trainloader, testloader 78 | 79 | 80 | def create_model(): 81 | """Create a Torch NN model. 
82 | 83 | The model is taken from the tutorial at 84 | https://pytorch.org/tutorials/beginner/blitz/cifar10_tutorial.html. 85 | 86 | Returns the model. 87 | """ 88 | # Train a model based on tutorial from 89 | # https://pytorch.org/tutorials/beginner/blitz/cifar10_tutorial.html. 90 | # We inline the class to be able to use lazy loading of PyTorch modules. 91 | class MyModel(nn.Module): 92 | def __init__(self): 93 | super().__init__() 94 | self.conv1 = nn.Conv2d(3, 6, 5) 95 | self.pool = nn.MaxPool2d(2, 2) 96 | self.conv2 = nn.Conv2d(6, 16, 5) 97 | self.fc1 = nn.Linear(16 * 5 * 5, 120) 98 | self.fc2 = nn.Linear(120, 84) 99 | self.fc3 = nn.Linear(84, 10) 100 | 101 | def forward(self, x): 102 | x = self.pool(F.relu(self.conv1(x))) 103 | x = self.pool(F.relu(self.conv2(x))) 104 | x = torch.flatten(x, 1) 105 | x = F.relu(self.fc1(x)) 106 | x = F.relu(self.fc2(x)) 107 | x = self.fc3(x) 108 | return x 109 | 110 | return MyModel() 111 | 112 | 113 | def prepare_model(model): 114 | """Prepare model for training with loss and optimizer.""" 115 | # We only need to return loss and optimizer 116 | loss = nn.CrossEntropyLoss() 117 | optimizer = optim.SGD(model.parameters(), lr=0.001, momentum=0.9) 118 | return loss, optimizer 119 | 120 | 121 | def train_model(model, loss, optimizer, train): 122 | """Train a model on the training set.""" 123 | num_epochs = 2 124 | batch_size = 2000 125 | for epoch in range(num_epochs): 126 | running_loss = 0.0 127 | for i, data in enumerate(train, 1): 128 | x, y = data 129 | optimizer.zero_grad() 130 | outputs = model(x) 131 | loss_score = loss(outputs, y) 132 | loss_score.backward() 133 | optimizer.step() 134 | running_loss += loss_score.item() 135 | if i % batch_size == 0: 136 | print(f'[{epoch}, {i:5d}], ' 137 | f'loss: {running_loss / batch_size :.3f}') 138 | running_loss = 0.0 139 | 140 | 141 | def score_model(model, test): 142 | """Score a trained model on the test set.""" 143 | correct = 0 144 | total = 0 145 | with torch.no_grad(): 146 | for data in test: 147 | x, y = data 148 | outputs = model(x) 149 | _, predicted = torch.max(outputs.data, 1) 150 | total += y.size(0) 151 | correct += (predicted == y).sum().item() 152 | print(f'Test accuracy: {correct / total}') 153 | 154 | 155 | def supported_models(): 156 | """Returns supported model types paired with method to save them.""" 157 | return { 158 | 'pytorch_model.pth': lambda m, p: torch.save(m.state_dict(), p), 159 | 'pytorch_full_model.pth': lambda m, p: torch.save(m, p), 160 | 'pytorch_jitted_model.pt': lambda m, p: torch.jit.script(m).save(p), 161 | } 162 | 163 | 164 | def save_model(model, model_format): 165 | """Save the model after training to be transferred to production. 166 | 167 | Saves in the requested format, if supported by PyTorch. 
168 | """ 169 | saver = supported_models().get(model_format, None) 170 | if not saver: 171 | raise ValueError('Requested a model format not supported by PyTorch') 172 | saver(model, './' + model_format) 173 | 174 | 175 | def model_pipeline(model_format): 176 | """Train a model and save it in the requested format.""" 177 | pretraining() 178 | data = load_data() 179 | model = create_model() 180 | loss, optimizer = prepare_model(model) 181 | train_model(model, loss, optimizer, data[0]) 182 | score_model(model, data[1]) 183 | save_model(model, model_format) 184 | -------------------------------------------------------------------------------- /model_signing/model.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 The Sigstore Authors 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | from sigstore.sign import SigningContext 16 | 17 | from sigstore.oidc import ( 18 | IdentityToken, 19 | ExpiredIdentity, 20 | Issuer, 21 | detect_credential, 22 | ) 23 | from sigstore_protobuf_specs.dev.sigstore.bundle.v1 import Bundle 24 | from sigstore.verify import ( 25 | policy, 26 | Verifier, 27 | ) 28 | from sigstore.verify.models import ( 29 | VerificationMaterials, 30 | ) 31 | 32 | from sigstore._internal.fulcio.client import ( 33 | ExpiredCertificate, 34 | ) 35 | 36 | import io 37 | from pathlib import Path 38 | from typing import Optional 39 | from serialize import Serializer 40 | import psutil 41 | import sys 42 | 43 | 44 | def chunk_size() -> int: 45 | return int(psutil.virtual_memory().available // 2) 46 | 47 | 48 | # TODO: Update this class to have a status instead of success. 49 | class BaseResult: 50 | def __init__(self, success: bool = True, reason: str = "success"): 51 | self.success = success 52 | self.reason = reason 53 | 54 | def __bool__(self) -> bool: 55 | return self.success 56 | 57 | def __str__(self) -> str: 58 | return f"success=\"{self.success}\" reason=\"{self.reason}\"" 59 | 60 | 61 | class SignatureResult(BaseResult): 62 | pass 63 | 64 | 65 | class SigstoreSigner(): 66 | def __init__(self, 67 | disable_ambient: bool = False, 68 | start_default_browser: bool = False, 69 | oidc_issuer: str = None): 70 | self.signing_ctx = SigningContext.production() 71 | self.disable_ambient = disable_ambient 72 | self.start_default_browser = start_default_browser 73 | self.oidc_issuer = oidc_issuer 74 | # NOTE: The client ID to use during OAuth2 flow. 75 | self.client_id = "sigstore" 76 | 77 | def get_identity_token(self) -> Optional[IdentityToken]: 78 | token: IdentityToken 79 | client_id = self.client_id 80 | if not self.disable_ambient: 81 | token = detect_credential() 82 | # Happy path: we've detected an ambient credential, 83 | # so we can return early. 84 | if token: 85 | return IdentityToken(token) 86 | 87 | # TODO(): Support staging for testing. 
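# No ambient credential was found (or ambient detection was disabled), so
# fall back to an interactive OAuth flow against the configured issuer,
# defaulting to the production Sigstore issuer.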
88 | if self.oidc_issuer is not None: 89 | issuer = Issuer(self.oidc_issuer) 90 | else: 91 | issuer = Issuer.production() 92 | 93 | token = issuer.identity_token(client_id=client_id, 94 | force_oob=not self.start_default_browser) 95 | return token 96 | 97 | # NOTE: Only path in the top-level folder are considered for ignorepaths. 98 | def sign(self, inputfn: Path, signaturefn: Path, 99 | ignorepaths: [Path]) -> SignatureResult: 100 | try: 101 | oidc_token = self.get_identity_token() 102 | if not oidc_token: 103 | raise ValueError("No identity token supplied or detected!") 104 | # Calling the private attribute IdentityToken._federated issuer 105 | # is a workaround for earlier versions of sigstore-python (<3.0.0) 106 | # that do not support the federated_issuer property. 107 | print(f"identity-provider: {oidc_token._federated_issuer}", 108 | file=sys.stderr) 109 | print(f"identity: {oidc_token.identity}", file=sys.stderr) 110 | 111 | contentio = io.BytesIO(Serializer.serialize_v1( 112 | inputfn, chunk_size(), signaturefn, ignorepaths)) 113 | with self.signing_ctx.signer(oidc_token) as signer: 114 | result = signer.sign(input_=contentio) 115 | with signaturefn.open(mode="w") as b: 116 | print(result.to_bundle().to_json(), file=b) 117 | return SignatureResult() 118 | except ExpiredIdentity: 119 | return SignatureResult(success=False, 120 | reason="exception caught: Signature failed: identity token has expired") # noqa: E501 121 | except ExpiredCertificate: 122 | return SignatureResult(success=False, 123 | reason="exception caught: Signature failed: Fulcio signing certificate has expired") # noqa: E501 124 | except Exception as e: 125 | return SignatureResult(success=False, 126 | reason=f"exception caught: {str(e)}") 127 | 128 | 129 | # TODO: re-visit error handling and use a verbosity mode 130 | # to avoid leaking info 131 | class VerificationResult(BaseResult): 132 | pass 133 | 134 | 135 | class SigstoreVerifier(): 136 | def __init__(self, oidc_provider: str, identity: str): 137 | self.oidc_provider = oidc_provider 138 | self.identity = identity 139 | self.verifier = Verifier.production() 140 | 141 | # NOTE: Only path in the top-level folder are considered for ignorepaths. 142 | def verify(self, inputfn: Path, signaturefn: Path, 143 | ignorepaths: [Path], offline: bool) -> VerificationResult: 144 | try: 145 | bundle_bytes = signaturefn.read_bytes() 146 | bundle = Bundle().from_json(bundle_bytes) 147 | 148 | material: tuple[Path, VerificationMaterials] 149 | contentio = io.BytesIO(Serializer.serialize_v1( 150 | inputfn, chunk_size(), signaturefn, ignorepaths)) 151 | material = VerificationMaterials.from_bundle(input_=contentio, 152 | bundle=bundle, 153 | offline=offline) 154 | policy_ = policy.Identity( 155 | identity=self.identity, 156 | issuer=self.oidc_provider, 157 | ) 158 | result = self.verifier.verify(materials=material, policy=policy_) 159 | if result: 160 | return VerificationResult() 161 | return VerificationResult(success=False, reason=result.reason) 162 | except Exception as e: 163 | return VerificationResult(success=False, 164 | reason=f"exception caught: {str(e)}") 165 | raise ValueError("unreachable") 166 | -------------------------------------------------------------------------------- /slsa_for_models/gcp/README.md: -------------------------------------------------------------------------------- 1 | # SLSA for Models on Google Cloud Platform 2 | 3 | This project uses [Tekton][tekton] to generate SLSA provenance for ML models on 4 | Google Cloud Platform (GCP). 
It uses [Google Kubernetes Engine][gke] (GKE), 5 | [Artifact Registry][ar], [Tekton] and [Sigstore]. 6 | 7 | ## Guide 8 | 9 | 1. To get started, you'll need to have a [GCP Project][gcp]. You will also need 10 | to have these CLI tools installed: 11 | - [`gcloud`][gcloud] 12 | - [`kubectl`][kubectl] 13 | - [`tkn`][tkn] 14 | - [`cosign`][cosign] 15 | 16 | 2. Enable the needed services: 17 | 18 | ```bash 19 | gcloud services enable \ 20 | container.googleapis.com \ 21 | artifactregistry.googleapis.com 22 | ``` 23 | 24 | 3. Create a GKE cluster: 25 | 26 | 1. Set the `PROJECT_ID` environment variable from your GCP project: 27 | 28 | ```bash 29 | export PROJECT_ID= 30 | ``` 31 | 32 | 2. Set the `CLUSTER_NAME` environment variable to a cluster name of your 33 | choice: 34 | 35 | ```bash 36 | export CLUSTER_NAME= 37 | ``` 38 | 39 | 3. Create a cluster: 40 | 41 | ```bash 42 | gcloud container clusters create $CLUSTER_NAME \ 43 | --enable-autoscaling \ 44 | --min-nodes=1 \ 45 | --max-nodes=3 \ 46 | --scopes=cloud-platform \ 47 | --no-issue-client-certificate \ 48 | --project=$PROJECT_ID \ 49 | --region=us-central1 \ 50 | --machine-type=e2-standard-4 \ 51 | --num-nodes=1 \ 52 | --cluster-version=latest 53 | ``` 54 | 55 | 4. Install Tekton: 56 | 57 | 1. Install Tekton Pipelines: 58 | 59 | ```bash 60 | kubectl apply --filename https://storage.googleapis.com/tekton-releases/pipeline/latest/release.yaml 61 | ``` 62 | 63 | 2. Install Tekton Chains: 64 | 65 | ```bash 66 | kubectl apply --filename https://storage.googleapis.com/tekton-releases/chains/latest/release.yaml 67 | ``` 68 | 69 | 5. Verify your Tekton installation was successful: 70 | 71 | 1. Check that Tekton Pipelines Pods are running in Kubernetes: 72 | 73 | ```bash 74 | kubectl get pods -n tekton-pipelines 75 | ``` 76 | 77 | 2. Check that Tekton Chains Pods are running in Kubernetes: 78 | 79 | ```bash 80 | kubectl get pods -n tekton-chains 81 | ``` 82 | 83 | 6. Configure Tekton: 84 | 85 | 1. Configure Tekton Pipelines to enable enumerations and alpha features: 86 | 87 | ```bash 88 | kubectl patch cm feature-flags -n tekton-pipelines -p '{"data":{ 89 | "enable-param-enum":"true", 90 | "enable-api-fields":"alpha" 91 | }}' 92 | ``` 93 | 94 | 2. Then restart the Tekton Pipelines controller to ensure it picks up the 95 | changes: 96 | 97 | ```bash 98 | kubectl delete pods -n tekton-pipelines -l app=tekton-pipelines-controller 99 | ``` 100 | 101 | 3. Configure Tekton Chains to enable transparency log, set SLSA format and 102 | configure storage: 103 | 104 | ```bash 105 | kubectl patch configmap chains-config -n tekton-chains -p='{"data":{ 106 | "transparency.enabled": "true", 107 | "artifacts.taskrun.format":"slsa/v2alpha2", 108 | "artifacts.taskrun.storage": "tekton", 109 | "artifacts.pipelinerun.format":"slsa/v2alpha2", 110 | "artifacts.pipelinerun.storage": "tekton" 111 | }}' 112 | ``` 113 | 4. Then restart the Tekton Chains controller to ensure it picks up the 114 | changes: 115 | 116 | ```bash 117 | kubectl delete pods -n tekton-chains -l app=tekton-chains-controller 118 | ``` 119 | 120 | 7. Generate an encrypted x509 keypair and save it as a Kubernetes secret: 121 | 122 | ```bash 123 | cosign generate-key-pair k8s://tekton-chains/signing-secrets 124 | ``` 125 | 126 | 8. (Optional) View the Tekton resources: 127 | 128 | 1. View the git-clone `Task`: 129 | 130 | ```bash 131 | cat slsa_for_models/gcp/tasks/git-clone.yml 132 | ``` 133 | 134 | 2. 
View the build-model `Task`: 135 | 136 | ```bash 137 | cat slsa_for_models/gcp/tasks/build-model.yml 138 | ``` 139 | 140 | 3. View the upload-model `Task`: 141 | 142 | ```bash 143 | cat slsa_for_models/gcp/tasks/upload-model.yml 144 | ``` 145 | 146 | 4. View the `Pipeline`: 147 | 148 | ```bash 149 | cat slsa_for_models/gcp/pipeline.yml 150 | ``` 151 | 152 | 5. View the `PipelineRun`: 153 | 154 | ```bash 155 | cat slsa_for_models/gcp/pipelinerun.yml 156 | ``` 157 | 158 | 9. Apply the `Pipeline`: 159 | 160 | ```bash 161 | kubectl apply -f slsa_for_models/gcp/pipeline.yml 162 | ``` 163 | 164 | 10. Create a generic repository in Artifact Registry: 165 | 166 | 1. Set the `REPOSITORY_NAME` environment variable to a name of your choice: 167 | 168 | ```bash 169 | export REPOSITORY_NAME=ml-artifacts 170 | ``` 171 | 172 | 2. Set the `LOCATION` environment variable to a [location] of your choice: 173 | 174 | ```bash 175 | export LOCATION=us 176 | ``` 177 | 178 | 3. Create a generic repository: 179 | ```bash 180 | gcloud artifacts repositories create $REPOSITORY_NAME \ 181 | --location=$LOCATION \ 182 | --repository-format=generic 183 | ``` 184 | 185 | 4. If you set a different repository name and location from the example 186 | above, make sure to modify the `Parameter` named 'model-storage' in the 187 | `PipelineRun` with your own values. 188 | 189 | 11. Execute the `PipelineRun`: 190 | 191 | ```bash 192 | kubectl create -f slsa_for_models/gcp/pipelinerun.yml 193 | ``` 194 | 195 | 12. Observe the `PipelineRun` execution: 196 | 197 | ```bash 198 | export PIPELINERUN_NAME=$(tkn pr describe --last --output jsonpath='{.metadata.name}') 199 | tkn pipelinerun logs $PIPELINERUN_NAME --follow 200 | ``` 201 | 202 | 13. When the `PipelineRun` succeeds, view its status: 203 | 204 | ```bash 205 | kubectl get pipelinerun $PIPELINERUN_NAME --output yaml 206 | ``` 207 | 208 | 14. View the transparency log entry in the public [Rekor][rekor] instance: 209 | 210 | ```bash 211 | export TLOG_ENTRY=$(tkn pr describe $PIPELINERUN_NAME --output jsonpath="{.metadata.annotations.chains\.tekton\.dev/transparency}") 212 | open $TLOG_ENTRY 213 | ``` 214 | 215 | 15. Retrieve the attestation from the `PipelineRun` which is stored as a base64-encoded annotation: 216 | 217 | ```bash 218 | export PIPELINERUN_UID=$(tkn pr describe $PIPELINERUN_NAME --output jsonpath='{.metadata.uid}') 219 | tkn pr describe $PIPELINERUN_NAME --output jsonpath="{.metadata.annotations.chains\.tekton\.dev/signature-pipelinerun-$PIPELINERUN_UID}" | base64 -d > pytorch_model.pth.build-slsa 220 | ``` 221 | 222 | 16. View the attestation: 223 | 224 | ```bash 225 | cat pytorch_model.pth.build-slsa | tr -d '\n' | pbcopy 226 | pbpaste | jq '.payload | @base64d | fromjson' 227 | ``` 228 | 229 | 17. Download the model: 230 | 231 | ```bash 232 | export MODEL_VERSION=$(tkn pr describe $PIPELINERUN_NAME --output jsonpath='{.status.results[1].value.digest}' | cut -d ':' -f 2) 233 | gcloud artifacts generic download \ 234 | --package=pytorch-model \ 235 | --repository=$REPOSITORY_NAME \ 236 | --destination=. \ 237 | --version=$MODEL_VERSION 238 | ``` 239 | 240 | 18. 
Verify the attestation:
241 | 
242 |     ```bash
243 |     cosign verify-blob-attestation \
244 |         --key k8s://tekton-chains/signing-secrets \
245 |         --signature pytorch_model.pth.build-slsa \
246 |         --type slsaprovenance1 \
247 |         pytorch_model.pth
248 |     ```
249 | 
250 | ### Kubeflow on Tekton
251 | 
252 | Provide a [Kubeflow Pipeline](../kubeflow/README.md) that can be compiled into the above Tekton `Pipeline`
253 | using [Kubeflow on Tekton][tekton-kubeflow].
254 | 
255 | ## Future Work
256 | 
257 | ### Automate Provenance Verification
258 | 
259 | Demonstrate how to verify the provenance of the model before deploying and
260 | serving the model.
261 | 
262 | ### Automated Testing
263 | 
264 | Trigger execution of the `PipelineRun` whenever changes are made in the
265 | codebase.
266 | 
267 | 
268 | ### Accelerators
269 | 
270 | Demonstrate training ML models that require multiple hours to train and
271 | require access to accelerators (e.g., GPUs, TPUs).
272 | 
273 | [gcp]: https://cloud.google.com/docs/get-started
274 | [gcloud]: https://cloud.google.com/sdk/docs/install
275 | [kubectl]: https://kubernetes.io/docs/tasks/tools/
276 | [tkn]: https://tekton.dev/docs/cli/
277 | [cosign]: https://docs.sigstore.dev/system_config/installation/
278 | [tekton-kubeflow]: https://www.kubeflow.org/docs/components/pipelines/v1/sdk/pipelines-with-tekton/
279 | [tekton-chains]: https://tekton.dev/docs/chains/
280 | [tekton]: https://tekton.dev/docs/
281 | [rekor]: https://rekor.sigstore.dev
282 | [location]: https://cloud.google.com/artifact-registry/docs/repositories/repo-locations
283 | [gke]: https://cloud.google.com/kubernetes-engine?hl=en
284 | [ar]: https://cloud.google.com/artifact-registry
285 | [sigstore]: https://docs.sigstore.dev
286 | 
--------------------------------------------------------------------------------
/model_signing/hashing/file.py:
--------------------------------------------------------------------------------
1 | # Copyright 2024 The Sigstore Authors
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | #     http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | """Machinery for computing digests for a single file.
16 | 
17 | Example usage for `FileHasher`:
18 | ```python
19 | >>> with open("/tmp/file", "w") as f:
20 | ...     f.write("abcd")
21 | >>> hasher = FileHasher("/tmp/file", SHA256())
22 | >>> digest = hasher.compute()
23 | >>> digest.digest_hex
24 | '88d4266fd4e6338d13b845fcf289579d209c897823b9217da3e161936f031589'
25 | ```
26 | 
27 | Example usage for `ShardedFileHasher`, reading only the second part of a file:
28 | ```python
29 | >>> with open("/tmp/file", "w") as f:
30 | ...     f.write("0123abcd")
31 | >>> hasher = ShardedFileHasher("/tmp/file", SHA256(), start=4, end=8)
32 | >>> digest = hasher.compute()
33 | >>> digest.digest_hex
34 | '88d4266fd4e6338d13b845fcf289579d209c897823b9217da3e161936f031589'
35 | ```
36 | """
37 | 
38 | import pathlib
39 | from typing_extensions import override
40 | 
41 | from model_signing.hashing import hashing
42 | 
43 | 
44 | class FileHasher(hashing.HashEngine):
45 |     """Generic file hash engine.
46 | 
47 |     To compute the hash of a file, we read the file exactly once, including for
48 |     very large files that don't fit in memory. Files are read in chunks and each
49 |     chunk is passed to the `update` method of an inner
50 |     `hashing.StreamingHashEngine` instance. This ensures that the file digest
51 |     will not change even if the chunk size changes. As such, we can dynamically
52 |     determine an optimal value for the chunk argument.
53 | 
54 |     The `digest_name()` method MUST record all parameters that influence the
55 |     hash output. For example, if a file is split into shards which are hashed
56 |     separately and the final digest value is computed by aggregating these
57 |     hashes, then the shard size must be given in the output string. However, for
58 |     simplicity, predefined names can be used to override the `digest_name()`
59 |     output.
60 |     """
61 | 
62 |     def __init__(
63 |         self,
64 |         file: pathlib.Path,
65 |         content_hasher: hashing.StreamingHashEngine,
66 |         *,
67 |         chunk_size: int = 8192,
68 |         digest_name_override: str | None = None,
69 |     ):
70 |         """Initializes an instance to hash a file with a specific `HashEngine`.
71 | 
72 |         Args:
73 |             file: The file to hash. Use `set_file` to reset it.
74 |             content_hasher: A `hashing.StreamingHashEngine` instance used to
75 |                 compute the digest of the file.
76 |             chunk_size: The amount of file to read at once. Default is 8KB. A
77 |                 special value of 0 signals to attempt to read everything in a
78 |                 single call.
79 |             digest_name_override: Optional string to allow overriding the
80 |                 `digest_name` property to support shorter, standardized names.
81 |         """
82 |         if chunk_size < 0:
83 |             raise ValueError(
84 |                 f"Chunk size must be non-negative, got {chunk_size}."
85 |             )
86 | 
87 |         self._file = file
88 |         self._content_hasher = content_hasher
89 |         self._chunk_size = chunk_size
90 |         self._digest_name_override = digest_name_override
91 | 
92 |     def set_file(self, file: pathlib.Path) -> None:
93 |         """Redefines the file to be hashed in `compute`."""
94 |         self._file = file
95 | 
96 |     @override
97 |     @property
98 |     def digest_name(self) -> str:
99 |         if self._digest_name_override is not None:
100 |             return self._digest_name_override
101 |         return f"file-{self._content_hasher.digest_name}"
102 | 
103 |     @override
104 |     def compute(self) -> hashing.Digest:
105 |         self._content_hasher.reset()
106 | 
107 |         if self._chunk_size == 0:
108 |             with open(self._file, "rb") as f:
109 |                 self._content_hasher.update(f.read())
110 |         else:
111 |             with open(self._file, "rb") as f:
112 |                 while True:
113 |                     data = f.read(self._chunk_size)
114 |                     if not data:
115 |                         break
116 |                     self._content_hasher.update(data)
117 | 
118 |         digest = self._content_hasher.compute()
119 |         return hashing.Digest(self.digest_name, digest.digest_value)
120 | 
121 | 
122 | class ShardedFileHasher(FileHasher):
123 |     """File hash engine that can be invoked in parallel.
124 | 
125 |     To efficiently support hashing large files, this class provides an ability
126 |     to compute the digest over a shard of the file. It is the responsibility of
127 |     the user to compose the digests of each shard into a single digest for the
128 |     entire file.
129 |     """
130 | 
131 |     def __init__(
132 |         self,
133 |         file: pathlib.Path,
134 |         content_hasher: hashing.StreamingHashEngine,
135 |         *,
136 |         start: int,
137 |         end: int,
138 |         chunk_size: int = 8192,
139 |         shard_size: int = 1000000,
140 |         digest_name_override: str | None = None,
141 |     ):
142 |         """Initializes an instance to hash a file with a specific `HashEngine`.
143 | 
144 |         Args:
145 |             file: The file to hash. Use `set_file` to reset it.
146 |             content_hasher: A `hashing.HashEngine` instance used to compute the
147 |                 digest of the file. This instance must not be used outside of this
148 |                 instance. However, it may be pre-initialized with a header.
149 |             start: The file offset to start reading from. Must be valid. Reset
150 |                 with `set_shard`.
151 |             end: The file offset to stop reading at. Must be strictly greater
152 |                 than start. If past the file size, or -1, it will be trimmed.
153 |                 Reset with `set_shard`.
154 |             chunk_size: The amount of file to read at once. Default is 8KB. A
155 |                 special value of 0 signals to attempt to read everything in a
156 |                 single call.
157 |             shard_size: The maximum size of one file shard. Default is 1,000,000 bytes.
158 |             digest_name_override: Optional string to allow overriding the
159 |                 `digest_name` property to support shorter, standardized names.
160 |         """
161 |         super().__init__(
162 |             file=file,
163 |             content_hasher=content_hasher,
164 |             chunk_size=chunk_size,
165 |             digest_name_override=digest_name_override,
166 |         )
167 | 
168 |         if shard_size <= 0:
169 |             raise ValueError(
170 |                 f"Shard size must be strictly positive, got {shard_size}."
171 |             )
172 |         self._shard_size = shard_size
173 | 
174 |         self.set_shard(start=start, end=end)
175 | 
176 |     def set_shard(self, *, start: int, end: int) -> None:
177 |         """Redefines the file shard to be hashed in `compute`."""
178 |         if start < 0:
179 |             raise ValueError(
180 |                 f"File start offset must be non-negative, got {start}."
181 |             )
182 |         if end <= start:
183 |             raise ValueError(
184 |                 "File end offset must be strictly greater than the file start"
185 |                 f" offset, got {start=}, {end=}."
186 |             )
187 |         read_length = end - start
188 |         if read_length > self._shard_size:
189 |             raise ValueError(
190 |                 f"Must not read more than shard_size={self._shard_size}, got"
191 |                 f" {read_length}."
192 |             )
193 | 
194 |         self._start = start
195 |         self._end = end
196 | 
197 |     @override
198 |     def compute(self) -> hashing.Digest:
199 |         self._content_hasher.reset()
200 | 
201 |         with open(self._file, "rb") as f:
202 |             f.seek(self._start)
203 |             to_read = self._end - self._start
204 |             if self._chunk_size == 0 or self._chunk_size >= to_read:
205 |                 data = f.read(to_read)
206 |                 self._content_hasher.update(data)
207 |             else:
208 |                 while to_read >= 0:
209 |                     data = f.read(min(self._chunk_size, to_read))
210 |                     if not data:
211 |                         break
212 |                     to_read -= len(data)
213 |                     self._content_hasher.update(data)
214 | 
215 |         digest = self._content_hasher.compute()
216 |         return hashing.Digest(self.digest_name, digest.digest_value)
217 | 
218 |     @override
219 |     @property
220 |     def digest_name(self) -> str:
221 |         if self._digest_name_override is not None:
222 |             return self._digest_name_override
223 |         return f"file-{self._content_hasher.digest_name}-{self._shard_size}"
224 | 
--------------------------------------------------------------------------------
/model_signing/README.md:
--------------------------------------------------------------------------------
1 | # Model Signing
2 | 
3 | This project demonstrates how to protect the integrity of a model by signing it
4 | with [Sigstore](https://www.sigstore.dev/), a tool for making code signatures
5 | transparent without requiring management of cryptographic key material.
6 | 
7 | When users download a given version of a signed model they can check that the
8 | signature comes from a known or trusted identity and thus that the model hasn't
9 | been tampered with after training.
10 | 
11 | Signing events are recorded to Sigstore's append-only transparency log.
12 | Transparency logs make signing events discoverable: Model verifiers can validate
13 | that the models they are looking at exist in the transparency log by checking a
14 | proof of inclusion (which is handled by the model signing library).
15 | Furthermore, model signers that monitor the log can check for any unexpected
16 | signing events.
17 | 
18 | Model signers should monitor for occurrences of their signing identity in the
19 | log. Sigstore is actively developing a [log
20 | monitor](https://github.com/sigstore/rekor-monitor) that runs on GitHub Actions.
21 | 
22 | ![Signing models with Sigstore](images/sigstore-model-diagram.png)
23 | 
24 | ## Usage
25 | 
26 | You will need to install a few prerequisites to be able to run all of the
27 | examples below:
28 | 
29 | ```bash
30 | sudo apt install git git-lfs python3-venv python3-pip unzip
31 | git lfs install
32 | ```
33 | 
34 | After this, you can clone the repository, create a Python virtual environment
35 | and install the dependencies needed by the project:
36 | 
37 | ```bash
38 | git clone git@github.com:sigstore/model-transparency.git
39 | cd model-transparency/model_signing
40 | python3 -m venv test_env
41 | source test_env/bin/activate
42 | os=Linux # Supported: Linux, Windows, Darwin.
43 | python3 -m pip install --require-hashes -r "install/requirements_${os}".txt
44 | ```
45 | 
46 | After this point, you can use the project to sign and verify models and
47 | checkpoints. A help message with all arguments can be obtained by passing the
48 | `-h` argument, either to the main driver or to the two subcommands:
49 | 
50 | ```bash
51 | python3 main.py -h
52 | python3 main.py sign -h
53 | python3 main.py verify -h
54 | ```
55 | 
56 | Signing a model requires passing an argument for the path to the model. This can
57 | be a path to a file or a directory (for large models, or model formats such as
58 | `SavedModel` which are stored as a directory of related files):
59 | 
60 | ```bash
61 | path=path/to/model
62 | python3 main.py sign --path "${path}"
63 | ```
64 | 
65 | The sign process will start an OIDC workflow to generate a short-lived
66 | certificate based on an identity provider. This will be relevant when verifying
67 | the signature, as shown below.
68 | 
69 | **Note**: The signature is stored as `<file>.sig` for a model serialized as a
70 | single file, and `<dir>/model.sig` for a model in a folder-based format.
71 | 
72 | For verification, we need to pass both the path to the model and identity
73 | related arguments:
74 | 
75 | ```bash
76 | python3 main.py verify --path "${path}" \
77 |     --identity-provider https://accounts.google.com \
78 |     --identity myemail@gmail.com
79 | ```
80 | 
81 | For developers signing models, there are three identity providers that can
82 | be used at the moment:
83 | 
84 | * Google's provider is `https://accounts.google.com`.
85 | * GitHub's provider is `https://github.com/login/oauth`.
86 | * Microsoft's provider is `https://login.microsoftonline.com`.
87 | 
88 | For automated signing using a workload identity, the following platforms
89 | are currently supported, shown with their expected identities:
90 | 
91 | * GitHub Actions
92 |   (`https://github.com/octo-org/octo-automation/.github/workflows/oidc.yml@refs/heads/main`)
93 | * GitLab CI
94 |   (`https://gitlab.com/my-group/my-project//path/to/.gitlab-ci.yml@refs/heads/main`)
95 | * Google Cloud Platform (`SERVICE_ACCOUNT_NAME@PROJECT_ID.iam.gserviceaccount.com`)
96 | * Buildkite CI (`https://buildkite.com/ORGANIZATION_SLUG/PIPELINE_SLUG`)
97 | 
98 | ### Supported Models
99 | 
100 | The library supports multiple models, from multiple training frameworks and
101 | model hubs.
102 | 
103 | For example, to sign and verify a Bertseq2seq model, trained with TensorFlow,
104 | stored in TFHub, run the following commands:
105 | 
106 | ```bash
107 | model_path=bertseq2seq
108 | wget "https://tfhub.dev/google/bertseq2seq/bert24_en_de/1?tf-hub-format=compressed" -O "${model_path}".tgz
109 | mkdir -p "${model_path}"
110 | cd "${model_path}" && tar xvzf ../"${model_path}".tgz && rm ../"${model_path}".tgz && cd -
111 | python3 main.py sign --path "${model_path}"
112 | python3 main.py verify --path "${model_path}" \
113 |     --identity-provider https://accounts.google.com \
114 |     --identity myemail@gmail.com
115 | ```
116 | 
117 | For models stored in Hugging Face we need the large file support from git, which
118 | can be obtained via
119 | 
120 | ```bash
121 | sudo apt install git-lfs
122 | git lfs install
123 | ```
124 | 
125 | After this, we can sign and verify a Bert base model:
126 | 
127 | ```bash
128 | model_name=bert-base-uncased
129 | model_path="${model_name}"
130 | git clone --depth=1 "https://huggingface.co/${model_name}" && rm -rf "${model_name}"/.git
131 | python3 main.py sign --path "${model_path}"
132 | python3 main.py verify --path "${model_path}" \
133 |     --identity-provider https://accounts.google.com \
134 |     --identity myemail@gmail.com
135 | ```
136 | 
137 | Similarly, we can sign and verify a Falcon model:
138 | 
139 | ```bash
140 | model_name=tiiuae/falcon-7b
141 | model_path=$(echo "${model_name}" | cut -d/ -f2)
142 | git clone --depth=1 "https://huggingface.co/${model_name}" && rm -rf "${model_path}"/.git
143 | python3 main.py sign --path "${model_path}"
144 | python3 main.py verify --path "${model_path}" \
145 |     --identity-provider https://accounts.google.com \
146 |     --identity myemail@gmail.com
147 | ```
148 | 
149 | We can also support models from the PyTorch Hub:
150 | 
151 | ```bash
152 | model_name=hustvl/YOLOP
153 | model_path=$(echo "${model_name}" | cut -d/ -f2)
154 | wget "https://github.com/${model_name}/archive/main.zip" -O "${model_path}".zip
155 | mkdir -p "${model_path}"
156 | cd "${model_path}" && unzip ../"${model_path}".zip && rm ../"${model_path}".zip && shopt -s dotglob && mv YOLOP-main/* . && shopt -u dotglob && rmdir YOLOP-main/ && cd -
157 | python3 main.py sign --path "${model_path}"
158 | python3 main.py verify --path "${model_path}" \
159 |     --identity-provider https://accounts.google.com \
160 |     --identity myemail@gmail.com
161 | ```
162 | 
163 | We also support ONNX models, for example Roberta:
164 | 
165 | ```bash
166 | model_name=roberta-base-11
167 | model_path="${model_name}.onnx"
168 | wget "https://github.com/onnx/models/raw/main/text/machine_comprehension/roberta/model/${model_name}.onnx"
169 | python3 main.py sign --path "${model_path}"
170 | python3 main.py verify --path "${model_path}" \
171 |     --identity-provider https://accounts.google.com \
172 |     --identity myemail@gmail.com
173 | ```
174 | 
175 | ## Benchmarking
176 | 
177 | Install as per [Usage section](#usage).
178 | Ensure you have enough disk space:
179 | - if passing 3rd script argument as `true`: at least 50GB
180 | - otherwise: at least 100GB
181 | 
182 | To run the benchmarks:
183 | 
184 | ```bash
185 | git clone git@github.com:sigstore/model-transparency.git
186 | cd model-transparency/model_signing
187 | bash benchmarks/run.sh https://accounts.google.com myemail@gmail.com [true]
188 | ```
189 | 
190 | A single run was performed.
191 | 
192 | Hashes used:
193 | - H1: Hashing using a tree representation of the directory.
194 | - H2: Hashing using a list representation of the directory. (Implementation is parallelized with 1GB shards across vCPUs).
195 | 
196 | Machine M1: Debian 6.3.11 x86_64 GNU/Linux, 200GB RAM, 48 vCPUs, 512KB cache, AMD EPYC 7B12:
197 | 
198 | | Hash | Model | Size | Sign Time | Verify Time |
199 | |------|--------------------|-------|:------:|:-----:|
200 | | H1 | roberta-base-11 | 8K | 0.8s | 0.6s |
201 | | H1 | hustvl/YOLOP | 215M | 1.2s | 0.8s |
202 | | H1 | bertseq2seq | 2.8G | 4.6s | 4.4s |
203 | | H1 | bert-base-uncased | 3.3G | 5s | 4.7s |
204 | | H1 | tiiuae/falcon-7b | 14GB | 12.2s | 11.8s |
205 | | H2 | roberta-base-11 | 8K | 1s | 0.6s |
206 | | H2 | hustvl/YOLOP | 215M | 1s | 1s |
207 | | H2 | bertseq2seq | 2.8G | 1.9s | 1.4s |
208 | | H2 | bert-base-uncased | 3.3G | 1.6s | 1.1s |
209 | | H2 | tiiuae/falcon-7b | 14GB | 2.1s | 1.8s |
210 | 
211 | Machine M2: Debian 5.10.1 x86_64 GNU/Linux, 4GB RAM, 2 vCPUs, 56320 KB cache, Intel(R) Xeon(R) CPU @ 2.20GHz:
212 | 
213 | | Hash | Model | Size | Sign Time | Verify Time |
214 | |------|--------------------|-------|:------:|:-----:|
215 | | H1 | roberta-base-11 | 8K | 1.1s | 0.7s |
216 | | H1 | hustvl/YOLOP | 215M | 1.9s | 1.7s |
217 | | H1 | bertseq2seq | 2.8G | 18s | 23.2s |
218 | | H1 | bert-base-uncased | 3.3G | 23.4s | 18.9s |
219 | | H1 | tiiuae/falcon-7b | 14GB | 2m4s | 2m2s |
220 | | H2 | roberta-base-11 | 8K | 1.1s | 0.8s |
221 | | H2 | hustvl/YOLOP | 215M | 1.9s | 1.6s |
222 | | H2 | bertseq2seq | 2.8G | 13.8s | 25.9s |
223 | | H2 | bert-base-uncased | 3.3G | 22.7s | 23.3s |
224 | | H2 | tiiuae/falcon-7b | 14GB | 2m1s | 2m3s |
225 | 
--------------------------------------------------------------------------------
/slsa_for_models/gcp/tasks/git-clone.yml:
--------------------------------------------------------------------------------
1 | # copied from https://github.com/tektoncd/catalog/tree/main/task/git-clone/0.7
2 | # and modified to contain type hinting for provenance generation -- remove when
3 | # the catalog updates the task to support type hinting
4 | apiVersion: tekton.dev/v1beta1
5 | kind: Task
6 | metadata:
7 |   name: git-clone
8 |   labels:
9 |     app.kubernetes.io/version: "0.7"
10 |   annotations:
11 |     tekton.dev/pipelines.minVersion: "0.29.0"
12 |     tekton.dev/categories: Git
13 |     tekton.dev/tags: git
14 |     tekton.dev/displayName: "git clone"
15 |     tekton.dev/platforms: "linux/amd64,linux/s390x,linux/ppc64le,linux/arm64"
16 | spec:
17 |   description: >-
18 |     These Tasks are Git tasks to work with repositories used by other tasks
19 |     in your Pipeline.
20 | 
21 |     The git-clone Task will clone a repo from the provided url into the
22 |     output Workspace. By default the repo will be cloned into the root of
23 |     your Workspace. You can clone into a subdirectory by setting this Task's
24 |     subdirectory param. This Task also supports sparse checkouts. To perform
25 |     a sparse checkout, pass a list of comma separated directory patterns to
26 |     this Task's sparseCheckoutDirectories param.
27 |   workspaces:
28 |     - name: output
29 |       description: The git repo will be cloned onto the volume backing this Workspace.
30 |     - name: ssh-directory
31 |       optional: true
32 |       description: |
33 |         A .ssh directory with private key, known_hosts, config, etc. Copied to
34 |         the user's home before git commands are executed. Used to authenticate
35 |         with the git remote when performing the clone. Binding a Secret to this
36 |         Workspace is strongly recommended over other volume types.
37 | - name: basic-auth 38 | optional: true 39 | description: | 40 | A Workspace containing a .gitconfig and .git-credentials file. These 41 | will be copied to the user's home before any git commands are run. Any 42 | other files in this Workspace are ignored. It is strongly recommended 43 | to use ssh-directory over basic-auth whenever possible and to bind a 44 | Secret to this Workspace over other volume types. 45 | - name: ssl-ca-directory 46 | optional: true 47 | description: | 48 | A workspace containing CA certificates, this will be used by Git to 49 | verify the peer with when fetching or pushing over HTTPS. 50 | params: 51 | - name: url 52 | description: Repository URL to clone from. 53 | type: string 54 | - name: revision 55 | description: Revision to checkout. (branch, tag, sha, ref, etc...) 56 | type: string 57 | default: "" 58 | - name: refspec 59 | description: Refspec to fetch before checking out revision. 60 | default: "" 61 | - name: submodules 62 | description: Initialize and fetch git submodules. 63 | type: string 64 | default: "true" 65 | - name: depth 66 | description: Perform a shallow clone, fetching only the most recent N commits. 67 | type: string 68 | default: "1" 69 | - name: sslVerify 70 | description: Set the `http.sslVerify` global git config. Setting this to `false` is not advised unless you are sure that you trust your git remote. 71 | type: string 72 | default: "true" 73 | - name: crtFileName 74 | description: file name of mounted crt using ssl-ca-directory workspace. default value is ca-bundle.crt. 75 | type: string 76 | default: "ca-bundle.crt" 77 | - name: subdirectory 78 | description: Subdirectory inside the `output` Workspace to clone the repo into. 79 | type: string 80 | default: "" 81 | - name: sparseCheckoutDirectories 82 | description: Define the directory patterns to match or exclude when performing a sparse checkout. 83 | type: string 84 | default: "" 85 | - name: deleteExisting 86 | description: Clean out the contents of the destination directory if it already exists before cloning. 87 | type: string 88 | default: "true" 89 | - name: httpProxy 90 | description: HTTP proxy server for non-SSL requests. 91 | type: string 92 | default: "" 93 | - name: httpsProxy 94 | description: HTTPS proxy server for SSL requests. 95 | type: string 96 | default: "" 97 | - name: noProxy 98 | description: Opt out of proxying HTTP/HTTPS requests. 99 | type: string 100 | default: "" 101 | - name: verbose 102 | description: Log the commands that are executed during `git-clone`'s operation. 103 | type: string 104 | default: "true" 105 | - name: gitInitImage 106 | description: The image providing the git-init binary that this Task runs. 107 | type: string 108 | default: "gcr.io/tekton-releases/github.com/tektoncd/pipeline/cmd/git-init:v0.29.0" 109 | - name: userHome 110 | description: | 111 | Absolute path to the user's home directory. Set this explicitly if you are running the image as a non-root user or have overridden 112 | the gitInitImage param with an image containing custom user configuration. 113 | type: string 114 | default: "/tekton/home" 115 | results: 116 | - name: commit 117 | description: The precise commit SHA that was fetched by this Task. 118 | - name: url 119 | description: The precise URL that was fetched by this Task. 
120 | - name: source_ARTIFACT_INPUTS 121 | properties: 122 | uri: { } 123 | digest: { } 124 | steps: 125 | - name: clone 126 | image: "$(params.gitInitImage)" 127 | env: 128 | - name: HOME 129 | value: "$(params.userHome)" 130 | - name: PARAM_URL 131 | value: $(params.url) 132 | - name: PARAM_REVISION 133 | value: $(params.revision) 134 | - name: PARAM_REFSPEC 135 | value: $(params.refspec) 136 | - name: PARAM_SUBMODULES 137 | value: $(params.submodules) 138 | - name: PARAM_DEPTH 139 | value: $(params.depth) 140 | - name: PARAM_SSL_VERIFY 141 | value: $(params.sslVerify) 142 | - name: PARAM_CRT_FILENAME 143 | value: $(params.crtFileName) 144 | - name: PARAM_SUBDIRECTORY 145 | value: $(params.subdirectory) 146 | - name: PARAM_DELETE_EXISTING 147 | value: $(params.deleteExisting) 148 | - name: PARAM_HTTP_PROXY 149 | value: $(params.httpProxy) 150 | - name: PARAM_HTTPS_PROXY 151 | value: $(params.httpsProxy) 152 | - name: PARAM_NO_PROXY 153 | value: $(params.noProxy) 154 | - name: PARAM_VERBOSE 155 | value: $(params.verbose) 156 | - name: PARAM_SPARSE_CHECKOUT_DIRECTORIES 157 | value: $(params.sparseCheckoutDirectories) 158 | - name: PARAM_USER_HOME 159 | value: $(params.userHome) 160 | - name: WORKSPACE_OUTPUT_PATH 161 | value: $(workspaces.output.path) 162 | - name: WORKSPACE_SSH_DIRECTORY_BOUND 163 | value: $(workspaces.ssh-directory.bound) 164 | - name: WORKSPACE_SSH_DIRECTORY_PATH 165 | value: $(workspaces.ssh-directory.path) 166 | - name: WORKSPACE_BASIC_AUTH_DIRECTORY_BOUND 167 | value: $(workspaces.basic-auth.bound) 168 | - name: WORKSPACE_BASIC_AUTH_DIRECTORY_PATH 169 | value: $(workspaces.basic-auth.path) 170 | - name: WORKSPACE_SSL_CA_DIRECTORY_BOUND 171 | value: $(workspaces.ssl-ca-directory.bound) 172 | - name: WORKSPACE_SSL_CA_DIRECTORY_PATH 173 | value: $(workspaces.ssl-ca-directory.path) 174 | script: | 175 | #!/usr/bin/env sh 176 | set -eu 177 | 178 | if [ "${PARAM_VERBOSE}" = "true" ] ; then 179 | set -x 180 | fi 181 | 182 | 183 | if [ "${WORKSPACE_BASIC_AUTH_DIRECTORY_BOUND}" = "true" ] ; then 184 | cp "${WORKSPACE_BASIC_AUTH_DIRECTORY_PATH}/.git-credentials" "${PARAM_USER_HOME}/.git-credentials" 185 | cp "${WORKSPACE_BASIC_AUTH_DIRECTORY_PATH}/.gitconfig" "${PARAM_USER_HOME}/.gitconfig" 186 | chmod 400 "${PARAM_USER_HOME}/.git-credentials" 187 | chmod 400 "${PARAM_USER_HOME}/.gitconfig" 188 | fi 189 | 190 | if [ "${WORKSPACE_SSH_DIRECTORY_BOUND}" = "true" ] ; then 191 | cp -R "${WORKSPACE_SSH_DIRECTORY_PATH}" "${PARAM_USER_HOME}"/.ssh 192 | chmod 700 "${PARAM_USER_HOME}"/.ssh 193 | chmod -R 400 "${PARAM_USER_HOME}"/.ssh/* 194 | fi 195 | 196 | if [ "${WORKSPACE_SSL_CA_DIRECTORY_BOUND}" = "true" ] ; then 197 | export GIT_SSL_CAPATH="${WORKSPACE_SSL_CA_DIRECTORY_PATH}" 198 | if [ "${PARAM_CRT_FILENAME}" != "" ] ; then 199 | export GIT_SSL_CAINFO="${WORKSPACE_SSL_CA_DIRECTORY_PATH}/${PARAM_CRT_FILENAME}" 200 | fi 201 | fi 202 | CHECKOUT_DIR="${WORKSPACE_OUTPUT_PATH}/${PARAM_SUBDIRECTORY}" 203 | 204 | cleandir() { 205 | # Delete any existing contents of the repo directory if it exists. 206 | # 207 | # We don't just "rm -rf ${CHECKOUT_DIR}" because ${CHECKOUT_DIR} might be "/" 208 | # or the root of a mounted volume. 209 | if [ -d "${CHECKOUT_DIR}" ] ; then 210 | # Delete non-hidden files and directories 211 | rm -rf "${CHECKOUT_DIR:?}"/* 212 | # Delete files and directories starting with . but excluding .. 213 | rm -rf "${CHECKOUT_DIR}"/.[!.]* 214 | # Delete files and directories starting with .. 
plus any other character 215 | rm -rf "${CHECKOUT_DIR}"/..?* 216 | fi 217 | } 218 | 219 | if [ "${PARAM_DELETE_EXISTING}" = "true" ] ; then 220 | cleandir 221 | fi 222 | 223 | test -z "${PARAM_HTTP_PROXY}" || export HTTP_PROXY="${PARAM_HTTP_PROXY}" 224 | test -z "${PARAM_HTTPS_PROXY}" || export HTTPS_PROXY="${PARAM_HTTPS_PROXY}" 225 | test -z "${PARAM_NO_PROXY}" || export NO_PROXY="${PARAM_NO_PROXY}" 226 | 227 | /ko-app/git-init \ 228 | -url="${PARAM_URL}" \ 229 | -revision="${PARAM_REVISION}" \ 230 | -refspec="${PARAM_REFSPEC}" \ 231 | -path="${CHECKOUT_DIR}" \ 232 | -sslVerify="${PARAM_SSL_VERIFY}" \ 233 | -submodules="${PARAM_SUBMODULES}" \ 234 | -depth="${PARAM_DEPTH}" \ 235 | -sparseCheckoutDirectories="${PARAM_SPARSE_CHECKOUT_DIRECTORIES}" 236 | cd "${CHECKOUT_DIR}" 237 | RESULT_SHA="$(git rev-parse HEAD)" 238 | EXIT_CODE="$?" 239 | if [ "${EXIT_CODE}" != 0 ] ; then 240 | exit "${EXIT_CODE}" 241 | fi 242 | printf "%s" "${RESULT_SHA}" > "$(results.commit.path)" 243 | printf "%s" "${PARAM_URL}" > "$(results.url.path)" 244 | 245 | # type hinting for provenance generation 246 | cat < bytes: 30 | header = ty.encode('utf-8') + b'.' + \ 31 | base64.b64encode(name.encode('utf-8')) + b'.' 32 | return header 33 | 34 | @staticmethod 35 | def root_folder(path: Path, content: bytes) -> str: 36 | return Hasher._node_folder_compute(name="root", content=content) 37 | 38 | @staticmethod 39 | def node_folder(path: Path, content: bytes) -> str: 40 | return Hasher._node_folder_compute(name=path.name, content=content) 41 | 42 | @staticmethod 43 | def _node_folder_compute(name: str, content: bytes) -> bytes: 44 | value = Hasher.node_header(name, "dir") + content 45 | return hashlib.sha256(value).digest() 46 | 47 | @staticmethod 48 | def root_file(path: Path, chunk: int) -> bytes: 49 | return Hasher._node_file_compute(path, b'', chunk) 50 | 51 | @staticmethod 52 | def node_file(path: Path, chunk: int = 0) -> bytes: 53 | if not path.is_file(): 54 | raise ValueError(f"path {path} is not a file") 55 | header = Hasher.node_header(path.name, "file") 56 | return Hasher._node_file_compute(path, header, chunk) 57 | 58 | @staticmethod 59 | def _node_file_compute(path: Path, header: bytes, chunk: int) -> bytes: 60 | h = hashlib.sha256(header) 61 | with open(path, "rb") as f: 62 | if chunk == 0: 63 | all_data = f.read() 64 | h.update(all_data) 65 | else: 66 | # Compute the hash by reading chunk bytes at a time. 67 | while True: 68 | chunk_data = f.read(chunk) 69 | if not chunk_data: 70 | break 71 | h.update(chunk_data) 72 | return h.digest() 73 | 74 | @staticmethod 75 | def _node_file_compute_v1(path: Path, header: bytes, 76 | start: int, end: int, chunk: int) -> bytes: 77 | h = hashlib.sha256(header) 78 | with open(path, "rb") as f: 79 | # WARNING: We must start reading the file at the starting offset. 80 | f.seek(start) 81 | # Read all at once. 82 | if chunk == 0 or chunk >= (end - start): 83 | content = f.read(end - start) 84 | # print(f"all: {f.name}: {start}-{end}") 85 | h.update(content) 86 | else: 87 | # Compute the hash by reading chunk bytes at a time. 
88 | remains = end - start 89 | while remains != 0: 90 | # read = (end - start) - remains 91 | # print(f"loop {i}: {f.name}: 92 | # {read}-{read + min(chunk, remains)}") 93 | processed = min(chunk, remains) 94 | chunk_data = f.read(processed) 95 | if processed != len(chunk_data): 96 | raise ValueError("internal: unread bytes: " + 97 | f"{processed} != {len(chunk_data)}") 98 | if not chunk_data: 99 | raise ValueError("internal: no data: " + 100 | f"filename={str(path)}, " + 101 | f"remains={remains}, " + 102 | f"{processed} != {len(chunk_data)}") 103 | h.update(chunk_data) 104 | remains -= processed 105 | return h.digest() 106 | 107 | 108 | def remove_prefix(text, prefix): 109 | if text.startswith(prefix): 110 | return text[len(prefix):] 111 | return text 112 | 113 | 114 | def validate_signature_path(model_path: Path, sig_path: Path): 115 | if model_path.is_file(): 116 | return 117 | # Note: Only allow top-level folder to have the signature for simplicity. 118 | if sig_path is not None and sig_path.is_relative_to(model_path) and \ 119 | sig_path.parent != model_path: 120 | raise ValueError(f"{sig_path} must be in the folder root") 121 | 122 | 123 | def is_relative_to(p: Path, path_list: [Path]) -> bool: 124 | for e in path_list: 125 | if p.is_relative_to(e): 126 | return True 127 | return False 128 | 129 | 130 | # TODO(): add a context "AI model"? 131 | class Serializer: 132 | @staticmethod 133 | # TODO: type of returned value. 134 | def _ordered_files(path: Path, ignorepaths: [Path]) -> []: 135 | children: [Path] 136 | if path.is_file(): 137 | children = [path] 138 | else: 139 | # NOTE: the parent (..) and current directory (.) are not present. 140 | # NOTE: this returns hidden files as well. 141 | # TODO: tests that this pattern reports all files, 142 | # regardless of their depth. 143 | children = sorted(path.glob("**/*")) 144 | 145 | filtered = [] 146 | total_size = 0 147 | for child in children: 148 | if is_relative_to(child, ignorepaths): 149 | continue 150 | 151 | # To avoid bugs where we read the link rather than its target, 152 | # we don't allow symlinks for now. 153 | # NOTE: It seems that Python's read() *always* follows symlinks, 154 | # so it may be safe to allow them. (readlink() is the function 155 | # to read the link metadata). 156 | if not allow_symlinks and child.is_symlink(): 157 | raise ValueError(f"{str(child)} is symlink") 158 | 159 | if not child.is_file() and not child.is_dir(): 160 | raise ValueError(f"{str(child)} is not a dir or file") 161 | 162 | # The recorded path must *not* contains the folder name, 163 | # since users may rename it. 164 | record_path = remove_prefix( 165 | str(child.as_posix()), str(path.as_posix() + '/')) 166 | record_type = "file" if child.is_file() else "dir" 167 | record_size = \ 168 | os.path.getsize(str(child)) if record_type == "file" else 0 169 | filtered += [(record_path, record_type, record_size)] 170 | total_size += record_size 171 | return filtered 172 | 173 | @staticmethod 174 | # TODO: type of returned value. 175 | def _create_tasks(children: [], shard_size: int) -> [[]]: 176 | tasks = [[]] * 0 177 | curr_file = 0 178 | curr_pos = 0 179 | 180 | while True: 181 | # All files have been processed. 182 | if curr_file >= len(children): 183 | break 184 | 185 | name, typ, size = children[curr_file] 186 | 187 | # It's a directory. 188 | # NOTE: It is fast to compute the hash because there's no data 189 | # besides the name and the type. 190 | # TODO(#12): do we need this at all? 
This only matters 191 | # if we care about empty directories, since non-empty ones have 192 | # their file + path recorded. 193 | if typ == "dir": 194 | # Record the task. 195 | tasks += [(name, typ, 0, size)] 196 | curr_file += 1 197 | curr_pos = 0 198 | continue 199 | 200 | # It's a file. 201 | 202 | # Sanity checks. 203 | if size <= curr_pos and size > 0: 204 | raise ValueError(f"internal: size={size}, " + 205 | f"curr_pos={curr_pos} " + 206 | f"for {children[curr_file]}") 207 | 208 | # Compute the number of bytes to process. 209 | remains = size - curr_pos 210 | if remains < 0: 211 | raise ValueError(f"internal: remains is {remains}") 212 | processed = min(remains, shard_size) 213 | end_pos = curr_pos + processed 214 | 215 | # Record the task. 216 | tasks += [(name, typ, curr_pos, end_pos)] 217 | 218 | # Update position. 219 | curr_pos += processed 220 | 221 | # If we have processed all bytes, we move on to the next file. 222 | if remains == processed: 223 | curr_file += 1 224 | curr_pos = 0 225 | return tasks 226 | 227 | @staticmethod 228 | # TODO: type of tasks 229 | def _run_tasks(path: Path, chunk: int, tasks: []) -> bytes: 230 | # See https://superfastpython.com/processpoolexecutor-in-python/ 231 | # NOTE: 32 = length of sha256 digest. 232 | digest_len = 32 233 | all_hashes = [None] * (digest_len*len(tasks)) 234 | org_len = len(all_hashes) 235 | 236 | # Use fork on Linux as it's supposed to be faster. 237 | if platform.system() == "Linux" and get_start_method() != "fork": 238 | set_start_method('fork') 239 | with ProcessPoolExecutor() as ppe: 240 | futures = [ppe.submit(Serializer.task, (path, chunk, task)) 241 | for task in tasks] 242 | results = [f.result() for f in futures] 243 | for i, result in enumerate(results): 244 | all_hashes[i*digest_len:(i+1)*digest_len] = result 245 | # Sanity check. 246 | if len(all_hashes) != org_len: 247 | raise ValueError(f"internal: {len(all_hashes)} != {org_len}") 248 | return bytes(all_hashes) 249 | 250 | @staticmethod 251 | # TODO: type of task_info. 252 | def task(task_info: []): 253 | # NOTE: we can get process info using: 254 | # from multiprocessing import current_process 255 | # worker = current_process() 256 | # print(f'Task {task_info}, 257 | # worker name={worker.name}, pid={worker.pid}', flush=True) 258 | 259 | model_path, chunk, (name, ty, start_pos, end_pos) = task_info 260 | 261 | # Header format is: "type.b64(filename).start-end." 262 | header = ty.encode('utf-8') + b'.' + \ 263 | base64.b64encode(name.encode('utf-8')) + \ 264 | b'.' + f"{start_pos}-{end_pos}".encode('utf-8') + b'.' 265 | 266 | # To hash a directory, we use "none" content. 267 | # TODO(#12): do we need this at all? This only matters 268 | # if we care about empty directories, since non-empty ones have 269 | # their file + path recorded. 270 | if ty == "dir": 271 | value = header + b'none' 272 | return hashlib.sha256(value).digest() 273 | 274 | # We need to hash a file. 275 | 276 | # The model is a directory. 277 | if model_path.is_dir(): 278 | return Hasher._node_file_compute_v1(model_path.joinpath(name), 279 | header, start_pos, 280 | end_pos, chunk) 281 | 282 | # The model is a single file. 283 | # We update the file name to a generic "root". 284 | header = ty.encode('utf-8') + b'.' + \ 285 | base64.b64encode("root".encode('utf-8')) + \ 286 | b'.' + f"{start_pos}-{end_pos}".encode('utf-8') + b'.' 
287 | return Hasher._node_file_compute_v1(name, 288 | header, start_pos, end_pos, chunk) 289 | 290 | @staticmethod 291 | def _serialize_v1(path: Path, chunk: int, shard: int, signature_path: Path, 292 | ignorepaths: [Path] = []) -> bytes: 293 | if not path.exists(): 294 | raise ValueError(f"{str(path)} does not exist") 295 | 296 | if not allow_symlinks and path.is_symlink(): 297 | raise ValueError(f"{str(path)} is a symlink") 298 | 299 | if chunk < 0: 300 | raise ValueError(f"{str(chunk)} is invalid") 301 | 302 | if not path.is_file() and not path.is_dir(): 303 | raise ValueError(f"{str(path)} is not a dir or file") 304 | 305 | # Validate the signature path. 306 | validate_signature_path(path, signature_path) 307 | 308 | # Children to hash. 309 | children = Serializer._ordered_files(path, 310 | [signature_path] + ignorepaths) 311 | 312 | # We shard the computation by creating independent "tasks". 313 | if shard < 0: 314 | raise ValueError(f"{str(shard)} is invalid") 315 | tasks = Serializer._create_tasks(children, shard) 316 | 317 | # Share the computation of hashes. 318 | # For simplicity, we pre-allocate the entire array that will hold 319 | # the concatenation of all hashes. 320 | all_hashes = Serializer._run_tasks(path, chunk, tasks) 321 | 322 | # Finally, we hash everything. 323 | return hashlib.sha256(bytes(all_hashes)).digest() 324 | 325 | def serialize_v1(path: Path, chunk: int, signature_path: Path, 326 | ignorepaths: [Path] = []) -> bytes: 327 | # NOTE: The shard size must be the same for all clients for 328 | # compatibility. We could make it configurable; but in this 329 | # case the signature file must contain the value used by the signer. 330 | shard_size = 1000000000 # 1GB 331 | return Serializer._serialize_v1(path, chunk, shard_size, 332 | signature_path, ignorepaths) 333 | 334 | @staticmethod 335 | def serialize_v0(path: Path, chunk: int, signature_path: Path, 336 | ignorepaths: [Path] = []) -> bytes: 337 | if not path.exists(): 338 | raise ValueError(f"{str(path)} does not exist") 339 | 340 | if not allow_symlinks and path.is_symlink(): 341 | raise ValueError(f"{str(path)} is a symlink") 342 | 343 | if chunk < 0: 344 | raise ValueError(f"{str(chunk)} is invalid") 345 | 346 | if path.is_file(): 347 | return Hasher.root_file(path, chunk) 348 | 349 | if not path.is_dir(): 350 | raise ValueError(f"{str(path)} is not a dir") 351 | 352 | # Validate the signature path. 353 | validate_signature_path(path, signature_path) 354 | 355 | children = sorted([x for x in path.iterdir() 356 | if x != signature_path and x not in ignorepaths]) 357 | # TODO: remove this special case? 358 | if len(children) == 0: 359 | return Hasher.root_folder(path, b"empty") 360 | 361 | hash = hashlib.sha256() 362 | for child in children: 363 | child_hash = Serializer._serialize_node(child, chunk, " ", 364 | ignorepaths) 365 | hash.update(child_hash) 366 | content = hash.digest() 367 | return Hasher.root_folder(path, content) 368 | 369 | @staticmethod 370 | def _serialize_node(path: Path, chunk: int, indent="", 371 | ignorepaths: [Path] = []) -> bytes: 372 | if not allow_symlinks and path.is_symlink(): 373 | raise ValueError(f"{str(path)} is a symlink") 374 | 375 | if path.is_file(): 376 | return Hasher.node_file(path, chunk) 377 | 378 | if not path.is_dir(): 379 | raise ValueError(f"{str(path)} is not a dir") 380 | 381 | children = sorted([x for x in path.iterdir() if x not in ignorepaths]) 382 | # TODO: remove this special case? 
383 | if len(children) == 0: 384 | return Hasher.node_folder(path, b"empty") 385 | 386 | hash = hashlib.sha256() 387 | for child in children: 388 | child_hash = Serializer._serialize_node(child, chunk, indent + " ", 389 | ignorepaths) 390 | hash.update(child_hash) 391 | content = hash.digest() 392 | return Hasher.node_folder(path, content) 393 | --------------------------------------------------------------------------------