├── .dockerignore ├── .github ├── dependabot.yml └── workflows │ ├── ci.yml │ ├── codeql-analysis.yml │ ├── deploy.yml │ ├── depsreview.yml │ ├── docker.yaml │ ├── release.yaml │ └── scorecards-analysis.yml ├── .gitignore ├── .golangci.yml ├── CODE_OF_CONDUCT.md ├── Dockerfile ├── LICENSE ├── Makefile ├── README.md ├── SECURITY.md ├── cmd └── scheduled-feed │ └── main.go ├── docker-compose.yml ├── go.mod ├── go.sum ├── hacks └── tree-status ├── integration ├── config │ └── feeds.yml ├── docker-compose-integration.yml └── test_consumer │ ├── check_kafka_output.py │ └── requirements.txt ├── package.schema.json ├── pkg ├── config │ ├── config_test.go │ ├── scheduledfeed.go │ └── structs.go ├── events │ ├── README.md │ ├── handler.go │ ├── handler_test.go │ ├── logrus_sink.go │ ├── logrus_sink_test.go │ ├── lossy_feed_event.go │ └── mocks.go ├── feeds │ ├── README.md │ ├── crates │ │ ├── README.md │ │ ├── crates.go │ │ └── crates_test.go │ ├── feed.go │ ├── feed_test.go │ ├── goproxy │ │ ├── README.md │ │ ├── goproxy.go │ │ └── goproxy_test.go │ ├── lossy_logging.go │ ├── lossy_logging_test.go │ ├── maven │ │ ├── README.md │ │ ├── maven.go │ │ └── maven_test.go │ ├── npm │ │ ├── README.md │ │ ├── npm.go │ │ └── npm_test.go │ ├── nuget │ │ ├── README.md │ │ ├── nuget.go │ │ └── nuget_test.go │ ├── packagist │ │ ├── README.md │ │ ├── doc.go │ │ ├── packagist.go │ │ ├── packagist_test.go │ │ └── version.go │ ├── pypi │ │ ├── README.md │ │ ├── pypi.go │ │ ├── pypi_artifacts.go │ │ ├── pypi_artifacts_test.go │ │ └── pypi_test.go │ └── rubygems │ │ ├── README.md │ │ ├── rubygems.go │ │ └── rubygems_test.go ├── publisher │ ├── README.md │ ├── gcppubsub │ │ └── gcppubsub.go │ ├── httpclientpubsub │ │ └── httpclientpubsub.go │ ├── kafkapubsub │ │ └── kafkapubsub.go │ ├── publisher.go │ └── stdout │ │ └── stdout.go ├── scheduler │ ├── feed_group.go │ ├── feed_group_test.go │ ├── feed_groups_handler.go │ ├── mocks.go │ ├── scheduler.go │ └── scheduler_test.go ├── useragent │ ├── 
useragent.go │ └── useragent_test.go └── utils │ ├── http_requests.go │ ├── test │ └── http_helper.go │ └── xml_reader.go └── terraform ├── feeds.tf ├── scheduler ├── main.tf └── variables.tf ├── terraform.tfvars └── variables.tf /.dockerignore: -------------------------------------------------------------------------------- 1 | terraform 2 | -------------------------------------------------------------------------------- /.github/dependabot.yml: -------------------------------------------------------------------------------- 1 | version: 2 2 | updates: 3 | - package-ecosystem: gomod 4 | directory: "/" 5 | schedule: 6 | interval: daily 7 | open-pull-requests-limit: 10 8 | - package-ecosystem: "github-actions" 9 | directory: "/" 10 | schedule: 11 | interval: "daily" 12 | - package-ecosystem: docker 13 | directory: "/" 14 | schedule: 15 | interval: daily 16 | open-pull-requests-limit: 10 17 | -------------------------------------------------------------------------------- /.github/workflows/ci.yml: -------------------------------------------------------------------------------- 1 | name: CI 2 | # Trigger the CI on pull requests and direct pushes to any branch 3 | on: 4 | push: 5 | pull_request: 6 | permissions: read-all 7 | jobs: 8 | terraform-check: 9 | runs-on: ubuntu-latest 10 | steps: 11 | - uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332 # v4.1.7 12 | 13 | - name: Setup Terraform 14 | uses: hashicorp/setup-terraform@b9cd54a3c349d3f38e8881555d616ced269862dd 15 | 16 | - name: terraform fmt 17 | run: terraform fmt -check 18 | working-directory: ./terraform 19 | 20 | 21 | run-tests: 22 | runs-on: ubuntu-latest 23 | steps: 24 | - uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332 # v4.1.7 25 | 26 | - name: Set up Go 27 | uses: actions/setup-go@0a12ed9d6a96ab950c8f026ed9f722fe0da7ef32 # v5.0.2 28 | with: 29 | go-version-file: 'go.mod' 30 | 31 | - name: Run tests 32 | run: go test -v -skip TestPyPIArtifactsLive ./... 
33 | 34 | build-verify: 35 | runs-on: ubuntu-latest 36 | steps: 37 | - uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332 # v4.1.7 38 | 39 | - name: Set up Go 40 | uses: actions/setup-go@0a12ed9d6a96ab950c8f026ed9f722fe0da7ef32 # v5.0.2 41 | with: 42 | go-version-file: 'go.mod' 43 | 44 | - name: Build 45 | run: make build 46 | 47 | - name: Verify 48 | run: make verify 49 | 50 | run-integration-check: 51 | runs-on: ubuntu-latest 52 | steps: 53 | - uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332 # v4.1.7 54 | - uses: actions/setup-python@f677139bbe7f9c59b41e40162b753c062f5d49a3 # v5.2.0 55 | with: 56 | python-version: '3.12' 57 | - run: pip install -r requirements.txt 58 | working-directory: ./integration/test_consumer/ 59 | - name: Build the stack 60 | run: docker compose -f docker-compose-integration.yml up -d 61 | working-directory: ./integration 62 | - name: Run consumer python script 63 | run: python check_kafka_output.py 64 | working-directory: ./integration/test_consumer 65 | - name: Output filtered log (full log in artifacts) 66 | run: docker compose -f docker-compose-integration.yml logs feeds | grep -v "Sending package" | grep -v "Processing Package" 67 | working-directory: ./integration/ 68 | if: ${{ always() }} 69 | - name: Dump logs for archive 70 | run: docker compose -f docker-compose-integration.yml logs feeds > feeds-log.txt 71 | working-directory: ./integration/ 72 | if: ${{ always() }} 73 | - uses: actions/upload-artifact@50769540e7f4bd5e21e526ee35c689e35e0d6874 74 | with: 75 | name: package-feeds log 76 | path: ./integration/feeds-log.txt 77 | if: ${{ always() }} 78 | -------------------------------------------------------------------------------- /.github/workflows/codeql-analysis.yml: -------------------------------------------------------------------------------- 1 | # 
https://docs.github.com/en/free-pro-team@latest/github/finding-security-vulnerabilities-and-errors-in-your-code/configuring-code-scanning#changing-the-languages-that-are-analyzed 2 | name: "CodeQL" 3 | 4 | on: 5 | push: 6 | branches: [ main ] 7 | pull_request: 8 | # The branches below must be a subset of the branches above 9 | branches: [ main ] 10 | schedule: 11 | - cron: '22 19 * * 0' 12 | 13 | permissions: read-all 14 | 15 | jobs: 16 | analyze: 17 | name: Analyze 18 | runs-on: ubuntu-latest 19 | permissions: 20 | security-events: write 21 | actions: read 22 | contents: read 23 | 24 | strategy: 25 | fail-fast: false 26 | matrix: 27 | language: [ 'go' ] 28 | # CodeQL supports [ 'cpp', 'csharp', 'go', 'java', 'javascript', 'python' ] 29 | # Learn more: see the code-scanning configuration URL at the top of this file. 30 | 31 | steps: 32 | - name: Checkout repository 33 | uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332 # v4.1.7 34 | 35 | # Initializes the CodeQL tools for scanning. 36 | - name: Initialize CodeQL 37 | uses: github/codeql-action/init@4dd16135b69a43b6c8efb853346f8437d92d3c93 # v3.26.6 38 | with: 39 | languages: ${{ matrix.language }} 40 | 41 | - name: Autobuild 42 | uses: github/codeql-action/autobuild@4dd16135b69a43b6c8efb853346f8437d92d3c93 # v3.26.6 43 | 44 | - name: Perform CodeQL Analysis 45 | uses: github/codeql-action/analyze@4dd16135b69a43b6c8efb853346f8437d92d3c93 # v3.26.6 46 | -------------------------------------------------------------------------------- /.github/workflows/deploy.yml: -------------------------------------------------------------------------------- 1 | name: 'Deploy with Terraform' 2 | 3 | on: workflow_dispatch 4 | 5 | permissions: read-all 6 | 7 | jobs: 8 | terraform: 9 | name: 'Terraform' 10 | runs-on: ubuntu-latest 11 | 12 | defaults: 13 | run: 14 | shell: bash 15 | 16 | steps: 17 | - name: Checkout 18 | uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332 # v4.1.7 19 | 20 | - name: Setup Terraform 21 | uses: 
hashicorp/setup-terraform@b9cd54a3c349d3f38e8881555d616ced269862dd 22 | 23 | - name: Authenticate to Google Cloud 24 | uses: google-github-actions/auth@62cf5bd3e4211a0a0b51f2c6d6a37129d828611d # v2.1.5 25 | with: 26 | project_id: ${{ secrets.GCP_PROJECT_ID }} 27 | credentials_json: ${{ secrets.TF_GOOGLE_CREDENTIALS }} 28 | export_environment_variables: true 29 | 30 | - name: Set up Cloud SDK 31 | uses: google-github-actions/setup-gcloud@f0990588f1e5b5af6827153b93673613abdc6ec7 # v2.1.1 32 | 33 | - name: Configure gcloud 34 | run: gcloud config set project ${{ secrets.GOOGLE_PROJECT_ID }} 35 | 36 | - name: Generate the images for each feed 37 | run: gcloud builds submit --tag gcr.io/${{ secrets.GOOGLE_PROJECT_ID }}/scheduled-feeds 38 | 39 | - name: Terraform Init 40 | run: terraform init 41 | working-directory: ./terraform 42 | 43 | - name: Terraform Format 44 | run: terraform fmt -check 45 | working-directory: ./terraform 46 | 47 | - name: Terraform Plan 48 | run: terraform plan 49 | working-directory: ./terraform 50 | 51 | - name: Terraform Apply 52 | if: github.ref == 'refs/heads/main' 53 | run: terraform apply -auto-approve 54 | working-directory: ./terraform 55 | 56 | - name: Deploy to Cloud Run 57 | if: github.ref == 'refs/heads/main' 58 | run: | 59 | gcloud run deploy \ 60 | scheduled-feeds-srv \ 61 | --platform managed \ 62 | --region us-central1 \ 63 | --max-instances=1 \ 64 | --image gcr.io/${{ secrets.GOOGLE_PROJECT_ID }}/scheduled-feeds; 65 | -------------------------------------------------------------------------------- /.github/workflows/depsreview.yml: -------------------------------------------------------------------------------- 1 | name: 'Dependency Review' 2 | on: [pull_request] 3 | 4 | permissions: 5 | contents: read 6 | 7 | jobs: 8 | dependency-review: 9 | runs-on: ubuntu-latest 10 | steps: 11 | - name: 'Checkout Repository' 12 | uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332 # v4.1.7 13 | - name: 'Dependency Review' 14 | 
uses: actions/dependency-review-action@5a2ce3f5b92ee19cbb1541a4984c76d921601d7c # v4.3.4 15 | -------------------------------------------------------------------------------- /.github/workflows/docker.yaml: -------------------------------------------------------------------------------- 1 | name: Docker 2 | on: 3 | # Run tests for any PRs. 4 | pull_request: 5 | 6 | permissions: read-all 7 | 8 | env: 9 | IMAGE_NAME: packagefeeds 10 | 11 | jobs: 12 | build-image: 13 | name: Build image 14 | runs-on: ubuntu-latest 15 | 16 | steps: 17 | - uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332 # v4.1.7 18 | 19 | - name: Set up Docker Buildx 20 | uses: docker/setup-buildx-action@988b5a0280414f521da01fcc63a27aeeb4b104db # v3 21 | 22 | - name: Test build on x86 23 | id: docker_build 24 | uses: docker/build-push-action@5cd11c3a4ced054e52742c5fd54dca954e0edd85 # v5 25 | with: 26 | context: . 27 | file: ./Dockerfile 28 | platforms: linux/amd64 29 | push: false # Only attempt to build, to verify the Dockerfile is working 30 | load: true 31 | cache-from: type=gha 32 | cache-to: type=gha,mode=max 33 | -------------------------------------------------------------------------------- /.github/workflows/release.yaml: -------------------------------------------------------------------------------- 1 | name: Release 2 | 3 | on: 4 | push: 5 | branches: 6 | - main 7 | # Publish `v1.2.3` tags as releases. 8 | tags: 9 | - v* 10 | 11 | permissions: read-all 12 | 13 | env: 14 | IMAGE_NAME: packagefeeds 15 | 16 | jobs: 17 | # Push image to GitHub Packages. 
18 | push: 19 | name: Push 20 | runs-on: ubuntu-latest 21 | permissions: 22 | packages: write 23 | id-token: write 24 | contents: read 25 | 26 | steps: 27 | - uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332 # v4.1.7 28 | 29 | - name: Set up Docker Buildx 30 | uses: docker/setup-buildx-action@988b5a0280414f521da01fcc63a27aeeb4b104db # v3 31 | 32 | - name: Install Cosign 33 | uses: sigstore/cosign-installer@4959ce089c160fddf62f7b42464195ba1a56d382 # v3.6.0 34 | with: 35 | cosign-release: 'v2.2.2' 36 | 37 | - name: Log into registry 38 | uses: docker/login-action@9780b0c442fbb1117ed29e0efdff1e18412f7567 39 | with: 40 | registry: ghcr.io 41 | username: ${{ github.actor }} 42 | password: ${{ secrets.GITHUB_TOKEN }} 43 | 44 | - name: Set container metadata 45 | uses: docker/metadata-action@8e5442c4ef9f78752691e2d8f8d19755c6f78e81 # v5 46 | id: docker-metadata 47 | with: 48 | images: ghcr.io/${{ github.repository }}/${{ env.IMAGE_NAME }} 49 | labels: | 50 | org.opencontainers.image.source=${{ github.repositoryUrl }} 51 | org.opencontainers.image.description="This is a container for the Package Feeds process" 52 | org.opencontainers.image.title="Package Feeds" 53 | org.opencontainers.image.vendor="OpenSSF" 54 | org.opencontainers.image.version=${{ github.sha }} 55 | flavor: | 56 | latest=auto 57 | # Using the {{version}} placeholder to automatically detect the version from the git tag 58 | # without the prefix "v". 59 | # We'll also generate tags for commit sha, main branch changes and semver tags. 60 | tags: | 61 | type=sha 62 | type=ref,event=tag 63 | type=ref,event=branch 64 | type=semver,pattern={{version}} 65 | type=raw,value=latest,enable={{is_default_branch}} 66 | 67 | - name: Build image 68 | id: image-build 69 | uses: docker/build-push-action@5cd11c3a4ced054e52742c5fd54dca954e0edd85 # v5 70 | with: 71 | context: . 
72 | platforms: linux/amd64,linux/arm64 73 | push: true 74 | file: ./Dockerfile 75 | tags: ${{ steps.docker-metadata.outputs.tags }} 76 | labels: ${{ steps.docker-metadata.outputs.labels }} 77 | provenance: true 78 | sbom: true 79 | cache-from: type=gha 80 | cache-to: type=gha,mode=max 81 | 82 | - name: Sign the image 83 | run: | 84 | cosign sign --yes ghcr.io/${{ github.repository }}/${{ env.IMAGE_NAME }}@${{ steps.image-build.outputs.digest }} 85 | -------------------------------------------------------------------------------- /.github/workflows/scorecards-analysis.yml: -------------------------------------------------------------------------------- 1 | name: Scorecards supply-chain security 2 | on: 3 | # Only the default branch is supported. 4 | branch_protection_rule: 5 | schedule: 6 | - cron: '34 18 * * 6' 7 | push: 8 | branches: [ main ] 9 | 10 | # Declare default permissions as read only. 11 | permissions: read-all 12 | 13 | jobs: 14 | analysis: 15 | name: Scorecards analysis 16 | runs-on: ubuntu-latest 17 | permissions: 18 | # Needed to upload the results to code-scanning dashboard. 19 | security-events: write 20 | actions: read 21 | contents: read 22 | id-token: write 23 | 24 | steps: 25 | - name: "Checkout code" 26 | uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332 # v4.1.7 27 | with: 28 | persist-credentials: false 29 | 30 | - name: "Run analysis" 31 | uses: ossf/scorecard-action@62b2cac7ed8198b15735ed49ab1e5cf35480ba46 # v2.4.0 32 | with: 33 | results_file: results.sarif 34 | results_format: sarif 35 | # Read-only PAT token. To create it, 36 | # follow the steps in https://github.com/ossf/scorecard-action#pat-token-creation. 37 | repo_token: ${{ secrets.SCORECARD_READ_TOKEN }} 38 | # Publish the results to enable scorecard badges. For more details, see 39 | # https://github.com/ossf/scorecard-action#publishing-results. 
40 | # For private repositories, `publish_results` will automatically be set to `false`, 41 | # regardless of the value entered here. 42 | publish_results: true 43 | 44 | # Upload the results as artifacts (optional). 45 | - name: "Upload artifact" 46 | uses: actions/upload-artifact@50769540e7f4bd5e21e526ee35c689e35e0d6874 47 | with: 48 | name: SARIF file 49 | path: results.sarif 50 | retention-days: 5 51 | 52 | # Upload the results to GitHub's code scanning dashboard. 53 | - name: "Upload to code-scanning" 54 | uses: github/codeql-action/upload-sarif@4dd16135b69a43b6c8efb853346f8437d92d3c93 # v3.26.6 55 | with: 56 | sarif_file: results.sarif 57 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Binaries for programs and plugins 2 | *.exe 3 | *.exe~ 4 | *.dll 5 | *.so 6 | *.dylib 7 | 8 | # Test binary, built with `go test -c` 9 | *.test 10 | 11 | # Output of the go coverage tool, specifically when used with LiteIDE 12 | *.out 13 | 14 | # Dependency directories (remove the comment below to include it) 15 | # vendor/ 16 | 17 | terraform/.terraform* 18 | *.tfstate 19 | 20 | bin* 21 | -------------------------------------------------------------------------------- /.golangci.yml: -------------------------------------------------------------------------------- 1 | --- 2 | run: 3 | concurrency: 6 4 | deadline: 5m 5 | linters: 6 | disable-all: true 7 | enable: 8 | - asciicheck 9 | - bodyclose 10 | - copyloopvar 11 | #- depguard 12 | - dogsled 13 | - dupl 14 | - err113 15 | - errcheck 16 | - errorlint 17 | - exhaustive 18 | - gci 19 | - gochecknoinits 20 | - gocognit 21 | - goconst 22 | - gocritic 23 | - gocyclo 24 | - godot 25 | - godox 26 | - gofmt 27 | - gofumpt 28 | - goheader 29 | - goimports 30 | # - gomnd 31 | - gomodguard 32 | - goprintffuncname 33 | - gosec 34 | - gosimple 35 | - govet 36 | - ineffassign 37 | - lll 38 | - makezero 39 | 
- misspell 40 | - nakedret 41 | - nestif 42 | - nolintlint 43 | - paralleltest 44 | - prealloc 45 | - predeclared 46 | - revive 47 | - rowserrcheck 48 | - sqlclosecheck 49 | - staticcheck 50 | - stylecheck 51 | - thelper 52 | - tparallel 53 | - typecheck 54 | - unconvert 55 | - unparam 56 | - unused 57 | - whitespace 58 | # - wrapcheck 59 | # - exhaustivestruct 60 | # - forbidigo 61 | # - funlen 62 | # - gochecknoglobals 63 | # - nlreturn 64 | # - testpackage 65 | # - wsl 66 | linters-settings: 67 | gci: 68 | local-prefixes: github.com/ossf/package-feeds 69 | errcheck: 70 | check-type-assertions: true 71 | check-blank: true 72 | godox: 73 | keywords: 74 | - BUG 75 | - FIXME 76 | - HACK 77 | gocritic: 78 | enabled-checks: 79 | # Diagnostic 80 | - appendAssign 81 | - argOrder 82 | - badCond 83 | - caseOrder 84 | - codegenComment 85 | - commentedOutCode 86 | - deprecatedComment 87 | - dupArg 88 | - dupBranchBody 89 | - dupCase 90 | - dupSubExpr 91 | - exitAfterDefer 92 | - flagDeref 93 | - flagName 94 | - nilValReturn 95 | - offBy1 96 | - sloppyReassign 97 | - weakCond 98 | - octalLiteral 99 | 100 | # Performance 101 | - appendCombine 102 | - equalFold 103 | - hugeParam 104 | - indexAlloc 105 | - rangeExprCopy 106 | - rangeValCopy 107 | 108 | # Style 109 | - assignOp 110 | - boolExprSimplify 111 | - captLocal 112 | - commentFormatting 113 | - commentedOutImport 114 | - defaultCaseOrder 115 | - docStub 116 | - elseif 117 | - emptyFallthrough 118 | - emptyStringTest 119 | - hexLiteral 120 | - ifElseChain 121 | - methodExprCall 122 | - regexpMust 123 | - singleCaseSwitch 124 | - sloppyLen 125 | - stringXbytes 126 | - switchTrue 127 | - typeAssertChain 128 | - typeSwitchVar 129 | - underef 130 | - unlabelStmt 131 | - unlambda 132 | - unslice 133 | - valSwap 134 | - wrapperFunc 135 | - yodaStyleExpr 136 | 137 | # Opinionated 138 | - builtinShadow 139 | - importShadow 140 | - initClause 141 | - nestingReduce 142 | - paramTypeCombine 143 | - ptrToRefParam 144 | - typeUnparen 
145 | - unnamedResult 146 | - unnecessaryBlock 147 | -------------------------------------------------------------------------------- /CODE_OF_CONDUCT.md: -------------------------------------------------------------------------------- 1 | # Contributor Covenant Code of Conduct 2 | 3 | ## Our Pledge 4 | 5 | We as members, contributors, and leaders pledge to make participation in our 6 | community a harassment-free experience for everyone, regardless of age, body 7 | size, visible or invisible disability, ethnicity, sex characteristics, gender 8 | identity and expression, level of experience, education, socio-economic status, 9 | nationality, personal appearance, race, religion, or sexual identity 10 | and orientation. 11 | 12 | We pledge to act and interact in ways that contribute to an open, welcoming, 13 | diverse, inclusive, and healthy community. 14 | 15 | ## Our Standards 16 | 17 | Examples of behavior that contributes to a positive environment for our 18 | community include: 19 | 20 | * Demonstrating empathy and kindness toward other people 21 | * Being respectful of differing opinions, viewpoints, and experiences 22 | * Giving and gracefully accepting constructive feedback 23 | * Accepting responsibility and apologizing to those affected by our mistakes, 24 | and learning from the experience 25 | * Focusing on what is best not just for us as individuals, but for the 26 | overall community 27 | 28 | Examples of unacceptable behavior include: 29 | 30 | * The use of sexualized language or imagery, and sexual attention or 31 | advances of any kind 32 | * Trolling, insulting or derogatory comments, and personal or political attacks 33 | * Public or private harassment 34 | * Publishing others' private information, such as a physical or email 35 | address, without their explicit permission 36 | * Other conduct which could reasonably be considered inappropriate in a 37 | professional setting 38 | 39 | ## Enforcement Responsibilities 40 | 41 | Community leaders are 
responsible for clarifying and enforcing our standards of 42 | acceptable behavior and will take appropriate and fair corrective action in 43 | response to any behavior that they deem inappropriate, threatening, offensive, 44 | or harmful. 45 | 46 | Community leaders have the right and responsibility to remove, edit, or reject 47 | comments, commits, code, wiki edits, issues, and other contributions that are 48 | not aligned to this Code of Conduct, and will communicate reasons for moderation 49 | decisions when appropriate. 50 | 51 | ## Scope 52 | 53 | This Code of Conduct applies within all community spaces, and also applies when 54 | an individual is officially representing the community in public spaces. 55 | Examples of representing our community include using an official e-mail address, 56 | posting via an official social media account, or acting as an appointed 57 | representative at an online or offline event. 58 | 59 | ## Enforcement 60 | 61 | Instances of abusive, harassing, or otherwise unacceptable behavior may be 62 | reported to the community leaders responsible for enforcement at 63 | [INSERT CONTACT METHOD]. 64 | All complaints will be reviewed and investigated promptly and fairly. 65 | 66 | All community leaders are obligated to respect the privacy and security of the 67 | reporter of any incident. 68 | 69 | ## Enforcement Guidelines 70 | 71 | Community leaders will follow these Community Impact Guidelines in determining 72 | the consequences for any action they deem in violation of this Code of Conduct: 73 | 74 | ### 1. Correction 75 | 76 | **Community Impact**: Use of inappropriate language or other behavior deemed 77 | unprofessional or unwelcome in the community. 78 | 79 | **Consequence**: A private, written warning from community leaders, providing 80 | clarity around the nature of the violation and an explanation of why the 81 | behavior was inappropriate. A public apology may be requested. 82 | 83 | ### 2. 
Warning 84 | 85 | **Community Impact**: A violation through a single incident or series 86 | of actions. 87 | 88 | **Consequence**: A warning with consequences for continued behavior. No 89 | interaction with the people involved, including unsolicited interaction with 90 | those enforcing the Code of Conduct, for a specified period of time. This 91 | includes avoiding interactions in community spaces as well as external channels 92 | like social media. Violating these terms may lead to a temporary or 93 | permanent ban. 94 | 95 | ### 3. Temporary Ban 96 | 97 | **Community Impact**: A serious violation of community standards, including 98 | sustained inappropriate behavior. 99 | 100 | **Consequence**: A temporary ban from any sort of interaction or public 101 | communication with the community for a specified period of time. No public or 102 | private interaction with the people involved, including unsolicited interaction 103 | with those enforcing the Code of Conduct, is allowed during this period. 104 | Violating these terms may lead to a permanent ban. 105 | 106 | ### 4. Permanent Ban 107 | 108 | **Community Impact**: Demonstrating a pattern of violation of community 109 | standards, including sustained inappropriate behavior, harassment of an 110 | individual, or aggression toward or disparagement of classes of individuals. 111 | 112 | **Consequence**: A permanent ban from any sort of public interaction within 113 | the community. 114 | 115 | ## Attribution 116 | 117 | This Code of Conduct is adapted from the [Contributor Covenant][homepage], 118 | version 2.0, available at 119 | https://www.contributor-covenant.org/version/2/0/code_of_conduct.html. 120 | 121 | Community Impact Guidelines were inspired by [Mozilla's code of conduct 122 | enforcement ladder](https://github.com/mozilla/diversity). 
123 | 124 | [homepage]: https://www.contributor-covenant.org 125 | 126 | For answers to common questions about this code of conduct, see the FAQ at 127 | https://www.contributor-covenant.org/faq. Translations are available at 128 | https://www.contributor-covenant.org/translations. 129 | -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- 1 | # Originally modified from the main Cloud Run documentation 2 | 3 | FROM golang:1.22.6-bookworm@sha256:f020456572fc292e9627b3fb435c6de5dfb8020fbcef1fd7b65dd092c0ac56bb AS builder 4 | 5 | # Create and change to the app directory. 6 | WORKDIR /app 7 | ENV CGO_ENABLED=0 8 | # Retrieve application dependencies. 9 | # This allows the container build to reuse cached dependencies. 10 | # Copies go.mod and, if present, go.sum. 11 | COPY go.* ./ 12 | RUN go mod download 13 | 14 | # Copy local code to the container image. 15 | COPY . ./ 16 | 17 | # Build the binary. 18 | RUN go build -mod=readonly -v -o server ./cmd/scheduled-feed 19 | 20 | # https://github.com/GoogleContainerTools/distroless/ 21 | FROM gcr.io/distroless/base:nonroot 22 | 23 | # Copy the binary to the production image from the builder stage. 24 | COPY --from=builder /app/server /app/server 25 | 26 | # Run the web service on container startup. 27 | CMD ["/app/server"] 28 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 
11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 
47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. 
Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. 
You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 
122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. 
In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "[]" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. 
We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright [yyyy] [name of copyright owner] 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 202 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | GO ?= go 2 | BIN := bin 3 | GOOS ?= $(shell uname | tr A-Z a-z) 4 | GOLANGCI_LINT_VERSION = v1.60.3 5 | PROJECT := package-feeds 6 | 7 | .PHONY: help 8 | help: ## Display this help 9 | @awk \ 10 | -v "col=${COLOR}" -v "nocol=${NOCOLOR}" \ 11 | ' \ 12 | BEGIN { \ 13 | FS = ":.*##" ; \ 14 | printf "Available targets:\n"; \ 15 | } \ 16 | /^[a-zA-Z0-9_-]+:.*?##/ { \ 17 | printf " %s%-25s%s %s\n", col, $$1, nocol, $$2 \ 18 | } \ 19 | /^##@/ { \ 20 | printf "\n%s%s%s\n", col, substr($$0, 5), nocol \ 21 | } \ 22 | ' $(MAKEFILE_LIST) 23 | 24 | .PHONY: build 25 | build: 26 | mkdir -p $(BIN)/$(PROJECT) && \ 27 | env CGO_ENABLED=0 GOOS=$(GOOS) go build -o $(BIN)/$(PROJECT) -a ./... 
28 | 29 | .PHONY: clean 30 | clean: ## Clean the build directory 31 | rm -rf $(BIN) 32 | 33 | .PHONY: go-mod 34 | go-mod: ## Cleanup and verify go modules 35 | $(GO) mod tidy && $(GO) mod verify 36 | 37 | # Verification targets 38 | .PHONY: verify 39 | verify: verify-go-mod verify-go-lint ## Run all verification targets 40 | 41 | .PHONY: verify-go-mod 42 | verify-go-mod: go-mod ## Verify the go modules 43 | ./hacks/tree-status 44 | 45 | .PHONY: verify-go-lint 46 | verify-go-lint: golangci-lint ## Verify the golang code by linting 47 | $(BIN)/golangci-lint run -c ./.golangci.yml 48 | 49 | golangci-lint: 50 | export \ 51 | VERSION=$(GOLANGCI_LINT_VERSION) \ 52 | URL=https://raw.githubusercontent.com/golangci/golangci-lint \ 53 | BINDIR=$(BIN) && \ 54 | curl -sfL $$URL/$$VERSION/install.sh | sh -s $$VERSION 55 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Package Feeds 2 | 3 | The binary produced by [cmd/scheduled-feed/main.go](cmd/scheduled-feed/main.go) can be used to monitor various 4 | package repositories for changes and publish data to external services for further processing. 5 | 6 | Additionally, the repo contains a few subprojects to aid in the analysis of these open source packages, in particular to look for malicious software. 7 | 8 | These are: 9 | 10 | [Feeds](./pkg/feeds/) to watch package registries (PyPI, NPM, etc.) for changes to packages 11 | and to make that data available via a single standard interface. 12 | 13 | [Publisher](./pkg/publisher/) provides the functionality to push package details from feeds towards 14 | external services such as GCP Pub/Sub. Package details are formatted inline with a versioned 15 | [json-schema](./package.schema.json). 
16 | 17 | This repo used to contain several other projects, which have since been split out into 18 | [github.com/ossf/package-analysis](https://github.com/ossf/package-analysis). 19 | 20 | The goal is for all of these components to work together and provide extensible, community-run 21 | infrastructure to study behavior of open source packages and to look for malicious software. 22 | We also hope that the components can be used independently, to provide package feeds or runtime 23 | behavior data for anyone interested. 24 | 25 | # Configuration 26 | 27 | A YAML configuration file can be provided with the following format: 28 | 29 | ``` 30 | feeds: 31 | - type: pypi 32 | - type: npm 33 | - type: goproxy 34 | - type: rubygems 35 | - type: crates 36 | 37 | publisher: 38 | type: 'gcp_pubsub' 39 | config: 40 | url: "gcppubsub://foobar.com" 41 | 42 | http_port: 8080 43 | 44 | poll_rate: 5m 45 | 46 | timer: false 47 | ``` 48 | 49 | `poll_rate` is a string formatted for the [duration parser](https://golang.org/pkg/time/#ParseDuration). This is used as an initial value to generate a cutoff point for feed events relative to the given time at execution, with subsequent events using the previous time at execution as the cutoff point. 50 | `timer` will configure interval polling of the `feeds` at the given `poll_rate` period, individual feeds configured with a `poll_rate` will poll on an interval regardless of these options. To specify this configuration file, define its path in your environment under the `PACKAGE_FEEDS_CONFIG_PATH` variable. 51 | 52 | An event handler can be configured through the `events` field, this is documented in the [events README](./pkg/events/README.md). 53 | 54 | ## FeedOptions 55 | 56 | Feeds can be configured with additional options, not all feeds will support these features. Check [feeds/README.md](./pkg/feeds/README.md) for more information on feed specific configurations. 
57 | 58 | Below is an example of such options with pypi being configured to poll a specific set of packages 59 | 60 | ``` 61 | feeds: 62 | - type: pypi 63 | options: 64 | packages: 65 | - fooPackage 66 | - barPackage 67 | ``` 68 | 69 | ## Legacy Configuration 70 | 71 | Legacy configuration methods are still supported. By default, without a configuration file all feeds will be enabled. The environment variable `OSSMALWARE_TOPIC_URL` can be used to select the GCP pubsub publisher and `PORT` will configure the port for the HTTP server. 72 | The default `poll_rate` is 5 minutes, it is assumed that an external service is dispatching requests to the configured HTTP server at this frequency. 73 | 74 | # Running Locally 75 | 76 | To start an instance of `package-feeds` running on a local machine, run the 77 | following commands: 78 | 79 | ```shell 80 | $ docker build . -t local-package-feeds ## Build the container 81 | $ mkdir /tmp/feedconfig/ ## Create the feeds.yml config 82 | $ cat << EOF >> /tmp/feedconfig/feeds.yml 83 | publisher: 84 | type: stdout 85 | poll_rate: 5m 86 | http_port: 8080 87 | EOF 88 | $ docker run -v /tmp/feedconfig:/config/ \ 89 | -e "PACKAGE_FEEDS_CONFIG_PATH=/config/feeds.yml" \ 90 | -p 8080:8080 --rm -ti local-package-feeds ## Start the container 91 | ``` 92 | 93 | # Contributing 94 | 95 | If you want to get involved or have ideas you'd like to chat about, we discuss this project in the [OSSF Securing Critical Projects Working Group](https://github.com/ossf/wg-securing-critical-projects) meetings. 96 | 97 | See the [Community Calendar](https://calendar.google.com/calendar?cid=czYzdm9lZmhwNWk5cGZsdGI1cTY3bmdwZXNAZ3JvdXAuY2FsZW5kYXIuZ29vZ2xlLmNvbQ) for the schedule and meeting invitations. 98 | 99 | PRs are linted using `golangci-lint` with the following [config file](./.golangci.yml). If you wish to run this locally, see the [install docs](https://golangci-lint.run/usage/install/#local-installation). 
100 | -------------------------------------------------------------------------------- /SECURITY.md: -------------------------------------------------------------------------------- 1 | # Reporting Security Issues 2 | 3 | To report a security issue, please email 4 | [oss-security@googlegroups.com](mailto:oss-security@googlegroups.com) 5 | with a description of the issue, the steps you took to create the issue, 6 | affected versions, and, if known, mitigations for the issue. 7 | 8 | Our vulnerability management team will respond within 3 working days of your 9 | email. If the issue is confirmed as a vulnerability, we will open a 10 | Security Advisory and acknowledge your contributions as part of it. This project 11 | follows a 90 day disclosure timeline. 12 | -------------------------------------------------------------------------------- /cmd/scheduled-feed/main.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "context" 5 | "net/http" 6 | "os" 7 | "strings" 8 | "time" 9 | 10 | log "github.com/sirupsen/logrus" 11 | 12 | "github.com/ossf/package-feeds/pkg/config" 13 | "github.com/ossf/package-feeds/pkg/scheduler" 14 | ) 15 | 16 | func main() { 17 | // Increase idle conns per host to increase the reuse of existing 18 | // connections between requests. This only applies to HTTP1. HTTP2 requests 19 | // are multiplexed over a single TCP connection. HTTP2 is supported by 20 | // all the current feeds. 
21 | http.DefaultTransport.(*http.Transport).MaxIdleConnsPerHost = 8 22 | 23 | configPath, useConfig := os.LookupEnv("PACKAGE_FEEDS_CONFIG_PATH") 24 | var err error 25 | 26 | var appConfig *config.ScheduledFeedConfig 27 | if useConfig { 28 | appConfig, err = config.FromFile(configPath) 29 | log.Infof("Using config from file: %v", configPath) 30 | } else { 31 | appConfig = config.Default() 32 | log.Info("No config specified, using default configuration") 33 | } 34 | if err != nil { 35 | log.Fatal(err) 36 | } 37 | 38 | pub, err := appConfig.PubConfig.ToPublisher(context.TODO()) 39 | if err != nil { 40 | log.Fatalf("Failed to initialize publisher from config: %v", err) 41 | } 42 | log.Infof("Using %q publisher", pub.Name()) 43 | 44 | scheduledFeeds, err := appConfig.GetScheduledFeeds() 45 | feedNames := []string{} 46 | for k := range scheduledFeeds { 47 | feedNames = append(feedNames, k) 48 | } 49 | log.Infof("Watching feeds: %v", strings.Join(feedNames, ", ")) 50 | if err != nil { 51 | log.Fatal(err) 52 | } 53 | 54 | pollRate, err := time.ParseDuration(appConfig.PollRate) 55 | if err != nil { 56 | log.Fatalf("Failed to parse poll_rate to duration: %v", err) 57 | } 58 | sched := scheduler.New(scheduledFeeds, pub, appConfig.HTTPPort) 59 | err = sched.Run(pollRate, appConfig.Timer) 60 | if err != nil { 61 | log.Fatal(err) 62 | } 63 | } 64 | -------------------------------------------------------------------------------- /docker-compose.yml: -------------------------------------------------------------------------------- 1 | version: '3.8' 2 | services: 3 | package-feeds: 4 | build: . 
5 | image: local-package-feeds 6 | ports: 7 | - "8080:8080" 8 | volumes: 9 | - "${FEED_CONFIG_PATH:-/tmp/feedconfig}:/config/" 10 | environment: 11 | - PACKAGE_FEEDS_CONFIG_PATH=/config/feeds.yml 12 | tty: true 13 | stdin_open: true 14 | -------------------------------------------------------------------------------- /go.mod: -------------------------------------------------------------------------------- 1 | module github.com/ossf/package-feeds 2 | 3 | go 1.22 4 | 5 | require ( 6 | github.com/hashicorp/golang-lru/v2 v2.0.7 7 | github.com/kolo/xmlrpc v0.0.0-20220921171641-a4b6fa1dd06b 8 | github.com/mitchellh/mapstructure v1.5.0 9 | github.com/robfig/cron/v3 v3.0.1 10 | github.com/sirupsen/logrus v1.9.3 11 | github.com/xeipuuv/gojsonschema v1.2.0 12 | gocloud.dev v0.37.0 13 | gocloud.dev/pubsub/kafkapubsub v0.37.0 14 | gopkg.in/yaml.v3 v3.0.1 15 | ) 16 | 17 | require ( 18 | cloud.google.com/go/compute v1.25.0 // indirect 19 | cloud.google.com/go/compute/metadata v0.2.3 // indirect 20 | cloud.google.com/go/iam v1.1.6 // indirect 21 | cloud.google.com/go/pubsub v1.37.0 // indirect 22 | github.com/IBM/sarama v1.43.0 // indirect 23 | github.com/davecgh/go-spew v1.1.1 // indirect 24 | github.com/eapache/go-resiliency v1.6.0 // indirect 25 | github.com/eapache/go-xerial-snappy v0.0.0-20230731223053-c322873962e3 // indirect 26 | github.com/eapache/queue v1.1.0 // indirect 27 | github.com/felixge/httpsnoop v1.0.4 // indirect 28 | github.com/go-logr/logr v1.4.1 // indirect 29 | github.com/go-logr/stdr v1.2.2 // indirect 30 | github.com/golang/groupcache v0.0.0-20210331224755-41bb18bfe9da // indirect 31 | github.com/golang/protobuf v1.5.4 // indirect 32 | github.com/golang/snappy v0.0.4 // indirect 33 | github.com/google/s2a-go v0.1.7 // indirect 34 | github.com/google/wire v0.6.0 // indirect 35 | github.com/googleapis/enterprise-certificate-proxy v0.3.2 // indirect 36 | github.com/googleapis/gax-go/v2 v2.12.2 // indirect 37 | github.com/hashicorp/errwrap v1.1.0 // indirect 
38 | github.com/hashicorp/go-multierror v1.1.1 // indirect 39 | github.com/hashicorp/go-uuid v1.0.3 // indirect 40 | github.com/jcmturner/aescts/v2 v2.0.0 // indirect 41 | github.com/jcmturner/dnsutils/v2 v2.0.0 // indirect 42 | github.com/jcmturner/gofork v1.7.6 // indirect 43 | github.com/jcmturner/gokrb5/v8 v8.4.4 // indirect 44 | github.com/jcmturner/rpc/v2 v2.0.3 // indirect 45 | github.com/klauspost/compress v1.17.7 // indirect 46 | github.com/kr/text v0.2.0 // indirect 47 | github.com/pierrec/lz4/v4 v4.1.21 // indirect 48 | github.com/rcrowley/go-metrics v0.0.0-20201227073835-cf1acfcdf475 // indirect 49 | github.com/rogpeppe/go-internal v1.12.0 // indirect 50 | github.com/xeipuuv/gojsonpointer v0.0.0-20180127040702-4e3ac2762d5f // indirect 51 | github.com/xeipuuv/gojsonreference v0.0.0-20180127040603-bd5ef7bd5415 // indirect 52 | go.opencensus.io v0.24.0 // indirect 53 | go.opentelemetry.io/contrib/instrumentation/google.golang.org/grpc/otelgrpc v0.49.0 // indirect 54 | go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.49.0 // indirect 55 | go.opentelemetry.io/otel v1.24.0 // indirect 56 | go.opentelemetry.io/otel/metric v1.24.0 // indirect 57 | go.opentelemetry.io/otel/trace v1.24.0 // indirect 58 | golang.org/x/crypto v0.21.0 // indirect 59 | golang.org/x/net v0.23.0 // indirect 60 | golang.org/x/oauth2 v0.18.0 // indirect 61 | golang.org/x/sync v0.6.0 // indirect 62 | golang.org/x/sys v0.18.0 // indirect 63 | golang.org/x/text v0.14.0 // indirect 64 | golang.org/x/time v0.5.0 // indirect 65 | golang.org/x/xerrors v0.0.0-20231012003039-104605ab7028 // indirect 66 | google.golang.org/api v0.169.0 // indirect 67 | google.golang.org/appengine v1.6.8 // indirect 68 | google.golang.org/genproto v0.0.0-20240311173647-c811ad7063a7 // indirect 69 | google.golang.org/genproto/googleapis/api v0.0.0-20240311173647-c811ad7063a7 // indirect 70 | google.golang.org/genproto/googleapis/rpc v0.0.0-20240311173647-c811ad7063a7 // indirect 71 | 
google.golang.org/grpc v1.62.1 // indirect 72 | google.golang.org/protobuf v1.33.0 // indirect 73 | ) 74 | -------------------------------------------------------------------------------- /hacks/tree-status: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | set -euo pipefail 3 | 4 | STATUS=$(git status --porcelain) 5 | if [[ -z $STATUS ]]; then 6 | echo tree is clean 7 | else 8 | echo tree is dirty, please commit all changes 9 | echo "$STATUS" 10 | git diff 11 | exit 1 12 | fi 13 | -------------------------------------------------------------------------------- /integration/config/feeds.yml: -------------------------------------------------------------------------------- 1 | poll_rate: 30m 2 | publisher: 3 | type: kafka 4 | config: 5 | brokers: ["kafka:9092"] 6 | topic: "package-feeds" -------------------------------------------------------------------------------- /integration/docker-compose-integration.yml: -------------------------------------------------------------------------------- 1 | version: "3" 2 | services: 3 | zookeeper: 4 | image: confluentinc/cp-zookeeper@sha256:87314e87320abf190f0407bf1689f4827661fbb4d671a41cba62673b45b66bfa 5 | ports: 6 | - "2181:2181" 7 | environment: 8 | ZOOKEEPER_CLIENT_PORT: 2181 9 | ZOOKEEPER_TICK_TIME: 2000 10 | ZOOKEEPER_SYNC_LIMIT: 2 11 | 12 | kafka: 13 | image: confluentinc/cp-kafka@sha256:c6320f9a0cbf57075e102546de110dcebdf374955f12388d58c23a54b8a47d31 14 | ports: 15 | - 9094:9094 16 | depends_on: 17 | - zookeeper 18 | environment: 19 | KAFKA_ZOOKEEPER_CONNECT: zookeeper:2181 20 | KAFKA_LISTENERS: INTERNAL://kafka:9092,OUTSIDE://kafka:9094 21 | KAFKA_ADVERTISED_LISTENERS: INTERNAL://kafka:9092,OUTSIDE://localhost:9094 22 | KAFKA_LISTENER_SECURITY_PROTOCOL_MAP: INTERNAL:PLAINTEXT,OUTSIDE:PLAINTEXT 23 | KAFKA_INTER_BROKER_LISTENER_NAME: INTERNAL 24 | KAFKA_offsets_topic_replication_factor: 1 25 | 26 | feeds: 27 | restart: "on-failure" 28 | build: 29 | context: .. 
30 | ports: 31 | - 8080:8080 32 | depends_on: 33 | - kafka 34 | environment: 35 | PACKAGE_FEEDS_CONFIG_PATH: /config/feeds.yml 36 | volumes: 37 | - "./config/:/config/" 38 | -------------------------------------------------------------------------------- /integration/test_consumer/check_kafka_output.py: -------------------------------------------------------------------------------- 1 | from datetime import datetime 2 | from confluent_kafka import Consumer 3 | 4 | import time 5 | import requests 6 | 7 | def trigger_feeds(): 8 | attempts = 5 9 | print("Requesting feeds poll data from registries...") 10 | while True: 11 | try: 12 | r = requests.get('http://127.0.0.1:8080') 13 | break 14 | except: 15 | if attempts == 0: 16 | raise 17 | print("Warning: Failed to request data from package-feeds, retrying after 5s") 18 | time.sleep(5) 19 | 20 | attempts -= 1 21 | 22 | print(r.text) 23 | 24 | 25 | 26 | def main(): 27 | msgs = [] 28 | 29 | trigger_feeds() 30 | 31 | c = Consumer({ 32 | 'bootstrap.servers': '127.0.0.1:9094', 33 | 'group.id': 'consumer', 34 | 'auto.offset.reset': 'earliest' 35 | }) 36 | 37 | c.subscribe(['package-feeds']) 38 | 39 | last_poll_success = datetime.now() 40 | while True: 41 | msg = c.poll(2.0) 42 | 43 | if msg is None: 44 | delta = datetime.now() - last_poll_success 45 | # Timeout to avoid hanging on poll() loop 46 | if delta.total_seconds() > 10: 47 | break 48 | continue 49 | if msg.error(): 50 | print("Consumer error: {}".format(msg.error())) 51 | continue 52 | msgs.append(msg) 53 | last_poll_success = datetime.now() 54 | print('Received message: {}'.format(msg.value().decode('utf-8'))) 55 | 56 | print(f"\n\n------------------------------------------------------------\n\nReceived a total of {len(msgs)} messages") 57 | c.close() 58 | assert len(msgs) > 0, "Failed to assert that atleast a single package was received" 59 | 60 | if __name__ == '__main__': 61 | main() -------------------------------------------------------------------------------- 
/integration/test_consumer/requirements.txt: -------------------------------------------------------------------------------- 1 | certifi==2024.7.4 2 | chardet==4.0.0 3 | confluent-kafka==2.5.0 4 | idna==3.7 5 | requests==2.32.2 6 | urllib3==1.26.19 7 | -------------------------------------------------------------------------------- /package.schema.json: -------------------------------------------------------------------------------- 1 | { 2 | "$schema": "https://json-schema.org/draft/2020-12/schema", 3 | "$id": "https://github.com/ossf/package-feeds/blob/main/package.schema.json", 4 | "title": "Package Schema Version 1.0", 5 | "description": "The package representation as outputted by a ScheduledFeed", 6 | "type": "object", 7 | "properties": { 8 | "name": { 9 | "type": "string", 10 | "description": "The name of the package", 11 | "examples": ["foopackage", "Foo.Package", "@foouser/barpackage", "github.com/foo-user/bar-package"] 12 | }, 13 | "version": { 14 | "type": "string", 15 | "description": "The package version, formatted respective to the given package type", 16 | "examples": ["1.0.0", "v1.0", "v0.1.1-197001010-ae2f65d", "foo-main"] 17 | }, 18 | "created_date": { 19 | "type": "string", 20 | "description": "RFC 3339 timestamp representing the package creation date", 21 | "format": "date-time", 22 | "examples": ["1970-01-01T00:00:00.00000Z"] 23 | }, 24 | "type": { 25 | "type": "string", 26 | "description": "The type of package, this being the `FeedName` of the given package feed", 27 | "examples": ["pypi", "npm", "crates", "goproxy"] 28 | }, 29 | "artifact_id": { 30 | "type": "string", 31 | "description": "Identifies a particular downloadable artifact for this package version. No particular format is guaranteed; e.g. 
names may indicate platform-specific variants or include commit hashes.", 32 | "examples": ["factor_reader-1.0.3.tar.gz", "micrograd2023-0.0.3-py3-none-any.whl", "odoo14_addon_l10n_es_aeat-14.0.3.0.2.dev1-py3-none-any.whl"] 33 | }, 34 | "schema_ver": { 35 | "type": "string", 36 | "pattern": "^[1-9][0-9]*\\.[0-9]+", 37 | "description": "The schema version, increments in the minor reflect additive changes", 38 | "examples": ["1.0", "1.5", "2.0", "10.0"] 39 | } 40 | }, 41 | "required": [ "name", "version", "created_date", "type", "schema_ver" ], 42 | "additionalProperties": false 43 | } 44 | -------------------------------------------------------------------------------- /pkg/config/config_test.go: -------------------------------------------------------------------------------- 1 | package config_test 2 | 3 | import ( 4 | "context" 5 | "testing" 6 | 7 | "github.com/ossf/package-feeds/pkg/config" 8 | "github.com/ossf/package-feeds/pkg/events" 9 | "github.com/ossf/package-feeds/pkg/feeds" 10 | "github.com/ossf/package-feeds/pkg/feeds/pypi" 11 | "github.com/ossf/package-feeds/pkg/publisher/stdout" 12 | "github.com/ossf/package-feeds/pkg/scheduler" 13 | ) 14 | 15 | const ( 16 | TestConfigStr = ` 17 | feeds: 18 | - type: rubygems 19 | - type: goproxy 20 | - type: npm 21 | 22 | publisher: 23 | type: "gcp" 24 | config: 25 | endpoint: "https://foobaz.com" 26 | 27 | http_port: 8080 28 | poll_rate: 5m 29 | timer: true 30 | ` 31 | TestConfigStrUnknownFeedType = ` 32 | feeds: 33 | - type: foo 34 | ` 35 | TestConfigStrUnknownField = ` 36 | foo: 37 | - bar 38 | - baz 39 | ` 40 | TestEventsConfig = ` 41 | events: 42 | sink: stdout 43 | filter: 44 | enabled_event_types: 45 | - foo 46 | disabled_event_types: 47 | - bar 48 | enabled_components: 49 | - baz 50 | ` 51 | ) 52 | 53 | func TestDefault(t *testing.T) { 54 | t.Parallel() 55 | 56 | c := config.Default() 57 | scheduledFeeds, err := c.GetScheduledFeeds() 58 | if err != nil { 59 | t.Fatalf("failed to initialize feeds: %v", err) 60 
| } 61 | pub, err := c.PubConfig.ToPublisher(context.TODO()) 62 | if err != nil { 63 | t.Fatalf("Failed to initialise publisher from config") 64 | } 65 | _ = scheduler.New(scheduledFeeds, pub, c.HTTPPort) 66 | } 67 | 68 | func TestGetScheduledFeeds(t *testing.T) { 69 | t.Parallel() 70 | 71 | c, err := config.NewConfigFromBytes([]byte(TestConfigStr)) 72 | if err != nil { 73 | t.Fatal(err) 74 | } 75 | if len(c.Feeds) != 3 { 76 | t.Fatalf("Feeds is expected to be 3 but was `%v`", len(c.Feeds)) 77 | } 78 | scheduledFeeds, err := c.GetScheduledFeeds() 79 | if err != nil { 80 | t.Fatal(err) 81 | } 82 | for _, feed := range c.Feeds { 83 | if _, ok := scheduledFeeds[feed.Type]; !ok { 84 | t.Errorf("expected `%v` feed was not found in scheduled feeds after GetScheduledFeeds()", feed.Type) 85 | } 86 | } 87 | } 88 | 89 | func TestLoadFeedConfigUnknownFeedType(t *testing.T) { 90 | t.Parallel() 91 | 92 | c, err := config.NewConfigFromBytes([]byte(TestConfigStrUnknownFeedType)) 93 | if err != nil { 94 | t.Fatalf("failed to parse config: %v", err) 95 | } 96 | _, err = c.GetScheduledFeeds() 97 | if err == nil { 98 | t.Error("unknown feed type was successfully parsed when it should've failed") 99 | } 100 | } 101 | 102 | func TestPublisherConfigToPublisherStdout(t *testing.T) { 103 | t.Parallel() 104 | 105 | c := config.PublisherConfig{ 106 | Type: stdout.PublisherType, 107 | } 108 | pub, err := c.ToPublisher(context.TODO()) 109 | if err != nil { 110 | t.Fatal("failed to create stdout publisher from config") 111 | } 112 | if pub.Name() != stdout.PublisherType { 113 | t.Errorf("stdout sub config produced a publisher with an unexpected name: '%v' != '%v'", 114 | pub.Name(), stdout.PublisherType) 115 | } 116 | } 117 | 118 | func TestPublisherConfigToFeed(t *testing.T) { 119 | t.Parallel() 120 | 121 | packages := []string{ 122 | "foo", 123 | "bar", 124 | "baz", 125 | } 126 | 127 | c := config.FeedConfig{ 128 | Type: pypi.FeedName, 129 | Options: feeds.FeedOptions{ 130 | Packages: 
&packages, 131 | }, 132 | } 133 | feed, err := c.ToFeed(events.NewNullHandler()) 134 | if err != nil { 135 | t.Fatalf("failed to create pypi feed from configuration: %v", err) 136 | } 137 | 138 | pypiFeed, ok := feed.(*pypi.Feed) 139 | if !ok { 140 | t.Fatal("failed to cast feed as pypi feed") 141 | } 142 | 143 | feedPackages := pypiFeed.GetPackageList() 144 | if feedPackages == nil { 145 | t.Fatalf("failed to initialize pypi feed package list to poll") 146 | } 147 | if feedPackages != nil && len(*feedPackages) != len(packages) { 148 | t.Errorf("pypi package list does not match config provided package list") 149 | } else { 150 | for i := 0; i < len(packages); i++ { 151 | if (*feedPackages)[i] != packages[i] { 152 | t.Errorf("pypi package '%v' does not match configured package '%v'", (*feedPackages)[i], packages[i]) 153 | } 154 | } 155 | } 156 | } 157 | 158 | func TestStrictConfigDecoding(t *testing.T) { 159 | t.Parallel() 160 | 161 | _, err := config.NewConfigFromBytes([]byte(TestConfigStrUnknownField)) 162 | if err == nil { 163 | t.Fatal("config successfully parsed despite invalid top level configuration field") 164 | } 165 | } 166 | 167 | func TestEventHandlerConfiguration(t *testing.T) { 168 | t.Parallel() 169 | 170 | c, err := config.NewConfigFromBytes([]byte(TestEventsConfig)) 171 | if err != nil { 172 | t.Fatalf("failed to load config from bytes: %v", err) 173 | } 174 | 175 | handler, err := c.GetEventHandler() 176 | if err != nil || handler == nil { 177 | t.Fatalf("failed to initialize event handler from config") 178 | } 179 | 180 | _, ok := handler.GetSink().(*events.LoggingEventSink) 181 | if !ok { 182 | t.Fatalf("sink is not configured as stdout as config file expects") 183 | } 184 | 185 | filter := handler.GetFilter() 186 | 187 | fooEvent := events.MockEvent{ 188 | Type: "foo", 189 | Component: "qux", 190 | } 191 | barEvent := events.MockEvent{ 192 | Type: "bar", 193 | Component: "baz", 194 | } 195 | bazEvent := events.MockEvent{ 196 | Type: "qux", 197 | 
Component: "baz", 198 | } 199 | if !filter.ShouldDispatch(fooEvent) { 200 | t.Errorf("configured filter incorrectly rejects type `foo` from being dispatched") 201 | } 202 | if filter.ShouldDispatch(barEvent) { 203 | t.Errorf("configured filter incorrectly allows type `bar` to be dispatched") 204 | } 205 | if !filter.ShouldDispatch(bazEvent) { 206 | t.Errorf("configured filter incorrectly rejects component `baz` from being dispatched") 207 | } 208 | } 209 | -------------------------------------------------------------------------------- /pkg/config/scheduledfeed.go: -------------------------------------------------------------------------------- 1 | package config 2 | 3 | import ( 4 | "bytes" 5 | "context" 6 | "errors" 7 | "fmt" 8 | "io" 9 | "os" 10 | "strconv" 11 | 12 | "github.com/mitchellh/mapstructure" 13 | log "github.com/sirupsen/logrus" 14 | "gopkg.in/yaml.v3" 15 | 16 | "github.com/ossf/package-feeds/pkg/events" 17 | "github.com/ossf/package-feeds/pkg/feeds" 18 | "github.com/ossf/package-feeds/pkg/feeds/crates" 19 | "github.com/ossf/package-feeds/pkg/feeds/goproxy" 20 | "github.com/ossf/package-feeds/pkg/feeds/maven" 21 | "github.com/ossf/package-feeds/pkg/feeds/npm" 22 | "github.com/ossf/package-feeds/pkg/feeds/nuget" 23 | "github.com/ossf/package-feeds/pkg/feeds/packagist" 24 | "github.com/ossf/package-feeds/pkg/feeds/pypi" 25 | "github.com/ossf/package-feeds/pkg/feeds/rubygems" 26 | "github.com/ossf/package-feeds/pkg/publisher" 27 | "github.com/ossf/package-feeds/pkg/publisher/gcppubsub" 28 | "github.com/ossf/package-feeds/pkg/publisher/httpclientpubsub" 29 | "github.com/ossf/package-feeds/pkg/publisher/kafkapubsub" 30 | "github.com/ossf/package-feeds/pkg/publisher/stdout" 31 | ) 32 | 33 | var ( 34 | errUnknownFeed = errors.New("unknown feed type") 35 | errUnknownPub = errors.New("unknown publisher type") 36 | errUnknownSinkType = errors.New("unknown sink type") 37 | 38 | // feed-specific poll rate is left unspecified, so it can still be 39 | // configured 
by the global 'poll_rate' option in the ScheduledFeedConfig YAML. 40 | defaultFeedOptions = feeds.FeedOptions{Packages: nil, PollRate: ""} 41 | npmDefaultFeedOptions = feeds.FeedOptions{Packages: nil, PollRate: "2m"} 42 | ) 43 | 44 | // Loads a ScheduledFeedConfig struct from a yaml config file. 45 | func FromFile(configPath string) (*ScheduledFeedConfig, error) { 46 | data, err := os.ReadFile(configPath) 47 | if err != nil { 48 | return nil, err 49 | } 50 | 51 | return NewConfigFromBytes(data) 52 | } 53 | 54 | // Loads a ScheduledFeedConfig struct from a yaml bytes. 55 | func NewConfigFromBytes(yamlBytes []byte) (*ScheduledFeedConfig, error) { 56 | config := Default() 57 | 58 | err := unmarshalStrict(yamlBytes, config) 59 | if err != nil { 60 | return nil, err 61 | } 62 | config.applyEnvVars() 63 | 64 | return config, nil 65 | } 66 | 67 | // Applies environment variables to the configuration. 68 | func (sc *ScheduledFeedConfig) applyEnvVars() { 69 | // Support legacy env var definition for gcp pub sub. 70 | pubURL := os.Getenv("OSSMALWARE_TOPIC_URL") 71 | if pubURL != "" { 72 | sc.PubConfig = PublisherConfig{ 73 | Type: gcppubsub.PublisherType, 74 | Config: map[string]interface{}{ 75 | "url": pubURL, 76 | }, 77 | } 78 | } 79 | 80 | portStr, portProvided := os.LookupEnv("PORT") 81 | port, err := strconv.Atoi(portStr) 82 | 83 | if portProvided && err == nil { 84 | sc.HTTPPort = port 85 | } 86 | } 87 | 88 | func AddTo(ls *[]int, value int) { 89 | *ls = append(*ls, value) 90 | } 91 | 92 | // Constructs a map of ScheduledFeeds to enable based on the Feeds 93 | // provided from configuration, indexed by the feed type. 
94 | func (sc *ScheduledFeedConfig) GetScheduledFeeds() (map[string]feeds.ScheduledFeed, error) { 95 | scheduledFeeds := map[string]feeds.ScheduledFeed{} 96 | eventHandler, err := sc.GetEventHandler() 97 | if err != nil { 98 | return nil, err 99 | } 100 | 101 | for _, entry := range sc.Feeds { 102 | feed, err := entry.ToFeed(eventHandler) 103 | if err != nil { 104 | return nil, err 105 | } 106 | scheduledFeeds[entry.Type] = feed 107 | } 108 | 109 | return scheduledFeeds, nil 110 | } 111 | 112 | func (sc *ScheduledFeedConfig) GetEventHandler() (*events.Handler, error) { 113 | var err error 114 | if sc.EventsConfig == nil { 115 | sc.eventHandler = events.NewNullHandler() 116 | } else if sc.eventHandler == nil { 117 | sc.eventHandler, err = sc.EventsConfig.ToEventHandler() 118 | if err != nil { 119 | return nil, err 120 | } 121 | } 122 | return sc.eventHandler, nil 123 | } 124 | 125 | func (ec *EventsConfig) ToEventHandler() (*events.Handler, error) { 126 | var sink events.Sink 127 | switch ec.Sink { 128 | case events.LoggingEventSinkType: 129 | sink = events.NewLoggingEventSink(log.New()) 130 | default: 131 | return nil, fmt.Errorf("%w : %v", errUnknownSinkType, ec.Sink) 132 | } 133 | return events.NewHandler(sink, ec.EventFilter), nil 134 | } 135 | 136 | // Produces a Publisher object from the provided PublisherConfig 137 | // The PublisherConfig.Type value is evaluated and the appropriate Publisher is 138 | // constructed from the Config field. If the type is not a recognised Publisher type, 139 | // an error is returned. 
140 | func (pc PublisherConfig) ToPublisher(ctx context.Context) (publisher.Publisher, error) { 141 | var err error 142 | switch pc.Type { 143 | case gcppubsub.PublisherType: 144 | var gcpConfig gcppubsub.Config 145 | err = strictDecode(pc.Config, &gcpConfig) 146 | if err != nil { 147 | return nil, fmt.Errorf("failed to decode gcppubsub config: %w", err) 148 | } 149 | return gcppubsub.FromConfig(ctx, gcpConfig) 150 | case kafkapubsub.PublisherType: 151 | var kafkaConfig kafkapubsub.Config 152 | err = strictDecode(pc.Config, &kafkaConfig) 153 | if err != nil { 154 | return nil, fmt.Errorf("failed to decode kafkapubsub config: %w", err) 155 | } 156 | return kafkapubsub.FromConfig(ctx, kafkaConfig) 157 | case httpclientpubsub.PublisherType: 158 | var httpClientConfig httpclientpubsub.Config 159 | err = strictDecode(pc.Config, &httpClientConfig) 160 | if err != nil { 161 | return nil, fmt.Errorf("failed to decode httpclient config: %w", err) 162 | } 163 | return httpclientpubsub.FromConfig(ctx, httpClientConfig) 164 | 165 | case stdout.PublisherType: 166 | return stdout.New(), nil 167 | default: 168 | return nil, fmt.Errorf("%w : %v", errUnknownPub, pc.Type) 169 | } 170 | } 171 | 172 | // Constructs the appropriate feed for the given type, providing the 173 | // options to the feed. 
174 | func (fc FeedConfig) ToFeed(eventHandler *events.Handler) (feeds.ScheduledFeed, error) { 175 | switch fc.Type { 176 | case crates.FeedName: 177 | return crates.New(fc.Options, eventHandler) 178 | case goproxy.FeedName: 179 | return goproxy.New(fc.Options) 180 | case npm.FeedName: 181 | return npm.New(fc.Options, eventHandler) 182 | case nuget.FeedName: 183 | return nuget.New(fc.Options) 184 | case maven.FeedName: 185 | return maven.New(fc.Options) 186 | case pypi.FeedName: 187 | return pypi.New(fc.Options, eventHandler) 188 | case packagist.FeedName: 189 | return packagist.New(fc.Options) 190 | case rubygems.FeedName: 191 | return rubygems.New(fc.Options, eventHandler) 192 | default: 193 | return nil, fmt.Errorf("%w : %v", errUnknownFeed, fc.Type) 194 | } 195 | } 196 | 197 | // Decode an input using mapstruct decoder with strictness enabled, errors will be returned in 198 | // the case of unused fields. 199 | func strictDecode(input, out interface{}) error { 200 | strictDecoder, err := mapstructure.NewDecoder(&mapstructure.DecoderConfig{ 201 | ErrorUnused: true, 202 | Result: out, 203 | }) 204 | if err != nil { 205 | return err 206 | } 207 | return strictDecoder.Decode(input) 208 | } 209 | 210 | func Default() *ScheduledFeedConfig { 211 | config := &ScheduledFeedConfig{ 212 | Feeds: []FeedConfig{ 213 | { 214 | Type: crates.FeedName, 215 | Options: defaultFeedOptions, 216 | }, 217 | { 218 | Type: goproxy.FeedName, 219 | Options: defaultFeedOptions, 220 | }, 221 | { 222 | Type: npm.FeedName, 223 | Options: npmDefaultFeedOptions, 224 | }, 225 | { 226 | Type: nuget.FeedName, 227 | Options: defaultFeedOptions, 228 | }, 229 | { 230 | Type: maven.FeedName, 231 | Options: defaultFeedOptions, 232 | }, 233 | { 234 | Type: packagist.FeedName, 235 | Options: defaultFeedOptions, 236 | }, 237 | { 238 | Type: pypi.FeedName, 239 | Options: defaultFeedOptions, 240 | }, 241 | { 242 | Type: rubygems.FeedName, 243 | Options: defaultFeedOptions, 244 | }, 245 | }, 246 | PubConfig: 
PublisherConfig{ 247 | Type: stdout.PublisherType, 248 | }, 249 | HTTPPort: 8080, 250 | PollRate: "5m", 251 | Timer: false, 252 | } 253 | config.applyEnvVars() 254 | return config 255 | } 256 | 257 | // Unmarshals configuration data from bytes into the provided interface, strictness is 258 | // enabled which returns an error in the case that an unknown field is provided. 259 | func unmarshalStrict(data []byte, out interface{}) error { 260 | dec := yaml.NewDecoder(bytes.NewReader(data)) 261 | dec.KnownFields(true) 262 | if err := dec.Decode(out); err != nil && !errors.Is(err, io.EOF) { 263 | return err 264 | } 265 | return nil 266 | } 267 | -------------------------------------------------------------------------------- /pkg/config/structs.go: -------------------------------------------------------------------------------- 1 | package config 2 | 3 | import ( 4 | "github.com/ossf/package-feeds/pkg/events" 5 | "github.com/ossf/package-feeds/pkg/feeds" 6 | ) 7 | 8 | type ScheduledFeedConfig struct { 9 | // Configures the publisher for pushing packages after polling. 10 | PubConfig PublisherConfig `yaml:"publisher"` 11 | 12 | // Configures the feeds to be used for polling from package repositories. 13 | Feeds []FeedConfig `yaml:"feeds"` 14 | 15 | HTTPPort int `yaml:"http_port,omitempty"` 16 | PollRate string `yaml:"poll_rate"` 17 | Timer bool `yaml:"timer"` 18 | 19 | // Configures the EventHandler instance to be used throughout the package-feeds application. 
// PublisherConfig describes which publisher to use and carries that
// publisher's settings.
type PublisherConfig struct {
	// Type is the publisher type identifier.
	// NOTE(review): the field carries a mapstructure tag rather than a yaml
	// tag; presumably YAML decoding relies on the default lower-cased field
	// name "type" — confirm.
	Type string `mapstructure:"type"`

	// Config holds the publisher-specific settings in undecoded form.
	Config interface{} `mapstructure:"config"`
}
8 | FeedsComponentType = "Feeds" 9 | ) 10 | 11 | type Sink interface { 12 | AddEvent(e Event) error 13 | } 14 | 15 | type Event interface { 16 | GetComponent() string 17 | GetType() string 18 | GetMessage() string 19 | } 20 | 21 | type Filter struct { 22 | EnabledEventTypes []string `yaml:"enabled_event_types"` 23 | DisabledEventTypes []string `yaml:"disabled_event_types"` 24 | 25 | EnabledComponents []string `yaml:"enabled_components"` 26 | } 27 | 28 | type Handler struct { 29 | eventSink Sink 30 | eventFilter Filter 31 | } 32 | 33 | func NewHandler(sink Sink, filter Filter) *Handler { 34 | return &Handler{ 35 | eventSink: sink, 36 | eventFilter: filter, 37 | } 38 | } 39 | 40 | func NewNullHandler() *Handler { 41 | return &Handler{} 42 | } 43 | 44 | // Creates a filter for use with an event handler, nil can be provided for non values. 45 | func NewFilter(enabledEventTypes, disabledEventTypes, enabledComponents []string) *Filter { 46 | if enabledEventTypes == nil { 47 | enabledEventTypes = []string{} 48 | } 49 | if disabledEventTypes == nil { 50 | disabledEventTypes = []string{} 51 | } 52 | if enabledComponents == nil { 53 | enabledComponents = []string{} 54 | } 55 | return &Filter{ 56 | EnabledEventTypes: enabledEventTypes, 57 | DisabledEventTypes: disabledEventTypes, 58 | EnabledComponents: enabledComponents, 59 | } 60 | } 61 | 62 | // Dispatches an event to the configured sink if it passes the configured filter. 63 | func (h *Handler) DispatchEvent(e Event) error { 64 | if h.eventSink == nil { 65 | return nil 66 | } 67 | filter := h.eventFilter 68 | 69 | if filter.ShouldDispatch(e) { 70 | return h.eventSink.AddEvent(e) 71 | } 72 | return nil 73 | } 74 | 75 | func (h *Handler) GetSink() Sink { 76 | return h.eventSink 77 | } 78 | 79 | func (h *Handler) GetFilter() Filter { 80 | return h.eventFilter 81 | } 82 | 83 | // ShouldDispatch checks whether an event should be dispatched under the 84 | // configured filter options. 
85 | // Options are applied as follows: 86 | // - disabled event types are always disabled. 87 | // - enabled event types are enabled 88 | // - enabled components are enabled except for disabled event types. 89 | func (f Filter) ShouldDispatch(e Event) bool { 90 | dispatch := false 91 | eComponent := e.GetComponent() 92 | eType := e.GetType() 93 | 94 | // Enable components. 95 | if stringExistsInSlice(eComponent, f.EnabledComponents) { 96 | dispatch = true 97 | } 98 | // Handle specific event types. 99 | if stringExistsInSlice(eType, f.EnabledEventTypes) { 100 | dispatch = true 101 | } else if stringExistsInSlice(eType, f.DisabledEventTypes) { 102 | dispatch = false 103 | } 104 | return dispatch 105 | } 106 | 107 | // Checks for existence of a string within a slice of strings. 108 | func stringExistsInSlice(s string, slice []string) bool { 109 | for _, sliceStr := range slice { 110 | if s == sliceStr { 111 | return true 112 | } 113 | } 114 | return false 115 | } 116 | -------------------------------------------------------------------------------- /pkg/events/handler_test.go: -------------------------------------------------------------------------------- 1 | package events 2 | 3 | import ( 4 | "testing" 5 | ) 6 | 7 | func TestHandlerDispatchEventNoFilterConfigured(t *testing.T) { 8 | t.Parallel() 9 | 10 | sink := &MockSink{} 11 | filter := NewFilter(nil, nil, nil) 12 | 13 | handler := NewHandler(sink, *filter) 14 | 15 | event := &LossyFeedEvent{ 16 | Feed: "Foo", 17 | } 18 | 19 | err := handler.DispatchEvent(event) 20 | if err != nil { 21 | t.Fatal(err) 22 | } 23 | 24 | if len(sink.events) != 0 { 25 | t.Error("LossyFeedEvent was dispatched despite not being enabled") 26 | } 27 | } 28 | 29 | func TestHandlerDispatchEventFilterAllowLossyFeed(t *testing.T) { 30 | t.Parallel() 31 | 32 | sink := &MockSink{} 33 | filter := NewFilter([]string{LossyFeedEventType}, nil, nil) 34 | 35 | handler := NewHandler(sink, *filter) 36 | 37 | event := &LossyFeedEvent{ 38 | Feed: 
"Foo", 39 | } 40 | 41 | err := handler.DispatchEvent(event) 42 | if err != nil { 43 | t.Fatal(err) 44 | } 45 | 46 | if len(sink.events) != 1 { 47 | t.Error("LossyFeedEvent was not dispatched despite being configured to allow dispatch") 48 | } 49 | } 50 | 51 | func TestHandlerDispatchEventFilterAllowFeedComponent(t *testing.T) { 52 | t.Parallel() 53 | 54 | sink := &MockSink{} 55 | filter := NewFilter(nil, nil, []string{FeedsComponentType}) 56 | 57 | handler := NewHandler(sink, *filter) 58 | 59 | event := &LossyFeedEvent{ 60 | Feed: "Foo", 61 | } 62 | 63 | err := handler.DispatchEvent(event) 64 | if err != nil { 65 | t.Fatal(err) 66 | } 67 | 68 | if len(sink.events) != 1 { 69 | t.Error("LossyFeedEvent was not dispatched despite feeds component being allowed") 70 | } 71 | } 72 | 73 | func TestHandlerDispatchEventFilterDisableLossyFeed(t *testing.T) { 74 | t.Parallel() 75 | 76 | sink := &MockSink{} 77 | filter := NewFilter(nil, []string{LossyFeedEventType}, []string{FeedsComponentType}) 78 | 79 | handler := NewHandler(sink, *filter) 80 | 81 | event := &LossyFeedEvent{ 82 | Feed: "Foo", 83 | } 84 | 85 | err := handler.DispatchEvent(event) 86 | if err != nil { 87 | t.Fatal(err) 88 | } 89 | 90 | if len(sink.events) != 0 { 91 | t.Error("LossyFeedEvent was dispatched despite being configured to disable dispatch") 92 | } 93 | } 94 | -------------------------------------------------------------------------------- /pkg/events/logrus_sink.go: -------------------------------------------------------------------------------- 1 | package events 2 | 3 | import ( 4 | "github.com/sirupsen/logrus" 5 | ) 6 | 7 | const ( 8 | LoggingEventSinkType = "stdout" 9 | ) 10 | 11 | type LoggingEventSink struct { 12 | logger *logrus.Logger 13 | } 14 | 15 | // Creates an event sink which logs events using a provided logrus logger, 16 | // fields "component" and "event_type" are applied to the logger and 17 | // warnings are logged for each event. 
18 | func NewLoggingEventSink(logger *logrus.Logger) *LoggingEventSink { 19 | return &LoggingEventSink{ 20 | logger: logger, 21 | } 22 | } 23 | 24 | func (sink LoggingEventSink) AddEvent(e Event) error { 25 | sink.logger.WithFields(logrus.Fields{ 26 | "event_type": e.GetType(), 27 | "component": e.GetComponent(), 28 | }).Warn(e.GetMessage()) 29 | return nil 30 | } 31 | -------------------------------------------------------------------------------- /pkg/events/logrus_sink_test.go: -------------------------------------------------------------------------------- 1 | package events 2 | 3 | import ( 4 | "testing" 5 | 6 | "github.com/sirupsen/logrus/hooks/test" 7 | ) 8 | 9 | func TestLogrusSink(t *testing.T) { 10 | t.Parallel() 11 | 12 | log, hook := test.NewNullLogger() 13 | 14 | sink := NewLoggingEventSink(log) 15 | 16 | event := LossyFeedEvent{ 17 | Feed: "Foo", 18 | } 19 | 20 | err := sink.AddEvent(event) 21 | if err != nil { 22 | t.Error(err) 23 | } 24 | 25 | logEntry := hook.LastEntry() 26 | if logEntry == nil { 27 | t.Fatal("Log entry was not added to the configured logger") 28 | } 29 | 30 | if logEntry.Data["event_type"] != event.GetType() { 31 | t.Errorf( 32 | "Log entry had incorrect event_type field '%v' when '%v' was expected", 33 | logEntry.Data["event_type"], 34 | event.GetType(), 35 | ) 36 | } 37 | 38 | if logEntry.Data["component"] != event.GetComponent() { 39 | t.Errorf( 40 | "Log entry had incorrect component field '%v' when '%v' was expected", 41 | logEntry.Data["component"], 42 | event.GetComponent(), 43 | ) 44 | } 45 | } 46 | -------------------------------------------------------------------------------- /pkg/events/lossy_feed_event.go: -------------------------------------------------------------------------------- 1 | package events 2 | 3 | import ( 4 | "fmt" 5 | ) 6 | 7 | type LossyFeedEvent struct { 8 | Feed string 9 | } 10 | 11 | func (e LossyFeedEvent) GetComponent() string { 12 | return FeedsComponentType 13 | } 14 | 15 | func (e 
LossyFeedEvent) GetType() string { 16 | return LossyFeedEventType 17 | } 18 | 19 | func (e LossyFeedEvent) GetMessage() string { 20 | return fmt.Sprintf("detected potential missing package data when polling %v feed", e.Feed) 21 | } 22 | -------------------------------------------------------------------------------- /pkg/events/mocks.go: -------------------------------------------------------------------------------- 1 | package events 2 | 3 | type MockSink struct { 4 | events []Event 5 | } 6 | 7 | func (s *MockSink) GetEvents() []Event { 8 | return s.events 9 | } 10 | 11 | func (s *MockSink) AddEvent(e Event) error { 12 | s.events = append(s.events, e) 13 | return nil 14 | } 15 | 16 | type MockEvent struct { 17 | Component string 18 | Type string 19 | Message string 20 | } 21 | 22 | func (e MockEvent) GetComponent() string { 23 | return e.Component 24 | } 25 | 26 | func (e MockEvent) GetType() string { 27 | return e.Type 28 | } 29 | 30 | func (e MockEvent) GetMessage() string { 31 | return e.Message 32 | } 33 | -------------------------------------------------------------------------------- /pkg/feeds/README.md: -------------------------------------------------------------------------------- 1 | # Feeds 2 | 3 | Each of the feeds have their own implementation and support their own set of configuration options. 4 | 5 | ## Configuration options 6 | 7 | `packages` this configuration option is only available on certain feeds, check the README of the feed you're interested in for information on this. 8 | 9 | `poll_rate` this allows for setting the frequency of polling for this specific feed. This is supported by all feeds. The value should be a string formatted for [duration parser](https://golang.org/pkg/time/#ParseDuration). Setting this value will enable the scheduled polling regardless of the value of `timer` in the root of the configuration. 
// crates holds the decoded JSON payload for this feed; only the
// "just_updated" list of packages is mapped.
type crates struct {
	// JustUpdated is populated from the "just_updated" JSON array.
	JustUpdated []*Package `json:"just_updated"`
}
31 | type Package struct { 32 | ID string `json:"id"` 33 | Name string `json:"name"` 34 | UpdatedAt time.Time `json:"updated_at"` 35 | NewestVersion string `json:"newest_version"` 36 | MaxStableVersion string `json:"max_stable_version"` 37 | Repository string `json:"repository"` 38 | } 39 | 40 | // Gets crates.io packages. 41 | func fetchPackages(baseURL string) ([]*Package, error) { 42 | pkgURL, err := url.JoinPath(baseURL, activityPath) 43 | if err != nil { 44 | return nil, err 45 | } 46 | resp, err := httpClient.Get(pkgURL) 47 | if err != nil { 48 | return nil, err 49 | } 50 | defer resp.Body.Close() 51 | 52 | err = utils.CheckResponseStatus(resp) 53 | if err != nil { 54 | return nil, fmt.Errorf("failed to fetch crates package data: %w", err) 55 | } 56 | 57 | v := &crates{} 58 | err = json.NewDecoder(resp.Body).Decode(v) 59 | if err != nil { 60 | return nil, err 61 | } 62 | // TODO: We should check both the NewCrates as well. 63 | return v.JustUpdated, nil 64 | } 65 | 66 | type Feed struct { 67 | lossyFeedAlerter *feeds.LossyFeedAlerter 68 | baseURL string 69 | options feeds.FeedOptions 70 | } 71 | 72 | func New(feedOptions feeds.FeedOptions, eventHandler *events.Handler) (*Feed, error) { 73 | if feedOptions.Packages != nil { 74 | return nil, feeds.UnsupportedOptionError{ 75 | Feed: FeedName, 76 | Option: "packages", 77 | } 78 | } 79 | return &Feed{ 80 | lossyFeedAlerter: feeds.NewLossyFeedAlerter(eventHandler), 81 | baseURL: "https://crates.io", 82 | options: feedOptions, 83 | }, nil 84 | } 85 | 86 | func (feed Feed) Latest(cutoff time.Time) ([]*feeds.Package, time.Time, []error) { 87 | pkgs := []*feeds.Package{} 88 | packages, err := fetchPackages(feed.baseURL) 89 | if err != nil { 90 | return pkgs, cutoff, []error{err} 91 | } 92 | for _, pkg := range packages { 93 | pkg := feeds.NewPackage(pkg.UpdatedAt, pkg.Name, pkg.NewestVersion, FeedName) 94 | pkgs = append(pkgs, pkg) 95 | } 96 | feed.lossyFeedAlerter.ProcessPackages(FeedName, pkgs) 97 | 98 | newCutoff := 
feeds.FindCutoff(cutoff, pkgs) 99 | pkgs = feeds.ApplyCutoff(pkgs, cutoff) 100 | return pkgs, newCutoff, []error{} 101 | } 102 | 103 | func (feed Feed) GetName() string { 104 | return FeedName 105 | } 106 | 107 | func (feed Feed) GetFeedOptions() feeds.FeedOptions { 108 | return feed.options 109 | } 110 | -------------------------------------------------------------------------------- /pkg/feeds/crates/crates_test.go: -------------------------------------------------------------------------------- 1 | package crates 2 | 3 | import ( 4 | "errors" 5 | "net/http" 6 | "testing" 7 | "time" 8 | 9 | "github.com/ossf/package-feeds/pkg/events" 10 | "github.com/ossf/package-feeds/pkg/feeds" 11 | "github.com/ossf/package-feeds/pkg/utils" 12 | testutils "github.com/ossf/package-feeds/pkg/utils/test" 13 | ) 14 | 15 | func TestCratesLatest(t *testing.T) { 16 | t.Parallel() 17 | 18 | handlers := map[string]testutils.HTTPHandlerFunc{ 19 | activityPath: cratesSummaryResponse, 20 | } 21 | srv := testutils.HTTPServerMock(handlers) 22 | 23 | feed, err := New(feeds.FeedOptions{}, events.NewNullHandler()) 24 | feed.baseURL = srv.URL 25 | if err != nil { 26 | t.Fatalf("Failed to create crates feed: %v", err) 27 | } 28 | 29 | cutoff := time.Date(1970, 1, 1, 0, 0, 0, 0, time.UTC) 30 | pkgs, gotCutoff, errs := feed.Latest(cutoff) 31 | if len(errs) != 0 { 32 | t.Fatalf("feed.Latest returned error: %v", err) 33 | } 34 | 35 | // Returned cutoff should match the newest package creation time of packages retrieved. 
36 | wantCutoff := time.Date(2021, 3, 19, 13, 36, 33, 0, time.UTC) 37 | if gotCutoff.Sub(wantCutoff).Abs() > time.Second { 38 | t.Errorf("Latest() cutoff %v, want %v", gotCutoff, wantCutoff) 39 | } 40 | 41 | if pkgs[0].Name != "FooPackage" { 42 | t.Errorf("Unexpected package `%s` found in place of expected `FooPackage`", pkgs[0].Name) 43 | } 44 | if pkgs[1].Name != "BarPackage" { 45 | t.Errorf("Unexpected package `%s` found in place of expected `BarPackage`", pkgs[1].Name) 46 | } 47 | if pkgs[0].Version != "0.2.0" { 48 | t.Errorf("Unexpected version `%s` found in place of expected `0.2.0`", pkgs[0].Version) 49 | } 50 | if pkgs[1].Version != "0.1.1" { 51 | t.Errorf("Unexpected version `%s` found in place of expected `0.1.1`", pkgs[1].Version) 52 | } 53 | 54 | for _, p := range pkgs { 55 | if p.Type != FeedName { 56 | t.Errorf("Feed type not set correctly in crates package following Latest()") 57 | } 58 | } 59 | } 60 | 61 | func TestCratesNotFound(t *testing.T) { 62 | t.Parallel() 63 | 64 | handlers := map[string]testutils.HTTPHandlerFunc{ 65 | activityPath: testutils.NotFoundHandlerFunc, 66 | } 67 | srv := testutils.HTTPServerMock(handlers) 68 | 69 | feed, err := New(feeds.FeedOptions{}, events.NewNullHandler()) 70 | feed.baseURL = srv.URL 71 | if err != nil { 72 | t.Fatalf("Failed to create crates feed: %v", err) 73 | } 74 | 75 | cutoff := time.Date(1970, 1, 1, 0, 0, 0, 0, time.UTC) 76 | _, gotCutoff, errs := feed.Latest(cutoff) 77 | if cutoff != gotCutoff { 78 | t.Error("feed.Latest() cutoff should be unchanged if an error is returned") 79 | } 80 | if len(errs) == 0 { 81 | t.Fatalf("feed.Latest() was successful when an error was expected") 82 | } 83 | if !errors.Is(errs[len(errs)-1], utils.ErrUnsuccessfulRequest) { 84 | t.Fatalf("feed.Latest() returned an error which did not match the expected error") 85 | } 86 | } 87 | 88 | func cratesSummaryResponse(w http.ResponseWriter, _ *http.Request) { 89 | _, err := w.Write([]byte(` 90 | { 91 | "just_updated": [ 92 | { 93 
| "id": "FooPackage", 94 | "name": "FooPackage", 95 | "updated_at": "2021-03-19T13:36:33.871721+00:00", 96 | "versions": null, 97 | "keywords": null, 98 | "categories": null, 99 | "badges": null, 100 | "created_at": "2021-03-17T20:04:15.901201+00:00", 101 | "downloads": 46, 102 | "recent_downloads": 46, 103 | "max_version": "0.2.0", 104 | "newest_version": "0.2.0", 105 | "max_stable_version": "0.2.0", 106 | "description": "Package for foo mangement", 107 | "homepage": "https://github.com/Foo/Foo", 108 | "documentation": "https://github.com/Foo/Foo", 109 | "repository": "https://github.com/Foo/Foo", 110 | "links": { 111 | "version_downloads": "/api/v1/crates/Foo/downloads", 112 | "versions": "/api/v1/crates/Foo/versions", 113 | "owners": "/api/v1/crates/Foo/owners", 114 | "owner_team": "/api/v1/crates/Foo/owner_team", 115 | "owner_user": "/api/v1/crates/Foo/owner_user", 116 | "reverse_dependencies": "/api/v1/crates/Foo/reverse_dependencies" 117 | }, 118 | "exact_match": false 119 | }, 120 | { 121 | "id": "BarPackage", 122 | "name": "BarPackage", 123 | "updated_at": "2021-03-19T13:17:25.784319+00:00", 124 | "versions": null, 125 | "keywords": null, 126 | "categories": null, 127 | "badges": null, 128 | "created_at": "2021-03-13T14:24:30.835625+00:00", 129 | "downloads": 31, 130 | "recent_downloads": 31, 131 | "max_version": "0.1.1", 132 | "newest_version": "0.1.1", 133 | "max_stable_version": "0.1.1", 134 | "description": "Provides Bar functionality", 135 | "homepage": "https://github.com/Bar/Bar", 136 | "documentation": "https://github.com/Bar/Bar", 137 | "repository": "https://github.com/Bar/Bar", 138 | "links": { 139 | "version_downloads": "/api/v1/crates/Bar/downloads", 140 | "versions": "/api/v1/crates/Bar/versions", 141 | "owners": "/api/v1/crates/Bar/owners", 142 | "owner_team": "/api/v1/crates/Bar/owner_team", 143 | "owner_user": "/api/v1/crates/Bar/owner_user", 144 | "reverse_dependencies": "/api/v1/crates/Bar/reverse_dependencies" 145 | }, 146 | 
// General configuration options for feeds.
type FeedOptions struct {
	// A collection of package names to poll instead of standard firehose behaviour.
	// Not supported by all feeds.
	Packages *[]string `yaml:"packages"`

	// How often the feed is polled, written as a duration string such as "2m"
	// or "5m" (the feeds README describes the format as the one accepted by
	// Go's duration parser). This is not a cron expression.
	// NOTE(review): an empty value presumably defers to the global poll rate —
	// confirm against the scheduler.
	PollRate string `yaml:"poll_rate"`
}
58 | func NewPackage(created time.Time, name, version, feed string) *Package { 59 | return NewArtifact(created, name, version, "", feed) 60 | } 61 | 62 | // NewArtifact creates a Package object with the artifact ID field populated. 63 | func NewArtifact(created time.Time, name, version, artifactID, feed string) *Package { 64 | return &Package{ 65 | Name: name, 66 | Version: version, 67 | CreatedDate: created, 68 | Type: feed, 69 | ArtifactID: artifactID, 70 | SchemaVer: schemaVer, 71 | } 72 | } 73 | 74 | func ApplyCutoff(pkgs []*Package, cutoff time.Time) []*Package { 75 | filteredPackages := []*Package{} 76 | for _, pkg := range pkgs { 77 | if pkg.CreatedDate.After(cutoff) { 78 | filteredPackages = append(filteredPackages, pkg) 79 | } 80 | } 81 | return filteredPackages 82 | } 83 | 84 | func FindCutoff(cutoff time.Time, pkgs []*Package) time.Time { 85 | for _, pkg := range pkgs { 86 | if pkg.CreatedDate.After(cutoff) { 87 | cutoff = pkg.CreatedDate 88 | } 89 | } 90 | return cutoff 91 | } 92 | 93 | func (err UnsupportedOptionError) Error() string { 94 | return fmt.Sprintf("unsupported option `%v` supplied to %v feed", err.Option, err.Feed) 95 | } 96 | -------------------------------------------------------------------------------- /pkg/feeds/feed_test.go: -------------------------------------------------------------------------------- 1 | package feeds 2 | 3 | import ( 4 | "fmt" 5 | "testing" 6 | "time" 7 | 8 | "github.com/xeipuuv/gojsonschema" 9 | ) 10 | 11 | const schemaPath = "../../package.schema.json" 12 | 13 | type extendPackage struct { 14 | Package 15 | NonConformingField string `json:"non_conforming_field"` 16 | } 17 | 18 | var ( 19 | schemaLoader = gojsonschema.NewReferenceLoader("file://" + schemaPath) 20 | dummyPackage = Package{ 21 | Name: "foobarpackage", 22 | Version: "1.0.0", 23 | CreatedDate: time.Now().UTC(), 24 | Type: "npm", 25 | SchemaVer: schemaVer, 26 | } 27 | ) 28 | 29 | func TestValidSchema(t *testing.T) { 30 | t.Parallel() 31 | 32 | 
validPackage := gojsonschema.NewGoLoader(dummyPackage) 33 | result, err := gojsonschema.Validate(schemaLoader, validPackage) 34 | if err != nil { 35 | t.Fatal(err) 36 | } 37 | 38 | if result.Valid() != true { 39 | out := "The Package json is not valid against the current schema. see errors :\n" 40 | for _, desc := range result.Errors() { 41 | out += fmt.Sprintf("- %s\n", desc) 42 | } 43 | t.Fatal(out) 44 | } 45 | } 46 | 47 | func TestInvalidSchema(t *testing.T) { 48 | t.Parallel() 49 | 50 | // The Schema defines that additional properties are not valid, ensure enforcement 51 | // against an extra struct field. If an extra field is added, the SchemVer minor should 52 | // be incremented to advertise an additive change. 53 | invalidPackageField := extendPackage{dummyPackage, "extrafield"} 54 | invalidField := gojsonschema.NewGoLoader(invalidPackageField) 55 | result, err := gojsonschema.Validate(schemaLoader, invalidField) 56 | if err != nil { 57 | t.Fatal(err) 58 | } 59 | 60 | if result.Valid() { 61 | t.Fatalf("Non-conformant extra field incorrectly validated") 62 | } 63 | 64 | // The Schema defines a required pattern for the schema_ver, ensure enforcement against 65 | // empty string. 66 | invalidPackageFormat := dummyPackage 67 | invalidPackageFormat.SchemaVer = "" 68 | invalidFormat := gojsonschema.NewGoLoader(invalidPackageFormat) 69 | result, err = gojsonschema.Validate(schemaLoader, invalidFormat) 70 | if err != nil { 71 | t.Fatal(err) 72 | } 73 | 74 | if result.Valid() { 75 | t.Fatalf("Non-conformant field format incorrectly validated") 76 | } 77 | } 78 | -------------------------------------------------------------------------------- /pkg/feeds/goproxy/README.md: -------------------------------------------------------------------------------- 1 | # goproxy Feed 2 | 3 | This feed allows polling of package updates from the golang.org/index package repository. 4 | 5 | ## Configuration options 6 | 7 | The `packages` field is not supported by the goproxy feed. 
8 | 9 | 10 | ``` 11 | feeds: 12 | - type: goproxy 13 | ``` -------------------------------------------------------------------------------- /pkg/feeds/goproxy/goproxy.go: -------------------------------------------------------------------------------- 1 | package goproxy 2 | 3 | import ( 4 | "bufio" 5 | "encoding/json" 6 | "fmt" 7 | "net/http" 8 | "net/url" 9 | "time" 10 | 11 | "github.com/ossf/package-feeds/pkg/feeds" 12 | "github.com/ossf/package-feeds/pkg/useragent" 13 | "github.com/ossf/package-feeds/pkg/utils" 14 | ) 15 | 16 | const ( 17 | FeedName = "goproxy" 18 | indexPath = "/index" 19 | ) 20 | 21 | var httpClient = &http.Client{ 22 | Transport: &useragent.RoundTripper{UserAgent: feeds.DefaultUserAgent}, 23 | Timeout: 10 * time.Second, 24 | } 25 | 26 | type PackageJSON struct { 27 | Path string `json:"Path"` 28 | Version string `json:"Version"` 29 | Timestamp string `json:"Timestamp"` 30 | } 31 | 32 | type Package struct { 33 | Title string 34 | ModifiedDate time.Time 35 | Version string 36 | } 37 | 38 | func fetchPackages(baseURL string, since time.Time) ([]Package, error) { 39 | var packages []Package 40 | indexURL, err := url.JoinPath(baseURL, indexPath) 41 | if err != nil { 42 | return nil, err 43 | } 44 | pkgURL, err := url.Parse(indexURL) 45 | if err != nil { 46 | return nil, err 47 | } 48 | params := url.Values{} 49 | params.Add("since", since.Format(time.RFC3339)) 50 | pkgURL.RawQuery = params.Encode() 51 | 52 | resp, err := httpClient.Get(pkgURL.String()) 53 | if err != nil { 54 | return nil, err 55 | } 56 | defer resp.Body.Close() 57 | 58 | err = utils.CheckResponseStatus(resp) 59 | if err != nil { 60 | return nil, fmt.Errorf("failed to fetch goproxy package data: %w", err) 61 | } 62 | 63 | scanner := bufio.NewScanner(resp.Body) 64 | for scanner.Scan() { 65 | var packageJSON PackageJSON 66 | err = json.Unmarshal([]byte(scanner.Text()), &packageJSON) 67 | if err != nil { 68 | return nil, err 69 | } 70 | modifiedDate, err := time.Parse(time.RFC3339, 
packageJSON.Timestamp) 71 | if err != nil { 72 | return nil, err 73 | } 74 | 75 | pkg := Package{ 76 | Title: packageJSON.Path, 77 | ModifiedDate: modifiedDate, 78 | Version: packageJSON.Version, 79 | } 80 | packages = append(packages, pkg) 81 | } 82 | err = scanner.Err() 83 | if err != nil { 84 | return nil, err 85 | } 86 | return packages, nil 87 | } 88 | 89 | type Feed struct { 90 | baseURL string 91 | options feeds.FeedOptions 92 | } 93 | 94 | func New(feedOptions feeds.FeedOptions) (*Feed, error) { 95 | if feedOptions.Packages != nil { 96 | return nil, feeds.UnsupportedOptionError{ 97 | Feed: FeedName, 98 | Option: "packages", 99 | } 100 | } 101 | return &Feed{ 102 | baseURL: "https://index.golang.org/", 103 | options: feedOptions, 104 | }, nil 105 | } 106 | 107 | func (feed Feed) Latest(cutoff time.Time) ([]*feeds.Package, time.Time, []error) { 108 | pkgs := []*feeds.Package{} 109 | packages, err := fetchPackages(feed.baseURL, cutoff) 110 | if err != nil { 111 | return pkgs, cutoff, []error{err} 112 | } 113 | for _, pkg := range packages { 114 | pkg := feeds.NewPackage(pkg.ModifiedDate, pkg.Title, pkg.Version, FeedName) 115 | pkgs = append(pkgs, pkg) 116 | } 117 | newCutoff := feeds.FindCutoff(cutoff, pkgs) 118 | pkgs = feeds.ApplyCutoff(pkgs, cutoff) 119 | return pkgs, newCutoff, []error{} 120 | } 121 | 122 | func (feed Feed) GetName() string { 123 | return FeedName 124 | } 125 | 126 | func (feed Feed) GetFeedOptions() feeds.FeedOptions { 127 | return feed.options 128 | } 129 | -------------------------------------------------------------------------------- /pkg/feeds/goproxy/goproxy_test.go: -------------------------------------------------------------------------------- 1 | package goproxy 2 | 3 | import ( 4 | "errors" 5 | "net/http" 6 | "testing" 7 | "time" 8 | 9 | "github.com/ossf/package-feeds/pkg/feeds" 10 | "github.com/ossf/package-feeds/pkg/utils" 11 | testutils "github.com/ossf/package-feeds/pkg/utils/test" 12 | ) 13 | 14 | func TestGoProxyLatest(t 
*testing.T) { 15 | t.Parallel() 16 | 17 | handlers := map[string]testutils.HTTPHandlerFunc{ 18 | indexPath: goproxyPackageResponse, 19 | } 20 | srv := testutils.HTTPServerMock(handlers) 21 | 22 | feed, err := New(feeds.FeedOptions{}) 23 | if err != nil { 24 | t.Fatalf("Failed to create goproxy feed: %v", err) 25 | } 26 | feed.baseURL = srv.URL 27 | 28 | cutoff := time.Date(1970, 1, 1, 0, 0, 0, 0, time.UTC) 29 | pkgs, gotCutoff, errs := feed.Latest(cutoff) 30 | if len(errs) != 0 { 31 | t.Fatalf("feed.Latest returned error: %v", err) 32 | } 33 | 34 | // Returned cutoff should match the newest package creation time of packages retrieved. 35 | wantCutoff := time.Date(2019, 4, 10, 20, 30, 2, 0, time.UTC) 36 | if gotCutoff.Sub(wantCutoff).Abs() > time.Second { 37 | t.Errorf("Latest() cutoff %v, want %v", gotCutoff, wantCutoff) 38 | } 39 | if pkgs[0].Name != "golang.org/x/foo" { 40 | t.Errorf("Unexpected package `%s` found in place of expected `golang.org/x/foo`", pkgs[0].Name) 41 | } 42 | if pkgs[1].Name != "golang.org/x/bar" { 43 | t.Errorf("Unexpected package `%s` found in place of expected `golang.org/x/bar`", pkgs[1].Name) 44 | } 45 | if pkgs[0].Version != "v0.3.0" { 46 | t.Errorf("Unexpected version `%s` found in place of expected `v0.3.0`", pkgs[0].Version) 47 | } 48 | if pkgs[1].Version != "v0.4.0" { 49 | t.Errorf("Unexpected version `%s` found in place of expected `v0.4.0`", pkgs[1].Version) 50 | } 51 | 52 | for _, p := range pkgs { 53 | if p.Type != FeedName { 54 | t.Errorf("Feed type not set correctly in goproxy package following Latest()") 55 | } 56 | } 57 | } 58 | 59 | func TestGoproxyNotFound(t *testing.T) { 60 | t.Parallel() 61 | 62 | handlers := map[string]testutils.HTTPHandlerFunc{ 63 | indexPath: testutils.NotFoundHandlerFunc, 64 | } 65 | srv := testutils.HTTPServerMock(handlers) 66 | 67 | feed, err := New(feeds.FeedOptions{}) 68 | if err != nil { 69 | t.Fatalf("Failed to create goproxy feed: %v", err) 70 | } 71 | feed.baseURL = srv.URL 72 | 73 | cutoff 
:= time.Date(1970, 1, 1, 0, 0, 0, 0, time.UTC) 74 | _, gotCutoff, errs := feed.Latest(cutoff) 75 | if cutoff != gotCutoff { 76 | t.Error("feed.Latest() cutoff should be unchanged if an error is returned") 77 | } 78 | if len(errs) == 0 { 79 | t.Fatalf("feed.Latest() was successful when an error was expected") 80 | } 81 | if !errors.Is(errs[len(errs)-1], utils.ErrUnsuccessfulRequest) { 82 | t.Fatalf("feed.Latest() returned an error which did not match the expected error") 83 | } 84 | } 85 | 86 | func goproxyPackageResponse(w http.ResponseWriter, _ *http.Request) { 87 | _, err := w.Write([]byte(`{"Path": "golang.org/x/foo","Version": "v0.3.0","Timestamp": "2019-04-10T19:08:52.997264Z"} 88 | {"Path": "golang.org/x/bar","Version": "v0.4.0","Timestamp": "2019-04-10T20:30:02.04035Z"} 89 | `)) 90 | if err != nil { 91 | http.Error(w, testutils.UnexpectedWriteError(err), http.StatusInternalServerError) 92 | } 93 | } 94 | -------------------------------------------------------------------------------- /pkg/feeds/lossy_logging.go: -------------------------------------------------------------------------------- 1 | package feeds 2 | 3 | import ( 4 | "sort" 5 | 6 | log "github.com/sirupsen/logrus" 7 | 8 | "github.com/ossf/package-feeds/pkg/events" 9 | ) 10 | 11 | type LossyFeedAlerter struct { 12 | previousPackages map[string][]*Package 13 | eventHandler *events.Handler 14 | } 15 | 16 | // Creates a LossyFeedAlerter, capable of tracking packages and identifying 17 | // potential loss in feeds using RSS style APIs. This can only be used in 18 | // feeds which produce an overlap of packages upon their requests to the API, 19 | // if a timestamp is used to query the API then loss is unlikely due to requesting 20 | // data since a previous query. 
21 | func NewLossyFeedAlerter(eventHandler *events.Handler) *LossyFeedAlerter { 22 | return &LossyFeedAlerter{ 23 | eventHandler: eventHandler, 24 | previousPackages: map[string][]*Package{}, 25 | } 26 | } 27 | 28 | // Processes a new collection of packages and compares against the previously processed 29 | // slice of packages, if an overlap is not detected this is a sign of potential loss of 30 | // data and the configured event handler is notified via a LossyFeedEvent. 31 | func (lfa *LossyFeedAlerter) ProcessPackages(feed string, packages []*Package) { 32 | pkgs := make([]*Package, len(packages)) 33 | copy(pkgs, packages) 34 | 35 | // Ensure packages are sorted by CreatedDate in order of most recent 36 | sort.SliceStable(pkgs, func(i, j int) bool { 37 | return pkgs[j].CreatedDate.Before(pkgs[i].CreatedDate) 38 | }) 39 | 40 | previousPackages, ok := lfa.previousPackages[feed] 41 | nonZeroResults := len(pkgs) > 0 && len(previousPackages) > 0 42 | if ok && nonZeroResults { 43 | if !findOverlap(pkgs, previousPackages) { 44 | err := lfa.eventHandler.DispatchEvent(events.LossyFeedEvent{ 45 | Feed: feed, 46 | }) 47 | if err != nil { 48 | log.WithError(err).Error("failed to dispatch event via event handler") 49 | } 50 | } 51 | } 52 | lfa.previousPackages[feed] = pkgs 53 | } 54 | 55 | // Checks whether there is an overlap in package creation date between a result 56 | // and a previous result. This assumes that pollResult.packages is sorted by 57 | // CreatedDate in order of most recent first. 
58 | func findOverlap(latestPackages, previousPackages []*Package) bool { 59 | rOldestPkg := latestPackages[len(latestPackages)-1] 60 | previousResultMostRecent := previousPackages[0] 61 | 62 | afterDate := previousResultMostRecent.CreatedDate.After(rOldestPkg.CreatedDate) 63 | equalDate := previousResultMostRecent.CreatedDate.Equal(rOldestPkg.CreatedDate) 64 | return afterDate || equalDate 65 | } 66 | -------------------------------------------------------------------------------- /pkg/feeds/lossy_logging_test.go: -------------------------------------------------------------------------------- 1 | package feeds 2 | 3 | import ( 4 | "testing" 5 | "time" 6 | 7 | "github.com/ossf/package-feeds/pkg/events" 8 | ) 9 | 10 | func TestProcessPackagesNoOverlap(t *testing.T) { 11 | t.Parallel() 12 | feedName := "foo-feed" 13 | 14 | mockSink := &events.MockSink{} 15 | allowLossyFeedEventsFilter := events.NewFilter([]string{events.LossyFeedEventType}, nil, nil) 16 | eventHandler := events.NewHandler(mockSink, *allowLossyFeedEventsFilter) 17 | lossyFeedAlerter := NewLossyFeedAlerter(eventHandler) 18 | 19 | baseTime := time.Date(2021, 4, 20, 14, 30, 0, 0, time.UTC) 20 | pkgs1 := []*Package{ 21 | NewPackage(baseTime.Add(-time.Minute*2), "foopkg", "1.0", feedName), 22 | NewPackage(baseTime.Add(-time.Minute*3), "barpkg", "1.0", feedName), 23 | } 24 | // Populate previous packages 25 | lossyFeedAlerter.ProcessPackages(feedName, pkgs1) 26 | 27 | pkgs2 := []*Package{ 28 | NewPackage(baseTime, "bazpkg", "1.0", feedName), 29 | NewPackage(baseTime.Add(-time.Minute*1), "quxpkg", "2.0", feedName), 30 | } 31 | // Trigger no overlap 32 | lossyFeedAlerter.ProcessPackages(feedName, pkgs2) 33 | 34 | evs := mockSink.GetEvents() 35 | 36 | if len(evs) != 1 { 37 | t.Fatalf("ProcessPackages failed to detect a lack of overlap") 38 | } 39 | 40 | if evs[0].GetType() != events.LossyFeedEventType { 41 | t.Errorf("ProcessPackages did not produce a lossy feed event") 42 | } 43 | } 44 | 45 | func 
TestProcessPackagesWithOverlap(t *testing.T) { 46 | t.Parallel() 47 | feedName := "foo-feed" 48 | 49 | mockSink := &events.MockSink{} 50 | allowLossyFeedEventsFilter := events.NewFilter([]string{events.LossyFeedEventType}, nil, nil) 51 | eventHandler := events.NewHandler(mockSink, *allowLossyFeedEventsFilter) 52 | lossyFeedAlerter := NewLossyFeedAlerter(eventHandler) 53 | 54 | baseTime := time.Date(2021, 4, 20, 14, 30, 0, 0, time.UTC) 55 | pkgs1 := []*Package{ 56 | NewPackage(baseTime.Add(-time.Minute*2), "foopkg", "1.0", feedName), 57 | NewPackage(baseTime.Add(-time.Minute*3), "barpkg", "1.0", feedName), 58 | } 59 | // Populate previous packages 60 | lossyFeedAlerter.ProcessPackages(feedName, pkgs1) 61 | 62 | pkgs2 := []*Package{ 63 | NewPackage(baseTime, "bazpkg", "1.0", feedName), 64 | NewPackage(baseTime.Add(-time.Minute*1), "quxpkg", "2.0", feedName), 65 | NewPackage(baseTime.Add(-time.Minute*2), "foopkg", "1.0", feedName), 66 | } 67 | // Trigger overlap 68 | lossyFeedAlerter.ProcessPackages(feedName, pkgs2) 69 | 70 | evs := mockSink.GetEvents() 71 | 72 | if len(evs) != 0 { 73 | t.Fatalf("ProcessPackages failed to identify an overlap when one existed") 74 | } 75 | } 76 | -------------------------------------------------------------------------------- /pkg/feeds/maven/README.md: -------------------------------------------------------------------------------- 1 | # maven Feed 2 | 3 | This feed allows polling of package updates from central.sonatype, polling Maven central repository. 4 | 5 | ## Configuration options 6 | 7 | The `packages` field is not supported by the maven feed. 
// LatestVersionInfo describes the most recent version of a component, as
// decoded from the "latestVersionInfo" object of a component in the response.
type LatestVersionInfo struct {
	// Version is the version string of the latest release.
	Version string `json:"version"`
	// TimestampUnixWithMS is decoded from "timestampUnixWithMS"; Latest
	// compares it against the cutoff expressed in Unix milliseconds.
	TimestampUnixWithMS int64 `json:"timestampUnixWithMS"`
}

// Package represents package information for one component in the response.
// Latest reports the package name as "<Namespace>:<Name>".
type Package struct {
	Name              string            `json:"name"`
	Namespace         string            `json:"namespace"`
	LatestVersionInfo LatestVersionInfo `json:"latestVersionInfo"`
}

// Response represents the response structure from Sonatype API.
type Response struct {
	// Components is the list of packages contained in one page of results.
	Components []Package `json:"components"`
}
66 | func (feed Feed) fetchPackages(page int) ([]Package, error) { 67 | indexURL, err := url.JoinPath(feed.baseURL, indexPath) 68 | if err != nil { 69 | return nil, err 70 | } 71 | indexURL += "?repository=maven-central" 72 | 73 | maxRetries := 5 74 | retryDelay := 5 * time.Second 75 | 76 | for attempt := 0; attempt <= maxRetries; attempt++ { 77 | // Define the request payload 78 | payload := map[string]interface{}{ 79 | "page": page, 80 | "size": 20, 81 | "sortField": "publishedDate", 82 | "sortDirection": "desc", 83 | } 84 | 85 | jsonPayload, err := json.Marshal(payload) 86 | if err != nil { 87 | return nil, fmt.Errorf("error encoding JSON: %w", err) 88 | } 89 | body := bytes.NewReader(jsonPayload) 90 | 91 | // Send POST request to Sonatype API. 92 | resp, err := httpClient.Post(indexURL, "application/json", body) 93 | if err != nil { 94 | // Check if maximum retries have been reached 95 | if attempt == maxRetries { 96 | return nil, fmt.Errorf("error sending request: %w", err) 97 | } 98 | time.Sleep(retryDelay) // Wait before retrying 99 | continue 100 | } 101 | defer resp.Body.Close() 102 | 103 | // Handle rate limiting (HTTP status code 429). 104 | if resp.StatusCode == http.StatusTooManyRequests { 105 | // Check if maximum retries have been reached 106 | if attempt == maxRetries { 107 | return nil, ErrMaxRetriesReached 108 | } 109 | time.Sleep(retryDelay) // Wait before retrying 110 | continue 111 | } 112 | 113 | // Decode response. 114 | var response Response 115 | err = json.NewDecoder(resp.Body).Decode(&response) 116 | if err != nil { 117 | return nil, fmt.Errorf("error decoding response: %w", err) 118 | } 119 | return response.Components, nil 120 | } 121 | return nil, ErrMaxRetriesReached 122 | } 123 | 124 | func (feed Feed) Latest(cutoff time.Time) ([]*feeds.Package, time.Time, []error) { 125 | pkgs := []*feeds.Package{} 126 | var errs []error 127 | 128 | page := 0 129 | for { 130 | // Fetch packages from Sonatype API for the current page. 
131 | packages, err := feed.fetchPackages(page) 132 | if err != nil { 133 | errs = append(errs, err) 134 | break 135 | } 136 | 137 | // Iterate over packages 138 | hasToCut := false 139 | for _, pkg := range packages { 140 | // convert published to date to compare with cutoff 141 | if pkg.LatestVersionInfo.TimestampUnixWithMS > cutoff.UnixMilli() { 142 | // Append package to pkgs 143 | timestamp := time.Unix(pkg.LatestVersionInfo.TimestampUnixWithMS/1000, 0) 144 | packageName := pkg.Namespace + ":" + pkg.Name 145 | 146 | newPkg := feeds.NewPackage(timestamp, packageName, pkg.LatestVersionInfo.Version, FeedName) 147 | pkgs = append(pkgs, newPkg) 148 | } else { 149 | // Break the loop if the cutoff date is reached 150 | hasToCut = true 151 | } 152 | } 153 | 154 | // Move to the next page 155 | page++ 156 | 157 | // Check if the loop should be terminated 158 | if len(pkgs) == 0 || hasToCut { 159 | break 160 | } 161 | } 162 | 163 | newCutoff := feeds.FindCutoff(cutoff, pkgs) 164 | pkgs = feeds.ApplyCutoff(pkgs, cutoff) 165 | 166 | return pkgs, newCutoff, errs 167 | } 168 | 169 | func (feed Feed) GetName() string { 170 | return FeedName 171 | } 172 | 173 | func (feed Feed) GetFeedOptions() feeds.FeedOptions { 174 | return feed.options 175 | } 176 | -------------------------------------------------------------------------------- /pkg/feeds/maven/maven_test.go: -------------------------------------------------------------------------------- 1 | package maven 2 | 3 | import ( 4 | "net/http" 5 | "testing" 6 | "time" 7 | 8 | "github.com/ossf/package-feeds/pkg/feeds" 9 | testutils "github.com/ossf/package-feeds/pkg/utils/test" 10 | ) 11 | 12 | func TestMavenLatest(t *testing.T) { 13 | t.Parallel() 14 | 15 | handlers := map[string]testutils.HTTPHandlerFunc{ 16 | indexPath: mavenPackageResponse, 17 | } 18 | srv := testutils.HTTPServerMock(handlers) 19 | 20 | feed, err := New(feeds.FeedOptions{}) 21 | if err != nil { 22 | t.Fatalf("Failed to create Maven feed: %v", err) 23 | } 
24 | feed.baseURL = srv.URL 25 | 26 | cutoff := time.Date(1990, 1, 1, 0, 0, 0, 0, time.UTC) 27 | pkgs, gotCutoff, errs := feed.Latest(cutoff) 28 | 29 | if len(errs) != 0 { 30 | t.Fatalf("feed.Latest returned error: %v", errs) 31 | } 32 | 33 | // Returned cutoff should match the newest package creation time of packages retrieved. 34 | wantCutoff := time.Date(2000, 1, 1, 0, 0, 0, 0, time.UTC) 35 | if gotCutoff.UTC().Sub(wantCutoff).Abs() > time.Second { 36 | t.Errorf("Latest() cutoff %v, want %v", gotCutoff, wantCutoff) 37 | } 38 | if pkgs[0].Name != "com.github.example:project" { 39 | t.Errorf("Unexpected package `%s` found in place of expected `com.github.example:project`", pkgs[0].Name) 40 | } 41 | if pkgs[0].Version != "1.0.0" { 42 | t.Errorf("Unexpected version `%s` found in place of expected `1.0.0`", pkgs[0].Version) 43 | } 44 | 45 | for _, p := range pkgs { 46 | if p.Type != FeedName { 47 | t.Errorf("Feed type not set correctly in goproxy package following Latest()") 48 | } 49 | } 50 | } 51 | 52 | func TestMavenNotFound(t *testing.T) { 53 | t.Parallel() 54 | 55 | handlers := map[string]testutils.HTTPHandlerFunc{ 56 | indexPath: testutils.NotFoundHandlerFunc, 57 | } 58 | srv := testutils.HTTPServerMock(handlers) 59 | 60 | feed, err := New(feeds.FeedOptions{}) 61 | if err != nil { 62 | t.Fatalf("Failed to create Maven feed: %v", err) 63 | } 64 | feed.baseURL = srv.URL 65 | 66 | cutoff := time.Date(1970, 1, 1, 0, 0, 0, 0, time.UTC) 67 | 68 | _, gotCutoff, errs := feed.Latest(cutoff) 69 | if cutoff != gotCutoff { 70 | t.Error("feed.Latest() cutoff should be unchanged if an error is returned") 71 | } 72 | if len(errs) == 0 { 73 | t.Fatalf("feed.Latest() was successful when an error was expected") 74 | } 75 | } 76 | 77 | func mavenPackageResponse(w http.ResponseWriter, _ *http.Request) { 78 | w.Header().Set("Content-Type", "application/json") 79 | responseJSON := ` 80 | { 81 | "components": [ 82 | { 83 | "id": "pkg:maven/com.github.example/project", 84 | "type": 
"COMPONENT", 85 | "namespace": "com.github.example", 86 | "name": "project", 87 | "version": "1.0.0", 88 | "publishedEpochMillis": 946684800000, 89 | "latestVersionInfo": { 90 | "version": "1.0.0", 91 | "timestampUnixWithMS": 946684800000 92 | } 93 | }, 94 | { 95 | "id": "pkg:maven/com.github.example/project1", 96 | "type": "COMPONENT", 97 | "namespace": "com.github.example", 98 | "name": "project", 99 | "version": "1.0.0", 100 | "publishedEpochMillis": null, 101 | "latestVersionInfo": { 102 | "version": "1.0.0", 103 | "timestampUnixWithMS": 0 104 | } 105 | } 106 | ] 107 | } 108 | ` 109 | _, err := w.Write([]byte(responseJSON)) 110 | if err != nil { 111 | http.Error(w, testutils.UnexpectedWriteError(err), http.StatusInternalServerError) 112 | } 113 | } 114 | -------------------------------------------------------------------------------- /pkg/feeds/npm/README.md: -------------------------------------------------------------------------------- 1 | # npm Feed 2 | 3 | This feed allows polling of package updates from the repository.npmjs.org package repository. 4 | 5 | ## Configuration options 6 | 7 | The `packages` Field can be supplied to the npm feed options to enable polling of package specific apis. This is much slower 8 | with large lists of packages, but it is much less likely to miss package updates between polling. 
const (
	// FeedName is the name reported by this feed and used as the package type.
	FeedName = "npm"
	// rssPath is the path of the RSS endpoint, relative to the feed's base URL.
	rssPath = "/-/rss"

	// rssLimit controls how many RSS results should be returned.
	// It can be raised to about 420 before the feed will consistently fail to
	// return any data. Lower numbers will sometimes fail too. The default
	// value if not specified is 50.
	rssLimit = 200

	// fetchWorkers defines the total number of concurrent HTTP1 requests to
	// allow at any one time.
	fetchWorkers = 10

	// cacheEntryLimit defines how many responses to store in the LRU cache.
	// The value should be larger than rssLimit to ensure all rss entries can
	// be covered by a cache entry.
	cacheEntryLimit = 500
)
The cache is used 64 | // only by the fetchPackage function to avoid needlessly requesting the unchanged 65 | // content repeatedly. 66 | type cacheEntry struct { 67 | // ETag stores the value provided in the "Etag" header in a 200 OK response 68 | // returned for the URL key. The ETag is used in requests in the 69 | // "If-None-Match" header to only get the request body if the ETag no longer 70 | // matches. 71 | ETag string 72 | 73 | // Versions stores the data retrieved and returned by the fetchPackage 74 | // function below. 75 | Versions []*Package 76 | } 77 | 78 | // Returns a slice of PackageEvent{} structs. 79 | func fetchPackageEvents(feed Feed) ([]PackageEvent, error) { 80 | pkgURL, err := url.Parse(feed.baseURL) 81 | if err != nil { 82 | return nil, err 83 | } 84 | 85 | pkgURL = pkgURL.JoinPath(rssPath) 86 | q := pkgURL.Query() 87 | q.Set("limit", fmt.Sprintf("%d", rssLimit)) 88 | pkgURL.RawQuery = q.Encode() 89 | 90 | resp, err := feed.client.Get(pkgURL.String()) 91 | if err != nil { 92 | return nil, err 93 | } 94 | defer resp.Body.Close() 95 | 96 | if err := utils.CheckResponseStatus(resp); err != nil { 97 | return nil, fmt.Errorf("failed to fetch npm package data: %w", err) 98 | } 99 | 100 | rssResponse := &Response{} 101 | reader := utils.NewXMLReader(resp.Body, true) 102 | if err := xml.NewDecoder(reader).Decode(rssResponse); err != nil { 103 | return nil, err 104 | } 105 | 106 | return rssResponse.PackageEvents, nil 107 | } 108 | 109 | // Gets the package version & corresponding created date from NPM. Returns 110 | // a slice of {}Package. 
111 | func fetchPackage(feed Feed, pkgTitle string) ([]*Package, error) { 112 | versionURL, err := url.JoinPath(feed.baseURL, pkgTitle) 113 | if err != nil { 114 | return nil, err 115 | } 116 | 117 | req, err := http.NewRequest("GET", versionURL, nil) 118 | if err != nil { 119 | return nil, err 120 | } 121 | 122 | e, inCache := feed.cache.Get(versionURL) 123 | if inCache && e != nil { 124 | // We found a cache entry, so do a conditional request that only returns 125 | // content if the etag has changed. 126 | req.Header.Add("If-None-Match", e.ETag) 127 | } 128 | 129 | resp, err := feed.client.Do(req) 130 | if err != nil { 131 | return nil, err 132 | } 133 | body, readErr := io.ReadAll(resp.Body) 134 | closeErr := resp.Body.Close() 135 | 136 | if inCache && e != nil && utils.IsNotModified(resp) { 137 | // We have a cached value and a 304 was returned, which means we can use 138 | // our cached value as the result of this function call. 139 | return e.Versions, nil 140 | } 141 | if err := utils.CheckResponseStatus(resp); err != nil { 142 | return nil, fmt.Errorf("failed to fetch npm package version data: %w", err) 143 | } 144 | 145 | if readErr != nil { 146 | return nil, readErr 147 | } 148 | if closeErr != nil { 149 | return nil, closeErr 150 | } 151 | etag := resp.Header.Get("etag") 152 | 153 | // We only care about the `time` field as it contains all the versions in 154 | // date order, from oldest to newest. 155 | // Using a struct for parsing also avoids the cost of deserializing data 156 | // that is ultimately unused. 
157 | var packageDetails struct { 158 | Time map[string]interface{} `json:"time"` 159 | } 160 | 161 | if err := json.Unmarshal(body, &packageDetails); err != nil { 162 | return nil, fmt.Errorf("%w : %w for package %s", errJSON, err, pkgTitle) 163 | } 164 | versions := packageDetails.Time 165 | 166 | // If `unpublished` exists in the version map then at a given point in time 167 | // the package was 'entirely' removed, the packageEvent(s) received are for package 168 | // versions that no longer exist. For a given 24h period no further versions can 169 | // be uploaded, with any previous versions never being available again. 170 | // https://www.npmjs.com/policies/unpublish 171 | _, unPublished := versions["unpublished"] 172 | 173 | if unPublished { 174 | return nil, fmt.Errorf("%s %w", pkgTitle, errUnpublished) 175 | } 176 | 177 | // Remove redundant entries in map, we're only interested in actual version pairs. 178 | delete(versions, "created") 179 | delete(versions, "modified") 180 | 181 | // Create slice of Package{} to allow sorting of a slice, as maps 182 | // are unordered. 183 | versionSlice := []*Package{} 184 | for version, timestamp := range versions { 185 | date, err := time.Parse(time.RFC3339, timestamp.(string)) 186 | if err != nil { 187 | return nil, err 188 | } 189 | versionSlice = append(versionSlice, 190 | &Package{Title: pkgTitle, CreatedDate: date, Version: version}) 191 | } 192 | 193 | // Sort slice of versions into order of most recent. 194 | sort.SliceStable(versionSlice, func(i, j int) bool { 195 | return versionSlice[j].CreatedDate.Before(versionSlice[i].CreatedDate) 196 | }) 197 | 198 | if etag != "" { 199 | // Add the result to the cache, only if the the etag is actually present. 200 | // An etag should be present, but a server issue may result in the etag 201 | // not being included. 
202 | feed.cache.Add(versionURL, &cacheEntry{ 203 | ETag: etag, 204 | Versions: versionSlice, 205 | }) 206 | } 207 | 208 | return versionSlice, nil 209 | } 210 | 211 | func fetchAllPackages(feed Feed) ([]*feeds.Package, []error) { 212 | pkgs := []*feeds.Package{} 213 | errs := []error{} 214 | packageChannel := make(chan []*Package) 215 | errChannel := make(chan error) 216 | packageEvents, err := fetchPackageEvents(feed) 217 | if err != nil { 218 | // If we can't generate package events then return early. 219 | return pkgs, append(errs, err) 220 | } 221 | // Handle the possibility of multiple releases of the same package 222 | // within the polled `packages` slice. 223 | uniquePackages := make(map[string]int) 224 | for _, pkg := range packageEvents { 225 | uniquePackages[pkg.Title]++ 226 | } 227 | 228 | // Start a collection of workers to fetch all the packages. 229 | // This limits the number of concurrent requests to avoid flooding the NPM 230 | // registry API with too many simultaneous requests. 231 | workChannel := make(chan struct { 232 | pkgTitle string 233 | count int 234 | }) 235 | 236 | // Define the fetcher function that grabs the repos from NPM 237 | fetcherFn := func(pkgTitle string, count int) { 238 | pkgs, err := fetchPackage(feed, pkgTitle) 239 | if err != nil { 240 | if !errors.Is(err, errUnpublished) { 241 | err = feeds.PackagePollError{Name: pkgTitle, Err: err} 242 | } 243 | errChannel <- err 244 | return 245 | } 246 | // Apply count slice, guard against a given events corresponding 247 | // version entry being unpublished by the time the specific 248 | // endpoint has been processed. This seemingly happens silently 249 | // without being recorded in the json. An `event` could be logged 250 | // here. 251 | if len(pkgs) > count { 252 | packageChannel <- pkgs[:count] 253 | } else { 254 | packageChannel <- pkgs 255 | } 256 | } 257 | 258 | // The WaitGroup is used to ensure all the goroutines are complete before 259 | // returning. 
260 | var wg sync.WaitGroup 261 | 262 | // Start the fetcher workers. 263 | for i := 0; i < fetchWorkers; i++ { 264 | wg.Add(1) 265 | go func() { 266 | defer wg.Done() 267 | for { 268 | w, more := <-workChannel 269 | if !more { 270 | // If we have no more work then return. 271 | return 272 | } 273 | fetcherFn(w.pkgTitle, w.count) 274 | } 275 | }() 276 | } 277 | 278 | // Start a goroutine to push work to the workers. 279 | go func() { 280 | // Populate the worker feed. 281 | for pkgTitle, count := range uniquePackages { 282 | workChannel <- struct { 283 | pkgTitle string 284 | count int 285 | }{pkgTitle: pkgTitle, count: count} 286 | } 287 | 288 | // Close the channel to indicate that there is no more work. 289 | close(workChannel) 290 | }() 291 | 292 | // Collect all the work. 293 | for i := 0; i < len(uniquePackages); i++ { 294 | select { 295 | case npmPkgs := <-packageChannel: 296 | for _, pkg := range npmPkgs { 297 | feedPkg := feeds.NewPackage(pkg.CreatedDate, pkg.Title, 298 | pkg.Version, FeedName) 299 | pkgs = append(pkgs, feedPkg) 300 | } 301 | case err := <-errChannel: 302 | // When polling the 'firehose' unpublished packages 303 | // don't need to be logged as an error. 
304 | if !errors.Is(err, errUnpublished) { 305 | errs = append(errs, err) 306 | } 307 | } 308 | } 309 | 310 | wg.Wait() 311 | 312 | return pkgs, errs 313 | } 314 | 315 | func fetchCriticalPackages(feed Feed, packages []string) ([]*feeds.Package, []error) { 316 | pkgs := []*feeds.Package{} 317 | errs := []error{} 318 | packageChannel := make(chan []*Package) 319 | errChannel := make(chan error) 320 | 321 | for _, pkgTitle := range packages { 322 | go func(pkgTitle string) { 323 | pkgs, err := fetchPackage(feed, pkgTitle) 324 | if err != nil { 325 | if !errors.Is(err, errUnpublished) { 326 | err = feeds.PackagePollError{Name: pkgTitle, Err: err} 327 | } 328 | errChannel <- err 329 | return 330 | } 331 | packageChannel <- pkgs 332 | }(pkgTitle) 333 | } 334 | 335 | for i := 0; i < len(packages); i++ { 336 | select { 337 | case npmPkgs := <-packageChannel: 338 | for _, pkg := range npmPkgs { 339 | feedPkg := feeds.NewPackage(pkg.CreatedDate, pkg.Title, 340 | pkg.Version, FeedName) 341 | pkgs = append(pkgs, feedPkg) 342 | } 343 | case err := <-errChannel: 344 | // Assume if a package has been unpublished that it is a valid reason 345 | // to log the error when polling for 'critical' packages. This could 346 | // be changed for a 'lossy' type event instead. Further packages should 347 | // be processed. 348 | errs = append(errs, err) 349 | } 350 | } 351 | return pkgs, errs 352 | } 353 | 354 | type Feed struct { 355 | packages *[]string 356 | lossyFeedAlerter *feeds.LossyFeedAlerter 357 | baseURL string 358 | options feeds.FeedOptions 359 | client *http.Client 360 | cache *lru.Cache[string, *cacheEntry] 361 | } 362 | 363 | func New(feedOptions feeds.FeedOptions, eventHandler *events.Handler) (*Feed, error) { 364 | tr := http.DefaultTransport.(*http.Transport).Clone() 365 | // Disable HTTP2. HTTP2 flow control hurts performance for large concurrent 366 | // responses. 
367 | tr.ForceAttemptHTTP2 = false 368 | tr.TLSNextProto = make(map[string]func(authority string, c *tls.Conn) http.RoundTripper) 369 | tr.TLSClientConfig.NextProtos = []string{"http/1.1"} 370 | 371 | tr.MaxIdleConns = 100 372 | tr.MaxIdleConnsPerHost = fetchWorkers 373 | tr.MaxConnsPerHost = fetchWorkers 374 | tr.IdleConnTimeout = 0 // No limit, try and reuse the idle connections. 375 | 376 | cache, err := lru.New[string, *cacheEntry](cacheEntryLimit) 377 | if err != nil { 378 | return nil, err 379 | } 380 | 381 | return &Feed{ 382 | packages: feedOptions.Packages, 383 | lossyFeedAlerter: feeds.NewLossyFeedAlerter(eventHandler), 384 | baseURL: "https://registry.npmjs.org/", 385 | options: feedOptions, 386 | client: &http.Client{ 387 | Transport: &useragent.RoundTripper{ 388 | UserAgent: feeds.DefaultUserAgent, 389 | Parent: tr, 390 | }, 391 | Timeout: 45 * time.Second, 392 | }, 393 | cache: cache, 394 | }, nil 395 | } 396 | 397 | func (feed Feed) Latest(cutoff time.Time) ([]*feeds.Package, time.Time, []error) { 398 | var pkgs []*feeds.Package 399 | var errs []error 400 | 401 | if feed.packages == nil { 402 | pkgs, errs = fetchAllPackages(feed) 403 | } else { 404 | pkgs, errs = fetchCriticalPackages(feed, *feed.packages) 405 | } 406 | 407 | if len(pkgs) == 0 { 408 | // If none of the packages were successfully polled for, return early. 409 | return nil, cutoff, append(errs, feeds.ErrNoPackagesPolled) 410 | } 411 | 412 | // Ensure packages are sorted by CreatedDate in order of most recent, as goroutine 413 | // concurrency isn't deterministic. 414 | sort.SliceStable(pkgs, func(i, j int) bool { 415 | return pkgs[j].CreatedDate.Before(pkgs[i].CreatedDate) 416 | }) 417 | 418 | // After sorting the first entry should be the largest. 419 | newCutoff := pkgs[0].CreatedDate 420 | 421 | // TODO: Add an event for checking if the previous package list contains entries 422 | // that do not exist in the latest package list when polling for critical packages. 
423 | // This can highlight cases where specific versions have been unpublished. 424 | if feed.packages == nil { 425 | feed.lossyFeedAlerter.ProcessPackages(FeedName, pkgs) 426 | } 427 | 428 | pkgs = feeds.ApplyCutoff(pkgs, cutoff) 429 | return pkgs, newCutoff, errs 430 | } 431 | 432 | func (feed Feed) GetName() string { 433 | return FeedName 434 | } 435 | 436 | func (feed Feed) GetFeedOptions() feeds.FeedOptions { 437 | return feed.options 438 | } 439 | -------------------------------------------------------------------------------- /pkg/feeds/nuget/README.md: -------------------------------------------------------------------------------- 1 | # nuget Feed 2 | 3 | This feed allows polling of package updates from the nuget package repository. 4 | 5 | ## Configuration options 6 | 7 | The `packages` field is not supported by the nuget feed. 8 | 9 | 10 | ``` 11 | feeds: 12 | - type: nuget 13 | ``` -------------------------------------------------------------------------------- /pkg/feeds/nuget/nuget.go: -------------------------------------------------------------------------------- 1 | package nuget 2 | 3 | import ( 4 | "encoding/json" 5 | "errors" 6 | "fmt" 7 | "net/http" 8 | "net/url" 9 | "time" 10 | 11 | "github.com/ossf/package-feeds/pkg/feeds" 12 | "github.com/ossf/package-feeds/pkg/useragent" 13 | "github.com/ossf/package-feeds/pkg/utils" 14 | ) 15 | 16 | const ( 17 | FeedName = "nuget" 18 | catalogServiceType = "Catalog/3.0.0" 19 | indexPath = "/v3/index.json" 20 | ) 21 | 22 | var ( 23 | httpClient = &http.Client{ 24 | Transport: &useragent.RoundTripper{UserAgent: feeds.DefaultUserAgent}, 25 | Timeout: 10 * time.Second, 26 | } 27 | errCatalogService = errors.New("error fetching catalog service") 28 | ) 29 | 30 | type serviceIndex struct { 31 | Services []*nugetService `json:"resources"` 32 | } 33 | 34 | type nugetService struct { 35 | URI string `json:"@id"` 36 | Type string `json:"@type"` 37 | } 38 | 39 | type catalog struct { 40 | Pages []*catalogPage 
`json:"items"` 41 | } 42 | 43 | type catalogPage struct { 44 | URI string `json:"@id"` 45 | Created time.Time `json:"commitTimeStamp"` 46 | Packages []*catalogLeaf `json:"items"` 47 | } 48 | 49 | type catalogLeaf struct { 50 | URI string `json:"@id"` 51 | CatalogCreated time.Time `json:"commitTimeStamp"` 52 | Type string `json:"@type"` 53 | } 54 | 55 | type nugetPackageDetails struct { 56 | PackageID string `json:"id"` 57 | Version string `json:"version"` 58 | Created time.Time `json:"published"` 59 | } 60 | 61 | func fetchCatalogService(baseURL string) (*nugetService, error) { 62 | var err error 63 | catalogServiceURL, err := url.JoinPath(baseURL, indexPath) 64 | if err != nil { 65 | return nil, err 66 | } 67 | resp, err := httpClient.Get(catalogServiceURL) 68 | if err != nil { 69 | return nil, err 70 | } 71 | 72 | defer resp.Body.Close() 73 | 74 | err = utils.CheckResponseStatus(resp) 75 | if err != nil { 76 | return nil, fmt.Errorf("failed to fetch nuget catalog service: %w", err) 77 | } 78 | 79 | directory := &serviceIndex{} 80 | err = json.NewDecoder(resp.Body).Decode(directory) 81 | if err != nil { 82 | return nil, err 83 | } 84 | 85 | for _, service := range directory.Services { 86 | if service.Type == catalogServiceType { 87 | return service, nil 88 | } 89 | } 90 | return nil, fmt.Errorf("%w : could not locate catalog service for nuget feed %s", 91 | errCatalogService, catalogServiceURL) 92 | } 93 | 94 | func fetchCatalogPages(catalogURL string) ([]*catalogPage, error) { 95 | resp, err := httpClient.Get(catalogURL) 96 | if err != nil { 97 | return nil, err 98 | } 99 | 100 | defer resp.Body.Close() 101 | 102 | err = utils.CheckResponseStatus(resp) 103 | if err != nil { 104 | return nil, fmt.Errorf("failed to fetch nuget catalog pages: %w", err) 105 | } 106 | 107 | c := &catalog{} 108 | err = json.NewDecoder(resp.Body).Decode(c) 109 | if err != nil { 110 | return nil, err 111 | } 112 | 113 | return c.Pages, nil 114 | } 115 | 116 | func 
fetchCatalogPage(catalogURL string) ([]*catalogLeaf, error) { 117 | resp, err := httpClient.Get(catalogURL) 118 | if err != nil { 119 | return nil, err 120 | } 121 | 122 | defer resp.Body.Close() 123 | 124 | err = utils.CheckResponseStatus(resp) 125 | if err != nil { 126 | return nil, fmt.Errorf("failed to fetch nuget catalog page: %w", err) 127 | } 128 | 129 | page := &catalogPage{} 130 | err = json.NewDecoder(resp.Body).Decode(page) 131 | if err != nil { 132 | return nil, err 133 | } 134 | 135 | return page.Packages, nil 136 | } 137 | 138 | func fetchPackageInfo(infoURL string) (*nugetPackageDetails, error) { 139 | resp, err := httpClient.Get(infoURL) 140 | if err != nil { 141 | return nil, err 142 | } 143 | 144 | defer resp.Body.Close() 145 | 146 | err = utils.CheckResponseStatus(resp) 147 | if err != nil { 148 | return nil, fmt.Errorf("failed to fetch nuget package data: %w", err) 149 | } 150 | 151 | packageDetail := &nugetPackageDetails{} 152 | err = json.NewDecoder(resp.Body).Decode(packageDetail) 153 | if err != nil { 154 | return nil, err 155 | } 156 | 157 | return packageDetail, nil 158 | } 159 | 160 | type Feed struct { 161 | baseURL string 162 | options feeds.FeedOptions 163 | } 164 | 165 | func New(feedOptions feeds.FeedOptions) (*Feed, error) { 166 | if feedOptions.Packages != nil { 167 | return nil, feeds.UnsupportedOptionError{ 168 | Feed: FeedName, 169 | Option: "packages", 170 | } 171 | } 172 | return &Feed{ 173 | baseURL: "https://api.nuget.org/", 174 | options: feedOptions, 175 | }, nil 176 | } 177 | 178 | // Latest will parse all creation events for packages in the nuget.org catalog feed 179 | // for packages that have been published since the cutoff 180 | // https://docs.microsoft.com/en-us/nuget/api/catalog-resource 181 | func (feed Feed) Latest(cutoff time.Time) ([]*feeds.Package, time.Time, []error) { 182 | pkgs := []*feeds.Package{} 183 | var errs []error 184 | 185 | catalogService, err := fetchCatalogService(feed.baseURL) 186 | if err != 
nil { 187 | return nil, cutoff, append(errs, err) 188 | } 189 | 190 | catalogPages, err := fetchCatalogPages(catalogService.URI) 191 | if err != nil { 192 | return nil, cutoff, append(errs, err) 193 | } 194 | 195 | for _, catalogPage := range catalogPages { 196 | if catalogPage.Created.Before(cutoff) { 197 | continue 198 | } 199 | 200 | page, err := fetchCatalogPage(catalogPage.URI) 201 | if err != nil { 202 | errs = append(errs, err) 203 | continue 204 | } 205 | 206 | for _, catalogLeafNode := range page { 207 | if catalogLeafNode.CatalogCreated.Before(cutoff) { 208 | continue 209 | } 210 | 211 | if catalogLeafNode.Type != "nuget:PackageDetails" { 212 | continue // Not currently interested in package deletion events 213 | } 214 | 215 | pkgInfo, err := fetchPackageInfo(catalogLeafNode.URI) 216 | if err != nil { 217 | errs = append(errs, err) 218 | continue 219 | } 220 | 221 | pkg := feeds.NewPackage(pkgInfo.Created, pkgInfo.PackageID, pkgInfo.Version, FeedName) 222 | pkgs = append(pkgs, pkg) 223 | } 224 | } 225 | newCutoff := feeds.FindCutoff(cutoff, pkgs) 226 | pkgs = feeds.ApplyCutoff(pkgs, cutoff) 227 | 228 | return pkgs, newCutoff, errs 229 | } 230 | 231 | func (feed Feed) GetName() string { 232 | return FeedName 233 | } 234 | 235 | func (feed Feed) GetFeedOptions() feeds.FeedOptions { 236 | return feed.options 237 | } 238 | -------------------------------------------------------------------------------- /pkg/feeds/nuget/nuget_test.go: -------------------------------------------------------------------------------- 1 | package nuget 2 | 3 | import ( 4 | "fmt" 5 | "net/http" 6 | "net/url" 7 | "testing" 8 | "time" 9 | 10 | "github.com/ossf/package-feeds/pkg/feeds" 11 | testutils "github.com/ossf/package-feeds/pkg/utils/test" 12 | ) 13 | 14 | var testEndpoint *url.URL 15 | 16 | func TestCanParseFeed(t *testing.T) { 17 | t.Parallel() 18 | var err error 19 | handlers := map[string]testutils.HTTPHandlerFunc{ 20 | indexPath: indexMock, 21 | "/v3/catalog0/index.json": 
catalogMock, 22 | "/v3/catalog0/page1.json": catalogPageMock, 23 | "/v3/catalog0/data/somecatalog/new.expected.package.0.0.1.json": packageDetailMock, 24 | } 25 | srv := testutils.HTTPServerMock(handlers) 26 | testEndpoint, err = url.Parse(srv.URL) 27 | if err != nil { 28 | t.Fatalf("Unexpected error during test url parsing: %v", err) 29 | } 30 | if err != nil { 31 | t.Fatal(err) 32 | } 33 | 34 | sut, err := New(feeds.FeedOptions{}) 35 | if err != nil { 36 | t.Fatalf("Failed to create nuget feed: %v", err) 37 | } 38 | sut.baseURL = srv.URL 39 | 40 | cutoff := time.Now().Add(-5 * time.Minute) 41 | 42 | results, gotCutoff, errs := sut.Latest(cutoff) 43 | if len(errs) != 0 { 44 | t.Fatal(errs[len(errs)-1]) 45 | } 46 | 47 | // Returned cutoff should match the newest package creation time of packages retrieved. 48 | // This time is automatically generated at approx 1 minute ago. The threshold of 70s 49 | // ensures this test doesn't fail with the multiple mocked API requests. 50 | wantCutoff := time.Now().UTC() 51 | if gotCutoff == cutoff || gotCutoff.Sub(wantCutoff).Abs() > (70*time.Second) { 52 | t.Errorf("Latest() cutoff %v, want %v", gotCutoff, wantCutoff) 53 | } 54 | 55 | if len(results) != 1 { 56 | t.Fatalf("1 result expected but %d retrieved", len(results)) 57 | } 58 | 59 | const expectedName = "new.expected.package" 60 | const expectedVersion = "0.0.1" 61 | const expectedType = "nuget" 62 | result := results[0] 63 | 64 | if result.Name != expectedName { 65 | t.Fatalf("expected %s but %s was retrieved", expectedName, result.Name) 66 | } 67 | 68 | if result.Version != expectedVersion { 69 | t.Fatalf("expected version %s but %s was retrieved", expectedVersion, result.Version) 70 | } 71 | 72 | if result.Type != expectedType { 73 | t.Fatalf("expected type %s but %s was retrieved", expectedType, result.Type) 74 | } 75 | } 76 | 77 | func indexMock(w http.ResponseWriter, _ *http.Request) { 78 | var err error 79 | catalogEndpoint, err := 
makeTestURL("v3/catalog0/index.json") 80 | if err != nil { 81 | http.Error(w, err.Error(), http.StatusInternalServerError) 82 | } else { 83 | response := fmt.Sprintf(`{"resources": [{"@id": "%s", "@type": "Catalog/3.0.0"}]}`, 84 | catalogEndpoint) 85 | _, err = w.Write([]byte(response)) 86 | if err != nil { 87 | http.Error(w, testutils.UnexpectedWriteError(err), http.StatusInternalServerError) 88 | } 89 | } 90 | } 91 | 92 | func catalogMock(w http.ResponseWriter, _ *http.Request) { 93 | var err error 94 | pageEndpoint, err := makeTestURL("v3/catalog0/page1.json") 95 | if err != nil { 96 | http.Error(w, err.Error(), http.StatusInternalServerError) 97 | } else { 98 | response := fmt.Sprintf(`{"items": [{"@id": "%s", "commitTimeStamp": "%s"}]}`, 99 | pageEndpoint, time.Now().Format(time.RFC3339)) 100 | _, err = w.Write([]byte(response)) 101 | if err != nil { 102 | http.Error(w, testutils.UnexpectedWriteError(err), http.StatusInternalServerError) 103 | } 104 | } 105 | } 106 | 107 | func catalogPageMock(w http.ResponseWriter, _ *http.Request) { 108 | var err error 109 | pkgAdded := "nuget:PackageDetails" 110 | pkgDeleted := "nuget:PackageDelete" 111 | pkgTemplate := `{"@id": "%s", "@type": "%s", "commitTimeStamp": "%s"}` 112 | 113 | addedItemURL, err := makeTestURL("v3/catalog0/data/somecatalog/new.expected.package.0.0.1.json") 114 | if err != nil { 115 | http.Error(w, err.Error(), http.StatusInternalServerError) 116 | return 117 | } 118 | addedItem := fmt.Sprintf(pkgTemplate, 119 | addedItemURL, 120 | pkgAdded, time.Now().UTC().Format(time.RFC3339)) 121 | 122 | oldAddedItemURL, err := makeTestURL("v3/catalog0/data/somecatalog/old.not.expected.package.0.0.1.json") 123 | if err != nil { 124 | http.Error(w, err.Error(), http.StatusInternalServerError) 125 | return 126 | } 127 | oldAddedItem := fmt.Sprintf(pkgTemplate, 128 | oldAddedItemURL, 129 | pkgAdded, time.Now().UTC().Add(-10*time.Minute).Format(time.RFC3339)) 130 | 131 | deletedItemURL, err := 
makeTestURL("v3/catalog0/data/somecatalog/modified.not.expected.0.0.1.json") 132 | if err != nil { 133 | http.Error(w, err.Error(), http.StatusInternalServerError) 134 | return 135 | } 136 | deletedItem := fmt.Sprintf(pkgTemplate, 137 | deletedItemURL, 138 | pkgDeleted, time.Now().UTC().Format(time.RFC3339)) 139 | 140 | response := fmt.Sprintf(`{"items": [%s, %s, %s]}`, addedItem, deletedItem, oldAddedItem) 141 | 142 | _, err = w.Write([]byte(response)) 143 | if err != nil { 144 | http.Error(w, testutils.UnexpectedWriteError(err), http.StatusInternalServerError) 145 | } 146 | } 147 | 148 | func packageDetailMock(w http.ResponseWriter, _ *http.Request) { 149 | response := fmt.Sprintf(`{"id": "new.expected.package", "version": "0.0.1", "published": "%s"}`, 150 | time.Now().UTC().Add(-1*time.Minute).Format(time.RFC3339)) 151 | 152 | _, err := w.Write([]byte(response)) 153 | if err != nil { 154 | http.Error(w, testutils.UnexpectedWriteError(err), http.StatusInternalServerError) 155 | } 156 | } 157 | 158 | func makeTestURL(suffix string) (string, error) { 159 | path, err := url.Parse(suffix) 160 | if err != nil { 161 | return "", err 162 | } 163 | return testEndpoint.ResolveReference(path).String(), nil 164 | } 165 | -------------------------------------------------------------------------------- /pkg/feeds/packagist/README.md: -------------------------------------------------------------------------------- 1 | # packagist Feed 2 | 3 | This feed allows polling of package updates from the packagist package repository. 4 | 5 | ## Configuration options 6 | 7 | The `packages` field is not supported by the packagist feed. 
8 | 9 | 10 | ``` 11 | feeds: 12 | - type: packagist 13 | ``` -------------------------------------------------------------------------------- /pkg/feeds/packagist/doc.go: -------------------------------------------------------------------------------- 1 | // package packagist fetches packages updates from the API and static files from packagist 2 | // for getting the version information. We choose the API instead of the RSS feed as it includes 3 | // both newly created packages but also updated packages. 4 | package packagist 5 | -------------------------------------------------------------------------------- /pkg/feeds/packagist/packagist.go: -------------------------------------------------------------------------------- 1 | package packagist 2 | 3 | import ( 4 | "encoding/json" 5 | "fmt" 6 | "net/http" 7 | "net/url" 8 | "strconv" 9 | "time" 10 | 11 | "github.com/ossf/package-feeds/pkg/feeds" 12 | "github.com/ossf/package-feeds/pkg/useragent" 13 | "github.com/ossf/package-feeds/pkg/utils" 14 | ) 15 | 16 | const FeedName = "packagist" 17 | 18 | var httpClient = &http.Client{ 19 | Transport: &useragent.RoundTripper{UserAgent: feeds.DefaultUserAgent}, 20 | Timeout: 10 * time.Second, 21 | } 22 | 23 | type response struct { 24 | Actions []actions `json:"actions"` 25 | Timestamp int64 `json:"timestamp"` 26 | } 27 | 28 | type actions struct { 29 | Type string `json:"type"` 30 | Package string `json:"package"` 31 | Time int64 `json:"time"` 32 | } 33 | 34 | type Feed struct { 35 | updateHost string 36 | versionHost string 37 | options feeds.FeedOptions 38 | } 39 | 40 | func New(feedOptions feeds.FeedOptions) (*Feed, error) { 41 | if feedOptions.Packages != nil { 42 | return nil, feeds.UnsupportedOptionError{ 43 | Feed: FeedName, 44 | Option: "packages", 45 | } 46 | } 47 | return &Feed{ 48 | updateHost: "https://packagist.org", 49 | versionHost: "https://repo.packagist.org", 50 | options: feedOptions, 51 | }, nil 52 | } 53 | 54 | func fetchPackages(updateHost string, since 
time.Time) ([]actions, error) { 55 | pkgURL, err := url.JoinPath(updateHost, "/metadata/changes.json") 56 | if err != nil { 57 | return nil, err 58 | } 59 | request, err := http.NewRequest(http.MethodGet, pkgURL, nil) 60 | if err != nil { 61 | return nil, err 62 | } 63 | values := request.URL.Query() 64 | sinceStr := strconv.FormatInt(since.Unix()*10000, 10) 65 | values.Add("since", sinceStr) 66 | request.URL.RawQuery = values.Encode() 67 | resp, err := httpClient.Do(request) 68 | if err != nil { 69 | return nil, err 70 | } 71 | defer resp.Body.Close() 72 | 73 | err = utils.CheckResponseStatus(resp) 74 | if err != nil { 75 | return nil, fmt.Errorf("failed to fetch packagist package data: %w", err) 76 | } 77 | 78 | apiResponse := &response{} 79 | err = json.NewDecoder(resp.Body).Decode(apiResponse) 80 | if err != nil { 81 | return nil, err 82 | } 83 | 84 | return apiResponse.Actions, nil 85 | } 86 | 87 | func fetchVersionInformation(versionHost string, action actions) ([]*feeds.Package, error) { 88 | resp, err := httpClient.Get(fmt.Sprintf("%s/p2/%s.json", versionHost, action.Package)) 89 | if err != nil { 90 | return nil, err 91 | } 92 | defer resp.Body.Close() 93 | 94 | err = utils.CheckResponseStatus(resp) 95 | if err != nil { 96 | return nil, fmt.Errorf("failed to fetch packagist package version data: %w", err) 97 | } 98 | 99 | versionResponse := &packages{} 100 | err = json.NewDecoder(resp.Body).Decode(versionResponse) 101 | if err != nil { 102 | return nil, err 103 | } 104 | 105 | pkgs := []*feeds.Package{} 106 | for pkgName, versions := range versionResponse.Packages { 107 | for _, version := range versions { 108 | pkg := feeds.NewPackage(version.Time, pkgName, version.Version, FeedName) 109 | if err != nil { 110 | continue 111 | } 112 | pkgs = append(pkgs, pkg) 113 | } 114 | } 115 | 116 | return pkgs, nil 117 | } 118 | 119 | // Latest returns all package updates of packagist packages since cutoff. 
120 | func (f Feed) Latest(cutoff time.Time) ([]*feeds.Package, time.Time, []error) { 121 | pkgs := []*feeds.Package{} 122 | var errs []error 123 | packages, err := fetchPackages(f.updateHost, cutoff) 124 | if err != nil { 125 | return nil, cutoff, append(errs, err) 126 | } 127 | for _, pkg := range packages { 128 | if time.Unix(pkg.Time, 0).Before(cutoff) { 129 | continue 130 | } 131 | if pkg.Type == "delete" { 132 | continue 133 | } 134 | updates, err := fetchVersionInformation(f.versionHost, pkg) 135 | if err != nil { 136 | errs = append(errs, fmt.Errorf("error in fetching version information: %w", err)) 137 | continue 138 | } 139 | pkgs = append(pkgs, updates...) 140 | } 141 | newCutoff := feeds.FindCutoff(cutoff, pkgs) 142 | pkgs = feeds.ApplyCutoff(pkgs, cutoff) 143 | return pkgs, newCutoff, errs 144 | } 145 | 146 | func (f Feed) GetName() string { 147 | return FeedName 148 | } 149 | 150 | func (f Feed) GetFeedOptions() feeds.FeedOptions { 151 | return f.options 152 | } 153 | -------------------------------------------------------------------------------- /pkg/feeds/packagist/packagist_test.go: -------------------------------------------------------------------------------- 1 | package packagist 2 | 3 | import ( 4 | "errors" 5 | "net/http" 6 | "testing" 7 | "time" 8 | 9 | "github.com/ossf/package-feeds/pkg/feeds" 10 | "github.com/ossf/package-feeds/pkg/utils" 11 | testutils "github.com/ossf/package-feeds/pkg/utils/test" 12 | ) 13 | 14 | func TestFetch(t *testing.T) { 15 | t.Parallel() 16 | 17 | handlers := map[string]testutils.HTTPHandlerFunc{ 18 | "/p2/": versionMock, 19 | "/metadata/changes.json": changesMock, 20 | } 21 | srv := testutils.HTTPServerMock(handlers) 22 | 23 | defer srv.Close() 24 | feed, err := New(feeds.FeedOptions{}) 25 | if err != nil { 26 | t.Fatalf("Failed to create packagist feed: %v", err) 27 | } 28 | feed.updateHost = srv.URL 29 | feed.versionHost = srv.URL 30 | 31 | cutoff := time.Unix(1614513658, 0) 32 | latest, gotCutoff, errs := 
feed.Latest(cutoff) 33 | if len(errs) != 0 { 34 | t.Fatalf("got error: %v", errs[len(errs)-1]) 35 | } 36 | if len(latest) == 0 { 37 | t.Fatalf("did not get any updates") 38 | } 39 | 40 | // Returned cutoff should match the newest package creation time of packages retrieved. 41 | wantCutoff := time.Date(2021, 2, 28, 12, 20, 3, 0, time.UTC) 42 | if gotCutoff.Sub(wantCutoff).Abs() > time.Second { 43 | t.Errorf("Latest() cutoff %v, want %v", gotCutoff, wantCutoff) 44 | } 45 | for _, pkg := range latest { 46 | if pkg.CreatedDate.Before(cutoff) { 47 | t.Fatalf("package returned that was updated before cutoff %f", pkg.CreatedDate.Sub(cutoff).Minutes()) 48 | } 49 | if pkg.Name == "to-delete/deleted-package" { 50 | t.Fatalf("pkg to-delete/deleted-package was deleted and should not be included here") 51 | } 52 | } 53 | } 54 | 55 | func TestPackagistNotFound(t *testing.T) { 56 | t.Parallel() 57 | 58 | handlers := map[string]testutils.HTTPHandlerFunc{ 59 | "/p2/": testutils.NotFoundHandlerFunc, 60 | "/metadata/changes.json": testutils.NotFoundHandlerFunc, 61 | } 62 | srv := testutils.HTTPServerMock(handlers) 63 | 64 | defer srv.Close() 65 | feed, err := New(feeds.FeedOptions{}) 66 | if err != nil { 67 | t.Fatalf("Failed to create packagist feed: %v", err) 68 | } 69 | feed.updateHost = srv.URL 70 | feed.versionHost = srv.URL 71 | 72 | cutoff := time.Unix(1614513658, 0) 73 | _, gotCutoff, errs := feed.Latest(cutoff) 74 | if cutoff != gotCutoff { 75 | t.Error("feed.Latest() cutoff should be unchanged if an error is returned") 76 | } 77 | if len(errs) != 1 { 78 | t.Fatalf("feed.Latest() returned %v errors when 1 was expected", len(errs)) 79 | } 80 | if !errors.Is(errs[len(errs)-1], utils.ErrUnsuccessfulRequest) { 81 | t.Fatalf("feed.Latest() returned an error which did not match the expected error") 82 | } 83 | } 84 | 85 | func changesMock(w http.ResponseWriter, r *http.Request) { 86 | if r.URL.Query().Get("since") == "" { 87 | w.WriteHeader(http.StatusBadRequest) 88 | _, err := 
w.Write([]byte(`{"error":"Invalid or missing \u0022since\u0022 query parameter, 89 | make sure you store the timestamp at the initial point you started mirroring, 90 | then send that to begin receiving changes, 91 | e.g. https:\/\/packagist.org\/metadata\/changes.json?since=16145146222715 for example." 92 | ,"timestamp":16145146222715}`)) 93 | if err != nil { 94 | http.Error(w, testutils.UnexpectedWriteError(err), http.StatusInternalServerError) 95 | } 96 | } 97 | _, err := w.Write([]byte(`{"actions":[{"type":"delete","package":"to-delete/deleted-package", 98 | "time":1614513806},{"type":"update","package":"ossf/package","time":1614514502}, 99 | {"type":"update","package":"ossf/package~dev","time":1614514502}],"timestamp":16145145025048}`)) 100 | if err != nil { 101 | http.Error(w, testutils.UnexpectedWriteError(err), http.StatusInternalServerError) 102 | } 103 | } 104 | 105 | func versionMock(w http.ResponseWriter, r *http.Request) { 106 | m := map[string]string{ 107 | "/p2/ossf/package.json": `{"packages":{"ossf/package":[{"name":"ossf/package", 108 | "description":"Lorem Ipsum","keywords":["Lorem Ipsum 1","Lorem Ipsum 2"],"homepage":"", 109 | "version":"v1.0.0","version_normalized":"1.0.0.0","license":["MIT"], 110 | "authors":[{"name":"John Doe","email":"john.doe@local"}], 111 | "source":{"type":"git","url":"https://github.com/ossf/package.git","reference": 112 | "c3afaa087afb42bfaf40fc3cda1252cd9a653d7f"},"dist":{"type":"zip","url": 113 | "https://api.github.com/repos/ossf/package/zipball/c3afaa087afb42bfaf40fc3cda1252cd9a653d7f", 114 | "reference":"c3afaa087afb42bfaf40fc3cda1252cd9a653d7f","shasum":""},"type":"library", 115 | "time":"2021-02-28T12:20:03+00:00","autoload":{"psr-4":{"package\\":"src/"}}, 116 | "require":{"php":"^8.0","guzzlehttp/guzzle":"^7.2"}, 117 | "require-dev":{"codeception/codeception":"^4.1","codeception/module-phpbrowser":"^1.0.0", 118 | "codeception/module-asserts":"^1.0.0","hoa/console":"^3.17"}, 119 | 
"support":{"issues":"https://github.com/ossf/package/issues", 120 | "source":"https://github.com/ossf/package/tree/v1.0.0"}}]},"minified":"composer/2.0"}`, 121 | "/p2/ossf/package~dev.json": `{"packages":{"ossf/package":[{"name":"ossf/package", 122 | "description":"Lorem Ipsum","keywords":["Lorem Ipsum 1","Lorem Ipsum 2"],"homepage":"", 123 | "version":"dev-master","version_normalized":"dev-master","license":["MIT"], 124 | "authors":[{"name":"John Doe","email":"john.doe@local"}], 125 | "source":{"type":"git","url":"https://github.com/ossf/package.git","reference": 126 | "c3afaa087afb42bfaf40fc3cda1252cd9a653d7f"},"dist":{"type":"zip","url": 127 | "https://api.github.com/repos/ossf/package/zipball/c3afaa087afb42bfaf40fc3cda1252cd9a653d7f", 128 | "reference":"c3afaa087afb42bfaf40fc3cda1252cd9a653d7f","shasum":""},"type":"library", 129 | "time":"2021-02-28T12:20:03+00:00","autoload":{"psr-4":{"package\\":"src/"}}, 130 | "default-branch":true,"require":{"php":"^8.0","guzzlehttp/guzzle":"^7.2"}, 131 | "require-dev":{"codeception/codeception":"^4.1","codeception/module-phpbrowser":"^1.0.0", 132 | "codeception/module-asserts":"^1.0.0","hoa/console":"^3.17"}, 133 | "support":{"issues":"https://github.com/ossf/package/issues", 134 | "source":"https://github.com/ossf/package/tree/v1.0.0"}}]},"minified":"composer/2.0"}`, 135 | } 136 | for u, response := range m { 137 | if u == r.URL.String() { 138 | _, err := w.Write([]byte(response)) 139 | if err != nil { 140 | http.Error(w, testutils.UnexpectedWriteError(err), http.StatusInternalServerError) 141 | } 142 | return 143 | } 144 | } 145 | http.NotFound(w, r) 146 | } 147 | -------------------------------------------------------------------------------- /pkg/feeds/packagist/version.go: -------------------------------------------------------------------------------- 1 | package packagist 2 | 3 | import "time" 4 | 5 | type versionInfo struct { 6 | Version string `json:"version"` 7 | VersionNormalized string 
`json:"version_normalized"` 8 | License []string `json:"license,omitempty"` 9 | Time time.Time `json:"time"` 10 | Name string `json:"name,omitempty"` 11 | } 12 | 13 | type packages struct { 14 | Packages map[string][]versionInfo `json:"packages"` 15 | } 16 | -------------------------------------------------------------------------------- /pkg/feeds/pypi/README.md: -------------------------------------------------------------------------------- 1 | # PyPI Feed 2 | 3 | This feed allows polling of package updates from the PyPI package repository. 4 | 5 | ## Configuration options 6 | 7 | The `packages` Field can be supplied to the PyPI feed options to enable polling of package specific apis. 8 | This is less effective with large lists of packages as it polls the RSS feed for each package individually, 9 | but it is much less likely to miss package updates between polling. 10 | 11 | 12 | ``` 13 | feeds: 14 | - type: pypi 15 | options: 16 | packages: 17 | - numpy 18 | - scipy 19 | ``` 20 | 21 | # PyPI Artifacts Feed 22 | 23 | This feed allows polling of PyPI package updates using the 24 | [XML-RPC feed](https://warehouse.pypa.io/api-reference/xml-rpc.html#mirroring-support). 25 | This feed contains extra information compared to the other PyPI feed in this project. 26 | In particular, this avoids missing upstream notifications when platform-specific archives are 27 | uploaded for a package some time after the release was made. Furthermore, the `artifact_id` 28 | field in the output JSON schema is populated with the filename of the release tarball. 29 | 30 | 31 | ## Configuration 32 | 33 | No configuration; all package updates are monitored. 
In particular, `FeedOptions.Packages` is not supported 34 | ``` 35 | feeds: 36 | - type: pypi-v2 37 | ``` 38 | -------------------------------------------------------------------------------- /pkg/feeds/pypi/pypi.go: -------------------------------------------------------------------------------- 1 | package pypi 2 | 3 | import ( 4 | "encoding/xml" 5 | "errors" 6 | "fmt" 7 | "net/http" 8 | "net/url" 9 | "strings" 10 | "time" 11 | 12 | "github.com/ossf/package-feeds/pkg/events" 13 | "github.com/ossf/package-feeds/pkg/feeds" 14 | "github.com/ossf/package-feeds/pkg/useragent" 15 | "github.com/ossf/package-feeds/pkg/utils" 16 | ) 17 | 18 | const ( 19 | FeedName = "pypi" 20 | updatesPath = "/rss/updates.xml" 21 | packagePathFormat = "/rss/project/%s/releases.xml" 22 | ) 23 | 24 | var ( 25 | httpClient = &http.Client{ 26 | Transport: &useragent.RoundTripper{UserAgent: feeds.DefaultUserAgent}, 27 | Timeout: 10 * time.Second, 28 | } 29 | errInvalidLinkForPackage = errors.New("invalid link provided by pypi API") 30 | ) 31 | 32 | type Response struct { 33 | Packages []*Package `xml:"channel>item"` 34 | } 35 | 36 | type Package struct { 37 | Title string `xml:"title"` 38 | CreatedDate rfc1123Time `xml:"pubDate"` 39 | Link string `xml:"link"` 40 | } 41 | 42 | func (p *Package) Name() (string, error) { 43 | // The XML Link splits to: []string{"https:", "", "pypi.org", "project", "foopy", "2.1", ""} 44 | parts := strings.Split(p.Link, "/") 45 | if len(parts) < 5 { 46 | return "", errInvalidLinkForPackage 47 | } 48 | return parts[len(parts)-3], nil 49 | } 50 | 51 | func (p *Package) Version() (string, error) { 52 | // The XML Link splits to: []string{"https:", "", "pypi.org", "project", "foopy", "2.1", ""} 53 | parts := strings.Split(p.Link, "/") 54 | if len(parts) < 5 { 55 | return "", errInvalidLinkForPackage 56 | } 57 | return parts[len(parts)-2], nil 58 | } 59 | 60 | type rfc1123Time struct { 61 | time.Time 62 | } 63 | 64 | func (t *rfc1123Time) UnmarshalXML(d *xml.Decoder, 
start xml.StartElement) error { 65 | var marshaledTime string 66 | err := d.DecodeElement(&marshaledTime, &start) 67 | if err != nil { 68 | return err 69 | } 70 | decodedTime, err := time.Parse(time.RFC1123, marshaledTime) 71 | if err != nil { 72 | return err 73 | } 74 | *t = rfc1123Time{decodedTime} 75 | return nil 76 | } 77 | 78 | func fetchPackages(baseURL string) ([]*Package, error) { 79 | pkgURL, err := url.JoinPath(baseURL, updatesPath) 80 | if err != nil { 81 | return nil, err 82 | } 83 | resp, err := httpClient.Get(pkgURL) 84 | if err != nil { 85 | return nil, err 86 | } 87 | defer resp.Body.Close() 88 | 89 | err = utils.CheckResponseStatus(resp) 90 | if err != nil { 91 | return nil, fmt.Errorf("failed to fetch pypi package data: %w", err) 92 | } 93 | 94 | rssResponse := &Response{} 95 | reader := utils.NewXMLReader(resp.Body, true) 96 | err = xml.NewDecoder(reader).Decode(rssResponse) 97 | if err != nil { 98 | return nil, err 99 | } 100 | return rssResponse.Packages, nil 101 | } 102 | 103 | func fetchCriticalPackages(baseURL string, packageList []string) ([]*Package, []error) { 104 | responseChannel := make(chan *Response) 105 | errChannel := make(chan error) 106 | 107 | for _, pkgName := range packageList { 108 | go func(pkgName string) { 109 | packageDataPath := fmt.Sprintf(packagePathFormat, pkgName) 110 | pkgURL, err := url.JoinPath(baseURL, packageDataPath) 111 | if err != nil { 112 | errChannel <- feeds.PackagePollError{Name: pkgName, Err: err} 113 | return 114 | } 115 | resp, err := httpClient.Get(pkgURL) 116 | if err != nil { 117 | errChannel <- feeds.PackagePollError{Name: pkgName, Err: err} 118 | return 119 | } 120 | defer resp.Body.Close() 121 | 122 | err = utils.CheckResponseStatus(resp) 123 | if err != nil { 124 | errChannel <- feeds.PackagePollError{Name: pkgName, Err: fmt.Errorf("failed to fetch pypi package data: %w", err)} 125 | return 126 | } 127 | 128 | rssResponse := &Response{} 129 | reader := utils.NewXMLReader(resp.Body, true) 130 | 
err = xml.NewDecoder(reader).Decode(rssResponse) 131 | if err != nil { 132 | errChannel <- feeds.PackagePollError{Name: pkgName, Err: err} 133 | return 134 | } 135 | 136 | responseChannel <- rssResponse 137 | }(pkgName) 138 | } 139 | 140 | pkgs := []*Package{} 141 | errs := []error{} 142 | for i := 0; i < len(packageList); i++ { 143 | select { 144 | case response := <-responseChannel: 145 | pkgs = append(pkgs, response.Packages...) 146 | case err := <-errChannel: 147 | errs = append(errs, err) 148 | } 149 | } 150 | return pkgs, errs 151 | } 152 | 153 | type Feed struct { 154 | packages *[]string 155 | 156 | lossyFeedAlerter *feeds.LossyFeedAlerter 157 | baseURL string 158 | 159 | options feeds.FeedOptions 160 | } 161 | 162 | func New(feedOptions feeds.FeedOptions, eventHandler *events.Handler) (*Feed, error) { 163 | return &Feed{ 164 | packages: feedOptions.Packages, 165 | lossyFeedAlerter: feeds.NewLossyFeedAlerter(eventHandler), 166 | baseURL: "https://pypi.org/", 167 | options: feedOptions, 168 | }, nil 169 | } 170 | 171 | func (feed Feed) Latest(cutoff time.Time) ([]*feeds.Package, time.Time, []error) { 172 | pkgs := []*feeds.Package{} 173 | var pypiPackages []*Package 174 | var errs []error 175 | var err error 176 | 177 | if feed.packages == nil { 178 | // Firehose fetch all packages. 179 | // If this fails then we need to return, as it's the only source of 180 | // data. 181 | pypiPackages, err = fetchPackages(feed.baseURL) 182 | if err != nil { 183 | return nil, cutoff, append(errs, err) 184 | } 185 | } else { 186 | // Fetch specific packages individually from configured packages list. 187 | pypiPackages, errs = fetchCriticalPackages(feed.baseURL, *feed.packages) 188 | if len(pypiPackages) == 0 { 189 | // If none of the packages were successfully polled for, return early. 
190 | return nil, cutoff, append(errs, feeds.ErrNoPackagesPolled) 191 | } 192 | } 193 | 194 | for _, pkg := range pypiPackages { 195 | pkgName, err := pkg.Name() 196 | if err != nil { 197 | errs = append(errs, err) 198 | continue 199 | } 200 | pkgVersion, err := pkg.Version() 201 | if err != nil { 202 | errs = append(errs, err) 203 | continue 204 | } 205 | pkg := feeds.NewPackage(pkg.CreatedDate.Time, pkgName, pkgVersion, FeedName) 206 | pkgs = append(pkgs, pkg) 207 | } 208 | 209 | // Lossy feed detection is only necessary for firehose fetching 210 | if feed.packages == nil { 211 | feed.lossyFeedAlerter.ProcessPackages(FeedName, pkgs) 212 | } 213 | 214 | newCutoff := feeds.FindCutoff(cutoff, pkgs) 215 | pkgs = feeds.ApplyCutoff(pkgs, cutoff) 216 | return pkgs, newCutoff, errs 217 | } 218 | 219 | func (feed Feed) GetPackageList() *[]string { 220 | return feed.packages 221 | } 222 | 223 | func (feed Feed) GetName() string { 224 | return FeedName 225 | } 226 | 227 | func (feed Feed) GetFeedOptions() feeds.FeedOptions { 228 | return feed.options 229 | } 230 | -------------------------------------------------------------------------------- /pkg/feeds/pypi/pypi_artifacts.go: -------------------------------------------------------------------------------- 1 | package pypi 2 | 3 | import ( 4 | "fmt" 5 | "regexp" 6 | "time" 7 | 8 | "github.com/kolo/xmlrpc" 9 | 10 | "github.com/ossf/package-feeds/pkg/feeds" 11 | "github.com/ossf/package-feeds/pkg/useragent" 12 | ) 13 | 14 | const ( 15 | ArtifactFeedName = "pypi-artifacts" 16 | ) 17 | 18 | // We care about changelog entries where the action is 'add X file '. 
19 | var archiveUploadAction = regexp.MustCompile("add (.*) file (.*)") 20 | 21 | type ArtifactFeed struct { 22 | baseURL string 23 | options feeds.FeedOptions 24 | } 25 | 26 | func NewArtifactFeed(feedOptions feeds.FeedOptions) (*ArtifactFeed, error) { 27 | return &ArtifactFeed{ 28 | baseURL: "https://pypi.org/pypi", 29 | options: feedOptions, 30 | }, nil 31 | } 32 | 33 | func (feed ArtifactFeed) Latest(cutoff time.Time) ([]*feeds.Package, time.Time, []error) { 34 | client, err := xmlrpc.NewClient(feed.baseURL, &useragent.RoundTripper{UserAgent: feeds.DefaultUserAgent}) 35 | if err != nil { 36 | return nil, cutoff, []error{err} 37 | } 38 | 39 | changelogEntries, err := getPyPIChangeLog(client, cutoff) 40 | if err != nil { 41 | return nil, cutoff, []error{err} 42 | } 43 | 44 | pkgs := getUploadedArtifacts(changelogEntries) 45 | return pkgs, feeds.FindCutoff(cutoff, pkgs), nil 46 | } 47 | 48 | func (feed ArtifactFeed) GetFeedOptions() feeds.FeedOptions { 49 | return feed.options 50 | } 51 | 52 | func (feed ArtifactFeed) GetName() string { 53 | return ArtifactFeedName 54 | } 55 | 56 | type pypiChangelogEntry struct { 57 | Name string 58 | Version string 59 | Timestamp time.Time 60 | Action string 61 | ArchiveName string 62 | } 63 | 64 | func (e *pypiChangelogEntry) isArchiveUpload() bool { 65 | return e.ArchiveName != "" 66 | } 67 | 68 | func (e *pypiChangelogEntry) String() string { 69 | return fmt.Sprintf("%s (%s): %s ts=%s", e.Name, e.Version, e.Action, e.Timestamp) 70 | } 71 | 72 | // getPyPIChangeLog returns a list of PyPI changelog entries since the given timestamp 73 | // defined by https://warehouse.pypa.io/api-reference/xml-rpc.html#changelog-since-with-ids-false 74 | func getPyPIChangeLog(client *xmlrpc.Client, since time.Time) ([]pypiChangelogEntry, error) { 75 | // Raw result structure is array[array[string, string|nil, int64, string (, int64 if with_ids=true) ]] 76 | // which cannot be represented in Go (struct mapping is not supported by library) 77 | 
var result [][]interface{} 78 | if err := client.Call("changelog", []interface{}{since.Unix(), false}, &result); err != nil { 79 | return nil, err 80 | } 81 | 82 | return processRawChangelog(result), nil 83 | } 84 | 85 | func processRawChangelog(apiResult [][]interface{}) []pypiChangelogEntry { 86 | changelogEntries := make([]pypiChangelogEntry, len(apiResult)) 87 | for i, r := range apiResult { 88 | changelogEntries[i] = processRawChangelogItem(r) 89 | } 90 | 91 | return changelogEntries 92 | } 93 | 94 | func processRawChangelogItem(data []interface{}) pypiChangelogEntry { 95 | /* 96 | Each item of the changelog contains the following fields: 97 | name: string 98 | version: string (nullable) 99 | timestamp: int64 100 | action: string 101 | */ 102 | name, ok := data[0].(string) 103 | if !ok { 104 | name = "" 105 | } 106 | version, ok := data[1].(string) 107 | if !ok { 108 | version = "" 109 | } 110 | unixTimestamp, ok := data[2].(int64) 111 | if !ok { 112 | unixTimestamp = 0 113 | } 114 | action, ok := data[3].(string) 115 | if !ok { 116 | action = "" 117 | } 118 | 119 | archiveName := "" 120 | // Changelog entries corresponding to new archives being added have an action string 121 | // that looks like 'add file '. This code is generated by 122 | // github.com/pypi/warehouse/blob/3bfd3e0d32e7396582d9635a316f1a47e407304d/warehouse/forklift/legacy.py#L1355 123 | if match := archiveUploadAction.FindStringSubmatch(action); match != nil { 124 | // it's a new archive! 
125 | archiveName = match[2] 126 | } 127 | 128 | return pypiChangelogEntry{ 129 | Name: name, 130 | Version: version, 131 | Timestamp: time.Unix(unixTimestamp, 0), 132 | Action: action, 133 | ArchiveName: archiveName, 134 | } 135 | } 136 | 137 | func getUploadedArtifacts(changelogEntries []pypiChangelogEntry) []*feeds.Package { 138 | var pkgs []*feeds.Package 139 | for _, e := range changelogEntries { 140 | if e.isArchiveUpload() { 141 | pkgs = append(pkgs, feeds.NewArtifact(e.Timestamp, e.Name, e.Version, e.ArchiveName, ArtifactFeedName)) 142 | } 143 | } 144 | 145 | return pkgs 146 | } 147 | -------------------------------------------------------------------------------- /pkg/feeds/pypi/pypi_test.go: -------------------------------------------------------------------------------- 1 | package pypi 2 | 3 | import ( 4 | "errors" 5 | "net/http" 6 | "strings" 7 | "testing" 8 | "time" 9 | 10 | "github.com/ossf/package-feeds/pkg/events" 11 | "github.com/ossf/package-feeds/pkg/feeds" 12 | testutils "github.com/ossf/package-feeds/pkg/utils/test" 13 | ) 14 | 15 | func TestPypiLatest(t *testing.T) { 16 | t.Parallel() 17 | 18 | handlers := map[string]testutils.HTTPHandlerFunc{ 19 | updatesPath: updatesXMLHandle, 20 | } 21 | srv := testutils.HTTPServerMock(handlers) 22 | 23 | feed, err := New(feeds.FeedOptions{}, events.NewNullHandler()) 24 | if err != nil { 25 | t.Fatalf("Failed to create new pypi feed: %v", err) 26 | } 27 | feed.baseURL = srv.URL 28 | 29 | cutoff := time.Date(1970, 1, 1, 0, 0, 0, 0, time.UTC) 30 | pkgs, gotCutoff, errs := feed.Latest(cutoff) 31 | if len(errs) != 0 { 32 | t.Fatalf("feed.Latest returned error: %v", errs[len(errs)-1]) 33 | } 34 | 35 | // Returned cutoff should match the newest package creation time of packages retrieved. 
36 | wantCutoff := time.Date(2021, 3, 19, 12, 1, 4, 0, time.UTC) 37 | if gotCutoff.Sub(wantCutoff).Abs() > time.Second { 38 | t.Errorf("Latest() cutoff %v, want %v", gotCutoff, wantCutoff) 39 | } 40 | 41 | if pkgs[0].Name != "FooPackage" { 42 | t.Errorf("Unexpected package `%s` found in place of expected `FooPackage`", pkgs[0].Name) 43 | } 44 | if pkgs[1].Name != "BarPackage" { 45 | t.Errorf("Unexpected package `%s` found in place of expected `BarPackage`", pkgs[1].Name) 46 | } 47 | if pkgs[0].Version != "0.0.2" { 48 | t.Errorf("Unexpected version `%s` found in place of expected `0.0.2`", pkgs[0].Version) 49 | } 50 | if pkgs[1].Version != "0.7a2" { 51 | t.Errorf("Unexpected version `%s` found in place of expected `0.7a2`", pkgs[1].Version) 52 | } 53 | 54 | for _, p := range pkgs { 55 | if p.Type != FeedName { 56 | t.Errorf("Feed type not set correctly in pypi package following Latest()") 57 | } 58 | } 59 | } 60 | 61 | func TestPypiCriticalLatest(t *testing.T) { 62 | t.Parallel() 63 | 64 | handlers := map[string]testutils.HTTPHandlerFunc{ 65 | "/rss/project/foopy/releases.xml": foopyReleasesResponse, 66 | "/rss/project/barpy/releases.xml": barpyReleasesResponse, 67 | } 68 | packages := []string{ 69 | "foopy", 70 | "barpy", 71 | } 72 | srv := testutils.HTTPServerMock(handlers) 73 | 74 | feed, err := New(feeds.FeedOptions{ 75 | Packages: &packages, 76 | }, events.NewNullHandler()) 77 | if err != nil { 78 | t.Fatalf("Failed to create pypi feed: %v", err) 79 | } 80 | feed.baseURL = srv.URL 81 | 82 | cutoff := time.Date(1970, 1, 1, 0, 0, 0, 0, time.UTC) 83 | pkgs, gotCutoff, errs := feed.Latest(cutoff) 84 | if len(errs) != 0 { 85 | t.Fatalf("Failed to call Latest() with err: %v", errs[len(errs)-1]) 86 | } 87 | 88 | // Returned cutoff should match the newest package creation time of packages retrieved. 
89 | wantCutoff := time.Date(2021, 3, 27, 22, 16, 26, 0, time.UTC) 90 | if gotCutoff.Sub(wantCutoff).Abs() > time.Second { 91 | t.Errorf("Latest() cutoff %v, want %v", gotCutoff, wantCutoff) 92 | } 93 | 94 | const expectedNumPackages = 4 95 | if len(pkgs) != expectedNumPackages { 96 | t.Fatalf("Latest() produced %v packages instead of the expected %v", len(pkgs), expectedNumPackages) 97 | } 98 | pkgMap := map[string]map[string]*feeds.Package{} 99 | pkgMap["foopy"] = map[string]*feeds.Package{} 100 | pkgMap["barpy"] = map[string]*feeds.Package{} 101 | 102 | for _, pkg := range pkgs { 103 | pkgMap[pkg.Name][pkg.Version] = pkg 104 | } 105 | 106 | if _, ok := pkgMap["foopy"]["2.1"]; !ok { 107 | t.Fatalf("Missing foopy 2.1") 108 | } 109 | if _, ok := pkgMap["foopy"]["2.0"]; !ok { 110 | t.Fatalf("Missing foopy 2.0") 111 | } 112 | if _, ok := pkgMap["barpy"]["1.1"]; !ok { 113 | t.Fatalf("Missing barpy 1.1") 114 | } 115 | if _, ok := pkgMap["barpy"]["1.0"]; !ok { 116 | t.Fatalf("Missing barpy 1.0") 117 | } 118 | } 119 | 120 | func TestPypiAllNotFound(t *testing.T) { 121 | t.Parallel() 122 | 123 | handlers := map[string]testutils.HTTPHandlerFunc{ 124 | "/rss/project/foopy/releases.xml": testutils.NotFoundHandlerFunc, 125 | "/rss/project/barpy/releases.xml": testutils.NotFoundHandlerFunc, 126 | } 127 | packages := []string{ 128 | "foopy", 129 | "barpy", 130 | } 131 | srv := testutils.HTTPServerMock(handlers) 132 | 133 | feed, err := New(feeds.FeedOptions{ 134 | Packages: &packages, 135 | }, events.NewNullHandler()) 136 | if err != nil { 137 | t.Fatalf("Failed to create pypi feed: %v", err) 138 | } 139 | feed.baseURL = srv.URL 140 | 141 | cutoff := time.Date(1970, 1, 1, 0, 0, 0, 0, time.UTC) 142 | _, gotCutoff, errs := feed.Latest(cutoff) 143 | if cutoff != gotCutoff { 144 | t.Error("feed.Latest() cutoff should be unchanged if an error is returned") 145 | } 146 | if len(errs) != 3 { 147 | t.Fatalf("feed.Latest() returned %v errors when 3 were expected", len(errs)) 148 | } 149 
| if !errors.Is(errs[len(errs)-1], feeds.ErrNoPackagesPolled) { 150 | t.Fatalf("feed.Latest() returned an error which did not match the expected error") 151 | } 152 | } 153 | 154 | func TestPypiCriticalPartialNotFound(t *testing.T) { 155 | t.Parallel() 156 | 157 | handlers := map[string]testutils.HTTPHandlerFunc{ 158 | "/rss/project/foopy/releases.xml": foopyReleasesResponse, 159 | "/rss/project/barpy/releases.xml": testutils.NotFoundHandlerFunc, 160 | } 161 | packages := []string{ 162 | "foopy", 163 | "barpy", 164 | } 165 | srv := testutils.HTTPServerMock(handlers) 166 | 167 | feed, err := New(feeds.FeedOptions{ 168 | Packages: &packages, 169 | }, events.NewNullHandler()) 170 | if err != nil { 171 | t.Fatalf("Failed to create pypi feed: %v", err) 172 | } 173 | feed.baseURL = srv.URL 174 | 175 | cutoff := time.Date(1970, 1, 1, 0, 0, 0, 0, time.UTC) 176 | pkgs, _, errs := feed.Latest(cutoff) 177 | if len(errs) != 1 { 178 | t.Fatalf("feed.Latest() returned %v errors when 1 was expected", len(errs)) 179 | } 180 | if !strings.Contains(errs[len(errs)-1].Error(), "barpy") { 181 | t.Fatalf("Failed to correctly include the package name in feeds.PackagePollError, instead: %v", errs[len(errs)-1]) 182 | } 183 | if !strings.Contains(errs[len(errs)-1].Error(), "404") { 184 | t.Fatalf("Failed to wrapped expected 404 error in feeds.PackagePollError, instead: %v", errs[len(errs)-1]) 185 | } 186 | if len(pkgs) != 2 { 187 | t.Fatalf("Latest() produced %v packages instead of the expected %v", len(pkgs), 2) 188 | } 189 | } 190 | 191 | // Mock data for pypi firehose with all packages. 
192 | func updatesXMLHandle(w http.ResponseWriter, _ *http.Request) { 193 | _, err := w.Write([]byte(` 194 | 195 | 196 | 197 | PyPI recent updates 198 | https://pypi.org/ 199 | Recent updates to the Python Package Index 200 | en 201 | 202 | FooPackage 0.0.2 203 | https://pypi.org/project/FooPackage/0.0.2/ 204 | Python wrapper for fooing 205 | fooman@bazco.org 206 | Fri, 19 Mar 2021 12:01:04 GMT 207 | 208 | 209 | BarPackage 0.7a2 210 | https://pypi.org/project/BarPackage/0.7a2/ 211 | A package full of bars 212 | barman@bazco.org 213 | Fri, 19 Mar 2021 12:00:39 GMT 214 | 215 | 216 | 217 | `)) 218 | if err != nil { 219 | http.Error(w, testutils.UnexpectedWriteError(err), http.StatusInternalServerError) 220 | } 221 | } 222 | 223 | // Mock data response for package specific api when pypi is configured with 224 | // a package list in FeedOptions. 225 | func foopyReleasesResponse(w http.ResponseWriter, _ *http.Request) { 226 | _, err := w.Write([]byte(` 227 | 228 | 229 | 230 | PyPI recent updates for foopy 231 | https://pypi.org/project/foopy/ 232 | Recent updates to the Python Package Index for foopy 233 | en 234 | 235 | 2.1 236 | https://pypi.org/project/foopy/2.1/ 237 | Sat, 27 Mar 2021 22:16:26 GMT 238 | 239 | 240 | 2.0 241 | https://pypi.org/project/foopy/2.0/ 242 | Sun, 23 Sep 2018 16:50:37 GMT 243 | 244 | 245 | 246 | `)) 247 | if err != nil { 248 | http.Error(w, testutils.UnexpectedWriteError(err), http.StatusInternalServerError) 249 | } 250 | } 251 | 252 | // Mock data response for package specific api when pypi is configured with 253 | // a package list in FeedOptions. 
254 | func barpyReleasesResponse(w http.ResponseWriter, _ *http.Request) { 255 | _, err := w.Write([]byte(` 256 | 257 | 258 | 259 | PyPI recent updates for barpy 260 | https://pypi.org/project/barpy/ 261 | Recent updates to the Python Package Index for barpy 262 | en 263 | 264 | 1.1 265 | https://pypi.org/project/barpy/1.1/ 266 | Sat, 27 Mar 2021 22:16:26 GMT 267 | 268 | 269 | 1.0 270 | https://pypi.org/project/barpy/1.0/ 271 | Sun, 23 Sep 2018 16:50:37 GMT 272 | 273 | 274 | 275 | `)) 276 | if err != nil { 277 | http.Error(w, testutils.UnexpectedWriteError(err), http.StatusInternalServerError) 278 | } 279 | } 280 | -------------------------------------------------------------------------------- /pkg/feeds/rubygems/README.md: -------------------------------------------------------------------------------- 1 | # rubygems Feed 2 | 3 | This feed allows polling of package updates from the rubygems package repository. 4 | 5 | ## Configuration options 6 | 7 | The `packages` field is not supported by the rubygems feed. 
8 | 9 | 10 | ``` 11 | feeds: 12 | - type: rubygems 13 | ``` -------------------------------------------------------------------------------- /pkg/feeds/rubygems/rubygems.go: -------------------------------------------------------------------------------- 1 | package rubygems 2 | 3 | import ( 4 | "encoding/json" 5 | "fmt" 6 | "net/http" 7 | "net/url" 8 | "time" 9 | 10 | "github.com/ossf/package-feeds/pkg/events" 11 | "github.com/ossf/package-feeds/pkg/feeds" 12 | "github.com/ossf/package-feeds/pkg/useragent" 13 | "github.com/ossf/package-feeds/pkg/utils" 14 | ) 15 | 16 | const ( 17 | FeedName = "rubygems" 18 | activityPath = "/api/v1/activity" 19 | ) 20 | 21 | var httpClient = &http.Client{ 22 | Transport: &useragent.RoundTripper{UserAgent: feeds.DefaultUserAgent}, 23 | Timeout: 10 * time.Second, 24 | } 25 | 26 | type Package struct { 27 | Name string `json:"name"` 28 | Version string `json:"version"` 29 | CreatedDate time.Time `json:"version_created_at"` 30 | } 31 | 32 | func fetchPackages(packagesURL string) ([]*Package, error) { 33 | resp, err := httpClient.Get(packagesURL) 34 | if err != nil { 35 | return nil, err 36 | } 37 | defer resp.Body.Close() 38 | 39 | err = utils.CheckResponseStatus(resp) 40 | if err != nil { 41 | return nil, fmt.Errorf("failed to fetch rubygems package data: %w", err) 42 | } 43 | 44 | response := []*Package{} 45 | err = json.NewDecoder(resp.Body).Decode(&response) 46 | return response, err 47 | } 48 | 49 | type Feed struct { 50 | lossyFeedAlerter *feeds.LossyFeedAlerter 51 | baseURL string 52 | options feeds.FeedOptions 53 | } 54 | 55 | func New(feedOptions feeds.FeedOptions, eventHandler *events.Handler) (*Feed, error) { 56 | if feedOptions.Packages != nil { 57 | return nil, feeds.UnsupportedOptionError{ 58 | Feed: FeedName, 59 | Option: "packages", 60 | } 61 | } 62 | return &Feed{ 63 | lossyFeedAlerter: feeds.NewLossyFeedAlerter(eventHandler), 64 | baseURL: "https://rubygems.org", 65 | options: feedOptions, 66 | }, nil 67 | } 68 | 69 
| func (feed Feed) Latest(cutoff time.Time) ([]*feeds.Package, time.Time, []error) { 70 | pkgs := []*feeds.Package{} 71 | packages := make(map[string]*Package) 72 | var errs []error 73 | 74 | newPackagesURL, err := url.JoinPath(feed.baseURL, activityPath, "latest.json") 75 | if err != nil { 76 | // Failure to construct a url should lead to a hard failure. 77 | return nil, cutoff, append(errs, err) 78 | } 79 | newPackages, err := fetchPackages(newPackagesURL) 80 | if err != nil { 81 | // Updated Packages could still be processed. 82 | errs = append(errs, err) 83 | } else { 84 | for _, pkg := range newPackages { 85 | packages[pkg.Name] = pkg 86 | } 87 | } 88 | updatedPackagesURL, err := url.JoinPath(feed.baseURL, activityPath, "just_updated.json") 89 | if err != nil { 90 | // Failure to construct a url should lead to a hard failure. 91 | return nil, cutoff, append(errs, err) 92 | } 93 | updatedPackages, err := fetchPackages(updatedPackagesURL) 94 | if err != nil { 95 | // New Packages could still be processed. 
96 | errs = append(errs, err) 97 | } else { 98 | for _, pkg := range updatedPackages { 99 | packages[pkg.Name] = pkg 100 | } 101 | } 102 | 103 | for _, pkg := range packages { 104 | pkg := feeds.NewPackage(pkg.CreatedDate, pkg.Name, pkg.Version, FeedName) 105 | pkgs = append(pkgs, pkg) 106 | } 107 | feed.lossyFeedAlerter.ProcessPackages(FeedName, pkgs) 108 | 109 | newCutoff := feeds.FindCutoff(cutoff, pkgs) 110 | pkgs = feeds.ApplyCutoff(pkgs, cutoff) 111 | return pkgs, newCutoff, errs 112 | } 113 | 114 | func (feed Feed) GetName() string { 115 | return FeedName 116 | } 117 | 118 | func (feed Feed) GetFeedOptions() feeds.FeedOptions { 119 | return feed.options 120 | } 121 | -------------------------------------------------------------------------------- /pkg/feeds/rubygems/rubygems_test.go: -------------------------------------------------------------------------------- 1 | package rubygems 2 | 3 | import ( 4 | "errors" 5 | "net/http" 6 | "testing" 7 | "time" 8 | 9 | "github.com/ossf/package-feeds/pkg/events" 10 | "github.com/ossf/package-feeds/pkg/feeds" 11 | "github.com/ossf/package-feeds/pkg/utils" 12 | testutils "github.com/ossf/package-feeds/pkg/utils/test" 13 | ) 14 | 15 | func TestRubyLatest(t *testing.T) { 16 | t.Parallel() 17 | 18 | handlers := map[string]testutils.HTTPHandlerFunc{ 19 | "/api/v1/activity/latest.json": rubyGemsPackagesResponse, 20 | "/api/v1/activity/just_updated.json": rubyGemsPackagesResponse, 21 | } 22 | srv := testutils.HTTPServerMock(handlers) 23 | 24 | feed, err := New(feeds.FeedOptions{}, events.NewNullHandler()) 25 | feed.baseURL = srv.URL 26 | if err != nil { 27 | t.Fatalf("failed to create new ruby feed: %v", err) 28 | } 29 | 30 | cutoff := time.Date(1970, 1, 1, 0, 0, 0, 0, time.UTC) 31 | pkgs, gotCutoff, errs := feed.Latest(cutoff) 32 | if len(errs) != 0 { 33 | t.Fatalf("feed.Latest returned error: %v", errs[len(errs)-1]) 34 | } 35 | 36 | // Returned cutoff should match the newest package creation time of packages retrieved. 
37 | wantCutoff := time.Date(2021, 3, 19, 13, 0, 43, 0, time.UTC) 38 | if gotCutoff.Sub(wantCutoff).Abs() > time.Second { 39 | t.Errorf("Latest() cutoff %v, want %v", gotCutoff, wantCutoff) 40 | } 41 | 42 | var fooPkg *feeds.Package 43 | var barPkg *feeds.Package 44 | 45 | // rubygems constructs pkgs from a dict so the order is unpredictable 46 | for _, pkg := range pkgs { 47 | switch pkg.Name { 48 | case "FooPackage": 49 | fooPkg = pkg 50 | case "BarPackage": 51 | barPkg = pkg 52 | default: 53 | t.Errorf("Unexpected package `%s` found in packages", pkg.Name) 54 | } 55 | } 56 | 57 | if fooPkg.Version != "0.13.0" { 58 | t.Errorf("Unexpected version `%s` found in place of expected `0.13.0`", fooPkg.Version) 59 | } 60 | if barPkg.Version != "0.0.3" { 61 | t.Errorf("Unexpected version `%s` found in place of expected `0.0.3`", barPkg.Version) 62 | } 63 | 64 | for _, p := range pkgs { 65 | if p.Type != FeedName { 66 | t.Errorf("Feed type not set correctly in ruby package following Latest()") 67 | } 68 | } 69 | } 70 | 71 | func TestRubyGemsNotFound(t *testing.T) { 72 | t.Parallel() 73 | 74 | handlers := map[string]testutils.HTTPHandlerFunc{ 75 | "/api/v1/activity/latest.json": testutils.NotFoundHandlerFunc, 76 | "/api/v1/activity/just_updated.json": testutils.NotFoundHandlerFunc, 77 | } 78 | srv := testutils.HTTPServerMock(handlers) 79 | 80 | feed, err := New(feeds.FeedOptions{}, events.NewNullHandler()) 81 | feed.baseURL = srv.URL 82 | if err != nil { 83 | t.Fatalf("failed to create new ruby feed: %v", err) 84 | } 85 | 86 | cutoff := time.Date(1970, 1, 1, 0, 0, 0, 0, time.UTC) 87 | _, gotCutoff, errs := feed.Latest(cutoff) 88 | if cutoff != gotCutoff { 89 | t.Error("feed.Latest() cutoff should be unchanged if an error is returned") 90 | } 91 | if len(errs) == 0 { 92 | t.Fatalf("feed.Latest() was successful when an error was expected") 93 | } 94 | if !errors.Is(errs[len(errs)-1], utils.ErrUnsuccessfulRequest) { 95 | t.Fatalf("feed.Latest() returned an error which did 
not match the expected error") 96 | } 97 | } 98 | 99 | func TestRubyGemsPartialNotFound(t *testing.T) { 100 | t.Parallel() 101 | 102 | handlers := map[string]testutils.HTTPHandlerFunc{ 103 | "/api/v1/activity/latest.json": rubyGemsPackagesResponse, 104 | "/api/v1/activity/just_updated.json": testutils.NotFoundHandlerFunc, 105 | } 106 | srv := testutils.HTTPServerMock(handlers) 107 | 108 | feed, err := New(feeds.FeedOptions{}, events.NewNullHandler()) 109 | feed.baseURL = srv.URL 110 | if err != nil { 111 | t.Fatalf("failed to create new ruby feed: %v", err) 112 | } 113 | 114 | cutoff := time.Date(1970, 1, 1, 0, 0, 0, 0, time.UTC) 115 | pkgs, _, errs := feed.Latest(cutoff) 116 | if len(errs) != 1 { 117 | t.Fatalf("feed.Latest() returned %v errors when 1 was expected", len(errs)) 118 | } 119 | if !errors.Is(errs[len(errs)-1], utils.ErrUnsuccessfulRequest) { 120 | t.Fatalf("feed.Latest() returned an error which did not match the expected error") 121 | } 122 | // Although the just_updated (updatedPackages) endpoint failed, the two latest (newPackages) 123 | // should be processed. 
124 | if len(pkgs) != 2 { 125 | t.Fatalf("Latest() produced %v packages instead of the expected %v", len(pkgs), 2) 126 | } 127 | } 128 | 129 | func rubyGemsPackagesResponse(w http.ResponseWriter, _ *http.Request) { 130 | _, err := w.Write([]byte(` 131 | [ 132 | { 133 | "name": "FooPackage", 134 | "downloads": 35, 135 | "version": "0.13.0", 136 | "version_created_at": "2021-03-19T13:00:43.260Z", 137 | "version_downloads": 35, 138 | "platform": "ruby", 139 | "authors": "FooMan", 140 | "info": "A package to support Foo", 141 | "licenses": [ 142 | "MIT" 143 | ], 144 | "metadata": {}, 145 | "yanked": false, 146 | "sha": "8649253fb98b8ed0f733e2fc723b2435ead35cb1a70004ebff821abe7abaf131", 147 | "project_uri": "https://rubygems.org/gems/FooPackage", 148 | "gem_uri": "https://rubygems.org/gems/FooPackage-0.13.0.gem", 149 | "homepage_uri": "http://github.com/FooMan/FooPackage/", 150 | "wiki_uri": null, 151 | "documentation_uri": "https://www.rubydoc.info/gems/FooPackage/0.13.0", 152 | "mailing_list_uri": null, 153 | "source_code_uri": null, 154 | "bug_tracker_uri": null, 155 | "changelog_uri": null, 156 | "funding_uri": null 157 | }, 158 | { 159 | "name": "BarPackage", 160 | "downloads": 41, 161 | "version": "0.0.3", 162 | "version_created_at": "2021-03-19T12:52:15.157Z", 163 | "version_downloads": 41, 164 | "platform": "ruby", 165 | "authors": "BarMan", 166 | "info": "A package to add Bar support.", 167 | "licenses": [ 168 | "MIT" 169 | ], 170 | "metadata": {}, 171 | "yanked": false, 172 | "sha": "fd38fbd77499eb494fd84e710034314287d6895460253aec4a7d105e3199a0fb", 173 | "project_uri": "https://rubygems.org/gems/BarPackage", 174 | "gem_uri": "https://rubygems.org/gems/BarPackage-0.0.3.gem", 175 | "homepage_uri": "http://github.com/BarMan/BarPackage/", 176 | "wiki_uri": null, 177 | "documentation_uri": "https://www.rubydoc.info/gems/BarPackage/0.0.3", 178 | "mailing_list_uri": null, 179 | "source_code_uri": null, 180 | "bug_tracker_uri": null, 181 | "changelog_uri": null, 182 
| "funding_uri": null 183 | } 184 | ] 185 | 186 | `)) 187 | if err != nil { 188 | http.Error(w, testutils.UnexpectedWriteError(err), http.StatusInternalServerError) 189 | } 190 | } 191 | -------------------------------------------------------------------------------- /pkg/publisher/README.md: -------------------------------------------------------------------------------- 1 | # Publishers 2 | 3 | Various publishers are available for use publishing packages, each of these can be configured for use as seen in examples below. 4 | 5 | ## Configuration examples 6 | 7 | ### stdout 8 | 9 | ``` 10 | publisher: 11 | type: stdout 12 | ``` 13 | 14 | ### GCP Pub Sub 15 | 16 | ``` 17 | publisher: 18 | type: gcp_pubsub 19 | config: 20 | url: gcppubsub://foo.bar 21 | ``` 22 | 23 | ### Kafka 24 | 25 | ``` 26 | publisher: 27 | type: kafka 28 | config: 29 | brokers: 30 | - 127.0.0.1:9092 31 | topic: packagefeeds 32 | ``` 33 | 34 | ### HTTP client 35 | 36 | ``` 37 | publisher: 38 | type: http-client 39 | config: 40 | url: "http://target-server:8000/package_feeds_hook" 41 | ``` 42 | -------------------------------------------------------------------------------- /pkg/publisher/gcppubsub/gcppubsub.go: -------------------------------------------------------------------------------- 1 | package gcppubsub 2 | 3 | import ( 4 | "context" 5 | 6 | "gocloud.dev/pubsub" 7 | // Load gcp driver. 
8 | _ "gocloud.dev/pubsub/gcppubsub" 9 | ) 10 | 11 | const ( 12 | PublisherType = "gcp_pubsub" 13 | ) 14 | 15 | type GCPPubSub struct { 16 | topic *pubsub.Topic 17 | } 18 | 19 | type Config struct { 20 | URL string `mapstructure:"url"` 21 | } 22 | 23 | func New(ctx context.Context, url string) (*GCPPubSub, error) { 24 | topic, err := pubsub.OpenTopic(ctx, url) 25 | if err != nil { 26 | return nil, err 27 | } 28 | pub := &GCPPubSub{ 29 | topic: topic, 30 | } 31 | return pub, nil 32 | } 33 | 34 | func FromConfig(ctx context.Context, config Config) (*GCPPubSub, error) { 35 | return New(ctx, config.URL) 36 | } 37 | 38 | func (pub *GCPPubSub) Name() string { 39 | return PublisherType 40 | } 41 | 42 | func (pub *GCPPubSub) Send(ctx context.Context, body []byte) error { 43 | return pub.topic.Send(ctx, &pubsub.Message{ 44 | Body: body, 45 | }) 46 | } 47 | -------------------------------------------------------------------------------- /pkg/publisher/httpclientpubsub/httpclientpubsub.go: -------------------------------------------------------------------------------- 1 | package httpclientpubsub 2 | 3 | import ( 4 | "bytes" 5 | "context" 6 | "errors" 7 | "fmt" 8 | "net/http" 9 | 10 | log "github.com/sirupsen/logrus" 11 | ) 12 | 13 | const PublisherType = "http-client" 14 | 15 | var ErrHTTPRequestFailed = errors.New("HTTP request failed") 16 | 17 | type Config struct { 18 | URL string `mapstructure:"url"` 19 | } 20 | 21 | type HTTPClientPubSub struct { 22 | url string 23 | } 24 | 25 | func New(_ context.Context, url string) (*HTTPClientPubSub, error) { 26 | pub := &HTTPClientPubSub{url: url} 27 | return pub, nil 28 | } 29 | 30 | func (pub *HTTPClientPubSub) Name() string { 31 | return PublisherType 32 | } 33 | 34 | func FromConfig(ctx context.Context, config Config) (*HTTPClientPubSub, error) { 35 | return New(ctx, config.URL) 36 | } 37 | 38 | func (pub *HTTPClientPubSub) Send(_ context.Context, body []byte) error { 39 | log.Info("Sending event to HTTP client publisher") 40 
| // Print the url to the log so that we can see where the event is being sent. 41 | req, err := http.NewRequest("POST", pub.url, bytes.NewReader(body)) 42 | if err != nil { 43 | return err 44 | } 45 | 46 | req.Header.Set("Content-Type", "application/json") 47 | 48 | client := &http.Client{} 49 | resp, err := client.Do(req) 50 | if err != nil { 51 | return err 52 | } 53 | defer resp.Body.Close() 54 | 55 | if resp.StatusCode != http.StatusOK { 56 | return fmt.Errorf("%w with status code: %d", ErrHTTPRequestFailed, resp.StatusCode) 57 | } 58 | 59 | return nil 60 | } 61 | -------------------------------------------------------------------------------- /pkg/publisher/kafkapubsub/kafkapubsub.go: -------------------------------------------------------------------------------- 1 | package kafkapubsub 2 | 3 | import ( 4 | "context" 5 | 6 | "gocloud.dev/pubsub" 7 | "gocloud.dev/pubsub/kafkapubsub" 8 | ) 9 | 10 | const ( 11 | PublisherType = "kafka" 12 | ) 13 | 14 | type KafkaPubSub struct { 15 | topic *pubsub.Topic 16 | } 17 | 18 | type Config struct { 19 | Brokers []string `mapstructure:"brokers"` 20 | Topic string `mapstructure:"topic"` 21 | } 22 | 23 | func New(_ context.Context, brokers []string, topic string) (*KafkaPubSub, error) { 24 | config := kafkapubsub.MinimalConfig() 25 | 26 | pubSubTopic, err := kafkapubsub.OpenTopic(brokers, config, topic, nil) 27 | if err != nil { 28 | return nil, err 29 | } 30 | return &KafkaPubSub{ 31 | topic: pubSubTopic, 32 | }, nil 33 | } 34 | 35 | func FromConfig(ctx context.Context, config Config) (*KafkaPubSub, error) { 36 | return New(ctx, config.Brokers, config.Topic) 37 | } 38 | 39 | func (pub *KafkaPubSub) Name() string { 40 | return PublisherType 41 | } 42 | 43 | func (pub *KafkaPubSub) Send(ctx context.Context, body []byte) error { 44 | return pub.topic.Send(ctx, &pubsub.Message{ 45 | Body: body, 46 | }) 47 | } 48 | -------------------------------------------------------------------------------- /pkg/publisher/publisher.go: 
-------------------------------------------------------------------------------- 1 | package publisher 2 | 3 | import ( 4 | "context" 5 | ) 6 | 7 | type Publisher interface { 8 | Send(ctx context.Context, body []byte) error 9 | Name() string 10 | } 11 | -------------------------------------------------------------------------------- /pkg/publisher/stdout/stdout.go: -------------------------------------------------------------------------------- 1 | package stdout 2 | 3 | import ( 4 | "context" 5 | "fmt" 6 | ) 7 | 8 | const ( 9 | PublisherType = "stdout" 10 | ) 11 | 12 | type Stdout struct{} 13 | 14 | func New() *Stdout { 15 | return &Stdout{} 16 | } 17 | 18 | func (pub *Stdout) Name() string { 19 | return PublisherType 20 | } 21 | 22 | func (pub *Stdout) Send(_ context.Context, body []byte) error { 23 | fmt.Printf("%s\n", body) 24 | return nil 25 | } 26 | -------------------------------------------------------------------------------- /pkg/scheduler/feed_group.go: -------------------------------------------------------------------------------- 1 | package scheduler 2 | 3 | import ( 4 | "context" 5 | "encoding/json" 6 | "errors" 7 | "time" 8 | 9 | log "github.com/sirupsen/logrus" 10 | 11 | "github.com/ossf/package-feeds/pkg/feeds" 12 | "github.com/ossf/package-feeds/pkg/publisher" 13 | ) 14 | 15 | var ( 16 | errPoll = errors.New("error when polling for packages") 17 | errPub = errors.New("error when publishing packages") 18 | ) 19 | 20 | type feedEntry struct { 21 | feed feeds.ScheduledFeed 22 | lastPoll time.Time 23 | } 24 | 25 | type FeedGroup struct { 26 | feeds []*feedEntry 27 | publisher publisher.Publisher 28 | initialCutoff time.Time 29 | } 30 | 31 | type groupResult struct { 32 | numPublished int 33 | pollErr error 34 | pubErr error 35 | } 36 | 37 | //nolint:lll 38 | func NewFeedGroup(scheduledFeeds []feeds.ScheduledFeed, pub publisher.Publisher, initialCutoff time.Duration) *FeedGroup { 39 | fg := &FeedGroup{ 40 | publisher: pub, 41 | initialCutoff: 
time.Now().UTC().Add(-initialCutoff), 42 | feeds: make([]*feedEntry, 0), 43 | } 44 | for _, feed := range scheduledFeeds { 45 | fg.AddFeed(feed) 46 | } 47 | return fg 48 | } 49 | 50 | func (fg *FeedGroup) AddFeed(feed feeds.ScheduledFeed) { 51 | fg.feeds = append(fg.feeds, &feedEntry{ 52 | feed: feed, 53 | lastPoll: fg.initialCutoff, 54 | }) 55 | } 56 | 57 | func (fg *FeedGroup) Run() { 58 | result := fg.pollAndPublish() 59 | if result.pollErr != nil { 60 | log.Error(result.pollErr) 61 | } 62 | if result.pubErr != nil { 63 | log.Error(result.pubErr) 64 | } 65 | } 66 | 67 | func (fg *FeedGroup) pollAndPublish() groupResult { 68 | result := groupResult{} 69 | pkgs, err := fg.poll() 70 | result.pollErr = err 71 | // Return early if no packages to process 72 | if len(pkgs) == 0 { 73 | return result 74 | } 75 | log.WithField("num_packages", len(pkgs)).Printf("Publishing packages...") 76 | result.numPublished, result.pubErr = fg.publishPackages(pkgs) 77 | if result.numPublished > 0 { 78 | log.WithField("num_packages", result.numPublished).Printf("Successfully published packages") 79 | } 80 | return result 81 | } 82 | 83 | // Poll fetches the latest packages from each registered feed. 
84 | func (fg *FeedGroup) poll() ([]*feeds.Package, error) { 85 | results := make(chan pollResult, len(fg.feeds)) 86 | for _, f := range fg.feeds { 87 | go func(f *feedEntry) { 88 | result := pollResult{ 89 | name: f.feed.GetName(), 90 | feed: f.feed, 91 | } 92 | result.packages, f.lastPoll, result.errs = f.feed.Latest(f.lastPoll) 93 | results <- result 94 | }(f) 95 | } 96 | errs := []error{} 97 | packages := []*feeds.Package{} 98 | for i := 0; i < len(fg.feeds); i++ { 99 | result := <-results 100 | 101 | logger := log.WithField("feed", result.name) 102 | for _, err := range result.errs { 103 | logger.WithError(err).Error("Error fetching packages") 104 | errs = append(errs, err) 105 | } 106 | for _, pkg := range result.packages { 107 | log.WithFields(log.Fields{ 108 | "feed": result.name, 109 | "name": pkg.Name, 110 | "version": pkg.Version, 111 | }).Print("Processing Package") 112 | } 113 | packages = append(packages, result.packages...) 114 | logger.WithField("num_processed", len(result.packages)).Print("Packages successfully processed") 115 | } 116 | err := errPoll 117 | if len(errs) == 0 { 118 | err = nil 119 | } 120 | 121 | log.WithField("time", time.Now().UTC()).Printf("%d packages processed", len(packages)) 122 | return packages, err 123 | } 124 | 125 | func (fg *FeedGroup) publishPackages(pkgs []*feeds.Package) (int, error) { 126 | processed := 0 127 | errs := []error{} 128 | for _, pkg := range pkgs { 129 | log.WithFields(log.Fields{ 130 | "name": pkg.Name, 131 | "feed": pkg.Type, 132 | "created_date": pkg.CreatedDate, 133 | }).Print("Sending package upstream") 134 | b, err := json.Marshal(pkg) 135 | if err != nil { 136 | log.WithField("name", pkg.Name).WithError(err).Error("Error marshaling package") 137 | errs = append(errs, err) 138 | } 139 | if err := (fg.publisher).Send(context.Background(), b); err != nil { 140 | log.WithField("name", pkg.Name).WithError(err).Error("Error sending package to upstream publisher") 141 | errs = append(errs, err) 142 | } 
143 | processed++ 144 | } 145 | err := errPub 146 | if len(errs) == 0 { 147 | err = nil 148 | } 149 | if len(pkgs)-processed != 0 { 150 | log.Errorf("Failed to publish %v packages", len(pkgs)-processed) 151 | } 152 | return processed, err 153 | } 154 | -------------------------------------------------------------------------------- /pkg/scheduler/feed_group_test.go: -------------------------------------------------------------------------------- 1 | package scheduler 2 | 3 | import ( 4 | "errors" 5 | "testing" 6 | "time" 7 | 8 | "github.com/ossf/package-feeds/pkg/feeds" 9 | "github.com/ossf/package-feeds/pkg/publisher" 10 | ) 11 | 12 | var ( 13 | errPackage = errors.New("error fetching packages") 14 | errPublishing = errors.New("error publishing packages") 15 | ) 16 | 17 | func TestFeedGroupPoll(t *testing.T) { 18 | t.Parallel() 19 | 20 | mockFeeds := []feeds.ScheduledFeed{ 21 | mockFeed{ 22 | packages: []*feeds.Package{ 23 | {Name: "Foo"}, 24 | {Name: "Bar"}, 25 | }, 26 | }, 27 | mockFeed{ 28 | packages: []*feeds.Package{ 29 | {Name: "Baz"}, 30 | {Name: "Qux"}, 31 | }, 32 | }, 33 | } 34 | mockPub := mockPublisher{} 35 | var pub publisher.Publisher = mockPub 36 | 37 | feedGroup := NewFeedGroup(mockFeeds, pub, time.Minute) 38 | 39 | pkgs, err := feedGroup.poll() 40 | if err != nil { 41 | t.Fatalf("Unexpected error arose during polling: %v", err) 42 | } 43 | if len(pkgs) != 4 { 44 | t.Fatalf("poll() returned %v packages when 4 were expected", len(pkgs)) 45 | } 46 | } 47 | 48 | func TestFeedGroupPollWithErr(t *testing.T) { 49 | t.Parallel() 50 | 51 | mockFeeds := []feeds.ScheduledFeed{ 52 | mockFeed{ 53 | errs: []error{errPackage}, 54 | }, 55 | mockFeed{ 56 | packages: []*feeds.Package{ 57 | {Name: "Baz"}, 58 | {Name: "Qux"}, 59 | }, 60 | }, 61 | } 62 | 63 | mockPub := mockPublisher{} 64 | var pub publisher.Publisher = mockPub 65 | 66 | feedGroup := NewFeedGroup(mockFeeds, pub, time.Minute) 67 | 68 | pkgs, err := feedGroup.poll() 69 | if err == nil { 70 | 
t.Fatalf("Expected error during polling") 71 | } 72 | if !errors.Is(err, errPoll) { 73 | t.Fatalf("Expected errPoll during polling") 74 | } 75 | if len(pkgs) != 2 { 76 | t.Fatalf("Expected 2 packages alongside errors but found %v", len(pkgs)) 77 | } 78 | } 79 | 80 | func TestFeedGroupPublish(t *testing.T) { 81 | t.Parallel() 82 | 83 | pkgs := []*feeds.Package{ 84 | {Name: "Baz"}, 85 | {Name: "Qux"}, 86 | } 87 | mockFeeds := []feeds.ScheduledFeed{} 88 | 89 | pubMessages := []string{} 90 | mockPub := mockPublisher{sendCallback: func(msg string) error { 91 | pubMessages = append(pubMessages, msg) 92 | return nil 93 | }} 94 | var pub publisher.Publisher = mockPub 95 | 96 | feedGroup := NewFeedGroup(mockFeeds, pub, time.Minute) 97 | numPublished, err := feedGroup.publishPackages(pkgs) 98 | if err != nil { 99 | t.Fatalf("Unexpected error whilst publishing packages: %v", err) 100 | } 101 | if numPublished != len(pkgs) { 102 | t.Fatalf("Expected %v packages to successfully publish but only %v were published", len(pkgs), numPublished) 103 | } 104 | } 105 | 106 | func TestFeedGroupPublishWithErr(t *testing.T) { 107 | t.Parallel() 108 | 109 | pkgs := []*feeds.Package{ 110 | {Name: "Baz"}, 111 | {Name: "Qux"}, 112 | } 113 | mockFeeds := []feeds.ScheduledFeed{} 114 | 115 | mockPub := mockPublisher{sendCallback: func(_ string) error { 116 | return errPublishing 117 | }} 118 | var pub publisher.Publisher = mockPub 119 | 120 | feedGroup := NewFeedGroup(mockFeeds, pub, time.Minute) 121 | _, err := feedGroup.publishPackages(pkgs) 122 | if err == nil { 123 | t.Fatalf("publishPackages provided no error when publishing produced an error") 124 | } 125 | if !errors.Is(err, errPub) { 126 | t.Fatalf("Expected errPub during publishing") 127 | } 128 | } 129 | -------------------------------------------------------------------------------- /pkg/scheduler/feed_groups_handler.go: -------------------------------------------------------------------------------- 1 | package scheduler 2 | 3 | 
import ( 4 | "fmt" 5 | "net/http" 6 | "strings" 7 | ) 8 | 9 | type FeedGroupsHandler struct { 10 | feedGroups []*FeedGroup 11 | } 12 | 13 | func NewFeedGroupsHandler(feeds []*FeedGroup) *FeedGroupsHandler { 14 | return &FeedGroupsHandler{feedGroups: feeds} 15 | } 16 | 17 | func (srv *FeedGroupsHandler) ServeHTTP(w http.ResponseWriter, _ *http.Request) { 18 | resultChannel := make(chan groupResult, len(srv.feedGroups)) 19 | numPublished := 0 20 | var pollErr, pubErr error 21 | var errStrings []string 22 | for _, group := range srv.feedGroups { 23 | go func(group *FeedGroup) { 24 | result := group.pollAndPublish() 25 | resultChannel <- result 26 | }(group) 27 | } 28 | for range srv.feedGroups { 29 | result := <-resultChannel 30 | numPublished += result.numPublished 31 | if result.pollErr != nil { 32 | pollErr = result.pollErr 33 | } 34 | if result.pubErr != nil { 35 | pubErr = result.pubErr 36 | } 37 | } 38 | for _, err := range []error{pollErr, pubErr} { 39 | if err != nil { 40 | errStrings = append(errStrings, err.Error()) 41 | } 42 | } 43 | if len(errStrings) > 0 { 44 | http.Error(w, strings.Join(errStrings, "\n")+fmt.Sprintf("\n%d packages successfully processed, see log for details", 45 | numPublished), 46 | http.StatusInternalServerError) 47 | return 48 | } 49 | _, err := w.Write([]byte(fmt.Sprintf("%d packages processed", numPublished))) 50 | if err != nil { 51 | http.Error(w, "unexpected error during http server write: %w", http.StatusInternalServerError) 52 | } 53 | } 54 | -------------------------------------------------------------------------------- /pkg/scheduler/mocks.go: -------------------------------------------------------------------------------- 1 | //nolint:gocritic 2 | package scheduler 3 | 4 | import ( 5 | "context" 6 | "time" 7 | 8 | "github.com/ossf/package-feeds/pkg/feeds" 9 | ) 10 | 11 | type mockFeed struct { 12 | packages []*feeds.Package 13 | errs []error 14 | cutoff time.Time 15 | options feeds.FeedOptions 16 | } 17 | 18 | func (feed 
mockFeed) GetName() string { 19 | return "mockFeed" 20 | } 21 | 22 | func (feed mockFeed) GetFeedOptions() feeds.FeedOptions { 23 | return feed.options 24 | } 25 | 26 | func (feed mockFeed) Latest(_ time.Time) ([]*feeds.Package, time.Time, []error) { 27 | return feed.packages, feed.cutoff, feed.errs 28 | } 29 | 30 | type mockPublisher struct { 31 | sendCallback func(string) error 32 | } 33 | 34 | func (pub mockPublisher) Send(_ context.Context, body []byte) error { 35 | if pub.sendCallback != nil { 36 | return pub.sendCallback(string(body)) 37 | } 38 | return nil 39 | } 40 | 41 | func (pub mockPublisher) Name() string { 42 | return "mockPublisher" 43 | } 44 | -------------------------------------------------------------------------------- /pkg/scheduler/scheduler.go: -------------------------------------------------------------------------------- 1 | package scheduler 2 | 3 | import ( 4 | "encoding/json" 5 | "fmt" 6 | "net/http" 7 | "strings" 8 | "time" 9 | 10 | "github.com/robfig/cron/v3" 11 | log "github.com/sirupsen/logrus" 12 | 13 | "github.com/ossf/package-feeds/pkg/feeds" 14 | "github.com/ossf/package-feeds/pkg/publisher" 15 | ) 16 | 17 | // Scheduler is a registry of feeds that should be run on a schedule. 18 | type Scheduler struct { 19 | registry map[string]feeds.ScheduledFeed 20 | publisher publisher.Publisher 21 | httpPort int 22 | } 23 | 24 | // New returns a new Scheduler with a publisher and feeds configured for polling. 25 | func New(feedsMap map[string]feeds.ScheduledFeed, pub publisher.Publisher, httpPort int) *Scheduler { 26 | return &Scheduler{ 27 | registry: feedsMap, 28 | publisher: pub, 29 | httpPort: httpPort, 30 | } 31 | } 32 | 33 | type pollResult struct { 34 | name string 35 | feed feeds.ScheduledFeed 36 | packages []*feeds.Package 37 | errs []error 38 | } 39 | 40 | // healthCheckHandler is a simple health check handler for the HTTP server. 
41 | func healthCheckHandler(w http.ResponseWriter, _ *http.Request) { 42 | w.Header().Set("Content-Type", "application/json") 43 | 44 | // Define a simple JSON response structure 45 | response := struct { 46 | Status string `json:"status"` 47 | Message string `json:"message"` 48 | }{ 49 | Status: "OK", 50 | Message: "Service is up and running", 51 | } 52 | 53 | // Serialize the response structure to JSON 54 | jsonResponse, err := json.Marshal(response) 55 | if err != nil { 56 | // If there's an error, log it and return a 500 Internal Server Error 57 | log.Errorf("Failed to marshal health check response: %v", err) 58 | w.WriteHeader(http.StatusInternalServerError) 59 | return 60 | } 61 | 62 | // Write an HTTP status code to the response 63 | w.WriteHeader(http.StatusOK) 64 | 65 | // Write the JSON response body 66 | _, err = w.Write(jsonResponse) 67 | if err != nil { 68 | log.Errorf("Failed to write health check response: %v", err) 69 | } 70 | } 71 | 72 | // Runs several services for the operation of scheduler, this call is blocking until application exit 73 | // or failure in the HTTP server 74 | // Services include: Cron polling via FeedGroups, HTTP serving of FeedGroupsHandler. 75 | func (s *Scheduler) Run(initialCutoff time.Duration, enableDefaultTimer bool) error { 76 | defaultSchedule := fmt.Sprintf("@every %s", initialCutoff.String()) 77 | 78 | schedules, err := buildSchedules(s.registry, s.publisher, initialCutoff) 79 | if err != nil { 80 | return err 81 | } 82 | var feedGroups []*FeedGroup 83 | var pollFeedNames []string 84 | 85 | // Configure cron job for scheduled polling. 
86 | cronJob := cron.New( 87 | cron.WithLogger(cron.PrintfLogger(log.StandardLogger())), 88 | cron.WithParser(cron.NewParser( 89 | cron.SecondOptional|cron.Minute|cron.Hour|cron.Dom|cron.Month|cron.Dow|cron.Descriptor, 90 | ))) 91 | for schedule, feedGroup := range schedules { 92 | var feedNames []string 93 | for _, f := range feedGroup.feeds { 94 | feedNames = append(feedNames, f.feed.GetName()) 95 | } 96 | 97 | if schedule == "" { 98 | if !enableDefaultTimer { 99 | // Without the default timer enabled, undefined schedules depend on HTTP request polling. 100 | // This avoids race conditions where the cron based request is in flight when an HTTP 101 | // request is made (or visa-versa). 102 | feedGroups = append(feedGroups, feedGroup) 103 | pollFeedNames = append(pollFeedNames, feedNames...) 104 | continue 105 | } 106 | // Undefined schedules will follow the default schedule, if the default timer is enabled. 107 | schedule = defaultSchedule 108 | } 109 | 110 | _, err := cronJob.AddJob(schedule, cron.NewChain( 111 | cron.SkipIfStillRunning(cron.VerbosePrintfLogger(log.StandardLogger())), 112 | ).Then(feedGroup)) 113 | if err != nil { 114 | return fmt.Errorf("failed to parse schedule `%s`: %w", schedule, err) 115 | } 116 | 117 | log.Printf("Running a timer for %s with schedule %s", strings.Join(feedNames, ", "), schedule) 118 | } 119 | cronJob.Start() 120 | 121 | // Start http server for polling via HTTP requests 122 | pollServer := NewFeedGroupsHandler(feedGroups) 123 | log.Infof("Listening on port %v for %s", s.httpPort, strings.Join(pollFeedNames, ", ")) 124 | http.Handle("/", pollServer) 125 | http.HandleFunc("/health", healthCheckHandler) 126 | 127 | server := &http.Server{ 128 | Addr: fmt.Sprintf(":%v", s.httpPort), 129 | // default 60s timeout used from nginx 130 | // https://medium.com/a-journey-with-go/go-understand-and-mitigate-slowloris-attack-711c1b1403f6 131 | ReadHeaderTimeout: 60 * time.Second, 132 | } 133 | if err := server.ListenAndServe(); err != 
nil { 134 | return err 135 | } 136 | 137 | return nil 138 | } 139 | 140 | // buildSchedules prepares a map of FeedGroups indexed by their appropriate cron schedule 141 | // The resulting map may have index "" with a FeedGroup of feeds without a schedule option configured. 142 | // 143 | //nolint:lll 144 | func buildSchedules(registry map[string]feeds.ScheduledFeed, pub publisher.Publisher, initialCutoff time.Duration) (map[string]*FeedGroup, error) { 145 | schedules := map[string]*FeedGroup{} 146 | for _, feed := range registry { 147 | options := feed.GetFeedOptions() 148 | 149 | pollRate := options.PollRate 150 | cutoff := initialCutoff 151 | var err error 152 | var schedule string 153 | 154 | if pollRate != "" { 155 | cutoff, err = time.ParseDuration(pollRate) 156 | if err != nil { 157 | return nil, fmt.Errorf("failed to parse `%s` as duration: %w", pollRate, err) 158 | } 159 | schedule = fmt.Sprintf("@every %s", pollRate) 160 | } 161 | 162 | // Initialize new schedules in map. 163 | if _, ok := schedules[schedule]; !ok { 164 | schedules[schedule] = NewFeedGroup([]feeds.ScheduledFeed{}, pub, cutoff) 165 | } 166 | schedules[schedule].AddFeed(feed) 167 | } 168 | return schedules, nil 169 | } 170 | -------------------------------------------------------------------------------- /pkg/scheduler/scheduler_test.go: -------------------------------------------------------------------------------- 1 | package scheduler 2 | 3 | import ( 4 | "testing" 5 | "time" 6 | 7 | "github.com/ossf/package-feeds/pkg/feeds" 8 | ) 9 | 10 | func TestBuildSchedules(t *testing.T) { 11 | t.Parallel() 12 | 13 | scheduledFeeds := map[string]feeds.ScheduledFeed{ 14 | "Foo": mockFeed{ 15 | packages: []*feeds.Package{ 16 | {Name: "Foo"}, 17 | }, 18 | options: feeds.FeedOptions{PollRate: "30s"}, 19 | }, 20 | "Bar": mockFeed{ 21 | packages: []*feeds.Package{ 22 | {Name: "Bar"}, 23 | }, 24 | options: feeds.FeedOptions{PollRate: "30s"}, 25 | }, 26 | "Baz": mockFeed{ 27 | packages: []*feeds.Package{ 28 
| {Name: "Baz"}, 29 | }, 30 | options: feeds.FeedOptions{PollRate: "20s"}, 31 | }, 32 | "Qux": mockFeed{ 33 | packages: []*feeds.Package{ 34 | {Name: "Baz"}, 35 | }, 36 | }, 37 | } 38 | cutoff := time.Minute 39 | pub := mockPublisher{} 40 | schedules, err := buildSchedules(scheduledFeeds, pub, cutoff) 41 | if err != nil { 42 | t.Fatalf("Failed to build schedules: %v", err) 43 | } 44 | 45 | defaultFg, ok := schedules[""] 46 | if !ok { 47 | t.Fatalf("Schedules did not contain a FeedGroup under the default schedule") 48 | } 49 | twentySecFg, ok := schedules["@every 20s"] 50 | if !ok { 51 | t.Fatalf("Schedules did not contain a FeedGroup under the 20s schedule") 52 | } 53 | thirtySecFg, ok := schedules["@every 30s"] 54 | if !ok { 55 | t.Fatalf("Schedules did not contain a FeedGroup under the 30s schedule") 56 | } 57 | 58 | if len(defaultFg.feeds) != 1 { 59 | t.Fatalf("Default schedule contained %v feeds when %v was expected.", len(defaultFg.feeds), 1) 60 | } 61 | if len(twentySecFg.feeds) != 1 { 62 | t.Fatalf("20s schedule contained %v feeds when %v was expected.", len(twentySecFg.feeds), 1) 63 | } 64 | if len(thirtySecFg.feeds) != 2 { 65 | t.Fatalf("30s schedule contained %v feeds when %v was expected.", len(thirtySecFg.feeds), 2) 66 | } 67 | } 68 | -------------------------------------------------------------------------------- /pkg/useragent/useragent.go: -------------------------------------------------------------------------------- 1 | package useragent 2 | 3 | import "net/http" 4 | 5 | type RoundTripper struct { 6 | UserAgent string 7 | Parent http.RoundTripper 8 | } 9 | 10 | func (rt *RoundTripper) RoundTrip(ireq *http.Request) (*http.Response, error) { 11 | req := ireq.Clone(ireq.Context()) 12 | req.Header.Set("User-Agent", rt.UserAgent) 13 | return rt.parent().RoundTrip(req) 14 | } 15 | 16 | func (rt *RoundTripper) parent() http.RoundTripper { 17 | if rt.Parent != nil { 18 | return rt.Parent 19 | } 20 | return http.DefaultTransport 21 | } 22 | 
-------------------------------------------------------------------------------- /pkg/useragent/useragent_test.go: -------------------------------------------------------------------------------- 1 | package useragent_test 2 | 3 | import ( 4 | "net/http" 5 | "net/http/httptest" 6 | "testing" 7 | 8 | "github.com/ossf/package-feeds/pkg/useragent" 9 | ) 10 | 11 | func TestRoundTripper(t *testing.T) { 12 | t.Parallel() 13 | want := "test user agent string" 14 | ts := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { 15 | got := r.Header.Get("user-agent") 16 | if got != want { 17 | t.Errorf("User Agent = %q, want %q", got, want) 18 | } 19 | w.WriteHeader(http.StatusOK) 20 | })) 21 | defer ts.Close() 22 | 23 | c := http.Client{ 24 | Transport: &useragent.RoundTripper{UserAgent: want}, 25 | } 26 | resp, err := c.Get(ts.URL) 27 | if err != nil { 28 | t.Fatalf("Get() = %v; want no error", err) 29 | } 30 | resp.Body.Close() 31 | if resp.StatusCode != http.StatusOK { 32 | t.Fatalf("Get() status = %v; want 200", resp.StatusCode) 33 | } 34 | } 35 | 36 | type roundTripperFunc func(*http.Request) (*http.Response, error) 37 | 38 | func (rt roundTripperFunc) RoundTrip(r *http.Request) (*http.Response, error) { 39 | return rt(r) 40 | } 41 | 42 | func TestRoundTripper_Parent(t *testing.T) { 43 | t.Parallel() 44 | want := "test user agent string" 45 | ts := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { 46 | got := r.Header.Get("user-agent") 47 | if got != want { 48 | t.Errorf("User Agent = %q, want %q", got, want) 49 | } 50 | w.WriteHeader(http.StatusOK) 51 | })) 52 | defer ts.Close() 53 | 54 | calledParent := false 55 | c := http.Client{ 56 | Transport: &useragent.RoundTripper{ 57 | UserAgent: want, 58 | Parent: roundTripperFunc(func(r *http.Request) (*http.Response, error) { 59 | calledParent = true 60 | return http.DefaultTransport.RoundTrip(r) 61 | }), 62 | }, 63 | } 64 | resp, err := c.Get(ts.URL) 65 | if err 
!= nil { 66 | t.Fatalf("Get() = %v; want no error", err) 67 | } 68 | resp.Body.Close() 69 | if resp.StatusCode != http.StatusOK { 70 | t.Fatalf("Get() status = %v; want 200", resp.StatusCode) 71 | } 72 | if !calledParent { 73 | t.Errorf("Failed to call Parent RoundTripper") 74 | } 75 | } 76 | -------------------------------------------------------------------------------- /pkg/utils/http_requests.go: -------------------------------------------------------------------------------- 1 | package utils 2 | 3 | import ( 4 | "errors" 5 | "fmt" 6 | "net/http" 7 | ) 8 | 9 | var ErrUnsuccessfulRequest = errors.New("unsuccessful request") 10 | 11 | func CheckResponseStatus(res *http.Response) error { 12 | if res.StatusCode < 200 || res.StatusCode > 299 { 13 | return fmt.Errorf("%w: %v", ErrUnsuccessfulRequest, res.Status) 14 | } 15 | return nil 16 | } 17 | 18 | func IsNotModified(res *http.Response) bool { 19 | return res.StatusCode == http.StatusNotModified 20 | } 21 | -------------------------------------------------------------------------------- /pkg/utils/test/http_helper.go: -------------------------------------------------------------------------------- 1 | package testutils 2 | 3 | import ( 4 | "fmt" 5 | "net/http" 6 | "net/http/httptest" 7 | ) 8 | 9 | type HTTPHandlerFunc func(w http.ResponseWriter, r *http.Request) 10 | 11 | func HTTPServerMock(handlerFuncs map[string]HTTPHandlerFunc) *httptest.Server { 12 | handler := http.NewServeMux() 13 | for endpoint, f := range handlerFuncs { 14 | handler.HandleFunc(endpoint, f) 15 | } 16 | srv := httptest.NewServer(handler) 17 | 18 | return srv 19 | } 20 | 21 | func UnexpectedWriteError(err error) string { 22 | return fmt.Sprintf("Unexpected error during mock http server write: %s", err.Error()) 23 | } 24 | 25 | func NotFoundHandlerFunc(w http.ResponseWriter, r *http.Request) { 26 | http.NotFound(w, r) 27 | } 28 | -------------------------------------------------------------------------------- /pkg/utils/xml_reader.go: 
-------------------------------------------------------------------------------- 1 | package utils 2 | 3 | import ( 4 | "bufio" 5 | "io" 6 | "unicode" 7 | "unicode/utf8" 8 | ) 9 | 10 | const unicodeWhiteSquare = '\u25A1' 11 | 12 | // XMLReader implements a Reader that reads XML responses and does two things: 13 | // 1. Ignores non UTF-8 characters. 14 | // 2. If ReplacementChar is not rune(0), non-XML valid runes are replaced with that. 15 | type XMLReader struct { 16 | buffer *bufio.Reader 17 | ReplacementChar rune 18 | } 19 | 20 | // NewXMLReader wraps a new XMLReader around an existing reader. 21 | // replaceNonXMLChars is a convenience option that sets ReplacementChar 22 | // to the unicode white square character. This character will replace 23 | // all invalid XML characters found in the stream. 24 | func NewXMLReader(rd io.Reader, replaceNonXMLChars bool) XMLReader { 25 | reader := XMLReader{buffer: bufio.NewReader(rd)} 26 | if replaceNonXMLChars { 27 | reader.ReplacementChar = unicodeWhiteSquare 28 | } 29 | return reader 30 | } 31 | 32 | func (reader XMLReader) replaceNonXMLChars() bool { 33 | return reader.ReplacementChar != rune(0) 34 | } 35 | 36 | // Returns true iff the given rune is in the XML Character Range, as defined 37 | // by https://www.xml.com/axml/testaxml.htm, Section 2.2 Characters. 38 | // Implementation copied from xml/xml.go. 39 | func isInXMLCharacterRange(r rune) bool { 40 | return r == 0x09 || r == 0x0A || r == 0x0D || 41 | r >= 0x20 && r <= 0xD7FF || 42 | r >= 0xE000 && r <= 0xFFFD || 43 | r >= 0x10000 && r <= 0x10FFFF 44 | } 45 | 46 | // Reads bytes into the byte array b whilst ignoring non utf-8 characters 47 | // Returns the number of bytes read and an error if one occurs. 
48 | func (reader XMLReader) Read(b []byte) (int, error) { 49 | numBytesRead := 0 50 | for { 51 | r, runeSize, err := reader.buffer.ReadRune() 52 | if err != nil { 53 | return numBytesRead, err 54 | } 55 | 56 | // Invalid UTF-8 characters are represented with r set to utf8.RuneError 57 | // (i.e. Unicode replacement character) and read size of 1 58 | if r == utf8.RuneError && runeSize == 1 { 59 | continue 60 | } 61 | 62 | // Also ignore the replacement character for compatibility with previous behaviour 63 | // (yes, utf8.RuneError == unicode.ReplacementChar) 64 | if r == unicode.ReplacementChar { 65 | continue 66 | } 67 | 68 | if reader.replaceNonXMLChars() && !isInXMLCharacterRange(r) { 69 | r = reader.ReplacementChar 70 | runeSize = utf8.RuneLen(r) 71 | } 72 | 73 | // Finish Read if we don't have enough space for this rune in the output byte slice 74 | if numBytesRead+runeSize >= len(b) { 75 | err = reader.buffer.UnreadRune() 76 | return numBytesRead, err 77 | } 78 | 79 | utf8.EncodeRune(b[numBytesRead:], r) 80 | numBytesRead += runeSize 81 | } 82 | } 83 | -------------------------------------------------------------------------------- /terraform/feeds.tf: -------------------------------------------------------------------------------- 1 | provider "google" { 2 | project = var.project 3 | region = var.region 4 | } 5 | 6 | terraform { 7 | backend "gcs" { 8 | bucket = "ossf-feeds-tf-state" 9 | prefix = "terraform/state" 10 | } 11 | } 12 | 13 | locals { 14 | services = [ 15 | "cloudbuild.googleapis.com", 16 | "run.googleapis.com", 17 | "cloudscheduler.googleapis.com", 18 | "cloudresourcemanager.googleapis.com", 19 | ] 20 | } 21 | 22 | resource "google_service_account" "run-invoker-account" { 23 | account_id = "run-invoker-sa" 24 | display_name = "Feed Run Invoker" 25 | } 26 | 27 | resource "google_project_iam_member" "run-invoker-iam" { 28 | role = "roles/run.invoker" 29 | project = var.project 30 | member = 
"serviceAccount:${google_service_account.run-invoker-account.email}" 31 | } 32 | 33 | resource "google_project_service" "services" { 34 | for_each = toset(local.services) 35 | service = each.value 36 | disable_on_destroy = false 37 | } 38 | 39 | resource "google_pubsub_topic" "feed-topic" { 40 | name = "feed-topic" 41 | } 42 | 43 | module "feed_scheduler" { 44 | source = "./scheduler" 45 | 46 | project = var.project 47 | region = var.region 48 | service-account-email = google_service_account.run-invoker-account.email 49 | pubsub-topic-feed-id = google_pubsub_topic.feed-topic.id 50 | } 51 | -------------------------------------------------------------------------------- /terraform/scheduler/main.tf: -------------------------------------------------------------------------------- 1 | resource "google_cloud_scheduler_job" "trigger-ecosystem-scheduler" { 2 | name = "trigger-feeds-scheduler" 3 | description = "Scheduler for packages from feeds" 4 | schedule = "*/5 * * * *" 5 | 6 | http_target { 7 | http_method = "POST" 8 | uri = google_cloud_run_service.run-scheduler.status[0].url 9 | 10 | oidc_token { 11 | service_account_email = var.service-account-email 12 | } 13 | } 14 | } 15 | 16 | resource "google_cloud_run_service" "run-scheduler" { 17 | name = "scheduled-feeds-srv" 18 | location = var.region 19 | autogenerate_revision_name = true 20 | 21 | metadata { 22 | annotations = { 23 | "autoscaling.knative.dev/maxScale" = "1" 24 | } 25 | } 26 | template { 27 | spec { 28 | container_concurrency = 1 29 | containers { 30 | image = "gcr.io/${var.project}/scheduled-feeds" 31 | env { 32 | name = "OSSMALWARE_TOPIC_URL" 33 | value = "gcppubsub://${var.pubsub-topic-feed-id}" 34 | } 35 | resources { 36 | limits = { 37 | memory = "2Gi" 38 | cpu = "1000m" 39 | } 40 | } 41 | } 42 | } 43 | } 44 | } 45 | -------------------------------------------------------------------------------- /terraform/scheduler/variables.tf: 
-------------------------------------------------------------------------------- 1 | variable "project" {} 2 | variable "region" {} 3 | variable "service-account-email" {} 4 | variable "pubsub-topic-feed-id" {} 5 | -------------------------------------------------------------------------------- /terraform/terraform.tfvars: -------------------------------------------------------------------------------- 1 | project = "ossf-malware-analysis" 2 | region = "us-central1" 3 | -------------------------------------------------------------------------------- /terraform/variables.tf: -------------------------------------------------------------------------------- 1 | variable "project" {} 2 | variable "region" {} 3 | --------------------------------------------------------------------------------