├── .github └── workflows │ ├── ci.yaml │ ├── mkdocs.yaml │ ├── oci-ci.yaml │ ├── publish.yaml │ └── release.yaml ├── .gitignore ├── .golangci.yml ├── .goreleaser.yml ├── CNAME ├── Dockerfile ├── LICENSE ├── Makefile ├── README.md ├── _config.yml ├── cmd ├── constants.go ├── list.go ├── list_amazonlinux1.go ├── list_amazonlinux2.go ├── list_amazonlinux2022.go ├── list_amazonlinux2023.go ├── list_archlinux.go ├── list_centos.go ├── list_debian.go ├── list_fedora.go ├── list_opensuse.go ├── list_oracle.go ├── list_ubuntu.go ├── root.go └── version.go ├── docs ├── CNAME ├── getting-started.md ├── index.md ├── reference │ ├── cli.md │ ├── config.md │ └── index.md └── roadmap.md ├── go.mod ├── go.sum ├── images ├── amazonlinux.png ├── archlinux.png ├── centos.png ├── debian.png ├── fedora.png ├── madagascar.png ├── madagascar_small.png ├── opensuse.png ├── oracle.png └── ubuntu.png ├── internal ├── format │ └── format.go └── utils │ ├── config.go │ ├── maps.go │ └── utils.go ├── main.go ├── mkdocs.yml ├── pkg ├── distro │ ├── amazonlinux │ │ ├── amazonlinux.go │ │ ├── config.go │ │ ├── constants.go │ │ ├── v1 │ │ │ ├── amazonlinux.go │ │ │ └── constants.go │ │ ├── v2 │ │ │ ├── amazonlinux.go │ │ │ └── constants.go │ │ ├── v2022 │ │ │ ├── amazonlinux.go │ │ │ └── constants.go │ │ └── v2023 │ │ │ ├── amazonlinux.go │ │ │ └── constants.go │ ├── archlinux │ │ ├── archlinux.go │ │ ├── config.go │ │ └── constants.go │ ├── centos │ │ ├── centos.go │ │ ├── config.go │ │ └── constants.go │ ├── constants.go │ ├── debian │ │ ├── config.go │ │ ├── constants.go │ │ ├── debian.go │ │ └── release.go │ ├── distro.go │ ├── error.go │ ├── fedora │ │ ├── config.go │ │ ├── constants.go │ │ └── fedora.go │ ├── opensuse │ │ ├── config.go │ │ ├── constants.go │ │ └── opensuse.go │ ├── oracle │ │ ├── config.go │ │ ├── constants.go │ │ └── oracle.go │ ├── ubuntu │ │ ├── constants.go │ │ └── ubuntu.go │ └── utils.go ├── kernelrelease │ ├── compiler.go │ ├── constants.go │ ├── errors.go │ ├── 
kernelrelease.go │ ├── kernelrelease_test.go │ ├── list.go │ └── version.go ├── output │ ├── log.go │ ├── options.go │ ├── progress.go │ └── progress_test.go ├── packages │ ├── alpm │ │ ├── alpm.go │ │ ├── errors.go │ │ └── search.go │ ├── deb │ │ ├── constants.go │ │ ├── deb.go │ │ ├── package.go │ │ └── search.go │ ├── package.go │ ├── repository.go │ ├── rpm │ │ ├── constants.go │ │ ├── database.go │ │ ├── errors.go │ │ ├── package.go │ │ ├── rpm.go │ │ ├── search.go │ │ └── types.go │ ├── search.go │ └── sync.go ├── scrape │ ├── constants.go │ ├── scrape.go │ └── utils.go └── utils │ ├── matrix │ ├── README.md │ ├── error.go │ ├── matrix.go │ ├── matrix_test.go │ └── types.go │ └── template │ ├── constants.go │ ├── template.go │ ├── types.go │ └── utils.go └── testdata ├── amazonlinux-norepos.yaml ├── amazonlinux.yaml ├── amazonlinux2-norepos.yaml ├── amazonlinux2.yaml ├── amazonlinux2022-norepos.yaml ├── amazonlinux2022.yaml ├── amazonlinux2023-norepos.yaml ├── amazonlinux2023.yaml ├── centos-norepos.yaml ├── centos.yaml ├── debian-norepos.yaml ├── debian.yaml ├── ubuntu-norepos.yaml └── ubuntu.yaml /.github/workflows/ci.yaml: -------------------------------------------------------------------------------- 1 | name: CI 2 | 3 | on: 4 | push: 5 | branches: [ "*" ] 6 | pull_request: 7 | branches: [ "*" ] 8 | 9 | jobs: 10 | golangci: 11 | name: lint 12 | runs-on: ubuntu-latest 13 | steps: 14 | - uses: actions/checkout@v2 15 | - name: Run golangci-lint 16 | uses: golangci/golangci-lint-action@v3.2.0 17 | with: 18 | version: v1.50.0 19 | only-new-issues: false 20 | args: --config .golangci.yml 21 | -------------------------------------------------------------------------------- /.github/workflows/mkdocs.yaml: -------------------------------------------------------------------------------- 1 | name: Publish docs via GitHub Pages 2 | on: 3 | push: 4 | branches: 5 | - main 6 | jobs: 7 | build: 8 | name: Deploy docs 9 | runs-on: ubuntu-latest 10 | steps: 11 | - name: 
Checkout master 12 | uses: actions/checkout@v2 13 | - name: Deploy docs 14 | uses: mhausenblas/mkdocs-deploy-gh-pages@master 15 | env: 16 | GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} 17 | CONFIG_FILE: mkdocs.yml 18 | EXTRA_PACKAGES: build-base 19 | -------------------------------------------------------------------------------- /.github/workflows/oci-ci.yaml: -------------------------------------------------------------------------------- 1 | name: OCI-CI 2 | 3 | on: 4 | push: 5 | tags: 6 | - "v*" 7 | 8 | jobs: 9 | oci-ci: 10 | runs-on: ubuntu-latest 11 | steps: 12 | 13 | - name: Checkout 14 | uses: actions/checkout@v2 15 | 16 | - name: Generate build-args 17 | id: build-args 18 | run: | 19 | # Declare vars for internal use 20 | VERSION=$(git describe --abbrev=0 --tags) 21 | GIT_HEAD_COMMIT=$(git rev-parse --short HEAD) 22 | GIT_TAG_COMMIT=$(git rev-parse --short $VERSION) 23 | GIT_MODIFIED_1=$(git diff $GIT_HEAD_COMMIT $GIT_TAG_COMMIT --quiet && echo "" || echo ".dev") 24 | GIT_MODIFIED_2=$(git diff --quiet && echo "" || echo ".dirty") 25 | # Export to GH_ENV 26 | echo "GIT_LAST_TAG=$VERSION" >> $GITHUB_ENV 27 | echo "GIT_HEAD_COMMIT=$GIT_HEAD_COMMIT" >> $GITHUB_ENV 28 | echo "GIT_TAG_COMMIT=$GIT_TAG_COMMIT" >> $GITHUB_ENV 29 | echo "GIT_MODIFIED=$(echo "$GIT_MODIFIED_1""$GIT_MODIFIED_2")" >> $GITHUB_ENV 30 | echo "GIT_REPO=$(git config --get remote.origin.url)" >> $GITHUB_ENV 31 | echo "BUILD_DATE=$(git log -1 --format="%at" | xargs -I{} date -d @{} +%Y-%m-%dT%H:%M:%S)" >> $GITHUB_ENV 32 | 33 | - name: Docker meta 34 | id: meta 35 | uses: docker/metadata-action@v4 36 | with: 37 | images: | 38 | quay.io/${{ github.repository }} 39 | docker.io/${{ github.repository }} 40 | tags: | 41 | type=semver,pattern={{raw}} 42 | flavor: | 43 | latest=false 44 | 45 | - name: Set up QEMU 46 | id: qemu 47 | uses: docker/setup-qemu-action@v1 48 | with: 49 | platforms: arm64,arm 50 | 51 | - name: Set up Docker Buildx 52 | id: buildx 53 | uses: docker/setup-buildx-action@v2 54 | 
with: 55 | install: true 56 | 57 | - name: Inspect builder 58 | run: | 59 | echo "Name: ${{ steps.buildx.outputs.name }}" 60 | echo "Endpoint: ${{ steps.buildx.outputs.endpoint }}" 61 | echo "Status: ${{ steps.buildx.outputs.status }}" 62 | echo "Flags: ${{ steps.buildx.outputs.flags }}" 63 | echo "Platforms: ${{ steps.buildx.outputs.platforms }}" 64 | 65 | - name: Login to quay.io Container Registry 66 | uses: docker/login-action@v1 67 | with: 68 | registry: quay.io 69 | username: ${{ secrets.QUAY_USERNAME }} 70 | password: ${{ secrets.QUAY_PASSWORD }} 71 | 72 | - name: Login to docker.io Container Registry 73 | uses: docker/login-action@v1 74 | with: 75 | registry: docker.io 76 | username: ${{ secrets.DOCKER_HUB_USERNAME }} 77 | password: ${{ secrets.DOCKER_HUB_PASSWORD }} 78 | 79 | - name: Build and push 80 | id: build-release 81 | uses: docker/build-push-action@v2 82 | with: 83 | file: Dockerfile 84 | context: . 85 | platforms: linux/amd64,linux/arm64,linux/arm 86 | push: true 87 | tags: ${{ steps.meta.outputs.tags }} 88 | build-args: | 89 | GIT_HEAD_COMMIT=${{ env.GIT_HEAD_COMMIT }} 90 | GIT_TAG_COMMIT=${{ env.GIT_TAG_COMMIT }} 91 | GIT_REPO=${{ env.GIT_REPO }} 92 | GIT_LAST_TAG=${{ env.GIT_LAST_TAG }} 93 | GIT_MODIFIED=${{ env.GIT_MODIFIED }} 94 | BUILD_DATE=${{ env.BUILD_DATE }} 95 | 96 | - name: Image digest 97 | run: echo ${{ steps.build-release.outputs.digest }} 98 | -------------------------------------------------------------------------------- /.github/workflows/publish.yaml: -------------------------------------------------------------------------------- 1 | name: Publish Database 2 | 3 | on: 4 | schedule: 5 | - cron: '0 0 * * *' 6 | 7 | env: 8 | AWS_REGION: "eu-west-1" 9 | AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }} 10 | AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY }} 11 | 12 | jobs: 13 | amazonlinux-1: 14 | runs-on: ubuntu-latest 15 | steps: 16 | - uses: actions/checkout@v2 17 | with: 18 | fetch-depth: '0' 19 | - run: echo 
"STABLE=`echo $(git describe --tags --abbrev=0)`" >> $GITHUB_ENV 20 | - uses: actions/checkout@v2 21 | with: 22 | ref: ${{ env.STABLE }} 23 | - uses: actions/setup-go@v3 24 | with: 25 | go-version-file: 'go.mod' 26 | - name: Install AWS CLI 27 | id: install-aws-cli 28 | uses: unfor19/install-aws-cli-action@master 29 | with: 30 | version: "2" 31 | - name: Publish results to S3 32 | run: make publish/amazonlinux 33 | amazonlinux-2: 34 | runs-on: ubuntu-latest 35 | steps: 36 | - uses: actions/checkout@v2 37 | with: 38 | fetch-depth: '0' 39 | - run: echo "STABLE=`echo $(git describe --tags --abbrev=0)`" >> $GITHUB_ENV 40 | - uses: actions/checkout@v2 41 | with: 42 | ref: ${{ env.STABLE }} 43 | - uses: actions/setup-go@v3 44 | with: 45 | go-version-file: 'go.mod' 46 | - name: Install AWS CLI 47 | id: install-aws-cli 48 | uses: unfor19/install-aws-cli-action@master 49 | with: 50 | version: "2" 51 | - name: Publish results to S3 52 | run: make publish/amazonlinux2 53 | amazonlinux-2022: 54 | runs-on: ubuntu-latest 55 | steps: 56 | - uses: actions/checkout@v2 57 | with: 58 | fetch-depth: '0' 59 | - run: echo "STABLE=`echo $(git describe --tags --abbrev=0)`" >> $GITHUB_ENV 60 | - uses: actions/checkout@v2 61 | with: 62 | ref: ${{ env.STABLE }} 63 | - uses: actions/setup-go@v3 64 | with: 65 | go-version-file: 'go.mod' 66 | - name: Install AWS CLI 67 | id: install-aws-cli 68 | uses: unfor19/install-aws-cli-action@master 69 | with: 70 | version: "2" 71 | - name: Publish results to S3 72 | run: make publish/amazonlinux2022 73 | amazonlinux-2023: 74 | runs-on: ubuntu-latest 75 | steps: 76 | - uses: actions/checkout@v2 77 | with: 78 | fetch-depth: '0' 79 | - run: echo "STABLE=`echo $(git describe --tags --abbrev=0)`" >> $GITHUB_ENV 80 | - uses: actions/checkout@v2 81 | with: 82 | ref: ${{ env.STABLE }} 83 | - uses: actions/setup-go@v3 84 | with: 85 | go-version-file: 'go.mod' 86 | - name: Install AWS CLI 87 | id: install-aws-cli 88 | uses: unfor19/install-aws-cli-action@master 89 
| with: 90 | version: "2" 91 | - name: Publish results to S3 92 | run: make publish/amazonlinux2023 93 | centos: 94 | runs-on: ubuntu-latest 95 | steps: 96 | - uses: actions/checkout@v2 97 | with: 98 | fetch-depth: '0' 99 | - run: echo "STABLE=`echo $(git describe --tags --abbrev=0)`" >> $GITHUB_ENV 100 | - uses: actions/checkout@v2 101 | with: 102 | ref: ${{ env.STABLE }} 103 | - uses: actions/setup-go@v3 104 | with: 105 | go-version-file: 'go.mod' 106 | - name: Install AWS CLI 107 | id: install-aws-cli 108 | uses: unfor19/install-aws-cli-action@master 109 | with: 110 | version: "2" 111 | - name: Publish results to S3 112 | run: make publish/centos 113 | debian: 114 | runs-on: ubuntu-latest 115 | steps: 116 | - uses: actions/checkout@v2 117 | with: 118 | fetch-depth: '0' 119 | - run: echo "STABLE=`echo $(git describe --tags --abbrev=0)`" >> $GITHUB_ENV 120 | - uses: actions/checkout@v2 121 | with: 122 | ref: ${{ env.STABLE }} 123 | - uses: actions/setup-go@v3 124 | with: 125 | go-version-file: 'go.mod' 126 | - name: Install AWS CLI 127 | id: install-aws-cli 128 | uses: unfor19/install-aws-cli-action@master 129 | with: 130 | version: "2" 131 | - name: Publish results to S3 132 | run: make publish/debian 133 | fedora: 134 | runs-on: ubuntu-latest 135 | steps: 136 | - uses: actions/checkout@v2 137 | with: 138 | fetch-depth: '0' 139 | - run: echo "STABLE=`echo $(git describe --tags --abbrev=0)`" >> $GITHUB_ENV 140 | - uses: actions/checkout@v2 141 | with: 142 | ref: ${{ env.STABLE }} 143 | - uses: actions/setup-go@v3 144 | with: 145 | go-version-file: 'go.mod' 146 | - name: Install AWS CLI 147 | id: install-aws-cli 148 | uses: unfor19/install-aws-cli-action@master 149 | with: 150 | version: "2" 151 | - name: Publish results to S3 152 | run: make publish/fedora 153 | opensuse: 154 | runs-on: ubuntu-latest 155 | steps: 156 | - uses: actions/checkout@v2 157 | with: 158 | fetch-depth: '0' 159 | - run: echo "STABLE=`echo $(git describe --tags --abbrev=0)`" >> $GITHUB_ENV 
160 | - uses: actions/checkout@v2 161 | with: 162 | ref: ${{ env.STABLE }} 163 | - uses: actions/setup-go@v3 164 | with: 165 | go-version-file: 'go.mod' 166 | - name: Install AWS CLI 167 | id: install-aws-cli 168 | uses: unfor19/install-aws-cli-action@master 169 | with: 170 | version: "2" 171 | - name: Publish results to S3 172 | run: make publish/opensuse 173 | oracle: 174 | runs-on: ubuntu-latest 175 | steps: 176 | - uses: actions/checkout@v2 177 | with: 178 | fetch-depth: '0' 179 | - run: echo "STABLE=`echo $(git describe --tags --abbrev=0)`" >> $GITHUB_ENV 180 | - uses: actions/checkout@v2 181 | with: 182 | ref: ${{ env.STABLE }} 183 | - uses: actions/setup-go@v3 184 | with: 185 | go-version-file: 'go.mod' 186 | - name: Install AWS CLI 187 | id: install-aws-cli 188 | uses: unfor19/install-aws-cli-action@master 189 | with: 190 | version: "2" 191 | - name: Publish results to S3 192 | run: make publish/oracle 193 | ubuntu: 194 | runs-on: ubuntu-latest 195 | steps: 196 | - uses: actions/checkout@v2 197 | with: 198 | fetch-depth: '0' 199 | - run: echo "STABLE=`echo $(git describe --tags --abbrev=0)`" >> $GITHUB_ENV 200 | - uses: actions/checkout@v2 201 | with: 202 | ref: ${{ env.STABLE }} 203 | - uses: actions/setup-go@v3 204 | with: 205 | go-version-file: 'go.mod' 206 | - name: Install AWS CLI 207 | id: install-aws-cli 208 | uses: unfor19/install-aws-cli-action@master 209 | with: 210 | version: "2" 211 | - name: Publish results to S3 212 | run: make publish/ubuntu 213 | -------------------------------------------------------------------------------- /.github/workflows/release.yaml: -------------------------------------------------------------------------------- 1 | name: Release 2 | 3 | on: 4 | push: 5 | tags: 6 | - v* 7 | 8 | permissions: 9 | contents: write 10 | packages: write 11 | 12 | jobs: 13 | release-cross: 14 | runs-on: ubuntu-latest 15 | steps: 16 | - name: checkout code 17 | uses: actions/checkout@v2 18 | with: 19 | submodules: 'true' 20 | - run: git 
fetch --prune --unshallow 21 | - name: Set up Go and dependencies 22 | uses: actions/setup-go@v2 23 | - name: Setup release environment 24 | run: |- 25 | echo 'GITHUB_TOKEN=${{ secrets.GITHUB_TOKEN }}' > .release-env 26 | - name: Publish release 27 | run: make release 28 | 29 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Compiled Object files, Static and Dynamic libs (Shared Objects) 2 | *.o 3 | *.a 4 | *.so 5 | 6 | # Folders 7 | _obj 8 | _test 9 | 10 | # Architecture specific extensions/prefixes 11 | *.[568vq] 12 | [568vq].out 13 | 14 | *.cgo1.go 15 | *.cgo2.c 16 | _cgo_defun.c 17 | _cgo_gotypes.go 18 | _cgo_export.* 19 | 20 | _testmain.go 21 | 22 | # Vim files https://github.com/github/gitignore/blob/master/Global/Vim.gitignore 23 | # swap 24 | [._]*.s[a-w][a-z] 25 | [._]s[a-w][a-z] 26 | # session 27 | Session.vim 28 | # temporary 29 | .netrwhist 30 | *~ 31 | # auto-generated tag files 32 | tags 33 | 34 | *.exe 35 | cobra.test 36 | bin 37 | 38 | .idea/ 39 | *.iml 40 | 41 | # Default binary object 42 | /krawler 43 | 44 | # Visual Studio Code stuff 45 | /.vscode 46 | 47 | e2e/results/* 48 | 49 | # CGO Cross-compile GoReleaser-cross env file 50 | .release-env 51 | -------------------------------------------------------------------------------- /.golangci.yml: -------------------------------------------------------------------------------- 1 | linters-settings: 2 | govet: 3 | check-shadowing: true 4 | maligned: 5 | suggest-new: true 6 | goconst: 7 | min-len: 2 8 | min-occurrences: 3 9 | gci: 10 | sections: 11 | - standard # Captures all standard packages if they do not match another section. 12 | - prefix(github.com/maxgio92/krawler) # Groups all imports with the specified Prefix. 13 | - default # Contains all imports that could not be matched to another section type. 
14 | tagliatelle: 15 | case: 16 | rules: 17 | json: snake 18 | 19 | linters: 20 | enable-all: true 21 | disable: 22 | - interfacer 23 | - godox 24 | - golint 25 | - scopelint 26 | - maligned 27 | - gochecknoglobals 28 | - gochecknoinits 29 | - exhaustivestruct 30 | - exhaustruct 31 | - ireturn 32 | - lll 33 | - nonamedreturns 34 | - wrapcheck 35 | - varnamelen 36 | 37 | issues: 38 | exclude-rules: 39 | - path: / 40 | linters: 41 | - typecheck 42 | 43 | run: 44 | skip-dirs: 45 | - pkg/utils/matrix 46 | - pkg/utils/template 47 | -------------------------------------------------------------------------------- /.goreleaser.yml: -------------------------------------------------------------------------------- 1 | project_name: krawler 2 | 3 | before: 4 | hooks: 5 | - go mod tidy 6 | - go generate ./... 7 | - go vet ./... 8 | 9 | builds: 10 | - env: 11 | - CGO_ENABLED=1 12 | goos: 13 | - linux 14 | - windows 15 | - darwin 16 | goarch: 17 | - amd64 18 | - arm 19 | - arm64 20 | goarm: 21 | - '6' 22 | - '7' 23 | ignore: 24 | - goos: darwin 25 | goarch: '386' 26 | - goos: windows 27 | goarch: 'arm' 28 | - goos: windows 29 | goarch: 'arm64' 30 | ldflags: 31 | - -s -w -X main.buildVersion={{ .Version }} 32 | overrides: 33 | - goos: windows 34 | goarch: amd64 35 | goamd64: v1 36 | env: 37 | - CC=x86_64-w64-mingw32-gcc 38 | - goos: windows 39 | goarch: arm64 40 | env: 41 | - CC=/llvm-mingw/llvm-mingw/bin/aarch64-w64-mingw32-gcc 42 | - goos: darwin 43 | goarch: amd64 44 | goamd64: v1 45 | env: 46 | - CC=o64-clang 47 | - goos: darwin 48 | goarch: arm64 49 | env: 50 | - CC=oa64-clang 51 | - goos: linux 52 | goarch: arm64 53 | env: 54 | - CC=aarch64-linux-gnu-gcc 55 | - goos: linux 56 | goarch: arm 57 | goarm: '6' 58 | env: 59 | - CC=arm-linux-gnueabihf-gcc 60 | - goos: linux 61 | goarch: arm 62 | goarm: '7' 63 | env: 64 | - CC=arm-linux-gnueabihf-gcc 65 | - goos: linux 66 | goarch: amd64 67 | env: 68 | - CC=gcc 69 | 70 | checksum: 71 | name_template: '{{ .ProjectName }}_{{ .Version 
}}_SHA256SUMS' 72 | algorithm: sha256 73 | 74 | changelog: 75 | sort: asc 76 | -------------------------------------------------------------------------------- /CNAME: -------------------------------------------------------------------------------- 1 | krawler.dev 2 | -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- 1 | ARG GOVERSION=1.19 2 | FROM golang:$GOVERSION as builder 3 | 4 | ARG TARGETARCH 5 | ARG GIT_HEAD_COMMIT 6 | ARG GIT_TAG_COMMIT 7 | ARG GIT_LAST_TAG 8 | ARG GIT_MODIFIED 9 | ARG GIT_REPO 10 | ARG BUILD_DATE 11 | 12 | WORKDIR /workspace 13 | 14 | # Copy the Go Modules manifests 15 | COPY go.mod go.sum ./ 16 | 17 | # cache deps before building and copying source so that we don't need to re-download as much 18 | # and so that source changes don't invalidate our downloaded layer 19 | RUN go mod download 20 | 21 | # Copy the go source 22 | COPY main.go main.go 23 | COPY cmd/ cmd/ 24 | COPY internal/ internal/ 25 | COPY pkg/ pkg/ 26 | 27 | # Build 28 | RUN CGO_ENABLED=1 GOOS=linux GOARCH=$TARGETARCH GO111MODULE=on go build \ 29 | -gcflags "-N -l" \ 30 | -ldflags "-X main.GitRepo=$GIT_REPO -X main.GitTag=$GIT_LAST_TAG -X main.GitCommit=$GIT_HEAD_COMMIT -X main.GitDirty=$GIT_MODIFIED -X main.BuildTime=$BUILD_DATE" \ 31 | -o krawler 32 | 33 | # Refer to https://github.com/GoogleContainerTools/distroless for more details 34 | FROM gcr.io/distroless/static:nonroot 35 | WORKDIR / 36 | COPY --from=builder /workspace/krawler . 
37 | USER nonroot:nonroot 38 | 39 | ENTRYPOINT ["/krawler"] 40 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | app := krawler 2 | version := 0.1.0 3 | 4 | oci_image := quay.io/maxgio92/$(app) 5 | 6 | bins := go golangci-lint gofumpt aws 7 | 8 | DISTROS ?= amazonlinux amazonlinux2 amazonlinux2022 amazonlinux2023 archlinux centos debian fedora opensuse oracle ubuntu 9 | 10 | RESULTS_DIR := e2e/results 11 | 12 | BUCKET_NAME := krawler-kernel-releases 13 | 14 | define declare_binpaths 15 | $(1) = $(shell command -v 2>/dev/null $(1)) 16 | endef 17 | 18 | define gen_run_targets 19 | .PHONY: run/$(1) 20 | run/$(1): clean build 21 | @rm -rf $(RESULTS_DIR)/$(1) 2>/dev/null || true 22 | @mkdir -p $(RESULTS_DIR)/$(1) 23 | 24 | @echo -n "$(1) with default configuration: " 25 | @./$(app) list $(1) \ 26 | -o json \ 27 | > $(RESULTS_DIR)/$(1)/index.json 2> $(RESULTS_DIR)/$(1)/krawler.log 28 | @echo "$$$$(jq length $(RESULTS_DIR)/$(1)/index.json) releases found." 29 | endef 30 | 31 | $(foreach bin,$(bins),\ 32 | $(eval $(call declare_binpaths,$(bin)))\ 33 | ) 34 | 35 | define gen_e2e_targets 36 | .PHONY: e2e/$(1) 37 | e2e/$(1): run/$(1) 38 | @echo -n "$(1) with custom configuration (full): " 39 | @./$(app) list $(1) \ 40 | -c testdata/$(1).yaml \ 41 | -o json \ 42 | > $(RESULTS_DIR)/$(1)/index_custom.json 2> $(RESULTS_DIR)/$(1)/krawler_custom.log 43 | @echo "$$$$(jq length $(RESULTS_DIR)/$(1)/index_custom.json) releases found." 44 | 45 | @echo -n "$(1) with custom configuration (no repositories): " 46 | @./$(app) list $(1) \ 47 | -c testdata/$(1)-norepos.yaml \ 48 | -o json \ 49 | > $(RESULTS_DIR)/$(1)/index_custom_norepos.json 2> $(RESULTS_DIR)/$(1)/krawler_custom_norepos.log 50 | @echo "$$$$(jq length $(RESULTS_DIR)/$(1)/index_custom_norepos.json) releases found." 
51 | 52 | @{ DEFAULT=$$$$(jq length $(RESULTS_DIR)/$(1)/index.json) \ 53 | CUSTOM=$$$$(jq length $(RESULTS_DIR)/$(1)/index_custom.json) \ 54 | CUSTOM_NOREPOS=$$$$(jq length $(RESULTS_DIR)/$(1)/index_custom_norepos.json); \ 55 | [[ $$$$DEFAULT == $$$$CUSTOM ]] && \ 56 | [[ $$$$CUSTOM == $$$$CUSTOM_NOREPOS ]] && \ 57 | echo "$(1) OK"; \ 58 | } \ 59 | || { echo "$(1) KO"; exit 1; } 60 | endef 61 | 62 | define gen_publish_targets 63 | .PHONY: publish/$(1) 64 | publish/$(1): run/$(1) 65 | $(aws) s3 sync $(RESULTS_DIR)/$(1)/ s3://$(BUCKET_NAME)/$(1)/ 66 | endef 67 | 68 | $(foreach distro,$(DISTROS),\ 69 | $(eval $(call gen_run_targets,$(distro)))\ 70 | $(eval $(call gen_e2e_targets,$(distro)))\ 71 | $(eval $(call gen_publish_targets,$(distro)))\ 72 | ) 73 | 74 | .PHONY: build 75 | build: 76 | @$(go) build . 77 | 78 | .PHONY: test 79 | test: 80 | @go test -v -cover -gcflags=-l ./... 81 | 82 | .PHONY: lint 83 | lint: golangci-lint 84 | @$(golangci-lint) run ./... 85 | 86 | .PHONY: golangci-lint 87 | golangci-lint: 88 | @$(go) install github.com/golangci/golangci-lint/cmd/golangci-lint@v1.50.0 89 | 90 | .PHONY: gofumpt 91 | gofumpt: 92 | @$(go) install mvdan.cc/gofumpt@v0.3.1 93 | 94 | .PHONY: oci/build 95 | oci/build: 96 | @docker build . 
-t $(oci_image):$(version) -f Dockerfile 97 | 98 | .PHONY: oci/push 99 | oci/push: oci/build 100 | @docker push $(oci_image):$(version) 101 | 102 | .PHONY: clean 103 | clean: 104 | @rm -f $(app) 105 | 106 | .PHONY: help 107 | help: list 108 | 109 | .PHONY: list 110 | list: 111 | @LC_ALL=C $(MAKE) -pRrq -f $(lastword $(MAKEFILE_LIST)) : 2>/dev/null | awk -v RS= -F: '/^# File/,/^# Finished Make data base/ {if ($$1 !~ "^[#.]") {print $$1}}' | sort | egrep -v -e '^[^[:alnum:]]' -e '^$@$$' 112 | 113 | .PHONY: run 114 | run: clean build $(patsubst %,run/%,$(DISTROS)) 115 | 116 | .PHONY: e2e 117 | e2e: clean build $(patsubst %,e2e/%,$(DISTROS)) 118 | 119 | .PHONY: publish 120 | publish: $(patsubst %,publish/%,$(DISTROS)) 121 | 122 | PACKAGE_NAME := github.com/maxgio92/$(app) 123 | GOLANG_CROSS_VERSION ?= v1.19.4 124 | 125 | .PHONY: release 126 | release: 127 | @if [ ! -f ".release-env" ]; then \ 128 | echo "\033[91m.release-env is required for release\033[0m";\ 129 | exit 1;\ 130 | fi 131 | docker run \ 132 | --rm \ 133 | -e CGO_ENABLED=1 \ 134 | --env-file .release-env \ 135 | -v /var/run/docker.sock:/var/run/docker.sock \ 136 | -v `pwd`:/go/src/$(PACKAGE_NAME) \ 137 | -w /go/src/$(PACKAGE_NAME) \ 138 | goreleaser/goreleaser-cross:${GOLANG_CROSS_VERSION} \ 139 | release --rm-dist 140 | 141 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Krawler: a kernel releases crawler ![logo](images/madagascar_small.png) 2 | 3 | [![Latest](https://img.shields.io/github/v/release/maxgio92/krawler)](https://github.com/maxgio92/krawler/releases/latest) 4 | [![CI](https://github.com/maxgio92/krawler/actions/workflows/ci.yaml/badge.svg)](https://github.com/maxgio92/krawler/actions/workflows/ci.yaml) 5 | 
[![Release](https://github.com/maxgio92/krawler/actions/workflows/release.yaml/badge.svg)](https://github.com/maxgio92/krawler/actions/workflows/release.yaml) 6 | [![Database update](https://github.com/maxgio92/krawler/actions/workflows/publish.yaml/badge.svg)](https://github.com/maxgio92/krawler/actions/workflows/publish.yaml) 7 | 8 | ![](images/debian.png) ![](images/ubuntu.png) ![](images/centos.png) ![](images/fedora.png) ![](images/oracle.png) ![](images/opensuse.png) ![](images/amazonlinux.png) ![](images/archlinux.png) 9 | 10 | A crawler for kernel releases distributed by the major Linux distributions. 11 | 12 | It supports Amazon Linux v1, Amazon Linux v2, Amazon Linux 2022, Amazon Linux 2023, CentOS, Debian, Ubuntu, Fedora, Oracle Linux, OpenSUSE Linux, and Arch Linux. 13 | 14 | The crawling data is continuously published and is available at [db.krawler.dev](https://db.krawler.dev). 15 | 16 | ## Usage 17 | 18 | ``` 19 | krawler [options] <command> 20 | ``` 21 | 22 | ### Options 23 | - `-c, --config file`: (optional) the config file to customize the list of mirrors to scrape for kernel releases (by default it looks at *$HOME/.krawler.yaml*). 24 | - `-v, --verbosity level`: (optional) the verbosity level (*debug*, *info*, *warn*, *error*, *fatal*, *panic*). By default *warning*. 25 | 26 | ### Commands 27 | 28 | #### `list`|`ls` 29 | 30 | List available kernel releases with distributed headers, by Linux distribution. 31 | It returns a list of `kernelRelease` objects. The output format can be specified by flag parameter. 32 | 33 | ``` 34 | krawler [options] list|ls <distribution> [-o <format>] 35 | ``` 36 | 37 | #### Parameters 38 | 39 | `distribution`: (**required**) The Linux distribution for which the release has been published. 
40 | Available distributions: 41 | 42 | - *amazonlinux* 43 | - *amazonlinux2* 44 | - *amazonlinux2022* 45 | - *amazonlinux2023* 46 | - *centos* 47 | - *debian* 48 | - *ubuntu* 49 | - *fedora* 50 | - *oracle* 51 | - *opensuse* 52 | - *archlinux* 53 | 54 | #### Options 55 | 56 | `-o, --output format`: (optional) the format of the output of the list of kernel releases (one of *text*, *json* or *yaml*). By default *text*. 57 | 58 | #### Output 59 | 60 | The `list`|`ls` command prints to standard output a list of kernel release objects of type [`KernelRelease`](https://github.com/maxgio92/krawler/blob/main/pkg/kernelrelease/kernelrelease.go#L16). 61 | 62 | An example of a `json` result entry: 63 | 64 | ``` 65 | { 66 | "full_version": "4.18.0", 67 | "version": 4, 68 | "patch_level": 18, 69 | "sublevel": 0, 70 | "extra_version": "331", 71 | "full_extra_version": "-331.el8.aarch64", 72 | "architecture": "aarch64", 73 | "package_name": "kernel-devel", 74 | "package_url": "https://mirrors.edge.kernel.org/centos/8-stream/BaseOS/aarch64/os/Packages/kernel-devel-4.18.0-331.el8.aarch64.rpm", 75 | "compiler_version": "80500" 76 | } 77 | ``` 78 | 79 | ## Getting started 80 | 81 | Let's imagine you want to list the available CentOS kernel releases, scraping default mirrors. You do it by running: 82 | 83 | ``` 84 | krawler ls centos 85 | ``` 86 | 87 | ## Configuration 88 | 89 | A configuration lets you configure parameters for the crawling, like the mirrors to scrape. 90 | 91 | The default configuration file path is `$HOME/.krawler.yaml`. You can specify a custom path with the `--config` option. 92 | 93 | When a configuration is not present, default repository configurations are used (for example [this](https://github.com/maxgio92/krawler/blob/main/pkg/distro/centos/constants.go#L20) is the default for CentOS). 94 | 95 | For a detailed overview see the [**reference**](docs/reference/config.md). 
96 | 97 | Moreover, sample configurations are available [here](./config/samples). 98 | 99 | ## Roadmap 100 | 101 | - [ ] Provide GCC versions for all releases 102 | - [ ] Support new distributions 103 | 104 | -------------------------------------------------------------------------------- /_config.yml: -------------------------------------------------------------------------------- 1 | theme: jekyll-theme-minimal -------------------------------------------------------------------------------- /cmd/constants.go: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright © 2022 maxgio92 3 | 4 | Licensed under the Apache License, Version 2.0 (the "License"); 5 | you may not use this file except in compliance with the License. 6 | You may obtain a copy of the License at 7 | 8 | http://www.apache.org/licenses/LICENSE-2.0 9 | 10 | Unless required by applicable law or agreed to in writing, software 11 | distributed under the License is distributed on an "AS IS" BASIS, 12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | See the License for the specific language governing permissions and 14 | limitations under the License. 15 | */ 16 | 17 | package cmd 18 | 19 | const ( 20 | ConfigDistrosRoot = "distros" 21 | RPMKernelHeadersPackageName = "kernel-devel" 22 | DebKernelHeadersPackageName = "linux-headers" 23 | ) 24 | -------------------------------------------------------------------------------- /cmd/list.go: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright © 2022 maxgio92 3 | 4 | Licensed under the Apache License, Version 2.0 (the "License"); 5 | you may not use this file except in compliance with the License. 
6 | You may obtain a copy of the License at 7 | 8 | http://www.apache.org/licenses/LICENSE-2.0 9 | 10 | Unless required by applicable law or agreed to in writing, software 11 | distributed under the License is distributed on an "AS IS" BASIS, 12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | See the License for the specific language governing permissions and 14 | limitations under the License. 15 | */ 16 | package cmd 17 | 18 | import ( 19 | "github.com/maxgio92/krawler/internal/utils" 20 | "github.com/maxgio92/krawler/pkg/distro" 21 | kr "github.com/maxgio92/krawler/pkg/kernelrelease" 22 | "github.com/maxgio92/krawler/pkg/packages" 23 | "github.com/spf13/cobra" 24 | v "github.com/spf13/viper" 25 | ) 26 | 27 | var ( 28 | // The output format flag value. 29 | outputFormat string 30 | 31 | // listCmd represents the list command. 32 | listCmd = &cobra.Command{ 33 | Use: "list", 34 | Aliases: []string{"ls"}, 35 | Short: "List available kernel releases with distributed headers, by Linux distribution", 36 | } 37 | ) 38 | 39 | func init() { 40 | rootCmd.AddCommand(listCmd) 41 | 42 | // Bind the output format flag. Default is text. 43 | listCmd.PersistentFlags().StringVarP(&outputFormat, "output", "o", "text", "Output format (text, json, yaml)") 44 | } 45 | 46 | func getKernelReleases(distro distro.Distro, packageName string) ([]kr.KernelRelease, error) { 47 | config, err := utils.GetDistroConfigAndVarsFromViper(v.GetViper()) 48 | if err != nil { 49 | return []kr.KernelRelease{}, err 50 | } 51 | 52 | // The options for searching packages. 53 | searchOptions := packages.NewSearchOptions( 54 | packageName, 55 | config.Archs, 56 | nil, 57 | config.Output.Verbosity, 58 | "Total", 59 | ".config", 60 | ) 61 | 62 | err = distro.Configure(config) 63 | if err != nil { 64 | return []kr.KernelRelease{}, err 65 | } 66 | 67 | // Scrape mirrors for packages by searchOptions. 
68 | packages, err := distro.SearchPackages(*searchOptions) 69 | if err != nil { 70 | return []kr.KernelRelease{}, err 71 | } 72 | 73 | // Get kernel releases from kernel header packages. 74 | kernelReleases, err := kr.GetKernelReleasesFromPackages(packages, packageName) 75 | if err != nil { 76 | return []kr.KernelRelease{}, err 77 | } 78 | 79 | return kernelReleases, nil 80 | } 81 | -------------------------------------------------------------------------------- /cmd/list_amazonlinux1.go: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright © 2022 maxgio92 3 | 4 | Licensed under the Apache License, Version 2.0 (the "License"); 5 | you may not use this file except in compliance with the License. 6 | You may obtain a copy of the License at 7 | 8 | http://www.apache.org/licenses/LICENSE-2.0 9 | 10 | Unless required by applicable law or agreed to in writing, software 11 | distributed under the License is distributed on an "AS IS" BASIS, 12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | See the License for the specific language governing permissions and 14 | limitations under the License. 15 | */ 16 | package cmd 17 | 18 | import ( 19 | "github.com/maxgio92/krawler/internal/format" 20 | v1 "github.com/maxgio92/krawler/pkg/distro/amazonlinux/v1" 21 | "github.com/spf13/cobra" 22 | ) 23 | 24 | // amazonLinuxCmd represents the amazonlinux command. 
25 | var amazonLinuxCmd = &cobra.Command{ 26 | Use: "amazonlinux", 27 | Short: "List Amazon Linux 1 kernel releases", 28 | RunE: func(cmd *cobra.Command, args []string) error { 29 | kernelReleases, err := getKernelReleases(&v1.AmazonLinux{}, RPMKernelHeadersPackageName) 30 | cobra.CheckErr(err) 31 | 32 | if len(kernelReleases) > 0 { 33 | Output, err = format.Encode(Output, kernelReleases, format.Type(outputFormat)) 34 | cobra.CheckErr(err) 35 | } else { 36 | //nolint:errcheck 37 | Output.WriteString("No releases found.\n") 38 | } 39 | 40 | return nil 41 | }, 42 | } 43 | 44 | func init() { 45 | listCmd.AddCommand(amazonLinuxCmd) 46 | } 47 | -------------------------------------------------------------------------------- /cmd/list_amazonlinux2.go: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright © 2022 maxgio92 3 | 4 | Licensed under the Apache License, Version v2.0 (the "License"); 5 | you may not use this file except in compliance with the License. 6 | You may obtain a copy of the License at 7 | 8 | http://www.apache.org/licenses/LICENSE-2.0 9 | 10 | Unless required by applicable law or agreed to in writing, software 11 | distributed under the License is distributed on an "AS IS" BASIS, 12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | See the License for the specific language governing permissions and 14 | limitations under the License. 15 | */ 16 | package cmd 17 | 18 | import ( 19 | "github.com/maxgio92/krawler/internal/format" 20 | v2 "github.com/maxgio92/krawler/pkg/distro/amazonlinux/v2" 21 | "github.com/spf13/cobra" 22 | ) 23 | 24 | // amazonLinux2Cmd represents the amazonlinux2 command, which lists Amazon Linux 2 kernel releases.
25 | var amazonLinux2Cmd = &cobra.Command{ 26 | Use: "amazonlinux2", 27 | Short: "List Amazon Linux 2 kernel releases", 28 | RunE: func(cmd *cobra.Command, args []string) error { 29 | kernelReleases, err := getKernelReleases(&v2.AmazonLinux{}, RPMKernelHeadersPackageName) 30 | cobra.CheckErr(err) 31 | 32 | if len(kernelReleases) > 0 { 33 | Output, err = format.Encode(Output, kernelReleases, format.Type(outputFormat)) 34 | cobra.CheckErr(err) 35 | } else { 36 | //nolint:errcheck 37 | Output.WriteString("No releases found.\n") 38 | } 39 | 40 | return nil 41 | }, 42 | } 43 | 44 | func init() { 45 | listCmd.AddCommand(amazonLinux2Cmd) 46 | } 47 | -------------------------------------------------------------------------------- /cmd/list_amazonlinux2022.go: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright © 2022 maxgio92 3 | 4 | Licensed under the Apache License, Version v2.0 (the "License"); 5 | you may not use this file except in compliance with the License. 6 | You may obtain a copy of the License at 7 | 8 | http://www.apache.org/licenses/LICENSE-2.0 9 | 10 | Unless required by applicable law or agreed to in writing, software 11 | distributed under the License is distributed on an "AS IS" BASIS, 12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | See the License for the specific language governing permissions and 14 | limitations under the License. 15 | */ 16 | package cmd 17 | 18 | import ( 19 | "github.com/maxgio92/krawler/internal/format" 20 | v2022 "github.com/maxgio92/krawler/pkg/distro/amazonlinux/v2022" 21 | "github.com/spf13/cobra" 22 | ) 23 | 24 | // amazonLinux2022Cmd represents the amazonlinux2022 command, which lists Amazon Linux 2022 kernel releases.
25 | var amazonLinux2022Cmd = &cobra.Command{ 26 | Use: "amazonlinux2022", 27 | Short: "List Amazon Linux 2022 kernel releases", 28 | RunE: func(cmd *cobra.Command, args []string) error { 29 | kernelReleases, err := getKernelReleases(&v2022.AmazonLinux{}, RPMKernelHeadersPackageName) 30 | cobra.CheckErr(err) 31 | 32 | if len(kernelReleases) > 0 { 33 | Output, err = format.Encode(Output, kernelReleases, format.Type(outputFormat)) 34 | cobra.CheckErr(err) 35 | } else { 36 | //nolint:errcheck 37 | Output.WriteString("No releases found.\n") 38 | } 39 | 40 | return nil 41 | }, 42 | } 43 | 44 | func init() { 45 | listCmd.AddCommand(amazonLinux2022Cmd) 46 | } 47 | -------------------------------------------------------------------------------- /cmd/list_amazonlinux2023.go: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright © 2022 maxgio92 3 | 4 | Licensed under the Apache License, Version v2.0 (the "License"); 5 | you may not use this file except in compliance with the License. 6 | You may obtain a copy of the License at 7 | 8 | http://www.apache.org/licenses/LICENSE-2.0 9 | 10 | Unless required by applicable law or agreed to in writing, software 11 | distributed under the License is distributed on an "AS IS" BASIS, 12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | See the License for the specific language governing permissions and 14 | limitations under the License. 15 | */ 16 | package cmd 17 | 18 | import ( 19 | "github.com/spf13/cobra" 20 | 21 | "github.com/maxgio92/krawler/internal/format" 22 | v2023 "github.com/maxgio92/krawler/pkg/distro/amazonlinux/v2023" 23 | ) 24 | 25 | // amazonLinux2023Cmd represents the amazonlinux2023 command, which lists Amazon Linux 2023 kernel releases.
26 | var amazonLinux2023Cmd = &cobra.Command{ 27 | Use: "amazonlinux2023", 28 | Short: "List Amazon Linux 2023 kernel releases", 29 | RunE: func(cmd *cobra.Command, args []string) error { 30 | kernelReleases, err := getKernelReleases(&v2023.AmazonLinux{}, RPMKernelHeadersPackageName) 31 | cobra.CheckErr(err) 32 | 33 | if len(kernelReleases) > 0 { 34 | Output, err = format.Encode(Output, kernelReleases, format.Type(outputFormat)) 35 | cobra.CheckErr(err) 36 | } else { 37 | //nolint:errcheck 38 | Output.WriteString("No releases found.\n") 39 | } 40 | 41 | return nil 42 | }, 43 | } 44 | 45 | func init() { 46 | listCmd.AddCommand(amazonLinux2023Cmd) 47 | } 48 | -------------------------------------------------------------------------------- /cmd/list_archlinux.go: -------------------------------------------------------------------------------- 1 | //go:build archlinux 2 | 3 | /* 4 | Copyright © 2022 maxgio92 5 | 6 | Licensed under the Apache License, Version 2.0 (the "License"); 7 | you may not use this file except in compliance with the License. 8 | You may obtain a copy of the License at 9 | 10 | http://www.apache.org/licenses/LICENSE-2.0 11 | 12 | Unless required by applicable law or agreed to in writing, software 13 | distributed under the License is distributed on an "AS IS" BASIS, 14 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | See the License for the specific language governing permissions and 16 | limitations under the License. 17 | */ 18 | 19 | package cmd 20 | 21 | import ( 22 | "github.com/maxgio92/krawler/internal/format" 23 | "github.com/maxgio92/krawler/pkg/distro/archlinux" 24 | "github.com/spf13/cobra" 25 | ) 26 | 27 | // archLinuxCmd represents the archlinux command, which lists Arch Linux kernel releases.
28 | var archLinuxCmd = &cobra.Command{ 29 | Use: "archlinux", 30 | Short: "List Arch Linux kernel releases (current plus three months archive)", 31 | RunE: func(cmd *cobra.Command, args []string) error { 32 | kernelReleases, err := getKernelReleases(&archlinux.ArchLinux{}, "linux-headers") 33 | cobra.CheckErr(err) 34 | 35 | if len(kernelReleases) > 0 { 36 | Output, err = format.Encode(Output, kernelReleases, format.Type(outputFormat)) 37 | cobra.CheckErr(err) 38 | } else { 39 | //nolint:errcheck 40 | Output.WriteString("No releases found.\n") 41 | } 42 | 43 | return nil 44 | }, 45 | } 46 | 47 | func init() { 48 | listCmd.AddCommand(archLinuxCmd) 49 | } 50 | -------------------------------------------------------------------------------- /cmd/list_centos.go: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright © 2022 maxgio92 3 | 4 | Licensed under the Apache License, Version 2.0 (the "License"); 5 | you may not use this file except in compliance with the License. 6 | You may obtain a copy of the License at 7 | 8 | http://www.apache.org/licenses/LICENSE-2.0 9 | 10 | Unless required by applicable law or agreed to in writing, software 11 | distributed under the License is distributed on an "AS IS" BASIS, 12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | See the License for the specific language governing permissions and 14 | limitations under the License. 15 | */ 16 | package cmd 17 | 18 | import ( 19 | "github.com/maxgio92/krawler/internal/format" 20 | "github.com/maxgio92/krawler/pkg/distro/centos" 21 | 22 | "github.com/spf13/cobra" 23 | ) 24 | 25 | // centosCmd represents the centos command, which lists CentOS kernel releases.
26 | var centosCmd = &cobra.Command{ 27 | Use: "centos", 28 | Short: "List CentOS kernel releases", 29 | RunE: func(cmd *cobra.Command, args []string) error { 30 | kernelReleases, err := getKernelReleases(¢os.Centos{}, RPMKernelHeadersPackageName) 31 | cobra.CheckErr(err) 32 | 33 | if len(kernelReleases) > 0 { 34 | Output, err = format.Encode(Output, kernelReleases, format.Type(outputFormat)) 35 | cobra.CheckErr(err) 36 | } else { 37 | //nolint:errcheck 38 | Output.WriteString("No releases found.\n") 39 | } 40 | 41 | return nil 42 | }, 43 | } 44 | 45 | func init() { 46 | listCmd.AddCommand(centosCmd) 47 | } 48 | -------------------------------------------------------------------------------- /cmd/list_debian.go: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright © 2022 maxgio92 3 | 4 | Licensed under the Apache License, Version 2.0 (the "License"); 5 | you may not use this file except in compliance with the License. 6 | You may obtain a copy of the License at 7 | 8 | http://www.apache.org/licenses/LICENSE-2.0 9 | 10 | Unless required by applicable law or agreed to in writing, software 11 | distributed under the License is distributed on an "AS IS" BASIS, 12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | See the License for the specific language governing permissions and 14 | limitations under the License. 15 | */ 16 | package cmd 17 | 18 | import ( 19 | "github.com/maxgio92/krawler/internal/format" 20 | "github.com/maxgio92/krawler/pkg/distro/debian" 21 | "github.com/spf13/cobra" 22 | ) 23 | 24 | // debianCmd represents the debian command. 
25 | var debianCmd = &cobra.Command{ 26 | Use: "debian", 27 | Short: "List Debian kernel releases", 28 | RunE: func(cmd *cobra.Command, args []string) error { 29 | kernelReleases, err := getKernelReleases(&debian.Debian{}, DebKernelHeadersPackageName) 30 | cobra.CheckErr(err) 31 | 32 | if len(kernelReleases) > 0 { 33 | Output, err = format.Encode(Output, kernelReleases, format.Type(outputFormat)) 34 | cobra.CheckErr(err) 35 | } else { 36 | //nolint:errcheck 37 | Output.WriteString("No releases found.\n") 38 | } 39 | 40 | return nil 41 | }, 42 | } 43 | 44 | func init() { 45 | listCmd.AddCommand(debianCmd) 46 | } 47 | -------------------------------------------------------------------------------- /cmd/list_fedora.go: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright © 2022 maxgio92 3 | 4 | Licensed under the Apache License, Version 2.0 (the "License"); 5 | you may not use this file except in compliance with the License. 6 | You may obtain a copy of the License at 7 | 8 | http://www.apache.org/licenses/LICENSE-2.0 9 | 10 | Unless required by applicable law or agreed to in writing, software 11 | distributed under the License is distributed on an "AS IS" BASIS, 12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | See the License for the specific language governing permissions and 14 | limitations under the License. 15 | */ 16 | 17 | package cmd 18 | 19 | import ( 20 | "github.com/maxgio92/krawler/internal/format" 21 | "github.com/maxgio92/krawler/pkg/distro/fedora" 22 | 23 | "github.com/spf13/cobra" 24 | ) 25 | 26 | // fedoraCmd represents the fedora command, which lists Fedora kernel releases.
27 | var fedoraCmd = &cobra.Command{ 28 | Use: "fedora", 29 | Short: "List Fedora kernel releases", 30 | RunE: func(cmd *cobra.Command, args []string) error { 31 | kernelReleases, err := getKernelReleases(&fedora.Fedora{}, RPMKernelHeadersPackageName) 32 | cobra.CheckErr(err) 33 | 34 | if len(kernelReleases) > 0 { 35 | Output, err = format.Encode(Output, kernelReleases, format.Type(outputFormat)) 36 | cobra.CheckErr(err) 37 | } else { 38 | //nolint:errcheck 39 | Output.WriteString("No releases found.\n") 40 | } 41 | 42 | return nil 43 | }, 44 | } 45 | 46 | func init() { 47 | listCmd.AddCommand(fedoraCmd) 48 | } 49 | -------------------------------------------------------------------------------- /cmd/list_opensuse.go: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright © 2022 maxgio92 3 | 4 | Licensed under the Apache License, Version 2.0 (the "License"); 5 | you may not use this file except in compliance with the License. 6 | You may obtain a copy of the License at 7 | 8 | http://www.apache.org/licenses/LICENSE-2.0 9 | 10 | Unless required by applicable law or agreed to in writing, software 11 | distributed under the License is distributed on an "AS IS" BASIS, 12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | See the License for the specific language governing permissions and 14 | limitations under the License. 15 | */ 16 | 17 | package cmd 18 | 19 | import ( 20 | "github.com/maxgio92/krawler/internal/format" 21 | "github.com/maxgio92/krawler/pkg/distro/opensuse" 22 | 23 | "github.com/spf13/cobra" 24 | ) 25 | 26 | // openSuseCmd represents the opensuse command, which lists OpenSUSE kernel releases.
27 | var openSuseCmd = &cobra.Command{ 28 | Use: "opensuse", 29 | Short: "List OpenSUSE kernel releases", 30 | RunE: func(cmd *cobra.Command, args []string) error { 31 | kernelReleases, err := getKernelReleases(&opensuse.OpenSuse{}, "kernel-default-devel") 32 | cobra.CheckErr(err) 33 | 34 | if len(kernelReleases) > 0 { 35 | Output, err = format.Encode(Output, kernelReleases, format.Type(outputFormat)) 36 | cobra.CheckErr(err) 37 | } else { 38 | //nolint:errcheck 39 | Output.WriteString("No releases found.\n") 40 | } 41 | 42 | return nil 43 | }, 44 | } 45 | 46 | func init() { 47 | listCmd.AddCommand(openSuseCmd) 48 | } 49 | -------------------------------------------------------------------------------- /cmd/list_oracle.go: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright © 2022 maxgio92 3 | 4 | Licensed under the Apache License, Version 2.0 (the "License"); 5 | you may not use this file except in compliance with the License. 6 | You may obtain a copy of the License at 7 | 8 | http://www.apache.org/licenses/LICENSE-2.0 9 | 10 | Unless required by applicable law or agreed to in writing, software 11 | distributed under the License is distributed on an "AS IS" BASIS, 12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | See the License for the specific language governing permissions and 14 | limitations under the License. 15 | */ 16 | package cmd 17 | 18 | import ( 19 | "github.com/maxgio92/krawler/internal/format" 20 | "github.com/maxgio92/krawler/pkg/distro/oracle" 21 | 22 | "github.com/spf13/cobra" 23 | ) 24 | 25 | // oracleCmd represents the oracle command.
26 | var oracleCmd = &cobra.Command{ 27 | Use: "oracle", 28 | Short: "List Oracle Linux kernel releases", 29 | RunE: func(cmd *cobra.Command, args []string) error { 30 | kernelReleases, err := getKernelReleases(&oracle.Oracle{}, RPMKernelHeadersPackageName) 31 | cobra.CheckErr(err) 32 | 33 | if len(kernelReleases) > 0 { 34 | Output, err = format.Encode(Output, kernelReleases, format.Type(outputFormat)) 35 | cobra.CheckErr(err) 36 | } else { 37 | //nolint:errcheck 38 | Output.WriteString("No releases found.\n") 39 | } 40 | 41 | return nil 42 | }, 43 | } 44 | 45 | func init() { 46 | listCmd.AddCommand(oracleCmd) 47 | } 48 | -------------------------------------------------------------------------------- /cmd/list_ubuntu.go: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright © 2022 maxgio92 3 | 4 | Licensed under the Apache License, Version 2.0 (the "License"); 5 | you may not use this file except in compliance with the License. 6 | You may obtain a copy of the License at 7 | 8 | http://www.apache.org/licenses/LICENSE-2.0 9 | 10 | Unless required by applicable law or agreed to in writing, software 11 | distributed under the License is distributed on an "AS IS" BASIS, 12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | See the License for the specific language governing permissions and 14 | limitations under the License. 15 | */ 16 | package cmd 17 | 18 | import ( 19 | "github.com/maxgio92/krawler/internal/format" 20 | "github.com/maxgio92/krawler/pkg/distro/ubuntu" 21 | 22 | "github.com/spf13/cobra" 23 | ) 24 | 25 | // ubuntuCmd represents the ubuntu command, which lists Ubuntu kernel releases.
26 | var ubuntuCmd = &cobra.Command{ 27 | Use: "ubuntu", 28 | Short: "List Ubuntu kernel releases", 29 | RunE: func(cmd *cobra.Command, args []string) error { 30 | kernelReleases, err := getKernelReleases(&ubuntu.Ubuntu{}, DebKernelHeadersPackageName) 31 | cobra.CheckErr(err) 32 | 33 | if len(kernelReleases) > 0 { 34 | Output, err = format.Encode(Output, kernelReleases, format.Type(outputFormat)) 35 | cobra.CheckErr(err) 36 | } else { 37 | //nolint:errcheck 38 | Output.WriteString("No releases found.\n") 39 | } 40 | 41 | return nil 42 | }, 43 | } 44 | 45 | func init() { 46 | listCmd.AddCommand(ubuntuCmd) 47 | } 48 | -------------------------------------------------------------------------------- /cmd/root.go: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright © 2022 maxgio92 me@maxgio.it 3 | 4 | Licensed under the Apache License, Version 2.0 (the "License"); 5 | you may not use this file except in compliance with the License. 6 | You may obtain a copy of the License at 7 | 8 | http://www.apache.org/licenses/LICENSE-2.0 9 | 10 | Unless required by applicable law or agreed to in writing, software 11 | distributed under the License is distributed on an "AS IS" BASIS, 12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | See the License for the specific language governing permissions and 14 | limitations under the License. 15 | */ 16 | package cmd 17 | 18 | import ( 19 | "bufio" 20 | "fmt" 21 | "io" 22 | "os" 23 | 24 | "github.com/sirupsen/logrus" 25 | "github.com/spf13/cobra" 26 | "github.com/spf13/viper" 27 | ) 28 | 29 | var ( 30 | // The config file flag value. 31 | cfgFile string 32 | 33 | // Output is the buffered writer on standard output that commands write their results to; rootCmd flushes it after the command has run. 34 | Output = bufio.NewWriter(os.Stdout) 35 | 36 | // The verbosity flag value. 37 | verbosity string 38 | 39 | // rootCmd represents the base command when called without any subcommands.
40 | rootCmd = &cobra.Command{ 41 | Use: "krawler", 42 | Short: "A crawler for kernel releases distributed by the major Linux distributions", 43 | Long: `Krawler is a crawler for kernel releases distributed by the major Linux 44 | distributions. 45 | 46 | It scrapes the package mirrors of the supported distributions and lists the 47 | kernel releases for which kernel headers packages are distributed. 48 | The result can be printed as text, JSON or YAML.`, 49 | PersistentPostRunE: func(cmd *cobra.Command, args []string) error { 50 | if err := Output.Flush(); err != nil { 51 | return fmt.Errorf("cannot flush output: %w", err) 52 | } 53 | 54 | return nil 55 | }, 56 | } 57 | ) 58 | 59 | // Execute adds all child commands to the root command and sets flags appropriately. 60 | // This is called by main.main(). It only needs to happen once to the rootCmd. 61 | func Execute() { 62 | if err := rootCmd.Execute(); err != nil { 63 | fmt.Fprintln(os.Stderr, err) 64 | os.Exit(1) 65 | } 66 | } 67 | 68 | func init() { 69 | cobra.OnInitialize(initConfig) 70 | 71 | // Here is where we define the PreRun func, using the verbosity flag value. 72 | // We use the standard output for logs. 73 | rootCmd.PersistentPreRunE = func(cmd *cobra.Command, args []string) error { 74 | if err := initLogs(os.Stdout, verbosity); err != nil { 75 | return err 76 | } 77 | 78 | return nil 79 | } 80 | 81 | // Bind the config file flag. Default value is $HOME/.krawler.yaml. 82 | rootCmd.PersistentFlags().StringVarP(&cfgFile, "config", "c", "", "config file (default is $HOME/.krawler.yaml)") 83 | 84 | // Bind the verbosity flag. Default value is the warn level. 85 | rootCmd.PersistentFlags().StringVarP(&verbosity, "verbosity", "v", logrus.WarnLevel.String(), "Log level (debug, info, warn, error, fatal, panic)") 86 | } 87 | 88 | // initConfig reads in config file and ENV variables if set. 89 | func initConfig() { 90 | if cfgFile != "" { 91 | // Use config file from the flag.
92 | viper.SetConfigFile(cfgFile) 93 | } else { 94 | // Find home directory. 95 | home, err := os.UserHomeDir() 96 | cobra.CheckErr(err) 97 | 98 | // Search config in home directory with name ".krawler" (without extension). 99 | viper.AddConfigPath(home) 100 | viper.SetConfigType("yaml") 101 | viper.SetConfigName(".krawler") 102 | } 103 | 104 | viper.AutomaticEnv() 105 | 106 | if err := viper.ReadInConfig(); err != nil { 107 | //nolint:errorlint 108 | if _, ok := err.(viper.ConfigFileNotFoundError); !ok { 109 | panic(fmt.Errorf("fatal error config file: %w", err)) 110 | } 111 | } 112 | } 113 | 114 | // initLogs sets the log output and the log level; it returns an error if level cannot be parsed as a logrus level. 115 | func initLogs(out io.Writer, level string) error { 116 | logrus.SetOutput(out) 117 | 118 | lvl, err := logrus.ParseLevel(level) 119 | if err != nil { 120 | return err 121 | } 122 | 123 | logrus.SetLevel(lvl) 124 | 125 | return nil 126 | } 127 | -------------------------------------------------------------------------------- /cmd/version.go: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright © 2022 maxgio92 3 | 4 | Licensed under the Apache License, Version 2.0 (the "License"); 5 | you may not use this file except in compliance with the License. 6 | You may obtain a copy of the License at 7 | 8 | http://www.apache.org/licenses/LICENSE-2.0 9 | 10 | Unless required by applicable law or agreed to in writing, software 11 | distributed under the License is distributed on an "AS IS" BASIS, 12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | See the License for the specific language governing permissions and 14 | limitations under the License. 15 | */ 16 | package cmd 17 | 18 | import ( 19 | "fmt" 20 | 21 | "github.com/spf13/cobra" 22 | ) 23 | 24 | var version = "Krawler CLI v0.2.0" 25 | 26 | // versionCmd represents the version command.
27 | var versionCmd = &cobra.Command{ 28 | Use: "version", 29 | Short: "Print the version of Krawler CLI", 30 | Run: func(cmd *cobra.Command, args []string) { 31 | //nolint:forbidigo 32 | fmt.Println(version) 33 | }, 34 | } 35 | 36 | func init() { 37 | rootCmd.AddCommand(versionCmd) 38 | } 39 | -------------------------------------------------------------------------------- /docs/CNAME: -------------------------------------------------------------------------------- 1 | krawler.dev 2 | -------------------------------------------------------------------------------- /docs/getting-started.md: -------------------------------------------------------------------------------- 1 | # Getting started 2 | 3 | Let's imagine you want to list the available CentOS kernel releases, scraping default mirrors. You do it by running: 4 | 5 | ``` 6 | krawler ls centos -o yaml 7 | ``` 8 | 9 | ## Configuration 10 | 11 | A configuration lets you configure parameters for the crawling, like the mirrors to scrape. 12 | 13 | The default configuration file path is `$HOME/.krawler.yaml`. You can specify a custom path with the `--config` option. 14 | 15 | When a configuration is not present, the [default configurations](https://github.com/maxgio92/krawler/tree/main/pkg/scrape/defaults.go) for repositories are used. 16 | 17 | For a detailed overview see the [**reference**](/reference). 18 | 19 | Moreover, sample configurations are available [here](https://github.com/maxgio92/krawler/tree/main/testdata). 20 | 21 | -------------------------------------------------------------------------------- /docs/index.md: -------------------------------------------------------------------------------- 1 | # Krawler: a kernel releases crawler 2 | 3 | A crawler for kernel releases distributed by the major Linux distributions. 4 | 5 | It supports Amazon Linux v1, Amazon Linux v2, Amazon Linux 2022, Amazon Linux 2023, CentOS, Debian, Ubuntu, Fedora, Oracle Linux, OpenSUSE Linux and Arch Linux.
6 | 7 | The crawling data is continuously published and is available at [db.krawler.dev](https://db.krawler.dev). 8 | 9 | -------------------------------------------------------------------------------- /docs/reference/cli.md: -------------------------------------------------------------------------------- 1 | # CLI reference 2 | 3 | ``` 4 | krawler [options] <command> 5 | ``` 6 | 7 | ## Options 8 | - `-c, --config file`: (optional) the config file to customize the list of mirrors to scrape for kernel releases (by default it looks at *$HOME/.krawler.yaml*). 9 | - `-v, --verbosity level`: (optional) the verbosity level (*debug*, *info*, *warn*, *error*, *fatal*, *panic*). Default: *warning*. 10 | 11 | ## Commands 12 | 13 | ### `list`|`ls` 14 | 15 | List available kernel releases with distributed headers, by Linux distribution. 16 | It returns a list of `kernelRelease` objects. The output format can be specified by flag parameter. 17 | 18 | ``` 19 | krawler [options] list|ls <distribution> [-o <format>] 20 | ``` 21 | 22 | ### Parameters 23 | `distribution`: (**required**) The Linux distribution for which the release has been published. 24 | Available distributions: 25 | 26 | - amazonlinux 27 | - amazonlinux2 28 | - amazonlinux2022 29 | - amazonlinux2023 30 | - centos 31 | - debian 32 | - ubuntu 33 | - fedora 34 | - oracle 35 | - opensuse 36 | 37 | ### Options 38 | `-o, --output format`: (optional) the format of the output of the list of kernel releases (one of *text*, *json* or *yaml*). By default *text*. 39 | 40 | ### Output 41 | 42 | The `list`|`ls` command prints on standard output a list of kernel release objects of type [`KernelRelease`](https://github.com/maxgio92/krawler/blob/main/pkg/kernelrelease/kernelrelease.go#L16).
43 | 44 | An example of a `yaml`-formatted result entry: 45 | 46 | ```yml 47 | # https://github.com/maxgio92/krawler/blob/main/pkg/kernelrelease/kernelrelease.go#L16 48 | fullversion: 4.18.0 49 | version: 4 50 | patchlevel: 18 51 | sublevel: 0 52 | extraversion: "326" 53 | fullextraversion: -326.el8.x86_64 54 | architecture: x86_64 55 | packagename: kernel-devel 56 | packageurl: https://mirrors.edge.kernel.org/centos/8-stream/BaseOS/x86_64/os/Packages/kernel-devel-4.18.0-326.el8.x86_64.rpm 57 | compilerversion: "80500" 58 | ``` 59 | -------------------------------------------------------------------------------- /docs/reference/config.md: -------------------------------------------------------------------------------- 1 | # Configuration reference 2 | 3 | ## Languages 4 | 5 | Configuration can be expressed in: 6 | 7 | - `json` 8 | - `yaml` 9 | 10 | ## The structure 11 | 12 | ```yaml 13 | distros: 14 | <distro>: 15 | versions: [""] 16 | archs: [""] 17 | mirrors: [{name: "", url: ""}] 18 | repositories: [{name: "", uri: ""}] 19 | vars: [] 20 | output: 21 | verbosity: [0-6] 22 | ``` 23 | 24 | > All `versions`, `archs`, `mirrors` are optional fields of the distro configuration. 25 | 26 | ### Distros 27 | 28 | `distros` is a map of well-known supported distro structures. 29 | 30 | #### Supported distros 31 | 32 | As of now, the supported Linux distributions are: 33 | - *amazonlinux1* 34 | - *amazonlinux2* 35 | - *amazonlinux2022* 36 | - *amazonlinux2023* 37 | - *centos* 38 | - *debian* 39 | - *ubuntu* 40 | - *fedora* 41 | - *oracle* 42 | - *opensuse* 43 | - *archlinux* 44 | 45 | The `distro` structure is a map of `versions`, `archs`, `mirrors`, `repositories`. 46 | 47 | ##### Example 48 | 49 | ``` 50 | distros: 51 | centos: 52 | versions: [] 53 | archs: [] 54 | mirrors: [] 55 | repositories: [] 56 | vars: [] 57 | ``` 58 | 59 | ### Distro.Versions 60 | 61 | `versions` is an array of well-known distribution versions, as named under package repository trees (e.g.
[*8-stream*](http://mirrors.edge.kernel.org/centos/8-stream/)). 62 | 63 | ### Distro.Archs 64 | 65 | `archs` is an array of supported architecture IDs. 66 | 67 | The name follows the one provided by package repository trees. For example *x86_64*/*amd64*, *aarch64*, *ppc64le*. 68 | 69 | > If omitted, **all supported** CPU architectures are selected. 70 | 71 | ### Distro.Mirrors 72 | 73 | `mirrors` is an array of `mirror` structures, each of which is a map of: 74 | - `name` (optional) 75 | - `url` 76 | 77 | `name` is a string label for the name of the mirror (e.g. [*Edge*](http://mirrors.edge.kernel.org)). Please note that this is only a label: its value does not have side effects in the crawling flow. 78 | 79 | `url` is the root URL of the mirror (e.g. *https://mirrors.kernel.org/centos*). 80 | 81 | ##### Example 82 | 83 | ``` 84 | centos: 85 | mirrors: 86 | - url: https://mirrors.kernel.org/centos 87 | ``` 88 | 89 | ### Distro.Repositories 90 | 91 | `repositories` is an array of `repository` structures, each of which in turn is a map of: 92 | - `name` (optional) 93 | - `uri` 94 | 95 | `name` is a string label for the name of the repository (e.g. [*AppStream*](http://mirrors.edge.kernel.org/centos/8-stream/AppStream/) for CentOS). Please note that this is only a label: its value does not have side effects in the crawling flow. 96 | 97 | `uri` is a string that contains the URI path to the repository root folder, starting from the root URL of the mirror. Note that the URI should start with a "/". 98 | 99 | ##### Example 100 | 101 | ``` 102 | centos: 103 | repositories: 104 | - name: AppStream 105 | uri: /AppStream/x86_64/os/ 106 | ``` 107 | 108 | ### Repositories Templating 109 | 110 | The `uri` field supports templates in the Go template format for annotations that refer to elements of the related distro's data structure (e.g. `distros.centos`). These elements can be both system-declared and user-declared data structures.
111 | 112 | ##### Supported data types 113 | 114 | The supported element types are: 115 | 116 | - array of strings 117 | 118 | #### System declared variables 119 | 120 | - `Distro.Archs` 121 | - `Distro.Versions` 122 | 123 | #### Distro.Vars: User declared variables 124 | 125 | You can define your own variables in the `Distro.Vars` structure, which is expected at the `distros.<distro>.vars` path. 126 | 127 | **Example** 128 | 129 | For example, to configure both old and new CentOS repositories, given both the archive and current kernel.org mirrors, you can template the repository URLs like below: 130 | 131 | ```yaml 132 | distros: 133 | centos: 134 | archs: ["aarch64", "x86_64"] 135 | mirrors: 136 | - name: archive 137 | url: https://archive.kernel.org/centos-vault/ 138 | - name: edge 139 | url: https://mirrors.edge.kernel.org/centos/ 140 | repositories: 141 | - name: old 142 | uri: "/{{ .old_repos }}/{{ .archs }}/" 143 | - name: new 144 | uri: "/{{ .new_repos }}/{{ .archs }}/os/" 145 | vars: 146 | new_repos: ["BaseOS", "AppStream"] 147 | old_repos: ["os", "updates"] 148 | ``` 149 | 150 | As you can see, both system-declared (e.g. `archs`) and user-declared (e.g. `new_repos`) data structures can be referenced in the template string. 151 | 152 | ### Output 153 | 154 | `output` is a map of settings for visual output of the commands: 155 | - `verbosity` 156 | 157 | #### Verbosity 158 | 159 | `verbosity` sets the verbosity of the visual output of the commands, through a decimal number from 0 to 6. 160 | 161 | It can be set either globally (as [above](#the-structure)) or per `distro`.
For example: 162 | 163 | ``` 164 | distros: 165 | ubuntu: 166 | mirrors: 167 | - url: "https://mirrors.edge.kernel.org/ubuntu" 168 | name: Edge 169 | - url: "http://security.ubuntu.com/ubuntu" 170 | name: Security 171 | output: 172 | verbosity: 6 173 | ``` 174 | 175 | -------------------------------------------------------------------------------- /docs/reference/index.md: -------------------------------------------------------------------------------- 1 | - [CLI reference](/reference/cli) 2 | - [Configuration reference](/reference/config) 3 | -------------------------------------------------------------------------------- /docs/roadmap.md: -------------------------------------------------------------------------------- 1 | # Roadmap 2 | 3 | - Provide GCC versions 4 | - Support new distributions 5 | 6 | -------------------------------------------------------------------------------- /go.mod: -------------------------------------------------------------------------------- 1 | module github.com/maxgio92/krawler 2 | 3 | go 1.19 4 | 5 | require ( 6 | github.com/Jguer/go-alpm/v2 v2.2.2 7 | github.com/antchfx/xmlquery v1.3.9 8 | github.com/gocolly/colly v1.2.0 9 | github.com/olekukonko/tablewriter v0.0.6-0.20210304033056-74c60be0ef68 10 | github.com/pkg/errors v0.9.1 11 | github.com/sassoftware/go-rpmutils v0.2.0 12 | github.com/schollz/progressbar/v3 v3.13.0 13 | github.com/sirupsen/logrus v1.8.1 14 | github.com/spf13/afero v1.8.2 15 | github.com/spf13/cobra v1.6.1 16 | github.com/spf13/viper v1.11.0 17 | github.com/stretchr/testify v1.8.4 18 | golang.org/x/exp v0.0.0-20230118134722-a68e582fa157 19 | gopkg.in/yaml.v2 v2.4.0 20 | gotest.tools v2.2.0+incompatible 21 | pault.ag/go/archive v0.0.0-20200912011324-7149510a39c7 22 | pault.ag/go/debian v0.12.0 23 | ) 24 | 25 | require ( 26 | github.com/DataDog/zstd v1.4.8 // indirect 27 | github.com/PuerkitoBio/goquery v1.8.0 // indirect 28 | github.com/andybalholm/cascadia v1.3.1 // indirect 29 | github.com/antchfx/htmlquery 
v1.2.4 // indirect 30 | github.com/antchfx/xpath v1.2.0 // indirect 31 | github.com/davecgh/go-spew v1.1.1 // indirect 32 | github.com/fsnotify/fsnotify v1.5.1 // indirect 33 | github.com/gobwas/glob v0.2.3 // indirect 34 | github.com/golang/groupcache v0.0.0-20210331224755-41bb18bfe9da // indirect 35 | github.com/golang/protobuf v1.5.2 // indirect 36 | github.com/google/go-cmp v0.5.9 // indirect 37 | github.com/hashicorp/hcl v1.0.0 // indirect 38 | github.com/inconshreveable/mousetrap v1.0.1 // indirect 39 | github.com/kennygrant/sanitize v1.2.4 // indirect 40 | github.com/kjk/lzma v0.0.0-20161016003348-3fd93898850d // indirect 41 | github.com/klauspost/compress v1.11.7 // indirect 42 | github.com/kr/pretty v0.3.0 // indirect 43 | github.com/magiconair/properties v1.8.6 // indirect 44 | github.com/mattn/go-runewidth v0.0.14 // indirect 45 | github.com/mitchellh/colorstring v0.0.0-20190213212951-d06e56a500db // indirect 46 | github.com/mitchellh/mapstructure v1.4.3 // indirect 47 | github.com/pelletier/go-toml v1.9.4 // indirect 48 | github.com/pelletier/go-toml/v2 v2.0.0-beta.8 // indirect 49 | github.com/pmezard/go-difflib v1.0.0 // indirect 50 | github.com/rivo/uniseg v0.4.3 // indirect 51 | github.com/rogpeppe/go-internal v1.8.0 // indirect 52 | github.com/saintfish/chardet v0.0.0-20120816061221-3af4cd4741ca // indirect 53 | github.com/spf13/cast v1.4.1 // indirect 54 | github.com/spf13/jwalterweatherman v1.1.0 // indirect 55 | github.com/spf13/pflag v1.0.5 // indirect 56 | github.com/subosito/gotenv v1.2.0 // indirect 57 | github.com/temoto/robotstxt v1.1.2 // indirect 58 | github.com/ulikunitz/xz v0.5.9 // indirect 59 | github.com/xi2/xz v0.0.0-20171230120015-48954b6210f8 // indirect 60 | golang.org/x/crypto v0.1.0 // indirect 61 | golang.org/x/net v0.7.0 // indirect 62 | golang.org/x/sys v0.5.0 // indirect 63 | golang.org/x/term v0.5.0 // indirect 64 | golang.org/x/text v0.7.0 // indirect 65 | google.golang.org/appengine v1.6.7 // indirect 66 | 
google.golang.org/protobuf v1.28.1 // indirect 67 | gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c // indirect 68 | gopkg.in/ini.v1 v1.66.4 // indirect 69 | gopkg.in/yaml.v3 v3.0.1 // indirect 70 | pault.ag/go/blobstore v0.0.0-20180314122834-d6d187c5a029 // indirect 71 | pault.ag/go/topsort v0.0.0-20160530003732-f98d2ad46e1a // indirect 72 | ) 73 | -------------------------------------------------------------------------------- /images/amazonlinux.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/maxgio92/krawler/d9e9a370760ac8290625cc3fed3c4dd94bc852f4/images/amazonlinux.png -------------------------------------------------------------------------------- /images/archlinux.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/maxgio92/krawler/d9e9a370760ac8290625cc3fed3c4dd94bc852f4/images/archlinux.png -------------------------------------------------------------------------------- /images/centos.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/maxgio92/krawler/d9e9a370760ac8290625cc3fed3c4dd94bc852f4/images/centos.png -------------------------------------------------------------------------------- /images/debian.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/maxgio92/krawler/d9e9a370760ac8290625cc3fed3c4dd94bc852f4/images/debian.png -------------------------------------------------------------------------------- /images/fedora.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/maxgio92/krawler/d9e9a370760ac8290625cc3fed3c4dd94bc852f4/images/fedora.png -------------------------------------------------------------------------------- /images/madagascar.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/maxgio92/krawler/d9e9a370760ac8290625cc3fed3c4dd94bc852f4/images/madagascar.png -------------------------------------------------------------------------------- /images/madagascar_small.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/maxgio92/krawler/d9e9a370760ac8290625cc3fed3c4dd94bc852f4/images/madagascar_small.png -------------------------------------------------------------------------------- /images/opensuse.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/maxgio92/krawler/d9e9a370760ac8290625cc3fed3c4dd94bc852f4/images/opensuse.png -------------------------------------------------------------------------------- /images/oracle.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/maxgio92/krawler/d9e9a370760ac8290625cc3fed3c4dd94bc852f4/images/oracle.png -------------------------------------------------------------------------------- /images/ubuntu.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/maxgio92/krawler/d9e9a370760ac8290625cc3fed3c4dd94bc852f4/images/ubuntu.png -------------------------------------------------------------------------------- /internal/format/format.go: -------------------------------------------------------------------------------- 1 | package format 2 | 3 | import ( 4 | "bufio" 5 | "encoding/json" 6 | 7 | "github.com/olekukonko/tablewriter" 8 | "gopkg.in/yaml.v2" 9 | ) 10 | 11 | type Type string 12 | 13 | const ( 14 | Text Type = "text" 15 | JSON Type = "json" 16 | YAML Type = "yaml" 17 | ) 18 | 19 | func Encode(output *bufio.Writer, objects interface{}, format Type) (*bufio.Writer, error) { 20 | switch format { 21 | case JSON: 22 | return 
encodeJSON(output, objects) 23 | case Text: 24 | return encodeText(output, objects) 25 | case YAML: 26 | return encodeYAML(output, objects) 27 | default: 28 | return encodeText(output, objects) 29 | } 30 | } 31 | 32 | func encodeJSON(output *bufio.Writer, objects interface{}) (*bufio.Writer, error) { 33 | json, err := json.Marshal(objects) 34 | if err != nil { 35 | return nil, err 36 | } 37 | 38 | _, err = output.Write(json) 39 | if err != nil { 40 | return nil, err 41 | } 42 | 43 | return output, nil 44 | } 45 | 46 | func encodeYAML(output *bufio.Writer, objects interface{}) (*bufio.Writer, error) { 47 | yaml, err := yaml.Marshal(objects) 48 | if err != nil { 49 | return nil, err 50 | } 51 | 52 | _, err = output.Write(yaml) 53 | if err != nil { 54 | return nil, err 55 | } 56 | 57 | return output, nil 58 | } 59 | 60 | func encodeText(output *bufio.Writer, objects interface{}) (*bufio.Writer, error) { 61 | return encodeTableFromStructs(output, objects) 62 | } 63 | 64 | func encodeTableFromStructs(output *bufio.Writer, objects interface{}) (*bufio.Writer, error) { 65 | printer := tablewriter.NewWriter(output) 66 | 67 | if err := printer.SetStructs(objects); err != nil { 68 | return nil, err 69 | } 70 | 71 | printer.Render() 72 | 73 | return output, nil 74 | } 75 | -------------------------------------------------------------------------------- /internal/utils/config.go: -------------------------------------------------------------------------------- 1 | package utils 2 | 3 | import ( 4 | v "github.com/spf13/viper" 5 | 6 | d "github.com/maxgio92/krawler/pkg/distro" 7 | ) 8 | 9 | //nolint:cyclop,funlen,gocognit 10 | func GetDistroConfigAndVarsFromViper(viper *v.Viper) (d.Config, error) { 11 | // The distro configuration. 
12 | config := d.Config{} 13 | 14 | // The distro all settings from Viper 15 | var allsettings map[string]interface{} 16 | 17 | // The distro config variables from Viper 18 | var varsSettings map[string]interface{} 19 | 20 | if output := viper.Sub("output"); output != nil { 21 | if err := output.Unmarshal(&config.Output); err != nil { 22 | return d.Config{}, err 23 | } 24 | } 25 | 26 | //nolint:nestif 27 | if distros := viper.Sub("distros"); distros != nil { 28 | if centos := distros.Sub(d.CentosType); centos != nil { 29 | if err := centos.Unmarshal(&config); err != nil { 30 | return d.Config{}, err 31 | } 32 | 33 | allsettings = centos.AllSettings() 34 | } 35 | 36 | if amazonLinuxV1 := distros.Sub(d.AmazonLinuxV1Type); amazonLinuxV1 != nil { 37 | if err := amazonLinuxV1.Unmarshal(&config); err != nil { 38 | return d.Config{}, err 39 | } 40 | 41 | allsettings = amazonLinuxV1.AllSettings() 42 | } 43 | 44 | if amazonLinuxV2 := distros.Sub(d.AmazonLinuxV2Type); amazonLinuxV2 != nil { 45 | if err := amazonLinuxV2.Unmarshal(&config); err != nil { 46 | return d.Config{}, err 47 | } 48 | 49 | allsettings = amazonLinuxV2.AllSettings() 50 | } 51 | 52 | if amazonLinuxV2022 := distros.Sub(d.AmazonLinuxV2022Type); amazonLinuxV2022 != nil { 53 | if err := amazonLinuxV2022.Unmarshal(&config); err != nil { 54 | return d.Config{}, err 55 | } 56 | 57 | allsettings = amazonLinuxV2022.AllSettings() 58 | } 59 | 60 | if amazonLinuxV2023 := distros.Sub(d.AmazonLinuxV2023Type); amazonLinuxV2023 != nil { 61 | if err := amazonLinuxV2023.Unmarshal(&config); err != nil { 62 | return d.Config{}, err 63 | } 64 | 65 | allsettings = amazonLinuxV2023.AllSettings() 66 | } 67 | 68 | if debian := distros.Sub(d.DebianType); debian != nil { 69 | if err := debian.Unmarshal(&config); err != nil { 70 | return d.Config{}, err 71 | } 72 | 73 | allsettings = debian.AllSettings() 74 | } 75 | 76 | if ubuntu := distros.Sub(d.UbuntuType); ubuntu != nil { 77 | if err := ubuntu.Unmarshal(&config); err != nil { 78 
// MergeMapsAndDeleteKeys merges m1 and m2 exactly as MergeMaps does and then
// removes every key listed in keysToDelete from the merged result.
//
// Keys that are not present in the merged map are ignored. Neither m1 nor m2
// is modified.
func MergeMapsAndDeleteKeys(m1 map[string]interface{}, m2 map[string]interface{}, keysToDelete ...string) map[string]interface{} {
	result := MergeMaps(m1, m2)

	for _, k := range keysToDelete {
		delete(result, k)
	}

	return result
}

// MergeMaps returns a new map holding the entries of m1 and m2.
//
// When a key exists in both maps, the value from m2 wins because m2 is copied
// last. The copy is shallow: values are assigned as-is, so nested maps or
// slices are shared with the inputs. Nil inputs are treated as empty maps and
// the returned map is never nil.
func MergeMaps(m1 map[string]interface{}, m2 map[string]interface{}) map[string]interface{} {
	// len(m1)+len(m2) is an upper bound on the final size, so the map never
	// has to grow while it is being filled.
	result := make(map[string]interface{}, len(m1)+len(m2))

	for k, v := range m1 {
		result[k] = v
	}

	for k, v := range m2 {
		result[k] = v
	}

	return result
}
// Unique returns the distinct strings of ss, keeping the order in which each
// value first appears. The result is always a non-nil slice, also when ss is
// nil or empty.
func Unique(ss []string) []string {
	seen := make(map[string]struct{})
	result := []string{}

	for i := range ss {
		item := ss[i]

		// Skip values that were already emitted.
		if _, dup := seen[item]; dup {
			continue
		}

		seen[item] = struct{}{}
		result = append(result, item)
	}

	return result
}
15 | */ 16 | package main 17 | 18 | import "github.com/maxgio92/krawler/cmd" 19 | 20 | func main() { 21 | cmd.Execute() 22 | } 23 | -------------------------------------------------------------------------------- /mkdocs.yml: -------------------------------------------------------------------------------- 1 | site_name: Krawler 2 | repo_name: maxgio92/krawler 3 | repo_url: https://github.com/maxgio92/krawler 4 | site_url: https://krawler.dev/ 5 | docs_dir: docs 6 | site_dir: site 7 | 8 | theme: 9 | name: material 10 | features: 11 | - navigation.tabs 12 | - navigation.tabs.sticky 13 | - navigation.instant 14 | - navigation.sections 15 | include_sidebar: true 16 | palette: 17 | primary: 'blue grey' 18 | 19 | font: 20 | text: 'Ubuntu' 21 | code: 'Ubuntu Mono' 22 | 23 | # Generate navigation bar 24 | nav: 25 | - 'Introduction': index.md 26 | - 'Getting started': getting-started.md 27 | - 'Reference': 28 | - reference/index.md 29 | - reference/cli.md 30 | - reference/config.md 31 | - 'Roadmap': roadmap.md 32 | - 'Database': https://db.krawler.dev 33 | 34 | extra: 35 | generator: false 36 | 37 | -------------------------------------------------------------------------------- /pkg/distro/amazonlinux/amazonlinux.go: -------------------------------------------------------------------------------- 1 | package amazonlinux 2 | 3 | import ( 4 | "context" 5 | "io" 6 | "net/http" 7 | "net/url" 8 | "strings" 9 | 10 | "github.com/maxgio92/krawler/pkg/distro" 11 | "github.com/maxgio92/krawler/pkg/output" 12 | p "github.com/maxgio92/krawler/pkg/packages" 13 | "github.com/maxgio92/krawler/pkg/packages/rpm" 14 | "github.com/maxgio92/krawler/pkg/scrape" 15 | ) 16 | 17 | type AmazonLinux struct { 18 | Config distro.Config 19 | } 20 | 21 | func (a *AmazonLinux) ConfigureCommon(def distro.Config, config distro.Config) error { 22 | c, err := mergeAndSanitizeConfig(def, config) 23 | if err != nil { 24 | return err 25 | } 26 | 27 | a.Config = c 28 | 29 | return nil 30 | } 31 | 32 | // 
BuildMirrorURLs returns the list of version-specific mirror URLs. 33 | func (a *AmazonLinux) BuildMirrorURLs(mirrors []p.Mirror, versions []distro.Version) ([]*url.URL, error) { 34 | versions, err := a.buildVersions(mirrors, versions) 35 | if err != nil { 36 | return []*url.URL{}, err 37 | } 38 | 39 | if (len(versions) > 0) && (len(mirrors) > 0) { 40 | var versionRoots []*url.URL 41 | 42 | for _, mirror := range mirrors { 43 | for _, version := range versions { 44 | versionRoot, err := url.Parse(mirror.URL + string(version)) 45 | if err != nil { 46 | return nil, err 47 | } 48 | 49 | versionRoots = append(versionRoots, versionRoot) 50 | } 51 | } 52 | 53 | return versionRoots, nil 54 | } 55 | 56 | return nil, distro.ErrNoDistroVersionSpecified 57 | } 58 | 59 | // BuildRepositoryURLs returns the list of repositories URLs. 60 | func BuildRepositoryURLs(roots []*url.URL, repositories []p.Repository) ([]*url.URL, error) { 61 | var urls []*url.URL 62 | 63 | for _, root := range roots { 64 | for _, r := range repositories { 65 | us, err := url.JoinPath(root.String(), string(r.URI)) 66 | if err != nil { 67 | return nil, err 68 | } 69 | 70 | repositoryURL, err := url.Parse(us) 71 | if err != nil { 72 | return nil, err 73 | } 74 | 75 | urls = append(urls, repositoryURL) 76 | } 77 | } 78 | 79 | return urls, nil 80 | } 81 | 82 | // buildVersions returns a list of distro versions, considering the user-provided configuration, 83 | // and if not, the ones available on configured mirrors. 
84 | func (a *AmazonLinux) buildVersions(mirrors []p.Mirror, staticVersions []distro.Version) ([]distro.Version, error) { 85 | if staticVersions != nil { 86 | return staticVersions, nil 87 | } 88 | 89 | var dynamicVersions []distro.Version 90 | 91 | dynamicVersions, err := a.crawlVersions(mirrors) 92 | if err != nil { 93 | return nil, err 94 | } 95 | 96 | return dynamicVersions, nil 97 | } 98 | 99 | // crawlVersions returns the list of the current available distro versions, by scraping 100 | // the specified mirrors, dynamically. 101 | func (a *AmazonLinux) crawlVersions(mirrors []p.Mirror) ([]distro.Version, error) { 102 | versions := []distro.Version{} 103 | 104 | seedUrls := make([]*url.URL, 0, len(mirrors)) 105 | 106 | for _, mirror := range mirrors { 107 | u, err := url.Parse(mirror.URL) 108 | if err != nil { 109 | return []distro.Version{}, err 110 | } 111 | 112 | seedUrls = append(seedUrls, u) 113 | } 114 | 115 | folderNames, err := scrape.CrawlFolders( 116 | seedUrls, 117 | MirrorsDistroVersionRegex, 118 | true, 119 | a.Config.Output.Verbosity >= output.DebugLevel, 120 | ) 121 | if err != nil { 122 | return []distro.Version{}, err 123 | } 124 | 125 | for _, v := range folderNames { 126 | versions = append(versions, distro.Version(v)) 127 | } 128 | 129 | return versions, nil 130 | } 131 | 132 | // SearchPackages scrapes each mirror, for each distro version, for each repository, 133 | // for each architecture, and returns slice of Package and optionally an error. 134 | func (a *AmazonLinux) SearchPackages(options p.SearchOptions) ([]p.Package, error) { 135 | a.Config.Output.Logger = options.Log() 136 | 137 | // Build distribution version-specific mirror root URLs. 138 | perVersionMirrorURLs, err := a.BuildMirrorURLs(a.Config.Mirrors, a.Config.Versions) 139 | if err != nil { 140 | return nil, err 141 | } 142 | 143 | // Build available repository URLs based on provided configuration, 144 | // for each distribution version. 
145 | repositoriesURLrefs, err := BuildRepositoryURLs(perVersionMirrorURLs, a.Config.Repositories) 146 | if err != nil { 147 | return nil, err 148 | } 149 | 150 | // Dereference repository URLs. 151 | repositoryURLs, err := a.dereferenceRepositoryURLs(repositoriesURLrefs, a.Config.Archs) 152 | if err != nil { 153 | return nil, err 154 | } 155 | 156 | // Get RPM packages from each repository. 157 | rss := []string{} 158 | for _, ru := range repositoryURLs { 159 | rss = append(rss, ru.String()) 160 | } 161 | 162 | searchOptions := rpm.NewSearchOptions(&options, a.Config.Archs, rss) 163 | rpmPackages, err := rpm.SearchPackages(searchOptions) 164 | if err != nil { 165 | return nil, err 166 | } 167 | 168 | return rpmPackages, nil 169 | } 170 | 171 | func (a *AmazonLinux) dereferenceRepositoryURLs(repoURLs []*url.URL, archs []p.Architecture) ([]*url.URL, error) { 172 | var urls []*url.URL 173 | 174 | for _, ar := range archs { 175 | for _, v := range repoURLs { 176 | r, err := a.dereferenceRepositoryURL(v, ar) 177 | if err != nil { 178 | return nil, err 179 | } 180 | 181 | if r != nil { 182 | urls = append(urls, r) 183 | } 184 | } 185 | } 186 | 187 | return urls, nil 188 | } 189 | 190 | func (a *AmazonLinux) dereferenceRepositoryURL(src *url.URL, arch p.Architecture) (*url.URL, error) { 191 | var dest *url.URL 192 | 193 | mirrorListURL, err := url.JoinPath(src.String(), string(arch), "mirror.list") 194 | if err != nil { 195 | return nil, err 196 | } 197 | 198 | req, err := http.NewRequestWithContext(context.Background(), http.MethodGet, mirrorListURL, nil) 199 | if err != nil { 200 | return nil, err 201 | } 202 | 203 | resp, err := http.DefaultClient.Do(req) 204 | if err != nil { 205 | return nil, err 206 | } 207 | defer resp.Body.Close() 208 | 209 | if resp.StatusCode != http.StatusOK { 210 | a.Config.Output.Logger.Error("Amazon Linux v2023 repository URL not valid to be dereferenced") 211 | //nolint:nilnil 212 | return nil, nil 213 | } 214 | 215 | if resp.Body == nil { 
216 | a.Config.Output.Logger.Error("empty response from Amazon Linux v2023 repository reference URL") 217 | //nolint:nilnil 218 | return nil, nil 219 | } 220 | 221 | b, err := io.ReadAll(resp.Body) 222 | if err != nil { 223 | return nil, err 224 | } 225 | 226 | // Get first repository URL available, no matter what the geolocation. 227 | s := strings.Split(string(b), "\n")[0] 228 | 229 | dest, err = url.Parse(s) 230 | if err != nil { 231 | return nil, err 232 | } 233 | 234 | return dest, nil 235 | } 236 | -------------------------------------------------------------------------------- /pkg/distro/amazonlinux/config.go: -------------------------------------------------------------------------------- 1 | package amazonlinux 2 | 3 | import ( 4 | "net/url" 5 | "strings" 6 | 7 | "github.com/maxgio92/krawler/pkg/distro" 8 | "github.com/maxgio92/krawler/pkg/packages" 9 | ) 10 | 11 | func mergeAndSanitizeConfig(def distro.Config, user distro.Config) (distro.Config, error) { 12 | config := mergeConfig(def, user) 13 | 14 | if err := sanitizeConfig(&config); err != nil { 15 | return distro.Config{}, err 16 | } 17 | 18 | return config, nil 19 | } 20 | 21 | // mergeConfig returns the final configuration by merging the default with the user provided. 
22 | // 23 | //nolint:cyclop 24 | func mergeConfig(def distro.Config, config distro.Config) distro.Config { 25 | if len(config.Archs) < 1 { 26 | config.Archs = def.Archs 27 | } else { 28 | for _, arch := range config.Archs { 29 | if arch == "" { 30 | config.Archs = def.Archs 31 | 32 | break 33 | } 34 | } 35 | } 36 | 37 | if len(config.Mirrors) < 1 { 38 | config.Mirrors = def.Mirrors 39 | } else { 40 | for _, mirror := range config.Mirrors { 41 | if mirror.URL == "" { 42 | config.Mirrors = def.Mirrors 43 | 44 | break 45 | } 46 | } 47 | } 48 | 49 | if len(config.Repositories) < 1 { 50 | config.Repositories = def.Repositories 51 | } else { 52 | for _, repository := range config.Repositories { 53 | if repository.URI == "" { 54 | config.Repositories = def.Repositories 55 | 56 | break 57 | } 58 | } 59 | } 60 | 61 | // Force Amazon Linux versions as folder URLs are forbidden. 62 | if len(config.Versions) < 1 { 63 | config.Versions = def.Versions 64 | } 65 | 66 | return config 67 | } 68 | 69 | func sanitizeConfig(config *distro.Config) error { 70 | err := sanitizeMirrors(&config.Mirrors) 71 | if err != nil { 72 | return err 73 | } 74 | 75 | return nil 76 | } 77 | 78 | func sanitizeMirrors(mirrors *[]packages.Mirror) error { 79 | for i, mirror := range *mirrors { 80 | if !strings.HasSuffix(mirror.URL, "/") { 81 | (*mirrors)[i].URL = mirror.URL + "/" 82 | } 83 | 84 | _, err := url.Parse(mirror.URL) 85 | if err != nil { 86 | return err 87 | } 88 | } 89 | 90 | return nil 91 | } 92 | -------------------------------------------------------------------------------- /pkg/distro/amazonlinux/constants.go: -------------------------------------------------------------------------------- 1 | package amazonlinux 2 | 3 | const ( 4 | MirrorsDistroVersionRegex = `^(0|[v1-9]\d*)(\.(0|[v1-9]\d*)?)?(\.(0|[v1-9]\d*)?)?(-[a-zA-Z\d][-a-zA-Z.\d]*)?(\+[a-zA-Z\d][-a-zA-Z.\d]*)?\/$` 5 | ) 6 | -------------------------------------------------------------------------------- 
/pkg/distro/amazonlinux/v1/amazonlinux.go: -------------------------------------------------------------------------------- 1 | package v1 2 | 3 | import ( 4 | "context" 5 | "io" 6 | "net/http" 7 | "net/url" 8 | "strings" 9 | 10 | "github.com/maxgio92/krawler/pkg/distro" 11 | common "github.com/maxgio92/krawler/pkg/distro/amazonlinux" 12 | "github.com/maxgio92/krawler/pkg/packages" 13 | "github.com/maxgio92/krawler/pkg/packages/rpm" 14 | ) 15 | 16 | type AmazonLinux struct { 17 | common.AmazonLinux 18 | } 19 | 20 | func (a *AmazonLinux) Configure(config distro.Config) error { 21 | return a.ConfigureCommon(DefaultConfig, config) 22 | } 23 | 24 | // GetPackages scrapes each mirror, for each distro version, for each repository, 25 | // for each architecture, and returns slice of Package and optionally an error. 26 | func (a *AmazonLinux) SearchPackages(options packages.SearchOptions) ([]packages.Package, error) { 27 | a.Config.Output.Logger = options.Log() 28 | 29 | // Build distribution version-specific mirror root URLs. 30 | perVersionMirrorURLs, err := a.BuildMirrorURLs(a.Config.Mirrors, a.Config.Versions) 31 | if err != nil { 32 | return nil, err 33 | } 34 | 35 | // Build available repository URLs based on provided configuration, 36 | // for each distribution version. 37 | repositoriesURLrefs, err := common.BuildRepositoryURLs(perVersionMirrorURLs, a.Config.Repositories) 38 | if err != nil { 39 | return nil, err 40 | } 41 | 42 | // Dereference repository URLs. 43 | repositoryURLs, err := a.dereferenceRepositoryURLs(repositoriesURLrefs, a.Config.Archs) 44 | if err != nil { 45 | return nil, err 46 | } 47 | 48 | // Get RPM packages from each repository. 
49 | rss := []string{} 50 | for _, ru := range repositoryURLs { 51 | rss = append(rss, ru.String()) 52 | } 53 | 54 | searchOptions := rpm.NewSearchOptions(&options, a.Config.Archs, rss) 55 | rpmPackages, err := rpm.SearchPackages(searchOptions) 56 | if err != nil { 57 | return nil, err 58 | } 59 | 60 | return rpmPackages, nil 61 | } 62 | 63 | func (a *AmazonLinux) dereferenceRepositoryURLs(repoURLs []*url.URL, archs []packages.Architecture) ([]*url.URL, error) { 64 | var urls []*url.URL 65 | 66 | for _, ar := range archs { 67 | for _, v := range repoURLs { 68 | r, err := a.dereferenceRepositoryURL(v, ar) 69 | if err != nil { 70 | return nil, err 71 | } 72 | 73 | if r != nil { 74 | urls = append(urls, r) 75 | } 76 | } 77 | } 78 | 79 | return urls, nil 80 | } 81 | 82 | func (a *AmazonLinux) dereferenceRepositoryURL(src *url.URL, arch packages.Architecture) (*url.URL, error) { 83 | var dest *url.URL 84 | 85 | mirrorListURL, err := url.JoinPath(src.String(), "mirror.list") 86 | if err != nil { 87 | return nil, err 88 | } 89 | 90 | req, err := http.NewRequestWithContext(context.Background(), http.MethodGet, mirrorListURL, nil) 91 | if err != nil { 92 | return nil, err 93 | } 94 | 95 | resp, err := http.DefaultClient.Do(req) 96 | if err != nil { 97 | return nil, err 98 | } 99 | defer resp.Body.Close() 100 | 101 | if resp.StatusCode != http.StatusOK { 102 | a.Config.Output.Logger.Error("Amazon Linux v1 repository URL not valid to be dereferenced") 103 | //nolint:nilnil 104 | return nil, nil 105 | } 106 | 107 | if resp.Body == nil { 108 | a.Config.Output.Logger.Error("empty response from Amazon Linux v1 repository reference URL") 109 | //nolint:nilnil 110 | return nil, nil 111 | } 112 | 113 | b, err := io.ReadAll(resp.Body) 114 | if err != nil { 115 | return nil, err 116 | } 117 | 118 | // Get first repository URL available, no matter what the geolocation. 
119 | s := strings.Split(strings.ReplaceAll(string(b), "$basearch", string(arch)), "\n")[0] 120 | 121 | dest, err = url.Parse(s) 122 | if err != nil { 123 | return nil, err 124 | } 125 | 126 | return dest, nil 127 | } 128 | -------------------------------------------------------------------------------- /pkg/distro/amazonlinux/v1/constants.go: -------------------------------------------------------------------------------- 1 | package v1 2 | 3 | import ( 4 | "github.com/maxgio92/krawler/pkg/distro" 5 | "github.com/maxgio92/krawler/pkg/packages" 6 | ) 7 | 8 | // DefaultConfig is the default configuration for scrape Amazon Linux (RPM) packages. 9 | // As of now URI templating depends on distro's viper.AllSettings() data. 10 | var DefaultConfig = distro.Config{ 11 | Mirrors: []packages.Mirror{ 12 | {Name: "AL1", URL: "http://repo.us-east-1.amazonaws.com/"}, 13 | }, 14 | Repositories: []packages.Repository{ 15 | {Name: "", URI: "/updates/"}, 16 | {Name: "", URI: "/main/"}, 17 | }, 18 | Archs: []packages.Architecture{ 19 | "aarch64", 20 | "x86_64", 21 | "ppc64le", 22 | }, 23 | Versions: []distro.Version{ 24 | "latest", 25 | "2017.03", 26 | "2017.09", 27 | "2018.03", 28 | }, 29 | } 30 | -------------------------------------------------------------------------------- /pkg/distro/amazonlinux/v2/amazonlinux.go: -------------------------------------------------------------------------------- 1 | package v2 2 | 3 | import ( 4 | "context" 5 | "io" 6 | "net/http" 7 | "net/url" 8 | "strings" 9 | 10 | "github.com/maxgio92/krawler/pkg/distro" 11 | common "github.com/maxgio92/krawler/pkg/distro/amazonlinux" 12 | "github.com/maxgio92/krawler/pkg/packages" 13 | "github.com/maxgio92/krawler/pkg/packages/rpm" 14 | ) 15 | 16 | type AmazonLinux struct { 17 | common.AmazonLinux 18 | } 19 | 20 | func (a *AmazonLinux) Configure(config distro.Config) error { 21 | return a.ConfigureCommon(DefaultConfig, config) 22 | } 23 | 24 | // GetPackages scrapes each mirror, for each distro version, for 
each repository, 25 | // for each architecture, and returns slice of Package and optionally an error. 26 | func (a *AmazonLinux) SearchPackages(options packages.SearchOptions) ([]packages.Package, error) { 27 | a.Config.Output.Logger = options.Log() 28 | 29 | // Build distribution version-specific mirror root URLs. 30 | perVersionMirrorURLs, err := a.BuildMirrorURLs(a.Config.Mirrors, a.Config.Versions) 31 | if err != nil { 32 | return nil, err 33 | } 34 | 35 | // Build available repository URLs based on provided configuration, 36 | // for each distribution version. 37 | repositoriesURLrefs, err := common.BuildRepositoryURLs(perVersionMirrorURLs, a.Config.Repositories) 38 | if err != nil { 39 | return nil, err 40 | } 41 | 42 | // Dereference repository URLs. 43 | repositoryURLs, err := a.dereferenceRepositoryURLs(repositoriesURLrefs, a.Config.Archs) 44 | if err != nil { 45 | return nil, err 46 | } 47 | 48 | // Get RPM packages from each repository. 49 | rss := []string{} 50 | for _, ru := range repositoryURLs { 51 | rss = append(rss, ru.String()) 52 | } 53 | 54 | searchOptions := rpm.NewSearchOptions(&options, a.Config.Archs, rss) 55 | rpmPackages, err := rpm.SearchPackages(searchOptions) 56 | if err != nil { 57 | return nil, err 58 | } 59 | 60 | return rpmPackages, nil 61 | } 62 | 63 | func (a *AmazonLinux) dereferenceRepositoryURLs(repoURLs []*url.URL, archs []packages.Architecture) ([]*url.URL, error) { 64 | var urls []*url.URL 65 | 66 | for _, ar := range archs { 67 | for _, v := range repoURLs { 68 | r, err := a.dereferenceRepositoryURL(v, ar) 69 | if err != nil { 70 | return nil, err 71 | } 72 | 73 | if r != nil { 74 | urls = append(urls, r) 75 | } 76 | } 77 | } 78 | 79 | return urls, nil 80 | } 81 | 82 | func (a *AmazonLinux) dereferenceRepositoryURL(src *url.URL, arch packages.Architecture) (*url.URL, error) { 83 | var dest *url.URL 84 | 85 | mirrorListURL, err := url.JoinPath(src.String(), string(arch), "mirror.list") 86 | if err != nil { 87 | return nil, 
err 88 | } 89 | 90 | req, err := http.NewRequestWithContext(context.Background(), http.MethodGet, mirrorListURL, nil) 91 | if err != nil { 92 | return nil, err 93 | } 94 | 95 | resp, err := http.DefaultClient.Do(req) 96 | if err != nil { 97 | return nil, err 98 | } 99 | defer resp.Body.Close() 100 | 101 | if resp.StatusCode != http.StatusOK { 102 | a.Config.Output.Logger.Error("Amazon Linux v2 repository URL not valid to be dereferenced") 103 | //nolint:nilnil 104 | return nil, nil 105 | } 106 | 107 | if resp.Body == nil { 108 | a.Config.Output.Logger.Error("empty response from Amazon Linux v2 repository reference URL") 109 | //nolint:nilnil 110 | return nil, nil 111 | } 112 | 113 | b, err := io.ReadAll(resp.Body) 114 | if err != nil { 115 | return nil, err 116 | } 117 | 118 | // Get first repository URL available, no matter what the geolocation. 119 | s := strings.Split(string(b), "\n")[0] 120 | 121 | dest, err = url.Parse(s) 122 | if err != nil { 123 | return nil, err 124 | } 125 | 126 | return dest, nil 127 | } 128 | -------------------------------------------------------------------------------- /pkg/distro/amazonlinux/v2/constants.go: -------------------------------------------------------------------------------- 1 | package v2 2 | 3 | import ( 4 | "github.com/maxgio92/krawler/pkg/distro" 5 | "github.com/maxgio92/krawler/pkg/packages" 6 | ) 7 | 8 | // DefaultConfig is the default configuration for scrape Amazon Linux (RPM) packages. 9 | // As of now URI templating depends on distro's viper.AllSettings() data. 
10 | var DefaultConfig = distro.Config{ 11 | Mirrors: []packages.Mirror{ 12 | { 13 | Name: "AL2", 14 | URL: "http://amazonlinux.us-east-1.amazonaws.com/2/", 15 | }, 16 | }, 17 | Repositories: []packages.Repository{ 18 | {Name: "", URI: "core/2.0"}, 19 | {Name: "", URI: "core/latest"}, 20 | {Name: "", URI: "extras/kernel-5.4/latest"}, 21 | {Name: "", URI: "extras/kernel-5.10/latest"}, 22 | {Name: "", URI: "extras/kernel-5.15/latest"}, 23 | }, 24 | Archs: []packages.Architecture{ 25 | "aarch64", 26 | "x86_64", 27 | "ppc64le", 28 | }, 29 | Versions: []distro.Version{""}, 30 | } 31 | -------------------------------------------------------------------------------- /pkg/distro/amazonlinux/v2022/amazonlinux.go: -------------------------------------------------------------------------------- 1 | package v2022 2 | 3 | import ( 4 | "github.com/maxgio92/krawler/pkg/distro" 5 | common "github.com/maxgio92/krawler/pkg/distro/amazonlinux" 6 | ) 7 | 8 | type AmazonLinux struct { 9 | common.AmazonLinux 10 | } 11 | 12 | func (a *AmazonLinux) Configure(config distro.Config) error { 13 | return a.ConfigureCommon(DefaultConfig, config) 14 | } 15 | -------------------------------------------------------------------------------- /pkg/distro/amazonlinux/v2022/constants.go: -------------------------------------------------------------------------------- 1 | package v2022 2 | 3 | import ( 4 | "github.com/maxgio92/krawler/pkg/distro" 5 | "github.com/maxgio92/krawler/pkg/packages" 6 | ) 7 | 8 | // DefaultConfig is the default configuration for scrape Amazon Linux (RPM) packages. 9 | // As of now URI templating depends on distro's viper.AllSettings() data. 
10 | var DefaultConfig = distro.Config{ 11 | Mirrors: []packages.Mirror{ 12 | { 13 | Name: "AL2022", 14 | URL: "https://al2022-repos-us-east-1-9761ab97.s3.dualstack.us-east-1.amazonaws.com/core/mirrors/", 15 | }, 16 | }, 17 | Repositories: []packages.Repository{ 18 | {Name: "", URI: "2022.0.20220202"}, 19 | {Name: "", URI: "2022.0.20220315"}, 20 | {Name: "", URI: "2022.0.20221012"}, 21 | }, 22 | Archs: []packages.Architecture{ 23 | "aarch64", 24 | "x86_64", 25 | "ppc64le", 26 | }, 27 | Versions: []distro.Version{""}, 28 | } 29 | -------------------------------------------------------------------------------- /pkg/distro/amazonlinux/v2023/amazonlinux.go: -------------------------------------------------------------------------------- 1 | package v2023 2 | 3 | import ( 4 | "github.com/maxgio92/krawler/pkg/distro" 5 | common "github.com/maxgio92/krawler/pkg/distro/amazonlinux" 6 | ) 7 | 8 | type AmazonLinux struct { 9 | common.AmazonLinux 10 | } 11 | 12 | func (a *AmazonLinux) Configure(config distro.Config) error { 13 | return a.ConfigureCommon(DefaultConfig, config) 14 | } 15 | -------------------------------------------------------------------------------- /pkg/distro/amazonlinux/v2023/constants.go: -------------------------------------------------------------------------------- 1 | package v2023 2 | 3 | import ( 4 | "github.com/maxgio92/krawler/pkg/distro" 5 | "github.com/maxgio92/krawler/pkg/packages" 6 | ) 7 | 8 | // DefaultConfig is the default configuration for scrape Amazon Linux (RPM) packages. 9 | // As of now URI templating depends on distro's viper.AllSettings() data. 
10 | var DefaultConfig = distro.Config{ 11 | Mirrors: []packages.Mirror{ 12 | { 13 | Name: "AL2023", 14 | URL: "https://cdn.amazonlinux.com/al2023/core/mirrors/", 15 | }, 16 | }, 17 | Repositories: []packages.Repository{ 18 | {Name: "", URI: "latest"}, 19 | {Name: "", URI: "2023.5.20240730"}, 20 | }, 21 | Archs: []packages.Architecture{ 22 | "aarch64", 23 | "x86_64", 24 | "ppc64le", 25 | }, 26 | Versions: []distro.Version{""}, 27 | } 28 | -------------------------------------------------------------------------------- /pkg/distro/archlinux/archlinux.go: -------------------------------------------------------------------------------- 1 | //go:build archlinux 2 | 3 | /* 4 | Copyright © 2022 maxgio92 5 | 6 | Licensed under the Apache License, Version 2.0 (the "License"); 7 | you may not use this file except in compliance with the License. 8 | You may obtain a copy of the License at 9 | 10 | http://www.apache.org/licenses/LICENSE-2.0 11 | 12 | Unless required by applicable law or agreed to in writing, software 13 | distributed under the License is distributed on an "AS IS" BASIS, 14 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | See the License for the specific language governing permissions and 16 | limitations under the License. 
17 | */ 18 | 19 | package archlinux 20 | 21 | import ( 22 | "fmt" 23 | "net/url" 24 | "strconv" 25 | "strings" 26 | "time" 27 | 28 | "github.com/pkg/errors" 29 | 30 | "github.com/maxgio92/krawler/pkg/distro" 31 | "github.com/maxgio92/krawler/pkg/packages" 32 | "github.com/maxgio92/krawler/pkg/packages/alpm" 33 | ) 34 | 35 | type ArchLinux struct { 36 | config distro.Config 37 | } 38 | 39 | func (a *ArchLinux) Configure(config distro.Config) error { 40 | cfg, err := a.buildConfig(DefaultConfig, config) 41 | if err != nil { 42 | return err 43 | } 44 | 45 | a.config = cfg 46 | 47 | return nil 48 | } 49 | 50 | // GetPackages scrapes each mirror, for each distro version, for each repository, 51 | // for each architecture, and returns slice of Package and optionally an error. 52 | func (a *ArchLinux) SearchPackages(options packages.SearchOptions) ([]packages.Package, error) { 53 | a.config.Output.Logger = options.Log() 54 | 55 | mirrorURLs := []*url.URL{} 56 | 57 | // Get current release mirrors. 58 | currentURLs, err := a.buildMirrorURLs() 59 | if err != nil { 60 | return nil, errors.Wrap(err, "error building mirror URLs") 61 | } 62 | 63 | mirrorURLs = append(mirrorURLs, currentURLs...) 64 | 65 | // Get archive mirrors. 66 | archiveURLs, err := a.buildArchiveURLs(archiveMirrorURLs, archiveRepos) 67 | if err != nil { 68 | return nil, errors.Wrap(err, "error building archive mirror URLs") 69 | } 70 | 71 | mirrorURLs = append(mirrorURLs, archiveURLs...) 72 | 73 | // Build available repository URLs based on provided configuration, 74 | // for each distribution version. 75 | repositoryURLs, err := a.buildRepositoriesURLs(mirrorURLs, a.config.Repositories) 76 | if err != nil { 77 | return nil, err 78 | } 79 | 80 | packageNames := []string{options.PackageName()} 81 | packageNames = append(packageNames, additionalKernelHeadersPackages...) 
82 | 83 | dbURLs, err := buildDBURLs(repositoryURLs) 84 | if err != nil { 85 | return nil, errors.Wrap(err, "error building DB urls") 86 | } 87 | 88 | searchOptions := alpm.NewSearchOptions(&options, dbURLs, packageNames) 89 | res, err := alpm.SearchPackages(searchOptions) 90 | if err != nil { 91 | return nil, errors.Wrap(err, "searching packages") 92 | } 93 | 94 | return res, nil 95 | } 96 | 97 | func (a *ArchLinux) buildMirrorURLs() ([]*url.URL, error) { 98 | mirrorURLs := []*url.URL{} 99 | for _, v := range a.config.Mirrors { 100 | u, err := url.Parse(v.URL) 101 | if err != nil { 102 | return nil, errors.Wrap(err, "error parsing mirror URL") 103 | } 104 | 105 | mirrorURLs = append(mirrorURLs, u) 106 | } 107 | 108 | return mirrorURLs, nil 109 | } 110 | 111 | // Returns the list of repositories URLs. 112 | func (a *ArchLinux) buildRepositoriesURLs(roots []*url.URL, repositories []packages.Repository) ([]string, error) { 113 | var urls []string 114 | 115 | for _, root := range roots { 116 | //nolint:revive,stylecheck 117 | for _, r := range repositories { 118 | // Get repository URL from URI. 119 | //nolint:revive,stylecheck 120 | us, err := url.JoinPath(root.String(), string(r.URI)) 121 | if err != nil { 122 | return nil, err 123 | } 124 | 125 | urls = append(urls, us) 126 | } 127 | } 128 | 129 | return urls, nil 130 | } 131 | 132 | // buildArchiveURLs build a list of archive reposity URLs of the last 12 months. 133 | func (a *ArchLinux) buildArchiveURLs(mirrorURLs []string, repositoryNames []string) ([]*url.URL, error) { 134 | now := time.Now() 135 | 136 | // Get last 12 months. 137 | lastMonths := []time.Time{} 138 | i := 0 139 | for i < archiveMonthRetention { 140 | i++ 141 | lastMonths = append(lastMonths, now.AddDate(0, -i, 0)) 142 | } 143 | 144 | // Build the last 12 months archive URLs. 
145 | seeds := []string{} 146 | releaseDay := strconv.Itoa(archiveReleaseDayOfMonth) 147 | for _, v := range mirrorURLs { 148 | for _, m := range lastMonths { 149 | u, err := url.JoinPath( 150 | v, 151 | fmt.Sprintf("%04d", int(m.Year())), 152 | fmt.Sprintf("%02d", int(m.Month())), 153 | releaseDay, 154 | "/", 155 | ) 156 | if err != nil { 157 | return nil, err 158 | } 159 | 160 | seeds = append(seeds, u) 161 | } 162 | } 163 | 164 | archiveURLs := []*url.URL{} 165 | for _, v := range seeds { 166 | u, err := url.Parse(v) 167 | if err != nil { 168 | return nil, err 169 | } 170 | 171 | archiveURLs = append(archiveURLs, u) 172 | } 173 | 174 | return archiveURLs, nil 175 | } 176 | 177 | // Returns the list of default repositories from the default config. 178 | func (a *ArchLinux) getDefaultRepositories() []packages.Repository { 179 | var repositories []packages.Repository 180 | 181 | for _, repository := range DefaultConfig.Repositories { 182 | if !distro.RepositorySliceContains(repositories, repository) { 183 | repositories = append(repositories, repository) 184 | } 185 | } 186 | 187 | return repositories 188 | } 189 | 190 | func buildDBURLs(repoURLs []string) ([]string, error) { 191 | dbURLs := []string{} 192 | for _, v := range repoURLs { 193 | var dbURL string 194 | var repo string 195 | 196 | switch { 197 | case strings.Contains(v, RepoCoreDebug): 198 | repo = RepoCoreDebug 199 | case strings.Contains(v, RepoCore): 200 | repo = RepoCore 201 | case strings.Contains(v, RepoCommunity): 202 | repo = RepoCommunity 203 | case strings.Contains(v, RepoCommunityDebug): 204 | repo = RepoCommunityDebug 205 | case strings.Contains(v, RepoCommunityTestingDebug): 206 | repo = RepoCommunityTestingDebug 207 | case strings.Contains(v, RepoCommunityTesting): 208 | repo = RepoCommunityTesting 209 | case strings.Contains(v, RepoCommunityStagingDebug): 210 | repo = RepoCommunityStagingDebug 211 | case strings.Contains(v, RepoCommunityStaging): 212 | repo = RepoCommunityStaging 213 | 
case strings.Contains(v, RepoExtraDebug): 214 | repo = RepoExtraDebug 215 | case strings.Contains(v, RepoExtra): 216 | repo = RepoExtra 217 | case strings.Contains(v, RepoStagingDebug): 218 | repo = RepoStagingDebug 219 | case strings.Contains(v, RepoStaging): 220 | repo = RepoStaging 221 | case strings.Contains(v, RepoTestingDebug): 222 | repo = RepoTestingDebug 223 | case strings.Contains(v, RepoTesting): 224 | repo = RepoTesting 225 | default: 226 | repo = RepoCore 227 | } 228 | 229 | dbURL, err := url.JoinPath(v, fmt.Sprintf("%s.db.tar.gz", repo)) 230 | if err != nil { 231 | return nil, err 232 | } 233 | 234 | dbURLs = append(dbURLs, dbURL) 235 | } 236 | 237 | return dbURLs, nil 238 | } 239 | -------------------------------------------------------------------------------- /pkg/distro/archlinux/config.go: -------------------------------------------------------------------------------- 1 | //go:build archlinux 2 | 3 | /* 4 | Copyright © 2022 maxgio92 5 | 6 | Licensed under the Apache License, Version 2.0 (the "License"); 7 | you may not use this file except in compliance with the License. 8 | You may obtain a copy of the License at 9 | 10 | http://www.apache.org/licenses/LICENSE-2.0 11 | 12 | Unless required by applicable law or agreed to in writing, software 13 | distributed under the License is distributed on an "AS IS" BASIS, 14 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | See the License for the specific language governing permissions and 16 | limitations under the License. 
17 | */ 18 | 19 | package archlinux 20 | 21 | import ( 22 | "net/url" 23 | "strings" 24 | 25 | "github.com/maxgio92/krawler/pkg/distro" 26 | "github.com/maxgio92/krawler/pkg/packages" 27 | ) 28 | 29 | func (a *ArchLinux) buildConfig(def distro.Config, user distro.Config) (distro.Config, error) { 30 | config, err := a.mergeConfig(def, user) 31 | if err != nil { 32 | return distro.Config{}, err 33 | } 34 | 35 | err = a.sanitizeConfig(&config) 36 | if err != nil { 37 | return distro.Config{}, err 38 | } 39 | 40 | // Build templated repositories URIs against built-in variables (archs). 41 | archs := make([]interface{}, 0, len(config.Archs)) 42 | for _, v := range config.Archs { 43 | archs = append(archs, string(v)) 44 | } 45 | if err = config.BuildTemplates(map[string]interface{}{ 46 | "archs": archs, 47 | }); err != nil { 48 | return distro.Config{}, err 49 | } 50 | 51 | return config, nil 52 | } 53 | 54 | // Returns the final configuration by merging the default with the user provided. 55 | // 56 | //nolint:unparam 57 | func (a *ArchLinux) mergeConfig(def distro.Config, config distro.Config) (distro.Config, error) { 58 | if len(config.Archs) < 1 { 59 | config.Archs = def.Archs 60 | } else { 61 | for _, arch := range config.Archs { 62 | if arch == "" { 63 | config.Archs = def.Archs 64 | 65 | break 66 | } 67 | } 68 | } 69 | 70 | if len(config.Mirrors) < 1 { 71 | config.Mirrors = def.Mirrors 72 | } else { 73 | for _, mirror := range config.Mirrors { 74 | if mirror.URL == "" { 75 | config.Mirrors = def.Mirrors 76 | 77 | break 78 | } 79 | } 80 | } 81 | 82 | if len(config.Repositories) < 1 { 83 | config.Repositories = a.getDefaultRepositories() 84 | } else { 85 | for _, repository := range config.Repositories { 86 | if repository.URI == "" { 87 | config.Repositories = a.getDefaultRepositories() 88 | 89 | break 90 | } 91 | } 92 | } 93 | 94 | return config, nil 95 | } 96 | 97 | func (a *ArchLinux) sanitizeConfig(config *distro.Config) error { 98 | err := 
a.sanitizeMirrors(&config.Mirrors) 99 | if err != nil { 100 | return err 101 | } 102 | 103 | return nil 104 | } 105 | 106 | func (a *ArchLinux) sanitizeMirrors(mirrors *[]packages.Mirror) error { 107 | for i, mirror := range *mirrors { 108 | if !strings.HasSuffix(mirror.URL, "/") { 109 | (*mirrors)[i].URL = mirror.URL + "/" 110 | } 111 | 112 | _, err := url.Parse(mirror.URL) 113 | if err != nil { 114 | return err 115 | } 116 | } 117 | 118 | return nil 119 | } 120 | -------------------------------------------------------------------------------- /pkg/distro/archlinux/constants.go: -------------------------------------------------------------------------------- 1 | //go:build archlinux 2 | 3 | /* 4 | Copyright © 2022 maxgio92 5 | 6 | Licensed under the Apache License, Version 2.0 (the "License"); 7 | you may not use this file except in compliance with the License. 8 | You may obtain a copy of the License at 9 | 10 | http://www.apache.org/licenses/LICENSE-2.0 11 | 12 | Unless required by applicable law or agreed to in writing, software 13 | distributed under the License is distributed on an "AS IS" BASIS, 14 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | See the License for the specific language governing permissions and 16 | limitations under the License. 
17 | */ 18 | 19 | package archlinux 20 | 21 | import ( 22 | "github.com/maxgio92/krawler/pkg/distro" 23 | "github.com/maxgio92/krawler/pkg/packages" 24 | ) 25 | 26 | const ( 27 | RepoCore = "core" 28 | RepoCoreDebug = "core-debug" 29 | RepoCommunity = "community" 30 | RepoCommunityTesting = "community-testing" 31 | RepoCommunityStaging = "community-staging" 32 | RepoCommunityDebug = "community-debug" 33 | RepoCommunityTestingDebug = "community-testing-debug" 34 | RepoCommunityStagingDebug = "community-staging-debug" 35 | RepoExtra = "extra" 36 | RepoExtraDebug = "extra-debug" 37 | RepoStaging = "staging" 38 | RepoStagingDebug = "staging-debug" 39 | RepoTesting = "testing" 40 | RepoTestingDebug = "testing-debug" 41 | 42 | archiveMonthRetention = 3 43 | archiveReleaseDayOfMonth = 10 44 | archiveMirror = "https://archive.archlinux.org/repos/" 45 | ) 46 | 47 | var ( 48 | // As Arch Linux is a rolling release distribution, we need archives to track previous rollouts. 49 | repos = []string{ 50 | RepoCore, 51 | RepoCoreDebug, 52 | RepoCommunity, 53 | RepoCommunityTesting, 54 | RepoCommunityDebug, 55 | RepoCommunityTestingDebug, 56 | RepoCommunityStagingDebug, 57 | RepoExtra, 58 | RepoExtraDebug, 59 | RepoStaging, 60 | RepoStagingDebug, 61 | RepoTesting, 62 | RepoTestingDebug, 63 | } 64 | 65 | DefaultConfig = distro.Config{ 66 | Mirrors: []packages.Mirror{ 67 | {Name: "arm64", URL: "http://de.mirror.archlinuxarm.org/aarch64/"}, 68 | {Name: "arm64", URL: "http://de.mirror.archlinuxarm.org/aarch64/"}, 69 | {Name: "arm32", URL: "http://de.mirror.archlinuxarm.org/armv7h/"}, 70 | {Name: "archmirror", URL: "https://archmirror.it/repos/"}, 71 | {Name: "kernel.org", URL: "https://mirrors.edge.kernel.org/archlinux/"}, 72 | }, 73 | Repositories: []packages.Repository{ 74 | {Name: "core", URI: packages.URITemplate("/core/os/{{ .archs }}/")}, 75 | {Name: "aur", URI: packages.URITemplate("/aur/os/{{ .archs }}/")}, 76 | {Name: "community", URI: packages.URITemplate("/community/os/{{ 
.archs }}/")}, 77 | {Name: "extra", URI: packages.URITemplate("/extra/os/{{ .archs }}/")}, 78 | 79 | // Architecture is embedded already in the mirror URL. 80 | {Name: "core-arm", URI: packages.URITemplate("/core-arm/")}, 81 | {Name: "aur-arm", URI: packages.URITemplate("/aur-arm/")}, 82 | {Name: "community-arm", URI: packages.URITemplate("/community-arm/")}, 83 | {Name: "extra-arm", URI: packages.URITemplate("/extra-arm/")}, 84 | }, 85 | Archs: []packages.Architecture{ 86 | "x86_64", 87 | "aarch64", 88 | "armv7h", 89 | }, 90 | 91 | // Arch Linux is a rollin-release distribution. 92 | Versions: nil, 93 | } 94 | 95 | archiveMirrorURLs = []string{archiveMirror} 96 | archiveRepos = repos 97 | 98 | additionalKernelHeadersPackages = []string{ 99 | "linux-zen-headers", 100 | "linux-lts-headers", 101 | "linux-hardened-headers", 102 | "linux-aarch64-headers", 103 | "linux-armv7-headers", 104 | } 105 | ) 106 | -------------------------------------------------------------------------------- /pkg/distro/centos/centos.go: -------------------------------------------------------------------------------- 1 | package centos 2 | 3 | import ( 4 | "net/url" 5 | 6 | "github.com/maxgio92/krawler/pkg/distro" 7 | "github.com/maxgio92/krawler/pkg/output" 8 | "github.com/maxgio92/krawler/pkg/packages" 9 | "github.com/maxgio92/krawler/pkg/packages/rpm" 10 | "github.com/maxgio92/krawler/pkg/scrape" 11 | ) 12 | 13 | type Centos struct { 14 | config distro.Config 15 | } 16 | 17 | func (c *Centos) Configure(config distro.Config) error { 18 | cfg, err := c.buildConfig(DefaultConfig, config) 19 | if err != nil { 20 | return err 21 | } 22 | 23 | c.config = cfg 24 | 25 | return nil 26 | } 27 | 28 | // GetPackages scrapes each mirror, for each distro version, for each repository, 29 | // for each architecture, and returns slice of Package and optionally an error. 
30 | func (c *Centos) SearchPackages(options packages.SearchOptions) ([]packages.Package, error) { 31 | c.config.Output.Logger = options.Log() 32 | 33 | // Build distribution version-specific mirror root URLs. 34 | perVersionMirrorUrls, err := c.buildPerVersionMirrorUrls(c.config.Mirrors, c.config.Versions) 35 | if err != nil { 36 | return nil, err 37 | } 38 | 39 | // Build available repository URLs based on provided configuration, 40 | // for each distribution version. 41 | repositoryURLs, err := c.buildRepositoriesUrls(perVersionMirrorUrls, c.config.Repositories) 42 | if err != nil { 43 | return nil, err 44 | } 45 | 46 | // Get RPM packages from each repository. 47 | rss := []string{} 48 | for _, ru := range repositoryURLs { 49 | rss = append(rss, ru.String()) 50 | } 51 | searchOptions := rpm.NewSearchOptions(&options, c.config.Archs, rss) 52 | rpmPackages, err := rpm.SearchPackages(searchOptions) 53 | if err != nil { 54 | return nil, err 55 | } 56 | 57 | return rpmPackages, nil 58 | } 59 | 60 | // Returns the list of version-specific mirror URLs. 61 | func (c *Centos) buildPerVersionMirrorUrls(mirrors []packages.Mirror, versions []distro.Version) ([]*url.URL, error) { 62 | versions, err := c.buildVersions(mirrors, versions) 63 | if err != nil { 64 | return []*url.URL{}, err 65 | } 66 | 67 | if (len(versions) > 0) && (len(mirrors) > 0) { 68 | var versionRoots []*url.URL 69 | 70 | for _, mirror := range mirrors { 71 | for _, version := range versions { 72 | versionRoot, err := url.Parse(mirror.URL + string(version)) 73 | if err != nil { 74 | return nil, err 75 | } 76 | 77 | versionRoots = append(versionRoots, versionRoot) 78 | } 79 | } 80 | 81 | return versionRoots, nil 82 | } 83 | 84 | return nil, distro.ErrNoDistroVersionSpecified 85 | } 86 | 87 | // Returns a list of distro versions, considering the user-provided configuration, 88 | // and if not, the ones available on configured mirrors. 
89 | func (c *Centos) buildVersions(mirrors []packages.Mirror, staticVersions []distro.Version) ([]distro.Version, error) { 90 | if staticVersions != nil { 91 | return staticVersions, nil 92 | } 93 | 94 | var dynamicVersions []distro.Version 95 | 96 | dynamicVersions, err := c.crawlVersions(mirrors) 97 | if err != nil { 98 | return nil, err 99 | } 100 | 101 | return dynamicVersions, nil 102 | } 103 | 104 | // Returns the list of the current available distro versions, by scraping 105 | // the specified mirrors, dynamically. 106 | func (c *Centos) crawlVersions(mirrors []packages.Mirror) ([]distro.Version, error) { 107 | versions := []distro.Version{} 108 | 109 | seedUrls := make([]*url.URL, 0, len(mirrors)) 110 | 111 | for _, mirror := range mirrors { 112 | u, err := url.Parse(mirror.URL) 113 | if err != nil { 114 | return []distro.Version{}, err 115 | } 116 | 117 | seedUrls = append(seedUrls, u) 118 | } 119 | 120 | folderNames, err := scrape.CrawlFolders( 121 | seedUrls, 122 | CentosMirrorsDistroVersionRegex, 123 | false, 124 | c.config.Output.Verbosity >= output.DebugLevel, 125 | ) 126 | if err != nil { 127 | return []distro.Version{}, err 128 | } 129 | 130 | for _, v := range folderNames { 131 | versions = append(versions, distro.Version(v)) 132 | } 133 | 134 | return versions, nil 135 | } 136 | 137 | // Returns the list of repositories URLs. 138 | func (c *Centos) buildRepositoriesUrls(roots []*url.URL, repositories []packages.Repository) ([]*url.URL, error) { 139 | var urls []*url.URL 140 | 141 | for _, root := range roots { 142 | //nolint:revive,stylecheck 143 | for _, r := range repositories { 144 | // Get repository URL from URI. 
145 | //nolint:revive,stylecheck 146 | us, err := url.JoinPath(root.String(), string(r.URI)) 147 | if err != nil { 148 | return nil, err 149 | } 150 | 151 | repositoryUrl, err := url.Parse(us) 152 | if err != nil { 153 | return nil, err 154 | } 155 | 156 | urls = append(urls, repositoryUrl) 157 | } 158 | } 159 | 160 | return urls, nil 161 | } 162 | 163 | // Returns the list of default repositories from the default config. 164 | func (c *Centos) getDefaultRepositories() []packages.Repository { 165 | var repositories []packages.Repository 166 | 167 | for _, repository := range DefaultConfig.Repositories { 168 | if !distro.RepositorySliceContains(repositories, repository) { 169 | repositories = append(repositories, repository) 170 | } 171 | } 172 | 173 | return repositories 174 | } 175 | -------------------------------------------------------------------------------- /pkg/distro/centos/config.go: -------------------------------------------------------------------------------- 1 | package centos 2 | 3 | import ( 4 | "net/url" 5 | "strings" 6 | 7 | "github.com/maxgio92/krawler/pkg/distro" 8 | "github.com/maxgio92/krawler/pkg/packages" 9 | ) 10 | 11 | func (c *Centos) buildConfig(def distro.Config, user distro.Config) (distro.Config, error) { 12 | config, err := c.mergeConfig(def, user) 13 | if err != nil { 14 | return distro.Config{}, err 15 | } 16 | 17 | err = c.sanitizeConfig(&config) 18 | if err != nil { 19 | return distro.Config{}, err 20 | } 21 | 22 | // Build templated repositories URIs against built-in variables (archs). 23 | archs := make([]interface{}, 0, len(config.Archs)) 24 | for _, v := range config.Archs { 25 | archs = append(archs, string(v)) 26 | } 27 | if err = config.BuildTemplates(map[string]interface{}{ 28 | "archs": archs, 29 | }); err != nil { 30 | return distro.Config{}, err 31 | } 32 | 33 | return config, nil 34 | } 35 | 36 | // Returns the final configuration by merging the default with the user provided. 
37 | // 38 | //nolint:unparam 39 | func (c *Centos) mergeConfig(def distro.Config, config distro.Config) (distro.Config, error) { 40 | if len(config.Archs) < 1 { 41 | config.Archs = def.Archs 42 | } else { 43 | for _, arch := range config.Archs { 44 | if arch == "" { 45 | config.Archs = def.Archs 46 | 47 | break 48 | } 49 | } 50 | } 51 | 52 | if len(config.Mirrors) < 1 { 53 | config.Mirrors = def.Mirrors 54 | } else { 55 | for _, mirror := range config.Mirrors { 56 | if mirror.URL == "" { 57 | config.Mirrors = def.Mirrors 58 | 59 | break 60 | } 61 | } 62 | } 63 | 64 | if len(config.Repositories) < 1 { 65 | config.Repositories = c.getDefaultRepositories() 66 | } else { 67 | for _, repository := range config.Repositories { 68 | if repository.URI == "" { 69 | config.Repositories = c.getDefaultRepositories() 70 | 71 | break 72 | } 73 | } 74 | } 75 | 76 | return config, nil 77 | } 78 | 79 | func (c *Centos) sanitizeConfig(config *distro.Config) error { 80 | err := c.sanitizeMirrors(&config.Mirrors) 81 | if err != nil { 82 | return err 83 | } 84 | 85 | return nil 86 | } 87 | 88 | func (c *Centos) sanitizeMirrors(mirrors *[]packages.Mirror) error { 89 | for i, mirror := range *mirrors { 90 | if !strings.HasSuffix(mirror.URL, "/") { 91 | (*mirrors)[i].URL = mirror.URL + "/" 92 | } 93 | 94 | _, err := url.Parse(mirror.URL) 95 | if err != nil { 96 | return err 97 | } 98 | } 99 | 100 | return nil 101 | } 102 | -------------------------------------------------------------------------------- /pkg/distro/centos/constants.go: -------------------------------------------------------------------------------- 1 | package centos 2 | 3 | import ( 4 | "github.com/maxgio92/krawler/pkg/distro" 5 | "github.com/maxgio92/krawler/pkg/packages" 6 | ) 7 | 8 | const ( 9 | // Default regex to base the distro version detection on. 
10 | CentosMirrorsDistroVersionRegex = `^(0|[1-9]\d*)(\.(0|[1-9]\d*)?)?(\.(0|[1-9]\d*)?)?(-[a-zA-Z\d][-a-zA-Z.\d]*)?(\+[a-zA-Z\d][-a-zA-Z.\d]*)?\/$` 11 | ) 12 | 13 | var DefaultConfig = distro.Config{ 14 | Mirrors: []packages.Mirror{ 15 | {URL: "https://mirrors.edge.kernel.org/centos/"}, 16 | {URL: "https://archive.kernel.org/centos-vault/"}, 17 | }, 18 | Repositories: []packages.Repository{ 19 | {Name: "base", URI: packages.URITemplate("/os/{{ .archs }}/")}, 20 | {Name: "updates", URI: packages.URITemplate("/updates/{{ .archs }}/")}, 21 | {Name: "BaseOS", URI: packages.URITemplate("/BaseOS/{{ .archs }}/os/")}, 22 | {Name: "AppStream", URI: packages.URITemplate("/AppStream/{{ .archs }}/os/")}, 23 | {Name: "Devel", URI: packages.URITemplate("/Devel/{{ .archs }}/os/")}, 24 | }, 25 | Archs: []packages.Architecture{ 26 | "aarch64", 27 | "x86_64", 28 | "ppc64le", 29 | }, 30 | Versions: nil, 31 | } 32 | -------------------------------------------------------------------------------- /pkg/distro/constants.go: -------------------------------------------------------------------------------- 1 | package distro 2 | 3 | import "github.com/maxgio92/krawler/pkg/packages" 4 | 5 | const ( 6 | X8664Arch packages.Architecture = "x86_64" 7 | 8 | // DefaultArch is the default architecture for which scrape for packages. 
9 | DefaultArch = X8664Arch 10 | CentosType = "centos" 11 | AmazonLinuxV1Type = "amazonlinux" 12 | AmazonLinuxV2Type = "amazonlinux2" 13 | AmazonLinuxV2022Type = "amazonlinux2022" 14 | AmazonLinuxV2023Type = "amazonlinux2023" 15 | DebianType = "debian" 16 | UbuntuType = "ubuntu" 17 | FedoraType = "fedora" 18 | OracleType = "oracle" 19 | ArchLinuxType = "archlinux" 20 | ) 21 | -------------------------------------------------------------------------------- /pkg/distro/debian/config.go: -------------------------------------------------------------------------------- 1 | package debian 2 | 3 | import ( 4 | "net/url" 5 | "strings" 6 | 7 | "github.com/maxgio92/krawler/pkg/distro" 8 | "github.com/maxgio92/krawler/pkg/packages" 9 | ) 10 | 11 | func (d *Debian) BuildConfig(def distro.Config, user distro.Config) (distro.Config, error) { 12 | config, err := d.mergeConfig(def, user) 13 | if err != nil { 14 | return distro.Config{}, err 15 | } 16 | 17 | err = d.sanitizeConfig(&config) 18 | if err != nil { 19 | return distro.Config{}, err 20 | } 21 | 22 | return config, nil 23 | } 24 | 25 | // Returns the final configuration by merging the default with the user provided. 
26 | // 27 | //nolint:unparam 28 | func (d *Debian) mergeConfig(def distro.Config, config distro.Config) (distro.Config, error) { 29 | if len(config.Archs) < 1 { 30 | config.Archs = def.Archs 31 | } else { 32 | for _, arch := range config.Archs { 33 | if arch == "" { 34 | config.Archs = def.Archs 35 | 36 | break 37 | } 38 | } 39 | } 40 | 41 | if len(config.Mirrors) < 1 { 42 | config.Mirrors = def.Mirrors 43 | } else { 44 | for _, mirror := range config.Mirrors { 45 | if mirror.URL == "" { 46 | config.Mirrors = def.Mirrors 47 | 48 | break 49 | } 50 | } 51 | } 52 | 53 | if len(config.Repositories) < 1 { 54 | config.Repositories = d.getDefaultRepositories() 55 | } else { 56 | for _, repository := range config.Repositories { 57 | if repository.URI == "" { 58 | config.Repositories = d.getDefaultRepositories() 59 | 60 | break 61 | } 62 | } 63 | } 64 | 65 | return config, nil 66 | } 67 | 68 | func (d *Debian) sanitizeConfig(config *distro.Config) error { 69 | err := d.sanitizeMirrors(&config.Mirrors) 70 | if err != nil { 71 | return err 72 | } 73 | 74 | return nil 75 | } 76 | 77 | func (d *Debian) sanitizeMirrors(mirrors *[]packages.Mirror) error { 78 | for i, mirror := range *mirrors { 79 | if !strings.HasSuffix(mirror.URL, "/") { 80 | (*mirrors)[i].URL = mirror.URL + "/" 81 | } 82 | 83 | _, err := url.Parse(mirror.URL) 84 | if err != nil { 85 | return err 86 | } 87 | } 88 | 89 | return nil 90 | } 91 | -------------------------------------------------------------------------------- /pkg/distro/debian/constants.go: -------------------------------------------------------------------------------- 1 | package debian 2 | 3 | import ( 4 | "github.com/maxgio92/krawler/pkg/distro" 5 | "github.com/maxgio92/krawler/pkg/packages" 6 | ) 7 | 8 | const ( 9 | DebianMirrorsDistroVersionRegex = `^.+$` 10 | DefaultArch = X8664Arch 11 | X8664Arch packages.Architecture = "amd64" 12 | ) 13 | 14 | var DefaultConfig = distro.Config{ 15 | Mirrors: []packages.Mirror{ 16 | {URL: 
"https://mirrors.edge.kernel.org/debian/"}, 17 | {URL: "http://security.debian.org"}, 18 | }, 19 | Repositories: []packages.Repository{ 20 | {Name: "main", URI: packages.URITemplate("main")}, 21 | {Name: "contrib", URI: packages.URITemplate("contrib")}, 22 | {Name: "non-free", URI: packages.URITemplate("non-free")}, 23 | {Name: "multiverse", URI: packages.URITemplate("multiverse")}, 24 | {Name: "universe", URI: packages.URITemplate("universe")}, 25 | {Name: "restricted", URI: packages.URITemplate("restricted")}, 26 | }, 27 | Archs: nil, 28 | 29 | // Distribution versions, i.e. Debian dists 30 | Versions: nil, 31 | } 32 | -------------------------------------------------------------------------------- /pkg/distro/debian/debian.go: -------------------------------------------------------------------------------- 1 | package debian 2 | 3 | import ( 4 | "net/url" 5 | "path" 6 | "strings" 7 | 8 | "github.com/maxgio92/krawler/pkg/distro" 9 | "github.com/maxgio92/krawler/pkg/output" 10 | "github.com/maxgio92/krawler/pkg/packages" 11 | "github.com/maxgio92/krawler/pkg/packages/deb" 12 | "github.com/maxgio92/krawler/pkg/scrape" 13 | ) 14 | 15 | type Debian struct { 16 | Config distro.Config 17 | } 18 | 19 | func (d *Debian) Configure(config distro.Config) error { 20 | c, err := d.BuildConfig(DefaultConfig, config) 21 | if err != nil { 22 | return err 23 | } 24 | 25 | d.Config = c 26 | 27 | return nil 28 | } 29 | 30 | // GetPackages scrapes each mirror, for each distro version, for each repository, 31 | // for each architecture, and returns slice of Package and optionally an error. 32 | func (d *Debian) SearchPackages(options packages.SearchOptions) ([]packages.Package, error) { 33 | d.Config.Output.Logger = options.Log() 34 | 35 | // Build distribution version-specific seed URLs. 36 | // TODO: introduce support for Release index files, where InRelease does not exist. 
37 | distURLs, err := d.buildReleaseIndexURLs(d.Config.Mirrors, d.Config.Versions) 38 | if err != nil { 39 | return nil, err 40 | } 41 | 42 | components := []string{} 43 | for _, v := range d.Config.Repositories { 44 | components = append(components, strings.TrimPrefix(path.Clean(string(v.URI)), "/")) 45 | } 46 | 47 | searchOptions := deb.NewSearchOptions(&options, d.Config.Archs, distURLs, components) 48 | 49 | debs, err := deb.SearchPackages(searchOptions) 50 | if err != nil { 51 | return nil, err 52 | } 53 | 54 | return debs, nil 55 | } 56 | 57 | // Returns the list of version-specific mirror URLs. 58 | func (d *Debian) buildReleaseIndexURLs(mirrors []packages.Mirror, versions []distro.Version) ([]string, error) { 59 | versions, _ = d.buildVersions(mirrors, versions) 60 | 61 | if (len(versions) > 0) && (len(mirrors) > 0) { 62 | var versionRoots []string 63 | 64 | for _, mirror := range mirrors { 65 | for _, version := range versions { 66 | v, err := url.JoinPath(mirror.URL, "dists", string(version)) 67 | if err != nil { 68 | return nil, err 69 | } 70 | 71 | versionRoots = append(versionRoots, v) 72 | } 73 | } 74 | 75 | return versionRoots, nil 76 | } 77 | 78 | return nil, distro.ErrNoDistroVersionSpecified 79 | } 80 | 81 | // Returns a list of distro versions, considering the user-provided configuration, 82 | // and if not, the ones available on configured mirrors. 83 | func (d *Debian) buildVersions(mirrors []packages.Mirror, staticVersions []distro.Version) ([]distro.Version, error) { 84 | if staticVersions != nil { 85 | return staticVersions, nil 86 | } 87 | 88 | var dynamicVersions []distro.Version 89 | 90 | dynamicVersions, err := d.crawlVersions(mirrors) 91 | if err != nil { 92 | return nil, err 93 | } 94 | 95 | return dynamicVersions, nil 96 | } 97 | 98 | // Returns the list of the current available distro versions, by scraping 99 | // the specified mirrors, dynamically. 
100 | func (d *Debian) crawlVersions(mirrors []packages.Mirror) ([]distro.Version, error) { 101 | versions := []distro.Version{} 102 | 103 | seedUrls := make([]*url.URL, 0, len(mirrors)) 104 | 105 | for _, mirror := range mirrors { 106 | distsURL, err := url.JoinPath(mirror.URL, "dists/") 107 | if err != nil { 108 | return []distro.Version{}, err 109 | } 110 | 111 | u, err := url.Parse(distsURL) 112 | if err != nil { 113 | return []distro.Version{}, err 114 | } 115 | 116 | seedUrls = append(seedUrls, u) 117 | } 118 | 119 | folderNames, err := scrape.CrawlFolders( 120 | seedUrls, 121 | DebianMirrorsDistroVersionRegex, 122 | false, 123 | d.Config.Output.Verbosity >= output.DebugLevel, 124 | ) 125 | if err != nil { 126 | return []distro.Version{}, err 127 | } 128 | 129 | for _, v := range folderNames { 130 | versions = append(versions, distro.Version(v)) 131 | } 132 | 133 | return versions, nil 134 | } 135 | 136 | // Returns the list of default repositories from the default Config. 137 | func (d *Debian) getDefaultRepositories() []packages.Repository { 138 | var repositories []packages.Repository 139 | 140 | for _, repository := range DefaultConfig.Repositories { 141 | if !distro.RepositorySliceContains(repositories, repository) { 142 | repositories = append(repositories, repository) 143 | } 144 | } 145 | 146 | return repositories 147 | } 148 | -------------------------------------------------------------------------------- /pkg/distro/debian/release.go: -------------------------------------------------------------------------------- 1 | package debian 2 | 3 | import ( 4 | "fmt" 5 | 6 | "github.com/maxgio92/krawler/internal/utils" 7 | 8 | "pault.ag/go/archive" 9 | ) 10 | 11 | func GetReleasesFromPackages(packages []archive.Package) ([]string, error) { 12 | releases := []string{} 13 | 14 | if len(packages) > 0 { 15 | for _, v := range packages { 16 | releases = append(releases, fmt.Sprintf("%s-%s-%s", v.Version.Version, v.Version.Revision, v.Architecture.CPU)) 17 | } 
18 | } 19 | 20 | return utils.Unique(releases), nil 21 | } 22 | -------------------------------------------------------------------------------- /pkg/distro/distro.go: -------------------------------------------------------------------------------- 1 | package distro 2 | 3 | import ( 4 | "github.com/maxgio92/krawler/pkg/output" 5 | "github.com/maxgio92/krawler/pkg/packages" 6 | "github.com/maxgio92/krawler/pkg/utils/template" 7 | ) 8 | 9 | type Config struct { 10 | // A list of Mirrors to scrape. 11 | Mirrors []packages.Mirror 12 | 13 | // The mirrored repositories. 14 | Repositories []packages.Repository 15 | 16 | // A list of architecture for to which scrape packages. 17 | Archs []packages.Architecture 18 | 19 | // A list of Distro versions. 20 | Versions []Version 21 | 22 | // Options for visual output. 23 | Output output.Options `json:"output,omitempty"` 24 | } 25 | 26 | type Distro interface { 27 | // Configure expects distro.Config and arbitrary variables 28 | // for config fields that support templating. 29 | Configure(Config) error 30 | 31 | // GetPackages should return a slice of Package based on 32 | // the provided SearchOptions-type filter. 33 | SearchPackages(packages.SearchOptions) ([]packages.Package, error) 34 | } 35 | 36 | type Version string 37 | 38 | type Type string 39 | 40 | // BuildTemplates computes templated Config fields by evaluating the template against a set of variables, 41 | // expected as a map of string to interface argument. 42 | // As of now, only the URI field of Config.Repositories is a supported field to be templated. 43 | func (c *Config) BuildTemplates(vars map[string]interface{}) error { 44 | uris := []string{} 45 | 46 | for _, repository := range c.Repositories { 47 | if repository.URI != "" { 48 | result, err := template.MultiplexAndExecute(string(repository.URI), vars) 49 | if err != nil { 50 | return err 51 | } 52 | 53 | uris = append(uris, result...) 
54 | } 55 | } 56 | 57 | r := []packages.Repository{} 58 | for _, v := range uris { 59 | r = append(r, packages.Repository{Name: "", URI: packages.URITemplate(v)}) 60 | } 61 | 62 | c.Repositories = r 63 | 64 | return nil 65 | } 66 | -------------------------------------------------------------------------------- /pkg/distro/error.go: -------------------------------------------------------------------------------- 1 | package distro 2 | 3 | import "errors" 4 | 5 | var ( 6 | ErrDistroNotConfigured = errors.New("the distro has not been configured") 7 | ErrDistroNotFound = errors.New("no distribution found with the specified name") 8 | ErrNoDistroVersionSpecified = errors.New("no versions specified") 9 | ErrDomainsFromMirrorUrls = errors.New("error while retrieving DNS names from mirrors URLs") 10 | ) 11 | -------------------------------------------------------------------------------- /pkg/distro/fedora/config.go: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright © 2022 maxgio92 3 | 4 | Licensed under the Apache License, Version 2.0 (the "License"); 5 | you may not use this file except in compliance with the License. 6 | You may obtain a copy of the License at 7 | 8 | http://www.apache.org/licenses/LICENSE-2.0 9 | 10 | Unless required by applicable law or agreed to in writing, software 11 | distributed under the License is distributed on an "AS IS" BASIS, 12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | See the License for the specific language governing permissions and 14 | limitations under the License. 
15 | */ 16 | 17 | package fedora 18 | 19 | import ( 20 | "net/url" 21 | "strings" 22 | 23 | "github.com/maxgio92/krawler/pkg/distro" 24 | "github.com/maxgio92/krawler/pkg/packages" 25 | ) 26 | 27 | func (f *Fedora) buildConfig(def distro.Config, user distro.Config) (distro.Config, error) { 28 | config, err := f.mergeConfig(def, user) 29 | if err != nil { 30 | return distro.Config{}, err 31 | } 32 | 33 | err = f.sanitizeConfig(&config) 34 | if err != nil { 35 | return distro.Config{}, err 36 | } 37 | 38 | // Build templated repositories URIs against built-in variables (archs). 39 | archs := make([]interface{}, 0, len(config.Archs)) 40 | for _, v := range config.Archs { 41 | archs = append(archs, string(v)) 42 | } 43 | if err = config.BuildTemplates(map[string]interface{}{ 44 | "archs": archs, 45 | }); err != nil { 46 | return distro.Config{}, err 47 | } 48 | 49 | return config, nil 50 | } 51 | 52 | // Returns the final configuration by merging the default with the user provided. 53 | // 54 | //nolint:unparam 55 | func (f *Fedora) mergeConfig(def distro.Config, config distro.Config) (distro.Config, error) { 56 | if len(config.Archs) < 1 { 57 | config.Archs = def.Archs 58 | } else { 59 | for _, arch := range config.Archs { 60 | if arch == "" { 61 | config.Archs = def.Archs 62 | 63 | break 64 | } 65 | } 66 | } 67 | 68 | if len(config.Mirrors) < 1 { 69 | config.Mirrors = def.Mirrors 70 | } else { 71 | for _, mirror := range config.Mirrors { 72 | if mirror.URL == "" { 73 | config.Mirrors = def.Mirrors 74 | 75 | break 76 | } 77 | } 78 | } 79 | 80 | if len(config.Repositories) < 1 { 81 | config.Repositories = f.getDefaultRepositories() 82 | } else { 83 | for _, repository := range config.Repositories { 84 | if repository.URI == "" { 85 | config.Repositories = f.getDefaultRepositories() 86 | 87 | break 88 | } 89 | } 90 | } 91 | 92 | return config, nil 93 | } 94 | 95 | func (f *Fedora) sanitizeConfig(config *distro.Config) error { 96 | err := 
f.sanitizeMirrors(&config.Mirrors) 97 | if err != nil { 98 | return err 99 | } 100 | 101 | return nil 102 | } 103 | 104 | func (f *Fedora) sanitizeMirrors(mirrors *[]packages.Mirror) error { 105 | for i, mirror := range *mirrors { 106 | if !strings.HasSuffix(mirror.URL, "/") { 107 | (*mirrors)[i].URL = mirror.URL + "/" 108 | } 109 | 110 | _, err := url.Parse(mirror.URL) 111 | if err != nil { 112 | return err 113 | } 114 | } 115 | 116 | return nil 117 | } 118 | -------------------------------------------------------------------------------- /pkg/distro/fedora/constants.go: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright © 2022 maxgio92 3 | 4 | Licensed under the Apache License, Version 2.0 (the "License"); 5 | you may not use this file except in compliance with the License. 6 | You may obtain a copy of the License at 7 | 8 | http://www.apache.org/licenses/LICENSE-2.0 9 | 10 | Unless required by applicable law or agreed to in writing, software 11 | distributed under the License is distributed on an "AS IS" BASIS, 12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | See the License for the specific language governing permissions and 14 | limitations under the License. 15 | */ 16 | 17 | package fedora 18 | 19 | import ( 20 | "github.com/maxgio92/krawler/pkg/distro" 21 | "github.com/maxgio92/krawler/pkg/packages" 22 | ) 23 | 24 | const ( 25 | // Default regex to base the distro version detection on. 
26 | DistroVersionRegex = `^(0|[1-9]\d*)\/$` 27 | ) 28 | 29 | var DefaultConfig = distro.Config{ 30 | Mirrors: []packages.Mirror{ 31 | {Name: "releases", URL: "https://mirrors.edge.kernel.org/fedora/releases/"}, 32 | {Name: "updates", URL: "https://mirrors.edge.kernel.org/fedora/updates/"}, 33 | }, 34 | Repositories: []packages.Repository{ 35 | {Name: "releases", URI: packages.URITemplate("/Everything/{{ .archs }}/os/")}, 36 | {Name: "updates", URI: packages.URITemplate("/Everything/{{ .archs }}/")}, 37 | }, 38 | Archs: []packages.Architecture{ 39 | "aarch64", 40 | "x86_64", 41 | "armhfp", 42 | "ppc64le", 43 | }, 44 | Versions: nil, 45 | } 46 | -------------------------------------------------------------------------------- /pkg/distro/fedora/fedora.go: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright © 2022 maxgio92 3 | 4 | Licensed under the Apache License, Version 2.0 (the "License"); 5 | you may not use this file except in compliance with the License. 6 | You may obtain a copy of the License at 7 | 8 | http://www.apache.org/licenses/LICENSE-2.0 9 | 10 | Unless required by applicable law or agreed to in writing, software 11 | distributed under the License is distributed on an "AS IS" BASIS, 12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | See the License for the specific language governing permissions and 14 | limitations under the License. 
15 | */ 16 | 17 | package fedora 18 | 19 | import ( 20 | "net/url" 21 | 22 | "github.com/maxgio92/krawler/pkg/distro" 23 | "github.com/maxgio92/krawler/pkg/output" 24 | "github.com/maxgio92/krawler/pkg/packages" 25 | "github.com/maxgio92/krawler/pkg/packages/rpm" 26 | "github.com/maxgio92/krawler/pkg/scrape" 27 | ) 28 | 29 | type Fedora struct { 30 | config distro.Config 31 | } 32 | 33 | func (f *Fedora) Configure(config distro.Config) error { 34 | cfg, err := f.buildConfig(DefaultConfig, config) 35 | if err != nil { 36 | return err 37 | } 38 | 39 | f.config = cfg 40 | 41 | return nil 42 | } 43 | 44 | // GetPackages scrapes each mirror, for each distro version, for each repository, 45 | // for each architecture, and returns slice of Package and optionally an error. 46 | func (f *Fedora) SearchPackages(options packages.SearchOptions) ([]packages.Package, error) { 47 | f.config.Output.Logger = options.Log() 48 | 49 | // Build distribution version-specific mirror root URLs. 50 | perVersionMirrorUrls, err := f.buildPerVersionMirrorUrls(f.config.Mirrors, f.config.Versions) 51 | if err != nil { 52 | return nil, err 53 | } 54 | 55 | // Build available repository URLs based on provided configuration, 56 | // for each distribution version. 57 | repositoryURLs, err := f.buildRepositoriesUrls(perVersionMirrorUrls, f.config.Repositories) 58 | if err != nil { 59 | return nil, err 60 | } 61 | 62 | // Get RPM packages from each repository. 63 | rss := []string{} 64 | for _, ru := range repositoryURLs { 65 | rss = append(rss, ru.String()) 66 | } 67 | searchOptions := rpm.NewSearchOptions(&options, f.config.Archs, rss) 68 | rpmPackages, err := rpm.SearchPackages(searchOptions) 69 | if err != nil { 70 | return nil, err 71 | } 72 | 73 | return rpmPackages, nil 74 | } 75 | 76 | // Returns the list of version-specific mirror URLs. 
77 | func (f *Fedora) buildPerVersionMirrorUrls(mirrors []packages.Mirror, versions []distro.Version) ([]*url.URL, error) { 78 | versions, err := f.buildVersions(mirrors, versions) 79 | if err != nil { 80 | return []*url.URL{}, err 81 | } 82 | 83 | if (len(versions) > 0) && (len(mirrors) > 0) { 84 | var versionRoots []*url.URL 85 | 86 | for _, mirror := range mirrors { 87 | for _, version := range versions { 88 | versionRoot, err := url.Parse(mirror.URL + string(version)) 89 | if err != nil { 90 | return nil, err 91 | } 92 | 93 | versionRoots = append(versionRoots, versionRoot) 94 | } 95 | } 96 | 97 | return versionRoots, nil 98 | } 99 | 100 | return nil, distro.ErrNoDistroVersionSpecified 101 | } 102 | 103 | // Returns a list of distro versions, considering the user-provided configuration, 104 | // and if not, the ones available on configured mirrors. 105 | func (f *Fedora) buildVersions(mirrors []packages.Mirror, staticVersions []distro.Version) ([]distro.Version, error) { 106 | if staticVersions != nil { 107 | return staticVersions, nil 108 | } 109 | 110 | var dynamicVersions []distro.Version 111 | 112 | dynamicVersions, err := f.crawlVersions(mirrors) 113 | if err != nil { 114 | return nil, err 115 | } 116 | 117 | return dynamicVersions, nil 118 | } 119 | 120 | // Returns the list of the current available distro versions, by scraping 121 | // the specified mirrors, dynamically. 
122 | func (f *Fedora) crawlVersions(mirrors []packages.Mirror) ([]distro.Version, error) { 123 | versions := []distro.Version{} 124 | 125 | seedUrls := make([]*url.URL, 0, len(mirrors)) 126 | 127 | for _, mirror := range mirrors { 128 | u, err := url.Parse(mirror.URL) 129 | if err != nil { 130 | return []distro.Version{}, err 131 | } 132 | 133 | seedUrls = append(seedUrls, u) 134 | } 135 | 136 | folderNames, err := scrape.CrawlFolders( 137 | seedUrls, 138 | DistroVersionRegex, 139 | false, 140 | f.config.Output.Verbosity >= output.DebugLevel, 141 | ) 142 | if err != nil { 143 | return []distro.Version{}, err 144 | } 145 | 146 | for _, v := range folderNames { 147 | versions = append(versions, distro.Version(v)) 148 | } 149 | 150 | return versions, nil 151 | } 152 | 153 | // Returns the list of repositories URLs. 154 | func (f *Fedora) buildRepositoriesUrls(roots []*url.URL, repositories []packages.Repository) ([]*url.URL, error) { 155 | var urls []*url.URL 156 | 157 | for _, root := range roots { 158 | //nolint:revive,stylecheck 159 | for _, r := range repositories { 160 | // Get repository URL from URI. 161 | //nolint:revive,stylecheck 162 | us, err := url.JoinPath(root.String(), string(r.URI)) 163 | if err != nil { 164 | return nil, err 165 | } 166 | 167 | repositoryUrl, err := url.Parse(us) 168 | if err != nil { 169 | return nil, err 170 | } 171 | 172 | urls = append(urls, repositoryUrl) 173 | } 174 | } 175 | 176 | return urls, nil 177 | } 178 | 179 | // Returns the list of default repositories from the default config. 
180 | func (f *Fedora) getDefaultRepositories() []packages.Repository { 181 | var repositories []packages.Repository 182 | 183 | for _, repository := range DefaultConfig.Repositories { 184 | if !distro.RepositorySliceContains(repositories, repository) { 185 | repositories = append(repositories, repository) 186 | } 187 | } 188 | 189 | return repositories 190 | } 191 | -------------------------------------------------------------------------------- /pkg/distro/opensuse/config.go: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright © 2022 maxgio92 3 | 4 | Licensed under the Apache License, Version 2.0 (the "License"); 5 | you may not use this file except in compliance with the License. 6 | You may obtain a copy of the License at 7 | 8 | http://www.apache.org/licenses/LICENSE-2.0 9 | 10 | Unless required by applicable law or agreed to in writing, software 11 | distributed under the License is distributed on an "AS IS" BASIS, 12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | See the License for the specific language governing permissions and 14 | limitations under the License. 15 | */ 16 | 17 | package opensuse 18 | 19 | import ( 20 | "net/url" 21 | "strings" 22 | 23 | "github.com/maxgio92/krawler/pkg/distro" 24 | "github.com/maxgio92/krawler/pkg/packages" 25 | ) 26 | 27 | func (f *OpenSuse) buildConfig(def distro.Config, user distro.Config) (distro.Config, error) { 28 | config, err := f.mergeConfig(def, user) 29 | if err != nil { 30 | return distro.Config{}, err 31 | } 32 | 33 | err = f.sanitizeConfig(&config) 34 | if err != nil { 35 | return distro.Config{}, err 36 | } 37 | 38 | // Build templated repositories URIs against built-in variables (archs). 
39 | archs := make([]interface{}, 0, len(config.Archs)) 40 | for _, v := range config.Archs { 41 | archs = append(archs, string(v)) 42 | } 43 | if err = config.BuildTemplates(map[string]interface{}{ 44 | "archs": archs, 45 | }); err != nil { 46 | return distro.Config{}, err 47 | } 48 | 49 | return config, nil 50 | } 51 | 52 | // Returns the final configuration by merging the default with the user provided. 53 | // 54 | //nolint:unparam 55 | func (f *OpenSuse) mergeConfig(def distro.Config, config distro.Config) (distro.Config, error) { 56 | if len(config.Archs) < 1 { 57 | config.Archs = def.Archs 58 | } else { 59 | for _, arch := range config.Archs { 60 | if arch == "" { 61 | config.Archs = def.Archs 62 | 63 | break 64 | } 65 | } 66 | } 67 | 68 | if len(config.Mirrors) < 1 { 69 | config.Mirrors = def.Mirrors 70 | } else { 71 | for _, mirror := range config.Mirrors { 72 | if mirror.URL == "" { 73 | config.Mirrors = def.Mirrors 74 | 75 | break 76 | } 77 | } 78 | } 79 | 80 | if len(config.Repositories) < 1 { 81 | config.Repositories = f.getDefaultRepositories() 82 | } else { 83 | for _, repository := range config.Repositories { 84 | if repository.URI == "" { 85 | config.Repositories = f.getDefaultRepositories() 86 | 87 | break 88 | } 89 | } 90 | } 91 | 92 | return config, nil 93 | } 94 | 95 | func (f *OpenSuse) sanitizeConfig(config *distro.Config) error { 96 | err := f.sanitizeMirrors(&config.Mirrors) 97 | if err != nil { 98 | return err 99 | } 100 | 101 | return nil 102 | } 103 | 104 | func (f *OpenSuse) sanitizeMirrors(mirrors *[]packages.Mirror) error { 105 | for i, mirror := range *mirrors { 106 | if !strings.HasSuffix(mirror.URL, "/") { 107 | (*mirrors)[i].URL = mirror.URL + "/" 108 | } 109 | 110 | _, err := url.Parse(mirror.URL) 111 | if err != nil { 112 | return err 113 | } 114 | } 115 | 116 | return nil 117 | } 118 | -------------------------------------------------------------------------------- /pkg/distro/opensuse/constants.go: 
-------------------------------------------------------------------------------- 1 | /* 2 | Copyright © 2022 maxgio92 3 | 4 | Licensed under the Apache License, Version 2.0 (the "License"); 5 | you may not use this file except in compliance with the License. 6 | You may obtain a copy of the License at 7 | 8 | http://www.apache.org/licenses/LICENSE-2.0 9 | 10 | Unless required by applicable law or agreed to in writing, software 11 | distributed under the License is distributed on an "AS IS" BASIS, 12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | See the License for the specific language governing permissions and 14 | limitations under the License. 15 | */ 16 | 17 | package opensuse 18 | 19 | import ( 20 | "github.com/maxgio92/krawler/pkg/distro" 21 | "github.com/maxgio92/krawler/pkg/packages" 22 | ) 23 | 24 | const ( 25 | // Default regex to base the distro version detection on. 26 | // Match both SemVer and tags like 'openSUSE-stable'. 27 | DistroVersionRegex = `^.+\/$` 28 | ) 29 | 30 | var DefaultConfig = distro.Config{ 31 | Mirrors: []packages.Mirror{ 32 | {Name: "default", URL: "https://mirrors.edge.kernel.org/opensuse/distribution/"}, 33 | {Name: "tumbleweed", URL: "https://mirrors.edge.kernel.org/opensuse/"}, 34 | {Name: "leap", URL: "https://mirrors.edge.kernel.org/opensuse/distribution/leap/"}, 35 | {Name: "kernel", URL: "http://download.opensuse.org/repositories/Kernel:/"}, 36 | }, 37 | Repositories: []packages.Repository{ 38 | {Name: "default", URI: packages.URITemplate("/repo/oss/")}, 39 | {Name: "kernel-arm", URI: packages.URITemplate("/ARM/")}, 40 | {Name: "kernel-ppc", URI: packages.URITemplate("/PPC/")}, 41 | {Name: "kernel-riscv", URI: packages.URITemplate("/RISCV/")}, 42 | {Name: "kernel-s390", URI: packages.URITemplate("/S390/")}, 43 | {Name: "kernel-standard", URI: packages.URITemplate("/standard/")}, 44 | {Name: "kernel-ports", URI: packages.URITemplate("/ports/")}, 45 | {Name: "kernel-backport-standard", URI: 
packages.URITemplate("/Backport/standard")}, 46 | {Name: "kernel-backport-ports", URI: packages.URITemplate("/Backport/ports")}, 47 | {Name: "kernel-submit-standard", URI: packages.URITemplate("/Submit/standard/")}, 48 | {Name: "kernel-submit-ports", URI: packages.URITemplate("/Submit/ports/")}, 49 | }, 50 | Archs: []packages.Architecture{ 51 | "armv6hl", 52 | "armv7hl", 53 | "aarch64", 54 | "armhfp", 55 | "x86_64", 56 | "noarch", 57 | "i686", 58 | "ppc", 59 | "ppc64", 60 | "ppc64le", 61 | "s390x", 62 | }, 63 | 64 | // Crawl all versions by default, filtering names on the DistroVersionRegex regular expression. 65 | Versions: nil, 66 | } 67 | -------------------------------------------------------------------------------- /pkg/distro/opensuse/opensuse.go: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright © 2022 maxgio92 3 | 4 | Licensed under the Apache License, Version 2.0 (the "License"); 5 | you may not use this file except in compliance with the License. 6 | You may obtain a copy of the License at 7 | 8 | http://www.apache.org/licenses/LICENSE-2.0 9 | 10 | Unless required by applicable law or agreed to in writing, software 11 | distributed under the License is distributed on an "AS IS" BASIS, 12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | See the License for the specific language governing permissions and 14 | limitations under the License. 
15 | */ 16 | 17 | package opensuse 18 | 19 | import ( 20 | "net/url" 21 | 22 | "github.com/maxgio92/krawler/pkg/distro" 23 | "github.com/maxgio92/krawler/pkg/output" 24 | "github.com/maxgio92/krawler/pkg/packages" 25 | "github.com/maxgio92/krawler/pkg/packages/rpm" 26 | "github.com/maxgio92/krawler/pkg/scrape" 27 | ) 28 | 29 | type OpenSuse struct { 30 | config distro.Config 31 | } 32 | 33 | func (f *OpenSuse) Configure(config distro.Config) error { 34 | cfg, err := f.buildConfig(DefaultConfig, config) 35 | if err != nil { 36 | return err 37 | } 38 | 39 | f.config = cfg 40 | 41 | return nil 42 | } 43 | 44 | // GetPackages scrapes each mirror, for each distro version, for each repository, 45 | // for each architecture, and returns slice of Package and optionally an error. 46 | func (f *OpenSuse) SearchPackages(options packages.SearchOptions) ([]packages.Package, error) { 47 | f.config.Output.Logger = options.Log() 48 | 49 | // Build distribution version-specific mirror root URLs. 50 | perVersionMirrorUrls, err := f.buildPerVersionMirrorUrls(f.config.Mirrors, f.config.Versions) 51 | if err != nil { 52 | return nil, err 53 | } 54 | 55 | // Build available repository URLs based on provided configuration, 56 | // for each distribution version. 57 | repositoryURLs, err := f.buildRepositoriesUrls(perVersionMirrorUrls, f.config.Repositories) 58 | if err != nil { 59 | return nil, err 60 | } 61 | 62 | // Get RPM packages from each repository. 63 | rss := []string{} 64 | for _, ru := range repositoryURLs { 65 | rss = append(rss, ru.String()) 66 | } 67 | searchOptions := rpm.NewSearchOptions(&options, f.config.Archs, rss) 68 | rpmPackages, err := rpm.SearchPackages(searchOptions) 69 | if err != nil { 70 | return nil, err 71 | } 72 | 73 | return rpmPackages, nil 74 | } 75 | 76 | // Returns the list of version-specific mirror URLs. 
77 | func (f *OpenSuse) buildPerVersionMirrorUrls(mirrors []packages.Mirror, versions []distro.Version) ([]*url.URL, error) { 78 | versions, err := f.buildVersions(mirrors, versions) 79 | if err != nil { 80 | return []*url.URL{}, err 81 | } 82 | 83 | if (len(versions) > 0) && (len(mirrors) > 0) { 84 | var versionRoots []*url.URL 85 | 86 | for _, mirror := range mirrors { 87 | for _, version := range versions { 88 | versionRoot, err := url.Parse(mirror.URL + string(version)) 89 | if err != nil { 90 | return nil, err 91 | } 92 | 93 | versionRoots = append(versionRoots, versionRoot) 94 | } 95 | } 96 | 97 | return versionRoots, nil 98 | } 99 | 100 | return nil, distro.ErrNoDistroVersionSpecified 101 | } 102 | 103 | // Returns a list of distro versions, considering the user-provided configuration, 104 | // and if not, the ones available on configured mirrors. 105 | func (f *OpenSuse) buildVersions(mirrors []packages.Mirror, staticVersions []distro.Version) ([]distro.Version, error) { 106 | if staticVersions != nil { 107 | return staticVersions, nil 108 | } 109 | 110 | var dynamicVersions []distro.Version 111 | 112 | dynamicVersions, err := f.crawlVersions(mirrors) 113 | if err != nil { 114 | return nil, err 115 | } 116 | 117 | return dynamicVersions, nil 118 | } 119 | 120 | // Returns the list of the current available distro versions, by scraping 121 | // the specified mirrors, dynamically. 
122 | func (f *OpenSuse) crawlVersions(mirrors []packages.Mirror) ([]distro.Version, error) { 123 | versions := []distro.Version{} 124 | 125 | seedUrls := make([]*url.URL, 0, len(mirrors)) 126 | 127 | for _, mirror := range mirrors { 128 | u, err := url.Parse(mirror.URL) 129 | if err != nil { 130 | return []distro.Version{}, err 131 | } 132 | 133 | seedUrls = append(seedUrls, u) 134 | } 135 | 136 | folderNames, err := scrape.CrawlFolders( 137 | seedUrls, 138 | DistroVersionRegex, 139 | false, 140 | f.config.Output.Verbosity >= output.DebugLevel, 141 | ) 142 | if err != nil { 143 | return []distro.Version{}, err 144 | } 145 | 146 | for _, v := range folderNames { 147 | versions = append(versions, distro.Version(v)) 148 | } 149 | 150 | return versions, nil 151 | } 152 | 153 | // Returns the list of repositories URLs. 154 | func (f *OpenSuse) buildRepositoriesUrls(roots []*url.URL, repositories []packages.Repository) ([]*url.URL, error) { 155 | var urls []*url.URL 156 | 157 | for _, root := range roots { 158 | //nolint:revive,stylecheck 159 | for _, r := range repositories { 160 | // Get repository URL from URI. 161 | //nolint:revive,stylecheck 162 | us, err := url.JoinPath(root.String(), string(r.URI)) 163 | if err != nil { 164 | return nil, err 165 | } 166 | 167 | repositoryUrl, err := url.Parse(us) 168 | if err != nil { 169 | return nil, err 170 | } 171 | 172 | urls = append(urls, repositoryUrl) 173 | } 174 | } 175 | 176 | return urls, nil 177 | } 178 | 179 | // Returns the list of default repositories from the default config. 
180 | func (f *OpenSuse) getDefaultRepositories() []packages.Repository { 181 | var repositories []packages.Repository 182 | 183 | for _, repository := range DefaultConfig.Repositories { 184 | if !distro.RepositorySliceContains(repositories, repository) { 185 | repositories = append(repositories, repository) 186 | } 187 | } 188 | 189 | return repositories 190 | } 191 | -------------------------------------------------------------------------------- /pkg/distro/oracle/config.go: -------------------------------------------------------------------------------- 1 | package oracle 2 | 3 | import ( 4 | "net/url" 5 | "strings" 6 | 7 | "github.com/maxgio92/krawler/pkg/distro" 8 | "github.com/maxgio92/krawler/pkg/packages" 9 | ) 10 | 11 | func (o *Oracle) buildConfig(def distro.Config, user distro.Config) (distro.Config, error) { 12 | config, err := o.mergeConfig(def, user) 13 | if err != nil { 14 | return distro.Config{}, err 15 | } 16 | 17 | err = o.sanitizeConfig(&config) 18 | if err != nil { 19 | return distro.Config{}, err 20 | } 21 | 22 | // Build templated repositories URIs against built-in variables (archs). 23 | archs := make([]interface{}, 0, len(config.Archs)) 24 | for _, v := range config.Archs { 25 | archs = append(archs, string(v)) 26 | } 27 | if err = config.BuildTemplates(map[string]interface{}{ 28 | "archs": archs, 29 | }); err != nil { 30 | return distro.Config{}, err 31 | } 32 | 33 | return config, nil 34 | } 35 | 36 | // Returns the final configuration by merging the default with the user provided. 
37 | // 38 | //nolint:unparam 39 | func (o *Oracle) mergeConfig(def distro.Config, config distro.Config) (distro.Config, error) { 40 | if len(config.Archs) < 1 { 41 | config.Archs = def.Archs 42 | } else { 43 | for _, arch := range config.Archs { 44 | if arch == "" { 45 | config.Archs = def.Archs 46 | 47 | break 48 | } 49 | } 50 | } 51 | 52 | if len(config.Mirrors) < 1 { 53 | config.Mirrors = def.Mirrors 54 | } else { 55 | for _, mirror := range config.Mirrors { 56 | if mirror.URL == "" { 57 | config.Mirrors = def.Mirrors 58 | 59 | break 60 | } 61 | } 62 | } 63 | 64 | if len(config.Repositories) < 1 { 65 | config.Repositories = o.getDefaultRepositories() 66 | } else { 67 | for _, repository := range config.Repositories { 68 | if repository.URI == "" { 69 | config.Repositories = o.getDefaultRepositories() 70 | 71 | break 72 | } 73 | } 74 | } 75 | 76 | // Cannot scrape over Oracle repositories. 77 | if len(config.Versions) < 1 { 78 | config.Versions = DefaultConfig.Versions 79 | } 80 | 81 | return config, nil 82 | } 83 | 84 | func (o *Oracle) sanitizeConfig(config *distro.Config) error { 85 | err := o.sanitizeMirrors(&config.Mirrors) 86 | if err != nil { 87 | return err 88 | } 89 | 90 | return nil 91 | } 92 | 93 | func (o *Oracle) sanitizeMirrors(mirrors *[]packages.Mirror) error { 94 | for i, mirror := range *mirrors { 95 | if !strings.HasSuffix(mirror.URL, "/") { 96 | (*mirrors)[i].URL = mirror.URL + "/" 97 | } 98 | 99 | _, err := url.Parse(mirror.URL) 100 | if err != nil { 101 | return err 102 | } 103 | } 104 | 105 | return nil 106 | } 107 | -------------------------------------------------------------------------------- /pkg/distro/oracle/constants.go: -------------------------------------------------------------------------------- 1 | package oracle 2 | 3 | import ( 4 | "github.com/maxgio92/krawler/pkg/distro" 5 | "github.com/maxgio92/krawler/pkg/packages" 6 | ) 7 | 8 | const ( 9 | // Default regex to base the distro version detection on. 
10 | CentosMirrorsDistroVersionRegex = `^(0|[1-9]\d*)(\.(0|[1-9]\d*)?)?(\.(0|[1-9]\d*)?)?(-[a-zA-Z\d][-a-zA-Z.\d]*)?(\+[a-zA-Z\d][-a-zA-Z.\d]*)?\/$` 11 | ) 12 | 13 | var DefaultConfig = distro.Config{ 14 | Mirrors: []packages.Mirror{ 15 | {URL: "https://yum.oracle.com/repo/OracleLinux/"}, 16 | }, 17 | Repositories: []packages.Repository{ 18 | {Name: "", URI: packages.URITemplate("/latest/{{ .archs }}/")}, 19 | {Name: "", URI: packages.URITemplate("/MODRHCK/{{ .archs }}/")}, 20 | {Name: "", URI: packages.URITemplate("/UEK/latest/{{ .archs }}/")}, 21 | {Name: "", URI: packages.URITemplate("/UEKR3/latest/{{ .archs }}/")}, 22 | {Name: "", URI: packages.URITemplate("/UEKR3/{{ .archs }}/")}, 23 | {Name: "", URI: packages.URITemplate("/UEKR4/{{ .archs }}/")}, 24 | {Name: "", URI: packages.URITemplate("/UEKR5/{{ .archs }}/")}, 25 | {Name: "", URI: packages.URITemplate("/UEKR6/{{ .archs }}/")}, 26 | {Name: "", URI: packages.URITemplate("/UEKR7/{{ .archs }}/")}, 27 | {Name: "", URI: packages.URITemplate("/baseos/latest/{{ .archs }}/")}, 28 | {Name: "", URI: packages.URITemplate("/appstream/{{ .archs }}/")}, 29 | }, 30 | Archs: []packages.Architecture{ 31 | "aarch64", 32 | "x86_64", 33 | "ppc64le", 34 | }, 35 | Versions: []distro.Version{ 36 | "OL6", 37 | "OL7", 38 | "OL8", 39 | "OL9", 40 | }, 41 | } 42 | -------------------------------------------------------------------------------- /pkg/distro/oracle/oracle.go: -------------------------------------------------------------------------------- 1 | package oracle 2 | 3 | import ( 4 | "github.com/pkg/errors" 5 | "net/url" 6 | 7 | "github.com/maxgio92/krawler/pkg/distro" 8 | "github.com/maxgio92/krawler/pkg/output" 9 | "github.com/maxgio92/krawler/pkg/packages" 10 | "github.com/maxgio92/krawler/pkg/packages/rpm" 11 | "github.com/maxgio92/krawler/pkg/scrape" 12 | ) 13 | 14 | type Oracle struct { 15 | config distro.Config 16 | } 17 | 18 | func (o *Oracle) Configure(config distro.Config) error { 19 | cfg, err := 
o.buildConfig(DefaultConfig, config) 20 | if err != nil { 21 | return err 22 | } 23 | 24 | o.config = cfg 25 | 26 | return nil 27 | } 28 | 29 | // GetPackages scrapes each mirror, for each distro version, for each repository, 30 | // for each architecture, and returns slice of Package and optionally an error. 31 | func (o *Oracle) SearchPackages(options packages.SearchOptions) ([]packages.Package, error) { 32 | o.config.Output.Logger = options.Log() 33 | 34 | // Build distribution version-specific mirror root URLs. 35 | perVersionMirrorUrls, err := o.buildPerVersionMirrorUrls(o.config.Mirrors, o.config.Versions) 36 | if err != nil { 37 | return nil, err 38 | } 39 | 40 | // Build available repository URLs based on provided configuration, 41 | // for each distribution version. 42 | repositoryURLs, err := o.buildRepositoriesUrls(perVersionMirrorUrls, o.config.Repositories) 43 | if err != nil { 44 | return nil, err 45 | } 46 | 47 | // Get RPM packages from each repository. 48 | rss := []string{} 49 | for _, ru := range repositoryURLs { 50 | rss = append(rss, ru.String()) 51 | } 52 | searchOptions := rpm.NewSearchOptions(&options, o.config.Archs, rss) 53 | rpmPackages, err := rpm.SearchPackages(searchOptions) 54 | if err != nil { 55 | return nil, err 56 | } 57 | 58 | return rpmPackages, nil 59 | } 60 | 61 | // Returns the list of version-specific mirror URLs. 
62 | func (o *Oracle) buildPerVersionMirrorUrls(mirrors []packages.Mirror, versions []distro.Version) ([]*url.URL, error) { 63 | versions, err := o.buildVersions(mirrors, versions) 64 | if err != nil { 65 | return []*url.URL{}, err 66 | } 67 | 68 | if (len(versions) > 0) && (len(mirrors) > 0) { 69 | var versionRoots []*url.URL 70 | 71 | for _, mirror := range mirrors { 72 | for _, version := range versions { 73 | versionRoot, err := url.Parse(mirror.URL + string(version)) 74 | if err != nil { 75 | return nil, err 76 | } 77 | 78 | versionRoots = append(versionRoots, versionRoot) 79 | } 80 | } 81 | 82 | return versionRoots, nil 83 | } 84 | 85 | return nil, distro.ErrNoDistroVersionSpecified 86 | } 87 | 88 | // Returns a list of distro versions, considering the user-provided configuration, 89 | // and if not, the ones available on configured mirrors. 90 | func (o *Oracle) buildVersions(mirrors []packages.Mirror, staticVersions []distro.Version) ([]distro.Version, error) { 91 | if staticVersions != nil { 92 | return staticVersions, nil 93 | } 94 | 95 | var dynamicVersions []distro.Version 96 | 97 | dynamicVersions, err := o.crawlVersions(mirrors) 98 | if err != nil { 99 | return nil, errors.Wrap(err, "error crawling Oracle Linux versions") 100 | } 101 | 102 | return dynamicVersions, nil 103 | } 104 | 105 | // Returns the list of the current available distro versions, by scraping 106 | // the specified mirrors, dynamically. 
107 | func (o *Oracle) crawlVersions(mirrors []packages.Mirror) ([]distro.Version, error) { 108 | versions := []distro.Version{} 109 | 110 | seedUrls := make([]*url.URL, 0, len(mirrors)) 111 | 112 | for _, mirror := range mirrors { 113 | u, err := url.Parse(mirror.URL) 114 | if err != nil { 115 | return []distro.Version{}, err 116 | } 117 | 118 | seedUrls = append(seedUrls, u) 119 | } 120 | 121 | folderNames, err := scrape.CrawlFolders( 122 | seedUrls, 123 | CentosMirrorsDistroVersionRegex, 124 | false, 125 | o.config.Output.Verbosity >= output.DebugLevel, 126 | ) 127 | if err != nil { 128 | return []distro.Version{}, err 129 | } 130 | 131 | for _, v := range folderNames { 132 | versions = append(versions, distro.Version(v)) 133 | } 134 | 135 | return versions, nil 136 | } 137 | 138 | // Returns the list of repositories URLs. 139 | func (o *Oracle) buildRepositoriesUrls(roots []*url.URL, repositories []packages.Repository) ([]*url.URL, error) { 140 | var urls []*url.URL 141 | 142 | for _, root := range roots { 143 | //nolint:revive,stylecheck 144 | for _, r := range repositories { 145 | // Get repository URL from URI. 146 | //nolint:revive,stylecheck 147 | us, err := url.JoinPath(root.String(), string(r.URI)) 148 | if err != nil { 149 | return nil, err 150 | } 151 | 152 | repositoryUrl, err := url.Parse(us) 153 | if err != nil { 154 | return nil, err 155 | } 156 | 157 | urls = append(urls, repositoryUrl) 158 | } 159 | } 160 | 161 | return urls, nil 162 | } 163 | 164 | // Returns the list of default repositories from the default config. 
165 | func (o *Oracle) getDefaultRepositories() []packages.Repository { 166 | var repositories []packages.Repository 167 | 168 | for _, repository := range DefaultConfig.Repositories { 169 | if !distro.RepositorySliceContains(repositories, repository) { 170 | repositories = append(repositories, repository) 171 | } 172 | } 173 | 174 | return repositories 175 | } 176 | -------------------------------------------------------------------------------- /pkg/distro/ubuntu/constants.go: -------------------------------------------------------------------------------- 1 | package ubuntu 2 | 3 | import ( 4 | "github.com/maxgio92/krawler/pkg/distro" 5 | "github.com/maxgio92/krawler/pkg/packages" 6 | ) 7 | 8 | const ( 9 | MirrorsDistroVersionRegex = `^.+$` 10 | DefaultArch = X8664Arch 11 | X8664Arch packages.Architecture = "amd64" 12 | ) 13 | 14 | var DefaultConfig = distro.Config{ 15 | Mirrors: []packages.Mirror{ 16 | {URL: "https://mirrors.edge.kernel.org/ubuntu/"}, 17 | {URL: "http://security.ubuntu.com/ubuntu"}, 18 | }, 19 | Repositories: []packages.Repository{ 20 | {Name: "main", URI: packages.URITemplate("main")}, 21 | {Name: "contrib", URI: packages.URITemplate("contrib")}, 22 | {Name: "non-free", URI: packages.URITemplate("non-free")}, 23 | {Name: "multiverse", URI: packages.URITemplate("multiverse")}, 24 | {Name: "universe", URI: packages.URITemplate("universe")}, 25 | {Name: "restricted", URI: packages.URITemplate("restricted")}, 26 | }, 27 | Archs: nil, 28 | 29 | // Distribution versions, i.e. 
Ubuntu dists 30 | Versions: nil, 31 | } 32 | -------------------------------------------------------------------------------- /pkg/distro/ubuntu/ubuntu.go: -------------------------------------------------------------------------------- 1 | package ubuntu 2 | 3 | import ( 4 | "github.com/maxgio92/krawler/pkg/distro" 5 | "github.com/maxgio92/krawler/pkg/distro/debian" 6 | ) 7 | 8 | type Ubuntu struct { 9 | debian.Debian 10 | } 11 | 12 | func (u *Ubuntu) Configure(config distro.Config) error { 13 | c, err := u.BuildConfig(DefaultConfig, config) 14 | if err != nil { 15 | return err 16 | } 17 | 18 | u.Config = c 19 | 20 | return nil 21 | } 22 | -------------------------------------------------------------------------------- /pkg/distro/utils.go: -------------------------------------------------------------------------------- 1 | package distro 2 | 3 | import ( 4 | "github.com/maxgio92/krawler/pkg/packages" 5 | ) 6 | 7 | func RepositorySliceContains(s []packages.Repository, e packages.Repository) bool { 8 | for _, v := range s { 9 | if v.URI == e.URI { 10 | return true 11 | } 12 | } 13 | 14 | return false 15 | } 16 | -------------------------------------------------------------------------------- /pkg/kernelrelease/compiler.go: -------------------------------------------------------------------------------- 1 | package kernelrelease 2 | 3 | import ( 4 | "bufio" 5 | "io" 6 | "strings" 7 | "unicode" 8 | 9 | p "github.com/maxgio92/krawler/pkg/packages" 10 | ) 11 | 12 | const ( 13 | ConfigCompilerVersion = "CONFIG_GCC_VERSION" 14 | ) 15 | 16 | func GetCompilerVersionFromKernelPackage(pkg p.Package) (string, error) { 17 | return getCompilerVersionFromFileReaders(pkg.FileReaders()) 18 | } 19 | 20 | func getCompilerVersionFromFileReaders(files []io.Reader) (string, error) { 21 | for _, r := range files { 22 | fileScanner := bufio.NewScanner(r) 23 | fileScanner.Split(bufio.ScanLines) 24 | 25 | for fileScanner.Scan() { 26 | line := fileScanner.Text() 27 | if 
strings.Contains(line, ConfigCompilerVersion) { 28 | compilerVersion, err := parseConfig(line) 29 | if err == nil { 30 | return compilerVersion, nil 31 | } 32 | 33 | return "", err 34 | } 35 | } 36 | 37 | err := fileScanner.Err() 38 | if err != nil { 39 | return "", err 40 | } 41 | } 42 | 43 | return "", ErrKernelCompilerVersionNotFound 44 | } 45 | 46 | func parseConfig(line string) (string, error) { 47 | tokens := strings.FieldsFunc(line, func(c rune) bool { 48 | return unicode.Is(unicode.Space, c) || unicode.Is(unicode.Sm, c) 49 | }) 50 | if len(tokens) > 1 { 51 | return tokens[len(tokens)-1], nil 52 | } 53 | 54 | return "", ErrKernelConfigValueNotFound 55 | } 56 | -------------------------------------------------------------------------------- /pkg/kernelrelease/constants.go: -------------------------------------------------------------------------------- 1 | package kernelrelease 2 | 3 | import "regexp" 4 | 5 | var kernelVersionPattern = regexp.MustCompile(`(?P^(?P0|[1-9]\d*)\.(?P0|[1-9]\d*)[.+]?(?P0|[1-9]\d*)?)(?P[-.+](?P0|[1-9]\d*|\d*[a-zA-Z-][0-9a-zA-Z-]*)([\.+~](0|[1-9]\d*|\d*[a-zA-Z-][0-9a-zA-Z-_]*))*)?(\+[0-9a-zA-Z-]+(\.[0-9a-zA-Z-]+)*)?$`) 6 | -------------------------------------------------------------------------------- /pkg/kernelrelease/errors.go: -------------------------------------------------------------------------------- 1 | package kernelrelease 2 | 3 | import "fmt" 4 | 5 | var ( 6 | ErrKernelCompilerVersionNotFound = fmt.Errorf("compiler version not found") 7 | ErrKernelConfigValueNotFound = fmt.Errorf("the line does not contain the config value") 8 | ) 9 | -------------------------------------------------------------------------------- /pkg/kernelrelease/kernelrelease.go: -------------------------------------------------------------------------------- 1 | package kernelrelease 2 | 3 | import ( 4 | "crypto/sha256" 5 | "encoding/hex" 6 | "fmt" 7 | "strconv" 8 | 9 | p "github.com/maxgio92/krawler/pkg/packages" 10 | ) 11 | 12 | type Arch string 
13 | 14 | type Archs map[Arch]string 15 | 16 | type KernelRelease struct { 17 | Fullversion string `json:"full_version"` 18 | Version int `json:"version"` 19 | PatchLevel int `json:"patch_level"` 20 | Sublevel int `json:"sublevel"` 21 | Extraversion string `json:"extra_version"` 22 | FullExtraversion string `json:"full_extra_version"` 23 | Architecture Arch `json:"architecture"` 24 | PackageName string `json:"package_name"` 25 | PackageURL string `json:"package_url"` 26 | CompilerVersion string `json:"compiler_version"` 27 | } 28 | 29 | //nolint:cyclop 30 | func (k *KernelRelease) BuildFromPackage(pkg p.Package) error { 31 | k.PackageName = pkg.GetName() 32 | k.PackageURL = pkg.URL() 33 | k.Architecture = Arch(pkg.GetArch()) 34 | 35 | kernelVersion := versionStringFromPackage(pkg) 36 | match := kernelVersionPattern.FindStringSubmatch(kernelVersion) 37 | 38 | identifiers := make(map[string]string) 39 | 40 | for i, name := range kernelVersionPattern.SubexpNames() { 41 | if i > 0 && i <= len(match) { 42 | identifiers[name] = match[i] 43 | 44 | switch name { 45 | case "fullversion": 46 | k.Fullversion = match[i] 47 | case "version": 48 | k.Version, _ = strconv.Atoi(match[i]) 49 | case "patchlevel": 50 | k.PatchLevel, _ = strconv.Atoi(match[i]) 51 | case "sublevel": 52 | k.Sublevel, _ = strconv.Atoi(match[i]) 53 | case "extraversion": 54 | k.Extraversion = match[i] 55 | case "fullextraversion": 56 | k.FullExtraversion = match[i] 57 | } 58 | } 59 | } 60 | 61 | compilerVersion, err := GetCompilerVersionFromKernelPackage(pkg) 62 | if err != nil { 63 | k.CompilerVersion = "" 64 | } 65 | 66 | k.CompilerVersion = compilerVersion 67 | 68 | return nil 69 | } 70 | 71 | func (k *KernelRelease) SHA256Sum() string { 72 | sha256.New() 73 | 74 | s := fmt.Sprintf("%s%s%s%s", 75 | k.Fullversion, 76 | k.FullExtraversion, 77 | k.PackageName, 78 | string(k.Architecture), 79 | ) 80 | 81 | hash := sha256.Sum256([]byte(s)) 82 | 83 | return hex.EncodeToString(hash[:]) 84 | } 85 | 
-------------------------------------------------------------------------------- /pkg/kernelrelease/kernelrelease_test.go: -------------------------------------------------------------------------------- 1 | package kernelrelease_test 2 | 3 | import ( 4 | "testing" 5 | 6 | "github.com/maxgio92/krawler/pkg/kernelrelease" 7 | "github.com/maxgio92/krawler/pkg/packages" 8 | "github.com/maxgio92/krawler/pkg/packages/deb" 9 | 10 | "gotest.tools/assert" 11 | ) 12 | 13 | //nolint:funlen 14 | func TestBuildFromPackage(t *testing.T) { 15 | t.Parallel() 16 | 17 | tests := map[string]struct { 18 | pkg packages.Package 19 | want kernelrelease.KernelRelease 20 | }{ 21 | "just kernel version": { 22 | pkg: &deb.Package{ 23 | Name: "linux-headers", 24 | Version: "5.5.2", 25 | Release: "", 26 | Arch: "", 27 | }, 28 | want: kernelrelease.KernelRelease{ 29 | Fullversion: "5.5.2", 30 | Version: 5, 31 | PatchLevel: 5, 32 | Sublevel: 2, 33 | Extraversion: "", 34 | FullExtraversion: "", 35 | PackageName: "linux-headers", 36 | Architecture: kernelrelease.Arch(""), 37 | }, 38 | }, 39 | "an empty string": { 40 | pkg: &deb.Package{ 41 | Name: "linux-headers", 42 | Version: "", 43 | Release: "", 44 | Arch: "", 45 | }, 46 | want: kernelrelease.KernelRelease{ 47 | Fullversion: "", 48 | Version: 0, 49 | PatchLevel: 0, 50 | Sublevel: 0, 51 | Extraversion: "", 52 | FullExtraversion: "", 53 | PackageName: "linux-headers", 54 | Architecture: kernelrelease.Arch(""), 55 | }, 56 | }, 57 | "Architecture Linux version": { 58 | pkg: &deb.Package{ 59 | Name: "linux-headers", 60 | Version: "6.1.5", 61 | Release: "arch2-1", 62 | Arch: "x86_64", 63 | }, 64 | want: kernelrelease.KernelRelease{ 65 | Fullversion: "6.1.5", 66 | Version: 6, 67 | PatchLevel: 1, 68 | Sublevel: 5, 69 | Extraversion: "arch2-1", 70 | FullExtraversion: "-arch2-1.x86_64", 71 | PackageName: "linux-headers", 72 | Architecture: kernelrelease.Arch("x86_64"), 73 | }, 74 | }, 75 | "Debian Jessie version": { 76 | pkg: &deb.Package{ 77 | Name: 
"linux-headers", 78 | Version: "3.16.0", 79 | Release: "10", 80 | Arch: "amd64", 81 | }, 82 | want: kernelrelease.KernelRelease{ 83 | Fullversion: "3.16.0", 84 | Version: 3, 85 | PatchLevel: 16, 86 | Sublevel: 0, 87 | Extraversion: "10", 88 | FullExtraversion: "-10.amd64", 89 | PackageName: "linux-headers", 90 | Architecture: kernelrelease.Arch("amd64"), 91 | }, 92 | }, 93 | "Debian Buster version": { 94 | pkg: &deb.Package{ 95 | Name: "linux-headers", 96 | Version: "4.19.0", 97 | Release: "6", 98 | Arch: "amd64", 99 | }, 100 | want: kernelrelease.KernelRelease{ 101 | Fullversion: "4.19.0", 102 | Version: 4, 103 | PatchLevel: 19, 104 | Sublevel: 0, 105 | Extraversion: "6", 106 | FullExtraversion: "-6.amd64", 107 | PackageName: "linux-headers", 108 | Architecture: kernelrelease.Arch("amd64"), 109 | }, 110 | }, 111 | "Debian version with tilde separator": { 112 | pkg: &deb.Package{ 113 | Name: "linux-headers", 114 | Version: "4.9.65", 115 | Release: "2+grsecunoff1~bpo9+1", 116 | Arch: "amd64", 117 | }, 118 | want: kernelrelease.KernelRelease{ 119 | Fullversion: "4.9.65", 120 | Version: 4, 121 | PatchLevel: 9, 122 | Sublevel: 65, 123 | Extraversion: "2", 124 | FullExtraversion: "-2+grsecunoff1~bpo9+1.amd64", 125 | PackageName: "linux-headers", 126 | Architecture: kernelrelease.Arch("amd64"), 127 | }, 128 | }, 129 | "Debian version with plus separator": { 130 | pkg: &deb.Package{ 131 | Name: "linux-headers", 132 | Version: "4.19+105", 133 | Release: "deb10u4~bpo9+1", 134 | Arch: "amd64", 135 | }, 136 | want: kernelrelease.KernelRelease{ 137 | Fullversion: "4.19+105", 138 | Version: 4, 139 | PatchLevel: 19, 140 | Sublevel: 105, 141 | Extraversion: "deb10u4", 142 | FullExtraversion: "-deb10u4~bpo9+1.amd64", 143 | PackageName: "linux-headers", 144 | Architecture: kernelrelease.Arch("amd64"), 145 | }, 146 | }, 147 | } 148 | for name, tt := range tests { 149 | tt := tt 150 | 151 | t.Run(name, func(t *testing.T) { 152 | t.Parallel() 153 | 154 | got := 
kernelrelease.KernelRelease{} 155 | err := got.BuildFromPackage(tt.pkg) 156 | 157 | assert.NilError(t, err) 158 | assert.DeepEqual(t, tt.want, got) 159 | }) 160 | } 161 | } 162 | -------------------------------------------------------------------------------- /pkg/kernelrelease/list.go: -------------------------------------------------------------------------------- 1 | package kernelrelease 2 | 3 | import p "github.com/maxgio92/krawler/pkg/packages" 4 | 5 | func GetKernelReleasesFromPackages(packages []p.Package, prefix string) ([]KernelRelease, error) { 6 | releases := []KernelRelease{} 7 | 8 | for _, pkg := range packages { 9 | kr := &KernelRelease{} 10 | 11 | err := kr.BuildFromPackage(pkg) 12 | if err != nil { 13 | return []KernelRelease{}, err 14 | } 15 | 16 | if kr.Fullversion != "" { 17 | releases = append(releases, *kr) 18 | } 19 | } 20 | 21 | return unique(releases), nil 22 | } 23 | 24 | func unique(kernelReleases []KernelRelease) []KernelRelease { 25 | krs := make([]KernelRelease, 0, len(kernelReleases)) 26 | m := make(map[string]bool) 27 | 28 | for _, v := range kernelReleases { 29 | if _, ok := m[v.SHA256Sum()]; !ok { 30 | m[v.SHA256Sum()] = true 31 | 32 | krs = append(krs, v) 33 | } 34 | } 35 | 36 | return krs 37 | } 38 | -------------------------------------------------------------------------------- /pkg/kernelrelease/version.go: -------------------------------------------------------------------------------- 1 | package kernelrelease 2 | 3 | import ( 4 | "fmt" 5 | 6 | p "github.com/maxgio92/krawler/pkg/packages" 7 | ) 8 | 9 | func versionStringFromPackage(pkg p.Package) string { 10 | version := pkg.GetVersion() 11 | if pkg.GetRelease() != "" { 12 | version += fmt.Sprintf("-%s", pkg.GetRelease()) 13 | } 14 | 15 | if pkg.GetArch() != "" { 16 | version += fmt.Sprintf(".%s", pkg.GetArch()) 17 | } 18 | 19 | return version 20 | } 21 | -------------------------------------------------------------------------------- /pkg/output/log.go: 
-------------------------------------------------------------------------------- 1 | package output 2 | 3 | import ( 4 | log "github.com/sirupsen/logrus" 5 | ) 6 | 7 | type Logger struct { 8 | log.Logger 9 | } 10 | 11 | func NewLogger() *Logger { 12 | return &Logger{*log.New()} 13 | } 14 | -------------------------------------------------------------------------------- /pkg/output/options.go: -------------------------------------------------------------------------------- 1 | package output 2 | 3 | type Options struct { 4 | Logger *Logger 5 | Verbosity Verbosity 6 | } 7 | 8 | type Verbosity uint32 9 | 10 | const ( 11 | PanicLevel Verbosity = iota 12 | FatalLevel 13 | ErrorLevel 14 | WarnLevel 15 | InfoLevel 16 | DebugLevel 17 | TraceLevel 18 | ) 19 | -------------------------------------------------------------------------------- /pkg/output/progress.go: -------------------------------------------------------------------------------- 1 | package output 2 | 3 | import ( 4 | "fmt" 5 | "os" 6 | "time" 7 | 8 | "github.com/schollz/progressbar/v3" 9 | ) 10 | 11 | type ProgressOptions struct { 12 | bar *progressbar.ProgressBar 13 | } 14 | 15 | const ( 16 | progressBarThrottleMilliseconds = 65 17 | progressBarWidth = 10 18 | progressBarSpinnerType = 14 19 | ) 20 | 21 | func NewProgressOptions(total int, message ...string) *ProgressOptions { 22 | desc := "" 23 | if len(message) > 0 { 24 | desc = message[0] 25 | } 26 | 27 | bar := progressbar.NewOptions64( 28 | int64(total), 29 | progressbar.OptionSetDescription(desc), 30 | progressbar.OptionSetWriter(os.Stderr), 31 | progressbar.OptionSetWidth(progressBarWidth), 32 | progressbar.OptionThrottle(progressBarThrottleMilliseconds*time.Millisecond), 33 | progressbar.OptionShowCount(), 34 | progressbar.OptionShowIts(), 35 | progressbar.OptionOnCompletion(func() { 36 | fmt.Fprint(os.Stderr, "\n") 37 | }), 38 | progressbar.OptionSpinnerType(progressBarSpinnerType), 39 | progressbar.OptionFullWidth(), 40 | 
progressbar.OptionSetRenderBlankState(false), 41 | ) 42 | 43 | return &ProgressOptions{ 44 | bar: bar, 45 | } 46 | } 47 | 48 | func (b *ProgressOptions) Progress(n int) { 49 | if b.bar != nil { 50 | //nolint:errcheck 51 | b.bar.Add(n) 52 | } 53 | } 54 | -------------------------------------------------------------------------------- /pkg/output/progress_test.go: -------------------------------------------------------------------------------- 1 | package output_test 2 | 3 | import ( 4 | "testing" 5 | 6 | "github.com/stretchr/testify/assert" 7 | 8 | "github.com/maxgio92/krawler/pkg/output" 9 | ) 10 | 11 | func TestNewProgressOptions(t *testing.T) { 12 | t.Parallel() 13 | 14 | po := output.NewProgressOptions(100) 15 | assert.NotNil(t, po) 16 | } 17 | -------------------------------------------------------------------------------- /pkg/packages/alpm/alpm.go: -------------------------------------------------------------------------------- 1 | //go:build archlinux 2 | 3 | package alpm 4 | 5 | import ( 6 | "archive/tar" 7 | "bytes" 8 | "compress/gzip" 9 | "fmt" 10 | "io" 11 | "net/http" 12 | "os" 13 | "path" 14 | "strconv" 15 | 16 | "github.com/Jguer/go-alpm/v2" 17 | "github.com/pkg/errors" 18 | "github.com/spf13/afero" 19 | 20 | "github.com/maxgio92/krawler/pkg/packages" 21 | ) 22 | 23 | type Package struct { 24 | Name string 25 | Version string 26 | Release string 27 | Architecture string 28 | Location string 29 | url string 30 | fileReaders []io.Reader 31 | Files []string 32 | } 33 | 34 | func (p *Package) GetName() string { return p.Name } 35 | func (p *Package) GetVersion() string { return p.Version } 36 | func (p *Package) GetRelease() string { return p.Release } 37 | func (p *Package) GetArch() string { return p.Architecture } 38 | func (p *Package) GetLocation() string { return p.Location } 39 | func (p *Package) URL() string { return p.url } 40 | func (p *Package) FileReaders() []io.Reader { return p.fileReaders } 41 | 42 | const ( 43 | root = "/" 44 | 
ALPMDBVersionFile = "ALPM_DB_VERSION" 45 | ALPMDBVersion = 9 46 | ) 47 | 48 | func SearchPackages(so *SearchOptions) ([]packages.Package, error) { 49 | var result []packages.Package 50 | 51 | search := func(dbURL string) { 52 | searchPackagesFromDB( 53 | func() { 54 | so.Progress(1) 55 | so.SigProducerCompletion() 56 | }, 57 | so, dbURL) 58 | } 59 | 60 | collect := func() { 61 | so.Consume( 62 | func(p ...packages.Package) { 63 | so.Log().Info("scanned db") 64 | if len(p) > 0 { 65 | result = append(result, p...) 66 | so.Log().Infof("new %d packages found", len(p)) 67 | } 68 | }, 69 | func(e error) { 70 | so.Log().Error(e) 71 | }, 72 | ) 73 | } 74 | 75 | // Run search producers. 76 | for _, v := range so.SeedURLs() { 77 | dbURL := v 78 | go search(dbURL) 79 | } 80 | 81 | // Run collect consumer. 82 | go collect() 83 | 84 | // Wait for producers and consumers to complete and cleanup. 85 | so.WaitAndClose() 86 | 87 | return result, nil 88 | } 89 | 90 | func searchPackagesFromDB(doneFunc func(), so *SearchOptions, dbURL string) { 91 | defer doneFunc() 92 | 93 | p, err := doSearchPackagesFromDB(dbURL, so.PackageNames()) 94 | if err != nil { 95 | so.SendError(errors.Wrap(err, "searching packages from db")) 96 | } 97 | so.SendMessage(p...) 98 | } 99 | 100 | // doSearchPackagesFromDB looks for the package of which the specified package names, parsing the remote 101 | // repository DB, and returns a slice of packages.Package. 102 | // It possibly returns an error. 
103 | func doSearchPackagesFromDB(dbURL string, packageNames []string) ([]packages.Package, error) { 104 | fs := afero.NewOsFs() 105 | 106 | tmpdir, err := afero.TempDir(fs, os.TempDir(), "krawler") 107 | if err != nil { 108 | return nil, errors.Wrap(err, "error creating local DB temporeary directory") 109 | } 110 | 111 | req, err := http.NewRequest(http.MethodGet, dbURL, nil) 112 | if err != nil { 113 | return nil, errors.Wrap(err, "error creating HTTP request") 114 | } 115 | res, err := http.DefaultClient.Do(req) 116 | if err != nil { 117 | return nil, errors.Wrap(err, "error doing HTTP request") 118 | } 119 | defer res.Body.Close() 120 | 121 | if res.StatusCode != http.StatusOK { 122 | return nil, nil 123 | } 124 | 125 | gzr, err := gzip.NewReader(res.Body) 126 | if err != nil { 127 | return nil, errors.Wrap(err, "error reading gzip response") 128 | } 129 | defer gzr.Close() 130 | 131 | trr := tar.NewReader(gzr) 132 | local := path.Join(tmpdir, "local") 133 | err = untar(trr, fs, local) 134 | if err != nil { 135 | if errors.Is(err, ErrDirEmpty) { 136 | return []packages.Package{}, nil 137 | } 138 | return nil, err 139 | } 140 | 141 | if err = createALPMDBVersionFile(path.Join(local, ALPMDBVersionFile), strconv.Itoa(ALPMDBVersion)); err != nil { 142 | return nil, errors.Wrap(err, "error creating ALPM DB version file") 143 | } 144 | 145 | h, err := alpm.Initialize(root, tmpdir) 146 | if err != nil { 147 | return nil, errors.Wrap(err, "error initializing Arch Linux Package Manager handler") 148 | } 149 | defer h.Release() 150 | 151 | localDb, err := h.LocalDB() 152 | if err != nil { 153 | return nil, err 154 | } 155 | 156 | var packageList []alpm.IPackage 157 | for _, v := range packageNames { 158 | list := localDb.Search([]string{v}).Slice() 159 | packageList = append(packageList, list...) 
160 | } 161 | os.Remove(tmpdir) 162 | 163 | ps := []packages.Package{} 164 | for _, p := range packageList { 165 | ps = append(ps, &Package{ 166 | Name: p.Name(), 167 | Version: p.Version(), 168 | Release: "", 169 | Architecture: p.Architecture(), 170 | Location: p.FileName(), 171 | url: p.URL(), 172 | fileReaders: nil, 173 | }) 174 | } 175 | 176 | return ps, nil 177 | } 178 | 179 | func untar(source *tar.Reader, fs afero.Fs, target string) error { 180 | err := fs.MkdirAll(target, 0755) 181 | if err != nil { 182 | return errors.Wrap(err, "creating the target directory") 183 | } 184 | 185 | for { 186 | header, err := source.Next() 187 | 188 | if err == io.EOF { 189 | break 190 | } 191 | 192 | if err != nil { 193 | return err 194 | } 195 | 196 | if header == nil { 197 | continue 198 | } 199 | 200 | target := path.Join(target, header.Name) 201 | 202 | switch header.Typeflag { 203 | 204 | case tar.TypeDir: 205 | if _, err := os.Stat(target); err != nil { 206 | if err := os.MkdirAll(target, 0755); err != nil { 207 | return errors.Wrap(err, "error on untar creating directory") 208 | } 209 | } 210 | 211 | case tar.TypeReg: 212 | f, err := os.OpenFile(target, os.O_CREATE|os.O_RDWR, os.FileMode(header.Mode)) 213 | if err != nil { 214 | return errors.Wrap(err, "error on untar creating file") 215 | } 216 | if _, err := io.Copy(f, source); err != nil { 217 | return errors.Wrap(err, "error on untar copying file content") 218 | } 219 | if err = f.Close(); err != nil { 220 | return errors.Wrap(err, "error on untar closing file") 221 | } 222 | 223 | default: 224 | return fmt.Errorf("error on untar: file %s type not supported", header.Name) 225 | } 226 | } 227 | 228 | d, err := fs.Open(target) 229 | if _, err = d.Readdirnames(1); err != nil { 230 | if err == io.EOF { 231 | return ErrDirEmpty 232 | } 233 | } 234 | defer d.Close() 235 | 236 | return nil 237 | } 238 | 239 | func createALPMDBVersionFile(filename, version string) error { 240 | f, err := os.Create(filename) 241 | if err 
!= nil { 242 | return err 243 | } 244 | if _, err = io.Copy(f, bytes.NewBuffer([]byte(version))); err != nil { 245 | return err 246 | } 247 | 248 | return nil 249 | } 250 | -------------------------------------------------------------------------------- /pkg/packages/alpm/errors.go: -------------------------------------------------------------------------------- 1 | //go:build archlinux 2 | 3 | package alpm 4 | 5 | import ( 6 | "github.com/pkg/errors" 7 | ) 8 | 9 | var ( 10 | ErrDirEmpty = errors.New("directory is empty") 11 | ) 12 | -------------------------------------------------------------------------------- /pkg/packages/alpm/search.go: -------------------------------------------------------------------------------- 1 | //go:build archlinux 2 | 3 | package alpm 4 | 5 | import ( 6 | "github.com/maxgio92/krawler/pkg/packages" 7 | ) 8 | 9 | type SearchOptions struct { 10 | *packages.SearchOptions 11 | packageNames []string 12 | } 13 | 14 | // NewSearchOptions returns a pointer to a SearchOptions object from a pointer to a packages.SearchOptions, and 15 | // overriding architectures and seedURLs. 
16 | func NewSearchOptions(options *packages.SearchOptions, seedURLs []string, packageNames []string) *SearchOptions { 17 | return &SearchOptions{ 18 | packages.NewSearchOptions( 19 | options.PackageName(), 20 | nil, 21 | seedURLs, 22 | options.Verbosity(), 23 | options.ProgressMessage(), 24 | options.PackageFileNames()..., 25 | ), 26 | packageNames, 27 | } 28 | } 29 | 30 | func (o *SearchOptions) PackageNames() []string { 31 | return o.packageNames 32 | } 33 | -------------------------------------------------------------------------------- /pkg/packages/deb/constants.go: -------------------------------------------------------------------------------- 1 | package deb 2 | 3 | const ( 4 | InRelease = "InRelease" 5 | PackagesIndexFormat = ".xz" 6 | ) 7 | -------------------------------------------------------------------------------- /pkg/packages/deb/package.go: -------------------------------------------------------------------------------- 1 | package deb 2 | 3 | import ( 4 | "io" 5 | ) 6 | 7 | type Package struct { 8 | Name string 9 | Arch string 10 | Version string 11 | Release string 12 | Location string 13 | //nolint:stylecheck,revive 14 | Url string 15 | fileReaders []io.Reader 16 | } 17 | 18 | type PackageLocation struct { 19 | Href string 20 | } 21 | 22 | func (p *Package) GetName() string { 23 | return p.Name 24 | } 25 | 26 | func (p *Package) GetVersion() string { 27 | return p.Version 28 | } 29 | 30 | func (p *Package) GetRelease() string { 31 | return p.Release 32 | } 33 | 34 | func (p *Package) GetArch() string { 35 | return p.Arch 36 | } 37 | 38 | func (p *Package) GetLocation() string { 39 | return p.Location 40 | } 41 | 42 | func (p *Package) URL() string { 43 | return p.Url 44 | } 45 | 46 | func (p *Package) FileReaders() []io.Reader { 47 | return p.fileReaders 48 | } 49 | -------------------------------------------------------------------------------- /pkg/packages/deb/search.go: 
-------------------------------------------------------------------------------- 1 | package deb 2 | 3 | import ( 4 | "github.com/maxgio92/krawler/pkg/packages" 5 | ) 6 | 7 | type SearchOptions struct { 8 | // Deb components are needed to filter indexed packages. 9 | // More on this here: https://wiki.debian.org/DebianRepository. 10 | components []string 11 | *packages.SearchOptions 12 | } 13 | 14 | // NewSearchOptions returns a pointer to a SearchOptions object from a pointer to a packages.SearchOptions, and 15 | // overriding architectures and seedURLs. 16 | func NewSearchOptions(options *packages.SearchOptions, architectures []packages.Architecture, seedURLs []string, components []string) *SearchOptions { 17 | return &SearchOptions{ 18 | components, 19 | packages.NewSearchOptions( 20 | options.PackageName(), 21 | architectures, 22 | seedURLs, 23 | options.Verbosity(), 24 | options.ProgressMessage(), 25 | options.PackageFileNames()..., 26 | ), 27 | } 28 | } 29 | 30 | func (s *SearchOptions) Components() []string { 31 | return s.components 32 | } 33 | -------------------------------------------------------------------------------- /pkg/packages/package.go: -------------------------------------------------------------------------------- 1 | package packages 2 | 3 | import ( 4 | "io" 5 | ) 6 | 7 | type Package interface { 8 | GetName() string 9 | GetVersion() string 10 | GetRelease() string 11 | GetArch() string 12 | GetLocation() string 13 | URL() string 14 | FileReaders() []io.Reader 15 | } 16 | 17 | type Architecture string 18 | -------------------------------------------------------------------------------- /pkg/packages/repository.go: -------------------------------------------------------------------------------- 1 | package packages 2 | 3 | type Repository struct { 4 | Name string 5 | URI URITemplate 6 | } 7 | 8 | type URITemplate string 9 | 10 | type Mirror struct { 11 | Name string 12 | // The base URL of the package mirror 13 | // (e.g. 
// Data maps a single <data> element of the repository metadata: its type
// attribute and the location of the file it points to.
type Data struct {
	Type     string   `xml:"type,attr"`
	Location Location `xml:"location"`
}

// Location maps a <location> element, exposing its href attribute.
type Location struct {
	Href string `xml:"href,attr"`
}

// GetLocation returns the href of the location referenced by the data entry.
func (data *Data) GetLocation() string {
	location := data.Location

	return location.Href
}
Package struct { 9 | XMLName xml.Name `xml:"package"` 10 | Name string `xml:"name"` 11 | Arch string `xml:"arch"` 12 | Version PackageVersion `xml:"version"` 13 | Summary string `xml:"summary"` 14 | Description string `xml:"description"` 15 | Packager string `xml:"packager"` 16 | Time PackageTime `xml:"time"` 17 | Size PackageSize `xml:"size"` 18 | Location PackageLocation `xml:"location"` 19 | Format PackageFormat `xml:"format"` 20 | url string 21 | fileReaders []io.Reader 22 | } 23 | 24 | func (p *Package) GetName() string { 25 | return p.Name 26 | } 27 | 28 | func (p *Package) GetVersion() string { 29 | return p.Version.Ver 30 | } 31 | 32 | func (p *Package) GetRelease() string { 33 | return p.Version.Rel 34 | } 35 | 36 | func (p *Package) GetArch() string { 37 | return p.Arch 38 | } 39 | 40 | func (p *Package) GetLocation() string { 41 | return p.Location.Href 42 | } 43 | 44 | func (p *Package) URL() string { 45 | return p.url 46 | } 47 | 48 | func (p *Package) FileReaders() []io.Reader { 49 | return p.fileReaders 50 | } 51 | -------------------------------------------------------------------------------- /pkg/packages/rpm/search.go: -------------------------------------------------------------------------------- 1 | package rpm 2 | 3 | import ( 4 | "github.com/maxgio92/krawler/pkg/packages" 5 | ) 6 | 7 | type SearchOptions struct { 8 | *packages.SearchOptions 9 | } 10 | 11 | // NewSearchOptions returns a pointer to a SearchOptions object from a pointer to a packages.SearchOptions, and 12 | // overriding architectures and seedURLs. 
13 | func NewSearchOptions(options *packages.SearchOptions, architectures []packages.Architecture, seedURLs []string) *SearchOptions { 14 | return &SearchOptions{ 15 | packages.NewSearchOptions( 16 | options.PackageName(), 17 | architectures, 18 | seedURLs, 19 | options.Verbosity(), 20 | options.ProgressMessage(), 21 | options.PackageFileNames()..., 22 | ), 23 | } 24 | } 25 | -------------------------------------------------------------------------------- /pkg/packages/rpm/types.go: -------------------------------------------------------------------------------- 1 | package rpm 2 | 3 | import ( 4 | "encoding/xml" 5 | ) 6 | 7 | type RepositoryMetadata struct { 8 | XMLName xml.Name `xml:"repomd"` 9 | Revision string `xml:"revision"` 10 | Data []Data `xml:"data"` 11 | } 12 | 13 | type PrimaryRepositoryMetadata struct { 14 | XMLName xml.Name `xml:"metadata"` 15 | Packages []Package `xml:"package"` 16 | } 17 | 18 | type PackageVersion struct { 19 | XMLName xml.Name `xml:"version"` 20 | Epoch string `xml:"epoch,attr"` 21 | Ver string `xml:"ver,attr"` 22 | Rel string `xml:"rel,attr"` 23 | } 24 | 25 | type PackageTime struct { 26 | File string `xml:"file,attr"` 27 | Build string `xml:"build,attr"` 28 | } 29 | 30 | type PackageSize struct { 31 | Package string `xml:"package,attr"` 32 | Installed string `xml:"installed,attr"` 33 | Archive string `xml:"archive,attr"` 34 | } 35 | 36 | type PackageLocation struct { 37 | XMLName xml.Name `xml:"location"` 38 | Href string `xml:"href,attr"` 39 | } 40 | 41 | type PackageFormat struct { 42 | XMLName xml.Name `xml:"format"` 43 | License string `xml:"license"` 44 | Vendor string `xml:"vendor"` 45 | Group string `xml:"group"` 46 | Buildhost string `xml:"buildhost"` 47 | HeaderRange PackageHeaderRange `xml:"header-range"` 48 | Requires PackageRequires `xml:"requires"` 49 | Provides PackageProvides `xml:"provides"` 50 | } 51 | 52 | type PackageHeaderRange struct { 53 | Start string `xml:"start,attr"` 54 | End string `xml:"end,attr"` 55 | 
} 56 | 57 | type PackageProvides struct { 58 | XMLName xml.Name `xml:"provides"` 59 | Entries []Entry `xml:"entry"` 60 | } 61 | 62 | type PackageRequires struct { 63 | XMLName xml.Name `xml:"requires"` 64 | Entries []Entry `xml:"entry"` 65 | } 66 | 67 | type Entry struct { 68 | XMLName xml.Name `xml:"entry"` 69 | Name string `xml:"name,attr"` 70 | } 71 | -------------------------------------------------------------------------------- /pkg/packages/search.go: -------------------------------------------------------------------------------- 1 | package packages 2 | 3 | import ( 4 | log "github.com/sirupsen/logrus" 5 | 6 | "github.com/maxgio92/krawler/pkg/output" 7 | ) 8 | 9 | type SearchOptions struct { 10 | packageName string 11 | architectures []Architecture 12 | packageFileNames []string 13 | seedURLs []string 14 | *output.ProgressOptions 15 | progressMessage string 16 | *MPSCQueue 17 | verbosity output.Verbosity 18 | logger *output.Logger 19 | } 20 | 21 | func NewSearchOptions(packageName string, architectures []Architecture, seedURLs []string, verbosity output.Verbosity, progressMessage string, packageFileNames ...string) *SearchOptions { 22 | logger := output.NewLogger() 23 | logger.SetLevel(log.Level(verbosity)) 24 | logger.SetFormatter(&log.TextFormatter{ 25 | ForceColors: true, 26 | DisableTimestamp: true, 27 | }) 28 | 29 | progressOptions := output.NewProgressOptions(len(seedURLs), progressMessage) 30 | 31 | queue := NewMPSCQueue(len(seedURLs)) 32 | 33 | return &SearchOptions{ 34 | packageName: packageName, 35 | architectures: architectures, 36 | packageFileNames: packageFileNames, 37 | seedURLs: seedURLs, 38 | ProgressOptions: progressOptions, 39 | progressMessage: progressMessage, 40 | MPSCQueue: queue, 41 | verbosity: verbosity, 42 | logger: logger, 43 | } 44 | } 45 | 46 | func (o *SearchOptions) PackageName() string { 47 | return o.packageName 48 | } 49 | 50 | func (o *SearchOptions) PackageFileNames() []string { 51 | return o.packageFileNames 52 | } 53 
| 54 | func (o *SearchOptions) SeedURLs() []string { 55 | return o.seedURLs 56 | } 57 | 58 | func (o *SearchOptions) Log() *output.Logger { 59 | return o.logger 60 | } 61 | 62 | func (o *SearchOptions) Verbosity() output.Verbosity { 63 | return o.verbosity 64 | } 65 | 66 | func (o *SearchOptions) Architectures() []Architecture { 67 | return o.architectures 68 | } 69 | 70 | func (o *SearchOptions) ProgressMessage() string { 71 | return o.progressMessage 72 | } 73 | -------------------------------------------------------------------------------- /pkg/packages/sync.go: -------------------------------------------------------------------------------- 1 | package packages 2 | 3 | import ( 4 | "sync" 5 | ) 6 | 7 | // MPSCQueue provides an option set to manage a sync group of multiple producer workers and 8 | // single consumer, leveraging Go sync.WaitGroup and channels to notify errors, results and completion 9 | // of consuming the results from the single consumer worker. 10 | type MPSCQueue struct { 11 | producersWG *sync.WaitGroup 12 | consumerDoneCh chan bool 13 | msgCh chan []Package 14 | errCh chan error 15 | } 16 | 17 | func NewMPSCQueue(parallelism int) *MPSCQueue { 18 | wg := &sync.WaitGroup{} 19 | wg.Add(parallelism) 20 | 21 | msgCh := make(chan []Package) 22 | 23 | errCh := make(chan error) 24 | 25 | doneCh := make(chan bool, 1) 26 | 27 | return &MPSCQueue{ 28 | producersWG: wg, 29 | consumerDoneCh: doneCh, 30 | msgCh: msgCh, 31 | errCh: errCh, 32 | } 33 | } 34 | 35 | // SendMessage sends a message as variadic parameter msg of type packages.Package to the messages queue. 36 | func (q *MPSCQueue) SendMessage(msg ...Package) { 37 | q.msgCh <- msg 38 | } 39 | 40 | // SendMessageAndComplete sends a message as variadic parameter msg of type archive.Package to the messages queue, 41 | // and eventually signals the completion of the current producer. 
42 | func (q *MPSCQueue) SendMessageAndComplete(msg ...Package) { 43 | defer q.SigProducerCompletion() 44 | q.msgCh <- msg 45 | } 46 | 47 | // SendError sends an error message of type error to the errors queue. 48 | func (q *MPSCQueue) SendError(err error) { 49 | q.errCh <- err 50 | } 51 | 52 | // Consume listens for both messages and errors on queues and do something with them, 53 | // as specified by msgHandler and errHandler functions. 54 | func (q *MPSCQueue) Consume(msgHandler func(msg ...Package), errHandler func(err error)) { 55 | for q.errCh != nil || q.msgCh != nil { 56 | select { 57 | case p, ok := <-q.msgCh: 58 | // If the channel is still open. 59 | if ok { 60 | // Do something with the message. 61 | msgHandler(p...) 62 | 63 | continue 64 | } 65 | 66 | q.msgCh = nil 67 | case e, ok := <-q.errCh: 68 | // If the channel is still open. 69 | if ok { 70 | // Do something with error. 71 | errHandler(e) 72 | 73 | continue 74 | } 75 | 76 | q.errCh = nil 77 | } 78 | } 79 | q.SigConsumerCompletion() 80 | } 81 | 82 | // SigProducerCompletion signals that a producer completed its work. 83 | func (q *MPSCQueue) SigProducerCompletion() { 84 | q.producersWG.Done() 85 | } 86 | 87 | // SigConsumerCompletion signals that the consumer completed its work. 88 | func (q *MPSCQueue) SigConsumerCompletion() { 89 | q.consumerDoneCh <- true 90 | } 91 | 92 | func (q *MPSCQueue) WaitAndClose() { 93 | // Wait for producersWG to complete. 94 | q.producersWG.Wait() 95 | close(q.msgCh) 96 | close(q.errCh) 97 | 98 | // Wait for consumers to complete. 
99 | <-q.consumerDoneCh 100 | } 101 | 102 | func (q *MPSCQueue) ProducersWG() *sync.WaitGroup { 103 | return q.producersWG 104 | } 105 | 106 | func (q *MPSCQueue) MessageCh() chan []Package { 107 | return q.msgCh 108 | } 109 | 110 | func (q *MPSCQueue) ErrorCh() chan error { 111 | return q.errCh 112 | } 113 | 114 | func (q *MPSCQueue) ConsumerDoneCh() chan bool { 115 | return q.consumerDoneCh 116 | } 117 | -------------------------------------------------------------------------------- /pkg/scrape/constants.go: -------------------------------------------------------------------------------- 1 | package scrape 2 | 3 | const ( 4 | folderRegex = `.+\/$` 5 | ) 6 | -------------------------------------------------------------------------------- /pkg/scrape/utils.go: -------------------------------------------------------------------------------- 1 | package scrape 2 | 3 | import "net/url" 4 | 5 | func getHostnamesFromURLs(urls []*url.URL) []string { 6 | hostnames := []string{} 7 | 8 | for _, v := range urls { 9 | hostnames = append(hostnames, v.Host) 10 | } 11 | 12 | return hostnames 13 | } 14 | 15 | func urlSliceContains(us []*url.URL, u *url.URL) bool { 16 | for _, v := range us { 17 | if v == u { 18 | return true 19 | } 20 | } 21 | 22 | return false 23 | } 24 | -------------------------------------------------------------------------------- /pkg/utils/matrix/README.md: -------------------------------------------------------------------------------- 1 | # Quickstart 2 | 3 | Below an example `main.go`: 4 | 5 | ``` 6 | package main 7 | 8 | import ( 9 | "fmt" 10 | "github.com/maxgio92/krawler/pkg/matrix" 11 | ) 12 | 13 | var ( 14 | columns = []matrix.Column{ 15 | {OrdinateIndex: 0, Points: []string{"A", "B"}}, // part 16 | {OrdinateIndex: 0, Points: []string{"1", "2", "3", "4", "5"}}, // part 17 | {OrdinateIndex: 0, Points: []string{"w", "x", "y", "z"}}, // part 18 | {OrdinateIndex: 0, Points: []string{"E", "F", "G", "H"}}, // part 19 | {OrdinateIndex: 0, Points: 
// errSupportedColumnTypes lists the types accepted for the points of a column.
var errSupportedColumnTypes = []string{"[]string"}

// errUnsopportedPointTypeMessage is the message carried by
// ErrUnsopportedPointType.
var errUnsopportedPointTypeMessage = fmt.Sprintf("type of the matrix column is not supported: supported types are %s", errSupportedColumnTypes)

// ErrUnsopportedPointType is the error returned when the points of a matrix
// column are not of a supported type.
type ErrUnsopportedPointType struct {
	message string
}

// NewErrUnsopportedPointType returns a new ErrUnsopportedPointType carrying
// the default message.
func NewErrUnsopportedPointType() *ErrUnsopportedPointType {
	e := new(ErrUnsopportedPointType)
	e.message = errUnsopportedPointTypeMessage

	return e
}

// Error implements the error interface.
func (unsupported *ErrUnsopportedPointType) Error() string {
	return unsupported.message
}
24 | // B + 4 + Z 25 | // 26 | //nolint:godot 27 | func GetColumnOrderedCombinationRows(columns []Column) ([]string, error) { 28 | rows := []string{} 29 | row := "" 30 | completed := false 31 | 32 | // For each time the last column has been reached 33 | // exit from recursion until reaching this: 34 | for { 35 | row = "" 36 | 37 | // Start always from the first column (x=0) 38 | err := gotoNextColumn(&rows, &row, 0, &columns[0], columns, &completed) 39 | if err != nil { 40 | return nil, err 41 | } 42 | 43 | ssp, ok := columns[0].Points.([]string) 44 | if !ok { 45 | return nil, NewErrUnsopportedPointType() 46 | } 47 | 48 | if columns[0].CurrentOrdinateIndex == len(ssp) || completed { 49 | break 50 | } 51 | } 52 | 53 | return rows, nil 54 | } 55 | 56 | func gotoNextColumn(points *[]string, row *string, abscissaIndex int, column *Column, columns []Column, completed *bool) error { 57 | currentColumnPoints, ok := column.Points.([]string) 58 | if !ok { 59 | return NewErrUnsopportedPointType() 60 | } 61 | 62 | if abscissaIndex+1 < len(columns) { // Until the last column is reached 63 | 64 | *row += currentColumnPoints[column.CurrentOrdinateIndex] 65 | 66 | // Move forward 67 | abscissaIndex++ 68 | column = &columns[abscissaIndex] 69 | err := gotoNextColumn(points, row, abscissaIndex, column, columns, completed) 70 | if err != nil { 71 | return err 72 | } 73 | 74 | } else { // When the last column is reached 75 | 76 | for _, point := range currentColumnPoints { 77 | *points = append(*points, string(*row+point)) 78 | } 79 | 80 | // Move backward 81 | if abscissaIndex > 0 { 82 | abscissaIndex-- 83 | } 84 | column = &columns[abscissaIndex] 85 | 86 | // Store where we gone 87 | err := scrollDownPrevColumnPoint(column, columns, abscissaIndex, completed) 88 | if err != nil { 89 | return err 90 | } 91 | } 92 | return nil 93 | } 94 | 95 | func scrollDownPrevColumnPoint(column *Column, columns []Column, abscissaIndex int, completed *bool) error { 96 | currentColumnPoints, ok := 
column.Points.([]string) 97 | if !ok { 98 | return NewErrUnsopportedPointType() 99 | } 100 | 101 | // If the current column has still elements/points 102 | // and there are more than one column. 103 | if column.CurrentOrdinateIndex+1 < len(currentColumnPoints) && len(columns) > 1 { 104 | column.CurrentOrdinateIndex++ 105 | 106 | // If the current column has been completely processed. 107 | } else { 108 | column.CurrentOrdinateIndex = 0 109 | abscissaIndex-- 110 | 111 | // If it's not the first column. 112 | if abscissaIndex >= 0 { 113 | err := scrollDownPrevColumnPoint(&columns[abscissaIndex], columns, abscissaIndex, completed) 114 | if err != nil { 115 | return err 116 | } 117 | 118 | // If it's the first column. 119 | } else { 120 | *completed = true 121 | } 122 | } 123 | return nil 124 | } 125 | -------------------------------------------------------------------------------- /pkg/utils/matrix/matrix_test.go: -------------------------------------------------------------------------------- 1 | package matrix_test 2 | 3 | import ( 4 | "testing" 5 | 6 | "github.com/maxgio92/krawler/pkg/utils/matrix" 7 | 8 | "github.com/stretchr/testify/assert" 9 | ) 10 | 11 | func TestGetColumnOrderedCombinationRows(t *testing.T) { 12 | t.Parallel() 13 | 14 | tests := map[string]struct { 15 | given []matrix.Column 16 | 17 | // The number of combinations. 
// Points holds the elements of a matrix column. The combination functions of
// this package only accept a []string as its dynamic type and return an
// ErrUnsopportedPointType for anything else.
type Points interface{}

// Column is one column of the matrix to combine.
type Column struct {
	// CurrentOrdinateIndex is the index, within Points, of the element
	// currently selected for the column. It is advanced and reset while the
	// combinations are computed.
	CurrentOrdinateIndex int
	// Points are the elements of the column.
	Points Points
}
-------------------------------------------------------------------------------- /pkg/utils/template/template.go: -------------------------------------------------------------------------------- 1 | package template 2 | 3 | import ( 4 | "bytes" 5 | "fmt" 6 | "regexp" 7 | "strings" 8 | 9 | t "html/template" 10 | 11 | "github.com/maxgio92/krawler/pkg/utils/matrix" 12 | ) 13 | 14 | // Returns a list of strings of executed templates from a template string 15 | // input, by applying an arbitrary variables inventory with multiple values. 16 | // The expecetd arguments are: 17 | 18 | // - templateString: the input string of the template to execute. 19 | // - inventory: the inventory map of the variable data structure to apply the 20 | // template to. The key of the map is the name of the variable, that should 21 | // match related annotation in the template. Each map item is a slice where in each slice item is 22 | // a single variable value. 23 | // 24 | // The result is multiple templates from a single template string and multiple 25 | // arbitrary variable values. 26 | // func MultiplexAndExecute(templateString string, inventory map[string][]interface{}) ([]string, error) { 27 | func MultiplexAndExecute(templateString string, input map[string]interface{}) ([]string, error) { 28 | supportedVariables, err := GetSupportedVariables(templateString) 29 | if err != nil { 30 | return nil, err 31 | } 32 | 33 | // If the template string does not contain variables 34 | // return the template string directly. 35 | if len(supportedVariables) == 0 { 36 | return []string{templateString}, nil 37 | } 38 | 39 | // Populate the inventory. 
40 | inventory := make(map[string][]interface{}) 41 | for _, key := range supportedVariables { 42 | i := input[key] 43 | is, ok := i.([]interface{}) 44 | if !ok { 45 | return nil, fmt.Errorf("variable '%s' in template is not a valid slice", key) 46 | } 47 | 48 | for _, v := range is { 49 | if _, ok := v.(string); !ok { 50 | return nil, fmt.Errorf("variable '%s' in template is not a valid slice of strings", key) 51 | } 52 | if v != "" { 53 | inventory[key] = append(inventory[key], v) 54 | } 55 | } 56 | } 57 | 58 | templateRegex, err := generateTemplateRegex(supportedVariables) 59 | if err != nil { 60 | return nil, err 61 | } 62 | templatePattern := regexp.MustCompile(templateRegex) 63 | 64 | ss, err := cutTemplateString(templateString, closeDelimiter) 65 | if err != nil { 66 | return nil, err 67 | } 68 | 69 | templateParts := []TemplatePart{} 70 | 71 | for _, s := range ss { 72 | 73 | // match are the template parts matched against the template regex. 74 | templatePartMatches := templatePattern.FindStringSubmatch(s) 75 | 76 | // name is the variable data structure to apply the template part to. 77 | for i, variableName := range templatePattern.SubexpNames() { 78 | 79 | // discard first variable name match and ensure a template part matched. 
80 | if i > 0 && i <= len(templatePartMatches) && templatePartMatches[i] != "" { 81 | y := len(templateParts) 82 | 83 | templateParts = append(templateParts, TemplatePart{ 84 | TemplateString: templatePartMatches[i], 85 | MatchedVariable: variableName, 86 | }) 87 | 88 | templateParts[y].Points = []string{} 89 | templateParts[y].TemplateString = strings.ReplaceAll( 90 | templateParts[y].TemplateString, 91 | openDelimiter+` `+cursor+variableName+` `+closeDelimiter, 92 | openDelimiter+` `+cursor+` `+closeDelimiter, 93 | ) 94 | templateParts[y].Template = t.New(fmt.Sprintf("%d", y)) 95 | templateParts[y].Template, err = templateParts[y].Template.Parse(templateParts[y].TemplateString) 96 | if err != nil { 97 | return nil, err 98 | } 99 | 100 | // for each item (variable name) of MatchedVariable 101 | // compose one Template and `execute()` it 102 | for _, value := range inventory[variableName] { 103 | o := new(bytes.Buffer) 104 | err = templateParts[y].Template.Execute(o, value) 105 | if err != nil { 106 | return nil, err 107 | } 108 | 109 | templateParts[y].Points = append(templateParts[y].Points.([]string), o.String()) 110 | } 111 | } 112 | } 113 | } 114 | 115 | matrixColumns := []matrix.Column{} 116 | 117 | for _, part := range templateParts { 118 | matrixColumns = append(matrixColumns, part.Column) 119 | } 120 | 121 | if len(matrixColumns) <= 0 { 122 | return nil, fmt.Errorf("cannot multiplex template: the template contains syntax errors") 123 | } 124 | 125 | result, err := matrix.GetColumnOrderedCombinationRows(matrixColumns) 126 | if err != nil { 127 | return nil, err 128 | } 129 | 130 | return result, nil 131 | } 132 | -------------------------------------------------------------------------------- /pkg/utils/template/types.go: -------------------------------------------------------------------------------- 1 | package template 2 | 3 | import ( 4 | t "html/template" 5 | 6 | "github.com/maxgio92/krawler/pkg/utils/matrix" 7 | ) 8 | 9 | type TemplatePart struct { 10 
// cutTemplateString splits t after each occurrence of closeDelimiter, so that
// every part ends with the delimiter. The text following the last delimiter,
// if any, is attached to the last part.
//
// An error is returned when closeDelimiter is empty or does not occur in t.
func cutTemplateString(t string, closeDelimiter string) ([]string, error) {
	// An empty delimiter is always found without consuming any input, which
	// would make the loop below never end.
	if closeDelimiter == "" {
		return nil, fmt.Errorf("cannot cut input template string: empty delimiter")
	}

	head, rest, found := strings.Cut(t, closeDelimiter)
	if !found {
		return nil, fmt.Errorf("cannot cut input template string")
	}

	parts := []string{head + closeDelimiter}

	for found {
		head, rest, found = strings.Cut(rest, closeDelimiter)
		if found {
			parts = append(parts, head+closeDelimiter)
		}
	}

	// When the delimiter is not found, head is the remaining text.
	parts[len(parts)-1] += head

	return parts, nil
}
- kernel-5.10 19 | - kernel-5.15 20 | 21 | output: 22 | verbosity: 6 23 | -------------------------------------------------------------------------------- /testdata/amazonlinux2022-norepos.yaml: -------------------------------------------------------------------------------- 1 | distros: 2 | amazonlinux2022: 3 | 4 | mirrors: 5 | - url: https://al2022-repos-us-east-1-9761ab97.s3.dualstack.us-east-1.amazonaws.com/core/mirrors 6 | name: AmazonLinux2022 7 | 8 | output: 9 | verbosity: 6 10 | -------------------------------------------------------------------------------- /testdata/amazonlinux2022.yaml: -------------------------------------------------------------------------------- 1 | distros: 2 | amazonlinux2022: 3 | 4 | mirrors: 5 | - url: https://al2022-repos-us-east-1-9761ab97.s3.dualstack.us-east-1.amazonaws.com/core/mirrors 6 | name: AmazonLinux2022 7 | 8 | repositories: 9 | - uri: /2022.0.20220202 10 | - uri: /2022.0.20220315 11 | - uri: /2022.0.20221012 12 | 13 | output: 14 | verbosity: 6 15 | -------------------------------------------------------------------------------- /testdata/amazonlinux2023-norepos.yaml: -------------------------------------------------------------------------------- 1 | distros: 2 | amazonlinux2023: 3 | 4 | mirrors: 5 | - url: https://cdn.amazonlinux.com/al2023/core/mirrors 6 | name: AmazonLinux2023 7 | 8 | output: 9 | verbosity: 6 10 | -------------------------------------------------------------------------------- /testdata/amazonlinux2023.yaml: -------------------------------------------------------------------------------- 1 | distros: 2 | amazonlinux2023: 3 | 4 | mirrors: 5 | - url: https://cdn.amazonlinux.com/al2023/core/mirrors 6 | name: AmazonLinux2023 7 | 8 | repositories: 9 | - uri: /2023.5.20240730 10 | - uri: /latest 11 | 12 | output: 13 | verbosity: 6 14 | -------------------------------------------------------------------------------- /testdata/centos-norepos.yaml: 
-------------------------------------------------------------------------------- 1 | distros: 2 | centos: 3 | 4 | mirrors: 5 | - url: https://mirrors.edge.kernel.org/centos/ 6 | name: Edge 7 | - url: https://archive.kernel.org/centos-vault 8 | name: Archive 9 | 10 | output: 11 | verbosity: 6 12 | -------------------------------------------------------------------------------- /testdata/centos.yaml: -------------------------------------------------------------------------------- 1 | distros: 2 | centos: 3 | 4 | archs: 5 | - "aarch64" 6 | - "x86_64" 7 | - "ppc64le" 8 | 9 | mirrors: 10 | - url: https://archive.kernel.org/centos-vault 11 | name: Archive 12 | - url: https://mirrors.edge.kernel.org/centos 13 | name: Edge 14 | 15 | repositories: 16 | - name: old 17 | uri: "/{{ .old_repos }}/{{ .archs }}/" 18 | - name: new 19 | uri: "/{{ .new_repos }}/{{ .archs }}/os/" 20 | 21 | vars: 22 | 23 | # Distribution version dependent (i.e. CentOS >= 8) 24 | # (https://docs.centos.org/en-US/8-docs/managing-userspace-components/assembly_using-appstream/#packaging-methods-in-rhel-8_using-appstream) 25 | new_repos: 26 | - "BaseOS" 27 | - "AppStream" 28 | - "Devel" 29 | 30 | # Distribution version dependent (i.e. 
CentOS < 8) 31 | old_repos: 32 | - "os" 33 | - "updates" 34 | 35 | output: 36 | verbosity: 6 37 | -------------------------------------------------------------------------------- /testdata/debian-norepos.yaml: -------------------------------------------------------------------------------- 1 | distros: 2 | debian: 3 | 4 | mirrors: 5 | - url: "https://mirrors.edge.kernel.org/debian/" 6 | name: Edge 7 | - url: "https://security.debian.org/" 8 | name: Security 9 | 10 | output: 11 | verbosity: 6 12 | -------------------------------------------------------------------------------- /testdata/debian.yaml: -------------------------------------------------------------------------------- 1 | distros: 2 | debian: 3 | 4 | mirrors: 5 | - url: "https://mirrors.edge.kernel.org/debian/" 6 | name: Edge 7 | - url: "https://security.debian.org/" 8 | name: Security 9 | 10 | repositories: 11 | - name: main 12 | uri: main 13 | - name: contrib 14 | uri: contrib 15 | - name: non-free 16 | uri: non-free 17 | - name: multiverse 18 | uri: multiverse 19 | - name: universe 20 | uri: universe 21 | - name: restricted 22 | uri: restricted 23 | 24 | output: 25 | verbosity: 6 26 | -------------------------------------------------------------------------------- /testdata/ubuntu-norepos.yaml: -------------------------------------------------------------------------------- 1 | distros: 2 | ubuntu: 3 | 4 | mirrors: 5 | - url: "https://mirrors.edge.kernel.org/ubuntu" 6 | name: Edge 7 | - url: "http://security.ubuntu.com/ubuntu" 8 | name: Security 9 | 10 | output: 11 | verbosity: 6 12 | -------------------------------------------------------------------------------- /testdata/ubuntu.yaml: -------------------------------------------------------------------------------- 1 | distros: 2 | ubuntu: 3 | 4 | mirrors: 5 | - url: "https://mirrors.edge.kernel.org/ubuntu" 6 | name: Edge 7 | - url: "http://security.ubuntu.com/ubuntu" 8 | name: Security 9 | 10 | repositories: 11 | - name: main 12 | uri: main 13 | - 
name: contrib 14 | uri: contrib 15 | - name: non-free 16 | uri: non-free 17 | - name: multiverse 18 | uri: multiverse 19 | - name: universe 20 | uri: universe 21 | - name: restricted 22 | uri: restricted 23 | 24 | output: 25 | verbosity: 6 26 | --------------------------------------------------------------------------------