├── .dockerignore ├── .env ├── .github ├── FUNDING.yml └── workflows │ ├── docker-image.yml │ └── rust.yml ├── .gitignore ├── Cargo.lock ├── Cargo.toml ├── Dockerfile ├── LICENSE ├── README.md ├── db └── migrations ├── docker-compose.yml ├── migrations ├── 20240410165319_init.down.sql ├── 20240410165319_init.up.sql ├── 20240411180313_tasks.down.sql ├── 20240411180313_tasks.up.sql ├── 20240414163453_refs-idx-uniq.down.sql ├── 20240414163453_refs-idx-uniq.up.sql ├── 20240414184554_packages.down.sql ├── 20240414184554_packages.up.sql ├── 20240415204505_search-index.down.sql ├── 20240415204505_search-index.up.sql ├── 20240416124029_retry-limits.down.sql ├── 20240416124029_retry-limits.up.sql ├── 20240503120056_sbom.down.sql ├── 20240503120056_sbom.up.sql ├── 20240506111249_timestamps.down.sql ├── 20240506111249_timestamps.up.sql ├── 20240510222903_import_time_index.down.sql ├── 20240510222903_import_time_index.up.sql ├── 20240517213440_index-refs-vendor.down.sql ├── 20240517213440_index-refs-vendor.up.sql ├── 20240526194942_aliases-unique.down.sql ├── 20240526194942_aliases-unique.up.sql ├── 20240530155351_compressed-artifact-metadata.down.sql └── 20240530155351_compressed-artifact-metadata.up.sql ├── src ├── alias.rs ├── apkbuild.rs ├── apt.rs ├── args.rs ├── chksums.rs ├── compression.rs ├── db.rs ├── errors.rs ├── ingest │ ├── alpine.rs │ ├── git.rs │ ├── mod.rs │ ├── pacman.rs │ ├── rpm.rs │ ├── tar.rs │ ├── void.rs │ └── wolfi.rs ├── main.rs ├── pkgbuild.rs ├── reindex.rs ├── sbom │ ├── cargo.rs │ ├── composer.rs │ ├── go.rs │ ├── mod.rs │ ├── npm.rs │ └── yarn.rs ├── sync │ ├── alpine.rs │ ├── apt.rs │ ├── gentoo.rs │ ├── guix.rs │ ├── homebrew.rs │ ├── live_bootstrap.rs │ ├── mod.rs │ ├── pacman.rs │ ├── rpm.rs │ ├── stagex.rs │ ├── void.rs │ └── yocto.rs ├── utils.rs ├── void_template.rs ├── web.rs ├── worker.rs └── yocto.rs └── templates ├── archive.txt.hbs ├── artifact.html.hbs ├── base.html.hbs ├── diff.html.hbs ├── index.html.hbs ├── sbom.html.hbs ├── search.html.hbs ├── stats.html.hbs └── style.css /.dockerignore: -------------------------------------------------------------------------------- 1 | Dockerfile 2 | .dockerignore 3 | docker-compose.yml 4 | target 5 | .env 6 | -------------------------------------------------------------------------------- /.env: -------------------------------------------------------------------------------- 1 | DATABASE_URL=postgres://postgres:postgres@localhost/what-the-src 2 | -------------------------------------------------------------------------------- /.github/FUNDING.yml: -------------------------------------------------------------------------------- 1 | github: [kpcyrd] 2 | -------------------------------------------------------------------------------- /.github/workflows/docker-image.yml: -------------------------------------------------------------------------------- 1 | name: Docker 2 | 3 | on: 4 | push: 5 | branches: [ "main" ] 6 | tags: [ "v*.*.*" ] 7 | pull_request: 8 | branches: [ "main" ] 9 | workflow_dispatch: 10 | 11 | jobs: 12 | build: 13 | runs-on: ubuntu-24.04 14 | steps: 15 | - uses: actions/checkout@v4 16 | - uses: docker/setup-buildx-action@v3 17 | 18 | - name: Expose actions cache variables 19 | uses: actions/github-script@v6 20 | with: 21 | script: | 22 | core.exportVariable('ACTIONS_CACHE_URL', process.env['ACTIONS_CACHE_URL']) 23 | core.exportVariable('ACTIONS_RUNTIME_TOKEN', process.env['ACTIONS_RUNTIME_TOKEN']) 24 | 25 | - name: Build Docker image 26 | uses: docker/build-push-action@v5 27 | with: 28 | tags: whatsrc 29 
| load: true 30 | cache-from: type=gha 31 | cache-to: type=gha,mode=max 32 | build-args: | 33 | UPDATE_CHECK_COMMIT=${{ github.sha }} 34 | 35 | - name: Test the Docker image 36 | run: | 37 | docker run --rm whatsrc --help 38 | 39 | - name: Login to github container registry 40 | if: github.event_name != 'pull_request' 41 | uses: docker/login-action@v3 42 | with: 43 | registry: ghcr.io 44 | username: ${{ github.actor }} 45 | password: ${{ secrets.GITHUB_TOKEN }} 46 | 47 | - name: Push the image to `edge` 48 | if: github.event_name == 'push' && github.ref_name == 'main' 49 | run: | 50 | docker tag whatsrc ghcr.io/${{ github.repository }}:edge 51 | docker push ghcr.io/${{ github.repository }}:edge 52 | 53 | - name: Push the image to `${{ github.ref_name }}` 54 | if: github.ref_type == 'tag' 55 | run: | 56 | docker tag whatsrc ghcr.io/${{ github.repository }}:${{ github.ref_name }} 57 | docker push ghcr.io/${{ github.repository }}:${{ github.ref_name }} 58 | 59 | - name: Push the image to `latest` 60 | if: github.ref_type == 'tag' 61 | run: | 62 | docker tag whatsrc ghcr.io/${{ github.repository }}:latest 63 | docker push ghcr.io/${{ github.repository }}:latest 64 | -------------------------------------------------------------------------------- /.github/workflows/rust.yml: -------------------------------------------------------------------------------- 1 | name: Rust 2 | 3 | on: 4 | push: 5 | branches: [ "main" ] 6 | pull_request: 7 | branches: [ "main" ] 8 | schedule: 9 | - cron: '0 9 * * 1' 10 | 11 | env: 12 | CARGO_TERM_COLOR: always 13 | 14 | jobs: 15 | build: 16 | runs-on: ubuntu-24.04 17 | steps: 18 | - uses: actions/checkout@v4 19 | 20 | - name: Set up cargo cache 21 | uses: actions/cache@v4 22 | with: 23 | path: | 24 | ~/.cargo/bin/ 25 | ~/.cargo/registry/index/ 26 | ~/.cargo/registry/cache/ 27 | ~/.cargo/git/db/ 28 | target/ 29 | key: ${{ runner.os }}-cargo-debug-${{ hashFiles('**/Cargo.lock') }} 30 | restore-keys: ${{ runner.os }}-cargo-debug- 31 | 32 | - name: Build 33 | run: cargo build --locked --verbose 34 | unit-test: 35 | runs-on: ubuntu-24.04 36 | steps: 37 | - uses: actions/checkout@v4 38 | 39 | - name: Set up cargo cache 40 | uses: actions/cache@v4 41 | with: 42 | path: | 43 | ~/.cargo/bin/ 44 | ~/.cargo/registry/index/ 45 | ~/.cargo/registry/cache/ 46 | ~/.cargo/git/db/ 47 | target/ 48 | key: ${{ runner.os }}-cargo-debug-${{ hashFiles('**/Cargo.lock') }} 49 | restore-keys: ${{ runner.os }}-cargo-debug- 50 | 51 | - name: Run clippy 52 | run: cargo clippy -- -D warnings 53 | - name: Run tests 54 | run: cargo test --verbose 55 | 56 | fmt: 57 | runs-on: ubuntu-24.04 58 | steps: 59 | - uses: actions/checkout@v4 60 | - name: Run cargo fmt 61 | run: cargo fmt --all -- --check 62 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | /target 2 | -------------------------------------------------------------------------------- /Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "what-the-src" 3 | version = "0.1.0" 4 | edition = "2021" 5 | publish = false 6 | 7 | # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html 8 | 9 | [dependencies] 10 | apt-parser = "1.0.6" 11 | async-compression = { version = "0.4.8", features = ["tokio", "gzip", "xz", "bzip2", "zstd"] } 12 | async-stream = "0.3.5" 13 | blake2 
= "0.10.6" 14 | clap = { version = "4.5.4", features = ["derive", "env"] } 15 | data-encoding = "2.6.0" 16 | diffy-fork-filenames = "0.4.0" 17 | digest = "0.10.7" 18 | dotenvy = "0.15.7" 19 | env_logger = "0.11.3" 20 | fastrand = "2.1.0" 21 | fd-lock = "4.0.2" 22 | futures = "0.3.30" 23 | handlebars = { version = "6", features = ["rust-embed"] } 24 | hex = "0.4.3" 25 | log = "0.4.21" 26 | lz4_flex = "0.11.3" 27 | num-format = "0.4.4" 28 | plist = "1.6.1" 29 | regex = "1.10.4" 30 | reqwest = { version = "0.12.3", default-features = false, features = ["rustls-tls-webpki-roots", "stream", "socks"] } 31 | rpm = { version = "0.16", default-features = false } 32 | rust-embed = "8.3.0" 33 | serde = { version = "1.0.197", features = ["derive"] } 34 | serde-xml-rs = "0.6.0" 35 | serde_json = "1.0.115" 36 | serde_urlencoded = "0.7.1" 37 | serde_yaml = "0.9.34" 38 | sha2 = "0.10.8" 39 | sqlx = { version = "0.8", features = ["runtime-tokio", "chrono", "postgres"] } 40 | srcinfo = "1.1.0" 41 | thiserror = "2" 42 | tokio = { version = "1.37.0", features = ["macros", "rt-multi-thread", "io-std", "fs", "process"] } 43 | tokio-tar = "0.3.1" 44 | tokio-util = "0.7.10" 45 | toml = "0.8.12" 46 | url-escape = "0.1.1" 47 | warp = "0.3.7" 48 | warp-embed = "0.5.0" 49 | yarn-lock-parser = "0.11" 50 | yash-syntax = "0.13" 51 | 52 | [dev-dependencies] 53 | maplit = "1.0.2" 54 | -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- 1 | FROM rust:alpine3.20 2 | ENV RUSTFLAGS="-C target-feature=-crt-static" 3 | RUN apk add musl-dev postgresql-dev bzip2-dev xz-dev zstd-dev 4 | WORKDIR /app 5 | COPY . . 6 | RUN --mount=type=cache,target=/var/cache/buildkit \ 7 | CARGO_HOME=/var/cache/buildkit/cargo \ 8 | CARGO_TARGET_DIR=/var/cache/buildkit/target \ 9 | cargo build --release --locked && \ 10 | cp -v /var/cache/buildkit/target/release/what-the-src / 11 | 12 | FROM alpine:3.20 13 | RUN apk add libgcc libpq libbz2 xz-libs zstd-libs git 14 | # current rpm parser depends on /usr/bin/bsdtar 15 | RUN apk add libarchive-tools 16 | WORKDIR /app 17 | COPY --from=0 /what-the-src / 18 | USER nobody 19 | ENV BIND_ADDR=0.0.0.0:8000 20 | ENV WHATSRC_GIT_TMP=/var/cache/whatsrc 21 | ENTRYPOINT ["/what-the-src"] 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | 2 | 3 | ### Sync Arch Linux 4 | 5 | ``` 6 | what-the-src plumbing sync-pacman --vendor archlinux --fetch https://github.com/archlinux/state/archive/refs/heads/main.tar.gz -r core-x86_64 -r extra-x86_64 -r core-any -r extra-any -r multilib-x86_64 7 | ``` 8 | 9 | ### Sync Debian 10 | 11 | ``` 12 | what-the-src plumbing sync-apt --vendor debian http://deb.debian.org/debian/ --release sid --release stable 13 | what-the-src plumbing sync-apt --vendor debian https://security.debian.org/debian-security/ --release stable-security 14 | ``` 15 | 16 | ### Sync Fedora rawhide 17 | 18 | ``` 19 | what-the-src plumbing sync-rpm --vendor fedora https://ftp.halifax.rwth-aachen.de/fedora/linux/development/rawhide/Everything/source/tree/ 20 | ``` 21 | 22 | ### Sync Alpine 23 | 24 | ``` 25 | what-the-src plumbing sync-alpine --vendor alpine --fetch https://ftp.halifax.rwth-aachen.de/alpine/edge/main/x86_64/APKINDEX.tar.gz --repo main 26 | what-the-src plumbing sync-alpine --vendor alpine --fetch 
https://ftp.halifax.rwth-aachen.de/alpine/edge/community/x86_64/APKINDEX.tar.gz --repo community 27 | ``` 28 | 29 | ### Sync openSUSE 30 | 31 | ``` 32 | what-the-src plumbing sync-rpm --vendor opensuse https://ftp.halifax.rwth-aachen.de/opensuse/tumbleweed/repo/src-oss/ 33 | ``` 34 | 35 | ### Sync Gentoo 36 | 37 | ``` 38 | what-the-src plumbing sync-gentoo --vendor gentoo --fetch https://github.com/gentoo-mirror/gentoo/archive/refs/heads/master.tar.gz 39 | ``` 40 | 41 | ### Sync live-bootstrap 42 | 43 | ``` 44 | what-the-src plumbing sync-live-bootstrap --vendor live-bootstrap --fetch https://github.com/fosslinux/live-bootstrap/archive/refs/heads/master.tar.gz 45 | ``` 46 | 47 | ### Sync Homebrew 48 | 49 | ``` 50 | what-the-src plumbing sync-homebrew --vendor homebrew --fetch https://formulae.brew.sh/api/formula.json 51 | ``` 52 | 53 | ### Sync Wolfi OS 54 | 55 | ``` 56 | what-the-src plumbing sync-alpine --vendor wolfi --fetch https://packages.wolfi.dev/os/x86_64/APKINDEX.tar.gz 57 | ``` 58 | 59 | ### Sync Guix 60 | 61 | ``` 62 | what-the-src plumbing sync-guix --vendor guix --fetch 'https://guix.gnu.org/packages.json' 63 | ``` 64 | 65 | ### Sync Ubuntu 66 | 67 | ``` 68 | what-the-src plumbing sync-apt --vendor ubuntu https://ftp.halifax.rwth-aachen.de/ubuntu/ --release jammy --release jammy-updates --release jammy-security --release jammy-backports --suite main --suite multiverse --suite universe 69 | ``` 70 | 71 | ### Sync Void Linux 72 | 73 | ``` 74 | what-the-src plumbing sync-void --vendor void --fetch https://repo-fastly.voidlinux.org/current/x86_64-repodata 75 | ``` 76 | 77 | ### Sync Yocto 78 | 79 | ``` 80 | what-the-src plumbing sync-yocto --vendor yocto --fetch https://git.openembedded.org/openembedded-core/snapshot/master.tar.gz 81 | ``` 82 | 83 | ### Sync StageX 84 | 85 | ``` 86 | what-the-src plumbing sync-stagex --vendor stagex --fetch https://codeberg.org/stagex/stagex/archive/main.tar.gz 87 | ``` 88 | -------------------------------------------------------------------------------- /db/migrations: -------------------------------------------------------------------------------- 1 | ../migrations -------------------------------------------------------------------------------- /docker-compose.yml: -------------------------------------------------------------------------------- 1 | --- 2 | services: 3 | web: 4 | build: . 5 | init: true 6 | ports: 7 | - 127.0.0.1:8000:8000 8 | command: ["daemon"] 9 | environment: 10 | - DATABASE_URL=postgres://postgres:postgres@db/what-the-src 11 | 12 | worker: 13 | build: . 
14 | init: true 15 | command: ["worker"] 16 | environment: 17 | - DATABASE_URL=postgres://postgres:postgres@db/what-the-src 18 | 19 | db: 20 | image: postgres:16-alpine 21 | ports: 22 | - 127.0.0.1:5432:5432 23 | environment: 24 | - POSTGRES_DB=what-the-src 25 | - POSTGRES_PASSWORD=postgres 26 | -------------------------------------------------------------------------------- /migrations/20240410165319_init.down.sql: -------------------------------------------------------------------------------- 1 | DROP TABLE artifacts, aliases, refs; 2 | -------------------------------------------------------------------------------- /migrations/20240410165319_init.up.sql: -------------------------------------------------------------------------------- 1 | CREATE TABLE artifacts ( 2 | chksum VARCHAR PRIMARY KEY, 3 | db_version SMALLINT NOT NULL, 4 | files JSON 5 | ); 6 | 7 | CREATE TABLE aliases ( 8 | alias_from VARCHAR PRIMARY KEY, 9 | alias_to VARCHAR NOT NULL, 10 | reason VARCHAR, 11 | 12 | CONSTRAINT fk_artifact 13 | FOREIGN KEY(alias_to) 14 | REFERENCES artifacts(chksum) 15 | ON DELETE CASCADE 16 | ); 17 | CREATE INDEX aliases_idx_to ON aliases (alias_to); 18 | CREATE INDEX aliases_idx_from ON aliases (alias_from); 19 | 20 | CREATE TABLE refs ( 21 | id bigserial PRIMARY KEY, 22 | chksum VARCHAR NOT NULL, 23 | vendor VARCHAR NOT NULL, 24 | package VARCHAR NOT NULL, 25 | version VARCHAR NOT NULL, 26 | filename VARCHAR 27 | ); 28 | CREATE INDEX refs_idx_chksum ON refs (chksum); 29 | CREATE UNIQUE INDEX refs_idx_uniq ON refs (chksum, vendor, package, version, filename); 30 | -------------------------------------------------------------------------------- /migrations/20240411180313_tasks.down.sql: -------------------------------------------------------------------------------- 1 | DROP TABLE tasks; 2 | -------------------------------------------------------------------------------- /migrations/20240411180313_tasks.up.sql: -------------------------------------------------------------------------------- 1 | CREATE TABLE tasks ( 2 | id bigserial PRIMARY KEY, 3 | key VARCHAR UNIQUE NOT NULL, 4 | data JSON NOT NULL 5 | ); 6 | -------------------------------------------------------------------------------- /migrations/20240414163453_refs-idx-uniq.down.sql: -------------------------------------------------------------------------------- 1 | DROP INDEX refs_idx_uniq; 2 | CREATE UNIQUE INDEX refs_idx_uniq ON refs (chksum, vendor, package, version, filename); 3 | -------------------------------------------------------------------------------- /migrations/20240414163453_refs-idx-uniq.up.sql: -------------------------------------------------------------------------------- 1 | DROP INDEX refs_idx_uniq; 2 | CREATE UNIQUE INDEX refs_idx_uniq ON refs (chksum, vendor, package, version); 3 | -------------------------------------------------------------------------------- /migrations/20240414184554_packages.down.sql: -------------------------------------------------------------------------------- 1 | DROP TABLE packages; 2 | -------------------------------------------------------------------------------- /migrations/20240414184554_packages.up.sql: -------------------------------------------------------------------------------- 1 | CREATE TABLE packages ( 2 | id bigserial PRIMARY KEY, 3 | vendor VARCHAR NOT NULL, 4 | package VARCHAR NOT NULL, 5 | version VARCHAR NOT NULL 6 | ); 7 | CREATE INDEX packages_idx_vendor ON packages (vendor); 8 | CREATE INDEX packages_idx_package ON packages (package); 9 | CREATE UNIQUE INDEX 
packages_idx_uniq ON packages (vendor, package, version); 10 | -------------------------------------------------------------------------------- /migrations/20240415204505_search-index.down.sql: -------------------------------------------------------------------------------- 1 | DROP INDEX refs_idx_package; 2 | -------------------------------------------------------------------------------- /migrations/20240415204505_search-index.up.sql: -------------------------------------------------------------------------------- 1 | CREATE INDEX refs_idx_package ON refs (package text_pattern_ops); 2 | -------------------------------------------------------------------------------- /migrations/20240416124029_retry-limits.down.sql: -------------------------------------------------------------------------------- 1 | ALTER TABLE tasks 2 | DROP COLUMN retries, 3 | DROP COLUMN error; 4 | -------------------------------------------------------------------------------- /migrations/20240416124029_retry-limits.up.sql: -------------------------------------------------------------------------------- 1 | ALTER TABLE tasks 2 | ADD COLUMN retries SMALLINT NOT NULL DEFAULT 0, 3 | ADD COLUMN error VARCHAR; 4 | CREATE INDEX tasks_idx_retries ON tasks (retries); 5 | -------------------------------------------------------------------------------- /migrations/20240503120056_sbom.down.sql: -------------------------------------------------------------------------------- 1 | DROP TABLE sbom_refs; 2 | DROP TABLE sboms; 3 | -------------------------------------------------------------------------------- /migrations/20240503120056_sbom.up.sql: -------------------------------------------------------------------------------- 1 | -- Add up migration script here 2 | CREATE TABLE sboms ( 3 | id bigserial PRIMARY KEY, 4 | chksum VARCHAR NOT NULL, 5 | strain VARCHAR NOT NULL, 6 | data VARCHAR NOT NULL 7 | ); 8 | 9 | CREATE INDEX sboms_idx_strain ON sboms (strain); 10 | CREATE INDEX sboms_idx_chksum ON sboms (chksum); 11 | CREATE UNIQUE INDEX sboms_idx_uniq ON sboms (chksum, strain); 12 | 13 | CREATE TABLE sbom_refs ( 14 | from_archive VARCHAR NOT NULL, 15 | sbom_strain VARCHAR NOT NULL, 16 | sbom_chksum VARCHAR NOT NULL, 17 | path VARCHAR NOT NULL, 18 | 19 | CONSTRAINT fk_from_archive 20 | FOREIGN KEY(from_archive) 21 | REFERENCES artifacts(chksum) 22 | ON DELETE CASCADE 23 | ); 24 | 25 | CREATE INDEX sbom_refs_idx_from_archive ON sbom_refs (from_archive); 26 | CREATE INDEX sbom_refs_idx_sbom_chksum ON sbom_refs (sbom_chksum); 27 | CREATE UNIQUE INDEX sbom_refs_idx_uniq ON sbom_refs (from_archive, sbom_chksum, path); 28 | -------------------------------------------------------------------------------- /migrations/20240506111249_timestamps.down.sql: -------------------------------------------------------------------------------- 1 | ALTER TABLE artifacts 2 | DROP COLUMN first_seen, 3 | DROP COLUMN last_imported, 4 | ADD COLUMN db_version SMALLINT NOT NULL DEFAULT 0; 5 | 6 | ALTER TABLE refs 7 | DROP COLUMN first_seen, 8 | DROP COLUMN last_seen; 9 | -------------------------------------------------------------------------------- /migrations/20240506111249_timestamps.up.sql: -------------------------------------------------------------------------------- 1 | -- Add timestamps to artifacts 2 | ALTER TABLE artifacts 3 | DROP COLUMN db_version, 4 | ADD COLUMN first_seen timestamptz NOT NULL DEFAULT NOW(), 5 | ADD COLUMN last_imported timestamptz; 6 | 7 | UPDATE artifacts 8 | SET last_imported = to_timestamp(0); 9 | 10 | ALTER TABLE artifacts 
11 | ALTER COLUMN last_imported SET NOT NULL; 12 | 13 | -- Add timestamps to refs 14 | ALTER TABLE refs 15 | ADD COLUMN first_seen timestamptz NOT NULL DEFAULT NOW(), 16 | ADD COLUMN last_seen timestamptz NOT NULL DEFAULT NOW(); 17 | 18 | ALTER TABLE refs 19 | ALTER COLUMN last_seen DROP DEFAULT; 20 | -------------------------------------------------------------------------------- /migrations/20240510222903_import_time_index.down.sql: -------------------------------------------------------------------------------- 1 | DROP INDEX artifacts_idx_last_imported_date; 2 | DROP INDEX artifacts_idx_last_imported; 3 | DROP FUNCTION to_date_char; 4 | -------------------------------------------------------------------------------- /migrations/20240510222903_import_time_index.up.sql: -------------------------------------------------------------------------------- 1 | CREATE FUNCTION to_date_char(timestamptz) RETURNS text AS 2 | $$ select to_char($1, 'YYYY-MM-DD'); $$ 3 | LANGUAGE sql immutable; 4 | 5 | CREATE INDEX artifacts_idx_last_imported ON artifacts (last_imported); 6 | CREATE INDEX artifacts_idx_last_imported_date ON artifacts (to_date_char(last_imported)); 7 | -------------------------------------------------------------------------------- /migrations/20240517213440_index-refs-vendor.down.sql: -------------------------------------------------------------------------------- 1 | DROP INDEX refs_idx_vendor; 2 | -------------------------------------------------------------------------------- /migrations/20240517213440_index-refs-vendor.up.sql: -------------------------------------------------------------------------------- 1 | CREATE INDEX refs_idx_vendor ON refs (vendor); 2 | -------------------------------------------------------------------------------- /migrations/20240526194942_aliases-unique.down.sql: -------------------------------------------------------------------------------- 1 | DROP INDEX aliases_idx_uniq; 2 | -------------------------------------------------------------------------------- /migrations/20240526194942_aliases-unique.up.sql: -------------------------------------------------------------------------------- 1 | CREATE UNIQUE INDEX aliases_idx_uniq ON aliases (alias_from, alias_to); 2 | -------------------------------------------------------------------------------- /migrations/20240530155351_compressed-artifact-metadata.down.sql: -------------------------------------------------------------------------------- 1 | ALTER TABLE artifacts 2 | DROP COLUMN files_compressed; 3 | -------------------------------------------------------------------------------- /migrations/20240530155351_compressed-artifact-metadata.up.sql: -------------------------------------------------------------------------------- 1 | ALTER TABLE artifacts 2 | ADD COLUMN files_compressed BYTEA; 3 | -------------------------------------------------------------------------------- /src/alias.rs: -------------------------------------------------------------------------------- 1 | use crate::args; 2 | use crate::db; 3 | use crate::errors::*; 4 | use crate::ingest; 5 | use tokio::io; 6 | 7 | pub async fn run(args: &args::AddRef) -> Result<()> { 8 | let db = db::Client::create().await?; 9 | 10 | let summary = ingest::tar::stream_data(Some(&db), io::stdin(), None).await?; 11 | let chksum = summary.outer_digests.sha256; 12 | 13 | db.insert_ref(&db::Ref { 14 | chksum, 15 | vendor: args.vendor.clone(), 16 | package: args.package.clone(), 17 | version: args.version.clone(), 18 | filename: args.filename.clone(), 19 | }) 20 
| .await?; 21 | 22 | Ok(()) 23 | } 24 | -------------------------------------------------------------------------------- /src/apkbuild.rs: -------------------------------------------------------------------------------- 1 | use crate::errors::*; 2 | use std::collections::HashMap; 3 | use yash_syntax::syntax::{self, Unquote, Value}; 4 | 5 | /// Variables we keep track of for interpolation but nothing else 6 | const TRACKED_VARIABLES: &[&str] = &["_pkgname", "_pkgver", "_gitrev", "_commit", "url"]; 7 | 8 | #[derive(Debug, Default, PartialEq)] 9 | pub struct Apkbuild { 10 | pub pkgname: Option<String>, 11 | pub pkgver: Option<String>, 12 | pub extra: HashMap<&'static str, String>, 13 | 14 | pub source: Vec<String>, 15 | pub sha512sums: Vec<String>, 16 | } 17 | 18 | impl Apkbuild { 19 | pub fn resolve_vars(&self, mut text: &str) -> Result<String> { 20 | let mut out = String::new(); 21 | 22 | 'outer: while !text.is_empty() { 23 | if let Some((before, after)) = text.split_once('$') { 24 | let vars = [ 25 | ("pkgname", self.pkgname.as_ref()), 26 | ("pkgver", self.pkgver.as_ref()), 27 | ] 28 | .into_iter() 29 | .chain(self.extra.iter().map(|(key, value)| (*key, Some(value)))); 30 | 31 | out.push_str(before); 32 | for (name, value) in vars { 33 | if let Some(after) = after.strip_prefix(name) { 34 | let Some(value) = value else { 35 | return Err(Error::UnknownVariable(name.to_string())); 36 | }; 37 | out.push_str(value); 38 | text = after; 39 | continue 'outer; 40 | } 41 | } 42 | 43 | return Err(Error::UnknownVariable(after.to_string())); 44 | } else { 45 | out.push_str(text); 46 | break; 47 | } 48 | } 49 | 50 | Ok(out) 51 | } 52 | 53 | pub fn register_var(&mut self, key: &'static str, value: String) { 54 | self.extra.insert(key, value); 55 | } 56 | } 57 | 58 | pub fn parse(script: &str) -> Result<Apkbuild> { 59 | let parsed: syntax::List = script 60 | .parse() 61 | .map_err(|err| Error::InvalidPkgbuild(format!("{err:#?}")))?; 62 | 63 | let mut apkbuild = Apkbuild::default(); 64 | 65 | for item in &parsed.0 { 66 | for cmd in &item.and_or.first.commands { 67 | let syntax::Command::Simple(cmd) = cmd.as_ref() else { 68 | continue; 69 | }; 70 | 71 | for assign in &cmd.assigns { 72 | let name = assign.name.as_str(); 73 | 74 | let Value::Scalar(value) = &assign.value else { 75 | continue; 76 | }; 77 | let (value, _) = value.unquote(); 78 | 79 | debug!("Found variable in APKBUILD: key={name:?} value={value:?}"); 80 | let value = apkbuild.resolve_vars(&value); 81 | 82 | match name { 83 | "pkgname" => { 84 | apkbuild.pkgname = Some(value?); 85 | } 86 | "pkgver" => { 87 | apkbuild.pkgver = Some(value?); 88 | } 89 | "source" => { 90 | apkbuild.source = value? 91 | .trim() 92 | .split('\n') 93 | .map(|line| line.trim()) 94 | .map(|line| line.split_once("::").map(|x| x.1).unwrap_or(line)) 95 | .map(String::from) 96 | .collect(); 97 | } 98 | "sha512sums" => { 99 | apkbuild.sha512sums = value? 
100 | .trim() 101 | .split('\n') 102 | .map(|line| line.split_once(" ").map(|x| x.0).unwrap_or(line)) 103 | .map(String::from) 104 | .collect(); 105 | } 106 | _ => { 107 | if let Some(name) = TRACKED_VARIABLES.iter().find(|x| **x == name) { 108 | apkbuild.register_var(name, value?); 109 | } 110 | } 111 | } 112 | } 113 | } 114 | } 115 | 116 | Ok(apkbuild) 117 | } 118 | 119 | #[cfg(test)] 120 | mod tests { 121 | use super::*; 122 | 123 | fn init() { 124 | let _ = env_logger::builder().is_test(true).try_init(); 125 | } 126 | 127 | #[test] 128 | fn test_parse_cmatrix() { 129 | init(); 130 | 131 | let data = r#"# Contributor: alpterry 132 | # Maintainer: alpterry 133 | pkgname=cmatrix 134 | pkgver=2.0 135 | pkgrel=2 136 | pkgdesc="Terminal based 'The Matrix' like implementation" 137 | url="https://github.com/abishekvashok/cmatrix" 138 | arch="all" 139 | license="GPL-3.0-or-later" 140 | makedepends="ncurses-dev kbd autoconf automake" 141 | subpackages="$pkgname-doc" 142 | options="!check" # no test suite 143 | source="$pkgname-$pkgver.tar.gz::https://github.com/abishekvashok/cmatrix/archive/v$pkgver.tar.gz" 144 | 145 | prepare() { 146 | default_prepare 147 | autoreconf -i 148 | } 149 | 150 | build() { 151 | ./configure \ 152 | --build=$CBUILD \ 153 | --host=$CHOST \ 154 | --prefix=/usr \ 155 | --sysconfdir=/etc \ 156 | --mandir=/usr/share/man \ 157 | --localstatedir=/var 158 | make 159 | } 160 | 161 | package() { 162 | make DESTDIR="$pkgdir" install 163 | } 164 | 165 | sha512sums="1aeecd8e8abb6f87fc54f88a8c25478f69d42d450af782e73c0fca7f051669a415c0505ca61c904f960b46bbddf98cfb3dd1f9b18917b0b39e95d8c899889530 cmatrix-2.0.tar.gz" 166 | "#; 167 | let apkbuild = parse(data).unwrap(); 168 | assert_eq!(apkbuild, Apkbuild { 169 | pkgname: Some("cmatrix".to_string()), 170 | pkgver: Some("2.0".to_string()), 171 | extra: [ 172 | ("url", "https://github.com/abishekvashok/cmatrix".to_string()), 173 | ].into_iter().collect(), 174 | 175 | source: vec![ 176 | "https://github.com/abishekvashok/cmatrix/archive/v2.0.tar.gz".to_string(), 177 | ], 178 | sha512sums: vec![ 179 | "1aeecd8e8abb6f87fc54f88a8c25478f69d42d450af782e73c0fca7f051669a415c0505ca61c904f960b46bbddf98cfb3dd1f9b18917b0b39e95d8c899889530".to_string(), 180 | ], 181 | }); 182 | } 183 | 184 | #[test] 185 | fn test_parse_7zip() { 186 | init(); 187 | 188 | let data = r#"# Maintainer: Alex Xu (Hello71) 189 | pkgname=7zip 190 | pkgver=23.01 191 | #_pkgver=${pkgver//./} # Can't parse this and don't support _pkgver 192 | _pkgver=2301 193 | pkgrel=0 194 | pkgdesc="File archiver with a high compression ratio" 195 | url="https://7-zip.org/" 196 | arch="all" 197 | license="LGPL-2.0-only" 198 | subpackages="$pkgname-doc" 199 | source="https://7-zip.org/a/7z$_pkgver-src.tar.xz 200 | armv7.patch 201 | 7-zip-flags.patch 202 | 7-zip-musl.patch 203 | " 204 | builddir="$srcdir" 205 | 206 | provides="7zip-virtual p7zip=$pkgver-r$pkgrel" 207 | replaces="p7zip" 208 | provider_priority=100 209 | 210 | build() { 211 | cd CPP/7zip/Bundles/Alone2 212 | mkdir -p b/g 213 | # TODO: enable asm (requires jwasm or uasm) 214 | # DISABLE_RAR: RAR codec is non-free 215 | # -D_GNU_SOURCE: broken sched.h defines 216 | make -f ../../cmpl_gcc.mak \ 217 | CC="${CC:-cc} $CFLAGS $LDFLAGS -D_GNU_SOURCE" \ 218 | CXX="${CXX:-c++} $CXXFLAGS $LDFLAGS -D_GNU_SOURCE" \ 219 | DISABLE_RAR=1 220 | } 221 | 222 | check() { 223 | # no proper test suite so just try to compress and decompress some files 224 | mkdir tmp 225 | CPP/7zip/Bundles/Alone2/b/g/7zz a tmp/7z$_pkgver-src.7z Asm C CPP DOC 226 | cd tmp 
227 | ../CPP/7zip/Bundles/Alone2/b/g/7zz x 7z$_pkgver-src.7z 228 | # TODO: check if extracted result is identical 229 | } 230 | 231 | package() { 232 | install -Dm755 CPP/7zip/Bundles/Alone2/b/g/7zz "$pkgdir"/usr/bin/7zz 233 | ln -s 7zz "$pkgdir"/usr/bin/7z 234 | 235 | install -Dm644 "$builddir"/DOC/* -t "$pkgdir"/usr/share/doc/$pkgname/ 236 | } 237 | 238 | sha512sums=" 239 | e39f660c023aa65e55388be225b5591fe2a5c9138693f3c9107e2eb4ce97fafde118d3375e01ada99d29de9633f56221b5b3d640c982178884670cd84c8aa986 7z2301-src.tar.xz 240 | e52e542709a23ced76b651adf54609efae705801e940e74310ae4e67070bdb3841da5b801362aa0329b77993cdc3f6cd63ac2802240b16cde865f9d01bb1936d armv7.patch 241 | dfecb69861d00ee47311d83930adf80321b3c95ae01ce325677bde7aee6aa880a1979b0aa2909d9acb7a88ff31f910ac545ac218a0b5fd9e1270df2276b46d44 7-zip-flags.patch 242 | c652a87ad95f61901820adb61f3d1ceacedcb8aeaf9e89b2b728b7372eff67d9669eb363d5b2d2fb848ff2d8c5a727134fe13cc77d1215df7b2d32fe87711ebf 7-zip-musl.patch 243 | " 244 | 245 | "#; 246 | let apkbuild = parse(data).unwrap(); 247 | assert_eq!( 248 | apkbuild, 249 | Apkbuild { 250 | pkgname: Some("7zip".to_string()), 251 | pkgver: Some("23.01".to_string()), 252 | extra: [ 253 | ("_pkgver", "2301".to_string()), 254 | ("url", "https://7-zip.org/".to_string()), 255 | ].into_iter().collect(), 256 | 257 | source: vec![ 258 | "https://7-zip.org/a/7z2301-src.tar.xz".to_string(), 259 | "armv7.patch".to_string(), 260 | "7-zip-flags.patch".to_string(), 261 | "7-zip-musl.patch".to_string() 262 | ], 263 | sha512sums: vec![ 264 | "e39f660c023aa65e55388be225b5591fe2a5c9138693f3c9107e2eb4ce97fafde118d3375e01ada99d29de9633f56221b5b3d640c982178884670cd84c8aa986".to_string(), 265 | "e52e542709a23ced76b651adf54609efae705801e940e74310ae4e67070bdb3841da5b801362aa0329b77993cdc3f6cd63ac2802240b16cde865f9d01bb1936d".to_string(), 266 | "dfecb69861d00ee47311d83930adf80321b3c95ae01ce325677bde7aee6aa880a1979b0aa2909d9acb7a88ff31f910ac545ac218a0b5fd9e1270df2276b46d44".to_string(), 267 | "c652a87ad95f61901820adb61f3d1ceacedcb8aeaf9e89b2b728b7372eff67d9669eb363d5b2d2fb848ff2d8c5a727134fe13cc77d1215df7b2d32fe87711ebf".to_string(), 268 | ], 269 | } 270 | ); 271 | } 272 | } 273 | -------------------------------------------------------------------------------- /src/apt.rs: -------------------------------------------------------------------------------- 1 | use crate::errors::*; 2 | use apt_parser::errors::{APTError, ParseError}; 3 | use apt_parser::ReleaseHash; 4 | use std::str; 5 | 6 | #[derive(Debug, PartialEq)] 7 | pub struct SourcesIndex { 8 | pub pkgs: Vec<SourcePkg>, 9 | } 10 | 11 | impl SourcesIndex { 12 | pub fn parse(bytes: &[u8]) -> Result<Self> { 13 | let sources = str::from_utf8(bytes)?; 14 | 15 | let mut pkgs = Vec::new(); 16 | 17 | let mut package = None; 18 | let mut in_checksums_sha256_section = false; 19 | for line in sources.lines() { 20 | if let Some(value) = line.strip_prefix("Package: ") { 21 | package = Some(SourcePkg { 22 | package: value.to_string(), 23 | ..Default::default() 24 | }); 25 | } else if let Some(value) = line.strip_prefix("Version: ") { 26 | let Some(package) = package.as_mut() else { 27 | continue; 28 | }; 29 | package.version = Some(value.to_string()); 30 | } else if let Some(value) = line.strip_prefix("Directory: ") { 31 | let Some(package) = package.as_mut() else { 32 | continue; 33 | }; 34 | package.directory = Some(value.to_string()); 35 | } else if line.is_empty() { 36 | if let Some(package) = package.take() { 37 | pkgs.push(package); 38 | } 39 | } else if line.trim_end() == "Checksums-Sha256:" 
{ 40 | in_checksums_sha256_section = true; 41 | } else if let Some(line) = line.strip_prefix(' ') { 42 | if !in_checksums_sha256_section { 43 | continue; 44 | } 45 | 46 | let Some(package) = package.as_mut() else { 47 | continue; 48 | }; 49 | 50 | let (hash, line) = line 51 | .split_once(' ') 52 | .ok_or(APTError::ParseError(ParseError))?; 53 | 54 | let (size, filename) = line 55 | .split_once(' ') 56 | .ok_or(APTError::ParseError(ParseError))?; 57 | let size = size 58 | .parse() 59 | .map_err(|_err| APTError::ParseError(ParseError))?; 60 | 61 | package.checksums_sha256.push(ReleaseHash { 62 | hash: hash.to_string(), 63 | size, 64 | filename: filename.to_string(), 65 | }); 66 | } else { 67 | in_checksums_sha256_section = false; 68 | } 69 | } 70 | 71 | Ok(SourcesIndex { pkgs }) 72 | } 73 | 74 | pub fn find_pkg_by_sha256( 75 | &self, 76 | filter_name: Option<&str>, 77 | filter_version: Option<&str>, 78 | sha256: &str, 79 | ) -> Result<&SourcePkg> { 80 | for pkg in &self.pkgs { 81 | trace!("Found package in sources index: {pkg:?}"); 82 | 83 | if let Some(name) = filter_name { 84 | if pkg.package != *name { 85 | trace!("Skipping due to package name mismatch"); 86 | continue; 87 | } 88 | } 89 | 90 | if let Some(version) = filter_version { 91 | if pkg.version.as_deref() != Some(version) { 92 | trace!("Skipping due to package version mismatch"); 93 | continue; 94 | } 95 | } 96 | 97 | for chksum in &pkg.checksums_sha256 { 98 | if !chksum.filename.ends_with(".orig.tar.gz") 99 | && !chksum.filename.ends_with(".orig.tar.xz") 100 | { 101 | continue; 102 | } 103 | 104 | if chksum.hash == sha256 { 105 | info!("File verified successfully"); 106 | return Ok(pkg); 107 | } 108 | } 109 | } 110 | 111 | Err(APTError::ParseError(ParseError).into()) 112 | } 113 | } 114 | 115 | #[derive(Debug, Default, PartialEq)] 116 | pub struct SourcePkg { 117 | pub package: String, 118 | pub version: Option<String>, 119 | pub directory: Option<String>, 120 | pub checksums_sha256: Vec<ReleaseHash>, 121 | } 122 | 123 | pub struct Release { 124 | release: apt_parser::Release, 125 | } 126 | 127 | impl Release { 128 | pub fn parse(bytes: &[u8]) -> Result<Self> { 129 | let release = str::from_utf8(bytes)?; 130 | let release = apt_parser::Release::from(release)?; 131 | Ok(Release { release }) 132 | } 133 | 134 | pub fn find_source_entry_by_sha256(&self, sha256: &str) -> Result<&ReleaseHash> { 135 | let sha256sums = self 136 | .release 137 | .sha256sum 138 | .as_ref() 139 | .ok_or(APTError::ParseError(ParseError))?; 140 | 141 | let sources_entry = sha256sums 142 | .iter() 143 | .filter(|entry| entry.filename.contains("/source/Sources")) 144 | .find(|entry| { 145 | debug!("Found sha256sum entry for sources index: {entry:?}"); 146 | entry.hash == sha256 147 | }) 148 | .ok_or(APTError::ParseError(ParseError))?; 149 | 150 | Ok(sources_entry) 151 | } 152 | } 153 | 154 | #[cfg(test)] 155 | mod tests { 156 | use super::*; 157 | 158 | #[test] 159 | fn test_parse_kali() { 160 | let data = br#"Package: sn0int 161 | Format: 3.0 (quilt) 162 | Binary: sn0int 163 | Architecture: any 164 | Version: 0.26.0-0kali3 165 | Maintainer: Kali Developers 166 | Uploaders: Sophie Brun 167 | Homepage: https://github.com/kpcyrd/sn0int 168 | Standards-Version: 4.6.2 169 | Vcs-Browser: https://gitlab.com/kalilinux/packages/sn0int 170 | Vcs-Git: https://gitlab.com/kalilinux/packages/sn0int.git 171 | Build-Depends: bash-completion, ca-certificates, debhelper-compat (= 13), libseccomp-dev, libsodium-dev, libsqlite3-dev, pkg-config, publicsuffix, python3-sphinx, cargo 172 | Package-List: 173 | sn0int 
deb net optional arch=any 174 | Priority: optional 175 | Section: net 176 | Directory: pool/main/s/sn0int 177 | Files: 178 | a2f2a9f592c506b6a746dc9debd1cacd 1807 sn0int_0.26.0-0kali3.dsc 179 | 5c5578537a0abe07b683f8b454af025d 1798079 sn0int_0.26.0.orig.tar.gz 180 | b103d74ae55843b0f87112988062be54 8648 sn0int_0.26.0-0kali3.debian.tar.xz 181 | Checksums-Sha1: 182 | 0545bcbba1fcf73b6bd5ba830124ccec7abbf5f8 1807 sn0int_0.26.0-0kali3.dsc 183 | e0b7135bd653540cdc234e2aa334eb1d4bba27f6 1798079 sn0int_0.26.0.orig.tar.gz 184 | d02663536e05ffc292f139d3727a140886c21023 8648 sn0int_0.26.0-0kali3.debian.tar.xz 185 | Checksums-Sha256: 186 | 6075e8c34b5a08aea77319e1346e42b846b7ee460d2c6ea2bb58e1ab6a651674 1807 sn0int_0.26.0-0kali3.dsc 187 | 4ce71f69410a9c9470edf922c3c09b6a53bfbf41d154aa124859bbce8014cf13 1798079 sn0int_0.26.0.orig.tar.gz 188 | 206f6f924a3b79f5495c512e965a0d44915c9b0a2b8c32feac7aac12f1ca1aa9 8648 sn0int_0.26.0-0kali3.debian.tar.xz 189 | 190 | "#; 191 | let index = SourcesIndex::parse(data).unwrap(); 192 | assert_eq!( 193 | index, 194 | SourcesIndex { 195 | pkgs: vec![SourcePkg { 196 | package: "sn0int".to_string(), 197 | version: Some("0.26.0-0kali3".to_string()), 198 | directory: Some("pool/main/s/sn0int".to_string()), 199 | checksums_sha256: vec![ 200 | ReleaseHash { 201 | filename: "sn0int_0.26.0-0kali3.dsc".to_string(), 202 | hash: 203 | "6075e8c34b5a08aea77319e1346e42b846b7ee460d2c6ea2bb58e1ab6a651674" 204 | .to_string(), 205 | size: 1807 206 | }, 207 | ReleaseHash { 208 | filename: "sn0int_0.26.0.orig.tar.gz".to_string(), 209 | hash: 210 | "4ce71f69410a9c9470edf922c3c09b6a53bfbf41d154aa124859bbce8014cf13" 211 | .to_string(), 212 | size: 1798079 213 | }, 214 | ReleaseHash { 215 | filename: "sn0int_0.26.0-0kali3.debian.tar.xz".to_string(), 216 | hash: 217 | "206f6f924a3b79f5495c512e965a0d44915c9b0a2b8c32feac7aac12f1ca1aa9" 218 | .to_string(), 219 | size: 8648 220 | } 221 | ], 222 | }] 223 | } 224 | ); 225 | } 226 | } 227 | -------------------------------------------------------------------------------- /src/args.rs: -------------------------------------------------------------------------------- 1 | use crate::ingest; 2 | use clap::{ArgAction, Parser, Subcommand}; 3 | use std::net::SocketAddr; 4 | 5 | #[derive(Debug, Parser)] 6 | #[command(version)] 7 | pub struct Args { 8 | /// Increase logging output (can be used multiple times) 9 | #[arg(short, long, global = true, action(ArgAction::Count))] 10 | pub verbose: u8, 11 | #[command(subcommand)] 12 | pub subcommand: SubCommand, 13 | } 14 | 15 | #[derive(Debug, Subcommand)] 16 | pub enum SubCommand { 17 | #[command(alias = "daemon")] 18 | Web(Web), 19 | Worker(Worker), 20 | #[command(subcommand)] 21 | Plumbing(Plumbing), 22 | } 23 | 24 | /// Run the web server daemon 25 | #[derive(Debug, Parser)] 26 | pub struct Web { 27 | #[arg(short = 'B', long, env)] 28 | pub bind_addr: SocketAddr, 29 | } 30 | 31 | /// Run worker for background jobs 32 | #[derive(Debug, Parser)] 33 | pub struct Worker { 34 | /// Request through a proxy to evade rate limits 35 | #[arg(long)] 36 | pub socks5: Option<String>, 37 | /// Path to use for temporary git clone operations 38 | #[arg(long, env = "WHATSRC_GIT_TMP")] 39 | pub git_tmp: String, 40 | } 41 | 42 | #[derive(Debug, Subcommand)] 43 | pub enum Plumbing { 44 | IngestTar(IngestTar), 45 | IngestGit(IngestGit), 46 | IngestPacmanSnapshot(IngestPacmanSnapshot), 47 | IngestRpm(IngestRpm), 48 | IngestWolfi(IngestWolfi), 49 | IngestVoid(IngestVoid), 50 | IngestSbom(IngestSbom), 51 | ParsePkgbuild(ParsePkgbuild), 52 | 
SyncAlpine(SyncAlpine), 53 | SyncApt(SyncApt), 54 | SyncPacman(SyncPacman), 55 | SyncLiveBootstrap(SyncLiveBootstrap), 56 | SyncRpm(SyncRpm), 57 | SyncGentoo(SyncGentoo), 58 | SyncHomebrew(SyncHomebrew), 59 | SyncGuix(SyncGuix), 60 | SyncVoid(SyncVoid), 61 | SyncYocto(SyncYocto), 62 | SyncStagex(SyncStagex), 63 | AddRef(AddRef), 64 | ReindexUrl(ReindexUrl), 65 | ReindexSbom(ReindexSbom), 66 | } 67 | 68 | /// Ingest a .tar into the archive 69 | #[derive(Debug, Parser)] 70 | pub struct IngestTar { 71 | #[arg(short, long)] 72 | pub compression: Option<String>, 73 | pub file: Option<String>, 74 | } 75 | 76 | /// Create a `git archive` of a git ref 77 | #[derive(Debug, Parser)] 78 | pub struct IngestGit { 79 | /// The directory to clone into 80 | #[arg(long)] 81 | pub tmp: String, 82 | /// The url to clone from, including tag information 83 | pub git: ingest::git::GitUrl, 84 | } 85 | 86 | /// Ingest a pacman git .tar.gz 87 | #[derive(Debug, Parser)] 88 | pub struct IngestPacmanSnapshot { 89 | #[arg(long)] 90 | pub vendor: String, 91 | #[arg(long)] 92 | pub package: String, 93 | #[arg(long)] 94 | pub version: String, 95 | /// Ignore .SRCINFO even if present 96 | #[arg(long)] 97 | pub prefer_pkgbuild: bool, 98 | #[arg(long)] 99 | pub fetch: bool, 100 | pub file: String, 101 | } 102 | 103 | /// Ingest a .src.rpm 104 | #[derive(Debug, Parser)] 105 | pub struct IngestRpm { 106 | #[arg(long)] 107 | pub vendor: String, 108 | #[arg(long)] 109 | pub package: String, 110 | #[arg(long)] 111 | pub version: String, 112 | #[arg(long)] 113 | pub fetch: bool, 114 | pub file: String, 115 | } 116 | 117 | /// Ingest a wolfi yaml 118 | #[derive(Debug, Parser)] 119 | pub struct IngestWolfi { 120 | #[arg(long)] 121 | pub vendor: String, 122 | #[arg(long)] 123 | pub package: String, 124 | #[arg(long)] 125 | pub version: String, 126 | #[arg(long)] 127 | pub fetch: bool, 128 | pub file: String, 129 | } 130 | 131 | /// Ingest a void package template 132 | #[derive(Debug, Parser)] 133 | pub struct IngestVoid { 134 | #[arg(long)] 135 | pub vendor: String, 136 | #[arg(long)] 137 | pub srcpkg: String, 138 | #[arg(long)] 139 | pub package: String, 140 | #[arg(long)] 141 | pub version: String, 142 | #[arg(long)] 143 | pub fetch: bool, 144 | pub file: String, 145 | } 146 | 147 | /// Ingest a dependency lockfile 148 | #[derive(Debug, Parser)] 149 | pub struct IngestSbom { 150 | #[arg(long)] 151 | pub strain: String, 152 | pub file: String, 153 | } 154 | 155 | /// Attempt parsing a PKGBUILD 156 | #[derive(Debug, Parser)] 157 | pub struct ParsePkgbuild {} 158 | 159 | /// Start an import of a software vendor (alpine) 160 | #[derive(Debug, Parser)] 161 | pub struct SyncAlpine { 162 | #[arg(long)] 163 | pub vendor: String, 164 | #[arg(long)] 165 | pub repo: Option<String>, 166 | #[arg(long)] 167 | pub fetch: bool, 168 | pub file: String, 169 | } 170 | 171 | /// Start an import of a software vendor (apt) 172 | #[derive(Debug, Parser)] 173 | pub struct SyncApt { 174 | #[arg(long)] 175 | pub vendor: String, 176 | /// The release names to import, e.g. `sid`, `stable` or `stable-security` 177 | #[arg(short, long = "release", required = true)] 178 | pub releases: Vec<String>, 179 | /// The suite name to import, e.g. 
`main`, `contrib` or `non-free` 180 | #[arg(long = "suite", default_value = "main")] 181 | pub suites: Vec<String>, 182 | /// Queue a task even if artifact is already known 183 | #[arg(short = 'R', long)] 184 | pub reindex: bool, 185 | pub url: String, 186 | } 187 | 188 | /// Start an import of a software vendor (pacman) 189 | #[derive(Debug, Parser)] 190 | pub struct SyncPacman { 191 | #[arg(long)] 192 | pub vendor: String, 193 | #[arg(long)] 194 | pub fetch: bool, 195 | /// The repositories to ingest, e.g. `core-x86_64` or `extra-x86_64` 196 | #[arg(short, long = "repo", required = true)] 197 | pub repos: Vec<String>, 198 | pub file: String, 199 | } 200 | 201 | /// Start an import of a software vendor (live-bootstrap) 202 | #[derive(Debug, Parser)] 203 | pub struct SyncLiveBootstrap { 204 | #[arg(long)] 205 | pub vendor: String, 206 | #[arg(long)] 207 | pub fetch: bool, 208 | pub file: String, 209 | } 210 | 211 | /// Start an import of a software vendor (rpm) 212 | #[derive(Debug, Parser)] 213 | pub struct SyncRpm { 214 | #[arg(long)] 215 | pub vendor: String, 216 | pub url: String, 217 | } 218 | 219 | /// Start an import of a software vendor (gentoo) 220 | #[derive(Debug, Parser)] 221 | pub struct SyncGentoo { 222 | #[arg(long)] 223 | pub vendor: String, 224 | #[arg(long)] 225 | pub fetch: bool, 226 | pub file: String, 227 | } 228 | 229 | /// Start an import of a software vendor (homebrew) 230 | #[derive(Debug, Parser)] 231 | pub struct SyncHomebrew { 232 | #[arg(long)] 233 | pub vendor: String, 234 | #[arg(long)] 235 | pub fetch: bool, 236 | pub file: String, 237 | } 238 | 239 | /// Start an import of a software vendor (guix) 240 | #[derive(Debug, Parser)] 241 | pub struct SyncGuix { 242 | #[arg(long)] 243 | pub vendor: String, 244 | #[arg(long)] 245 | pub fetch: bool, 246 | pub file: String, 247 | } 248 | 249 | /// Start an import of a software vendor (void) 250 | #[derive(Debug, Parser)] 251 | pub struct SyncVoid { 252 | #[arg(long)] 253 | pub vendor: String, 254 | #[arg(long)] 255 | pub fetch: bool, 256 | pub file: String, 257 | } 258 | 259 | /// Start an import of a software vendor (yocto) 260 | #[derive(Debug, Parser)] 261 | pub struct SyncYocto { 262 | #[arg(long)] 263 | pub vendor: String, 264 | #[arg(long)] 265 | pub fetch: bool, 266 | pub file: String, 267 | } 268 | 269 | /// Start an import of a software vendor (stagex) 270 | #[derive(Debug, Parser)] 271 | pub struct SyncStagex { 272 | #[arg(long)] 273 | pub vendor: String, 274 | #[arg(long)] 275 | pub fetch: bool, 276 | pub file: String, 277 | } 278 | 279 | /// This command should eventually be merged into Ingest 280 | #[derive(Debug, Parser)] 281 | pub struct AddRef { 282 | #[arg(long)] 283 | pub vendor: String, 284 | #[arg(long)] 285 | pub package: String, 286 | #[arg(long)] 287 | pub version: String, 288 | #[arg(long)] 289 | pub filename: Option<String>, 290 | } 291 | 292 | /// Requeue all known urls 293 | #[derive(Debug, Parser)] 294 | pub struct ReindexUrl { 295 | /// Only queue urls containing this string 296 | #[arg(long)] 297 | pub filter: Option<String>, 298 | /// Upper limit of tasks to schedule 299 | #[arg(long)] 300 | pub limit: Option<usize>, 301 | /// Only reindex items that haven't been imported in the last X days 302 | #[arg(long)] 303 | pub age: Option<i64>, 304 | } 305 | 306 | /// Reindex all known sboms 307 | #[derive(Debug, Parser)] 308 | pub struct ReindexSbom { 309 | /// Only queue sboms of this strain 310 | #[arg(long)] 311 | pub strain: Option<String>, 312 | /// Upper limit of tasks to schedule 313 | #[arg(long)] 314 | pub limit: Option<usize>, 315 | } 316 | 
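Worth noting how these clap definitions map onto the commands used in the README: the derive macro kebab-cases variant names, so `Plumbing::SyncApt` is invoked as `plumbing sync-apt` and `IngestPacmanSnapshot` as `plumbing ingest-pacman-snapshot`. A minimal dispatch sketch (illustrative only; the crate's real entrypoint lives in src/main.rs, which is not part of this excerpt, and the `web::run`/`worker::run` signatures are assumptions):

use clap::Parser;

#[tokio::main]
async fn main() -> Result<()> {
    let args = Args::parse();
    match args.subcommand {
        // hypothetical wiring; of these, only ingest::git::run is shown in this excerpt
        SubCommand::Web(args) => web::run(&args).await,
        SubCommand::Worker(args) => worker::run(&args).await,
        SubCommand::Plumbing(Plumbing::IngestGit(args)) => ingest::git::run(&args).await,
        SubCommand::Plumbing(_) => unimplemented!("remaining plumbing subcommands"),
    }
}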
-------------------------------------------------------------------------------- /src/chksums.rs: -------------------------------------------------------------------------------- 1 | use blake2::Blake2b512; 2 | use digest::Digest; 3 | use sha2::{Sha256, Sha512}; 4 | use std::pin::Pin; 5 | use std::task::Poll; 6 | use tokio::io::{self, AsyncRead, ReadBuf}; 7 | 8 | pub struct Hasher<R> { 9 | reader: R, 10 | sha256: Sha256, 11 | sha512: Sha512, 12 | blake2b: Blake2b512, 13 | } 14 | 15 | impl<R: AsyncRead + Unpin> AsyncRead for Hasher<R> { 16 | fn poll_read( 17 | mut self: Pin<&mut Self>, 18 | cx: &mut std::task::Context<'_>, 19 | buf: &mut ReadBuf<'_>, 20 | ) -> Poll<io::Result<()>> { 21 | let before = buf.filled().len(); 22 | if let Poll::Ready(x) = Pin::new(&mut self.reader).poll_read(cx, buf) { 23 | let buf = buf.filled(); 24 | self.sha256.update(&buf[before..]); 25 | self.sha512.update(&buf[before..]); 26 | self.blake2b.update(&buf[before..]); 27 | Poll::Ready(x) 28 | } else { 29 | Poll::Pending 30 | } 31 | } 32 | } 33 | 34 | impl<R> Hasher<R> { 35 | pub fn new(reader: R) -> Self { 36 | let sha256 = Sha256::new(); 37 | let sha512 = Sha512::new(); 38 | let blake2b = Blake2b512::new(); 39 | Hasher { 40 | reader, 41 | sha256, 42 | sha512, 43 | blake2b, 44 | } 45 | } 46 | 47 | pub fn digests(self) -> (R, Checksums) { 48 | ( 49 | self.reader, 50 | Checksums { 51 | sha256: format!("sha256:{}", hex::encode(self.sha256.finalize())), 52 | sha512: format!("sha512:{}", hex::encode(self.sha512.finalize())), 53 | blake2b: format!("blake2b:{}", hex::encode(self.blake2b.finalize())), 54 | }, 55 | ) 56 | } 57 | } 58 | 59 | #[derive(Debug, PartialEq)] 60 | pub struct Checksums { 61 | pub sha256: String, 62 | pub sha512: String, 63 | pub blake2b: String, 64 | } 65 | 66 | pub fn sha256(data: &[u8]) -> String { 67 | let mut sha256 = Sha256::new(); 68 | sha256.update(data); 69 | format!("sha256:{}", hex::encode(sha256.finalize())) 70 | } 71 | -------------------------------------------------------------------------------- /src/compression.rs: -------------------------------------------------------------------------------- 1 | use async_compression::tokio::bufread::{BzDecoder, GzipDecoder, XzDecoder}; 2 | use std::pin::Pin; 3 | use std::task::Poll; 4 | use tokio::io::{self, AsyncBufRead, AsyncRead, ReadBuf}; 5 | 6 | pub enum Decompressor<R> { 7 | Plain(R), 8 | Gz(GzipDecoder<R>), 9 | Xz(XzDecoder<R>), 10 | Bz2(BzDecoder<R>), 11 | } 12 | 13 | impl<R: AsyncBufRead> Decompressor<R> { 14 | pub fn gz(reader: R) -> Self { 15 | Decompressor::Gz(GzipDecoder::new(reader)) 16 | } 17 | 18 | pub fn xz(reader: R) -> Self { 19 | Decompressor::Xz(XzDecoder::new(reader)) 20 | } 21 | 22 | pub fn bz2(reader: R) -> Self { 23 | let mut decoder = BzDecoder::new(reader); 24 | decoder.multiple_members(true); 25 | Decompressor::Bz2(decoder) 26 | } 27 | 28 | pub fn into_inner(self) -> R { 29 | match self { 30 | Decompressor::Plain(r) => r, 31 | Decompressor::Gz(r) => r.into_inner(), 32 | Decompressor::Xz(r) => r.into_inner(), 33 | Decompressor::Bz2(r) => r.into_inner(), 34 | } 35 | } 36 | } 37 | 38 | impl<R: AsyncBufRead + Unpin> AsyncRead for Decompressor<R> { 39 | fn poll_read( 40 | self: Pin<&mut Self>, 41 | cx: &mut std::task::Context<'_>, 42 | buf: &mut ReadBuf<'_>, 43 | ) -> Poll<io::Result<()>> { 44 | match self.get_mut() { 45 | Decompressor::Plain(r) => Pin::new(r).poll_read(cx, buf), 46 | Decompressor::Gz(r) => Pin::new(r).poll_read(cx, buf), 47 | Decompressor::Xz(r) => Pin::new(r).poll_read(cx, buf), 48 | Decompressor::Bz2(r) => Pin::new(r).poll_read(cx, buf), 49 | } 50 | } 51 | } 52 | 
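The two adapters above are built to nest: Hasher taps bytes as they stream past, Decompressor inflates them. A minimal usage sketch (not code from this repo; the function name is illustrative) that digests the compressed stream while handing the decompressed contents to the caller:

use tokio::io::{AsyncRead, AsyncReadExt, BufReader};

async fn hash_gz_stream<R: AsyncRead + Unpin>(reader: R) -> Result<(Vec<u8>, Checksums)> {
    // Hash the raw (compressed) bytes as they stream through...
    let hasher = Hasher::new(reader);
    // ...while the caller reads the inflated contents through the decoder.
    let mut decompressor = Decompressor::gz(BufReader::new(hasher));
    let mut data = Vec::new();
    decompressor.read_to_end(&mut data).await?;
    // Peel the adapters apart again to collect the digests of the outer stream.
    let (_reader, digests) = decompressor.into_inner().into_inner().digests();
    Ok((data, digests))
}

The same layering is what makes the outer/inner digest split seen elsewhere in this dump possible: alias.rs reads summary.outer_digests, git.rs reads summary.inner_digests.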
-------------------------------------------------------------------------------- /src/errors.rs: -------------------------------------------------------------------------------- 1 | use crate::ingest; 2 | use crate::sync::stagex; 3 | pub use log::{debug, error, info, trace, warn}; 4 | use std::process::ExitStatus; 5 | 6 | #[derive(Debug, thiserror::Error)] 7 | pub enum Error { 8 | #[error(transparent)] 9 | TemplateError(#[from] handlebars::TemplateError), 10 | #[error(transparent)] 11 | Sql(#[from] sqlx::Error), 12 | #[error(transparent)] 13 | Io(#[from] std::io::Error), 14 | #[error(transparent)] 15 | Migrate(#[from] sqlx::migrate::MigrateError), 16 | #[error(transparent)] 17 | RenderError(#[from] handlebars::RenderError), 18 | #[error(transparent)] 19 | Json(#[from] serde_json::Error), 20 | #[error(transparent)] 21 | Xml(#[from] serde_xml_rs::Error), 22 | #[error(transparent)] 23 | Toml(#[from] toml::de::Error), 24 | #[error(transparent)] 25 | Plist(#[from] plist::Error), 26 | #[error(transparent)] 27 | Utf8(#[from] std::str::Utf8Error), 28 | #[error(transparent)] 29 | Lz4(#[from] lz4_flex::frame::Error), 30 | #[error(transparent)] 31 | AptError(#[from] apt_parser::errors::APTError), 32 | #[error(transparent)] 33 | Reqwest(#[from] reqwest::Error), 34 | #[error(transparent)] 35 | Srcinfo(#[from] srcinfo::Error), 36 | #[error(transparent)] 37 | Rpm(#[from] rpm::Error), 38 | #[error(transparent)] 39 | YarnLock(#[from] yarn_lock_parser::YarnLockError), 40 | #[error(transparent)] 41 | ParseInt(#[from] std::num::ParseIntError), 42 | #[error(transparent)] 43 | Base64(#[from] data_encoding::DecodeError), 44 | #[error(transparent)] 45 | Yaml(#[from] serde_yaml::Error), 46 | #[error(transparent)] 47 | Regex(#[from] regex::Error), 48 | #[error(transparent)] 49 | InvalidUri(#[from] warp::http::uri::InvalidUri), 50 | #[error(transparent)] 51 | SerdeUrl(#[from] serde_urlencoded::ser::Error), 52 | #[error(transparent)] 53 | JoinError(#[from] tokio::task::JoinError), 54 | #[error("Child process has exited with error: {0}")] 55 | ChildExit(std::process::ExitStatus), 56 | #[error("Parser encountered invalid data")] 57 | InvalidData, 58 | #[error("Parser encountered unknown variable: ${0}")] 59 | UnknownVariable(String), 60 | #[error("Parser encountered invalid PKGBUILD: {0}")] 61 | InvalidPkgbuild(String), 62 | #[error("Rpm is missing a `primary` data entry")] 63 | RpmMissingPrimary, 64 | #[error("Unknown git reference string: {0:?}")] 65 | UnknownGitRef(String), 66 | #[error("Invalid git reference: {0:?}")] 67 | InvalidGitRef(ingest::git::GitUrl), 68 | #[error("Error in git operation")] 69 | GitError(ExitStatus), 70 | #[error("Timeout of git fetch operation")] 71 | GitFetchTimeout, 72 | #[error("Error in git fetch operation")] 73 | GitFetchError(ExitStatus), 74 | #[error("Failed to parse git rev-parse output")] 75 | GitRevParseError(String), 76 | #[error("Failed to determine filename for Sources index")] 77 | AptIndexMissingSources, 78 | #[error("Unknown sbom strain: {0:?}")] 79 | UnknownSbomStrain(String), 80 | #[error("Task is missing mandatory repo field")] 81 | AlpineMissingRepo, 82 | #[error("APKINDEX is missing mandatory field: {0:?}")] 83 | ApkMissingField(&'static str), 84 | #[error("Unrecognized apk vendor: {0:?}")] 85 | UnrecognizedApkVendor(String), 86 | #[error("Failed to detect artifact checksum in wolfi package: {0:?}")] 87 | WolfiMissingChecksum(ingest::wolfi::Step), 88 | #[error("Unrecognized substitute in wolfi package: {0:?}")] 89 | WolfiUnknownSubstitute(String), 90 | #[error("String 
is poisoned, failed to interpolate: {0:?}")] 91 | YoctoPoisonedStr(String), 92 | #[error("Stagex package has no mirrors for source: {0:?}")] 93 | StagexMissingMirrors(stagex::Source), 94 | #[error("Stagex interpolate expression is never closed: {0:?}")] 95 | StagexUnclosedInterpolate(String), 96 | #[error("Stagex interpolate expression references undefined variable: {0:?}")] 97 | StagexUndefinedVariable(String), 98 | } 99 | 100 | // TODO: consider fixing this 101 | impl warp::reject::Reject for Error {} 102 | 103 | pub type Result<T, E = Error> = std::result::Result<T, E>; 104 | -------------------------------------------------------------------------------- /src/ingest/alpine.rs: -------------------------------------------------------------------------------- 1 | use crate::apkbuild; 2 | use crate::db::{self, Task, TaskData}; 3 | use crate::errors::*; 4 | use crate::utils; 5 | use tokio::io::{AsyncRead, AsyncReadExt}; 6 | 7 | pub async fn stream_data<R: AsyncRead + Unpin>( 8 | db: &db::Client, 9 | mut reader: R, 10 | vendor: &str, 11 | package: &str, 12 | version: &str, 13 | ) -> Result<()> { 14 | let mut buf = String::new(); 15 | reader.read_to_string(&mut buf).await?; 16 | 17 | info!("Parsing APKBUILD"); 18 | let apkbuild = apkbuild::parse(&buf)?; 19 | 20 | for i in 0..apkbuild.source.len() { 21 | let Some(url) = apkbuild.source.get(i) else { 22 | continue; 23 | }; 24 | let Some(sha512) = apkbuild.sha512sums.get(i) else { 25 | continue; 26 | }; 27 | 28 | if !utils::is_possible_tar_artifact(url) { 29 | continue; 30 | } 31 | 32 | let chksum = format!("sha512:{sha512}"); 33 | 34 | // check if already known 35 | if db.resolve_artifact(&chksum).await?.is_none() { 36 | db.insert_task(&Task::new( 37 | format!("fetch:{url}"), 38 | &TaskData::FetchTar { 39 | url: url.to_string(), 40 | compression: None, 41 | success_ref: None, 42 | }, 43 | )?) 
44 | .await?; 45 | } 46 | 47 | let r = db::Ref { 48 | chksum, 49 | vendor: vendor.to_string(), 50 | package: package.to_string(), 51 | version: version.to_string(), 52 | filename: Some(url.to_string()), 53 | }; 54 | info!("insert: {r:?}"); 55 | db.insert_ref(&r).await?; 56 | } 57 | 58 | Ok(()) 59 | } 60 | -------------------------------------------------------------------------------- /src/ingest/git.rs: -------------------------------------------------------------------------------- 1 | use crate::args; 2 | use crate::db; 3 | use crate::errors::*; 4 | use crate::ingest; 5 | use fd_lock::RwLock; 6 | use std::io::BufRead; 7 | use std::process::Stdio; 8 | use std::str::FromStr; 9 | use tokio::fs; 10 | use tokio::process; 11 | use tokio::time::{self, Duration}; 12 | 13 | /// Do not tolerate occupying more than 20min of our time 14 | pub const CLONE_TIMEOUT: Duration = Duration::from_secs(20 * 60); 15 | 16 | #[derive(Debug, Clone, PartialEq, Default)] 17 | pub struct GitUrl { 18 | url: String, 19 | tag: Option<String>, 20 | commit: Option<String>, 21 | } 22 | 23 | impl FromStr for GitUrl { 24 | type Err = Error; 25 | 26 | fn from_str(full_url: &str) -> Result<Self> { 27 | let url = full_url.strip_prefix("git+").unwrap_or(full_url); 28 | let url = url.trim_end_matches("?signed"); 29 | 30 | let (url, info) = url 31 | .rsplit_once('#') 32 | .map(|(url, info)| (url, Some(info))) 33 | .unwrap_or((url, None)); 34 | 35 | let mut git = GitUrl { 36 | url: url.trim_end_matches("?signed").to_string(), 37 | ..Default::default() 38 | }; 39 | 40 | if let Some(info) = info { 41 | match info.split_once('=') { 42 | Some(("tag", value)) => git.tag = Some(value.to_string()), 43 | Some(("commit", value)) => git.commit = Some(value.to_string()), 44 | _ => return Err(Error::UnknownGitRef(info.to_string())), 45 | }; 46 | } 47 | 48 | Ok(git) 49 | } 50 | } 51 | 52 | pub async fn take_snapshot(db: &db::Client, git: &GitUrl, tmp: &str) -> Result<()> { 53 | fs::create_dir_all(tmp).await?; 54 | let dir = fs::File::open(tmp).await?; 55 | info!("Getting lock on filesystem git workdir..."); 56 | let mut lock = RwLock::new(dir.into_std().await); 57 | let _lock = lock.write(); 58 | debug!("Acquired lock"); 59 | 60 | let reference = if let Some(tag) = &git.tag { 61 | tag 62 | } else if let Some(commit) = &git.commit { 63 | commit 64 | } else { 65 | return Err(Error::InvalidGitRef(git.clone())); 66 | }; 67 | 68 | let path = format!("{}/git", tmp.strip_suffix('/').unwrap_or(tmp)); 69 | if fs::metadata(&path).await.is_ok() { 70 | debug!("Running cleanup of temporary git repository"); 71 | fs::remove_dir_all(&path).await?; 72 | } 73 | 74 | // Checking out a single commit occupies 40GB disk 75 | if [ 76 | "https://chromium.googlesource.com/chromium/src.git", 77 | "https://github.com/chromium/chromium.git", 78 | ] 79 | .contains(&git.url.as_str()) 80 | { 81 | info!("Detected chromium, skipping 🤡: {:?}", git.url); 82 | return Ok(()); 83 | } 84 | 85 | // run git clone 86 | info!("Setting up git repository"); 87 | let status = process::Command::new("git") 88 | .args(["init", "-qb", "main", &path]) 89 | .status() 90 | .await?; 91 | if !status.success() { 92 | return Err(Error::GitError(status)); 93 | } 94 | 95 | // https://gitlab.archlinux.org/pacman/pacman/-/commit/0828a085c146601f21d5e4afb5f396f00de2963b 96 | debug!("Setting up .git/info/attributes to disable .gitattributes"); 97 | fs::write( 98 | format!("{path}/.git/info/attributes"), 99 | b"* -export-subst -export-ignore\n", 100 | ) 101 | .await?; 102 | 103 | debug!("Adding git remote: {:?}", git.url); 
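// Note: the remote was added without fetching anything; the fetch below pulls
// only the requested tree-ish and is bounded by CLONE_TIMEOUT (20 minutes).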
104 | let status = process::Command::new("git") 105 | .args(["-C", &path, "remote", "add", "origin", &git.url]) 106 | .status() 107 | .await?; 108 | if !status.success() { 109 | return Err(Error::GitError(status)); 110 | } 111 | 112 | info!( 113 | "Fetching git VCS tree-ish reference from {:?}: {:?}", 114 | git.url, reference 115 | ); 116 | let child = process::Command::new("git") 117 | .args(["-C", &path, "fetch", "origin", reference]) 118 | .status(); 119 | let Ok(status) = time::timeout(CLONE_TIMEOUT, child).await else { 120 | return Err(Error::GitFetchTimeout); 121 | }; 122 | let status = status?; 123 | if !status.success() { 124 | return Err(Error::GitFetchError(status)); 125 | } 126 | 127 | info!("Resolving FETCH_HEAD git ref"); 128 | let output = process::Command::new("git") 129 | .args(["-C", &path, "rev-list", "-n1", "FETCH_HEAD"]) 130 | .output() 131 | .await?; 132 | if !output.status.success() { 133 | return Err(Error::GitFetchError(output.status)); 134 | } 135 | let Some(Ok(commit)) = output.stdout.lines().next() else { 136 | let output = String::from_utf8_lossy(&output.stdout).into_owned(); 137 | return Err(Error::GitRevParseError(output)); 138 | }; 139 | info!("Resolved ref FETCH_HEAD to git commit: {commit:?}"); 140 | 141 | info!("Taking `git archive` snapshot of FETCH_HEAD"); 142 | let mut child = process::Command::new("git") 143 | .args([ 144 | "-C", 145 | &path, 146 | "-c", 147 | "core.abbrev=no", 148 | "archive", 149 | "--format", 150 | "tar", 151 | "FETCH_HEAD", 152 | ]) 153 | .stdout(Stdio::piped()) 154 | .spawn()?; 155 | 156 | let stdout = child.stdout.take().unwrap(); 157 | let summary = ingest::tar::stream_data(Some(db), stdout, None).await?; 158 | 159 | let status = child.wait().await?; 160 | if !status.success() { 161 | return Err(Error::GitFetchError(status)); 162 | } 163 | 164 | db.insert_alias_from_to( 165 | &format!("git:{commit}"), 166 | &summary.inner_digests.sha256, 167 | "git-archive", 168 | ) 169 | .await?; 170 | 171 | Ok(()) 172 | } 173 | 174 | pub async fn run(args: &args::IngestGit) -> Result<()> { 175 | let db = db::Client::create().await?; 176 | 177 | take_snapshot(&db, &args.git, &args.tmp).await?; 178 | 179 | Ok(()) 180 | } 181 | 182 | #[cfg(test)] 183 | mod tests { 184 | use super::*; 185 | 186 | #[test] 187 | fn parse_git_url() { 188 | let url = "git+https://github.com/curl/curl.git?signed"; 189 | let git = url.parse::<GitUrl>().unwrap(); 190 | assert_eq!( 191 | git, 192 | GitUrl { 193 | url: "https://github.com/curl/curl.git".to_string(), 194 | tag: None, 195 | commit: None, 196 | } 197 | ); 198 | } 199 | 200 | #[test] 201 | fn parse_git_url_tag() { 202 | let url = "git+https://github.com/curl/curl.git#tag=curl-8_7_1?signed"; 203 | let git = url.parse::<GitUrl>().unwrap(); 204 | assert_eq!( 205 | git, 206 | GitUrl { 207 | url: "https://github.com/curl/curl.git".to_string(), 208 | tag: Some("curl-8_7_1".to_string()), 209 | commit: None, 210 | } 211 | ); 212 | } 213 | 214 | #[test] 215 | fn parse_git_url_commit() { 216 | let url = "git+https://github.com/rapid7/metasploit-framework.git?signed#commit=77fb7ae14f17fd7f4851bca87e0c28c704797591"; 217 | let git = url.parse::<GitUrl>().unwrap(); 218 | assert_eq!( 219 | git, 220 | GitUrl { 221 | url: "https://github.com/rapid7/metasploit-framework.git".to_string(), 222 | tag: None, 223 | commit: Some("77fb7ae14f17fd7f4851bca87e0c28c704797591".to_string()), 224 | } 225 | ); 226 | } 227 | } 228 | -------------------------------------------------------------------------------- /src/ingest/mod.rs:
-------------------------------------------------------------------------------- 1 | pub mod alpine; 2 | pub mod git; 3 | pub mod pacman; 4 | pub mod rpm; 5 | pub mod tar; 6 | pub mod void; 7 | pub mod wolfi; 8 | -------------------------------------------------------------------------------- /src/ingest/pacman.rs: -------------------------------------------------------------------------------- 1 | use crate::args; 2 | use crate::db; 3 | use crate::errors::*; 4 | use crate::pkgbuild; 5 | use crate::sbom; 6 | use crate::utils; 7 | use async_compression::tokio::bufread::GzipDecoder; 8 | use futures::StreamExt; 9 | use srcinfo::Srcinfo; 10 | use tokio::io::{self, AsyncRead, AsyncReadExt}; 11 | use tokio_tar::Archive; 12 | 13 | #[derive(Debug)] 14 | pub struct Snapshot { 15 | pkgbuild: String, 16 | srcinfo: Option<String>, 17 | } 18 | 19 | impl Snapshot { 20 | pub async fn parse_from_tgz<R: AsyncRead + Unpin>( 21 | db: &db::Client, 22 | reader: R, 23 | ) -> Result<Snapshot> { 24 | let reader = io::BufReader::new(reader); 25 | let reader = GzipDecoder::new(reader); 26 | let mut tar = Archive::new(reader); 27 | 28 | let mut pkgbuild = None; 29 | let mut srcinfo = None; 30 | 31 | let mut entries = tar.entries()?; 32 | while let Some(entry) = entries.next().await { 33 | let mut entry = entry?; 34 | let path = entry.path()?; 35 | let Some(file_name) = path.file_name() else { 36 | continue; 37 | }; 38 | match file_name.to_str() { 39 | Some(".SRCINFO") => { 40 | let mut buf = String::new(); 41 | entry.read_to_string(&mut buf).await?; 42 | srcinfo = Some(buf); 43 | } 44 | Some("PKGBUILD") => { 45 | let mut buf = String::new(); 46 | entry.read_to_string(&mut buf).await?; 47 | pkgbuild = Some(buf); 48 | } 49 | filename => { 50 | if let Some(strain) = sbom::detect_from_filename(filename) { 51 | let mut buf = String::new(); 52 | entry.read_to_string(&mut buf).await?; 53 | 54 | let sbom = sbom::Sbom::new(strain, buf)?; 55 | let chksum = db.insert_sbom(&sbom).await?; 56 | let strain = sbom.strain(); 57 | info!("Inserted sbom {strain:?}: {chksum:?}"); 58 | db.insert_task(&db::Task::new( 59 | format!("sbom:{strain}:{chksum}"), 60 | &db::TaskData::IndexSbom { 61 | strain: Some(strain.to_string()), 62 | chksum, 63 | }, 64 | )?)
65 | .await?; 66 | } 67 | } 68 | } 69 | } 70 | 71 | Ok(Snapshot { 72 | pkgbuild: pkgbuild.ok_or(Error::InvalidData)?, 73 | srcinfo, 74 | }) 75 | } 76 | 77 | fn get_from_archvec(vec: &[srcinfo::ArchVec]) -> &[String] { 78 | vec.iter() 79 | .find(|x| x.arch.is_none()) 80 | .map(|e| &e.vec[..]) 81 | .unwrap_or(&[]) 82 | } 83 | 84 | fn source_entries_from_lists( 85 | max: usize, 86 | sources: &[String], 87 | sha256sums: &[String], 88 | sha512sums: &[String], 89 | b2sums: &[String], 90 | ) -> Vec<SourceEntry> { 91 | let mut out = Vec::new(); 92 | for i in 0..max { 93 | let url = sources.get(i).map(|url| { 94 | url.split_once("::") 95 | .map(|(_filename, url)| url) 96 | .unwrap_or(url) 97 | .to_string() 98 | }); 99 | 100 | // Skip entries that we know for sure are not urls 101 | if let Some(url) = &url { 102 | if !url.contains("://") { 103 | continue; 104 | } 105 | } 106 | 107 | out.push(SourceEntry { 108 | url, 109 | sha256: Self::filter_skip(sha256sums.get(i)), 110 | sha512: Self::filter_skip(sha512sums.get(i)), 111 | blake2b: Self::filter_skip(b2sums.get(i)), 112 | }); 113 | } 114 | out 115 | } 116 | 117 | fn filter_skip(v: Option<&String>) -> Option<String> { 118 | v.filter(|v| *v != "SKIP").cloned() 119 | } 120 | 121 | pub fn source_entries(&self) -> Result<Vec<SourceEntry>> { 122 | if let Some(srcinfo) = &self.srcinfo { 123 | let srcinfo = Srcinfo::parse_buf(srcinfo.as_bytes())?; 124 | let sources = Self::get_from_archvec(&srcinfo.base.source); 125 | let sha256sums = Self::get_from_archvec(&srcinfo.base.sha256sums); 126 | let sha512sums = Self::get_from_archvec(&srcinfo.base.sha512sums); 127 | let b2sums = Self::get_from_archvec(&srcinfo.base.b2sums); 128 | 129 | Ok(Self::source_entries_from_lists( 130 | sources.len(), 131 | sources, 132 | sha256sums, 133 | sha512sums, 134 | b2sums, 135 | )) 136 | } else { 137 | let pkgbuild = pkgbuild::parse(self.pkgbuild.as_bytes())?; 138 | 139 | let max = [ 140 | pkgbuild.sha256sums.len(), 141 | pkgbuild.sha512sums.len(), 142 | pkgbuild.b2sums.len(), 143 | ] 144 | .into_iter() 145 | .max() 146 | .unwrap_or(0); 147 | 148 | Ok(Self::source_entries_from_lists( 149 | max, 150 | &[], 151 | &pkgbuild.sha256sums, 152 | &pkgbuild.sha512sums, 153 | &pkgbuild.b2sums, 154 | )) 155 | } 156 | } 157 | } 158 | 159 | #[derive(Debug)] 160 | pub struct SourceEntry { 161 | url: Option<String>, 162 | sha256: Option<String>, 163 | sha512: Option<String>, 164 | blake2b: Option<String>, 165 | } 166 | 167 | impl SourceEntry { 168 | pub fn preferred_chksum(&self) -> Option<String> { 169 | match (&self.sha256, &self.sha512, &self.blake2b) { 170 | (Some(sha256), _, _) => Some(format!("sha256:{sha256}")), 171 | (None, Some(sha512), _) => Some(format!("sha512:{sha512}")), 172 | (None, None, Some(blake2b)) => Some(format!("blake2b:{blake2b}")), 173 | (None, None, None) => None, 174 | } 175 | } 176 | } 177 | 178 | pub async fn stream_data<R: AsyncRead + Unpin>( 179 | db: &db::Client, 180 | reader: R, 181 | vendor: &str, 182 | package: &str, 183 | version: &str, 184 | prefer_pkgbuild: bool, 185 | ) -> Result<()> { 186 | let mut snapshot = Snapshot::parse_from_tgz(db, reader).await?; 187 | if prefer_pkgbuild { 188 | snapshot.srcinfo = None; 189 | } 190 | 191 | for entry in snapshot.source_entries()?
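// Each entry in the loop below is handled the same way: skip it if no
// sha256/sha512/blake2b checksum survived SKIP-filtering, otherwise queue
// a fetch task for the url (if any) and record a vendor/package/version ref.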
{ 192 | debug!("Found source entry: {entry:?}"); 193 | let Some(chksum) = entry.preferred_chksum() else { 194 | continue; 195 | }; 196 | 197 | // TODO: check if already known 198 | if let Some(url) = &entry.url { 199 | if let Some(task) = utils::task_for_url(url) { 200 | db.insert_task(&task).await?; 201 | } 202 | } 203 | 204 | let r = db::Ref { 205 | chksum, 206 | vendor: vendor.to_string(), 207 | package: package.to_string(), 208 | version: version.to_string(), 209 | filename: entry.url, 210 | }; 211 | info!("insert: {r:?}"); 212 | db.insert_ref(&r).await?; 213 | } 214 | 215 | Ok(()) 216 | } 217 | 218 | pub async fn run(args: &args::IngestPacmanSnapshot) -> Result<()> { 219 | let db = db::Client::create().await?; 220 | 221 | let reader = utils::fetch_or_open(&args.file, args.fetch).await?; 222 | stream_data( 223 | &db, 224 | reader, 225 | &args.vendor, 226 | &args.package, 227 | &args.version, 228 | args.prefer_pkgbuild, 229 | ) 230 | .await?; 231 | 232 | Ok(()) 233 | } 234 | -------------------------------------------------------------------------------- /src/ingest/rpm.rs: -------------------------------------------------------------------------------- 1 | use crate::args; 2 | use crate::db; 3 | use crate::errors::*; 4 | use crate::ingest; 5 | use crate::utils; 6 | use futures::StreamExt; 7 | use std::process::Stdio; 8 | use std::sync::Arc; 9 | use tokio::io::{self, AsyncRead}; 10 | use tokio::process::Command; 11 | use tokio_tar::{Archive, EntryType}; 12 | 13 | pub async fn read_routine<R: AsyncRead + Unpin>( 14 | db: &db::Client, 15 | reader: R, 16 | vendor: String, 17 | package: String, 18 | version: String, 19 | ) -> Result<()> { 20 | let mut tar = Archive::new(reader); 21 | let mut entries = tar.entries()?; 22 | 23 | while let Some(entry) = entries.next().await { 24 | let entry = entry?; 25 | let filename = { 26 | let path = entry.path()?; 27 | debug!("Found entry in .rpm: {:?}", path); 28 | 29 | if entry.header().entry_type() != EntryType::Regular { 30 | continue; 31 | } 32 | 33 | let Some(filename) = path.file_name() else { 34 | continue; 35 | }; 36 | let Some(filename) = filename.to_str() else { 37 | continue; 38 | }; 39 | 40 | filename.to_string() 41 | }; 42 | 43 | // TODO: find a better solution for this, can we just autodetect all regardless of file name?
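// A possible alternative for the TODO above (a sketch, not what the code
// below does): sniff magic bytes instead of trusting file names, e.g.
//   gz:  0x1f 0x8b
//   xz:  0xfd '7' 'z' 'X' 'Z' 0x00
//   bz2: 'B' 'Z' 'h'
// at the cost of peeking into the stream before picking a decompressor.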
44 | let compression = if filename.ends_with(".tar.gz") 45 | || filename.ends_with(".tgz") 46 | || filename.ends_with(".crate") 47 | { 48 | Some("gz") 49 | } else if filename.ends_with(".tar.xz") { 50 | Some("xz") 51 | } else if filename.ends_with(".tar.bz2") { 52 | Some("bz2") 53 | } else if filename.ends_with(".tar") { 54 | None 55 | } else { 56 | continue; 57 | }; 58 | 59 | // in case of chromium, calculate the checksum but do not import 60 | let tar_db = if filename.starts_with("chromium-") { 61 | None 62 | } else { 63 | Some(db) 64 | }; 65 | let summary = ingest::tar::stream_data(tar_db, entry, compression).await?; 66 | 67 | let r = db::Ref { 68 | chksum: summary.outer_digests.sha256.clone(), 69 | vendor: vendor.to_string(), 70 | package: package.to_string(), 71 | version: version.to_string(), 72 | filename: Some(filename.to_string()), 73 | }; 74 | info!("insert ref: {r:?}"); 75 | db.insert_ref(&r).await?; 76 | } 77 | Ok(()) 78 | } 79 | 80 | pub async fn stream_data<R: AsyncRead + Unpin>( 81 | db: Arc<db::Client>, 82 | mut reader: R, 83 | vendor: String, 84 | package: String, 85 | version: String, 86 | ) -> Result<()> { 87 | let mut child = Command::new("bsdtar") 88 | .args(["-c", "@-"]) 89 | .stdin(Stdio::piped()) 90 | .stdout(Stdio::piped()) 91 | .spawn()?; 92 | 93 | let mut stdin = child.stdin.take().unwrap(); 94 | let writer = async { 95 | let n = io::copy(&mut reader, &mut stdin).await; 96 | drop(stdin); 97 | n 98 | }; 99 | 100 | let stdout = child.stdout.take().unwrap(); 101 | let reader = 102 | tokio::spawn(async move { read_routine(&db, stdout, vendor, package, version).await }); 103 | 104 | let (reader, writer) = tokio::join!(reader, writer); 105 | debug!("Sent {} bytes to child process", writer?); 106 | let status = child.wait().await?; 107 | if !status.success() { 108 | return Err(Error::ChildExit(status)); 109 | } 110 | debug!("Finished processing .rpm"); 111 | reader?
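// `reader` holds the JoinHandle result here: the `?` above surfaces a
// panicked or cancelled task as an error, and the inner Result<()> from
// read_routine becomes this function's return value.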
112 | } 113 | 114 | pub async fn run(args: &args::IngestRpm) -> Result<()> { 115 | let db = db::Client::create().await?; 116 | let db = Arc::new(db); 117 | 118 | let reader = utils::fetch_or_open(&args.file, args.fetch).await?; 119 | stream_data( 120 | db.clone(), 121 | reader, 122 | args.vendor.to_string(), 123 | args.package.to_string(), 124 | args.version.to_string(), 125 | ) 126 | .await?; 127 | 128 | Ok(()) 129 | } 130 | -------------------------------------------------------------------------------- /src/ingest/tar.rs: -------------------------------------------------------------------------------- 1 | use crate::args; 2 | use crate::chksums::{Checksums, Hasher}; 3 | use crate::compression::Decompressor; 4 | use crate::db; 5 | use crate::errors::*; 6 | use crate::sbom; 7 | use digest::Digest; 8 | use futures::stream::StreamExt; 9 | use serde::{Deserialize, Serialize}; 10 | use sha2::Sha256; 11 | use tokio::fs::File; 12 | use tokio::io::{self, AsyncRead, AsyncReadExt}; 13 | use tokio_tar::{Archive, EntryType}; 14 | 15 | #[derive(Debug, PartialEq, Serialize, Deserialize)] 16 | pub struct Entry { 17 | pub path: String, 18 | #[serde(skip_serializing_if = "Option::is_none")] 19 | pub digest: Option<String>, 20 | #[serde(flatten)] 21 | pub metadata: Metadata, 22 | } 23 | 24 | #[derive(Debug, PartialEq, Serialize, Deserialize)] 25 | pub struct Metadata { 26 | #[serde(skip_serializing_if = "Option::is_none")] 27 | pub mode: Option<String>, 28 | #[serde(skip_serializing_if = "Option::is_none")] 29 | pub links_to: Option<LinksTo>, 30 | #[serde(skip_serializing_if = "Option::is_none")] 31 | pub mtime: Option<u64>, 32 | #[serde(skip_serializing_if = "Option::is_none")] 33 | pub uid: Option<u64>, 34 | #[serde(skip_serializing_if = "Option::is_none")] 35 | pub username: Option<String>, 36 | #[serde(skip_serializing_if = "Option::is_none")] 37 | pub gid: Option<u64>, 38 | #[serde(skip_serializing_if = "Option::is_none")] 39 | pub groupname: Option<String>, 40 | } 41 | 42 | impl Metadata { 43 | fn from_tar_header<R: AsyncRead + Unpin>( 44 | entry: &tokio_tar::Entry<R>, 45 | ) -> Result<Option<(Metadata, bool)>> { 46 | let header = entry.header(); 47 | let (is_file, links_to) = match header.entry_type() { 48 | EntryType::XGlobalHeader => return Ok(None), 49 | EntryType::Regular => (true, None), 50 | EntryType::Symlink => { 51 | let link = entry.link_name()?.map(|path| { 52 | let path = path.to_string_lossy(); 53 | LinksTo::Symbolic(path.into_owned()) 54 | }); 55 | (false, link) 56 | } 57 | EntryType::Link => { 58 | let link = entry.link_name()?.map(|path| { 59 | let path = path.to_string_lossy(); 60 | LinksTo::Hard(path.into_owned()) 61 | }); 62 | (false, link) 63 | } 64 | _ => (false, None), 65 | }; 66 | 67 | let metadata = Metadata { 68 | mode: header.mode().ok().map(|mode| format!("0o{mode:o}")), 69 | links_to, 70 | mtime: header.mtime().ok(), 71 | uid: header.uid().ok(), 72 | username: header.username().ok().flatten().map(String::from), 73 | gid: header.gid().ok(), 74 | groupname: header.groupname().ok().flatten().map(String::from), 75 | }; 76 | Ok(Some((metadata, is_file))) 77 | } 78 | } 79 | 80 | #[derive(Debug, PartialEq, Serialize, Deserialize)] 81 | #[serde(rename_all = "snake_case")] 82 | pub enum LinksTo { 83 | Hard(String), 84 | Symbolic(String), 85 | } 86 | 87 | #[derive(Debug, PartialEq)] 88 | pub struct TarSummary { 89 | pub inner_digests: Checksums, 90 | pub outer_digests: Checksums, 91 | pub files: Vec<Entry>, 92 | pub sbom_refs: Vec<sbom::Ref>, 93 | } 94 | 95 | pub async fn stream_data<R: AsyncRead + Unpin>( 96 | db: Option<&db::Client>, 97 | reader: R, 98 | compression: Option<&str>, 99 | ) -> Result<TarSummary> { 100 | // Setup
decompressor 101 | let reader = io::BufReader::new(Hasher::new(reader)); 102 | let (reader, outer_label) = match compression { 103 | Some("gz") => (Decompressor::gz(reader), "gz(tar)"), 104 | Some("xz") => (Decompressor::xz(reader), "xz(tar)"), 105 | Some("bz2") => (Decompressor::bz2(reader), "bz2(tar)"), 106 | None => (Decompressor::Plain(reader), "tar"), 107 | unknown => panic!("Unknown compression algorithm: {unknown:?}"), 108 | }; 109 | let reader = Hasher::new(reader); 110 | 111 | // Open archive 112 | let mut tar = Archive::new(reader); 113 | let mut files = Vec::new(); 114 | let mut sbom_refs = Vec::new(); 115 | { 116 | let mut entries = tar.entries()?; 117 | while let Some(entry) = entries.next().await { 118 | let mut entry = entry?; 119 | let Some((metadata, is_file)) = Metadata::from_tar_header(&entry)? else { 120 | continue; 121 | }; 122 | 123 | let path = entry.path()?; 124 | let filename = path.file_name().and_then(|f| f.to_str()).map(String::from); 125 | let path = path.to_string_lossy().into_owned(); 126 | 127 | let digest = if is_file { 128 | let sbom = sbom::detect_from_filename(filename.as_deref()); 129 | 130 | let mut buf = [0; 4096]; 131 | let mut data = Vec::<u8>::new(); 132 | let mut sha256 = Sha256::new(); 133 | loop { 134 | let n = entry.read(&mut buf).await?; 135 | if n == 0 { 136 | break; 137 | } 138 | let buf = &buf[..n]; 139 | sha256.update(buf); 140 | if sbom.is_some() { 141 | data.extend(buf); 142 | } 143 | } 144 | 145 | let digest = format!("sha256:{}", hex::encode(sha256.finalize())); 146 | 147 | if let Some(sbom) = sbom { 148 | if let Ok(data) = String::from_utf8(data) { 149 | if let Some(db) = db { 150 | let sbom = sbom::Sbom::new(sbom, data)?; 151 | let chksum = db.insert_sbom(&sbom).await?; 152 | let strain = sbom.strain(); 153 | info!("Inserted sbom {strain:?}: {digest:?}"); 154 | sbom_refs.push(sbom::Ref { 155 | strain, 156 | chksum: chksum.clone(), 157 | path: path.clone(), 158 | }); 159 | db.insert_task(&db::Task::new( 160 | format!("sbom:{strain}:{chksum}"), 161 | &db::TaskData::IndexSbom { 162 | strain: Some(strain.to_string()), 163 | chksum, 164 | }, 165 | )?)
166 | .await?; 167 | } 168 | } 169 | } 170 | 171 | Some(digest) 172 | } else { 173 | None 174 | }; 175 | 176 | let entry = Entry { 177 | path: path.to_string(), 178 | digest, 179 | metadata, 180 | }; 181 | debug!("Found entry={entry:?}"); 182 | 183 | files.push(entry); 184 | } 185 | } 186 | let Ok(mut reader) = tar.into_inner() else { 187 | panic!("can't get hasher from tar reader") 188 | }; 189 | 190 | // Consume any remaining data 191 | io::copy(&mut reader, &mut io::sink()).await?; 192 | 193 | // Determine hashes 194 | let (reader, inner_digests) = reader.digests(); 195 | info!("Found digest for inner .tar: {inner_digests:?}"); 196 | let reader = reader.into_inner().into_inner(); 197 | 198 | let (_stream, outer_digests) = reader.digests(); 199 | info!("Found digests for outer compressed tar: {outer_digests:?}"); 200 | 201 | if let Some(db) = db { 202 | // Insert into database 203 | db.insert_artifact(&inner_digests.sha256, &files).await?; 204 | db.register_chksums_aliases(&inner_digests, &inner_digests.sha256, "tar") 205 | .await?; 206 | db.register_chksums_aliases(&outer_digests, &inner_digests.sha256, outer_label) 207 | .await?; 208 | 209 | for sbom in &sbom_refs { 210 | db.insert_sbom_ref(&inner_digests.sha256, sbom.strain, &sbom.chksum, &sbom.path) 211 | .await?; 212 | } 213 | } 214 | 215 | Ok(TarSummary { 216 | inner_digests, 217 | outer_digests, 218 | files, 219 | sbom_refs, 220 | }) 221 | } 222 | 223 | pub async fn run(args: &args::IngestTar) -> Result<()> { 224 | let db = db::Client::create().await?; 225 | 226 | let input: Box<dyn AsyncRead + Unpin> = if let Some(path) = &args.file { 227 | Box::new(File::open(path).await?) 228 | } else { 229 | Box::new(io::stdin()) 230 | }; 231 | 232 | stream_data(Some(&db), input, args.compression.as_deref()).await?; 233 | 234 | Ok(()) 235 | } 236 | 237 | #[cfg(test)] 238 | mod tests { 239 | use super::*; 240 | 241 | #[test] 242 | fn test_minimal_json_format() { 243 | let txt = serde_json::to_string_pretty(&Entry { 244 | path: "foo-1.0/".to_string(), 245 | digest: None, 246 | metadata: Metadata { 247 | mode: Some("0o775".to_string()), 248 | links_to: None, 249 | mtime: Some(1337), 250 | uid: Some(0), 251 | username: None, 252 | gid: Some(0), 253 | groupname: None, 254 | }, 255 | }) 256 | .unwrap(); 257 | assert_eq!( 258 | txt, 259 | r#"{ 260 | "path": "foo-1.0/", 261 | "mode": "0o775", 262 | "mtime": 1337, 263 | "uid": 0, 264 | "gid": 0 265 | }"# 266 | ); 267 | } 268 | 269 | #[test] 270 | fn test_regular_json_format() { 271 | let txt = serde_json::to_string_pretty(&Entry { 272 | path: "foo-1.0/original_file".to_string(), 273 | digest: None, 274 | metadata: Metadata { 275 | mode: Some("0o775".to_string()), 276 | links_to: None, 277 | mtime: Some(1337), 278 | uid: Some(1000), 279 | username: Some("user".to_string()), 280 | gid: Some(1000), 281 | groupname: Some("user".to_string()), 282 | }, 283 | }) 284 | .unwrap(); 285 | assert_eq!( 286 | txt, 287 | r#"{ 288 | "path": "foo-1.0/original_file", 289 | "mode": "0o775", 290 | "mtime": 1337, 291 | "uid": 1000, 292 | "username": "user", 293 | "gid": 1000, 294 | "groupname": "user" 295 | }"# 296 | ); 297 | } 298 | 299 | #[tokio::test] 300 | async fn test_ingest_tar() { 301 | let data = [ 302 | 0x1f, 0x8b, 0x8, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x3, 0xed, 0xd5, 0xed, 0xa, 0x82, 0x30, 303 | 0x14, 0x6, 0xe0, 0xfd, 0xee, 0x2a, 0x76, 0x3, 0xd9, 0xbe, 0x77, 0x23, 0xfd, 0xf, 0x21, 304 | 0xcd, 0x91, 0x29, 0xa8, 0xfd, 0xe8, 0xee, 0x9b, 0xd4, 0x20, 0xfa, 0x10, 0x2, 0xa7, 305 | 0x94, 0xef, 0x83, 0x30, 0x50, 0xc1, 0xc3, 0xde, 0x9d, 0x63,
0x5e, 0xd7, 0x6b, 0x9e, 306 | 0xb0, 0xd, 0x89, 0x88, 0x79, 0x56, 0xeb, 0x7e, 0xe5, 0x56, 0xb3, 0xc7, 0x35, 0x20, 307 | 0x5c, 0x19, 0xee, 0x6f, 0x5a, 0x61, 0x2c, 0x61, 0x9c, 0x19, 0x2b, 0x9, 0xd5, 0x31, 308 | 0x8b, 0xa, 0xce, 0x6d, 0x97, 0x36, 0x94, 0xfa, 0x35, 0x6b, 0x86, 0xdf, 0x1b, 0x7e, 309 | 0xfe, 0xa3, 0xf2, 0x7b, 0xfe, 0x75, 0xe3, 0xe, 0xae, 0x4a, 0xcb, 0x5d, 0xee, 0xca, 310 | 0x6c, 0xe4, 0x6f, 0xf4, 0x1, 0x1b, 0xa5, 0x3e, 0xe7, 0x2f, 0xe5, 0x53, 0xfe, 0x52, 311 | 0x71, 0x4d, 0x28, 0x1b, 0xb9, 0x8e, 0xb7, 0x16, 0x9e, 0xff, 0xb6, 0x70, 0x2d, 0xf5, 312 | 0x57, 0x57, 0x64, 0x34, 0x9c, 0x1, 0xda, 0x9f, 0x81, 0x64, 0x35, 0x77, 0x69, 0x30, 313 | 0x81, 0xd0, 0xff, 0x45, 0xda, 0xec, 0x4b, 0x57, 0x1d, 0x67, 0xe9, 0xff, 0x97, 0xf9, 314 | 0x6f, 0x5, 0xf3, 0xf3, 0x9f, 0x4f, 0x31, 0x9c, 0x16, 0xde, 0xff, 0x61, 0x8b, 0xdb, 315 | 0xcb, 0x29, 0x56, 0xfc, 0xb7, 0xff, 0xbf, 0xb5, 0x5f, 0xe4, 0x6f, 0x98, 0x30, 0x84, 316 | 0x8a, 0x98, 0xb9, 0x7, 0xb, 0xcf, 0x1f, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 317 | 0x0, 0x0, 0x0, 0x0, 0x0, 0xfe, 0xc3, 0x15, 0xdc, 0x23, 0xbf, 0x4f, 0x0, 0x28, 0x0, 0x0, 318 | ]; 319 | 320 | let summary = stream_data(None, &data[..], Some("gz")).await.unwrap(); 321 | assert_eq!(summary, TarSummary { 322 | inner_digests: Checksums { 323 | sha256: "sha256:55f514c48ef9359b792e23abbad6ca8a1e999065ba8879d8717fecb52efc1ea0".to_string(), 324 | sha512: "sha512:d2d14d47a23f20ef522b76765b9feb80d6d66f06b97d8ba8cbabebdee483880d31cf0522eb318613d94a808cde4e8ef8860733f8bde41dd7c4fca3b82cd354eb".to_string(), 325 | blake2b: "blake2b:601ba064ff937c07e0695408111694230af5eeef97bd3d783d619d88dcb4a434cebb38d2eb6fc7a3b9b36e9e76676c18ba237c3eea922fe7cf41d61bcf86f65a".to_string(), 326 | }, 327 | outer_digests: Checksums { 328 | sha256: "sha256:9390fb29874d4e70ae4e8379aa7fc396e0a44cacf8256aa8d87fdec9b56261d4".to_string(), 329 | sha512: "sha512:8b981a89ec6735f0c1de0f7d58cbd30921b9fdf645b68330ab1080b2d563410acb3ae77881a2817438ca6405eaafbb62f131a371f0f0e5fcb91727310fb7a370".to_string(), 330 | blake2b: "blake2b:47e872432ce32b7cecc554cc9c67d12553e62fed8f42768a43e64f16ca72e9679b0f539e7f47bf89ffe658be7b3a29f857d4ce244523dce181587c42ec4c7533".to_string(), 331 | }, 332 | files: vec![ 333 | Entry { 334 | path: "foo-1.0/".to_string(), 335 | digest: None, 336 | metadata: Metadata { 337 | mode: Some("0o755".to_string()), 338 | links_to: None, 339 | mtime: Some(1713888951), 340 | uid: Some(1000), 341 | username: Some("user".to_string()), 342 | gid: Some(1000), 343 | groupname: Some("user".to_string()), 344 | } 345 | }, 346 | Entry { 347 | path: "foo-1.0/original_file".to_string(), 348 | digest: Some("sha256:56d9fc4585da4f39bbc5c8ec953fb7962188fa5ed70b2dd5a19dc82df997ba5e".to_string()), 349 | metadata: Metadata { 350 | mode: Some("0o644".to_string()), 351 | links_to: None, 352 | mtime: Some(1713888951), 353 | uid: Some(1000), 354 | username: Some("user".to_string()), 355 | gid: Some(1000), 356 | groupname: Some("user".to_string()), 357 | } 358 | }, 359 | Entry { 360 | path: "foo-1.0/hardlink_file".to_string(), 361 | digest: None, 362 | metadata: Metadata { 363 | mode: Some("0o644".to_string()), 364 | links_to: Some(LinksTo::Hard("foo-1.0/original_file".to_string())), 365 | mtime: Some(1713888951), 366 | uid: Some(1000), 367 | username: Some("user".to_string()), 368 | gid: Some(1000), 369 | groupname: Some("user".to_string()), 370 | } 371 | }, 372 | Entry { 373 | path: "foo-1.0/symlink_file".to_string(), 374 | digest: None, 375 | metadata: Metadata { 376 | mode: Some("0o777".to_string()), 377 | links_to: 
Some(LinksTo::Symbolic("original_file".to_string())), 378 | mtime: Some(1713888951), 379 | uid: Some(1000), 380 | username: Some("user".to_string()), 381 | gid: Some(1000), 382 | groupname: Some("user".to_string()), 383 | } 384 | }, 385 | ], 386 | sbom_refs: vec![], 387 | }); 388 | } 389 | } 390 | -------------------------------------------------------------------------------- /src/ingest/void.rs: -------------------------------------------------------------------------------- 1 | use crate::args; 2 | use crate::db::{self, Task, TaskData}; 3 | use crate::errors::*; 4 | use crate::utils; 5 | use crate::void_template; 6 | use async_compression::tokio::bufread::GzipDecoder; 7 | use futures::StreamExt; 8 | use tokio::io::{self, AsyncRead, AsyncReadExt}; 9 | use tokio_tar::Archive; 10 | 11 | pub async fn extract_template<R: AsyncRead + Unpin>( 12 | reader: R, 13 | srcpkg: &str, 14 | ) -> Result<Option<String>> { 15 | let reader = io::BufReader::new(reader); 16 | let reader = GzipDecoder::new(reader); 17 | let mut tar = Archive::new(reader); 18 | 19 | let mut entries = tar.entries()?; 20 | while let Some(entry) = entries.next().await { 21 | let mut entry = entry?; 22 | let path = entry.path()?; 23 | let Some(path) = path.to_str() else { continue }; 24 | let Some((_, path)) = path.split_once("/srcpkgs/") else { 25 | continue; 26 | }; 27 | let Some(path) = path.strip_suffix("/template") else { 28 | continue; 29 | }; 30 | 31 | if path != srcpkg { 32 | continue; 33 | } 34 | 35 | let mut buf = String::new(); 36 | entry.read_to_string(&mut buf).await?; 37 | return Ok(Some(buf)); 38 | } 39 | 40 | Ok(None) 41 | } 42 | 43 | pub async fn stream_data<R: AsyncRead + Unpin>( 44 | db: &db::Client, 45 | reader: R, 46 | vendor: &str, 47 | srcpkg: &str, 48 | package: &str, 49 | version: &str, 50 | ) -> Result<()> { 51 | let Some(template) = extract_template(reader, srcpkg).await? else { 52 | return Ok(()); 53 | }; 54 | let template = void_template::parse(&template)?; 55 | debug!("Found Void Linux template: {template:?}"); 56 | 57 | for i in 0..template.distfiles.len() { 58 | let Some(url) = template.distfiles.get(i) else { 59 | continue; 60 | }; 61 | let Some(sha256) = template.checksum.get(i) else { 62 | continue; 63 | }; 64 | 65 | if sha256.len() != 64 { 66 | warn!("Unexpected checksum length for Void Linux: {sha256:?}"); 67 | continue; 68 | } 69 | let chksum = format!("sha256:{sha256}"); 70 | 71 | if !utils::is_possible_tar_artifact(url) { 72 | continue; 73 | } 74 | 75 | if db.resolve_artifact(&chksum).await?.is_none() { 76 | db.insert_task(&Task::new( 77 | format!("fetch:{url}"), 78 | &TaskData::FetchTar { 79 | url: url.to_string(), 80 | compression: None, 81 | success_ref: None, 82 | }, 83 | )?)
84 | .await?; 85 | } 86 | 87 | let r = db::Ref { 88 | chksum, 89 | vendor: vendor.to_string(), 90 | package: package.to_string(), 91 | version: version.to_string(), 92 | filename: Some(url.to_string()), 93 | }; 94 | info!("insert: {r:?}"); 95 | db.insert_ref(&r).await?; 96 | } 97 | 98 | Ok(()) 99 | } 100 | 101 | pub async fn run(args: &args::IngestVoid) -> Result<()> { 102 | let db = db::Client::create().await?; 103 | 104 | let reader = utils::fetch_or_open(&args.file, args.fetch).await?; 105 | stream_data( 106 | &db, 107 | reader, 108 | &args.vendor, 109 | &args.srcpkg, 110 | &args.package, 111 | &args.version, 112 | ) 113 | .await?; 114 | 115 | Ok(()) 116 | } 117 | -------------------------------------------------------------------------------- /src/main.rs: -------------------------------------------------------------------------------- 1 | pub mod alias; 2 | pub mod apkbuild; 3 | pub mod apt; 4 | pub mod args; 5 | pub mod chksums; 6 | pub mod compression; 7 | pub mod db; 8 | pub mod errors; 9 | pub mod ingest; 10 | pub mod pkgbuild; 11 | pub mod reindex; 12 | pub mod sbom; 13 | pub mod sync; 14 | pub mod utils; 15 | pub mod void_template; 16 | pub mod web; 17 | pub mod worker; 18 | pub mod yocto; 19 | 20 | use crate::args::{Args, Plumbing, SubCommand}; 21 | use crate::errors::*; 22 | use clap::Parser; 23 | use env_logger::Env; 24 | use tokio::io::{self, AsyncReadExt}; 25 | 26 | #[tokio::main] 27 | async fn main() -> Result<()> { 28 | let args = Args::parse(); 29 | let log_level = match args.verbose { 30 | 0 => "what_the_src=info", 31 | 1 => "info,what_the_src=debug", 32 | 2 => "debug", 33 | 3 => "debug,what_the_src=trace", 34 | _ => "trace", 35 | }; 36 | env_logger::Builder::from_env(Env::default().default_filter_or(log_level)).init(); 37 | 38 | dotenvy::dotenv().ok(); 39 | 40 | match args.subcommand { 41 | SubCommand::Web(args) => web::run(&args).await, 42 | SubCommand::Worker(args) => worker::run(&args).await, 43 | SubCommand::Plumbing(Plumbing::IngestTar(args)) => ingest::tar::run(&args).await, 44 | SubCommand::Plumbing(Plumbing::IngestGit(args)) => ingest::git::run(&args).await, 45 | SubCommand::Plumbing(Plumbing::IngestPacmanSnapshot(args)) => { 46 | ingest::pacman::run(&args).await 47 | } 48 | SubCommand::Plumbing(Plumbing::IngestRpm(args)) => ingest::rpm::run(&args).await, 49 | SubCommand::Plumbing(Plumbing::IngestWolfi(args)) => ingest::wolfi::run(&args).await, 50 | SubCommand::Plumbing(Plumbing::IngestVoid(args)) => ingest::void::run(&args).await, 51 | SubCommand::Plumbing(Plumbing::IngestSbom(args)) => sbom::run(&args).await, 52 | SubCommand::Plumbing(Plumbing::ParsePkgbuild(_args)) => { 53 | let mut bytes = Vec::new(); 54 | let mut stdin = io::stdin(); 55 | stdin.read_to_end(&mut bytes).await?; 56 | 57 | let pkgbuild = pkgbuild::parse(&bytes)?; 58 | println!("pkgbuild={pkgbuild:?}"); 59 | Ok(()) 60 | } 61 | SubCommand::Plumbing(Plumbing::SyncAlpine(args)) => sync::alpine::run(&args).await, 62 | SubCommand::Plumbing(Plumbing::SyncApt(args)) => sync::apt::run(&args).await, 63 | SubCommand::Plumbing(Plumbing::SyncPacman(args)) => sync::pacman::run(&args).await, 64 | SubCommand::Plumbing(Plumbing::SyncLiveBootstrap(args)) => { 65 | sync::live_bootstrap::run(&args).await 66 | } 67 | SubCommand::Plumbing(Plumbing::SyncRpm(args)) => sync::rpm::run(&args).await, 68 | SubCommand::Plumbing(Plumbing::SyncGentoo(args)) => sync::gentoo::run(&args).await, 69 | SubCommand::Plumbing(Plumbing::SyncHomebrew(args)) => sync::homebrew::run(&args).await, 70 | 
SubCommand::Plumbing(Plumbing::SyncGuix(args)) => sync::guix::run(&args).await, 71 | SubCommand::Plumbing(Plumbing::SyncVoid(args)) => sync::void::run(&args).await, 72 | SubCommand::Plumbing(Plumbing::SyncYocto(args)) => sync::yocto::run(&args).await, 73 | SubCommand::Plumbing(Plumbing::SyncStagex(args)) => sync::stagex::run(&args).await, 74 | SubCommand::Plumbing(Plumbing::AddRef(args)) => alias::run(&args).await, 75 | SubCommand::Plumbing(Plumbing::ReindexUrl(args)) => reindex::run_url(&args).await, 76 | SubCommand::Plumbing(Plumbing::ReindexSbom(args)) => reindex::run_sbom(&args).await, 77 | } 78 | } 79 | -------------------------------------------------------------------------------- /src/pkgbuild.rs: -------------------------------------------------------------------------------- 1 | use crate::errors::*; 2 | use std::str; 3 | use yash_syntax::syntax::{self, TextUnit, Value, WordUnit}; 4 | 5 | #[derive(Debug, Default, PartialEq)] 6 | pub struct Pkgbuild { 7 | pub sha256sums: Vec<String>, 8 | pub sha512sums: Vec<String>, 9 | pub b2sums: Vec<String>, 10 | } 11 | 12 | pub fn parse(bytes: &[u8]) -> Result<Pkgbuild> { 13 | let script = str::from_utf8(bytes)?; 14 | let parsed: syntax::List = script 15 | .parse() 16 | .map_err(|err| Error::InvalidPkgbuild(format!("{err:#?}")))?; 17 | 18 | let mut pkgbuild = Pkgbuild::default(); 19 | 20 | for item in &parsed.0 { 21 | for cmd in &item.and_or.first.commands { 22 | let syntax::Command::Simple(cmd) = cmd.as_ref() else { 23 | continue; 24 | }; 25 | for assign in &cmd.assigns { 26 | let name = assign.name.as_str(); 27 | 28 | // handle bash-style `+=` assignments 29 | let name = name.strip_suffix('+').unwrap_or(name); 30 | debug!("Found assignment to {name:?}"); 31 | 32 | let target = match name { 33 | "sha256sums" => &mut pkgbuild.sha256sums, 34 | "sha512sums" => &mut pkgbuild.sha512sums, 35 | "b2sums" => &mut pkgbuild.b2sums, 36 | _ => continue, 37 | }; 38 | 39 | let Value::Array(values) = &assign.value else { 40 | continue; 41 | }; 42 | 43 | for value in values { 44 | for unit in &value.units { 45 | trace!("Found word unit: {unit:?}"); 46 | 47 | match unit { 48 | WordUnit::SingleQuote(text) => target.push(text.to_string()), 49 | WordUnit::DoubleQuote(text) => { 50 | let mut s = String::new(); 51 | for unit in &text.0 { 52 | if let TextUnit::Literal(chr) = unit { 53 | s.push(*chr); 54 | } 55 | } 56 | target.push(s); 57 | } 58 | other => { 59 | return Err(Error::InvalidPkgbuild(format!( 60 | "Unsupported word unit: {other:?}" 61 | ))) 62 | } 63 | } 64 | } 65 | } 66 | } 67 | } 68 | } 69 | 70 | Ok(pkgbuild) 71 | } 72 | 73 | #[cfg(test)] 74 | mod tests { 75 | use super::*; 76 | 77 | fn init() { 78 | let _ = env_logger::builder().is_test(true).try_init(); 79 | } 80 | 81 | #[test] 82 | fn test_parse() { 83 | init(); 84 | 85 | let script = b"sha256sums=('7a1258a5dfc48c54cea1092adddb6bcfb1fcf19c7272c0a6a9e1d2d7daee6e12') 86 | sha256sums+=(\"f9a4925f7d7bb7de54e17cd9ad7c584dfae88ad182d943b79cf403425000f128\") 87 | b2sums=('cd594be73fcf632544195d09518901b1055ae86dcf463a5d446a83beba66073c70a9dfb75efd9d826c2ecf7215ab6cd76128a20104d5ef4ea57470061d2e29bf' 88 | 'f4f89b720bcbe23c5413c6cbc2d0793d8e379fc53861a6fbd83f506e56a86132bb92236498b4357310b09e51fd05aa5ccc941649a4f205fb4e53cb6bc32cdd64') 89 | "; 90 | let pkgbuild = parse(script).unwrap(); 91 | assert_eq!( 92 | pkgbuild, 93 | Pkgbuild { 94 | sha256sums: vec![ 95 | "7a1258a5dfc48c54cea1092adddb6bcfb1fcf19c7272c0a6a9e1d2d7daee6e12".to_string(), 96 | "f9a4925f7d7bb7de54e17cd9ad7c584dfae88ad182d943b79cf403425000f128".to_string(), 97 | ], 98 |
sha512sums: vec![], 99 | b2sums: vec![ 100 | "cd594be73fcf632544195d09518901b1055ae86dcf463a5d446a83beba66073c70a9dfb75efd9d826c2ecf7215ab6cd76128a20104d5ef4ea57470061d2e29bf".to_string(), 101 | "f4f89b720bcbe23c5413c6cbc2d0793d8e379fc53861a6fbd83f506e56a86132bb92236498b4357310b09e51fd05aa5ccc941649a4f205fb4e53cb6bc32cdd64".to_string(), 102 | ], 103 | } 104 | ); 105 | } 106 | } 107 | -------------------------------------------------------------------------------- /src/reindex.rs: -------------------------------------------------------------------------------- 1 | use crate::args; 2 | use crate::db; 3 | use crate::errors::*; 4 | use crate::sbom; 5 | use crate::utils; 6 | use futures::StreamExt; 7 | use sqlx::types::chrono::Utc; 8 | 9 | pub async fn run_url(args: &args::ReindexUrl) -> Result<()> { 10 | let db = db::Client::create().await?; 11 | 12 | let mut scheduled = 0; 13 | let now = Utc::now(); 14 | 15 | let stream = db.get_all_artifacts_by_age(); 16 | tokio::pin!(stream); 17 | while let Some(artifact) = stream.next().await { 18 | let artifact = artifact?; 19 | 20 | if let Some(limit) = &args.limit { 21 | if scheduled >= *limit { 22 | info!("Reached schedule limit of {limit} items, exiting"); 23 | break; 24 | } 25 | } 26 | 27 | if let Some(age) = &args.age { 28 | let delta = now.signed_duration_since(artifact.last_imported); 29 | if delta.num_days() < *age { 30 | // since we sort by age, no further artifact will match 31 | break; 32 | } 33 | } 34 | 35 | let refs = db.get_all_refs_for(&artifact.chksum).await?; 36 | 37 | let mut refs = refs 38 | .into_iter() 39 | .flat_map(|r| r.filename) 40 | .filter(|filename| { 41 | if let Some(filter) = &args.filter { 42 | filename.contains(filter) 43 | } else { 44 | true 45 | } 46 | }) 47 | .collect::<Vec<_>>(); 48 | fastrand::shuffle(&mut refs); 49 | 50 | let Some(filename) = refs.into_iter().next() else { 51 | continue; 52 | }; 53 | 54 | if let Some(task) = utils::task_for_url(&filename) { 55 | info!("Inserting task: {task:?}"); 56 | db.insert_task(&task).await?; 57 | scheduled += 1; 58 | } 59 | } 60 | 61 | Ok(()) 62 | } 63 | 64 | pub async fn run_sbom(args: &args::ReindexSbom) -> Result<()> { 65 | let db = db::Client::create().await?; 66 | 67 | let mut scheduled = 0; 68 | let stream = db.get_all_sboms(); 69 | tokio::pin!(stream); 70 | while let Some(sbom) = stream.next().await { 71 | let sbom = sbom?; 72 | 73 | if let Some(strain) = &args.strain { 74 | if *strain != sbom.strain { 75 | continue; 76 | } 77 | } 78 | 79 | if let Some(limit) = &args.limit { 80 | if scheduled >= *limit { 81 | info!("Reached schedule limit of {limit} items, exiting"); 82 | break; 83 | } 84 | } 85 | 86 | let chksum = &sbom.chksum; 87 | let sbom = match sbom::Sbom::try_from(&sbom) { 88 | Ok(sbom) => sbom, 89 | Err(err) => { 90 | error!("Failed to parse sbom: {err:#}"); 91 | continue; 92 | } 93 | }; 94 | 95 | let strain = sbom.strain(); 96 | info!("Indexing sbom ({strain}): {chksum:?}"); 97 | if let Err(err) = sbom::index(&db, &sbom).await { 98 | error!("Failed to index sbom: {err:#}"); 99 | continue; 100 | } 101 | scheduled += 1; 102 | } 103 | 104 | Ok(()) 105 | } 106 | -------------------------------------------------------------------------------- /src/sbom/cargo.rs: -------------------------------------------------------------------------------- 1 | use crate::errors::*; 2 | use crate::sbom; 3 | use serde::{Deserialize, Serialize}; 4 | use std::collections::VecDeque; 5 | 6 | pub const STRAIN: &str = "cargo-lock"; 7 | pub const VENDOR: &str = "crates.io"; 8 | 9 | #[derive(Debug,
PartialEq)] 10 | pub struct CargoLock { 11 | pub data: String, 12 | } 13 | 14 | impl CargoLock { 15 | pub fn parse(&self) -> Result<ParsedLock> { 16 | let toml = toml::from_str(&self.data)?; 17 | Ok(toml) 18 | } 19 | } 20 | 21 | #[derive(Debug, PartialEq, Deserialize)] 22 | pub struct ParsedLock { 23 | version: Option<u32>, 24 | #[serde(default, rename = "package")] 25 | packages: VecDeque<serde_json::Value>, 26 | } 27 | 28 | impl Iterator for ParsedLock { 29 | type Item = Result<sbom::Package>; 30 | 31 | fn next(&mut self) -> Option<Self::Item> { 32 | let package = self.packages.pop_front()?; 33 | match serde_json::from_value::<Packagev3>(package) { 34 | Ok(pkg) => Some(Ok(pkg.into())), 35 | Err(err) => Some(Err(err.into())), 36 | } 37 | } 38 | } 39 | 40 | #[derive(Debug, PartialEq, Serialize, Deserialize)] 41 | pub struct Packagev3 { 42 | pub name: String, 43 | pub version: String, 44 | pub source: Option<String>, 45 | pub checksum: Option<String>, 46 | } 47 | 48 | impl From<Packagev3> for sbom::Package { 49 | fn from(sbom: Packagev3) -> Self { 50 | let official_registry = 51 | sbom.source.as_deref() == Some("registry+https://github.com/rust-lang/crates.io-index"); 52 | Self { 53 | name: sbom.name, 54 | version: sbom.version, 55 | checksum: sbom.checksum.map(|chksum| format!("sha256:{chksum}")), 56 | official_registry, 57 | } 58 | } 59 | } 60 | 61 | #[cfg(test)] 62 | mod tests { 63 | use crate::sbom::{Package, Sbom}; 64 | 65 | #[test] 66 | fn test_parse_cargo_lock() { 67 | let data = r#"# This file is automatically @generated by Cargo. 68 | # It is not intended for manual editing. 69 | version = 3 70 | 71 | [[package]] 72 | name = "aho-corasick" 73 | version = "1.1.2" 74 | source = "registry+https://github.com/rust-lang/crates.io-index" 75 | checksum = "b2969dcb958b36655471fc61f7e416fa76033bdd4bfed0678d8fee1e2d07a1f0" 76 | dependencies = [ 77 | "memchr", 78 | ] 79 | 80 | [[package]] 81 | name = "anyhow" 82 | version = "1.0.79" 83 | source = "registry+https://github.com/rust-lang/crates.io-index" 84 | checksum = "080e9890a082662b09c1ad45f567faeeb47f22b5fb23895fbe1e651e718e25ca" 85 | "#; 86 | let cargo = Sbom::new("cargo-lock", data.to_string()).unwrap(); 87 | let list = cargo.to_packages().unwrap(); 88 | assert_eq!( 89 | list, 90 | [ 91 | Package { 92 | name: "aho-corasick".to_string(), 93 | version: "1.1.2".to_string(), 94 | checksum: Some( 95 | "sha256:b2969dcb958b36655471fc61f7e416fa76033bdd4bfed0678d8fee1e2d07a1f0" 96 | .to_string() 97 | ), 98 | official_registry: true, 99 | }, 100 | Package { 101 | name: "anyhow".to_string(), 102 | version: "1.0.79".to_string(), 103 | checksum: Some( 104 | "sha256:080e9890a082662b09c1ad45f567faeeb47f22b5fb23895fbe1e651e718e25ca" 105 | .to_string() 106 | ), 107 | official_registry: true, 108 | }, 109 | ] 110 | ); 111 | } 112 | 113 | #[test] 114 | fn test_cargo_lock_no_packages() { 115 | let data = r#" 116 | [root] 117 | name = "stable-check" 118 | version = "0.1.0" 119 | "#; 120 | let cargo = Sbom::new("cargo-lock", data.to_string()).unwrap(); 121 | let list = cargo.to_packages().unwrap(); 122 | assert_eq!(list, []); 123 | } 124 | } 125 | -------------------------------------------------------------------------------- /src/sbom/composer.rs: -------------------------------------------------------------------------------- 1 | pub const STRAIN: &str = "composer-lock"; 2 | 3 | #[derive(Debug, PartialEq)] 4 | pub struct ComposerLock { 5 | pub data: String, 6 | } 7 | -------------------------------------------------------------------------------- /src/sbom/go.rs: -------------------------------------------------------------------------------- 1 | pub const
STRAIN: &str = "go-sum"; 2 | 3 | #[derive(Debug, PartialEq)] 4 | pub struct GoSum { 5 | pub data: String, 6 | } 7 | -------------------------------------------------------------------------------- /src/sbom/mod.rs: -------------------------------------------------------------------------------- 1 | pub mod cargo; 2 | pub mod composer; 3 | pub mod go; 4 | pub mod npm; 5 | pub mod yarn; 6 | 7 | use crate::args; 8 | use crate::db; 9 | use crate::errors::*; 10 | use serde::Serialize; 11 | use tokio::fs; 12 | 13 | #[derive(Debug, PartialEq)] 14 | pub enum Sbom { 15 | Cargo(cargo::CargoLock), 16 | Composer(composer::ComposerLock), 17 | Go(go::GoSum), 18 | Npm(npm::PackageLockJson), 19 | Yarn(yarn::YarnLock), 20 | } 21 | 22 | impl TryFrom<&db::Sbom> for Sbom { 23 | type Error = Error; 24 | 25 | fn try_from(sbom: &db::Sbom) -> Result<Self> { 26 | Sbom::new(&sbom.strain, sbom.data.clone()) 27 | } 28 | } 29 | 30 | impl Sbom { 31 | pub fn new(strain: &str, data: String) -> Result<Self> { 32 | match strain { 33 | cargo::STRAIN => Ok(Sbom::Cargo(cargo::CargoLock { data })), 34 | composer::STRAIN => Ok(Sbom::Composer(composer::ComposerLock { data })), 35 | go::STRAIN => Ok(Sbom::Go(go::GoSum { data })), 36 | npm::STRAIN => Ok(Sbom::Npm(npm::PackageLockJson { data })), 37 | yarn::STRAIN => Ok(Sbom::Yarn(yarn::YarnLock { data })), 38 | _ => Err(Error::UnknownSbomStrain(strain.to_string())), 39 | } 40 | } 41 | 42 | pub fn strain(&self) -> &'static str { 43 | match self { 44 | Sbom::Cargo(_) => cargo::STRAIN, 45 | Sbom::Composer(_) => composer::STRAIN, 46 | Sbom::Go(_) => go::STRAIN, 47 | Sbom::Npm(_) => npm::STRAIN, 48 | Sbom::Yarn(_) => yarn::STRAIN, 49 | } 50 | } 51 | 52 | pub fn data(&self) -> &str { 53 | match self { 54 | Sbom::Cargo(sbom) => &sbom.data, 55 | Sbom::Composer(sbom) => &sbom.data, 56 | Sbom::Go(sbom) => &sbom.data, 57 | Sbom::Npm(sbom) => &sbom.data, 58 | Sbom::Yarn(sbom) => &sbom.data, 59 | } 60 | } 61 | 62 | pub fn to_packages(&self) -> Result<Vec<Package>> { 63 | match self { 64 | Sbom::Cargo(sbom) => { 65 | let sbom = sbom.parse()?; 66 | sbom.collect::<Result<Vec<_>>>() 67 | } 68 | Sbom::Yarn(sbom) => { 69 | let sbom = sbom.parse()?; 70 | Ok(sbom.collect::<Vec<_>>()) 71 | } 72 | _ => Ok(vec![]), 73 | } 74 | } 75 | } 76 | 77 | #[derive(Debug, PartialEq, Serialize)] 78 | pub struct Package { 79 | pub name: String, 80 | pub version: String, 81 | pub checksum: Option<String>, 82 | pub official_registry: bool, 83 | } 84 | 85 | #[derive(Debug, PartialEq)] 86 | pub struct Ref { 87 | pub strain: &'static str, 88 | pub chksum: String, 89 | pub path: String, 90 | } 91 | 92 | pub fn detect_from_filename(filename: Option<&str>) -> Option<&'static str> { 93 | match filename { 94 | Some("Cargo.lock") => Some(cargo::STRAIN), 95 | Some("package-lock.json") => Some(npm::STRAIN), 96 | Some("yarn.lock") => Some(yarn::STRAIN), 97 | Some("composer.lock") => Some(composer::STRAIN), 98 | Some("go.sum") => Some(go::STRAIN), 99 | _ => None, 100 | } 101 | } 102 | 103 | pub async fn index(db: &db::Client, sbom: &Sbom) -> Result<()> { 104 | match sbom.strain() { 105 | cargo::STRAIN => { 106 | for pkg in sbom.to_packages()?
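// The loop below only considers Cargo.lock packages that carry a checksum
// and came from the official crates.io registry; anything not already
// known as both artifact and ref is queued as a fetch task against the
// standard crates.io download endpoint used further down.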
{ 107 | let Some(chksum) = pkg.checksum else { continue }; 108 | 109 | if !pkg.official_registry { 110 | continue; 111 | } 112 | 113 | let (has_artifact, has_ref) = tokio::join!( 114 | db.resolve_artifact(&chksum), 115 | db.get_ref(&chksum, cargo::VENDOR, &pkg.name, &pkg.version), 116 | ); 117 | if has_artifact?.is_some() && has_ref?.is_some() { 118 | continue; 119 | } 120 | 121 | let url = format!( 122 | "https://crates.io/api/v1/crates/{}/{}/download", 123 | url_escape::encode_component(&pkg.name), 124 | url_escape::encode_component(&pkg.version), 125 | ); 126 | info!("Adding download task url={url:?}"); 127 | db.insert_task(&db::Task::new( 128 | format!("fetch:{url}"), 129 | &db::TaskData::FetchTar { 130 | url, 131 | compression: Some("gz".to_string()), 132 | success_ref: Some(db::DownloadRef { 133 | vendor: cargo::VENDOR.to_string(), 134 | package: pkg.name.to_string(), 135 | version: pkg.version.to_string(), 136 | }), 137 | }, 138 | )?) 139 | .await?; 140 | } 141 | } 142 | yarn::STRAIN => { 143 | for pkg in sbom.to_packages()? { 144 | let full_name = &pkg.name; 145 | let suffix = pkg 146 | .name 147 | .rsplit_once('/') 148 | .map(|(_, x)| x) 149 | .unwrap_or(&pkg.name); 150 | let version = &pkg.version; 151 | 152 | match pkg.checksum { 153 | Some(chksum) if !chksum.starts_with("sha1:") => { 154 | let (has_artifact, has_ref) = tokio::join!( 155 | db.resolve_artifact(&chksum), 156 | db.get_ref(&chksum, yarn::VENDOR, &pkg.name, &pkg.version), 157 | ); 158 | if has_artifact?.is_some() && has_ref?.is_some() { 159 | debug!("Skipping because known yarn reference (package={:?} version={:?} chksum={:?})", pkg.name, pkg.version, chksum); 160 | continue; 161 | } 162 | } 163 | _ => { 164 | let r = db 165 | .get_named_ref(yarn::VENDOR, &pkg.name, &pkg.version) 166 | .await?; 167 | if r.is_some() { 168 | debug!("Skipping because known yarn reference (despite no checksum: package={:?} version={:?})", pkg.name, pkg.version); 169 | continue; 170 | } 171 | } 172 | } 173 | 174 | let url = 175 | format!("https://registry.yarnpkg.com/{full_name}/-/{suffix}-{version}.tgz"); 176 | 177 | info!("Adding download task url={url:?}"); 178 | db.insert_task(&db::Task::new( 179 | format!("fetch:{url}"), 180 | &db::TaskData::FetchTar { 181 | url, 182 | compression: Some("gz".to_string()), 183 | success_ref: Some(db::DownloadRef { 184 | vendor: yarn::VENDOR.to_string(), 185 | package: pkg.name.to_string(), 186 | version: pkg.version.to_string(), 187 | }), 188 | }, 189 | )?) 
190 | .await?; 191 | } 192 | } 193 | _ => (), 194 | } 195 | Ok(()) 196 | } 197 | 198 | pub async fn run(args: &args::IngestSbom) -> Result<()> { 199 | let db = db::Client::create().await?; 200 | 201 | let data = fs::read_to_string(&args.file).await?; 202 | let sbom = Sbom::new(&args.strain, data)?; 203 | 204 | db.insert_sbom(&sbom).await?; 205 | index(&db, &sbom).await?; 206 | 207 | Ok(()) 208 | } 209 | -------------------------------------------------------------------------------- /src/sbom/npm.rs: -------------------------------------------------------------------------------- 1 | pub const STRAIN: &str = "package-lock-json"; 2 | 3 | #[derive(Debug, PartialEq)] 4 | pub struct PackageLockJson { 5 | pub data: String, 6 | } 7 | -------------------------------------------------------------------------------- /src/sbom/yarn.rs: -------------------------------------------------------------------------------- 1 | use crate::errors::*; 2 | use crate::sbom::Package; 3 | use data_encoding::BASE64; 4 | use std::collections::VecDeque; 5 | 6 | pub const STRAIN: &str = "yarn-lock"; 7 | pub const VENDOR: &str = "registry.yarnpkg.com"; 8 | 9 | #[derive(Debug, PartialEq)] 10 | pub struct YarnLock { 11 | pub data: String, 12 | } 13 | 14 | impl YarnLock { 15 | pub fn parse(&self) -> Result<ParsedLock> { 16 | let yarn = yarn_lock_parser::parse_str(&self.data)?; 17 | let mut packages = VecDeque::new(); 18 | for entry in yarn.entries { 19 | let checksum = if let Some((family, value)) = entry.integrity.split_once('-') { 20 | let digest = hex::encode(BASE64.decode(value.as_bytes())?); 21 | Some(format!("{family}:{digest}")) 22 | } else { 23 | None 24 | }; 25 | 26 | packages.push_back(Package { 27 | name: entry.name.to_string(), 28 | version: entry.version.to_string(), 29 | checksum, 30 | official_registry: false, 31 | }); 32 | } 33 | Ok(ParsedLock { packages }) 34 | } 35 | } 36 | 37 | #[derive(Debug, PartialEq)] 38 | pub struct ParsedLock { 39 | packages: VecDeque<Package>, 40 | } 41 | 42 | impl Iterator for ParsedLock { 43 | type Item = Package; 44 | 45 | fn next(&mut self) -> Option<Self::Item> { 46 | self.packages.pop_front() 47 | } 48 | } 49 | -------------------------------------------------------------------------------- /src/sync/alpine.rs: -------------------------------------------------------------------------------- 1 | use crate::args; 2 | use crate::db; 3 | use crate::errors::*; 4 | use crate::utils; 5 | use async_compression::tokio::bufread::GzipDecoder; 6 | use futures::{Stream, StreamExt}; 7 | use std::collections::HashMap; 8 | use std::mem; 9 | use tokio::io::{self, AsyncBufReadExt, AsyncRead}; 10 | use tokio_tar::{Archive, EntryType}; 11 | 12 | #[derive(Debug, PartialEq)] 13 | pub struct Pkg { 14 | package: String, 15 | // This field is always set in Alpine, but sometimes missing in wolfi 16 | origin: Option<String>, 17 | version: String, 18 | // In wolfi this field is sometimes missing 19 | // We are going to ignore packages with no commit tho 20 | commit: Option<String>, 21 | } 22 | 23 | impl TryFrom<HashMap<String, String>> for Pkg { 24 | type Error = Error; 25 | 26 | fn try_from(mut map: HashMap<String, String>) -> Result<Self> { 27 | let package = map.remove("P").ok_or(Error::ApkMissingField("P"))?; 28 | let origin = map.remove("o"); 29 | let version = map.remove("V").ok_or(Error::ApkMissingField("V"))?; 30 | let commit = map.remove("c"); 31 | 32 | Ok(Pkg { 33 | package, 34 | origin, 35 | version, 36 | commit, 37 | }) 38 | } 39 | } 40 | 41 | fn parse<R: AsyncRead + Unpin>(reader: R) -> impl Stream<Item = Result<Pkg>> { 42 | async_stream::stream!
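// APKINDEX records are blank-line separated key:value blocks; the stream
// below yields one Pkg per block, keeping only the P/V/o/c fields that
// Pkg::try_from extracts.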
{ 43 | let reader = io::BufReader::new(reader); 44 | let mut lines = reader.lines(); 45 | let mut pkg = HashMap::new(); 46 | while let Some(line) = lines.next_line().await? { 47 | if line.is_empty() { 48 | let pkg = mem::take(&mut pkg); 49 | let pkg = Pkg::try_from(pkg)?; 50 | yield Ok(pkg); 51 | } else if let Some((key, value)) = line.split_once(':') { 52 | pkg.insert(key.to_string(), value.to_string()); 53 | } 54 | } 55 | } 56 | } 57 | 58 | pub async fn run(args: &args::SyncAlpine) -> Result<()> { 59 | let db = db::Client::create().await?; 60 | let vendor = &args.vendor; 61 | 62 | let reader = utils::fetch_or_open(&args.file, args.fetch).await?; 63 | let mut reader = io::BufReader::new(reader); 64 | { 65 | // discard first part of apkv2 66 | let mut reader = GzipDecoder::new(&mut reader); 67 | io::copy(&mut reader, &mut io::sink()).await?; 68 | } 69 | let reader = GzipDecoder::new(reader); 70 | let mut tar = Archive::new(reader); 71 | 72 | let mut entries = tar.entries()?; 73 | while let Some(entry) = entries.next().await { 74 | let entry = entry?; 75 | let header = entry.header(); 76 | if header.entry_type() != EntryType::Regular { 77 | continue; 78 | } 79 | 80 | let path = entry.path()?; 81 | if path.to_str() != Some("APKINDEX") { 82 | continue; 83 | } 84 | 85 | let stream = parse(entry); 86 | tokio::pin!(stream); 87 | while let Some(pkg) = stream.next().await { 88 | let pkg = pkg?; 89 | debug!("Found package: {pkg:?}"); 90 | 91 | let origin = pkg.origin.unwrap_or(pkg.package); 92 | let version = pkg.version; 93 | let Some(commit) = pkg.commit else { continue }; 94 | 95 | // mark all refs known for this package as "last_seen now" 96 | db.bump_named_refs(vendor, &origin, &version).await?; 97 | 98 | // check if package already imported 99 | if db.get_package(vendor, &origin, &commit).await?.is_some() { 100 | debug!("Package is already imported: vendor={vendor:?} origin={origin:?} commit={commit:?}"); 101 | continue; 102 | } 103 | 104 | // queue for import 105 | info!("Inserting task: vendor={vendor:?} origin={origin:?} commit={commit:?}"); 106 | db.insert_task(&db::Task::new( 107 | format!("{vendor}-apkbuild:{origin}:{commit}"), 108 | &db::TaskData::ApkbuildGit { 109 | vendor: vendor.clone(), 110 | repo: args.repo.clone(), 111 | origin, 112 | version, 113 | commit, 114 | }, 115 | )?) 
116 | .await?; 117 | } 118 | } 119 | 120 | Ok(()) 121 | } 122 | 123 | #[cfg(test)] 124 | mod tests { 125 | use super::*; 126 | 127 | #[tokio::test] 128 | async fn test_parse_apkindex() { 129 | let data = "C:Q19qUyV9TFS+tErPDBnvqG7VqyvyM= 130 | P:7zip-doc 131 | V:23.01-r0 132 | A:x86_64 133 | S:38269 134 | I:155648 135 | T:File archiver with a high compression ratio (documentation) 136 | U:https://7-zip.org/ 137 | L:LGPL-2.0-only 138 | o:7zip 139 | m:Alex Xu (Hello71) 140 | t:1688146859 141 | c:da4780262417a9446b7d13fe9bb7e83c54edb53d 142 | k:100 143 | i:docs 7zip=23.01-r0 144 | 145 | C:Q13kfUUaHQXJ5h+wwmkL6GXbVcbj8= 146 | P:aaudit 147 | V:0.7.2-r3 148 | A:x86_64 149 | S:3392 150 | I:49152 151 | T:Alpine Auditor 152 | U:https://alpinelinux.org 153 | L:Unknown 154 | o:aaudit 155 | m:Timo Teräs 156 | t:1659792088 157 | c:0714a84b7f79009ae8b96aef50216ed72f54b885 158 | D:lua5.2 lua5.2-posix lua5.2-cjson lua5.2-pc lua5.2-socket 159 | p:cmd:aaudit=0.7.2-r3 160 | 161 | "; 162 | let mut out = vec![]; 163 | let stream = parse(data.as_bytes()); 164 | tokio::pin!(stream); 165 | while let Some(item) = stream.next().await { 166 | out.push(item.unwrap()); 167 | } 168 | assert_eq!( 169 | &out[..], 170 | &[ 171 | Pkg { 172 | package: "7zip-doc".to_string(), 173 | origin: Some("7zip".to_string()), 174 | version: "23.01-r0".to_string(), 175 | commit: Some("da4780262417a9446b7d13fe9bb7e83c54edb53d".to_string()), 176 | }, 177 | Pkg { 178 | package: "aaudit".to_string(), 179 | origin: Some("aaudit".to_string()), 180 | version: "0.7.2-r3".to_string(), 181 | commit: Some("0714a84b7f79009ae8b96aef50216ed72f54b885".to_string()), 182 | }, 183 | ][..] 184 | ); 185 | } 186 | } 187 | -------------------------------------------------------------------------------- /src/sync/apt.rs: -------------------------------------------------------------------------------- 1 | use crate::apt; 2 | use crate::args; 3 | use crate::compression::Decompressor; 4 | use crate::db::{self, Task, TaskData}; 5 | use crate::errors::*; 6 | use crate::utils; 7 | use apt_parser::Release; 8 | use tokio::io::{self, AsyncReadExt}; 9 | 10 | async fn find_source_index_path( 11 | http: &utils::HttpClient, 12 | url: &str, 13 | suite: &str, 14 | ) -> Result<(String, &'static str)> { 15 | let mut reader = http.fetch(url).await?; 16 | 17 | let mut buf = String::new(); 18 | reader.read_to_string(&mut buf).await?; 19 | let release = Release::from(&buf)?; 20 | 21 | for file in release.sha256sum.into_iter().flatten() { 22 | let name = file.filename; 23 | 24 | match name.strip_prefix(suite) { 25 | Some("/source/Sources.xz") => return Ok((name, "xz")), 26 | Some("/source/Sources.gz") => return Ok((name, "gz")), 27 | _ => (), 28 | } 29 | } 30 | 31 | Err(Error::AptIndexMissingSources) 32 | } 33 | 34 | pub async fn run(args: &args::SyncApt) -> Result<()> { 35 | let base_url = args.url.strip_suffix('/').unwrap_or(&args.url); 36 | 37 | let db = db::Client::create().await?; 38 | let http = utils::http_client(None)?; 39 | 40 | for release in &args.releases { 41 | for suite in &args.suites { 42 | let url = format!("{base_url}/dists/{release}/Release"); 43 | info!("Fetching Release file: url={url:?}"); 44 | let (filename, compression) = find_source_index_path(&http, &url, suite).await?; 45 | 46 | let url = format!("{base_url}/dists/{release}/{filename}"); 47 | info!("Fetching Sources index: url={url:?}"); 48 | let reader = http.fetch(&url).await?; 49 | let reader = io::BufReader::new(reader); 50 | let mut reader = match compression { 51 | "gz" => Decompressor::gz(reader), 
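// find_source_index_path above only ever returns "xz" or "gz" for the
// Sources index, so the catch-all panic in the next arm is effectively
// unreachable for values produced by this module.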
52 | "xz" => Decompressor::xz(reader), 53 | unknown => panic!("Unknown compression algorithm: {unknown:?}"), 54 | }; 55 | 56 | let mut buf = Vec::new(); 57 | reader.read_to_end(&mut buf).await?; 58 | 59 | let sources = apt::SourcesIndex::parse(&buf)?; 60 | 61 | for pkg in &sources.pkgs { 62 | debug!("pkg={pkg:?}"); 63 | pkg.version.as_ref().unwrap(); 64 | pkg.directory.as_ref().unwrap(); 65 | 66 | for entry in &pkg.checksums_sha256 { 67 | let name = entry.filename.clone(); 68 | if name.ends_with(".orig.tar.xz") 69 | || name.ends_with(".orig.tar.gz") 70 | || name.ends_with(".orig.tar.bz2") 71 | { 72 | let chksum = format!("sha256:{}", entry.hash); 73 | let package = pkg.package.to_string(); 74 | let version = pkg.version.clone().unwrap(); 75 | info!( 76 | "digest={chksum:?} package={package:?} version={version:?} name={name:?}" 77 | ); 78 | let obj = db::Ref { 79 | chksum, 80 | vendor: args.vendor.to_string(), 81 | package, 82 | version, 83 | filename: Some(name.clone()), 84 | }; 85 | db.insert_ref(&obj).await?; 86 | 87 | if name.starts_with("chromium_") { 88 | continue; 89 | } 90 | 91 | if args.reindex || db.resolve_artifact(&obj.chksum).await?.is_none() { 92 | let directory = pkg.directory.as_ref().unwrap(); 93 | let url = format!("{base_url}/{directory}/{name}"); 94 | info!("url={url:?}"); 95 | db.insert_task(&Task::new( 96 | format!("fetch:{url}"), 97 | &TaskData::FetchTar { 98 | url, 99 | compression: None, 100 | success_ref: None, 101 | }, 102 | )?) 103 | .await?; 104 | } 105 | } 106 | } 107 | } 108 | } 109 | } 110 | 111 | Ok(()) 112 | } 113 | -------------------------------------------------------------------------------- /src/sync/guix.rs: -------------------------------------------------------------------------------- 1 | use crate::args; 2 | use crate::db::{self, Task, TaskData}; 3 | use crate::errors::*; 4 | use crate::utils; 5 | use async_compression::tokio::bufread::GzipDecoder; 6 | use data_encoding::BASE64; 7 | use serde::Deserialize; 8 | use tokio::io; 9 | use tokio::io::AsyncReadExt; 10 | 11 | #[derive(Debug, PartialEq, Deserialize)] 12 | pub struct Package { 13 | name: String, 14 | version: String, 15 | #[serde(default)] 16 | source: Vec, 17 | } 18 | 19 | #[derive(Debug, PartialEq, Deserialize)] 20 | #[serde(tag = "type", rename_all = "snake_case")] 21 | pub enum Source { 22 | Url(UrlSource), 23 | Git(GitSource), 24 | Hg(HgSource), 25 | Svn(SvnSource), 26 | } 27 | 28 | impl Source { 29 | fn integrity(&self) -> &Integrity { 30 | match self { 31 | Source::Url(source) => &source.integrity, 32 | Source::Git(source) => &source.integrity, 33 | Source::Hg(source) => &source.integrity, 34 | Source::Svn(source) => &source.integrity, 35 | } 36 | } 37 | } 38 | 39 | #[derive(Debug, PartialEq, Deserialize)] 40 | pub struct Integrity { 41 | #[serde(rename = "integrity")] 42 | pub hash: String, 43 | #[serde(rename = "outputHashAlgo")] 44 | pub output_hash_algo: String, 45 | #[serde(rename = "outputHashMode")] 46 | pub output_hash_mode: String, 47 | } 48 | 49 | #[derive(Debug, PartialEq, Deserialize)] 50 | pub struct UrlSource { 51 | pub urls: Vec, 52 | #[serde(flatten)] 53 | pub integrity: Integrity, 54 | } 55 | 56 | #[derive(Debug, PartialEq, Deserialize)] 57 | pub struct GitSource { 58 | pub git_url: String, 59 | #[serde(flatten)] 60 | pub integrity: Integrity, 61 | pub git_ref: String, 62 | #[serde(default)] 63 | pub submodule: bool, 64 | } 65 | 66 | #[derive(Debug, PartialEq, Deserialize)] 67 | pub struct HgSource { 68 | #[serde(flatten)] 69 | pub integrity: Integrity, 70 | } 71 | 72 
| #[derive(Debug, PartialEq, Deserialize)] 73 | pub struct SvnSource { 74 | #[serde(flatten)] 75 | pub integrity: Integrity, 76 | } 77 | 78 | pub async fn run(args: &args::SyncGuix) -> Result<()> { 79 | let db = db::Client::create().await?; 80 | 81 | let reader = utils::fetch_or_open(&args.file, args.fetch).await?; 82 | let reader = io::BufReader::new(reader); 83 | let mut reader = GzipDecoder::new(reader); 84 | 85 | let mut buf = String::new(); 86 | reader.read_to_string(&mut buf).await?; 87 | 88 | let packages = serde_json::from_str::<Vec<Package>>(&buf)?; 89 | for package in packages { 90 | debug!("package={:?} version={:?}", package.name, package.version); 91 | for source in package.source { 92 | let Ok(source) = serde_json::from_value::<Source>(source) else { 93 | continue; 94 | }; 95 | 96 | let integrity = source.integrity(); 97 | if integrity.output_hash_mode != "flat" { 98 | continue; 99 | } 100 | 101 | let Some(hash) = integrity.hash.strip_prefix("sha256-") else { 102 | continue; 103 | }; 104 | let digest = hex::encode(BASE64.decode(hash.as_bytes())?); 105 | let chksum = format!("sha256:{digest}"); 106 | 107 | if let Source::Url(source) = &source { 108 | let Some(url) = source.urls.first() else { 109 | continue; 110 | }; 111 | debug!("chksum={chksum:?} url={url:?}"); 112 | 113 | if !utils::is_possible_tar_artifact(url) { 114 | continue; 115 | } 116 | 117 | let obj = db::Ref { 118 | chksum: chksum.to_string(), 119 | vendor: args.vendor.to_string(), 120 | package: package.name.to_string(), 121 | version: package.version.to_string(), 122 | filename: Some(url.to_string()), 123 | }; 124 | info!("insert: {obj:?}"); 125 | db.insert_ref(&obj).await?; 126 | 127 | if db.resolve_artifact(&chksum).await?.is_none() { 128 | info!("Adding download task: url={url:?}"); 129 | db.insert_task(&Task::new( 130 | format!("fetch:{url}"), 131 | &TaskData::FetchTar { 132 | url: url.to_string(), 133 | compression: None, 134 | success_ref: None, 135 | }, 136 | )?) 137 | .await?; 138 | } 139 | } 140 | } 141 | } 142 | 143 | Ok(()) 144 | } 145 | 146 | #[cfg(test)] 147 | mod tests { 148 | use super::*; 149 | 150 | #[test] 151 | fn test_parse_git_source() { 152 | let data = r#" 153 | { 154 | "name": "zig", 155 | "version": "0.9.1", 156 | "variable_name": "zig-0.9", 157 | "source": [ 158 | { 159 | "type": "git", 160 | "git_url": "https://github.com/ziglang/zig.git", 161 | "integrity": "sha256-x2c4c9RSrNWGqEngio4ArW7dJjW0gg+8nqBwPcR721k=", 162 | "outputHashAlgo": "sha256", 163 | "outputHashMode": "recursive", 164 | "git_ref": "0.9.1" 165 | } 166 | ], 167 | "synopsis": "General purpose programming language and toolchain", 168 | "homepage": "https://github.com/ziglang/zig", 169 | "location": "gnu/packages/zig.scm:36" 170 | } 171 | "#; 172 | let mut pkg = serde_json::from_str::<Package>(data).unwrap(); 173 | let source = pkg 174 | .source 175 | .drain(..)
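// Editor's note: this mirrors the two-step decode used in run() above — `source`
// stays raw `serde_json::Value` inside `Package`, and each entry is decoded into
// `Source` separately, so source types the enum doesn't model are skipped
// (flat_map drops the Err case) instead of failing the whole package dump.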
176 | .flat_map(serde_json::from_value) 177 | .collect::<Vec<_>>(); 178 | assert_eq!( 179 | pkg, 180 | Package { 181 | name: "zig".to_string(), 182 | version: "0.9.1".to_string(), 183 | source: vec![], 184 | } 185 | ); 186 | assert_eq!( 187 | source, 188 | vec![Source::Git(GitSource { 189 | git_url: "https://github.com/ziglang/zig.git".to_string(), 190 | integrity: Integrity { 191 | hash: "sha256-x2c4c9RSrNWGqEngio4ArW7dJjW0gg+8nqBwPcR721k=".to_string(), 192 | output_hash_algo: "sha256".to_string(), 193 | output_hash_mode: "recursive".to_string(), 194 | }, 195 | git_ref: "0.9.1".to_string(), 196 | submodule: false, 197 | })] 198 | ); 199 | } 200 | 201 | #[test] 202 | fn test_parse_url_source() { 203 | let data = r#" 204 | { 205 | "name": "xdialog", 206 | "version": "2.3.1", 207 | "variable_name": "xdialog", 208 | "source": [ 209 | { 210 | "type": "url", 211 | "urls": [ 212 | "http://xdialog.free.fr/Xdialog-2.3.1.tar.bz2", 213 | "https://bordeaux.guix.gnu.org/file/Xdialog-2.3.1.tar.bz2/sha256/16jqparb33lfq4cvd9l3jgd7fq86fk9gv2ixc8vgqibid6cnhi0x", 214 | "https://ci.guix.gnu.org/file/Xdialog-2.3.1.tar.bz2/sha256/16jqparb33lfq4cvd9l3jgd7fq86fk9gv2ixc8vgqibid6cnhi0x", 215 | "https://tarballs.nixos.org/sha256/16jqparb33lfq4cvd9l3jgd7fq86fk9gv2ixc8vgqibid6cnhi0x" 216 | ], 217 | "integrity": "sha256-HURomWlxRfw2Yj2K/dJ0BmF32pODprYZwY6OsbK6WJo=", 218 | "outputHashAlgo": "sha256", 219 | "outputHashMode": "flat" 220 | } 221 | ], 222 | "synopsis": "Convert a terminal program into a program with an X interface", 223 | "homepage": "http://xdialog.free.fr/", 224 | "location": "gnu/packages/xorg.scm:6640" 225 | } 226 | "#; 227 | let mut pkg = serde_json::from_str::<Package>(data).unwrap(); 228 | let source = pkg 229 | .source 230 | .drain(..) 231 | .flat_map(serde_json::from_value) 232 | .collect::<Vec<_>>(); 233 | assert_eq!( 234 | pkg, 235 | Package { 236 | name: "xdialog".to_string(), 237 | version: "2.3.1".to_string(), 238 | source: vec![], 239 | } 240 | ); 241 | assert_eq!(source, vec![ 242 | Source::Url(UrlSource { 243 | urls: vec![ 244 | "http://xdialog.free.fr/Xdialog-2.3.1.tar.bz2".to_string(), 245 | "https://bordeaux.guix.gnu.org/file/Xdialog-2.3.1.tar.bz2/sha256/16jqparb33lfq4cvd9l3jgd7fq86fk9gv2ixc8vgqibid6cnhi0x".to_string(), 246 | "https://ci.guix.gnu.org/file/Xdialog-2.3.1.tar.bz2/sha256/16jqparb33lfq4cvd9l3jgd7fq86fk9gv2ixc8vgqibid6cnhi0x".to_string(), 247 | "https://tarballs.nixos.org/sha256/16jqparb33lfq4cvd9l3jgd7fq86fk9gv2ixc8vgqibid6cnhi0x".to_string(), 248 | ], 249 | integrity: Integrity { 250 | hash: "sha256-HURomWlxRfw2Yj2K/dJ0BmF32pODprYZwY6OsbK6WJo=".to_string(), 251 | output_hash_algo: "sha256".to_string(), 252 | output_hash_mode: "flat".to_string(), 253 | }, 254 | }) 255 | ]); 256 | } 257 | } 258 | -------------------------------------------------------------------------------- /src/sync/homebrew.rs: -------------------------------------------------------------------------------- 1 | use crate::args; 2 | use crate::db::{self, Task, TaskData}; 3 | use crate::errors::*; 4 | use crate::utils; 5 | use serde::Deserialize; 6 | use tokio::io::AsyncReadExt; 7 | 8 | #[derive(Debug, Deserialize)] 9 | pub struct Formula { 10 | name: String, 11 | versions: Versions, 12 | urls: SourceSet, 13 | revision: u16, 14 | } 15 | 16 | #[derive(Debug, Deserialize)] 17 | pub struct Versions { 18 | stable: String, 19 | } 20 | 21 | #[derive(Debug, Deserialize)] 22 | pub struct SourceSet { 23 | stable: SourceUrl, 24 | } 25 | 26 | #[derive(Debug, Deserialize)] 27 | pub struct SourceUrl { 28 | url: String, 29 | tag: Option<String>, 30
| revision: Option<String>, 31 | checksum: Option<String>, 32 | } 33 | 34 | pub async fn run(args: &args::SyncHomebrew) -> Result<()> { 35 | let db = db::Client::create().await?; 36 | let vendor = &args.vendor; 37 | 38 | let mut reader = utils::fetch_or_open(&args.file, args.fetch).await?; 39 | 40 | let mut buf = String::new(); 41 | reader.read_to_string(&mut buf).await?; 42 | 43 | let formulas = serde_json::from_str::<Vec<Formula>>(&buf)?; 44 | for formula in formulas { 45 | debug!("formula={formula:?}"); 46 | 47 | let package = formula.name; 48 | let version = format!("{}-{}", formula.versions.stable, formula.revision); 49 | let url = formula.urls.stable.url; 50 | 51 | let (url, chksum) = if let Some(checksum) = formula.urls.stable.checksum { 52 | (url, format!("sha256:{checksum}")) 53 | } else if let Some(revision) = &formula.urls.stable.revision { 54 | let tag = formula.urls.stable.tag.as_ref().unwrap_or(revision); 55 | let url = format!("git+{url}#tag={tag}"); 56 | (url, format!("git:{revision}")) 57 | } else { 58 | continue; 59 | }; 60 | 61 | if db.resolve_artifact(&chksum).await?.is_none() { 62 | if url.starts_with("https://") || url.starts_with("http://") { 63 | info!("Found tarball url: {url:?}"); 64 | db.insert_task(&Task::new( 65 | format!("fetch:{url}"), 66 | &TaskData::FetchTar { 67 | url: url.to_string(), 68 | compression: None, 69 | success_ref: None, 70 | }, 71 | )?) 72 | .await?; 73 | } else if url.starts_with("git+https://") { 74 | info!("Found git remote: {url:?}"); 75 | db.insert_task(&Task::new( 76 | format!("git-clone:{url}"), 77 | &TaskData::GitSnapshot { 78 | url: url.to_string(), 79 | }, 80 | )?) 81 | .await?; 82 | } 83 | } 84 | 85 | debug!("package={package:?} version={version:?} url={url:?} ({chksum})"); 86 | let obj = db::Ref { 87 | chksum, 88 | vendor: vendor.to_string(), 89 | package, 90 | version, 91 | filename: Some(url), 92 | }; 93 | db.insert_ref(&obj).await?; 94 | } 95 | 96 | Ok(()) 97 | } 98 | -------------------------------------------------------------------------------- /src/sync/live_bootstrap.rs: -------------------------------------------------------------------------------- 1 | use crate::args; 2 | use crate::db; 3 | use crate::errors::*; 4 | use crate::utils; 5 | use async_compression::tokio::bufread::GzipDecoder; 6 | use futures::StreamExt; 7 | use std::path::Path; 8 | use tokio::io::{self, AsyncReadExt}; 9 | use tokio_tar::Archive; 10 | 11 | fn metadata_from_path(path: &Path) -> Option<(&str, &str)> { 12 | let path = path.to_str()?; 13 | let path = path.strip_suffix("/sources")?; 14 | let (_, filename) = path.rsplit_once('/')?; 15 | let (package, version) = filename.rsplit_once('-')?; 16 | Some((package, version)) 17 | } 18 | 19 | fn input_from_lines(content: &str) -> Vec<(&str, &str)> { 20 | let mut inputs = Vec::new(); 21 | for line in content.lines() { 22 | let Some((url, sha256)) = line.rsplit_once(' ') else { 23 | continue; 24 | }; 25 | let (url, sha256) = if let Some(data) = url.rsplit_once(' ') { 26 | data 27 | } else { 28 | (url, sha256) 29 | }; 30 | inputs.push((url, sha256)); 31 | } 32 | inputs 33 | } 34 | 35 | pub async fn run(args: &args::SyncLiveBootstrap) -> Result<()> { 36 | let db = db::Client::create().await?; 37 | let vendor = &args.vendor; 38 | 39 | let reader = utils::fetch_or_open(&args.file, args.fetch).await?; 40 | let reader = io::BufReader::new(reader); 41 | let reader = GzipDecoder::new(reader); 42 | let mut tar = Archive::new(reader); 43 | 44 | let mut entries = tar.entries()?; 45 | while let Some(entry) = entries.next().await { 46 | let mut entry
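// Editor's note (worked example, taken from test_parse_sources below): each line
// of a live-bootstrap "sources" file is "<url> <sha256>", optionally followed by a
// rename target; the double rsplit_once() in input_from_lines() above keeps only
// the (url, sha256) pair:
//
//     let inputs = input_from_lines(
//         "http://ftp.unicode.org/Public/15.0.0/ucd/UnicodeData.txt 806e9aed65037197f1ec85e12be6e8cd870fc5608b4de0fffd990f689f376a73 UnicodeData-15.0.0.txt",
//     );
//     assert_eq!(inputs[0], (
//         "http://ftp.unicode.org/Public/15.0.0/ucd/UnicodeData.txt",
//         "806e9aed65037197f1ec85e12be6e8cd870fc5608b4de0fffd990f689f376a73",
//     ));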
= entry?; 47 | if !entry.header().entry_type().is_file() { 48 | continue; 49 | } 50 | 51 | let path = entry.path()?; 52 | 53 | let Some((package, version)) = metadata_from_path(&path) else { 54 | trace!("Skipping path in git snapshot: {path:?}"); 55 | continue; 56 | }; 57 | let package = package.to_string(); 58 | let version = version.to_string(); 59 | 60 | debug!("Found package in export: package={package:?} version={version:?}"); 61 | 62 | let mut buf = String::new(); 63 | entry.read_to_string(&mut buf).await?; 64 | 65 | for (url, sha256) in input_from_lines(&buf) { 66 | if !utils::is_possible_tar_artifact(url) { 67 | continue; 68 | } 69 | 70 | let chksum = format!("sha256:{sha256}"); 71 | debug!("Found artifact for package: url={url:?} chksum={chksum:?}"); 72 | 73 | let task = if db.resolve_artifact(&chksum).await?.is_none() { 74 | utils::task_for_url(url) 75 | } else { 76 | None 77 | }; 78 | 79 | let r = db::Ref { 80 | chksum, 81 | vendor: vendor.to_string(), 82 | package: package.to_string(), 83 | version: version.to_string(), 84 | filename: Some(url.to_string()), 85 | }; 86 | debug!("insert: {r:?}"); 87 | db.insert_ref(&r).await?; 88 | 89 | if let Some(task) = task { 90 | info!("Adding task: {task:?}"); 91 | db.insert_task(&task).await?; 92 | } 93 | } 94 | } 95 | 96 | Ok(()) 97 | } 98 | 99 | #[cfg(test)] 100 | mod tests { 101 | use super::*; 102 | 103 | #[test] 104 | fn test_parse_path_package_version() { 105 | let path = Path::new("live-bootstrap-master/steps/pkg-config-0.29.2/sources"); 106 | let md = metadata_from_path(path); 107 | assert_eq!(md, Some(("pkg-config", "0.29.2"))); 108 | } 109 | 110 | #[test] 111 | fn test_parse_sources() { 112 | // most sources files are very simple, this is one of the more complex ones 113 | let buf = "http://git.savannah.gnu.org/cgit/coreutils.git/snapshot/coreutils-9.4.tar.xz 8fb56810310253300b3d6f84e68dc97eb2d74e1f4f78e05776831d9d82e4f2d7\nhttps://files.bootstrapping.world/coreutils-9.4.tar.xz 8fb56810310253300b3d6f84e68dc97eb2d74e1f4f78e05776831d9d82e4f2d7\nhttp://git.savannah.gnu.org/cgit/gnulib.git/snapshot/gnulib-bb5bb43.tar.gz b8aa1ac1b18c67f081486069e6a7a5564f20431c2313a94c20a46dcfb904be2a\nhttps://files.bootstrapping.world/gnulib-bb5bb43.tar.gz b8aa1ac1b18c67f081486069e6a7a5564f20431c2313a94c20a46dcfb904be2a\nhttp://ftp.unicode.org/Public/15.0.0/ucd/UnicodeData.txt 806e9aed65037197f1ec85e12be6e8cd870fc5608b4de0fffd990f689f376a73 UnicodeData-15.0.0.txt\nhttp://ftp.unicode.org/Public/15.0.0/ucd/PropList.txt e05c0a2811d113dae4abd832884199a3ea8d187ee1b872d8240a788a96540bfd PropList-15.0.0.txt\nhttp://ftp.unicode.org/Public/15.0.0/ucd/DerivedCoreProperties.txt d367290bc0867e6b484c68370530bdd1a08b6b32404601b8c7accaf83e05628d DerivedCoreProperties-15.0.0.txt\nhttp://ftp.unicode.org/Public/15.0.0/ucd/emoji/emoji-data.txt 29071dba22c72c27783a73016afb8ffaeb025866740791f9c2d0b55cc45a3470 emoji-data-15.0.0.txt\nhttp://ftp.unicode.org/Public/15.0.0/ucd/ArabicShaping.txt eb840f36e0a7446293578c684a54c6d83d249abde7bdd4dfa89794af1d7fe9e9 ArabicShaping-15.0.0.txt\nhttp://ftp.unicode.org/Public/15.0.0/ucd/Scripts.txt cca85d830f46aece2e7c1459ef1249993dca8f2e46d51e869255be140d7ea4b0 Scripts-15.0.0.txt\nhttp://ftp.unicode.org/Public/15.0.0/ucd/Blocks.txt 529dc5d0f6386d52f2f56e004bbfab48ce2d587eea9d38ba546c4052491bd820 Blocks-15.0.0.txt\nhttp://ftp.unicode.org/Public/3.0-Update1/PropList-3.0.1.txt 909eef4adbeddbdddcd9487c856fe8cdbb8912aa8eb315ed7885b6ef65f4dc4c\nhttp://ftp.unicode.org/Public/15.0.0/ucd/EastAsianWidth.txt 
743e7bc435c04ab1a8459710b1c3cad56eedced5b806b4659b6e69b85d0adf2a EastAsianWidth-15.0.0.txt\nhttp://ftp.unicode.org/Public/15.0.0/ucd/LineBreak.txt 012bca868e2c4e59a5a10a7546baf0c6fb1b2ef458c277f054915c8a49d292bf LineBreak-15.0.0.txt\nhttp://ftp.unicode.org/Public/15.0.0/ucd/auxiliary/WordBreakProperty.txt 5188a56e91593467c2e912601ebc78750e6adc9b04541b8c5becb5441e388ce2 WordBreakProperty-15.0.0.txt\nhttp://ftp.unicode.org/Public/15.0.0/ucd/auxiliary/GraphemeBreakProperty.txt 5a0f8748575432f8ff95e1dd5bfaa27bda1a844809e17d6939ee912bba6568a1 GraphemeBreakProperty-15.0.0.txt\nhttp://ftp.unicode.org/Public/15.0.0/ucd/CompositionExclusions.txt 3b019c0a33c3140cbc920c078f4f9af2680ba4f71869c8d4de5190667c70b6a3 CompositionExclusions-15.0.0.txt\nhttp://ftp.unicode.org/Public/15.0.0/ucd/SpecialCasing.txt 78b29c64b5840d25c11a9f31b665ee551b8a499eca6c70d770fcad7dd710f494 SpecialCasing-15.0.0.txt\nhttp://ftp.unicode.org/Public/15.0.0/ucd/CaseFolding.txt cdd49e55eae3bbf1f0a3f6580c974a0263cb86a6a08daa10fbf705b4808a56f7 CaseFolding-15.0.0.txt\n"; 114 | let inputs = input_from_lines(buf); 115 | assert_eq!( 116 | inputs, 117 | [ 118 | ( 119 | "http://git.savannah.gnu.org/cgit/coreutils.git/snapshot/coreutils-9.4.tar.xz", 120 | "8fb56810310253300b3d6f84e68dc97eb2d74e1f4f78e05776831d9d82e4f2d7" 121 | ), 122 | ( 123 | "https://files.bootstrapping.world/coreutils-9.4.tar.xz", 124 | "8fb56810310253300b3d6f84e68dc97eb2d74e1f4f78e05776831d9d82e4f2d7" 125 | ), 126 | ( 127 | "http://git.savannah.gnu.org/cgit/gnulib.git/snapshot/gnulib-bb5bb43.tar.gz", 128 | "b8aa1ac1b18c67f081486069e6a7a5564f20431c2313a94c20a46dcfb904be2a" 129 | ), 130 | ( 131 | "https://files.bootstrapping.world/gnulib-bb5bb43.tar.gz", 132 | "b8aa1ac1b18c67f081486069e6a7a5564f20431c2313a94c20a46dcfb904be2a" 133 | ), 134 | ( 135 | "http://ftp.unicode.org/Public/15.0.0/ucd/UnicodeData.txt", 136 | "806e9aed65037197f1ec85e12be6e8cd870fc5608b4de0fffd990f689f376a73" 137 | ), 138 | ( 139 | "http://ftp.unicode.org/Public/15.0.0/ucd/PropList.txt", 140 | "e05c0a2811d113dae4abd832884199a3ea8d187ee1b872d8240a788a96540bfd" 141 | ), 142 | ( 143 | "http://ftp.unicode.org/Public/15.0.0/ucd/DerivedCoreProperties.txt", 144 | "d367290bc0867e6b484c68370530bdd1a08b6b32404601b8c7accaf83e05628d" 145 | ), 146 | ( 147 | "http://ftp.unicode.org/Public/15.0.0/ucd/emoji/emoji-data.txt", 148 | "29071dba22c72c27783a73016afb8ffaeb025866740791f9c2d0b55cc45a3470" 149 | ), 150 | ( 151 | "http://ftp.unicode.org/Public/15.0.0/ucd/ArabicShaping.txt", 152 | "eb840f36e0a7446293578c684a54c6d83d249abde7bdd4dfa89794af1d7fe9e9" 153 | ), 154 | ( 155 | "http://ftp.unicode.org/Public/15.0.0/ucd/Scripts.txt", 156 | "cca85d830f46aece2e7c1459ef1249993dca8f2e46d51e869255be140d7ea4b0" 157 | ), 158 | ( 159 | "http://ftp.unicode.org/Public/15.0.0/ucd/Blocks.txt", 160 | "529dc5d0f6386d52f2f56e004bbfab48ce2d587eea9d38ba546c4052491bd820" 161 | ), 162 | ( 163 | "http://ftp.unicode.org/Public/3.0-Update1/PropList-3.0.1.txt", 164 | "909eef4adbeddbdddcd9487c856fe8cdbb8912aa8eb315ed7885b6ef65f4dc4c" 165 | ), 166 | ( 167 | "http://ftp.unicode.org/Public/15.0.0/ucd/EastAsianWidth.txt", 168 | "743e7bc435c04ab1a8459710b1c3cad56eedced5b806b4659b6e69b85d0adf2a" 169 | ), 170 | ( 171 | "http://ftp.unicode.org/Public/15.0.0/ucd/LineBreak.txt", 172 | "012bca868e2c4e59a5a10a7546baf0c6fb1b2ef458c277f054915c8a49d292bf" 173 | ), 174 | ( 175 | "http://ftp.unicode.org/Public/15.0.0/ucd/auxiliary/WordBreakProperty.txt", 176 | "5188a56e91593467c2e912601ebc78750e6adc9b04541b8c5becb5441e388ce2" 177 | ), 178 | ( 179 | 
"http://ftp.unicode.org/Public/15.0.0/ucd/auxiliary/GraphemeBreakProperty.txt", 180 | "5a0f8748575432f8ff95e1dd5bfaa27bda1a844809e17d6939ee912bba6568a1" 181 | ), 182 | ( 183 | "http://ftp.unicode.org/Public/15.0.0/ucd/CompositionExclusions.txt", 184 | "3b019c0a33c3140cbc920c078f4f9af2680ba4f71869c8d4de5190667c70b6a3" 185 | ), 186 | ( 187 | "http://ftp.unicode.org/Public/15.0.0/ucd/SpecialCasing.txt", 188 | "78b29c64b5840d25c11a9f31b665ee551b8a499eca6c70d770fcad7dd710f494" 189 | ), 190 | ( 191 | "http://ftp.unicode.org/Public/15.0.0/ucd/CaseFolding.txt", 192 | "cdd49e55eae3bbf1f0a3f6580c974a0263cb86a6a08daa10fbf705b4808a56f7" 193 | ), 194 | ] 195 | ); 196 | } 197 | } 198 | -------------------------------------------------------------------------------- /src/sync/mod.rs: -------------------------------------------------------------------------------- 1 | pub mod alpine; 2 | pub mod apt; 3 | pub mod gentoo; 4 | pub mod guix; 5 | pub mod homebrew; 6 | pub mod live_bootstrap; 7 | pub mod pacman; 8 | pub mod rpm; 9 | pub mod stagex; 10 | pub mod void; 11 | pub mod yocto; 12 | -------------------------------------------------------------------------------- /src/sync/pacman.rs: -------------------------------------------------------------------------------- 1 | use crate::args; 2 | use crate::db; 3 | use crate::errors::*; 4 | use crate::utils; 5 | use async_compression::tokio::bufread::GzipDecoder; 6 | use futures::StreamExt; 7 | use std::path::Path; 8 | use tokio::io::{self, AsyncReadExt}; 9 | use tokio_tar::{Archive, EntryType}; 10 | 11 | fn matches_repo(path: &Path, repos: &[String]) -> bool { 12 | let Ok(path) = path.strip_prefix("state-main") else { 13 | return false; 14 | }; 15 | for repo in repos { 16 | if path.starts_with(repo) { 17 | return true; 18 | } 19 | } 20 | false 21 | } 22 | 23 | pub async fn run(args: &args::SyncPacman) -> Result<()> { 24 | let db = db::Client::create().await?; 25 | let vendor = &args.vendor; 26 | 27 | let reader = utils::fetch_or_open(&args.file, args.fetch).await?; 28 | let reader = io::BufReader::new(reader); 29 | let reader = GzipDecoder::new(reader); 30 | let mut tar = Archive::new(reader); 31 | 32 | let mut entries = tar.entries()?; 33 | while let Some(entry) = entries.next().await { 34 | let mut entry = entry?; 35 | let header = entry.header(); 36 | if header.entry_type() != EntryType::Regular { 37 | continue; 38 | } 39 | 40 | if !matches_repo(&entry.path()?, &args.repos) { 41 | continue; 42 | } 43 | 44 | let mut buf = String::new(); 45 | entry.read_to_string(&mut buf).await?; 46 | debug!("Found data in state repo: {buf:?}"); 47 | 48 | let mut chunker = buf.split(' '); 49 | let Some(pkgbase) = chunker.next() else { 50 | continue; 51 | }; 52 | let Some(version) = chunker.next() else { 53 | continue; 54 | }; 55 | let Some(tag) = chunker.next() else { continue }; 56 | 57 | // mark all refs known for this package as "last_seen now" 58 | db.bump_named_refs(vendor, pkgbase, version).await?; 59 | 60 | // check if package already imported 61 | if db.get_package(vendor, pkgbase, version).await?.is_some() { 62 | debug!("Package is already imported: vendor={vendor:?} package={pkgbase:?} version={version:?}"); 63 | continue; 64 | } 65 | 66 | // queue for import 67 | info!("package={pkgbase:?} version={version:?} tag={tag:?}"); 68 | db.insert_task(&db::Task::new( 69 | format!("pacman-git-snapshot:{pkgbase}:{tag}"), 70 | &db::TaskData::PacmanGitSnapshot { 71 | vendor: vendor.to_string(), 72 | package: pkgbase.to_string(), 73 | version: version.to_string(), 74 | tag: 
tag.to_string(), 75 | }, 76 | )?) 77 | .await?; 78 | } 79 | 80 | Ok(()) 81 | } 82 | -------------------------------------------------------------------------------- /src/sync/rpm.rs: -------------------------------------------------------------------------------- 1 | use crate::args; 2 | use crate::db; 3 | use crate::errors::*; 4 | use crate::utils; 5 | use async_compression::tokio::bufread::{GzipDecoder, ZstdDecoder}; 6 | use serde::Deserialize; 7 | use tokio::io::{self, AsyncRead, AsyncReadExt}; 8 | 9 | #[derive(Debug, PartialEq, Deserialize)] 10 | pub struct Metadata { 11 | #[serde(rename = "package")] 12 | packages: Vec<Package>, 13 | } 14 | 15 | impl Metadata { 16 | pub fn from_xml(xml: &str) -> Result<Self> { 17 | let xml = serde_xml_rs::from_str(xml)?; 18 | Ok(xml) 19 | } 20 | } 21 | 22 | #[derive(Debug, PartialEq, Deserialize)] 23 | pub struct Package { 24 | name: String, 25 | version: Version, 26 | location: Location, 27 | } 28 | 29 | #[derive(Debug, PartialEq, Deserialize)] 30 | pub struct Version { 31 | ver: String, 32 | rel: String, 33 | } 34 | 35 | #[derive(Debug, PartialEq, Deserialize)] 36 | pub struct Location { 37 | href: String, 38 | } 39 | 40 | #[derive(Debug, PartialEq, Deserialize)] 41 | pub struct RepoMd { 42 | data: Vec<Data>, 43 | } 44 | 45 | impl RepoMd { 46 | pub fn from_xml(xml: &str) -> Result<Self> { 47 | let xml = serde_xml_rs::from_str(xml)?; 48 | Ok(xml) 49 | } 50 | 51 | pub fn find_primary_location(&self) -> Result<&str> { 52 | let href = self 53 | .data 54 | .iter() 55 | .find(|e| e.data_type == "primary") 56 | .ok_or(Error::RpmMissingPrimary)? 57 | .location 58 | .href 59 | .as_str(); 60 | Ok(href) 61 | } 62 | } 63 | 64 | #[derive(Debug, PartialEq, Deserialize)] 65 | pub struct Data { 66 | #[serde(rename = "type")] 67 | data_type: String, 68 | location: Location, 69 | } 70 | 71 | pub async fn run(args: &args::SyncRpm) -> Result<()> { 72 | let db = db::Client::create().await?; 73 | let base_url = args.url.strip_suffix('/').unwrap_or(&args.url); 74 | let vendor = &args.vendor; 75 | 76 | let http = utils::http_client(None)?; 77 | 78 | let url = format!("{base_url}/repodata/repomd.xml"); 79 | info!("Downloading url: {url:?}"); 80 | let mut reader = http.fetch(&url).await?; 81 | 82 | let mut text = String::new(); 83 | reader.read_to_string(&mut text).await?; 84 | 85 | let repomd = RepoMd::from_xml(&text)?; 86 | let url = format!("{base_url}/{}", repomd.find_primary_location()?); 87 | 88 | info!("Downloading url: {url:?}"); 89 | let reader = http.fetch(&url).await?; 90 | let reader = io::BufReader::new(reader); 91 | let mut reader: Box<dyn AsyncRead + Unpin> = if url.ends_with(".zst") { 92 | Box::new(ZstdDecoder::new(reader)) 93 | } else { 94 | Box::new(GzipDecoder::new(reader)) 95 | }; 96 | 97 | let mut buf = String::new(); 98 | reader.read_to_string(&mut buf).await?; 99 | 100 | info!("Processing xml"); 101 | let md = Metadata::from_xml(&buf)?; 102 | for pkg in md.packages { 103 | let package = pkg.name; 104 | let version = format!("{}-{}", pkg.version.ver, pkg.version.rel); 105 | 106 | // mark all refs known for this package as "last_seen now" 107 | db.bump_named_refs(vendor, &package, &version).await?; 108 | 109 | if db.get_package(vendor, &package, &version).await?.is_some() { 110 | debug!("Package is already imported: vendor={vendor:?} package={package:?} version={version:?}"); 111 | continue; 112 | } 113 | 114 | let url = format!("{base_url}/{}", pkg.location.href); 115 | 116 | info!("package={package:?} version={version:?} url={url:?}"); 117 | db.insert_task(&db::Task::new( 118 |
format!("source-rpm:{vendor}:{package}:{version}"), 119 | &db::TaskData::SourceRpm { 120 | vendor: vendor.to_string(), 121 | package: package.to_string(), 122 | version: version.to_string(), 123 | url, 124 | }, 125 | )?) 126 | .await?; 127 | } 128 | 129 | Ok(()) 130 | } 131 | 132 | #[cfg(test)] 133 | mod tests { 134 | use super::*; 135 | 136 | #[test] 137 | fn test_parse_primary_xml() { 138 | let data = r#" 139 | 140 | 141 | 0ad 142 | src 143 | 144 | 2368bc4da6effe91983f4136e651834cc3b547cecafaed3bf06bf2fcfdc53848 145 | Cross-Platform RTS Game of Ancient Warfare 146 | 0 A.D. (pronounced "zero ey-dee") is a free, open-source, cross-platform 147 | real-time strategy (RTS) game of ancient warfare. In short, it is a 148 | historically-based war/economy game that allows players to relive or rewrite 149 | the history of Western civilizations, focusing on the years between 500 B.C. 150 | and 500 A.D. The project is highly ambitious, involving state-of-the-art 3D 151 | graphics, detailed artwork, sound, and a flexible and powerful custom-built 152 | game engine. 153 | 154 | The game has been in development by Wildfire Games (WFG), a group of volunteer, 155 | hobbyist game developers, since 2001. 156 | Fedora Project 157 | http://play0ad.com 158 | 213 | 214 | "#; 215 | let md = Metadata::from_xml(data).unwrap(); 216 | assert_eq!( 217 | md, 218 | Metadata { 219 | packages: vec![Package { 220 | name: "0ad".to_string(), 221 | version: Version { 222 | ver: "0.0.26".to_string(), 223 | rel: "21.fc41".to_string(), 224 | }, 225 | location: Location { 226 | href: "Packages/0/0ad-0.0.26-21.fc41.src.rpm".to_string() 227 | }, 228 | }] 229 | } 230 | ); 231 | } 232 | 233 | #[test] 234 | fn test_parse_repomd() { 235 | let data = r#" 236 | 237 | 1712990641 238 | 239 | fa72c03d43e9ffe131633347045c0c56fbeacbd3281b2b03a6351f487218a158 240 | 259d84fce5ecb46226a21765561539eb992fff76356df088f9ed3d1d3d44cd28 241 | 242 | 1712990625 243 | 7587566 244 | 49907129 245 | 246 | 247 | caf9e9202dbd97fcf4da6ca3f228fd459505f0b17d37fb387240b03c8dc0e84a 248 | a35a9e10b149715434f405d3b5f3a895699d9a2939adb3435358337194bad323 249 | 250 | 1712990625 251 | 2013585 252 | 7783810 253 | 254 | 255 | "#; 256 | let md = RepoMd::from_xml(data).unwrap(); 257 | assert_eq!( 258 | md, 259 | RepoMd { 260 | data: vec![ 261 | Data { 262 | data_type: "primary".to_string(), 263 | location: Location { 264 | href: "repodata/fa72c03d43e9ffe131633347045c0c56fbeacbd3281b2b03a6351f487218a158-primary.xml.gz".to_string() 265 | } 266 | }, 267 | Data { 268 | data_type: "filelists".to_string(), 269 | location: Location { 270 | href: "repodata/caf9e9202dbd97fcf4da6ca3f228fd459505f0b17d37fb387240b03c8dc0e84a-filelists.xml.gz".to_string() 271 | } 272 | } 273 | ], 274 | } 275 | ); 276 | } 277 | } 278 | -------------------------------------------------------------------------------- /src/sync/stagex.rs: -------------------------------------------------------------------------------- 1 | use crate::args; 2 | use crate::db::{self, Task, TaskData}; 3 | use crate::errors::*; 4 | use crate::utils; 5 | use async_compression::tokio::bufread::GzipDecoder; 6 | use futures::StreamExt; 7 | use serde::Deserialize; 8 | use std::collections::BTreeMap; 9 | use tokio::io::{self, AsyncReadExt}; 10 | use tokio_tar::{Archive, EntryType}; 11 | 12 | #[derive(Debug, Deserialize)] 13 | pub struct Manifest { 14 | pub package: Package, 15 | #[serde(default)] 16 | pub sources: BTreeMap, 17 | } 18 | 19 | impl Manifest { 20 | pub fn parse(toml: &str) -> Result { 21 | let toml = 
toml::from_str(toml)?; 22 | Ok(toml) 23 | } 24 | 25 | fn optional_value(&self, key: &str, value: Option<&str>, source: &Source) -> Result<String> { 26 | let Some(value) = value else { 27 | return Err(Error::StagexUndefinedVariable(key.to_string())); 28 | }; 29 | self.interpolate(value, source) 30 | } 31 | 32 | fn version(&self, source: &Source) -> Result<String> { 33 | self.optional_value("version", self.package.version.as_deref(), source) 34 | } 35 | 36 | pub fn interpolate(&self, text: &str, source: &Source) -> Result<String> { 37 | let mut current = text; 38 | let mut out = String::new(); 39 | loop { 40 | let Some((before, after)) = current.split_once('{') else { 41 | out.push_str(current); 42 | break; 43 | }; 44 | out.push_str(before); 45 | let Some((key, after)) = after.split_once('}') else { 46 | return Err(Error::StagexUnclosedInterpolate(text.to_string())); 47 | }; 48 | 49 | let value = match key { 50 | "version" => self.version(source)?, 51 | "format" => self.optional_value(key, source.format.as_deref(), source)?, 52 | "file" => self.optional_value(key, source.file.as_deref(), source)?, 53 | "version_dash" => { 54 | let version = self.version(source)?; 55 | version.replace('.', "-") 56 | } 57 | "version_under" => { 58 | let version = self.version(source)?; 59 | version.replace('.', "_") 60 | } 61 | "version_major" => { 62 | let version = self.version(source)?; 63 | version.split('.').next().unwrap_or("").to_string() 64 | } 65 | "version_major_minor" => { 66 | let version = self.version(source)?; 67 | let parts: Vec<&str> = version.split('.').collect(); 68 | if parts.len() >= 2 { 69 | format!("{}.{}", parts[0], parts[1]) 70 | } else { 71 | version.to_string() 72 | } 73 | } 74 | "version_strip_suffix" => { 75 | let version = self.version(source)?; 76 | version 77 | .rsplit_once('-') 78 | .map(|(x, _)| x) 79 | .unwrap_or(&version) 80 | .to_string() 81 | } 82 | _ => return Err(Error::StagexUndefinedVariable(key.to_string())), 83 | }; 84 | out.push_str(&value); 85 | 86 | current = after; 87 | } 88 | Ok(out) 89 | } 90 | 91 | pub fn resolve_refs(&self, vendor: &str) -> Result<Vec<db::Ref>> { 92 | let Some(version) = &self.package.version else { 93 | return Ok(Vec::new()); 94 | }; 95 | self.sources 96 | .values() 97 | .map(|source| { 98 | let mirror = source 99 | .mirrors 100 | .first() 101 | .ok_or_else(|| Error::StagexMissingMirrors(source.clone()))?; 102 | 103 | Ok(db::Ref { 104 | chksum: format!("sha256:{}", source.hash), 105 | vendor: vendor.to_string(), 106 | package: self.package.name.to_string(), 107 | version: version.to_string(), 108 | filename: Some(self.interpolate(mirror, source)?), 109 | }) 110 | }) 111 | .collect() 112 | } 113 | } 114 | 115 | #[derive(Debug, Deserialize)] 116 | pub struct Package { 117 | pub name: String, 118 | pub version: Option<String>, 119 | } 120 | 121 | #[derive(Debug, Deserialize, Clone)] 122 | pub struct Source { 123 | pub hash: String, 124 | pub format: Option<String>, 125 | pub file: Option<String>, 126 | pub mirrors: Vec<String>, 127 | } 128 | 129 | pub async fn run(args: &args::SyncStagex) -> Result<()> { 130 | let db = db::Client::create().await?; 131 | let vendor = &args.vendor; 132 | 133 | let reader = utils::fetch_or_open(&args.file, args.fetch).await?; 134 | let reader = io::BufReader::new(reader); 135 | let reader = GzipDecoder::new(reader); 136 | let mut tar = Archive::new(reader); 137 | 138 | let mut entries = tar.entries()?; 139 | while let Some(entry) = entries.next().await { 140 | let mut entry = entry?; 141 | let header = entry.header(); 142 | if header.entry_type() != EntryType::Regular { 143 |
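// Editor's note (worked example, cf. test_parse_icu below): interpolate()
// rewrites mirror templates using the manifest's version, e.g. with
// version = "74.2":
//
//     "{version_dash}"  -> "74-2"
//     "{version_under}" -> "74_2"
//
// so "icu4c-{version_under}-src.tgz" becomes "icu4c-74_2-src.tgz".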
continue; 144 | } 145 | 146 | let path = entry.path()?; 147 | let Some(filename) = path.file_name() else { 148 | continue; 149 | }; 150 | if filename.to_str() != Some("package.toml") { 151 | continue; 152 | } 153 | 154 | info!("Found stagex package.toml: {path:?}"); 155 | 156 | let mut buf = String::new(); 157 | entry.read_to_string(&mut buf).await?; 158 | 159 | let manifest = Manifest::parse(&buf)?; 160 | debug!("Parsed stagex package.toml: {manifest:?}"); 161 | 162 | let refs = manifest.resolve_refs(vendor)?; 163 | for obj in &refs { 164 | let chksum = &obj.chksum; 165 | let Some(url) = &obj.filename else { 166 | continue; 167 | }; 168 | debug!("chksum={chksum:?} url={url:?}"); 169 | 170 | if !utils::is_possible_tar_artifact(url) { 171 | continue; 172 | } 173 | 174 | info!("insert: {obj:?}"); 175 | db.insert_ref(obj).await?; 176 | 177 | if db.resolve_artifact(chksum).await?.is_none() { 178 | info!("Adding download task: url={url:?}"); 179 | db.insert_task(&Task::new( 180 | format!("fetch:{url}"), 181 | &TaskData::FetchTar { 182 | url: url.to_string(), 183 | compression: None, 184 | success_ref: None, 185 | }, 186 | )?) 187 | .await?; 188 | } 189 | } 190 | } 191 | 192 | Ok(()) 193 | } 194 | 195 | #[cfg(test)] 196 | mod tests { 197 | use super::*; 198 | 199 | #[test] 200 | fn test_parse_binutils() { 201 | let data = r#" 202 | [package] 203 | name = "binutils" 204 | version = "2.43.1" 205 | description = "TODO" 206 | 207 | [sources.binutils] 208 | hash = "13f74202a3c4c51118b797a39ea4200d3f6cfbe224da6d1d95bb938480132dfd" 209 | format = "tar.xz" 210 | file = "binutils-{version}.{format}" 211 | mirrors = [ "https://ftp.gnu.org/gnu/binutils/{file}",] 212 | "#; 213 | let manifest = Manifest::parse(data).unwrap(); 214 | let refs = manifest.resolve_refs("stagex").unwrap(); 215 | assert_eq!( 216 | refs, 217 | &[db::Ref { 218 | chksum: "sha256:13f74202a3c4c51118b797a39ea4200d3f6cfbe224da6d1d95bb938480132dfd" 219 | .to_string(), 220 | vendor: "stagex".to_string(), 221 | package: "binutils".to_string(), 222 | version: "2.43.1".to_string(), 223 | filename: Some( 224 | "https://ftp.gnu.org/gnu/binutils/binutils-2.43.1.tar.xz".to_string() 225 | ) 226 | }] 227 | ); 228 | } 229 | 230 | #[test] 231 | fn test_parse_icu() { 232 | let data = r#" 233 | [package] 234 | name = "icu" 235 | version = "74.2" 236 | description = "TODO" 237 | 238 | [sources.icu] 239 | hash = "68db082212a96d6f53e35d60f47d38b962e9f9d207a74cfac78029ae8ff5e08c" 240 | mirrors = ["https://github.com/unicode-org/icu/releases/download/release-{version_dash}/icu4c-{version_under}-src.tgz",] 241 | 242 | [sources.icudata] 243 | hash = "c28c3ca5f4ba3384781797138a294ca360988d4322674ad4d51e52f5d9b0a2b6" 244 | mirrors = ["https://github.com/unicode-org/icu/releases/download/release-{version_dash}/icu4c-{version_under}-data.zip",] 245 | 246 | [sources.icudatab] 247 | hash = "42a12ebfb1a82f80bb0005d9b6e018382ccaa2462f0d086a8c69ae736fdded3e" 248 | mirrors = ["https://github.com/unicode-org/icu/releases/download/release-{version_dash}/icu4c-{version_under}-data-bin-b.zip",] 249 | 250 | [sources.icudatal] 251 | hash = "2acdb1b982228040963d183b2dd9d321252c613e0f4db213d4bbc10417cde569" 252 | mirrors = ["https://github.com/unicode-org/icu/releases/download/release-{version_dash}/icu4c-{version_under}-data-bin-l.zip",] 253 | "#; 254 | let manifest = Manifest::parse(data).unwrap(); 255 | let refs = manifest.resolve_refs("stagex").unwrap(); 256 | assert_eq!( 257 | refs, &[ 258 | db::Ref { 259 | chksum: 
"sha256:68db082212a96d6f53e35d60f47d38b962e9f9d207a74cfac78029ae8ff5e08c".to_string(), 260 | vendor: "stagex".to_string(), 261 | package: "icu".to_string(), 262 | version: "74.2".to_string(), 263 | filename: Some("https://github.com/unicode-org/icu/releases/download/release-74-2/icu4c-74_2-src.tgz".to_string()), 264 | }, 265 | db::Ref { 266 | chksum: "sha256:c28c3ca5f4ba3384781797138a294ca360988d4322674ad4d51e52f5d9b0a2b6".to_string(), 267 | vendor: "stagex".to_string(), 268 | package: "icu".to_string(), 269 | version: "74.2".to_string(), 270 | filename: Some("https://github.com/unicode-org/icu/releases/download/release-74-2/icu4c-74_2-data.zip".to_string()), 271 | }, 272 | db::Ref { 273 | chksum: "sha256:42a12ebfb1a82f80bb0005d9b6e018382ccaa2462f0d086a8c69ae736fdded3e".to_string(), 274 | vendor: "stagex".to_string(), 275 | package: "icu".to_string(), 276 | version: "74.2".to_string(), 277 | filename: Some("https://github.com/unicode-org/icu/releases/download/release-74-2/icu4c-74_2-data-bin-b.zip".to_string()), 278 | }, 279 | db::Ref { 280 | chksum: "sha256:2acdb1b982228040963d183b2dd9d321252c613e0f4db213d4bbc10417cde569".to_string(), 281 | vendor: "stagex".to_string(), 282 | package: "icu".to_string(), 283 | version: "74.2".to_string(), 284 | filename: Some("https://github.com/unicode-org/icu/releases/download/release-74-2/icu4c-74_2-data-bin-l.zip".to_string()), 285 | }, 286 | 287 | 288 | ] 289 | ); 290 | } 291 | 292 | #[test] 293 | fn test_parse_zip() { 294 | let data = r#" 295 | [package] 296 | name = "zip" 297 | version = "30" 298 | description = "TODO" 299 | 300 | [sources.zip] 301 | hash = "f0e8bb1f9b7eb0b01285495a2699df3a4b766784c1765a8f1aeedf63c0806369" 302 | format = "tar.gz" 303 | file = "zip-{version}.{format}" 304 | mirrors = [ "https://fossies.org/linux/misc/zip{version}.{format}",] 305 | "#; 306 | let manifest = Manifest::parse(data).unwrap(); 307 | let refs = manifest.resolve_refs("stagex").unwrap(); 308 | assert_eq!( 309 | refs, 310 | &[db::Ref { 311 | chksum: "sha256:f0e8bb1f9b7eb0b01285495a2699df3a4b766784c1765a8f1aeedf63c0806369" 312 | .to_string(), 313 | vendor: "stagex".to_string(), 314 | package: "zip".to_string(), 315 | version: "30".to_string(), 316 | filename: Some("https://fossies.org/linux/misc/zip30.tar.gz".to_string()), 317 | }] 318 | ); 319 | } 320 | } 321 | -------------------------------------------------------------------------------- /src/sync/void.rs: -------------------------------------------------------------------------------- 1 | use crate::args; 2 | use crate::db; 3 | use crate::errors::*; 4 | use crate::utils; 5 | use async_compression::tokio::bufread::ZstdDecoder; 6 | use futures::StreamExt; 7 | use serde::{Deserialize, Serialize}; 8 | use std::collections::HashMap; 9 | use std::path::Path; 10 | use tokio::io::{self, AsyncReadExt}; 11 | use tokio_tar::{Archive, EntryType}; 12 | 13 | pub type PackageList = HashMap; 14 | 15 | #[derive(Debug, PartialEq, Serialize, Deserialize)] 16 | #[serde(rename_all = "kebab-case")] 17 | pub struct Package { 18 | pkgver: String, 19 | source_revisions: String, 20 | } 21 | 22 | pub async fn run(args: &args::SyncVoid) -> Result<()> { 23 | let db = db::Client::create().await?; 24 | let vendor = &args.vendor; 25 | 26 | let reader = utils::fetch_or_open(&args.file, args.fetch).await?; 27 | let reader = io::BufReader::new(reader); 28 | let reader = ZstdDecoder::new(reader); 29 | let mut tar = Archive::new(reader); 30 | 31 | let mut entries = tar.entries()?; 32 | while let Some(entry) = entries.next().await { 33 | let 
mut entry = entry?; 34 | { 35 | let header = entry.header(); 36 | if header.entry_type() != EntryType::Regular { 37 | continue; 38 | } 39 | if header.path()? != Path::new("index.plist") { 40 | continue; 41 | } 42 | } 43 | let mut buf = Vec::new(); 44 | entry.read_to_end(&mut buf).await?; 45 | 46 | let plist = plist::from_bytes::<PackageList>(&buf)?; 47 | for (pkgname, pkg) in plist { 48 | debug!("Found in plist: key={pkgname:?} pkg={pkg:?}"); 49 | let version = pkg 50 | .pkgver 51 | .strip_prefix(&pkgname) 52 | .unwrap() 53 | .strip_prefix('-') 54 | .unwrap(); 55 | let Some((srcpkg, commit)) = pkg.source_revisions.split_once(':') else { 56 | return Err(Error::InvalidData); 57 | }; 58 | 59 | // mark all refs known for this package as "last_seen now" 60 | db.bump_named_refs(vendor, &pkgname, version).await?; 61 | 62 | // check if package already imported 63 | if db.get_package(vendor, &pkgname, version).await?.is_some() { 64 | debug!("Package is already imported: srcpkg={srcpkg:?} commit={commit:?} package={pkgname:?} version={version:?}"); 65 | continue; 66 | } 67 | 68 | // queue for import 69 | info!("srcpkg={srcpkg:?} commit={commit:?} package={pkgname:?} version={version:?}"); 70 | db.insert_task(&db::Task::new( 71 | format!("void-linux-git:{srcpkg}:{commit}:{pkgname}:{version}"), 72 | &db::TaskData::VoidLinuxGit { 73 | vendor: vendor.to_string(), 74 | srcpkg: srcpkg.to_string(), 75 | commit: commit.to_string(), 76 | package: pkgname.to_string(), 77 | version: version.to_string(), 78 | }, 79 | )?) 80 | .await?; 81 | } 82 | } 83 | 84 | Ok(()) 85 | } 86 | -------------------------------------------------------------------------------- /src/sync/yocto.rs: -------------------------------------------------------------------------------- 1 | use crate::args; 2 | use crate::db; 3 | use crate::errors::*; 4 | use crate::utils; 5 | use crate::yocto; 6 | use async_compression::tokio::bufread::GzipDecoder; 7 | use futures::StreamExt; 8 | use std::path::Path; 9 | use tokio::io::{self, AsyncReadExt}; 10 | use tokio_tar::Archive; 11 | 12 | fn metadata_from_path(path: &Path) -> Option<(&str, &str)> { 13 | let path = path.to_str()?; 14 | let (_, path) = path.split_once("/recipes")?; 15 | let (_, path) = path.split_once('/')?; 16 | 17 | let (_parent, filename) = path.split_once('/')?; 18 | let release = filename.strip_suffix(".bb")?; 19 | 20 | let (package, version) = release.rsplit_once('_')?; 21 | 22 | Some((package, version)) 23 | } 24 | 25 | pub async fn run(args: &args::SyncYocto) -> Result<()> { 26 | let db = db::Client::create().await?; 27 | let vendor = &args.vendor; 28 | 29 | let reader = utils::fetch_or_open(&args.file, args.fetch).await?; 30 | let reader = io::BufReader::new(reader); 31 | let reader = GzipDecoder::new(reader); 32 | let mut tar = Archive::new(reader); 33 | 34 | let mut entries = tar.entries()?; 35 | let mut errors = 0; 36 | while let Some(entry) = entries.next().await { 37 | let mut entry = entry?; 38 | if !entry.header().entry_type().is_file() { 39 | continue; 40 | } 41 | 42 | let path = entry.path()?; 43 | let Some((package, version)) = metadata_from_path(&path) else { 44 | continue; 45 | }; 46 | 47 | let package = package.to_string(); 48 | let version = version.to_string(); 49 | 50 | let mut buf = String::new(); 51 | entry.read_to_string(&mut buf).await?; 52 | 53 | let pkg = match yocto::parse(&buf, Some(package.clone()), Some(version.clone())) { 54 | Ok(pkg) => pkg, 55 | Err(err) => { 56 | error!("Failed to parse package={package:?} version={version:?}: {err:#}"); 57 | errors += 1; 58 |
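// Editor's note: recipe parse failures are deliberately non-fatal here — the
// error is logged, the `errors` counter is bumped, and the snapshot walk
// continues; the total is reported once via warn!() at the end of run().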
continue; 59 | } 60 | }; 61 | 62 | let artifacts = match pkg.artifacts() { 63 | Ok(list) => list, 64 | Err(err) => { 65 | error!("Failed to parse package={package:?} version={version:?}: {err:#}"); 66 | errors += 1; 67 | continue; 68 | } 69 | }; 70 | 71 | for artifact in artifacts { 72 | let (chksum, url) = match (artifact.sha256, artifact.commit) { 73 | (Some(sha256), _) => (format!("sha256:{sha256}"), artifact.src), 74 | (_, Some(commit)) => ( 75 | format!("git:{commit}"), 76 | format!("{}#commit={commit}", artifact.src), 77 | ), 78 | _ => continue, 79 | }; 80 | 81 | let task = if db.resolve_artifact(&chksum).await?.is_none() { 82 | utils::task_for_url(&url) 83 | } else { 84 | None 85 | }; 86 | 87 | let r = db::Ref { 88 | chksum, 89 | vendor: vendor.to_string(), 90 | package: package.to_string(), 91 | version: version.to_string(), 92 | filename: Some(url), 93 | }; 94 | debug!("insert: {r:?}"); 95 | db.insert_ref(&r).await?; 96 | 97 | if let Some(task) = task { 98 | info!("Adding task: {task:?}"); 99 | db.insert_task(&task).await?; 100 | } 101 | } 102 | } 103 | 104 | if errors > 0 { 105 | warn!("Encountered {errors} errors while processing snapshot"); 106 | } 107 | 108 | Ok(()) 109 | } 110 | -------------------------------------------------------------------------------- /src/utils.rs: -------------------------------------------------------------------------------- 1 | use crate::db::{Task, TaskData}; 2 | use crate::errors::*; 3 | use futures::TryStreamExt; 4 | use std::time::Duration; 5 | use tokio::fs; 6 | use tokio::io::{self, AsyncRead}; 7 | use tokio_util::io::StreamReader; 8 | 9 | pub const CONNECT_TIMEOUT: Duration = Duration::from_secs(15); 10 | pub const READ_TIMEOUT: Duration = Duration::from_secs(60); 11 | // do not immediately give away who we are, version string is from Debian bookworm 12 | pub const USER_AGENT: &str = "curl/7.88.1"; 13 | 14 | pub fn http_client(socks5: Option<&String>) -> Result<HttpClient> { 15 | let mut http = reqwest::ClientBuilder::new(); 16 | if let Some(socks5) = socks5 { 17 | http = http.proxy(reqwest::Proxy::all(socks5)?); 18 | } 19 | let http = http 20 | .user_agent(USER_AGENT) 21 | .connect_timeout(CONNECT_TIMEOUT) 22 | .read_timeout(READ_TIMEOUT) 23 | .build()?; 24 | Ok(HttpClient { reqwest: http }) 25 | } 26 | 27 | pub struct HttpClient { 28 | reqwest: reqwest::Client, 29 | } 30 | 31 | impl HttpClient { 32 | pub async fn fetch(&self, url: &str) -> Result<Box<dyn AsyncRead + Unpin>> { 33 | let resp = self.reqwest.get(url).send().await?.error_for_status()?; 34 | let stream = resp.bytes_stream(); 35 | let stream = StreamReader::new(stream.map_err(|e| io::Error::new(io::ErrorKind::Other, e))); 36 | Ok(Box::new(stream)) 37 | } 38 | } 39 | 40 | pub async fn fetch_or_open(path: &str, should_fetch: bool) -> Result<Box<dyn AsyncRead + Unpin>> { 41 | if should_fetch { 42 | http_client(None)?.fetch(path).await 43 | } else { 44 | let file = fs::File::open(path).await?; 45 | Ok(Box::new(file)) 46 | } 47 | } 48 | 49 | pub fn is_possible_tar_artifact(url: &str) -> bool { 50 | if !url.starts_with("https://") && !url.starts_with("http://") { 51 | false 52 | } else { 53 | url.contains(".tar") || url.ends_with(".crate") || url.ends_with(".tgz") 54 | } 55 | } 56 | 57 | pub fn task_for_url(url: &str) -> Option<Task> { 58 | match url.split_once("://") { 59 | Some(("https" | "http", _)) => { 60 | if is_possible_tar_artifact(url) { 61 | Task::new( 62 | format!("fetch:{url}"), 63 | &TaskData::FetchTar { 64 | url: url.to_string(), 65 | compression: None, 66 | success_ref: None, 67 | }, 68 | ) 69 | .ok() 70 | } else { 71 | None 72 | } 73 | } 74
| Some((schema, _)) if schema.starts_with("git+") => { 75 | debug!("Found git remote: {url:?}"); 76 | Task::new( 77 | format!("git-clone:{url}"), 78 | &TaskData::GitSnapshot { 79 | url: url.to_string(), 80 | }, 81 | ) 82 | .ok() 83 | } 84 | _ => None, 85 | } 86 | } 87 | -------------------------------------------------------------------------------- /src/void_template.rs: -------------------------------------------------------------------------------- 1 | use crate::errors::*; 2 | use std::collections::HashMap; 3 | use yash_syntax::syntax::{self, Unquote, Value}; 4 | 5 | /// Variables we keep track of for interpolation but nothing else 6 | const TRACKED_VARIABLES: &[&str] = &["_pkgname", "_pkgver", "_gitrev", "_commit", "url"]; 7 | 8 | #[derive(Debug, Default, PartialEq)] 9 | pub struct Template { 10 | pub pkgname: Option<String>, 11 | pub version: Option<String>, 12 | pub extra: HashMap<&'static str, String>, 13 | 14 | pub distfiles: Vec<String>, 15 | pub checksum: Vec<String>, 16 | } 17 | 18 | impl Template { 19 | pub fn resolve_vars(&self, mut text: &str) -> Result<String> { 20 | let mut out = String::new(); 21 | 22 | 'outer: while !text.is_empty() { 23 | if let Some((before, after)) = text.split_once('$') { 24 | let vars = [ 25 | ("pkgname", self.pkgname.as_deref()), 26 | ("version", self.version.as_deref()), 27 | // https://github.com/void-linux/void-packages/blob/master/common/environment/setup/misc.sh 28 | ( 29 | "SOURCEFORGE_SITE", 30 | Some("https://downloads.sourceforge.net/sourceforge"), 31 | ), 32 | ( 33 | "NONGNU_SITE", 34 | Some("https://download.savannah.nongnu.org/releases"), 35 | ), 36 | ("UBUNTU_SITE", Some("http://archive.ubuntu.com/ubuntu/pool")), 37 | ("XORG_SITE", Some("https://www.x.org/releases/individual")), 38 | ("DEBIAN_SITE", Some("https://ftp.debian.org/debian/pool")), 39 | ("GNOME_SITE", Some("https://download.gnome.org/sources")), 40 | ("KERNEL_SITE", Some("https://www.kernel.org/pub/linux")), 41 | ("CPAN_SITE", Some("https://www.cpan.org/modules/by-module")), 42 | ( 43 | "PYPI_SITE", 44 | Some("https://files.pythonhosted.org/packages/source"), 45 | ), 46 | ("MOZILLA_SITE", Some("https://ftp.mozilla.org/pub")), 47 | ("GNU_SITE", Some("https://ftp.gnu.org/gnu")), 48 | ("FREEDESKTOP_SITE", Some("https://freedesktop.org/software")), 49 | ("KDE_SITE", Some("https://download.kde.org/stable")), 50 | ( 51 | "VIDEOLAN_SITE", 52 | Some("https://download.videolan.org/pub/videolan"), 53 | ), 54 | ] 55 | .into_iter() 56 | .chain( 57 | self.extra 58 | .iter() 59 | .map(|(key, value)| (*key, Some(value.as_str()))), 60 | ); 61 | 62 | out.push_str(before); 63 | let (after, curly) = after 64 | .strip_prefix('{') 65 | .map(|x| (x, true)) 66 | .unwrap_or((after, false)); 67 | 68 | for (name, value) in vars { 69 | if let Some(after) = after.strip_prefix(name) { 70 | let Some(value) = value else { 71 | return Err(Error::UnknownVariable(name.to_string())); 72 | }; 73 | out.push_str(value); 74 | text = if curly { 75 | after.strip_prefix('}').ok_or_else(|| { 76 | Error::InvalidPkgbuild("Missing closing }".to_string()) 77 | })? 78 | } else { 79 | after 80 | }; 81 | continue 'outer; 82 | } 83 | } 84 | 85 | return Err(Error::UnknownVariable(after.to_string())); 86 | } else { 87 | out.push_str(text); 88 | break; 89 | } 90 | } 91 | 92 | Ok(out) 93 | } 94 | 95 | pub fn register_var(&mut self, key: &'static str, value: String) { 96 | self.extra.insert(key, value); 97 | } 98 | } 99 | 100 | pub fn parse(script: &str) -> Result