├── .cargo └── config.toml ├── .gitattributes ├── .github ├── CODEOWNERS ├── ISSUE_TEMPLATE │ ├── bug_report.md │ └── feature_request.md ├── scripts │ └── package.sh └── workflows │ └── ci.yml ├── .gitignore ├── .mergify.yml ├── CHANGELOG.md ├── CODE_OF_CONDUCT.md ├── CONTRIBUTING.md ├── Cargo.lock ├── Cargo.toml ├── LICENSE-APACHE ├── LICENSE-MIT ├── README.md ├── deny.toml ├── release.toml ├── src ├── backends.rs ├── backends │ ├── blob.rs │ ├── blob │ │ ├── vendor.rs │ │ └── vendor │ │ │ ├── download.rs │ │ │ ├── insert.rs │ │ │ ├── list.rs │ │ │ └── properties.rs │ ├── fs.rs │ ├── gcs.rs │ └── s3.rs ├── cargo.rs ├── cmds │ ├── main.rs │ ├── mirror.rs │ └── sync.rs ├── fetch.rs ├── git.rs ├── lib.rs ├── mirror.rs ├── sync.rs └── util.rs └── tests ├── diff_cargo.rs ├── full ├── Cargo.lock ├── Cargo.toml └── src │ └── lib.rs ├── lock.rs ├── multi_one.lock ├── multi_two.lock ├── pretty-crate.txt ├── sync_crates_io.rs ├── sync_git.rs ├── tutil.rs ├── unpretty-wasi.txt ├── v2.lock └── v3.lock /.cargo/config.toml: -------------------------------------------------------------------------------- 1 | # add the below section to `.cargo/config.toml` 2 | 3 | [target.'cfg(all())'] 4 | rustflags = [ 5 | # BEGIN - Embark standard lints v6 for Rust 1.55+ 6 | # do not change or add/remove here, but one can add exceptions after this section 7 | # for more info see: 8 | "-Dunsafe_code", 9 | "-Wclippy::all", 10 | "-Wclippy::await_holding_lock", 11 | "-Wclippy::char_lit_as_u8", 12 | "-Wclippy::checked_conversions", 13 | "-Wclippy::dbg_macro", 14 | "-Wclippy::debug_assert_with_mut_call", 15 | "-Wclippy::doc_markdown", 16 | "-Wclippy::empty_enum", 17 | "-Wclippy::enum_glob_use", 18 | "-Wclippy::exit", 19 | "-Wclippy::expl_impl_clone_on_copy", 20 | "-Wclippy::explicit_deref_methods", 21 | "-Wclippy::explicit_into_iter_loop", 22 | "-Wclippy::fallible_impl_from", 23 | "-Wclippy::filter_map_next", 24 | "-Wclippy::flat_map_option", 25 | "-Wclippy::float_cmp_const", 26 | "-Wclippy::fn_params_excessive_bools", 27 | "-Wclippy::from_iter_instead_of_collect", 28 | "-Wclippy::if_let_mutex", 29 | "-Wclippy::implicit_clone", 30 | "-Wclippy::imprecise_flops", 31 | "-Wclippy::inefficient_to_string", 32 | "-Wclippy::invalid_upcast_comparisons", 33 | "-Wclippy::large_digit_groups", 34 | "-Wclippy::large_stack_arrays", 35 | "-Wclippy::large_types_passed_by_value", 36 | "-Wclippy::let_unit_value", 37 | "-Wclippy::linkedlist", 38 | "-Wclippy::lossy_float_literal", 39 | "-Wclippy::macro_use_imports", 40 | "-Wclippy::manual_ok_or", 41 | "-Wclippy::map_err_ignore", 42 | "-Wclippy::map_flatten", 43 | "-Wclippy::map_unwrap_or", 44 | "-Wclippy::match_on_vec_items", 45 | "-Wclippy::match_same_arms", 46 | "-Wclippy::match_wild_err_arm", 47 | "-Wclippy::match_wildcard_for_single_variants", 48 | "-Wclippy::mem_forget", 49 | "-Wclippy::mismatched_target_os", 50 | "-Wclippy::missing_enforced_import_renames", 51 | "-Wclippy::mut_mut", 52 | "-Wclippy::mutex_integer", 53 | "-Wclippy::needless_borrow", 54 | "-Wclippy::needless_continue", 55 | "-Wclippy::needless_for_each", 56 | "-Wclippy::option_option", 57 | "-Wclippy::path_buf_push_overwrite", 58 | "-Wclippy::ptr_as_ptr", 59 | "-Wclippy::rc_mutex", 60 | "-Wclippy::ref_option_ref", 61 | "-Wclippy::rest_pat_in_fully_bound_structs", 62 | "-Wclippy::same_functions_in_if_condition", 63 | "-Wclippy::semicolon_if_nothing_returned", 64 | "-Wclippy::single_match_else", 65 | "-Wclippy::string_add_assign", 66 | "-Wclippy::string_add", 67 | "-Wclippy::string_lit_as_bytes", 68 | 
"-Wclippy::string_to_string", 69 | "-Wclippy::todo", 70 | "-Wclippy::trait_duplication_in_bounds", 71 | "-Wclippy::unimplemented", 72 | "-Wclippy::unnested_or_patterns", 73 | "-Wclippy::unused_self", 74 | "-Wclippy::useless_transmute", 75 | "-Wclippy::verbose_file_reads", 76 | "-Wclippy::zero_sized_map_values", 77 | "-Wfuture_incompatible", 78 | "-Wnonstandard_style", 79 | "-Wrust_2018_idioms", 80 | # END - Embark standard lints v6 for Rust 1.55+ 81 | ] 82 | -------------------------------------------------------------------------------- /.gitattributes: -------------------------------------------------------------------------------- 1 | *.txt text eol=lf 2 | -------------------------------------------------------------------------------- /.github/CODEOWNERS: -------------------------------------------------------------------------------- 1 | * @Jake-Shadle 2 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/bug_report.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Bug report 3 | about: Create a report to help us improve 4 | title: '' 5 | labels: bug 6 | assignees: '' 7 | 8 | --- 9 | 10 | **Describe the bug** 11 | A clear and concise description of what the bug is. 12 | 13 | **To Reproduce** 14 | Steps to reproduce the behavior: 15 | 1. Go to '...' 16 | 2. Click on '....' 17 | 3. Scroll down to '....' 18 | 4. See error 19 | 20 | **Expected behavior** 21 | A clear and concise description of what you expected to happen. 22 | 23 | **Screenshots** 24 | If applicable, add screenshots to help explain your problem. 25 | 26 | **Device:** 27 | - OS: [e.g. iOS] 28 | - Browser [e.g. chrome, safari] 29 | - Version [e.g. 22] 30 | 31 | **Additional context** 32 | Add any other context about the problem here. 33 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/feature_request.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Feature request 3 | about: Suggest an idea for this project 4 | title: '' 5 | labels: enhancement 6 | assignees: '' 7 | 8 | --- 9 | 10 | **Is your feature request related to a problem? Please describe.** 11 | A clear and concise description of what the problem is. Ex. I'm always frustrated when [...] 12 | 13 | **Describe the solution you'd like** 14 | A clear and concise description of what you want to happen. 15 | 16 | **Describe alternatives you've considered** 17 | A clear and concise description of any alternative solutions or features you've considered. 18 | 19 | **Additional context** 20 | Add any other context or screenshots about the feature request here. 
21 | -------------------------------------------------------------------------------- /.github/scripts/package.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | set -eu 3 | 4 | # When run in a container, the ownership will be messed up, so mark the 5 | # checkout dir as safe regardless of our env 6 | git config --global --add safe.directory "$GITHUB_WORKSPACE" 7 | 8 | # Normally we'll only do this on tags, but add --always to fallback to the revision 9 | # if we're iterating or the like 10 | tag=$(git describe --tags --abbrev=0 --always) 11 | release_name="$NAME-$tag-$TARGET" 12 | release_tar="${release_name}.tar.gz" 13 | mkdir "$release_name" 14 | 15 | if [[ "$TARGET" =~ windows ]]; then 16 | bin="$NAME.exe" 17 | else 18 | bin="$NAME" 19 | fi 20 | 21 | cp "target/$TARGET/release/$bin" "$release_name/" 22 | cp README.md LICENSE-APACHE LICENSE-MIT "$release_name/" 23 | tar czf "$release_tar" "$release_name" 24 | 25 | rm -r "$release_name" 26 | 27 | # Windows environments in github actions don't have the gnu coreutils installed, 28 | # which includes the shasum exe, so we just use powershell instead 29 | if [[ "$TARGET" =~ windows ]]; then 30 | echo "(Get-FileHash \"${release_tar}\" -Algorithm SHA256).Hash | Out-File -Encoding ASCII -NoNewline \"${release_tar}.sha256\"" | pwsh -c - 31 | else 32 | echo -n "$(shasum -ba 256 "${release_tar}" | cut -d " " -f 1)" > "${release_tar}.sha256" 33 | fi 34 | -------------------------------------------------------------------------------- /.github/workflows/ci.yml: -------------------------------------------------------------------------------- 1 | name: CI 2 | on: 3 | push: 4 | branches: 5 | - main 6 | tags: 7 | - "*" 8 | pull_request: 9 | 10 | concurrency: 11 | group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }} 12 | cancel-in-progress: true 13 | 14 | jobs: 15 | lint: 16 | name: Lint 17 | runs-on: ubuntu-22.04 18 | steps: 19 | - uses: actions/checkout@v3 20 | - uses: dtolnay/rust-toolchain@stable 21 | with: 22 | components: "clippy, rustfmt" 23 | - uses: Swatinem/rust-cache@v2 24 | 25 | # make sure all code has been formatted with rustfmt 26 | - name: check rustfmt 27 | run: cargo fmt -- --check --color always 28 | 29 | # run clippy to verify we have no warnings 30 | - run: cargo fetch 31 | - name: cargo clippy 32 | run: cargo clippy --all-targets --all-features -- -D warnings 33 | 34 | test: 35 | name: Test 36 | strategy: 37 | matrix: 38 | os: [ubuntu-22.04, macOS-12] 39 | protocol: ["sparse", "git"] 40 | runs-on: ${{ matrix.os }} 41 | env: 42 | CARGO_FETCHER_CRATES_IO_PROTOCOL: ${{ matrix.protocol }} 43 | steps: 44 | - uses: actions/checkout@v3 45 | - uses: dtolnay/rust-toolchain@stable 46 | - uses: Swatinem/rust-cache@v2 47 | - run: cargo fetch 48 | - name: cargo test build 49 | run: cargo build --tests --features=gcs 50 | - name: cargo test 51 | run: cargo test --features=gcs 52 | 53 | deny-check: 54 | name: cargo-deny check 55 | runs-on: ubuntu-22.04 56 | steps: 57 | - uses: actions/checkout@v3 58 | - uses: EmbarkStudios/cargo-deny-action@v1 59 | 60 | publish-check: 61 | name: Publish Check 62 | runs-on: ubuntu-22.04 63 | container: ghcr.io/cross-rs/aarch64-unknown-linux-musl:edge 64 | strategy: 65 | matrix: 66 | include: 67 | - target: aarch64-unknown-linux-musl 68 | steps: 69 | - uses: actions/checkout@v3 70 | - uses: dtolnay/rust-toolchain@stable 71 | with: 72 | target: ${{ matrix.target }} 73 | - uses: Swatinem/rust-cache@v2 74 | - run: cargo fetch 75 | - name: 
cargo publish 76 | run: cargo publish --dry-run --all-features --target ${{ matrix.target }} 77 | 78 | release: 79 | name: Release 80 | if: startsWith(github.ref, 'refs/tags/') 81 | strategy: 82 | matrix: 83 | include: 84 | - os: ubuntu-22.04 85 | target: x86_64-unknown-linux-musl 86 | - os: ubuntu-22.04 87 | target: aarch64-unknown-linux-musl 88 | container: ghcr.io/cross-rs/aarch64-unknown-linux-musl:edge 89 | - os: windows-2022 90 | target: x86_64-pc-windows-msvc 91 | - os: macOS-12 92 | target: x86_64-apple-darwin 93 | - os: macOS-12 94 | target: aarch64-apple-darwin 95 | runs-on: ${{ matrix.os }} 96 | container: ${{ matrix.container }} 97 | steps: 98 | - uses: dtolnay/rust-toolchain@stable 99 | with: 100 | target: ${{ matrix.target }} 101 | - name: Install musl tools 102 | if: matrix.target == 'x86_64-unknown-linux-musl' 103 | run: | 104 | sudo apt install -y musl-tools 105 | - name: Checkout 106 | uses: actions/checkout@v3 107 | - name: cargo fetch 108 | run: cargo fetch --target ${{ matrix.target }} 109 | - name: Release build 110 | run: cargo build --release --target ${{ matrix.target }} --features=gcs,s3,blob 111 | - name: Package 112 | shell: bash 113 | env: 114 | NAME: cargo-fetcher 115 | TARGET: ${{ matrix.target }} 116 | run: .github/scripts/package.sh 117 | - name: Publish 118 | uses: softprops/action-gh-release@v1 119 | with: 120 | draft: true 121 | files: "cargo-fetcher*" 122 | env: 123 | GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} 124 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | /target 2 | **/*.rs.bk 3 | tests/full/target -------------------------------------------------------------------------------- /.mergify.yml: -------------------------------------------------------------------------------- 1 | pull_request_rules: 2 | - name: automatic merge when CI passes and 1 reviews 3 | conditions: 4 | - "#approved-reviews-by>=1" 5 | - "#review-requested=0" 6 | - "#changes-requested-reviews-by=0" 7 | - "#commented-reviews-by=0" 8 | - base=main 9 | - label!=work-in-progress 10 | actions: 11 | merge: 12 | method: squash 13 | - name: delete head branch after merge 14 | conditions: [] 15 | actions: 16 | delete_head_branch: {} 17 | -------------------------------------------------------------------------------- /CHANGELOG.md: -------------------------------------------------------------------------------- 1 | 2 | 3 | # Changelog 4 | All notable changes to this project will be documented in this file. 5 | 6 | The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), 7 | and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). 8 | 9 | 10 | ## [Unreleased] - ReleaseDate 11 | ## [0.14.6] - 2023-08-23 12 | ### Changed 13 | - [PR#187](https://github.com/EmbarkStudios/cargo-fetcher/pull/187) updated dependencies, including the fix for [RUSTSEC-2023-0053](https://rustsec.org/advisories/RUSTSEC-2023-0053.html), not that this crate was particularly affected by it. 14 | 15 | ## [0.14.5] - 2023-08-22 16 | ### Fixed 17 | - [PR#187](https://github.com/EmbarkStudios/cargo-fetcher/pull/187) resolved [#186](https://github.com/EmbarkStudios/cargo-fetcher/issues/186) by actually doing the correct thing when parsing configs and setting up registry sources. 
18 | 19 | ## [0.14.4] - 2023-08-16 20 | ### Fixed 21 | - [PR#185](https://github.com/EmbarkStudios/cargo-fetcher/pull/185) significantly improved the speed of the sync subcommand. 22 | 23 | ## [0.14.3] - 2023-08-15 24 | ### Fixed 25 | - [PR#184](https://github.com/EmbarkStudios/cargo-fetcher/pull/184) fixed submodule checkout even better this time. For real this time. 26 | 27 | ## [0.14.2] - 2023-08-15 28 | ### Fixed 29 | - [PR#183](https://github.com/EmbarkStudios/cargo-fetcher/pull/183) fixed an issue where submodule `HEAD`s did not reflect the actual checkout, causing cargo to resync them even though it didn't need to. 30 | 31 | ## [0.14.1] - 2023-08-15 32 | ### Fixed 33 | - [PR#182](https://github.com/EmbarkStudios/cargo-fetcher/pull/182) fixed an issue where non-github.com urls ending in `.git` were not properly synced to disk. 34 | 35 | ## [0.14.0] - 2023-08-11 36 | ### Added 37 | - [PR#178](https://github.com/EmbarkStudios/cargo-fetcher/pull/178) resolved [#177](https://github.com/EmbarkStudios/cargo-fetcher/issues/177) by adding support for sparse indices. This was further improved in [PR#180](https://github.com/EmbarkStudios/cargo-fetcher/pull/180) by using `tame-index` for registry index operations. 38 | 39 | ### Changed 40 | - [PR#180](https://github.com/EmbarkStudios/cargo-fetcher/pull/180) introduced 2 major refactors. `tame-index` is now used to fetch index metadata as well as several related helper functions, shrinking this codebase a bit. `git2` has been replaced by `gix`, completely removing both it and openssl from the dependency graph. 41 | - [PR#181](https://github.com/EmbarkStudios/cargo-fetcher/pull/181) made changes to asyncify the code, giving good speedups in `mirror` operations, but (at the moment) slightly worse timings for `sync`. This will hopefully be fixed in a later patch. 42 | 43 | ## [0.13.1] - 2023-01-10 44 | ### Changed 45 | - [PR#174](https://github.com/EmbarkStudios/cargo-fetcher/pull/174) made it so that git sources can now be specified however the user likes instead of just supporting the `rev` specifier, as the exact revision is now acquired via the fragment in the source url instead. 46 | 47 | ### Added 48 | - [PR#174](https://github.com/EmbarkStudios/cargo-fetcher/pull/174) added release binaries for `aarch64-unknown-linux-musl`. 49 | 50 | ## [0.13.0] - 2022-05-25 51 | ### Added 52 | - [PR#172](https://github.com/EmbarkStudios/cargo-fetcher/pull/172) added the `--timeout | CARGO_FETCHER_TIMEOUT` option, allowing control over how long each individual HTTP request is allowed to take. Defaults to 30 seconds, which is the same default timeout as `reqwest`. 53 | 54 | ### Changed 55 | - [PR#172](https://github.com/EmbarkStudios/cargo-fetcher/pull/172) split git packages (bare clones and checkouts) and registry packages and downloads them in parallel. In my local tests this reduced overall wall time as typically git packages are an order of magnitude or more larger than a registry package, so splitting them allows the git packages to take up threads and I/O slots earlier, and registry packages can then fill in the remaining capacity. In addition, the git bare clone and checkout for each crate are now downloaded in parallel, as previously the checkout download would wait until the bare clone was downloaded before doing the disk splat, but this was wasteful. 56 | - [PR#172](https://github.com/EmbarkStudios/cargo-fetcher/pull/172) updated dependencies.
57 | 58 | ## [0.12.1] - 2022-02-28 59 | ### Added 60 | - [PR#171](https://github.com/EmbarkStudios/cargo-fetcher/pull/171) added EC2 credential sourcing from IMDS for the `s3` backend, allowing for easier configuration when running in AWS. Thanks [@jelmansouri](https://github.com/jelmansouri)! 61 | 62 | ## [0.12.0] - 2022-02-03 63 | ### Changed 64 | - [PR#168](https://github.com/EmbarkStudios/cargo-fetcher/pull/168) updated all dependencies. 65 | - [PR#168](https://github.com/EmbarkStudios/cargo-fetcher/pull/168) removed all usage of async/await in favor of blocking HTTP requests and rayon parallelization. This seems to have resulted in noticeable speed ups depending on the size of your workload. 66 | - [PR#168](https://github.com/EmbarkStudios/cargo-fetcher/pull/168) replaced usage of `structopt` with `clap`. 67 | - [PR#168](https://github.com/EmbarkStudios/cargo-fetcher/pull/168) replaced all usage of the unmaintained `chrono` with `time`. 68 | - [PR#168](https://github.com/EmbarkStudios/cargo-fetcher/pull/168) temporarily vendored `bloblock` for Azure blob storage to reduce duplicate dependencies. 69 | 70 | ## [0.11.0] - 2021-07-22 71 | ### Changed 72 | - [PR#161](https://github.com/EmbarkStudios/cargo-fetcher/pull/161) replaced the bloated auto-generated crates for rusoto with the much leaner [`rusty-s3`](https://crates.io/crates/rusty-s3) crate. Thanks [@m0ssc0de](https://github.com/m0ssc0de)! 73 | - [PR#166](https://github.com/EmbarkStudios/cargo-fetcher/pull/166) replaced the bloated auto-generated crates for the azure SDK with the much leaner [`bloblock`](https://crates.io/crates/bloblock) crate. Thanks [@m0ssc0de](https://github.com/m0ssc0de)! 74 | 75 | ## [0.10.0] - 2020-12-14 76 | ### Added 77 | - [PR#131](https://github.com/EmbarkStudios/cargo-fetcher/pull/131) and [PR#151](https://github.com/EmbarkStudios/cargo-fetcher/pull/151) added support for registries other than crates.io, resolving [#118](https://github.com/EmbarkStudios/cargo-fetcher/issues/118). Thanks [@m0ssc0de](https://github.com/m0ssc0de)! 78 | - [PR#152](https://github.com/EmbarkStudios/cargo-fetcher/pull/152) added support for creating `.cache` entries when mirroring/syncing registry indices, resolving [#16](https://github.com/EmbarkStudios/cargo-fetcher/issues/16) and [#117](https://github.com/EmbarkStudios/cargo-fetcher/issues/117). 79 | - [PR#154](https://github.com/EmbarkStudios/cargo-fetcher/pull/154) added support for mirroring and syncing git submodules, which was the final missing piece for having "perfect" copying of cargo's behavior when fetching crates and registries, resolving [#141](https://github.com/EmbarkStudios/cargo-fetcher/issues/141). 80 | 81 | ## [0.9.0] - 2020-07-28 82 | ### Added 83 | - [PR#109](https://github.com/EmbarkStudios/cargo-fetcher/pull/109) added support for Azure Blob storage, under the `blob` feature flag. Thanks [@m0ssc0de](https://github.com/m0ssc0de)! 84 | 85 | ## [0.8.0] - 2020-06-05 86 | ### Added 87 | - [PR#92](https://github.com/EmbarkStudios/cargo-fetcher/pull/92) added support for a local filesystem backend. Thanks [@cosmicexplorer](https://github.com/cosmicexplorer)! 88 | 89 | ## [0.7.0] - 2020-02-21 90 | ### Added 91 | - Cargo's v2 Cargo.lock format is now supported, in addition to the v1 format. 92 | 93 | ### Changed 94 | - Async (almost) all the things!
95 | - Replaced log/env_logger with [tracing](https://github.com/tokio-rs/tracing) 96 | 97 | ## [0.6.1] - 2019-11-14 98 | ### Fixed 99 | - Fetch registry index instead of pull 100 | 101 | ## [0.6.0] - 2019-11-14 102 | ### Added 103 | - Added support for S3 storage behind the `s3` feature 104 | - Integration tests using s3 via minio are now run in CI 105 | - Git dependencies are now checked out to the git/checkouts folder 106 | - Git dependencies now also recursively download submodules 107 | 108 | ### Changed 109 | - Updated dependencies 110 | - Place all GCS specific code/dependencies behind a `gcs` feature 111 | - The url for the storage location is now supplied via `-u | --url` 112 | 113 | ### Fixed 114 | - Replaced `failure` with `anyhow` 115 | - Fixed issue where **all** crates were synced every time due to pruning and removing duplicates only to then completely ignore them and use the original crate list :facepalm: 116 | - Fixed issue where crates.io packages were being unpacked with an extra parent directory 117 | 118 | ## [0.5.1] - 2019-10-27 119 | ### Fixed 120 | - Allow using as `cargo fetcher` instead of only `cargo-fetcher` 121 | 122 | ## [0.5.0] - 2019-10-26 123 | ### Added 124 | - Validate crate checksums after download 125 | 126 | ### Fixed 127 | - Ensure duplicates are only downloaded once eg. same git source for multiple crates 128 | 129 | ## [0.4.1] - 2019-10-25 130 | ### Added 131 | - Add support for only updating the registry index after it hasn't been updated 132 | for a user specified amount of time, rather than always 133 | 134 | ## [0.4.0] - 2019-10-25 135 | ### Added 136 | - Add support for retrieving and uploading the crates.io index 137 | 138 | ## [0.3.0] - 2019-10-25 139 | ### Added 140 | - Add support for unpacking compressed crate tarballs into registry/src 141 | 142 | ## [0.2.0] - 2019-07-24 143 | ### Added 144 | - Add crate retrieval and uploading for `git` sources 145 | 146 | ## [0.1.1] - 2019-07-23 147 | ### Fixed 148 | - Travis config 149 | 150 | ## [0.1.0] - 2019-07-23 151 | ### Added 152 | - Initial add of `cargo-fetcher` 153 | 154 | 155 | [Unreleased]: https://github.com/EmbarkStudios/cargo-fetcher/compare/0.14.6...HEAD 156 | [0.14.6]: https://github.com/EmbarkStudios/cargo-fetcher/compare/0.14.5...0.14.6 157 | [0.14.5]: https://github.com/EmbarkStudios/cargo-fetcher/compare/0.14.4...0.14.5 158 | [0.14.4]: https://github.com/EmbarkStudios/cargo-fetcher/compare/0.14.3...0.14.4 159 | [0.14.3]: https://github.com/EmbarkStudios/cargo-fetcher/compare/0.14.2...0.14.3 160 | [0.14.2]: https://github.com/EmbarkStudios/cargo-fetcher/compare/0.14.1...0.14.2 161 | [0.14.1]: https://github.com/EmbarkStudios/cargo-fetcher/compare/0.14.0...0.14.1 162 | [0.14.0]: https://github.com/EmbarkStudios/cargo-fetcher/compare/0.13.1...0.14.0 163 | [0.13.1]: https://github.com/EmbarkStudios/cargo-fetcher/compare/0.13.0...0.13.1 164 | [0.13.0]: https://github.com/EmbarkStudios/cargo-fetcher/compare/0.12.1...0.13.0 165 | [0.12.1]: https://github.com/EmbarkStudios/cargo-fetcher/compare/0.12.0...0.12.1 166 | [0.12.0]: https://github.com/EmbarkStudios/cargo-fetcher/compare/0.11.0...0.12.0 167 | [0.11.0]: https://github.com/EmbarkStudios/cargo-fetcher/compare/0.10.0...0.11.0 168 | [0.10.0]: https://github.com/EmbarkStudios/cargo-fetcher/compare/0.9.0...0.10.0 169 | [0.9.0]: https://github.com/EmbarkStudios/cargo-fetcher/compare/0.8.0...0.9.0 170 | [0.8.0]: https://github.com/EmbarkStudios/cargo-fetcher/compare/0.7.0...0.8.0 171 | [0.7.0]: 
https://github.com/EmbarkStudios/cargo-fetcher/compare/0.6.1...0.7.0 172 | [0.6.1]: https://github.com/EmbarkStudios/cargo-fetcher/compare/0.6.0...0.6.1 173 | [0.6.0]: https://github.com/EmbarkStudios/cargo-fetcher/compare/0.5.1...0.6.0 174 | [0.5.1]: https://github.com/EmbarkStudios/cargo-fetcher/compare/0.5.0...0.5.1 175 | [0.5.0]: https://github.com/EmbarkStudios/cargo-fetcher/compare/0.4.1...0.5.0 176 | [0.4.1]: https://github.com/EmbarkStudios/cargo-fetcher/compare/0.4.0...0.4.1 177 | [0.4.0]: https://github.com/EmbarkStudios/cargo-fetcher/compare/0.3.0...0.4.0 178 | [0.3.0]: https://github.com/EmbarkStudios/cargo-fetcher/compare/0.2.0...0.3.0 179 | [0.2.0]: https://github.com/EmbarkStudios/cargo-fetcher/compare/0.1.1...0.2.0 180 | [0.1.1]: https://github.com/EmbarkStudios/cargo-fetcher/compare/0.1.0...0.1.1 181 | [0.1.0]: https://github.com/EmbarkStudios/cargo-fetcher/releases/tag/0.1.0 182 | -------------------------------------------------------------------------------- /CODE_OF_CONDUCT.md: -------------------------------------------------------------------------------- 1 | # Contributor Covenant Code of Conduct 2 | 3 | ## Our Pledge 4 | 5 | We as members, contributors, and leaders pledge to make participation in our 6 | community a harassment-free experience for everyone, regardless of age, body 7 | size, visible or invisible disability, ethnicity, sex characteristics, gender 8 | identity and expression, level of experience, education, socio-economic status, 9 | nationality, personal appearance, race, religion, or sexual identity 10 | and orientation. 11 | 12 | We pledge to act and interact in ways that contribute to an open, welcoming, 13 | diverse, inclusive, and healthy community. 14 | 15 | ## Our Standards 16 | 17 | Examples of behavior that contributes to a positive environment for our 18 | community include: 19 | 20 | * Demonstrating empathy and kindness toward other people 21 | * Being respectful of differing opinions, viewpoints, and experiences 22 | * Giving and gracefully accepting constructive feedback 23 | * Accepting responsibility and apologizing to those affected by our mistakes, 24 | and learning from the experience 25 | * Focusing on what is best not just for us as individuals, but for the 26 | overall community 27 | 28 | Examples of unacceptable behavior include: 29 | 30 | * The use of sexualized language or imagery, and sexual attention or 31 | advances of any kind 32 | * Trolling, insulting or derogatory comments, and personal or political attacks 33 | * Public or private harassment 34 | * Publishing others' private information, such as a physical or email 35 | address, without their explicit permission 36 | * Other conduct which could reasonably be considered inappropriate in a 37 | professional setting 38 | 39 | ## Enforcement Responsibilities 40 | 41 | Community leaders are responsible for clarifying and enforcing our standards of 42 | acceptable behavior and will take appropriate and fair corrective action in 43 | response to any behavior that they deem inappropriate, threatening, offensive, 44 | or harmful. 45 | 46 | Community leaders have the right and responsibility to remove, edit, or reject 47 | comments, commits, code, wiki edits, issues, and other contributions that are 48 | not aligned to this Code of Conduct, and will communicate reasons for moderation 49 | decisions when appropriate. 
50 | 51 | ## Scope 52 | 53 | This Code of Conduct applies within all community spaces, and also applies when 54 | an individual is officially representing the community in public spaces. 55 | Examples of representing our community include using an official e-mail address, 56 | posting via an official social media account, or acting as an appointed 57 | representative at an online or offline event. 58 | 59 | ## Enforcement 60 | 61 | Instances of abusive, harassing, or otherwise unacceptable behavior may be 62 | reported to the community leaders responsible for enforcement at 63 | opensource@embark-studios.com. 64 | All complaints will be reviewed and investigated promptly and fairly. 65 | 66 | All community leaders are obligated to respect the privacy and security of the 67 | reporter of any incident. 68 | 69 | ## Enforcement Guidelines 70 | 71 | Community leaders will follow these Community Impact Guidelines in determining 72 | the consequences for any action they deem in violation of this Code of Conduct: 73 | 74 | ### 1. Correction 75 | 76 | **Community Impact**: Use of inappropriate language or other behavior deemed 77 | unprofessional or unwelcome in the community. 78 | 79 | **Consequence**: A private, written warning from community leaders, providing 80 | clarity around the nature of the violation and an explanation of why the 81 | behavior was inappropriate. A public apology may be requested. 82 | 83 | ### 2. Warning 84 | 85 | **Community Impact**: A violation through a single incident or series 86 | of actions. 87 | 88 | **Consequence**: A warning with consequences for continued behavior. No 89 | interaction with the people involved, including unsolicited interaction with 90 | those enforcing the Code of Conduct, for a specified period of time. This 91 | includes avoiding interactions in community spaces as well as external channels 92 | like social media. Violating these terms may lead to a temporary or 93 | permanent ban. 94 | 95 | ### 3. Temporary Ban 96 | 97 | **Community Impact**: A serious violation of community standards, including 98 | sustained inappropriate behavior. 99 | 100 | **Consequence**: A temporary ban from any sort of interaction or public 101 | communication with the community for a specified period of time. No public or 102 | private interaction with the people involved, including unsolicited interaction 103 | with those enforcing the Code of Conduct, is allowed during this period. 104 | Violating these terms may lead to a permanent ban. 105 | 106 | ### 4. Permanent Ban 107 | 108 | **Community Impact**: Demonstrating a pattern of violation of community 109 | standards, including sustained inappropriate behavior, harassment of an 110 | individual, or aggression toward or disparagement of classes of individuals. 111 | 112 | **Consequence**: A permanent ban from any sort of public interaction within 113 | the community. 114 | 115 | ## Attribution 116 | 117 | This Code of Conduct is adapted from the [Contributor Covenant][homepage], 118 | version 2.0, available at 119 | [https://www.contributor-covenant.org/version/2/0/code_of_conduct.html][v2.0]. 120 | 121 | Community Impact Guidelines were inspired by 122 | [Mozilla's code of conduct enforcement ladder][Mozilla CoC]. 123 | 124 | For answers to common questions about this code of conduct, see the FAQ at 125 | [https://www.contributor-covenant.org/faq][FAQ]. Translations are available 126 | at [https://www.contributor-covenant.org/translations][translations]. 
127 | 128 | [homepage]: https://www.contributor-covenant.org 129 | [v2.0]: https://www.contributor-covenant.org/version/2/0/code_of_conduct.html 130 | [Mozilla CoC]: https://github.com/mozilla/diversity 131 | [FAQ]: https://www.contributor-covenant.org/faq 132 | [translations]: https://www.contributor-covenant.org/translations 133 | -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # Embark Contributor Guidelines 2 | 3 | Welcome! This project is created by the team at [Embark Studios](https://embark.games). We're glad you're interested in contributing! We welcome contributions from people of all backgrounds who are interested in making great software with us. 4 | 5 | At Embark, we aspire to empower everyone to create interactive experiences. To do this, we're exploring and pushing the boundaries of new technologies, and sharing our learnings with the open source community. 6 | 7 | If you have any difficulties getting involved or finding answers to your questions, please don't hesitate to ask your questions in our [Discord server](https://discord.com/invite/8TW9nfF). 8 | 9 | If you have ideas for collaboration, email us at opensource@embark-studios.com. 10 | 11 | We're also hiring full-time engineers to work with us in Stockholm! Check out our current job postings [here](https://www.embark-studios.com/jobs). 12 | 13 | ## Issues 14 | 15 | ### Feature Requests 16 | 17 | If you have ideas for how to improve our projects, you can suggest features by opening a GitHub issue. Make sure to include details about the feature or change, and describe any use cases it would enable. 18 | 19 | Feature requests will be tagged as `enhancement` and their status will be updated in the comments of the issue. 20 | 21 | ### Bugs 22 | 23 | When reporting a bug or unexpected behaviour in a project, make sure your issue describes steps to reproduce the behaviour, including the platform you were using, what steps you took, and any error messages. 24 | 25 | Reproducible bugs will be tagged as `bug` and their status will be updated in the comments of the issue. 26 | 27 | ### Wontfix 28 | 29 | Issues will be closed and tagged as `wontfix` if we decide that we do not wish to implement them, usually due to being misaligned with the project vision or out of scope. We will comment on the issue with more detailed reasoning. 30 | 31 | ## Contribution Workflow 32 | 33 | ### Open Issues 34 | 35 | If you're ready to contribute, start by looking at our open issues tagged as [`help wanted`](../../issues?q=is%3Aopen+is%3Aissue+label%3A"help+wanted") or [`good first issue`](../../issues?q=is%3Aopen+is%3Aissue+label%3A"good+first+issue"). 36 | 37 | You can comment on the issue to let others know you're interested in working on it or to ask questions. 38 | 39 | ### Making Changes 40 | 41 | 1. Fork the repository. 42 | 43 | 2. Create a new feature branch. 44 | 45 | 3. Make your changes. Ensure that there are no build errors by running the project with your changes locally. 46 | 47 | 4. Open a pull request with a name and description of what you did. You can read more about working with pull requests on GitHub [here](https://help.github.com/en/articles/creating-a-pull-request-from-a-fork). 48 | 49 | 5. A maintainer will review your pull request and may ask you to make changes.
50 | 51 | ## Licensing 52 | 53 | Unless otherwise specified, all Embark open source projects are licensed under a dual MIT OR Apache-2.0 license, allowing licensees to choose either at their option. You can read more in each project's respective README. 54 | 55 | ## Code of Conduct 56 | 57 | Please note that our projects are released with a [Contributor Code of Conduct](CODE_OF_CONDUCT.md) to ensure that they are welcoming places for everyone to contribute. By participating in any Embark open source project, you agree to abide by these terms. 58 | -------------------------------------------------------------------------------- /Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "cargo-fetcher" 3 | version = "0.14.6" 4 | authors = [ 5 | "Embark <opensource@embark-studios.com>", 6 | "Jake Shadle", 7 | ] 8 | edition = "2021" 9 | description = "🎁 Alternative to cargo fetch" 10 | license = "MIT OR Apache-2.0" 11 | documentation = "https://docs.rs/cargo-fetcher" 12 | homepage = "https://github.com/EmbarkStudios/cargo-fetcher" 13 | repository = "https://github.com/EmbarkStudios/cargo-fetcher" 14 | keywords = ["fetch", "gcs", "s3"] 15 | categories = ["development-tools::cargo-plugins"] 16 | readme = "README.md" 17 | 18 | [[bin]] 19 | name = "cargo-fetcher" 20 | path = "src/cmds/main.rs" 21 | 22 | [features] 23 | default = [] 24 | gcs = ["tame-gcs", "tame-oauth"] 25 | s3 = ["rusty-s3"] 26 | blob = ["base64", "quick-xml"] 27 | 28 | [profile.release] 29 | strip = "debuginfo" 30 | 31 | [dependencies] 32 | # Ergonomic error handling 33 | anyhow = "1.0" 34 | async-scoped = { version = "0.7", features = ["use-tokio"] } 35 | async-trait = "0.1" 36 | base64 = { version = "0.21", optional = true } 37 | bytes = "1.0" 38 | camino = "1.1" 39 | clap = { version = "4.0", features = ["derive", "env"] } 40 | crossbeam-channel = "0.5" 41 | flate2 = { version = "1.0", default-features = false, features = [ 42 | "rust_backend", 43 | ] } 44 | home = "0.5" 45 | http = "0.2" 46 | rayon = "1.5" 47 | remove_dir_all = "0.8" 48 | reqwest = { version = "0.11", default-features = false, features = [ 49 | "rustls-tls", 50 | # We could do this manually, but..meh 51 | "gzip", 52 | ] } 53 | ring = "0.17" 54 | rusty-s3 = { version = "0.5", optional = true } 55 | serde = { version = "1.0", features = ["derive", "rc"] } 56 | quick-xml = { version = "0.30", features = ["serialize"], optional = true } 57 | tame-gcs = { version = "0.12", optional = true } 58 | tame-index = { version = "0.8", features = ["git", "sparse"] } 59 | tame-oauth = { version = "0.9", features = ["gcp"], optional = true } 60 | tar = "0.4" 61 | tempfile = "3.1" 62 | time = { version = "0.3", features = ["formatting", "macros"] } 63 | toml = "0.8" 64 | tracing = "0.1" 65 | tracing-subscriber = { version = "0.3", features = ["env-filter", "json"] } 66 | url = { version = "2.2", features = ["serde"] } 67 | walkdir = "2.3" 68 | zstd = "0.13" 69 | 70 | [dependencies.gix] 71 | version = "0.55" 72 | features = [ 73 | "max-performance-safe", 74 | "blocking-network-client", 75 | "blocking-http-transport-reqwest", 76 | "reqwest-for-configuration-only", 77 | ] 78 | 79 | [dependencies.tokio] 80 | version = "1.4" 81 | features = ["rt-multi-thread", "macros"] 82 | 83 | [target.'cfg(unix)'.dependencies] 84 | libc = "0.2" 85 | 86 | [dev-dependencies] 87 | similar-asserts = "1.2" 88 | twox-hash = { version = "1.6", default-features = false } 89 | walkdir = "2.3" 90 | 91 | [[test]] 92 | name = "sync_crates_io" 93 | path = 
"tests/sync_crates_io.rs" 94 | 95 | [[test]] 96 | name = "sync_git" 97 | path = "tests/sync_git.rs" 98 | 99 | [[test]] 100 | name = "diff_cargo" 101 | path = "tests/diff_cargo.rs" 102 | 103 | [profile.dev.build-override] 104 | opt-level = 0 105 | 106 | [profile.dev.package."*"] 107 | opt-level = 3 108 | -------------------------------------------------------------------------------- /LICENSE-APACHE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. 
For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. 
You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. 
You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "[]" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright [yyyy] [name of copyright owner] 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 
202 | -------------------------------------------------------------------------------- /LICENSE-MIT: -------------------------------------------------------------------------------- 1 | Copyright (c) 2019 Embark Studios 2 | 3 | Permission is hereby granted, free of charge, to any 4 | person obtaining a copy of this software and associated 5 | documentation files (the "Software"), to deal in the 6 | Software without restriction, including without 7 | limitation the rights to use, copy, modify, merge, 8 | publish, distribute, sublicense, and/or sell copies of 9 | the Software, and to permit persons to whom the Software 10 | is furnished to do so, subject to the following 11 | conditions: 12 | 13 | The above copyright notice and this permission notice 14 | shall be included in all copies or substantial portions 15 | of the Software. 16 | 17 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF 18 | ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED 19 | TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A 20 | PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT 21 | SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY 22 | CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION 23 | OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR 24 | IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 25 | DEALINGS IN THE SOFTWARE. 26 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 |
2 | 3 | # `🎁 cargo-fetcher` 4 | 5 | [![Embark](https://img.shields.io/badge/embark-open%20source-blueviolet.svg)](https://embark.dev) 6 | [![Embark](https://img.shields.io/badge/discord-ark-%237289da.svg?logo=discord)](https://discord.gg/dAuKfZS) 7 | [![Crates.io](https://img.shields.io/crates/v/cargo-fetcher.svg)](https://crates.io/crates/cargo-fetcher) 8 | [![Docs](https://docs.rs/cargo-fetcher/badge.svg)](https://docs.rs/cargo-fetcher) 9 | [![dependency status](https://deps.rs/repo/github/EmbarkStudios/cargo-fetcher/status.svg)](https://deps.rs/repo/github/EmbarkStudios/cargo-fetcher) 10 | [![Build Status](https://github.com/EmbarkStudios/cargo-fetcher/workflows/CI/badge.svg)](https://github.com/EmbarkStudios/cargo-fetcher/actions?workflow=CI) 11 | 12 | Alternative to `cargo fetch` for use in CI or other "clean" environments that you want to quickly bootstrap with the necessary crates to compile/test etc your project(s). 13 | 14 |
15 | 16 | ## Why? 17 | 18 | * You run many CI jobs in clean and/or containerized environments and you want to quickly fetch cargo registries and crates so that you can spend your compute resources on actually compiling and testing the code, rather than downloading dependencies. 19 | 20 | ## Why not? 21 | 22 | * Other than the `fs` storage backend, the only supported backends are the 3 major cloud storage backends, as it is generally beneficial to store crate and registry information in the same cloud as you are running your CI jobs to take advantage of locality and I/O throughput. 23 | * `cargo-fetcher` should not be used in a typical user environment as it completely disregards various safety mechanisms that are built into cargo, such as file-based locking. 24 | * `cargo-fetcher` assumes it is running in an environment with high network throughput and low latency. 25 | 26 | ## Supported Storage Backends 27 | 28 | ### `gcs` 29 | 30 | The `gcs` feature enables the use of [Google Cloud Storage](https://cloud.google.com/storage/) as a backend. 31 | 32 | * Must provide a url to the `-u | --url` parameter with the [gsutil](https://cloud.google.com/storage/docs/gsutil#syntax) syntax `gs://<bucket_name>(/<prefix>)?` 33 | * Must provide [GCP service account](https://cloud.google.com/iam/docs/service-accounts) credentials either with `--credentials` or via the `GOOGLE_APPLICATION_CREDENTIALS` environment variable 34 | 35 | ### `s3` 36 | 37 | The `s3` feature enables the use of [Amazon S3](https://aws.amazon.com/s3/) as a backend. 38 | 39 | * Must provide a url to the `-u | --url` parameter, it must be of the form `http(s)?://<bucket>.s3(-<region>).<host>(/<prefix>)?` 40 | * Must provide AWS IAM user credentials via the environment variables `AWS_ACCESS_KEY_ID` and `AWS_SECRET_ACCESS_KEY` as described [here](https://docs.aws.amazon.com/cli/latest/userguide/cli-configure-envvars.html), or run from an ec2 instance with an assumed role as described [here](https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/iam-roles-for-amazon-ec2.html). 41 | 42 | ### `fs` 43 | 44 | The `fs` feature enables use of a folder on a local disk to store crates to and fetch crates from. 45 | 46 | * Must provide a url to the `-u | --url` parameter with the `file:` scheme 47 | 48 | ### `blob` 49 | 50 | The `blob` feature enables the use of [Azure Blob storage](https://azure.microsoft.com/services/storage/blobs/) as a backend. 51 | 52 | * Must provide a url to the `-u | --url` parameter, it must be of the form `blob://<container_name>(/<prefix>)?` 53 | * Must provide [Azure Storage Account](https://docs.microsoft.com/en-us/azure/storage/common/storage-account-overview) credentials via the environment variables `STORAGE_ACCOUNT` and `STORAGE_MASTER_KEY` as described [here](https://docs.microsoft.com/azure/storage/common/storage-account-keys-manage?tabs=azure-portal). 54 | 55 | ## Examples 56 | 57 | This is an example from our CI for an internal project. 58 | 59 | ### Dependencies 60 | 61 | * 424 crates.io crates: cached - 38MB, unpacked - 214MB 62 | * 13 crates sourced from 10 git repositories: db - 27MB, checked out - 38MB 63 | 64 | ### Scenario 65 | 66 | The following CI jobs are run in parallel, each in a Kubernetes Job running on GKE. The container base is roughly the same as the official [rust](https://hub.docker.com/_/rust):1.39.0-slim image.
67 | 68 | * Build modules for WASM 69 | * Build modules for native 70 | * Build host client for native 71 | 72 | ~ wait for all jobs to finish ~ 73 | 74 | * Run the tests for both the WASM and native modules from the host client 75 | 76 | ### Before 77 | 78 | All 3 build jobs take around **1m2s** each to do `cargo fetch --target x86_64-unknown-linux-gnu` 79 | 80 | ### After 81 | 82 | All 3 build jobs take **3-4s** each to do `cargo fetcher --include-index sync`. 83 | 84 | ## Usage 85 | 86 | `cargo-fetcher` has only 2 subcommands. Both of them share a set of options; the important inputs for each backend are described in [Storage Backends](#supported-storage-backends). 87 | 88 | In addition to the backend specifics, the only required option is the path to the `Cargo.lock` lockfile that you are operating on. `cargo-fetcher` requires a lockfile, as otherwise the normal cargo work of generating a lockfile requires having a full registry index locally, which partially defeats the point of this tool. 89 | 90 | ```text 91 | -l, --lock-file 92 | Path to the lockfile used for determining what crates to operate on [default: Cargo.lock] 93 | ``` 94 | 95 | ### `mirror` 96 | 97 | The `mirror` subcommand does the work of downloading crates and registry indexes from their original locations and re-uploading them to your storage backend. 98 | 99 | It does have one additional option however, to determine how often it should take snapshots of the registry index(es). 100 | 101 | ```text 102 | -m, --max-stale 103 | The duration for which the index will not be replaced after its most recent update. 104 | 105 | Times may be specified with no suffix (default days), or one of: 106 | * (s)econds 107 | * (m)inutes 108 | * (h)ours 109 | * (d)ays 110 | ``` 111 | 112 | ### Custom registries 113 | 114 | One wrinkle with mirroring is the presence of custom registries. To handle these, `cargo fetcher` uses the same logic that cargo uses to locate `.cargo/config<.toml>` config files to detect custom registries. However, cargo's config files only contain the metadata needed to fetch and publish to the registry; the url template for where to download crates from is actually present in a `config.json` file in the root of the registry itself. 115 | 116 | Rather than wait for a registry index to be downloaded each time before fetching any crates sourced from that registry, `cargo-fetcher` instead allows you to specify the download location yourself via an environment variable, so that it can fully parallelize the fetching of registry indices and crates. 117 | 118 | #### Example 119 | 120 | ```ini 121 | # .cargo/config.toml 122 | 123 | [registries] 124 | embark = { index = "<registry index url>" } 125 | ``` 126 | 127 | The environment variable is of the form `CARGO_FETCHER_<NAME>_DL`, where `<NAME>` is the name (upper-cased) of the registry in the configuration file. 128 | 129 | ```sh 130 | CARGO_FETCHER_EMBARK_DL="https://secret/rust/cargo/{crate}-{version}.crate" cargo fetcher mirror 131 | ``` 132 | 133 | The [format](https://doc.rust-lang.org/cargo/reference/registries.html#index-format) of the URL should be the same as the one in your registry's `config.json` file. If this environment variable is not specified for your registry, the default of `/{crate}/{version}/download` is just appended to the url of the registry.
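For reference, here is a minimal sketch of how `mirror` and `sync` (described next) might be paired in a CI pipeline. The bucket name and the registry download URL are illustrative assumptions, not values used by this repository:

```sh
# Hypothetical CI sketch; `gs://my-ci-crates` and the download URL are placeholders.

# Mirror job: runs before the build jobs fan out (or on a schedule),
# uploading crates and registry indices to the storage backend. The
# registry index snapshot is reused if it is less than an hour old.
CARGO_FETCHER_EMBARK_DL="https://secret/rust/cargo/{crate}-{version}.crate" \
  cargo fetcher --url gs://my-ci-crates mirror --max-stale 1h

# Each build job: pull everything from the bucket instead of the original
# registries, then let cargo run with a warm cache.
cargo fetcher --url gs://my-ci-crates sync
cargo build --release
```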
134 | 
135 | ### `sync`
136 | 
137 | The `sync` subcommand is the actual replacement for `cargo fetch`. Instead of downloading crates and registries from their normal locations, it downloads them from your storage backend and splats them to disk in the same way that cargo does, so that cargo won't have to do any actual work before it can start building code.
138 | 
139 | ## Contributing
140 | 
141 | [![Contributor Covenant](https://img.shields.io/badge/contributor%20covenant-v1.4-ff69b4.svg)](../CODE_OF_CONDUCT.md)
142 | 
143 | We welcome community contributions to this project.
144 | 
145 | Please read our [Contributor Guide](CONTRIBUTING.md) for more information on how to get started.
146 | 
147 | ## License
148 | 
149 | Licensed under either of
150 | 
151 | * Apache License, Version 2.0, ([LICENSE-APACHE](LICENSE-APACHE) or <http://www.apache.org/licenses/LICENSE-2.0>)
152 | * MIT license ([LICENSE-MIT](LICENSE-MIT) or <http://opensource.org/licenses/MIT>)
153 | 
154 | at your option.
155 | 
156 | ### Contribution
157 | 
158 | Unless you explicitly state otherwise, any contribution intentionally submitted for inclusion in the work by you, as defined in the Apache-2.0 license, shall be dual licensed as above, without any additional terms or conditions.
159 | --------------------------------------------------------------------------------
/deny.toml:
--------------------------------------------------------------------------------
 1 | targets = [
 2 |     { triple = "x86_64-unknown-linux-gnu" },
 3 |     { triple = "aarch64-unknown-linux-gnu" },
 4 |     { triple = "aarch64-unknown-linux-musl" },
 5 |     { triple = "x86_64-unknown-linux-musl" },
 6 |     { triple = "x86_64-apple-darwin" },
 7 |     { triple = "aarch64-apple-darwin" },
 8 |     { triple = "x86_64-pc-windows-msvc" },
 9 | ]
10 | all-features = true
11 | 
12 | [advisories]
13 | unmaintained = "deny"
14 | unsound = "deny"
15 | ignore = []
16 | 
17 | [bans]
18 | multiple-versions = "deny"
19 | deny = [{ name = "git2" }, { name = "openssl" }]
20 | skip = [
21 |     # nix
22 |     { name = "bitflags", version = "=1.3.2" },
23 |     # tracing-subscriber and similar-asserts
24 |     { name = "regex-syntax", version = "=0.6.29" },
25 |     # hyper/tokio
26 |     { name = "socket2", version = "=0.4.10" },
27 |     # only a couple left on old 1.0 version
28 |     { name = "syn", version = "=1.0.109" },
29 | ]
30 | skip-tree = [
31 |     # similar
32 |     { name = "bstr", version = "=0.2.17" },
33 |     # ugh
34 |     { name = "windows-sys" },
35 |     # gix has several duplicates I don't feel like dealing with atm
36 |     { name = "gix" },
37 | ]
38 | 
39 | [licenses]
40 | unlicensed = "deny"
41 | # We want really high confidence when inferring licenses from text
42 | confidence-threshold = 0.92
43 | allow = [
44 |     "Apache-2.0",
45 |     "BSD-2-Clause",
46 |     "BSD-3-Clause",
47 |     "ISC",
48 |     "MIT",
49 |     "MPL-2.0",
50 |     "OpenSSL",
51 |     "Zlib",
52 |     "Unicode-DFS-2016",
53 | ]
54 | 
55 | [[licenses.clarify]]
56 | name = "ring"
57 | # SPDX considers OpenSSL to encompass both the OpenSSL and SSLeay licenses
58 | # https://spdx.org/licenses/OpenSSL.html
59 | # ISC - Both BoringSSL and ring use this for their new files
60 | # MIT - "Files in third_party/ have their own licenses, as described therein. The MIT
61 | #       license, for third_party/fiat, which, unlike other third_party directories, is
62 | #       compiled into non-test libraries, is included below."
63 | # OpenSSL - Obviously 64 | expression = "ISC AND MIT AND OpenSSL" 65 | license-files = [{ path = "LICENSE", hash = 0xbd0eed23 }] 66 | 67 | [[licenses.clarify]] 68 | name = "webpki" 69 | expression = "ISC" 70 | license-files = [{ path = "LICENSE", hash = 0x001c7e6c }] 71 | 72 | [[licenses.clarify]] 73 | name = "rustls-webpki" 74 | expression = "ISC" 75 | license-files = [{ path = "LICENSE", hash = 0x001c7e6c }] 76 | 77 | [[licenses.clarify]] 78 | name = "encoding_rs" 79 | expression = "(Apache-2.0 OR MIT) AND BSD-3-Clause" 80 | license-files = [{ path = "COPYRIGHT", hash = 0x39f8ad31 }] 81 | -------------------------------------------------------------------------------- /release.toml: -------------------------------------------------------------------------------- 1 | pre-release-commit-message = "Release {{version}}" 2 | tag-message = "Release {{version}}" 3 | tag-name = "{{version}}" 4 | pre-release-replacements = [ 5 | { file = "CHANGELOG.md", search = "Unreleased", replace = "{{version}}" }, 6 | { file = "CHANGELOG.md", search = "\\.\\.\\.HEAD", replace = "...{{tag_name}}" }, 7 | { file = "CHANGELOG.md", search = "ReleaseDate", replace = "{{date}}" }, 8 | { file = "CHANGELOG.md", search = "", replace = "\n## [Unreleased] - ReleaseDate" }, 9 | { file = "CHANGELOG.md", search = "", replace = "\n[Unreleased]: https://github.com/EmbarkStudios/cargo-fetcher/compare/{{tag_name}}...HEAD" }, 10 | ] 11 | -------------------------------------------------------------------------------- /src/backends.rs: -------------------------------------------------------------------------------- 1 | #[cfg(feature = "gcs")] 2 | pub mod gcs; 3 | 4 | #[cfg(feature = "s3")] 5 | pub mod s3; 6 | 7 | pub mod fs; 8 | 9 | #[cfg(feature = "blob")] 10 | pub mod blob; 11 | -------------------------------------------------------------------------------- /src/backends/blob.rs: -------------------------------------------------------------------------------- 1 | use crate::{ 2 | util::{self, send_request_with_retry}, 3 | CloudId, HttpClient, 4 | }; 5 | use anyhow::{Context as _, Result}; 6 | use bytes::Bytes; 7 | 8 | mod vendor; 9 | use vendor as blob; 10 | 11 | #[derive(Debug)] 12 | pub struct BlobBackend { 13 | prefix: String, 14 | instance: blob::Blob, 15 | client: HttpClient, 16 | } 17 | 18 | impl BlobBackend { 19 | pub fn new(loc: crate::BlobLocation<'_>, timeout: std::time::Duration) -> Result { 20 | let account = 21 | std::env::var("STORAGE_ACCOUNT").context("Set env variable STORAGE_ACCOUNT first!")?; 22 | let master_key = std::env::var("STORAGE_MASTER_KEY") 23 | .context("Set env variable STORAGE_MASTER_KEY first!")?; 24 | 25 | let instance = blob::Blob::new(&account, &master_key, loc.container, false); 26 | let client = HttpClient::builder() 27 | .use_rustls_tls() 28 | .timeout(timeout) 29 | .build()?; 30 | 31 | Ok(Self { 32 | prefix: loc.prefix.to_owned(), 33 | instance, 34 | client, 35 | }) 36 | } 37 | 38 | #[inline] 39 | fn make_key(&self, id: CloudId<'_>) -> String { 40 | format!("{}{id}", self.prefix) 41 | } 42 | } 43 | 44 | const FMT: &[time::format_description::FormatItem<'_>] = time::macros::format_description!( 45 | "[weekday repr:short], [day] [month repr:short] [year] [hour]:[minute]:[second] GMT" 46 | ); 47 | 48 | #[inline] 49 | fn utc_now_to_str() -> String { 50 | time::OffsetDateTime::now_utc().format(&FMT).unwrap() 51 | } 52 | 53 | #[async_trait::async_trait] 54 | impl crate::Backend for BlobBackend { 55 | async fn fetch(&self, id: CloudId<'_>) -> Result { 56 | let dl_req = self 57 | .instance 
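            // Builds a signed GET request for the blob; the timestamp passed to
            // `download` is sent as the `x-ms-date` header and must be the same
            // one used when computing the SharedKey signature (see vendor::Blob::sign)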
58 | .download(&self.make_key(id), &utc_now_to_str())?; 59 | 60 | let res = send_request_with_retry(&self.client, util::convert_request(dl_req)) 61 | .await? 62 | .error_for_status()?; 63 | 64 | Ok(res.bytes().await?) 65 | } 66 | 67 | async fn upload(&self, source: Bytes, id: CloudId<'_>) -> Result { 68 | let content_len = source.len() as u64; 69 | let insert_req = self 70 | .instance 71 | .insert(&self.make_key(id), source, &utc_now_to_str())?; 72 | 73 | send_request_with_retry(&self.client, insert_req.try_into()?) 74 | .await? 75 | .error_for_status()?; 76 | 77 | Ok(content_len as usize) 78 | } 79 | 80 | async fn list(&self) -> Result> { 81 | let list_req = self.instance.list(&utc_now_to_str())?; 82 | 83 | let response = send_request_with_retry(&self.client, util::convert_request(list_req)) 84 | .await? 85 | .error_for_status()?; 86 | 87 | let resp_body = response 88 | .text() 89 | .await 90 | .context("failed to get list response")?; 91 | let resp_body = resp_body.trim_start_matches('\u{feff}'); 92 | let resp = blob::parse_list_body(resp_body)?; 93 | let a = resp 94 | .blobs 95 | .blob 96 | .into_iter() 97 | .map(|b| b.name) 98 | .collect::>(); 99 | Ok(a) 100 | } 101 | 102 | async fn updated(&self, id: CloudId<'_>) -> Result> { 103 | let request = self 104 | .instance 105 | .properties(&self.make_key(id), &utc_now_to_str())?; 106 | 107 | let response = send_request_with_retry(&self.client, util::convert_request(request)) 108 | .await? 109 | .error_for_status()?; 110 | 111 | let properties = 112 | blob::PropertiesResponse::try_from(util::convert_response(response).await?)?; 113 | 114 | // Ensure the offset is UTC, the azure datetime format is truly terrible 115 | let last_modified = crate::Timestamp::parse(&properties.last_modified, &FMT)? 116 | .replace_offset(time::UtcOffset::UTC); 117 | 118 | Ok(Some(last_modified)) 119 | } 120 | } 121 | -------------------------------------------------------------------------------- /src/backends/blob/vendor.rs: -------------------------------------------------------------------------------- 1 | mod download; 2 | mod insert; 3 | mod list; 4 | mod properties; 5 | 6 | use anyhow::Error; 7 | use std::fmt; 8 | 9 | pub use list::parse_list_body; 10 | pub use list::EnumerationResults; 11 | 12 | pub struct PropertiesResponse { 13 | pub last_modified: String, 14 | } 15 | 16 | #[derive(Debug)] 17 | pub struct Blob { 18 | account: String, 19 | key: String, 20 | container: String, 21 | version_value: String, 22 | azurite: bool, 23 | } 24 | 25 | impl Blob { 26 | pub fn new(account: &str, key: &str, container: &str, azurite: bool) -> Self { 27 | Self { 28 | account: account.to_owned(), 29 | key: key.to_owned(), 30 | container: container.to_owned(), 31 | version_value: String::from("2015-02-21"), 32 | azurite, 33 | } 34 | } 35 | 36 | fn container_uri(&self) -> String { 37 | if self.azurite { 38 | format!("http://127.0.0.1:10000/{}/{}", self.account, self.container) 39 | } else { 40 | format!( 41 | "https://{}.blob.core.windows.net/{}", 42 | self.account, self.container 43 | ) 44 | } 45 | } 46 | 47 | fn sign( 48 | &self, 49 | action: &Actions, 50 | path: &str, 51 | time_str: &str, 52 | content_length: usize, 53 | ) -> Result { 54 | let string_to_sign = prepare_to_sign( 55 | &self.account, 56 | path, 57 | action, 58 | time_str, 59 | content_length, 60 | &self.version_value, 61 | ); 62 | 63 | hmacsha256(&self.key, &string_to_sign) 64 | } 65 | } 66 | 67 | impl fmt::Display for Blob { 68 | fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { 69 | write!(f, "Blob: 
{self:#?}") 70 | } 71 | } 72 | 73 | enum Actions { 74 | Download, 75 | Insert, 76 | Properties, 77 | List, 78 | } 79 | 80 | impl From<&Actions> for http::Method { 81 | fn from(action: &Actions) -> Self { 82 | match action { 83 | Actions::Download | Actions::List => http::Method::GET, 84 | Actions::Insert => http::Method::PUT, 85 | Actions::Properties => http::Method::HEAD, 86 | } 87 | } 88 | } 89 | 90 | pub fn hmacsha256(key: &str, string_to_sign: &str) -> Result { 91 | use base64::{engine::general_purpose::STANDARD, Engine as _}; 92 | use ring::hmac; 93 | 94 | let key_bytes = STANDARD.decode(key)?; 95 | 96 | let key = hmac::Key::new(hmac::HMAC_SHA256, &key_bytes); 97 | let tag = hmac::sign(&key, string_to_sign.as_bytes()); 98 | 99 | Ok(STANDARD.encode(tag.as_ref())) 100 | } 101 | 102 | fn prepare_to_sign( 103 | account: &str, 104 | path: &str, 105 | action: &Actions, 106 | time_str: &str, 107 | content_length: usize, 108 | version_value: &str, 109 | ) -> String { 110 | { 111 | let content_encoding = ""; 112 | let content_language = ""; 113 | let content_length = { 114 | if content_length == 0 { 115 | String::from("") 116 | } else { 117 | content_length.to_string() 118 | } 119 | }; 120 | let content_md5 = ""; 121 | let content_type = ""; 122 | let date = ""; 123 | let if_modified_since = ""; 124 | let if_match = ""; 125 | let if_none_match = ""; 126 | let if_unmodified_since = ""; 127 | let range = ""; 128 | let canonicalized_headers = if matches!(action, Actions::Properties) { 129 | format!("x-ms-date:{time_str}\nx-ms-version:{version_value}") 130 | } else { 131 | format!("x-ms-blob-type:BlockBlob\nx-ms-date:{time_str}\nx-ms-version:{version_value}") 132 | }; 133 | // let canonicalized_headers = 134 | // format!("x-ms-date:{}\nx-ms-version:{}", time_str, version_value); 135 | let verb = http::Method::from(action).to_string(); 136 | let canonicalized_resource = if matches!(action, Actions::List) { 137 | format!("/{account}{path}\ncomp:list\nrestype:container") 138 | } else { 139 | format!("/{account}{path}") 140 | }; 141 | format!( 142 | "{verb}\n{content_encoding}\n{content_language}\n{content_length}\n{content_md5}\n{content_type}\n{date}\n{if_modified_since}\n{if_match}\n{if_none_match}\n{if_unmodified_since}\n{range}\n{canonicalized_headers}\n{canonicalized_resource}" 143 | ) 144 | } 145 | } 146 | -------------------------------------------------------------------------------- /src/backends/blob/vendor/download.rs: -------------------------------------------------------------------------------- 1 | use anyhow::{Context, Error}; 2 | use http::HeaderValue; 3 | use http::Uri; 4 | use std::str::FromStr; 5 | 6 | impl super::Blob { 7 | pub fn download( 8 | &self, 9 | file_name: &str, 10 | timefmt: &str, 11 | ) -> Result, Error> { 12 | let action = super::Actions::Download; 13 | let now = timefmt; 14 | 15 | let mut req_builder = http::Request::builder(); 16 | let mut uri = self.container_uri(); 17 | uri.push('/'); 18 | uri.push_str(file_name); 19 | let sign = self.sign(&action, Uri::from_str(&uri)?.path(), timefmt, 0); 20 | let formatedkey = format!("SharedKey {}:{}", &self.account, sign?,); 21 | let hm = req_builder.headers_mut().context("context")?; 22 | hm.insert("Authorization", HeaderValue::from_str(&formatedkey)?); 23 | hm.insert("x-ms-date", HeaderValue::from_str(now)?); 24 | hm.insert("x-ms-version", HeaderValue::from_str(&self.version_value)?); 25 | hm.insert("x-ms-blob-type", HeaderValue::from_str("BlockBlob")?); 26 | let request = req_builder 27 | 
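            // A bodiless GET: the signature above was computed with a content
            // length of 0, matching the empty body set below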
.method(http::Method::from(&action)) 28 | .uri(uri) 29 | .body(std::io::empty())?; 30 | Ok(request) 31 | } 32 | } 33 | -------------------------------------------------------------------------------- /src/backends/blob/vendor/insert.rs: -------------------------------------------------------------------------------- 1 | use std::str::FromStr; 2 | 3 | use anyhow::{Context, Error}; 4 | use http::HeaderValue; 5 | use http::Uri; 6 | 7 | impl super::Blob { 8 | pub fn insert( 9 | &self, 10 | file_name: &str, 11 | source: bytes::Bytes, 12 | timefmt: &str, 13 | ) -> Result, Error> { 14 | let action = super::Actions::Insert; 15 | let now = timefmt; 16 | 17 | let mut uri = self.container_uri(); 18 | uri.push('/'); 19 | uri.push_str(file_name); 20 | let sign = self.sign(&action, Uri::from_str(&uri)?.path(), timefmt, source.len()); 21 | let formatedkey = format!("SharedKey {}:{}", self.account, sign?); 22 | let mut req_builder = http::Request::builder(); 23 | let hm = req_builder.headers_mut().context("context")?; 24 | hm.insert("Authorization", HeaderValue::from_str(&formatedkey)?); 25 | hm.insert("x-ms-date", HeaderValue::from_str(now)?); 26 | hm.insert("x-ms-version", HeaderValue::from_str(&self.version_value)?); 27 | hm.insert("x-ms-blob-type", HeaderValue::from_str("BlockBlob")?); 28 | let request = req_builder 29 | .method(http::Method::from(&action)) 30 | .uri(uri) 31 | .body(source)?; 32 | Ok(request) 33 | } 34 | } 35 | -------------------------------------------------------------------------------- /src/backends/blob/vendor/list.rs: -------------------------------------------------------------------------------- 1 | use anyhow::{anyhow, Context, Error}; 2 | use serde::{Deserialize, Serialize}; 3 | 4 | impl super::Blob { 5 | pub fn list(&self, timefmt: &str) -> Result, Error> { 6 | let action = super::Actions::List; 7 | let now = timefmt; 8 | 9 | let mut req_builder = http::Request::builder(); 10 | let mut uri = self.container_uri(); 11 | uri.push_str("?restype=container&comp=list"); 12 | let uri: http::Uri = uri.parse()?; 13 | 14 | let sign = self.sign(&action, uri.path(), timefmt, 0); 15 | let formatedkey = format!( 16 | "SharedKey {}:{}", 17 | &self.account, 18 | sign?, 19 | // self.sign(&action, Uri::from_str(&uri)?.path(), timefmt, 0)? 
20 | ); 21 | let hm = req_builder.headers_mut().context("context")?; 22 | hm.insert("Authorization", formatedkey.parse()?); 23 | hm.insert("x-ms-date", now.parse()?); 24 | hm.insert("x-ms-version", self.version_value.parse()?); 25 | hm.insert( 26 | "x-ms-blob-type", 27 | http::HeaderValue::from_static("BlockBlob"), 28 | ); 29 | let request = req_builder 30 | .method(http::Method::from(&action)) 31 | .uri(uri) 32 | .body(std::io::empty())?; 33 | Ok(request) 34 | } 35 | } 36 | 37 | #[derive(Debug, Serialize, Deserialize, PartialEq)] 38 | pub struct EnumerationResults { 39 | #[serde(rename = "Blobs")] 40 | pub blobs: Blobs, 41 | } 42 | 43 | #[derive(Debug, Serialize, Deserialize, PartialEq)] 44 | pub struct Blobs { 45 | #[serde(rename = "Blob")] 46 | pub blob: Vec, 47 | } 48 | 49 | #[derive(Debug, Serialize, Deserialize, PartialEq)] 50 | pub struct Blob { 51 | // #[serde(rename(serialize = "Name", deserialize = "Name"))] 52 | #[serde(rename = "Name")] 53 | pub name: String, 54 | // #[serde(rename(serialize = "Properties", deserialize = "Properties"))] 55 | #[serde(rename = "Properties")] 56 | pub properties: Properties, 57 | } 58 | 59 | #[derive(Debug, Serialize, Deserialize, PartialEq)] 60 | pub struct Properties { 61 | // #[serde(rename(serialize = "Last-Modified", deserialize = "Last-Modified"))] 62 | #[serde(rename = "Last-Modified")] 63 | pub last_modified: String, 64 | // #[serde(rename(serialize = "Content-Length", deserialize = "Content-Length"))] 65 | #[serde(rename = "Content-Length")] 66 | pub content_length: usize, 67 | // #[serde(rename(serialize = "Content-MD5", deserialize = "Content-MD5"))] 68 | #[serde(rename = "Content-MD5")] 69 | pub content_md5: String, 70 | } 71 | 72 | pub fn parse_list_body(s: &str) -> Result { 73 | match quick_xml::de::from_str(s) { 74 | Ok(d) => Ok(d), 75 | Err(e) => Err(anyhow!("failed to parse list action body. {}", e)), 76 | } 77 | } 78 | -------------------------------------------------------------------------------- /src/backends/blob/vendor/properties.rs: -------------------------------------------------------------------------------- 1 | use anyhow::{Context, Error}; 2 | use http::HeaderValue; 3 | use http::Uri; 4 | use std::str::FromStr; 5 | 6 | impl TryFrom> for super::PropertiesResponse { 7 | type Error = Error; 8 | fn try_from(response: http::Response) -> Result { 9 | Ok(Self { 10 | last_modified: response 11 | .headers() 12 | .get("Last-Modified") 13 | .context("failed to read Last-Modified in headers")? 14 | .to_str()? 15 | .to_owned(), 16 | }) 17 | } 18 | } 19 | 20 | impl super::Blob { 21 | pub fn properties( 22 | &self, 23 | file_name: &str, 24 | timefmt: &str, 25 | ) -> Result, Error> { 26 | let action = super::Actions::Properties; 27 | let now = timefmt; 28 | 29 | let mut req_builder = http::Request::builder(); 30 | let mut uri = self.container_uri(); 31 | uri.push('/'); 32 | uri.push_str(file_name); 33 | let sign = self.sign( 34 | &super::Actions::Properties, 35 | Uri::from_str(&uri)?.path(), 36 | timefmt, 37 | 0, 38 | ); 39 | let formatedkey = format!( 40 | "SharedKey {}:{}", 41 | &self.account, 42 | sign?, 43 | // self.sign(&super::Actions::Properties, file_name, timefmt, 0)? 
44 | ); 45 | let hm = req_builder.headers_mut().context("context")?; 46 | hm.insert("Authorization", HeaderValue::from_str(&formatedkey)?); 47 | hm.insert("x-ms-date", HeaderValue::from_str(now)?); 48 | hm.insert("x-ms-version", HeaderValue::from_str(&self.version_value)?); 49 | let request = req_builder 50 | .method(http::Method::from(&action)) 51 | .uri(uri) 52 | .body(std::io::empty())?; 53 | Ok(request) 54 | } 55 | } 56 | -------------------------------------------------------------------------------- /src/backends/fs.rs: -------------------------------------------------------------------------------- 1 | use crate::{CloudId, PathBuf}; 2 | use anyhow::Result; 3 | use bytes::Bytes; 4 | use std::fs; 5 | 6 | #[derive(Debug)] 7 | pub struct FsBackend { 8 | path: PathBuf, 9 | } 10 | 11 | impl FsBackend { 12 | pub fn new(loc: crate::FilesystemLocation<'_>) -> Result { 13 | let crate::FilesystemLocation { path } = loc; 14 | 15 | if !path.exists() { 16 | fs::create_dir_all(path)?; 17 | } 18 | 19 | Ok(Self { 20 | path: path.to_owned(), 21 | }) 22 | } 23 | 24 | #[inline] 25 | fn make_path(&self, id: CloudId<'_>) -> PathBuf { 26 | self.path.join(id.to_string()) 27 | } 28 | } 29 | 30 | #[async_trait::async_trait] 31 | impl crate::Backend for FsBackend { 32 | async fn fetch(&self, id: CloudId<'_>) -> Result { 33 | let path = self.make_path(id); 34 | let buf = fs::read(path)?; 35 | Ok(buf.into()) 36 | } 37 | 38 | async fn upload(&self, source: Bytes, id: CloudId<'_>) -> Result { 39 | let path = self.make_path(id); 40 | fs::write(path, &source)?; 41 | Ok(source.len()) 42 | } 43 | 44 | async fn list(&self) -> Result> { 45 | let entries = fs::read_dir(&self.path)? 46 | .filter_map(|entry| { 47 | let entry = entry.ok()?; 48 | entry.file_type().ok().filter(|ft| ft.is_file())?; 49 | entry.file_name().into_string().ok() 50 | }) 51 | .collect(); 52 | 53 | Ok(entries) 54 | } 55 | 56 | async fn updated(&self, id: CloudId<'_>) -> Result> { 57 | let path = self.make_path(id); 58 | 59 | if !path.exists() { 60 | return Ok(None); 61 | } 62 | 63 | let metadata = fs::metadata(&path)?; 64 | let modified = metadata.modified()?.into(); 65 | 66 | Ok(Some(modified)) 67 | } 68 | } 69 | -------------------------------------------------------------------------------- /src/backends/gcs.rs: -------------------------------------------------------------------------------- 1 | use crate::{ 2 | util::{self, send_request_with_retry}, 3 | CloudId, HttpClient, Path, 4 | }; 5 | use anyhow::{Context as _, Result}; 6 | use tame_gcs::{objects::Object, BucketName, ObjectName}; 7 | use tracing::debug; 8 | 9 | async fn acquire_gcs_token(cred_path: &Path) -> Result { 10 | // If we're not completing whatever task in under an hour then we 11 | // have more problems than the token expiring 12 | use tame_oauth::gcp::{self, TokenProvider}; 13 | 14 | #[cfg(feature = "gcs")] 15 | debug!("using credentials in {cred_path}"); 16 | 17 | let svc_account_info = 18 | gcp::ServiceAccountInfo::deserialize(std::fs::read_to_string(cred_path)?) 19 | .context("failed to deserilize service account")?; 20 | let svc_account_access = gcp::ServiceAccountProvider::new(svc_account_info)?; 21 | 22 | let token = match svc_account_access.get_token(&[tame_gcs::Scopes::ReadWrite])? { 23 | gcp::TokenOrRequest::Request { 24 | request, 25 | scope_hash, 26 | .. 
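            // tame-oauth is an I/O-free library: it hands back the HTTP request
            // for the token exchange, which we execute with our own client and
            // then feed the response back to it for parsing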
27 | } => { 28 | let client = HttpClient::new(); 29 | let res = client.execute(request.try_into().unwrap()).await?; 30 | 31 | let mut builder = http::Response::builder() 32 | .status(res.status()) 33 | .version(res.version()); 34 | 35 | let headers = builder 36 | .headers_mut() 37 | .context("failed to convert response headers")?; 38 | 39 | headers.extend( 40 | res.headers() 41 | .into_iter() 42 | .map(|(k, v)| (k.clone(), v.clone())), 43 | ); 44 | 45 | let body = res.bytes().await?; 46 | let response = builder.body(body)?; 47 | 48 | svc_account_access.parse_token_response(scope_hash, response)? 49 | } 50 | gcp::TokenOrRequest::Token(_) => unreachable!(), 51 | }; 52 | 53 | Ok(token) 54 | } 55 | 56 | pub struct GcsBackend { 57 | client: HttpClient, 58 | bucket: BucketName<'static>, 59 | prefix: String, 60 | obj: Object, 61 | } 62 | 63 | impl GcsBackend { 64 | pub async fn new( 65 | loc: crate::GcsLocation<'_>, 66 | credentials: &Path, 67 | timeout: std::time::Duration, 68 | ) -> Result { 69 | let bucket = BucketName::try_from(loc.bucket.to_owned())?; 70 | 71 | let token = acquire_gcs_token(credentials).await?; 72 | 73 | use reqwest::header; 74 | 75 | let hm = { 76 | let mut hm = header::HeaderMap::new(); 77 | hm.insert( 78 | header::AUTHORIZATION, 79 | >::try_into(token)?, 80 | ); 81 | hm 82 | }; 83 | 84 | let client = HttpClient::builder() 85 | .default_headers(hm) 86 | .use_rustls_tls() 87 | .timeout(timeout) 88 | .build()?; 89 | 90 | Ok(Self { 91 | bucket, 92 | client, 93 | prefix: loc.prefix.to_owned(), 94 | obj: Object::default(), 95 | }) 96 | } 97 | 98 | #[inline] 99 | fn obj_name(&self, id: CloudId<'_>) -> Result> { 100 | Ok(ObjectName::try_from(format!("{}{id}", self.prefix))?) 101 | } 102 | } 103 | 104 | use std::fmt; 105 | 106 | impl fmt::Debug for GcsBackend { 107 | fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { 108 | f.debug_struct("gcs") 109 | .field("bucket", &self.bucket) 110 | .field("prefix", &self.prefix) 111 | .finish() 112 | } 113 | } 114 | 115 | #[async_trait::async_trait] 116 | impl crate::Backend for GcsBackend { 117 | async fn fetch(&self, id: CloudId<'_>) -> Result { 118 | let dl_req = self 119 | .obj 120 | .download(&(&self.bucket, &self.obj_name(id)?), None)?; 121 | 122 | let content = send_request_with_retry(&self.client, util::convert_request(dl_req)) 123 | .await? 124 | .error_for_status()? 125 | .bytes() 126 | .await?; 127 | 128 | Ok(content) 129 | } 130 | 131 | async fn upload(&self, source: bytes::Bytes, id: CloudId<'_>) -> Result { 132 | use tame_gcs::objects::InsertObjectOptional; 133 | 134 | let content_len = source.len() as u64; 135 | 136 | let insert_req = self.obj.insert_simple( 137 | &(&self.bucket, &self.obj_name(id)?), 138 | source, 139 | content_len, 140 | Some(InsertObjectOptional { 141 | content_type: Some("application/x-tar"), 142 | ..Default::default() 143 | }), 144 | )?; 145 | 146 | send_request_with_retry(&self.client, insert_req.try_into()?) 147 | .await? 
148 | .error_for_status()?; 149 | 150 | Ok(content_len as usize) 151 | } 152 | 153 | async fn list(&self) -> Result> { 154 | use tame_gcs::objects::{ListOptional, ListResponse}; 155 | 156 | // Get a list of all crates already present in gcs, the list 157 | // operation can return a maximum of 1000 entries per request, 158 | // so we may have to send multiple requests to determine all 159 | // of the available crates 160 | let mut names = Vec::new(); 161 | let mut page_token: Option = None; 162 | 163 | loop { 164 | let ls_req = self.obj.list( 165 | &self.bucket, 166 | Some(ListOptional { 167 | // We only care about a single directory 168 | delimiter: Some("/"), 169 | prefix: Some(&self.prefix), 170 | page_token: page_token.as_ref().map(|s| s.as_ref()), 171 | ..Default::default() 172 | }), 173 | )?; 174 | 175 | let response = util::convert_response( 176 | send_request_with_retry(&self.client, util::convert_request(ls_req)).await?, 177 | ) 178 | .await?; 179 | let list_response = ListResponse::try_from(response)?; 180 | 181 | let name_block: Vec<_> = list_response 182 | .objects 183 | .into_iter() 184 | .filter_map(|obj| obj.name) 185 | .collect(); 186 | names.push(name_block); 187 | 188 | page_token = list_response.page_token; 189 | 190 | if page_token.is_none() { 191 | break; 192 | } 193 | } 194 | 195 | let len = self.prefix.len(); 196 | 197 | Ok(names 198 | .into_iter() 199 | .flat_map(|v| v.into_iter().map(|p| p[len..].to_owned())) 200 | .collect()) 201 | } 202 | 203 | async fn updated(&self, id: CloudId<'_>) -> Result> { 204 | use tame_gcs::objects::{GetObjectOptional, GetObjectResponse}; 205 | 206 | let get_req = self.obj.get( 207 | &(&self.bucket, &self.obj_name(id)?), 208 | Some(GetObjectOptional { 209 | standard_params: tame_gcs::common::StandardQueryParameters { 210 | fields: Some("updated"), 211 | ..Default::default() 212 | }, 213 | ..Default::default() 214 | }), 215 | )?; 216 | 217 | let response = util::convert_response( 218 | send_request_with_retry(&self.client, util::convert_request(get_req)).await?, 219 | ) 220 | .await?; 221 | let get_response = GetObjectResponse::try_from(response)?; 222 | 223 | Ok(get_response.metadata.updated) 224 | } 225 | } 226 | -------------------------------------------------------------------------------- /src/backends/s3.rs: -------------------------------------------------------------------------------- 1 | use crate::{util::send_request_with_retry, CloudId, HttpClient}; 2 | use anyhow::{Context as _, Result}; 3 | use rusty_s3::{ 4 | actions::{CreateBucket, GetObject, ListObjectsV2, PutObject, S3Action}, 5 | credentials::Ec2SecurityCredentialsMetadataResponse, 6 | Bucket, Credentials, 7 | }; 8 | use std::time::Duration; 9 | 10 | const ONE_HOUR: Duration = Duration::from_secs(3600); 11 | 12 | pub struct S3Backend { 13 | prefix: String, 14 | bucket: Bucket, 15 | credential: Credentials, 16 | client: HttpClient, 17 | } 18 | 19 | impl S3Backend { 20 | pub async fn new(loc: crate::S3Location<'_>, timeout: std::time::Duration) -> Result { 21 | let endpoint = format!("https://s3.{}.{}", loc.region, loc.host) 22 | .parse() 23 | .context("failed to parse s3 endpoint")?; 24 | 25 | let bucket = Bucket::new( 26 | endpoint, 27 | rusty_s3::UrlStyle::VirtualHost, 28 | loc.bucket.to_owned(), 29 | loc.region.to_owned(), 30 | ) 31 | .context("failed to new Bucket")?; 32 | 33 | let client = HttpClient::builder() 34 | .use_rustls_tls() 35 | .timeout(timeout) 36 | .build()?; 37 | let credential = if let Some(creds) = Credentials::from_env() { 38 | creds 39 | } else { 
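            // Neither AWS_ACCESS_KEY_ID nor AWS_SECRET_ACCESS_KEY was set, so
            // fall back to the EC2 instance metadata service to retrieve
            // credentials for the instance's assumed IAM role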
40 | ec2_credentials(&client).await.context("Either set AWS_ACCESS_KEY_ID and AWS_SECRET_ACCESS_KEY, or run from an ec2 instance with an assumed IAM role")? 41 | }; 42 | 43 | Ok(Self { 44 | prefix: loc.prefix.to_owned(), 45 | bucket, 46 | credential, 47 | client, 48 | }) 49 | } 50 | 51 | #[inline] 52 | fn make_key(&self, id: CloudId<'_>) -> String { 53 | format!("{}{id}", self.prefix) 54 | } 55 | 56 | pub async fn make_bucket(&self) -> Result<()> { 57 | let action = CreateBucket::new(&self.bucket, &self.credential); 58 | let signed_url = action.sign(ONE_HOUR); 59 | self.client 60 | .put(signed_url) 61 | .send() 62 | .await? 63 | .error_for_status()?; 64 | 65 | Ok(()) 66 | } 67 | 68 | async fn send_request( 69 | &self, 70 | signed_url: url::Url, 71 | body: Option, 72 | ) -> Result { 73 | let req = if let Some(body) = body { 74 | self.client.put(signed_url).body(body).build() 75 | } else { 76 | self.client.get(signed_url).build() 77 | } 78 | .unwrap(); 79 | Ok(send_request_with_retry(&self.client, req) 80 | .await? 81 | .error_for_status()?) 82 | } 83 | } 84 | 85 | #[async_trait::async_trait] 86 | impl crate::Backend for S3Backend { 87 | async fn fetch(&self, id: CloudId<'_>) -> Result { 88 | let obj = self.make_key(id); 89 | let mut action = GetObject::new(&self.bucket, Some(&self.credential), &obj); 90 | action 91 | .query_mut() 92 | .insert("response-cache-control", "no-cache, no-store"); 93 | let signed_url = action.sign(ONE_HOUR); 94 | 95 | Ok(self.send_request(signed_url, None).await?.bytes().await?) 96 | } 97 | 98 | async fn upload(&self, source: bytes::Bytes, id: CloudId<'_>) -> Result { 99 | let len = source.len(); 100 | let obj = self.make_key(id); 101 | let action = PutObject::new(&self.bucket, Some(&self.credential), &obj); 102 | let signed_url = action.sign(ONE_HOUR); 103 | self.send_request(signed_url, Some(source)) 104 | .await? 105 | .bytes() 106 | .await?; 107 | Ok(len) 108 | } 109 | 110 | async fn list(&self) -> Result> { 111 | let action = ListObjectsV2::new(&self.bucket, Some(&self.credential)); 112 | let signed_url = action.sign(ONE_HOUR); 113 | let text = self.send_request(signed_url, None).await?.text().await?; 114 | let parsed = 115 | ListObjectsV2::parse_response(&text).context("failed parsing list response")?; 116 | Ok(parsed.contents.into_iter().map(|obj| obj.key).collect()) 117 | } 118 | 119 | async fn updated(&self, id: CloudId<'_>) -> Result> { 120 | let mut action = ListObjectsV2::new(&self.bucket, Some(&self.credential)); 121 | action.query_mut().insert("prefix", self.make_key(id)); 122 | action.query_mut().insert("max-keys", "1"); 123 | let signed_url = action.sign(ONE_HOUR); 124 | let text = self.send_request(signed_url, None).await?.text().await?; 125 | let parsed = ListObjectsV2::parse_response(&text).context("failed parsing updated info")?; 126 | let last_modified = &parsed 127 | .contents 128 | .get(0) 129 | .context("could not locate update info")? 130 | .last_modified; 131 | 132 | let last_modified = crate::Timestamp::parse( 133 | last_modified, 134 | &time::format_description::well_known::Rfc3339, 135 | ) 136 | .context("failed to parse last_modified timestamp")? 137 | // This _should_ already be set during parsing? 
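        // ...but force UTC anyway so later freshness comparisons are consistent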
138 | .replace_offset(time::UtcOffset::UTC); 139 | 140 | Ok(Some(last_modified)) 141 | } 142 | } 143 | 144 | use std::fmt; 145 | 146 | impl fmt::Debug for S3Backend { 147 | fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { 148 | f.debug_struct("s3") 149 | .field("bucket", &self.bucket) 150 | .field("prefix", &self.prefix) 151 | .finish() 152 | } 153 | } 154 | 155 | const AWS_IMDS_CREDENTIALS: &str = 156 | "http://169.254.169.254/latest/meta-data/iam/security-credentials"; 157 | 158 | /// 159 | async fn ec2_credentials(client: &HttpClient) -> Result { 160 | let resp = client 161 | .get(AWS_IMDS_CREDENTIALS) 162 | .send() 163 | .await? 164 | .error_for_status()?; 165 | 166 | let role_name = resp.text().await?; 167 | let resp = client 168 | .get(format!("{AWS_IMDS_CREDENTIALS}/{role_name}")) 169 | .send() 170 | .await? 171 | .error_for_status()?; 172 | 173 | let json = resp.text().await?; 174 | let ec2_creds = Ec2SecurityCredentialsMetadataResponse::deserialize(&json)?; 175 | Ok(ec2_creds.into_credentials()) 176 | } 177 | -------------------------------------------------------------------------------- /src/cmds/main.rs: -------------------------------------------------------------------------------- 1 | // crate-specific exceptions: 2 | #![allow(clippy::exit)] 3 | 4 | extern crate cargo_fetcher as cf; 5 | 6 | use anyhow::Context as _; 7 | use cf::PathBuf; 8 | use std::{sync::Arc, time::Duration}; 9 | use tracing_subscriber::filter::LevelFilter; 10 | use url::Url; 11 | 12 | mod mirror; 13 | mod sync; 14 | 15 | #[derive(Clone)] 16 | struct Dur(Duration); 17 | 18 | impl std::str::FromStr for Dur { 19 | type Err = clap::Error; 20 | 21 | fn from_str(src: &str) -> Result { 22 | let suffix_pos = src.find(char::is_alphabetic).unwrap_or(src.len()); 23 | 24 | let num: u64 = src[..suffix_pos] 25 | .parse() 26 | .map_err(|err| clap::Error::raw(clap::error::ErrorKind::ValueValidation, err))?; 27 | let suffix = if suffix_pos == src.len() { 28 | "s" 29 | } else { 30 | &src[suffix_pos..] 31 | }; 32 | 33 | let duration = match suffix { 34 | "s" | "S" => Duration::from_secs(num), 35 | "m" | "M" => Duration::from_secs(num * 60), 36 | "h" | "H" => Duration::from_secs(num * 60 * 60), 37 | "d" | "D" => Duration::from_secs(num * 60 * 60 * 24), 38 | s => { 39 | return Err(clap::Error::raw( 40 | clap::error::ErrorKind::ValueValidation, 41 | format!("unknown duration suffix '{s}'"), 42 | )) 43 | } 44 | }; 45 | 46 | Ok(Dur(duration)) 47 | } 48 | } 49 | 50 | #[derive(clap::Subcommand)] 51 | enum Command { 52 | /// Uploads any crates in the lockfile that aren't already present 53 | /// in the cloud storage location 54 | #[clap(name = "mirror")] 55 | Mirror(mirror::Args), 56 | /// Downloads missing crates to the local cargo locations and unpacks 57 | /// them 58 | #[clap(name = "sync")] 59 | Sync(sync::Args), 60 | } 61 | 62 | #[derive(clap::Parser)] 63 | #[clap( 64 | author, 65 | version, 66 | about, 67 | long_about = "cargo plugin to quickly fetch crate sources from cloud or local storage" 68 | )] 69 | struct Opts { 70 | /// Path to a service account credentials file used to obtain 71 | /// oauth2 tokens. By default uses GOOGLE_APPLICATION_CREDENTIALS 72 | /// environment variable. 
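    // NOTE: currently only consumed by the gcs backend; the other backends
    // read their credentials from other environment variables (see init_backend)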
73 | #[clap(short, long, env = "GOOGLE_APPLICATION_CREDENTIALS")] 74 | credentials: Option, 75 | /// A url to a cloud storage bucket and prefix path at which to store 76 | /// or retrieve archives 77 | #[clap(short, long)] 78 | url: Url, 79 | /// Path to the lockfile used for determining what crates to operate on 80 | #[clap(short, long, default_value = "Cargo.lock")] 81 | lock_files: Vec, 82 | #[clap( 83 | short = 'L', 84 | long, 85 | default_value = "info", 86 | long_help = "The log level for messages, only log messages at or above the level will be emitted. 87 | 88 | Possible values: 89 | * off 90 | * error 91 | * warn 92 | * info (default) 93 | * debug 94 | * trace" 95 | )] 96 | log_level: LevelFilter, 97 | /// Output log messages as json 98 | #[clap(long)] 99 | json: bool, 100 | /// A snapshot of the registry index is also included when mirroring or syncing 101 | #[clap(short, long)] 102 | include_index: bool, 103 | #[clap( 104 | short, 105 | env = "CARGO_FETCHER_TIMEOUT", 106 | default_value = "30s", 107 | long_help = "The maximum duration of a single crate request 108 | 109 | Times may be specified with no suffix (default seconds), or one of: 110 | * (s)econds 111 | * (m)inutes 112 | * (h)ours 113 | * (d)ays 114 | 115 | " 116 | )] 117 | timeout: Dur, 118 | #[clap(subcommand)] 119 | cmd: Command, 120 | } 121 | 122 | async fn init_backend( 123 | loc: cf::CloudLocation<'_>, 124 | _credentials: Option, 125 | _timeout: Duration, 126 | ) -> anyhow::Result> { 127 | match loc { 128 | #[cfg(feature = "gcs")] 129 | cf::CloudLocation::Gcs(gcs) => { 130 | let cred_path = _credentials.context("GCS credentials not specified")?; 131 | 132 | let gcs = cf::backends::gcs::GcsBackend::new(gcs, &cred_path, _timeout).await?; 133 | Ok(Arc::new(gcs)) 134 | } 135 | #[cfg(not(feature = "gcs"))] 136 | cf::CloudLocation::Gcs(_) => anyhow::bail!("GCS backend not enabled"), 137 | #[cfg(feature = "s3")] 138 | cf::CloudLocation::S3(loc) => { 139 | // Special case local testing 140 | let make_bucket = loc.bucket == "testing" && loc.host.contains("localhost"); 141 | 142 | let s3 = cf::backends::s3::S3Backend::new(loc, _timeout).await?; 143 | 144 | if make_bucket { 145 | s3.make_bucket() 146 | .await 147 | .context("failed to create test bucket")?; 148 | } 149 | 150 | Ok(Arc::new(s3)) 151 | } 152 | #[cfg(not(feature = "s3"))] 153 | cf::CloudLocation::S3(_) => anyhow::bail!("S3 backend not enabled"), 154 | cf::CloudLocation::Fs(loc) => Ok(Arc::new(cf::backends::fs::FsBackend::new(loc)?)), 155 | #[cfg(feature = "blob")] 156 | cf::CloudLocation::Blob(loc) => Ok(Arc::new(cf::backends::blob::BlobBackend::new( 157 | loc, _timeout, 158 | )?)), 159 | #[cfg(not(feature = "blob"))] 160 | cf::CloudLocation::Blob(_) => anyhow::bail!("blob backend not enabled"), 161 | } 162 | } 163 | 164 | async fn real_main() -> anyhow::Result<()> { 165 | use clap::Parser; 166 | let args = Opts::parse_from({ 167 | std::env::args().enumerate().filter_map(|(i, a)| { 168 | if i == 1 && a == "fetcher" { 169 | None 170 | } else { 171 | Some(a) 172 | } 173 | }) 174 | }); 175 | 176 | let mut env_filter = tracing_subscriber::EnvFilter::from_default_env(); 177 | 178 | // If a user specifies a log level, we assume it only pertains to cargo_fetcher, 179 | // if they want to trace other crates they can use the RUST_LOG env approach 180 | env_filter = env_filter.add_directive(format!("cargo_fetcher={}", args.log_level).parse()?); 181 | 182 | let subscriber = tracing_subscriber::FmtSubscriber::builder().with_env_filter(env_filter); 183 | 184 | if args.json { 
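        // Structured JSON output, one object per line, which is friendlier to
        // CI log aggregators than the human-readable formatter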
185 | tracing::subscriber::set_global_default(subscriber.json().finish()) 186 | .context("failed to set default subscriber")?; 187 | } else { 188 | tracing::subscriber::set_global_default(subscriber.finish()) 189 | .context("failed to set default subscriber")?; 190 | }; 191 | 192 | let cloud_location = cf::util::CloudLocationUrl::from_url(args.url.clone())?; 193 | let location = cf::util::parse_cloud_location(&cloud_location)?; 194 | let backend = init_backend(location, args.credentials, args.timeout.0).await?; 195 | 196 | // Since we can take multiple lock files unlike...every? other cargo command, 197 | // we'll just decide that the first one is the most important and where config 198 | // data is pulled from 199 | let lock_files = args.lock_files; 200 | anyhow::ensure!( 201 | !lock_files.is_empty(), 202 | "must provide at least one Cargo.lock" 203 | ); 204 | 205 | let lock_file = &lock_files[0]; 206 | 207 | // Note that unlike cargo (since we require a Cargo.lock), we don't use the 208 | // current directory as the root when resolving cargo configurations, but 209 | // rather the directory in which the lockfile is located 210 | let root_dir = if lock_file.is_relative() { 211 | let root_dir = std::env::current_dir().context("unable to acquire current directory")?; 212 | let mut root_dir = cf::util::path(&root_dir)?.to_owned(); 213 | root_dir.push(lock_file); 214 | root_dir.pop(); 215 | root_dir 216 | } else { 217 | let mut root_dir = lock_file.clone(); 218 | root_dir.pop(); 219 | root_dir 220 | }; 221 | 222 | let cargo_root = cf::cargo::determine_cargo_root(Some(&root_dir)) 223 | .context("failed to determine $CARGO_HOME")?; 224 | 225 | let registries = cf::read_cargo_config(cargo_root.clone(), root_dir)?; 226 | 227 | let (krates, registries) = cf::cargo::read_lock_files(lock_files, registries) 228 | .context("failed to get crates from lock file")?; 229 | 230 | match args.cmd { 231 | Command::Mirror(margs) => { 232 | let ctx = cf::Ctx::new(None, backend, krates, registries) 233 | .context("failed to create context")?; 234 | mirror::cmd(ctx, args.include_index, margs).await 235 | } 236 | Command::Sync(sargs) => { 237 | let ctx = cf::Ctx::new(Some(cargo_root), backend, krates, registries) 238 | .context("failed to create context")?; 239 | sync::cmd(ctx, args.include_index, sargs).await 240 | } 241 | } 242 | } 243 | 244 | #[tokio::main] 245 | async fn main() { 246 | match real_main().await { 247 | Ok(_) => {} 248 | Err(e) => { 249 | tracing::error!("{:#}", e); 250 | std::process::exit(1); 251 | } 252 | } 253 | } 254 | -------------------------------------------------------------------------------- /src/cmds/mirror.rs: -------------------------------------------------------------------------------- 1 | use anyhow::Error; 2 | use cf::{mirror, Ctx}; 3 | use tracing::{error, info}; 4 | 5 | #[derive(clap::Parser)] 6 | pub struct Args { 7 | #[clap( 8 | short, 9 | default_value = "1d", 10 | long_help = "The duration for which the index will not be replaced after its most recent update. 
11 | 12 | Times may be specified with no suffix (default seconds), or one of: 13 | * (s)econds 14 | * (m)inutes 15 | * (h)ours 16 | * (d)ays 17 | 18 | " 19 | )] 20 | max_stale: crate::Dur, 21 | } 22 | 23 | pub(crate) async fn cmd(ctx: Ctx, include_index: bool, args: Args) -> Result<(), Error> { 24 | let regs = ctx.registry_sets(); 25 | 26 | async_scoped::TokioScope::scope_and_block(|s| { 27 | if include_index { 28 | s.spawn(async { 29 | mirror::registry_indices(&ctx, args.max_stale.0, regs).await; 30 | info!("finished uploading registry indices"); 31 | }); 32 | } 33 | 34 | s.spawn(async { 35 | match mirror::crates(&ctx).await { 36 | Ok(_) => info!("finished uploading crates"), 37 | Err(e) => error!("failed to mirror crates: {:#}", e), 38 | } 39 | }); 40 | }); 41 | 42 | Ok(()) 43 | } 44 | -------------------------------------------------------------------------------- /src/cmds/sync.rs: -------------------------------------------------------------------------------- 1 | use anyhow::Error; 2 | use cf::{sync, Ctx}; 3 | use tracing::{error, info}; 4 | 5 | #[derive(clap::Parser)] 6 | pub struct Args {} 7 | 8 | pub(crate) async fn cmd(ctx: Ctx, include_index: bool, _args: Args) -> Result<(), Error> { 9 | ctx.prep_sync_dirs()?; 10 | 11 | let root = ctx.root_dir.clone(); 12 | let backend = ctx.backend.clone(); 13 | let registries = ctx.registries.clone(); 14 | 15 | async_scoped::TokioScope::scope_and_block(|s| { 16 | if include_index { 17 | s.spawn(async { 18 | info!("syncing registries index"); 19 | sync::registry_indices(root, backend, registries).await; 20 | info!("synced registries index"); 21 | }); 22 | } 23 | 24 | s.spawn(async { 25 | match sync::crates(&ctx).await { 26 | Ok(summary) => { 27 | info!( 28 | bytes = summary.total_bytes, 29 | succeeded = summary.good, 30 | failed = summary.bad, 31 | "synced crates" 32 | ); 33 | } 34 | Err(e) => error!(err = ?e, "failed to sync crates"), 35 | } 36 | }); 37 | }); 38 | 39 | Ok(()) 40 | } 41 | -------------------------------------------------------------------------------- /src/fetch.rs: -------------------------------------------------------------------------------- 1 | use crate::{cargo::Source, util, Krate}; 2 | use anyhow::Context as _; 3 | use bytes::Bytes; 4 | use tracing::warn; 5 | 6 | pub(crate) enum KratePackage { 7 | Registry(Bytes), 8 | Git(crate::git::GitPackage), 9 | } 10 | 11 | impl KratePackage { 12 | pub(crate) fn len(&self) -> usize { 13 | match self { 14 | Self::Registry(bytes) => bytes.len(), 15 | Self::Git(gs) => gs.db.len() + gs.checkout.as_ref().map_or(0, |s| s.len()), 16 | } 17 | } 18 | } 19 | 20 | #[tracing::instrument(level = "debug")] 21 | pub(crate) async fn from_registry( 22 | client: &crate::HttpClient, 23 | krate: &Krate, 24 | ) -> anyhow::Result { 25 | match &krate.source { 26 | Source::Git(gs) => { 27 | let gs = gs.clone(); 28 | tokio::task::spawn_blocking(move || crate::git::clone(&gs).map(KratePackage::Git)) 29 | .await 30 | .unwrap() 31 | } 32 | Source::Registry(rs) => { 33 | let url = rs.registry.download_url(krate); 34 | 35 | // Depending on how many crates we are mirroring, we can be sending 36 | // hundreds of concurrent requests to crates.io...and hit 37 | // https://github.com/seanmonstar/reqwest/issues/1748 38 | let res = loop { 39 | let res = client.get(&url).send().await; 40 | 41 | match res { 42 | Err(err) if err.is_connect() || err.is_timeout() || err.is_request() => { 43 | continue 44 | } 45 | Err(err) => return Err(err.into()), 46 | Ok(res) => break res, 47 | } 48 | }; 49 | 50 | let response = 
res.error_for_status()?; 51 | let res = util::convert_response(response).await?; 52 | let content = res.into_body(); 53 | 54 | util::validate_checksum(&content, &rs.chksum)?; 55 | 56 | Ok(KratePackage::Registry(content)) 57 | } 58 | } 59 | } 60 | 61 | #[tracing::instrument(level = "debug", skip(krates))] 62 | pub async fn registry( 63 | client: &crate::HttpClient, 64 | registry: &crate::cargo::Registry, 65 | krates: Vec, 66 | ) -> anyhow::Result { 67 | use tame_index::index; 68 | 69 | // We don't bother to support older versions of cargo that don't support 70 | // bare checkouts of registry indexes, as that has been in since early 2017 71 | // See https://github.com/rust-lang/cargo/blob/0e38712d4d7b346747bf91fb26cce8df6934e178/src/cargo/sources/registry/remote.rs#L61 72 | // for details on why cargo still does what it does 73 | let temp_dir = tempfile::tempdir()?; 74 | let temp_dir_path = util::path(temp_dir.path())?; 75 | 76 | let index_url = registry.index.as_str().to_owned(); 77 | 78 | let write_cache = tracing::span!(tracing::Level::DEBUG, "write-cache-entries"); 79 | 80 | let location = index::IndexLocation { 81 | // note this is a bit of a misnomer, it could be the crates.io registry 82 | url: index::IndexUrl::NonCratesIo(index_url.clone().into()), 83 | root: index::IndexPath::Exact(temp_dir_path.to_owned()), 84 | }; 85 | 86 | // Writes .cache entries in the registry's directory for all of the specified 87 | // crates. 88 | // 89 | // Cargo will write these entries itself if they don't exist the first time it 90 | // tries to access the crate's metadata in the case of git, but this noticeably 91 | // increases initial fetch times. (see src/cargo/sources/registry/index.rs) 92 | // 93 | // For sparse indices, the cache entries are the _only_ local state, and if 94 | // not present means every missing crate needs to be fetched, without the 95 | // possibility of the local cache entry being up to date according to the 96 | // etag/modified time of the remote 97 | match registry.protocol { 98 | crate::cargo::RegistryProtocol::Git => { 99 | tokio::task::spawn_blocking(move || -> anyhow::Result<()> { 100 | let rgi = { 101 | let span = tracing::debug_span!("fetch"); 102 | let _fs = span.enter(); 103 | 104 | tame_index::index::RemoteGitIndex::new( 105 | tame_index::index::GitIndex::new(location) 106 | .context("unable to open git index")?, 107 | &tame_index::index::FileLock::unlocked(), 108 | ) 109 | .context("failed to fetch")? 
110 | }; 111 | 112 | write_cache.in_scope(|| { 113 | // As with git2, gix::Repository is not thread safe, we _could_ 114 | // read blobs in serial then write in parallel, but that's not really 115 | // worth it for a few hundred crates (probably), but see 116 | // https://github.com/frewsxcv/rust-crates-index/blob/a9b60653efb72d9e6be98c4f8fe56194475cbd3f/src/git/mod.rs#L316-L360 117 | // for a way this could be done in the future 118 | let unlocked = &tame_index::index::FileLock::unlocked(); 119 | for name in krates { 120 | let Ok(name) = name.as_str().try_into() else { 121 | warn!("crate name '{name}' is invalid"); 122 | continue; 123 | }; 124 | if let Err(err) = 125 | rgi.krate(name, true /* write the cache entry */, unlocked) 126 | { 127 | warn!("unable to write .cache entry: {err:#}"); 128 | } 129 | } 130 | }); 131 | 132 | Ok(()) 133 | }) 134 | .await 135 | .unwrap()?; 136 | } 137 | crate::cargo::RegistryProtocol::Sparse => { 138 | let index = index::AsyncRemoteSparseIndex::new( 139 | index::SparseIndex::new(location)?, 140 | client.clone(), 141 | ); 142 | 143 | #[allow(unsafe_code)] 144 | // SAFETY: we don't forget the future :p 145 | unsafe { 146 | async_scoped::TokioScope::scope_and_collect(|s| { 147 | s.spawn(async { 148 | // We don't particularly care if an individual crate fails here 149 | // since the index will be healed by cargo, but still good to 150 | // know if something was amiss 151 | for (name, res) in index 152 | .krates( 153 | krates.into_iter().collect(), 154 | true, 155 | None, 156 | &tame_index::index::FileLock::unlocked(), 157 | ) 158 | .await 159 | { 160 | match res { 161 | Ok(Some(_)) => {} 162 | Ok(None) => { 163 | warn!("index entry for '{name}' was not found"); 164 | } 165 | Err(err) => { 166 | warn!("unable to write .cache entry for '{name}': {err:#}"); 167 | } 168 | } 169 | } 170 | }); 171 | 172 | s.spawn(async { 173 | let write_config = async { 174 | let url = 175 | format!("{}config.json", index_url.split_once('+').unwrap().1); 176 | 177 | let res = loop { 178 | let res = client.get(&url).send().await; 179 | 180 | match res { 181 | Err(err) 182 | if err.is_connect() 183 | || err.is_timeout() 184 | || err.is_request() => 185 | { 186 | continue 187 | } 188 | Err(err) => Err(err) 189 | .context("failed to send request for config.json")?, 190 | Ok(res) => break res, 191 | } 192 | }; 193 | 194 | let config_body = res 195 | .bytes() 196 | .await 197 | .context("failed to read config.json response body")?; 198 | 199 | std::fs::write(temp_dir.path().join("config.json"), &config_body) 200 | .context("failed to write config.json") 201 | }; 202 | 203 | if let Err(err) = write_config.await { 204 | warn!("unable to write config.json: {err:#}"); 205 | } 206 | }); 207 | }) 208 | .await; 209 | } 210 | } 211 | }; 212 | 213 | util::pack_tar(temp_dir_path) 214 | } 215 | -------------------------------------------------------------------------------- /src/git.rs: -------------------------------------------------------------------------------- 1 | use crate::{util, PathBuf}; 2 | use anyhow::{Context as _, Result}; 3 | 4 | pub struct GitPackage { 5 | /// The tarball of the bare repository 6 | pub db: bytes::Bytes, 7 | /// The tarball of the checked out repository, including all submodules 8 | pub checkout: Option, 9 | } 10 | 11 | const DIR: gix::remote::Direction = gix::remote::Direction::Fetch; 12 | use gix::progress::Discard; 13 | 14 | /// Clones the git source and all of its submodules 15 | /// 16 | /// The bare git clone acts as the source for `$CARGO_HOME/git/db/*` 17 | 
/// The checkout and submodules clones act as the source for `$CARGO_HOME/git/checkouts/*` 18 | #[tracing::instrument(level = "debug")] 19 | pub fn clone(src: &crate::cargo::GitSource) -> Result { 20 | // Create a temporary directory to fetch the repo into 21 | let temp_dir = tempfile::tempdir()?; 22 | // Create another temporary directory where we *may* checkout submodules into 23 | let submodule_dir = tempfile::tempdir()?; 24 | 25 | let (repo, _out) = { 26 | let span = tracing::debug_span!("fetch"); 27 | let _fs = span.enter(); 28 | gix::prepare_clone_bare(src.url.as_str(), temp_dir.path()) 29 | .context("failed to prepare clone")? 30 | .with_remote_name("origin")? 31 | .configure_remote(|remote| { 32 | Ok(remote 33 | .with_fetch_tags(gix::remote::fetch::Tags::All) 34 | .with_refspecs(["+HEAD:refs/remotes/origin/HEAD"], DIR)?) 35 | }) 36 | .fetch_only(&mut Discard, &Default::default()) 37 | .context("failed to fetch")? 38 | }; 39 | 40 | // Ensure that the repo actually contains the revision we need 41 | repo.find_object(src.rev.id).with_context(|| { 42 | format!( 43 | "'{}' doesn't contain rev '{}'", 44 | src.url, 45 | src.rev.id.to_hex() 46 | ) 47 | })?; 48 | 49 | let fetch_rev = src.rev.id; 50 | let temp_db_path = util::path(temp_dir.path())?; 51 | let sub_dir_path = util::path(submodule_dir.path())?; 52 | 53 | let (checkout, db) = rayon::join( 54 | || -> anyhow::Result<_> { 55 | let span = tracing::info_span!("cloning submodules", %src.url); 56 | let _ms = span.enter(); 57 | 58 | crate::git::prepare_submodules( 59 | temp_db_path.to_owned(), 60 | sub_dir_path.to_owned(), 61 | fetch_rev, 62 | )?; 63 | 64 | util::pack_tar(sub_dir_path) 65 | }, 66 | || -> anyhow::Result<_> { util::pack_tar(temp_db_path) }, 67 | ); 68 | 69 | Ok(crate::git::GitPackage { 70 | db: db?, 71 | checkout: match checkout { 72 | Ok(co) => Some(co), 73 | Err(err) => { 74 | tracing::error!("failed to checkout: {err:#}"); 75 | None 76 | } 77 | }, 78 | }) 79 | } 80 | 81 | #[tracing::instrument(level = "debug")] 82 | pub(crate) fn checkout( 83 | src: PathBuf, 84 | target: PathBuf, 85 | rev: gix::ObjectId, 86 | ) -> Result { 87 | // We require the target directory to be clean 88 | std::fs::create_dir_all(target.parent().unwrap()).context("failed to create checkout dir")?; 89 | if target.exists() { 90 | remove_dir_all::remove_dir_all(&target).context("failed to clean checkout dir")?; 91 | } 92 | 93 | // NOTE: gix does not support local hardlink clones like git/libgit2 does, 94 | // and is essentially doing `git clone file:// `, which, by 95 | // default, only gets the history for the default branch, meaning if the revision 96 | // comes from a non-default branch it won't be available on the checkout 97 | // clone. 
So...we cheat and shell out to git, at least for now 98 | { 99 | let start = std::time::Instant::now(); 100 | let mut cmd = std::process::Command::new("git"); 101 | cmd.args(["clone", "--local", "--no-checkout"]) 102 | .args([&src, &target]) 103 | .stderr(std::process::Stdio::piped()) 104 | .stdout(std::process::Stdio::piped()); 105 | 106 | let output = cmd.output().context("failed to spawn git")?; 107 | if !output.status.success() { 108 | let error = String::from_utf8(output.stderr) 109 | .unwrap_or_else(|_err| "git error output is non-utf8".to_owned()); 110 | 111 | anyhow::bail!("failed to perform local clone:\n{error}"); 112 | } 113 | 114 | tracing::debug!("local clone performed in {}ms", start.elapsed().as_millis()); 115 | } 116 | 117 | let mut repo = gix::open(target).context("failed to open local clone")?; 118 | 119 | modify_config(&mut repo, |config| { 120 | let mut core = config 121 | .section_mut("core", None) 122 | .context("unable to find core section")?; 123 | core.set( 124 | "autocrlf" 125 | .try_into() 126 | .context("autocrlf is not a valid key")?, 127 | "false".into(), 128 | ); 129 | Ok(()) 130 | }) 131 | .context("failed to set autocrlf")?; 132 | 133 | reset(&mut repo, rev)?; 134 | 135 | Ok(repo) 136 | } 137 | 138 | use gix::bstr::BString; 139 | use gix::bstr::ByteSlice; 140 | 141 | struct Submodule { 142 | name: BString, 143 | path: BString, 144 | url: BString, 145 | branch: Option, 146 | head_id: Option, 147 | } 148 | 149 | impl Submodule { 150 | #[inline] 151 | fn path(&self) -> &crate::Path { 152 | crate::Path::new(self.path.to_str().unwrap()) 153 | } 154 | } 155 | 156 | fn read_submodule_config(config: &gix::config::File<'_>) -> Vec { 157 | let Some(iter) = config.sections_by_name("submodule") else { 158 | return Vec::new(); 159 | }; 160 | 161 | iter.filter_map(|sec| { 162 | // Each submodule _should_ be a subsection with a name, that 163 | // (usually, always?) 
matches the path the submodule will be 164 | // checked out to 165 | let name = sec.header().subsection_name()?; 166 | 167 | // Every submodule must have a url 168 | let url = sec.value("url")?; 169 | 170 | // Every submodule must have a path 171 | let path = sec.value("path")?; 172 | 173 | // Validate the path is utf-8 174 | path.to_str().ok()?; 175 | 176 | // Branch is optional 177 | let branch = sec.value("branch"); 178 | 179 | Some(Submodule { 180 | name: name.into(), 181 | url: url.into_owned(), 182 | path: path.into_owned(), 183 | branch: branch.map(|b| b.into_owned()), 184 | head_id: None, 185 | }) 186 | }) 187 | .collect() 188 | } 189 | 190 | fn modify_config( 191 | repo: &mut gix::Repository, 192 | mutate: impl FnOnce(&mut gix::config::SnapshotMut<'_>) -> Result<()>, 193 | ) -> Result<()> { 194 | let mut config = repo.config_snapshot_mut(); 195 | 196 | mutate(&mut config)?; 197 | 198 | { 199 | use std::io::Write; 200 | let mut local_config = std::fs::OpenOptions::new() 201 | .create(false) 202 | .write(true) 203 | .append(false) 204 | .open( 205 | config 206 | .meta() 207 | .path 208 | .as_deref() 209 | .context("local config with path set")?, 210 | ) 211 | .context("failed to open local config")?; 212 | local_config.write_all(config.detect_newline_style())?; 213 | config 214 | .write_to_filter(&mut local_config, &mut |s| { 215 | s.meta().source == gix::config::Source::Local 216 | }) 217 | .context("failed to write submodules to config")?; 218 | } 219 | 220 | config 221 | .commit() 222 | .context("failed to commit submodule(s) to config")?; 223 | Ok(()) 224 | } 225 | 226 | fn reset(repo: &mut gix::Repository, rev: gix::ObjectId) -> Result<()> { 227 | let workdir = repo 228 | .work_dir() 229 | .context("unable to checkout, repository is bare")?; 230 | let root_tree = repo 231 | .find_object(rev) 232 | .context("failed to find revision")? 233 | .peel_to_tree() 234 | .context("unable to peel to tree")? 
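        // only the tree's id is needed; the index below is built directly from it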
235 | .id;
236 |
237 | use gix::odb::FindExt;
238 | let index = gix::index::State::from_tree(&root_tree, |oid, buf| {
239 | repo.objects.find_tree_iter(oid, buf).ok()
240 | })
241 | .with_context(|| format!("failed to create index from tree '{root_tree}'"))?;
242 | let mut index = gix::index::File::from_state(index, repo.index_path());
243 |
244 | let opts = gix::worktree::state::checkout::Options {
245 | destination_is_initially_empty: false,
246 | overwrite_existing: true,
247 | ..Default::default()
248 | };
249 |
250 | gix::worktree::state::checkout(
251 | &mut index,
252 | workdir,
253 | {
254 | let objects = repo.objects.clone().into_arc()?;
255 | move |oid, buf| objects.find_blob(oid, buf)
256 | },
257 | &Discard,
258 | &Discard,
259 | &Default::default(),
260 | opts,
261 | )
262 | .context("failed to checkout")?;
263 |
264 | index
265 | .write(Default::default())
266 | .context("failed to write index")?;
267 |
268 | // cargo uses the head oid to check if it needs to update the submodule,
269 | // so force-set HEAD to the appropriate commit; since we don't really
270 | // care about updates for the head, we just set it directly rather than
271 | // via a reference
272 | let head_path = repo.path().join("HEAD");
273 | std::fs::write(head_path, format!("{}\n", rev.to_hex()))?;
274 |
275 | Ok(())
276 | }
277 |
278 | #[tracing::instrument(level = "debug")]
279 | pub(crate) fn prepare_submodules(src: PathBuf, target: PathBuf, rev: gix::ObjectId) -> Result<()> {
280 | fn update_submodules(repo: &mut gix::Repository, rev: gix::ObjectId) -> Result<()> {
281 | // We only get here if checkout succeeds, so we're guaranteed to have a working dir
282 | let work_dir = repo.work_dir().unwrap().to_owned();
283 |
284 | let submodules_config = work_dir.join(".gitmodules");
285 | if !submodules_config.exists() {
286 | return Ok(());
287 | }
288 |
289 | // Open the .gitmodules file, which has the same format as regular git config
290 | // Note we don't use the more convenient gix::config::File::from_path_no_includes
291 | // here since it forces a 'static lifetime :(
292 | let subm_config_buf =
293 | std::fs::read(&submodules_config).context("failed to read .gitmodules")?;
294 | let submodules_file = {
295 | let meta = gix::config::file::Metadata {
296 | path: Some(submodules_config.clone()),
297 | source: gix::config::Source::Local,
298 | level: 0,
299 | trust: gix::sec::Trust::Full,
300 | };
301 |
302 | gix::config::File::from_bytes_no_includes(&subm_config_buf, meta, Default::default())
303 | .context("failed to deserialize .gitmodules")?
304 | };
305 |
306 | let submodules = {
307 | let mut submodules = read_submodule_config(&submodules_file);
308 | if submodules.is_empty() {
309 | tracing::info!("repo contained a .gitmodules file, but it had no valid submodules");
310 | return Ok(());
311 | }
312 |
313 | // This is really all that git2::Submodule::init(false) does, write
314 | // each submodule url to the git config. Note that we follow cargo here
315 | // by not updating the submodule if it exists already, but I'm not actually
316 | // sure if that is correct...
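// Two phases follow: first each submodule's path/url (and optional branch)
// is persisted into the repo's local git config, then each submodule's
// commit id is resolved from the parent tree so we know which revision
// its checkout should be reset to.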
317 | modify_config(repo, |config| {
318 | for subm in &submodules {
319 | if config
320 | .section("submodule", Some(subm.name.as_bstr()))
321 | .is_ok()
322 | {
323 | tracing::debug!("submodule {} already exists in config", subm.name);
324 | continue;
325 | }
326 |
327 | let mut sec = config
328 | .new_section("submodule", Some(subm.name.clone().into()))
329 | .context("failed to add submodule section")?;
330 | sec.push("path".try_into()?, Some(subm.path.as_bstr()));
331 | sec.push("url".try_into()?, Some(subm.url.as_bstr()));
332 |
333 | if let Some(branch) = &subm.branch {
334 | sec.push("branch".try_into()?, Some(branch.as_bstr()));
335 | }
336 | }
337 |
338 | Ok(())
339 | })
340 | .context("failed to add submodules")?;
341 |
342 | // Now, find the actual head id of the module so that we can determine
343 | // what tree to set the submodule checkout to
344 | let tree = repo
345 | .find_object(rev)
346 | .context("failed to find rev")?
347 | .peel_to_tree()
348 | .context("failed to peel rev to tree")?;
349 | let mut buf = Vec::new();
350 | for subm in &mut submodules {
351 | let span = tracing::info_span!("locating submodule head", name = %subm.name, path = %subm.path);
352 | let _ms = span.enter();
353 |
354 | let path = subm.path();
355 |
356 | let entry = match tree.lookup_entry(path, &mut buf) {
357 | Ok(Some(e)) => e,
358 | Ok(None) => {
359 | tracing::warn!("unable to locate submodule path in tree");
360 | continue;
361 | }
362 | Err(err) => {
363 | tracing::warn!(err = %err, "failed to lookup entry for submodule");
364 | continue;
365 | }
366 | };
367 |
368 | if !matches!(entry.mode(), gix::object::tree::EntryMode::Commit) {
369 | tracing::warn!(kind = ?entry.mode(), "path is not a submodule");
370 | continue;
371 | }
372 |
373 | subm.head_id = Some(entry.id().detach());
374 | }
375 |
376 | submodules
377 | };
378 |
379 | // The initial config editing is the only thing we really need to do
380 | // serially, the rest of the work of actually cloning/updating submodules
381 | // can be done in parallel since they are each distinct entities
382 | use rayon::prelude::*;
383 | let mut res = Vec::new();
384 | submodules
385 | .into_par_iter()
386 | .map(|subm| update_submodule(&work_dir, subm).context("failed to update submodule"))
387 | .collect_into_vec(&mut res);
388 |
389 | res.into_iter().collect::<Result<Vec<_>>>()?;
390 | Ok(())
391 | }
392 |
393 | #[tracing::instrument(level = "debug", skip_all)]
394 | fn update_submodule(parent: &std::path::Path, subm: Submodule) -> Result<()> {
395 | // A submodule which is listed in .gitmodules but not actually
396 | // checked out will not have a head id, so we should ignore it.
397 | let Some(head) = subm.head_id else {
398 | tracing::debug!("skipping submodule '{}' without HEAD", subm.name);
399 | return Ok(());
400 | };
401 |
402 | let submodule_path = parent.join(subm.path());
403 |
404 | let open_or_init_repo = || -> Result<_> {
405 | let open_with_complete_config =
406 | gix::open::Options::default().permissions(gix::open::Permissions {
407 | config: gix::open::permissions::Config {
408 | // Be sure to get all configuration, some of which is only known by the git binary.
409 | // That way we are sure to see all the systems credential helpers
410 | git_binary: true,
411 | ..Default::default()
412 | },
413 | ..Default::default()
414 | });
415 |
416 | let repo = if let Ok(repo) = gix::open_opts(&submodule_path, open_with_complete_config)
417 | {
418 | repo
419 | } else {
420 | // Blow away the submodules directory in case it exists but is
421 | // corrupted somehow, which causes gix to fail to open it; if there
422 | // is an error, the init or subsequent clone _might_ fail but also
423 | // might not!
424 | let _ = remove_dir_all::remove_dir_all(&submodule_path);
425 | gix::init(&submodule_path).context("failed to init submodule")?
426 | };
427 |
428 | Ok(repo)
429 | };
430 |
431 | // If the submodule hasn't been checked out yet, we need to clone it. If
432 | // it has been checked out and the head is the same as the submodule's
433 | // head, then we can skip an update and keep recursing.
434 | let mut repo = open_or_init_repo()?;
435 | if repo
436 | .head_commit()
437 | .ok()
438 | .map_or(false, |commit| commit.id == head)
439 | {
440 | return update_submodules(&mut repo, head);
441 | }
442 |
443 | // We perform fetches and update the reflog, and gix forces us to set a
444 | // committer for these; this is particularly true in CI environments
445 | // that likely don't have a global committer set
446 | modify_config(&mut repo, |config| {
447 | let mut core = config
448 | .section_mut("core", None)
449 | .context("unable to find core section")?;
450 | core.set(
451 | "autocrlf"
452 | .try_into()
453 | .context("autocrlf is not a valid key")?,
454 | "false".into(),
455 | );
456 |
457 | config
458 | .set_raw_value("committer", None, "name", "cargo-fetcher")
459 | .context("failed to set committer.name")?;
460 | // Note we _have_ to set the email as well, but luckily gix does not actually
461 | // validate if it's a proper email or not :)
462 | config
463 | .set_raw_value("committer", None, "email", "")
464 | .context("failed to set committer.email")?;
465 | Ok(())
466 | })?;
467 |
468 | let mut remote = repo
469 | .remote_at(subm.url.as_bstr())
470 | .context("invalid submodule url")?;
471 |
472 | remote
473 | .replace_refspecs(
474 | [
475 | "+refs/heads/*:refs/remotes/origin/*",
476 | "+HEAD:refs/remotes/origin/HEAD",
477 | ],
478 | DIR,
479 | )
480 | .expect("valid statically known refspec");
481 | remote = remote.with_fetch_tags(gix::remote::fetch::Tags::All);
482 |
483 | // Perform the actual fetch
484 | let outcome = remote
485 | .connect(DIR)
486 | .context("failed to connect to remote")?
487 | .prepare_fetch(&mut Discard, Default::default())
488 | .context("failed to prepare fetch")?
489 | .receive(&mut Discard, &Default::default())
490 | .context("failed to fetch submodule")?;
491 |
492 | tame_index::utils::git::write_fetch_head(&repo, &outcome, &remote)
493 | .context("failed to write FETCH_HEAD")?;
494 |
495 | reset(&mut repo, head)?;
496 | update_submodules(&mut repo, head)
497 | }
498 |
499 | let mut repo = checkout(src, target, rev)?;
500 | update_submodules(&mut repo, rev)
501 | }
502 | -------------------------------------------------------------------------------- /src/lib.rs: --------------------------------------------------------------------------------
1 | use anyhow::Error;
2 | pub use camino::{Utf8Path as Path, Utf8PathBuf as PathBuf};
3 | use std::{fmt, sync::Arc};
4 | pub use url::Url;
5 |
6 | pub mod backends;
7 | pub mod cargo;
8 | mod fetch;
9 | pub(crate) mod git;
10 | pub mod mirror;
11 | pub mod sync;
12 | pub mod util;
13 |
14 | pub type HttpClient = reqwest::Client;
15 |
16 | pub use cargo::{read_cargo_config, GitSource, Registry, RegistryProtocol, RegistrySource, Source};
17 |
18 | #[derive(Eq, Clone, Debug)]
19 | pub struct Krate {
20 | pub name: String,
21 | pub version: String, // We just treat versions as opaque strings
22 | pub source: Source,
23 | }
24 |
25 | impl Ord for Krate {
26 | #[inline]
27 | fn cmp(&self, b: &Self) -> std::cmp::Ordering {
28 | self.source.cmp(&b.source)
29 | }
30 | }
31 |
32 | impl PartialOrd for Krate {
33 | #[inline]
34 | fn partial_cmp(&self, b: &Self) -> Option<std::cmp::Ordering> {
35 | Some(self.cmp(b))
36 | }
37 | }
38 |
39 | impl PartialEq for Krate {
40 | fn eq(&self, b: &Self) -> bool {
41 | self.source.eq(&b.source)
42 | }
43 | }
44 |
45 | impl PartialEq<Registry> for Krate {
46 | fn eq(&self, b: &Registry) -> bool {
47 | match &self.source {
48 | Source::Git(..) => false,
49 | Source::Registry(rs) => b.eq(&rs.registry),
50 | }
51 | }
52 | }
53 |
54 | impl Krate {
55 | #[inline]
56 | pub fn cloud_id(&self, is_checkout: bool) -> CloudId<'_> {
57 | CloudId {
58 | inner: self,
59 | is_checkout,
60 | }
61 | }
62 |
63 | #[inline]
64 | pub fn local_id(&self) -> LocalId<'_> {
65 | LocalId { inner: self }
66 | }
67 | }
68 |
69 | impl fmt::Display for Krate {
70 | fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
71 | let typ = match &self.source {
72 | Source::Git { .. } => "git",
73 | Source::Registry { .. } => "registry",
74 | };
75 |
76 | write!(f, "{}-{}({typ})", self.name, self.version)
77 | }
78 | }
79 |
80 | pub struct LocalId<'a> {
81 | inner: &'a Krate,
82 | }
83 |
84 | impl<'a> fmt::Display for LocalId<'a> {
85 | fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
86 | match &self.inner.source {
87 | Source::Git(gs) => f.write_str(&gs.ident),
88 | Source::Registry(..)
=> {
89 | write!(f, "{}-{}.crate", self.inner.name, self.inner.version)
90 | }
91 | }
92 | }
93 | }
94 |
95 | pub struct CloudId<'a> {
96 | inner: &'a Krate,
97 | is_checkout: bool,
98 | }
99 |
100 | impl<'a> fmt::Display for CloudId<'a> {
101 | fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
102 | match &self.inner.source {
103 | Source::Git(gs) => write!(
104 | f,
105 | "{}-{}{}",
106 | gs.ident,
107 | gs.rev.short(),
108 | if self.is_checkout { "-checkout" } else { "" }
109 | ),
110 | Source::Registry(rs) => f.write_str(&rs.chksum),
111 | }
112 | }
113 | }
114 |
115 | #[allow(dead_code)]
116 | pub struct GcsLocation<'a> {
117 | pub bucket: &'a str,
118 | pub prefix: &'a str,
119 | }
120 |
121 | #[allow(dead_code)]
122 | pub struct S3Location<'a> {
123 | pub bucket: &'a str,
124 | pub region: &'a str,
125 | pub host: &'a str,
126 | pub prefix: &'a str,
127 | }
128 |
129 | pub struct FilesystemLocation<'a> {
130 | pub path: &'a Path,
131 | }
132 |
133 | pub struct BlobLocation<'a> {
134 | pub prefix: &'a str,
135 | pub container: &'a str,
136 | }
137 |
138 | pub enum CloudLocation<'a> {
139 | Gcs(GcsLocation<'a>),
140 | S3(S3Location<'a>),
141 | Fs(FilesystemLocation<'a>),
142 | Blob(BlobLocation<'a>),
143 | }
144 |
145 | pub type Storage = Arc<dyn Backend + Sync + Send>;
146 |
147 | pub struct Ctx {
148 | pub client: HttpClient,
149 | pub backend: Storage,
150 | pub krates: Vec<Krate>,
151 | pub registries: Vec<Arc<Registry>>,
152 | pub root_dir: PathBuf,
153 | }
154 |
155 | impl Ctx {
156 | pub fn new(
157 | root_dir: Option<PathBuf>,
158 | backend: Storage,
159 | krates: Vec<Krate>,
160 | registries: Vec<Arc<Registry>>,
161 | ) -> Result<Self, Error> {
162 | Ok(Self {
163 | client: HttpClient::builder().build()?,
164 | backend,
165 | krates,
166 | registries,
167 | root_dir: root_dir.unwrap_or_else(|| PathBuf::from(".")),
168 | })
169 | }
170 |
171 | /// Create the registry and git directories as they are the root of multiple other ones
172 | pub fn prep_sync_dirs(&self) -> Result<(), Error> {
173 | std::fs::create_dir_all(self.root_dir.join("registry"))?;
174 | std::fs::create_dir_all(self.root_dir.join("git"))?;
175 |
176 | Ok(())
177 | }
178 |
179 | pub fn registry_sets(&self) -> Vec<mirror::RegistrySet> {
180 | self.registries
181 | .iter()
182 | .map(|registry| {
183 | // Gather the names of all of the crates sourced in the registry so we
184 | // can add .cache entries
185 | let krates = self
186 | .krates
187 | .iter()
188 | .filter_map(|krate| {
189 | if krate == registry.as_ref() {
190 | Some(krate.name.clone())
191 | } else {
192 | None
193 | }
194 | })
195 | .collect();
196 |
197 | mirror::RegistrySet {
198 | registry: registry.clone(),
199 | krates,
200 | }
201 | })
202 | .collect()
203 | }
204 | }
205 |
206 | impl fmt::Debug for Ctx {
207 | fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
208 | write!(f, "krates: {}", self.krates.len())
209 | }
210 | }
211 |
212 | pub type Timestamp = time::OffsetDateTime;
213 |
214 | #[async_trait::async_trait]
215 | pub trait Backend: fmt::Debug {
216 | async fn fetch(&self, id: CloudId<'_>) -> Result<bytes::Bytes, Error>;
217 | async fn upload(&self, source: bytes::Bytes, id: CloudId<'_>) -> Result<usize, Error>;
218 | async fn list(&self) -> Result<Vec<String>, Error>;
219 | async fn updated(&self, id: CloudId<'_>) -> Result<Option<Timestamp>, Error>;
220 | }
221 | -------------------------------------------------------------------------------- /src/mirror.rs: --------------------------------------------------------------------------------
1 | use crate::{fetch, Ctx, Krate, Registry, Source};
2 | use anyhow::Error;
3 | use std::time::Duration;
4 | use tracing::{debug, error, info};
5 |
6
| pub struct RegistrySet {
7 | pub registry: std::sync::Arc<Registry>,
8 | pub krates: Vec<String>,
9 | }
10 |
11 | #[tracing::instrument(level = "debug", skip_all)]
12 | pub async fn registry_indices(
13 | ctx: &crate::Ctx,
14 | max_stale: Duration,
15 | registries: Vec<RegistrySet>,
16 | ) -> usize {
17 | #[allow(unsafe_code)]
18 | // SAFETY: we don't forget the future :p
19 | unsafe {
20 | async_scoped::TokioScope::scope_and_collect(|s| {
21 | for rset in registries {
22 | s.spawn(async {
23 | match registry_index(ctx, max_stale, rset).await {
24 | Ok(size) => size,
25 | Err(err) => {
26 | error!("{err:#}");
27 | 0
28 | }
29 | }
30 | });
31 | }
32 | })
33 | .await
34 | .1
35 | .into_iter()
36 | .map(|res| res.unwrap())
37 | .sum()
38 | }
39 | }
40 |
41 | #[tracing::instrument(level = "debug", skip_all)]
42 | pub async fn registry_index(
43 | ctx: &crate::Ctx,
44 | max_stale: Duration,
45 | rset: RegistrySet,
46 | ) -> Result<usize, Error> {
47 | let ident = rset.registry.short_name().to_owned();
48 |
49 | // Create a fake krate for the index, we don't have to worry about clashing
50 | // since we use a `.` which is not an allowed character in crate names
51 | let krate = Krate {
52 | name: ident.clone(),
53 | version: "2.0.0".to_owned(),
54 | source: Source::Git(crate::cargo::GitSource {
55 | url: rset.registry.index.clone(),
56 | ident,
57 | rev: crate::cargo::GitRev::parse("feedc0de00000000000000000000000000000000").unwrap(),
58 | follow: None,
59 | }),
60 | };
61 |
62 | // Retrieve the metadata for the last updated registry entry, and update
63 | // it only if it's stale
64 | if let Ok(Some(last_updated)) = ctx.backend.updated(krate.cloud_id(false)).await {
65 | let now = time::OffsetDateTime::now_utc();
66 |
67 | if now - last_updated < max_stale {
68 | info!(
69 | "the registry ({}) was last updated {last_updated}, skipping update as it is less than {max_stale:?} old",
70 | rset.registry.index
71 | );
72 | return Ok(0);
73 | }
74 | }
75 |
76 | let index = fetch::registry(
77 | &ctx.client,
78 | &rset.registry,
79 | rset.krates.into_iter().collect(),
80 | )
81 | .await?;
82 |
83 | debug!(
84 | size = index.len(),
85 | "{} index downloaded", rset.registry.index
86 | );
87 |
88 | let span = tracing::debug_span!("upload");
89 | let _us = span.enter();
90 | ctx.backend.upload(index, krate.cloud_id(false)).await
91 | }
92 |
93 | pub async fn crates(ctx: &Ctx) -> Result<usize, Error> {
94 | debug!("checking existing crates...");
95 | let mut names = ctx.backend.list().await?;
96 |
97 | names.sort();
98 |
99 | let mut to_mirror = Vec::with_capacity(names.len());
100 | for krate in &ctx.krates {
101 | let cid = krate.cloud_id(false).to_string();
102 | if names
103 | .binary_search_by(|name| name.as_str().cmp(&cid))
104 | .is_err()
105 | {
106 | to_mirror.push(krate.clone());
107 | }
108 | }
109 |
110 | // Remove duplicates, e.g.
when 2 crates are sourced from the same git repository
111 | to_mirror.sort();
112 | to_mirror.dedup();
113 |
114 | if to_mirror.is_empty() {
115 | info!("all crates already uploaded");
116 | return Ok(0);
117 | }
118 |
119 | info!(
120 | "mirroring {} of {} crates",
121 | to_mirror.len(),
122 | ctx.krates.len()
123 | );
124 |
125 | let client = &ctx.client;
126 | let backend = &ctx.backend;
127 |
128 | #[allow(unsafe_code)]
129 | // SAFETY: we don't forget the future :p
130 | let total_bytes = unsafe {
131 | async_scoped::TokioScope::scope_and_collect(|s| {
132 | for krate in to_mirror {
133 | s.spawn(async move {
134 | let span = tracing::info_span!("mirror", %krate);
135 | let _ms = span.enter();
136 |
137 | let fetch_res = {
138 | let span = tracing::debug_span!("fetch");
139 | let _ms = span.enter();
140 | fetch::from_registry(client, &krate).await
141 | };
142 |
143 | match fetch_res {
144 | Ok(krate_data) => {
145 | debug!(size = krate_data.len(), "fetched");
146 |
147 | {
148 | let span = tracing::debug_span!("upload");
149 | let _us = span.enter();
150 |
151 | match krate_data {
152 | fetch::KratePackage::Registry(buffer) => {
153 | match backend.upload(buffer, krate.cloud_id(false)).await {
154 | Ok(len) => len,
155 | Err(err) => {
156 | error!("failed to upload crate tarball: {err:#}");
157 | 0
158 | }
159 | }
160 | }
161 | fetch::KratePackage::Git(gs) => {
162 | let db = gs.db;
163 | let co = krate.clone();
164 | let checkout = gs.checkout;
165 | let db_backend = backend.clone();
166 |
167 | let db_fut = tokio::task::spawn(async move {
168 | match db_backend.upload(db, krate.cloud_id(false)).await {
169 | Ok(l) => l,
170 | Err(err) => {
171 | error!("failed to upload git db: {err:#}");
172 | 0
173 | }
174 | }
175 | });
176 |
177 | let co_backend = backend.clone();
178 | let co_fut = tokio::task::spawn(async move {
179 | if let Some(buffer) = checkout {
180 | match co_backend.upload(buffer, co.cloud_id(true)).await {
181 | Ok(l) => l,
182 | Err(err) => {
183 | error!("failed to upload git checkout: {err:#}");
184 | 0
185 | }
186 | }
187 | } else {
188 | 0
189 | }
190 | });
191 |
192 | let (db, co) = tokio::join!(db_fut, co_fut);
193 | db.unwrap() + co.unwrap()
194 | }
195 | }
196 | }
197 | }
198 | Err(err) => {
199 | error!(krate = %krate, "failed to retrieve: {err:#}");
200 | 0
201 | }
202 | }
203 | });
204 | }
205 | })
206 | .await
207 | .1
208 | .into_iter()
209 | .map(|res| res.unwrap())
210 | .sum()
211 | };
212 |
213 | Ok(total_bytes)
214 | }
215 | -------------------------------------------------------------------------------- /src/sync.rs: --------------------------------------------------------------------------------
1 | use crate::{util, Krate, Path, PathBuf, Registry, RegistryProtocol, Source};
2 | use anyhow::Context as _;
3 | use std::io::Write;
4 | use tracing::{debug, error, info, warn};
5 |
6 | pub const INDEX_DIR: &str = "registry/index";
7 | pub const CACHE_DIR: &str = "registry/cache";
8 | pub const SRC_DIR: &str = "registry/src";
9 | pub const GIT_DB_DIR: &str = "git/db";
10 | pub const GIT_CO_DIR: &str = "git/checkouts";
11 |
12 | pub async fn registry_indices(
13 | root_dir: PathBuf,
14 | backend: crate::Storage,
15 | registries: Vec<std::sync::Arc<Registry>>,
16 | ) {
17 | #[allow(unsafe_code)]
18 | // SAFETY: we don't forget the future :p
19 | unsafe {
20 | async_scoped::TokioScope::scope_and_collect(|s| {
21 | for registry in registries {
22 | s.spawn(async {
23 | if let Err(err) = registry_index(&root_dir, backend.clone(), registry).await {
24 | error!("{err:#}");
25 | }
26 |
});
27 | }
28 | })
29 | .await;
30 | }
31 | }
32 |
33 | /// Just skip the index if the git directory already exists, as a patch on
34 | /// top of an existing repo via git fetch is presumably faster
35 | async fn maybe_fetch_index(index_path: &Path, registry: &Registry) -> anyhow::Result<()> {
36 | anyhow::ensure!(gix::open(index_path).is_ok(), "failed to open index repo");
37 | info!("registry index already exists, fetching instead");
38 |
39 | let index_path = index_path.to_owned();
40 | let index_url = registry.index.to_string();
41 | tokio::task::spawn_blocking(move || {
42 | let last_updated = index_path.join(".last-updated");
43 |
44 | let gi = tame_index::GitIndex::new(tame_index::IndexLocation {
45 | url: tame_index::IndexUrl::NonCratesIo(index_url.as_str().into()),
46 | root: tame_index::IndexPath::Exact(index_path),
47 | })?;
48 |
49 | {
50 | let span = tracing::debug_span!("fetch", index = index_url.clone());
51 | let _sf = span.enter();
52 | let unlocked = &tame_index::index::FileLock::unlocked();
53 | let mut rgi = tame_index::index::RemoteGitIndex::new(gi, unlocked)?;
54 | rgi.fetch(unlocked)?;
55 | }
56 |
57 | std::fs::File::create(last_updated).context("failed to create .last-updated")?;
58 | Ok(())
59 | })
60 | .await
61 | .unwrap()
62 | }
63 |
64 | #[tracing::instrument(skip(backend))]
65 | pub async fn registry_index(
66 | root_dir: &Path,
67 | backend: crate::Storage,
68 | registry: std::sync::Arc<Registry>,
69 | ) -> anyhow::Result<()> {
70 | let ident = registry.short_name().to_owned();
71 |
72 | let index_path = {
73 | let mut ip = root_dir.join(INDEX_DIR);
74 | ip.push(&ident);
75 | ip
76 | };
77 | std::fs::create_dir_all(&index_path).context("failed to create index dir")?;
78 |
79 | if registry.protocol == RegistryProtocol::Git {
80 | match maybe_fetch_index(&index_path, &registry).await {
81 | Ok(()) => return Ok(()),
82 | Err(err) => {
83 | debug!(error = %err, "unable to fetch index");
84 | // Attempt to nuke the directory in case there are actually files
85 | // there, to give the best chance for the tarball unpack to work
86 | let _ = remove_dir_all::remove_dir_all(&index_path);
87 | }
88 | }
89 | }
90 |
91 | let krate = Krate {
92 | name: ident.clone(),
93 | version: "2.0.0".to_owned(),
94 | source: Source::Git(crate::cargo::GitSource {
95 | url: registry.index.clone(),
96 | ident,
97 | rev: crate::cargo::GitRev::parse("feedc0de00000000000000000000000000000000").unwrap(),
98 | follow: None,
99 | }),
100 | };
101 |
102 | let index_data = backend.fetch(krate.cloud_id(false)).await?;
103 |
104 | if let Err(e) = util::unpack_tar(index_data, util::Encoding::Zstd, &index_path) {
105 | error!(err = ?e, "failed to unpack crates.io-index");
106 | }
107 |
108 | Ok(())
109 | }
110 |
111 | #[tracing::instrument(level = "debug", skip_all, fields(name = krate.name, version = krate.version, rev = %rev.id))]
112 | fn sync_git(
113 | db_dir: &Path,
114 | co_dir: &Path,
115 | krate: &Krate,
116 | pkg: crate::git::GitPackage,
117 | rev: &crate::cargo::GitRev,
118 | ) -> anyhow::Result<()> {
119 | let db_path = db_dir.join(krate.local_id().to_string());
120 |
121 | // Always just blow away and do a sync from the remote tar
122 | if db_path.exists() {
123 | remove_dir_all::remove_dir_all(&db_path).context("failed to remove existing DB path")?;
124 | }
125 |
126 | let crate::git::GitPackage { db, checkout } = pkg;
127 |
128 | let unpack_path = db_path.clone();
129 | let compressed = db.len();
130 | let uncompressed = util::unpack_tar(db, util::Encoding::Zstd, &unpack_path)?;
131 | debug!(
compressed = compressed, 133 | uncompressed = uncompressed, 134 | "unpacked db dir" 135 | ); 136 | 137 | let co_path = co_dir.join(format!("{}/{}", krate.local_id(), rev.short())); 138 | 139 | // If we get here, it means there wasn't a .cargo-ok in the dir, even if the 140 | // rest of it is checked out and ready, so blow it away just in case as we are 141 | // doing a clone/checkout from a local bare repository rather than a remote one 142 | if co_path.exists() { 143 | debug!("removing checkout dir {co_path} for {krate}"); 144 | remove_dir_all::remove_dir_all(&co_path) 145 | .with_context(|| format!("unable to remove {co_path}"))?; 146 | } 147 | 148 | // If we have a checkout tarball, use that, as it will include submodules, 149 | // otherwise do a checkout 150 | match checkout { 151 | Some(checkout) => { 152 | let compressed = checkout.len(); 153 | let uncompressed = util::unpack_tar(checkout, util::Encoding::Zstd, &co_path)?; 154 | debug!( 155 | compressed = compressed, 156 | uncompressed = uncompressed, 157 | "unpacked checkout dir" 158 | ); 159 | } 160 | None => { 161 | // Do a checkout of the bare clone if we didn't/couldn't unpack the 162 | // checkout tarball 163 | crate::git::checkout(db_path, co_path.clone(), rev.id)?; 164 | } 165 | } 166 | 167 | let ok = co_path.join(".cargo-ok"); 168 | std::fs::File::create(&ok).with_context(|| ok.to_string())?; 169 | 170 | Ok(()) 171 | } 172 | 173 | #[tracing::instrument(level = "debug", skip_all, fields(name = krate.name, version = krate.version))] 174 | fn sync_package( 175 | cache_dir: &Path, 176 | src_dir: &Path, 177 | krate: &Krate, 178 | data: bytes::Bytes, 179 | chksum: &str, 180 | ) -> anyhow::Result<()> { 181 | util::validate_checksum(&data, chksum)?; 182 | 183 | let packed_krate_path = cache_dir.join(format!("{}", krate.local_id())); 184 | 185 | let pack_data = data.clone(); 186 | let packed_path = packed_krate_path; 187 | 188 | let (pack_write, unpack) = rayon::join( 189 | // Spawn a worker thread to write the original pack file to disk as we don't 190 | // particularly care when it is done 191 | || -> anyhow::Result<()> { 192 | let s = tracing::debug_span!("pack_write"); 193 | let _ = s.enter(); 194 | let mut f = std::fs::File::create(&packed_path)?; 195 | 196 | let _ = f.set_len(pack_data.len() as u64); 197 | f.write_all(&pack_data)?; 198 | f.sync_all()?; 199 | 200 | debug!(bytes = pack_data.len(), "wrote pack file to disk"); 201 | Ok(()) 202 | }, 203 | || -> anyhow::Result<()> { 204 | let mut src_path = src_dir.join(format!("{}", krate.local_id())); 205 | 206 | // Remove the .crate extension 207 | src_path.set_extension(""); 208 | let ok = src_path.join(".cargo-ok"); 209 | 210 | if !ok.exists() { 211 | if src_path.exists() { 212 | debug!("cleaning src/"); 213 | if let Err(e) = remove_dir_all::remove_dir_all(&src_path) { 214 | error!(err = ?e, "failed to remove src/"); 215 | return Err(e.into()); 216 | } 217 | } 218 | 219 | // Crate tarballs already include the top level directory internally, 220 | // so unpack in the top-level source directory 221 | if let Err(e) = 222 | util::unpack_tar(data, util::Encoding::Gzip, src_path.parent().unwrap()) 223 | { 224 | error!(err = ?e, "failed to unpack to src/"); 225 | return Err(e); 226 | } 227 | 228 | // Create the .cargo-ok file so that cargo doesn't suspect a thing 229 | if let Err(e) = util::write_ok(&ok) { 230 | // If this happens, cargo will just resync and recheckout the repo most likely 231 | warn!(err = ?e, "failed to write .cargo-ok"); 232 | } 233 | } 234 | 235 | Ok(()) 236 | }, 
237 | );
238 |
239 | if let Err(err) = pack_write {
240 | error!(?err, path = ?packed_path, "failed to write tarball to disk");
241 | }
242 |
243 | if let Err(err) = unpack {
244 | error!(?err, "failed to unpack tarball to disk");
245 | }
246 |
247 | Ok(())
248 | }
249 |
250 | fn get_missing_git_sources<'krate>(
251 | ctx: &'krate crate::Ctx,
252 | git_co_dir: &Path,
253 | to_sync: &mut Vec<&'krate Krate>,
254 | ) {
255 | for (rev, ident, krate) in ctx.krates.iter().filter_map(|k| match &k.source {
256 | Source::Git(gs) => Some((gs.rev.short(), &gs.ident, k)),
257 | Source::Registry { .. } => None,
258 | }) {
259 | let path = git_co_dir.join(format!("{ident}/{rev}/.cargo-ok"));
260 |
261 | if !path.exists() {
262 | to_sync.push(krate);
263 | }
264 | }
265 | }
266 |
267 | fn get_missing_registry_sources<'krate>(
268 | ctx: &'krate crate::Ctx,
269 | registry: &Registry,
270 | cache_dir: &Path,
271 | to_sync: &mut Vec<&'krate Krate>,
272 | ) -> anyhow::Result<()> {
273 | let cache_iter = std::fs::read_dir(cache_dir)?;
274 |
275 | let mut cached_crates: Vec<String> = cache_iter
276 | .filter_map(|entry| {
277 | entry
278 | .ok()
279 | .and_then(|entry| entry.file_name().to_str().map(|s| s.to_owned()))
280 | })
281 | .collect();
282 |
283 | cached_crates.sort();
284 |
285 | let mut krate_name = String::with_capacity(128);
286 |
287 | for krate in ctx.krates.iter().filter(|k| *k == registry) {
288 | use std::fmt::Write;
289 | write!(&mut krate_name, "{}", krate.local_id()).unwrap();
290 |
291 | if cached_crates.binary_search(&krate_name).is_err() {
292 | to_sync.push(krate);
293 | }
294 |
295 | krate_name.clear();
296 | }
297 |
298 | Ok(())
299 | }
300 |
301 | #[derive(Debug)]
302 | pub struct Summary {
303 | pub total_bytes: usize,
304 | pub bad: u32,
305 | pub good: u32,
306 | }
307 |
308 | pub async fn crates(ctx: &crate::Ctx) -> anyhow::Result<Summary> {
309 | info!("synchronizing {} crates...", ctx.krates.len());
310 |
311 | let root_dir = &ctx.root_dir;
312 | let git_db_dir = root_dir.join(GIT_DB_DIR);
313 | let git_co_dir = root_dir.join(GIT_CO_DIR);
314 |
315 | std::fs::create_dir_all(&git_db_dir).context("failed to create git/db/")?;
316 | std::fs::create_dir_all(&git_co_dir).context("failed to create git/checkouts/")?;
317 |
318 | info!("checking local cache for missing crates...");
319 | let mut git_sync = Vec::new();
320 | get_missing_git_sources(ctx, &git_co_dir, &mut git_sync);
321 |
322 | let mut registry_sync = Vec::new();
323 | for registry in &ctx.registries {
324 | let (cache_dir, src_dir) = registry.sync_dirs(root_dir);
325 | std::fs::create_dir_all(&cache_dir).context("failed to create registry/cache")?;
326 | std::fs::create_dir_all(src_dir).context("failed to create registry/src")?;
327 |
328 | get_missing_registry_sources(ctx, registry, &cache_dir, &mut registry_sync)?;
329 | }
330 |
331 | // Remove duplicates, e.g.
when 2 crates are sourced from the same git repository 332 | git_sync.sort(); 333 | git_sync.dedup(); 334 | 335 | // probably shouldn't be needed, but why not 336 | registry_sync.sort(); 337 | registry_sync.dedup(); 338 | 339 | if git_sync.is_empty() && registry_sync.is_empty() { 340 | info!("all crates already available on local disk"); 341 | return Ok(Summary { 342 | total_bytes: 0, 343 | good: 0, 344 | bad: 0, 345 | }); 346 | } 347 | 348 | info!( 349 | "synchronizing {} missing crates...", 350 | git_sync.len() + registry_sync.len() 351 | ); 352 | 353 | enum Pkg { 354 | Registry(bytes::Bytes), 355 | Git(crate::git::GitPackage), 356 | } 357 | 358 | // Kick off all the remote I/O first 359 | let mut tasks = tokio::task::JoinSet::new(); 360 | for krate in git_sync 361 | .into_iter() 362 | .chain(registry_sync.into_iter()) 363 | .cloned() 364 | { 365 | let backend = ctx.backend.clone(); 366 | 367 | tasks.spawn(async move { 368 | let span = tracing::info_span!("sync", %krate); 369 | let _ss = span.enter(); 370 | 371 | match &krate.source { 372 | Source::Registry(_rs) => { 373 | match { 374 | let span = tracing::debug_span!("download"); 375 | let _ds = span.enter(); 376 | backend.fetch(krate.cloud_id(false)).await 377 | } { 378 | Ok(krate_data) => { 379 | Some((krate, Pkg::Registry(krate_data))) 380 | } 381 | Err(err) => { 382 | error!(err = ?err, krate = %krate, cloud = %krate.cloud_id(false), "failed to download"); 383 | None 384 | } 385 | } 386 | } 387 | Source::Git(_gs) => { 388 | let kd = krate.clone(); 389 | let kdb = backend.clone(); 390 | let co = krate.clone(); 391 | let (krate_data, checkout) = tokio::join!( 392 | tokio::task::spawn(async move { 393 | let span = tracing::debug_span!("download"); 394 | let _ds = span.enter(); 395 | kdb.fetch(kd.cloud_id(false)).await 396 | }), 397 | tokio::task::spawn(async move { 398 | let span = tracing::debug_span!("download_checkout"); 399 | let _ds = span.enter(); 400 | backend.fetch(co.cloud_id(true)).await.ok() 401 | }), 402 | ); 403 | 404 | let krate_data = match krate_data.unwrap() { 405 | Ok(krate_data) => { 406 | krate_data 407 | } 408 | Err(err) => { 409 | error!(err = ?err, krate = %krate, cloud = %krate.cloud_id(false), "failed to download"); 410 | return None; 411 | } 412 | }; 413 | 414 | let git_pkg = crate::git::GitPackage { 415 | db: krate_data, 416 | checkout: checkout.unwrap(), 417 | }; 418 | 419 | Some((krate, Pkg::Git(git_pkg))) 420 | } 421 | } 422 | }); 423 | } 424 | 425 | let summary = std::sync::Arc::new(std::sync::Mutex::new(Summary { 426 | total_bytes: 0, 427 | bad: 0, 428 | good: 0, 429 | })); 430 | 431 | let (tx, rx) = crossbeam_channel::unbounded::<(Krate, Pkg)>(); 432 | let fs_thread = { 433 | let summary = summary.clone(); 434 | let root_dir = root_dir.clone(); 435 | 436 | std::thread::spawn(move || { 437 | let db_dir = &git_db_dir; 438 | let co_dir = &git_co_dir; 439 | let root_dir = &root_dir; 440 | let summary = &summary; 441 | rayon::scope(|s| { 442 | while let Ok((krate, pkg)) = rx.recv() { 443 | s.spawn(move |_s| { 444 | let synced = match (&krate.source, pkg) { 445 | (Source::Registry(rs), Pkg::Registry(krate_data)) => { 446 | let len = krate_data.len(); 447 | let (cache_dir, src_dir) = rs.registry.sync_dirs(root_dir); 448 | if let Err(err) = sync_package( 449 | &cache_dir, &src_dir, &krate, krate_data, &rs.chksum, 450 | ) { 451 | error!(krate = %krate, "failed to splat package: {err:#}"); 452 | None 453 | } else { 454 | Some(len) 455 | } 456 | } 457 | (Source::Git(gs), Pkg::Git(pkg)) => { 458 | let mut len = 
pkg.db.len();
459 |
460 | if let Some(co) = &pkg.checkout {
461 | len += co.len();
462 | }
463 |
464 | match sync_git(db_dir, co_dir, &krate, pkg, &gs.rev) {
465 | Ok(_) => Some(len),
466 | Err(err) => {
467 | error!(krate = %krate, "failed to splat git repo: {err:#}");
468 | None
469 | }
470 | }
471 | }
472 | _ => unreachable!(),
473 | };
474 |
475 | let mut sum = summary.lock().unwrap();
476 | if let Some(synced) = synced {
477 | sum.good += 1;
478 | sum.total_bytes += synced;
479 | } else {
480 | sum.bad += 1;
481 | }
482 | });
483 | }
484 | });
485 | })
486 | };
487 |
488 | // As each remote I/O op completes, pass it off to the thread pool to do
489 | // the more CPU intensive work of decompression, etc
490 | while let Some(res) = tasks.join_next().await {
491 | let Ok(res) = res else {
492 | continue;
493 | };
494 |
495 | if let Some(pkg) = res {
496 | let _ = tx.send(pkg);
497 | } else {
498 | summary.lock().unwrap().bad += 1;
499 | }
500 | }
501 |
502 | // Drop the sender otherwise we'll deadlock
503 | drop(tx);
504 |
505 | fs_thread.join().expect("failed to join thread");
506 |
507 | Ok(std::sync::Arc::into_inner(summary)
508 | .unwrap()
509 | .into_inner()
510 | .unwrap())
511 | }
512 | -------------------------------------------------------------------------------- /src/util.rs: --------------------------------------------------------------------------------
1 | use crate::{Path, PathBuf};
2 | use anyhow::{bail, Context as _};
3 | use tracing::debug;
4 | use url::Url;
5 |
6 | #[inline]
7 | pub fn convert_request(req: http::Request<Vec<u8>>) -> reqwest::Request {
8 | let (parts, _) = req.into_parts();
9 | http::Request::from_parts(parts, Vec::new())
10 | .try_into()
11 | .unwrap()
12 | }
13 |
14 | pub async fn convert_response(
15 | res: reqwest::Response,
16 | ) -> anyhow::Result<http::Response<bytes::Bytes>> {
17 | let mut builder = http::Response::builder()
18 | .status(res.status())
19 | .version(res.version());
20 |
21 | let headers = builder
22 | .headers_mut()
23 | .context("failed to convert response headers")?;
24 |
25 | headers.extend(
26 | res.headers()
27 | .into_iter()
28 | .map(|(k, v)| (k.clone(), v.clone())),
29 | );
30 |
31 | let body = res.bytes().await?;
32 |
33 | Ok(builder.body(body)?)
34 | }
35 |
36 | pub async fn send_request_with_retry(
37 | client: &crate::HttpClient,
38 | req: reqwest::Request,
39 | ) -> anyhow::Result<reqwest::Response> {
40 | loop {
41 | let reqc = req.try_clone().unwrap();
42 |
43 | match client.execute(reqc).await {
44 | Err(err) if err.is_connect() || err.is_timeout() || err.is_request() => continue,
45 | Err(err) => return Err(err.into()),
46 | Ok(res) => return Ok(res),
47 | }
48 | }
49 | }
50 |
51 | #[derive(Clone, Copy, Debug)]
52 | pub(crate) enum Encoding {
53 | Gzip,
54 | Zstd,
55 | }
56 |
57 | use bytes::Bytes;
58 | use std::io;
59 |
60 | #[tracing::instrument(level = "debug")]
61 | pub(crate) fn unpack_tar(buffer: Bytes, encoding: Encoding, dir: &Path) -> anyhow::Result<u64> {
62 | struct DecoderWrapper<'z, R: io::Read + io::BufRead> {
63 | /// The total bytes read from the compressed stream
64 | total: u64,
65 | inner: Decoder<'z, R>,
66 | }
67 |
68 | #[allow(clippy::large_enum_variant)]
69 | enum Decoder<'z, R: io::Read + io::BufRead> {
70 | Gzip(flate2::read::GzDecoder<R>),
71 | Zstd(zstd::Decoder<'z, R>),
72 | }
73 |
74 | impl<'z, R> io::Read for DecoderWrapper<'z, R>
75 | where
76 | R: io::Read + io::BufRead,
77 | {
78 | fn read(&mut self, buf: &mut [u8]) -> io::Result<usize> {
79 | let read = match &mut self.inner {
80 | Decoder::Gzip(gz) => gz.read(buf),
81 | Decoder::Zstd(zstd) => zstd.read(buf),
82 | };
83 |
84 | let read = read?;
85 | self.total += read as u64;
86 | Ok(read)
87 | }
88 | }
89 |
90 | use bytes::Buf;
91 | let buf_reader = buffer.reader();
92 |
93 | let decoder = match encoding {
94 | Encoding::Gzip => {
95 | // zstd::Decoder automatically wraps the Read(er) in a BufReader, so do
96 | // that explicitly for gzip so the types match
97 | let buf_reader = std::io::BufReader::new(buf_reader);
98 | Decoder::Gzip(flate2::read::GzDecoder::new(buf_reader))
99 | }
100 | Encoding::Zstd => Decoder::Zstd(zstd::Decoder::new(buf_reader)?),
101 | };
102 |
103 | let mut archive_reader = tar::Archive::new(DecoderWrapper {
104 | total: 0,
105 | inner: decoder,
106 | });
107 |
108 | #[cfg(unix)]
109 | #[allow(clippy::unnecessary_cast)]
110 | {
111 | use std::sync::OnceLock;
112 | static UMASK: OnceLock<libc::mode_t> = OnceLock::new();
113 | archive_reader.set_mask(
114 | *UMASK.get_or_init(|| {
115 | #[allow(unsafe_code)]
116 | // SAFETY: Syscalls are unsafe. Calling `umask` twice is even less safe in a
117 | // multithreaded program, since it doesn't provide a way to retrieve the
118 | // value without modifying it. We use a static `OnceLock` here to ensure
119 | // it only gets called once during the entire program lifetime.
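// The set-and-restore dance below works because umask(2) returns the
// previous mask: we set a throwaway value, capture the old mask it
// returns, and immediately put that old mask back.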
unsafe {
121 | let umask = libc::umask(0o022);
122 | libc::umask(umask);
123 | umask
124 | }
125 | }) as u32, // it is u16 on macos
126 | );
127 | }
128 |
129 | if let Err(e) = archive_reader.unpack(dir) {
130 | // Attempt to remove anything that may have been written so that we
131 | // _hopefully_ don't mess up cargo itself
132 | if dir.exists() {
133 | if let Err(e) = remove_dir_all::remove_dir_all(dir) {
134 | tracing::error!("error trying to remove contents of {dir}: {e}");
135 | }
136 | }
137 |
138 | return Err(e).context("failed to unpack");
139 | }
140 |
141 | Ok(archive_reader.into_inner().total)
142 | }
143 |
144 | #[tracing::instrument(level = "debug")]
145 | pub(crate) fn pack_tar(path: &Path) -> anyhow::Result<Bytes> {
146 | // If we don't allocate adequate space in our output buffer, things
147 | // go very poorly for everyone involved
148 | let mut estimated_size = 0;
149 | const TAR_HEADER_SIZE: u64 = 512;
150 | for entry in walkdir::WalkDir::new(path)
151 | .into_iter()
152 | .filter_map(|e| e.ok())
153 | {
154 | estimated_size += TAR_HEADER_SIZE;
155 | if let Ok(md) = entry.metadata() {
156 | estimated_size += md.len();
157 |
158 | // Add write permissions to all files, this is to
159 | // get around an issue where unpacking tar files on
160 | // Windows will result in errors if there are read-only
161 | // directories
162 | #[cfg(windows)]
163 | {
164 | let mut perms = md.permissions();
165 | perms.set_readonly(false);
166 | std::fs::set_permissions(entry.path(), perms)?;
167 | }
168 | }
169 | }
170 |
171 | struct Writer<'z, W: io::Write> {
172 | encoder: zstd::Encoder<'z, W>,
173 | original: usize,
174 | }
175 |
176 | // zstd has a pointer in it, which means it isn't Sync, but
177 | // this _should_ be fine as writing of the tar is never going to
178 | // do a write until a previous one has succeeded, as otherwise
179 | // the stream could be corrupted regardless of the actual write
180 | // implementation, so this should be fine.
:tm:
181 | // #[allow(unsafe_code)]
182 | // unsafe impl<'z, W: io::Write + Sync> Sync for Writer<'z, W> {}
183 |
184 | impl<'z, W> io::Write for Writer<'z, W>
185 | where
186 | W: io::Write,
187 | {
188 | fn write(&mut self, buf: &[u8]) -> io::Result<usize> {
189 | self.original += buf.len();
190 | self.encoder.write(buf)
191 | }
192 |
193 | fn flush(&mut self) -> io::Result<()> {
194 | self.encoder.flush()
195 | }
196 | }
197 |
198 | use bytes::BufMut;
199 | let out_buffer = bytes::BytesMut::with_capacity(estimated_size as usize);
200 | let buf_writer = out_buffer.writer();
201 |
202 | let zstd_encoder = zstd::Encoder::new(buf_writer, 9)?;
203 |
204 | let mut archiver = tar::Builder::new(Writer {
205 | encoder: zstd_encoder,
206 | original: 0,
207 | });
208 | archiver.append_dir_all(".", path)?;
209 | archiver.finish()?;
210 |
211 | let writer = archiver.into_inner()?;
212 | let buf_writer = writer.encoder.finish()?;
213 | let out_buffer = buf_writer.into_inner();
214 |
215 | debug!(
216 | input = writer.original,
217 | output = out_buffer.len(),
218 | ratio = (out_buffer.len() as f64 / writer.original as f64 * 100.0) as u32,
219 | "compressed"
220 | );
221 |
222 | Ok(out_buffer.freeze())
223 | }
224 |
225 | /// Validates the specified buffer's SHA-256 checksum matches the specified value
226 | pub fn validate_checksum(buffer: &[u8], expected: &str) -> anyhow::Result<()> {
227 | // All of cargo's checksums are currently SHA256
228 | anyhow::ensure!(
229 | expected.len() == 64,
230 | "hex checksum length is {} instead of expected 64",
231 | expected.len()
232 | );
233 |
234 | let content_digest = ring::digest::digest(&ring::digest::SHA256, buffer);
235 | let digest = content_digest.as_ref();
236 |
237 | for (ind, exp) in expected.as_bytes().chunks(2).enumerate() {
238 | #[inline]
239 | fn parse_hex(b: u8) -> Result<u8, anyhow::Error> {
240 | Ok(match b {
241 | b'A'..=b'F' => b - b'A' + 10,
242 | b'a'..=b'f' => b - b'a' + 10,
243 | b'0'..=b'9' => b - b'0',
244 | c => bail!("invalid byte in expected checksum string {c}"),
245 | })
246 | }
247 |
248 | let mut cur = parse_hex(exp[0])?;
249 | cur <<= 4;
250 | cur |= parse_hex(exp[1])?;
251 |
252 | anyhow::ensure!(digest[ind] == cur, "checksum mismatch, expected {expected}");
253 | }
254 |
255 | Ok(())
256 | }
257 |
258 | fn parse_s3_url(url: &Url) -> anyhow::Result<crate::S3Location<'_>> {
259 | let host = url.host().context("url has no host")?;
260 |
261 | let host_dns = match host {
262 | url::Host::Domain(h) => h,
263 | _ => anyhow::bail!("host name is an IP"),
264 | };
265 |
266 | // We only support virtual-hosted-style references as path style is being deprecated
267 | // mybucket.s3-us-west-2.amazonaws.com
268 | // https://aws.amazon.com/blogs/aws/amazon-s3-path-deprecation-plan-the-rest-of-the-story/
269 | if host_dns.contains("s3") {
270 | let mut bucket = None;
271 | let mut region = None;
272 | let mut host = None;
273 |
274 | for part in host_dns.split('.') {
275 | if part.is_empty() {
276 | anyhow::bail!("malformed host name detected");
277 | }
278 |
279 | if bucket.is_none() {
280 | bucket = Some(part);
281 | continue;
282 | }
283 |
284 | if part.starts_with("s3") && region.is_none() {
285 | let rgn = &part[2..];
286 |
287 | if let Some(r) = rgn.strip_prefix('-') {
288 | region = Some((r, part.len()));
289 | } else {
290 | region = Some(("us-east-1", part.len()));
291 | }
292 | } else if region.is_none() {
293 | bucket = Some(&host_dns[..bucket.as_ref().unwrap().len() + 1 + part.len()]);
294 | } else if host.is_none() {
295 | host = Some(
296 | &host_dns[2 // for the 2 dots
297
| + bucket.as_ref().unwrap().len()
298 | + region.as_ref().unwrap().1..],
299 | );
300 | break;
301 | }
302 | }
303 |
304 | let bucket = bucket.context("bucket not specified")?;
305 | let region = region.context("region not specified")?.0;
306 | let host = host.context("host not specified")?;
307 |
308 | Ok(crate::S3Location {
309 | bucket,
310 | region,
311 | host,
312 | prefix: if !url.path().is_empty() {
313 | &url.path()[1..]
314 | } else {
315 | url.path()
316 | },
317 | })
318 | } else if host_dns == "localhost" {
319 | let root = url.as_str();
320 | Ok(crate::S3Location {
321 | bucket: "testing",
322 | region: "",
323 | host: &root[..root.len() - 1],
324 | prefix: "",
325 | })
326 | } else {
327 | anyhow::bail!("not an s3 url");
328 | }
329 | }
330 |
331 | pub struct CloudLocationUrl {
332 | pub url: Url,
333 | pub path: Option<PathBuf>,
334 | }
335 |
336 | impl CloudLocationUrl {
337 | pub fn from_url(url: Url) -> anyhow::Result<Self> {
338 | if url.scheme() == "file" {
339 | let path = url
340 | .to_file_path()
341 | .map_err(|_sigh| anyhow::anyhow!("failed to parse file path from url {url:?}"))
342 | .and_then(|path| match PathBuf::from_path_buf(path) {
343 | Ok(p) => Ok(p),
344 | Err(err) => Err(anyhow::anyhow!("url path '{}' is not utf-8", err.display())),
345 | })?;
346 | Ok(CloudLocationUrl {
347 | url,
348 | path: Some(path),
349 | })
350 | } else {
351 | Ok(CloudLocationUrl { url, path: None })
352 | }
353 | }
354 | }
355 |
356 | #[inline]
357 | pub fn path(p: &std::path::Path) -> anyhow::Result<&Path> {
358 | p.try_into().context("path is not utf-8")
359 | }
360 |
361 | pub fn parse_cloud_location(
362 | cloud_url: &CloudLocationUrl,
363 | ) -> anyhow::Result<crate::CloudLocation<'_>> {
364 | let CloudLocationUrl { url, path: _path } = cloud_url;
365 | match url.scheme() {
366 | #[cfg(feature = "gcs")]
367 | "gs" => {
368 | let bucket = url.domain().context("url doesn't contain a bucket")?;
369 | // Remove the leading slash that url gives us
370 | let path = if !url.path().is_empty() {
371 | &url.path()[1..]
372 | } else {
373 | url.path()
374 | };
375 |
376 | let loc = crate::GcsLocation {
377 | bucket,
378 | prefix: path,
379 | };
380 |
381 | Ok(crate::CloudLocation::Gcs(loc))
382 | }
383 | #[cfg(not(feature = "gcs"))]
384 | "gs" => {
385 | anyhow::bail!("GCS support was not enabled, you must compile with the 'gcs' feature")
386 | }
387 | "file" => {
388 | let path = _path.as_ref().unwrap();
389 | Ok(crate::CloudLocation::Fs(crate::FilesystemLocation { path }))
390 | }
391 | "http" | "https" => {
392 | let s3 = parse_s3_url(url).context("failed to parse s3 url")?;
393 |
394 | if cfg!(feature = "s3") {
395 | Ok(crate::CloudLocation::S3(s3))
396 | } else {
397 | anyhow::bail!("S3 support was not enabled, you must compile with the 's3' feature")
398 | }
399 | }
400 | #[cfg(feature = "blob")]
401 | "blob" => {
402 | let container = url.domain().context("url doesn't contain a container")?;
403 | let prefix = if !url.path().is_empty() {
404 | &url.path()[1..]
405 | } else { 406 | url.path() 407 | }; 408 | Ok(crate::CloudLocation::Blob(crate::BlobLocation { 409 | prefix, 410 | container, 411 | })) 412 | } 413 | #[cfg(not(feature = "blob"))] 414 | "blob" => { 415 | anyhow::bail!("Blob support was not enabled, you must compile with the 'blob' feature") 416 | } 417 | scheme => anyhow::bail!("the scheme '{}' is not supported", scheme), 418 | } 419 | } 420 | 421 | pub(crate) fn write_ok(to: &Path) -> anyhow::Result<()> { 422 | let mut f = std::fs::File::create(to).with_context(|| format!("failed to create: {to}"))?; 423 | 424 | use std::io::Write; 425 | f.write_all(b"{\"v\":1}")?; 426 | Ok(()) 427 | } 428 | 429 | #[cfg(test)] 430 | mod test { 431 | use super::*; 432 | use tame_index::utils::url_to_local_dir; 433 | 434 | #[test] 435 | fn idents_urls() { 436 | let url = Url::parse("git+https://github.com/gfx-rs/genmesh?rev=71abe4d").unwrap(); 437 | 438 | assert_eq!( 439 | url_to_local_dir(url.as_str()).unwrap().dir_name, 440 | "genmesh-401fe503e87439cc" 441 | ); 442 | 443 | let url = Url::parse("git+https://github.com/EmbarkStudios/cpal?rev=d59b4de#d59b4decf72a96932a1482cc27fe4c0b50c40d32").unwrap(); 444 | 445 | assert_eq!( 446 | url_to_local_dir(url.as_str()).unwrap().dir_name, 447 | "cpal-a7ffd7cabefac714" 448 | ); 449 | } 450 | 451 | #[test] 452 | fn gets_proper_registry_ident() { 453 | use crate::cargo::RegistryProtocol; 454 | let crates_io_registry = crate::Registry::crates_io(RegistryProtocol::Git); 455 | 456 | assert_eq!( 457 | "github.com-1ecc6299db9ec823", 458 | crates_io_registry.short_name() 459 | ); 460 | 461 | let crates_io_sparse_registry = crate::Registry::crates_io(RegistryProtocol::Sparse); 462 | 463 | assert_eq!( 464 | "index.crates.io-6f17d22bba15001f", 465 | crates_io_sparse_registry.short_name() 466 | ); 467 | } 468 | 469 | #[test] 470 | fn validates_checksums() { 471 | let expected = "b94d27b9934d3e08a52e52d7da7dabfac484efe37a5380ee9088f7ace2efcde9"; 472 | 473 | validate_checksum(b"hello world", expected).unwrap(); 474 | } 475 | 476 | #[test] 477 | fn parses_s3_virtual_hosted_style() { 478 | let url = Url::parse("http://johnsmith.net.s3.amazonaws.com/homepage.html").unwrap(); 479 | let loc = parse_s3_url(&url).unwrap(); 480 | 481 | assert_eq!(loc.bucket, "johnsmith.net"); 482 | assert_eq!(loc.region, "us-east-1"); 483 | assert_eq!(loc.host, "amazonaws.com"); 484 | assert_eq!(loc.prefix, "homepage.html"); 485 | 486 | let url = 487 | Url::parse("http://johnsmith.eu.s3-eu-west-1.amazonaws.com/homepage.html").unwrap(); 488 | let loc = parse_s3_url(&url).unwrap(); 489 | 490 | assert_eq!(loc.bucket, "johnsmith.eu"); 491 | assert_eq!(loc.region, "eu-west-1"); 492 | assert_eq!(loc.host, "amazonaws.com"); 493 | assert_eq!(loc.prefix, "homepage.html"); 494 | 495 | let url = Url::parse("http://mybucket.s3-us-west-2.amazonaws.com/some_prefix/").unwrap(); 496 | let loc = parse_s3_url(&url).unwrap(); 497 | 498 | assert_eq!(loc.bucket, "mybucket"); 499 | assert_eq!(loc.region, "us-west-2"); 500 | assert_eq!(loc.host, "amazonaws.com"); 501 | assert_eq!(loc.prefix, "some_prefix/"); 502 | 503 | let url = Url::parse("http://mybucket.with.many.dots.in.it.s3.amazonaws.com/some_prefix/") 504 | .unwrap(); 505 | let loc = parse_s3_url(&url).unwrap(); 506 | 507 | assert_eq!(loc.bucket, "mybucket.with.many.dots.in.it"); 508 | assert_eq!(loc.region, "us-east-1"); 509 | assert_eq!(loc.host, "amazonaws.com"); 510 | assert_eq!(loc.prefix, "some_prefix/"); 511 | } 512 | } 513 | -------------------------------------------------------------------------------- 
/tests/diff_cargo.rs: --------------------------------------------------------------------------------
1 | use std::{cmp::Ordering, fs::File, path::Path};
2 | use walkdir::{DirEntry, WalkDir};
3 |
4 | #[cfg(unix)]
5 | fn perms(_p: &std::fs::Permissions) -> u32 {
6 | // use std::os::unix::fs::PermissionsExt;
7 | // p.mode()
8 | 0
9 | }
10 |
11 | #[cfg(windows)]
12 | fn perms(_p: &std::fs::Permissions) -> u32 {
13 | 0
14 | }
15 |
16 | fn assert_diff<A: AsRef<Path>, B: AsRef<Path>>(a_base: A, b_base: B) {
17 | let a_walker = walk_dir(&a_base).expect("failed to open root dir");
18 | let b_walker = walk_dir(&b_base).expect("failed to open root dir");
19 |
20 | let write_tree = |p: &Path, walker: walkdir::IntoIter| -> String {
21 | use std::fmt::Write;
22 |
23 | let mut tree = String::with_capacity(4 * 1024);
24 |
25 | for item in walker.filter_entry(|entry| {
26 | let path = entry.path();
27 | if entry.metadata().unwrap().is_dir() {
28 | // Both .git and git/db contain things like pack files that are
29 | // non-deterministic, and are otherwise just uninteresting to check
30 | // as the checked out source matching is what actually matters
31 | !(path.ends_with(".git") || path.strip_prefix(p).unwrap().starts_with("git/db"))
32 | } else {
33 | !(
34 | // We don't write this file, it's a nicety added by cargo but
35 | // not really relevant for the primary use case of short-lived CI
36 | // jobs
37 | path.ends_with("CACHEDIR.TAG") ||
38 | // We don't write this file, again, not really relevant for
39 | // primary use case
40 | path.ends_with(".package-cache")
41 | )
42 | }
43 | }) {
44 | let item = item.unwrap();
45 |
46 | let hash = if item.file_type().is_file() {
47 | hash(item.path())
48 | } else {
49 | 0
50 | };
51 |
52 | let md = item.metadata().unwrap();
53 | let perms = perms(&md.permissions());
54 |
55 | // Strip off the root prefix so only the stems are matched against
56 | let path = item.path().strip_prefix(p).unwrap();
57 |
58 | writeln!(&mut tree, "{} {perms:o} {hash}", path.display()).unwrap();
59 | }
60 |
61 | tree
62 | };
63 |
64 | let a_base = a_base.as_ref();
65 | let b_base = b_base.as_ref();
66 |
67 | let (a, b) = rayon::join(
68 | || write_tree(a_base, a_walker),
69 | || write_tree(b_base, b_walker),
70 | );
71 |
72 | if a != b {
73 | panic!(
74 | "{}\nfetcher: {} cargo: {}",
75 | similar_asserts::SimpleDiff::from_str(&a, &b, "fetcher", "cargo"),
76 | a_base.display(),
77 | b_base.display()
78 | );
79 | }
80 | }
81 |
82 | fn walk_dir<P: AsRef<Path>>(path: P) -> Result<walkdir::IntoIter, walkdir::Error> {
83 | let mut walkdir = WalkDir::new(path).sort_by(compare_by_file_name).into_iter();
84 | if let Some(Err(e)) = walkdir.next() {
85 | Err(e.into())
86 | } else {
87 | Ok(walkdir)
88 | }
89 | }
90 |
91 | #[inline]
92 | fn compare_by_file_name(a: &DirEntry, b: &DirEntry) -> Ordering {
93 | a.file_name().cmp(b.file_name())
94 | }
95 |
96 | fn hash<P: AsRef<Path>>(file: P) -> u64 {
97 | use std::{hash::Hasher, io::Read};
98 | use twox_hash::XxHash64 as xx;
99 |
100 | match File::open(file.as_ref()) {
101 | Ok(mut f) => {
102 | let mut xh = xx::with_seed(0);
103 |
104 | let mut chunk = [0; 8 * 1024];
105 |
106 | loop {
107 | let read = f.read(&mut chunk).unwrap_or(0xdead_beef);
108 |
109 | if read > 0 {
110 | xh.write(&chunk[..read]);
111 | } else {
112 | break;
113 | }
114 | }
115 |
116 | xh.finish()
117 | }
118 | Err(_) => 0xdead_dead,
119 | }
120 | }
121 |
122 | use cargo_fetcher as cf;
123 |
124 | mod tutil;
125 | use tutil as util;
126 |
127 | #[tokio::test]
128 | async fn diff_cargo() {
129 | if std::env::var("CARGO_FETCHER_CRATES_IO_PROTOCOL")
130 | .ok()
131
| .as_deref() 132 | == Some("git") 133 | { 134 | // Git registry is too unstable for diffing as the index changes too often 135 | return; 136 | } 137 | 138 | util::hook_logger(); 139 | 140 | let fs_root = util::tempdir(); 141 | let (the_krates, registries) = cf::cargo::read_lock_files( 142 | vec!["tests/full/Cargo.lock".into()], 143 | vec![util::crates_io_registry()], 144 | ) 145 | .unwrap(); 146 | 147 | let mut fs_ctx = util::fs_ctx(fs_root.pb(), registries); 148 | fs_ctx.krates = the_krates; 149 | 150 | let fetcher_root = util::tempdir(); 151 | let cargo_home = util::tempdir(); 152 | let cargo_home_path = cargo_home.pb(); 153 | 154 | // Fetch with cargo 155 | let cargo_fetch = std::thread::spawn(move || { 156 | std::process::Command::new("cargo") 157 | .env("CARGO_HOME", cargo_home_path) 158 | .args([ 159 | "fetch", 160 | "--quiet", 161 | "--locked", 162 | "--manifest-path", 163 | "tests/full/Cargo.toml", 164 | ]) 165 | .status() 166 | .unwrap(); 167 | }); 168 | 169 | // Synchronize with cargo-fetcher 170 | { 171 | fs_ctx.root_dir = fetcher_root.pb(); 172 | 173 | let registry_sets = fs_ctx.registry_sets(); 174 | 175 | assert_eq!(registry_sets.len(), 1); 176 | let the_registry = fs_ctx.registries[0].clone(); 177 | 178 | cf::mirror::registry_indices(&fs_ctx, std::time::Duration::new(10, 0), registry_sets).await; 179 | cf::mirror::crates(&fs_ctx) 180 | .await 181 | .expect("failed to mirror crates"); 182 | 183 | fs_ctx.prep_sync_dirs().expect("create base dirs"); 184 | cf::sync::crates(&fs_ctx).await.expect("synced crates"); 185 | cf::sync::registry_index(&fs_ctx.root_dir, fs_ctx.backend.clone(), the_registry) 186 | .await 187 | .expect("failed to sync index"); 188 | } 189 | 190 | cargo_fetch.join().unwrap(); 191 | 192 | if std::env::var_os("CARGO_FETCHER_DEBUG_DIFF_CARGO").is_none() { 193 | assert_diff(&fetcher_root, &cargo_home); 194 | } else { 195 | // Can be useful when iterating to keep the temp directories 196 | let fetcher_root = fetcher_root.into_path(); 197 | let cargo_home = cargo_home.into_path(); 198 | 199 | // Compare the outputs to ensure they match "exactly" 200 | assert_diff(fetcher_root, cargo_home); 201 | } 202 | } 203 | 204 | /// Validates that a cargo sync following a fetcher sync should do nothing 205 | #[tokio::test] 206 | async fn nothing_to_do() { 207 | if std::env::var("CARGO_FETCHER_CRATES_IO_PROTOCOL") 208 | .ok() 209 | .as_deref() 210 | == Some("git") 211 | { 212 | // Git registry is too unstable for diffing as the index changes too often 213 | return; 214 | } 215 | 216 | util::hook_logger(); 217 | 218 | let sync_dir = util::tempdir(); 219 | let fs_root = util::tempdir(); 220 | 221 | { 222 | let (the_krates, registries) = cf::cargo::read_lock_files( 223 | vec!["tests/full/Cargo.lock".into()], 224 | vec![util::crates_io_registry()], 225 | ) 226 | .unwrap(); 227 | 228 | let mut fs_ctx = util::fs_ctx(fs_root.pb(), registries); 229 | fs_ctx.krates = the_krates; 230 | fs_ctx.root_dir = sync_dir.pb(); 231 | 232 | let registry_sets = fs_ctx.registry_sets(); 233 | let the_registry = fs_ctx.registries[0].clone(); 234 | 235 | cf::mirror::registry_indices(&fs_ctx, std::time::Duration::new(10, 0), registry_sets).await; 236 | cf::mirror::crates(&fs_ctx) 237 | .await 238 | .expect("failed to mirror crates"); 239 | 240 | fs_ctx.prep_sync_dirs().expect("create base dirs"); 241 | cf::sync::crates(&fs_ctx).await.expect("synced crates"); 242 | cf::sync::registry_index(&fs_ctx.root_dir, fs_ctx.backend.clone(), the_registry) 243 | .await 244 | .expect("failed to sync index"); 245 | } 
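// Run cargo against the CARGO_HOME that cargo-fetcher just populated; if
// the sync was complete, cargo should find nothing left to fetch and
// produce no output at all.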
246 | 247 | let output = std::process::Command::new("cargo") 248 | .env("CARGO_HOME", sync_dir.path()) 249 | .args([ 250 | "fetch", 251 | "--locked", 252 | "--manifest-path", 253 | "tests/full/Cargo.toml", 254 | ]) 255 | .stdout(std::process::Stdio::piped()) 256 | .stderr(std::process::Stdio::piped()) 257 | .output() 258 | .unwrap(); 259 | 260 | let stdout = String::from_utf8(output.stdout).unwrap(); 261 | let stderr = String::from_utf8(output.stderr).unwrap(); 262 | 263 | if !stdout.is_empty() || !stderr.is_empty() { 264 | panic!("expected no output from cargo, got:\nstdout:\n{stdout}\nstderr:{stderr}\n"); 265 | } 266 | } 267 | -------------------------------------------------------------------------------- /tests/full/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "full" 3 | version = "0.1.0" 4 | authors = ["Jake Shadle "] 5 | edition = "2021" 6 | 7 | [dependencies] 8 | # regular basic crate 9 | tame-oauth = "=0.4.2" 10 | # contains several crates, which is important when patching, as they will share 11 | # the same db/checkout dir 12 | cpal = { version = "0.13.5" } 13 | # this repo contains recursive submodules 14 | sentry-contrib-breakpad = { git = "https://github.com/EmbarkStudios/sentry-contrib-rust", rev = "5e10bd5ad" } 15 | # this repo on gitlab requires us to use the .git extension otherwise it will 16 | # redirect, but we still need to calculate the same hash as cargo for the local directory 17 | gilrs = { git = "https://gitlab.com/gilrs-project/gilrs.git", rev = "1bbec17" } 18 | # submodule 19 | meshopt = { git = "https://github.com/EmbarkStudios/meshopt-rs", rev = "16a3046" } 20 | lmdb-rkv = { git = "https://github.com/EmbarkStudios/lmdb-rs", branch = "check-local-lib" } 21 | lmdb-rkv-sys = { git = "https://github.com/EmbarkStudios/lmdb-rs", branch = "check-local-lib" } 22 | 23 | [patch.crates-io] 24 | cpal = { git = "https://github.com/RustAudio/cpal", rev = "971c46346" } 25 | -------------------------------------------------------------------------------- /tests/full/src/lib.rs: -------------------------------------------------------------------------------- 1 | #[cfg(test)] 2 | mod tests { 3 | #[test] 4 | fn it_works() { 5 | assert_eq!(2 + 2, 4); 6 | } 7 | } 8 | -------------------------------------------------------------------------------- /tests/lock.rs: -------------------------------------------------------------------------------- 1 | use cargo_fetcher::{cargo::read_lock_files, Registry, RegistryProtocol}; 2 | 3 | #[test] 4 | fn parses_v2() { 5 | let (krates, _) = read_lock_files( 6 | vec!["tests/v2.lock".into()], 7 | vec![Registry::crates_io(RegistryProtocol::Git)], 8 | ) 9 | .unwrap(); 10 | assert_eq!(krates.len(), 258); 11 | } 12 | 13 | #[test] 14 | fn parses_v3() { 15 | let (krates, _) = read_lock_files( 16 | vec!["tests/v3.lock".into()], 17 | vec![Registry::crates_io(RegistryProtocol::Sparse)], 18 | ) 19 | .unwrap(); 20 | assert_eq!(krates.len(), 223); 21 | } 22 | -------------------------------------------------------------------------------- /tests/multi_one.lock: -------------------------------------------------------------------------------- 1 | [[package]] 2 | name = "autometrics-macros" 3 | version = "0.4.1" 4 | source = "registry+https://github.com/rust-lang/crates.io-index" 5 | checksum = "2edb1335006ff621fe85b2c876f8e77ce31779fce866867b99a300891133aed9" 6 | 7 | [[package]] 8 | name = "axum" 9 | version = "0.6.17" 10 | source = "registry+https://github.com/rust-lang/crates.io-index" 11 
| checksum = "fffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff" 12 | 13 | [[package]] 14 | name = "axum-core" 15 | version = "0.3.4" 16 | source = "registry+https://github.com/rust-lang/crates.io-index" 17 | checksum = "759fa577a247914fd3f7f76d62972792636412fbfd634cd452f6a385a74d2d2c" 18 | 19 | [[package]] 20 | name = "axum-extra" 21 | version = "0.7.4" 22 | source = "registry+https://github.com/rust-lang/crates.io-index" 23 | checksum = "febf23ab04509bd7672e6abe76bd8277af31b679e89fa5ffc6087dc289a448a3" 24 | 25 | [[package]] 26 | name = "axum-live-view" 27 | version = "0.1.0" 28 | source = "git+https://github.com/EmbarkStudios/axum-live-view?branch=main#165e11655aa0094388df1905da8758d7a4f60e3c" 29 | 30 | [[package]] 31 | name = "axum-live-view-macros" 32 | version = "0.1.0" 33 | source = "git+https://github.com/EmbarkStudios/axum-live-view?branch=main#165e11655aa0094388df1905da8758d7a4f60e3c" 34 | 35 | [[package]] 36 | name = "axum-macros" 37 | version = "0.3.7" 38 | source = "registry+https://github.com/rust-lang/crates.io-index" 39 | checksum = "2bb524613be645939e280b7279f7b017f98cf7f5ef084ec374df373530e73277" 40 | 41 | [[package]] 42 | name = "backtrace" 43 | version = "0.3.67" 44 | source = "registry+https://github.com/rust-lang/crates.io-index" 45 | checksum = "233d376d6d185f2a3093e58f283f60f880315b6c60075b01f36b3b85154564ca" 46 | -------------------------------------------------------------------------------- /tests/multi_two.lock: -------------------------------------------------------------------------------- 1 | [[package]] 2 | name = "autometrics-macros" 3 | version = "0.4.1" 4 | source = "registry+https://github.com/rust-lang/crates.io-index" 5 | checksum = "2edb1335006ff621fe85b2c876f8e77ce31779fce866867b99a300891133aed9" 6 | 7 | [[package]] 8 | name = "axum" 9 | version = "0.6.18" 10 | source = "registry+https://github.com/rust-lang/crates.io-index" 11 | checksum = "f8175979259124331c1d7bf6586ee7e0da434155e4b2d48ec2c8386281d8df39" 12 | 13 | [[package]] 14 | name = "axum-core" 15 | version = "0.3.4" 16 | source = "registry+https://github.com/rust-lang/crates.io-index" 17 | checksum = "759fa577a247914fd3f7f76d62972792636412fbfd634cd452f6a385a74d2d2c" 18 | 19 | [[package]] 20 | name = "axum-extra" 21 | version = "0.7.4" 22 | source = "registry+https://github.com/rust-lang/crates.io-index" 23 | checksum = "febf23ab04509bd7672e6abe76bd8277af31b679e89fa5ffc6087dc289a448a3" 24 | 25 | [[package]] 26 | name = "axum-live-view" 27 | version = "0.1.0" 28 | source = "git+https://github.com/EmbarkStudios/axum-live-view?branch=main#165e11655aa0094388df1905da8758d7a4f60e3c" 29 | 30 | [[package]] 31 | name = "axum-macros" 32 | version = "0.3.7" 33 | source = "registry+https://github.com/rust-lang/crates.io-index" 34 | checksum = "2bb524613be645939e280b7279f7b017f98cf7f5ef084ec374df373530e73277" 35 | 36 | [[package]] 37 | name = "bark" 38 | version = "0.2.0" -------------------------------------------------------------------------------- /tests/pretty-crate.txt: -------------------------------------------------------------------------------- 1 | {"name": "macaw", "vers": "0.2.0", "deps": [{"name": "glam", "req": "^0.9.5", "features": [], "optional": false, "default_features": false, "target": null, "kind": "normal", "registry": "https://github.com/rust-lang/crates.io-index"}, {"name": "serde", "req": "^1.0", "features": [], "optional": true, "default_features": false, "target": null, "kind": "normal", "registry": "https://github.com/rust-lang/crates.io-index"}], "cksum": 
"6516bcd0e59d72114c0dee6d895ee1aa8663949f4ee2885db97605f6f2450d1a", "features": {"default": [], "with_serde": ["serde", "glam/serde"]}, "yanked": false, "links": null} 2 | {"name": "macaw", "vers": "0.3.0", "deps": [{"name": "glam", "req": "^0.10.0", "features": ["std"], "optional": false, "default_features": false, "target": null, "kind": "normal", "registry": "https://github.com/rust-lang/crates.io-index"}, {"name": "serde", "req": "^1.0", "features": [], "optional": true, "default_features": false, "target": null, "kind": "normal", "registry": "https://github.com/rust-lang/crates.io-index"}], "cksum": "9a1c44d7fa4ba0fda79bfc9b38bef59cf49510daf864372f3754a4caece46b58", "features": {"default": [], "with_serde": ["serde", "glam/serde"]}, "yanked": false, "links": null} 3 | {"name": "macaw", "vers": "0.3.1", "deps": [{"name": "glam", "req": "^0.10.2", "features": ["std"], "optional": false, "default_features": false, "target": null, "kind": "normal", "registry": "https://github.com/rust-lang/crates.io-index"}, {"name": "serde", "req": "^1.0", "features": [], "optional": true, "default_features": false, "target": null, "kind": "normal", "registry": "https://github.com/rust-lang/crates.io-index"}], "cksum": "d469c774e76103775e12185df81ed38c84f2fb865db436ba6eda1a1cfa1a009b", "features": {"default": [], "with_serde": ["serde", "glam/serde"]}, "yanked": false, "links": null} 4 | {"name": "macaw", "vers": "0.4.0", "deps": [{"name": "glam", "req": "^0.10.2", "features": ["std"], "optional": false, "default_features": false, "target": null, "kind": "normal", "registry": "https://github.com/rust-lang/crates.io-index"}, {"name": "serde", "req": "^1.0", "features": [], "optional": true, "default_features": false, "target": null, "kind": "normal", "registry": "https://github.com/rust-lang/crates.io-index"}], "cksum": "b38f48a9a3f723b4f46ed6a8c265ee45d6b20d284b2feafafb00819e9b2e833c", "features": {"default": [], "with_serde": ["serde", "glam/serde"]}, "yanked": false, "links": null} 5 | {"name": "macaw", "vers": "0.5.0", "deps": [{"name": "glam", "req": "^0.11.1", "features": [], "optional": false, "default_features": false, "target": null, "kind": "normal", "registry": "https://github.com/rust-lang/crates.io-index"}, {"name": "serde", "req": "^1.0", "features": ["derive"], "optional": true, "default_features": false, "target": null, "kind": "normal", "registry": "https://github.com/rust-lang/crates.io-index"}], "cksum": "a7f702887e3990ea0a4c83ce90285cc09650c7d95b49deb5c3ead8817eb8284c", "features": {"default": ["std"], "libm": ["glam/libm"], "std": ["glam/std"], "with_serde": ["serde", "glam/serde"]}, "yanked": false, "links": null} 6 | -------------------------------------------------------------------------------- /tests/sync_crates_io.rs: -------------------------------------------------------------------------------- 1 | use anyhow::Context; 2 | use cargo_fetcher as cf; 3 | use cf::{Krate, RegistrySource, Source}; 4 | 5 | mod tutil; 6 | use tutil as util; 7 | 8 | #[tokio::test] 9 | async fn all_missing() { 10 | let fs_root = util::tempdir(); 11 | let registry = std::sync::Arc::new(util::crates_io_registry()); 12 | let registries = vec![registry.clone()]; 13 | let mut fs_ctx = util::fs_ctx(fs_root.pb(), registries); 14 | 15 | let missing_root = util::tempdir(); 16 | fs_ctx.root_dir = missing_root.pb(); 17 | 18 | fs_ctx.krates = vec![ 19 | Krate { 20 | name: "ansi_term".to_owned(), 21 | version: "0.11.0".to_owned(), 22 | source: Source::Registry(RegistrySource { 23 | registry: registry.clone(), 24 | 
chksum: "ee49baf6cb617b853aa8d93bf420db2383fab46d314482ca2803b40d5fde979b" 25 | .to_owned(), 26 | }), 27 | }, 28 | Krate { 29 | name: "base64".to_owned(), 30 | version: "0.10.1".to_owned(), 31 | source: Source::Registry(RegistrySource { 32 | registry: registry.clone(), 33 | chksum: "0b25d992356d2eb0ed82172f5248873db5560c4721f564b13cb5193bda5e668e" 34 | .to_owned(), 35 | }), 36 | }, 37 | Krate { 38 | name: "uuid".to_owned(), 39 | version: "0.7.4".to_owned(), 40 | source: Source::Registry(RegistrySource { 41 | registry, 42 | chksum: "90dbc611eb48397705a6b0f6e917da23ae517e4d127123d2cf7674206627d32a" 43 | .to_owned(), 44 | }), 45 | }, 46 | ]; 47 | 48 | cf::mirror::crates(&fs_ctx) 49 | .await 50 | .expect("failed to mirror crates"); 51 | fs_ctx.prep_sync_dirs().expect("create base dirs"); 52 | assert_eq!( 53 | cf::sync::crates(&fs_ctx) 54 | .await 55 | .expect("synced 3 crates") 56 | .good, 57 | 3, 58 | ); 59 | 60 | let (cache_root, src_root) = util::get_sync_dirs(&fs_ctx); 61 | 62 | // Ensure the unmutated crates are in the cache directory 63 | { 64 | for krate in &fs_ctx.krates { 65 | let bytes = { 66 | let path = cache_root.join(format!("{}-{}.crate", krate.name, krate.version)); 67 | 68 | std::fs::read(&path) 69 | .with_context(|| format!("{krate:#} {path}")) 70 | .expect("can't read") 71 | }; 72 | 73 | match &krate.source { 74 | Source::Registry(rs) => cf::util::validate_checksum(&bytes, &rs.chksum) 75 | .expect("failed to validate checksum"), 76 | Source::Git { .. } => unreachable!(), 77 | } 78 | } 79 | } 80 | 81 | // Ensure the crates are unpacked 82 | { 83 | for krate in &fs_ctx.krates { 84 | let path = src_root.join(format!("{}-{}/Cargo.toml", krate.name, krate.version)); 85 | assert!(path.exists(), "didn't find unpacked {path}"); 86 | } 87 | } 88 | } 89 | 90 | #[tokio::test] 91 | async fn some_missing() { 92 | let fs_root = util::tempdir(); 93 | let registry = std::sync::Arc::new(util::crates_io_registry()); 94 | let mut fs_ctx = util::fs_ctx(fs_root.pb(), vec![registry.clone()]); 95 | 96 | let missing_root = util::tempdir(); 97 | fs_ctx.root_dir = missing_root.pb(); 98 | 99 | fs_ctx.krates = vec![ 100 | Krate { 101 | name: "ansi_term".to_owned(), 102 | version: "0.11.0".to_owned(), 103 | source: Source::Registry(RegistrySource { 104 | registry: registry.clone(), 105 | chksum: "ee49baf6cb617b853aa8d93bf420db2383fab46d314482ca2803b40d5fde979b" 106 | .to_owned(), 107 | }), 108 | }, 109 | Krate { 110 | name: "base64".to_owned(), 111 | version: "0.10.1".to_owned(), 112 | source: Source::Registry(RegistrySource { 113 | registry: registry.clone(), 114 | chksum: "0b25d992356d2eb0ed82172f5248873db5560c4721f564b13cb5193bda5e668e" 115 | .to_owned(), 116 | }), 117 | }, 118 | Krate { 119 | name: "uuid".to_owned(), 120 | version: "0.7.4".to_owned(), 121 | source: Source::Registry(RegistrySource { 122 | registry, 123 | chksum: "90dbc611eb48397705a6b0f6e917da23ae517e4d127123d2cf7674206627d32a" 124 | .to_owned(), 125 | }), 126 | }, 127 | ]; 128 | 129 | tracing::info!("mirroring crates"); 130 | 131 | // Download and store the crates in the local fs backend 132 | cf::mirror::crates(&fs_ctx) 133 | .await 134 | .expect("failed to mirror crates"); 135 | 136 | fs_ctx.prep_sync_dirs().expect("create base dirs"); 137 | 138 | // Sync just the uuid crate (the last entry) to the local store 139 | let stored = fs_ctx.krates.clone(); 140 | fs_ctx.krates = vec![stored[2].clone()]; 141 | assert_eq!( 142 | cf::sync::crates(&fs_ctx) 143 | .await 144 | .expect("synced 1 crate") 145 | .good, 146 | 1 147 | ); 148 | 149 | let 
(cache_root, src_root) = util::get_sync_dirs(&fs_ctx); 150 | 151 | // Ensure the unmutated crates are in the cache directory 152 | { 153 | for krate in &fs_ctx.krates { 154 | let bytes = 155 | std::fs::read(cache_root.join(format!("{}-{}.crate", krate.name, krate.version))) 156 | .with_context(|| format!("{krate:#}")) 157 | .expect("can't read"); 158 | 159 | match &krate.source { 160 | Source::Registry(rs) => cf::util::validate_checksum(&bytes, &rs.chksum) 161 | .expect("failed to validate checksum"), 162 | Source::Git { .. } => unreachable!(), 163 | } 164 | } 165 | } 166 | 167 | // Ensure the crates are unpacked 168 | { 169 | for krate in &fs_ctx.krates { 170 | assert!(src_root 171 | .join(format!("{}-{}/Cargo.toml", krate.name, krate.version)) 172 | .exists()); 173 | } 174 | } 175 | 176 | // Sync all of the crates; since we've already synced uuid, we should 177 | // only receive the other 2 178 | fs_ctx.krates = stored; 179 | assert_eq!( 180 | cf::sync::crates(&fs_ctx) 181 | .await 182 | .expect("synced 2 crates") 183 | .good, 184 | 2 185 | ); 186 | 187 | // Ensure the unmutated crates are in the cache directory 188 | { 189 | for krate in &fs_ctx.krates { 190 | let bytes = 191 | std::fs::read(cache_root.join(format!("{}-{}.crate", krate.name, krate.version))) 192 | .with_context(|| format!("{krate:#}")) 193 | .expect("can't read"); 194 | 195 | match &krate.source { 196 | Source::Registry(rs) => cf::util::validate_checksum(&bytes, &rs.chksum) 197 | .expect("failed to validate checksum"), 198 | Source::Git { .. } => unreachable!(), 199 | } 200 | } 201 | } 202 | 203 | // Ensure the crates are unpacked 204 | { 205 | for krate in &fs_ctx.krates { 206 | let path = src_root.join(format!("{}-{}/Cargo.toml", krate.name, krate.version)); 207 | assert!(path.exists(), "didn't find unpacked {path}"); 208 | } 209 | } 210 | } 211 | 212 | #[tokio::test] 213 | async fn none_missing() { 214 | let fs_root = util::tempdir(); 215 | let registry = std::sync::Arc::new(util::crates_io_registry()); 216 | let registries = vec![registry.clone()]; 217 | let mut fs_ctx = util::fs_ctx(fs_root.pb(), registries); 218 | 219 | let missing_root = util::tempdir(); 220 | fs_ctx.root_dir = missing_root.pb(); 221 | 222 | fs_ctx.krates = vec![ 223 | Krate { 224 | name: "ansi_term".to_owned(), 225 | version: "0.11.0".to_owned(), 226 | source: Source::Registry(RegistrySource { 227 | registry: registry.clone(), 228 | chksum: "ee49baf6cb617b853aa8d93bf420db2383fab46d314482ca2803b40d5fde979b" 229 | .to_owned(), 230 | }), 231 | }, 232 | Krate { 233 | name: "base64".to_owned(), 234 | version: "0.10.1".to_owned(), 235 | source: Source::Registry(RegistrySource { 236 | registry: registry.clone(), 237 | chksum: "0b25d992356d2eb0ed82172f5248873db5560c4721f564b13cb5193bda5e668e" 238 | .to_owned(), 239 | }), 240 | }, 241 | Krate { 242 | name: "uuid".to_owned(), 243 | version: "0.7.4".to_owned(), 244 | source: Source::Registry(RegistrySource { 245 | registry, 246 | chksum: "90dbc611eb48397705a6b0f6e917da23ae517e4d127123d2cf7674206627d32a" 247 | .to_owned(), 248 | }), 249 | }, 250 | ]; 251 | 252 | cf::mirror::crates(&fs_ctx) 253 | .await 254 | .expect("failed to mirror crates"); 255 | fs_ctx.prep_sync_dirs().expect("create base dirs"); 256 | 257 | assert_eq!( 258 | cf::sync::crates(&fs_ctx) 259 | .await 260 | .expect("synced 3 crates") 261 | .good, 262 | 3 263 | ); 264 | 265 | let (cache_root, src_root) = util::get_sync_dirs(&fs_ctx); 266 | 267 | // Ensure the unmutated crates are in the cache directory 268 | { 269 | for krate in 
&fs_ctx.krates { 270 | let bytes = 271 | std::fs::read(cache_root.join(format!("{}-{}.crate", krate.name, krate.version))) 272 | .with_context(|| format!("{krate:#}")) 273 | .expect("can't read"); 274 | 275 | match &krate.source { 276 | Source::Registry(rs) => cf::util::validate_checksum(&bytes, &rs.chksum) 277 | .expect("failed to validate checksum"), 278 | Source::Git { .. } => unreachable!(), 279 | } 280 | } 281 | } 282 | 283 | // Ensure the crates are unpacked 284 | { 285 | for krate in &fs_ctx.krates { 286 | assert!(src_root 287 | .join(format!("{}-{}/Cargo.toml", krate.name, krate.version)) 288 | .exists()); 289 | } 290 | } 291 | 292 | assert_eq!( 293 | cf::sync::crates(&fs_ctx) 294 | .await 295 | .expect("synced 0 crates") 296 | .total_bytes, 297 | 0 298 | ); 299 | 300 | // Ensure the unmutated crates are in the cache directory 301 | { 302 | for krate in &fs_ctx.krates { 303 | let bytes = 304 | std::fs::read(cache_root.join(format!("{}-{}.crate", krate.name, krate.version))) 305 | .with_context(|| format!("{krate:#}")) 306 | .expect("can't read"); 307 | 308 | match &krate.source { 309 | Source::Registry(rs) => cf::util::validate_checksum(&bytes, &rs.chksum) 310 | .expect("failed to validate checksum"), 311 | Source::Git { .. } => unreachable!(), 312 | } 313 | } 314 | } 315 | 316 | // Ensure the crates are unpacked 317 | { 318 | for krate in &fs_ctx.krates { 319 | let path = src_root.join(format!("{}-{}/Cargo.toml", krate.name, krate.version)); 320 | assert!(path.exists(), "didn't find unpacked {path}"); 321 | } 322 | } 323 | } 324 | -------------------------------------------------------------------------------- /tests/sync_git.rs: -------------------------------------------------------------------------------- 1 | use anyhow::Context; 2 | use cargo_fetcher as cf; 3 | use cf::{Krate, Source}; 4 | 5 | mod tutil; 6 | use tutil as util; 7 | 8 | macro_rules! 
git_source { 9 | ($url:expr) => {{ 10 | let url = cf::Url::parse($url).expect("failed to parse url"); 11 | Source::from_git_url(&url) 12 | .context("failed to create git source") 13 | .unwrap() 14 | }}; 15 | } 16 | 17 | #[tokio::test] 18 | async fn multiple_from_same_repo() { 19 | util::hook_logger(); 20 | 21 | let fs_root = util::tempdir(); 22 | let registry = std::sync::Arc::new(util::crates_io_registry()); 23 | let registries = vec![registry]; 24 | let mut fs_ctx = util::fs_ctx(fs_root.pb(), registries); 25 | 26 | let missing_root = util::tempdir(); 27 | fs_ctx.root_dir = missing_root.pb(); 28 | 29 | fs_ctx.krates = vec![ 30 | Krate { 31 | name: "asio-sys".to_owned(), 32 | version: "0.2.1".to_owned(), 33 | source: git_source!("git+https://github.com/RustAudio/cpal?rev=971c46346#971c463462e3560e66f7629e5afcd6b25c4411ab"), 34 | }, 35 | Krate { 36 | name: "cpal".to_owned(), 37 | version: "0.13.5".to_owned(), 38 | source: git_source!("git+https://github.com/rustaudio/cpal?rev=971c46346#971c463462e3560e66f7629e5afcd6b25c4411ab"), 39 | }, 40 | ]; 41 | 42 | cf::mirror::crates(&fs_ctx) 43 | .await 44 | .expect("failed to mirror crates"); 45 | fs_ctx.prep_sync_dirs().expect("create base dirs"); 46 | assert_eq!( 47 | cf::sync::crates(&fs_ctx) 48 | .await 49 | .expect("synced 1 git source") 50 | .good, 51 | 1, 52 | ); 53 | 54 | let ident = "c2179e82da06da7e"; 55 | let rev = "971c463"; 56 | 57 | // Ensure there is a db for cpal 58 | { 59 | let db_root = fs_ctx.root_dir.join(cf::sync::GIT_DB_DIR); 60 | 61 | let cpal_root = db_root.join(format!("cpal-{ident}")); 62 | assert!(cpal_root.exists(), "unable to find cpal db"); 63 | 64 | // We expect a pack and idx file 65 | let mut has_idx = false; 66 | let mut has_pack = false; 67 | for entry in std::fs::read_dir(cpal_root.join("objects/pack")).unwrap() { 68 | let entry = entry.unwrap(); 69 | 70 | let path = entry.path(); 71 | let path = path.to_str().unwrap(); 72 | 73 | if path.ends_with(".pack") { 74 | has_pack = true; 75 | } 76 | 77 | if path.ends_with(".idx") { 78 | has_idx = true; 79 | } 80 | } 81 | 82 | assert!(has_idx && has_pack); 83 | } 84 | 85 | // Ensure cpal is checked out 86 | { 87 | let co_root = fs_ctx.root_dir.join(cf::sync::GIT_CO_DIR); 88 | 89 | let cpal_root = co_root.join(format!("cpal-{ident}")); 90 | assert!(cpal_root.exists(), "unable to find cpal checkout"); 91 | 92 | assert!(cpal_root.join(rev).exists(), "unable to find cpal checkout"); 93 | 94 | let ok = cpal_root.join(format!("{rev}/.cargo-ok")); 95 | assert!(ok.exists(), "unable to find .cargo-ok"); 96 | 97 | assert_eq!(std::fs::read_to_string(ok).unwrap(), ""); 98 | } 99 | } 100 | 101 | #[tokio::test] 102 | async fn proper_head() { 103 | util::hook_logger(); 104 | 105 | let fs_root = util::tempdir(); 106 | let registry = std::sync::Arc::new(util::crates_io_registry()); 107 | let registries = vec![registry]; 108 | let mut fs_ctx = util::fs_ctx(fs_root.pb(), registries); 109 | 110 | let missing_root = util::tempdir(); 111 | fs_ctx.root_dir = missing_root.pb(); 112 | 113 | fs_ctx.krates = vec![ 114 | Krate { 115 | name: "gilrs".to_owned(), 116 | version: "0.10.2".to_owned(), 117 | source: git_source!("git+https://gitlab.com/gilrs-project/gilrs.git?rev=1bbec17c9ecb6884f96370064b34544f132c93af#1bbec17c9ecb6884f96370064b34544f132c93af"), 118 | }, 119 | Krate { 120 | name: "gilrs-core".to_owned(), 121 | version: "0.5.6".to_owned(), 122 | source: 
git_source!("git+https://gitlab.com/gilrs-project/gilrs.git?rev=1bbec17c9ecb6884f96370064b34544f132c93af#1bbec17c9ecb6884f96370064b34544f132c93af"), 123 | }, 124 | ]; 125 | 126 | cf::mirror::crates(&fs_ctx) 127 | .await 128 | .expect("failed to mirror crates"); 129 | fs_ctx.prep_sync_dirs().expect("create base dirs"); 130 | assert_eq!( 131 | cf::sync::crates(&fs_ctx) 132 | .await 133 | .expect("synced 1 git source") 134 | .good, 135 | 1, 136 | ); 137 | 138 | let ident = "7804d1d6a17891c9"; 139 | let rev = "1bbec17"; 140 | 141 | // Ensure that gilrs's checkout matches what cargo expects 142 | let mut checkout = fs_ctx 143 | .root_dir 144 | .join(format!("{}/gilrs-{ident}/{rev}", cf::sync::GIT_CO_DIR)); 145 | 146 | let head = |path| { 147 | let mut cmd = std::process::Command::new("git"); 148 | cmd.current_dir(path); 149 | cmd.args(["rev-parse", "HEAD"]); 150 | cmd.stdout(std::process::Stdio::piped()); 151 | String::from_utf8(cmd.output().unwrap().stdout).unwrap() 152 | }; 153 | 154 | assert_eq!( 155 | head(checkout.clone()).trim(), 156 | "1bbec17c9ecb6884f96370064b34544f132c93af" 157 | ); 158 | checkout.push("gilrs/SDL_GameControllerDB"); 159 | assert_eq!( 160 | head(checkout).trim(), 161 | "c3517cf0d87b35ebe6ae4f738e1d96166e44b58f" 162 | ); 163 | } 164 | -------------------------------------------------------------------------------- /tests/tutil.rs: -------------------------------------------------------------------------------- 1 | #![allow(dead_code)] 2 | 3 | use cargo_fetcher as cf; 4 | use cf::{Path, PathBuf}; 5 | 6 | pub fn fs_ctx(root: PathBuf, registries: Vec<std::sync::Arc<cf::Registry>>) -> cf::Ctx { 7 | let backend = std::sync::Arc::new( 8 | cf::backends::fs::FsBackend::new(cf::FilesystemLocation { path: &root }) 9 | .expect("failed to create fs backend"), 10 | ); 11 | 12 | cf::Ctx::new(None, backend, Vec::new(), registries).expect("failed to create context") 13 | } 14 | 15 | pub struct TempDir { 16 | pub td: tempfile::TempDir, 17 | } 18 | 19 | impl TempDir { 20 | #[inline] 21 | pub fn path(&self) -> &Path { 22 | Path::from_path(self.td.path()).unwrap() 23 | } 24 | 25 | #[inline] 26 | pub fn pb(&self) -> PathBuf { 27 | self.path().to_owned() 28 | } 29 | 30 | #[inline] 31 | pub fn into_path(self) -> PathBuf { 32 | PathBuf::from_path_buf(self.td.into_path()).unwrap() 33 | } 34 | } 35 | 36 | impl Default for TempDir { 37 | #[inline] 38 | fn default() -> Self { 39 | Self { 40 | td: tempfile::TempDir::new_in(env!("CARGO_TARGET_TMPDIR")).unwrap(), 41 | } 42 | } 43 | } 44 | 45 | impl AsRef<std::path::Path> for TempDir { 46 | #[inline] 47 | fn as_ref(&self) -> &std::path::Path { 48 | self.td.path() 49 | } 50 | } 51 | 52 | #[inline] 53 | pub fn tempdir() -> TempDir { 54 | TempDir::default() 55 | } 56 | 57 | pub fn get_sync_dirs(ctx: &cf::Ctx) -> (PathBuf, PathBuf) { 58 | ctx.registries[0].sync_dirs(&ctx.root_dir) 59 | } 60 | 61 | #[inline] 62 | pub fn crates_io_registry() -> cf::Registry { 63 | use anyhow::Context as _; 64 | let protocol = std::env::var("CARGO_FETCHER_CRATES_IO_PROTOCOL") 65 | .context("invalid env") 66 | .and_then(|prot| prot.parse()) 67 | .unwrap_or(cf::RegistryProtocol::Sparse); 68 | 69 | cf::Registry::crates_io(protocol) 70 | } 71 | 72 | pub fn hook_logger() { 73 | static HOOK: std::sync::Once = std::sync::Once::new(); 74 | 75 | HOOK.call_once(|| { 76 | let mut env_filter = tracing_subscriber::EnvFilter::from_default_env(); 77 | 78 | // If a user specifies a log level, we assume it only pertains to cargo_fetcher; 79 | // if they want to trace other crates they can use the RUST_LOG env approach 80 | env_filter = 
env_filter.add_directive( 81 | format!("cargo_fetcher={}", tracing::Level::TRACE) 82 | .parse() 83 | .unwrap(), 84 | ); 85 | 86 | let subscriber = tracing_subscriber::FmtSubscriber::builder().with_env_filter(env_filter); 87 | 88 | tracing::subscriber::set_global_default(subscriber.finish()).unwrap(); 89 | }); 90 | } 91 | -------------------------------------------------------------------------------- /tests/unpretty-wasi.txt: -------------------------------------------------------------------------------- 1 | {"name":"wasi","vers":"0.0.0","deps":[],"cksum":"4edc404bfb264a8d68c1b8b27ad6d2d75ef7f35206f732326bec14d792449b60","features":{},"yanked":false,"links":null} 2 | {"name":"wasi","vers":"0.3.0","deps":[],"cksum":"b44366f326fac32f38353093cc9eafa471208549398241ccc863f2c1c21f6a8b","features":{},"yanked":false,"links":null} 3 | {"name":"wasi","vers":"0.4.0","deps":[],"cksum":"7217d1d6f4f2ed4498e047877d3c3a71be8838d8a25ac93814436cd61149fbed","features":{},"yanked":false,"links":null} 4 | {"name":"wasi","vers":"0.5.0","deps":[],"cksum":"fd5442abcac6525a045cc8c795aedb60da7a2e5e89c7bf18a0d5357849bb23c7","features":{},"yanked":false,"links":null} 5 | {"name":"wasi","vers":"0.6.0","deps":[{"name":"compiler_builtins","req":"^0.1","features":[],"optional":true,"default_features":true,"target":null,"kind":"normal"},{"name":"core","req":"^1.0","features":[],"optional":true,"default_features":true,"target":null,"kind":"normal","package":"rustc-std-workspace-core"}],"cksum":"334e45554d85b9d8973581e023a153e08dbd3e72c4a9f2ef5750f97389745145","features":{"rustc-dep-of-std":["compiler_builtins","core"]},"yanked":false,"links":null} 6 | {"name":"wasi","vers":"0.7.0","deps":[{"name":"compiler_builtins","req":"^0.1","features":[],"optional":true,"default_features":true,"target":null,"kind":"normal"},{"name":"core","req":"^1.0","features":[],"optional":true,"default_features":true,"target":null,"kind":"normal","package":"rustc-std-workspace-core"},{"name":"rustc-std-workspace-alloc","req":"^1.0","features":[],"optional":true,"default_features":true,"target":null,"kind":"normal"}],"cksum":"b89c3ce4ce14bdc6fb6beaf9ec7928ca331de5df7e5ea278375642a2f478570d","features":{"default":["alloc"],"rustc-dep-of-std":["compiler_builtins","core","rustc-std-workspace-alloc"],"alloc":[]},"yanked":false,"links":null} 7 | {"name":"wasi","vers":"0.9.0+wasi-snapshot-preview1","deps":[{"name":"compiler_builtins","req":"^0.1","features":[],"optional":true,"default_features":true,"target":null,"kind":"normal"},{"name":"core","req":"^1.0","features":[],"optional":true,"default_features":true,"target":null,"kind":"normal","package":"rustc-std-workspace-core"},{"name":"rustc-std-workspace-alloc","req":"^1.0","features":[],"optional":true,"default_features":true,"target":null,"kind":"normal"}],"cksum":"cccddf32554fecc6acb585f82a32a72e28b48f8c4c1883ddfeeeaa96f7d8e519","features":{"rustc-dep-of-std":["compiler_builtins","core","rustc-std-workspace-alloc"],"default":["std"],"std":[]},"yanked":false,"links":null} 8 | 
{"name":"wasi","vers":"0.10.0+wasi-snapshot-preview1","deps":[{"name":"compiler_builtins","req":"^0.1","features":[],"optional":true,"default_features":true,"target":null,"kind":"normal"},{"name":"core","req":"^1.0","features":[],"optional":true,"default_features":true,"target":null,"kind":"normal","package":"rustc-std-workspace-core"},{"name":"rustc-std-workspace-alloc","req":"^1.0","features":[],"optional":true,"default_features":true,"target":null,"kind":"normal"}],"cksum":"1a143597ca7c7793eff794def352d41792a93c481eb1042423ff7ff72ba2c31f","features":{"rustc-dep-of-std":["compiler_builtins","core","rustc-std-workspace-alloc"],"std":[],"default":["std"]},"yanked":false,"links":null} --------------------------------------------------------------------------------